aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/conf/defines15
-rw-r--r--sys/conf/files274
-rw-r--r--sys/conf/files.newconf274
-rw-r--r--sys/conf/newvers.sh48
-rw-r--r--sys/conf/nfsswapvmunix.c152
-rw-r--r--sys/conf/param.c145
-rw-r--r--sys/conf/systags.sh72
-rw-r--r--sys/fs/cd9660/TODO77
-rw-r--r--sys/fs/cd9660/TODO.hibler22
-rw-r--r--sys/fs/cd9660/cd9660_bmap.c102
-rw-r--r--sys/fs/cd9660/cd9660_lookup.c465
-rw-r--r--sys/fs/cd9660/cd9660_node.c648
-rw-r--r--sys/fs/cd9660/cd9660_node.h143
-rw-r--r--sys/fs/cd9660/cd9660_rrip.c685
-rw-r--r--sys/fs/cd9660/cd9660_rrip.h146
-rw-r--r--sys/fs/cd9660/cd9660_util.c236
-rw-r--r--sys/fs/cd9660/cd9660_vfsops.c681
-rw-r--r--sys/fs/cd9660/cd9660_vnops.c1038
-rw-r--r--sys/fs/cd9660/iso.h256
-rw-r--r--sys/fs/cd9660/iso_rrip.h83
-rw-r--r--sys/fs/deadfs/dead_vnops.c354
-rw-r--r--sys/fs/fdescfs/fdesc.h82
-rw-r--r--sys/fs/fdescfs/fdesc_vfsops.c288
-rw-r--r--sys/fs/fdescfs/fdesc_vnops.c974
-rw-r--r--sys/fs/fifofs/fifo.h85
-rw-r--r--sys/fs/fifofs/fifo_vnops.c494
-rw-r--r--sys/fs/nullfs/null.h75
-rw-r--r--sys/fs/nullfs/null_subr.c293
-rw-r--r--sys/fs/nullfs/null_vfsops.c366
-rw-r--r--sys/fs/nullfs/null_vnops.c462
-rw-r--r--sys/fs/portalfs/portal.h72
-rw-r--r--sys/fs/portalfs/portal_vfsops.c313
-rw-r--r--sys/fs/portalfs/portal_vnops.c707
-rw-r--r--sys/fs/procfs/README113
-rw-r--r--sys/fs/procfs/procfs.h186
-rw-r--r--sys/fs/procfs/procfs_ctl.c302
-rw-r--r--sys/fs/procfs/procfs_fpregs.c87
-rw-r--r--sys/fs/procfs/procfs_mem.c302
-rw-r--r--sys/fs/procfs/procfs_note.c73
-rw-r--r--sys/fs/procfs/procfs_regs.c87
-rw-r--r--sys/fs/procfs/procfs_status.c145
-rw-r--r--sys/fs/procfs/procfs_subr.c314
-rw-r--r--sys/fs/procfs/procfs_vfsops.c243
-rw-r--r--sys/fs/procfs/procfs_vnops.c814
-rw-r--r--sys/fs/specfs/spec_vnops.c689
-rw-r--r--sys/fs/umapfs/umap.h92
-rw-r--r--sys/fs/umapfs/umap_subr.c397
-rw-r--r--sys/fs/umapfs/umap_vfsops.c407
-rw-r--r--sys/fs/umapfs/umap_vnops.c488
-rw-r--r--sys/fs/unionfs/union.h117
-rw-r--r--sys/fs/unionfs/union_subr.c744
-rw-r--r--sys/fs/unionfs/union_vfsops.c550
-rw-r--r--sys/fs/unionfs/union_vnops.c1495
-rw-r--r--sys/gnu/ext2fs/ext2_bmap.c294
-rw-r--r--sys/gnu/ext2fs/ext2_ihash.c154
-rw-r--r--sys/gnu/ext2fs/ext2_mount.h83
-rw-r--r--sys/gnu/ext2fs/inode.h162
-rw-r--r--sys/gnu/fs/ext2fs/ext2_bmap.c294
-rw-r--r--sys/gnu/fs/ext2fs/ext2_mount.h83
-rw-r--r--sys/gnu/fs/ext2fs/inode.h162
-rw-r--r--sys/isofs/cd9660/TODO77
-rw-r--r--sys/isofs/cd9660/TODO.hibler22
-rw-r--r--sys/isofs/cd9660/cd9660_bmap.c102
-rw-r--r--sys/isofs/cd9660/cd9660_lookup.c465
-rw-r--r--sys/isofs/cd9660/cd9660_node.c648
-rw-r--r--sys/isofs/cd9660/cd9660_node.h143
-rw-r--r--sys/isofs/cd9660/cd9660_rrip.c685
-rw-r--r--sys/isofs/cd9660/cd9660_rrip.h146
-rw-r--r--sys/isofs/cd9660/cd9660_util.c236
-rw-r--r--sys/isofs/cd9660/cd9660_vfsops.c681
-rw-r--r--sys/isofs/cd9660/cd9660_vnops.c1038
-rw-r--r--sys/isofs/cd9660/iso.h256
-rw-r--r--sys/isofs/cd9660/iso_rrip.h83
-rw-r--r--sys/kern/Make.tags.inc18
-rw-r--r--sys/kern/Makefile50
-rw-r--r--sys/kern/init_main.c394
-rw-r--r--sys/kern/init_sysent.c480
-rw-r--r--sys/kern/kern_acct.c116
-rw-r--r--sys/kern/kern_clock.c528
-rw-r--r--sys/kern/kern_descrip.c914
-rw-r--r--sys/kern/kern_exec.c64
-rw-r--r--sys/kern/kern_exit.c492
-rw-r--r--sys/kern/kern_fork.c315
-rw-r--r--sys/kern/kern_ktrace.c466
-rw-r--r--sys/kern/kern_malloc.c381
-rw-r--r--sys/kern/kern_physio.c93
-rw-r--r--sys/kern/kern_proc.c401
-rw-r--r--sys/kern/kern_prot.c566
-rw-r--r--sys/kern/kern_resource.c476
-rw-r--r--sys/kern/kern_sig.c1197
-rw-r--r--sys/kern/kern_subr.c210
-rw-r--r--sys/kern/kern_synch.c666
-rw-r--r--sys/kern/kern_sysctl.c787
-rw-r--r--sys/kern/kern_tc.c528
-rw-r--r--sys/kern/kern_time.c416
-rw-r--r--sys/kern/kern_timeout.c528
-rw-r--r--sys/kern/kern_xxx.c138
-rw-r--r--sys/kern/makesyscalls.sh171
-rw-r--r--sys/kern/subr_autoconf.c342
-rw-r--r--sys/kern/subr_clist.c159
-rw-r--r--sys/kern/subr_disklabel.c364
-rw-r--r--sys/kern/subr_log.c232
-rw-r--r--sys/kern/subr_param.c145
-rw-r--r--sys/kern/subr_prf.c601
-rw-r--r--sys/kern/subr_prof.c256
-rw-r--r--sys/kern/subr_rmap.c81
-rw-r--r--sys/kern/subr_xxx.c100
-rw-r--r--sys/kern/sys_generic.c683
-rw-r--r--sys/kern/sys_process.c74
-rw-r--r--sys/kern/sys_socket.c197
-rw-r--r--sys/kern/syscalls.c251
-rw-r--r--sys/kern/syscalls.master276
-rw-r--r--sys/kern/tty.c1923
-rw-r--r--sys/kern/tty_compat.c411
-rw-r--r--sys/kern/tty_conf.c126
-rw-r--r--sys/kern/tty_pty.c691
-rw-r--r--sys/kern/tty_subr.c159
-rw-r--r--sys/kern/tty_tb.c366
-rw-r--r--sys/kern/tty_tty.c147
-rw-r--r--sys/kern/uipc_domain.c220
-rw-r--r--sys/kern/uipc_mbuf.c655
-rw-r--r--sys/kern/uipc_proto.c72
-rw-r--r--sys/kern/uipc_sockbuf.c755
-rw-r--r--sys/kern/uipc_socket.c1024
-rw-r--r--sys/kern/uipc_socket2.c755
-rw-r--r--sys/kern/uipc_syscalls.c1217
-rw-r--r--sys/kern/uipc_usrreq.c823
-rw-r--r--sys/kern/vfs_bio.c339
-rw-r--r--sys/kern/vfs_cache.c328
-rw-r--r--sys/kern/vfs_cluster.c746
-rw-r--r--sys/kern/vfs_conf.c260
-rw-r--r--sys/kern/vfs_export.c1322
-rw-r--r--sys/kern/vfs_extattr.c2107
-rw-r--r--sys/kern/vfs_init.c246
-rw-r--r--sys/kern/vfs_lookup.c506
-rw-r--r--sys/kern/vfs_mount.c260
-rw-r--r--sys/kern/vfs_subr.c1322
-rw-r--r--sys/kern/vfs_syscalls.c2107
-rw-r--r--sys/kern/vfs_vnops.c422
-rw-r--r--sys/kern/vnode_if.pl433
-rw-r--r--sys/kern/vnode_if.sh433
-rw-r--r--sys/kern/vnode_if.src296
-rw-r--r--sys/libkern/Makefile20
-rw-r--r--sys/libkern/adddi3.c60
-rw-r--r--sys/libkern/anddi3.c58
-rw-r--r--sys/libkern/ashldi3.c66
-rw-r--r--sys/libkern/ashrdi3.c75
-rw-r--r--sys/libkern/bcmp.c59
-rw-r--r--sys/libkern/cmpdi2.c59
-rw-r--r--sys/libkern/divdi3.c65
-rw-r--r--sys/libkern/ffs.c54
-rw-r--r--sys/libkern/iordi3.c58
-rw-r--r--sys/libkern/libkern.h98
-rw-r--r--sys/libkern/locc.c50
-rw-r--r--sys/libkern/lshldi3.c66
-rw-r--r--sys/libkern/lshrdi3.c65
-rw-r--r--sys/libkern/mcount.c178
-rw-r--r--sys/libkern/moddi3.c67
-rw-r--r--sys/libkern/muldi3.c246
-rw-r--r--sys/libkern/negdi2.c57
-rw-r--r--sys/libkern/notdi2.c58
-rw-r--r--sys/libkern/qdivrem.c279
-rw-r--r--sys/libkern/quad.h110
-rw-r--r--sys/libkern/random.c63
-rw-r--r--sys/libkern/rindex.c59
-rw-r--r--sys/libkern/scanc.c50
-rw-r--r--sys/libkern/skpc.c50
-rw-r--r--sys/libkern/strcat.c50
-rw-r--r--sys/libkern/strcmp.c55
-rw-r--r--sys/libkern/strcpy.c50
-rw-r--r--sys/libkern/strlen.c50
-rw-r--r--sys/libkern/strncpy.c68
-rw-r--r--sys/libkern/subdi3.c59
-rw-r--r--sys/libkern/ucmpdi2.c58
-rw-r--r--sys/libkern/udivdi3.c53
-rw-r--r--sys/libkern/umoddi3.c55
-rw-r--r--sys/libkern/xordi3.c58
-rw-r--r--sys/miscfs/deadfs/dead_vnops.c354
-rw-r--r--sys/miscfs/fdesc/fdesc.h82
-rw-r--r--sys/miscfs/fdesc/fdesc_vfsops.c288
-rw-r--r--sys/miscfs/fdesc/fdesc_vnops.c974
-rw-r--r--sys/miscfs/fifofs/fifo.h85
-rw-r--r--sys/miscfs/fifofs/fifo_vnops.c494
-rw-r--r--sys/miscfs/kernfs/kernfs.h56
-rw-r--r--sys/miscfs/kernfs/kernfs_vfsops.c329
-rw-r--r--sys/miscfs/kernfs/kernfs_vnops.c759
-rw-r--r--sys/miscfs/nullfs/null.h75
-rw-r--r--sys/miscfs/nullfs/null_subr.c293
-rw-r--r--sys/miscfs/nullfs/null_vfsops.c366
-rw-r--r--sys/miscfs/nullfs/null_vnops.c462
-rw-r--r--sys/miscfs/portal/portal.h72
-rw-r--r--sys/miscfs/portal/portal_vfsops.c313
-rw-r--r--sys/miscfs/portal/portal_vnops.c707
-rw-r--r--sys/miscfs/procfs/README113
-rw-r--r--sys/miscfs/procfs/procfs.h186
-rw-r--r--sys/miscfs/procfs/procfs_ctl.c302
-rw-r--r--sys/miscfs/procfs/procfs_fpregs.c87
-rw-r--r--sys/miscfs/procfs/procfs_mem.c302
-rw-r--r--sys/miscfs/procfs/procfs_note.c73
-rw-r--r--sys/miscfs/procfs/procfs_regs.c87
-rw-r--r--sys/miscfs/procfs/procfs_status.c145
-rw-r--r--sys/miscfs/procfs/procfs_subr.c314
-rw-r--r--sys/miscfs/procfs/procfs_vfsops.c243
-rw-r--r--sys/miscfs/procfs/procfs_vnops.c814
-rw-r--r--sys/miscfs/specfs/spec_vnops.c689
-rw-r--r--sys/miscfs/specfs/specdev.h127
-rw-r--r--sys/miscfs/umapfs/umap.h92
-rw-r--r--sys/miscfs/umapfs/umap_subr.c397
-rw-r--r--sys/miscfs/umapfs/umap_vfsops.c407
-rw-r--r--sys/miscfs/umapfs/umap_vnops.c488
-rw-r--r--sys/miscfs/union/README7
-rw-r--r--sys/miscfs/union/libc.opendir.c225
-rw-r--r--sys/miscfs/union/union.h117
-rw-r--r--sys/miscfs/union/union_subr.c744
-rw-r--r--sys/miscfs/union/union_vfsops.c550
-rw-r--r--sys/miscfs/union/union_vnops.c1495
-rw-r--r--sys/net/bpf.c1316
-rw-r--r--sys/net/bpf.h252
-rw-r--r--sys/net/bpf_compat.h50
-rw-r--r--sys/net/bpf_filter.c548
-rw-r--r--sys/net/bpfdesc.h98
-rw-r--r--sys/net/if.c670
-rw-r--r--sys/net/if.h363
-rw-r--r--sys/net/if_arp.h85
-rw-r--r--sys/net/if_dl.h80
-rw-r--r--sys/net/if_ethersubr.c675
-rw-r--r--sys/net/if_llc.h145
-rw-r--r--sys/net/if_loop.c247
-rw-r--r--sys/net/if_sl.c839
-rw-r--r--sys/net/if_slvar.h80
-rw-r--r--sys/net/if_types.h95
-rw-r--r--sys/net/netisr.h87
-rw-r--r--sys/net/radix.c757
-rw-r--r--sys/net/radix.h153
-rw-r--r--sys/net/raw_cb.c144
-rw-r--r--sys/net/raw_cb.h69
-rw-r--r--sys/net/raw_usrreq.c306
-rw-r--r--sys/net/route.c538
-rw-r--r--sys/net/route.h261
-rw-r--r--sys/net/rtsock.c833
-rw-r--r--sys/net/slcompress.c535
-rw-r--r--sys/net/slcompress.h157
-rw-r--r--sys/net/slip.h51
-rw-r--r--sys/netccitt/README.hdlc50
-rw-r--r--sys/netccitt/README.packet36
-rw-r--r--sys/netccitt/ccitt_proto.c93
-rw-r--r--sys/netccitt/dll.h83
-rw-r--r--sys/netccitt/hd_debug.c212
-rw-r--r--sys/netccitt/hd_input.c669
-rw-r--r--sys/netccitt/hd_output.c247
-rw-r--r--sys/netccitt/hd_subr.c391
-rw-r--r--sys/netccitt/hd_timer.c147
-rw-r--r--sys/netccitt/hd_var.h107
-rw-r--r--sys/netccitt/hdlc.h156
-rw-r--r--sys/netccitt/if_x25subr.c801
-rw-r--r--sys/netccitt/llc_input.c468
-rw-r--r--sys/netccitt/llc_output.c304
-rw-r--r--sys/netccitt/llc_subr.c2356
-rw-r--r--sys/netccitt/llc_timer.c180
-rw-r--r--sys/netccitt/llc_var.h659
-rw-r--r--sys/netccitt/pk.h207
-rw-r--r--sys/netccitt/pk_acct.c145
-rw-r--r--sys/netccitt/pk_debug.c140
-rw-r--r--sys/netccitt/pk_input.c1119
-rw-r--r--sys/netccitt/pk_llcsubr.c369
-rw-r--r--sys/netccitt/pk_output.c216
-rw-r--r--sys/netccitt/pk_subr.c1192
-rw-r--r--sys/netccitt/pk_timer.c126
-rw-r--r--sys/netccitt/pk_usrreq.c604
-rw-r--r--sys/netccitt/pk_var.h231
-rw-r--r--sys/netccitt/x25.h157
-rw-r--r--sys/netccitt/x25acct.h70
-rw-r--r--sys/netccitt/x25err.h64
-rw-r--r--sys/netinet/icmp_var.h68
-rw-r--r--sys/netinet/if_ether.c554
-rw-r--r--sys/netinet/if_ether.h224
-rw-r--r--sys/netinet/igmp.c313
-rw-r--r--sys/netinet/igmp.h59
-rw-r--r--sys/netinet/igmp_var.h85
-rw-r--r--sys/netinet/in.c622
-rw-r--r--sys/netinet/in.h238
-rw-r--r--sys/netinet/in_cksum.c149
-rw-r--r--sys/netinet/in_pcb.c497
-rw-r--r--sys/netinet/in_pcb.h88
-rw-r--r--sys/netinet/in_proto.c183
-rw-r--r--sys/netinet/in_systm.h56
-rw-r--r--sys/netinet/in_var.h200
-rw-r--r--sys/netinet/ip.h168
-rw-r--r--sys/netinet/ip_icmp.c591
-rw-r--r--sys/netinet/ip_icmp.h160
-rw-r--r--sys/netinet/ip_input.c1166
-rw-r--r--sys/netinet/ip_mroute.c834
-rw-r--r--sys/netinet/ip_mroute.h173
-rw-r--r--sys/netinet/ip_output.c1064
-rw-r--r--sys/netinet/ip_var.h188
-rw-r--r--sys/netinet/raw_ip.c389
-rw-r--r--sys/netinet/tcp.h98
-rw-r--r--sys/netinet/tcp_debug.c159
-rw-r--r--sys/netinet/tcp_debug.h59
-rw-r--r--sys/netinet/tcp_fsm.h85
-rw-r--r--sys/netinet/tcp_input.c1647
-rw-r--r--sys/netinet/tcp_output.c599
-rw-r--r--sys/netinet/tcp_reass.c1647
-rw-r--r--sys/netinet/tcp_seq.h62
-rw-r--r--sys/netinet/tcp_subr.c445
-rw-r--r--sys/netinet/tcp_timer.c312
-rw-r--r--sys/netinet/tcp_timer.h128
-rw-r--r--sys/netinet/tcp_timewait.c445
-rw-r--r--sys/netinet/tcp_usrreq.c517
-rw-r--r--sys/netinet/tcp_var.h278
-rw-r--r--sys/netinet/tcpip.h59
-rw-r--r--sys/netinet/udp.h45
-rw-r--r--sys/netinet/udp_usrreq.c640
-rw-r--r--sys/netinet/udp_var.h92
-rw-r--r--sys/netiso/argo_debug.h296
-rw-r--r--sys/netiso/clnl.h64
-rw-r--r--sys/netiso/clnp.h463
-rw-r--r--sys/netiso/clnp_debug.c260
-rw-r--r--sys/netiso/clnp_er.c375
-rw-r--r--sys/netiso/clnp_frag.c859
-rw-r--r--sys/netiso/clnp_input.c551
-rw-r--r--sys/netiso/clnp_options.c532
-rw-r--r--sys/netiso/clnp_output.c561
-rw-r--r--sys/netiso/clnp_raw.c352
-rw-r--r--sys/netiso/clnp_stat.h103
-rw-r--r--sys/netiso/clnp_subr.c658
-rw-r--r--sys/netiso/clnp_timer.c180
-rw-r--r--sys/netiso/cltp_usrreq.c405
-rw-r--r--sys/netiso/cltp_var.h54
-rw-r--r--sys/netiso/cons.h92
-rw-r--r--sys/netiso/cons_pcb.h193
-rw-r--r--sys/netiso/eonvar.h170
-rw-r--r--sys/netiso/esis.c1063
-rw-r--r--sys/netiso/esis.h135
-rw-r--r--sys/netiso/idrp_usrreq.c177
-rw-r--r--sys/netiso/if_cons.c960
-rw-r--r--sys/netiso/if_eon.c609
-rw-r--r--sys/netiso/iso.c919
-rw-r--r--sys/netiso/iso.h195
-rw-r--r--sys/netiso/iso_chksum.c360
-rw-r--r--sys/netiso/iso_errno.h274
-rw-r--r--sys/netiso/iso_pcb.c617
-rw-r--r--sys/netiso/iso_pcb.h113
-rw-r--r--sys/netiso/iso_proto.c197
-rw-r--r--sys/netiso/iso_snpac.c736
-rw-r--r--sys/netiso/iso_snpac.h112
-rw-r--r--sys/netiso/iso_var.h137
-rw-r--r--sys/netiso/tp.trans1342
-rw-r--r--sys/netiso/tp_astring.c74
-rw-r--r--sys/netiso/tp_clnp.h94
-rw-r--r--sys/netiso/tp_cons.c308
-rw-r--r--sys/netiso/tp_driver.c999
-rw-r--r--sys/netiso/tp_emit.c996
-rw-r--r--sys/netiso/tp_events.h84
-rw-r--r--sys/netiso/tp_inet.c688
-rw-r--r--sys/netiso/tp_input.c1624
-rw-r--r--sys/netiso/tp_ip.h91
-rw-r--r--sys/netiso/tp_iso.c693
-rw-r--r--sys/netiso/tp_meas.c127
-rw-r--r--sys/netiso/tp_meas.h94
-rw-r--r--sys/netiso/tp_output.c712
-rw-r--r--sys/netiso/tp_param.h367
-rw-r--r--sys/netiso/tp_pcb.c999
-rw-r--r--sys/netiso/tp_pcb.h356
-rw-r--r--sys/netiso/tp_seq.h124
-rw-r--r--sys/netiso/tp_stat.h283
-rw-r--r--sys/netiso/tp_states.h13
-rw-r--r--sys/netiso/tp_states.init75
-rw-r--r--sys/netiso/tp_subr.c947
-rw-r--r--sys/netiso/tp_subr2.c880
-rw-r--r--sys/netiso/tp_timer.c377
-rw-r--r--sys/netiso/tp_timer.h93
-rw-r--r--sys/netiso/tp_tpdu.h296
-rw-r--r--sys/netiso/tp_trace.c175
-rw-r--r--sys/netiso/tp_trace.h198
-rw-r--r--sys/netiso/tp_user.h162
-rw-r--r--sys/netiso/tp_usrreq.c756
-rw-r--r--sys/netiso/tuba_subr.c348
-rw-r--r--sys/netiso/tuba_table.c143
-rw-r--r--sys/netiso/tuba_table.h59
-rw-r--r--sys/netiso/tuba_usrreq.c312
-rw-r--r--sys/netiso/xebec/Makefile8
-rw-r--r--sys/netiso/xebec/debug.h22
-rw-r--r--sys/netiso/xebec/llparse.c366
-rw-r--r--sys/netiso/xebec/llparse.h145
-rw-r--r--sys/netiso/xebec/llscan.c430
-rw-r--r--sys/netiso/xebec/main.c410
-rw-r--r--sys/netiso/xebec/main.h32
-rw-r--r--sys/netiso/xebec/malloc.c136
-rw-r--r--sys/netiso/xebec/malloc.h4
-rw-r--r--sys/netiso/xebec/procs.c437
-rw-r--r--sys/netiso/xebec/procs.h5
-rw-r--r--sys/netiso/xebec/putdriver.c244
-rw-r--r--sys/netiso/xebec/sets.c472
-rw-r--r--sys/netiso/xebec/sets.h36
-rw-r--r--sys/netiso/xebec/test.trans64
-rw-r--r--sys/netiso/xebec/test_def.h13
-rw-r--r--sys/netiso/xebec/xebec.bnf315
-rw-r--r--sys/netiso/xebec/xebec.c451
-rw-r--r--sys/netiso/xebec/xebec.h88
-rw-r--r--sys/netns/idp.h46
-rw-r--r--sys/netns/idp_usrreq.c566
-rw-r--r--sys/netns/idp_var.h49
-rw-r--r--sys/netns/ns.c368
-rw-r--r--sys/netns/ns.h151
-rw-r--r--sys/netns/ns_cksum.c204
-rw-r--r--sys/netns/ns_error.c323
-rw-r--r--sys/netns/ns_error.h90
-rw-r--r--sys/netns/ns_if.h82
-rw-r--r--sys/netns/ns_input.c485
-rw-r--r--sys/netns/ns_ip.c440
-rw-r--r--sys/netns/ns_output.c160
-rw-r--r--sys/netns/ns_pcb.c363
-rw-r--r--sys/netns/ns_pcb.h80
-rw-r--r--sys/netns/ns_proto.c96
-rw-r--r--sys/netns/sp.h52
-rw-r--r--sys/netns/spidp.h62
-rw-r--r--sys/netns/spp_debug.c170
-rw-r--r--sys/netns/spp_debug.h59
-rw-r--r--sys/netns/spp_timer.h122
-rw-r--r--sys/netns/spp_usrreq.c1804
-rw-r--r--sys/netns/spp_var.h215
-rw-r--r--sys/nfs/nfs.h297
-rw-r--r--sys/nfs/nfs_bio.c799
-rw-r--r--sys/nfs/nfs_common.c1130
-rw-r--r--sys/nfs/nfs_common.h269
-rw-r--r--sys/nfs/nfs_node.c294
-rw-r--r--sys/nfs/nfs_nqlease.c1228
-rw-r--r--sys/nfs/nfs_serv.c1908
-rw-r--r--sys/nfs/nfs_socket.c1990
-rw-r--r--sys/nfs/nfs_srvcache.c348
-rw-r--r--sys/nfs/nfs_subs.c1130
-rw-r--r--sys/nfs/nfs_syscalls.c874
-rw-r--r--sys/nfs/nfs_vfsops.c740
-rw-r--r--sys/nfs/nfs_vnops.c2539
-rw-r--r--sys/nfs/nfsdiskless.h66
-rw-r--r--sys/nfs/nfsm_subs.h269
-rw-r--r--sys/nfs/nfsmount.h127
-rw-r--r--sys/nfs/nfsnode.h166
-rw-r--r--sys/nfs/nfsrtt.h96
-rw-r--r--sys/nfs/nfsrvcache.h84
-rw-r--r--sys/nfs/nfsv2.h260
-rw-r--r--sys/nfs/nqnfs.h198
-rw-r--r--sys/nfs/rpcv2.h88
-rw-r--r--sys/nfs/xdr_subs.h78
-rw-r--r--sys/nfsclient/nfs.h297
-rw-r--r--sys/nfsclient/nfs_bio.c799
-rw-r--r--sys/nfsclient/nfs_nfsiod.c874
-rw-r--r--sys/nfsclient/nfs_node.c294
-rw-r--r--sys/nfsclient/nfs_socket.c1990
-rw-r--r--sys/nfsclient/nfs_subs.c1130
-rw-r--r--sys/nfsclient/nfs_vfsops.c740
-rw-r--r--sys/nfsclient/nfs_vnops.c2539
-rw-r--r--sys/nfsclient/nfsargs.h297
-rw-r--r--sys/nfsclient/nfsdiskless.h66
-rw-r--r--sys/nfsclient/nfsm_subs.h269
-rw-r--r--sys/nfsclient/nfsmount.h127
-rw-r--r--sys/nfsclient/nfsnode.h166
-rw-r--r--sys/nfsclient/nfsstats.h297
-rw-r--r--sys/nfsserver/nfs.h297
-rw-r--r--sys/nfsserver/nfs_serv.c1908
-rw-r--r--sys/nfsserver/nfs_srvcache.c348
-rw-r--r--sys/nfsserver/nfs_srvsock.c1990
-rw-r--r--sys/nfsserver/nfs_srvsubs.c1130
-rw-r--r--sys/nfsserver/nfs_syscalls.c874
-rw-r--r--sys/nfsserver/nfsm_subs.h269
-rw-r--r--sys/nfsserver/nfsrvcache.h84
-rw-r--r--sys/nfsserver/nfsrvstats.h297
-rw-r--r--sys/sys/_sigset.h194
-rw-r--r--sys/sys/acct.h75
-rw-r--r--sys/sys/bio.h178
-rw-r--r--sys/sys/buf.h178
-rw-r--r--sys/sys/callout.h51
-rw-r--r--sys/sys/cdefs.h122
-rw-r--r--sys/sys/clist.h45
-rw-r--r--sys/sys/conf.h123
-rw-r--r--sys/sys/device.h143
-rw-r--r--sys/sys/dir.h61
-rw-r--r--sys/sys/dirent.h76
-rw-r--r--sys/sys/disk.h112
-rw-r--r--sys/sys/disklabel.h332
-rw-r--r--sys/sys/diskmbr.h332
-rw-r--r--sys/sys/diskpc98.h332
-rw-r--r--sys/sys/dkbad.h68
-rw-r--r--sys/sys/dkstat.h64
-rw-r--r--sys/sys/dmap.h60
-rw-r--r--sys/sys/domain.h64
-rw-r--r--sys/sys/errno.h163
-rw-r--r--sys/sys/exec.h71
-rw-r--r--sys/sys/fbio.h186
-rw-r--r--sys/sys/fcntl.h190
-rw-r--r--sys/sys/file.h76
-rw-r--r--sys/sys/filedesc.h99
-rw-r--r--sys/sys/filio.h55
-rw-r--r--sys/sys/gmon.h159
-rw-r--r--sys/sys/ioccom.h64
-rw-r--r--sys/sys/ioctl.h84
-rw-r--r--sys/sys/ioctl_compat.h167
-rw-r--r--sys/sys/ipc.h79
-rw-r--r--sys/sys/kernel.h59
-rw-r--r--sys/sys/ktrace.h156
-rw-r--r--sys/sys/libkern.h98
-rw-r--r--sys/sys/linedisc.h123
-rw-r--r--sys/sys/malloc.h306
-rw-r--r--sys/sys/map.h82
-rw-r--r--sys/sys/mbuf.h402
-rw-r--r--sys/sys/mman.h89
-rw-r--r--sys/sys/mount.h418
-rw-r--r--sys/sys/msgbuf.h46
-rw-r--r--sys/sys/mtio.h120
-rw-r--r--sys/sys/namei.h188
-rw-r--r--sys/sys/param.h216
-rw-r--r--sys/sys/proc.h263
-rw-r--r--sys/sys/protosw.h210
-rw-r--r--sys/sys/ptrace.h67
-rw-r--r--sys/sys/queue.h245
-rw-r--r--sys/sys/reboot.h88
-rw-r--r--sys/sys/resource.h125
-rw-r--r--sys/sys/resourcevar.h90
-rw-r--r--sys/sys/select.h56
-rw-r--r--sys/sys/selinfo.h56
-rw-r--r--sys/sys/signal.h194
-rw-r--r--sys/sys/signalvar.h167
-rw-r--r--sys/sys/socket.h339
-rw-r--r--sys/sys/socketvar.h207
-rw-r--r--sys/sys/sockio.h77
-rw-r--r--sys/sys/stat.h193
-rw-r--r--sys/sys/syscall.h186
-rw-r--r--sys/sys/sysctl.h344
-rw-r--r--sys/sys/syslimits.h54
-rw-r--r--sys/sys/syslog.h187
-rw-r--r--sys/sys/systm.h165
-rw-r--r--sys/sys/tablet.h94
-rw-r--r--sys/sys/termios.h278
-rw-r--r--sys/sys/time.h126
-rw-r--r--sys/sys/timeb.h47
-rw-r--r--sys/sys/times.h65
-rw-r--r--sys/sys/timetc.h126
-rw-r--r--sys/sys/tprintf.h41
-rw-r--r--sys/sys/trace.h116
-rw-r--r--sys/sys/tty.h217
-rw-r--r--sys/sys/ttychars.h63
-rw-r--r--sys/sys/ttycom.h128
-rw-r--r--sys/sys/ttydefaults.h96
-rw-r--r--sys/sys/ttydev.h60
-rw-r--r--sys/sys/types.h162
-rw-r--r--sys/sys/ucred.h59
-rw-r--r--sys/sys/uio.h83
-rw-r--r--sys/sys/un.h52
-rw-r--r--sys/sys/unistd.h124
-rw-r--r--sys/sys/unpcb.h73
-rw-r--r--sys/sys/user.h91
-rw-r--r--sys/sys/utsname.h56
-rw-r--r--sys/sys/vadvise.h49
-rw-r--r--sys/sys/vcmd.h43
-rw-r--r--sys/sys/vlimit.h49
-rw-r--r--sys/sys/vmmeter.h147
-rw-r--r--sys/sys/vnode.h397
-rw-r--r--sys/sys/vsio.h153
-rw-r--r--sys/sys/wait.h156
-rw-r--r--sys/tools/vnode_if.awk433
-rw-r--r--sys/ufs/ffs/ffs_alloc.c1474
-rw-r--r--sys/ufs/ffs/ffs_balloc.c282
-rw-r--r--sys/ufs/ffs/ffs_extern.h101
-rw-r--r--sys/ufs/ffs/ffs_inode.c488
-rw-r--r--sys/ufs/ffs/ffs_subr.c238
-rw-r--r--sys/ufs/ffs/ffs_tables.c136
-rw-r--r--sys/ufs/ffs/ffs_vfsops.c843
-rw-r--r--sys/ufs/ffs/ffs_vnops.c288
-rw-r--r--sys/ufs/ffs/fs.h489
-rw-r--r--sys/ufs/lfs/README139
-rw-r--r--sys/ufs/lfs/TODO116
-rw-r--r--sys/ufs/lfs/lfs.h353
-rw-r--r--sys/ufs/lfs/lfs_alloc.c251
-rw-r--r--sys/ufs/lfs/lfs_balloc.c136
-rw-r--r--sys/ufs/lfs/lfs_bio.c187
-rw-r--r--sys/ufs/lfs/lfs_cksum.c58
-rw-r--r--sys/ufs/lfs/lfs_debug.c137
-rw-r--r--sys/ufs/lfs/lfs_extern.h106
-rw-r--r--sys/ufs/lfs/lfs_inode.c359
-rw-r--r--sys/ufs/lfs/lfs_segment.c1111
-rw-r--r--sys/ufs/lfs/lfs_subr.c184
-rw-r--r--sys/ufs/lfs/lfs_syscalls.c562
-rw-r--r--sys/ufs/lfs/lfs_vfsops.c573
-rw-r--r--sys/ufs/lfs/lfs_vnops.c487
-rw-r--r--sys/ufs/mfs/mfs_extern.h60
-rw-r--r--sys/ufs/mfs/mfs_vfsops.c307
-rw-r--r--sys/ufs/mfs/mfs_vnops.c432
-rw-r--r--sys/ufs/mfs/mfsiom.h37
-rw-r--r--sys/ufs/mfs/mfsnode.h86
-rw-r--r--sys/ufs/ufs/dinode.h108
-rw-r--r--sys/ufs/ufs/dir.h147
-rw-r--r--sys/ufs/ufs/inode.h162
-rw-r--r--sys/ufs/ufs/lockf.h79
-rw-r--r--sys/ufs/ufs/quota.h207
-rw-r--r--sys/ufs/ufs/ufs_bmap.c294
-rw-r--r--sys/ufs/ufs/ufs_disksubr.c364
-rw-r--r--sys/ufs/ufs/ufs_extern.h125
-rw-r--r--sys/ufs/ufs/ufs_ihash.c154
-rw-r--r--sys/ufs/ufs/ufs_inode.c188
-rw-r--r--sys/ufs/ufs/ufs_lockf.c707
-rw-r--r--sys/ufs/ufs/ufs_lookup.c970
-rw-r--r--sys/ufs/ufs/ufs_quota.c938
-rw-r--r--sys/ufs/ufs/ufs_readwrite.c295
-rw-r--r--sys/ufs/ufs/ufs_vfsops.c206
-rw-r--r--sys/ufs/ufs/ufs_vnops.c2159
-rw-r--r--sys/ufs/ufs/ufsmount.h83
-rw-r--r--sys/vm/device_pager.c368
-rw-r--r--sys/vm/device_pager.h53
-rw-r--r--sys/vm/kern_lock.c534
-rw-r--r--sys/vm/lock.h172
-rw-r--r--sys/vm/pmap.h122
-rw-r--r--sys/vm/swap_pager.c1009
-rw-r--r--sys/vm/swap_pager.h90
-rw-r--r--sys/vm/vm.h91
-rw-r--r--sys/vm/vm_extern.h125
-rw-r--r--sys/vm/vm_fault.c1035
-rw-r--r--sys/vm/vm_glue.c605
-rw-r--r--sys/vm/vm_inherit.h83
-rw-r--r--sys/vm/vm_init.c103
-rw-r--r--sys/vm/vm_kern.c450
-rw-r--r--sys/vm/vm_kern.h72
-rw-r--r--sys/vm/vm_map.c2626
-rw-r--r--sys/vm/vm_map.h228
-rw-r--r--sys/vm/vm_meter.c224
-rw-r--r--sys/vm/vm_mmap.c832
-rw-r--r--sys/vm/vm_object.c1436
-rw-r--r--sys/vm/vm_object.h173
-rw-r--r--sys/vm/vm_page.c696
-rw-r--r--sys/vm/vm_page.h242
-rw-r--r--sys/vm/vm_pageout.c567
-rw-r--r--sys/vm/vm_pageout.h96
-rw-r--r--sys/vm/vm_pager.c381
-rw-r--r--sys/vm/vm_pager.h147
-rw-r--r--sys/vm/vm_param.h159
-rw-r--r--sys/vm/vm_prot.h102
-rw-r--r--sys/vm/vm_swap.c427
-rw-r--r--sys/vm/vm_unix.c137
-rw-r--r--sys/vm/vm_user.c312
-rw-r--r--sys/vm/vnode_pager.c580
-rw-r--r--sys/vm/vnode_pager.h59
641 files changed, 225102 insertions, 0 deletions
diff --git a/sys/conf/defines b/sys/conf/defines
new file mode 100644
index 000000000000..ea7046324e3b
--- /dev/null
+++ b/sys/conf/defines
@@ -0,0 +1,15 @@
+/:#if.*[ \t]*KPROF/d
+/:#if.*[ \t]*PGINPROF/d
+/:#if.*[ \t]*UNFAST/d
+/:#if.*[ \t]*INSECURE/d
+/:#if.*[ \t]*TRACE/d
+/:#if.*[ \t]*DISKMON/d
+/:#if.*[ \t]*INTRLVE/d
+/:#if.*[ \t]*lint/d
+/:#if.*[ \t]*notdef/d
+/:#if.*[ \t]*unneeded/d
+/:#if.*[ \t]*vax/d
+/:#if.*[ \t]*TCPTRUEOOB/d
+/:#if.*[ \t]*irele/d
+/:#if.*[ \t]*ilock/d
+/:#if.*[ \t]*notyet/d
diff --git a/sys/conf/files b/sys/conf/files
new file mode 100644
index 000000000000..c083f2e1deee
--- /dev/null
+++ b/sys/conf/files
@@ -0,0 +1,274 @@
+isofs/cd9660/cd9660_bmap.c optional cd9660
+isofs/cd9660/cd9660_lookup.c optional cd9660
+isofs/cd9660/cd9660_node.c optional cd9660
+isofs/cd9660/cd9660_rrip.c optional cd9660
+isofs/cd9660/cd9660_util.c optional cd9660
+isofs/cd9660/cd9660_vfsops.c optional cd9660
+isofs/cd9660/cd9660_vnops.c optional cd9660
+kdb/kdb_access.c optional kadb
+kdb/kdb_command.c optional kadb
+kdb/kdb_ctype.c optional kadb
+kdb/kdb_expr.c optional kadb
+kdb/kdb_format.c optional kadb
+kdb/kdb_input.c optional kadb
+kdb/kdb_message.c optional kadb
+kdb/kdb_output.c optional kadb
+kdb/kdb_pcs.c optional kadb
+kdb/kdb_print.c optional kadb
+kdb/kdb_runpcs.c optional kadb
+kdb/kdb_sym.c optional kadb
+kdb/kdb_trap.c optional kadb
+kern/init_main.c standard
+kern/init_sysent.c standard
+kern/kern_acct.c standard
+kern/kern_clock.c standard
+kern/kern_descrip.c standard
+kern/kern_exec.c standard
+kern/kern_exit.c standard
+kern/kern_fork.c standard
+kern/kern_ktrace.c standard
+kern/kern_malloc.c standard
+kern/kern_physio.c standard
+kern/kern_proc.c standard
+kern/kern_prot.c standard
+kern/kern_resource.c standard
+kern/kern_sig.c standard
+kern/kern_subr.c standard
+kern/kern_synch.c standard
+kern/kern_sysctl.c standard
+kern/kern_time.c standard
+kern/kern_xxx.c standard
+kern/subr_log.c standard
+kern/subr_prf.c standard
+kern/subr_prof.c standard
+kern/subr_rmap.c standard
+kern/subr_xxx.c standard
+kern/sys_generic.c standard
+kern/sys_process.c standard
+kern/sys_socket.c standard
+kern/sysv_shm.c optional sysvshm
+kern/tty.c standard
+kern/tty_compat.c standard
+kern/tty_conf.c standard
+kern/tty_pty.c optional pty
+kern/tty_subr.c standard
+kern/tty_tb.c optional tb
+kern/tty_tty.c standard
+kern/uipc_domain.c standard
+kern/uipc_mbuf.c standard
+kern/uipc_proto.c standard
+kern/uipc_socket.c standard
+kern/uipc_socket2.c standard
+kern/uipc_syscalls.c standard
+kern/uipc_usrreq.c standard
+kern/vfs_bio.c standard
+kern/vfs_cache.c standard
+kern/vfs_cluster.c standard
+kern/vfs_conf.c standard
+kern/vfs_init.c standard
+kern/vfs_lookup.c standard
+kern/vfs_subr.c standard
+kern/vfs_syscalls.c standard
+kern/vfs_vnops.c standard
+miscfs/deadfs/dead_vnops.c standard
+miscfs/fdesc/fdesc_vfsops.c optional fdesc
+miscfs/fdesc/fdesc_vnops.c optional fdesc
+miscfs/fifofs/fifo_vnops.c optional fifo
+miscfs/kernfs/kernfs_vfsops.c optional kernfs
+miscfs/kernfs/kernfs_vnops.c optional kernfs
+miscfs/nullfs/null_subr.c optional nullfs
+miscfs/nullfs/null_vfsops.c optional nullfs
+miscfs/nullfs/null_vnops.c optional nullfs
+miscfs/portal/portal_vfsops.c optional portal
+miscfs/portal/portal_vnops.c optional portal
+miscfs/procfs/procfs_subr.c optional procfs
+miscfs/procfs/procfs_vnops.c optional procfs
+miscfs/procfs/procfs_vfsops.c optional procfs
+miscfs/procfs/procfs_note.c optional procfs
+miscfs/procfs/procfs_mem.c optional procfs
+miscfs/procfs/procfs_ctl.c optional procfs
+miscfs/procfs/procfs_status.c optional procfs
+miscfs/procfs/procfs_regs.c optional procfs
+miscfs/procfs/procfs_fpregs.c optional procfs
+miscfs/specfs/spec_vnops.c standard
+miscfs/umapfs/umap_subr.c optional umapfs
+miscfs/umapfs/umap_vfsops.c optional umapfs
+miscfs/umapfs/umap_vnops.c optional umapfs
+miscfs/union/union_subr.c optional union
+miscfs/union/union_vfsops.c optional union
+miscfs/union/union_vnops.c optional union
+net/bpf.c optional bpfilter
+net/bpf_filter.c optional bpfilter
+net/if.c standard
+net/if_ethersubr.c optional ether
+net/if_loop.c optional loop
+net/if_sl.c optional sl
+net/radix.c standard
+net/raw_cb.c standard
+net/raw_usrreq.c standard
+net/route.c standard
+net/rtsock.c standard
+net/slcompress.c optional sl
+netccitt/ccitt_proto.c optional ccitt
+netccitt/llc_input.c optional llc
+netccitt/llc_output.c optional llc
+netccitt/llc_subr.c optional llc
+netccitt/llc_timer.c optional llc
+netccitt/pk_llcsubr.c optional llc
+netccitt/pk_llcsubr.c optional hdlc
+netccitt/hd_debug.c optional hdlc
+netccitt/hd_input.c optional hdlc
+netccitt/hd_output.c optional hdlc
+netccitt/hd_subr.c optional hdlc
+netccitt/hd_timer.c optional hdlc
+netccitt/if_x25subr.c optional ccitt
+netccitt/pk_acct.c optional ccitt
+netccitt/pk_debug.c optional ccitt
+netccitt/pk_input.c optional ccitt
+netccitt/pk_output.c optional ccitt
+netccitt/pk_subr.c optional ccitt
+netccitt/pk_timer.c optional ccitt
+netccitt/pk_usrreq.c optional ccitt
+netimp/if_imp.c optional imp
+netimp/if_imphost.c optional imp
+netimp/raw_imp.c optional imp
+netinet/if_ether.c optional ether
+netinet/igmp.c optional inet
+netinet/in.c optional inet
+netinet/in_pcb.c optional inet
+netinet/in_proto.c optional inet
+netinet/ip_icmp.c optional inet
+netinet/ip_input.c optional inet
+netinet/ip_mroute.c optional inet mrouting
+netinet/ip_output.c optional inet
+netinet/raw_ip.c optional inet
+netinet/tcp_debug.c optional inet
+netinet/tcp_input.c optional inet
+netinet/tcp_output.c optional inet
+netinet/tcp_subr.c optional inet
+netinet/tcp_timer.c optional inet
+netinet/tcp_usrreq.c optional inet
+netinet/udp_usrreq.c optional inet
+netiso/clnp_debug.c optional iso
+netiso/clnp_er.c optional iso
+netiso/clnp_frag.c optional iso
+netiso/clnp_input.c optional iso
+netiso/clnp_options.c optional iso
+netiso/clnp_output.c optional iso
+netiso/clnp_raw.c optional iso
+netiso/clnp_subr.c optional iso
+netiso/clnp_timer.c optional iso
+netiso/cltp_usrreq.c optional iso
+netiso/esis.c optional iso
+netiso/idrp_usrreq.c optional iso
+netiso/if_eon.c optional eon
+netiso/iso.c optional iso
+netiso/iso_chksum.c optional iso
+netiso/iso_pcb.c optional iso
+netiso/iso_proto.c optional iso
+netiso/iso_snpac.c optional iso
+netiso/tp_astring.c optional iso
+netiso/tp_astring.c optional tpip
+netiso/tp_cons.c optional iso
+netiso/tp_driver.c optional iso
+netiso/tp_driver.c optional tpip
+netiso/tp_emit.c optional iso
+netiso/tp_emit.c optional tpip
+netiso/tp_inet.c optional iso
+netiso/tp_inet.c optional tpip
+netiso/tp_input.c optional iso
+netiso/tp_input.c optional tpip
+netiso/tp_iso.c optional iso
+netiso/tp_meas.c optional iso
+netiso/tp_meas.c optional tpip
+netiso/tp_output.c optional iso
+netiso/tp_output.c optional tpip
+netiso/tp_pcb.c optional iso
+netiso/tp_pcb.c optional tpip
+netiso/tp_subr.c optional iso
+netiso/tp_subr.c optional tpip
+netiso/tp_subr2.c optional iso
+netiso/tp_subr2.c optional tpip
+netiso/tp_timer.c optional iso
+netiso/tp_timer.c optional tpip
+netiso/tp_trace.c optional iso
+netiso/tp_trace.c optional tpip
+netiso/tp_usrreq.c optional iso
+netiso/tp_usrreq.c optional tpip
+netiso/tuba_subr.c optional iso tuba
+netiso/tuba_table.c optional iso tuba
+netiso/tuba_usrreq.c optional iso tuba
+netns/idp_usrreq.c optional ns
+netns/ns.c optional ns
+netns/ns_error.c optional ns
+netns/ns_input.c optional ns
+netns/ns_ip.c optional ns
+netns/ns_output.c optional ns
+netns/ns_pcb.c optional ns
+netns/ns_proto.c optional ns
+netns/spp_debug.c optional ns
+netns/spp_usrreq.c optional ns
+nfs/nfs_bio.c optional nfs
+nfs/nfs_node.c optional nfs
+nfs/nfs_nqlease.c optional nfs
+nfs/nfs_serv.c optional nfs
+nfs/nfs_socket.c optional nfs
+nfs/nfs_srvcache.c optional nfs
+nfs/nfs_subs.c optional nfs
+nfs/nfs_syscalls.c optional nfs
+nfs/nfs_vfsops.c optional nfs
+nfs/nfs_vnops.c optional nfs
+ufs/ffs/ffs_alloc.c optional ffs
+ufs/ffs/ffs_alloc.c optional mfs
+ufs/ffs/ffs_balloc.c optional ffs
+ufs/ffs/ffs_balloc.c optional mfs
+ufs/ffs/ffs_inode.c optional ffs
+ufs/ffs/ffs_inode.c optional mfs
+ufs/ffs/ffs_subr.c optional ffs
+ufs/ffs/ffs_subr.c optional mfs
+ufs/ffs/ffs_tables.c optional ffs
+ufs/ffs/ffs_tables.c optional mfs
+ufs/ffs/ffs_vfsops.c optional ffs
+ufs/ffs/ffs_vfsops.c optional mfs
+ufs/ffs/ffs_vnops.c optional ffs
+ufs/ffs/ffs_vnops.c optional mfs
+ufs/lfs/lfs_alloc.c optional lfs
+ufs/lfs/lfs_bio.c optional lfs
+ufs/lfs/lfs_balloc.c optional lfs
+ufs/lfs/lfs_cksum.c optional lfs
+ufs/lfs/lfs_debug.c optional lfs
+ufs/lfs/lfs_inode.c optional lfs
+ufs/lfs/lfs_segment.c optional lfs
+ufs/lfs/lfs_subr.c optional lfs
+ufs/lfs/lfs_syscalls.c optional lfs
+ufs/lfs/lfs_vfsops.c optional lfs
+ufs/lfs/lfs_vnops.c optional lfs
+ufs/mfs/mfs_vfsops.c optional mfs
+ufs/mfs/mfs_vnops.c optional mfs
+ufs/ufs/ufs_bmap.c standard
+ufs/ufs/ufs_disksubr.c standard
+ufs/ufs/ufs_ihash.c standard
+ufs/ufs/ufs_inode.c standard
+ufs/ufs/ufs_lockf.c standard
+ufs/ufs/ufs_lookup.c standard
+ufs/ufs/ufs_quota.c standard
+ufs/ufs/ufs_vfsops.c standard
+ufs/ufs/ufs_vnops.c standard
+vm/device_pager.c optional devpager
+vm/kern_lock.c standard
+vm/swap_pager.c optional swappager
+vm/vm_fault.c standard
+vm/vm_glue.c standard
+vm/vm_init.c standard
+vm/vm_kern.c standard
+vm/vm_map.c standard
+vm/vm_meter.c standard
+vm/vm_mmap.c standard
+vm/vm_object.c standard
+vm/vm_page.c standard
+vm/vm_pageout.c standard
+vm/vm_pager.c standard
+vm/vm_swap.c standard
+vm/vm_unix.c standard
+vm/vm_user.c standard
+vm/vnode_pager.c optional vnodepager
diff --git a/sys/conf/files.newconf b/sys/conf/files.newconf
new file mode 100644
index 000000000000..7b0907ab8944
--- /dev/null
+++ b/sys/conf/files.newconf
@@ -0,0 +1,274 @@
+# @(#)files.newconf 8.9 (Berkeley) 3/31/94
+
+# generic attributes
+define disk
+define tape
+define ifnet
+define tty
+
+# net device attributes - we have generic code for ether.
+# we should have imp but right now it is a pseudo-device.
+define ether
+# define imp
+pseudo-device imp
+
+# scsi driver and associated stuff
+define scsi { target = -1 }
+device tg at scsi { drive = -1 }
+file dev/scsi/scsi_subr.c scsi
+
+device sd at tg: disk
+file dev/scsi/sd.c sd needs-flag
+
+# device st at tg: tape -- not yet
+
+# legitimate pseudo-devices
+pseudo-device bpfilter
+pseudo-device cd: disk
+pseudo-device loop
+pseudo-device pty: tty
+pseudo-device sl
+pseudo-device vn: disk
+
+# kernel sources
+file isofs/cd9660/isofs_bmap.c isofs
+file isofs/cd9660/isofs_lookup.c isofs
+file isofs/cd9660/isofs_node.c isofs
+file isofs/cd9660/isofs_rrip.c isofs
+file isofs/cd9660/isofs_util.c isofs
+file isofs/cd9660/isofs_vfsops.c isofs
+file isofs/cd9660/isofs_vnops.c isofs
+file kern/init_main.c
+file kern/init_sysent.c
+file kern/kern_acct.c
+file kern/kern_clock.c
+file kern/kern_descrip.c
+file kern/kern_exec.c
+file kern/kern_exit.c
+file kern/kern_fork.c
+file kern/kern_ktrace.c ktrace
+file kern/kern_malloc.c
+file kern/kern_physio.c
+file kern/kern_proc.c
+file kern/kern_prot.c
+file kern/kern_resource.c
+file kern/kern_sig.c
+file kern/kern_subr.c
+file kern/kern_synch.c
+file kern/kern_sysctl.c
+file kern/kern_time.c
+file kern/kern_xxx.c
+file kern/subr_autoconf.c
+file kern/subr_log.c
+file kern/subr_prf.c
+file kern/subr_prof.c
+file kern/subr_rmap.c
+file kern/subr_xxx.c
+file kern/sys_generic.c
+file kern/sys_process.c
+file kern/sys_socket.c
+file kern/sysv_shm.c sysvshm
+file kern/tty.c
+file kern/tty_compat.c
+file kern/tty_conf.c
+file kern/tty_pty.c pty needs-count
+file kern/tty_subr.c
+file kern/tty_tb.c tb needs-flag
+file kern/tty_tty.c
+file kern/uipc_domain.c
+file kern/uipc_mbuf.c
+file kern/uipc_proto.c
+file kern/uipc_socket.c
+file kern/uipc_socket2.c
+file kern/uipc_syscalls.c
+file kern/uipc_usrreq.c
+file kern/vfs_bio.c
+file kern/vfs_cache.c
+file kern/vfs_cluster.c
+file kern/vfs_conf.c
+file kern/vfs_init.c
+file kern/vfs_lookup.c
+file kern/vfs_subr.c
+file kern/vfs_syscalls.c
+file kern/vfs_vnops.c
+file miscfs/deadfs/dead_vnops.c
+file miscfs/fdesc/fdesc_vfsops.c fdesc
+file miscfs/fdesc/fdesc_vnops.c fdesc
+file miscfs/fifofs/fifo_vnops.c fifo
+file miscfs/kernfs/kernfs_vfsops.c kernfs
+file miscfs/kernfs/kernfs_vnops.c kernfs
+file miscfs/nullfs/null_subr.c nullfs
+file miscfs/nullfs/null_vfsops.c nullfs
+file miscfs/nullfs/null_vnops.c nullfs
+file miscfs/portal/portal_vfsops.c portal
+file miscfs/portal/portal_vnops.c portal
+file miscfs/procfs/procfs_subr.c procfs
+file miscfs/procfs/procfs_vnops.c procfs
+file miscfs/procfs/procfs_vfsops.c procfs
+file miscfs/procfs/procfs_note.c procfs
+file miscfs/procfs/procfs_mem.c procfs
+file miscfs/procfs/procfs_ctl.c procfs
+file miscfs/procfs/procfs_status.c procfs
+file miscfs/procfs/procfs_regs.c procfs
+file miscfs/procfs/procfs_fpregs.c procfs
+file miscfs/specfs/spec_vnops.c
+file miscfs/umapfs/umap_subr.c umapfs
+file miscfs/umapfs/umap_vfsops.c umapfs
+file miscfs/umapfs/umap_vnops.c umapfs
+file miscfs/union/union_subr.c union
+file miscfs/union/union_vfsops.c union
+file miscfs/union/union_vnops.c union
+file net/bpf.c bpfilter needs-count
+file net/bpf_filter.c bpfilter needs-count
+file net/if.c
+file net/if_ethersubr.c ether needs-flag
+file net/if_loop.c loop needs-count
+file net/if_sl.c sl needs-count
+file net/radix.c
+file net/raw_cb.c
+file net/raw_usrreq.c
+file net/route.c
+file net/rtsock.c
+file net/slcompress.c sl
+file netccitt/ccitt_proto.c ccitt
+file netccitt/llc_input.c llc
+file netccitt/llc_output.c llc
+file netccitt/llc_subr.c llc
+file netccitt/llc_timer.c llc
+file netccitt/hd_debug.c hdlc
+file netccitt/hd_input.c hdlc
+file netccitt/hd_output.c hdlc
+file netccitt/hd_subr.c hdlc
+file netccitt/hd_timer.c hdlc
+file netccitt/if_x25subr.c ccitt
+file netccitt/pk_acct.c ccitt
+file netccitt/pk_debug.c ccitt
+file netccitt/pk_input.c ccitt
+file netccitt/pk_llcsubr.c llc hdlc
+file netccitt/pk_output.c ccitt
+file netccitt/pk_subr.c ccitt
+file netccitt/pk_timer.c ccitt
+file netccitt/pk_usrreq.c ccitt
+file netimp/if_imp.c imp needs-count
+file netimp/if_imphost.c imp needs-count
+file netimp/raw_imp.c imp
+file netinet/if_ether.c ether
+file netinet/igmp.c inet
+file netinet/in.c inet
+file netinet/in_pcb.c inet
+file netinet/in_proto.c inet
+file netinet/ip_icmp.c inet
+file netinet/ip_input.c inet
+file netinet/ip_mroute.c inet
+file netinet/ip_output.c inet
+file netinet/raw_ip.c inet
+file netinet/tcp_debug.c inet
+file netinet/tcp_input.c inet
+file netinet/tcp_output.c inet
+file netinet/tcp_subr.c inet
+file netinet/tcp_timer.c inet
+file netinet/tcp_usrreq.c inet
+file netinet/udp_usrreq.c inet
+file netiso/clnp_debug.c iso
+file netiso/clnp_er.c iso
+file netiso/clnp_frag.c iso
+file netiso/clnp_input.c iso
+file netiso/clnp_options.c iso
+file netiso/clnp_output.c iso
+file netiso/clnp_raw.c iso
+file netiso/clnp_subr.c iso
+file netiso/clnp_timer.c iso
+file netiso/cltp_usrreq.c iso
+file netiso/esis.c iso
+file netiso/if_eon.c eon
+file netiso/idrp_usrreq.c iso
+file netiso/iso.c iso
+file netiso/iso_chksum.c iso
+file netiso/iso_pcb.c iso
+file netiso/iso_proto.c iso
+file netiso/iso_snpac.c iso
+file netiso/tp_astring.c iso tpip
+file netiso/tp_cons.c iso
+file netiso/tp_driver.c iso tpip
+file netiso/tp_emit.c iso tpip
+file netiso/tp_inet.c iso tpip
+file netiso/tp_input.c iso tpip
+file netiso/tp_iso.c iso
+file netiso/tp_meas.c iso tpip
+file netiso/tp_output.c iso tpip
+file netiso/tp_pcb.c iso tpip
+file netiso/tp_subr.c iso tpip
+file netiso/tp_subr2.c iso tpip
+file netiso/tp_timer.c iso tpip
+file netiso/tp_trace.c iso tpip
+file netiso/tp_usrreq.c iso tpip
+file netiso/tuba_subr.c iso tuba
+file netiso/tuba_table.c iso tuba
+file netiso/tuba_usrreq.c iso tuba
+file netns/idp_usrreq.c ns
+file netns/ns.c ns
+file netns/ns_error.c ns
+file netns/ns_input.c ns
+file netns/ns_ip.c ns
+file netns/ns_output.c ns
+file netns/ns_pcb.c ns
+file netns/ns_proto.c ns
+file netns/spp_debug.c ns
+file netns/spp_usrreq.c ns
+file nfs/nfs_bio.c nfs
+file nfs/nfs_node.c nfs
+file nfs/nfs_nqlease.c nfs
+file nfs/nfs_serv.c nfs
+file nfs/nfs_socket.c nfs
+file nfs/nfs_srvcache.c nfs
+file nfs/nfs_subs.c nfs
+file nfs/nfs_syscalls.c nfs
+file nfs/nfs_vfsops.c nfs
+file nfs/nfs_vnops.c nfs
+file ufs/ffs/ffs_alloc.c ffs mfs
+file ufs/ffs/ffs_balloc.c ffs mfs
+file ufs/ffs/ffs_inode.c ffs mfs
+file ufs/ffs/ffs_subr.c ffs mfs
+file ufs/ffs/ffs_tables.c ffs mfs
+file ufs/ffs/ffs_vfsops.c ffs mfs
+file ufs/ffs/ffs_vnops.c ffs mfs
+file ufs/lfs/lfs_alloc.c lfs
+file ufs/lfs/lfs_bio.c lfs
+file ufs/lfs/lfs_balloc.c lfs
+file ufs/lfs/lfs_cksum.c lfs
+file ufs/lfs/lfs_debug.c lfs
+file ufs/lfs/lfs_inode.c lfs
+file ufs/lfs/lfs_segment.c lfs
+file ufs/lfs/lfs_subr.c lfs
+file ufs/lfs/lfs_syscalls.c lfs
+file ufs/lfs/lfs_vfsops.c lfs
+file ufs/lfs/lfs_vnops.c lfs
+file ufs/mfs/mfs_vfsops.c mfs
+file ufs/mfs/mfs_vnops.c mfs
+file ufs/ufs/ufs_bmap.c ffs lfs mfs
+file ufs/ufs/ufs_disksubr.c ffs lfs mfs
+file ufs/ufs/ufs_ihash.c ffs lfs mfs
+file ufs/ufs/ufs_inode.c ffs lfs mfs
+file ufs/ufs/ufs_lockf.c ffs lfs mfs
+file ufs/ufs/ufs_lookup.c ffs lfs mfs
+file ufs/ufs/ufs_quota.c ffs lfs mfs
+file ufs/ufs/ufs_vfsops.c ffs lfs mfs
+file ufs/ufs/ufs_vnops.c ffs lfs mfs
+file vm/device_pager.c devpager
+file vm/kern_lock.c
+file vm/swap_pager.c swappager
+file vm/vm_fault.c
+file vm/vm_glue.c
+file vm/vm_init.c
+file vm/vm_kern.c
+file vm/vm_map.c
+file vm/vm_meter.c
+file vm/vm_mmap.c
+file vm/vm_object.c
+file vm/vm_page.c
+file vm/vm_pageout.c
+file vm/vm_pager.c
+file vm/vm_swap.c
+file vm/vm_unix.c
+file vm/vm_user.c
+file vm/vnode_pager.c vnodepager
diff --git a/sys/conf/newvers.sh b/sys/conf/newvers.sh
new file mode 100644
index 000000000000..83a2f04ad7ac
--- /dev/null
+++ b/sys/conf/newvers.sh
@@ -0,0 +1,48 @@
+#!/bin/sh -
+#
+# Copyright (c) 1984, 1986, 1990, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# @(#)newvers.sh 8.1 (Berkeley) 4/20/94
+
+if [ ! -r version ]
+then
+ echo 0 > version
+fi
+
+touch version
+v=`cat version` u=${USER-root} d=`pwd` h=`hostname` t=`date`
+echo "char ostype[] = \"4.4BSD\";" > vers.c
+echo "char osrelease[] = \"4.4BSD-Lite\";" >> vers.c
+echo "char sccs[4] = { '@', '(', '#', ')' };" >>vers.c
+echo "char version[] = \"4.4BSD-Lite #${v}: ${t}\\n ${u}@${h}:${d}\\n\";" >>vers.c
+
+echo `expr ${v} + 1` > version
diff --git a/sys/conf/nfsswapvmunix.c b/sys/conf/nfsswapvmunix.c
new file mode 100644
index 000000000000..f9812eb33147
--- /dev/null
+++ b/sys/conf/nfsswapvmunix.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsswapvmunix.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Sample NFS swapvmunix configuration file.
+ * This should be filled in by the bootstrap program.
+ * See /sys/nfs/nfsdiskless.h for details of the fields.
+ */
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+
+#include <net/if.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsdiskless.h>
+
+extern int nfs_mountroot();
+int (*mountroot)() = nfs_mountroot;
+
+dev_t rootdev = NODEV;
+dev_t argdev = NODEV;
+dev_t dumpdev = NODEV;
+
+struct swdevt swdevt[] = {
+ { NODEV, 0, 5000 }, /* happy:/u/swap.dopey */
+ { 0, 0, 0 }
+};
+struct nfs_diskless nfs_diskless = {
+ { { 'q', 'e', '0', '\0' },
+ { 0x10, 0x2, { 0x0, 0x0, 0x83, 0x68, 0x30, 0x2, } },
+ { 0x10, 0x2, { 0x0, 0x0, 0x83, 0x68, 0x30, 0xff, } },
+ { 0x10, 0x0, { 0x0, 0x0, 0xff, 0xff, 0xff, 0x0, } },
+ },
+ { 0x10, 0x2, { 0x0, 0x0, 0x83, 0x68, 0x30, 0x12, } },
+ {
+ (struct sockaddr *)0, SOCK_DGRAM, 0, (nfsv2fh_t *)0,
+ 0, 8192, 8192, 10, 100, (char *)0,
+ },
+ {
+ 0xf,
+ 0x9,
+ 0x0,
+ 0x0,
+ 0x1,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0xc,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x6,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x27,
+ 0x18,
+ 0x79,
+ 0x27,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ },
+ { 0x10, 0x2, { 0x8, 0x1, 0x83, 0x68, 0x30, 0x5, } },
+ "happy",
+ {
+ (struct sockaddr *)0, SOCK_DGRAM, 0, (nfsv2fh_t *)0,
+ 0, 8192, 8192, 10, 100, (char *)0,
+ },
+ {
+ 0x0,
+ 0x9,
+ 0x0,
+ 0x0,
+ 0x1,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0xc,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x2,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0xd0,
+ 0x48,
+ 0x42,
+ 0x25,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ },
+ { 0x10, 0x2, { 0x8, 0x1, 0x83, 0x68, 0x30, 0x5, } },
+ "happy",
+};
diff --git a/sys/conf/param.c b/sys/conf/param.c
new file mode 100644
index 000000000000..9f4e2cae857c
--- /dev/null
+++ b/sys/conf/param.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1980, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)param.c 8.2 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/callout.h>
+#include <sys/clist.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+
+#include <ufs/ufs/quota.h>
+
+#ifdef SYSVSHM
+#include <machine/vmparam.h>
+#include <sys/shm.h>
+#endif
+
+/*
+ * System parameter formulae.
+ *
+ * This file is copied into each directory where we compile
+ * the kernel; it should be modified there to suit local taste
+ * if necessary.
+ *
+ * Compiled with -DHZ=xx -DTIMEZONE=x -DDST=x -DMAXUSERS=xx
+ */
+
+#ifndef HZ
+#define HZ 100
+#endif
+int hz = HZ;
+int tick = 1000000 / HZ;
+int tickadj = 30000 / (60 * HZ); /* can adjust 30ms in 60s */
+struct timezone tz = { TIMEZONE, DST };
+#define NPROC (20 + 16 * MAXUSERS)
+int maxproc = NPROC;
+#define NTEXT (80 + NPROC / 8) /* actually the object cache */
+#define NVNODE (NPROC + NTEXT + 100)
+int desiredvnodes = NVNODE;
+int maxfiles = 3 * (NPROC + MAXUSERS) + 80;
+int ncallout = 16 + NPROC;
+int nclist = 60 + 12 * MAXUSERS;
+int nmbclusters = NMBCLUSTERS;
+int fscale = FSCALE; /* kernel uses `FSCALE', user uses `fscale' */
+
+/*
+ * Values in support of System V compatible shared memory. XXX
+ */
+#ifdef SYSVSHM
+#define SHMMAX (SHMMAXPGS*NBPG)
+#define SHMMIN 1
+#define SHMMNI 32 /* <= SHMMMNI in shm.h */
+#define SHMSEG 8
+#define SHMALL (SHMMAXPGS/CLSIZE)
+
+struct shminfo shminfo = {
+ SHMMAX,
+ SHMMIN,
+ SHMMNI,
+ SHMSEG,
+ SHMALL
+};
+#endif
+
+/*
+ * These are initialized at bootstrap time
+ * to values dependent on memory size
+ */
+int nbuf, nswbuf;
+
+/*
+ * These have to be allocated somewhere; allocating
+ * them here forces loader errors if this file is omitted
+ * (if they've been externed everywhere else; hah!).
+ */
+struct callout *callout;
+struct cblock *cfree;
+struct buf *buf, *swbuf;
+char *buffers;
+
+/*
+ * Proc/pgrp hashing.
+ * Here so that hash table sizes can depend on MAXUSERS/NPROC.
+ * Hash size must be a power of two.
+ * NOW omission of this file will cause loader errors!
+ */
+
+#if NPROC > 1024
+#define PIDHSZ 512
+#else
+#if NPROC > 512
+#define PIDHSZ 256
+#else
+#if NPROC > 256
+#define PIDHSZ 128
+#else
+#define PIDHSZ 64
+#endif
+#endif
+#endif
+
+struct proc *pidhash[PIDHSZ];
+struct pgrp *pgrphash[PIDHSZ];
+int pidhashmask = PIDHSZ - 1;
diff --git a/sys/conf/systags.sh b/sys/conf/systags.sh
new file mode 100644
index 000000000000..90714d769743
--- /dev/null
+++ b/sys/conf/systags.sh
@@ -0,0 +1,72 @@
+#! /bin/sh
+#
+# Copyright (c) 1992, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# @(#)systags.sh 8.1 (Berkeley) 6/10/93
+#
+# systags.sh - construct a system tags file using dependence relations
+# in a .depend file
+#
+# First written May 16, 1992 by Van Jacobson, Lawrence Berkeley Laboratory.
+#
+# from: $Header: systags.sh,v 1.7 92/07/12 08:18:21 torek Exp $
+
+rm -f tags tags.tmp tags.cfiles tags.sfiles tags.hfiles
+MACHINE=`machine`
+sed -e "s,\./machine/,../../$MACHINE/include/,g" \
+ -e 's,[a-z][^/ ]*/\.\./,,g' .depend | awk '{
+ for (i = 1; i <= NF; ++i) {
+ t = substr($i, length($i) - 1)
+ if (t == ".c")
+ cfiles[$i] = 1;
+ else if (t == ".h")
+ hfiles[$i] = 1;
+ else if (t == ".s")
+ sfiles[$i] = 1;
+ }
+ };
+ END {
+ for (i in cfiles)
+ print i > "tags.cfiles";
+ for (i in sfiles)
+ print i > "tags.sfiles";
+ for (i in hfiles)
+ print i > "tags.hfiles";
+ }'
+
+ctags -t -d -w `cat tags.cfiles tags.hfiles tags.sfiles`
+egrep -o "^ENTRY\(.*\)|^ALTENTRY\(.*\)" `cat tags.sfiles` | \
+ sed "s;\([^:]*\):\([^(]*\)(\([^, )]*\)\(.*\);\3 \1 /^\2(\3\4$/;" >> tags
+
+mv tags tags.tmp
+sort -u tags.tmp > tags
+rm tags.tmp tags.cfiles tags.sfiles tags.hfiles
diff --git a/sys/fs/cd9660/TODO b/sys/fs/cd9660/TODO
new file mode 100644
index 000000000000..555d26ad7d11
--- /dev/null
+++ b/sys/fs/cd9660/TODO
@@ -0,0 +1,77 @@
+# $Id: TODO,v 1.4 1993/09/07 15:40:51 ws Exp $
+
+ 1) should understand "older", original High Sierra ("CDROM001") type
+
+ Not yet. ( I don't have this technical information, yet. )
+
+ 2) should understand Rock Ridge
+
+ Yes, the following features are supported:
+
+ o Symbolic Link
+ o Real Name(long name)
+ o File Attribute
+ o Time stamp
+ o uid, gid
+ o Devices
+ o Relocated directories
+
+ Except for the following:
+
+ o POSIX device number mapping
+
+ There is some preliminary stuff in there that (ab-)uses the mknod
+ system call, but this needs a writable filesystem
+
+ 3) should be called cdfs, as there are other ISO file system soon possible
+
+ Not yet. Probably we should make another file system when the ECMA draft
+ is valid and do it. For doing Rock Ridge Support, I can use almost same
+ code. So I just use the same file system interface...
+
+ 4) should have file handles implemented for use with NFS, etc
+
+ Yes. we have already this one, and I based it for this release.
+
+ 5) should have name translation enabled by mount flag
+
+ Yes. We can disable the Rock Ridge Extension with the following option:
+
+ "mount -t isofs -o -norrip /dev/cd0d /cdrom"
+
+ 6) should run as a user process, and not take up kernel space (cdroms
+ are slow)
+
+ Not yet.
+
+ 7) ECMA support.
+
+ Not yet. we need not only a technical spec but also ECMA format
+ cd-rom itself!
+
+ 8) Character set change by SVD ( multi SVD support )
+
+ Not yet. We should also hack the other part of system as 8 bit
+ clean. As far as I know, if you export the cdrom by NFS, the client
+ can access the 8 bit clean (ie. Solaris Japanese with EUC code )
+
+ 9) Access checks in isofs_access
+
+ Not yet.
+
+ 10) Support for generation numbers
+
+ Yes. Default is to list only the last file (the one with the highest
+ generation number). If you mount with -gen, all files are shown with
+ their generation numbers. In both cases you can specify the generation
+ number on opening files (if you happen to know it) or leave it off,
+ when it will again find the last file.
+
+ 11) Support for extended attributes
+
+ Yes. Since this requires an extra block buffer for the attributes
+ this must be enabled on mounting with the option -extattr.
+
+----------
+Last update July 19, '93 by Atsushi Murai. (amurai@spec.co.jp)
+Last update August 19, '93 by Wolfgang Solfrank. (ws@tools.de)
diff --git a/sys/fs/cd9660/TODO.hibler b/sys/fs/cd9660/TODO.hibler
new file mode 100644
index 000000000000..3501aa296cd2
--- /dev/null
+++ b/sys/fs/cd9660/TODO.hibler
@@ -0,0 +1,22 @@
+1. Investigate making ISOFS another UFS shared filesystem (ala FFS/MFS/LFS).
+ Since it was modelled after the inode code, we might be able to merge
+ them back. It looks like a separate (but very similar) lookup routine
+ will be needed due to the associated file stuff.
+
+2. Make filesystem exportable. This comes for free if stacked with UFS.
+ Otherwise, the ufs_export routines need to be elevated to vfs_* routines.
+ [ DONE - hibler ]
+
+3. If it can't be merged with UFS, at least get them in sync. For example,
+ it could use the same style hashing routines as in ufs/ufs_ihash.c
+
+4. It would be nice to be able to use the vfs_cluster code.
+ Unfortunately, if the logical block size is smaller than the page size,
+ it won't work. Also, if throughput is relatively constant for any
+ block size (as it is for the HP drive--150kbs) then clustering may not
+ buy much (or may even hurt when vfs_cluster comes up with a large sync
+ cluster).
+
+5. Seems like there should be a "notrans" or some such mount option to show
+ filenames as they really are without lower-casing, stripping of version
+ numbers, etc. Does this make sense?
diff --git a/sys/fs/cd9660/cd9660_bmap.c b/sys/fs/cd9660/cd9660_bmap.c
new file mode 100644
index 000000000000..911eedfd06ae
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_bmap.c
@@ -0,0 +1,102 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_bmap.c 8.3 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+
+/*
+ * Bmap converts the logical block number of a file to its physical block
+ * number on the disk by offsetting from the start of the file's extent;
+ * it also reports the device vnode and a read-ahead run length if asked.
+ */
+int
+cd9660_bmap(ap)
+ struct vop_bmap_args /* {
+ struct vnode *a_vp;
+ daddr_t a_bn;
+ struct vnode **a_vpp;
+ daddr_t *a_bnp;
+ int *a_runp;
+ } */ *ap;
+{
+ struct iso_node *ip = VTOI(ap->a_vp);
+ daddr_t lblkno = ap->a_bn; /* logical block within the file */
+ long bsize; /* logical block size taken from the mount */
+
+ /*
+ * Hand back the underlying device vnode if the caller wants it; if no
+ * block-number output was requested there is nothing more to do.
+ */
+ if (ap->a_vpp != NULL)
+ *ap->a_vpp = ip->i_devvp;
+ if (ap->a_bnp == NULL)
+ return (0);
+
+ /*
+ * Offset lblkno from the extent start, then scale by btodb(bsize).
+ */
+ bsize = ip->i_mnt->logical_block_size;
+ *ap->a_bnp = (ip->iso_start + lblkno) * btodb(bsize);
+
+ /*
+ * Report how many whole blocks follow the requested one within the
+ * file, clamped to the range [0, MAXBSIZE/bsize - 1].
+ */
+ if (ap->a_runp) {
+ int nblk; /* whole blocks left after lblkno; may be <= 0 */
+
+ nblk = (ip->i_size - (lblkno + 1) * bsize) / bsize;
+ if (nblk <= 0)
+ *ap->a_runp = 0;
+ else if (nblk >= MAXBSIZE/bsize)
+ *ap->a_runp = MAXBSIZE/bsize - 1;
+ else
+ *ap->a_runp = nblk;
+ }
+
+ return 0;
+}
diff --git a/sys/fs/cd9660/cd9660_lookup.c b/sys/fs/cd9660/cd9660_lookup.c
new file mode 100644
index 000000000000..62d1d3fc791e
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_lookup.c
@@ -0,0 +1,465 @@
+/*-
+ * Copyright (c) 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)ufs_lookup.c 7.33 (Berkeley) 5/19/91
+ *
+ * @(#)cd9660_lookup.c 8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+#include <isofs/cd9660/cd9660_rrip.h>
+
+struct nchstats iso_nchstats;
+
+/*
+ * Convert a component of a pathname into a pointer to a locked inode.
+ * This is a very central and rather complicated routine.
+ * If the file system is not maintained in a strict tree hierarchy,
+ * this can result in a deadlock situation (see comments in code below).
+ *
+ * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
+ * whether the name is to be looked up, created, renamed, or deleted.
+ * When CREATE, RENAME, or DELETE is specified, information usable in
+ * creating, renaming, or deleting a directory entry may be calculated.
+ * If flag has LOCKPARENT or'ed into it and the target of the pathname
+ * exists, lookup returns both the target and its parent directory locked.
+ * When creating or renaming and LOCKPARENT is specified, the target may
+ * not be ".". When deleting and LOCKPARENT is specified, the target may
+ * be ".", but the caller must check to ensure it does a vrele and an iput
+ * instead of two iputs.
+ *
+ * Overall outline of cd9660_lookup (derived from ufs_lookup):
+ *
+ * check accessibility of directory
+ * look for name in cache, if found, then if at end of path
+ * and deleting or creating, drop it, else return name
+ * search for name in directory, to found or notfound
+ * notfound:
+ * if creating, return locked directory, leaving info on available slots
+ * else return error
+ * found:
+ * if at end of path and deleting, return information to allow delete
+ * if at end of path and rewriting (RENAME and LOCKPARENT), lock target
+ * inode and return info to allow rewrite
+ * if not at end, add name to cache; if at end and neither creating
+ * nor deleting, add name to cache
+ *
+ * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
+ */
+int
+cd9660_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vdp;	/* vnode for directory being searched */
+	register struct iso_node *dp;	/* inode for directory being searched */
+	register struct iso_mnt *imp;	/* file system that directory is in */
+	struct buf *bp;			/* a buffer of directory entries */
+	struct iso_directory_record *ep;/* the current directory entry */
+	int entryoffsetinblock;		/* offset of ep in bp's buffer */
+	int saveoffset;			/* offset of last directory entry in dir */
+	int numdirpasses;		/* strategy for directory search */
+	doff_t endsearch;		/* offset to end directory search */
+	struct iso_node *pdp;		/* saved dp during symlink work */
+	struct iso_node *tdp;		/* returned by iget */
+	int lockparent;			/* 1 => lockparent flag is set */
+	int wantparent;			/* 1 => wantparent or lockparent flag */
+	int error;
+	ino_t ino = 0;
+	int reclen;
+	u_short namelen;
+	char altname[NAME_MAX];
+	int res;
+	int assoc, len;
+	char *name;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	struct ucred *cred = cnp->cn_cred;
+	int flags = cnp->cn_flags;
+	int nameiop = cnp->cn_nameiop;
+
+	bp = NULL;
+	*vpp = NULL;
+	vdp = ap->a_dvp;
+	dp = VTOI(vdp);
+	imp = dp->i_mnt;
+	lockparent = flags & LOCKPARENT;
+	wantparent = flags & (LOCKPARENT|WANTPARENT);
+
+	/*
+	 * Check accessibility of directory.
+	 */
+	if (vdp->v_type != VDIR)
+		return (ENOTDIR);
+	if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0)
+		return (error);
+
+	/*
+	 * We now have a segment name to search for, and a directory to search.
+	 *
+	 * Before tediously performing a linear scan of the directory,
+	 * check the name cache to see if the directory/name pair
+	 * we are looking for is known already.
+	 */
+	if ((error = cache_lookup(vdp, vpp, cnp)) != 0) {
+		int vpid;	/* capability number of vnode */
+
+		if (error == ENOENT)
+			return (error);
+#ifdef PARANOID
+		if ((vdp->v_flag & VROOT) && (flags & ISDOTDOT))
+			panic("ufs_lookup: .. through root");
+#endif
+		/*
+		 * Get the next vnode in the path.
+		 * See comment below starting `Step through' for
+		 * an explanation of the locking protocol.
+		 */
+		pdp = dp;
+		dp = VTOI(*vpp);
+		vdp = *vpp;
+		vpid = vdp->v_id;
+		if (pdp == dp) {
+			VREF(vdp);
+			error = 0;
+		} else if (flags & ISDOTDOT) {
+			ISO_IUNLOCK(pdp);
+			error = vget(vdp, 1);
+			if (!error && lockparent && (flags & ISLASTCN))
+				ISO_ILOCK(pdp);
+		} else {
+			error = vget(vdp, 1);
+			if (!lockparent || error || !(flags & ISLASTCN))
+				ISO_IUNLOCK(pdp);
+		}
+		/*
+		 * Check that the capability number did not change
+		 * while we were waiting for the lock.
+		 */
+		if (!error) {
+			if (vpid == vdp->v_id)
+				return (0);
+			iso_iput(dp);
+			if (lockparent && pdp != dp && (flags & ISLASTCN))
+				ISO_IUNLOCK(pdp);
+		}
+		/* The cached vnode was stale: fall back to scanning the parent. */
+		ISO_ILOCK(pdp);
+		dp = pdp;
+		vdp = ITOV(dp);
+		*vpp = NULL;
+	}
+
+	len = cnp->cn_namelen;
+	name = cnp->cn_nameptr;
+	/*
+	 * A leading `=' means, we are looking for an associated file
+	 */
+	if ((assoc = (imp->iso_ftype != ISO_FTYPE_RRIP && *name == ASSOCCHAR))) {
+		len--;
+		name++;
+	}
+
+	/*
+	 * If there is cached information on a previous search of
+	 * this directory, pick up where we last left off.
+	 * We cache only lookups as these are the most common
+	 * and have the greatest payoff. Caching CREATE has little
+	 * benefit as it usually must search the entire directory
+	 * to determine that the entry does not exist. Caching the
+	 * location of the last DELETE or RENAME has not reduced
+	 * profiling time and hence has been removed in the interest
+	 * of simplicity.
+	 */
+	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
+	    dp->i_diroff > dp->i_size) {
+		entryoffsetinblock = 0;
+		dp->i_offset = 0;
+		numdirpasses = 1;
+	} else {
+		dp->i_offset = dp->i_diroff;
+		entryoffsetinblock = iso_blkoff(imp, dp->i_offset);
+		if (entryoffsetinblock != 0) {
+			if ((error = iso_blkatoff(dp, dp->i_offset, &bp)) != 0)
+				return (error);
+		}
+		numdirpasses = 2;
+		iso_nchstats.ncs_2passes++;
+	}
+	endsearch = roundup(dp->i_size, imp->logical_block_size);
+
+searchloop:
+	while (dp->i_offset < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+		if (iso_blkoff(imp, dp->i_offset) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if ((error = iso_blkatoff(dp, dp->i_offset, &bp)) != 0)
+				return (error);
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+		ep = (struct iso_directory_record *)
+		    (bp->b_un.b_addr + entryoffsetinblock);
+
+		reclen = isonum_711 (ep->length);
+		if (reclen == 0) {
+			/*
+			 * Skip to next block, if any.  Round up from
+			 * i_offset + 1: rounding i_offset itself is a no-op
+			 * when the empty record sits exactly on a block
+			 * boundary, which would re-read the same block
+			 * forever.
+			 */
+			dp->i_offset =
+			    roundup(dp->i_offset + 1, imp->logical_block_size);
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE)
+			/* illegal entry, stop */
+			break;
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size)
+			/* entries are not allowed to cross boundaries */
+			break;
+
+		/*
+		 * Check for a name match.
+		 */
+		namelen = isonum_711(ep->name_len);
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen)
+			/* illegal entry, stop */
+			break;
+
+		switch (imp->iso_ftype) {
+		default:
+			/* Only consider entries whose flag bit 4 agrees with assoc. */
+			if ((!(isonum_711(ep->flags)&4)) == !assoc) {
+				if ((len == 1
+				     && *name == '.')
+				    || (flags & ISDOTDOT)) {
+					if (namelen == 1
+					    && ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) {
+						/*
+						 * Save directory entry's inode number and
+						 * reclen in ndp->ni_ufs area, and release
+						 * directory buffer.
+						 */
+						isodirino(&dp->i_ino,ep,imp);
+						goto found;
+					}
+					if (namelen != 1
+					    || ep->name[0] != 0)
+						goto notfound;
+				} else if (!(res = isofncmp(name,len,
+							    ep->name,namelen))) {
+					/*
+					 * Remember the match but keep scanning;
+					 * the entry is committed at foundino once a
+					 * non-matching record (or the end) is reached.
+					 */
+					if (isonum_711(ep->flags)&2)
+						isodirino(&ino,ep,imp);
+					else
+						ino = dbtob(bp->b_blkno)
+							+ entryoffsetinblock;
+					saveoffset = dp->i_offset;
+				} else if (ino)
+					goto foundino;
+#ifdef	NOSORTBUG	/* On some CDs directory entries are not sorted correctly */
+				else if (res < 0)
+					goto notfound;
+				else if (res > 0 && numdirpasses == 2)
+					numdirpasses++;
+#endif
+			}
+			break;
+		case ISO_FTYPE_RRIP:
+			if (isonum_711(ep->flags)&2)
+				isodirino(&ino,ep,imp);
+			else
+				ino = dbtob(bp->b_blkno) + entryoffsetinblock;
+			dp->i_ino = ino;
+			cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp);
+			if (namelen == cnp->cn_namelen
+			    && !bcmp(name,altname,namelen))
+				goto found;
+			ino = 0;
+			break;
+		}
+		dp->i_offset += reclen;
+		entryoffsetinblock += reclen;
+	}
+	if (ino) {
+foundino:
+		dp->i_ino = ino;
+		if (saveoffset != dp->i_offset) {
+			/* Step back to the remembered entry, re-reading its block if needed. */
+			if (iso_lblkno(imp,dp->i_offset)
+			    != iso_lblkno(imp,saveoffset)) {
+				if (bp != NULL)
+					brelse(bp);
+				if ((error = iso_blkatoff(dp, saveoffset, &bp)) != 0)
+					return (error);
+			}
+			ep = (struct iso_directory_record *)(bp->b_un.b_addr
+							     + iso_blkoff(imp,saveoffset));
+			dp->i_offset = saveoffset;
+		}
+		goto found;
+	}
+notfound:
+	/*
+	 * If we started in the middle of the directory and failed
+	 * to find our target, we must check the beginning as well.
+	 */
+	if (numdirpasses == 2) {
+		numdirpasses--;
+		dp->i_offset = 0;
+		endsearch = dp->i_diroff;
+		goto searchloop;
+	}
+	if (bp != NULL)
+		brelse(bp);
+	/*
+	 * Insert name into cache (as non-existent) if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	if (nameiop == CREATE || nameiop == RENAME)
+		return (EJUSTRETURN);
+	return (ENOENT);
+
+found:
+	if (numdirpasses == 2)
+		iso_nchstats.ncs_pass2++;
+	/*
+	 * The directory buffer is NOT released here: ep points into it and
+	 * is handed to iso_iget() below, so bp must stay valid until that
+	 * call has returned.
+	 */
+
+	/*
+	 * Found component in pathname.
+	 * If the final component of path name, save information
+	 * in the cache as to where the entry was found.
+	 */
+	if ((flags & ISLASTCN) && nameiop == LOOKUP)
+		dp->i_diroff = dp->i_offset;
+
+	/*
+	 * Step through the translation in the name.  We do not `iput' the
+	 * directory because we may need it again if a symbolic link
+	 * is relative to the current directory.  Instead we save it
+	 * unlocked as "pdp".  We must get the target inode before unlocking
+	 * the directory to ensure that the inode will not be removed
+	 * before we get it.  We prevent deadlock by always fetching
+	 * inodes from the root, moving down the directory tree. Thus
+	 * when following backward pointers ".." we must unlock the
+	 * parent directory before getting the requested directory.
+	 * There is a potential race condition here if both the current
+	 * and parent directories are removed before the `iget' for the
+	 * inode associated with ".." returns.  We hope that this occurs
+	 * infrequently since we cannot avoid this race condition without
+	 * implementing a sophisticated deadlock detection algorithm.
+	 * Note also that this simple deadlock detection scheme will not
+	 * work if the file system has any hard links other than ".."
+	 * that point backwards in the directory structure.
+	 */
+	pdp = dp;
+	/*
+	 * If ino is different from dp->i_ino,
+	 * it's a relocated directory.
+	 */
+	if (flags & ISDOTDOT) {
+		ISO_IUNLOCK(pdp);	/* race to get the inode */
+		error = iso_iget(dp,dp->i_ino,
+				 dp->i_ino != ino,
+				 &tdp,ep);
+		if (bp != NULL)
+			brelse(bp);
+		if (error) {
+			ISO_ILOCK(pdp);
+			return (error);
+		}
+		if (lockparent && (flags & ISLASTCN))
+			ISO_ILOCK(pdp);
+		*vpp = ITOV(tdp);
+	} else if (dp->i_number == dp->i_ino) {
+		if (bp != NULL)
+			brelse(bp);
+		VREF(vdp);	/* we want ourself, ie "." */
+		*vpp = vdp;
+	} else {
+		error = iso_iget(dp,dp->i_ino,dp->i_ino!=ino,&tdp,ep);
+		if (bp != NULL)
+			brelse(bp);
+		if (error)
+			return (error);
+		if (!lockparent || !(flags & ISLASTCN))
+			ISO_IUNLOCK(pdp);
+		*vpp = ITOV(tdp);
+	}
+
+	/*
+	 * Insert name into cache if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	return (0);
+}
+
+/*
+ * Read the directory block containing byte "offset" of directory "ip".
+ * On success the buffer is stored in *bpp and 0 is returned; on failure
+ * the buffer is released, *bpp is cleared and the bread() error is
+ * returned.
+ */
+iso_blkatoff(ip, offset, bpp)
+	struct iso_node *ip;
+	doff_t offset;
+	struct buf **bpp;
+{
+	register struct iso_mnt *mntinfo = ip->i_mnt;
+	daddr_t blk = iso_lblkno(mntinfo,offset);
+	int nbytes = iso_blksize(mntinfo,ip,blk);
+	struct buf *buf;
+	int rc;
+
+	rc = bread(ITOV(ip),blk,nbytes,NOCRED,&buf);
+	if (rc == 0) {
+		*bpp = buf;
+		return (0);
+	}
+
+	/* The buffer handed back by the failed read is released here. */
+	brelse(buf);
+	*bpp = 0;
+	return (rc);
+}
diff --git a/sys/fs/cd9660/cd9660_node.c b/sys/fs/cd9660/cd9660_node.c
new file mode 100644
index 000000000000..d83a7a6f126a
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_node.c
@@ -0,0 +1,648 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_node.c 8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/stat.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+/*
+ * In-core inode hash table.  INOHASH picks a bucket from the device and
+ * the inode number shifted right by 12; a mask is used when INOHSZ is a
+ * power of two, a modulo otherwise.
+ */
+#define INOHSZ 512
+#if ((INOHSZ&(INOHSZ-1)) == 0)
+#define INOHASH(dev,ino) (((dev)+((ino)>>12))&(INOHSZ-1))
+#else
+#define INOHASH(dev,ino) (((unsigned)((dev)+((ino)>>12)))%INOHSZ)
+#endif
+
+/*
+ * Bucket head of an inode hash chain.  The two views let a head be linked
+ * to itself when empty (see cd9660_init) and be walked as iso_node links.
+ */
+union iso_ihead {
+ union iso_ihead *ih_head[2];
+ struct iso_node *ih_chain[2];
+} iso_ihead[INOHSZ];
+
+#ifdef ISODEVMAP
+/*
+ * Device-number mapping hash table, laid out like the inode hash above.
+ */
+#define DNOHSZ 64
+#if ((DNOHSZ&(DNOHSZ-1)) == 0)
+#define DNOHASH(dev,ino) (((dev)+((ino)>>12))&(DNOHSZ-1))
+#else
+#define DNOHASH(dev,ino) (((unsigned)((dev)+((ino)>>12)))%DNOHSZ)
+#endif
+
+/* Bucket head of a device-map hash chain. */
+union iso_dhead {
+ union iso_dhead *dh_head[2];
+ struct iso_dnode *dh_chain[2];
+} iso_dhead[DNOHSZ];
+#endif
+
+int prtactive; /* 1 => print out reclaim of active vnodes */
+
+/*
+ * Initialize hash links for inodes and dnodes: every bucket head becomes
+ * an empty circular list whose two links point back at the head itself.
+ */
+cd9660_init()
+{
+	register int n;
+
+	for (n = 0; n < INOHSZ; n++) {
+		iso_ihead[n].ih_head[0] = &iso_ihead[n];
+		iso_ihead[n].ih_head[1] = &iso_ihead[n];
+	}
+#ifdef ISODEVMAP
+	for (n = 0; n < DNOHSZ; n++) {
+		iso_dhead[n].dh_head[0] = &iso_dhead[n];
+		iso_dhead[n].dh_head[1] = &iso_dhead[n];
+	}
+#endif
+}
+
+#ifdef ISODEVMAP
+/*
+ * Find the device-map node for (dev, ino) in the device hash list.
+ * When none exists, return a null pointer unless "create" is set, in
+ * which case a new node is allocated, keyed and linked into the bucket.
+ */
+struct iso_dnode *
+iso_dmap(dev,ino,create)
+	dev_t dev;
+	ino_t ino;
+	int create;
+{
+	union iso_dhead *bucket = &iso_dhead[DNOHASH(dev, ino)];
+	struct iso_dnode *node;
+
+	/* Walk the circular chain until we are back at the bucket head. */
+	node = bucket->dh_chain[0];
+	while (node != (struct iso_dnode *)bucket) {
+		if (ino == node->i_number && dev == node->i_dev)
+			return node;
+		node = node->d_forw;
+	}
+
+	if (!create)
+		return (struct iso_dnode *)0;
+
+	MALLOC(node,struct iso_dnode *,sizeof(struct iso_dnode),M_CACHE,M_WAITOK);
+	node->i_dev = dev;
+	node->i_number = ino;
+	insque(node,bucket);
+
+	return node;
+}
+
+/*
+ * Drop every device-map node that belongs to "dev" from all hash chains.
+ */
+void
+iso_dunmap(dev)
+	dev_t dev;
+{
+	struct iso_dnode *cur, *next;
+	union iso_dhead *bucket;
+	int n;
+
+	for (n = 0; n < DNOHSZ; n++) {
+		bucket = &iso_dhead[n];
+		cur = bucket->dh_chain[0];
+		while (cur != (struct iso_dnode *)bucket) {
+			/* Fetch the successor first; cur may be freed below. */
+			next = cur->d_forw;
+			if (cur->i_dev == dev) {
+				remque(cur);
+				FREE(cur,M_CACHE);
+			}
+			cur = next;
+		}
+	}
+}
+#endif
+
+/*
+ * Look up a ISOFS dinode number to find its incore vnode.
+ * If it is not in core, read it in from the specified device.
+ * If it is in core, wait for the lock bit to clear, then
+ * return the inode locked. Detection and handling of mount
+ * points must be done by the calling routine.
+ *
+ * xp		any inode on the same mount; supplies device and mount point
+ * ino		inode number to look up
+ * relocated	non-zero if the directory record must be read from the
+ *		`.' entry of the target directory instead of "isodir"
+ * ipp		receives the locked inode, or 0 on failure
+ * isodir	directory record describing the inode (unused if relocated)
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+iso_iget(xp, ino, relocated, ipp, isodir)
+	struct iso_node *xp;
+	ino_t ino;
+	int relocated;
+	struct iso_node **ipp;
+	struct iso_directory_record *isodir;
+{
+	dev_t dev = xp->i_dev;
+	struct mount *mntp = ITOV(xp)->v_mount;
+	register struct iso_node *ip, *iq;
+	register struct vnode *vp;
+	register struct iso_dnode *dp;
+	struct vnode *nvp;
+	struct buf *bp = NULL, *bp2 = NULL;
+	union iso_ihead *ih;
+	int error, off;
+	struct iso_mnt *imp;
+
+	ih = &iso_ihead[INOHASH(dev, ino)];
+loop:
+	for (ip = ih->ih_chain[0];
+	     ip != (struct iso_node *)ih;
+	     ip = ip->i_forw) {
+		if (ino != ip->i_number || dev != ip->i_dev)
+			continue;
+		if ((ip->i_flag&ILOCKED) != 0) {
+			/* Wait for the holder, then rescan: the chain may have changed. */
+			ip->i_flag |= IWANT;
+			sleep((caddr_t)ip, PINOD);
+			goto loop;
+		}
+		if (vget(ITOV(ip), 1))
+			goto loop;
+		*ipp = ip;
+		return 0;
+	}
+	/*
+	 * Allocate a new vnode/iso_node.
+	 */
+	if ((error = getnewvnode(VT_ISOFS, mntp, cd9660_vnodeop_p, &nvp)) != 0) {
+		*ipp = 0;
+		return error;
+	}
+	MALLOC(ip, struct iso_node *, sizeof(struct iso_node),
+	       M_ISOFSNODE, M_WAITOK);
+	bzero((caddr_t)ip, sizeof(struct iso_node));
+	nvp->v_data = ip;
+	ip->i_vnode = nvp;
+	ip->i_flag = 0;
+	ip->i_devvp = 0;
+	ip->i_diroff = 0;
+	ip->i_lockf = 0;
+
+	/*
+	 * Put it onto its hash chain and lock it so that other requests for
+	 * this inode will block if they arrive while we are sleeping waiting
+	 * for old data structures to be purged or for the contents of the
+	 * disk portion of this inode to be read.
+	 */
+	ip->i_dev = dev;
+	ip->i_number = ino;
+	insque(ip, ih);
+	ISO_ILOCK(ip);
+
+	imp = VFSTOISOFS (mntp);
+	ip->i_mnt = imp;
+	ip->i_devvp = imp->im_devvp;
+	VREF(ip->i_devvp);
+
+	if (relocated) {
+		/*
+		 * On relocated directories we must
+		 * read the `.' entry out of a dir.
+		 */
+		ip->iso_start = ino >> imp->im_bshift;
+		if ((error = iso_blkatoff(ip,0,&bp)) != 0) {
+			/*
+			 * Drop our device reference and forget the pointer,
+			 * so that cd9660_reclaim() (reached through the
+			 * iso_iput() below) does not release it a second
+			 * time.
+			 */
+			vrele(ip->i_devvp);
+			ip->i_devvp = 0;
+			remque(ip);
+			ip->i_forw = ip;
+			ip->i_back = ip;
+			iso_iput(ip);
+			*ipp = 0;
+			return error;
+		}
+		isodir = (struct iso_directory_record *)bp->b_un.b_addr;
+	}
+
+	ip->iso_extent = isonum_733(isodir->extent);
+	ip->i_size = isonum_733(isodir->size);
+	ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent;
+
+	vp = ITOV(ip);
+
+	/*
+	 * Setup time stamp, attribute
+	 */
+	vp->v_type = VNON;
+	switch (imp->iso_ftype) {
+	default:	/* ISO_FTYPE_9660 */
+		/*
+		 * iso_blkatoff() takes a byte offset, so the length (in
+		 * logical blocks) of the extended attribute record is
+		 * scaled by the block size, exactly as cd9660_defattr()
+		 * and cd9660_deftstamp() do.  If the read fails bp2 stays
+		 * 0 and the defaults are used.
+		 */
+		if ((imp->im_flags&ISOFSMNT_EXTATT)
+		    && (off = isonum_711(isodir->ext_attr_length)))
+			iso_blkatoff(ip,-off * imp->logical_block_size,
+				     &bp2);
+		cd9660_defattr(isodir,ip,bp2 );
+		cd9660_deftstamp(isodir,ip,bp2 );
+		break;
+	case ISO_FTYPE_RRIP:
+		(void) cd9660_rrip_analyze(isodir,ip,imp);
+		break;
+	}
+	if (bp2)
+		brelse(bp2);
+	if (bp)
+		brelse(bp);
+
+	/*
+	 * Initialize the associated vnode
+	 */
+	vp->v_type = IFTOVT(ip->inode.iso_mode);
+
+	if ( vp->v_type == VFIFO ) {
+#ifdef	FIFO
+		extern int (**cd9660_fifoop_p)();
+		vp->v_op = cd9660_fifoop_p;
+#else
+		iso_iput(ip);
+		*ipp = 0;
+		return EOPNOTSUPP;
+#endif	/* FIFO */
+	} else if ( vp->v_type == VCHR || vp->v_type == VBLK ) {
+		extern int (**cd9660_specop_p)();
+
+		/*
+		 * if device, look at device number table for translation
+		 */
+#ifdef	ISODEVMAP
+		if ((dp = iso_dmap(dev,ino,0)) != 0)
+			ip->inode.iso_rdev = dp->d_dev;
+#endif
+		vp->v_op = cd9660_specop_p;
+		if ((nvp = checkalias(vp, ip->inode.iso_rdev, mntp)) != 0) {
+			/*
+			 * Reinitialize aliased inode.
+			 */
+			vp = nvp;
+			iq = VTOI(vp);
+			iq->i_vnode = vp;
+			iq->i_flag = 0;
+			ISO_ILOCK(iq);
+			iq->i_dev = dev;
+			iq->i_number = ino;
+			iq->i_mnt = ip->i_mnt;
+			/* Copy everything from iso_extent to the end of the node. */
+			bcopy(&ip->iso_extent,&iq->iso_extent,
+			      (char *)(ip + 1) - (char *)&ip->iso_extent);
+			insque(iq, ih);
+			/*
+			 * Discard unneeded vnode
+			 * (This introduces the need of INACTIVE modification)
+			 */
+			ip->inode.iso_mode = 0;
+			iso_iput(ip);
+			ip = iq;
+		}
+	}
+
+	if (ip->iso_extent == imp->root_extent)
+		vp->v_flag |= VROOT;
+
+	*ipp = ip;
+	return 0;
+}
+
+/*
+ * Release a locked inode: drop the lock, then give up one vnode reference.
+ * Panics if the inode is not locked on entry.
+ */
+iso_iput(ip)
+	register struct iso_node *ip;
+{
+
+	if (!(ip->i_flag & ILOCKED))
+		panic("iso_iput");
+	ISO_IUNLOCK(ip);
+	vrele(ITOV(ip));
+}
+
+/*
+ * Last reference to an inode.  The inode flags are cleared, and a vnode
+ * whose mode has been zeroed (see the alias handling in iso_iget) is
+ * reclaimed at once so that it can be reused immediately.
+ * Always returns 0.
+ */
+int
+cd9660_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	register struct iso_node *ip = VTOI(vp);
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("cd9660_inactive: pushing active", vp);
+
+	ip->i_flag = 0;
+
+	/* Only discarded nodes (mode 0) with no users are thrown away. */
+	if (vp->v_usecount != 0 || ip->inode.iso_mode != 0)
+		return 0;
+	vgone(vp);
+	return 0;
+}
+
+/*
+ * Reclaim an inode so that it can be used for other purposes: unhash it,
+ * purge its name cache entries, drop the device vnode reference and free
+ * the iso_node hanging off the vnode.  Always returns 0.
+ */
+int
+cd9660_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct iso_node *ip = VTOI(vp);
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("cd9660_reclaim: pushing active", vp);
+
+	/*
+	 * Take the inode off its hash chain and leave it linked to itself.
+	 */
+	remque(ip);
+	ip->i_forw = ip;
+	ip->i_back = ip;
+
+	/*
+	 * Purge old data structures associated with the inode.
+	 */
+	cache_purge(vp);
+	if (ip->i_devvp != 0) {
+		vrele(ip->i_devvp);
+		ip->i_devvp = 0;
+	}
+	FREE(vp->v_data, M_ISOFSNODE);
+	vp->v_data = NULL;
+	return 0;
+}
+
+/*
+ * Lock an inode.  While it is already locked, set the WANT bit and sleep.
+ * i_spare0 records the pid of the lock holder and i_spare1 the pid of the
+ * most recent waiter; relocking by the holder itself panics.
+ */
+iso_ilock(ip)
+	register struct iso_node *ip;
+{
+
+	for (;;) {
+		if ((ip->i_flag & ILOCKED) == 0)
+			break;
+		ip->i_flag |= IWANT;
+		if (ip->i_spare0 == curproc->p_pid)
+			panic("locking against myself");
+		ip->i_spare1 = curproc->p_pid;
+		(void) sleep((caddr_t)ip, PINOD);
+	}
+	ip->i_spare1 = 0;
+	ip->i_spare0 = curproc->p_pid;
+	ip->i_flag |= ILOCKED;
+}
+
+/*
+ * Unlock an inode and wake up anybody who flagged interest with WANT.
+ * Unlocking an inode that is not locked is reported but not fatal.
+ */
+iso_iunlock(ip)
+	register struct iso_node *ip;
+{
+
+	if (!(ip->i_flag & ILOCKED))
+		vprint("iso_iunlock: unlocked inode", ITOV(ip));
+	ip->i_spare0 = 0;
+	ip->i_flag &= ~ILOCKED;
+	if ((ip->i_flag & IWANT) == 0)
+		return;
+	ip->i_flag &= ~IWANT;
+	wakeup((caddr_t)ip);
+}
+
+/*
+ * File attributes.
+ *
+ * Fill in mode, link count, uid and gid of "inop" from directory record
+ * "isodir".  "bp", if given, holds the extended attribute record;
+ * otherwise it is read here when the mount has ISOFSMNT_EXTATT set and
+ * the record has a non-zero extended attribute length.  Without a
+ * version 1 extended attribute record the file is read/execute for
+ * everybody and owned by uid 0, gid 0.
+ */
+void
+cd9660_defattr(isodir,inop,bp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+{
+	struct buf *ownbuf = NULL;
+	struct iso_mnt *imp;
+	struct iso_extended_attributes *xattr = NULL;
+	int off;
+
+	/*
+	 * Directories get a link count of 1 as well: if we return 2, fts()
+	 * will assume there are no subdirectories (just links for the path
+	 * and .).
+	 */
+	inop->inode.iso_mode = (isonum_711(isodir->flags)&2) ? S_IFDIR : S_IFREG;
+	inop->inode.iso_links = 1;
+
+	if (!bp
+	    && ((imp = inop->i_mnt)->im_flags&ISOFSMNT_EXTATT)
+	    && (off = isonum_711(isodir->ext_attr_length))) {
+		iso_blkatoff(inop,-off * imp->logical_block_size,&ownbuf);
+		bp = ownbuf;
+	}
+
+	/* Only a version 1 extended attribute record is trusted. */
+	if (bp) {
+		xattr = (struct iso_extended_attributes *)bp->b_un.b_addr;
+		if (isonum_711(xattr->version) != 1)
+			xattr = NULL;
+	}
+
+	if (xattr) {
+		/* A clear permission bit grants the corresponding access. */
+		if (!(xattr->perm[0]&0x40))
+			inop->inode.iso_mode |= VEXEC >> 6;
+		if (!(xattr->perm[0]&0x10))
+			inop->inode.iso_mode |= VREAD >> 6;
+		if (!(xattr->perm[0]&4))
+			inop->inode.iso_mode |= VEXEC >> 3;
+		if (!(xattr->perm[0]&1))
+			inop->inode.iso_mode |= VREAD >> 3;
+		if (!(xattr->perm[1]&0x40))
+			inop->inode.iso_mode |= VEXEC;
+		if (!(xattr->perm[1]&0x10))
+			inop->inode.iso_mode |= VREAD;
+		inop->inode.iso_uid = isonum_723(xattr->owner); /* what about 0? */
+		inop->inode.iso_gid = isonum_723(xattr->group); /* what about 0? */
+	} else {
+		inop->inode.iso_mode |= VREAD|VEXEC|(VREAD|VEXEC)>>3|(VREAD|VEXEC)>>6;
+		inop->inode.iso_uid = (uid_t)0;
+		inop->inode.iso_gid = (gid_t)0;
+	}
+
+	if (ownbuf)
+		brelse(ownbuf);
+}
+
+/*
+ * Time stamps.
+ *
+ * Fill in the access, change and modification times of "inop".  They come
+ * from a version 1 extended attribute record when one is available ("bp",
+ * or read here under the same conditions as in cd9660_defattr), and from
+ * the recording date of the directory record otherwise.
+ */
+void
+cd9660_deftstamp(isodir,inop,bp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+{
+	struct buf *ownbuf = NULL;
+	struct iso_mnt *imp;
+	struct iso_extended_attributes *xattr = NULL;
+	int off;
+
+	if (!bp
+	    && ((imp = inop->i_mnt)->im_flags&ISOFSMNT_EXTATT)
+	    && (off = isonum_711(isodir->ext_attr_length))) {
+		iso_blkatoff(inop,-off * imp->logical_block_size,&ownbuf);
+		bp = ownbuf;
+	}
+
+	if (bp) {
+		xattr = (struct iso_extended_attributes *)bp->b_un.b_addr;
+		if (isonum_711(xattr->version) != 1)
+			xattr = NULL;
+	}
+
+	if (xattr) {
+		/* Each stamp that cannot be converted falls back on another one. */
+		if (!cd9660_tstamp_conv17(xattr->ftime,&inop->inode.iso_atime))
+			cd9660_tstamp_conv17(xattr->ctime,&inop->inode.iso_atime);
+		if (!cd9660_tstamp_conv17(xattr->ctime,&inop->inode.iso_ctime))
+			inop->inode.iso_ctime = inop->inode.iso_atime;
+		if (!cd9660_tstamp_conv17(xattr->mtime,&inop->inode.iso_mtime))
+			inop->inode.iso_mtime = inop->inode.iso_ctime;
+	} else {
+		cd9660_tstamp_conv7(isodir->date,&inop->inode.iso_ctime);
+		inop->inode.iso_atime = inop->inode.iso_ctime;
+		inop->inode.iso_mtime = inop->inode.iso_ctime;
+	}
+
+	if (ownbuf)
+		brelse(ownbuf);
+}
+
+/*
+ * Convert broken-down date fields to seconds since Jan. 1st, 1970 GMT.
+ * "tz" is the offset of the recorded (local) time from GMT in 15 minute
+ * units.  Dates before 1970 yield a zeroed *pu and a return value of 0;
+ * otherwise *pu is filled in and 1 is returned.
+ */
+static int
+cd9660_tstamp_fields(y, m, d, hour, minute, second, tz, pu)
+	int y, m, d, hour, minute, second, tz;
+	struct timeval *pu;
+{
+	long days, crtime;
+
+	if (y < 1970) {
+		pu->tv_sec = 0;
+		pu->tv_usec = 0;
+		return 0;
+	}
+#ifdef	ORIGINAL
+	/* computes day number relative to Sept. 19th,1989 */
+	/* don't even *THINK* about changing formula. It works! */
+	days = 367*(y-1980)-7*(y+(m+9)/12)/4-3*((y+(m-9)/7)/100+1)/4+275*m/9+d-100;
+#else
+	/*
+	 * Changed :-) to make it relative to Jan. 1st, 1970
+	 * and to disambiguate negative division
+	 */
+	days = 367*(y-1960)-7*(y+(m+9)/12)/4-3*((y+(m+9)/12-1)/100+1)/4+275*m/9+d-239;
+#endif
+	crtime = ((((days * 24) + hour) * 60 + minute) * 60) + second;
+
+	/*
+	 * timezone offset is unreliable on some disks, so only values in
+	 * the valid range are applied.  The recorded time is local time,
+	 * i.e. GMT plus the offset, hence the offset is subtracted.
+	 */
+	if (-48 <= tz && tz <= 52)
+		crtime -= (long)tz * 15 * 60;
+
+	pu->tv_sec = crtime;
+	pu->tv_usec = 0;
+	return 1;
+}
+
+/*
+ * Convert a 7 byte binary time stamp (years since 1900, month, day,
+ * hour, minute, second, GMT offset) into *pu.
+ * Returns 1 on success, 0 (with *pu zeroed) for dates before 1970.
+ */
+int
+cd9660_tstamp_conv7(pi,pu)
+char *pi;
+struct timeval *pu;
+{
+	/*
+	 * All fields but the GMT offset are unsigned bytes; reading the
+	 * year through a plain (possibly signed) char would turn every
+	 * year after 2027 into a negative number.
+	 */
+	unsigned char *p = (unsigned char *)pi;
+
+	return cd9660_tstamp_fields(p[0] + 1900, p[1], p[2], p[3], p[4], p[5],
+	    (signed char)p[6], pu);
+}
+
+/*
+ * Convert "len" decimal digit characters starting at "begin" to a number.
+ * The characters are not validated.
+ */
+static unsigned
+cd9660_chars2ui(begin,len)
+	unsigned char *begin;
+	int len;
+{
+	unsigned value = 0;
+
+	while (len-- > 0)
+		value = value * 10 + (*begin++ - '0');
+	return value;
+}
+
+/*
+ * Convert a 17 byte time stamp (16 decimal digits "YYYYMMDDHHMMSScc"
+ * followed by a binary GMT offset) into *pu; the hundredths "cc" are
+ * ignored.  Returns 1 on success, 0 (with *pu zeroed) when the year is
+ * before 1970 -- which includes an all-zero, i.e. unspecified, stamp --
+ * or is not a four digit number.
+ */
+int
+cd9660_tstamp_conv17(pi,pu)
+	unsigned char *pi;
+	struct timeval *pu;
+{
+	unsigned year = cd9660_chars2ui(pi,4);
+
+	/*
+	 * The year is range checked here rather than squeezed into a
+	 * "years since 1900" byte, where e.g. year 0 would wrap around
+	 * to a plausible looking value.
+	 */
+	if (year > 9999) {
+		pu->tv_sec = 0;
+		pu->tv_usec = 0;
+		return 0;
+	}
+
+	return cd9660_tstamp_fields((int)year,
+	    (int)cd9660_chars2ui(pi + 4,2),	/* month: 1 - 12 */
+	    (int)cd9660_chars2ui(pi + 6,2),	/* day: 1 - 31 */
+	    (int)cd9660_chars2ui(pi + 8,2),	/* hour: 0 - 23 */
+	    (int)cd9660_chars2ui(pi + 10,2),	/* minute: 0 - 59 */
+	    (int)cd9660_chars2ui(pi + 12,2),	/* second: 0 - 59 */
+	    (signed char)pi[16],		/* difference of GMT */
+	    pu);
+}
+
+/*
+ * Compute the inode number for directory record "isodir": the byte
+ * offset of the first data block, i.e. extent plus extended attribute
+ * length, scaled by the logical block size.
+ */
+void
+isodirino(inump,isodir,imp)
+	ino_t *inump;
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	*inump = (isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length))
+		 * imp->logical_block_size;
+}
diff --git a/sys/fs/cd9660/cd9660_node.h b/sys/fs/cd9660/cd9660_node.h
new file mode 100644
index 000000000000..45de67f1a6be
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_node.h
@@ -0,0 +1,143 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_node.h 8.2 (Berkeley) 1/23/94
+ */
+
+/*
+ * Theoretically, directories can be more than 2Gb in length,
+ * however, in practice this seems unlikely. So, we define
+ * the type doff_t as a long to keep down the cost of doing
+ * lookup on a 32-bit machine. If you are porting to a 64-bit
+ * architecture, you should make doff_t the same as off_t.
+ *
+ * Note: doff_t is a preprocessor macro here, not a typedef.
+ */
+#define doff_t long
+
+/*
+ * POSIX-style attributes kept for each iso_node (embedded as the
+ * `inode' member of struct iso_node).
+ */
+typedef struct {
+ struct timespec iso_atime; /* time of last access */
+ struct timespec iso_mtime; /* time of last modification */
+ struct timespec iso_ctime; /* time file changed */
+ u_short iso_mode; /* file's access mode and type */
+ uid_t iso_uid; /* owner user id */
+ gid_t iso_gid; /* owner group id */
+ short iso_links; /* links of file */
+ dev_t iso_rdev; /* Major/Minor number for special */
+} ISO_RRIP_INODE;
+
+#ifdef ISODEVMAP
+/*
+ * Entry of the device number (major,minor) translation table.
+ * Only compiled in when ISODEVMAP is defined.
+ */
+struct iso_dnode {
+ struct iso_dnode *d_chain[2]; /* hash chain, MUST be first */
+ dev_t i_dev; /* device where dnode resides */
+ ino_t i_number; /* the identity of the inode */
+ dev_t d_dev; /* device # for translation */
+};
+/* shorthand for the two hash chain links */
+#define d_forw d_chain[0]
+#define d_back d_chain[1]
+#endif
+
+/*
+ * In-core node of a cd9660 file or directory.  VTOI()/ITOV() convert
+ * between a vnode and its iso_node.
+ */
+struct iso_node {
+ struct iso_node *i_chain[2]; /* hash chain, MUST be first */
+ struct vnode *i_vnode; /* vnode associated with this inode */
+ struct vnode *i_devvp; /* vnode for block I/O */
+ u_long i_flag; /* see below */
+ dev_t i_dev; /* device where inode resides */
+ ino_t i_number; /* the identity of the inode */
+ /* we use the actual starting block of the file */
+ struct iso_mnt *i_mnt; /* filesystem associated with this inode */
+ struct lockf *i_lockf; /* head of byte-level lock list */
+ doff_t i_endoff; /* end of useful stuff in directory */
+ doff_t i_diroff; /* offset in dir, where we found last entry */
+ doff_t i_offset; /* offset of free space in directory */
+ ino_t i_ino; /* inode number of found directory */
+ long i_spare0;
+ long i_spare1;
+
+ long iso_extent; /* extent of file */
+ long i_size;
+ long iso_start; /* actual start of data of file (may be different */
+ /* from iso_extent, if file has extended attributes) */
+ ISO_RRIP_INODE inode; /* POSIX-style attributes, see ISO_RRIP_INODE */
+};
+
+/* shorthand for the two hash chain links of struct iso_node */
+#define i_forw i_chain[0]
+#define i_back i_chain[1]
+
+/* flags (values for iso_node.i_flag) */
+#define ILOCKED 0x0001 /* inode is locked */
+#define IWANT 0x0002 /* some process waiting on lock */
+#define IACC 0x0020 /* inode access time to be updated */
+
+/* vnode <-> iso_node conversion */
+#define VTOI(vp) ((struct iso_node *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
+
+/* lock wrappers; forward to iso_ilock()/iso_iunlock() */
+#define ISO_ILOCK(ip) iso_ilock(ip)
+#define ISO_IUNLOCK(ip) iso_iunlock(ip)
+
+/*
+ * Prototypes for ISOFS vnode operations.
+ * Each takes the argument structure of the corresponding VOP.
+ */
+int cd9660_lookup __P((struct vop_lookup_args *));
+int cd9660_open __P((struct vop_open_args *));
+int cd9660_close __P((struct vop_close_args *));
+int cd9660_access __P((struct vop_access_args *));
+int cd9660_getattr __P((struct vop_getattr_args *));
+int cd9660_read __P((struct vop_read_args *));
+int cd9660_ioctl __P((struct vop_ioctl_args *));
+int cd9660_select __P((struct vop_select_args *));
+int cd9660_mmap __P((struct vop_mmap_args *));
+int cd9660_seek __P((struct vop_seek_args *));
+int cd9660_readdir __P((struct vop_readdir_args *));
+int cd9660_abortop __P((struct vop_abortop_args *));
+int cd9660_inactive __P((struct vop_inactive_args *));
+int cd9660_reclaim __P((struct vop_reclaim_args *));
+int cd9660_bmap __P((struct vop_bmap_args *));
+int cd9660_lock __P((struct vop_lock_args *));
+int cd9660_unlock __P((struct vop_unlock_args *));
+int cd9660_strategy __P((struct vop_strategy_args *));
+int cd9660_print __P((struct vop_print_args *));
+int cd9660_islocked __P((struct vop_islocked_args *));
+/* default attributes / timestamps taken from the plain directory record */
+void cd9660_defattr __P((struct iso_directory_record *,
+ struct iso_node *, struct buf *));
+void cd9660_deftstamp __P((struct iso_directory_record *,
+ struct iso_node *, struct buf *));
+#ifdef ISODEVMAP
+/* device number translation table helpers */
+struct iso_dnode *iso_dmap __P((dev_t, ino_t, int));
+void iso_dunmap __P((dev_t));
+#endif
diff --git a/sys/fs/cd9660/cd9660_rrip.c b/sys/fs/cd9660/cd9660_rrip.c
new file mode 100644
index 000000000000..0923fa014773
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_rrip.c
@@ -0,0 +1,685 @@
+/*-
+ * Copyright (c) 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_rrip.c 8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <sys/time.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/cd9660_rrip.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+/*
+ * "PX" entry: POSIX file attributes.
+ * Copies mode, link count, uid and gid (read from the *_l halves of the
+ * record) into the node, then marks the attribute field as handled.
+ * Returns ISO_SUSP_ATTR.
+ */
+static int
+cd9660_rrip_attr(p,ana)
+	ISO_RRIP_ATTR *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ISO_RRIP_INODE *rrip = &ana->inop->inode;
+
+	rrip->iso_links = isonum_731(p->links_l);
+	rrip->iso_gid = (gid_t)isonum_731(p->gid_l);
+	rrip->iso_uid = (uid_t)isonum_731(p->uid_l);
+	rrip->iso_mode = isonum_731(p->mode_l);
+
+	ana->fields &= ~ISO_SUSP_ATTR;
+	return ISO_SUSP_ATTR;
+}
+
+/*
+ * Default handler for the attribute field: used when no "PX" entry was
+ * found.  Logs the anomaly and falls back to cd9660_defattr().
+ */
+static void
+cd9660_rrip_defattr(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	struct iso_node *ip = ana->inop;
+
+	/* PX is a required field, so its absence is worth a message */
+	printf("RRIP without PX field?\n");
+	cd9660_defattr(isodir,ip,NULL);
+}
+
+/*
+ * "SL" entry: symbolic link.
+ *
+ * Appends the components of this SL record to the link target being
+ * assembled in ana->outbuf (current length *ana->outlen, limit
+ * ana->maxlen).  ana->cont carries over whether the previous component
+ * is continued, in which case no '/' separator is inserted.
+ *
+ * Returns ISO_SUSP_SLINK (and clears that bit in ana->fields) when the
+ * record's flags byte is zero, i.e. the link is complete; returns 0 when
+ * more SL records are expected or on error.  On error (bad component
+ * flags or target too long) the output is rewound to empty and
+ * ana->fields is cleared to stop the scan.
+ */
+static int
+cd9660_rrip_slink(p,ana)
+	ISO_RRIP_SLINK *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	register ISO_RRIP_SLINK_COMPONENT *pcomp;
+	register ISO_RRIP_SLINK_COMPONENT *pcompe;
+	int len, wlen, cont;
+	char *outbuf, *inbuf;
+
+	pcomp = (ISO_RRIP_SLINK_COMPONENT *)p->component;
+	pcompe = (ISO_RRIP_SLINK_COMPONENT *)((char *)p + isonum_711(p->h.length));
+	len = *ana->outlen;
+	outbuf = ana->outbuf;
+	cont = ana->cont;
+
+	/*
+	 * Gathering a Symbolic name from each component with path
+	 */
+	for (;
+	     pcomp < pcompe;
+	     pcomp = (ISO_RRIP_SLINK_COMPONENT *)((char *)pcomp + ISO_RRIP_SLSIZ
+						  + isonum_711(pcomp->clen))) {
+
+		/* separate from the previous component unless it continues */
+		if (!cont) {
+			if (len < ana->maxlen) {
+				len++;
+				*outbuf++ = '/';
+			}
+		}
+		cont = 0;
+
+		/* "." and ".." are taken as prefixes of this string */
+		inbuf = "..";
+		wlen = 0;
+
+		switch (*pcomp->cflag) {
+
+		case ISO_SUSP_CFLAG_CURRENT:
+			/* Inserting Current */
+			wlen = 1;
+			break;
+
+		case ISO_SUSP_CFLAG_PARENT:
+			/* Inserting Parent */
+			wlen = 2;
+			break;
+
+		case ISO_SUSP_CFLAG_ROOT:
+			/* Inserting slash for ROOT */
+			/* start over from beginning(?) */
+			outbuf -= len;
+			len = 0;
+			break;
+
+		case ISO_SUSP_CFLAG_VOLROOT:
+			/* Inserting a mount point i.e. "/cdrom" */
+			/* same as above */
+			outbuf -= len;
+			len = 0;
+			inbuf = ana->imp->im_mountp->mnt_stat.f_mntonname;
+			wlen = strlen(inbuf);
+			break;
+
+		case ISO_SUSP_CFLAG_HOST:
+			/* Inserting hostname i.e. "kurt.tools.de" */
+			inbuf = hostname;
+			wlen = hostnamelen;
+			break;
+
+		case ISO_SUSP_CFLAG_CONTINUE:
+			cont = 1;
+			/* fall thru */
+		case 0:
+			/* Inserting component */
+			wlen = isonum_711(pcomp->clen);
+			inbuf = pcomp->name;
+			break;
+		default:
+			/* newline added to match the NM handler's message */
+			printf("RRIP with incorrect flags?\n");
+			/* force the overflow path below */
+			wlen = ana->maxlen + 1;
+			break;
+		}
+
+		if (len + wlen > ana->maxlen) {
+			/* indicate error to caller */
+			ana->cont = 1;
+			ana->fields = 0;
+			ana->outbuf -= *ana->outlen;
+			*ana->outlen = 0;
+			return 0;
+		}
+
+		bcopy(inbuf,outbuf,wlen);
+		outbuf += wlen;
+		len += wlen;
+
+	}
+	/* publish progress so a following SL record can continue */
+	ana->outbuf = outbuf;
+	*ana->outlen = len;
+	ana->cont = cont;
+
+	if (!isonum_711(p->flags)) {
+		ana->fields &= ~ISO_SUSP_SLINK;
+		return ISO_SUSP_SLINK;
+	}
+	return 0;
+}
+
+/*
+ * "NM" entry: alternate name.
+ *
+ * Appends the name fragment carried by this record to ana->outbuf and
+ * adds its length to *ana->outlen (limit ana->maxlen).
+ *
+ * Returns ISO_SUSP_ALTNAME (and clears that bit in ana->fields) once a
+ * fragment without the CONTINUE flag has been copied; returns 0 while
+ * more fragments are expected.  A record with unknown flags, a name that
+ * would exceed maxlen, or a record shorter than its own 5 byte header is
+ * treated as "no name field": the output is rewound to empty, the
+ * ALTNAME bit is cleared and 0 is returned.
+ */
+static int
+cd9660_rrip_altname(p,ana)
+	ISO_RRIP_ALTNAME *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	char *inbuf;
+	int wlen;
+	int cont;
+
+	/* "." and ".." are taken as prefixes of this string */
+	inbuf = "..";
+	wlen = 0;
+	cont = 0;
+
+	switch (*p->flags) {
+	case ISO_SUSP_CFLAG_CURRENT:
+		/* Inserting Current */
+		wlen = 1;
+		break;
+
+	case ISO_SUSP_CFLAG_PARENT:
+		/* Inserting Parent */
+		wlen = 2;
+		break;
+
+	case ISO_SUSP_CFLAG_HOST:
+		/* Inserting hostname i.e. "kurt.tools.de" */
+		inbuf = hostname;
+		wlen = hostnamelen;
+		break;
+
+	case ISO_SUSP_CFLAG_CONTINUE:
+		cont = 1;
+		/* fall thru */
+	case 0:
+		/* Inserting component */
+		wlen = isonum_711(p->h.length) - 5;
+		inbuf = (char *)p + 5;
+		/*
+		 * A length below the header size would yield a negative
+		 * count for bcopy(); route it through the overflow path.
+		 */
+		if (wlen < 0)
+			wlen = ana->maxlen + 1;
+		break;
+
+	default:
+		printf("RRIP with incorrect NM flags?\n");
+		/* force the overflow path below */
+		wlen = ana->maxlen + 1;
+		break;
+	}
+
+	if ((*ana->outlen += wlen) > ana->maxlen) {
+		/* treat as no name field */
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		ana->outbuf -= *ana->outlen - wlen;
+		*ana->outlen = 0;
+		return 0;
+	}
+
+	bcopy(inbuf,ana->outbuf,wlen);
+	ana->outbuf += wlen;
+
+	if (!cont) {
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		return ISO_SUSP_ALTNAME;
+	}
+	return 0;
+}
+
+/*
+ * Default handler for the name field: produce a name from the plain
+ * directory record.  A first name byte of 0 yields "." and 1 yields
+ * ".."; anything else is passed through isofntrans().
+ */
+static void
+cd9660_rrip_defname(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* the special names are just prefixes of ".." */
+	strcpy(ana->outbuf,"..");
+
+	if (*isodir->name == 0) {
+		*ana->outlen = 1;
+	} else if (*isodir->name == 1) {
+		*ana->outlen = 2;
+	} else {
+		isofntrans(isodir->name,isonum_711(isodir->name_len),
+			   ana->outbuf,ana->outlen,
+			   1,isonum_711(isodir->flags)&4);
+	}
+}
+
+/*
+ * "CL" / "PL" entry: child or parent link.
+ * Stores the referenced directory location, shifted by the mount's
+ * block shift, in *ana->inump and clears both link bits in ana->fields.
+ * Returns ISO_SUSP_CLINK for a 'C' record, ISO_SUSP_PLINK otherwise.
+ */
+static int
+cd9660_rrip_pclink(p,ana)
+	ISO_RRIP_CLINK *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	*ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift;
+	ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK);
+
+	if (*p->h.type == 'C')
+		return ISO_SUSP_CLINK;
+	return ISO_SUSP_PLINK;
+}
+
+/*
+ * "RE" entry: relocated directory.
+ * Stops the scan (ana->fields = 0), empties the output name and reports
+ * RELDIR together with the name and link bits so the caller notices.
+ */
+static int
+cd9660_rrip_reldir(p,ana)
+	ISO_RRIP_RELDIR *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* special hack to make caller aware of RE field */
+	ana->fields = 0;
+	*ana->outlen = 0;
+	return ISO_SUSP_RELDIR|ISO_SUSP_ALTNAME|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+}
+
+/*
+ * "TF" entry: time stamps.
+ *
+ * The flags byte selects which stamps are recorded and whether they use
+ * the 7 byte or the 17 byte form.  Stamps are stored back to back in the
+ * order creation, modification, access, attribute change.  The creation
+ * time is skipped.  A missing modification time is zeroed; missing
+ * access / change times are copied from the modification time.
+ * Returns ISO_SUSP_TSTAMP and clears that bit in ana->fields.
+ */
+static int
+cd9660_rrip_tstamp(p,ana)
+	ISO_RRIP_TSTAMP *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ISO_RRIP_INODE *rrip = &ana->inop->inode;
+	unsigned char *ptime = p->time;
+	int flags = *p->flags;
+	int long_form = flags & ISO_SUSP_TSTAMP_FORM17;
+	/* size of one recorded stamp (7bytes/17bytes) */
+	int stride = long_form ? 17 : 7;
+
+	if (flags & ISO_SUSP_TSTAMP_CREAT)
+		ptime += stride;
+
+	if (flags & ISO_SUSP_TSTAMP_MODIFY) {
+		if (long_form)
+			cd9660_tstamp_conv17(ptime,&rrip->iso_mtime);
+		else
+			cd9660_tstamp_conv7(ptime,&rrip->iso_mtime);
+		ptime += stride;
+	} else
+		bzero(&rrip->iso_mtime,sizeof(struct timeval));
+
+	if (flags & ISO_SUSP_TSTAMP_ACCESS) {
+		if (long_form)
+			cd9660_tstamp_conv17(ptime,&rrip->iso_atime);
+		else
+			cd9660_tstamp_conv7(ptime,&rrip->iso_atime);
+		ptime += stride;
+	} else
+		rrip->iso_atime = rrip->iso_mtime;
+
+	if (flags & ISO_SUSP_TSTAMP_ATTR) {
+		if (long_form)
+			cd9660_tstamp_conv17(ptime,&rrip->iso_ctime);
+		else
+			cd9660_tstamp_conv7(ptime,&rrip->iso_ctime);
+	} else
+		rrip->iso_ctime = rrip->iso_mtime;
+
+	ana->fields &= ~ISO_SUSP_TSTAMP;
+	return ISO_SUSP_TSTAMP;
+}
+
+/*
+ * Default handler for the time stamp field: used when no "TF" entry
+ * was found.  Falls back to cd9660_deftstamp().
+ */
+static void
+cd9660_rrip_deftstamp(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	struct iso_node *ip = ana->inop;
+
+	cd9660_deftstamp(isodir,ip,NULL);
+}
+
+/*
+ * "PN" entry: POSIX device number.
+ * When the high word is zero the low word is split into major and
+ * minor; otherwise the high word is the major number and the minor is
+ * taken from the low word.  Returns ISO_SUSP_DEVICE and clears that bit
+ * in ana->fields.
+ */
+static int
+cd9660_rrip_device(p,ana)
+	ISO_RRIP_DEVICE *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	unsigned high, low;
+
+	low = isonum_733(p->dev_t_low_l);
+	high = isonum_733(p->dev_t_high_l);
+
+	if ( high != 0 )
+		ana->inop->inode.iso_rdev = makedev( high, minor(low) );
+	else
+		ana->inop->inode.iso_rdev = makedev( major(low), minor(low) );
+
+	ana->fields &= ~ISO_SUSP_DEVICE;
+	return ISO_SUSP_DEVICE;
+}
+
+/*
+ * "RR" entry: flags indicating which fields are recorded.
+ * Fields in the low 8 bits of ana->fields that the RR flags do not
+ * announce are dropped from the set still being looked for.  If a
+ * relocated directory is still wanted afterwards, it is handled as an
+ * RE entry.  Otherwise returns ISO_SUSP_IDFLAG.
+ */
+static int
+cd9660_rrip_idflag(p,ana)
+	ISO_RRIP_IDFLAG *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->fields &= isonum_711(p->flags)|~0xff; /* don't touch high bits */
+
+	if (!(ana->fields&ISO_SUSP_RELDIR))
+		return ISO_SUSP_IDFLAG;
+
+	/* special handling of RE field */
+	return cd9660_rrip_reldir(p,ana);
+}
+
+/*
+ * "CE" entry: continuation pointer.
+ * Records block, offset and length of the continuation area in the
+ * analyze state; cd9660_rrip_loop() follows it after the current area
+ * has been scanned.  Returns ISO_SUSP_CONT.
+ */
+static int
+cd9660_rrip_cont(p,ana)
+	ISO_RRIP_CONT *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->iso_ce_len = isonum_733(p->length);
+	ana->iso_ce_off = isonum_733(p->offset);
+	ana->iso_ce_blk = isonum_733(p->location);
+	return ISO_SUSP_CONT;
+}
+
+/*
+ * "ST" entry: System Use end.
+ * Clears ana->fields, which makes cd9660_rrip_loop() stop scanning,
+ * and returns ISO_SUSP_STOP.
+ */
+static int
+cd9660_rrip_stop(p,ana)
+ ISO_SUSP_HEADER *p;
+ ISO_RRIP_ANALYZE *ana;
+{
+ /* stop analyzing */
+ ana->fields = 0;
+ return ISO_SUSP_STOP;
+}
+
+/*
+ * "ER" entry: extension reference.
+ * Accepts only an identifier of length 10 equal to "RRIP_1991A" with
+ * extension version 1.  On a match clears ISO_SUSP_EXTREF in
+ * ana->fields and returns it; otherwise returns 0.
+ */
+static int
+cd9660_rrip_extref(p,ana)
+	ISO_RRIP_EXTREF *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	if (isonum_711(p->len_id) == 10
+	    && bcmp((char *)p + 8,"RRIP_1991A",10) == 0
+	    && isonum_711(p->version) == 1) {
+		ana->fields &= ~ISO_SUSP_EXTREF;
+		return ISO_SUSP_EXTREF;
+	}
+	return 0;
+}
+
+/*
+ * One row of a dispatch table used by cd9660_rrip_loop().
+ */
+typedef struct {
+ char type[2]; /* two character signature of the entry */
+ int (*func)(); /* handler, called as func(entry, ana); 0 ends the table */
+ void (*func2)(); /* default, called as func2(isodir, ana) if not found */
+ int result; /* bits checked against the loop result to decide on func2 */
+} RRIP_TABLE;
+
+/*
+ * Walk the system use area of a directory record and dispatch every
+ * version 1 entry through `table'.
+ *
+ * isodir	directory record whose system use area is scanned
+ * ana		analyze state shared with the handlers; scanning stops as
+ *		soon as ana->fields becomes 0
+ * table	handlers, terminated by an entry with func == 0
+ *
+ * After the in-record area, continuation areas announced by CE entries
+ * are read from the device and scanned in turn.  Finally, for the
+ * leading table entries that have a default handler (func2) and whose
+ * result bits were not produced, the default handler is called.
+ *
+ * Returns the OR of all handler return values.
+ *
+ * Compared to the previous version this guards against entries whose
+ * length is smaller than a header (which could never advance the scan)
+ * and releases the buffer of one continuation area before reading the
+ * next one.
+ */
+static int
+cd9660_rrip_loop(isodir,ana,table)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+	RRIP_TABLE *table;
+{
+	register RRIP_TABLE *ptable;
+	register ISO_SUSP_HEADER *phead;
+	register ISO_SUSP_HEADER *pend;
+	struct buf *bp = NULL;
+	char *pwhead;
+	int result;
+
+	/*
+	 * Note: If name length is even,
+	 * there is 1 byte of padding after the name
+	 */
+	pwhead = isodir->name + isonum_711(isodir->name_len);
+	if (!(isonum_711(isodir->name_len)&1))
+		pwhead++;
+
+	/* If it's not the '.' entry of the root dir obey SP field */
+	if (*isodir->name != 0
+	    || isonum_733(isodir->extent) != ana->imp->root_extent)
+		pwhead += ana->imp->rr_skip;
+	else
+		pwhead += ana->imp->rr_skip0;
+
+	phead = (ISO_SUSP_HEADER *)pwhead;
+	pend = (ISO_SUSP_HEADER *)((char *)isodir + isonum_711(isodir->length));
+
+	result = 0;
+	while (1) {
+		ana->iso_ce_len = 0;
+		/*
+		 * Note: "pend" should be more than one SUSP header
+		 */
+		while (pend >= phead + 1) {
+			if (isonum_711(phead->version) == 1) {
+				for (ptable = table; ptable->func; ptable++) {
+					if (*phead->type == *ptable->type
+					    && phead->type[1] == ptable->type[1]) {
+						result |= ptable->func(phead,ana);
+						break;
+					}
+				}
+				if (!ana->fields)
+					break;
+			}
+			/*
+			 * An entry shorter than its own header is corrupt
+			 * and, with length 0, would never advance.
+			 */
+			if (isonum_711(phead->length) < (int)sizeof(*phead))
+				break;
+			/*
+			 * move to next SUSP
+			 * Hopefully this works with newer versions, too
+			 */
+			phead = (ISO_SUSP_HEADER *)((char *)phead + isonum_711(phead->length));
+		}
+
+		/* done unless something is still wanted and a CE was seen */
+		if (!ana->fields || !ana->iso_ce_len)
+			break;
+
+		if (ana->iso_ce_blk >= ana->imp->volume_space_size
+		    || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size)
+			/* what to do now? */
+			break;
+
+		/* drop the previous continuation block before reusing bp */
+		if (bp) {
+			brelse(bp);
+			bp = NULL;
+		}
+		if (bread(ana->imp->im_devvp,
+			  ana->iso_ce_blk * ana->imp->logical_block_size / DEV_BSIZE,
+			  ana->imp->logical_block_size,NOCRED,&bp))
+			/* what to do now? */
+			break;
+		phead = (ISO_SUSP_HEADER *)(bp->b_un.b_addr + ana->iso_ce_off);
+		pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len);
+	}
+	if (bp)
+		brelse(bp);
+	/*
+	 * If we don't find the Basic SUSP stuffs, just set default value
+	 * ( attribute/time stamp )
+	 */
+	for (ptable = table; ptable->func2; ptable++)
+		if (!(ptable->result&result))
+			ptable->func2(isodir,ana);
+
+	return result;
+}
+
+/*
+ * Dispatch table used by cd9660_rrip_analyze().
+ * Columns: signature, handler, default handler, result bits.
+ * Entries with a default handler must come first: cd9660_rrip_loop()
+ * stops looking for defaults at the first entry without one.
+ */
+static RRIP_TABLE rrip_table_analyze[] = {
+ { "PX", cd9660_rrip_attr, cd9660_rrip_defattr, ISO_SUSP_ATTR },
+ { "TF", cd9660_rrip_tstamp, cd9660_rrip_deftstamp, ISO_SUSP_TSTAMP },
+ { "PN", cd9660_rrip_device, 0, ISO_SUSP_DEVICE },
+ { "RR", cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG },
+ { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 }
+};
+
+/*
+ * Fill in attributes, time stamps and device number of `inop' from the
+ * Rock Ridge entries of `isodir'.  Returns the result bits collected by
+ * cd9660_rrip_loop().
+ */
+int
+cd9660_rrip_analyze(isodir,inop,imp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+
+	/* fields we are interested in */
+	analyze.fields = ISO_SUSP_ATTR|ISO_SUSP_TSTAMP|ISO_SUSP_DEVICE;
+	analyze.imp = imp;
+	analyze.inop = inop;
+
+	return cd9660_rrip_loop(isodir,&analyze,rrip_table_analyze);
+}
+
+/*
+ * Get Alternate Name from 'NM' record.
+ * If there is no NM record, or the name it yields is unusable,
+ * the translated ISO9660 name is returned instead.
+ *
+ * Note: the "NM" entry must stay first; cd9660_rrip_getname() skips it
+ * by advancing the table pointer by one.
+ */
+static RRIP_TABLE rrip_table_getname[] = {
+ { "NM", cd9660_rrip_altname, cd9660_rrip_defname, ISO_SUSP_ALTNAME },
+ { "CL", cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+ { "PL", cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+ { "RE", cd9660_rrip_reldir, 0, ISO_SUSP_RELDIR },
+ { "RR", cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG },
+ { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 }
+};
+
+/*
+ * Produce the name of a directory entry in outbuf / *outlen (at most
+ * NAME_MAX bytes) and, for child/parent links, the target in *inump.
+ *
+ * For the special entries whose first name byte is 0 or 1 the default
+ * name is generated right away and the "NM" handler is skipped.
+ * Returns the result bits collected by cd9660_rrip_loop().
+ */
+int
+cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	ino_t *inump;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+	RRIP_TABLE *tab = rrip_table_getname;
+
+	*outlen = 0;
+	analyze.fields = ISO_SUSP_ALTNAME|ISO_SUSP_RELDIR|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+	analyze.imp = imp;
+	analyze.inump = inump;
+	analyze.maxlen = NAME_MAX;
+	analyze.outlen = outlen;
+	analyze.outbuf = outbuf;
+
+	switch (*isodir->name) {
+	case 0:
+	case 1:
+		cd9660_rrip_defname(isodir,&analyze);
+
+		/* name is settled: skip the NM entry of the table */
+		analyze.fields &= ~ISO_SUSP_ALTNAME;
+		tab++;
+		break;
+	default:
+		break;
+	}
+
+	return cd9660_rrip_loop(isodir,&analyze,tab);
+}
+
+/*
+ * Get Symbolic Name from 'SL' record
+ *
+ * Note: isodir should contain an SL record!
+ * No entry has a default handler, so nothing is synthesized when the
+ * SL record is missing.
+ */
+static RRIP_TABLE rrip_table_getsymname[] = {
+ { "SL", cd9660_rrip_slink, 0, ISO_SUSP_SLINK },
+ { "RR", cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG },
+ { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 }
+};
+
+/*
+ * Assemble the target of a symbolic link in outbuf / *outlen (at most
+ * MAXPATHLEN bytes).  Returns ISO_SUSP_SLINK if a complete link was
+ * found, 0 otherwise.
+ */
+int
+cd9660_rrip_getsymname(isodir,outbuf,outlen,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+	int found;
+
+	*outlen = 0;
+	analyze.fields = ISO_SUSP_SLINK;
+	analyze.imp = imp;
+	analyze.cont = 1; /* don't start with a slash */
+	analyze.maxlen = MAXPATHLEN;
+	analyze.outlen = outlen;
+	analyze.outbuf = outbuf;
+
+	found = cd9660_rrip_loop(isodir,&analyze,rrip_table_getsymname);
+	return (found&ISO_SUSP_SLINK);
+}
+
+/*
+ * Dispatch table used by cd9660_rrip_offset() to look for the "ER"
+ * entry.
+ */
+static RRIP_TABLE rrip_table_extref[] = {
+ { "ER", cd9660_rrip_extref, 0, ISO_SUSP_EXTREF },
+ { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 }
+};
+
+/*
+ * Check for Rock Ridge Extension and return offset of its fields.
+ * Note: We require the ER field.
+ *
+ * The "SP" entry is looked for directly behind the one byte name of the
+ * record and, failing that, 15 bytes further on (CDROM XA).  The offset
+ * that matched is remembered in imp->rr_skip0.
+ * Returns the skip value of the SP entry, or -1 if the SP or ER entry
+ * is missing.
+ */
+int
+cd9660_rrip_offset(isodir,imp)
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_OFFSET *p;
+	ISO_RRIP_ANALYZE analyze;
+
+	p = (ISO_RRIP_OFFSET *)(isodir->name + 1);
+	if (bcmp(p,"SP\7\1\276\357",6) == 0) {
+		imp->rr_skip0 = 0;
+	} else {
+		/* Maybe, it's a CDROM XA disc? */
+		imp->rr_skip0 = 15;
+		p = (ISO_RRIP_OFFSET *)((char *)p + 15);
+		if (bcmp(p,"SP\7\1\276\357",6))
+			return -1;
+	}
+
+	analyze.fields = ISO_SUSP_EXTREF;
+	analyze.imp = imp;
+	if (cd9660_rrip_loop(isodir,&analyze,rrip_table_extref)&ISO_SUSP_EXTREF)
+		return isonum_711(p->skip);
+
+	return -1;
+}
diff --git a/sys/fs/cd9660/cd9660_rrip.h b/sys/fs/cd9660/cd9660_rrip.h
new file mode 100644
index 000000000000..b4017281f065
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_rrip.h
@@ -0,0 +1,146 @@
+/*-
+ * Copyright (c) 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_rrip.h 8.1 (Berkeley) 1/21/94
+ */
+
+/*
+ * Header common to every system use entry: two signature characters,
+ * the length of the whole entry and a version byte.
+ */
+typedef struct {
+ char type [ISODCL ( 0, 1)];
+ unsigned char length [ISODCL ( 2, 2)]; /* 711 */
+ unsigned char version [ISODCL ( 3, 3)];
+} ISO_SUSP_HEADER;
+
+/*
+ * "PX" entry: POSIX file attributes.  Each value is recorded twice,
+ * as a 731 (*_l) and as a 732 (*_m) field.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char mode_l [ISODCL ( 4, 7)]; /* 731 */
+ char mode_m [ISODCL ( 8, 11)]; /* 732 */
+ char links_l [ISODCL ( 12, 15)]; /* 731 */
+ char links_m [ISODCL ( 16, 19)]; /* 732 */
+ char uid_l [ISODCL ( 20, 23)]; /* 731 */
+ char uid_m [ISODCL ( 24, 27)]; /* 732 */
+ char gid_l [ISODCL ( 28, 31)]; /* 731 */
+ char gid_m [ISODCL ( 32, 35)]; /* 732 */
+} ISO_RRIP_ATTR;
+
+/*
+ * "PN" entry: POSIX device number, split into a high and a low word,
+ * each recorded as a 731 (*_l) and a 732 (*_m) field.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char dev_t_high_l [ISODCL ( 4, 7)]; /* 731 */
+ char dev_t_high_m [ISODCL ( 8, 11)]; /* 732 */
+ char dev_t_low_l [ISODCL ( 12, 15)]; /* 731 */
+ char dev_t_low_m [ISODCL ( 16, 19)]; /* 732 */
+} ISO_RRIP_DEVICE;
+
+/*
+ * Component flags, used by the "SL" component records and by the
+ * flags byte of the "NM" entry.
+ */
+#define ISO_SUSP_CFLAG_CONTINUE 0x01
+#define ISO_SUSP_CFLAG_CURRENT 0x02
+#define ISO_SUSP_CFLAG_PARENT 0x04
+#define ISO_SUSP_CFLAG_ROOT 0x08
+#define ISO_SUSP_CFLAG_VOLROOT 0x10
+#define ISO_SUSP_CFLAG_HOST 0x20
+
+/*
+ * One component of a symbolic link: flags, content length and the
+ * content itself.
+ */
+typedef struct {
+ u_char cflag [ISODCL ( 1, 1)];
+ u_char clen [ISODCL ( 2, 2)];
+ u_char name [0];
+} ISO_RRIP_SLINK_COMPONENT;
+/* size of the fixed part (cflag + clen) of a component */
+#define ISO_RRIP_SLSIZ 2
+
+/*
+ * "SL" entry: symbolic link.  `component' marks the start of the
+ * component records, which extend to the end of the entry.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ u_char flags [ISODCL ( 4, 4)];
+ u_char component [ISODCL ( 5, 5)];
+} ISO_RRIP_SLINK;
+
+/*
+ * "NM" entry: alternate name.  The name bytes follow the flags byte
+ * and extend to the end of the entry.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char flags [ISODCL ( 4, 4)];
+} ISO_RRIP_ALTNAME;
+
+/* "CL" entry: child link, location of the relocated directory */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char dir_loc [ISODCL ( 4, 11)]; /* 733 */
+} ISO_RRIP_CLINK;
+
+/* "PL" entry: parent link, same layout as the child link */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char dir_loc [ISODCL ( 4, 11)]; /* 733 */
+} ISO_RRIP_PLINK;
+
+/* "RE" entry: relocated directory marker, header only */
+typedef struct {
+ ISO_SUSP_HEADER h;
+} ISO_RRIP_RELDIR;
+
+/*
+ * Flags of the "TF" entry: FORM17 selects the 17 byte stamp format
+ * (otherwise 7 bytes); the remaining bits tell which stamps are recorded.
+ */
+#define ISO_SUSP_TSTAMP_FORM17 0x80
+#define ISO_SUSP_TSTAMP_FORM7 0x00
+#define ISO_SUSP_TSTAMP_CREAT 0x01
+#define ISO_SUSP_TSTAMP_MODIFY 0x02
+#define ISO_SUSP_TSTAMP_ACCESS 0x04
+#define ISO_SUSP_TSTAMP_ATTR 0x08
+#define ISO_SUSP_TSTAMP_BACKUP 0x10
+#define ISO_SUSP_TSTAMP_EXPIRE 0x20
+#define ISO_SUSP_TSTAMP_EFFECT 0x40
+
+/*
+ * "TF" entry: time stamps.  `time' marks the start of the recorded
+ * stamps, which follow each other without gaps.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ unsigned char flags [ISODCL ( 4, 4)];
+ unsigned char time [ISODCL ( 5, 5)];
+} ISO_RRIP_TSTAMP;
+
+/* "RR" entry: flags telling which Rock Ridge fields are recorded */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ unsigned char flags [ISODCL ( 4, 4)];
+} ISO_RRIP_IDFLAG;
+
+/*
+ * "ER" entry: extension reference.  The identifier string follows the
+ * fixed part, i.e. starts at byte 8 of the entry.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char len_id [ISODCL ( 4, 4)];
+ char len_des [ISODCL ( 5, 5)];
+ char len_src [ISODCL ( 6, 6)];
+ char version [ISODCL ( 7, 7)];
+} ISO_RRIP_EXTREF;
+
+/*
+ * "SP" entry: two check bytes and the number of bytes to skip in each
+ * system use area.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char check [ISODCL ( 4, 5)];
+ char skip [ISODCL ( 6, 6)];
+} ISO_RRIP_OFFSET;
+
+/*
+ * "CE" entry: continuation area, given as block location, offset
+ * within that block and length (each read as a 733 field).
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char location [ISODCL ( 4, 11)];
+ char offset [ISODCL ( 12, 19)];
+ char length [ISODCL ( 20, 27)];
+} ISO_RRIP_CONT;
diff --git a/sys/fs/cd9660/cd9660_util.c b/sys/fs/cd9660/cd9660_util.c
new file mode 100644
index 000000000000..f74f0515ff77
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_util.c
@@ -0,0 +1,236 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_util.c 8.1 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h> /* XXX */
+#include <miscfs/fifofs/fifo.h> /* XXX */
+#include <sys/malloc.h>
+#include <sys/dir.h>
+
+#include <isofs/cd9660/iso.h>
+
+/*
+ * Old function versions of the isonum_7xx() accessors.  They are only
+ * compiled when __notanymore__ is defined.
+ */
+#ifdef __notanymore__
+/* one unsigned byte */
+int
+isonum_711 (p)
+unsigned char *p;
+{
+ return (*p);
+}
+
+/* one signed byte */
+int
+isonum_712 (p)
+signed char *p;
+{
+ return (*p);
+}
+
+/* 16 bit value, read in host order; warns unless host is little endian */
+int
+isonum_721 (p)
+unsigned char *p;
+{
+ /* little endian short */
+#if BYTE_ORDER != LITTLE_ENDIAN
+ printf ("isonum_721 called on non little-endian machine!\n");
+#endif
+
+ return *(short *)p;
+}
+
+/* 16 bit value, read in host order; warns unless host is big endian */
+int
+isonum_722 (p)
+unsigned char *p;
+{
+ /* big endian short */
+#if BYTE_ORDER != BIG_ENDIAN
+ printf ("isonum_722 called on non big-endian machine!\n");
+#endif
+
+ return *(short *)p;
+}
+
+/* 16 bit value recorded in both byte orders: pick the host's half */
+int
+isonum_723 (p)
+unsigned char *p;
+{
+#if BYTE_ORDER == BIG_ENDIAN
+ return isonum_722 (p + 2);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ return isonum_721 (p);
+#else
+ printf ("isonum_723 unsupported byte order!\n");
+ return 0;
+#endif
+}
+
+/* long value, read in host order; warns unless host is little endian */
+int
+isonum_731 (p)
+unsigned char *p;
+{
+ /* little endian long */
+#if BYTE_ORDER != LITTLE_ENDIAN
+ printf ("isonum_731 called on non little-endian machine!\n");
+#endif
+
+ return *(long *)p;
+}
+
+/* long value, read in host order; warns unless host is big endian */
+int
+isonum_732 (p)
+unsigned char *p;
+{
+ /* big endian long */
+#if BYTE_ORDER != BIG_ENDIAN
+ printf ("isonum_732 called on non big-endian machine!\n");
+#endif
+
+ return *(long *)p;
+}
+
+/* long value recorded in both byte orders: pick the host's half */
+int
+isonum_733 (p)
+unsigned char *p;
+{
+#if BYTE_ORDER == BIG_ENDIAN
+ return isonum_732 (p + 4);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ return isonum_731 (p);
+#else
+ printf ("isonum_733 unsupported byte order!\n");
+ return 0;
+#endif
+}
+#endif /* __notanymore__ */
+
+/*
+ * translate and compare a filename
+ * Note: Version number plus ';' may be omitted.
+ *
+ * fn, fnlen		name being looked up
+ * isofn, isolen	name as recorded in the directory
+ *
+ * Upper case letters of the recorded name also match their lower case
+ * form in fn.  If fn carries no version, a trailing ";version" (and a
+ * "." directly in front of it) in the recorded name is ignored.
+ *
+ * Returns 0 if the names match and non-zero otherwise.
+ *
+ * The recorded character is held in an unsigned char so that bytes
+ * >= 0x80 compare equal to themselves even where plain char is signed.
+ */
+int
+isofncmp(unsigned char *fn,int fnlen,unsigned char *isofn,int isolen)
+{
+	int i, j;
+	unsigned char c;
+
+	while (--fnlen >= 0) {
+		/* recorded name exhausted first */
+		if (--isolen < 0)
+			return *fn;
+		if ((c = *isofn++) == ';') {
+			/* version separator: fn must end or have one too */
+			switch (*fn++) {
+			default:
+				return *--fn;
+			case 0:
+				return 0;
+			case ';':
+				break;
+			}
+			/* compare the two version numbers numerically */
+			for (i = 0; --fnlen >= 0; i = i * 10 + *fn++ - '0') {
+				if (*fn < '0' || *fn > '9') {
+					return -1;
+				}
+			}
+			for (j = 0; --isolen >= 0; j = j * 10 + *isofn++ - '0');
+			return i - j;
+		}
+		if (c != *fn) {
+			if (c >= 'A' && c <= 'Z') {
+				if (c + ('a' - 'A') != *fn) {
+					if (*fn >= 'a' && *fn <= 'z')
+						return *fn - ('a' - 'A') - c;
+					else
+						return *fn - c;
+				}
+			} else
+				return *fn - c;
+		}
+		fn++;
+	}
+	/* fn exhausted: only ";..." or ".;..." may remain */
+	if (isolen > 0) {
+		switch (*isofn) {
+		default:
+			return -1;
+		case '.':
+			if (isofn[1] != ';')
+				return -1;
+			/* FALLTHROUGH */
+		case ';':
+			return 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * translate a filename
+ *
+ * infn, infnlen	recorded name and its length
+ * outfn		receives the translated name (not NUL terminated)
+ * outfnlen		receives the number of bytes written to outfn
+ * original		if set, copy the name unchanged; otherwise fold
+ *			upper case to lower case and drop the ";version"
+ *			suffix (and a "." directly in front of it)
+ * assoc		if set, prefix the name with ASSOCCHAR
+ *
+ * The loop runs over the input bytes rather than the output index, so
+ * the ASSOCCHAR prefix no longer costs the last character of the name,
+ * and a trailing '.' no longer peeks one byte past the input.
+ */
+void
+isofntrans(unsigned char *infn,int infnlen,
+	   unsigned char *outfn,unsigned short *outfnlen,
+	   int original,int assoc)
+{
+	unsigned char *infnend;
+	int fnidx = 0;
+
+	if (infnlen < 0)
+		infnlen = 0;
+	infnend = infn + infnlen;
+
+	if (assoc) {
+		*outfn++ = ASSOCCHAR;
+		fnidx++;
+	}
+	for (; infn < infnend; fnidx++) {
+		char c = *infn++;
+
+		if (!original && c >= 'A' && c <= 'Z')
+			*outfn++ = c + ('a' - 'A');
+		else if (!original && c == '.' && infn < infnend && *infn == ';')
+			break;
+		else if (!original && c == ';')
+			break;
+		else
+			*outfn++ = c;
+	}
+	*outfnlen = fnidx;
+}
diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c
new file mode 100644
index 000000000000..02dd92af66f6
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_vfsops.c
@@ -0,0 +1,681 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_vfsops.c 8.3 (Berkeley) 1/31/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/dkbad.h>
+#include <sys/disklabel.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+
+extern int enodev ();
+
+struct vfsops cd9660_vfsops = { /* cd9660 VFS entry points; initializers are positional */
+ cd9660_mount, /* mount system call */
+ cd9660_start, /* nothing to do, returns 0 */
+ cd9660_unmount, /* unmount system call */
+ cd9660_root, /* return root vnode of the filesystem */
+ cd9660_quotactl, /* quotas: returns EOPNOTSUPP */
+ cd9660_statfs, /* file system statistics */
+ cd9660_sync, /* returns 0 without doing any work */
+ cd9660_vget, /* flat namespace lookup: returns EOPNOTSUPP */
+ cd9660_fhtovp, /* file handle to vnode */
+ cd9660_vptofh, /* vnode to file handle */
+ cd9660_init, /* not defined in this part of the file */
+};
+
+/*
+ * Mount an ISO volume as the root filesystem; vfs_mountroot calls this
+ * when iso is going to be mounted as root.
+ *
+ * The "mounted from" name recorded here is a placeholder that mount(8)
+ * updates after booting.
+ */
+#define ROOTNAME	"root_device"
+
+static iso_mountfs();
+
+cd9660_mountroot()
+{
+	register struct mount *mp;
+	extern struct vnode *rootvp;
+	struct proc *p = curproc;	/* XXX */
+	struct iso_mnt *imp;
+	register struct fs *fs;
+	u_int size;
+	int error;
+	struct iso_args args;
+
+	/* Both the swap and the root device need a block-device vnode. */
+	if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
+		panic("cd9660_mountroot: can't setup bdevvp's");
+
+	/* Build a zeroed, read-only mount structure by hand. */
+	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = &cd9660_vfsops;
+	mp->mnt_flag = MNT_RDONLY;
+	args.flags = ISOFSMNT_ROOT;
+
+	error = iso_mountfs(rootvp, mp, p, &args);
+	if (error) {
+		free(mp, M_MOUNT);
+		return (error);
+	}
+	error = vfs_lock(mp);
+	if (error) {
+		(void)cd9660_unmount(mp, 0, p);
+		free(mp, M_MOUNT);
+		return (error);
+	}
+
+	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mp->mnt_flag |= MNT_ROOTFS;
+	mp->mnt_vnodecovered = NULLVP;
+
+	/* The mount point name is "/". */
+	imp = VFSTOISOFS(mp);
+	bzero(imp->im_fsmnt, sizeof(imp->im_fsmnt));
+	imp->im_fsmnt[0] = '/';
+	bcopy((caddr_t)imp->im_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN);
+
+	/* Record the placeholder device name and zero what follows it. */
+	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+	(void) cd9660_statfs(mp, &mp->mnt_stat, p);
+	vfs_unlock(mp);
+	return (0);
+}
+
+/*
+ * Flag to allow forcible unmounting.
+ * cd9660_unmount() answers MNT_FORCE requests with EINVAL while this
+ * is zero.
+ */
+int iso_doforce = 1;
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * mp is the mount structure, path the user-space mount point name,
+ * data the user-space struct iso_args, ndp a nameidata used to look up
+ * the device and p the calling process.  Returns 0 or an errno value;
+ * mounts that are not read-only are refused with EROFS.
+ */
+cd9660_mount(mp, path, data, ndp, p)
+ register struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ struct vnode *devvp;
+ struct iso_args args;
+ u_int size;
+ int error;
+ struct iso_mnt *imp;
+
+ if (error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))
+ return (error);
+
+ if ((mp->mnt_flag & MNT_RDONLY) == 0)
+ return (EROFS);
+
+ /*
+ * If updating and there is no device name, only the export
+ * information is changed and that's all we do.
+ */
+ if (mp->mnt_flag & MNT_UPDATE) {
+ imp = VFSTOISOFS(mp);
+ if (args.fspec == 0)
+ return (vfs_export(mp, &imp->im_export, &args.export));
+ }
+ /*
+ * Not an update, or updating the name: look up the name
+ * and verify that it refers to a sensible block device.
+ */
+ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+ if (error = namei(ndp))
+ return (error);
+ devvp = ndp->ni_vp;
+
+ if (devvp->v_type != VBLK) {
+ vrele(devvp);
+ return ENOTBLK;
+ }
+ if (major(devvp->v_rdev) >= nblkdev) {
+ vrele(devvp);
+ return ENXIO;
+ }
+ if ((mp->mnt_flag & MNT_UPDATE) == 0)
+ error = iso_mountfs(devvp, mp, p, &args);
+ else {
+ if (devvp != imp->im_devvp) /* imp was set in the MNT_UPDATE branch above */
+ error = EINVAL; /* needs translation */
+ else
+ vrele(devvp); /* same device: drop the reference namei() returned */
+ }
+ if (error) { /* error is still 0 here unless set just above */
+ vrele(devvp);
+ return error;
+ }
+ imp = VFSTOISOFS(mp);
+ (void) copyinstr(path, imp->im_fsmnt, sizeof(imp->im_fsmnt)-1, &size); /* result ignored */
+ bzero(imp->im_fsmnt + size, sizeof(imp->im_fsmnt) - size); /* zero-fill after the name */
+ bcopy((caddr_t)imp->im_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+ MNAMELEN);
+ (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ (void) cd9660_statfs(mp, &mp->mnt_stat, p); /* refresh the cached statistics */
+ return 0;
+}
+
+/*
+ * Common code for mount and mountroot
+ *
+ * Opens devvp, scans logical sectors 16..99 for the primary volume
+ * descriptor, allocates and fills the per-mount struct iso_mnt, hangs
+ * it off mp and, unless ISOFSMNT_NORRIP is set in argp->flags, probes
+ * the root directory for Rock Ridge extensions.  argp->flags may be
+ * modified as a result of the probe.
+ *
+ * Returns 0 on success.  On failure an errno value is returned, the
+ * device is closed again and the per-mount data is freed.
+ */
+static iso_mountfs(devvp, mp, p, argp)
+	register struct vnode *devvp;
+	struct mount *mp;
+	struct proc *p;
+	struct iso_args *argp;
+{
+	register struct iso_mnt *isomp = (struct iso_mnt *)0;
+	struct buf *bp = NULL;
+	dev_t dev = devvp->v_rdev;
+	int error = EINVAL;
+	int needclose = 0;
+	int ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+	extern struct vnode *rootvp;
+	int iso_bsize;
+	int iso_blknum;
+	struct iso_volume_descriptor *vdp;
+	struct iso_primary_descriptor *pri;
+	struct iso_directory_record *rootp;
+	int logical_block_size;
+
+	if (!ronly)
+		return EROFS;
+
+	/*
+	 * Disallow multiple mounts of the same device.
+	 * Disallow mounting of a device that is currently in use
+	 * (except for root, which might share swap device for miniroot).
+	 * Flush out any old buffers remaining from a previous use.
+	 */
+	if (error = vfs_mountedon(devvp))
+		return error;
+	if (vcount(devvp) > 1 && devvp != rootvp)
+		return EBUSY;
+	if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))
+		return (error);
+
+	if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))
+		return error;
+	needclose = 1;
+
+	/* This is the "logical sector size".  The standard says this
+	 * should be 2048 or the physical sector size on the device,
+	 * whichever is greater.  For now, we'll just use a constant.
+	 */
+	iso_bsize = ISO_DEFAULT_BLOCK_SIZE;
+
+	for (iso_blknum = 16; iso_blknum < 100; iso_blknum++) {
+		if (error = bread (devvp, btodb(iso_blknum * iso_bsize),
+				   iso_bsize, NOCRED, &bp))
+			goto out;
+
+		vdp = (struct iso_volume_descriptor *)bp->b_un.b_addr;
+		if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) {
+			error = EINVAL;
+			goto out;
+		}
+
+		if (isonum_711 (vdp->type) == ISO_VD_END) {
+			error = EINVAL;
+			goto out;
+		}
+
+		if (isonum_711 (vdp->type) == ISO_VD_PRIMARY)
+			break;
+		brelse(bp);
+		/* Forget the released buffer so nobody touches it again. */
+		bp = NULL;
+	}
+
+	/*
+	 * bp is only still held when the loop stopped on a primary
+	 * descriptor; otherwise the scan ran out of sectors.
+	 */
+	if (bp == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	pri = (struct iso_primary_descriptor *)vdp;
+
+	logical_block_size = isonum_723 (pri->logical_block_size);
+
+	/* The block size must be a power of two in [DEV_BSIZE, MAXBSIZE]. */
+	if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE
+	    || (logical_block_size & (logical_block_size - 1)) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+
+	rootp = (struct iso_directory_record *)pri->root_directory_record;
+
+	isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK);
+	bzero((caddr_t)isomp, sizeof *isomp);
+	isomp->logical_block_size = logical_block_size;
+	isomp->volume_space_size = isonum_733 (pri->volume_space_size);
+	bcopy (rootp, isomp->root, sizeof isomp->root);
+	/*
+	 * From here on use the private copy of the root record; the
+	 * descriptor buffer is released below.
+	 */
+	rootp = (struct iso_directory_record *)isomp->root;
+	isomp->root_extent = isonum_733 (rootp->extent);
+	isomp->root_size = isonum_733 (rootp->size);
+
+	/* im_bshift becomes log2(logical_block_size). */
+	isomp->im_bmask = logical_block_size - 1;
+	isomp->im_bshift = 0;
+	while ((1 << isomp->im_bshift) < isomp->logical_block_size)
+		isomp->im_bshift++;
+
+	bp->b_flags |= B_AGE;
+	brelse(bp);
+	bp = NULL;
+
+	mp->mnt_data = (qaddr_t)isomp;
+	mp->mnt_stat.f_fsid.val[0] = (long)dev;
+	mp->mnt_stat.f_fsid.val[1] = MOUNT_CD9660;
+	mp->mnt_maxsymlinklen = 0;
+	mp->mnt_flag |= MNT_LOCAL;
+	isomp->im_mountp = mp;
+	isomp->im_dev = dev;
+	isomp->im_devvp = devvp;
+
+	devvp->v_specflags |= SI_MOUNTEDON;
+
+	/* Check the Rock Ridge Extension support */
+	if (!(argp->flags & ISOFSMNT_NORRIP)) {
+		if (error = bread (isomp->im_devvp,
+				   (isomp->root_extent + isonum_711(rootp->ext_attr_length))
+				   * isomp->logical_block_size / DEV_BSIZE,
+				   isomp->logical_block_size,NOCRED,&bp))
+			goto out;
+
+		rootp = (struct iso_directory_record *)bp->b_un.b_addr;
+
+		if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) {
+			argp->flags |= ISOFSMNT_NORRIP;
+		} else {
+			argp->flags &= ~ISOFSMNT_GENS;
+		}
+
+		/*
+		 * The contents are valid,
+		 * but they will get reread as part of another vnode, so...
+		 */
+		bp->b_flags |= B_AGE;
+		brelse(bp);
+		bp = NULL;
+	}
+	isomp->im_flags = argp->flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS|ISOFSMNT_EXTATT);
+	switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) {
+	default:
+		isomp->iso_ftype = ISO_FTYPE_DEFAULT;
+		break;
+	case ISOFSMNT_GENS|ISOFSMNT_NORRIP:
+		isomp->iso_ftype = ISO_FTYPE_9660;
+		break;
+	case 0:
+		isomp->iso_ftype = ISO_FTYPE_RRIP;
+		break;
+	}
+
+	return 0;
+out:
+	if (bp)
+		brelse(bp);
+	if (needclose)
+		(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
+	if (isomp) {
+		/*
+		 * isomp is only set on paths that also marked the device
+		 * as mounted on; undo that so it can be mounted later.
+		 */
+		devvp->v_specflags &= ~SI_MOUNTEDON;
+		free((caddr_t)isomp, M_ISOFSMNT);
+		mp->mnt_data = (qaddr_t)0;
+	}
+	return error;
+}
+
+/*
+ * Make a filesystem operational.
+ * There is no start-up work to do, so this always reports success.
+ */
+/* ARGSUSED */
+cd9660_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * unmount system call
+ *
+ * Flushes the vnodes of the mount, closes the underlying device and
+ * frees the per-mount data.  A forced unmount is refused with EINVAL
+ * for the root filesystem or while iso_doforce is zero.
+ */
+int
+cd9660_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	register struct iso_mnt *isomp;
+	int i, error, ronly, flags = 0;
+
+	if ((mntflags & MNT_FORCE) != 0) {
+		if (iso_doforce == 0 || (mp->mnt_flag & MNT_ROOTFS) != 0)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+#if 0
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp))
+		return EBUSY;
+#endif
+	error = vflush(mp, NULLVP, flags);
+	if (error != 0)
+		return (error);
+
+	isomp = VFSTOISOFS(mp);
+
+#ifdef	ISODEVMAP
+	if (isomp->iso_ftype == ISO_FTYPE_RRIP)
+		iso_dunmap(isomp->im_dev);
+#endif
+
+	/* Release the device, then the per-mount state. */
+	isomp->im_devvp->v_specflags &= ~SI_MOUNTEDON;
+	error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p);
+	vrele(isomp->im_devvp);
+	free((caddr_t)isomp, M_ISOFSMNT);
+	mp->mnt_data = (qaddr_t)0;
+	mp->mnt_flag &= ~MNT_LOCAL;
+	return (error);
+}
+
+/*
+ * Return root of a filesystem
+ *
+ * A throw-away vnode/inode pair on the stack describes the mount to
+ * iso_iget(), which is handed the saved root directory record.  On
+ * success the resulting vnode is stored in *vpp.
+ */
+cd9660_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	register struct iso_node *ip;
+	struct iso_node tip, *nip;
+	struct vnode tvp;
+	int error;
+	struct iso_mnt *imp = VFSTOISOFS (mp);
+	struct iso_directory_record *dp;
+	int relocated;
+
+	/* Wire the temporary vnode and inode to each other. */
+	tvp.v_mount = mp;
+	tvp.v_data = &tip;
+	ip = VTOI(&tvp);
+	ip->i_vnode = &tvp;
+	ip->i_dev = imp->im_dev;
+	ip->i_diroff = 0;
+
+	dp = (struct iso_directory_record *)imp->root;
+	isodirino(&ip->i_number,dp,imp);
+
+	/*
+	 * With RRIP we must use the `.' entry of the root directory.
+	 * Simply tell iget, that it's a relocated directory.
+	 */
+	relocated = (imp->iso_ftype == ISO_FTYPE_RRIP);
+	error = iso_iget(ip, ip->i_number, relocated, &nip, dp);
+	if (error)
+		return error;
+
+	*vpp = ITOV(nip);
+	return 0;
+}
+
+/*
+ * Quota control.  Quotas are not supported here, so every request is
+ * answered with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+cd9660_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Get file system statistics.
+ *
+ * Block size and block count come from the per-mount data; all
+ * free-space and file counters are reported as zero.  The mount names
+ * are copied unless sbp already is the mount's own statfs buffer.
+ */
+cd9660_statfs(mp, sbp, p)
+	struct mount *mp;
+	register struct statfs *sbp;
+	struct proc *p;
+{
+	register struct iso_mnt *isomp;
+	register struct fs *fs;
+
+	isomp = VFSTOISOFS(mp);
+
+	sbp->f_type = MOUNT_CD9660;
+	sbp->f_bsize = isomp->logical_block_size;
+	sbp->f_iosize = sbp->f_bsize;	/* XXX */
+	sbp->f_blocks = isomp->volume_space_size;
+
+	sbp->f_bfree = 0;	/* total free blocks */
+	sbp->f_bavail = 0;	/* blocks free for non superuser */
+	sbp->f_files = 0;	/* total files */
+	sbp->f_ffree = 0;	/* free file nodes */
+
+	if (sbp != &mp->mnt_stat) {
+		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
+		    (caddr_t)&sbp->f_mntonname[0], MNAMELEN);
+		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
+		    (caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
+	}
+
+	/* Use the first spare for flags: */
+	sbp->f_spare[0] = isomp->im_flags;
+	return 0;
+}
+
+/*
+ * Sync entry point: does no work and reports success.
+ */
+/* ARGSUSED */
+int
+cd9660_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Flat namespace lookup (inode number to vnode).
+ * Currently unsupported: always fails with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+cd9660_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * File handle to vnode
+ *
+ * Have to be really careful about stale file handles:
+ * - check that the inode number is in range
+ * - call iget() to get the locked inode
+ * - check for an unallocated inode (i_mode == 0)
+ * - check that the generation number matches
+ */
+
+struct ifid { /* cd9660 view of a file handle; cd9660_fhtovp()/cd9660_vptofh() cast a struct fid * to it */
+ ushort ifid_len; /* set to sizeof(struct ifid) by cd9660_vptofh() */
+ ushort ifid_pad; /* never assigned by cd9660_vptofh() */
+ int ifid_ino; /* the inode's i_number */
+ long ifid_start; /* the inode's iso_start */
+};
+
+/* ARGSUSED */
+int
+cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) /* file handle -> vnode; returns 0 or an errno value */
+ register struct mount *mp;
+ struct fid *fhp;
+ struct mbuf *nam;
+ struct vnode **vpp;
+ int *exflagsp;
+ struct ucred **credanonp;
+{
+ struct vnode tvp;
+ int error;
+ int lbn, off;
+ struct ifid *ifhp;
+ struct iso_mnt *imp;
+ struct buf *bp;
+ struct iso_directory_record *dirp;
+ struct iso_node tip, *ip, *nip;
+ struct netcred *np;
+
+ imp = VFSTOISOFS (mp);
+ ifhp = (struct ifid *)fhp;
+
+#ifdef ISOFS_DBG
+ printf("fhtovp: ino %d, start %ld\n",
+ ifhp->ifid_ino, ifhp->ifid_start);
+#endif
+
+ np = vfs_export_lookup(mp, &imp->im_export, nam); /* export entry for the requesting address */
+ if (np == NULL)
+ return (EACCES);
+
+ lbn = iso_lblkno(imp, ifhp->ifid_ino); /* ifid_ino is split into a logical block number ... */
+ if (lbn >= imp->volume_space_size) {
+ printf("fhtovp: lbn exceed volume space %d\n", lbn);
+ return (ESTALE);
+ }
+
+ off = iso_blkoff(imp, ifhp->ifid_ino); /* ... and an offset inside that block */
+ if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) {
+ printf("fhtovp: crosses block boundary %d\n",
+ off + ISO_DIRECTORY_RECORD_SIZE);
+ return (ESTALE);
+ }
+
+ error = bread(imp->im_devvp, btodb(lbn * imp->logical_block_size),
+ imp->logical_block_size, NOCRED, &bp);
+ if (error) {
+ printf("fhtovp: bread error %d\n",error);
+ brelse(bp);
+ return (error);
+ }
+
+ dirp = (struct iso_directory_record *)(bp->b_un.b_addr + off); /* candidate directory record */
+ if (off + isonum_711(dirp->length) > imp->logical_block_size) {
+ brelse(bp);
+ printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n",
+ off+isonum_711(dirp->length), off,
+ isonum_711(dirp->length));
+ return (ESTALE);
+ }
+
+ if (isonum_733(dirp->extent) + isonum_711(dirp->ext_attr_length) !=
+ ifhp->ifid_start) { /* the record must still describe the file the handle was made for */
+ brelse(bp);
+ printf("fhtovp: file start miss %d vs %d\n",
+ isonum_733(dirp->extent)+isonum_711(dirp->ext_attr_length),
+ ifhp->ifid_start); /* NOTE(review): ifid_start is a long but is printed with %d */
+ return (ESTALE);
+ }
+ brelse(bp); /* NOTE(review): dirp still points into this buffer and is passed to iso_iget() below */
+
+ ip = &tip; /* temporary on-stack vnode/inode pair describing the mount for iso_iget() */
+ tvp.v_mount = mp;
+ tvp.v_data = ip;
+ ip->i_vnode = &tvp;
+ ip->i_dev = imp->im_dev;
+ if (error = iso_iget(ip, ifhp->ifid_ino, 0, &nip, dirp)) {
+ *vpp = NULLVP;
+ printf("fhtovp: failed to get inode\n");
+ return (error);
+ }
+ ip = nip;
+ /*
+ * XXX need generation number?
+ */
+ if (ip->inode.iso_mode == 0) {
+ iso_iput(ip);
+ *vpp = NULLVP;
+ printf("fhtovp: inode mode == 0\n");
+ return (ESTALE);
+ }
+ *vpp = ITOV(ip);
+ *exflagsp = np->netc_exflags; /* export flags and anonymous credentials from the export entry */
+ *credanonp = &np->netc_anon;
+ return 0;
+}
+
+/*
+ * Vnode pointer to File handle
+ *
+ * Fills the handle at fhp with its own length, the inode number and
+ * the iso_start value of the inode behind vp.  Always returns 0.
+ */
+/* ARGSUSED */
+cd9660_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	register struct iso_node *ip = VTOI(vp);
+	register struct ifid *ifhp = (struct ifid *)fhp;
+	register struct iso_mnt *mp = ip->i_mnt;
+
+	ifhp->ifid_len = sizeof(struct ifid);
+	ifhp->ifid_ino = ip->i_number;
+	ifhp->ifid_start = ip->iso_start;
+
+#ifdef	ISOFS_DBG
+	printf("vptofh: ino %d, start %ld\n",
+	       ifhp->ifid_ino,ifhp->ifid_start);
+#endif
+	return 0;
+}
diff --git a/sys/fs/cd9660/cd9660_vnops.c b/sys/fs/cd9660/cd9660_vnops.c
new file mode 100644
index 000000000000..59f5a73f5c86
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_vnops.c
@@ -0,0 +1,1038 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_vnops.c 8.3 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+#include <sys/malloc.h>
+#include <sys/dir.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+#if 0
+/*
+ * Mknod vnode call
+ * Actually remap the device number
+ *
+ * This function is compiled out by the surrounding #if 0.
+ * Without ISODEVMAP it releases the name buffer and both vnodes and
+ * returns EINVAL.  With ISODEVMAP it records, or deletes, a device
+ * number mapping for an existing character or block device node on a
+ * Rock Ridge volume and then discards the vnode.
+ */
+cd9660_mknod(ndp, vap, cred, p)
+ struct nameidata *ndp;
+ struct ucred *cred;
+ struct vattr *vap;
+ struct proc *p;
+{
+#ifndef ISODEVMAP
+ free(ndp->ni_pnbuf, M_NAMEI);
+ vput(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ return EINVAL;
+#else
+ register struct vnode *vp;
+ struct iso_node *ip;
+ struct iso_dnode *dp;
+ int error;
+
+ vp = ndp->ni_vp;
+ ip = VTOI(vp);
+
+ if (ip->i_mnt->iso_ftype != ISO_FTYPE_RRIP
+ || vap->va_type != vp->v_type
+ || (vap->va_type != VCHR && vap->va_type != VBLK)) { /* only same-type device nodes on RRIP volumes */
+ free(ndp->ni_pnbuf, M_NAMEI);
+ vput(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ return EINVAL;
+ }
+
+ dp = iso_dmap(ip->i_dev,ip->i_number,1); /* NOTE(review): third argument presumably means create if missing; iso_dmap is not visible here */
+ if (ip->inode.iso_rdev == vap->va_rdev || vap->va_rdev == VNOVAL) {
+ /* same as the unmapped one, delete the mapping */
+ remque(dp);
+ FREE(dp,M_CACHE);
+ } else
+ /* enter new mapping */
+ dp->d_dev = vap->va_rdev;
+
+ /*
+ * Remove inode so that it will be reloaded by iget and
+ * checked to see if it is an alias of an existing entry
+ * in the inode cache.
+ */
+ vput(vp);
+ vp->v_type = VNON;
+ vgone(vp);
+ return (0);
+#endif
+}
+#endif
+
+/*
+ * Open vnode op.
+ *
+ * No per-open work is needed; the call always succeeds.
+ */
+/* ARGSUSED */
+int
+cd9660_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Close vnode op.
+ *
+ * Nothing is updated on close; the call always succeeds.
+ */
+/* ARGSUSED */
+int
+cd9660_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Access vnode op.
+ *
+ * No permission check is performed: the requested mode and the
+ * credentials are ignored and access is always granted.
+ */
+/* ARGSUSED */
+cd9660_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Getattr vnode op: fill *a_vap from the in-core inode of a_vp.
+ * Always returns 0.
+ */
+cd9660_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+
+{
+	struct vnode *vp = ap->a_vp;
+	register struct vattr *vap = ap->a_vap;
+	register struct iso_node *ip = VTOI(vp);
+	int i;
+
+	/* Identity of the file. */
+	vap->va_fsid	= ip->i_dev;
+	vap->va_fileid	= ip->i_number;
+	vap->va_type	= vp->v_type;
+
+	/* Attributes kept in the inode. */
+	vap->va_mode	= ip->inode.iso_mode;
+	vap->va_nlink	= ip->inode.iso_links;
+	vap->va_uid	= ip->inode.iso_uid;
+	vap->va_gid	= ip->inode.iso_gid;
+	vap->va_atime	= ip->inode.iso_atime;
+	vap->va_mtime	= ip->inode.iso_mtime;
+	vap->va_ctime	= ip->inode.iso_ctime;
+	vap->va_rdev	= ip->inode.iso_rdev;
+
+	/* Sizes; the byte count is reported equal to the file size. */
+	vap->va_size	= (u_quad_t) ip->i_size;
+	vap->va_bytes	= (u_quad_t) ip->i_size;
+	vap->va_blocksize = ip->i_mnt->logical_block_size;
+
+	/* Fixed values. */
+	vap->va_flags	= 0;
+	vap->va_gen	= 1;
+	return (0);
+}
+
+#if ISO_DEFAULT_BLOCK_SIZE >= NBPG /* doclusterread selects the read path in cd9660_read() */
+#ifdef DEBUG
+extern int doclusterread; /* with DEBUG this is a variable defined elsewhere */
+#else
+#define doclusterread 1 /* always use cluster_read() */
+#endif
+#else
+/* XXX until cluster routines can handle block sizes less than one page */
+#define doclusterread 0 /* use bread()/breadn() instead */
+#endif
+
+/*
+ * Vnode op for reading.
+ *
+ * Copies file data to the caller one logical block at a time until the
+ * request is satisfied, the end of the file is reached or an error
+ * occurs.  Returns 0 or an errno value; a negative starting offset
+ * yields EINVAL.
+ */
+cd9660_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ register struct uio *uio = ap->a_uio;
+ register struct iso_node *ip = VTOI(vp);
+ register struct iso_mnt *imp;
+ struct buf *bp;
+ daddr_t lbn, bn, rablock;
+ off_t diff;
+ int rasize, error = 0;
+ long size, n, on;
+
+ if (uio->uio_resid == 0)
+ return (0);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ ip->i_flag |= IACC;
+ imp = ip->i_mnt;
+ do {
+ lbn = iso_lblkno(imp, uio->uio_offset); /* logical block holding the current offset */
+ on = iso_blkoff(imp, uio->uio_offset); /* offset inside that block */
+ n = min((unsigned)(imp->logical_block_size - on),
+ uio->uio_resid); /* no more than the rest of the block or of the request */
+ diff = (off_t)ip->i_size - uio->uio_offset; /* bytes left before end of file */
+ if (diff <= 0)
+ return (0);
+ if (diff < n)
+ n = diff;
+ size = iso_blksize(imp, ip, lbn);
+ rablock = lbn + 1; /* read-ahead candidate */
+ if (doclusterread) {
+ if (iso_lblktosize(imp, rablock) <= ip->i_size)
+ error = cluster_read(vp, (off_t)ip->i_size,
+ lbn, size, NOCRED, &bp);
+ else
+ error = bread(vp, lbn, size, NOCRED, &bp);
+ } else {
+ if (vp->v_lastr + 1 == lbn &&
+ iso_lblktosize(imp, rablock) < ip->i_size) { /* sequential access with another block to come */
+ rasize = iso_blksize(imp, ip, rablock);
+ error = breadn(vp, lbn, size, &rablock,
+ &rasize, 1, NOCRED, &bp);
+ } else
+ error = bread(vp, lbn, size, NOCRED, &bp);
+ }
+ vp->v_lastr = lbn; /* remembered for the sequential test above */
+ n = min(n, size - bp->b_resid); /* bp is dereferenced before error is tested */
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+
+ error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
+ if (n + on == imp->logical_block_size ||
+ uio->uio_offset == (off_t)ip->i_size)
+ bp->b_flags |= B_AGE; /* block fully consumed or end of file reached */
+ brelse(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n != 0);
+ return (error);
+}
+
+/*
+ * Ioctl vnode op.  No ioctls are implemented: the attempt is logged on
+ * the console and rejected with ENOTTY.
+ */
+/* ARGSUSED */
+int
+cd9660_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	printf("You did ioctl for isofs !!\n");
+	return (ENOTTY);
+}
+
+/*
+ * Select vnode op.  Always returns 1 without checking whether I/O is
+ * actually possible.
+ */
+/* ARGSUSED */
+int
+cd9660_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/* We should really check to see if I/O is possible. */
+	return (1);
+}
+
+/*
+ * Mmap a file
+ *
+ * NB Currently unsupported: every request fails with EINVAL.
+ */
+/* ARGSUSED */
+int
+cd9660_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	return (EINVAL);
+}
+
+/*
+ * Seek on a file
+ *
+ * There is no work to do; the call always succeeds.
+ */
+/* ARGSUSED */
+int
+cd9660_seek(ap)
+	struct vop_seek_args /* {
+		struct vnode *a_vp;
+		off_t  a_oldoff;
+		off_t  a_newoff;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	return (0);
+}
+
+/*
+ * Structure for reading directories
+ *
+ * Working state of one cd9660_readdir() call, shared with the helpers
+ * iso_uiodir() and iso_shipdir().
+ */
+struct isoreaddir {
+ struct dirent saveent; /* held-back regular entry; d_namlen == 0 means none */
+ struct dirent assocent; /* held-back associated entry; d_namlen == 0 means none */
+ struct dirent current; /* entry being built from the current record */
+ off_t saveoff; /* curroff value recorded when saveent was stored */
+ off_t assocoff; /* curroff value recorded when assocent was stored */
+ off_t curroff; /* current offset in the directory */
+ struct uio *uio; /* destination of the entries */
+ off_t uio_off; /* offset passed with the last entry moved to uio */
+ u_int *cookiep; /* next cookie slot, or 0 when cookies are not wanted */
+ int ncookies; /* cookie slots left */
+ int eof; /* cleared by iso_uiodir() when an entry does not fit */
+};
+
+/*
+ * Hand one directory entry to the caller's uio.
+ *
+ * The name is NUL-terminated and d_reclen computed first.  Returns -1
+ * (after clearing idp->eof) when the entry does not fit into the uio
+ * or no cookie slot is left, the uiomove() error if the copy fails,
+ * and 0 on success, in which case off is remembered in idp->uio_off.
+ */
+static int
+iso_uiodir(idp,dp,off)
+	struct isoreaddir *idp;
+	struct dirent *dp;
+	off_t off;
+{
+	int error;
+
+	dp->d_name[dp->d_namlen] = 0;
+	dp->d_reclen = DIRSIZ(dp);
+
+	if (idp->uio->uio_resid < dp->d_reclen)
+		goto full;
+
+	if (idp->cookiep) {
+		if (idp->ncookies <= 0)
+			goto full;
+
+		*idp->cookiep++ = off;
+		--idp->ncookies;
+	}
+
+	error = uiomove(dp,dp->d_reclen,idp->uio);
+	if (error == 0)
+		idp->uio_off = off;
+	return error;
+
+full:
+	idp->eof = 0;
+	return -1;
+}
+
+static int
+iso_shipdir(idp) /* hold back idp->current, first flushing held entries whose name differs */
+ struct isoreaddir *idp;
+{
+ struct dirent *dp;
+ int cl, sl, assoc;
+ int error;
+ char *cname, *sname;
+
+ cl = idp->current.d_namlen;
+ cname = idp->current.d_name;
+ if (assoc = cl > 1 && *cname == ASSOCCHAR) { /* associated entry: compare without the prefix character */
+ cl--;
+ cname++;
+ }
+
+ dp = &idp->saveent;
+ sname = dp->d_name;
+ if (!(sl = dp->d_namlen)) { /* no regular entry held: compare against the associated one */
+ dp = &idp->assocent;
+ sname = dp->d_name + 1;
+ sl = dp->d_namlen - 1;
+ }
+ if (sl > 0) {
+ if (sl != cl
+ || bcmp(sname,cname,sl)) { /* name changed: write out what is held */
+ if (idp->assocent.d_namlen) {
+ if (error = iso_uiodir(idp,&idp->assocent,idp->assocoff))
+ return error;
+ idp->assocent.d_namlen = 0;
+ }
+ if (idp->saveent.d_namlen) {
+ if (error = iso_uiodir(idp,&idp->saveent,idp->saveoff))
+ return error;
+ idp->saveent.d_namlen = 0;
+ }
+ }
+ }
+ idp->current.d_reclen = DIRSIZ(&idp->current);
+ if (assoc) { /* the current entry replaces whatever was held in its slot */
+ idp->assocoff = idp->curroff;
+ bcopy(&idp->current,&idp->assocent,idp->current.d_reclen);
+ } else {
+ idp->saveoff = idp->curroff;
+ bcopy(&idp->current,&idp->saveent,idp->current.d_reclen);
+ }
+ return 0;
+}
+
+/*
+ * Vnode op for readdir
+ * XXX make sure everything still works now that eofflagp and cookiep
+ * are no longer args.
+ *
+ * Walks the directory records starting at a_uio->uio_offset and copies
+ * a struct dirent for each of them to a_uio.  On return uio_offset is
+ * the offset recorded with the last entry that was copied out, or the
+ * unchanged starting offset if none was.  Returns 0 or an errno value;
+ * running out of room in the uio is not an error.
+ */
+int
+cd9660_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct uio *uio = ap->a_uio;
+	struct isoreaddir *idp;
+	int entryoffsetinblock;
+	int error = 0;
+	int endsearch;
+	struct iso_directory_record *ep;
+	u_short elen;
+	int reclen;
+	struct iso_mnt *imp;
+	struct iso_node *ip;
+	struct buf *bp = NULL;
+
+	ip = VTOI(ap->a_vp);
+	imp = ip->i_mnt;
+
+	MALLOC(idp,struct isoreaddir *,sizeof(*idp),M_TEMP,M_WAITOK);
+	idp->saveent.d_namlen = 0;
+	idp->assocent.d_namlen = 0;
+	idp->uio = uio;
+#if 0
+	idp->cookiep = cookies;
+	idp->ncookies = ncookies;
+	idp->eof = 1;
+#else
+	idp->cookiep = 0;
+#endif
+	idp->curroff = uio->uio_offset;
+	/*
+	 * uio_off is copied back to uio_offset at the end; start it at
+	 * the caller's offset so it is defined even if nothing is emitted.
+	 */
+	idp->uio_off = uio->uio_offset;
+
+	entryoffsetinblock = iso_blkoff(imp, idp->curroff);
+	if (entryoffsetinblock != 0) {
+		if (error = iso_blkatoff(ip, idp->curroff, &bp)) {
+			FREE(idp,M_TEMP);
+			return (error);
+		}
+	}
+
+	endsearch = ip->i_size;
+
+	while (idp->curroff < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+
+		if (iso_blkoff(imp, idp->curroff) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if (error = iso_blkatoff(ip, idp->curroff, &bp))
+				break;
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+
+		ep = (struct iso_directory_record *)
+			(bp->b_un.b_addr + entryoffsetinblock);
+
+		reclen = isonum_711 (ep->length);
+		if (reclen == 0) {
+			/*
+			 * skip to next block, if any.  The "+ 1" makes
+			 * sure we move on even when the zero-length
+			 * record sits exactly on a block boundary.
+			 */
+			idp->curroff = roundup (idp->curroff + 1,
+						imp->logical_block_size);
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size) {
+			error = EINVAL;
+			/* illegal directory, so stop looking */
+			break;
+		}
+
+		idp->current.d_namlen = isonum_711 (ep->name_len);
+		if (isonum_711(ep->flags)&2)
+			isodirino(&idp->current.d_fileno,ep,imp);
+		else
+			idp->current.d_fileno = dbtob(bp->b_blkno) +
+				idp->curroff;
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + idp->current.d_namlen) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		idp->curroff += reclen;
+		/*
+		 * Produce the name according to the mount's name flavour.
+		 */
+		switch (imp->iso_ftype) {
+		case ISO_FTYPE_RRIP:
+			cd9660_rrip_getname(ep,idp->current.d_name,
+					   (u_short *)&idp->current.d_namlen,
+					   &idp->current.d_fileno,imp);
+			if (idp->current.d_namlen)
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+			break;
+		default:	/* ISO_FTYPE_DEFAULT || ISO_FTYPE_9660 */
+			/* Names 0 and 1 are reported as "." and "..". */
+			strcpy(idp->current.d_name,"..");
+			switch (ep->name[0]) {
+			case 0:
+				idp->current.d_namlen = 1;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			case 1:
+				idp->current.d_namlen = 2;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			default:
+				isofntrans(ep->name,idp->current.d_namlen,
+					   idp->current.d_name, &elen,
+					   imp->iso_ftype == ISO_FTYPE_9660,
+					   isonum_711(ep->flags)&4);
+				idp->current.d_namlen = (u_char)elen;
+				if (imp->iso_ftype == ISO_FTYPE_DEFAULT)
+					error = iso_shipdir(idp);
+				else
+					error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			}
+		}
+		if (error)
+			break;
+
+		entryoffsetinblock += reclen;
+	}
+
+	/* Flush whatever iso_shipdir() is still holding back. */
+	if (!error && imp->iso_ftype == ISO_FTYPE_DEFAULT) {
+		idp->current.d_namlen = 0;
+		error = iso_shipdir(idp);
+	}
+	/* A negative value only means the caller's buffer is full. */
+	if (error < 0)
+		error = 0;
+
+	if (bp)
+		brelse (bp);
+
+	uio->uio_offset = idp->uio_off;
+#if 0
+	*eofflagp = idp->eof;
+#endif
+
+	FREE(idp,M_TEMP);
+
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link
+ * Shouldn't we get the parent vnode and read the data from there?
+ * This could eventually result in deadlocks in cd9660_lookup.
+ * But otherwise the block read here is in the block buffer two times.
+ *
+ * Only Rock Ridge mounts have symbolic links; on any other mount, and
+ * whenever the directory record cannot be read or carries no usable
+ * link name, EINVAL is returned.  Otherwise the link text is copied to
+ * a_uio and the uiomove() result is returned.
+ */
+typedef struct iso_directory_record ISODIR;
+typedef struct iso_node             ISONODE;
+typedef struct iso_mnt              ISOMNT;
+int
+cd9660_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	ISONODE	*ip;
+	ISODIR	*dirp;
+	ISOMNT	*imp;
+	struct	buf *bp;
+	u_short	symlen;
+	int	error;
+	char	*symname;
+
+	ip  = VTOI(ap->a_vp);
+	imp = ip->i_mnt;
+
+	if (imp->iso_ftype != ISO_FTYPE_RRIP)
+		return EINVAL;
+
+	/*
+	 * Get parents directory record block that this inode included.
+	 * The read has to start at the beginning of the logical block,
+	 * because the record is located below by its offset within
+	 * that logical block.
+	 */
+	error = bread(imp->im_devvp,
+		      (daddr_t)((ip->i_number & ~imp->im_bmask) / DEV_BSIZE),
+		      imp->logical_block_size,
+		      NOCRED,
+		      &bp);
+	if (error) {
+		brelse(bp);
+		return EINVAL;
+	}
+
+	/*
+	 * Setup the directory pointer for this inode
+	 */
+	dirp = (ISODIR *)(bp->b_un.b_addr + (ip->i_number & imp->im_bmask));
+#ifdef DEBUG
+	printf("lbn=%d,off=%d,bsize=%d,DEV_BSIZE=%d, dirp= %08x, b_addr=%08x, offset=%08x(%08x)\n",
+	       (daddr_t)(ip->i_number >> imp->im_bshift),
+	       ip->i_number & imp->im_bmask,
+	       imp->logical_block_size,
+	       DEV_BSIZE,
+	       dirp,
+	       bp->b_un.b_addr,
+	       ip->i_number,
+	       ip->i_number & imp->im_bmask );
+#endif
+
+	/*
+	 * Just make sure, we have a right one....
+	 *   1: Check not cross boundary on block
+	 */
+	if ((ip->i_number & imp->im_bmask) + isonum_711(dirp->length)
+	    > imp->logical_block_size) {
+		brelse(bp);
+		return EINVAL;
+	}
+
+	/*
+	 * Now get a buffer
+	 * Abuse a namei buffer for now.
+	 */
+	MALLOC(symname,char *,MAXPATHLEN,M_NAMEI,M_WAITOK);
+
+	/*
+	 * Gather the link name from the record's SL entries.
+	 */
+	if (cd9660_rrip_getsymname(dirp,symname,&symlen,imp) == 0) {
+		FREE(symname,M_NAMEI);
+		brelse(bp);
+		return EINVAL;
+	}
+	/*
+	 * The record is no longer needed once the name has been copied.
+	 */
+	brelse(bp);
+
+	/*
+	 * return with the symbolic name to caller's.
+	 */
+	error = uiomove(symname,symlen,ap->a_uio);
+
+	FREE(symname,M_NAMEI);
+
+	return error;
+}
+
+/*
+ * Abort a pending name operation.  When the component name still owns a
+ * pathname buffer (HASBUF set) and the caller did not ask for it to be
+ * kept (SAVESTART clear), release that buffer.  Always returns 0.
+ */
+int
+cd9660_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (0);
+}
+
+/*
+ * Lock the iso_node behind a vnode.  Always returns 0.
+ */
+int
+cd9660_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct iso_node *node;
+
+	node = VTOI(ap->a_vp);
+	ISO_ILOCK(node);
+	return (0);
+}
+
+/*
+ * Unlock the iso_node behind a vnode.  Unlocking a node whose ILOCKED
+ * flag is clear is treated as a fatal error.  Returns 0.
+ */
+int
+cd9660_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct iso_node *node;
+
+	node = VTOI(ap->a_vp);
+	if ((node->i_flag & ILOCKED) == 0)
+		panic("cd9660_unlock NOT LOCKED");
+	ISO_IUNLOCK(node);
+	return (0);
+}
+
+/*
+ * Report whether the iso_node behind a vnode has ILOCKED set:
+ * 1 if it does, 0 otherwise.
+ */
+int
+cd9660_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct iso_node *node = VTOI(ap->a_vp);
+
+	return ((node->i_flag & ILOCKED) ? 1 : 0);
+}
+
+/*
+ * Strategy routine: resolve the buffer's logical block to a device block
+ * when that has not been done yet (b_blkno still equals b_lblkno), then
+ * pass the request on to the strategy routine of the underlying device
+ * vnode.
+ *
+ * A mapping failure is recorded in the buffer (b_error, B_ERROR), the
+ * buffer is completed with biodone() and the error is returned.  A block
+ * that maps to -1 is cleared and completed without touching the device.
+ * Block and character special vnodes must never arrive here.
+ */
+int
+cd9660_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	register struct buf *bufp = ap->a_bp;
+	register struct vnode *vnp = bufp->b_vp;
+	register struct iso_node *node = VTOI(vnp);
+	int error;
+
+	if (vnp->v_type == VBLK || vnp->v_type == VCHR)
+		panic("cd9660_strategy: spec");
+
+	if (bufp->b_blkno == bufp->b_lblkno) {
+		/* Not mapped yet: translate logical block to device block. */
+		error = VOP_BMAP(vnp, bufp->b_lblkno, NULL, &bufp->b_blkno,
+		    NULL);
+		if (error != 0) {
+			bufp->b_error = error;
+			bufp->b_flags |= B_ERROR;
+			biodone(bufp);
+			return (error);
+		}
+		if ((long)bufp->b_blkno == -1)
+			clrbuf(bufp);
+	}
+
+	/* Nothing on the device backs this block; finish the request here. */
+	if ((long)bufp->b_blkno == -1) {
+		biodone(bufp);
+		return (0);
+	}
+
+	/* Redirect the request to the device vnode's own strategy routine. */
+	vnp = node->i_devvp;
+	bufp->b_dev = vnp->v_rdev;
+	VOCALL (vnp->v_op, VOFFSET(vop_strategy), ap);
+	return (0);
+}
+
+/*
+ * Print a one-line identification of an isofs vnode.  The vnode argument
+ * is not examined; the output is a fixed string.  Returns 0.
+ */
+int
+cd9660_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_ISOFS, isofs vnode\n");
+	return (0);
+}
+
+/*
+ * Catch-all for operations this filesystem does not provide:
+ * unconditionally fails with EOPNOTSUPP.
+ */
+int
+cd9660_enotsupp()
+{
+	return EOPNOTSUPP;
+}
+
+/*
+ * Vnode operations that cd9660 does not implement itself.
+ *
+ * Each name below is an alias for cd9660_enotsupp (or, for fsync, nullop)
+ * cast to the function-pointer type spelled out in the cast, so the names
+ * can be used in the vnode operation tables that follow.
+ */
+#define cd9660_create \
+ ((int (*) __P((struct vop_create_args *)))cd9660_enotsupp)
+#define cd9660_mknod ((int (*) __P((struct vop_mknod_args *)))cd9660_enotsupp)
+#define cd9660_setattr \
+ ((int (*) __P((struct vop_setattr_args *)))cd9660_enotsupp)
+#define cd9660_write ((int (*) __P((struct vop_write_args *)))cd9660_enotsupp)
+#define cd9660_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define cd9660_remove \
+ ((int (*) __P((struct vop_remove_args *)))cd9660_enotsupp)
+#define cd9660_link ((int (*) __P((struct vop_link_args *)))cd9660_enotsupp)
+#define cd9660_rename \
+ ((int (*) __P((struct vop_rename_args *)))cd9660_enotsupp)
+#define cd9660_mkdir ((int (*) __P((struct vop_mkdir_args *)))cd9660_enotsupp)
+#define cd9660_rmdir ((int (*) __P((struct vop_rmdir_args *)))cd9660_enotsupp)
+#define cd9660_symlink \
+ ((int (*) __P((struct vop_symlink_args *)))cd9660_enotsupp)
+#define cd9660_pathconf \
+ ((int (*) __P((struct vop_pathconf_args *)))cd9660_enotsupp)
+#define cd9660_advlock \
+ ((int (*) __P((struct vop_advlock_args *)))cd9660_enotsupp)
+#define cd9660_blkatoff \
+ ((int (*) __P((struct vop_blkatoff_args *)))cd9660_enotsupp)
+#define cd9660_valloc ((int(*) __P(( \
+ struct vnode *pvp, \
+ int mode, \
+ struct ucred *cred, \
+ struct vnode **vpp))) cd9660_enotsupp)
+#define cd9660_vfree ((int (*) __P((struct vop_vfree_args *)))cd9660_enotsupp)
+#define cd9660_truncate \
+ ((int (*) __P((struct vop_truncate_args *)))cd9660_enotsupp)
+#define cd9660_update \
+ ((int (*) __P((struct vop_update_args *)))cd9660_enotsupp)
+#define cd9660_bwrite \
+ ((int (*) __P((struct vop_bwrite_args *)))cd9660_enotsupp)
+
+/*
+ * Vnode operation table for cd9660 vnodes.
+ *
+ * Each entry pairs an operation descriptor with the routine that handles
+ * it; operations without an entry fall to the vop_default_desc handler,
+ * vn_default_error.  The table is terminated by a NULL/NULL pair.
+ * cd9660_vnodeop_opv_desc ties the table to the cd9660_vnodeop_p vector.
+ */
+int (**cd9660_vnodeop_p)();
+struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, cd9660_lookup }, /* lookup */
+ { &vop_create_desc, cd9660_create }, /* create */
+ { &vop_mknod_desc, cd9660_mknod }, /* mknod */
+ { &vop_open_desc, cd9660_open }, /* open */
+ { &vop_close_desc, cd9660_close }, /* close */
+ { &vop_access_desc, cd9660_access }, /* access */
+ { &vop_getattr_desc, cd9660_getattr }, /* getattr */
+ { &vop_setattr_desc, cd9660_setattr }, /* setattr */
+ { &vop_read_desc, cd9660_read }, /* read */
+ { &vop_write_desc, cd9660_write }, /* write */
+ { &vop_ioctl_desc, cd9660_ioctl }, /* ioctl */
+ { &vop_select_desc, cd9660_select }, /* select */
+ { &vop_mmap_desc, cd9660_mmap }, /* mmap */
+ { &vop_fsync_desc, cd9660_fsync }, /* fsync */
+ { &vop_seek_desc, cd9660_seek }, /* seek */
+ { &vop_remove_desc, cd9660_remove }, /* remove */
+ { &vop_link_desc, cd9660_link }, /* link */
+ { &vop_rename_desc, cd9660_rename }, /* rename */
+ { &vop_mkdir_desc, cd9660_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, cd9660_rmdir }, /* rmdir */
+ { &vop_symlink_desc, cd9660_symlink }, /* symlink */
+ { &vop_readdir_desc, cd9660_readdir }, /* readdir */
+ { &vop_readlink_desc, cd9660_readlink },/* readlink */
+ { &vop_abortop_desc, cd9660_abortop }, /* abortop */
+ { &vop_inactive_desc, cd9660_inactive },/* inactive */
+ { &vop_reclaim_desc, cd9660_reclaim }, /* reclaim */
+ { &vop_lock_desc, cd9660_lock }, /* lock */
+ { &vop_unlock_desc, cd9660_unlock }, /* unlock */
+ { &vop_bmap_desc, cd9660_bmap }, /* bmap */
+ { &vop_strategy_desc, cd9660_strategy },/* strategy */
+ { &vop_print_desc, cd9660_print }, /* print */
+ { &vop_islocked_desc, cd9660_islocked },/* islocked */
+ { &vop_pathconf_desc, cd9660_pathconf },/* pathconf */
+ { &vop_advlock_desc, cd9660_advlock }, /* advlock */
+ { &vop_blkatoff_desc, cd9660_blkatoff },/* blkatoff */
+ { &vop_valloc_desc, cd9660_valloc }, /* valloc */
+ { &vop_vfree_desc, cd9660_vfree }, /* vfree */
+ { &vop_truncate_desc, cd9660_truncate },/* truncate */
+ { &vop_update_desc, cd9660_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc cd9660_vnodeop_opv_desc =
+ { &cd9660_vnodeop_p, cd9660_vnodeop_entries };
+
+/*
+ * Vnode operation table for special device vnodes on a cd9660 mount.
+ *
+ * Most operations go to the spec_* routines; attribute access, locking,
+ * inactive/reclaim, print and update stay with the cd9660 routines, and
+ * the operations cd9660 does not support keep their cd9660_* aliases.
+ */
+int (**cd9660_specop_p)();
+struct vnodeopv_entry_desc cd9660_specop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, spec_lookup }, /* lookup */
+ { &vop_create_desc, cd9660_create }, /* create */
+ { &vop_mknod_desc, cd9660_mknod }, /* mknod */
+ { &vop_open_desc, spec_open }, /* open */
+ { &vop_close_desc, spec_close }, /* close */
+ { &vop_access_desc, cd9660_access }, /* access */
+ { &vop_getattr_desc, cd9660_getattr }, /* getattr */
+ { &vop_setattr_desc, cd9660_setattr }, /* setattr */
+ { &vop_read_desc, spec_read }, /* read */
+ { &vop_write_desc, spec_write }, /* write */
+ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
+ { &vop_select_desc, spec_select }, /* select */
+ { &vop_mmap_desc, spec_mmap }, /* mmap */
+ { &vop_fsync_desc, spec_fsync }, /* fsync */
+ { &vop_seek_desc, spec_seek }, /* seek */
+ { &vop_remove_desc, cd9660_remove }, /* remove */
+ { &vop_link_desc, cd9660_link }, /* link */
+ { &vop_rename_desc, cd9660_rename }, /* rename */
+ { &vop_mkdir_desc, cd9660_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, cd9660_rmdir }, /* rmdir */
+ { &vop_symlink_desc, cd9660_symlink }, /* symlink */
+ { &vop_readdir_desc, spec_readdir }, /* readdir */
+ { &vop_readlink_desc, spec_readlink }, /* readlink */
+ { &vop_abortop_desc, spec_abortop }, /* abortop */
+ { &vop_inactive_desc, cd9660_inactive },/* inactive */
+ { &vop_reclaim_desc, cd9660_reclaim }, /* reclaim */
+ { &vop_lock_desc, cd9660_lock }, /* lock */
+ { &vop_unlock_desc, cd9660_unlock }, /* unlock */
+ { &vop_bmap_desc, spec_bmap }, /* bmap */
+ /* XXX strategy: panics, should be notsupp instead? */
+ { &vop_strategy_desc, cd9660_strategy },/* strategy */
+ { &vop_print_desc, cd9660_print }, /* print */
+ { &vop_islocked_desc, cd9660_islocked },/* islocked */
+ { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
+ { &vop_advlock_desc, spec_advlock }, /* advlock */
+ { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, spec_valloc }, /* valloc */
+ { &vop_vfree_desc, spec_vfree }, /* vfree */
+ { &vop_truncate_desc, spec_truncate }, /* truncate */
+ { &vop_update_desc, cd9660_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc cd9660_specop_opv_desc =
+ { &cd9660_specop_p, cd9660_specop_entries };
+
+/*
+ * Vnode operation table for FIFO vnodes on a cd9660 mount; only built
+ * when FIFO is defined.
+ *
+ * Most operations go to the fifo_* routines (strategy goes to
+ * fifo_badop); attribute access, locking, inactive/reclaim, print and
+ * update stay with the cd9660 routines.
+ */
+#ifdef FIFO
+int (**cd9660_fifoop_p)();
+struct vnodeopv_entry_desc cd9660_fifoop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, fifo_lookup }, /* lookup */
+ { &vop_create_desc, cd9660_create }, /* create */
+ { &vop_mknod_desc, cd9660_mknod }, /* mknod */
+ { &vop_open_desc, fifo_open }, /* open */
+ { &vop_close_desc, fifo_close }, /* close */
+ { &vop_access_desc, cd9660_access }, /* access */
+ { &vop_getattr_desc, cd9660_getattr }, /* getattr */
+ { &vop_setattr_desc, cd9660_setattr }, /* setattr */
+ { &vop_read_desc, fifo_read }, /* read */
+ { &vop_write_desc, fifo_write }, /* write */
+ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */
+ { &vop_select_desc, fifo_select }, /* select */
+ { &vop_mmap_desc, fifo_mmap }, /* mmap */
+ { &vop_fsync_desc, fifo_fsync }, /* fsync */
+ { &vop_seek_desc, fifo_seek }, /* seek */
+ { &vop_remove_desc, cd9660_remove }, /* remove */
+ { &vop_link_desc, cd9660_link }, /* link */
+ { &vop_rename_desc, cd9660_rename }, /* rename */
+ { &vop_mkdir_desc, cd9660_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, cd9660_rmdir }, /* rmdir */
+ { &vop_symlink_desc, cd9660_symlink }, /* symlink */
+ { &vop_readdir_desc, fifo_readdir }, /* readdir */
+ { &vop_readlink_desc, fifo_readlink }, /* readlink */
+ { &vop_abortop_desc, fifo_abortop }, /* abortop */
+ { &vop_inactive_desc, cd9660_inactive },/* inactive */
+ { &vop_reclaim_desc, cd9660_reclaim }, /* reclaim */
+ { &vop_lock_desc, cd9660_lock }, /* lock */
+ { &vop_unlock_desc, cd9660_unlock }, /* unlock */
+ { &vop_bmap_desc, fifo_bmap }, /* bmap */
+ { &vop_strategy_desc, fifo_badop }, /* strategy */
+ { &vop_print_desc, cd9660_print }, /* print */
+ { &vop_islocked_desc, cd9660_islocked },/* islocked */
+ { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fifo_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fifo_valloc }, /* valloc */
+ { &vop_vfree_desc, fifo_vfree }, /* vfree */
+ { &vop_truncate_desc, fifo_truncate }, /* truncate */
+ { &vop_update_desc, cd9660_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc cd9660_fifoop_opv_desc =
+ { &cd9660_fifoop_p, cd9660_fifoop_entries };
+#endif /* FIFO */
diff --git a/sys/fs/cd9660/iso.h b/sys/fs/cd9660/iso.h
new file mode 100644
index 000000000000..e3567066e1cd
--- /dev/null
+++ b/sys/fs/cd9660/iso.h
@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso.h 8.2 (Berkeley) 1/23/94
+ */
+
+/*
+ * Size in bytes of a field that occupies byte positions `from' through
+ * `to' (inclusive) of an on-disc structure.  Arguments are parenthesized
+ * so that expressions may be passed safely.
+ */
+#define ISODCL(from, to) ((to) - (from) + 1)
+
+/*
+ * Generic view of a 2048-byte volume descriptor: a one-byte type, a
+ * five-byte identifier, a one-byte version, and the remaining bytes as
+ * opaque data.  Field sizes are given as byte-position ranges via ISODCL.
+ */
+struct iso_volume_descriptor {
+ char type[ISODCL(1,1)]; /* 711 */
+ char id[ISODCL(2,6)];
+ char version[ISODCL(7,7)];
+ char data[ISODCL(8,2048)];
+};
+
+/* volume descriptor types */
+#define ISO_VD_PRIMARY 1
+#define ISO_VD_END 255
+
+#define ISO_STANDARD_ID "CD001"
+#define ISO_ECMA_ID "CDW01"
+
+/*
+ * Byte layout of the primary volume descriptor (2048 bytes).  Every
+ * field is a char array sized from its byte-position range.  The
+ * trailing comments name the field's encoding; the numeric ones
+ * (711, 723, 731, ...) presumably correspond to the isonum_NNN decoders
+ * declared later in this header -- confirm against the standard.
+ */
+struct iso_primary_descriptor {
+ char type [ISODCL ( 1, 1)]; /* 711 */
+ char id [ISODCL ( 2, 6)];
+ char version [ISODCL ( 7, 7)]; /* 711 */
+ char unused1 [ISODCL ( 8, 8)];
+ char system_id [ISODCL ( 9, 40)]; /* achars */
+ char volume_id [ISODCL ( 41, 72)]; /* dchars */
+ char unused2 [ISODCL ( 73, 80)];
+ char volume_space_size [ISODCL ( 81, 88)]; /* 733 */
+ char unused3 [ISODCL ( 89, 120)];
+ char volume_set_size [ISODCL (121, 124)]; /* 723 */
+ char volume_sequence_number [ISODCL (125, 128)]; /* 723 */
+ char logical_block_size [ISODCL (129, 132)]; /* 723 */
+ char path_table_size [ISODCL (133, 140)]; /* 733 */
+ char type_l_path_table [ISODCL (141, 144)]; /* 731 */
+ char opt_type_l_path_table [ISODCL (145, 148)]; /* 731 */
+ char type_m_path_table [ISODCL (149, 152)]; /* 732 */
+ char opt_type_m_path_table [ISODCL (153, 156)]; /* 732 */
+ char root_directory_record [ISODCL (157, 190)]; /* 9.1 */
+ char volume_set_id [ISODCL (191, 318)]; /* dchars */
+ char publisher_id [ISODCL (319, 446)]; /* achars */
+ char preparer_id [ISODCL (447, 574)]; /* achars */
+ char application_id [ISODCL (575, 702)]; /* achars */
+ char copyright_file_id [ISODCL (703, 739)]; /* 7.5 dchars */
+ char abstract_file_id [ISODCL (740, 776)]; /* 7.5 dchars */
+ char bibliographic_file_id [ISODCL (777, 813)]; /* 7.5 dchars */
+ char creation_date [ISODCL (814, 830)]; /* 8.4.26.1 */
+ char modification_date [ISODCL (831, 847)]; /* 8.4.26.1 */
+ char expiration_date [ISODCL (848, 864)]; /* 8.4.26.1 */
+ char effective_date [ISODCL (865, 881)]; /* 8.4.26.1 */
+ char file_structure_version [ISODCL (882, 882)]; /* 711 */
+ char unused4 [ISODCL (883, 883)];
+ char application_data [ISODCL (884, 1395)];
+ char unused5 [ISODCL (1396, 2048)];
+};
+#define ISO_DEFAULT_BLOCK_SIZE 2048
+
+/*
+ * Byte layout of a directory record.  The fixed part covers byte
+ * positions 1-33; the file identifier follows immediately as a
+ * variable-length tail (`name', declared with zero length), and its
+ * length is carried in name_len.
+ */
+struct iso_directory_record {
+ char length [ISODCL (1, 1)]; /* 711 */
+ char ext_attr_length [ISODCL (2, 2)]; /* 711 */
+ unsigned char extent [ISODCL (3, 10)]; /* 733 */
+ unsigned char size [ISODCL (11, 18)]; /* 733 */
+ char date [ISODCL (19, 25)]; /* 7 by 711 */
+ char flags [ISODCL (26, 26)];
+ char file_unit_size [ISODCL (27, 27)]; /* 711 */
+ char interleave [ISODCL (28, 28)]; /* 711 */
+ char volume_sequence_number [ISODCL (29, 32)]; /* 723 */
+ char name_len [ISODCL (33, 33)]; /* 711 */
+ char name [0];
+};
+/*
+ * Size of the fixed part of a directory record.  sizeof(struct
+ * iso_directory_record) cannot be used because the compiler may pad the
+ * structure (34 instead of 33).
+ */
+#define ISO_DIRECTORY_RECORD_SIZE 33
+
+/*
+ * Byte layout of an extended attribute record (byte positions 1-250).
+ * Trailing comments name each field's encoding, in the same notation as
+ * the other on-disc structures in this header.
+ */
+struct iso_extended_attributes {
+ unsigned char owner [ISODCL (1, 4)]; /* 723 */
+ unsigned char group [ISODCL (5, 8)]; /* 723 */
+ unsigned char perm [ISODCL (9, 10)]; /* 9.5.3 */
+ char ctime [ISODCL (11, 27)]; /* 8.4.26.1 */
+ char mtime [ISODCL (28, 44)]; /* 8.4.26.1 */
+ char xtime [ISODCL (45, 61)]; /* 8.4.26.1 */
+ char ftime [ISODCL (62, 78)]; /* 8.4.26.1 */
+ char recfmt [ISODCL (79, 79)]; /* 711 */
+ char recattr [ISODCL (80, 80)]; /* 711 */
+ unsigned char reclen [ISODCL (81, 84)]; /* 723 */
+ char system_id [ISODCL (85, 116)]; /* achars */
+ char system_use [ISODCL (117, 180)];
+ char version [ISODCL (181, 181)]; /* 711 */
+ char len_esc [ISODCL (182, 182)]; /* 711 */
+ char reserved [ISODCL (183, 246)];
+ unsigned char len_au [ISODCL (247, 250)]; /* 723 */
+};
+
+/* CD-ROM Format type */
+enum ISO_FTYPE { ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP, ISO_FTYPE_ECMA };
+
+#ifndef ISOFSMNT_ROOT
+#define ISOFSMNT_ROOT 0
+#endif
+
+/*
+ * In-core per-mount state for a cd9660 filesystem; reached from a
+ * struct mount through VFSTOISOFS().
+ */
+struct iso_mnt {
+ int im_flags;
+
+ struct mount *im_mountp;
+ dev_t im_dev;
+ struct vnode *im_devvp; /* device vnode the filesystem is read from */
+
+ int logical_block_size; /* block size returned by iso_blksize() */
+ int im_bshift; /* shift used by iso_lblkno()/iso_lblktosize() */
+ int im_bmask; /* mask used by iso_blkoff() */
+
+ int volume_space_size;
+ char im_fsmnt[50];
+ struct netexport im_export;
+
+ char root[ISODCL (157, 190)]; /* same byte range as root_directory_record */
+ int root_extent;
+ int root_size;
+ enum ISO_FTYPE iso_ftype; /* CD-ROM format type of this mount */
+
+ int rr_skip; /* NOTE(review): presumably Rock Ridge skip lengths -- confirm */
+ int rr_skip0;
+};
+
+/* Fetch the cd9660 per-mount data hung off a struct mount. */
+#define VFSTOISOFS(mp) ((struct iso_mnt *)((mp)->mnt_data))
+
+/*
+ * Logical block arithmetic for a mount `imp':
+ *	iso_blkoff	offset of byte position `loc' within its block
+ *	iso_lblkno	logical block number containing `loc'
+ *	iso_blksize	size of a logical block (`ip' and `lbn' are unused)
+ *	iso_lblktosize	byte size of `blk' logical blocks
+ */
+#define iso_blkoff(imp, loc) ((loc) & (imp)->im_bmask)
+#define iso_lblkno(imp, loc) ((loc) >> (imp)->im_bshift)
+#define iso_blksize(imp, ip, lbn) ((imp)->logical_block_size)
+#define iso_lblktosize(imp, blk) ((blk) << (imp)->im_bshift)
+
+int cd9660_mount __P((struct mount *,
+ char *, caddr_t, struct nameidata *, struct proc *));
+int cd9660_start __P((struct mount *, int, struct proc *));
+int cd9660_unmount __P((struct mount *, int, struct proc *));
+int cd9660_root __P((struct mount *, struct vnode **));
+int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *));
+int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *));
+int cd9660_sync __P((struct mount *, int, struct ucred *, struct proc *));
+int cd9660_vget __P((struct mount *, ino_t, struct vnode **));
+int cd9660_fhtovp __P((struct mount *, struct fid *, struct mbuf *,
+ struct vnode **, int *, struct ucred **));
+int cd9660_vptofh __P((struct vnode *, struct fid *));
+int cd9660_init __P(());
+
+struct iso_node;
+int iso_blkatoff __P((struct iso_node *ip, long offset, struct buf **bpp));
+int iso_iget __P((struct iso_node *xp, ino_t ino, int relocated,
+ struct iso_node **ipp, struct iso_directory_record *isodir));
+int iso_iput __P((struct iso_node *ip));
+int iso_ilock __P((struct iso_node *ip));
+int iso_iunlock __P((struct iso_node *ip));
+int cd9660_mountroot __P((void));
+
+extern int (**cd9660_vnodeop_p)();
+
+/* Decode a single unsigned byte. */
+extern inline int
+isonum_711(p)
+	unsigned char *p;
+{
+	return p[0];
+}
+
+/*
+ * Decode a single signed byte.  The value is read through signed char
+ * explicitly: whether plain char is signed is implementation-defined, so
+ * reading it as plain char would lose the sign on unsigned-char targets.
+ */
+extern inline int
+isonum_712(p)
+	char *p;
+{
+	return *(signed char *)p;
+}
+
+/*
+ * Decode an unsigned 16-bit value stored least significant byte first.
+ * The high byte is used as an unsigned quantity; casting it to char
+ * would sign-extend values of 0x8000 and above into negative results.
+ */
+extern inline int
+isonum_721(p)
+	unsigned char *p;
+{
+	return p[0] | (p[1] << 8);
+}
+
+/*
+ * Decode an unsigned 16-bit value stored most significant byte first.
+ * The high byte is used as an unsigned quantity; casting it to char
+ * would sign-extend values of 0x8000 and above into negative results.
+ */
+extern inline int
+isonum_722(p)
+	unsigned char *p;
+{
+	return (p[0] << 8) | p[1];
+}
+
+/*
+ * Decode a 16-bit value from a field recorded in both byte orders by
+ * reading its leading least-significant-byte-first copy.
+ */
+extern inline int
+isonum_723(p)
+	unsigned char *p;
+{
+	return (isonum_721(p));
+}
+
+/*
+ * Decode a 32-bit value stored least significant byte first.  The top
+ * byte is shifted as unsigned: shifting it as a promoted int overflows
+ * (undefined behaviour) whenever that byte is 0x80 or larger.
+ */
+extern inline int
+isonum_731(p)
+	unsigned char *p;
+{
+	return (int)(p[0] | (p[1] << 8) | (p[2] << 16) |
+	    ((unsigned int)p[3] << 24));
+}
+
+/*
+ * Decode a 32-bit value stored most significant byte first.  The top
+ * byte is shifted as unsigned: shifting it as a promoted int overflows
+ * (undefined behaviour) whenever that byte is 0x80 or larger.
+ */
+extern inline int
+isonum_732(p)
+	unsigned char *p;
+{
+	return (int)(((unsigned int)p[0] << 24) | (p[1] << 16) |
+	    (p[2] << 8) | p[3]);
+}
+
+/*
+ * Decode a 32-bit value from a field recorded in both byte orders by
+ * reading its leading least-significant-byte-first copy.
+ */
+extern inline int
+isonum_733(p)
+	unsigned char *p;
+{
+	return (isonum_731(p));
+}
+
+int isofncmp __P((unsigned char *, int, unsigned char *, int));
+void isofntrans __P((unsigned char *, int, unsigned char *, unsigned short *,
+ int, int));
+
+/*
+ * Associated files have a leading '='.
+ */
+#define ASSOCCHAR '='
diff --git a/sys/fs/cd9660/iso_rrip.h b/sys/fs/cd9660/iso_rrip.h
new file mode 100644
index 000000000000..78e4a775201b
--- /dev/null
+++ b/sys/fs/cd9660/iso_rrip.h
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso_rrip.h 8.2 (Berkeley) 1/23/94
+ */
+
+
+/*
+ * Analyze function flag (similar to RR field bits)
+ */
+#define ISO_SUSP_ATTR 0x0001
+#define ISO_SUSP_DEVICE 0x0002
+#define ISO_SUSP_SLINK 0x0004
+#define ISO_SUSP_ALTNAME 0x0008
+#define ISO_SUSP_CLINK 0x0010
+#define ISO_SUSP_PLINK 0x0020
+#define ISO_SUSP_RELDIR 0x0040
+#define ISO_SUSP_TSTAMP 0x0080
+#define ISO_SUSP_IDFLAG 0x0100
+#define ISO_SUSP_EXTREF 0x0200
+#define ISO_SUSP_CONT 0x0400
+#define ISO_SUSP_OFFSET 0x0800
+#define ISO_SUSP_STOP 0x1000
+#define ISO_SUSP_UNKNOWN 0x8000
+
+/*
+ * State record for one Rock Ridge analysis pass.  Field roles are given
+ * in the per-field comments.
+ * NOTE(review): how the cd9660_rrip_* routines fill and consume this
+ * record is not visible in this header -- confirm against cd9660_rrip.c.
+ */
+typedef struct {
+ struct iso_node *inop;
+ int fields; /* interesting fields in this analysis */
+ daddr_t iso_ce_blk; /* block of continuation area */
+ off_t iso_ce_off; /* offset of continuation area */
+ int iso_ce_len; /* length of continuation area */
+ struct iso_mnt *imp; /* mount structure */
+ ino_t *inump; /* inode number pointer */
+ char *outbuf; /* name/symbolic link output area */
+ u_short *outlen; /* length of above */
+ u_short maxlen; /* maximum length of above */
+ int cont; /* continuation of above */
+} ISO_RRIP_ANALYZE;
+
+int cd9660_rrip_analyze __P((struct iso_directory_record *isodir,
+ struct iso_node *inop, struct iso_mnt *imp));
+int cd9660_rrip_getname __P((struct iso_directory_record *isodir,
+ char *outbuf, u_short *outlen,
+ ino_t *inump, struct iso_mnt *imp));
+int cd9660_rrip_getsymname __P((struct iso_directory_record *isodir,
+ char *outbuf, u_short *outlen,
+ struct iso_mnt *imp));
+int cd9660_rrip_offset __P((struct iso_directory_record *isodir,
+ struct iso_mnt *imp));
diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c
new file mode 100644
index 000000000000..9d04652b7fc8
--- /dev/null
+++ b/sys/fs/deadfs/dead_vnops.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)dead_vnops.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/errno.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+
+/*
+ * Prototypes for dead operations on vnodes.
+ *
+ * Operations that need no code of their own are #defined as casts of one
+ * of three shared handlers:
+ *   dead_badop - panics ("dead_badop called"),
+ *   dead_ebadf - returns EBADF,
+ *   nullop     - not defined in this file.
+ * The cast gives each alias the argument type of the operation it stands
+ * in for.
+ */
+int dead_badop(),
+ dead_ebadf();
+int dead_lookup __P((struct vop_lookup_args *));
+#define dead_create ((int (*) __P((struct vop_create_args *)))dead_badop)
+#define dead_mknod ((int (*) __P((struct vop_mknod_args *)))dead_badop)
+int dead_open __P((struct vop_open_args *));
+#define dead_close ((int (*) __P((struct vop_close_args *)))nullop)
+#define dead_access ((int (*) __P((struct vop_access_args *)))dead_ebadf)
+#define dead_getattr ((int (*) __P((struct vop_getattr_args *)))dead_ebadf)
+#define dead_setattr ((int (*) __P((struct vop_setattr_args *)))dead_ebadf)
+int dead_read __P((struct vop_read_args *));
+int dead_write __P((struct vop_write_args *));
+int dead_ioctl __P((struct vop_ioctl_args *));
+int dead_select __P((struct vop_select_args *));
+#define dead_mmap ((int (*) __P((struct vop_mmap_args *)))dead_badop)
+#define dead_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define dead_seek ((int (*) __P((struct vop_seek_args *)))nullop)
+#define dead_remove ((int (*) __P((struct vop_remove_args *)))dead_badop)
+#define dead_link ((int (*) __P((struct vop_link_args *)))dead_badop)
+#define dead_rename ((int (*) __P((struct vop_rename_args *)))dead_badop)
+#define dead_mkdir ((int (*) __P((struct vop_mkdir_args *)))dead_badop)
+#define dead_rmdir ((int (*) __P((struct vop_rmdir_args *)))dead_badop)
+#define dead_symlink ((int (*) __P((struct vop_symlink_args *)))dead_badop)
+#define dead_readdir ((int (*) __P((struct vop_readdir_args *)))dead_ebadf)
+#define dead_readlink ((int (*) __P((struct vop_readlink_args *)))dead_ebadf)
+#define dead_abortop ((int (*) __P((struct vop_abortop_args *)))dead_badop)
+#define dead_inactive ((int (*) __P((struct vop_inactive_args *)))nullop)
+#define dead_reclaim ((int (*) __P((struct vop_reclaim_args *)))nullop)
+int dead_lock __P((struct vop_lock_args *));
+#define dead_unlock ((int (*) __P((struct vop_unlock_args *)))nullop)
+int dead_bmap __P((struct vop_bmap_args *));
+int dead_strategy __P((struct vop_strategy_args *));
+int dead_print __P((struct vop_print_args *));
+#define dead_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+#define dead_pathconf ((int (*) __P((struct vop_pathconf_args *)))dead_ebadf)
+#define dead_advlock ((int (*) __P((struct vop_advlock_args *)))dead_ebadf)
+#define dead_blkatoff ((int (*) __P((struct vop_blkatoff_args *)))dead_badop)
+#define dead_valloc ((int (*) __P((struct vop_valloc_args *)))dead_badop)
+#define dead_vfree ((int (*) __P((struct vop_vfree_args *)))dead_badop)
+#define dead_truncate ((int (*) __P((struct vop_truncate_args *)))nullop)
+#define dead_update ((int (*) __P((struct vop_update_args *)))nullop)
+#define dead_bwrite ((int (*) __P((struct vop_bwrite_args *)))nullop)
+
+/*
+ * Operations vector for dead vnodes.
+ *
+ * dead_vnodeop_entries pairs every vnode operation descriptor with the
+ * dead_* handler (or handler alias) that implements it; operations not
+ * listed go to vn_default_error through the vop_default_desc entry.  The
+ * table ends with a NULL/NULL pair.  dead_vnodeop_opv_desc ties the table
+ * to dead_vnodeop_p.
+ * NOTE(review): dead_vnodeop_p is presumably filled in from this table by
+ * the vnode-operations setup code elsewhere -- not visible here.
+ */
+int (**dead_vnodeop_p)();
+struct vnodeopv_entry_desc dead_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, dead_lookup }, /* lookup */
+ { &vop_create_desc, dead_create }, /* create */
+ { &vop_mknod_desc, dead_mknod }, /* mknod */
+ { &vop_open_desc, dead_open }, /* open */
+ { &vop_close_desc, dead_close }, /* close */
+ { &vop_access_desc, dead_access }, /* access */
+ { &vop_getattr_desc, dead_getattr }, /* getattr */
+ { &vop_setattr_desc, dead_setattr }, /* setattr */
+ { &vop_read_desc, dead_read }, /* read */
+ { &vop_write_desc, dead_write }, /* write */
+ { &vop_ioctl_desc, dead_ioctl }, /* ioctl */
+ { &vop_select_desc, dead_select }, /* select */
+ { &vop_mmap_desc, dead_mmap }, /* mmap */
+ { &vop_fsync_desc, dead_fsync }, /* fsync */
+ { &vop_seek_desc, dead_seek }, /* seek */
+ { &vop_remove_desc, dead_remove }, /* remove */
+ { &vop_link_desc, dead_link }, /* link */
+ { &vop_rename_desc, dead_rename }, /* rename */
+ { &vop_mkdir_desc, dead_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, dead_rmdir }, /* rmdir */
+ { &vop_symlink_desc, dead_symlink }, /* symlink */
+ { &vop_readdir_desc, dead_readdir }, /* readdir */
+ { &vop_readlink_desc, dead_readlink }, /* readlink */
+ { &vop_abortop_desc, dead_abortop }, /* abortop */
+ { &vop_inactive_desc, dead_inactive }, /* inactive */
+ { &vop_reclaim_desc, dead_reclaim }, /* reclaim */
+ { &vop_lock_desc, dead_lock }, /* lock */
+ { &vop_unlock_desc, dead_unlock }, /* unlock */
+ { &vop_bmap_desc, dead_bmap }, /* bmap */
+ { &vop_strategy_desc, dead_strategy }, /* strategy */
+ { &vop_print_desc, dead_print }, /* print */
+ { &vop_islocked_desc, dead_islocked }, /* islocked */
+ { &vop_pathconf_desc, dead_pathconf }, /* pathconf */
+ { &vop_advlock_desc, dead_advlock }, /* advlock */
+ { &vop_blkatoff_desc, dead_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, dead_valloc }, /* valloc */
+ { &vop_vfree_desc, dead_vfree }, /* vfree */
+ { &vop_truncate_desc, dead_truncate }, /* truncate */
+ { &vop_update_desc, dead_update }, /* update */
+ { &vop_bwrite_desc, dead_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc dead_vnodeop_opv_desc =
+ { &dead_vnodeop_p, dead_vnodeop_entries };
+
+/*
+ * Lookup on a dead vnode.  Nothing can be found beneath it, so the
+ * caller's result pointer is cleared and ENOTDIR is returned every time.
+ */
+/* ARGSUSED */
+int
+dead_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct vnode **resultp = ap->a_vpp;
+
+	*resultp = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Open always fails as if device did not exist.
+ *
+ * Returns ENXIO unconditionally; the argument structure is not examined.
+ */
+/* ARGSUSED */
+int
+dead_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (ENXIO);
+}
+
+/*
+ * Vnode op for read.
+ *
+ * Panics if chkvnlock() reports that it had to wait for the vnode to
+ * finish a state change.  Otherwise a character device reads as
+ * end-of-file (0 is returned and nothing is transferred) and every other
+ * vnode type fails with EIO.
+ */
+/* ARGSUSED */
+int
+dead_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	if (chkvnlock(ap->a_vp))
+		panic("dead_read: lock");
+	/*
+	 * Return EOF for character devices, EIO for others
+	 */
+	if (ap->a_vp->v_type != VCHR)
+		return (EIO);
+	return (0);
+}
+
+/*
+ * Vnode op for write.
+ *
+ * Panics if chkvnlock() reports that it had to wait for the vnode to
+ * finish a state change; otherwise the write always fails with EIO.
+ */
+/* ARGSUSED */
+int
+dead_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	if (chkvnlock(ap->a_vp))
+		panic("dead_write: lock");
+	return (EIO);
+}
+
+/*
+ * Device ioctl operation.
+ *
+ * If the vnode was not in the middle of a state change (chkvnlock()
+ * returned 0) the ioctl fails with EBADF.  If chkvnlock() had to wait,
+ * the request is dispatched again through VCALL on the same vnode and
+ * that result is returned.
+ */
+/* ARGSUSED */
+int
+dead_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	if (!chkvnlock(ap->a_vp))
+		return (EBADF);
+	return (VCALL(ap->a_vp, VOFFSET(vop_ioctl), ap));
+}
+
+/*
+ * Select on a dead vnode: always returns 1 so that the caller goes on to
+ * perform the I/O and sees the failure there.
+ */
+/* ARGSUSED */
+int
+dead_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int a_which;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	/*
+	 * Let the user find out that the descriptor is gone.
+	 */
+	return (1);
+}
+
+/*
+ * Strategy on a dead vnode.
+ *
+ * If the buffer has no vnode, or the vnode was not in the middle of a
+ * state change, the buffer is marked B_ERROR, completed with biodone()
+ * and EIO is returned.  If chkvnlock() had to wait, the request is
+ * re-issued with VOP_STRATEGY and that result is returned.
+ */
+int
+dead_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+
+	if (ap->a_bp->b_vp == NULL || !chkvnlock(ap->a_bp->b_vp)) {
+		ap->a_bp->b_flags |= B_ERROR;
+		biodone(ap->a_bp);
+		return (EIO);
+	}
+	return (VOP_STRATEGY(ap->a_bp));
+}
+
+/*
+ * Wait until the vnode has finished changing state.
+ *
+ * Returns 0 at once when the vnode was not in transition.  If chkvnlock()
+ * had to wait, the lock request is dispatched again through VCALL on the
+ * same vnode and that result is returned.
+ */
+int
+dead_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	if (!chkvnlock(ap->a_vp))
+		return (0);
+	return (VCALL(ap->a_vp, VOFFSET(vop_lock), ap));
+}
+
+/*
+ * Block mapping on a dead vnode.
+ *
+ * Fails with EIO when the vnode was not in the middle of a state change.
+ * If chkvnlock() had to wait, the mapping request is re-issued with
+ * VOP_BMAP using the original arguments and that result is returned.
+ */
+int
+dead_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+
+	if (!chkvnlock(ap->a_vp))
+		return (EIO);
+	return (VOP_BMAP(ap->a_vp, ap->a_bn, ap->a_vpp, ap->a_bnp, ap->a_runp));
+}
+
+/*
+ * Print out the contents of a dead vnode.
+ *
+ * There is nothing vnode-specific to show, so a fixed line is printed.
+ * Returns 0: the routine is installed as a vnode operation returning int,
+ * so it must not fall off the end without a value.
+ */
+/* ARGSUSED */
+int
+dead_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_NON, dead vnode\n");
+	return (0);
+}
+
+/*
+ * Empty vnode failed operation.
+ *
+ * Shared handler behind the dead_* aliases that must fail quietly:
+ * returns EBADF and ignores whatever arguments it is called with.
+ */
+int
+dead_ebadf()
+{
+
+	return (EBADF);
+}
+
+/*
+ * Empty vnode bad operation.
+ *
+ * Shared handler behind the dead_* aliases that must never be reached on
+ * a dead vnode: it panics, so no value is ever returned.
+ */
+int
+dead_badop()
+{
+
+	panic("dead_badop called");
+	/* NOTREACHED */
+}
+
+/*
+ * Empty vnode null operation: does nothing and returns 0.
+ *
+ * The handler aliases in this file are built on nullop rather than on
+ * this routine, so nothing here refers to it.
+ */
+int
+dead_nullop()
+{
+
+	return (0);
+}
+
+/*
+ * We have to wait during times when the vnode is
+ * in a state of change.
+ *
+ * While VXLOCK is set on vp, mark the vnode VXWANT and sleep on it at
+ * priority PINOD.  Returns 1 if at least one sleep was needed, 0 if the
+ * vnode was not locked on entry.
+ */
+int
+chkvnlock(vp)
+	register struct vnode *vp;
+{
+	int locked = 0;
+
+	while (vp->v_flag & VXLOCK) {
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+		locked = 1;
+	}
+	return (locked);
+}
diff --git a/sys/fs/fdescfs/fdesc.h b/sys/fs/fdescfs/fdesc.h
new file mode 100644
index 000000000000..4c682e7bd370
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fdesc.h 8.5 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc.h,v 1.8 1993/04/06 15:28:33 jsp Exp $
+ */
+
+#ifdef KERNEL
+/*
+ * Per-mount private data, reached from a struct mount through
+ * VFSTOFDESC().
+ */
+struct fdescmount {
+ struct vnode *f_root; /* Root node */
+};
+
+/*
+ * Node index values (fd_ix).  The fixed nodes use FD_ROOT through FD_CTTY;
+ * the node for descriptor n uses FD_DESC + n.  FD_STDIN, FD_STDOUT and
+ * FD_STDERR must stay consecutive (fdesc_vnops.c refuses to compile
+ * otherwise).
+ */
+#define FD_ROOT 2
+#define FD_DEVFD 3
+#define FD_STDIN 4
+#define FD_STDOUT 5
+#define FD_STDERR 6
+#define FD_CTTY 7
+#define FD_DESC 8
+#define FD_MAX 12
+
+/*
+ * Kind of node an fdescnode represents.
+ */
+typedef enum {
+ Froot, /* root directory of the mount */
+ Fdevfd, /* the "fd" directory */
+ Fdesc, /* one open file descriptor (entry in "fd") */
+ Flink, /* "stdin"/"stdout"/"stderr" symbolic link */
+ Fctty /* "tty", the controlling terminal */
+} fdntype;
+
+/*
+ * Private data hung off each fdesc vnode (see VTOFDESC()).
+ * fd_forw and fd_back must remain the first two members: the hash chain
+ * heads in fdesc_vnops.c are cast to struct fdescnode * and linked into
+ * the same chain.
+ */
+struct fdescnode {
+ struct fdescnode *fd_forw; /* Hash chain */
+ struct fdescnode *fd_back;
+ struct vnode *fd_vnode; /* Back ptr to vnode */
+ fdntype fd_type; /* Type of this node */
+ unsigned fd_fd; /* Fd to be dup'ed */
+ char *fd_link; /* Link to fd/n */
+ int fd_ix; /* filesystem index */
+};
+
+/* Convert a mount point / vnode to its fdesc private data. */
+#define VFSTOFDESC(mp) ((struct fdescmount *)((mp)->mnt_data))
+#define VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data)
+
+/* Objects and routines shared between fdesc_vfsops.c and fdesc_vnops.c. */
+extern dev_t devctty;
+extern int fdesc_init __P((void));
+extern int fdesc_root __P((struct mount *, struct vnode **));
+extern int fdesc_allocvp __P((fdntype, int, struct mount *, struct vnode **));
+extern int (**fdesc_vnodeop_p)();
+extern struct vfsops fdesc_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/fdescfs/fdesc_vfsops.c b/sys/fs/fdescfs/fdesc_vfsops.c
new file mode 100644
index 000000000000..80c543da6550
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc_vfsops.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fdesc_vfsops.c 8.4 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vfsops.c,v 1.9 1993/04/06 15:28:33 jsp Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/fdesc/fdesc.h>
+
+/*
+ * Mount the per-process file descriptors (/dev/fd).
+ *
+ * Allocates the root vnode and the fdescmount structure, attaches them to
+ * mp, assigns a filesystem id and records the mount names.  `data', `ndp'
+ * and `p' are not used.  Returns EOPNOTSUPP for an MNT_UPDATE request,
+ * the error from fdesc_allocvp() if the root vnode cannot be obtained,
+ * and 0 otherwise.
+ */
+int
+fdesc_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct fdescmount *fdmnt;
+	struct vnode *rootvp;
+	u_int namelen;
+	int error;
+
+	/*
+	 * Updating an existing mount is not supported
+	 */
+	if ((mp->mnt_flag & MNT_UPDATE) != 0)
+		return (EOPNOTSUPP);
+
+	if ((error = fdesc_allocvp(Froot, FD_ROOT, mp, &rootvp)) != 0)
+		return (error);
+
+	MALLOC(fdmnt, struct fdescmount *, sizeof(struct fdescmount),
+	    M_UFSMNT, M_WAITOK);	/* XXX */
+	rootvp->v_type = VDIR;
+	rootvp->v_flag |= VROOT;
+	fdmnt->f_root = rootvp;
+	/* XXX -- don't mark as local to work around fts() problems */
+	/*mp->mnt_flag |= MNT_LOCAL;*/
+	mp->mnt_data = (qaddr_t) fdmnt;
+	getnewfsid(mp, MOUNT_FDESC);
+
+	/* Mounted-on name comes from the caller, the "from" name is fixed. */
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &namelen);
+	bzero(mp->mnt_stat.f_mntonname + namelen, MNAMELEN - namelen);
+	bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy("fdesc", mp->mnt_stat.f_mntfromname, sizeof("fdesc"));
+	return (0);
+}
+
+/*
+ * Start hook of the vfsops vector: there is nothing to start, so it
+ * always returns 0.
+ */
+int
+fdesc_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+ return (0);
+}
+
+/*
+ * Unmount an fdesc filesystem.
+ *
+ * A forced unmount (MNT_FORCE) is honoured only when the global `doforce'
+ * is set; otherwise EINVAL is returned.  The unmount fails with EBUSY
+ * while the root vnode has more than one reference, and with vflush()'s
+ * error if the other vnodes cannot be flushed.  On success the root vnode
+ * is released and destroyed and the fdescmount structure is freed.
+ */
+int
+fdesc_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct vnode *rootvp = VFSTOFDESC(mp)->f_root;
+	int flushflags = 0;
+	int error;
+
+	if ((mntflags & MNT_FORCE) != 0) {
+		/* fdesc can never be rootfs so don't check for it */
+		if (doforce == 0)
+			return (EINVAL);
+		flushflags |= FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 */
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+	error = vflush(mp, rootvp, flushflags);
+	if (error != 0)
+		return (error);
+
+	/* Drop the reference on the root vnode, then recycle it. */
+	vrele(rootvp);
+	vgone(rootvp);
+
+	/* Finally, throw away the fdescmount structure. */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+
+	return (0);
+}
+
+/*
+ * Hand back the root vnode of the mount through *vpp, with an extra
+ * reference taken and the vnode locked.  Always returns 0.
+ */
+int
+fdesc_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp = VFSTOFDESC(mp)->f_root;
+
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+	return (0);
+}
+
+/*
+ * Quotas are not supported on this filesystem: always EOPNOTSUPP.
+ */
+int
+fdesc_quotactl(mp, cmd, uid, arg, p)
+ struct mount *mp;
+ int cmd;
+ uid_t uid;
+ caddr_t arg;
+ struct proc *p;
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Fill in filesystem statistics for the calling process p.
+ *
+ * The file counts are derived from p's descriptor table: f_files is the
+ * open-file limit plus one, f_ffree the number of unused descriptor slots
+ * below that limit.  Block counts are fixed.  When sbp is not the mount's
+ * own statfs buffer the fsid and mount names are copied in as well.
+ * Always returns 0.
+ */
+int
+fdesc_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct filedesc *fdp = p->p_fd;
+	int lim = p->p_rlimit[RLIMIT_NOFILE].rlim_cur;
+	int last = min(fdp->fd_nfiles, lim);
+	int freefd = 0;
+	int i;
+
+	/*
+	 * Compute number of free file descriptors.
+	 * [ Strange results will ensue if the open file
+	 * limit is ever reduced below the current number
+	 * of open files... ]
+	 */
+	i = fdp->fd_freefile;
+	while (i < last) {
+		if (fdp->fd_ofiles[i] == NULL)
+			freefd++;
+		i++;
+	}
+
+	/*
+	 * Slots between the end of the allocated table and the limit
+	 * do not exist yet but are free all the same.
+	 */
+	if (fdp->fd_nfiles < lim)
+		freefd += (lim - fdp->fd_nfiles);
+
+	sbp->f_type = MOUNT_FDESC;
+	sbp->f_flags = 0;
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* 1K to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = lim + 1;		/* Allow for "." */
+	sbp->f_ffree = freefd;		/* See comments above */
+
+	/* The mount's own buffer already carries the id and the names. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync hook of the vfsops vector: nothing is written back here, so it
+ * always returns 0.
+ */
+int
+fdesc_sync(mp, waitfor)
+ struct mount *mp;
+ int waitfor;
+{
+
+ return (0);
+}
+
+/*
+ * Fdesc flat namespace lookup.
+ * Currently unsupported: always returns EOPNOTSUPP and leaves *vpp
+ * untouched.
+ */
+int
+fdesc_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * File handle to vnode translation is not supported: always EOPNOTSUPP.
+ */
+int
+fdesc_fhtovp(mp, fhp, setgen, vpp)
+ struct mount *mp;
+ struct fid *fhp;
+ int setgen;
+ struct vnode **vpp;
+{
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Vnode to file handle translation is not supported: always EOPNOTSUPP.
+ */
+int
+fdesc_vptofh(vp, fhp)
+ struct vnode *vp;
+ struct fid *fhp;
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Filesystem operations vector for fdesc.
+ * The initializer is positional.
+ * NOTE(review): the order must match the member order of struct vfsops,
+ * which is declared elsewhere -- confirm before adding or moving entries.
+ */
+struct vfsops fdesc_vfsops = {
+ fdesc_mount,
+ fdesc_start,
+ fdesc_unmount,
+ fdesc_root,
+ fdesc_quotactl,
+ fdesc_statfs,
+ fdesc_sync,
+ fdesc_vget,
+ fdesc_fhtovp,
+ fdesc_vptofh,
+ fdesc_init,
+};
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
new file mode 100644
index 000000000000..00d8675aea2f
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -0,0 +1,974 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fdesc_vnops.c 8.9 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vnops.c,v 1.12 1993/04/06 16:17:17 jsp Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/kernel.h> /* boottime */
+#include <sys/resourcevar.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/dirent.h>
+#include <miscfs/fdesc/fdesc.h>
+
+/*
+ * Controlling-terminal vnode of process p; NULL when P_CONTROLT is not
+ * set in p's flags.
+ */
+#define cttyvp(p) ((p)->p_flag & P_CONTROLT ? (p)->p_session->s_ttyvp : NULL)
+
+/*
+ * Bits of fdcache_lock, the sleep lock fdesc_allocvp() holds while it
+ * creates a new node.
+ */
+#define FDL_WANT 0x01 /* somebody is sleeping on the lock */
+#define FDL_LOCKED 0x02 /* the lock is held */
+static int fdcache_lock;
+
+/* Device number passed to cttyopen() by fdesc_open(); set in fdesc_init(). */
+dev_t devctty;
+
+/*
+ * Compile-time check: the line inside the #if is deliberately not valid C,
+ * so the build breaks if the three constants are not consecutive.
+ */
+#if (FD_STDIN != FD_STDOUT-1) || (FD_STDOUT != FD_STDERR-1)
+FD_STDIN, FD_STDOUT, FD_STDERR must be a sequence n, n+1, n+2
+#endif
+
+/*
+ * Number of hash chains and the function mapping a node index to a chain.
+ * NFDCACHE must be a power of two: FD_NHASH() masks with NFDCACHE - 1, so
+ * its result is always in the range 0 .. NFDCACHE - 1 and is a valid
+ * subscript for an array of NFDCACHE chain heads.  (Masking with the
+ * count itself, as was done with a count of 3, could yield 3 -- one past
+ * the end of a 3-element array.)
+ */
+#define NFDCACHE 4
+#define FD_NHASH(ix) ((ix) & (NFDCACHE - 1))
+
+/*
+ * Cache head
+ *
+ * One head per hash chain.  The two pointers must line up with fd_forw
+ * and fd_back at the start of struct fdescnode: fdesc_init() stores the
+ * head's own address in them cast to struct fdescnode *, and chain walks
+ * stop when they arrive back at that address.
+ */
+struct fdcache {
+ struct fdescnode *fc_forw;
+ struct fdescnode *fc_back;
+};
+
+static struct fdcache fdcache[NFDCACHE];
+
+/*
+ * Initialise cache headers
+ *
+ * Sets devctty and makes every hash chain empty by pointing each head at
+ * itself.  Returns 0: the routine is declared as returning int and is
+ * installed in the filesystem operations vector, so it must not fall off
+ * the end without a value.
+ * NOTE(review): makedev(nchrdev, 0) presumably picks a major number just
+ * past the real character devices -- confirm.
+ */
+int
+fdesc_init()
+{
+	struct fdcache *fc;
+
+	devctty = makedev(nchrdev, 0);
+
+	for (fc = fdcache; fc < fdcache + NFDCACHE; fc++)
+		fc->fc_forw = fc->fc_back = (struct fdescnode *) fc;
+	return (0);
+}
+
+/*
+ * Return the hash chain head that node index ix belongs to.
+ */
+static struct fdcache *
+fdesc_hash(ix)
+	int ix;
+{
+	struct fdcache *bucket = fdcache + FD_NHASH(ix);
+
+	return (bucket);
+}
+
+/*
+ * Find or create the vnode for node index ix on mount mp.
+ *
+ * If a node with the same index and mount is already on the hash chain it
+ * is returned through *vpp after a successful vget(); a failing vget()
+ * restarts the search.  Otherwise a new vnode and fdescnode are created
+ * with type ftype, fd_fd set to -1 and no link text, and the node is put
+ * on the chain.  The vnode's v_type is not set here.
+ *
+ * Returns 0 on success or the error from getnewvnode().
+ */
+int
+fdesc_allocvp(ftype, ix, mp, vpp)
+ fdntype ftype;
+ int ix;
+ struct mount *mp;
+ struct vnode **vpp;
+{
+ struct fdcache *fc;
+ struct fdescnode *fd;
+ int error = 0;
+
+loop:
+ fc = fdesc_hash(ix);
+ /* the walk ends when it comes back round to the chain head */
+ for (fd = fc->fc_forw; fd != (struct fdescnode *) fc; fd = fd->fd_forw) {
+ if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) {
+ if (vget(fd->fd_vnode, 0))
+ goto loop;
+ *vpp = fd->fd_vnode;
+ return (error);
+ }
+ }
+
+ /*
+ * otherwise lock the array while we call getnewvnode
+ * since that can block.
+ */
+ if (fdcache_lock & FDL_LOCKED) {
+ fdcache_lock |= FDL_WANT;
+ sleep((caddr_t) &fdcache_lock, PINOD);
+ /* search again: the node may have been created while we slept */
+ goto loop;
+ }
+ fdcache_lock |= FDL_LOCKED;
+
+ error = getnewvnode(VT_FDESC, mp, fdesc_vnodeop_p, vpp);
+ if (error)
+ goto out;
+ MALLOC(fd, void *, sizeof(struct fdescnode), M_TEMP, M_WAITOK);
+ (*vpp)->v_data = fd;
+ fd->fd_vnode = *vpp;
+ fd->fd_type = ftype;
+ fd->fd_fd = -1;
+ fd->fd_link = 0;
+ fd->fd_ix = ix;
+ fc = fdesc_hash(ix);
+ insque(fd, fc);
+
+out:;
+ /* drop the lock and wake anybody who went to sleep waiting for it */
+ fdcache_lock &= ~FDL_LOCKED;
+
+ if (fdcache_lock & FDL_WANT) {
+ fdcache_lock &= ~FDL_WANT;
+ wakeup((caddr_t) &fdcache_lock);
+ }
+
+ return (error);
+}
+
+/*
+ * Look up one name component in an fdesc directory.
+ *
+ * ap->a_dvp is the directory being searched, ap->a_cnp describes the name
+ * and the calling process, and the resulting vnode is stored through
+ * ap->a_vpp.
+ *
+ *  - "." in any directory yields the directory itself, referenced and
+ *    locked.
+ *  - In the root: "fd" is a directory, "tty" is available only when the
+ *    process has a controlling terminal (ENXIO otherwise), and "stdin",
+ *    "stdout", "stderr" are symbolic links to "fd/0", "fd/1", "fd/2".
+ *    Any other name is ENOENT.
+ *  - In "fd": ".." is the root; otherwise the name must be a decimal
+ *    number naming an open descriptor of the calling process.
+ *  - Every other node type is not a directory (ENOTDIR).
+ *
+ * Returns 0 with *vpp set, or an error with *vpp set to NULL.  The ".."
+ * case returns fdesc_root()'s result directly.
+ */
+int
+fdesc_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode * a_dvp;
+ struct vnode ** a_vpp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+ struct vnode **vpp = ap->a_vpp;
+ struct vnode *dvp = ap->a_dvp;
+ char *pname;
+ struct proc *p;
+ int nfiles;
+ unsigned fd;
+ int error;
+ struct vnode *fvp;
+ char *ln;
+
+ pname = ap->a_cnp->cn_nameptr;
+ if (ap->a_cnp->cn_namelen == 1 && *pname == '.') {
+ *vpp = dvp;
+ VREF(dvp);
+ VOP_LOCK(dvp);
+ return (0);
+ }
+
+ p = ap->a_cnp->cn_proc;
+ nfiles = p->p_fd->fd_nfiles;
+
+ switch (VTOFDESC(dvp)->fd_type) {
+ default:
+ case Flink:
+ case Fdesc:
+ case Fctty:
+ error = ENOTDIR;
+ goto bad;
+
+ case Froot:
+ if (ap->a_cnp->cn_namelen == 2 && bcmp(pname, "fd", 2) == 0) {
+ error = fdesc_allocvp(Fdevfd, FD_DEVFD, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ *vpp = fvp;
+ fvp->v_type = VDIR;
+ VOP_LOCK(fvp);
+ return (0);
+ }
+
+ if (ap->a_cnp->cn_namelen == 3 && bcmp(pname, "tty", 3) == 0) {
+ struct vnode *ttyvp = cttyvp(p);
+ if (ttyvp == NULL) {
+ error = ENXIO;
+ goto bad;
+ }
+ error = fdesc_allocvp(Fctty, FD_CTTY, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ *vpp = fvp;
+ fvp->v_type = VFIFO;
+ VOP_LOCK(fvp);
+ return (0);
+ }
+
+ /* ln stays 0 unless the name is one of the three std* links */
+ ln = 0;
+ switch (ap->a_cnp->cn_namelen) {
+ case 5:
+ if (bcmp(pname, "stdin", 5) == 0) {
+ ln = "fd/0";
+ fd = FD_STDIN;
+ }
+ break;
+ case 6:
+ if (bcmp(pname, "stdout", 6) == 0) {
+ ln = "fd/1";
+ fd = FD_STDOUT;
+ } else
+ if (bcmp(pname, "stderr", 6) == 0) {
+ ln = "fd/2";
+ fd = FD_STDERR;
+ }
+ break;
+ }
+
+ if (ln) {
+ /* here fd holds the node index of the link, not a descriptor */
+ error = fdesc_allocvp(Flink, fd, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ VTOFDESC(fvp)->fd_link = ln;
+ *vpp = fvp;
+ fvp->v_type = VLNK;
+ VOP_LOCK(fvp);
+ return (0);
+ } else {
+ error = ENOENT;
+ goto bad;
+ }
+
+ /* NOTREACHED: both branches above leave the switch */
+
+ case Fdevfd:
+ if (ap->a_cnp->cn_namelen == 2 && bcmp(pname, "..", 2) == 0) {
+ error = fdesc_root(dvp->v_mount, vpp);
+ return (error);
+ }
+
+ /*
+ * Parse a decimal descriptor number.  Accumulation stops as
+ * soon as the value reaches nfiles, which keeps it from growing
+ * without bound on long digit strings.
+ */
+ fd = 0;
+ while (*pname >= '0' && *pname <= '9') {
+ fd = 10 * fd + *pname++ - '0';
+ if (fd >= nfiles)
+ break;
+ }
+
+ /* anything left over (a non-digit, or unparsed digits) is no such name */
+ if (*pname != '\0') {
+ error = ENOENT;
+ goto bad;
+ }
+
+ if (fd >= nfiles || p->p_fd->fd_ofiles[fd] == NULL) {
+ error = EBADF;
+ goto bad;
+ }
+
+ error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ VTOFDESC(fvp)->fd_fd = fd;
+ /*
+ * NOTE(review): unlike the root-directory cases, no VOP_LOCK and no
+ * v_type assignment happen here -- confirm that this is intended.
+ */
+ *vpp = fvp;
+ return (0);
+ }
+
+bad:;
+ *vpp = NULL;
+ return (error);
+}
+
+/*
+ * Open an fdesc vnode.
+ *
+ * A descriptor node records the descriptor to duplicate in the calling
+ * process and returns ENODEV (see the comment in the body).  The
+ * controlling-terminal node is opened through cttyopen().  Every other
+ * node type opens successfully with no further action.
+ */
+int
+fdesc_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct fdescnode *fdn = VTOFDESC(ap->a_vp);
+
+	if (fdn->fd_type == Fdesc) {
+		/*
+		 * XXX Kludge: set p->p_dupfd to contain the value of the
+		 * the file descriptor being sought for duplication. The error
+		 * return ensures that the vnode for this device will be
+		 * released by vn_open. Open will detect this special error and
+		 * take the actions in dupfdopen.  Other callers of vn_open or
+		 * VOP_OPEN will simply report the error.
+		 */
+		ap->a_p->p_dupfd = fdn->fd_fd;	/* XXX */
+		return (ENODEV);
+	}
+
+	if (fdn->fd_type == Fctty)
+		return (cttyopen(devctty, ap->a_mode, 0, ap->a_p));
+
+	return (0);
+}
+
+/*
+ * Fetch the attributes of the object behind descriptor fd of process p.
+ *
+ * For a vnode-backed descriptor the underlying vnode's attributes are
+ * returned, except that a directory is reported as a FIFO.  For a socket
+ * the result of soo_stat() is translated into *vap.  Any other descriptor
+ * type panics.
+ *
+ * Returns EBADF when fd is negative, beyond the descriptor table or not
+ * open; otherwise the error from VOP_GETATTR() or soo_stat(), or 0.
+ */
+static int
+fdesc_attr(fd, vap, cred, p)
+	int fd;
+	struct vattr *vap;
+	struct ucred *cred;
+	struct proc *p;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct file *fp;
+	struct stat stb;
+	int error;
+
+	/*
+	 * fd is signed here and nodes start out with fd_fd == -1, so a
+	 * negative value has to be rejected explicitly; it would otherwise
+	 * pass the upper-bound test and index in front of fd_ofiles[].
+	 */
+	if (fd < 0 || fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (EBADF);
+
+	switch (fp->f_type) {
+	case DTYPE_VNODE:
+		error = VOP_GETATTR((struct vnode *) fp->f_data, vap, cred, p);
+		if (error == 0 && vap->va_type == VDIR) {
+			/*
+			 * don't allow directories to show up because
+			 * that causes loops in the namespace.
+			 */
+			vap->va_type = VFIFO;
+		}
+		break;
+
+	case DTYPE_SOCKET:
+		error = soo_stat((struct socket *)fp->f_data, &stb);
+		if (error == 0) {
+			vattr_null(vap);
+			vap->va_type = VSOCK;
+			vap->va_mode = stb.st_mode;
+			vap->va_nlink = stb.st_nlink;
+			vap->va_uid = stb.st_uid;
+			vap->va_gid = stb.st_gid;
+			vap->va_fsid = stb.st_dev;
+			vap->va_fileid = stb.st_ino;
+			vap->va_size = stb.st_size;
+			vap->va_blocksize = stb.st_blksize;
+			vap->va_atime = stb.st_atimespec;
+			vap->va_mtime = stb.st_mtimespec;
+			vap->va_ctime = stb.st_ctimespec;
+			vap->va_gen = stb.st_gen;
+			vap->va_flags = stb.st_flags;
+			vap->va_rdev = stb.st_rdev;
+			vap->va_bytes = stb.st_blocks * stb.st_blksize;
+		}
+		break;
+
+	default:
+		panic("fdesc attr");
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Get the attributes of an fdesc vnode.
+ *
+ * Descriptor nodes report the attributes of the object behind the
+ * descriptor (see fdesc_attr()).  The root, the "fd" directory, the
+ * symbolic links and the tty node get synthetic attributes: owned by
+ * uid/gid 0, stamped with the boot time, with a type, mode, link count
+ * and size that depend on the node type.  An unknown node type panics.
+ *
+ * On success the vnode's v_type is updated to the reported type.
+ * Returns 0 or the error from fdesc_attr().
+ */
+int
+fdesc_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct fdescnode *fdn = VTOFDESC(vp);
+	int error = 0;
+
+	switch (fdn->fd_type) {
+	case Fdesc:
+		error = fdesc_attr(fdn->fd_fd, vap, ap->a_cred, ap->a_p);
+		break;
+
+	case Froot:
+	case Fdevfd:
+	case Flink:
+	case Fctty:
+		bzero((caddr_t) vap, sizeof(*vap));
+		vattr_null(vap);
+		vap->va_fileid = fdn->fd_ix;
+
+		/* type-specific part */
+		if (fdn->fd_type == Flink) {
+			vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+			vap->va_type = VLNK;
+			vap->va_nlink = 1;
+			vap->va_size = strlen(fdn->fd_link);
+		} else if (fdn->fd_type == Fctty) {
+			vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;
+			vap->va_type = VFIFO;
+			vap->va_nlink = 1;
+			vap->va_size = 0;
+		} else {
+			vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+			vap->va_type = VDIR;
+			vap->va_nlink = 2;
+			vap->va_size = DEV_BSIZE;
+		}
+
+		/* part common to all synthetic nodes */
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+		vap->va_blocksize = DEV_BSIZE;
+		vap->va_atime.ts_sec = boottime.tv_sec;
+		vap->va_atime.ts_nsec = 0;
+		vap->va_mtime = vap->va_atime;
+		vap->va_ctime = vap->va_mtime;
+		vap->va_gen = 0;
+		vap->va_flags = 0;
+		vap->va_rdev = 0;
+		vap->va_bytes = 0;
+		break;
+
+	default:
+		panic("fdesc_getattr");
+		break;
+	}
+
+	if (error == 0)
+		vp->v_type = vap->va_type;
+
+	return (error);
+}
+
+/*
+ * Set attributes through an fdesc vnode.
+ *
+ * Only descriptor nodes can be changed.  The tty node accepts the request
+ * and does nothing; every other node type is refused with EACCES.  For a
+ * descriptor node the request is forwarded to the underlying vnode; a
+ * socket accepts it and does nothing, and any other descriptor type
+ * panics.  Returns EBADF when the descriptor is out of range or not open.
+ */
+int
+fdesc_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct fdescnode *fdn = VTOFDESC(ap->a_vp);
+	struct filedesc *fdp = ap->a_p->p_fd;
+	struct file *fp;
+	unsigned fd;
+	int error;
+
+	/*
+	 * Can't mess with the root vnode
+	 */
+	if (fdn->fd_type == Fctty)
+		return (0);
+	if (fdn->fd_type != Fdesc)
+		return (EACCES);
+
+	fd = fdn->fd_fd;
+	if (fd >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[fd];
+	if (fp == NULL)
+		return (EBADF);
+
+	/*
+	 * Can setattr the underlying vnode, but not sockets!
+	 */
+	switch (fp->f_type) {
+	case DTYPE_VNODE:
+		error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap,
+		    ap->a_cred, ap->a_p);
+		break;
+
+	case DTYPE_SOCKET:
+		error = 0;
+		break;
+
+	default:
+		panic("fdesc setattr");
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Size in bytes of every directory record produced by fdesc_readdir();
+ * directory offsets are therefore multiples of UIO_MX.
+ */
+#define UIO_MX 16
+
+/*
+ * Fixed contents of the fdesc root directory.  Each row gives the file
+ * number, record length, name length and name of one entry; the list is
+ * terminated by an all-zero row (d_fileno == 0).
+ */
+static struct dirtmp {
+ u_long d_fileno;
+ u_short d_reclen;
+ u_short d_namlen;
+ char d_name[8];
+} rootent[] = {
+ { FD_DEVFD, UIO_MX, 2, "fd" },
+ { FD_STDIN, UIO_MX, 5, "stdin" },
+ { FD_STDOUT, UIO_MX, 6, "stdout" },
+ { FD_STDERR, UIO_MX, 6, "stderr" },
+ { FD_CTTY, UIO_MX, 3, "tty" },
+ { 0 }
+};
+
+/*
+ * Read directory entries from an fdesc directory vnode.
+ *
+ * The root directory (Froot) is served from the static rootent[] table,
+ * leaving out "tty" when the process has no controlling terminal vnode
+ * and stdin/stdout/stderr when the matching descriptor is not open.
+ * Any other directory node lists one entry, named by its number, for
+ * every open descriptor of the calling process.
+ *
+ * Every record is exactly UIO_MX bytes long, so the directory offset is
+ * (entry index * UIO_MX).  The offset arrives from the caller and is used
+ * as an array index, so it is validated before use: a negative offset is
+ * rejected and an offset at or beyond the end reads as end-of-directory.
+ * Only whole records are copied out.
+ *
+ * Returns 0 for the Fctty node, ENOTDIR for Fdesc nodes, EINVAL for a
+ * negative offset or a buffer smaller than one record, and otherwise 0
+ * or the error reported by uiomove().
+ */
+int
+fdesc_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct filedesc *fdp;
+	struct dirent d;
+	struct dirent *dp = &d;
+	int i;
+	int error;
+
+	switch (VTOFDESC(ap->a_vp)->fd_type) {
+	case Fctty:
+		return (0);
+
+	case Fdesc:
+		return (ENOTDIR);
+
+	default:
+		break;
+	}
+
+	/*
+	 * A negative offset would index in front of the tables below, and
+	 * a buffer shorter than one record could only receive a truncated
+	 * entry.
+	 */
+	if (uio->uio_offset < 0 || uio->uio_resid < UIO_MX)
+		return (EINVAL);
+
+	fdp = uio->uio_procp->p_fd;
+
+	if (VTOFDESC(ap->a_vp)->fd_type == Froot) {
+		struct dirtmp *dt;
+		/* Number of real entries, not counting the terminator. */
+		int nroot = sizeof(rootent) / sizeof(rootent[0]) - 1;
+
+		/*
+		 * Compare before narrowing the offset to an int so that a
+		 * huge offset cannot wrap into a valid-looking index.
+		 */
+		if (uio->uio_offset / UIO_MX >= nroot)
+			return (0);
+		i = uio->uio_offset / UIO_MX;
+		error = 0;
+
+		while (uio->uio_resid >= UIO_MX) {
+			dt = &rootent[i];
+			if (dt->d_fileno == 0)
+				break;		/* terminator: end of directory */
+			i++;
+
+			switch (dt->d_fileno) {
+			case FD_CTTY:
+				if (cttyvp(uio->uio_procp) == NULL)
+					continue;
+				break;
+
+			case FD_STDIN:
+			case FD_STDOUT:
+			case FD_STDERR:
+				if ((dt->d_fileno-FD_STDIN) >= fdp->fd_nfiles)
+					continue;
+				if (fdp->fd_ofiles[dt->d_fileno-FD_STDIN] == NULL)
+					continue;
+				break;
+			}
+			bzero((caddr_t) dp, UIO_MX);
+			dp->d_fileno = dt->d_fileno;
+			dp->d_namlen = dt->d_namlen;
+			dp->d_type = DT_UNKNOWN;
+			dp->d_reclen = dt->d_reclen;
+			bcopy(dt->d_name, dp->d_name, dp->d_namlen+1);
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+		}
+		uio->uio_offset = i * UIO_MX;
+		return (error);
+	}
+
+	/* Same range check as above, against the descriptor table size. */
+	if (uio->uio_offset / UIO_MX >= fdp->fd_nfiles)
+		return (0);
+	i = uio->uio_offset / UIO_MX;
+	error = 0;
+
+	while (uio->uio_resid >= UIO_MX && i < fdp->fd_nfiles) {
+		if (fdp->fd_ofiles[i] != NULL) {
+			bzero((caddr_t) dp, UIO_MX);
+
+			dp->d_namlen = sprintf(dp->d_name, "%d", i);
+			dp->d_reclen = UIO_MX;
+			dp->d_type = DT_UNKNOWN;
+			dp->d_fileno = i + FD_STDIN;
+			/*
+			 * And ship to userland
+			 */
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+		}
+		i++;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+	return (error);
+}
+
+/*
+ * Copy the target of an fdesc symbolic link to the caller.
+ *
+ * Returns EPERM when the vnode is not of type VLNK, EOPNOTSUPP when it
+ * is a link but not an Flink node, and otherwise the result of copying
+ * the node's fd_link string (without its terminating NUL) via uiomove().
+ */
+int
+fdesc_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	char *target;
+
+	if (vp->v_type != VLNK)
+		return (EPERM);
+
+	if (VTOFDESC(vp)->fd_type != Flink)
+		return (EOPNOTSUPP);
+
+	target = VTOFDESC(vp)->fd_link;
+	return (uiomove(target, strlen(target), ap->a_uio));
+}
+
+/*
+ * Read from an fdesc vnode.  Only the Fctty node supports this, by
+ * passing the request to cttyread(); every other node type yields
+ * EOPNOTSUPP.
+ */
+int
+fdesc_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	return (cttyread(devctty, ap->a_uio, ap->a_ioflag));
+}
+
+/*
+ * Write to an fdesc vnode.  Only the Fctty node supports this, by
+ * passing the request to cttywrite(); every other node type yields
+ * EOPNOTSUPP.
+ */
+int
+fdesc_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	return (cttywrite(devctty, ap->a_uio, ap->a_ioflag));
+}
+
+/*
+ * ioctl on an fdesc vnode.  Only the Fctty node supports this, by
+ * passing the request to cttyioctl(); every other node type yields
+ * EOPNOTSUPP.
+ */
+int
+fdesc_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	return (cttyioctl(devctty, ap->a_command, ap->a_data,
+	    ap->a_fflag, ap->a_p));
+}
+
+/*
+ * select on an fdesc vnode.  Only the Fctty node supports this, by
+ * passing the request to cttyselect(); every other node type yields
+ * EOPNOTSUPP.
+ *
+ * NOTE(review): a_which is never forwarded; a_fflags is what is handed
+ * to cttyselect() as its second argument.  Confirm against cttyselect()'s
+ * parameter list that this is the intended value.
+ */
+int
+fdesc_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int a_which;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	return (cttyselect(devctty, ap->a_fflags, ap->a_p));
+}
+
+/*
+ * Called when the vnode becomes inactive.  Always returns 0.
+ */
+int
+fdesc_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	/*
+	 * Reset the type to VNON so that a stale v_type cannot cause
+	 * nasty things to happen in vgone().
+	 */
+	ap->a_vp->v_type = VNON;
+	return (0);
+}
+
+/*
+ * Release the fdesc-private data of a vnode that is being reclaimed:
+ * unlink the node with remque(), free v_data and clear the pointer.
+ * Always returns 0.
+ */
+int
+fdesc_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+
+ remque(VTOFDESC(vp));
+ FREE(vp->v_data, M_TEMP);
+ vp->v_data = 0;
+
+ return (0);
+}
+
+/*
+ * Return POSIX pathconf information for an fdesc vnode.
+ *
+ * Stores the limit selected by a_name in *a_retval and returns 0, or
+ * returns EINVAL (leaving *a_retval untouched) for any name that is not
+ * handled here.
+ */
+int
+fdesc_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_MAX_CANON:
+		*ap->a_retval = MAX_CANON;
+		return (0);
+	case _PC_MAX_INPUT:
+		*ap->a_retval = MAX_INPUT;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	case _PC_VDISABLE:
+		*ap->a_retval = _POSIX_VDISABLE;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Print out the contents of a /dev/fd vnode.
+ *
+ * The vnode argument is not examined; a fixed identification line is
+ * printed and 0 is returned.
+ */
+/* ARGSUSED */
+int
+fdesc_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ printf("tag VT_NON, fdesc vnode\n");
+ return (0);
+}
+
+/*
+ * vfree operation for fdesc: nothing to release, so the arguments are
+ * ignored and 0 is returned.
+ */
+/*void*/
+int
+fdesc_vfree(ap)
+ struct vop_vfree_args /* {
+ struct vnode *a_pvp;
+ ino_t a_ino;
+ int a_mode;
+ } */ *ap;
+{
+
+ return (0);
+}
+
+/*
+ * /dev/fd vnode unsupported operation
+ *
+ * Shared stub that takes no notice of its arguments and returns
+ * EOPNOTSUPP.
+ */
+int
+fdesc_enotsupp()
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * /dev/fd "should never get here" operation
+ *
+ * Shared stub that panics with "fdesc: bad op"; there is no return
+ * statement after the panic() call.
+ */
+int
+fdesc_badop()
+{
+
+ panic("fdesc: bad op");
+ /* NOTREACHED */
+}
+
+/*
+ * /dev/fd vnode null operation
+ *
+ * Does nothing and returns 0.
+ */
+int
+fdesc_nullop()
+{
+
+ return (0);
+}
+
+/*
+ * Operations that need no fdesc-specific code are mapped onto shared
+ * stubs: fdesc_enotsupp (returns EOPNOTSUPP), fdesc_badop (panics) or
+ * nullop.  Each macro casts the stub to the handler type of the
+ * operation it stands in for, i.e. a function taking a pointer to that
+ * operation's vop_*_args structure.
+ */
+#define fdesc_create ((int (*) __P((struct vop_create_args *)))fdesc_enotsupp)
+#define fdesc_mknod ((int (*) __P((struct vop_mknod_args *)))fdesc_enotsupp)
+#define fdesc_close ((int (*) __P((struct vop_close_args *)))nullop)
+#define fdesc_access ((int (*) __P((struct vop_access_args *)))nullop)
+#define fdesc_mmap ((int (*) __P((struct vop_mmap_args *)))fdesc_enotsupp)
+#define fdesc_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define fdesc_seek ((int (*) __P((struct vop_seek_args *)))nullop)
+#define fdesc_remove ((int (*) __P((struct vop_remove_args *)))fdesc_enotsupp)
+#define fdesc_link ((int (*) __P((struct vop_link_args *)))fdesc_enotsupp)
+#define fdesc_rename ((int (*) __P((struct vop_rename_args *)))fdesc_enotsupp)
+#define fdesc_mkdir ((int (*) __P((struct vop_mkdir_args *)))fdesc_enotsupp)
+#define fdesc_rmdir ((int (*) __P((struct vop_rmdir_args *)))fdesc_enotsupp)
+#define fdesc_symlink ((int (*) __P((struct vop_symlink_args *)))fdesc_enotsupp)
+#define fdesc_abortop ((int (*) __P((struct vop_abortop_args *)))nullop)
+#define fdesc_lock ((int (*) __P((struct vop_lock_args *)))nullop)
+#define fdesc_unlock ((int (*) __P((struct vop_unlock_args *)))nullop)
+#define fdesc_bmap ((int (*) __P((struct vop_bmap_args *)))fdesc_badop)
+#define fdesc_strategy ((int (*) __P((struct vop_strategy_args *)))fdesc_badop)
+#define fdesc_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+#define fdesc_advlock ((int (*) __P((struct vop_advlock_args *)))fdesc_enotsupp)
+#define fdesc_blkatoff \
+	((int (*) __P((struct vop_blkatoff_args *)))fdesc_enotsupp)
+#define fdesc_vget ((int (*) __P((struct vop_vget_args *)))fdesc_enotsupp)
+/* Cast to the vop_valloc_args handler type, like every other entry. */
+#define fdesc_valloc ((int (*) __P((struct vop_valloc_args *)))fdesc_enotsupp)
+#define fdesc_truncate \
+	((int (*) __P((struct vop_truncate_args *)))fdesc_enotsupp)
+#define fdesc_update ((int (*) __P((struct vop_update_args *)))fdesc_enotsupp)
+#define fdesc_bwrite ((int (*) __P((struct vop_bwrite_args *)))fdesc_enotsupp)
+
+/*
+ * Vnode operations table for fdesc.  Each row pairs an operation
+ * descriptor with the fdesc handler (or stub macro) for it; the row for
+ * vop_default_desc names vn_default_error, and a NULL/NULL row ends the
+ * list.  fdesc_vnodeop_opv_desc ties the table to fdesc_vnodeop_p.
+ */
+int (**fdesc_vnodeop_p)();
+struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, fdesc_lookup }, /* lookup */
+ { &vop_create_desc, fdesc_create }, /* create */
+ { &vop_mknod_desc, fdesc_mknod }, /* mknod */
+ { &vop_open_desc, fdesc_open }, /* open */
+ { &vop_close_desc, fdesc_close }, /* close */
+ { &vop_access_desc, fdesc_access }, /* access */
+ { &vop_getattr_desc, fdesc_getattr }, /* getattr */
+ { &vop_setattr_desc, fdesc_setattr }, /* setattr */
+ { &vop_read_desc, fdesc_read }, /* read */
+ { &vop_write_desc, fdesc_write }, /* write */
+ { &vop_ioctl_desc, fdesc_ioctl }, /* ioctl */
+ { &vop_select_desc, fdesc_select }, /* select */
+ { &vop_mmap_desc, fdesc_mmap }, /* mmap */
+ { &vop_fsync_desc, fdesc_fsync }, /* fsync */
+ { &vop_seek_desc, fdesc_seek }, /* seek */
+ { &vop_remove_desc, fdesc_remove }, /* remove */
+ { &vop_link_desc, fdesc_link }, /* link */
+ { &vop_rename_desc, fdesc_rename }, /* rename */
+ { &vop_mkdir_desc, fdesc_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, fdesc_rmdir }, /* rmdir */
+ { &vop_symlink_desc, fdesc_symlink }, /* symlink */
+ { &vop_readdir_desc, fdesc_readdir }, /* readdir */
+ { &vop_readlink_desc, fdesc_readlink }, /* readlink */
+ { &vop_abortop_desc, fdesc_abortop }, /* abortop */
+ { &vop_inactive_desc, fdesc_inactive }, /* inactive */
+ { &vop_reclaim_desc, fdesc_reclaim }, /* reclaim */
+ { &vop_lock_desc, fdesc_lock }, /* lock */
+ { &vop_unlock_desc, fdesc_unlock }, /* unlock */
+ { &vop_bmap_desc, fdesc_bmap }, /* bmap */
+ { &vop_strategy_desc, fdesc_strategy }, /* strategy */
+ { &vop_print_desc, fdesc_print }, /* print */
+ { &vop_islocked_desc, fdesc_islocked }, /* islocked */
+ { &vop_pathconf_desc, fdesc_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fdesc_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fdesc_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fdesc_valloc }, /* valloc */
+ { &vop_vfree_desc, fdesc_vfree }, /* vfree */
+ { &vop_truncate_desc, fdesc_truncate }, /* truncate */
+ { &vop_update_desc, fdesc_update }, /* update */
+ { &vop_bwrite_desc, fdesc_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc fdesc_vnodeop_opv_desc =
+ { &fdesc_vnodeop_p, fdesc_vnodeop_entries };
diff --git a/sys/fs/fifofs/fifo.h b/sys/fs/fifofs/fifo.h
new file mode 100644
index 000000000000..e89186d8b896
--- /dev/null
+++ b/sys/fs/fifofs/fifo.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fifo.h 8.2 (Berkeley) 2/2/94
+ */
+
+#ifdef FIFO
+/*
+ * Prototypes for fifo operations on vnodes.
+ *
+ * Operations with real code are declared as functions.  The others are
+ * macros that cast one of three stubs to the handler type of the
+ * operation: fifo_badop, fifo_ebadf or nullop.
+ */
+int fifo_badop(),
+ fifo_ebadf();
+
+int fifo_lookup __P((struct vop_lookup_args *));
+#define fifo_create ((int (*) __P((struct vop_create_args *)))fifo_badop)
+#define fifo_mknod ((int (*) __P((struct vop_mknod_args *)))fifo_badop)
+int fifo_open __P((struct vop_open_args *));
+int fifo_close __P((struct vop_close_args *));
+#define fifo_access ((int (*) __P((struct vop_access_args *)))fifo_ebadf)
+#define fifo_getattr ((int (*) __P((struct vop_getattr_args *)))fifo_ebadf)
+#define fifo_setattr ((int (*) __P((struct vop_setattr_args *)))fifo_ebadf)
+int fifo_read __P((struct vop_read_args *));
+int fifo_write __P((struct vop_write_args *));
+int fifo_ioctl __P((struct vop_ioctl_args *));
+int fifo_select __P((struct vop_select_args *));
+#define fifo_mmap ((int (*) __P((struct vop_mmap_args *)))fifo_badop)
+#define fifo_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define fifo_seek ((int (*) __P((struct vop_seek_args *)))fifo_badop)
+#define fifo_remove ((int (*) __P((struct vop_remove_args *)))fifo_badop)
+#define fifo_link ((int (*) __P((struct vop_link_args *)))fifo_badop)
+#define fifo_rename ((int (*) __P((struct vop_rename_args *)))fifo_badop)
+#define fifo_mkdir ((int (*) __P((struct vop_mkdir_args *)))fifo_badop)
+#define fifo_rmdir ((int (*) __P((struct vop_rmdir_args *)))fifo_badop)
+#define fifo_symlink ((int (*) __P((struct vop_symlink_args *)))fifo_badop)
+#define fifo_readdir ((int (*) __P((struct vop_readdir_args *)))fifo_badop)
+#define fifo_readlink ((int (*) __P((struct vop_readlink_args *)))fifo_badop)
+#define fifo_abortop ((int (*) __P((struct vop_abortop_args *)))fifo_badop)
+#define fifo_inactive ((int (*) __P((struct vop_inactive_args *)))nullop)
+#define fifo_reclaim ((int (*) __P((struct vop_reclaim_args *)))nullop)
+int fifo_lock __P((struct vop_lock_args *));
+int fifo_unlock __P((struct vop_unlock_args *));
+int fifo_bmap __P((struct vop_bmap_args *));
+#define fifo_strategy ((int (*) __P((struct vop_strategy_args *)))fifo_badop)
+int fifo_print __P((struct vop_print_args *));
+#define fifo_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+int fifo_pathconf __P((struct vop_pathconf_args *));
+int fifo_advlock __P((struct vop_advlock_args *));
+#define fifo_blkatoff ((int (*) __P((struct vop_blkatoff_args *)))fifo_badop)
+#define fifo_valloc ((int (*) __P((struct vop_valloc_args *)))fifo_badop)
+#define fifo_reallocblks \
+ ((int (*) __P((struct vop_reallocblks_args *)))fifo_badop)
+#define fifo_vfree ((int (*) __P((struct vop_vfree_args *)))fifo_badop)
+#define fifo_truncate ((int (*) __P((struct vop_truncate_args *)))nullop)
+#define fifo_update ((int (*) __P((struct vop_update_args *)))nullop)
+#define fifo_bwrite ((int (*) __P((struct vop_bwrite_args *)))nullop)
+#endif /* FIFO */
diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
new file mode 100644
index 000000000000..bad33a430b62
--- /dev/null
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fifo_vnops.c 8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/stat.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <miscfs/fifofs/fifo.h>
+
+/*
+ * This structure is associated with the FIFO vnode and stores
+ * the state associated with the FIFO.
+ *
+ * It is allocated by the first fifo_open() and freed by fifo_close()
+ * once the vnode has no other users.
+ */
+struct fifoinfo {
+ struct socket *fi_readsock; /* socket fifo_read() receives from */
+ struct socket *fi_writesock; /* socket fifo_write() sends on */
+ long fi_readers; /* opens for reading not yet closed */
+ long fi_writers; /* opens for writing not yet closed */
+};
+
+/*
+ * Vnode operations table for fifos.  Each row pairs an operation
+ * descriptor with the fifo handler (or stub macro from fifo.h) for it;
+ * the row for vop_default_desc names vn_default_error, and a NULL/NULL
+ * row ends the list.  fifo_vnodeop_opv_desc ties the table to
+ * fifo_vnodeop_p.
+ */
+int (**fifo_vnodeop_p)();
+struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, fifo_lookup }, /* lookup */
+ { &vop_create_desc, fifo_create }, /* create */
+ { &vop_mknod_desc, fifo_mknod }, /* mknod */
+ { &vop_open_desc, fifo_open }, /* open */
+ { &vop_close_desc, fifo_close }, /* close */
+ { &vop_access_desc, fifo_access }, /* access */
+ { &vop_getattr_desc, fifo_getattr }, /* getattr */
+ { &vop_setattr_desc, fifo_setattr }, /* setattr */
+ { &vop_read_desc, fifo_read }, /* read */
+ { &vop_write_desc, fifo_write }, /* write */
+ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */
+ { &vop_select_desc, fifo_select }, /* select */
+ { &vop_mmap_desc, fifo_mmap }, /* mmap */
+ { &vop_fsync_desc, fifo_fsync }, /* fsync */
+ { &vop_seek_desc, fifo_seek }, /* seek */
+ { &vop_remove_desc, fifo_remove }, /* remove */
+ { &vop_link_desc, fifo_link }, /* link */
+ { &vop_rename_desc, fifo_rename }, /* rename */
+ { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */
+ { &vop_symlink_desc, fifo_symlink }, /* symlink */
+ { &vop_readdir_desc, fifo_readdir }, /* readdir */
+ { &vop_readlink_desc, fifo_readlink }, /* readlink */
+ { &vop_abortop_desc, fifo_abortop }, /* abortop */
+ { &vop_inactive_desc, fifo_inactive }, /* inactive */
+ { &vop_reclaim_desc, fifo_reclaim }, /* reclaim */
+ { &vop_lock_desc, fifo_lock }, /* lock */
+ { &vop_unlock_desc, fifo_unlock }, /* unlock */
+ { &vop_bmap_desc, fifo_bmap }, /* bmap */
+ { &vop_strategy_desc, fifo_strategy }, /* strategy */
+ { &vop_print_desc, fifo_print }, /* print */
+ { &vop_islocked_desc, fifo_islocked }, /* islocked */
+ { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fifo_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fifo_valloc }, /* valloc */
+ { &vop_vfree_desc, fifo_vfree }, /* vfree */
+ { &vop_truncate_desc, fifo_truncate }, /* truncate */
+ { &vop_update_desc, fifo_update }, /* update */
+ { &vop_bwrite_desc, fifo_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc fifo_vnodeop_opv_desc =
+ { &fifo_vnodeop_p, fifo_vnodeop_entries };
+
+/*
+ * Trivial lookup routine that always fails.
+ *
+ * Stores NULL in *a_vpp and returns ENOTDIR.
+ */
+/* ARGSUSED */
+fifo_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode * a_dvp;
+ struct vnode ** a_vpp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+
+ *ap->a_vpp = NULL;
+ return (ENOTDIR);
+}
+
+/*
+ * Open called to set up a new instance of a fifo or
+ * to find an active instance of a fifo.
+ *
+ * Opening for both reading and writing is refused with EINVAL.  The
+ * first open allocates the fifoinfo and a connected pair of AF_UNIX
+ * stream sockets.  Unless O_NONBLOCK is given, a reader then sleeps
+ * until there is a writer and a writer sleeps until there is a reader;
+ * a non-blocking writer with no reader present gets ENXIO.  If the open
+ * fails after the reader/writer count was raised, VOP_CLOSE() is called
+ * to undo it.
+ */
+/* ARGSUSED */
+fifo_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct fifoinfo *fip;
+ struct socket *rso, *wso;
+ int error;
+ static char openstr[] = "fifo";
+
+ if ((ap->a_mode & (FREAD|FWRITE)) == (FREAD|FWRITE))
+ return (EINVAL);
+ /*
+ * First open of this vnode: build the fifoinfo.  Every failure
+ * below releases what was set up so far and clears v_fifoinfo.
+ */
+ if ((fip = vp->v_fifoinfo) == NULL) {
+ MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK);
+ vp->v_fifoinfo = fip;
+ if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0)) {
+ free(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ return (error);
+ }
+ fip->fi_readsock = rso;
+ if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0)) {
+ (void)soclose(rso);
+ free(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ return (error);
+ }
+ fip->fi_writesock = wso;
+ if (error = unp_connect2(wso, rso)) {
+ (void)soclose(wso);
+ (void)soclose(rso);
+ free(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ return (error);
+ }
+ fip->fi_readers = fip->fi_writers = 0;
+ /* Data flows one way only: from wso to rso. */
+ wso->so_state |= SS_CANTRCVMORE;
+ rso->so_state |= SS_CANTSENDMORE;
+ }
+ error = 0;
+ if (ap->a_mode & FREAD) {
+ fip->fi_readers++;
+ /* First reader: let the write side send and wake waiting writers. */
+ if (fip->fi_readers == 1) {
+ fip->fi_writesock->so_state &= ~SS_CANTSENDMORE;
+ if (fip->fi_writers > 0)
+ wakeup((caddr_t)&fip->fi_writers);
+ }
+ if (ap->a_mode & O_NONBLOCK)
+ return (0);
+ /* Sleep, with the vnode unlocked, until a writer shows up. */
+ while (fip->fi_writers == 0) {
+ VOP_UNLOCK(vp);
+ error = tsleep((caddr_t)&fip->fi_readers,
+ PCATCH | PSOCK, openstr, 0);
+ VOP_LOCK(vp);
+ if (error)
+ break;
+ }
+ } else {
+ fip->fi_writers++;
+ if (fip->fi_readers == 0 && (ap->a_mode & O_NONBLOCK)) {
+ error = ENXIO;
+ } else {
+ /* First writer: let the read side receive and wake waiting readers. */
+ if (fip->fi_writers == 1) {
+ fip->fi_readsock->so_state &= ~SS_CANTRCVMORE;
+ if (fip->fi_readers > 0)
+ wakeup((caddr_t)&fip->fi_readers);
+ }
+ /* Sleep, with the vnode unlocked, until a reader shows up. */
+ while (fip->fi_readers == 0) {
+ VOP_UNLOCK(vp);
+ error = tsleep((caddr_t)&fip->fi_writers,
+ PCATCH | PSOCK, openstr, 0);
+ VOP_LOCK(vp);
+ if (error)
+ break;
+ }
+ }
+ }
+ /* Failed after counting ourselves: let close take the count back. */
+ if (error)
+ VOP_CLOSE(vp, ap->a_mode, ap->a_cred, ap->a_p);
+ return (error);
+}
+
+/*
+ * Vnode op for read
+ *
+ * Receives from the fifo's read socket with soreceive() and returns its
+ * result.  A zero-length request returns 0 at once.  With IO_NDELAY the
+ * socket is put into non-blocking mode (SS_NBIO) for the duration of the
+ * call.  The vnode is unlocked while soreceive() runs.
+ */
+/* ARGSUSED */
+fifo_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct uio *uio = ap->a_uio;
+ register struct socket *rso = ap->a_vp->v_fifoinfo->fi_readsock;
+ int error, startresid;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_READ)
+ panic("fifo_read mode");
+#endif
+ if (uio->uio_resid == 0)
+ return (0);
+ if (ap->a_ioflag & IO_NDELAY)
+ rso->so_state |= SS_NBIO;
+ /* Remembered so we can tell below whether anything was transferred. */
+ startresid = uio->uio_resid;
+ VOP_UNLOCK(ap->a_vp);
+ error = soreceive(rso, (struct mbuf **)0, uio, (int *)0,
+ (struct mbuf **)0, (struct mbuf **)0);
+ VOP_LOCK(ap->a_vp);
+ /*
+ * Clear EOF indication after first such return.
+ */
+ if (uio->uio_resid == startresid)
+ rso->so_state &= ~SS_CANTRCVMORE;
+ if (ap->a_ioflag & IO_NDELAY)
+ rso->so_state &= ~SS_NBIO;
+ return (error);
+}
+
+/*
+ * Vnode op for write
+ *
+ * Sends the caller's data on the fifo's write socket with sosend() and
+ * returns its result.  With IO_NDELAY the socket is put into
+ * non-blocking mode (SS_NBIO) for the duration of the call.  The vnode
+ * is unlocked while sosend() runs.
+ */
+/* ARGSUSED */
+fifo_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ struct socket *wso = ap->a_vp->v_fifoinfo->fi_writesock;
+ int error;
+
+#ifdef DIAGNOSTIC
+ if (ap->a_uio->uio_rw != UIO_WRITE)
+ panic("fifo_write mode");
+#endif
+ if (ap->a_ioflag & IO_NDELAY)
+ wso->so_state |= SS_NBIO;
+ VOP_UNLOCK(ap->a_vp);
+ error = sosend(wso, (struct mbuf *)0, ap->a_uio, 0, (struct mbuf *)0, 0);
+ VOP_LOCK(ap->a_vp);
+ if (ap->a_ioflag & IO_NDELAY)
+ wso->so_state &= ~SS_NBIO;
+ return (error);
+}
+
+/*
+ * Fifo ioctl operation.
+ *
+ * FIONBIO is accepted without doing anything.  Every other command is
+ * handed to soo_ioctl() on the read socket when the file was opened with
+ * FREAD, and on the write socket otherwise.
+ */
+/* ARGSUSED */
+fifo_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/* Stand-in file; f_data is the only member that gets set. */
+	struct file filetmp;
+
+	if (ap->a_command == FIONBIO)
+		return (0);
+
+	filetmp.f_data = (ap->a_fflag & FREAD) ?
+	    (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock :
+	    (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock;
+
+	return (soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_p));
+}
+
+/*
+ * Fifo select operation.
+ *
+ * Hands the request to soo_select() on the read socket when the file was
+ * opened with FREAD, and on the write socket otherwise.
+ */
+/* ARGSUSED */
+fifo_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int a_which;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/* Stand-in file; f_data is the only member that gets set. */
+	struct file filetmp;
+
+	filetmp.f_data = (ap->a_fflags & FREAD) ?
+	    (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock :
+	    (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock;
+
+	return (soo_select(&filetmp, ap->a_which, ap->a_p));
+}
+
+/*
+ * This is a noop, simply returning what one has been given.
+ *
+ * When the output pointers are non-NULL, *a_vpp receives a_vp itself
+ * and *a_bnp receives a_bn unchanged.  Always returns 0.
+ */
+fifo_bmap(ap)
+ struct vop_bmap_args /* {
+ struct vnode *a_vp;
+ daddr_t a_bn;
+ struct vnode **a_vpp;
+ daddr_t *a_bnp;
+ } */ *ap;
+{
+
+ if (ap->a_vpp != NULL)
+ *ap->a_vpp = ap->a_vp;
+ if (ap->a_bnp != NULL)
+ *ap->a_bnp = ap->a_bn;
+ return (0);
+}
+
+/*
+ * At the moment we do not do any locking.
+ *
+ * The lock operation ignores its argument and returns 0.
+ */
+/* ARGSUSED */
+fifo_lock(ap)
+ struct vop_lock_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ return (0);
+}
+
+/*
+ * Unlock operation: like fifo_lock(), ignores its argument and
+ * returns 0.
+ */
+/* ARGSUSED */
+fifo_unlock(ap)
+ struct vop_unlock_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ return (0);
+}
+
+/*
+ * Fifo close routine
+ *
+ * Drops the writer count when a_fflag has FWRITE set and the reader
+ * count otherwise.  When the last writer goes away the read socket is
+ * marked as unable to receive more; when the last reader goes away the
+ * write socket is marked as unable to send more.  If the vnode has no
+ * other users, both sockets are closed and the fifoinfo is freed.
+ * Returns the error from closing the read socket if there was one,
+ * otherwise the result of closing the write socket.
+ */
+/* ARGSUSED */
+fifo_close(ap)
+ struct vop_close_args /* {
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct fifoinfo *fip = vp->v_fifoinfo;
+ int error1, error2;
+
+ if (ap->a_fflag & FWRITE) {
+ fip->fi_writers--;
+ if (fip->fi_writers == 0)
+ socantrcvmore(fip->fi_readsock);
+ } else {
+ fip->fi_readers--;
+ if (fip->fi_readers == 0)
+ socantsendmore(fip->fi_writesock);
+ }
+ /* Someone else still holds the vnode: keep the sockets alive. */
+ if (vp->v_usecount > 1)
+ return (0);
+ error1 = soclose(fip->fi_readsock);
+ error2 = soclose(fip->fi_writesock);
+ FREE(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ if (error1)
+ return (error1);
+ return (error2);
+}
+
+/*
+ * Print out the contents of a fifo vnode.
+ *
+ * Prints the tag, the reader/writer counts (via fifo_printinfo()) and a
+ * newline.  Always returns 0: the routine is installed as the print
+ * operation and prototyped as returning int, so it must not fall off
+ * the end without a value.
+ */
+int
+fifo_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_NON");
+	fifo_printinfo(ap->a_vp);
+	printf("\n");
+	return (0);
+}
+
+/*
+ * Print out internal contents of a fifo vnode.
+ *
+ * Prints the reader and writer counts from the vnode's fifoinfo, with
+ * no trailing newline.  The counters are longs and are printed with
+ * %ld.  Always returns 0.
+ */
+int
+fifo_printinfo(vp)
+	struct vnode *vp;
+{
+	register struct fifoinfo *fip = vp->v_fifoinfo;
+
+	printf(", fifo with %ld readers and %ld writers",
+	    fip->fi_readers, fip->fi_writers);
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to FIFOs.
+ *
+ * Stores the limit selected by a_name in *a_retval and returns 0, or
+ * returns EINVAL (leaving *a_retval untouched) for any other name.
+ */
+fifo_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int limit;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		limit = LINK_MAX;
+		break;
+	case _PC_PIPE_BUF:
+		limit = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		limit = 1;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	*ap->a_retval = limit;
+	return (0);
+}
+
+/*
+ * Fifo failed operation
+ *
+ * Shared stub that returns EBADF; fifo.h maps the access, getattr and
+ * setattr operations onto it.
+ */
+fifo_ebadf()
+{
+
+ return (EBADF);
+}
+
+/*
+ * Fifo advisory byte-level locks.
+ *
+ * Not supported: the arguments are ignored and EOPNOTSUPP is returned.
+ */
+/* ARGSUSED */
+fifo_advlock(ap)
+ struct vop_advlock_args /* {
+ struct vnode *a_vp;
+ caddr_t a_id;
+ int a_op;
+ struct flock *a_fl;
+ int a_flags;
+ } */ *ap;
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Fifo bad operation
+ *
+ * Shared stub for operations that fifo.h maps to fifo_badop; it panics
+ * with "fifo_badop called" and has no return statement.
+ */
+fifo_badop()
+{
+
+ panic("fifo_badop called");
+ /* NOTREACHED */
+}
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
new file mode 100644
index 000000000000..14286ffeee0c
--- /dev/null
+++ b/sys/fs/nullfs/null.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null.h 8.2 (Berkeley) 1/21/94
+ *
+ * $Id: lofs.h,v 1.8 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+/*
+ * Arguments for a null mount.  Declared outside the KERNEL section of
+ * this header.
+ */
+struct null_args {
+ char *target; /* Target of loopback */
+};
+
+/*
+ * Per-mount data of a null mount; MOUNTTONULLMOUNT() obtains it from a
+ * mount's mnt_data.
+ */
+struct null_mount {
+ struct mount *nullm_vfs;
+ struct vnode *nullm_rootvp; /* Reference to root null_node */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references
+ *
+ * One null_node hangs off the v_data of each null vnode (see VTONULL)
+ * and records the lower vnode it stands for.
+ */
+struct null_node {
+ struct null_node *null_forw; /* Hash chain */
+ struct null_node *null_back;
+ struct vnode *null_lowervp; /* VREFed once */
+ struct vnode *null_vnode; /* Back pointer */
+};
+
+extern int null_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+
+/* Conversions between mounts, vnodes and their null-layer private data. */
+#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
+#define VTONULL(vp) ((struct null_node *)(vp)->v_data)
+#define NULLTOV(xp) ((xp)->null_vnode)
+/*
+ * NULLVPTOLOWERVP yields the lower vnode of a null vnode.  With
+ * NULLFS_DIAGNOSTIC it goes through null_checkvp(), which is also given
+ * the calling file and line; otherwise it reads null_lowervp directly.
+ */
+#ifdef NULLFS_DIAGNOSTIC
+extern struct vnode *null_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__)
+#else
+#define NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp)
+#endif
+
+extern int (**null_vnodeop_p)();
+extern struct vfsops null_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
new file mode 100644
index 000000000000..a31723fe4c22
--- /dev/null
+++ b/sys/fs/nullfs/null_subr.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null_subr.c 8.4 (Berkeley) 1/21/94
+ *
+ * $Id: lofs_subr.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/nullfs/null.h>
+
+/*
+ * Hash parameters for the alias cache.  NULL_NHASH() shifts a lower
+ * vnode's address right by LOG2_SIZEVNODE bits and masks the result
+ * with NNULLNODECACHE-1; the mask only reaches every bucket when
+ * NNULLNODECACHE is a power of two.
+ */
+#define LOG2_SIZEVNODE 7 /* log2(sizeof struct vnode) */
+#define NNULLNODECACHE 16
+#define NULL_NHASH(vp) ((((u_long)vp)>>LOG2_SIZEVNODE) & (NNULLNODECACHE-1))
+
+/*
+ * Null layer cache:
+ * Each cache entry holds a reference to the lower vnode
+ * along with a pointer to the alias vnode. When an
+ * entry is added the lower vnode is VREF'd. When the
+ * alias is removed the lower vnode is vrele'd.
+ */
+
+/*
+ * Cache head
+ *
+ * A bucket head consists of just the two chain links.  It is cast to
+ * struct null_node * when it is linked into, or compared against, a
+ * chain, so its members must line up with null_forw/null_back at the
+ * start of struct null_node.
+ */
+struct null_node_cache {
+ struct null_node *ac_forw;
+ struct null_node *ac_back;
+};
+
+/* The hash table itself: NNULLNODECACHE circular, doubly linked chains. */
+static struct null_node_cache null_node_cache[NNULLNODECACHE];
+
+/*
+ * Initialise cache headers.
+ *
+ * Every bucket of the alias hash table becomes an empty circular list
+ * by pointing both of its links back at the bucket head itself.
+ * The function is installed as the last entry of null_vfsops and is
+ * declared there as returning int, so it is defined with an explicit
+ * int return type and always returns 0.
+ */
+int
+nullfs_init()
+{
+	struct null_node_cache *ac;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_init\n"); /* printed during system boot */
+#endif
+
+	for (ac = null_node_cache; ac < null_node_cache + NNULLNODECACHE; ac++)
+		ac->ac_forw = ac->ac_back = (struct null_node *) ac;
+	return (0);
+}
+
+/*
+ * Select the hash bucket that a given lower vnode belongs to.
+ * The bucket index is derived from the vnode's address by NULL_NHASH().
+ */
+static struct null_node_cache *
+null_node_hash(lowervp)
+	struct vnode *lowervp;
+{
+
+	return (null_node_cache + NULL_NHASH(lowervp));
+}
+
+/*
+ * Look up the alias vnode that mount `mp' already has for `lowervp'.
+ * Returns the alias with a new reference obtained through vget(),
+ * or NULL when the cache holds no such alias.
+ */
+static struct vnode *
+null_node_find(mp, lowervp)
+	struct mount *mp;
+	struct vnode *lowervp;
+{
+	struct null_node_cache *bucket;
+	struct null_node *np;
+	struct vnode *aliasvp;
+
+	/*
+	 * Walk the (two-way) linked chain of the bucket looking for a
+	 * null_node that references the lower vnode on behalf of this
+	 * mount.  A hit gains a reference on the alias vnode only; the
+	 * lower vnode's reference count is left alone.
+	 */
+	bucket = null_node_hash(lowervp);
+rescan:
+	np = bucket->ac_forw;
+	while (np != (struct null_node *) bucket) {
+		if (np->null_lowervp != lowervp || NULLTOV(np)->v_mount != mp) {
+			np = np->null_forw;
+			continue;
+		}
+		aliasvp = NULLTOV(np);
+		/*
+		 * We need vget for the VXLOCK
+		 * stuff, but we don't want to lock
+		 * the lower node.
+		 */
+		if (vget(aliasvp, 0)) {
+			/* The alias could not be taken; scan the chain again. */
+			printf ("null_node_find: vget failed.\n");
+			goto rescan;
+		}
+		return (aliasvp);
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Make a new null_node node.
+ * *vpp receives the alias vnode, lowervp is the lower vnode.
+ * Maintain a reference to (lowervp).
+ *
+ * Returns 0 on success or the error from getnewvnode().  If another
+ * alias for (mp, lowervp) appeared while this one was being built,
+ * the new vnode is discarded and *vpp is set to the existing alias,
+ * which null_node_find() returned with a fresh reference.
+ */
+static int
+null_node_alloc(mp, lowervp, vpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **vpp;
+{
+	struct null_node_cache *hd;
+	struct null_node *xp;
+	struct vnode *othervp, *vp;
+	int error;
+
+	if ((error = getnewvnode(VT_NULL, mp, null_vnodeop_p, vpp)) != 0)
+		return (error);
+	vp = *vpp;
+
+	MALLOC(xp, struct null_node *, sizeof(struct null_node), M_TEMP, M_WAITOK);
+	vp->v_type = lowervp->v_type;
+	xp->null_vnode = vp;
+	vp->v_data = xp;
+	xp->null_lowervp = lowervp;
+	/*
+	 * Before we insert our new node onto the hash chains,
+	 * check to see if someone else has beaten us to it.
+	 * (We could have slept in MALLOC.)
+	 * The lookup must be given the mount as well as the lower
+	 * vnode: aliases are keyed on both.
+	 */
+	if ((othervp = null_node_find(mp, lowervp)) != NULL) {
+		/*
+		 * NOTE(review): vp->v_data still points at the node
+		 * freed here; confirm the discarded vnode can never
+		 * reach null_reclaim().
+		 */
+		FREE(xp, M_TEMP);
+		vp->v_type = VBAD; /* node is discarded */
+		vp->v_usecount = 0; /* XXX */
+		*vpp = othervp;
+		return 0;
+	}
+	VREF(lowervp); /* Extra VREF will be vrele'd in null_node_create */
+	hd = null_node_hash(lowervp);
+	insque(xp, hd);
+	return 0;
+}
+
+
+/*
+ * Try to find an existing null_node vnode referring
+ * to it, otherwise make a new null_node vnode which
+ * contains a reference to the lower vnode.
+ *
+ * On success *newvpp is the referenced alias vnode and one of the
+ * caller's references on lowervp has been released.  On failure the
+ * error from null_node_alloc() is returned, *newvpp is not written
+ * and lowervp keeps the caller's reference.
+ */
+int
+null_node_create(mp, lowervp, newvpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+
+	if ((aliasvp = null_node_find(mp, lowervp)) != NULL) {
+		/*
+		 * null_node_find has taken another reference
+		 * to the alias vnode.
+		 */
+#ifdef NULLFS_DIAGNOSTIC
+		vprint("null_node_create: exists", aliasvp);
+#endif
+		/* VREF(aliasvp); --- done in null_node_find */
+	} else {
+		int error;
+
+		/*
+		 * Get new vnode.
+		 */
+#ifdef NULLFS_DIAGNOSTIC
+		printf("null_node_create: create new alias vnode\n");
+#endif
+
+		/*
+		 * Make new vnode reference the null_node.
+		 */
+		if ((error = null_node_alloc(mp, lowervp, &aliasvp)) != 0)
+			return error;
+
+		/*
+		 * aliasvp is already VREF'd by getnewvnode()
+		 */
+	}
+
+	/*
+	 * The alias carries its own reference on the lower vnode,
+	 * so the one handed in by the caller is dropped.
+	 */
+	vrele(lowervp);
+
+#ifdef DIAGNOSTIC
+	if (lowervp->v_usecount < 1) {
+		/* Should never happen... */
+		vprint ("null_node_create: alias ", aliasvp);
+		vprint ("null_node_create: lower ", lowervp);
+		printf ("null_node_create: lower has 0 usecount.\n");
+		panic ("null_node_create: lower has 0 usecount.");
+	}
+#endif
+
+#ifdef NULLFS_DIAGNOSTIC
+	vprint("null_node_create: alias", aliasvp);
+	vprint("null_node_create: lower", lowervp);
+#endif
+
+	*newvpp = aliasvp;
+	return (0);
+}
+#ifdef NULLFS_DIAGNOSTIC
+/*
+ * Checked replacement for the plain NULLVPTOLOWERVP() mapping; it is
+ * only built with NULLFS_DIAGNOSTIC.  Returns the lower vnode of alias
+ * `vp' after verifying that it is set and still referenced.  On a
+ * failed check the first 8 words of the null_node are printed, the
+ * code spins while null_checkvp_barrier is non-zero and then panics.
+ * `fil' and `lno' name the call site; they are only used by the
+ * tracing printf that is compiled out under `notyet'.
+ *
+ * NOTE(review): null_checkvp_barrier is not declared anywhere in the
+ * visible source -- confirm where it is defined.
+ */
+struct vnode *
+null_checkvp(vp, fil, lno)
+ struct vnode *vp;
+ char *fil;
+ int lno;
+{
+ struct null_node *a = VTONULL(vp);
+#ifdef notyet
+ /*
+ * Can't do this check because vop_reclaim runs
+ * with a funny vop vector.
+ */
+ if (vp->v_op != null_vnodeop_p) {
+ printf ("null_checkvp: on non-null-node\n");
+ while (null_checkvp_barrier) /*WAIT*/ ;
+ panic("null_checkvp");
+ };
+#endif
+ /* The alias must point at a lower vnode. */
+ if (a->null_lowervp == NULL) {
+ /* Should never happen */
+ int i; u_long *p;
+ printf("vp = %x, ZERO ptr\n", vp);
+ for (p = (u_long *) a, i = 0; i < 8; i++)
+ printf(" %x", p[i]);
+ printf("\n");
+ /* wait for debugger */
+ while (null_checkvp_barrier) /*WAIT*/ ;
+ panic("null_checkvp");
+ }
+ /* The lower vnode must still hold at least one reference. */
+ if (a->null_lowervp->v_usecount < 1) {
+ int i; u_long *p;
+ printf("vp = %x, unref'ed lowervp\n", vp);
+ for (p = (u_long *) a, i = 0; i < 8; i++)
+ printf(" %x", p[i]);
+ printf("\n");
+ /* wait for debugger */
+ while (null_checkvp_barrier) /*WAIT*/ ;
+ panic ("null with unref'ed lowervp");
+ };
+#ifdef notyet
+ printf("null %x/%d -> %x/%d [%s, %d]\n",
+ NULLTOV(a), NULLTOV(a)->v_usecount,
+ a->null_lowervp, a->null_lowervp->v_usecount,
+ fil, lno);
+#endif
+ return a->null_lowervp;
+}
+#endif
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
new file mode 100644
index 000000000000..b0d2df75cdaf
--- /dev/null
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null_vfsops.c 8.2 (Berkeley) 1/21/94
+ *
+ * @(#)lofs_vfsops.c 1.2 (Berkeley) 6/18/92
+ * $Id: lofs_vfsops.c,v 1.9 1992/05/30 10:26:24 jsp Exp jsp $
+ */
+
+/*
+ * Null Layer
+ * (See null_vnops.c for a description of what this does.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/nullfs/null.h>
+
+/*
+ * Mount null layer.
+ *
+ * `data' points at a user-space struct null_args whose target names
+ * the lower directory; `path' is the user-space mount point name.
+ * The lower root is looked up, an alias vnode is stacked on top of it
+ * and remembered as the root of the new mount.
+ *
+ * Returns 0 on success, EOPNOTSUPP for an MNT_UPDATE request, or the
+ * error from copyin(), namei() or null_node_create().
+ */
+int
+nullfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	struct null_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *nullm_rootvp;
+	struct null_mount *xmp;
+	u_int size;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Updating an existing null mount is not supported.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	/*
+	 * Get argument
+	 */
+	if ((error = copyin(data, (caddr_t)&args, sizeof(struct null_args))) != 0)
+		return (error);
+
+	/*
+	 * Find lower node
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+	    UIO_USERSPACE, args.target, p);
+	if ((error = namei(ndp)) != 0)
+		return (error);
+
+	/*
+	 * Keep the lower root; the parent directory that WANTPARENT
+	 * asked for is not needed.
+	 */
+	lowerrootvp = ndp->ni_vp;
+
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULL;
+
+	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
+	    M_UFSMNT, M_WAITOK); /* XXX */
+
+	/*
+	 * Save reference to underlying FS
+	 */
+	xmp->nullm_vfs = lowerrootvp->v_mount;
+
+	/*
+	 * Save reference. Each mount also holds
+	 * a reference on the root vnode.
+	 */
+	error = null_node_create(mp, lowerrootvp, &vp);
+	/*
+	 * Make sure the node alias worked.  On failure vp has not
+	 * been written, so it must not be touched: unlock and release
+	 * the lower root directly instead.
+	 */
+	if (error) {
+		VOP_UNLOCK(lowerrootvp);
+		vrele(lowerrootvp);
+		free(xmp, M_UFSMNT); /* XXX */
+		return (error);
+	}
+	/*
+	 * Unlock the node through the alias.
+	 */
+	VOP_UNLOCK(vp);
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in nullfs_unmount.
+	 */
+	nullm_rootvp = vp;
+	nullm_rootvp->v_flag |= VROOT;
+	xmp->nullm_rootvp = nullm_rootvp;
+	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) xmp;
+	/*
+	 * NOTE(review): the fsid is generated with the MOUNT_LOFS type
+	 * inherited from the lofs ancestor -- confirm whether a
+	 * null-layer specific type should be used here.
+	 */
+	getnewfsid(mp, MOUNT_LOFS);
+
+	/*
+	 * Record both names for statfs, zero-filling the remainder
+	 * of each buffer.
+	 */
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_mount: lower %s, alias at %s\n",
+	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+}
+
+/*
+ * VFS start entry point.  The null layer has nothing of its own to
+ * start: the start routine of the underlying filesystem will have
+ * been called when that filesystem was mounted, and the request is
+ * not passed down.  Always returns 0.
+ */
+int
+nullfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Unmount a null layer: flush the layer's vnodes, give up the mount's
+ * reference on the root alias vnode and free the null_mount data.
+ *
+ * Returns EINVAL for a forced unmount while `doforce' is clear, EBUSY
+ * while the root alias has users besides the mount itself, or the
+ * error from vflush().
+ */
+int
+nullfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct vnode *rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+	int flushflags = 0;
+	int error;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_unmount(mp = %x)\n", mp);
+#endif
+
+	if ((mntflags & MNT_FORCE) != 0) {
+		/* lofs can never be rootfs so don't check for it */
+		if (doforce == 0)
+			return (EINVAL);
+		flushflags = FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache. I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 */
+#if 0
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	/* More than the mount's own reference means the root is in use. */
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+	error = vflush(mp, rootvp, flushflags);
+	if (error != 0)
+		return (error);
+
+#ifdef NULLFS_DIAGNOSTIC
+	vprint("alias root of lower", rootvp);
+#endif
+	/*
+	 * Drop the reference the mount held on the root alias ...
+	 */
+	vrele(rootvp);
+	/*
+	 * ... and blow the vnode away for future re-use.
+	 */
+	vgone(rootvp);
+	/*
+	 * Finally, throw away the null_mount structure
+	 */
+	free(mp->mnt_data, M_UFSMNT); /* XXX */
+	mp->mnt_data = 0;
+	return 0;
+}
+
+/*
+ * Hand back the root alias vnode of the mount in *vpp, with a new
+ * reference and locked.  Always returns 0.
+ */
+int
+nullfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_root(mp = %x, vp = %x->%x)\n", mp,
+	    MOUNTTONULLMOUNT(mp)->nullm_rootvp,
+	    NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp)
+	    );
+#endif
+
+	/*
+	 * Take the reference first, then the lock.
+	 */
+	rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+	return 0;
+}
+
+/*
+ * Quota control is passed straight to the lower filesystem's mount;
+ * its result is returned unchanged.
+ */
+int
+nullfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	struct mount *lowermp = MOUNTTONULLMOUNT(mp)->nullm_vfs;
+
+	return (VFS_QUOTACTL(lowermp, cmd, uid, arg, p));
+}
+
+/*
+ * Report filesystem statistics.  The numbers are obtained from the
+ * lower filesystem; the fsid and the mount names are those of the
+ * null mount itself.  Returns 0 or the lower layer's error.
+ */
+int
+nullfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct mount *lowermp;
+	struct statfs lower;
+	int error;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_statfs(mp = %x, vp = %x->%x)\n", mp,
+	    MOUNTTONULLMOUNT(mp)->nullm_rootvp,
+	    NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp)
+	    );
+#endif
+
+	/* Query the lower layer into a zeroed scratch structure. */
+	bzero(&lower, sizeof(lower));
+	lowermp = MOUNTTONULLMOUNT(mp)->nullm_vfs;
+	if ((error = VFS_STATFS(lowermp, &lower, p)) != 0)
+		return (error);
+
+	/* now copy across the "interesting" information and fake the rest */
+	sbp->f_type = lower.f_type;
+	sbp->f_flags = lower.f_flags;
+	sbp->f_bsize = lower.f_bsize;
+	sbp->f_iosize = lower.f_iosize;
+	sbp->f_blocks = lower.f_blocks;
+	sbp->f_bfree = lower.f_bfree;
+	sbp->f_bavail = lower.f_bavail;
+	sbp->f_files = lower.f_files;
+	sbp->f_ffree = lower.f_ffree;
+
+	/* Nothing more to do when the caller passed the mount's own buffer. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync entry point.  Nothing is written from here, which is only
+ * right as long as no data is cached at the null layer (XXX).
+ * Always returns 0.
+ */
+int
+nullfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Look up a vnode by inode number.  The request is handed to the
+ * lower filesystem's mount and its result is returned as is; no
+ * alias is created here for the vnode placed in *vpp.
+ */
+int
+nullfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	struct mount *lowermp = MOUNTTONULLMOUNT(mp)->nullm_vfs;
+
+	return (VFS_VGET(lowermp, ino, vpp));
+}
+
+/*
+ * Translate a file handle to a vnode by asking the lower filesystem's
+ * mount; all arguments and the result are passed through untouched.
+ */
+int
+nullfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	struct mount *lowermp = MOUNTTONULLMOUNT(mp)->nullm_vfs;
+
+	return (VFS_FHTOVP(lowermp, fidp, nam, vpp, exflagsp, credanonp));
+}
+
+/*
+ * Build a file handle for an alias vnode by asking the filesystem of
+ * the lower vnode it is stacked on.
+ */
+int
+nullfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	struct vnode *lowervp = NULLVPTOLOWERVP(vp);
+
+	return (VFS_VPTOFH(lowervp, fhp));
+}
+
+/* Defined in null_subr.c; it sets up the alias hash table. */
+int nullfs_init __P((void));
+
+/*
+ * VFS operations of the null layer.  The initialisers are positional,
+ * so the order of the entries must follow struct vfsops.
+ */
+struct vfsops null_vfsops = {
+ nullfs_mount,
+ nullfs_start,
+ nullfs_unmount,
+ nullfs_root,
+ nullfs_quotactl,
+ nullfs_statfs,
+ nullfs_sync,
+ nullfs_vget,
+ nullfs_fhtovp,
+ nullfs_vptofh,
+ nullfs_init,
+};
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
new file mode 100644
index 000000000000..115ff6f46432
--- /dev/null
+++ b/sys/fs/nullfs/null_vnops.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * John Heidemann of the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null_vnops.c 8.1 (Berkeley) 6/10/93
+ *
+ * Ancestors:
+ * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92
+ * $Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ * ...and...
+ * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
+ */
+
+/*
+ * Null Layer
+ *
+ * (See mount_null(8) for more information.)
+ *
+ * The null layer duplicates a portion of the file system
+ * name space under a new name. In this respect, it is
+ * similar to the loopback file system. It differs from
+ * the loopback fs in two respects: it is implemented using
+ * stackable-layer techniques, and its "null-node"s stack above
+ * all lower-layer vnodes, not just over directory vnodes.
+ *
+ * The null layer has two purposes. First, it serves as a demonstration
+ * of layering by providing a layer which does nothing. (It actually
+ * does everything the loopback file system does, which is slightly
+ * more than nothing.) Second, the null layer can serve as a prototype
+ * layer. Since it provides all the necessary layer framework,
+ * new file system layers can be created very easily by starting
+ * with a null layer.
+ *
+ * The remainder of this man page examines the null layer as a basis
+ * for constructing new layers.
+ *
+ *
+ * INSTANTIATING NEW NULL LAYERS
+ *
+ * New null layers are created with mount_null(8).
+ * Mount_null(8) takes two arguments, the pathname
+ * of the lower vfs (target-pn) and the pathname where the null
+ * layer will appear in the namespace (alias-pn). After
+ * the null layer is put into place, the contents
+ * of target-pn subtree will be aliased under alias-pn.
+ *
+ *
+ * OPERATION OF A NULL LAYER
+ *
+ * The null layer is the minimum file system layer,
+ * simply bypassing all possible operations to the lower layer
+ * for processing there. The majority of its activity centers
+ * on the bypass routine, through which nearly all vnode operations
+ * pass.
+ *
+ * The bypass routine accepts arbitrary vnode operations for
+ * handling by the lower layer. It begins by examining the vnode
+ * operation arguments and replacing any null-nodes by their
+ * lower-layer equivalents. It then invokes the operation
+ * on the lower layer. Finally, it restores the null-nodes
+ * in the arguments and, if a vnode is returned by the operation,
+ * stacks a null-node on top of the returned vnode.
+ *
+ * Although bypass handles most operations,
+ * vop_getattr, _inactive, _reclaim, and _print are not bypassed.
+ * Vop_getattr must change the fsid being returned.
+ * Vop_inactive and vop_reclaim are not bypassed so that
+ * they can handle freeing null-layer specific data.
+ * Vop_print is not bypassed to avoid excessive debugging
+ * information.
+ *
+ *
+ * INSTANTIATING VNODE STACKS
+ *
+ * Mounting associates the null layer with a lower layer,
+ * in effect stacking two VFSes. Vnode stacks are instead
+ * created on demand as files are accessed.
+ *
+ * The initial mount creates a single vnode stack for the
+ * root of the new null layer. All other vnode stacks
+ * are created as a result of vnode operations on
+ * this or other null vnode stacks.
+ *
+ * New vnode stacks come into existence as a result of
+ * an operation which returns a vnode.
+ * The bypass routine stacks a null-node above the new
+ * vnode before returning it to the caller.
+ *
+ * For example, imagine mounting a null layer with
+ * "mount_null /usr/include /dev/layer/null".
+ * Changing directory to /dev/layer/null will assign
+ * the root null-node (which was created when the null layer was mounted).
+ * Now consider opening "sys". A vop_lookup would be
+ * done on the root null-node. This operation would bypass through
+ * to the lower layer which would return a vnode representing
+ * the UFS "sys". Null_bypass then builds a null-node
+ * aliasing the UFS "sys" and returns this to the caller.
+ * Later operations on the null-node "sys" will repeat this
+ * process when constructing other vnode stacks.
+ *
+ *
+ * CREATING OTHER FILE SYSTEM LAYERS
+ *
+ * One of the easiest ways to construct new file system layers is to make
+ * a copy of the null layer, rename all files and variables, and
+ * then begin modifying the copy. Sed can be used to easily rename
+ * all variables.
+ *
+ * The umap layer is an example of a layer descended from the
+ * null layer.
+ *
+ *
+ * INVOKING OPERATIONS ON LOWER LAYERS
+ *
+ * There are two techniques to invoke operations on a lower layer
+ * when the operation cannot be completely bypassed. Each method
+ * is appropriate in different situations. In both cases,
+ * it is the responsibility of the aliasing layer to make
+ * the operation arguments "correct" for the lower layer
+ * by mapping any vnode arguments to the lower layer.
+ *
+ * The first approach is to call the aliasing layer's bypass routine.
+ * This method is most suitable when you wish to invoke the operation
+ * currently being handled on the lower layer. It has the advantage
+ * that the bypass routine already must do argument mapping.
+ * An example of this is null_getattr in the null layer.
+ *
+ * A second approach is to directly invoke vnode operations on
+ * the lower layer with the VOP_OPERATIONNAME interface.
+ * The advantage of this method is that it is easy to invoke
+ * arbitrary operations on the lower layer. The disadvantage
+ * is that vnode arguments must be manually mapped.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/nullfs/null.h>
+
+
+int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
+
+/*
+ * This is the 10-Apr-92 bypass routine.
+ * This version has been optimized for speed, throwing away some
+ * safety checks. It should still always work, but it's not as
+ * robust to programmer errors.
+ * Define SAFETY to include some error checking code.
+ *
+ * In general, we map all vnodes going down and unmap them on the way back.
+ * As an exception to this, vnodes can be marked "unmapped" by setting
+ * the Nth bit in operation's vdesc_flags.
+ *
+ * Also, some BSD vnode operations have the side effect of vrele'ing
+ * their arguments. With stacking, the reference counts are held
+ * by the upper node, not the lower one, so we must handle these
+ * side-effects here. This is not of concern in Sun-derived systems
+ * since there are no such side-effects.
+ *
+ * This makes the following assumptions:
+ * - only one returned vpp
+ * - no INOUT vpp's (Sun's vop_open has one of these)
+ * - the vnode operation vector of the first vnode should be used
+ * to determine what implementation of the op should be invoked
+ * - all mapped vnodes are of our vnode-type (NEEDSWORK:
+ * problems on rmdir'ing mount points and renaming?)
+ *
+ * Returns the error of the lower-layer operation or, when a returned
+ * vnode had to be wrapped, the result of null_node_create().
+ */
+int
+null_bypass(ap)
+ struct vop_generic_args /* {
+ struct vnodeop_desc *a_desc;
+ <other random data follows, presumably>
+ } */ *ap;
+{
+ extern int (**null_vnodeop_p)(); /* not extern, really "forward" */
+ register struct vnode **this_vp_p;
+ int error;
+ struct vnode *old_vps[VDESC_MAX_VPS]; /* original (alias) vnodes, NULL if not mapped */
+ struct vnode **vps_p[VDESC_MAX_VPS]; /* addresses of the vnode slots inside *ap */
+ struct vnode ***vppp;
+ struct vnodeop_desc *descp = ap->a_desc;
+ int reles, i;
+
+ if (null_bug_bypass)
+ printf ("null_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+ /*
+ * We require at least one vp.
+ */
+ if (descp->vdesc_vp_offsets == NULL ||
+ descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+ panic ("null_bypass: no vp's in map.\n");
+#endif
+
+ /*
+ * Map the vnodes going in.
+ * Later, we'll invoke the operation based on
+ * the first mapped vnode's operation vector.
+ * Bit i of reles (a copy of vdesc_flags, shifted once per
+ * iteration) tells whether the operation releases vnode i.
+ */
+ reles = descp->vdesc_flags;
+ for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+ if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+ break; /* bail out at end of list */
+ vps_p[i] = this_vp_p =
+ VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
+ /*
+ * We're not guaranteed that any but the first vnode
+ * are of our type. Check for and don't map any
+ * that aren't. (We must always map first vp or vclean fails.)
+ */
+ if (i && (*this_vp_p)->v_op != null_vnodeop_p) {
+ old_vps[i] = NULL;
+ } else {
+ old_vps[i] = *this_vp_p;
+ *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
+ /*
+ * XXX - Several operations have the side effect
+ * of vrele'ing their vp's. We must account for
+ * that. (This should go away in the future.)
+ * Note that the slot was just overwritten, so the
+ * extra reference is taken on the lower vnode.
+ */
+ if (reles & 1)
+ VREF(*this_vp_p);
+ }
+
+ }
+
+ /*
+ * Call the operation on the lower layer
+ * with the modified argument structure.
+ */
+ error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+ /*
+ * Maintain the illusion of call-by-value
+ * by restoring vnodes in the argument structure
+ * to their original value.
+ * Where the operation released its (lower) vnode, the
+ * matching release is now applied to the alias vnode.
+ */
+ reles = descp->vdesc_flags;
+ for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+ if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+ break; /* bail out at end of list */
+ if (old_vps[i]) {
+ *(vps_p[i]) = old_vps[i];
+ if (reles & 1)
+ vrele(*(vps_p[i]));
+ }
+ }
+
+ /*
+ * Map the possible out-going vpp
+ * (Assumes that the lower layer always returns
+ * a VREF'ed vpp unless it gets an error.)
+ */
+ if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+ !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+ !error) {
+ /*
+ * XXX - even though some ops have vpp returned vp's,
+ * several ops actually vrele this before returning.
+ * We must avoid these ops.
+ * (This should go away when these ops are regularized.)
+ */
+ if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+ goto out;
+ vppp = VOPARG_OFFSETTO(struct vnode***,
+ descp->vdesc_vpp_offset,ap);
+ /*
+ * Replace the returned lower vnode, in place, by an alias
+ * belonging to the mount of the first argument vnode.
+ */
+ error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+ }
+
+ out:
+ return (error);
+}
+
+
+/*
+ * Getattr is done by the lower layer through the bypass routine;
+ * the only thing changed afterwards is the fsid, which is replaced
+ * by that of the null mount.
+ */
+int
+null_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error;
+
+	error = null_bypass(ap);
+	if (error != 0)
+		return (error);
+
+	/* Relies on null_bypass having restored a_vp to the alias vnode. */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+	return (0);
+}
+
+
+/*
+ * Inactive is deliberately a no-op and is _not_ bypassed.
+ * The lower vnode stays referenced until reclaim, so that until
+ * then our null_node remains in the cache and can be reused.
+ *
+ * NEEDSWORK: Someday, consider inactive'ing
+ * the lowervp and then trying to reactivate it
+ * with capabilities (v_id)
+ * like they do in the name lookup cache code.
+ * That's too much work for now.
+ */
+int
+null_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Reclaim an alias vnode: unhook its null_node from the hash chain,
+ * free it and release the reference it held on the lower vnode.
+ * Always returns 0.
+ */
+int
+null_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct null_node *xp = VTONULL(vp);
+ struct vnode *lowervp = xp->null_lowervp; /* saved: xp is freed below */
+
+ /*
+ * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
+ * so we can't call VOPs on ourself.
+ */
+ /* After this assignment, this node will not be re-used. */
+ xp->null_lowervp = NULL;
+ remque(xp);
+ /* vp->v_data is xp; free it and clear the stale pointer. */
+ FREE(vp->v_data, M_TEMP);
+ vp->v_data = NULL;
+ /* Drop the lower vnode last, through the copy taken above. */
+ vrele (lowervp);
+ return (0);
+}
+
+
+/*
+ * Print a one-line description of an alias vnode: its own address
+ * and that of the lower vnode.  Not bypassed.  Always returns 0.
+ */
+int
+null_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *aliasvp = ap->a_vp;
+	struct vnode *lowervp = NULLVPTOLOWERVP(aliasvp);
+
+	printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", aliasvp, lowervp);
+	return (0);
+}
+
+
+/*
+ * XXX - vop_strategy must be hand coded because it has no
+ * vnode in its arguments.
+ * This goes away with a merged VM/buffer cache.
+ *
+ * The buffer's vnode is switched to the lower vnode for the duration
+ * of the call and put back afterwards; the lower layer's result is
+ * returned.
+ */
+int
+null_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *aliasvp = bp->b_vp;
+	int error;
+
+	bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
+	error = VOP_STRATEGY(bp);
+	bp->b_vp = aliasvp;
+
+	return (error);
+}
+
+
+/*
+ * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
+ * vnode in its arguments.
+ * This goes away with a merged VM/buffer cache.
+ *
+ * The buffer's vnode is switched to the lower vnode for the duration
+ * of the call and put back afterwards; the lower layer's result is
+ * returned.
+ */
+int
+null_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *aliasvp = bp->b_vp;
+	int error;
+
+	bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
+	error = VOP_BWRITE(bp);
+	bp->b_vp = aliasvp;
+
+	return (error);
+}
+
+/*
+ * Global vfs data structures
+ *
+ * Every operation without an entry of its own goes to null_bypass
+ * through the default descriptor.  The operations listed explicitly
+ * are the ones the null layer handles itself.
+ */
+int (**null_vnodeop_p)();
+struct vnodeopv_entry_desc null_vnodeop_entries[] = {
+ { &vop_default_desc, null_bypass },
+
+ /* Handled by the null layer itself rather than simply bypassed. */
+ { &vop_getattr_desc, null_getattr },
+ { &vop_inactive_desc, null_inactive },
+ { &vop_reclaim_desc, null_reclaim },
+ { &vop_print_desc, null_print },
+
+ /* Hand coded: these take a struct buf, not a vnode argument. */
+ { &vop_strategy_desc, null_strategy },
+ { &vop_bwrite_desc, null_bwrite },
+
+ /* End-of-table marker. */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc null_vnodeop_opv_desc =
+ { &null_vnodeop_p, null_vnodeop_entries };
diff --git a/sys/fs/portalfs/portal.h b/sys/fs/portalfs/portal.h
new file mode 100644
index 000000000000..38d7ee0cdd27
--- /dev/null
+++ b/sys/fs/portalfs/portal.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)portal.h 8.4 (Berkeley) 1/21/94
+ *
+ * $Id: portal.h,v 1.3 1992/05/30 10:05:24 jsp Exp jsp $
+ */
+
+/*
+ * Mount arguments.  portal_mount() copies one of these in from the
+ * caller, looks pa_socket up as a socket descriptor of the mounting
+ * process, and records pa_config as the "mounted from" name.
+ */
+struct portal_args {
+ char *pa_config; /* Config file */
+ int pa_socket; /* Socket to server */
+};
+
+/*
+ * Credentials record that portal_open() fills in from the open mode and
+ * the caller's ucred, and sends to the server socket immediately ahead
+ * of the saved path argument.
+ */
+struct portal_cred {
+ int pcr_flag; /* File open mode */
+ uid_t pcr_uid; /* From ucred */
+ short pcr_ngroups; /* From ucred */
+ gid_t pcr_groups[NGROUPS]; /* From ucred */
+};
+
+#ifdef KERNEL
+/*
+ * Per-mount state, stored in mnt_data (see VFSTOPORTAL).
+ */
+struct portalmount {
+ struct vnode *pm_root; /* Root node */
+ struct file *pm_server; /* Held reference to server socket */
+};
+
+/*
+ * Per-vnode state, stored in v_data (see VTOPORTAL).  For the root
+ * vnode pt_arg is 0 and pt_size is 0; for looked-up nodes pt_arg holds
+ * a NUL-terminated copy of the path and pt_size its length including
+ * the NUL.
+ */
+struct portalnode {
+ int pt_size; /* Length of Arg */
+ char *pt_arg; /* Arg to send to server */
+ int pt_fileid; /* cookie */
+};
+
+/* Convert a mount point / vnode to the portal private data hung off it. */
+#define VFSTOPORTAL(mp) ((struct portalmount *)((mp)->mnt_data))
+#define VTOPORTAL(vp) ((struct portalnode *)(vp)->v_data)
+
+/* File id given to the root vnode; other nodes are numbered from the next value up. */
+#define PORTAL_ROOTFILEID 2
+
+extern int (**portal_vnodeop_p)();
+extern struct vfsops portal_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/portalfs/portal_vfsops.c b/sys/fs/portalfs/portal_vfsops.c
new file mode 100644
index 000000000000..39e8563009b4
--- /dev/null
+++ b/sys/fs/portalfs/portal_vfsops.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)portal_vfsops.c 8.6 (Berkeley) 1/21/94
+ *
+ * $Id: portal_vfsops.c,v 1.5 1992/05/30 10:25:27 jsp Exp jsp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/un.h>
+#include <miscfs/portal/portal.h>
+
+/*
+ * Filesystem initialisation hook.  Nothing needs setting up, so this
+ * just reports success.
+ */
+int
+portal_init()
+{
+
+ return (0);
+}
+
+/*
+ * Mount a portal filesystem.
+ *
+ * data points at a user-space struct portal_args.  pa_socket must name
+ * an AF_UNIX socket open in the mounting process; the mount keeps its
+ * own reference on that file (pm_server).  A root vnode of type VDIR is
+ * created with an empty portalnode and file id PORTAL_ROOTFILEID.
+ *
+ * Returns 0 on success, EOPNOTSUPP for an update request,
+ * ESOCKTNOSUPPORT if the socket is not in the AF_UNIX domain, or the
+ * error from copyin(), getsock() or getnewvnode().
+ */
+int
+portal_mount(mp, path, data, ndp, p)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ struct file *fp;
+ struct portal_args args;
+ struct portalmount *fmp;
+ struct socket *so;
+ struct vnode *rvp;
+ u_int size;
+ int error;
+
+ /*
+ * Updating an existing mount is not supported
+ */
+ if (mp->mnt_flag & MNT_UPDATE)
+ return (EOPNOTSUPP);
+
+ if (error = copyin(data, (caddr_t) &args, sizeof(struct portal_args)))
+ return (error);
+
+ /*
+ * Translate the descriptor passed by the caller into its file
+ * structure and insist that the socket is in the AF_UNIX domain.
+ */
+ if (error = getsock(p->p_fd, args.pa_socket, &fp))
+ return (error);
+ so = (struct socket *) fp->f_data;
+ if (so->so_proto->pr_domain->dom_family != AF_UNIX)
+ return (ESOCKTNOSUPPORT);
+
+ error = getnewvnode(VT_PORTAL, mp, portal_vnodeop_p, &rvp); /* XXX */
+ if (error)
+ return (error);
+ MALLOC(rvp->v_data, void *, sizeof(struct portalnode),
+ M_TEMP, M_WAITOK);
+
+ fmp = (struct portalmount *) malloc(sizeof(struct portalmount),
+ M_UFSMNT, M_WAITOK); /* XXX */
+ rvp->v_type = VDIR;
+ rvp->v_flag |= VROOT;
+ VTOPORTAL(rvp)->pt_arg = 0;
+ VTOPORTAL(rvp)->pt_size = 0;
+ VTOPORTAL(rvp)->pt_fileid = PORTAL_ROOTFILEID;
+ fmp->pm_root = rvp;
+ /* The mount holds its own reference on the server socket's file. */
+ fmp->pm_server = fp; fp->f_count++;
+
+ mp->mnt_flag |= MNT_LOCAL;
+ mp->mnt_data = (qaddr_t) fmp;
+ getnewfsid(mp, MOUNT_PORTAL);
+
+ /*
+ * Record the mount point and the config file name, zero-filling
+ * the remainder of each name buffer.
+ * NOTE(review): the copyinstr() results are discarded - confirm
+ * that a failed copy cannot leave size unset.
+ */
+ (void)copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+ (void)copyinstr(args.pa_config,
+ mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+#ifdef notdef
+ bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+ bcopy("portal", mp->mnt_stat.f_mntfromname, sizeof("portal"));
+#endif
+
+ return (0);
+}
+
+/*
+ * Start hook for the mount.  Nothing to do; always returns 0.
+ */
+int
+portal_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+
+ return (0);
+}
+
+/*
+ * Unmount a portal filesystem.
+ *
+ * A forced unmount is only honoured when the global doforce is set
+ * (EINVAL otherwise).  The unmount is refused with EBUSY while the root
+ * vnode has more than one reference, and fails with the error from
+ * vflush() if the other vnodes cannot be flushed.  On success the root
+ * vnode is released and destroyed, the server socket is shut down and
+ * its file reference dropped, and the portalmount structure is freed.
+ */
+int
+portal_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ extern int doforce;
+ struct vnode *rootvp = VFSTOPORTAL(mp)->pm_root;
+ int error, flags = 0;
+
+
+ if (mntflags & MNT_FORCE) {
+ /* portal can never be rootfs so don't check for it */
+ if (!doforce)
+ return (EINVAL);
+ flags |= FORCECLOSE;
+ }
+
+ /*
+ * Clear out buffer cache. I don't think we
+ * ever get anything cached at this level at the
+ * moment, but who knows...
+ */
+#ifdef notyet
+ mntflushbuf(mp, 0);
+ if (mntinvalbuf(mp, 1))
+ return (EBUSY);
+#endif
+ /* More than one reference on the root means someone else is using it. */
+ if (rootvp->v_usecount > 1)
+ return (EBUSY);
+ if (error = vflush(mp, rootvp, flags))
+ return (error);
+
+ /*
+ * Release reference on underlying root vnode
+ */
+ vrele(rootvp);
+ /*
+ * And blow it away for future re-use
+ */
+ vgone(rootvp);
+ /*
+ * Shutdown the socket. This will cause the select in the
+ * daemon to wake up, and then the accept will get ECONNABORTED
+ * which it interprets as a request to go and bury itself.
+ */
+ soshutdown((struct socket *) VFSTOPORTAL(mp)->pm_server->f_data, 2);
+ /*
+ * Discard reference to underlying file. Must call closef because
+ * this may be the last reference.
+ */
+ closef(VFSTOPORTAL(mp)->pm_server, (struct proc *) 0);
+ /*
+ * Finally, throw away the portalmount structure
+ */
+ free(mp->mnt_data, M_UFSMNT); /* XXX */
+ mp->mnt_data = 0;
+ return (0);
+}
+
+/*
+ * Hand back the root vnode of the mount through vpp, with a new
+ * reference taken and VOP_LOCK() applied.  Always returns 0.
+ */
+int
+portal_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp = VFSTOPORTAL(mp)->pm_root;
+
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+	return (0);
+}
+
+/*
+ * Quota control is not supported; always returns EOPNOTSUPP.
+ */
+int
+portal_quotactl(mp, cmd, uid, arg, p)
+ struct mount *mp;
+ int cmd;
+ uid_t uid;
+ caddr_t arg;
+ struct proc *p;
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Fill in filesystem statistics.  The figures are fixed: two
+ * DEV_BSIZE blocks with none free, and one file with none free.  When
+ * sbp is not the mount's own statfs buffer, the fsid and the two mount
+ * names are copied over from it.  Always returns 0.
+ */
+int
+portal_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs *mstat = &mp->mnt_stat;
+
+	sbp->f_type = MOUNT_PORTAL;
+	sbp->f_flags = 0;
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* 1K to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = 1;		/* Allow for "." */
+	sbp->f_ffree = 0;
+
+	/* Nothing to copy when the caller passed the mount's own buffer. */
+	if (sbp == mstat)
+		return (0);
+
+	bcopy(&mstat->f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mstat->f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mstat->f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync hook.  Nothing is written back; always returns 0.
+ */
+int
+portal_sync(mp, waitfor)
+ struct mount *mp;
+ int waitfor;
+{
+
+ return (0);
+}
+
+/*
+ * Lookup of a vnode by inode number is not supported; always returns
+ * EOPNOTSUPP and leaves *vpp untouched.
+ */
+int
+portal_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * File handle to vnode translation is not supported; always returns
+ * EOPNOTSUPP and leaves *vpp untouched.
+ */
+int
+portal_fhtovp(mp, fhp, vpp)
+ struct mount *mp;
+ struct fid *fhp;
+ struct vnode **vpp;
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Vnode to file handle translation is not supported; always returns
+ * EOPNOTSUPP and leaves *fhp untouched.
+ */
+int
+portal_vptofh(vp, fhp)
+ struct vnode *vp;
+ struct fid *fhp;
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Filesystem operations switch for the portal filesystem.  The
+ * initialisers are positional, so their order must match the member
+ * order of struct vfsops (declared outside this file).
+ */
+struct vfsops portal_vfsops = {
+ portal_mount,
+ portal_start,
+ portal_unmount,
+ portal_root,
+ portal_quotactl,
+ portal_statfs,
+ portal_sync,
+ portal_vget,
+ portal_fhtovp,
+ portal_vptofh,
+ portal_init,
+};
diff --git a/sys/fs/portalfs/portal_vnops.c b/sys/fs/portalfs/portal_vnops.c
new file mode 100644
index 000000000000..5e170261e71f
--- /dev/null
+++ b/sys/fs/portalfs/portal_vnops.c
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)portal_vnops.c 8.8 (Berkeley) 1/21/94
+ *
+ * $Id: portal_vnops.c,v 1.4 1992/05/30 10:05:24 jsp Exp jsp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/un.h>
+#include <sys/unpcb.h>
+#include <miscfs/portal/portal.h>
+
+/* Next file id to hand out; portal_lookup() post-increments it for each new node. */
+static int portal_fileid = PORTAL_ROOTFILEID+1;
+
+/*
+ * Close descriptor fd in process p by calling close() with a
+ * hand-built argument structure.  A failure is not expected and cannot
+ * be acted upon, so it is only reported with printf().
+ */
+static void
+portal_closefd(p, fd)
+	struct proc *p;
+	int fd;
+{
+	struct {
+		int fd;
+	} closeargs;
+	int retval;
+	int status;
+
+	closeargs.fd = fd;
+	status = close(p, &closeargs, &retval);
+	if (status != 0)
+		printf("portal_closefd: error = %d\n", status);
+}
+
+/*
+ * Look a name up in the portal directory.
+ *
+ * a_dvp is the directory being searched and a_cnp describes the name.
+ * "." yields the directory itself with an extra reference.  Any other
+ * name gets a brand new VREG vnode whose portalnode keeps a private,
+ * NUL-terminated copy of everything from cn_nameptr to the end of the
+ * string; cn_consume is set to the length of that string beyond the
+ * current component.  The vnode is returned without VOP_LOCK() being
+ * applied.
+ *
+ * Returns 0 on success or the error from getnewvnode(), in which case
+ * *a_vpp is set to NULL.
+ */
+int
+portal_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode *dirvp = ap->a_dvp;
+	struct vnode *newvp = 0;
+	struct portalnode *node;
+	char *name = cnp->cn_nameptr;
+	int len;
+	int error;
+
+	if (cnp->cn_namelen == 1 && name[0] == '.') {
+		*ap->a_vpp = dirvp;
+		VREF(dirvp);
+		/*VOP_LOCK(dirvp);*/
+		return (0);
+	}
+
+	error = getnewvnode(VT_PORTAL, dirvp->v_mount, portal_vnodeop_p, &newvp);
+	if (error) {
+		if (newvp)
+			vrele(newvp);
+		*ap->a_vpp = NULL;
+		return (error);
+	}
+
+	newvp->v_type = VREG;
+	MALLOC(newvp->v_data, void *, sizeof(struct portalnode),
+	    M_TEMP, M_WAITOK);
+	node = VTOPORTAL(newvp);
+
+	/*
+	 * Measure the whole of the remaining pathname and tell namei
+	 * how much of it lies beyond the current component.
+	 */
+	len = 0;
+	while (name[len] != '\0')
+		len++;
+	cnp->cn_consume = len - cnp->cn_namelen;
+
+	/* Keep a private copy of the path, terminating NUL included. */
+	node->pt_arg = malloc(len + 1, M_TEMP, M_WAITOK);
+	node->pt_size = len + 1;
+	bcopy(name, node->pt_arg, node->pt_size);
+	node->pt_fileid = portal_fileid++;
+
+	*ap->a_vpp = newvp;
+	/*VOP_LOCK(newvp);*/
+	return (0);
+}
+
+/*
+ * Connect socket so to the listening socket so2 without going through
+ * a pathname lookup.
+ *
+ * Returns ECONNREFUSED if so2 is null, is not accepting connections, or
+ * no new connection socket can be created; EPROTOTYPE if the two
+ * sockets are of different types; otherwise the result of
+ * unp_connect2().
+ */
+static int
+portal_connect(so, so2)
+ struct socket *so;
+ struct socket *so2;
+{
+ /* from unp_connect, bypassing the namei stuff... */
+ struct socket *so3;
+ struct unpcb *unp2;
+ struct unpcb *unp3;
+
+ if (so2 == 0)
+ return (ECONNREFUSED);
+
+ if (so->so_type != so2->so_type)
+ return (EPROTOTYPE);
+
+ if ((so2->so_options & SO_ACCEPTCONN) == 0)
+ return (ECONNREFUSED);
+
+ /* Create the server-side socket for this connection. */
+ if ((so3 = sonewconn(so2, 0)) == 0)
+ return (ECONNREFUSED);
+
+ /* Give the new socket a copy of the listening socket's address, if any. */
+ unp2 = sotounpcb(so2);
+ unp3 = sotounpcb(so3);
+ if (unp2->unp_addr)
+ unp3->unp_addr = m_copy(unp2->unp_addr, 0, (int)M_COPYALL);
+
+ /* From here on the peer is the newly created socket. */
+ so2 = so3;
+
+
+ return (unp_connect2(so, so2));
+}
+
+/*
+ * Open a portal vnode by asking the server for a file descriptor.
+ *
+ * Opening the root vnode succeeds immediately.  For any other vnode:
+ *  - fail with ENODEV if p_dupfd is already >= 0;
+ *  - create an AF_UNIX stream socket and connect it to the server
+ *    socket held by the mount (portal_connect), waiting for the
+ *    connection to complete;
+ *  - send a struct portal_cred followed by the path saved at lookup;
+ *  - read back an int error code and a control message carrying file
+ *    descriptors;
+ *  - keep the first descriptor, close any extras, and reject the open
+ *    with EACCES if the requested read/write mode is not a subset of
+ *    the descriptor's flags;
+ *  - store the descriptor in p->p_dupfd and return ENXIO.
+ *
+ * On every path through "bad" the control message (if any) is freed and
+ * the temporary socket is shut down and closed.
+ */
+int
+portal_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct socket *so = 0;
+ struct portalnode *pt;
+ struct proc *p = ap->a_p;
+ struct vnode *vp = ap->a_vp;
+ int s;
+ struct uio auio;
+ struct iovec aiov[2];
+ int res;
+ struct mbuf *cm = 0;
+ struct cmsghdr *cmsg;
+ int newfds;
+ int *ip;
+ int fd;
+ int error;
+ int len;
+ struct portalmount *fmp;
+ struct file *fp;
+ struct portal_cred pcred;
+
+ /*
+ * Nothing to do when opening the root node.
+ */
+ if (vp->v_flag & VROOT)
+ return (0);
+
+ /*
+ * Can't be opened unless the caller is set up
+ * to deal with the side effects. Check for this
+ * by testing whether the p_dupfd has been set.
+ */
+ if (p->p_dupfd >= 0)
+ return (ENODEV);
+
+ pt = VTOPORTAL(vp);
+ fmp = VFSTOPORTAL(vp->v_mount);
+
+ /*
+ * Create a new socket.
+ */
+ error = socreate(AF_UNIX, &so, SOCK_STREAM, 0);
+ if (error)
+ goto bad;
+
+ /*
+ * Reserve some buffer space
+ */
+ res = pt->pt_size + sizeof(pcred) + 512; /* XXX */
+ error = soreserve(so, res, res);
+ if (error)
+ goto bad;
+
+ /*
+ * Kick off connection
+ */
+ error = portal_connect(so, (struct socket *)fmp->pm_server->f_data);
+ if (error)
+ goto bad;
+
+ /*
+ * Wait for connection to complete
+ */
+ /*
+ * XXX: Since the mount point is holding a reference on the
+ * underlying server socket, it is not easy to find out whether
+ * the server process is still running. To handle this problem
+ * we loop waiting for the new socket to be connected (something
+ * which will only happen if the server is still running) or for
+ * the reference count on the server socket to drop to 1, which
+ * will happen if the server dies. Sleep for 5 second intervals
+ * and keep polling the reference count. XXX.
+ */
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ if (fmp->pm_server->f_count == 1) {
+ error = ECONNREFUSED;
+ splx(s);
+ goto bad;
+ }
+ (void) tsleep((caddr_t) &so->so_timeo, PSOCK, "portalcon", 5 * hz);
+ }
+ splx(s);
+
+ if (so->so_error) {
+ error = so->so_error;
+ goto bad;
+ }
+
+ /*
+ * Set miscellaneous flags
+ */
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_timeo = 0;
+ so->so_rcv.sb_flags |= SB_NOINTR;
+ so->so_snd.sb_flags |= SB_NOINTR;
+
+
+ /*
+ * Build the request: the credentials record followed by the
+ * saved path, sent as a single two-element write.
+ */
+ pcred.pcr_flag = ap->a_mode;
+ pcred.pcr_uid = ap->a_cred->cr_uid;
+ pcred.pcr_ngroups = ap->a_cred->cr_ngroups;
+ bcopy(ap->a_cred->cr_groups, pcred.pcr_groups, NGROUPS * sizeof(gid_t));
+ aiov[0].iov_base = (caddr_t) &pcred;
+ aiov[0].iov_len = sizeof(pcred);
+ aiov[1].iov_base = pt->pt_arg;
+ aiov[1].iov_len = pt->pt_size;
+ auio.uio_iov = aiov;
+ auio.uio_iovcnt = 2;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_procp = p;
+ auio.uio_offset = 0;
+ auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len;
+
+ error = sosend(so, (struct mbuf *) 0, &auio,
+ (struct mbuf *) 0, (struct mbuf *) 0, 0);
+ if (error)
+ goto bad;
+
+ /*
+ * Read the reply: an int error code as data and, alongside it,
+ * a control message.  Keep receiving while no control message
+ * has arrived, no data has been consumed and no error is set.
+ */
+ len = auio.uio_resid = sizeof(int);
+ do {
+ struct mbuf *m = 0;
+ int flags = MSG_WAITALL;
+ error = soreceive(so, (struct mbuf **) 0, &auio,
+ &m, &cm, &flags);
+ if (error)
+ goto bad;
+
+ /*
+ * Grab an error code from the mbuf.
+ */
+ if (m) {
+ m = m_pullup(m, sizeof(int)); /* Needed? */
+ if (m) {
+ error = *(mtod(m, int *));
+ m_freem(m);
+ } else {
+ error = EINVAL;
+ }
+ } else {
+ if (cm == 0) {
+ error = ECONNRESET; /* XXX */
+#ifdef notdef
+ break;
+#endif
+ }
+ }
+ } while (cm == 0 && auio.uio_resid == len && !error);
+
+ /* Without a control message there is no descriptor to hand over. */
+ if (cm == 0)
+ goto bad;
+
+ /* A control message with a short data read clears the error and carries on. */
+ if (auio.uio_resid) {
+ error = 0;
+#ifdef notdef
+ error = EMSGSIZE;
+ goto bad;
+#endif
+ }
+
+ /*
+ * XXX: Break apart the control message, and retrieve the
+ * received file descriptor. Note that more than one descriptor
+ * may have been received, or that the rights chain may have more
+ * than a single mbuf in it. What to do?
+ */
+ cmsg = mtod(cm, struct cmsghdr *);
+ newfds = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof (int);
+ if (newfds == 0) {
+ error = ECONNREFUSED;
+ goto bad;
+ }
+ /*
+ * At this point the rights message consists of a control message
+ * header, followed by a data region containing a vector of
+ * integer file descriptors. The fds were allocated by the action
+ * of receiving the control message.
+ */
+ ip = (int *) (cmsg + 1);
+ fd = *ip++;
+ if (newfds > 1) {
+ /*
+ * Close extra fds.
+ */
+ int i;
+ printf("portal_open: %d extra fds\n", newfds - 1);
+ for (i = 1; i < newfds; i++) {
+ portal_closefd(p, *ip);
+ ip++;
+ }
+ }
+
+ /*
+ * Check that the mode the file is being opened for is a subset
+ * of the mode of the existing descriptor.
+ */
+ /* NOTE(review): fd comes straight from the control message and is not range-checked here - confirm. */
+ fp = p->p_fd->fd_ofiles[fd];
+ if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
+ portal_closefd(p, fd);
+ error = EACCES;
+ goto bad;
+ }
+
+ /*
+ * Save the dup fd in the proc structure then return the
+ * special error code (ENXIO) which causes magic things to
+ * happen in vn_open. The whole concept is, well, hmmm.
+ */
+ p->p_dupfd = fd;
+ error = ENXIO;
+
+bad:;
+ /*
+ * And discard the control message.
+ */
+ if (cm) {
+ m_freem(cm);
+ }
+
+ if (so) {
+ soshutdown(so, 2);
+ soclose(so);
+ }
+ return (error);
+}
+
+/*
+ * Report attributes for a portal vnode.
+ *
+ * All three timestamps are set to the current time.  The root vnode is
+ * reported as a directory with mode rwxrwxrwx, two links and file id
+ * PORTAL_ROOTFILEID; every other vnode as a regular file with mode
+ * rw-rw-rw-, one link and the file id assigned at lookup.  Owner and
+ * group are always 0.  Always returns 0.
+ */
+int
+portal_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+
+	bzero(vap, sizeof(*vap));
+	vattr_null(vap);
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_size = DEV_BSIZE;
+	vap->va_blocksize = DEV_BSIZE;
+	microtime(&vap->va_atime);
+	vap->va_mtime = vap->va_atime;
+	/*
+	 * Was "va_ctime = va_ctime", a self-assignment that left ctime
+	 * at whatever vattr_null() stored there.
+	 */
+	vap->va_ctime = vap->va_atime;
+	vap->va_gen = 0;
+	vap->va_flags = 0;
+	vap->va_rdev = 0;
+	/* vap->va_qbytes = 0; */
+	vap->va_bytes = 0;
+	/* vap->va_qsize = 0; */
+	if (vp->v_flag & VROOT) {
+		vap->va_type = VDIR;
+		vap->va_mode = S_IRUSR|S_IWUSR|S_IXUSR|
+				S_IRGRP|S_IWGRP|S_IXGRP|
+				S_IROTH|S_IWOTH|S_IXOTH;
+		vap->va_nlink = 2;
+		vap->va_fileid = PORTAL_ROOTFILEID;
+	} else {
+		vap->va_type = VREG;
+		vap->va_mode = S_IRUSR|S_IWUSR|
+				S_IRGRP|S_IWGRP|
+				S_IROTH|S_IWOTH;
+		vap->va_nlink = 1;
+		vap->va_fileid = VTOPORTAL(vp)->pt_fileid;
+	}
+	return (0);
+}
+
+/*
+ * Attribute changes are refused with EACCES on the root vnode and
+ * silently accepted (0 returned, nothing changed) on any other vnode.
+ */
+int
+portal_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/* The root vnode may not be modified. */
+	return ((vp->v_flag & VROOT) ? EACCES : 0);
+}
+
+/*
+ * Fake readdir, just return empty directory.
+ * It is hard to deal with '.' and '..' so don't bother.
+ * The uio is left untouched and 0 is returned.
+ */
+int
+portal_readdir(ap)
+ struct vop_readdir_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+
+ return (0);
+}
+
+/*
+ * Inactive hook.  Nothing is done here; the per-vnode data is released
+ * in portal_reclaim().  Always returns 0.
+ */
+int
+portal_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ return (0);
+}
+
+/*
+ * Release the private data of a portal vnode: the saved path argument,
+ * if one was allocated, and then the portalnode itself.  v_data is
+ * cleared afterwards.  Always returns 0.
+ */
+int
+portal_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct portalnode *node = VTOPORTAL(vp);
+
+	if (node->pt_arg != 0) {
+		free((caddr_t) node->pt_arg, M_TEMP);
+		node->pt_arg = 0;
+	}
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information.
+ *
+ * For the names handled below the corresponding limit is stored in
+ * *a_retval and 0 is returned; any other name yields EINVAL with
+ * *a_retval untouched.
+ *
+ * NOTE(review): the set of names answered here (MAX_CANON, MAX_INPUT,
+ * VDISABLE, ...) is the one described as "applicable to special
+ * devices" - confirm it is what is wanted for portal vnodes.
+ */
+int
+portal_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_MAX_CANON:
+		*ap->a_retval = MAX_CANON;
+		return (0);
+	case _PC_MAX_INPUT:
+		*ap->a_retval = MAX_INPUT;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	case _PC_VDISABLE:
+		*ap->a_retval = _POSIX_VDISABLE;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Print out the contents of a Portal vnode.
+ * Only a fixed tag line is printed; the vnode argument is not examined.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+portal_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ printf("tag VT_PORTAL, portal vnode\n");
+ return (0);
+}
+
+/*
+ * vfree hook.  There is nothing to free; the arguments are ignored
+ * and 0 is returned.
+ */
+/*void*/
+int
+portal_vfree(ap)
+ struct vop_vfree_args /* {
+ struct vnode *a_pvp;
+ ino_t a_ino;
+ int a_mode;
+ } */ *ap;
+{
+
+ return (0);
+}
+
+
+/*
+ * Portal vnode unsupported operation.
+ * Takes no arguments and always returns EOPNOTSUPP; the operation
+ * aliases defined below cast it to the type of each unsupported
+ * vnode operation.
+ */
+int
+portal_enotsupp()
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Portal "should never get here" operation.
+ * Panics with "portal: bad op"; used below for bmap and strategy.
+ * There is no return statement after the panic() call.
+ */
+int
+portal_badop()
+{
+
+ panic("portal: bad op");
+ /* NOTREACHED */
+}
+
+/*
+ * Portal vnode null operation.
+ * Does nothing and returns 0.
+ * NOTE(review): the operation aliases below cast nullop rather than
+ * this function, so nothing visible here refers to it - confirm
+ * whether it is still needed.
+ */
+int
+portal_nullop()
+{
+
+ return (0);
+}
+
+/*
+ * Aliases for the vnode operations that have no dedicated handler.
+ * Each one casts a generic routine to the function type of the
+ * operation:
+ *   portal_enotsupp - operation not supported (returns EOPNOTSUPP)
+ *   nullop          - accepted with no action (declared outside this
+ *                     file; presumably returns 0 - confirm)
+ *   portal_badop    - must never be called (panics)
+ * portal_valloc is cast to a prototype with explicit parameters
+ * instead of an argument-structure pointer like the others.
+ */
+#define portal_create ((int (*) __P((struct vop_create_args *)))portal_enotsupp)
+#define portal_mknod ((int (*) __P((struct vop_mknod_args *)))portal_enotsupp)
+#define portal_close ((int (*) __P((struct vop_close_args *)))nullop)
+#define portal_access ((int (*) __P((struct vop_access_args *)))nullop)
+#define portal_read ((int (*) __P((struct vop_read_args *)))portal_enotsupp)
+#define portal_write ((int (*) __P((struct vop_write_args *)))portal_enotsupp)
+#define portal_ioctl ((int (*) __P((struct vop_ioctl_args *)))portal_enotsupp)
+#define portal_select ((int (*) __P((struct vop_select_args *)))portal_enotsupp)
+#define portal_mmap ((int (*) __P((struct vop_mmap_args *)))portal_enotsupp)
+#define portal_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define portal_seek ((int (*) __P((struct vop_seek_args *)))nullop)
+#define portal_remove ((int (*) __P((struct vop_remove_args *)))portal_enotsupp)
+#define portal_link ((int (*) __P((struct vop_link_args *)))portal_enotsupp)
+#define portal_rename ((int (*) __P((struct vop_rename_args *)))portal_enotsupp)
+#define portal_mkdir ((int (*) __P((struct vop_mkdir_args *)))portal_enotsupp)
+#define portal_rmdir ((int (*) __P((struct vop_rmdir_args *)))portal_enotsupp)
+#define portal_symlink \
+ ((int (*) __P((struct vop_symlink_args *)))portal_enotsupp)
+#define portal_readlink \
+ ((int (*) __P((struct vop_readlink_args *)))portal_enotsupp)
+#define portal_abortop ((int (*) __P((struct vop_abortop_args *)))nullop)
+#define portal_lock ((int (*) __P((struct vop_lock_args *)))nullop)
+#define portal_unlock ((int (*) __P((struct vop_unlock_args *)))nullop)
+#define portal_bmap ((int (*) __P((struct vop_bmap_args *)))portal_badop)
+#define portal_strategy \
+ ((int (*) __P((struct vop_strategy_args *)))portal_badop)
+#define portal_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+#define portal_advlock \
+ ((int (*) __P((struct vop_advlock_args *)))portal_enotsupp)
+#define portal_blkatoff \
+ ((int (*) __P((struct vop_blkatoff_args *)))portal_enotsupp)
+#define portal_valloc ((int(*) __P(( \
+ struct vnode *pvp, \
+ int mode, \
+ struct ucred *cred, \
+ struct vnode **vpp))) portal_enotsupp)
+#define portal_truncate \
+ ((int (*) __P((struct vop_truncate_args *)))portal_enotsupp)
+#define portal_update ((int (*) __P((struct vop_update_args *)))portal_enotsupp)
+#define portal_bwrite ((int (*) __P((struct vop_bwrite_args *)))portal_enotsupp)
+
+/*
+ * Vnode operations table for the portal filesystem.  Each entry pairs
+ * an operation descriptor with its handler, either a function defined
+ * above or one of the alias macros.  The vop_default_desc slot names
+ * vn_default_error, and the table ends with a NULL descriptor/NULL
+ * handler pair.
+ */
+int (**portal_vnodeop_p)();
+struct vnodeopv_entry_desc portal_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, portal_lookup }, /* lookup */
+ { &vop_create_desc, portal_create }, /* create */
+ { &vop_mknod_desc, portal_mknod }, /* mknod */
+ { &vop_open_desc, portal_open }, /* open */
+ { &vop_close_desc, portal_close }, /* close */
+ { &vop_access_desc, portal_access }, /* access */
+ { &vop_getattr_desc, portal_getattr }, /* getattr */
+ { &vop_setattr_desc, portal_setattr }, /* setattr */
+ { &vop_read_desc, portal_read }, /* read */
+ { &vop_write_desc, portal_write }, /* write */
+ { &vop_ioctl_desc, portal_ioctl }, /* ioctl */
+ { &vop_select_desc, portal_select }, /* select */
+ { &vop_mmap_desc, portal_mmap }, /* mmap */
+ { &vop_fsync_desc, portal_fsync }, /* fsync */
+ { &vop_seek_desc, portal_seek }, /* seek */
+ { &vop_remove_desc, portal_remove }, /* remove */
+ { &vop_link_desc, portal_link }, /* link */
+ { &vop_rename_desc, portal_rename }, /* rename */
+ { &vop_mkdir_desc, portal_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, portal_rmdir }, /* rmdir */
+ { &vop_symlink_desc, portal_symlink }, /* symlink */
+ { &vop_readdir_desc, portal_readdir }, /* readdir */
+ { &vop_readlink_desc, portal_readlink }, /* readlink */
+ { &vop_abortop_desc, portal_abortop }, /* abortop */
+ { &vop_inactive_desc, portal_inactive }, /* inactive */
+ { &vop_reclaim_desc, portal_reclaim }, /* reclaim */
+ { &vop_lock_desc, portal_lock }, /* lock */
+ { &vop_unlock_desc, portal_unlock }, /* unlock */
+ { &vop_bmap_desc, portal_bmap }, /* bmap */
+ { &vop_strategy_desc, portal_strategy }, /* strategy */
+ { &vop_print_desc, portal_print }, /* print */
+ { &vop_islocked_desc, portal_islocked }, /* islocked */
+ { &vop_pathconf_desc, portal_pathconf }, /* pathconf */
+ { &vop_advlock_desc, portal_advlock }, /* advlock */
+ { &vop_blkatoff_desc, portal_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, portal_valloc }, /* valloc */
+ { &vop_vfree_desc, portal_vfree }, /* vfree */
+ { &vop_truncate_desc, portal_truncate }, /* truncate */
+ { &vop_update_desc, portal_update }, /* update */
+ { &vop_bwrite_desc, portal_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+/* Ties the operations vector pointer to the entry table above. */
+struct vnodeopv_desc portal_vnodeop_opv_desc =
+ { &portal_vnodeop_p, portal_vnodeop_entries };
diff --git a/sys/fs/procfs/README b/sys/fs/procfs/README
new file mode 100644
index 000000000000..38811b3f6e3a
--- /dev/null
+++ b/sys/fs/procfs/README
@@ -0,0 +1,113 @@
+saute procfs lyonnais
+
+procfs supports two levels of directory. the filesystem root
+directory contains a representation of the system process table.
+this consists of an entry for each active and zombie process, and
+an additional entry "curproc" which always represents the process
+making the lookup request.
+
+each of the sub-directories contains several files. these files
+are used to control and interrogate processes. the files implemented
+are:
+
+ file - xxx. the exec'ed file.
+
+ status - r/o. returns process status.
+
+ ctl - w/o. sends a control message to the process.
+ for example:
+ echo hup > /proc/curproc/ctl
+ will send a SIGHUP to the shell.
+ whereas
+ echo attach > /proc/1293/ctl
+ would set up process 1293 for debugging.
+ see below for more details.
+
+ mem - r/w. virtual memory image of the process.
+ parts of the address space are readable
+ only if they exist in the target process.
+ a more reasonable alternative might be
+ to return zero pages instead of an error.
+ comments?
+
+ note - w/o. writing a string here sends the
+ equivalent note to the process.
+ [ not implemented. ]
+
+ notepg - w/o. the same as note, but sends to all
+ members of the process group.
+ [ not implemented. ]
+
+ regs - r/w. process register set. this can be read
+ or written any time even if the process
+ is not stopped. since the bsd kernel
+ is single-processor, this implementation
+ will get the "right" register values.
+ a multi-proc kernel would need to do some
+ synchronisation.
+
+this then looks like:
+
+% ls -li /proc
+total 0
+ 9 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 0
+ 17 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 1
+ 89 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 10
+ 25 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 2
+2065 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 257
+2481 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 309
+ 265 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 32
+3129 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 390
+3209 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 400
+3217 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 401
+3273 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 408
+ 393 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 48
+ 409 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 50
+ 465 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 57
+ 481 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 59
+ 537 dr-xr-xr-x 2 root kmem 0 Sep 21 15:06 66
+ 545 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 67
+ 657 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 81
+ 665 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 82
+ 673 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 83
+ 681 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 84
+3273 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 curproc
+% ls -li /proc/curproc
+total 408
+3341 --w------- 1 jsp staff 0 Sep 21 15:06 ctl
+1554 -r-xr-xr-x 1 bin bin 90112 Mar 29 04:52 file
+3339 -rw------- 1 jsp staff 118784 Sep 21 15:06 mem
+3343 --w------- 1 jsp staff 0 Sep 21 15:06 note
+3344 --w------- 1 jsp staff 0 Sep 21 15:06 notepg
+3340 -rw------- 1 jsp staff 0 Sep 21 15:06 regs
+3342 -r--r--r-- 1 jsp staff 0 Sep 21 15:06 status
+% df /proc/curproc /proc/curproc/file
+Filesystem 512-blocks Used Avail Capacity Mounted on
+proc 2 2 0 100% /proc
+/dev/wd0a 16186 13548 1018 93% /
+% cat /proc/curproc/status
+cat 446 439 400 81 12,0 ctty 748620684 270000 0 0 0 20000 nochan 11 20 20 20 0 21 117
+
+
+
+the basic sequence of commands written to "ctl" would be
+
+ attach - this stops the target process and
+ arranges for the sending process
+ to become the debug control process
+ wait - wait for the target process to come to
+ a steady state ready for debugging.
+ step - single step, with no signal delivery.
+ run - continue running, with no signal delivery,
+ until next trap or breakpoint.
+ <signame> - deliver signal <signame> and continue running.
+ detach - continue execution of the target process
+ and remove it from control by the debug process
+
+in a normal debugging environment, where the target is fork/exec'd by
+the debugger, the debugger should fork and the child should stop itself
+(with a self-inflicted SIGSTOP). the parent should do a "wait" then an
+"attach". as before, the child will hit a breakpoint on the first
+instruction in any newly exec'd image.
+
+$Id: README,v 3.1 1993/12/15 09:40:17 jsp Exp $
diff --git a/sys/fs/procfs/procfs.h b/sys/fs/procfs/procfs.h
new file mode 100644
index 000000000000..f7b8fa3ef0ed
--- /dev/null
+++ b/sys/fs/procfs/procfs.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs.h 8.6 (Berkeley) 2/3/94
+ *
+ * From:
+ * $Id: procfs.h,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * The different types of node in a procfs filesystem.
+ * PROCFS_FILENO() adds the enumerator's integer value into the file
+ * number it builds, so the order of the entries affects those numbers.
+ */
+typedef enum {
+ Proot, /* the filesystem root */
+ Pproc, /* a process-specific sub-directory */
+ Pfile, /* the executable file */
+ Pmem, /* the process's memory image */
+ Pregs, /* the process's register set */
+ Pfpregs, /* the process's FP register set */
+ Pctl, /* process control */
+ Pstatus, /* process status */
+ Pnote, /* process notifier */
+ Pnotepg /* process group notifier */
+} pfstype;
+
+/*
+ * control data for the proc file system.
+ * VTOPFS() reads a vnode's v_data field as a pointer to one of these,
+ * and PFSTOV() follows pfs_vnode to get back to the vnode.
+ */
+struct pfsnode {
+ struct pfsnode *pfs_next; /* next on list */
+ struct vnode *pfs_vnode; /* vnode associated with this pfsnode */
+ pfstype pfs_type; /* type of procfs node */
+ pid_t pfs_pid; /* associated process */
+ u_short pfs_mode; /* mode bits for stat() */
+ u_long pfs_flags; /* open flags */
+ u_long pfs_fileno; /* unique file id */
+};
+
+#define PROCFS_NOTELEN 64 /* max length of a note (/proc/$pid/note) */
+#define PROCFS_CTLLEN 8 /* max length of a ctl msg (/proc/$pid/ctl) */
+
+/*
+ * Kernel stuff follows
+ */
+#ifdef KERNEL
+/*
+ * True when the name held in (cnp) is exactly the (len)-byte string (s):
+ * the lengths have to agree and the bytes have to compare equal.
+ */
+#define CNEQ(cnp, s, len) \
+	((len) == (cnp)->cn_namelen && \
+	 bcmp((cnp)->cn_nameptr, (s), (len)) == 0)
+
+/*
+ * Format of a directory entry in /proc, ...
+ * This must map onto struct dirent (see <dirent.h>)
+ * The name field is a fixed PROCFS_NAMELEN bytes, so every entry has
+ * the same size; UIO_MX is that size.
+ */
+#define PROCFS_NAMELEN 8
+struct pfsdent {
+ u_long d_fileno;
+ u_short d_reclen;
+ u_char d_type;
+ u_char d_namlen;
+ char d_name[PROCFS_NAMELEN];
+};
+#define UIO_MX sizeof(struct pfsdent)
+/*
+ * Build the file number for the node of kind (type) that belongs to
+ * process (pid).  The root is always 2.  Every other node gets a
+ * per-process base, (pid + 1) << 4, plus the pfstype value.
+ *
+ * pfstype has ten members, so the base has to leave four low bits free.
+ * With a shift of three, Pnotepg (9) of process N came out equal to
+ * Pproc (1) of process N + 1, so the numbers were not unique.
+ */
+#define PROCFS_FILENO(pid, type) \
+	(((type) == Proot) ? \
+		2 : \
+		((((pid)+1) << 4) + ((int) (type))))
+
+/*
+ * Convert between pfsnode and vnode
+ */
+#define VTOPFS(vp) ((struct pfsnode *)(vp)->v_data)
+#define PFSTOV(pfs) ((pfs)->pfs_vnode)
+
+typedef struct vfs_namemap vfs_namemap_t;
+struct vfs_namemap {
+ const char *nm_name; /* name to be matched */
+ int nm_val; /* value that goes with nm_name */
+};
+
+extern int vfs_getuserstr __P((struct uio *, char *, int *));
+extern vfs_namemap_t *vfs_findname __P((vfs_namemap_t *, char *, int)); /* callers test the result against 0 before using it */
+
+/* <machine/reg.h> */
+struct reg;
+struct fpreg;
+
+#define PFIND(pid) ((pid) ? pfind(pid) : &proc0) /* pid 0 yields &proc0 without calling pfind() */
+extern int procfs_freevp __P((struct vnode *));
+extern int procfs_allocvp __P((struct mount *, struct vnode **, long, pfstype));
+extern struct vnode *procfs_findtextvp __P((struct proc *));
+extern int procfs_sstep __P((struct proc *));
+extern void procfs_fix_sstep __P((struct proc *));
+extern int procfs_read_regs __P((struct proc *, struct reg *));
+extern int procfs_write_regs __P((struct proc *, struct reg *));
+extern int procfs_read_fpregs __P((struct proc *, struct fpreg *));
+extern int procfs_write_fpregs __P((struct proc *, struct fpreg *));
+extern int procfs_donote __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio)); /* the procfs_do*() handlers below all share this one signature */
+extern int procfs_doregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_dofpregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_domem __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_doctl __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_dostatus __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+
+#define PROCFS_LOCKED 0x01
+#define PROCFS_WANT 0x02
+
+extern int (**procfs_vnodeop_p)();
+extern struct vfsops procfs_vfsops;
+
+/*
+ * Prototypes for procfs vnode ops
+ *
+ * An operation that appears here as a macro has no function of its own:
+ * the macro casts procfs_badop or nullop to the function pointer type of
+ * that operation.  procfs_read and procfs_write both expand to procfs_rw.
+ */
+int procfs_badop(); /* varargs */
+int procfs_rw __P((struct vop_read_args *));
+int procfs_lookup __P((struct vop_lookup_args *));
+#define procfs_create ((int (*) __P((struct vop_create_args *))) procfs_badop)
+#define procfs_mknod ((int (*) __P((struct vop_mknod_args *))) procfs_badop)
+int procfs_open __P((struct vop_open_args *));
+int procfs_close __P((struct vop_close_args *));
+int procfs_access __P((struct vop_access_args *));
+int procfs_getattr __P((struct vop_getattr_args *));
+int procfs_setattr __P((struct vop_setattr_args *));
+#define procfs_read procfs_rw
+#define procfs_write procfs_rw
+int procfs_ioctl __P((struct vop_ioctl_args *));
+#define procfs_select ((int (*) __P((struct vop_select_args *))) procfs_badop)
+#define procfs_mmap ((int (*) __P((struct vop_mmap_args *))) procfs_badop)
+#define procfs_fsync ((int (*) __P((struct vop_fsync_args *))) procfs_badop)
+#define procfs_seek ((int (*) __P((struct vop_seek_args *))) procfs_badop)
+#define procfs_remove ((int (*) __P((struct vop_remove_args *))) procfs_badop)
+#define procfs_link ((int (*) __P((struct vop_link_args *))) procfs_badop)
+#define procfs_rename ((int (*) __P((struct vop_rename_args *))) procfs_badop)
+#define procfs_mkdir ((int (*) __P((struct vop_mkdir_args *))) procfs_badop)
+#define procfs_rmdir ((int (*) __P((struct vop_rmdir_args *))) procfs_badop)
+#define procfs_symlink ((int (*) __P((struct vop_symlink_args *))) procfs_badop)
+int procfs_readdir __P((struct vop_readdir_args *));
+#define procfs_readlink ((int (*) __P((struct vop_readlink_args *))) procfs_badop)
+int procfs_abortop __P((struct vop_abortop_args *));
+int procfs_inactive __P((struct vop_inactive_args *));
+int procfs_reclaim __P((struct vop_reclaim_args *));
+#define procfs_lock ((int (*) __P((struct vop_lock_args *))) nullop)
+#define procfs_unlock ((int (*) __P((struct vop_unlock_args *))) nullop)
+int procfs_bmap __P((struct vop_bmap_args *));
+#define procfs_strategy ((int (*) __P((struct vop_strategy_args *))) procfs_badop)
+int procfs_print __P((struct vop_print_args *));
+#define procfs_islocked ((int (*) __P((struct vop_islocked_args *))) nullop)
+#define procfs_advlock ((int (*) __P((struct vop_advlock_args *))) procfs_badop)
+#define procfs_blkatoff ((int (*) __P((struct vop_blkatoff_args *))) procfs_badop)
+#define procfs_valloc ((int (*) __P((struct vop_valloc_args *))) procfs_badop)
+#define procfs_vfree ((int (*) __P((struct vop_vfree_args *))) nullop)
+#define procfs_truncate ((int (*) __P((struct vop_truncate_args *))) procfs_badop)
+#define procfs_update ((int (*) __P((struct vop_update_args *))) nullop)
+#endif /* KERNEL */
diff --git a/sys/fs/procfs/procfs_ctl.c b/sys/fs/procfs/procfs_ctl.c
new file mode 100644
index 000000000000..a42a03ce91cd
--- /dev/null
+++ b/sys/fs/procfs/procfs_ctl.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_ctl.c 8.3 (Berkeley) 1/21/94
+ *
+ * From:
+ * $Id: procfs_ctl.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * True iff process (p) is in trace wait state
+ * relative to process (curp): (p) is stopped, its parent
+ * is (curp), and (p) has P_TRACED set.
+ */
+#define TRACE_WAIT_P(curp, p) \
+ ((p)->p_stat == SSTOP && \
+ (p)->p_pptr == (curp) && \
+ ((p)->p_flag & P_TRACED))
+
+#ifdef notdef
+#define FIX_SSTEP(p) { \
+ procfs_fix_sstep(p); \
+ } \
+}
+#else
+#define FIX_SSTEP(p)
+#endif
+
+#define PROCFS_CTL_ATTACH 1
+#define PROCFS_CTL_DETACH 2
+#define PROCFS_CTL_STEP 3
+#define PROCFS_CTL_RUN 4
+#define PROCFS_CTL_WAIT 5
+
+static vfs_namemap_t ctlnames[] = {
+ /*
+  * special /proc commands; each name is paired with the
+  * PROCFS_CTL_* value that procfs_control() acts on.
+  * NOTE(review): the zeroed last entry presumably marks the end
+  * of the table for vfs_findname() -- confirm.
+  */
+ { "attach", PROCFS_CTL_ATTACH },
+ { "detach", PROCFS_CTL_DETACH },
+ { "step", PROCFS_CTL_STEP },
+ { "run", PROCFS_CTL_RUN },
+ { "wait", PROCFS_CTL_WAIT },
+ { 0 },
+};
+
+static vfs_namemap_t signames[] = {
+ /*
+  * regular signal names, written in lower case and without the
+  * "SIG" prefix; each is paired with its signal number.
+  */
+ { "hup", SIGHUP }, { "int", SIGINT },
+ { "quit", SIGQUIT }, { "ill", SIGILL },
+ { "trap", SIGTRAP }, { "abrt", SIGABRT },
+ { "iot", SIGIOT }, { "emt", SIGEMT },
+ { "fpe", SIGFPE }, { "kill", SIGKILL },
+ { "bus", SIGBUS }, { "segv", SIGSEGV },
+ { "sys", SIGSYS }, { "pipe", SIGPIPE },
+ { "alrm", SIGALRM }, { "term", SIGTERM },
+ { "urg", SIGURG }, { "stop", SIGSTOP },
+ { "tstp", SIGTSTP }, { "cont", SIGCONT },
+ { "chld", SIGCHLD }, { "ttin", SIGTTIN },
+ { "ttou", SIGTTOU }, { "io", SIGIO },
+ { "xcpu", SIGXCPU }, { "xfsz", SIGXFSZ },
+ { "vtalrm", SIGVTALRM }, { "prof", SIGPROF },
+ { "winch", SIGWINCH }, { "info", SIGINFO },
+ { "usr1", SIGUSR1 }, { "usr2", SIGUSR2 },
+ { 0 },
+};
+
+static int
+procfs_control(curp, p, op)
+ struct proc *curp; /* the calling process */
+ struct proc *p; /* the target process */
+ int op; /* one of the PROCFS_CTL_* values */
+{
+ int error; /* result of tsleep(); only the WAIT case uses it */
+
+ /*
+ * Attach - attaches the target process for debugging
+ * by the calling process.
+ */
+ if (op == PROCFS_CTL_ATTACH) {
+ /* check whether already being traced */
+ if (p->p_flag & P_TRACED)
+ return (EBUSY);
+
+ /* can't trace yourself! */
+ if (p->p_pid == curp->p_pid)
+ return (EINVAL);
+
+ /*
+ * Go ahead and set the trace flag.
+ * Save the old parent (it's reset in
+ * _DETACH, and also in kern_exit.c:wait4()).
+ * Reparent the process so that the tracing
+ * proc gets to see all the action.
+ * Stop the target.
+ */
+ p->p_flag |= P_TRACED;
+ p->p_xstat = 0; /* XXX ? */
+ if (p->p_pptr != curp) {
+ p->p_oppid = p->p_pptr->p_pid;
+ proc_reparent(p, curp);
+ }
+ psignal(p, SIGSTOP);
+ return (0);
+ }
+
+ /*
+ * Target process must be stopped, owned by (curp) and
+ * be set up for tracing (P_TRACED flag set).
+ * Allow DETACH to take place at any time for sanity.
+ * Allow WAIT any time, of course.
+ */
+ switch (op) {
+ case PROCFS_CTL_DETACH:
+ case PROCFS_CTL_WAIT:
+ break;
+
+ default:
+ if (!TRACE_WAIT_P(curp, p))
+ return (EBUSY);
+ }
+
+ /*
+ * do single-step fixup if needed
+ */
+ FIX_SSTEP(p);
+
+ /*
+ * Don't deliver any signal by default.
+ * To continue with a signal, just send
+ * the signal name to the ctl file
+ */
+ p->p_xstat = 0;
+
+ switch (op) {
+ /*
+ * Detach. Cleans up the target process, reparent it if possible
+ * and set it running once more.
+ */
+ case PROCFS_CTL_DETACH:
+ /* if not being traced, then this is a painless no-op */
+ if ((p->p_flag & P_TRACED) == 0)
+ return (0);
+
+ /* not being traced any more */
+ p->p_flag &= ~P_TRACED;
+
+ /* give process back to original parent */
+ if (p->p_oppid != p->p_pptr->p_pid) {
+ struct proc *pp;
+
+ pp = pfind(p->p_oppid);
+ if (pp)
+ proc_reparent(p, pp);
+ }
+
+ p->p_oppid = 0;
+ p->p_flag &= ~P_WAITED; /* XXX ? */
+ wakeup((caddr_t) curp); /* XXX for CTL_WAIT below ? */
+
+ break;
+
+ /*
+ * Step. Let the target process execute a single instruction.
+ */
+ case PROCFS_CTL_STEP:
+ procfs_sstep(p);
+ break;
+
+ /*
+ * Run. Let the target process continue running until a breakpoint
+ * or some other trap.
+ */
+ case PROCFS_CTL_RUN:
+ break;
+
+ /*
+ * Wait for the target process to stop.
+ * If the target is not being traced then just wait
+ * for it to enter the stopped state.
+ * If it is being traced, the wait also ends once the target stops
+ * being traced by (curp), and that outcome is reported as EBUSY.
+ */
+ case PROCFS_CTL_WAIT:
+ error = 0;
+ if (p->p_flag & P_TRACED) {
+ while (error == 0 &&
+ (p->p_stat != SSTOP) &&
+ (p->p_flag & P_TRACED) &&
+ (p->p_pptr == curp)) {
+ error = tsleep((caddr_t) p,
+ PWAIT|PCATCH, "procfsx", 0);
+ }
+ if (error == 0 && !TRACE_WAIT_P(curp, p))
+ error = EBUSY;
+ } else {
+ while (error == 0 && p->p_stat != SSTOP) {
+ error = tsleep((caddr_t) p,
+ PWAIT|PCATCH, "procfs", 0);
+ }
+ }
+ return (error);
+
+ default:
+ panic("procfs_control");
+ }
+
+ if (p->p_stat == SSTOP) /* DETACH, STEP and RUN all end by resuming a stopped target */
+ setrunnable(p);
+ return (0);
+}
+
+/*
+ * Handle a request on a process's ctl file.
+ *
+ * Only writes are accepted; anything else gets EOPNOTSUPP.  The text
+ * obtained through vfs_getuserstr() is looked up first in the table of
+ * debug control commands and then in the table of signal names.  Text
+ * found in neither table also gets EOPNOTSUPP.
+ *
+ * Sending a signal while the process is being debugged also has the
+ * side effect of letting the target continue to run.  There is no way
+ * to single-step a signal delivery.
+ */
+int
+procfs_doctl(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char cmd[PROCFS_CTLLEN+1];
+	vfs_namemap_t *ent;
+	int cmdlen;
+	int rv;
+
+	if (uio->uio_rw != UIO_WRITE)
+		return (EOPNOTSUPP);
+
+	cmdlen = PROCFS_CTLLEN;
+	rv = vfs_getuserstr(uio, cmd, &cmdlen);
+	if (rv)
+		return (rv);
+
+	/* debug control commands take precedence over signal names */
+	ent = vfs_findname(ctlnames, cmd, cmdlen);
+	if (ent)
+		return (procfs_control(curp, p, ent->nm_val));
+
+	ent = vfs_findname(signames, cmd, cmdlen);
+	if (!ent)
+		return (EOPNOTSUPP);
+
+	/* a target that is not in trace wait just gets the signal posted */
+	if (!TRACE_WAIT_P(curp, p)) {
+		psignal(p, ent->nm_val);
+		return (0);
+	}
+
+	/* otherwise hand the signal over in p_xstat and resume the target */
+	p->p_xstat = ent->nm_val;
+	FIX_SSTEP(p);
+	setrunnable(p);
+	return (0);
+}
diff --git a/sys/fs/procfs/procfs_fpregs.c b/sys/fs/procfs/procfs_fpregs.c
new file mode 100644
index 000000000000..6d850a6a881b
--- /dev/null
+++ b/sys/fs/procfs/procfs_fpregs.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_fpregs.c 8.1 (Berkeley) 1/27/94
+ *
+ * From:
+ * $Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Read or write the floating point registers of process (p) through
+ * its fpregs file.
+ *
+ * The file is treated as one struct fpreg.  uio_offset selects the byte
+ * within it at which the transfer starts, and the transfer is cut short
+ * at the end of the structure.  A write first fetches the current
+ * registers, overlays the bytes supplied, and stores the result back;
+ * that last step needs the target to be stopped and otherwise fails
+ * with EBUSY.
+ *
+ * An offset that is negative or lies beyond the end of the structure
+ * gets EINVAL.  This is tested before the offset is applied to the
+ * buffer pointer, so that no offset can make uiomove() touch kernel
+ * memory outside (r).
+ *
+ * uio_offset is always reset to 0 on the way out.
+ */
+int
+procfs_dofpregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct fpreg r;
+	char *kv;
+	int kl;
+
+	if (uio->uio_offset < 0 || uio->uio_offset > (int) sizeof(r)) {
+		error = EINVAL;
+	} else {
+		/* the offset is now known to lie within 0..sizeof(r) */
+		kv = (char *) &r + (int) uio->uio_offset;
+		kl = (int) sizeof(r) - (int) uio->uio_offset;
+		if (kl > uio->uio_resid)
+			kl = uio->uio_resid;
+
+		if (kl < 0)
+			error = EINVAL;
+		else
+			error = procfs_read_fpregs(p, &r);
+		if (error == 0)
+			error = uiomove(kv, kl, uio);
+		if (error == 0 && uio->uio_rw == UIO_WRITE) {
+			if (p->p_stat != SSTOP)
+				error = EBUSY;
+			else
+				error = procfs_write_fpregs(p, &r);
+		}
+	}
+
+	uio->uio_offset = 0;
+	return (error);
+}
diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c
new file mode 100644
index 000000000000..039983da09c6
--- /dev/null
+++ b/sys/fs/procfs/procfs_mem.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993 Sean Eric Fagan
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry and Sean Eric Fagan.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_mem.c 8.4 (Berkeley) 1/21/94
+ *
+ * From:
+ * $Id: procfs_mem.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * This is a lightly hacked and merged version
+ * of sef's pread/pwrite functions
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+static int
+procfs_rwmem(p, uio)
+ struct proc *p; /* process whose address space is read or written */
+ struct uio *uio; /* the transfer; uio_offset is used as the user virtual address */
+{
+ int error;
+ int writing;
+
+ writing = uio->uio_rw == UIO_WRITE;
+
+ /*
+ * Only map in one page at a time. We don't have to, but it
+ * makes things easier. This way is trivial - right?
+ */
+ do {
+ vm_map_t map, tmap;
+ vm_object_t object;
+ vm_offset_t kva;
+ vm_offset_t uva;
+ int page_offset; /* offset into page */
+ vm_offset_t pageno; /* page number */
+ vm_map_entry_t out_entry;
+ vm_prot_t out_prot;
+ vm_page_t m;
+ boolean_t wired, single_use;
+ vm_offset_t off;
+ u_int len;
+ int fix_prot;
+
+ uva = (vm_offset_t) uio->uio_offset;
+ if (uva > VM_MAXUSER_ADDRESS) {
+ error = 0; /* beyond the user address range: stop here without an error */
+ break;
+ }
+
+ /*
+ * Get the page number of this segment.
+ */
+ pageno = trunc_page(uva);
+ page_offset = uva - pageno;
+
+ /*
+ * How many bytes to copy
+ */
+ len = min(PAGE_SIZE - page_offset, uio->uio_resid);
+
+ /*
+ * The map we want...
+ */
+ map = &p->p_vmspace->vm_map;
+
+ /*
+ * Check the permissions for the area we're interested
+ * in.
+ */
+ fix_prot = 0;
+ if (writing)
+ fix_prot = !vm_map_check_protection(map, pageno,
+ pageno + PAGE_SIZE, VM_PROT_WRITE);
+
+ if (fix_prot) {
+ /*
+ * If the page is not writable, we make it so.
+ * XXX It is possible that a page may *not* be
+ * read/executable, if a process changes that!
+ * We will assume, for now, that a page is either
+ * VM_PROT_ALL, or VM_PROT_READ|VM_PROT_EXECUTE.
+ */
+ error = vm_map_protect(map, pageno,
+ pageno + PAGE_SIZE, VM_PROT_ALL, 0);
+ if (error)
+ break;
+ }
+
+ /*
+ * Now we need to get the page. out_entry, out_prot, wired,
+ * and single_use aren't used. One would think the vm code
+ * would be a *bit* nicer... We use tmap because
+ * vm_map_lookup() can change the map argument.
+ */
+ tmap = map;
+ error = vm_map_lookup(&tmap, pageno,
+ writing ? VM_PROT_WRITE : VM_PROT_READ,
+ &out_entry, &object, &off, &out_prot,
+ &wired, &single_use);
+ /*
+ * We're done with tmap now.
+ */
+ if (!error)
+ vm_map_lookup_done(tmap, out_entry);
+
+ /*
+ * Fault the page in...
+ * (only for a write to an object that has a shadow, and only
+ * when the page is absent or flagged PG_COPYONWRITE)
+ */
+ if (!error && writing && object->shadow) {
+ m = vm_page_lookup(object, off);
+ if (m == 0 || (m->flags & PG_COPYONWRITE))
+ error = vm_fault(map, pageno,
+ VM_PROT_WRITE, FALSE);
+ }
+
+ /* Find space in kernel_map for the page we're interested in */
+ if (!error)
+ error = vm_map_find(kernel_map, object, off, &kva,
+ PAGE_SIZE, 1);
+
+ if (!error) {
+ /*
+ * Neither vm_map_lookup() nor vm_map_find() appear
+ * to add a reference count to the object, so we do
+ * that here and now.
+ */
+ vm_object_reference(object);
+
+ /*
+ * Mark the page we just found as pageable.
+ */
+ error = vm_map_pageable(kernel_map, kva,
+ kva + PAGE_SIZE, 0);
+
+ /*
+ * Now do the i/o move.
+ */
+ if (!error)
+ error = uiomove(kva + page_offset, len, uio);
+
+ vm_map_remove(kernel_map, kva, kva + PAGE_SIZE); /* done whether or not the move worked */
+ }
+ if (fix_prot) /* put back read/execute, per the assumption stated where the page was made writable */
+ vm_map_protect(map, pageno, pageno + PAGE_SIZE,
+ VM_PROT_READ|VM_PROT_EXECUTE, 0);
+ } while (error == 0 && uio->uio_resid > 0);
+
+ return (error);
+}
+
+/*
+ * Move data between the caller and the memory of the target process (p).
+ * The work is done by procfs_rwmem(), which maps the target's pages into
+ * the kernel one at a time and runs uiomove() on the kernel mapping.
+ * A request for zero bytes succeeds at once.
+ */
+int
+procfs_domem(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	/* nothing asked for, nothing to do */
+	if (uio->uio_resid != 0)
+		return (procfs_rwmem(p, uio));
+
+	return (0);
+}
+
+/*
+ * Given process (p), find the vnode from which
+ * its text segment is being executed.
+ *
+ * It would be nice to grab this information from
+ * the VM system, however, there is no sure-fire
+ * way of doing that. Instead, fork(), exec() and
+ * wait() all maintain the p_textvp field in the
+ * process proc structure which contains a held
+ * reference to the exec'ed vnode.
+ */
+struct vnode *
+procfs_findtextvp(p)
+ struct proc *p;
+{
+ return (p->p_textvp);
+}
+
+
+#ifdef probably_never
+/*
+ * Given process (p), find the vnode from which
+ * its text segment is being mapped.
+ *
+ * (This is here, rather than in procfs_subr in order
+ * to keep all the VM related code in one place.)
+ *
+ * This version is compiled only when probably_never is defined.
+ * It walks the user address range a page at a time and returns the
+ * handle of the first PG_VNODE pager it finds, or 0 if there is none.
+ */
+struct vnode *
+procfs_findtextvp(p)
+ struct proc *p;
+{
+ int error;
+ vm_object_t object;
+ vm_offset_t pageno; /* page number */
+
+ /* find a vnode pager for the user address space */
+
+ for (pageno = VM_MIN_ADDRESS;
+ pageno < VM_MAXUSER_ADDRESS;
+ pageno += PAGE_SIZE) {
+ vm_map_t map;
+ vm_map_entry_t out_entry;
+ vm_prot_t out_prot;
+ boolean_t wired, single_use;
+ vm_offset_t off;
+
+ map = &p->p_vmspace->vm_map;
+ error = vm_map_lookup(&map, pageno,
+ VM_PROT_READ,
+ &out_entry, &object, &off, &out_prot,
+ &wired, &single_use);
+
+ if (!error) {
+ vm_pager_t pager;
+
+ printf("procfs: found vm object\n");
+ vm_map_lookup_done(map, out_entry);
+ printf("procfs: vm object = %x\n", object);
+
+ /*
+ * At this point, assuming no errors, object
+ * is the VM object mapping UVA (pageno).
+ * Ensure it has a vnode pager, then grab
+ * the vnode from that pager's handle.
+ */
+
+ pager = object->pager;
+ printf("procfs: pager = %x\n", pager);
+ if (pager)
+ printf("procfs: found pager, type = %d\n", pager->pg_type);
+ if (pager && pager->pg_type == PG_VNODE) {
+ struct vnode *vp;
+
+ vp = (struct vnode *) pager->pg_handle;
+ printf("procfs: vp = 0x%x\n", vp);
+ return (vp);
+ }
+ }
+ }
+
+ printf("procfs: text object not found\n");
+ return (0);
+}
+#endif /* probably_never */
diff --git a/sys/fs/procfs/procfs_note.c b/sys/fs/procfs/procfs_note.c
new file mode 100644
index 000000000000..bf2f160baa0d
--- /dev/null
+++ b/sys/fs/procfs/procfs_note.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_note.c 8.2 (Berkeley) 1/21/94
+ *
+ * From:
+ * $Id: procfs_note.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/signal.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Handle a request on a process's note file.
+ *
+ * Only writes are accepted; anything else gets EINVAL.  The request is
+ * passed through vfs_getuserstr() with a limit of PROCFS_NOTELEN, and a
+ * failure there is returned as is.  Nothing is done with the text yet,
+ * so a request that gets that far ends in EOPNOTSUPP.
+ */
+int
+procfs_donote(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char text[PROCFS_NOTELEN+1];
+	int textlen;
+	int rv;
+
+	if (uio->uio_rw != UIO_WRITE)
+		return (EINVAL);
+
+	textlen = PROCFS_NOTELEN;
+	rv = vfs_getuserstr(uio, text, &textlen);
+
+	/* send to process's notify function */
+	return (rv ? rv : EOPNOTSUPP);
+}
diff --git a/sys/fs/procfs/procfs_regs.c b/sys/fs/procfs/procfs_regs.c
new file mode 100644
index 000000000000..fa95fef8f102
--- /dev/null
+++ b/sys/fs/procfs/procfs_regs.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_regs.c 8.3 (Berkeley) 1/27/94
+ *
+ * From:
+ * $Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Read or write the register set of process (p).
+ *
+ * A local struct reg is filled in by procfs_read_regs() and uiomove()
+ * transfers the part of it selected by uio_offset/uio_resid.  For a
+ * write, the modified copy is then handed to procfs_write_regs(), but
+ * only if the target is stopped (EBUSY otherwise).
+ *
+ * The offset is validated before it is applied to the kernel pointer,
+ * so a negative or oversized offset can never move the transfer
+ * window outside of (r); such offsets yield EINVAL.
+ *
+ * The file offset is reset to zero before returning, whatever the
+ * outcome.
+ */
+int
+procfs_doregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct reg r;
+	char *kv = (char *) &r;
+	int kl = 0;
+
+	if (uio->uio_offset < 0 || uio->uio_offset > (int) sizeof(r)) {
+		error = EINVAL;
+	} else {
+		/* offset is now known to lie within [0, sizeof(r)] */
+		kv += (int) uio->uio_offset;
+		kl = (int) sizeof(r) - (int) uio->uio_offset;
+		if (kl > uio->uio_resid)
+			kl = uio->uio_resid;
+
+		if (kl < 0)
+			error = EINVAL;
+		else
+			error = procfs_read_regs(p, &r);
+	}
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		if (p->p_stat != SSTOP)
+			error = EBUSY;
+		else
+			error = procfs_write_regs(p, &r);
+	}
+
+	uio->uio_offset = 0;
+	return (error);
+}
diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c
new file mode 100644
index 000000000000..d88aaabdfb05
--- /dev/null
+++ b/sys/fs/procfs/procfs_status.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_status.c 8.3 (Berkeley) 2/17/94
+ *
+ * From:
+ * $Id: procfs_status.c,v 3.1 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Produce the text of the status file for process (p).
+ *
+ * Only reads are supported (EOPNOTSUPP otherwise).  The complete
+ * line is formatted into a stack buffer and the part selected by
+ * uio_offset/uio_resid is copied out with uiomove().  A read that
+ * starts at or beyond the end of the line transfers nothing and
+ * returns 0; a negative offset is rejected with EINVAL.
+ *
+ * NOTE: the line is assembled with unbounded sprintf() calls, so
+ * psbuf is relied upon to be large enough for the longest line.
+ */
+int
+procfs_dostatus(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	struct session *sess;
+	struct tty *tp;
+	struct ucred *cr;
+	char *ps;
+	char *sep;
+	int pid, ppid, pgid, sid;
+	int i;
+	int xlen;
+	int error;
+	char psbuf[256];		/* XXX - conservative */
+
+	if (uio->uio_rw != UIO_READ)
+		return (EOPNOTSUPP);
+
+	pid = p->p_pid;
+	ppid = p->p_pptr ? p->p_pptr->p_pid : 0;
+	pgid = p->p_pgrp->pg_id;
+	sess = p->p_pgrp->pg_session;
+	sid = sess->s_leader ? sess->s_leader->p_pid : 0;
+
+/* comm pid ppid pgid sid maj,min ctty,sldr start ut st wmsg uid groups ... */
+
+	/* command name: at most MAXCOMLEN bytes, forcibly terminated */
+	ps = psbuf;
+	bcopy(p->p_comm, ps, MAXCOMLEN);
+	ps[MAXCOMLEN] = '\0';
+	ps += strlen(ps);
+	ps += sprintf(ps, " %d %d %d %d ", pid, ppid, pgid, sid);
+
+	/* controlling terminal device, or -1,-1 if there is none */
+	if ((p->p_flag&P_CONTROLT) && (tp = sess->s_ttyp))
+		ps += sprintf(ps, "%d,%d ", major(tp->t_dev), minor(tp->t_dev));
+	else
+		ps += sprintf(ps, "%d,%d ", -1, -1);
+
+	/* comma separated flag list, "noflags" if it would be empty */
+	sep = "";
+	if (sess->s_ttyvp) {
+		ps += sprintf(ps, "%sctty", sep);
+		sep = ",";
+	}
+	if (SESS_LEADER(p)) {
+		ps += sprintf(ps, "%ssldr", sep);
+		sep = ",";
+	}
+	if (*sep != ',')
+		ps += sprintf(ps, "noflags");
+
+	/* start time is only reported while P_INMEM is set */
+	if (p->p_flag & P_INMEM)
+		ps += sprintf(ps, " %d,%d",
+			p->p_stats->p_start.tv_sec,
+			p->p_stats->p_start.tv_usec);
+	else
+		ps += sprintf(ps, " -1,-1");
+
+	/* user and system time */
+	{
+		struct timeval ut, st;
+
+		calcru(p, &ut, &st, (void *) 0);
+		ps += sprintf(ps, " %d,%d %d,%d",
+			ut.tv_sec,
+			ut.tv_usec,
+			st.tv_sec,
+			st.tv_usec);
+	}
+
+	ps += sprintf(ps, " %s",
+		(p->p_wchan && p->p_wmesg) ? p->p_wmesg : "nochan");
+
+	cr = p->p_ucred;
+
+	/* uid followed by the group list; one argument per conversion */
+	ps += sprintf(ps, " %d", cr->cr_uid);
+	for (i = 0; i < cr->cr_ngroups; i++)
+		ps += sprintf(ps, ",%d", cr->cr_groups[i]);
+	ps += sprintf(ps, "\n");
+
+	/*
+	 * Validate the offset before using it for pointer arithmetic,
+	 * and clamp with a plain signed comparison so the result does
+	 * not depend on how min() treats a negative length.
+	 */
+	xlen = ps - psbuf;
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	if (uio->uio_offset >= xlen)
+		return (0);
+	ps = psbuf + (int) uio->uio_offset;
+	xlen -= (int) uio->uio_offset;
+	if (xlen > uio->uio_resid)
+		xlen = uio->uio_resid;
+	if (xlen <= 0)
+		error = 0;
+	else
+		error = uiomove(ps, xlen, uio);
+
+	return (error);
+}
diff --git a/sys/fs/procfs/procfs_subr.c b/sys/fs/procfs/procfs_subr.c
new file mode 100644
index 000000000000..b371af19af0b
--- /dev/null
+++ b/sys/fs/procfs/procfs_subr.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_subr.c 8.4 (Berkeley) 1/27/94
+ *
+ * From:
+ * $Id: procfs_subr.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <miscfs/procfs/procfs.h>
+
+/* head of the singly-linked list of pfsnodes, chained through pfs_next */
+static struct pfsnode *pfshead;
+/* PROCFS_LOCKED / PROCFS_WANT flag word used by procfs_allocvp() */
+static int pfsvplock;
+
+/*
+ * allocate a pfsnode/vnode pair. the vnode is
+ * referenced, but not locked.
+ *
+ * the pid, pfs_type, and mount point uniquely
+ * identify a pfsnode. the mount point is needed
+ * because someone might mount this filesystem
+ * twice.
+ *
+ * all pfsnodes are maintained on a singly-linked
+ * list. new nodes are only allocated when they cannot
+ * be found on this list. entries on the list are
+ * removed when the vfs reclaim entry is called.
+ *
+ * a single lock is kept for the entire list. this is
+ * needed because the getnewvnode() function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode. this lock is only taken if we are going to
+ * call getnewvnode, since the kernel itself is single-threaded.
+ *
+ * if an entry is found on the list, then call vget() to
+ * take a reference. this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ */
+int
+procfs_allocvp(mp, vpp, pid, pfs_type)
+	struct mount *mp;
+	struct vnode **vpp;
+	long pid;
+	pfstype pfs_type;
+{
+	struct pfsnode *pfs;
+	struct pfsnode **tailp;
+	int error;
+
+loop:
+	/* reuse the node for (pid, pfs_type, mp) if it is already listed */
+	pfs = pfshead;
+	while (pfs != 0) {
+		if (pfs->pfs_pid == pid &&
+		    pfs->pfs_type == pfs_type &&
+		    PFSTOV(pfs)->v_mount == mp) {
+			/* a failed vget() means start the search again */
+			if (vget(pfs->pfs_vnode, 0))
+				goto loop;
+			*vpp = pfs->pfs_vnode;
+			return (0);
+		}
+		pfs = pfs->pfs_next;
+	}
+
+	/*
+	 * not found: take the list lock around getnewvnode(), since
+	 * that can block.  if somebody else holds the lock, wait for
+	 * it and rescan from the top.
+	 */
+	if (pfsvplock & PROCFS_LOCKED) {
+		pfsvplock |= PROCFS_WANT;
+		sleep((caddr_t) &pfsvplock, PINOD);
+		goto loop;
+	}
+	pfsvplock |= PROCFS_LOCKED;
+
+	error = getnewvnode(VT_PROCFS, mp, procfs_vnodeop_p, vpp);
+	if (error == 0) {
+		MALLOC((*vpp)->v_data, void *, sizeof(struct pfsnode),
+			M_TEMP, M_WAITOK);
+
+		pfs = VTOPFS(*vpp);
+		pfs->pfs_next = 0;
+		pfs->pfs_pid = (pid_t) pid;
+		pfs->pfs_type = pfs_type;
+		pfs->pfs_vnode = *vpp;
+		pfs->pfs_flags = 0;
+		pfs->pfs_fileno = PROCFS_FILENO(pid, pfs_type);
+
+		/* access mode depends only on the kind of node */
+		switch (pfs_type) {
+		case Proot:	/* /proc = dr-xr-xr-x */
+		case Pproc:
+			pfs->pfs_mode = (VREAD|VEXEC) |
+					((VREAD|VEXEC) >> 3) |
+					((VREAD|VEXEC) >> 6);
+			break;
+
+		case Pfile:
+		case Pmem:
+		case Pregs:
+		case Pfpregs:
+			pfs->pfs_mode = (VREAD|VWRITE);
+			break;
+
+		case Pctl:
+		case Pnote:
+		case Pnotepg:
+			pfs->pfs_mode = (VWRITE);
+			break;
+
+		case Pstatus:
+			pfs->pfs_mode = (VREAD) |
+					(VREAD >> 3) |
+					(VREAD >> 6);
+			break;
+
+		default:
+			panic("procfs_allocvp");
+		}
+
+		/* append the new node at the tail of the list */
+		tailp = &pfshead;
+		while (*tailp != 0)
+			tailp = &(*tailp)->pfs_next;
+		*tailp = pfs;
+	}
+
+	/* drop the list lock and wake up anybody waiting for it */
+	pfsvplock &= ~PROCFS_LOCKED;
+
+	if (pfsvplock & PROCFS_WANT) {
+		pfsvplock &= ~PROCFS_WANT;
+		wakeup((caddr_t) &pfsvplock);
+	}
+
+	return (error);
+}
+
+/*
+ * Unlink the pfsnode belonging to (vp) from the pfsnode list, if it
+ * is on it, then free the node and clear vp->v_data.
+ * Always returns 0.
+ */
+int
+procfs_freevp(vp)
+	struct vnode *vp;
+{
+	struct pfsnode *pfs = VTOPFS(vp);
+	struct pfsnode **linkp = &pfshead;
+
+	/* find the link that points at our node */
+	while (*linkp != 0 && *linkp != pfs)
+		linkp = &(*linkp)->pfs_next;
+	if (*linkp != 0)
+		*linkp = pfs->pfs_next;
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+	return (0);
+}
+
+/*
+ * Common read/write entry point.  Looks up the process the node
+ * refers to (EINVAL if PFIND fails) and hands the request to the
+ * handler for the node type.  Node types without a handler get
+ * EOPNOTSUPP.
+ */
+int
+procfs_rw(ap)
+	struct vop_read_args *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct proc *curp = uio->uio_procp;
+	struct proc *p;
+	int error;
+
+	p = PFIND(pfs->pfs_pid);
+	if (p == 0)
+		return (EINVAL);
+
+	switch (pfs->pfs_type) {
+	case Pnote:
+	case Pnotepg:
+		error = procfs_donote(curp, p, pfs, uio);
+		break;
+
+	case Pregs:
+		error = procfs_doregs(curp, p, pfs, uio);
+		break;
+
+	case Pfpregs:
+		error = procfs_dofpregs(curp, p, pfs, uio);
+		break;
+
+	case Pctl:
+		error = procfs_doctl(curp, p, pfs, uio);
+		break;
+
+	case Pstatus:
+		error = procfs_dostatus(curp, p, pfs, uio);
+		break;
+
+	case Pmem:
+		error = procfs_domem(curp, p, pfs, uio);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Copy a string written by the user into (buf) and nul-terminate
+ * it.  A single trailing newline is stripped, which makes the
+ * interface easy to drive from the shell.
+ *
+ * On entry *buflenp is the largest string that is acceptable; (buf)
+ * must have room for one more byte than that, for the terminator.
+ * On success *buflenp is set to the length of the cleaned-up string
+ * and the file offset is reset to zero.
+ *
+ * Returns 0 on success or one of:
+ *
+ * EINVAL:   file offset is non-zero.
+ * EMSGSIZE: message is longer than the kernel buffer.
+ * any error reported by uiomove().
+ */
+int
+vfs_getuserstr(uio, buf, buflenp)
+	struct uio *uio;
+	char *buf;
+	int *buflenp;
+{
+	int len;
+	int error;
+
+	if (uio->uio_offset != 0)
+		return (EINVAL);
+
+	/* must be able to read the whole string in one go */
+	if (*buflenp < uio->uio_resid)
+		return (EMSGSIZE);
+	len = uio->uio_resid;
+
+	if ((error = uiomove(buf, len, uio)) != 0)
+		return (error);
+
+	/* allow multiple writes without seeks */
+	uio->uio_offset = 0;
+
+	/* terminate, then measure up to the first nul in the data */
+	buf[len] = '\0';
+	len = strlen(buf);
+
+	/* remove trailing newline */
+	if (len > 0 && buf[len-1] == '\n') {
+		len--;
+		buf[len] = '\0';
+	}
+	*buflenp = len;
+
+	return (0);
+}
+
+/*
+ * Search the name map (nm), which ends at the first entry with a
+ * null nm_name, for an entry whose name matches the first buflen+1
+ * bytes of (buf).  Returns the matching entry, or 0 if none matches.
+ */
+vfs_namemap_t *
+vfs_findname(nm, buf, buflen)
+	vfs_namemap_t *nm;
+	char *buf;
+	int buflen;
+{
+	while (nm->nm_name) {
+		if (bcmp(buf, (char *) nm->nm_name, buflen+1) == 0)
+			return (nm);
+		nm++;
+	}
+
+	return (0);
+}
diff --git a/sys/fs/procfs/procfs_vfsops.c b/sys/fs/procfs/procfs_vfsops.c
new file mode 100644
index 000000000000..3938ca123576
--- /dev/null
+++ b/sys/fs/procfs/procfs_vfsops.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_vfsops.c 8.4 (Berkeley) 1/21/94
+ *
+ * From:
+ * $Id: procfs_vfsops.c,v 3.1 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * procfs VFS interface
+ */
+
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/syslog.h>
+#include <sys/mount.h>
+#include <sys/signalvar.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h> /* for PAGE_SIZE */
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * Refuses to mount if UIO_MX is not a power of two (EINVAL) and does
+ * not support MNT_UPDATE (EOPNOTSUPP).  Otherwise marks the mount
+ * local, assigns a filesystem id and fills in the mounted-on and
+ * mounted-from names, the latter always being "procfs".
+ */
+/* ARGSUSED */
+int
+procfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	u_int size;
+
+	/* x & (x-1) is non-zero unless x is zero or a power of two */
+	if (UIO_MX & (UIO_MX-1)) {
+		log(LOG_ERR, "procfs: invalid directory entry size\n");
+		return (EINVAL);
+	}
+
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = 0;
+	getnewfsid(mp, MOUNT_PROCFS);
+
+	/* mounted-on name, zero padded; the copyin result is ignored */
+	(void) copyinstr(path, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+	/* mounted-from name is the fixed string "procfs", zero padded */
+	size = sizeof("procfs") - 1;
+	bcopy("procfs", mp->mnt_stat.f_mntfromname, size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+	return (0);
+}
+
+/*
+ * unmount system call
+ *
+ * Flushes the vnodes of the mount with vflush() and returns its
+ * result.  A forced unmount is refused with EINVAL unless the
+ * global (doforce) is set.
+ */
+int
+procfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	int flags;
+
+	flags = 0;
+	if (mntflags & MNT_FORCE) {
+		/* procfs can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		flags = FORCECLOSE;
+	}
+
+	return (vflush(mp, 0, flags));
+}
+
+/*
+ * Return the root vnode of the procfs mount (mp) in (*vpp).
+ * The vnode is obtained from procfs_allocvp() for pid 0 / Proot and
+ * is marked as a directory and as the root of the filesystem.
+ * Returns 0, or the error from procfs_allocvp().
+ */
+int
+procfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *vp;
+	int error;
+
+	error = procfs_allocvp(mp, &vp, (pid_t) 0, Proot);
+	if (error)
+		return (error);
+
+	vp->v_type = VDIR;
+	vp->v_flag = VROOT;
+
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * start entry of the vfs operations: nothing to do for procfs,
+ * always succeeds.
+ */
+/* ARGSUSED */
+int
+procfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Get file system statistics.
+ *
+ * The numbers are invented: one block of PAGE_SIZE bytes with no
+ * free space, and file counts derived from maxproc and nprocs.
+ * The fsid and mount names are copied from the mount structure
+ * unless (sbp) already is the mount's own statfs buffer.
+ */
+int
+procfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs *msp = &mp->mnt_stat;
+
+	sbp->f_type = MOUNT_PROCFS;
+	sbp->f_bsize = PAGE_SIZE;
+	sbp->f_iosize = PAGE_SIZE;
+	sbp->f_blocks = 1;	/* avoid divide by zero in some df's */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = maxproc;			/* approx */
+	sbp->f_ffree = maxproc - nprocs;	/* approx */
+
+	if (sbp == msp)
+		return (0);
+
+	bcopy(&msp->f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(msp->f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(msp->f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+
+	return (0);
+}
+
+
+/*
+ * procfs does not implement quotas: always EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+procfs_quotactl(mp, cmds, uid, arg, p)
+	struct mount *mp;
+	int cmds;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * sync entry of the vfs operations: does nothing, always succeeds.
+ */
+/* ARGSUSED */
+int
+procfs_sync(mp, waitfor)
+	struct mount *mp;
+	int waitfor;
+{
+
+	return (0);
+}
+
+/*
+ * Lookup of a vnode by inode number is not implemented for procfs:
+ * always EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+procfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * File handle to vnode translation is not implemented for procfs:
+ * always EINVAL.
+ */
+/* ARGSUSED */
+int
+procfs_fhtovp(mp, fhp, vpp)
+	struct mount *mp;
+	struct fid *fhp;
+	struct vnode **vpp;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * Vnode to file handle translation is not implemented for procfs:
+ * always EINVAL.
+ */
+/* ARGSUSED */
+int
+procfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * init entry of the vfs operations: nothing to set up, always
+ * succeeds.
+ */
+int
+procfs_init()
+{
+
+	return (0);
+}
+
+/*
+ * VFS operations vector for procfs.  The initializer is positional,
+ * so the entries presumably have to follow the member order of
+ * struct vfsops -- confirm against its declaration before reordering.
+ */
+struct vfsops procfs_vfsops = {
+ procfs_mount,
+ procfs_start,
+ procfs_unmount,
+ procfs_root,
+ procfs_quotactl,
+ procfs_statfs,
+ procfs_sync,
+ procfs_vget,
+ procfs_fhtovp,
+ procfs_vptofh,
+ procfs_init,
+};
diff --git a/sys/fs/procfs/procfs_vnops.c b/sys/fs/procfs/procfs_vnops.c
new file mode 100644
index 000000000000..4e1ee002bb90
--- /dev/null
+++ b/sys/fs/procfs/procfs_vnops.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_vnops.c 8.6 (Berkeley) 2/7/94
+ *
+ * From:
+ * $Id: procfs_vnops.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * procfs vnode interface
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h> /* for PAGE_SIZE */
+
+/*
+ * Vnode Operations.
+ *
+ */
+
+/*
+ * This is a list of the valid names in the
+ * process-specific sub-directories. It is
+ * used in procfs_lookup and procfs_readdir
+ *
+ * Each entry holds the length of the name (without the
+ * terminating nul), the name itself and the pfstype of
+ * the node the name stands for.
+ */
+static struct pfsnames {
+ u_short d_namlen;
+ char d_name[PROCFS_NAMELEN];
+ pfstype d_pfstype;
+} procent[] = {
+/* N(s) expands to two initializers: the length of literal s, then s */
+#define N(s) sizeof(s)-1, s
+ /* namlen, nam, type */
+ { N("file"), Pfile },
+ { N("mem"), Pmem },
+ { N("regs"), Pregs },
+ { N("fpregs"), Pfpregs },
+ { N("ctl"), Pctl },
+ { N("status"), Pstatus },
+ { N("note"), Pnote },
+ { N("notepg"), Pnotepg },
+#undef N
+};
+/* number of entries in procent[] */
+#define Nprocent (sizeof(procent)/sizeof(procent[0]))
+
+static pid_t atopid __P((const char *, u_int));
+
+/*
+ * set things up for doing i/o on
+ * the pfsnode (vp). (vp) is locked
+ * on entry, and should be left locked
+ * on exit.
+ *
+ * nothing needs to be done for i/o on procfs
+ * itself. the only work is for process memory
+ * images (Pmem): the process must still exist
+ * (ENOENT otherwise), and an open for writing
+ * conflicts with an exclusive open, in either
+ * order (EBUSY). an open for writing records
+ * its FWRITE/O_EXCL bits in the pfsnode.
+ */
+int
+procfs_open(ap)
+	struct vop_open_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+	/* only memory images need any attention */
+	if (pfs->pfs_type != Pmem)
+		return (0);
+
+	if (PFIND(pfs->pfs_pid) == 0)
+		return (ENOENT);	/* was ESRCH, jsp */
+
+	if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
+	    ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
+		return (EBUSY);
+
+	if (ap->a_mode & FWRITE)
+		pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
+
+	return (0);
+}
+
+/*
+ * close the pfsnode (vp) after doing i/o.
+ * (vp) is not locked on entry or exit.
+ *
+ * the only thing to undo is the exclusive open
+ * state of a memory image (see _open above): it
+ * is cleared when a descriptor that was open for
+ * writing is closed. always returns 0.
+ */
+int
+procfs_close(ap)
+	struct vop_close_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+	if (pfs->pfs_type == Pmem) {
+		if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
+			pfs->pfs_flags &= ~(FWRITE|O_EXCL);
+	}
+
+	return (0);
+}
+
+/*
+ * do an ioctl operation on pfsnode (vp).
+ * (vp) is not locked on entry or exit.
+ *
+ * no ioctls are implemented: always ENOTTY.
+ */
+/* ARGSUSED */
+int
+procfs_ioctl(ap)
+	struct vop_ioctl_args *ap;
+{
+
+	return (ENOTTY);
+}
+
+/*
+ * do block mapping for pfsnode (vp).
+ * procfs does not use the buffer cache, so
+ * this function should never be called, and
+ * it is not clear what part of the kernel
+ * would ever call it. for sanity this is
+ * the usual no-op bmap: the vnode and block
+ * number map to themselves. returning (EIO)
+ * would be a reasonable alternative.
+ */
+int
+procfs_bmap(ap)
+	struct vop_bmap_args *ap;
+{
+
+	/* either result pointer may be absent */
+	if (ap->a_vpp != NULL) {
+		*ap->a_vpp = ap->a_vp;
+	}
+	if (ap->a_bnp != NULL) {
+		*ap->a_bnp = ap->a_bn;
+	}
+
+	return (0);
+}
+
+/*
+ * _inactive is called when the pfsnode
+ * is vrele'd and the reference count goes
+ * to zero. (vp) will be on the vnode free
+ * list, so to get it back vget() must be
+ * used.
+ *
+ * for procfs, throw the vnode away with
+ * vgone() if the process it refers to no
+ * longer exists. this may be overkill and
+ * a waste of time, since the process will
+ * most likely still be there and PFIND is
+ * not free.
+ *
+ * (vp) is not locked on entry or exit.
+ */
+int
+procfs_inactive(ap)
+	struct vop_inactive_args *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct pfsnode *pfs = VTOPFS(vp);
+
+	if (PFIND(pfs->pfs_pid) == 0) {
+		vgone(vp);
+	}
+
+	return (0);
+}
+
+/*
+ * _reclaim is called when getnewvnode()
+ * wants to make use of an entry on the vnode
+ * free list. at this time the filesystem needs
+ * to free any private data and remove the node
+ * from any private lists; procfs_freevp() does
+ * both, and its result is returned.
+ */
+int
+procfs_reclaim(ap)
+	struct vop_reclaim_args *ap;
+{
+
+	return (procfs_freevp(ap->a_vp));
+}
+
+/*
+ * Return POSIX pathconf information applicable to special devices.
+ * The value for a known name is stored in *a_retval and 0 is
+ * returned; an unknown name yields EINVAL and stores nothing.
+ */
+int
+procfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int error = 0;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		break;
+	case _PC_MAX_CANON:
+		*ap->a_retval = MAX_CANON;
+		break;
+	case _PC_MAX_INPUT:
+		*ap->a_retval = MAX_INPUT;
+		break;
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		break;
+	case _PC_VDISABLE:
+		*ap->a_retval = _POSIX_VDISABLE;
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * _print is used for debugging.
+ * just print a readable description
+ * of (vp): the pid, mode and flags of
+ * its pfsnode. always returns 0, so
+ * the caller never sees an undefined
+ * result.
+ */
+int
+procfs_print(ap)
+	struct vop_print_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+	printf("tag VT_PROCFS, pid %d, mode %x, flags %x\n",
+		pfs->pfs_pid,
+		pfs->pfs_mode, pfs->pfs_flags);
+
+	return (0);
+}
+
+/*
+ * _abortop is called when operations such as
+ * rename and create fail. this entry is responsible
+ * for undoing any side-effects caused by the lookup.
+ * for procfs that is just the pathname buffer, which
+ * is freed when HASBUF is set and SAVESTART is not.
+ */
+int
+procfs_abortop(ap)
+	struct vop_abortop_args *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) {
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+	}
+
+	return (0);
+}
+
+/*
+ * generic entry point for unsupported operations:
+ * always fails with EIO.
+ */
+int
+procfs_badop()
+{
+
+	return (EIO);
+}
+
+/*
+ * Invent attributes for pfsnode (vp) and store
+ * them in (vap).
+ * Directory lengths are returned as zero since
+ * any real length would require the genuine size
+ * to be computed, and nothing cares anyway.
+ *
+ * this is relatively minimal for procfs.
+ *
+ * Returns ENOENT if the node refers to a process
+ * that no longer exists, EOPNOTSUPP for Pfile
+ * nodes, and 0 otherwise.
+ */
+int
+procfs_getattr(ap)
+	struct vop_getattr_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct vattr *vap = ap->a_vap;
+	struct proc *procp = 0;
+	int error = 0;
+
+	/* first check the process still exists; the root has none */
+	if (pfs->pfs_type != Proot) {
+		procp = PFIND(pfs->pfs_pid);
+		if (procp == 0)
+			return (ENOENT);
+	}
+
+	/* start by zeroing out the attributes */
+	VATTR_NULL(vap);
+
+	/* next do all the common fields */
+	vap->va_type = ap->a_vp->v_type;
+	vap->va_mode = pfs->pfs_mode;
+	vap->va_fileid = pfs->pfs_fileno;
+	vap->va_flags = 0;
+	vap->va_blocksize = PAGE_SIZE;
+	vap->va_bytes = vap->va_size = 0;
+
+	/*
+	 * If the process has exercised some setuid or setgid
+	 * privilege, then rip away read/write permission so
+	 * that only root can gain access.
+	 */
+	if ((pfs->pfs_type == Pregs ||
+	     pfs->pfs_type == Pfpregs ||
+	     pfs->pfs_type == Pmem) &&
+	    (procp->p_flag & P_SUGID))
+		vap->va_mode &= ~((VREAD|VWRITE)|
+				  ((VREAD|VWRITE)>>3)|
+				  ((VREAD|VWRITE)>>6));
+
+	/*
+	 * Make all times be current TOD.
+	 * It would be possible to get the process start
+	 * time from the p_stat structure, but there's
+	 * no "file creation" time stamp anyway, and the
+	 * p_stat structure is not addressable if u. gets
+	 * swapped out for that process.
+	 */
+	microtime(&vap->va_ctime);
+	vap->va_atime = vap->va_mtime = vap->va_ctime;
+
+	/*
+	 * now do the object specific fields
+	 *
+	 * The size could be set from struct reg, but it's hardly
+	 * worth the trouble, and it puts some (potentially) machine
+	 * dependent data into this machine-independent code. If it
+	 * becomes important then this function should break out into
+	 * a per-file stat function in the corresponding .c file.
+	 */
+	switch (pfs->pfs_type) {
+	case Proot:
+		vap->va_nlink = 2;
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		break;
+
+	case Pproc:
+		vap->va_nlink = 2;
+		vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = procp->p_ucred->cr_gid;
+		break;
+
+	case Pfile:
+		error = EOPNOTSUPP;
+		break;
+
+	case Pmem:
+		/* size is the sum of the text, data and stack sizes */
+		vap->va_bytes = vap->va_size =
+			ctob(procp->p_vmspace->vm_tsize +
+			     procp->p_vmspace->vm_dsize +
+			     procp->p_vmspace->vm_ssize);
+		/* FALLTHROUGH */
+	case Pregs:
+	case Pfpregs:
+	case Pctl:
+	case Pstatus:
+	case Pnote:
+	case Pnotepg:
+		vap->va_nlink = 1;
+		vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = procp->p_ucred->cr_gid;
+		break;
+
+	default:
+		panic("procfs_getattr");
+	}
+
+	return (error);
+}
+
+/*
+ * attribute setting is faked: nothing is changed
+ * and success is reported. returning an error is
+ * not a good idea, because things like creat()
+ * would then fail when they try to set the file
+ * length to 0. worse, it would mean that
+ * echo $note > /proc/$pid/note fails.
+ */
+/* ARGSUSED */
+int
+procfs_setattr(ap)
+	struct vop_setattr_args *ap;
+{
+
+	return (0);
+}
+
+/*
+ * implement access checking.
+ *
+ * ap->a_vp is the vnode being checked, ap->a_mode the requested
+ * permission bits and ap->a_cred the caller's credentials; ap->a_p
+ * is only passed through to VOP_GETATTR.
+ * returns 0 if access is allowed, EACCES if it is not, or the
+ * error from VOP_GETATTR if the attributes cannot be fetched.
+ * note that ap->a_mode is shifted in place while checking.
+ *
+ * something very similar to this code is duplicated
+ * throughout the 4bsd kernel and should be moved
+ * into kern/vfs_subr.c sometime.
+ *
+ * actually, the check for super-user is slightly
+ * broken since it will allow read access to write-only
+ * objects. this doesn't cause any particular trouble
+ * but does mean that the i/o entry points need to check
+ * that the operation really does make sense.
+ */
+procfs_access(ap)
+ struct vop_access_args *ap;
+{
+ struct vattr *vap;
+ struct vattr vattr;
+ int error;
+
+ /*
+ * If you're the super-user,
+ * you always get access.
+ */
+ if (ap->a_cred->cr_uid == (uid_t) 0)
+ return (0);
+ vap = &vattr;
+ if (error = VOP_GETATTR(ap->a_vp, vap, ap->a_cred, ap->a_p))
+ return (error);
+
+ /*
+ * Access check is based on only one of owner, group, public.
+ * If not owner, then check group. If not a member of the
+ * group, then check public access.
+ */
+ if (ap->a_cred->cr_uid != vap->va_uid) {
+ gid_t *gp;
+ int i;
+
+ (ap->a_mode) >>= 3; /* not the owner: move the request to the group bits */
+ gp = ap->a_cred->cr_groups;
+ for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++)
+ if (vap->va_gid == *gp)
+ goto found;
+ ap->a_mode >>= 3; /* no group matched: move the request to the public bits */
+found:
+ ;
+ }
+
+ if ((vap->va_mode & ap->a_mode) == ap->a_mode) /* every requested bit must be granted */
+ return (0);
+
+ return (EACCES);
+}
+
+/*
+ * lookup. this is incredibly complicated in the
+ * general case, however for most pseudo-filesystems
+ * very little needs to be done.
+ *
+ * unless you want to get a migraine, just make sure your
+ * filesystem doesn't do any locking of its own. otherwise
+ * read and inwardly digest ufs_lookup().
+ *
+ * ap->a_dvp is the directory being searched, ap->a_cnp the
+ * component name and ap->a_vpp receives the resulting vnode;
+ * *a_vpp is set to NULL before the directory type is examined.
+ *
+ * "." returns dvp itself with an extra reference.
+ * in the root (Proot): ".." fails with EIO; "curproc" maps to the
+ * pid of cnp->cn_proc and any other name is parsed by atopid();
+ * the result is a VDIR vnode of type Pproc, or ENOENT if the name
+ * is not a pid or PFIND() finds no such process.
+ * in a process directory (Pproc): ".." returns the procfs root;
+ * other names are matched against procent[]; a Pfile entry yields
+ * the vnode from procfs_findtextvp() (ENXIO if there is none), the
+ * rest get a VREG vnode from procfs_allocvp().
+ * any other directory type yields ENOTDIR.
+ */
+procfs_lookup(ap)
+ struct vop_lookup_args *ap;
+{
+ struct componentname *cnp = ap->a_cnp;
+ struct vnode **vpp = ap->a_vpp;
+ struct vnode *dvp = ap->a_dvp;
+ char *pname = cnp->cn_nameptr;
+ int error = 0;
+ pid_t pid;
+ struct vnode *nvp;
+ struct pfsnode *pfs;
+ struct proc *procp;
+ pfstype pfs_type;
+ int i;
+
+ if (cnp->cn_namelen == 1 && *pname == '.') {
+ *vpp = dvp;
+ VREF(dvp);
+ /*VOP_LOCK(dvp);*/
+ return (0);
+ }
+
+ *vpp = NULL;
+
+ pfs = VTOPFS(dvp);
+ switch (pfs->pfs_type) {
+ case Proot:
+ if (cnp->cn_flags & ISDOTDOT)
+ return (EIO);
+
+ if (CNEQ(cnp, "curproc", 7))
+ pid = cnp->cn_proc->p_pid;
+ else
+ pid = atopid(pname, cnp->cn_namelen);
+ if (pid == NO_PID)
+ return (ENOENT);
+
+ procp = PFIND(pid);
+ if (procp == 0)
+ return (ENOENT);
+
+ error = procfs_allocvp(dvp->v_mount, &nvp, pid, Pproc);
+ if (error)
+ return (error);
+
+ nvp->v_type = VDIR;
+ pfs = VTOPFS(nvp); /* NOTE(review): pfs is not read again before the return */
+
+ *vpp = nvp;
+ return (0);
+
+ case Pproc:
+ if (cnp->cn_flags & ISDOTDOT) {
+ error = procfs_root(dvp->v_mount, vpp);
+ return (error);
+ }
+
+ procp = PFIND(pfs->pfs_pid);
+ if (procp == 0)
+ return (ENOENT);
+
+ for (i = 0; i < Nprocent; i++) {
+ struct pfsnames *dp = &procent[i];
+
+ if (cnp->cn_namelen == dp->d_namlen &&
+ bcmp(pname, dp->d_name, dp->d_namlen) == 0) {
+ pfs_type = dp->d_pfstype;
+ goto found;
+ }
+ }
+ return (ENOENT);
+
+ found:
+ if (pfs_type == Pfile) {
+ nvp = procfs_findtextvp(procp);
+ if (nvp) {
+ VREF(nvp);
+ VOP_LOCK(nvp);
+ } else {
+ error = ENXIO; /* *vpp is then set to the NULL nvp below */
+ }
+ } else {
+ error = procfs_allocvp(dvp->v_mount, &nvp,
+ pfs->pfs_pid, pfs_type);
+ if (error)
+ return (error);
+
+ nvp->v_type = VREG;
+ pfs = VTOPFS(nvp); /* NOTE(review): pfs is not read again before the return */
+ }
+ *vpp = nvp;
+ return (error);
+
+ default:
+ return (ENOTDIR);
+ }
+}
+
+/*
+ * readdir returns directory entries from pfsnode (vp).
+ *
+ * the strategy here with procfs is to generate a single
+ * directory entry at a time (struct pfsdent) and then
+ * copy that out to userland using uiomove. a more efficient,
+ * though more complex, implementation would try to minimize
+ * the number of calls to uiomove(). for procfs, this is
+ * hardly worth the added code complexity.
+ *
+ * every entry occupies exactly UIO_MX bytes, so the uio offset
+ * must be a non-negative multiple of UIO_MX and the request must
+ * have room for at least one entry; otherwise EINVAL is returned.
+ * on return uio->uio_offset is the index of the next entry times
+ * UIO_MX. returns 0, the error from uiomove(), or ENOTDIR if the
+ * vnode is neither the root nor a process directory.
+ *
+ * this should just be done through read()
+ */
+procfs_readdir(ap)
+	struct vop_readdir_args *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct pfsdent d;
+	struct pfsdent *dp = &d;
+	struct pfsnode *pfs;
+	int error;
+	int count;
+	int i;
+
+	pfs = VTOPFS(ap->a_vp);
+
+	if (uio->uio_resid < UIO_MX)
+		return (EINVAL);
+	if (uio->uio_offset & (UIO_MX-1))
+		return (EINVAL);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	error = 0;
+	count = 0;
+	i = uio->uio_offset / UIO_MX;
+
+	switch (pfs->pfs_type) {
+	/*
+	 * this is for the process-specific sub-directories.
+	 * all that is needed is to copy out all the entries
+	 * from the procent[] table (top of this file).
+	 */
+	case Pproc: {
+		while (uio->uio_resid >= UIO_MX) {
+			struct pfsnames *dt;
+
+			if (i >= Nprocent)
+				break;
+
+			dt = &procent[i];
+
+			/*
+			 * clear the whole record first, exactly as the
+			 * root case below does: d lives on the stack,
+			 * all UIO_MX bytes are handed to uiomove(), and
+			 * the bcopy() below copies only
+			 * sizeof(dt->d_name)-1 bytes of the name.
+			 */
+			bzero((char *) dp, UIO_MX);
+			dp->d_reclen = UIO_MX;
+			dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, dt->d_pfstype);
+			dp->d_type = DT_REG;
+			dp->d_namlen = dt->d_namlen;
+			bcopy(dt->d_name, dp->d_name, sizeof(dt->d_name)-1);
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+			count += UIO_MX;
+			i++;
+		}
+
+		break;
+
+	}
+
+	/*
+	 * this is for the root of the procfs filesystem
+	 * what is needed is a special entry for "curproc"
+	 * followed by an entry for each process on allproc
+#ifdef PROCFS_ZOMBIE
+	 * and zombproc.
+#endif
+	 */
+
+	case Proot: {
+		int pcnt;
+#ifdef PROCFS_ZOMBIE
+		int doingzomb = 0;
+#endif
+		volatile struct proc *p;
+
+		p = allproc;
+
+#define PROCFS_XFILES 1 /* number of other entries, like "curproc" */
+		pcnt = PROCFS_XFILES;
+
+		while (p && uio->uio_resid >= UIO_MX) {
+			bzero((char *) dp, UIO_MX);
+			dp->d_type = DT_DIR;
+			dp->d_reclen = UIO_MX;
+
+			switch (i) {
+			case 0:
+				/* ship out entry for "curproc" */
+				dp->d_fileno = PROCFS_FILENO(PID_MAX+1, Pproc);
+				dp->d_namlen = sprintf(dp->d_name, "curproc");
+				break;
+
+			default:
+				/*
+				 * walk the process list; entries before
+				 * position i are counted (pcnt) but skipped
+				 * by the continue below.
+				 */
+				if (pcnt >= i) {
+					dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
+					dp->d_namlen = sprintf(dp->d_name, "%ld", (long) p->p_pid);
+				}
+
+				p = p->p_next;
+
+#ifdef PROCFS_ZOMBIE
+				if (p == 0 && doingzomb == 0) {
+					doingzomb = 1;
+					p = zombproc;
+				}
+#endif
+
+				if (pcnt++ < i)
+					continue;
+
+				break;
+			}
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+			count += UIO_MX;
+			i++;
+		}
+
+		break;
+
+	}
+
+	default:
+		error = ENOTDIR;
+		break;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+
+	return (error);
+}
+
+/*
+ * parse the len bytes at b as an unsigned decimal process id.
+ *
+ * returns the value, or NO_PID if any byte is not a decimal
+ * digit or the running value ever exceeds PID_MAX.
+ */
+static pid_t
+atopid(b, len)
+	const char *b;
+	u_int len;
+{
+	const char *end = b + len;
+	pid_t value = 0;
+
+	for (; b != end; b++) {
+		if (*b < '0' || *b > '9')
+			return (NO_PID);
+		value = value * 10 + (*b - '0');
+		/* checked on every digit, so value stays small */
+		if (value > PID_MAX)
+			return (NO_PID);
+	}
+
+	return (value);
+}
+
+/*
+ * procfs vnode operations.
+ *
+ * each entry pairs a vnode operation descriptor with the procfs
+ * routine registered for it; the vop_default_desc entry maps to
+ * vn_default_error and a NULL/NULL pair ends the table.
+ * procfs_vnodeop_opv_desc ties the table to procfs_vnodeop_p.
+ */
+int (**procfs_vnodeop_p)();
+struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, procfs_lookup }, /* lookup */
+ { &vop_create_desc, procfs_create }, /* create */
+ { &vop_mknod_desc, procfs_mknod }, /* mknod */
+ { &vop_open_desc, procfs_open }, /* open */
+ { &vop_close_desc, procfs_close }, /* close */
+ { &vop_access_desc, procfs_access }, /* access */
+ { &vop_getattr_desc, procfs_getattr }, /* getattr */
+ { &vop_setattr_desc, procfs_setattr }, /* setattr */
+ { &vop_read_desc, procfs_read }, /* read */
+ { &vop_write_desc, procfs_write }, /* write */
+ { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
+ { &vop_select_desc, procfs_select }, /* select */
+ { &vop_mmap_desc, procfs_mmap }, /* mmap */
+ { &vop_fsync_desc, procfs_fsync }, /* fsync */
+ { &vop_seek_desc, procfs_seek }, /* seek */
+ { &vop_remove_desc, procfs_remove }, /* remove */
+ { &vop_link_desc, procfs_link }, /* link */
+ { &vop_rename_desc, procfs_rename }, /* rename */
+ { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
+ { &vop_symlink_desc, procfs_symlink }, /* symlink */
+ { &vop_readdir_desc, procfs_readdir }, /* readdir */
+ { &vop_readlink_desc, procfs_readlink }, /* readlink */
+ { &vop_abortop_desc, procfs_abortop }, /* abortop */
+ { &vop_inactive_desc, procfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, procfs_lock }, /* lock */
+ { &vop_unlock_desc, procfs_unlock }, /* unlock */
+ { &vop_bmap_desc, procfs_bmap }, /* bmap */
+ { &vop_strategy_desc, procfs_strategy }, /* strategy */
+ { &vop_print_desc, procfs_print }, /* print */
+ { &vop_islocked_desc, procfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
+ { &vop_advlock_desc, procfs_advlock }, /* advlock */
+ { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, procfs_valloc }, /* valloc */
+ { &vop_vfree_desc, procfs_vfree }, /* vfree */
+ { &vop_truncate_desc, procfs_truncate }, /* truncate */
+ { &vop_update_desc, procfs_update }, /* update */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc procfs_vnodeop_opv_desc =
+ { &procfs_vnodeop_p, procfs_vnodeop_entries };
diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c
new file mode 100644
index 000000000000..111c517b1627
--- /dev/null
+++ b/sys/fs/specfs/spec_vnops.c
@@ -0,0 +1,689 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)spec_vnops.c 8.6 (Berkeley) 4/9/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/disklabel.h>
+#include <miscfs/specfs/specdev.h>
+
+/* symbolic sleep message strings for devices */
+char devopn[] = "devopn";
+char devio[] = "devio";
+char devwait[] = "devwait";
+char devin[] = "devin";
+char devout[] = "devout";
+char devioc[] = "devioc";
+char devcls[] = "devcls";
+
+int (**spec_vnodeop_p)(); /* paired with the table below by spec_vnodeop_opv_desc */
+struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, spec_lookup }, /* lookup */
+ { &vop_create_desc, spec_create }, /* create */
+ { &vop_mknod_desc, spec_mknod }, /* mknod */
+ { &vop_open_desc, spec_open }, /* open */
+ { &vop_close_desc, spec_close }, /* close */
+ { &vop_access_desc, spec_access }, /* access */
+ { &vop_getattr_desc, spec_getattr }, /* getattr */
+ { &vop_setattr_desc, spec_setattr }, /* setattr */
+ { &vop_read_desc, spec_read }, /* read */
+ { &vop_write_desc, spec_write }, /* write */
+ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
+ { &vop_select_desc, spec_select }, /* select */
+ { &vop_mmap_desc, spec_mmap }, /* mmap */
+ { &vop_fsync_desc, spec_fsync }, /* fsync */
+ { &vop_seek_desc, spec_seek }, /* seek */
+ { &vop_remove_desc, spec_remove }, /* remove */
+ { &vop_link_desc, spec_link }, /* link */
+ { &vop_rename_desc, spec_rename }, /* rename */
+ { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
+ { &vop_symlink_desc, spec_symlink }, /* symlink */
+ { &vop_readdir_desc, spec_readdir }, /* readdir */
+ { &vop_readlink_desc, spec_readlink }, /* readlink */
+ { &vop_abortop_desc, spec_abortop }, /* abortop */
+ { &vop_inactive_desc, spec_inactive }, /* inactive */
+ { &vop_reclaim_desc, spec_reclaim }, /* reclaim */
+ { &vop_lock_desc, spec_lock }, /* lock */
+ { &vop_unlock_desc, spec_unlock }, /* unlock */
+ { &vop_bmap_desc, spec_bmap }, /* bmap */
+ { &vop_strategy_desc, spec_strategy }, /* strategy */
+ { &vop_print_desc, spec_print }, /* print */
+ { &vop_islocked_desc, spec_islocked }, /* islocked */
+ { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
+ { &vop_advlock_desc, spec_advlock }, /* advlock */
+ { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, spec_valloc }, /* valloc */
+ { &vop_vfree_desc, spec_vfree }, /* vfree */
+ { &vop_truncate_desc, spec_truncate }, /* truncate */
+ { &vop_update_desc, spec_update }, /* update */
+ { &vop_bwrite_desc, spec_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* NULL/NULL pair ends the table */
+};
+struct vnodeopv_desc spec_vnodeop_opv_desc =
+ { &spec_vnodeop_p, spec_vnodeop_entries };
+
+/*
+ * Trivial lookup routine that always fails.
+ *
+ * Sets *ap->a_vpp to NULL and returns ENOTDIR; a_dvp and a_cnp
+ * are not examined.
+ */
+int
+spec_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+
+ *ap->a_vpp = NULL;
+ return (ENOTDIR);
+}
+
+/*
+ * Open a special file.
+ *
+ * Returns ENXIO if the vnode's filesystem is mounted with MNT_NODEV
+ * or the major number is outside the device switch, EPERM or the
+ * error from vfs_mountedon() when a securelevel or mounted-device
+ * check refuses the open, and otherwise the result of the driver's
+ * d_open routine. Vnodes that are neither VCHR nor VBLK return 0.
+ * The vnode is unlocked around the character device d_open call.
+ */
+/* ARGSUSED */
+spec_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *bvp, *vp = ap->a_vp;
+ dev_t bdev, dev = (dev_t)vp->v_rdev;
+ register int maj = major(dev);
+ int error;
+
+ /*
+ * Don't allow open if fs is mounted -nodev.
+ */
+ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
+ return (ENXIO);
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ if ((u_int)maj >= nchrdev)
+ return (ENXIO);
+ if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
+ /*
+ * When running in very secure mode, do not allow
+ * opens for writing of any disk character devices.
+ */
+ if (securelevel >= 2 && isdisk(dev, VCHR))
+ return (EPERM);
+ /*
+ * When running in secure mode, do not allow opens
+ * for writing of /dev/mem, /dev/kmem, or character
+ * devices whose corresponding block devices are
+ * currently mounted.
+ */
+ if (securelevel >= 1) {
+ if ((bdev = chrtoblk(dev)) != NODEV &&
+ vfinddev(bdev, VBLK, &bvp) &&
+ bvp->v_usecount > 0 &&
+ (error = vfs_mountedon(bvp)))
+ return (error);
+ if (iskmemdev(dev))
+ return (EPERM);
+ }
+ }
+ VOP_UNLOCK(vp);
+ error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p);
+ VOP_LOCK(vp);
+ return (error);
+
+ case VBLK:
+ if ((u_int)maj >= nblkdev)
+ return (ENXIO);
+ /*
+ * When running in very secure mode, do not allow
+ * opens for writing of any disk block devices.
+ */
+ if (securelevel >= 2 && ap->a_cred != FSCRED &&
+ (ap->a_mode & FWRITE) && isdisk(dev, VBLK))
+ return (EPERM);
+ /*
+ * Do not allow opens of block devices that are
+ * currently mounted.
+ */
+ if (error = vfs_mountedon(vp))
+ return (error);
+ return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p));
+ }
+ return (0); /* neither VCHR nor VBLK: nothing to open */
+}
+
+/*
+ * Vnode op for read
+ *
+ * A request with uio_resid == 0 returns 0 at once. Character
+ * devices are handed to the driver's d_read routine with the vnode
+ * unlocked for the duration of the call. Block devices are read a
+ * block at a time with bread()/breadn(): the block size is
+ * BLKDEV_IOSIZE unless the DIOCGPART ioctl reports an FS_BSDFFS
+ * partition with non-zero p_frag and p_fsize, in which case it is
+ * p_frag * p_fsize. When the block recorded in v_lastr is the one
+ * just before the block being read, the following block is passed
+ * to breadn() as well. A negative offset on a block device yields
+ * EINVAL; any other vnode type panics.
+ */
+/* ARGSUSED */
+spec_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ struct buf *bp;
+ daddr_t bn, nextbn;
+ long bsize, bscale;
+ struct partinfo dpart;
+ int n, on, majordev, (*ioctl)();
+ int error = 0;
+ dev_t dev;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_READ)
+ panic("spec_read mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("spec_read proc");
+#endif
+ if (uio->uio_resid == 0)
+ return (0);
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ VOP_UNLOCK(vp);
+ error = (*cdevsw[major(vp->v_rdev)].d_read)
+ (vp->v_rdev, uio, ap->a_ioflag);
+ VOP_LOCK(vp);
+ return (error);
+
+ case VBLK:
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ bsize = BLKDEV_IOSIZE;
+ dev = vp->v_rdev;
+ if ((majordev = major(dev)) < nblkdev &&
+ (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
+ (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
+ dpart.part->p_fstype == FS_BSDFFS &&
+ dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+ bsize = dpart.part->p_frag * dpart.part->p_fsize;
+ bscale = bsize / DEV_BSIZE; /* device blocks per I/O block */
+ do {
+ bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1); /* assumes bscale is a power of two -- TODO confirm */
+ on = uio->uio_offset % bsize; /* byte offset within the block */
+ n = min((unsigned)(bsize - on), uio->uio_resid);
+ if (vp->v_lastr + bscale == bn) {
+ nextbn = bn + bscale;
+ error = breadn(vp, bn, (int)bsize, &nextbn,
+ (int *)&bsize, 1, NOCRED, &bp);
+ } else
+ error = bread(vp, bn, (int)bsize, NOCRED, &bp);
+ vp->v_lastr = bn;
+ n = min(n, bsize - bp->b_resid); /* NOTE(review): bp is used before error is checked -- assumes bread()/breadn() always set bp; confirm */
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ error = uiomove((char *)bp->b_data + on, n, uio);
+ if (n + on == bsize) /* the copy reached the end of the block */
+ bp->b_flags |= B_AGE;
+ brelse(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n != 0);
+ return (error);
+
+ default:
+ panic("spec_read type");
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Vnode op for write
+ *
+ * Character devices are handed to the driver's d_write routine
+ * with the vnode unlocked for the duration of the call. Block
+ * devices are written a block at a time: a write covering a whole
+ * block takes a buffer from getblk(), a partial one first reads
+ * the block with bread(). Buffers filled to the end of the block
+ * are started with bawrite(), the others are left to bdwrite().
+ * On a block device a zero-length request returns 0 and a negative
+ * offset returns EINVAL; any other vnode type panics.
+ *
+ * NOTE(review): unlike spec_read(), the DIOCGPART ioctl is issued
+ * here without first checking the major number against nblkdev or
+ * d_ioctl against NULL.
+ */
+/* ARGSUSED */
+spec_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ struct buf *bp;
+ daddr_t bn;
+ int bsize, blkmask;
+ struct partinfo dpart;
+ register int n, on;
+ int error = 0;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_WRITE)
+ panic("spec_write mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("spec_write proc");
+#endif
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ VOP_UNLOCK(vp);
+ error = (*cdevsw[major(vp->v_rdev)].d_write)
+ (vp->v_rdev, uio, ap->a_ioflag);
+ VOP_LOCK(vp);
+ return (error);
+
+ case VBLK:
+ if (uio->uio_resid == 0)
+ return (0);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ bsize = BLKDEV_IOSIZE;
+ if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART,
+ (caddr_t)&dpart, FREAD, p) == 0) {
+ if (dpart.part->p_fstype == FS_BSDFFS &&
+ dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+ bsize = dpart.part->p_frag *
+ dpart.part->p_fsize;
+ }
+ blkmask = (bsize / DEV_BSIZE) - 1; /* assumes bsize / DEV_BSIZE is a power of two -- TODO confirm */
+ do {
+ bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask;
+ on = uio->uio_offset % bsize; /* byte offset within the block */
+ n = min((unsigned)(bsize - on), uio->uio_resid);
+ if (n == bsize) /* whole block is overwritten: no need to read it */
+ bp = getblk(vp, bn, bsize, 0, 0);
+ else
+ error = bread(vp, bn, bsize, NOCRED, &bp);
+ n = min(n, bsize - bp->b_resid); /* NOTE(review): bp is used before error is checked -- assumes bread() always sets bp; confirm */
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ error = uiomove((char *)bp->b_data + on, n, uio);
+ if (n + on == bsize) {
+ bp->b_flags |= B_AGE;
+ bawrite(bp);
+ } else
+ bdwrite(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n != 0);
+ return (error);
+
+ default:
+ panic("spec_write type");
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Pass an ioctl request through to the device driver.
+ *
+ * Character devices go straight to the cdevsw d_ioctl entry. Block
+ * devices do the same through bdevsw, except for the special query
+ * (a_command == 0 with a_data == B_TAPE), which is answered here:
+ * 0 if the driver's d_flags has B_TAPE set, 1 if it does not.
+ * Any other vnode type is a panic.
+ */
+/* ARGSUSED */
+spec_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	dev_t dev = vp->v_rdev;
+
+	switch (vp->v_type) {
+
+	case VBLK:
+		if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) {
+			/* answered from the switch table, driver not called */
+			return ((bdevsw[major(dev)].d_flags & B_TAPE) ? 0 : 1);
+		}
+		return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command,
+		    ap->a_data, ap->a_fflag, ap->a_p));
+
+	case VCHR:
+		return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command,
+		    ap->a_data, ap->a_fflag, ap->a_p));
+
+	default:
+		panic("spec_ioctl");
+		/* NOTREACHED */
+	}
+}
+
+/*
+ * Select on a special device: character devices are passed to the
+ * driver's d_select routine, every other vnode type reports 1.
+ */
+/* ARGSUSED */
+spec_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int a_which;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	dev_t dev;
+
+	if (vp->v_type != VCHR)
+		return (1);		/* XXX */
+
+	dev = vp->v_rdev;
+	return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p);
+}
+/*
+ * Synch buffers associated with a block device
+ *
+ * Character devices return 0 immediately. For a block device each
+ * buffer on v_dirtyblkhd that is not B_BUSY is removed with
+ * bremfree(), marked B_BUSY and started with bawrite(); the scan
+ * restarts from the head of the list after every write is issued.
+ * With a_waitfor == MNT_WAIT the routine then sleeps until
+ * v_numoutput reaches zero. Always returns 0.
+ */
+/* ARGSUSED */
+int
+spec_fsync(ap)
+ struct vop_fsync_args /* {
+ struct vnode *a_vp;
+ struct ucred *a_cred;
+ int a_waitfor;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct buf *bp;
+ struct buf *nbp;
+ int s;
+
+ if (vp->v_type == VCHR)
+ return (0);
+ /*
+ * Flush all dirty buffers associated with a block device.
+ */
+loop:
+ s = splbio();
+ for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+ nbp = bp->b_vnbufs.le_next; /* fetched before bp is handed to bawrite() */
+ if ((bp->b_flags & B_BUSY))
+ continue;
+ if ((bp->b_flags & B_DELWRI) == 0)
+ panic("spec_fsync: not dirty");
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ splx(s); /* restore the level saved by splbio() before writing */
+ bawrite(bp);
+ goto loop;
+ }
+ if (ap->a_waitfor == MNT_WAIT) {
+ while (vp->v_numoutput) {
+ vp->v_flag |= VBWAIT;
+ sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
+ }
+#ifdef DIAGNOSTIC
+ if (vp->v_dirtyblkhd.lh_first) {
+ vprint("spec_fsync: dirty", vp);
+ goto loop;
+ }
+#endif
+ }
+ splx(s);
+ return (0);
+}
+
+/*
+ * Just call the device strategy routine
+ *
+ * The driver is selected by major(ap->a_bp->b_dev) in bdevsw and
+ * is handed the buffer; its result is not examined and 0 is
+ * always returned.
+ */
+spec_strategy(ap)
+ struct vop_strategy_args /* {
+ struct buf *a_bp;
+ } */ *ap;
+{
+
+ (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp);
+ return (0);
+}
+
+/*
+ * This is a noop, simply returning what one has been given.
+ *
+ * Stores ap->a_vp through a_vpp and ap->a_bn through a_bnp, each
+ * only when the pointer is non-NULL, and always returns 0.
+ */
+spec_bmap(ap)
+ struct vop_bmap_args /* {
+ struct vnode *a_vp;
+ daddr_t a_bn;
+ struct vnode **a_vpp;
+ daddr_t *a_bnp;
+ } */ *ap;
+{
+
+ if (ap->a_vpp != NULL)
+ *ap->a_vpp = ap->a_vp;
+ if (ap->a_bnp != NULL)
+ *ap->a_bnp = ap->a_bn;
+ return (0);
+}
+
+/*
+ * At the moment we do not do any locking.
+ *
+ * The vnode in ap is not touched; 0 is always returned.
+ */
+/* ARGSUSED */
+spec_lock(ap)
+ struct vop_lock_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ return (0);
+}
+
+/* ARGSUSED */
+spec_unlock(ap)
+ struct vop_unlock_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ return (0); /* ap is not examined; success is always reported */
+}
+
+/*
+ * Device close routine
+ *
+ * The driver's d_close routine is called only when vcount(vp) is
+ * no more than 1 or the vnode has VXLOCK set; otherwise 0 is
+ * returned without calling the driver. For a block device
+ * vinvalbuf() is run first and its error, if any, is returned.
+ * A vnode that is neither VCHR nor VBLK is a panic.
+ */
+/* ARGSUSED */
+spec_close(ap)
+ struct vop_close_args /* {
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ dev_t dev = vp->v_rdev;
+ int (*devclose) __P((dev_t, int, int, struct proc *));
+ int mode, error;
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ /*
+ * Hack: a tty device that is a controlling terminal
+ * has a reference from the session structure.
+ * We cannot easily tell that a character device is
+ * a controlling terminal, unless it is the closing
+ * process' controlling terminal. In that case,
+ * if the reference count is 2 (this last descriptor
+ * plus the session), release the reference from the session.
+ */
+ if (vcount(vp) == 2 && ap->a_p &&
+ vp == ap->a_p->p_session->s_ttyvp) {
+ vrele(vp);
+ ap->a_p->p_session->s_ttyvp = NULL;
+ }
+ /*
+ * If the vnode is locked, then we are in the midst
+ * of forcibly closing the device, otherwise we only
+ * close on last reference.
+ */
+ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+ return (0);
+ devclose = cdevsw[major(dev)].d_close;
+ mode = S_IFCHR;
+ break;
+
+ case VBLK:
+ /*
+ * On last close of a block device (that isn't mounted)
+ * we must invalidate any in core blocks, so that
+ * we can, for instance, change floppy disks.
+ */
+ if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0))
+ return (error);
+ /*
+ * We do not want to really close the device if it
+ * is still in use unless we are trying to close it
+ * forcibly. Since every use (buffer, vnode, swap, cmap)
+ * holds a reference to the vnode, and because we mark
+ * any other vnodes that alias this device, when the
+ * sum of the reference counts on all the aliased
+ * vnodes descends to one, we are on last close.
+ */
+ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+ return (0);
+ devclose = bdevsw[major(dev)].d_close;
+ mode = S_IFBLK;
+ break;
+
+ default:
+ panic("spec_close: not special");
+ }
+
+ return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
+}
+
+/*
+ * Print out the contents of a special device vnode.
+ *
+ * Writes the major and minor numbers of ap->a_vp->v_rdev with
+ * printf() and returns 0.
+ */
+spec_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
+	    minor(ap->a_vp->v_rdev));
+	/*
+	 * The function is implicitly int, so return a definite value
+	 * instead of falling off the end.
+	 */
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to special devices.
+ *
+ * Handles _PC_LINK_MAX, _PC_MAX_CANON, _PC_MAX_INPUT, _PC_PIPE_BUF,
+ * _PC_CHOWN_RESTRICTED and _PC_VDISABLE: the value is stored
+ * through ap->a_retval and 0 is returned. Any other a_name returns
+ * EINVAL and leaves *a_retval untouched. a_vp is not examined.
+ */
+spec_pathconf(ap)
+ struct vop_pathconf_args /* {
+ struct vnode *a_vp;
+ int a_name;
+ int *a_retval;
+ } */ *ap;
+{
+
+ switch (ap->a_name) {
+ case _PC_LINK_MAX:
+ *ap->a_retval = LINK_MAX;
+ return (0);
+ case _PC_MAX_CANON:
+ *ap->a_retval = MAX_CANON;
+ return (0);
+ case _PC_MAX_INPUT:
+ *ap->a_retval = MAX_INPUT;
+ return (0);
+ case _PC_PIPE_BUF:
+ *ap->a_retval = PIPE_BUF;
+ return (0);
+ case _PC_CHOWN_RESTRICTED:
+ *ap->a_retval = 1;
+ return (0);
+ case _PC_VDISABLE:
+ *ap->a_retval = _POSIX_VDISABLE;
+ return (0);
+ default:
+ return (EINVAL);
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Special device advisory byte-level locks.
+ *
+ * Not implemented here: the arguments are ignored and EOPNOTSUPP
+ * is always returned.
+ */
+/* ARGSUSED */
+spec_advlock(ap)
+ struct vop_advlock_args /* {
+ struct vnode *a_vp;
+ caddr_t a_id;
+ int a_op;
+ struct flock *a_fl;
+ int a_flags;
+ } */ *ap;
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Special device failed operation
+ *
+ * Takes no arguments and always returns EBADF.
+ */
+spec_ebadf()
+{
+
+ return (EBADF);
+}
+
+/*
+ * Special device bad operation
+ *
+ * Takes no arguments and panics with "spec_badop called".
+ */
+spec_badop()
+{
+
+ panic("spec_badop called");
+ /* NOTREACHED */
+}
diff --git a/sys/fs/umapfs/umap.h b/sys/fs/umapfs/umap.h
new file mode 100644
index 000000000000..9f4d1e7ace53
--- /dev/null
+++ b/sys/fs/umapfs/umap.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap.h 8.3 (Berkeley) 1/21/94
+ *
+ * @(#)null_vnops.c 1.5 (Berkeley) 7/10/92
+ */
+
+#define MAPFILEENTRIES 64 /* rows in umap_mount.info_mapdata (user map) */
+#define GMAPFILEENTRIES 16 /* rows in umap_mount.info_gmapdata (group map) */
+#define NOBODY 32767 /* presumably the uid used when no user mapping exists -- confirm in umap_subr.c */
+#define NULLGROUP 65534 /* presumably the gid used when no group mapping exists -- confirm in umap_subr.c */
+
+struct umap_args { /* presumably the arguments supplied when mounting a umap layer -- confirm in umap_vfsops.c */
+ char *target; /* Target of loopback */
+ int nentries; /* # of entries in user map array */
+ int gnentries; /* # of entries in group map array */
+ u_long (*mapdata)[2]; /* pointer to array of user mappings */
+ u_long (*gmapdata)[2]; /* pointer to array of group mappings */
+};
+
+struct umap_mount { /* reached from a mount's mnt_data via MOUNTTOUMAPMOUNT() */
+ struct mount *umapm_vfs;
+ struct vnode *umapm_rootvp; /* Reference to root umap_node */
+ int info_nentries; /* number of uid mappings */
+ int info_gnentries; /* number of gid mappings */
+ u_long info_mapdata[MAPFILEENTRIES][2]; /* mapping data for
+ user mapping in ficus */
+ u_long info_gmapdata[GMAPFILEENTRIES][2]; /*mapping data for
+ group mapping in ficus */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references
+ *
+ * VTOUMAP() recovers this structure from a vnode's v_data and
+ * UMAPTOV() follows umap_vnode back the other way.
+ */
+struct umap_node {
+ struct umap_node *umap_forw; /* Hash chain */
+ struct umap_node *umap_back;
+ struct vnode *umap_lowervp; /* Aliased vnode - VREFed once */
+ struct vnode *umap_vnode; /* Back pointer to vnode/umap_node */
+};
+
+extern int umap_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+extern u_long umap_reverse_findid __P((u_long id, u_long map[][2], int nentries));
+extern void umap_mapids __P((struct mount *v_mount, struct ucred *credp));
+
+#define MOUNTTOUMAPMOUNT(mp) ((struct umap_mount *)((mp)->mnt_data)) /* mount -> its umap_mount */
+#define VTOUMAP(vp) ((struct umap_node *)(vp)->v_data) /* vnode -> its umap_node */
+#define UMAPTOV(xp) ((xp)->umap_vnode) /* umap_node -> its vnode */
+#ifdef UMAPFS_DIAGNOSTIC
+extern struct vnode *umap_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define UMAPVPTOLOWERVP(vp) umap_checkvp((vp), __FILE__, __LINE__) /* diagnostic variant: passes the caller's file and line */
+#else
+#define UMAPVPTOLOWERVP(vp) (VTOUMAP(vp)->umap_lowervp) /* reads umap_lowervp directly */
+#endif
+
+extern int (**umap_vnodeop_p)();
+extern struct vfsops umap_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/umapfs/umap_subr.c b/sys/fs/umapfs/umap_subr.c
new file mode 100644
index 000000000000..6f1f077a6217
--- /dev/null
+++ b/sys/fs/umapfs/umap_subr.c
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap_subr.c 8.6 (Berkeley) 1/26/94
+ *
+ * $Id: lofs_subr.c, v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+/* Right shift applied to a vnode address before hashing (see UMAP_NHASH). */
+#define LOG2_SIZEVNODE 7 /* log2(sizeof struct vnode) */
+/* Number of hash buckets; UMAP_NHASH masks with (NUMAPNODECACHE-1). */
+#define NUMAPNODECACHE 16
+/* Bucket index for a vnode pointer. */
+#define UMAP_NHASH(vp) ((((u_long) vp)>>LOG2_SIZEVNODE) & (NUMAPNODECACHE-1))
+
+/*
+ * Umap layer cache:
+ * Each cache entry holds a reference to the target vnode
+ * along with a pointer to the alias vnode. When an
+ * entry is added the target vnode is VREF'd. When the
+ * alias is removed the target vnode is vrele'd.
+ */
+
+/*
+ * Cache head.
+ * One per hash bucket. It holds only the two queue links, and is cast to
+ * (struct umap_node *) so it can sit in the same doubly linked chain as
+ * the nodes; an empty bucket points at itself (see umapfs_init).
+ */
+struct umap_node_cache {
+ struct umap_node *ac_forw;
+ struct umap_node *ac_back;
+};
+
+/* The hash table itself, indexed by UMAP_NHASH(lower vnode). */
+static struct umap_node_cache umap_node_cache[NUMAPNODECACHE];
+
+/*
+ * Initialise cache headers.
+ * Every bucket of umap_node_cache is made an empty circular list by
+ * pointing both of its links at the bucket head itself.
+ *
+ * Declared "int umapfs_init __P((void))" for the vfsops table, so the
+ * definition states the return type and returns a value instead of
+ * relying on implicit int and falling off the end.
+ * Always returns 0.
+ */
+int
+umapfs_init()
+{
+	struct umap_node_cache *ac;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_init\n"); /* printed during system boot */
+#endif
+
+	for (ac = umap_node_cache; ac < umap_node_cache + NUMAPNODECACHE; ac++)
+		ac->ac_forw = ac->ac_back = (struct umap_node *) ac;
+
+	return (0);
+}
+
+/*
+ * Return the hash bucket head that a given target (lower) vnode
+ * belongs to.
+ */
+static struct umap_node_cache *
+umap_node_hash(targetvp)
+	struct vnode *targetvp;
+{
+	u_long bucket;
+
+	bucket = UMAP_NHASH(targetvp);
+	return (umap_node_cache + bucket);
+}
+
+/*
+ * Forward lookup of a user or group id, used by umap_mapids().
+ * Scans the first nentries rows of map for one whose column 0 equals id
+ * and returns that row's column 1; returns (u_long)-1 when no row matches.
+ */
+static u_long
+umap_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		if (map[row][0] == id)
+			return (map[row][1]);
+	}
+
+	return (-1);
+}
+
+/*
+ * Reverse lookup of a user or group id, used by umap_getattr().
+ * Scans the first nentries rows of map for one whose column 1 equals id
+ * and returns that row's column 0; returns (u_long)-1 when no row matches.
+ */
+u_long
+umap_reverse_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		if (map[row][1] == id)
+			return (map[row][0]);
+	}
+
+	return (-1);
+}
+
+/*
+ * Look up the alias vnode for (mp, targetvp).
+ * Returns the alias, referenced through vget(), or 0 when the cache
+ * holds no alias for that target on that mount.
+ */
+static struct vnode *
+umap_node_find(mp, targetvp)
+	struct mount *mp;
+	struct vnode *targetvp;
+{
+	struct umap_node_cache *bucket;
+	struct umap_node *node;
+	struct vnode *aliasvp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umap_node_find(mp = %x, target = %x)\n", mp, targetvp);
+#endif
+
+	/*
+	 * Walk the doubly linked chain of the target's hash bucket looking
+	 * for a umap_node that references the target vnode on this mount.
+	 * A hit gains a reference on the alias vnode only; the target
+	 * vnode's own reference count is left alone.
+	 */
+	bucket = umap_node_hash(targetvp);
+
+ rescan:
+	node = bucket->ac_forw;
+	while (node != (struct umap_node *) bucket) {
+		if (node->umap_lowervp != targetvp ||
+		    node->umap_vnode->v_mount != mp) {
+			node = node->umap_forw;
+			continue;
+		}
+
+		aliasvp = UMAPTOV(node);
+		/*
+		 * vget is needed for the VXLOCK handling, but the
+		 * lower node must not be locked. If vget fails,
+		 * start over from the head of the chain.
+		 */
+		if (vget(aliasvp, 0)) {
+#ifdef UMAPFS_DIAGNOSTIC
+			printf ("umap_node_find: vget failed.\n");
+#endif
+			goto rescan;
+		}
+		return (aliasvp);
+	}
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umap_node_find(%x, %x): NOT found\n", mp, targetvp);
+#endif
+
+	return (0);
+}
+
+/*
+ * Make a new umap_node and alias vnode for lowervp on mount mp.
+ *
+ * On success *vpp is the alias vnode. If another alias for the same
+ * (mp, lowervp) pair appeared while this routine was allocating, the
+ * freshly made vnode is discarded and *vpp is that existing alias.
+ * When a new node is inserted, lowervp gains one reference which the
+ * node keeps.
+ *
+ * Returns 0, or the error from getnewvnode().
+ */
+static int
+umap_node_alloc(mp, lowervp, vpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **vpp;
+{
+	struct umap_node_cache *hd;
+	struct umap_node *xp;
+	struct vnode *othervp, *vp;
+	int error;
+
+	if ((error = getnewvnode(VT_UMAP, mp, umap_vnodeop_p, vpp)) != 0)
+		return (error);
+	vp = *vpp;
+
+	MALLOC(xp, struct umap_node *, sizeof(struct umap_node),
+	    M_TEMP, M_WAITOK);
+	vp->v_type = lowervp->v_type;
+	xp->umap_vnode = vp;
+	vp->v_data = xp;
+	xp->umap_lowervp = lowervp;
+	/*
+	 * Before we insert our new node onto the hash chains,
+	 * check to see if someone else has beaten us to it.
+	 * (We could have slept in MALLOC.)
+	 *
+	 * The lookup is keyed on both the mount and the lower vnode;
+	 * umap_node_find() takes (mp, targetvp), so both must be passed.
+	 */
+	if ((othervp = umap_node_find(mp, lowervp)) != NULL) {
+		FREE(xp, M_TEMP);
+		vp->v_type = VBAD;	/* node is discarded */
+		vp->v_usecount = 0;	/* XXX */
+		*vpp = othervp;
+		return (0);
+	}
+	VREF(lowervp);	/* Extra VREF will be vrele'd in umap_node_create */
+	hd = umap_node_hash(lowervp);
+	insque(xp, hd);
+	return (0);
+}
+
+
+/*
+ * Return, in *newvpp, an alias vnode on mount mp for targetvp.
+ * An existing alias is reused when the cache has one; otherwise a new
+ * alias is allocated. On success one reference on targetvp is released
+ * (vrele), so the caller's reference is consumed.
+ *
+ * Returns 0, or the error from umap_node_alloc(); on error targetvp is
+ * left untouched and *newvpp is not written.
+ */
+int
+umap_node_create(mp, targetvp, newvpp)
+	struct mount *mp;
+	struct vnode *targetvp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+
+	if ((aliasvp = umap_node_find(mp, targetvp)) != NULL) {
+		/*
+		 * umap_node_find() already took a reference on the
+		 * alias vnode through vget(); no extra VREF is needed.
+		 */
+#ifdef UMAPFS_DIAGNOSTIC
+		/* Print the alias we found (there is no "ap" in scope). */
+		vprint("umap_node_create: exists", aliasvp);
+#endif
+	} else {
+		int error;
+
+		/*
+		 * Get new vnode.
+		 */
+#ifdef UMAPFS_DIAGNOSTIC
+		printf("umap_node_create: create new alias vnode\n");
+#endif
+		/*
+		 * Make new vnode reference the umap_node.
+		 */
+		if ((error = umap_node_alloc(mp, targetvp, &aliasvp)) != 0)
+			return (error);
+
+		/*
+		 * aliasvp is already VREF'd by getnewvnode()
+		 */
+	}
+
+	vrele(targetvp);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("umap_node_create: alias", aliasvp);
+	vprint("umap_node_create: target", targetvp);
+#endif
+
+	*newvpp = aliasvp;
+	return (0);
+}
+
+#ifdef UMAPFS_DIAGNOSTIC
+/*
+ * While non-zero, a failed check spins in place before panicking
+ * (the in-line comments say "wait for debugger").
+ */
+int umap_checkvp_barrier = 1;
+/*
+ * Diagnostic replacement for the plain UMAPVPTOLOWERVP() macro.
+ * Checks that the umap_node behind vp has a lower vnode and that the
+ * lower vnode's use count is at least 1, and returns the lower vnode.
+ * On a failed check it prints the node contents, spins while
+ * umap_checkvp_barrier is set, then panics.
+ * fil/lno identify the calling site; they are only used by the
+ * disabled trace printf at the end.
+ */
+struct vnode *
+umap_checkvp(vp, fil, lno)
+ struct vnode *vp;
+ char *fil;
+ int lno;
+{
+ struct umap_node *a = VTOUMAP(vp);
+#if 0
+ /*
+ * Can't do this check because vop_reclaim runs
+ * with funny vop vector.
+ */
+ if (vp->v_op != umap_vnodeop_p) {
+ printf ("umap_checkvp: on non-umap-node\n");
+ while (umap_checkvp_barrier) /*WAIT*/ ;
+ panic("umap_checkvp");
+ }
+#endif
+ if (a->umap_lowervp == NULL) {
+ /* Should never happen */
+ int i; u_long *p;
+ printf("vp = %x, ZERO ptr\n", vp);
+ /*
+ * NOTE(review): dumps 8 u_longs starting at the node, although
+ * struct umap_node is declared with four pointer fields --
+ * presumably extra context for debugging; confirm.
+ */
+ for (p = (u_long *) a, i = 0; i < 8; i++)
+ printf(" %x", p[i]);
+ printf("\n");
+ /* wait for debugger */
+ while (umap_checkvp_barrier) /*WAIT*/ ;
+ panic("umap_checkvp");
+ }
+ if (a->umap_lowervp->v_usecount < 1) {
+ /* The lower vnode is no longer referenced. */
+ int i; u_long *p;
+ printf("vp = %x, unref'ed lowervp\n", vp);
+ for (p = (u_long *) a, i = 0; i < 8; i++)
+ printf(" %x", p[i]);
+ printf("\n");
+ /* wait for debugger */
+ while (umap_checkvp_barrier) /*WAIT*/ ;
+ panic ("umap with unref'ed lowervp");
+ }
+#if 0
+ printf("umap %x/%d -> %x/%d [%s, %d]\n",
+ a->umap_vnode, a->umap_vnode->v_usecount,
+ a->umap_lowervp, a->umap_lowervp->v_usecount,
+ fil, lno);
+#endif
+ return (a->umap_lowervp);
+}
+#endif
+
+/*
+ * umap_mapids maps all of the ids in a credential, both user and group,
+ * in place, using the tables of the umap mount v_mount.
+ *
+ * cr_uid is replaced by its mapping, or by NOBODY when the user table
+ * has no entry for it. Every entry of the cr_groups list is replaced by
+ * its mapping, or by NULLGROUP when the group table has no entry.
+ *
+ * The group list is walked by its recorded length (cr_ngroups). Walking
+ * until a zero gid would stop early at a legitimate gid 0 and could run
+ * past the end of the array when no entry is zero.
+ */
+void
+umap_mapids(v_mount, credp)
+	struct mount *v_mount;
+	struct ucred *credp;
+{
+	int i, unentries, gnentries;
+	u_long (*groupmap)[2], (*usermap)[2];
+	uid_t uid;
+	gid_t gid;
+
+	/* Keep the tables typed as arrays of pairs, as umap_findid expects. */
+	unentries = MOUNTTOUMAPMOUNT(v_mount)->info_nentries;
+	usermap = MOUNTTOUMAPMOUNT(v_mount)->info_mapdata;
+	gnentries = MOUNTTOUMAPMOUNT(v_mount)->info_gnentries;
+	groupmap = MOUNTTOUMAPMOUNT(v_mount)->info_gmapdata;
+
+	/* Find uid entry in map */
+
+	uid = (uid_t) umap_findid(credp->cr_uid, usermap, unentries);
+
+	if (uid != (uid_t) -1)
+		credp->cr_uid = uid;
+	else
+		credp->cr_uid = (uid_t) NOBODY;
+
+#ifdef notdef
+	/* cr_gid is the same as cr_groups[0] in 4BSD */
+
+	/* Find gid entry in map */
+
+	gid = (gid_t) umap_findid(credp->cr_gid, groupmap, gnentries);
+
+	if (gid != (gid_t) -1)
+		credp->cr_gid = gid;
+	else
+		credp->cr_gid = NULLGROUP;
+#endif
+
+	/*
+	 * Now map each of the groups in the cr_groups list,
+	 * bounded by the number of groups the credential holds.
+	 */
+	for (i = 0; i < credp->cr_ngroups; i++) {
+		gid = (gid_t) umap_findid(credp->cr_groups[i],
+		    groupmap, gnentries);
+
+		if (gid != (gid_t) -1)
+			credp->cr_groups[i] = gid;
+		else
+			credp->cr_groups[i] = NULLGROUP;
+	}
+}
diff --git a/sys/fs/umapfs/umap_vfsops.c b/sys/fs/umapfs/umap_vfsops.c
new file mode 100644
index 000000000000..2480a85e440c
--- /dev/null
+++ b/sys/fs/umapfs/umap_vfsops.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap_vfsops.c 8.3 (Berkeley) 1/21/94
+ *
+ * @(#)null_vfsops.c 1.5 (Berkeley) 7/10/92
+ */
+
+/*
+ * Umap Layer
+ * (See mount_umap(8) for a description of this layer.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+/*
+ * Mount umap layer.
+ *
+ * mp	mount being set up
+ * path	user-space name of the mount point (recorded in mnt_stat)
+ * data	user-space pointer to a struct umap_args
+ * ndp	nameidata used to look up args.target
+ * p	calling process
+ *
+ * Looks up the target directory, copies in the uid and gid mapping
+ * tables, creates the alias vnode for the target's root and records
+ * everything in a freshly allocated struct umap_mount.
+ *
+ * Returns 0 on success. Errors: EOPNOTSUPP for MNT_UPDATE; EINVAL when
+ * the table sizes do not fit the fixed-size tables or the target is not
+ * a directory; otherwise whatever copyin(), namei() or
+ * umap_node_create() returned. On every failure after the lookup the
+ * target vnode is unlocked and released and the umap_mount is freed.
+ */
+int
+umapfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct umap_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *umapm_rootvp;
+	struct umap_mount *amp;
+	u_int size;
+	int error;
+#ifdef UMAP_DIAGNOSTIC
+	int i;
+#endif
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	/*
+	 * Get argument
+	 */
+	if ((error = copyin(data, (caddr_t)&args, sizeof(struct umap_args))) != 0)
+		return (error);
+
+	/*
+	 * The counts come from user space and size the copyin()s into
+	 * the fixed-size tables below; refuse anything that does not fit.
+	 */
+	if (args.nentries < 0 || args.nentries > MAPFILEENTRIES ||
+	    args.gnentries < 0 || args.gnentries > GMAPFILEENTRIES)
+		return (EINVAL);
+
+	/*
+	 * Find lower node
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+	    UIO_USERSPACE, args.target, p);
+	if ((error = namei(ndp)) != 0)
+		return (error);
+
+	/*
+	 * Sanity check on lower vnode
+	 */
+	lowerrootvp = ndp->ni_vp;
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("vp = %x, check for VDIR...\n", lowerrootvp);
+#endif
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = 0;
+
+	if (lowerrootvp->v_type != VDIR) {
+		vput(lowerrootvp);
+		return (EINVAL);
+	}
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("mp = %x\n", mp);
+#endif
+
+	amp = (struct umap_mount *) malloc(sizeof(struct umap_mount),
+	    M_UFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Save reference to underlying FS
+	 */
+	amp->umapm_vfs = lowerrootvp->v_mount;
+
+	/*
+	 * Now copy in the number of entries and maps for umap mapping.
+	 */
+	amp->info_nentries = args.nentries;
+	amp->info_gnentries = args.gnentries;
+	error = copyin(args.mapdata, (caddr_t)amp->info_mapdata,
+	    2*sizeof(u_long)*args.nentries);
+	if (error)
+		goto bad;
+
+#ifdef UMAP_DIAGNOSTIC
+	printf("umap_mount:nentries %d\n",args.nentries);
+	for (i = 0; i < args.nentries; i++)
+		printf(" %d maps to %d\n", amp->info_mapdata[i][0],
+		    amp->info_mapdata[i][1]);
+#endif
+
+	/* The group table is sized by gnentries, not nentries. */
+	error = copyin(args.gmapdata, (caddr_t)amp->info_gmapdata,
+	    2*sizeof(u_long)*args.gnentries);
+	if (error)
+		goto bad;
+
+#ifdef UMAP_DIAGNOSTIC
+	printf("umap_mount:gnentries %d\n",args.gnentries);
+	for (i = 0; i < args.gnentries; i++)
+		printf(" group %d maps to %d\n",
+		    amp->info_gmapdata[i][0],
+		    amp->info_gmapdata[i][1]);
+#endif
+
+	/*
+	 * Save reference. Each mount also holds
+	 * a reference on the root vnode.
+	 * On failure vp is not written, so check before touching it.
+	 */
+	error = umap_node_create(mp, lowerrootvp, &vp);
+	if (error)
+		goto bad;
+
+	/*
+	 * Unlock the node (the lookup above asked for a locked leaf)
+	 */
+	VOP_UNLOCK(vp);
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in umapfs_unmount.
+	 */
+	umapm_rootvp = vp;
+	umapm_rootvp->v_flag |= VROOT;
+	amp->umapm_rootvp = umapm_rootvp;
+	if (UMAPVPTOLOWERVP(umapm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) amp;
+	/* NOTE(review): passes the LOFS type id, not a umap one -- confirm. */
+	getnewfsid(mp, MOUNT_LOFS);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount: lower %s, alias at %s\n",
+	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+
+bad:
+	/*
+	 * Undo the lookup and the allocation: the target is still locked
+	 * and referenced from namei(), and amp is not yet attached to mp.
+	 */
+	vput(lowerrootvp);
+	free(amp, M_UFSMNT);	/* XXX */
+	return (error);
+}
+
+/*
+ * VFS start hook for the umap layer.
+ * There is nothing to do: the underlying filesystem's own start
+ * routine ran when that filesystem was mounted, and it is deliberately
+ * not invoked again from here. Always returns 0.
+ */
+int
+umapfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Unmount the umap layer and free its per-mount state.
+ *
+ * Returns EINVAL for a forced unmount when doforce is clear, EBUSY when
+ * the root alias vnode has more than one user, the vflush() error if
+ * flushing fails, and 0 once the root vnode has been released and the
+ * umap_mount structure freed.
+ */
+int
+umapfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct vnode *rootvp;
+	int flushflags;
+	int error;
+
+	rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+	flushflags = 0;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_unmount(mp = %x)\n", mp);
+#endif
+
+	if (mntflags & MNT_FORCE) {
+		/* this layer can never be rootfs so don't check for it */
+		if (doforce == 0)
+			return (EINVAL);
+		flushflags |= FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache. Probably nothing is ever
+	 * cached at this level at the moment, but who knows...
+	 */
+#ifdef notyet
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+
+	error = vflush(mp, rootvp, flushflags);
+	if (error)
+		return (error);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("alias root of lower", rootvp);
+#endif
+	/*
+	 * Drop the mount's reference on the root alias vnode,
+	 * then blow the vnode away for future re-use.
+	 */
+	vrele(rootvp);
+	vgone(rootvp);
+
+	/*
+	 * Finally, throw away the umap_mount structure
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+/*
+ * Hand back the root alias vnode of the mount in *vpp, with an extra
+ * reference taken and locked. Always returns 0.
+ */
+int
+umapfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_root(mp = %x, vp = %x->%x)\n", mp,
+	    MOUNTTOUMAPMOUNT(mp)->umapm_rootvp,
+	    UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp)
+	    );
+#endif
+
+	rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+
+	/* Reference first, then lock, then publish. */
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+
+	return (0);
+}
+
+/*
+ * Quota control: passed straight through to the lower filesystem's
+ * mount with the arguments unchanged; returns whatever it returns.
+ */
+int
+umapfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	struct mount *lowermp;
+
+	lowermp = MOUNTTOUMAPMOUNT(mp)->umapm_vfs;
+	return (VFS_QUOTACTL(lowermp, cmd, uid, arg, p));
+}
+
+/*
+ * Filesystem statistics.
+ * Asks the lower filesystem for its statistics and copies the type,
+ * flags, sizes and block/file counts into *sbp. The fsid and the two
+ * mount names are taken from this mount's own mnt_stat, unless sbp
+ * already is mnt_stat. Returns 0 or the lower filesystem's error.
+ */
+int
+umapfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs lower;
+	int error;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_statfs(mp = %x, vp = %x->%x)\n", mp,
+	    MOUNTTOUMAPMOUNT(mp)->umapm_rootvp,
+	    UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp)
+	    );
+#endif
+
+	bzero(&lower, sizeof(lower));
+
+	error = VFS_STATFS(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, &lower, p);
+	if (error)
+		return (error);
+
+	/* take the "interesting" numbers from the lower layer */
+	sbp->f_type = lower.f_type;
+	sbp->f_flags = lower.f_flags;
+	sbp->f_bsize = lower.f_bsize;
+	sbp->f_iosize = lower.f_iosize;
+	sbp->f_blocks = lower.f_blocks;
+	sbp->f_bfree = lower.f_bfree;
+	sbp->f_bavail = lower.f_bavail;
+	sbp->f_files = lower.f_files;
+	sbp->f_ffree = lower.f_ffree;
+
+	/* identity fields come from this mount; skip the self-copy */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync hook. Does nothing and returns 0.
+ * XXX - relies on no data being cached at the umap layer.
+ */
+int
+umapfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Get a vnode by inode number: forwarded unchanged to the lower
+ * filesystem's mount; returns whatever it returns.
+ */
+int
+umapfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	struct mount *lowermp;
+
+	lowermp = MOUNTTOUMAPMOUNT(mp)->umapm_vfs;
+	return (VFS_VGET(lowermp, ino, vpp));
+}
+
+/*
+ * File handle to vnode: forwarded unchanged to the lower filesystem's
+ * mount; returns whatever it returns.
+ */
+int
+umapfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred**credanonp;
+{
+	struct mount *lowermp;
+
+	lowermp = MOUNTTOUMAPMOUNT(mp)->umapm_vfs;
+	return (VFS_FHTOVP(lowermp, fidp, nam, vpp, exflagsp, credanonp));
+}
+
+/*
+ * Vnode to file handle: performed on the lower vnode behind the alias;
+ * returns whatever the lower filesystem returns.
+ */
+int
+umapfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	struct vnode *lowervp;
+
+	lowervp = UMAPVPTOLOWERVP(vp);
+	return (VFS_VPTOFH(lowervp, fhp));
+}
+
+/* Defined in umap_subr.c; sets up the alias-node hash table. */
+int umapfs_init __P((void));
+
+/*
+ * VFS operations switch for the umap layer.
+ * The initialisers are positional, so their order must match the member
+ * order of struct vfsops.
+ */
+struct vfsops umap_vfsops = {
+ umapfs_mount,
+ umapfs_start,
+ umapfs_unmount,
+ umapfs_root,
+ umapfs_quotactl,
+ umapfs_statfs,
+ umapfs_sync,
+ umapfs_vget,
+ umapfs_fhtovp,
+ umapfs_vptofh,
+ umapfs_init,
+};
diff --git a/sys/fs/umapfs/umap_vnops.c b/sys/fs/umapfs/umap_vnops.c
new file mode 100644
index 000000000000..287804e15618
--- /dev/null
+++ b/sys/fs/umapfs/umap_vnops.c
@@ -0,0 +1,488 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap_vnops.c 8.3 (Berkeley) 1/5/94
+ */
+
+/*
+ * Umap Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/umapfs/umap.h>
+
+
+int umap_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
+
+/*
+ * This is the 10-Apr-92 bypass routine.
+ * See null_vnops.c:null_bypass for more details.
+ *
+ * Generic handler for every vnode operation the umap layer does not
+ * implement itself. Using the operation descriptor in ap->a_desc it:
+ *   1. replaces each umap vnode argument by its lower vnode;
+ *   2. replaces the credential argument, and the credential inside the
+ *      componentname argument, by id-mapped duplicates;
+ *   3. calls the same operation on the first (now lower) vnode;
+ *   4. restores the vnode arguments and the original credentials;
+ *   5. wraps a vnode returned through the "vpp" argument in an alias.
+ *
+ * Returns the error from the lower operation, or from
+ * umap_node_create() when wrapping the result fails.
+ *
+ * NOTE(review): the credentials are passed to crdup() unconditionally;
+ * this assumes no operation routed here carries a sentinel (non-pointer)
+ * credential value -- confirm against the callers.
+ */
+int
+umap_bypass(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	extern int (**umap_vnodeop_p)();  /* not extern, really "forward" */
+	struct ucred **credpp = 0, *credp = 0;
+	struct ucred *savecredp, *savecompcredp = 0;
+	struct ucred *compcredp = 0;
+	struct vnode **this_vp_p;
+	int error;
+	struct vnode *old_vps[VDESC_MAX_VPS];
+	struct vnode *vp1 = 0;
+	struct vnode **vps_p[VDESC_MAX_VPS];
+	struct vnode ***vppp;
+	struct vnodeop_desc *descp = ap->a_desc;
+	int reles, i;
+	struct componentname **compnamepp = 0;
+
+	if (umap_bug_bypass)
+		printf ("umap_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+	/*
+	 * We require at least one vp.
+	 */
+	if (descp->vdesc_vp_offsets == NULL ||
+	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+		panic ("umap_bypass: no vp's in map.\n");
+#endif
+
+	/*
+	 * Map the vnodes going in.
+	 * Later, we'll invoke the operation based on
+	 * the first mapped vnode's operation vector.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		vps_p[i] = this_vp_p =
+			VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i], ap);
+
+		if (i == 0) {
+			vp1 = *vps_p[0];
+		}
+
+		/*
+		 * We're not guaranteed that any but the first vnode
+		 * are of our type. Check for and don't map any
+		 * that aren't. (Must map first vp or vclean fails.)
+		 * A later vnode argument may also be absent altogether
+		 * (NULL), so test for that before looking at its v_op.
+		 */
+
+		if (i && (*this_vp_p == NULL ||
+		    (*this_vp_p)->v_op != umap_vnodeop_p)) {
+			old_vps[i] = NULL;
+		} else {
+			old_vps[i] = *this_vp_p;
+			*(vps_p[i]) = UMAPVPTOLOWERVP(*this_vp_p);
+			/*
+			 * this_vp_p and vps_p[i] are the same slot, so
+			 * this references the lower vnode just stored.
+			 */
+			if (reles & 1)
+				VREF(*this_vp_p);
+		}
+
+	}
+
+	/*
+	 * Fix the credentials. (That's the purpose of this layer.)
+	 */
+
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+
+		credpp = VOPARG_OFFSETTO(struct ucred**,
+		    descp->vdesc_cred_offset, ap);
+
+		/* Save old values */
+
+		savecredp = (*credpp);
+		(*credpp) = crdup(savecredp);
+		credp = *credpp;
+
+		if (umap_bug_bypass && credp->cr_uid != 0)
+			printf("umap_bypass: user was %d, group %d\n",
+			    credp->cr_uid, credp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		umap_mapids(vp1->v_mount, credp);
+
+		if (umap_bug_bypass && credp->cr_uid != 0)
+			printf("umap_bypass: user now %d, group %d\n",
+			    credp->cr_uid, credp->cr_gid);
+	}
+
+	/* BSD often keeps a credential in the componentname structure
+	 * for speed. If there is one, it better get mapped, too.
+	 */
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+
+		compnamepp = VOPARG_OFFSETTO(struct componentname**,
+		    descp->vdesc_componentname_offset, ap);
+
+		compcredp = (*compnamepp)->cn_cred;
+		savecompcredp = compcredp;
+		compcredp = (*compnamepp)->cn_cred = crdup(savecompcredp);
+
+		if (umap_bug_bypass && compcredp->cr_uid != 0)
+			printf("umap_bypass: component credit user was %d, group %d\n",
+			    compcredp->cr_uid, compcredp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		umap_mapids(vp1->v_mount, compcredp);
+
+		if (umap_bug_bypass && compcredp->cr_uid != 0)
+			printf("umap_bypass: component credit user now %d, group %d\n",
+			    compcredp->cr_uid, compcredp->cr_gid);
+	}
+
+	/*
+	 * Call the operation on the lower layer
+	 * with the modified argument structure.
+	 */
+	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+	/*
+	 * Maintain the illusion of call-by-value
+	 * by restoring vnodes in the argument structure
+	 * to their original value.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		if (old_vps[i]) {
+			*(vps_p[i]) = old_vps[i];
+			if (reles & 1)
+				vrele(*(vps_p[i]));
+		}
+	}
+
+	/*
+	 * Map the possible out-going vpp
+	 * (Assumes that the lower layer always returns
+	 * a VREF'ed vpp unless it gets an error.)
+	 */
+	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+	    !error) {
+		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+			goto out;
+		vppp = VOPARG_OFFSETTO(struct vnode***,
+		    descp->vdesc_vpp_offset, ap);
+		error = umap_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+	}
+
+ out:
+	/*
+	 * Free duplicate cred structure and restore old one.
+	 */
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && credp && credp->cr_uid != 0)
+			printf("umap_bypass: returning-user was %d\n",
+			    credp->cr_uid);
+
+		crfree(credp);
+		(*credpp) = savecredp;
+		if (umap_bug_bypass && credpp && (*credpp)->cr_uid != 0)
+			printf("umap_bypass: returning-user now %d\n\n",
+			    (*credpp)->cr_uid);
+	}
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && compcredp && compcredp->cr_uid != 0)
+			printf("umap_bypass: returning-component-user was %d\n",
+			    compcredp->cr_uid);
+
+		crfree(compcredp);
+		(*compnamepp)->cn_cred = savecompcredp;
+		/*
+		 * Report the restored credential. The duplicate has just
+		 * been freed and must not be read again.
+		 */
+		if (umap_bug_bypass && savecompcredp &&
+		    savecompcredp->cr_uid != 0)
+			printf("umap_bypass: returning-component-user now %d\n",
+			    savecompcredp->cr_uid);
+	}
+
+	return (error);
+}
+
+
+/*
+ * We handle getattr to change the fsid and to translate the owner ids.
+ *
+ * The attributes are fetched from the lower layer through
+ * umap_bypass(); its error, if any, is returned unchanged. Afterwards
+ * va_fsid is set from this mount's fsid, and va_uid / va_gid are
+ * reverse-mapped through the mount's tables, falling back to NOBODY and
+ * NULLGROUP when a table has no entry. Returns 0 on success.
+ */
+int
+umap_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/*
+	 * Hold the ids in their own types: a short would truncate or
+	 * sign-extend larger ids before they reach the table lookup.
+	 */
+	uid_t uid;
+	gid_t gid;
+	u_long tmpid;
+	int error, nentries, gnentries;
+	u_long (*mapdata)[2], (*gmapdata)[2];
+	struct vnode **vp1p;
+	struct vnodeop_desc *descp = ap->a_desc;
+
+	if ((error = umap_bypass(ap)) != 0)
+		return (error);
+	/* Requires that arguments be restored. */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+
+	/*
+	 * Umap needs to map the uid and gid returned by a stat
+	 * into the proper values for this site. This involves
+	 * finding the returned id in the mapping information,
+	 * translating it into the id on the other end,
+	 * and filling in the proper field in the vattr
+	 * structure pointed to by ap->a_vap. Ids that have no
+	 * mapping become NOBODY (uid) or NULLGROUP (gid).
+	 */
+
+	/* Find entry in map */
+
+	uid = ap->a_vap->va_uid;
+	gid = ap->a_vap->va_gid;
+	if (umap_bug_bypass)
+		printf("umap_getattr: mapped uid = %d, mapped gid = %d\n",
+		    (int) uid, (int) gid);
+
+	vp1p = VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap);
+	nentries = MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_nentries;
+	mapdata = (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_mapdata);
+	gnentries = MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gnentries;
+	gmapdata = (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gmapdata);
+
+	/* Reverse map the uid for the vnode. Since it's a reverse
+	   map, we can't use umap_mapids() to do it. */
+
+	tmpid = umap_reverse_findid((u_long) uid, mapdata, nentries);
+
+	if (tmpid != (u_long) -1) {
+
+		ap->a_vap->va_uid = (uid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original uid = %d\n", (int) uid);
+	} else
+		ap->a_vap->va_uid = (uid_t) NOBODY;
+
+	/* Reverse map the gid for the vnode. */
+
+	tmpid = umap_reverse_findid((u_long) gid, gmapdata, gnentries);
+
+	if (tmpid != (u_long) -1) {
+
+		ap->a_vap->va_gid = (gid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original gid = %d\n", (int) gid);
+	} else
+		ap->a_vap->va_gid = (gid_t) NULLGROUP;
+
+	return (0);
+}
+
+/*
+ * Inactive hook: intentionally a no-op that is NOT passed down.
+ * The lower vnode is only released in umap_reclaim(), so until then
+ * the umap_node stays in the cache and can be reused. Always returns 0.
+ */
+int
+umap_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Reclaim an alias vnode: unhook its umap_node from the hash chain,
+ * free it, and drop the reference the node held on the lower vnode.
+ * Always returns 0.
+ */
+int
+umap_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct umap_node *node = VTOUMAP(vp);
+	struct vnode *lowervp;
+
+	/* Remember the lower vnode; it is released last. */
+	lowervp = node->umap_lowervp;
+
+	/* Once cleared, this node can no longer be found and re-used. */
+	node->umap_lowervp = NULL;
+	remque(node);
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = NULL;
+
+	vrele(lowervp);
+	return (0);
+}
+
+/*
+ * Strategy: temporarily point the buffer at the lower vnode, run the
+ * strategy operation on it, then put the alias vnode back.
+ * Returns the result of the lower operation.
+ */
+int
+umap_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *aliasvp = bp->b_vp;
+	int error;
+
+	bp->b_vp = UMAPVPTOLOWERVP(aliasvp);
+	error = VOP_STRATEGY(bp);
+	bp->b_vp = aliasvp;
+
+	return (error);
+}
+
+/*
+ * Bwrite: temporarily point the buffer at the lower vnode, run the
+ * bwrite operation on it, then put the alias vnode back.
+ * Returns the result of the lower operation.
+ */
+int
+umap_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *aliasvp = bp->b_vp;
+	int error;
+
+	bp->b_vp = UMAPVPTOLOWERVP(aliasvp);
+	error = VOP_BWRITE(bp);
+	bp->b_vp = aliasvp;
+
+	return (error);
+}
+
+
+/*
+ * Print a one-line description of an alias vnode: its own address and
+ * the address of the lower vnode it stands for. Always returns 0.
+ */
+int
+umap_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *aliasvp = ap->a_vp;
+	struct vnode *lowervp = UMAPVPTOLOWERVP(aliasvp);
+
+	printf("\ttag VT_UMAPFS, vp=%x, lowervp=%x\n", aliasvp, lowervp);
+	return (0);
+}
+
+/*
+ * Rename.
+ * Rename is irregular in that it carries two componentname structures,
+ * while umap_bypass() only maps the credential of one. The credential
+ * of the second ("to") componentname is therefore duplicated and mapped
+ * here, the rest is left to umap_bypass(), and the original credential
+ * is put back afterwards. Returns the result of umap_bypass().
+ */
+int
+umap_rename(ap)
+	struct vop_rename_args /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	struct vnode *fdvp = ap->a_fdvp;
+	struct componentname *tcnp = ap->a_tcnp;
+	struct ucred *origcred = tcnp->cn_cred;
+	struct ucred *mappedcred;
+	int error;
+
+	/* Swap in a private copy of the credential before mapping it. */
+	mappedcred = crdup(origcred);
+	tcnp->cn_cred = mappedcred;
+
+	if (umap_bug_bypass && mappedcred->cr_uid != 0)
+		printf("umap_rename: rename component credit user was %d, group %d\n",
+		    mappedcred->cr_uid, mappedcred->cr_gid);
+
+	/* Map all ids in the credential structure. */
+	umap_mapids(fdvp->v_mount, mappedcred);
+
+	if (umap_bug_bypass && mappedcred->cr_uid != 0)
+		printf("umap_rename: rename component credit user now %d, group %d\n",
+		    mappedcred->cr_uid, mappedcred->cr_gid);
+
+	error = umap_bypass(ap);
+
+	/* Drop the mapped copy and restore the caller's credential. */
+	crfree(mappedcred);
+	tcnp->cn_cred = origcred;
+
+	return error;
+}
+
+/*
+ * Global vfs data structures
+ */
+/*
+ * XXX - strategy, bwrite are hand coded currently. They should
+ * go away with a merged buffer/block cache.
+ *
+ */
+/* Operation vector for umap vnodes; declared extern in umap.h. */
+int (**umap_vnodeop_p)();
+/*
+ * Operations the umap layer implements itself. Everything not listed
+ * goes to the default entry, umap_bypass. The list ends with a NULL pair.
+ */
+struct vnodeopv_entry_desc umap_vnodeop_entries[] = {
+ { &vop_default_desc, umap_bypass },
+
+ { &vop_getattr_desc, umap_getattr },
+ { &vop_inactive_desc, umap_inactive },
+ { &vop_reclaim_desc, umap_reclaim },
+ { &vop_print_desc, umap_print },
+ { &vop_rename_desc, umap_rename },
+
+ { &vop_strategy_desc, umap_strategy },
+ { &vop_bwrite_desc, umap_bwrite },
+
+ { (struct vnodeop_desc*) NULL, (int(*)()) NULL }
+};
+/* Pairs the vector pointer above with its entry list. */
+struct vnodeopv_desc umap_vnodeop_opv_desc =
+ { &umap_vnodeop_p, umap_vnodeop_entries };
diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h
new file mode 100644
index 000000000000..463218ac3ed2
--- /dev/null
+++ b/sys/fs/unionfs/union.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union.h 8.2 (Berkeley) 2/17/94
+ */
+
+/*
+ * Mount arguments for a union mount.
+ */
+struct union_args {
+ char *target; /* Target of loopback */
+ int mntflags; /* Options on the mount (UNMNT_* below) */
+};
+
+/* Operation mode, selected by (mntflags & UNMNT_OPMASK) */
+#define UNMNT_ABOVE 0x0001 /* Target appears above mount point */
+#define UNMNT_BELOW 0x0002 /* Target appears below mount point */
+#define UNMNT_REPLACE 0x0003 /* Target replaces mount point */
+#define UNMNT_OPMASK 0x0003
+
+/*
+ * Per-mount state of a union filesystem.
+ */
+struct union_mount {
+ struct vnode *um_uppervp; /* Root of the upper layer */
+ struct vnode *um_lowervp; /* Root of the lower layer (may be nil) */
+ struct ucred *um_cred; /* Credentials of user calling mount */
+ int um_cmode; /* cmask from mount process */
+ int um_op; /* Operation mode */
+};
+
+#ifdef KERNEL
+
+/*
+ * UN_DIRMODE is the mode bits used to create a shadow directory,
+ * UN_FILEMODE the mode bits used to create a shadow file.
+ * Each is the base permission set (rwx for directories, rw for
+ * files) repeated at shifts of 0, 3 and 6 bits.
+ */
+#define VRWXMODE (VREAD|VWRITE|VEXEC)
+#define VRWMODE (VREAD|VWRITE)
+#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6))
+#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6))
+
+/*
+ * A cache of vnode references.
+ * One union_node hangs off the v_data field of each union vnode
+ * (see VTOUNION) and records the upper and lower layer objects
+ * which that vnode maps.
+ */
+struct union_node {
+ LIST_ENTRY(union_node) un_cache; /* Hash chain */
+ struct vnode *un_vnode; /* Back pointer */
+ struct vnode *un_uppervp; /* overlaying object */
+ struct vnode *un_lowervp; /* underlying object */
+ struct vnode *un_dirvp; /* Parent dir of uppervp */
+ char *un_path; /* saved component name */
+ int un_hash; /* saved un_path hash value */
+ int un_openl; /* # of opens on lowervp */
+ int un_flags; /* UN_* flags, see below */
+#ifdef DIAGNOSTIC
+ pid_t un_pid; /* pid which set UN_LOCKED, or -1 */
+#endif
+};
+
+/* Bits for un_flags */
+#define UN_WANT 0x01 /* Someone is sleeping, waiting for UN_LOCKED to clear */
+#define UN_LOCKED 0x02 /* Node is locked */
+#define UN_ULOCK 0x04 /* Upper node is locked */
+#define UN_KLOCK 0x08 /* Keep upper node locked on vput */
+
+/*
+ * Helper routines exported to the rest of the union filesystem.
+ */
+extern int union_allocvp __P((struct vnode **, struct mount *,
+ struct vnode *, struct vnode *,
+ struct componentname *, struct vnode *,
+ struct vnode *));
+extern int union_copyfile __P((struct proc *, struct ucred *,
+ struct vnode *, struct vnode *));
+extern int union_mkshadow __P((struct union_mount *, struct vnode *,
+ struct componentname *, struct vnode **));
+extern int union_vn_create __P((struct vnode **, struct union_node *,
+ struct proc *));
+/* was misspelt union_cn_close; no function of that name is defined */
+extern int union_vn_close __P((struct vnode *, int, struct ucred *,
+ struct proc *));
+extern void union_removed_upper __P((struct union_node *un));
+extern struct vnode *union_lowervp __P((struct vnode *));
+extern void union_newlower __P((struct union_node *, struct vnode *));
+extern void union_newupper __P((struct union_node *, struct vnode *));
+
+/*
+ * Conversions between the generic mount/vnode structures and the
+ * union-private data attached to them.
+ */
+#define MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data))
+#define VTOUNION(vp) ((struct union_node *)(vp)->v_data)
+#define UNIONTOV(un) ((un)->un_vnode)
+#define LOWERVP(vp) (VTOUNION(vp)->un_lowervp)
+#define UPPERVP(vp) (VTOUNION(vp)->un_uppervp)
+/* The upper layer object if there is one, otherwise the lower one */
+#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp))
+
+extern int (**union_vnodeop_p)();
+extern struct vfsops union_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
new file mode 100644
index 000000000000..77947d1dfbe1
--- /dev/null
+++ b/sys/fs/unionfs/union_subr.c
@@ -0,0 +1,744 @@
+/*
+ * Copyright (c) 1994 Jan-Simon Pendry
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union_subr.c 8.4 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+#ifdef DIAGNOSTIC
+#include <sys/proc.h>
+#endif
+
+/* must be power of two, otherwise change UNION_HASH() */
+#define NHASH 32
+
+/*
+ * Map an (upper, lower) vnode pointer pair to a hash chain index:
+ * the two pointers are added as unsigned longs, shifted right by
+ * 8 bits and masked down to the range 0 .. NHASH-1.
+ * Both arguments are parenthesized so that an expression argument
+ * is cast as a whole.
+ */
+#define UNION_HASH(u, l) \
+ (((((unsigned long) (u)) + ((unsigned long) (l))) >> 8) & (NHASH-1))
+
+/*
+ * unhead[] holds the hash chains of union_nodes; unvplock[] holds one
+ * UN_LOCKED/UN_WANT flag word per chain (see union_list_lock).
+ */
+static LIST_HEAD(unhead, union_node) unhead[NHASH];
+static int unvplock[NHASH];
+
+/*
+ * Initialise the union_node cache: empty every hash chain and
+ * clear all of the per-chain lock words.
+ * Always returns 0.
+ */
+int
+union_init()
+{
+ int i;
+
+ for (i = 0; i < NHASH; i++)
+ LIST_INIT(&unhead[i]);
+ bzero((caddr_t) unvplock, sizeof(unvplock));
+
+ /* the function is declared int, so give the caller a defined value */
+ return (0);
+}
+
+/*
+ * Try to lock hash chain (ix).
+ * Returns 0 with the chain locked.  If the chain is already locked,
+ * mark it wanted, sleep until woken and return 1 WITHOUT holding the
+ * lock, so callers must retry until 0 comes back.
+ */
+static int
+union_list_lock(ix)
+ int ix;
+{
+ int *lockp = &unvplock[ix];
+
+ if ((*lockp & UN_LOCKED) == 0) {
+ /* uncontended: claim the chain */
+ *lockp |= UN_LOCKED;
+ return (0);
+ }
+
+ /* held by someone else: ask for a wakeup and wait for it */
+ *lockp |= UN_WANT;
+ sleep((caddr_t) lockp, PINOD);
+ return (1);
+}
+
+/*
+ * Unlock hash chain (ix) and, if anybody went to sleep waiting
+ * for it, clear the wanted mark and wake them up.
+ */
+static void
+union_list_unlock(ix)
+ int ix;
+{
+ int *lockp = &unvplock[ix];
+
+ *lockp &= ~UN_LOCKED;
+
+ /* nobody asked to be woken */
+ if ((*lockp & UN_WANT) == 0)
+ return;
+
+ *lockp &= ~UN_WANT;
+ wakeup((caddr_t) lockp);
+}
+
+/*
+ * Replace the upper and/or lower layer vnodes of (un) with (uppervp)
+ * and (lowervp).  The old vnodes, where they differ from the new
+ * ones, are vrele'd; dropping the old lower vnode also discards the
+ * saved path name and parent directory.  If the change alters the
+ * hash value, (un) is moved from its old hash chain to the new one.
+ */
+void
+union_updatevp(un, uppervp, lowervp)
+ struct union_node *un;
+ struct vnode *uppervp;
+ struct vnode *lowervp;
+{
+ int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
+ int nhash = UNION_HASH(uppervp, lowervp);
+
+ if (ohash != nhash) {
+ /*
+ * Ensure locking is ordered from lower to higher
+ * to avoid deadlocks.  Only the order in which the
+ * two locks are taken depends on the comparison;
+ * ohash and nhash themselves must keep meaning "old
+ * chain" and "new chain", otherwise the node would
+ * be put back on the chain it has just left.
+ */
+ int first = (ohash < nhash) ? ohash : nhash;
+ int second = (ohash < nhash) ? nhash : ohash;
+
+ while (union_list_lock(first))
+ continue;
+
+ while (union_list_lock(second))
+ continue;
+
+ LIST_REMOVE(un, un_cache);
+ union_list_unlock(ohash);
+ } else {
+ while (union_list_lock(nhash))
+ continue;
+ }
+
+ /* from here on only the new chain is locked */
+
+ if (un->un_lowervp != lowervp) {
+ if (un->un_lowervp) {
+ vrele(un->un_lowervp);
+ if (un->un_path) {
+ free(un->un_path, M_TEMP);
+ un->un_path = 0;
+ }
+ if (un->un_dirvp) {
+ vrele(un->un_dirvp);
+ un->un_dirvp = NULLVP;
+ }
+ }
+ un->un_lowervp = lowervp;
+ }
+
+ if (un->un_uppervp != uppervp) {
+ if (un->un_uppervp)
+ vrele(un->un_uppervp);
+
+ un->un_uppervp = uppervp;
+ }
+
+ if (ohash != nhash)
+ LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
+
+ union_list_unlock(nhash);
+}
+
+/*
+ * Give (un) a new lower layer vnode, keeping its current upper one.
+ */
+void
+union_newlower(un, lowervp)
+ struct union_node *un;
+ struct vnode *lowervp;
+{
+
+ union_updatevp(un, un->un_uppervp, lowervp);
+}
+
+/*
+ * Give (un) a new upper layer vnode, keeping its current lower one.
+ */
+void
+union_newupper(un, uppervp)
+ struct union_node *un;
+ struct vnode *uppervp;
+{
+
+ union_updatevp(un, uppervp, un->un_lowervp);
+}
+
+/*
+ * allocate a union_node/vnode pair. the vnode is
+ * referenced and locked. the new vnode is returned
+ * via (vpp). (mp) is the mountpoint of the union filesystem,
+ * (dvp) is the parent directory where the upper layer object
+ * should exist (but doesn't) and (cnp) is the componentname
+ * information which is partially copied to allow the upper
+ * layer object to be created at a later time. (uppervp)
+ * and (lowervp) reference the upper and lower layer objects
+ * being mapped. either, but not both, can be nil.
+ * if supplied, (uppervp) is locked.
+ * the reference is either maintained in the new union_node
+ * object which is allocated, or they are vrele'd.
+ * (undvp) is not used by this function.
+ *
+ * all union_nodes are maintained on hash chains (unhead[]),
+ * indexed by UNION_HASH() of the upper and lower vnode
+ * pointers. new nodes are only allocated when they cannot
+ * be found on these chains. entries on the chains are
+ * removed when the vfs reclaim entry is called.
+ *
+ * a lock is kept for each hash chain (unvplock[]). this is
+ * needed because the getnewvnode() function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode.
+ *
+ * if an entry is found on a chain, then call vget() to
+ * take a reference. this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ *
+ * returns 0 on success, otherwise the error from getnewvnode().
+ */
+int
+union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp)
+ struct vnode **vpp;
+ struct mount *mp;
+ struct vnode *undvp;
+ struct vnode *dvp; /* may be null */
+ struct componentname *cnp; /* may be null */
+ struct vnode *uppervp; /* may be null */
+ struct vnode *lowervp; /* may be null */
+{
+ int error;
+ struct union_node *un;
+ struct union_node **pp; /* NOTE(review): never used */
+ struct vnode *xlowervp = NULLVP;
+ int hash;
+ int try;
+
+ if (uppervp == NULLVP && lowervp == NULLVP)
+ panic("union: unidentifiable allocation");
+
+ /*
+ * If the two layers hold objects of different types, carry on
+ * as if there were no lower object; the caller's reference is
+ * remembered in xlowervp so that it can be dropped later.
+ * NOTE(review): xlowervp is only vrele'd on the path which
+ * creates a new node successfully -- confirm whether the other
+ * return paths are meant to keep the reference.
+ */
+ if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
+ xlowervp = lowervp;
+ lowervp = NULLVP;
+ }
+
+ /*
+ * Search for an existing node.  Up to three chains are probed:
+ * the one for the (upper, lower) pair, the one for the upper
+ * object alone and the one for the lower object alone.
+ */
+loop:
+ for (try = 0; try < 3; try++) {
+ switch (try) {
+ case 0:
+ if (lowervp == NULLVP)
+ continue;
+ hash = UNION_HASH(uppervp, lowervp);
+ break;
+
+ case 1:
+ if (uppervp == NULLVP)
+ continue;
+ hash = UNION_HASH(uppervp, NULLVP);
+ break;
+
+ case 2:
+ if (lowervp == NULLVP)
+ continue;
+ hash = UNION_HASH(NULLVP, lowervp);
+ break;
+ }
+
+ while (union_list_lock(hash))
+ continue;
+
+ /*
+ * A node matches if each of its layers is either the
+ * requested vnode or nil, and it belongs to this mount.
+ */
+ for (un = unhead[hash].lh_first; un != 0;
+ un = un->un_cache.le_next) {
+ if ((un->un_lowervp == lowervp ||
+ un->un_lowervp == NULLVP) &&
+ (un->un_uppervp == uppervp ||
+ un->un_uppervp == NULLVP) &&
+ (UNIONTOV(un)->v_mount == mp)) {
+ /* could not get a reference: start again */
+ if (vget(UNIONTOV(un), 0)) {
+ union_list_unlock(hash);
+ goto loop;
+ }
+ break;
+ }
+ }
+
+ union_list_unlock(hash);
+
+ if (un)
+ break;
+ }
+
+ if (un) {
+ /*
+ * Obtain a lock on the union_node.
+ * uppervp is locked, though un->un_uppervp
+ * may not be. this doesn't break the locking
+ * hierarchy since in the case that un->un_uppervp
+ * is not yet locked it will be vrele'd and replaced
+ * with uppervp.
+ */
+
+ if ((dvp != NULLVP) && (uppervp == dvp)) {
+ /*
+ * Access ``.'', so (un) will already
+ * be locked. Since this process has
+ * the lock on (uppervp) no other
+ * process can hold the lock on (un).
+ */
+#ifdef DIAGNOSTIC
+ if ((un->un_flags & UN_LOCKED) == 0)
+ panic("union: . not locked");
+ else if (curproc && un->un_pid != curproc->p_pid &&
+ un->un_pid > -1 && curproc->p_pid > -1)
+ panic("union: allocvp not lock owner");
+#endif
+ } else {
+ /*
+ * Node is busy: give back the reference taken
+ * by vget, wait for the holder to let go and
+ * then redo the whole search.
+ */
+ if (un->un_flags & UN_LOCKED) {
+ vrele(UNIONTOV(un));
+ un->un_flags |= UN_WANT;
+ sleep((caddr_t) &un->un_flags, PINOD);
+ goto loop;
+ }
+ un->un_flags |= UN_LOCKED;
+
+#ifdef DIAGNOSTIC
+ if (curproc)
+ un->un_pid = curproc->p_pid;
+ else
+ un->un_pid = -1;
+#endif
+ }
+
+ /*
+ * At this point, the union_node is locked,
+ * un->un_uppervp may not be locked, and uppervp
+ * is locked or nil.
+ */
+
+ /*
+ * Save information about the upper layer.
+ * If the node already refers to uppervp the extra
+ * reference passed in by the caller is dropped.
+ */
+ if (uppervp != un->un_uppervp) {
+ union_newupper(un, uppervp);
+ } else if (uppervp) {
+ vrele(uppervp);
+ }
+
+ if (un->un_uppervp) {
+ un->un_flags |= UN_ULOCK;
+ un->un_flags &= ~UN_KLOCK;
+ }
+
+ /*
+ * Save information about the lower layer.
+ * This needs to keep track of pathname
+ * and directory information which union_vn_create
+ * might need.
+ */
+ if (lowervp != un->un_lowervp) {
+ union_newlower(un, lowervp);
+ if (cnp && (lowervp != NULLVP) &&
+ (lowervp->v_type == VREG)) {
+ un->un_hash = cnp->cn_hash;
+ un->un_path = malloc(cnp->cn_namelen+1,
+ M_TEMP, M_WAITOK);
+ bcopy(cnp->cn_nameptr, un->un_path,
+ cnp->cn_namelen);
+ un->un_path[cnp->cn_namelen] = '\0';
+ VREF(dvp);
+ un->un_dirvp = dvp;
+ }
+ } else if (lowervp) {
+ vrele(lowervp);
+ }
+ *vpp = UNIONTOV(un);
+ return (0);
+ }
+
+ /*
+ * otherwise lock the vp list while we call getnewvnode
+ * since that can block.
+ */
+ hash = UNION_HASH(uppervp, lowervp);
+
+ /* had to sleep for the chain lock: things may have changed */
+ if (union_list_lock(hash))
+ goto loop;
+
+ error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
+ if (error) {
+ /* no vnode: give back the caller's references */
+ if (uppervp) {
+ if (dvp == uppervp)
+ vrele(uppervp);
+ else
+ vput(uppervp);
+ }
+ if (lowervp)
+ vrele(lowervp);
+
+ goto out;
+ }
+
+ MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
+ M_TEMP, M_WAITOK);
+
+ /* the union vnode takes its type from the upper object if any */
+ if (uppervp)
+ (*vpp)->v_type = uppervp->v_type;
+ else
+ (*vpp)->v_type = lowervp->v_type;
+ un = VTOUNION(*vpp);
+ un->un_vnode = *vpp;
+ un->un_uppervp = uppervp;
+ un->un_lowervp = lowervp;
+ un->un_openl = 0;
+ un->un_flags = UN_LOCKED;
+ if (un->un_uppervp)
+ un->un_flags |= UN_ULOCK;
+#ifdef DIAGNOSTIC
+ if (curproc)
+ un->un_pid = curproc->p_pid;
+ else
+ un->un_pid = -1;
+#endif
+ /*
+ * For a regular file in the lower layer remember the name
+ * and the parent directory, for later use in creating the
+ * upper layer object.
+ */
+ if (cnp && (lowervp != NULLVP) && (lowervp->v_type == VREG)) {
+ un->un_hash = cnp->cn_hash;
+ un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
+ bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
+ un->un_path[cnp->cn_namelen] = '\0';
+ VREF(dvp);
+ un->un_dirvp = dvp;
+ } else {
+ un->un_hash = 0;
+ un->un_path = 0;
+ un->un_dirvp = 0;
+ }
+
+ LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
+
+ /* the mismatched lower object is not recorded anywhere */
+ if (xlowervp)
+ vrele(xlowervp);
+
+out:
+ union_list_unlock(hash);
+
+ return (error);
+}
+
+/*
+ * Tear down the union_node attached to (vp): take it off its hash
+ * chain, drop the references it holds on the upper, lower and parent
+ * directory vnodes, free the saved path name and finally free the
+ * node itself.  Always returns 0.
+ * NOTE(review): the hash chain lock is not taken around the
+ * LIST_REMOVE here.
+ */
+int
+union_freevp(vp)
+ struct vnode *vp;
+{
+ struct union_node *un = VTOUNION(vp);
+
+ LIST_REMOVE(un, un_cache);
+
+ if (un->un_uppervp)
+ vrele(un->un_uppervp);
+ if (un->un_lowervp)
+ vrele(un->un_lowervp);
+ if (un->un_dirvp)
+ vrele(un->un_dirvp);
+ if (un->un_path)
+ free(un->un_path, M_TEMP);
+
+ FREE(vp->v_data, M_TEMP);
+ vp->v_data = 0;
+
+ return (0);
+}
+
+/*
+ * copyfile. copy the vnode (fvp) to the vnode (tvp)
+ * using a sequence of reads and writes. both (fvp)
+ * and (tvp) are locked on entry and exit.
+ * (p) and (cred) are the process and credentials used
+ * for the i/o. returns 0 on success, or the first error
+ * reported by VOP_READ or VOP_WRITE.
+ */
+int
+union_copyfile(p, cred, fvp, tvp)
+ struct proc *p;
+ struct ucred *cred;
+ struct vnode *fvp;
+ struct vnode *tvp;
+{
+ char *buf;
+ struct uio uio;
+ struct iovec iov;
+ int error = 0;
+
+ /*
+ * strategy:
+ * allocate a buffer of size MAXBSIZE.
+ * loop doing reads and writes, keeping track
+ * of the current uio offset.
+ * give up at the first sign of trouble.
+ */
+
+ uio.uio_procp = p;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_offset = 0;
+
+ /* each vnode is unlocked around its lease check and relocked after */
+ VOP_UNLOCK(fvp); /* XXX */
+ LEASE_CHECK(fvp, p, cred, LEASE_READ);
+ VOP_LOCK(fvp); /* XXX */
+ VOP_UNLOCK(tvp); /* XXX */
+ LEASE_CHECK(tvp, p, cred, LEASE_WRITE);
+ VOP_LOCK(tvp); /* XXX */
+
+ buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
+
+ /* ugly loop follows... */
+ do {
+ /* remember where this chunk starts, for the write below */
+ off_t offset = uio.uio_offset;
+
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ iov.iov_base = buf;
+ iov.iov_len = MAXBSIZE;
+ uio.uio_resid = iov.iov_len;
+ uio.uio_rw = UIO_READ;
+ error = VOP_READ(fvp, &uio, 0, cred);
+
+ if (error == 0) {
+ /*
+ * Re-aim the uio at the start of the buffer and
+ * write out as many bytes as the read produced,
+ * at the offset the read started from.
+ */
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ iov.iov_base = buf;
+ iov.iov_len = MAXBSIZE - uio.uio_resid;
+ uio.uio_offset = offset;
+ uio.uio_rw = UIO_WRITE;
+ uio.uio_resid = iov.iov_len;
+
+ /* the read returned no data: copy is complete */
+ if (uio.uio_resid == 0)
+ break;
+
+ /* keep writing until the whole chunk has gone out */
+ do {
+ error = VOP_WRITE(tvp, &uio, 0, cred);
+ } while ((uio.uio_resid > 0) && (error == 0));
+ }
+
+ } while (error == 0);
+
+ free(buf, M_TEMP);
+ return (error);
+}
+
+/*
+ * Create a shadow directory in the upper layer.
+ * The new vnode is returned locked.
+ *
+ * (um) points to the union mount structure for access to
+ * the mounting process's credentials.
+ * (dvp) is the directory in which to create the shadow directory.
+ * it is unlocked on entry and exit.
+ * (cnp) is the componentname to be created.
+ * (vpp) is the returned newly created shadow directory, which
+ * is returned locked.
+ *
+ * Returns 0 on success, EEXIST if the name turns out to exist
+ * already, or the error from relookup or VOP_MKDIR.
+ */
+int
+union_mkshadow(um, dvp, cnp, vpp)
+ struct union_mount *um;
+ struct vnode *dvp;
+ struct componentname *cnp;
+ struct vnode **vpp;
+{
+ int error;
+ struct vattr va;
+ struct proc *p = cnp->cn_proc;
+ struct componentname cn;
+
+ /*
+ * policy: when creating the shadow directory in the
+ * upper layer, create it owned by the user who did
+ * the mount, group from parent directory, and mode
+ * 777 modified by umask (ie mostly identical to the
+ * mkdir syscall). (jsp, kb)
+ */
+
+ /*
+ * A new componentname structure must be faked up because
+ * there is no way to know where the upper level cnp came
+ * from or what it is being used for. This must duplicate
+ * some of the work done by NDINIT, some of the work done
+ * by namei, some of the work done by lookup and some of
+ * the work done by VOP_LOOKUP when given a CREATE flag.
+ * Conclusion: Horrible.
+ *
+ * The pathname buffer will be FREEed by VOP_MKDIR.
+ */
+ cn.cn_pnbuf = malloc(cnp->cn_namelen+1, M_NAMEI, M_WAITOK);
+ bcopy(cnp->cn_nameptr, cn.cn_pnbuf, cnp->cn_namelen);
+ cn.cn_pnbuf[cnp->cn_namelen] = '\0';
+
+ cn.cn_nameiop = CREATE;
+ cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+ cn.cn_proc = cnp->cn_proc;
+ /*
+ * For an UNMNT_ABOVE mount the caller's credentials are used,
+ * otherwise those saved from the mounting process.
+ */
+ if (um->um_op == UNMNT_ABOVE)
+ cn.cn_cred = cnp->cn_cred;
+ else
+ cn.cn_cred = um->um_cred;
+ cn.cn_nameptr = cn.cn_pnbuf;
+ cn.cn_namelen = cnp->cn_namelen;
+ cn.cn_hash = cnp->cn_hash;
+ cn.cn_consume = cnp->cn_consume;
+
+ VREF(dvp);
+ if (error = relookup(dvp, vpp, &cn))
+ return (error);
+ vrele(dvp);
+
+ /* the name already exists: back out and report it */
+ if (*vpp) {
+ VOP_ABORTOP(dvp, &cn);
+ VOP_UNLOCK(dvp);
+ vrele(*vpp);
+ *vpp = NULLVP;
+ return (EEXIST);
+ }
+
+ VATTR_NULL(&va);
+ va.va_type = VDIR;
+ va.va_mode = um->um_cmode;
+
+ /* LEASE_CHECK: dvp is locked */
+ LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE);
+
+ error = VOP_MKDIR(dvp, vpp, &cn, &va);
+ return (error);
+}
+
+/*
+ * union_vn_create: creates and opens a new shadow file
+ * on the upper union layer. this function is similar
+ * in spirit to calling vn_open but it avoids calling namei().
+ * the problem with calling namei is that a) it locks too many
+ * things, and b) it doesn't start at the "right" directory,
+ * whereas relookup is told where to start.
+ *
+ * (vpp) receives the new vnode, or NULLVP on failure.
+ * (un) supplies the saved name (un_path), its hash (un_hash)
+ * and the directory to create it in (un_dirvp).
+ * (p) is the process on whose behalf the file is created.
+ *
+ * Returns 0 on success, EEXIST if the name exists already, or
+ * the error from relookup, VOP_CREATE or VOP_OPEN.
+ */
+int
+union_vn_create(vpp, un, p)
+ struct vnode **vpp;
+ struct union_node *un;
+ struct proc *p;
+{
+ struct vnode *vp;
+ struct ucred *cred = p->p_ucred;
+ struct vattr vat;
+ struct vattr *vap = &vat;
+ int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
+ int error;
+ int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
+ struct componentname cn;
+
+ *vpp = NULLVP;
+
+ /*
+ * Build a new componentname structure (for the same
+ * reasons outlined in union_mkshadow).
+ * The difference here is that the file is owned by
+ * the current user, rather than by the person who
+ * did the mount, since the current user needs to be
+ * able to write the file (that's why it is being
+ * copied in the first place).
+ *
+ * The buffer must hold the name plus its terminating
+ * NUL, since cn_namelen+1 bytes are copied into it.
+ */
+ cn.cn_namelen = strlen(un->un_path);
+ cn.cn_pnbuf = (caddr_t) malloc(cn.cn_namelen+1, M_NAMEI, M_WAITOK);
+ bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
+ cn.cn_nameiop = CREATE;
+ cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+ cn.cn_proc = p;
+ cn.cn_cred = p->p_ucred;
+ cn.cn_nameptr = cn.cn_pnbuf;
+ cn.cn_hash = un->un_hash;
+ cn.cn_consume = 0;
+
+ VREF(un->un_dirvp);
+ if (error = relookup(un->un_dirvp, &vp, &cn))
+ return (error);
+ vrele(un->un_dirvp);
+
+ /* somebody else created the name first: back out */
+ if (vp) {
+ VOP_ABORTOP(un->un_dirvp, &cn);
+ if (un->un_dirvp == vp)
+ vrele(un->un_dirvp);
+ else
+ vput(un->un_dirvp);
+ vrele(vp);
+ return (EEXIST);
+ }
+
+ /*
+ * Good - there was no race to create the file
+ * so go ahead and create it. The permissions
+ * on the file will be 0666 modified by the
+ * current user's umask. Access to the file, while
+ * it is unioned, will require access to the top *and*
+ * bottom files. Access when not unioned will simply
+ * require access to the top-level file.
+ * TODO: confirm choice of access permissions.
+ */
+ VATTR_NULL(vap);
+ vap->va_type = VREG;
+ vap->va_mode = cmode;
+ LEASE_CHECK(un->un_dirvp, p, cred, LEASE_WRITE);
+ if (error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap))
+ return (error);
+
+ if (error = VOP_OPEN(vp, fmode, cred, p)) {
+ vput(vp);
+ return (error);
+ }
+
+ /* the file is open for writing; union_vn_close undoes this */
+ vp->v_writecount++;
+ *vpp = vp;
+ return (0);
+}
+
+/*
+ * Close a vnode: drop the write count if it was open for
+ * writing (fmode has FWRITE set) and hand the close on to the
+ * underlying filesystem, passing the caller's credentials and
+ * process along with it.  Returns whatever VOP_CLOSE returns.
+ */
+int
+union_vn_close(vp, fmode, cred, p)
+ struct vnode *vp;
+ int fmode;
+ struct ucred *cred;
+ struct proc *p;
+{
+ if (fmode & FWRITE)
+ --vp->v_writecount;
+ return (VOP_CLOSE(vp, fmode, cred, p));
+}
+
+/*
+ * Detach the upper layer vnode from (un): unlock it first if the
+ * node records it as locked (UN_ULOCK), then replace it with nil.
+ */
+void
+union_removed_upper(un)
+ struct union_node *un;
+{
+ if (un->un_flags & UN_ULOCK) {
+ un->un_flags &= ~UN_ULOCK;
+ VOP_UNLOCK(un->un_uppervp);
+ }
+
+ /* drops the node's reference on the old upper vnode */
+ union_newupper(un, NULLVP);
+}
+
+/*
+ * Return the lower layer vnode behind the union vnode (vp).
+ * When it exists and has the same type as (vp) a reference is
+ * taken with vget() first, and NULLVP is returned if that fails.
+ * In every other case the stored pointer (possibly nil) is handed
+ * back as it is, without a new reference.
+ */
+struct vnode *
+union_lowervp(vp)
+ struct vnode *vp;
+{
+ struct union_node *node = VTOUNION(vp);
+
+ /* no lower object, or one of a different type: hand it back as is */
+ if (node->un_lowervp == NULLVP ||
+ vp->v_type != node->un_lowervp->v_type)
+ return (node->un_lowervp);
+
+ if (vget(node->un_lowervp, 0) != 0)
+ return (NULLVP);
+
+ return (node->un_lowervp);
+}
diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
new file mode 100644
index 000000000000..9fa27460e3d4
--- /dev/null
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union_vfsops.c 8.7 (Berkeley) 3/5/94
+ */
+
+/*
+ * Union Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+/*
+ * Mount union filesystem
+ *
+ * (mp) is the mount being set up, (path) the user-space name of
+ * the mount point, (data) the user-space struct union_args,
+ * (ndp) a nameidata used to look up the target and (p) the
+ * calling process.
+ *
+ * Returns 0 on success.  On failure every reference and every
+ * allocation made so far is released and an error is returned:
+ * EOPNOTSUPP for an update mount, EACCES if the real user does
+ * not own the mounted-on directory, EINVAL if the target is not
+ * a directory or the operation mode is invalid, or the error
+ * from VOP_GETATTR, VOP_ACCESS, copyin or namei.
+ */
+int
+union_mount(mp, path, data, ndp, p)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ int error = 0;
+ struct union_args args;
+ struct vnode *lowerrootvp = NULLVP;
+ struct vnode *upperrootvp = NULLVP;
+ struct union_mount *um = 0; /* nil until allocated, see bad: */
+ struct ucred *cred = 0;
+ struct ucred *scred;
+ struct vattr va;
+ char *cp;
+ int len;
+ u_int size;
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_mount(mp = %x)\n", mp);
+#endif
+
+ /*
+ * Update is not supported
+ */
+ if (mp->mnt_flag & MNT_UPDATE) {
+ /*
+ * Need to provide.
+ * 1. a way to convert between rdonly and rdwr mounts.
+ * 2. support for nfs exports.
+ */
+ error = EOPNOTSUPP;
+ goto bad;
+ }
+
+ /*
+ * Take a copy of the process's credentials. This isn't
+ * quite right since the euid will always be zero and we
+ * want to get the "real" users credentials. So fix up
+ * the uid field after taking the copy.
+ */
+ cred = crdup(p->p_ucred);
+ cred->cr_uid = p->p_cred->p_ruid;
+
+ /*
+ * Ensure the *real* user has write permission on the
+ * mounted-on directory. This allows the mount_union
+ * command to be made setuid root so allowing anyone
+ * to do union mounts onto any directory on which they
+ * have write permission and which they also own.
+ */
+ error = VOP_GETATTR(mp->mnt_vnodecovered, &va, cred, p);
+ if (error)
+ goto bad;
+ if ((va.va_uid != cred->cr_uid) &&
+ (cred->cr_uid != 0)) {
+ error = EACCES;
+ goto bad;
+ }
+ error = VOP_ACCESS(mp->mnt_vnodecovered, VWRITE, cred, p);
+ if (error)
+ goto bad;
+
+ /*
+ * Get argument
+ */
+ if (error = copyin(data, (caddr_t)&args, sizeof(struct union_args)))
+ goto bad;
+
+ lowerrootvp = mp->mnt_vnodecovered;
+ VREF(lowerrootvp);
+
+ /*
+ * Find upper node. Use the real process credentials,
+ * not the effective ones since this will have come
+ * through a setuid process (mount_union). All this
+ * messing around with permissions is entirely bogus
+ * and should be removed by allowing any user straight
+ * past the mount system call.
+ */
+ scred = p->p_ucred;
+ p->p_ucred = cred;
+ NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT,
+ UIO_USERSPACE, args.target, p);
+ p->p_ucred = scred;
+
+ if (error = namei(ndp))
+ goto bad;
+
+ upperrootvp = ndp->ni_vp;
+ vrele(ndp->ni_dvp);
+ ndp->ni_dvp = NULL;
+
+ if (upperrootvp->v_type != VDIR) {
+ error = EINVAL;
+ goto bad;
+ }
+
+ um = (struct union_mount *) malloc(sizeof(struct union_mount),
+ M_UFSMNT, M_WAITOK); /* XXX */
+
+ /*
+ * Keep a held reference to the target vnodes.
+ * They are vrele'd in union_unmount.
+ *
+ * Depending on the _BELOW flag, the filesystems are
+ * viewed in a different order. In effect, this is the
+ * same as providing a mount under option to the mount syscall.
+ */
+
+ um->um_op = args.mntflags & UNMNT_OPMASK;
+ switch (um->um_op) {
+ case UNMNT_ABOVE:
+ um->um_lowervp = lowerrootvp;
+ um->um_uppervp = upperrootvp;
+ break;
+
+ case UNMNT_BELOW:
+ um->um_lowervp = upperrootvp;
+ um->um_uppervp = lowerrootvp;
+ break;
+
+ case UNMNT_REPLACE:
+ vrele(lowerrootvp);
+ lowerrootvp = NULLVP;
+ um->um_uppervp = upperrootvp;
+ um->um_lowervp = lowerrootvp;
+ break;
+
+ default:
+ /* um is released at bad: */
+ error = EINVAL;
+ goto bad;
+ }
+
+ um->um_cred = cred;
+ um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask;
+
+ /*
+ * Depending on what you think the MNT_LOCAL flag might mean,
+ * you may want the && to be || on the conditional below.
+ * At the moment it has been defined that the filesystem is
+ * only local if it is all local, ie the MNT_LOCAL flag implies
+ * that the entire namespace is local. If you think the MNT_LOCAL
+ * flag implies that some of the files might be stored locally
+ * then you will want to change the conditional.
+ */
+ if (um->um_op == UNMNT_ABOVE) {
+ if (((um->um_lowervp == NULLVP) ||
+ (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
+ (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
+ mp->mnt_flag |= MNT_LOCAL;
+ }
+
+ /*
+ * Copy in the upper layer's RDONLY flag. This is for the benefit
+ * of lookup() which explicitly checks the flag, rather than asking
+ * the filesystem for its own opinion. This means, that an update
+ * mount of the underlying filesystem to go from rdonly to rdwr
+ * will leave the unioned view as read-only.
+ */
+ mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
+
+ /*
+ * This is a user mount. Privilege check for unmount
+ * will be done in union_unmount.
+ */
+ mp->mnt_flag |= MNT_USER;
+
+ mp->mnt_data = (qaddr_t) um;
+ getnewfsid(mp, MOUNT_UNION);
+
+ (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+ /*
+ * The "mounted from" name is the target path, prefixed with
+ * a tag showing the operation mode.
+ */
+ switch (um->um_op) {
+ case UNMNT_ABOVE:
+ cp = "<above>";
+ break;
+ case UNMNT_BELOW:
+ cp = "<below>";
+ break;
+ case UNMNT_REPLACE:
+ cp = "";
+ break;
+ }
+ len = strlen(cp);
+ bcopy(cp, mp->mnt_stat.f_mntfromname, len);
+
+ cp = mp->mnt_stat.f_mntfromname + len;
+ len = MNAMELEN - len;
+
+ (void) copyinstr(args.target, cp, len - 1, &size);
+ bzero(cp + size, len - size);
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_mount: from %s, on %s\n",
+ mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+ return (0);
+
+bad:
+ /* only reached before um has been attached to the mount */
+ if (um)
+ free(um, M_UFSMNT);
+ if (cred)
+ crfree(cred);
+ if (upperrootvp)
+ vrele(upperrootvp);
+ if (lowerrootvp)
+ vrele(lowerrootvp);
+ return (error);
+}
+
+/*
+ * VFS start. Nothing needed here - the start routine
+ * on the underlying filesystem(s) will have been called
+ * when that filesystem was mounted.
+ * None of the arguments are used; always returns 0.
+ */
+int
+union_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+
+ return (0);
+}
+
+/*
+ * Free reference to union layer
+ *
+ * Unmount the union filesystem (mp).  (mntflags) may contain
+ * MNT_FORCE; (p) is the calling process.
+ * Returns 0 on success, the error from suser if the caller is
+ * neither the mounter nor the superuser, EINVAL if a forced
+ * unmount is requested while doforce is clear, EBUSY if the root
+ * vnode is still in use, or the error from union_root or vflush.
+ */
+int
+union_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+ struct vnode *um_rootvp;
+ int error;
+ int flags = 0;
+ extern int doforce;
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_unmount(mp = %x)\n", mp);
+#endif
+
+ /* only the mounter, or superuser can unmount */
+ if ((p->p_cred->p_ruid != um->um_cred->cr_uid) &&
+ (error = suser(p->p_ucred, &p->p_acflag)))
+ return (error);
+
+ if (mntflags & MNT_FORCE) {
+ /* union can never be rootfs so don't check for it */
+ if (!doforce)
+ return (EINVAL);
+ flags |= FORCECLOSE;
+ }
+
+ /*
+ * Get hold of the root vnode.  A use count above one means
+ * somebody other than this function still holds it.
+ */
+ if (error = union_root(mp, &um_rootvp))
+ return (error);
+ if (um_rootvp->v_usecount > 1) {
+ vput(um_rootvp);
+ return (EBUSY);
+ }
+ /* flush every other vnode of this mount, sparing the root */
+ if (error = vflush(mp, um_rootvp, flags)) {
+ vput(um_rootvp);
+ return (error);
+ }
+
+#ifdef UNION_DIAGNOSTIC
+ vprint("alias root of lower", um_rootvp);
+#endif
+ /*
+ * Discard references to upper and lower target vnodes.
+ */
+ if (um->um_lowervp)
+ vrele(um->um_lowervp);
+ vrele(um->um_uppervp);
+ crfree(um->um_cred);
+ /*
+ * Release reference on underlying root vnode
+ */
+ vput(um_rootvp);
+ /*
+ * And blow it away for future re-use
+ */
+ vgone(um_rootvp);
+ /*
+ * Finally, throw away the union_mount structure
+ */
+ free(mp->mnt_data, M_UFSMNT); /* XXX */
+ mp->mnt_data = 0;
+ return (0);
+}
+
+int
+union_root(mp, vpp) /* produce the root union vnode of mount mp in *vpp; returns 0 or union_allocvp's error */
+ struct mount *mp;
+ struct vnode **vpp;
+{
+ struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+ int error;
+ int loselock; /* 1 when the upper vnode was found already locked and is not locked again here */
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_root(mp = %x, lvp = %x, uvp = %x)\n", mp,
+ um->um_lowervp,
+ um->um_uppervp);
+#endif
+
+ /*
+ * Return locked reference to root.
+ *
+ * An extra reference is taken on the upper vnode and, when one
+ * exists, on the lower vnode before union_allocvp() is called.
+ * For a UNMNT_BELOW mount whose upper vnode is already locked the
+ * lock is not taken a second time (loselock is set instead).
+ */
+ VREF(um->um_uppervp);
+ if ((um->um_op == UNMNT_BELOW) &&
+ VOP_ISLOCKED(um->um_uppervp)) {
+ loselock = 1;
+ } else {
+ VOP_LOCK(um->um_uppervp);
+ loselock = 0;
+ }
+ if (um->um_lowervp)
+ VREF(um->um_lowervp);
+ error = union_allocvp(vpp, mp,
+ (struct vnode *) 0,
+ (struct vnode *) 0,
+ (struct componentname *) 0,
+ um->um_uppervp,
+ um->um_lowervp);
+
+ if (error) { /* undo the lock and the references taken above */
+ if (!loselock)
+ VOP_UNLOCK(um->um_uppervp);
+ vrele(um->um_uppervp);
+ if (um->um_lowervp)
+ vrele(um->um_lowervp);
+ } else {
+ (*vpp)->v_flag |= VROOT;
+ if (loselock) /* this call did not lock the upper vnode, so do not record it as locked */
+ VTOUNION(*vpp)->un_flags &= ~UN_ULOCK;
+ }
+
+ return (error);
+}
+
+int
+union_quotactl(mp, cmd, uid, arg, p) /* quota control entry point: not supported by the union layer */
+ struct mount *mp;
+ int cmd;
+ uid_t uid;
+ caddr_t arg;
+ struct proc *p;
+{
+
+ return (EOPNOTSUPP); /* every argument is ignored */
+}
+
+/*
+ * Report filesystem statistics for a union mount into *sbp.
+ *
+ * Block and file counts are the lower layer's figures (zero when there is
+ * no lower layer) plus the upper layer's.  f_type is MOUNT_UNION, and
+ * f_flags, f_bsize and f_iosize are taken from the statfs buffer as it
+ * stands after the upper layer's VFS_STATFS.  When the two layers use
+ * different block sizes the lower block counts are rescaled to the upper
+ * block size before being added.  If sbp is not the mount's own mnt_stat,
+ * the fsid and both mount names are copied from mp->mnt_stat.
+ *
+ * Returns 0, or the error from either layer's VFS_STATFS.
+ */
+int
+union_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	struct statfs mstat;
+	int lower_bsize;
+	int error;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_statfs(mp = %x, lvp = %x, uvp = %x)\n", mp,
+	    um->um_lowervp,
+	    um->um_uppervp);
+#endif
+
+	/* A zeroed buffer stands in for a missing lower layer. */
+	bzero(&mstat, sizeof(mstat));
+	if (um->um_lowervp != NULLVP) {
+		error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p);
+		if (error != 0)
+			return (error);
+	}
+
+	/* Seed the totals with the lower layer's numbers. */
+	lower_bsize = mstat.f_bsize;
+	sbp->f_blocks = mstat.f_blocks;
+	sbp->f_bfree = mstat.f_bfree;
+	sbp->f_bavail = mstat.f_bavail;
+	sbp->f_files = mstat.f_files;
+	sbp->f_ffree = mstat.f_ffree;
+
+	/* The same buffer is deliberately reused for the upper layer. */
+	error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, p);
+	if (error != 0)
+		return (error);
+
+	sbp->f_type = MOUNT_UNION;
+	sbp->f_flags = mstat.f_flags;
+	sbp->f_bsize = mstat.f_bsize;
+	sbp->f_iosize = mstat.f_iosize;
+
+	/*
+	 * Differing block sizes: express the lower block counts in
+	 * units of the upper block size so the sums mean something.
+	 */
+	if (lower_bsize != mstat.f_bsize) {
+		sbp->f_blocks = sbp->f_blocks * lower_bsize / mstat.f_bsize;
+		sbp->f_bfree = sbp->f_bfree * lower_bsize / mstat.f_bsize;
+		sbp->f_bavail = sbp->f_bavail * lower_bsize / mstat.f_bsize;
+	}
+
+	/* Add the upper layer on top. */
+	sbp->f_blocks += mstat.f_blocks;
+	sbp->f_bfree += mstat.f_bfree;
+	sbp->f_bavail += mstat.f_bavail;
+	sbp->f_files += mstat.f_files;
+	sbp->f_ffree += mstat.f_ffree;
+
+	/* Nothing more to do when filling in the mount's own record. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+int
+union_sync(mp, waitfor, cred, p) /* sync entry point: does nothing and reports success */
+ struct mount *mp;
+ int waitfor;
+ struct ucred *cred;
+ struct proc *p;
+{
+
+ /*
+ * XXX - Assumes no data cached at union layer.
+ * All arguments are ignored.
+ */
+ return (0);
+}
+
+int
+union_vget(mp, ino, vpp) /* lookup of a vnode by inode number: not supported by the union layer */
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+
+ return (EOPNOTSUPP); /* *vpp is left untouched */
+}
+
+int
+union_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp) /* file-handle to vnode translation: not supported */
+ struct mount *mp;
+ struct fid *fidp;
+ struct mbuf *nam;
+ struct vnode **vpp;
+ int *exflagsp;
+ struct ucred **credanonp;
+{
+
+ return (EOPNOTSUPP); /* none of the output arguments is written */
+}
+
+int
+union_vptofh(vp, fhp) /* vnode to file-handle translation: not supported */
+ struct vnode *vp;
+ struct fid *fhp;
+{
+
+ return (EOPNOTSUPP); /* *fhp is not written */
+}
+
+int union_init __P((void)); /* declared here for the table below; its definition is not in this part of the file */
+
+struct vfsops union_vfsops = { /* positional initializer: entry order must match the member order of struct vfsops */
+ union_mount,
+ union_start,
+ union_unmount,
+ union_root,
+ union_quotactl,
+ union_statfs,
+ union_sync,
+ union_vget,
+ union_fhtovp,
+ union_vptofh,
+ union_init,
+};
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
new file mode 100644
index 000000000000..96327b0922d4
--- /dev/null
+++ b/sys/fs/unionfs/union_vnops.c
@@ -0,0 +1,1495 @@
+/*
+ * Copyright (c) 1992, 1993, 1994 The Regents of the University of California.
+ * Copyright (c) 1992, 1993, 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union_vnops.c 8.6 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+#define FIXUP(un) { \
+ if (((un)->un_flags & UN_ULOCK) == 0) { \
+ union_fixup(un); \
+ } \
+}
+
+static void
+union_fixup(un) /* lock un's upper vnode and record that in un_flags; used by the FIXUP() macro */
+ struct union_node *un;
+{
+
+ VOP_LOCK(un->un_uppervp); /* un_uppervp is dereferenced unconditionally: callers must only pass nodes with an upper vnode */
+ un->un_flags |= UN_ULOCK;
+}
+
+static int
+union_lookup1(udvp, dvp, vpp, cnp) /* look up cnp in directory dvp of one layer; udvp is that layer's root; result in *vpp */
+ struct vnode *udvp;
+ struct vnode *dvp;
+ struct vnode **vpp;
+ struct componentname *cnp;
+{
+ int error;
+ struct vnode *tdvp; /* scratch: previous directory while stepping, then the lookup result */
+ struct mount *mp;
+
+ /*
+ * If stepping up the directory tree, check for going
+ * back across the mount point, in which case do what
+ * lookup would do by stepping back down the mount
+ * hierarchy.
+ */
+ if (cnp->cn_flags & ISDOTDOT) {
+ for (;;) {
+ /*
+ * Don't do the NOCROSSMOUNT check
+ * at this level. By definition,
+ * union fs deals with namespaces, not
+ * filesystems.
+ */
+ if ((dvp->v_flag & VROOT) == 0)
+ break;
+
+ tdvp = dvp;
+ dvp = dvp->v_mount->mnt_vnodecovered; /* continue from the vnode this mount covers */
+ vput(tdvp);
+ VREF(dvp);
+ VOP_LOCK(dvp);
+ }
+ }
+
+ error = VOP_LOOKUP(dvp, &tdvp, cnp);
+ if (error)
+ return (error); /* *vpp is not written on this path */
+
+ /*
+ * The parent directory will have been unlocked, unless lookup
+ * found the last component. In which case, re-lock the node
+ * here to allow it to be unlocked again (phew) in union_lookup.
+ */
+ if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN))
+ VOP_LOCK(dvp);
+
+ dvp = tdvp; /* from here on dvp names the vnode that was found */
+
+ /*
+ * Lastly check if the current node is a mount point in
+ * which case walk up the mount hierarchy making sure not to
+ * bump into the root of the mount tree (ie. dvp != udvp).
+ */
+ while (dvp != udvp && (dvp->v_type == VDIR) &&
+ (mp = dvp->v_mountedhere)) {
+
+ if (mp->mnt_flag & MNT_MLOCK) { /* mount is locked: ask for a wakeup, sleep, then re-test */
+ mp->mnt_flag |= MNT_MWAIT;
+ sleep((caddr_t) mp, PVFS);
+ continue;
+ }
+
+ if (error = VFS_ROOT(mp, &tdvp)) {
+ vput(dvp);
+ return (error);
+ }
+
+ vput(dvp); /* drop the covered vnode in favour of the mounted root */
+ dvp = tdvp;
+ }
+
+ *vpp = dvp;
+ return (0);
+}
+
+int
+union_lookup(ap) /* look a component up in both layers and return one union vnode for the result */
+ struct vop_lookup_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ int error;
+ int uerror, lerror; /* results of the upper-layer and lower-layer lookups */
+ struct vnode *uppervp, *lowervp;
+ struct vnode *upperdvp, *lowerdvp;
+ struct vnode *dvp = ap->a_dvp;
+ struct union_node *dun = VTOUNION(dvp);
+ struct componentname *cnp = ap->a_cnp;
+ int lockparent = cnp->cn_flags & LOCKPARENT; /* caller's original setting, restored before returning */
+ int rdonly = cnp->cn_flags & RDONLY; /* not used in the code visible here */
+ struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
+ struct ucred *saved_cred;
+
+ cnp->cn_flags |= LOCKPARENT; /* forced on for the per-layer lookups */
+
+ upperdvp = dun->un_uppervp;
+ lowerdvp = dun->un_lowervp;
+ uppervp = NULLVP;
+ lowervp = NULLVP;
+
+ /*
+ * do the lookup in the upper level.
+ * if that level consumes additional pathnames,
+ * then assume that something special is going
+ * on and just return that vnode.
+ */
+ if (upperdvp) {
+ FIXUP(dun);
+ uerror = union_lookup1(um->um_uppervp, upperdvp,
+ &uppervp, cnp);
+ /*if (uppervp == upperdvp)
+ dun->un_flags |= UN_KLOCK;*/
+
+ if (cnp->cn_consume != 0) {
+ *ap->a_vpp = uppervp;
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+ return (uerror);
+ }
+ } else {
+ uerror = ENOENT;
+ }
+
+ /*
+ * in a similar way to the upper layer, do the lookup
+ * in the lower layer. this time, if there is some
+ * component magic going on, then vput whatever we got
+ * back from the upper layer and return the lower vnode
+ * instead.
+ */
+ if (lowerdvp) {
+ int nameiop;
+
+ VOP_LOCK(lowerdvp);
+
+ /*
+ * Only do a LOOKUP on the bottom node, since
+ * we won't be making changes to it anyway.
+ */
+ nameiop = cnp->cn_nameiop;
+ cnp->cn_nameiop = LOOKUP;
+ if (um->um_op == UNMNT_BELOW) { /* below-mounts search the lower layer with the credential saved in the mount */
+ saved_cred = cnp->cn_cred;
+ cnp->cn_cred = um->um_cred;
+ }
+ lerror = union_lookup1(um->um_lowervp, lowerdvp,
+ &lowervp, cnp);
+ if (um->um_op == UNMNT_BELOW)
+ cnp->cn_cred = saved_cred;
+ cnp->cn_nameiop = nameiop;
+
+ if (lowervp != lowerdvp)
+ VOP_UNLOCK(lowerdvp);
+
+ if (cnp->cn_consume != 0) {
+ if (uppervp) {
+ if (uppervp == upperdvp)
+ vrele(uppervp);
+ else
+ vput(uppervp);
+ uppervp = NULLVP;
+ }
+ *ap->a_vpp = lowervp;
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+ return (lerror);
+ }
+ } else {
+ lerror = ENOENT;
+ }
+
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+
+ /*
+ * at this point, we have uerror and lerror indicating
+ * possible errors with the lookups in the upper and lower
+ * layers. additionally, uppervp and lowervp are (locked)
+ * references to existing vnodes in the upper and lower layers.
+ *
+ * there are now three cases to consider.
+ * 1. if both layers returned an error, then return whatever
+ * error the upper layer generated.
+ *
+ * 2. if the top layer failed and the bottom layer succeeded
+ * then two subcases occur.
+ * a. the bottom vnode is not a directory, in which
+ * case just return a new union vnode referencing
+ * an empty top layer and the existing bottom layer.
+ * b. the bottom vnode is a directory, in which case
+ * create a new directory in the top-level and
+ * continue as in case 3.
+ *
+ * 3. if the top layer succeeded then return a new union
+ * vnode referencing whatever the new top layer and
+ * whatever the bottom layer returned.
+ */
+
+ *ap->a_vpp = NULLVP;
+
+ /* case 1. */
+ if ((uerror != 0) && (lerror != 0)) {
+ return (uerror);
+ }
+
+ /* case 2. */
+ if (uerror != 0 /* && (lerror == 0) */ ) {
+ if (lowervp->v_type == VDIR) { /* case 2b.
+ * NOTE(review): upperdvp is unlocked and passed on
+ * below without a NULL check, yet uerror is also
+ * ENOENT when the directory has no upper vnode at
+ * all; confirm that combination cannot reach here. */
+ dun->un_flags &= ~UN_ULOCK;
+ VOP_UNLOCK(upperdvp);
+ uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
+ VOP_LOCK(upperdvp);
+ dun->un_flags |= UN_ULOCK;
+
+ if (uerror) {
+ if (lowervp) {
+ vput(lowervp);
+ lowervp = NULLVP;
+ }
+ return (uerror);
+ }
+ }
+ }
+
+ if (lowervp)
+ VOP_UNLOCK(lowervp); /* the lower vnode is handed to union_allocvp referenced but unlocked */
+
+ error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
+ uppervp, lowervp);
+
+ if (error) {
+ if (uppervp)
+ vput(uppervp);
+ if (lowervp)
+ vrele(lowervp); /* already unlocked above, so vrele rather than vput */
+ } else {
+ if (*ap->a_vpp != dvp)
+ if (!lockparent || !(cnp->cn_flags & ISLASTCN))
+ VOP_UNLOCK(dvp);
+ }
+
+ return (error);
+}
+
+/*
+ * Create a file in the upper layer of a union directory.
+ *
+ * The union directory ap->a_dvp is released here in every case.  When it
+ * has an upper vnode, VOP_CREATE is performed there and the new vnode is
+ * wrapped in a union vnode returned through ap->a_vpp.  Without an upper
+ * vnode the operation fails with EROFS.  Otherwise returns 0 or the error
+ * from VOP_CREATE or union_allocvp.
+ */
+int
+union_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+
+		/*
+		 * Take our own reference on the upper directory and mark
+		 * the node UN_KLOCK so that union_unlock leaves the upper
+		 * vnode locked when the union directory is released.
+		 */
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/*
+		 * Fetch the mount pointer while we still hold a reference
+		 * on the union directory; it must not be read through
+		 * a_dvp once vput() has dropped that reference.
+		 */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_CREATE(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		/*
+		 * NOTE(review): a_dvp is still passed by value to
+		 * union_allocvp after the vput above, as before; confirm
+		 * union_allocvp does not dereference it without taking
+		 * its own reference.
+		 */
+		error = union_allocvp(
+				ap->a_vpp,
+				mp,
+				ap->a_dvp,
+				NULLVP,
+				ap->a_cnp,
+				vp,
+				NULLVP);
+		if (error)
+			vput(vp);
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Create a special file in the upper layer of a union directory.
+ *
+ * The union directory ap->a_dvp is released here in every case.  When it
+ * has an upper vnode, VOP_MKNOD is performed there; if that hands back a
+ * vnode it is wrapped in a union vnode returned through ap->a_vpp.
+ * Without an upper vnode the operation fails with EROFS.  Otherwise
+ * returns 0 or the error from VOP_MKNOD or union_allocvp.
+ */
+int
+union_mknod(ap)
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+
+		/*
+		 * Own reference on the upper directory; UN_KLOCK makes
+		 * union_unlock leave the upper vnode locked when the
+		 * union directory is released.
+		 */
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/*
+		 * Read the mount pointer before vput() drops our
+		 * reference on the union directory.
+		 */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_MKNOD(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		/* VOP_MKNOD may succeed without handing back a vnode. */
+		if (vp) {
+			/*
+			 * NOTE(review): a_dvp is still passed by value
+			 * after the vput above, as before; confirm
+			 * union_allocvp takes its own reference before
+			 * dereferencing it.
+			 */
+			error = union_allocvp(
+					ap->a_vpp,
+					mp,
+					ap->a_dvp,
+					NULLVP,
+					ap->a_cnp,
+					vp,
+					NULLVP);
+			if (error)
+				vput(vp);
+		}
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Open a union vnode.
+ *
+ * With an upper vnode present the open goes straight to it.  Otherwise a
+ * regular lower file that is opened for writing is first copied up: a new
+ * upper file is created with union_vn_create(), the lower contents are
+ * copied unless O_TRUNC was requested, the opens already counted against
+ * the lower vnode (un_openl) are moved to the upper vnode, and the upper
+ * vnode is opened.  Any other open without an upper vnode is passed to
+ * the lower vnode and counted in un_openl.
+ *
+ * Returns 0 or the first error reported by the underlying operations.
+ */
+int
+union_open(ap)
+	struct vop_open_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *tvp;
+	int mode = ap->a_mode;
+	struct ucred *cred = ap->a_cred;
+	struct proc *p = ap->a_p;
+	int error;
+
+	/*
+	 * If there is an existing upper vp then simply open that.
+	 */
+	tvp = un->un_uppervp;
+	if (tvp == NULLVP) {
+		/*
+		 * If the lower vnode is being opened for writing, then
+		 * copy the file contents to the upper vnode and open that,
+		 * otherwise can simply open the lower vnode.
+		 */
+		tvp = un->un_lowervp;
+		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
+			struct vnode *vp;
+			int i;
+
+			/*
+			 * Open the named file in the upper layer.  Note that
+			 * the file may have come into existence *since* the
+			 * lookup was done, since the upper layer may really
+			 * be a loopback mount of some other filesystem...
+			 * so open the file with exclusive create and barf if
+			 * it already exists.
+			 * XXX - perhaps should re-lookup the node (once more
+			 * with feeling) and simply open that.  Who knows.
+			 */
+			error = union_vn_create(&vp, un, p);
+			if (error)
+				return (error);
+
+			/* at this point, uppervp is locked */
+			union_newupper(un, vp);
+			un->un_flags |= UN_ULOCK;
+
+			/*
+			 * Now, if the file is being opened with truncation,
+			 * then the (new) upper vnode is ready to fly,
+			 * otherwise the data from the lower vnode must be
+			 * copied to the upper layer first.  This only works
+			 * for regular files (check is made above).
+			 */
+			if ((mode & O_TRUNC) == 0) {
+				/*
+				 * XXX - should not ignore errors
+				 * from VOP_CLOSE
+				 */
+				VOP_LOCK(tvp);
+				error = VOP_OPEN(tvp, FREAD, cred, p);
+				if (error == 0) {
+					error = union_copyfile(p, cred,
+						       tvp, un->un_uppervp);
+					VOP_UNLOCK(tvp);
+					/*
+					 * Pass the credential and process
+					 * like every other VOP_CLOSE call
+					 * in this file.
+					 */
+					(void) VOP_CLOSE(tvp, FREAD, cred, p);
+				} else {
+					VOP_UNLOCK(tvp);
+				}
+
+#ifdef UNION_DIAGNOSTIC
+				if (!error)
+					uprintf("union: copied up %s\n",
+						un->un_path);
+#endif
+			}
+
+			/*
+			 * Close the write open made by union_vn_create();
+			 * the upper vnode is unlocked around the call and
+			 * re-locked afterwards.
+			 */
+			un->un_flags &= ~UN_ULOCK;
+			VOP_UNLOCK(un->un_uppervp);
+			union_vn_close(un->un_uppervp, FWRITE, cred, p);
+			VOP_LOCK(un->un_uppervp);
+			un->un_flags |= UN_ULOCK;
+
+			/*
+			 * Subsequent IOs will go to the top layer, so
+			 * call close on the lower vnode and open on the
+			 * upper vnode to ensure that the filesystem keeps
+			 * its reference counts right.  This doesn't do
+			 * the right thing with (cred) and (FREAD) though.
+			 * Ignoring error returns is not right, either.
+			 */
+			for (i = 0; i < un->un_openl; i++) {
+				(void) VOP_CLOSE(tvp, FREAD, cred, p);
+				(void) VOP_OPEN(un->un_uppervp, FREAD, cred, p);
+			}
+			un->un_openl = 0;
+
+			/* A failed copy-up is reported instead of opening. */
+			if (error == 0)
+				error = VOP_OPEN(un->un_uppervp, mode, cred, p);
+			return (error);
+		}
+
+		/*
+		 * Just open the lower vnode
+		 */
+		un->un_openl++;
+		VOP_LOCK(tvp);
+		error = VOP_OPEN(tvp, mode, cred, p);
+		VOP_UNLOCK(tvp);
+
+		return (error);
+	}
+
+	FIXUP(un);
+
+	error = VOP_OPEN(tvp, mode, cred, p);
+
+	return (error);
+}
+
+/*
+ * Close a union vnode by closing the layer that is in use: the upper
+ * vnode when there is one, otherwise the lower vnode, in which case the
+ * count of opens held on the lower vnode (un_openl) is decremented first.
+ * Returns whatever VOP_CLOSE on that layer returns.
+ */
+int
+union_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *layervp = un->un_uppervp;
+
+	if (layervp == NULLVP) {
+		/* No upper layer: this close balances a lower-layer open. */
+#ifdef UNION_DIAGNOSTIC
+		if (un->un_openl <= 0)
+			panic("union: un_openl cnt");
+#endif
+		un->un_openl--;
+		layervp = un->un_lowervp;
+	}
+
+	return (VOP_CLOSE(layervp, ap->a_fflag, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Check access permission on the union vnode.
+ * The access check being enforced is to check
+ * against both the underlying vnode, and any
+ * copied vnode.  This ensures that no additional
+ * file permissions are given away simply because
+ * the user caused an implicit file copy.
+ *
+ * With an upper vnode the answer is VOP_ACCESS on it.  With only a lower
+ * vnode, the caller's credential is checked first and, for a UNMNT_BELOW
+ * mount, the credential saved in the union mount is checked as well.
+ * With neither layer present the result is EACCES.
+ */
+int
+union_access(ap)
+	struct vop_access_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	int error = EACCES;
+	struct vnode *vp;
+
+	if ((vp = un->un_uppervp) != NULLVP) {
+		FIXUP(un);
+		return (VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p));
+	}
+
+	if ((vp = un->un_lowervp) != NULLVP) {
+		VOP_LOCK(vp);
+		error = VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p);
+		if (error == 0) {
+			/*
+			 * The union_mount hangs off the mount of the union
+			 * vnode itself; vp belongs to the lower filesystem,
+			 * whose mount data is not a union_mount.
+			 */
+			struct union_mount *um =
+			    MOUNTTOUNIONMOUNT(ap->a_vp->v_mount);
+
+			if (um->um_op == UNMNT_BELOW)
+				error = VOP_ACCESS(vp, ap->a_mode,
+						um->um_cred, ap->a_p);
+		}
+		VOP_UNLOCK(vp);
+	}
+
+	return (error);
+}
+
+/*
+ * We handle getattr only to change the fsid.
+ *
+ * Attributes come from the upper vnode when there is one, otherwise from
+ * the lower vnode.  When the upper vnode is a directory and a lower vnode
+ * exists, the lower attributes are fetched into a scratch buffer and, if
+ * the lower vnode is a directory too, its link count is added to the
+ * result.  In every successful case va_fsid in the caller's buffer is
+ * replaced by the first word of the union mount's fsid.
+ *
+ * Returns 0 or the error from VOP_GETATTR on either layer.
+ */
+int
+union_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error;
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *vp;
+	struct vattr *vap;
+	struct vattr va;
+
+	/*
+	 * Some programs walk the filesystem hierarchy by counting
+	 * links to directories to avoid stat'ing all the time.
+	 * This means the link count on directories needs to be "correct".
+	 * The only way to do that is to call getattr on both layers
+	 * and fix up the link count.  The link count will not necessarily
+	 * be accurate but will be large enough to defeat the tree walkers.
+	 */
+
+	vap = ap->a_vap;
+
+	vp = un->un_uppervp;
+	if (vp != NULLVP) {
+		FIXUP(un);
+		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+	}
+
+	/* Decide whether, and into which buffer, to query the lower layer. */
+	if (vp == NULLVP) {
+		vp = un->un_lowervp;
+	} else if (vp->v_type == VDIR) {
+		vp = un->un_lowervp;
+		vap = &va;
+	} else {
+		vp = NULLVP;
+	}
+
+	if (vp != NULLVP) {
+		VOP_LOCK(vp);
+		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+		VOP_UNLOCK(vp);
+		if (error)
+			return (error);
+
+		/*
+		 * Only look at the scratch buffer once it has really been
+		 * filled in; an upper directory without a lower layer
+		 * leaves it uninitialised.
+		 */
+		if ((vap != ap->a_vap) && (vap->va_type == VDIR))
+			ap->a_vap->va_nlink += vap->va_nlink;
+	}
+
+	/* Always patch the caller's buffer, never the local scratch copy. */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+	return (0);
+}
+
+int
+union_setattr(ap) /* set attributes on the upper vnode; EROFS when there is (still) no upper vnode */
+ struct vop_setattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct union_node *un = VTOUNION(ap->a_vp);
+ int error;
+
+ /*
+ * Handle case of truncating lower object to zero size,
+ * by creating a zero length upper object. This is to
+ * handle the case of open with O_TRUNC and O_CREAT.
+ * No data is copied from the lower object on this path.
+ */
+ if ((un->un_uppervp == NULLVP) &&
+ /* assert(un->un_lowervp != NULLVP) */
+ (un->un_lowervp->v_type == VREG) &&
+ (ap->a_vap->va_size == 0)) {
+ struct vnode *vp;
+
+ error = union_vn_create(&vp, un, ap->a_p);
+ if (error)
+ return (error);
+
+ /* at this point, uppervp is locked */
+ union_newupper(un, vp);
+
+ VOP_UNLOCK(vp); /* the new upper vnode is unlocked around the close and re-locked after it */
+ union_vn_close(un->un_uppervp, FWRITE, ap->a_cred, ap->a_p);
+ VOP_LOCK(vp);
+ un->un_flags |= UN_ULOCK;
+ }
+
+ /*
+ * Try to set attributes in upper layer,
+ * otherwise return read-only filesystem error.
+ */
+ if (un->un_uppervp != NULLVP) {
+ FIXUP(un);
+ error = VOP_SETATTR(un->un_uppervp, ap->a_vap,
+ ap->a_cred, ap->a_p);
+ } else {
+ error = EROFS;
+ }
+
+ return (error);
+}
+
+/*
+ * Read from the layer vnode chosen by OTHERVP().  When that vnode is the
+ * lower one it is locked for the duration of the VOP_READ; otherwise
+ * FIXUP() makes sure the upper vnode is locked and it is left that way.
+ * Returns the result of VOP_READ.
+ */
+int
+union_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *layervp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (layervp != LOWERVP(ap->a_vp)) {
+		/* Not the lower vnode: no temporary lock needed. */
+		FIXUP(VTOUNION(ap->a_vp));
+		return (VOP_READ(layervp, ap->a_uio, ap->a_ioflag,
+		    ap->a_cred));
+	}
+
+	/* Lower vnode: hold its lock only around the read itself. */
+	VOP_LOCK(layervp);
+	error = VOP_READ(layervp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+	VOP_UNLOCK(layervp);
+	return (error);
+}
+
+/*
+ * Write to the layer vnode chosen by OTHERVP().  When that vnode is the
+ * lower one it is locked around the VOP_WRITE; otherwise FIXUP() makes
+ * sure the upper vnode is locked.  Returns the result of VOP_WRITE.
+ *
+ * The argument block is declared as vop_write_args, the structure that
+ * belongs to this operation (it was previously declared with the read
+ * operation's structure).
+ */
+int
+union_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int error;
+	struct vnode *vp = OTHERVP(ap->a_vp);
+	int dolock = (vp == LOWERVP(ap->a_vp));
+
+	if (dolock)
+		VOP_LOCK(vp);
+	else
+		FIXUP(VTOUNION(ap->a_vp));
+	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+	if (dolock)
+		VOP_UNLOCK(vp);
+
+	return (error);
+}
+
+/*
+ * Forward an ioctl to the layer vnode chosen by OTHERVP() and return
+ * that vnode's answer.  No locking is done here.
+ */
+int
+union_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *layervp = OTHERVP(ap->a_vp);
+
+	return (VOP_IOCTL(layervp, ap->a_command, ap->a_data,
+	    ap->a_fflag, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Forward a select request to the layer vnode chosen by OTHERVP() and
+ * return that vnode's answer.  No locking is done here.
+ */
+int
+union_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int a_which;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *layervp = OTHERVP(ap->a_vp);
+
+	return (VOP_SELECT(layervp, ap->a_which, ap->a_fflags,
+	    ap->a_cred, ap->a_p));
+}
+
+/*
+ * Forward an mmap request to the layer vnode chosen by OTHERVP() and
+ * return that vnode's answer.  No locking is done here.
+ */
+int
+union_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *layervp = OTHERVP(ap->a_vp);
+
+	return (VOP_MMAP(layervp, ap->a_fflags, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Flush the layer vnode chosen by OTHERVP().  When no vnode is chosen
+ * there is nothing to flush and 0 is returned.  The lower vnode is locked
+ * around the VOP_FSYNC; for any other vnode FIXUP() makes sure the upper
+ * vnode is locked first.  Returns the result of VOP_FSYNC.
+ */
+int
+union_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (targetvp == NULLVP)
+		return (0);
+
+	if (targetvp == LOWERVP(ap->a_vp)) {
+		/* Lower vnode: lock it just for the flush. */
+		VOP_LOCK(targetvp);
+		error = VOP_FSYNC(targetvp, ap->a_cred,
+		    ap->a_waitfor, ap->a_p);
+		VOP_UNLOCK(targetvp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+		error = VOP_FSYNC(targetvp, ap->a_cred,
+		    ap->a_waitfor, ap->a_p);
+	}
+
+	return (error);
+}
+
+/*
+ * Forward a seek notification to the layer vnode chosen by OTHERVP()
+ * and return that vnode's answer.  No locking is done here.
+ */
+int
+union_seek(ap)
+	struct vop_seek_args /* {
+		struct vnode *a_vp;
+		off_t a_oldoff;
+		off_t a_newoff;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *layervp = OTHERVP(ap->a_vp);
+
+	return (VOP_SEEK(layervp, ap->a_oldoff, ap->a_newoff, ap->a_cred));
+}
+
+int
+union_remove(ap) /* remove a file from the upper layer; EROFS unless both directory and file have upper vnodes */
+ struct vop_remove_args /* {
+ struct vnode *a_dvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ int error;
+ struct union_node *dun = VTOUNION(ap->a_dvp);
+ struct union_node *un = VTOUNION(ap->a_vp);
+
+ if (dun->un_uppervp && un->un_uppervp) {
+ struct vnode *dvp = dun->un_uppervp;
+ struct vnode *vp = un->un_uppervp;
+
+ FIXUP(dun);
+ VREF(dvp); /* own reference on the upper directory before the union directory is released */
+ dun->un_flags |= UN_KLOCK; /* union_unlock skips unlocking the upper vnode while UN_KLOCK is set */
+ vput(ap->a_dvp);
+ FIXUP(un);
+ VREF(vp); /* same hand-off for the file being removed */
+ un->un_flags |= UN_KLOCK;
+ vput(ap->a_vp);
+
+ error = VOP_REMOVE(dvp, vp, ap->a_cnp);
+ if (!error)
+ union_removed_upper(un);
+
+ /*
+ * XXX: should create a whiteout here
+ */
+ } else {
+ /*
+ * XXX: should create a whiteout here
+ * Both union vnodes are released before failing.
+ */
+ vput(ap->a_dvp);
+ vput(ap->a_vp);
+ error = EROFS;
+ }
+
+ return (error);
+}
+
+int
+union_link(ap) /* make a hard link in the upper layer; EROFS unless both vnodes have upper vnodes */
+ struct vop_link_args /* {
+ struct vnode *a_vp;
+ struct vnode *a_tdvp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ int error;
+ struct union_node *dun = VTOUNION(ap->a_vp); /* NOTE(review): "dun" is taken from a_vp and "un" from a_tdvp; confirm the roles against the VOP_LINK convention in use */
+ struct union_node *un = VTOUNION(ap->a_tdvp);
+
+ if (dun->un_uppervp && un->un_uppervp) {
+ struct vnode *dvp = dun->un_uppervp;
+ struct vnode *vp = un->un_uppervp;
+
+ FIXUP(dun);
+ VREF(dvp);
+ dun->un_flags |= UN_KLOCK; /* union_unlock skips unlocking the upper vnode while UN_KLOCK is set */
+ vput(ap->a_vp);
+ FIXUP(un);
+ VREF(vp);
+ vrele(ap->a_tdvp); /* released with vrele, not vput; UN_KLOCK is not set on un, unlike union_remove */
+
+ error = VOP_LINK(dvp, vp, ap->a_cnp);
+ } else {
+ /*
+ * XXX: need to copy to upper layer
+ * and do the link there.
+ */
+ vput(ap->a_vp);
+ vrele(ap->a_tdvp);
+ error = EROFS;
+ }
+
+ return (error);
+}
+
+int
+union_rename(ap) /* replace each union vnode argument by its upper vnode, then rename in the upper layer */
+ struct vop_rename_args /* {
+ struct vnode *a_fdvp;
+ struct vnode *a_fvp;
+ struct componentname *a_fcnp;
+ struct vnode *a_tdvp;
+ struct vnode *a_tvp;
+ struct componentname *a_tcnp;
+ } */ *ap;
+{
+ int error;
+
+ struct vnode *fdvp = ap->a_fdvp;
+ struct vnode *fvp = ap->a_fvp;
+ struct vnode *tdvp = ap->a_tdvp;
+ struct vnode *tvp = ap->a_tvp;
+
+ if (fdvp->v_op == union_vnodeop_p) { /* always true */
+ struct union_node *un = VTOUNION(fdvp);
+ if (un->un_uppervp == NULLVP) {
+ error = EROFS; /* no upper vnode to rename in */
+ goto bad;
+ }
+
+ FIXUP(un);
+ fdvp = un->un_uppervp;
+ VREF(fdvp);
+ vrele(ap->a_fdvp); /* the source side is released with vrele */
+ }
+
+ if (fvp->v_op == union_vnodeop_p) { /* always true */
+ struct union_node *un = VTOUNION(fvp);
+ if (un->un_uppervp == NULLVP) {
+ error = EROFS;
+ goto bad;
+ }
+
+ FIXUP(un);
+ fvp = un->un_uppervp;
+ VREF(fvp);
+ vrele(ap->a_fvp);
+ }
+
+ if (tdvp->v_op == union_vnodeop_p) {
+ struct union_node *un = VTOUNION(tdvp);
+ if (un->un_uppervp == NULLVP) {
+ error = EROFS;
+ goto bad;
+ }
+
+ tdvp = un->un_uppervp;
+ VREF(tdvp);
+ un->un_flags |= UN_KLOCK; /* union_unlock skips unlocking the upper vnode while UN_KLOCK is set */
+ vput(ap->a_tdvp); /* the target side is released with vput */
+ }
+
+ if (tvp && tvp->v_op == union_vnodeop_p) { /* a_tvp may be null */
+ struct union_node *un = VTOUNION(tvp);
+ if (un->un_uppervp == NULLVP) {
+ error = EROFS;
+ goto bad;
+ }
+
+ tvp = un->un_uppervp;
+ VREF(tvp);
+ un->un_flags |= UN_KLOCK;
+ vput(ap->a_tvp);
+ }
+
+ return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
+
+bad: /* NOTE(review): some of fdvp/fvp/tdvp/tvp may already name upper vnodes here; confirm each release below matches the reference and lock actually held */
+ vrele(fdvp);
+ vrele(fvp);
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+
+ return (error);
+}
+
+/*
+ * Create a directory in the upper layer of a union directory.
+ *
+ * The union directory ap->a_dvp is released here in every case.  When it
+ * has an upper vnode, VOP_MKDIR is performed there and the new vnode is
+ * wrapped in a union vnode returned through ap->a_vpp.  Without an upper
+ * vnode the operation fails with EROFS.  Otherwise returns 0 or the error
+ * from VOP_MKDIR or union_allocvp.
+ */
+int
+union_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+		/*
+		 * Own reference on the upper directory; UN_KLOCK makes
+		 * union_unlock leave the upper vnode locked when the
+		 * union directory is released.
+		 */
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/*
+		 * Read the mount pointer before vput() drops our
+		 * reference on the union directory.
+		 */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_MKDIR(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		/*
+		 * NOTE(review): a_dvp is still passed by value after the
+		 * vput above, as before; confirm union_allocvp takes its
+		 * own reference before dereferencing it.
+		 */
+		error = union_allocvp(
+				ap->a_vpp,
+				mp,
+				ap->a_dvp,
+				NULLVP,
+				ap->a_cnp,
+				vp,
+				NULLVP);
+		if (error)
+			vput(vp);
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+int
+union_rmdir(ap) /* remove a directory from the upper layer; EROFS unless both parent and target have upper vnodes */
+ struct vop_rmdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ int error;
+ struct union_node *dun = VTOUNION(ap->a_dvp);
+ struct union_node *un = VTOUNION(ap->a_vp);
+
+ if (dun->un_uppervp && un->un_uppervp) {
+ struct vnode *dvp = dun->un_uppervp;
+ struct vnode *vp = un->un_uppervp;
+
+ FIXUP(dun);
+ VREF(dvp); /* own reference on the upper parent before the union parent is released */
+ dun->un_flags |= UN_KLOCK; /* union_unlock skips unlocking the upper vnode while UN_KLOCK is set */
+ vput(ap->a_dvp);
+ FIXUP(un);
+ VREF(vp); /* same hand-off for the directory being removed */
+ un->un_flags |= UN_KLOCK;
+ vput(ap->a_vp);
+
+ error = VOP_RMDIR(dvp, vp, ap->a_cnp);
+ if (!error)
+ union_removed_upper(un);
+
+ /*
+ * XXX: should create a whiteout here
+ */
+ } else {
+ /*
+ * XXX: should create a whiteout here
+ * Both union vnodes are released before failing.
+ */
+ vput(ap->a_dvp);
+ vput(ap->a_vp);
+ error = EROFS;
+ }
+
+ return (error);
+}
+
+int
+union_symlink(ap) /* create a symbolic link in the upper layer; EROFS when the directory has no upper vnode */
+ struct vop_symlink_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ char *a_target;
+ } */ *ap;
+{
+ struct union_node *un = VTOUNION(ap->a_dvp);
+ struct vnode *dvp = un->un_uppervp;
+
+ if (dvp) {
+ int error;
+ struct vnode *vp;
+ struct mount *mp = ap->a_dvp->v_mount; /* not used in the code visible here */
+
+ FIXUP(un);
+ VREF(dvp); /* own reference on the upper directory before the union directory is released */
+ un->un_flags |= UN_KLOCK; /* union_unlock skips unlocking the upper vnode while UN_KLOCK is set */
+ vput(ap->a_dvp);
+ error = VOP_SYMLINK(dvp, &vp, ap->a_cnp,
+ ap->a_vap, ap->a_target);
+ *ap->a_vpp = NULLVP; /* no union vnode is returned for the new link; vp is not used further */
+ return (error);
+ }
+
+ vput(ap->a_dvp);
+ return (EROFS);
+}
+
+/*
+ * Directory reads on a union vnode only ever return the entries of the
+ * upper directory; a node without an upper vnode yields no entries and
+ * reports success.
+ *
+ * The rest of the union view is assembled by the callers: getdirentries
+ * walks down the union stack and readdir(3) removes duplicate names from
+ * the combined stream.
+ */
+int
+union_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+	if (un->un_uppervp == NULLVP)
+		return (0);
+
+	FIXUP(un);
+	return (VOP_READDIR(un->un_uppervp, ap->a_uio, ap->a_cred));
+}
+
+/*
+ * Read a symbolic link through the layer vnode chosen by OTHERVP().
+ * The lower vnode is locked around the VOP_READLINK; for any other vnode
+ * FIXUP() makes sure the upper vnode is locked first.  Returns the result
+ * of VOP_READLINK.
+ */
+int
+union_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *layervp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (layervp != LOWERVP(ap->a_vp)) {
+		/* Not the lower vnode: no temporary lock needed. */
+		FIXUP(VTOUNION(ap->a_vp));
+		return (VOP_READLINK(layervp, ap->a_uio, ap->a_cred));
+	}
+
+	/* Lower vnode: hold its lock only around the call. */
+	VOP_LOCK(layervp);
+	error = VOP_READLINK(layervp, ap->a_uio, ap->a_cred);
+	VOP_UNLOCK(layervp);
+	return (error);
+}
+
+int
+union_abortop(ap) /* pass an abort of a pending name operation to the layer vnode chosen by OTHERVP() */
+ struct vop_abortop_args /* {
+ struct vnode *a_dvp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ int error;
+ struct vnode *vp = OTHERVP(ap->a_dvp);
+ struct union_node *un = VTOUNION(ap->a_dvp);
+ int islocked = un->un_flags & UN_LOCKED; /* layer locking below is done only when the union node itself is locked */
+ int dolock = (vp == LOWERVP(ap->a_dvp)); /* true when the chosen vnode is the lower one */
+
+ if (islocked) {
+ if (dolock)
+ VOP_LOCK(vp);
+ else
+ FIXUP(VTOUNION(ap->a_dvp));
+ }
+ error = VOP_ABORTOP(vp, ap->a_cnp);
+ if (islocked && dolock) /* only a lock taken on the lower vnode above is dropped again */
+ VOP_UNLOCK(vp);
+
+ return (error);
+}
+
+int
+union_inactive(ap) /* inactive hook: deliberately a no-op, see the comment in the body */
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ /*
+ * Do nothing (and _don't_ bypass).
+ * Wait to vrele lowervp until reclaim,
+ * so that until then our union_node is in the
+ * cache and reusable.
+ *
+ * NEEDSWORK: Someday, consider inactive'ing
+ * the lowervp and then trying to reactivate it
+ * with capabilities (v_id)
+ * like they do in the name lookup cache code.
+ * That's too much work for now.
+ *
+ * Always returns 0.  With UNION_DIAGNOSTIC defined, a node
+ * that is still marked UN_LOCKED causes a panic.
+ */
+
+#ifdef UNION_DIAGNOSTIC
+ struct union_node *un = VTOUNION(ap->a_vp);
+
+ if (un->un_flags & UN_LOCKED)
+ panic("union: inactivating locked node");
+#endif
+
+ return (0);
+}
+
+int
+union_reclaim(ap) /* reclaim hook: hands the vnode to union_freevp() and reports success */
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ union_freevp(ap->a_vp);
+
+ return (0); /* union_freevp's result, if it has one, is not used */
+}
+
+int
+union_lock(ap) /* lock a union vnode, sleeping until it is free; also makes sure the upper vnode is locked; returns 0 */
+ struct vop_lock_args *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct union_node *un;
+
+start: /* re-entered after every sleep on the union node, so all checks are repeated */
+ while (vp->v_flag & VXLOCK) { /* wait while VXLOCK is set on the vnode */
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ }
+
+ un = VTOUNION(vp); /* fetched only after the VXLOCK wait */
+
+ if (un->un_uppervp) {
+ if ((un->un_flags & UN_ULOCK) == 0) {
+ un->un_flags |= UN_ULOCK; /* the flag is set before VOP_LOCK is called */
+ VOP_LOCK(un->un_uppervp);
+ }
+#ifdef DIAGNOSTIC
+ if (un->un_flags & UN_KLOCK)
+ panic("union: dangling upper lock");
+#endif
+ }
+
+ if (un->un_flags & UN_LOCKED) { /* held by someone: ask for a wakeup, sleep, start over */
+#ifdef DIAGNOSTIC
+ if (curproc && un->un_pid == curproc->p_pid &&
+ un->un_pid > -1 && curproc->p_pid > -1)
+ panic("union: locking against myself");
+#endif
+ un->un_flags |= UN_WANT;
+ sleep((caddr_t) &un->un_flags, PINOD);
+ goto start;
+ }
+
+#ifdef DIAGNOSTIC
+ if (curproc) /* remember the owner for the checks above and in union_unlock */
+ un->un_pid = curproc->p_pid;
+ else
+ un->un_pid = -1;
+#endif
+
+ un->un_flags |= UN_LOCKED;
+ return (0);
+}
+
+/*
+ * Release the union-layer lock taken by union_lock().
+ *
+ * Clears UN_LOCKED, unlocks the upper vnode when UN_ULOCK is set and
+ * UN_KLOCK is not, clears both of those flags, and wakes any sleeper
+ * that set UN_WANT on the node.  Always returns 0.
+ *
+ * The argument is declared as vop_unlock_args so that this routine
+ * follows the same vop_<op>_args convention as the other union
+ * operations in this file.
+ */
+int
+union_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+#ifdef DIAGNOSTIC
+	if ((un->un_flags & UN_LOCKED) == 0)
+		panic("union: unlock unlocked node");
+	if (curproc && un->un_pid != curproc->p_pid &&
+	    curproc->p_pid > -1 && un->un_pid > -1)
+		panic("union: unlocking other process's union node");
+#endif
+
+	un->un_flags &= ~UN_LOCKED;
+
+	/* Only drop the upper lock if we hold it and it is not being kept. */
+	if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
+		VOP_UNLOCK(un->un_uppervp);
+
+	un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
+
+	/* Wake anyone who went to sleep in union_lock() waiting for us. */
+	if (un->un_flags & UN_WANT) {
+		un->un_flags &= ~UN_WANT;
+		wakeup((caddr_t) &un->un_flags);
+	}
+
+#ifdef DIAGNOSTIC
+	un->un_pid = 0;
+#endif
+
+	return (0);
+}
+
+/*
+ * Forward a bmap request to the vnode selected by OTHERVP().
+ *
+ * When that vnode is the lower one it is locked for the duration of
+ * the call; otherwise FIXUP() is applied to the union node first.
+ * Returns whatever VOP_BMAP() returns.
+ */
+int
+union_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct vnode *unionvp = ap->a_vp;
+	struct vnode *target = OTHERVP(unionvp);
+	int uselower = (target == LOWERVP(unionvp));
+	int rv;
+
+	if (uselower)
+		VOP_LOCK(target);
+	else
+		FIXUP(VTOUNION(unionvp));
+
+	rv = VOP_BMAP(target, ap->a_bn, ap->a_vpp, ap->a_bnp, ap->a_runp);
+
+	if (uselower)
+		VOP_UNLOCK(target);
+
+	return (rv);
+}
+
+/*
+ * Print a one-line description of a union vnode: the vnode itself and
+ * its upper and lower vnodes.  Always returns 0.
+ *
+ * NOTE(review): the pointers are printed with %x -- confirm that this
+ * matches what the kernel printf expects on the target platform.
+ */
+int
+union_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *unionvp = ap->a_vp;
+	struct vnode *upper = UPPERVP(unionvp);
+	struct vnode *lower = LOWERVP(unionvp);
+
+	printf("\ttag VT_UNION, vp=%x, uppervp=%x, lowervp=%x\n",
+	    unionvp, upper, lower);
+	return (0);
+}
+
+/*
+ * Report whether the union node behind ap->a_vp has UN_LOCKED set:
+ * returns 1 if it does, 0 otherwise.
+ */
+int
+union_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+	if (un->un_flags & UN_LOCKED)
+		return (1);
+	return (0);
+}
+
+/*
+ * Forward a pathconf request to the vnode selected by OTHERVP().
+ *
+ * When that vnode is the lower one it is locked for the duration of
+ * the call; otherwise FIXUP() is applied to the union node first.
+ * Returns whatever VOP_PATHCONF() returns.
+ */
+int
+union_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	struct vnode *unionvp = ap->a_vp;
+	struct vnode *target = OTHERVP(unionvp);
+	int uselower = (target == LOWERVP(unionvp));
+	int rv;
+
+	if (uselower)
+		VOP_LOCK(target);
+	else
+		FIXUP(VTOUNION(unionvp));
+
+	rv = VOP_PATHCONF(target, ap->a_name, ap->a_retval);
+
+	if (uselower)
+		VOP_UNLOCK(target);
+
+	return (rv);
+}
+
+/*
+ * Forward an advisory-lock request, with all of its arguments, to the
+ * vnode selected by OTHERVP() and return that layer's result.
+ */
+int
+union_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+	struct vnode *target = OTHERVP(ap->a_vp);
+
+	return (VOP_ADVLOCK(target, ap->a_id, ap->a_op, ap->a_fl,
+	    ap->a_flags));
+}
+
+
+/*
+ * XXX - vop_strategy must be hand coded because it has no
+ * vnode in its arguments.
+ * This goes away with a merged VM/buffer cache.
+ *
+ * The buffer's b_vp is temporarily pointed at the vnode selected by
+ * OTHERVP(), VOP_STRATEGY() is issued, and b_vp is restored before the
+ * result is returned.
+ */
+int
+union_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *unionvp = bp->b_vp;
+	int rv;
+
+	/* Redirect the buffer at the underlying vnode for the I/O. */
+	bp->b_vp = OTHERVP(unionvp);
+
+#ifdef DIAGNOSTIC
+	if (bp->b_vp == NULLVP)
+		panic("union_strategy: nil vp");
+	if ((bp->b_flags & B_READ) == 0 && bp->b_vp == LOWERVP(unionvp))
+		panic("union_strategy: writing to lowervp");
+#endif
+
+	rv = VOP_STRATEGY(bp);
+
+	/* Put the union vnode back so the caller sees the buffer unchanged. */
+	bp->b_vp = unionvp;
+
+	return (rv);
+}
+
+/*
+ * Global vfs data structures
+ *
+ * union_vnodeop_entries pairs each vnode operation descriptor with the
+ * union-layer routine that implements it.  The first entry maps
+ * vop_default_desc to vn_default_error, the entries inside
+ * "#ifdef notdef" are compiled out, and a NULL/NULL pair terminates the
+ * list.  union_vnodeop_opv_desc ties the table to the union_vnodeop_p
+ * vector pointer.
+ * NOTE(review): presumably operations with no entry of their own end up
+ * at the vop_default_desc handler -- confirm against the vfs init code.
+ */
+int (**union_vnodeop_p)();
+struct vnodeopv_entry_desc union_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, union_lookup }, /* lookup */
+ { &vop_create_desc, union_create }, /* create */
+ { &vop_mknod_desc, union_mknod }, /* mknod */
+ { &vop_open_desc, union_open }, /* open */
+ { &vop_close_desc, union_close }, /* close */
+ { &vop_access_desc, union_access }, /* access */
+ { &vop_getattr_desc, union_getattr }, /* getattr */
+ { &vop_setattr_desc, union_setattr }, /* setattr */
+ { &vop_read_desc, union_read }, /* read */
+ { &vop_write_desc, union_write }, /* write */
+ { &vop_ioctl_desc, union_ioctl }, /* ioctl */
+ { &vop_select_desc, union_select }, /* select */
+ { &vop_mmap_desc, union_mmap }, /* mmap */
+ { &vop_fsync_desc, union_fsync }, /* fsync */
+ { &vop_seek_desc, union_seek }, /* seek */
+ { &vop_remove_desc, union_remove }, /* remove */
+ { &vop_link_desc, union_link }, /* link */
+ { &vop_rename_desc, union_rename }, /* rename */
+ { &vop_mkdir_desc, union_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, union_rmdir }, /* rmdir */
+ { &vop_symlink_desc, union_symlink }, /* symlink */
+ { &vop_readdir_desc, union_readdir }, /* readdir */
+ { &vop_readlink_desc, union_readlink }, /* readlink */
+ { &vop_abortop_desc, union_abortop }, /* abortop */
+ { &vop_inactive_desc, union_inactive }, /* inactive */
+ { &vop_reclaim_desc, union_reclaim }, /* reclaim */
+ { &vop_lock_desc, union_lock }, /* lock */
+ { &vop_unlock_desc, union_unlock }, /* unlock */
+ { &vop_bmap_desc, union_bmap }, /* bmap */
+ { &vop_strategy_desc, union_strategy }, /* strategy */
+ { &vop_print_desc, union_print }, /* print */
+ { &vop_islocked_desc, union_islocked }, /* islocked */
+ { &vop_pathconf_desc, union_pathconf }, /* pathconf */
+ { &vop_advlock_desc, union_advlock }, /* advlock */
+#ifdef notdef
+ { &vop_blkatoff_desc, union_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, union_valloc }, /* valloc */
+ { &vop_vfree_desc, union_vfree }, /* vfree */
+ { &vop_truncate_desc, union_truncate }, /* truncate */
+ { &vop_update_desc, union_update }, /* update */
+ { &vop_bwrite_desc, union_bwrite }, /* bwrite */
+#endif
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* end-of-table sentinel */
+};
+struct vnodeopv_desc union_vnodeop_opv_desc =
+ { &union_vnodeop_p, union_vnodeop_entries };
diff --git a/sys/gnu/ext2fs/ext2_bmap.c b/sys/gnu/ext2fs/ext2_bmap.c
new file mode 100644
index 000000000000..bcd838d036a1
--- /dev/null
+++ b/sys/gnu/ext2fs/ext2_bmap.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_bmap.c 8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/resourcevar.h>
+#include <sys/trace.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Bmap converts the logical block number of a file to its physical block
+ * number on the disk. The conversion is done by using the logical block
+ * number to index into the array of block pointers described by the dinode.
+ *
+ * If a_vpp is non-NULL it receives the inode's i_devvp.  If a_bnp is NULL
+ * no translation is performed and 0 is returned; otherwise the result of
+ * ufs_bmaparray() is returned.
+ */
+int
+ufs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/* Hand back the underlying device vnode when it was asked for. */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = VTOI(vp)->i_devvp;
+
+	/* Translate only when a place to store the answer was supplied. */
+	if (ap->a_bnp != NULL)
+		return (ufs_bmaparray(vp, ap->a_bn, ap->a_bnp, NULL, NULL,
+		    ap->a_runp));
+
+	return (0);
+}
+
+/*
+ * Indirect blocks are now on the vnode for the file.  They are given negative
+ * logical block numbers.  Indirect blocks are addressed by the negative
+ * address of the first data block to which they point.  Double indirect blocks
+ * are addressed by one less than the address of the first indirect block to
+ * which they point.  Triple indirect blocks are addressed by one less than
+ * the address of the first double indirect block to which they point.
+ *
+ * ufs_bmaparray does the bmap conversion, and if requested returns the
+ * array of logical blocks which must be traversed to get to a block.
+ * Each entry contains the offset into that block that gets you to the
+ * next block and the disk address of the block (if it is assigned).
+ *
+ * Arguments:
+ *	vp	vnode of the file being mapped
+ *	bn	logical block number to translate
+ *	bnp	receives the disk block number, or -1 if none is assigned
+ *	ap	optional array that receives the indirect-block path; a
+ *		local array is used when it is NULL
+ *	nump	optional; receives the number of path entries.  ap and nump
+ *		must be both NULL or both non-NULL (checked under DIAGNOSTIC)
+ *	runp	optional; receives the number of blocks following bn that
+ *		is_sequential() reports as contiguous, capped at maxrun
+ *
+ * Returns 0 on success, or the error from ufs_getlbns() or biowait().
+ */
+
+int
+ufs_bmaparray(vp, bn, bnp, ap, nump, runp)
+	struct vnode *vp;
+	register daddr_t bn;
+	daddr_t *bnp;
+	struct indir *ap;
+	int *nump;
+	int *runp;
+{
+	register struct inode *ip;
+	struct buf *bp;
+	struct ufsmount *ump;
+	struct mount *mp;
+	/*
+	 * ufs_getlbns() stores one entry for the inode's i_ib slot plus one
+	 * entry per level of indirection, i.e. up to NIADDR + 1 entries.
+	 */
+	struct indir a[NIADDR + 1], *xap;
+	daddr_t daddr;
+	long metalbn;
+	int error, maxrun, num;
+
+	ip = VTOI(vp);
+	mp = vp->v_mount;
+	ump = VFSTOUFS(mp);
+#ifdef DIAGNOSTIC
+	if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL))
+		panic("ufs_bmaparray: invalid arguments");
+#endif
+
+	maxrun = 0;		/* only meaningful when runp is non-NULL */
+	if (runp) {
+		/*
+		 * XXX
+		 * If MAXBSIZE is the largest transfer the disks can handle,
+		 * we probably want maxrun to be 1 block less so that we
+		 * don't create a block larger than the device can handle.
+		 */
+		*runp = 0;
+		maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1;
+	}
+
+	xap = ap == NULL ? a : ap;
+	if (!nump)
+		nump = &num;
+	if ((error = ufs_getlbns(vp, bn, xap, nump)) != 0)
+		return (error);
+
+	num = *nump;
+	if (num == 0) {
+		/* Direct block: the address comes straight from the inode. */
+		*bnp = blkptrtodb(ump, ip->i_db[bn]);
+		if (*bnp == 0)
+			*bnp = -1;
+		else if (runp)
+			for (++bn; bn < NDADDR && *runp < maxrun &&
+			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
+			    ++bn, ++*runp);
+		return (0);
+	}
+
+
+	/* Get disk address out of indirect block array */
+	daddr = ip->i_ib[xap->in_off];
+
+	for (bp = NULL, ++xap; --num; ++xap) {
+		/*
+		 * Exit the loop if there is no disk address assigned yet and
+		 * the indirect block isn't in the cache, or if we were
+		 * looking for an indirect block and we've found it.
+		 */
+
+		metalbn = xap->in_lbn;
+		if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn)
+			break;
+		/*
+		 * If we get here, we've either got the block in the cache
+		 * or we have a disk address for it, go fetch it.
+		 */
+		if (bp)
+			brelse(bp);
+
+		xap->in_exists = 1;
+		bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0);
+		/*
+		 * NOTE(review): "size" in the trace() calls below is not
+		 * declared in this function; presumably trace() expands to
+		 * nothing in normal builds -- confirm before enabling it.
+		 */
+		if (bp->b_flags & (B_DONE | B_DELWRI)) {
+			trace(TR_BREADHIT, pack(vp, size), metalbn);
+		}
+#ifdef DIAGNOSTIC
+		else if (!daddr)
+			panic("ufs_bmaparray: indirect block not in cache");
+#endif
+		else {
+			trace(TR_BREADMISS, pack(vp, size), metalbn);
+			bp->b_blkno = blkptrtodb(ump, daddr);
+			bp->b_flags |= B_READ;
+			VOP_STRATEGY(bp);
+			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+			if ((error = biowait(bp)) != 0) {
+				brelse(bp);
+				return (error);
+			}
+		}
+
+		daddr = ((daddr_t *)bp->b_data)[xap->in_off];
+		/* On the last level, count the contiguous run that follows. */
+		if (num == 1 && daddr && runp)
+			for (bn = xap->in_off + 1;
+			    bn < MNINDIR(ump) && *runp < maxrun &&
+			    is_sequential(ump, ((daddr_t *)bp->b_data)[bn - 1],
+			    ((daddr_t *)bp->b_data)[bn]);
+			    ++bn, ++*runp);
+	}
+	if (bp)
+		brelse(bp);
+
+	daddr = blkptrtodb(ump, daddr);
+	*bnp = daddr == 0 ? -1 : daddr;
+	return (0);
+}
+
+/*
+ * Create an array of logical block number/offset pairs which represent the
+ * path of indirect blocks required to access a data block. The first "pair"
+ * contains the logical block number of the appropriate single, double or
+ * triple indirect block and the offset into the inode indirect block array.
+ * Note, the logical block number of the inode single/double/triple indirect
+ * block appears twice in the array, once with the offset into the i_ib and
+ * once with the offset into the page itself.
+ *
+ * Arguments:
+ *	vp	vnode whose mount supplies MNINDIR()
+ *	bn	logical block number; a negative value names a meta-data
+ *		(indirect) block and its absolute value is used for the walk
+ *	ap	output array.  One entry is stored before the second loop and
+ *		one more per iteration of it, so it must have room for up to
+ *		NIADDR + 1 entries
+ *	nump	if non-NULL, receives the number of entries stored (0 when
+ *		bn is a direct block)
+ *
+ * Returns 0 on success, or EFBIG when the block lies beyond what NIADDR
+ * levels of indirection can reach.
+ */
+int
+ufs_getlbns(vp, bn, ap, nump)
+ struct vnode *vp;
+ register daddr_t bn;
+ struct indir *ap;
+ int *nump;
+{
+ long metalbn, realbn;
+ struct ufsmount *ump;
+ int blockcnt, i, numlevels, off;
+
+ ump = VFSTOUFS(vp->v_mount);
+ if (nump)
+ *nump = 0;
+ numlevels = 0;
+ realbn = bn; /* keep the caller's (possibly negative) block number */
+ if ((long)bn < 0)
+ bn = -(long)bn;
+
+ /* The first NDADDR blocks are direct blocks. */
+ if (bn < NDADDR)
+ return (0);
+
+ /*
+ * Determine the number of levels of indirection. After this loop
+ * is done, blockcnt indicates the number of data blocks possible
+ * at the given level of indirection, and NIADDR - i is the number
+ * of levels of indirection needed to locate the requested block.
+ */
+ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+ if (i == 0)
+ return (EFBIG);
+ blockcnt *= MNINDIR(ump);
+ if (bn < blockcnt)
+ break;
+ }
+
+ /* Calculate the address of the first meta-block. */
+ if (realbn >= 0)
+ metalbn = -(realbn - bn + NIADDR - i);
+ else
+ metalbn = -(-realbn - bn + NIADDR - i);
+
+ /*
+ * At each iteration, off is the offset into the bap array which is
+ * an array of disk addresses at the current level of indirection.
+ * The logical block number and the offset in that block are stored
+ * into the argument array.
+ */
+ ap->in_lbn = metalbn;
+ ap->in_off = off = NIADDR - i; /* index into the inode's i_ib array */
+ ap->in_exists = 0;
+ ap++;
+ for (++numlevels; i <= NIADDR; i++) {
+ /* If searching for a meta-data block, quit when found. */
+ if (metalbn == realbn)
+ break;
+
+ blockcnt /= MNINDIR(ump);
+ off = (bn / blockcnt) % MNINDIR(ump);
+
+ ++numlevels;
+ ap->in_lbn = metalbn;
+ ap->in_off = off;
+ ap->in_exists = 0;
+ ++ap;
+
+ metalbn -= -1 + off * blockcnt; /* logical number of the next block down the path */
+ }
+ if (nump)
+ *nump = numlevels;
+ return (0);
+}
diff --git a/sys/gnu/ext2fs/ext2_ihash.c b/sys/gnu/ext2fs/ext2_ihash.c
new file mode 100644
index 000000000000..4a37c907ef63
--- /dev/null
+++ b/sys/gnu/ext2fs/ext2_ihash.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_ihash.c 8.4 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Structures associated with inode caching.
+ *
+ * ihashtbl is the array of hash-chain heads and ihash is the mask that
+ * INOHASH() applies to (device + inum) to pick a chain.
+ */
+struct inode **ihashtbl;
+u_long ihash; /* size of hash table - 1 */
+#define INOHASH(device, inum) (((device) + (inum)) & ihash)
+
+/*
+ * Set up the inode hash table: hashinit() returns the table, which is
+ * stored in ihashtbl, and is handed &ihash to fill in.
+ */
+void
+ufs_ihashinit()
+{
+	ihashtbl = hashinit(desiredvnodes, M_UFSMNT, &ihash);
+}
+
+/*
+ * Look up the in-core inode for a device/inum pair and return its vnode.
+ * The vnode is returned even if the inode is locked; NULL is returned
+ * when no matching inode is on the hash chain.
+ */
+struct vnode *
+ufs_ihashlookup(device, inum)
+	dev_t device;
+	ino_t inum;
+{
+	register struct inode *ip;
+
+	ip = ihashtbl[INOHASH(device, inum)];
+	while (ip != NULL) {
+		if (inum == ip->i_number && device == ip->i_dev)
+			return (ITOV(ip));
+		ip = ip->i_next;
+	}
+	return (NULL);
+}
+
+/*
+ * Use the device/inum pair to find the incore inode, and return a pointer
+ * to it. If it is in core, but locked, wait for it.
+ *
+ * Returns the inode's vnode once vget() on it returns 0, or NULL when no
+ * matching inode is on the hash chain.  After sleeping on a locked inode,
+ * or after vget() returns non-zero, the chain is searched again from its
+ * head.
+ */
+struct vnode *
+ufs_ihashget(device, inum)
+ dev_t device;
+ ino_t inum;
+{
+ register struct inode *ip;
+ struct vnode *vp;
+
+ for (;;) /* each pass rescans the chain from the top */
+ for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) {
+ if (ip == NULL)
+ return (NULL);
+ if (inum == ip->i_number && device == ip->i_dev) {
+ if (ip->i_flag & IN_LOCKED) {
+ ip->i_flag |= IN_WANTED;
+ sleep(ip, PINOD);
+ break; /* leave the inner loop and rescan */
+ }
+ vp = ITOV(ip);
+ if (!vget(vp, 1))
+ return (vp);
+ break; /* vget() returned non-zero: rescan */
+ }
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Put the inode at the head of its hash chain and mark it IN_LOCKED,
+ * recording the current process (or -1 when there is none) as the lock
+ * holder.  Panics if the inode is already locked.
+ */
+void
+ufs_ihashins(ip)
+	struct inode *ip;
+{
+	struct inode **bucket, *head;
+
+	/* Link in front of whatever currently heads the chain. */
+	bucket = &ihashtbl[INOHASH(ip->i_dev, ip->i_number)];
+	head = *bucket;
+	if (head != NULL)
+		head->i_prev = &ip->i_next;
+	ip->i_next = head;
+	ip->i_prev = bucket;
+	*bucket = ip;
+
+	if ((ip->i_flag & IN_LOCKED) != 0)
+		panic("ufs_ihashins: already locked");
+	ip->i_lockholder = curproc ? curproc->p_pid : -1;
+	ip->i_flag |= IN_LOCKED;
+}
+
+/*
+ * Unlink the inode from its hash chain.  Under DIAGNOSTIC the inode's
+ * own chain pointers are cleared afterwards.
+ */
+void
+ufs_ihashrem(ip)
+	register struct inode *ip;
+{
+	register struct inode *succ;
+
+	succ = ip->i_next;
+	if (succ != NULL)
+		succ->i_prev = ip->i_prev;
+	/* i_prev addresses the pointer that currently points at ip. */
+	*ip->i_prev = succ;
+#ifdef DIAGNOSTIC
+	ip->i_next = NULL;
+	ip->i_prev = NULL;
+#endif
+}
diff --git a/sys/gnu/ext2fs/ext2_mount.h b/sys/gnu/ext2fs/ext2_mount.h
new file mode 100644
index 000000000000..237871fdaaca
--- /dev/null
+++ b/sys/gnu/ext2fs/ext2_mount.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufsmount.h 8.2 (Berkeley) 1/12/94
+ */
+
+struct buf;
+struct inode;
+struct nameidata;
+struct timeval;
+struct ucred;
+struct uio;
+struct vnode;
+struct netexport;
+
+/*
+ * This structure describes the UFS specific mount structure data.
+ * VFSTOUFS() obtains it from a struct mount's mnt_data pointer; the
+ * um_nindir, um_bptrtodb and um_seqinc members are the values read by
+ * the MNINDIR(), blkptrtodb() and is_sequential() macros.
+ */
+struct ufsmount {
+ struct mount *um_mountp; /* filesystem vfs structure */
+ dev_t um_dev; /* device mounted */
+ struct vnode *um_devvp; /* block device mounted vnode */
+ union { /* pointer to superblock */
+ struct lfs *lfs; /* LFS */
+ struct fs *fs; /* FFS */
+ } ufsmount_u;
+#define um_fs ufsmount_u.fs
+#define um_lfs ufsmount_u.lfs
+ struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */
+ struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */
+ u_long um_nindir; /* indirect ptrs per block */
+ u_long um_bptrtodb; /* indir ptr to disk block */
+ u_long um_seqinc; /* inc between seq blocks */
+ time_t um_btime[MAXQUOTAS]; /* block quota time limit */
+ time_t um_itime[MAXQUOTAS]; /* inode quota time limit */
+ char um_qflags[MAXQUOTAS]; /* quota specific flags */
+ struct netexport um_export; /* export information */
+};
+/*
+ * Flags describing the state of quotas.
+ * NOTE(review): presumably stored per quota type in um_qflags -- confirm.
+ */
+#define QTF_OPENING 0x01 /* Q_QUOTAON in progress */
+#define QTF_CLOSING 0x02 /* Q_QUOTAOFF in progress */
+
+/* Convert mount ptr to ufsmount ptr (a cast of the mount's mnt_data). */
+#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data))
+
+/*
+ * Macros to access file system parameters in the ufsmount structure.
+ * Used by ufs_bmap.
+ *
+ *	blkptrtodb(ump, b)	shift block pointer b left by um_bptrtodb
+ *	is_sequential(ump, a, b)	true when b equals a plus um_seqinc
+ *	MNINDIR(ump)		the mount's um_nindir value
+ *
+ * Every argument, including ump, is parenthesized so that an arbitrary
+ * expression may be passed for it.
+ */
+#define	blkptrtodb(ump, b)		((b) << (ump)->um_bptrtodb)
+#define	is_sequential(ump, a, b)	((b) == (a) + (ump)->um_seqinc)
+#define	MNINDIR(ump)			((ump)->um_nindir)
+
+
diff --git a/sys/gnu/ext2fs/inode.h b/sys/gnu/ext2fs/inode.h
new file mode 100644
index 000000000000..df155967a7df
--- /dev/null
+++ b/sys/gnu/ext2fs/inode.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 1982, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)inode.h 8.4 (Berkeley) 1/21/94
+ */
+
+#include <ufs/ufs/dinode.h>
+
+/*
+ * Theoretically, directories can be more than 2Gb in length, however, in
+ * practice this seems unlikely. So, we define the type doff_t as a long
+ * to keep down the cost of doing lookup on a 32-bit machine. If you are
+ * porting to a 64-bit architecture, you should make doff_t the same as off_t.
+ *
+ * Note that doff_t is a preprocessor macro, not a typedef.  It is the
+ * type of the i_endoff, i_diroff and i_offset members of struct inode.
+ */
+#define doff_t long
+
+/*
+ * The inode is used to describe each active (or recently active)
+ * file in the UFS filesystem. It is composed of two types of
+ * information. The first part is the information that is needed
+ * only while the file is active (such as the identity of the file
+ * and linkage to speed its lookup). The second part is the
+ * permanent meta-data associated with the file which is read
+ * in from the permanent dinode from long term storage when the
+ * file becomes active, and is put back when the file is no longer
+ * being used.
+ */
+struct inode {
+ struct inode *i_next; /* Hash chain forward. */
+ struct inode **i_prev; /* Hash chain back. */
+ struct vnode *i_vnode; /* Vnode associated with this inode. */
+ struct vnode *i_devvp; /* Vnode for block I/O. */
+ u_long i_flag; /* I* flags. */
+ dev_t i_dev; /* Device associated with the inode. */
+ ino_t i_number; /* The identity of the inode. */
+ union { /* Associated filesystem. */
+ struct fs *fs; /* FFS */
+ struct lfs *lfs; /* LFS */
+ } inode_u;
+#define i_fs inode_u.fs
+#define i_lfs inode_u.lfs
+ struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
+ u_quad_t i_modrev; /* Revision level for lease. */
+ struct lockf *i_lockf; /* Head of byte-level lock list. */
+ pid_t i_lockholder; /* DEBUG: holder of inode lock. */
+ pid_t i_lockwaiter; /* DEBUG: latest blocked for inode lock. */
+ /*
+ * Side effects; used during directory lookup.
+ */
+ long i_count; /* Size of free slot in directory. */
+ doff_t i_endoff; /* End of useful stuff in directory. */
+ doff_t i_diroff; /* Offset in dir, where we found last entry. */
+ doff_t i_offset; /* Offset of free space in directory. */
+ ino_t i_ino; /* Inode number of found directory. */
+ u_long i_reclen; /* Size of found directory entry. */
+ long i_spare[11]; /* Spares to round up to 128 bytes. */
+ /*
+ * The on-disk dinode itself.
+ */
+ struct dinode i_din; /* 128 bytes of the on-disk dinode. */
+};
+
+/* Shorthand names for members of the embedded on-disk dinode (i_din). */
+#define i_atime i_din.di_atime
+#define i_blocks i_din.di_blocks
+#define i_ctime i_din.di_ctime
+#define i_db i_din.di_db
+#define i_flags i_din.di_flags
+#define i_gen i_din.di_gen
+#define i_gid i_din.di_gid
+#define i_ib i_din.di_ib
+#define i_mode i_din.di_mode
+#define i_mtime i_din.di_mtime
+#define i_nlink i_din.di_nlink
+#define i_rdev i_din.di_rdev
+#define i_shortlink i_din.di_shortlink
+#define i_size i_din.di_size
+#define i_uid i_din.di_uid
+
+/*
+ * These flags are kept in i_flag.
+ * IN_ACCESS, IN_CHANGE and IN_UPDATE are the time-update requests that
+ * the ITIMES() macro acts on and then clears; ITIMES() sets IN_MODIFIED
+ * when it does so.
+ */
+#define IN_ACCESS 0x0001 /* Access time update request. */
+#define IN_CHANGE 0x0002 /* Inode change time update request. */
+#define IN_EXLOCK 0x0004 /* File has exclusive lock. */
+#define IN_LOCKED 0x0008 /* Inode lock. */
+#define IN_LWAIT 0x0010 /* Process waiting on file lock. */
+#define IN_MODIFIED 0x0020 /* Inode has been modified. */
+#define IN_RENAME 0x0040 /* Inode is being renamed. */
+#define IN_SHLOCK 0x0080 /* File has shared lock. */
+#define IN_UPDATE 0x0100 /* Modification time update request. */
+#define IN_WANTED 0x0200 /* Inode is wanted by a process. */
+
+#ifdef KERNEL
+/*
+ * Structure used to pass around logical block paths generated by
+ * ufs_getlbns and used by truncate and bmap code.
+ * One element describes one step of the path: the logical number of an
+ * indirect block and the index of the next pointer to follow.
+ */
+struct indir {
+ daddr_t in_lbn; /* Logical block number. */
+ int in_off; /* Offset in buffer. */
+ int in_exists; /* Flag if the block exists. */
+};
+
+/*
+ * Convert between inode pointers and vnode pointers.
+ * VTOI() casts the vnode's v_data; ITOV() reads the inode's i_vnode.
+ */
+#define VTOI(vp) ((struct inode *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
+
+/*
+ * Apply pending time-update requests to an inode.
+ *
+ * If any of IN_ACCESS, IN_CHANGE or IN_UPDATE is set in ip->i_flag, the
+ * inode is marked IN_MODIFIED and then:
+ *	IN_ACCESS	i_atime.ts_sec is set from t1->tv_sec
+ *	IN_UPDATE	i_mtime.ts_sec is set from t2->tv_sec and i_modrev
+ *			is incremented
+ *	IN_CHANGE	i_ctime.ts_sec is set from the global time.tv_sec
+ * after which the three request flags are cleared.
+ *
+ * t1 and t2 are pointers to objects with a tv_sec member.  The macro
+ * expands to a bare brace block and evaluates ip several times, so pass
+ * an expression without side effects.
+ */
+#define ITIMES(ip, t1, t2) { \
+ if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \
+ (ip)->i_flag |= IN_MODIFIED; \
+ if ((ip)->i_flag & IN_ACCESS) \
+ (ip)->i_atime.ts_sec = (t1)->tv_sec; \
+ if ((ip)->i_flag & IN_UPDATE) { \
+ (ip)->i_mtime.ts_sec = (t2)->tv_sec; \
+ (ip)->i_modrev++; \
+ } \
+ if ((ip)->i_flag & IN_CHANGE) \
+ (ip)->i_ctime.ts_sec = time.tv_sec; \
+ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \
+ } \
+}
+
+/*
+ * This overlays the fid structure (see mount.h).
+ * It carries an inode number together with a generation number.
+ */
+struct ufid {
+ u_short ufid_len; /* Length of structure. */
+ u_short ufid_pad; /* Force long alignment. */
+ ino_t ufid_ino; /* File number (ino). */
+ long ufid_gen; /* Generation number. */
+};
+#endif /* KERNEL */
diff --git a/sys/gnu/fs/ext2fs/ext2_bmap.c b/sys/gnu/fs/ext2fs/ext2_bmap.c
new file mode 100644
index 000000000000..bcd838d036a1
--- /dev/null
+++ b/sys/gnu/fs/ext2fs/ext2_bmap.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_bmap.c 8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/resourcevar.h>
+#include <sys/trace.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Bmap converts the logical block number of a file to its physical block
+ * number on the disk.  This is the vnode-operation entry point: it
+ * optionally reports the vnode of the underlying device through a_vpp
+ * and, when a_bnp is supplied, delegates the actual translation (and the
+ * optional read-ahead run length in a_runp) to ufs_bmaparray.
+ *
+ * Returns 0 when no translation was requested, otherwise whatever
+ * ufs_bmaparray returns.
+ */
+int
+ufs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/* Hand back the device vnode if the caller asked for it. */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = VTOI(vp)->i_devvp;
+
+	/* Translate only when there is somewhere to put the answer. */
+	if (ap->a_bnp != NULL)
+		return (ufs_bmaparray(vp, ap->a_bn, ap->a_bnp, NULL, NULL,
+		    ap->a_runp));
+
+	return (0);
+}
+
+/*
+ * Indirect blocks are now on the vnode for the file.  They are given negative
+ * logical block numbers.  Indirect blocks are addressed by the negative
+ * address of the first data block to which they point.  Double indirect blocks
+ * are addressed by one less than the address of the first indirect block to
+ * which they point.  Triple indirect blocks are addressed by one less than
+ * the address of the first double indirect block to which they point.
+ *
+ * ufs_bmaparray does the bmap conversion, and if requested returns the
+ * array of logical blocks which must be traversed to get to a block.
+ * Each entry contains the offset into that block that gets you to the
+ * next block and the disk address of the block (if it is assigned).
+ *
+ * Arguments:
+ *	vp	vnode of the file being mapped.
+ *	bn	logical block number (negative for an indirect block).
+ *	bnp	receives the disk block number, or -1 if none is assigned.
+ *	ap	optional caller-supplied path array, filled by ufs_getlbns;
+ *		when NULL a local array is used.
+ *	nump	optional; receives the number of path entries.  ap and nump
+ *		must be both NULL or both non-NULL (checked under DIAGNOSTIC).
+ *	runp	optional; receives the number of blocks following bn whose
+ *		pointers are sequential, capped at
+ *		MAXBSIZE / f_iosize - 1.
+ *
+ * Returns 0 on success, or the error reported by ufs_getlbns or biowait.
+ */
+
+int
+ufs_bmaparray(vp, bn, bnp, ap, nump, runp)
+	struct vnode *vp;
+	register daddr_t bn;
+	daddr_t *bnp;
+	struct indir *ap;
+	int *nump;
+	int *runp;
+{
+	register struct inode *ip;
+	struct buf *bp;
+	struct ufsmount *ump;
+	struct mount *mp;
+	/*
+	 * ufs_getlbns stores one entry for the inode's indirect pointer
+	 * plus one entry per level of indirection, i.e. up to NIADDR + 1
+	 * entries, so the local path array must be that large.
+	 */
+	struct indir a[NIADDR + 1], *xap;
+	daddr_t daddr;
+	long metalbn;
+	int error, maxrun, num;
+
+	ip = VTOI(vp);
+	mp = vp->v_mount;
+	ump = VFSTOUFS(mp);
+#ifdef DIAGNOSTIC
+	if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL))
+		panic("ufs_bmaparray: invalid arguments");
+#endif
+
+	/* Only meaningful when runp is set; initialized to keep it defined. */
+	maxrun = 0;
+	if (runp) {
+		/*
+		 * XXX
+		 * If MAXBSIZE is the largest transfer the disks can handle,
+		 * we probably want maxrun to be 1 block less so that we
+		 * don't create a block larger than the device can handle.
+		 */
+		*runp = 0;
+		maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1;
+	}
+
+	xap = ap == NULL ? a : ap;
+	if (!nump)
+		nump = &num;
+	if ((error = ufs_getlbns(vp, bn, xap, nump)) != 0)
+		return (error);
+
+	num = *nump;
+	if (num == 0) {
+		/* Direct block: the pointer lives in the inode itself. */
+		*bnp = blkptrtodb(ump, ip->i_db[bn]);
+		if (*bnp == 0)
+			*bnp = -1;
+		else if (runp)
+			/* Count the sequential direct pointers after bn. */
+			for (++bn; bn < NDADDR && *runp < maxrun &&
+			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
+			    ++bn, ++*runp);
+		return (0);
+	}
+
+
+	/* Get disk address out of indirect block array */
+	daddr = ip->i_ib[xap->in_off];
+
+	for (bp = NULL, ++xap; --num; ++xap) {
+		/*
+		 * Exit the loop if there is no disk address assigned yet and
+		 * the indirect block isn't in the cache, or if we were
+		 * looking for an indirect block and we've found it.
+		 */
+
+		metalbn = xap->in_lbn;
+		if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn)
+			break;
+		/*
+		 * If we get here, we've either got the block in the cache
+		 * or we have a disk address for it, go fetch it.
+		 */
+		if (bp)
+			brelse(bp);
+
+		xap->in_exists = 1;
+		bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0);
+		/*
+		 * NOTE(review): `size' is not declared in this function, so
+		 * the trace() calls below presumably expand to nothing in
+		 * normal builds -- confirm before enabling tracing.
+		 */
+		if (bp->b_flags & (B_DONE | B_DELWRI)) {
+			trace(TR_BREADHIT, pack(vp, size), metalbn);
+		}
+#ifdef DIAGNOSTIC
+		else if (!daddr)
+			panic("ufs_bmaparray: indirect block not in cache");
+#endif
+		else {
+			trace(TR_BREADMISS, pack(vp, size), metalbn);
+			bp->b_blkno = blkptrtodb(ump, daddr);
+			bp->b_flags |= B_READ;
+			VOP_STRATEGY(bp);
+			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+			if ((error = biowait(bp)) != 0) {
+				brelse(bp);
+				return (error);
+			}
+		}
+
+		daddr = ((daddr_t *)bp->b_data)[xap->in_off];
+		/*
+		 * On the last level, count the sequential pointers that
+		 * follow ours in this indirect block.  bn is reused as the
+		 * scan index; that is safe because num == 1 means the loop
+		 * terminates after this iteration.
+		 */
+		if (num == 1 && daddr && runp)
+			for (bn = xap->in_off + 1;
+			    bn < MNINDIR(ump) && *runp < maxrun &&
+			    is_sequential(ump, ((daddr_t *)bp->b_data)[bn - 1],
+			    ((daddr_t *)bp->b_data)[bn]);
+			    ++bn, ++*runp);
+	}
+	if (bp)
+		brelse(bp);
+
+	daddr = blkptrtodb(ump, daddr);
+	*bnp = daddr == 0 ? -1 : daddr;
+	return (0);
+}
+
+/*
+ * Create an array of logical block number/offset pairs which represent the
+ * path of indirect blocks required to access a data block. The first "pair"
+ * contains the logical block number of the appropriate single, double or
+ * triple indirect block and the offset into the inode indirect block array.
+ * Note, the logical block number of the inode single/double/triple indirect
+ * block appears twice in the array, once with the offset into the i_ib and
+ * once with the offset into the page itself.
+ *
+ * Arguments:
+ *	vp	vnode whose mount supplies the pointers-per-block count
+ *		(MNINDIR).
+ *	bn	logical block number; a negative value names a meta-data
+ *		block and is processed by its absolute value.
+ *	ap	output array.  It is written without a NULL check whenever
+ *		bn is not a direct block, and receives up to NIADDR + 1
+ *		entries (one for the inode slot plus one per level).
+ *	nump	if non-NULL, receives the number of entries stored; it is
+ *		set to 0 on entry, so it is 0 for a direct block.
+ *
+ * Returns 0 on success, or EFBIG when bn lies beyond what NIADDR levels
+ * of indirection can address.
+ */
+int
+ufs_getlbns(vp, bn, ap, nump)
+ struct vnode *vp;
+ register daddr_t bn;
+ struct indir *ap;
+ int *nump;
+{
+ long metalbn, realbn;
+ struct ufsmount *ump;
+ int blockcnt, i, numlevels, off;
+
+ ump = VFSTOUFS(vp->v_mount);
+ if (nump)
+ *nump = 0;
+ numlevels = 0;
+ /* Remember the signed request in realbn; bn becomes its magnitude. */
+ realbn = bn;
+ if ((long)bn < 0)
+ bn = -(long)bn;
+
+ /* The first NDADDR blocks are direct blocks. */
+ if (bn < NDADDR)
+ return (0);
+
+ /*
+ * Determine the number of levels of indirection. After this loop
+ * is done, blockcnt indicates the number of data blocks possible
+ * at the given level of indirection, and NIADDR - i is the number
+ * of levels of indirection needed to locate the requested block.
+ */
+ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+ if (i == 0)
+ return (EFBIG);
+ blockcnt *= MNINDIR(ump);
+ if (bn < blockcnt)
+ break;
+ }
+
+ /* Calculate the address of the first meta-block. */
+ if (realbn >= 0)
+ metalbn = -(realbn - bn + NIADDR - i);
+ else
+ metalbn = -(-realbn - bn + NIADDR - i);
+
+ /*
+ * At each iteration, off is the offset into the bap array which is
+ * an array of disk addresses at the current level of indirection.
+ * The logical block number and the offset in that block are stored
+ * into the argument array.
+ */
+ /* First entry: which slot of the inode's indirect array to start from. */
+ ap->in_lbn = metalbn;
+ ap->in_off = off = NIADDR - i;
+ ap->in_exists = 0;
+ ap++;
+ /* ++numlevels accounts for the entry just stored above. */
+ for (++numlevels; i <= NIADDR; i++) {
+ /* If searching for a meta-data block, quit when found. */
+ if (metalbn == realbn)
+ break;
+
+ blockcnt /= MNINDIR(ump);
+ off = (bn / blockcnt) % MNINDIR(ump);
+
+ ++numlevels;
+ ap->in_lbn = metalbn;
+ ap->in_off = off;
+ ap->in_exists = 0;
+ ++ap;
+
+ /* Same as metalbn = metalbn + 1 - off * blockcnt. */
+ metalbn -= -1 + off * blockcnt;
+ }
+ if (nump)
+ *nump = numlevels;
+ return (0);
+}
diff --git a/sys/gnu/fs/ext2fs/ext2_mount.h b/sys/gnu/fs/ext2fs/ext2_mount.h
new file mode 100644
index 000000000000..237871fdaaca
--- /dev/null
+++ b/sys/gnu/fs/ext2fs/ext2_mount.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufsmount.h 8.2 (Berkeley) 1/12/94
+ */
+
+struct buf;
+struct inode;
+struct nameidata;
+struct timeval;
+struct ucred;
+struct uio;
+struct vnode;
+struct netexport;
+
+/*
+ * This structure describes the UFS specific mount structure data.
+ * A pointer to it is obtained from a struct mount with VFSTOUFS().
+ * um_nindir, um_bptrtodb and um_seqinc are the parameters read by the
+ * MNINDIR(), blkptrtodb() and is_sequential() macros respectively.
+ */
+struct ufsmount {
+ struct mount *um_mountp; /* filesystem vfs structure */
+ dev_t um_dev; /* device mounted */
+ struct vnode *um_devvp; /* block device mounted vnode */
+ union { /* pointer to superblock */
+ struct lfs *lfs; /* LFS */
+ struct fs *fs; /* FFS */
+ } ufsmount_u;
+#define um_fs ufsmount_u.fs
+#define um_lfs ufsmount_u.lfs
+ struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */
+ struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */
+ u_long um_nindir; /* indirect ptrs per block */
+ u_long um_bptrtodb; /* indir ptr to disk block (left-shift count) */
+ u_long um_seqinc; /* inc between seq blocks */
+ time_t um_btime[MAXQUOTAS]; /* block quota time limit */
+ time_t um_itime[MAXQUOTAS]; /* inode quota time limit */
+ char um_qflags[MAXQUOTAS]; /* quota specific flags */
+ struct netexport um_export; /* export information */
+};
+/*
+ * Flags describing the state of quotas.
+ */
+#define QTF_OPENING 0x01 /* Q_QUOTAON in progress */
+#define QTF_CLOSING 0x02 /* Q_QUOTAOFF in progress */
+
+/* Convert mount ptr to ufsmount ptr. */
+#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data))
+
+/*
+ * Macros to access file system parameters in the ufsmount structure.
+ * Used by ufs_bmap.
+ *
+ *	blkptrtodb(ump, b)	 block pointer b shifted left by um_bptrtodb.
+ *	is_sequential(ump, a, b) true when b equals a plus um_seqinc.
+ *	MNINDIR(ump)		 the um_nindir field.
+ *
+ * Every macro argument is parenthesized in the expansion, so any pointer
+ * expression (e.g. `base + 1' or `&array[i]') may be passed as ump.
+ */
+#define	blkptrtodb(ump, b)		((b) << (ump)->um_bptrtodb)
+#define	is_sequential(ump, a, b)	((b) == (a) + (ump)->um_seqinc)
+#define	MNINDIR(ump)			((ump)->um_nindir)
+
+
diff --git a/sys/gnu/fs/ext2fs/inode.h b/sys/gnu/fs/ext2fs/inode.h
new file mode 100644
index 000000000000..df155967a7df
--- /dev/null
+++ b/sys/gnu/fs/ext2fs/inode.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 1982, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)inode.h 8.4 (Berkeley) 1/21/94
+ */
+
+#include <ufs/ufs/dinode.h>
+
+/*
+ * Theoretically, directories can be more than 2Gb in length, however, in
+ * practice this seems unlikely. So, we define the type doff_t as a long
+ * to keep down the cost of doing lookup on a 32-bit machine. If you are
+ * porting to a 64-bit architecture, you should make doff_t the same as off_t.
+ */
+#define doff_t long
+
+/*
+ * The inode is used to describe each active (or recently active)
+ * file in the UFS filesystem. It is composed of two types of
+ * information. The first part is the information that is needed
+ * only while the file is active (such as the identity of the file
+ * and linkage to speed its lookup). The second part is the
+ * permanent meta-data associated with the file which is read
+ * in from the permanent dinode from long term storage when the
+ * file becomes active, and is put back when the file is no longer
+ * being used.
+ *
+ * The second part is the embedded i_din member; the i_* macros defined
+ * after the structure are shorthands for its di_* fields.
+ */
+struct inode {
+ struct inode *i_next; /* Hash chain forward. */
+ struct inode **i_prev; /* Hash chain back. */
+ struct vnode *i_vnode; /* Vnode associated with this inode. */
+ struct vnode *i_devvp; /* Vnode for block I/O. */
+ u_long i_flag; /* I* flags. */
+ dev_t i_dev; /* Device associated with the inode. */
+ ino_t i_number; /* The identity of the inode. */
+ union { /* Associated filesystem. */
+ struct fs *fs; /* FFS */
+ struct lfs *lfs; /* LFS */
+ } inode_u;
+#define i_fs inode_u.fs
+#define i_lfs inode_u.lfs
+ struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
+ u_quad_t i_modrev; /* Revision level for lease. */
+ struct lockf *i_lockf; /* Head of byte-level lock list. */
+ pid_t i_lockholder; /* DEBUG: holder of inode lock. */
+ pid_t i_lockwaiter; /* DEBUG: latest blocked for inode lock. */
+ /*
+ * Side effects; used during directory lookup.
+ */
+ long i_count; /* Size of free slot in directory. */
+ doff_t i_endoff; /* End of useful stuff in directory. */
+ doff_t i_diroff; /* Offset in dir, where we found last entry. */
+ doff_t i_offset; /* Offset of free space in directory. */
+ ino_t i_ino; /* Inode number of found directory. */
+ u_long i_reclen; /* Size of found directory entry. */
+ long i_spare[11]; /* Spares to round up to 128 bytes. */
+ /*
+ * The on-disk dinode itself.
+ */
+ struct dinode i_din; /* 128 bytes of the on-disk dinode. */
+};
+
+#define i_atime i_din.di_atime
+#define i_blocks i_din.di_blocks
+#define i_ctime i_din.di_ctime
+#define i_db i_din.di_db
+#define i_flags i_din.di_flags
+#define i_gen i_din.di_gen
+#define i_gid i_din.di_gid
+#define i_ib i_din.di_ib
+#define i_mode i_din.di_mode
+#define i_mtime i_din.di_mtime
+#define i_nlink i_din.di_nlink
+#define i_rdev i_din.di_rdev
+#define i_shortlink i_din.di_shortlink
+#define i_size i_din.di_size
+#define i_uid i_din.di_uid
+
+/* These flags are kept in i_flag. */
+#define IN_ACCESS 0x0001 /* Access time update request. */
+#define IN_CHANGE 0x0002 /* Inode change time update request. */
+#define IN_EXLOCK 0x0004 /* File has exclusive lock. */
+#define IN_LOCKED 0x0008 /* Inode lock. */
+#define IN_LWAIT 0x0010 /* Process waiting on file lock. */
+#define IN_MODIFIED 0x0020 /* Inode has been modified. */
+#define IN_RENAME 0x0040 /* Inode is being renamed. */
+#define IN_SHLOCK 0x0080 /* File has shared lock. */
+#define IN_UPDATE 0x0100 /* Modification time update request. */
+#define IN_WANTED 0x0200 /* Inode is wanted by a process. */
+
+#ifdef KERNEL
+/*
+ * Structure used to pass around logical block paths generated by
+ * ufs_getlbns and used by truncate and bmap code.  Each entry names one
+ * block on the path (in_lbn) and the index, within that block, of the
+ * pointer that leads to the next step (in_off).  For the first entry of
+ * a path the index is into the inode's indirect pointer array instead.
+ */
+struct indir {
+ daddr_t in_lbn; /* Logical block number. */
+ int in_off; /* Offset (pointer index) in buffer. */
+ int in_exists; /* Flag if the block exists. */
+};
+
+/* Convert between inode pointers and vnode pointers. */
+#define VTOI(vp) ((struct inode *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
+
+/*
+ * Apply pending timestamp requests to an inode.  Nothing happens unless
+ * at least one of IN_ACCESS, IN_CHANGE or IN_UPDATE is set in i_flag.
+ * Otherwise the inode is marked IN_MODIFIED and:
+ *   IN_ACCESS - the access time seconds are copied from t1->tv_sec;
+ *   IN_UPDATE - the modification time seconds are copied from
+ *               t2->tv_sec and i_modrev is incremented;
+ *   IN_CHANGE - the change time seconds are copied from time.tv_sec,
+ *               where `time' is not a macro argument and must be
+ *               visible at the point of expansion.
+ * Finally the three request flags are cleared.
+ *
+ * t1 and t2 are pointers.  `ip' is evaluated several times, so pass an
+ * expression without side effects.  The expansion is a bare brace block,
+ * not a do/while(0) statement.
+ */
+#define ITIMES(ip, t1, t2) { \
+ if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \
+ (ip)->i_flag |= IN_MODIFIED; \
+ if ((ip)->i_flag & IN_ACCESS) \
+ (ip)->i_atime.ts_sec = (t1)->tv_sec; \
+ if ((ip)->i_flag & IN_UPDATE) { \
+ (ip)->i_mtime.ts_sec = (t2)->tv_sec; \
+ (ip)->i_modrev++; \
+ } \
+ if ((ip)->i_flag & IN_CHANGE) \
+ (ip)->i_ctime.ts_sec = time.tv_sec; \
+ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \
+ } \
+}
+
+/* This overlays the fid structure (see mount.h). */
+struct ufid {
+ u_short ufid_len; /* Length of structure. */
+ u_short ufid_pad; /* Force long alignment. */
+ ino_t ufid_ino; /* File number (ino). */
+ long ufid_gen; /* Generation number. */
+};
+#endif /* KERNEL */
diff --git a/sys/isofs/cd9660/TODO b/sys/isofs/cd9660/TODO
new file mode 100644
index 000000000000..555d26ad7d11
--- /dev/null
+++ b/sys/isofs/cd9660/TODO
@@ -0,0 +1,77 @@
+# $Id: TODO,v 1.4 1993/09/07 15:40:51 ws Exp $
+
+ 1) should understand "older", original High Sierra ("CDROM001") type
+
+ Not yet. ( I don't have this technical information, yet. )
+
+ 2) should understand Rock Ridge
+
+ Yes, the following features are supported:
+
+ o Symbolic Link
+ o Real Name(long name)
+ o File Attribute
+ o Time stamp
+ o uid, gid
+ o Devices
+ o Relocated directories
+
+ Except for the following:
+
+ o POSIX device number mapping
+
+ There is some preliminary stuff in there that (ab-)uses the mknod
+ system call, but this needs a writable filesystem
+
+ 3) should be called cdfs, as other ISO file systems may soon be possible
+
+ Not yet. Probably we should make another file system when the ECMA draft
+ is valid and do it. For doing Rock Ridge Support, I can use almost same
+ code. So I just use the same file system interface...
+
+ 4) should have file handles implemented for use with NFS, etc
+
+ Yes. we have already this one, and I based it for this release.
+
+ 5) should have name translation enabled by mount flag
+
+ Yes. We can disable the Rock Ridge Extension with the following option:
+
+ "mount -t isofs -o -norrip /dev/cd0d /cdrom"
+
+ 6) should run as a user process, and not take up kernel space (cdroms
+ are slow)
+
+ Not yet.
+
+ 7) ECMA support.
+
+ Not yet. We need not only a technical spec but also ECMA format
+ cd-rom itself!
+
+ 8) Character set change by SVD ( multi SVD support )
+
+ Not yet. We should also hack the other part of system as 8 bit
+ clean. As far as I know, if you export the cdrom by NFS, the client
+ can access the 8 bit clean (ie. Solaris Japanese with EUC code )
+
+ 9) Access checks in isofs_access
+
+ Not yet.
+
+ 10) Support for generation numbers
+
+ Yes. Default is to list only the last file (the one with the highest
+ generation number). If you mount with -gen, all files are shown with
+ their generation numbers. In both cases you can specify the generation
+ number on opening files (if you happen to know it) or leave it off,
+ when it will again find the last file.
+
+ 11) Support for extended attributes
+
+ Yes. Since this requires an extra block buffer for the attributes
+ this must be enabled on mounting with the option -extattr.
+
+----------
+Last update July 19, '93 by Atsushi Murai. (amurai@spec.co.jp)
+Last update August 19, '93 by Wolfgang Solfrank. (ws@tools.de)
diff --git a/sys/isofs/cd9660/TODO.hibler b/sys/isofs/cd9660/TODO.hibler
new file mode 100644
index 000000000000..3501aa296cd2
--- /dev/null
+++ b/sys/isofs/cd9660/TODO.hibler
@@ -0,0 +1,22 @@
+1. Investigate making ISOFS another UFS shared filesystem (ala FFS/MFS/LFS).
+ Since it was modelled after the inode code, we might be able to merge
+ them back. It looks like a separate (but very similar) lookup routine
+ will be needed due to the associated file stuff.
+
+2. Make filesystem exportable. This comes for free if stacked with UFS.
+ Otherwise, the ufs_export routines need to be elevated to vfs_* routines.
+ [ DONE - hibler ]
+
+3. If it can't be merged with UFS, at least get them in sync. For example,
+ it could use the same style hashing routines as in ufs/ufs_ihash.c
+
+4. It would be nice to be able to use the vfs_cluster code.
+ Unfortunately, if the logical block size is smaller than the page size,
+ it won't work. Also, if throughput is relatively constant for any
+ block size (as it is for the HP drive--150kbs) then clustering may not
+ buy much (or may even hurt when vfs_cluster comes up with a large sync
+ cluster).
+
+5. Seems like there should be a "notrans" or some such mount option to show
+ filenames as they really are without lower-casing, stripping of version
+ numbers, etc. Does this make sense?
diff --git a/sys/isofs/cd9660/cd9660_bmap.c b/sys/isofs/cd9660/cd9660_bmap.c
new file mode 100644
index 000000000000..911eedfd06ae
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_bmap.c
@@ -0,0 +1,102 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_bmap.c 8.3 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+
+/*
+ * Bmap converts the logical block number of a file to its physical block
+ * number on the disk.  The mapping is linear: the file's starting block
+ * (iso_start) plus the logical block number, scaled by btodb() of the
+ * mount's logical block size.
+ *
+ * If a_vpp is non-NULL it receives the device vnode.  If a_bnp is NULL
+ * nothing further is computed.  If a_runp is non-NULL it receives the
+ * number of whole blocks remaining in the file after the requested one,
+ * clamped to the range 0 .. MAXBSIZE/bsize - 1.
+ *
+ * Always returns 0.
+ */
+int
+cd9660_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct iso_node *ip;
+	daddr_t lblkno;
+	long bsize;
+
+	ip = VTOI(ap->a_vp);
+	lblkno = ap->a_bn;
+
+	/* Report the underlying device vnode when requested. */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = ip->i_devvp;
+
+	/* Without a result slot there is no translation to perform. */
+	if (ap->a_bnp == NULL)
+		return (0);
+
+	/* Physical block = (extent start + logical block) in device units. */
+	bsize = ip->i_mnt->logical_block_size;
+	*ap->a_bnp = (ip->iso_start + lblkno) * btodb(bsize);
+
+	/* Read-ahead: blocks left in the file, bounded by MAXBSIZE. */
+	if (ap->a_runp != NULL) {
+		int nblk;
+		long maxblk;
+
+		nblk = (ip->i_size - (lblkno + 1) * bsize) / bsize;
+		maxblk = MAXBSIZE / bsize;
+		if (nblk <= 0)
+			*ap->a_runp = 0;
+		else if (nblk < maxblk)
+			*ap->a_runp = nblk;
+		else
+			*ap->a_runp = maxblk - 1;
+	}
+
+	return (0);
+}
diff --git a/sys/isofs/cd9660/cd9660_lookup.c b/sys/isofs/cd9660/cd9660_lookup.c
new file mode 100644
index 000000000000..62d1d3fc791e
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_lookup.c
@@ -0,0 +1,465 @@
+/*-
+ * Copyright (c) 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)ufs_lookup.c 7.33 (Berkeley) 5/19/91
+ *
+ * @(#)cd9660_lookup.c 8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+#include <isofs/cd9660/cd9660_rrip.h>
+
+struct nchstats iso_nchstats;
+
+/*
+ * Convert a component of a pathname into a pointer to a locked inode.
+ * This is a very central and rather complicated routine.
+ * If the file system is not maintained in a strict tree hierarchy,
+ * this can result in a deadlock situation (see comments in code below).
+ *
+ * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
+ * whether the name is to be looked up, created, renamed, or deleted.
+ * When CREATE, RENAME, or DELETE is specified, information usable in
+ * creating, renaming, or deleting a directory entry may be calculated.
+ * If flag has LOCKPARENT or'ed into it and the target of the pathname
+ * exists, lookup returns both the target and its parent directory locked.
+ * When creating or renaming and LOCKPARENT is specified, the target may
+ * not be ".".  When deleting and LOCKPARENT is specified, the target may
+ * be ".", but the caller must check to ensure it does a vrele and iput
+ * instead of two iputs.
+ *
+ * Overall outline of cd9660_lookup (inherited from ufs_lookup):
+ *
+ *	check accessibility of directory
+ *	look for name in cache, if found, then if at end of path
+ *	  and deleting or creating, drop it, else return name
+ *	search for name in directory, to found or notfound
+ * notfound:
+ *	if creating, return locked directory, leaving info on available slots
+ *	else return error
+ * found:
+ *	if at end of path and deleting, return information to allow delete
+ *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
+ *	  inode and return info to allow rewrite
+ *	if not at end, add name to cache; if at end and neither creating
+ *	  nor deleting, add name to cache
+ *
+ * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
+ *
+ * Returns 0 with *a_vpp set when the name is found.  Otherwise returns
+ * ENOTDIR if a_dvp is not a directory, EJUSTRETURN if the name is absent
+ * and the operation is CREATE or RENAME, ENOENT if it is absent for any
+ * other operation (or the name cache says so), or the error reported by
+ * VOP_ACCESS, iso_blkatoff or iso_iget.
+ */
+int
+cd9660_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vdp;	/* vnode for directory being searched */
+	register struct iso_node *dp;	/* inode for directory being searched */
+	register struct iso_mnt *imp;	/* file system that directory is in */
+	struct buf *bp;			/* a buffer of directory entries */
+	struct iso_directory_record *ep;/* the current directory entry */
+	int entryoffsetinblock;		/* offset of ep in bp's buffer */
+	int saveoffset;			/* offset of last directory entry in dir */
+	int numdirpasses;		/* strategy for directory search */
+	doff_t endsearch;		/* offset to end directory search */
+	struct iso_node *pdp;		/* saved dp during symlink work */
+	struct iso_node *tdp;		/* returned by iget */
+	int lockparent;			/* 1 => lockparent flag is set */
+	int wantparent;			/* 1 => wantparent or lockparent flag */
+	int error;
+	ino_t ino = 0;
+	int reclen;
+	u_short namelen;
+	char altname[NAME_MAX];
+	int res;
+	int assoc, len;
+	char *name;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	struct ucred *cred = cnp->cn_cred;
+	int flags = cnp->cn_flags;
+	int nameiop = cnp->cn_nameiop;
+
+	bp = NULL;
+	*vpp = NULL;
+	vdp = ap->a_dvp;
+	dp = VTOI(vdp);
+	imp = dp->i_mnt;
+	lockparent = flags & LOCKPARENT;
+	wantparent = flags & (LOCKPARENT|WANTPARENT);
+
+	/*
+	 * Check accessibility of directory.
+	 */
+	if (vdp->v_type != VDIR)
+		return (ENOTDIR);
+	if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0)
+		return (error);
+
+	/*
+	 * We now have a segment name to search for, and a directory to search.
+	 *
+	 * Before tediously performing a linear scan of the directory,
+	 * check the name cache to see if the directory/name pair
+	 * we are looking for is known already.
+	 */
+	if ((error = cache_lookup(vdp, vpp, cnp)) != 0) {
+		int vpid;	/* capability number of vnode */
+
+		if (error == ENOENT)
+			return (error);
+#ifdef PARANOID
+		if ((vdp->v_flag & VROOT) && (flags & ISDOTDOT))
+			panic("ufs_lookup: .. through root");
+#endif
+		/*
+		 * Get the next vnode in the path.
+		 * See comment below starting `Step through' for
+		 * an explanation of the locking protocol.
+		 */
+		pdp = dp;
+		dp = VTOI(*vpp);
+		vdp = *vpp;
+		vpid = vdp->v_id;
+		if (pdp == dp) {
+			VREF(vdp);
+			error = 0;
+		} else if (flags & ISDOTDOT) {
+			ISO_IUNLOCK(pdp);
+			error = vget(vdp, 1);
+			if (!error && lockparent && (flags & ISLASTCN))
+				ISO_ILOCK(pdp);
+		} else {
+			error = vget(vdp, 1);
+			if (!lockparent || error || !(flags & ISLASTCN))
+				ISO_IUNLOCK(pdp);
+		}
+		/*
+		 * Check that the capability number did not change
+		 * while we were waiting for the lock.
+		 */
+		if (!error) {
+			if (vpid == vdp->v_id)
+				return (0);
+			iso_iput(dp);
+			if (lockparent && pdp != dp && (flags & ISLASTCN))
+				ISO_IUNLOCK(pdp);
+		}
+		/*
+		 * The cached vnode was stale: fall back to scanning the
+		 * original directory.
+		 */
+		ISO_ILOCK(pdp);
+		dp = pdp;
+		vdp = ITOV(dp);
+		*vpp = NULL;
+	}
+
+	len = cnp->cn_namelen;
+	name = cnp->cn_nameptr;
+	/*
+	 * A leading `=' means, we are looking for an associated file
+	 */
+	if ((assoc = (imp->iso_ftype != ISO_FTYPE_RRIP && *name == ASSOCCHAR)) != 0) {
+		len--;
+		name++;
+	}
+
+	/*
+	 * If there is cached information on a previous search of
+	 * this directory, pick up where we last left off.
+	 * We cache only lookups as these are the most common
+	 * and have the greatest payoff. Caching CREATE has little
+	 * benefit as it usually must search the entire directory
+	 * to determine that the entry does not exist. Caching the
+	 * location of the last DELETE or RENAME has not reduced
+	 * profiling time and hence has been removed in the interest
+	 * of simplicity.
+	 */
+	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
+	    dp->i_diroff > dp->i_size) {
+		entryoffsetinblock = 0;
+		dp->i_offset = 0;
+		numdirpasses = 1;
+	} else {
+		dp->i_offset = dp->i_diroff;
+		entryoffsetinblock = iso_blkoff(imp, dp->i_offset);
+		if (entryoffsetinblock != 0) {
+			if ((error = iso_blkatoff(dp, dp->i_offset, &bp)) != 0)
+				return (error);
+		}
+		numdirpasses = 2;
+		iso_nchstats.ncs_2passes++;
+	}
+	endsearch = roundup(dp->i_size, imp->logical_block_size);
+
+searchloop:
+	while (dp->i_offset < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+		if (iso_blkoff(imp, dp->i_offset) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if ((error = iso_blkatoff(dp, dp->i_offset, &bp)) != 0)
+				return (error);
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+		ep = (struct iso_directory_record *)
+			(bp->b_un.b_addr + entryoffsetinblock);
+
+		reclen = isonum_711 (ep->length);
+		if (reclen == 0) {
+			/*
+			 * Skip to next block, if any.  Round up from
+			 * i_offset + 1: rounding i_offset itself is a
+			 * no-op when the empty record sits exactly on a
+			 * block boundary, and the loop would never advance.
+			 */
+			dp->i_offset =
+				roundup(dp->i_offset + 1, imp->logical_block_size);
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE)
+			/* illegal entry, stop */
+			break;
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size)
+			/* entries are not allowed to cross boundaries */
+			break;
+
+		/*
+		 * Check for a name match.
+		 */
+		namelen = isonum_711(ep->name_len);
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen)
+			/* illegal entry, stop */
+			break;
+
+		switch (imp->iso_ftype) {
+		default:
+			/* only consider entries whose flag bit 4 agrees with assoc */
+			if ((!(isonum_711(ep->flags)&4)) == !assoc) {
+				if ((len == 1
+				     && *name == '.')
+				    || (flags & ISDOTDOT)) {
+					/* "." is recorded as name byte 0, ".." as byte 1 */
+					if (namelen == 1
+					    && ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) {
+						/*
+						 * Save directory entry's inode number and
+						 * reclen in ndp->ni_ufs area, and release
+						 * directory buffer.
+						 */
+						isodirino(&dp->i_ino,ep,imp);
+						goto found;
+					}
+					if (namelen != 1
+					    || ep->name[0] != 0)
+						goto notfound;
+				} else if (!(res = isofncmp(name,len,
+							    ep->name,namelen))) {
+					/*
+					 * Remember the match but keep scanning;
+					 * the last consecutive matching entry
+					 * is the one that is used.
+					 */
+					if (isonum_711(ep->flags)&2)
+						isodirino(&ino,ep,imp);
+					else
+						ino = dbtob(bp->b_blkno)
+							+ entryoffsetinblock;
+					saveoffset = dp->i_offset;
+				} else if (ino)
+					goto foundino;
+#ifdef	NOSORTBUG	/* On some CDs directory entries are not sorted correctly */
+				else if (res < 0)
+					goto notfound;
+				else if (res > 0 && numdirpasses == 2)
+					numdirpasses++;
+#endif
+			}
+			break;
+		case ISO_FTYPE_RRIP:
+			if (isonum_711(ep->flags)&2)
+				isodirino(&ino,ep,imp);
+			else
+				ino = dbtob(bp->b_blkno) + entryoffsetinblock;
+			dp->i_ino = ino;
+			cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp);
+			if (namelen == cnp->cn_namelen
+			    && !bcmp(name,altname,namelen))
+				goto found;
+			ino = 0;
+			break;
+		}
+		dp->i_offset += reclen;
+		entryoffsetinblock += reclen;
+	}
+	if (ino) {
+foundino:
+		dp->i_ino = ino;
+		if (saveoffset != dp->i_offset) {
+			/*
+			 * The scan moved past the match; point ep (and, if
+			 * needed, bp) back at the remembered entry.
+			 */
+			if (iso_lblkno(imp,dp->i_offset)
+			    != iso_lblkno(imp,saveoffset)) {
+				if (bp != NULL)
+					brelse(bp);
+				if ((error = iso_blkatoff(dp, saveoffset, &bp)) != 0)
+					return (error);
+			}
+			ep = (struct iso_directory_record *)(bp->b_un.b_addr
+							     + iso_blkoff(imp,saveoffset));
+			dp->i_offset = saveoffset;
+		}
+		goto found;
+	}
+notfound:
+	/*
+	 * If we started in the middle of the directory and failed
+	 * to find our target, we must check the beginning as well.
+	 */
+	if (numdirpasses == 2) {
+		numdirpasses--;
+		dp->i_offset = 0;
+		endsearch = dp->i_diroff;
+		goto searchloop;
+	}
+	if (bp != NULL)
+		brelse(bp);
+	/*
+	 * Insert name into cache (as non-existent) if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	if (nameiop == CREATE || nameiop == RENAME)
+		return (EJUSTRETURN);
+	return (ENOENT);
+
+found:
+	if (numdirpasses == 2)
+		iso_nchstats.ncs_pass2++;
+	/*
+	 * The directory buffer is NOT released yet: ep points into it and
+	 * is handed to iso_iget below, which reads the directory record.
+	 * Each branch releases bp once ep is no longer needed.
+	 */
+
+	/*
+	 * Found component in pathname.
+	 * If the final component of path name, save information
+	 * in the cache as to where the entry was found.
+	 */
+	if ((flags & ISLASTCN) && nameiop == LOOKUP)
+		dp->i_diroff = dp->i_offset;
+
+	/*
+	 * Step through the translation in the name.  We do not `iput' the
+	 * directory because we may need it again if a symbolic link
+	 * is relative to the current directory.  Instead we save it
+	 * unlocked as "pdp".  We must get the target inode before unlocking
+	 * the directory to insure that the inode will not be removed
+	 * before we get it.  We prevent deadlock by always fetching
+	 * inodes from the root, moving down the directory tree. Thus
+	 * when following backward pointers ".." we must unlock the
+	 * parent directory before getting the requested directory.
+	 * There is a potential race condition here if both the current
+	 * and parent directories are removed before the `iget' for the
+	 * inode associated with ".." returns.  We hope that this occurs
+	 * infrequently since we cannot avoid this race condition without
+	 * implementing a sophisticated deadlock detection algorithm.
+	 * Note also that this simple deadlock detection scheme will not
+	 * work if the file system has any hard links other than ".."
+	 * that point backwards in the directory structure.
+	 */
+	pdp = dp;
+	/*
+	 * If ino is different from dp->i_ino,
+	 * it's a relocated directory.
+	 */
+	if (flags & ISDOTDOT) {
+		ISO_IUNLOCK(pdp);	/* race to get the inode */
+		error = iso_iget(dp,dp->i_ino,
+				 dp->i_ino != ino,
+				 &tdp,ep);
+		if (bp != NULL)
+			brelse(bp);
+		if (error) {
+			ISO_ILOCK(pdp);
+			return (error);
+		}
+		if (lockparent && (flags & ISLASTCN))
+			ISO_ILOCK(pdp);
+		*vpp = ITOV(tdp);
+	} else if (dp->i_number == dp->i_ino) {
+		if (bp != NULL)
+			brelse(bp);
+		VREF(vdp);	/* we want ourself, ie "." */
+		*vpp = vdp;
+	} else {
+		error = iso_iget(dp,dp->i_ino,dp->i_ino!=ino,&tdp,ep);
+		if (bp != NULL)
+			brelse(bp);
+		if (error)
+			return (error);
+		if (!lockparent || !(flags & ISLASTCN))
+			ISO_IUNLOCK(pdp);
+		*vpp = ITOV(tdp);
+	}
+
+	/*
+	 * Insert name into cache if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	return (0);
+}
+
+/*
+ * Read the logical block that contains byte "offset", counted from the
+ * beginning of the data of inode "ip".
+ *
+ * On success the buffer is stored in *bpp and 0 is returned; releasing
+ * it with brelse() is then the caller's job.  On failure the buffer
+ * handed back by bread() is released here, *bpp is set to NULL and the
+ * bread() error is returned.
+ */
+int
+iso_blkatoff(ip, offset, bpp)
+	struct iso_node *ip;
+	doff_t offset;
+	struct buf **bpp;
+{
+	register struct iso_mnt *imp = ip->i_mnt;
+	daddr_t lbn = iso_lblkno(imp,offset);
+	int bsize = iso_blksize(imp,ip,lbn);
+	struct buf *bp;
+	int error;
+
+	if ((error = bread(ITOV(ip),lbn,bsize,NOCRED,&bp)) != 0) {
+		brelse(bp);
+		*bpp = NULL;
+		return (error);
+	}
+	*bpp = bp;
+
+	return (0);
+}
diff --git a/sys/isofs/cd9660/cd9660_node.c b/sys/isofs/cd9660/cd9660_node.c
new file mode 100644
index 000000000000..d83a7a6f126a
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_node.c
@@ -0,0 +1,648 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_node.c 8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/stat.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+#define INOHSZ 512 /* number of buckets in the iso_node hash table */
+#if ((INOHSZ&(INOHSZ-1)) == 0) /* power of two: reduce with a mask */
+#define INOHASH(dev,ino) (((dev)+((ino)>>12))&(INOHSZ-1))
+#else /* otherwise reduce with a modulo */
+#define INOHASH(dev,ino) (((unsigned)((dev)+((ino)>>12)))%INOHSZ)
+#endif
+
+union iso_ihead { /* one hash bucket; an empty bucket links to itself */
+ union iso_ihead *ih_head[2]; /* the links seen as bucket heads */
+ struct iso_node *ih_chain[2]; /* the same links seen as iso_nodes */
+} iso_ihead[INOHSZ];
+
+#ifdef ISODEVMAP
+#define DNOHSZ 64 /* number of buckets in the device-map hash table */
+#if ((DNOHSZ&(DNOHSZ-1)) == 0) /* power of two: reduce with a mask */
+#define DNOHASH(dev,ino) (((dev)+((ino)>>12))&(DNOHSZ-1))
+#else /* otherwise reduce with a modulo */
+#define DNOHASH(dev,ino) (((unsigned)((dev)+((ino)>>12)))%DNOHSZ)
+#endif
+
+union iso_dhead { /* one device-map bucket, same layout idea as iso_ihead */
+ union iso_dhead *dh_head[2]; /* the links seen as bucket heads */
+ struct iso_dnode *dh_chain[2]; /* the same links seen as iso_dnodes */
+} iso_dhead[DNOHSZ];
+#endif /* ISODEVMAP */
+
+int prtactive; /* 1 => print out reclaim of active vnodes */
+
+/*
+ * Set up the inode hash table and, when ISODEVMAP is defined, the
+ * device-map hash table: both links of every bucket are pointed at the
+ * bucket itself, which is how an empty chain is represented.
+ */
+cd9660_init()
+{
+	register int n;
+
+	for (n = 0; n < INOHSZ; n++) {
+		iso_ihead[n].ih_head[0] = &iso_ihead[n];
+		iso_ihead[n].ih_head[1] = &iso_ihead[n];
+	}
+#ifdef ISODEVMAP
+	for (n = 0; n < DNOHSZ; n++) {
+		iso_dhead[n].dh_head[0] = &iso_dhead[n];
+		iso_dhead[n].dh_head[1] = &iso_dhead[n];
+	}
+#endif
+}
+
+#ifdef ISODEVMAP
+/*
+ * Find the device-map node for (dev, ino) on its hash chain and return
+ * it.  If there is none and "create" is zero, return a null pointer.
+ * If there is none and "create" is non-zero, allocate a node, store dev
+ * and ino in it, link it onto the chain and return it; no other field
+ * of the new node is initialized here.
+ */
+struct iso_dnode *
+iso_dmap(dev,ino,create)
+	dev_t dev;
+	ino_t ino;
+	int create;
+{
+	union iso_dhead *bucket = &iso_dhead[DNOHASH(dev, ino)];
+	struct iso_dnode *node;
+
+	/* the chain is circular: it ends when we are back at the bucket */
+	node = bucket->dh_chain[0];
+	while (node != (struct iso_dnode *)bucket) {
+		if (ino == node->i_number && dev == node->i_dev)
+			return node;
+		node = node->d_forw;
+	}
+
+	if (create) {
+		MALLOC(node,struct iso_dnode *,sizeof(struct iso_dnode),M_CACHE,M_WAITOK);
+		node->i_dev = dev;
+		node->i_number = ino;
+		insque(node,bucket);
+		return node;
+	}
+
+	return (struct iso_dnode *)0;
+}
+
+/*
+ * Remove every device-map node recorded for "dev": each one is taken
+ * off its hash chain and freed.
+ */
+void
+iso_dunmap(dev)
+	dev_t dev;
+{
+	struct iso_dnode *node, *next;
+	union iso_dhead *bucket;
+	int n;
+
+	for (n = 0; n < DNOHSZ; n++) {
+		bucket = &iso_dhead[n];
+		node = bucket->dh_chain[0];
+		while (node != (struct iso_dnode *)bucket) {
+			/* pick up the successor first: node may be freed */
+			next = node->d_forw;
+			if (node->i_dev == dev) {
+				remque(node);
+				FREE(node,M_CACHE);
+			}
+			node = next;
+		}
+	}
+}
+#endif
+
+/*
+ * Look up a ISOFS dinode number to find its incore vnode.
+ * If it is not in core, read it in from the specified device.
+ * If it is in core, wait for the lock bit to clear, then
+ * return the inode locked. Detection and handling of mount
+ * points must be done by the calling routine.
+ *
+ *	xp		any inode of the same mount; supplies device and mount
+ *	ino		inode number to find or create
+ *	relocated	non-zero if ino names a relocated directory, in
+ *			which case isodir is ignored and the `.' entry of
+ *			that directory is read instead
+ *	ipp		receives the locked inode, or 0 on failure
+ *	isodir		directory record describing the file
+ *
+ * Returns 0 on success, otherwise the error from getnewvnode or
+ * iso_blkatoff, or EOPNOTSUPP for a FIFO when FIFO is not configured.
+ */
+int
+iso_iget(xp, ino, relocated, ipp, isodir)
+	struct iso_node *xp;
+	ino_t ino;
+	int relocated;
+	struct iso_node **ipp;
+	struct iso_directory_record *isodir;
+{
+	dev_t dev = xp->i_dev;
+	struct mount *mntp = ITOV(xp)->v_mount;
+	register struct iso_node *ip, *iq;
+	register struct vnode *vp;
+#ifdef ISODEVMAP
+	register struct iso_dnode *dp;
+#endif
+	struct vnode *nvp;
+	struct buf *bp = NULL, *bp2 = NULL;
+	union iso_ihead *ih;
+	int error, off;
+	struct iso_mnt *imp;
+
+	ih = &iso_ihead[INOHASH(dev, ino)];
+loop:
+	for (ip = ih->ih_chain[0];
+	     ip != (struct iso_node *)ih;
+	     ip = ip->i_forw) {
+		if (ino != ip->i_number || dev != ip->i_dev)
+			continue;
+		if ((ip->i_flag&ILOCKED) != 0) {
+			/* somebody holds it: wait, then rescan the chain */
+			ip->i_flag |= IWANT;
+			sleep((caddr_t)ip, PINOD);
+			goto loop;
+		}
+		if (vget(ITOV(ip), 1))
+			goto loop;
+		*ipp = ip;
+		return 0;
+	}
+	/*
+	 * Allocate a new vnode/iso_node.
+	 */
+	if ((error = getnewvnode(VT_ISOFS, mntp, cd9660_vnodeop_p, &nvp)) != 0) {
+		*ipp = 0;
+		return error;
+	}
+	MALLOC(ip, struct iso_node *, sizeof(struct iso_node),
+	       M_ISOFSNODE, M_WAITOK);
+	bzero((caddr_t)ip, sizeof(struct iso_node));
+	nvp->v_data = ip;
+	ip->i_vnode = nvp;
+	ip->i_flag = 0;
+	ip->i_devvp = 0;
+	ip->i_diroff = 0;
+	ip->i_lockf = 0;
+
+	/*
+	 * Put it onto its hash chain and lock it so that other requests for
+	 * this inode will block if they arrive while we are sleeping waiting
+	 * for old data structures to be purged or for the contents of the
+	 * disk portion of this inode to be read.
+	 */
+	ip->i_dev = dev;
+	ip->i_number = ino;
+	insque(ip, ih);
+	ISO_ILOCK(ip);
+
+	imp = VFSTOISOFS (mntp);
+	ip->i_mnt = imp;
+	ip->i_devvp = imp->im_devvp;
+	VREF(ip->i_devvp);
+
+	if (relocated) {
+		/*
+		 * On relocated directories we must
+		 * read the `.' entry out of a dir.
+		 */
+		ip->iso_start = ino >> imp->im_bshift;
+		if ((error = iso_blkatoff(ip,0,&bp)) != 0) {
+			/*
+			 * Drop the device reference and forget it, so that
+			 * cd9660_reclaim (reached through iso_iput below,
+			 * since the mode is still 0) does not release it
+			 * a second time.
+			 */
+			vrele(ip->i_devvp);
+			ip->i_devvp = 0;
+			/* self-link so the remque in reclaim is harmless */
+			remque(ip);
+			ip->i_forw = ip;
+			ip->i_back = ip;
+			iso_iput(ip);
+			*ipp = 0;
+			return error;
+		}
+		isodir = (struct iso_directory_record *)bp->b_un.b_addr;
+	}
+
+	ip->iso_extent = isonum_733(isodir->extent);
+	ip->i_size = isonum_733(isodir->size);
+	/* the data starts after the extended attribute blocks, if any */
+	ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent;
+
+	vp = ITOV(ip);
+
+	/*
+	 * Setup time stamp, attribute
+	 */
+	vp->v_type = VNON;
+	switch (imp->iso_ftype) {
+	default:	/* ISO_FTYPE_9660 */
+		/*
+		 * ext_attr_length counts logical blocks, while
+		 * iso_blkatoff takes a byte offset: scale it, exactly as
+		 * cd9660_defattr and cd9660_deftstamp do.  A failed read
+		 * leaves bp2 null and the defaults are used.
+		 */
+		if ((imp->im_flags&ISOFSMNT_EXTATT)
+		    && (off = isonum_711(isodir->ext_attr_length)))
+			iso_blkatoff(ip,-off * imp->logical_block_size,
+				     &bp2);
+		cd9660_defattr(isodir,ip,bp2 );
+		cd9660_deftstamp(isodir,ip,bp2 );
+		break;
+	case ISO_FTYPE_RRIP:
+		(void) cd9660_rrip_analyze(isodir,ip,imp);
+		break;
+	}
+	if (bp2)
+		brelse(bp2);
+	if (bp)
+		brelse(bp);
+
+	/*
+	 * Initialize the associated vnode
+	 */
+	vp->v_type = IFTOVT(ip->inode.iso_mode);
+
+	if ( vp->v_type == VFIFO ) {
+#ifdef	FIFO
+		extern int (**cd9660_fifoop_p)();
+		vp->v_op = cd9660_fifoop_p;
+#else
+		iso_iput(ip);
+		*ipp = 0;
+		return EOPNOTSUPP;
+#endif	/* FIFO */
+	} else if ( vp->v_type == VCHR || vp->v_type == VBLK ) {
+		extern int (**cd9660_specop_p)();
+
+		/*
+		 * if device, look at device number table for translation
+		 */
+#ifdef	ISODEVMAP
+		if ((dp = iso_dmap(dev,ino,0)) != 0)
+			ip->inode.iso_rdev = dp->d_dev;
+#endif
+		vp->v_op = cd9660_specop_p;
+		if ((nvp = checkalias(vp, ip->inode.iso_rdev, mntp)) != 0) {
+			/*
+			 * Reinitialize aliased inode.
+			 */
+			vp = nvp;
+			iq = VTOI(vp);
+			iq->i_vnode = vp;
+			iq->i_flag = 0;
+			ISO_ILOCK(iq);
+			iq->i_dev = dev;
+			iq->i_number = ino;
+			iq->i_mnt = ip->i_mnt;
+			/* copy everything from iso_extent to the end of the node */
+			bcopy(&ip->iso_extent,&iq->iso_extent,
+			      (char *)(ip + 1) - (char *)&ip->iso_extent);
+			insque(iq, ih);
+			/*
+			 * Discard unneeded vnode
+			 * (This introduces the need of INACTIVE modification)
+			 */
+			ip->inode.iso_mode = 0;
+			iso_iput(ip);
+			ip = iq;
+		}
+	}
+
+	if (ip->iso_extent == imp->root_extent)
+		vp->v_flag |= VROOT;
+
+	*ipp = ip;
+	return 0;
+}
+
+/*
+ * Give back a locked inode: release the inode lock, then drop one
+ * reference on its vnode.  Calling this on an inode that is not locked
+ * is a panic.
+ */
+iso_iput(ip)
+	register struct iso_node *ip;
+{
+
+	if (!(ip->i_flag & ILOCKED))
+		panic("iso_iput");
+
+	ISO_IUNLOCK(ip);
+	vrele(ITOV(ip));
+}
+
+/*
+ * The last reference to the vnode is gone.  Nothing is written back
+ * here: the inode flags are cleared and, when the vnode is unused and
+ * the inode's iso_mode is 0, the vnode is handed to vgone() so that it
+ * can be reused immediately.  Always returns 0.
+ */
+int
+cd9660_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vnp = ap->a_vp;
+	register struct iso_node *node = VTOI(vnp);
+
+	if (prtactive && vnp->v_usecount != 0)
+		vprint("cd9660_inactive: pushing active", vnp);
+
+	node->i_flag = 0;
+
+	/* a mode of 0 marks an inode that holds nothing worth keeping */
+	if (vnp->v_usecount == 0 && node->inode.iso_mode == 0)
+		vgone(vnp);
+
+	return 0;
+}
+
+/*
+ * Detach the iso_node from a vnode that is about to be reused: unhash
+ * the inode, purge its name cache entries, drop its reference on the
+ * device vnode and free the inode memory.  Always returns 0.
+ */
+int
+cd9660_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vnp = ap->a_vp;
+	register struct iso_node *node = VTOI(vnp);
+
+	if (prtactive && vnp->v_usecount != 0)
+		vprint("cd9660_reclaim: pushing active", vnp);
+
+	/*
+	 * Take the inode off its hash chain and leave it linked to
+	 * itself.
+	 */
+	remque(node);
+	node->i_forw = node;
+	node->i_back = node;
+
+	/*
+	 * Get rid of everything that still refers to the inode.
+	 */
+	cache_purge(vnp);
+	if (node->i_devvp) {
+		vrele(node->i_devvp);
+		node->i_devvp = 0;
+	}
+
+	FREE(vnp->v_data, M_ISOFSNODE);
+	vnp->v_data = NULL;
+
+	return 0;
+}
+
+/*
+ * Acquire the inode lock.  While somebody else holds it, flag the inode
+ * as wanted and sleep on it.  i_spare0 records the pid of the holder
+ * and i_spare1 the pid of a waiter; asking for a lock the current
+ * process already holds is a panic.
+ */
+iso_ilock(ip)
+	register struct iso_node *ip;
+{
+
+	for (;;) {
+		if ((ip->i_flag & ILOCKED) == 0)
+			break;
+		ip->i_flag |= IWANT;
+		if (ip->i_spare0 == curproc->p_pid)
+			panic("locking against myself");
+		ip->i_spare1 = curproc->p_pid;
+		(void) sleep((caddr_t)ip, PINOD);
+	}
+
+	/* the lock is free: take it and record ourselves as the holder */
+	ip->i_spare1 = 0;
+	ip->i_spare0 = curproc->p_pid;
+	ip->i_flag |= ILOCKED;
+}
+
+/*
+ * Release the inode lock and wake up anybody sleeping on the inode.
+ * Unlocking an inode that is not locked is reported with vprint() but
+ * otherwise tolerated.
+ */
+iso_iunlock(ip)
+	register struct iso_node *ip;
+{
+
+	if (!(ip->i_flag & ILOCKED))
+		vprint("iso_iunlock: unlocked inode", ITOV(ip));
+
+	ip->i_spare0 = 0;		/* nobody holds the lock any more */
+	ip->i_flag &= ~ILOCKED;
+
+	if ((ip->i_flag & IWANT) == 0)
+		return;
+	ip->i_flag &= ~IWANT;
+	wakeup((caddr_t)ip);
+}
+
+/*
+ * Fill in mode, link count, uid and gid of "inop" for a plain ISO 9660
+ * file described by directory record "isodir".
+ *
+ * "bp", if not null, is a buffer holding the file's extended attribute
+ * record.  If it is null, the mount has ISOFSMNT_EXTATT set and the
+ * record announces extended attribute blocks, they are read here.
+ * Without a version 1 extended attribute record the file is readable
+ * and executable by everyone and owned by uid 0, gid 0.
+ */
+void
+cd9660_defattr(isodir,inop,bp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+{
+	/*
+	 * Permission bits of the extended attribute record: when the
+	 * "deny" bit of perm[byte] is clear, the "grant" mode bit is given.
+	 */
+	static struct {
+		int	byte;
+		int	deny;
+		int	grant;
+	} permmap[] = {
+		{ 0, 0x40, VEXEC >> 6 },
+		{ 0, 0x10, VREAD >> 6 },
+		{ 0, 4,    VEXEC >> 3 },
+		{ 0, 1,    VREAD >> 3 },
+		{ 1, 0x40, VEXEC },
+		{ 1, 0x10, VREAD },
+	};
+	struct buf *xarbuf = NULL;	/* buffer read here, if any */
+	struct iso_mnt *imp;
+	struct iso_extended_attributes *xar = NULL;
+	int xarblks, n;
+
+	/*
+	 * Flag bit 2 marks a directory.  The link count is 1 even then:
+	 * if we return 2, fts() will assume there are no subdirectories
+	 * (just links for the path and .), so instead we return 1.
+	 */
+	inop->inode.iso_mode = (isonum_711(isodir->flags)&2) ? S_IFDIR : S_IFREG;
+	inop->inode.iso_links = 1;
+
+	if (bp == NULL) {
+		imp = inop->i_mnt;
+		if (imp->im_flags&ISOFSMNT_EXTATT) {
+			xarblks = isonum_711(isodir->ext_attr_length);
+			if (xarblks != 0) {
+				/* the attribute blocks sit just before the data */
+				iso_blkatoff(inop,-xarblks * imp->logical_block_size,&xarbuf);
+				bp = xarbuf;
+			}
+		}
+	}
+
+	if (bp != NULL) {
+		xar = (struct iso_extended_attributes *)bp->b_un.b_addr;
+
+		if (isonum_711(xar->version) != 1)
+			xar = NULL;	/* unknown layout: use the defaults */
+		else {
+			for (n = 0; n < sizeof(permmap)/sizeof(permmap[0]); n++)
+				if (!(xar->perm[permmap[n].byte]&permmap[n].deny))
+					inop->inode.iso_mode |= permmap[n].grant;
+			inop->inode.iso_uid = isonum_723(xar->owner); /* what about 0? */
+			inop->inode.iso_gid = isonum_723(xar->group); /* what about 0? */
+		}
+	}
+
+	if (xar == NULL) {
+		inop->inode.iso_mode |= VREAD|VEXEC|(VREAD|VEXEC)>>3|(VREAD|VEXEC)>>6;
+		inop->inode.iso_uid = (uid_t)0;
+		inop->inode.iso_gid = (gid_t)0;
+	}
+
+	if (xarbuf != NULL)
+		brelse(xarbuf);
+}
+
+/*
+ * Fill in the access, change and modification times of "inop".
+ *
+ * "bp", if not null, is a buffer holding the file's extended attribute
+ * record; if it is null the record is read here under the same
+ * conditions as in cd9660_defattr.  A version 1 extended attribute
+ * record supplies the three times, each falling back on another one
+ * when it cannot be converted.  Without such a record all three times
+ * are taken from the date in the directory record.
+ */
+void
+cd9660_deftstamp(isodir,inop,bp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+{
+	struct buf *xarbuf = NULL;	/* buffer read here, if any */
+	struct iso_mnt *imp;
+	struct iso_extended_attributes *xar = NULL;
+	int xarblks;
+
+	if (bp == NULL) {
+		imp = inop->i_mnt;
+		if (imp->im_flags&ISOFSMNT_EXTATT) {
+			xarblks = isonum_711(isodir->ext_attr_length);
+			if (xarblks != 0) {
+				iso_blkatoff(inop,-xarblks * imp->logical_block_size,&xarbuf);
+				bp = xarbuf;
+			}
+		}
+	}
+
+	if (bp != NULL) {
+		xar = (struct iso_extended_attributes *)bp->b_un.b_addr;
+
+		if (isonum_711(xar->version) != 1)
+			xar = NULL;	/* unknown layout: use the record date */
+		else {
+			/* atime: ftime, else ctime */
+			if (cd9660_tstamp_conv17(xar->ftime,&inop->inode.iso_atime) == 0)
+				cd9660_tstamp_conv17(xar->ctime,&inop->inode.iso_atime);
+			/* ctime: ctime, else whatever atime ended up as */
+			if (cd9660_tstamp_conv17(xar->ctime,&inop->inode.iso_ctime) == 0)
+				inop->inode.iso_ctime = inop->inode.iso_atime;
+			/* mtime: mtime, else whatever ctime ended up as */
+			if (cd9660_tstamp_conv17(xar->mtime,&inop->inode.iso_mtime) == 0)
+				inop->inode.iso_mtime = inop->inode.iso_ctime;
+		}
+	}
+
+	if (xar == NULL) {
+		cd9660_tstamp_conv7(isodir->date,&inop->inode.iso_ctime);
+		inop->inode.iso_atime = inop->inode.iso_ctime;
+		inop->inode.iso_mtime = inop->inode.iso_ctime;
+	}
+
+	if (xarbuf != NULL)
+		brelse(xarbuf);
+}
+
+/*
+ * Convert a 7 byte binary ISO 9660 time stamp into a timeval.
+ *
+ *	pi[0]	years since 1900 (unsigned)
+ *	pi[1]	month, pi[2] day, pi[3] hour, pi[4] minute, pi[5] second
+ *	pi[6]	offset from GMT in units of 15 minutes (signed)
+ *
+ * The stamp holds local time, so the offset is subtracted to obtain
+ * GMT; offsets outside -48 .. 52 are ignored.
+ *
+ * Returns 1 with *pu filled in, or 0 with *pu zeroed if the year lies
+ * before 1970.
+ */
+int
+cd9660_tstamp_conv7(pi,pu)
+	char *pi;
+	struct timeval *pu;
+{
+	long crtime;
+	int days;
+	int y, m, d, hour, minute, second, tz;
+
+	/* the year is an unsigned byte, but plain char may be signed */
+	y = (unsigned char)pi[0] + 1900;
+	m = pi[1];
+	d = pi[2];
+	hour = pi[3];
+	minute = pi[4];
+	second = pi[5];
+	/* the offset is a signed byte, but plain char may be unsigned */
+	tz = (signed char)pi[6];
+
+	if (y < 1970) {
+		pu->tv_sec = 0;
+		pu->tv_usec = 0;
+		return 0;
+	}
+
+#ifdef	ORIGINAL
+	/* computes day number relative to Sept. 19th,1989 */
+	/* don't even *THINK* about changing formula. It works! */
+	days = 367*(y-1980)-7*(y+(m+9)/12)/4-3*((y+(m-9)/7)/100+1)/4+275*m/9+d-100;
+#else
+	/*
+	 * Changed :-) to make it relative to Jan. 1st, 1970
+	 * and to disambiguate negative division
+	 */
+	days = 367*(y-1960)-7*(y+(m+9)/12)/4-3*((y+(m+9)/12-1)/100+1)/4+275*m/9+d-239;
+#endif
+	/* computed in long so that late years do not overflow an int */
+	crtime = ((((long)days * 24) + hour) * 60 + minute) * 60 + second;
+
+	/*
+	 * timezone offset is unreliable on some disks, so only use it
+	 * when it is in the legal range.  Positive offsets are east of
+	 * Greenwich, i.e. local time is ahead of GMT.
+	 */
+	if (-48 <= tz && tz <= 52)
+		crtime -= (long)tz * 15 * 60;
+
+	pu->tv_sec = crtime;
+	pu->tv_usec = 0;
+	return 1;
+}
+
+/*
+ * Convert "len" ASCII decimal digits starting at "begin" into a number.
+ * The characters are not checked for being digits.
+ */
+static unsigned
+cd9660_chars2ui(begin,len)
+	unsigned char *begin;
+	int len;
+{
+	unsigned rc;
+
+	for (rc = 0; --len >= 0;) {
+		rc *= 10;
+		rc += *begin++ - '0';
+	}
+	return rc;
+}
+
+/*
+ * Convert a 17 byte ISO 9660 time stamp (16 ASCII digits
+ * YYYYMMDDhhmmsscc followed by a binary GMT offset byte) into a timeval.
+ * The hundredths of a second are ignored.
+ *
+ * Returns 1 with *pu filled in, or 0 with *pu zeroed if the year is
+ * before 1970 or does not fit the one byte "years since 1900" field of
+ * the 7 byte format; this is what rejects an unset (all zero) stamp.
+ */
+int
+cd9660_tstamp_conv17(pi,pu)
+	unsigned char *pi;
+	struct timeval *pu;
+{
+	unsigned char buf[7];
+	unsigned year;
+
+	/* year:"0001"-"9999" -> -1900 */
+	year = cd9660_chars2ui(pi,4);
+	if (year < 1970 || year > 1900 + 255) {
+		pu->tv_sec = 0;
+		pu->tv_usec = 0;
+		return 0;
+	}
+	buf[0] = year - 1900;
+
+	/* month: " 1"-"12" -> 1 - 12 */
+	buf[1] = cd9660_chars2ui(pi + 4,2);
+
+	/* day: " 1"-"31" -> 1 - 31 */
+	buf[2] = cd9660_chars2ui(pi + 6,2);
+
+	/* hour: " 0"-"23" -> 0 - 23 */
+	buf[3] = cd9660_chars2ui(pi + 8,2);
+
+	/* minute:" 0"-"59" -> 0 - 59 */
+	buf[4] = cd9660_chars2ui(pi + 10,2);
+
+	/* second:" 0"-"59" -> 0 - 59 */
+	buf[5] = cd9660_chars2ui(pi + 12,2);
+
+	/* difference of GMT */
+	buf[6] = pi[16];
+
+	return cd9660_tstamp_conv7((char *)buf,pu);
+}
+
+/*
+ * Compute the inode number used for the directory that record "isodir"
+ * describes: the number of the first block after its extended attribute
+ * blocks (extent + ext_attr_length), multiplied by the logical block
+ * size of the mount.  The result is stored in *inump.
+ */
+void
+isodirino(inump,isodir,imp)
+	ino_t *inump;
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	*inump = imp->logical_block_size *
+	    (isonum_711(isodir->ext_attr_length) + isonum_733(isodir->extent));
+}
diff --git a/sys/isofs/cd9660/cd9660_node.h b/sys/isofs/cd9660/cd9660_node.h
new file mode 100644
index 000000000000..45de67f1a6be
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_node.h
@@ -0,0 +1,143 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_node.h 8.2 (Berkeley) 1/23/94
+ */
+
+/*
+ * Theoretically, directories can be more than 2Gb in length,
+ * however, in practice this seems unlikely. So, we define
+ * the type doff_t as a long to keep down the cost of doing
+ * lookup on a 32-bit machine. If you are porting to a 64-bit
+ * architecture, you should make doff_t the same as off_t.
+ *
+ * Note that doff_t is a preprocessor macro here, not a typedef.
+ */
+#define doff_t long
+
+/*
+ * File attributes kept for every node (embedded in struct iso_node
+ * as the `inode' member).  The Rock Ridge field handlers in
+ * cd9660_rrip.c store into these fields through ana->inop->inode.
+ */
+typedef struct {
+ struct timespec iso_atime; /* time of last access */
+ struct timespec iso_mtime; /* time of last modification */
+ struct timespec iso_ctime; /* time file changed */
+ u_short iso_mode; /* files access mode and type */
+ uid_t iso_uid; /* owner user id */
+ gid_t iso_gid; /* owner group id */
+ short iso_links; /* links of file */
+ dev_t iso_rdev; /* Major/Minor number for special */
+} ISO_RRIP_INODE;
+
+#ifdef ISODEVMAP
+/*
+ * For device# (major,minor) translation table.
+ * Only compiled in when ISODEVMAP is defined.
+ */
+struct iso_dnode {
+ struct iso_dnode *d_chain[2]; /* hash chain, MUST be first */
+ dev_t i_dev; /* device where dnode resides */
+ ino_t i_number; /* the identity of the inode */
+ dev_t d_dev; /* device # for translation */
+};
+/* shorthand names for the two hash chain links */
+#define d_forw d_chain[0]
+#define d_back d_chain[1]
+#endif
+
+/*
+ * Per-vnode private data of a cd9660 file or directory; the VTOI()
+ * and ITOV() macros convert between a vnode and this structure.
+ */
+struct iso_node {
+ struct iso_node *i_chain[2]; /* hash chain, MUST be first */
+ struct vnode *i_vnode; /* vnode associated with this inode */
+ struct vnode *i_devvp; /* vnode for block I/O */
+ u_long i_flag; /* see below */
+ dev_t i_dev; /* device where inode resides */
+ ino_t i_number; /* the identity of the inode */
+ /* we use the actual starting block of the file */
+ struct iso_mnt *i_mnt; /* filesystem associated with this inode */
+ struct lockf *i_lockf; /* head of byte-level lock list */
+ doff_t i_endoff; /* end of useful stuff in directory */
+ doff_t i_diroff; /* offset in dir, where we found last entry */
+ doff_t i_offset; /* offset of free space in directory */
+ ino_t i_ino; /* inode number of found directory */
+ long i_spare0;
+ long i_spare1;
+
+ long iso_extent; /* extent of file */
+ long i_size;
+ long iso_start; /* actual start of data of file (may be different */
+ /* from iso_extent, if file has extended attributes) */
+ ISO_RRIP_INODE inode; /* attributes, see ISO_RRIP_INODE above */
+};
+
+/* shorthand names for the two hash chain links */
+#define i_forw i_chain[0]
+#define i_back i_chain[1]
+
+/* flags (values for i_flag in struct iso_node) */
+#define ILOCKED 0x0001 /* inode is locked */
+#define IWANT 0x0002 /* some process waiting on lock */
+#define IACC 0x0020 /* inode access time to be updated */
+
+/* convert between a vnode and its iso_node (stored in v_data) */
+#define VTOI(vp) ((struct iso_node *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
+
+/* lock/unlock wrappers; iso_ilock()/iso_iunlock() are defined elsewhere */
+#define ISO_ILOCK(ip) iso_ilock(ip)
+#define ISO_IUNLOCK(ip) iso_iunlock(ip)
+
+/*
+ * Prototypes for ISOFS vnode operations.
+ * Each takes a pointer to the matching vop_*_args structure.
+ */
+int cd9660_lookup __P((struct vop_lookup_args *));
+int cd9660_open __P((struct vop_open_args *));
+int cd9660_close __P((struct vop_close_args *));
+int cd9660_access __P((struct vop_access_args *));
+int cd9660_getattr __P((struct vop_getattr_args *));
+int cd9660_read __P((struct vop_read_args *));
+int cd9660_ioctl __P((struct vop_ioctl_args *));
+int cd9660_select __P((struct vop_select_args *));
+int cd9660_mmap __P((struct vop_mmap_args *));
+int cd9660_seek __P((struct vop_seek_args *));
+int cd9660_readdir __P((struct vop_readdir_args *));
+int cd9660_abortop __P((struct vop_abortop_args *));
+int cd9660_inactive __P((struct vop_inactive_args *));
+int cd9660_reclaim __P((struct vop_reclaim_args *));
+int cd9660_bmap __P((struct vop_bmap_args *));
+int cd9660_lock __P((struct vop_lock_args *));
+int cd9660_unlock __P((struct vop_unlock_args *));
+int cd9660_strategy __P((struct vop_strategy_args *));
+int cd9660_print __P((struct vop_print_args *));
+int cd9660_islocked __P((struct vop_islocked_args *));
+/*
+ * Helpers that fill in default attributes / time stamps of a node;
+ * the Rock Ridge code calls them with a NULL buffer when a disc
+ * lacks the corresponding field.
+ */
+void cd9660_defattr __P((struct iso_directory_record *,
+ struct iso_node *, struct buf *));
+void cd9660_deftstamp __P((struct iso_directory_record *,
+ struct iso_node *, struct buf *));
+#ifdef ISODEVMAP
+/* device number translation, only with ISODEVMAP */
+struct iso_dnode *iso_dmap __P((dev_t, ino_t, int));
+void iso_dunmap __P((dev_t));
+#endif
diff --git a/sys/isofs/cd9660/cd9660_rrip.c b/sys/isofs/cd9660/cd9660_rrip.c
new file mode 100644
index 000000000000..0923fa014773
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_rrip.c
@@ -0,0 +1,685 @@
+/*-
+ * Copyright (c) 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_rrip.c 8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <sys/time.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/cd9660_rrip.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+/*
+ * PX: POSIX file attributes.
+ * Copies mode, uid, gid and link count (little endian halves only)
+ * into the node and marks the attribute field as done.
+ */
+static int
+cd9660_rrip_attr(p,ana)
+	ISO_RRIP_ATTR *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ISO_RRIP_INODE *attr = &ana->inop->inode;
+
+	attr->iso_mode = isonum_731(p->mode_l);
+	attr->iso_uid = (uid_t)isonum_731(p->uid_l);
+	attr->iso_gid = (gid_t)isonum_731(p->gid_l);
+	attr->iso_links = isonum_731(p->links_l);
+
+	/* no need to look for another PX field */
+	ana->fields &= ~ISO_SUSP_ATTR;
+	return ISO_SUSP_ATTR;
+}
+
+/*
+ * Default handler used when no PX field was found: complain and
+ * fall back to the plain ISO 9660 attributes.
+ */
+static void
+cd9660_rrip_defattr(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	struct iso_node *ip = ana->inop;
+
+	/* PX is mandatory in Rock Ridge, so this disc is broken */
+	printf("RRIP without PX field?\n");
+	cd9660_defattr(isodir,ip,NULL);
+}
+
+/*
+ * SL: symbolic link.
+ *
+ * Appends the path components of one SL field to the link text being
+ * built in ana->outbuf / *ana->outlen (at most ana->maxlen bytes).
+ * ana->cont carries over between SL fields: when set, the next
+ * component continues the previous one and gets no '/' in front.
+ *
+ * Returns ISO_SUSP_SLINK once the last SL field of the link (flags
+ * byte zero) has been consumed, 0 otherwise.  On overflow, on an
+ * unknown component flag or on a component that does not fit inside
+ * the SL field, the output is reset to empty, ana->fields is cleared
+ * to stop the scan, and 0 is returned.
+ */
+static int
+cd9660_rrip_slink(p,ana)
+	ISO_RRIP_SLINK *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	register ISO_RRIP_SLINK_COMPONENT *pcomp;
+	register ISO_RRIP_SLINK_COMPONENT *pcompe;
+	int len, wlen, cont, avail;
+	char *outbuf, *inbuf;
+
+	pcomp = (ISO_RRIP_SLINK_COMPONENT *)p->component;
+	pcompe = (ISO_RRIP_SLINK_COMPONENT *)((char *)p + isonum_711(p->h.length));
+	len = *ana->outlen;
+	outbuf = ana->outbuf;
+	cont = ana->cont;
+
+	/*
+	 * Gathering a Symbolic name from each component with path
+	 */
+	for (;
+	     pcomp < pcompe;
+	     pcomp = (ISO_RRIP_SLINK_COMPONENT *)((char *)pcomp + ISO_RRIP_SLSIZ
+						  + isonum_711(pcomp->clen))) {
+
+		/*
+		 * The data comes straight from the disc: make sure the
+		 * component header and its name lie inside this SL
+		 * field before looking at them.
+		 */
+		avail = (char *)pcompe - (char *)pcomp;
+		if (avail < ISO_RRIP_SLSIZ
+		    || avail - ISO_RRIP_SLSIZ < isonum_711(pcomp->clen)) {
+			printf("RRIP with truncated SL component?\n");
+			goto bad;
+		}
+
+		if (!cont) {
+			if (len < ana->maxlen) {
+				len++;
+				*outbuf++ = '/';
+			}
+		}
+		cont = 0;
+
+		/* "." and ".." are taken from the front of this string */
+		inbuf = "..";
+		wlen = 0;
+
+		switch (*pcomp->cflag) {
+
+		case ISO_SUSP_CFLAG_CURRENT:
+			/* Inserting Current */
+			wlen = 1;
+			break;
+
+		case ISO_SUSP_CFLAG_PARENT:
+			/* Inserting Parent */
+			wlen = 2;
+			break;
+
+		case ISO_SUSP_CFLAG_ROOT:
+			/*
+			 * Absolute path: throw away what was gathered so
+			 * far.  The leading slash is supplied when the
+			 * next component is appended.
+			 */
+			outbuf -= len;
+			len = 0;
+			break;
+
+		case ISO_SUSP_CFLAG_VOLROOT:
+			/* Inserting a mount point i.e. "/cdrom" */
+			/* restart from the beginning, as for ROOT */
+			outbuf -= len;
+			len = 0;
+			inbuf = ana->imp->im_mountp->mnt_stat.f_mntonname;
+			wlen = strlen(inbuf);
+			break;
+
+		case ISO_SUSP_CFLAG_HOST:
+			/* Inserting hostname i.e. "kurt.tools.de" */
+			inbuf = hostname;
+			wlen = hostnamelen;
+			break;
+
+		case ISO_SUSP_CFLAG_CONTINUE:
+			cont = 1;
+			/* fall thru */
+		case 0:
+			/* Inserting component */
+			wlen = isonum_711(pcomp->clen);
+			inbuf = pcomp->name;
+			break;
+		default:
+			printf("RRIP with incorrect flags?\n");
+			/* force the overflow check below to fail */
+			wlen = ana->maxlen + 1;
+			break;
+		}
+
+		if (len + wlen > ana->maxlen)
+			goto bad;
+
+		bcopy(inbuf,outbuf,wlen);
+		outbuf += wlen;
+		len += wlen;
+
+	}
+	ana->outbuf = outbuf;
+	*ana->outlen = len;
+	ana->cont = cont;
+
+	if (!isonum_711(p->flags)) {
+		ana->fields &= ~ISO_SUSP_SLINK;
+		return ISO_SUSP_SLINK;
+	}
+	return 0;
+
+bad:
+	/* indicate error to caller */
+	ana->cont = 1;
+	ana->fields = 0;
+	ana->outbuf -= *ana->outlen;
+	*ana->outlen = 0;
+	return 0;
+}
+
+/*
+ * NM: alternate name.
+ *
+ * Appends the name part carried by one NM field to ana->outbuf and
+ * adds its length to *ana->outlen.  Returns ISO_SUSP_ALTNAME when the
+ * name is complete (no CONTINUE flag), 0 when more NM fields follow.
+ * If the name would exceed ana->maxlen, the flags are unknown, or the
+ * field is too short to hold its own header, the name is dropped
+ * (output reset to empty, 0 returned) so that the caller falls back
+ * to the ISO 9660 name.
+ */
+static int
+cd9660_rrip_altname(p,ana)
+	ISO_RRIP_ALTNAME *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	char *inbuf;
+	int wlen;
+	int cont;
+
+	/* "." and ".." are taken from the front of this string */
+	inbuf = "..";
+	wlen = 0;
+	cont = 0;
+
+	switch (*p->flags) {
+	case ISO_SUSP_CFLAG_CURRENT:
+		/* Inserting Current */
+		wlen = 1;
+		break;
+
+	case ISO_SUSP_CFLAG_PARENT:
+		/* Inserting Parent */
+		wlen = 2;
+		break;
+
+	case ISO_SUSP_CFLAG_HOST:
+		/* Inserting hostname i.e. "kurt.tools.de" */
+		inbuf = hostname;
+		wlen = hostnamelen;
+		break;
+
+	case ISO_SUSP_CFLAG_CONTINUE:
+		cont = 1;
+		/* fall thru */
+	case 0:
+		/* Inserting component; the name starts at byte 5 */
+		wlen = isonum_711(p->h.length) - 5;
+		inbuf = (char *)p + 5;
+		if (wlen < 0) {
+			/*
+			 * The length byte comes from the disc; never
+			 * hand a negative count to bcopy().
+			 */
+			printf("RRIP with short NM field?\n");
+			wlen = ana->maxlen + 1;
+		}
+		break;
+
+	default:
+		printf("RRIP with incorrect NM flags?\n");
+		/* force the overflow check below to fail */
+		wlen = ana->maxlen + 1;
+		break;
+	}
+
+	if ((*ana->outlen += wlen) > ana->maxlen) {
+		/* treat as no name field */
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		ana->outbuf -= *ana->outlen - wlen;
+		*ana->outlen = 0;
+		return 0;
+	}
+
+	bcopy(inbuf,ana->outbuf,wlen);
+	ana->outbuf += wlen;
+
+	if (!cont) {
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		return ISO_SUSP_ALTNAME;
+	}
+	return 0;
+}
+
+/*
+ * Default name handler: used when no alternate (NM) name is
+ * available.  A first name byte of 0 yields ".", a first byte of 1
+ * yields "..", anything else is passed through isofntrans().
+ */
+static void
+cd9660_rrip_defname(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* pre-load "..", the two special cases only pick a length */
+	strcpy(ana->outbuf,"..");
+
+	if (*isodir->name == 0)
+		*ana->outlen = 1;
+	else if (*isodir->name == 1)
+		*ana->outlen = 2;
+	else
+		isofntrans(isodir->name,isonum_711(isodir->name_len),
+			   ana->outbuf,ana->outlen,
+			   1,isonum_711(isodir->flags)&4);
+}
+
+/*
+ * CL / PL: child or parent link.
+ * Stores the linked directory's location, shifted by the mount's
+ * block shift, in *ana->inump and reports which of the two fields
+ * was seen.
+ */
+static int
+cd9660_rrip_pclink(p,ana)
+	ISO_RRIP_CLINK *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	*ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift;
+	ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK);
+
+	if (*p->h.type == 'C')
+		return ISO_SUSP_CLINK;
+	return ISO_SUSP_PLINK;
+}
+
+/*
+ * RE: relocated directory.
+ * Empties the name gathered so far, stops further analysis and
+ * returns a combined mask so the caller can tell an RE field was hit.
+ */
+static int
+cd9660_rrip_reldir(p,ana)
+	ISO_RRIP_RELDIR *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* special hack to make caller aware of RE field */
+	ana->fields = 0;
+	*ana->outlen = 0;
+
+	return ISO_SUSP_RELDIR|ISO_SUSP_ALTNAME|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+}
+
+/*
+ * Run the converter matching the time stamp format of a TF field:
+ * the 17 byte form when `form17' is non-zero, the 7 byte form
+ * otherwise.
+ */
+static void
+cd9660_rrip_tsconv(form17,ptime,pu)
+	int form17;
+	unsigned char *ptime;
+	struct timespec *pu;
+{
+	if (form17)
+		cd9660_tstamp_conv17(ptime,pu);
+	else
+		cd9660_tstamp_conv7(ptime,pu);
+}
+
+/*
+ * TF: time stamps.
+ * The flags byte says which stamps are recorded and in which format;
+ * the stamps follow back to back in the order creation, modify,
+ * access, attributes.  Creation is skipped, a missing modify time is
+ * zeroed, and missing access/attribute times default to the modify
+ * time.
+ */
+static int
+cd9660_rrip_tstamp(p,ana)
+	ISO_RRIP_TSTAMP *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ISO_RRIP_INODE *ip = &ana->inop->inode;
+	unsigned char *ptime = p->time;
+	int form17 = *p->flags&ISO_SUSP_TSTAMP_FORM17;
+	int step = form17 ? 17 : 7;	/* size of one recorded stamp */
+
+	/* the creation time is not kept, just step over it */
+	if (*p->flags&ISO_SUSP_TSTAMP_CREAT)
+		ptime += step;
+
+	if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) {
+		cd9660_rrip_tsconv(form17,ptime,&ip->iso_mtime);
+		ptime += step;
+	} else
+		bzero(&ip->iso_mtime,sizeof(struct timeval));
+
+	if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) {
+		cd9660_rrip_tsconv(form17,ptime,&ip->iso_atime);
+		ptime += step;
+	} else
+		ip->iso_atime = ip->iso_mtime;
+
+	if (*p->flags&ISO_SUSP_TSTAMP_ATTR)
+		cd9660_rrip_tsconv(form17,ptime,&ip->iso_ctime);
+	else
+		ip->iso_ctime = ip->iso_mtime;
+
+	ana->fields &= ~ISO_SUSP_TSTAMP;
+	return ISO_SUSP_TSTAMP;
+}
+
+/*
+ * Default handler used when no TF field was found: take the time
+ * stamps from the plain ISO 9660 directory record.
+ */
+static void
+cd9660_rrip_deftstamp(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	struct iso_node *ip = ana->inop;
+
+	cd9660_deftstamp(isodir,ip,NULL);
+}
+
+/*
+ * PN: POSIX device number.
+ * When the high word is zero the low word holds a complete device
+ * number; otherwise the high word is the major and the minor is
+ * taken from the low word.
+ */
+static int
+cd9660_rrip_device(p,ana)
+	ISO_RRIP_DEVICE *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	unsigned hi = isonum_733(p->dev_t_high_l);
+	unsigned lo = isonum_733(p->dev_t_low_l);
+
+	if (hi != 0)
+		ana->inop->inode.iso_rdev = makedev( hi, minor(lo) );
+	else
+		ana->inop->inode.iso_rdev = makedev( major(lo), minor(lo) );
+
+	ana->fields &= ~ISO_SUSP_DEVICE;
+	return ISO_SUSP_DEVICE;
+}
+
+/*
+ * RR: flags telling which Rock Ridge fields are recorded.
+ * Fields not announced here are dropped from the set still being
+ * looked for; only the low eight bits of ana->fields are affected.
+ */
+static int
+cd9660_rrip_idflag(p,ana)
+	ISO_RRIP_IDFLAG *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* keep the high bits, mask the low byte with the RR flags */
+	ana->fields &= isonum_711(p->flags)|~0xff;
+
+	/* an announced RE field is handled right here */
+	if (ana->fields&ISO_SUSP_RELDIR)
+		return cd9660_rrip_reldir(p,ana);
+	else
+		return ISO_SUSP_IDFLAG;
+}
+
+/*
+ * CE: continuation area.
+ * Only records where the continuation lives (block, offset, length);
+ * cd9660_rrip_loop() reads it once the current area is exhausted.
+ */
+static int
+cd9660_rrip_cont(p,ana)
+	ISO_RRIP_CONT *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->iso_ce_len = isonum_733(p->length);
+	ana->iso_ce_off = isonum_733(p->offset);
+	ana->iso_ce_blk = isonum_733(p->location);
+
+	return ISO_SUSP_CONT;
+}
+
+/*
+ * ST: end of the system use entries.
+ * Clearing ana->fields makes cd9660_rrip_loop() stop scanning.
+ */
+static int
+cd9660_rrip_stop(p,ana)
+	ISO_SUSP_HEADER *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->fields = 0;	/* nothing left to look for */
+
+	return ISO_SUSP_STOP;
+}
+
+/*
+ * ER: extension reference.
+ * Accepts only an extension whose identifier is the ten characters
+ * "RRIP_1991A" and whose version is 1; anything else returns 0.
+ */
+static int
+cd9660_rrip_extref(p,ana)
+	ISO_RRIP_EXTREF *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	if (isonum_711(p->len_id) != 10)
+		return 0;
+	/* the identifier text starts at byte 8 of the field */
+	if (bcmp((char *)p + 8,"RRIP_1991A",10))
+		return 0;
+	if (isonum_711(p->version) != 1)
+		return 0;
+
+	ana->fields &= ~ISO_SUSP_EXTREF;
+	return ISO_SUSP_EXTREF;
+}
+
+/*
+ * One entry of a dispatch table handed to cd9660_rrip_loop().
+ * A table ends with an entry whose func (and func2) is 0.
+ */
+typedef struct {
+ char type[2]; /* two character field signature, e.g. "PX" */
+ int (*func)(); /* handler called for each matching field */
+ void (*func2)(); /* default handler, called if no field gave `result' */
+ int result; /* ISO_SUSP_* bits checked before calling func2 */
+} RRIP_TABLE;
+
+/*
+ * Walk the system use entries of directory record `isodir' and
+ * dispatch each one to the matching handler of `table'.
+ *
+ * Scanning stops when the area is exhausted, when a handler clears
+ * ana->fields (nothing left to look for), or when an entry is too
+ * short to be valid.  If a CE field was seen and fields are still
+ * wanted, the continuation area is read from disk and scanned the
+ * same way.  Afterwards the default handler (func2) of every table
+ * entry whose result bits never showed up is called.
+ *
+ * Returns the OR of all handler return values.
+ */
+static int
+cd9660_rrip_loop(isodir,ana,table)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+	RRIP_TABLE *table;
+{
+	register RRIP_TABLE *ptable;
+	register ISO_SUSP_HEADER *phead;
+	register ISO_SUSP_HEADER *pend;
+	struct buf *bp = NULL;
+	char *pwhead;
+	int result;
+	int entlen;
+
+	/*
+	 * Note: if the name length is even, one padding byte
+	 * follows the name
+	 */
+	pwhead = isodir->name + isonum_711(isodir->name_len);
+	if (!(isonum_711(isodir->name_len)&1))
+		pwhead++;
+
+	/* If it's not the '.' entry of the root dir obey SP field */
+	if (*isodir->name != 0
+	    || isonum_733(isodir->extent) != ana->imp->root_extent)
+		pwhead += ana->imp->rr_skip;
+	else
+		pwhead += ana->imp->rr_skip0;
+
+	phead = (ISO_SUSP_HEADER *)pwhead;
+	pend = (ISO_SUSP_HEADER *)((char *)isodir + isonum_711(isodir->length));
+
+	result = 0;
+	while (1) {
+		ana->iso_ce_len = 0;
+		/*
+		 * Note: "pend" should be more than one SUSP header
+		 */
+		while (pend >= phead + 1) {
+			if (isonum_711(phead->version) == 1) {
+				for (ptable = table; ptable->func; ptable++) {
+					if (*phead->type == *ptable->type
+					    && phead->type[1] == ptable->type[1]) {
+						result |= ptable->func(phead,ana);
+						break;
+					}
+				}
+				if (!ana->fields)
+					break;
+			}
+			/*
+			 * An entry shorter than its own header is
+			 * garbage; stepping by it would never advance
+			 * (length 0 would loop forever), so give up on
+			 * this area.
+			 */
+			entlen = isonum_711(phead->length);
+			if (entlen < (int)sizeof(*phead))
+				break;
+			/*
+			 * move to next SUSP
+			 * Hopefully this works with newer versions, too
+			 */
+			phead = (ISO_SUSP_HEADER *)((char *)phead + entlen);
+		}
+
+		if ( ana->fields && ana->iso_ce_len ) {
+			/*
+			 * Done with the previous continuation block;
+			 * release it before bp is reused, otherwise
+			 * the buffer is lost.
+			 */
+			if (bp) {
+				brelse(bp);
+				bp = NULL;
+			}
+			if (ana->iso_ce_blk >= ana->imp->volume_space_size
+			    || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size
+			    || bread(ana->imp->im_devvp,
+				     ana->iso_ce_blk * ana->imp->logical_block_size / DEV_BSIZE,
+				     ana->imp->logical_block_size,NOCRED,&bp))
+				/* what to do now? */
+				break;
+			phead = (ISO_SUSP_HEADER *)(bp->b_un.b_addr + ana->iso_ce_off);
+			pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len);
+		} else
+			break;
+	}
+	if (bp)
+		brelse(bp);
+	/*
+	 * If we don't find the Basic SUSP stuffs, just set default value
+	 *   ( attribute/time stamp )
+	 */
+	for (ptable = table; ptable->func2; ptable++)
+		if (!(ptable->result&result))
+			ptable->func2(isodir,ana);
+
+	return result;
+}
+
+/*
+ * Dispatch table used by cd9660_rrip_analyze().
+ * The entries with a default handler (PX, TF) come first because
+ * cd9660_rrip_loop() stops running defaults at the first entry
+ * without one.
+ */
+static RRIP_TABLE rrip_table_analyze[] = {
+ { "PX", cd9660_rrip_attr, cd9660_rrip_defattr, ISO_SUSP_ATTR },
+ { "TF", cd9660_rrip_tstamp, cd9660_rrip_deftstamp, ISO_SUSP_TSTAMP },
+ { "PN", cd9660_rrip_device, 0, ISO_SUSP_DEVICE },
+ { "RR", cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG },
+ { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 }
+};
+
+/*
+ * Fill the attributes, time stamps and device number of node `inop'
+ * from the Rock Ridge fields of `isodir'.  Returns the mask of
+ * ISO_SUSP_* fields reported by cd9660_rrip_loop().
+ */
+int
+cd9660_rrip_analyze(isodir,inop,imp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+
+	/* the fields we are interested in */
+	analyze.fields = ISO_SUSP_ATTR|ISO_SUSP_TSTAMP|ISO_SUSP_DEVICE;
+	analyze.imp = imp;
+	analyze.inop = inop;
+
+	return cd9660_rrip_loop(isodir,&analyze,rrip_table_analyze);
+}
+
+/*
+ * Get Alternate Name from 'NM' record.
+ * If no usable NM record is found, the translated ISO9660 name
+ * is returned instead (see cd9660_rrip_defname).
+ *
+ * The "NM" entry must stay first: cd9660_rrip_getname() skips it
+ * by stepping over the first table entry for "." and "..".
+ */
+static RRIP_TABLE rrip_table_getname[] = {
+ { "NM", cd9660_rrip_altname, cd9660_rrip_defname, ISO_SUSP_ALTNAME },
+ { "CL", cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+ { "PL", cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+ { "RE", cd9660_rrip_reldir, 0, ISO_SUSP_RELDIR },
+ { "RR", cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG },
+ { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 }
+};
+
+/*
+ * Produce the name of directory record `isodir' in outbuf/outlen
+ * (at most NAME_MAX bytes) and, for CL/PL links, the linked inode
+ * number in *inump.  Returns the mask of ISO_SUSP_* fields reported
+ * by cd9660_rrip_loop().
+ */
+int
+cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	ino_t *inump;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+	RRIP_TABLE *tab = rrip_table_getname;
+
+	*outlen = 0;
+	analyze.imp = imp;
+	analyze.inump = inump;
+	analyze.maxlen = NAME_MAX;
+	analyze.outlen = outlen;
+	analyze.outbuf = outbuf;
+	analyze.fields = ISO_SUSP_ALTNAME|ISO_SUSP_RELDIR|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+
+	switch (*isodir->name) {
+	case 0:
+	case 1:
+		/*
+		 * "." and ".." always get their fixed names; skip the
+		 * NM entry at the head of the table.
+		 */
+		cd9660_rrip_defname(isodir,&analyze);
+		analyze.fields &= ~ISO_SUSP_ALTNAME;
+		tab = &rrip_table_getname[1];
+		break;
+	default:
+		break;
+	}
+
+	return cd9660_rrip_loop(isodir,&analyze,tab);
+}
+
+/*
+ * Get Symbolic Name from 'SL' record
+ *
+ * Note: isodir should contain an SL record!
+ * (No entry here has a default handler, so nothing is substituted
+ * when the SL record is missing.)
+ */
+static RRIP_TABLE rrip_table_getsymname[] = {
+ { "SL", cd9660_rrip_slink, 0, ISO_SUSP_SLINK },
+ { "RR", cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG },
+ { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 }
+};
+
+/*
+ * Assemble the target of the symbolic link described by `isodir'
+ * into outbuf/outlen (at most MAXPATHLEN bytes).  Returns
+ * ISO_SUSP_SLINK when a complete link was found, 0 otherwise.
+ */
+int
+cd9660_rrip_getsymname(isodir,outbuf,outlen,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+	int found;
+
+	*outlen = 0;
+	analyze.imp = imp;
+	analyze.fields = ISO_SUSP_SLINK;
+	analyze.maxlen = MAXPATHLEN;
+	analyze.outlen = outlen;
+	analyze.outbuf = outbuf;
+	analyze.cont = 1;	/* don't start with a slash */
+
+	found = cd9660_rrip_loop(isodir,&analyze,rrip_table_getsymname);
+	return (found&ISO_SUSP_SLINK);
+}
+
+/*
+ * Dispatch table used by cd9660_rrip_offset() to look for the
+ * Rock Ridge extension reference (ER) field.
+ */
+static RRIP_TABLE rrip_table_extref[] = {
+ { "ER", cd9660_rrip_extref, 0, ISO_SUSP_EXTREF },
+ { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 }
+};
+
+/*
+ * Check for Rock Ridge Extension and return offset of its fields.
+ * Note: We require the ER field.
+ *
+ * The SP field is looked for right behind the one byte name of the
+ * record and, failing that, 15 bytes further on; the distance that
+ * was tried last is left in imp->rr_skip0.  Returns -1 if there is
+ * no SP field or no acceptable ER field, otherwise the skip value
+ * recorded in the SP field.
+ */
+int
+cd9660_rrip_offset(isodir,imp)
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	static int try_skip[] = { 0, 15 };
+	ISO_RRIP_OFFSET *p;
+	ISO_RRIP_ANALYZE analyze;
+	int n;
+
+	for (n = 0; n < 2; n++) {
+		/* the second try: maybe, it's a CDROM XA disc? */
+		imp->rr_skip0 = try_skip[n];
+		p = (ISO_RRIP_OFFSET *)(isodir->name + 1 + try_skip[n]);
+		if (!bcmp(p,"SP\7\1\276\357",6))
+			break;
+	}
+	if (n == 2)
+		return -1;
+
+	analyze.fields = ISO_SUSP_EXTREF;
+	analyze.imp = imp;
+	if (!(cd9660_rrip_loop(isodir,&analyze,rrip_table_extref)&ISO_SUSP_EXTREF))
+		return -1;
+
+	return isonum_711(p->skip);
+}
diff --git a/sys/isofs/cd9660/cd9660_rrip.h b/sys/isofs/cd9660/cd9660_rrip.h
new file mode 100644
index 000000000000..b4017281f065
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_rrip.h
@@ -0,0 +1,146 @@
+/*-
+ * Copyright (c) 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_rrip.h 8.1 (Berkeley) 1/21/94
+ */
+
+/*
+ * Header common to every system use entry: a two character
+ * signature, the length of the whole entry and a version byte.
+ * cd9660_rrip_loop() steps from entry to entry using `length' and
+ * only dispatches entries whose version is 1.
+ */
+typedef struct {
+ char type [ISODCL ( 0, 1)];
+ unsigned char length [ISODCL ( 2, 2)]; /* 711 */
+ unsigned char version [ISODCL ( 3, 3)];
+} ISO_SUSP_HEADER;
+
+/*
+ * "PX" entry: POSIX file attributes.  Every value is recorded twice,
+ * as a little endian (_l) and a big endian (_m) copy;
+ * cd9660_rrip_attr() reads the _l copies.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char mode_l [ISODCL ( 4, 7)]; /* 731 */
+ char mode_m [ISODCL ( 8, 11)]; /* 732 */
+ char links_l [ISODCL ( 12, 15)]; /* 731 */
+ char links_m [ISODCL ( 16, 19)]; /* 732 */
+ char uid_l [ISODCL ( 20, 23)]; /* 731 */
+ char uid_m [ISODCL ( 24, 27)]; /* 732 */
+ char gid_l [ISODCL ( 28, 31)]; /* 731 */
+ char gid_m [ISODCL ( 32, 35)]; /* 732 */
+} ISO_RRIP_ATTR;
+
+/*
+ * "PN" entry: POSIX device number, split into a high and a low
+ * word.  cd9660_rrip_device() reads each word with isonum_733()
+ * starting at the _l member, i.e. across the _l/_m pair.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char dev_t_high_l [ISODCL ( 4, 7)]; /* 731 */
+ char dev_t_high_m [ISODCL ( 8, 11)]; /* 732 */
+ char dev_t_low_l [ISODCL ( 12, 15)]; /* 731 */
+ char dev_t_low_m [ISODCL ( 16, 19)]; /* 732 */
+} ISO_RRIP_DEVICE;
+
+/*
+ * Component flags, used in the cflag byte of an SL component and
+ * (CONTINUE, CURRENT, PARENT, HOST only) in the flags byte of an
+ * NM entry.
+ */
+#define ISO_SUSP_CFLAG_CONTINUE 0x01
+#define ISO_SUSP_CFLAG_CURRENT 0x02
+#define ISO_SUSP_CFLAG_PARENT 0x04
+#define ISO_SUSP_CFLAG_ROOT 0x08
+#define ISO_SUSP_CFLAG_VOLROOT 0x10
+#define ISO_SUSP_CFLAG_HOST 0x20
+
+/*
+ * One path component inside an "SL" entry: flags, name length and
+ * the name bytes themselves.
+ */
+typedef struct {
+ u_char cflag [ISODCL ( 1, 1)];
+ u_char clen [ISODCL ( 2, 2)];
+ u_char name [0];
+} ISO_RRIP_SLINK_COMPONENT;
+/* bytes in front of the name (cflag + clen) */
+#define ISO_RRIP_SLSIZ 2
+
+/*
+ * "SL" entry: symbolic link.  `component' marks where the first
+ * ISO_RRIP_SLINK_COMPONENT starts; further components follow up to
+ * the end of the entry.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ u_char flags [ISODCL ( 4, 4)];
+ u_char component [ISODCL ( 5, 5)];
+} ISO_RRIP_SLINK;
+
+/*
+ * "NM" entry: alternate name.  The name bytes follow the flags
+ * byte; cd9660_rrip_altname() takes them from byte 5 up to the end
+ * of the entry.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char flags [ISODCL ( 4, 4)];
+} ISO_RRIP_ALTNAME;
+
+/* "CL" entry: child link, location of the relocated directory */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char dir_loc [ISODCL ( 4, 11)]; /* 733 */
+} ISO_RRIP_CLINK;
+
+/* "PL" entry: parent link, same layout as the child link */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char dir_loc [ISODCL ( 4, 11)]; /* 733 */
+} ISO_RRIP_PLINK;
+
+/* "RE" entry: relocated directory marker, header only */
+typedef struct {
+ ISO_SUSP_HEADER h;
+} ISO_RRIP_RELDIR;
+
+/*
+ * Bits of the flags byte of a "TF" entry.  FORM17 selects the 17
+ * byte stamp format (otherwise 7 bytes); the other bits say which
+ * stamps are recorded.  cd9660_rrip_tstamp() only looks at CREAT,
+ * MODIFY, ACCESS and ATTR.
+ */
+#define ISO_SUSP_TSTAMP_FORM17 0x80
+#define ISO_SUSP_TSTAMP_FORM7 0x00
+#define ISO_SUSP_TSTAMP_CREAT 0x01
+#define ISO_SUSP_TSTAMP_MODIFY 0x02
+#define ISO_SUSP_TSTAMP_ACCESS 0x04
+#define ISO_SUSP_TSTAMP_ATTR 0x08
+#define ISO_SUSP_TSTAMP_BACKUP 0x10
+#define ISO_SUSP_TSTAMP_EXPIRE 0x20
+#define ISO_SUSP_TSTAMP_EFFECT 0x40
+
+/*
+ * "TF" entry: time stamps.  `time' marks the start of the recorded
+ * stamps, which follow one another without gaps.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ unsigned char flags [ISODCL ( 4, 4)];
+ unsigned char time [ISODCL ( 5, 5)];
+} ISO_RRIP_TSTAMP;
+
+/* "RR" entry: flags telling which Rock Ridge fields are recorded */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ unsigned char flags [ISODCL ( 4, 4)];
+} ISO_RRIP_IDFLAG;
+
+/*
+ * "ER" entry: extension reference.  The identifier text follows
+ * this fixed part; cd9660_rrip_extref() compares it from byte 8.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char len_id [ISODCL ( 4, 4)];
+ char len_des [ISODCL ( 5, 5)];
+ char len_src [ISODCL ( 6, 6)];
+ char version [ISODCL ( 7, 7)];
+} ISO_RRIP_EXTREF;
+
+/*
+ * "SP" entry: marks the use of system use entries.
+ * cd9660_rrip_offset() checks the header and the two check bytes
+ * against "SP\7\1\276\357" and returns the skip value.
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char check [ISODCL ( 4, 5)];
+ char skip [ISODCL ( 6, 6)];
+} ISO_RRIP_OFFSET;
+
+/*
+ * "CE" entry: continuation area, given as block number, offset
+ * within the block and length; all three are read with isonum_733().
+ */
+typedef struct {
+ ISO_SUSP_HEADER h;
+ char location [ISODCL ( 4, 11)];
+ char offset [ISODCL ( 12, 19)];
+ char length [ISODCL ( 20, 27)];
+} ISO_RRIP_CONT;
diff --git a/sys/isofs/cd9660/cd9660_util.c b/sys/isofs/cd9660/cd9660_util.c
new file mode 100644
index 000000000000..f74f0515ff77
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_util.c
@@ -0,0 +1,236 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_util.c 8.1 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h> /* XXX */
+#include <miscfs/fifofs/fifo.h> /* XXX */
+#include <sys/malloc.h>
+#include <sys/dir.h>
+
+#include <isofs/cd9660/iso.h>
+
+#ifdef __notanymore__
+/*
+ * Function versions of the isonum_7xx number readers.  They are only
+ * compiled when __notanymore__ is defined.  The 16 and 32 bit
+ * readers below load the value through a (short *) or (long *) cast,
+ * so they return the bytes in host order and complain at run time
+ * when that is not the byte order the field is recorded in.
+ */
+
+/* 711: unsigned 8 bit number */
+int
+isonum_711 (p)
+unsigned char *p;
+{
+ return (*p);
+}
+
+/* 712: signed 8 bit number */
+int
+isonum_712 (p)
+signed char *p;
+{
+ return (*p);
+}
+
+/* 721: 16 bit number, little endian */
+int
+isonum_721 (p)
+unsigned char *p;
+{
+ /* little endian short */
+#if BYTE_ORDER != LITTLE_ENDIAN
+ printf ("isonum_721 called on non little-endian machine!\n");
+#endif
+
+ return *(short *)p;
+}
+
+/* 722: 16 bit number, big endian */
+int
+isonum_722 (p)
+unsigned char *p;
+{
+ /* big endian short */
+#if BYTE_ORDER != BIG_ENDIAN
+ printf ("isonum_722 called on non big-endian machine!\n");
+#endif
+
+ return *(short *)p;
+}
+
+/* 723: 16 bit number recorded in both byte orders; pick the native copy */
+int
+isonum_723 (p)
+unsigned char *p;
+{
+#if BYTE_ORDER == BIG_ENDIAN
+ return isonum_722 (p + 2);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ return isonum_721 (p);
+#else
+ printf ("isonum_723 unsupported byte order!\n");
+ return 0;
+#endif
+}
+
+/* 731: 32 bit number, little endian */
+int
+isonum_731 (p)
+unsigned char *p;
+{
+ /* little endian long */
+#if BYTE_ORDER != LITTLE_ENDIAN
+ printf ("isonum_731 called on non little-endian machine!\n");
+#endif
+
+ return *(long *)p;
+}
+
+/* 732: 32 bit number, big endian */
+int
+isonum_732 (p)
+unsigned char *p;
+{
+ /* big endian long */
+#if BYTE_ORDER != BIG_ENDIAN
+ printf ("isonum_732 called on non big-endian machine!\n");
+#endif
+
+ return *(long *)p;
+}
+
+/* 733: 32 bit number recorded in both byte orders; pick the native copy */
+int
+isonum_733 (p)
+unsigned char *p;
+{
+#if BYTE_ORDER == BIG_ENDIAN
+ return isonum_732 (p + 4);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ return isonum_731 (p);
+#else
+ printf ("isonum_733 unsupported byte order!\n");
+ return 0;
+#endif
+}
+#endif /* __notanymore__ */
+
+/*
+ * translate and compare a filename
+ * Note: Version number plus ';' may be omitted.
+ *
+ * `fn'/`fnlen' is the name being looked up, `isofn'/`isolen' the
+ * name recorded on the disc.  Upper case letters on the disc match
+ * either case in `fn'.  Returns 0 on a match and a non-zero value
+ * otherwise; when both names carry a version number the result is
+ * the difference of the two versions.
+ */
+int
+isofncmp(unsigned char *fn,int fnlen,unsigned char *isofn,int isolen)
+{
+	int fnver, isover;
+	char c;
+
+	for (; --fnlen >= 0; fn++) {
+		/* disc name used up while fn still has characters */
+		if (--isolen < 0)
+			return *fn;
+
+		c = *isofn++;
+		if (c == ';') {
+			/* version part of the disc name reached */
+			if (*fn == 0)
+				return 0;
+			if (*fn != ';')
+				return *fn;
+			fn++;
+
+			fnver = 0;
+			while (--fnlen >= 0) {
+				if (*fn < '0' || *fn > '9')
+					return -1;
+				fnver = fnver * 10 + *fn++ - '0';
+			}
+			isover = 0;
+			while (--isolen >= 0)
+				isover = isover * 10 + *isofn++ - '0';
+			return fnver - isover;
+		}
+
+		if (c == *fn)
+			continue;
+		/* only upper case letters on the disc may match loosely */
+		if (c < 'A' || c > 'Z')
+			return *fn - c;
+		if (c + ('a' - 'A') == *fn)
+			continue;
+		if (*fn >= 'a' && *fn <= 'z')
+			return *fn - ('a' - 'A') - c;
+		return *fn - c;
+	}
+
+	/* fn is used up: only ";..." or ".;..." may remain on the disc */
+	if (isolen > 0) {
+		if (*isofn == ';')
+			return 0;
+		if (*isofn == '.' && isofn[1] == ';')
+			return 0;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * translate a filename
+ *
+ * Copies the `infnlen' bytes at `infn' to `outfn' and stores the
+ * number of bytes written in *outfnlen.  If `assoc' is set the
+ * output is prefixed with ASSOCCHAR.  Unless `original' is set,
+ * upper case letters are folded to lower case and the name is cut
+ * at the version separator (";", or "." directly followed by ";").
+ * `outfn' must have room for infnlen + 1 bytes.
+ */
+void
+isofntrans(unsigned char *infn,int infnlen,
+	   unsigned char *outfn,unsigned short *outfnlen,
+	   int original,int assoc)
+{
+	unsigned char *infnend = infn + infnlen;
+	int fnidx = 0;		/* counts bytes written to outfn */
+
+	if (assoc) {
+		*outfn++ = ASSOCCHAR;
+		fnidx++;
+	}
+	/*
+	 * Bound the loop by the input, not by fnidx: fnidx already
+	 * counts the ASSOCCHAR prefix, and using it as the input limit
+	 * would drop the last character of an associated file's name.
+	 */
+	for (; infn < infnend; fnidx++) {
+		char c = *infn++;
+
+		if (!original && c >= 'A' && c <= 'Z')
+			*outfn++ = c + ('a' - 'A');
+		else if (!original && c == '.'
+			 && infn < infnend && *infn == ';')
+			/* look ahead only while still inside the input */
+			break;
+		else if (!original && c == ';')
+			break;
+		else
+			*outfn++ = c;
+	}
+	*outfnlen = fnidx;
+}
diff --git a/sys/isofs/cd9660/cd9660_vfsops.c b/sys/isofs/cd9660/cd9660_vfsops.c
new file mode 100644
index 000000000000..02dd92af66f6
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_vfsops.c
@@ -0,0 +1,681 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_vfsops.c 8.3 (Berkeley) 1/31/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/dkbad.h>
+#include <sys/disklabel.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+
+extern int enodev ();
+
+struct vfsops cd9660_vfsops = { /* VFS operations vector for cd9660; installed in mnt_op by cd9660_mountroot() */
+ cd9660_mount,
+ cd9660_start,
+ cd9660_unmount,
+ cd9660_root,
+ cd9660_quotactl,
+ cd9660_statfs,
+ cd9660_sync,
+ cd9660_vget,
+ cd9660_fhtovp,
+ cd9660_vptofh,
+ cd9660_init,
+};
+
+/*
+ * Called by vfs_mountroot when iso is going to be mounted as root.
+ *
+ * Name is updated by mount(8) after booting.
+ */
+#define ROOTNAME "root_device"
+
+static iso_mountfs();
+
+cd9660_mountroot() /* mount rootdev read-only as the root filesystem; returns 0 or an errno */
+{
+ register struct mount *mp;
+ extern struct vnode *rootvp;
+ struct proc *p = curproc; /* XXX */
+ struct iso_mnt *imp;
+ register struct fs *fs; /* not used in this function */
+ u_int size;
+ int error;
+ struct iso_args args;
+
+ /*
+ * Get vnodes for swapdev and rootdev.
+ */
+ if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
+ panic("cd9660_mountroot: can't setup bdevvp's");
+
+ mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+ bzero((char *)mp, (u_long)sizeof(struct mount));
+ mp->mnt_op = &cd9660_vfsops;
+ mp->mnt_flag = MNT_RDONLY; /* iso_mountfs() returns EROFS for anything else */
+ args.flags = ISOFSMNT_ROOT;
+ if (error = iso_mountfs(rootvp, mp, p, &args)) {
+ free(mp, M_MOUNT);
+ return (error);
+ }
+ if (error = vfs_lock(mp)) { /* could not lock the new mount: undo it */
+ (void)cd9660_unmount(mp, 0, p);
+ free(mp, M_MOUNT);
+ return (error);
+ }
+ TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+ mp->mnt_flag |= MNT_ROOTFS;
+ mp->mnt_vnodecovered = NULLVP;
+ imp = VFSTOISOFS(mp);
+ bzero(imp->im_fsmnt, sizeof(imp->im_fsmnt));
+ imp->im_fsmnt[0] = '/'; /* mounted-on name is "/" */
+ bcopy((caddr_t)imp->im_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+ MNAMELEN);
+ (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); /* zero-fill the rest of the name */
+ (void) cd9660_statfs(mp, &mp->mnt_stat, p);
+ vfs_unlock(mp);
+ return (0);
+}
+
+/*
+ * Flag to allow forcible unmounting.
+ */
+int iso_doforce = 1;
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * Copies the iso_args in from user space, refuses any mount that is not
+ * read-only (EROFS), looks up the block device named by args.fspec and
+ * hands it to iso_mountfs().  For MNT_UPDATE only the export list and the
+ * recorded names can change; the device must be the one already mounted.
+ * Returns 0 or an errno.
+ */
+cd9660_mount(mp, path, data, ndp, p)
+ register struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ struct vnode *devvp;
+ struct iso_args args;
+ u_int size;
+ int error;
+ struct iso_mnt *imp;
+
+ if (error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))
+ return (error);
+
+ if ((mp->mnt_flag & MNT_RDONLY) == 0)
+ return (EROFS);
+
+ /*
+ * If updating and no device name was given, all we do is
+ * update the export information.
+ */
+ if (mp->mnt_flag & MNT_UPDATE) {
+ imp = VFSTOISOFS(mp);
+ if (args.fspec == 0)
+ return (vfs_export(mp, &imp->im_export, &args.export));
+ }
+ /*
+ * Not an update, or updating the name: look up the name
+ * and verify that it refers to a sensible block device.
+ */
+ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+ if (error = namei(ndp))
+ return (error);
+ devvp = ndp->ni_vp;
+
+ if (devvp->v_type != VBLK) {
+ vrele(devvp);
+ return ENOTBLK;
+ }
+ if (major(devvp->v_rdev) >= nblkdev) {
+ vrele(devvp);
+ return ENXIO;
+ }
+ if ((mp->mnt_flag & MNT_UPDATE) == 0)
+ error = iso_mountfs(devvp, mp, p, &args);
+ else {
+ if (devvp != imp->im_devvp)
+ error = EINVAL; /* needs translation */
+ else
+ vrele(devvp); /* same device: drop the reference namei() returned */
+ }
+ if (error) {
+ vrele(devvp);
+ return error;
+ }
+ imp = VFSTOISOFS(mp);
+ (void) copyinstr(path, imp->im_fsmnt, sizeof(imp->im_fsmnt)-1, &size); /* record the mount point name */
+ bzero(imp->im_fsmnt + size, sizeof(imp->im_fsmnt) - size);
+ bcopy((caddr_t)imp->im_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+ MNAMELEN);
+ (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size); /* record the device name */
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ (void) cd9660_statfs(mp, &mp->mnt_stat, p);
+ return 0;
+}
+
+/*
+ * Common code for mount and mountroot
+ *
+ * Opens devvp, scans the volume descriptors from logical sector 16 on
+ * for the primary descriptor, validates its logical block size and fills
+ * in a freshly allocated iso_mnt that is hung off mp->mnt_data.  Unless
+ * ISOFSMNT_NORRIP is set in argp->flags, the first block of the root
+ * directory is read as well to probe for Rock Ridge extensions.
+ *
+ * Only read-only mounts are accepted (EROFS otherwise).  Returns 0 on
+ * success or an errno; on failure the device is closed again, the
+ * iso_mnt is freed and mp->mnt_data is cleared.
+ */
+static iso_mountfs(devvp, mp, p, argp)
+	register struct vnode *devvp;
+	struct mount *mp;
+	struct proc *p;
+	struct iso_args *argp;
+{
+	register struct iso_mnt *isomp = (struct iso_mnt *)0;
+	struct buf *bp = NULL;
+	dev_t dev = devvp->v_rdev;
+	int error = EINVAL;
+	int needclose = 0;
+	int ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+	extern struct vnode *rootvp;
+	int iso_bsize;
+	int iso_blknum;
+	struct iso_volume_descriptor *vdp;
+	struct iso_primary_descriptor *pri;
+	struct iso_directory_record *rootp;
+	int logical_block_size;
+
+	if (!ronly)
+		return EROFS;
+
+	/*
+	 * Disallow multiple mounts of the same device.
+	 * Disallow mounting of a device that is currently in use
+	 * (except for root, which might share swap device for miniroot).
+	 * Flush out any old buffers remaining from a previous use.
+	 */
+	if (error = vfs_mountedon(devvp))
+		return error;
+	if (vcount(devvp) > 1 && devvp != rootvp)
+		return EBUSY;
+	if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))
+		return (error);
+
+	if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))
+		return error;
+	needclose = 1;
+
+	/* This is the "logical sector size".  The standard says this
+	 * should be 2048 or the physical sector size on the device,
+	 * whichever is greater.  For now, we'll just use a constant.
+	 */
+	iso_bsize = ISO_DEFAULT_BLOCK_SIZE;
+
+	for (iso_blknum = 16; iso_blknum < 100; iso_blknum++) {
+		if (error = bread (devvp, btodb(iso_blknum * iso_bsize),
+				   iso_bsize, NOCRED, &bp))
+			goto out;
+
+		vdp = (struct iso_volume_descriptor *)bp->b_un.b_addr;
+		if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) {
+			error = EINVAL;
+			goto out;
+		}
+
+		if (isonum_711 (vdp->type) == ISO_VD_END) {
+			error = EINVAL;
+			goto out;
+		}
+
+		if (isonum_711 (vdp->type) == ISO_VD_PRIMARY)
+			break;
+		brelse(bp);
+		/*
+		 * vdp is stale from here on.  Forget the buffer so that
+		 * neither the code below nor the out: path touches it.
+		 */
+		bp = NULL;
+	}
+
+	if (bp == NULL) {
+		/* Scanned every candidate sector, no primary descriptor. */
+		error = EINVAL;
+		goto out;
+	}
+
+	pri = (struct iso_primary_descriptor *)vdp;
+
+	logical_block_size = isonum_723 (pri->logical_block_size);
+
+	/* must be a power of two between DEV_BSIZE and MAXBSIZE */
+	if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE
+	    || (logical_block_size & (logical_block_size - 1)) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+
+	rootp = (struct iso_directory_record *)pri->root_directory_record;
+
+	isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK);
+	bzero((caddr_t)isomp, sizeof *isomp);
+	isomp->logical_block_size = logical_block_size;
+	isomp->volume_space_size = isonum_733 (pri->volume_space_size);
+	bcopy (rootp, isomp->root, sizeof isomp->root);
+	isomp->root_extent = isonum_733 (rootp->extent);
+	isomp->root_size = isonum_733 (rootp->size);
+
+	/* im_bshift = log2(logical_block_size) */
+	isomp->im_bmask = logical_block_size - 1;
+	isomp->im_bshift = 0;
+	while ((1 << isomp->im_bshift) < isomp->logical_block_size)
+		isomp->im_bshift++;
+
+	bp->b_flags |= B_AGE;
+	brelse(bp);
+	bp = NULL;
+
+	mp->mnt_data = (qaddr_t)isomp;
+	mp->mnt_stat.f_fsid.val[0] = (long)dev;
+	mp->mnt_stat.f_fsid.val[1] = MOUNT_CD9660;
+	mp->mnt_maxsymlinklen = 0;
+	mp->mnt_flag |= MNT_LOCAL;
+	isomp->im_mountp = mp;
+	isomp->im_dev = dev;
+	isomp->im_devvp = devvp;
+
+	devvp->v_specflags |= SI_MOUNTEDON;
+
+	/* Check the Rock Ridge Extension support */
+	if (!(argp->flags & ISOFSMNT_NORRIP)) {
+		if (error = bread (isomp->im_devvp,
+				   (isomp->root_extent + isonum_711(rootp->ext_attr_length))
+				   * isomp->logical_block_size / DEV_BSIZE,
+				   isomp->logical_block_size,NOCRED,&bp))
+			goto out;
+
+		rootp = (struct iso_directory_record *)bp->b_un.b_addr;
+
+		if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) {
+			argp->flags |= ISOFSMNT_NORRIP;
+		} else {
+			argp->flags &= ~ISOFSMNT_GENS;
+		}
+
+		/*
+		 * The contents are valid,
+		 * but they will get reread as part of another vnode, so...
+		 */
+		bp->b_flags |= B_AGE;
+		brelse(bp);
+		bp = NULL;
+	}
+	isomp->im_flags = argp->flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS|ISOFSMNT_EXTATT);
+	switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) {
+	default:
+		isomp->iso_ftype = ISO_FTYPE_DEFAULT;
+		break;
+	case ISOFSMNT_GENS|ISOFSMNT_NORRIP:
+		isomp->iso_ftype = ISO_FTYPE_9660;
+		break;
+	case 0:
+		isomp->iso_ftype = ISO_FTYPE_RRIP;
+		break;
+	}
+
+	return 0;
+out:
+	if (bp)
+		brelse(bp);
+	if (needclose)
+		(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
+	if (isomp) {
+		/*
+		 * The only failure after isomp is allocated happens after
+		 * the device was flagged as mounted-on; undo that too, or
+		 * every later mount attempt would be refused.
+		 */
+		devvp->v_specflags &= ~SI_MOUNTEDON;
+		free((caddr_t)isomp, M_ISOFSMNT);
+		mp->mnt_data = (qaddr_t)0;
+	}
+	return error;
+}
+
+/*
+ * Make a filesystem operational.
+ * Nothing to do at the moment.
+ * All arguments are ignored; always returns 0.
+ */
+/* ARGSUSED */
+cd9660_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+ return 0;
+}
+
+/*
+ * unmount system call
+ *
+ * Flushes the vnodes of mp, closes and releases the device vnode and
+ * frees the iso_mnt.  MNT_FORCE is honoured only when iso_doforce is set
+ * and mp is not the root filesystem (EINVAL otherwise).
+ * Returns the vflush() error, or else the result of VOP_CLOSE().
+ */
+int
+cd9660_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ register struct iso_mnt *isomp;
+ int i, error, ronly, flags = 0; /* i and ronly are not used */
+
+ if (mntflags & MNT_FORCE) {
+ if (!iso_doforce || (mp->mnt_flag & MNT_ROOTFS))
+ return (EINVAL);
+ flags |= FORCECLOSE;
+ }
+#if 0
+ mntflushbuf(mp, 0);
+ if (mntinvalbuf(mp))
+ return EBUSY;
+#endif
+ if (error = vflush(mp, NULLVP, flags))
+ return (error);
+
+ isomp = VFSTOISOFS(mp);
+
+#ifdef ISODEVMAP
+ if (isomp->iso_ftype == ISO_FTYPE_RRIP)
+ iso_dunmap(isomp->im_dev);
+#endif
+
+ isomp->im_devvp->v_specflags &= ~SI_MOUNTEDON; /* device may be mounted again */
+ error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p);
+ vrele(isomp->im_devvp);
+ free((caddr_t)isomp, M_ISOFSMNT);
+ mp->mnt_data = (qaddr_t)0;
+ mp->mnt_flag &= ~MNT_LOCAL;
+ return (error);
+}
+
+/*
+ * Return root of a filesystem
+ *
+ * Builds a throw-away vnode/iso_node pair on the stack that describes the
+ * mount, computes the root inode number from the root directory record
+ * saved in the iso_mnt and asks iso_iget() for the real node.
+ * On success *vpp is the root vnode and 0 is returned; otherwise the
+ * iso_iget() error is returned and *vpp is left untouched.
+ */
+cd9660_root(mp, vpp)
+ struct mount *mp;
+ struct vnode **vpp;
+{
+ register struct iso_node *ip;
+ struct iso_node tip, *nip;
+ struct vnode tvp;
+ int error;
+ struct iso_mnt *imp = VFSTOISOFS (mp);
+ struct iso_directory_record *dp;
+
+ tvp.v_mount = mp; /* only the fields set below are initialised in tvp/tip */
+ tvp.v_data = &tip;
+ ip = VTOI(&tvp);
+ ip->i_vnode = &tvp;
+ ip->i_dev = imp->im_dev;
+ ip->i_diroff = 0;
+ dp = (struct iso_directory_record *)imp->root;
+ isodirino(&ip->i_number,dp,imp);
+
+ /*
+ * With RRIP we must use the `.' entry of the root directory.
+ * Simply tell iget, that it's a relocated directory.
+ */
+ error = iso_iget(ip,ip->i_number,
+ imp->iso_ftype == ISO_FTYPE_RRIP,
+ &nip,dp);
+ if (error)
+ return error;
+ *vpp = ITOV(nip);
+ return 0;
+}
+
+/*
+ * Do operations associated with quotas, not supported
+ * All arguments are ignored; always returns EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+cd9660_quotactl(mp, cmd, uid, arg, p)
+ struct mount *mp;
+ int cmd;
+ uid_t uid;
+ caddr_t arg;
+ struct proc *p;
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Get file system statistics.
+ *
+ * Fills *sbp from the iso_mnt of mp.  Free-space and file counts are all
+ * reported as 0.  The mount names are copied only when sbp is not the
+ * mount's own mnt_stat.  Always returns 0.
+ */
+cd9660_statfs(mp, sbp, p)
+ struct mount *mp;
+ register struct statfs *sbp;
+ struct proc *p;
+{
+ register struct iso_mnt *isomp;
+ register struct fs *fs; /* not used in this function */
+
+ isomp = VFSTOISOFS(mp);
+
+ sbp->f_type = MOUNT_CD9660;
+ sbp->f_bsize = isomp->logical_block_size;
+ sbp->f_iosize = sbp->f_bsize; /* XXX */
+ sbp->f_blocks = isomp->volume_space_size;
+ sbp->f_bfree = 0; /* total free blocks */
+ sbp->f_bavail = 0; /* blocks free for non superuser */
+ sbp->f_files = 0; /* total files */
+ sbp->f_ffree = 0; /* free file nodes */
+ if (sbp != &mp->mnt_stat) {
+ bcopy((caddr_t)mp->mnt_stat.f_mntonname,
+ (caddr_t)&sbp->f_mntonname[0], MNAMELEN);
+ bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
+ (caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
+ }
+ /* Use the first spare for flags: */
+ sbp->f_spare[0] = isomp->im_flags;
+ return 0;
+}
+
+/* ARGSUSED */
+int
+cd9660_sync(mp, waitfor, cred, p) /* sync entry point: a no-op that always returns 0 */
+ struct mount *mp;
+ int waitfor;
+ struct ucred *cred;
+ struct proc *p;
+{
+ return (0);
+}
+
+/*
+ * Flat namespace lookup.
+ * Currently unsupported.
+ * All arguments are ignored; always returns EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+cd9660_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * File handle to vnode
+ *
+ * Have to be really careful about stale file handles:
+ * - check that the inode number is in range
+ * - call iget() to get the locked inode
+ * - check for an unallocated inode (i_mode == 0)
+ * - check that the generation number matches
+ */
+
+struct ifid { /* file handle layout shared by cd9660_vptofh() and cd9660_fhtovp() */
+ ushort ifid_len; /* set to sizeof(struct ifid) by cd9660_vptofh() */
+ ushort ifid_pad; /* never written by the code in this file */
+ int ifid_ino; /* inode number (ip->i_number) */
+ long ifid_start; /* ip->iso_start; checked against the directory record in cd9660_fhtovp() */
+};
+
+/*
+ * Translate a file handle (struct ifid) into a vnode.
+ *
+ * The handle's inode number is taken as the position of the file's
+ * directory record on the device.  That record is read back and must
+ * lie within the volume, fit inside one logical block and agree with the
+ * start block stored in the handle; otherwise ESTALE is returned.
+ * EACCES is returned when nam matches no export entry.  On success *vpp,
+ * *exflagsp and *credanonp are filled in and 0 is returned.
+ */
+/* ARGSUSED */
+int
+cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	struct vnode tvp;
+	int error;
+	int lbn, off;
+	struct ifid *ifhp;
+	struct iso_mnt *imp;
+	struct buf *bp;
+	struct iso_directory_record *dirp;
+	struct iso_node tip, *ip, *nip;
+	struct netcred *np;
+
+	imp = VFSTOISOFS (mp);
+	ifhp = (struct ifid *)fhp;
+
+#ifdef ISOFS_DBG
+	printf("fhtovp: ino %d, start %ld\n",
+	       ifhp->ifid_ino, ifhp->ifid_start);
+#endif
+
+	np = vfs_export_lookup(mp, &imp->im_export, nam);
+	if (np == NULL)
+		return (EACCES);
+
+	/*
+	 * The handle comes from outside the kernel; a negative inode
+	 * number would otherwise slip past the upper bound check.
+	 */
+	lbn = iso_lblkno(imp, ifhp->ifid_ino);
+	if (lbn < 0 || lbn >= imp->volume_space_size) {
+		printf("fhtovp: lbn exceed volume space %d\n", lbn);
+		return (ESTALE);
+	}
+
+	off = iso_blkoff(imp, ifhp->ifid_ino);
+	if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) {
+		printf("fhtovp: crosses block boundary %d\n",
+		       off + ISO_DIRECTORY_RECORD_SIZE);
+		return (ESTALE);
+	}
+
+	error = bread(imp->im_devvp, btodb(lbn * imp->logical_block_size),
+		      imp->logical_block_size, NOCRED, &bp);
+	if (error) {
+		printf("fhtovp: bread error %d\n",error);
+		brelse(bp);
+		return (error);
+	}
+
+	dirp = (struct iso_directory_record *)(bp->b_un.b_addr + off);
+	if (off + isonum_711(dirp->length) > imp->logical_block_size) {
+		brelse(bp);
+		printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n",
+		       off+isonum_711(dirp->length), off,
+		       isonum_711(dirp->length));
+		return (ESTALE);
+	}
+
+	if (isonum_733(dirp->extent) + isonum_711(dirp->ext_attr_length) !=
+	    ifhp->ifid_start) {
+		printf("fhtovp: file start miss %d vs %ld\n",
+		       isonum_733(dirp->extent)+isonum_711(dirp->ext_attr_length),
+		       ifhp->ifid_start);
+		brelse(bp);
+		return (ESTALE);
+	}
+
+	/*
+	 * dirp points into bp, so the buffer has to stay held until
+	 * iso_iget() is done reading the directory record through it.
+	 */
+	ip = &tip;
+	tvp.v_mount = mp;
+	tvp.v_data = ip;
+	ip->i_vnode = &tvp;
+	ip->i_dev = imp->im_dev;
+	error = iso_iget(ip, ifhp->ifid_ino, 0, &nip, dirp);
+	brelse(bp);
+	if (error) {
+		*vpp = NULLVP;
+		printf("fhtovp: failed to get inode\n");
+		return (error);
+	}
+	ip = nip;
+	/*
+	 * XXX need generation number?
+	 */
+	if (ip->inode.iso_mode == 0) {
+		iso_iput(ip);
+		*vpp = NULLVP;
+		printf("fhtovp: inode mode == 0\n");
+		return (ESTALE);
+	}
+	*vpp = ITOV(ip);
+	*exflagsp = np->netc_exflags;
+	*credanonp = &np->netc_anon;
+	return 0;
+}
+
+/*
+ * Vnode pointer to File handle
+ *
+ * Stores the handle length, the inode number and the start block of vp
+ * in *fhp, viewed as a struct ifid.  Always returns 0.
+ * NOTE(review): ifid_pad is not written here -- confirm that callers
+ * clear the handle beforehand.
+ */
+/* ARGSUSED */
+cd9660_vptofh(vp, fhp)
+ struct vnode *vp;
+ struct fid *fhp;
+{
+ register struct iso_node *ip = VTOI(vp);
+ register struct ifid *ifhp;
+ register struct iso_mnt *mp = ip->i_mnt; /* not used below */
+
+ ifhp = (struct ifid *)fhp;
+ ifhp->ifid_len = sizeof(struct ifid);
+
+ ifhp->ifid_ino = ip->i_number;
+ ifhp->ifid_start = ip->iso_start;
+
+#ifdef ISOFS_DBG
+ printf("vptofh: ino %d, start %ld\n",
+ ifhp->ifid_ino,ifhp->ifid_start);
+#endif
+ return 0;
+}
diff --git a/sys/isofs/cd9660/cd9660_vnops.c b/sys/isofs/cd9660/cd9660_vnops.c
new file mode 100644
index 000000000000..59f5a73f5c86
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_vnops.c
@@ -0,0 +1,1038 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_vnops.c 8.3 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+#include <sys/malloc.h>
+#include <sys/dir.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+#if 0
+/*
+ * Mknod vnode call
+ * Actually remap the device number
+ *
+ * This function sits inside an "#if 0" region and is not compiled.
+ * Without ISODEVMAP it only releases the lookup state and fails with
+ * EINVAL.  With ISODEVMAP it records (or deletes) a device number mapping
+ * for an existing character or block device node on a Rock Ridge volume
+ * and then discards the vnode so it gets reloaded.
+ */
+cd9660_mknod(ndp, vap, cred, p)
+ struct nameidata *ndp;
+ struct ucred *cred;
+ struct vattr *vap;
+ struct proc *p;
+{
+#ifndef ISODEVMAP
+ free(ndp->ni_pnbuf, M_NAMEI);
+ vput(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ return EINVAL;
+#else
+ register struct vnode *vp;
+ struct iso_node *ip;
+ struct iso_dnode *dp;
+ int error;
+
+ vp = ndp->ni_vp;
+ ip = VTOI(vp);
+
+ if (ip->i_mnt->iso_ftype != ISO_FTYPE_RRIP
+ || vap->va_type != vp->v_type
+ || (vap->va_type != VCHR && vap->va_type != VBLK)) { /* only existing device nodes on RRIP volumes qualify */
+ free(ndp->ni_pnbuf, M_NAMEI);
+ vput(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ return EINVAL;
+ }
+
+ dp = iso_dmap(ip->i_dev,ip->i_number,1);
+ if (ip->inode.iso_rdev == vap->va_rdev || vap->va_rdev == VNOVAL) {
+ /* same as the unmapped one, delete the mapping */
+ remque(dp);
+ FREE(dp,M_CACHE);
+ } else
+ /* enter new mapping */
+ dp->d_dev = vap->va_rdev;
+
+ /*
+ * Remove inode so that it will be reloaded by iget and
+ * checked to see if it is an alias of an existing entry
+ * in the inode cache.
+ */
+ vput(vp);
+ vp->v_type = VNON;
+ vgone(vp);
+ return (0);
+#endif
+}
+#endif
+
+/*
+ * Open called.
+ *
+ * Nothing to do.
+ * The arguments are ignored; always returns 0.
+ */
+/* ARGSUSED */
+int
+cd9660_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ return (0);
+}
+
+/*
+ * Close called
+ *
+ * Nothing to do: no inode times are updated here.
+ * The arguments are ignored; always returns 0.
+ */
+/* ARGSUSED */
+int
+cd9660_close(ap)
+ struct vop_close_args /* {
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ return (0);
+}
+
+/*
+ * Access check on a vnode.
+ *
+ * No permission checking is performed here: the requested mode and the
+ * credentials are ignored and every request is granted (returns 0).
+ */
+/* ARGSUSED */
+cd9660_access(ap)
+ struct vop_access_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ return (0);
+}
+
+cd9660_getattr(ap) /* fill ap->a_vap from the in-core iso_node of ap->a_vp; always returns 0 */
+ struct vop_getattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+
+{
+ struct vnode *vp = ap->a_vp;
+ register struct vattr *vap = ap->a_vap;
+ register struct iso_node *ip = VTOI(vp);
+ int i; /* not used */
+
+ vap->va_fsid = ip->i_dev;
+ vap->va_fileid = ip->i_number;
+
+ vap->va_mode = ip->inode.iso_mode;
+ vap->va_nlink = ip->inode.iso_links;
+ vap->va_uid = ip->inode.iso_uid;
+ vap->va_gid = ip->inode.iso_gid;
+ vap->va_atime = ip->inode.iso_atime;
+ vap->va_mtime = ip->inode.iso_mtime;
+ vap->va_ctime = ip->inode.iso_ctime;
+ vap->va_rdev = ip->inode.iso_rdev;
+
+ vap->va_size = (u_quad_t) ip->i_size;
+ vap->va_flags = 0;
+ vap->va_gen = 1; /* constant generation number */
+ vap->va_blocksize = ip->i_mnt->logical_block_size;
+ vap->va_bytes = (u_quad_t) ip->i_size; /* reported equal to the file size */
+ vap->va_type = vp->v_type;
+ return (0);
+}
+
+#if ISO_DEFAULT_BLOCK_SIZE >= NBPG
+#ifdef DEBUG
+extern int doclusterread;
+#else
+#define doclusterread 1
+#endif
+#else
+/* XXX until cluster routines can handle block sizes less than one page */
+#define doclusterread 0
+#endif
+
+/*
+ * Vnode op for reading.
+ *
+ * Copies data to the caller one logical block at a time until the
+ * request is satisfied, the end of the file is reached or an error
+ * occurs.  With doclusterread set the block is fetched through
+ * cluster_read(); otherwise one block of read-ahead is issued through
+ * breadn() when the access follows the previously read block.
+ * Returns 0 or an errno; a negative offset yields EINVAL.
+ */
+cd9660_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ register struct uio *uio = ap->a_uio;
+ register struct iso_node *ip = VTOI(vp);
+ register struct iso_mnt *imp;
+ struct buf *bp;
+ daddr_t lbn, bn, rablock;
+ off_t diff;
+ int rasize, error = 0;
+ long size, n, on;
+
+ if (uio->uio_resid == 0)
+ return (0);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ ip->i_flag |= IACC;
+ imp = ip->i_mnt;
+ do {
+ lbn = iso_lblkno(imp, uio->uio_offset); /* logical block holding the offset */
+ on = iso_blkoff(imp, uio->uio_offset); /* offset inside that block */
+ n = min((unsigned)(imp->logical_block_size - on),
+ uio->uio_resid);
+ diff = (off_t)ip->i_size - uio->uio_offset; /* bytes left before end of file */
+ if (diff <= 0)
+ return (0);
+ if (diff < n)
+ n = diff;
+ size = iso_blksize(imp, ip, lbn);
+ rablock = lbn + 1;
+ if (doclusterread) {
+ if (iso_lblktosize(imp, rablock) <= ip->i_size)
+ error = cluster_read(vp, (off_t)ip->i_size,
+ lbn, size, NOCRED, &bp);
+ else
+ error = bread(vp, lbn, size, NOCRED, &bp);
+ } else {
+ if (vp->v_lastr + 1 == lbn &&
+ iso_lblktosize(imp, rablock) < ip->i_size) { /* sequential access: read one block ahead */
+ rasize = iso_blksize(imp, ip, rablock);
+ error = breadn(vp, lbn, size, &rablock,
+ &rasize, 1, NOCRED, &bp);
+ } else
+ error = bread(vp, lbn, size, NOCRED, &bp);
+ }
+ vp->v_lastr = lbn;
+ n = min(n, size - bp->b_resid); /* NOTE(review): bp is dereferenced before error is checked */
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+
+ error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
+ if (n + on == imp->logical_block_size ||
+ uio->uio_offset == (off_t)ip->i_size)
+ bp->b_flags |= B_AGE; /* block fully consumed or end of file reached */
+ brelse(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n != 0);
+ return (error);
+}
+
+/* ARGSUSED */
+int
+cd9660_ioctl(ap) /* no ioctls are supported: logs a message and returns ENOTTY */
+ struct vop_ioctl_args /* {
+ struct vnode *a_vp;
+ int a_command;
+ caddr_t a_data;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ printf("You did ioctl for isofs !!\n");
+ return (ENOTTY);
+}
+
+/* ARGSUSED */
+int
+cd9660_select(ap) /* always returns 1, whatever was asked for */
+ struct vop_select_args /* {
+ struct vnode *a_vp;
+ int a_which;
+ int a_fflags;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ /*
+ * We should really check to see if I/O is possible.
+ */
+ return (1);
+}
+
+/*
+ * Mmap a file
+ *
+ * NB Currently unsupported.
+ * The arguments are ignored; always returns EINVAL.
+ */
+/* ARGSUSED */
+int
+cd9660_mmap(ap)
+ struct vop_mmap_args /* {
+ struct vnode *a_vp;
+ int a_fflags;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ return (EINVAL);
+}
+
+/*
+ * Seek on a file
+ *
+ * Nothing to do, so just return.
+ * Neither offset is examined; always returns 0.
+ */
+/* ARGSUSED */
+int
+cd9660_seek(ap)
+ struct vop_seek_args /* {
+ struct vnode *a_vp;
+ off_t a_oldoff;
+ off_t a_newoff;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+
+ return (0);
+}
+
+/*
+ * Structure for reading directories
+ *
+ * Working state of one cd9660_readdir() call, shared with iso_shipdir()
+ * and iso_uiodir().
+ */
+struct isoreaddir {
+ struct dirent saveent; /* regular entry held back by iso_shipdir(); d_namlen 0 = none */
+ struct dirent assocent; /* associated-file entry held back by iso_shipdir(); d_namlen 0 = none */
+ struct dirent current; /* entry built from the directory record being examined */
+ off_t saveoff; /* value of curroff when saveent was stored */
+ off_t assocoff; /* value of curroff when assocent was stored */
+ off_t curroff; /* current position in the directory */
+ struct uio *uio; /* where entries are copied to */
+ off_t uio_off; /* offset passed along with the last entry copied out */
+ u_int *cookiep; /* optional array receiving one offset per entry; 0 if unused */
+ int ncookies; /* slots left in cookiep */
+ int eof; /* cleared by iso_uiodir() when it runs out of room */
+};
+
+static int
+iso_uiodir(idp,dp,off) /* copy the entry *dp out to idp->uio, recording off as its offset */
+ struct isoreaddir *idp;
+ struct dirent *dp;
+ off_t off;
+{
+ int error;
+
+ dp->d_name[dp->d_namlen] = 0; /* terminate the name */
+ dp->d_reclen = DIRSIZ(dp);
+
+ if (idp->uio->uio_resid < dp->d_reclen) { /* entry does not fit any more */
+ idp->eof = 0;
+ return -1; /* -1 = out of room; cd9660_readdir() turns it into success */
+ }
+
+ if (idp->cookiep) {
+ if (idp->ncookies <= 0) { /* no cookie slot left */
+ idp->eof = 0;
+ return -1;
+ }
+
+ *idp->cookiep++ = off;
+ --idp->ncookies;
+ }
+
+ if (error = uiomove(dp,dp->d_reclen,idp->uio))
+ return error;
+ idp->uio_off = off; /* remember how far the caller has been served */
+ return 0;
+}
+
+static int
+iso_shipdir(idp) /* buffer idp->current; first flush held-back entries that carry a different name */
+ struct isoreaddir *idp;
+{
+ struct dirent *dp;
+ int cl, sl, assoc;
+ int error;
+ char *cname, *sname;
+
+ cl = idp->current.d_namlen;
+ cname = idp->current.d_name;
+ if (assoc = cl > 1 && *cname == ASSOCCHAR) { /* compare without the ASSOCCHAR prefix */
+ cl--;
+ cname++;
+ }
+
+ dp = &idp->saveent;
+ sname = dp->d_name;
+ if (!(sl = dp->d_namlen)) { /* no regular entry held back: use the associated one */
+ dp = &idp->assocent;
+ sname = dp->d_name + 1;
+ sl = dp->d_namlen - 1;
+ }
+ if (sl > 0) {
+ if (sl != cl
+ || bcmp(sname,cname,sl)) { /* name changed: ship what was held back */
+ if (idp->assocent.d_namlen) {
+ if (error = iso_uiodir(idp,&idp->assocent,idp->assocoff))
+ return error;
+ idp->assocent.d_namlen = 0;
+ }
+ if (idp->saveent.d_namlen) {
+ if (error = iso_uiodir(idp,&idp->saveent,idp->saveoff))
+ return error;
+ idp->saveent.d_namlen = 0;
+ }
+ }
+ }
+ idp->current.d_reclen = DIRSIZ(&idp->current);
+ if (assoc) { /* hold back the current entry, replacing an earlier one of the same kind */
+ idp->assocoff = idp->curroff;
+ bcopy(&idp->current,&idp->assocent,idp->current.d_reclen);
+ } else {
+ idp->saveoff = idp->curroff;
+ bcopy(&idp->current,&idp->saveent,idp->current.d_reclen);
+ }
+ return 0;
+}
+
+/*
+ * Vnode op for readdir
+ * XXX make sure everything still works now that eofflagp and cookiep
+ * are no longer args.
+ *
+ * Walks the directory records of a_vp starting at a_uio->uio_offset and
+ * copies one struct dirent per record to a_uio.  On ISO_FTYPE_DEFAULT
+ * mounts entries go through iso_shipdir(), which holds each entry back
+ * until a record with a different name shows up.  On return uio_offset
+ * is the offset recorded with the last entry copied out, or unchanged
+ * when nothing was copied.  Returns 0 or an errno; running out of room
+ * in a_uio is not an error.
+ */
+int
+cd9660_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct uio *uio = ap->a_uio;
+	struct isoreaddir *idp;
+	int entryoffsetinblock;
+	int error = 0;
+	int endsearch;
+	struct iso_directory_record *ep;
+	u_short elen;
+	int reclen;
+	struct iso_mnt *imp;
+	struct iso_node *ip;
+	struct buf *bp = NULL;
+
+	ip = VTOI(ap->a_vp);
+	imp = ip->i_mnt;
+
+	MALLOC(idp,struct isoreaddir *,sizeof(*idp),M_TEMP,M_WAITOK);
+	idp->saveent.d_namlen = 0;
+	idp->assocent.d_namlen = 0;
+	idp->uio = uio;
+#if 0
+	idp->cookiep = cookies;
+	idp->ncookies = ncookies;
+#else
+	idp->cookiep = 0;
+	idp->ncookies = 0;
+#endif
+	idp->eof = 1;
+	idp->curroff = uio->uio_offset;
+	/*
+	 * If no entry gets copied out, the offset handed back below must
+	 * be the one we were called with, not uninitialised memory.
+	 */
+	idp->uio_off = uio->uio_offset;
+
+	entryoffsetinblock = iso_blkoff(imp, idp->curroff);
+	if (entryoffsetinblock != 0) {
+		if (error = iso_blkatoff(ip, idp->curroff, &bp)) {
+			FREE(idp,M_TEMP);
+			return (error);
+		}
+	}
+
+	endsearch = ip->i_size;
+
+	while (idp->curroff < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+
+		if (iso_blkoff(imp, idp->curroff) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if (error = iso_blkatoff(ip, idp->curroff, &bp))
+				break;
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+
+		ep = (struct iso_directory_record *)
+			(bp->b_un.b_addr + entryoffsetinblock);
+
+		reclen = isonum_711 (ep->length);
+		if (reclen == 0) {
+			/*
+			 * skip to next block, if any.  Always advance by
+			 * at least one byte: rounding up an offset that is
+			 * already block aligned would loop forever on a
+			 * block that starts with a zero length record.
+			 */
+			idp->curroff += imp->logical_block_size -
+				iso_blkoff(imp, idp->curroff);
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size) {
+			error = EINVAL;
+			/* illegal directory, so stop looking */
+			break;
+		}
+
+		idp->current.d_namlen = isonum_711 (ep->name_len);
+		if (isonum_711(ep->flags)&2)
+			isodirino(&idp->current.d_fileno,ep,imp);
+		else
+			/*
+			 * The inode number is the device position of the
+			 * record: start of this block plus the offset
+			 * inside the block, not the offset within the
+			 * whole directory.
+			 */
+			idp->current.d_fileno = dbtob(bp->b_blkno) +
+				entryoffsetinblock;
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + idp->current.d_namlen) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		idp->curroff += reclen;
+		/*
+		 * Build the dirent according to the naming scheme in use.
+		 */
+		switch (imp->iso_ftype) {
+		case ISO_FTYPE_RRIP:
+			cd9660_rrip_getname(ep,idp->current.d_name,
+					    (u_short *)&idp->current.d_namlen,
+					    &idp->current.d_fileno,imp);
+			if (idp->current.d_namlen)
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+			break;
+		default:	/* ISO_FTYPE_DEFAULT || ISO_FTYPE_9660 */
+			strcpy(idp->current.d_name,"..");
+			switch (ep->name[0]) {
+			case 0:
+				/* name byte 0: reported as "." */
+				idp->current.d_namlen = 1;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			case 1:
+				/* name byte 1: reported as ".." */
+				idp->current.d_namlen = 2;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			default:
+				isofntrans(ep->name,idp->current.d_namlen,
+					   idp->current.d_name, &elen,
+					   imp->iso_ftype == ISO_FTYPE_9660,
+					   isonum_711(ep->flags)&4);
+				idp->current.d_namlen = (u_char)elen;
+				if (imp->iso_ftype == ISO_FTYPE_DEFAULT)
+					error = iso_shipdir(idp);
+				else
+					error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			}
+		}
+		if (error)
+			break;
+
+		entryoffsetinblock += reclen;
+	}
+
+	if (!error && imp->iso_ftype == ISO_FTYPE_DEFAULT) {
+		/* an empty name flushes whatever iso_shipdir() still holds */
+		idp->current.d_namlen = 0;
+		error = iso_shipdir(idp);
+	}
+	/* a negative value only means the caller's buffer is full */
+	if (error < 0)
+		error = 0;
+
+	if (bp)
+		brelse (bp);
+
+	uio->uio_offset = idp->uio_off;
+#if 0
+	*eofflagp = idp->eof;
+#endif
+
+	FREE(idp,M_TEMP);
+
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link
+ * Shouldn't we get the parent vnode and read the data from there?
+ * This could eventually result in deadlocks in cd9660_lookup.
+ * But otherwise the block read here is in the block buffer two times.
+ */
+typedef struct iso_directory_record ISODIR;
+typedef struct iso_node ISONODE;
+typedef struct iso_mnt ISOMNT;
+/*
+ * Read the target of a symbolic link into ap->a_uio.
+ * Only Rock Ridge mounts are handled; anything else yields EINVAL.  The
+ * inode number is used as the device position of the link's directory
+ * record, from which the link text is gathered.
+ */
+int
+cd9660_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	ISONODE	*ip;
+	ISODIR	*dirp;
+	ISOMNT	*imp;
+	struct	buf *bp;
+	u_short	symlen;
+	int	error;
+	char	*symname;
+
+	ip  = VTOI(ap->a_vp);
+	imp = ip->i_mnt;
+
+	if (imp->iso_ftype != ISO_FTYPE_RRIP)
+		return EINVAL;
+
+	/*
+	 * Get the parent directory block that holds this inode's record.
+	 * The read has to start at the beginning of the logical block,
+	 * because the record is addressed below by its offset inside the
+	 * logical block (i_number & im_bmask).
+	 */
+	error = bread(imp->im_devvp,
+		      (daddr_t)((ip->i_number >> imp->im_bshift) <<
+				(imp->im_bshift - DEV_BSHIFT)),
+		      imp->logical_block_size,
+		      NOCRED,
+		      &bp);
+	if (error) {
+		brelse(bp);
+		return EINVAL;
+	}
+
+	/*
+	 * Setup the directory pointer for this inode
+	 */
+	dirp = (ISODIR *)(bp->b_un.b_addr + (ip->i_number & imp->im_bmask));
+#ifdef DEBUG
+	printf("lbn=%d,off=%d,bsize=%d,DEV_BSIZE=%d, dirp= %08x, b_addr=%08x, offset=%08x(%08x)\n",
+	       (daddr_t)(ip->i_number >> imp->im_bshift),
+	       ip->i_number & imp->im_bmask,
+	       imp->logical_block_size,
+	       DEV_BSIZE,
+	       dirp,
+	       bp->b_un.b_addr,
+	       ip->i_number,
+	       ip->i_number & imp->im_bmask );
+#endif
+
+	/*
+	 * Just make sure, we have a right one....
+	 *   1: Check not cross boundary on block
+	 */
+	if ((ip->i_number & imp->im_bmask) + isonum_711(dirp->length)
+	    > imp->logical_block_size) {
+		brelse(bp);
+		return EINVAL;
+	}
+
+	/*
+	 * Now get a buffer
+	 * Abuse a namei buffer for now.
+	 */
+	MALLOC(symname,char *,MAXPATHLEN,M_NAMEI,M_WAITOK);
+
+	/*
+	 * Gather the symbolic name from the SL record.
+	 */
+	if (cd9660_rrip_getsymname(dirp,symname,&symlen,imp) == 0) {
+		FREE(symname,M_NAMEI);
+		brelse(bp);
+		return EINVAL;
+	}
+	/*
+	 * Don't forget before you leave from home ;-)
+	 */
+	brelse(bp);
+
+	/*
+	 * Hand the symbolic name back to the caller.
+	 */
+	error = uiomove(symname,symlen,ap->a_uio);
+
+	FREE(symname,M_NAMEI);
+
+	return error;
+}
+
+/*
+ * Abort op, called after namei() when a CREATE/DELETE isn't actually
+ * done.  The pathname buffer is freed only when the component name has
+ * HASBUF set and SAVESTART clear; in every case 0 is returned.
+ */
+int
+cd9660_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	/* nothing to release unless exactly HASBUF (without SAVESTART) is set */
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) != HASBUF)
+		return (0);
+
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (0);
+}
+
+/*
+ * Lock the iso_node behind a vnode (via ISO_ILOCK); always returns 0.
+ */
+int
+cd9660_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct iso_node *inode;
+
+	inode = VTOI(ap->a_vp);
+	ISO_ILOCK(inode);
+	return (0);
+}
+
+/*
+ * Unlock the iso_node behind a vnode (via ISO_IUNLOCK); always returns 0.
+ * Unlocking a node whose ILOCKED flag is clear is a fatal error (panic).
+ */
+int
+cd9660_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct iso_node *inode = VTOI(ap->a_vp);
+
+	if ((inode->i_flag & ILOCKED) == 0)
+		panic("cd9660_unlock NOT LOCKED");
+	ISO_IUNLOCK(inode);
+	return (0);
+}
+
+/*
+ * Report whether the iso_node behind a vnode has ILOCKED set:
+ * returns 1 if it does, 0 otherwise.
+ */
+int
+cd9660_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct iso_node *inode = VTOI(ap->a_vp);
+
+	return ((inode->i_flag & ILOCKED) != 0);
+}
+
+/*
+ * Calculate the logical to physical mapping if not done already,
+ * then call the device strategy routine.
+ *
+ * A buffer whose b_blkno still equals b_lblkno is taken as "not mapped
+ * yet" and is run through VOP_BMAP first.  A mapping failure is recorded
+ * in the buffer (b_error, B_ERROR), the buffer is completed and the error
+ * returned.  A block number of -1 means there is nothing to read: the
+ * buffer is cleared (when the mapping was just computed) and completed.
+ * Otherwise the request is passed to the strategy op of the node's
+ * device vnode.  Block and character device vnodes must never get here.
+ */
+int
+cd9660_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bufp = ap->a_bp;
+	struct vnode *filevp = bufp->b_vp;
+	struct vnode *devvp;
+	struct iso_node *inode;
+	int rv;
+
+	inode = VTOI(filevp);
+	if (filevp->v_type == VBLK || filevp->v_type == VCHR)
+		panic("cd9660_strategy: spec");
+
+	if (bufp->b_blkno == bufp->b_lblkno) {
+		rv = VOP_BMAP(filevp, bufp->b_lblkno, NULL, &bufp->b_blkno, NULL);
+		if (rv != 0) {
+			bufp->b_error = rv;
+			bufp->b_flags |= B_ERROR;
+			biodone(bufp);
+			return (rv);
+		}
+		if ((long)bufp->b_blkno == -1)
+			clrbuf(bufp);
+	}
+
+	if ((long)bufp->b_blkno == -1) {
+		biodone(bufp);
+		return (0);
+	}
+
+	/* hand the (now mapped) request to the underlying device */
+	devvp = inode->i_devvp;
+	bufp->b_dev = devvp->v_rdev;
+	VOCALL (devvp->v_op, VOFFSET(vop_strategy), ap);
+	return (0);
+}
+
+/*
+ * Print a fixed one-line tag identifying an isofs vnode.
+ * The argument is not examined; always returns 0.
+ */
+int
+cd9660_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_ISOFS, isofs vnode\n");
+	return (0);
+}
+
+/*
+ * Catch-all for unsupported operations: does nothing and
+ * reports EOPNOTSUPP.
+ */
+int
+cd9660_enotsupp()
+{
+	int unsupported = EOPNOTSUPP;
+
+	return (unsupported);
+}
+
+/*
+ * Global vfs data structures for isofs
+ *
+ * Vnode operations that have no implementation of their own in this file.
+ * Each macro casts a catch-all routine to the entry-point type of the
+ * corresponding operation: cd9660_fsync maps to nullop, every other macro
+ * maps to cd9660_enotsupp (which returns EOPNOTSUPP).
+ * NOTE(review): cd9660_bwrite is defined here, but the operation tables
+ * below install vn_bwrite for vop_bwrite_desc.
+ */
+#define cd9660_create \
+ ((int (*) __P((struct vop_create_args *)))cd9660_enotsupp)
+#define cd9660_mknod ((int (*) __P((struct vop_mknod_args *)))cd9660_enotsupp)
+#define cd9660_setattr \
+ ((int (*) __P((struct vop_setattr_args *)))cd9660_enotsupp)
+#define cd9660_write ((int (*) __P((struct vop_write_args *)))cd9660_enotsupp)
+#define cd9660_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define cd9660_remove \
+ ((int (*) __P((struct vop_remove_args *)))cd9660_enotsupp)
+#define cd9660_link ((int (*) __P((struct vop_link_args *)))cd9660_enotsupp)
+#define cd9660_rename \
+ ((int (*) __P((struct vop_rename_args *)))cd9660_enotsupp)
+#define cd9660_mkdir ((int (*) __P((struct vop_mkdir_args *)))cd9660_enotsupp)
+#define cd9660_rmdir ((int (*) __P((struct vop_rmdir_args *)))cd9660_enotsupp)
+#define cd9660_symlink \
+ ((int (*) __P((struct vop_symlink_args *)))cd9660_enotsupp)
+#define cd9660_pathconf \
+ ((int (*) __P((struct vop_pathconf_args *)))cd9660_enotsupp)
+#define cd9660_advlock \
+ ((int (*) __P((struct vop_advlock_args *)))cd9660_enotsupp)
+#define cd9660_blkatoff \
+ ((int (*) __P((struct vop_blkatoff_args *)))cd9660_enotsupp)
+/* unlike the others, cd9660_valloc spells out a four-argument prototype */
+#define cd9660_valloc ((int(*) __P(( \
+ struct vnode *pvp, \
+ int mode, \
+ struct ucred *cred, \
+ struct vnode **vpp))) cd9660_enotsupp)
+#define cd9660_vfree ((int (*) __P((struct vop_vfree_args *)))cd9660_enotsupp)
+#define cd9660_truncate \
+ ((int (*) __P((struct vop_truncate_args *)))cd9660_enotsupp)
+#define cd9660_update \
+ ((int (*) __P((struct vop_update_args *)))cd9660_enotsupp)
+#define cd9660_bwrite \
+ ((int (*) __P((struct vop_bwrite_args *)))cd9660_enotsupp)
+
+/*
+ * Global vfs data structures for cd9660 (isofs)
+ *
+ * Vnode operation table: pairs each operation descriptor with the cd9660
+ * routine (or cd9660_* macro) that serves it.  Operations not listed fall
+ * to the vop_default_desc entry, vn_default_error.  The table ends with a
+ * NULL/NULL pair, and cd9660_vnodeop_opv_desc ties it to the
+ * cd9660_vnodeop_p vector pointer.
+ */
+int (**cd9660_vnodeop_p)();
+struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, cd9660_lookup }, /* lookup */
+ { &vop_create_desc, cd9660_create }, /* create */
+ { &vop_mknod_desc, cd9660_mknod }, /* mknod */
+ { &vop_open_desc, cd9660_open }, /* open */
+ { &vop_close_desc, cd9660_close }, /* close */
+ { &vop_access_desc, cd9660_access }, /* access */
+ { &vop_getattr_desc, cd9660_getattr }, /* getattr */
+ { &vop_setattr_desc, cd9660_setattr }, /* setattr */
+ { &vop_read_desc, cd9660_read }, /* read */
+ { &vop_write_desc, cd9660_write }, /* write */
+ { &vop_ioctl_desc, cd9660_ioctl }, /* ioctl */
+ { &vop_select_desc, cd9660_select }, /* select */
+ { &vop_mmap_desc, cd9660_mmap }, /* mmap */
+ { &vop_fsync_desc, cd9660_fsync }, /* fsync */
+ { &vop_seek_desc, cd9660_seek }, /* seek */
+ { &vop_remove_desc, cd9660_remove }, /* remove */
+ { &vop_link_desc, cd9660_link }, /* link */
+ { &vop_rename_desc, cd9660_rename }, /* rename */
+ { &vop_mkdir_desc, cd9660_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, cd9660_rmdir }, /* rmdir */
+ { &vop_symlink_desc, cd9660_symlink }, /* symlink */
+ { &vop_readdir_desc, cd9660_readdir }, /* readdir */
+ { &vop_readlink_desc, cd9660_readlink },/* readlink */
+ { &vop_abortop_desc, cd9660_abortop }, /* abortop */
+ { &vop_inactive_desc, cd9660_inactive },/* inactive */
+ { &vop_reclaim_desc, cd9660_reclaim }, /* reclaim */
+ { &vop_lock_desc, cd9660_lock }, /* lock */
+ { &vop_unlock_desc, cd9660_unlock }, /* unlock */
+ { &vop_bmap_desc, cd9660_bmap }, /* bmap */
+ { &vop_strategy_desc, cd9660_strategy },/* strategy */
+ { &vop_print_desc, cd9660_print }, /* print */
+ { &vop_islocked_desc, cd9660_islocked },/* islocked */
+ { &vop_pathconf_desc, cd9660_pathconf },/* pathconf */
+ { &vop_advlock_desc, cd9660_advlock }, /* advlock */
+ { &vop_blkatoff_desc, cd9660_blkatoff },/* blkatoff */
+ { &vop_valloc_desc, cd9660_valloc }, /* valloc */
+ { &vop_vfree_desc, cd9660_vfree }, /* vfree */
+ { &vop_truncate_desc, cd9660_truncate },/* truncate */
+ { &vop_update_desc, cd9660_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc cd9660_vnodeop_opv_desc =
+ { &cd9660_vnodeop_p, cd9660_vnodeop_entries };
+
+/*
+ * Special device vnode ops
+ *
+ * Same layout as cd9660_vnodeop_entries, but most entries are the generic
+ * spec_* routines.  The cd9660 routines are kept for access, getattr,
+ * inactive, reclaim, lock, unlock, strategy, print and islocked, and the
+ * modifying operations still resolve to the cd9660_* "unsupported" macros.
+ */
+int (**cd9660_specop_p)();
+struct vnodeopv_entry_desc cd9660_specop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, spec_lookup }, /* lookup */
+ { &vop_create_desc, cd9660_create }, /* create */
+ { &vop_mknod_desc, cd9660_mknod }, /* mknod */
+ { &vop_open_desc, spec_open }, /* open */
+ { &vop_close_desc, spec_close }, /* close */
+ { &vop_access_desc, cd9660_access }, /* access */
+ { &vop_getattr_desc, cd9660_getattr }, /* getattr */
+ { &vop_setattr_desc, cd9660_setattr }, /* setattr */
+ { &vop_read_desc, spec_read }, /* read */
+ { &vop_write_desc, spec_write }, /* write */
+ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
+ { &vop_select_desc, spec_select }, /* select */
+ { &vop_mmap_desc, spec_mmap }, /* mmap */
+ { &vop_fsync_desc, spec_fsync }, /* fsync */
+ { &vop_seek_desc, spec_seek }, /* seek */
+ { &vop_remove_desc, cd9660_remove }, /* remove */
+ { &vop_link_desc, cd9660_link }, /* link */
+ { &vop_rename_desc, cd9660_rename }, /* rename */
+ { &vop_mkdir_desc, cd9660_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, cd9660_rmdir }, /* rmdir */
+ { &vop_symlink_desc, cd9660_symlink }, /* symlink */
+ { &vop_readdir_desc, spec_readdir }, /* readdir */
+ { &vop_readlink_desc, spec_readlink }, /* readlink */
+ { &vop_abortop_desc, spec_abortop }, /* abortop */
+ { &vop_inactive_desc, cd9660_inactive },/* inactive */
+ { &vop_reclaim_desc, cd9660_reclaim }, /* reclaim */
+ { &vop_lock_desc, cd9660_lock }, /* lock */
+ { &vop_unlock_desc, cd9660_unlock }, /* unlock */
+ { &vop_bmap_desc, spec_bmap }, /* bmap */
+ /* XXX strategy: panics, should be notsupp instead? */
+ /* (cd9660_strategy panics when the vnode type is VBLK or VCHR) */
+ { &vop_strategy_desc, cd9660_strategy },/* strategy */
+ { &vop_print_desc, cd9660_print }, /* print */
+ { &vop_islocked_desc, cd9660_islocked },/* islocked */
+ { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
+ { &vop_advlock_desc, spec_advlock }, /* advlock */
+ { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, spec_valloc }, /* valloc */
+ { &vop_vfree_desc, spec_vfree }, /* vfree */
+ { &vop_truncate_desc, spec_truncate }, /* truncate */
+ { &vop_update_desc, cd9660_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc cd9660_specop_opv_desc =
+ { &cd9660_specop_p, cd9660_specop_entries };
+
+#ifdef FIFO
+/*
+ * Fifo vnode ops (only built when FIFO is defined)
+ *
+ * Same layout as the other two tables, with the generic fifo_* routines
+ * in most slots.  Strategy is fifo_badop; the cd9660 routines are kept
+ * for access, getattr, inactive, reclaim, lock, unlock, print and
+ * islocked, and the modifying operations resolve to the cd9660_*
+ * "unsupported" macros.
+ */
+int (**cd9660_fifoop_p)();
+struct vnodeopv_entry_desc cd9660_fifoop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, fifo_lookup }, /* lookup */
+ { &vop_create_desc, cd9660_create }, /* create */
+ { &vop_mknod_desc, cd9660_mknod }, /* mknod */
+ { &vop_open_desc, fifo_open }, /* open */
+ { &vop_close_desc, fifo_close }, /* close */
+ { &vop_access_desc, cd9660_access }, /* access */
+ { &vop_getattr_desc, cd9660_getattr }, /* getattr */
+ { &vop_setattr_desc, cd9660_setattr }, /* setattr */
+ { &vop_read_desc, fifo_read }, /* read */
+ { &vop_write_desc, fifo_write }, /* write */
+ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */
+ { &vop_select_desc, fifo_select }, /* select */
+ { &vop_mmap_desc, fifo_mmap }, /* mmap */
+ { &vop_fsync_desc, fifo_fsync }, /* fsync */
+ { &vop_seek_desc, fifo_seek }, /* seek */
+ { &vop_remove_desc, cd9660_remove }, /* remove */
+ { &vop_link_desc, cd9660_link }, /* link */
+ { &vop_rename_desc, cd9660_rename }, /* rename */
+ { &vop_mkdir_desc, cd9660_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, cd9660_rmdir }, /* rmdir */
+ { &vop_symlink_desc, cd9660_symlink }, /* symlink */
+ { &vop_readdir_desc, fifo_readdir }, /* readdir */
+ { &vop_readlink_desc, fifo_readlink }, /* readlink */
+ { &vop_abortop_desc, fifo_abortop }, /* abortop */
+ { &vop_inactive_desc, cd9660_inactive },/* inactive */
+ { &vop_reclaim_desc, cd9660_reclaim }, /* reclaim */
+ { &vop_lock_desc, cd9660_lock }, /* lock */
+ { &vop_unlock_desc, cd9660_unlock }, /* unlock */
+ { &vop_bmap_desc, fifo_bmap }, /* bmap */
+ { &vop_strategy_desc, fifo_badop }, /* strategy */
+ { &vop_print_desc, cd9660_print }, /* print */
+ { &vop_islocked_desc, cd9660_islocked },/* islocked */
+ { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fifo_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fifo_valloc }, /* valloc */
+ { &vop_vfree_desc, fifo_vfree }, /* vfree */
+ { &vop_truncate_desc, fifo_truncate }, /* truncate */
+ { &vop_update_desc, cd9660_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc cd9660_fifoop_opv_desc =
+ { &cd9660_fifoop_p, cd9660_fifoop_entries };
+#endif /* FIFO */
diff --git a/sys/isofs/cd9660/iso.h b/sys/isofs/cd9660/iso.h
new file mode 100644
index 000000000000..e3567066e1cd
--- /dev/null
+++ b/sys/isofs/cd9660/iso.h
@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso.h 8.2 (Berkeley) 1/23/94
+ */
+
+/*
+ * Width in bytes of a field that occupies byte positions `from' through
+ * `to' (inclusive) of a descriptor, e.g. ISODCL(2, 6) is 5.  Both
+ * arguments are parenthesized so that expressions may be passed safely.
+ */
+#define ISODCL(from, to) ((to) - (from) + 1)
+
+/*
+ * Generic view of a volume descriptor: a type byte, a five-byte id, a
+ * version byte and 2041 bytes of type-specific data (2048 bytes in all).
+ * Every field is a raw byte array sized with ISODCL(first, last).
+ */
+struct iso_volume_descriptor {
+ char type[ISODCL(1,1)]; /* 711 */
+ char id[ISODCL(2,6)];
+ char version[ISODCL(7,7)];
+ char data[ISODCL(8,2048)];
+};
+
+/* volume descriptor types */
+#define ISO_VD_PRIMARY 1
+#define ISO_VD_END 255
+
+/* five-character identifier strings; presumably matched against `id' -- confirm in the mount code */
+#define ISO_STANDARD_ID "CD001"
+#define ISO_ECMA_ID "CDW01"
+
+/*
+ * Layout of the primary volume descriptor, bytes 1 through 2048.
+ * Each field is a raw byte array sized with ISODCL(first, last); the
+ * trailing comment names the numeric encoding of the field (711, 723,
+ * 731, 732, 733 -- see the isonum_* helpers below), its character class
+ * (achars, dchars) or the section that describes it.
+ */
+struct iso_primary_descriptor {
+ char type [ISODCL ( 1, 1)]; /* 711 */
+ char id [ISODCL ( 2, 6)];
+ char version [ISODCL ( 7, 7)]; /* 711 */
+ char unused1 [ISODCL ( 8, 8)];
+ char system_id [ISODCL ( 9, 40)]; /* achars */
+ char volume_id [ISODCL ( 41, 72)]; /* dchars */
+ char unused2 [ISODCL ( 73, 80)];
+ char volume_space_size [ISODCL ( 81, 88)]; /* 733 */
+ char unused3 [ISODCL ( 89, 120)];
+ char volume_set_size [ISODCL (121, 124)]; /* 723 */
+ char volume_sequence_number [ISODCL (125, 128)]; /* 723 */
+ char logical_block_size [ISODCL (129, 132)]; /* 723 */
+ char path_table_size [ISODCL (133, 140)]; /* 733 */
+ char type_l_path_table [ISODCL (141, 144)]; /* 731 */
+ char opt_type_l_path_table [ISODCL (145, 148)]; /* 731 */
+ char type_m_path_table [ISODCL (149, 152)]; /* 732 */
+ char opt_type_m_path_table [ISODCL (153, 156)]; /* 732 */
+ char root_directory_record [ISODCL (157, 190)]; /* 9.1 */
+ char volume_set_id [ISODCL (191, 318)]; /* dchars */
+ char publisher_id [ISODCL (319, 446)]; /* achars */
+ char preparer_id [ISODCL (447, 574)]; /* achars */
+ char application_id [ISODCL (575, 702)]; /* achars */
+ char copyright_file_id [ISODCL (703, 739)]; /* 7.5 dchars */
+ char abstract_file_id [ISODCL (740, 776)]; /* 7.5 dchars */
+ char bibliographic_file_id [ISODCL (777, 813)]; /* 7.5 dchars */
+ char creation_date [ISODCL (814, 830)]; /* 8.4.26.1 */
+ char modification_date [ISODCL (831, 847)]; /* 8.4.26.1 */
+ char expiration_date [ISODCL (848, 864)]; /* 8.4.26.1 */
+ char effective_date [ISODCL (865, 881)]; /* 8.4.26.1 */
+ char file_structure_version [ISODCL (882, 882)]; /* 711 */
+ char unused4 [ISODCL (883, 883)];
+ char application_data [ISODCL (884, 1395)];
+ char unused5 [ISODCL (1396, 2048)];
+};
+/* NOTE(review): presumably the block size assumed before the descriptor is read -- confirm at the use sites */
+#define ISO_DEFAULT_BLOCK_SIZE 2048
+
+/*
+ * Fixed part of a directory record (bytes 1 through 33), followed by the
+ * variable-length name.  Fields are raw byte arrays sized with
+ * ISODCL(first, last); the trailing comment names the numeric encoding.
+ * `name' is a zero-length array that only marks where the name starts;
+ * presumably name_len gives its length -- confirm against the readers.
+ */
+struct iso_directory_record {
+ char length [ISODCL (1, 1)]; /* 711 */
+ char ext_attr_length [ISODCL (2, 2)]; /* 711 */
+ unsigned char extent [ISODCL (3, 10)]; /* 733 */
+ unsigned char size [ISODCL (11, 18)]; /* 733 */
+ char date [ISODCL (19, 25)]; /* 7 by 711 */
+ char flags [ISODCL (26, 26)];
+ char file_unit_size [ISODCL (27, 27)]; /* 711 */
+ char interleave [ISODCL (28, 28)]; /* 711 */
+ char volume_sequence_number [ISODCL (29, 32)]; /* 723 */
+ char name_len [ISODCL (33, 33)]; /* 711 */
+ char name [0];
+};
+/* can't take sizeof(iso_directory_record), because of possible alignment
+ of the last entry (34 instead of 33) */
+#define ISO_DIRECTORY_RECORD_SIZE 33
+
+/*
+ * Layout of an extended attribute record, bytes 1 through 250.
+ * Fields are raw byte arrays sized with ISODCL(first, last); the trailing
+ * comment names the numeric encoding or the section describing the field.
+ */
+struct iso_extended_attributes {
+ unsigned char owner [ISODCL (1, 4)]; /* 723 */
+ unsigned char group [ISODCL (5, 8)]; /* 723 */
+ unsigned char perm [ISODCL (9, 10)]; /* 9.5.3 */
+ char ctime [ISODCL (11, 27)]; /* 8.4.26.1 */
+ char mtime [ISODCL (28, 44)]; /* 8.4.26.1 */
+ char xtime [ISODCL (45, 61)]; /* 8.4.26.1 */
+ char ftime [ISODCL (62, 78)]; /* 8.4.26.1 */
+ char recfmt [ISODCL (79, 79)]; /* 711 */
+ char recattr [ISODCL (80, 80)]; /* 711 */
+ unsigned char reclen [ISODCL (81, 84)]; /* 723 */
+ char system_id [ISODCL (85, 116)]; /* achars */
+ char system_use [ISODCL (117, 180)];
+ char version [ISODCL (181, 181)]; /* 711 */
+ char len_esc [ISODCL (182, 182)]; /* 711 */
+ char reserved [ISODCL (183, 246)];
+ unsigned char len_au [ISODCL (247, 250)]; /* 723 */
+};
+
+/* CD-ROM Format type */
+enum ISO_FTYPE { ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP, ISO_FTYPE_ECMA };
+
+#ifndef ISOFSMNT_ROOT
+#define ISOFSMNT_ROOT 0
+#endif
+
+/*
+ * Per-mount state of a cd9660 file system; reached from a struct mount
+ * through VFSTOISOFS().
+ */
+struct iso_mnt {
+ int im_flags;
+
+ struct mount *im_mountp;
+ dev_t im_dev;
+ struct vnode *im_devvp; /* device vnode; block reads go through it */
+
+ int logical_block_size;
+ int im_bshift; /* shift used by iso_lblkno()/iso_lblktosize() */
+ int im_bmask; /* mask used by iso_blkoff() */
+
+ int volume_space_size;
+ char im_fsmnt[50];
+ struct netexport im_export;
+
+ char root[ISODCL (157, 190)]; /* same span as root_directory_record in the primary descriptor */
+ int root_extent;
+ int root_size;
+ enum ISO_FTYPE iso_ftype; /* selects the name handling (plain, 9660, Rock Ridge) */
+
+ int rr_skip;
+ int rr_skip0;
+};
+
+/* Fetch the cd9660 per-mount data hanging off a struct mount. */
+#define VFSTOISOFS(mp) ((struct iso_mnt *)((mp)->mnt_data))
+
+/*
+ * Logical block arithmetic for a mount `imp':
+ *  iso_blkoff     - offset of byte position `loc' inside its block
+ *  iso_lblkno     - logical block number containing `loc'
+ *  iso_blksize    - block size; always the mount's logical_block_size
+ *                   (`ip' and `lbn' are ignored)
+ *  iso_lblktosize - byte position of the start of block `blk'
+ */
+#define iso_blkoff(imp, loc) ((loc) & (imp)->im_bmask)
+#define iso_lblkno(imp, loc) ((loc) >> (imp)->im_bshift)
+#define iso_blksize(imp, ip, lbn) ((imp)->logical_block_size)
+#define iso_lblktosize(imp, blk) ((blk) << (imp)->im_bshift)
+
+/* File system level (mount-wide) entry points. */
+int cd9660_mount __P((struct mount *,
+ char *, caddr_t, struct nameidata *, struct proc *));
+int cd9660_start __P((struct mount *, int, struct proc *));
+int cd9660_unmount __P((struct mount *, int, struct proc *));
+int cd9660_root __P((struct mount *, struct vnode **));
+int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *));
+int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *));
+int cd9660_sync __P((struct mount *, int, struct ucred *, struct proc *));
+int cd9660_vget __P((struct mount *, ino_t, struct vnode **));
+int cd9660_fhtovp __P((struct mount *, struct fid *, struct mbuf *,
+ struct vnode **, int *, struct ucred **));
+int cd9660_vptofh __P((struct vnode *, struct fid *));
+int cd9660_init __P(());
+
+/* iso_node helpers; struct iso_node is only forward-declared here. */
+struct iso_node;
+int iso_blkatoff __P((struct iso_node *ip, long offset, struct buf **bpp));
+int iso_iget __P((struct iso_node *xp, ino_t ino, int relocated,
+ struct iso_node **ipp, struct iso_directory_record *isodir));
+int iso_iput __P((struct iso_node *ip));
+int iso_ilock __P((struct iso_node *ip));
+int iso_iunlock __P((struct iso_node *ip));
+int cd9660_mountroot __P((void));
+
+/* vnode operation vector pointer, defined with the operation tables */
+extern int (**cd9660_vnodeop_p)();
+
+/*
+ * 711: decode a one-byte unsigned value (0..255).
+ */
+extern inline int
+isonum_711(p)
+	unsigned char *p;
+{
+	return (p[0]);
+}
+
+/*
+ * 712: decode a one-byte signed value (-128..127).
+ * The byte is converted through `signed char' explicitly so the result
+ * does not depend on whether plain `char' is signed on this machine.
+ */
+extern inline int
+isonum_712(p)
+	char *p;
+{
+	return ((signed char)*p);
+}
+
+/*
+ * 721: decode a two-byte unsigned value stored least significant byte
+ * first.  Returns 0..65535; the high byte is NOT sign-extended, so
+ * values of 0x8000 and above come back positive.
+ */
+extern inline int
+isonum_721(p)
+	unsigned char *p;
+{
+	return (p[0] | (p[1] << 8));
+}
+
+/*
+ * 722: decode a two-byte unsigned value stored most significant byte
+ * first.  Returns 0..65535; the high byte is NOT sign-extended, so
+ * values of 0x8000 and above come back positive.
+ */
+extern inline int
+isonum_722(p)
+	unsigned char *p;
+{
+	return ((p[0] << 8) | p[1]);
+}
+
+/*
+ * 723: decoded by handing the field to isonum_721(), i.e. only the
+ * leading two bytes (least significant first) are examined.
+ */
+extern inline int
+isonum_723(p)
+	unsigned char *p;
+{
+	int value;
+
+	value = isonum_721(p);
+	return (value);
+}
+
+/*
+ * 731: decode a four-byte value stored least significant byte first.
+ * The bytes are assembled in unsigned arithmetic so that a set top bit
+ * in p[3] cannot overflow a signed shift; the result is then converted
+ * to the int return type (values with the top bit set come back
+ * negative, as before).
+ */
+extern inline int
+isonum_731(p)
+	unsigned char *p;
+{
+	unsigned int v;
+
+	v = (unsigned int)p[0] |
+	    ((unsigned int)p[1] << 8) |
+	    ((unsigned int)p[2] << 16) |
+	    ((unsigned int)p[3] << 24);
+	return ((int)v);
+}
+
+/*
+ * 732: decode a four-byte value stored most significant byte first.
+ * The bytes are assembled in unsigned arithmetic so that a set top bit
+ * in p[0] cannot overflow a signed shift; the result is then converted
+ * to the int return type (values with the top bit set come back
+ * negative, as before).
+ */
+extern inline int
+isonum_732(p)
+	unsigned char *p;
+{
+	unsigned int v;
+
+	v = ((unsigned int)p[0] << 24) |
+	    ((unsigned int)p[1] << 16) |
+	    ((unsigned int)p[2] << 8) |
+	    (unsigned int)p[3];
+	return ((int)v);
+}
+
+/*
+ * 733: decoded by handing the field to isonum_731(), i.e. only the
+ * leading four bytes (least significant first) are examined.
+ */
+extern inline int
+isonum_733(p)
+	unsigned char *p;
+{
+	int value;
+
+	value = isonum_731(p);
+	return (value);
+}
+
+/*
+ * File name helpers (defined elsewhere): isofncmp compares two
+ * (name, length) pairs; isofntrans translates a name from one buffer
+ * into another and reports the resulting length through the
+ * unsigned short pointer.
+ * NOTE(review): the meaning of the two trailing int flags of isofntrans
+ * is not visible here -- see its definition.
+ */
+int isofncmp __P((unsigned char *, int, unsigned char *, int));
+void isofntrans __P((unsigned char *, int, unsigned char *, unsigned short *,
+ int, int));
+
+/*
+ * Associated files have a leading '='.
+ */
+#define ASSOCCHAR '='
diff --git a/sys/isofs/cd9660/iso_rrip.h b/sys/isofs/cd9660/iso_rrip.h
new file mode 100644
index 000000000000..78e4a775201b
--- /dev/null
+++ b/sys/isofs/cd9660/iso_rrip.h
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso_rrip.h 8.2 (Berkeley) 1/23/94
+ */
+
+
+/*
+ * Analyze function flag (similar to RR field bits)
+ *
+ * Each flag is a distinct bit so they can be or'ed together;
+ * presumably this is the encoding of the `fields' member of
+ * ISO_RRIP_ANALYZE -- confirm in cd9660_rrip.c.
+ */
+#define ISO_SUSP_ATTR 0x0001
+#define ISO_SUSP_DEVICE 0x0002
+#define ISO_SUSP_SLINK 0x0004
+#define ISO_SUSP_ALTNAME 0x0008
+#define ISO_SUSP_CLINK 0x0010
+#define ISO_SUSP_PLINK 0x0020
+#define ISO_SUSP_RELDIR 0x0040
+#define ISO_SUSP_TSTAMP 0x0080
+#define ISO_SUSP_IDFLAG 0x0100
+#define ISO_SUSP_EXTREF 0x0200
+#define ISO_SUSP_CONT 0x0400
+#define ISO_SUSP_OFFSET 0x0800
+#define ISO_SUSP_STOP 0x1000
+#define ISO_SUSP_UNKNOWN 0x8000
+
+/*
+ * Working state of one Rock Ridge analysis pass: what is being looked
+ * for, where a continuation area lives, and where results are stored.
+ */
+typedef struct {
+ struct iso_node *inop;
+ int fields; /* interesting fields in this analysis */
+ daddr_t iso_ce_blk; /* block of continuation area */
+ off_t iso_ce_off; /* offset of continuation area */
+ int iso_ce_len; /* length of continuation area */
+ struct iso_mnt *imp; /* mount structure */
+ ino_t *inump; /* inode number pointer */
+ char *outbuf; /* name/symbolic link output area */
+ u_short *outlen; /* length of above */
+ u_short maxlen; /* maximum length of above */
+ int cont; /* continuation of above */
+} ISO_RRIP_ANALYZE;
+
+/*
+ * Rock Ridge entry points.  All of them take the directory record to
+ * examine and the mount structure; getname and getsymname store their
+ * result in outbuf and its length in *outlen.
+ */
+int cd9660_rrip_analyze __P((struct iso_directory_record *isodir,
+ struct iso_node *inop, struct iso_mnt *imp));
+int cd9660_rrip_getname __P((struct iso_directory_record *isodir,
+ char *outbuf, u_short *outlen,
+ ino_t *inump, struct iso_mnt *imp));
+int cd9660_rrip_getsymname __P((struct iso_directory_record *isodir,
+ char *outbuf, u_short *outlen,
+ struct iso_mnt *imp));
+int cd9660_rrip_offset __P((struct iso_directory_record *isodir,
+ struct iso_mnt *imp));
diff --git a/sys/kern/Make.tags.inc b/sys/kern/Make.tags.inc
new file mode 100644
index 000000000000..1563c4165f1e
--- /dev/null
+++ b/sys/kern/Make.tags.inc
@@ -0,0 +1,18 @@
+# @(#)Make.tags.inc 8.1 (Berkeley) 6/11/93
+
+# Common files for "make tags".
+# Included by the Makefile for each architecture.
+
+# Put the ../sys stuff near the end so that subroutine definitions win when
+# there is a struct tag with the same name (eg., vmmeter). The real
+# solution would probably be for ctags to generate "struct vmmeter" tags.
+
+# COMM: glob patterns for the .c and .h files shared by all architectures.
+COMM= /sys/conf/*.[ch] \
+ /sys/dev/*.[ch] /sys/dev/scsi/*.[ch] \
+ /sys/kern/*.[ch] /sys/libkern/*.[ch] \
+ /sys/miscfs/*/*.[ch] \
+ /sys/net/*.[ch] /sys/netccitt/*.[ch] /sys/netinet/*.[ch] \
+ /sys/netiso/*.[ch] /sys/netns/*.[ch] \
+ /sys/nfs/*.[ch] /sys/sys/*.[ch] \
+ /sys/ufs/*/*.[ch] \
+ /sys/vm/*.[ch]
diff --git a/sys/kern/Makefile b/sys/kern/Makefile
new file mode 100644
index 000000000000..cfe962a9a66b
--- /dev/null
+++ b/sys/kern/Makefile
@@ -0,0 +1,50 @@
+# @(#)Makefile 8.2 (Berkeley) 3/21/94
+
+# Makefile for kernel tags files, init_sysent, etc.
+
+# Architecture directories visited by the "tags" and "links" targets.
+ARCH= hp300 i386 luna68k news3400 pmax sparc tahoe vax
+
+# Default target: only prints which targets are useful.
+all:
+ @echo "make tags, make links or init_sysent.c only"
+
+# Regenerate the system call tables from syscalls.master.  The previous
+# files are first renamed to *.bak; the leading "-" lets make carry on
+# when one of them does not exist yet.
+init_sysent.c syscalls.c ../sys/syscall.h: makesyscalls.sh syscalls.master
+ -mv -f init_sysent.c init_sysent.c.bak
+ -mv -f syscalls.c syscalls.c.bak
+ -mv -f ../sys/syscall.h ../sys/syscall.h.bak
+ sh makesyscalls.sh syscalls.master
+
+# Kernel tags:
+# Tags files are built in the top-level directory for each architecture,
+# with a makefile listing the architecture-dependent files, etc. The list
+# of common files is in ./Make.tags.inc. Links to the correct tags file
+# are placed in each source directory. We need to have links to tags files
+# from the generic directories that are relative to the machine type, even
+# via remote mounts; therefore we use symlinks to $SYSTAGS, which points at
+# ${SYSDIR}/${MACHINE}/tags.
+
+SYSTAGS=/var/db/sys_tags
+SYSDIR=/sys
+
+# Directories in which to place tags links (other than machine-dependent)
+DGEN= conf \
+ dev dev/scsi \
+ hp hp/dev hp/hpux \
+ kern libkern \
+ miscfs miscfs/deadfs miscfs/fdesc miscfs/fifofs miscfs/kernfs \
+ miscfs/lofs miscfs/nullfs miscfs/portal miscfs/procfs \
+ miscfs/specfs miscfs/umapfs miscfs/union \
+ net netccitt netinet netiso netns nfs scripts sys \
+ ufs ufs/ffs ufs/lfs ufs/mfs ufs/ufs \
+ vm
+
+# Run "make tags" in every architecture directory.
+tags::
+ -for i in ${ARCH}; do \
+ (cd ../$$i && make ${MFLAGS} tags); done
+
+# Point ${SYSTAGS} at this machine's tags file, replace the tags file in
+# every generic directory with a symlink to ${SYSTAGS}, then let each
+# architecture directory create its own links.
+links::
+ rm -f ${SYSTAGS}
+ ln -s ${SYSDIR}/${MACHINE}/tags ${SYSTAGS}
+ -for i in ${DGEN}; do \
+ (cd ../$$i && { rm -f tags; ln -s ${SYSTAGS} tags; }) done
+ -for i in ${ARCH}; do \
+ (cd ../$$i && make ${MFLAGS} SYSTAGS=${SYSTAGS} links); done
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
new file mode 100644
index 000000000000..c6497153a695
--- /dev/null
+++ b/sys/kern/init_main.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)init_main.c 8.9 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/filedesc.h>
+#include <sys/errno.h>
+#include <sys/exec.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/map.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/conf.h>
+#include <sys/buf.h>
+#include <sys/clist.h>
+#include <sys/device.h>
+#include <sys/protosw.h>
+#include <sys/reboot.h>
+#include <sys/user.h>
+
+#include <ufs/ufs/quota.h>
+
+#include <machine/cpu.h>
+
+#include <vm/vm.h>
+
+/*
+ * Banner text printed by main() right after the console is initialized.
+ * Kernels built with HPFPLIB carry two additional copyright lines.
+ */
+#ifdef HPFPLIB
+char copyright[] =
+"Copyright (c) 1982, 1986, 1989, 1991, 1993\n\tThe Regents of the University of California.\nCopyright (c) 1992 Hewlett-Packard Company\nCopyright (c) 1992 Motorola Inc.\nAll rights reserved.\n\n";
+#else
+char copyright[] =
+"Copyright (c) 1982, 1986, 1989, 1991, 1993\n\tThe Regents of the University of California. All rights reserved.\n\n";
+#endif
+
+/*
+ * Components of the first process -- never freed.
+ * main() wires these statically allocated objects into proc0.
+ */
+struct session session0;
+struct pgrp pgrp0;
+struct proc proc0;
+struct pcred cred0;
+struct filedesc0 filedesc0;
+struct plimit limit0;
+struct vmspace vmspace0;
+struct proc *curproc = &proc0;
+/* initproc is set by start_init(); pageproc by main() in process 2. */
+struct proc *initproc, *pageproc;
+
+/* Copied into proc0's fd_cmask by main(). */
+int cmask = CMASK;
+/* Defined elsewhere; main() assigns it to proc0.p_addr. */
+extern struct user *proc0paddr;
+
+struct vnode *rootvp, *swapdev_vp;
+/* Boot flags; start_init() tests RB_SINGLE to build init's argument. */
+int boothowto;
+/* Both are set from `time' by main() after the root file system is mounted. */
+struct timeval boottime;
+struct timeval runtime;
+
+/* Runs in process 1 and tries to exec one of the programs in initpaths[]. */
+static void start_init __P((struct proc *p, void *framep));
+
+/*
+ * System startup; initialize the world, create process 0, mount root
+ * filesystem, and fork to create init and pagedaemon. Most of the
+ * hard work is done in the lower-level initialization routines including
+ * startup(), which does memory initialization and autoconfiguration.
+ *
+ * framep is not interpreted here; it is handed to start_init() in the
+ * context that becomes process 1, which then returns from main().  The
+ * context forked as process 2 enters vm_pageout() and the original
+ * context (process 0) enters scheduler(); neither comes back.
+ */
+main(framep)
+	void *framep;
+{
+	register struct proc *p;
+	register struct filedesc0 *fdp;
+	register struct pdevinit *pdev;
+	register int i;
+	int s, rval[2];
+	extern int (*mountroot) __P((void));
+	extern struct pdevinit pdevinit[];
+	extern void roundrobin __P((void *));
+	extern void schedcpu __P((void *));
+
+	/*
+	 * Initialize the current process pointer (curproc) before
+	 * any possible traps/probes to simplify trap processing.
+	 */
+	p = &proc0;
+	curproc = p;
+	/*
+	 * Attempt to find console and initialize
+	 * in case of early panic or other messages.
+	 */
+	consinit();
+	/*
+	 * Print the banner as an argument, never as the format itself,
+	 * so a '%' in the text can not be taken for a conversion.
+	 */
+	printf("%s", copyright);
+
+	vm_mem_init();
+	kmeminit();
+	cpu_startup();
+
+	/*
+	 * Create process 0 (the swapper).
+	 */
+	allproc = (volatile struct proc *)p;
+	p->p_prev = (struct proc **)&allproc;
+	p->p_pgrp = &pgrp0;
+	pgrphash[0] = &pgrp0;
+	pgrp0.pg_mem = p;
+	pgrp0.pg_session = &session0;
+	session0.s_count = 1;
+	session0.s_leader = p;
+
+	p->p_flag = P_INMEM | P_SYSTEM;
+	p->p_stat = SRUN;
+	p->p_nice = NZERO;
+	bcopy("swapper", p->p_comm, sizeof ("swapper"));
+
+	/* Create credentials. */
+	cred0.p_refcnt = 1;
+	p->p_cred = &cred0;
+	p->p_ucred = crget();
+	p->p_ucred->cr_ngroups = 1;	/* group 0 */
+
+	/* Create the file descriptor table. */
+	fdp = &filedesc0;
+	p->p_fd = &fdp->fd_fd;
+	fdp->fd_fd.fd_refcnt = 1;
+	fdp->fd_fd.fd_cmask = cmask;
+	fdp->fd_fd.fd_ofiles = fdp->fd_dfiles;
+	fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags;
+	fdp->fd_fd.fd_nfiles = NDFILE;
+
+	/*
+	 * Create the limits structures.  Every limit starts out as
+	 * RLIM_INFINITY; the open file, process count, RSS and locked
+	 * memory entries are then overridden individually.
+	 */
+	p->p_limit = &limit0;
+	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
+		limit0.pl_rlimit[i].rlim_cur =
+		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
+	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
+	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = MAXUPRC;
+	/* i is reused: the current free page count run through ptoa(). */
+	i = ptoa(cnt.v_free_count);
+	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
+	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
+	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
+	limit0.p_refcnt = 1;
+
+	/* Allocate a prototype map so we have something to fork. */
+	p->p_vmspace = &vmspace0;
+	vmspace0.vm_refcnt = 1;
+	pmap_pinit(&vmspace0.vm_pmap);
+	vm_map_init(&p->p_vmspace->vm_map, round_page(VM_MIN_ADDRESS),
+	    trunc_page(VM_MAX_ADDRESS), TRUE);
+	vmspace0.vm_map.pmap = &vmspace0.vm_pmap;
+	p->p_addr = proc0paddr;				/* XXX */
+
+	/*
+	 * We continue to place resource usage info and signal
+	 * actions in the user struct so they're pageable.
+	 */
+	p->p_stats = &p->p_addr->u_stats;
+	p->p_sigacts = &p->p_addr->u_sigacts;
+
+	/*
+	 * Initialize per uid information structure and charge
+	 * root for one process.
+	 */
+	usrinfoinit();
+	(void)chgproccnt(0, 1);
+
+	rqinit();
+
+	/* Configure virtual memory system, set vm rlimits. */
+	vm_init_limits(p);
+
+	/* Initialize the file systems. */
+	vfsinit();
+
+	/* Start real time and statistics clocks. */
+	initclocks();
+
+	/* Initialize mbuf's. */
+	mbinit();
+
+	/* Initialize clists. */
+	clist_init();
+
+#ifdef SYSVSHM
+	/* Initialize System V style shared memory. */
+	shminit();
+#endif
+
+	/*
+	 * Attach pseudo-devices.  pdevinit[] ends at the first entry
+	 * whose attach routine is NULL.
+	 */
+	for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
+		(*pdev->pdev_attach)(pdev->pdev_count);
+
+	/*
+	 * Initialize protocols. Block reception of incoming packets
+	 * until everything is ready.
+	 */
+	s = splimp();
+	ifinit();
+	domaininit();
+	splx(s);
+
+#ifdef GPROF
+	/* Initialize kernel profiling. */
+	kmstartup();
+#endif
+
+	/* Kick off timeout driven events by calling first time. */
+	roundrobin(NULL);
+	schedcpu(NULL);
+
+	/* Mount the root file system. */
+	if ((*mountroot)())
+		panic("cannot mount root");
+
+	/* Get the vnode for '/'. Set fdp->fd_fd.fd_cdir to reference it. */
+	if (VFS_ROOT(mountlist.tqh_first, &rootvnode))
+		panic("cannot find root vnode");
+	fdp->fd_fd.fd_cdir = rootvnode;
+	VREF(fdp->fd_fd.fd_cdir);
+	VOP_UNLOCK(rootvnode);
+	fdp->fd_fd.fd_rdir = NULL;
+	swapinit();
+
+	/*
+	 * Now can look at time, having had a chance to verify the time
+	 * from the file system. Reset p->p_rtime as it may have been
+	 * munched in mi_switch() after the time got set.
+	 */
+	p->p_stats->p_start = runtime = mono_time = boottime = time;
+	p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
+
+	/* Initialize signal state for process 0. */
+	siginit(p);
+
+	/*
+	 * Create process 1 (init(8)).  A non-zero rval[1] after fork()
+	 * marks the newly created process.
+	 */
+	if (fork(p, NULL, rval))
+		panic("fork init");
+	if (rval[1]) {
+		start_init(curproc, framep);
+		return;
+	}
+
+	/* Create process 2 (the pageout daemon). */
+	if (fork(p, NULL, rval))
+		panic("fork pager");
+	if (rval[1]) {
+		/*
+		 * Now in process 2.
+		 */
+		p = curproc;
+		pageproc = p;
+		p->p_flag |= P_INMEM | P_SYSTEM;	/* XXX */
+		bcopy("pagedaemon", curproc->p_comm, sizeof ("pagedaemon"));
+		vm_pageout();
+		/* NOTREACHED */
+	}
+
+	/* The scheduler is an infinite loop. */
+	scheduler();
+	/* NOTREACHED */
+}
+
+/*
+ * Candidate pathnames for "init", in the order start_init() tries them.
+ * The NULL entry marks the end of the list.
+ */
+static char *initpaths[] = {
+	"/sbin/init", "/sbin/oinit", "/sbin/init.bak", NULL,
+};
+
+/*
+ * Start the initial user process; try exec'ing each pathname in "initpaths".
+ * The program is invoked with one argument containing the boot flags.
+ *
+ * p is the process that becomes init (recorded in initproc) and framep is
+ * the frame pointer main() received; it is passed to cpu_set_init_frame().
+ * Returns once an execve() succeeds.  Panics if the argument page can not
+ * be allocated or if no pathname in the list can be exec'ed.
+ *
+ * For each attempt the strings are written downwards from USRSTACK:
+ * first the flag argument ("-s" for RB_SINGLE, otherwise "--"), below it
+ * the pathname, and below that a word-aligned, zero-terminated vector of
+ * pointers to the two strings.
+ */
+static void
+start_init(p, framep)
+	struct proc *p;
+	void *framep;
+{
+	vm_offset_t addr;
+	struct execve_args args;
+	int options, i, retval[2], error;
+	char **pathp, *path, *ucp, **uap, *arg0, *arg1;
+
+	initproc = p;
+
+	/*
+	 * We need to set the system call frame as if we were entered through
+	 * a syscall() so that when we call execve() below, it will be able
+	 * to set the entry point (see setregs) when it tries to exec. The
+	 * startup code in "locore.s" has allocated space for the frame and
+	 * passed a pointer to that space as main's argument.
+	 */
+	cpu_set_init_frame(p, framep);
+
+	/*
+	 * Need just enough stack to hold the faked-up "execve()" arguments.
+	 */
+	addr = trunc_page(VM_MAX_ADDRESS - PAGE_SIZE);
+	if (vm_allocate(&p->p_vmspace->vm_map, &addr, PAGE_SIZE, FALSE) != 0)
+		panic("init: couldn't allocate argument space");
+	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
+
+	for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
+		/*
+		 * Move out the boot flag argument.
+		 */
+		options = 0;
+		ucp = (char *)USRSTACK;
+		(void)subyte(--ucp, 0);		/* trailing zero */
+		if (boothowto & RB_SINGLE) {
+			(void)subyte(--ucp, 's');
+			options = 1;
+		}
+#ifdef notyet
+		if (boothowto & RB_FASTBOOT) {
+			(void)subyte(--ucp, 'f');
+			options = 1;
+		}
+#endif
+		if (options == 0)
+			(void)subyte(--ucp, '-');
+		(void)subyte(--ucp, '-');	/* leading hyphen */
+		arg1 = ucp;
+
+		/*
+		 * Move out the file name (also arg 0).  The copy runs
+		 * backwards and starts at the terminating NUL, which is
+		 * path[strlen(path)]; starting one index higher would
+		 * read a byte beyond the end of the string.
+		 */
+		for (i = strlen(path); i >= 0; i--)
+			(void)subyte(--ucp, path[i]);
+		arg0 = ucp;
+
+		/*
+		 * Move out the arg pointers.  The vector is placed below
+		 * the strings, on an NBPW boundary.
+		 */
+		uap = (char **)((int)ucp & ~(NBPW-1));
+		(void)suword((caddr_t)--uap, 0);	/* terminator */
+		(void)suword((caddr_t)--uap, (int)arg1);
+		(void)suword((caddr_t)--uap, (int)arg0);
+
+		/*
+		 * Point at the arguments.
+		 */
+		args.fname = arg0;
+		args.argp = uap;
+		args.envp = NULL;
+
+		/*
+		 * Now try to exec the program. If can't for any reason
+		 * other than it doesn't exist, complain.  retval is
+		 * passed as the array itself (an int pointer).
+		 */
+		if ((error = execve(p, &args, retval)) == 0)
+			return;
+		if (error != ENOENT)
+			printf("exec %s: error %d\n", path, error);
+	}
+	printf("init: not found\n");
+	panic("no init");
+}
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
new file mode 100644
index 000000000000..4b25c0695cfe
--- /dev/null
+++ b/sys/kern/init_sysent.c
@@ -0,0 +1,480 @@
+/*
+ * System call switch table.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * created from @(#)syscalls.master 8.2 (Berkeley) 1/13/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+int nosys();
+
+int nosys();
+int exit();
+int fork();
+int read();
+int write();
+int open();
+int close();
+int wait4();
+int link();
+int unlink();
+int chdir();
+int fchdir();
+int mknod();
+int chmod();
+int chown();
+int obreak();
+int getfsstat();
+int getpid();
+int mount();
+int unmount();
+int setuid();
+int getuid();
+int geteuid();
+int ptrace();
+int recvmsg();
+int sendmsg();
+int recvfrom();
+int accept();
+int getpeername();
+int getsockname();
+int access();
+int chflags();
+int fchflags();
+int sync();
+int kill();
+int getppid();
+int dup();
+int pipe();
+int getegid();
+int profil();
+#ifdef KTRACE
+int ktrace();
+#else
+#endif
+int sigaction();
+int getgid();
+int sigprocmask();
+int getlogin();
+int setlogin();
+int acct();
+int sigpending();
+int sigaltstack();
+int ioctl();
+int reboot();
+int revoke();
+int symlink();
+int readlink();
+int execve();
+int umask();
+int chroot();
+int msync();
+int vfork();
+int sbrk();
+int sstk();
+int ovadvise();
+int munmap();
+int mprotect();
+int madvise();
+int mincore();
+int getgroups();
+int setgroups();
+int getpgrp();
+int setpgid();
+int setitimer();
+int swapon();
+int getitimer();
+int getdtablesize();
+int dup2();
+int fcntl();
+int select();
+int fsync();
+int setpriority();
+int socket();
+int connect();
+int getpriority();
+int sigreturn();
+int bind();
+int setsockopt();
+int listen();
+int sigsuspend();
+#ifdef TRACE
+int vtrace();
+#else
+#endif
+int gettimeofday();
+int getrusage();
+int getsockopt();
+#ifdef vax
+int resuba();
+#else
+#endif
+int readv();
+int writev();
+int settimeofday();
+int fchown();
+int fchmod();
+int rename();
+int flock();
+int mkfifo();
+int sendto();
+int shutdown();
+int socketpair();
+int mkdir();
+int rmdir();
+int utimes();
+int adjtime();
+int setsid();
+int quotactl();
+#ifdef NFS
+int nfssvc();
+#else
+#endif
+int statfs();
+int fstatfs();
+#ifdef NFS
+int getfh();
+#else
+#endif
+#ifdef SYSVSHM
+int shmsys();
+#else
+#endif
+int setgid();
+int setegid();
+int seteuid();
+#ifdef LFS
+int lfs_bmapv();
+int lfs_markv();
+int lfs_segclean();
+int lfs_segwait();
+#else
+#endif
+int stat();
+int fstat();
+int lstat();
+int pathconf();
+int fpathconf();
+int getrlimit();
+int setrlimit();
+int getdirentries();
+int mmap();
+int nosys();
+int lseek();
+int truncate();
+int ftruncate();
+int __sysctl();
+int mlock();
+int munlock();
+
+#ifdef COMPAT_43
+#define compat(n, name) n, __CONCAT(o,name)
+
+int ocreat();
+int olseek();
+int ostat();
+int olstat();
+#ifdef KTRACE
+#else
+#endif
+int ofstat();
+int ogetkerninfo();
+int ogetpagesize();
+int ommap();
+int owait();
+int ogethostname();
+int osethostname();
+int oaccept();
+int osend();
+int orecv();
+int osigvec();
+int osigblock();
+int osigsetmask();
+int osigstack();
+int orecvmsg();
+int osendmsg();
+#ifdef TRACE
+#else
+#endif
+#ifdef vax
+#else
+#endif
+int orecvfrom();
+int osetreuid();
+int osetregid();
+int otruncate();
+int oftruncate();
+int ogetpeername();
+int ogethostid();
+int osethostid();
+int ogetrlimit();
+int osetrlimit();
+int okillpg();
+int oquota();
+int ogetsockname();
+#ifdef NFS
+#else
+#endif
+int ogetdirentries();
+#ifdef NFS
+#else
+#endif
+#ifdef SYSVSHM
+#else
+#endif
+#ifdef LFS
+#else
+#endif
+
+#else /* COMPAT_43 */
+#define compat(n, name) 0, nosys
+#endif /* COMPAT_43 */
+
+struct sysent sysent[] = {
+ { 0, nosys }, /* 0 = syscall */
+ { 1, exit }, /* 1 = exit */
+ { 0, fork }, /* 2 = fork */
+ { 3, read }, /* 3 = read */
+ { 3, write }, /* 4 = write */
+ { 3, open }, /* 5 = open */
+ { 1, close }, /* 6 = close */
+ { 4, wait4 }, /* 7 = wait4 */
+ { compat(2,creat) }, /* 8 = old creat */
+ { 2, link }, /* 9 = link */
+ { 1, unlink }, /* 10 = unlink */
+ { 0, nosys }, /* 11 = obsolete execv */
+ { 1, chdir }, /* 12 = chdir */
+ { 1, fchdir }, /* 13 = fchdir */
+ { 3, mknod }, /* 14 = mknod */
+ { 2, chmod }, /* 15 = chmod */
+ { 3, chown }, /* 16 = chown */
+ { 1, obreak }, /* 17 = break */
+ { 3, getfsstat }, /* 18 = getfsstat */
+ { compat(3,lseek) }, /* 19 = old lseek */
+ { 0, getpid }, /* 20 = getpid */
+ { 4, mount }, /* 21 = mount */
+ { 2, unmount }, /* 22 = unmount */
+ { 1, setuid }, /* 23 = setuid */
+ { 0, getuid }, /* 24 = getuid */
+ { 0, geteuid }, /* 25 = geteuid */
+ { 4, ptrace }, /* 26 = ptrace */
+ { 3, recvmsg }, /* 27 = recvmsg */
+ { 3, sendmsg }, /* 28 = sendmsg */
+ { 6, recvfrom }, /* 29 = recvfrom */
+ { 3, accept }, /* 30 = accept */
+ { 3, getpeername }, /* 31 = getpeername */
+ { 3, getsockname }, /* 32 = getsockname */
+ { 2, access }, /* 33 = access */
+ { 2, chflags }, /* 34 = chflags */
+ { 2, fchflags }, /* 35 = fchflags */
+ { 0, sync }, /* 36 = sync */
+ { 2, kill }, /* 37 = kill */
+ { compat(2,stat) }, /* 38 = old stat */
+ { 0, getppid }, /* 39 = getppid */
+ { compat(2,lstat) }, /* 40 = old lstat */
+ { 2, dup }, /* 41 = dup */
+ { 0, pipe }, /* 42 = pipe */
+ { 0, getegid }, /* 43 = getegid */
+ { 4, profil }, /* 44 = profil */
+#ifdef KTRACE
+ { 4, ktrace }, /* 45 = ktrace */
+#else
+ { 0, nosys }, /* 45 = ktrace */
+#endif
+ { 3, sigaction }, /* 46 = sigaction */
+ { 0, getgid }, /* 47 = getgid */
+ { 2, sigprocmask }, /* 48 = sigprocmask */
+ { 2, getlogin }, /* 49 = getlogin */
+ { 1, setlogin }, /* 50 = setlogin */
+ { 1, acct }, /* 51 = acct */
+ { 0, sigpending }, /* 52 = sigpending */
+ { 2, sigaltstack }, /* 53 = sigaltstack */
+ { 3, ioctl }, /* 54 = ioctl */
+ { 1, reboot }, /* 55 = reboot */
+ { 1, revoke }, /* 56 = revoke */
+ { 2, symlink }, /* 57 = symlink */
+ { 3, readlink }, /* 58 = readlink */
+ { 3, execve }, /* 59 = execve */
+ { 1, umask }, /* 60 = umask */
+ { 1, chroot }, /* 61 = chroot */
+ { compat(2,fstat) }, /* 62 = old fstat */
+ { compat(4,getkerninfo) }, /* 63 = old getkerninfo */
+ { compat(0,getpagesize) }, /* 64 = old getpagesize */
+ { 2, msync }, /* 65 = msync */
+ { 0, vfork }, /* 66 = vfork */
+ { 0, nosys }, /* 67 = obsolete vread */
+ { 0, nosys }, /* 68 = obsolete vwrite */
+ { 1, sbrk }, /* 69 = sbrk */
+ { 1, sstk }, /* 70 = sstk */
+ { compat(7,mmap) }, /* 71 = old mmap */
+ { 1, ovadvise }, /* 72 = vadvise */
+ { 2, munmap }, /* 73 = munmap */
+ { 3, mprotect }, /* 74 = mprotect */
+ { 3, madvise }, /* 75 = madvise */
+ { 0, nosys }, /* 76 = obsolete vhangup */
+ { 0, nosys }, /* 77 = obsolete vlimit */
+ { 3, mincore }, /* 78 = mincore */
+ { 2, getgroups }, /* 79 = getgroups */
+ { 2, setgroups }, /* 80 = setgroups */
+ { 0, getpgrp }, /* 81 = getpgrp */
+ { 2, setpgid }, /* 82 = setpgid */
+ { 3, setitimer }, /* 83 = setitimer */
+ { compat(0,wait) }, /* 84 = old wait */
+ { 1, swapon }, /* 85 = swapon */
+ { 2, getitimer }, /* 86 = getitimer */
+ { compat(2,gethostname) }, /* 87 = old gethostname */
+ { compat(2,sethostname) }, /* 88 = old sethostname */
+ { 0, getdtablesize }, /* 89 = getdtablesize */
+ { 2, dup2 }, /* 90 = dup2 */
+ { 0, nosys }, /* 91 = getdopt */
+ { 3, fcntl }, /* 92 = fcntl */
+ { 5, select }, /* 93 = select */
+ { 0, nosys }, /* 94 = setdopt */
+ { 1, fsync }, /* 95 = fsync */
+ { 3, setpriority }, /* 96 = setpriority */
+ { 3, socket }, /* 97 = socket */
+ { 3, connect }, /* 98 = connect */
+ { compat(3,accept) }, /* 99 = old accept */
+ { 2, getpriority }, /* 100 = getpriority */
+ { compat(4,send) }, /* 101 = old send */
+ { compat(4,recv) }, /* 102 = old recv */
+ { 1, sigreturn }, /* 103 = sigreturn */
+ { 3, bind }, /* 104 = bind */
+ { 5, setsockopt }, /* 105 = setsockopt */
+ { 2, listen }, /* 106 = listen */
+ { 0, nosys }, /* 107 = obsolete vtimes */
+ { compat(3,sigvec) }, /* 108 = old sigvec */
+ { compat(1,sigblock) }, /* 109 = old sigblock */
+ { compat(1,sigsetmask) }, /* 110 = old sigsetmask */
+ { 1, sigsuspend }, /* 111 = sigsuspend */
+ { compat(2,sigstack) }, /* 112 = old sigstack */
+ { compat(3,recvmsg) }, /* 113 = old recvmsg */
+ { compat(3,sendmsg) }, /* 114 = old sendmsg */
+#ifdef TRACE
+ { 2, vtrace }, /* 115 = vtrace */
+#else
+ { 0, nosys }, /* 115 = obsolete vtrace */
+#endif
+ { 2, gettimeofday }, /* 116 = gettimeofday */
+ { 2, getrusage }, /* 117 = getrusage */
+ { 5, getsockopt }, /* 118 = getsockopt */
+#ifdef vax
+ { 1, resuba }, /* 119 = resuba */
+#else
+ { 0, nosys }, /* 119 = nosys */
+#endif
+ { 3, readv }, /* 120 = readv */
+ { 3, writev }, /* 121 = writev */
+ { 2, settimeofday }, /* 122 = settimeofday */
+ { 3, fchown }, /* 123 = fchown */
+ { 2, fchmod }, /* 124 = fchmod */
+ { compat(6,recvfrom) }, /* 125 = old recvfrom */
+ { compat(2,setreuid) }, /* 126 = old setreuid */
+ { compat(2,setregid) }, /* 127 = old setregid */
+ { 2, rename }, /* 128 = rename */
+ { compat(2,truncate) }, /* 129 = old truncate */
+ { compat(2,ftruncate) }, /* 130 = old ftruncate */
+ { 2, flock }, /* 131 = flock */
+ { 2, mkfifo }, /* 132 = mkfifo */
+ { 6, sendto }, /* 133 = sendto */
+ { 2, shutdown }, /* 134 = shutdown */
+ { 5, socketpair }, /* 135 = socketpair */
+ { 2, mkdir }, /* 136 = mkdir */
+ { 1, rmdir }, /* 137 = rmdir */
+ { 2, utimes }, /* 138 = utimes */
+ { 0, nosys }, /* 139 = obsolete 4.2 sigreturn */
+ { 2, adjtime }, /* 140 = adjtime */
+ { compat(3,getpeername) }, /* 141 = old getpeername */
+ { compat(0,gethostid) }, /* 142 = old gethostid */
+ { compat(1,sethostid) }, /* 143 = old sethostid */
+ { compat(2,getrlimit) }, /* 144 = old getrlimit */
+ { compat(2,setrlimit) }, /* 145 = old setrlimit */
+ { compat(2,killpg) }, /* 146 = old killpg */
+ { 0, setsid }, /* 147 = setsid */
+ { 4, quotactl }, /* 148 = quotactl */
+ { compat(4,quota) }, /* 149 = old quota */
+ { compat(3,getsockname) }, /* 150 = old getsockname */
+ { 0, nosys }, /* 151 = nosys */
+ { 0, nosys }, /* 152 = nosys */
+ { 0, nosys }, /* 153 = nosys */
+ { 0, nosys }, /* 154 = nosys */
+#ifdef NFS
+ { 2, nfssvc }, /* 155 = nfssvc */
+#else
+ { 0, nosys }, /* 155 = nosys */
+#endif
+ { compat(4,getdirentries) }, /* 156 = old getdirentries */
+ { 2, statfs }, /* 157 = statfs */
+ { 2, fstatfs }, /* 158 = fstatfs */
+ { 0, nosys }, /* 159 = nosys */
+ { 0, nosys }, /* 160 = nosys */
+#ifdef NFS
+ { 2, getfh }, /* 161 = getfh */
+#else
+ { 0, nosys }, /* 161 = nosys */
+#endif
+ { 0, nosys }, /* 162 = nosys */
+ { 0, nosys }, /* 163 = nosys */
+ { 0, nosys }, /* 164 = nosys */
+ { 0, nosys }, /* 165 = nosys */
+ { 0, nosys }, /* 166 = nosys */
+ { 0, nosys }, /* 167 = nosys */
+ { 0, nosys }, /* 168 = nosys */
+ { 0, nosys }, /* 169 = nosys */
+ { 0, nosys }, /* 170 = nosys */
+#ifdef SYSVSHM
+ { 4, shmsys }, /* 171 = shmsys */
+#else
+ { 0, nosys }, /* 171 = nosys */
+#endif
+ { 0, nosys }, /* 172 = nosys */
+ { 0, nosys }, /* 173 = nosys */
+ { 0, nosys }, /* 174 = nosys */
+ { 0, nosys }, /* 175 = nosys */
+ { 0, nosys }, /* 176 = nosys */
+ { 0, nosys }, /* 177 = nosys */
+ { 0, nosys }, /* 178 = nosys */
+ { 0, nosys }, /* 179 = nosys */
+ { 0, nosys }, /* 180 = nosys */
+ { 1, setgid }, /* 181 = setgid */
+ { 1, setegid }, /* 182 = setegid */
+ { 1, seteuid }, /* 183 = seteuid */
+#ifdef LFS
+ { 3, lfs_bmapv }, /* 184 = lfs_bmapv */
+ { 3, lfs_markv }, /* 185 = lfs_markv */
+ { 2, lfs_segclean }, /* 186 = lfs_segclean */
+ { 2, lfs_segwait }, /* 187 = lfs_segwait */
+#else
+ { 0, nosys }, /* 184 = nosys */
+ { 0, nosys }, /* 185 = nosys */
+ { 0, nosys }, /* 186 = nosys */
+ { 0, nosys }, /* 187 = nosys */
+#endif
+ { 2, stat }, /* 188 = stat */
+ { 2, fstat }, /* 189 = fstat */
+ { 2, lstat }, /* 190 = lstat */
+ { 2, pathconf }, /* 191 = pathconf */
+ { 2, fpathconf }, /* 192 = fpathconf */
+ { 0, nosys }, /* 193 = nosys */
+ { 2, getrlimit }, /* 194 = getrlimit */
+ { 2, setrlimit }, /* 195 = setrlimit */
+ { 4, getdirentries }, /* 196 = getdirentries */
+ { 8, mmap }, /* 197 = mmap */
+ { 0, nosys }, /* 198 = __syscall */
+ { 5, lseek }, /* 199 = lseek */
+ { 4, truncate }, /* 200 = truncate */
+ { 4, ftruncate }, /* 201 = ftruncate */
+ { 6, __sysctl }, /* 202 = __sysctl */
+ { 2, mlock }, /* 203 = mlock */
+ { 2, munlock }, /* 204 = munlock */
+ { 0, nosys }, /* 205 = nosys */
+ { 0, nosys }, /* 206 = nosys */
+ { 0, nosys }, /* 207 = nosys */
+ { 0, nosys }, /* 208 = nosys */
+ { 0, nosys }, /* 209 = nosys */
+ { 0, nosys }, /* 210 = nosys */
+};
+
+int nsysent = sizeof(sysent) / sizeof(sysent[0]);
diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c
new file mode 100644
index 000000000000..b752279d120a
--- /dev/null
+++ b/sys/kern/kern_acct.c
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)kern_acct.c 8.1 (Berkeley) 6/14/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/syslog.h>
+#include <sys/kernel.h>
+
+/* Argument block of acct(); its only member is fname. */
+struct acct_args {
+	char *fname;
+};
+
+/*
+ * acct system call.  The body has been deleted from this file, so none
+ * of the arguments is looked at and the call always fails with ENOSYS.
+ */
+/* ARGSUSED */
+acct(p, uap, retval)
+	struct proc *p;
+	struct acct_args *uap;
+	int *retval;
+{
+
+	return ENOSYS;
+}
+
+/*
+ * Stub: the body has been deleted from this file.  The process pointer
+ * is ignored and nothing is done.
+ */
+/* ARGSUSED */
+acct_process(p)
+	struct proc *p;
+{
+
+	/* Body deleted; nothing to do. */
+}
+
+/*
+ * Periodically check the file system to see if accounting
+ * should be turned on or off. The check itself is done by acctwatch().
+ */
+
+/*
+ * Values associated with enabling and disabling accounting.
+ * The two thresholds are percentages of the file system's f_blocks and
+ * are compared against f_bavail by acctwatch().
+ */
+int acctsuspend = 2; /* suspend accounting when free space is <= 2% */
+int acctresume = 4; /* resume when free space has risen to > 4% */
+int acctchkfreq = 15; /* frequency (in seconds) to check space */
+
+/*
+ * SHOULD REPLACE THIS WITH A DRIVER THAT CAN BE READ TO SIMPLIFY.
+ *
+ * acctwatch() moves the vnode from acctp to savacctp when it suspends
+ * accounting and back again when it resumes; the pointer that is not in
+ * use is left NULL. With both NULL, acctwatch() returns without
+ * rescheduling itself.
+ */
+struct vnode *acctp;
+struct vnode *savacctp;
+
+/*
+ * Timeout routine that suspends or resumes accounting depending on the
+ * free space of the file system holding the accounting vnode.
+ *
+ * While suspended (savacctp set), accounting resumes once f_bavail is
+ * above acctresume percent of f_blocks.  While active (acctp set), it is
+ * suspended once f_bavail is at or below acctsuspend percent.  With
+ * neither vnode set the routine returns without rearming the timeout.
+ * If VFS_STATFS() fails the state is left as it is, because the statfs
+ * buffer holds nothing meaningful, and the check is retried on the next
+ * tick.  The argument is unused.
+ */
+/* ARGSUSED */
+void
+acctwatch(a)
+	void *a;
+{
+	struct statfs sb;
+	struct vnode *vp;
+
+	/* The vnode to examine: the suspended one if any, else the active one. */
+	vp = savacctp != NULL ? savacctp : acctp;
+	if (vp == NULL)
+		return;
+
+	if (VFS_STATFS(vp->v_mount, &sb, (struct proc *)0) == 0) {
+		if (savacctp != NULL) {
+			if (sb.f_bavail > acctresume * sb.f_blocks / 100) {
+				acctp = savacctp;
+				savacctp = NULL;
+				log(LOG_NOTICE, "Accounting resumed\n");
+			}
+		} else if (sb.f_bavail <= acctsuspend * sb.f_blocks / 100) {
+			savacctp = acctp;
+			acctp = NULL;
+			log(LOG_NOTICE, "Accounting suspended\n");
+		}
+	}
+
+	/* Check again in acctchkfreq seconds. */
+	timeout(acctwatch, NULL, acctchkfreq * hz);
+}
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
new file mode 100644
index 000000000000..f42900cb75d2
--- /dev/null
+++ b/sys/kern/kern_clock.c
@@ -0,0 +1,528 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/dkstat.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+
+/*
+ * Clock handling routines.
+ *
+ * This code is written to operate with two timers that run independently of
+ * each other. The main clock, running hz times per second, is used to keep
+ * track of real time. The second timer handles kernel and user profiling,
+ * and does resource use estimation. If the second timer is programmable,
+ * it is randomized to avoid aliasing between the two clocks. For example,
+ * the randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires. Otherwise, it would never accumulate
+ * cpu ticks. The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we drive
+ * profiling and statistics off the main clock. This WILL NOT be accurate;
+ * do not do it unless absolutely necessary.
+ *
+ * The statistics clock may (or may not) be run at a higher rate while
+ * profiling. This profile clock runs at profhz. We require that profhz
+ * be an integral multiple of stathz.
+ *
+ * If the statistics clock is running fast, it must be divided by the ratio
+ * profhz/stathz for statistics. (For profiling, every tick counts.)
+ */
+
+/*
+ * TODO:
+ * allocate more timeout table slots when table overflows.
+ */
+
+/*
+ * Bump a timeval by a small number of usec's.
+ */
+#define BUMPTIME(t, usec) { \
+ register volatile struct timeval *tp = (t); \
+ register long us; \
+ \
+ tp->tv_usec = us = tp->tv_usec + (usec); \
+ if (us >= 1000000) { \
+ tp->tv_usec = us - 1000000; \
+ tp->tv_sec++; \
+ } \
+}
+
+int stathz; /* statistics clock rate; 0 means statclock() is driven from hardclock() */
+int profhz; /* profiling clock rate; initclocks() fills it in when left at 0 */
+int profprocs; /* count maintained by startprofclock()/stopprofclock() */
+int ticks; /* incremented once per hardclock() call */
+static int psdiv, pscnt; /* prof => stat divider */
+int psratio; /* ratio: prof / stat */
+
+volatile struct timeval time; /* bumped by hardclock() on every call */
+volatile struct timeval mono_time; /* bumped by hardclock() by the same delta as time */
+
+/*
+ * Start the clocks and derive the profiling/statistics ratio.
+ *
+ * The prof => stat divider is reset to 1 (the normal case) before the
+ * machine-dependent cpu_initclocks() runs.  Afterwards the statistics
+ * rate is taken to be stathz, or hz when stathz is zero; profhz defaults
+ * to that rate when it was left at zero, and psratio becomes profhz
+ * divided by it.
+ */
+void
+initclocks()
+{
+	register int statrate;
+
+	/* Divisors start at 1; machine-specific code does the rest. */
+	pscnt = 1;
+	psdiv = 1;
+	cpu_initclocks();
+
+	/* Compute profhz/stathz, and fix profhz if needed. */
+	if (stathz != 0)
+		statrate = stathz;
+	else
+		statrate = hz;
+	if (profhz == 0)
+		profhz = statrate;
+	psratio = profhz / statrate;
+}
+
+/*
+ * The real-time timer, interrupting hz times per second.
+ *
+ * frame is the machine-dependent clock frame; here it is examined only
+ * through CLKF_USERMODE() and CLKF_BASEPRI() and handed on to statclock().
+ * In order, this routine ages the head of the callout queue, charges the
+ * current process's virtual and profiling interval timers, runs
+ * statclock() when stathz is zero, advances ticks, time and mono_time,
+ * and finally runs or schedules softclock() if any callout became due.
+ */
+void
+hardclock(frame)
+ register struct clockframe *frame;
+{
+ register struct callout *p1;
+ register struct proc *p;
+ register int delta, needsoft;
+ extern int tickdelta;
+ extern long timedelta;
+
+ /*
+ * Update real-time timeout queue.
+ * At front of queue are some number of events which are ``due''.
+ * The time to these is <= 0 and if negative represents the
+ * number of ticks which have passed since it was supposed to happen.
+ * The rest of the q elements (times > 0) are events yet to happen,
+ * where the time for each is given as a delta from the previous.
+ * Decrementing just the first of these serves to decrement the time
+ * to all events.
+ *
+ * The loop stops at the first entry that is still in the future, or
+ * at the first entry that has just reached zero; needsoft records
+ * that at least one entry is due.
+ */
+ needsoft = 0;
+ for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
+ if (--p1->c_time > 0)
+ break;
+ needsoft = 1;
+ if (p1->c_time == 0)
+ break;
+ }
+
+ p = curproc;
+ if (p) {
+ register struct pstats *pstats;
+
+ /*
+ * Run current process's virtual and profile time, as needed.
+ * The virtual timer is charged only for ticks taken in user
+ * mode; the profiling timer is charged on every tick.  The
+ * signal is posted when itimerdecr() returns 0.
+ */
+ pstats = p->p_stats;
+ if (CLKF_USERMODE(frame) &&
+ timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+ psignal(p, SIGVTALRM);
+ if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+ psignal(p, SIGPROF);
+ }
+
+ /*
+ * If no separate statistics clock is available, run it from here.
+ */
+ if (stathz == 0)
+ statclock(frame);
+
+ /*
+ * Increment the time-of-day. The increment is just ``tick'' unless
+ * we are still adjusting the clock; see adjtime().
+ * While timedelta is non-zero each tick is stretched by tickdelta
+ * and the same amount is taken off the outstanding timedelta.
+ */
+ ticks++;
+ if (timedelta == 0)
+ delta = tick;
+ else {
+ delta = tick + tickdelta;
+ timedelta -= tickdelta;
+ }
+ BUMPTIME(&time, delta);
+ BUMPTIME(&mono_time, delta);
+
+ /*
+ * Process callouts at a very low cpu priority, so we don't keep the
+ * relatively high clock interrupt priority any longer than necessary.
+ */
+ if (needsoft) {
+ if (CLKF_BASEPRI(frame)) {
+ /*
+ * Save the overhead of a software interrupt;
+ * it will happen as soon as we return, so do it now.
+ */
+ (void)splsoftclock();
+ softclock();
+ } else
+ setsoftclock();
+ }
+}
+
+/*
+ * Software (low priority) clock interrupt.
+ * Run periodic events from timeout queue.
+ *
+ * Every entry at the head of calltodo whose c_time is <= 0 is unlinked
+ * and returned to the callfree list while at splhigh.  The priority is
+ * then restored to the level seen on entry before the saved function is
+ * called with its saved argument, so the entry is already back on the
+ * free list when the function runs.  splhigh is re-entered before the
+ * queue is looked at again.
+ */
+/*ARGSUSED*/
+void
+softclock()
+{
+ register struct callout *c;
+ register void *arg;
+ register void (*func) __P((void *));
+ register int s;
+
+ s = splhigh();
+ while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
+ func = c->c_func; /* copy out before the entry is recycled */
+ arg = c->c_arg;
+ calltodo.c_next = c->c_next;
+ c->c_next = callfree;
+ callfree = c;
+ splx(s);
+ (*func)(arg);
+ (void) splhigh();
+ }
+ splx(s);
+}
+
+/*
+ * timeout --
+ * Execute a function after a specified length of time.
+ *
+ * untimeout --
+ * Cancel previous timeout function call.
+ *
+ * See AT&T BCI Driver Reference Manual for specification. This
+ * implementation differs from that one in that no identification
+ * value is returned from timeout, rather, the original arguments
+ * to timeout are used to identify entries for untimeout.
+ *
+ * For timeout(), ftn is the function to call, arg is passed to it
+ * unchanged, and ticks is the delay; values <= 0 are treated as 1.
+ * The entry is taken from the callfree list, and the kernel panics
+ * with "timeout table full" when that list is empty.
+ */
+void
+timeout(ftn, arg, ticks)
+ void (*ftn) __P((void *));
+ void *arg;
+ register int ticks;
+{
+ register struct callout *new, *p, *t;
+ register int s;
+
+ if (ticks <= 0)
+ ticks = 1;
+
+ /* Lock out the clock. */
+ s = splhigh();
+
+ /* Fill in the next free callout structure. */
+ if (callfree == NULL)
+ panic("timeout table full");
+ new = callfree;
+ callfree = new->c_next;
+ new->c_arg = arg;
+ new->c_func = ftn;
+
+ /*
+ * The time for each event is stored as a difference from the time
+ * of the previous event on the queue. Walk the queue, correcting
+ * the ticks argument for queue entries passed. Correct the ticks
+ * value for the queue entry immediately after the insertion point
+ * as well. Watch out for negative c_time values; these represent
+ * overdue events.
+ *
+ * On exit from the loop p is the entry to insert after and t is the
+ * entry that will follow the new one (NULL at the end of the queue).
+ */
+ for (p = &calltodo;
+ (t = p->c_next) != NULL && ticks > t->c_time; p = t)
+ if (t->c_time > 0)
+ ticks -= t->c_time;
+ new->c_time = ticks;
+ if (t != NULL)
+ t->c_time -= ticks;
+
+ /* Insert the new entry into the queue. */
+ p->c_next = new;
+ new->c_next = t;
+ splx(s);
+}
+
+/*
+ * Cancel a pending timeout.
+ *
+ * The first queue entry whose function and argument both match ftn and
+ * arg is removed and put back on the callfree list.  If no entry matches,
+ * nothing happens.  The whole search runs at splhigh.
+ */
+void
+untimeout(ftn, arg)
+	void (*ftn) __P((void *));
+	void *arg;
+{
+	register struct callout *prev, *cur;
+	register int s;
+
+	s = splhigh();
+	for (prev = &calltodo; (cur = prev->c_next) != NULL; prev = cur) {
+		if (cur->c_func != ftn || cur->c_arg != arg)
+			continue;
+
+		/* Hand this entry's remaining delta on to its successor. */
+		if (cur->c_next && cur->c_time > 0)
+			cur->c_next->c_time += cur->c_time;
+
+		/* Unlink from the callout queue and recycle the entry. */
+		prev->c_next = cur->c_next;
+		cur->c_next = callfree;
+		callfree = cur;
+		break;
+	}
+	splx(s);
+}
+
+/*
+ * Compute number of hz until specified time. Used to
+ * compute third argument to timeout() from an absolute time.
+ *
+ * tv is the absolute target time; the result is the distance from the
+ * global time to tv, in clock ticks.  The computation runs at splhigh.
+ * A target that is not in the future can produce a result <= 0 in the
+ * first branch; timeout() treats such values as 1.
+ *
+ * NOTE(review): the first branch divides by (tick / 1000); assuming tick
+ * is an integer count of microseconds, that divisor is zero whenever
+ * tick < 1000 -- confirm that hz can never exceed 1000 here.
+ */
+int
+hzto(tv)
+ struct timeval *tv;
+{
+ register long ticks, sec;
+ int s;
+
+ /*
+ * If number of milliseconds will fit in 32 bit arithmetic,
+ * then compute number of milliseconds to time and scale to
+ * ticks. Otherwise just compute number of hz in time, rounding
+ * times greater than representable to maximum value.
+ *
+ * Delta times less than 25 days can be computed ``exactly''.
+ * Maximum value for any timeout in 10ms ticks is 250 days.
+ */
+ s = splhigh();
+ sec = tv->tv_sec - time.tv_sec;
+ if (sec <= 0x7fffffff / 1000 - 1000)
+ ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
+ (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
+ else if (sec <= 0x7fffffff / hz)
+ ticks = sec * hz; /* whole seconds only; the usec part is dropped */
+ else
+ ticks = 0x7fffffff;
+ splx(s);
+ return (ticks);
+}
+
+/*
+ * Start profiling on a process.
+ *
+ * Kernel profiling passes proc0 which never exits and hence
+ * keeps the profile clock running constantly.
+ *
+ * Nothing happens if p already has P_PROFIL set.  Otherwise the flag is
+ * set and the profiled-process count is bumped; when this is the first
+ * profiled process and a separate statistics clock exists, the divider
+ * is loaded with psratio and the clock is switched to profhz.
+ */
+void
+startprofclock(p)
+	register struct proc *p;
+{
+	int s;
+
+	if (p->p_flag & P_PROFIL)
+		return;
+	p->p_flag |= P_PROFIL;
+
+	if (++profprocs != 1 || stathz == 0)
+		return;
+
+	s = splstatclock();
+	pscnt = psratio;
+	psdiv = pscnt;
+	setstatclockrate(profhz);
+	splx(s);
+}
+
+/*
+ * Stop profiling on a process.
+ *
+ * Nothing happens unless p has P_PROFIL set.  The flag is cleared and
+ * the profiled-process count is dropped; when it reaches zero and a
+ * separate statistics clock exists, the divider goes back to 1 and the
+ * clock is switched back to stathz.
+ */
+void
+stopprofclock(p)
+	register struct proc *p;
+{
+	int s;
+
+	if ((p->p_flag & P_PROFIL) == 0)
+		return;
+	p->p_flag &= ~P_PROFIL;
+
+	if (--profprocs != 0 || stathz == 0)
+		return;
+
+	s = splstatclock();
+	pscnt = 1;
+	psdiv = 1;
+	setstatclockrate(stathz);
+	splx(s);
+}
+
+int dk_ndrive = DK_NDRIVE; /* run-time copy of the compile-time drive count */
+
+/*
+ * Statistics clock. Grab profile sample, and if divider reaches 0,
+ * do process and kernel statistics.
+ *
+ * frame is the machine-dependent clock frame, examined through the
+ * CLKF_USERMODE(), CLKF_PC() and CLKF_INTR() macros.  A profiling sample
+ * is taken on every call; the cpu-state, disk and scheduling statistics
+ * below are updated only on the call that brings pscnt down to zero,
+ * after which pscnt is reloaded from psdiv.
+ */
+void
+statclock(frame)
+ register struct clockframe *frame;
+{
+#ifdef GPROF
+ register struct gmonparam *g;
+#endif
+ register struct proc *p;
+ register int i;
+
+ if (CLKF_USERMODE(frame)) {
+ p = curproc;
+ if (p->p_flag & P_PROFIL)
+ addupc_intr(p, CLKF_PC(frame), 1);
+ if (--pscnt > 0)
+ return;
+ /*
+ * Came from user mode; CPU was in user state.
+ * If this process is being profiled record the tick.
+ */
+ p->p_uticks++;
+ if (p->p_nice > NZERO)
+ cp_time[CP_NICE]++;
+ else
+ cp_time[CP_USER]++;
+ } else {
+#ifdef GPROF
+ /*
+ * Kernel statistics are just like addupc_intr, only easier.
+ */
+ g = &_gmonparam;
+ if (g->state == GMON_PROF_ON) {
+ i = CLKF_PC(frame) - g->lowpc;
+ if (i < g->textsize) {
+ i /= HISTFRACTION * sizeof(*g->kcount);
+ g->kcount[i]++;
+ }
+ }
+#endif
+ if (--pscnt > 0)
+ return;
+ /*
+ * Came from kernel mode, so we were:
+ * - handling an interrupt,
+ * - doing syscall or trap work on behalf of the current
+ * user process, or
+ * - spinning in the idle loop.
+ * Whichever it is, charge the time as appropriate.
+ * Note that we charge interrupts to the current process,
+ * regardless of whether they are ``for'' that process,
+ * so that we know how much of its real time was spent
+ * in ``non-process'' (i.e., interrupt) work.
+ */
+ p = curproc;
+ if (CLKF_INTR(frame)) {
+ if (p != NULL)
+ p->p_iticks++;
+ cp_time[CP_INTR]++;
+ } else if (p != NULL) {
+ p->p_sticks++;
+ cp_time[CP_SYS]++;
+ } else
+ cp_time[CP_IDLE]++;
+ }
+ pscnt = psdiv;
+
+ /*
+ * We maintain statistics shown by user-level statistics
+ * programs: the amount of time in each cpu state, and
+ * the amount of time each of DK_NDRIVE ``drives'' is busy.
+ *
+ * XXX should either run linked list of drives, or (better)
+ * grab timestamps in the start & done code.
+ */
+ for (i = 0; i < DK_NDRIVE; i++)
+ if (dk_busy & (1 << i))
+ dk_time[i]++;
+
+ /*
+ * We adjust the priority of the current process. The priority of
+ * a process gets worse as it accumulates CPU time. The cpu usage
+ * estimator (p_estcpu) is increased here. The formula for computing
+ * priorities (in kern_synch.c) will compute a different value each
+ * time p_estcpu increases by 4. The cpu usage estimator ramps up
+ * quite quickly when the process is running (linearly), and decays
+ * away exponentially, at a rate which is proportionally slower when
+ * the system is busy. The basic principle is that the system will
+ * 90% forget that the process used a lot of CPU time in 5 * loadav
+ * seconds. This causes the system to favor processes which haven't
+ * run much recently, and to round-robin among other processes.
+ */
+ if (p != NULL) {
+ p->p_cpticks++;
+ if (++p->p_estcpu == 0) /* wrapped to zero: step back so it saturates */
+ p->p_estcpu--;
+ if ((p->p_estcpu & 3) == 0) {
+ resetpriority(p);
+ if (p->p_priority >= PUSER)
+ p->p_priority = p->p_usrpri;
+ }
+ }
+}
+
+/*
+ * Return information about system clocks.
+ *
+ * A clockinfo structure is filled in from hz, tick, profhz and stathz
+ * (hz is reported as the statistics rate when stathz is zero) and handed
+ * to sysctl_rdstruct() together with where and sizep; that call's return
+ * value is returned unchanged.
+ */
+sysctl_clockrate(where, sizep)
+	register char *where;
+	size_t *sizep;
+{
+	struct clockinfo ci;
+
+	ci.hz = hz;
+	ci.tick = tick;
+	ci.profhz = profhz;
+	if (stathz != 0)
+		ci.stathz = stathz;
+	else
+		ci.stathz = hz;
+	return (sysctl_rdstruct(where, sizep, NULL, &ci, sizeof(ci)));
+}
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
new file mode 100644
index 000000000000..543946d3f8fd
--- /dev/null
+++ b/sys/kern/kern_descrip.c
@@ -0,0 +1,914 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
+#include <sys/syslog.h>
+#include <sys/unistd.h>
+#include <sys/resourcevar.h>
+
+/*
+ * Descriptor management.
+ */
+struct file *filehead; /* head of list of open files */
+int nfiles; /* actual number of open files */
+
+/*
+ * System calls on descriptors.
+ */
+
+/*
+ * getdtablesize takes no real arguments.
+ */
+struct getdtablesize_args {
+	int	dummy;
+};
+
+/*
+ * Report the descriptor table size for the process: the smaller of its
+ * current RLIMIT_NOFILE limit and the system-wide maxfiles.  Always
+ * returns 0.
+ */
+/* ARGSUSED */
+getdtablesize(p, uap, retval)
+	struct proc *p;
+	struct getdtablesize_args *uap;
+	int *retval;
+{
+	int proclim;
+
+	proclim = (int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur;
+	*retval = min(proclim, maxfiles);
+	return (0);
+}
+
+/*
+ * Duplicate a file descriptor.
+ *
+ * uap->fd is the descriptor to duplicate; the new descriptor is returned
+ * through retval.  Returns EBADF if the descriptor is out of range or not
+ * open, or the error from fdalloc().
+ */
+struct dup_args {
+	u_int	fd;
+};
+/* ARGSUSED */
+dup(p, uap, retval)
+	struct proc *p;
+	struct dup_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp;
+	u_int old;
+	int new, error;
+
+	old = uap->fd;
+
+	/*
+	 * XXX Compatibility
+	 * Any bits above the low six select the dup2() path, with the
+	 * descriptor reduced to its low six bits.
+	 */
+	if (old & ~077) {
+		uap->fd &= 077;
+		return (dup2(p, uap, retval));
+	}
+
+	fdp = p->p_fd;
+	if (old >= fdp->fd_nfiles)
+		return (EBADF);
+	if (fdp->fd_ofiles[old] == NULL)
+		return (EBADF);
+
+	error = fdalloc(p, 0, &new);
+	if (error)
+		return (error);
+	return (finishdup(fdp, (int)old, new, retval));
+}
+
+/*
+ * Duplicate a file descriptor to a particular value.
+ *
+ * uap->from is the existing descriptor and uap->to the requested one.
+ * Returns EBADF when the source is out of range or not open, or when the
+ * target is at or above RLIMIT_NOFILE or maxfiles.  Duplicating a
+ * descriptor onto itself succeeds without doing anything.  A file already
+ * open on the target descriptor is closed first.
+ */
+struct dup2_args {
+	u_int	from;
+	u_int	to;
+};
+/* ARGSUSED */
+dup2(p, uap, retval)
+	struct proc *p;
+	struct dup2_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp;
+	register u_int old, new;
+	int slot, error;
+
+	fdp = p->p_fd;
+	old = uap->from;
+	new = uap->to;
+
+	/* The source must be open. */
+	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
+		return (EBADF);
+	/* The target must be within both limits. */
+	if (new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || new >= maxfiles)
+		return (EBADF);
+
+	if (old == new) {
+		*retval = new;
+		return (0);
+	}
+
+	if (new >= fdp->fd_nfiles) {
+		/* Target lies beyond the table: grow it to reach new. */
+		error = fdalloc(p, new, &slot);
+		if (error)
+			return (error);
+		if (new != slot)
+			panic("dup2: fdalloc");
+	} else if (fdp->fd_ofiles[new]) {
+		if (fdp->fd_ofileflags[new] & UF_MAPPED)
+			(void) munmapfd(p, new);
+		/*
+		 * dup2() must succeed even if the close has an error.
+		 */
+		(void) closef(fdp->fd_ofiles[new], p);
+	}
+	return (finishdup(fdp, (int)old, (int)new, retval));
+}
+
+/*
+ * The file control system call.
+ *
+ * uap->fd selects the descriptor, uap->cmd the operation and uap->arg its
+ * argument (an integer, or a user address of a struct flock for the lock
+ * commands).  Returns EBADF when the descriptor is out of range or not
+ * open, EINVAL for an unknown command, and otherwise the result of the
+ * selected operation.  Values are handed back through retval.
+ */
+struct fcntl_args {
+ int fd;
+ int cmd;
+ int arg;
+};
+/* ARGSUSED */
+fcntl(p, uap, retval)
+ struct proc *p;
+ register struct fcntl_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ register char *pop;
+ struct vnode *vp;
+ int i, tmp, error, flg = F_POSIX;
+ struct flock fl;
+ u_int newmin;
+
+ if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+ pop = &fdp->fd_ofileflags[uap->fd]; /* per-descriptor flag byte */
+ switch (uap->cmd) {
+
+ case F_DUPFD: /* duplicate onto a free descriptor numbered >= arg */
+ newmin = uap->arg;
+ if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
+ newmin >= maxfiles)
+ return (EINVAL);
+ if (error = fdalloc(p, newmin, &i))
+ return (error);
+ return (finishdup(fdp, uap->fd, i, retval));
+
+ case F_GETFD: /* report bit 0 of the descriptor flags */
+ *retval = *pop & 1;
+ return (0);
+
+ case F_SETFD: /* replace bit 0 of the descriptor flags */
+ *pop = (*pop &~ 1) | (uap->arg & 1);
+ return (0);
+
+ case F_GETFL: /* report the file status flags in open(2) form */
+ *retval = OFLAGS(fp->f_flag);
+ return (0);
+
+ case F_SETFL: /* replace the FCNTLFLAGS bits, then tell the object */
+ fp->f_flag &= ~FCNTLFLAGS;
+ fp->f_flag |= FFLAGS(uap->arg) & FCNTLFLAGS;
+ tmp = fp->f_flag & FNONBLOCK;
+ error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+ if (error)
+ return (error);
+ tmp = fp->f_flag & FASYNC;
+ error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
+ if (!error)
+ return (0);
+ fp->f_flag &= ~FNONBLOCK; /* FIOASYNC failed: turn non-blocking back off */
+ tmp = 0;
+ (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+ return (error);
+
+ case F_GETOWN: /* sockets report so_pgid; others the negated TIOCGPGRP value */
+ if (fp->f_type == DTYPE_SOCKET) {
+ *retval = ((struct socket *)fp->f_data)->so_pgid;
+ return (0);
+ }
+ error = (*fp->f_ops->fo_ioctl)
+ (fp, (int)TIOCGPGRP, (caddr_t)retval, p);
+ *retval = -*retval;
+ return (error);
+
+ case F_SETOWN: /* sockets store arg as is; others get a process group id */
+ if (fp->f_type == DTYPE_SOCKET) {
+ ((struct socket *)fp->f_data)->so_pgid = uap->arg;
+ return (0);
+ }
+ if (uap->arg <= 0) {
+ uap->arg = -uap->arg;
+ } else {
+ struct proc *p1 = pfind(uap->arg);
+ if (p1 == 0)
+ return (ESRCH);
+ uap->arg = p1->p_pgrp->pg_id;
+ }
+ return ((*fp->f_ops->fo_ioctl)
+ (fp, (int)TIOCSPGRP, (caddr_t)&uap->arg, p));
+
+ case F_SETLKW: /* as F_SETLK, with F_WAIT added to the lock flags */
+ flg |= F_WAIT;
+ /* Fall into F_SETLK */
+
+ case F_SETLK: /* set or clear a lock described by the user's struct flock */
+ if (fp->f_type != DTYPE_VNODE)
+ return (EBADF);
+ vp = (struct vnode *)fp->f_data;
+ /* Copy in the lock structure */
+ error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
+ if (error)
+ return (error);
+ if (fl.l_whence == SEEK_CUR)
+ fl.l_start += fp->f_offset;
+ switch (fl.l_type) {
+
+ case F_RDLCK: /* needs the file open for reading */
+ if ((fp->f_flag & FREAD) == 0)
+ return (EBADF);
+ p->p_flag |= P_ADVLOCK;
+ return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
+
+ case F_WRLCK: /* needs the file open for writing */
+ if ((fp->f_flag & FWRITE) == 0)
+ return (EBADF);
+ p->p_flag |= P_ADVLOCK;
+ return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
+
+ case F_UNLCK:
+ return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
+ F_POSIX));
+
+ default:
+ return (EINVAL);
+ }
+
+ case F_GETLK: /* query a lock; the result is copied back to the user's struct flock */
+ if (fp->f_type != DTYPE_VNODE)
+ return (EBADF);
+ vp = (struct vnode *)fp->f_data;
+ /* Copy in the lock structure */
+ error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
+ if (error)
+ return (error);
+ if (fl.l_whence == SEEK_CUR)
+ fl.l_start += fp->f_offset;
+ if (error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX))
+ return (error);
+ return (copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl)));
+
+ default:
+ return (EINVAL);
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Common code for dup, dup2, and fcntl(F_DUPFD).
+ *
+ * Makes descriptor new refer to the file open on descriptor old: the
+ * file pointer is shared and its reference count bumped, the descriptor
+ * flags are copied with UF_EXCLOSE cleared, and fd_lastfile is raised if
+ * needed.  The new descriptor is stored in *retval; always returns 0.
+ */
+int
+finishdup(fdp, old, new, retval)
+	register struct filedesc *fdp;
+	register int old, new, *retval;
+{
+	register struct file *shared = fdp->fd_ofiles[old];
+
+	fdp->fd_ofiles[new] = shared;
+	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
+	shared->f_count++;
+	if (fdp->fd_lastfile < new)
+		fdp->fd_lastfile = new;
+	*retval = new;
+	return (0);
+}
+
+/*
+ * Close a file descriptor.
+ *
+ * uap->fd is the descriptor to close.  Returns EBADF if it is out of
+ * range or not open; otherwise the slot is cleared, fd_lastfile and
+ * fd_freefile are brought up to date, and the result of closef() on the
+ * file is returned.
+ */
+struct close_args {
+	int	fd;
+};
+/* ARGSUSED */
+close(p, uap, retval)
+	struct proc *p;
+	struct close_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	register int fd = uap->fd;
+	register u_char *flagp;
+
+	if ((unsigned)fd >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[fd];
+	if (fp == NULL)
+		return (EBADF);
+
+	flagp = (u_char *)&fdp->fd_ofileflags[fd];
+	if (*flagp & UF_MAPPED)
+		(void) munmapfd(p, fd);
+	fdp->fd_ofiles[fd] = NULL;
+
+	/* Pull fd_lastfile down past any trailing empty slots. */
+	while (fdp->fd_lastfile > 0 &&
+	    fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
+		fdp->fd_lastfile--;
+	/* Remember the lowest slot known to be free. */
+	if (fdp->fd_freefile > fd)
+		fdp->fd_freefile = fd;
+	*flagp = 0;
+	return (closef(fp, p));
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Return status information about a file descriptor (old stat format).
+ *
+ * uap->fd names the descriptor and uap->sb is the user address that
+ * receives a struct ostat.  Returns EBADF for a descriptor that is out of
+ * range or not open, otherwise the error from vn_stat()/soo_stat() or
+ * from copyout().  Panics on a file type other than vnode or socket.
+ */
+struct ofstat_args {
+	int	fd;
+	struct	ostat *sb;
+};
+/* ARGSUSED */
+ofstat(p, uap, retval)
+	struct proc *p;
+	register struct ofstat_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	struct stat ub;
+	struct ostat oub;
+	int error;
+
+	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+	switch (fp->f_type) {
+
+	case DTYPE_VNODE:
+		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
+		break;
+
+	case DTYPE_SOCKET:
+		error = soo_stat((struct socket *)fp->f_data, &ub);
+		break;
+
+	default:
+		panic("ofstat");
+		/*NOTREACHED*/
+	}
+	/*
+	 * Convert only after a successful lookup, so that a stat buffer
+	 * the lookup did not fill in is never read.
+	 */
+	if (error == 0) {
+		cvtstat(&ub, &oub);
+		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
+	}
+	return (error);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Return status information about a file descriptor.
+ *
+ * uap->fd names the descriptor and uap->sb is the user address that
+ * receives the struct stat.  Returns EBADF for a descriptor that is out
+ * of range or not open, otherwise the error from vn_stat()/soo_stat() or
+ * from copyout().  Panics on a file type other than vnode or socket.
+ */
+struct fstat_args {
+	int	fd;
+	struct	stat *sb;
+};
+/* ARGSUSED */
+fstat(p, uap, retval)
+	struct proc *p;
+	register struct fstat_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	struct stat ub;
+	int error;
+
+	if ((unsigned)uap->fd >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[uap->fd];
+	if (fp == NULL)
+		return (EBADF);
+
+	if (fp->f_type == DTYPE_VNODE)
+		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
+	else if (fp->f_type == DTYPE_SOCKET)
+		error = soo_stat((struct socket *)fp->f_data, &ub);
+	else {
+		panic("fstat");
+		/*NOTREACHED*/
+	}
+
+	if (error != 0)
+		return (error);
+	return (copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub)));
+}
+
+/*
+ * Return pathconf information about a file descriptor.
+ *
+ * uap->fd names the descriptor and uap->name the variable being asked
+ * for.  Sockets answer only _PC_PIPE_BUF (with PIPE_BUF) and give EINVAL
+ * for anything else; vnodes defer to VOP_PATHCONF().  Returns EBADF for
+ * a descriptor that is out of range or not open, and panics on any other
+ * file type.
+ */
+struct fpathconf_args {
+	int	fd;
+	int	name;
+};
+/* ARGSUSED */
+fpathconf(p, uap, retval)
+	struct proc *p;
+	register struct fpathconf_args *uap;
+	int *retval;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct file *fp;
+	struct vnode *vp;
+
+	if ((unsigned)uap->fd >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[uap->fd];
+	if (fp == NULL)
+		return (EBADF);
+
+	if (fp->f_type == DTYPE_VNODE) {
+		vp = (struct vnode *)fp->f_data;
+		return (VOP_PATHCONF(vp, uap->name, retval));
+	}
+	if (fp->f_type == DTYPE_SOCKET) {
+		if (uap->name != _PC_PIPE_BUF)
+			return (EINVAL);
+		*retval = PIPE_BUF;
+		return (0);
+	}
+	panic("fpathconf");
+	/*NOTREACHED*/
+}
+
+/*
+ * Allocate a file descriptor for the process.
+ *
+ * want is the lowest acceptable descriptor number.  On success the chosen
+ * number is stored in *result and 0 is returned; the fd_ofiles slot itself
+ * is not written here.  EMFILE is returned when no slot is free and the
+ * table has already reached min(RLIMIT_NOFILE, maxfiles) entries.
+ * fdexpand counts how many times a descriptor table has been grown.
+ */
+int fdexpand;
+
+fdalloc(p, want, result)
+ struct proc *p;
+ int want;
+ int *result;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register int i;
+ int lim, last, nfiles; /* nfiles here is the new table size, not the global */
+ struct file **newofile;
+ char *newofileflags;
+
+ /*
+ * Search for a free descriptor starting at the higher
+ * of want or fd_freefile. If that fails, consider
+ * expanding the ofile array.
+ */
+ lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
+ for (;;) {
+ last = min(fdp->fd_nfiles, lim);
+ if ((i = want) < fdp->fd_freefile)
+ i = fdp->fd_freefile;
+ for (; i < last; i++) {
+ if (fdp->fd_ofiles[i] == NULL) {
+ fdp->fd_ofileflags[i] = 0;
+ if (i > fdp->fd_lastfile)
+ fdp->fd_lastfile = i;
+ if (want <= fdp->fd_freefile)
+ fdp->fd_freefile = i;
+ *result = i;
+ return (0);
+ }
+ }
+
+ /*
+ * No space in current array. Expand?
+ * The table grows to NDEXTENT entries, or doubles once it is
+ * at least that large, and the search is then repeated.
+ */
+ if (fdp->fd_nfiles >= lim)
+ return (EMFILE);
+ if (fdp->fd_nfiles < NDEXTENT)
+ nfiles = NDEXTENT;
+ else
+ nfiles = 2 * fdp->fd_nfiles;
+ MALLOC(newofile, struct file **, nfiles * OFILESIZE,
+ M_FILEDESC, M_WAITOK);
+ newofileflags = (char *) &newofile[nfiles]; /* flags follow the pointers in the same allocation */
+ /*
+ * Copy the existing ofile and ofileflags arrays
+ * and zero the new portion of each array.
+ */
+ bcopy(fdp->fd_ofiles, newofile,
+ (i = sizeof(struct file *) * fdp->fd_nfiles));
+ bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
+ bcopy(fdp->fd_ofileflags, newofileflags,
+ (i = sizeof(char) * fdp->fd_nfiles));
+ bzero(newofileflags + i, nfiles * sizeof(char) - i);
+ if (fdp->fd_nfiles > NDFILE) /* presumably tables of NDFILE entries live inside the filedesc -- confirm */
+ FREE(fdp->fd_ofiles, M_FILEDESC);
+ fdp->fd_ofiles = newofile;
+ fdp->fd_ofileflags = newofileflags;
+ fdp->fd_nfiles = nfiles;
+ fdexpand++;
+ }
+}
+
+/*
+ * Check to see whether n user file descriptors
+ * are available to the process p.
+ *
+ * Returns 1 if they are and 0 if not.  Room by which the table could
+ * still grow below min(RLIMIT_NOFILE, maxfiles) is counted first, then
+ * empty slots from fd_freefile up to the end of the current table.
+ */
+fdavail(p, n)
+	struct proc *p;
+	register int n;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file **slot;
+	register int i, lim;
+
+	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
+
+	/* Descriptors the table could still grow by. */
+	i = lim - fdp->fd_nfiles;
+	if (i > 0) {
+		n -= i;
+		if (n <= 0)
+			return (1);
+	}
+
+	/* Empty slots already present in the table. */
+	slot = &fdp->fd_ofiles[fdp->fd_freefile];
+	i = fdp->fd_nfiles - fdp->fd_freefile;
+	while (--i >= 0) {
+		if (*slot == NULL && --n <= 0)
+			return (1);
+		slot++;
+	}
+	return (0);
+}
+
+/*
+ * Create a new open file structure and allocate
+ * a file descriptor for the process that refers to it.
+ *
+ * On success returns 0 and stores the new file in *resultfp and the
+ * descriptor in *resultfd (either pointer may be NULL).  Returns the
+ * error from fdalloc(), or ENFILE when nfiles has reached maxfiles.
+ * The new file starts zeroed, with f_count 1 and a held reference to the
+ * process credentials.
+ */
+falloc(p, resultfp, resultfd)
+ register struct proc *p;
+ struct file **resultfp;
+ int *resultfd;
+{
+ register struct file *fp, *fq, **fpp;
+ int error, i;
+
+ if (error = fdalloc(p, 0, &i))
+ return (error);
+ if (nfiles >= maxfiles) {
+ tablefull("file");
+ return (ENFILE);
+ }
+ /*
+ * Allocate a new file descriptor.
+ * If the process has file descriptor zero open, add to the list
+ * of open files at that point, otherwise put it at the front of
+ * the list of open files.
+ */
+ nfiles++;
+ MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
+ bzero(fp, sizeof(struct file));
+ if (fq = p->p_fd->fd_ofiles[0])
+ fpp = &fq->f_filef;
+ else
+ fpp = &filehead;
+ p->p_fd->fd_ofiles[i] = fp;
+ if (fq = *fpp) /* old successor, if any, now points back at fp */
+ fq->f_fileb = &fp->f_filef;
+ fp->f_filef = fq;
+ fp->f_fileb = fpp;
+ *fpp = fp;
+ fp->f_count = 1;
+ fp->f_cred = p->p_ucred;
+ crhold(fp->f_cred);
+ if (resultfp)
+ *resultfp = fp;
+ if (resultfd)
+ *resultfd = i;
+ return (0);
+}
+
+/*
+ * Free a file structure.
+ *
+ * fp is unlinked from the list of open files, its credential reference
+ * is dropped, the open-file count is decremented and the memory is
+ * released.  With DIAGNOSTIC the link and count fields are cleared
+ * before the structure is freed.
+ */
+ffree(fp)
+	register struct file *fp;
+{
+	register struct file *next;
+
+	/* Unlink: fix the successor's back pointer, then the forward link. */
+	next = fp->f_filef;
+	if (next)
+		next->f_fileb = fp->f_fileb;
+	*fp->f_fileb = next;
+
+	crfree(fp->f_cred);
+#ifdef DIAGNOSTIC
+	fp->f_filef = NULL;
+	fp->f_fileb = NULL;
+	fp->f_count = 0;
+#endif
+	nfiles--;
+	FREE(fp, M_FILE);
+}
+
+/*
+ * Copy a filedesc structure.
+ *
+ * Returns a newly allocated copy of p's descriptor table with a
+ * reference count of 1.  The current (and, if set, root) directory
+ * vnodes gain a reference, and every open file in the copy has its
+ * f_count bumped.  The copy's descriptor arrays may be smaller than the
+ * original's.
+ */
+struct filedesc *
+fdcopy(p)
+	struct proc *p;
+{
+	register struct filedesc *newfdp, *fdp = p->p_fd;
+	register struct file **fpp;
+	register int i;
+
+	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
+	    M_FILEDESC, M_WAITOK);
+	bcopy(fdp, newfdp, sizeof(struct filedesc));
+	VREF(newfdp->fd_cdir);
+	if (newfdp->fd_rdir)
+		VREF(newfdp->fd_rdir);
+	newfdp->fd_refcnt = 1;
+
+	/*
+	 * If the number of open files fits in the internal arrays
+	 * of the open file structure, use them, otherwise allocate
+	 * additional memory for the number of descriptors currently
+	 * in use.
+	 */
+	if (newfdp->fd_lastfile < NDFILE) {
+		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
+		newfdp->fd_ofileflags =
+		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
+		i = NDFILE;
+	} else {
+		/*
+		 * Start from the current table size and halve it while
+		 * it stays above 2 * NDEXTENT and above twice the highest
+		 * descriptor in use, allowing the table to shrink.
+		 */
+		i = newfdp->fd_nfiles;
+		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
+			i /= 2;
+		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
+		    M_FILEDESC, M_WAITOK);
+		/* The flag bytes follow the pointers in the same allocation. */
+		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
+	}
+	newfdp->fd_nfiles = i;
+	/* The table elements are struct file pointers; size the copy so. */
+	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file *));
+	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
+
+	/* Take a reference on every file the copy now shares. */
+	fpp = newfdp->fd_ofiles;
+	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
+		if (*fpp != NULL)
+			(*fpp)->f_count++;
+	return (newfdp);
+}
+
+/*
+ * Release a filedesc structure.
+ *
+ * Drops one reference on p's descriptor table.  When the last reference
+ * goes, every file in slots 0 through fd_lastfile is closed with
+ * closef(), a separately allocated descriptor array is freed, the
+ * directory vnodes are released and the structure itself is freed.
+ */
+void
+fdfree(p)
+	struct proc *p;
+{
+	register struct filedesc *fdp = p->p_fd;
+	struct file **slot;
+	register int left;
+
+	fdp->fd_refcnt--;
+	if (fdp->fd_refcnt > 0)
+		return;
+
+	slot = fdp->fd_ofiles;
+	left = fdp->fd_lastfile;
+	while (left-- >= 0) {
+		if (*slot)
+			(void) closef(*slot, p);
+		slot++;
+	}
+
+	if (fdp->fd_nfiles > NDFILE)
+		FREE(fdp->fd_ofiles, M_FILEDESC);
+	vrele(fdp->fd_cdir);
+	if (fdp->fd_rdir)
+		vrele(fdp->fd_rdir);
+	FREE(fdp, M_FILEDESC);
+}
+
+/*
+ * Internal form of close.
+ * Decrement reference count on file structure.
+ * Note: p may be NULL when closing a file
+ * that was being passed in a message.
+ *
+ * Returns 0 when fp is NULL or when other references to the file
+ * remain.  On the last reference the file's fo_close routine is called
+ * (if f_ops is set), the structure is released with ffree(), and the
+ * fo_close result is returned.  Panics if the count goes negative.
+ */
+closef(fp, p)
+ register struct file *fp;
+ register struct proc *p;
+{
+ struct vnode *vp;
+ struct flock lf;
+ int error;
+
+ if (fp == NULL)
+ return (0);
+ /*
+ * POSIX record locking dictates that any close releases ALL
+ * locks owned by this process. This is handled by setting
+ * a flag in the unlock to free ONLY locks obeying POSIX
+ * semantics, and not to free BSD-style file locks.
+ * If the descriptor was in a message, POSIX-style locks
+ * aren't passed with the descriptor.
+ */
+ if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_UNLCK;
+ vp = (struct vnode *)fp->f_data;
+ (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
+ }
+ if (--fp->f_count > 0)
+ return (0);
+ if (fp->f_count < 0)
+ panic("closef: count < 0");
+ if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { /* last reference: drop the flock()-style lock, keyed by fp */
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_UNLCK;
+ vp = (struct vnode *)fp->f_data;
+ (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
+ }
+ if (fp->f_ops)
+ error = (*fp->f_ops->fo_close)(fp, p);
+ else
+ error = 0;
+ ffree(fp);
+ return (error);
+}
+
+/*
+ * Apply an advisory lock on a file descriptor.
+ *
+ * Just attempt to get a record lock of the requested type on
+ * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
+ *
+ * Returns EBADF for an out-of-range or unopened descriptor, or when
+ * `how' contains none of LOCK_UN, LOCK_EX or LOCK_SH; EOPNOTSUPP when
+ * the file is not a vnode; otherwise the result of VOP_ADVLOCK.
+ * LOCK_UN is tested first, then LOCK_EX, then LOCK_SH.
+ * NOTE(review): FHASLOCK is set before the lock is attempted and is
+ * not cleared here if the attempt fails.
+ */
+struct flock_args {
+ int fd;
+ int how;
+};
+/* ARGSUSED */
+flock(p, uap, retval)
+ struct proc *p;
+ register struct flock_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ struct vnode *vp;
+ struct flock lf;
+
+ if ((unsigned)uap->fd >= fdp->fd_nfiles || /* unsigned cast also rejects negative fds */
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+ if (fp->f_type != DTYPE_VNODE)
+ return (EOPNOTSUPP);
+ vp = (struct vnode *)fp->f_data;
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ if (uap->how & LOCK_UN) {
+ lf.l_type = F_UNLCK;
+ fp->f_flag &= ~FHASLOCK;
+ return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
+ }
+ if (uap->how & LOCK_EX)
+ lf.l_type = F_WRLCK;
+ else if (uap->how & LOCK_SH)
+ lf.l_type = F_RDLCK;
+ else
+ return (EBADF);
+ fp->f_flag |= FHASLOCK; /* closef() checks this flag to release the lock */
+ if (uap->how & LOCK_NB) /* non-blocking request: F_WAIT is omitted */
+ return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
+ return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
+}
+
+/*
+ * File Descriptor pseudo-device driver (/dev/fd/).
+ *
+ * Opening minor device N dup()s the file (if any) connected to file
+ * descriptor N belonging to the calling process. Note that this driver
+ * consists of only the ``open()'' routine, because all subsequent
+ * references to this file will be direct to the other driver.
+ *
+ * This routine always returns ENODEV; dev's minor number is recorded in
+ * p->p_dupfd, and mode and type are unused.
+ */
+/* ARGSUSED */
+fdopen(dev, mode, type, p)
+ dev_t dev;
+ int mode, type;
+ struct proc *p;
+{
+
+ /*
+ * XXX Kludge: set p->p_dupfd (p is presumably curproc) to the value
+ * of the file descriptor being sought for duplication. The error
+ * return ensures that the vnode for this device will be released
+ * by vn_open. Open will detect this special error and take the
+ * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
+ * will simply report the error.
+ */
+ p->p_dupfd = minor(dev);
+ return (ENODEV);
+}
+
+/*
+ * Duplicate the specified descriptor to a free descriptor.
+ *
+ * fdp is the descriptor table, indx the slot that receives the file,
+ * dfd the descriptor being duplicated, mode the requested open mode,
+ * and error the code (ENODEV or ENXIO) that selects the action below.
+ * Returns 0 on success, EBADF or EACCES on rejection, or `error'
+ * unchanged when it is neither ENODEV nor ENXIO.
+ */
+dupfdopen(fdp, indx, dfd, mode, error)
+ register struct filedesc *fdp;
+ register int indx, dfd;
+ int mode;
+ int error;
+{
+ register struct file *wfp;
+ struct file *fp;
+
+ /*
+ * If the to-be-dup'd fd number is greater than the allowed number
+ * of file descriptors, or the fd to be dup'd has already been
+ * closed, reject. Note, check for new == old is necessary as
+ * falloc could allocate an already closed to-be-dup'd descriptor
+ * as the new descriptor.
+ */
+ fp = fdp->fd_ofiles[indx]; /* indx itself is not range-checked here */
+ if ((u_int)dfd >= fdp->fd_nfiles ||
+ (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
+ return (EBADF);
+
+ /*
+ * There are two cases of interest here.
+ *
+ * For ENODEV simply dup (dfd) to file descriptor
+ * (indx) and return.
+ *
+ * For ENXIO steal away the file structure from (dfd) and
+ * store it in (indx). (dfd) is effectively closed by
+ * this operation.
+ *
+ * Any other error code is just returned.
+ */
+ switch (error) {
+ case ENODEV:
+ /*
+ * Check that the mode the file is being opened for is a
+ * subset of the mode of the existing descriptor.
+ */
+ if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
+ return (EACCES);
+ fdp->fd_ofiles[indx] = wfp;
+ fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+ wfp->f_count++; /* both slots now reference the same file */
+ if (indx > fdp->fd_lastfile)
+ fdp->fd_lastfile = indx;
+ return (0);
+
+ case ENXIO:
+ /*
+ * Steal away the file pointer from dfd, and stuff it into indx.
+ */
+ fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; /* f_count unchanged: the reference moves */
+ fdp->fd_ofiles[dfd] = NULL;
+ fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+ fdp->fd_ofileflags[dfd] = 0;
+ /*
+ * Complete the clean up of the filedesc structure by
+ * recomputing the various hints.
+ */
+ if (indx > fdp->fd_lastfile)
+ fdp->fd_lastfile = indx;
+ else
+ while (fdp->fd_lastfile > 0 && /* dfd may have been the last open slot */
+ fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
+ fdp->fd_lastfile--;
+ if (dfd < fdp->fd_freefile) /* dfd is free again */
+ fdp->fd_freefile = dfd;
+ return (0);
+
+ default:
+ return (error);
+ }
+ /* NOTREACHED */
+}
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
new file mode 100644
index 000000000000..fbb4444d52bd
--- /dev/null
+++ b/sys/kern/kern_exec.c
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+
+/*
+ * exec system call
+ *
+ * Stub: the implementation has been removed from this file. None of
+ * the three arguments is used and the call always returns ENOSYS.
+ */
+struct execve_args {
+ char *fname;
+ char **argp;
+ char **envp;
+};
+/* ARGSUSED */
+execve(a1, a2, a3)
+ struct proc *a1;
+ struct execve_args *a2;
+ int *a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (ENOSYS);
+}
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
new file mode 100644
index 000000000000..03353c72d1d1
--- /dev/null
+++ b/sys/kern/kern_exit.c
@@ -0,0 +1,492 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/map.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/kernel.h>
+#include <sys/buf.h>
+#include <sys/wait.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/syslog.h>
+#include <sys/malloc.h>
+#include <sys/resourcevar.h>
+#include <sys/ptrace.h>
+
+#include <machine/cpu.h>
+#ifdef COMPAT_43
+#include <machine/reg.h>
+#include <machine/psl.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+
+__dead void cpu_exit __P((struct proc *));
+__dead void exit1 __P((struct proc *, int));
+
+/*
+ * exit --
+ * Death of process.
+ *
+ * System call entry: packs uap->rval into a wait status with
+ * W_EXITCODE(rval, 0) and hands it to exit1(). retval is unused;
+ * both this function and exit1 are declared __dead.
+ */
+struct rexit_args {
+ int rval;
+};
+__dead void
+exit(p, uap, retval)
+ struct proc *p;
+ struct rexit_args *uap;
+ int *retval;
+{
+
+ exit1(p, W_EXITCODE(uap->rval, 0));
+ /* NOTREACHED */
+}
+
+/*
+ * Exit: deallocate address space and other resources, change proc state
+ * to zombie, and unlink proc from allproc and parent's lists. Save exit
+ * status and rusage for wait(). Check for child processes and orphan them.
+ *
+ * p is the exiting process and rv the wait-style status stored in
+ * p_xstat. Panics if p is pid 1. The last statement is a call to
+ * cpu_exit(p), which is declared __dead.
+ * NOTE(review): no unlink from the parent's child list is visible in
+ * this function; wait1() performs it when the zombie is reaped.
+ */
+__dead void
+exit1(p, rv)
+ register struct proc *p;
+ int rv;
+{
+ register struct proc *q, *nq;
+ register struct proc **pp;
+ register struct vmspace *vm;
+
+ if (p->p_pid == 1)
+ panic("init died (signal %d, exit %d)",
+ WTERMSIG(rv), WEXITSTATUS(rv));
+#ifdef PGINPROF
+ vmsizmon();
+#endif
+ if (p->p_flag & P_PROFIL)
+ stopprofclock(p);
+ MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage), /* filled in below, freed by wait1() */
+ M_ZOMBIE, M_WAITOK);
+ /*
+ * If parent is waiting for us to exit or exec,
+ * P_PPWAIT is set; we will wakeup the parent below.
+ */
+ p->p_flag &= ~(P_TRACED | P_PPWAIT);
+ p->p_flag |= P_WEXIT;
+ p->p_sigignore = ~0;
+ p->p_siglist = 0;
+ untimeout(realitexpire, (caddr_t)p);
+
+ /*
+ * Close open files and release open-file table.
+ * This may block!
+ */
+ fdfree(p);
+
+ /* The next two chunks should probably be moved to vmspace_exit. */
+ vm = p->p_vmspace;
+#ifdef SYSVSHM
+ if (vm->vm_shm)
+ shmexit(p);
+#endif
+ /*
+ * Release user portion of address space.
+ * This releases references to vnodes,
+ * which could cause I/O if the file has been unlinked.
+ * Need to do this early enough that we can still sleep.
+ * Can't free the entire vmspace as the kernel stack
+ * may be mapped within that space also.
+ */
+ if (vm->vm_refcnt == 1) /* only when this is the sole reference */
+ (void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
+ VM_MAXUSER_ADDRESS);
+
+ if (SESS_LEADER(p)) {
+ register struct session *sp = p->p_session;
+
+ if (sp->s_ttyvp) {
+ /*
+ * Controlling process.
+ * Signal foreground pgrp,
+ * drain controlling terminal
+ * and revoke access to controlling terminal.
+ */
+ if (sp->s_ttyp->t_session == sp) {
+ if (sp->s_ttyp->t_pgrp)
+ pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
+ (void) ttywait(sp->s_ttyp);
+ /*
+ * The tty could have been revoked
+ * if we blocked.
+ */
+ if (sp->s_ttyvp)
+ vgoneall(sp->s_ttyvp);
+ }
+ if (sp->s_ttyvp)
+ vrele(sp->s_ttyvp);
+ sp->s_ttyvp = NULL;
+ /*
+ * s_ttyp is not zero'd; we use this to indicate
+ * that the session once had a controlling terminal.
+ * (for logging and informational purposes)
+ */
+ }
+ sp->s_leader = NULL;
+ }
+ fixjobc(p, p->p_pgrp, 0);
+ p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; /* lifted just before acct_process() */
+ (void)acct_process(p);
+#ifdef KTRACE
+ /*
+ * release trace file
+ */
+ p->p_traceflag = 0; /* don't trace the vrele() */
+ if (p->p_tracep)
+ vrele(p->p_tracep);
+#endif
+ /*
+ * Remove proc from allproc queue and pidhash chain.
+ * Place onto zombproc. Unlink from parent's child list.
+ */
+ if (*p->p_prev = p->p_next) /* assignment intended: unlink p from its current queue */
+ p->p_next->p_prev = p->p_prev;
+ if (p->p_next = zombproc) /* assignment intended: push p on the head of zombproc */
+ p->p_next->p_prev = &p->p_next;
+ p->p_prev = &zombproc;
+ zombproc = p;
+ p->p_stat = SZOMB;
+
+ for (pp = &pidhash[PIDHASH(p->p_pid)]; *pp; pp = &(*pp)->p_hash)
+ if (*pp == p) {
+ *pp = p->p_hash;
+ goto done;
+ }
+ panic("exit"); /* p was not found on its pid hash chain */
+done:
+
+ if (p->p_cptr) /* only need this if any child is SZOMB */
+ wakeup((caddr_t) initproc);
+ for (q = p->p_cptr; q != NULL; q = nq) { /* move each child to the head of initproc's child list */
+ nq = q->p_osptr;
+ if (nq != NULL)
+ nq->p_ysptr = NULL;
+ if (initproc->p_cptr)
+ initproc->p_cptr->p_ysptr = q;
+ q->p_osptr = initproc->p_cptr;
+ q->p_ysptr = NULL;
+ initproc->p_cptr = q;
+
+ q->p_pptr = initproc;
+ /*
+ * Traced processes are killed
+ * since their existence means someone is screwing up.
+ */
+ if (q->p_flag & P_TRACED) {
+ q->p_flag &= ~P_TRACED;
+ psignal(q, SIGKILL);
+ }
+ }
+ p->p_cptr = NULL;
+
+ /*
+ * Save exit status and final rusage info, adding in child rusage
+ * info and self times.
+ */
+ p->p_xstat = rv;
+ *p->p_ru = p->p_stats->p_ru;
+ calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL);
+ ruadd(p->p_ru, &p->p_stats->p_cru);
+
+ /*
+ * Notify parent that we're gone.
+ */
+ psignal(p->p_pptr, SIGCHLD);
+ wakeup((caddr_t)p->p_pptr); /* wait1() sleeps on the parent's proc address */
+#if defined(tahoe)
+ /* move this to cpu_exit */
+ p->p_addr->u_pcb.pcb_savacc.faddr = (float *)NULL;
+#endif
+ /*
+ * Clear curproc after we've done all operations
+ * that could block, and before tearing down the rest
+ * of the process state that might be used from clock, etc.
+ * Also, can't clear curproc while we're still runnable,
+ * as we're not on a run queue (we are current, just not
+ * a proper proc any longer!).
+ *
+ * Other substructures are freed from wait().
+ */
+ curproc = NULL;
+ if (--p->p_limit->p_refcnt == 0) /* p_limit may be shared; see fork1() */
+ FREE(p->p_limit, M_SUBPROC);
+
+ /*
+ * Finally, call machine-dependent code to release the remaining
+ * resources including address space, the kernel stack and pcb.
+ * The address space is released by "vmspace_free(p->p_vmspace)";
+ * This is machine-dependent, as we may have to change stacks
+ * or ensure that the current one isn't reallocated before we
+ * finish. cpu_exit will end with a call to cpu_swtch(), finishing
+ * our execution (pun intended).
+ */
+ cpu_exit(p);
+}
+
+struct wait_args { /* argument block shared by owait(), wait4() and wait1() */
+ int pid; /* 0 = caller's process group, WAIT_ANY = any child; negative values are matched against p_pgid */
+ int *status; /* if non-NULL, wait1() copies the status out here */
+ int options; /* wait1() tests WNOHANG and WUNTRACED */
+ struct rusage *rusage; /* if non-NULL, wait1() copies the zombie's rusage out here */
+#ifdef COMPAT_43
+ int compat; /* pseudo */
+#endif
+};
+
+#ifdef COMPAT_43
+#if defined(hp300) || defined(luna68k)
+#include <machine/frame.h>
+#define GETPS(rp) ((struct frame *)(rp))->f_sr
+#else
+#define GETPS(rp) (rp)[PS]
+#endif
+
+owait(p, uap, retval) /* COMPAT_43 wait entry: fills in uap itself, then calls wait1() with compat = 1 */
+ struct proc *p;
+ register struct wait_args *uap;
+ int *retval;
+{
+
+#ifdef PSL_ALLCC
+ if ((GETPS(p->p_md.md_regs) & PSL_ALLCC) != PSL_ALLCC) { /* not all PSL_ALLCC bits set: no options/rusage */
+ uap->options = 0;
+ uap->rusage = NULL;
+ } else { /* options and rusage pointer are read from saved registers R0 and R1 */
+ uap->options = p->p_md.md_regs[R0];
+ uap->rusage = (struct rusage *)p->p_md.md_regs[R1];
+ }
+#else
+ uap->options = 0;
+ uap->rusage = NULL;
+#endif
+ uap->pid = WAIT_ANY;
+ uap->status = NULL; /* with compat set, wait1() returns the status in retval[1] */
+ uap->compat = 1;
+ return (wait1(p, uap, retval));
+}
+
+wait4(p, uap, retval) /* COMPAT_43 build: clears the compat pseudo-argument and calls wait1() */
+ struct proc *p;
+ struct wait_args *uap;
+ int *retval;
+{
+
+ uap->compat = 0; /* status goes out through uap->status, not retval[1] */
+ return (wait1(p, uap, retval));
+}
+#else
+#define wait1 wait4
+#endif
+
+int
+wait1(q, uap, retval) /* common wait body; without COMPAT_43 this is wait4 itself (see #define above) */
+ register struct proc *q; /* the waiting (parent) process */
+ register struct wait_args *uap; /* see struct wait_args */
+ int retval[]; /* [0] = pid reported (0 on a WNOHANG miss); [1] = status when uap->compat */
+{
+ register int nfound;
+ register struct proc *p, *t;
+ int status, error;
+
+ if (uap->pid == 0) /* 0 means "any child in my process group" */
+ uap->pid = -q->p_pgid;
+#ifdef notyet
+ if (uap->options &~ (WUNTRACED|WNOHANG))
+ return (EINVAL);
+#endif
+loop:
+ nfound = 0; /* counts children matching uap->pid on this pass */
+ for (p = q->p_cptr; p; p = p->p_osptr) {
+ if (uap->pid != WAIT_ANY &&
+ p->p_pid != uap->pid && p->p_pgid != -uap->pid)
+ continue;
+ nfound++;
+ if (p->p_stat == SZOMB) {
+ retval[0] = p->p_pid;
+#ifdef COMPAT_43
+ if (uap->compat)
+ retval[1] = p->p_xstat;
+ else
+#endif
+ if (uap->status) {
+ status = p->p_xstat; /* convert to int */
+ if (error = copyout((caddr_t)&status,
+ (caddr_t)uap->status, sizeof(status)))
+ return (error);
+ }
+ if (uap->rusage && (error = copyout((caddr_t)p->p_ru,
+ (caddr_t)uap->rusage, sizeof (struct rusage))))
+ return (error);
+ /*
+ * If we got the child via a ptrace 'attach',
+ * we need to give it back to the old parent.
+ */
+ if (p->p_oppid && (t = pfind(p->p_oppid))) {
+ p->p_oppid = 0;
+ proc_reparent(p, t);
+ psignal(t, SIGCHLD);
+ wakeup((caddr_t)t);
+ return (0); /* zombie is left for the original parent to reap */
+ }
+ p->p_xstat = 0;
+ ruadd(&q->p_stats->p_cru, p->p_ru); /* fold the child's usage into the parent's child totals */
+ FREE(p->p_ru, M_ZOMBIE);
+
+ /*
+ * Decrement the count of procs running with this uid.
+ */
+ (void)chgproccnt(p->p_cred->p_ruid, -1);
+
+ /*
+ * Free up credentials.
+ */
+ if (--p->p_cred->p_refcnt == 0) {
+ crfree(p->p_cred->pc_ucred);
+ FREE(p->p_cred, M_SUBPROC);
+ }
+
+ /*
+ * Release reference to text vnode
+ */
+ if (p->p_textvp)
+ vrele(p->p_textvp);
+
+ /*
+ * Finally finished with old proc entry.
+ * Unlink it from its process group and free it.
+ */
+ leavepgrp(p);
+ if (*p->p_prev = p->p_next) /* off zombproc */
+ p->p_next->p_prev = p->p_prev;
+ if (q = p->p_ysptr) /* q is reused as scratch from here on */
+ q->p_osptr = p->p_osptr;
+ if (q = p->p_osptr)
+ q->p_ysptr = p->p_ysptr;
+ if ((q = p->p_pptr)->p_cptr == p)
+ q->p_cptr = p->p_osptr;
+
+ /*
+ * Give machine-dependent layer a chance
+ * to free anything that cpu_exit couldn't
+ * release while still running in process context.
+ */
+ cpu_wait(p);
+ FREE(p, M_PROC);
+ nprocs--;
+ return (0);
+ }
+ if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 &&
+ (p->p_flag & P_TRACED || uap->options & WUNTRACED)) {
+ p->p_flag |= P_WAITED; /* report each stop only once */
+ retval[0] = p->p_pid;
+#ifdef COMPAT_43
+ if (uap->compat) {
+ retval[1] = W_STOPCODE(p->p_xstat);
+ error = 0;
+ } else
+#endif
+ if (uap->status) {
+ status = W_STOPCODE(p->p_xstat);
+ error = copyout((caddr_t)&status,
+ (caddr_t)uap->status, sizeof(status));
+ } else
+ error = 0;
+ return (error);
+ }
+ }
+ if (nfound == 0) /* no child matched uap->pid at all */
+ return (ECHILD);
+ if (uap->options & WNOHANG) {
+ retval[0] = 0;
+ return (0);
+ }
+ if (error = tsleep((caddr_t)q, PWAIT | PCATCH, "wait", 0)) /* exit1() wakes this address */
+ return (error);
+ goto loop;
+}
+
+/*
+ * make process 'parent' the new parent of process 'child'.
+ *
+ * Does nothing if 'parent' is already the child's parent. Otherwise
+ * the child is removed from its old parent's sibling list and placed
+ * at the head of the new parent's child list (p_cptr).
+ */
+void
+proc_reparent(child, parent)
+ register struct proc *child;
+ register struct proc *parent;
+{
+ register struct proc *o; /* older-sibling link (p_osptr) */
+ register struct proc *y; /* younger-sibling link (p_ysptr) */
+
+ if (child->p_pptr == parent)
+ return;
+
+ /* fix up the child linkage for the old parent */
+ o = child->p_osptr;
+ y = child->p_ysptr;
+ if (y)
+ y->p_osptr = o;
+ if (o)
+ o->p_ysptr = y;
+ if (child->p_pptr->p_cptr == child) /* child was at the head of the old parent's list */
+ child->p_pptr->p_cptr = o;
+
+ /* fix up child linkage for new parent */
+ o = parent->p_cptr;
+ if (o)
+ o->p_ysptr = child;
+ child->p_osptr = o;
+ child->p_ysptr = NULL;
+ parent->p_cptr = child;
+ child->p_pptr = parent;
+}
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
new file mode 100644
index 000000000000..8bec2fa5d5fe
--- /dev/null
+++ b/sys/kern/kern_fork.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/map.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/acct.h>
+#include <sys/ktrace.h>
+
+struct fork_args { /* argument block for fork() and vfork(); neither reads it */
+ int dummy;
+};
+/* ARGSUSED */
+fork(p, uap, retval) /* fork system call: fork1() with isvfork = 0 */
+ struct proc *p;
+ struct fork_args *uap;
+ int retval[];
+{
+
+ return (fork1(p, 0, retval));
+}
+
+/* ARGSUSED */
+vfork(p, uap, retval) /* vfork system call: fork1() with isvfork = 1 */
+ struct proc *p;
+ struct fork_args *uap;
+ int retval[];
+{
+
+ return (fork1(p, 1, retval));
+}
+
+int nprocs = 1; /* current number of processes; starts at 1 for process 0 */
+
+fork1(p1, isvfork, retval) /* common body of fork() and vfork() */
+ register struct proc *p1; /* the parent process */
+ int isvfork, retval[]; /* retval: [0] = pid, [1] = 1 in the child, 0 in the parent */
+{
+ register struct proc *p2;
+ register uid_t uid;
+ struct proc *newproc;
+ struct proc **hash;
+ int count;
+ static int nextpid, pidchecked = 0;
+
+ /*
+ * Although process entries are dynamically created, we still keep
+ * a global limit on the maximum number we will create. Don't allow
+ * a nonprivileged user to use the last process; don't let root
+ * exceed the limit. The variable nprocs is the current number of
+ * processes, maxproc is the limit.
+ */
+ uid = p1->p_cred->p_ruid;
+ if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
+ tablefull("proc");
+ return (EAGAIN);
+ }
+ /*
+ * Increment the count of procs running with this uid. Don't allow
+ * a nonprivileged user to exceed their current limit.
+ */
+ count = chgproccnt(uid, 1);
+ if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
+ (void)chgproccnt(uid, -1); /* undo the increment before failing */
+ return (EAGAIN);
+ }
+
+ /* Allocate new proc. */
+ MALLOC(newproc, struct proc *, sizeof(struct proc), M_PROC, M_WAITOK);
+
+ /*
+ * Find an unused process ID. We remember a range of unused IDs
+ * ready to use (from nextpid+1 through pidchecked-1).
+ */
+ nextpid++;
+retry:
+ /*
+ * If the process ID prototype has wrapped around,
+ * restart somewhat above 0, as the low-numbered procs
+ * tend to include daemons that don't exit.
+ */
+ if (nextpid >= PID_MAX) {
+ nextpid = 100;
+ pidchecked = 0; /* forces the rescan below */
+ }
+ if (nextpid >= pidchecked) {
+ int doingzomb = 0;
+
+ pidchecked = PID_MAX;
+ /*
+ * Scan the active and zombie procs to check whether this pid
+ * is in use. Remember the lowest pid that's greater
+ * than nextpid, so we can avoid checking for a while.
+ */
+ p2 = (struct proc *)allproc;
+again:
+ for (; p2 != NULL; p2 = p2->p_next) {
+ while (p2->p_pid == nextpid || /* process group ids are avoided as well */
+ p2->p_pgrp->pg_id == nextpid) {
+ nextpid++;
+ if (nextpid >= pidchecked)
+ goto retry;
+ }
+ if (p2->p_pid > nextpid && pidchecked > p2->p_pid)
+ pidchecked = p2->p_pid;
+ if (p2->p_pgrp->pg_id > nextpid &&
+ pidchecked > p2->p_pgrp->pg_id)
+ pidchecked = p2->p_pgrp->pg_id;
+ }
+ if (!doingzomb) { /* second pass over the zombie list */
+ doingzomb = 1;
+ p2 = zombproc;
+ goto again;
+ }
+ }
+
+
+ /*
+ * Link onto allproc (this should probably be delayed).
+ * Heavy use of volatile here to prevent the compiler from
+ * rearranging code. Yes, it *is* terribly ugly, but at least
+ * it works.
+ */
+ nprocs++;
+ p2 = newproc;
+#define Vp2 ((volatile struct proc *)p2)
+ Vp2->p_stat = SIDL; /* protect against others */
+ Vp2->p_pid = nextpid;
+ /*
+ * This is really:
+ * p2->p_next = allproc;
+ * allproc->p_prev = &p2->p_next;
+ * p2->p_prev = &allproc;
+ * allproc = p2;
+ * The assignment via allproc is legal since it is never NULL.
+ */
+ *(volatile struct proc **)&Vp2->p_next = allproc;
+ *(volatile struct proc ***)&allproc->p_prev =
+ (volatile struct proc **)&Vp2->p_next;
+ *(volatile struct proc ***)&Vp2->p_prev = &allproc;
+ allproc = Vp2;
+#undef Vp2
+ p2->p_forw = p2->p_back = NULL; /* shouldn't be necessary */
+
+ /* Insert on the hash chain. */
+ hash = &pidhash[PIDHASH(p2->p_pid)];
+ p2->p_hash = *hash;
+ *hash = p2;
+
+ /*
+ * Make a proc table entry for the new process.
+ * Start by zeroing the section of proc that is zero-initialized,
+ * then copy the section that is copied directly from the parent.
+ */
+ bzero(&p2->p_startzero,
+ (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
+ bcopy(&p1->p_startcopy, &p2->p_startcopy,
+ (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));
+
+ /*
+ * Duplicate sub-structures as needed.
+ * Increase reference counts on shared objects.
+ * The p_stats and p_sigacts substructs are set in vm_fork.
+ */
+ p2->p_flag = P_INMEM;
+ if (p1->p_flag & P_PROFIL)
+ startprofclock(p2);
+ MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred),
+ M_SUBPROC, M_WAITOK);
+ bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred));
+ p2->p_cred->p_refcnt = 1; /* the copy starts with a single reference */
+ crhold(p1->p_ucred); /* presumably the ucred pointer just copied is now shared -- confirm */
+
+ /* bump references to the text vnode (for procfs) */
+ p2->p_textvp = p1->p_textvp;
+ if (p2->p_textvp)
+ VREF(p2->p_textvp);
+
+ p2->p_fd = fdcopy(p1);
+ /*
+ * If p_limit is still copy-on-write, bump refcnt,
+ * otherwise get a copy that won't be modified.
+ * (If PL_SHAREMOD is clear, the structure is shared
+ * copy-on-write.)
+ */
+ if (p1->p_limit->p_lflags & PL_SHAREMOD)
+ p2->p_limit = limcopy(p1->p_limit);
+ else {
+ p2->p_limit = p1->p_limit;
+ p2->p_limit->p_refcnt++;
+ }
+
+ if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
+ p2->p_flag |= P_CONTROLT;
+ if (isvfork)
+ p2->p_flag |= P_PPWAIT; /* the parent sleeps below until this is cleared */
+ p2->p_pgrpnxt = p1->p_pgrpnxt; /* insert p2 right after p1 on the p_pgrpnxt chain */
+ p1->p_pgrpnxt = p2;
+ p2->p_pptr = p1;
+ p2->p_osptr = p1->p_cptr; /* p2 goes at the head of p1's child list */
+ if (p1->p_cptr)
+ p1->p_cptr->p_ysptr = p2;
+ p1->p_cptr = p2;
+#ifdef KTRACE
+ /*
+ * Copy traceflag and tracefile if enabled.
+ * If not inherited, these were zeroed above.
+ */
+ if (p1->p_traceflag&KTRFAC_INHERIT) {
+ p2->p_traceflag = p1->p_traceflag;
+ if ((p2->p_tracep = p1->p_tracep) != NULL)
+ VREF(p2->p_tracep);
+ }
+#endif
+
+ /*
+ * This begins the section where we must prevent the parent
+ * from being swapped.
+ */
+ p1->p_flag |= P_NOSWAP;
+ /*
+ * Set return values for child before vm_fork,
+ * so they can be copied to child stack.
+ * We return parent pid, and mark as child in retval[1].
+ * NOTE: the kernel stack may be at a different location in the child
+ * process, and thus addresses of automatic variables (including retval)
+ * may be invalid after vm_fork returns in the child process.
+ */
+ retval[0] = p1->p_pid;
+ retval[1] = 1;
+ if (vm_fork(p1, p2, isvfork)) { /* nonzero return means we are running as the child */
+ /*
+ * Child process. Set start time and get to work.
+ */
+ (void) splclock();
+ p2->p_stats->p_start = time;
+ (void) spl0();
+ p2->p_acflag = AFORK;
+ return (0);
+ }
+
+ /*
+ * Make child runnable and add to run queue.
+ */
+ (void) splhigh();
+ p2->p_stat = SRUN;
+ setrunqueue(p2);
+ (void) spl0();
+
+ /*
+ * Now can be swapped.
+ */
+ p1->p_flag &= ~P_NOSWAP;
+
+ /*
+ * Preserve synchronization semantics of vfork. If waiting for
+ * child to exec or exit, set P_PPWAIT on child, and sleep on our
+ * proc (in case of exit).
+ */
+ if (isvfork)
+ while (p2->p_flag & P_PPWAIT) /* exit1() clears P_PPWAIT and wakes the parent */
+ tsleep(p1, PWAIT, "ppwait", 0);
+
+ /*
+ * Return child pid to parent process,
+ * marking us as parent via retval[1].
+ */
+ retval[0] = p2->p_pid;
+ retval[1] = 0;
+ return (0);
+}
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
new file mode 100644
index 000000000000..763cfb257ffb
--- /dev/null
+++ b/sys/kern/kern_ktrace.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93
+ */
+
+#ifdef KTRACE
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/ktrace.h>
+#include <sys/malloc.h>
+#include <sys/syslog.h>
+
+struct ktr_header *
+ktrgetheader(type) /* allocate a trace record header and fill it in from curproc */
+ int type; /* stored verbatim in ktr_type */
+{
+ register struct ktr_header *kth;
+ struct proc *p = curproc; /* XXX */
+
+ MALLOC(kth, struct ktr_header *, sizeof (struct ktr_header),
+ M_TEMP, M_WAITOK); /* each ktr*() caller in this file FREEs it after ktrwrite() */
+ kth->ktr_type = type;
+ microtime(&kth->ktr_time);
+ kth->ktr_pid = p->p_pid;
+ bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN); /* NOTE(review): exactly MAXCOMLEN bytes, no NUL added and the buffer is not zeroed here -- confirm ktr_comm size/termination */
+ return (kth); /* ktr_len and ktr_buf are left for the caller to set */
+}
+
+ktrsyscall(vp, code, narg, args) /* write a KTR_SYSCALL record (code + arguments) to vp */
+ struct vnode *vp;
+ int code, narg, args[]; /* args[] holds narg ints copied into the record */
+{
+ struct ktr_header *kth;
+ struct ktr_syscall *ktp;
+ register len = sizeof(struct ktr_syscall) + (narg * sizeof(int)); /* fixed part plus argument array */
+ struct proc *p = curproc; /* XXX */
+ int *argp, i;
+
+ p->p_traceflag |= KTRFAC_ACTIVE; /* held until the record is written and freed */
+ kth = ktrgetheader(KTR_SYSCALL);
+ MALLOC(ktp, struct ktr_syscall *, len, M_TEMP, M_WAITOK);
+ ktp->ktr_code = code;
+ ktp->ktr_narg = narg;
+ argp = (int *)((char *)ktp + sizeof(struct ktr_syscall)); /* arguments follow the fixed struct */
+ for (i = 0; i < narg; i++)
+ *argp++ = args[i];
+ kth->ktr_buf = (caddr_t)ktp;
+ kth->ktr_len = len;
+ ktrwrite(vp, kth);
+ FREE(ktp, M_TEMP);
+ FREE(kth, M_TEMP);
+ p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrsysret(vp, code, error, retval) /* write a KTR_SYSRET record to vp */
+ struct vnode *vp;
+ int code, error, retval;
+{
+ struct ktr_header *kth;
+ struct ktr_sysret ktp; /* payload is on the stack; only the header is malloc'ed */
+ struct proc *p = curproc; /* XXX */
+
+ p->p_traceflag |= KTRFAC_ACTIVE; /* held until the record is written */
+ kth = ktrgetheader(KTR_SYSRET);
+ ktp.ktr_code = code;
+ ktp.ktr_error = error;
+ ktp.ktr_retval = retval; /* what about val2 ? */
+
+ kth->ktr_buf = (caddr_t)&ktp;
+ kth->ktr_len = sizeof(struct ktr_sysret);
+
+ ktrwrite(vp, kth);
+ FREE(kth, M_TEMP);
+ p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrnamei(vp, path) /* write a KTR_NAMEI record whose payload is the path string */
+ struct vnode *vp;
+ char *path; /* NUL-terminated; written in place, not copied */
+{
+ struct ktr_header *kth;
+ struct proc *p = curproc; /* XXX */
+
+ p->p_traceflag |= KTRFAC_ACTIVE; /* held until the record is written */
+ kth = ktrgetheader(KTR_NAMEI);
+ kth->ktr_len = strlen(path); /* terminating NUL is not part of the record */
+ kth->ktr_buf = path;
+
+ ktrwrite(vp, kth);
+ FREE(kth, M_TEMP);
+ p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrgenio(vp, fd, rw, iov, len, error) /* write a KTR_GENIO record carrying the transferred data */
+ struct vnode *vp;
+ int fd;
+ enum uio_rw rw;
+ register struct iovec *iov; /* user-space buffers; read with copyin() */
+ int len, error; /* len: bytes to capture; error: result of the I/O being traced */
+{
+ struct ktr_header *kth;
+ register struct ktr_genio *ktp;
+ register caddr_t cp;
+ register int resid = len, cnt;
+ struct proc *p = curproc; /* XXX */
+
+ if (error) /* a failed transfer is not logged */
+ return;
+ p->p_traceflag |= KTRFAC_ACTIVE;
+ kth = ktrgetheader(KTR_GENIO);
+ MALLOC(ktp, struct ktr_genio *, sizeof(struct ktr_genio) + len,
+ M_TEMP, M_WAITOK); /* fixed struct followed by len data bytes */
+ ktp->ktr_fd = fd;
+ ktp->ktr_rw = rw;
+ cp = (caddr_t)((char *)ktp + sizeof (struct ktr_genio)); /* data area starts after the fixed struct */
+ while (resid > 0) { /* gather len bytes, walking the iovec array */
+ if ((cnt = iov->iov_len) > resid)
+ cnt = resid;
+ if (copyin(iov->iov_base, cp, (unsigned)cnt))
+ goto done; /* copy failed: record is dropped, buffers are still freed */
+ cp += cnt;
+ resid -= cnt;
+ iov++;
+ }
+ kth->ktr_buf = (caddr_t)ktp;
+ kth->ktr_len = sizeof (struct ktr_genio) + len;
+
+ ktrwrite(vp, kth);
+done:
+ FREE(kth, M_TEMP);
+ FREE(ktp, M_TEMP);
+ p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrpsig(vp, sig, action, mask, code) /* write a KTR_PSIG record to vp */
+ struct vnode *vp;
+ int sig;
+ sig_t action;
+ int mask, code;
+{
+ struct ktr_header *kth;
+ struct ktr_psig kp; /* payload is on the stack; only the header is malloc'ed */
+ struct proc *p = curproc; /* XXX */
+
+ p->p_traceflag |= KTRFAC_ACTIVE; /* held until the record is written */
+ kth = ktrgetheader(KTR_PSIG);
+ kp.signo = (char)sig; /* signal number is narrowed to a char in the record */
+ kp.action = action;
+ kp.mask = mask;
+ kp.code = code;
+ kth->ktr_buf = (caddr_t)&kp;
+ kth->ktr_len = sizeof (struct ktr_psig);
+
+ ktrwrite(vp, kth);
+ FREE(kth, M_TEMP);
+ p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrcsw(vp, out, user) /* write a KTR_CSW record to vp */
+ struct vnode *vp;
+ int out, user; /* copied unchanged into the record's out/user fields */
+{
+ struct ktr_header *kth;
+ struct ktr_csw kc; /* payload is on the stack; only the header is malloc'ed */
+ struct proc *p = curproc; /* XXX */
+
+ p->p_traceflag |= KTRFAC_ACTIVE; /* held until the record is written */
+ kth = ktrgetheader(KTR_CSW);
+ kc.out = out;
+ kc.user = user;
+ kth->ktr_buf = (caddr_t)&kc;
+ kth->ktr_len = sizeof (struct ktr_csw);
+
+ ktrwrite(vp, kth);
+ FREE(kth, M_TEMP);
+ p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+/* Interface and common routines */
+
+/*
+ * ktrace system call
+ *
+ * Argument block handed to ktrace() below.
+ */
+struct ktrace_args {
+ char *fname; /* user-space path of the trace file; opened unless the op is KTROP_CLEAR */
+ int ops; /* operation (extracted with KTROP()) plus the KTRFLAG_DESCEND flag */
+ int facs; /* facilities to set or clear; KTRFAC_ROOT is masked off by ktrace() */
+ int pid; /* >= 0: a process id; < 0: negated process group id */
+};
+/*
+ * ktrace system call: set or clear kernel tracing on a process, a
+ * process group, or (with KTRFLAG_DESCEND) a process and its descendants,
+ * or detach a trace file from every process using it (KTROP_CLEARFILE).
+ *
+ * curp   - calling process; KTRFAC_ACTIVE is set on it for the duration.
+ * uap    - user arguments, see struct ktrace_args.
+ * retval - unused.
+ *
+ * Returns 0 on success, otherwise the vn_open() error, EACCES if the
+ * trace file is not a regular file, EINVAL if no facility was given,
+ * ESRCH if the target does not exist, or EPERM if the caller was not
+ * allowed to change any (CLEARFILE: some) of the targets.
+ */
+/* ARGSUSED */
+ktrace(curp, uap, retval)
+	struct proc *curp;
+	register struct ktrace_args *uap;
+	int *retval;
+{
+	register struct vnode *vp = NULL;
+	register struct proc *p;
+	struct pgrp *pg;
+	int facs = uap->facs & ~KTRFAC_ROOT;	/* callers may not pass KTRFAC_ROOT */
+	int ops = KTROP(uap->ops);
+	int descend = uap->ops & KTRFLAG_DESCEND;
+	int ret = 0;
+	int error = 0;
+	struct nameidata nd;
+
+	curp->p_traceflag |= KTRFAC_ACTIVE;
+	if (ops != KTROP_CLEAR) {
+		/*
+		 * an operation which requires a file argument.
+		 */
+		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->fname, curp);
+		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
+			curp->p_traceflag &= ~KTRFAC_ACTIVE;
+			return (error);
+		}
+		vp = nd.ni_vp;
+		VOP_UNLOCK(vp);
+		if (vp->v_type != VREG) {
+			(void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp);
+			curp->p_traceflag &= ~KTRFAC_ACTIVE;
+			return (EACCES);
+		}
+	}
+	/*
+	 * Clear all uses of the tracefile
+	 */
+	if (ops == KTROP_CLEARFILE) {
+		for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+			if (p->p_tracep == vp) {
+				if (ktrcanset(curp, p)) {
+					p->p_tracep = NULL;
+					p->p_traceflag = 0;
+					/*
+					 * A process holds its trace vnode
+					 * through a plain VREF (see ktrops()
+					 * and fork), not through an open, so
+					 * the matching release is vrele() as
+					 * in ktrops() and ktrwrite().  Only
+					 * our own vn_open() above is undone
+					 * with vn_close(), at "done".
+					 */
+					vrele(vp);
+				} else
+					error = EPERM;
+			}
+		}
+		goto done;
+	}
+	/*
+	 * need something to (un)trace (XXX - why is this here?)
+	 */
+	if (!facs) {
+		error = EINVAL;
+		goto done;
+	}
+	/*
+	 * do it
+	 */
+	if (uap->pid < 0) {
+		/*
+		 * by process group
+		 */
+		pg = pgfind(-uap->pid);
+		if (pg == NULL) {
+			error = ESRCH;
+			goto done;
+		}
+		for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt)
+			if (descend)
+				ret |= ktrsetchildren(curp, p, ops, facs, vp);
+			else
+				ret |= ktrops(curp, p, ops, facs, vp);
+
+	} else {
+		/*
+		 * by pid
+		 */
+		p = pfind(uap->pid);
+		if (p == NULL) {
+			error = ESRCH;
+			goto done;
+		}
+		if (descend)
+			ret |= ktrsetchildren(curp, p, ops, facs, vp);
+		else
+			ret |= ktrops(curp, p, ops, facs, vp);
+	}
+	/* ret stays 0 only if permission was denied for every target. */
+	if (!ret)
+		error = EPERM;
+done:
+	/* Drop the reference obtained by our own vn_open(). */
+	if (vp != NULL)
+		(void) vn_close(vp, FWRITE, curp->p_ucred, curp);
+	curp->p_traceflag &= ~KTRFAC_ACTIVE;
+	return (error);
+}
+
+int
+ktrops(curp, p, ops, facs, vp) /* apply one set/clear request to p; returns 1 if permitted, 0 if not */
+ struct proc *p, *curp; /* p: target process; curp: caller, used for the permission check */
+ int ops, facs; /* KTROP_SET sets facs; any other value is treated as a clear */
+ struct vnode *vp; /* trace file to attach on KTROP_SET */
+{
+
+ if (!ktrcanset(curp, p))
+ return (0);
+ if (ops == KTROP_SET) {
+ if (p->p_tracep != vp) {
+ /*
+ * if trace file already in use, relinquish
+ */
+ if (p->p_tracep != NULL)
+ vrele(p->p_tracep);
+ VREF(vp); /* the process holds its own reference on the trace vnode */
+ p->p_tracep = vp;
+ }
+ p->p_traceflag |= facs;
+ if (curp->p_ucred->cr_uid == 0)
+ p->p_traceflag |= KTRFAC_ROOT; /* remember that root installed this trace */
+ } else {
+ /* KTROP_CLEAR */
+ if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
+ /* no more tracing */
+ p->p_traceflag = 0;
+ if (p->p_tracep != NULL) {
+ vrele(p->p_tracep);
+ p->p_tracep = NULL;
+ }
+ }
+ }
+
+ return (1);
+}
+
+ktrsetchildren(curp, top, ops, facs, vp) /* apply ktrops() to top and every descendant; returns the OR of the results */
+ struct proc *curp, *top; /* curp: caller; top: root of the subtree to visit */
+ int ops, facs;
+ struct vnode *vp;
+{
+ register struct proc *p;
+ register int ret = 0;
+
+ p = top;
+ for (;;) {
+ ret |= ktrops(curp, p, ops, facs, vp);
+ /*
+ * If this process has children, descend to them next,
+ * otherwise do any siblings, and if done with this level,
+ * follow back up the tree (but not past top).
+ */
+ if (p->p_cptr)
+ p = p->p_cptr;
+ else if (p == top) /* top has no children: nothing else to visit */
+ return (ret);
+ else if (p->p_osptr)
+ p = p->p_osptr;
+ else for (;;) { /* climb until an ancestor with an unvisited sibling, or top */
+ p = p->p_pptr;
+ if (p == top)
+ return (ret);
+ if (p->p_osptr) {
+ p = p->p_osptr;
+ break;
+ }
+ }
+ }
+ /*NOTREACHED*/
+}
+
+ktrwrite(vp, kth) /* append one record (header, then optional payload) to the trace vnode */
+ struct vnode *vp; /* trace file; NULL makes the call a no-op */
+ register struct ktr_header *kth; /* header; ktr_buf/ktr_len describe the payload */
+{
+ struct uio auio;
+ struct iovec aiov[2]; /* [0] = header, [1] = payload when ktr_len > 0 */
+ register struct proc *p = curproc; /* XXX */
+ int error;
+
+ if (vp == NULL)
+ return;
+ auio.uio_iov = &aiov[0];
+ auio.uio_offset = 0;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_WRITE;
+ aiov[0].iov_base = (caddr_t)kth;
+ aiov[0].iov_len = sizeof(struct ktr_header);
+ auio.uio_resid = sizeof(struct ktr_header);
+ auio.uio_iovcnt = 1;
+ auio.uio_procp = (struct proc *)0;
+ if (kth->ktr_len > 0) {
+ auio.uio_iovcnt++;
+ aiov[1].iov_base = kth->ktr_buf;
+ aiov[1].iov_len = kth->ktr_len;
+ auio.uio_resid += kth->ktr_len;
+ }
+ VOP_LOCK(vp);
+ error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, p->p_ucred); /* written with the current process's credentials */
+ VOP_UNLOCK(vp);
+ if (!error)
+ return;
+ /*
+ * If error encountered, give up tracing on this vnode.
+ * Every process tracing to vp is detached and its reference dropped.
+ */
+ log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
+ error);
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) { /* p is reused as the iterator here */
+ if (p->p_tracep == vp) {
+ p->p_tracep = NULL;
+ p->p_traceflag = 0;
+ vrele(vp);
+ }
+ }
+}
+
+/*
+ * Permission check for changing another process's ktrace state.
+ * Returns 1 when callp may alter the tracing of targetp, 0 otherwise.
+ *
+ * The superuser is always allowed.  Anyone else must match the target's
+ * real uid and real gid, the target's real and saved ids must agree
+ * (so it holds no more privilege than the caller), and the trace must
+ * not have been installed by root (KTRFAC_ROOT).
+ *
+ * TODO: check groups. use caller effective gid.
+ */
+ktrcanset(callp, targetp)
+	struct proc *callp, *targetp;
+{
+	register struct pcred *ccred = callp->p_cred;
+	register struct pcred *tcred = targetp->p_cred;
+
+	/* Root may always change the tracing state. */
+	if (ccred->pc_ucred->cr_uid == 0)
+		return (1);
+
+	/* Tracing that root set up can only be changed by root. */
+	if ((targetp->p_traceflag & KTRFAC_ROOT) != 0)
+		return (0);
+
+	/* User ids: caller must own the target, target must not be set-id. */
+	if (ccred->pc_ucred->cr_uid != tcred->p_ruid ||
+	    tcred->p_ruid != tcred->p_svuid)
+		return (0);
+
+	/* Group ids, same rule. */
+	if (ccred->p_rgid != tcred->p_rgid ||	/* XXX */
+	    tcred->p_rgid != tcred->p_svgid)
+		return (0);
+
+	return (1);
+}
+
+#endif
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
new file mode 100644
index 000000000000..c6276bc73cf4
--- /dev/null
+++ b/sys/kern/kern_malloc.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 1987, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/map.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+
+struct kmembuckets bucket[MINBUCKET + 16]; /* per-size free lists, indexed by BUCKETINDX(size) */
+struct kmemstats kmemstats[M_LAST]; /* per-type statistics, indexed by the M_* type */
+struct kmemusage *kmemusage; /* allocated in kmeminit(); presumably the array btokup() indexes */
+char *kmembase, *kmemlimit; /* filled in by kmem_suballoc() in kmeminit() */
+char *memname[] = INITKMEMNAMES; /* type names; used as tsleep message and in diagnostics */
+
+#ifdef DIAGNOSTIC
+/*
+ * This structure provides a set of masks to catch unaligned frees.
+ * Entry i is (1 << i) - 1; free() indexes it by bucket index.
+ */
+long addrmask[] = { 0,
+ 0x00000001, 0x00000003, 0x00000007, 0x0000000f,
+ 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
+ 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
+ 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
+};
+
+/*
+ * The WEIRD_ADDR is used as known text to copy into free objects so
+ * that modifications after frees can be detected.
+ * At most MAX_COPY bytes of each object are filled and checked.
+ */
+#define WEIRD_ADDR 0xdeadbeef
+#define MAX_COPY 32
+
+/*
+ * Normally the first word of the structure is used to hold the list
+ * pointer for free objects. However, when running with diagnostics,
+ * the type is kept in the second field and the list pointer in the
+ * fourth, so as to catch modifications
+ * in the most commonly trashed first two words.
+ */
+struct freelist {
+ long spare0; /* holds WEIRD_ADDR while free; zeroed by malloc() on allocation */
+ short type; /* M_FREE for fresh memory, else the type last passed to free() */
+ long spare1;
+ caddr_t next; /* next free object in the bucket */
+};
+#else /* !DIAGNOSTIC */
+struct freelist {
+ caddr_t next; /* next free object in the bucket */
+};
+#endif /* DIAGNOSTIC */
+
+/*
+ * Allocate a block of kernel memory.
+ *
+ * size  - number of bytes wanted; BUCKETINDX(size) selects the bucket.
+ * type  - M_* class of the allocation, used for statistics and limits.
+ * flags - with M_NOWAIT the call returns NULL instead of sleeping when
+ *         the type is at its limit; the flag is also passed on (negated)
+ *         to kmem_malloc().
+ *
+ * Returns the address of the block, or NULL if M_NOWAIT was given while
+ * the type was at its limit, or if kmem_malloc() returned NULL.
+ *
+ * Requests larger than MAXALLOCSAVE get whole pages straight from
+ * kmem_malloc(); smaller ones are carved from a per-bucket free list
+ * that is refilled in allocsize-sized pieces.
+ */
+void *
+malloc(size, type, flags)
+	unsigned long size;
+	int type, flags;
+{
+	register struct kmembuckets *kbp;
+	register struct kmemusage *kup;
+	register struct freelist *freep;
+	long indx, npg, allocsize;
+	int s;
+	caddr_t va, cp, savedlist;
+#ifdef DIAGNOSTIC
+	long *end, *lp;
+	int copysize;
+	char *savedtype;
+#endif
+#ifdef KMEMSTATS
+	register struct kmemstats *ksp = &kmemstats[type];
+
+	/*
+	 * kmemstats[] has M_LAST entries, so the valid types are
+	 * 0 .. M_LAST - 1 and M_LAST itself must be rejected too.
+	 */
+	if (((unsigned long)type) >= M_LAST)
+		panic("malloc - bogus type");
+#endif
+	indx = BUCKETINDX(size);
+	kbp = &bucket[indx];
+	s = splimp();
+#ifdef KMEMSTATS
+	/* Wait (or fail, for M_NOWAIT) while this type is at its limit. */
+	while (ksp->ks_memuse >= ksp->ks_limit) {
+		if (flags & M_NOWAIT) {
+			splx(s);
+			return ((void *) NULL);
+		}
+		if (ksp->ks_limblocks < 65535)
+			ksp->ks_limblocks++;
+		tsleep((caddr_t)ksp, PSWP+2, memname[type], 0);
+	}
+	ksp->ks_size |= 1 << indx;
+#endif
+#ifdef DIAGNOSTIC
+	copysize = 1 << indx < MAX_COPY ? 1 << indx : MAX_COPY;
+#endif
+	if (kbp->kb_next == NULL) {
+		/* Bucket is empty: get more memory and build a free list. */
+		kbp->kb_last = NULL;
+		if (size > MAXALLOCSAVE)
+			allocsize = roundup(size, CLBYTES);
+		else
+			allocsize = 1 << indx;
+		npg = clrnd(btoc(allocsize));
+		va = (caddr_t) kmem_malloc(kmem_map, (vm_size_t)ctob(npg),
+		    !(flags & M_NOWAIT));
+		if (va == NULL) {
+			splx(s);
+			return ((void *) NULL);
+		}
+#ifdef KMEMSTATS
+		kbp->kb_total += kbp->kb_elmpercl;
+#endif
+		kup = btokup(va);
+		kup->ku_indx = indx;
+		if (allocsize > MAXALLOCSAVE) {
+			/* Large allocation: handed out whole, never listed. */
+			if (npg > 65535)
+				panic("malloc: allocation too large");
+			kup->ku_pagecnt = npg;
+#ifdef KMEMSTATS
+			ksp->ks_memuse += allocsize;
+#endif
+			goto out;
+		}
+#ifdef KMEMSTATS
+		kup->ku_freecnt = kbp->kb_elmpercl;
+		kbp->kb_totalfree += kbp->kb_elmpercl;
+#endif
+		/*
+		 * Just in case we blocked while allocating memory,
+		 * and someone else also allocated memory for this
+		 * bucket, don't assume the list is still empty.
+		 */
+		savedlist = kbp->kb_next;
+		/* Chain the pieces from the highest address down to va. */
+		kbp->kb_next = cp = va + (npg * NBPG) - allocsize;
+		for (;;) {
+			freep = (struct freelist *)cp;
+#ifdef DIAGNOSTIC
+			/*
+			 * Copy in known text to detect modification
+			 * after freeing.
+			 */
+			end = (long *)&cp[copysize];
+			for (lp = (long *)cp; lp < end; lp++)
+				*lp = WEIRD_ADDR;
+			freep->type = M_FREE;
+#endif /* DIAGNOSTIC */
+			if (cp <= va)
+				break;
+			cp -= allocsize;
+			freep->next = cp;
+		}
+		freep->next = savedlist;
+		if (kbp->kb_last == NULL)
+			kbp->kb_last = (caddr_t)freep;
+	}
+	/* Take the first object off the bucket's free list. */
+	va = kbp->kb_next;
+	kbp->kb_next = ((struct freelist *)va)->next;
+#ifdef DIAGNOSTIC
+	freep = (struct freelist *)va;
+	savedtype = (unsigned)freep->type < M_LAST ?
+		memname[freep->type] : "???";
+	if (kbp->kb_next &&
+	    !kernacc(kbp->kb_next, sizeof(struct freelist), 0)) {
+		printf("%s of object 0x%x size %d %s %s (invalid addr 0x%x)\n",
+			"Data modified on freelist: word 2.5", va, size,
+			"previous type", savedtype, kbp->kb_next);
+		kbp->kb_next = NULL;
+	}
+	/*
+	 * Restore the fill pattern over the type and next fields so the
+	 * word-by-word comparison below sees WEIRD_ADDR throughout.
+	 */
+#if BYTE_ORDER == BIG_ENDIAN
+	freep->type = WEIRD_ADDR >> 16;
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+	freep->type = (short)WEIRD_ADDR;
+#endif
+	if (((long)(&freep->next)) & 0x2)
+		freep->next = (caddr_t)((WEIRD_ADDR >> 16)|(WEIRD_ADDR << 16));
+	else
+		freep->next = (caddr_t)WEIRD_ADDR;
+	end = (long *)&va[copysize];
+	for (lp = (long *)va; lp < end; lp++) {
+		if (*lp == WEIRD_ADDR)
+			continue;
+		printf("%s %d of object 0x%x size %d %s %s (0x%x != 0x%x)\n",
+			"Data modified on freelist: word", lp - (long *)va,
+			va, size, "previous type", savedtype, *lp, WEIRD_ADDR);
+		break;
+	}
+	/* free() uses spare0 == WEIRD_ADDR as its "looks free" hint. */
+	freep->spare0 = 0;
+#endif /* DIAGNOSTIC */
+#ifdef KMEMSTATS
+	kup = btokup(va);
+	if (kup->ku_indx != indx)
+		panic("malloc: wrong bucket");
+	if (kup->ku_freecnt == 0)
+		panic("malloc: lost data");
+	kup->ku_freecnt--;
+	kbp->kb_totalfree--;
+	ksp->ks_memuse += 1 << indx;
+out:
+	kbp->kb_calls++;
+	ksp->ks_inuse++;
+	ksp->ks_calls++;
+	if (ksp->ks_memuse > ksp->ks_maxused)
+		ksp->ks_maxused = ksp->ks_memuse;
+#else
+out:
+#endif
+	splx(s);
+	return ((void *) va);
+}
+
+/*
+ * Free a block of memory allocated by malloc.
+ *
+ * addr - address previously returned by malloc().
+ * type - M_* class the block is accounted under.
+ *
+ * Blocks larger than MAXALLOCSAVE are returned to kmem_map; smaller
+ * ones are appended to the tail of their bucket's free list.
+ */
+void
+free(addr, type)
+	void *addr;
+	int type;
+{
+	register struct kmembuckets *kbp;
+	register struct kmemusage *kup;
+	register struct freelist *freep;
+	long size;
+	int s;
+#ifdef DIAGNOSTIC
+	caddr_t cp;
+	long *end, *lp, alloc, copysize;
+#endif
+#ifdef KMEMSTATS
+	register struct kmemstats *ksp = &kmemstats[type];
+#endif
+
+	kup = btokup(addr);
+	size = 1 << kup->ku_indx;
+	kbp = &bucket[kup->ku_indx];
+	s = splimp();
+#ifdef DIAGNOSTIC
+	/*
+	 * Check for returns of data that do not point to the
+	 * beginning of the allocation.
+	 */
+	if (size > NBPG * CLSIZE)
+		alloc = addrmask[BUCKETINDX(NBPG * CLSIZE)];
+	else
+		alloc = addrmask[kup->ku_indx];
+	if (((u_long)addr & alloc) != 0)
+		panic("free: unaligned addr 0x%x, size %d, type %s, mask %d\n",
+			addr, size, memname[type], alloc);
+#endif /* DIAGNOSTIC */
+	if (size > MAXALLOCSAVE) {
+		/* Large allocation: give the pages back. */
+		kmem_free(kmem_map, (vm_offset_t)addr, ctob(kup->ku_pagecnt));
+#ifdef KMEMSTATS
+		size = kup->ku_pagecnt << PGSHIFT;
+		ksp->ks_memuse -= size;
+		kup->ku_indx = 0;
+		kup->ku_pagecnt = 0;
+		/* Wake waiters in malloc() if we just dropped below the limit. */
+		if (ksp->ks_memuse + size >= ksp->ks_limit &&
+		    ksp->ks_memuse < ksp->ks_limit)
+			wakeup((caddr_t)ksp);
+		ksp->ks_inuse--;
+		kbp->kb_total -= 1;
+#endif
+		splx(s);
+		return;
+	}
+	freep = (struct freelist *)addr;
+#ifdef DIAGNOSTIC
+	/*
+	 * Check for multiple frees. Use a quick check to see if
+	 * it looks free before laboriously searching the freelist.
+	 * The list is linked through the "next" member, which under
+	 * DIAGNOSTIC is not the first word of the object, so it must
+	 * be followed through struct freelist.
+	 */
+	if (freep->spare0 == WEIRD_ADDR) {
+		for (cp = kbp->kb_next; cp;
+		    cp = ((struct freelist *)cp)->next) {
+			if (addr != cp)
+				continue;
+			printf("multiply freed item 0x%x\n", addr);
+			panic("free: duplicated free");
+		}
+	}
+	/*
+	 * Copy in known text to detect modification after freeing
+	 * and to make it look free. Also, save the type being freed
+	 * so we can list likely culprit if modification is detected
+	 * when the object is reallocated.
+	 */
+	copysize = size < MAX_COPY ? size : MAX_COPY;
+	end = (long *)&((caddr_t)addr)[copysize];
+	for (lp = (long *)addr; lp < end; lp++)
+		*lp = WEIRD_ADDR;
+	freep->type = type;
+#endif /* DIAGNOSTIC */
+#ifdef KMEMSTATS
+	kup->ku_freecnt++;
+	/* NB: the "else" below pairs with the inner "if". */
+	if (kup->ku_freecnt >= kbp->kb_elmpercl)
+		if (kup->ku_freecnt > kbp->kb_elmpercl)
+			panic("free: multiple frees");
+		else if (kbp->kb_totalfree > kbp->kb_highwat)
+			kbp->kb_couldfree++;
+	kbp->kb_totalfree++;
+	ksp->ks_memuse -= size;
+	/* Wake waiters in malloc() if we just dropped below the limit. */
+	if (ksp->ks_memuse + size >= ksp->ks_limit &&
+	    ksp->ks_memuse < ksp->ks_limit)
+		wakeup((caddr_t)ksp);
+	ksp->ks_inuse--;
+#endif
+	/* Append to the tail of the bucket's free list. */
+	if (kbp->kb_next == NULL)
+		kbp->kb_next = addr;
+	else
+		((struct freelist *)kbp->kb_last)->next = addr;
+	freep->next = NULL;
+	kbp->kb_last = addr;
+	splx(s);
+}
+
+/*
+ * Initialize the kernel memory allocator
+ *
+ * Allocates the kmemusage array, creates the kmem_map submap of
+ * kernel_map and, with KMEMSTATS, sets up the per-bucket and per-type
+ * limits. The ERROR!_ lines below are not C: they deliberately break
+ * the build when the #if guarding them is true.
+ */
+kmeminit()
+{
+ register long indx;
+ int npg; /* number of NBPG-sized pages in VM_KMEM_SIZE */
+
+#if ((MAXALLOCSAVE & (MAXALLOCSAVE - 1)) != 0)
+ ERROR!_kmeminit:_MAXALLOCSAVE_not_power_of_2
+#endif
+#if (MAXALLOCSAVE > MINALLOCSIZE * 32768)
+ ERROR!_kmeminit:_MAXALLOCSAVE_too_big
+#endif
+#if (MAXALLOCSAVE < CLBYTES)
+ ERROR!_kmeminit:_MAXALLOCSAVE_too_small
+#endif
+ npg = VM_KMEM_SIZE/ NBPG;
+ kmemusage = (struct kmemusage *) kmem_alloc(kernel_map,
+ (vm_size_t)(npg * sizeof(struct kmemusage))); /* one kmemusage entry per page */
+ kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
+ (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * NBPG), FALSE);
+#ifdef KMEMSTATS
+ for (indx = 0; indx < MINBUCKET + 16; indx++) {
+ if (1 << indx >= CLBYTES)
+ bucket[indx].kb_elmpercl = 1;
+ else
+ bucket[indx].kb_elmpercl = CLBYTES / (1 << indx);
+ bucket[indx].kb_highwat = 5 * bucket[indx].kb_elmpercl;
+ }
+ for (indx = 0; indx < M_LAST; indx++)
+ kmemstats[indx].ks_limit = npg * NBPG * 6 / 10; /* each type may use up to 60% of the arena */
+#endif
+}
diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c
new file mode 100644
index 000000000000..1eaae3599dee
--- /dev/null
+++ b/sys/kern/kern_physio.c
@@ -0,0 +1,93 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)kern_physio.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
+
+physio(a1, a2, a3, a4, a5, a6) /* stub: the real body was removed, so every call fails with EIO */
+ int (*a1)(); /* rawread()/rawwrite() pass the device's d_strategy routine */
+ struct buf *a2; /* rawread()/rawwrite() pass NULL */
+ dev_t a3; /* device */
+ int a4; /* B_READ or B_WRITE */
+ u_int (*a5)(); /* rawread()/rawwrite() pass minphys */
+ struct uio *a6; /* description of the transfer */
+{
+
+ /*
+ * Body deleted.
+ */
+ return (EIO);
+}
+
+u_int
+minphys(a1) /* stub: the real body was removed; always returns 0 */
+ struct buf *a1; /* unused by the stub */
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+/*
+ * Do a read on a device for a user process.
+ * Hands the request to physio() with the character device's strategy
+ * routine and B_READ, and returns physio()'s result.
+ */
+rawread(dev, uio)
+ dev_t dev; /* major(dev) selects the cdevsw entry */
+ struct uio *uio;
+{
+ return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL,
+ dev, B_READ, minphys, uio));
+}
+
+/*
+ * Do a write on a device for a user process.
+ * Hands the request to physio() with the character device's strategy
+ * routine and B_WRITE, and returns physio()'s result.
+ */
+rawwrite(dev, uio)
+ dev_t dev; /* major(dev) selects the cdevsw entry */
+ struct uio *uio;
+{
+ return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL,
+ dev, B_WRITE, minphys, uio));
+}
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
new file mode 100644
index 000000000000..91d9e212d388
--- /dev/null
+++ b/sys/kern/kern_proc.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_proc.c 8.4 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/map.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/acct.h>
+#include <sys/wait.h>
+#include <sys/file.h>
+#include <ufs/ufs/quota.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+/*
+ * Structure associated with user caching.
+ * One entry per uid that currently owns processes, kept on hash chains.
+ */
+struct uidinfo {
+ struct uidinfo *ui_next; /* next entry on the same hash chain */
+ struct uidinfo **ui_prev; /* address of the pointer that points at this entry */
+ uid_t ui_uid; /* the user id */
+ long ui_proccnt; /* process count maintained by chgproccnt() */
+} **uihashtbl;
+u_long uihash; /* size of hash table - 1 */
+#define UIHASH(uid) ((uid) & uihash)
+
+/*
+ * Allocate a hash table.
+ * Creates uihashtbl via hashinit(), sized from maxproc / 16; hashinit()
+ * is also handed &uihash, the mask that UIHASH() applies.
+ */
+usrinfoinit()
+{
+
+ uihashtbl = hashinit(maxproc / 16, M_PROC, &uihash);
+}
+
+/*
+ * Change the count associated with number of processes
+ * a given user is using.
+ *
+ * Adds diff to the count for uid and returns the new count. An entry is
+ * created on first use and freed when its count returns to zero.
+ * Panics if the count would go negative, or if diff is negative for a
+ * uid that has no entry.
+ */
+int
+chgproccnt(uid, diff)
+ uid_t uid;
+ int diff;
+{
+ register struct uidinfo **uipp, *uip, *uiq;
+
+ uipp = &uihashtbl[UIHASH(uid)];
+ for (uip = *uipp; uip; uip = uip->ui_next)
+ if (uip->ui_uid == uid)
+ break;
+ if (uip) {
+ uip->ui_proccnt += diff;
+ if (uip->ui_proccnt > 0)
+ return (uip->ui_proccnt);
+ if (uip->ui_proccnt < 0)
+ panic("chgproccnt: procs < 0");
+ if (uiq = uip->ui_next) /* count reached zero: unlink the entry and free it */
+ uiq->ui_prev = uip->ui_prev;
+ *uip->ui_prev = uiq;
+ FREE(uip, M_PROC);
+ return (0);
+ }
+ if (diff <= 0) { /* no entry for this uid */
+ if (diff == 0)
+ return(0);
+ panic("chgproccnt: lost user");
+ }
+ MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK);
+ if (uiq = *uipp) /* insert the new entry at the head of the chain */
+ uiq->ui_prev = &uip->ui_next;
+ uip->ui_next = uiq;
+ uip->ui_prev = uipp;
+ *uipp = uip;
+ uip->ui_uid = uid;
+ uip->ui_proccnt = diff;
+ return (diff);
+}
+
+/*
+ * Is p an inferior of the current process?
+ * Follows parent pointers upward from p.  Returns 1 as soon as curproc
+ * is reached (including when p is curproc itself), 0 if the process
+ * with pid 0 is reached first.
+ */
+inferior(p)
+	register struct proc *p;
+{
+
+	while (p != curproc) {
+		if (p->p_pid == 0)
+			return (0);
+		p = p->p_pptr;
+	}
+	return (1);
+}
+
+/*
+ * Locate a process by number.
+ * Searches the pidhash chain for pid; returns the matching proc, or
+ * NULL when there is none.
+ */
+struct proc *
+pfind(pid)
+	register pid_t pid;
+{
+	register struct proc *p;
+
+	p = pidhash[PIDHASH(pid)];
+	while (p != NULL && p->p_pid != pid)
+		p = p->p_hash;
+	return (p);
+}
+
+/*
+ * Locate a process group by number.
+ * Searches the pgrphash chain for pgid; returns the matching pgrp, or
+ * NULL when there is none.
+ */
+struct pgrp *
+pgfind(pgid)
+	register pid_t pgid;
+{
+	register struct pgrp *pgrp;
+
+	pgrp = pgrphash[PIDHASH(pgid)];
+	while (pgrp != NULL && pgrp->pg_id != pgid)
+		pgrp = pgrp->pg_hforw;
+	return (pgrp);
+}
+
+/*
+ * Move p to a new or existing process group (and session)
+ *
+ * p      - process to move.
+ * pgid   - id of the destination group; if no such group exists a new
+ *          one is created.
+ * mksess - non-zero to also create a new session with p as its leader.
+ *
+ * Returns 0 on success (also when p is already in the group), or ESRCH
+ * if p can no longer be found by its pid once the new group has been
+ * allocated.
+ */
+enterpgrp(p, pgid, mksess)
+	register struct proc *p;
+	pid_t pgid;
+	int mksess;
+{
+	register struct pgrp *pgrp = pgfind(pgid);
+	register struct proc **pp;
+	int n;
+
+#ifdef DIAGNOSTIC
+	if (pgrp != NULL && mksess)	/* firewalls */
+		panic("enterpgrp: setsid into non-empty pgrp");
+	if (SESS_LEADER(p))
+		panic("enterpgrp: session leader attempted setpgrp");
+#endif
+	if (pgrp == NULL) {
+		pid_t savepid = p->p_pid;
+		struct proc *np;
+		/*
+		 * new process group
+		 */
+#ifdef DIAGNOSTIC
+		if (p->p_pid != pgid)
+			panic("enterpgrp: new pgrp and pid != pgid");
+#endif
+		MALLOC(pgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP,
+		    M_WAITOK);
+		/*
+		 * The allocation is M_WAITOK, so re-validate p afterwards.
+		 * The new pgrp is not linked anywhere yet, so it must be
+		 * released here or it is lost.
+		 */
+		if ((np = pfind(savepid)) == NULL || np != p) {
+			FREE(pgrp, M_PGRP);
+			return (ESRCH);
+		}
+		if (mksess) {
+			register struct session *sess;
+
+			/*
+			 * new session
+			 */
+			MALLOC(sess, struct session *, sizeof(struct session),
+			    M_SESSION, M_WAITOK);
+			sess->s_leader = p;
+			sess->s_count = 1;
+			sess->s_ttyvp = NULL;
+			sess->s_ttyp = NULL;
+			bcopy(p->p_session->s_login, sess->s_login,
+			    sizeof(sess->s_login));
+			p->p_flag &= ~P_CONTROLT;
+			pgrp->pg_session = sess;
+#ifdef DIAGNOSTIC
+			if (p != curproc)
+				panic("enterpgrp: mksession and p != curproc");
+#endif
+		} else {
+			pgrp->pg_session = p->p_session;
+			pgrp->pg_session->s_count++;
+		}
+		pgrp->pg_id = pgid;
+		pgrp->pg_hforw = pgrphash[n = PIDHASH(pgid)];
+		pgrphash[n] = pgrp;
+		pgrp->pg_jobc = 0;
+		pgrp->pg_mem = NULL;
+	} else if (pgrp == p->p_pgrp)
+		return (0);
+
+	/*
+	 * Adjust eligibility of affected pgrps to participate in job control.
+	 * Increment eligibility counts before decrementing, otherwise we
+	 * could reach 0 spuriously during the first call.
+	 */
+	fixjobc(p, pgrp, 1);
+	fixjobc(p, p->p_pgrp, 0);
+
+	/*
+	 * unlink p from old process group
+	 *
+	 * Stop at the link that points to p, or at the terminating NULL
+	 * link if p is not on the list; pp itself is never NULL, so the
+	 * "not found" test has to look at *pp before the unlink.
+	 */
+	for (pp = &p->p_pgrp->pg_mem; *pp != NULL && *pp != p;
+	    pp = &(*pp)->p_pgrpnxt)
+		continue;
+#ifdef DIAGNOSTIC
+	if (*pp == NULL)
+		panic("enterpgrp: can't find p on old pgrp");
+#endif
+	if (*pp != NULL)
+		*pp = p->p_pgrpnxt;
+	/*
+	 * delete old if empty
+	 */
+	if (p->p_pgrp->pg_mem == 0)
+		pgdelete(p->p_pgrp);
+	/*
+	 * link into new one
+	 */
+	p->p_pgrp = pgrp;
+	p->p_pgrpnxt = pgrp->pg_mem;
+	pgrp->pg_mem = p;
+	return (0);
+}
+
+/*
+ * remove process from process group
+ *
+ * Unlinks p from its group's member list, deletes the group if that
+ * leaves it empty, and clears p->p_pgrp.  Always returns 0.
+ */
+leavepgrp(p)
+	register struct proc *p;
+{
+	register struct proc **pp;
+
+	/*
+	 * Stop at the link that points to p, or at the terminating NULL
+	 * link if p is not on the list; pp itself is never NULL, so the
+	 * "not found" test has to look at *pp before the unlink.
+	 */
+	for (pp = &p->p_pgrp->pg_mem; *pp != NULL && *pp != p;
+	    pp = &(*pp)->p_pgrpnxt)
+		continue;
+#ifdef DIAGNOSTIC
+	if (*pp == NULL)
+		panic("leavepgrp: can't find p in pgrp");
+#endif
+	if (*pp != NULL)
+		*pp = p->p_pgrpnxt;
+	if (!p->p_pgrp->pg_mem)
+		pgdelete(p->p_pgrp);
+	p->p_pgrp = 0;
+	return (0);
+}
+
+/*
+ * delete a process group
+ *
+ * Detaches the group from its session's terminal if it is the
+ * terminal's current group, removes it from the pgrp hash chain, drops
+ * its session reference (freeing the session on the last one) and
+ * frees the group itself.
+ */
+pgdelete(pgrp)
+	register struct pgrp *pgrp;
+{
+	register struct pgrp **pgp;
+
+	if (pgrp->pg_session->s_ttyp != NULL &&
+	    pgrp->pg_session->s_ttyp->t_pgrp == pgrp)
+		pgrp->pg_session->s_ttyp->t_pgrp = NULL;
+	/*
+	 * Stop at the link that points to pgrp, or at the terminating
+	 * NULL link if it is not on the chain; pgp itself is never NULL,
+	 * so the "not found" test has to look at *pgp before the unlink.
+	 */
+	for (pgp = &pgrphash[PIDHASH(pgrp->pg_id)];
+	    *pgp != NULL && *pgp != pgrp; pgp = &(*pgp)->pg_hforw)
+		continue;
+#ifdef DIAGNOSTIC
+	if (*pgp == NULL)
+		panic("pgdelete: can't find pgrp on hash chain");
+#endif
+	if (*pgp != NULL)
+		*pgp = pgrp->pg_hforw;
+	if (--pgrp->pg_session->s_count == 0)
+		FREE(pgrp->pg_session, M_SESSION);
+	FREE(pgrp, M_PGRP);
+}
+
+static void orphanpg();
+
+/*
+ * Maintain the pg_jobc counters when process p enters or leaves pgrp.
+ *
+ * A process "qualifies" its process group for terminal job control when
+ * its parent is in a different process group of the same session.  Each
+ * group counts its qualifying members in pg_jobc; when that count drops
+ * to zero the group is orphaned and orphanpg() is called for it.  Both
+ * p's own group and the groups of p's children can be affected.
+ *	entering == 0 => p is leaving specified group.
+ *	entering == 1 => p is entering specified group.
+ */
+fixjobc(p, pgrp, entering)
+	register struct proc *p;
+	register struct pgrp *pgrp;
+	int entering;
+{
+	register struct pgrp *other;
+	register struct session *sess = pgrp->pg_session;
+	register struct proc *child;
+
+	/*
+	 * Does p's parent make p a qualifying member of pgrp?
+	 */
+	other = p->p_pptr->p_pgrp;
+	if (other != pgrp && other->pg_session == sess) {
+		if (entering) {
+			pgrp->pg_jobc++;
+		} else {
+			pgrp->pg_jobc--;
+			if (pgrp->pg_jobc == 0)
+				orphanpg(pgrp);
+		}
+	}
+
+	/*
+	 * Does p, as a parent, make each live child a qualifying member
+	 * of that child's own group?
+	 */
+	for (child = p->p_cptr; child != NULL; child = child->p_osptr) {
+		other = child->p_pgrp;
+		if (other == pgrp || other->pg_session != sess ||
+		    child->p_stat == SZOMB)
+			continue;
+		if (entering) {
+			other->pg_jobc++;
+		} else {
+			other->pg_jobc--;
+			if (other->pg_jobc == 0)
+				orphanpg(other);
+		}
+	}
+}
+
+/*
+ * A process group has become orphaned.
+ * If at least one member is stopped, send SIGHUP followed by SIGCONT
+ * to every member of the group; otherwise do nothing.
+ */
+static void
+orphanpg(pg)
+	struct pgrp *pg;
+{
+	register struct proc *p;
+
+	/* Look for a stopped member. */
+	for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt)
+		if (p->p_stat == SSTOP)
+			break;
+	if (p == NULL)
+		return;
+
+	/* Found one: hang up and continue the whole group. */
+	for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt) {
+		psignal(p, SIGHUP);
+		psignal(p, SIGCONT);
+	}
+}
+
+#ifdef debug
+/* DEBUG: print every process group on the pgrp hash table and its members */
+pgrpdump()
+{
+ register struct pgrp *pgrp;
+ register struct proc *p;
+ register i;
+
+ for (i=0; i<PIDHSZ; i++) { /* one pass per hash bucket; empty buckets print nothing */
+ if (pgrphash[i]) {
+ printf("\tindx %d\n", i);
+ for (pgrp=pgrphash[i]; pgrp; pgrp=pgrp->pg_hforw) {
+ printf("\tpgrp %x, pgid %d, sess %x, sesscnt %d, mem %x\n",
+ pgrp, pgrp->pg_id, pgrp->pg_session,
+ pgrp->pg_session->s_count, pgrp->pg_mem);
+ for (p=pgrp->pg_mem; p; p=p->p_pgrpnxt) { /* members of this group */
+ printf("\t\tpid %d addr %x pgrp %x\n",
+ p->p_pid, p, p->p_pgrp);
+ }
+ }
+
+ }
+ }
+}
+#endif /* debug */
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
new file mode 100644
index 000000000000..ef400770e20a
--- /dev/null
+++ b/sys/kern/kern_prot.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_prot.c 8.6 (Berkeley) 1/21/94
+ */
+
+/*
+ * System calls related to processes and protection
+ */
+
+#include <sys/param.h>
+#include <sys/acct.h>
+#include <sys/systm.h>
+#include <sys/ucred.h>
+#include <sys/proc.h>
+#include <sys/timeb.h>
+#include <sys/times.h>
+#include <sys/malloc.h>
+
+struct args {
+ int dummy; /* placeholder; the calls in this file that take "struct args" never read uap */
+};
+
+/*
+ * Return the caller's process ID.  Kernels built with COMPAT_43 or
+ * COMPAT_SUNOS also place the parent's process ID in the second
+ * return slot.
+ */
+/* ARGSUSED */
+getpid(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+
+	retval[0] = p->p_pid;
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+	retval[1] = p->p_pptr->p_pid;
+#endif
+	return (0);
+}
+
+/*
+ * Return the process ID of the caller's parent.
+ */
+/* ARGSUSED */
+getppid(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+	register struct proc *parent = p->p_pptr;
+
+	retval[0] = parent->p_pid;
+	return (0);
+}
+
+/*
+ * Get process group ID of the caller; note that POSIX getpgrp takes
+ * no parameter, so uap is not examined.
+ */
+getpgrp(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+	register struct pgrp *pg = p->p_pgrp;
+
+	retval[0] = pg->pg_id;
+	return (0);
+}
+
+/*
+ * Return the caller's real user ID.  Kernels built with COMPAT_43 or
+ * COMPAT_SUNOS also place the effective user ID in the second return
+ * slot.
+ */
+/* ARGSUSED */
+getuid(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+
+	retval[0] = p->p_cred->p_ruid;
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+	retval[1] = p->p_ucred->cr_uid;
+#endif
+	return (0);
+}
+
+/*
+ * Return the caller's effective user ID (cr_uid of its credentials).
+ */
+/* ARGSUSED */
+geteuid(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+	register struct ucred *cr = p->p_ucred;
+
+	retval[0] = cr->cr_uid;
+	return (0);
+}
+
+/*
+ * Return the caller's real group ID.  Kernels built with COMPAT_43 or
+ * COMPAT_SUNOS also place the effective group ID (groups[0]) in the
+ * second return slot.
+ */
+/* ARGSUSED */
+getgid(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+
+	retval[0] = p->p_cred->p_rgid;
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+	retval[1] = p->p_ucred->cr_groups[0];
+#endif
+	return (0);
+}
+
+/*
+ * Get effective group ID.  The "egid" is the first entry of the
+ * credential's group list and could also be fetched with getgroups;
+ * this syscall exists because doing that correctly in a library
+ * function is somewhat painful.
+ */
+/* ARGSUSED */
+getegid(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+	register struct ucred *cr = p->p_ucred;
+
+	retval[0] = cr->cr_groups[0];
+	return (0);
+}
+
+struct getgroups_args {
+	u_int	gidsetsize;
+	gid_t	*gidset;
+};
+/*
+ * Copy the caller's group list out to uap->gidset.
+ * A gidsetsize of zero only reports how many groups there are; a
+ * non-zero size smaller than the group count fails with EINVAL.
+ * On success *retval is the number of groups copied out.
+ */
+getgroups(p, uap, retval)
+	struct proc *p;
+	register struct getgroups_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	register u_int count;
+	int error;
+
+	count = uap->gidsetsize;
+	if (count == 0) {
+		/* Size query only. */
+		*retval = pc->pc_ucred->cr_ngroups;
+		return (0);
+	}
+	if (count < pc->pc_ucred->cr_ngroups)
+		return (EINVAL);
+
+	count = pc->pc_ucred->cr_ngroups;
+	error = copyout((caddr_t)pc->pc_ucred->cr_groups,
+	    (caddr_t)uap->gidset, count * sizeof(gid_t));
+	if (error)
+		return (error);
+	*retval = count;
+	return (0);
+}
+
+/*
+ * Put the caller into a new process group whose id is the caller's
+ * pid, passing 1 as the last argument of enterpgrp().
+ * Fails with EPERM when the caller's process group id already equals
+ * its pid, or when a process group with that id already exists.
+ */
+/* ARGSUSED */
+setsid(p, uap, retval)
+	register struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+
+	if (p->p_pgid == p->p_pid || pgfind(p->p_pid))
+		return (EPERM);
+
+	(void)enterpgrp(p, p->p_pid, 1);
+	*retval = p->p_pid;
+	return (0);
+}
+
+/*
+ * set process group (setpgid/old setpgrp)
+ *
+ * caller does setpgid(targpid, targpgid)
+ *
+ * pid must be caller or child of caller (ESRCH)
+ * if a child
+ *	pid must be in same session (EPERM)
+ *	pid can't have done an exec (EACCES)
+ * if pgid != pid
+ *	there must exist some pid in same session having pgid (EPERM)
+ * pid must not be session leader (EPERM)
+ *
+ * A pid of 0 means the caller; a pgid of 0 means the target's own pid.
+ */
+struct setpgid_args {
+	int	pid;	/* target process id */
+	int	pgid;	/* target pgrp id */
+};
+/* ARGSUSED */
+setpgid(curp, uap, retval)
+	struct proc *curp;
+	register struct setpgid_args *uap;
+	int *retval;
+{
+	register struct proc *targp;		/* target process */
+	register struct pgrp *pgrp;		/* target pgrp */
+
+	if (uap->pid == 0 || uap->pid == curp->p_pid) {
+		targp = curp;
+	} else {
+		targp = pfind(uap->pid);
+		if (targp == 0 || !inferior(targp))
+			return (ESRCH);
+		if (targp->p_session != curp->p_session)
+			return (EPERM);
+		if (targp->p_flag & P_EXEC)
+			return (EACCES);
+	}
+	if (SESS_LEADER(targp))
+		return (EPERM);
+
+	if (uap->pgid == 0) {
+		uap->pgid = targp->p_pid;
+	} else if (uap->pgid != targp->p_pid) {
+		/* Joining an existing group: it must be in our session. */
+		pgrp = pgfind(uap->pgid);
+		if (pgrp == 0 || pgrp->pg_session != curp->p_session)
+			return (EPERM);
+	}
+	return (enterpgrp(targp, uap->pgid, 0));
+}
+
+struct setuid_args {
+	uid_t	uid;
+};
+/*
+ * Set the real, effective and saved user IDs of the caller to
+ * uap->uid.  Changing to anything other than the current real uid
+ * requires suser() to succeed.
+ */
+/* ARGSUSED */
+setuid(p, uap, retval)
+	struct proc *p;
+	struct setuid_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	register uid_t newuid = uap->uid;
+	int error;
+
+	if (newuid != pc->p_ruid) {
+		error = suser(pc->pc_ucred, &p->p_acflag);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Permission granted.  Move this process from the old real
+	 * uid's process count to the new one, then take a private
+	 * copy of the credentials so that other holders of the old
+	 * structure do not see the change.
+	 */
+	(void)chgproccnt(pc->p_ruid, -1);
+	(void)chgproccnt(newuid, 1);
+	pc->pc_ucred = crcopy(pc->pc_ucred);
+	pc->pc_ucred->cr_uid = newuid;
+	pc->p_ruid = newuid;
+	pc->p_svuid = newuid;
+	p->p_flag |= P_SUGID;
+	return (0);
+}
+
+struct seteuid_args {
+	uid_t	euid;
+};
+/*
+ * Set the effective user ID of the caller.  The new value may be the
+ * real or the saved uid; anything else requires suser() to succeed.
+ */
+/* ARGSUSED */
+seteuid(p, uap, retval)
+	struct proc *p;
+	struct seteuid_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	register uid_t neweuid = uap->euid;
+	int error;
+
+	if (neweuid != pc->p_ruid && neweuid != pc->p_svuid) {
+		error = suser(pc->pc_ucred, &p->p_acflag);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Permission granted.  Work on a private copy of the
+	 * credentials so other references are unaffected.
+	 */
+	pc->pc_ucred = crcopy(pc->pc_ucred);
+	pc->pc_ucred->cr_uid = neweuid;
+	p->p_flag |= P_SUGID;
+	return (0);
+}
+
+struct setgid_args {
+	gid_t	gid;
+};
+/*
+ * Set the real, effective (groups[0]) and saved group IDs of the
+ * caller to uap->gid.  Changing to anything other than the current
+ * real gid requires suser() to succeed.
+ */
+/* ARGSUSED */
+setgid(p, uap, retval)
+	struct proc *p;
+	struct setgid_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	register gid_t newgid = uap->gid;
+	int error;
+
+	if (newgid != pc->p_rgid) {
+		error = suser(pc->pc_ucred, &p->p_acflag);
+		if (error)
+			return (error);
+	}
+
+	/* Modify a private copy of the credentials. */
+	pc->pc_ucred = crcopy(pc->pc_ucred);
+	pc->pc_ucred->cr_groups[0] = newgid;
+	pc->p_rgid = newgid;
+	pc->p_svgid = newgid;		/* ??? */
+	p->p_flag |= P_SUGID;
+	return (0);
+}
+
+struct setegid_args {
+	gid_t	egid;
+};
+/*
+ * Set the effective group ID (groups[0]) of the caller.  The new
+ * value may be the real or the saved gid; anything else requires
+ * suser() to succeed.
+ */
+/* ARGSUSED */
+setegid(p, uap, retval)
+	struct proc *p;
+	struct setegid_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	register gid_t newegid = uap->egid;
+	int error;
+
+	if (newegid != pc->p_rgid && newegid != pc->p_svgid) {
+		error = suser(pc->pc_ucred, &p->p_acflag);
+		if (error)
+			return (error);
+	}
+
+	/* Modify a private copy of the credentials. */
+	pc->pc_ucred = crcopy(pc->pc_ucred);
+	pc->pc_ucred->cr_groups[0] = newegid;
+	p->p_flag |= P_SUGID;
+	return (0);
+}
+
+struct setgroups_args {
+	u_int	gidsetsize;
+	gid_t	*gidset;
+};
+/*
+ * Replace the caller's group list with the gidsetsize entries found
+ * at uap->gidset.  Requires suser() to succeed; more than NGROUPS
+ * entries is EINVAL.
+ *
+ * The new list is first copied into a local buffer and only then
+ * installed in the credentials.  Copying straight into the
+ * credential's cr_groups would leave a partly overwritten list
+ * (including cr_groups[0], the effective gid) behind if copyin
+ * faulted part-way through.
+ */
+/* ARGSUSED */
+setgroups(p, uap, retval)
+	struct proc *p;
+	struct setgroups_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	register u_int ngrp;
+	gid_t groups[NGROUPS];		/* staging area for the new list */
+	int error;
+
+	if (error = suser(pc->pc_ucred, &p->p_acflag))
+		return (error);
+	if ((ngrp = uap->gidsetsize) > NGROUPS)
+		return (EINVAL);
+	if (error = copyin((caddr_t)uap->gidset, (caddr_t)groups,
+	    ngrp * sizeof(gid_t)))
+		return (error);
+
+	/* Nothing can fail from here on; modify a private copy. */
+	pc->pc_ucred = crcopy(pc->pc_ucred);
+	bcopy((caddr_t)groups, (caddr_t)pc->pc_ucred->cr_groups,
+	    ngrp * sizeof(gid_t));
+	pc->pc_ucred->cr_ngroups = ngrp;
+	p->p_flag |= P_SUGID;
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct setreuid_args {
+	int	ruid;
+	int	euid;
+};
+/*
+ * Compatibility setreuid: the real uid is validated but never
+ * changed; the effective uid is changed through seteuid().
+ * A value of -1 for either argument means "leave alone".
+ */
+/* ARGSUSED */
+osetreuid(p, uap, retval)
+	register struct proc *p;
+	struct setreuid_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	struct seteuid_args args;
+
+	/*
+	 * we assume that the intent of setting ruid is to be able to get
+	 * back ruid privilege. So we make sure that we will be able to
+	 * do so, but do not actually set the ruid.
+	 */
+	if (uap->ruid != (uid_t)-1) {
+		if (uap->ruid != pc->p_ruid && uap->ruid != pc->p_svuid)
+			return (EPERM);
+	}
+	if (uap->euid == (uid_t)-1)
+		return (0);
+
+	args.euid = uap->euid;
+	return (seteuid(p, &args, retval));
+}
+
+struct setregid_args {
+	int	rgid;
+	int	egid;
+};
+/*
+ * Compatibility setregid: the real gid is validated but never
+ * changed; the effective gid is changed through setegid().
+ * A value of -1 for either argument means "leave alone".
+ */
+/* ARGSUSED */
+osetregid(p, uap, retval)
+	register struct proc *p;
+	struct setregid_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	struct setegid_args args;
+
+	/*
+	 * we assume that the intent of setting rgid is to be able to get
+	 * back rgid privilege. So we make sure that we will be able to
+	 * do so, but do not actually set the rgid.
+	 */
+	if (uap->rgid != (gid_t)-1) {
+		if (uap->rgid != pc->p_rgid && uap->rgid != pc->p_svgid)
+			return (EPERM);
+	}
+	if (uap->egid == (gid_t)-1)
+		return (0);
+
+	args.egid = uap->egid;
+	return (setegid(p, &args, retval));
+}
+#endif /* defined(COMPAT_43) || defined(COMPAT_SUNOS) */
+
+/*
+ * Check if gid is a member of the group set held in cred.
+ * Returns 1 when gid appears among the first cr_ngroups entries of
+ * cr_groups, 0 otherwise.
+ */
+groupmember(gid, cred)
+	gid_t gid;
+	register struct ucred *cred;
+{
+	register int i;
+
+	for (i = 0; i < cred->cr_ngroups; i++) {
+		if (cred->cr_groups[i] == gid)
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * Test whether the specified credentials imply "super-user"
+ * privilege, i.e. whether cr_uid is 0.  If they do and the caller
+ * supplied an accounting flag word, record the use of super-powers
+ * by setting ASU in it.
+ * Returns 0 when privileged, EPERM otherwise.
+ */
+suser(cred, acflag)
+	struct ucred *cred;
+	short *acflag;
+{
+
+	if (cred->cr_uid != 0)
+		return (EPERM);
+	if (acflag)
+		*acflag |= ASU;
+	return (0);
+}
+
+/*
+ * Allocate a cred structure, zero it, and hand it back holding a
+ * single reference.
+ */
+struct ucred *
+crget()
+{
+	register struct ucred *newcr;
+
+	MALLOC(newcr, struct ucred *, sizeof(*newcr), M_CRED, M_WAITOK);
+	bzero((caddr_t)newcr, sizeof(*newcr));
+	newcr->cr_ref = 1;
+	return (newcr);
+}
+
+/*
+ * Drop one reference to a cred structure; the space is released when
+ * the reference count reaches 0.  The count is adjusted, and the
+ * structure freed, between splimp() and splx().
+ */
+crfree(cr)
+	struct ucred *cr;
+{
+	int s;
+
+	s = splimp();				/* ??? */
+	cr->cr_ref--;
+	if (cr->cr_ref == 0)
+		FREE((caddr_t)cr, M_CRED);
+	(void) splx(s);
+}
+
+/*
+ * Return a cred structure that the caller alone references.
+ * If cr has exactly one reference it is returned unchanged.
+ * Otherwise its contents are copied into a fresh structure, the
+ * caller's reference on the old one is released, and the copy is
+ * returned with a reference count of 1.
+ */
+struct ucred *
+crcopy(cr)
+	struct ucred *cr;
+{
+	struct ucred *priv;
+
+	if (cr->cr_ref != 1) {
+		priv = crget();
+		*priv = *cr;
+		crfree(cr);
+		/* The structure copy also copied the old count. */
+		priv->cr_ref = 1;
+		return (priv);
+	}
+	return (cr);
+}
+
+/*
+ * Dup cred struct to a new held one: the copy carries the same
+ * contents as cr but a reference count of 1.  The reference count
+ * of cr itself is not touched.
+ */
+struct ucred *
+crdup(cr)
+	struct ucred *cr;
+{
+	struct ucred *dup = crget();
+
+	*dup = *cr;
+	dup->cr_ref = 1;	/* undo the count copied from cr */
+	return (dup);
+}
+
+/*
+ * Get login name, if available.
+ * Copies the session's s_login buffer out to uap->namebuf; a
+ * namelen larger than that buffer is reduced to the buffer's size.
+ */
+struct getlogin_args {
+	char	*namebuf;
+	u_int	namelen;
+};
+/* ARGSUSED */
+getlogin(p, uap, retval)
+	struct proc *p;
+	struct getlogin_args *uap;
+	int *retval;
+{
+	register struct session *sess = p->p_pgrp->pg_session;
+
+	if (uap->namelen > sizeof (sess->s_login))
+		uap->namelen = sizeof (sess->s_login);
+	return (copyout((caddr_t) sess->s_login,
+	    (caddr_t) uap->namebuf, uap->namelen));
+}
+
+/*
+ * Set login name.
+ * Requires suser() to succeed.  The name is read from uap->namebuf
+ * with the same length bound as before (size of s_login minus one);
+ * a name that does not fit yields EINVAL.
+ *
+ * The string is first copied into a zeroed temporary and installed
+ * in the session only when copyinstr succeeded.  Copying directly
+ * into s_login would leave the session's login name overwritten,
+ * and possibly unterminated, after a failed call.
+ */
+struct setlogin_args {
+	char	*namebuf;
+};
+/* ARGSUSED */
+setlogin(p, uap, retval)
+	struct proc *p;
+	struct setlogin_args *uap;
+	int *retval;
+{
+	char logintmp[sizeof (p->p_pgrp->pg_session->s_login)];
+	int error;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	bzero((caddr_t)logintmp, sizeof (logintmp));
+	error = copyinstr((caddr_t) uap->namebuf, (caddr_t) logintmp,
+	    sizeof (logintmp) - 1, (u_int *)0);
+	if (error == ENAMETOOLONG)
+		return (EINVAL);
+	if (error)
+		return (error);
+	/* Fully validated: replace the old name in one step. */
+	bcopy((caddr_t)logintmp, (caddr_t) p->p_pgrp->pg_session->s_login,
+	    sizeof (logintmp));
+	return (0);
+}
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
new file mode 100644
index 000000000000..68e9dfbc86de
--- /dev/null
+++ b/sys/kern/kern_resource.c
@@ -0,0 +1,476 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/resourcevar.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+/*
+ * Resource controls and accounting.
+ */
+
+struct getpriority_args {
+	int	which;
+	int	who;
+};
+/*
+ * Report the lowest p_nice value among the processes selected by
+ * (which, who): one process, a process group, or all processes
+ * whose credential uid matches.  A "who" of 0 selects the caller,
+ * the caller's process group, or the caller's uid respectively.
+ * Returns EINVAL for an unknown "which" and ESRCH when nothing
+ * matched.
+ */
+getpriority(curp, uap, retval)
+	struct proc *curp;
+	register struct getpriority_args *uap;
+	int *retval;
+{
+	register struct proc *p;
+	register struct pgrp *pg;
+	register int best = PRIO_MAX + 1;	/* "nothing found" marker */
+
+	switch (uap->which) {
+
+	case PRIO_PROCESS:
+		p = (uap->who == 0) ? curp : pfind(uap->who);
+		if (p != 0)
+			best = p->p_nice;
+		break;
+
+	case PRIO_PGRP:
+		pg = (uap->who == 0) ? curp->p_pgrp : pgfind(uap->who);
+		if (pg == NULL)
+			break;
+		for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt)
+			if (p->p_nice < best)
+				best = p->p_nice;
+		break;
+
+	case PRIO_USER:
+		if (uap->who == 0)
+			uap->who = curp->p_ucred->cr_uid;
+		for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+			if (p->p_ucred->cr_uid != uap->who)
+				continue;
+			if (p->p_nice < best)
+				best = p->p_nice;
+		}
+		break;
+
+	default:
+		return (EINVAL);
+	}
+
+	if (best == PRIO_MAX + 1)
+		return (ESRCH);
+	*retval = best;
+	return (0);
+}
+
+struct setpriority_args {
+	int	which;
+	int	who;
+	int	prio;
+};
+/*
+ * Apply donice() with uap->prio to every process selected by
+ * (which, who); selection works as in getpriority.
+ * Returns EINVAL for an unknown "which", ESRCH when no process was
+ * selected, and otherwise the result of the last donice() call.
+ */
+/* ARGSUSED */
+setpriority(curp, uap, retval)
+	struct proc *curp;
+	register struct setpriority_args *uap;
+	int *retval;
+{
+	register struct proc *p;
+	register struct pgrp *pg;
+	int nfound = 0, error = 0;
+
+	switch (uap->which) {
+
+	case PRIO_PROCESS:
+		p = (uap->who == 0) ? curp : pfind(uap->who);
+		if (p != 0) {
+			error = donice(curp, p, uap->prio);
+			nfound++;
+		}
+		break;
+
+	case PRIO_PGRP:
+		pg = (uap->who == 0) ? curp->p_pgrp : pgfind(uap->who);
+		if (pg == NULL)
+			break;
+		for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt) {
+			error = donice(curp, p, uap->prio);
+			nfound++;
+		}
+		break;
+
+	case PRIO_USER:
+		if (uap->who == 0)
+			uap->who = curp->p_ucred->cr_uid;
+		for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+			if (p->p_ucred->cr_uid != uap->who)
+				continue;
+			error = donice(curp, p, uap->prio);
+			nfound++;
+		}
+		break;
+
+	default:
+		return (EINVAL);
+	}
+
+	return (nfound == 0 ? ESRCH : error);
+}
+
+/*
+ * Set the nice value of chgp to n on behalf of curp.
+ * EPERM: curp has non-zero effective and real uids and neither
+ * matches chgp's credential uid.
+ * EACCES: the (clamped) value is lower than chgp's current nice
+ * value and suser() fails for curp.
+ * n is clamped to the range PRIO_MIN..PRIO_MAX before use.
+ */
+donice(curp, chgp, n)
+	register struct proc *curp, *chgp;
+	register int n;
+{
+	register struct pcred *pcred = curp->p_cred;
+
+	if (pcred->pc_ucred->cr_uid != 0 && pcred->p_ruid != 0) {
+		if (pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid &&
+		    pcred->p_ruid != chgp->p_ucred->cr_uid)
+			return (EPERM);
+	}
+
+	if (n > PRIO_MAX)
+		n = PRIO_MAX;
+	else if (n < PRIO_MIN)
+		n = PRIO_MIN;
+
+	if (n < chgp->p_nice) {
+		if (suser(pcred->pc_ucred, &curp->p_acflag))
+			return (EACCES);
+	}
+	chgp->p_nice = n;
+	(void)resetpriority(chgp);
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct setrlimit_args {
+	u_int	which;
+	struct	orlimit *lim;
+};
+/*
+ * Compatibility setrlimit: read an "orlimit" from the caller, widen
+ * it field by field into a "struct rlimit", and pass that to
+ * dosetrlimit().
+ */
+/* ARGSUSED */
+osetrlimit(p, uap, retval)
+	struct proc *p;
+	register struct setrlimit_args *uap;
+	int *retval;
+{
+	struct orlimit oldfmt;
+	struct rlimit newfmt;
+	int error;
+
+	error = copyin((caddr_t)uap->lim, (caddr_t)&oldfmt,
+	    sizeof (struct orlimit));
+	if (error)
+		return (error);
+
+	newfmt.rlim_cur = oldfmt.rlim_cur;
+	newfmt.rlim_max = oldfmt.rlim_max;
+	return (dosetrlimit(p, uap->which, &newfmt));
+}
+
+struct getrlimit_args {
+	u_int	which;
+	struct	orlimit *rlp;
+};
+/*
+ * Compatibility getrlimit: copy limit "which" out in "orlimit"
+ * form.  A field that reads as -1 after conversion is reported as
+ * 0x7fffffff.  EINVAL if "which" is out of range.
+ */
+/* ARGSUSED */
+ogetrlimit(p, uap, retval)
+	struct proc *p;
+	register struct getrlimit_args *uap;
+	int *retval;
+{
+	struct orlimit out;
+
+	if (uap->which >= RLIM_NLIMITS)
+		return (EINVAL);
+
+	out.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
+	out.rlim_max = p->p_rlimit[uap->which].rlim_max;
+	if (out.rlim_cur == -1)
+		out.rlim_cur = 0x7fffffff;
+	if (out.rlim_max == -1)
+		out.rlim_max = 0x7fffffff;
+	return (copyout((caddr_t)&out, (caddr_t)uap->rlp, sizeof(out)));
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+struct __setrlimit_args {
+	u_int	which;
+	struct	rlimit *lim;
+};
+/*
+ * setrlimit system call: fetch the new limit from the caller and let
+ * dosetrlimit() validate and install it.
+ */
+/* ARGSUSED */
+setrlimit(p, uap, retval)
+	struct proc *p;
+	register struct __setrlimit_args *uap;
+	int *retval;
+{
+	struct rlimit newlim;
+	int error;
+
+	error = copyin((caddr_t)uap->lim, (caddr_t)&newlim,
+	    sizeof (struct rlimit));
+	if (error)
+		return (error);
+	return (dosetrlimit(p, uap->which, &newlim));
+}
+
+dosetrlimit(p, which, limp)
+ struct proc *p;
+ u_int which;
+ struct rlimit *limp;
+{ /*
+ * Validate *limp and install it as resource limit "which" of p.
+ * Returns 0, EINVAL for an out-of-range "which", or the error from
+ * suser().  Note that *limp itself is modified when values are
+ * clamped.
+ */
+ register struct rlimit *alimp;
+ extern unsigned maxdmap;
+ int error;
+
+ if (which >= RLIM_NLIMITS)
+ return (EINVAL);
+ alimp = &p->p_rlimit[which];
+ if (limp->rlim_cur > alimp->rlim_max ||
+ limp->rlim_max > alimp->rlim_max)
+ if (error = suser(p->p_ucred, &p->p_acflag)) /* going above the current hard limit needs privilege */
+ return (error);
+ if (limp->rlim_cur > limp->rlim_max)
+ limp->rlim_cur = limp->rlim_max; /* soft limit may not exceed the requested hard limit */
+ if (p->p_limit->p_refcnt > 1 &&
+ (p->p_limit->p_lflags & PL_SHAREMOD) == 0) { /* plimit has other references and PL_SHAREMOD is clear: switch to a private copy */
+ p->p_limit->p_refcnt--;
+ p->p_limit = limcopy(p->p_limit);
+ alimp = &p->p_rlimit[which]; /* recompute now that p_limit has been replaced */
+ }
+
+ switch (which) {
+
+ case RLIMIT_DATA:
+ if (limp->rlim_cur > maxdmap)
+ limp->rlim_cur = maxdmap;
+ if (limp->rlim_max > maxdmap)
+ limp->rlim_max = maxdmap;
+ break;
+
+ case RLIMIT_STACK:
+ if (limp->rlim_cur > maxdmap)
+ limp->rlim_cur = maxdmap;
+ if (limp->rlim_max > maxdmap)
+ limp->rlim_max = maxdmap;
+ /*
+ * Stack is allocated to the max at exec time with only
+ * "rlim_cur" bytes accessible. If stack limit is going
+ * up make more accessible, if going down make inaccessible.
+ * The affected range is the difference between the old and new
+ * soft limits, measured down from USRSTACK and page-aligned.
+ */
+ if (limp->rlim_cur != alimp->rlim_cur) {
+ vm_offset_t addr;
+ vm_size_t size;
+ vm_prot_t prot;
+
+ if (limp->rlim_cur > alimp->rlim_cur) {
+ prot = VM_PROT_ALL;
+ size = limp->rlim_cur - alimp->rlim_cur;
+ addr = USRSTACK - limp->rlim_cur;
+ } else {
+ prot = VM_PROT_NONE;
+ size = alimp->rlim_cur - limp->rlim_cur;
+ addr = USRSTACK - alimp->rlim_cur;
+ }
+ addr = trunc_page(addr);
+ size = round_page(size);
+ (void) vm_map_protect(&p->p_vmspace->vm_map,
+ addr, addr+size, prot, FALSE); /* result deliberately ignored */
+ }
+ break;
+
+ case RLIMIT_NOFILE:
+ if (limp->rlim_cur > maxfiles)
+ limp->rlim_cur = maxfiles;
+ if (limp->rlim_max > maxfiles)
+ limp->rlim_max = maxfiles;
+ break;
+
+ case RLIMIT_NPROC:
+ if (limp->rlim_cur > maxproc)
+ limp->rlim_cur = maxproc;
+ if (limp->rlim_max > maxproc)
+ limp->rlim_max = maxproc;
+ break;
+ }
+ *alimp = *limp; /* install the (possibly clamped) values */
+ return (0);
+}
+
+struct __getrlimit_args {
+	u_int	which;
+	struct	rlimit *rlp;
+};
+/*
+ * getrlimit system call: copy resource limit "which" of the caller
+ * out to uap->rlp.  EINVAL if "which" is out of range.
+ */
+/* ARGSUSED */
+getrlimit(p, uap, retval)
+	struct proc *p;
+	register struct __getrlimit_args *uap;
+	int *retval;
+{
+	register u_int idx = uap->which;
+
+	if (idx >= RLIM_NLIMITS)
+		return (EINVAL);
+	return (copyout((caddr_t)&p->p_rlimit[idx], (caddr_t)uap->rlp,
+	    sizeof (struct rlimit)));
+}
+
+/*
+ * Transform the running time and tick information in proc p into user,
+ * system, and interrupt time usage.
+ *
+ * The total run time of p (p_rtime, plus the time since "runtime"
+ * when p is the current process) is divided among *up, *sp and *ip
+ * in proportion to the user, system and interrupt tick counts.
+ * ip may be NULL when the interrupt share is not wanted.  With no
+ * ticks recorded at all, every result is zero.
+ */
+calcru(p, up, sp, ip)
+	register struct proc *p;
+	register struct timeval *up;
+	register struct timeval *sp;
+	register struct timeval *ip;
+{
+	register u_quad_t u, st, ut, it, tot;
+	/*
+	 * Signed on purpose: the microsecond part of the time-slice
+	 * adjustment below can be negative, and it has to stay
+	 * negative when it is widened for the final sum.
+	 */
+	register long sec, usec;
+	register int s;
+	struct timeval tv;
+
+	/* Sample the three tick counters together. */
+	s = splstatclock();
+	st = p->p_sticks;
+	ut = p->p_uticks;
+	it = p->p_iticks;
+	splx(s);
+
+	tot = st + ut + it;
+	if (tot == 0) {
+		up->tv_sec = up->tv_usec = 0;
+		sp->tv_sec = sp->tv_usec = 0;
+		if (ip != NULL)
+			ip->tv_sec = ip->tv_usec = 0;
+		return;
+	}
+
+	sec = p->p_rtime.tv_sec;
+	usec = p->p_rtime.tv_usec;
+	if (p == curproc) {
+		/*
+		 * Adjust for the current time slice.  This is actually fairly
+		 * important since the error here is on the order of a time
+		 * quantum, which is much greater than the sampling error.
+		 */
+		microtime(&tv);
+		sec += tv.tv_sec - runtime.tv_sec;
+		usec += tv.tv_usec - runtime.tv_usec;
+	}
+	/*
+	 * Widen before multiplying: done in "long" arithmetic the
+	 * product wraps after roughly 71 minutes of run time where
+	 * long is 32 bits.
+	 */
+	u = (u_quad_t)sec * 1000000 + usec;
+	st = (u * st) / tot;
+	sp->tv_sec = st / 1000000;
+	sp->tv_usec = st % 1000000;
+	ut = (u * ut) / tot;
+	up->tv_sec = ut / 1000000;
+	up->tv_usec = ut % 1000000;
+	if (ip != NULL) {
+		it = (u * it) / tot;
+		ip->tv_sec = it / 1000000;
+		ip->tv_usec = it % 1000000;
+	}
+}
+
+struct getrusage_args {
+	int	who;
+	struct	rusage *rusage;
+};
+/*
+ * getrusage system call: copy out the caller's own usage record
+ * (after refreshing its user and system times with calcru) or the
+ * record kept for its children.  EINVAL for any other "who".
+ */
+/* ARGSUSED */
+getrusage(p, uap, retval)
+	register struct proc *p;
+	register struct getrusage_args *uap;
+	int *retval;
+{
+	register struct rusage *src;
+
+	if (uap->who == RUSAGE_SELF) {
+		src = &p->p_stats->p_ru;
+		calcru(p, &src->ru_utime, &src->ru_stime, NULL);
+	} else if (uap->who == RUSAGE_CHILDREN) {
+		src = &p->p_stats->p_cru;
+	} else {
+		return (EINVAL);
+	}
+	return (copyout((caddr_t)src, (caddr_t)uap->rusage,
+	    sizeof (struct rusage)));
+}
+
+/*
+ * Accumulate the usage record ru2 into ru: the user and system
+ * times are added, ru_maxrss becomes the larger of the two values,
+ * and every long from ru_first through ru_last inclusive is summed.
+ */
+ruadd(ru, ru2)
+	register struct rusage *ru, *ru2;
+{
+	register long *dst, *src;
+	register int left;
+
+	timevaladd(&ru->ru_utime, &ru2->ru_utime);
+	timevaladd(&ru->ru_stime, &ru2->ru_stime);
+	if (ru2->ru_maxrss > ru->ru_maxrss)
+		ru->ru_maxrss = ru2->ru_maxrss;
+
+	dst = &ru->ru_first;
+	src = &ru2->ru_first;
+	/* The loop runs once more than the distance, covering ru_last. */
+	left = &ru->ru_last - &ru->ru_first;
+	while (left-- >= 0) {
+		*dst += *src;
+		dst++;
+		src++;
+	}
+}
+
+/*
+ * Make a copy of the plimit structure.
+ * We share these structures copy-on-write after fork,
+ * and copy when a limit is changed.
+ * The copy gets the same limit values, cleared flags and a
+ * reference count of 1; the caller adjusts the count of the original.
+ */
+struct plimit *
+limcopy(lim)
+	struct plimit *lim;
+{
+	register struct plimit *newlim;
+
+	MALLOC(newlim, struct plimit *, sizeof(struct plimit),
+	    M_SUBPROC, M_WAITOK);
+	newlim->p_refcnt = 1;
+	newlim->p_lflags = 0;
+	bcopy(lim->pl_rlimit, newlim->pl_rlimit,
+	    sizeof(struct rlimit) * RLIM_NLIMITS);
+	return (newlim);
+}
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
new file mode 100644
index 000000000000..3dcff922c399
--- /dev/null
+++ b/sys/kern/kern_sig.c
@@ -0,0 +1,1197 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_sig.c 8.7 (Berkeley) 4/18/94
+ */
+
+#define SIGPROP /* include signal properties table */
+#include <sys/param.h>
+#include <sys/signalvar.h>
+#include <sys/resourcevar.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/timeb.h>
+#include <sys/times.h>
+#include <sys/buf.h>
+#include <sys/acct.h>
+#include <sys/file.h>
+#include <sys/kernel.h>
+#include <sys/wait.h>
+#include <sys/ktrace.h>
+#include <sys/syslog.h>
+#include <sys/stat.h>
+
+#include <machine/cpu.h>
+
+#include <vm/vm.h>
+#include <sys/user.h> /* for coredump */
+
+/*
+ * Can process p, with pcred pc, send the signal signum to process q?
+ * True when the sender's effective uid (pc_ucred->cr_uid) is 0, when
+ * the sender's real or effective uid equals the target's real or
+ * effective uid, or when signum is SIGCONT and both processes belong
+ * to the same session.
+ * The arguments are expanded more than once, so they must not have
+ * side effects.
+ */
+#define CANSIGNAL(p, pc, q, signum) \
+ ((pc)->pc_ucred->cr_uid == 0 || \
+ (pc)->p_ruid == (q)->p_cred->p_ruid || \
+ (pc)->pc_ucred->cr_uid == (q)->p_cred->p_ruid || \
+ (pc)->p_ruid == (q)->p_ucred->cr_uid || \
+ (pc)->pc_ucred->cr_uid == (q)->p_ucred->cr_uid || \
+ ((signum) == SIGCONT && (q)->p_session == (p)->p_session))
+
+struct sigaction_args {
+	int	signum;
+	struct	sigaction *nsa;
+	struct	sigaction *osa;
+};
+/*
+ * sigaction system call: optionally report the current action for
+ * uap->signum through uap->osa, then optionally install the action
+ * found at uap->nsa.
+ * EINVAL for a signal number outside 1..NSIG-1 and for SIGKILL and
+ * SIGSTOP; otherwise 0 or the error from copyin/copyout.
+ */
+/* ARGSUSED */
+sigaction(p, uap, retval)
+	struct proc *p;
+	register struct sigaction_args *uap;
+	int *retval;
+{
+	struct sigaction vec;
+	register struct sigaction *sa;
+	register struct sigacts *ps = p->p_sigacts;
+	register int signum;
+	int bit, error;
+
+	signum = uap->signum;
+	if (signum <= 0 || signum >= NSIG ||
+	    signum == SIGKILL || signum == SIGSTOP)
+		return (EINVAL);
+	sa = &vec;
+	if (uap->osa) {
+		/* Rebuild the sigaction from the per-signal state. */
+		sa->sa_handler = ps->ps_sigact[signum];
+		sa->sa_mask = ps->ps_catchmask[signum];
+		bit = sigmask(signum);
+		sa->sa_flags = 0;
+		if ((ps->ps_sigonstack & bit) != 0)
+			sa->sa_flags |= SA_ONSTACK;
+		if ((ps->ps_sigintr & bit) == 0)
+			sa->sa_flags |= SA_RESTART;
+		/*
+		 * P_NOCLDSTOP is a per-process flag that setsigvec()
+		 * changes only for SIGCHLD, so report SA_NOCLDSTOP
+		 * only when SIGCHLD is the signal being queried.
+		 */
+		if (signum == SIGCHLD && (p->p_flag & P_NOCLDSTOP))
+			sa->sa_flags |= SA_NOCLDSTOP;
+		if (error = copyout((caddr_t)sa, (caddr_t)uap->osa,
+		    sizeof (vec)))
+			return (error);
+	}
+	if (uap->nsa) {
+		if (error = copyin((caddr_t)uap->nsa, (caddr_t)sa,
+		    sizeof (vec)))
+			return (error);
+		setsigvec(p, signum, sa);
+	}
+	return (0);
+}
+
+setsigvec(p, signum, sa)
+ register struct proc *p;
+ int signum;
+ register struct sigaction *sa;
+{ /* Install the action described by sa as p's disposition for signum. */
+ register struct sigacts *ps = p->p_sigacts;
+ register int bit;
+
+ bit = sigmask(signum);
+ /*
+ * Change setting atomically.
+ */
+ (void) splhigh();
+ ps->ps_sigact[signum] = sa->sa_handler;
+ ps->ps_catchmask[signum] = sa->sa_mask &~ sigcantmask;
+ if ((sa->sa_flags & SA_RESTART) == 0) /* ps_sigintr holds the signals installed without SA_RESTART */
+ ps->ps_sigintr |= bit;
+ else
+ ps->ps_sigintr &= ~bit;
+ if (sa->sa_flags & SA_ONSTACK)
+ ps->ps_sigonstack |= bit;
+ else
+ ps->ps_sigonstack &= ~bit;
+#ifdef COMPAT_SUNOS
+ if (sa->sa_flags & SA_USERTRAMP)
+ ps->ps_usertramp |= bit;
+ else
+ ps->ps_usertramp &= ~bit;
+#endif
+ if (signum == SIGCHLD) { /* P_NOCLDSTOP is a process flag; only SIGCHLD's action may change it */
+ if (sa->sa_flags & SA_NOCLDSTOP)
+ p->p_flag |= P_NOCLDSTOP;
+ else
+ p->p_flag &= ~P_NOCLDSTOP;
+ }
+ /*
+ * Set bit in p_sigignore for signals that are set to SIG_IGN,
+ * and for signals set to SIG_DFL where the default is to ignore.
+ * However, don't put SIGCONT in p_sigignore,
+ * as we have to restart the process.
+ * In every case exactly one of "ignored", "caught" or neither
+ * ends up recorded for the signal.
+ */
+ if (sa->sa_handler == SIG_IGN ||
+ (sigprop[signum] & SA_IGNORE && sa->sa_handler == SIG_DFL)) {
+ p->p_siglist &= ~bit; /* never to be seen again */
+ if (signum != SIGCONT)
+ p->p_sigignore |= bit; /* easier in psignal */
+ p->p_sigcatch &= ~bit;
+ } else {
+ p->p_sigignore &= ~bit;
+ if (sa->sa_handler == SIG_DFL)
+ p->p_sigcatch &= ~bit;
+ else
+ p->p_sigcatch |= bit;
+ }
+ (void) spl0(); /* NOTE(review): drops to spl0 rather than restoring the level in effect before splhigh(); presumably callers always enter at spl0 - confirm */
+}
+
+/*
+ * Initialize signal state for process 0;
+ * set to ignore signals that are ignored by default.
+ * SIGCONT is never added to the ignore set.
+ */
+void
+siginit(p)
+	struct proc *p;
+{
+	register int sig;
+
+	for (sig = 0; sig < NSIG; sig++) {
+		if ((sigprop[sig] & SA_IGNORE) == 0)
+			continue;
+		if (sig != SIGCONT)
+			p->p_sigignore |= sigmask(sig);
+	}
+}
+
+/*
+ * Reset signals for an exec of the specified process.
+ */
+void
+execsigs(p)
+	register struct proc *p;
+{
+	register struct sigacts *ps = p->p_sigacts;
+	register int sig, bit;
+
+	/*
+	 * Every caught signal goes back to SIG_DFL.  Held signals
+	 * remain held through p_sigmask; a caught signal whose default
+	 * action is to be ignored is moved to the ignore set (except
+	 * SIGCONT) and any pending instance of it is discarded.
+	 */
+	while (p->p_sigcatch != 0) {
+		sig = ffs((long)p->p_sigcatch);
+		bit = sigmask(sig);
+		p->p_sigcatch &= ~bit;
+		if (sigprop[sig] & SA_IGNORE) {
+			if (sig != SIGCONT)
+				p->p_sigignore |= bit;
+			p->p_siglist &= ~bit;
+		}
+		ps->ps_sigact[sig] = SIG_DFL;
+	}
+
+	/*
+	 * Reset stack state to the user stack.
+	 * Clear set of signals caught on the signal stack.
+	 */
+	ps->ps_sigstk.ss_flags = SA_DISABLE;
+	ps->ps_sigstk.ss_size = 0;
+	ps->ps_sigstk.ss_base = 0;
+	ps->ps_flags = 0;
+}
+
+/*
+ * Manipulate signal mask.
+ * Note that we receive new mask, not pointer,
+ * and return old mask as return value;
+ * the library stub does the rest.
+ * Signals in sigcantmask are never added to the mask.
+ * EINVAL for an unknown "how"; the old mask is still returned.
+ */
+struct sigprocmask_args {
+	int	how;
+	sigset_t mask;
+};
+sigprocmask(p, uap, retval)
+	register struct proc *p;
+	struct sigprocmask_args *uap;
+	int *retval;
+{
+	int error;
+
+	*retval = p->p_sigmask;
+	error = 0;
+	(void) splhigh();
+
+	if (uap->how == SIG_BLOCK)
+		p->p_sigmask |= uap->mask &~ sigcantmask;
+	else if (uap->how == SIG_UNBLOCK)
+		p->p_sigmask &= ~uap->mask;
+	else if (uap->how == SIG_SETMASK)
+		p->p_sigmask = uap->mask &~ sigcantmask;
+	else
+		error = EINVAL;
+
+	(void) spl0();
+	return (error);
+}
+
+/*
+ * Report the set of signals currently pending for process p.
+ * The set (p_siglist) is returned by value in *retval; always succeeds.
+ */
+struct sigpending_args {
+	int	dummy;
+};
+/* ARGSUSED */
+sigpending(p, uap, retval)
+	struct proc *p;
+	struct sigpending_args *uap;
+	int *retval;
+{
+	int pending;
+
+	pending = p->p_siglist;
+	*retval = pending;
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * 4.3BSD-compatible sigvec: optionally report the current action for
+ * a signal (uap->osv) and optionally install a new one (uap->nsv).
+ * Returns EINVAL for an out-of-range signal number, SIGKILL or
+ * SIGSTOP, otherwise the copyin/copyout error or 0.
+ */
+struct osigvec_args {
+	int	signum;
+	struct	sigvec *nsv;
+	struct	sigvec *osv;
+};
+/* ARGSUSED */
+osigvec(p, uap, retval)
+	struct proc *p;
+	register struct osigvec_args *uap;
+	int *retval;
+{
+	register struct sigacts *ps = p->p_sigacts;
+	register int signum = uap->signum;
+	struct sigvec vec;
+	int bit, error;
+
+	if (signum <= 0 || signum >= NSIG)
+		return (EINVAL);
+	if (signum == SIGKILL || signum == SIGSTOP)
+		return (EINVAL);
+
+	if (uap->osv) {
+		/* Rebuild a sigvec from the per-signal state in sigacts. */
+		*(sig_t *)&vec.sv_handler = ps->ps_sigact[signum];
+		vec.sv_mask = ps->ps_catchmask[signum];
+		bit = sigmask(signum);
+		vec.sv_flags = 0;
+		if (ps->ps_sigonstack & bit)
+			vec.sv_flags |= SV_ONSTACK;
+		if (ps->ps_sigintr & bit)
+			vec.sv_flags |= SV_INTERRUPT;
+#ifndef COMPAT_SUNOS
+		if (p->p_flag & P_NOCLDSTOP)
+			vec.sv_flags |= SA_NOCLDSTOP;
+#endif
+		error = copyout((caddr_t)&vec, (caddr_t)uap->osv,
+		    sizeof (vec));
+		if (error)
+			return (error);
+	}
+
+	if (uap->nsv) {
+		error = copyin((caddr_t)uap->nsv, (caddr_t)&vec,
+		    sizeof (vec));
+		if (error)
+			return (error);
+#ifdef COMPAT_SUNOS
+		/*
+		 * SunOS uses this bit (4, aka SA_DISABLE) as SV_RESETHAND,
+		 * `reset to SIG_DFL on delivery'.  That option is not
+		 * supported, so the request is rejected.
+		 */
+		if (vec.sv_flags & SA_DISABLE)
+			return (EINVAL);
+		vec.sv_flags |= SA_USERTRAMP;
+#endif
+		/* SV_INTERRUPT has the opposite sense of SA_RESTART. */
+		vec.sv_flags ^= SA_RESTART;
+		setsigvec(p, signum, (struct sigaction *)&vec);
+	}
+	return (0);
+}
+
+/*
+ * Old-style sigblock: add uap->mask (minus the signals in
+ * sigcantmask) to the process signal mask.  The previous mask is
+ * returned in *retval.
+ */
+struct osigblock_args {
+	int	mask;
+};
+osigblock(p, uap, retval)
+	register struct proc *p;
+	struct osigblock_args *uap;
+	int *retval;
+{
+	int blocked = uap->mask & ~sigcantmask;
+
+	(void) splhigh();
+	*retval = p->p_sigmask;
+	p->p_sigmask |= blocked;
+	(void) spl0();
+	return (0);
+}
+
+/*
+ * Old-style sigsetmask: replace the process signal mask with
+ * uap->mask (minus the signals in sigcantmask).  The previous mask
+ * is returned in *retval.
+ */
+struct osigsetmask_args {
+	int	mask;
+};
+osigsetmask(p, uap, retval)
+	struct proc *p;
+	struct osigsetmask_args *uap;
+	int *retval;
+{
+	int newmask = uap->mask & ~sigcantmask;
+
+	(void) splhigh();
+	*retval = p->p_sigmask;
+	p->p_sigmask = newmask;
+	(void) spl0();
+	return (0);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Install a temporary signal mask and sleep until a signal arrives.
+ * Nonstandard calling convention: the libc stub passes the mask
+ * itself rather than a pointer, which saves a copyin.
+ * Always returns EINTR.
+ */
+struct sigsuspend_args {
+	sigset_t mask;
+};
+/* ARGSUSED */
+sigsuspend(p, uap, retval)
+	register struct proc *p;
+	struct sigsuspend_args *uap;
+	int *retval;
+{
+	register struct sigacts *ps = p->p_sigacts;
+	int error;
+
+	/*
+	 * The mask in force before the call must come back once the
+	 * signal handler has finished, so stash it in the sigacts
+	 * structure and flag it with SAS_OLDMASK.
+	 */
+	ps->ps_oldmask = p->p_sigmask;
+	ps->ps_flags |= SAS_OLDMASK;
+	p->p_sigmask = uap->mask & ~sigcantmask;
+
+	/* Keep sleeping for as long as tsleep() reports success. */
+	do {
+		error = tsleep((caddr_t) ps, PPAUSE|PCATCH, "pause", 0);
+	} while (error == 0);
+
+	/* always return EINTR rather than ERESTART... */
+	return (EINTR);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Old-style sigstack: optionally report the current signal stack
+ * (uap->oss) and optionally install a new one (uap->nss).
+ * Returns the copyin/copyout error, or 0.
+ */
+struct osigstack_args {
+	struct	sigstack *nss;
+	struct	sigstack *oss;
+};
+/* ARGSUSED */
+osigstack(p, uap, retval)
+	struct proc *p;
+	register struct osigstack_args *uap;
+	int *retval;
+{
+	struct sigacts *psp = p->p_sigacts;
+	struct sigstack ss;
+	int error;
+
+	/* Snapshot the current state in the old sigstack layout. */
+	ss.ss_sp = psp->ps_sigstk.ss_base;
+	ss.ss_onstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
+	if (uap->oss) {
+		error = copyout((caddr_t)&ss, (caddr_t)uap->oss,
+		    sizeof (struct sigstack));
+		if (error)
+			return (error);
+	}
+	if (uap->nss == 0)
+		return (0);
+
+	error = copyin((caddr_t)uap->nss, (caddr_t)&ss, sizeof (ss));
+	if (error)
+		return (error);
+
+	/*
+	 * The old interface carries no size, so it is recorded as 0.
+	 * ss_flags is OR-ed, so an SA_ONSTACK bit already set is not
+	 * cleared here.
+	 */
+	psp->ps_sigstk.ss_base = ss.ss_sp;
+	psp->ps_sigstk.ss_size = 0;
+	psp->ps_sigstk.ss_flags |= ss.ss_onstack & SA_ONSTACK;
+	psp->ps_flags |= SAS_ALTSTACK;
+	return (0);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Get and/or set the alternate signal stack of process p.
+ * Returns EINVAL when asked to disable the stack while the recorded
+ * flags have SA_ONSTACK set, ENOMEM when the new stack is smaller
+ * than MINSIGSTKSZ, a copyin/copyout error, or 0.
+ */
+struct sigaltstack_args {
+	struct	sigaltstack *nss;
+	struct	sigaltstack *oss;
+};
+/* ARGSUSED */
+sigaltstack(p, uap, retval)
+	struct proc *p;
+	register struct sigaltstack_args *uap;
+	int *retval;
+{
+	struct sigacts *psp = p->p_sigacts;
+	struct sigaltstack ss;
+	int error;
+
+	/* With no alternate stack configured, report it as disabled. */
+	if ((psp->ps_flags & SAS_ALTSTACK) == 0)
+		psp->ps_sigstk.ss_flags |= SA_DISABLE;
+	if (uap->oss) {
+		error = copyout((caddr_t)&psp->ps_sigstk,
+		    (caddr_t)uap->oss, sizeof (struct sigaltstack));
+		if (error)
+			return (error);
+	}
+	if (uap->nss == 0)
+		return (0);
+
+	error = copyin((caddr_t)uap->nss, (caddr_t)&ss, sizeof (ss));
+	if (error)
+		return (error);
+
+	if ((ss.ss_flags & SA_DISABLE) == 0) {
+		/* Installing a new stack: it has to be large enough. */
+		if (ss.ss_size < MINSIGSTKSZ)
+			return (ENOMEM);
+		psp->ps_flags |= SAS_ALTSTACK;
+		psp->ps_sigstk = ss;
+		return (0);
+	}
+
+	/* Disabling: refused while SA_ONSTACK is set in the saved flags. */
+	if (psp->ps_sigstk.ss_flags & SA_ONSTACK)
+		return (EINVAL);
+	psp->ps_flags &= ~SAS_ALTSTACK;
+	psp->ps_sigstk.ss_flags = ss.ss_flags;
+	return (0);
+}
+
+/*
+ * kill system call.  The target is selected by uap->pid:
+ *	> 0	the single process with that pid
+ *	 -1	broadcast
+ *	  0	the caller's own process group
+ *	< -1	the process group whose id is -pid
+ * Signal number 0 performs the lookup and permission check only.
+ * Returns EINVAL, ESRCH, EPERM or 0.
+ */
+struct kill_args {
+	int	pid;
+	int	signum;
+};
+/* ARGSUSED */
+kill(cp, uap, retval)
+	register struct proc *cp;
+	register struct kill_args *uap;
+	int *retval;
+{
+	register struct proc *target;
+	register struct pcred *pc = cp->p_cred;
+	int pid = uap->pid;
+	int signum = uap->signum;
+
+	if ((u_int)signum >= NSIG)
+		return (EINVAL);
+
+	if (pid == -1)		/* broadcast signal */
+		return (killpg1(cp, signum, 0, 1));
+	if (pid == 0)		/* signal own process group */
+		return (killpg1(cp, signum, 0, 0));
+	if (pid < 0)		/* negative explicit process group */
+		return (killpg1(cp, signum, -pid, 0));
+
+	/* kill single process */
+	target = pfind(pid);
+	if (target == NULL)
+		return (ESRCH);
+	if (!CANSIGNAL(cp, pc, target, signum))
+		return (EPERM);
+	if (signum)
+		psignal(target, signum);
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Old-style killpg: send uap->signum to process group uap->pgid.
+ * Returns EINVAL for an out-of-range signal number, otherwise the
+ * result of killpg1().
+ */
+struct okillpg_args {
+	int	pgid;
+	int	signum;
+};
+/* ARGSUSED */
+okillpg(p, uap, retval)
+	struct proc *p;
+	register struct okillpg_args *uap;
+	int *retval;
+{
+	int signum = uap->signum;
+
+	if ((u_int)signum < NSIG)
+		return (killpg1(p, signum, uap->pgid, 0));
+	return (EINVAL);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Shared back end for signalling a process group or broadcasting.
+ *
+ * cp      the calling process
+ * signum  signal to post; 0 only counts the eligible targets
+ * pgid    process group id, 0 meaning the caller's own group
+ * all     non-zero to broadcast over the whole process list
+ *
+ * Returns 0 when at least one process qualified, ESRCH otherwise.
+ */
+killpg1(cp, signum, pgid, all)
+	register struct proc *cp;
+	int signum, pgid, all;
+{
+	register struct proc *p;
+	register struct pcred *pc = cp->p_cred;
+	struct pgrp *pgrp;
+	int nfound = 0;
+
+	if (all) {
+		/*
+		 * broadcast: skip pids <= 1, system processes, the
+		 * caller itself and anything it may not signal.
+		 */
+		for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+			if (p->p_pid <= 1 || p->p_flag & P_SYSTEM)
+				continue;
+			if (p == cp)
+				continue;
+			if (!CANSIGNAL(cp, pc, p, signum))
+				continue;
+			nfound++;
+			if (signum)
+				psignal(p, signum);
+		}
+		return (nfound ? 0 : ESRCH);
+	}
+
+	/* zero pgid means send to my process group. */
+	if (pgid == 0)
+		pgrp = cp->p_pgrp;
+	else if ((pgrp = pgfind(pgid)) == NULL)
+		return (ESRCH);
+
+	for (p = pgrp->pg_mem; p != NULL; p = p->p_pgrpnxt) {
+		if (p->p_pid <= 1 || p->p_flag & P_SYSTEM)
+			continue;
+		if (p->p_stat == SZOMB)
+			continue;
+		if (!CANSIGNAL(cp, pc, p, signum))
+			continue;
+		nfound++;
+		if (signum)
+			psignal(p, signum);
+	}
+	return (nfound ? 0 : ESRCH);
+}
+
+/*
+ * Post signum to every member of the process group identified by
+ * pgid.  A zero pgid or an unknown group is silently ignored.
+ */
+void
+gsignal(pgid, signum)
+	int pgid, signum;
+{
+	struct pgrp *pgrp;
+
+	if (pgid == 0)
+		return;
+	pgrp = pgfind(pgid);
+	if (pgrp)
+		pgsignal(pgrp, signum, 0);
+}
+
+/*
+ * Post signum to the members of process group pgrp.  When checkctty
+ * is non-zero only members with P_CONTROLT set (those that have a
+ * controlling terminal) are signalled.  A null pgrp is a no-op.
+ */
+void
+pgsignal(pgrp, signum, checkctty)
+	struct pgrp *pgrp;
+	int signum, checkctty;
+{
+	register struct proc *member;
+
+	if (pgrp == NULL)
+		return;
+	for (member = pgrp->pg_mem; member != NULL;
+	    member = member->p_pgrpnxt) {
+		if (checkctty != 0 && (member->p_flag & P_CONTROLT) == 0)
+			continue;
+		psignal(member, signum);
+	}
+}
+
+/*
+ * Send a signal caused by a trap to the current process.
+ *
+ *	p	process that took the trap
+ *	signum	signal to deliver
+ *	code	trap-specific code passed on to the handler
+ *
+ * If the process is not traced and the signal is caught and not
+ * masked, the handler is invoked immediately through sendsig() with
+ * the correct code.  Otherwise the code is saved in the sigacts
+ * structure and the signal is posted normally through psignal().
+ */
+void
+trapsignal(p, signum, code)
+	struct proc *p;
+	register int signum;
+	u_int code;
+{
+	register struct sigacts *ps = p->p_sigacts;
+	int mask;
+
+	mask = sigmask(signum);
+	if ((p->p_flag & P_TRACED) == 0 && (p->p_sigcatch & mask) != 0 &&
+	    (p->p_sigmask & mask) == 0) {
+		p->p_stats->p_ru.ru_nsignals++;
+#ifdef KTRACE
+		if (KTRPOINT(p, KTR_PSIG))
+			ktrpsig(p->p_tracep, signum, ps->ps_sigact[signum],
+				p->p_sigmask, code);
+#endif
+		sendsig(ps->ps_sigact[signum], signum, p->p_sigmask, code);
+		/* Block the signal and its catch mask while the handler runs. */
+		p->p_sigmask |= ps->ps_catchmask[signum] | mask;
+	} else {
+		/*
+		 * Deferred delivery.  postsig() hands ps_code to the
+		 * handler only when ps_sig equals the signal being
+		 * delivered, so record which signal the saved code
+		 * belongs to; without this the code is always lost.
+		 */
+		ps->ps_code = code;	/* XXX for core dump/debugger */
+		ps->ps_sig = signum;	/* XXX to verify code */
+		psignal(p, signum);
+	}
+}
+
+/*
+ * Send the signal to the process. If the signal has an action, the action
+ * is usually performed by the target process rather than the caller; we add
+ * the signal to the set of pending signals for the process.
+ *
+ * Exceptions:
+ * o When a stop signal is sent to a sleeping process that takes the
+ * default action, the process is stopped without awakening it.
+ * o SIGCONT restarts stopped processes (or puts them back to sleep)
+ * regardless of the signal action (eg, blocked or ignored).
+ *
+ * Other ignored signals are discarded immediately.
+ *
+ * Parameters:
+ * p target process
+ * signum signal to post; panics if it is 0 or not below NSIG
+ *
+ * The function exits through one of three labels: "runfast" raises the
+ * priority to at least PUSER and falls into "run", which calls
+ * setrunnable(); "out" restores the interrupt priority level saved in s.
+ */
+void
+psignal(p, signum)
+ register struct proc *p;
+ register int signum;
+{
+ register int s, prop;
+ register sig_t action;
+ int mask;
+
+ if ((u_int)signum >= NSIG || signum == 0)
+ panic("psignal signal number");
+ mask = sigmask(signum);
+ prop = sigprop[signum];
+
+ /*
+ * If proc is traced, always give parent a chance.
+ */
+ if (p->p_flag & P_TRACED)
+ action = SIG_DFL;
+ else {
+ /*
+ * If the signal is being ignored,
+ * then we forget about it immediately.
+ * (Note: we don't set SIGCONT in p_sigignore,
+ * and if it is set to SIG_IGN,
+ * action will be SIG_DFL here.)
+ */
+ if (p->p_sigignore & mask)
+ return;
+ if (p->p_sigmask & mask)
+ action = SIG_HOLD;
+ else if (p->p_sigcatch & mask)
+ action = SIG_CATCH;
+ else
+ action = SIG_DFL;
+ }
+
+ /*
+ * An untraced process with p_nice above NZERO that is about to take
+ * the default action of a signal flagged SA_KILL has its nice value
+ * reset to NZERO.
+ */
+ if (p->p_nice > NZERO && action == SIG_DFL && (prop & SA_KILL) &&
+ (p->p_flag & P_TRACED) == 0)
+ p->p_nice = NZERO;
+
+ /* A continue signal cancels any pending stop signals. */
+ if (prop & SA_CONT)
+ p->p_siglist &= ~stopsigmask;
+
+ if (prop & SA_STOP) {
+ /*
+ * If sending a tty stop signal to a member of an orphaned
+ * process group, discard the signal here if the action
+ * is default; don't stop the process below if sleeping,
+ * and don't clear any pending SIGCONT.
+ */
+ if (prop & SA_TTYSTOP && p->p_pgrp->pg_jobc == 0 &&
+ action == SIG_DFL)
+ return;
+ p->p_siglist &= ~contsigmask;
+ }
+ p->p_siglist |= mask;
+
+ /*
+ * Defer further processing for signals which are held,
+ * except that stopped processes must be continued by SIGCONT.
+ */
+ if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP))
+ return;
+ s = splhigh();
+ switch (p->p_stat) {
+
+ case SSLEEP:
+ /*
+ * If process is sleeping uninterruptibly
+ * we can't interrupt the sleep... the signal will
+ * be noticed when the process returns through
+ * trap() or syscall().
+ */
+ if ((p->p_flag & P_SINTR) == 0)
+ goto out;
+ /*
+ * Process is sleeping and traced... make it runnable
+ * so it can discover the signal in issignal() and stop
+ * for the parent.
+ */
+ if (p->p_flag & P_TRACED)
+ goto run;
+ /*
+ * If SIGCONT is default (or ignored) and process is
+ * asleep, we are finished; the process should not
+ * be awakened.
+ */
+ if ((prop & SA_CONT) && action == SIG_DFL) {
+ p->p_siglist &= ~mask;
+ goto out;
+ }
+ /*
+ * When a sleeping process receives a stop
+ * signal, process immediately if possible.
+ * All other (caught or default) signals
+ * cause the process to run.
+ */
+ if (prop & SA_STOP) {
+ if (action != SIG_DFL)
+ goto runfast;
+ /*
+ * If a child holding parent blocked,
+ * stopping could cause deadlock.
+ */
+ if (p->p_flag & P_PPWAIT)
+ goto out;
+ p->p_siglist &= ~mask;
+ p->p_xstat = signum;
+ if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0)
+ psignal(p->p_pptr, SIGCHLD);
+ stop(p);
+ goto out;
+ } else
+ goto runfast;
+ /*NOTREACHED*/
+
+ case SSTOP:
+ /*
+ * If traced process is already stopped,
+ * then no further action is necessary.
+ */
+ if (p->p_flag & P_TRACED)
+ goto out;
+
+ /*
+ * Kill signal always sets processes running.
+ */
+ if (signum == SIGKILL)
+ goto runfast;
+
+ if (prop & SA_CONT) {
+ /*
+ * If SIGCONT is default (or ignored), we continue the
+ * process but don't leave the signal in p_siglist, as
+ * it has no further action. If SIGCONT is held, we
+ * continue the process and leave the signal in
+ * p_siglist. If the process catches SIGCONT, let it
+ * handle the signal itself. If it isn't waiting on
+ * an event, then it goes back to run state.
+ * Otherwise, process goes back to sleep state.
+ */
+ if (action == SIG_DFL)
+ p->p_siglist &= ~mask;
+ if (action == SIG_CATCH)
+ goto runfast;
+ if (p->p_wchan == 0)
+ goto run;
+ p->p_stat = SSLEEP;
+ goto out;
+ }
+
+ if (prop & SA_STOP) {
+ /*
+ * Already stopped, don't need to stop again.
+ * (If we did the shell could get confused.)
+ */
+ p->p_siglist &= ~mask; /* take it away */
+ goto out;
+ }
+
+ /*
+ * If process is sleeping interruptibly, then simulate a
+ * wakeup so that when it is continued, it will be made
+ * runnable and can look at the signal. But don't make
+ * the process runnable, leave it stopped.
+ */
+ if (p->p_wchan && p->p_flag & P_SINTR)
+ unsleep(p);
+ goto out;
+
+ default:
+ /*
+ * SRUN, SIDL, SZOMB do nothing with the signal,
+ * other than kicking ourselves if we are running.
+ * It will either never be noticed, or noticed very soon.
+ */
+ if (p == curproc)
+ signotify(p);
+ goto out;
+ }
+ /*NOTREACHED*/
+
+runfast:
+ /*
+ * Raise priority to at least PUSER.
+ */
+ if (p->p_priority > PUSER)
+ p->p_priority = PUSER;
+run:
+ setrunnable(p);
+out:
+ splx(s);
+}
+
+/*
+ * If the current process has received a signal (should be caught or cause
+ * termination, should interrupt current syscall), return the signal number.
+ * Stop signals with default action are processed immediately, then cleared;
+ * they aren't returned.  This is checked after each entry to the system for
+ * a syscall or trap (though this can usually be done without calling issignal
+ * by checking the pending signal masks in the CURSIG macro.)  The normal call
+ * sequence is
+ *
+ *	while (signum = CURSIG(curproc))
+ *		postsig(signum);
+ *
+ * Returns the number of the signal to act on, or 0 when no unmasked
+ * signal is pending.
+ */
+issignal(p)
+	register struct proc *p;
+{
+	register int signum, mask, prop;
+
+	for (;;) {
+		mask = p->p_siglist & ~p->p_sigmask;
+		if (p->p_flag & P_PPWAIT)
+			mask &= ~stopsigmask;
+		if (mask == 0)		/* no signal to send */
+			return (0);
+		signum = ffs((long)mask);
+		mask = sigmask(signum);
+		prop = sigprop[signum];
+		/*
+		 * We should see pending but ignored signals
+		 * only if P_TRACED was on when they were posted.
+		 */
+		if (mask & p->p_sigignore && (p->p_flag & P_TRACED) == 0) {
+			p->p_siglist &= ~mask;
+			continue;
+		}
+		if (p->p_flag & P_TRACED && (p->p_flag & P_PPWAIT) == 0) {
+			/*
+			 * If traced, always stop, and stay
+			 * stopped until released by the parent.
+			 */
+			p->p_xstat = signum;
+			psignal(p->p_pptr, SIGCHLD);
+			do {
+				stop(p);
+				mi_switch();
+			} while (!trace_req(p) && p->p_flag & P_TRACED);
+
+			/*
+			 * If the traced bit got turned off, go back up
+			 * to the top to rescan signals.  This ensures
+			 * that p_sig* and ps_sigact are consistent.
+			 */
+			if ((p->p_flag & P_TRACED) == 0)
+				continue;
+
+			/*
+			 * If parent wants us to take the signal,
+			 * then it will leave it in p->p_xstat;
+			 * otherwise we just look for signals again.
+			 */
+			p->p_siglist &= ~mask;	/* clear the old signal */
+			signum = p->p_xstat;
+			if (signum == 0)
+				continue;
+
+			/*
+			 * Put the new signal into p_siglist.  If the
+			 * signal is being masked, look for other signals.
+			 */
+			mask = sigmask(signum);
+			p->p_siglist |= mask;
+			if (p->p_sigmask & mask)
+				continue;
+
+			/*
+			 * The parent may have substituted a different
+			 * signal; refresh the property bits so the
+			 * stop/ignore tests below describe the signal
+			 * actually being taken, not the original one.
+			 */
+			prop = sigprop[signum];
+		}
+
+		/*
+		 * Decide whether the signal should be returned.
+		 * Return the signal's number, or fall through
+		 * to clear it from the pending mask.
+		 */
+		switch ((int)p->p_sigacts->ps_sigact[signum]) {
+
+		case SIG_DFL:
+			/*
+			 * Don't take default actions on system processes.
+			 */
+			if (p->p_pid <= 1) {
+#ifdef DIAGNOSTIC
+				/*
+				 * Are you sure you want to ignore SIGSEGV
+				 * in init? XXX
+				 */
+				printf("Process (pid %d) got signal %d\n",
+					p->p_pid, signum);
+#endif
+				break;		/* == ignore */
+			}
+			/*
+			 * If there is a pending stop signal to process
+			 * with default action, stop here,
+			 * then clear the signal.  However,
+			 * if process is member of an orphaned
+			 * process group, ignore tty stop signals.
+			 */
+			if (prop & SA_STOP) {
+				if (p->p_flag & P_TRACED ||
+				    (p->p_pgrp->pg_jobc == 0 &&
+				    prop & SA_TTYSTOP))
+					break;	/* == ignore */
+				p->p_xstat = signum;
+				stop(p);
+				if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0)
+					psignal(p->p_pptr, SIGCHLD);
+				mi_switch();
+				break;
+			} else if (prop & SA_IGNORE) {
+				/*
+				 * Except for SIGCONT, shouldn't get here.
+				 * Default action is to ignore; drop it.
+				 */
+				break;		/* == ignore */
+			} else
+				return (signum);
+			/*NOTREACHED*/
+
+		case SIG_IGN:
+			/*
+			 * Masking above should prevent us ever trying
+			 * to take action on an ignored signal other
+			 * than SIGCONT, unless process is traced.
+			 */
+			if ((prop & SA_CONT) == 0 &&
+			    (p->p_flag & P_TRACED) == 0)
+				printf("issignal\n");
+			break;		/* == ignore */
+
+		default:
+			/*
+			 * This signal has an action, let
+			 * postsig() process it.
+			 */
+			return (signum);
+		}
+		p->p_siglist &= ~mask;		/* take the signal! */
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Move process p into the stopped state, clear its P_WAITED flag and
+ * wake up anything sleeping on its parent.  Signals are handled
+ * elsewhere.  The process must not be on the run queue.
+ */
+stop(p)
+	register struct proc *p;
+{
+	register struct proc *target = p;
+
+	target->p_stat = SSTOP;
+	target->p_flag &= ~P_WAITED;
+	wakeup((caddr_t)target->p_pptr);
+}
+
+/*
+ * Act on signal signum for the current process: remove it from the
+ * pending set, then either terminate the process (default action) or
+ * arrange for the user handler to run through sendsig().
+ */
+void
+postsig(signum)
+	register int signum;
+{
+	register struct proc *p = curproc;
+	register struct sigacts *ps = p->p_sigacts;
+	register sig_t action;
+	int code, mask, returnmask;
+
+#ifdef DIAGNOSTIC
+	if (signum == 0)
+		panic("postsig");
+#endif
+	mask = sigmask(signum);
+	p->p_siglist &= ~mask;
+	action = ps->ps_sigact[signum];
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_PSIG))
+		ktrpsig(p->p_tracep, signum, action,
+		    ps->ps_flags & SAS_OLDMASK ?
+		    ps->ps_oldmask : p->p_sigmask, 0);
+#endif
+	if (action == SIG_DFL) {
+		/*
+		 * Default action, where the default is to kill
+		 * the process.  (Other cases were ignored earlier.)
+		 */
+		sigexit(p, signum);
+		/* NOTREACHED */
+		return;
+	}
+
+	/*
+	 * From here on the signal must be a caught one.
+	 */
+#ifdef DIAGNOSTIC
+	if (action == SIG_IGN || (p->p_sigmask & mask))
+		panic("postsig action");
+#endif
+	/*
+	 * Install the handler's mask and defer further occurrences of
+	 * this signal.
+	 *
+	 * Special case: after a sigpause the mask to restore once the
+	 * handler completes is the one saved before the pause, not the
+	 * current one.
+	 */
+	(void) splhigh();
+	returnmask = p->p_sigmask;
+	if (ps->ps_flags & SAS_OLDMASK) {
+		returnmask = ps->ps_oldmask;
+		ps->ps_flags &= ~SAS_OLDMASK;
+	}
+	p->p_sigmask |= ps->ps_catchmask[signum] | mask;
+	(void) spl0();
+	p->p_stats->p_ru.ru_nsignals++;
+
+	/* The saved code is consumed only if it belongs to this signal. */
+	code = 0;
+	if (ps->ps_sig == signum) {
+		code = ps->ps_code;
+		ps->ps_code = 0;
+	}
+	sendsig(action, signum, returnmask, code);
+}
+
+/*
+ * Kill process p for the stated reason: the reason is logged at
+ * LOG_ERR, reported through uprintf(), and SIGKILL is then posted.
+ */
+killproc(p, why)
+	struct proc *p;
+	char *why;
+{
+	register struct proc *victim = p;
+
+	log(LOG_ERR, "pid %d was killed: %s\n", victim->p_pid, why);
+	uprintf("sorry, pid %d was killed: %s\n", victim->p_pid, why);
+	psignal(victim, SIGKILL);
+}
+
+/*
+ * Terminate process p on account of signal signum, writing a core
+ * file when the signal's properties ask for one.  The usual checks
+ * for masked and caught signals are bypassed, so unrecoverable
+ * failures can end the process without touching signal state.
+ * The accounting record is marked with AXSIG; when dumping core the
+ * signal number is saved for the debugger.  Calls exit1() and does
+ * not return.
+ */
+sigexit(p, signum)
+	register struct proc *p;
+	int signum;
+{
+	int status = signum;
+
+	p->p_acflag |= AXSIG;
+	if ((sigprop[signum] & SA_CORE) != 0) {
+		p->p_sigacts->ps_sig = signum;
+		/* WCOREFLAG is reported only if the dump succeeded. */
+		if (coredump(p) == 0)
+			status |= WCOREFLAG;
+	}
+	exit1(p, W_EXITCODE(0, status));
+	/* NOTREACHED */
+}
+
+/*
+ * Dump core, into a file named "progname.core", unless the process was
+ * setuid/setgid.
+ *
+ * Returns 0 on success or an errno value. EFAULT is returned when the
+ * saved and real uid or gid differ, when the image size is not below the
+ * RLIMIT_CORE soft limit, and when the target is not a regular file with
+ * exactly one link.
+ *
+ * File layout as written below: whatever cpu_coredump() emits, then the
+ * data segment at offset ctob(UPAGES), then the stack right after it.
+ */
+coredump(p)
+ register struct proc *p;
+{
+ register struct vnode *vp;
+ register struct pcred *pcred = p->p_cred;
+ register struct ucred *cred = pcred->pc_ucred;
+ register struct vmspace *vm = p->p_vmspace;
+ struct nameidata nd;
+ struct vattr vattr;
+ int error, error1;
+ char name[MAXCOMLEN+6]; /* progname.core */
+
+ /* Refuse to dump when saved ids differ from the real ids. */
+ if (pcred->p_svuid != pcred->p_ruid || pcred->p_svgid != pcred->p_rgid)
+ return (EFAULT);
+ /* Refuse when u-area + data + stack would reach the core size limit. */
+ if (ctob(UPAGES + vm->vm_dsize + vm->vm_ssize) >=
+ p->p_rlimit[RLIMIT_CORE].rlim_cur)
+ return (EFAULT);
+ sprintf(name, "%s.core", p->p_comm);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, name, p);
+ if (error = vn_open(&nd,
+ O_CREAT | FWRITE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))
+ return (error);
+ vp = nd.ni_vp;
+
+ /* Don't dump to non-regular files or files with links. */
+ if (vp->v_type != VREG ||
+ VOP_GETATTR(vp, &vattr, cred, p) || vattr.va_nlink != 1) {
+ error = EFAULT;
+ goto out;
+ }
+ /* Truncate the file to zero length; the VOP_SETATTR result is not checked. */
+ VATTR_NULL(&vattr);
+ vattr.va_size = 0;
+ LEASE_CHECK(vp, p, cred, LEASE_WRITE);
+ VOP_SETATTR(vp, &vattr, cred, p);
+ p->p_acflag |= ACORE;
+ /* Snapshot the proc and eproc structures into the u-area's kinfo copy. */
+ bcopy(p, &p->p_addr->u_kproc.kp_proc, sizeof(struct proc));
+ fill_eproc(p, &p->p_addr->u_kproc.kp_eproc);
+ error = cpu_coredump(p, vp, cred);
+ /* Data segment, written at file offset ctob(UPAGES). */
+ if (error == 0)
+ error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
+ (int)ctob(vm->vm_dsize), (off_t)ctob(UPAGES), UIO_USERSPACE,
+ IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
+ /* Stack segment, written immediately after the data segment. */
+ if (error == 0)
+ error = vn_rdwr(UIO_WRITE, vp,
+ (caddr_t) trunc_page(USRSTACK - ctob(vm->vm_ssize)),
+ round_page(ctob(vm->vm_ssize)),
+ (off_t)ctob(UPAGES) + ctob(vm->vm_dsize), UIO_USERSPACE,
+ IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
+out:
+ VOP_UNLOCK(vp);
+ error1 = vn_close(vp, FWRITE, cred, p);
+ /* A close failure is reported only if nothing failed earlier. */
+ if (error == 0)
+ error = error1;
+ return (error);
+}
+
+/*
+ * Handler for a nonexistent system call: post SIGSYS to the caller
+ * (which may want to handle it) and also fail with EINVAL, in case
+ * the process does not see the signal right away because it is
+ * blocked or ignored.
+ */
+struct nosys_args {
+	int	dummy;
+};
+/* ARGSUSED */
+nosys(p, args, retval)
+	struct proc *p;
+	struct nosys_args *args;
+	int *retval;
+{
+	int error = EINVAL;
+
+	psignal(p, SIGSYS);
+	return (error);
+}
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c
new file mode 100644
index 000000000000..5c12afcba33b
--- /dev/null
+++ b/sys/kern/kern_subr.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+
+/*
+ * Move up to n bytes between the kernel buffer cp and the memory
+ * described by uio, advancing the iovec and uio bookkeeping as data
+ * is transferred.  The direction is given by uio_rw (UIO_READ copies
+ * from cp into the iovecs) and the address space by uio_segflg.
+ * Returns 0, or the error from copyin()/copyout().
+ */
+uiomove(cp, n, uio)
+	register caddr_t cp;
+	register int n;
+	register struct uio *uio;
+{
+	register struct iovec *iov;
+	u_int chunk;
+	int error = 0;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
+		panic("uiomove: mode");
+	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+		panic("uiomove proc");
+#endif
+	while (n > 0 && uio->uio_resid) {
+		iov = uio->uio_iov;
+		chunk = iov->iov_len;
+		if (chunk == 0) {
+			/* This iovec is used up; step to the next one. */
+			uio->uio_iov++;
+			uio->uio_iovcnt--;
+			continue;
+		}
+		if (chunk > n)
+			chunk = n;
+
+		if (uio->uio_segflg == UIO_USERSPACE ||
+		    uio->uio_segflg == UIO_USERISPACE) {
+			if (uio->uio_rw == UIO_READ)
+				error = copyout(cp, iov->iov_base, chunk);
+			else
+				error = copyin(iov->iov_base, cp, chunk);
+			if (error)
+				return (error);
+		} else if (uio->uio_segflg == UIO_SYSSPACE) {
+			if (uio->uio_rw == UIO_READ)
+				bcopy((caddr_t)cp, iov->iov_base, chunk);
+			else
+				bcopy(iov->iov_base, (caddr_t)cp, chunk);
+		}
+
+		/* Account for the bytes just moved. */
+		iov->iov_base += chunk;
+		iov->iov_len -= chunk;
+		uio->uio_resid -= chunk;
+		uio->uio_offset += chunk;
+		cp += chunk;
+		n -= chunk;
+	}
+	return (error);
+}
+
+/*
+ * Hand one character, c, to the reader described by uio and advance
+ * the uio by one byte.  Panics when the uio has no iovecs or no
+ * residual count left.  Returns 0, or EFAULT when the byte cannot be
+ * stored in user space.
+ */
+ureadc(c, uio)
+	register int c;
+	register struct uio *uio;
+{
+	register struct iovec *iov;
+
+	/* Skip over exhausted iovecs until one with room is found. */
+	for (;;) {
+		if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
+			panic("ureadc");
+		iov = uio->uio_iov;
+		if (iov->iov_len != 0)
+			break;
+		uio->uio_iovcnt--;
+		uio->uio_iov++;
+	}
+
+	if (uio->uio_segflg == UIO_USERSPACE) {
+		if (subyte(iov->iov_base, c) < 0)
+			return (EFAULT);
+	} else if (uio->uio_segflg == UIO_SYSSPACE) {
+		*iov->iov_base = c;
+	} else if (uio->uio_segflg == UIO_USERISPACE) {
+		if (suibyte(iov->iov_base, c) < 0)
+			return (EFAULT);
+	}
+
+	iov->iov_base++;
+	iov->iov_len--;
+	uio->uio_resid--;
+	uio->uio_offset++;
+	return (0);
+}
+
+#ifdef vax /* unused except by ct.c, other oddities XXX */
+/*
+ * Fetch the next character supplied by the writer described by uio
+ * and advance the uio by one byte.  Returns the character, or -1
+ * when nothing is left or the byte cannot be fetched.  Panics if
+ * the iovec count is not positive while data is still expected.
+ */
+uwritec(uio)
+	struct uio *uio;
+{
+	register struct iovec *iov;
+	register int c;
+
+	if (uio->uio_resid <= 0)
+		return (-1);
+
+	/* Skip over exhausted iovecs; give up when they run out. */
+	for (;;) {
+		if (uio->uio_iovcnt <= 0)
+			panic("uwritec");
+		iov = uio->uio_iov;
+		if (iov->iov_len != 0)
+			break;
+		uio->uio_iov++;
+		if (--uio->uio_iovcnt == 0)
+			return (-1);
+	}
+
+	switch (uio->uio_segflg) {
+
+	case UIO_USERSPACE:
+		c = fubyte(iov->iov_base);
+		break;
+
+	case UIO_SYSSPACE:
+		c = *(u_char *) iov->iov_base;
+		break;
+
+	case UIO_USERISPACE:
+		c = fuibyte(iov->iov_base);
+		break;
+	}
+	if (c < 0)
+		return (-1);
+
+	iov->iov_base++;
+	iov->iov_len--;
+	uio->uio_resid--;
+	uio->uio_offset++;
+	return (c);
+}
+#endif /* vax */
+
+/*
+ * General routine to allocate a hash table.
+ *
+ *	elements	expected number of entries; must be positive
+ *	type		malloc type passed through to malloc()
+ *	hashmask	out: table size minus one, usable as a bit mask
+ *
+ * The table has the largest power-of-two number of list heads that
+ * does not exceed `elements'; each head is initialized empty.
+ * Returns the table.  Panics when elements <= 0.
+ */
+void *
+hashinit(elements, type, hashmask)
+	int elements, type;
+	u_long *hashmask;
+{
+	LIST_HEAD(generic, generic) *hashtbl;
+	long hashsize;
+	int slot;
+
+	if (elements <= 0)
+		panic("hashinit: bad cnt");
+
+	/* Overshoot to the first power of two above elements, then halve. */
+	hashsize = 1;
+	while (hashsize <= elements)
+		hashsize <<= 1;
+	hashsize >>= 1;
+
+	hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
+	slot = 0;
+	while (slot < hashsize) {
+		LIST_INIT(&hashtbl[slot]);
+		slot++;
+	}
+	*hashmask = hashsize - 1;
+	return (hashtbl);
+}
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
new file mode 100644
index 000000000000..1c2a578f3036
--- /dev/null
+++ b/sys/kern/kern_synch.c
@@ -0,0 +1,666 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/buf.h>
+#include <sys/signalvar.h>
+#include <sys/resourcevar.h>
+#include <sys/vmmeter.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+#include <machine/cpu.h>
+
+u_char curpriority; /* usrpri of curproc */
+int lbolt; /* once a second sleep address */
+
+/*
+ * Periodic callout: request a reschedule, then re-arm itself hz / 10
+ * ticks later, so that equal priority processes are switched among
+ * every 100ms.  The argument is not used.
+ */
+/* ARGSUSED */
+void
+roundrobin(arg)
+	void *arg;
+{
+	int interval;
+
+	need_resched();
+	interval = hz / 10;
+	timeout(roundrobin, NULL, interval);
+}
+
+/*
+ * Constants for digital decay and forget:
+ * 90% of (p_estcpu) usage in 5 * loadav time
+ * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)
+ * Note that, as ps(1) mentions, this can let percentages
+ * total over 100% (I've seen 137.9% for 3 processes).
+ *
+ * Note that hardclock updates p_estcpu and p_cpticks independently.
+ *
+ * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
+ * That is, the system wants to compute a value of decay such
+ * that the following for loop:
+ * for (i = 0; i < (5 * loadavg); i++)
+ * p_estcpu *= decay;
+ * will compute
+ * p_estcpu *= 0.1;
+ * for all values of loadavg:
+ *
+ * Mathematically this loop can be expressed by saying:
+ * decay ** (5 * loadavg) ~= .1
+ *
+ * The system computes decay as:
+ * decay = (2 * loadavg) / (2 * loadavg + 1)
+ *
+ * We wish to prove that the system's computation of decay
+ * will always fulfill the equation:
+ * decay ** (5 * loadavg) ~= .1
+ *
+ * If we compute b as:
+ * b = 2 * loadavg
+ * then
+ * decay = b / (b + 1)
+ *
+ * We now need to prove two things:
+ * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
+ * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
+ *
+ * Facts:
+ * For x close to zero, exp(x) =~ 1 + x, since
+ * exp(x) = 0! + x**1/1! + x**2/2! + ... .
+ * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
+ * For x close to zero, ln(1+x) =~ x, since
+ * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1
+ * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
+ * ln(.1) =~ -2.30
+ *
+ * Proof of (1):
+ * Solve (factor)**(power) =~ .1 given power (5*loadav):
+ * solving for factor,
+ * ln(factor) =~ -2.30/(5*loadav), or
+ * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
+ * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED
+ *
+ * Proof of (2):
+ * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
+ * solving for power,
+ * power*ln(b/(b+1)) =~ -2.30, or
+ * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED
+ *
+ * Actual power values for the implemented algorithm are as follows:
+ * loadav: 1 2 3 4
+ * power: 5.68 10.32 14.94 19.55
+ */
+
+/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
+#define loadfactor(loadav) (2 * (loadav))
+#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
+
+/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
+fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
+
+/*
+ * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
+ * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
+ * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
+ *
+ * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
+ * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
+ *
+ * If you don't want to bother with the faster/more-accurate formula, you
+ * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
+ * (more general) method of calculating the %age of CPU used by a process.
+ */
+#define CCPU_SHIFT 11
+
+/*
+ * Recompute process priorities, every hz ticks.
+ *
+ * Wakes any sleepers on lbolt, then walks allproc: ages p_swtime and
+ * (for sleeping or stopped processes) p_slptime, decays p_pctcpu, and,
+ * for processes whose p_slptime is not above 1, folds p_cpticks into
+ * p_pctcpu, decays p_estcpu and recomputes the user priority.
+ * Finishes by calling vmmeter(), waking pageproc when bclnlist is
+ * non-empty, and re-arming itself with timeout().  The argument is
+ * not used.
+ */
+/* ARGSUSED */
+void
+schedcpu(arg)
+ void *arg;
+{
+ register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
+ register struct proc *p;
+ register int s;
+ register unsigned int newcpu;
+
+ wakeup((caddr_t)&lbolt);
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ /*
+ * Increment time in/out of memory and sleep time
+ * (if sleeping). We ignore overflow; with 16-bit int's
+ * (remember them?) overflow takes 45 days.
+ */
+ p->p_swtime++;
+ if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
+ p->p_slptime++;
+ p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; /* fixed-point multiply by ccpu */
+ /*
+ * If the process has slept the entire second,
+ * stop recalculating its priority until it wakes up.
+ */
+ if (p->p_slptime > 1)
+ continue;
+ s = splstatclock(); /* prevent state changes */
+ /*
+ * p_pctcpu is only for ps.
+ */
+#if (FSHIFT >= CCPU_SHIFT)
+ p->p_pctcpu += (hz == 100)?
+ ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
+ 100 * (((fixpt_t) p->p_cpticks)
+ << (FSHIFT - CCPU_SHIFT)) / hz;
+#else
+ p->p_pctcpu += ((FSCALE - ccpu) *
+ (p->p_cpticks * FSCALE / hz)) >> FSHIFT;
+#endif
+ p->p_cpticks = 0; /* start counting ticks afresh for the next pass */
+ newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice;
+ p->p_estcpu = min(newcpu, UCHAR_MAX); /* clamp to UCHAR_MAX */
+ resetpriority(p);
+ if (p->p_priority >= PUSER) {
+#define PPQ (128 / NQS) /* priorities per queue */
+ if ((p != curproc) &&
+ p->p_stat == SRUN &&
+ (p->p_flag & P_INMEM) &&
+ (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) { /* old and new priority fall in different PPQ-sized bands */
+ remrq(p);
+ p->p_priority = p->p_usrpri;
+ setrunqueue(p);
+ } else
+ p->p_priority = p->p_usrpri;
+ }
+ splx(s);
+ }
+ vmmeter();
+ if (bclnlist != NULL)
+ wakeup((caddr_t)pageproc);
+ timeout(schedcpu, (void *)0, hz); /* run again in hz ticks */
+}
+
+/*
+ * Recalculate the priority of a process after it has slept for a while.
+ * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
+ * least six times the loadfactor will decay p_estcpu to zero.
+ *
+ * Either zeroes p_estcpu outright, or applies decay_cpu() while
+ * counting p_slptime down (so p_slptime is consumed here), and then
+ * calls resetpriority() to recompute the user priority.
+ */
+void
+updatepri(p)
+ register struct proc *p;
+{
+ register unsigned int newcpu = p->p_estcpu;
+ register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
+
+ if (p->p_slptime > 5 * loadfac) /* NOTE(review): decay_cpu adds FSCALE to loadfac, so loadfac looks FSCALE-scaled while p_slptime is a plain count -- confirm the units */
+ p->p_estcpu = 0;
+ else {
+ p->p_slptime--; /* the first time was done in schedcpu */
+ while (newcpu && --p->p_slptime) /* stop early once the estimate reaches zero */
+ newcpu = (int) decay_cpu(loadfac, newcpu);
+ p->p_estcpu = min(newcpu, UCHAR_MAX);
+ }
+ resetpriority(p);
+}
+
+/*
+ * We're only looking at 7 bits of the address; everything is
+ * aligned to 4, lots of things are aligned to greater powers
+ * of 2. Shift right by 8, i.e. drop the bottom 256 worth.
+ *
+ * LOOKUP() turns a wait channel address into an index into slpque[].
+ * Each slot is a list of sleeping processes chained through p_forw;
+ * sq_tailp addresses the last link so new sleepers are appended at
+ * the tail.
+ */
+#define TABLESIZE 128
+#define LOOKUP(x) (((int)(x) >> 8) & (TABLESIZE - 1))
+struct slpque {
+ struct proc *sq_head; /* first process on this queue, 0 when empty */
+ struct proc **sq_tailp; /* address of the p_forw link of the last process */
+} slpque[TABLESIZE];
+
+/*
+ * During autoconfiguration or after a panic, a sleep will simply
+ * lower the priority briefly to allow interrupts, then return.
+ * The priority to be used (safepri) is machine-dependent, thus this
+ * value is initialized and maintained in the machine-dependent layers.
+ * This priority will typically be 0, or the lowest priority
+ * that is safe for use on the interrupt stack; it can be made
+ * higher to block network software interrupts after panics.
+ */
+int safepri;
+
+/*
+ * General sleep call. Suspends the current process until a wakeup is
+ * performed on the specified identifier. The process will then be made
+ * runnable with the specified priority. Sleeps at most timo/hz seconds
+ * (0 means no timeout). If pri includes PCATCH flag, signals are checked
+ * before and after sleeping, else signals are not checked. Returns 0 if
+ * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a
+ * signal needs to be delivered, ERESTART is returned if the current system
+ * call should be restarted if possible, and EINTR is returned if the system
+ * call should be interrupted by the signal.
+ *
+ * ident is the wait channel, wmesg is stored in p_wmesg while asleep.
+ * While the system is cold or after a panic the call only drops to
+ * safepri briefly and returns 0 without sleeping.
+ */
+int
+tsleep(ident, priority, wmesg, timo)
+ void *ident;
+ int priority, timo;
+ char *wmesg;
+{
+ register struct proc *p = curproc;
+ register struct slpque *qp;
+ register s;
+ int sig, catch = priority & PCATCH;
+ extern int cold;
+ void endtsleep __P((void *));
+
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p->p_tracep, 1, 0);
+#endif
+ s = splhigh();
+ if (cold || panicstr) {
+ /*
+ * After a panic, or during autoconfiguration,
+ * just give interrupts a chance, then just return;
+ * don't run any other procs or panic below,
+ * in case this is the idle process and already asleep.
+ */
+ splx(safepri);
+ splx(s);
+ return (0);
+ }
+#ifdef DIAGNOSTIC
+ if (ident == NULL || p->p_stat != SRUN || p->p_back)
+ panic("tsleep");
+#endif
+ p->p_wchan = ident;
+ p->p_wmesg = wmesg;
+ p->p_slptime = 0;
+ p->p_priority = priority & PRIMASK; /* strip flag bits such as PCATCH */
+ qp = &slpque[LOOKUP(ident)];
+ if (qp->sq_head == 0)
+ qp->sq_head = p;
+ else
+ *qp->sq_tailp = p;
+ *(qp->sq_tailp = &p->p_forw) = 0; /* p is the new tail; terminate the list */
+ if (timo)
+ timeout(endtsleep, (void *)p, timo);
+ /*
+ * We put ourselves on the sleep queue and start our timeout
+ * before calling CURSIG, as we could stop there, and a wakeup
+ * or a SIGCONT (or both) could occur while we were stopped.
+ * A SIGCONT would cause us to be marked as SSLEEP
+ * without resuming us, thus we must be ready for sleep
+ * when CURSIG is called. If the wakeup happens while we're
+ * stopped, p->p_wchan will be 0 upon return from CURSIG.
+ */
+ if (catch) {
+ p->p_flag |= P_SINTR;
+ if (sig = CURSIG(p)) { /* a signal is already pending: do not sleep */
+ if (p->p_wchan)
+ unsleep(p);
+ p->p_stat = SRUN;
+ goto resume;
+ }
+ if (p->p_wchan == 0) { /* the wakeup already happened; skip the later signal check */
+ catch = 0;
+ goto resume;
+ }
+ } else
+ sig = 0;
+ p->p_stat = SSLEEP;
+ p->p_stats->p_ru.ru_nvcsw++;
+ mi_switch();
+resume:
+ curpriority = p->p_usrpri;
+ splx(s);
+ p->p_flag &= ~P_SINTR;
+ if (p->p_flag & P_TIMEOUT) { /* set by endtsleep() when the timeout fired */
+ p->p_flag &= ~P_TIMEOUT;
+ if (sig == 0) {
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p->p_tracep, 0, 0);
+#endif
+ return (EWOULDBLOCK);
+ }
+ } else if (timo)
+ untimeout(endtsleep, (void *)p); /* woken before the timeout: cancel it */
+ if (catch && (sig != 0 || (sig = CURSIG(p)))) {
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p->p_tracep, 0, 0);
+#endif
+ if (p->p_sigacts->ps_sigintr & sigmask(sig)) /* this signal's bit is set in ps_sigintr */
+ return (EINTR);
+ return (ERESTART);
+ }
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p->p_tracep, 0, 0);
+#endif
+ return (0);
+}
+
+/*
+ * Timeout handler armed by tsleep(); arg is the sleeping process.
+ *
+ * Does nothing when the process has already been awakened (p_wchan is
+ * zero).  Otherwise a process in SSLEEP is made runnable, while one in
+ * any other state is only taken off its sleep queue so that it stays
+ * stopped; in both cases P_TIMEOUT is then set so tsleep() can tell
+ * that the timeout expired.
+ */
+void
+endtsleep(arg)
+	void *arg;
+{
+	register struct proc *p = (struct proc *)arg;
+	int s = splhigh();
+
+	if (p->p_wchan != 0) {
+		switch (p->p_stat) {
+		case SSLEEP:
+			setrunnable(p);
+			break;
+		default:
+			unsleep(p);
+			break;
+		}
+		p->p_flag |= P_TIMEOUT;
+	}
+	splx(s);
+}
+
+/*
+ * Short-term, non-interruptible sleep.
+ *
+ * Queues the current process on the sleep queue for ident and switches
+ * away, with no wait message, no timeout and no signal checking.
+ * Under DIAGNOSTIC a priority above PZERO is reported and panics.
+ * While the system is cold or after a panic the call only drops to
+ * safepri briefly and returns without sleeping.
+ */
+void
+sleep(ident, priority)
+ void *ident;
+ int priority;
+{
+ register struct proc *p = curproc;
+ register struct slpque *qp;
+ register s;
+ extern int cold;
+
+#ifdef DIAGNOSTIC
+ if (priority > PZERO) {
+ printf("sleep called with priority %d > PZERO, wchan: %x\n",
+ priority, ident);
+ panic("old sleep");
+ }
+#endif
+ s = splhigh();
+ if (cold || panicstr) {
+ /*
+ * After a panic, or during autoconfiguration,
+ * just give interrupts a chance, then just return;
+ * don't run any other procs or panic below,
+ * in case this is the idle process and already asleep.
+ */
+ splx(safepri);
+ splx(s);
+ return;
+ }
+#ifdef DIAGNOSTIC
+ if (ident == NULL || p->p_stat != SRUN || p->p_back)
+ panic("sleep");
+#endif
+ p->p_wchan = ident;
+ p->p_wmesg = NULL;
+ p->p_slptime = 0;
+ p->p_priority = priority;
+ qp = &slpque[LOOKUP(ident)];
+ if (qp->sq_head == 0)
+ qp->sq_head = p;
+ else
+ *qp->sq_tailp = p;
+ *(qp->sq_tailp = &p->p_forw) = 0; /* p is the new tail; terminate the list */
+ p->p_stat = SSLEEP;
+ p->p_stats->p_ru.ru_nvcsw++;
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p->p_tracep, 1, 0);
+#endif
+ mi_switch();
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p->p_tracep, 0, 0);
+#endif
+ curpriority = p->p_usrpri;
+ splx(s);
+}
+
+/*
+ * Take a process off the sleep queue it is waiting on.
+ *
+ * A process whose p_wchan is already zero is left untouched.
+ * Otherwise it is unlinked from the slpque[] bucket selected by its
+ * wait channel, the bucket's tail pointer is repaired if the process
+ * was the last entry, and p_wchan is cleared.  Runs at splhigh().
+ */
+void
+unsleep(p)
+	register struct proc *p;
+{
+	register struct slpque *qp;
+	register struct proc **linkp;
+	int s;
+
+	s = splhigh();
+	if (p->p_wchan == 0) {
+		splx(s);
+		return;
+	}
+	qp = &slpque[LOOKUP(p->p_wchan)];
+	/* Walk the chain until we hold the link that points at p. */
+	for (linkp = &qp->sq_head; *linkp != p; linkp = &(*linkp)->p_forw)
+		continue;
+	*linkp = p->p_forw;
+	if (qp->sq_tailp == &p->p_forw)
+		qp->sq_tailp = linkp;
+	p->p_wchan = 0;
+	splx(s);
+}
+
+/*
+ * Make all processes sleeping on the specified identifier runnable.
+ *
+ * Every process on the matching slpque[] bucket whose p_wchan equals
+ * ident is unlinked and has p_wchan cleared.  Those in SSLEEP are
+ * marked SRUN (and queued to run when in memory); any other matching
+ * process is only unlinked and keeps its state.  The scan restarts
+ * from the head of the bucket after each process made runnable.
+ */
+void
+wakeup(ident)
+ register void *ident;
+{
+ register struct slpque *qp;
+ register struct proc *p, **q;
+ int s;
+
+ s = splhigh();
+ qp = &slpque[LOOKUP(ident)];
+restart:
+ for (q = &qp->sq_head; p = *q; ) {
+#ifdef DIAGNOSTIC
+ if (p->p_back || p->p_stat != SSLEEP && p->p_stat != SSTOP)
+ panic("wakeup");
+#endif
+ if (p->p_wchan == ident) {
+ p->p_wchan = 0;
+ *q = p->p_forw; /* unlink p; q now addresses its successor */
+ if (qp->sq_tailp == &p->p_forw)
+ qp->sq_tailp = q;
+ if (p->p_stat == SSLEEP) {
+ /* OPTIMIZED EXPANSION OF setrunnable(p); */
+ if (p->p_slptime > 1)
+ updatepri(p);
+ p->p_slptime = 0;
+ p->p_stat = SRUN;
+ if (p->p_flag & P_INMEM)
+ setrunqueue(p);
+ /*
+ * Since curpriority is a user priority,
+ * p->p_priority is always better than
+ * curpriority.
+ */
+ if ((p->p_flag & P_INMEM) == 0)
+ wakeup((caddr_t)&proc0); /* not in memory: wake whoever sleeps on &proc0 */
+ else
+ need_resched();
+ /* END INLINE EXPANSION */
+ goto restart;
+ }
+ } else
+ q = &p->p_forw; /* different channel in the same bucket: skip it */
+ }
+ splx(s);
+}
+
+/*
+ * The machine independent parts of mi_switch().
+ * Must be called at splstatclock() or higher.
+ *
+ * Charges the time since `runtime' to the current process, enforces
+ * the RLIMIT_CPU limits, demotes long-running processes, then calls
+ * cpu_switch() and records the new start time in `runtime'.
+ */
+void
+mi_switch()
+{
+ register struct proc *p = curproc; /* XXX */
+ register struct rlimit *rlim;
+ register long s, u;
+ struct timeval tv;
+
+ /*
+ * Compute the amount of time during which the current
+ * process was running, and add that to its total so far.
+ */
+ microtime(&tv);
+ u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec);
+ s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec);
+ if (u < 0) { /* borrow one second */
+ u += 1000000;
+ s--;
+ } else if (u >= 1000000) { /* carry into the seconds */
+ u -= 1000000;
+ s++;
+ }
+ p->p_rtime.tv_usec = u;
+ p->p_rtime.tv_sec = s;
+
+ /*
+ * Check if the process exceeds its cpu resource allocation.
+ * If over max, kill it. In any case, if it has run for more
+ * than 10 minutes, reduce priority to give others a chance.
+ */
+ rlim = &p->p_rlimit[RLIMIT_CPU];
+ if (s >= rlim->rlim_cur) {
+ if (s >= rlim->rlim_max)
+ psignal(p, SIGKILL);
+ else {
+ psignal(p, SIGXCPU);
+ if (rlim->rlim_cur < rlim->rlim_max)
+ rlim->rlim_cur += 5; /* raise the soft limit by 5 seconds */
+ }
+ }
+ if (s > 10 * 60 && p->p_ucred->cr_uid && p->p_nice == NZERO) { /* only for uid != 0 at the default nice value */
+ p->p_nice = NZERO + 4;
+ resetpriority(p);
+ }
+
+ /*
+ * Pick a new current process and record its start time.
+ */
+ cnt.v_swtch++;
+ cpu_switch(p);
+ microtime(&runtime);
+}
+
+/*
+ * Set up the NQS doubly-linked run queues as empty: the forward and
+ * backward links of every queue header point back at the header
+ * itself.
+ */
+rqinit()
+{
+	register int i = 0;
+
+	while (i < NQS) {
+		qs[i].ph_rlink = (struct proc *)&qs[i];
+		qs[i].ph_link = qs[i].ph_rlink;
+		i++;
+	}
+}
+
+/*
+ * Change process state to be runnable,
+ * placing it on the run queue if it is in memory,
+ * and awakening the swapper if it isn't in memory.
+ *
+ * Panics for a process that is already SRUN, is SZOMB, or is in state
+ * 0 or any unrecognized state.  Stopped and sleeping processes are
+ * first taken off their sleep queue; SIDL processes need no such step.
+ */
+void
+setrunnable(p)
+ register struct proc *p;
+{
+ register int s;
+
+ s = splhigh();
+ switch (p->p_stat) {
+ case 0:
+ case SRUN:
+ case SZOMB:
+ default:
+ panic("setrunnable");
+ case SSTOP:
+ case SSLEEP:
+ unsleep(p); /* e.g. when sending signals */
+ break;
+
+ case SIDL:
+ break;
+ }
+ p->p_stat = SRUN;
+ if (p->p_flag & P_INMEM)
+ setrunqueue(p);
+ splx(s);
+ if (p->p_slptime > 1)
+ updatepri(p); /* bring p_estcpu up to date after the sleep */
+ p->p_slptime = 0;
+ if ((p->p_flag & P_INMEM) == 0)
+ wakeup((caddr_t)&proc0);
+ else if (p->p_priority < curpriority) /* numerically lower than curpriority: ask for a reschedule */
+ need_resched();
+}
+
+/*
+ * Derive the user-mode priority of a process from its CPU estimate
+ * and nice value: PUSER + p_estcpu / 4 + 2 * p_nice, capped at MAXPRI.
+ * The result is stored in p_usrpri; when it is numerically below
+ * curpriority a reschedule is requested.
+ */
+void
+resetpriority(p)
+	register struct proc *p;
+{
+	register unsigned int pri;
+
+	pri = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
+	if (pri > MAXPRI)
+		pri = MAXPRI;
+	p->p_usrpri = pri;
+	if (curpriority > pri)
+		need_resched();
+}
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
new file mode 100644
index 000000000000..ae16decff813
--- /dev/null
+++ b/sys/kern/kern_sysctl.c
@@ -0,0 +1,787 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Karels at Berkeley Software Design, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94
+ */
+
+/*
+ * sysctl system call.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/unistd.h>
+#include <sys/buf.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+sysctlfn kern_sysctl;
+sysctlfn hw_sysctl;
+#ifdef DEBUG
+sysctlfn debug_sysctl;
+#endif
+extern sysctlfn vm_sysctl;
+extern sysctlfn fs_sysctl;
+extern sysctlfn net_sysctl;
+extern sysctlfn cpu_sysctl;
+
+/*
+ * Locking and stats
+ *
+ * memlock is taken by __sysctl() around the handler call whenever an
+ * old-value buffer is supplied.
+ */
+static struct sysctl_lock {
+ int sl_lock; /* nonzero while the lock is held */
+ int sl_want; /* set by a caller that had to sleep for the lock */
+ int sl_locked; /* incremented each time such a sleep returns */
+} memlock;
+
+struct sysctl_args {
+ int *name;
+ u_int namelen;
+ void *old;
+ size_t *oldlenp;
+ void *new;
+ size_t newlen;
+};
+
+/*
+ * __sysctl system call: get and/or set the kernel variable named by
+ * the integer vector uap->name (uap->namelen entries).
+ *
+ * name[0] selects the top-level handler; the remainder of the name is
+ * passed down to it.  The old value is returned through uap->old with
+ * its length read from and written back to uap->oldlenp; a new value,
+ * if any, is taken from uap->new/uap->newlen and requires superuser
+ * privilege.  While an old-value buffer is in use the call holds
+ * memlock and, unless the handler is exempted, wires the buffer with
+ * vslock().
+ *
+ * Returns 0 with *retval set to the resulting old length, or an errno:
+ * EINVAL for a name vector shorter than 2 or longer than CTL_MAXNAME,
+ * EOPNOTSUPP for an unknown top-level name, EFAULT when the old buffer
+ * is not writable, or whatever suser(), copyin(), copyout() or the
+ * handler reports.
+ */
+int
+__sysctl(p, uap, retval)
+	struct proc *p;
+	register struct sysctl_args *uap;
+	int *retval;
+{
+	int error, dolock = 1;
+	u_int savelen, oldlen = 0;
+	sysctlfn *fn;
+	int name[CTL_MAXNAME];
+
+	/* Setting a new value is reserved for the superuser. */
+	if (uap->new != NULL && (error = suser(p->p_ucred, &p->p_acflag)))
+		return (error);
+	/*
+	 * all top-level sysctl names are non-terminal
+	 */
+	if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
+		return (EINVAL);
+	if (error = copyin(uap->name, &name, uap->namelen * sizeof(int)))
+		return (error);
+
+	switch (name[0]) {
+	case CTL_KERN:
+		fn = kern_sysctl;
+		/*
+		 * The second-level identifier lives in name[1]; name[2]
+		 * is not even copied in when namelen is 2.
+		 */
+		if (name[1] != KERN_VNODE)	/* XXX */
+			dolock = 0;
+		break;
+	case CTL_HW:
+		fn = hw_sysctl;
+		break;
+	case CTL_VM:
+		fn = vm_sysctl;
+		break;
+	case CTL_NET:
+		fn = net_sysctl;
+		break;
+#ifdef notyet
+	case CTL_FS:
+		fn = fs_sysctl;
+		break;
+#endif
+	case CTL_MACHDEP:
+		fn = cpu_sysctl;
+		break;
+#ifdef DEBUG
+	case CTL_DEBUG:
+		fn = debug_sysctl;
+		break;
+#endif
+	default:
+		return (EOPNOTSUPP);
+	}
+
+	if (uap->oldlenp &&
+	    (error = copyin(uap->oldlenp, &oldlen, sizeof(oldlen))))
+		return (error);
+	if (uap->old != NULL) {
+		if (!useracc(uap->old, oldlen, B_WRITE))
+			return (EFAULT);
+		/* Serialize users of an old-value buffer. */
+		while (memlock.sl_lock) {
+			memlock.sl_want = 1;
+			sleep((caddr_t)&memlock, PRIBIO+1);
+			memlock.sl_locked++;
+		}
+		memlock.sl_lock = 1;
+		if (dolock)
+			vslock(uap->old, oldlen);
+		/* The handler may change oldlen; remember what was wired. */
+		savelen = oldlen;
+	}
+	error = (*fn)(name + 1, uap->namelen - 1, uap->old, &oldlen,
+	    uap->new, uap->newlen, p);
+	if (uap->old != NULL) {
+		if (dolock)
+			vsunlock(uap->old, savelen, B_WRITE);
+		memlock.sl_lock = 0;
+		if (memlock.sl_want) {
+			memlock.sl_want = 0;
+			wakeup((caddr_t)&memlock);
+		}
+	}
+	if (error)
+		return (error);
+	if (uap->oldlenp)
+		error = copyout(&oldlen, uap->oldlenp, sizeof(oldlen));
+	*retval = oldlen;
+	/* Report a failed copyout of the length instead of dropping it. */
+	return (error);
+}
+
+/*
+ * Attributes stored in the kernel.
+ */
+char hostname[MAXHOSTNAMELEN];
+int hostnamelen;
+long hostid;
+int securelevel;
+
+/*
+ * kernel related system variables.
+ *
+ * Dispatches on name[0] (a KERN_* identifier) to the matching sysctl_*
+ * helper and returns its result.  Returns ENOTDIR when namelen is not
+ * 1 for anything other than KERN_PROC and KERN_PROF, and EOPNOTSUPP
+ * for an identifier with no case below (which includes KERN_PROF when
+ * GPROF is not defined).
+ */
+kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+ int *name;
+ u_int namelen;
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ struct proc *p;
+{
+ int error, level, inthostid;
+ extern char ostype[], osrelease[], version[];
+
+ /* all sysctl names at this level are terminal */
+ if (namelen != 1 && !(name[0] == KERN_PROC || name[0] == KERN_PROF))
+ return (ENOTDIR); /* overloaded */
+
+ switch (name[0]) {
+ case KERN_OSTYPE:
+ return (sysctl_rdstring(oldp, oldlenp, newp, ostype));
+ case KERN_OSRELEASE:
+ return (sysctl_rdstring(oldp, oldlenp, newp, osrelease));
+ case KERN_OSREV:
+ return (sysctl_rdint(oldp, oldlenp, newp, BSD));
+ case KERN_VERSION:
+ return (sysctl_rdstring(oldp, oldlenp, newp, version));
+ case KERN_MAXVNODES:
+ return(sysctl_int(oldp, oldlenp, newp, newlen, &desiredvnodes));
+ case KERN_MAXPROC:
+ return (sysctl_int(oldp, oldlenp, newp, newlen, &maxproc));
+ case KERN_MAXFILES:
+ return (sysctl_int(oldp, oldlenp, newp, newlen, &maxfiles));
+ case KERN_ARGMAX:
+ return (sysctl_rdint(oldp, oldlenp, newp, ARG_MAX));
+ case KERN_SECURELVL:
+ level = securelevel; /* work on a copy so a rejected change leaves securelevel alone */
+ if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &level)) ||
+ newp == NULL)
+ return (error);
+ if (level < securelevel && p->p_pid != 1) /* only pid 1 may lower the level */
+ return (EPERM);
+ securelevel = level;
+ return (0);
+ case KERN_HOSTNAME:
+ error = sysctl_string(oldp, oldlenp, newp, newlen,
+ hostname, sizeof(hostname));
+ if (newp && !error)
+ hostnamelen = newlen;
+ return (error);
+ case KERN_HOSTID:
+ inthostid = hostid; /* XXX assumes sizeof long <= sizeof int */
+ error = sysctl_int(oldp, oldlenp, newp, newlen, &inthostid);
+ hostid = inthostid; /* stored back whether or not sysctl_int failed */
+ return (error);
+ case KERN_CLOCKRATE:
+ return (sysctl_clockrate(oldp, oldlenp));
+ case KERN_BOOTTIME:
+ return (sysctl_rdstruct(oldp, oldlenp, newp, &boottime,
+ sizeof(struct timeval)));
+ case KERN_VNODE:
+ return (sysctl_vnode(oldp, oldlenp));
+ case KERN_PROC:
+ return (sysctl_doproc(name + 1, namelen - 1, oldp, oldlenp));
+ case KERN_FILE:
+ return (sysctl_file(oldp, oldlenp));
+#ifdef GPROF
+ case KERN_PROF:
+ return (sysctl_doprof(name + 1, namelen - 1, oldp, oldlenp,
+ newp, newlen));
+#endif
+ case KERN_POSIX1:
+ return (sysctl_rdint(oldp, oldlenp, newp, _POSIX_VERSION));
+ case KERN_NGROUPS:
+ return (sysctl_rdint(oldp, oldlenp, newp, NGROUPS_MAX));
+ case KERN_JOB_CONTROL:
+ return (sysctl_rdint(oldp, oldlenp, newp, 1));
+ case KERN_SAVED_IDS:
+#ifdef _POSIX_SAVED_IDS
+ return (sysctl_rdint(oldp, oldlenp, newp, 1));
+#else
+ return (sysctl_rdint(oldp, oldlenp, newp, 0));
+#endif
+ default:
+ return (EOPNOTSUPP);
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * hardware related system variables.
+ *
+ * Dispatches on name[0] (an HW_* identifier); every value handled here
+ * is returned through a read-only helper.  Returns ENOTDIR when
+ * namelen is not 1 and EOPNOTSUPP for an unrecognized identifier.
+ * newlen and p are not used.
+ */
+hw_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+ int *name;
+ u_int namelen;
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ struct proc *p;
+{
+ extern char machine[], cpu_model[];
+
+ /* all sysctl names at this level are terminal */
+ if (namelen != 1)
+ return (ENOTDIR); /* overloaded */
+
+ switch (name[0]) {
+ case HW_MACHINE:
+ return (sysctl_rdstring(oldp, oldlenp, newp, machine));
+ case HW_MODEL:
+ return (sysctl_rdstring(oldp, oldlenp, newp, cpu_model));
+ case HW_NCPU:
+ return (sysctl_rdint(oldp, oldlenp, newp, 1)); /* XXX */
+ case HW_BYTEORDER:
+ return (sysctl_rdint(oldp, oldlenp, newp, BYTE_ORDER));
+ case HW_PHYSMEM:
+ return (sysctl_rdint(oldp, oldlenp, newp, ctob(physmem))); /* NOTE(review): value is passed as an int -- confirm it cannot exceed INT_MAX */
+ case HW_USERMEM:
+ return (sysctl_rdint(oldp, oldlenp, newp,
+ ctob(physmem - cnt.v_wire_count)));
+ case HW_PAGESIZE:
+ return (sysctl_rdint(oldp, oldlenp, newp, PAGE_SIZE));
+ default:
+ return (EOPNOTSUPP);
+ }
+ /* NOTREACHED */
+}
+
+#ifdef DEBUG
+/*
+ * Debugging related system variables.
+ */
+struct ctldebug debug0, debug1, debug2, debug3, debug4;
+struct ctldebug debug5, debug6, debug7, debug8, debug9;
+struct ctldebug debug10, debug11, debug12, debug13, debug14;
+struct ctldebug debug15, debug16, debug17, debug18, debug19;
+static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = {
+ &debug0, &debug1, &debug2, &debug3, &debug4,
+ &debug5, &debug6, &debug7, &debug8, &debug9,
+ &debug10, &debug11, &debug12, &debug13, &debug14,
+ &debug15, &debug16, &debug17, &debug18, &debug19,
+};
+/*
+ * Handler for the debugging variables.
+ *
+ * The name is two levels deep: name[0] selects an entry of debugvars[]
+ * and name[1] selects either its name string (CTL_DEBUG_NAME, read
+ * only) or its integer value (CTL_DEBUG_VALUE, read/write).
+ *
+ * Returns ENOTDIR when namelen is not 2, and EOPNOTSUPP when name[0]
+ * lies outside debugvars[], when the selected entry has no name set,
+ * or when name[1] is not one of the two fields; otherwise the result
+ * of the sysctl helper.  p is not used.
+ */
+int
+debug_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	struct proc *p;
+{
+	struct ctldebug *cdp;
+
+	/* all sysctl names at this level are name and field */
+	if (namelen != 2)
+		return (ENOTDIR);		/* overloaded */
+	/* name[0] is supplied by the caller: keep it inside debugvars[]. */
+	if (name[0] < 0 || name[0] >= CTL_DEBUG_MAXID)
+		return (EOPNOTSUPP);
+	cdp = debugvars[name[0]];
+	if (cdp->debugname == 0)
+		return (EOPNOTSUPP);
+	switch (name[1]) {
+	case CTL_DEBUG_NAME:
+		return (sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname));
+	case CTL_DEBUG_VALUE:
+		return (sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar));
+	default:
+		return (EOPNOTSUPP);
+	}
+	/* NOTREACHED */
+}
+#endif /* DEBUG */
+
+/*
+ * Get and/or set an integer-valued sysctl variable.
+ *
+ * When oldp is given, the current value of *valp is copied out to it;
+ * when newp is given, *valp is then replaced from it.  *oldlenp is set
+ * to sizeof(int) in either case.
+ *
+ * Returns ENOMEM when the old buffer is smaller than an int, EINVAL
+ * when newlen is not exactly sizeof(int), or the copyout()/copyin()
+ * result.
+ */
+sysctl_int(oldp, oldlenp, newp, newlen, valp)
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	int *valp;
+{
+	int error;
+
+	if (oldp != NULL && *oldlenp < sizeof(int))
+		return (ENOMEM);
+	if (newp != NULL && newlen != sizeof(int))
+		return (EINVAL);
+	*oldlenp = sizeof(int);
+	if (oldp != NULL) {
+		error = copyout(valp, oldp, sizeof(int));
+		if (error != 0)
+			return (error);
+	}
+	if (newp == NULL)
+		return (0);
+	return (copyin(newp, valp, sizeof(int)));
+}
+
+/*
+ * Return a read-only integer-valued sysctl variable.
+ *
+ * *oldlenp is set to sizeof(int) and, when oldp is given, val is
+ * copied out to it.  Returns ENOMEM when the old buffer is smaller
+ * than an int, EPERM on any attempt to set a new value, or the
+ * copyout() result.
+ */
+sysctl_rdint(oldp, oldlenp, newp, val)
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	int val;
+{
+
+	if (oldp != NULL && *oldlenp < sizeof(int))
+		return (ENOMEM);
+	if (newp != NULL)
+		return (EPERM);
+	*oldlenp = sizeof(int);
+	if (oldp == NULL)
+		return (0);
+	return (copyout((caddr_t)&val, oldp, sizeof(int)));
+}
+
+/*
+ * Validate parameters and get old / set new parameters
+ * for a string-valued sysctl function.
+ *
+ * str is the kernel buffer (maxlen bytes) holding the current value.
+ * *oldlenp is always set to the length of the current string including
+ * its terminating NUL, so that a size query (oldp == NULL) reports the
+ * space needed, as sysctl_int() and sysctl_rdstring() do.  When oldp
+ * is given the string is copied out; when newp is given the new value
+ * is then copied in and NUL-terminated.
+ *
+ * Returns ENOMEM when the old buffer is too small, EINVAL when the new
+ * value (newlen bytes plus NUL) does not fit in maxlen, or the
+ * copyout()/copyin() result.
+ */
+sysctl_string(oldp, oldlenp, newp, newlen, str, maxlen)
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	char *str;
+	int maxlen;
+{
+	int len, error = 0;
+
+	len = strlen(str) + 1;
+	if (oldp && *oldlenp < len)
+		return (ENOMEM);
+	if (newp && newlen >= maxlen)
+		return (EINVAL);
+	/* Report the length even when no old buffer was supplied. */
+	*oldlenp = len;
+	if (oldp)
+		error = copyout(str, oldp, len);
+	if (error == 0 && newp) {
+		error = copyin(newp, str, newlen);
+		str[newlen] = 0;
+	}
+	return (error);
+}
+
+/*
+ * Return a read-only string-valued sysctl variable.
+ *
+ * *oldlenp is set to the length of str including its terminating NUL
+ * and, when oldp is given, the string is copied out to it.  Returns
+ * ENOMEM when the old buffer is too small, EPERM on any attempt to set
+ * a new value, or the copyout() result.
+ */
+sysctl_rdstring(oldp, oldlenp, newp, str)
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	char *str;
+{
+	int len;
+
+	len = strlen(str) + 1;
+	if (oldp != NULL && *oldlenp < len)
+		return (ENOMEM);
+	if (newp != NULL)
+		return (EPERM);
+	*oldlenp = len;
+	if (oldp == NULL)
+		return (0);
+	return (copyout(str, oldp, len));
+}
+
+/*
+ * Validate parameters and get old / set new parameters
+ * for a structure oriented sysctl function.
+ *
+ * sp is the kernel structure and len its size.  *oldlenp is always set
+ * to len, so that a size query (oldp == NULL) reports the space
+ * needed, as sysctl_rdstruct() does.  When oldp is given the structure
+ * is copied out; when newp is given, the newlen bytes supplied by the
+ * caller are then copied in over the start of the structure.
+ *
+ * Returns ENOMEM when the old buffer is smaller than len, EINVAL when
+ * newlen exceeds len, or the copyout()/copyin() result.
+ */
+sysctl_struct(oldp, oldlenp, newp, newlen, sp, len)
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	void *sp;
+	int len;
+{
+	int error = 0;
+
+	if (oldp && *oldlenp < len)
+		return (ENOMEM);
+	if (newp && newlen > len)
+		return (EINVAL);
+	/* Report the length even when no old buffer was supplied. */
+	*oldlenp = len;
+	if (oldp)
+		error = copyout(sp, oldp, len);
+	/* Read only as many bytes as the caller said it provided. */
+	if (error == 0 && newp)
+		error = copyin(newp, sp, newlen);
+	return (error);
+}
+
+/*
+ * Return a read-only structure-valued sysctl variable.
+ *
+ * sp is the kernel structure and len its size.  *oldlenp is set to len
+ * and, when oldp is given, the structure is copied out to it.  Returns
+ * ENOMEM when the old buffer is smaller than len, EPERM on any attempt
+ * to set a new value, or the copyout() result.
+ */
+sysctl_rdstruct(oldp, oldlenp, newp, sp, len)
+	void *oldp;
+	size_t *oldlenp;
+	void *newp, *sp;
+	int len;
+{
+
+	if (oldp != NULL && *oldlenp < len)
+		return (ENOMEM);
+	if (newp != NULL)
+		return (EPERM);
+	*oldlenp = len;
+	if (oldp == NULL)
+		return (0);
+	return (copyout(sp, oldp, len));
+}
+
+/*
+ * Get file structures.
+ *
+ * Copies out the value of filehead followed by every struct file on
+ * the list that starts there.  With where == NULL only a size estimate
+ * is stored in *sizep.  Otherwise *sizep is set to the number of bytes
+ * written; ENOMEM is returned when the buffer fills before the list
+ * ends, and a buffer too small even for filehead yields *sizep == 0
+ * with a return of 0.  A copyout() failure is returned as is.
+ */
+sysctl_file(where, sizep)
+ char *where;
+ size_t *sizep;
+{
+ int buflen, error;
+ struct file *fp;
+ char *start = where;
+
+ buflen = *sizep;
+ if (where == NULL) {
+ /*
+ * overestimate by 10 files
+ */
+ *sizep = sizeof(filehead) + (nfiles + 10) * sizeof(struct file);
+ return (0);
+ }
+
+ /*
+ * first copyout filehead
+ */
+ if (buflen < sizeof(filehead)) {
+ *sizep = 0;
+ return (0);
+ }
+ if (error = copyout((caddr_t)&filehead, where, sizeof(filehead)))
+ return (error);
+ buflen -= sizeof(filehead);
+ where += sizeof(filehead);
+
+ /*
+ * followed by an array of file structures
+ */
+ for (fp = filehead; fp != NULL; fp = fp->f_filef) {
+ if (buflen < sizeof(struct file)) { /* out of room: report what was written so far */
+ *sizep = where - start;
+ return (ENOMEM);
+ }
+ if (error = copyout((caddr_t)fp, where, sizeof (struct file)))
+ return (error);
+ buflen -= sizeof(struct file);
+ where += sizeof(struct file);
+ }
+ *sizep = where - start;
+ return (0);
+}
+
+/*
+ * try over estimating by 5 procs
+ *
+ * sysctl_doproc() copies out one struct kinfo_proc for every process
+ * on allproc and then zombproc that matches the selector in name[0]
+ * (with its argument in name[1]); embryonic (SIDL) processes are
+ * skipped.  With where == NULL it only stores a size estimate,
+ * padded by KERN_PROCSLOP, in *sizep.  Otherwise *sizep is set to the
+ * number of bytes written and ENOMEM is returned when more space was
+ * needed than that.  Returns EINVAL for a malformed name and passes
+ * on any copyout() failure.
+ */
+#define KERN_PROCSLOP (5 * sizeof (struct kinfo_proc))
+
+sysctl_doproc(name, namelen, where, sizep)
+ int *name;
+ u_int namelen;
+ char *where;
+ size_t *sizep;
+{
+ register struct proc *p;
+ register struct kinfo_proc *dp = (struct kinfo_proc *)where;
+ register int needed = 0;
+ int buflen = where != NULL ? *sizep : 0;
+ int doingzomb;
+ struct eproc eproc;
+ int error = 0;
+
+ if (namelen != 2 && !(namelen == 1 && name[0] == KERN_PROC_ALL))
+ return (EINVAL);
+ p = (struct proc *)allproc;
+ doingzomb = 0;
+again:
+ for (; p != NULL; p = p->p_next) {
+ /*
+ * Skip embryonic processes.
+ */
+ if (p->p_stat == SIDL)
+ continue;
+ /*
+ * TODO - make more efficient (see notes below).
+ * do by session.
+ */
+ switch (name[0]) { /* any selector without a case below matches every process */
+
+ case KERN_PROC_PID:
+ /* could do this with just a lookup */
+ if (p->p_pid != (pid_t)name[1])
+ continue;
+ break;
+
+ case KERN_PROC_PGRP:
+ /* could do this by traversing pgrp */
+ if (p->p_pgrp->pg_id != (pid_t)name[1])
+ continue;
+ break;
+
+ case KERN_PROC_TTY:
+ if ((p->p_flag & P_CONTROLT) == 0 ||
+ p->p_session->s_ttyp == NULL ||
+ p->p_session->s_ttyp->t_dev != (dev_t)name[1])
+ continue;
+ break;
+
+ case KERN_PROC_UID:
+ if (p->p_ucred->cr_uid != (uid_t)name[1])
+ continue;
+ break;
+
+ case KERN_PROC_RUID:
+ if (p->p_cred->p_ruid != (uid_t)name[1])
+ continue;
+ break;
+ }
+ if (buflen >= sizeof(struct kinfo_proc)) { /* room left: copy out this entry */
+ fill_eproc(p, &eproc);
+ if (error = copyout((caddr_t)p, &dp->kp_proc,
+ sizeof(struct proc)))
+ return (error);
+ if (error = copyout((caddr_t)&eproc, &dp->kp_eproc,
+ sizeof(eproc)))
+ return (error);
+ dp++;
+ buflen -= sizeof(struct kinfo_proc);
+ }
+ needed += sizeof(struct kinfo_proc); /* counted whether or not it was copied */
+ }
+ if (doingzomb == 0) { /* second pass: the zombie list */
+ p = zombproc;
+ doingzomb++;
+ goto again;
+ }
+ if (where != NULL) {
+ *sizep = (caddr_t)dp - where;
+ if (needed > *sizep)
+ return (ENOMEM);
+ } else {
+ needed += KERN_PROCSLOP;
+ *sizep = needed;
+ }
+ return (0);
+}
+
+/*
+ * Fill in an eproc structure for the specified process.
+ *
+ * p  - process to describe.
+ * ep - structure to fill in; callers in this file pass a stack
+ *      variable and copy it out to user space afterwards.
+ *
+ * The structure is zeroed first because several members are written
+ * only conditionally (e_wmesg, e_tpgid, e_tsess, and e_vm.vm_pmap for
+ * embryonic and zombie processes).  Without that, whatever was on the
+ * caller's stack would be handed to user space in those members.
+ */
+void
+fill_eproc(p, ep)
+	register struct proc *p;
+	register struct eproc *ep;
+{
+	register struct tty *tp;
+
+	bzero(ep, sizeof(*ep));
+	ep->e_paddr = p;
+	ep->e_sess = p->p_pgrp->pg_session;
+	ep->e_pcred = *p->p_cred;
+	ep->e_ucred = *p->p_ucred;
+	if (p->p_stat == SIDL || p->p_stat == SZOMB) {
+		/* No vmspace is consulted in these states; report zero sizes. */
+		ep->e_vm.vm_rssize = 0;
+		ep->e_vm.vm_tsize = 0;
+		ep->e_vm.vm_dsize = 0;
+		ep->e_vm.vm_ssize = 0;
+#ifndef sparc
+		/* ep->e_vm.vm_pmap = XXX; */
+#endif
+	} else {
+		register struct vmspace *vm = p->p_vmspace;
+
+#ifdef pmap_resident_count
+		ep->e_vm.vm_rssize = pmap_resident_count(&vm->vm_pmap); /*XXX*/
+#else
+		ep->e_vm.vm_rssize = vm->vm_rssize;
+#endif
+		ep->e_vm.vm_tsize = vm->vm_tsize;
+		ep->e_vm.vm_dsize = vm->vm_dsize;
+		ep->e_vm.vm_ssize = vm->vm_ssize;
+#ifndef sparc
+		ep->e_vm.vm_pmap = vm->vm_pmap;
+#endif
+	}
+	if (p->p_pptr)
+		ep->e_ppid = p->p_pptr->p_pid;
+	else
+		ep->e_ppid = 0;
+	ep->e_pgid = p->p_pgrp->pg_id;
+	ep->e_jobc = p->p_pgrp->pg_jobc;
+	/* Terminal details only when the process has a controlling tty. */
+	if ((p->p_flag & P_CONTROLT) &&
+	     (tp = ep->e_sess->s_ttyp)) {
+		ep->e_tdev = tp->t_dev;
+		ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
+		ep->e_tsess = tp->t_session;
+	} else
+		ep->e_tdev = NODEV;
+	ep->e_flag = ep->e_sess->s_ttyvp ? EPROC_CTTY : 0;
+	if (SESS_LEADER(p))
+		ep->e_flag |= EPROC_SLEADER;
+	/*
+	 * strncpy() does not terminate a source of WMESGLEN or more
+	 * characters; the bzero() above leaves a NUL after the copy,
+	 * assuming e_wmesg holds WMESGLEN + 1 bytes -- TODO confirm
+	 * against the struct eproc declaration.
+	 */
+	if (p->p_wmesg)
+		strncpy(ep->e_wmesg, p->p_wmesg, WMESGLEN);
+	ep->e_xsize = ep->e_xrssize = 0;
+	ep->e_xccount = ep->e_xswrss = 0;
+}
+
+#ifdef COMPAT_43
+#include <sys/socket.h>
+/*
+ * Operation classes of the old getkerninfo() interface.  The class
+ * lives in bits 8-15 of the op argument; ogetkerninfo() extracts it
+ * with (op & 0xff00).
+ */
+#define KINFO_PROC (0<<8)
+#define KINFO_RT (1<<8)
+#define KINFO_VNODE (2<<8)
+#define KINFO_FILE (3<<8)
+#define KINFO_METER (4<<8)
+#define KINFO_LOADAVG (5<<8)
+#define KINFO_CLOCKRATE (6<<8)
+
+/*
+ * Arguments of ogetkerninfo():
+ * op - operation class (above) plus class specific bits
+ * where - user buffer for the result, passed on as the old-value pointer
+ * size - user pointer to the buffer size; may be NULL
+ * arg - class specific argument (used by KINFO_RT and KINFO_PROC)
+ */
+struct getkerninfo_args {
+ int op;
+ char *where;
+ int *size;
+ int arg;
+};
+
+/*
+ * Compatibility entry point for the old getkerninfo() call.
+ *
+ * Translates the operation class in uap->op into a sysctl name vector
+ * and calls net_sysctl(), kern_sysctl() or vm_sysctl() with no new
+ * value.  On success the resulting size is returned through *retval
+ * and, if uap->size is non-NULL, copied back out to it.
+ *
+ * Returns EOPNOTSUPP for an unknown class, otherwise the error from
+ * copyin(), the sysctl routine, or copyout().
+ *
+ * NOTE(review): when uap->size is NULL the local `size' is never
+ * initialized, yet its address is still handed to the sysctl routine
+ * and its value is stored in *retval -- confirm whether callers may
+ * pass a NULL size together with a non-NULL where.
+ */
+ogetkerninfo(p, uap, retval)
+ struct proc *p;
+ register struct getkerninfo_args *uap;
+ int *retval;
+{
+ int error, name[5];
+ u_int size;
+
+ /* Fetch the caller's buffer size, if one was supplied. */
+ if (uap->size &&
+ (error = copyin((caddr_t)uap->size, (caddr_t)&size, sizeof(size))))
+ return (error);
+
+ switch (uap->op & 0xff00) {
+
+ case KINFO_RT:
+ /* op bits 16-23 and the low byte select the routing query. */
+ name[0] = PF_ROUTE;
+ name[1] = 0;
+ name[2] = (uap->op & 0xff0000) >> 16;
+ name[3] = uap->op & 0xff;
+ name[4] = uap->arg;
+ error = net_sysctl(name, 5, uap->where, &size, NULL, 0, p);
+ break;
+
+ case KINFO_VNODE:
+ name[0] = KERN_VNODE;
+ error = kern_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+ break;
+
+ case KINFO_PROC:
+ /* The low byte of op is the process selector, arg its argument. */
+ name[0] = KERN_PROC;
+ name[1] = uap->op & 0xff;
+ name[2] = uap->arg;
+ error = kern_sysctl(name, 3, uap->where, &size, NULL, 0, p);
+ break;
+
+ case KINFO_FILE:
+ name[0] = KERN_FILE;
+ error = kern_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+ break;
+
+ case KINFO_METER:
+ name[0] = VM_METER;
+ error = vm_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+ break;
+
+ case KINFO_LOADAVG:
+ name[0] = VM_LOADAVG;
+ error = vm_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+ break;
+
+ case KINFO_CLOCKRATE:
+ name[0] = KERN_CLOCKRATE;
+ error = kern_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+ break;
+
+ default:
+ return (EOPNOTSUPP);
+ }
+ if (error)
+ return (error);
+ /* The size is both the return value and written back to *uap->size. */
+ *retval = size;
+ if (uap->size)
+ error = copyout((caddr_t)&size, (caddr_t)uap->size,
+ sizeof(size));
+ return (error);
+}
+#endif /* COMPAT_43 */
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
new file mode 100644
index 000000000000..f42900cb75d2
--- /dev/null
+++ b/sys/kern/kern_tc.c
@@ -0,0 +1,528 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/dkstat.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+
+/*
+ * Clock handling routines.
+ *
+ * This code is written to operate with two timers that run independently of
+ * each other. The main clock, running hz times per second, is used to keep
+ * track of real time. The second timer handles kernel and user profiling,
+ * and does resource use estimation. If the second timer is programmable,
+ * it is randomized to avoid aliasing between the two clocks. For example,
+ * the randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires. Otherwise, it would never accumulate
+ * cpu ticks. The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we drive
+ * profiling and statistics off the main clock. This WILL NOT be accurate;
+ * do not do it unless absolutely necessary.
+ *
+ * The statistics clock may (or may not) be run at a higher rate while
+ * profiling. This profile clock runs at profhz. We require that profhz
+ * be an integral multiple of stathz.
+ *
+ * If the statistics clock is running fast, it must be divided by the ratio
+ * profhz/stathz for statistics. (For profiling, every tick counts.)
+ */
+
+/*
+ * TODO:
+ * allocate more timeout table slots when table overflows.
+ */
+
+/*
+ * Bump a timeval by a small number of usec's.
+ */
+#define BUMPTIME(t, usec) { \
+ register volatile struct timeval *tp = (t); \
+ register long us; \
+ \
+ tp->tv_usec = us = tp->tv_usec + (usec); \
+ if (us >= 1000000) { \
+ tp->tv_usec = us - 1000000; \
+ tp->tv_sec++; \
+ } \
+}
+
+/* Mean frequency of the statistics clock; 0 if there is no second timer. */
+int stathz;
+/* Rate of the statistics clock while profiling; initclocks() defaults it. */
+int profhz;
+/* Count of processes being profiled; see startprofclock()/stopprofclock(). */
+int profprocs;
+/* Incremented once per hardclock() call. */
+int ticks;
+static int psdiv, pscnt; /* prof => stat divider */
+int psratio; /* ratio: prof / stat */
+
+/* Both are advanced by the same delta on every hardclock() call. */
+volatile struct timeval time;
+volatile struct timeval mono_time;
+
+/*
+ * Bring up the clocks: reset the profiling divider, let the machine
+ * dependent code start the hardware, then derive psratio (defaulting
+ * profhz to the statistics rate if it was left at zero).
+ */
+void
+initclocks()
+{
+	register int statrate;
+
+	/* Normal case: no division of the statistics clock. */
+	pscnt = 1;
+	psdiv = 1;
+	cpu_initclocks();
+
+	/* With no separate statistics clock, statistics run at hz. */
+	if (stathz != 0)
+		statrate = stathz;
+	else
+		statrate = hz;
+	if (profhz == 0)
+		profhz = statrate;
+	psratio = profhz / statrate;
+}
+
+/*
+ * The real-time timer, interrupting hz times per second.
+ *
+ * frame - clock interrupt frame; inspected with CLKF_USERMODE() and
+ * CLKF_BASEPRI(), and handed to statclock() when stathz is 0.
+ *
+ * Ages the callout queue, runs the current process's virtual and
+ * profiling interval timers, advances ticks, time and mono_time, and
+ * finally runs or schedules softclock() if any callout came due.
+ */
+void
+hardclock(frame)
+ register struct clockframe *frame;
+{
+ register struct callout *p1;
+ register struct proc *p;
+ register int delta, needsoft;
+ extern int tickdelta;
+ extern long timedelta;
+
+ /*
+ * Update real-time timeout queue.
+ * At front of queue are some number of events which are ``due''.
+ * The time to these is <= 0 and if negative represents the
+ * number of ticks which have passed since it was supposed to happen.
+ * The rest of the q elements (times > 0) are events yet to happen,
+ * where the time for each is given as a delta from the previous.
+ * Decrementing just the first of these serves to decrement the time
+ * to all events.
+ */
+ needsoft = 0;
+ for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
+ if (--p1->c_time > 0)
+ break;
+ /* This entry is due (c_time <= 0): softclock() has work. */
+ needsoft = 1;
+ if (p1->c_time == 0)
+ break;
+ }
+
+ p = curproc;
+ if (p) {
+ register struct pstats *pstats;
+
+ /*
+ * Run current process's virtual and profile time, as needed.
+ * The virtual timer only runs while in user mode; the
+ * profiling timer runs in either mode.
+ */
+ pstats = p->p_stats;
+ if (CLKF_USERMODE(frame) &&
+ timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+ psignal(p, SIGVTALRM);
+ if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+ psignal(p, SIGPROF);
+ }
+
+ /*
+ * If no separate statistics clock is available, run it from here.
+ */
+ if (stathz == 0)
+ statclock(frame);
+
+ /*
+ * Increment the time-of-day. The increment is just ``tick'' unless
+ * we are still adjusting the clock; see adjtime().
+ */
+ ticks++;
+ if (timedelta == 0)
+ delta = tick;
+ else {
+ /* Apply one step of the pending correction. */
+ delta = tick + tickdelta;
+ timedelta -= tickdelta;
+ }
+ BUMPTIME(&time, delta);
+ BUMPTIME(&mono_time, delta);
+
+ /*
+ * Process callouts at a very low cpu priority, so we don't keep the
+ * relatively high clock interrupt priority any longer than necessary.
+ */
+ if (needsoft) {
+ if (CLKF_BASEPRI(frame)) {
+ /*
+ * Save the overhead of a software interrupt;
+ * it will happen as soon as we return, so do it now.
+ */
+ (void)splsoftclock();
+ softclock();
+ } else
+ setsoftclock();
+ }
+}
+
+/*
+ * Software (low priority) clock interrupt.
+ * Run periodic events from timeout queue.
+ *
+ * Removes every entry at the head of calltodo whose c_time is <= 0,
+ * returns it to the callfree list, and then calls its function.
+ */
+/*ARGSUSED*/
+void
+softclock()
+{
+ register struct callout *c;
+ register void *arg;
+ register void (*func) __P((void *));
+ register int s;
+
+ s = splhigh();
+ while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
+ /*
+ * Save func and arg first: the entry goes back on the free
+ * list before the handler runs.
+ */
+ func = c->c_func;
+ arg = c->c_arg;
+ calltodo.c_next = c->c_next;
+ c->c_next = callfree;
+ callfree = c;
+ /* Call the handler at the priority we were entered with. */
+ splx(s);
+ (*func)(arg);
+ /* Block the clock again before re-examining the queue. */
+ (void) splhigh();
+ }
+ splx(s);
+}
+
+/*
+ * timeout --
+ * Execute a function after a specified length of time.
+ *
+ * untimeout --
+ * Cancel previous timeout function call.
+ *
+ * See AT&T BCI Driver Reference Manual for specification. This
+ * implementation differs from that one in that no identification
+ * value is returned from timeout, rather, the original arguments
+ * to timeout are used to identify entries for untimeout.
+ */
+/*
+ * Arrange for (*ftn)(arg) to be called after the given number of
+ * clock ticks; values <= 0 are treated as 1.  Panics with
+ * "timeout table full" if no free callout structure is left.
+ */
+void
+timeout(ftn, arg, ticks)
+ void (*ftn) __P((void *));
+ void *arg;
+ register int ticks;
+{
+ register struct callout *new, *p, *t;
+ register int s;
+
+ if (ticks <= 0)
+ ticks = 1;
+
+ /* Lock out the clock. */
+ s = splhigh();
+
+ /* Fill in the next free callout structure. */
+ if (callfree == NULL)
+ panic("timeout table full");
+ new = callfree;
+ callfree = new->c_next;
+ new->c_arg = arg;
+ new->c_func = ftn;
+
+ /*
+ * The time for each event is stored as a difference from the time
+ * of the previous event on the queue. Walk the queue, correcting
+ * the ticks argument for queue entries passed. Correct the ticks
+ * value for the queue entry immediately after the insertion point
+ * as well. Watch out for negative c_time values; these represent
+ * overdue events.
+ */
+ for (p = &calltodo;
+ (t = p->c_next) != NULL && ticks > t->c_time; p = t)
+ if (t->c_time > 0)
+ ticks -= t->c_time;
+ new->c_time = ticks;
+ /* The successor's delta is now relative to the new entry. */
+ if (t != NULL)
+ t->c_time -= ticks;
+
+ /* Insert the new entry into the queue. */
+ p->c_next = new;
+ new->c_next = t;
+ splx(s);
+}
+
+/*
+ * Cancel the first pending callout whose function and argument match
+ * ftn and arg.  Does nothing if no such entry is queued.
+ */
+void
+untimeout(ftn, arg)
+ void (*ftn) __P((void *));
+ void *arg;
+{
+ register struct callout *p, *t;
+ register int s;
+
+ s = splhigh();
+ for (p = &calltodo; (t = p->c_next) != NULL; p = t)
+ if (t->c_func == ftn && t->c_arg == arg) {
+ /*
+ * Increment next entry's tick count.
+ * Only a positive delta is handed on; an entry that
+ * is already due contributes nothing.
+ */
+ if (t->c_next && t->c_time > 0)
+ t->c_next->c_time += t->c_time;
+
+ /* Move entry from callout queue to callfree queue. */
+ p->c_next = t->c_next;
+ t->c_next = callfree;
+ callfree = t;
+ break;
+ }
+ splx(s);
+}
+
+/*
+ * Compute number of hz until specified time. Used to
+ * compute third argument to timeout() from an absolute time.
+ *
+ * tv - absolute time; compared against the global `time' with
+ * interrupts blocked (splhigh).
+ *
+ * Returns the tick count, clamped to 0x7fffffff.
+ */
+int
+hzto(tv)
+ struct timeval *tv;
+{
+ register long ticks, sec;
+ int s;
+
+ /*
+ * If number of milliseconds will fit in 32 bit arithmetic,
+ * then compute number of milliseconds to time and scale to
+ * ticks. Otherwise just compute number of hz in time, rounding
+ * times greater than representable to maximum value.
+ *
+ * Delta times less than 25 days can be computed ``exactly''.
+ * Maximum value for any timeout in 10ms ticks is 250 days.
+ *
+ * NOTE(review): the millisecond branch divides by (tick / 1000),
+ * which is zero whenever tick < 1000 (hz above 1000) -- confirm
+ * that no supported configuration runs the clock that fast.
+ * NOTE(review): a tv that is already in the past yields a zero or
+ * negative result here; timeout() treats ticks <= 0 as 1.
+ */
+ s = splhigh();
+ sec = tv->tv_sec - time.tv_sec;
+ if (sec <= 0x7fffffff / 1000 - 1000)
+ ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
+ (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
+ else if (sec <= 0x7fffffff / hz)
+ ticks = sec * hz;
+ else
+ ticks = 0x7fffffff;
+ splx(s);
+ return (ticks);
+}
+
+/*
+ * Start profiling on a process.
+ *
+ * Marks the process with P_PROFIL.  When it is the first profiled
+ * process and a separate statistics clock exists, that clock is
+ * switched to profhz and the prof => stat divider is set to psratio.
+ *
+ * Kernel profiling passes proc0 which never exits and hence
+ * keeps the profile clock running constantly.
+ */
+void
+startprofclock(p)
+	register struct proc *p;
+{
+	int s;
+
+	/* Already being profiled: nothing to do. */
+	if (p->p_flag & P_PROFIL)
+		return;
+	p->p_flag |= P_PROFIL;
+	profprocs++;
+	if (profprocs != 1 || stathz == 0)
+		return;
+
+	s = splstatclock();
+	pscnt = psratio;
+	psdiv = psratio;
+	setstatclockrate(profhz);
+	splx(s);
+}
+
+/*
+ * Stop profiling on a process.
+ *
+ * Clears P_PROFIL.  When the last profiled process goes away and a
+ * separate statistics clock exists, that clock is returned to stathz
+ * and the prof => stat divider is reset to 1.
+ */
+void
+stopprofclock(p)
+	register struct proc *p;
+{
+	int s;
+
+	/* Not being profiled: nothing to do. */
+	if ((p->p_flag & P_PROFIL) == 0)
+		return;
+	p->p_flag &= ~P_PROFIL;
+	profprocs--;
+	if (profprocs != 0 || stathz == 0)
+		return;
+
+	s = splstatclock();
+	pscnt = 1;
+	psdiv = 1;
+	setstatclockrate(stathz);
+	splx(s);
+}
+
+/* Exported copy of DK_NDRIVE, the number of ``drives'' statclock() tracks. */
+int dk_ndrive = DK_NDRIVE;
+
+/*
+ * Statistics clock. Grab profile sample, and if divider reaches 0,
+ * do process and kernel statistics.
+ *
+ * frame - clock interrupt frame; inspected with CLKF_USERMODE(),
+ * CLKF_PC() and CLKF_INTR().
+ *
+ * A profiling sample is taken on every call, but the cpu-state, disk
+ * and scheduling statistics are updated only when pscnt counts down
+ * to zero, i.e. on every psdiv-th call.
+ */
+void
+statclock(frame)
+ register struct clockframe *frame;
+{
+#ifdef GPROF
+ register struct gmonparam *g;
+#endif
+ register struct proc *p;
+ register int i;
+
+ if (CLKF_USERMODE(frame)) {
+ p = curproc;
+ if (p->p_flag & P_PROFIL)
+ addupc_intr(p, CLKF_PC(frame), 1);
+ if (--pscnt > 0)
+ return;
+ /*
+ * Came from user mode; CPU was in user state.
+ * Charge the tick to the process's user time and to the
+ * nice or user cpu-state counter.
+ */
+ p->p_uticks++;
+ if (p->p_nice > NZERO)
+ cp_time[CP_NICE]++;
+ else
+ cp_time[CP_USER]++;
+ } else {
+#ifdef GPROF
+ /*
+ * Kernel statistics are just like addupc_intr, only easier.
+ */
+ g = &_gmonparam;
+ if (g->state == GMON_PROF_ON) {
+ i = CLKF_PC(frame) - g->lowpc;
+ if (i < g->textsize) {
+ i /= HISTFRACTION * sizeof(*g->kcount);
+ g->kcount[i]++;
+ }
+ }
+#endif
+ if (--pscnt > 0)
+ return;
+ /*
+ * Came from kernel mode, so we were:
+ * - handling an interrupt,
+ * - doing syscall or trap work on behalf of the current
+ * user process, or
+ * - spinning in the idle loop.
+ * Whichever it is, charge the time as appropriate.
+ * Note that we charge interrupts to the current process,
+ * regardless of whether they are ``for'' that process,
+ * so that we know how much of its real time was spent
+ * in ``non-process'' (i.e., interrupt) work.
+ */
+ p = curproc;
+ if (CLKF_INTR(frame)) {
+ if (p != NULL)
+ p->p_iticks++;
+ cp_time[CP_INTR]++;
+ } else if (p != NULL) {
+ p->p_sticks++;
+ cp_time[CP_SYS]++;
+ } else
+ cp_time[CP_IDLE]++;
+ }
+ /* Restart the countdown to the next statistics update. */
+ pscnt = psdiv;
+
+ /*
+ * We maintain statistics shown by user-level statistics
+ * programs: the amount of time in each cpu state, and
+ * the amount of time each of DK_NDRIVE ``drives'' is busy.
+ *
+ * XXX should either run linked list of drives, or (better)
+ * grab timestamps in the start & done code.
+ */
+ for (i = 0; i < DK_NDRIVE; i++)
+ if (dk_busy & (1 << i))
+ dk_time[i]++;
+
+ /*
+ * We adjust the priority of the current process. The priority of
+ * a process gets worse as it accumulates CPU time. The cpu usage
+ * estimator (p_estcpu) is increased here. The formula for computing
+ * priorities (in kern_synch.c) will compute a different value each
+ * time p_estcpu increases by 4. The cpu usage estimator ramps up
+ * quite quickly when the process is running (linearly), and decays
+ * away exponentially, at a rate which is proportionally slower when
+ * the system is busy. The basic principle is that the system will
+ * 90% forget that the process used a lot of CPU time in 5 * loadav
+ * seconds. This causes the system to favor processes which haven't
+ * run much recently, and to round-robin among other processes.
+ */
+ if (p != NULL) {
+ p->p_cpticks++;
+ /* Saturate: undo an increment that wrapped to zero. */
+ if (++p->p_estcpu == 0)
+ p->p_estcpu--;
+ if ((p->p_estcpu & 3) == 0) {
+ resetpriority(p);
+ if (p->p_priority >= PUSER)
+ p->p_priority = p->p_usrpri;
+ }
+ }
+}
+
+/*
+ * Return information about system clocks.
+ *
+ * Builds a struct clockinfo from hz, tick, profhz and stathz (hz is
+ * reported as the statistics rate when stathz is 0) and passes it to
+ * sysctl_rdstruct() as a read-only value.
+ */
+sysctl_clockrate(where, sizep)
+	register char *where;
+	size_t *sizep;
+{
+	struct clockinfo ci;
+
+	ci.hz = hz;
+	ci.tick = tick;
+	ci.profhz = profhz;
+	if (stathz != 0)
+		ci.stathz = stathz;
+	else
+		ci.stathz = hz;
+	return (sysctl_rdstruct(where, sizep, NULL, &ci, sizeof(ci)));
+}
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
new file mode 100644
index 000000000000..4dadcb8e0b9d
--- /dev/null
+++ b/sys/kern/kern_time.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_time.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+
+#include <machine/cpu.h>
+
+/*
+ * Time of day and interval timer support.
+ *
+ * These routines provide the kernel entry points to get and set
+ * the time-of-day and per-process interval timers. Subroutines
+ * here provide support for adding and subtracting timeval structures
+ * and decrementing interval timers, optionally reloading the interval
+ * timers when they expire.
+ */
+
+struct gettimeofday_args {
+	struct timeval *tp;
+	struct timezone *tzp;
+};
+/*
+ * gettimeofday system call.
+ *
+ * Copies the current time (from microtime()) out to uap->tp and the
+ * kernel's timezone out to uap->tzp; either pointer may be NULL to
+ * skip that part.  Returns 0 or the error from copyout().
+ */
+/* ARGSUSED */
+gettimeofday(p, uap, retval)
+	struct proc *p;
+	register struct gettimeofday_args *uap;
+	int *retval;
+{
+	struct timeval now;
+	int error;
+
+	if (uap->tp != NULL) {
+		microtime(&now);
+		error = copyout((caddr_t)&now, (caddr_t)uap->tp,
+		    sizeof (now));
+		if (error != 0)
+			return (error);
+	}
+	if (uap->tzp == NULL)
+		return (0);
+	return (copyout((caddr_t)&tz, (caddr_t)uap->tzp, sizeof (tz)));
+}
+
+struct settimeofday_args {
+ struct timeval *tv;
+ struct timezone *tzp;
+};
+/*
+ * settimeofday system call.
+ *
+ * Requires suser().  Sets the time of day from uap->tv and/or the
+ * timezone from uap->tzp; either pointer may be NULL.  Returns the
+ * error from suser() or copyin(), otherwise 0.
+ */
+/* ARGSUSED */
+settimeofday(p, uap, retval)
+ struct proc *p;
+ struct settimeofday_args *uap;
+ int *retval;
+{
+ struct timeval atv, delta;
+ struct timezone atz;
+ int error, s;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ /* Verify all parameters before changing time. */
+ if (uap->tv &&
+ (error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof(atv))))
+ return (error);
+ if (uap->tzp &&
+ (error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz))))
+ return (error);
+ if (uap->tv) {
+ /* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */
+ s = splclock();
+ /* nb. delta.tv_usec may be < 0, but this is OK here */
+ delta.tv_sec = atv.tv_sec - time.tv_sec;
+ delta.tv_usec = atv.tv_usec - time.tv_usec;
+ time = atv;
+ /*
+ * The clock itself is updated; the remaining bookkeeping
+ * runs at splsoftclock.  boottime and runtime are shifted
+ * by the same delta as the time of day.
+ */
+ (void) splsoftclock();
+ timevaladd(&boottime, &delta);
+ timevalfix(&boottime);
+ timevaladd(&runtime, &delta);
+ timevalfix(&runtime);
+ LEASE_UPDATETIME(delta.tv_sec);
+ splx(s);
+ resettodr();
+ }
+ if (uap->tzp)
+ tz = atz;
+ return (0);
+}
+
+/*
+ * Clock slewing state: set by adjtime(), consumed by hardclock(),
+ * which adds tickdelta to each tick while timedelta is non-zero.
+ */
+extern int tickadj; /* "standard" clock skew, us./tick */
+int tickdelta; /* current clock skew, us. per tick */
+long timedelta; /* unapplied time correction, us. */
+long bigadj = 1000000; /* use 10x skew above bigadj us. */
+
+struct adjtime_args {
+	struct timeval *delta;
+	struct timeval *olddelta;
+};
+/*
+ * adjtime system call.
+ *
+ * Requires suser().  Installs a new gradual clock correction read
+ * from uap->delta and, if uap->olddelta is non-NULL, reports the part
+ * of the previous correction that had not been applied yet.
+ *
+ * Returns the error from suser() or copyin(), otherwise 0.  A failure
+ * to copy out the old delta is deliberately ignored.
+ */
+/* ARGSUSED */
+adjtime(p, uap, retval)
+	struct proc *p;
+	register struct adjtime_args *uap;
+	int *retval;
+{
+	struct timeval atv;
+	register long ndelta, ntickdelta, odelta;
+	int s, error;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	if (error =
+	    copyin((caddr_t)uap->delta, (caddr_t)&atv, sizeof(struct timeval)))
+		return (error);
+
+	/*
+	 * Compute the total correction and the rate at which to apply it.
+	 * Round the adjustment down to a whole multiple of the per-tick
+	 * delta, so that after some number of incremental changes in
+	 * hardclock(), tickdelta will become zero, lest the correction
+	 * overshoot and start taking us away from the desired final time.
+	 *
+	 * The 10x rate is chosen on the magnitude of the correction, so
+	 * that a large backward adjustment is slewed as quickly as a
+	 * large forward one.
+	 */
+	ndelta = atv.tv_sec * 1000000 + atv.tv_usec;
+	if (ndelta > bigadj || ndelta < -bigadj)
+		ntickdelta = 10 * tickadj;
+	else
+		ntickdelta = tickadj;
+	if (ndelta % ntickdelta)
+		ndelta = ndelta / ntickdelta * ntickdelta;
+
+	/*
+	 * To make hardclock()'s job easier, make the per-tick delta negative
+	 * if we want time to run slower; then hardclock can simply compute
+	 * tick + tickdelta, and subtract tickdelta from timedelta.
+	 */
+	if (ndelta < 0)
+		ntickdelta = -ntickdelta;
+	/* Swap in the new correction atomically with respect to the clock. */
+	s = splclock();
+	odelta = timedelta;
+	timedelta = ndelta;
+	tickdelta = ntickdelta;
+	splx(s);
+
+	if (uap->olddelta) {
+		atv.tv_sec = odelta / 1000000;
+		atv.tv_usec = odelta % 1000000;
+		(void) copyout((caddr_t)&atv, (caddr_t)uap->olddelta,
+		    sizeof(struct timeval));
+	}
+	return (0);
+}
+
+/*
+ * Get value of an interval timer. The process virtual and
+ * profiling virtual time timers are kept in the p_stats area, since
+ * they can be swapped out. These are kept internally in the
+ * way they are specified externally: in time until they expire.
+ *
+ * The real time interval timer is kept in the process table slot
+ * for the process, and its value (it_value) is kept as an
+ * absolute time rather than as a delta, so that it is easy to keep
+ * periodic real-time signals from drifting.
+ *
+ * Virtual time timers are processed in the hardclock() routine of
+ * kern_clock.c. The real time timer is processed by a timeout
+ * routine, called from the softclock() routine. Since a callout
+ * may be delayed in real time due to interrupt processing in the system,
+ * it is possible for the real time timeout routine (realitexpire, given below),
+ * to be delayed in real time past when it is supposed to occur. It
+ * does not suffice, therefore, to reload the real timer .it_value from the
+ * real time timers .it_interval. Rather, we compute the next time in
+ * absolute time the timer should go off.
+ */
+struct getitimer_args {
+ u_int which;
+ struct itimerval *itv;
+};
+/*
+ * getitimer system call: copy the selected interval timer out to
+ * uap->itv.  Returns EINVAL if uap->which is above ITIMER_PROF,
+ * otherwise the result of copyout().
+ */
+/* ARGSUSED */
+getitimer(p, uap, retval)
+ struct proc *p;
+ register struct getitimer_args *uap;
+ int *retval;
+{
+ struct itimerval aitv;
+ int s;
+
+ if (uap->which > ITIMER_PROF)
+ return (EINVAL);
+ s = splclock();
+ if (uap->which == ITIMER_REAL) {
+ /*
+ * Convert from absolute to relative time in .it_value
+ * part of real time timer. If time for real time timer
+ * has passed return 0, else return difference between
+ * current time and time for the timer to go off.
+ */
+ aitv = p->p_realtimer;
+ /* The else below pairs with the inner (timercmp) test. */
+ if (timerisset(&aitv.it_value))
+ if (timercmp(&aitv.it_value, &time, <))
+ timerclear(&aitv.it_value);
+ else
+ timevalsub(&aitv.it_value,
+ (struct timeval *)&time);
+ } else
+ aitv = p->p_stats->p_timer[uap->which];
+ splx(s);
+ return (copyout((caddr_t)&aitv, (caddr_t)uap->itv,
+ sizeof (struct itimerval)));
+}
+
+struct setitimer_args {
+ u_int which;
+ struct itimerval *itv, *oitv;
+};
+/*
+ * setitimer system call: optionally report the old value of the
+ * selected interval timer through uap->oitv, then load the new value
+ * from uap->itv (if non-NULL).
+ *
+ * Returns EINVAL for a bad timer number or an unacceptable timeval,
+ * the error from copyin() or getitimer(), otherwise 0.
+ */
+/* ARGSUSED */
+setitimer(p, uap, retval)
+ struct proc *p;
+ register struct setitimer_args *uap;
+ int *retval;
+{
+ struct itimerval aitv;
+ register struct itimerval *itvp;
+ int s, error;
+
+ if (uap->which > ITIMER_PROF)
+ return (EINVAL);
+ itvp = uap->itv;
+ if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv,
+ sizeof(struct itimerval))))
+ return (error);
+ /*
+ * Fetch the old value by overwriting uap->itv with uap->oitv and
+ * handing the same argument block to getitimer(), which reads only
+ * the which and itv members.  The new-value pointer was saved in
+ * itvp above.
+ */
+ if ((uap->itv = uap->oitv) && (error = getitimer(p, uap, retval)))
+ return (error);
+ if (itvp == 0)
+ return (0);
+ if (itimerfix(&aitv.it_value) || itimerfix(&aitv.it_interval))
+ return (EINVAL);
+ s = splclock();
+ if (uap->which == ITIMER_REAL) {
+ /*
+ * Cancel any pending expiry, then store it_value as an
+ * absolute time and schedule realitexpire() for it.
+ */
+ untimeout(realitexpire, (caddr_t)p);
+ if (timerisset(&aitv.it_value)) {
+ timevaladd(&aitv.it_value, (struct timeval *)&time);
+ timeout(realitexpire, (caddr_t)p, hzto(&aitv.it_value));
+ }
+ p->p_realtimer = aitv;
+ } else
+ p->p_stats->p_timer[uap->which] = aitv;
+ splx(s);
+ return (0);
+}
+
+/*
+ * Real interval timer expired:
+ * send process whose timer expired an alarm signal.
+ * If time is not set up to reload, then just return.
+ * Else compute next time timer should go off which is > current time.
+ * This is where delay in processing this timeout causes multiple
+ * SIGALRM calls to be compressed into one.
+ *
+ * arg - the struct proc whose timer expired, as passed to timeout().
+ */
+void
+realitexpire(arg)
+ void *arg;
+{
+ register struct proc *p;
+ int s;
+
+ p = (struct proc *)arg;
+ psignal(p, SIGALRM);
+ if (!timerisset(&p->p_realtimer.it_interval)) {
+ /* One-shot timer: mark it as no longer set. */
+ timerclear(&p->p_realtimer.it_value);
+ return;
+ }
+ /*
+ * Step it_value forward one interval at a time until it lies
+ * in the future, then schedule the next expiry.
+ */
+ for (;;) {
+ s = splclock();
+ timevaladd(&p->p_realtimer.it_value,
+ &p->p_realtimer.it_interval);
+ if (timercmp(&p->p_realtimer.it_value, &time, >)) {
+ timeout(realitexpire, (caddr_t)p,
+ hzto(&p->p_realtimer.it_value));
+ splx(s);
+ return;
+ }
+ splx(s);
+ }
+}
+
+/*
+ * Validate a timeval that is about to be loaded into the .it_value
+ * or .it_interval part of an interval timer.
+ *
+ * Returns EINVAL if the seconds are outside 0..100000000 or the
+ * microseconds outside 0..999999.  Otherwise returns 0, after raising
+ * a non-zero value shorter than one clock tick to exactly one tick.
+ */
+itimerfix(tv)
+	struct timeval *tv;
+{
+
+	if (tv->tv_sec < 0 || tv->tv_sec > 100000000)
+		return (EINVAL);
+	if (tv->tv_usec < 0 || tv->tv_usec >= 1000000)
+		return (EINVAL);
+	/* Below the clock resolution: round up to one tick. */
+	if (tv->tv_sec == 0 && tv->tv_usec > 0 && tv->tv_usec < tick)
+		tv->tv_usec = tick;
+	return (0);
+}
+
+/*
+ * Decrement an interval timer by a specified number
+ * of microseconds, which must be less than a second,
+ * i.e. < 1000000. If the timer expires, then reload
+ * it. In this case, carry over (usec - old value) to
+ * reduce the value reloaded into the timer so that
+ * the timer does not drift. This routine assumes
+ * that it is called in a context where the timers
+ * on which it is operating cannot change in value.
+ *
+ * Returns 1 while the timer is still running and 0 when it
+ * expired during this call.
+ */
+itimerdecr(itp, usec)
+ register struct itimerval *itp;
+ int usec;
+{
+
+ if (itp->it_value.tv_usec < usec) {
+ if (itp->it_value.tv_sec == 0) {
+ /* expired, and already in next interval */
+ usec -= itp->it_value.tv_usec;
+ goto expire;
+ }
+ /* Borrow one second so the subtraction cannot go negative. */
+ itp->it_value.tv_usec += 1000000;
+ itp->it_value.tv_sec--;
+ }
+ itp->it_value.tv_usec -= usec;
+ usec = 0;
+ if (timerisset(&itp->it_value))
+ return (1);
+ /* expired, exactly at end of interval */
+expire:
+ if (timerisset(&itp->it_interval)) {
+ /* Reload, less the overshoot carried in usec. */
+ itp->it_value = itp->it_interval;
+ itp->it_value.tv_usec -= usec;
+ if (itp->it_value.tv_usec < 0) {
+ itp->it_value.tv_usec += 1000000;
+ itp->it_value.tv_sec--;
+ }
+ } else
+ itp->it_value.tv_usec = 0; /* sec is already 0 */
+ return (0);
+}
+
+/*
+ * Add and subtract routines for timevals.
+ * N.B.: subtract routine doesn't deal with
+ * results which are before the beginning,
+ * it just gets very confused in this case.
+ * Caveat emptor.
+ */
+/*
+ * t1 += t2, with the result normalized by timevalfix().
+ */
+timevaladd(t1, t2)
+	struct timeval *t1, *t2;
+{
+
+	t1->tv_usec += t2->tv_usec;
+	t1->tv_sec += t2->tv_sec;
+	timevalfix(t1);
+}
+
+/*
+ * t1 -= t2, with the result normalized by timevalfix().
+ */
+timevalsub(t1, t2)
+	struct timeval *t1, *t2;
+{
+
+	t1->tv_usec -= t2->tv_usec;
+	t1->tv_sec -= t2->tv_sec;
+	timevalfix(t1);
+}
+
+/*
+ * Normalize a timeval whose tv_usec is at most one second out of
+ * range in either direction, by borrowing from or carrying into
+ * tv_sec.
+ */
+timevalfix(t1)
+	struct timeval *t1;
+{
+
+	if (t1->tv_usec < 0) {
+		/* Borrow a second. */
+		t1->tv_usec += 1000000;
+		t1->tv_sec--;
+	} else if (t1->tv_usec >= 1000000) {
+		/* Carry a second. */
+		t1->tv_usec -= 1000000;
+		t1->tv_sec++;
+	}
+}
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
new file mode 100644
index 000000000000..f42900cb75d2
--- /dev/null
+++ b/sys/kern/kern_timeout.c
@@ -0,0 +1,528 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/dkstat.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+
+/*
+ * Clock handling routines.
+ *
+ * This code is written to operate with two timers that run independently of
+ * each other. The main clock, running hz times per second, is used to keep
+ * track of real time. The second timer handles kernel and user profiling,
+ * and does resource use estimation. If the second timer is programmable,
+ * it is randomized to avoid aliasing between the two clocks. For example,
+ * the randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires. Otherwise, it would never accumulate
+ * cpu ticks. The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we drive
+ * profiling and statistics off the main clock. This WILL NOT be accurate;
+ * do not do it unless absolutely necessary.
+ *
+ * The statistics clock may (or may not) be run at a higher rate while
+ * profiling. This profile clock runs at profhz. We require that profhz
+ * be an integral multiple of stathz.
+ *
+ * If the statistics clock is running fast, it must be divided by the ratio
+ * profhz/stathz for statistics. (For profiling, every tick counts.)
+ */
+
+/*
+ * TODO:
+ * allocate more timeout table slots when table overflows.
+ */
+
+/*
+ * Bump a timeval by a small number of usec's.
+ *
+ * `t' points to the timeval to advance and `usec' is the increment.
+ * Only a single carry into tv_sec is performed, so the increment must
+ * leave tv_usec below 2000000.  The body is wrapped in do/while (0) so
+ * that the macro behaves as one statement, e.g. in an unbraced if/else.
+ */
+#define	BUMPTIME(t, usec) do { \
+	register volatile struct timeval *tp = (t); \
+	register long us; \
+ \
+	tp->tv_usec = us = tp->tv_usec + (usec); \
+	if (us >= 1000000) { \
+		tp->tv_usec = us - 1000000; \
+		tp->tv_sec++; \
+	} \
+} while (0)
+
+int stathz; /* mean statistics clock rate; 0 if there is no second timer */
+int profhz; /* profiling clock rate; initclocks() defaults it to the stat rate */
+int profprocs; /* count of processes with P_PROFIL set (see startprofclock) */
+int ticks; /* incremented once per hardclock() call */
+static int psdiv, pscnt; /* prof => stat divider */
+int psratio; /* ratio: prof / stat */
+
+/* Both are advanced by the same delta on every hardclock() call. */
+volatile struct timeval time;
+volatile struct timeval mono_time;
+
+/*
+ * Initialize clock frequencies and start both clocks running.
+ */
+void
+initclocks()
+{
+	register int statrate;
+
+	/*
+	 * Start with the statistics divider at 1 (no profiling in
+	 * progress), then hand over to the machine-dependent code.
+	 */
+	pscnt = 1;
+	psdiv = 1;
+	cpu_initclocks();
+
+	/*
+	 * Statistics run off stathz when a separate clock exists and off
+	 * hz otherwise.  An unset profhz defaults to that same rate;
+	 * psratio is then profhz expressed as a multiple of it.
+	 */
+	if (stathz != 0)
+		statrate = stathz;
+	else
+		statrate = hz;
+	if (profhz == 0)
+		profhz = statrate;
+	psratio = profhz / statrate;
+}
+
+/*
+ * The real-time timer, interrupting hz times per second.
+ */
+void
+hardclock(frame)
+ register struct clockframe *frame;
+{
+ register struct callout *p1;
+ register struct proc *p;
+ register int delta, needsoft;
+ /* Clock-adjustment state consumed below; see adjtime(). */
+ extern int tickdelta;
+ extern long timedelta;
+
+ /*
+ * Update real-time timeout queue.
+ * At front of queue are some number of events which are ``due''.
+ * The time to these is <= 0 and if negative represents the
+ * number of ticks which have passed since it was supposed to happen.
+ * The rest of the q elements (times > 0) are events yet to happen,
+ * where the time for each is given as a delta from the previous.
+ * Decrementing just the first of these serves to decrement the time
+ * to all events.
+ */
+ needsoft = 0;
+ for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
+ if (--p1->c_time > 0)
+ break;
+ /* this entry is due or overdue, so softclock() has work to do */
+ needsoft = 1;
+ if (p1->c_time == 0)
+ break;
+ /* entry was already overdue (now negative): age the next one too */
+ }
+
+ p = curproc;
+ if (p) {
+ register struct pstats *pstats;
+
+ /*
+ * Run current process's virtual and profile time, as needed.
+ * The virtual timer is charged only for ticks taken in user
+ * mode; the profiling timer is charged for every tick.
+ * itimerdecr() returning 0 means the timer just expired.
+ */
+ pstats = p->p_stats;
+ if (CLKF_USERMODE(frame) &&
+ timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+ psignal(p, SIGVTALRM);
+ if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+ psignal(p, SIGPROF);
+ }
+
+ /*
+ * If no separate statistics clock is available, run it from here.
+ */
+ if (stathz == 0)
+ statclock(frame);
+
+ /*
+ * Increment the time-of-day. The increment is just ``tick'' unless
+ * we are still adjusting the clock; see adjtime().
+ */
+ ticks++;
+ if (timedelta == 0)
+ delta = tick;
+ else {
+ /* apply one step of the pending adjustment and account for it */
+ delta = tick + tickdelta;
+ timedelta -= tickdelta;
+ }
+ BUMPTIME(&time, delta);
+ BUMPTIME(&mono_time, delta);
+
+ /*
+ * Process callouts at a very low cpu priority, so we don't keep the
+ * relatively high clock interrupt priority any longer than necessary.
+ */
+ if (needsoft) {
+ if (CLKF_BASEPRI(frame)) {
+ /*
+ * Save the overhead of a software interrupt;
+ * it will happen as soon as we return, so do it now.
+ */
+ (void)splsoftclock();
+ softclock();
+ } else
+ setsoftclock();
+ }
+}
+
+/*
+ * Software (low priority) clock interrupt.
+ * Run periodic events from timeout queue.
+ */
+/*ARGSUSED*/
+void
+softclock()
+{
+ register struct callout *c;
+ register void *arg;
+ register void (*func) __P((void *));
+ register int s;
+
+ /*
+ * The queue is only examined and modified at splhigh. Each due
+ * entry (c_time <= 0) is unlinked and put back on the free list
+ * before its function is called, so the function and argument are
+ * copied out first.
+ */
+ s = splhigh();
+ while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
+ func = c->c_func;
+ arg = c->c_arg;
+ calltodo.c_next = c->c_next;
+ c->c_next = callfree;
+ callfree = c;
+ /* run the handler at the priority we were entered with */
+ splx(s);
+ (*func)(arg);
+ /* block the clock again before looking at the queue head */
+ (void) splhigh();
+ }
+ splx(s);
+}
+
+/*
+ * timeout --
+ * Execute a function after a specified length of time.
+ *
+ * untimeout --
+ * Cancel previous timeout function call.
+ *
+ * See AT&T BCI Driver Reference Manual for specification. This
+ * implementation differs from that one in that no identification
+ * value is returned from timeout, rather, the original arguments
+ * to timeout are used to identify entries for untimeout.
+ */
+void
+timeout(ftn, arg, ticks)
+ void (*ftn) __P((void *));
+ void *arg;
+ register int ticks;
+{
+ register struct callout *new, *p, *t;
+ register int s;
+
+ /*
+ * NB: the `ticks' parameter hides the file-scope tick counter of
+ * the same name inside this function.
+ * A non-positive delay is treated as one tick.
+ */
+ if (ticks <= 0)
+ ticks = 1;
+
+ /* Lock out the clock. */
+ s = splhigh();
+
+ /* Fill in the next free callout structure. */
+ if (callfree == NULL)
+ panic("timeout table full");
+ new = callfree;
+ callfree = new->c_next;
+ new->c_arg = arg;
+ new->c_func = ftn;
+
+ /*
+ * The time for each event is stored as a difference from the time
+ * of the previous event on the queue. Walk the queue, correcting
+ * the ticks argument for queue entries passed. Correct the ticks
+ * value for the queue entry immediately after the insertion point
+ * as well. Watch out for negative c_time values; these represent
+ * overdue events.
+ *
+ * The walk stops at the first entry whose delta is >= the remaining
+ * ticks, so the new entry goes in front of an existing entry that
+ * expires on the same tick.
+ */
+ for (p = &calltodo;
+ (t = p->c_next) != NULL && ticks > t->c_time; p = t)
+ if (t->c_time > 0)
+ ticks -= t->c_time;
+ new->c_time = ticks;
+ if (t != NULL)
+ t->c_time -= ticks;
+
+ /* Insert the new entry into the queue. */
+ p->c_next = new;
+ new->c_next = t;
+ splx(s);
+}
+
+void
+untimeout(ftn, arg)
+	void (*ftn) __P((void *));
+	void *arg;
+{
+	register struct callout *prev, *cur;
+	register int s;
+
+	/*
+	 * Cancel the first pending callout that was registered with
+	 * exactly this function and argument.  Does nothing when no
+	 * such entry is on the queue.
+	 */
+	s = splhigh();
+	prev = &calltodo;
+	while ((cur = prev->c_next) != NULL) {
+		if (cur->c_func != ftn || cur->c_arg != arg) {
+			prev = cur;
+			continue;
+		}
+
+		/*
+		 * Times are stored as deltas, so hand this entry's
+		 * remaining (positive) delta on to its successor.
+		 */
+		if (cur->c_time > 0 && cur->c_next != NULL)
+			cur->c_next->c_time += cur->c_time;
+
+		/* Unlink from the callout queue, push onto the free list. */
+		prev->c_next = cur->c_next;
+		cur->c_next = callfree;
+		callfree = cur;
+		break;
+	}
+	splx(s);
+}
+
+/*
+ * Compute the number of clock ticks (at hz ticks per second) from now
+ * until the specified time. Used to compute the third argument to
+ * timeout() from an absolute time.
+ */
+int
+hzto(tv)
+ struct timeval *tv;
+{
+ /* NB: the local `ticks' hides the file-scope tick counter. */
+ register long ticks, sec;
+ int s;
+
+ /*
+ * If number of milliseconds will fit in 32 bit arithmetic,
+ * then compute number of milliseconds to time and scale to
+ * ticks. Otherwise just compute number of hz in time, rounding
+ * times greater than representable to maximum value.
+ *
+ * Delta times less than 25 days can be computed ``exactly''.
+ * Maximum value for any timeout in 10ms ticks is 250 days.
+ *
+ * NOTE(review): the first branch divides by (tick / 1000), which
+ * is zero whenever tick < 1000 (i.e. hz > 1000) -- confirm that no
+ * supported configuration runs the clock that fast.
+ * NOTE(review): a time already in the past gives a zero or negative
+ * result here; timeout() raises such values to 1.
+ */
+ /* read the current time with the clock interrupt blocked */
+ s = splhigh();
+ sec = tv->tv_sec - time.tv_sec;
+ if (sec <= 0x7fffffff / 1000 - 1000)
+ ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
+ (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
+ else if (sec <= 0x7fffffff / hz)
+ ticks = sec * hz;
+ else
+ ticks = 0x7fffffff;
+ splx(s);
+ return (ticks);
+}
+
+/*
+ * Start profiling on a process.
+ *
+ * Kernel profiling passes proc0 which never exits and hence
+ * keeps the profile clock running constantly.
+ */
+void
+startprofclock(p)
+	register struct proc *p;
+{
+	int s;
+
+	/* Already marked as profiling: nothing to do. */
+	if (p->p_flag & P_PROFIL)
+		return;
+	p->p_flag |= P_PROFIL;
+
+	/*
+	 * Only the first profiled process switches the statistics clock
+	 * to the profiling rate, and only when a separate statistics
+	 * clock exists at all.
+	 */
+	profprocs++;
+	if (profprocs != 1 || stathz == 0)
+		return;
+	s = splstatclock();
+	psdiv = pscnt = psratio;
+	setstatclockrate(profhz);
+	splx(s);
+}
+
+/*
+ * Stop profiling on a process.
+ */
+void
+stopprofclock(p)
+	register struct proc *p;
+{
+	int s;
+
+	/* Not being profiled: nothing to undo. */
+	if ((p->p_flag & P_PROFIL) == 0)
+		return;
+	p->p_flag &= ~P_PROFIL;
+
+	/*
+	 * When the last profiled process goes away, drop the statistics
+	 * clock back to its normal rate (if there is a separate one).
+	 */
+	profprocs--;
+	if (profprocs != 0 || stathz == 0)
+		return;
+	s = splstatclock();
+	psdiv = pscnt = 1;
+	setstatclockrate(stathz);
+	splx(s);
+}
+
+/*
+ * Run-time copy of the compile-time DK_NDRIVE constant.
+ * NOTE(review): presumably exported so code outside this file can
+ * learn the size of the per-drive tables -- confirm against users.
+ */
+int dk_ndrive = DK_NDRIVE;
+
+/*
+ * Statistics clock. Grab profile sample, and if divider reaches 0,
+ * do process and kernel statistics.
+ */
+void
+statclock(frame)
+ register struct clockframe *frame;
+{
+#ifdef GPROF
+ register struct gmonparam *g;
+#endif
+ register struct proc *p;
+ register int i;
+
+ if (CLKF_USERMODE(frame)) {
+ p = curproc;
+ /* If this process is being profiled, record the sample. */
+ if (p->p_flag & P_PROFIL)
+ addupc_intr(p, CLKF_PC(frame), 1);
+ /* only every psdiv-th tick is a statistics tick */
+ if (--pscnt > 0)
+ return;
+ /*
+ * Came from user mode; CPU was in user state.
+ * Charge the tick to the process and to the nice or
+ * user cpu state.
+ */
+ p->p_uticks++;
+ if (p->p_nice > NZERO)
+ cp_time[CP_NICE]++;
+ else
+ cp_time[CP_USER]++;
+ } else {
+#ifdef GPROF
+ /*
+ * Kernel statistics are just like addupc_intr, only easier.
+ */
+ g = &_gmonparam;
+ if (g->state == GMON_PROF_ON) {
+ i = CLKF_PC(frame) - g->lowpc;
+ if (i < g->textsize) {
+ i /= HISTFRACTION * sizeof(*g->kcount);
+ g->kcount[i]++;
+ }
+ }
+#endif
+ /* only every psdiv-th tick is a statistics tick */
+ if (--pscnt > 0)
+ return;
+ /*
+ * Came from kernel mode, so we were:
+ * - handling an interrupt,
+ * - doing syscall or trap work on behalf of the current
+ * user process, or
+ * - spinning in the idle loop.
+ * Whichever it is, charge the time as appropriate.
+ * Note that we charge interrupts to the current process,
+ * regardless of whether they are ``for'' that process,
+ * so that we know how much of its real time was spent
+ * in ``non-process'' (i.e., interrupt) work.
+ */
+ p = curproc;
+ if (CLKF_INTR(frame)) {
+ if (p != NULL)
+ p->p_iticks++;
+ cp_time[CP_INTR]++;
+ } else if (p != NULL) {
+ p->p_sticks++;
+ cp_time[CP_SYS]++;
+ } else
+ cp_time[CP_IDLE]++;
+ }
+ /* statistics tick taken: restart the prof => stat countdown */
+ pscnt = psdiv;
+
+ /*
+ * We maintain statistics shown by user-level statistics
+ * programs: the amount of time in each cpu state, and
+ * the amount of time each of DK_NDRIVE ``drives'' is busy.
+ *
+ * XXX should either run linked list of drives, or (better)
+ * grab timestamps in the start & done code.
+ */
+ for (i = 0; i < DK_NDRIVE; i++)
+ if (dk_busy & (1 << i))
+ dk_time[i]++;
+
+ /*
+ * We adjust the priority of the current process. The priority of
+ * a process gets worse as it accumulates CPU time. The cpu usage
+ * estimator (p_estcpu) is increased here. The formula for computing
+ * priorities (in kern_synch.c) will compute a different value each
+ * time p_estcpu increases by 4. The cpu usage estimator ramps up
+ * quite quickly when the process is running (linearly), and decays
+ * away exponentially, at a rate which is proportionally slower when
+ * the system is busy. The basic principle is that the system will
+ * 90% forget that the process used a lot of CPU time in 5 * loadav
+ * seconds. This causes the system to favor processes which haven't
+ * run much recently, and to round-robin among other processes.
+ */
+ if (p != NULL) {
+ p->p_cpticks++;
+ /* if the increment wrapped p_estcpu to 0, step back so it saturates */
+ if (++p->p_estcpu == 0)
+ p->p_estcpu--;
+ if ((p->p_estcpu & 3) == 0) {
+ resetpriority(p);
+ if (p->p_priority >= PUSER)
+ p->p_priority = p->p_usrpri;
+ }
+ }
+}
+
+/*
+ * Return information about system clocks.
+ */
+sysctl_clockrate(where, sizep)
+	register char *where;
+	size_t *sizep;
+{
+	struct clockinfo ci;
+
+	/*
+	 * Fill in a clockinfo snapshot.  When there is no separate
+	 * statistics clock (stathz == 0) the main clock rate is
+	 * reported in its place.
+	 */
+	if (stathz != 0)
+		ci.stathz = stathz;
+	else
+		ci.stathz = hz;
+	ci.profhz = profhz;
+	ci.tick = tick;
+	ci.hz = hz;
+
+	/* Hand the structure to the caller as a read-only sysctl value. */
+	return (sysctl_rdstruct(where, sizep, NULL, &ci, sizeof(ci)));
+}
diff --git a/sys/kern/kern_xxx.c b/sys/kern/kern_xxx.c
new file mode 100644
index 000000000000..64fac9105d7f
--- /dev/null
+++ b/sys/kern/kern_xxx.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_xxx.c 8.2 (Berkeley) 11/14/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/reboot.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+/* Argument block for reboot(): `opt' is handed to boot() unchanged. */
+struct reboot_args {
+	int	opt;
+};
+/* ARGSUSED */
+reboot(p, uap, retval)
+	struct proc *p;
+	struct reboot_args *uap;
+	int *retval;
+{
+	int error;
+
+	/* Refuse unless suser() accepts the caller's credentials. */
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error != 0)
+		return (error);
+
+	boot(uap->opt);
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+
+/* Argument block for the old gethostname call: buffer and its length. */
+struct gethostname_args {
+	char	*hostname;
+	u_int	len;
+};
+/* ARGSUSED */
+ogethostname(p, uap, retval)
+	struct proc *p;
+	struct gethostname_args *uap;
+	int *retval;
+{
+	int mib[1];
+
+	/* Implemented as a read of the KERN_HOSTNAME sysctl. */
+	mib[0] = KERN_HOSTNAME;
+	return (kern_sysctl(mib, 1, uap->hostname, &uap->len, 0, 0));
+}
+
+/* Argument block for the old sethostname call: new name and its length. */
+struct sethostname_args {
+	char	*hostname;
+	u_int	len;
+};
+/* ARGSUSED */
+osethostname(p, uap, retval)
+	struct proc *p;
+	register struct sethostname_args *uap;
+	int *retval;
+{
+	int mib[1];
+	int error;
+
+	/* Refuse unless suser() accepts the caller's credentials. */
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error != 0)
+		return (error);
+
+	/* Implemented as a write of the KERN_HOSTNAME sysctl. */
+	mib[0] = KERN_HOSTNAME;
+	return (kern_sysctl(mib, 1, 0, 0, uap->hostname, uap->len));
+}
+
+extern long hostid;
+
+/* The old gethostid call takes no real arguments. */
+struct gethostid_args {
+	int	dummy;
+};
+/* ARGSUSED */
+ogethostid(p, uap, retval)
+	struct proc *p;
+	struct gethostid_args *uap;
+	int *retval;
+{
+	long *resultp;
+
+	/* The host id is stored through retval viewed as a long. */
+	resultp = (long *)retval;
+	*resultp = hostid;
+	return (0);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+#ifdef COMPAT_43
+/* Argument block for the old sethostid call: the new host id. */
+struct sethostid_args {
+	long	hostid;
+};
+/* ARGSUSED */
+osethostid(p, uap, retval)
+	struct proc *p;
+	struct sethostid_args *uap;
+	int *retval;
+{
+	int error;
+
+	/* Refuse unless suser() accepts the caller's credentials. */
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error != 0)
+		return (error);
+
+	hostid = uap->hostid;
+	return (0);
+}
+
+/* Stub for the old quota call: always fails with ENOSYS. */
+oquota()
+{
+
+ return (ENOSYS);
+}
+#endif /* COMPAT_43 */
diff --git a/sys/kern/makesyscalls.sh b/sys/kern/makesyscalls.sh
new file mode 100644
index 000000000000..0ddea0c28fa4
--- /dev/null
+++ b/sys/kern/makesyscalls.sh
@@ -0,0 +1,171 @@
+#! /bin/sh -
+# @(#)makesyscalls.sh 8.1 (Berkeley) 6/10/93
+#
+# Usage: makesyscalls.sh input-file
+# Generates the syscall name table, the syscall number header and the
+# syscall switch source from the master list given as input-file.
+
+set -e
+
+# name of compat option:
+compat=COMPAT_43
+
+# output files:
+sysnames="syscalls.c"
+syshdr="../sys/syscall.h"
+syssw="init_sysent.c"
+
+# tmp files:
+sysdcl="sysent.dcl"
+syscompat="sysent.compat"
+sysent="sysent.switch"
+
+# Remove the intermediate files on exit.  -f keeps rm quiet when they
+# were never created, as on the usage-error path just below.
+trap "rm -f $sysdcl $syscompat $sysent" 0
+
+case $# in
+    0)	echo "Usage: $0 input-file" 1>&2
+	exit 1
+	;;
+esac
+
+# Generate the output and intermediate files from the master list.
+# The BEGIN prologue is double-quoted so the shell substitutes the file
+# names; the rest of the awk program is single-quoted.
+awk < $1 "
+ BEGIN {
+ sysdcl = \"$sysdcl\"
+ syscompat = \"$syscompat\"
+ sysent = \"$sysent\"
+ sysnames = \"$sysnames\"
+ syshdr = \"$syshdr\"
+ compat = \"$compat\"
+ infile = \"$1\"
+ "'
+
+ printf "/*\n * System call switch table.\n *\n" > sysdcl
+ printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysdcl
+
+ printf "\n#ifdef %s\n", compat > syscompat
+ printf "#define compat(n, name) n, __CONCAT(o,name)\n\n" > syscompat
+
+ printf "/*\n * System call names.\n *\n" > sysnames
+ printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysnames
+
+ printf "/*\n * System call numbers.\n *\n" > syshdr
+ printf " * DO NOT EDIT-- this file is automatically generated.\n" > syshdr
+ }
+ # First input line: echoed into the "created from" line of each
+ # generated header comment; also opens the two tables.
+ NR == 1 {
+ printf " * created from%s\n */\n\n", $0 > sysdcl
+ printf "#include <sys/param.h>\n" > sysdcl
+ printf "#include <sys/systm.h>\n\n" > sysdcl
+ printf "int\tnosys();\n\n" > sysdcl
+
+ printf "struct sysent sysent[] = {\n" > sysent
+
+ printf " * created from%s\n */\n\n", $0 > sysnames
+ printf "char *syscallnames[] = {\n" > sysnames
+
+ printf " * created from%s\n */\n\n", $0 > syshdr
+ next
+ }
+ # Skip blank lines and lines starting with a semicolon.
+ NF == 0 || $1 ~ /^;/ {
+ next
+ }
+ # Preprocessor lines are copied to every file except syshdr. The
+ # syscall number is saved at #if and restored at #else, so both arms
+ # of a conditional number their entries from the same starting point.
+ $1 ~ /^#[ ]*if/ {
+ print > sysent
+ print > sysdcl
+ print > syscompat
+ print > sysnames
+ savesyscall = syscall
+ next
+ }
+ $1 ~ /^#[ ]*else/ {
+ print > sysent
+ print > sysdcl
+ print > syscompat
+ print > sysnames
+ syscall = savesyscall
+ next
+ }
+ $1 ~ /^#/ {
+ print > sysent
+ print > sysdcl
+ print > syscompat
+ print > sysnames
+ next
+ }
+ # Every remaining line must begin with the expected syscall number.
+ syscall != $1 {
+ printf "%s: line %d: syscall number out of sync at %d\n", \
+ infile, NR, syscall
+ printf "line is:\n"
+ print
+ exit 1
+ }
+ # Build the comment text from field 4 onward; field 5 (the name used
+ # in the generated tables) defaults to field 4 when absent.
+ { comment = $4
+ for (i = 5; i <= NF; i++)
+ comment = comment " " $i
+ if (NF < 5)
+ $5 = $4
+ }
+ # STD: declaration, switch entry, name and SYS_ number.
+ $2 == "STD" {
+ printf("int\t%s();\n", $4) > sysdcl
+ printf("\t{ %d, %s },\t\t\t/* %d = %s */\n", \
+ $3, $4, syscall, $5) > sysent
+ printf("\t\"%s\",\t\t\t/* %d = %s */\n", \
+ $5, syscall, $5) > sysnames
+ printf("#define\tSYS_%s\t%d\n", \
+ $5, syscall) > syshdr
+ syscall++
+ next
+ }
+ # COMPAT: handler is declared with an "o" prefix and entered through
+ # the compat() macro; syshdr gets only a comment, no SYS_ number.
+ $2 == "COMPAT" {
+ printf("int\to%s();\n", $4) > syscompat
+ printf("\t{ compat(%d,%s) },\t\t/* %d = old %s */\n", \
+ $3, $4, syscall, $5) > sysent
+ printf("\t\"old.%s\",\t\t/* %d = old %s */\n", \
+ $5, syscall, $5) > sysnames
+ printf("\t\t\t\t/* %d is old %s */\n", \
+ syscall, comment) > syshdr
+ syscall++
+ next
+ }
+ # LIBCOMPAT: as COMPAT, but the SYS_ number is still defined.
+ $2 == "LIBCOMPAT" {
+ printf("int\to%s();\n", $4) > syscompat
+ printf("\t{ compat(%d,%s) },\t\t/* %d = old %s */\n", \
+ $3, $4, syscall, $5) > sysent
+ printf("\t\"old.%s\",\t\t/* %d = old %s */\n", \
+ $5, syscall, $5) > sysnames
+ printf("#define\tSYS_%s\t%d\t/* compatibility; still used by libc */\n", \
+ $5, syscall) > syshdr
+ syscall++
+ next
+ }
+ # OBSOL: slot routed to nosys, named obs_<name>.
+ $2 == "OBSOL" {
+ printf("\t{ 0, nosys },\t\t\t/* %d = obsolete %s */\n", \
+ syscall, comment) > sysent
+ printf("\t\"obs_%s\",\t\t\t/* %d = obsolete %s */\n", \
+ $4, syscall, comment) > sysnames
+ printf("\t\t\t\t/* %d is obsolete %s */\n", \
+ syscall, comment) > syshdr
+ syscall++
+ next
+ }
+ # UNIMPL: slot routed to nosys, named by number; nothing in syshdr.
+ $2 == "UNIMPL" {
+ printf("\t{ 0, nosys },\t\t\t/* %d = %s */\n", \
+ syscall, comment) > sysent
+ printf("\t\"#%d\",\t\t\t/* %d = %s */\n", \
+ syscall, syscall, comment) > sysnames
+ syscall++
+ next
+ }
+ # Anything else is an unknown keyword.
+ {
+ printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $2
+ exit 1
+ }
+ # Close the compat block (with the fallback compat() definition for
+ # kernels built without the option) and terminate the two tables.
+ END {
+ printf("\n#else /* %s */\n", compat) > syscompat
+ printf("#define compat(n, name) 0, nosys\n") > syscompat
+ printf("#endif /* %s */\n\n", compat) > syscompat
+
+ printf("};\n\n") > sysent
+ printf("int\tnsysent = sizeof(sysent) / sizeof(sysent[0]);\n") > sysent
+
+ printf("};\n") > sysnames
+ } '
+
+# Assemble the switch source from the three intermediate files.
+cat $sysdcl $syscompat $sysent >$syssw
+
+# Mark the generated files read-only.
+chmod 444 $sysnames $syshdr $syssw
diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c
new file mode 100644
index 000000000000..af17988c9359
--- /dev/null
+++ b/sys/kern/subr_autoconf.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratories.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)subr_autoconf.c 8.1 (Berkeley) 6/10/93
+ *
+ * from: $Header: subr_autoconf.c,v 1.12 93/02/01 19:31:48 torek Exp $ (LBL)
+ */
+
+#include <sys/param.h>
+#include <sys/device.h>
+#include <sys/malloc.h>
+
+/*
+ * Autoconfiguration subroutines.
+ */
+
+/*
+ * ioconf.c exports exactly two names: cfdata and cfroots. All system
+ * devices and drivers are found via these tables.
+ */
+extern struct cfdata cfdata[];
+extern short cfroots[];
+
+#define ROOT ((struct device *)NULL)
+
+/* Search state shared by config_search(), config_rootsearch() and mapply(). */
+struct matchinfo {
+ cfmatch_t fn; /* match function to apply; NULL means use the driver's cd_match */
+ struct device *parent; /* parent being searched under (ROOT for root devices) */
+ void *aux; /* opaque argument passed through to the match function */
+ struct cfdata *match; /* best candidate so far, NULL if none */
+ int pri; /* priority returned for `match'; starts at 0 */
+};
+
+/*
+ * Apply the matching function and choose the best. This is used
+ * a few times and we want to keep the code small.
+ */
+static void
+mapply(m, cf)
+	register struct matchinfo *m;
+	register struct cfdata *cf;
+{
+	register int score;
+
+	/* Use the driver's own match routine unless one was supplied. */
+	if (m->fn == NULL)
+		score = (*cf->cf_driver->cd_match)(m->parent, cf, m->aux);
+	else
+		score = (*m->fn)(m->parent, cf, m->aux);
+
+	/* Keep the current best unless this one is strictly better. */
+	if (score <= m->pri)
+		return;
+	m->pri = score;
+	m->match = cf;
+}
+
+/*
+ * Iterate over all potential children of some device, calling the given
+ * function (default being the child's match function) for each one.
+ * Nonzero returns are matches; the highest value returned is considered
+ * the best match. Return the `found child' if we got a match, or NULL
+ * otherwise. The `aux' pointer is simply passed on through.
+ *
+ * Note that this function is designed so that it can be used to apply
+ * an arbitrary function to all potential children (its return value
+ * can be ignored).
+ */
+struct cfdata *
+config_search(fn, parent, aux)
+	cfmatch_t fn;
+	register struct device *parent;
+	void *aux;
+{
+	register struct cfdata *cf;
+	register short *pp;
+	struct matchinfo m;
+
+	m.pri = 0;
+	m.match = NULL;
+	m.aux = aux;
+	m.parent = parent;
+	m.fn = fn;
+
+	/*
+	 * Walk the whole configuration table.  Entries already marked
+	 * FSTATE_FOUND are no longer eligible; for the rest, apply the
+	 * match function once for every listed parent that is the
+	 * configuration entry of `parent'.
+	 */
+	cf = cfdata;
+	while (cf->cf_driver) {
+		if (cf->cf_fstate != FSTATE_FOUND) {
+			pp = cf->cf_parents;
+			while (*pp >= 0) {
+				if (&cfdata[*pp] == parent->dv_cfdata)
+					mapply(&m, cf);
+				pp++;
+			}
+		}
+		cf++;
+	}
+	return (m.match);
+}
+
+/*
+ * Find the given root device.
+ * This is much like config_search, but there is no parent.
+ */
+struct cfdata *
+config_rootsearch(fn, rootname, aux)
+	register cfmatch_t fn;
+	register char *rootname;
+	register void *aux;
+{
+	register struct cfdata *cf;
+	register short *rp;
+	struct matchinfo m;
+
+	m.pri = 0;
+	m.match = NULL;
+	m.aux = aux;
+	m.parent = ROOT;
+	m.fn = fn;
+
+	/*
+	 * Look at root entries for matching name.  We do not bother
+	 * with found-state here since only one root should ever be
+	 * searched (and it must be done first).
+	 */
+	rp = cfroots;
+	while (*rp >= 0) {
+		cf = &cfdata[*rp];
+		if (strcmp(cf->cf_driver->cd_name, rootname) == 0)
+			mapply(&m, cf);
+		rp++;
+	}
+	return (m.match);
+}
+
+static char *msgs[3] = { "", " not configured\n", " unsupported\n" };
+
+/*
+ * The given `aux' argument describes a device that has been found
+ * on the given parent, but not necessarily configured. Locate the
+ * configuration data for that device (using the cd_match configuration
+ * driver function) and attach it, and return true. If the device was
+ * not configured, call the given `print' function and return 0.
+ */
+int
+config_found(parent, aux, print)
+	struct device *parent;
+	void *aux;
+	cfprint_t print;
+{
+	struct cfdata *cf;
+
+	if ((cf = config_search((cfmatch_t)NULL, parent, aux)) != NULL) {
+		config_attach(parent, cf, aux, print);
+		return (1);
+	}
+
+	/*
+	 * No configuration entry matched.  The print routine's return
+	 * value selects the trailing text from msgs[].  Pass that text as
+	 * an argument to a "%s" format, never as the format string.
+	 */
+	printf("%s", msgs[(*print)(aux, parent->dv_xname)]);
+	return (0);
+}
+
+/*
+ * As above, but for root devices.
+ */
+int
+config_rootfound(rootname, aux)
+	char *rootname;
+	void *aux;
+{
+	struct cfdata *cf;
+
+	/* Returns 1 after attaching the named root device, 0 if none matched. */
+	cf = config_rootsearch((cfmatch_t)NULL, rootname, aux);
+	if (cf == NULL) {
+		printf("root device %s not configured\n", rootname);
+		return (0);
+	}
+	config_attach(ROOT, cf, aux, (cfprint_t)NULL);
+	return (1);
+}
+
+/* just like sprintf(buf, "%d") except that it works from the end */
+static char *
+number(ep, n)
+	register char *ep;
+	register int n;
+{
+
+	/*
+	 * ep points one past the end of the caller's buffer.  Store the
+	 * terminating NUL, then the digits from least to most
+	 * significant, moving backwards; the result is a pointer to the
+	 * first digit.  Intended for non-negative n.
+	 */
+	*--ep = 0;
+	for (; n >= 10; n /= 10)
+		*--ep = (n % 10) + '0';
+	*--ep = n + '0';
+	return (ep);
+}
+
+/*
+ * Attach a found device.  Allocates memory for device variables,
+ * links the new device onto the alldevs list, records it in the
+ * driver's cd_devs[] array (growing the array as needed) and finally
+ * calls the driver's attach routine.
+ *
+ * parent is the device this one hangs off, or ROOT; cf is the matched
+ * configuration entry; aux is passed through to the print and attach
+ * routines; print finishes the announcement line and is not called
+ * when parent is ROOT.
+ */
+void
+config_attach(parent, cf, aux, print)
+	register struct device *parent;
+	register struct cfdata *cf;
+	register void *aux;
+	cfprint_t print;
+{
+	register struct device *dev;
+	register struct cfdriver *cd;
+	register size_t lname, lunit;
+	register char *xunit;
+	int myunit;
+	char num[12];		/* room for a 10-digit int plus NUL */
+	static struct device **nextp = &alldevs;
+
+	cd = cf->cf_driver;
+	if (cd->cd_devsize < sizeof(struct device))
+		panic("config_attach");
+
+	/*
+	 * A not-yet-found entry is consumed by this attach; an entry in
+	 * any other state hands out consecutive unit numbers instead.
+	 */
+	myunit = cf->cf_unit;
+	if (cf->cf_fstate == FSTATE_NOTFOUND)
+		cf->cf_fstate = FSTATE_FOUND;
+	else
+		cf->cf_unit++;
+
+	/*
+	 * compute length of name and decimal expansion of unit number
+	 * (lunit includes the terminating NUL written by number())
+	 */
+	lname = strlen(cd->cd_name);
+	xunit = number(&num[sizeof num], myunit);
+	lunit = &num[sizeof num] - xunit;
+	if (lname + lunit >= sizeof(dev->dv_xname))
+		panic("config_attach: device name too long");
+
+	/* get memory for all device vars */
+	dev = (struct device *)malloc(cd->cd_devsize, M_DEVBUF, M_WAITOK);
+					/* XXX cannot wait! */
+	bzero(dev, cd->cd_devsize);
+	*nextp = dev;			/* link up */
+	nextp = &dev->dv_next;
+	dev->dv_class = cd->cd_class;
+	dev->dv_cfdata = cf;
+	dev->dv_unit = myunit;
+	bcopy(cd->cd_name, dev->dv_xname, lname);
+	bcopy(xunit, dev->dv_xname + lname, lunit);
+	dev->dv_parent = parent;
+	if (parent == ROOT)
+		printf("%s (root)", dev->dv_xname);
+	else {
+		printf("%s at %s", dev->dv_xname, parent->dv_xname);
+		(void) (*print)(aux, (char *)0);
+	}
+
+	/* put this device in the devices array */
+	if (dev->dv_unit >= cd->cd_ndevs) {
+		/*
+		 * Need to expand the array.  Start from MINALLOCSIZE
+		 * bytes (or twice the current size) and keep doubling
+		 * until the new unit fits, so that the very first
+		 * allocation is also large enough for a big unit number.
+		 */
+		int old = cd->cd_ndevs, new;
+		size_t oldbytes, newbytes;
+		void **nsp;
+
+		if (old == 0)
+			new = MINALLOCSIZE / sizeof(void *);
+		else
+			new = old * 2;
+		while (new <= dev->dv_unit)
+			new *= 2;
+		oldbytes = old * sizeof(void *);
+		newbytes = new * sizeof(void *);
+		nsp = malloc(newbytes, M_DEVBUF, M_WAITOK);	/*XXX*/
+		if (old != 0)
+			bcopy(cd->cd_devs, nsp, oldbytes);
+		bzero(&nsp[old], newbytes - oldbytes);
+		if (old != 0)
+			free(cd->cd_devs, M_DEVBUF);
+		/* publish the new size and the new array together */
+		cd->cd_ndevs = new;
+		cd->cd_devs = nsp;
+	}
+	if (cd->cd_devs[dev->dv_unit])
+		panic("config_attach: duplicate %s", dev->dv_xname);
+	cd->cd_devs[dev->dv_unit] = dev;
+
+	/*
+	 * Before attaching, clobber any unfound devices that are
+	 * otherwise identical.
+	 */
+	for (cf = cfdata; cf->cf_driver; cf++)
+		if (cf->cf_driver == cd && cf->cf_unit == dev->dv_unit &&
+		    cf->cf_fstate == FSTATE_NOTFOUND)
+			cf->cf_fstate = FSTATE_FOUND;
+	(*cd->cd_attach)(parent, dev, aux);
+}
+
+/*
+ * Attach an event. These must come from initially-zero space (see
+ * commented-out assignments below), but that occurs naturally for
+ * device instance variables.
+ */
+void
+evcnt_attach(dev, name, ev)
+	struct device *dev;
+	const char *name;
+	struct evcnt *ev;
+{
+	static struct evcnt **nextp = &allevents;
+
+#ifdef DIAGNOSTIC
+	/* The name, including its terminator, must fit in ev_name. */
+	if (sizeof(ev->ev_name) <= strlen(name))
+		panic("evcnt_attach");
+#endif
+	/* ev->ev_next = NULL; */
+	/* ev->ev_count = 0; */
+	strcpy(ev->ev_name, name);
+	ev->ev_dev = dev;
+
+	/* Append to the allevents list and remember the new tail. */
+	*nextp = ev;
+	nextp = &ev->ev_next;
+}
diff --git a/sys/kern/subr_clist.c b/sys/kern/subr_clist.c
new file mode 100644
index 000000000000..fe8f000f87d5
--- /dev/null
+++ b/sys/kern/subr_clist.c
@@ -0,0 +1,159 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)tty_subr.c 8.2 (Berkeley) 9/5/93
+ */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+char cwaiting;
+struct cblock *cfree, *cfreelist;
+int cfreecount, nclist;
+
+void
+clist_init()
+{
+
+ /*
+ * Body deleted.
+ * What remains is a placeholder that does nothing and returns.
+ */
+ return;
+}
+
+getc(a1)
+ struct clist *a1;
+{
+
+ /*
+ * Body deleted.
+ * What remains is a placeholder: a1 is ignored and the
+ * function always returns (char)0.
+ */
+ return ((char)0);
+}
+
+q_to_b(a1, a2, a3)
+ struct clist *a1;
+ char *a2;
+ int a3;
+{
+
+ /*
+ * Body deleted.
+ * What remains is a placeholder: all three arguments are
+ * ignored, nothing is copied and 0 is always returned.
+ */
+ return (0);
+}
+
+ndqb(a1, a2)
+ struct clist *a1;
+ int a2;
+{
+
+ /*
+ * Body deleted.
+ * What remains is a placeholder: both arguments are ignored
+ * and 0 is always returned.
+ */
+ return (0);
+}
+
+void
+ndflush(a1, a2)
+ struct clist *a1;
+ int a2;
+{
+
+ /*
+ * Body deleted.
+ * What remains is a placeholder: both arguments are ignored
+ * and nothing is flushed.
+ */
+ return;
+}
+
+putc(a1, a2)
+ char a1;
+ struct clist *a2;
+{
+
+ /*
+ * Body deleted.
+ * What remains is a placeholder: the character is not stored
+ * anywhere and 0 is always returned.
+ */
+ return (0);
+}
+
+b_to_q(a1, a2, a3)
+ char *a1;
+ int a2;
+ struct clist *a3;
+{
+
+ /*
+ * Body deleted.
+ * What remains is a placeholder: all three arguments are
+ * ignored, nothing is copied and 0 is always returned.
+ */
+ return (0);
+}
+
+char *
+nextc(a1, a2, a3)
+ struct clist *a1;
+ char *a2;
+ int *a3;
+{
+
+ /*
+ * Body deleted.
+ * What remains is a placeholder: the arguments are ignored,
+ * *a3 is not written and a null pointer is always returned.
+ */
+ return ((char *)0);
+}
+
+unputc(a1)
+ struct clist *a1;
+{
+
+ /*
+ * Body deleted.
+ * What remains is a placeholder: a1 is ignored and the
+ * function always returns (char)0.
+ */
+ return ((char)0);
+}
+
+void
+catq(a1, a2)
+ struct clist *a1, *a2;
+{
+
+ /*
+ * Body deleted.
+ * What remains is a placeholder: neither queue is touched.
+ */
+ return;
+}
diff --git a/sys/kern/subr_disklabel.c b/sys/kern/subr_disklabel.c
new file mode 100644
index 000000000000..78dede4da773
--- /dev/null
+++ b/sys/kern/subr_disklabel.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/disklabel.h>
+#include <sys/syslog.h>
+
+/*
+ * Seek sort for disks. We depend on the driver which calls us using b_resid
+ * as the current cylinder number.
+ *
+ * The argument ap structure holds a b_actf activity chain pointer on which we
+ * keep two queues, sorted in ascending cylinder order. The first queue holds
+ * those requests which are positioned after the current cylinder (in the first
+ * request); the second holds requests which came in after their cylinder number
+ * was passed. Thus we implement a one way scan, retracting after reaching the
+ * end of the drive to the first request on the second queue, at which time it
+ * becomes the first queue.
+ *
+ * A one-way scan is natural because of the way UNIX read-ahead blocks are
+ * allocated.
+ *
+ * ap is the queue header (only its b_actf link is used here) and bp is
+ * the request to insert. Requests on the same cylinder are ordered by
+ * b_blkno. Unless the queue is empty, bp is always linked in after some
+ * existing request, so the request at the head of the chain is never
+ * displaced.
+ */
+
+/*
+ * For portability with historic industry practice, the
+ * cylinder number has to be maintained in the `b_resid'
+ * field.
+ */
+#define b_cylinder b_resid
+
+void
+disksort(ap, bp)
+ register struct buf *ap, *bp;
+{
+ register struct buf *bq; /* request after which bp will be linked */
+
+ /* If the queue is empty, then it's easy. */
+ if (ap->b_actf == NULL) {
+ bp->b_actf = NULL;
+ ap->b_actf = bp;
+ return;
+ }
+
+ /*
+ * If we lie before the first (currently active) request, then we
+ * must locate the second request list and add ourselves to it.
+ */
+ bq = ap->b_actf;
+ if (bp->b_cylinder < bq->b_cylinder) {
+ while (bq->b_actf) {
+ /*
+ * Check for an ``inversion'' in the normally ascending
+ * cylinder numbers, indicating the start of the second
+ * request list.
+ */
+ if (bq->b_actf->b_cylinder < bq->b_cylinder) {
+ /*
+ * Search the second request list for the first
+ * request at a larger cylinder number. We go
+ * before that; if there is no such request, we
+ * go at end. Within one cylinder the order is
+ * by ascending b_blkno.
+ */
+ do {
+ if (bp->b_cylinder <
+ bq->b_actf->b_cylinder)
+ goto insert;
+ if (bp->b_cylinder ==
+ bq->b_actf->b_cylinder &&
+ bp->b_blkno < bq->b_actf->b_blkno)
+ goto insert;
+ bq = bq->b_actf;
+ } while (bq->b_actf);
+ goto insert; /* after last */
+ }
+ bq = bq->b_actf;
+ }
+ /*
+ * No inversions... we will go after the last, and
+ * be the first request in the second request list.
+ */
+ goto insert;
+ }
+ /*
+ * Request is at/after the current request...
+ * sort in the first request list.
+ */
+ while (bq->b_actf) {
+ /*
+ * We want to go after the current request if there is an
+ * inversion after it (i.e. it is the end of the first
+ * request list), or if the next request is a larger cylinder
+ * than our request (or the same cylinder with a larger
+ * block number).
+ */
+ if (bq->b_actf->b_cylinder < bq->b_cylinder ||
+ bp->b_cylinder < bq->b_actf->b_cylinder ||
+ (bp->b_cylinder == bq->b_actf->b_cylinder &&
+ bp->b_blkno < bq->b_actf->b_blkno))
+ goto insert;
+ bq = bq->b_actf;
+ }
+ /*
+ * Neither a second list nor a larger request... we go at the end of
+ * the first list, which is the same as the end of the whole shebang.
+ */
+insert: bp->b_actf = bq->b_actf; /* link bp in directly after bq */
+ bq->b_actf = bp;
+}
+
+/*
+ * Attempt to read a disk label from a device using the indicated strategy
+ * routine. The label must be partly set up before this: secpercyl and
+ * anything required in the strategy routine (e.g., sector size) must be
+ * filled in before calling us. Returns NULL on success and an error
+ * string on failure.
+ *
+ * dev is the device to read, strat its strategy routine, and lp the
+ * partly initialized label, which is overwritten with the on-disk label
+ * when one is found. Before the read, lp is given a single partition
+ * starting at offset 0 (d_secperunit and the partition size default to
+ * 0x1fffffff when they are zero). Sector LABELSECTOR is then read and
+ * scanned at sizeof(long) strides for a label that has both magic
+ * numbers, at most MAXPARTITIONS partitions and a zero checksum.
+ *
+ * The scan is bounded by the d_secsize bytes that were actually
+ * requested and read into the buffer (the same bound writedisklabel()
+ * uses) instead of assuming that the sector is DEV_BSIZE bytes long.
+ * A sector too small to hold a label is reported as "no disk label".
+ */
+char *
+readdisklabel(dev, strat, lp)
+ dev_t dev;
+ int (*strat)();
+ register struct disklabel *lp;
+{
+ register struct buf *bp;
+ struct disklabel *dlp, *last;
+ char *msg = NULL;
+
+ if (lp->d_secperunit == 0)
+ lp->d_secperunit = 0x1fffffff;
+ lp->d_npartitions = 1;
+ if (lp->d_partitions[0].p_size == 0)
+ lp->d_partitions[0].p_size = 0x1fffffff;
+ lp->d_partitions[0].p_offset = 0;
+
+ bp = geteblk((int)lp->d_secsize);
+ bp->b_dev = dev;
+ bp->b_blkno = LABELSECTOR;
+ bp->b_bcount = lp->d_secsize;
+ bp->b_flags = B_BUSY | B_READ;
+ bp->b_cylinder = LABELSECTOR / lp->d_secpercyl;
+ (*strat)(bp);
+ if (biowait(bp))
+ msg = "I/O error";
+ else if (lp->d_secsize < sizeof(*dlp))
+ msg = "no disk label"; /* a label cannot fit in this sector */
+ else {
+ /*
+ * Last position at which a whole label still fits in the
+ * data just read. Computed once, up front, because a
+ * successful match overwrites *lp (and with it d_secsize).
+ */
+ last = (struct disklabel *)((char *)bp->b_data +
+ lp->d_secsize - sizeof(*dlp));
+ for (dlp = (struct disklabel *)bp->b_data; dlp <= last;
+ dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
+ if (dlp->d_magic != DISKMAGIC ||
+ dlp->d_magic2 != DISKMAGIC) {
+ /* keep an earlier "corrupted" verdict */
+ if (msg == NULL)
+ msg = "no disk label";
+ } else if (dlp->d_npartitions > MAXPARTITIONS ||
+ dkcksum(dlp) != 0)
+ msg = "disk label corrupted";
+ else {
+ *lp = *dlp;
+ msg = NULL;
+ break;
+ }
+ }
+ }
+ bp->b_flags = B_INVAL | B_AGE; /* do not keep the data cached */
+ brelse(bp);
+ return (msg);
+}
+
+/*
+ * Check new disk label for sensibility before setting it.
+ *
+ * olp is the label currently in use and nlp the proposed replacement.
+ * openmask is a bit mask in which bit i stands for partition i
+ * (presumably the partitions that are currently open).
+ *
+ * Returns EINVAL if nlp lacks the magic numbers, claims more than
+ * MAXPARTITIONS partitions or fails its checksum; EBUSY if a partition
+ * named in openmask would disappear, move or shrink. Otherwise the
+ * checksum of nlp is recomputed, nlp is copied over *olp and 0 is
+ * returned. Note that nlp itself may be modified on the way.
+ */
+int
+setdisklabel(olp, nlp, openmask)
+ register struct disklabel *olp, *nlp;
+ u_long openmask;
+{
+ register int i;
+ register struct partition *opp, *npp;
+
+ /*
+ * d_npartitions has to be validated before dkcksum() is called:
+ * dkcksum() sums everything up to d_partitions[d_npartitions], so
+ * an oversized count would make it read past the end of the label.
+ */
+ if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
+ nlp->d_npartitions > MAXPARTITIONS || dkcksum(nlp) != 0)
+ return (EINVAL);
+ while ((i = ffs((long)openmask)) != 0) {
+ i--; /* ffs() counts bits from 1 */
+ openmask &= ~((u_long)1 << i);
+ if (nlp->d_npartitions <= i)
+ return (EBUSY);
+ opp = &olp->d_partitions[i];
+ npp = &nlp->d_partitions[i];
+ if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
+ return (EBUSY);
+ /*
+ * Copy internally-set partition information
+ * if new label doesn't include it. XXX
+ */
+ if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
+ npp->p_fstype = opp->p_fstype;
+ npp->p_fsize = opp->p_fsize;
+ npp->p_frag = opp->p_frag;
+ npp->p_cpg = opp->p_cpg;
+ }
+ }
+ /* the checksum field must be zero while the checksum is computed */
+ nlp->d_checksum = 0;
+ nlp->d_checksum = dkcksum(nlp);
+ *olp = *nlp;
+ return (0);
+}
+
+/* encoding of disk minor numbers, should be elsewhere... */
+#define dkunit(dev) (minor(dev) >> 3)
+#define dkpart(dev) (minor(dev) & 07)
+#define dkminor(unit, part) (((unit) << 3) | (part))
+
+/*
+ * Write disk label back to device after modification.
+ *
+ * dev names the device, strat is its strategy routine and lp is the
+ * label to write. The label is written through the partition named by
+ * dev if that partition starts at offset 0, otherwise through partition
+ * 0; if neither starts at offset 0, EXDEV is returned.
+ *
+ * Sector LABELSECTOR is read first and searched, at sizeof(long)
+ * strides, for an existing valid label; the new label is stored at the
+ * same position and the sector written back. If no valid label is
+ * found nothing is written and ESRCH is returned. I/O errors from
+ * biowait() are returned as is; 0 means success.
+ *
+ * The buffer stays marked B_BUSY for both transfers, as in
+ * readdisklabel(): it is still owned by this routine until brelse().
+ */
+int
+writedisklabel(dev, strat, lp)
+ dev_t dev;
+ int (*strat)();
+ register struct disklabel *lp;
+{
+ struct buf *bp;
+ struct disklabel *dlp;
+ int labelpart;
+ int error = 0;
+
+ labelpart = dkpart(dev);
+ if (lp->d_partitions[labelpart].p_offset != 0) {
+ if (lp->d_partitions[0].p_offset != 0)
+ return (EXDEV); /* not quite right */
+ labelpart = 0;
+ }
+ bp = geteblk((int)lp->d_secsize);
+ bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart));
+ bp->b_blkno = LABELSECTOR;
+ bp->b_bcount = lp->d_secsize;
+ bp->b_flags = B_BUSY | B_READ;
+ (*strat)(bp);
+ if ((error = biowait(bp)) != 0)
+ goto done;
+ for (dlp = (struct disklabel *)bp->b_data;
+ dlp <= (struct disklabel *)
+ ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
+ dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
+ if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
+ dkcksum(dlp) == 0) {
+ /* overwrite the old label in place and write it out */
+ *dlp = *lp;
+ bp->b_flags = B_BUSY | B_WRITE;
+ (*strat)(bp);
+ error = biowait(bp);
+ goto done;
+ }
+ }
+ error = ESRCH;
+done:
+ brelse(bp);
+ return (error);
+}
+
+/*
+ * Compute checksum for disk label: the exclusive-or of every u_short
+ * from the start of the label up to, but not including,
+ * d_partitions[d_npartitions].
+ */
+dkcksum(lp)
+ register struct disklabel *lp;
+{
+ register u_short *wp, *limit;
+ register u_short cksum;
+
+ limit = (u_short *)&lp->d_partitions[lp->d_npartitions];
+ cksum = 0;
+ for (wp = (u_short *)lp; wp < limit; wp++)
+ cksum ^= *wp;
+ return (cksum);
+}
+
+/*
+ * Disk error is the preface to plaintive error messages
+ * about failing disk transfers. It prints messages of the form
+
+hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d; cn %d tn %d sn %d)
+
+ * if the offset of the error in the transfer and a disk label
+ * are both available. blkdone should be -1 if the position of the error
+ * is unknown; the disklabel pointer may be null from drivers that have not
+ * been converted to use them. The message is printed with printf
+ * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
+ * The message should be completed (with at least a newline) with printf
+ * or addlog, respectively. There is no trailing space.
+ *
+ * bp is the failed transfer, dname the driver name ("hp" above) and
+ * what the kind of error ("hard error" above). The unit number and the
+ * partition letter are derived from bp->b_dev with dkunit()/dkpart().
+ * When lp is used, lp->d_secpercyl and lp->d_nsectors are divisors and
+ * so must be non-zero.
+ */
+void
+diskerr(bp, dname, what, pri, blkdone, lp)
+ register struct buf *bp;
+ char *dname, *what;
+ int pri, blkdone;
+ register struct disklabel *lp;
+{
+ int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev);
+ register void (*pr) __P((const char *, ...)); /* output routine in use */
+ char partname = 'a' + part;
+ int sn;
+
+ if (pri != LOG_PRINTF) {
+ log(pri, ""); /* empty format: start a log entry at priority pri */
+ pr = addlog;
+ } else
+ pr = printf;
+ (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
+ bp->b_flags & B_READ ? "read" : "writ");
+ sn = bp->b_blkno;
+ if (bp->b_bcount <= DEV_BSIZE) /* single-block transfer */
+ (*pr)("%d", sn);
+ else {
+ if (blkdone >= 0) { /* position of the error is known */
+ sn += blkdone;
+ (*pr)("%d of ", sn);
+ }
+ (*pr)("%d-%d", bp->b_blkno,
+ bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
+ }
+ if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
+#ifdef tahoe
+ sn *= DEV_BSIZE / lp->d_secsize; /* XXX */
+#endif
+ sn += lp->d_partitions[part].p_offset; /* partition-relative -> absolute */
+ (*pr)(" (%s%d bn %d; cn %d", dname, unit, sn,
+ sn / lp->d_secpercyl);
+ sn %= lp->d_secpercyl; /* sector offset within the cylinder */
+ (*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors);
+ }
+}
diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c
new file mode 100644
index 000000000000..f065761d756e
--- /dev/null
+++ b/sys/kern/subr_log.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)subr_log.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Error log buffer for kernel printf's.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/msgbuf.h>
+#include <sys/file.h>
+
+#define LOG_RDPRI (PZERO + 1)
+
+#define LOG_ASYNC 0x04
+#define LOG_RDWAIT 0x08
+
+struct logsoftc {
+ int sc_state; /* LOG_ASYNC and LOG_RDWAIT flag bits, defined above */
+ struct selinfo sc_selp; /* process waiting on select call */
+ int sc_pgid; /* SIGIO target: pid if >= 0, negated group id if < 0 */
+} logsoftc;
+
+int log_open; /* also used in log() */
+
+/*ARGSUSED*/
+logopen(dev, flags, mode, p) /* open the log device; one opener at a time */
+ dev_t dev;
+ int flags, mode;
+ struct proc *p;
+{
+ register struct msgbuf *mbp = msgbufp;
+
+ if (log_open) /* already open: refuse a second opener */
+ return (EBUSY);
+ log_open = 1;
+ logsoftc.sc_pgid = p->p_pid; /* signal process only */
+ /*
+ * Potential race here with putchar() but since putchar should be
+ * called by autoconf, msg_magic should be initialized by the time
+ * we get here. If it is not, set up an empty, zero-filled message
+ * buffer ourselves.
+ */
+ if (mbp->msg_magic != MSG_MAGIC) {
+ register int i;
+
+ mbp->msg_magic = MSG_MAGIC;
+ mbp->msg_bufx = mbp->msg_bufr = 0;
+ for (i=0; i < MSG_BSIZE; i++)
+ mbp->msg_bufc[i] = 0;
+ }
+ return (0);
+}
+
+/*ARGSUSED*/
+logclose(dev, flag, mode, p) /* close the log device; always returns 0 */
+ dev_t dev;
+ int flag, mode;
+ struct proc *p;
+{
+
+ log_open = 0; /* lets logopen() succeed again */
+ logsoftc.sc_state = 0; /* drops LOG_ASYNC and LOG_RDWAIT */
+ return (0);
+}
+
+/*ARGSUSED*/
+logread(dev, uio, flag) /* copy unread message buffer text out through uio */
+ dev_t dev;
+ struct uio *uio;
+ int flag;
+{
+ register struct msgbuf *mbp = msgbufp;
+ register long l;
+ register int s;
+ int error = 0;
+
+ s = splhigh(); /* raised priority while the indices are tested */
+ while (mbp->msg_bufr == mbp->msg_bufx) { /* nothing unread */
+ if (flag & IO_NDELAY) {
+ splx(s);
+ return (EWOULDBLOCK);
+ }
+ logsoftc.sc_state |= LOG_RDWAIT; /* ask logwakeup() to wake us */
+ if (error = tsleep((caddr_t)mbp, LOG_RDPRI | PCATCH,
+ "klog", 0)) {
+ splx(s);
+ return (error);
+ }
+ }
+ splx(s);
+ logsoftc.sc_state &= ~LOG_RDWAIT;
+
+ while (uio->uio_resid > 0) {
+ l = mbp->msg_bufx - mbp->msg_bufr; /* unread bytes ahead of msg_bufr */
+ if (l < 0) /* write index wrapped: take the tail of the buffer first */
+ l = MSG_BSIZE - mbp->msg_bufr;
+ l = min(l, uio->uio_resid);
+ if (l == 0) /* buffer drained */
+ break;
+ error = uiomove((caddr_t)&mbp->msg_bufc[mbp->msg_bufr],
+ (int)l, uio);
+ if (error)
+ break;
+ mbp->msg_bufr += l;
+ if (mbp->msg_bufr < 0 || mbp->msg_bufr >= MSG_BSIZE)
+ mbp->msg_bufr = 0; /* wrap the read index */
+ }
+ return (error);
+}
+
+/*ARGSUSED*/
+logselect(dev, rw, p) /* select on the log device: 1 if readable, else 0 */
+ dev_t dev;
+ int rw;
+ struct proc *p;
+{
+ int s = splhigh();
+
+ switch (rw) {
+
+ case FREAD:
+ if (msgbufp->msg_bufr != msgbufp->msg_bufx) { /* unread data present */
+ splx(s);
+ return (1);
+ }
+ selrecord(p, &logsoftc.sc_selp); /* logwakeup() does the selwakeup() */
+ break;
+ }
+ splx(s);
+ return (0); /* also the answer for every rw other than FREAD */
+}
+
+logwakeup() /* notify select()ers, SIGIO targets and sleeping readers */
+{
+ struct proc *p;
+
+ if (!log_open) /* nobody has the log device open */
+ return;
+ selwakeup(&logsoftc.sc_selp);
+ if (logsoftc.sc_state & LOG_ASYNC) {
+ if (logsoftc.sc_pgid < 0) /* negative: a process group id */
+ gsignal(-logsoftc.sc_pgid, SIGIO);
+ else if (p = pfind(logsoftc.sc_pgid)) /* otherwise a single pid */
+ psignal(p, SIGIO);
+ }
+ if (logsoftc.sc_state & LOG_RDWAIT) { /* logread() is asleep on msgbufp */
+ wakeup((caddr_t)msgbufp);
+ logsoftc.sc_state &= ~LOG_RDWAIT;
+ }
+}
+
+/*ARGSUSED*/
+logioctl(dev, com, data, flag, p) /* ioctl on the log device; 0 on success */
+ dev_t dev;
+ int com;
+ caddr_t data;
+ int flag;
+ struct proc *p;
+{
+ long l;
+ int s;
+
+ switch (com) {
+
+ /* return number of characters immediately available */
+ case FIONREAD:
+ s = splhigh();
+ l = msgbufp->msg_bufx - msgbufp->msg_bufr;
+ splx(s);
+ if (l < 0) /* write index has wrapped around */
+ l += MSG_BSIZE;
+ *(int *)data = l;
+ break;
+
+ case FIONBIO: /* accepted, nothing to record */
+ break;
+
+ case FIOASYNC: /* turn SIGIO delivery from logwakeup() on or off */
+ if (*(int *)data)
+ logsoftc.sc_state |= LOG_ASYNC;
+ else
+ logsoftc.sc_state &= ~LOG_ASYNC;
+ break;
+
+ case TIOCSPGRP: /* set the SIGIO target; see logwakeup() for the sign */
+ logsoftc.sc_pgid = *(int *)data;
+ break;
+
+ case TIOCGPGRP:
+ *(int *)data = logsoftc.sc_pgid;
+ break;
+
+ default: /* NOTE(review): -1 is not an errno value; confirm callers map it */
+ return (-1);
+ }
+ return (0);
+}
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
new file mode 100644
index 000000000000..9f4e2cae857c
--- /dev/null
+++ b/sys/kern/subr_param.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1980, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)param.c 8.2 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/callout.h>
+#include <sys/clist.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+
+#include <ufs/ufs/quota.h>
+
+#ifdef SYSVSHM
+#include <machine/vmparam.h>
+#include <sys/shm.h>
+#endif
+
+/*
+ * System parameter formulae.
+ *
+ * This file is copied into each directory where we compile
+ * the kernel; it should be modified there to suit local taste
+ * if necessary.
+ *
+ * Compiled with -DHZ=xx -DTIMEZONE=x -DDST=x -DMAXUSERS=xx
+ */
+
+#ifndef HZ
+#define HZ 100 /* default when not given with -DHZ=xx */
+#endif
+int hz = HZ;
+int tick = 1000000 / HZ; /* microseconds per clock tick */
+int tickadj = 30000 / (60 * HZ); /* can adjust 30ms in 60s; integer division, so 0 when HZ > 500 */
+struct timezone tz = { TIMEZONE, DST };
+#define NPROC (20 + 16 * MAXUSERS)
+int maxproc = NPROC;
+#define NTEXT (80 + NPROC / 8) /* actually the object cache */
+#define NVNODE (NPROC + NTEXT + 100)
+int desiredvnodes = NVNODE;
+int maxfiles = 3 * (NPROC + MAXUSERS) + 80;
+int ncallout = 16 + NPROC;
+int nclist = 60 + 12 * MAXUSERS;
+int nmbclusters = NMBCLUSTERS;
+int fscale = FSCALE; /* kernel uses `FSCALE', user uses `fscale' */
+
+/*
+ * Values in support of System V compatible shared memory. XXX
+ */
+#ifdef SYSVSHM
+#define SHMMAX (SHMMAXPGS*NBPG)
+#define SHMMIN 1
+#define SHMMNI 32 /* <= SHMMMNI in shm.h */
+#define SHMSEG 8
+#define SHMALL (SHMMAXPGS/CLSIZE)
+
+struct shminfo shminfo = {
+ SHMMAX,
+ SHMMIN,
+ SHMMNI,
+ SHMSEG,
+ SHMALL
+};
+#endif
+
+/*
+ * These are initialized at bootstrap time
+ * to values dependent on memory size
+ */
+int nbuf, nswbuf;
+
+/*
+ * These have to be allocated somewhere; allocating
+ * them here forces loader errors if this file is omitted
+ * (if they've been externed everywhere else; hah!).
+ */
+struct callout *callout;
+struct cblock *cfree;
+struct buf *buf, *swbuf;
+char *buffers;
+
+/*
+ * Proc/pgrp hashing.
+ * Here so that hash table sizes can depend on MAXUSERS/NPROC.
+ * Hash size must be a power of two.
+ * NOW omission of this file will cause loader errors!
+ *
+ * PIDHSZ is 512 for NPROC > 1024, 256 for NPROC > 512, 128 for
+ * NPROC > 256 and 64 otherwise. pidhashmask is PIDHSZ - 1, which is
+ * an all-ones bit mask precisely because PIDHSZ is a power of two.
+ */
+
+#if NPROC > 1024
+#define PIDHSZ 512
+#else
+#if NPROC > 512
+#define PIDHSZ 256
+#else
+#if NPROC > 256
+#define PIDHSZ 128
+#else
+#define PIDHSZ 64
+#endif
+#endif
+#endif
+
+struct proc *pidhash[PIDHSZ];
+struct pgrp *pgrphash[PIDHSZ];
+int pidhashmask = PIDHSZ - 1;
diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c
new file mode 100644
index 000000000000..2adb7793a3c5
--- /dev/null
+++ b/sys/kern/subr_prf.c
@@ -0,0 +1,601 @@
+/*-
+ * Copyright (c) 1986, 1988, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)subr_prf.c 8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/reboot.h>
+#include <sys/msgbuf.h>
+#include <sys/proc.h>
+#include <sys/ioctl.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/tty.h>
+#include <sys/tprintf.h>
+#include <sys/syslog.h>
+#include <sys/malloc.h>
+
+/*
+ * Note that stdarg.h and the ANSI style va_start macro is used for both
+ * ANSI and traditional C compilers.
+ */
+#include <machine/stdarg.h>
+
+#ifdef KADB
+#include <machine/kdbparam.h>
+#endif
+
+#define TOCONS 0x01
+#define TOTTY 0x02
+#define TOLOG 0x04
+
+struct tty *constty; /* pointer to console "window" tty */
+
+extern cnputc(); /* standard console putc */
+int (*v_putc)() = cnputc; /* routine to putc on virtual console */
+
+void logpri __P((int level));
+static void putchar __P((int ch, int flags, struct tty *tp));
+static char *ksprintn __P((u_long num, int base, int *len));
+void kprintf __P((const char *fmt, int flags, struct tty *tp, va_list ap));
+
+int consintr = 1; /* Ok to handle console interrupts? */
+
+/*
+ * Variable panicstr contains argument to first call to panic; used as flag
+ * to indicate that the kernel has already called panic.
+ */
+const char *panicstr;
+
+/*
+ * Panic is called on unresolvable fatal errors. It prints "panic: mesg",
+ * and then reboots. If we are called twice, then we avoid trying to sync
+ * the disks as this often leads to recursive panics.
+ *
+ * fmt and the arguments that follow it are a printf-style message; they
+ * are handed to printf() through its %r (recursive format) conversion.
+ * The first call records fmt in panicstr; a call made while panicstr is
+ * already set adds RB_NOSYNC to the boot options instead. The kernel
+ * debugger hooks are compiled in only under KGDB/KADB.
+ */
+#ifdef __GNUC__
+volatile void boot(int flags); /* boot() does not return */
+volatile /* panic() does not return */
+#endif
+void
+#ifdef __STDC__
+panic(const char *fmt, ...)
+#else
+panic(fmt, va_alist)
+ char *fmt;
+#endif
+{
+ int bootopt;
+ va_list ap;
+
+ bootopt = RB_AUTOBOOT | RB_DUMP;
+ if (panicstr) /* already panicking: do not try to sync */
+ bootopt |= RB_NOSYNC;
+ else
+ panicstr = fmt;
+
+ va_start(ap, fmt);
+ printf("panic: %r\n", fmt, ap);
+ va_end(ap);
+
+#ifdef KGDB
+ kgdb_panic();
+#endif
+#ifdef KADB
+ if (boothowto & RB_KDB)
+ kdbpanic();
+#endif
+ boot(bootopt);
+}
+
+/*
+ * Warn that a system table is full.
+ * tab is the name of the table; the message "<tab>: table is full" is
+ * logged at priority LOG_ERR.
+ */
+void
+tablefull(tab)
+ const char *tab;
+{
+
+ log(LOG_ERR, "%s: table is full\n", tab);
+}
+
+/*
+ * Uprintf prints to the controlling terminal for the current process.
+ * It may block if the tty queue is overfull. No message is printed if
+ * the queue does not clear in a reasonable time.
+ *
+ * Nothing at all is printed unless curproc has P_CONTROLT set and its
+ * session has a terminal vnode (s_ttyvp).
+ * NOTE(review): the queue check described above is not made in this
+ * function (compare the ttycheckoutq() call in tprintf()); presumably
+ * it happens further down the output path -- confirm.
+ */
+void
+#ifdef __STDC__
+uprintf(const char *fmt, ...)
+#else
+uprintf(fmt, va_alist)
+ char *fmt;
+#endif
+{
+ register struct proc *p = curproc;
+ va_list ap;
+
+ if (p->p_flag & P_CONTROLT && p->p_session->s_ttyvp) {
+ va_start(ap, fmt);
+ kprintf(fmt, TOTTY, p->p_session->s_ttyp, ap); /* tty only, not the log */
+ va_end(ap);
+ }
+}
+
+tpr_t
+tprintf_open(p) /* get a handle for tprintf(): p's session, or NULL */
+ register struct proc *p;
+{
+
+ if (p->p_flag & P_CONTROLT && p->p_session->s_ttyvp) {
+ SESSHOLD(p->p_session); /* undone by SESSRELE() in tprintf_close() */
+ return ((tpr_t) p->p_session);
+ }
+ return ((tpr_t) NULL); /* no controlling terminal */
+}
+
+void
+tprintf_close(sess) /* give back a handle obtained from tprintf_open() */
+ tpr_t sess;
+{
+
+ if (sess) /* tprintf_open() returns NULL when it took no hold */
+ SESSRELE((struct session *) sess);
+}
+
+/*
+ * tprintf prints on the controlling terminal associated
+ * with the given session.
+ *
+ * tpr is a handle from tprintf_open() and may be NULL. The message is
+ * always written to the log (after a LOG_INFO priority prefix); it is
+ * written to the terminal as well only if the session has a terminal
+ * vnode and ttycheckoutq() returns non-zero for its tty.
+ */
+void
+#ifdef __STDC__
+tprintf(tpr_t tpr, const char *fmt, ...)
+#else
+tprintf(tpr, fmt, va_alist)
+ tpr_t tpr;
+ char *fmt;
+#endif
+{
+ register struct session *sess = (struct session *)tpr;
+ struct tty *tp = NULL;
+ int flags = TOLOG; /* the log is written unconditionally */
+ va_list ap;
+
+ logpri(LOG_INFO);
+ if (sess && sess->s_ttyvp && ttycheckoutq(sess->s_ttyp, 0)) {
+ flags |= TOTTY;
+ tp = sess->s_ttyp;
+ }
+ va_start(ap, fmt);
+ kprintf(fmt, flags, tp, ap);
+ va_end(ap);
+ logwakeup();
+}
+
+/*
+ * Ttyprintf displays a message on a tty; it should be used only by
+ * the tty driver, or anything that knows the underlying tty will not
+ * be revoke(2)'d away. Other callers should use tprintf.
+ *
+ * tp is the terminal to write to; fmt and the following arguments are
+ * a printf-style message. The output goes to the tty only (TOTTY).
+ */
+void
+#ifdef __STDC__
+ttyprintf(struct tty *tp, const char *fmt, ...)
+#else
+ttyprintf(tp, fmt, va_alist)
+ struct tty *tp;
+ char *fmt;
+#endif
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ kprintf(fmt, TOTTY, tp, ap);
+ va_end(ap);
+}
+
+extern int log_open;
+
+/*
+ * Log writes to the log buffer, and guarantees not to sleep (so can be
+ * called by interrupt routines). If there is no process reading the
+ * log yet, it writes to the console also.
+ *
+ * level is the priority written in front of the message by logpri();
+ * fmt and the following arguments are a printf-style message. The copy
+ * sent to the console carries no priority prefix.
+ */
+void
+#ifdef __STDC__
+log(int level, const char *fmt, ...)
+#else
+log(level, fmt, va_alist)
+ int level;
+ char *fmt;
+#endif
+{
+ register int s;
+ va_list ap;
+
+ s = splhigh(); /* prefix and text go into the buffer at raised priority */
+ logpri(level);
+ va_start(ap, fmt);
+ kprintf(fmt, TOLOG, NULL, ap);
+ splx(s);
+ va_end(ap);
+ if (!log_open) { /* no reader: repeat the message on the console */
+ va_start(ap, fmt);
+ kprintf(fmt, TOCONS, NULL, ap);
+ va_end(ap);
+ }
+ logwakeup();
+}
+
+void
+logpri(level)
+ int level;
+{
+ register char *digit;
+ register int c;
+
+ /*
+ * Emit "<level>" into the log, with level in decimal. The
+ * string handed back by ksprintn() is consumed by stepping the
+ * pointer backwards until a NUL is read.
+ */
+ putchar('<', TOLOG, NULL);
+ digit = ksprintn((u_long)level, 10, NULL);
+ while ((c = *digit--) != 0)
+ putchar(c, TOLOG, NULL);
+ putchar('>', TOLOG, NULL);
+}
+
+/*
+ * Addlog appends a formatted message to the log buffer without writing
+ * a priority tag first (compare log(), which calls logpri()).  As in
+ * log(), the log-buffer write is done under splhigh() and the message is
+ * repeated on the console when log_open is zero.
+ */
+void
+#ifdef __STDC__
+addlog(const char *fmt, ...)
+#else
+addlog(fmt, va_alist)
+ char *fmt;
+#endif
+{
+ register int s;
+ va_list ap;
+
+ s = splhigh();
+ va_start(ap, fmt);
+ kprintf(fmt, TOLOG, NULL, ap);
+ splx(s);
+ va_end(ap);
+ /* Nobody has the log open: repeat the message on the console. */
+ if (!log_open) {
+ va_start(ap, fmt);
+ kprintf(fmt, TOCONS, NULL, ap);
+ va_end(ap);
+ }
+ logwakeup();
+}
+
+/*
+ * Kernel printf: format the message with kprintf and send it to both
+ * the console and the log (TOCONS | TOLOG).  consintr is cleared for the
+ * duration of the output and restored afterwards.  logwakeup() is
+ * skipped once panicstr has been set.
+ */
+void
+#ifdef __STDC__
+printf(const char *fmt, ...)
+#else
+printf(fmt, va_alist)
+ char *fmt;
+#endif
+{
+ va_list ap;
+ register int savintr;
+
+ savintr = consintr; /* disable interrupts */
+ consintr = 0;
+ va_start(ap, fmt);
+ kprintf(fmt, TOCONS | TOLOG, NULL, ap);
+ va_end(ap);
+ if (!panicstr)
+ logwakeup();
+ consintr = savintr; /* reenable interrupts */
+}
+
+/*
+ * Scaled down version of printf(3).
+ *
+ * fmt is the format string, flags selects the destinations (TOCONS,
+ * TOTTY, TOLOG) handed to putchar for every character, tp is the tty
+ * used for TOTTY output (may be NULL), and ap supplies the arguments.
+ *
+ * Two additional formats:
+ *
+ * The format %b is supported to decode error registers.
+ * Its usage is:
+ *
+ *	printf("reg=%b\n", regval, "<base><arg>*");
+ *
+ * where <base> is the output base expressed as a control character, e.g.
+ * \10 gives octal; \20 gives hex. Each arg is a sequence of characters,
+ * the first of which gives the bit number to be inspected (origin 1), and
+ * the next characters (up to a control character, i.e. a character <= 32),
+ * give the name of the register. Thus:
+ *
+ *	kprintf("reg=%b\n", 3, "\10\2BITTWO\1BITONE\n");
+ *
+ * would produce output:
+ *
+ *	reg=3<BITTWO,BITONE>
+ *
+ * The format %r passes an additional format string and argument list
+ * recursively. Its usage is:
+ *
+ *	fn(char *fmt, ...)
+ *	{
+ *		va_list ap;
+ *		va_start(ap, fmt);
+ *		printf("prefix: %r: suffix\n", fmt, ap);
+ *		va_end(ap);
+ *	}
+ *
+ * Space or zero padding and a field width are supported for the numeric
+ * formats only.
+ *
+ * An unrecognized conversion is echoed literally ('%', an 'l' if one was
+ * seen, then the character).  If the format string ends in the middle of
+ * a conversion, the '%' (and 'l') are echoed and formatting stops there
+ * rather than scanning beyond the terminating NUL.
+ */
+void
+kprintf(fmt, flags, tp, ap)
+	register const char *fmt;
+	int flags;
+	struct tty *tp;
+	va_list ap;
+{
+	register char *p, *q;
+	register int ch, n;
+	u_long ul;
+	int base, lflag, tmp, width;
+	char padc;
+
+	for (;;) {
+		padc = ' ';
+		width = 0;
+		/* Copy literal text up to the next '%'; NUL ends the format. */
+		while ((ch = *(u_char *)fmt++) != '%') {
+			if (ch == '\0')
+				return;
+			putchar(ch, flags, tp);
+		}
+		lflag = 0;
+reswitch:	switch (ch = *(u_char *)fmt++) {
+		case '0':
+			padc = '0';
+			goto reswitch;
+		case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+			/* Accumulate the decimal field width. */
+			for (width = 0;; ++fmt) {
+				width = width * 10 + ch - '0';
+				ch = *fmt;
+				if (ch < '0' || ch > '9')
+					break;
+			}
+			goto reswitch;
+		case 'l':
+			lflag = 1;
+			goto reswitch;
+		case 'b':
+			ul = va_arg(ap, int);
+			p = va_arg(ap, char *);
+			/* First byte of the descriptor is the numeric base. */
+			for (q = ksprintn(ul, *p++, NULL); ch = *q--;)
+				putchar(ch, flags, tp);
+
+			if (!ul)
+				break;
+
+			/* tmp records whether any bit name has been printed. */
+			for (tmp = 0; n = *p++;) {
+				if (ul & (1 << (n - 1))) {
+					putchar(tmp ? ',' : '<', flags, tp);
+					for (; (n = *p) > ' '; ++p)
+						putchar(n, flags, tp);
+					tmp = 1;
+				} else
+					for (; *p > ' '; ++p)
+						continue;
+			}
+			if (tmp)
+				putchar('>', flags, tp);
+			break;
+		case 'c':
+			putchar(va_arg(ap, int), flags, tp);
+			break;
+		case 'r':
+			p = va_arg(ap, char *);
+			kprintf(p, flags, tp, va_arg(ap, va_list));
+			break;
+		case 's':
+			p = va_arg(ap, char *);
+			while (ch = *p++)
+				putchar(ch, flags, tp);
+			break;
+		case 'd':
+			ul = lflag ? va_arg(ap, long) : va_arg(ap, int);
+			if ((long)ul < 0) {
+				putchar('-', flags, tp);
+				ul = -(long)ul;
+			}
+			base = 10;
+			goto number;
+		case 'o':
+			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+			base = 8;
+			goto number;
+		case 'u':
+			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+			base = 10;
+			goto number;
+		case 'x':
+			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+			base = 16;
+number:			p = ksprintn(ul, base, &tmp);
+			/* tmp is the digit count; pad out to the field width. */
+			if (width && (width -= tmp) > 0)
+				while (width--)
+					putchar(padc, flags, tp);
+			/* Digits come back reversed; emit down to the NUL. */
+			while (ch = *p--)
+				putchar(ch, flags, tp);
+			break;
+		default:
+			putchar('%', flags, tp);
+			if (lflag)
+				putchar('l', flags, tp);
+			/*
+			 * The format ended inside a conversion: stop here
+			 * instead of reading past the terminating NUL.
+			 */
+			if (ch == '\0')
+				return;
+			/* FALLTHROUGH */
+		case '%':
+			putchar(ch, flags, tp);
+		}
+	}
+}
+
+/*
+ * Print a character on console or users terminal. If the TOLOG flag is
+ * given the character is also saved in the circular message buffer
+ * (msgbufp, MSG_BSIZE characters) for inspection later.
+ *
+ * c is the character, flags is any combination of TOCONS, TOTTY and
+ * TOLOG, and tp is the tty to use for TOTTY (may be NULL).
+ */
+static void
+putchar(c, flags, tp)
+ register int c;
+ int flags;
+ struct tty *tp;
+{
+ extern int msgbufmapped;
+ register struct msgbuf *mbp;
+
+ /* After a panic, stop redirecting console output to constty. */
+ if (panicstr)
+ constty = NULL;
+ /* Console output with no explicit tty is redirected to constty if set. */
+ if ((flags & TOCONS) && tp == NULL && constty) {
+ tp = constty;
+ flags |= TOTTY;
+ }
+ /* If output to the redirected console tty fails, drop the redirection. */
+ if ((flags & TOTTY) && tp && tputchar(c, tp) < 0 &&
+ (flags & TOCONS) && tp == constty)
+ constty = NULL;
+ /* NUL, CR and DEL are not recorded in the message buffer. */
+ if ((flags & TOLOG) &&
+ c != '\0' && c != '\r' && c != 0177 && msgbufmapped) {
+ mbp = msgbufp;
+ /* A bad magic number means the buffer is not initialized yet. */
+ if (mbp->msg_magic != MSG_MAGIC) {
+ bzero((caddr_t)mbp, sizeof(*mbp));
+ mbp->msg_magic = MSG_MAGIC;
+ }
+ mbp->msg_bufc[mbp->msg_bufx++] = c;
+ /* Wrap the write index at the end of the buffer. */
+ if (mbp->msg_bufx < 0 || mbp->msg_bufx >= MSG_BSIZE)
+ mbp->msg_bufx = 0;
+ }
+ /* Fall back to the low-level console routine when not redirected. */
+ if ((flags & TOCONS) && constty == NULL && c != '\0')
+ (*v_putc)(c);
+}
+
+/*
+ * Scaled down version of sprintf(3).
+ *
+ * Formats into buf according to cfmt and returns the number of
+ * characters stored, not counting the terminating NUL.  Supported
+ * conversions are %c, %s, %d, %o, %u and %x (the numeric ones optionally
+ * preceded by 'l') and %%.  An unrecognized conversion is copied through
+ * literally.  No field widths are supported and no bounds checking is
+ * done: buf must be large enough for the result.
+ *
+ * If the format ends in the middle of a conversion, the '%' (and 'l')
+ * are stored, the buffer is terminated and the function returns, rather
+ * than scanning beyond the end of the format string.
+ */
+#ifdef __STDC__
+sprintf(char *buf, const char *cfmt, ...)
+#else
+sprintf(buf, cfmt, va_alist)
+	char *buf, *cfmt;
+#endif
+{
+	register const char *fmt = cfmt;
+	register char *p, *bp;
+	register int ch, base;
+	u_long ul;
+	int lflag;
+	va_list ap;
+
+	va_start(ap, cfmt);
+	for (bp = buf; ; ) {
+		/* Copy literal text; the NUL is copied too and ends the job. */
+		while ((ch = *(u_char *)fmt++) != '%')
+			if ((*bp++ = ch) == '\0') {
+				va_end(ap);
+				return ((bp - buf) - 1);
+			}
+
+		lflag = 0;
+reswitch:	switch (ch = *(u_char *)fmt++) {
+		case 'l':
+			lflag = 1;
+			goto reswitch;
+		case 'c':
+			*bp++ = va_arg(ap, int);
+			break;
+		case 's':
+			p = va_arg(ap, char *);
+			while (*bp++ = *p++)
+				continue;
+			/* Back up over the NUL copied from the argument. */
+			--bp;
+			break;
+		case 'd':
+			ul = lflag ? va_arg(ap, long) : va_arg(ap, int);
+			if ((long)ul < 0) {
+				*bp++ = '-';
+				ul = -(long)ul;
+			}
+			base = 10;
+			goto number;
+		case 'o':
+			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+			base = 8;
+			goto number;
+		case 'u':
+			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+			base = 10;
+			goto number;
+		case 'x':
+			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+			base = 16;
+			/* Digits come back reversed; copy down to the NUL. */
+number:			for (p = ksprintn(ul, base, NULL); ch = *p--;)
+				*bp++ = ch;
+			break;
+		default:
+			*bp++ = '%';
+			if (lflag)
+				*bp++ = 'l';
+			if (ch == '\0') {
+				/* Format ended inside a conversion. */
+				*bp = '\0';
+				va_end(ap);
+				return (bp - buf);
+			}
+			/* FALLTHROUGH */
+		case '%':
+			*bp++ = ch;
+		}
+	}
+}
+
+/*
+ * Convert ul to digits in the given base (base <= 16), storing them
+ * least-significant first in a static buffer.  The return value points
+ * at the most significant digit; callers read downward from it until
+ * they reach the NUL that precedes the digits.  If lenp is non-null the
+ * number of digits is stored through it.
+ */
+static char *
+ksprintn(ul, base, lenp)
+	register u_long ul;
+	register int base, *lenp;
+{
+	static const char digits[] = "0123456789abcdef";
+	/* A long in base 8, plus the NUL that stays in buf[0]. */
+	static char buf[sizeof(long) * NBBY / 3 + 2];
+	register char *last;
+
+	last = buf;
+	for (;;) {
+		*++last = digits[ul % base];
+		ul /= base;
+		if (ul == 0)
+			break;
+	}
+	if (lenp)
+		*lenp = last - buf;
+	return (last);
+}
diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c
new file mode 100644
index 000000000000..4fb81d823cac
--- /dev/null
+++ b/sys/kern/subr_prof.c
@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)subr_prof.c 8.3 (Berkeley) 9/23/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/malloc.h>
+#include <sys/gmon.h>
+
+/*
+ * Froms is actually a bunch of unsigned shorts indexing tos
+ */
+struct gmonparam _gmonparam = { GMON_PROF_OFF };
+
+extern char etext[];
+
+/*
+ * Fill in _gmonparam for kernel profiling and allocate, in a single
+ * zeroed chunk, the tos, kcount and froms arrays.  If the allocation
+ * fails a message is printed and the arrays are left unset.
+ */
+kmstartup()
+{
+	struct gmonparam *gp = &_gmonparam;
+	char *mem;
+
+	/*
+	 * Round lowpc and highpc to multiples of the density we're using
+	 * so the rest of the scaling (here and in gprof) stays in ints.
+	 */
+	gp->lowpc = ROUNDDOWN(KERNBASE, HISTFRACTION * sizeof(HISTCOUNTER));
+	gp->highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
+	gp->textsize = gp->highpc - gp->lowpc;
+	printf("Profiling kernel, textsize=%d [%x..%x]\n",
+	    gp->textsize, gp->lowpc, gp->highpc);
+
+	gp->kcountsize = gp->textsize / HISTFRACTION;
+	gp->hashfraction = HASHFRACTION;
+	gp->fromssize = gp->textsize / HASHFRACTION;
+
+	/* Size the arc table from the text size, kept within its limits. */
+	gp->tolimit = gp->textsize * ARCDENSITY / 100;
+	if (gp->tolimit < MINARCS)
+		gp->tolimit = MINARCS;
+	else if (gp->tolimit > MAXARCS)
+		gp->tolimit = MAXARCS;
+	gp->tossize = gp->tolimit * sizeof(struct tostruct);
+
+	mem = (char *)malloc(gp->kcountsize + gp->fromssize + gp->tossize,
+	    M_GPROF, M_NOWAIT);
+	if (mem == 0) {
+		printf("No memory for profiling.\n");
+		return;
+	}
+	bzero(mem, gp->kcountsize + gp->tossize + gp->fromssize);
+
+	/* Carve the chunk up: tos first, then kcount, then froms. */
+	gp->tos = (struct tostruct *)mem;
+	gp->kcount = (u_short *)(mem + gp->tossize);
+	gp->froms = (u_short *)(mem + gp->tossize + gp->kcountsize);
+}
+
+/*
+ * Return kernel profiling information.
+ *
+ * name/namelen identify the variable below this sysctl level; exactly
+ * one name component is accepted.  oldp/oldlenp and newp/newlen are the
+ * usual sysctl old-value and new-value buffers and are passed through to
+ * the sysctl_* helpers.  p is the calling process; it is not used here.
+ *
+ * Returns 0 on success, ENOTDIR if namelen is not 1, EOPNOTSUPP for an
+ * unknown name, or the error from the sysctl_* helper.
+ */
+sysctl_doprof(name, namelen, oldp, oldlenp, newp, newlen, p)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	struct proc *p;		/* was undeclared, hence implicitly int */
+{
+	struct gmonparam *gp = &_gmonparam;
+	int error;
+
+	/* all sysctl names at this level are terminal */
+	if (namelen != 1)
+		return (ENOTDIR);		/* overloaded */
+
+	switch (name[0]) {
+	case GPROF_STATE:
+		error = sysctl_int(oldp, oldlenp, newp, newlen, &gp->state);
+		if (error)
+			return (error);
+		/* Bring the profiling clock in line with the current state. */
+		if (gp->state == GMON_PROF_OFF)
+			stopprofclock(&proc0);
+		else
+			startprofclock(&proc0);
+		return (0);
+	case GPROF_COUNT:
+		return (sysctl_struct(oldp, oldlenp, newp, newlen,
+		    gp->kcount, gp->kcountsize));
+	case GPROF_FROMS:
+		return (sysctl_struct(oldp, oldlenp, newp, newlen,
+		    gp->froms, gp->fromssize));
+	case GPROF_TOS:
+		return (sysctl_struct(oldp, oldlenp, newp, newlen,
+		    gp->tos, gp->tossize));
+	case GPROF_GMONPARAM:
+		return (sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp));
+	default:
+		return (EOPNOTSUPP);
+	}
+	/* NOTREACHED */
+}
+#endif /* GPROF */
+
+/*
+ * Profiling system call.
+ *
+ * The scale factor is a fixed point number with 16 bits of fraction, so that
+ * 1.0 is represented as 0x10000. A scale factor of 0 turns off profiling.
+ * A scale greater than 1.0 is rejected with EINVAL.
+ */
+struct profil_args {
+	caddr_t	samples;
+	u_int	size;
+	u_int	offset;
+	u_int	scale;
+};
+/* ARGSUSED */
+profil(p, uap, retval)
+	struct proc *p;
+	register struct profil_args *uap;
+	int *retval;
+{
+	register struct uprof *prof;
+	int ipl;
+
+	if (uap->scale > (1 << 16))
+		return (EINVAL);
+
+	if (uap->scale != 0) {
+		prof = &p->p_stats->p_prof;
+
+		/* Block profile interrupts while changing state. */
+		ipl = splstatclock();
+		prof->pr_off = uap->offset;
+		prof->pr_scale = uap->scale;
+		prof->pr_base = uap->samples;
+		prof->pr_size = uap->size;
+		startprofclock(p);
+		splx(ipl);
+	} else
+		stopprofclock(p);
+
+	return (0);
+}
+
+/*
+ * Scale is a fixed-point number with the binary point 16 bits
+ * into the value, and is <= 1.0. pc is at most 32 bits, so the
+ * intermediate result is at most 48 bits.
+ *
+ * PC_TO_INDEX maps a program counter to a byte offset into the profile
+ * buffer: the distance of pc from pr_off is multiplied by pr_scale in
+ * u_quad_t arithmetic, shifted right 16 bits to drop the fraction, and
+ * the low bit is cleared so that the offset is even.
+ */
+#define PC_TO_INDEX(pc, prof) \
+ ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
+ (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
+
+/*
+ * Collect user-level profiling statistics; called on a profiling tick,
+ * when a process is running in user-mode. This routine may be called
+ * from an interrupt context. We try to update the user profiling buffers
+ * cheaply with fuswintr() and suswintr(). If that fails, we revert to
+ * an AST that will vector us to trap() with a context in which copyin
+ * and copyout will work. Trap will then call addupc_task().
+ *
+ * Note that we may (rarely) not get around to the AST soon enough, and
+ * lose profile ticks when the next tick overwrites this one, but in this
+ * case the system is overloaded and the profile is probably already
+ * inaccurate.
+ *
+ * p is the process being profiled, pc the sampled program counter and
+ * ticks the number of ticks to add; a zero tick count is ignored.
+ */
+void
+addupc_intr(p, pc, ticks)
+ register struct proc *p;
+ register u_long pc;
+ u_int ticks;
+{
+ register struct uprof *prof;
+ register caddr_t addr;
+ register u_int i;
+ register int v;
+
+ if (ticks == 0)
+ return;
+ prof = &p->p_stats->p_prof;
+ /* pc must lie at or above pr_off and map to an offset inside the buffer. */
+ if (pc < prof->pr_off ||
+ (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
+ return; /* out of range; ignore */
+
+ addr = prof->pr_base + i;
+ /* Fast path failed: remember the sample and request deferred handling. */
+ if ((v = fuswintr(addr)) == -1 || suswintr(addr, v + ticks) == -1) {
+ prof->pr_addr = pc;
+ prof->pr_ticks = ticks;
+ need_proftick(p);
+ }
+}
+
+/*
+ * Same job as addupc_intr(), but using copyin/copyout, so faults are
+ * acceptable here.  Should either copy fail, profiling is simply
+ * switched off for the process.
+ */
+void
+addupc_task(p, pc, ticks)
+	register struct proc *p;
+	register u_long pc;
+	u_int ticks;
+{
+	register struct uprof *prof;
+	register caddr_t slot;
+	register u_int idx;
+	u_short count;
+
+	/* Testing P_PROFIL may be unnecessary, but is certainly safe. */
+	if ((p->p_flag & P_PROFIL) == 0)
+		return;
+	if (ticks == 0)
+		return;
+
+	prof = &p->p_stats->p_prof;
+	if (pc < prof->pr_off)
+		return;
+	idx = PC_TO_INDEX(pc, prof);
+	if (idx >= prof->pr_size)
+		return;
+
+	/* Read-modify-write the counter in the user's buffer. */
+	slot = prof->pr_base + idx;
+	if (copyin(slot, (caddr_t)&count, sizeof(count)) != 0) {
+		stopprofclock(p);
+		return;
+	}
+	count += ticks;
+	if (copyout((caddr_t)&count, slot, sizeof(count)) != 0)
+		stopprofclock(p);
+}
diff --git a/sys/kern/subr_rmap.c b/sys/kern/subr_rmap.c
new file mode 100644
index 000000000000..2f31173321dd
--- /dev/null
+++ b/sys/kern/subr_rmap.c
@@ -0,0 +1,81 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)subr_rmap.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/map.h>
+#include <sys/proc.h>
+
+/*
+ * Stub: the body of this routine has been deleted from this source, so
+ * the call has no effect.  All arguments are ignored.
+ */
+void
+rminit(a1, a2, a3, a4, a5)
+	struct map *a1;
+	long a2, a3;
+	char *a4;
+	int a5;
+{
+
+	/* Body deleted. */
+}
+
+/*
+ * Stub: the body of this routine has been deleted from this source.
+ * The arguments are ignored and 0 is always returned.
+ */
+long
+rmalloc(a1, a2)
+	struct map *a1;
+	long a2;
+{
+
+	/* Body deleted. */
+	return (0L);
+}
+
+/*
+ * Stub: the body of this routine has been deleted from this source, so
+ * the call has no effect.  All arguments are ignored.
+ */
+void
+rmfree(a1, a2, a3)
+	struct map *a1;
+	long a2, a3;
+{
+
+	/* Body deleted. */
+}
diff --git a/sys/kern/subr_xxx.c b/sys/kern/subr_xxx.c
new file mode 100644
index 000000000000..c692ec11a3bd
--- /dev/null
+++ b/sys/kern/subr_xxx.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)subr_xxx.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Miscellaneous trivial functions, including many
+ * that are often inline-expanded or done in assembler.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <machine/cpu.h>
+
+/*
+ * Entry point for an operation a device does not support (for example
+ * writing to a read-only device); always fails with ENODEV.
+ */
+enodev()
+{
+	return ENODEV;
+}
+
+/*
+ * Entry point for a device whose driver is not configured; always
+ * fails with ENXIO.
+ */
+enxio()
+{
+	return ENXIO;
+}
+
+/*
+ * Entry point for an unsupported ioctl; always fails with ENOTTY.
+ */
+enoioctl()
+{
+	return ENOTTY;
+}
+
+/*
+ * Entry point for an unsupported system function: an operation that is
+ * otherwise reasonable but is not provided by the current system
+ * binary.  Always fails with ENOSYS.
+ */
+enosys()
+{
+	return ENOSYS;
+}
+
+/*
+ * Entry point for an operation that is not supported on a specific
+ * object or file type; always fails with EOPNOTSUPP.
+ */
+eopnotsupp()
+{
+	return EOPNOTSUPP;
+}
+
+/*
+ * Generic do-nothing operation; always reports success (0).
+ */
+nullop()
+{
+	return 0;
+}
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
new file mode 100644
index 000000000000..a121209f9fef
--- /dev/null
+++ b/sys/kern/sys_generic.c
@@ -0,0 +1,683 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/filedesc.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/socketvar.h>
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/malloc.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+/*
+ * Read system call.
+ *
+ * Reads up to uap->nbyte bytes from descriptor uap->fd into the user
+ * buffer uap->buf and stores the number of bytes transferred in *retval.
+ *
+ * Returns EBADF if fd is not an open descriptor with FREAD set, EINVAL
+ * if nbyte does not fit in the (signed) residual count, or the error
+ * from the file's fo_read routine.  An ERESTART, EINTR or EWOULDBLOCK
+ * that arrives after some data has been transferred is turned into
+ * success with a short count.
+ */
+struct read_args {
+	int	fd;
+	char	*buf;
+	u_int	nbyte;
+};
+/* ARGSUSED */
+read(p, uap, retval)
+	struct proc *p;
+	register struct read_args *uap;
+	int *retval;
+{
+	register struct file *fp;
+	register struct filedesc *fdp = p->p_fd;
+	struct uio auio;
+	struct iovec aiov;
+	long cnt, error = 0;
+#ifdef KTRACE
+	struct iovec ktriov;
+#endif
+
+	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
+	    (fp->f_flag & FREAD) == 0)
+		return (EBADF);
+	aiov.iov_base = (caddr_t)uap->buf;
+	aiov.iov_len = uap->nbyte;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = uap->nbyte;
+	/*
+	 * nbyte is unsigned; reject a request too large to be represented
+	 * in uio_resid, exactly as readv does for its summed lengths.
+	 */
+	if (auio.uio_resid < 0)
+		return (EINVAL);
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+#ifdef KTRACE
+	/*
+	 * if tracing, save a copy of iovec
+	 */
+	if (KTRPOINT(p, KTR_GENIO))
+		ktriov = aiov;
+#endif
+	cnt = uap->nbyte;
+	if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
+		/* Interrupted after partial transfer: report the short count. */
+		if (auio.uio_resid != cnt && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+	cnt -= auio.uio_resid;
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_GENIO) && error == 0)
+		ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktriov, cnt, error);
+#endif
+	*retval = cnt;
+	return (error);
+}
+
+/*
+ * Scatter read system call.
+ *
+ * Reads from descriptor uap->fdes into the uap->iovcnt buffers described
+ * by the user iovec array uap->iovp and stores the number of bytes
+ * transferred in *retval.  Returns EBADF for a bad descriptor, EINVAL
+ * for too many iovecs, a negative length or a total that goes negative,
+ * or the error from copyin or the file's fo_read routine.
+ */
+struct readv_args {
+ int fdes;
+ struct iovec *iovp;
+ u_int iovcnt;
+};
+readv(p, uap, retval)
+ struct proc *p;
+ register struct readv_args *uap;
+ int *retval;
+{
+ register struct file *fp;
+ register struct filedesc *fdp = p->p_fd;
+ struct uio auio;
+ register struct iovec *iov;
+ struct iovec *needfree;
+ struct iovec aiov[UIO_SMALLIOV];
+ long i, cnt, error = 0;
+ u_int iovlen;
+#ifdef KTRACE
+ struct iovec *ktriov = NULL;
+#endif
+
+ if (((u_int)uap->fdes) >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fdes]) == NULL ||
+ (fp->f_flag & FREAD) == 0)
+ return (EBADF);
+ /* note: can't use iovlen until iovcnt is validated */
+ iovlen = uap->iovcnt * sizeof (struct iovec);
+ /* Small arrays live on the stack; larger ones are allocated. */
+ if (uap->iovcnt > UIO_SMALLIOV) {
+ if (uap->iovcnt > UIO_MAXIOV)
+ return (EINVAL);
+ MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
+ needfree = iov;
+ } else {
+ iov = aiov;
+ needfree = NULL;
+ }
+ auio.uio_iov = iov;
+ auio.uio_iovcnt = uap->iovcnt;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ if (error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))
+ goto done;
+ /* Sum the lengths, rejecting negative entries and a negative total. */
+ auio.uio_resid = 0;
+ for (i = 0; i < uap->iovcnt; i++) {
+ if (iov->iov_len < 0) {
+ error = EINVAL;
+ goto done;
+ }
+ auio.uio_resid += iov->iov_len;
+ if (auio.uio_resid < 0) {
+ error = EINVAL;
+ goto done;
+ }
+ iov++;
+ }
+#ifdef KTRACE
+ /*
+ * if tracing, save a copy of iovec
+ */
+ if (KTRPOINT(p, KTR_GENIO)) {
+ MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
+ bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
+ }
+#endif
+ cnt = auio.uio_resid;
+ /* An interruption after a partial transfer is reported as a short count. */
+ if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
+ if (auio.uio_resid != cnt && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ cnt -= auio.uio_resid;
+#ifdef KTRACE
+ if (ktriov != NULL) {
+ if (error == 0)
+ ktrgenio(p->p_tracep, uap->fdes, UIO_READ, ktriov,
+ cnt, error);
+ FREE(ktriov, M_TEMP);
+ }
+#endif
+ *retval = cnt;
+done:
+ /* Release the iovec array if it was allocated above. */
+ if (needfree)
+ FREE(needfree, M_IOV);
+ return (error);
+}
+
+/*
+ * Write system call
+ *
+ * Writes up to uap->nbyte bytes from the user buffer uap->buf to
+ * descriptor uap->fd and stores the number of bytes transferred in
+ * *retval.
+ *
+ * Returns EBADF if fd is not an open descriptor with FWRITE set, EINVAL
+ * if nbyte does not fit in the (signed) residual count, or the error
+ * from the file's fo_write routine.  An ERESTART, EINTR or EWOULDBLOCK
+ * that arrives after some data has been transferred is turned into
+ * success with a short count.  EPIPE also posts SIGPIPE to the caller.
+ */
+struct write_args {
+	int	fd;
+	char	*buf;
+	u_int	nbyte;
+};
+write(p, uap, retval)
+	struct proc *p;
+	register struct write_args *uap;
+	int *retval;
+{
+	register struct file *fp;
+	register struct filedesc *fdp = p->p_fd;
+	struct uio auio;
+	struct iovec aiov;
+	long cnt, error = 0;
+#ifdef KTRACE
+	struct iovec ktriov;
+#endif
+
+	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
+	    (fp->f_flag & FWRITE) == 0)
+		return (EBADF);
+	aiov.iov_base = (caddr_t)uap->buf;
+	aiov.iov_len = uap->nbyte;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = uap->nbyte;
+	/*
+	 * nbyte is unsigned; reject a request too large to be represented
+	 * in uio_resid, exactly as writev does for its summed lengths.
+	 */
+	if (auio.uio_resid < 0)
+		return (EINVAL);
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+#ifdef KTRACE
+	/*
+	 * if tracing, save a copy of iovec
+	 */
+	if (KTRPOINT(p, KTR_GENIO))
+		ktriov = aiov;
+#endif
+	cnt = uap->nbyte;
+	if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) {
+		/* Interrupted after partial transfer: report the short count. */
+		if (auio.uio_resid != cnt && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+		if (error == EPIPE)
+			psignal(p, SIGPIPE);
+	}
+	cnt -= auio.uio_resid;
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_GENIO) && error == 0)
+		ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
+		    &ktriov, cnt, error);
+#endif
+	*retval = cnt;
+	return (error);
+}
+
+/*
+ * Gather write system call
+ *
+ * Writes to descriptor uap->fd from the uap->iovcnt buffers described by
+ * the user iovec array uap->iovp and stores the number of bytes
+ * transferred in *retval.  Returns EBADF for a bad descriptor, EINVAL
+ * for too many iovecs, a negative length or a total that goes negative,
+ * or the error from copyin or the file's fo_write routine.  EPIPE also
+ * posts SIGPIPE to the caller.
+ */
+struct writev_args {
+ int fd;
+ struct iovec *iovp;
+ u_int iovcnt;
+};
+writev(p, uap, retval)
+ struct proc *p;
+ register struct writev_args *uap;
+ int *retval;
+{
+ register struct file *fp;
+ register struct filedesc *fdp = p->p_fd;
+ struct uio auio;
+ register struct iovec *iov;
+ struct iovec *needfree;
+ struct iovec aiov[UIO_SMALLIOV];
+ long i, cnt, error = 0;
+ u_int iovlen;
+#ifdef KTRACE
+ struct iovec *ktriov = NULL;
+#endif
+
+ if (((u_int)uap->fd) >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
+ (fp->f_flag & FWRITE) == 0)
+ return (EBADF);
+ /* note: can't use iovlen until iovcnt is validated */
+ iovlen = uap->iovcnt * sizeof (struct iovec);
+ /* Small arrays live on the stack; larger ones are allocated. */
+ if (uap->iovcnt > UIO_SMALLIOV) {
+ if (uap->iovcnt > UIO_MAXIOV)
+ return (EINVAL);
+ MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
+ needfree = iov;
+ } else {
+ iov = aiov;
+ needfree = NULL;
+ }
+ auio.uio_iov = iov;
+ auio.uio_iovcnt = uap->iovcnt;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ if (error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))
+ goto done;
+ /* Sum the lengths, rejecting negative entries and a negative total. */
+ auio.uio_resid = 0;
+ for (i = 0; i < uap->iovcnt; i++) {
+ if (iov->iov_len < 0) {
+ error = EINVAL;
+ goto done;
+ }
+ auio.uio_resid += iov->iov_len;
+ if (auio.uio_resid < 0) {
+ error = EINVAL;
+ goto done;
+ }
+ iov++;
+ }
+#ifdef KTRACE
+ /*
+ * if tracing, save a copy of iovec
+ */
+ if (KTRPOINT(p, KTR_GENIO)) {
+ MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
+ bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
+ }
+#endif
+ cnt = auio.uio_resid;
+ /* An interruption after a partial transfer is reported as a short count. */
+ if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) {
+ if (auio.uio_resid != cnt && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ if (error == EPIPE)
+ psignal(p, SIGPIPE);
+ }
+ cnt -= auio.uio_resid;
+#ifdef KTRACE
+ if (ktriov != NULL) {
+ if (error == 0)
+ ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
+ ktriov, cnt, error);
+ FREE(ktriov, M_TEMP);
+ }
+#endif
+ *retval = cnt;
+done:
+ /* Release the iovec array if it was allocated above. */
+ if (needfree)
+ FREE(needfree, M_IOV);
+ return (error);
+}
+
+/*
+ * Ioctl system call
+ *
+ * Performs command uap->com on descriptor uap->fd.  The parameter size
+ * and direction are encoded in the command word: IOC_IN parameters are
+ * copied in from uap->data before the operation, IOC_OUT parameters are
+ * copied back out after a successful operation.  Parameters of up to
+ * STK_PARAMS bytes use a stack buffer; larger ones are allocated.
+ *
+ * FIONCLEX/FIOCLEX are handled entirely here.  FIONBIO, FIOASYNC,
+ * FIOSETOWN and FIOGETOWN are partly handled here and then passed on to
+ * the file's fo_ioctl routine; every other command goes straight to it.
+ *
+ * Returns EBADF for a bad descriptor or one open for neither reading
+ * nor writing, ENOTTY if the encoded size exceeds IOCPARM_MAX, ESRCH if
+ * FIOSETOWN names a nonexistent process, or the error from copyin,
+ * copyout or fo_ioctl.
+ */
+struct ioctl_args {
+	int	fd;
+	int	com;
+	caddr_t	data;
+};
+/* ARGSUSED */
+ioctl(p, uap, retval)
+	struct proc *p;
+	register struct ioctl_args *uap;
+	int *retval;
+{
+	register struct file *fp;
+	register struct filedesc *fdp;
+	register int com, error;
+	register u_int size;
+	caddr_t data, memp;
+	int tmp;
+#define STK_PARAMS	128
+	char stkbuf[STK_PARAMS];
+
+	fdp = p->p_fd;
+	if ((u_int)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+
+	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
+		return (EBADF);
+
+	/* Close-on-exec is a per-descriptor flag; no need to go further. */
+	switch (com = uap->com) {
+	case FIONCLEX:
+		fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
+		return (0);
+	case FIOCLEX:
+		fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
+		return (0);
+	}
+
+	/*
+	 * Interpret high order word to find amount of data to be
+	 * copied to/from the user's address space.
+	 */
+	size = IOCPARM_LEN(com);
+	if (size > IOCPARM_MAX)
+		return (ENOTTY);
+	memp = NULL;
+	if (size > sizeof (stkbuf)) {
+		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
+		data = memp;
+	} else
+		data = stkbuf;
+	if (com&IOC_IN) {
+		if (size) {
+			error = copyin(uap->data, data, (u_int)size);
+			if (error) {
+				if (memp)
+					free(memp, M_IOCTLOPS);
+				return (error);
+			}
+		} else
+			/* No parameter block: pass the argument value itself. */
+			*(caddr_t *)data = uap->data;
+	} else if ((com&IOC_OUT) && size)
+		/*
+		 * Zero the buffer so the user always
+		 * gets back something deterministic.
+		 */
+		bzero(data, size);
+	else if (com&IOC_VOID)
+		*(caddr_t *)data = uap->data;
+
+	switch (com) {
+
+	case FIONBIO:
+		if (tmp = *(int *)data)
+			fp->f_flag |= FNONBLOCK;
+		else
+			fp->f_flag &= ~FNONBLOCK;
+		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+		break;
+
+	case FIOASYNC:
+		if (tmp = *(int *)data)
+			fp->f_flag |= FASYNC;
+		else
+			fp->f_flag &= ~FASYNC;
+		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
+		break;
+
+	case FIOSETOWN:
+		tmp = *(int *)data;
+		if (fp->f_type == DTYPE_SOCKET) {
+			((struct socket *)fp->f_data)->so_pgid = tmp;
+			error = 0;
+			break;
+		}
+		/*
+		 * Non-positive values name a process group directly;
+		 * positive values name a process whose group is used.
+		 */
+		if (tmp <= 0) {
+			tmp = -tmp;
+		} else {
+			struct proc *p1 = pfind(tmp);
+			if (p1 == 0) {
+				error = ESRCH;
+				break;
+			}
+			tmp = p1->p_pgrp->pg_id;
+		}
+		error = (*fp->f_ops->fo_ioctl)
+		    (fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
+		break;
+
+	case FIOGETOWN:
+		if (fp->f_type == DTYPE_SOCKET) {
+			error = 0;
+			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
+			break;
+		}
+		error = (*fp->f_ops->fo_ioctl)(fp, (int)TIOCGPGRP, data, p);
+		*(int *)data = -*(int *)data;
+		break;
+
+	default:
+		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
+		break;
+	}
+	/*
+	 * Copy any data to user, size was already set and checked above.
+	 * This is done after the switch, not only in the default case, so
+	 * that commands handled above which produce a result in the kernel
+	 * buffer (FIOGETOWN) actually deliver it to the caller.
+	 */
+	if (error == 0 && (com&IOC_OUT) && size)
+		error = copyout(data, uap->data, (u_int)size);
+	if (memp)
+		free(memp, M_IOCTLOPS);
+	return (error);
+}
+
+int selwait, nselcoll;
+
+/*
+ * Select system call.
+ *
+ * selwait is used only for its address: a process blocking in select()
+ * sleeps on &selwait.  nselcoll is incremented by selwakeup() when it
+ * issues a collision wakeup; select() samples it before scanning and
+ * rescans instead of sleeping if it has changed.
+ *
+ * Arguments (struct select_args):
+ *	nd	descriptors 0..nd-1 are examined; a value above
+ *		FD_SETSIZE fails with EINVAL, a value above the size of
+ *		the process's descriptor table is silently clamped
+ *	in, ou, ex
+ *		user-space descriptor sets, scanned by selscan() with
+ *		FREAD, FWRITE and 0 respectively; a null pointer skips
+ *		that set on both copyin and copyout
+ *	tv	user-space timeout, or null to sleep with no timeout
+ *
+ * Returns 0 or an errno value.  *retval is the count stored by
+ * selscan().  When the final error is 0 the result sets are copied back
+ * to the caller.  EWOULDBLOCK (timeout) is reported as success and
+ * ERESTART is converted to EINTR.
+ */
+struct select_args {
+ u_int nd;
+ fd_set *in, *ou, *ex;
+ struct timeval *tv;
+};
+select(p, uap, retval)
+ register struct proc *p;
+ register struct select_args *uap;
+ int *retval;
+{
+ fd_set ibits[3], obits[3];
+ struct timeval atv;
+ int s, ncoll, error = 0, timo;
+ u_int ni;
+
+ bzero((caddr_t)ibits, sizeof(ibits));
+ bzero((caddr_t)obits, sizeof(obits));
+ if (uap->nd > FD_SETSIZE)
+ return (EINVAL);
+ if (uap->nd > p->p_fd->fd_nfiles)
+ uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
+ ni = howmany(uap->nd, NFDBITS) * sizeof(fd_mask); /* bytes moved per set */
+
+#define getbits(name, x) \
+ if (uap->name && \
+ (error = copyin((caddr_t)uap->name, (caddr_t)&ibits[x], ni))) \
+ goto done;
+ getbits(in, 0);
+ getbits(ou, 1);
+ getbits(ex, 2);
+#undef getbits
+
+ if (uap->tv) {
+ error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
+ sizeof (atv));
+ if (error)
+ goto done;
+ if (itimerfix(&atv)) {
+ error = EINVAL;
+ goto done;
+ }
+ s = splclock();
+ timevaladd(&atv, (struct timeval *)&time); /* atv is now an absolute deadline */
+ timo = hzto(&atv);
+ /*
+ * Avoid inadvertently sleeping forever.
+ */
+ if (timo == 0)
+ timo = 1;
+ splx(s);
+ } else
+ timo = 0;
+retry:
+ ncoll = nselcoll; /* snapshot; compared again before sleeping */
+ p->p_flag |= P_SELECT; /* selwakeup() clears this if an event beats us to the sleep */
+ error = selscan(p, ibits, obits, uap->nd, retval);
+ if (error || *retval)
+ goto done;
+ s = splhigh();
+ /* this should be timercmp(&time, &atv, >=) */
+ if (uap->tv && (time.tv_sec > atv.tv_sec ||
+ time.tv_sec == atv.tv_sec && time.tv_usec >= atv.tv_usec)) {
+ splx(s);
+ goto done;
+ }
+ if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { /* something changed since the scan */
+ splx(s);
+ goto retry;
+ }
+ p->p_flag &= ~P_SELECT;
+ error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
+ splx(s);
+ if (error == 0)
+ goto retry;
+done:
+ p->p_flag &= ~P_SELECT;
+ /* select is not restarted after signals... */
+ if (error == ERESTART)
+ error = EINTR;
+ if (error == EWOULDBLOCK)
+ error = 0;
+#define putbits(name, x) \
+ if (uap->name && \
+ (error2 = copyout((caddr_t)&obits[x], (caddr_t)uap->name, ni))) \
+ error = error2;
+ if (error == 0) {
+ int error2; /* written by each putbits expansion below */
+
+ putbits(in, 0);
+ putbits(ou, 1);
+ putbits(ex, 2);
+#undef putbits
+ }
+ return (error);
+}
+
+/*
+ * Poll each descriptor named in the three input sets.
+ *
+ * ibits[0], ibits[1] and ibits[2] are polled through the file's
+ * fo_select routine with FREAD, FWRITE and 0 respectively.  Only
+ * descriptors below nfd are considered.  Every descriptor whose
+ * fo_select returns nonzero is set in the matching obits[] entry.
+ *
+ * Returns EBADF as soon as a selected descriptor has no open file
+ * (obits may then be partially filled and *retval is not written);
+ * otherwise returns 0 with *retval set to the number of ready
+ * descriptors counted across all three sets.
+ */
+selscan(p, ibits, obits, nfd, retval)
+	struct proc *p;
+	fd_set *ibits, *obits;
+	int nfd, *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register int msk, i, j, fd;
+	register fd_mask bits;
+	struct file *fp;
+	int n = 0;
+	static int flag[3] = { FREAD, FWRITE, 0 };
+
+	for (msk = 0; msk < 3; msk++) {
+		for (i = 0; i < nfd; i += NFDBITS) {
+			bits = ibits[msk].fds_bits[i/NFDBITS];
+			/*
+			 * Peel off set bits lowest first.
+			 * NOTE(review): ffs() takes an int; if fd_mask is
+			 * wider than int the upper bits of each word are
+			 * not examined here -- confirm for the target.
+			 */
+			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
+				/*
+				 * Build the mask from an unsigned long:
+				 * a plain int 1 << j is undefined for the
+				 * sign bit and too narrow for a wider
+				 * fd_mask.
+				 */
+				bits &= ~((u_long)1 << j);
+				fp = fdp->fd_ofiles[fd];
+				if (fp == NULL)
+					return (EBADF);
+				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
+					FD_SET(fd, &obits[msk]);
+					n++;
+				}
+			}
+		}
+	}
+	*retval = n;
+	return (0);
+}
+
+/*ARGSUSED*/
+seltrue(dev, flag, p)
+ dev_t dev;
+ int flag;
+ struct proc *p;
+{
+ /* Always answers "ready" (1); none of the arguments is examined. */
+ return (1);
+}
+
+/*
+ * Record a select request.
+ *
+ * Notes in *sip that process `selector' is interested in the object.
+ * A selinfo remembers a single pid.  When a different process is
+ * already recorded, still exists, and is currently asleep on selwait,
+ * the existing entry is kept and SI_COLL is set instead; in every
+ * other case the selector's pid is stored.
+ */
+void
+selrecord(selector, sip)
+	struct proc *selector;
+	struct selinfo *sip;
+{
+	struct proc *holder;
+	pid_t mypid = selector->p_pid;
+
+	/* Already the recorded selector: nothing to update. */
+	if (sip->si_pid == mypid)
+		return;
+
+	if (sip->si_pid != 0) {
+		holder = pfind(sip->si_pid);
+		if (holder != NULL && holder->p_wchan == (caddr_t)&selwait) {
+			/* Recorded process is still waiting: collision. */
+			sip->si_flags |= SI_COLL;
+			return;
+		}
+	}
+
+	/* Slot is free, stale, or its owner is not sleeping in select. */
+	sip->si_pid = mypid;
+}
+
+/*
+ * Do a wakeup when a selectable event occurs.
+ *
+ * Does nothing when no pid is recorded in *sip.  If a collision was
+ * noted (SI_COLL), nselcoll is incremented, the flag is cleared and
+ * everything sleeping on selwait is woken.  The recorded pid is then
+ * looked up and cleared from *sip.  If that process exists and is on
+ * the selwait channel it is made runnable (SSLEEP) or merely taken off
+ * the sleep queue (any other state).  If it is not on selwait but has
+ * P_SELECT set, the flag is cleared, which makes select() rescan
+ * rather than go to sleep.
+ */
+void
+selwakeup(sip)
+ register struct selinfo *sip;
+{
+ register struct proc *p;
+ int s;
+
+ if (sip->si_pid == 0)
+ return;
+ if (sip->si_flags & SI_COLL) {
+ nselcoll++;
+ sip->si_flags &= ~SI_COLL;
+ wakeup((caddr_t)&selwait);
+ }
+ p = pfind(sip->si_pid);
+ sip->si_pid = 0; /* one-shot: the selector must record itself again */
+ if (p != NULL) {
+ s = splhigh();
+ if (p->p_wchan == (caddr_t)&selwait) {
+ if (p->p_stat == SSLEEP)
+ setrunnable(p);
+ else
+ unsleep(p);
+ } else if (p->p_flag & P_SELECT) /* still between its scan and its sleep */
+ p->p_flag &= ~P_SELECT;
+ splx(s);
+ }
+}
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
new file mode 100644
index 000000000000..4cc40baf5821
--- /dev/null
+++ b/sys/kern/sys_process.c
@@ -0,0 +1,74 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)sys_process.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+
+/*
+ * Process debugging system call.
+ *
+ * struct ptrace_args declares the argument block (req, pid, addr,
+ * data).  The implementation is not present in this file: ptrace()
+ * ignores all three of its parameters and returns ENOSYS.
+ */
+struct ptrace_args {
+ int req;
+ pid_t pid;
+ caddr_t addr;
+ int data;
+};
+ptrace(a1, a2, a3)
+ struct proc *a1;
+ struct ptrace_args *a2;
+ int *a3;
+{
+
+ /*
+ * Body deleted.
+ * What remains is a stub that always fails with ENOSYS.
+ */
+ return (ENOSYS);
+}
+
+trace_req(a1)
+ struct proc *a1;
+{
+
+ /*
+ * Body deleted.
+ * What remains is a stub: a1 is ignored and 0 is always returned.
+ */
+ return (0);
+}
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
new file mode 100644
index 000000000000..a93ae86df853
--- /dev/null
+++ b/sys/kern/sys_socket.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)sys_socket.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+struct fileops socketops = /* the socket read, write, ioctl, select and close routines below */
+ { soo_read, soo_write, soo_ioctl, soo_select, soo_close };
+
+/* ARGSUSED */
+soo_read(fp, uio, cred)
+	struct file *fp;
+	struct uio *uio;
+	struct ucred *cred;
+{
+	struct socket *so = (struct socket *)fp->f_data;
+
+	/*
+	 * Receive into the caller's uio from the socket behind fp.
+	 * Every other pointer argument of soreceive() is passed as
+	 * null; cred is not used.
+	 */
+	return (soreceive(so, (struct mbuf **)0, uio, (struct mbuf **)0,
+	    (struct mbuf **)0, (int *)0));
+}
+
+/* ARGSUSED */
+soo_write(fp, uio, cred)
+	struct file *fp;
+	struct uio *uio;
+	struct ucred *cred;
+{
+	struct socket *so = (struct socket *)fp->f_data;
+
+	/*
+	 * Send the caller's uio on the socket behind fp.  The mbuf
+	 * arguments of sosend() are all null and the final argument
+	 * is 0; cred is not used.
+	 */
+	return (sosend(so, (struct mbuf *)0, uio, (struct mbuf *)0,
+	    (struct mbuf *)0, 0));
+}
+
+/*
+ * Ioctl routine for sockets.
+ *
+ * A handful of generic commands are answered directly from the socket
+ * structure and return 0.  Anything else is routed by command group:
+ * 'i' to ifioctl(), 'r' to rtioctl(), and the remainder to the
+ * protocol's pr_usrreq entry as a PRU_CONTROL request.  `data' points
+ * at the kernel copy of the argument.
+ */
+soo_ioctl(fp, cmd, data, p)
+	struct file *fp;
+	int cmd;
+	register caddr_t data;
+	struct proc *p;
+{
+	register struct socket *so = (struct socket *)fp->f_data;
+	register int *ival = (int *)data;	/* argument viewed as an int */
+
+	switch (cmd) {
+
+	case FIONBIO:
+		if (*ival == 0)
+			so->so_state &= ~SS_NBIO;
+		else
+			so->so_state |= SS_NBIO;
+		return (0);
+
+	case FIOASYNC:
+		/* The async flag is kept on the socket and on both buffers. */
+		if (*ival == 0) {
+			so->so_state &= ~SS_ASYNC;
+			so->so_rcv.sb_flags &= ~SB_ASYNC;
+			so->so_snd.sb_flags &= ~SB_ASYNC;
+		} else {
+			so->so_state |= SS_ASYNC;
+			so->so_rcv.sb_flags |= SB_ASYNC;
+			so->so_snd.sb_flags |= SB_ASYNC;
+		}
+		return (0);
+
+	case FIONREAD:
+		*ival = so->so_rcv.sb_cc;
+		return (0);
+
+	case SIOCSPGRP:
+		so->so_pgid = *ival;
+		return (0);
+
+	case SIOCGPGRP:
+		*ival = so->so_pgid;
+		return (0);
+
+	case SIOCATMARK:
+		*ival = (so->so_state & SS_RCVATMARK) ? 1 : 0;
+		return (0);
+	}
+
+	/*
+	 * Interface/routing/protocol specific ioctls:
+	 * interface and routing ioctls should have a
+	 * different entry since a socket's unnecessary
+	 */
+	switch (IOCGROUP(cmd)) {
+	case 'i':
+		return (ifioctl(so, cmd, data, p));
+	case 'r':
+		return (rtioctl(cmd, data, p));
+	}
+	return ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
+	    (struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)0));
+}
+
+/*
+ * Select routine for sockets.
+ *
+ * `which' is FREAD, FWRITE or 0.  Returns 1 when the corresponding
+ * condition already holds: soreadable(), sowriteable(), or (for 0) an
+ * out-of-band mark being set or the socket being at the mark.
+ * Otherwise process p is recorded on the relevant socket buffer's
+ * selinfo, that buffer is flagged SB_SEL, and 0 is returned.  Any
+ * other value of `which' returns 0 without recording anything.  The
+ * whole check runs at splnet.
+ */
+soo_select(fp, which, p)
+	struct file *fp;
+	int which;
+	struct proc *p;
+{
+	register struct socket *so = (struct socket *)fp->f_data;
+	register int s;
+	int ready = 0;
+
+	s = splnet();
+	if (which == FREAD) {
+		if (soreadable(so))
+			ready = 1;
+		else {
+			selrecord(p, &so->so_rcv.sb_sel);
+			so->so_rcv.sb_flags |= SB_SEL;
+		}
+	} else if (which == FWRITE) {
+		if (sowriteable(so))
+			ready = 1;
+		else {
+			selrecord(p, &so->so_snd.sb_sel);
+			so->so_snd.sb_flags |= SB_SEL;
+		}
+	} else if (which == 0) {
+		/* Exceptional condition: waits on the receive buffer. */
+		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
+			ready = 1;
+		else {
+			selrecord(p, &so->so_rcv.sb_sel);
+			so->so_rcv.sb_flags |= SB_SEL;
+		}
+	}
+	splx(s);
+	return (ready);
+}
+
+/*
+ * Fill in a stat structure for a socket.
+ *
+ * *ub is zeroed and marked S_IFSOCK, then passed to the protocol's
+ * pr_usrreq entry as a PRU_SENSE request.  The protocol's return
+ * value is handed back unchanged.
+ */
+soo_stat(so, ub)
+	register struct socket *so;
+	register struct stat *ub;
+{
+	int error;
+
+	bzero((caddr_t)ub, sizeof (*ub));
+	ub->st_mode = S_IFSOCK;
+	error = (*so->so_proto->pr_usrreq)(so, PRU_SENSE,
+	    (struct mbuf *)ub, (struct mbuf *)0, (struct mbuf *)0);
+	return (error);
+}
+
+/* ARGSUSED */
+soo_close(fp, p)
+	struct file *fp;
+	struct proc *p;
+{
+	struct socket *so = (struct socket *)fp->f_data;
+	int error;
+
+	/*
+	 * Close the socket attached to fp, if there is one, and detach
+	 * it from the file either way.  Returns soclose()'s result, or
+	 * 0 when no socket was attached.
+	 */
+	error = so ? soclose(so) : 0;
+	fp->f_data = 0;
+	return (error);
+}
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
new file mode 100644
index 000000000000..1809905a4f6a
--- /dev/null
+++ b/sys/kern/syscalls.c
@@ -0,0 +1,251 @@
+/*
+ * System call names.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * created from @(#)syscalls.master 8.2 (Berkeley) 1/13/94
+ */
+
+char *syscallnames[] = {
+ "syscall", /* 0 = syscall */
+ "exit", /* 1 = exit */
+ "fork", /* 2 = fork */
+ "read", /* 3 = read */
+ "write", /* 4 = write */
+ "open", /* 5 = open */
+ "close", /* 6 = close */
+ "wait4", /* 7 = wait4 */
+ "old.creat", /* 8 = old creat */
+ "link", /* 9 = link */
+ "unlink", /* 10 = unlink */
+ "obs_execv", /* 11 = obsolete execv */
+ "chdir", /* 12 = chdir */
+ "fchdir", /* 13 = fchdir */
+ "mknod", /* 14 = mknod */
+ "chmod", /* 15 = chmod */
+ "chown", /* 16 = chown */
+ "break", /* 17 = break */
+ "getfsstat", /* 18 = getfsstat */
+ "old.lseek", /* 19 = old lseek */
+ "getpid", /* 20 = getpid */
+ "mount", /* 21 = mount */
+ "unmount", /* 22 = unmount */
+ "setuid", /* 23 = setuid */
+ "getuid", /* 24 = getuid */
+ "geteuid", /* 25 = geteuid */
+ "ptrace", /* 26 = ptrace */
+ "recvmsg", /* 27 = recvmsg */
+ "sendmsg", /* 28 = sendmsg */
+ "recvfrom", /* 29 = recvfrom */
+ "accept", /* 30 = accept */
+ "getpeername", /* 31 = getpeername */
+ "getsockname", /* 32 = getsockname */
+ "access", /* 33 = access */
+ "chflags", /* 34 = chflags */
+ "fchflags", /* 35 = fchflags */
+ "sync", /* 36 = sync */
+ "kill", /* 37 = kill */
+ "old.stat", /* 38 = old stat */
+ "getppid", /* 39 = getppid */
+ "old.lstat", /* 40 = old lstat */
+ "dup", /* 41 = dup */
+ "pipe", /* 42 = pipe */
+ "getegid", /* 43 = getegid */
+ "profil", /* 44 = profil */
+#ifdef KTRACE
+ "ktrace", /* 45 = ktrace */
+#else
+ "#45", /* 45 = ktrace */
+#endif
+ "sigaction", /* 46 = sigaction */
+ "getgid", /* 47 = getgid */
+ "sigprocmask", /* 48 = sigprocmask */
+ "getlogin", /* 49 = getlogin */
+ "setlogin", /* 50 = setlogin */
+ "acct", /* 51 = acct */
+ "sigpending", /* 52 = sigpending */
+ "sigaltstack", /* 53 = sigaltstack */
+ "ioctl", /* 54 = ioctl */
+ "reboot", /* 55 = reboot */
+ "revoke", /* 56 = revoke */
+ "symlink", /* 57 = symlink */
+ "readlink", /* 58 = readlink */
+ "execve", /* 59 = execve */
+ "umask", /* 60 = umask */
+ "chroot", /* 61 = chroot */
+ "old.fstat", /* 62 = old fstat */
+ "old.getkerninfo", /* 63 = old getkerninfo */
+ "old.getpagesize", /* 64 = old getpagesize */
+ "msync", /* 65 = msync */
+ "vfork", /* 66 = vfork */
+ "obs_vread", /* 67 = obsolete vread */
+ "obs_vwrite", /* 68 = obsolete vwrite */
+ "sbrk", /* 69 = sbrk */
+ "sstk", /* 70 = sstk */
+ "old.mmap", /* 71 = old mmap */
+ "vadvise", /* 72 = vadvise */
+ "munmap", /* 73 = munmap */
+ "mprotect", /* 74 = mprotect */
+ "madvise", /* 75 = madvise */
+ "obs_vhangup", /* 76 = obsolete vhangup */
+ "obs_vlimit", /* 77 = obsolete vlimit */
+ "mincore", /* 78 = mincore */
+ "getgroups", /* 79 = getgroups */
+ "setgroups", /* 80 = setgroups */
+ "getpgrp", /* 81 = getpgrp */
+ "setpgid", /* 82 = setpgid */
+ "setitimer", /* 83 = setitimer */
+ "old.wait", /* 84 = old wait */
+ "swapon", /* 85 = swapon */
+ "getitimer", /* 86 = getitimer */
+ "old.gethostname", /* 87 = old gethostname */
+ "old.sethostname", /* 88 = old sethostname */
+ "getdtablesize", /* 89 = getdtablesize */
+ "dup2", /* 90 = dup2 */
+ "#91", /* 91 = getdopt */
+ "fcntl", /* 92 = fcntl */
+ "select", /* 93 = select */
+ "#94", /* 94 = setdopt */
+ "fsync", /* 95 = fsync */
+ "setpriority", /* 96 = setpriority */
+ "socket", /* 97 = socket */
+ "connect", /* 98 = connect */
+ "old.accept", /* 99 = old accept */
+ "getpriority", /* 100 = getpriority */
+ "old.send", /* 101 = old send */
+ "old.recv", /* 102 = old recv */
+ "sigreturn", /* 103 = sigreturn */
+ "bind", /* 104 = bind */
+ "setsockopt", /* 105 = setsockopt */
+ "listen", /* 106 = listen */
+ "obs_vtimes", /* 107 = obsolete vtimes */
+ "old.sigvec", /* 108 = old sigvec */
+ "old.sigblock", /* 109 = old sigblock */
+ "old.sigsetmask", /* 110 = old sigsetmask */
+ "sigsuspend", /* 111 = sigsuspend */
+ "old.sigstack", /* 112 = old sigstack */
+ "old.recvmsg", /* 113 = old recvmsg */
+ "old.sendmsg", /* 114 = old sendmsg */
+#ifdef TRACE
+ "vtrace", /* 115 = vtrace */
+#else
+ "obs_vtrace", /* 115 = obsolete vtrace */
+#endif
+ "gettimeofday", /* 116 = gettimeofday */
+ "getrusage", /* 117 = getrusage */
+ "getsockopt", /* 118 = getsockopt */
+#ifdef vax
+ "resuba", /* 119 = resuba */
+#else
+ "#119", /* 119 = nosys */
+#endif
+ "readv", /* 120 = readv */
+ "writev", /* 121 = writev */
+ "settimeofday", /* 122 = settimeofday */
+ "fchown", /* 123 = fchown */
+ "fchmod", /* 124 = fchmod */
+ "old.recvfrom", /* 125 = old recvfrom */
+ "old.setreuid", /* 126 = old setreuid */
+ "old.setregid", /* 127 = old setregid */
+ "rename", /* 128 = rename */
+ "old.truncate", /* 129 = old truncate */
+ "old.ftruncate", /* 130 = old ftruncate */
+ "flock", /* 131 = flock */
+ "mkfifo", /* 132 = mkfifo */
+ "sendto", /* 133 = sendto */
+ "shutdown", /* 134 = shutdown */
+ "socketpair", /* 135 = socketpair */
+ "mkdir", /* 136 = mkdir */
+ "rmdir", /* 137 = rmdir */
+ "utimes", /* 138 = utimes */
+ "obs_4.2", /* 139 = obsolete 4.2 sigreturn */
+ "adjtime", /* 140 = adjtime */
+ "old.getpeername", /* 141 = old getpeername */
+ "old.gethostid", /* 142 = old gethostid */
+ "old.sethostid", /* 143 = old sethostid */
+ "old.getrlimit", /* 144 = old getrlimit */
+ "old.setrlimit", /* 145 = old setrlimit */
+ "old.killpg", /* 146 = old killpg */
+ "setsid", /* 147 = setsid */
+ "quotactl", /* 148 = quotactl */
+ "old.quota", /* 149 = old quota */
+ "old.getsockname", /* 150 = old getsockname */
+ "#151", /* 151 = nosys */
+ "#152", /* 152 = nosys */
+ "#153", /* 153 = nosys */
+ "#154", /* 154 = nosys */
+#ifdef NFS
+ "nfssvc", /* 155 = nfssvc */
+#else
+ "#155", /* 155 = nosys */
+#endif
+ "old.getdirentries", /* 156 = old getdirentries */
+ "statfs", /* 157 = statfs */
+ "fstatfs", /* 158 = fstatfs */
+ "#159", /* 159 = nosys */
+ "#160", /* 160 = nosys */
+#ifdef NFS
+ "getfh", /* 161 = getfh */
+#else
+ "#161", /* 161 = nosys */
+#endif
+ "#162", /* 162 = nosys */
+ "#163", /* 163 = nosys */
+ "#164", /* 164 = nosys */
+ "#165", /* 165 = nosys */
+ "#166", /* 166 = nosys */
+ "#167", /* 167 = nosys */
+ "#168", /* 168 = nosys */
+ "#169", /* 169 = nosys */
+ "#170", /* 170 = nosys */
+#ifdef SYSVSHM
+ "shmsys", /* 171 = shmsys */
+#else
+ "#171", /* 171 = nosys */
+#endif
+ "#172", /* 172 = nosys */
+ "#173", /* 173 = nosys */
+ "#174", /* 174 = nosys */
+ "#175", /* 175 = nosys */
+ "#176", /* 176 = nosys */
+ "#177", /* 177 = nosys */
+ "#178", /* 178 = nosys */
+ "#179", /* 179 = nosys */
+ "#180", /* 180 = nosys */
+ "setgid", /* 181 = setgid */
+ "setegid", /* 182 = setegid */
+ "seteuid", /* 183 = seteuid */
+#ifdef LFS
+ "lfs_bmapv", /* 184 = lfs_bmapv */
+ "lfs_markv", /* 185 = lfs_markv */
+ "lfs_segclean", /* 186 = lfs_segclean */
+ "lfs_segwait", /* 187 = lfs_segwait */
+#else
+ "#184", /* 184 = nosys */
+ "#185", /* 185 = nosys */
+ "#186", /* 186 = nosys */
+ "#187", /* 187 = nosys */
+#endif
+ "stat", /* 188 = stat */
+ "fstat", /* 189 = fstat */
+ "lstat", /* 190 = lstat */
+ "pathconf", /* 191 = pathconf */
+ "fpathconf", /* 192 = fpathconf */
+ "#193", /* 193 = nosys */
+ "getrlimit", /* 194 = getrlimit */
+ "setrlimit", /* 195 = setrlimit */
+ "getdirentries", /* 196 = getdirentries */
+ "mmap", /* 197 = mmap */
+ "__syscall", /* 198 = __syscall */
+ "lseek", /* 199 = lseek */
+ "truncate", /* 200 = truncate */
+ "ftruncate", /* 201 = ftruncate */
+ "__sysctl", /* 202 = __sysctl */
+ "mlock", /* 203 = mlock */
+ "munlock", /* 204 = munlock */
+ "#205", /* 205 = nosys */
+ "#206", /* 206 = nosys */
+ "#207", /* 207 = nosys */
+ "#208", /* 208 = nosys */
+ "#209", /* 209 = nosys */
+ "#210", /* 210 = nosys */
+};
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
new file mode 100644
index 000000000000..1b8de145fba7
--- /dev/null
+++ b/sys/kern/syscalls.master
@@ -0,0 +1,276 @@
+ @(#)syscalls.master 8.2 (Berkeley) 1/13/94
+; System call name/number master file.
+; Processed to create init_sysent.c, syscalls.c and syscall.h.
+
+; Columns: number type nargs name altname/comments
+; number system call number, must be in order
+; type one of STD, OBSOL, UNIMPL, COMPAT
+; nargs number of arguments
+; name name of syscall routine
+; altname name of system call if different
+; for UNIMPL/OBSOL, name continues with comments
+
+; types:
+; STD always included
+; COMPAT included on COMPAT #ifdef
+; LIBCOMPAT included on COMPAT #ifdef, and placed in syscall.h
+; OBSOL obsolete, not included in system, only specifies name
+; UNIMPL not implemented, placeholder only
+
+; #ifdef's, etc. may be included, and are copied to the output files.
+
+; Reserved/unimplemented system calls in the range 0-150 inclusive
+; are reserved for use in future Berkeley releases.
+; Additional system calls implemented in vendor and other
+; redistributions should be placed in the reserved range at the end
+; of the current calls.
+
+0 STD 0 nosys syscall
+1 STD 1 exit
+2 STD 0 fork
+3 STD 3 read
+4 STD 3 write
+5 STD 3 open
+6 STD 1 close
+7 STD 4 wait4
+8 COMPAT 2 creat
+9 STD 2 link
+10 STD 1 unlink
+11 OBSOL 2 execv
+12 STD 1 chdir
+13 STD 1 fchdir
+14 STD 3 mknod
+15 STD 2 chmod
+16 STD 3 chown
+17 STD 1 obreak break
+18 STD 3 getfsstat
+19 COMPAT 3 lseek
+20 STD 0 getpid
+21 STD 4 mount
+22 STD 2 unmount
+23 STD 1 setuid
+24 STD 0 getuid
+25 STD 0 geteuid
+26 STD 4 ptrace
+27 STD 3 recvmsg
+28 STD 3 sendmsg
+29 STD 6 recvfrom
+30 STD 3 accept
+31 STD 3 getpeername
+32 STD 3 getsockname
+33 STD 2 access
+34 STD 2 chflags
+35 STD 2 fchflags
+36 STD 0 sync
+37 STD 2 kill
+38 COMPAT 2 stat
+39 STD 0 getppid
+40 COMPAT 2 lstat
+41 STD 2 dup
+42 STD 0 pipe
+43 STD 0 getegid
+44 STD 4 profil
+#ifdef KTRACE
+45 STD 4 ktrace
+#else
+45 UNIMPL 0 ktrace
+#endif
+46 STD 3 sigaction
+47 STD 0 getgid
+48 STD 2 sigprocmask
+49 STD 2 getlogin
+50 STD 1 setlogin
+51 STD 1 acct
+52 STD 0 sigpending
+53 STD 2 sigaltstack
+54 STD 3 ioctl
+55 STD 1 reboot
+56 STD 1 revoke
+57 STD 2 symlink
+58 STD 3 readlink
+59 STD 3 execve
+60 STD 1 umask
+61 STD 1 chroot
+62 COMPAT 2 fstat
+63 COMPAT 4 getkerninfo
+64 COMPAT 0 getpagesize
+65 STD 2 msync
+66 STD 0 vfork
+67 OBSOL 0 vread
+68 OBSOL 0 vwrite
+69 STD 1 sbrk
+70 STD 1 sstk
+71 COMPAT 7 mmap
+72 STD 1 ovadvise vadvise
+73 STD 2 munmap
+74 STD 3 mprotect
+75 STD 3 madvise
+76 OBSOL 0 vhangup
+77 OBSOL 0 vlimit
+78 STD 3 mincore
+79 STD 2 getgroups
+80 STD 2 setgroups
+81 STD 0 getpgrp
+82 STD 2 setpgid
+83 STD 3 setitimer
+84 COMPAT 0 wait
+85 STD 1 swapon
+86 STD 2 getitimer
+87 COMPAT 2 gethostname
+88 COMPAT 2 sethostname
+89 STD 0 getdtablesize
+90 STD 2 dup2
+91 UNIMPL 2 getdopt
+92 STD 3 fcntl
+93 STD 5 select
+94 UNIMPL 2 setdopt
+95 STD 1 fsync
+96 STD 3 setpriority
+97 STD 3 socket
+98 STD 3 connect
+99 COMPAT 3 accept
+100 STD 2 getpriority
+101 COMPAT 4 send
+102 COMPAT 4 recv
+103 STD 1 sigreturn
+104 STD 3 bind
+105 STD 5 setsockopt
+106 STD 2 listen
+107 OBSOL 0 vtimes
+108 COMPAT 3 sigvec
+109 COMPAT 1 sigblock
+110 COMPAT 1 sigsetmask
+111 STD 1 sigsuspend
+112 COMPAT 2 sigstack
+113 COMPAT 3 recvmsg
+114 COMPAT 3 sendmsg
+#ifdef TRACE
+115 STD 2 vtrace
+#else
+115 OBSOL 2 vtrace
+#endif
+116 STD 2 gettimeofday
+117 STD 2 getrusage
+118 STD 5 getsockopt
+#ifdef vax
+119 STD 1 resuba
+#else
+119 UNIMPL 0 nosys
+#endif
+120 STD 3 readv
+121 STD 3 writev
+122 STD 2 settimeofday
+123 STD 3 fchown
+124 STD 2 fchmod
+125 COMPAT 6 recvfrom
+126 COMPAT 2 setreuid
+127 COMPAT 2 setregid
+128 STD 2 rename
+129 COMPAT 2 truncate
+130 COMPAT 2 ftruncate
+131 STD 2 flock
+132 STD 2 mkfifo
+133 STD 6 sendto
+134 STD 2 shutdown
+135 STD 5 socketpair
+136 STD 2 mkdir
+137 STD 1 rmdir
+138 STD 2 utimes
+139 OBSOL 0 4.2 sigreturn
+140 STD 2 adjtime
+141 COMPAT 3 getpeername
+142 COMPAT 0 gethostid
+143 COMPAT 1 sethostid
+144 COMPAT 2 getrlimit
+145 COMPAT 2 setrlimit
+146 COMPAT 2 killpg
+147 STD 0 setsid
+148 STD 4 quotactl
+149 COMPAT 4 quota
+150 COMPAT 3 getsockname
+
+; Syscalls 151-180 inclusive are reserved for vendor-specific
+; system calls. (This includes various calls added for compatibility
+; with other Unix variants.)
+; Some of these calls are now supported by BSD...
+151 UNIMPL 0 nosys
+152 UNIMPL 0 nosys
+153 UNIMPL 0 nosys
+154 UNIMPL 0 nosys
+#ifdef NFS
+155 STD 2 nfssvc
+#else
+155 UNIMPL 0 nosys
+#endif
+156 COMPAT 4 getdirentries
+157 STD 2 statfs
+158 STD 2 fstatfs
+159 UNIMPL 0 nosys
+160 UNIMPL 0 nosys
+#ifdef NFS
+161 STD 2 getfh
+#else
+161 UNIMPL 0 nosys
+#endif
+162 UNIMPL 0 nosys
+163 UNIMPL 0 nosys
+164 UNIMPL 0 nosys
+165 UNIMPL 0 nosys
+166 UNIMPL 0 nosys
+167 UNIMPL 0 nosys
+168 UNIMPL 0 nosys
+169 UNIMPL 0 nosys
+170 UNIMPL 0 nosys
+#ifdef SYSVSHM
+171 STD 4 shmsys
+#else
+171 UNIMPL 0 nosys
+#endif
+172 UNIMPL 0 nosys
+173 UNIMPL 0 nosys
+174 UNIMPL 0 nosys
+175 UNIMPL 0 nosys
+176 UNIMPL 0 nosys
+177 UNIMPL 0 nosys
+178 UNIMPL 0 nosys
+179 UNIMPL 0 nosys
+180 UNIMPL 0 nosys
+
+; Syscalls 181-199 are used by/reserved for BSD
+181 STD 1 setgid
+182 STD 1 setegid
+183 STD 1 seteuid
+#ifdef LFS
+184 STD 3 lfs_bmapv
+185 STD 3 lfs_markv
+186 STD 2 lfs_segclean
+187 STD 2 lfs_segwait
+#else
+184 UNIMPL 0 nosys
+185 UNIMPL 0 nosys
+186 UNIMPL 0 nosys
+187 UNIMPL 0 nosys
+#endif
+188 STD 2 stat
+189 STD 2 fstat
+190 STD 2 lstat
+191 STD 2 pathconf
+192 STD 2 fpathconf
+193 UNIMPL 0 nosys
+194 STD 2 getrlimit
+195 STD 2 setrlimit
+196 STD 4 getdirentries
+197 STD 8 mmap
+198 STD 0 nosys __syscall
+199 STD 5 lseek
+200 STD 4 truncate
+201 STD 4 ftruncate
+202 STD 6 __sysctl
+203 STD 2 mlock
+204 STD 2 munlock
+205 UNIMPL 0 nosys
+206 UNIMPL 0 nosys
+207 UNIMPL 0 nosys
+208 UNIMPL 0 nosys
+209 UNIMPL 0 nosys
+210 UNIMPL 0 nosys
diff --git a/sys/kern/tty.c b/sys/kern/tty.c
new file mode 100644
index 000000000000..6cc7be23700f
--- /dev/null
+++ b/sys/kern/tty.c
@@ -0,0 +1,1923 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty.c 8.8 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#define TTYDEFCHARS
+#include <sys/tty.h>
+#undef TTYDEFCHARS
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/dkstat.h>
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/syslog.h>
+
+#include <vm/vm.h>
+
+static int proc_compare __P((struct proc *p1, struct proc *p2));
+static int ttnread __P((struct tty *));
+static void ttyblock __P((struct tty *tp));
+static void ttyecho __P((int, struct tty *tp));
+static void ttyrubo __P((struct tty *, int));
+
+/* Symbolic sleep message strings. */
+char ttclos[] = "ttycls";
+char ttopen[] = "ttyopn";
+char ttybg[] = "ttybg";
+char ttybuf[] = "ttybuf";
+char ttyin[] = "ttyin";
+char ttyout[] = "ttyout";
+
+/*
+ * Table with character classes and parity. The 8th bit indicates parity,
+ * the 7th bit indicates the character is an alphameric or underscore (for
+ * ALTWERASE), and the low 6 bits indicate delay type. If the low 6 bits
+ * are 0 then the character needs no special processing on output; classes
+ * other than 0 might be translated or (not currently) require delays.
+ */
+#define E 0x00 /* Even parity. */
+#define O 0x80 /* Odd parity. */
+#define PARITY(c) (char_type[c] & O)
+
+#define ALPHA 0x40 /* Alpha or underscore. */
+#define ISALPHA(c) (char_type[(c) & TTY_CHARMASK] & ALPHA)
+
+#define CCLASSMASK 0x3f
+#define CCLASS(c) (char_type[c] & CCLASSMASK)
+
+#define BS BACKSPACE
+#define CC CONTROL
+#define CR RETURN
+#define NA ORDINARY | ALPHA
+#define NL NEWLINE
+#define NO ORDINARY
+#define TB TAB
+#define VT VTAB
+
+char const char_type[] = {
+ E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* nul - bel */
+ O|BS, E|TB, E|NL, O|CC, E|VT, O|CR, O|CC, E|CC, /* bs - si */
+ O|CC, E|CC, E|CC, O|CC, E|CC, O|CC, O|CC, E|CC, /* dle - etb */
+ E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* can - us */
+ O|NO, E|NO, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* sp - ' */
+ E|NO, O|NO, O|NO, E|NO, O|NO, E|NO, E|NO, O|NO, /* ( - / */
+ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* 0 - 7 */
+ O|NA, E|NA, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* 8 - ? */
+ O|NO, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* @ - G */
+ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* H - O */
+ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* P - W */
+ O|NA, E|NA, E|NA, O|NO, E|NO, O|NO, O|NO, O|NA, /* X - _ */
+ E|NO, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* ` - g */
+ O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* h - o */
+ O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* p - w */
+ E|NA, O|NA, O|NA, E|NO, O|NO, E|NO, E|NO, O|CC, /* x - del */
+ /*
+ * Meta chars; should be settable per character set;
+ * for now, treat them all as normal characters.
+ */
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+ NA, NA, NA, NA, NA, NA, NA, NA,
+};
+#undef BS
+#undef CC
+#undef CR
+#undef NA
+#undef NL
+#undef NO
+#undef TB
+#undef VT
+
+/* Macros to clear/set/test flags. */
+#define SET(t, f) (t) |= (f)
+#define CLR(t, f) (t) &= ~(f)
+#define ISSET(t, f) ((t) & (f))
+
+/*
+ * Initial open of tty, or (re)entry to standard tty line discipline: records the device, marks the tty open, always returns 0.
+ */
+int
+ttyopen(device, tp)
+ dev_t device;
+ register struct tty *tp;
+{
+ int s;
+
+ s = spltty(); /* Saved level is restored by splx() below. */
+ tp->t_dev = device;
+ if (!ISSET(tp->t_state, TS_ISOPEN)) { /* Not already marked open. */
+ SET(tp->t_state, TS_ISOPEN);
+ bzero(&tp->t_winsize, sizeof(tp->t_winsize)); /* Start from a zeroed window size. */
+ }
+ CLR(tp->t_state, TS_WOPEN);
+ splx(s);
+ return (0);
+}
+
+/*
+ * Handle close() on a tty line: flush and set to initial state,
+ * bumping generation number so that pending read/write calls
+ * can detect recycling of the tty. Always returns 0.
+ */
+int
+ttyclose(tp)
+ register struct tty *tp;
+{
+ extern struct tty *constty; /* Temporary virtual console. */
+
+ if (constty == tp) /* This tty stops being the virtual console. */
+ constty = NULL;
+
+ ttyflush(tp, FREAD | FWRITE); /* Discard both input and output queues. */
+
+ tp->t_gen++;
+ tp->t_pgrp = NULL;
+ tp->t_session = NULL;
+ tp->t_state = 0; /* Clears every state flag, TS_ISOPEN included. */
+ return (0);
+}
+
+#define FLUSHQ(q) { \
+ if ((q)->c_cc) \
+ ndflush(q, (q)->c_cc); \
+}
+
+/* Is 'c' a line delimiter ("break" character)? Uses a `cc' array taken from the scope of the caller. */
+#define TTBREAKC(c) \
+ ((c) == '\n' || ((c) == cc[VEOF] || \
+ (c) == cc[VEOL] || (c) == cc[VEOL2]) && (c) != _POSIX_VDISABLE)
+
+
+/*
+ * Process input of a single character received on a tty. Returns 0 or the value of ttstart(tp).
+ */
+int
+ttyinput(c, tp)
+ register int c;
+ register struct tty *tp;
+{
+ register int iflag, lflag;
+ register u_char *cc;
+ int i, err;
+
+ /*
+ * If input is pending take it first.
+ */
+ lflag = tp->t_lflag;
+ if (ISSET(lflag, PENDIN))
+ ttypend(tp);
+ /*
+ * Gather stats.
+ */
+ if (ISSET(lflag, ICANON)) {
+ ++tk_cancc;
+ ++tp->t_cancc;
+ } else {
+ ++tk_rawcc;
+ ++tp->t_rawcc;
+ }
+ ++tk_nin;
+
+ /* Handle exceptional conditions (break, parity, framing). */
+ cc = tp->t_cc;
+ iflag = tp->t_iflag;
+ if (err = (ISSET(c, TTY_ERRORMASK))) { /* Assignment intended: err keeps the error bits. */
+ CLR(c, TTY_ERRORMASK);
+ if (ISSET(err, TTY_FE) && !c) { /* Break. */
+ if (ISSET(iflag, IGNBRK))
+ goto endcase;
+ else if (ISSET(iflag, BRKINT) &&
+ ISSET(lflag, ISIG) &&
+ (cc[VINTR] != _POSIX_VDISABLE))
+ c = cc[VINTR];
+ else if (ISSET(iflag, PARMRK))
+ goto parmrk;
+ } else if (ISSET(err, TTY_PE) &&
+ ISSET(iflag, INPCK) || ISSET(err, TTY_FE)) { /* Parses as (PE && INPCK) || FE. */
+ if (ISSET(iflag, IGNPAR))
+ goto endcase;
+ else if (ISSET(iflag, PARMRK)) {
+parmrk: (void)putc(0377 | TTY_QUOTE, &tp->t_rawq);
+ (void)putc(0 | TTY_QUOTE, &tp->t_rawq);
+ (void)putc(c | TTY_QUOTE, &tp->t_rawq);
+ goto endcase;
+ } else
+ c = 0;
+ }
+ }
+ /*
+ * In tandem mode, check high water mark.
+ */
+ if (ISSET(iflag, IXOFF))
+ ttyblock(tp);
+ if (!ISSET(tp->t_state, TS_TYPEN) && ISSET(iflag, ISTRIP))
+ CLR(c, 0x80); /* ISTRIP: drop the eighth bit. */
+ if (!ISSET(lflag, EXTPROC)) {
+ /*
+ * Check for literal nexting very first
+ */
+ if (ISSET(tp->t_state, TS_LNCH)) {
+ SET(c, TTY_QUOTE);
+ CLR(tp->t_state, TS_LNCH);
+ }
+ /*
+ * Scan for special characters. This code
+ * is really just a big case statement with
+ * non-constant cases. The bottom of the
+ * case statement is labeled ``endcase'', so goto
+ * it after a case match, or similar.
+ */
+
+ /*
+ * Control chars which aren't controlled
+ * by ICANON, ISIG, or IXON.
+ */
+ if (ISSET(lflag, IEXTEN)) {
+ if (CCEQ(cc[VLNEXT], c)) {
+ if (ISSET(lflag, ECHO)) {
+ if (ISSET(lflag, ECHOE)) {
+ (void)ttyoutput('^', tp);
+ (void)ttyoutput('\b', tp);
+ } else
+ ttyecho(c, tp);
+ }
+ SET(tp->t_state, TS_LNCH); /* Quote the next character received. */
+ goto endcase;
+ }
+ if (CCEQ(cc[VDISCARD], c)) {
+ if (ISSET(lflag, FLUSHO))
+ CLR(tp->t_lflag, FLUSHO);
+ else {
+ ttyflush(tp, FWRITE);
+ ttyecho(c, tp);
+ if (tp->t_rawq.c_cc + tp->t_canq.c_cc)
+ ttyretype(tp);
+ SET(tp->t_lflag, FLUSHO);
+ }
+ goto startoutput;
+ }
+ }
+ /*
+ * Signals.
+ */
+ if (ISSET(lflag, ISIG)) {
+ if (CCEQ(cc[VINTR], c) || CCEQ(cc[VQUIT], c)) {
+ if (!ISSET(lflag, NOFLSH))
+ ttyflush(tp, FREAD | FWRITE);
+ ttyecho(c, tp);
+ pgsignal(tp->t_pgrp,
+ CCEQ(cc[VINTR], c) ? SIGINT : SIGQUIT, 1);
+ goto endcase;
+ }
+ if (CCEQ(cc[VSUSP], c)) {
+ if (!ISSET(lflag, NOFLSH))
+ ttyflush(tp, FREAD);
+ ttyecho(c, tp);
+ pgsignal(tp->t_pgrp, SIGTSTP, 1);
+ goto endcase;
+ }
+ }
+ /*
+ * Handle start/stop characters.
+ */
+ if (ISSET(iflag, IXON)) {
+ if (CCEQ(cc[VSTOP], c)) {
+ if (!ISSET(tp->t_state, TS_TTSTOP)) {
+ SET(tp->t_state, TS_TTSTOP);
+#ifdef sun4c /* XXX */
+ (*tp->t_stop)(tp, 0);
+#else
+ (*cdevsw[major(tp->t_dev)].d_stop)(tp,
+ 0);
+#endif
+ return (0);
+ }
+ if (!CCEQ(cc[VSTART], c))
+ return (0);
+ /*
+ * if VSTART == VSTOP then toggle
+ */
+ goto endcase;
+ }
+ if (CCEQ(cc[VSTART], c))
+ goto restartoutput;
+ }
+ /*
+ * IGNCR, ICRNL, & INLCR
+ */
+ if (c == '\r') {
+ if (ISSET(iflag, IGNCR))
+ goto endcase;
+ else if (ISSET(iflag, ICRNL))
+ c = '\n';
+ } else if (c == '\n' && ISSET(iflag, INLCR))
+ c = '\r';
+ }
+ if (!ISSET(tp->t_lflag, EXTPROC) && ISSET(lflag, ICANON)) {
+ /*
+ * From here on down canonical mode character
+ * processing takes place.
+ */
+ /*
+ * erase (^H / ^?)
+ */
+ if (CCEQ(cc[VERASE], c)) {
+ if (tp->t_rawq.c_cc)
+ ttyrub(unputc(&tp->t_rawq), tp);
+ goto endcase;
+ }
+ /*
+ * kill (^U)
+ */
+ if (CCEQ(cc[VKILL], c)) {
+ if (ISSET(lflag, ECHOKE) &&
+ tp->t_rawq.c_cc == tp->t_rocount &&
+ !ISSET(lflag, ECHOPRT))
+ while (tp->t_rawq.c_cc)
+ ttyrub(unputc(&tp->t_rawq), tp);
+ else {
+ ttyecho(c, tp);
+ if (ISSET(lflag, ECHOK) ||
+ ISSET(lflag, ECHOKE))
+ ttyecho('\n', tp);
+ FLUSHQ(&tp->t_rawq);
+ tp->t_rocount = 0;
+ }
+ CLR(tp->t_state, TS_LOCAL);
+ goto endcase;
+ }
+ /*
+ * word erase (^W)
+ */
+ if (CCEQ(cc[VWERASE], c)) {
+ int alt = ISSET(lflag, ALTWERASE);
+ int ctype;
+
+ /*
+ * erase whitespace
+ */
+ while ((c = unputc(&tp->t_rawq)) == ' ' || c == '\t')
+ ttyrub(c, tp);
+ if (c == -1) /* Presumably unputc() yields -1 once the queue is empty. */
+ goto endcase;
+ /*
+ * erase last char of word and remember the
+ * next chars type (for ALTWERASE)
+ */
+ ttyrub(c, tp);
+ c = unputc(&tp->t_rawq);
+ if (c == -1)
+ goto endcase;
+ if (c == ' ' || c == '\t') {
+ (void)putc(c, &tp->t_rawq); /* Put the separator back. */
+ goto endcase;
+ }
+ ctype = ISALPHA(c);
+ /*
+ * erase rest of word
+ */
+ do {
+ ttyrub(c, tp);
+ c = unputc(&tp->t_rawq);
+ if (c == -1)
+ goto endcase;
+ } while (c != ' ' && c != '\t' &&
+ (alt == 0 || ISALPHA(c) == ctype));
+ (void)putc(c, &tp->t_rawq); /* Put back the character that ended the word. */
+ goto endcase;
+ }
+ /*
+ * reprint line (^R)
+ */
+ if (CCEQ(cc[VREPRINT], c)) {
+ ttyretype(tp);
+ goto endcase;
+ }
+ /*
+ * ^T - kernel info and generate SIGINFO
+ */
+ if (CCEQ(cc[VSTATUS], c)) {
+ if (ISSET(lflag, ISIG))
+ pgsignal(tp->t_pgrp, SIGINFO, 1);
+ if (!ISSET(lflag, NOKERNINFO))
+ ttyinfo(tp);
+ goto endcase;
+ }
+ }
+ /*
+ * Check for input buffer overflow
+ */
+ if (tp->t_rawq.c_cc + tp->t_canq.c_cc >= TTYHOG) {
+ if (ISSET(iflag, IMAXBEL)) {
+ if (tp->t_outq.c_cc < tp->t_hiwat)
+ (void)ttyoutput(CTRL('g'), tp);
+ } else
+ ttyflush(tp, FREAD | FWRITE);
+ goto endcase;
+ }
+ /*
+ * Put data char in q for user and
+ * wakeup on seeing a line delimiter.
+ */
+ if (putc(c, &tp->t_rawq) >= 0) {
+ if (!ISSET(lflag, ICANON)) {
+ ttwakeup(tp);
+ ttyecho(c, tp);
+ goto endcase;
+ }
+ if (TTBREAKC(c)) {
+ tp->t_rocount = 0;
+ catq(&tp->t_rawq, &tp->t_canq);
+ ttwakeup(tp);
+ } else if (tp->t_rocount++ == 0)
+ tp->t_rocol = tp->t_column;
+ if (ISSET(tp->t_state, TS_ERASE)) {
+ /*
+ * end of prterase \.../
+ */
+ CLR(tp->t_state, TS_ERASE);
+ (void)ttyoutput('/', tp);
+ }
+ i = tp->t_column; /* Column before the echo, to measure its width. */
+ ttyecho(c, tp);
+ if (CCEQ(cc[VEOF], c) && ISSET(lflag, ECHO)) {
+ /*
+ * Place the cursor over the '^' of the ^D.
+ */
+ i = min(2, tp->t_column - i);
+ while (i > 0) {
+ (void)ttyoutput('\b', tp);
+ i--;
+ }
+ }
+ }
+endcase:
+ /*
+ * IXANY means allow any character to restart output.
+ */
+ if (ISSET(tp->t_state, TS_TTSTOP) &&
+ !ISSET(iflag, IXANY) && cc[VSTART] != cc[VSTOP])
+ return (0);
+restartoutput:
+ CLR(tp->t_lflag, FLUSHO);
+ CLR(tp->t_state, TS_TTSTOP);
+startoutput:
+ return (ttstart(tp));
+}
+
+/*
+ * Output a single character on a tty, doing output processing
+ * as needed (expanding tabs, newline processing, etc.).
+ * Returns < 0 if succeeds, otherwise returns char to resend.
+ * Must be recursive.
+ */
+int
+ttyoutput(c, tp)
+ register int c;
+ register struct tty *tp;
+{
+ register long oflag;
+ register int col, s;
+
+ oflag = tp->t_oflag;
+ if (!ISSET(oflag, OPOST)) { /* No output processing: queue the character as is. */
+ if (ISSET(tp->t_lflag, FLUSHO))
+ return (-1);
+ if (putc(c, &tp->t_outq))
+ return (c);
+ tk_nout++;
+ tp->t_outcc++;
+ return (-1);
+ }
+ /*
+ * Do tab expansion if OXTABS is set. Special case if we do external
+ * processing, we don't do the tab expansion because we'll probably
+ * get it wrong. If tab expansion needs to be done, let it happen
+ * externally.
+ */
+ CLR(c, ~TTY_CHARMASK);
+ if (c == '\t' &&
+ ISSET(oflag, OXTABS) && !ISSET(tp->t_lflag, EXTPROC)) {
+ c = 8 - (tp->t_column & 7); /* Spaces needed to reach the next multiple of 8. */
+ if (!ISSET(tp->t_lflag, FLUSHO)) {
+ s = spltty(); /* Don't interrupt tabs. */
+ c -= b_to_q(" ", c, &tp->t_outq); /* Presumably b_to_q() returns the count NOT queued. */
+ tk_nout += c;
+ tp->t_outcc += c;
+ splx(s);
+ }
+ tp->t_column += c;
+ return (c ? -1 : '\t'); /* Nothing accounted for: ask the caller to resend the tab. */
+ }
+ if (c == CEOT && ISSET(oflag, ONOEOT))
+ return (-1);
+
+ /*
+ * Newline translation: if ONLCR is set,
+ * translate newline into "\r\n".
+ */
+ if (c == '\n' && ISSET(tp->t_oflag, ONLCR)) {
+ tk_nout++;
+ tp->t_outcc++;
+ if (putc('\r', &tp->t_outq))
+ return (c);
+ }
+ tk_nout++;
+ tp->t_outcc++;
+ if (!ISSET(tp->t_lflag, FLUSHO) && putc(c, &tp->t_outq))
+ return (c);
+
+ col = tp->t_column; /* Track the output column by character class. */
+ switch (CCLASS(c)) {
+ case BACKSPACE:
+ if (col > 0)
+ --col;
+ break;
+ case CONTROL:
+ break;
+ case NEWLINE:
+ case RETURN:
+ col = 0;
+ break;
+ case ORDINARY:
+ ++col;
+ break;
+ case TAB:
+ col = (col + 8) & ~7;
+ break;
+ }
+ tp->t_column = col;
+ return (-1);
+}
+
+/*
+ * Ioctls for all tty devices. Called after line-discipline specific ioctl
+ * has been called to do discipline-specific functions and/or reject any
+ * of these ioctl commands. Returns 0 on success or an error number; unknown commands go to ttcompat() or yield -1.
+ */
+/* ARGSUSED */
+int
+ttioctl(tp, cmd, data, flag)
+ register struct tty *tp;
+ int cmd, flag;
+ void *data;
+{
+ extern struct tty *constty; /* Temporary virtual console. */
+ extern int nlinesw;
+ register struct proc *p;
+ int s, error;
+
+ p = curproc; /* XXX */
+
+ /* If the ioctl involves modification, hang if in the background. */
+ switch (cmd) {
+ case TIOCFLUSH:
+ case TIOCSETA:
+ case TIOCSETD:
+ case TIOCSETAF:
+ case TIOCSETAW:
+#ifdef notdef
+ case TIOCSPGRP:
+#endif
+ case TIOCSTI:
+ case TIOCSWINSZ:
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ case TIOCLBIC:
+ case TIOCLBIS:
+ case TIOCLSET:
+ case TIOCSETC:
+ case OTIOCSETD:
+ case TIOCSETN:
+ case TIOCSETP:
+ case TIOCSLTC:
+#endif
+ while (isbackground(curproc, tp) &&
+ p->p_pgrp->pg_jobc && (p->p_flag & P_PPWAIT) == 0 &&
+ (p->p_sigignore & sigmask(SIGTTOU)) == 0 &&
+ (p->p_sigmask & sigmask(SIGTTOU)) == 0) {
+ pgsignal(p->p_pgrp, SIGTTOU, 1);
+ if (error = ttysleep(tp,
+ &lbolt, TTOPRI | PCATCH, ttybg, 0))
+ return (error);
+ }
+ break;
+ }
+
+ switch (cmd) { /* Process the ioctl. */
+ case FIOASYNC: /* set/clear async i/o */
+ s = spltty();
+ if (*(int *)data)
+ SET(tp->t_state, TS_ASYNC);
+ else
+ CLR(tp->t_state, TS_ASYNC);
+ splx(s);
+ break;
+ case FIONBIO: /* set/clear non-blocking i/o */
+ break; /* XXX: delete. */
+ case FIONREAD: /* get # bytes to read */
+ *(int *)data = ttnread(tp);
+ break;
+ case TIOCEXCL: /* set exclusive use of tty */
+ s = spltty();
+ SET(tp->t_state, TS_XCLUDE);
+ splx(s);
+ break;
+ case TIOCFLUSH: { /* flush buffers */
+ register int flags = *(int *)data;
+
+ if (flags == 0) /* Zero means flush both directions. */
+ flags = FREAD | FWRITE;
+ else
+ flags &= FREAD | FWRITE;
+ ttyflush(tp, flags);
+ break;
+ }
+ case TIOCCONS: /* become virtual console */
+ if (*(int *)data) {
+ if (constty && constty != tp &&
+ ISSET(constty->t_state, TS_CARR_ON | TS_ISOPEN) ==
+ (TS_CARR_ON | TS_ISOPEN))
+ return (EBUSY);
+#ifndef UCONSOLE
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+#endif
+ constty = tp;
+ } else if (tp == constty)
+ constty = NULL;
+ break;
+ case TIOCDRAIN: /* wait till output drained */
+ if (error = ttywait(tp))
+ return (error);
+ break;
+ case TIOCGETA: { /* get termios struct */
+ struct termios *t = (struct termios *)data;
+
+ bcopy(&tp->t_termios, t, sizeof(struct termios));
+ break;
+ }
+ case TIOCGETD: /* get line discipline */
+ *(int *)data = tp->t_line;
+ break;
+ case TIOCGWINSZ: /* get window size */
+ *(struct winsize *)data = tp->t_winsize;
+ break;
+ case TIOCGPGRP: /* get pgrp of tty */
+ if (!isctty(p, tp))
+ return (ENOTTY);
+ *(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
+ break;
+#ifdef TIOCHPCL
+ case TIOCHPCL: /* hang up on last close */
+ s = spltty();
+ SET(tp->t_cflag, HUPCL);
+ splx(s);
+ break;
+#endif
+ case TIOCNXCL: /* reset exclusive use of tty */
+ s = spltty();
+ CLR(tp->t_state, TS_XCLUDE);
+ splx(s);
+ break;
+ case TIOCOUTQ: /* output queue size */
+ *(int *)data = tp->t_outq.c_cc;
+ break;
+ case TIOCSETA: /* set termios struct */
+ case TIOCSETAW: /* drain output, set */
+ case TIOCSETAF: { /* drn out, fls in, set */
+ register struct termios *t = (struct termios *)data;
+
+ s = spltty();
+ if (cmd == TIOCSETAW || cmd == TIOCSETAF) {
+ if (error = ttywait(tp)) {
+ splx(s);
+ return (error);
+ }
+ if (cmd == TIOCSETAF)
+ ttyflush(tp, FREAD);
+ }
+ if (!ISSET(t->c_cflag, CIGNORE)) {
+ /*
+ * Set device hardware.
+ */
+ if (tp->t_param && (error = (*tp->t_param)(tp, t))) {
+ splx(s);
+ return (error);
+ } else {
+ if (!ISSET(tp->t_state, TS_CARR_ON) &&
+ ISSET(tp->t_cflag, CLOCAL) &&
+ !ISSET(t->c_cflag, CLOCAL)) {
+ CLR(tp->t_state, TS_ISOPEN);
+ SET(tp->t_state, TS_WOPEN);
+ ttwakeup(tp);
+ }
+ tp->t_cflag = t->c_cflag;
+ tp->t_ispeed = t->c_ispeed;
+ tp->t_ospeed = t->c_ospeed;
+ }
+ ttsetwater(tp);
+ }
+ if (cmd != TIOCSETAF) {
+ if (ISSET(t->c_lflag, ICANON) !=
+ ISSET(tp->t_lflag, ICANON))
+ if (ISSET(t->c_lflag, ICANON)) { /* Switching into canonical mode. */
+ SET(tp->t_lflag, PENDIN);
+ ttwakeup(tp);
+ } else { /* Leaving canonical mode: merge both queues into rawq. */
+ struct clist tq;
+
+ catq(&tp->t_rawq, &tp->t_canq);
+ tq = tp->t_rawq;
+ tp->t_rawq = tp->t_canq;
+ tp->t_canq = tq;
+ CLR(tp->t_lflag, PENDIN);
+ }
+ }
+ tp->t_iflag = t->c_iflag;
+ tp->t_oflag = t->c_oflag;
+ /*
+ * Make the EXTPROC bit read only.
+ */
+ if (ISSET(tp->t_lflag, EXTPROC))
+ SET(t->c_lflag, EXTPROC);
+ else
+ CLR(t->c_lflag, EXTPROC);
+ tp->t_lflag = t->c_lflag | ISSET(tp->t_lflag, PENDIN); /* Keep a PENDIN already set on the tty. */
+ bcopy(t->c_cc, tp->t_cc, sizeof(t->c_cc));
+ splx(s);
+ break;
+ }
+ case TIOCSETD: { /* set line discipline */
+ register int t = *(int *)data;
+ dev_t device = tp->t_dev;
+
+ if ((u_int)t >= nlinesw) /* Unsigned compare also rejects negative t. */
+ return (ENXIO);
+ if (t != tp->t_line) {
+ s = spltty();
+ (*linesw[tp->t_line].l_close)(tp, flag);
+ error = (*linesw[t].l_open)(device, tp);
+ if (error) {
+ (void)(*linesw[tp->t_line].l_open)(device, tp); /* Reopen the old discipline. */
+ splx(s);
+ return (error);
+ }
+ tp->t_line = t;
+ splx(s);
+ }
+ break;
+ }
+ case TIOCSTART: /* start output, like ^Q */
+ s = spltty();
+ if (ISSET(tp->t_state, TS_TTSTOP) ||
+ ISSET(tp->t_lflag, FLUSHO)) {
+ CLR(tp->t_lflag, FLUSHO);
+ CLR(tp->t_state, TS_TTSTOP);
+ ttstart(tp);
+ }
+ splx(s);
+ break;
+ case TIOCSTI: /* simulate terminal input */
+ if (p->p_ucred->cr_uid && (flag & FREAD) == 0)
+ return (EPERM);
+ if (p->p_ucred->cr_uid && !isctty(p, tp))
+ return (EACCES);
+ (*linesw[tp->t_line].l_rint)(*(u_char *)data, tp);
+ break;
+ case TIOCSTOP: /* stop output, like ^S */
+ s = spltty();
+ if (!ISSET(tp->t_state, TS_TTSTOP)) {
+ SET(tp->t_state, TS_TTSTOP);
+#ifdef sun4c /* XXX */
+ (*tp->t_stop)(tp, 0);
+#else
+ (*cdevsw[major(tp->t_dev)].d_stop)(tp, 0);
+#endif
+ }
+ splx(s);
+ break;
+ case TIOCSCTTY: /* become controlling tty */
+ /* Session ctty vnode pointer set in vnode layer. */
+ if (!SESS_LEADER(p) ||
+ (p->p_session->s_ttyvp || tp->t_session) &&
+ (tp->t_session != p->p_session))
+ return (EPERM);
+ tp->t_session = p->p_session;
+ tp->t_pgrp = p->p_pgrp;
+ p->p_session->s_ttyp = tp;
+ p->p_flag |= P_CONTROLT;
+ break;
+ case TIOCSPGRP: { /* set pgrp of tty */
+ register struct pgrp *pgrp = pgfind(*(int *)data);
+
+ if (!isctty(p, tp))
+ return (ENOTTY);
+ else if (pgrp == NULL || pgrp->pg_session != p->p_session)
+ return (EPERM);
+ tp->t_pgrp = pgrp;
+ break;
+ }
+ case TIOCSWINSZ: /* set window size */
+ if (bcmp((caddr_t)&tp->t_winsize, data,
+ sizeof (struct winsize))) { /* Signal only when the size really changes. */
+ tp->t_winsize = *(struct winsize *)data;
+ pgsignal(tp->t_pgrp, SIGWINCH, 1);
+ }
+ break;
+ default:
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ return (ttcompat(tp, cmd, data, flag));
+#else
+ return (-1);
+#endif
+ }
+ return (0);
+}
+
+int
+ttselect(device, rw, p)
+ dev_t device;
+ int rw;
+ struct proc *p;
+{
+ register struct tty *tp;
+ int nread, s;
+
+ tp = &cdevsw[major(device)].d_ttys[minor(device)]; /* Find the tty from the device number. */
+
+ s = spltty();
+ switch (rw) {
+ case FREAD:
+ nread = ttnread(tp);
+ if (nread > 0 || !ISSET(tp->t_cflag, CLOCAL) &&
+ !ISSET(tp->t_state, TS_CARR_ON)) /* Parses as nread > 0 || (!CLOCAL && !CARR_ON). */
+ goto win;
+ selrecord(p, &tp->t_rsel); /* Not ready: record p on the read selector. */
+ break;
+ case FWRITE:
+ if (tp->t_outq.c_cc <= tp->t_lowat) {
+win: splx(s);
+ return (1); /* Ready now. */
+ }
+ selrecord(p, &tp->t_wsel); /* Not ready: record p on the write selector. */
+ break;
+ }
+ splx(s);
+ return (0); /* Not ready (or rw was neither FREAD nor FWRITE). */
+}
+
+static int
+ttnread(tp)
+ struct tty *tp;
+{
+ int nread;
+
+ if (ISSET(tp->t_lflag, PENDIN)) /* Reprocess pending input before counting. */
+ ttypend(tp);
+ nread = tp->t_canq.c_cc;
+ if (!ISSET(tp->t_lflag, ICANON)) /* Outside canonical mode the raw queue counts too. */
+ nread += tp->t_rawq.c_cc;
+ return (nread); /* Number of characters counted as readable. */
+}
+
+/*
+ * Wait for output to drain. Returns 0, or the error from ttysleep() if the sleep fails.
+ */
+int
+ttywait(tp)
+ register struct tty *tp;
+{
+ int error, s;
+
+ error = 0;
+ s = spltty();
+ while ((tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY)) &&
+ (ISSET(tp->t_state, TS_CARR_ON) || ISSET(tp->t_cflag, CLOCAL))
+ && tp->t_oproc) { /* Give up waiting if carrier is gone or there is no output routine. */
+ (*tp->t_oproc)(tp);
+ SET(tp->t_state, TS_ASLEEP);
+ if (error = ttysleep(tp,
+ &tp->t_outq, TTOPRI | PCATCH, ttyout, 0))
+ break;
+ }
+ splx(s);
+ return (error);
+}
+
+/*
+ * Wait for output to drain and, if the wait succeeds, flush the input queues. Returns the result of ttywait().
+ */
+int
+ttywflush(tp)
+ struct tty *tp;
+{
+ int error;
+
+ if ((error = ttywait(tp)) == 0)
+ ttyflush(tp, FREAD); /* Only the read side is flushed here. */
+ return (error);
+}
+
+/*
+ * Flush tty read and/or write queues, notifying anyone waiting. rw is a mask of FREAD and/or FWRITE.
+ */
+void
+ttyflush(tp, rw)
+ register struct tty *tp;
+ int rw;
+{
+ register int s;
+
+ s = spltty();
+ if (rw & FREAD) {
+ FLUSHQ(&tp->t_canq);
+ FLUSHQ(&tp->t_rawq);
+ tp->t_rocount = 0;
+ tp->t_rocol = 0;
+ CLR(tp->t_state, TS_LOCAL);
+ ttwakeup(tp);
+ }
+ if (rw & FWRITE) {
+ CLR(tp->t_state, TS_TTSTOP);
+#ifdef sun4c /* XXX */
+ (*tp->t_stop)(tp, rw);
+#else
+ (*cdevsw[major(tp->t_dev)].d_stop)(tp, rw); /* Call the driver's stop routine before emptying the queue. */
+#endif
+ FLUSHQ(&tp->t_outq);
+ wakeup((caddr_t)&tp->t_outq); /* Wake sleepers on the output queue. */
+ selwakeup(&tp->t_wsel);
+ }
+ splx(s);
+}
+
+/*
+ * Copy in the default termios characters: overwrite tp->t_cc with ttydefchars.
+ */
+void
+ttychars(tp)
+ struct tty *tp;
+{
+
+ bcopy(ttydefchars, tp->t_cc, sizeof(ttydefchars)); /* Copy length is the size of the defaults table. */
+}
+
+/*
+ * Send stop character on input overflow; called from ttyinput() when IXOFF is set.
+ */
+static void
+ttyblock(tp)
+ register struct tty *tp;
+{
+ register int total;
+
+ total = tp->t_rawq.c_cc + tp->t_canq.c_cc;
+ if (tp->t_rawq.c_cc > TTYHOG) { /* Raw queue overran: drop everything and unblock. */
+ ttyflush(tp, FREAD | FWRITE);
+ CLR(tp->t_state, TS_TBLOCK);
+ }
+ /*
+ * Block further input iff: current input > threshold, not already
+ * blocked, input is available to user program (always so outside ICANON),
+ * AND VSTOP is enabled. The inner || must be parenthesized: && binds tighter.
+ */
+ if (total >= TTYHOG / 2 &&
+ !ISSET(tp->t_state, TS_TBLOCK) &&
+ (!ISSET(tp->t_lflag, ICANON) || tp->t_canq.c_cc > 0) &&
+ tp->t_cc[VSTOP] != _POSIX_VDISABLE) {
+ if (putc(tp->t_cc[VSTOP], &tp->t_outq) == 0) { /* Mark blocked only if VSTOP was queued. */
+ SET(tp->t_state, TS_TBLOCK);
+ ttstart(tp);
+ }
+ }
+}
+
+void
+ttrstrt(tp_arg)
+ void *tp_arg;
+{
+ struct tty *tp;
+ int s;
+
+#ifdef DIAGNOSTIC
+ if (tp_arg == NULL)
+ panic("ttrstrt");
+#endif
+ tp = tp_arg; /* The argument is an untyped pointer to the tty. */
+ s = spltty();
+
+ CLR(tp->t_state, TS_TIMEOUT); /* Clear the timeout flag, then restart output. */
+ ttstart(tp);
+
+ splx(s);
+}
+
+int
+ttstart(tp)
+ struct tty *tp;
+{
+
+ if (tp->t_oproc != NULL) /* XXX: Kludge for pty. */
+ (*tp->t_oproc)(tp);
+ return (0); /* Always 0, even when there is no output routine to call. */
+}
+
+/*
+ * "close" a line discipline: with IO_NDELAY flush both directions at once, otherwise drain output and flush input. Returns 0.
+ */
+int
+ttylclose(tp, flag)
+ struct tty *tp;
+ int flag;
+{
+
+ if (flag & IO_NDELAY)
+ ttyflush(tp, FREAD | FWRITE);
+ else
+ ttywflush(tp); /* Its error return is ignored here. */
+ return (0);
+}
+
+/*
+ * Handle modem control transition on a tty.
+ * Flag indicates new state of carrier.
+ * Returns 0 if the line should be turned off, otherwise 1.
+ */
+int
+ttymodem(tp, flag)
+ register struct tty *tp;
+ int flag;
+{
+
+ if (!ISSET(tp->t_state, TS_WOPEN) && ISSET(tp->t_cflag, MDMBUF)) {
+ /*
+ * MDMBUF: do flow control according to carrier flag
+ */
+ if (flag) {
+ CLR(tp->t_state, TS_TTSTOP);
+ ttstart(tp);
+ } else if (!ISSET(tp->t_state, TS_TTSTOP)) {
+ SET(tp->t_state, TS_TTSTOP);
+#ifdef sun4c /* XXX */
+ (*tp->t_stop)(tp, 0);
+#else
+ (*cdevsw[major(tp->t_dev)].d_stop)(tp, 0);
+#endif
+ }
+ } else if (flag == 0) {
+ /*
+ * Lost carrier.
+ */
+ CLR(tp->t_state, TS_CARR_ON);
+ if (ISSET(tp->t_state, TS_ISOPEN) &&
+ !ISSET(tp->t_cflag, CLOCAL)) { /* Open and not a local line: hang up. */
+ if (tp->t_session && tp->t_session->s_leader)
+ psignal(tp->t_session->s_leader, SIGHUP);
+ ttyflush(tp, FREAD | FWRITE);
+ return (0); /* Line should be turned off. */
+ }
+ } else {
+ /*
+ * Carrier now on.
+ */
+ SET(tp->t_state, TS_CARR_ON);
+ ttwakeup(tp);
+ }
+ return (1);
+}
+
+/*
+ * Default modem control routine (for other line disciplines).
+ * Returns 0 to turn off the device on carrier drop (non-CLOCAL line), otherwise 1.
+ */
+int
+nullmodem(tp, flag)
+ register struct tty *tp;
+ int flag;
+{
+
+ if (flag) /* Carrier came up. */
+ SET(tp->t_state, TS_CARR_ON);
+ else {
+ CLR(tp->t_state, TS_CARR_ON);
+ if (!ISSET(tp->t_cflag, CLOCAL)) {
+ if (tp->t_session && tp->t_session->s_leader)
+ psignal(tp->t_session->s_leader, SIGHUP); /* Hang up the session leader. */
+ return (0);
+ }
+ }
+ return (1);
+}
+
+/*
+ * Reinput pending characters after state switch: the raw queue is emptied
+ * and each character is run through ttyinput() again. Call at spltty().
+ */
+void
+ttypend(tp)
+ register struct tty *tp;
+{
+ struct clist tq;
+ register int c; /* Explicit type: C99 dropped implicit int. */
+
+ CLR(tp->t_lflag, PENDIN);
+ SET(tp->t_state, TS_TYPEN); /* ttyinput() skips ISTRIP while this is set. */
+ tq = tp->t_rawq; /* Take over the old raw queue... */
+ tp->t_rawq.c_cc = 0; /* ...and leave an empty one behind. */
+ tp->t_rawq.c_cf = tp->t_rawq.c_cl = 0;
+ while ((c = getc(&tq)) >= 0) /* Feed each saved character back in. */
+ ttyinput(c, tp);
+ CLR(tp->t_state, TS_TYPEN);
+}
+
+/*
+ * Process a read call on a tty device. Returns 0 (including EOF) or an error number.
+ */
+int
+ttread(tp, uio, flag)
+ register struct tty *tp;
+ struct uio *uio;
+ int flag;
+{
+ register struct clist *qp;
+ register int c;
+ register long lflag;
+ register u_char *cc = tp->t_cc;
+ register struct proc *p = curproc;
+ int s, first, error = 0;
+
+loop: lflag = tp->t_lflag; /* Flags are re-read on every retry. */
+ s = spltty();
+ /*
+ * take pending input first
+ */
+ if (ISSET(lflag, PENDIN))
+ ttypend(tp);
+ splx(s);
+
+ /*
+ * Hang process if it's in the background.
+ */
+ if (isbackground(p, tp)) {
+ if ((p->p_sigignore & sigmask(SIGTTIN)) ||
+ (p->p_sigmask & sigmask(SIGTTIN)) ||
+ p->p_flag & P_PPWAIT || p->p_pgrp->pg_jobc == 0)
+ return (EIO);
+ pgsignal(p->p_pgrp, SIGTTIN, 1);
+ if (error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, ttybg, 0))
+ return (error);
+ goto loop;
+ }
+
+ /*
+ * If canonical, use the canonical queue,
+ * else use the raw queue.
+ *
+ * (should get rid of clists...)
+ */
+ qp = ISSET(lflag, ICANON) ? &tp->t_canq : &tp->t_rawq;
+
+ /*
+ * If there is no input, sleep on rawq
+ * awaiting hardware receipt and notification.
+ * If we have data, we don't need to check for carrier.
+ */
+ s = spltty();
+ if (qp->c_cc <= 0) {
+ int carrier;
+
+ carrier = ISSET(tp->t_state, TS_CARR_ON) ||
+ ISSET(tp->t_cflag, CLOCAL); /* CLOCAL counts as carrier present. */
+ if (!carrier && ISSET(tp->t_state, TS_ISOPEN)) {
+ splx(s);
+ return (0); /* EOF */
+ }
+ if (flag & IO_NDELAY) {
+ splx(s);
+ return (EWOULDBLOCK);
+ }
+ error = ttysleep(tp, &tp->t_rawq, TTIPRI | PCATCH,
+ carrier ? ttyin : ttopen, 0);
+ splx(s);
+ if (error)
+ return (error);
+ goto loop;
+ }
+ splx(s);
+
+ /*
+ * Input present, check for input mapping and processing.
+ */
+ first = 1; /* No character has been handed to the user yet. */
+ while ((c = getc(qp)) >= 0) {
+ /*
+ * delayed suspend (^Y)
+ */
+ if (CCEQ(cc[VDSUSP], c) && ISSET(lflag, ISIG)) {
+ pgsignal(tp->t_pgrp, SIGTSTP, 1);
+ if (first) {
+ if (error = ttysleep(tp,
+ &lbolt, TTIPRI | PCATCH, ttybg, 0))
+ break;
+ goto loop;
+ }
+ break;
+ }
+ /*
+ * Interpret EOF only in canonical mode.
+ */
+ if (CCEQ(cc[VEOF], c) && ISSET(lflag, ICANON))
+ break;
+ /*
+ * Give user character.
+ */
+ error = ureadc(c, uio);
+ if (error)
+ break;
+ if (uio->uio_resid == 0) /* Request satisfied. */
+ break;
+ /*
+ * In canonical mode check for a "break character"
+ * marking the end of a "line of input".
+ */
+ if (ISSET(lflag, ICANON) && TTBREAKC(c))
+ break;
+ first = 0;
+ }
+ /*
+ * Look to unblock output now that (presumably)
+ * the input queue has gone down.
+ */
+ s = spltty();
+ if (ISSET(tp->t_state, TS_TBLOCK) && tp->t_rawq.c_cc < TTYHOG/5) {
+ if (cc[VSTART] != _POSIX_VDISABLE &&
+ putc(cc[VSTART], &tp->t_outq) == 0) { /* Unblock only if VSTART was queued. */
+ CLR(tp->t_state, TS_TBLOCK);
+ ttstart(tp);
+ }
+ }
+ splx(s);
+ return (error);
+}
+
+/*
+ * Check the output queue on tp for space for a kernel message (from uprintf
+ * or tprintf). Allow some space over the normal hiwater mark so we don't
+ * lose messages due to normal flow control, but don't let the tty run amok.
+ * Sleeps here are not interruptible, but we return prematurely if new signals
+ * arrive. Returns 0 if the queue is still over the mark and we may not (or no longer) wait, else 1.
+ */
+int
+ttycheckoutq(tp, wait)
+ register struct tty *tp;
+ int wait;
+{
+ int hiwat, s, oldsig;
+
+ hiwat = tp->t_hiwat;
+ s = spltty();
+ oldsig = wait ? curproc->p_siglist : 0; /* Snapshot used to detect newly posted signals. */
+ if (tp->t_outq.c_cc > hiwat + 200) /* 200 characters of slack over the high-water mark. */
+ while (tp->t_outq.c_cc > hiwat) {
+ ttstart(tp);
+ if (wait == 0 || curproc->p_siglist != oldsig) {
+ splx(s);
+ return (0);
+ }
+ timeout((void (*)__P((void *)))wakeup,
+ (void *)&tp->t_outq, hz); /* Schedule a wakeup so the sleep below is bounded. */
+ SET(tp->t_state, TS_ASLEEP);
+ sleep((caddr_t)&tp->t_outq, PZERO - 1);
+ }
+ splx(s);
+ return (1);
+}
+
+/*
+ * Process a write call on a tty device.
+ *
+ * Moves the data described by uio onto tp's output queue in chunks of at
+ * most OBUFSIZ bytes, running characters through ttyoutput() when OPOST
+ * processing applies, and sleeping whenever the output queue is above the
+ * high water mark or no clists are available.
+ *
+ * tp	tty being written to.
+ * uio	describes the user's data; uio_resid is adjusted to reflect
+ *	what was actually consumed.
+ * flag	I/O flags; IO_NDELAY requests non-blocking behaviour.
+ *
+ * Returns 0 on success, or an error number: EIO, EWOULDBLOCK, or whatever
+ * uiomove()/ttysleep() reported.
+ */
+int
+ttwrite(tp, uio, flag)
+ register struct tty *tp;
+ register struct uio *uio;
+ int flag;
+{
+ register char *cp;
+ register int cc, ce;
+ register struct proc *p;
+ int i, hiwat, cnt, error, s;
+ char obuf[OBUFSIZ];
+
+ hiwat = tp->t_hiwat;
+ /* Original request size; compared against uio_resid in the ovhiwat path. */
+ cnt = uio->uio_resid;
+ error = 0;
+ /* cc counts bytes already copied into obuf but not yet queued. */
+ cc = 0;
+loop:
+ s = spltty();
+ /* No carrier and not a local line: fail, refuse, or wait for carrier. */
+ if (!ISSET(tp->t_state, TS_CARR_ON) &&
+ !ISSET(tp->t_cflag, CLOCAL)) {
+ if (ISSET(tp->t_state, TS_ISOPEN)) {
+ splx(s);
+ return (EIO);
+ } else if (flag & IO_NDELAY) {
+ splx(s);
+ error = EWOULDBLOCK;
+ goto out;
+ } else {
+ /* Sleep awaiting carrier. */
+ error = ttysleep(tp,
+ &tp->t_rawq, TTIPRI | PCATCH,ttopen, 0);
+ splx(s);
+ if (error)
+ goto out;
+ goto loop;
+ }
+ }
+ splx(s);
+ /*
+ * Hang the process if it's in the background.
+ */
+ p = curproc;
+ /*
+ * Only when TOSTOP is set, the process is not flagged P_PPWAIT, SIGTTOU
+ * is neither ignored nor masked, and pg_jobc is non-zero.
+ */
+ if (isbackground(p, tp) &&
+ ISSET(tp->t_lflag, TOSTOP) && (p->p_flag & P_PPWAIT) == 0 &&
+ (p->p_sigignore & sigmask(SIGTTOU)) == 0 &&
+ (p->p_sigmask & sigmask(SIGTTOU)) == 0 &&
+ p->p_pgrp->pg_jobc) {
+ pgsignal(p->p_pgrp, SIGTTOU, 1);
+ if (error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, ttybg, 0))
+ goto out;
+ goto loop;
+ }
+ /*
+ * Process the user's data in at most OBUFSIZ chunks. Perform any
+ * output translation. Keep track of high water mark, sleep on
+ * overflow awaiting device aid in acquiring new space.
+ */
+ while (uio->uio_resid > 0 || cc > 0) {
+ /* Output is being discarded: report the whole request as consumed. */
+ if (ISSET(tp->t_lflag, FLUSHO)) {
+ uio->uio_resid = 0;
+ return (0);
+ }
+ if (tp->t_outq.c_cc > hiwat)
+ goto ovhiwat;
+ /*
+ * Grab a hunk of data from the user, unless we have some
+ * leftover from last time.
+ */
+ if (cc == 0) {
+ cc = min(uio->uio_resid, OBUFSIZ);
+ cp = obuf;
+ error = uiomove(cp, cc, uio);
+ if (error) {
+ cc = 0;
+ break;
+ }
+ }
+ /*
+ * If nothing fancy need be done, grab those characters we
+ * can handle without any of ttyoutput's processing and
+ * just transfer them to the output q. For those chars
+ * which require special processing (as indicated by the
+ * bits in char_type), call ttyoutput. After processing
+ * a hunk of data, look for FLUSHO so ^O's will take effect
+ * immediately.
+ */
+ while (cc > 0) {
+ /* ce = length of the leading run that needs no processing. */
+ if (!ISSET(tp->t_oflag, OPOST))
+ ce = cc;
+ else {
+ ce = cc - scanc((u_int)cc, (u_char *)cp,
+ (u_char *)char_type, CCLASSMASK);
+ /*
+ * If ce is zero, then we're processing
+ * a special character through ttyoutput.
+ */
+ if (ce == 0) {
+ tp->t_rocount = 0;
+ if (ttyoutput(*cp, tp) >= 0) {
+ /* No Clists, wait a bit. */
+ ttstart(tp);
+ if (error = ttysleep(tp, &lbolt,
+ TTOPRI | PCATCH, ttybuf, 0))
+ break;
+ goto loop;
+ }
+ cp++;
+ cc--;
+ if (ISSET(tp->t_lflag, FLUSHO) ||
+ tp->t_outq.c_cc > hiwat)
+ goto ovhiwat;
+ continue;
+ }
+ }
+ /*
+ * A bunch of normal characters have been found.
+ * Transfer them en masse to the output queue and
+ * continue processing at the top of the loop.
+ * If there are any further characters in this
+ * <= OBUFSIZ chunk, the first should be a character
+ * requiring special handling by ttyoutput.
+ */
+ tp->t_rocount = 0;
+ /* i is the count b_to_q() could not queue; ce becomes the count it did. */
+ i = b_to_q(cp, ce, &tp->t_outq);
+ ce -= i;
+ tp->t_column += ce;
+ cp += ce, cc -= ce, tk_nout += ce;
+ tp->t_outcc += ce;
+ if (i > 0) {
+ /* No Clists, wait a bit. */
+ ttstart(tp);
+ if (error = ttysleep(tp,
+ &lbolt, TTOPRI | PCATCH, ttybuf, 0))
+ break;
+ goto loop;
+ }
+ if (ISSET(tp->t_lflag, FLUSHO) ||
+ tp->t_outq.c_cc > hiwat)
+ break;
+ }
+ ttstart(tp);
+ }
+out:
+ /*
+ * If cc is nonzero, we leave the uio structure inconsistent, as the
+ * offset and iov pointers have moved forward, but it doesn't matter
+ * (the call will either return short or restart with a new uio).
+ */
+ uio->uio_resid += cc;
+ return (error);
+
+ovhiwat:
+ /* Output queue is above high water: start output, then wait or bail. */
+ ttstart(tp);
+ s = spltty();
+ /*
+ * This can only occur if FLUSHO is set in t_lflag,
+ * or if ttstart/oproc is synchronous (or very fast).
+ */
+ if (tp->t_outq.c_cc <= hiwat) {
+ splx(s);
+ goto loop;
+ }
+ if (flag & IO_NDELAY) {
+ splx(s);
+ /* Give back the unqueued bytes; EWOULDBLOCK only if nothing was written. */
+ uio->uio_resid += cc;
+ return (uio->uio_resid == cnt ? EWOULDBLOCK : 0);
+ }
+ SET(tp->t_state, TS_ASLEEP);
+ error = ttysleep(tp, &tp->t_outq, TTOPRI | PCATCH, ttyout, 0);
+ splx(s);
+ if (error)
+ goto out;
+ goto loop;
+}
+
+/*
+ * Rubout one character from the rawq of tp
+ * as cleanly as possible.
+ *
+ * c is the character being erased (possibly carrying TTY_QUOTE bits) and
+ * tp is the tty. Nothing is done unless ECHO is set and EXTPROC is clear.
+ * The visual effect depends on the local flags: ECHOE backs over the
+ * character on screen, ECHOPRT echoes the erased character after a '\',
+ * and otherwise the VERASE character itself is echoed.
+ */
+void
+ttyrub(c, tp)
+ register int c;
+ register struct tty *tp;
+{
+ register char *cp;
+ register int savecol;
+ int tabc, s;
+
+ if (!ISSET(tp->t_lflag, ECHO) || ISSET(tp->t_lflag, EXTPROC))
+ return;
+ CLR(tp->t_lflag, FLUSHO);
+ if (ISSET(tp->t_lflag, ECHOE)) {
+ if (tp->t_rocount == 0) {
+ /*
+ * Screwed by ttwrite; retype
+ */
+ ttyretype(tp);
+ return;
+ }
+ /* A quoted tab or newline is erased as two columns. */
+ if (c == ('\t' | TTY_QUOTE) || c == ('\n' | TTY_QUOTE))
+ ttyrubo(tp, 2);
+ else {
+ CLR(c, ~TTY_CHARMASK);
+ switch (CCLASS(c)) {
+ case ORDINARY:
+ ttyrubo(tp, 1);
+ break;
+ case BACKSPACE:
+ case CONTROL:
+ case NEWLINE:
+ case RETURN:
+ case VTAB:
+ /* These erase two columns, and only when ECHOCTL is set. */
+ if (ISSET(tp->t_lflag, ECHOCTL))
+ ttyrubo(tp, 2);
+ break;
+ case TAB:
+ /* t_rocount below the rawq length: retype the line instead. */
+ if (tp->t_rocount < tp->t_rawq.c_cc) {
+ ttyretype(tp);
+ return;
+ }
+ /*
+ * Re-echo the raw queue starting from column t_rocol
+ * with TS_CNTTB and FLUSHO set, to find the column
+ * where the tab began (presumably nothing is actually
+ * emitted in this mode -- confirm in ttyoutput).
+ */
+ s = spltty();
+ savecol = tp->t_column;
+ SET(tp->t_state, TS_CNTTB);
+ SET(tp->t_lflag, FLUSHO);
+ tp->t_column = tp->t_rocol;
+ cp = tp->t_rawq.c_cf;
+ if (cp)
+ tabc = *cp; /* XXX FIX NEXTC */
+ for (; cp; cp = nextc(&tp->t_rawq, cp, &tabc))
+ ttyecho(tabc, tp);
+ CLR(tp->t_lflag, FLUSHO);
+ CLR(tp->t_state, TS_CNTTB);
+ splx(s);
+
+ /* savecol will now be length of the tab. */
+ savecol -= tp->t_column;
+ /* Restore t_column to the value it had before the re-echo. */
+ tp->t_column += savecol;
+ if (savecol > 8)
+ savecol = 8; /* overflow screw */
+ while (--savecol >= 0)
+ (void)ttyoutput('\b', tp);
+ break;
+ default: /* XXX */
+ /* Unknown character class: log it; the panic is compiled out. */
+#define PANICSTR "ttyrub: would panic c = %d, val = %d\n"
+ (void)printf(PANICSTR, c, CCLASS(c));
+#ifdef notdef
+ panic(PANICSTR, c, CCLASS(c));
+#endif
+ }
+ }
+ } else if (ISSET(tp->t_lflag, ECHOPRT)) {
+ /* Emit the leading '\' once per erase sequence (tracked by TS_ERASE). */
+ if (!ISSET(tp->t_state, TS_ERASE)) {
+ SET(tp->t_state, TS_ERASE);
+ (void)ttyoutput('\\', tp);
+ }
+ ttyecho(c, tp);
+ } else
+ ttyecho(tp->t_cc[VERASE], tp);
+ /* One fewer character accounted for; skipped on the retype paths above. */
+ --tp->t_rocount;
+}
+
+/*
+ * Erase cnt columns on the terminal: each one is wiped by sending
+ * backspace, space, backspace through ttyoutput().
+ */
+static void
+ttyrubo(tp, cnt)
+	register struct tty *tp;
+	int cnt;
+{
+	int left;
+
+	for (left = cnt; left > 0; left--) {
+		(void)ttyoutput('\b', tp);
+		(void)ttyoutput(' ', tp);
+		(void)ttyoutput('\b', tp);
+	}
+}
+
+/*
+ * ttyretype --
+ *	Redisplay the pending input line: echo the reprint character (if
+ *	one is set), start a new line, then echo the canonical queue
+ *	followed by the raw queue. The caller is assumed to have already
+ *	checked c_cc.
+ */
+void
+ttyretype(tp)
+	register struct tty *tp;
+{
+	register char *pos;
+	int ipl, ch;
+
+	/* Show the reprint character first, when one is configured. */
+	if (tp->t_cc[VREPRINT] != _POSIX_VDISABLE)
+		ttyecho(tp->t_cc[VREPRINT], tp);
+
+	(void)ttyoutput('\n', tp);
+
+	/*
+	 * XXX
+	 * nextc() is broken: it does not check the quote bit of the
+	 * first character, so that character is fetched by hand here.
+	 */
+	ipl = spltty();
+
+	pos = tp->t_canq.c_cf;
+	ch = (pos != NULL) ? *pos : 0;
+	while (pos != NULL) {
+		ttyecho(ch, tp);
+		pos = nextc(&tp->t_canq, pos, &ch);
+	}
+
+	pos = tp->t_rawq.c_cf;
+	ch = (pos != NULL) ? *pos : 0;
+	while (pos != NULL) {
+		ttyecho(ch, tp);
+		pos = nextc(&tp->t_rawq, pos, &ch);
+	}
+
+	CLR(tp->t_state, TS_ERASE);
+	splx(ipl);
+
+	/* Everything in the raw queue has now been echoed from column 0. */
+	tp->t_rocount = tp->t_rawq.c_cc;
+	tp->t_rocol = 0;
+}
+
+/*
+ * Echo a typed character to the terminal.
+ *
+ * c is the character (it may carry bits outside TTY_CHARMASK) and tp is
+ * the tty. Nothing is echoed when EXTPROC is set. With ECHO off, only a
+ * newline is echoed, and only if ECHONL is set. With ECHOCTL set, control
+ * characters other than tab and newline are shown as '^' followed by a
+ * printable character, and DEL is shown as "^?".
+ */
+static void
+ttyecho(c, tp)
+	register int c;
+	register struct tty *tp;
+{
+
+	/* Real echo cancels output flushing; tab-counting echo must not. */
+	if (!ISSET(tp->t_state, TS_CNTTB))
+		CLR(tp->t_lflag, FLUSHO);
+	/*
+	 * With ECHO off, ECHONL still allows a newline to be echoed.
+	 * The test used to be "c == '\n'", which suppressed the newline
+	 * and echoed every other character instead.
+	 */
+	if ((!ISSET(tp->t_lflag, ECHO) &&
+	    (c != '\n' || !ISSET(tp->t_lflag, ECHONL))) ||
+	    ISSET(tp->t_lflag, EXTPROC))
+		return;
+	if (ISSET(tp->t_lflag, ECHOCTL) &&
+	    ((ISSET(c, TTY_CHARMASK) <= 037 && c != '\t' && c != '\n') ||
+	    ISSET(c, TTY_CHARMASK) == 0177)) {
+		(void)ttyoutput('^', tp);
+		CLR(c, ~TTY_CHARMASK);
+		if (c == 0177)
+			c = '?';
+		else
+			c += 'A' - 1;	/* e.g. 001 becomes 'A' */
+	}
+	(void)ttyoutput(c, tp);
+}
+
+/*
+ * Notify everyone waiting for input on a tty: select()ers on t_rsel,
+ * the tty's process group via SIGIO when TS_ASYNC is set, and sleepers
+ * on the raw queue.
+ */
+void
+ttwakeup(tp)
+	register struct tty *tp;
+{
+
+	selwakeup(&tp->t_rsel);
+	if (ISSET(tp->t_state, TS_ASYNC)) {
+		pgsignal(tp->t_pgrp, SIGIO, 1);
+	}
+	wakeup((caddr_t)&tp->t_rawq);
+}
+
+/*
+ * Map a speed to its code using a conversion table terminated by an
+ * entry whose sp_speed is -1; drivers use this to turn software speed
+ * values into hardware parameters. Returns the matching sp_code, or -1
+ * when the speed is not in the table.
+ */
+int
+ttspeedtab(speed, table)
+	int speed;
+	register struct speedtab *table;
+{
+	register struct speedtab *ent;
+
+	ent = table;
+	while (ent->sp_speed != -1) {
+		if (ent->sp_speed == speed)
+			return (ent->sp_code);
+		ent++;
+	}
+	return (-1);
+}
+
+/*
+ * Set tty hi and low water marks from the output speed.
+ *
+ * The aim is roughly one second of output between the high and the
+ * low mark: low water is half the characters-per-second rate, high
+ * water is one further second above that, each clamped to its
+ * TTMIN.../TTMAX... bounds; high water is rounded up to a multiple of
+ * CBSIZE.
+ */
+void
+ttsetwater(tp)
+	struct tty *tp;
+{
+	register int cps, mark;
+
+	cps = tp->t_ospeed / 10;
+
+	/* Low water mark. */
+	mark = cps / 2;
+	if (mark > TTMAXLOWAT)
+		mark = TTMAXLOWAT;
+	else if (mark < TTMINLOWAT)
+		mark = TTMINLOWAT;
+	tp->t_lowat = mark;
+
+	/* High water mark. */
+	mark += cps;
+	if (mark > TTMAXHIWAT)
+		mark = TTMAXHIWAT;
+	else if (mark < TTMINHIWAT)
+		mark = TTMINHIWAT;
+	tp->t_hiwat = roundup(mark, CBSIZE);
+}
+
+/*
+ * Report on state of foreground process group.
+ *
+ * Prints a one-line status on tp: the load average, then, for the most
+ * "interesting" process of the foreground process group (as chosen by
+ * proc_compare()), its command name, pid, state or wait message, user
+ * and system time, cpu percentage and resident set size. Nothing is
+ * printed if ttycheckoutq() reports the output queue as too full.
+ */
+void
+ttyinfo(tp)
+	register struct tty *tp;
+{
+	register struct proc *p, *pick;
+	struct timeval utime, stime;
+	int tmp;
+
+	if (ttycheckoutq(tp,0) == 0)
+		return;
+
+	/* Print load average. */
+	tmp = (averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT;
+	ttyprintf(tp, "load: %d.%02d ", tmp / 100, tmp % 100);
+
+	if (tp->t_session == NULL)
+		ttyprintf(tp, "not a controlling terminal\n");
+	else if (tp->t_pgrp == NULL)
+		ttyprintf(tp, "no foreground process group\n");
+	else if ((p = tp->t_pgrp->pg_mem) == NULL)
+		ttyprintf(tp, "empty foreground process group\n");
+	else {
+		/* Pick interesting process. */
+		for (pick = NULL; p != NULL; p = p->p_pgrpnxt)
+			if (proc_compare(pick, p))
+				pick = p;
+
+		ttyprintf(tp, " cmd: %s %d [%s] ", pick->p_comm, pick->p_pid,
+		    pick->p_stat == SRUN ? "running" :
+		    pick->p_wmesg ? pick->p_wmesg : "iowait");
+
+		calcru(pick, &utime, &stime, NULL);
+
+		/*
+		 * Round user time to hundredths of a second, carrying into
+		 * the seconds field. Without the carry, tv_usec >= 995000
+		 * would print a three-digit fraction such as "1.100u".
+		 */
+		utime.tv_usec += 5000;
+		if (utime.tv_usec >= 1000000) {
+			utime.tv_sec += 1;
+			utime.tv_usec -= 1000000;
+		}
+		ttyprintf(tp, "%d.%02du ",
+		    utime.tv_sec, utime.tv_usec / 10000);
+
+		/* Round and print system time the same way. */
+		stime.tv_usec += 5000;
+		if (stime.tv_usec >= 1000000) {
+			stime.tv_sec += 1;
+			stime.tv_usec -= 1000000;
+		}
+		ttyprintf(tp, "%d.%02ds ",
+		    stime.tv_sec, stime.tv_usec / 10000);
+
+#define pgtok(a) (((a) * NBPG) / 1024)
+		/* Print percentage cpu, resident set size. */
+		tmp = (pick->p_pctcpu * 10000 + FSCALE / 2) >> FSHIFT;
+		ttyprintf(tp, "%d%% %dk\n",
+		    tmp / 100,
+		    pick->p_stat == SIDL || pick->p_stat == SZOMB ? 0 :
+#ifdef pmap_resident_count
+			pgtok(pmap_resident_count(&pick->p_vmspace->vm_pmap))
+#else
+			pgtok(pick->p_vmspace->vm_rssize)
+#endif
+			);
+	}
+	tp->t_rocount = 0;	/* so pending input will be retyped if BS */
+}
+
+/*
+ * Returns 1 if p2 is "better" than p1
+ *
+ * The algorithm for picking the "interesting" process is thus:
+ *
+ * 1) Only foreground processes are eligible - implied.
+ * 2) Runnable processes are favored over anything else. The runner
+ * with the highest cpu utilization is picked (p_estcpu). Ties are
+ * broken by picking the highest pid.
+ * 3) The sleeper with the shortest sleep time is next. With ties,
+ * we pick out just "short-term" sleepers (P_SINTR == 0).
+ * 4) Further ties are broken by picking the highest pid.
+ */
+#define ISRUN(p) (((p)->p_stat == SRUN) || ((p)->p_stat == SIDL))
+#define TESTAB(a, b) ((a)<<1 | (b))
+#define ONLYA 2
+#define ONLYB 1
+#define BOTH 3
+
+static int
+proc_compare(p1, p2)
+	register struct proc *p1, *p2;
+{
+	int run1, run2, zomb1, zomb2, intr1, intr2;
+
+	/* Anything beats no candidate at all. */
+	if (p1 == NULL)
+		return (1);
+
+	/*
+	 * Runnable processes come first.
+	 */
+	run1 = ISRUN(p1);
+	run2 = ISRUN(p2);
+	if (run1 && !run2)
+		return (0);
+	if (!run1 && run2)
+		return (1);
+	if (run1 && run2) {
+		/* Both runnable: higher recent cpu use wins, then higher pid. */
+		if (p1->p_estcpu != p2->p_estcpu)
+			return (p2->p_estcpu > p1->p_estcpu);
+		return (p2->p_pid > p1->p_pid);
+	}
+
+	/*
+	 * Neither is runnable: a zombie loses to a non-zombie.
+	 */
+	zomb1 = (p1->p_stat == SZOMB);
+	zomb2 = (p2->p_stat == SZOMB);
+	if (zomb1 && !zomb2)
+		return (1);
+	if (!zomb1 && zomb2)
+		return (0);
+	if (zomb1 && zomb2)
+		return (p2->p_pid > p1->p_pid);
+
+	/*
+	 * Two sleepers: the shorter sleep time wins.
+	 */
+	if (p1->p_slptime != p2->p_slptime)
+		return (p1->p_slptime > p2->p_slptime);
+
+	/*
+	 * Same sleep time: prefer the one whose sleep is not interruptible.
+	 */
+	intr1 = (p1->p_flag & P_SINTR) != 0;
+	intr2 = (p2->p_flag & P_SINTR) != 0;
+	if (intr1 != intr2)
+		return (intr1);
+
+	/* Still tied: highest pid wins. */
+	return (p2->p_pid > p1->p_pid);
+}
+
+/*
+ * Console-putchar-style output of one character to a tty. A newline is
+ * preceded by a carriage return. Returns 0 after queueing the character
+ * and starting output, or -1 if the tty does not have both TS_CARR_ON
+ * and TS_ISOPEN set.
+ */
+int
+tputchar(c, tp)
+	int c;
+	struct tty *tp;
+{
+	register int ipl;
+	int rv;
+
+	ipl = spltty();
+	if (ISSET(tp->t_state,
+	    TS_CARR_ON | TS_ISOPEN) == (TS_CARR_ON | TS_ISOPEN)) {
+		if (c == '\n')
+			(void)ttyoutput('\r', tp);
+		(void)ttyoutput(c, tp);
+		ttstart(tp);
+		rv = 0;
+	} else {
+		rv = -1;
+	}
+	splx(ipl);
+	return (rv);
+}
+
+/*
+ * Sleep on chan via tsleep(). Any error tsleep reports (e.g.
+ * EINTR/ETIMEDOUT) is passed straight back. Otherwise ERESTART is
+ * returned if the tty's generation number changed while we slept, so
+ * that a restarted call redoes the validation performed at its start
+ * (as happens when the tty is revoked); 0 is returned if it did not.
+ */
+int
+ttysleep(tp, chan, pri, wmesg, timo)
+	struct tty *tp;
+	void *chan;
+	int pri, timo;
+	char *wmesg;
+{
+	short before;
+	int rv;
+
+	before = tp->t_gen;
+	rv = tsleep(chan, pri, wmesg, timo);
+	if (rv != 0)
+		return (rv);
+	if (tp->t_gen != before)
+		return (ERESTART);
+	return (0);
+}
diff --git a/sys/kern/tty_compat.c b/sys/kern/tty_compat.c
new file mode 100644
index 000000000000..a6a39d9d7bf3
--- /dev/null
+++ b/sys/kern/tty_compat.c
@@ -0,0 +1,411 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty_compat.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * mapping routines for old line discipline (yuck)
+ */
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/termios.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+
+/* When non-zero, the compat routines below printf() the flag words they compute. */
+int ttydebug = 0;
+
+/*
+ * Baud rate -> old-style speed code (0..15), for use with ttspeedtab().
+ * The { -1, -1 } entry terminates the table.
+ */
+static struct speedtab compatspeeds[] = {
+ { 38400, 15 },
+ { 19200, 14 },
+ { 9600, 13 },
+ { 4800, 12 },
+ { 2400, 11 },
+ { 1800, 10 },
+ { 1200, 9 },
+ { 600, 8 },
+ { 300, 7 },
+ { 200, 6 },
+ { 150, 5 },
+ { 134, 4 },
+ { 110, 3 },
+ { 75, 2 },
+ { 50, 1 },
+ { 0, 0 },
+ { -1, -1 },
+};
+/* Inverse mapping: old-style speed code (array index 0..15) -> baud rate. */
+static int compatspcodes[16] = {
+ 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200,
+ 1800, 2400, 4800, 9600, 19200, 38400,
+};
+
+/*
+ * Handle the old (sgtty-style) tty ioctls by translating them to and
+ * from the termios state held in tp.
+ *
+ * tp	tty the ioctl applies to.
+ * com	ioctl command.
+ * data	argument buffer for the command; its type depends on com.
+ * flag	passed through unchanged to ttioctl() by the "set" commands.
+ *
+ * Returns -1 when com is not one of the commands handled here, 0 for
+ * the commands that only read or write tp directly, and otherwise the
+ * value returned by ttioctl().
+ */
+/*ARGSUSED*/
+ttcompat(tp, com, data, flag)
+ register struct tty *tp;
+ int com;
+ caddr_t data;
+ int flag;
+{
+
+ switch (com) {
+ case TIOCGETP: {
+ register struct sgttyb *sg = (struct sgttyb *)data;
+ register u_char *cc = tp->t_cc;
+ register speed;
+
+ /* Speeds with no old-style code are reported as code 15. */
+ speed = ttspeedtab(tp->t_ospeed, compatspeeds);
+ sg->sg_ospeed = (speed == -1) ? 15 : speed;
+ /* An input speed of 0 is reported as the output speed. */
+ if (tp->t_ispeed == 0)
+ sg->sg_ispeed = sg->sg_ospeed;
+ else {
+ speed = ttspeedtab(tp->t_ispeed, compatspeeds);
+ sg->sg_ispeed = (speed == -1) ? 15 : speed;
+ }
+ sg->sg_erase = cc[VERASE];
+ sg->sg_kill = cc[VKILL];
+ sg->sg_flags = ttcompatgetflags(tp);
+ break;
+ }
+
+ case TIOCSETP:
+ case TIOCSETN: {
+ register struct sgttyb *sg = (struct sgttyb *)data;
+ struct termios term;
+ int speed;
+
+ /* Work on a copy; ttioctl() below installs it. */
+ term = tp->t_termios;
+ /* Values outside 0..15 are used as the speed itself, not as a code. */
+ if ((speed = sg->sg_ispeed) > 15 || speed < 0)
+ term.c_ispeed = speed;
+ else
+ term.c_ispeed = compatspcodes[speed];
+ if ((speed = sg->sg_ospeed) > 15 || speed < 0)
+ term.c_ospeed = speed;
+ else
+ term.c_ospeed = compatspcodes[speed];
+ term.c_cc[VERASE] = sg->sg_erase;
+ term.c_cc[VKILL] = sg->sg_kill;
+ /* sg_flags supplies the low 16 bits of t_flags; the high 16 are kept. */
+ tp->t_flags = tp->t_flags&0xffff0000 | sg->sg_flags&0xffff;
+ ttcompatsetflags(tp, &term);
+ /* TIOCSETP maps to TIOCSETAF, TIOCSETN to TIOCSETA. */
+ return (ttioctl(tp, com == TIOCSETP ? TIOCSETAF : TIOCSETA,
+ &term, flag));
+ }
+
+ case TIOCGETC: {
+ struct tchars *tc = (struct tchars *)data;
+ register u_char *cc = tp->t_cc;
+
+ tc->t_intrc = cc[VINTR];
+ tc->t_quitc = cc[VQUIT];
+ tc->t_startc = cc[VSTART];
+ tc->t_stopc = cc[VSTOP];
+ tc->t_eofc = cc[VEOF];
+ tc->t_brkc = cc[VEOL];
+ break;
+ }
+ case TIOCSETC: {
+ struct tchars *tc = (struct tchars *)data;
+ register u_char *cc = tp->t_cc;
+
+ /* Written straight into tp->t_cc, without going through ttioctl(). */
+ cc[VINTR] = tc->t_intrc;
+ cc[VQUIT] = tc->t_quitc;
+ cc[VSTART] = tc->t_startc;
+ cc[VSTOP] = tc->t_stopc;
+ cc[VEOF] = tc->t_eofc;
+ cc[VEOL] = tc->t_brkc;
+ /* A break character of -1 also disables VEOL2. */
+ if (tc->t_brkc == -1)
+ cc[VEOL2] = _POSIX_VDISABLE;
+ break;
+ }
+ case TIOCSLTC: {
+ struct ltchars *ltc = (struct ltchars *)data;
+ register u_char *cc = tp->t_cc;
+
+ cc[VSUSP] = ltc->t_suspc;
+ cc[VDSUSP] = ltc->t_dsuspc;
+ cc[VREPRINT] = ltc->t_rprntc;
+ cc[VDISCARD] = ltc->t_flushc;
+ cc[VWERASE] = ltc->t_werasc;
+ cc[VLNEXT] = ltc->t_lnextc;
+ break;
+ }
+ case TIOCGLTC: {
+ struct ltchars *ltc = (struct ltchars *)data;
+ register u_char *cc = tp->t_cc;
+
+ ltc->t_suspc = cc[VSUSP];
+ ltc->t_dsuspc = cc[VDSUSP];
+ ltc->t_rprntc = cc[VREPRINT];
+ ltc->t_flushc = cc[VDISCARD];
+ ltc->t_werasc = cc[VWERASE];
+ ltc->t_lnextc = cc[VLNEXT];
+ break;
+ }
+ case TIOCLBIS:
+ case TIOCLBIC:
+ case TIOCLSET: {
+ struct termios term;
+
+ term = tp->t_termios;
+ /* The local-mode word occupies the high 16 bits of t_flags. */
+ if (com == TIOCLSET)
+ tp->t_flags = (tp->t_flags&0xffff) | *(int *)data<<16;
+ else {
+ /* Refresh the high half from the current termios state first. */
+ tp->t_flags =
+ (ttcompatgetflags(tp)&0xffff0000)|(tp->t_flags&0xffff);
+ if (com == TIOCLBIS)
+ tp->t_flags |= *(int *)data<<16;
+ else
+ tp->t_flags &= ~(*(int *)data<<16);
+ }
+ ttcompatsetlflags(tp, &term);
+ return (ttioctl(tp, TIOCSETA, &term, flag));
+ }
+ case TIOCLGET:
+ *(int *)data = ttcompatgetflags(tp)>>16;
+ if (ttydebug)
+ printf("CLGET: returning %x\n", *(int *)data);
+ break;
+
+ case OTIOCGETD:
+ /* Line discipline 0 is reported to old callers as 2. */
+ *(int *)data = tp->t_line ? tp->t_line : 2;
+ break;
+
+ case OTIOCSETD: {
+ int ldisczero = 0;
+
+ /* Conversely, a request for discipline 2 selects discipline 0. */
+ return (ttioctl(tp, TIOCSETD,
+ *(int *)data == 2 ? (caddr_t)&ldisczero : data, flag));
+ }
+
+ case OTIOCCONS:
+ /* The old call carries no on/off value; always pass 1 to TIOCCONS. */
+ *(int *)data = 1;
+ return (ttioctl(tp, TIOCCONS, data, flag));
+
+ default:
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * Build the old-style flag word that corresponds to the termios modes
+ * (and the saved LITOUT/PASS8 bits of t_flags) of tp, and return it.
+ */
+ttcompatgetflags(tp)
+	register struct tty *tp;
+{
+	register long imode = tp->t_iflag;
+	register long lmode = tp->t_lflag;
+	register long omode = tp->t_oflag;
+	register long cmode = tp->t_cflag;
+	register int compat = 0;
+
+	if (imode & IXOFF)
+		compat |= TANDEM;
+	if ((imode & ICRNL) || (omode & ONLCR))
+		compat |= CRMOD;
+
+	/* Parity, or the 8-bit modes when parity is off. */
+	if ((cmode & PARENB) == 0) {
+		if ((tp->t_flags & LITOUT) && (omode & OPOST) == 0)
+			compat |= LITOUT;
+		if (tp->t_flags & PASS8)
+			compat |= PASS8;
+	} else if ((imode & INPCK) == 0)
+		compat |= EVENP | ODDP;
+	else if (cmode & PARODD)
+		compat |= ODDP;
+	else
+		compat |= EVENP;
+
+	/* Non-canonical input is CBREAK if any processing remains, else RAW. */
+	if ((lmode & ICANON) == 0)
+		compat |= ((imode & IXON) || (lmode & (ISIG | IEXTEN)) ||
+		    (cmode & PARENB)) ? CBREAK : RAW;
+
+	if (cmode & MDMBUF)
+		compat |= MDMBUF;
+	if ((cmode & HUPCL) == 0)
+		compat |= NOHANG;
+	if (omode & OXTABS)
+		compat |= XTABS;
+	if (lmode & ECHOE)
+		compat |= CRTERA | CRTBS;
+	if (lmode & ECHOKE)
+		compat |= CRTKIL | CRTBS;
+	if (lmode & ECHOPRT)
+		compat |= PRTERA;
+	if (lmode & ECHOCTL)
+		compat |= CTLECH;
+	if ((imode & IXANY) == 0)
+		compat |= DECCTQ;
+
+	/* These local-mode bits are copied across unchanged. */
+	compat |= lmode & (ECHO | TOSTOP | FLUSHO | PENDIN | NOFLSH);
+
+	if (ttydebug)
+		printf("getflags: %x\n", compat);
+	return (compat);
+}
+
+/*
+ * Translate the old-style flag word in tp->t_flags into termios modes.
+ *
+ * Starts from the modes already in *t, applies the RAW, CBREAK, CRMOD,
+ * XTABS, ECHO, LITOUT, PASS8, EVENP/ODDP and TANDEM bits of t_flags, and
+ * writes the resulting c_iflag, c_oflag, c_lflag and c_cflag back to *t.
+ * tp itself is not modified. Later steps deliberately override earlier
+ * ones, so statement order matters.
+ */
+ttcompatsetflags(tp, t)
+ register struct tty *tp;
+ register struct termios *t;
+{
+ register flags = tp->t_flags;
+ register long iflag = t->c_iflag;
+ register long oflag = t->c_oflag;
+ register long lflag = t->c_lflag;
+ register long cflag = t->c_cflag;
+
+ if (flags & RAW) {
+ /* RAW: every input flag except IXOFF is cleared. */
+ iflag &= IXOFF;
+ oflag &= ~OPOST;
+ lflag &= ~(ECHOCTL|ISIG|ICANON|IEXTEN);
+ } else {
+ iflag |= BRKINT|IXON|IMAXBEL;
+ oflag |= OPOST;
+ lflag |= ISIG|IEXTEN|ECHOCTL; /* XXX was echoctl on ? */
+ if (flags & XTABS)
+ oflag |= OXTABS;
+ else
+ oflag &= ~OXTABS;
+ if (flags & CBREAK)
+ lflag &= ~ICANON;
+ else
+ lflag |= ICANON;
+ if (flags&CRMOD) {
+ iflag |= ICRNL;
+ oflag |= ONLCR;
+ } else {
+ iflag &= ~ICRNL;
+ oflag &= ~ONLCR;
+ }
+ }
+ if (flags&ECHO)
+ lflag |= ECHO;
+ else
+ lflag &= ~ECHO;
+
+ /* Character size: 8 bits without parity for RAW/LITOUT/PASS8, else 7 bits with parity. */
+ if (flags&(RAW|LITOUT|PASS8)) {
+ cflag &= ~(CSIZE|PARENB);
+ cflag |= CS8;
+ /* LITOUT alone still strips input to 7 bits. */
+ if ((flags&(RAW|PASS8)) == 0)
+ iflag |= ISTRIP;
+ else
+ iflag &= ~ISTRIP;
+ } else {
+ cflag &= ~CSIZE;
+ cflag |= CS7|PARENB;
+ iflag |= ISTRIP;
+ }
+ /* Input parity checking is enabled only when exactly one of EVENP/ODDP is set. */
+ if ((flags&(EVENP|ODDP)) == EVENP) {
+ iflag |= INPCK;
+ cflag &= ~PARODD;
+ } else if ((flags&(EVENP|ODDP)) == ODDP) {
+ iflag |= INPCK;
+ cflag |= PARODD;
+ } else
+ iflag &= ~INPCK;
+ if (flags&LITOUT)
+ oflag &= ~OPOST; /* move earlier ? */
+ if (flags&TANDEM)
+ iflag |= IXOFF;
+ else
+ iflag &= ~IXOFF;
+ t->c_iflag = iflag;
+ t->c_oflag = oflag;
+ t->c_lflag = lflag;
+ t->c_cflag = cflag;
+}
+
+/*
+ * Translate the old-style local-mode bits of tp->t_flags into termios
+ * modes.
+ *
+ * Starts from the modes already in *t, applies the CRTERA, CRTKIL,
+ * PRTERA, CTLECH, DECCTQ, MDMBUF, NOHANG, LITOUT and PASS8 bits and the
+ * directly shared TOSTOP/FLUSHO/PENDIN/NOFLSH bits, and writes the
+ * resulting c_iflag, c_oflag, c_lflag and c_cflag back to *t. tp itself
+ * is not modified.
+ */
+ttcompatsetlflags(tp, t)
+ register struct tty *tp;
+ register struct termios *t;
+{
+ register flags = tp->t_flags;
+ register long iflag = t->c_iflag;
+ register long oflag = t->c_oflag;
+ register long lflag = t->c_lflag;
+ register long cflag = t->c_cflag;
+
+ if (flags&CRTERA)
+ lflag |= ECHOE;
+ else
+ lflag &= ~ECHOE;
+ if (flags&CRTKIL)
+ lflag |= ECHOKE;
+ else
+ lflag &= ~ECHOKE;
+ if (flags&PRTERA)
+ lflag |= ECHOPRT;
+ else
+ lflag &= ~ECHOPRT;
+ if (flags&CTLECH)
+ lflag |= ECHOCTL;
+ else
+ lflag &= ~ECHOCTL;
+ /* DECCTQ is the inverse of IXANY. */
+ if ((flags&DECCTQ) == 0)
+ iflag |= IXANY;
+ else
+ iflag &= ~IXANY;
+ if (flags & MDMBUF)
+ cflag |= MDMBUF;
+ else
+ cflag &= ~MDMBUF;
+ /* NOHANG is the inverse of HUPCL. */
+ if (flags&NOHANG)
+ cflag &= ~HUPCL;
+ else
+ cflag |= HUPCL;
+ /* These bits are copied across unchanged. */
+ lflag &= ~(TOSTOP|FLUSHO|PENDIN|NOFLSH);
+ lflag |= flags&(TOSTOP|FLUSHO|PENDIN|NOFLSH);
+ if (flags&(LITOUT|PASS8)) {
+ iflag &= ~ISTRIP;
+ cflag &= ~(CSIZE|PARENB);
+ cflag |= CS8;
+ if (flags&LITOUT)
+ oflag &= ~OPOST;
+ /* LITOUT without PASS8 or RAW still strips input to 7 bits. */
+ if ((flags&(PASS8|RAW)) == 0)
+ iflag |= ISTRIP;
+ } else if ((flags&RAW) == 0) {
+ cflag &= ~CSIZE;
+ cflag |= CS7|PARENB;
+ oflag |= OPOST;
+ }
+ t->c_iflag = iflag;
+ t->c_oflag = oflag;
+ t->c_lflag = lflag;
+ t->c_cflag = cflag;
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
diff --git a/sys/kern/tty_conf.c b/sys/kern/tty_conf.c
new file mode 100644
index 000000000000..b53edb429756
--- /dev/null
+++ b/sys/kern/tty_conf.c
@@ -0,0 +1,126 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty_conf.c 8.4 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/conf.h>
+
+/*
+ * Stub entries for unused line discipline slots: each macro is enodev
+ * cast to the function-pointer type of one linesw member, so a single
+ * routine can fill every slot (enodev presumably just returns ENODEV;
+ * it is defined elsewhere).
+ */
+#define ttynodisc ((int (*) __P((dev_t, struct tty *)))enodev)
+#define ttyerrclose ((int (*) __P((struct tty *, int flags)))enodev)
+#define ttyerrio ((int (*) __P((struct tty *, struct uio *, int)))enodev)
+#define ttyerrinput ((int (*) __P((int c, struct tty *)))enodev)
+#define ttyerrstart ((int (*) __P((struct tty *)))enodev)
+
+int nullioctl __P((struct tty *tp, int cmd, caddr_t data,
+ int flag, struct proc *p));
+
+#include "tb.h"
+#if NTB > 0
+int tbopen __P((dev_t dev, struct tty *tp));
+int tbclose __P((struct tty *tp, int flags));
+int tbread __P((struct tty *, struct uio *, int flags));
+int tbioctl __P((struct tty *tp, int cmd, caddr_t data,
+ int flag, struct proc *p));
+int tbinput __P((int c, struct tty *tp));
+#endif
+
+#include "sl.h"
+#if NSL > 0
+int slopen __P((dev_t dev, struct tty *tp));
+int slclose __P((struct tty *tp, int flags));
+int sltioctl __P((struct tty *tp, int cmd, caddr_t data,
+ int flag, struct proc *p));
+int slinput __P((int c, struct tty *tp));
+int slstart __P((struct tty *tp));
+#endif
+
+
+/*
+ * Line discipline switch table, indexed by line discipline number.
+ * Judging by the routines installed, the eight members of each entry
+ * are, in order: open, close, read, write, ioctl, input, start, modem
+ * (confirm against the struct linesw declaration). Slots for disciplines
+ * that are defunct or not configured are filled with the error stubs.
+ */
+struct linesw linesw[] =
+{
+ { ttyopen, ttylclose, ttread, ttwrite, nullioctl,
+ ttyinput, ttstart, ttymodem }, /* 0- termios */
+
+ { ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
+ ttyerrinput, ttyerrstart, nullmodem }, /* 1- defunct */
+
+ { ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
+ ttyerrinput, ttyerrstart, nullmodem }, /* 2- defunct */
+
+ /* Slot 3 is filled either way so that slot 4 keeps its index. */
+#if NTB > 0
+ { tbopen, tbclose, tbread, enodev, tbioctl,
+ tbinput, ttstart, nullmodem }, /* 3- TABLDISC */
+#else
+ { ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
+ ttyerrinput, ttyerrstart, nullmodem },
+#endif
+
+#if NSL > 0
+ { slopen, slclose, ttyerrio, ttyerrio, sltioctl,
+ slinput, slstart, nullmodem }, /* 4- SLIPDISC */
+#else
+ { ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
+ ttyerrinput, ttyerrstart, nullmodem },
+#endif
+};
+
+/* Number of entries in linesw[]. */
+int nlinesw = sizeof (linesw) / sizeof (linesw[0]);
+
+/*
+ * Line-discipline ioctl handler for disciplines that have no commands
+ * of their own: it ignores all of its arguments and always returns -1.
+ */
+/*ARGSUSED*/
+nullioctl(tp, cmd, data, flags, p)
+	struct tty *tp;
+	int cmd;
+	char *data;
+	int flags;
+	struct proc *p;
+{
+
+#ifdef lint
+	/* Self-assignments only serve to quiet lint about unused arguments. */
+	tp = tp;
+	data = data;
+	flags = flags;
+	p = p;
+#endif
+	return (-1);
+}
diff --git a/sys/kern/tty_pty.c b/sys/kern/tty_pty.c
new file mode 100644
index 000000000000..0e6911b63e19
--- /dev/null
+++ b/sys/kern/tty_pty.c
@@ -0,0 +1,691 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty_pty.c 8.2 (Berkeley) 9/23/93
+ */
+
+/*
+ * Pseudo-teletype Driver
+ * (Actually two drivers, requiring two entries in 'cdevsw')
+ */
+#include "pty.h" /* XXX */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/conf.h>
+#include <sys/file.h>
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+
+#if NPTY == 1
+#undef NPTY
+#define NPTY 32 /* crude XXX */
+#endif
+
+#define BUFSIZ 100 /* Chunk size iomoved to/from user */
+
+/*
+ * pts == /dev/tty[pqrs]?
+ * ptc == /dev/pty[pqrs]?
+ */
+struct tty pt_tty[NPTY]; /* XXX */
+struct pt_ioctl {
+ int pt_flags;
+ struct selinfo pt_selr, pt_selw;
+ u_char pt_send;
+ u_char pt_ucntl;
+} pt_ioctl[NPTY]; /* XXX */
+int npty = NPTY; /* for pstat -t */
+
+#define PF_PKT 0x08 /* packet mode */
+#define PF_STOPPED 0x10 /* user told stopped */
+#define PF_REMOTE 0x20 /* remote and flow controlled input */
+#define PF_NOSTOP 0x40
+#define PF_UCNTL 0x80 /* user control mode */
+
+void ptsstop __P((struct tty *, int));
+
+/*
+ * Establish n (or default if n is 1) ptys in the system.
+ *
+ * XXX cdevsw & pstat require the array `pty[]' to be an array
+ */
+void
+ptyattach(n)
+ int n;
+{
+#ifdef notyet
+ char *mem;
+ register u_long ntb;
+#define DEFAULT_NPTY 32
+
+ /* maybe should allow 0 => none? */
+ if (n <= 1)
+ n = DEFAULT_NPTY;
+ ntb = n * sizeof(struct tty);
+ mem = malloc(ntb + ALIGNBYTES + n * sizeof(struct pt_ioctl),
+ M_DEVBUF, M_WAITOK);
+ pt_tty = (struct tty *)mem;
+ mem = (char *)ALIGN(mem + ntb);
+ pt_ioctl = (struct pt_ioctl *)mem;
+ npty = n;
+#endif
+}
+
+/*ARGSUSED*/
+ptsopen(dev, flag, devtype, p)
+ dev_t dev;
+ int flag, devtype;
+ struct proc *p;
+{
+ register struct tty *tp;
+ int error;
+
+ if (minor(dev) >= npty)
+ return (ENXIO);
+ tp = &pt_tty[minor(dev)];
+ if ((tp->t_state & TS_ISOPEN) == 0) {
+ tp->t_state |= TS_WOPEN;
+ ttychars(tp); /* Set up default chars */
+ tp->t_iflag = TTYDEF_IFLAG;
+ tp->t_oflag = TTYDEF_OFLAG;
+ tp->t_lflag = TTYDEF_LFLAG;
+ tp->t_cflag = TTYDEF_CFLAG;
+ tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED;
+ ttsetwater(tp); /* would be done in xxparam() */
+ } else if (tp->t_state&TS_XCLUDE && p->p_ucred->cr_uid != 0)
+ return (EBUSY);
+ if (tp->t_oproc) /* Ctrlr still around. */
+ tp->t_state |= TS_CARR_ON;
+ while ((tp->t_state & TS_CARR_ON) == 0) {
+ tp->t_state |= TS_WOPEN;
+ if (flag&FNONBLOCK)
+ break;
+ if (error = ttysleep(tp, (caddr_t)&tp->t_rawq, TTIPRI | PCATCH,
+ ttopen, 0))
+ return (error);
+ }
+ error = (*linesw[tp->t_line].l_open)(dev, tp);
+ ptcwakeup(tp, FREAD|FWRITE);
+ return (error);
+}
+
+ptsclose(dev, flag, mode, p)
+ dev_t dev;
+ int flag, mode;
+ struct proc *p;
+{
+ register struct tty *tp;
+ int err;
+
+ tp = &pt_tty[minor(dev)];
+ err = (*linesw[tp->t_line].l_close)(tp, flag);
+ err |= ttyclose(tp);
+ ptcwakeup(tp, FREAD|FWRITE);
+ return (err);
+}
+
+ptsread(dev, uio, flag)
+ dev_t dev;
+ struct uio *uio;
+ int flag;
+{
+ struct proc *p = curproc;
+ register struct tty *tp = &pt_tty[minor(dev)];
+ register struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+ int error = 0;
+
+again:
+ if (pti->pt_flags & PF_REMOTE) {
+ while (isbackground(p, tp)) {
+ if ((p->p_sigignore & sigmask(SIGTTIN)) ||
+ (p->p_sigmask & sigmask(SIGTTIN)) ||
+ p->p_pgrp->pg_jobc == 0 ||
+ p->p_flag & P_PPWAIT)
+ return (EIO);
+ pgsignal(p->p_pgrp, SIGTTIN, 1);
+ if (error = ttysleep(tp, (caddr_t)&lbolt,
+ TTIPRI | PCATCH, ttybg, 0))
+ return (error);
+ }
+ if (tp->t_canq.c_cc == 0) {
+ if (flag & IO_NDELAY)
+ return (EWOULDBLOCK);
+ if (error = ttysleep(tp, (caddr_t)&tp->t_canq,
+ TTIPRI | PCATCH, ttyin, 0))
+ return (error);
+ goto again;
+ }
+ while (tp->t_canq.c_cc > 1 && uio->uio_resid > 0)
+ if (ureadc(getc(&tp->t_canq), uio) < 0) {
+ error = EFAULT;
+ break;
+ }
+ if (tp->t_canq.c_cc == 1)
+ (void) getc(&tp->t_canq);
+ if (tp->t_canq.c_cc)
+ return (error);
+ } else
+ if (tp->t_oproc)
+ error = (*linesw[tp->t_line].l_read)(tp, uio, flag);
+ ptcwakeup(tp, FWRITE);
+ return (error);
+}
+
+/*
+ * Write to pseudo-tty.
+ * Wakeups of controlling tty will happen
+ * indirectly, when tty driver calls ptsstart.
+ */
+ptswrite(dev, uio, flag)
+ dev_t dev;
+ struct uio *uio;
+ int flag;
+{
+ register struct tty *tp;
+
+ tp = &pt_tty[minor(dev)];
+ if (tp->t_oproc == 0)
+ return (EIO);
+ return ((*linesw[tp->t_line].l_write)(tp, uio, flag));
+}
+
+/*
+ * Start output on pseudo-tty.
+ * Wake up process selecting or sleeping for input from controlling tty.
+ */
+void
+ptsstart(tp)
+ struct tty *tp;
+{
+ register struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)];
+
+ if (tp->t_state & TS_TTSTOP)
+ return;
+ if (pti->pt_flags & PF_STOPPED) {
+ pti->pt_flags &= ~PF_STOPPED;
+ pti->pt_send = TIOCPKT_START;
+ }
+ ptcwakeup(tp, FREAD);
+}
+
+ptcwakeup(tp, flag)
+ struct tty *tp;
+ int flag;
+{
+ struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)];
+
+ if (flag & FREAD) {
+ selwakeup(&pti->pt_selr);
+ wakeup((caddr_t)&tp->t_outq.c_cf);
+ }
+ if (flag & FWRITE) {
+ selwakeup(&pti->pt_selw);
+ wakeup((caddr_t)&tp->t_rawq.c_cf);
+ }
+}
+
+/*ARGSUSED*/
+#ifdef __STDC__
+ptcopen(dev_t dev, int flag, int devtype, struct proc *p)
+#else
+ptcopen(dev, flag, devtype, p)
+ dev_t dev;
+ int flag, devtype;
+ struct proc *p;
+#endif
+{
+ register struct tty *tp;
+ struct pt_ioctl *pti;
+
+ if (minor(dev) >= npty)
+ return (ENXIO);
+ tp = &pt_tty[minor(dev)];
+ if (tp->t_oproc)
+ return (EIO);
+ tp->t_oproc = ptsstart;
+#ifdef sun4c
+ tp->t_stop = ptsstop;
+#endif
+ (void)(*linesw[tp->t_line].l_modem)(tp, 1);
+ tp->t_lflag &= ~EXTPROC;
+ pti = &pt_ioctl[minor(dev)];
+ pti->pt_flags = 0;
+ pti->pt_send = 0;
+ pti->pt_ucntl = 0;
+ return (0);
+}
+
+ptcclose(dev)
+ dev_t dev;
+{
+ register struct tty *tp;
+
+ tp = &pt_tty[minor(dev)];
+ (void)(*linesw[tp->t_line].l_modem)(tp, 0);
+ tp->t_state &= ~TS_CARR_ON;
+ tp->t_oproc = 0; /* mark closed */
+ tp->t_session = 0;
+ return (0);
+}
+
+ptcread(dev, uio, flag)
+	dev_t dev;
+	struct uio *uio;
+	int flag;
+{
+	register struct tty *tp = &pt_tty[minor(dev)];
+	struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+	char buf[BUFSIZ];
+	int error = 0, cc;
+
+	/*
+	 * We want to block until the slave is open, and there's
+	 * something to read; but if we lost the slave or we're NBIO,
+	 * then return the appropriate error instead.
+	 */
+	for (;;) {
+		if (tp->t_state&TS_ISOPEN) {
+			if (pti->pt_flags&PF_PKT && pti->pt_send) {
+				error = ureadc((int)pti->pt_send, uio);
+				if (error)
+					return (error);
+				if (pti->pt_send & TIOCPKT_IOCTL) {
+					cc = min(uio->uio_resid,
+						sizeof(tp->t_termios));
+					error = uiomove(&tp->t_termios,
+					    cc, uio);
+				}
+				pti->pt_send = 0;
+				return (error);
+			}
+			if (pti->pt_flags&PF_UCNTL && pti->pt_ucntl) {
+				error = ureadc((int)pti->pt_ucntl, uio);
+				if (error)
+					return (error);
+				pti->pt_ucntl = 0;
+				return (0);
+			}
+			if (tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0)
+				break;
+		}
+		if ((tp->t_state&TS_CARR_ON) == 0)
+			return (0);	/* EOF */
+		if (flag & IO_NDELAY)
+			return (EWOULDBLOCK);
+		if (error = tsleep((caddr_t)&tp->t_outq.c_cf, TTIPRI | PCATCH,
+		    ttyin, 0))
+			return (error);
+	}
+	if (pti->pt_flags & (PF_PKT|PF_UCNTL))
+		error = ureadc(0, uio);
+	while (uio->uio_resid > 0 && error == 0) {
+		cc = q_to_b(&tp->t_outq, buf, min(uio->uio_resid, BUFSIZ));
+		if (cc <= 0)
+			break;
+		error = uiomove(buf, cc, uio);
+	}
+	if (tp->t_outq.c_cc <= tp->t_lowat) {
+		if (tp->t_state&TS_ASLEEP) {
+			tp->t_state &= ~TS_ASLEEP;
+			wakeup((caddr_t)&tp->t_outq);
+		}
+		selwakeup(&tp->t_wsel);
+	}
+	return (error);
+}
+
+void
+ptsstop(tp, flush)
+ register struct tty *tp;
+ int flush;
+{
+ struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)];
+ int flag;
+
+ /* note: FLUSHREAD and FLUSHWRITE already ok */
+ if (flush == 0) {
+ flush = TIOCPKT_STOP;
+ pti->pt_flags |= PF_STOPPED;
+ } else
+ pti->pt_flags &= ~PF_STOPPED;
+ pti->pt_send |= flush;
+ /* change of perspective */
+ flag = 0;
+ if (flush & FREAD)
+ flag |= FWRITE;
+ if (flush & FWRITE)
+ flag |= FREAD;
+ ptcwakeup(tp, flag);
+}
+
+ptcselect(dev, rw, p)
+ dev_t dev;
+ int rw;
+ struct proc *p;
+{
+ register struct tty *tp = &pt_tty[minor(dev)];
+ struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+ int s;
+
+ if ((tp->t_state&TS_CARR_ON) == 0)
+ return (1);
+ switch (rw) {
+
+ case FREAD:
+ /*
+ * Need to block timeouts (ttrstart).
+ */
+ s = spltty();
+ if ((tp->t_state&TS_ISOPEN) &&
+ tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) {
+ splx(s);
+ return (1);
+ }
+ splx(s);
+ /* FALLTHROUGH */
+
+ case 0: /* exceptional */
+ if ((tp->t_state&TS_ISOPEN) &&
+ (pti->pt_flags&PF_PKT && pti->pt_send ||
+ pti->pt_flags&PF_UCNTL && pti->pt_ucntl))
+ return (1);
+ selrecord(p, &pti->pt_selr);
+ break;
+
+
+ case FWRITE:
+ if (tp->t_state&TS_ISOPEN) {
+ if (pti->pt_flags & PF_REMOTE) {
+ if (tp->t_canq.c_cc == 0)
+ return (1);
+ } else {
+ if (tp->t_rawq.c_cc + tp->t_canq.c_cc < TTYHOG-2)
+ return (1);
+ if (tp->t_canq.c_cc == 0 && (tp->t_iflag&ICANON))
+ return (1);
+ }
+ }
+ selrecord(p, &pti->pt_selw);
+ break;
+
+ }
+ return (0);
+}
+
+ptcwrite(dev, uio, flag)
+ dev_t dev;
+ register struct uio *uio;
+ int flag;
+{
+ register struct tty *tp = &pt_tty[minor(dev)];
+ register u_char *cp;
+ register int cc = 0;
+ u_char locbuf[BUFSIZ];
+ int cnt = 0;
+ struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+ int error = 0;
+
+again:
+ if ((tp->t_state&TS_ISOPEN) == 0)
+ goto block;
+ if (pti->pt_flags & PF_REMOTE) {
+ if (tp->t_canq.c_cc)
+ goto block;
+ while (uio->uio_resid > 0 && tp->t_canq.c_cc < TTYHOG - 1) {
+ if (cc == 0) {
+ cc = min(uio->uio_resid, BUFSIZ);
+ cc = min(cc, TTYHOG - 1 - tp->t_canq.c_cc);
+ cp = locbuf;
+ error = uiomove((caddr_t)cp, cc, uio);
+ if (error)
+ return (error);
+ /* check again for safety */
+ if ((tp->t_state&TS_ISOPEN) == 0)
+ return (EIO);
+ }
+ if (cc)
+ (void) b_to_q((char *)cp, cc, &tp->t_canq);
+ cc = 0;
+ }
+ (void) putc(0, &tp->t_canq);
+ ttwakeup(tp);
+ wakeup((caddr_t)&tp->t_canq);
+ return (0);
+ }
+ while (uio->uio_resid > 0) {
+ if (cc == 0) {
+ cc = min(uio->uio_resid, BUFSIZ);
+ cp = locbuf;
+ error = uiomove((caddr_t)cp, cc, uio);
+ if (error)
+ return (error);
+ /* check again for safety */
+ if ((tp->t_state&TS_ISOPEN) == 0)
+ return (EIO);
+ }
+ while (cc > 0) {
+ if ((tp->t_rawq.c_cc + tp->t_canq.c_cc) >= TTYHOG - 2 &&
+ (tp->t_canq.c_cc > 0 || !(tp->t_iflag&ICANON))) {
+ wakeup((caddr_t)&tp->t_rawq);
+ goto block;
+ }
+ (*linesw[tp->t_line].l_rint)(*cp++, tp);
+ cnt++;
+ cc--;
+ }
+ cc = 0;
+ }
+ return (0);
+block:
+ /*
+ * Come here to wait for slave to open, for space
+ * in outq, or space in rawq.
+ */
+ if ((tp->t_state&TS_CARR_ON) == 0)
+ return (EIO);
+ if (flag & IO_NDELAY) {
+ /* adjust for data copied in but not written */
+ uio->uio_resid += cc;
+ if (cnt == 0)
+ return (EWOULDBLOCK);
+ return (0);
+ }
+ if (error = tsleep((caddr_t)&tp->t_rawq.c_cf, TTOPRI | PCATCH,
+ ttyout, 0)) {
+ /* adjust for data copied in but not written */
+ uio->uio_resid += cc;
+ return (error);
+ }
+ goto again;
+}
+
+/*ARGSUSED*/
+ptyioctl(dev, cmd, data, flag, p)
+	dev_t dev;
+	int cmd;
+	caddr_t data;
+	int flag;
+	struct proc *p;
+{
+	register struct tty *tp = &pt_tty[minor(dev)];
+	register struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+	register u_char *cc = tp->t_cc;
+	int stop, error;
+
+	/*
+	 * IF CONTROLLER STTY THEN MUST FLUSH TO PREVENT A HANG.
+	 * ttywflush(tp) will hang if there are characters in the outq.
+	 */
+	if (cmd == TIOCEXT) {
+		/*
+		 * When the EXTPROC bit (kept in t_lflag) is being toggled,
+		 * we need to send a TIOCPKT_IOCTL if the packet driver
+		 * is turned on.
+		 */
+		if (*(int *)data) {
+			if (pti->pt_flags & PF_PKT) {
+				pti->pt_send |= TIOCPKT_IOCTL;
+				ptcwakeup(tp, FREAD);
+			}
+			tp->t_lflag |= EXTPROC;
+		} else {
+			if ((tp->t_lflag & EXTPROC) &&
+			    (pti->pt_flags & PF_PKT)) {
+				pti->pt_send |= TIOCPKT_IOCTL;
+				ptcwakeup(tp, FREAD);
+			}
+			tp->t_lflag &= ~EXTPROC;
+		}
+		return(0);
+	} else if (cdevsw[major(dev)].d_open == ptcopen)
+		switch (cmd) {
+
+		case TIOCGPGRP:
+			/*
+			 * We avoid calling ttioctl on the controller since,
+			 * in that case, tp must be the controlling terminal.
+			 */
+			*(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : 0;
+			return (0);
+
+		case TIOCPKT:
+			if (*(int *)data) {
+				if (pti->pt_flags & PF_UCNTL)
+					return (EINVAL);
+				pti->pt_flags |= PF_PKT;
+			} else
+				pti->pt_flags &= ~PF_PKT;
+			return (0);
+
+		case TIOCUCNTL:
+			if (*(int *)data) {
+				if (pti->pt_flags & PF_PKT)
+					return (EINVAL);
+				pti->pt_flags |= PF_UCNTL;
+			} else
+				pti->pt_flags &= ~PF_UCNTL;
+			return (0);
+
+		case TIOCREMOTE:
+			if (*(int *)data)
+				pti->pt_flags |= PF_REMOTE;
+			else
+				pti->pt_flags &= ~PF_REMOTE;
+			ttyflush(tp, FREAD|FWRITE);
+			return (0);
+
+#ifdef COMPAT_43
+		case TIOCSETP:
+		case TIOCSETN:
+#endif
+		case TIOCSETD:
+		case TIOCSETA:
+		case TIOCSETAW:
+		case TIOCSETAF:
+			ndflush(&tp->t_outq, tp->t_outq.c_cc);
+			break;
+
+		case TIOCSIG:
+			if (*(unsigned int *)data >= NSIG ||
+			    *(unsigned int *)data == 0)	/* 0 is no signal */
+				return(EINVAL);
+			if ((tp->t_lflag&NOFLSH) == 0)
+				ttyflush(tp, FREAD|FWRITE);
+			pgsignal(tp->t_pgrp, *(unsigned int *)data, 1);
+			if ((*(unsigned int *)data == SIGINFO) &&
+			    ((tp->t_lflag&NOKERNINFO) == 0))
+				ttyinfo(tp);
+			return(0);
+		}
+	error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p);
+	if (error < 0)
+		 error = ttioctl(tp, cmd, data, flag);
+	if (error < 0) {
+		if (pti->pt_flags & PF_UCNTL &&
+		    (cmd & ~0xff) == UIOCCMD(0)) {
+			if (cmd & 0xff) {
+				pti->pt_ucntl = (u_char)cmd;
+				ptcwakeup(tp, FREAD);
+			}
+			return (0);
+		}
+		error = ENOTTY;
+	}
+	/*
+	 * If external processing and packet mode send ioctl packet.
+	 */
+	if ((tp->t_lflag&EXTPROC) && (pti->pt_flags & PF_PKT)) {
+		switch(cmd) {
+		case TIOCSETA:
+		case TIOCSETAW:
+		case TIOCSETAF:
+#ifdef COMPAT_43
+		case TIOCSETP:
+		case TIOCSETN:
+#endif
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+		case TIOCSETC:
+		case TIOCSLTC:
+		case TIOCLBIS:
+		case TIOCLBIC:
+		case TIOCLSET:
+#endif
+			pti->pt_send |= TIOCPKT_IOCTL;
+			ptcwakeup(tp, FREAD);
+		default:
+			break;
+		}
+	}
+	stop = (tp->t_iflag & IXON) && CCEQ(cc[VSTOP], CTRL('s'))
+		&& CCEQ(cc[VSTART], CTRL('q'));
+	if (pti->pt_flags & PF_NOSTOP) {
+		if (stop) {
+			pti->pt_send &= ~TIOCPKT_NOSTOP;
+			pti->pt_send |= TIOCPKT_DOSTOP;
+			pti->pt_flags &= ~PF_NOSTOP;
+			ptcwakeup(tp, FREAD);
+		}
+	} else {
+		if (!stop) {
+			pti->pt_send &= ~TIOCPKT_DOSTOP;
+			pti->pt_send |= TIOCPKT_NOSTOP;
+			pti->pt_flags |= PF_NOSTOP;
+			ptcwakeup(tp, FREAD);
+		}
+	}
+	return (error);
+}
diff --git a/sys/kern/tty_subr.c b/sys/kern/tty_subr.c
new file mode 100644
index 000000000000..fe8f000f87d5
--- /dev/null
+++ b/sys/kern/tty_subr.c
@@ -0,0 +1,159 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)tty_subr.c 8.2 (Berkeley) 9/5/93
+ */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+char cwaiting;
+struct cblock *cfree, *cfreelist;
+int cfreecount, nclist;
+
+void
+clist_init()
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
+
+getc(a1)
+ struct clist *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return ((char)0);
+}
+
+q_to_b(a1, a2, a3)
+ struct clist *a1;
+ char *a2;
+ int a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+ndqb(a1, a2)
+ struct clist *a1;
+ int a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+void
+ndflush(a1, a2)
+ struct clist *a1;
+ int a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
+
+putc(a1, a2)
+ char a1;
+ struct clist *a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+b_to_q(a1, a2, a3)
+ char *a1;
+ int a2;
+ struct clist *a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return (0);
+}
+
+char *
+nextc(a1, a2, a3)
+ struct clist *a1;
+ char *a2;
+ int *a3;
+{
+
+ /*
+ * Body deleted.
+ */
+ return ((char *)0);
+}
+
+unputc(a1)
+ struct clist *a1;
+{
+
+ /*
+ * Body deleted.
+ */
+ return ((char)0);
+}
+
+void
+catq(a1, a2)
+ struct clist *a1, *a2;
+{
+
+ /*
+ * Body deleted.
+ */
+ return;
+}
diff --git a/sys/kern/tty_tb.c b/sys/kern/tty_tb.c
new file mode 100644
index 000000000000..242301a52e89
--- /dev/null
+++ b/sys/kern/tty_tb.c
@@ -0,0 +1,366 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty_tb.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include "tb.h"
+#if NTB > 0
+
+/*
+ * Line discipline for RS232 tablets;
+ * supplies binary coordinate data.
+ */
+#include <sys/param.h>
+#include <sys/tablet.h>
+#include <sys/tty.h>
+
+/*
+ * Tablet configuration table.
+ */
+struct tbconf {
+ short tbc_recsize; /* input record size in bytes */
+ short tbc_uiosize; /* size of data record returned user */
+ int tbc_sync; /* mask for finding sync byte/bit */
+ int (*tbc_decode)();/* decoding routine */
+ char *tbc_run; /* enter run mode sequence */
+ char *tbc_point; /* enter point mode sequence */
+ char *tbc_stop; /* stop sequence */
+ char *tbc_start; /* start/restart sequence */
+ int tbc_flags;
+#define TBF_POL 0x1 /* polhemus hack */
+#define TBF_INPROX 0x2 /* tablet has proximity info */
+};
+
+static int tbdecode(), gtcodecode(), poldecode();
+static int tblresdecode(), tbhresdecode();
+
+struct tbconf tbconf[TBTYPE] = {
+{ 0 },
+{ 5, sizeof (struct tbpos), 0200, tbdecode, "6", "4" },
+{ 5, sizeof (struct tbpos), 0200, tbdecode, "\1CN", "\1RT", "\2", "\4" },
+{ 8, sizeof (struct gtcopos), 0200, gtcodecode },
+{17, sizeof (struct polpos), 0200, poldecode, 0, 0, "\21", "\5\22\2\23",
+ TBF_POL },
+{ 5, sizeof (struct tbpos), 0100, tblresdecode, "\1CN", "\1PT", "\2", "\4",
+ TBF_INPROX },
+{ 6, sizeof (struct tbpos), 0200, tbhresdecode, "\1CN", "\1PT", "\2", "\4",
+ TBF_INPROX },
+{ 5, sizeof (struct tbpos), 0100, tblresdecode, "\1CL\33", "\1PT\33", 0, 0},
+{ 6, sizeof (struct tbpos), 0200, tbhresdecode, "\1CL\33", "\1PT\33", 0, 0},
+};
+
+/*
+ * Tablet state
+ */
+struct tb {
+ int tbflags; /* mode & type bits */
+#define TBMAXREC 17 /* max input record size */
+ char cbuf[TBMAXREC]; /* input buffer */
+ union {
+ struct tbpos tbpos;
+ struct gtcopos gtcopos;
+ struct polpos polpos;
+ } rets; /* processed state */
+#define NTBS 16
+} tb[NTBS];
+
+/*
+ * Open as tablet discipline; called on discipline change.
+ */
+/*ARGSUSED*/
+tbopen(dev, tp)
+ dev_t dev;
+ register struct tty *tp;
+{
+ register struct tb *tbp;
+
+ if (tp->t_line == TABLDISC)
+ return (ENODEV);
+ ttywflush(tp);
+ for (tbp = tb; tbp < &tb[NTBS]; tbp++)
+ if (tbp->tbflags == 0)
+ break;
+ if (tbp >= &tb[NTBS])
+ return (EBUSY);
+ tbp->tbflags = TBTIGER|TBPOINT; /* default */
+ tp->t_cp = tbp->cbuf;
+ tp->t_inbuf = 0;
+ bzero((caddr_t)&tbp->rets, sizeof (tbp->rets));
+ tp->T_LINEP = (caddr_t)tbp;
+ tp->t_flags |= LITOUT;
+ return (0);
+}
+
+/*
+ * Line discipline change or last device close; always returns 0.
+ */
+tbclose(tp)
+	register struct tty *tp;
+{
+	int s, modebits = TBPOINT|TBSTOP;
+
+	tbioctl(tp, BIOSMODE, &modebits, 0);
+	s = spltty();
+	((struct tb *)tp->T_LINEP)->tbflags = 0;
+	tp->t_cp = 0;
+	tp->t_inbuf = 0;
+	tp->t_rawq.c_cc = 0;		/* clear queues -- paranoid */
+	tp->t_canq.c_cc = 0;
+	tp->t_line = 0;			/* paranoid: avoid races */
+	splx(s);
+	return (0);	/* l_close callers (e.g. ptsclose) use the result */
+}
+
+/*
+ * Read from a tablet line.
+ * Characters have been buffered in a buffer and decoded.
+ */
+tbread(tp, uio)
+ register struct tty *tp;
+ struct uio *uio;
+{
+ register struct tb *tbp = (struct tb *)tp->T_LINEP;
+ register struct tbconf *tc = &tbconf[tbp->tbflags & TBTYPE];
+ int ret;
+
+ if ((tp->t_state&TS_CARR_ON) == 0)
+ return (EIO);
+ ret = uiomove(&tbp->rets, tc->tbc_uiosize, uio);
+ if (tc->tbc_flags&TBF_POL)
+ tbp->rets.polpos.p_key = ' ';
+ return (ret);
+}
+
+/*
+ * Low level character input routine.
+ * Stuff the character in the buffer, and decode
+ * if all the chars are there.
+ *
+ * This routine could be expanded in-line in the receiver
+ * interrupt routine to make it run as fast as possible.
+ */
+tbinput(c, tp)
+ register int c;
+ register struct tty *tp;
+{
+ register struct tb *tbp = (struct tb *)tp->T_LINEP;
+ register struct tbconf *tc = &tbconf[tbp->tbflags & TBTYPE];
+
+ if (tc->tbc_recsize == 0 || tc->tbc_decode == 0) /* paranoid? */
+ return;
+ /*
+ * Locate sync bit/byte or reset input buffer.
+ */
+ if (c&tc->tbc_sync || tp->t_inbuf == tc->tbc_recsize) {
+ tp->t_cp = tbp->cbuf;
+ tp->t_inbuf = 0;
+ }
+ *tp->t_cp++ = c&0177;
+ /*
+ * Call decode routine only if a full record has been collected.
+ */
+ if (++tp->t_inbuf == tc->tbc_recsize)
+ (*tc->tbc_decode)(tc, tbp->cbuf, &tbp->rets);
+}
+
+/*
+ * Decode GTCO 8 byte format (high res, tilt, and pressure).
+ */
+static
+gtcodecode(tc, cp, tbpos)
+ struct tbconf *tc;
+ register char *cp;
+ register struct gtcopos *tbpos;
+{
+
+ tbpos->pressure = *cp >> 2;
+ tbpos->status = (tbpos->pressure > 16) | TBINPROX; /* half way down */
+ tbpos->xpos = (*cp++ & 03) << 14;
+ tbpos->xpos |= *cp++ << 7;
+ tbpos->xpos |= *cp++;
+ tbpos->ypos = (*cp++ & 03) << 14;
+ tbpos->ypos |= *cp++ << 7;
+ tbpos->ypos |= *cp++;
+ tbpos->xtilt = *cp++;
+ tbpos->ytilt = *cp++;
+ tbpos->scount++;
+}
+
+/*
+ * Decode old Hitachi 5 byte format (low res).
+ */
+static
+tbdecode(tc, cp, tbpos)
+ struct tbconf *tc;
+ register char *cp;
+ register struct tbpos *tbpos;
+{
+ register char byte;
+
+ byte = *cp++;
+ tbpos->status = (byte&0100) ? TBINPROX : 0;
+ byte &= ~0100;
+ if (byte > 036)
+ tbpos->status |= 1 << ((byte-040)/2);
+ tbpos->xpos = *cp++ << 7;
+ tbpos->xpos |= *cp++;
+ if (tbpos->xpos < 256) /* tablet wraps around at 256 */
+ tbpos->status &= ~TBINPROX; /* make it out of proximity */
+ tbpos->ypos = *cp++ << 7;
+ tbpos->ypos |= *cp++;
+ tbpos->scount++;
+}
+
+/*
+ * Decode new Hitachi 5-byte format (low res).
+ */
+static
+tblresdecode(tc, cp, tbpos)
+ struct tbconf *tc;
+ register char *cp;
+ register struct tbpos *tbpos;
+{
+
+ *cp &= ~0100; /* mask sync bit */
+ tbpos->status = (*cp++ >> 2) | TBINPROX;
+ if (tc->tbc_flags&TBF_INPROX && tbpos->status&020)
+ tbpos->status &= ~(020|TBINPROX);
+ tbpos->xpos = *cp++;
+ tbpos->xpos |= *cp++ << 6;
+ tbpos->ypos = *cp++;
+ tbpos->ypos |= *cp++ << 6;
+ tbpos->scount++;
+}
+
+/*
+ * Decode new Hitachi 6-byte format (high res).
+ */
+static
+tbhresdecode(tc, cp, tbpos)
+ struct tbconf *tc;
+ register char *cp;
+ register struct tbpos *tbpos;
+{
+ char byte;
+
+ byte = *cp++;
+ tbpos->xpos = (byte & 03) << 14;
+ tbpos->xpos |= *cp++ << 7;
+ tbpos->xpos |= *cp++;
+ tbpos->ypos = *cp++ << 14;
+ tbpos->ypos |= *cp++ << 7;
+ tbpos->ypos |= *cp++;
+ tbpos->status = (byte >> 2) | TBINPROX;
+ if (tc->tbc_flags&TBF_INPROX && tbpos->status&020)
+ tbpos->status &= ~(020|TBINPROX);
+ tbpos->scount++;
+}
+
+/*
+ * Polhemus decode.
+ */
+static
+poldecode(tc, cp, polpos)
+ struct tbconf *tc;
+ register char *cp;
+ register struct polpos *polpos;
+{
+
+ polpos->p_x = cp[4] | cp[3]<<7 | (cp[9] & 0x03) << 14;
+ polpos->p_y = cp[6] | cp[5]<<7 | (cp[9] & 0x0c) << 12;
+ polpos->p_z = cp[8] | cp[7]<<7 | (cp[9] & 0x30) << 10;
+ polpos->p_azi = cp[11] | cp[10]<<7 | (cp[16] & 0x03) << 14;
+ polpos->p_pit = cp[13] | cp[12]<<7 | (cp[16] & 0x0c) << 12;
+ polpos->p_rol = cp[15] | cp[14]<<7 | (cp[16] & 0x30) << 10;
+ polpos->p_stat = cp[1] | cp[0]<<7;
+ if (cp[2] != ' ')
+ polpos->p_key = cp[2];
+}
+
+/*ARGSUSED*/
+tbioctl(tp, cmd, data, flag)
+ struct tty *tp;
+ caddr_t data;
+{
+ register struct tb *tbp = (struct tb *)tp->T_LINEP;
+
+ switch (cmd) {
+
+ case BIOGMODE:
+ *(int *)data = tbp->tbflags & TBMODE;
+ break;
+
+ case BIOSTYPE:
+ if (tbconf[*(int *)data & TBTYPE].tbc_recsize == 0 ||
+ tbconf[*(int *)data & TBTYPE].tbc_decode == 0)
+ return (EINVAL);
+ tbp->tbflags &= ~TBTYPE;
+ tbp->tbflags |= *(int *)data & TBTYPE;
+ /* fall thru... to set mode bits */
+
+ case BIOSMODE: {
+ register struct tbconf *tc;
+
+ tbp->tbflags &= ~TBMODE;
+ tbp->tbflags |= *(int *)data & TBMODE;
+ tc = &tbconf[tbp->tbflags & TBTYPE];
+ if (tbp->tbflags&TBSTOP) {
+ if (tc->tbc_stop)
+ ttyout(tc->tbc_stop, tp);
+ } else if (tc->tbc_start)
+ ttyout(tc->tbc_start, tp);
+ if (tbp->tbflags&TBPOINT) {
+ if (tc->tbc_point)
+ ttyout(tc->tbc_point, tp);
+ } else if (tc->tbc_run)
+ ttyout(tc->tbc_run, tp);
+ ttstart(tp);
+ break;
+ }
+
+ case BIOGTYPE:
+ *(int *)data = tbp->tbflags & TBTYPE;
+ break;
+
+ case TIOCSETD:
+ case TIOCGETD:
+ case TIOCGETP:
+ case TIOCGETC:
+ return (-1); /* pass thru... */
+
+ default:
+ return (ENOTTY);
+ }
+ return (0);
+}
+#endif
diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c
new file mode 100644
index 000000000000..964fc6f6d5ed
--- /dev/null
+++ b/sys/kern/tty_tty.c
@@ -0,0 +1,147 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty_tty.c 8.2 (Berkeley) 9/23/93
+ */
+
+/*
+ * Indirect driver for controlling tty.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+
+/*
+ * Controlling-terminal vnode of process p, taken from its session, or
+ * NULL when P_CONTROLT is not set in p_flag.
+ */
+#define	cttyvp(p) \
+	(((p)->p_flag & P_CONTROLT) ? (p)->p_session->s_ttyvp : NULL)
+
+/*
+ * Open the controlling terminal of process p.
+ * Returns ENXIO when p has no controlling terminal vnode; otherwise the
+ * result of VOP_OPEN (or of VOP_ACCESS, when PARANOID is defined and
+ * the access check fails).  The vnode is locked around the calls.
+ * The dev and mode arguments are not used.
+ */
+/*ARGSUSED*/
+cttyopen(dev, flag, mode, p)
+	dev_t dev;
+	int flag, mode;
+	struct proc *p;
+{
+	struct vnode *vp = cttyvp(p);
+	int error;
+
+	if (vp == NULL)
+		return (ENXIO);
+
+	VOP_LOCK(vp);
+	error = 0;
+#ifdef PARANOID
+	/*
+	 * Since group is tty and mode is 620 on most terminal lines
+	 * and since sessions protect terminals from processes outside
+	 * your session, this check is probably no longer necessary.
+	 * Since it inhibits setuid root programs that later switch
+	 * to another user from accessing /dev/tty, we have decided
+	 * to delete this test. (mckusick 5/93)
+	 */
+	error = VOP_ACCESS(vp,
+	    ((flag & FREAD) ? VREAD : 0) | ((flag & FWRITE) ? VWRITE : 0),
+	    p->p_ucred, p);
+#endif /* PARANOID */
+	if (error == 0)
+		error = VOP_OPEN(vp, flag, NOCRED, p);
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Read from the controlling terminal of the process recorded in the
+ * uio.  Returns EIO when that process has no controlling terminal
+ * vnode; otherwise the result of VOP_READ, issued with the vnode
+ * locked.  The dev argument is not used.
+ */
+/*ARGSUSED*/
+cttyread(dev, uio, flag)
+	dev_t dev;
+	struct uio *uio;
+	int flag;
+{
+	register struct vnode *vp;
+	int rv;
+
+	vp = cttyvp(uio->uio_procp);
+	if (vp == NULL)
+		return (EIO);
+
+	VOP_LOCK(vp);
+	rv = VOP_READ(vp, uio, flag, NOCRED);
+	VOP_UNLOCK(vp);
+	return (rv);
+}
+
+/*
+ * Write to the controlling terminal of the process recorded in the
+ * uio.  Returns EIO when that process has no controlling terminal
+ * vnode; otherwise the result of VOP_WRITE, issued with the vnode
+ * locked.  The dev argument is not used.
+ */
+/*ARGSUSED*/
+cttywrite(dev, uio, flag)
+	dev_t dev;
+	struct uio *uio;
+	int flag;
+{
+	register struct vnode *vp;
+	int rv;
+
+	vp = cttyvp(uio->uio_procp);
+	if (vp == NULL)
+		return (EIO);
+
+	VOP_LOCK(vp);
+	rv = VOP_WRITE(vp, uio, flag, NOCRED);
+	VOP_UNLOCK(vp);
+	return (rv);
+}
+
+/*
+ * Ioctl on the controlling terminal of process p.
+ * Returns EIO when p has no controlling terminal vnode.  TIOCNOTTY is
+ * handled here: a session leader gets EINVAL, any other process has
+ * P_CONTROLT cleared and gets 0.  Every other request is handed to
+ * VOP_IOCTL on the terminal vnode.  The dev argument is not used.
+ */
+/*ARGSUSED*/
+cttyioctl(dev, cmd, addr, flag, p)
+	dev_t dev;
+	int cmd;
+	caddr_t addr;
+	int flag;
+	struct proc *p;
+{
+	struct vnode *vp = cttyvp(p);
+
+	if (vp == NULL)
+		return (EIO);
+
+	if (cmd == TIOCNOTTY) {
+		if (SESS_LEADER(p))
+			return (EINVAL);
+		p->p_flag &= ~P_CONTROLT;
+		return (0);
+	}
+
+	return (VOP_IOCTL(vp, cmd, addr, flag, NOCRED, p));
+}
+
+/*
+ * Select on the controlling terminal of process p.
+ * Without a controlling terminal vnode this reports "ready" (1) so the
+ * caller goes on to attempt the operation and sees the EOF or failure
+ * there; otherwise the answer comes from VOP_SELECT.
+ * The dev argument is not used.
+ */
+/*ARGSUSED*/
+cttyselect(dev, flag, p)
+	dev_t dev;
+	int flag;
+	struct proc *p;
+{
+	struct vnode *vp = cttyvp(p);
+
+	if (vp != NULL)
+		return (VOP_SELECT(vp, flag, FREAD|FWRITE, NOCRED, p));
+	return (1);	/* try operation to get EOF/failure */
+}
diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c
new file mode 100644
index 000000000000..8834dbf44427
--- /dev/null
+++ b/sys/kern/uipc_domain.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_domain.c 8.2 (Berkeley) 10/18/93
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+void pffasttimo __P((void *));
+void pfslowtimo __P((void *));
+
+/*
+ * Declare the external domain structure named <x>domain and push it
+ * onto the front of the global "domains" list.
+ */
+#define	ADDDOMAIN(x)	{ \
+	extern struct domain __CONCAT(x,domain); \
+	__CONCAT(x,domain.dom_next) = domains; \
+	domains = &__CONCAT(x,domain); \
+}
+
+/*
+ * Build the list of configured domains, run each domain's and each
+ * protocol's init hook, derive the header-size limits, and start the
+ * fast and slow protocol timers.
+ */
+domaininit()
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+
+#undef unix
+#ifndef lint
+	ADDDOMAIN(unix);
+	ADDDOMAIN(route);
+#ifdef INET
+	ADDDOMAIN(inet);
+#endif
+#ifdef NS
+	ADDDOMAIN(ns);
+#endif
+#ifdef ISO
+	ADDDOMAIN(iso);
+#endif
+#ifdef CCITT
+	ADDDOMAIN(ccitt);
+#endif
+#include "imp.h"
+#if NIMP > 0
+	ADDDOMAIN(imp);
+#endif
+#endif
+
+	for (dp = domains; dp != 0; dp = dp->dom_next) {
+		if (dp->dom_init) {
+			(*dp->dom_init)();
+		}
+		pr = dp->dom_protosw;
+		while (pr < dp->dom_protoswNPROTOSW) {
+			if (pr->pr_init) {
+				(*pr->pr_init)();
+			}
+			pr++;
+		}
+	}
+
+	/* Never let the link-level header allowance drop below 16. */
+	if (max_linkhdr < 16) {		/* XXX */
+		max_linkhdr = 16;
+	}
+	max_hdr = max_linkhdr + max_protohdr;
+	max_datalen = MHLEN - max_hdr;
+
+	/* First tick of both timers; they re-arm themselves afterwards. */
+	timeout(pffasttimo, (void *)0, 1);
+	timeout(pfslowtimo, (void *)0, 1);
+}
+
+/*
+ * Find the first protocol of socket type "type" in the domain whose
+ * family is "family".  Returns 0 when the family is not configured or
+ * has no such protocol.  Entries whose pr_type is zero never match.
+ */
+struct protosw *
+pffindtype(family, type)
+	int family, type;
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+
+	for (dp = domains; dp != 0; dp = dp->dom_next) {
+		if (dp->dom_family == family)
+			break;
+	}
+	if (dp == 0)
+		return (0);
+
+	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
+		if (pr->pr_type != 0 && pr->pr_type == type)
+			return (pr);
+	}
+	return (0);
+}
+
+/*
+ * Find the protocol switch entry matching both "protocol" and "type" in
+ * the domain for "family".  For SOCK_RAW requests without an exact
+ * match, the first SOCK_RAW entry with protocol number 0 is returned
+ * instead.  Returns 0 for family 0, an unknown family, or no match.
+ */
+struct protosw *
+pffindproto(family, protocol, type)
+	int family, protocol, type;
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+	struct protosw *maybe = 0;
+
+	if (family == 0)
+		return (0);
+
+	for (dp = domains; dp != 0; dp = dp->dom_next) {
+		if (dp->dom_family == family)
+			break;
+	}
+	if (dp == 0)
+		return (0);
+
+	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
+		if (pr->pr_protocol == protocol && pr->pr_type == type)
+			return (pr);
+
+		/* Remember the first wildcard raw entry as a fallback. */
+		if (maybe == (struct protosw *)0 && type == SOCK_RAW &&
+		    pr->pr_type == SOCK_RAW && pr->pr_protocol == 0)
+			maybe = pr;
+	}
+	return (maybe);
+}
+
+/*
+ * Dispatch a sysctl request to the protocol it names.
+ * name[0] is the protocol family and name[1] the protocol number; the
+ * remaining components are passed on to that protocol's pr_sysctl
+ * routine.  Returns EISDIR when fewer than three components are given,
+ * 0 for family 0, and ENOPROTOOPT when the family is unknown or no
+ * matching protocol provides a sysctl routine.  The p argument is not
+ * used here.
+ */
+net_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	struct proc *p;
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+	int family, protocol;
+
+	/*
+	 * All sysctl names at this level are nonterminal;
+	 * next two components are protocol family and protocol number,
+	 * then at least one additional component.
+	 */
+	if (namelen < 3)
+		return (EISDIR);		/* overloaded */
+	family = name[0];
+	protocol = name[1];
+
+	if (family == 0)
+		return (0);
+
+	for (dp = domains; dp != 0; dp = dp->dom_next) {
+		if (dp->dom_family == family)
+			break;
+	}
+	if (dp == 0)
+		return (ENOPROTOOPT);
+
+	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
+		if (pr->pr_protocol == protocol && pr->pr_sysctl) {
+			return ((*pr->pr_sysctl)(name + 2, namelen - 2,
+			    oldp, oldlenp, newp, newlen));
+		}
+	}
+	return (ENOPROTOOPT);
+}
+
+/*
+ * Deliver the control request (cmd, sa) to every protocol, in every
+ * configured domain, that has a pr_ctlinput routine.  The third
+ * argument handed to each routine is always a null caddr_t.
+ */
+pfctlinput(cmd, sa)
+	int cmd;
+	struct sockaddr *sa;
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+
+	for (dp = domains; dp != 0; dp = dp->dom_next) {
+		pr = dp->dom_protosw;
+		for (; pr < dp->dom_protoswNPROTOSW; pr++) {
+			if (pr->pr_ctlinput) {
+				(*pr->pr_ctlinput)(cmd, sa, (caddr_t)0);
+			}
+		}
+	}
+}
+
+/*
+ * Slow protocol timer: call the pr_slowtimo routine of every protocol
+ * that has one, then re-arm this timer for hz/2 ticks from now.
+ * The arg value is ignored.
+ */
+void
+pfslowtimo(arg)
+	void *arg;
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+
+	for (dp = domains; dp != 0; dp = dp->dom_next) {
+		pr = dp->dom_protosw;
+		for (; pr < dp->dom_protoswNPROTOSW; pr++) {
+			if (pr->pr_slowtimo) {
+				(*pr->pr_slowtimo)();
+			}
+		}
+	}
+	timeout(pfslowtimo, (void *)0, hz/2);
+}
+
+/*
+ * Fast protocol timer: call the pr_fasttimo routine of every protocol
+ * that has one, then re-arm this timer for hz/5 ticks from now.
+ * The arg value is ignored.
+ */
+void
+pffasttimo(arg)
+	void *arg;
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+
+	for (dp = domains; dp != 0; dp = dp->dom_next) {
+		pr = dp->dom_protosw;
+		for (; pr < dp->dom_protoswNPROTOSW; pr++) {
+			if (pr->pr_fasttimo) {
+				(*pr->pr_fasttimo)();
+			}
+		}
+	}
+	timeout(pffasttimo, (void *)0, hz/5);
+}
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
new file mode 100644
index 000000000000..b71c6345e361
--- /dev/null
+++ b/sys/kern/uipc_mbuf.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/map.h>
+#define MBTYPES
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+
+#include <vm/vm.h>
+
+extern vm_map_t mb_map;
+struct mbuf *mbutl;
+char *mclrefcnt;
+
+/*
+ * Prime the cluster free list: allocate NCL_INIT clusters (4096 bytes
+ * worth when CLBYTES is smaller than that, otherwise one) at splimp.
+ * Panics with "mbinit" if the allocation fails.
+ */
+mbinit()
+{
+	int s;
+
+#if CLBYTES < 4096
+#define	NCL_INIT	(4096/CLBYTES)
+#else
+#define	NCL_INIT	1
+#endif
+	s = splimp();
+	if (m_clalloc(NCL_INIT, M_DONTWAIT) != 0) {
+		splx(s);
+		return;
+	}
+	panic("mbinit");
+}
+
+/*
+ * Allocate some number of mbuf clusters
+ * and place on cluster free list.
+ * Must be called at splimp.
+ *
+ * Returns 1 on success.  Returns 0 when kmem_malloc() on mb_map yields
+ * nothing; the first such failure is logged ("mb_map full"), later
+ * ones are silent.  The wait flag handed to kmem_malloc() is !nowait.
+ */
+/* ARGSUSED */
+m_clalloc(ncl, nowait)
+	register int ncl;
+	int nowait;
+{
+	static int logged;
+	register caddr_t p;
+	register int i;
+	int npg;
+
+	npg = ncl * CLSIZE;
+	p = (caddr_t)kmem_malloc(mb_map, ctob(npg), !nowait);
+	if (p == NULL) {
+		if (logged == 0) {
+			logged++;
+			log(LOG_ERR, "mb_map full\n");
+		}
+		return (0);
+	}
+
+	/* Convert the count to MCLBYTES-sized units and thread them. */
+	ncl = ncl * CLBYTES / MCLBYTES;
+	for (i = 0; i < ncl; i++, p += MCLBYTES) {
+		union mcluster *mcl = (union mcluster *)p;
+
+		mcl->mcl_next = mclfree;
+		mclfree = mcl;
+		mbstat.m_clfree++;
+	}
+	mbstat.m_clusters += ncl;
+	return (1);
+}
+
+/*
+ * When MGET fails, ask protocols to free space when short of memory,
+ * then re-attempt to allocate an mbuf.
+ *
+ * m_retry is redefined as a macro yielding a null pointer around the
+ * MGET below; presumably MGET itself expands to a call to m_retry on
+ * failure, and the macro keeps that from recursing into this function.
+ */
+struct mbuf *
+m_retry(i, t)
+	int i, t;
+{
+	register struct mbuf *mb;
+
+	m_reclaim();
+#define m_retry(i, t)	(struct mbuf *)0
+	MGET(mb, i, t);
+#undef m_retry
+	return (mb);
+}
+
+/*
+ * As m_retry, but for MGETHDR: reclaim space from the protocols, then
+ * try once more to allocate a packet header mbuf.
+ *
+ * m_retryhdr is redefined as a macro yielding a null pointer around the
+ * MGETHDR below; presumably MGETHDR expands to a call to m_retryhdr on
+ * failure, and the macro keeps that from recursing into this function.
+ */
+struct mbuf *
+m_retryhdr(i, t)
+	int i, t;
+{
+	register struct mbuf *mb;
+
+	m_reclaim();
+#define m_retryhdr(i, t) (struct mbuf *)0
+	MGETHDR(mb, i, t);
+#undef m_retryhdr
+	return (mb);
+}
+
+/*
+ * Call the pr_drain routine of every protocol that has one, at splimp,
+ * then count the event in mbstat.m_drain.
+ */
+m_reclaim()
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+	int s = splimp();
+
+	for (dp = domains; dp != 0; dp = dp->dom_next) {
+		pr = dp->dom_protosw;
+		for (; pr < dp->dom_protoswNPROTOSW; pr++) {
+			if (pr->pr_drain) {
+				(*pr->pr_drain)();
+			}
+		}
+	}
+	splx(s);
+	mbstat.m_drain++;
+}
+
+/*
+ * Space allocation routines.
+ * These are also available as macros
+ * for critical paths.
+ */
+/*
+ * Function form of the MGET macro: returns whatever MGET leaves in its
+ * first argument for the given wait flag and type.
+ */
+struct mbuf *
+m_get(nowait, type)
+	int nowait, type;
+{
+	register struct mbuf *mb;
+
+	MGET(mb, nowait, type);
+	return (mb);
+}
+
+/*
+ * Function form of the MGETHDR macro: returns whatever MGETHDR leaves
+ * in its first argument for the given wait flag and type.
+ */
+struct mbuf *
+m_gethdr(nowait, type)
+	int nowait, type;
+{
+	register struct mbuf *mb;
+
+	MGETHDR(mb, nowait, type);
+	return (mb);
+}
+
+/*
+ * Like m_get, but the first MLEN bytes of the new mbuf's data area are
+ * zeroed.  Returns 0 when MGET produces no mbuf.
+ */
+struct mbuf *
+m_getclr(nowait, type)
+	int nowait, type;
+{
+	register struct mbuf *mb;
+
+	MGET(mb, nowait, type);
+	if (mb != 0) {
+		bzero(mtod(mb, caddr_t), MLEN);
+	}
+	return (mb);
+}
+
+/*
+ * Function form of the MFREE macro: release m and return the value
+ * MFREE stores in its second argument.
+ */
+struct mbuf *
+m_free(m)
+	struct mbuf *m;
+{
+	register struct mbuf *nxt;
+
+	MFREE(m, nxt);
+	return (nxt);
+}
+
+/*
+ * Free a whole chain: apply MFREE repeatedly, continuing with the
+ * successor it hands back, until that is null.  A null m is accepted
+ * and ignored.
+ */
+void
+m_freem(m)
+	register struct mbuf *m;
+{
+	register struct mbuf *nxt;
+
+	while (m != NULL) {
+		MFREE(m, nxt);
+		m = nxt;
+	}
+}
+
+/*
+ * Mbuffer utility routines.
+ */
+
+/*
+ * Lesser-used path for M_PREPEND:
+ * allocate new mbuf to prepend to chain,
+ * copy junk along.
+ *
+ * The new mbuf has the type of m and length len; a packet header on m
+ * is moved to it.  When len is below MHLEN the new mbuf is aligned
+ * with MH_ALIGN.  If no mbuf can be had, the chain m is freed and a
+ * null pointer is returned.
+ */
+struct mbuf *
+m_prepend(m, len, how)
+	register struct mbuf *m;
+	int len, how;
+{
+	struct mbuf *head;
+
+	MGET(head, how, m->m_type);
+	if (head == (struct mbuf *)NULL) {
+		m_freem(m);
+		return ((struct mbuf *)NULL);
+	}
+	if (m->m_flags & M_PKTHDR) {
+		M_COPY_PKTHDR(head, m);
+		m->m_flags &= ~M_PKTHDR;
+	}
+	head->m_next = m;
+	if (len < MHLEN) {
+		MH_ALIGN(head, len);
+	}
+	head->m_len = len;
+	return (head);
+}
+
+/*
+ * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
+ * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf.
+ * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
+ *
+ * Panics ("m_copym") when off0 or len is negative, when the chain ends
+ * while off0 bytes are still being skipped, or when it ends before len
+ * bytes were copied and len is not M_COPYALL.
+ * When off0 is zero and the first mbuf has M_PKTHDR, the packet header
+ * is copied into the first new mbuf and its length is set to len
+ * (for M_COPYALL the copied length is kept, less off0).
+ * Data held in external storage (M_EXT) is not copied: the new mbuf
+ * refers to the same buffer and its mclrefcnt entry is incremented.
+ * Returns the new chain.  If an mbuf cannot be allocated the partial
+ * copy is freed and 0 is returned.  MCFail is incremented on that
+ * failure and also whenever the result is an empty chain.
+ */
+int MCFail;
+
+struct mbuf *
+m_copym(m, off0, len, wait)
+ register struct mbuf *m;
+ int off0, wait;
+ register int len;
+{
+ register struct mbuf *n, **np;
+ register int off = off0;
+ struct mbuf *top;
+ int copyhdr = 0;
+
+ if (off < 0 || len < 0)
+ panic("m_copym");
+ if (off == 0 && m->m_flags & M_PKTHDR)
+ copyhdr = 1;
+ while (off > 0) { /* skip mbufs lying wholly before the offset */
+ if (m == 0)
+ panic("m_copym");
+ if (off < m->m_len)
+ break;
+ off -= m->m_len;
+ m = m->m_next;
+ }
+ np = &top; /* np always points at the link to fill in next */
+ top = 0;
+ while (len > 0) {
+ if (m == 0) {
+ if (len != M_COPYALL)
+ panic("m_copym");
+ break;
+ }
+ MGET(n, wait, m->m_type);
+ *np = n;
+ if (n == 0)
+ goto nospace;
+ if (copyhdr) { /* first new mbuf only */
+ M_COPY_PKTHDR(n, m);
+ if (len == M_COPYALL)
+ n->m_pkthdr.len -= off0;
+ else
+ n->m_pkthdr.len = len;
+ copyhdr = 0;
+ }
+ n->m_len = min(len, m->m_len - off);
+ if (m->m_flags & M_EXT) { /* share the external buffer */
+ n->m_data = m->m_data + off;
+ mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
+ n->m_ext = m->m_ext;
+ n->m_flags |= M_EXT;
+ } else
+ bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
+ (unsigned)n->m_len);
+ if (len != M_COPYALL)
+ len -= n->m_len;
+ off = 0; /* the offset applies to the first copied mbuf only */
+ m = m->m_next;
+ np = &n->m_next;
+ }
+ if (top == 0)
+ MCFail++;
+ return (top);
+nospace:
+ m_freem(top);
+ MCFail++;
+ return (0);
+}
+
+/*
+ * Copy data from an mbuf chain starting "off" bytes from the beginning,
+ * continuing for "len" bytes, into the indicated buffer.
+ *
+ * Panics ("m_copydata") on a negative off or len, and when the chain
+ * ends before the offset has been skipped or before len bytes have
+ * been copied.
+ */
+m_copydata(m, off, len, cp)
+	register struct mbuf *m;
+	register int off;
+	register int len;
+	caddr_t cp;
+{
+	register unsigned count;
+
+	if (off < 0 || len < 0)
+		panic("m_copydata");
+
+	/* Step over mbufs that lie wholly before the offset. */
+	for (; off > 0; m = m->m_next) {
+		if (m == 0)
+			panic("m_copydata");
+		if (off < m->m_len)
+			break;
+		off -= m->m_len;
+	}
+
+	/* Copy piecewise; only the first piece starts at an offset. */
+	for (; len > 0; m = m->m_next) {
+		if (m == 0)
+			panic("m_copydata");
+		count = min(m->m_len - off, len);
+		bcopy(mtod(m, caddr_t) + off, cp, count);
+		cp += count;
+		len -= count;
+		off = 0;
+	}
+}
+
+/*
+ * Concatenate mbuf chain n to m.
+ * Both chains must be of the same type (e.g. MT_DATA).
+ * Any m_pkthdr is not updated.
+ *
+ * While the last mbuf of m has no external storage and the data of the
+ * next mbuf of n fits behind its own, that data is copied over and the
+ * mbuf of n is freed.  As soon as that is not possible the rest of n
+ * is simply linked on.
+ */
+m_cat(m, n)
+	register struct mbuf *m, *n;
+{
+	/* Find the last mbuf of the destination chain. */
+	while (m->m_next != 0)
+		m = m->m_next;
+
+	for (; n != 0; n = m_free(n)) {
+		if ((m->m_flags & M_EXT) ||
+		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
+			/* just join the two chains */
+			m->m_next = n;
+			return;
+		}
+		/* splat the data from one into the other */
+		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
+		    (u_int)n->m_len);
+		m->m_len += n->m_len;
+	}
+}
+
+/*
+ * Trim data from the chain mp: req_len bytes from the front when
+ * req_len is non-negative, -req_len bytes from the tail otherwise.
+ * Mbufs are never unlinked or freed here; emptied ones are left with a
+ * length of zero.  When the first mbuf has M_PKTHDR, its packet length
+ * is adjusted to match.  Does nothing when mp is null.
+ */
+m_adj(mp, req_len)
+	struct mbuf *mp;
+	int req_len;
+{
+	register int len = req_len;
+	register struct mbuf *m;
+	register int count;
+
+	if ((m = mp) == NULL)
+		return;
+
+	if (len >= 0) {
+		/*
+		 * Trim from head.
+		 */
+		while (m != NULL && len > 0) {
+			if (m->m_len > len) {
+				m->m_data += len;
+				m->m_len -= len;
+				len = 0;
+			} else {
+				len -= m->m_len;
+				m->m_len = 0;
+				m = m->m_next;
+			}
+		}
+		/* Whatever is left in len could not be trimmed. */
+		if (mp->m_flags & M_PKTHDR)
+			mp->m_pkthdr.len -= (req_len - len);
+		return;
+	}
+
+	/*
+	 * Trim from tail.  Scan the mbuf chain,
+	 * calculating its length and finding the last mbuf.
+	 * If the adjustment only affects this mbuf, then just
+	 * adjust and return.  Otherwise, rescan and truncate
+	 * after the remaining size.
+	 */
+	len = -len;
+	count = m->m_len;
+	while (m->m_next != (struct mbuf *)0) {
+		m = m->m_next;
+		count += m->m_len;
+	}
+	if (m->m_len >= len) {
+		m->m_len -= len;
+		if (mp->m_flags & M_PKTHDR)
+			mp->m_pkthdr.len -= len;
+		return;
+	}
+	count -= len;
+	if (count < 0)
+		count = 0;
+
+	/*
+	 * Correct length for chain is "count".
+	 * Find the mbuf with last data, adjust its length,
+	 * and toss data from remaining mbufs on chain.
+	 */
+	m = mp;
+	if (m->m_flags & M_PKTHDR)
+		m->m_pkthdr.len = count;
+	for (; m; m = m->m_next) {
+		if (m->m_len >= count) {
+			m->m_len = count;
+			break;
+		}
+		count -= m->m_len;
+	}
+	while ((m = m->m_next) != NULL)
+		m->m_len = 0;
+}
+
+/*
+ * Rearrange an mbuf chain so that len bytes are contiguous
+ * and in the data area of an mbuf (so that mtod and dtom
+ * will work for a structure of size len). Returns the resulting
+ * mbuf chain on success, frees it and returns null on failure.
+ * If there is room, it will add up to max_protohdr-len extra bytes to the
+ * contiguous region in an attempt to avoid being called next time.
+ *
+ * When a new leading mbuf is needed, a len above MHLEN is refused, the
+ * mbuf is obtained with M_DONTWAIT, and a packet header on the old
+ * first mbuf is moved to it.  Running out of data before len bytes are
+ * gathered is also a failure.  Every failure increments MPFail.
+ */
+int MPFail;
+
+struct mbuf *
+m_pullup(n, len)
+ register struct mbuf *n;
+ int len;
+{
+ register struct mbuf *m;
+ register int count;
+ int space;
+
+ /*
+ * If first mbuf has no cluster, and has room for len bytes
+ * without shifting current data, pullup into it,
+ * otherwise allocate a new mbuf to prepend to the chain.
+ */
+ if ((n->m_flags & M_EXT) == 0 &&
+ n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
+ if (n->m_len >= len)
+ return (n);
+ m = n;
+ n = n->m_next;
+ len -= m->m_len; /* only the missing part still has to be moved */
+ } else {
+ if (len > MHLEN)
+ goto bad;
+ MGET(m, M_DONTWAIT, n->m_type);
+ if (m == 0)
+ goto bad;
+ m->m_len = 0;
+ if (n->m_flags & M_PKTHDR) {
+ M_COPY_PKTHDR(m, n);
+ n->m_flags &= ~M_PKTHDR;
+ }
+ }
+ space = &m->m_dat[MLEN] - (m->m_data + m->m_len); /* free room behind m's data */
+ do {
+ count = min(min(max(len, max_protohdr), space), n->m_len);
+ bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
+ (unsigned)count);
+ len -= count;
+ m->m_len += count;
+ n->m_len -= count;
+ space -= count;
+ if (n->m_len)
+ n->m_data += count;
+ else
+ n = m_free(n); /* source mbuf drained: release it and move on */
+ } while (len > 0 && n);
+ if (len > 0) {
+ (void) m_free(m);
+ goto bad;
+ }
+ m->m_next = n;
+ return (m);
+bad:
+ m_freem(n);
+ MPFail++;
+ return (0);
+}
+
+/*
+ * Partition an mbuf chain in two pieces, returning the tail --
+ * all but the first len0 bytes.  In case of failure, it returns NULL and
+ * attempts to restore the chain to its original state.
+ *
+ * m0 is the chain to split and wait the M_WAIT/M_DONTWAIT choice used
+ * for any mbuf that has to be allocated.  Returns 0 when the chain is
+ * shorter than len0 or when an mbuf cannot be obtained.  When m0 has a
+ * packet header, the tail gets one of its own (same rcvif) and the two
+ * packet lengths are set to len0 and the remainder.  Data in external
+ * storage is shared between the two halves rather than copied, and the
+ * buffer's mclrefcnt entry is incremented.
+ */
+struct mbuf *
+m_split(m0, len0, wait)
+	register struct mbuf *m0;
+	int len0, wait;
+{
+	register struct mbuf *m, *n;
+	unsigned len = len0, remain;
+	int pktlen;
+
+	/* Find the mbuf containing the split point; len becomes local. */
+	for (m = m0; m && len > m->m_len; m = m->m_next)
+		len -= m->m_len;
+	if (m == 0)
+		return (0);
+	remain = m->m_len - len;
+	if (m0->m_flags & M_PKTHDR) {
+		MGETHDR(n, wait, m0->m_type);
+		if (n == 0)
+			return (0);
+		/* Saved so the header can be put back if we fail below. */
+		pktlen = m0->m_pkthdr.len;
+		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+		n->m_pkthdr.len = pktlen - len0;
+		m0->m_pkthdr.len = len0;
+		if (m->m_flags & M_EXT)
+			goto extpacket;
+		if (remain > MHLEN) {
+			/* m can't be the lead packet */
+			MH_ALIGN(n, 0);
+			/* The new header mbuf carries no data of its own. */
+			n->m_len = 0;
+			n->m_next = m_split(m, len, wait);
+			if (n->m_next == 0) {
+				(void) m_free(n);
+				/* Undo the only change made to the chain. */
+				m0->m_pkthdr.len = pktlen;
+				return (0);
+			}
+			return (n);
+		} else
+			MH_ALIGN(n, remain);
+	} else if (remain == 0) {
+		/* Split falls on an mbuf boundary: just unlink the tail. */
+		n = m->m_next;
+		m->m_next = 0;
+		return (n);
+	} else {
+		MGET(n, wait, m->m_type);
+		if (n == 0)
+			return (0);
+		M_ALIGN(n, remain);
+	}
+extpacket:
+	if (m->m_flags & M_EXT) {
+		n->m_flags |= M_EXT;
+		n->m_ext = m->m_ext;
+		mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
+		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
+		n->m_data = m->m_data + len;
+	} else {
+		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
+	}
+	n->m_len = remain;
+	m->m_len = len;
+	n->m_next = m->m_next;
+	m->m_next = 0;
+	return (n);
+}
+/*
+ * Routine to copy from device local memory into mbufs.
+ *
+ * Builds a chain holding totlen bytes read from buf and returns it; the
+ * first mbuf is a packet header whose rcvif is ifp.  Returns 0 if an
+ * mbuf cannot be allocated (a partly built chain is freed).
+ * When off0 is non-zero, reading starts at buf + off0 plus two u_shorts,
+ * totlen is reduced by those two u_shorts, and the read pointer wraps
+ * back to buf once it reaches the original end of the packet.
+ * If copy is non-null it is called as copy(from, to, len) in place of
+ * bcopy.
+ * NOTE(review): if totlen is not positive when the loop is reached, the
+ * header mbuf obtained before it is neither linked into the result nor
+ * freed -- confirm callers never pass an empty packet.
+ */
+struct mbuf *
+m_devget(buf, totlen, off0, ifp, copy)
+ char *buf;
+ int totlen, off0;
+ struct ifnet *ifp;
+ void (*copy)();
+{
+ register struct mbuf *m;
+ struct mbuf *top = 0, **mp = &top;
+ register int off = off0, len;
+ register char *cp;
+ char *epkt;
+
+ cp = buf;
+ epkt = cp + totlen;
+ if (off) {
+ cp += off + 2 * sizeof(u_short);
+ totlen -= 2 * sizeof(u_short);
+ }
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == 0)
+ return (0);
+ m->m_pkthdr.rcvif = ifp;
+ m->m_pkthdr.len = totlen;
+ m->m_len = MHLEN;
+
+ while (totlen > 0) {
+ if (top) { /* the first pass uses the header mbuf from above */
+ MGET(m, M_DONTWAIT, MT_DATA);
+ if (m == 0) {
+ m_freem(top);
+ return (0);
+ }
+ m->m_len = MLEN;
+ }
+ len = min(totlen, epkt - cp);
+ if (len >= MINCLSIZE) {
+ MCLGET(m, M_DONTWAIT);
+ if (m->m_flags & M_EXT)
+ m->m_len = len = min(len, MCLBYTES);
+ else
+ len = m->m_len; /* no cluster: fill the plain mbuf instead */
+ } else {
+ /*
+ * Place initial small packet/header at end of mbuf.
+ */
+ if (len < m->m_len) {
+ if (top == 0 && len + max_linkhdr <= m->m_len)
+ m->m_data += max_linkhdr;
+ m->m_len = len;
+ } else
+ len = m->m_len;
+ }
+ if (copy)
+ copy(cp, mtod(m, caddr_t), (unsigned)len);
+ else
+ bcopy(cp, mtod(m, caddr_t), (unsigned)len);
+ cp += len;
+ *mp = m;
+ mp = &m->m_next;
+ totlen -= len;
+ if (cp == epkt) /* wrap to the start of the device buffer */
+ cp = buf;
+ }
+ return (top);
+}
diff --git a/sys/kern/uipc_proto.c b/sys/kern/uipc_proto.c
new file mode 100644
index 000000000000..da9828aa2679
--- /dev/null
+++ b/sys/kern/uipc_proto.c
@@ -0,0 +1,72 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_proto.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+/*
+ * Definitions of protocols supported in the UNIX domain.
+ *
+ * unixsw[] has three entries: a SOCK_STREAM and a SOCK_DGRAM entry that
+ * both name uipc_usrreq as their only routine, and a third entry with
+ * no type or domain that is wired to the raw_* routines.  unixdomain
+ * refers to unixsw and to one past its last element.
+ * All initializers are positional.  NOTE(review): their meaning follows
+ * the member order of struct protosw and struct domain, which are
+ * declared elsewhere -- confirm against those declarations before
+ * adding or reordering values.
+ */
+
+int uipc_usrreq(), raw_usrreq();
+void raw_init(),raw_input(),raw_ctlinput();
+extern struct domain unixdomain; /* or at least forward */
+
+struct protosw unixsw[] = {
+{ SOCK_STREAM, &unixdomain, 0, PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS, /* stream sockets */
+ 0, 0, 0, 0,
+ uipc_usrreq,
+ 0, 0, 0, 0,
+},
+{ SOCK_DGRAM, &unixdomain, 0, PR_ATOMIC|PR_ADDR|PR_RIGHTS, /* datagram sockets */
+ 0, 0, 0, 0,
+ uipc_usrreq,
+ 0, 0, 0, 0,
+},
+{ 0, 0, 0, 0, /* raw entry: no type, domain, protocol or flags */
+ raw_input, 0, raw_ctlinput, 0,
+ raw_usrreq,
+ raw_init, 0, 0, 0,
+}
+};
+
+int unp_externalize(), unp_dispose();
+
+struct domain unixdomain =
+ { AF_UNIX, "unix", 0, unp_externalize, unp_dispose,
+ unixsw, &unixsw[sizeof(unixsw)/sizeof(unixsw[0])] }; /* table start, one past its end */
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
new file mode 100644
index 000000000000..d4af592d79b5
--- /dev/null
+++ b/sys/kern/uipc_sockbuf.c
@@ -0,0 +1,755 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+/*
+ * Primitive routines for operating on sockets and socket buffers
+ */
+
+/* strings for sleep message: */
+char netio[] = "netio";
+char netcon[] = "netcon";
+char netcls[] = "netcls";
+
+u_long sb_max = SB_MAX; /* patchable */
+
+/*
+ * Procedures to manipulate state flags of socket
+ * and do appropriate wakeups. Normal sequence from the
+ * active (originating) side is that soisconnecting() is
+ * called during processing of connect() call,
+ * resulting in an eventual call to soisconnected() if/when the
+ * connection is established. When the connection is torn down
+ * soisdisconnecting() is called during processing of disconnect() call,
+ * and soisdisconnected() is called when the connection to the peer
+ * is totally severed. The semantics of these routines are such that
+ * connectionless protocols can call soisconnected() and soisdisconnected()
+ * only, bypassing the in-progress calls when setting up a ``connection''
+ * takes no time.
+ *
+ * From the passive side, a socket is created with
+ * two queues of sockets: so_q0 for connections in progress
+ * and so_q for connections already made and awaiting user acceptance.
+ * As a protocol is preparing incoming connections, it creates a socket
+ * structure queued on so_q0 by calling sonewconn(). When the connection
+ * is established, soisconnected() is called, and transfers the
+ * socket structure to so_q, making it available to accept().
+ *
+ * If a socket is closed with sockets on either
+ * so_q0 or so_q, these sockets are dropped.
+ *
+ * If higher level protocols are implemented in
+ * the kernel, the wakeups done here will sometimes
+ * cause software-interrupt process scheduling.
+ */
+
+soisconnecting(so)
+ register struct socket *so;
+{
+
+ so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
+ so->so_state |= SS_ISCONNECTING;
+}
+
+soisconnected(so)
+ register struct socket *so;
+{
+ register struct socket *head = so->so_head;
+
+ so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
+ so->so_state |= SS_ISCONNECTED;
+ if (head && soqremque(so, 0)) {
+ soqinsque(head, so, 1);
+ sorwakeup(head);
+ wakeup((caddr_t)&head->so_timeo);
+ } else {
+ wakeup((caddr_t)&so->so_timeo);
+ sorwakeup(so);
+ sowwakeup(so);
+ }
+}
+
+soisdisconnecting(so)
+ register struct socket *so;
+{
+
+ so->so_state &= ~SS_ISCONNECTING;
+ so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
+ wakeup((caddr_t)&so->so_timeo);
+ sowwakeup(so);
+ sorwakeup(so);
+}
+
+soisdisconnected(so)
+ register struct socket *so;
+{
+
+ so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
+ so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
+ wakeup((caddr_t)&so->so_timeo);
+ sowwakeup(so);
+ sorwakeup(so);
+}
+
+/*
+ * When an attempt at a new connection is noted on a socket
+ * which accepts connections, sonewconn is called. If the
+ * connection is possible (subject to space constraints, etc.)
+ * then we allocate a new structure, properly linked into the
+ * data structure of the original socket, and return this.
+ * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
+ *
+ * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
+ * to catch calls that are missing the (new) second parameter.
+ */
+struct socket *
+sonewconn1(head, connstatus)
+ register struct socket *head;
+ int connstatus;
+{
+ register struct socket *so;
+ int soqueue = connstatus ? 1 : 0;
+
+ if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
+ return ((struct socket *)0);
+ MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
+ if (so == NULL)
+ return ((struct socket *)0);
+ bzero((caddr_t)so, sizeof(*so));
+ so->so_type = head->so_type;
+ so->so_options = head->so_options &~ SO_ACCEPTCONN;
+ so->so_linger = head->so_linger;
+ so->so_state = head->so_state | SS_NOFDREF;
+ so->so_proto = head->so_proto;
+ so->so_timeo = head->so_timeo;
+ so->so_pgid = head->so_pgid;
+ (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
+ soqinsque(head, so, soqueue);
+ if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
+ (void) soqremque(so, soqueue);
+ (void) free((caddr_t)so, M_SOCKET);
+ return ((struct socket *)0);
+ }
+ if (connstatus) {
+ sorwakeup(head);
+ wakeup((caddr_t)&head->so_timeo);
+ so->so_state |= connstatus;
+ }
+ return (so);
+}
+
+soqinsque(head, so, q)
+ register struct socket *head, *so;
+ int q;
+{
+
+ register struct socket **prev;
+ so->so_head = head;
+ if (q == 0) {
+ head->so_q0len++;
+ so->so_q0 = 0;
+ for (prev = &(head->so_q0); *prev; )
+ prev = &((*prev)->so_q0);
+ } else {
+ head->so_qlen++;
+ so->so_q = 0;
+ for (prev = &(head->so_q); *prev; )
+ prev = &((*prev)->so_q);
+ }
+ *prev = so;
+}
+
+soqremque(so, q)
+ register struct socket *so;
+ int q;
+{
+ register struct socket *head, *prev, *next;
+
+ head = so->so_head;
+ prev = head;
+ for (;;) {
+ next = q ? prev->so_q : prev->so_q0;
+ if (next == so)
+ break;
+ if (next == 0)
+ return (0);
+ prev = next;
+ }
+ if (q == 0) {
+ prev->so_q0 = next->so_q0;
+ head->so_q0len--;
+ } else {
+ prev->so_q = next->so_q;
+ head->so_qlen--;
+ }
+ next->so_q0 = next->so_q = 0;
+ next->so_head = 0;
+ return (1);
+}
+
+/*
+ * Socantsendmore indicates that no more data will be sent on the
+ * socket; it would normally be applied to a socket when the user
+ * informs the system that no more data is to be sent, by the protocol
+ * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
+ * will be received, and will normally be applied to the socket by a
+ * protocol when it detects that the peer will send no more data.
+ * Data queued for reading in the socket may yet be read.
+ */
+
+socantsendmore(so)
+ struct socket *so;
+{
+
+ so->so_state |= SS_CANTSENDMORE;
+ sowwakeup(so);
+}
+
+socantrcvmore(so)
+ struct socket *so;
+{
+
+ so->so_state |= SS_CANTRCVMORE;
+ sorwakeup(so);
+}
+
+/*
+ * Wait for data to arrive at/drain from a socket buffer.
+ */
+sbwait(sb)
+ struct sockbuf *sb;
+{
+
+ sb->sb_flags |= SB_WAIT;
+ return (tsleep((caddr_t)&sb->sb_cc,
+ (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
+ sb->sb_timeo));
+}
+
+/*
+ * Lock a sockbuf already known to be locked: sleep until SB_LOCK clears,
+ * then take it; return any error returned from sleep (e.g. EINTR).
+ */
+sb_lock(sb)
+ register struct sockbuf *sb;
+{
+ int error;
+
+ while (sb->sb_flags & SB_LOCK) {
+ sb->sb_flags |= SB_WANT;
+ if (error = tsleep((caddr_t)&sb->sb_flags,
+ (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
+ netio, 0))
+ return (error);
+ }
+ sb->sb_flags |= SB_LOCK;
+ return (0);
+}
+
+/*
+ * Wakeup processes waiting on a socket buffer.
+ * Do asynchronous notification via SIGIO
+ * if the socket has the SS_ASYNC flag set.
+ */
+sowakeup(so, sb)
+ register struct socket *so;
+ register struct sockbuf *sb;
+{
+ struct proc *p;
+
+ selwakeup(&sb->sb_sel);
+ sb->sb_flags &= ~SB_SEL;
+ if (sb->sb_flags & SB_WAIT) {
+ sb->sb_flags &= ~SB_WAIT;
+ wakeup((caddr_t)&sb->sb_cc);
+ }
+ if (so->so_state & SS_ASYNC) {
+ if (so->so_pgid < 0)
+ gsignal(-so->so_pgid, SIGIO);
+ else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
+ psignal(p, SIGIO);
+ }
+}
+
+/*
+ * Socket buffer (struct sockbuf) utility routines.
+ *
+ * Each socket contains two socket buffers: one for sending data and
+ * one for receiving data. Each buffer contains a queue of mbufs,
+ * information about the number of mbufs and amount of data in the
+ * queue, and other fields allowing select() statements and notification
+ * on data availability to be implemented.
+ *
+ * Data stored in a socket buffer is maintained as a list of records.
+ * Each record is a list of mbufs chained together with the m_next
+ * field. Records are chained together with the m_nextpkt field. The upper
+ * level routine soreceive() expects the following conventions to be
+ * observed when placing information in the receive buffer:
+ *
+ * 1. If the protocol requires each message be preceded by the sender's
+ * name, then a record containing that name must be present before
+ * any associated data (mbuf's must be of type MT_SONAME).
+ * 2. If the protocol supports the exchange of ``access rights'' (really
+ * just additional data associated with the message), and there are
+ * ``rights'' to be received, then a record containing this data
+ * should be present (mbuf's must be of type MT_RIGHTS).
+ * 3. If a name or rights record exists, then it must be followed by
+ * a data record, perhaps of zero length.
+ *
+ * Before using a new socket structure it is first necessary to reserve
+ * buffer space to the socket, by calling sbreserve(). This should commit
+ * some of the available buffer space in the system buffer pool for the
+ * socket (currently, it does nothing but enforce limits). The space
+ * should be released by calling sbrelease() when the socket is destroyed.
+ */
+
+soreserve(so, sndcc, rcvcc)
+ register struct socket *so;
+ u_long sndcc, rcvcc;
+{
+
+ if (sbreserve(&so->so_snd, sndcc) == 0)
+ goto bad;
+ if (sbreserve(&so->so_rcv, rcvcc) == 0)
+ goto bad2;
+ if (so->so_rcv.sb_lowat == 0)
+ so->so_rcv.sb_lowat = 1;
+ if (so->so_snd.sb_lowat == 0)
+ so->so_snd.sb_lowat = MCLBYTES;
+ if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
+ so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
+ return (0);
+bad2:
+ sbrelease(&so->so_snd);
+bad:
+ return (ENOBUFS);
+}
+
+/*
+ * Allot mbufs to a sockbuf.
+ * Attempt to scale mbmax so that mbcnt doesn't become limiting
+ * if buffering efficiency is near the normal case.
+ */
+sbreserve(sb, cc)
+ struct sockbuf *sb;
+ u_long cc;
+{
+
+ if (cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES))
+ return (0);
+ sb->sb_hiwat = cc;
+ sb->sb_mbmax = min(cc * 2, sb_max);
+ if (sb->sb_lowat > sb->sb_hiwat)
+ sb->sb_lowat = sb->sb_hiwat;
+ return (1);
+}
+
+/*
+ * Free mbufs held by a socket, and reserved mbuf space.
+ */
+sbrelease(sb)
+ struct sockbuf *sb;
+{
+
+ sbflush(sb);
+ sb->sb_hiwat = sb->sb_mbmax = 0;
+}
+
+/*
+ * Routines to add and remove
+ * data from an mbuf queue.
+ *
+ * The routines sbappend() or sbappendrecord() are normally called to
+ * append new mbufs to a socket buffer, after checking that adequate
+ * space is available, comparing the function sbspace() with the amount
+ * of data to be added. sbappendrecord() differs from sbappend() in
+ * that data supplied is treated as the beginning of a new record.
+ * To place a sender's address, optional access rights, and data in a
+ * socket receive buffer, sbappendaddr() should be used. To place
+ * access rights and data in a socket receive buffer, sbappendcontrol()
+ * should be used. In either case, the new data begins a new record.
+ * Note that unlike sbappend() and sbappendrecord(), these routines check
+ * for the caller that there will be enough space to store the data.
+ * Each fails if there is not enough space, or if it cannot find mbufs
+ * to store additional information in.
+ *
+ * Reliable protocols may use the socket send buffer to hold data
+ * awaiting acknowledgement. Data is normally copied from a socket
+ * send buffer in a protocol with m_copy for output to a peer,
+ * and then removed from the socket buffer with sbdrop()
+ * or sbdroprecord() when the data is acknowledged by the peer.
+ */
+
+/*
+ * Append mbuf chain m to the last record in the
+ * socket buffer sb. The additional space associated
+ * with the mbuf chain is recorded in sb. Empty mbufs are
+ * discarded and mbufs are compacted where possible.
+ */
+sbappend(sb, m)
+ struct sockbuf *sb;
+ struct mbuf *m;
+{
+ register struct mbuf *n;
+
+ if (m == 0)
+ return;
+ if (n = sb->sb_mb) {
+ while (n->m_nextpkt)
+ n = n->m_nextpkt;
+ do {
+ if (n->m_flags & M_EOR) {
+ sbappendrecord(sb, m); /* XXXXXX!!!! */
+ return;
+ }
+ } while (n->m_next && (n = n->m_next));
+ }
+ sbcompress(sb, m, n);
+}
+
+#ifdef SOCKBUF_DEBUG
+sbcheck(sb)
+ register struct sockbuf *sb;
+{
+ register struct mbuf *m;
+ register int len = 0, mbcnt = 0;
+
+ for (m = sb->sb_mb; m; m = m->m_next) {
+ len += m->m_len;
+ mbcnt += MSIZE;
+ if (m->m_flags & M_EXT)
+ mbcnt += m->m_ext.ext_size;
+ if (m->m_nextpkt)
+ panic("sbcheck nextpkt");
+ }
+ if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
+ printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc,
+ mbcnt, sb->sb_mbcnt);
+ panic("sbcheck");
+ }
+}
+#endif
+
+/*
+ * As above, except the mbuf chain
+ * begins a new record.
+ */
+sbappendrecord(sb, m0)
+ register struct sockbuf *sb;
+ register struct mbuf *m0;
+{
+ register struct mbuf *m;
+
+ if (m0 == 0)
+ return;
+ if (m = sb->sb_mb)
+ while (m->m_nextpkt)
+ m = m->m_nextpkt;
+ /*
+ * Put the first mbuf on the queue.
+ * Note this permits zero length records.
+ */
+ sballoc(sb, m0);
+ if (m)
+ m->m_nextpkt = m0;
+ else
+ sb->sb_mb = m0;
+ m = m0->m_next;
+ m0->m_next = 0;
+ if (m && (m0->m_flags & M_EOR)) {
+ m0->m_flags &= ~M_EOR;
+ m->m_flags |= M_EOR;
+ }
+ sbcompress(sb, m, m0);
+}
+
+/*
+ * As above except that OOB data
+ * is inserted at the beginning of the sockbuf,
+ * but after any other OOB data.
+ */
+sbinsertoob(sb, m0)
+ register struct sockbuf *sb;
+ register struct mbuf *m0;
+{
+ register struct mbuf *m;
+ register struct mbuf **mp;
+
+ if (m0 == 0)
+ return;
+ for (mp = &sb->sb_mb; m = *mp; mp = &((*mp)->m_nextpkt)) {
+ again:
+ switch (m->m_type) {
+
+ case MT_OOBDATA:
+ continue; /* WANT next train */
+
+ case MT_CONTROL:
+ if (m = m->m_next)
+ goto again; /* inspect THIS train further */
+ }
+ break;
+ }
+ /*
+ * Put the first mbuf on the queue.
+ * Note this permits zero length records.
+ */
+ sballoc(sb, m0);
+ m0->m_nextpkt = *mp;
+ *mp = m0;
+ m = m0->m_next;
+ m0->m_next = 0;
+ if (m && (m0->m_flags & M_EOR)) {
+ m0->m_flags &= ~M_EOR;
+ m->m_flags |= M_EOR;
+ }
+ sbcompress(sb, m, m0);
+}
+
+/*
+ * Append address and data, and optionally, control (ancillary) data
+ * to the receive queue of a socket. If present,
+ * m0 must include a packet header with total length.
+ * Returns 0 if no space in sockbuf or insufficient mbufs.
+ */
+sbappendaddr(sb, asa, m0, control)
+ register struct sockbuf *sb;
+ struct sockaddr *asa;
+ struct mbuf *m0, *control;
+{
+ register struct mbuf *m, *n;
+ int space = asa->sa_len;
+
+if (m0 && (m0->m_flags & M_PKTHDR) == 0)
+panic("sbappendaddr");
+ if (m0)
+ space += m0->m_pkthdr.len;
+ for (n = control; n; n = n->m_next) {
+ space += n->m_len;
+ if (n->m_next == 0) /* keep pointer to last control buf */
+ break;
+ }
+ if (space > sbspace(sb))
+ return (0);
+ if (asa->sa_len > MLEN)
+ return (0);
+ MGET(m, M_DONTWAIT, MT_SONAME);
+ if (m == 0)
+ return (0);
+ m->m_len = asa->sa_len;
+ bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
+ if (n)
+ n->m_next = m0; /* concatenate data to control */
+ else
+ control = m0;
+ m->m_next = control;
+ for (n = m; n; n = n->m_next)
+ sballoc(sb, n);
+ if (n = sb->sb_mb) {
+ while (n->m_nextpkt)
+ n = n->m_nextpkt;
+ n->m_nextpkt = m;
+ } else
+ sb->sb_mb = m;
+ return (1);
+}
+
+sbappendcontrol(sb, m0, control)
+ struct sockbuf *sb;
+ struct mbuf *control, *m0;
+{
+ register struct mbuf *m, *n;
+ int space = 0;
+
+ if (control == 0)
+ panic("sbappendcontrol");
+ for (m = control; ; m = m->m_next) {
+ space += m->m_len;
+ if (m->m_next == 0)
+ break;
+ }
+ n = m; /* save pointer to last control buffer */
+ for (m = m0; m; m = m->m_next)
+ space += m->m_len;
+ if (space > sbspace(sb))
+ return (0);
+ n->m_next = m0; /* concatenate data to control */
+ for (m = control; m; m = m->m_next)
+ sballoc(sb, m);
+ if (n = sb->sb_mb) {
+ while (n->m_nextpkt)
+ n = n->m_nextpkt;
+ n->m_nextpkt = control;
+ } else
+ sb->sb_mb = control;
+ return (1);
+}
+
+/*
+ * Compress mbuf chain m into the socket
+ * buffer sb following mbuf n. If n
+ * is null, the buffer is presumed empty.
+ */
+sbcompress(sb, m, n)
+ register struct sockbuf *sb;
+ register struct mbuf *m, *n;
+{
+ register int eor = 0;
+ register struct mbuf *o;
+
+ while (m) {
+ eor |= m->m_flags & M_EOR;
+ if (m->m_len == 0 &&
+ (eor == 0 ||
+ (((o = m->m_next) || (o = n)) &&
+ o->m_type == m->m_type))) {
+ m = m_free(m);
+ continue;
+ }
+ if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 &&
+ (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
+ n->m_type == m->m_type) {
+ bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
+ (unsigned)m->m_len);
+ n->m_len += m->m_len;
+ sb->sb_cc += m->m_len;
+ m = m_free(m);
+ continue;
+ }
+ if (n)
+ n->m_next = m;
+ else
+ sb->sb_mb = m;
+ sballoc(sb, m);
+ n = m;
+ m->m_flags &= ~M_EOR;
+ m = m->m_next;
+ n->m_next = 0;
+ }
+ if (eor) {
+ if (n)
+ n->m_flags |= eor;
+ else
+ printf("semi-panic: sbcompress\n");
+ }
+}
+
+/*
+ * Free all mbufs in a sockbuf.
+ * Check that all resources are reclaimed.
+ */
+sbflush(sb)
+ register struct sockbuf *sb;
+{
+
+ if (sb->sb_flags & SB_LOCK)
+ panic("sbflush");
+ while (sb->sb_mbcnt)
+ sbdrop(sb, (int)sb->sb_cc);
+ if (sb->sb_cc || sb->sb_mb)
+ panic("sbflush 2");
+}
+
+/*
+ * Drop data from (the front of) a sockbuf.
+ */
+sbdrop(sb, len)
+ register struct sockbuf *sb;
+ register int len;
+{
+ register struct mbuf *m, *mn;
+ struct mbuf *next;
+
+ next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
+ while (len > 0) {
+ if (m == 0) {
+ if (next == 0)
+ panic("sbdrop");
+ m = next;
+ next = m->m_nextpkt;
+ continue;
+ }
+ if (m->m_len > len) {
+ m->m_len -= len;
+ m->m_data += len;
+ sb->sb_cc -= len;
+ break;
+ }
+ len -= m->m_len;
+ sbfree(sb, m);
+ MFREE(m, mn);
+ m = mn;
+ }
+ while (m && m->m_len == 0) {
+ sbfree(sb, m);
+ MFREE(m, mn);
+ m = mn;
+ }
+ if (m) {
+ sb->sb_mb = m;
+ m->m_nextpkt = next;
+ } else
+ sb->sb_mb = next;
+}
+
+/*
+ * Drop a record off the front of a sockbuf
+ * and move the next record to the front.
+ */
+sbdroprecord(sb)
+ register struct sockbuf *sb;
+{
+ register struct mbuf *m, *mn;
+
+ m = sb->sb_mb;
+ if (m) {
+ sb->sb_mb = m->m_nextpkt;
+ do {
+ sbfree(sb, m);
+ MFREE(m, mn);
+ } while (m = mn);
+ }
+}
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
new file mode 100644
index 000000000000..ed09ee63b9f4
--- /dev/null
+++ b/sys/kern/uipc_socket.c
@@ -0,0 +1,1024 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/kernel.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/resourcevar.h>
+
+/*
+ * Socket operation routines.
+ * These routines are called by the routines in
+ * sys_socket.c or from a system process, and
+ * implement the semantics of socket operations by
+ * switching out to the protocol specific routines.
+ */
+/*ARGSUSED*/
+socreate(dom, aso, type, proto)
+ int dom;
+ struct socket **aso;
+ register int type;
+ int proto;
+{
+ struct proc *p = curproc; /* XXX */
+ register struct protosw *prp;
+ register struct socket *so;
+ register int error;
+
+ if (proto)
+ prp = pffindproto(dom, proto, type);
+ else
+ prp = pffindtype(dom, type);
+ if (prp == 0 || prp->pr_usrreq == 0)
+ return (EPROTONOSUPPORT);
+ if (prp->pr_type != type)
+ return (EPROTOTYPE);
+ MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
+ bzero((caddr_t)so, sizeof(*so));
+ so->so_type = type;
+ if (p->p_ucred->cr_uid == 0)
+ so->so_state = SS_PRIV;
+ so->so_proto = prp;
+ error =
+ (*prp->pr_usrreq)(so, PRU_ATTACH,
+ (struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0);
+ if (error) {
+ so->so_state |= SS_NOFDREF;
+ sofree(so);
+ return (error);
+ }
+ *aso = so;
+ return (0);
+}
+
+sobind(so, nam)
+ struct socket *so;
+ struct mbuf *nam;
+{
+ int s = splnet();
+ int error;
+
+ error =
+ (*so->so_proto->pr_usrreq)(so, PRU_BIND,
+ (struct mbuf *)0, nam, (struct mbuf *)0);
+ splx(s);
+ return (error);
+}
+
+solisten(so, backlog)
+ register struct socket *so;
+ int backlog;
+{
+ int s = splnet(), error;
+
+ error =
+ (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
+ if (error) {
+ splx(s);
+ return (error);
+ }
+ if (so->so_q == 0)
+ so->so_options |= SO_ACCEPTCONN;
+ if (backlog < 0)
+ backlog = 0;
+ so->so_qlimit = min(backlog, SOMAXCONN);
+ splx(s);
+ return (0);
+}
+
+sofree(so)
+ register struct socket *so;
+{
+
+ if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
+ return;
+ if (so->so_head) {
+ if (!soqremque(so, 0) && !soqremque(so, 1))
+ panic("sofree dq");
+ so->so_head = 0;
+ }
+ sbrelease(&so->so_snd);
+ sorflush(so);
+ FREE(so, M_SOCKET);
+}
+
+/*
+ * Close a socket on last file table reference removal.
+ * Initiate disconnect if connected.
+ * Free socket when disconnect complete.
+ */
+soclose(so)
+ register struct socket *so;
+{
+ int s = splnet(); /* conservative */
+ int error = 0;
+
+ if (so->so_options & SO_ACCEPTCONN) {
+ while (so->so_q0)
+ (void) soabort(so->so_q0);
+ while (so->so_q)
+ (void) soabort(so->so_q);
+ }
+ if (so->so_pcb == 0)
+ goto discard;
+ if (so->so_state & SS_ISCONNECTED) {
+ if ((so->so_state & SS_ISDISCONNECTING) == 0) {
+ error = sodisconnect(so);
+ if (error)
+ goto drop;
+ }
+ if (so->so_options & SO_LINGER) {
+ if ((so->so_state & SS_ISDISCONNECTING) &&
+ (so->so_state & SS_NBIO))
+ goto drop;
+ while (so->so_state & SS_ISCONNECTED)
+ if (error = tsleep((caddr_t)&so->so_timeo,
+ PSOCK | PCATCH, netcls, so->so_linger))
+ break;
+ }
+ }
+drop:
+ if (so->so_pcb) {
+ int error2 =
+ (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
+ if (error == 0)
+ error = error2;
+ }
+discard:
+ if (so->so_state & SS_NOFDREF)
+ panic("soclose: NOFDREF");
+ so->so_state |= SS_NOFDREF;
+ sofree(so);
+ splx(s);
+ return (error);
+}
+
+/*
+ * Must be called at splnet...
+ */
+soabort(so)
+ struct socket *so;
+{
+
+ return (
+ (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
+}
+
+soaccept(so, nam)
+ register struct socket *so;
+ struct mbuf *nam;
+{
+ int s = splnet();
+ int error;
+
+ if ((so->so_state & SS_NOFDREF) == 0)
+ panic("soaccept: !NOFDREF");
+ so->so_state &= ~SS_NOFDREF;
+ error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
+ (struct mbuf *)0, nam, (struct mbuf *)0);
+ splx(s);
+ return (error);
+}
+
+soconnect(so, nam)
+ register struct socket *so;
+ struct mbuf *nam;
+{
+ int s;
+ int error;
+
+ if (so->so_options & SO_ACCEPTCONN)
+ return (EOPNOTSUPP);
+ s = splnet();
+ /*
+ * If protocol is connection-based, can only connect once.
+ * Otherwise, if connected, try to disconnect first.
+ * This allows user to disconnect by connecting to, e.g.,
+ * a null address.
+ */
+ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
+ ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
+ (error = sodisconnect(so))))
+ error = EISCONN;
+ else
+ error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
+ (struct mbuf *)0, nam, (struct mbuf *)0);
+ splx(s);
+ return (error);
+}
+
+soconnect2(so1, so2)
+ register struct socket *so1;
+ struct socket *so2;
+{
+ int s = splnet();
+ int error;
+
+ error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
+ (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
+ splx(s);
+ return (error);
+}
+
+sodisconnect(so)
+ register struct socket *so;
+{
+ int s = splnet();
+ int error;
+
+ if ((so->so_state & SS_ISCONNECTED) == 0) {
+ error = ENOTCONN;
+ goto bad;
+ }
+ if (so->so_state & SS_ISDISCONNECTING) {
+ error = EALREADY;
+ goto bad;
+ }
+ error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
+bad:
+ splx(s);
+ return (error);
+}
+
+#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
+/*
+ * Send on a socket.
+ * If send must go all at once and message is larger than
+ * send buffering, then hard error.
+ * Lock against other senders.
+ * If must go all at once and not enough room now, then
+ * inform user that this would block and do nothing.
+ * Otherwise, if nonblocking, send as much as possible.
+ * The data to be sent is described by "uio" if nonzero,
+ * otherwise by the mbuf chain "top" (which must be null
+ * if uio is not). Data provided in mbuf chain must be small
+ * enough to send all at once.
+ *
+ * Returns nonzero on error, timeout or signal; callers
+ * must check for short counts if EINTR/ERESTART are returned.
+ * Data and control buffers are freed on return.
+ */
+sosend(so, addr, uio, top, control, flags)
+ register struct socket *so;
+ struct mbuf *addr;
+ struct uio *uio;
+ struct mbuf *top;
+ struct mbuf *control;
+ int flags;
+{
+ struct proc *p = curproc; /* XXX */
+ struct mbuf **mp;
+ register struct mbuf *m;
+ register long space, len, resid;
+ int clen = 0, error, s, dontroute, mlen;
+ int atomic = sosendallatonce(so) || top;
+
+ if (uio)
+ resid = uio->uio_resid;
+ else
+ resid = top->m_pkthdr.len;
+ /*
+ * In theory resid should be unsigned.
+ * However, space must be signed, as it might be less than 0
+ * if we over-committed, and we must use a signed comparison
+ * of space and resid. On the other hand, a negative resid
+ * causes us to loop sending 0-length segments to the protocol.
+ */
+ if (resid < 0)
+ return (EINVAL);
+ dontroute =
+ (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
+ (so->so_proto->pr_flags & PR_ATOMIC);
+ p->p_stats->p_ru.ru_msgsnd++;
+ if (control)
+ clen = control->m_len;
+#define snderr(errno) { error = errno; splx(s); goto release; }
+
+restart:
+ if (error = sblock(&so->so_snd, SBLOCKWAIT(flags)))
+ goto out;
+ do {
+ s = splnet();
+ if (so->so_state & SS_CANTSENDMORE)
+ snderr(EPIPE);
+ if (so->so_error)
+ snderr(so->so_error);
+ if ((so->so_state & SS_ISCONNECTED) == 0) {
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+ if ((so->so_state & SS_ISCONFIRMING) == 0 &&
+ !(resid == 0 && clen != 0))
+ snderr(ENOTCONN);
+ } else if (addr == 0)
+ snderr(EDESTADDRREQ);
+ }
+ space = sbspace(&so->so_snd);
+ if (flags & MSG_OOB)
+ space += 1024;
+ if (atomic && resid > so->so_snd.sb_hiwat ||
+ clen > so->so_snd.sb_hiwat)
+ snderr(EMSGSIZE);
+ if (space < resid + clen && uio &&
+ (atomic || space < so->so_snd.sb_lowat || space < clen)) {
+ if (so->so_state & SS_NBIO)
+ snderr(EWOULDBLOCK);
+ sbunlock(&so->so_snd);
+ error = sbwait(&so->so_snd);
+ splx(s);
+ if (error)
+ goto out;
+ goto restart;
+ }
+ splx(s);
+ mp = &top;
+ space -= clen;
+ do {
+ if (uio == NULL) {
+ /*
+ * Data is prepackaged in "top".
+ */
+ resid = 0;
+ if (flags & MSG_EOR)
+ top->m_flags |= M_EOR;
+ } else do {
+ if (top == 0) {
+ MGETHDR(m, M_WAIT, MT_DATA);
+ mlen = MHLEN;
+ m->m_pkthdr.len = 0;
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+ } else {
+ MGET(m, M_WAIT, MT_DATA);
+ mlen = MLEN;
+ }
+ if (resid >= MINCLSIZE && space >= MCLBYTES) {
+ MCLGET(m, M_WAIT);
+ if ((m->m_flags & M_EXT) == 0)
+ goto nopages;
+ mlen = MCLBYTES;
+#ifdef MAPPED_MBUFS
+ len = min(MCLBYTES, resid);
+#else
+ if (atomic && top == 0) {
+ len = min(MCLBYTES - max_hdr, resid);
+ m->m_data += max_hdr;
+ } else
+ len = min(MCLBYTES, resid);
+#endif
+ space -= MCLBYTES;
+ } else {
+nopages:
+ len = min(min(mlen, resid), space);
+ space -= len;
+ /*
+ * For datagram protocols, leave room
+ * for protocol headers in first mbuf.
+ */
+ if (atomic && top == 0 && len < mlen)
+ MH_ALIGN(m, len);
+ }
+ error = uiomove(mtod(m, caddr_t), (int)len, uio);
+ resid = uio->uio_resid;
+ m->m_len = len;
+ *mp = m;
+ top->m_pkthdr.len += len;
+ if (error)
+ goto release;
+ mp = &m->m_next;
+ if (resid <= 0) {
+ if (flags & MSG_EOR)
+ top->m_flags |= M_EOR;
+ break;
+ }
+ } while (space > 0 && atomic);
+ if (dontroute)
+ so->so_options |= SO_DONTROUTE;
+ s = splnet(); /* XXX */
+ error = (*so->so_proto->pr_usrreq)(so,
+ (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
+ top, addr, control);
+ splx(s);
+ if (dontroute)
+ so->so_options &= ~SO_DONTROUTE;
+ clen = 0;
+ control = 0;
+ top = 0;
+ mp = &top;
+ if (error)
+ goto release;
+ } while (resid && space > 0);
+ } while (resid);
+
+release:
+ sbunlock(&so->so_snd);
+out:
+ if (top)
+ m_freem(top);
+ if (control)
+ m_freem(control);
+ return (error);
+}
+
+/*
+ * Implement receive operations on a socket.
+ * We depend on the way that records are added to the sockbuf
+ * by sbappend*.  In particular, each record (mbufs linked through m_next)
+ * must begin with an address if the protocol so specifies,
+ * followed by an optional mbuf or mbufs containing ancillary data,
+ * and then zero or more mbufs of data.
+ * In order to avoid blocking network interrupts for the entire time here,
+ * we splx() while doing the actual copy to user space.
+ * Although the sockbuf is locked, new data may still be appended,
+ * and thus we must maintain consistency of the sockbuf during that time.
+ *
+ * The caller may receive the data as a single mbuf chain by supplying
+ * an mbuf **mp0 for use in returning the chain.  The uio is then used
+ * only for the count in uio_resid.
+ *
+ * Arguments:
+ *	so	 socket to receive from.
+ *	paddr	 if non-null, cleared on entry; for PR_ADDR protocols it
+ *		 receives the leading address mbuf (a copy when peeking).
+ *	uio	 destination of the data and the requested byte count.
+ *	mp0	 if non-null, data mbufs are handed back through it
+ *		 instead of being copied out through the uio.
+ *	controlp if non-null, cleared on entry; receives the chain of
+ *		 MT_CONTROL mbufs (copies when peeking).
+ *	flagsp	 if non-null, supplies the MSG_* request flags (MSG_EOR is
+ *		 ignored on input) and has the resulting flags ORed in.
+ *
+ * Returns 0 or an errno value.  A signal or timeout that interrupts a
+ * MSG_WAITALL wait after some data was moved returns 0 (short count).
+ */
+soreceive(so, paddr, uio, mp0, controlp, flagsp)
+	register struct socket *so;
+	struct mbuf **paddr;
+	struct uio *uio;
+	struct mbuf **mp0;
+	struct mbuf **controlp;
+	int *flagsp;
+{
+	register struct mbuf *m, **mp;
+	register int flags, len, error, s, offset;
+	struct protosw *pr = so->so_proto;
+	struct mbuf *nextrecord;
+	int moff, type;
+	int orig_resid = uio->uio_resid;
+
+	mp = mp0;
+	if (paddr)
+		*paddr = 0;
+	if (controlp)
+		*controlp = 0;
+	if (flagsp)
+		flags = *flagsp &~ MSG_EOR;
+	else
+		flags = 0;
+	if (flags & MSG_OOB) {
+		/* Out-of-band data is fetched from the protocol directly. */
+		m = m_get(M_WAIT, MT_DATA);
+		error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
+		    m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
+		if (error)
+			goto bad;
+		do {
+			error = uiomove(mtod(m, caddr_t),
+			    (int) min(uio->uio_resid, m->m_len), uio);
+			m = m_free(m);
+		} while (uio->uio_resid && error == 0 && m);
+bad:
+		if (m)
+			m_freem(m);
+		return (error);
+	}
+	if (mp)
+		*mp = (struct mbuf *)0;
+	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
+		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
+		    (struct mbuf *)0, (struct mbuf *)0);
+
+restart:
+	if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
+		return (error);
+	s = splnet();
+
+	m = so->so_rcv.sb_mb;
+	/*
+	 * If we have less data than requested, block awaiting more
+	 * (subject to any timeout) if:
+	 *   1. the current count is less than the low water mark, or
+	 *   2. MSG_WAITALL is set, and it is possible to do the entire
+	 *	receive operation at once if we block (resid <= hiwat).
+	 *   3. MSG_DONTWAIT is not set
+	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
+	 * we have to do the receive in sections, and thus risk returning
+	 * a short count if a timeout or signal occurs after we start.
+	 */
+	if (m == 0 || ((flags & MSG_DONTWAIT) == 0 &&
+	    so->so_rcv.sb_cc < uio->uio_resid) &&
+	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
+	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
+	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0) {
+#ifdef DIAGNOSTIC
+		if (m == 0 && so->so_rcv.sb_cc)
+			panic("receive 1");
+#endif
+		if (so->so_error) {
+			if (m)
+				goto dontblock;
+			error = so->so_error;
+			if ((flags & MSG_PEEK) == 0)
+				so->so_error = 0;
+			goto release;
+		}
+		if (so->so_state & SS_CANTRCVMORE) {
+			if (m)
+				goto dontblock;
+			else
+				goto release;
+		}
+		for (; m; m = m->m_next)
+			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
+				m = so->so_rcv.sb_mb;
+				goto dontblock;
+			}
+		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
+		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+			error = ENOTCONN;
+			goto release;
+		}
+		if (uio->uio_resid == 0)
+			goto release;
+		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
+			error = EWOULDBLOCK;
+			goto release;
+		}
+		sbunlock(&so->so_rcv);
+		error = sbwait(&so->so_rcv);
+		splx(s);
+		if (error)
+			return (error);
+		goto restart;
+	}
+dontblock:
+	if (uio->uio_procp)
+		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
+	nextrecord = m->m_nextpkt;
+	if (pr->pr_flags & PR_ADDR) {
+#ifdef DIAGNOSTIC
+		if (m->m_type != MT_SONAME)
+			panic("receive 1a");
+#endif
+		orig_resid = 0;
+		if (flags & MSG_PEEK) {
+			if (paddr)
+				*paddr = m_copy(m, 0, m->m_len);
+			m = m->m_next;
+		} else {
+			sbfree(&so->so_rcv, m);
+			if (paddr) {
+				*paddr = m;
+				so->so_rcv.sb_mb = m->m_next;
+				m->m_next = 0;
+				m = so->so_rcv.sb_mb;
+			} else {
+				MFREE(m, so->so_rcv.sb_mb);
+				m = so->so_rcv.sb_mb;
+			}
+		}
+	}
+	while (m && m->m_type == MT_CONTROL && error == 0) {
+		if (flags & MSG_PEEK) {
+			if (controlp)
+				*controlp = m_copy(m, 0, m->m_len);
+			m = m->m_next;
+		} else {
+			sbfree(&so->so_rcv, m);
+			if (controlp) {
+				if (pr->pr_domain->dom_externalize &&
+				    mtod(m, struct cmsghdr *)->cmsg_type ==
+				    SCM_RIGHTS)
+				   error = (*pr->pr_domain->dom_externalize)(m);
+				*controlp = m;
+				so->so_rcv.sb_mb = m->m_next;
+				m->m_next = 0;
+				m = so->so_rcv.sb_mb;
+			} else {
+				MFREE(m, so->so_rcv.sb_mb);
+				m = so->so_rcv.sb_mb;
+			}
+		}
+		if (controlp) {
+			orig_resid = 0;
+			controlp = &(*controlp)->m_next;
+		}
+	}
+	if (m) {
+		if ((flags & MSG_PEEK) == 0)
+			m->m_nextpkt = nextrecord;
+		type = m->m_type;
+		if (type == MT_OOBDATA)
+			flags |= MSG_OOB;
+	}
+	moff = 0;
+	offset = 0;
+	while (m && uio->uio_resid > 0 && error == 0) {
+		if (m->m_type == MT_OOBDATA) {
+			if (type != MT_OOBDATA)
+				break;
+		} else if (type == MT_OOBDATA)
+			break;
+#ifdef DIAGNOSTIC
+		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
+			panic("receive 3");
+#endif
+		so->so_state &= ~SS_RCVATMARK;
+		len = uio->uio_resid;
+		if (so->so_oobmark && len > so->so_oobmark - offset)
+			len = so->so_oobmark - offset;
+		if (len > m->m_len - moff)
+			len = m->m_len - moff;
+		/*
+		 * If mp is set, just pass back the mbufs.
+		 * Otherwise copy them out via the uio, then free.
+		 * Sockbuf must be consistent here (points to current mbuf,
+		 * it points to next record) when we drop priority;
+		 * we must note any additions to the sockbuf when we
+		 * block interrupts again.
+		 */
+		if (mp == 0) {
+			splx(s);
+			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
+			s = splnet();
+		} else
+			uio->uio_resid -= len;
+		if (len == m->m_len - moff) {
+			if (m->m_flags & M_EOR)
+				flags |= MSG_EOR;
+			if (flags & MSG_PEEK) {
+				m = m->m_next;
+				moff = 0;
+			} else {
+				nextrecord = m->m_nextpkt;
+				sbfree(&so->so_rcv, m);
+				if (mp) {
+					*mp = m;
+					mp = &m->m_next;
+					so->so_rcv.sb_mb = m = m->m_next;
+					*mp = (struct mbuf *)0;
+				} else {
+					MFREE(m, so->so_rcv.sb_mb);
+					m = so->so_rcv.sb_mb;
+				}
+				if (m)
+					m->m_nextpkt = nextrecord;
+			}
+		} else {
+			if (flags & MSG_PEEK)
+				moff += len;
+			else {
+				if (mp)
+					*mp = m_copym(m, 0, len, M_WAIT);
+				m->m_data += len;
+				m->m_len -= len;
+				so->so_rcv.sb_cc -= len;
+			}
+		}
+		if (so->so_oobmark) {
+			if ((flags & MSG_PEEK) == 0) {
+				so->so_oobmark -= len;
+				if (so->so_oobmark == 0) {
+					so->so_state |= SS_RCVATMARK;
+					break;
+				}
+			} else {
+				offset += len;
+				if (offset == so->so_oobmark)
+					break;
+			}
+		}
+		if (flags & MSG_EOR)
+			break;
+		/*
+		 * If the MSG_WAITALL flag is set (for non-atomic socket),
+		 * we must not quit until "uio->uio_resid == 0" or an error
+		 * termination.  If a signal/timeout occurs, return
+		 * with a short count but without error.
+		 * Keep sockbuf locked against other readers.
+		 */
+		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
+		    !sosendallatonce(so) && !nextrecord) {
+			if (so->so_error || so->so_state & SS_CANTRCVMORE)
+				break;
+			error = sbwait(&so->so_rcv);
+			if (error) {
+				sbunlock(&so->so_rcv);
+				splx(s);
+				return (0);
+			}
+			if (m = so->so_rcv.sb_mb)
+				nextrecord = m->m_nextpkt;
+		}
+	}
+
+	if (m && pr->pr_flags & PR_ATOMIC) {
+		/* Unread remainder of an atomic record is discarded. */
+		flags |= MSG_TRUNC;
+		if ((flags & MSG_PEEK) == 0)
+			(void) sbdroprecord(&so->so_rcv);
+	}
+	if ((flags & MSG_PEEK) == 0) {
+		if (m == 0)
+			so->so_rcv.sb_mb = nextrecord;
+		/*
+		 * pr_usrreq takes five arguments everywhere else in this
+		 * function; pass exactly five here as well.
+		 */
+		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
+			    (struct mbuf *)flags, (struct mbuf *)0);
+	}
+	if (orig_resid == uio->uio_resid && orig_resid &&
+	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
+		/* Nothing was transferred and more may come: try again. */
+		sbunlock(&so->so_rcv);
+		splx(s);
+		goto restart;
+	}
+
+	if (flagsp)
+		*flagsp |= flags;
+release:
+	sbunlock(&so->so_rcv);
+	splx(s);
+	return (error);
+}
+
+/*
+ * Shut down the receive side, the send side, or both sides of a socket.
+ * "how" is biased by one so that it can be tested against the
+ * FREAD and FWRITE bits.  The receive side is flushed locally with
+ * sorflush(); the send side is handed to the protocol as PRU_SHUTDOWN,
+ * whose result is returned.  Returns 0 when only reading is shut down.
+ */
+soshutdown(so, how)
+	register struct socket *so;
+	register int how;
+{
+	register struct protosw *proto = so->so_proto;
+	register int sides = how + 1;
+
+	if (sides & FREAD)
+		sorflush(so);
+	if ((sides & FWRITE) == 0)
+		return (0);
+	return ((*proto->pr_usrreq)(so, PRU_SHUTDOWN,
+	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
+}
+
+/*
+ * Discard everything queued on a socket's receive buffer.
+ * The sockbuf is locked uninterruptibly (SB_NOINTR), the socket is
+ * marked as unable to receive more, and the buffer is then copied
+ * aside and zeroed in place so that the actual release happens on the
+ * private copy after the interrupt level has been restored.
+ */
+sorflush(so)
+	register struct socket *so;
+{
+	register struct sockbuf *rcv = &so->so_rcv;
+	register struct protosw *proto = so->so_proto;
+	register int ipl;
+	struct sockbuf saved;
+
+	rcv->sb_flags |= SB_NOINTR;
+	(void) sblock(rcv, M_WAITOK);
+	ipl = splimp();
+	socantrcvmore(so);
+	sbunlock(rcv);
+	saved = *rcv;
+	bzero((caddr_t)rcv, sizeof (*rcv));
+	splx(ipl);
+	/* Let the domain dispose of any rights still in the queue. */
+	if (proto->pr_flags & PR_RIGHTS) {
+		if (proto->pr_domain->dom_dispose)
+			(*proto->pr_domain->dom_dispose)(saved.sb_mb);
+	}
+	sbrelease(&saved);
+}
+
+/*
+ * Set a socket option.
+ *
+ *	so	 socket being configured.
+ *	level	 SOL_SOCKET for the options handled here; any other level
+ *		 is passed straight to the protocol's pr_ctloutput.
+ *	optname	 option to set.
+ *	m0	 mbuf holding the option value.  It is always consumed:
+ *		 either freed here or handed to pr_ctloutput.
+ *
+ * Returns 0 or an errno value: EINVAL for a missing or short value,
+ * ENOBUFS if a buffer reservation is refused, EDOM for a timeout that
+ * is negative, malformed, or does not fit the short tick counter, and
+ * ENOPROTOOPT for an unknown option.
+ */
+sosetopt(so, level, optname, m0)
+	register struct socket *so;
+	int level, optname;
+	struct mbuf *m0;
+{
+	int error = 0;
+	register struct mbuf *m = m0;
+
+	if (level != SOL_SOCKET) {
+		if (so->so_proto && so->so_proto->pr_ctloutput)
+			return ((*so->so_proto->pr_ctloutput)
+				  (PRCO_SETOPT, so, level, optname, &m0));
+		error = ENOPROTOOPT;
+	} else {
+		switch (optname) {
+
+		case SO_LINGER:
+			if (m == NULL || m->m_len != sizeof (struct linger)) {
+				error = EINVAL;
+				goto bad;
+			}
+			so->so_linger = mtod(m, struct linger *)->l_linger;
+			/* fall thru... */
+
+		case SO_DEBUG:
+		case SO_KEEPALIVE:
+		case SO_DONTROUTE:
+		case SO_USELOOPBACK:
+		case SO_BROADCAST:
+		case SO_REUSEADDR:
+		case SO_REUSEPORT:
+		case SO_OOBINLINE:
+			if (m == NULL || m->m_len < sizeof (int)) {
+				error = EINVAL;
+				goto bad;
+			}
+			/* The option name doubles as its so_options bit. */
+			if (*mtod(m, int *))
+				so->so_options |= optname;
+			else
+				so->so_options &= ~optname;
+			break;
+
+		case SO_SNDBUF:
+		case SO_RCVBUF:
+		case SO_SNDLOWAT:
+		case SO_RCVLOWAT:
+			if (m == NULL || m->m_len < sizeof (int)) {
+				error = EINVAL;
+				goto bad;
+			}
+			switch (optname) {
+
+			case SO_SNDBUF:
+			case SO_RCVBUF:
+				if (sbreserve(optname == SO_SNDBUF ?
+				    &so->so_snd : &so->so_rcv,
+				    (u_long) *mtod(m, int *)) == 0) {
+					error = ENOBUFS;
+					goto bad;
+				}
+				break;
+
+			case SO_SNDLOWAT:
+				so->so_snd.sb_lowat = *mtod(m, int *);
+				break;
+			case SO_RCVLOWAT:
+				so->so_rcv.sb_lowat = *mtod(m, int *);
+				break;
+			}
+			break;
+
+		case SO_SNDTIMEO:
+		case SO_RCVTIMEO:
+		    {
+			struct timeval *tv;
+			long ticks;
+			short val;
+
+			if (m == NULL || m->m_len < sizeof (*tv)) {
+				error = EINVAL;
+				goto bad;
+			}
+			tv = mtod(m, struct timeval *);
+			/*
+			 * Reject negative or malformed values, and bound
+			 * tv_sec first so that the multiplication below
+			 * cannot overflow.
+			 */
+			if (tv->tv_sec < 0 || tv->tv_usec < 0 ||
+			    tv->tv_usec >= 1000000 ||
+			    tv->tv_sec > SHRT_MAX / hz) {
+				error = EDOM;
+				goto bad;
+			}
+			ticks = tv->tv_sec * hz + tv->tv_usec / tick;
+			/* sb_timeo is stored through a short. */
+			if (ticks > SHRT_MAX) {
+				error = EDOM;
+				goto bad;
+			}
+			val = ticks;
+
+			switch (optname) {
+
+			case SO_SNDTIMEO:
+				so->so_snd.sb_timeo = val;
+				break;
+			case SO_RCVTIMEO:
+				so->so_rcv.sb_timeo = val;
+				break;
+			}
+			break;
+		    }
+
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		/* Give the protocol a look at socket-level options too. */
+		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
+			(void) ((*so->so_proto->pr_ctloutput)
+				  (PRCO_SETOPT, so, level, optname, &m0));
+			m = NULL;	/* freed by protocol */
+		}
+	}
+bad:
+	if (m)
+		(void) m_free(m);
+	return (error);
+}
+
+/*
+ * Fetch a socket option.
+ *
+ *	so	 socket being queried.
+ *	level	 SOL_SOCKET for the options handled here; any other level
+ *		 is passed to the protocol's pr_ctloutput (ENOPROTOOPT if
+ *		 the protocol has none).
+ *	optname	 option to fetch.
+ *	mp	 on success receives a newly allocated MT_SOOPTS mbuf
+ *		 holding the value; m_len is set to the value's size.
+ *
+ * Returns 0, or ENOPROTOOPT for an unknown option (the mbuf is freed).
+ * Reading SO_ERROR clears the pending error.
+ */
+sogetopt(so, level, optname, mp)
+	register struct socket *so;
+	int level, optname;
+	struct mbuf **mp;
+{
+	register struct mbuf *m;
+
+	if (level != SOL_SOCKET) {
+		if (so->so_proto && so->so_proto->pr_ctloutput) {
+			return ((*so->so_proto->pr_ctloutput)
+				  (PRCO_GETOPT, so, level, optname, mp));
+		} else
+			return (ENOPROTOOPT);
+	} else {
+		m = m_get(M_WAIT, MT_SOOPTS);
+		m->m_len = sizeof (int);
+
+		switch (optname) {
+
+		case SO_LINGER:
+			m->m_len = sizeof (struct linger);
+			mtod(m, struct linger *)->l_onoff =
+				so->so_options & SO_LINGER;
+			mtod(m, struct linger *)->l_linger = so->so_linger;
+			break;
+
+		case SO_USELOOPBACK:
+		case SO_DONTROUTE:
+		case SO_DEBUG:
+		case SO_KEEPALIVE:
+		case SO_REUSEADDR:
+		case SO_REUSEPORT:
+		case SO_BROADCAST:
+		case SO_OOBINLINE:
+			*mtod(m, int *) = so->so_options & optname;
+			break;
+
+		case SO_TYPE:
+			*mtod(m, int *) = so->so_type;
+			break;
+
+		case SO_ERROR:
+			*mtod(m, int *) = so->so_error;
+			so->so_error = 0;
+			break;
+
+		case SO_SNDBUF:
+			*mtod(m, int *) = so->so_snd.sb_hiwat;
+			break;
+
+		case SO_RCVBUF:
+			*mtod(m, int *) = so->so_rcv.sb_hiwat;
+			break;
+
+		case SO_SNDLOWAT:
+			*mtod(m, int *) = so->so_snd.sb_lowat;
+			break;
+
+		case SO_RCVLOWAT:
+			*mtod(m, int *) = so->so_rcv.sb_lowat;
+			break;
+
+		case SO_SNDTIMEO:
+		case SO_RCVTIMEO:
+		    {
+			int val = (optname == SO_SNDTIMEO ?
+			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
+
+			m->m_len = sizeof(struct timeval);
+			mtod(m, struct timeval *)->tv_sec = val / hz;
+			/*
+			 * sosetopt() converts microseconds to ticks with
+			 * "tv_usec / tick"; the leftover ticks are turned
+			 * back into microseconds by multiplying.
+			 */
+			mtod(m, struct timeval *)->tv_usec =
+			    (val % hz) * tick;
+			break;
+		    }
+
+		default:
+			(void)m_free(m);
+			return (ENOPROTOOPT);
+		}
+		*mp = m;
+		return (0);
+	}
+}
+
+/*
+ * Announce out-of-band data on a socket: send SIGURG to the owner
+ * recorded in so_pgid (a negative value names a process group, a
+ * positive value a single process) and wake up selectors on the
+ * receive buffer.
+ */
+sohasoutofband(so)
+	register struct socket *so;
+{
+	struct proc *owner;
+
+	if (so->so_pgid < 0) {
+		gsignal(-so->so_pgid, SIGURG);
+	} else if (so->so_pgid > 0) {
+		owner = pfind(so->so_pgid);
+		if (owner != 0)
+			psignal(owner, SIGURG);
+	}
+	selwakeup(&so->so_rcv.sb_sel);
+}
diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c
new file mode 100644
index 000000000000..d4af592d79b5
--- /dev/null
+++ b/sys/kern/uipc_socket2.c
@@ -0,0 +1,755 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+/*
+ * Primitive routines for operating on sockets and socket buffers
+ */
+
+/* strings for sleep message (the wait-message argument of tsleep()): */
+char netio[] = "netio"; /* used by sbwait() and sb_lock() in this file */
+char netcon[] = "netcon";
+char netcls[] = "netcls";
+
+u_long sb_max = SB_MAX; /* patchable; sbreserve() bounds reservations by it */
+
+/*
+ * Procedures to manipulate state flags of socket
+ * and do appropriate wakeups. Normal sequence from the
+ * active (originating) side is that soisconnecting() is
+ * called during processing of connect() call,
+ * resulting in an eventual call to soisconnected() if/when the
+ * connection is established. When the connection is torn down
+ * soisdisconnecting() is called during processing of disconnect() call,
+ * and soisdisconnected() is called when the connection to the peer
+ * is totally severed. The semantics of these routines are such that
+ * connectionless protocols can call soisconnected() and soisdisconnected()
+ * only, bypassing the in-progress calls when setting up a ``connection''
+ * takes no time.
+ *
+ * From the passive side, a socket is created with
+ * two queues of sockets: so_q0 for connections in progress
+ * and so_q for connections already made and awaiting user acceptance.
+ * As a protocol is preparing incoming connections, it creates a socket
+ * structure queued on so_q0 by calling sonewconn(). When the connection
+ * is established, soisconnected() is called, and transfers the
+ * socket structure to so_q, making it available to accept().
+ *
+ * If a socket is closed with sockets on either
+ * so_q0 or so_q, these sockets are dropped.
+ *
+ * If higher level protocols are implemented in
+ * the kernel, the wakeups done here will sometimes
+ * cause software-interrupt process scheduling.
+ */
+
+/*
+ * Mark a socket as having a connection attempt in progress:
+ * the connected and disconnecting bits are cleared and
+ * SS_ISCONNECTING is set.  No wakeups are issued.
+ */
+soisconnecting(so)
+	register struct socket *so;
+{
+
+	so->so_state = (so->so_state &
+	    ~(SS_ISCONNECTED|SS_ISDISCONNECTING)) | SS_ISCONNECTING;
+}
+
+/*
+ * Mark a socket as connected.
+ * If the socket sits on its head's incomplete queue (so_q0) it is
+ * moved to the completed queue (so_q) and the head's sleepers are
+ * woken; otherwise the sleepers on the socket itself are woken.
+ */
+soisconnected(so)
+	register struct socket *so;
+{
+	register struct socket *listener = so->so_head;
+
+	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
+	so->so_state |= SS_ISCONNECTED;
+	if (listener != 0 && soqremque(so, 0)) {
+		soqinsque(listener, so, 1);
+		sorwakeup(listener);
+		wakeup((caddr_t)&listener->so_timeo);
+		return;
+	}
+	wakeup((caddr_t)&so->so_timeo);
+	sorwakeup(so);
+	sowwakeup(so);
+}
+
+/*
+ * Mark a socket as being in the process of disconnecting.
+ * No more data can be sent or received from now on, so anything
+ * sleeping on the socket, or on either of its buffers, is woken.
+ */
+soisdisconnecting(so)
+	register struct socket *so;
+{
+
+	so->so_state = (so->so_state & ~SS_ISCONNECTING) |
+	    (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
+	wakeup((caddr_t)&so->so_timeo);
+	sowwakeup(so);
+	sorwakeup(so);
+}
+
+/*
+ * Mark a socket as fully disconnected: every connection-state bit is
+ * cleared, sending and receiving are both disabled, and anything
+ * sleeping on the socket or on either of its buffers is woken.
+ */
+soisdisconnected(so)
+	register struct socket *so;
+{
+
+	so->so_state = (so->so_state &
+	    ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING)) |
+	    (SS_CANTRCVMORE|SS_CANTSENDMORE);
+	wakeup((caddr_t)&so->so_timeo);
+	sowwakeup(so);
+	sorwakeup(so);
+}
+
+/*
+ * Create the socket for a new incoming connection on "head", a socket
+ * that accepts connections.  If the queues have room and memory is
+ * available, a new socket structure is allocated, initialised from
+ * the head, properly linked onto one of the head's queues and attached
+ * to the protocol.
+ *
+ * connstatus is 0 or a socket state flag (e.g. SS_ISCONFIRMING or
+ * SS_ISCONNECTED) that is ORed into the new socket's so_state.  A
+ * nonzero connstatus places the socket directly on the completed queue
+ * (so_q) and wakes the head; otherwise it goes on so_q0.
+ *
+ * Returns the new socket, or a null pointer if the queue limit is
+ * exceeded, allocation fails, or the protocol attach fails.
+ *
+ * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
+ * to catch calls that are missing the (new) second parameter.
+ */
+struct socket *
+sonewconn1(head, connstatus)
+	register struct socket *head;
+	int connstatus;
+{
+	register struct socket *so;
+	int whichq = (connstatus != 0);
+
+	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
+		return ((struct socket *)0);
+	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
+	if (so == NULL)
+		return ((struct socket *)0);
+	bzero((caddr_t)so, sizeof(*so));
+
+	/* Inherit the listening socket's characteristics. */
+	so->so_type = head->so_type;
+	so->so_options = head->so_options &~ SO_ACCEPTCONN;
+	so->so_linger = head->so_linger;
+	so->so_state = head->so_state | SS_NOFDREF;
+	so->so_proto = head->so_proto;
+	so->so_timeo = head->so_timeo;
+	so->so_pgid = head->so_pgid;
+	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
+
+	soqinsque(head, so, whichq);
+	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
+	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
+		/* Attach failed: undo the queueing and discard. */
+		(void) soqremque(so, whichq);
+		(void) free((caddr_t)so, M_SOCKET);
+		return ((struct socket *)0);
+	}
+	if (connstatus == 0)
+		return (so);
+	sorwakeup(head);
+	wakeup((caddr_t)&head->so_timeo);
+	so->so_state |= connstatus;
+	return (so);
+}
+
+/*
+ * Append socket "so" to the tail of one of head's connection queues:
+ * so_q0 when q is 0, so_q otherwise.  The matching length counter is
+ * incremented and so->so_head is pointed at head.
+ */
+soqinsque(head, so, q)
+	register struct socket *head, *so;
+	int q;
+{
+	register struct socket **link;
+
+	so->so_head = head;
+	if (q == 0) {
+		head->so_q0len++;
+		so->so_q0 = 0;
+		link = &head->so_q0;
+		while (*link != 0)
+			link = &(*link)->so_q0;
+	} else {
+		head->so_qlen++;
+		so->so_q = 0;
+		link = &head->so_q;
+		while (*link != 0)
+			link = &(*link)->so_q;
+	}
+	*link = so;
+}
+
+/*
+ * Remove socket "so" from one of its head's connection queues:
+ * so_q0 when q is 0, so_q otherwise.
+ * Returns 1 after unlinking the socket (its queue links and so_head
+ * are cleared and the head's length counter decremented), or 0 if the
+ * socket is not on that queue.
+ */
+soqremque(so, q)
+	register struct socket *so;
+	int q;
+{
+	register struct socket *head, *pred, *cur;
+
+	head = so->so_head;
+	pred = head;
+	/* Walk the chosen queue until the predecessor of so is found. */
+	while ((cur = (q ? pred->so_q : pred->so_q0)) != so) {
+		if (cur == 0)
+			return (0);
+		pred = cur;
+	}
+	if (q == 0) {
+		pred->so_q0 = cur->so_q0;
+		head->so_q0len--;
+	} else {
+		pred->so_q = cur->so_q;
+		head->so_qlen--;
+	}
+	cur->so_q0 = cur->so_q = 0;
+	cur->so_head = 0;
+	return (1);
+}
+
+/*
+ * Socantsendmore indicates that no more data will be sent on the
+ * socket; it would normally be applied to a socket when the user
+ * informs the system that no more data is to be sent, by the protocol
+ * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
+ * will be received, and will normally be applied to the socket by a
+ * protocol when it detects that the peer will send no more data.
+ * Data queued for reading in the socket may yet be read.
+ */
+
+/*
+ * Flag the socket as unable to send any more data and wake up
+ * anything waiting on the send side.
+ */
+socantsendmore(so)
+	struct socket *so;
+{
+
+	so->so_state = so->so_state | SS_CANTSENDMORE;
+	sowwakeup(so);
+}
+
+/*
+ * Flag the socket as unable to receive any more data and wake up
+ * anything waiting on the receive side.
+ */
+socantrcvmore(so)
+	struct socket *so;
+{
+
+	so->so_state = so->so_state | SS_CANTRCVMORE;
+	sorwakeup(so);
+}
+
+/*
+ * Sleep until data arrives at, or drains from, a socket buffer.
+ * SB_WAIT is set so that sowakeup() knows to issue a wakeup on
+ * sb_cc.  The sleep can be interrupted by signals unless SB_NOINTR
+ * is set, and is bounded by sb_timeo.  Returns tsleep()'s result.
+ */
+sbwait(sb)
+	struct sockbuf *sb;
+{
+	int pri;
+
+	sb->sb_flags |= SB_WAIT;
+	pri = PSOCK;
+	if ((sb->sb_flags & SB_NOINTR) == 0)
+		pri |= PCATCH;
+	return (tsleep((caddr_t)&sb->sb_cc, pri, netio, sb->sb_timeo));
+}
+
+/*
+ * Acquire the lock on a sockbuf that is already known to be locked
+ * by someone else: sleep, with SB_WANT set, until SB_LOCK is clear
+ * and then take it.  The sleep can be interrupted by signals unless
+ * SB_NOINTR is set.
+ * Returns 0 with the lock held, or the error returned by the sleep
+ * (EINTR) without it.
+ */
+sb_lock(sb)
+	register struct sockbuf *sb;
+{
+	int error, pri;
+
+	for (;;) {
+		if ((sb->sb_flags & SB_LOCK) == 0)
+			break;
+		sb->sb_flags |= SB_WANT;
+		/* Re-evaluated on every pass, as the flags may change. */
+		pri = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH;
+		error = tsleep((caddr_t)&sb->sb_flags, pri, netio, 0);
+		if (error)
+			return (error);
+	}
+	sb->sb_flags |= SB_LOCK;
+	return (0);
+}
+
+/*
+ * Wake up everything waiting on a socket buffer: selectors, and
+ * sleepers in sbwait() if SB_WAIT is set.  When the socket has
+ * SS_ASYNC set, SIGIO is also posted to the owner recorded in so_pgid
+ * (a negative value names a process group, a positive one a process).
+ */
+sowakeup(so, sb)
+	register struct socket *so;
+	register struct sockbuf *sb;
+{
+	struct proc *owner;
+
+	selwakeup(&sb->sb_sel);
+	sb->sb_flags &= ~SB_SEL;
+	if (sb->sb_flags & SB_WAIT) {
+		sb->sb_flags &= ~SB_WAIT;
+		wakeup((caddr_t)&sb->sb_cc);
+	}
+	if ((so->so_state & SS_ASYNC) == 0)
+		return;
+	if (so->so_pgid < 0) {
+		gsignal(-so->so_pgid, SIGIO);
+	} else if (so->so_pgid > 0) {
+		owner = pfind(so->so_pgid);
+		if (owner != 0)
+			psignal(owner, SIGIO);
+	}
+}
+
+/*
+ * Socket buffer (struct sockbuf) utility routines.
+ *
+ * Each socket contains two socket buffers: one for sending data and
+ * one for receiving data. Each buffer contains a queue of mbufs,
+ * information about the number of mbufs and amount of data in the
+ * queue, and other fields allowing select() statements and notification
+ * on data availability to be implemented.
+ *
+ * Data stored in a socket buffer is maintained as a list of records.
+ * Each record is a list of mbufs chained together with the m_next
+ * field. Records are chained together with the m_nextpkt field. The upper
+ * level routine soreceive() expects the following conventions to be
+ * observed when placing information in the receive buffer:
+ *
+ * 1. If the protocol requires each message be preceded by the sender's
+ * name, then a record containing that name must be present before
+ * any associated data (mbuf's must be of type MT_SONAME).
+ * 2. If the protocol supports the exchange of ``access rights'' (really
+ * just additional data associated with the message), and there are
+ * ``rights'' to be received, then a record containing this data
+ * should be present (mbuf's must be of type MT_CONTROL).
+ * 3. If a name or rights record exists, then it must be followed by
+ * a data record, perhaps of zero length.
+ *
+ * Before using a new socket structure it is first necessary to reserve
+ * buffer space to the socket, by calling sbreserve(). This should commit
+ * some of the available buffer space in the system buffer pool for the
+ * socket (currently, it does nothing but enforce limits). The space
+ * should be released by calling sbrelease() when the socket is destroyed.
+ */
+
+/*
+ * Reserve send and receive buffer space for a socket and establish
+ * default low-water marks where none are set (1 for receive,
+ * MCLBYTES for send, the latter capped at the send high-water mark).
+ * Returns 0 on success, or ENOBUFS if either reservation is refused;
+ * a send reservation already made is released in that case.
+ */
+soreserve(so, sndcc, rcvcc)
+	register struct socket *so;
+	u_long sndcc, rcvcc;
+{
+
+	if (sbreserve(&so->so_snd, sndcc) == 0)
+		return (ENOBUFS);
+	if (sbreserve(&so->so_rcv, rcvcc) == 0) {
+		sbrelease(&so->so_snd);
+		return (ENOBUFS);
+	}
+	if (so->so_rcv.sb_lowat == 0)
+		so->so_rcv.sb_lowat = 1;
+	if (so->so_snd.sb_lowat == 0)
+		so->so_snd.sb_lowat = MCLBYTES;
+	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
+		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
+	return (0);
+}
+
+/*
+ * Set the space limits of a sockbuf to allow "cc" bytes of data.
+ * The mbuf limit (sb_mbmax) is scaled to twice cc, capped at sb_max,
+ * so that mbcnt doesn't become limiting if buffering efficiency is
+ * near the normal case.  The low-water mark is pulled down to the
+ * new high-water mark if it exceeds it.
+ * Returns 1 on success, 0 if cc is larger than sb_max permits.
+ */
+sbreserve(sb, cc)
+	struct sockbuf *sb;
+	u_long cc;
+{
+	u_long ceiling = sb_max * MCLBYTES / (MSIZE + MCLBYTES);
+
+	if (cc > ceiling)
+		return (0);
+	sb->sb_hiwat = cc;
+	sb->sb_mbmax = min(cc * 2, sb_max);
+	if (sb->sb_lowat > sb->sb_hiwat)
+		sb->sb_lowat = sb->sb_hiwat;
+	return (1);
+}
+
+/*
+ * Give back everything a sockbuf holds: its mbufs are flushed and
+ * both of its space limits are reset to zero.
+ */
+sbrelease(sb)
+	struct sockbuf *sb;
+{
+
+	sbflush(sb);
+	sb->sb_mbmax = 0;
+	sb->sb_hiwat = 0;
+}
+
+/*
+ * Routines to add and remove
+ * data from an mbuf queue.
+ *
+ * The routines sbappend() or sbappendrecord() are normally called to
+ * append new mbufs to a socket buffer, after checking that adequate
+ * space is available, comparing the function sbspace() with the amount
+ * of data to be added. sbappendrecord() differs from sbappend() in
+ * that data supplied is treated as the beginning of a new record.
+ * To place a sender's address, optional access rights, and data in a
+ * socket receive buffer, sbappendaddr() should be used. To place
+ * access rights and data in a socket receive buffer, sbappendcontrol()
+ * should be used. In either case, the new data begins a new record.
+ * Note that unlike sbappend() and sbappendrecord(), these routines check
+ * for the caller that there will be enough space to store the data.
+ * Each fails if there is not enough space, or if it cannot find mbufs
+ * to store additional information in.
+ *
+ * Reliable protocols may use the socket send buffer to hold data
+ * awaiting acknowledgement. Data is normally copied from a socket
+ * send buffer in a protocol with m_copy for output to a peer,
+ * and then removing the data from the socket buffer with sbdrop()
+ * or sbdroprecord() when the data is acknowledged by the peer.
+ */
+
+/*
+ * Append mbuf chain m to the last record in the socket buffer sb.
+ * The additional space associated with the mbuf chain is recorded
+ * in sb.  Empty mbufs are discarded and mbufs are compacted where
+ * possible (see sbcompress()).  If the last record already contains
+ * an mbuf marked M_EOR, the chain is started as a new record instead.
+ * A null chain is ignored.
+ */
+sbappend(sb, m)
+	struct sockbuf *sb;
+	struct mbuf *m;
+{
+	register struct mbuf *tail;
+
+	if (m == 0)
+		return;
+	tail = sb->sb_mb;
+	if (tail != 0) {
+		/* Find the last record, then its last mbuf. */
+		while (tail->m_nextpkt)
+			tail = tail->m_nextpkt;
+		for (;;) {
+			if (tail->m_flags & M_EOR) {
+				sbappendrecord(sb, m); /* XXXXXX!!!! */
+				return;
+			}
+			if (tail->m_next == 0)
+				break;
+			tail = tail->m_next;
+		}
+	}
+	sbcompress(sb, m, tail);
+}
+
+#ifdef SOCKBUF_DEBUG
+/*
+ * Debugging aid: walk the mbufs linked through m_next from sb_mb and
+ * verify that their byte and mbuf-space totals match sb_cc and
+ * sb_mbcnt.  Panics on a mismatch, or if any mbuf in the walk has a
+ * following record (m_nextpkt set).
+ */
+sbcheck(sb)
+	register struct sockbuf *sb;
+{
+	register struct mbuf *cur;
+	register int bytes = 0, mbspace = 0;
+
+	cur = sb->sb_mb;
+	while (cur != 0) {
+		bytes += cur->m_len;
+		mbspace += MSIZE;
+		if (cur->m_flags & M_EXT)
+			mbspace += cur->m_ext.ext_size;
+		if (cur->m_nextpkt)
+			panic("sbcheck nextpkt");
+		cur = cur->m_next;
+	}
+	if (bytes != sb->sb_cc || mbspace != sb->sb_mbcnt) {
+		printf("cc %d != %d || mbcnt %d != %d\n", bytes, sb->sb_cc,
+		    mbspace, sb->sb_mbcnt);
+		panic("sbcheck");
+	}
+}
+#endif
+
+/*
+ * Append mbuf chain m0 to the socket buffer sb as the start of a
+ * new record, placed after the current last record.  The first mbuf
+ * is always queued, even if empty; the rest of the chain is run
+ * through sbcompress().  A null chain is ignored.
+ */
+sbappendrecord(sb, m0)
+	register struct sockbuf *sb;
+	register struct mbuf *m0;
+{
+	register struct mbuf *lastrec, *rest;
+
+	if (m0 == 0)
+		return;
+	lastrec = sb->sb_mb;
+	if (lastrec != 0) {
+		while (lastrec->m_nextpkt)
+			lastrec = lastrec->m_nextpkt;
+	}
+	/*
+	 * Put the first mbuf on the queue.
+	 * Note this permits zero length records.
+	 */
+	sballoc(sb, m0);
+	if (lastrec != 0)
+		lastrec->m_nextpkt = m0;
+	else
+		sb->sb_mb = m0;
+	rest = m0->m_next;
+	m0->m_next = 0;
+	/* Carry the end-of-record mark over to the rest of the chain. */
+	if (rest != 0 && (m0->m_flags & M_EOR)) {
+		m0->m_flags &= ~M_EOR;
+		rest->m_flags |= M_EOR;
+	}
+	sbcompress(sb, rest, m0);
+}
+
+/*
+ * Like sbappendrecord(), except that the new record of OOB data is
+ * inserted at the beginning of the sockbuf, but after any records
+ * that already hold OOB data.  A record counts as OOB when its first
+ * mbuf, after skipping leading MT_CONTROL mbufs, is MT_OOBDATA.
+ * A null chain is ignored.
+ */
+sbinsertoob(sb, m0)
+	register struct sockbuf *sb;
+	register struct mbuf *m0;
+{
+	register struct mbuf *m;
+	register struct mbuf **mp;
+
+	if (m0 == 0)
+		return;
+	mp = &sb->sb_mb;
+	while ((m = *mp) != 0) {
+		/* Look past control mbufs at the front of this record. */
+		while (m->m_type == MT_CONTROL && m->m_next != 0)
+			m = m->m_next;
+		if (m->m_type != MT_OOBDATA)
+			break;
+		mp = &(*mp)->m_nextpkt;
+	}
+	/*
+	 * Put the first mbuf on the queue.
+	 * Note this permits zero length records.
+	 */
+	sballoc(sb, m0);
+	m0->m_nextpkt = *mp;
+	*mp = m0;
+	m = m0->m_next;
+	m0->m_next = 0;
+	if (m != 0 && (m0->m_flags & M_EOR)) {
+		m0->m_flags &= ~M_EOR;
+		m->m_flags |= M_EOR;
+	}
+	sbcompress(sb, m, m0);
+}
+
+/*
+ * Append address and data, and optionally, control (ancillary) data
+ * to the receive queue of a socket, as one new record laid out as
+ * address mbuf, control mbufs, data mbufs.  If present, m0 must
+ * include a packet header with total length (panics otherwise).
+ * Returns 1 on success.  Returns 0, leaving the chains untouched, if
+ * there is no space in the sockbuf, the address does not fit in one
+ * mbuf, or no mbuf can be obtained for it.
+ */
+sbappendaddr(sb, asa, m0, control)
+	register struct sockbuf *sb;
+	struct sockaddr *asa;
+	struct mbuf *m0, *control;
+{
+	register struct mbuf *m, *n;
+	int space = asa->sa_len;
+
+	if (m0) {
+		if ((m0->m_flags & M_PKTHDR) == 0)
+			panic("sbappendaddr");
+		space += m0->m_pkthdr.len;
+	}
+	/* Total the control chain; n ends at its last mbuf (or null). */
+	n = control;
+	if (n != 0) {
+		for (;;) {
+			space += n->m_len;
+			if (n->m_next == 0)
+				break;
+			n = n->m_next;
+		}
+	}
+	if (space > sbspace(sb))
+		return (0);
+	if (asa->sa_len > MLEN)
+		return (0);
+	MGET(m, M_DONTWAIT, MT_SONAME);
+	if (m == 0)
+		return (0);
+	m->m_len = asa->sa_len;
+	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
+	if (n != 0)
+		n->m_next = m0;		/* concatenate data to control */
+	else
+		control = m0;
+	m->m_next = control;
+	n = m;
+	while (n != 0) {
+		sballoc(sb, n);
+		n = n->m_next;
+	}
+	/* Link the finished record after the last one queued. */
+	n = sb->sb_mb;
+	if (n == 0) {
+		sb->sb_mb = m;
+	} else {
+		while (n->m_nextpkt)
+			n = n->m_nextpkt;
+		n->m_nextpkt = m;
+	}
+	return (1);
+}
+
+/*
+ * Append control (ancillary) data followed by data to a socket
+ * buffer as one new record.  control must be non-null (panics
+ * otherwise); m0 may be null.
+ * Returns 1 on success, or 0, leaving the chains untouched, if the
+ * combined length exceeds the space left in the sockbuf.
+ */
+sbappendcontrol(sb, m0, control)
+	struct sockbuf *sb;
+	struct mbuf *control, *m0;
+{
+	register struct mbuf *m, *lastctl;
+	int space = 0;
+
+	if (control == 0)
+		panic("sbappendcontrol");
+	/* Total the control chain, stopping on its last mbuf. */
+	m = control;
+	for (;;) {
+		space += m->m_len;
+		if (m->m_next == 0)
+			break;
+		m = m->m_next;
+	}
+	lastctl = m;
+	m = m0;
+	while (m != 0) {
+		space += m->m_len;
+		m = m->m_next;
+	}
+	if (space > sbspace(sb))
+		return (0);
+	lastctl->m_next = m0;		/* concatenate data to control */
+	m = control;
+	while (m != 0) {
+		sballoc(sb, m);
+		m = m->m_next;
+	}
+	/* Link the finished record after the last one queued. */
+	m = sb->sb_mb;
+	if (m == 0) {
+		sb->sb_mb = control;
+	} else {
+		while (m->m_nextpkt)
+			m = m->m_nextpkt;
+		m->m_nextpkt = control;
+	}
+	return (1);
+}
+
+/*
+ * Compress mbuf chain m into the socket buffer sb following mbuf n.
+ * If n is null, the buffer is presumed empty.
+ *
+ * Each mbuf of m is either freed (when empty and not needed to carry
+ * an end-of-record mark), copied into the spare room of n (when n has
+ * no external storage or M_EOR mark, is of the same type, and the data
+ * fits), or linked after n and accounted for with sballoc().
+ * An M_EOR mark seen anywhere in m ends up on the last mbuf queued.
+ */
+sbcompress(sb, m, n)
+	register struct sockbuf *sb;
+	register struct mbuf *m, *n;
+{
+	register int eor = 0;
+	register struct mbuf *o;
+
+	while (m) {
+		eor |= m->m_flags & M_EOR;
+		if (m->m_len == 0) {
+			int discard;
+
+			if (eor == 0)
+				discard = 1;
+			else {
+				/*
+				 * An empty mbuf may only go if a neighbour
+				 * of the same type can carry the mark.
+				 */
+				o = m->m_next;
+				if (o == 0)
+					o = n;
+				discard = (o != 0 && o->m_type == m->m_type);
+			}
+			if (discard) {
+				m = m_free(m);
+				continue;
+			}
+		}
+		if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 &&
+		    (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
+		    n->m_type == m->m_type) {
+			/* Fold m's data into the tail of n. */
+			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
+			    (unsigned)m->m_len);
+			n->m_len += m->m_len;
+			sb->sb_cc += m->m_len;
+			m = m_free(m);
+			continue;
+		}
+		if (n)
+			n->m_next = m;
+		else
+			sb->sb_mb = m;
+		sballoc(sb, m);
+		n = m;
+		m->m_flags &= ~M_EOR;
+		m = m->m_next;
+		n->m_next = 0;
+	}
+	if (eor == 0)
+		return;
+	if (n)
+		n->m_flags |= eor;
+	else
+		printf("semi-panic: sbcompress\n");
+}
+
+/*
+ * Free all mbufs in a sockbuf by dropping its contents until no
+ * mbuf space remains accounted, then check that all resources were
+ * reclaimed.  Panics if the sockbuf is locked, or if data or mbufs
+ * remain afterwards.
+ */
+sbflush(sb)
+	register struct sockbuf *sb;
+{
+
+	if (sb->sb_flags & SB_LOCK)
+		panic("sbflush");
+	while (sb->sb_mbcnt != 0)
+		sbdrop(sb, (int)sb->sb_cc);
+	if (sb->sb_cc != 0 || sb->sb_mb != 0)
+		panic("sbflush 2");
+}
+
+/*
+ * Drop len bytes of data from the front of a sockbuf, crossing
+ * record boundaries as needed.  Whole mbufs are freed; a partially
+ * consumed mbuf is trimmed in place.  Empty mbufs left at the front
+ * afterwards are freed as well.  Panics if the sockbuf holds fewer
+ * than len bytes.
+ */
+sbdrop(sb, len)
+	register struct sockbuf *sb;
+	register int len;
+{
+	register struct mbuf *m, *mn;
+	struct mbuf *nextrec;
+
+	m = sb->sb_mb;
+	nextrec = (m != 0) ? m->m_nextpkt : 0;
+	while (len > 0) {
+		if (m == 0) {
+			/* Current record used up; move on to the next. */
+			if (nextrec == 0)
+				panic("sbdrop");
+			m = nextrec;
+			nextrec = m->m_nextpkt;
+		} else if (m->m_len > len) {
+			m->m_len -= len;
+			m->m_data += len;
+			sb->sb_cc -= len;
+			break;
+		} else {
+			len -= m->m_len;
+			sbfree(sb, m);
+			MFREE(m, mn);
+			m = mn;
+		}
+	}
+	while (m != 0 && m->m_len == 0) {
+		sbfree(sb, m);
+		MFREE(m, mn);
+		m = mn;
+	}
+	if (m == 0) {
+		sb->sb_mb = nextrec;
+	} else {
+		sb->sb_mb = m;
+		m->m_nextpkt = nextrec;
+	}
+}
+
+/*
+ * Drop the first record of a sockbuf: the next record becomes the
+ * front of the queue and every mbuf of the old one is freed.
+ * Does nothing if the sockbuf is empty.
+ */
+sbdroprecord(sb)
+	register struct sockbuf *sb;
+{
+	register struct mbuf *m, *mn;
+
+	m = sb->sb_mb;
+	if (m == 0)
+		return;
+	sb->sb_mb = m->m_nextpkt;
+	while (m != 0) {
+		sbfree(sb, m);
+		MFREE(m, mn);
+		m = mn;
+	}
+}
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
new file mode 100644
index 000000000000..89b7ffdf1960
--- /dev/null
+++ b/sys/kern/uipc_syscalls.c
@@ -0,0 +1,1217 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/filedesc.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+/*
+ * System call interface to the socket abstraction.
+ */
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+#define COMPAT_OLDSOCK
+#endif
+
+extern struct fileops socketops;
+
+/* Arguments of the socket system call. */
+struct socket_args {
+	int	domain;
+	int	type;
+	int	protocol;
+};
+/*
+ * Create a socket and attach it to a newly allocated descriptor.
+ * On success the descriptor number is stored in *retval and 0 is
+ * returned.  A falloc() failure is returned as is; if socreate()
+ * fails, the descriptor slot is cleared, the file entry is released
+ * and socreate()'s error is returned.
+ */
+socket(p, uap, retval)
+	struct proc *p;
+	register struct socket_args *uap;
+	int *retval;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct socket *so;
+	struct file *fp;
+	int fd, error;
+
+	error = falloc(p, &fp, &fd);
+	if (error)
+		return (error);
+	fp->f_flag = FREAD|FWRITE;
+	fp->f_type = DTYPE_SOCKET;
+	fp->f_ops = &socketops;
+	error = socreate(uap->domain, &so, uap->type, uap->protocol);
+	if (error == 0) {
+		fp->f_data = (caddr_t)so;
+		*retval = fd;
+		return (0);
+	}
+	/* undo the descriptor allocation */
+	fdp->fd_ofiles[fd] = 0;
+	ffree(fp);
+	return (error);
+}
+
+/* Arguments of the bind system call. */
+struct bind_args {
+	int	s;
+	caddr_t	name;
+	int	namelen;
+};
+/*
+ * Look up the socket for descriptor uap->s, copy the caller's address
+ * into an MT_SONAME mbuf and pass it to sobind().  The mbuf is freed
+ * before returning sobind()'s result.
+ */
+/* ARGSUSED */
+bind(p, uap, retval)
+	struct proc *p;
+	register struct bind_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct mbuf *addr;
+	int error;
+
+	error = getsock(p->p_fd, uap->s, &fp);
+	if (error)
+		return (error);
+	error = sockargs(&addr, uap->name, uap->namelen, MT_SONAME);
+	if (error)
+		return (error);
+	error = sobind((struct socket *)fp->f_data, addr);
+	m_freem(addr);
+	return (error);
+}
+
+/* Arguments of the listen system call. */
+struct listen_args {
+	int	s;
+	int	backlog;
+};
+/*
+ * Resolve descriptor uap->s to its socket and call solisten() on it
+ * with the requested backlog.
+ */
+/* ARGSUSED */
+listen(p, uap, retval)
+	struct proc *p;
+	register struct listen_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	int error;
+
+	error = getsock(p->p_fd, uap->s, &fp);
+	if (error != 0)
+		return (error);
+	return (solisten((struct socket *)fp->f_data, uap->backlog));
+}
+
+/*
+ * Arguments of the accept system call.  With COMPAT_OLDSOCK an extra
+ * compat_43 slot is appended; it is not supplied by the caller but
+ * filled in by the accept()/oaccept() wrappers below before they call
+ * accept1().
+ */
+struct accept_args {
+ int s;
+ caddr_t name;
+ int *anamelen;
+#ifdef COMPAT_OLDSOCK
+ int compat_43; /* pseudo */
+#endif
+};
+
+#ifdef COMPAT_OLDSOCK
+/* Current accept entry point: clear compat_43 and run accept1(). */
+accept(p, uap, retval)
+ struct proc *p;
+ struct accept_args *uap;
+ int *retval;
+{
+
+ uap->compat_43 = 0;
+ return (accept1(p, uap, retval));
+}
+
+/* Old accept entry point: set compat_43 and run accept1(). */
+oaccept(p, uap, retval)
+ struct proc *p;
+ struct accept_args *uap;
+ int *retval;
+{
+
+ uap->compat_43 = 1;
+ return (accept1(p, uap, retval));
+}
+#else /* COMPAT_OLDSOCK */
+
+/* Without the compatibility code the function defined below is accept() itself. */
+#define accept1 accept
+#endif
+
+/*
+ * Common implementation of accept; with COMPAT_OLDSOCK it is also
+ * reached through oaccept(), which sets uap->compat_43.
+ *
+ * Sleeps (unless the socket has SS_NBIO set) until the listening
+ * socket uap->s has a queued connection, takes the first socket off
+ * so_q, attaches it to a new descriptor returned through retval and,
+ * when uap->name is set, copies out the address obtained from
+ * soaccept() together with its (possibly clamped) length.
+ *
+ * Errors: failures of copyin(), getsock(), tsleep(), falloc() and
+ * copyout() are passed through; EINVAL if the caller-supplied name
+ * length is negative or the socket lacks SO_ACCEPTCONN; EWOULDBLOCK
+ * for a non-blocking socket with an empty queue; a pending so_error is
+ * returned and cleared.
+ */
+accept1(p, uap, retval)
+	struct proc *p;
+	register struct accept_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct mbuf *nam;
+	int namelen, error, s;
+	register struct socket *so;
+
+	if (uap->name) {
+		if (error = copyin((caddr_t)uap->anamelen,
+		    (caddr_t)&namelen, sizeof (namelen)))
+			return (error);
+		/*
+		 * namelen is only clamped from above further down; a
+		 * negative value would reach copyout() as a huge
+		 * unsigned count, so refuse it here.
+		 */
+		if (namelen < 0)
+			return (EINVAL);
+	}
+	if (error = getsock(p->p_fd, uap->s, &fp))
+		return (error);
+	s = splnet();
+	so = (struct socket *)fp->f_data;
+	if ((so->so_options & SO_ACCEPTCONN) == 0) {
+		splx(s);
+		return (EINVAL);
+	}
+	if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
+		splx(s);
+		return (EWOULDBLOCK);
+	}
+	/* wait for a connection, an error, or the receive side closing */
+	while (so->so_qlen == 0 && so->so_error == 0) {
+		if (so->so_state & SS_CANTRCVMORE) {
+			so->so_error = ECONNABORTED;
+			break;
+		}
+		if (error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
+		    netcon, 0)) {
+			splx(s);
+			return (error);
+		}
+	}
+	if (so->so_error) {
+		error = so->so_error;
+		so->so_error = 0;
+		splx(s);
+		return (error);
+	}
+	/* from here on fp refers to the new descriptor's file entry */
+	if (error = falloc(p, &fp, retval)) {
+		splx(s);
+		return (error);
+	}
+	{ struct socket *aso = so->so_q;
+	  if (soqremque(aso, 1) == 0)
+		panic("accept");
+	  so = aso;
+	}
+	fp->f_type = DTYPE_SOCKET;
+	fp->f_flag = FREAD|FWRITE;
+	fp->f_ops = &socketops;
+	fp->f_data = (caddr_t)so;
+	nam = m_get(M_WAIT, MT_SONAME);
+	(void) soaccept(so, nam);
+	if (uap->name) {
+#ifdef COMPAT_OLDSOCK
+		if (uap->compat_43)
+			mtod(nam, struct osockaddr *)->sa_family =
+			    mtod(nam, struct sockaddr *)->sa_family;
+#endif
+		if (namelen > nam->m_len)
+			namelen = nam->m_len;
+		/* SHOULD COPY OUT A CHAIN HERE */
+		if ((error = copyout(mtod(nam, caddr_t), (caddr_t)uap->name,
+		    (u_int)namelen)) == 0)
+			error = copyout((caddr_t)&namelen,
+			    (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
+	}
+	m_freem(nam);
+	splx(s);
+	return (error);
+}
+
+/* Arguments of the connect system call. */
+struct connect_args {
+ int s;
+ caddr_t name;
+ int namelen;
+};
+/*
+ * Start a connection on socket uap->s to the address in uap->name.
+ *
+ * For a socket with SS_NBIO set: EALREADY if a connect is already in
+ * progress, EINPROGRESS if the new attempt has not completed when
+ * soconnect() returns.  Otherwise the caller sleeps on so_timeo until
+ * SS_ISCONNECTING clears or so_error is set, and the pending so_error
+ * (cleared here) becomes the result.  ERESTART is reported as EINTR.
+ */
+/* ARGSUSED */
+connect(p, uap, retval)
+ struct proc *p;
+ register struct connect_args *uap;
+ int *retval;
+{
+ struct file *fp;
+ register struct socket *so;
+ struct mbuf *nam;
+ int error, s;
+
+ if (error = getsock(p->p_fd, uap->s, &fp))
+ return (error);
+ so = (struct socket *)fp->f_data;
+ if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING))
+ return (EALREADY);
+ if (error = sockargs(&nam, uap->name, uap->namelen, MT_SONAME))
+ return (error);
+ error = soconnect(so, nam);
+ if (error)
+ goto bad;
+ if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
+ /* non-blocking: leave SS_ISCONNECTING set and return at once */
+ m_freem(nam);
+ return (EINPROGRESS);
+ }
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
+ if (error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
+ netcon, 0))
+ break;
+ if (error == 0) {
+ /* sleep was not interrupted: pick up and clear the socket error */
+ error = so->so_error;
+ so->so_error = 0;
+ }
+ splx(s);
+bad:
+ so->so_state &= ~SS_ISCONNECTING;
+ m_freem(nam);
+ if (error == ERESTART)
+ error = EINTR;
+ return (error);
+}
+
+/* Arguments of the socketpair system call; rsv points at the caller's int[2]. */
+struct socketpair_args {
+ int domain;
+ int type;
+ int protocol;
+ int *rsv;
+};
+/*
+ * Create two sockets of the requested kind, give each a descriptor,
+ * connect them with soconnect2() and copy the two descriptor numbers
+ * out to uap->rsv (they are also stored in retval[0..1]).
+ *
+ * Failures before the copyout unwind through the free4..free1 labels,
+ * each of which falls through to the ones below it.
+ */
+socketpair(p, uap, retval)
+ struct proc *p;
+ register struct socketpair_args *uap;
+ int retval[];
+{
+ register struct filedesc *fdp = p->p_fd;
+ struct file *fp1, *fp2;
+ struct socket *so1, *so2;
+ int fd, error, sv[2];
+
+ if (error = socreate(uap->domain, &so1, uap->type, uap->protocol))
+ return (error);
+ if (error = socreate(uap->domain, &so2, uap->type, uap->protocol))
+ goto free1;
+ if (error = falloc(p, &fp1, &fd))
+ goto free2;
+ sv[0] = fd;
+ fp1->f_flag = FREAD|FWRITE;
+ fp1->f_type = DTYPE_SOCKET;
+ fp1->f_ops = &socketops;
+ fp1->f_data = (caddr_t)so1;
+ if (error = falloc(p, &fp2, &fd))
+ goto free3;
+ fp2->f_flag = FREAD|FWRITE;
+ fp2->f_type = DTYPE_SOCKET;
+ fp2->f_ops = &socketops;
+ fp2->f_data = (caddr_t)so2;
+ sv[1] = fd;
+ if (error = soconnect2(so1, so2))
+ goto free4;
+ if (uap->type == SOCK_DGRAM) {
+ /*
+ * Datagram socket connection is asymmetric.
+ */
+ if (error = soconnect2(so2, so1))
+ goto free4;
+ }
+ /* note: a copyout failure is returned with both descriptors left in place */
+ error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
+ retval[0] = sv[0]; /* XXX ??? */
+ retval[1] = sv[1]; /* XXX ??? */
+ return (error);
+free4:
+ /* release the second file entry and its descriptor slot */
+ ffree(fp2);
+ fdp->fd_ofiles[sv[1]] = 0;
+free3:
+ /* release the first file entry and its descriptor slot */
+ ffree(fp1);
+ fdp->fd_ofiles[sv[0]] = 0;
+free2:
+ (void)soclose(so2);
+free1:
+ (void)soclose(so1);
+ return (error);
+}
+
+/* Arguments of the sendto system call. */
+struct sendto_args {
+	int	s;
+	caddr_t	buf;
+	size_t	len;
+	int	flags;
+	caddr_t	to;
+	int	tolen;
+};
+/*
+ * Wrap the caller's single buffer and destination address in a
+ * msghdr with no control data and pass it to sendit().
+ */
+sendto(p, uap, retval)
+	struct proc *p;
+	register struct sendto_args *uap;
+	int *retval;
+{
+	struct iovec aiov;
+	struct msghdr msg;
+
+	/* one-element I/O vector covering the user buffer */
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->len;
+	msg.msg_iov = &aiov;
+	msg.msg_iovlen = 1;
+	/* destination address, no control data */
+	msg.msg_name = uap->to;
+	msg.msg_namelen = uap->tolen;
+	msg.msg_control = 0;
+#ifdef COMPAT_OLDSOCK
+	msg.msg_flags = 0;
+#endif
+	return (sendit(p, uap->s, &msg, uap->flags, retval));
+}
+
+#ifdef COMPAT_OLDSOCK
+/* Arguments of the old send system call. */
+struct osend_args {
+	int	s;
+	caddr_t	buf;
+	int	len;
+	int	flags;
+};
+/*
+ * Old send entry point: describe the caller's buffer with a msghdr
+ * that carries neither an address nor control data and hand it to
+ * sendit().
+ */
+osend(p, uap, retval)
+	struct proc *p;
+	register struct osend_args *uap;
+	int *retval;
+{
+	struct iovec aiov;
+	struct msghdr msg;
+
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->len;
+	msg.msg_iov = &aiov;
+	msg.msg_iovlen = 1;
+	msg.msg_name = 0;
+	msg.msg_namelen = 0;
+	msg.msg_control = 0;
+	msg.msg_flags = 0;
+	return (sendit(p, uap->s, &msg, uap->flags, retval));
+}
+
+#define MSG_COMPAT	0x8000
+/* Arguments of the old sendmsg system call; msg points at a struct omsghdr. */
+struct osendmsg_args {
+	int	s;
+	caddr_t	msg;
+	int	flags;
+};
+/*
+ * Old sendmsg entry point.  Copies in the caller's omsghdr and its
+ * iovec array (heap-allocated when it has UIO_SMALLIOV or more
+ * entries, EMSGSIZE at UIO_MAXIOV or more), marks the header with
+ * MSG_COMPAT and calls sendit().
+ */
+osendmsg(p, uap, retval)
+	struct proc *p;
+	register struct osendmsg_args *uap;
+	int *retval;
+{
+	struct msghdr msg;
+	struct iovec aiov[UIO_SMALLIOV], *iov;
+	int error;
+
+	error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
+	if (error)
+		return (error);
+	if ((u_int)msg.msg_iovlen < UIO_SMALLIOV)
+		iov = aiov;
+	else {
+		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
+			return (EMSGSIZE);
+		MALLOC(iov, struct iovec *,
+		    sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
+		    M_WAITOK);
+	}
+	error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
+	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
+	if (error == 0) {
+		msg.msg_flags = MSG_COMPAT;
+		msg.msg_iov = iov;
+		error = sendit(p, uap->s, &msg, uap->flags, retval);
+	}
+	/* only a heap-allocated vector needs releasing */
+	if (iov != aiov)
+		FREE(iov, M_IOV);
+	return (error);
+}
+#endif
+
+/* Arguments of the sendmsg system call; msg points at a struct msghdr. */
+struct sendmsg_args {
+	int	s;
+	caddr_t	msg;
+	int	flags;
+};
+/*
+ * Copy in the caller's msghdr and its iovec array (heap-allocated
+ * when it has UIO_SMALLIOV or more entries, EMSGSIZE at UIO_MAXIOV or
+ * more) and pass the result to sendit().
+ */
+sendmsg(p, uap, retval)
+	struct proc *p;
+	register struct sendmsg_args *uap;
+	int *retval;
+{
+	struct msghdr msg;
+	struct iovec aiov[UIO_SMALLIOV], *iov;
+	int error;
+
+	error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
+	if (error)
+		return (error);
+	if ((u_int)msg.msg_iovlen < UIO_SMALLIOV)
+		iov = aiov;
+	else {
+		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
+			return (EMSGSIZE);
+		MALLOC(iov, struct iovec *,
+		    sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
+		    M_WAITOK);
+	}
+	/* an empty vector needs no copyin; error is still 0 in that case */
+	if (msg.msg_iovlen != 0)
+		error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
+		    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
+	if (error == 0) {
+		msg.msg_iov = iov;
+#ifdef COMPAT_OLDSOCK
+		msg.msg_flags = 0;
+#endif
+		error = sendit(p, uap->s, &msg, uap->flags, retval);
+	}
+	if (iov != aiov)
+		FREE(iov, M_IOV);
+	return (error);
+}
+
+/*
+ * Common back end of sendto(), sendmsg() and the old send entry
+ * points in this file.
+ *
+ * p       - calling process
+ * s       - descriptor of the socket to send on
+ * mp      - message header; msg_iov must already point at a kernel
+ *           copy of the iovec array, while msg_name and msg_control
+ *           are copied in here through sockargs()
+ * flags   - passed to sosend()
+ * retsize - receives the number of bytes sent when 0 is returned
+ *
+ * Returns 0 or an errno value.  EINVAL is returned for a negative
+ * iov_len, for a total length that overflows, and for control data
+ * shorter than a cmsghdr (except for MSG_COMPAT callers).  A send cut
+ * short by ERESTART, EINTR or EWOULDBLOCK after some data was
+ * transferred counts as success.  EPIPE also posts SIGPIPE.
+ */
+sendit(p, s, mp, flags, retsize)
+ register struct proc *p;
+ int s;
+ register struct msghdr *mp;
+ int flags, *retsize;
+{
+ struct file *fp;
+ struct uio auio;
+ register struct iovec *iov;
+ register int i;
+ struct mbuf *to, *control;
+ int len, error;
+#ifdef KTRACE
+ struct iovec *ktriov = NULL;
+#endif
+
+ if (error = getsock(p->p_fd, s, &fp))
+ return (error);
+ auio.uio_iov = mp->msg_iov;
+ auio.uio_iovcnt = mp->msg_iovlen;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_procp = p;
+ auio.uio_offset = 0; /* XXX */
+ auio.uio_resid = 0;
+ iov = mp->msg_iov;
+ /* total up the request size, rejecting negative and overflowing lengths */
+ for (i = 0; i < mp->msg_iovlen; i++, iov++) {
+ if (iov->iov_len < 0)
+ return (EINVAL);
+ if ((auio.uio_resid += iov->iov_len) < 0)
+ return (EINVAL);
+ }
+ if (mp->msg_name) {
+ if (error = sockargs(&to, mp->msg_name, mp->msg_namelen,
+ MT_SONAME))
+ return (error);
+ } else
+ to = 0;
+ if (mp->msg_control) {
+ if (mp->msg_controllen < sizeof(struct cmsghdr)
+#ifdef COMPAT_OLDSOCK
+ && mp->msg_flags != MSG_COMPAT
+#endif
+ ) {
+ error = EINVAL;
+ goto bad;
+ }
+ if (error = sockargs(&control, mp->msg_control,
+ mp->msg_controllen, MT_CONTROL))
+ goto bad;
+#ifdef COMPAT_OLDSOCK
+ if (mp->msg_flags == MSG_COMPAT) {
+ register struct cmsghdr *cm;
+
+ /* old callers pass bare data: wrap it in an SCM_RIGHTS cmsghdr */
+ M_PREPEND(control, sizeof(*cm), M_WAIT);
+ if (control == 0) {
+ error = ENOBUFS;
+ goto bad;
+ } else {
+ cm = mtod(control, struct cmsghdr *);
+ cm->cmsg_len = control->m_len;
+ cm->cmsg_level = SOL_SOCKET;
+ cm->cmsg_type = SCM_RIGHTS;
+ }
+ }
+#endif
+ } else
+ control = 0;
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_GENIO)) {
+ int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
+
+ /* keep a private copy of the iovec array for the trace record */
+ MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
+ bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
+ }
+#endif
+ len = auio.uio_resid;
+ if (error = sosend((struct socket *)fp->f_data, to, &auio,
+ (struct mbuf *)0, control, flags)) {
+ /* interrupted after a partial transfer: report the bytes sent */
+ if (auio.uio_resid != len && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ if (error == EPIPE)
+ psignal(p, SIGPIPE);
+ }
+ if (error == 0)
+ *retsize = len - auio.uio_resid;
+#ifdef KTRACE
+ if (ktriov != NULL) {
+ if (error == 0)
+ ktrgenio(p->p_tracep, s, UIO_WRITE,
+ ktriov, *retsize, error);
+ FREE(ktriov, M_TEMP);
+ }
+#endif
+bad:
+ /* control is not freed here -- presumably sosend() disposes of it; TODO confirm */
+ if (to)
+ m_freem(to);
+ return (error);
+}
+
+/* Arguments of the recvfrom system call (shared with the old entry point). */
+struct recvfrom_args {
+	int	s;
+	caddr_t	buf;
+	size_t	len;
+	int	flags;
+	caddr_t	from;
+	int	*fromlenaddr;
+};
+
+#ifdef COMPAT_OLDSOCK
+/*
+ * Old recvfrom entry point: tag the request with MSG_COMPAT and let
+ * recvfrom() do the work.
+ */
+orecvfrom(p, uap, retval)
+	struct proc *p;
+	struct recvfrom_args *uap;
+	int *retval;
+{
+
+	uap->flags = uap->flags | MSG_COMPAT;
+	return (recvfrom(p, uap, retval));
+}
+#endif
+
+/*
+ * Receive into a single user buffer.  The caller's address-buffer
+ * length is fetched from *fromlenaddr when that pointer is set (0
+ * otherwise); the request is then described by a msghdr without
+ * control data and passed to recvit(), which also gets fromlenaddr so
+ * it can store the resulting address length.
+ */
+recvfrom(p, uap, retval)
+	struct proc *p;
+	register struct recvfrom_args *uap;
+	int *retval;
+{
+	struct iovec aiov;
+	struct msghdr msg;
+	int error;
+
+	if (uap->fromlenaddr == 0)
+		msg.msg_namelen = 0;
+	else {
+		error = copyin((caddr_t)uap->fromlenaddr,
+		    (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
+		if (error)
+			return (error);
+	}
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->len;
+	msg.msg_iov = &aiov;
+	msg.msg_iovlen = 1;
+	msg.msg_name = uap->from;
+	msg.msg_control = 0;
+	msg.msg_flags = uap->flags;
+	return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr, retval));
+}
+
+#ifdef COMPAT_OLDSOCK
+/* Arguments of the old recv system call. */
+struct orecv_args {
+	int	s;
+	caddr_t	buf;
+	int	len;
+	int	flags;
+};
+/*
+ * Old recv entry point: receive into one user buffer, asking for
+ * neither the sender's address nor control data.
+ */
+orecv(p, uap, retval)
+	struct proc *p;
+	register struct orecv_args *uap;
+	int *retval;
+{
+	struct iovec aiov;
+	struct msghdr msg;
+
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->len;
+	msg.msg_iov = &aiov;
+	msg.msg_iovlen = 1;
+	msg.msg_name = 0;
+	msg.msg_namelen = 0;
+	msg.msg_control = 0;
+	msg.msg_flags = uap->flags;
+	return (recvit(p, uap->s, &msg, (caddr_t)0, retval));
+}
+
+/*
+ * Old recvmsg.  The old msghdr has the same layout as the leading
+ * part of the new one: it merely lacks the flags word, and its access
+ * rights fields occupy the place of today's control fields.  That is
+ * why an omsghdr can be copied straight into a struct msghdr here.
+ */
+struct orecvmsg_args {
+	int	s;
+	struct	omsghdr *msg;
+	int	flags;
+};
+orecvmsg(p, uap, retval)
+	struct proc *p;
+	register struct orecvmsg_args *uap;
+	int *retval;
+{
+	struct msghdr msg;
+	struct iovec aiov[UIO_SMALLIOV], *iov;
+	int error;
+
+	error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
+	    sizeof (struct omsghdr));
+	if (error)
+		return (error);
+	if ((u_int)msg.msg_iovlen < UIO_SMALLIOV)
+		iov = aiov;
+	else {
+		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
+			return (EMSGSIZE);
+		MALLOC(iov, struct iovec *,
+		    sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
+		    M_WAITOK);
+	}
+	msg.msg_flags = uap->flags | MSG_COMPAT;
+	error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
+	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
+	if (error == 0) {
+		msg.msg_iov = iov;
+		error = recvit(p, uap->s, &msg,
+		    (caddr_t)&uap->msg->msg_namelen, retval);
+
+		/* report the received access-rights length back to the caller */
+		if (msg.msg_controllen && error == 0)
+			error = copyout((caddr_t)&msg.msg_controllen,
+			    (caddr_t)&uap->msg->msg_accrightslen,
+			    sizeof (int));
+	}
+	if (iov != aiov)
+		FREE(iov, M_IOV);
+	return (error);
+}
+#endif
+
+/* Arguments of the recvmsg system call. */
+struct recvmsg_args {
+	int	s;
+	struct	msghdr *msg;
+	int	flags;
+};
+/*
+ * Copy in the caller's msghdr and iovec array (heap-allocated when it
+ * has UIO_SMALLIOV or more entries, EMSGSIZE at UIO_MAXIOV or more),
+ * receive through recvit() and, on success, copy the updated msghdr
+ * back out with msg_iov restored to the caller's own pointer.
+ */
+recvmsg(p, uap, retval)
+	struct proc *p;
+	register struct recvmsg_args *uap;
+	int *retval;
+{
+	struct msghdr msg;
+	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
+	register int error;
+
+	error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
+	if (error)
+		return (error);
+	if ((u_int)msg.msg_iovlen < UIO_SMALLIOV)
+		iov = aiov;
+	else {
+		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
+			return (EMSGSIZE);
+		MALLOC(iov, struct iovec *,
+		    sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
+		    M_WAITOK);
+	}
+#ifdef COMPAT_OLDSOCK
+	msg.msg_flags = uap->flags &~ MSG_COMPAT;
+#else
+	msg.msg_flags = uap->flags;
+#endif
+	/* remember the user's vector pointer; work on the kernel copy */
+	uiov = msg.msg_iov;
+	msg.msg_iov = iov;
+	error = copyin((caddr_t)uiov, (caddr_t)iov,
+	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
+	if (error == 0) {
+		error = recvit(p, uap->s, &msg, (caddr_t)0, retval);
+		if (error == 0) {
+			msg.msg_iov = uiov;
+			error = copyout((caddr_t)&msg, (caddr_t)uap->msg,
+			    sizeof(msg));
+		}
+	}
+	if (iov != aiov)
+		FREE(iov, M_IOV);
+	return (error);
+}
+
+/*
+ * Common back end of recvfrom(), recvmsg() and the old receive entry
+ * points in this file.
+ *
+ * p        - calling process
+ * s        - descriptor of the socket to receive from
+ * mp       - message header; msg_iov must already point at a kernel
+ *            copy of the iovec array.  msg_namelen, msg_controllen
+ *            and msg_flags are updated here.
+ * namelenp - if non-null, user address that receives the length of
+ *            the copied-out source address
+ * retsize  - receives the number of bytes received when 0 is returned
+ *
+ * Returns 0 or an errno value.  EINVAL is returned for a negative
+ * iov_len or a total length that overflows.  A receive cut short by
+ * ERESTART, EINTR or EWOULDBLOCK after some data was transferred
+ * counts as success.  The address and control mbufs handed back by
+ * soreceive() are freed before returning.
+ */
+recvit(p, s, mp, namelenp, retsize)
+ register struct proc *p;
+ int s;
+ register struct msghdr *mp;
+ caddr_t namelenp;
+ int *retsize;
+{
+ struct file *fp;
+ struct uio auio;
+ register struct iovec *iov;
+ register int i;
+ int len, error;
+ struct mbuf *from = 0, *control = 0;
+#ifdef KTRACE
+ struct iovec *ktriov = NULL;
+#endif
+
+ if (error = getsock(p->p_fd, s, &fp))
+ return (error);
+ auio.uio_iov = mp->msg_iov;
+ auio.uio_iovcnt = mp->msg_iovlen;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_procp = p;
+ auio.uio_offset = 0; /* XXX */
+ auio.uio_resid = 0;
+ iov = mp->msg_iov;
+ /* total up the buffer space, rejecting negative and overflowing lengths */
+ for (i = 0; i < mp->msg_iovlen; i++, iov++) {
+ if (iov->iov_len < 0)
+ return (EINVAL);
+ if ((auio.uio_resid += iov->iov_len) < 0)
+ return (EINVAL);
+ }
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_GENIO)) {
+ int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
+
+ /* keep a private copy of the iovec array for the trace record */
+ MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
+ bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
+ }
+#endif
+ len = auio.uio_resid;
+ /* control data is requested only when the caller supplied a buffer for it */
+ if (error = soreceive((struct socket *)fp->f_data, &from, &auio,
+ (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
+ &mp->msg_flags)) {
+ /* interrupted after a partial transfer: report the bytes received */
+ if (auio.uio_resid != len && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ }
+#ifdef KTRACE
+ if (ktriov != NULL) {
+ if (error == 0)
+ ktrgenio(p->p_tracep, s, UIO_READ,
+ ktriov, len - auio.uio_resid, error);
+ FREE(ktriov, M_TEMP);
+ }
+#endif
+ if (error)
+ goto out;
+ *retsize = len - auio.uio_resid;
+ if (mp->msg_name) {
+ /* len is reused from here on as the address length to copy out */
+ len = mp->msg_namelen;
+ if (len <= 0 || from == 0)
+ len = 0;
+ else {
+#ifdef COMPAT_OLDSOCK
+ if (mp->msg_flags & MSG_COMPAT)
+ mtod(from, struct osockaddr *)->sa_family =
+ mtod(from, struct sockaddr *)->sa_family;
+#endif
+ if (len > from->m_len)
+ len = from->m_len;
+ /* else if len < from->m_len ??? */
+ if (error = copyout(mtod(from, caddr_t),
+ (caddr_t)mp->msg_name, (unsigned)len))
+ goto out;
+ }
+ mp->msg_namelen = len;
+ if (namelenp &&
+ (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
+#ifdef COMPAT_OLDSOCK
+ if (mp->msg_flags & MSG_COMPAT)
+ error = 0; /* old recvfrom didn't check */
+ else
+#endif
+ goto out;
+ }
+ }
+ if (mp->msg_control) {
+#ifdef COMPAT_OLDSOCK
+ /*
+ * We assume that old recvmsg calls won't receive access
+ * rights and other control info, esp. as control info
+ * is always optional and those options didn't exist in 4.3.
+ * If we receive rights, trim the cmsghdr; anything else
+ * is tossed.
+ */
+ if (control && mp->msg_flags & MSG_COMPAT) {
+ if (mtod(control, struct cmsghdr *)->cmsg_level !=
+ SOL_SOCKET ||
+ mtod(control, struct cmsghdr *)->cmsg_type !=
+ SCM_RIGHTS) {
+ mp->msg_controllen = 0;
+ goto out;
+ }
+ control->m_len -= sizeof (struct cmsghdr);
+ control->m_data += sizeof (struct cmsghdr);
+ }
+#endif
+ len = mp->msg_controllen;
+ if (len <= 0 || control == 0)
+ len = 0;
+ else {
+ /* caller's buffer too small: copy what fits and flag the truncation */
+ if (len >= control->m_len)
+ len = control->m_len;
+ else
+ mp->msg_flags |= MSG_CTRUNC;
+ error = copyout((caddr_t)mtod(control, caddr_t),
+ (caddr_t)mp->msg_control, (unsigned)len);
+ }
+ mp->msg_controllen = len;
+ }
+out:
+ if (from)
+ m_freem(from);
+ if (control)
+ m_freem(control);
+ return (error);
+}
+
+/* Arguments of the shutdown system call. */
+struct shutdown_args {
+	int	s;
+	int	how;
+};
+/*
+ * Resolve descriptor uap->s to its socket and call soshutdown() on it
+ * with the caller's "how" value.
+ */
+/* ARGSUSED */
+shutdown(p, uap, retval)
+	struct proc *p;
+	register struct shutdown_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	int error;
+
+	error = getsock(p->p_fd, uap->s, &fp);
+	if (error != 0)
+		return (error);
+	return (soshutdown((struct socket *)fp->f_data, uap->how));
+}
+
+/* Arguments of the setsockopt system call. */
+struct setsockopt_args {
+	int	s;
+	int	level;
+	int	name;
+	caddr_t	val;
+	int	valsize;
+};
+/*
+ * Copy the option value in from user space (when one is supplied) and
+ * hand it to sosetopt() for the socket behind descriptor uap->s.
+ *
+ * The value has to fit in a single mbuf, so valsize must lie in
+ * [0, MLEN]; anything else is EINVAL.  The comparison is done
+ * unsigned, as in sockargs(), so that a negative valsize is rejected
+ * instead of turning into a huge copyin() length that would overrun
+ * the mbuf.  When uap->val is null, sosetopt() is called with a null
+ * mbuf.
+ *
+ * Returns 0 or an errno value (from getsock(), copyin() or
+ * sosetopt(), or EINVAL/ENOBUFS generated here).
+ */
+/* ARGSUSED */
+setsockopt(p, uap, retval)
+	struct proc *p;
+	register struct setsockopt_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct mbuf *m = NULL;
+	int error;
+
+	if (error = getsock(p->p_fd, uap->s, &fp))
+		return (error);
+	if ((u_int)uap->valsize > MLEN)
+		return (EINVAL);
+	if (uap->val) {
+		m = m_get(M_WAIT, MT_SOOPTS);
+		if (m == NULL)
+			return (ENOBUFS);
+		if (error = copyin(uap->val, mtod(m, caddr_t),
+		    (u_int)uap->valsize)) {
+			(void) m_free(m);
+			return (error);
+		}
+		m->m_len = uap->valsize;
+	}
+	return (sosetopt((struct socket *)fp->f_data, uap->level,
+	    uap->name, m));
+}
+
+/* Arguments of the getsockopt system call. */
+struct getsockopt_args {
+	int	s;
+	int	level;
+	int	name;
+	caddr_t	val;
+	int	*avalsize;
+};
+/*
+ * Fetch an option value with sogetopt() and copy it out to uap->val.
+ *
+ * The caller's buffer size is read from *avalsize (only when uap->val
+ * is set); at most that many bytes, and no more than the option mbuf
+ * holds, are copied out, and the number actually copied is written
+ * back to *avalsize.  A negative buffer size is rejected with EINVAL:
+ * it would pass the upper clamp untouched and reach copyout() as a
+ * huge unsigned length.
+ *
+ * Returns 0 or an errno value (from getsock(), copyin(), sogetopt()
+ * or copyout(), or the EINVAL generated here).
+ */
+/* ARGSUSED */
+getsockopt(p, uap, retval)
+	struct proc *p;
+	register struct getsockopt_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct mbuf *m = NULL;
+	int valsize, error;
+
+	if (error = getsock(p->p_fd, uap->s, &fp))
+		return (error);
+	if (uap->val) {
+		if (error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
+		    sizeof (valsize)))
+			return (error);
+		if (valsize < 0)
+			return (EINVAL);
+	} else
+		valsize = 0;
+	if ((error = sogetopt((struct socket *)fp->f_data, uap->level,
+	    uap->name, &m)) == 0 && uap->val && valsize && m != NULL) {
+		if (valsize > m->m_len)
+			valsize = m->m_len;
+		error = copyout(mtod(m, caddr_t), uap->val, (u_int)valsize);
+		if (error == 0)
+			error = copyout((caddr_t)&valsize,
+			    (caddr_t)uap->avalsize, sizeof (valsize));
+	}
+	if (m != NULL)
+		(void) m_free(m);
+	return (error);
+}
+
+/* pipe takes no arguments; the structure is only a placeholder. */
+struct pipe_args {
+ int dummy;
+};
+/*
+ * Build a pipe from two AF_UNIX stream sockets: a read-only
+ * descriptor (returned in retval[0]) and a write-only descriptor
+ * (returned in retval[1]), joined with unp_connect2().
+ *
+ * On failure everything created so far is torn down through the
+ * free4..free1 labels, each of which falls through to the ones below.
+ */
+/* ARGSUSED */
+pipe(p, uap, retval)
+ struct proc *p;
+ struct pipe_args *uap;
+ int retval[];
+{
+ register struct filedesc *fdp = p->p_fd;
+ struct file *rf, *wf;
+ struct socket *rso, *wso;
+ int fd, error;
+
+ if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0))
+ return (error);
+ if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0))
+ goto free1;
+ if (error = falloc(p, &rf, &fd))
+ goto free2;
+ retval[0] = fd;
+ /* read end */
+ rf->f_flag = FREAD;
+ rf->f_type = DTYPE_SOCKET;
+ rf->f_ops = &socketops;
+ rf->f_data = (caddr_t)rso;
+ if (error = falloc(p, &wf, &fd))
+ goto free3;
+ /* write end */
+ wf->f_flag = FWRITE;
+ wf->f_type = DTYPE_SOCKET;
+ wf->f_ops = &socketops;
+ wf->f_data = (caddr_t)wso;
+ retval[1] = fd;
+ if (error = unp_connect2(wso, rso))
+ goto free4;
+ return (0);
+free4:
+ /* release the write end's file entry and descriptor slot */
+ ffree(wf);
+ fdp->fd_ofiles[retval[1]] = 0;
+free3:
+ /* release the read end's file entry and descriptor slot */
+ ffree(rf);
+ fdp->fd_ofiles[retval[0]] = 0;
+free2:
+ (void)soclose(wso);
+free1:
+ (void)soclose(rso);
+ return (error);
+}
+
+/*
+ * Get socket name.
+ *
+ * With COMPAT_OLDSOCK the argument structure carries an extra
+ * compat_43 slot that is not supplied by the caller; the
+ * getsockname()/ogetsockname() wrappers set it before calling
+ * getsockname1().
+ */
+struct getsockname_args {
+ int fdes;
+ caddr_t asa;
+ int *alen;
+#ifdef COMPAT_OLDSOCK
+ int compat_43; /* pseudo */
+#endif
+};
+#ifdef COMPAT_OLDSOCK
+/* Current entry point: clear compat_43 and run getsockname1(). */
+getsockname(p, uap, retval)
+ struct proc *p;
+ struct getsockname_args *uap;
+ int *retval;
+{
+
+ uap->compat_43 = 0;
+ return (getsockname1(p, uap, retval));
+}
+
+/* Old entry point: set compat_43 and run getsockname1(). */
+ogetsockname(p, uap, retval)
+ struct proc *p;
+ struct getsockname_args *uap;
+ int *retval;
+{
+
+ uap->compat_43 = 1;
+ return (getsockname1(p, uap, retval));
+}
+#else /* COMPAT_OLDSOCK */
+
+/* Without the compatibility code the function defined below is getsockname() itself. */
+#define getsockname1 getsockname
+#endif
+
+/*
+ * Common implementation of getsockname; with COMPAT_OLDSOCK it is
+ * also reached through ogetsockname(), which sets uap->compat_43.
+ *
+ * Reads the caller's buffer size from *alen, obtains the address from
+ * the protocol with a PRU_SOCKADDR request, copies out at most that
+ * many bytes to uap->asa and writes the number copied back to *alen.
+ * A negative buffer size is rejected with EINVAL: it would pass the
+ * upper clamp untouched and reach copyout() as a huge unsigned
+ * length.
+ *
+ * Returns 0 or an errno value (from getsock(), copyin(), the protocol
+ * request or copyout(), or EINVAL/ENOBUFS generated here).
+ */
+/* ARGSUSED */
+getsockname1(p, uap, retval)
+	struct proc *p;
+	register struct getsockname_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	register struct socket *so;
+	struct mbuf *m;
+	int len, error;
+
+	if (error = getsock(p->p_fd, uap->fdes, &fp))
+		return (error);
+	if (error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)))
+		return (error);
+	if (len < 0)
+		return (EINVAL);
+	so = (struct socket *)fp->f_data;
+	m = m_getclr(M_WAIT, MT_SONAME);
+	if (m == NULL)
+		return (ENOBUFS);
+	if (error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0))
+		goto bad;
+	if (len > m->m_len)
+		len = m->m_len;
+#ifdef COMPAT_OLDSOCK
+	if (uap->compat_43)
+		mtod(m, struct osockaddr *)->sa_family =
+		    mtod(m, struct sockaddr *)->sa_family;
+#endif
+	error = copyout(mtod(m, caddr_t), (caddr_t)uap->asa, (u_int)len);
+	if (error == 0)
+		error = copyout((caddr_t)&len, (caddr_t)uap->alen,
+		    sizeof (len));
+bad:
+	m_freem(m);
+	return (error);
+}
+
+/*
+ * Get name of peer for connected socket.
+ *
+ * With COMPAT_OLDSOCK the argument structure carries an extra
+ * compat_43 slot that is not supplied by the caller; the
+ * getpeername()/ogetpeername() wrappers set it before calling
+ * getpeername1().
+ */
+struct getpeername_args {
+ int fdes;
+ caddr_t asa;
+ int *alen;
+#ifdef COMPAT_OLDSOCK
+ int compat_43; /* pseudo */
+#endif
+};
+
+#ifdef COMPAT_OLDSOCK
+/* Current entry point: clear compat_43 and run getpeername1(). */
+getpeername(p, uap, retval)
+ struct proc *p;
+ struct getpeername_args *uap;
+ int *retval;
+{
+
+ uap->compat_43 = 0;
+ return (getpeername1(p, uap, retval));
+}
+
+/* Old entry point: set compat_43 and run getpeername1(). */
+ogetpeername(p, uap, retval)
+ struct proc *p;
+ struct getpeername_args *uap;
+ int *retval;
+{
+
+ uap->compat_43 = 1;
+ return (getpeername1(p, uap, retval));
+}
+#else /* COMPAT_OLDSOCK */
+
+/* Without the compatibility code the function defined below is getpeername() itself. */
+#define getpeername1 getpeername
+#endif
+
+/*
+ * Common implementation of getpeername; with COMPAT_OLDSOCK it is
+ * also reached through ogetpeername(), which sets uap->compat_43.
+ *
+ * Fails with ENOTCONN unless the socket has SS_ISCONNECTED or
+ * SS_ISCONFIRMING set.  Otherwise reads the caller's buffer size from
+ * *alen, obtains the address from the protocol with a PRU_PEERADDR
+ * request, copies out at most that many bytes to uap->asa and writes
+ * the number copied back to *alen.  A negative buffer size is
+ * rejected with EINVAL: it would pass the upper clamp untouched and
+ * reach copyout() as a huge unsigned length.
+ *
+ * Returns 0 or an errno value (from getsock(), copyin(), the protocol
+ * request or copyout(), or ENOTCONN/EINVAL/ENOBUFS generated here).
+ */
+/* ARGSUSED */
+getpeername1(p, uap, retval)
+	struct proc *p;
+	register struct getpeername_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	register struct socket *so;
+	struct mbuf *m;
+	int len, error;
+
+	if (error = getsock(p->p_fd, uap->fdes, &fp))
+		return (error);
+	so = (struct socket *)fp->f_data;
+	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
+		return (ENOTCONN);
+	if (error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)))
+		return (error);
+	if (len < 0)
+		return (EINVAL);
+	m = m_getclr(M_WAIT, MT_SONAME);
+	if (m == NULL)
+		return (ENOBUFS);
+	if (error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0))
+		goto bad;
+	if (len > m->m_len)
+		len = m->m_len;
+#ifdef COMPAT_OLDSOCK
+	if (uap->compat_43)
+		mtod(m, struct osockaddr *)->sa_family =
+		    mtod(m, struct sockaddr *)->sa_family;
+#endif
+	if (error = copyout(mtod(m, caddr_t), (caddr_t)uap->asa, (u_int)len))
+		goto bad;
+	error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
+bad:
+	m_freem(m);
+	return (error);
+}
+
+/*
+ * Copy buflen bytes from user address buf into a newly allocated mbuf
+ * of the given type and return it through *mp.
+ *
+ * Returns EINVAL if buflen (compared unsigned, so negative values are
+ * caught too) exceeds MLEN, ENOBUFS if no mbuf is available, or the
+ * copyin() error; the mbuf is freed on a copyin() failure and *mp is
+ * left untouched.  For MT_SONAME the sockaddr's sa_len is set to the
+ * length used.
+ */
+sockargs(mp, buf, buflen, type)
+ struct mbuf **mp;
+ caddr_t buf;
+ int buflen, type;
+{
+ register struct sockaddr *sa;
+ register struct mbuf *m;
+ int error;
+
+ if ((u_int)buflen > MLEN) {
+#ifdef COMPAT_OLDSOCK
+ /* oversized names up to 112 bytes are clamped to MLEN rather than refused */
+ if (type == MT_SONAME && (u_int)buflen <= 112)
+ buflen = MLEN; /* unix domain compat. hack */
+ else
+#endif
+ return (EINVAL);
+ }
+ m = m_get(M_WAIT, type);
+ if (m == NULL)
+ return (ENOBUFS);
+ m->m_len = buflen;
+ error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
+ if (error)
+ (void) m_free(m);
+ else {
+ *mp = m;
+ if (type == MT_SONAME) {
+ sa = mtod(m, struct sockaddr *);
+
+#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
+ /*
+ * NOTE(review): presumably handles old binaries whose
+ * sockaddr began with a 16-bit family, whose low byte
+ * lands in sa_len on little-endian machines -- confirm.
+ */
+ if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
+ sa->sa_family = sa->sa_len;
+#endif
+ sa->sa_len = buflen;
+ }
+ }
+ return (error);
+}
+
+/*
+ * Translate descriptor fdes of descriptor table fdp into its file
+ * entry, stored through fpp.  Returns EBADF when the descriptor is
+ * out of range or not open, ENOTSOCK when the file is not a socket,
+ * and 0 on success.
+ */
+getsock(fdp, fdes, fpp)
+	struct filedesc *fdp;
+	int fdes;
+	struct file **fpp;
+{
+	register struct file *fp;
+
+	/* the unsigned compare also rejects negative descriptors */
+	if ((unsigned)fdes >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[fdes];
+	if (fp == NULL)
+		return (EBADF);
+	if (fp->f_type != DTYPE_SOCKET)
+		return (ENOTSOCK);
+	*fpp = fp;
+	return (0);
+}
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
new file mode 100644
index 000000000000..94bf8f744c86
--- /dev/null
+++ b/sys/kern/uipc_usrreq.c
@@ -0,0 +1,823 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/unpcb.h>
+#include <sys/un.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mbuf.h>
+
+/*
+ * Unix communications domain.
+ *
+ * TODO:
+ * SEQPACKET, RDM
+ * rethink name space problems
+ * need a proper out-of-band
+ */
+/* Placeholder address handed out when a socket has no bound name. */
+struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
+ino_t unp_ino; /* prototype for fake inode numbers */
+
+/*
+ * User-request entry point for Unix domain sockets: carries out the
+ * PRU_* operation named by req on socket so.
+ *
+ * m, nam and control are the request's mbuf arguments; what they hold
+ * depends on req (for PRU_SENSE m is really a struct stat pointer, and
+ * for PRU_CONNECT2 nam is really the second socket).  Except on the
+ * early-return paths (PRU_CONTROL, PRU_SENSE, PRU_RCVOOB), any m or
+ * control that is still non-null at the end is freed here.
+ *
+ * Returns 0 or an errno value.
+ */
+/*ARGSUSED*/
+uipc_usrreq(so, req, m, nam, control)
+ struct socket *so;
+ int req;
+ struct mbuf *m, *nam, *control;
+{
+ struct unpcb *unp = sotounpcb(so);
+ register struct socket *so2;
+ register int error = 0;
+ struct proc *p = curproc; /* XXX */
+
+ if (req == PRU_CONTROL)
+ return (EOPNOTSUPP);
+ /* Control data is only accepted together with PRU_SEND. */
+ if (req != PRU_SEND && control && control->m_len) {
+ error = EOPNOTSUPP;
+ goto release;
+ }
+ /* Every request other than attach needs an existing control block. */
+ if (unp == 0 && req != PRU_ATTACH) {
+ error = EINVAL;
+ goto release;
+ }
+ switch (req) {
+
+ case PRU_ATTACH:
+ if (unp) {
+ error = EISCONN;
+ break;
+ }
+ error = unp_attach(so);
+ break;
+
+ case PRU_DETACH:
+ unp_detach(unp);
+ break;
+
+ case PRU_BIND:
+ error = unp_bind(unp, nam, p);
+ break;
+
+ case PRU_LISTEN:
+ /* Listening is refused unless the socket has been bound to a vnode. */
+ if (unp->unp_vnode == 0)
+ error = EINVAL;
+ break;
+
+ case PRU_CONNECT:
+ error = unp_connect(so, nam, p);
+ break;
+
+ case PRU_CONNECT2:
+ error = unp_connect2(so, (struct socket *)nam);
+ break;
+
+ case PRU_DISCONNECT:
+ unp_disconnect(unp);
+ break;
+
+ case PRU_ACCEPT:
+ /*
+ * Pass back name of connected socket,
+ * if it was bound and we are still connected
+ * (our peer may have closed already!).
+ */
+ if (unp->unp_conn && unp->unp_conn->unp_addr) {
+ nam->m_len = unp->unp_conn->unp_addr->m_len;
+ bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
+ mtod(nam, caddr_t), (unsigned)nam->m_len);
+ } else {
+ nam->m_len = sizeof(sun_noname);
+ *(mtod(nam, struct sockaddr *)) = sun_noname;
+ }
+ break;
+
+ case PRU_SHUTDOWN:
+ socantsendmore(so);
+ unp_shutdown(unp);
+ break;
+
+ case PRU_RCVD:
+ switch (so->so_type) {
+
+ case SOCK_DGRAM:
+ panic("uipc 1");
+ /*NOTREACHED*/
+
+ case SOCK_STREAM:
+#define rcv (&so->so_rcv)
+#define snd (&so2->so_snd)
+ if (unp->unp_conn == 0)
+ break;
+ so2 = unp->unp_conn->unp_socket;
+ /*
+ * Adjust backpressure on sender
+ * and wakeup any waiting to write.
+ */
+ /*
+ * unp_mbcnt/unp_cc remember the receive-buffer usage last
+ * charged to the sender; give back whatever has drained since.
+ */
+ snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
+ unp->unp_mbcnt = rcv->sb_mbcnt;
+ snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
+ unp->unp_cc = rcv->sb_cc;
+ sowwakeup(so2);
+#undef snd
+#undef rcv
+ break;
+
+ default:
+ panic("uipc 2");
+ }
+ break;
+
+ case PRU_SEND:
+ /* Turn any passed descriptor numbers into file pointers first. */
+ if (control && (error = unp_internalize(control, p)))
+ break;
+ switch (so->so_type) {
+
+ case SOCK_DGRAM: {
+ struct sockaddr *from;
+
+ /*
+ * With an explicit destination, connect for the duration of
+ * this one send (undone below); without one the socket must
+ * already be connected.
+ */
+ if (nam) {
+ if (unp->unp_conn) {
+ error = EISCONN;
+ break;
+ }
+ error = unp_connect(so, nam, p);
+ if (error)
+ break;
+ } else {
+ if (unp->unp_conn == 0) {
+ error = ENOTCONN;
+ break;
+ }
+ }
+ so2 = unp->unp_conn->unp_socket;
+ if (unp->unp_addr)
+ from = mtod(unp->unp_addr, struct sockaddr *);
+ else
+ from = &sun_noname;
+ /* On success the receive buffer owns m and control; don't free them. */
+ if (sbappendaddr(&so2->so_rcv, from, m, control)) {
+ sorwakeup(so2);
+ m = 0;
+ control = 0;
+ } else
+ error = ENOBUFS;
+ if (nam)
+ unp_disconnect(unp);
+ break;
+ }
+
+ case SOCK_STREAM:
+#define rcv (&so2->so_rcv)
+#define snd (&so->so_snd)
+ if (so->so_state & SS_CANTSENDMORE) {
+ error = EPIPE;
+ break;
+ }
+ if (unp->unp_conn == 0)
+ panic("uipc 3");
+ so2 = unp->unp_conn->unp_socket;
+ /*
+ * Send to paired receive port, and then reduce
+ * send buffer hiwater marks to maintain backpressure.
+ * Wake up readers.
+ */
+ if (control) {
+ if (sbappendcontrol(rcv, m, control))
+ control = 0;
+ } else
+ sbappend(rcv, m);
+ snd->sb_mbmax -=
+ rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
+ unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
+ snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
+ unp->unp_conn->unp_cc = rcv->sb_cc;
+ sorwakeup(so2);
+ m = 0;
+#undef snd
+#undef rcv
+ break;
+
+ default:
+ panic("uipc 4");
+ }
+ break;
+
+ case PRU_ABORT:
+ unp_drop(unp, ECONNABORTED);
+ break;
+
+ case PRU_SENSE:
+ /* m is a struct stat here, not an mbuf, hence the direct return. */
+ ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
+ if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
+ so2 = unp->unp_conn->unp_socket;
+ ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
+ }
+ ((struct stat *) m)->st_dev = NODEV;
+ /* Assign a fake inode number the first time one is asked for. */
+ if (unp->unp_ino == 0)
+ unp->unp_ino = unp_ino++;
+ ((struct stat *) m)->st_ino = unp->unp_ino;
+ return (0);
+
+ case PRU_RCVOOB:
+ return (EOPNOTSUPP);
+
+ case PRU_SENDOOB:
+ error = EOPNOTSUPP;
+ break;
+
+ case PRU_SOCKADDR:
+ /* An unbound socket reports a zero-length name. */
+ if (unp->unp_addr) {
+ nam->m_len = unp->unp_addr->m_len;
+ bcopy(mtod(unp->unp_addr, caddr_t),
+ mtod(nam, caddr_t), (unsigned)nam->m_len);
+ } else
+ nam->m_len = 0;
+ break;
+
+ case PRU_PEERADDR:
+ if (unp->unp_conn && unp->unp_conn->unp_addr) {
+ nam->m_len = unp->unp_conn->unp_addr->m_len;
+ bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
+ mtod(nam, caddr_t), (unsigned)nam->m_len);
+ } else
+ nam->m_len = 0;
+ break;
+
+ case PRU_SLOWTIMO:
+ break;
+
+ default:
+ panic("piusrreq");
+ }
+release:
+ /* Free whatever was not handed off to a socket buffer above. */
+ if (control)
+ m_freem(control);
+ if (m)
+ m_freem(m);
+ return (error);
+}
+
+/*
+ * Both send and receive buffers are allocated PIPSIZ bytes of buffering
+ * for stream sockets, although the total for sender and receiver is
+ * actually only PIPSIZ.
+ * Datagram sockets really use the sendspace as the maximum datagram size,
+ * and don't really want to reserve the sendspace. Their recvspace should
+ * be large enough for at least one max-size datagram plus address.
+ */
+#define PIPSIZ 4096
+u_long unpst_sendspace = PIPSIZ; /* stream send reservation used by unp_attach */
+u_long unpst_recvspace = PIPSIZ; /* stream receive reservation used by unp_attach */
+u_long unpdg_sendspace = 2*1024; /* really max datagram size */
+u_long unpdg_recvspace = 4*1024; /* datagram receive reservation used by unp_attach */
+
+int unp_rights; /* file descriptors in flight */
+
+/*
+ * Give socket so a Unix domain protocol control block.
+ *
+ * If either socket buffer has no high-water mark yet, default space is
+ * reserved first (stream or datagram sizes, by socket type; any other
+ * type panics).  The control block is a zeroed MT_PCB mbuf.
+ *
+ * Returns 0, the soreserve error, or ENOBUFS if no mbuf is available.
+ */
+unp_attach(so)
+	struct socket *so;
+{
+	register struct unpcb *pcb;
+	register struct mbuf *pcbm;
+	int error;
+
+	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+		if (so->so_type == SOCK_STREAM)
+			error = soreserve(so, unpst_sendspace,
+			    unpst_recvspace);
+		else if (so->so_type == SOCK_DGRAM)
+			error = soreserve(so, unpdg_sendspace,
+			    unpdg_recvspace);
+		else
+			panic("unp_attach");
+		if (error)
+			return (error);
+	}
+	/* Allocate without sleeping; the caller sees ENOBUFS instead. */
+	pcbm = m_getclr(M_DONTWAIT, MT_PCB);
+	if (pcbm == NULL)
+		return (ENOBUFS);
+	pcb = mtod(pcbm, struct unpcb *);
+	pcb->unp_socket = so;
+	so->so_pcb = (caddr_t)pcb;
+	return (0);
+}
+
+/*
+ * Tear down the protocol control block of a Unix domain socket.
+ *
+ * Releases the bound vnode (if any), disconnects from the peer, drops
+ * every datagram socket still connected to this one (ECONNRESET), marks
+ * the socket disconnected, and frees the saved address and the control
+ * block itself.  If descriptors are in flight anywhere, the socket's
+ * receive buffer is flushed and the garbage collector is run.
+ */
+unp_detach(unp)
+	register struct unpcb *unp;
+{
+	/*
+	 * Remember the socket now: unp lives in an mbuf that is freed
+	 * below, and the socket is still needed after that point.
+	 * Reading unp->unp_socket after m_free(dtom(unp)) would be a
+	 * use of freed memory.
+	 */
+	register struct socket *so = unp->unp_socket;
+
+	if (unp->unp_vnode) {
+		unp->unp_vnode->v_socket = 0;
+		vrele(unp->unp_vnode);
+		unp->unp_vnode = 0;
+	}
+	if (unp->unp_conn)
+		unp_disconnect(unp);
+	/* Each unp_drop unlinks the head of unp_refs, so this terminates. */
+	while (unp->unp_refs)
+		unp_drop(unp->unp_refs, ECONNRESET);
+	soisdisconnected(so);
+	so->so_pcb = 0;
+	m_freem(unp->unp_addr);
+	(void) m_free(dtom(unp));
+	if (unp_rights) {
+		/*
+		 * Normally the receive buffer is flushed later,
+		 * in sofree, but if our receive buffer holds references
+		 * to descriptors that are now garbage, we will dispose
+		 * of those descriptor references after the garbage collector
+		 * gets them (resulting in a "panic: closef: count < 0").
+		 */
+		sorflush(so);
+		unp_gc();
+	}
+}
+
+/*
+ * Bind unp to the filesystem name held in nam (a sockaddr_un) by
+ * creating a VSOCK vnode at that path on behalf of process p.
+ *
+ * Returns EINVAL if unp is already bound or if a name that fills the
+ * mbuf (m_len == MLEN) is not NUL-terminated, EADDRINUSE if the path
+ * already exists, or the error from namei/VOP_CREATE.  On success
+ * returns 0 with the vnode and socket linked to each other and a copy
+ * of nam saved in unp_addr.
+ */
+unp_bind(unp, nam, p)
+ struct unpcb *unp;
+ struct mbuf *nam;
+ struct proc *p;
+{
+ struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
+ soun->sun_path, p);
+ if (unp->unp_vnode != NULL)
+ return (EINVAL);
+ /*
+ * Make sure the path is NUL-terminated before namei reads it: a
+ * full mbuf must already end in NUL, otherwise one is appended.
+ */
+ if (nam->m_len == MLEN) {
+ if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
+ return (EINVAL);
+ } else
+ *(mtod(nam, caddr_t) + nam->m_len) = 0;
+/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ /* The name already exists: back out of the create and refuse. */
+ if (vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(vp);
+ return (EADDRINUSE);
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_type = VSOCK;
+ vattr.va_mode = ACCESSPERMS;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr))
+ return (error);
+ vp = nd.ni_vp;
+ /* Link vnode and socket both ways and remember the bound name. */
+ vp->v_socket = unp->unp_socket;
+ unp->unp_vnode = vp;
+ unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
+ VOP_UNLOCK(vp);
+ return (0);
+}
+
+/*
+ * Connect socket so to the socket bound at the path named in nam (a
+ * sockaddr_un), on behalf of process p.
+ *
+ * The path must resolve to a VSOCK vnode that p may write and that has
+ * a socket of the same type attached.  For protocols with
+ * PR_CONNREQUIRED the target must be accepting connections; a new
+ * socket is spawned from it with sonewconn, given a copy of the
+ * listener's address, and becomes the actual peer.
+ *
+ * Returns 0, EMSGSIZE (name fills the mbuf without a NUL), ENOTSOCK,
+ * ECONNREFUSED, EPROTOTYPE, or an error from namei, VOP_ACCESS or
+ * unp_connect2.
+ */
+unp_connect(so, nam, p)
+ struct socket *so;
+ struct mbuf *nam;
+ struct proc *p;
+{
+ register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
+ register struct vnode *vp;
+ register struct socket *so2, *so3;
+ struct unpcb *unp2, *unp3;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
+ /*
+ * NUL-terminate the path: if the data runs to the very end of the
+ * mbuf's internal buffer the last byte must already be NUL,
+ * otherwise a NUL is stored just past the data.
+ */
+ if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) { /* XXX */
+ if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
+ return (EMSGSIZE);
+ } else
+ *(mtod(nam, caddr_t) + nam->m_len) = 0;
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VSOCK) {
+ error = ENOTSOCK;
+ goto bad;
+ }
+ if (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p))
+ goto bad;
+ so2 = vp->v_socket;
+ /* A socket vnode with no socket attached any more. */
+ if (so2 == 0) {
+ error = ECONNREFUSED;
+ goto bad;
+ }
+ if (so->so_type != so2->so_type) {
+ error = EPROTOTYPE;
+ goto bad;
+ }
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+ if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
+ (so3 = sonewconn(so2, 0)) == 0) {
+ error = ECONNREFUSED;
+ goto bad;
+ }
+ unp2 = sotounpcb(so2);
+ unp3 = sotounpcb(so3);
+ /* The spawned socket reports the listener's bound name. */
+ if (unp2->unp_addr)
+ unp3->unp_addr =
+ m_copy(unp2->unp_addr, 0, (int)M_COPYALL);
+ so2 = so3;
+ }
+ error = unp_connect2(so, so2);
+bad:
+ /* Reached on success too: the vnode is always released here. */
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Connect so to so2 at the control-block level.
+ *
+ * Datagram: so points at so2 and is pushed onto so2's unp_refs list;
+ * only so is marked connected.  Stream: the two control blocks point at
+ * each other and both sockets are marked connected.  Any other socket
+ * type panics.
+ *
+ * Returns EPROTOTYPE if the socket types differ, otherwise 0.
+ */
+unp_connect2(so, so2)
+	register struct socket *so;
+	register struct socket *so2;
+{
+	register struct unpcb *self, *peer;
+
+	if (so->so_type != so2->so_type)
+		return (EPROTOTYPE);
+	self = sotounpcb(so);
+	peer = sotounpcb(so2);
+	self->unp_conn = peer;
+	if (so->so_type == SOCK_DGRAM) {
+		self->unp_nextref = peer->unp_refs;
+		peer->unp_refs = self;
+		soisconnected(so);
+	} else if (so->so_type == SOCK_STREAM) {
+		peer->unp_conn = self;
+		soisconnected(so);
+		soisconnected(so2);
+	} else
+		panic("unp_connect2");
+	return (0);
+}
+
+/*
+ * Break unp's connection to its peer; does nothing if unp is not
+ * connected.
+ *
+ * Datagram: unp is unlinked from the peer's unp_refs list (panics if it
+ * is not found there) and SS_ISCONNECTED is cleared on unp's socket.
+ * Stream: the peer's back pointer is cleared as well and both sockets
+ * are marked disconnected.
+ */
+unp_disconnect(unp)
+ struct unpcb *unp;
+{
+ register struct unpcb *unp2 = unp->unp_conn;
+
+ if (unp2 == 0)
+ return;
+ unp->unp_conn = 0;
+ switch (unp->unp_socket->so_type) {
+
+ case SOCK_DGRAM:
+ if (unp2->unp_refs == unp)
+ unp2->unp_refs = unp->unp_nextref;
+ else {
+ /* From here on unp2 walks the list looking for unp's predecessor. */
+ unp2 = unp2->unp_refs;
+ for (;;) {
+ if (unp2 == 0)
+ panic("unp_disconnect");
+ if (unp2->unp_nextref == unp)
+ break;
+ unp2 = unp2->unp_nextref;
+ }
+ unp2->unp_nextref = unp->unp_nextref;
+ }
+ unp->unp_nextref = 0;
+ unp->unp_socket->so_state &= ~SS_ISCONNECTED;
+ break;
+
+ case SOCK_STREAM:
+ soisdisconnected(unp->unp_socket);
+ unp2->unp_conn = 0;
+ soisdisconnected(unp2->unp_socket);
+ break;
+ }
+}
+
+#ifdef notdef
+/* Compiled out: an abort that simply detaches the control block. */
+unp_abort(unp)
+ struct unpcb *unp;
+{
+
+ unp_detach(unp);
+}
+#endif
+
+/*
+ * Propagate a shutdown of unp's sending side: for a connected stream
+ * socket whose peer still has a socket, tell that socket it can
+ * receive no more.  Other cases are left alone.
+ */
+unp_shutdown(unp)
+	struct unpcb *unp;
+{
+	struct socket *peer;
+
+	if (unp->unp_socket->so_type != SOCK_STREAM)
+		return;
+	if (unp->unp_conn == 0)
+		return;
+	peer = unp->unp_conn->unp_socket;
+	if (peer)
+		socantrcvmore(peer);
+}
+
+/*
+ * Record errno as the pending error on unp's socket and disconnect it.
+ * If the socket has so_head set (presumably: it is still queued on a
+ * listening socket -- TODO confirm), the control block, its saved
+ * address and the socket itself are freed as well.
+ */
+unp_drop(unp, errno)
+ struct unpcb *unp;
+ int errno;
+{
+ struct socket *so = unp->unp_socket;
+
+ so->so_error = errno;
+ unp_disconnect(unp);
+ if (so->so_head) {
+ /* Detach the pcb from the socket before freeing either. */
+ so->so_pcb = (caddr_t) 0;
+ m_freem(unp->unp_addr);
+ (void) m_free(dtom(unp));
+ sofree(so);
+ }
+}
+
+#ifdef notdef
+/* Compiled out: empty placeholder, there is nothing to drain. */
+unp_drain()
+{
+
+}
+#endif
+
+/*
+ * Convert the struct file pointers carried in the control message
+ * "rights" into descriptor numbers in the current process, rewriting
+ * the message in place.
+ *
+ * If the process does not have room for all of them, every carried file
+ * is discarded, its slot is zeroed, and EMSGSIZE is returned.
+ * Otherwise returns 0.
+ *
+ * NOTE: the count is computed with sizeof (int) while rp steps in units
+ * of struct file *, and descriptors are stored through an int pointer;
+ * the two only line up if ints and pointers have the same size.
+ */
+unp_externalize(rights)
+ struct mbuf *rights;
+{
+ struct proc *p = curproc; /* XXX */
+ register int i;
+ register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
+ register struct file **rp = (struct file **)(cm + 1);
+ register struct file *fp;
+ int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
+ int f;
+
+ if (!fdavail(p, newfds)) {
+ for (i = 0; i < newfds; i++) {
+ fp = *rp;
+ unp_discard(fp);
+ *rp++ = 0;
+ }
+ return (EMSGSIZE);
+ }
+ for (i = 0; i < newfds; i++) {
+ /* fdavail said there was room, so a failure here is fatal. */
+ if (fdalloc(p, 0, &f))
+ panic("unp_externalize");
+ fp = *rp;
+ p->p_fd->fd_ofiles[f] = fp;
+ /* The file is no longer in flight. */
+ fp->f_msgcount--;
+ unp_rights--;
+ *(int *)rp++ = f;
+ }
+ return (0);
+}
+
+/*
+ * Convert the descriptor numbers in an SCM_RIGHTS control message into
+ * struct file pointers from process p's descriptor table, rewriting the
+ * message in place and taking a reference on each file.
+ *
+ * Returns EINVAL unless the message is a SOL_SOCKET/SCM_RIGHTS message
+ * whose cmsg_len equals the mbuf length, EBADF if any descriptor is out
+ * of range or not open (nothing is modified in that case), else 0.
+ *
+ * NOTE(review): cmsg_len is not checked against sizeof (*cm) here;
+ * presumably the caller guarantees a full header -- confirm.
+ */
+unp_internalize(control, p)
+ struct mbuf *control;
+ struct proc *p;
+{
+ struct filedesc *fdp = p->p_fd;
+ register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
+ register struct file **rp;
+ register struct file *fp;
+ register int i, fd;
+ int oldfds;
+
+ if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
+ cm->cmsg_len != control->m_len)
+ return (EINVAL);
+ oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
+ /* First pass: validate every descriptor before changing anything. */
+ rp = (struct file **)(cm + 1);
+ for (i = 0; i < oldfds; i++) {
+ fd = *(int *)rp++;
+ if ((unsigned)fd >= fdp->fd_nfiles ||
+ fdp->fd_ofiles[fd] == NULL)
+ return (EBADF);
+ }
+ /* Second pass: replace each number with its file and count it in flight. */
+ rp = (struct file **)(cm + 1);
+ for (i = 0; i < oldfds; i++) {
+ fp = fdp->fd_ofiles[*(int *)rp];
+ *rp++ = fp;
+ fp->f_count++;
+ fp->f_msgcount++;
+ unp_rights++;
+ }
+ return (0);
+}
+
+/*
+ * Garbage collector state: unp_defer counts files flagged FDEFER that
+ * unp_gc still has to rescan; unp_gcing is nonzero while unp_gc runs,
+ * so that a nested call returns immediately.
+ */
+int unp_defer, unp_gcing;
+int unp_mark();
+extern struct domain unixdomain;
+
+/*
+ * Garbage-collect files that are referenced only by SCM_RIGHTS messages
+ * sitting in the receive buffers of sockets that are themselves
+ * unreachable.
+ *
+ * Mark phase: clear FMARK/FDEFER on every file, then repeatedly walk
+ * the file list marking each file that has a reference other than
+ * in-flight messages (f_count != f_msgcount), and every file reachable
+ * through the receive buffer of a marked Unix domain socket, until no
+ * deferred files remain.
+ *
+ * Sweep phase: every open file left unmarked whose references are all
+ * in flight is garbage; see the long comment below.
+ *
+ * Returns immediately if a collection is already in progress.
+ */
+unp_gc()
+{
+	register struct file *fp, *nextfp;
+	register struct socket *so;
+	struct file **extra_ref, **fpp;
+	int nunref, i;
+
+	if (unp_gcing)
+		return;
+	unp_gcing = 1;
+	unp_defer = 0;
+	for (fp = filehead; fp; fp = fp->f_filef)
+		fp->f_flag &= ~(FMARK|FDEFER);
+	do {
+		for (fp = filehead; fp; fp = fp->f_filef) {
+			if (fp->f_count == 0)
+				continue;
+			if (fp->f_flag & FDEFER) {
+				/* Marked by unp_mark on an earlier pass; scan it now. */
+				fp->f_flag &= ~FDEFER;
+				unp_defer--;
+			} else {
+				if (fp->f_flag & FMARK)
+					continue;
+				/* Referenced only from messages: not a root. */
+				if (fp->f_count == fp->f_msgcount)
+					continue;
+				fp->f_flag |= FMARK;
+			}
+			if (fp->f_type != DTYPE_SOCKET ||
+			    (so = (struct socket *)fp->f_data) == 0)
+				continue;
+			if (so->so_proto->pr_domain != &unixdomain ||
+			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
+				continue;
+#ifdef notdef
+			if (so->so_rcv.sb_flags & SB_LOCK) {
+				/*
+				 * This is problematical; it's not clear
+				 * we need to wait for the sockbuf to be
+				 * unlocked (on a uniprocessor, at least),
+				 * and it's also not clear what to do
+				 * if sbwait returns an error due to receipt
+				 * of a signal.  If sbwait does return
+				 * an error, we'll go into an infinite
+				 * loop.  Delete all of this for now.
+				 */
+				(void) sbwait(&so->so_rcv);
+				goto restart;
+			}
+#endif
+			unp_scan(so->so_rcv.sb_mb, unp_mark);
+		}
+	} while (unp_defer);
+	/*
+	 * We grab an extra reference to each of the file table entries
+	 * that are not otherwise accessible and then free the rights
+	 * that are stored in messages on them.
+	 *
+	 * The bug in the original code is a little tricky, so I'll describe
+	 * what's wrong with it here.
+	 *
+	 * It is incorrect to simply unp_discard each entry for f_msgcount
+	 * times -- consider the case of sockets A and B that contain
+	 * references to each other.  On a last close of some other socket,
+	 * we trigger a gc since the number of outstanding rights (unp_rights)
+	 * is non-zero.  If during the sweep phase the gc code unp_discards,
+	 * we end up doing a (full) closef on the descriptor.  A closef on A
+	 * results in the following chain.  Closef calls soo_close, which
+	 * calls soclose.   Soclose calls first (through the switch
+	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
+	 * returns because the previous instance had set unp_gcing, and
+	 * we return all the way back to soclose, which marks the socket
+	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
+	 * to free up the rights that are queued in messages on the socket A,
+	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
+	 * switch unp_dispose, which unp_scans with unp_discard.  This second
+	 * instance of unp_discard just calls closef on B.
+	 *
+	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
+	 * which results in another closef on A.  Unfortunately, A is already
+	 * being closed, and the descriptor has already been marked with
+	 * SS_NOFDREF, and soclose panics at this point.
+	 *
+	 * Here, we first take an extra reference to each inaccessible
+	 * descriptor.  Then, we call sorflush ourself on the ones that
+	 * are sockets.  After we destroy all the
+	 * rights carried in messages, we do a last closef to get rid
+	 * of our extra reference.  This is the last close, and the
+	 * unp_detach etc will shut down the socket.
+	 *
+	 * 91/09/19, bsy@cs.cmu.edu
+	 */
+	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
+	for (nunref = 0, fp = filehead, fpp = extra_ref; fp; fp = nextfp) {
+		nextfp = fp->f_filef;
+		if (fp->f_count == 0)
+			continue;
+		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
+			*fpp++ = fp;
+			nunref++;
+			fp->f_count++;
+		}
+	}
+	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
+		fp = *fpp;
+		/*
+		 * Any kind of file can be passed in a rights message, so
+		 * an unreferenced entry need not be a socket; f_data is
+		 * only a struct socket for DTYPE_SOCKET files.
+		 */
+		if (fp->f_type == DTYPE_SOCKET && fp->f_data != NULL)
+			sorflush((struct socket *)fp->f_data);
+	}
+	/* closef takes the closing process too; none applies here. */
+	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
+		(void) closef(*fpp, (struct proc *)NULL);
+	free((caddr_t)extra_ref, M_FILE);
+	unp_gcing = 0;
+}
+
+/*
+ * Discard every file carried in rights messages on the mbuf chain m.
+ * A null chain is accepted and ignored.
+ */
+unp_dispose(m)
+	struct mbuf *m;
+{
+	int unp_discard();
+
+	if (m == 0)
+		return;
+	unp_scan(m, unp_discard);
+}
+
+/*
+ * Apply op to every struct file pointer carried in rights messages on
+ * the list of mbuf chains starting at m0 (chains are linked by m_act,
+ * mbufs within a chain by m_next).
+ *
+ * Within each chain only the first MT_CONTROL mbuf that holds a
+ * SOL_SOCKET/SCM_RIGHTS message is processed; control mbufs of other
+ * kinds are skipped over.
+ */
+unp_scan(m0, op)
+ register struct mbuf *m0;
+ int (*op)();
+{
+ register struct mbuf *m;
+ register struct file **rp;
+ register struct cmsghdr *cm;
+ register int i;
+ int qfds;
+
+ while (m0) {
+ for (m = m0; m; m = m->m_next)
+ if (m->m_type == MT_CONTROL &&
+ m->m_len >= sizeof(*cm)) {
+ cm = mtod(m, struct cmsghdr *);
+ /* Not a rights message: keep looking along this chain. */
+ if (cm->cmsg_level != SOL_SOCKET ||
+ cm->cmsg_type != SCM_RIGHTS)
+ continue;
+ /* Queued messages hold file pointers, not descriptor numbers. */
+ qfds = (cm->cmsg_len - sizeof *cm)
+ / sizeof (struct file *);
+ rp = (struct file **)(cm + 1);
+ for (i = 0; i < qfds; i++)
+ (*op)(*rp++);
+ break; /* XXX, but saves time */
+ }
+ m0 = m0->m_act;
+ }
+}
+
+/*
+ * Scan callback used by unp_gc: flag fp as reachable (FMARK) and as
+ * needing a scan of its own (FDEFER), counting it in unp_defer.  Files
+ * that are already marked are left alone.
+ */
+unp_mark(fp)
+	struct file *fp;
+{
+
+	if ((fp->f_flag & FMARK) == 0) {
+		fp->f_flag |= (FMARK|FDEFER);
+		unp_defer++;
+	}
+}
+
+/*
+ * Drop one in-flight reference to fp: adjust the in-flight counters
+ * and close the file on behalf of no particular process.
+ */
+unp_discard(fp)
+	struct file *fp;
+{
+
+	unp_rights--;
+	fp->f_msgcount--;
+	(void) closef(fp, (struct proc *)NULL);
+}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
new file mode 100644
index 000000000000..ec5c962f7dfd
--- /dev/null
+++ b/sys/kern/vfs_bio.c
@@ -0,0 +1,339 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)vfs_bio.c 8.6 (Berkeley) 1/11/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/trace.h>
+#include <sys/malloc.h>
+#include <sys/resourcevar.h>
+
+/*
+ * Definitions for the buffer hash lists.
+ *
+ * BUFHASH picks a hash chain from a vnode pointer and a logical block
+ * number; bufhash is the mask filled in by hashinit() in bufinit().
+ * invalhash is the list bufinit() puts every (invalid) buffer on.
+ */
+#define BUFHASH(dvp, lbn) \
+ (&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
+LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
+u_long bufhash;
+
+/*
+ * Insq/Remq for the buffer hash lists.
+ * Both operate through the buffer's b_hash linkage.
+ */
+#define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash)
+#define bremhash(bp) LIST_REMOVE(bp, b_hash)
+
+/*
+ * Definitions for the buffer free lists.
+ * The BQ_* values index the bufqueues[] array.
+ */
+#define BQUEUES 4 /* number of free buffer queues */
+
+#define BQ_LOCKED 0 /* super-blocks &c */
+#define BQ_LRU 1 /* lru, useful buffers */
+#define BQ_AGE 2 /* rubbish */
+#define BQ_EMPTY 3 /* buffer headers with no memory */
+
+TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
+int needbuffer;
+
+/*
+ * Insq/Remq for the buffer free lists.
+ * Insertion at either end uses the buffer's b_freelist linkage;
+ * removal is done by bremfree().
+ */
+#define binsheadfree(bp, dp) TAILQ_INSERT_HEAD(dp, bp, b_freelist)
+#define binstailfree(bp, dp) TAILQ_INSERT_TAIL(dp, bp, b_freelist)
+
+/*
+ * Remove buffer bp from whichever free list it is on.
+ * Panics if bp is the last element of a list but no queue's tail
+ * pointer refers to it.
+ */
+void
+bremfree(bp)
+ struct buf *bp;
+{
+ struct bqueues *dp = NULL;
+
+ /*
+ * We only calculate the head of the freelist when removing
+ * the last element of the list as that is the only time that
+ * it is needed (e.g. to reset the tail pointer).
+ *
+ * NB: This makes an assumption about how tailq's are implemented.
+ */
+ if (bp->b_freelist.tqe_next == NULL) {
+ /* Find the queue whose tail pointer points into bp. */
+ for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
+ if (dp->tqh_last == &bp->b_freelist.tqe_next)
+ break;
+ if (dp == &bufqueues[BQUEUES])
+ panic("bremfree: lost tail");
+ }
+ /* dp is still NULL here when bp was not the last element. */
+ TAILQ_REMOVE(dp, bp, b_freelist);
+}
+
+/*
+ * Set up the buffer cache: empty free queues, the hash table, and one
+ * initialized header per buffer.
+ *
+ * The bufpages pages are divided among the nbuf buffers as evenly as
+ * possible, the first (bufpages % nbuf) buffers getting one extra.
+ * Each header starts out invalid, on the invalid-hash list, and on the
+ * AGE free queue (or EMPTY if it received no memory).
+ */
+void
+bufinit()
+{
+	register struct buf *bp;
+	struct bqueues *dp;
+	register int i;
+	int base, residual;
+
+	for (i = 0; i < BQUEUES; i++)
+		TAILQ_INIT(&bufqueues[i]);
+	bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash);
+	base = bufpages / nbuf;
+	residual = bufpages % nbuf;
+	for (i = 0; i < nbuf; i++) {
+		bp = &buf[i];
+		bzero((char *)bp, sizeof *bp);
+		bp->b_dev = NODEV;
+		bp->b_rcred = NOCRED;
+		bp->b_wcred = NOCRED;
+		bp->b_vnbufs.le_next = NOLIST;
+		bp->b_data = buffers + i * MAXBSIZE;
+		bp->b_bufsize = (i < residual ? base + 1 : base) * CLBYTES;
+		bp->b_flags = B_INVAL;
+		if (bp->b_bufsize)
+			dp = &bufqueues[BQ_AGE];
+		else
+			dp = &bufqueues[BQ_EMPTY];
+		binsheadfree(bp, dp);
+		binshash(bp, &invalhash);
+	}
+}
+
+bread(a1, a2, a3, a4, a5)
+ struct vnode *a1;
+ daddr_t a2;
+ int a3;
+ struct ucred *a4;
+ struct buf **a5;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its arguments and always returns EIO.
+ */
+ return (EIO);
+}
+
+breadn(a1, a2, a3, a4, a5, a6, a7, a8)
+ struct vnode *a1;
+ daddr_t a2; int a3;
+ daddr_t a4[]; int a5[];
+ int a6;
+ struct ucred *a7;
+ struct buf **a8;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its arguments and always returns EIO.
+ */
+ return (EIO);
+}
+
+bwrite(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its argument and always returns EIO.
+ */
+ return (EIO);
+}
+
+/*
+ * Vnode-operation wrapper around bwrite(): writes the buffer named in
+ * the argument structure and passes bwrite's result back.
+ */
+int
+vn_bwrite(ap)
+	struct vop_bwrite_args *ap;
+{
+	struct buf *bp = ap->a_bp;
+
+	return (bwrite(bp));
+}
+
+bdwrite(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its argument and does nothing.
+ */
+ return;
+}
+
+bawrite(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its argument and does nothing.
+ */
+ return;
+}
+
+brelse(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its argument and does nothing.
+ */
+ return;
+}
+
+struct buf *
+incore(a1, a2)
+ struct vnode *a1;
+ daddr_t a2;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its arguments and always returns a null pointer.
+ */
+ return (0);
+}
+
+struct buf *
+getblk(a1, a2, a3, a4, a5)
+ struct vnode *a1;
+ daddr_t a2;
+ int a3, a4, a5;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its arguments and always returns a null pointer.
+ */
+ return ((struct buf *)0);
+}
+
+struct buf *
+geteblk(a1)
+ int a1;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its argument and always returns a null pointer.
+ */
+ return ((struct buf *)0);
+}
+
+allocbuf(a1, a2)
+ struct buf *a1;
+ int a2;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its arguments and always returns 0.
+ */
+ return (0);
+}
+
+struct buf *
+getnewbuf(a1, a2)
+ int a1, a2;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its arguments and always returns a null pointer.
+ */
+ return ((struct buf *)0);
+}
+
+biowait(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its argument and always returns EIO.
+ */
+ return (EIO);
+}
+
+void
+biodone(a1)
+ struct buf *a1;
+{
+
+ /*
+ * Body deleted.
+ * This stub ignores its argument and does nothing.
+ */
+ return;
+}
+
+int
+count_lock_queue()
+{
+
+ /*
+ * Body deleted.
+ * This stub always returns 0.
+ */
+ return (0);
+}
+
+#ifdef DIAGNOSTIC
+/*
+ * Report how the buffer pool is currently distributed: for each free
+ * queue, print the number of buffers on it followed by a breakdown of
+ * that number by buffer size (only sizes that occur are listed).
+ * Can be enabled to print out on every ``sync'' by setting "syncprt"
+ * in vfs_syscalls.c using sysctl.
+ */
+void
+vfs_bufstats()
+{
+	static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };
+	int counts[MAXBSIZE/CLBYTES+1];
+	register struct bqueues *dp;
+	register struct buf *bp;
+	int s, i, j, count;
+
+	for (i = 0; i < BQUEUES; i++) {
+		dp = &bufqueues[i];
+		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
+			counts[j] = 0;
+		count = 0;
+		/* Walk the queue at splbio so it cannot change under us. */
+		s = splbio();
+		for (bp = dp->tqh_first; bp != NULL;
+		    bp = bp->b_freelist.tqe_next) {
+			count++;
+			counts[bp->b_bufsize/CLBYTES]++;
+		}
+		splx(s);
+		printf("%s: total-%d", bname[i], count);
+		for (j = 0; j <= MAXBSIZE/CLBYTES; j++) {
+			if (counts[j] == 0)
+				continue;
+			printf(", %d-%d", j * CLBYTES, counts[j]);
+		}
+		printf("\n");
+	}
+}
+#endif /* DIAGNOSTIC */
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
new file mode 100644
index 000000000000..4ccfd7289a04
--- /dev/null
+++ b/sys/kern/vfs_cache.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_cache.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+/*
+ * Name caching works as follows:
+ *
+ * Names found by directory scans are retained in a cache
+ * for future reference. It is managed LRU, so frequently
+ * used names will hang around. Cache is indexed by hash value
+ * obtained from (vp, name) where vp refers to the directory
+ * containing name.
+ *
+ * For simplicity (and economy of storage), names longer than
+ * a maximum length of NCHNAMLEN are not cached; they occur
+ * infrequently in any case, and are almost never of interest.
+ *
+ * Upon reaching the last segment of a path, if the reference
+ * is for DELETE, or NOCACHE is set (rewrite), and the
+ * name is located in the cache, it will be dropped.
+ */
+
+/*
+ * Structures associated with name caching.
+ */
+struct namecache **nchashtbl; /* hash buckets, indexed by cn_hash & nchash */
+u_long nchash; /* size of hash table - 1; used as the bucket mask */
+long numcache; /* number of cache entries allocated */
+struct namecache *nchhead, **nchtail; /* LRU chain: first entry, and address of the last nc_nxt link */
+struct nchstats nchstats; /* cache effectiveness statistics */
+
+int doingcache = 1; /* 1 => enable the cache; 0 makes cache_lookup/cache_enter return at once */
+
+/*
+ * Look for the name in the cache. We don't do this
+ * if the segment name is long, simply so the cache can avoid
+ * holding long names (which would either waste space, or
+ * add greatly to the complexity).
+ *
+ * Lookup is called with dvp pointing to the directory to search,
+ * cnp->cn_nameptr pointing to the name of the entry being sought,
+ * cnp->cn_namelen giving the length of the name, and cnp->cn_hash
+ * containing a hash of the name. If the lookup succeeds, the vnode is
+ * returned in *vpp and a status of -1 is returned. If the lookup
+ * determines that the name does not exist (negative caching), a status
+ * of ENOENT is returned. If the lookup fails, a status of zero is returned.
+ */
+int
+cache_lookup(dvp, vpp, cnp)
+ struct vnode *dvp; /* directory being searched */
+ struct vnode **vpp; /* out: set to the cached vnode only on a good hit (-1) */
+ struct componentname *cnp; /* name, length, hash, flags and operation of the lookup */
+{
+ register struct namecache *ncp, *ncq, **ncpp;
+
+ if (!doingcache) /* cache disabled */
+ return (0);
+ if (cnp->cn_namelen > NCHNAMLEN) {
+ nchstats.ncs_long++;
+ cnp->cn_flags &= ~MAKEENTRY; /* names over NCHNAMLEN get MAKEENTRY cleared */
+ return (0);
+ }
+ ncpp = &nchashtbl[cnp->cn_hash & nchash]; /* pick the hash bucket */
+ for (ncp = *ncpp; ncp; ncp = ncp->nc_forw) {
+ if (ncp->nc_dvp == dvp &&
+ ncp->nc_dvpid == dvp->v_id && /* id must still match; cache_purge() changes v_id */
+ ncp->nc_nlen == cnp->cn_namelen &&
+ !bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
+ break;
+ }
+ if (ncp == NULL) { /* bucket exhausted without a match */
+ nchstats.ncs_miss++;
+ return (0);
+ }
+ if (!(cnp->cn_flags & MAKEENTRY)) { /* found, but caller did not ask for caching: drop it below */
+ nchstats.ncs_badhits++;
+ } else if (ncp->nc_vp == NULL) { /* negative entry */
+ if (cnp->cn_nameiop != CREATE) { /* for CREATE, fall through and drop the entry instead */
+ nchstats.ncs_neghits++;
+ /*
+ * Move this slot to end of LRU chain,
+ * if not already there.
+ */
+ if (ncp->nc_nxt) {
+ /* remove from LRU chain */
+ *ncp->nc_prev = ncp->nc_nxt;
+ ncp->nc_nxt->nc_prev = ncp->nc_prev;
+ /* and replace at end of it */
+ ncp->nc_nxt = NULL;
+ ncp->nc_prev = nchtail;
+ *nchtail = ncp;
+ nchtail = &ncp->nc_nxt;
+ }
+ return (ENOENT);
+ }
+ } else if (ncp->nc_vpid != ncp->nc_vp->v_id) { /* target vnode's id changed since entry was made */
+ nchstats.ncs_falsehits++;
+ } else {
+ nchstats.ncs_goodhits++;
+ /*
+ * move this slot to end of LRU chain, if not already there
+ */
+ if (ncp->nc_nxt) {
+ /* remove from LRU chain */
+ *ncp->nc_prev = ncp->nc_nxt;
+ ncp->nc_nxt->nc_prev = ncp->nc_prev;
+ /* and replace at end of it */
+ ncp->nc_nxt = NULL;
+ ncp->nc_prev = nchtail;
+ *nchtail = ncp;
+ nchtail = &ncp->nc_nxt;
+ }
+ *vpp = ncp->nc_vp;
+ return (-1);
+ }
+
+ /*
+ * Last component and we are renaming or deleting,
+ * the cache entry is invalid, or otherwise don't
+ * want cache entry to exist.
+ */
+ /* remove from LRU chain */
+ if (ncq = ncp->nc_nxt)
+ ncq->nc_prev = ncp->nc_prev;
+ else
+ nchtail = ncp->nc_prev; /* ncp was last: tail moves back to its predecessor's link */
+ *ncp->nc_prev = ncq;
+ /* remove from hash chain */
+ if (ncq = ncp->nc_forw)
+ ncq->nc_back = ncp->nc_back;
+ *ncp->nc_back = ncq;
+ /* and make a dummy hash chain */
+ ncp->nc_forw = NULL;
+ ncp->nc_back = NULL; /* NULL nc_back is how other routines see "not on a hash chain" */
+ /* insert at head of LRU list (first to grab) */
+ if (ncq = nchhead)
+ ncq->nc_prev = &ncp->nc_nxt;
+ else
+ nchtail = &ncp->nc_nxt; /* chain was empty: ncp is also the tail */
+ nchhead = ncp;
+ ncp->nc_nxt = ncq;
+ ncp->nc_prev = &nchhead;
+ return (0); /* reported to the caller as a failed lookup */
+}
+
+/*
+ * Add an entry to the cache mapping (dvp, cnp name) to vp; a NULL vp is stored as-is.
+ */
+cache_enter(dvp, vp, cnp) /* no return type is given, so K&R C treats it as int */
+ struct vnode *dvp; /* directory containing the name */
+ struct vnode *vp; /* vnode for the name; may be NULL */
+ struct componentname *cnp; /* supplies cn_nameptr, cn_namelen and cn_hash */
+{
+ register struct namecache *ncp, *ncq, **ncpp;
+
+#ifdef DIAGNOSTIC
+ if (cnp->cn_namelen > NCHNAMLEN)
+ panic("cache_enter: name too long");
+#endif
+ if (!doingcache) /* cache disabled */
+ return;
+ /*
+ * Free the cache slot at head of lru chain.
+ */
+ if (numcache < desiredvnodes) { /* below the cap: allocate a fresh, zeroed entry instead */
+ ncp = (struct namecache *)
+ malloc((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
+ bzero((char *)ncp, sizeof *ncp);
+ numcache++;
+ } else if (ncp = nchhead) { /* at the cap: recycle the entry at the head of the LRU chain */
+ /* remove from lru chain */
+ if (ncq = ncp->nc_nxt)
+ ncq->nc_prev = ncp->nc_prev;
+ else
+ nchtail = ncp->nc_prev;
+ *ncp->nc_prev = ncq;
+ /* remove from old hash chain, if on one */
+ if (ncp->nc_back) {
+ if (ncq = ncp->nc_forw)
+ ncq->nc_back = ncp->nc_back;
+ *ncp->nc_back = ncq;
+ ncp->nc_forw = NULL;
+ ncp->nc_back = NULL;
+ }
+ } else /* at the cap and the LRU chain is empty: nothing to reuse */
+ return;
+ /* grab the vnode we just found */
+ ncp->nc_vp = vp;
+ if (vp)
+ ncp->nc_vpid = vp->v_id;
+ else
+ ncp->nc_vpid = 0;
+ /* fill in cache info */
+ ncp->nc_dvp = dvp;
+ ncp->nc_dvpid = dvp->v_id;
+ ncp->nc_nlen = cnp->cn_namelen;
+ bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen); /* copies nc_nlen bytes only; no terminator is added */
+ /* link at end of lru chain */
+ ncp->nc_nxt = NULL;
+ ncp->nc_prev = nchtail;
+ *nchtail = ncp;
+ nchtail = &ncp->nc_nxt;
+ /* and insert on hash chain */
+ ncpp = &nchashtbl[cnp->cn_hash & nchash];
+ if (ncq = *ncpp)
+ ncq->nc_back = &ncp->nc_forw;
+ ncp->nc_forw = ncq;
+ ncp->nc_back = ncpp; /* new entry goes at the front of its bucket */
+ *ncpp = ncp;
+}
+
+/*
+ * Name cache initialization, from vfs_init() when we are booting
+ */
+nchinit() /* no return type is given, so K&R C treats it as int */
+{
+
+ nchtail = &nchhead; /* empty LRU chain: the tail link is the head pointer itself */
+ nchashtbl = hashinit(desiredvnodes, M_CACHE, &nchash); /* NOTE(review): hashinit presumably fills in nchash -- confirm */
+}
+
+/*
+ * Cache flush, a particular vnode; called when a vnode is renamed to
+ * hide entries that would now be invalid
+ */
+cache_purge(vp) /* no return type is given, so K&R C treats it as int */
+ struct vnode *vp; /* vnode that receives a fresh v_id */
+{
+ struct namecache *ncp, **ncpp;
+
+ vp->v_id = ++nextvnodeid; /* entries holding the old id no longer match in cache_lookup() */
+ if (nextvnodeid != 0) /* common case: the id counter did not wrap */
+ return;
+ for (ncpp = nchashtbl; ncpp <= &nchashtbl[nchash]; ncpp++) { /* ascending: never forms a pointer below the table */
+ for (ncp = *ncpp; ncp; ncp = ncp->nc_forw) {
+ ncp->nc_vpid = 0; /* counter wrapped: zero the recorded ids of every hashed entry */
+ ncp->nc_dvpid = 0;
+ }
+ }
+ vp->v_id = ++nextvnodeid; /* step past 0, the value just written into the entries */
+}
+
+/*
+ * Cache flush, a whole filesystem; called when filesys is umounted to
+ * remove entries that would now be invalid
+ *
+ * The line "nxtcp = nchhead" near the end is to avoid potential problems
+ * if the cache lru chain is modified while we are dumping the
+ * inode. This makes the algorithm O(n^2), but do you think I care?
+ */
+cache_purgevfs(mp) /* no return type is given, so K&R C treats it as int */
+ struct mount *mp; /* entries whose directory vnode has v_mount == mp are purged */
+{
+ register struct namecache *ncp, *nxtcp;
+
+ for (ncp = nchhead; ncp; ncp = nxtcp) { /* walk the LRU chain */
+ if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) { /* already free, or another filesystem */
+ nxtcp = ncp->nc_nxt;
+ continue;
+ }
+ /* free the resources we had */
+ ncp->nc_vp = NULL;
+ ncp->nc_dvp = NULL;
+ /* remove from old hash chain, if on one */
+ if (ncp->nc_back) {
+ if (nxtcp = ncp->nc_forw)
+ nxtcp->nc_back = ncp->nc_back;
+ *ncp->nc_back = nxtcp;
+ ncp->nc_forw = NULL;
+ ncp->nc_back = NULL;
+ }
+ /* delete this entry from LRU chain */
+ if (nxtcp = ncp->nc_nxt)
+ nxtcp->nc_prev = ncp->nc_prev;
+ else
+ nchtail = ncp->nc_prev;
+ *ncp->nc_prev = nxtcp;
+ /* cause rescan of list, it may have altered */
+ /* also put the now-free entry at head of LRU */
+ if (nxtcp = nchhead) /* nxtcp = old head, so the loop resumes from the start of the chain */
+ nxtcp->nc_prev = &ncp->nc_nxt;
+ else
+ nchtail = &ncp->nc_nxt; /* chain became empty: ncp is also the tail */
+ nchhead = ncp;
+ ncp->nc_nxt = nxtcp;
+ ncp->nc_prev = &nchhead;
+ }
+}
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
new file mode 100644
index 000000000000..c34fbc34a679
--- /dev/null
+++ b/sys/kern/vfs_cluster.c
@@ -0,0 +1,746 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/trace.h>
+#include <sys/malloc.h>
+#include <sys/resourcevar.h>
+#include <libkern/libkern.h>
+
+#ifdef DEBUG
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+int doreallocblks = 1; /* run-time switch tested in cluster_write() */
+struct ctldebug debug13 = { "doreallocblks", &doreallocblks }; /* pairs the switch's name with its address */
+#else
+/* XXX for cluster_write: without DEBUG the switch is the constant 1 */
+#define doreallocblks 1
+#endif
+
+/*
+ * Local declarations: prototypes for helpers defined later in this file
+ */
+struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t,
+ daddr_t, long, int));
+struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *,
+ daddr_t, daddr_t, long, int, long));
+void cluster_wbuild __P((struct vnode *, struct buf *, long,
+ daddr_t, int, daddr_t));
+struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *));
+
+#ifdef DIAGNOSTIC
+/*
+ * Set to 1 if reads of block zero should cause readahead to be done.
+ * Set to 0 treats a read of block zero as a non-sequential read.
+ *
+ * Setting to one assumes that most reads of block zero of files are due to
+ * sequential passes over the files (e.g. cat, sum) where additional blocks
+ * will soon be needed. Setting to zero assumes that the majority are
+ * surgical strikes to get particular info (e.g. size, file) where readahead
+ * blocks will not be used and, in fact, push out other potentially useful
+ * blocks from the cache. The former seems intuitive, but some quick tests
+ * showed that the latter performed better from a system-wide point of view.
+ */
+int doclusterraz = 0; /* default 0: a read of block zero is not treated as sequential */
+#define ISSEQREAD(vp, blk) \
+ (((blk) != 0 || doclusterraz) && \
+ ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
+#else /* !DIAGNOSTIC: same test with the block-zero exclusion fixed at compile time */
+#define ISSEQREAD(vp, blk) \
+ ((blk) != 0 && ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
+#endif /* DIAGNOSTIC */
+
+/*
+ * This replaces bread. If this is a bread at the beginning of a file and
+ * lastr is 0, we assume this is the first read and we'll read up to two
+ * blocks if they are sequential. After that, we'll do regular read ahead
+ * in clustered chunks.
+ *
+ * There are 4 or 5 cases depending on how you count:
+ * Desired block is in the cache:
+ * 1 Not sequential access (0 I/Os).
+ * 2 Access is sequential, do read-ahead (1 ASYNC).
+ * Desired block is not in cache:
+ * 3 Not sequential access (1 SYNC).
+ * 4 Sequential access, next block is contiguous (1 SYNC).
+ * 5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC)
+ *
+ * There are potentially two buffers that require I/O.
+ * bp is the block requested.
+ * rbp is the read-ahead block.
+ * If either is NULL, then you don't have to do the I/O.
+ */
+cluster_read(vp, filesize, lblkno, size, cred, bpp) /* implicit int: biowait() result or error */
+ struct vnode *vp; /* vnode whose v_lastr/v_ralen/v_maxra drive the readahead window */
+ u_quad_t filesize; /* readahead never extends past this many bytes */
+ daddr_t lblkno; /* logical block requested */
+ long size; /* block size in bytes; must be non-zero */
+ struct ucred *cred; /* not referenced in this function */
+ struct buf **bpp; /* out: buffer obtained from getblk() for lblkno */
+{
+ struct buf *bp, *rbp;
+ daddr_t blkno, ioblkno;
+ long flags;
+ int error, num_ra, alreadyincore;
+
+#ifdef DIAGNOSTIC
+ if (size == 0)
+ panic("cluster_read: size = 0");
+#endif
+
+ error = 0;
+ flags = B_READ;
+ *bpp = bp = getblk(vp, lblkno, size, 0, 0);
+ if (bp->b_flags & B_CACHE) {
+ /*
+ * Desired block is in cache; do any readahead ASYNC.
+ * Case 1, 2.
+ */
+ trace(TR_BREADHIT, pack(vp, size), lblkno);
+ flags |= B_ASYNC;
+ ioblkno = lblkno + (vp->v_ralen ? vp->v_ralen : 1);
+ alreadyincore = incore(vp, ioblkno) != NULL; /* test the pointer; an (int) cast could truncate it */
+ bp = NULL;
+ } else {
+ /* Block wasn't in cache, case 3, 4, 5. */
+ trace(TR_BREADMISS, pack(vp, size), lblkno);
+ bp->b_flags |= B_READ;
+ ioblkno = lblkno;
+ alreadyincore = 0;
+ curproc->p_stats->p_ru.ru_inblock++; /* XXX */
+ }
+ /*
+ * XXX
+ * Replace 1 with a window size based on some permutation of
+ * maxcontig and rot_delay. This will let you figure out how
+ * many blocks you should read-ahead (case 2, 4, 5).
+ *
+ * If the access isn't sequential, reset the window to 1.
+ * Note that a read to the same block is considered sequential.
+ * This catches the case where the file is being read sequentially,
+ * but at smaller than the filesystem block size.
+ */
+ rbp = NULL;
+ if (!ISSEQREAD(vp, lblkno)) {
+ vp->v_ralen = 0;
+ vp->v_maxra = lblkno;
+ } else if ((u_quad_t)(ioblkno + 1) * size <= filesize && !alreadyincore && /* widen before multiplying */
+ !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra)) &&
+ blkno != -1) {
+ /*
+ * Reading sequentially, and the next block is not in the
+ * cache. We are going to try reading ahead.
+ */
+ if (num_ra) {
+ /*
+ * If our desired readahead block had been read
+ * in a previous readahead but is no longer in
+ * core, then we may be reading ahead too far
+ * or are not using our readahead very rapidly.
+ * In this case we scale back the window.
+ */
+ if (!alreadyincore && ioblkno <= vp->v_maxra)
+ vp->v_ralen = max(vp->v_ralen >> 1, 1);
+ /*
+ * There are more sequential blocks than our current
+ * window allows, scale up. Ideally we want to get
+ * in sync with the filesystem maxcontig value.
+ */
+ else if (num_ra > vp->v_ralen && lblkno != vp->v_lastr)
+ vp->v_ralen = vp->v_ralen ?
+ min(num_ra, vp->v_ralen << 1) : 1;
+
+ if (num_ra > vp->v_ralen)
+ num_ra = vp->v_ralen;
+ }
+
+ if (num_ra) /* case 2, 4 */
+ rbp = cluster_rbuild(vp, filesize,
+ bp, ioblkno, blkno, size, num_ra, flags);
+ else if (ioblkno == lblkno) {
+ bp->b_blkno = blkno;
+ /* Case 5: check how many blocks to read ahead */
+ ++ioblkno;
+ if ((u_quad_t)(ioblkno + 1) * size > filesize || /* widen before multiplying */
+ incore(vp, ioblkno) || (error = VOP_BMAP(vp,
+ ioblkno, NULL, &blkno, &num_ra)) || blkno == -1)
+ goto skip_readahead;
+ /*
+ * Adjust readahead as above
+ */
+ if (num_ra) {
+ if (!alreadyincore && ioblkno <= vp->v_maxra)
+ vp->v_ralen = max(vp->v_ralen >> 1, 1);
+ else if (num_ra > vp->v_ralen &&
+ lblkno != vp->v_lastr)
+ vp->v_ralen = vp->v_ralen ?
+ min(num_ra,vp->v_ralen<<1) : 1;
+ if (num_ra > vp->v_ralen)
+ num_ra = vp->v_ralen;
+ }
+ flags |= B_ASYNC;
+ if (num_ra)
+ rbp = cluster_rbuild(vp, filesize,
+ NULL, ioblkno, blkno, size, num_ra, flags);
+ else {
+ rbp = getblk(vp, ioblkno, size, 0, 0);
+ rbp->b_flags |= flags;
+ rbp->b_blkno = blkno;
+ }
+ } else {
+ /* case 2; read ahead single block */
+ rbp = getblk(vp, ioblkno, size, 0, 0);
+ rbp->b_flags |= flags;
+ rbp->b_blkno = blkno;
+ }
+
+ if (rbp == bp) /* case 4 */
+ rbp = NULL;
+ else if (rbp) { /* case 2, 5 */
+ trace(TR_BREADMISSRA,
+ pack(vp, (num_ra + 1) * size), ioblkno);
+ curproc->p_stats->p_ru.ru_inblock++; /* XXX */
+ }
+ }
+
+ /* XXX Kirk, do we need to make sure the bp has creds? */
+skip_readahead:
+ if (bp)
+ if (bp->b_flags & (B_DONE | B_DELWRI))
+ panic("cluster_read: DONE bp");
+ else /* binds to the inner if: start I/O on the requested block */
+ error = VOP_STRATEGY(bp);
+
+ if (rbp)
+ if (error || rbp->b_flags & (B_DONE | B_DELWRI)) { /* readahead not needed or not wanted: release it */
+ rbp->b_flags &= ~(B_ASYNC | B_READ);
+ brelse(rbp);
+ } else
+ (void) VOP_STRATEGY(rbp);
+
+ /*
+ * Recalculate our maximum readahead
+ */
+ if (rbp == NULL)
+ rbp = bp;
+ if (rbp)
+ vp->v_maxra = rbp->b_lblkno + (rbp->b_bufsize / size) - 1;
+
+ if (bp) /* I/O was issued for the requested block: wait for it */
+ return(biowait(bp));
+ return(error);
+}
+
+/*
+ * If blocks are contiguous on disk, use this to provide clustered
+ * read ahead. We will read as many blocks as possible sequentially
+ * and then parcel them up into logical blocks in the buffer hash table.
+ */
+struct buf *
+cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
+ struct vnode *vp;
+ u_quad_t filesize; /* the cluster is trimmed so it does not run past this */
+ struct buf *bp; /* existing buffer for lbn, or NULL to have one obtained here */
+ daddr_t lbn; /* first logical block of the cluster */
+ daddr_t blkno; /* value stored in b_blkno for the first block */
+ long size; /* size of one block in bytes */
+ int run; /* number of blocks to add after the first */
+ long flags; /* OR-ed into b_flags of the buffers set up here */
+{
+ struct cluster_save *b_save;
+ struct buf *tbp;
+ daddr_t bn;
+ int i, inc;
+
+#ifdef DIAGNOSTIC
+ if (size != vp->v_mount->mnt_stat.f_iosize)
+ panic("cluster_rbuild: size %ld != iosize %ld\n",
+ size, (long)vp->v_mount->mnt_stat.f_iosize);
+#endif
+ if ((u_quad_t)size * (lbn + run + 1) > filesize) /* widen first so the product cannot overflow a long */
+ --run;
+ if (run == 0) { /* nothing to cluster: return a plain single-block buffer */
+ if (!bp) {
+ bp = getblk(vp, lbn, size, 0, 0);
+ bp->b_blkno = blkno;
+ bp->b_flags |= flags;
+ }
+ return(bp);
+ }
+
+ bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1);
+ if (bp->b_flags & (B_DONE | B_DELWRI))
+ return (bp);
+
+ b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),
+ M_SEGMENT, M_WAITOK);
+ b_save->bs_bufsize = b_save->bs_bcount = size;
+ b_save->bs_nchildren = 0;
+ b_save->bs_children = (struct buf **)(b_save + 1); /* child pointer array lives right after the header */
+ b_save->bs_saveaddr = bp->b_saveaddr; /* remembered so it can be put back later */
+ bp->b_saveaddr = (caddr_t) b_save;
+
+ inc = btodb(size);
+ for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) {
+ if (incore(vp, lbn + i)) { /* stop the cluster at the first block already in core */
+ if (i == 1) { /* no children at all: undo the cluster setup */
+ bp->b_saveaddr = b_save->bs_saveaddr;
+ bp->b_flags &= ~B_CALL;
+ bp->b_iodone = NULL;
+ allocbuf(bp, size);
+ free(b_save, M_SEGMENT);
+ } else
+ allocbuf(bp, size * i);
+ break;
+ }
+ tbp = getblk(vp, lbn + i, 0, 0, 0);
+ /*
+ * getblk may return some memory in the buffer if there were
+ * no empty buffers to shed it to. If there is currently
+ * memory in the buffer, we move it down size bytes to make
+ * room for the valid pages that cluster_callback will insert.
+ * We do this now so we don't have to do it at interrupt time
+ * in the callback routine.
+ */
+ if (tbp->b_bufsize != 0) {
+ caddr_t bdata = (char *)tbp->b_data;
+
+ if (tbp->b_bufsize + size > MAXBSIZE)
+ panic("cluster_rbuild: too much memory");
+ if (tbp->b_bufsize > size) {
+ /*
+ * XXX if the source and destination regions
+ * overlap we have to copy backward to avoid
+ * clobbering any valid pages (i.e. pagemove
+ * implementations typically can't handle
+ * overlap).
+ */
+ bdata += tbp->b_bufsize;
+ while (bdata > (char *)tbp->b_data) {
+ bdata -= CLBYTES;
+ pagemove(bdata, bdata + size, CLBYTES);
+ }
+ } else
+ pagemove(bdata, bdata + size, tbp->b_bufsize);
+ }
+ tbp->b_blkno = bn;
+ tbp->b_flags |= flags | B_READ | B_ASYNC;
+ ++b_save->bs_nchildren;
+ b_save->bs_children[i - 1] = tbp;
+ }
+ return(bp);
+}
+
+/*
+ * Either get a new buffer or grow the existing one.
+ */
+struct buf *
+cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run)
+ struct vnode *vp;
+ struct buf *bp; /* buffer to grow, or NULL to obtain one with getblk() */
+ long flags; /* OR-ed into b_flags together with B_CALL */
+ daddr_t blkno; /* value stored in b_blkno */
+ daddr_t lblkno; /* logical block used when a buffer must be obtained */
+ long size; /* size of one block in bytes */
+ int run; /* buffer is resized to run * size bytes */
+{
+ if (!bp) {
+ bp = getblk(vp, lblkno, size, 0, 0);
+ if (bp->b_flags & (B_DONE | B_DELWRI)) { /* returned untouched apart from b_blkno */
+ bp->b_blkno = blkno;
+ return(bp);
+ }
+ }
+ allocbuf(bp, run * size);
+ bp->b_blkno = blkno;
+ bp->b_iodone = cluster_callback; /* with B_CALL set below, this is the completion hook */
+ bp->b_flags |= flags | B_CALL;
+ return(bp);
+}
+
+/*
+ * Cleanup after a clustered read or write.
+ * This is complicated by the fact that any of the buffers might have
+ * extra memory (if there were no empty buffer headers at allocbuf time)
+ * that we will need to shift around.
+ */
+void
+cluster_callback(bp)
+ struct buf *bp; /* cluster buffer whose b_saveaddr holds the cluster_save record */
+{
+ struct cluster_save *b_save;
+ struct buf **bpp, *tbp;
+ long bsize;
+ caddr_t cp;
+ int error = 0;
+
+ /*
+ * Must propagate errors to all the components.
+ */
+ if (bp->b_flags & B_ERROR)
+ error = bp->b_error;
+
+ b_save = (struct cluster_save *)(bp->b_saveaddr);
+ bp->b_saveaddr = b_save->bs_saveaddr; /* put back the value saved when the cluster was built */
+
+ bsize = b_save->bs_bufsize;
+ cp = (char *)bp->b_data + bsize; /* first child's data starts one block into the cluster */
+ /*
+ * Move memory from the large cluster buffer into the component
+ * buffers and mark IO as done on these.
+ */
+ for (bpp = b_save->bs_children; b_save->bs_nchildren--; ++bpp) {
+ tbp = *bpp;
+ pagemove(cp, tbp->b_data, bsize);
+ tbp->b_bufsize += bsize;
+ tbp->b_bcount = bsize;
+ if (error) {
+ tbp->b_flags |= B_ERROR;
+ tbp->b_error = error;
+ }
+ biodone(tbp);
+ bp->b_bufsize -= bsize;
+ cp += bsize;
+ }
+ /*
+ * If there was excess memory in the cluster buffer,
+ * slide it up adjacent to the remaining valid data.
+ */
+ if (bp->b_bufsize != bsize) {
+ if (bp->b_bufsize < bsize)
+ panic("cluster_callback: too little memory");
+ pagemove(cp, (char *)bp->b_data + bsize, bp->b_bufsize - bsize);
+ }
+ bp->b_bcount = bsize;
+ bp->b_iodone = NULL;
+ free(b_save, M_SEGMENT);
+ if (bp->b_flags & B_ASYNC) /* asynchronous: release the buffer here */
+ brelse(bp);
+ else { /* otherwise clear B_WANTED and wake anything sleeping on bp */
+ bp->b_flags &= ~B_WANTED;
+ wakeup((caddr_t)bp);
+ }
+}
+
+/*
+ * Do clustered write for FFS.
+ *
+ * Three cases:
+ * 1. Write is not sequential (write asynchronously)
+ * Write is sequential:
+ * 2. beginning of cluster - begin cluster
+ * 3. middle of a cluster - add to cluster
+ * 4. end of a cluster - asynchronously write cluster
+ */
+void
+cluster_write(bp, filesize)
+ struct buf *bp; /* buffer being written; supplies b_vp, b_lblkno, b_blkno, b_bcount */
+ u_quad_t filesize; /* used to detect a write that ends exactly at end of file */
+{
+ struct vnode *vp;
+ daddr_t lbn;
+ int maxclen, cursize;
+
+ vp = bp->b_vp;
+ lbn = bp->b_lblkno;
+
+ /* Initialize vnode to beginning of file. */
+ if (lbn == 0)
+ vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
+
+ if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
+ (bp->b_blkno != vp->v_lasta + btodb(bp->b_bcount))) {
+ maxclen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1;
+ if (vp->v_clen != 0) {
+ /*
+ * Next block is not sequential.
+ *
+ * If we are not writing at end of file, the process
+ * seeked to another point in the file since its
+ * last write, or we have reached our maximum
+ * cluster size, then push the previous cluster.
+ * Otherwise try reallocating to make it sequential.
+ */
+ cursize = vp->v_lastw - vp->v_cstart + 1;
+ if (!doreallocblks ||
+ (u_quad_t)(lbn + 1) * bp->b_bcount != filesize || /* widen before multiplying */
+ lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
+ cluster_wbuild(vp, NULL, bp->b_bcount,
+ vp->v_cstart, cursize, lbn);
+ } else {
+ struct buf **bpp, **endbp;
+ struct cluster_save *buflist;
+
+ buflist = cluster_collectbufs(vp, bp);
+ endbp = &buflist->bs_children
+ [buflist->bs_nchildren - 1]; /* last slot holds bp itself */
+ if (VOP_REALLOCBLKS(vp, buflist)) {
+ /*
+ * Failed, push the previous cluster.
+ */
+ for (bpp = buflist->bs_children;
+ bpp < endbp; bpp++) /* bp (the last slot) is not released here */
+ brelse(*bpp);
+ free(buflist, M_SEGMENT);
+ cluster_wbuild(vp, NULL, bp->b_bcount,
+ vp->v_cstart, cursize, lbn);
+ } else {
+ /*
+ * Succeeded, keep building cluster.
+ */
+ for (bpp = buflist->bs_children;
+ bpp <= endbp; bpp++) /* includes bp */
+ bdwrite(*bpp);
+ free(buflist, M_SEGMENT);
+ vp->v_lastw = lbn;
+ vp->v_lasta = bp->b_blkno;
+ return;
+ }
+ }
+ }
+ /*
+ * Consider beginning a cluster.
+ * If at end of file, make cluster as large as possible,
+ * otherwise find size of existing cluster.
+ */
+ if ((u_quad_t)(lbn + 1) * bp->b_bcount != filesize && /* widen before multiplying */
+ (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen) ||
+ bp->b_blkno == -1)) {
+ bawrite(bp);
+ vp->v_clen = 0;
+ vp->v_lasta = bp->b_blkno;
+ vp->v_cstart = lbn + 1;
+ vp->v_lastw = lbn;
+ return;
+ }
+ vp->v_clen = maxclen;
+ if (maxclen == 0) { /* I/O not contiguous */
+ vp->v_cstart = lbn + 1;
+ bawrite(bp);
+ } else { /* Wait for rest of cluster */
+ vp->v_cstart = lbn;
+ bdwrite(bp);
+ }
+ } else if (lbn == vp->v_cstart + vp->v_clen) {
+ /*
+ * At end of cluster, write it out.
+ */
+ cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart,
+ vp->v_clen + 1, lbn);
+ vp->v_clen = 0;
+ vp->v_cstart = lbn + 1;
+ } else
+ /*
+ * In the middle of a cluster, so just delay the
+ * I/O for now.
+ */
+ bdwrite(bp);
+ vp->v_lastw = lbn; /* remembered for the next call's sequentiality test */
+ vp->v_lasta = bp->b_blkno;
+}
+
+
+/*
+ * This is an awful lot like cluster_rbuild...wish they could be combined.
+ * The last lbn argument is the current block on which I/O is being
+ * performed. Check to see that it doesn't fall in the middle of
+ * the current block (if last_bp == NULL).
+ */
+void
+cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
+ struct vnode *vp;
+ struct buf *last_bp; /* buffer for block lbn passed in by the caller, or NULL */
+ long size; /* size of one block in bytes */
+ daddr_t start_lbn; /* first logical block of the candidate cluster */
+ int len; /* number of blocks in the candidate cluster */
+ daddr_t lbn; /* block currently being written by the caller */
+{
+ struct cluster_save *b_save;
+ struct buf *bp, *tbp;
+ caddr_t cp;
+ int i, s;
+
+#ifdef DIAGNOSTIC
+ if (size != vp->v_mount->mnt_stat.f_iosize)
+ panic("cluster_wbuild: size %ld != iosize %ld\n",
+ size, (long)vp->v_mount->mnt_stat.f_iosize);
+#endif
+redo:
+ while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) { /* skip leading blocks not in core, and lbn itself */
+ ++start_lbn;
+ --len;
+ }
+
+ /* Get more memory for current buffer */
+ if (len <= 1) { /* zero or one block left: no cluster to build */
+ if (last_bp) {
+ bawrite(last_bp);
+ } else if (len) {
+ bp = getblk(vp, start_lbn, size, 0, 0);
+ bawrite(bp);
+ }
+ return;
+ }
+
+ bp = getblk(vp, start_lbn, size, 0, 0);
+ if (!(bp->b_flags & B_DELWRI)) { /* no delayed write pending on it: skip this block */
+ ++start_lbn;
+ --len;
+ brelse(bp);
+ goto redo;
+ }
+
+ /*
+ * Extra memory in the buffer, punt on this buffer.
+ * XXX we could handle this in most cases, but we would have to
+ * push the extra memory down to after our max possible cluster
+ * size and then potentially pull it back up if the cluster was
+ * terminated prematurely--too much hassle.
+ */
+ if (bp->b_bcount != bp->b_bufsize) {
+ ++start_lbn;
+ --len;
+ bawrite(bp);
+ goto redo;
+ }
+
+ --len; /* from here on len counts the potential children only */
+ b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save),
+ M_SEGMENT, M_WAITOK);
+ b_save->bs_bcount = bp->b_bcount;
+ b_save->bs_bufsize = bp->b_bufsize;
+ b_save->bs_nchildren = 0;
+ b_save->bs_children = (struct buf **)(b_save + 1); /* child pointer array lives right after the header */
+ b_save->bs_saveaddr = bp->b_saveaddr;
+ bp->b_saveaddr = (caddr_t) b_save;
+
+ bp->b_flags |= B_CALL;
+ bp->b_iodone = cluster_callback;
+ cp = (char *)bp->b_data + size;
+ for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) {
+ /*
+ * Block is not in core or the non-sequential block
+ * ending our cluster was part of the cluster (in which
+ * case we don't want to write it twice).
+ */
+ if (!incore(vp, start_lbn) ||
+ (last_bp == NULL && start_lbn == lbn))
+ break;
+
+ /*
+ * Get the desired block buffer (unless it is the final
+ * sequential block whose buffer was passed in explicitly
+ * as last_bp).
+ */
+ if (last_bp == NULL || start_lbn != lbn) {
+ tbp = getblk(vp, start_lbn, size, 0, 0);
+ if (!(tbp->b_flags & B_DELWRI)) {
+ brelse(tbp);
+ break;
+ }
+ } else
+ tbp = last_bp;
+
+ ++b_save->bs_nchildren;
+
+ /* Move memory from children to parent */
+ if (tbp->b_blkno != (bp->b_blkno + btodb(bp->b_bufsize))) {
+ printf("Clustered Block: %d addr %x bufsize: %d\n",
+ bp->b_lblkno, bp->b_blkno, bp->b_bufsize);
+ printf("Child Block: %d addr: %x\n", tbp->b_lblkno,
+ tbp->b_blkno);
+ panic("Clustered write to wrong blocks");
+ }
+
+ pagemove(tbp->b_data, cp, size);
+ bp->b_bcount += size;
+ bp->b_bufsize += size;
+
+ tbp->b_bufsize -= size;
+ tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
+ tbp->b_flags |= (B_ASYNC | B_AGE);
+ s = splbio();
+ reassignbuf(tbp, tbp->b_vp); /* put on clean list */
+ ++tbp->b_vp->v_numoutput;
+ splx(s);
+ b_save->bs_children[i] = tbp;
+
+ cp += size;
+ }
+
+ if (i == 0) {
+ /* None to cluster */
+ bp->b_saveaddr = b_save->bs_saveaddr;
+ bp->b_flags &= ~B_CALL;
+ bp->b_iodone = NULL;
+ free(b_save, M_SEGMENT);
+ }
+ bawrite(bp);
+ if (i < len) { /* loop ended early: start again just past the block that stopped it */
+ len -= i + 1;
+ start_lbn += 1;
+ goto redo;
+ }
+}
+
+/*
+ * Collect together all the buffers in a cluster.
+ * Plus add one additional buffer.
+ */
+struct cluster_save *
+cluster_collectbufs(vp, last_bp)
+ struct vnode *vp; /* supplies v_cstart and v_lastw, the block range collected */
+ struct buf *last_bp; /* extra buffer stored in the final slot */
+{
+ struct cluster_save *buflist;
+ daddr_t lbn;
+ int i, len;
+
+ len = vp->v_lastw - vp->v_cstart + 1;
+ buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist),
+ M_SEGMENT, M_WAITOK); /* header plus len + 1 pointers; the caller frees it */
+ buflist->bs_nchildren = 0;
+ buflist->bs_children = (struct buf **)(buflist + 1); /* pointer array lives right after the header */
+ for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++)
+ (void)bread(vp, lbn, last_bp->b_bcount, NOCRED, /* the result of bread() is deliberately discarded */
+ &buflist->bs_children[i]);
+ buflist->bs_children[i] = last_bp;
+ buflist->bs_nchildren = i + 1;
+ return (buflist);
+}
diff --git a/sys/kern/vfs_conf.c b/sys/kern/vfs_conf.c
new file mode 100644
index 000000000000..2fe39eb674b0
--- /dev/null
+++ b/sys/kern/vfs_conf.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_conf.c 8.8 (Berkeley) 3/31/94
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+#ifdef FFS
+#include <ufs/ffs/ffs_extern.h>
+
+/*
+ * This specifies the filesystem used to mount the root.
+ * This specification should be done by /etc/config.
+ *
+ * mountroot is only defined in this file when the kernel is built with
+ * the FFS option, in which case it is initialized to ffs_mountroot.
+ */
+int (*mountroot)() = ffs_mountroot;
+#endif
+
+/*
+ * These define the root filesystem and device.
+ *
+ * NOTE(review): neither pointer is given an initializer here; they are
+ * presumably filled in elsewhere while the root is mounted -- confirm.
+ */
+struct mount *rootfs;
+struct vnode *rootvnode;
+
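+/*
+ * Set up the filesystem operations switch.
+ * For each filesystem type, FOO_VFSOPS expands to the address of that
+ * filesystem's vfsops structure when the filesystem is configured into
+ * the kernel, and to NULL otherwise.
+ * The types are defined in mount.h.
+ */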
+#ifdef FFS
+extern struct vfsops ufs_vfsops;
+#define UFS_VFSOPS &ufs_vfsops
+#else
+#define UFS_VFSOPS NULL
+#endif
+
+#ifdef LFS
+extern struct vfsops lfs_vfsops;
+#define LFS_VFSOPS &lfs_vfsops
+#else
+#define LFS_VFSOPS NULL
+#endif
+
+#ifdef MFS
+extern struct vfsops mfs_vfsops;
+#define MFS_VFSOPS &mfs_vfsops
+#else
+#define MFS_VFSOPS NULL
+#endif
+
+#ifdef NFS
+extern struct vfsops nfs_vfsops;
+#define NFS_VFSOPS &nfs_vfsops
+#else
+#define NFS_VFSOPS NULL
+#endif
+
+#ifdef FDESC
+extern struct vfsops fdesc_vfsops;
+#define FDESC_VFSOPS &fdesc_vfsops
+#else
+#define FDESC_VFSOPS NULL
+#endif
+
+#ifdef PORTAL
+extern struct vfsops portal_vfsops;
+#define PORTAL_VFSOPS &portal_vfsops
+#else
+#define PORTAL_VFSOPS NULL
+#endif
+
+#ifdef NULLFS
+extern struct vfsops null_vfsops;
+#define NULL_VFSOPS &null_vfsops
+#else
+#define NULL_VFSOPS NULL
+#endif
+
+#ifdef UMAPFS
+extern struct vfsops umap_vfsops;
+#define UMAP_VFSOPS &umap_vfsops
+#else
+#define UMAP_VFSOPS NULL
+#endif
+
+#ifdef KERNFS
+extern struct vfsops kernfs_vfsops;
+#define KERNFS_VFSOPS &kernfs_vfsops
+#else
+#define KERNFS_VFSOPS NULL
+#endif
+
+#ifdef PROCFS
+extern struct vfsops procfs_vfsops;
+#define PROCFS_VFSOPS &procfs_vfsops
+#else
+#define PROCFS_VFSOPS NULL
+#endif
+
+#ifdef AFS
+extern struct vfsops afs_vfsops;
+#define AFS_VFSOPS &afs_vfsops
+#else
+#define AFS_VFSOPS NULL
+#endif
+
+#ifdef CD9660
+extern struct vfsops cd9660_vfsops;
+#define CD9660_VFSOPS &cd9660_vfsops
+#else
+#define CD9660_VFSOPS NULL
+#endif
+
+#ifdef UNION
+extern struct vfsops union_vfsops;
+#define UNION_VFSOPS &union_vfsops
+#else
+#define UNION_VFSOPS NULL
+#endif
+
+/*
+ * Filesystem switch: one vfsops pointer per mount type number.
+ * A slot is NULL when the filesystem is not configured (or, for slots
+ * 0, 4 and 6, when no vfsops is named here at all).  The table ends
+ * with one extra null entry.
+ */
+struct vfsops *vfssw[] = {
+	NULL,			/*  0 = MOUNT_NONE */
+	UFS_VFSOPS,		/*  1 = MOUNT_UFS */
+	NFS_VFSOPS,		/*  2 = MOUNT_NFS */
+	MFS_VFSOPS,		/*  3 = MOUNT_MFS */
+	NULL,			/*  4 = MOUNT_PC */
+	LFS_VFSOPS,		/*  5 = MOUNT_LFS */
+	NULL,			/*  6 = MOUNT_LOFS */
+	FDESC_VFSOPS,		/*  7 = MOUNT_FDESC */
+	PORTAL_VFSOPS,		/*  8 = MOUNT_PORTAL */
+	NULL_VFSOPS,		/*  9 = MOUNT_NULL */
+	UMAP_VFSOPS,		/* 10 = MOUNT_UMAP */
+	KERNFS_VFSOPS,		/* 11 = MOUNT_KERNFS */
+	PROCFS_VFSOPS,		/* 12 = MOUNT_PROCFS */
+	AFS_VFSOPS,		/* 13 = MOUNT_AFS */
+	CD9660_VFSOPS,		/* 14 = MOUNT_CD9660 */
+	UNION_VFSOPS,		/* 15 = MOUNT_UNION */
+	NULL			/* trailing null entry */
+};
+
+
+/*
+ *
+ * vfs_opv_descs enumerates the list of vnode classes, each with its own
+ * vnode operation vector. It is consulted at system boot to build operation
+ * vectors. It is NULL terminated.
+ *
+ * Entries for optional filesystems are compiled in only when the matching
+ * kernel option (LFS, MFS, NFS, FDESC, PORTAL, NULLFS, UMAPFS, KERNFS,
+ * PROCFS, CD9660, UNION) is defined, and every fifo variant additionally
+ * requires FIFO. The ffs vnode/spec entries and the dead and spec entries
+ * are listed unconditionally.
+ *
+ */
+extern struct vnodeopv_desc ffs_vnodeop_opv_desc;
+extern struct vnodeopv_desc ffs_specop_opv_desc;
+extern struct vnodeopv_desc ffs_fifoop_opv_desc;
+extern struct vnodeopv_desc lfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc lfs_specop_opv_desc;
+extern struct vnodeopv_desc lfs_fifoop_opv_desc;
+extern struct vnodeopv_desc mfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc dead_vnodeop_opv_desc;
+extern struct vnodeopv_desc fifo_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_vnodeop_opv_desc;
+extern struct vnodeopv_desc nfsv2_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fdesc_vnodeop_opv_desc;
+extern struct vnodeopv_desc portal_vnodeop_opv_desc;
+extern struct vnodeopv_desc null_vnodeop_opv_desc;
+extern struct vnodeopv_desc umap_vnodeop_opv_desc;
+extern struct vnodeopv_desc kernfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc procfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_specop_opv_desc;
+extern struct vnodeopv_desc cd9660_fifoop_opv_desc;
+extern struct vnodeopv_desc union_vnodeop_opv_desc;
+
+struct vnodeopv_desc *vfs_opv_descs[] = {
+ &ffs_vnodeop_opv_desc,
+ &ffs_specop_opv_desc,
+#ifdef FIFO
+ &ffs_fifoop_opv_desc,
+#endif
+ &dead_vnodeop_opv_desc,
+#ifdef FIFO
+ &fifo_vnodeop_opv_desc,
+#endif
+ &spec_vnodeop_opv_desc,
+#ifdef LFS
+ &lfs_vnodeop_opv_desc,
+ &lfs_specop_opv_desc,
+#ifdef FIFO
+ &lfs_fifoop_opv_desc,
+#endif
+#endif
+#ifdef MFS
+ &mfs_vnodeop_opv_desc,
+#endif
+#ifdef NFS
+ &nfsv2_vnodeop_opv_desc,
+ &spec_nfsv2nodeop_opv_desc,
+#ifdef FIFO
+ &fifo_nfsv2nodeop_opv_desc,
+#endif
+#endif
+#ifdef FDESC
+ &fdesc_vnodeop_opv_desc,
+#endif
+#ifdef PORTAL
+ &portal_vnodeop_opv_desc,
+#endif
+#ifdef NULLFS
+ &null_vnodeop_opv_desc,
+#endif
+#ifdef UMAPFS
+ &umap_vnodeop_opv_desc,
+#endif
+#ifdef KERNFS
+ &kernfs_vnodeop_opv_desc,
+#endif
+#ifdef PROCFS
+ &procfs_vnodeop_opv_desc,
+#endif
+#ifdef CD9660
+ &cd9660_vnodeop_opv_desc,
+ &cd9660_specop_opv_desc,
+#ifdef FIFO
+ &cd9660_fifoop_opv_desc,
+#endif
+#endif
+#ifdef UNION
+ &union_vnodeop_opv_desc,
+#endif
+ NULL
+};
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
new file mode 100644
index 000000000000..9891fe61c198
--- /dev/null
+++ b/sys/kern/vfs_export.c
@@ -0,0 +1,1322 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
+ */
+
+/*
+ * External virtual filesystem routines
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/namei.h>
+#include <sys/ucred.h>
+#include <sys/buf.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+#include <miscfs/specfs/specdev.h>
+
+/*
+ * Conversion tables between file-mode type bits and vnode types.
+ * NOTE(review): iftovt_tab is presumably indexed by the four file-type
+ * bits of a mode word (see the IFTOVT macro) -- confirm against vnode.h.
+ */
+enum vtype iftovt_tab[16] = {
+	VNON,	VFIFO,	VCHR,	VNON,	/*  0 -  3 */
+	VDIR,	VNON,	VBLK,	VNON,	/*  4 -  7 */
+	VREG,	VNON,	VLNK,	VNON,	/*  8 - 11 */
+	VSOCK,	VNON,	VNON,	VBAD	/* 12 - 15 */
+};
+/* Entries follow the same order as the typename[] table used by vprint(). */
+int vttoif_tab[9] = {
+	0,		/* VNON */
+	S_IFREG,	/* VREG */
+	S_IFDIR,	/* VDIR */
+	S_IFBLK,	/* VBLK */
+	S_IFCHR,	/* VCHR */
+	S_IFLNK,	/* VLNK */
+	S_IFSOCK,	/* VSOCK */
+	S_IFIFO,	/* VFIFO */
+	S_IFMT		/* VBAD */
+};
+
+/*
+ * Insq/Remq for the vnode usage lists.
+ *
+ * bufinsvn(bp, dp) links bp at the head of buffer list dp through its
+ * b_vnbufs entry. bufremvn(bp) unlinks bp and then stores NOLIST in
+ * b_vnbufs.le_next; brelvp() and reassignbuf() test for that marker to
+ * decide whether a buffer is currently on a list.
+ * NOTE(review): bufremvn expands to a bare { } block, so it must not be
+ * used as the body of an if statement that has an else clause.
+ */
+#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
+#define bufremvn(bp) { \
+ LIST_REMOVE(bp, b_vnbufs); \
+ (bp)->b_vnbufs.le_next = NOLIST; \
+}
+
+TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
+struct mntlist mountlist;			/* mounted filesystem list */
+
+/*
+ * Set up the two global queues used by the vnode layer: the list of
+ * mounted filesystems and the vnode free list.  Both start out empty.
+ */
+vntblinit()
+{
+
+	TAILQ_INIT(&mountlist);
+	TAILQ_INIT(&vnode_free_list);
+}
+
+/*
+ * Take the mount lock (MNT_MLOCK) on a filesystem.
+ * While another holder has it, MNT_MWAIT is set and the caller sleeps
+ * on the mount structure at priority PVFS.  Always returns 0.
+ * Used to prevent access to the filesystem while mounting and unmounting.
+ */
+vfs_lock(mp)
+	register struct mount *mp;
+{
+
+	for (;;) {
+		if ((mp->mnt_flag & MNT_MLOCK) == 0)
+			break;
+		/* Ask the holder to wake us when it unlocks. */
+		mp->mnt_flag |= MNT_MWAIT;
+		sleep((caddr_t)mp, PVFS);
+	}
+	mp->mnt_flag |= MNT_MLOCK;
+	return (0);
+}
+
+/*
+ * Drop the mount lock taken by vfs_lock().
+ * Panics if MNT_MLOCK is not set.  If anyone recorded MNT_MWAIT, that
+ * flag is cleared and the sleepers on the mount structure are woken.
+ */
+void
+vfs_unlock(mp)
+	register struct mount *mp;
+{
+
+	if (!(mp->mnt_flag & MNT_MLOCK))
+		panic("vfs_unlock: not locked");
+	mp->mnt_flag &= ~MNT_MLOCK;
+	if ((mp->mnt_flag & MNT_MWAIT) == 0)
+		return;
+	mp->mnt_flag &= ~MNT_MWAIT;
+	wakeup((caddr_t)mp);
+}
+
+/*
+ * Mark a mount point as busy (MNT_MPBUSY).
+ * Used to synchronize access and to delay unmounting.
+ * Sleeps on &mp->mnt_flag while someone else holds the busy mark.
+ * Returns 1 without marking if the filesystem is being unmounted
+ * (MNT_UNMOUNT), otherwise 0 with the mark held.
+ */
+vfs_busy(mp)
+	register struct mount *mp;
+{
+
+	for (;;) {
+		if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+			break;
+		mp->mnt_flag |= MNT_MPWANT;
+		sleep((caddr_t)&mp->mnt_flag, PVFS);
+	}
+	if ((mp->mnt_flag & MNT_UNMOUNT) != 0)
+		return (1);
+	mp->mnt_flag |= MNT_MPBUSY;
+	return (0);
+}
+
+/*
+ * Release the busy mark set by vfs_busy().
+ * Panics if the filesystem is not marked MNT_MPBUSY.  Anyone who
+ * registered MNT_MPWANT is woken on &mp->mnt_flag.
+ */
+vfs_unbusy(mp)
+	register struct mount *mp;
+{
+	int wanted;
+
+	if (!(mp->mnt_flag & MNT_MPBUSY))
+		panic("vfs_unbusy: not busy");
+	/* Remember whether anyone is waiting before both bits are cleared. */
+	wanted = (mp->mnt_flag & MNT_MPWANT) != 0;
+	mp->mnt_flag &= ~MNT_MPBUSY;
+	if (wanted) {
+		mp->mnt_flag &= ~MNT_MPWANT;
+		wakeup((caddr_t)&mp->mnt_flag);
+	}
+}
+
+/*
+ * Find the mounted filesystem whose fsid matches *fsid.
+ * Walks mountlist and compares both words of the identifier.
+ * Returns the mount structure, or a null pointer if none matches.
+ */
+struct mount *
+getvfs(fsid)
+	fsid_t *fsid;
+{
+	register struct mount *mp;
+
+	mp = mountlist.tqh_first;
+	while (mp != NULL) {
+		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
+		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
+			break;
+		mp = mp->mnt_list.tqe_next;
+	}
+	if (mp == NULL)
+		return ((struct mount *)0);
+	return (mp);
+}
+
+/*
+ * Assign mp a filesystem id that no mounted filesystem is using.
+ *
+ * Word 1 of the fsid is the mount type.  Word 0 is a device number
+ * built from (nblkdev + mtype) and a running counter kept in a static;
+ * the counter never starts from 0 and is advanced until getvfs() no
+ * longer finds the candidate id.
+ */
+void
+getnewfsid(mp, mtype)
+	struct mount *mp;
+	int mtype;
+{
+	static u_short xxxfs_mntid;
+	fsid_t candidate;
+
+	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
+	mp->mnt_stat.f_fsid.val[1] = mtype;
+	if (xxxfs_mntid == 0)
+		xxxfs_mntid++;
+	candidate.val[1] = mtype;
+	candidate.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
+	/* With nothing mounted there is nothing to collide with. */
+	while (mountlist.tqh_first != NULL && getvfs(&candidate)) {
+		xxxfs_mntid++;
+		candidate.val[0]++;
+	}
+	mp->mnt_stat.f_fsid.val[0] = candidate.val[0];
+}
+
+/*
+ * Set vnode attributes to VNOVAL
+ *
+ * va_type is set to VNON and va_vaflags to 0; every other field assigned
+ * below receives VNOVAL.
+ * NOTE(review): the long assignment chain converts the value through each
+ * member's type from right to left, so the order of the members may
+ * matter -- keep it unless the field types have been checked.
+ */
+void vattr_null(vap)
+ register struct vattr *vap;
+{
+
+ vap->va_type = VNON;
+ vap->va_size = vap->va_bytes = VNOVAL;
+ vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
+ vap->va_fsid = vap->va_fileid =
+ vap->va_blocksize = vap->va_rdev =
+ vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
+ vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
+ vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
+ vap->va_flags = vap->va_gen = VNOVAL;
+ vap->va_vaflags = 0;
+}
+
+/*
+ * Routines having to do with the management of the vnode table.
+ */
+extern int (**dead_vnodeop_p)();
+extern void vclean();
+long numvnodes;
+extern struct vattr va_null;
+
+/*
+ * Return the next vnode from the free list.
+ *
+ * A fresh vnode is malloc'ed and zeroed while numvnodes is below
+ * desiredvnodes, or while the free list is empty and numvnodes is below
+ * 2 * desiredvnodes. Otherwise the head of vnode_free_list is recycled:
+ * it is taken off the list, vgone()'d unless it is already VBAD, and its
+ * read-ahead/clustering fields, flags and socket pointer are reset.
+ * If recycling is required but the free list is empty, *vpp is cleared
+ * and ENFILE is returned.
+ *
+ * On success the vnode gets the given tag and operations vector, is put
+ * on mp's vnode list with insmntque(), has v_usecount set to 1 and
+ * v_data cleared, and is stored through vpp; the return value is 0.
+ */
+getnewvnode(tag, mp, vops, vpp)
+ enum vtagtype tag;
+ struct mount *mp;
+ int (**vops)();
+ struct vnode **vpp;
+{
+ register struct vnode *vp;
+ int s;
+
+ if ((vnode_free_list.tqh_first == NULL &&
+ numvnodes < 2 * desiredvnodes) ||
+ numvnodes < desiredvnodes) {
+ vp = (struct vnode *)malloc((u_long)sizeof *vp,
+ M_VNODE, M_WAITOK);
+ bzero((char *)vp, sizeof *vp);
+ numvnodes++;
+ } else {
+ if ((vp = vnode_free_list.tqh_first) == NULL) {
+ tablefull("vnode");
+ *vpp = 0;
+ return (ENFILE);
+ }
+ if (vp->v_usecount)
+ panic("free vnode isn't");
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ /* see comment on why 0xdeadb is set at end of vgone (below) */
+ vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
+ vp->v_lease = NULL;
+ if (vp->v_type != VBAD)
+ vgone(vp);
+#ifdef DIAGNOSTIC
+ if (vp->v_data)
+ panic("cleaned vnode isn't");
+ s = splbio();
+ if (vp->v_numoutput)
+ panic("Clean vnode has pending I/O's");
+ splx(s);
+#endif
+ vp->v_flag = 0;
+ vp->v_lastr = 0;
+ vp->v_ralen = 0;
+ vp->v_maxra = 0;
+ vp->v_lastw = 0;
+ vp->v_lasta = 0;
+ vp->v_cstart = 0;
+ vp->v_clen = 0;
+ vp->v_socket = 0;
+ }
+ vp->v_type = VNON;
+ cache_purge(vp);
+ vp->v_tag = tag;
+ vp->v_op = vops;
+ insmntque(vp, mp);
+ *vpp = vp;
+ vp->v_usecount = 1;
+ vp->v_data = 0;
+ return (0);
+}
+
+/*
+ * Move a vnode from one mount queue to another.
+ * The vnode is unlinked from the vnode list of its current mount point
+ * (if it has one), v_mount is set to mp, and, unless mp is null, the
+ * vnode is linked at the head of mp's vnode list.
+ */
+insmntque(vp, mp)
+	register struct vnode *vp;
+	register struct mount *mp;
+{
+
+	/* Leave the old mount point's list first. */
+	if (vp->v_mount != NULL)
+		LIST_REMOVE(vp, v_mntvnodes);
+
+	vp->v_mount = mp;
+
+	/* A null mp just detaches the vnode. */
+	if (mp != NULL)
+		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
+}
+
+/*
+ * Update outstanding I/O count and do wakeup if requested.
+ *
+ * Clears B_WRITEINPROG on bp.  If bp is attached to a vnode, that vnode's
+ * v_numoutput is decremented (panicking if it goes negative) and, once it
+ * reaches zero, anyone who set VBWAIT is woken on &vp->v_numoutput.
+ */
+vwakeup(bp)
+	register struct buf *bp;
+{
+	register struct vnode *vp;
+
+	bp->b_flags &= ~B_WRITEINPROG;
+	if ((vp = bp->b_vp) != NULL) {
+		vp->v_numoutput--;
+		if (vp->v_numoutput < 0)
+			panic("vwakeup: neg numoutput");
+		/* The count cannot be negative here, so only zero is tested. */
+		if ((vp->v_flag & VBWAIT) && vp->v_numoutput == 0) {
+			vp->v_flag &= ~VBWAIT;
+			wakeup((caddr_t)&vp->v_numoutput);
+		}
+	}
+}
+
+/*
+ * Flush out and invalidate all buffers associated with a vnode.
+ * Called with the underlying object locked.
+ *
+ * flags:
+ *   V_SAVE     - call VOP_FSYNC(MNT_WAIT) first; panic if dirty buffers
+ *                remain afterwards. A delayed-write buffer met later is
+ *                written with VOP_BWRITE instead of being invalidated.
+ *   V_SAVEMETA - leave buffers with a negative logical block number
+ *                alone.
+ * slpflag and slptimeo are passed to tsleep() when a buffer is B_BUSY.
+ *
+ * Returns 0 on success, or the error from VOP_FSYNC or tsleep. Unless
+ * V_SAVEMETA was given, panics if any buffer is still on the clean or
+ * dirty list at the end.
+ */
+int
+vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
+ register struct vnode *vp;
+ int flags;
+ struct ucred *cred;
+ struct proc *p;
+ int slpflag, slptimeo;
+{
+ register struct buf *bp;
+ struct buf *nbp, *blist;
+ int s, error;
+
+ if (flags & V_SAVE) {
+ if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
+ return (error);
+ if (vp->v_dirtyblkhd.lh_first != NULL)
+ panic("vinvalbuf: dirty bufs");
+ }
+ for (;;) {
+ if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
+ while (blist && blist->b_lblkno < 0)
+ blist = blist->b_vnbufs.le_next;
+ if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
+ (flags & V_SAVEMETA))
+ while (blist && blist->b_lblkno < 0)
+ blist = blist->b_vnbufs.le_next;
+ if (!blist)
+ break;
+
+ for (bp = blist; bp; bp = nbp) {
+ nbp = bp->b_vnbufs.le_next;
+ if (flags & V_SAVEMETA && bp->b_lblkno < 0)
+ continue;
+ s = splbio();
+ if (bp->b_flags & B_BUSY) {
+ bp->b_flags |= B_WANTED;
+ error = tsleep((caddr_t)bp,
+ slpflag | (PRIBIO + 1), "vinvalbuf",
+ slptimeo);
+ splx(s);
+ if (error)
+ return (error);
+ break;
+ }
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ splx(s);
+ /*
+ * XXX Since there are no node locks for NFS, I believe
+ * there is a slight chance that a delayed write will
+ * occur while sleeping just above, so check for it.
+ * After the write the inner loop is abandoned and the
+ * lists are rescanned from the top by the outer loop.
+ */
+ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
+ (void) VOP_BWRITE(bp);
+ break;
+ }
+ bp->b_flags |= B_INVAL;
+ brelse(bp);
+ }
+ }
+ if (!(flags & V_SAVEMETA) &&
+ (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
+ panic("vinvalbuf: flush failed");
+ return (0);
+}
+
+/*
+ * Attach buffer bp to vnode vp.
+ * Panics if the buffer already belongs to a vnode.  The vnode gains a
+ * hold reference, b_dev is set to the vnode's device number for block
+ * and character devices (NODEV otherwise), and the buffer is put on
+ * the vnode's clean list.
+ */
+bgetvp(vp, bp)
+	register struct vnode *vp;
+	register struct buf *bp;
+{
+
+	if (bp->b_vp)
+		panic("bgetvp: not free");
+	VHOLD(vp);
+	bp->b_vp = vp;
+	switch (vp->v_type) {
+	case VBLK:
+	case VCHR:
+		bp->b_dev = vp->v_rdev;
+		break;
+	default:
+		bp->b_dev = NODEV;
+		break;
+	}
+	/* New associations always start out on the clean list. */
+	bufinsvn(bp, &vp->v_cleanblkhd);
+}
+
+/*
+ * Detach buffer bp from its vnode.
+ * Panics if the buffer has no vnode.  The buffer is taken off the vnode
+ * buffer list it is on (if any), b_vp is cleared, and the hold reference
+ * on the vnode is released.
+ */
+brelvp(bp)
+	register struct buf *bp;
+{
+	struct vnode *owner;
+
+	owner = bp->b_vp;
+	if (owner == (struct vnode *) 0)
+		panic("brelvp: NULL");
+	/* NOLIST in le_next marks a buffer that is on no list. */
+	if (bp->b_vnbufs.le_next != NOLIST)
+		bufremvn(bp);
+	bp->b_vp = (struct vnode *) 0;
+	HOLDRELE(owner);
+}
+
+/*
+ * Reassign a buffer from one vnode to another.
+ * Used to assign file specific control information
+ * (indirect blocks) to the vnode to which they belong.
+ *
+ * The buffer is removed from whatever vnode buffer list it is on and
+ * inserted on newvp's dirty list if B_DELWRI is set, otherwise on its
+ * clean list.  b_vp itself is not changed here.  A null newvp is only
+ * reported on the console and the buffer is left untouched.
+ */
+reassignbuf(bp, newvp)
+	register struct buf *bp;
+	register struct vnode *newvp;
+{
+	register struct buflists *listheadp;
+
+	if (newvp == NULL) {
+		/* Terminate the line so later console output starts cleanly. */
+		printf("reassignbuf: NULL\n");
+		return;
+	}
+	/*
+	 * Delete from old vnode list, if on one.
+	 */
+	if (bp->b_vnbufs.le_next != NOLIST)
+		bufremvn(bp);
+	/*
+	 * If dirty, put on list of dirty buffers;
+	 * otherwise insert onto list of clean buffers.
+	 */
+	if (bp->b_flags & B_DELWRI)
+		listheadp = &newvp->v_dirtyblkhd;
+	else
+		listheadp = &newvp->v_cleanblkhd;
+	bufinsvn(bp, listheadp);
+}
+
+/*
+ * Create a vnode for a block device.
+ * Used for root filesystem, argdev, and swap areas.
+ * Also used for memory file system special devices.
+ *
+ * On success 0 is returned and *vpp holds the vnode: either a new VBLK
+ * vnode using spec_vnodeop_p, or the existing alias handed back by
+ * checkalias().  If dev is NODEV no vnode is created; *vpp is set to
+ * NULLVP and 0 is returned.  If getnewvnode() fails, *vpp is cleared
+ * and its error is returned.
+ */
+bdevvp(dev, vpp)
+	dev_t dev;
+	struct vnode **vpp;
+{
+	register struct vnode *vp;
+	struct vnode *nvp;
+	int error;
+
+	if (dev == NODEV) {
+		/* Never leave the caller's pointer uninitialized. */
+		*vpp = NULLVP;
+		return (0);
+	}
+	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
+	if (error) {
+		*vpp = 0;
+		return (error);
+	}
+	vp = nvp;
+	vp->v_type = VBLK;
+	/* Prefer an already existing vnode for this device, if there is one. */
+	if ((nvp = checkalias(vp, dev, (struct mount *)0)) != NULL) {
+		vput(vp);
+		vp = nvp;
+	}
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Check to see if the new vnode represents a special device
+ * for which we already have a vnode (either because of
+ * bdevvp() or because of a different vnode representing
+ * the same block device). If such an alias exists, deallocate
+ * the existing contents and return the aliased vnode. The
+ * caller is responsible for filling it with its new contents.
+ *
+ * Returns NULLVP when nvp itself is to be used: either nvp is not a
+ * block or character device, or no alias with tag VT_NON was found. In
+ * the latter case nvp gets a freshly allocated specinfo and is entered
+ * at the head of the device hash chain; if an in-use alias with another
+ * tag exists, both vnodes are marked VALIASED.
+ *
+ * Otherwise the existing vnode is returned after being unlocked and
+ * cleaned; it takes over nvp's v_op and v_tag and is moved to mount
+ * point mp, and nvp's type is reset to VNON.
+ */
+struct vnode *
+checkalias(nvp, nvp_rdev, mp)
+ register struct vnode *nvp;
+ dev_t nvp_rdev;
+ struct mount *mp;
+{
+ register struct vnode *vp;
+ struct vnode **vpp;
+
+ if (nvp->v_type != VBLK && nvp->v_type != VCHR)
+ return (NULLVP);
+
+ vpp = &speclisth[SPECHASH(nvp_rdev)];
+loop:
+ for (vp = *vpp; vp; vp = vp->v_specnext) {
+ if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
+ continue;
+ /*
+ * Alias, but not in use, so flush it out.
+ */
+ if (vp->v_usecount == 0) {
+ vgone(vp);
+ goto loop;
+ }
+ if (vget(vp, 1))
+ goto loop;
+ break;
+ }
+ if (vp == NULL || vp->v_tag != VT_NON) {
+ MALLOC(nvp->v_specinfo, struct specinfo *,
+ sizeof(struct specinfo), M_VNODE, M_WAITOK);
+ nvp->v_rdev = nvp_rdev;
+ nvp->v_hashchain = vpp;
+ nvp->v_specnext = *vpp;
+ nvp->v_specflags = 0;
+ *vpp = nvp;
+ if (vp != NULL) {
+ nvp->v_flag |= VALIASED;
+ vp->v_flag |= VALIASED;
+ vput(vp);
+ }
+ return (NULLVP);
+ }
+ VOP_UNLOCK(vp);
+ vclean(vp, 0);
+ vp->v_op = nvp->v_op;
+ vp->v_tag = nvp->v_tag;
+ nvp->v_type = VNON;
+ insmntque(vp, mp);
+ return (vp);
+}
+
+/*
+ * Grab a particular vnode from the free list, increment its
+ * reference count and, if lockflag is set, lock it.  The vnode lock
+ * bit (VXLOCK) is set while the vnode is being eliminated in vgone.
+ * In that case the caller sleeps until the transition is completed
+ * and 1 is returned to indicate that the vnode is no longer usable
+ * (possibly having been changed to a new file system type).
+ * Returns 0 when the reference was obtained.
+ */
+vget(vp, lockflag)
+	register struct vnode *vp;
+	int lockflag;
+{
+	int dying;
+
+	/*
+	 * A vnode is being cleaned out for another use either when
+	 * VXLOCK is set, or when its use count is zero and its free
+	 * list back pointer carries the 0xdeadb marker that getnewvnode
+	 * stores after taking it off the free list.  The second test is
+	 * needed because VXLOCK may not be set yet: vclean can be
+	 * blocked in VOP_LOCK waiting for VOP_INACTIVE to complete.
+	 */
+	dying = (vp->v_flag & VXLOCK) != 0;
+	if (!dying && vp->v_usecount == 0 &&
+	    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
+		dying = 1;
+	if (dying) {
+		/* Wait for the cleaning to finish, then report failure. */
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+		return (1);
+	}
+	/* An unreferenced vnode is still sitting on the free list. */
+	if (vp->v_usecount == 0)
+		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+	vp->v_usecount++;
+	if (lockflag)
+		VOP_LOCK(vp);
+	return (0);
+}
+
+/*
+ * Take an additional reference on a vnode that is already referenced.
+ * Panics when the use count is not positive; vget() has to be used for
+ * such vnodes instead.
+ */
+void vref(vp)
+	struct vnode *vp;
+{
+
+	if (!(vp->v_usecount > 0))
+		panic("vref used where vget required");
+	vp->v_usecount += 1;
+}
+
+/*
+ * vput(), just unlock and vrele()
+ *
+ * Calls VOP_UNLOCK(vp) and then vrele(vp), in that order.
+ */
+void vput(vp)
+ register struct vnode *vp;
+{
+
+ VOP_UNLOCK(vp);
+ vrele(vp);
+}
+
+/*
+ * Drop one reference to a vnode.
+ * When the last reference goes away the vnode is appended to the tail
+ * of the free list and its inactive routine is called.
+ */
+void vrele(vp)
+	register struct vnode *vp;
+{
+
+#ifdef DIAGNOSTIC
+	if (vp == NULL)
+		panic("vrele: null vp");
+#endif
+	if (--vp->v_usecount > 0)
+		return;
+#ifdef DIAGNOSTIC
+	/* The count must be exactly zero now and no writers may remain. */
+	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
+		vprint("vrele: bad ref count", vp);
+		panic("vrele: ref cnt");
+	}
+#endif
+	/* The tail of the list is the most recently used end. */
+	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+	VOP_INACTIVE(vp);
+}
+
+/*
+ * Page or buffer structure gets a reference.
+ *
+ * Increments v_holdcnt; holdrele() is the matching release.
+ */
+void vhold(vp)
+ register struct vnode *vp;
+{
+
+ vp->v_holdcnt++;
+}
+
+/*
+ * Give back a hold reference taken with vhold().
+ * Panics if the hold count is not positive.
+ */
+void holdrele(vp)
+	register struct vnode *vp;
+{
+
+	if (!(vp->v_holdcnt > 0))
+		panic("holdrele: holdcnt");
+	vp->v_holdcnt -= 1;
+}
+
+/*
+ * Remove any vnodes in the vnode table belonging to mount point mp.
+ *
+ * If MNT_NOFORCE is specified, there should not be any active ones,
+ * return error if any are found (nb: this is a user error, not a
+ * system error). If MNT_FORCE is specified, detach any active vnodes
+ * that are found.
+ *
+ * skipvp is always left untouched. The flag bits tested by the code are
+ * SKIPSYSTEM (leave VSYSTEM vnodes alone), WRITECLOSE (only touch
+ * regular files that are open for writing) and FORCECLOSE (dispose of
+ * vnodes that are still referenced). The mount point must be marked
+ * MNT_MPBUSY or the routine panics. Returns EBUSY if any referenced
+ * vnode had to be left in place, otherwise 0.
+ */
+#ifdef DIAGNOSTIC
+int busyprt = 0; /* print out busy vnodes */
+struct ctldebug debug1 = { "busyprt", &busyprt };
+#endif
+
+vflush(mp, skipvp, flags)
+ struct mount *mp;
+ struct vnode *skipvp;
+ int flags;
+{
+ register struct vnode *vp, *nvp;
+ int busy = 0;
+
+ if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+ panic("vflush: not busy");
+loop:
+ for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
+ if (vp->v_mount != mp)
+ goto loop;
+ nvp = vp->v_mntvnodes.le_next;
+ /*
+ * Skip over a selected vnode.
+ */
+ if (vp == skipvp)
+ continue;
+ /*
+ * Skip over vnodes marked VSYSTEM.
+ */
+ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
+ continue;
+ /*
+ * If WRITECLOSE is set, only flush out regular file
+ * vnodes open for writing.
+ */
+ if ((flags & WRITECLOSE) &&
+ (vp->v_writecount == 0 || vp->v_type != VREG))
+ continue;
+ /*
+ * With v_usecount == 0, all we need to do is clear
+ * out the vnode data structures and we are done.
+ */
+ if (vp->v_usecount == 0) {
+ vgone(vp);
+ continue;
+ }
+ /*
+ * If FORCECLOSE is set, forcibly close the vnode.
+ * For block or character devices, revert to an
+ * anonymous device. For all other files, just kill them.
+ */
+ if (flags & FORCECLOSE) {
+ if (vp->v_type != VBLK && vp->v_type != VCHR) {
+ vgone(vp);
+ } else {
+ vclean(vp, 0);
+ vp->v_op = spec_vnodeop_p;
+ insmntque(vp, (struct mount *)0);
+ }
+ continue;
+ }
+#ifdef DIAGNOSTIC
+ if (busyprt)
+ vprint("vflush: busy vnode", vp);
+#endif
+ busy++;
+ }
+ if (busy)
+ return (EBUSY);
+ return (0);
+}
+
+/*
+ * Disassociate the underlying file system from a vnode.
+ *
+ * With DOCLOSE in flags, the vnode's buffers are flushed with
+ * vinvalbuf(V_SAVE) and, if the vnode was in use, VOP_CLOSE is called.
+ * A vnode that was in use is also passed to VOP_INACTIVE. VOP_RECLAIM
+ * is always called and must succeed.
+ *
+ * On return the vnode uses dead_vnodeop_p, its tag is VT_NON, VXLOCK
+ * is clear again and anyone sleeping with VXWANT has been woken.
+ * Panics if VXLOCK is already set on entry (after taking the vnode
+ * lock) or if VOP_RECLAIM fails.
+ */
+void
+vclean(vp, flags)
+ register struct vnode *vp;
+ int flags;
+{
+ int active;
+
+ /*
+ * Check to see if the vnode is in use.
+ * If so we have to reference it before we clean it out
+ * so that its count cannot fall to zero and generate a
+ * race against ourselves to recycle it.
+ */
+ if (active = vp->v_usecount)
+ VREF(vp);
+ /*
+ * Even if the count is zero, the VOP_INACTIVE routine may still
+ * have the object locked while it cleans it out. The VOP_LOCK
+ * ensures that the VOP_INACTIVE routine is done with its work.
+ * For active vnodes, it ensures that no other activity can
+ * occur while the underlying object is being cleaned out.
+ */
+ VOP_LOCK(vp);
+ /*
+ * Prevent the vnode from being recycled or
+ * brought into use while we clean it out.
+ */
+ if (vp->v_flag & VXLOCK)
+ panic("vclean: deadlock");
+ vp->v_flag |= VXLOCK;
+ /*
+ * Clean out any buffers associated with the vnode.
+ */
+ if (flags & DOCLOSE)
+ vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
+ /*
+ * Any other processes trying to obtain this lock must first
+ * wait for VXLOCK to clear, then call the new lock operation.
+ */
+ VOP_UNLOCK(vp);
+ /*
+ * If purging an active vnode, it must be closed and
+ * deactivated before being reclaimed.
+ */
+ if (active) {
+ if (flags & DOCLOSE)
+ VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
+ VOP_INACTIVE(vp);
+ }
+ /*
+ * Reclaim the vnode.
+ */
+ if (VOP_RECLAIM(vp))
+ panic("vclean: cannot reclaim");
+ if (active)
+ vrele(vp);
+
+ /*
+ * Done with purge, notify sleepers of the grim news.
+ */
+ vp->v_op = dead_vnodeop_p;
+ vp->v_tag = VT_NON;
+ vp->v_flag &= ~VXLOCK;
+ if (vp->v_flag & VXWANT) {
+ vp->v_flag &= ~VXWANT;
+ wakeup((caddr_t)vp);
+ }
+}
+
+/*
+ * Eliminate all activity associated with the requested vnode
+ * and with all vnodes aliased to the requested vnode.
+ *
+ * While vp is marked VALIASED, one other vnode on the same hash chain
+ * with the same device number and type is vgone()'d per pass; vgone()
+ * clears VALIASED on the last remaining alias, which ends the loop.
+ * Finally vp itself is vgone()'d.
+ */
+void vgoneall(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq;
+
+ if (vp->v_flag & VALIASED) {
+ /*
+ * If a vgone (or vclean) is already in progress,
+ * wait until it is done and return.
+ */
+ if (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return;
+ }
+ /*
+ * Ensure that vp will not be vgone'd while we
+ * are eliminating its aliases.
+ */
+ vp->v_flag |= VXLOCK;
+ while (vp->v_flag & VALIASED) {
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_rdev != vp->v_rdev ||
+ vq->v_type != vp->v_type || vp == vq)
+ continue;
+ vgone(vq);
+ break;
+ }
+ }
+ /*
+ * Remove the lock so that vgone below will
+ * really eliminate the vnode after which time
+ * vgone will awaken any sleepers.
+ */
+ vp->v_flag &= ~VXLOCK;
+ }
+ vgone(vp);
+}
+
+/*
+ * Eliminate all activity associated with a vnode
+ * in preparation for reuse.
+ *
+ * The vnode is cleaned with vclean(DOCLOSE), taken off its mount
+ * point's vnode list and, for block and character devices, unlinked
+ * from the device hash chain with its specinfo freed. If it is on the
+ * free list it is moved to the head of that list. The type is set to
+ * VBAD last. If a vgone or vclean is already in progress on the vnode,
+ * the caller only sleeps until it is done.
+ */
+void vgone(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq;
+ struct vnode *vx;
+
+ /*
+ * If a vgone (or vclean) is already in progress,
+ * wait until it is done and return.
+ */
+ if (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return;
+ }
+ /*
+ * Clean out the filesystem specific data.
+ */
+ vclean(vp, DOCLOSE);
+ /*
+ * Delete from old mount point vnode list, if on one.
+ */
+ if (vp->v_mount != NULL) {
+ LIST_REMOVE(vp, v_mntvnodes);
+ vp->v_mount = NULL;
+ }
+ /*
+ * If special device, remove it from special device alias list.
+ */
+ if (vp->v_type == VBLK || vp->v_type == VCHR) {
+ if (*vp->v_hashchain == vp) {
+ *vp->v_hashchain = vp->v_specnext;
+ } else {
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_specnext != vp)
+ continue;
+ vq->v_specnext = vp->v_specnext;
+ break;
+ }
+ if (vq == NULL)
+ panic("missing bdev");
+ }
+ if (vp->v_flag & VALIASED) {
+ vx = NULL;
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_rdev != vp->v_rdev ||
+ vq->v_type != vp->v_type)
+ continue;
+ if (vx)
+ break;
+ vx = vq;
+ }
+ if (vx == NULL)
+ panic("missing alias");
+ if (vq == NULL)
+ vx->v_flag &= ~VALIASED;
+ vp->v_flag &= ~VALIASED;
+ }
+ FREE(vp->v_specinfo, M_VNODE);
+ vp->v_specinfo = NULL;
+ }
+ /*
+ * If it is on the freelist and not already at the head,
+ * move it to the head of the list. The test of the back
+ * pointer and the reference count of zero is because
+ * it will be removed from the free list by getnewvnode,
+ * but will not have its reference count incremented until
+ * after calling vgone. If the reference count were
+ * incremented first, vgone would (incorrectly) try to
+ * close the previous instance of the underlying object.
+ * So, the back pointer is explicitly set to `0xdeadb' in
+ * getnewvnode after removing it from the freelist to ensure
+ * that we do not try to move it here.
+ */
+ if (vp->v_usecount == 0 &&
+ vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
+ vnode_free_list.tqh_first != vp) {
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+ }
+ vp->v_type = VBAD;
+}
+
+/*
+ * Search the special device hash chain for a vnode with the given
+ * device number and type.
+ * Returns 1 and stores the vnode through vpp when one is found;
+ * returns 0 and leaves *vpp alone otherwise.
+ */
+vfinddev(dev, type, vpp)
+	dev_t dev;
+	enum vtype type;
+	struct vnode **vpp;
+{
+	register struct vnode *vp;
+
+	vp = speclisth[SPECHASH(dev)];
+	while (vp != NULL) {
+		if (dev == vp->v_rdev && type == vp->v_type) {
+			*vpp = vp;
+			return (1);
+		}
+		vp = vp->v_specnext;
+	}
+	return (0);
+}
+
+/*
+ * Add up the use counts of every vnode that refers to the same special
+ * device as vp.
+ * A vnode that is not marked VALIASED simply reports its own use count.
+ * Unreferenced aliases met on the way (other than vp itself) are
+ * vgone()'d, after which the whole count is started over.
+ */
+vcount(vp)
+	register struct vnode *vp;
+{
+	register struct vnode *alias, *following;
+	int total, flushed;
+
+	for (;;) {
+		if (!(vp->v_flag & VALIASED))
+			return (vp->v_usecount);
+		total = 0;
+		flushed = 0;
+		for (alias = *vp->v_hashchain; alias; alias = following) {
+			following = alias->v_specnext;
+			if (alias->v_rdev == vp->v_rdev &&
+			    alias->v_type == vp->v_type) {
+				/* Alias, but not in use, so flush it out. */
+				if (alias->v_usecount == 0 && alias != vp) {
+					vgone(alias);
+					flushed = 1;
+					break;
+				}
+				total += alias->v_usecount;
+			}
+		}
+		if (!flushed)
+			return (total);
+	}
+}
+
+/*
+ * Print a one-line description of a vnode on the console: an optional
+ * label, the type name, the use, write and hold counts, and the names
+ * of the flags that are set.  If the vnode has private data, the
+ * filesystem's own print routine adds a second line.
+ */
+static char *typename[] = {
+	"VNON", "VREG", "VDIR", "VBLK", "VCHR",
+	"VLNK", "VSOCK", "VFIFO", "VBAD"
+};
+
+vprint(label, vp)
+	char *label;
+	register struct vnode *vp;
+{
+	/* Flag names carry a leading '|', which is skipped when printing. */
+	static struct {
+		int	bit;
+		char	*text;
+	} flagnames[] = {
+		{ VROOT,	"|VROOT" },
+		{ VTEXT,	"|VTEXT" },
+		{ VSYSTEM,	"|VSYSTEM" },
+		{ VXLOCK,	"|VXLOCK" },
+		{ VXWANT,	"|VXWANT" },
+		{ VBWAIT,	"|VBWAIT" },
+		{ VALIASED,	"|VALIASED" }
+	};
+	char flagbuf[64];
+	int k;
+
+	if (label != NULL)
+		printf("%s: ", label);
+	printf("type %s, usecount %d, writecount %d, refcount %d,",
+	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
+	    vp->v_holdcnt);
+	flagbuf[0] = '\0';
+	for (k = 0; k < (int)(sizeof(flagnames) / sizeof(flagnames[0])); k++) {
+		if (vp->v_flag & flagnames[k].bit)
+			strcat(flagbuf, flagnames[k].text);
+	}
+	if (flagbuf[0] != '\0')
+		printf(" flags (%s)", &flagbuf[1]);
+	if (vp->v_data != NULL) {
+		printf("\n\t");
+		VOP_PRINT(vp);
+	} else {
+		printf("\n");
+	}
+}
+
+#ifdef DEBUG
+/*
+ * Debugging aid: walk the vnode list of every mounted filesystem and
+ * print each vnode for which VOP_ISLOCKED() reports true.
+ */
+printlockedvnodes()
+{
+	register struct mount *mnt;
+	register struct vnode *node;
+
+	printf("Locked vnodes\n");
+	mnt = mountlist.tqh_first;
+	while (mnt != NULL) {
+		node = mnt->mnt_vnodelist.lh_first;
+		while (node != NULL) {
+			if (VOP_ISLOCKED(node))
+				vprint((char *)0, node);
+			node = node->v_mntvnodes.le_next;
+		}
+		mnt = mnt->mnt_list.tqe_next;
+	}
+}
+#endif
+
+int kinfo_vdebug = 1;
+int kinfo_vgetfailed;
+#define KINFO_VNODESLOP	10
+/*
+ * Dump vnode list (via sysctl).
+ * For each vnode of each mounted filesystem, copy out the kernel
+ * address of the vnode followed by the vnode structure itself.
+ *
+ * where:  user buffer to fill, or NULL to ask only for a size estimate.
+ * sizep:  on entry the size of the buffer; on return the number of
+ *         bytes written (or, when where is NULL, the estimate, which
+ *         allows for KINFO_VNODESLOP extra vnodes).
+ *
+ * Returns 0 on success, ENOMEM when the buffer is too small for the
+ * next record, or the error reported by copyout().
+ */
+/* ARGSUSED */
+sysctl_vnode(where, sizep)
+	char *where;
+	size_t *sizep;
+{
+	register struct mount *mp, *nmp;
+	struct vnode *vp;
+	register char *bp = where, *savebp;
+	char *ewhere;
+	int error;
+
+#define VPTRSZ	sizeof (struct vnode *)
+#define VNODESZ	sizeof (struct vnode)
+	if (where == NULL) {
+		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
+		return (0);
+	}
+	ewhere = where + *sizep;
+
+	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+		nmp = mp->mnt_list.tqe_next;
+		if (vfs_busy(mp))
+			continue;
+		/* Remember where this mount's records start for restarts. */
+		savebp = bp;
+again:
+		for (vp = mp->mnt_vnodelist.lh_first;
+		     vp != NULL;
+		     vp = vp->v_mntvnodes.le_next) {
+			/*
+			 * Check that the vp is still associated with
+			 * this filesystem.  RACE: could have been
+			 * recycled onto the same filesystem.
+			 */
+			if (vp->v_mount != mp) {
+				if (kinfo_vdebug)
+					printf("kinfo: vp changed\n");
+				bp = savebp;
+				goto again;
+			}
+			/*
+			 * Both early exits below must drop the busy mark
+			 * taken by vfs_busy() above, exactly as the normal
+			 * end of the loop body does.
+			 */
+			if (bp + VPTRSZ + VNODESZ > ewhere) {
+				vfs_unbusy(mp);
+				*sizep = bp - where;
+				return (ENOMEM);
+			}
+			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
+			    (error = copyout((caddr_t)vp, bp + VPTRSZ,
+			    VNODESZ))) {
+				vfs_unbusy(mp);
+				return (error);
+			}
+			bp += VPTRSZ + VNODESZ;
+		}
+		vfs_unbusy(mp);
+	}
+
+	*sizep = bp - where;
+	return (0);
+}
+
+/*
+ * Check to see if a filesystem is mounted on a block device.
+ *
+ * Returns EBUSY when vp itself, or any alias of vp (same device number
+ * and type on the same hash chain), is flagged SI_MOUNTEDON; returns 0
+ * otherwise.
+ */
+int
+vfs_mountedon(vp)
+	register struct vnode *vp;
+{
+	register struct vnode *alias;
+
+	if (vp->v_specflags & SI_MOUNTEDON)
+		return (EBUSY);
+	if ((vp->v_flag & VALIASED) == 0)
+		return (0);
+	for (alias = *vp->v_hashchain; alias; alias = alias->v_specnext)
+		if (alias->v_rdev == vp->v_rdev &&
+		    alias->v_type == vp->v_type &&
+		    (alias->v_specflags & SI_MOUNTEDON))
+			return (EBUSY);
+	return (0);
+}
+
+/*
+ * Build hash lists of net addresses and hang them off the mount point.
+ * Called by ufs_mount() to set up the lists of export addresses.
+ *
+ * mp:    mount point being exported.
+ * nep:   per-mount export state (default entry plus one radix tree
+ *        per address family).
+ * argp:  export request; ex_addr/ex_addrlen and ex_mask/ex_masklen
+ *        refer to user-space sockaddrs.
+ *
+ * With ex_addrlen == 0 the request sets the default export entry.
+ * Otherwise a netcred is allocated with the address and mask stored
+ * directly behind it and inserted into the tree for the address family.
+ *
+ * Returns 0 on success; EPERM if the default or the address is already
+ * exported; EINVAL for an address family outside the table; ENOBUFS if
+ * no radix tree could be set up; or the error from copyin().
+ */
+static int
+vfs_hang_addrlist(mp, nep, argp)
+	struct mount *mp;
+	struct netexport *nep;
+	struct export_args *argp;
+{
+	register struct netcred *np;
+	register struct radix_node_head *rnh;
+	register int i;
+	struct radix_node *rn;
+	struct sockaddr *saddr, *smask = 0;
+	struct domain *dom;
+	int error;
+
+	if (argp->ex_addrlen == 0) {
+		if (mp->mnt_flag & MNT_DEFEXPORTED)
+			return (EPERM);
+		np = &nep->ne_defexported;
+		np->netc_exflags = argp->ex_flags;
+		np->netc_anon = argp->ex_anon;
+		np->netc_anon.cr_ref = 1;
+		mp->mnt_flag |= MNT_DEFEXPORTED;
+		return (0);
+	}
+	/* One allocation: netcred, then the address, then the mask. */
+	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
+	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
+	bzero((caddr_t)np, i);
+	saddr = (struct sockaddr *)(np + 1);
+	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
+		goto out;
+	if (saddr->sa_len > argp->ex_addrlen)
+		saddr->sa_len = argp->ex_addrlen;
+	if (argp->ex_masklen) {
+		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
+		/* The mask comes from ex_mask, not from ex_addr. */
+		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
+		if (error)
+			goto out;
+		if (smask->sa_len > argp->ex_masklen)
+			smask->sa_len = argp->ex_masklen;
+	}
+	/*
+	 * The family was supplied by the caller; ne_rtable[] only has
+	 * slots 0..AF_MAX (see vfs_free_addrlist()), so reject anything
+	 * else before using it as an index.
+	 */
+	i = saddr->sa_family;
+	if (i < 0 || i > AF_MAX) {
+		error = EINVAL;
+		goto out;
+	}
+	if ((rnh = nep->ne_rtable[i]) == 0) {
+		/*
+		 * Seems silly to initialize every AF when most are not
+		 * used, do so on demand here
+		 */
+		for (dom = domains; dom; dom = dom->dom_next)
+			if (dom->dom_family == i && dom->dom_rtattach) {
+				dom->dom_rtattach((void **)&nep->ne_rtable[i],
+				    dom->dom_rtoffset);
+				break;
+			}
+		if ((rnh = nep->ne_rtable[i]) == 0) {
+			error = ENOBUFS;
+			goto out;
+		}
+	}
+	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
+	    np->netc_rnodes);
+	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
+		error = EPERM;
+		goto out;
+	}
+	np->netc_exflags = argp->ex_flags;
+	np->netc_anon = argp->ex_anon;
+	np->netc_anon.cr_ref = 1;
+	return (0);
+out:
+	free(np, M_NETADDR);
+	return (error);
+}
+
+/*
+ * Tree-walk callback used by vfs_free_addrlist(): delete one entry
+ * from the radix tree whose head is passed in w, then free the entry.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+static int
+vfs_free_netcred(rn, w)
+	struct radix_node *rn;
+	caddr_t w;
+{
+	struct radix_node_head *head;
+
+	head = (struct radix_node_head *)w;
+	(*head->rnh_deladdr)(rn->rn_key, rn->rn_mask, head);
+	free((caddr_t)rn, M_NETADDR);
+	return (0);
+}
+
+/*
+ * Free the net address hash lists that are hanging off the mount points.
+ * Every address family slot (0 through AF_MAX) that has a tree gets its
+ * entries released via vfs_free_netcred(), the tree head freed, and the
+ * slot cleared.
+ */
+static void
+vfs_free_addrlist(nep)
+	struct netexport *nep;
+{
+	register struct radix_node_head *head;
+	register int af;
+
+	for (af = 0; af <= AF_MAX; af++) {
+		head = nep->ne_rtable[af];
+		if (!head)
+			continue;
+		(*head->rnh_walktree)(head, vfs_free_netcred, (caddr_t)head);
+		free((caddr_t)head, M_RTABLE);
+		nep->ne_rtable[af] = 0;
+	}
+}
+
+/*
+ * Apply an export request to a mount point.
+ *
+ * MNT_DELEXPORT in ex_flags discards all existing export addresses and
+ * clears the exported flags on the mount.  MNT_EXPORTED in ex_flags then
+ * adds the address described by argp and marks the mount exported.
+ * Returns 0, or the error from vfs_hang_addrlist().
+ */
+int
+vfs_export(mp, nep, argp)
+	struct mount *mp;
+	struct netexport *nep;
+	struct export_args *argp;
+{
+	int error;
+
+	if (argp->ex_flags & MNT_DELEXPORT) {
+		vfs_free_addrlist(nep);
+		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
+	}
+	if ((argp->ex_flags & MNT_EXPORTED) == 0)
+		return (0);
+
+	error = vfs_hang_addrlist(mp, nep, argp);
+	if (error)
+		return (error);
+	mp->mnt_flag |= MNT_EXPORTED;
+	return (0);
+}
+
+/*
+ * Find the export credentials that apply to a client address.
+ *
+ * Returns NULL when the mount is not exported.  Otherwise the address
+ * in nam (if any) is matched against the tree for its address family;
+ * a match on a node flagged RNF_ROOT is ignored.  When nothing matched
+ * and the mount has a default export, the default entry is returned.
+ */
+struct netcred *
+vfs_export_lookup(mp, nep, nam)
+	register struct mount *mp;
+	struct netexport *nep;
+	struct mbuf *nam;
+{
+	register struct netcred *match;
+	register struct radix_node_head *tree;
+	struct sockaddr *saddr;
+
+	if ((mp->mnt_flag & MNT_EXPORTED) == 0)
+		return (NULL);
+
+	/*
+	 * Lookup in the export list first.
+	 */
+	match = NULL;
+	if (nam != NULL) {
+		saddr = mtod(nam, struct sockaddr *);
+		tree = nep->ne_rtable[saddr->sa_family];
+		if (tree != NULL) {
+			match = (struct netcred *)
+			    (*tree->rnh_matchaddr)((caddr_t)saddr, tree);
+			if (match != NULL &&
+			    (match->netc_rnodes->rn_flags & RNF_ROOT))
+				match = NULL;
+		}
+	}
+	/*
+	 * If no address match, use the default if it exists.
+	 */
+	if (match == NULL && (mp->mnt_flag & MNT_DEFEXPORTED))
+		match = &nep->ne_defexported;
+	return (match);
+}
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
new file mode 100644
index 000000000000..345c7a79bf20
--- /dev/null
+++ b/sys/kern/vfs_extattr.c
@@ -0,0 +1,2107 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+static int change_dir __P((struct nameidata *ndp, struct proc *p));
+
+/*
+ * Virtual File System System Calls
+ */
+
+/*
+ * Mount a file system.
+ *
+ * The caller must be the super-user.  uap->path names the directory to
+ * be covered or, with MNT_UPDATE, the root of an already mounted
+ * filesystem whose flags are to be changed.  uap->type indexes vfssw[]
+ * and uap->data is handed unchanged to VFS_MOUNT().
+ *
+ * Returns 0 on success or an errno value.
+ */
+struct mount_args {
+	int	type;
+	char	*path;
+	int	flags;
+	caddr_t	data;
+};
+/* ARGSUSED */
+mount(p, uap, retval)
+	struct proc *p;
+	register struct mount_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	register struct mount *mp;
+	int error, flag;
+	struct nameidata nd;
+
+	/*
+	 * Must be super user
+	 */
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	/*
+	 * Get vnode to be covered
+	 */
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (uap->flags & MNT_UPDATE) {
+		if ((vp->v_flag & VROOT) == 0) {
+			vput(vp);
+			return (EINVAL);
+		}
+		mp = vp->v_mount;
+		/* Saved so the flags can be restored if the update fails. */
+		flag = mp->mnt_flag;
+		/*
+		 * We only allow the filesystem to be reloaded if it
+		 * is currently mounted read-only.
+		 */
+		if ((uap->flags & MNT_RELOAD) &&
+		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
+			vput(vp);
+			return (EOPNOTSUPP);	/* Needs translation */
+		}
+		mp->mnt_flag |=
+		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+		VOP_UNLOCK(vp);
+		goto update;
+	}
+	/*
+	 * The vnode was looked up with LOCKLEAF, so it has to be released
+	 * with vput() on this error path just like on every other one.
+	 */
+	if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) {
+		vput(vp);
+		return (error);
+	}
+	if (vp->v_type != VDIR) {
+		vput(vp);
+		return (ENOTDIR);
+	}
+	if ((u_long)uap->type > MOUNT_MAXTYPE || vfssw[uap->type] == NULL) {
+		vput(vp);
+		return (ENODEV);
+	}
+
+	/*
+	 * Allocate and initialize the file system.
+	 */
+	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
+	    M_MOUNT, M_WAITOK);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = vfssw[uap->type];
+	if (error = vfs_lock(mp)) {
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+		return (error);
+	}
+	if (vp->v_mountedhere != NULL) {
+		vfs_unlock(mp);
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+		return (EBUSY);
+	}
+	vp->v_mountedhere = mp;
+	mp->mnt_vnodecovered = vp;
+update:
+	/*
+	 * Set the mount level flags.
+	 */
+	if (uap->flags & MNT_RDONLY)
+		mp->mnt_flag |= MNT_RDONLY;
+	else if (mp->mnt_flag & MNT_RDONLY)
+		mp->mnt_flag |= MNT_WANTRDWR;
+	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+	/*
+	 * Mount the filesystem.
+	 */
+	error = VFS_MOUNT(mp, uap->path, uap->data, &nd, p);
+	if (mp->mnt_flag & MNT_UPDATE) {
+		/* The vnode was unlocked before the jump to update. */
+		vrele(vp);
+		if (mp->mnt_flag & MNT_WANTRDWR)
+			mp->mnt_flag &= ~MNT_RDONLY;
+		mp->mnt_flag &=~
+		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
+		if (error)
+			mp->mnt_flag = flag;
+		return (error);
+	}
+	/*
+	 * Put the new filesystem on the mount list after root.
+	 */
+	cache_purge(vp);
+	if (!error) {
+		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+		VOP_UNLOCK(vp);
+		vfs_unlock(mp);
+		error = VFS_START(mp, 0, p);
+	} else {
+		/* Undo the covering done above and discard the mount. */
+		mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
+		vfs_unlock(mp);
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+	}
+	return (error);
+}
+
+/*
+ * Unmount a file system.
+ *
+ * Note: unmount takes a path to the vnode mounted on as argument,
+ * not special file (as before).
+ *
+ * The path must resolve to the root vnode of a mounted filesystem;
+ * the real work is done by dounmount().
+ */
+struct unmount_args {
+	char	*path;
+	int	flags;
+};
+/* ARGSUSED */
+unmount(p, uap, retval)
+	struct proc *p;
+	register struct unmount_args *uap;
+	int *retval;
+{
+	register struct vnode *rootvp;
+	struct mount *mp;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	rootvp = nd.ni_vp;
+
+	/*
+	 * Only a user mount may be removed without
+	 * super-user privilege.
+	 */
+	if ((rootvp->v_mount->mnt_flag & MNT_USER) == 0) {
+		error = suser(p->p_ucred, &p->p_acflag);
+		if (error) {
+			vput(rootvp);
+			return (error);
+		}
+	}
+
+	/*
+	 * The vnode has to be the root of its filesystem.
+	 */
+	if (!(rootvp->v_flag & VROOT)) {
+		vput(rootvp);
+		return (EINVAL);
+	}
+	mp = rootvp->v_mount;
+	vput(rootvp);
+	return (dounmount(mp, uap->flags, p));
+}
+
+/*
+ * Do the actual file system unmount.
+ *
+ * mp:     the mount to remove.
+ * flags:  unmount flags; with MNT_FORCE the unmount is attempted even
+ *         if the preceding sync fails.
+ * p:      calling process, used for credentials.
+ *
+ * Returns 0 on success (mp has then been freed), EBUSY if the mount
+ * could not be marked busy, or the error from vfs_lock(), VFS_SYNC()
+ * or VFS_UNMOUNT().
+ */
+dounmount(mp, flags, p)
+	register struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	struct vnode *coveredvp;
+	int error;
+
+	coveredvp = mp->mnt_vnodecovered;
+	if (vfs_busy(mp))
+		return (EBUSY);
+	mp->mnt_flag |= MNT_UNMOUNT;
+	if (error = vfs_lock(mp)) {
+		/*
+		 * Back out the busy mark and the unmount-in-progress
+		 * flag so the mount is not left unusable.
+		 */
+		mp->mnt_flag &= ~MNT_UNMOUNT;
+		vfs_unbusy(mp);
+		return (error);
+	}
+
+	mp->mnt_flag &=~ MNT_ASYNC;
+	vnode_pager_umount(mp);	/* release cached vnodes */
+	cache_purgevfs(mp);	/* remove cache entries for this file sys */
+	if ((error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0 ||
+	    (flags & MNT_FORCE))
+		error = VFS_UNMOUNT(mp, flags, p);
+	mp->mnt_flag &= ~MNT_UNMOUNT;
+	vfs_unbusy(mp);
+	if (error) {
+		vfs_unlock(mp);
+	} else {
+		/*
+		 * Detach the mount from the covered vnode while our
+		 * reference to that vnode is still held, and only then
+		 * drop the reference.
+		 */
+		coveredvp->v_mountedhere = (struct mount *)0;
+		vrele(coveredvp);
+		TAILQ_REMOVE(&mountlist, mp, mnt_list);
+		vfs_unlock(mp);
+		if (mp->mnt_vnodelist.lh_first != NULL)
+			panic("unmount: dangling vnode");
+		free((caddr_t)mp, M_MOUNT);
+	}
+	return (error);
+}
+
+/*
+ * Sync each mounted filesystem.
+ *
+ * Filesystems that are locked, read-only or busy are skipped.  While a
+ * filesystem is being synced its MNT_ASYNC flag is temporarily cleared.
+ * Always returns 0.
+ */
+#ifdef DIAGNOSTIC
+int syncprt = 0;
+struct ctldebug debug0 = { "syncprt", &syncprt };
+#endif
+
+struct sync_args {
+	int	dummy;
+};
+/* ARGSUSED */
+sync(p, uap, retval)
+	struct proc *p;
+	struct sync_args *uap;
+	int *retval;
+{
+	register struct mount *mp, *next;
+	int wasasync;
+
+	for (mp = mountlist.tqh_first; mp != NULL; mp = next) {
+		next = mp->mnt_list.tqe_next;
+		/*
+		 * Checking the lock bits first avoids races with
+		 * mount and unmount.
+		 */
+		if (mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY))
+			continue;
+		if (vfs_busy(mp))
+			continue;
+		wasasync = mp->mnt_flag & MNT_ASYNC;
+		mp->mnt_flag &= ~MNT_ASYNC;
+		VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
+		if (wasasync)
+			mp->mnt_flag |= MNT_ASYNC;
+		vfs_unbusy(mp);
+	}
+#ifdef DIAGNOSTIC
+	if (syncprt)
+		vfs_bufstats();
+#endif /* DIAGNOSTIC */
+	return (0);
+}
+
+/*
+ * Change filesystem quotas.
+ *
+ * The path only serves to identify the filesystem; the request itself
+ * is passed on to that filesystem's VFS_QUOTACTL().
+ */
+struct quotactl_args {
+	char	*path;
+	int	cmd;
+	int	uid;
+	caddr_t	arg;
+};
+/* ARGSUSED */
+quotactl(p, uap, retval)
+	struct proc *p;
+	register struct quotactl_args *uap;
+	int *retval;
+{
+	register struct mount *fs;
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	fs = nd.ni_vp->v_mount;
+	vrele(nd.ni_vp);
+	return (VFS_QUOTACTL(fs, uap->cmd, uap->uid, uap->arg, p));
+}
+
+/*
+ * Get filesystem statistics for the filesystem containing a path.
+ *
+ * The mount's cached statfs structure is refreshed with VFS_STATFS(),
+ * its f_flags set from the visible mount flags, and the result copied
+ * out to uap->buf.
+ */
+struct statfs_args {
+	char	*path;
+	struct	statfs *buf;
+};
+/* ARGSUSED */
+statfs(p, uap, retval)
+	struct proc *p;
+	register struct statfs_args *uap;
+	int *retval;
+{
+	register struct mount *fs;
+	register struct statfs *stats;
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	fs = nd.ni_vp->v_mount;
+	vrele(nd.ni_vp);
+	stats = &fs->mnt_stat;
+	error = VFS_STATFS(fs, stats, p);
+	if (error)
+		return (error);
+	stats->f_flags = fs->mnt_flag & MNT_VISFLAGMASK;
+	return (copyout((caddr_t)stats, (caddr_t)uap->buf, sizeof(*stats)));
+}
+
+/*
+ * Get filesystem statistics for the filesystem holding an open file.
+ *
+ * Same as statfs(), except that the filesystem is identified by the
+ * vnode behind file descriptor uap->fd.
+ */
+struct fstatfs_args {
+	int	fd;
+	struct	statfs *buf;
+};
+/* ARGSUSED */
+fstatfs(p, uap, retval)
+	struct proc *p;
+	register struct fstatfs_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct mount *fs;
+	register struct statfs *stats;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error)
+		return (error);
+	fs = ((struct vnode *)fp->f_data)->v_mount;
+	stats = &fs->mnt_stat;
+	error = VFS_STATFS(fs, stats, p);
+	if (error)
+		return (error);
+	stats->f_flags = fs->mnt_flag & MNT_VISFLAGMASK;
+	return (copyout((caddr_t)stats, (caddr_t)uap->buf, sizeof(*stats)));
+}
+
+/*
+ * Get statistics on all filesystems.
+ *
+ * Walks the mount list and copies one struct statfs per filesystem into
+ * uap->buf, up to as many as fit in uap->bufsize bytes.  Filesystems
+ * with MNT_MLOCK set are counted but not copied.  A filesystem whose
+ * VFS_STATFS() refresh fails is neither copied nor counted.
+ *
+ * *retval is the number of filesystems counted, limited to the buffer
+ * capacity when a buffer was supplied.  With a null buffer nothing is
+ * copied and *retval is simply the count.
+ * Returns 0, or the error from copyout().
+ */
+struct getfsstat_args {
+ struct statfs *buf;
+ long bufsize;
+ int flags;
+};
+getfsstat(p, uap, retval)
+ struct proc *p;
+ register struct getfsstat_args *uap;
+ int *retval;
+{
+ register struct mount *mp, *nmp;
+ register struct statfs *sp;
+ caddr_t sfsp;
+ long count, maxcount, error;
+
+ /* Number of whole statfs structures the caller's buffer can hold. */
+ maxcount = uap->bufsize / sizeof(struct statfs);
+ sfsp = (caddr_t)uap->buf;
+ for (count = 0, mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+ nmp = mp->mnt_list.tqe_next;
+ if (sfsp && count < maxcount &&
+ ((mp->mnt_flag & MNT_MLOCK) == 0)) {
+ sp = &mp->mnt_stat;
+ /*
+ * If MNT_NOWAIT is specified, do not refresh the
+ * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
+ */
+ if (((uap->flags & MNT_NOWAIT) == 0 ||
+ (uap->flags & MNT_WAIT)) &&
+ (error = VFS_STATFS(mp, sp, p)))
+ continue; /* skips the count++ below as well */
+ sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ if (error = copyout((caddr_t)sp, sfsp, sizeof(*sp)))
+ return (error);
+ sfsp += sizeof(*sp);
+ }
+ count++;
+ }
+ /* Never report more entries than the buffer could take. */
+ if (sfsp && count > maxcount)
+ *retval = maxcount;
+ else
+ *retval = count;
+ return (0);
+}
+
+/*
+ * Change current working directory to a given file descriptor.
+ *
+ * The descriptor must refer to a directory that the caller may search
+ * (VEXEC).  On success a new reference to it replaces the process's
+ * current directory.
+ */
+struct fchdir_args {
+	int	fd;
+};
+/* ARGSUSED */
+fchdir(p, uap, retval)
+	struct proc *p;
+	struct fchdir_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct vnode *dirvp;
+	struct file *fp;
+	int error;
+
+	error = getvnode(fdp, uap->fd, &fp);
+	if (error)
+		return (error);
+	dirvp = (struct vnode *)fp->f_data;
+
+	VOP_LOCK(dirvp);
+	if (dirvp->v_type == VDIR)
+		error = VOP_ACCESS(dirvp, VEXEC, p->p_ucred, p);
+	else
+		error = ENOTDIR;
+	VOP_UNLOCK(dirvp);
+	if (error)
+		return (error);
+
+	VREF(dirvp);
+	vrele(fdp->fd_cdir);
+	fdp->fd_cdir = dirvp;
+	return (0);
+}
+
+/*
+ * Change current working directory (``.'').
+ *
+ * The lookup and permission check are done by change_dir(); on success
+ * the old current directory is released and replaced.
+ */
+struct chdir_args {
+	char	*path;
+};
+/* ARGSUSED */
+chdir(p, uap, retval)
+	struct proc *p;
+	struct chdir_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = change_dir(&nd, p);
+	if (error)
+		return (error);
+	vrele(fdp->fd_cdir);
+	fdp->fd_cdir = nd.ni_vp;
+	return (0);
+}
+
+/*
+ * Change notion of root (``/'') directory.
+ *
+ * Super-user only.  The lookup and permission check are done by
+ * change_dir(); any previous root directory is released.
+ */
+struct chroot_args {
+	char	*path;
+};
+/* ARGSUSED */
+chroot(p, uap, retval)
+	struct proc *p;
+	struct chroot_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	struct nameidata nd;
+	int error;
+
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error)
+		return (error);
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = change_dir(&nd, p);
+	if (error)
+		return (error);
+	if (fdp->fd_rdir != NULL)
+		vrele(fdp->fd_rdir);
+	fdp->fd_rdir = nd.ni_vp;
+	return (0);
+}
+
+/*
+ * Common routine for chroot and chdir.
+ *
+ * Resolves the name in ndp and verifies that the result is a directory
+ * the caller may search.  The vnode is unlocked in every case; its
+ * reference is kept on success (returned in ndp->ni_vp) and dropped on
+ * failure.
+ */
+static int
+change_dir(ndp, p)
+	register struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *dirvp;
+	int error;
+
+	error = namei(ndp);
+	if (error)
+		return (error);
+	dirvp = ndp->ni_vp;
+	if (dirvp->v_type == VDIR)
+		error = VOP_ACCESS(dirvp, VEXEC, p->p_ucred, p);
+	else
+		error = ENOTDIR;
+	VOP_UNLOCK(dirvp);
+	if (error)
+		vrele(dirvp);
+	return (error);
+}
+
+/*
+ * Check permissions, allocate an open file structure,
+ * and call the device open routine if any.
+ *
+ * On success *retval is the new descriptor index and 0 is returned.
+ * On failure the file structure is released, the descriptor slot is
+ * cleared and an errno value is returned (ERESTART is reported as
+ * EINTR).
+ */
+struct open_args {
+ char *path;
+ int flags;
+ int mode;
+};
+open(p, uap, retval)
+ struct proc *p;
+ register struct open_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ register struct vnode *vp;
+ int flags, cmode;
+ struct file *nfp;
+ int type, indx, error;
+ struct flock lf;
+ struct nameidata nd;
+ extern struct fileops vnops;
+
+ /* Reserve the file structure and descriptor slot up front. */
+ if (error = falloc(p, &nfp, &indx))
+ return (error);
+ fp = nfp;
+ flags = FFLAGS(uap->flags);
+ /* Creation mode: apply the umask, keep permission bits, drop S_ISTXT. */
+ cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ /*
+ * Negative marker derived from the new index; the error path below
+ * treats a non-negative p_dupfd as a request "from fdopen" to
+ * duplicate that descriptor instead.
+ */
+ p->p_dupfd = -indx - 1; /* XXX check for fdopen */
+ if (error = vn_open(&nd, flags, cmode)) {
+ ffree(fp);
+ if ((error == ENODEV || error == ENXIO) &&
+ p->p_dupfd >= 0 && /* XXX from fdopen */
+ (error =
+ dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) {
+ *retval = indx;
+ return (0);
+ }
+ if (error == ERESTART)
+ error = EINTR;
+ fdp->fd_ofiles[indx] = NULL;
+ return (error);
+ }
+ p->p_dupfd = 0;
+ vp = nd.ni_vp;
+ fp->f_flag = flags & FMASK;
+ fp->f_type = DTYPE_VNODE;
+ fp->f_ops = &vnops;
+ fp->f_data = (caddr_t)vp;
+ /* O_EXLOCK / O_SHLOCK: take a whole-file advisory lock as part of open. */
+ if (flags & (O_EXLOCK | O_SHLOCK)) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ if (flags & O_EXLOCK)
+ lf.l_type = F_WRLCK;
+ else
+ lf.l_type = F_RDLCK;
+ type = F_FLOCK;
+ /* Wait for the lock unless the open is non-blocking. */
+ if ((flags & FNONBLOCK) == 0)
+ type |= F_WAIT;
+ /* The vnode lock is dropped around the advisory lock request. */
+ VOP_UNLOCK(vp);
+ if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) {
+ (void) vn_close(vp, fp->f_flag, fp->f_cred, p);
+ ffree(fp);
+ fdp->fd_ofiles[indx] = NULL;
+ return (error);
+ }
+ VOP_LOCK(vp);
+ fp->f_flag |= FHASLOCK;
+ }
+ VOP_UNLOCK(vp);
+ *retval = indx;
+ return (0);
+}
+
+#ifdef COMPAT_43
+/*
+ * Create a file.
+ *
+ * Compatibility entry point, implemented as a call to open() with the
+ * flags O_WRONLY | O_CREAT | O_TRUNC.
+ */
+struct ocreat_args {
+	char	*path;
+	int	mode;
+};
+ocreat(p, uap, retval)
+	struct proc *p;
+	register struct ocreat_args *uap;
+	int *retval;
+{
+	struct open_args args;
+
+	args.flags = O_WRONLY | O_CREAT | O_TRUNC;
+	args.mode = uap->mode;
+	args.path = uap->path;
+	return (open(p, &args, retval));
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Create a special file.
+ *
+ * Super-user only.  uap->mode supplies both the permission bits (masked
+ * by the umask) and, through its S_IFMT bits, the kind of node: only
+ * character devices, block devices and the "bad sector" marker are
+ * accepted.  uap->dev becomes the device number of the new node.
+ *
+ * Returns 0, EEXIST if the name already exists, EINVAL for any other
+ * file type, or the error from suser(), namei() or VOP_MKNOD().
+ */
+struct mknod_args {
+ char *path;
+ int mode;
+ int dev;
+};
+/* ARGSUSED */
+mknod(p, uap, retval)
+ struct proc *p;
+ register struct mknod_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ /* A non-null result vnode means the name is already taken. */
+ if (vp != NULL)
+ error = EEXIST;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+ vattr.va_rdev = uap->dev;
+
+ switch (uap->mode & S_IFMT) {
+ case S_IFMT: /* used by badsect to flag bad sectors */
+ vattr.va_type = VBAD;
+ break;
+ case S_IFCHR:
+ vattr.va_type = VCHR;
+ break;
+ case S_IFBLK:
+ vattr.va_type = VBLK;
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ }
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+ } else {
+ /* Abandon the create and release what the lookup returned. */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ /* Parent and target may be the same vnode; release it only once locked. */
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (vp)
+ vrele(vp);
+ }
+ return (error);
+}
+
+/*
+ * Create named pipe.
+ *
+ * Without FIFO support compiled in this always fails with EOPNOTSUPP.
+ * Otherwise a VFIFO node is created with the requested permissions
+ * (masked by the umask); EEXIST is returned if the name is in use.
+ */
+struct mkfifo_args {
+	char	*path;
+	int	mode;
+};
+/* ARGSUSED */
+mkfifo(p, uap, retval)
+	struct proc *p;
+	register struct mkfifo_args *uap;
+	int *retval;
+{
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+#ifndef FIFO
+	return (EOPNOTSUPP);
+#else
+	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	if (nd.ni_vp == NULL) {
+		VATTR_NULL(&vattr);
+		vattr.va_type = VFIFO;
+		vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+		LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+		return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr));
+	}
+	/* The name exists already: abandon the create and clean up. */
+	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+	if (nd.ni_dvp == nd.ni_vp)
+		vrele(nd.ni_dvp);
+	else
+		vput(nd.ni_dvp);
+	vrele(nd.ni_vp);
+	return (EEXIST);
+#endif /* FIFO */
+}
+
+/*
+ * Make a hard file link.
+ *
+ * uap->path names the existing file and uap->link the new name.
+ * Linking to a directory additionally requires super-user privilege.
+ *
+ * Returns 0, EEXIST if the new name already exists, or the error from
+ * namei(), suser() or VOP_LINK().
+ */
+struct link_args {
+ char *path;
+ char *link;
+};
+/* ARGSUSED */
+link(p, uap, retval)
+ struct proc *p;
+ register struct link_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct nameidata nd;
+ int error;
+
+ /* First lookup: the existing file. */
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ /* suser() is consulted only when the source is a directory. */
+ if (vp->v_type != VDIR ||
+ (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+ /* Second lookup reuses nd, re-aimed at the new name. */
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ nd.ni_dirp = uap->link;
+ if ((error = namei(&nd)) == 0) {
+ if (nd.ni_vp != NULL)
+ error = EEXIST;
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp,
+ p, p->p_ucred, LEASE_WRITE);
+ LEASE_CHECK(vp,
+ p, p->p_ucred, LEASE_WRITE);
+ error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+ } else {
+ /* New name exists: abandon the create and clean up. */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ }
+ }
+ }
+ /* Drop the reference on the source vnode from the first lookup. */
+ vrele(vp);
+ return (error);
+}
+
+/*
+ * Make a symbolic link.
+ *
+ * uap->path is the link's contents (copied into a temporary kernel
+ * buffer) and uap->link is the name to create.  Fails with EEXIST if
+ * that name is already in use.
+ */
+struct symlink_args {
+	char	*path;
+	char	*link;
+};
+/* ARGSUSED */
+symlink(p, uap, retval)
+	struct proc *p;
+	register struct symlink_args *uap;
+	int *retval;
+{
+	struct vattr vattr;
+	char *target;
+	int error;
+	struct nameidata nd;
+
+	MALLOC(target, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+	error = copyinstr(uap->path, target, MAXPATHLEN, NULL);
+	if (error)
+		goto done;
+	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->link, p);
+	error = namei(&nd);
+	if (error)
+		goto done;
+	if (nd.ni_vp != NULL) {
+		/* Name already present: abandon the create and clean up. */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vrele(nd.ni_vp);
+		error = EEXIST;
+	} else {
+		VATTR_NULL(&vattr);
+		vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
+		LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+		error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd,
+		    &vattr, target);
+	}
+done:
+	FREE(target, M_NAMEI);
+	return (error);
+}
+
+/*
+ * Delete a name from the filesystem.
+ *
+ * Removing a directory requires super-user privilege, and the root of a
+ * mounted filesystem can never be removed (EBUSY).
+ *
+ * Returns 0 or the error from namei(), suser() or VOP_REMOVE().
+ */
+struct unlink_args {
+ char *path;
+};
+/* ARGSUSED */
+unlink(p, uap, retval)
+ struct proc *p;
+ struct unlink_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+
+ /* suser() is consulted only when the target is a directory. */
+ if (vp->v_type != VDIR ||
+ (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+ else
+ (void)vnode_pager_uncache(vp);
+ }
+
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ /* Abandon the delete and release parent and target. */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ return (error);
+}
+
+/*
+ * Reposition read/write file offset.
+ *
+ * whence selects the base: L_SET (absolute), L_INCR (relative to the
+ * current offset) or L_XTND (relative to the file size as reported by
+ * VOP_GETATTR()).  The resulting offset is returned through retval as
+ * an off_t.
+ *
+ * Returns EBADF for a bad descriptor, ESPIPE if the descriptor is not
+ * a vnode, EINVAL for an unknown whence, or the VOP_GETATTR() error.
+ */
+struct lseek_args {
+	int	fd;
+	int	pad;
+	off_t	offset;
+	int	whence;
+};
+lseek(p, uap, retval)
+	struct proc *p;
+	register struct lseek_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	struct vattr attrs;
+	int error;
+
+	if ((u_int)uap->fd >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[uap->fd];
+	if (fp == NULL)
+		return (EBADF);
+	if (fp->f_type != DTYPE_VNODE)
+		return (ESPIPE);
+
+	switch (uap->whence) {
+	case L_SET:
+		fp->f_offset = uap->offset;
+		break;
+	case L_INCR:
+		fp->f_offset += uap->offset;
+		break;
+	case L_XTND:
+		error = VOP_GETATTR((struct vnode *)fp->f_data, &attrs,
+		    p->p_ucred, p);
+		if (error)
+			return (error);
+		fp->f_offset = uap->offset + attrs.va_size;
+		break;
+	default:
+		return (EINVAL);
+	}
+	*(off_t *)retval = fp->f_offset;
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Reposition read/write file offset.
+ *
+ * Compatibility entry point taking and returning a long offset; the
+ * arguments are repackaged and handed to lseek().
+ */
+struct olseek_args {
+	int	fd;
+	long	offset;
+	int	whence;
+};
+olseek(p, uap, retval)
+	struct proc *p;
+	register struct olseek_args *uap;
+	int *retval;
+{
+	struct lseek_args newargs;
+	off_t newoff;
+	int error;
+
+	newargs.fd = uap->fd;
+	newargs.offset = uap->offset;
+	newargs.whence = uap->whence;
+	error = lseek(p, &newargs, &newoff);
+	/* Hand the result back in the old, long-sized form. */
+	*(long *)retval = newoff;
+	return (error);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Check access permissions.
+ *
+ * The check is made with the real user and group ids: they are written
+ * into the process credential for the duration of the call and the
+ * previous values are restored before returning.
+ *
+ * uap->flags is a mask of R_OK, W_OK and X_OK; zero only tests that the
+ * path can be looked up.
+ */
+struct access_args {
+ char *path;
+ int flags;
+};
+access(p, uap, retval)
+ struct proc *p;
+ register struct access_args *uap;
+ int *retval;
+{
+ register struct ucred *cred = p->p_ucred;
+ register struct vnode *vp;
+ int error, flags, t_gid, t_uid;
+ struct nameidata nd;
+
+ /* Save the current ids, then substitute the real ones. */
+ t_uid = cred->cr_uid;
+ t_gid = cred->cr_groups[0];
+ cred->cr_uid = p->p_cred->p_ruid;
+ cred->cr_groups[0] = p->p_cred->p_rgid;
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ goto out1;
+ vp = nd.ni_vp;
+
+ /* Flags == 0 means only check for existence. */
+ if (uap->flags) {
+ /* Translate the user-level mask into vnode access bits. */
+ flags = 0;
+ if (uap->flags & R_OK)
+ flags |= VREAD;
+ if (uap->flags & W_OK)
+ flags |= VWRITE;
+ if (uap->flags & X_OK)
+ flags |= VEXEC;
+ /* A write request must also pass vn_writechk(). */
+ if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
+ error = VOP_ACCESS(vp, flags, cred, p);
+ }
+ vput(vp);
+out1:
+ /* Put back the ids saved on entry. */
+ cred->cr_uid = t_uid;
+ cred->cr_groups[0] = t_gid;
+ return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Get file status; this version follows links.
+ *
+ * Compatibility entry point: the status is obtained with vn_stat(),
+ * converted with cvtstat() and copied out as a struct ostat.
+ */
+struct ostat_args {
+	char	*path;
+	struct	ostat *ub;
+};
+/* ARGSUSED */
+ostat(p, uap, retval)
+	struct proc *p;
+	register struct ostat_args *uap;
+	int *retval;
+{
+	struct stat newsb;
+	struct ostat oldsb;
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	error = vn_stat(nd.ni_vp, &newsb, p);
+	vput(nd.ni_vp);
+	if (error)
+		return (error);
+	cvtstat(&newsb, &oldsb);
+	return (copyout((caddr_t)&oldsb, (caddr_t)uap->ub, sizeof (oldsb)));
+}
+
+/*
+ * Get file status; this version does not follow links.
+ *
+ * Compatibility entry point: the status is obtained with vn_stat(),
+ * converted with cvtstat() and copied out as a struct ostat.
+ */
+struct olstat_args {
+	char	*path;
+	struct	ostat *ub;
+};
+/* ARGSUSED */
+olstat(p, uap, retval)
+	struct proc *p;
+	register struct olstat_args *uap;
+	int *retval;
+{
+	struct stat newsb;
+	struct ostat oldsb;
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	error = vn_stat(nd.ni_vp, &newsb, p);
+	vput(nd.ni_vp);
+	if (error)
+		return (error);
+	cvtstat(&newsb, &oldsb);
+	return (copyout((caddr_t)&oldsb, (caddr_t)uap->ub, sizeof (oldsb)));
+}
+
+/*
+ * Convert from a new (struct stat) to an old (struct ostat) stat
+ * structure: st is the source, ost the destination.
+ *
+ * All fields are copied one for one except the size: a size of 2^32 or
+ * more is reported as -2 in the old structure.
+ */
+cvtstat(st, ost)
+ struct stat *st;
+ struct ostat *ost;
+{
+
+ ost->st_dev = st->st_dev;
+ ost->st_ino = st->st_ino;
+ ost->st_mode = st->st_mode;
+ ost->st_nlink = st->st_nlink;
+ ost->st_uid = st->st_uid;
+ ost->st_gid = st->st_gid;
+ ost->st_rdev = st->st_rdev;
+ /* Sizes of 2^32 and above are flagged with -2 instead of being copied. */
+ if (st->st_size < (quad_t)1 << 32)
+ ost->st_size = st->st_size;
+ else
+ ost->st_size = -2;
+ ost->st_atime = st->st_atime;
+ ost->st_mtime = st->st_mtime;
+ ost->st_ctime = st->st_ctime;
+ ost->st_blksize = st->st_blksize;
+ ost->st_blocks = st->st_blocks;
+ ost->st_flags = st->st_flags;
+ ost->st_gen = st->st_gen;
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Get file status; this version follows links.
+ *
+ * The status of the file named by uap->path is obtained with vn_stat()
+ * and copied out to uap->ub.
+ */
+struct stat_args {
+	char	*path;
+	struct	stat *ub;
+};
+/* ARGSUSED */
+stat(p, uap, retval)
+	struct proc *p;
+	register struct stat_args *uap;
+	int *retval;
+{
+	struct stat status;
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	error = vn_stat(nd.ni_vp, &status, p);
+	vput(nd.ni_vp);
+	if (error)
+		return (error);
+	return (copyout((caddr_t)&status, (caddr_t)uap->ub, sizeof (status)));
+}
+
+/*
+ * Report the status of the file named by uap->path without following a
+ * trailing symbolic link; the result is copied out to uap->ub.
+ *
+ * A symbolic link is reported with the attributes of the directory that
+ * contains it, except that the file type becomes S_IFLNK and the link
+ * count, size and block count are the link's own.
+ */
+struct lstat_args {
+	char	*path;
+	struct stat *ub;
+};
+/* ARGSUSED */
+lstat(p, uap, retval)
+	struct proc *p;
+	register struct lstat_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vnode *vp, *dvp;
+	struct stat sb, lnksb;
+	int error;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE,
+	    uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	dvp = nd.ni_dvp;
+	if (vp->v_type == VLNK) {
+		/* Start from the containing directory's attributes. */
+		error = vn_stat(dvp, &sb, p);
+		vput(dvp);
+		if (error != 0) {
+			vput(vp);
+			return (error);
+		}
+		/* Then overlay the fields that belong to the link itself. */
+		error = vn_stat(vp, &lnksb, p);
+		vput(vp);
+		if (error != 0)
+			return (error);
+		sb.st_mode = (sb.st_mode & ~S_IFDIR) | S_IFLNK;
+		sb.st_nlink = lnksb.st_nlink;
+		sb.st_size = lnksb.st_size;
+		sb.st_blocks = lnksb.st_blocks;
+	} else {
+		/* The parent is not needed for anything but links. */
+		if (dvp == vp)
+			vrele(dvp);
+		else
+			vput(dvp);
+		error = vn_stat(vp, &sb, p);
+		vput(vp);
+		if (error != 0)
+			return (error);
+	}
+	return (copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb)));
+}
+
+/*
+ * Query the configurable pathname variable uap->name for the file named
+ * by uap->path (symbolic links are followed).  The value is delivered
+ * through retval by VOP_PATHCONF().
+ */
+struct pathconf_args {
+	char	*path;
+	int	name;
+};
+/* ARGSUSED */
+pathconf(p, uap, retval)
+	struct proc *p;
+	register struct pathconf_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	error = VOP_PATHCONF(vp, uap->name, retval);
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link.
+ *
+ * Looks up uap->path without following a trailing symbolic link and, if
+ * it names a symbolic link, reads the link contents into the user buffer
+ * uap->buf (at most uap->count bytes).  *retval is set to the number of
+ * bytes transferred.
+ *
+ * Returns EINVAL when the path does not name a symbolic link; otherwise
+ * the result of the lookup or of VOP_READLINK().
+ */
+struct readlink_args {
+	char	*path;
+	char	*buf;
+	int	count;
+};
+/* ARGSUSED */
+readlink(p, uap, retval)
+	struct proc *p;
+	register struct readlink_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct iovec aiov;
+	struct uio auio;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VLNK)
+		error = EINVAL;
+	else {
+		/* Describe a single user-space buffer of uap->count bytes. */
+		aiov.iov_base = uap->buf;
+		aiov.iov_len = uap->count;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
+		auio.uio_offset = 0;
+		auio.uio_rw = UIO_READ;
+		auio.uio_segflg = UIO_USERSPACE;
+		auio.uio_procp = p;
+		auio.uio_resid = uap->count;
+		error = VOP_READLINK(vp, &auio, p->p_ucred);
+		/*
+		 * Compute the transfer count only here: auio is left
+		 * uninitialized on the EINVAL path above, so reading
+		 * uio_resid there would use an indeterminate value.
+		 */
+		*retval = uap->count - auio.uio_resid;
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set the flags of the file named by uap->path (symbolic links are
+ * followed) to uap->flags.  Fails with EROFS when the file is on a
+ * mount flagged read-only.
+ */
+struct chflags_args {
+	char	*path;
+	int	flags;
+};
+/* ARGSUSED */
+chflags(p, uap, retval)
+	struct proc *p;
+	register struct chflags_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr newattr;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		/* Request a change of the flags attribute only. */
+		VATTR_NULL(&newattr);
+		newattr.va_flags = uap->flags;
+		error = VOP_SETATTR(vp, &newattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set the flags of the file open on descriptor uap->fd to uap->flags.
+ * Fails with EROFS when the file is on a mount flagged read-only.
+ */
+struct fchflags_args {
+	int	fd;
+	int	flags;
+};
+/* ARGSUSED */
+fchflags(p, uap, retval)
+	struct proc *p;
+	register struct fchflags_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr newattr;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		/* Request a change of the flags attribute only. */
+		VATTR_NULL(&newattr);
+		newattr.va_flags = uap->flags;
+		error = VOP_SETATTR(vp, &newattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Set the mode of the file named by uap->path (symbolic links are
+ * followed) to uap->mode, restricted to the ALLPERMS bits.  Fails with
+ * EROFS when the file is on a mount flagged read-only.
+ */
+struct chmod_args {
+	char	*path;
+	int	mode;
+};
+/* ARGSUSED */
+chmod(p, uap, retval)
+	struct proc *p;
+	register struct chmod_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr newattr;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		/* Request a change of the mode attribute only. */
+		VATTR_NULL(&newattr);
+		newattr.va_mode = uap->mode & ALLPERMS;
+		error = VOP_SETATTR(vp, &newattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set the mode of the file open on descriptor uap->fd to uap->mode,
+ * restricted to the ALLPERMS bits.  Fails with EROFS when the file is
+ * on a mount flagged read-only.
+ */
+struct fchmod_args {
+	int	fd;
+	int	mode;
+};
+/* ARGSUSED */
+fchmod(p, uap, retval)
+	struct proc *p;
+	register struct fchmod_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr newattr;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		/* Request a change of the mode attribute only. */
+		VATTR_NULL(&newattr);
+		newattr.va_mode = uap->mode & ALLPERMS;
+		error = VOP_SETATTR(vp, &newattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Set the owner and group of the file named by uap->path (symbolic
+ * links are followed) to uap->uid and uap->gid.  Fails with EROFS when
+ * the file is on a mount flagged read-only.
+ */
+struct chown_args {
+	char	*path;
+	int	uid;
+	int	gid;
+};
+/* ARGSUSED */
+chown(p, uap, retval)
+	struct proc *p;
+	register struct chown_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr newattr;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		/* Request a change of the ownership attributes only. */
+		VATTR_NULL(&newattr);
+		newattr.va_uid = uap->uid;
+		newattr.va_gid = uap->gid;
+		error = VOP_SETATTR(vp, &newattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set the owner and group of the file open on descriptor uap->fd to
+ * uap->uid and uap->gid.  Fails with EROFS when the file is on a mount
+ * flagged read-only.
+ */
+struct fchown_args {
+	int	fd;
+	int	uid;
+	int	gid;
+};
+/* ARGSUSED */
+fchown(p, uap, retval)
+	struct proc *p;
+	register struct fchown_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr newattr;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		/* Request a change of the ownership attributes only. */
+		VATTR_NULL(&newattr);
+		newattr.va_uid = uap->uid;
+		newattr.va_gid = uap->gid;
+		error = VOP_SETATTR(vp, &newattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Set the access and modification times of the file named by uap->path
+ * (symbolic links are followed).
+ *
+ * uap->tptr points to two user-space timevals: access time first, then
+ * modification time.  When it is NULL both times are set to the current
+ * time and VA_UTIMES_NULL is passed along in the attribute flags.
+ * Fails with EROFS when the file is on a mount flagged read-only.
+ */
+struct utimes_args {
+	char	*path;
+	struct	timeval *tptr;
+};
+/* ARGSUSED */
+utimes(p, uap, retval)
+	struct proc *p;
+	register struct utimes_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr newattr;
+	struct timeval times[2];
+	register struct vnode *vp;
+	int error;
+
+	VATTR_NULL(&newattr);
+	if (uap->tptr != NULL) {
+		error = copyin((caddr_t)uap->tptr, (caddr_t)times,
+		    sizeof (times));
+		if (error != 0)
+			return (error);
+	} else {
+		microtime(&times[0]);
+		times[1] = times[0];
+		newattr.va_vaflags |= VA_UTIMES_NULL;
+	}
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		/* Convert microseconds to the nanoseconds the vattr uses. */
+		newattr.va_atime.ts_sec = times[0].tv_sec;
+		newattr.va_atime.ts_nsec = times[0].tv_usec * 1000;
+		newattr.va_mtime.ts_sec = times[1].tv_sec;
+		newattr.va_mtime.ts_nsec = times[1].tv_usec * 1000;
+		error = VOP_SETATTR(vp, &newattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set the size of the file named by uap->path (symbolic links are
+ * followed) to uap->length.
+ *
+ * Fails with EISDIR for a directory; otherwise the vnode must pass
+ * vn_writechk() and a VWRITE access check before the size is changed.
+ */
+struct truncate_args {
+	char	*path;
+	int	pad;
+	off_t	length;
+};
+/* ARGSUSED */
+truncate(p, uap, retval)
+	struct proc *p;
+	register struct truncate_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr newattr;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_type == VDIR) {
+		error = EISDIR;
+	} else {
+		error = vn_writechk(vp);
+		if (error == 0)
+			error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
+		if (error == 0) {
+			/* Request a change of the size attribute only. */
+			VATTR_NULL(&newattr);
+			newattr.va_size = uap->length;
+			error = VOP_SETATTR(vp, &newattr, p->p_ucred, p);
+		}
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set the size of the file open on descriptor uap->fd to uap->length.
+ *
+ * The descriptor must have been opened for writing (EINVAL otherwise).
+ * Fails with EISDIR for a directory; otherwise the vnode must pass
+ * vn_writechk().  The attribute change is made with the credentials
+ * stored in the open file.
+ */
+struct ftruncate_args {
+	int	fd;
+	int	pad;
+	off_t	length;
+};
+/* ARGSUSED */
+ftruncate(p, uap, retval)
+	struct proc *p;
+	register struct ftruncate_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr newattr;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	if (!(fp->f_flag & FWRITE))
+		return (EINVAL);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_type == VDIR) {
+		error = EISDIR;
+	} else {
+		error = vn_writechk(vp);
+		if (error == 0) {
+			/* Request a change of the size attribute only. */
+			VATTR_NULL(&newattr);
+			newattr.va_size = uap->length;
+			error = VOP_SETATTR(vp, &newattr, fp->f_cred, p);
+		}
+	}
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Compatibility entry point for truncate with a "long" length: repack
+ * the arguments into a truncate_args and hand them to truncate().
+ */
+struct otruncate_args {
+	char	*path;
+	long	length;
+};
+/* ARGSUSED */
+otruncate(p, uap, retval)
+	struct proc *p;
+	register struct otruncate_args *uap;
+	int *retval;
+{
+	struct truncate_args newargs;
+
+	/* The length is widened from long to off_t by the assignment. */
+	newargs.length = uap->length;
+	newargs.path = uap->path;
+	return (truncate(p, &newargs, retval));
+}
+
+/*
+ * Compatibility entry point for ftruncate with a "long" length: repack
+ * the arguments into an ftruncate_args and hand them to ftruncate().
+ */
+struct oftruncate_args {
+	int	fd;
+	long	length;
+};
+/* ARGSUSED */
+oftruncate(p, uap, retval)
+	struct proc *p;
+	register struct oftruncate_args *uap;
+	int *retval;
+{
+	struct ftruncate_args newargs;
+
+	/* The length is widened from long to off_t by the assignment. */
+	newargs.length = uap->length;
+	newargs.fd = uap->fd;
+	return (ftruncate(p, &newargs, retval));
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Synchronize the file open on descriptor uap->fd by calling VOP_FSYNC()
+ * with MNT_WAIT, using the credentials stored in the open file.
+ */
+struct fsync_args {
+	int	fd;
+};
+/* ARGSUSED */
+fsync(p, uap, retval)
+	struct proc *p;
+	struct fsync_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	register struct vnode *vp;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	/* The vnode stays locked for the duration of the sync. */
+	VOP_LOCK(vp);
+	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Rename files. Source and destination must either both be directories,
+ * or both not be directories. If target is a directory, it must be empty.
+ *
+ * uap->from and uap->to are user-space path names.  The type checks
+ * visible here yield ENOTDIR (directory onto non-directory), EISDIR
+ * (non-directory onto directory) and EINVAL (source is the directory
+ * that would contain the target); the rename itself is done by
+ * VOP_RENAME().  Returns 0 on success, including the case where source
+ * and target are the same directory entry and nothing is done.
+ */
+struct rename_args {
+ char *from;
+ char *to;
+};
+/* ARGSUSED */
+rename(p, uap, retval)
+ struct proc *p;
+ register struct rename_args *uap;
+ int *retval;
+{
+ register struct vnode *tvp, *fvp, *tdvp;
+ struct nameidata fromnd, tond;
+ int error;
+
+ NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
+ uap->from, p);
+ if (error = namei(&fromnd))
+ return (error);
+ fvp = fromnd.ni_vp;
+ NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART,
+ UIO_USERSPACE, uap->to, p);
+ if (error = namei(&tond)) {
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ goto out1; /* target lookup failed: only the fromnd state is left to clean up */
+ }
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp; /* NULL when the target name does not exist yet */
+ if (tvp != NULL) {
+ if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ }
+ }
+ if (fvp == tdvp)
+ error = EINVAL; /* the source is the directory the target would live in */
+ /*
+ * If source is the same as the destination (that is the
+ * same inode number with the same name in the same directory),
+ * then there is nothing to do.
+ *
+ * NOTE(review): this assignment can replace an EINVAL set just
+ * above if both conditions ever hold together -- confirm that
+ * cannot happen.
+ */
+ if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+ fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+ !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+ fromnd.ni_cnd.cn_namelen))
+ error = -1; /* internal marker; turned into 0 before returning */
+out:
+ if (!error) {
+ LEASE_CHECK(tdvp, p, p->p_ucred, LEASE_WRITE);
+ if (fromnd.ni_dvp != tdvp)
+ LEASE_CHECK(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ if (tvp)
+ LEASE_CHECK(tvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+ tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+ } else {
+ /* Error or nothing to do: abort both operations and drop every vnode. */
+ VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
+ if (tdvp == tvp)
+ vrele(tdvp);
+ else
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ }
+ vrele(tond.ni_startdir);
+ FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+out1:
+ if (fromnd.ni_startdir)
+ vrele(fromnd.ni_startdir);
+ FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+ if (error == -1) /* "same entry" marker set above */
+ return (0);
+ return (error);
+}
+
+/*
+ * Create a directory named uap->path.  The requested mode is limited
+ * to the ACCESSPERMS bits and then filtered through the process's file
+ * creation mask.  Fails with EEXIST when the name already exists.
+ */
+struct mkdir_args {
+	char	*path;
+	int	mode;
+};
+/* ARGSUSED */
+mkdir(p, uap, retval)
+	struct proc *p;
+	register struct mkdir_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr newattr;
+	register struct vnode *existing;
+	int error;
+
+	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	existing = nd.ni_vp;
+	if (existing == NULL) {
+		VATTR_NULL(&newattr);
+		newattr.va_type = VDIR;
+		newattr.va_mode = (uap->mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
+		LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+		error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &newattr);
+		/* On success the new directory's vnode is in nd.ni_vp. */
+		if (error == 0)
+			vput(nd.ni_vp);
+		return (error);
+	}
+	/* The name is already taken: abort the create and drop both vnodes. */
+	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+	if (nd.ni_dvp == existing)
+		vrele(nd.ni_dvp);
+	else
+		vput(nd.ni_dvp);
+	vrele(existing);
+	return (EEXIST);
+}
+
+/*
+ * Remove the directory named by uap->path.
+ *
+ * Refused with ENOTDIR when the name is not a directory, EINVAL when it
+ * resolves to its own parent (rmdir "."), and EBUSY when it is the root
+ * of a mounted filesystem.  Otherwise the removal is done by VOP_RMDIR().
+ */
+struct rmdir_args {
+	char	*path;
+};
+/* ARGSUSED */
+rmdir(p, uap, retval)
+	struct proc *p;
+	struct rmdir_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+
+	/* error is 0 here; the first failing check below sets it. */
+	if (vp->v_type != VDIR)
+		error = ENOTDIR;
+	else if (nd.ni_dvp == vp)
+		error = EINVAL;		/* no rmdir "." please */
+	else if (vp->v_flag & VROOT)
+		error = EBUSY;		/* root of a mounted filesystem */
+
+	if (error != 0) {
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+		return (error);
+	}
+	LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	return (VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd));
+}
+
+#ifdef COMPAT_43
+/*
+ * Read a block of directory entries in a file system independent format.
+ *
+ * Compatibility version.  Reads from the directory open on uap->fd,
+ * starting at the file's current offset, for at most uap->count bytes
+ * destined for the user buffer uap->buf.  The offset the read started
+ * at is copied out through uap->basep and *retval is set to the number
+ * of bytes transferred.
+ *
+ * Entries are read into a temporary kernel buffer, the d_type and
+ * d_namlen bytes of each one are rewritten as described in the loop
+ * below, and the result is then moved to the user buffer.  On machines
+ * that are not little-endian the entries are instead read straight
+ * into the user buffer when the mount's mnt_maxsymlinklen is <= 0.
+ *
+ * Returns EBADF if the descriptor is not open for reading, EINVAL if it
+ * is not a directory, EIO if an entry with a zero record length is met.
+ */
+struct ogetdirentries_args {
+ int fd;
+ char *buf;
+ u_int count;
+ long *basep;
+};
+ogetdirentries(p, uap, retval)
+ struct proc *p;
+ register struct ogetdirentries_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct file *fp;
+ struct uio auio, kuio;
+ struct iovec aiov, kiov;
+ struct dirent *dp, *edp;
+ caddr_t dirbuf;
+ int error, readcnt;
+ long loff;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ if ((fp->f_flag & FREAD) == 0)
+ return (EBADF);
+ vp = (struct vnode *)fp->f_data;
+ if (vp->v_type != VDIR)
+ return (EINVAL);
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ auio.uio_resid = uap->count;
+ VOP_LOCK(vp);
+ loff = auio.uio_offset = fp->f_offset; /* remember where this read starts */
+# if (BYTE_ORDER != LITTLE_ENDIAN)
+ if (vp->v_mount->mnt_maxsymlinklen <= 0) {
+ error = VOP_READDIR(vp, &auio, fp->f_cred);
+ fp->f_offset = auio.uio_offset;
+ } else
+# endif
+ {
+ kuio = auio; /* same request, redirected to a kernel buffer */
+ kuio.uio_iov = &kiov;
+ kuio.uio_segflg = UIO_SYSSPACE;
+ kiov.iov_len = uap->count;
+ MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK); /* NOTE(review): size is the caller-supplied count -- confirm it is bounded */
+ kiov.iov_base = dirbuf;
+ error = VOP_READDIR(vp, &kuio, fp->f_cred);
+ fp->f_offset = kuio.uio_offset;
+ if (error == 0) {
+ readcnt = uap->count - kuio.uio_resid; /* bytes actually read */
+ edp = (struct dirent *)&dirbuf[readcnt]; /* one past the last entry */
+ for (dp = (struct dirent *)dirbuf; dp < edp; ) {
+# if (BYTE_ORDER == LITTLE_ENDIAN)
+ /*
+ * The expected low byte of
+ * dp->d_namlen is our dp->d_type.
+ * The high MBZ byte of dp->d_namlen
+ * is our dp->d_namlen.
+ */
+ dp->d_type = dp->d_namlen;
+ dp->d_namlen = 0;
+# else
+ /*
+ * The dp->d_type is the high byte
+ * of the expected dp->d_namlen,
+ * so must be zero'ed.
+ */
+ dp->d_type = 0;
+# endif
+ if (dp->d_reclen > 0) {
+ dp = (struct dirent *)
+ ((char *)dp + dp->d_reclen);
+ } else {
+ error = EIO; /* a zero record length would never advance dp */
+ break;
+ }
+ }
+ if (dp >= edp) /* the loop ran to the end without EIO */
+ error = uiomove(dirbuf, readcnt, &auio);
+ }
+ FREE(dirbuf, M_TEMP);
+ }
+ VOP_UNLOCK(vp);
+ if (error)
+ return (error);
+ error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+ *retval = uap->count - auio.uio_resid;
+ return (error);
+}
+#endif
+
+/*
+ * Read a block of directory entries in a file system independent format.
+ *
+ * Reads from the directory open on uap->fd, starting at the file's
+ * current offset, into the user buffer uap->buf (at most uap->count
+ * bytes).  The offset the read started at is copied out through
+ * uap->basep and *retval is set to the number of bytes transferred.
+ *
+ * When a read transfers nothing and the vnode is a union-layer vnode
+ * (UNION kernels) or the root of a MNT_UNION mount, the open file is
+ * switched to the underlying directory and the read is retried from
+ * offset 0.
+ *
+ * Returns EBADF if the descriptor is not open for reading and EINVAL
+ * if it does not refer to a directory.
+ */
+struct getdirentries_args {
+	int	fd;
+	char	*buf;
+	u_int	count;
+	long	*basep;
+};
+getdirentries(p, uap, retval)
+	struct proc *p;
+	register struct getdirentries_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct file *fp;
+	struct uio auio;
+	struct iovec aiov;
+	long loff;
+	int error;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	if ((fp->f_flag & FREAD) == 0)
+		return (EBADF);
+	vp = (struct vnode *)fp->f_data;
+unionread:
+	if (vp->v_type != VDIR)
+		return (EINVAL);
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->count;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+	auio.uio_resid = uap->count;
+	VOP_LOCK(vp);
+	loff = auio.uio_offset = fp->f_offset;
+	error = VOP_READDIR(vp, &auio, fp->f_cred);
+	fp->f_offset = auio.uio_offset;
+	VOP_UNLOCK(vp);
+	if (error)
+		return (error);
+
+#ifdef UNION
+{
+	extern int (**union_vnodeop_p)();
+	extern struct vnode *union_lowervp __P((struct vnode *));
+
+	if ((uap->count == auio.uio_resid) &&
+	    (vp->v_op == union_vnodeop_p)) {
+		struct vnode *lvp;
+
+		/*
+		 * Keep the lower vnode in its own variable: when there
+		 * is none, vp must remain valid for the MNT_UNION check
+		 * that follows this block.
+		 */
+		lvp = union_lowervp(vp);
+		if (lvp != NULLVP) {
+			VOP_LOCK(lvp);
+			/* VOP_OPEN needs the credentials and process too. */
+			error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
+			VOP_UNLOCK(lvp);
+
+			if (error) {
+				vrele(lvp);
+				return (error);
+			}
+			/* Point the open file at the lower directory. */
+			fp->f_data = (caddr_t) lvp;
+			fp->f_offset = 0;
+			error = vn_close(vp, FREAD, fp->f_cred, p);
+			if (error)
+				return (error);
+			vp = lvp;
+			goto unionread;
+		}
+	}
+}
+#endif
+
+	if ((uap->count == auio.uio_resid) &&
+	    (vp->v_flag & VROOT) &&
+	    (vp->v_mount->mnt_flag & MNT_UNION)) {
+		struct vnode *tvp = vp;
+
+		/* Nothing read at the root of a union mount: go below it. */
+		vp = vp->v_mount->mnt_vnodecovered;
+		VREF(vp);
+		fp->f_data = (caddr_t) vp;
+		fp->f_offset = 0;
+		vrele(tvp);
+		goto unionread;
+	}
+	error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+	*retval = uap->count - auio.uio_resid;
+	return (error);
+}
+
+/*
+ * Replace the process's file creation mask with uap->newmask (limited
+ * to the ALLPERMS bits) and hand the previous mask back through retval.
+ */
+struct umask_args {
+	int	newmask;
+};
+mode_t				/* XXX */
+umask(p, uap, retval)
+	struct proc *p;
+	struct umask_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+
+	/* Report the old mask before overwriting it. */
+	*retval = fdp->fd_cmask;
+	fdp->fd_cmask = uap->newmask & ALLPERMS;
+	return (0);
+}
+
+/*
+ * Void all references to the file named by uap->path by ripping the
+ * underlying filesystem away from its vnode.
+ *
+ * Only character and block devices are accepted (EINVAL otherwise).
+ * The caller must own the file or pass the suser() check.  vgoneall()
+ * is called only when the vnode has other users or is aliased.
+ */
+struct revoke_args {
+	char	*path;
+};
+/* ARGSUSED */
+revoke(p, uap, retval)
+	struct proc *p;
+	register struct revoke_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr attrs;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VCHR && vp->v_type != VBLK) {
+		error = EINVAL;
+	} else if ((error = VOP_GETATTR(vp, &attrs, p->p_ucred, p)) == 0) {
+		/* Owners may revoke; anyone else needs suser() to succeed. */
+		if (p->p_ucred->cr_uid == attrs.va_uid ||
+		    (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+			if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
+				vgoneall(vp);
+		}
+	}
+	vrele(vp);
+	return (error);
+}
+
+/*
+ * Convert a user file descriptor to a kernel file entry.
+ *
+ * On success stores the file entry in *fpp and returns 0.  Returns
+ * EBADF when fd is out of range or not open, and EINVAL when the
+ * descriptor does not refer to a vnode.
+ */
+getvnode(fdp, fd, fpp)
+	struct filedesc *fdp;
+	struct file **fpp;
+	int fd;
+{
+	struct file *entry;
+
+	/* The unsigned compare also rejects negative descriptors. */
+	if ((u_int)fd >= fdp->fd_nfiles)
+		return (EBADF);
+	entry = fdp->fd_ofiles[fd];
+	if (entry == NULL)
+		return (EBADF);
+	if (entry->f_type != DTYPE_VNODE)
+		return (EINVAL);
+	*fpp = entry;
+	return (0);
+}
diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c
new file mode 100644
index 000000000000..1ce7347bdc86
--- /dev/null
+++ b/sys/kern/vfs_init.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed
+ * to Berkeley by John Heidemann of the UCLA Ficus project.
+ *
+ * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_init.c 8.3 (Berkeley) 1/4/94
+ */
+
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/namei.h>
+#include <sys/ucred.h>
+#include <sys/buf.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+/*
+ * Sigh, such primitive tools are these...
+ *
+ * DODEBUG(A) expands to its argument when the "#if 0" below is changed
+ * to enable it, and to nothing otherwise.
+ */
+#if 0
+#define DODEBUG(A) A
+#else
+#define DODEBUG(A)
+#endif
+
+extern struct vnodeopv_desc *vfs_opv_descs[];
+ /* a list of lists of vnodeops defns */
+extern struct vnodeop_desc *vfs_op_descs[];
+ /* and the operations they perform */
+/*
+ * This code doesn't work if the defn is **vnodop_defns with cc.
+ * The problem is because of the compiler sometimes putting in an
+ * extra level of indirection for arrays. It's an interesting
+ * "feature" of C.
+ */
+int vfs_opv_numops; /* number of entries in vfs_op_descs; counted by vfs_op_init() */
+
+typedef (*PFI)(); /* the standard Pointer to a Function returning an Int */
+
+/*
+ * Generic "operation not supported" routine: takes no arguments and
+ * always returns EOPNOTSUPP.
+ */
+int
+vn_default_error()
+{
+	return EOPNOTSUPP;
+}
+
+/*
+ * vfs_init.c
+ *
+ * Allocate and fill in operations vectors.
+ *
+ * An undocumented feature of this approach to defining operations is that
+ * there can be multiple entries in vfs_opv_descs for the same operations
+ * vector. This allows third parties to extend the set of operations
+ * supported by another layer in a binary compatible way. For example,
+ * assume that NFS needed to be modified to support Ficus. NFS has an entry
+ * (probably nfs_vnopdeop_decls) declaring all the operations NFS supports by
+ * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_extensions)
+ * listing those new operations Ficus adds to NFS, all without modifying the
+ * NFS code. (Of course, the OTW NFS protocol still needs to be munged, but
+ * that is a(whole)nother story.) This is a feature.
+ *
+ * The first pass below allocates each vector (vfs_opv_numops slots) if
+ * it is still NULL and stores every listed implementation at the slot
+ * given by its operation's vdesc_offset.  The second pass fills every
+ * slot that is still NULL with the vector's vop_default entry, and
+ * panics if a vector has no default.
+ */
+void
+vfs_opv_init()
+{
+ int i, j, k;
+ int (***opv_desc_vector_p)();
+ int (**opv_desc_vector)();
+ struct vnodeopv_entry_desc *opve_descp;
+
+ /*
+ * Allocate the dynamic vectors and fill them in.
+ */
+ for (i=0; vfs_opv_descs[i]; i++) {
+ opv_desc_vector_p = vfs_opv_descs[i]->opv_desc_vector_p;
+ /*
+ * Allocate and init the vector, if it needs it.
+ * Also handle backwards compatibility.
+ */
+ if (*opv_desc_vector_p == NULL) {
+ /* XXX - shouldn't be M_VNODE */
+ MALLOC(*opv_desc_vector_p, PFI*,
+ vfs_opv_numops*sizeof(PFI), M_VNODE, M_WAITOK);
+ bzero (*opv_desc_vector_p, vfs_opv_numops*sizeof(PFI));
+ DODEBUG(printf("vector at %x allocated\n",
+ opv_desc_vector_p));
+ }
+ opv_desc_vector = *opv_desc_vector_p;
+ for (j=0; vfs_opv_descs[i]->opv_desc_ops[j].opve_op; j++) {
+ opve_descp = &(vfs_opv_descs[i]->opv_desc_ops[j]);
+
+ /*
+ * Sanity check: is this operation listed
+ * in the list of operations? We check this
+ * by seeing if its offset is zero. Since
+ * the default routine should always be listed
+ * first, it should be the only one with a zero
+ * offset. Any other operation with a zero
+ * offset is probably not listed in
+ * vfs_op_descs, and so is probably an error.
+ *
+ * A panic here means the layer programmer
+ * has committed the all-too common bug
+ * of adding a new operation to the layer's
+ * list of vnode operations but
+ * not adding the operation to the system-wide
+ * list of supported operations.
+ */
+ if (opve_descp->opve_op->vdesc_offset == 0 &&
+ opve_descp->opve_op->vdesc_offset !=
+ VOFFSET(vop_default)) {
+ printf("operation %s not listed in %s.\n",
+ opve_descp->opve_op->vdesc_name,
+ "vfs_op_descs");
+ panic ("vfs_opv_init: bad operation");
+ }
+ /*
+ * Fill in this entry.
+ */
+ opv_desc_vector[opve_descp->opve_op->vdesc_offset] =
+ opve_descp->opve_impl;
+ }
+ }
+ /*
+ * Finally, go back and replace unfilled routines
+ * with their default. (Sigh, an O(n^3) algorithm. I
+ * could make it better, but that'd be work, and n is small.)
+ */
+ for (i = 0; vfs_opv_descs[i]; i++) {
+ opv_desc_vector = *(vfs_opv_descs[i]->opv_desc_vector_p);
+ /*
+ * Force every operations vector to have a default routine.
+ */
+ if (opv_desc_vector[VOFFSET(vop_default)]==NULL) {
+ panic("vfs_opv_init: operation vector without default routine.");
+ }
+ for (k = 0; k<vfs_opv_numops; k++)
+ if (opv_desc_vector[k] == NULL)
+ opv_desc_vector[k] =
+ opv_desc_vector[VOFFSET(vop_default)];
+ }
+}
+
+/*
+ * First step of building the vnode operations vectors: reset every
+ * vector pointer named in vfs_opv_descs to NULL, then number the
+ * operations in vfs_op_descs consecutively from zero, leaving the
+ * total in vfs_opv_numops.
+ */
+void
+vfs_op_init()
+{
+	struct vnodeopv_desc **opvpp;
+	struct vnodeop_desc **oppp;
+
+	DODEBUG(printf("Vnode_interface_init.\n"));
+	/*
+	 * Set all vnode vectors to a well known value.
+	 */
+	for (opvpp = vfs_opv_descs; *opvpp != NULL; opvpp++)
+		*((*opvpp)->opv_desc_vector_p) = NULL;
+	/*
+	 * Each operation's offset is its position in the table; the
+	 * final count is the number of operations.
+	 */
+	vfs_opv_numops = 0;
+	for (oppp = vfs_op_descs; *oppp != NULL; oppp++) {
+		(*oppp)->vdesc_offset = vfs_opv_numops;
+		vfs_opv_numops++;
+	}
+	DODEBUG(printf ("vfs_opv_numops=%d\n", vfs_opv_numops));
+}
+
+/*
+ * Routines having to do with the management of the vnode table.
+ */
+extern struct vnodeops dead_vnodeops;
+extern struct vnodeops spec_vnodeops;
+extern void vclean();
+struct vattr va_null;
+
+/*
+ * Bring up the VFS layer: initialize the vnode table and the name
+ * cache, build the vnode operations vectors, set up va_null, and then
+ * call the vfs_init routine of every file system type present in vfssw.
+ */
+vfsinit()
+{
+	int fstype;
+
+	/* Vnode table first, then the vnode name cache. */
+	vntblinit();
+	nchinit();
+	/*
+	 * Build vnode operation vectors: number the operations, then
+	 * allocate and fill in the vectors.
+	 */
+	vfs_op_init();
+	vfs_opv_init();
+	/*
+	 * Initialize each file system type; empty vfssw slots are skipped.
+	 */
+	vattr_null(&va_null);
+	for (fstype = 0; fstype <= MOUNT_MAXTYPE; fstype++) {
+		if (vfssw[fstype] != NULL)
+			(*vfssw[fstype]->vfs_init)();
+	}
+}
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
new file mode 100644
index 000000000000..0fa5aa19b78d
--- /dev/null
+++ b/sys/kern/vfs_lookup.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94
+ */
+
+#include <sys/param.h>
+#include <sys/syslimits.h>
+#include <sys/time.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <sys/proc.h>
+
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+/*
+ * Convert a pathname into a pointer to a vnode, returned in ndp->ni_vp.
+ *
+ * The FOLLOW flag is set when symbolic links are to be followed
+ * when they occur at the end of the name translation process.
+ * Symbolic links are always followed for all other pathname
+ * components other than the last.
+ *
+ * The segflg defines whether the name is to be copied from user
+ * space or kernel space.
+ *
+ * Overall outline of namei:
+ *
+ * copy in name
+ * get starting directory
+ * while (!done && !error) {
+ * call lookup to search path.
+ * if symbolic link, massage name in buffer and continue
+ * }
+ */
+int
+namei(ndp)
+ register struct nameidata *ndp;
+{
+ register struct filedesc *fdp; /* pointer to file descriptor state */
+ register char *cp; /* pointer into pathname argument */
+ register struct vnode *dp; /* the directory we are searching */
+ struct iovec aiov; /* uio for reading symbolic links */
+ struct uio auio;
+ int error, linklen;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred; /* always use the calling process's credentials */
+#ifdef DIAGNOSTIC
+ if (!cnp->cn_cred || !cnp->cn_proc)
+ panic ("namei: bad cred/proc");
+ if (cnp->cn_nameiop & (~OPMASK))
+ panic ("namei: nameiop contaminated with flags");
+ if (cnp->cn_flags & OPMASK)
+ panic ("namei: flags contaminated with nameiops");
+#endif
+ fdp = cnp->cn_proc->p_fd;
+
+ /*
+ * Get a buffer for the name to be translated, and copy the
+ * name into the buffer.
+ */
+ if ((cnp->cn_flags & HASBUF) == 0) /* allocate only when no buffer is attached yet */
+ MALLOC(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ if (ndp->ni_segflg == UIO_SYSSPACE)
+ error = copystr(ndp->ni_dirp, cnp->cn_pnbuf,
+ MAXPATHLEN, &ndp->ni_pathlen);
+ else
+ error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
+ MAXPATHLEN, &ndp->ni_pathlen);
+ if (error) {
+ free(cnp->cn_pnbuf, M_NAMEI); /* NOTE(review): free() here, FREE() elsewhere in this function -- confirm equivalent */
+ ndp->ni_vp = NULL;
+ return (error);
+ }
+ ndp->ni_loopcnt = 0; /* counts symbolic link expansions, checked against MAXSYMLINKS below */
+#ifdef KTRACE
+ if (KTRPOINT(cnp->cn_proc, KTR_NAMEI))
+ ktrnamei(cnp->cn_proc->p_tracep, cnp->cn_pnbuf);
+#endif
+
+ /*
+ * Get starting point for the translation.
+ */
+ if ((ndp->ni_rootdir = fdp->fd_rdir) == NULL) /* no fd_rdir for this process: fall back to rootvnode */
+ ndp->ni_rootdir = rootvnode;
+ dp = fdp->fd_cdir;
+ VREF(dp);
+ for (;;) {
+ /*
+ * Check if root directory should replace current directory.
+ * Done at start of translation and after symbolic link.
+ */
+ cnp->cn_nameptr = cnp->cn_pnbuf;
+ if (*(cnp->cn_nameptr) == '/') {
+ vrele(dp);
+ while (*(cnp->cn_nameptr) == '/') { /* skip every leading slash, keeping ni_pathlen in step */
+ cnp->cn_nameptr++;
+ ndp->ni_pathlen--;
+ }
+ dp = ndp->ni_rootdir;
+ VREF(dp);
+ }
+ ndp->ni_startdir = dp;
+ if (error = lookup(ndp)) {
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ return (error);
+ }
+ /*
+ * Check for symbolic link
+ */
+ if ((cnp->cn_flags & ISSYMLINK) == 0) {
+ if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ else
+ cnp->cn_flags |= HASBUF; /* buffer is kept and left attached to cnp */
+ return (0);
+ }
+ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+ VOP_UNLOCK(ndp->ni_dvp);
+ if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { /* too many link expansions in one translation */
+ error = ELOOP;
+ break;
+ }
+ if (ndp->ni_pathlen > 1) /* unparsed path remains after the link: build the new name in a fresh buffer */
+ MALLOC(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ else
+ cp = cnp->cn_pnbuf; /* nothing follows the link: read it over the old name */
+ aiov.iov_base = cp;
+ aiov.iov_len = MAXPATHLEN;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = 0;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_procp = (struct proc *)0;
+ auio.uio_resid = MAXPATHLEN;
+ if (error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred)) {
+ if (ndp->ni_pathlen > 1)
+ free(cp, M_NAMEI); /* release only the buffer allocated above */
+ break;
+ }
+ linklen = MAXPATHLEN - auio.uio_resid; /* number of bytes VOP_READLINK stored */
+ if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
+ if (ndp->ni_pathlen > 1)
+ free(cp, M_NAMEI);
+ error = ENAMETOOLONG;
+ break;
+ }
+ if (ndp->ni_pathlen > 1) {
+ bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); /* append the unparsed remainder after the link text */
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ cnp->cn_pnbuf = cp;
+ } else
+ cnp->cn_pnbuf[linklen] = '\0';
+ ndp->ni_pathlen += linklen;
+ vput(ndp->ni_vp);
+ dp = ndp->ni_dvp; /* next pass starts from the directory that held the link */
+ }
+ FREE(cnp->cn_pnbuf, M_NAMEI); /* reached only through the error breaks in the loop above */
+ vrele(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ ndp->ni_vp = NULL;
+ return (error);
+}
+
+/*
+ * Search a pathname.
+ * This is a very central and rather complicated routine.
+ *
+ * The pathname is pointed to by cn_nameptr and is of length ni_pathlen.
+ * The starting directory is taken from ni_startdir. The pathname is
+ * descended until done, or a symbolic link is encountered. The ISSYMLINK
+ * flag in cn_flags is clear if the path is completed; it is set if a
+ * symbolic link needing interpretation is encountered.
+ *
+ * The cn_nameiop field is LOOKUP, CREATE, RENAME, or DELETE depending on
+ * whether the name is to be looked up, created, renamed, or deleted.
+ * When CREATE, RENAME, or DELETE is specified, information usable in
+ * creating, renaming, or deleting a directory entry may be calculated.
+ * If cn_flags has LOCKPARENT or'ed into it, the parent directory is returned
+ * locked. If cn_flags has WANTPARENT or'ed into it, the parent directory is
+ * returned unlocked. Otherwise the parent directory is not returned. If
+ * the target of the pathname exists and LOCKLEAF is or'ed into cn_flags
+ * the target is returned locked, otherwise it is returned unlocked.
+ * When creating or renaming and LOCKPARENT is specified, the target may not
+ * be ".". When deleting and LOCKPARENT is specified, the target may be ".".
+ *
+ * Overall outline of lookup:
+ *
+ * dirloop:
+ * identify next component of name at cnp->cn_nameptr
+ * handle degenerate case where name is null string
+ * if .. and crossing mount points and on mounted filesys, find parent
+ * call VOP_LOOKUP routine for next component name
+ * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
+ * component vnode returned in ni_vp (if it exists), locked.
+ * if result vnode is mounted on and crossing mount points,
+ * find mounted on vnode
+ * if more components of name, do next level at dirloop
+ * return the answer in ni_vp, locked if LOCKLEAF set
+ * if LOCKPARENT set, return locked parent in ni_dvp
+ * if WANTPARENT set, return unlocked parent in ni_dvp
+ */
+int
+lookup(ndp)
+ register struct nameidata *ndp;
+{
+ register char *cp; /* pointer into pathname argument */
+ register struct vnode *dp = 0; /* the directory we are searching */
+ struct vnode *tdp; /* saved dp */
+ struct mount *mp; /* mount table entry */
+ int docache; /* == 0 do not cache last component */
+ int wantparent; /* 1 => wantparent or lockparent flag */
+ int rdonly; /* lookup read-only flag bit */
+ int error = 0;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ /*
+ * Setup: break out flag bits into variables.
+ */
+ wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
+ docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; /* nonzero exactly when NOCACHE is clear */
+ if (cnp->cn_nameiop == DELETE ||
+ (wantparent && cnp->cn_nameiop != CREATE))
+ docache = 0;
+ rdonly = cnp->cn_flags & RDONLY;
+ ndp->ni_dvp = NULL;
+ cnp->cn_flags &= ~ISSYMLINK;
+ dp = ndp->ni_startdir;
+ ndp->ni_startdir = NULLVP; /* set again below only when SAVESTART is requested */
+ VOP_LOCK(dp);
+
+dirloop:
+ /*
+ * Search a new directory.
+ *
+ * The cn_hash value is for use by vfs_cache.
+ * The last component of the filename is left accessible via
+ * cnp->cn_nameptr for callers that need the name. Callers needing
+ * the name set the SAVENAME flag. When done, they assume
+ * responsibility for freeing the pathname buffer.
+ */
+ cnp->cn_consume = 0;
+ cnp->cn_hash = 0;
+ for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
+ cnp->cn_hash += (unsigned char)*cp; /* hash is the plain sum of the component's bytes */
+ cnp->cn_namelen = cp - cnp->cn_nameptr;
+ if (cnp->cn_namelen > NAME_MAX) {
+ error = ENAMETOOLONG;
+ goto bad;
+ }
+#ifdef NAMEI_DIAGNOSTIC
+ { char c = *cp;
+ *cp = '\0';
+ printf("{%s}: ", cnp->cn_nameptr);
+ *cp = c; }
+#endif
+ ndp->ni_pathlen -= cnp->cn_namelen; /* ni_pathlen now counts what follows this component */
+ ndp->ni_next = cp;
+ cnp->cn_flags |= MAKEENTRY;
+ if (*cp == '\0' && docache == 0) /* last component with caching turned off */
+ cnp->cn_flags &= ~MAKEENTRY;
+ if (cnp->cn_namelen == 2 &&
+ cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
+ cnp->cn_flags |= ISDOTDOT;
+ else
+ cnp->cn_flags &= ~ISDOTDOT;
+ if (*ndp->ni_next == 0)
+ cnp->cn_flags |= ISLASTCN;
+ else
+ cnp->cn_flags &= ~ISLASTCN;
+
+
+ /*
+ * Check for degenerate name (e.g. / or "")
+ * which is a way of talking about a directory,
+ * e.g. like "/." or ".".
+ */
+ if (cnp->cn_nameptr[0] == '\0') {
+ if (cnp->cn_nameiop != LOOKUP) {
+ error = EISDIR;
+ goto bad;
+ }
+ if (dp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto bad;
+ }
+ if (wantparent) {
+ ndp->ni_dvp = dp; /* the directory serves as its own parent here */
+ VREF(dp);
+ }
+ ndp->ni_vp = dp;
+ if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
+ VOP_UNLOCK(dp);
+ if (cnp->cn_flags & SAVESTART)
+ panic("lookup: SAVESTART");
+ return (0);
+ }
+
+ /*
+ * Handle "..": two special cases.
+ * 1. If at root directory (e.g. after chroot)
+ * or at absolute root directory
+ * then ignore it so can't get out.
+ * 2. If this vnode is the root of a mounted
+ * filesystem, then replace it with the
+ * vnode which was mounted on so we take the
+ * .. in the other file system.
+ */
+ if (cnp->cn_flags & ISDOTDOT) {
+ for (;;) {
+ if (dp == ndp->ni_rootdir || dp == rootvnode) {
+ ndp->ni_dvp = dp;
+ ndp->ni_vp = dp;
+ VREF(dp);
+ goto nextname; /* ".." at a root resolves to the root itself */
+ }
+ if ((dp->v_flag & VROOT) == 0 ||
+ (cnp->cn_flags & NOCROSSMOUNT))
+ break;
+ tdp = dp;
+ dp = dp->v_mount->mnt_vnodecovered; /* continue from the vnode this filesystem covers */
+ vput(tdp);
+ VREF(dp);
+ VOP_LOCK(dp);
+ }
+ }
+
+ /*
+ * We now have a segment name to search for, and a directory to search.
+ */
+unionlookup:
+ ndp->ni_dvp = dp;
+ if (error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) {
+#ifdef DIAGNOSTIC
+ if (ndp->ni_vp != NULL)
+ panic("leaf should be empty");
+#endif
+#ifdef NAMEI_DIAGNOSTIC
+ printf("not found\n");
+#endif
+ if ((error == ENOENT) &&
+ (dp->v_flag & VROOT) &&
+ (dp->v_mount->mnt_flag & MNT_UNION)) {
+ tdp = dp;
+ dp = dp->v_mount->mnt_vnodecovered;
+ vput(tdp);
+ VREF(dp);
+ VOP_LOCK(dp);
+ goto unionlookup; /* retry the same name in the covered directory */
+ }
+
+ if (error != EJUSTRETURN) /* only EJUSTRETURN continues to the code below */
+ goto bad;
+ /*
+ * If creating and at end of pathname, then can consider
+ * allowing file to be created.
+ */
+ if (rdonly || (ndp->ni_dvp->v_mount->mnt_flag & MNT_RDONLY)) {
+ error = EROFS;
+ goto bad;
+ }
+ /*
+ * We return with ni_vp NULL to indicate that the entry
+ * doesn't currently exist, leaving a pointer to the
+ * (possibly locked) directory inode in ndp->ni_dvp.
+ */
+ if (cnp->cn_flags & SAVESTART) {
+ ndp->ni_startdir = ndp->ni_dvp;
+ VREF(ndp->ni_startdir);
+ }
+ return (0);
+ }
+#ifdef NAMEI_DIAGNOSTIC
+ printf("found\n");
+#endif
+
+ /*
+ * Take into account any additional components consumed by
+ * the underlying filesystem.
+ */
+ if (cnp->cn_consume > 0) {
+ cnp->cn_nameptr += cnp->cn_consume;
+ ndp->ni_next += cnp->cn_consume;
+ ndp->ni_pathlen -= cnp->cn_consume;
+ cnp->cn_consume = 0;
+ }
+
+ dp = ndp->ni_vp;
+ /*
+ * Check for symbolic link
+ */
+ if ((dp->v_type == VLNK) &&
+ ((cnp->cn_flags & FOLLOW) || *ndp->ni_next == '/')) {
+ cnp->cn_flags |= ISSYMLINK;
+ return (0); /* namei tests ISSYMLINK and expands the link */
+ }
+
+ /*
+ * Check to see if the vnode has been mounted on;
+ * if so find the root of the mounted file system.
+ */
+ while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
+ (cnp->cn_flags & NOCROSSMOUNT) == 0) {
+ if (mp->mnt_flag & MNT_MLOCK) {
+ mp->mnt_flag |= MNT_MWAIT;
+ sleep((caddr_t)mp, PVFS);
+ continue; /* re-evaluate v_mountedhere after waking */
+ }
+ if (error = VFS_ROOT(dp->v_mountedhere, &tdp))
+ goto bad2;
+ vput(dp);
+ ndp->ni_vp = dp = tdp;
+ }
+
+nextname:
+ /*
+ * Not a symbolic link. If more pathname,
+ * continue at next component, else return.
+ */
+ if (*ndp->ni_next == '/') {
+ cnp->cn_nameptr = ndp->ni_next;
+ while (*cnp->cn_nameptr == '/') {
+ cnp->cn_nameptr++;
+ ndp->ni_pathlen--;
+ }
+ vrele(ndp->ni_dvp);
+ goto dirloop; /* dp, the vnode just found, is the next directory searched */
+ }
+ /*
+ * Check for read-only file systems.
+ */
+ if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) {
+ /*
+ * Disallow directory write attempts on read-only
+ * file systems.
+ */
+ if (rdonly || (dp->v_mount->mnt_flag & MNT_RDONLY) ||
+ (wantparent &&
+ (ndp->ni_dvp->v_mount->mnt_flag & MNT_RDONLY))) {
+ error = EROFS;
+ goto bad2;
+ }
+ }
+ if (cnp->cn_flags & SAVESTART) {
+ ndp->ni_startdir = ndp->ni_dvp;
+ VREF(ndp->ni_startdir);
+ }
+ if (!wantparent)
+ vrele(ndp->ni_dvp); /* caller asked for no parent: drop its reference */
+ if ((cnp->cn_flags & LOCKLEAF) == 0)
+ VOP_UNLOCK(dp);
+ return (0);
+
+bad2:
+ if ((cnp->cn_flags & LOCKPARENT) && *ndp->ni_next == '\0')
+ VOP_UNLOCK(ndp->ni_dvp);
+ vrele(ndp->ni_dvp);
+bad:
+ vput(dp);
+ ndp->ni_vp = NULL;
+ return (error);
+}
+
+
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
new file mode 100644
index 000000000000..2fe39eb674b0
--- /dev/null
+++ b/sys/kern/vfs_mount.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_conf.c 8.8 (Berkeley) 3/31/94
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+#ifdef FFS
+#include <ufs/ffs/ffs_extern.h>
+
+/*
+ * This specifies the filesystem used to mount the root.
+ * This specification should be done by /etc/config.
+ */
+int (*mountroot)() = ffs_mountroot;
+#endif
+
+/*
+ * These define the root filesystem and device.
+ */
+struct mount *rootfs;
+struct vnode *rootvnode;
+
+/*
+ * Set up the filesystem operations for vnodes.
+ * The types are defined in mount.h.
+ */
+#ifdef FFS
+extern struct vfsops ufs_vfsops;
+#define UFS_VFSOPS &ufs_vfsops
+#else
+#define UFS_VFSOPS NULL
+#endif
+
+#ifdef LFS
+extern struct vfsops lfs_vfsops;
+#define LFS_VFSOPS &lfs_vfsops
+#else
+#define LFS_VFSOPS NULL
+#endif
+
+#ifdef MFS
+extern struct vfsops mfs_vfsops;
+#define MFS_VFSOPS &mfs_vfsops
+#else
+#define MFS_VFSOPS NULL
+#endif
+
+#ifdef NFS
+extern struct vfsops nfs_vfsops;
+#define NFS_VFSOPS &nfs_vfsops
+#else
+#define NFS_VFSOPS NULL
+#endif
+
+#ifdef FDESC
+extern struct vfsops fdesc_vfsops;
+#define FDESC_VFSOPS &fdesc_vfsops
+#else
+#define FDESC_VFSOPS NULL
+#endif
+
+#ifdef PORTAL
+extern struct vfsops portal_vfsops;
+#define PORTAL_VFSOPS &portal_vfsops
+#else
+#define PORTAL_VFSOPS NULL
+#endif
+
+#ifdef NULLFS
+extern struct vfsops null_vfsops;
+#define NULL_VFSOPS &null_vfsops
+#else
+#define NULL_VFSOPS NULL
+#endif
+
+#ifdef UMAPFS
+extern struct vfsops umap_vfsops;
+#define UMAP_VFSOPS &umap_vfsops
+#else
+#define UMAP_VFSOPS NULL
+#endif
+
+#ifdef KERNFS
+extern struct vfsops kernfs_vfsops;
+#define KERNFS_VFSOPS &kernfs_vfsops
+#else
+#define KERNFS_VFSOPS NULL
+#endif
+
+#ifdef PROCFS
+extern struct vfsops procfs_vfsops;
+#define PROCFS_VFSOPS &procfs_vfsops
+#else
+#define PROCFS_VFSOPS NULL
+#endif
+
+#ifdef AFS
+extern struct vfsops afs_vfsops;
+#define AFS_VFSOPS &afs_vfsops
+#else
+#define AFS_VFSOPS NULL
+#endif
+
+#ifdef CD9660
+extern struct vfsops cd9660_vfsops;
+#define CD9660_VFSOPS &cd9660_vfsops
+#else
+#define CD9660_VFSOPS NULL
+#endif
+
+#ifdef UNION
+extern struct vfsops union_vfsops;
+#define UNION_VFSOPS &union_vfsops
+#else
+#define UNION_VFSOPS NULL
+#endif
+
+struct vfsops *vfssw[] = { /* one slot per mount type number; NULL = type not configured */
+ NULL, /* 0 = MOUNT_NONE */
+ UFS_VFSOPS, /* 1 = MOUNT_UFS */
+ NFS_VFSOPS, /* 2 = MOUNT_NFS */
+ MFS_VFSOPS, /* 3 = MOUNT_MFS */
+ NULL, /* 4 = MOUNT_PC */
+ LFS_VFSOPS, /* 5 = MOUNT_LFS */
+ NULL, /* 6 = MOUNT_LOFS */
+ FDESC_VFSOPS, /* 7 = MOUNT_FDESC */
+ PORTAL_VFSOPS, /* 8 = MOUNT_PORTAL */
+ NULL_VFSOPS, /* 9 = MOUNT_NULL */
+ UMAP_VFSOPS, /* 10 = MOUNT_UMAP */
+ KERNFS_VFSOPS, /* 11 = MOUNT_KERNFS */
+ PROCFS_VFSOPS, /* 12 = MOUNT_PROCFS */
+ AFS_VFSOPS, /* 13 = MOUNT_AFS */
+ CD9660_VFSOPS, /* 14 = MOUNT_CD9660 */
+ UNION_VFSOPS, /* 15 = MOUNT_UNION */
+ 0 /* extra null entry after the last numbered type */
+};
+
+
+/*
+ *
+ * vfs_opv_descs enumerates the list of vnode classes, each with its own
+ * vnode operation vector. It is consulted at system boot to build operation
+ * vectors. It is NULL terminated.
+ *
+ */
+extern struct vnodeopv_desc ffs_vnodeop_opv_desc;
+extern struct vnodeopv_desc ffs_specop_opv_desc;
+extern struct vnodeopv_desc ffs_fifoop_opv_desc;
+extern struct vnodeopv_desc lfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc lfs_specop_opv_desc;
+extern struct vnodeopv_desc lfs_fifoop_opv_desc;
+extern struct vnodeopv_desc mfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc dead_vnodeop_opv_desc;
+extern struct vnodeopv_desc fifo_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_vnodeop_opv_desc;
+extern struct vnodeopv_desc nfsv2_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fdesc_vnodeop_opv_desc;
+extern struct vnodeopv_desc portal_vnodeop_opv_desc;
+extern struct vnodeopv_desc null_vnodeop_opv_desc;
+extern struct vnodeopv_desc umap_vnodeop_opv_desc;
+extern struct vnodeopv_desc kernfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc procfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_specop_opv_desc;
+extern struct vnodeopv_desc cd9660_fifoop_opv_desc;
+extern struct vnodeopv_desc union_vnodeop_opv_desc;
+
+struct vnodeopv_desc *vfs_opv_descs[] = { /* NULL-terminated; entries follow the kernel configuration */
+ &ffs_vnodeop_opv_desc, /* NOTE(review): ffs entries are not under #ifdef FFS, unlike UFS_VFSOPS -- confirm FFS is mandatory */
+ &ffs_specop_opv_desc,
+#ifdef FIFO
+ &ffs_fifoop_opv_desc,
+#endif
+ &dead_vnodeop_opv_desc, /* dead and spec vectors are always present */
+#ifdef FIFO
+ &fifo_vnodeop_opv_desc,
+#endif
+ &spec_vnodeop_opv_desc,
+#ifdef LFS
+ &lfs_vnodeop_opv_desc,
+ &lfs_specop_opv_desc,
+#ifdef FIFO
+ &lfs_fifoop_opv_desc,
+#endif
+#endif
+#ifdef MFS
+ &mfs_vnodeop_opv_desc,
+#endif
+#ifdef NFS
+ &nfsv2_vnodeop_opv_desc,
+ &spec_nfsv2nodeop_opv_desc,
+#ifdef FIFO
+ &fifo_nfsv2nodeop_opv_desc,
+#endif
+#endif
+#ifdef FDESC
+ &fdesc_vnodeop_opv_desc,
+#endif
+#ifdef PORTAL
+ &portal_vnodeop_opv_desc,
+#endif
+#ifdef NULLFS
+ &null_vnodeop_opv_desc,
+#endif
+#ifdef UMAPFS
+ &umap_vnodeop_opv_desc,
+#endif
+#ifdef KERNFS
+ &kernfs_vnodeop_opv_desc,
+#endif
+#ifdef PROCFS
+ &procfs_vnodeop_opv_desc,
+#endif
+#ifdef CD9660
+ &cd9660_vnodeop_opv_desc,
+ &cd9660_specop_opv_desc,
+#ifdef FIFO
+ &cd9660_fifoop_opv_desc,
+#endif
+#endif
+#ifdef UNION
+ &union_vnodeop_opv_desc,
+#endif
+ NULL /* terminator */
+};
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
new file mode 100644
index 000000000000..9891fe61c198
--- /dev/null
+++ b/sys/kern/vfs_subr.c
@@ -0,0 +1,1322 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
+ */
+
+/*
+ * External virtual filesystem routines
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/namei.h>
+#include <sys/ucred.h>
+#include <sys/buf.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+#include <miscfs/specfs/specdev.h>
+
+enum vtype iftovt_tab[16] = { /* presumably indexed by the file-type bits of a mode -- confirm against the IFTOVT user */
+ VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
+ VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
+};
+int vttoif_tab[9] = { /* S_IF* value per vnode type; presumably indexed by enum vtype -- confirm enum order */
+ 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
+ S_IFSOCK, S_IFIFO, S_IFMT,
+};
+
+/*
+ * Insq/Remq for the vnode usage lists.
+ */
+#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
+#define bufremvn(bp) { \
+ LIST_REMOVE(bp, b_vnbufs); \
+ (bp)->b_vnbufs.le_next = NOLIST; \
+}
+
+TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
+struct mntlist mountlist; /* mounted filesystem list */
+
+/*
+ * Initialize the vnode management data structures.
+ */
+vntblinit()
+{
+	/* Both queues start out empty; the two calls are independent. */
+	TAILQ_INIT(&mountlist);
+	TAILQ_INIT(&vnode_free_list);
+}
+
+/*
+ * Lock a filesystem.
+ * Used to prevent access to it while mounting and unmounting.
+ */
+vfs_lock(mp)
+	register struct mount *mp;
+{
+	/* Sleep on mp for as long as somebody else holds MNT_MLOCK. */
+	while ((mp->mnt_flag & MNT_MLOCK) != 0) {
+		mp->mnt_flag |= MNT_MWAIT;	/* request a wakeup on unlock */
+		sleep((caddr_t)mp, PVFS);
+	}
+	mp->mnt_flag |= MNT_MLOCK;
+	return (0);
+}
+
+/*
+ * Unlock a locked filesystem.
+ * Panic if filesystem is not locked.
+ */
+void
+vfs_unlock(mp)
+	register struct mount *mp;
+{
+
+	if (!(mp->mnt_flag & MNT_MLOCK))
+		panic("vfs_unlock: not locked");
+	mp->mnt_flag &= ~MNT_MLOCK;
+	if ((mp->mnt_flag & MNT_MWAIT) == 0)
+		return;		/* nobody asked to be woken */
+	mp->mnt_flag &= ~MNT_MWAIT;
+	wakeup((caddr_t)mp);
+}
+
+/*
+ * Mark a mount point as busy.
+ * Used to synchronize access and to delay unmounting.
+ */
+vfs_busy(mp)
+	register struct mount *mp;
+{
+	/* Wait on &mp->mnt_flag until MNT_MPBUSY is released. */
+	while ((mp->mnt_flag & MNT_MPBUSY) != 0) {
+		mp->mnt_flag |= MNT_MPWANT;
+		sleep((caddr_t)&mp->mnt_flag, PVFS);
+	}
+	if ((mp->mnt_flag & MNT_UNMOUNT) != 0)
+		return (1);	/* MNT_UNMOUNT set: refuse, busy bit not taken */
+	mp->mnt_flag |= MNT_MPBUSY;
+	return (0);
+}
+
+/*
+ * Free a busy filesystem.
+ * Panic if filesystem is not busy.
+ */
+vfs_unbusy(mp)
+	register struct mount *mp;
+{
+
+	if (!(mp->mnt_flag & MNT_MPBUSY))
+		panic("vfs_unbusy: not busy");
+	mp->mnt_flag &= ~MNT_MPBUSY;
+	if ((mp->mnt_flag & MNT_MPWANT) == 0)
+		return;		/* nobody asked to be woken */
+	mp->mnt_flag &= ~MNT_MPWANT;
+	wakeup((caddr_t)&mp->mnt_flag);
+}
+
+/*
+ * Lookup a mount point by filesystem identifier.
+ */
+struct mount *
+getvfs(fsid)
+	fsid_t *fsid;
+{
+	register struct mount *m;
+
+	m = mountlist.tqh_first;
+	while (m != NULL && (m->mnt_stat.f_fsid.val[0] != fsid->val[0] ||
+	    m->mnt_stat.f_fsid.val[1] != fsid->val[1]))
+		m = m->mnt_list.tqe_next;
+	/* m is the first mount whose fsid matches, or NULL if none does. */
+	return (m);
+}
+
+/*
+ * Get a new unique fsid
+ */
+void
+getnewfsid(mp, mtype)
+	struct mount *mp;
+	int mtype;
+{
+	static u_short xxxfs_mntid;	/* counter that persists across calls */
+	fsid_t candidate;
+
+	/* Provisional id with minor 0; val[0] is replaced at the end. */
+	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
+	mp->mnt_stat.f_fsid.val[1] = mtype;
+	/* Never start the search from minor number 0. */
+	if (xxxfs_mntid == 0)
+		xxxfs_mntid = 1;
+	candidate.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
+	candidate.val[1] = mtype;
+	/* Bump the candidate until no mounted filesystem already uses it. */
+	while (mountlist.tqh_first != NULL && getvfs(&candidate)) {
+		candidate.val[0]++;
+		xxxfs_mntid++;
+	}
+	mp->mnt_stat.f_fsid.val[0] = candidate.val[0];
+}
+
+/*
+ * Set vnode attributes to VNOVAL
+ */
+void vattr_null(vap)
+ register struct vattr *vap; /* attribute structure to reset */
+{
+
+ vap->va_type = VNON;
+ vap->va_size = vap->va_bytes = VNOVAL;
+ vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = /* one right-to-left assignment chain ... */
+ vap->va_fsid = vap->va_fileid =
+ vap->va_blocksize = vap->va_rdev =
+ vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
+ vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
+ vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
+ vap->va_flags = vap->va_gen = VNOVAL; /* ... each field gets the value stored to its right, converted to its own type */
+ vap->va_vaflags = 0; /* the one field cleared to 0 rather than VNOVAL */
+}
+
+/*
+ * Routines having to do with the management of the vnode table.
+ */
+extern int (**dead_vnodeop_p)();
+extern void vclean();
+long numvnodes;
+extern struct vattr va_null;
+
+/*
+ * Allocate or recycle a vnode; returns 0 with it in *vpp, or ENFILE.
+ */
+getnewvnode(tag, mp, vops, vpp)
+ enum vtagtype tag;
+ struct mount *mp;
+ int (**vops)();
+ struct vnode **vpp;
+{
+ register struct vnode *vp;
+ int s;
+
+ if ((vnode_free_list.tqh_first == NULL && /* free list empty and under twice the target ... */
+ numvnodes < 2 * desiredvnodes) ||
+ numvnodes < desiredvnodes) { /* ... or simply under the target: allocate a new vnode */
+ vp = (struct vnode *)malloc((u_long)sizeof *vp,
+ M_VNODE, M_WAITOK);
+ bzero((char *)vp, sizeof *vp);
+ numvnodes++;
+ } else {
+ if ((vp = vnode_free_list.tqh_first) == NULL) { /* nothing to recycle either */
+ tablefull("vnode");
+ *vpp = 0;
+ return (ENFILE);
+ }
+ if (vp->v_usecount)
+ panic("free vnode isn't");
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ /* see comment on why 0xdeadb is set at end of vgone (below) */
+ vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
+ vp->v_lease = NULL;
+ if (vp->v_type != VBAD)
+ vgone(vp);
+#ifdef DIAGNOSTIC
+ if (vp->v_data)
+ panic("cleaned vnode isn't");
+ s = splbio();
+ if (vp->v_numoutput)
+ panic("Clean vnode has pending I/O's");
+ splx(s);
+#endif
+ vp->v_flag = 0; /* reset the per-use fields of the recycled vnode */
+ vp->v_lastr = 0;
+ vp->v_ralen = 0;
+ vp->v_maxra = 0;
+ vp->v_lastw = 0;
+ vp->v_lasta = 0;
+ vp->v_cstart = 0;
+ vp->v_clen = 0;
+ vp->v_socket = 0;
+ }
+ vp->v_type = VNON; /* common setup for both the new and the recycled case */
+ cache_purge(vp);
+ vp->v_tag = tag;
+ vp->v_op = vops;
+ insmntque(vp, mp);
+ *vpp = vp;
+ vp->v_usecount = 1; /* handed back with a single reference */
+ vp->v_data = 0;
+ return (0);
+}
+
+/*
+ * Move a vnode from one mount queue to another.
+ */
+insmntque(vp, mp)
+	register struct vnode *vp;
+	register struct mount *mp;
+{
+
+	/*
+	 * A vnode already on a mount's list is unlinked from it first.
+	 */
+	if (vp->v_mount != NULL)
+		LIST_REMOVE(vp, v_mntvnodes);
+	/*
+	 * Record the new mount; with a NULL mount the vnode joins no list.
+	 */
+	vp->v_mount = mp;
+	if (mp != NULL)
+		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
+}
+
+/*
+ * Update outstanding I/O count and do wakeup if requested.
+ */
+vwakeup(bp)
+	register struct buf *bp;
+{
+	register struct vnode *vp;
+
+	bp->b_flags &= ~B_WRITEINPROG;
+	if ((vp = bp->b_vp) == NULL)
+		return;		/* buffer has no vnode: nothing to count */
+	/* One fewer write outstanding; the count may never go negative. */
+	vp->v_numoutput--;
+	if (vp->v_numoutput < 0)
+		panic("vwakeup: neg numoutput");
+	/* Output drained: wake whoever set VBWAIT and sleeps on the count. */
+	if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
+		vp->v_flag &= ~VBWAIT;
+		wakeup((caddr_t)&vp->v_numoutput);
+	}
+}
+
+/*
+ * Flush out and invalidate all buffers associated with a vnode.
+ * Called with the underlying object locked.
+ */
+int
+vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
+ register struct vnode *vp;
+ int flags;
+ struct ucred *cred;
+ struct proc *p;
+ int slpflag, slptimeo;
+{
+ register struct buf *bp;
+ struct buf *nbp, *blist;
+ int s, error;
+
+ if (flags & V_SAVE) { /* caller wants dirty data written out, not discarded */
+ if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
+ return (error);
+ if (vp->v_dirtyblkhd.lh_first != NULL)
+ panic("vinvalbuf: dirty bufs");
+ }
+ for (;;) { /* each pass picks a list to work on; ends when none is left */
+ if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
+ while (blist && blist->b_lblkno < 0) /* V_SAVEMETA: skip buffers with negative block numbers */
+ blist = blist->b_vnbufs.le_next;
+ if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
+ (flags & V_SAVEMETA))
+ while (blist && blist->b_lblkno < 0)
+ blist = blist->b_vnbufs.le_next;
+ if (!blist)
+ break;
+
+ for (bp = blist; bp; bp = nbp) {
+ nbp = bp->b_vnbufs.le_next; /* fetch the successor before bp is released */
+ if (flags & V_SAVEMETA && bp->b_lblkno < 0)
+ continue;
+ s = splbio();
+ if (bp->b_flags & B_BUSY) {
+ bp->b_flags |= B_WANTED;
+ error = tsleep((caddr_t)bp,
+ slpflag | (PRIBIO + 1), "vinvalbuf",
+ slptimeo);
+ splx(s);
+ if (error)
+ return (error);
+ break; /* leave the inner loop; the outer loop rescans from the list heads */
+ }
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ splx(s);
+ /*
+ * XXX Since there are no node locks for NFS, I believe
+ * there is a slight chance that a delayed write will
+ * occur while sleeping just above, so check for it.
+ */
+ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
+ (void) VOP_BWRITE(bp);
+ break; /* as above: restart the scan after the write */
+ }
+ bp->b_flags |= B_INVAL; /* mark invalid, then release the buffer */
+ brelse(bp);
+ }
+ }
+ if (!(flags & V_SAVEMETA) &&
+ (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
+ panic("vinvalbuf: flush failed");
+ return (0);
+}
+
+/*
+ * Attach a buffer to a vnode.
+ *
+ * The buffer must not already belong to a vnode.  The vnode gains a
+ * hold reference, the buffer's device number is taken from the vnode
+ * when it is a block or character special file (NODEV otherwise), and
+ * the buffer is placed on the vnode's clean list.
+ */
+bgetvp(vp, bp)
+	struct vnode *vp;
+	struct buf *bp;
+{
+
+	if (bp->b_vp != NULL)
+		panic("bgetvp: not free");
+	VHOLD(vp);
+	bp->b_vp = vp;
+	switch (vp->v_type) {
+	case VBLK:
+	case VCHR:
+		bp->b_dev = vp->v_rdev;
+		break;
+	default:
+		bp->b_dev = NODEV;
+		break;
+	}
+	/* New associations always start out on the clean list. */
+	bufinsvn(bp, &vp->v_cleanblkhd);
+}
+
+/*
+ * Detach a buffer from its vnode.
+ *
+ * Panics if the buffer has no vnode.  The buffer is unlinked from the
+ * vnode's buffer list when it is on one, its vnode pointer is cleared,
+ * and the hold reference on the vnode is dropped.
+ */
+brelvp(bp)
+	struct buf *bp;
+{
+	struct vnode *oldvp;
+
+	if (bp->b_vp == NULL)
+		panic("brelvp: NULL");
+	/* NOLIST in the link field means "not on any vnode list". */
+	if (bp->b_vnbufs.le_next != NOLIST)
+		bufremvn(bp);
+	oldvp = bp->b_vp;
+	bp->b_vp = NULL;
+	HOLDRELE(oldvp);
+}
+
+/*
+ * Move a buffer onto the buffer lists of another vnode.
+ * Used to assign file specific control information
+ * (indirect blocks) to the vnode to which they belong.
+ *
+ * A null newvp is reported with printf() and otherwise ignored.  The
+ * buffer lands on newvp's dirty list when B_DELWRI is set, and on its
+ * clean list otherwise.
+ */
+reassignbuf(bp, newvp)
+	struct buf *bp;
+	struct vnode *newvp;
+{
+	struct buflists *head;
+
+	if (newvp == NULL) {
+		printf("reassignbuf: NULL");
+		return;
+	}
+	/* Leave whatever vnode list the buffer is currently on. */
+	if (bp->b_vnbufs.le_next != NOLIST)
+		bufremvn(bp);
+	head = (bp->b_flags & B_DELWRI) ?
+	    &newvp->v_dirtyblkhd : &newvp->v_cleanblkhd;
+	bufinsvn(bp, head);
+}
+
+/*
+ * Create a vnode for a block device.
+ * Used for root filesystem, argdev, and swap areas.
+ * Also used for memory file system special devices.
+ *
+ * On success *vpp receives the vnode (an existing alias when
+ * checkalias() finds one) and 0 is returned.  If getnewvnode() fails,
+ * *vpp is cleared and its error is returned.  For dev == NODEV the
+ * function returns 0 without writing *vpp.
+ */
+bdevvp(dev, vpp)
+	dev_t dev;
+	struct vnode **vpp;
+{
+	struct vnode *vp, *alias;
+	int error;
+
+	if (dev == NODEV)
+		return (0);
+	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &vp);
+	if (error != 0) {
+		*vpp = 0;
+		return (error);
+	}
+	vp->v_type = VBLK;
+	alias = checkalias(vp, dev, (struct mount *)0);
+	if (alias != NULL) {
+		/* An existing vnode already represents dev; use it. */
+		vput(vp);
+		vp = alias;
+	}
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Check to see if the new vnode represents a special device
+ * for which we already have a vnode (either because of
+ * bdevvp() or because of a different vnode representing
+ * the same block device). If such an alias exists, deallocate
+ * the existing contents and return the aliased vnode. The
+ * caller is responsible for filling it with its new contents.
+ *
+ * nvp is the new vnode, nvp_rdev its device number, and mp the mount
+ * point the returned alias is moved onto.  Returns NULLVP when nvp is
+ * not a block or character device, or when nvp itself was entered on
+ * the special-device hash chain; otherwise returns the reused alias.
+ */
+struct vnode *
+checkalias(nvp, nvp_rdev, mp)
+ register struct vnode *nvp;
+ dev_t nvp_rdev;
+ struct mount *mp;
+{
+ register struct vnode *vp;
+ struct vnode **vpp;
+
+ if (nvp->v_type != VBLK && nvp->v_type != VCHR)
+ return (NULLVP);
+
+ vpp = &speclisth[SPECHASH(nvp_rdev)];
+loop:
+ for (vp = *vpp; vp; vp = vp->v_specnext) {
+ if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
+ continue;
+ /*
+ * Alias, but not in use, so flush it out.
+ */
+ if (vp->v_usecount == 0) {
+ vgone(vp);
+ goto loop;
+ }
+ /* vget() failing means the vnode is being cleaned; rescan. */
+ if (vget(vp, 1))
+ goto loop;
+ break;
+ }
+ /*
+ * No alias, or the alias is owned by a filesystem (tag is not
+ * VT_NON): enter nvp on the hash chain and mark both as aliased.
+ */
+ if (vp == NULL || vp->v_tag != VT_NON) {
+ MALLOC(nvp->v_specinfo, struct specinfo *,
+ sizeof(struct specinfo), M_VNODE, M_WAITOK);
+ nvp->v_rdev = nvp_rdev;
+ nvp->v_hashchain = vpp;
+ nvp->v_specnext = *vpp;
+ nvp->v_specflags = 0;
+ *vpp = nvp;
+ if (vp != NULL) {
+ nvp->v_flag |= VALIASED;
+ vp->v_flag |= VALIASED;
+ vput(vp);
+ }
+ return (NULLVP);
+ }
+ /*
+ * The alias has tag VT_NON: clean it, give it nvp's operations
+ * vector and tag, and retire nvp by setting its type to VNON.
+ */
+ VOP_UNLOCK(vp);
+ vclean(vp, 0);
+ vp->v_op = nvp->v_op;
+ vp->v_tag = nvp->v_tag;
+ nvp->v_type = VNON;
+ insmntque(vp, mp);
+ return (vp);
+}
+
+/*
+ * Grab a particular vnode from the free list, increment its
+ * reference count and, if lockflag is set, lock it.
+ *
+ * If the vnode is being cleaned out for another use, the caller sleeps
+ * until the cleaning is finished and 1 is returned to indicate that the
+ * vnode is no longer usable (possibly having been changed to a new
+ * file system type).  Otherwise 0 is returned.
+ */
+vget(vp, lockflag)
+	struct vnode *vp;
+	int lockflag;
+{
+	int cleaning;
+
+	/*
+	 * Cleaning is detected in one of two ways: VXLOCK is set, or
+	 * the use count is zero while the free-list back pointer holds
+	 * the 0xdeadb marker, showing that getnewvnode has taken the
+	 * vnode off the free list.  The second test is needed because
+	 * VXLOCK may not be set yet while vclean is blocked in VOP_LOCK
+	 * waiting for VOP_INACTIVE to complete.
+	 */
+	cleaning = (vp->v_flag & VXLOCK) != 0;
+	if (!cleaning && vp->v_usecount == 0 &&
+	    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
+		cleaning = 1;
+	if (cleaning) {
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+		return (1);
+	}
+	/* An unreferenced vnode sits on the free list; take it off. */
+	if (vp->v_usecount == 0)
+		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+	vp->v_usecount++;
+	if (lockflag)
+		VOP_LOCK(vp);
+	return (0);
+}
+
+/*
+ * Take an additional reference on a vnode that is already in use.
+ * Panics when the use count is not positive; vget() must be used for
+ * such vnodes instead.
+ */
+void vref(vp)
+	struct vnode *vp;
+{
+
+	if (vp->v_usecount < 1)
+		panic("vref used where vget required");
+	vp->v_usecount += 1;
+}
+
+/*
+ * Release a locked vnode: VOP_UNLOCK() followed by vrele().
+ */
+void vput(vp)
+	struct vnode *vp;
+{
+
+	VOP_UNLOCK(vp);
+	vrele(vp);
+}
+
+/*
+ * Drop one reference to a vnode.
+ * When the last reference goes away the vnode is appended to the free
+ * list and its inactive routine is called.
+ */
+void vrele(vp)
+	struct vnode *vp;
+{
+
+#ifdef DIAGNOSTIC
+	if (vp == NULL)
+		panic("vrele: null vp");
+#endif
+	if (--vp->v_usecount > 0)
+		return;
+#ifdef DIAGNOSTIC
+	if (!(vp->v_usecount == 0 && vp->v_writecount == 0)) {
+		vprint("vrele: bad ref count", vp);
+		panic("vrele: ref cnt");
+	}
+#endif
+	/* The tail of the free list is the most recently used end. */
+	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+	VOP_INACTIVE(vp);
+}
+
+/*
+ * Record that a page or buffer structure now refers to the vnode
+ * by bumping its hold count.
+ */
+void vhold(vp)
+	struct vnode *vp;
+{
+
+	vp->v_holdcnt += 1;
+}
+
+/*
+ * A page or buffer structure gives up its reference to the vnode.
+ * Panics if the hold count is not positive.
+ */
+void holdrele(vp)
+	struct vnode *vp;
+{
+
+	if (vp->v_holdcnt < 1)
+		panic("holdrele: holdcnt");
+	vp->v_holdcnt -= 1;
+}
+
+/*
+ * Remove any vnodes in the vnode table belonging to mount point mp.
+ *
+ * If MNT_NOFORCE is specified, there should not be any active ones,
+ * return error if any are found (nb: this is a user error, not a
+ * system error). If MNT_FORCE is specified, detach any active vnodes
+ * that are found.
+ *
+ * skipvp names one vnode to leave untouched.  The flag bits tested
+ * here are SKIPSYSTEM, WRITECLOSE and FORCECLOSE.  The mount point must
+ * already be marked MNT_MPBUSY or the function panics.  Returns EBUSY
+ * if any in-use vnode was left behind, otherwise 0.
+ */
+#ifdef DIAGNOSTIC
+int busyprt = 0; /* print out busy vnodes */
+struct ctldebug debug1 = { "busyprt", &busyprt };
+#endif
+
+vflush(mp, skipvp, flags)
+ struct mount *mp;
+ struct vnode *skipvp;
+ int flags;
+{
+ register struct vnode *vp, *nvp;
+ int busy = 0;
+
+ if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+ panic("vflush: not busy");
+loop:
+ for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
+ /* The vnode moved to another mount point; restart the scan. */
+ if (vp->v_mount != mp)
+ goto loop;
+ /* Fetch the successor first: vgone() unlinks vp from this list. */
+ nvp = vp->v_mntvnodes.le_next;
+ /*
+ * Skip over a selected vnode.
+ */
+ if (vp == skipvp)
+ continue;
+ /*
+ * Skip over vnodes marked VSYSTEM.
+ */
+ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
+ continue;
+ /*
+ * If WRITECLOSE is set, only flush out regular file
+ * vnodes open for writing.
+ */
+ if ((flags & WRITECLOSE) &&
+ (vp->v_writecount == 0 || vp->v_type != VREG))
+ continue;
+ /*
+ * With v_usecount == 0, all we need to do is clear
+ * out the vnode data structures and we are done.
+ */
+ if (vp->v_usecount == 0) {
+ vgone(vp);
+ continue;
+ }
+ /*
+ * If FORCECLOSE is set, forcibly close the vnode.
+ * For block or character devices, revert to an
+ * anonymous device. For all other files, just kill them.
+ */
+ if (flags & FORCECLOSE) {
+ if (vp->v_type != VBLK && vp->v_type != VCHR) {
+ vgone(vp);
+ } else {
+ vclean(vp, 0);
+ vp->v_op = spec_vnodeop_p;
+ insmntque(vp, (struct mount *)0);
+ }
+ continue;
+ }
+#ifdef DIAGNOSTIC
+ if (busyprt)
+ vprint("vflush: busy vnode", vp);
+#endif
+ /* Still in use and not forced: count it as busy. */
+ busy++;
+ }
+ if (busy)
+ return (EBUSY);
+ return (0);
+}
+
+/*
+ * Disassociate the underlying file system from a vnode.
+ *
+ * When flags contains DOCLOSE the vnode's buffers are flushed with
+ * vinvalbuf(V_SAVE) and, if the vnode was in use, it is closed with
+ * VOP_CLOSE().  On return the vnode uses dead_vnodeop_p with tag
+ * VT_NON, and anyone sleeping on it with VXWANT set has been awakened.
+ */
+void
+vclean(vp, flags)
+ register struct vnode *vp;
+ int flags;
+{
+ int active;
+
+ /*
+ * Check to see if the vnode is in use.
+ * If so we have to reference it before we clean it out
+ * so that its count cannot fall to zero and generate a
+ * race against ourselves to recycle it.
+ */
+ if (active = vp->v_usecount)
+ VREF(vp);
+ /*
+ * Even if the count is zero, the VOP_INACTIVE routine may still
+ * have the object locked while it cleans it out. The VOP_LOCK
+ * ensures that the VOP_INACTIVE routine is done with its work.
+ * For active vnodes, it ensures that no other activity can
+ * occur while the underlying object is being cleaned out.
+ */
+ VOP_LOCK(vp);
+ /*
+ * Prevent the vnode from being recycled or
+ * brought into use while we clean it out.
+ */
+ if (vp->v_flag & VXLOCK)
+ panic("vclean: deadlock");
+ vp->v_flag |= VXLOCK;
+ /*
+ * Clean out any buffers associated with the vnode.
+ */
+ if (flags & DOCLOSE)
+ vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
+ /*
+ * Any other processes trying to obtain this lock must first
+ * wait for VXLOCK to clear, then call the new lock operation.
+ */
+ VOP_UNLOCK(vp);
+ /*
+ * If purging an active vnode, it must be closed and
+ * deactivated before being reclaimed.
+ */
+ if (active) {
+ if (flags & DOCLOSE)
+ VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
+ VOP_INACTIVE(vp);
+ }
+ /*
+ * Reclaim the vnode.
+ */
+ if (VOP_RECLAIM(vp))
+ panic("vclean: cannot reclaim");
+ /* Drop the extra reference taken at the top for an active vnode. */
+ if (active)
+ vrele(vp);
+
+ /*
+ * Done with purge, notify sleepers of the grim news.
+ */
+ vp->v_op = dead_vnodeop_p;
+ vp->v_tag = VT_NON;
+ vp->v_flag &= ~VXLOCK;
+ if (vp->v_flag & VXWANT) {
+ vp->v_flag &= ~VXWANT;
+ wakeup((caddr_t)vp);
+ }
+}
+
+/*
+ * Eliminate all activity associated with the requested vnode
+ * and with all vnodes aliased to the requested vnode.
+ *
+ * For a vnode without VALIASED set this is simply vgone(vp).
+ */
+void vgoneall(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq;
+
+ if (vp->v_flag & VALIASED) {
+ /*
+ * If a vgone (or vclean) is already in progress,
+ * wait until it is done and return.
+ */
+ if (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return;
+ }
+ /*
+ * Ensure that vp will not be vgone'd while we
+ * are eliminating its aliases.
+ */
+ vp->v_flag |= VXLOCK;
+ /*
+ * Remove one alias per pass and rescan from the head of the
+ * hash chain; vgone() on the last alias clears VALIASED on
+ * vp, which ends the loop.
+ */
+ while (vp->v_flag & VALIASED) {
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_rdev != vp->v_rdev ||
+ vq->v_type != vp->v_type || vp == vq)
+ continue;
+ vgone(vq);
+ break;
+ }
+ }
+ /*
+ * Remove the lock so that vgone below will
+ * really eliminate the vnode after which time
+ * vgone will awaken any sleepers.
+ */
+ vp->v_flag &= ~VXLOCK;
+ }
+ vgone(vp);
+}
+
+/*
+ * Eliminate all activity associated with a vnode
+ * in preparation for reuse.
+ *
+ * The vnode is cleaned with vclean(DOCLOSE), taken off its mount
+ * point's vnode list and, for block and character devices, off the
+ * special-device hash chain (freeing its specinfo).  It is then moved
+ * to the head of the free list when eligible, and its type is set to
+ * VBAD.
+ */
+void vgone(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq;
+ struct vnode *vx;
+
+ /*
+ * If a vgone (or vclean) is already in progress,
+ * wait until it is done and return.
+ */
+ if (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return;
+ }
+ /*
+ * Clean out the filesystem specific data.
+ */
+ vclean(vp, DOCLOSE);
+ /*
+ * Delete from old mount point vnode list, if on one.
+ */
+ if (vp->v_mount != NULL) {
+ LIST_REMOVE(vp, v_mntvnodes);
+ vp->v_mount = NULL;
+ }
+ /*
+ * If special device, remove it from special device alias list.
+ */
+ if (vp->v_type == VBLK || vp->v_type == VCHR) {
+ if (*vp->v_hashchain == vp) {
+ *vp->v_hashchain = vp->v_specnext;
+ } else {
+ /* Find the predecessor on the chain and unlink vp. */
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_specnext != vp)
+ continue;
+ vq->v_specnext = vp->v_specnext;
+ break;
+ }
+ if (vq == NULL)
+ panic("missing bdev");
+ }
+ if (vp->v_flag & VALIASED) {
+ /*
+ * Look for remaining vnodes for the same device: vx is
+ * the first one found, and vq is left non-null only if
+ * a second one exists.  If exactly one remains it is no
+ * longer aliased.
+ */
+ vx = NULL;
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_rdev != vp->v_rdev ||
+ vq->v_type != vp->v_type)
+ continue;
+ if (vx)
+ break;
+ vx = vq;
+ }
+ if (vx == NULL)
+ panic("missing alias");
+ if (vq == NULL)
+ vx->v_flag &= ~VALIASED;
+ vp->v_flag &= ~VALIASED;
+ }
+ FREE(vp->v_specinfo, M_VNODE);
+ vp->v_specinfo = NULL;
+ }
+ /*
+ * If it is on the freelist and not already at the head,
+ * move it to the head of the list. The test of the back
+ * pointer and the reference count of zero is because
+ * it will be removed from the free list by getnewvnode,
+ * but will not have its reference count incremented until
+ * after calling vgone. If the reference count were
+ * incremented first, vgone would (incorrectly) try to
+ * close the previous instance of the underlying object.
+ * So, the back pointer is explicitly set to `0xdeadb' in
+ * getnewvnode after removing it from the freelist to ensure
+ * that we do not try to move it here.
+ */
+ if (vp->v_usecount == 0 &&
+ vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
+ vnode_free_list.tqh_first != vp) {
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+ }
+ vp->v_type = VBAD;
+}
+
+/*
+ * Find the vnode for a device number and vnode type.
+ * On a match the vnode is stored through vpp and 1 is returned;
+ * otherwise *vpp is left untouched and 0 is returned.
+ */
+vfinddev(dev, type, vpp)
+	dev_t dev;
+	enum vtype type;
+	struct vnode **vpp;
+{
+	struct vnode *vp;
+
+	vp = speclisth[SPECHASH(dev)];
+	while (vp != NULL) {
+		if (vp->v_rdev == dev && vp->v_type == type) {
+			*vpp = vp;
+			return (1);
+		}
+		vp = vp->v_specnext;
+	}
+	return (0);
+}
+
+/*
+ * Total the use counts of every vnode referring to the same special
+ * device as vp.  Unused aliases met along the way are eliminated with
+ * vgone(), after which the count starts over.
+ */
+vcount(vp)
+	struct vnode *vp;
+{
+	struct vnode *vq, *next;
+	int total;
+
+restart:
+	if (!(vp->v_flag & VALIASED))
+		return (vp->v_usecount);
+	total = 0;
+	vq = *vp->v_hashchain;
+	while (vq != NULL) {
+		next = vq->v_specnext;
+		if (vq->v_rdev == vp->v_rdev && vq->v_type == vp->v_type) {
+			/*
+			 * Alias, but not in use, so flush it out.
+			 */
+			if (vq != vp && vq->v_usecount == 0) {
+				vgone(vq);
+				goto restart;
+			}
+			total += vq->v_usecount;
+		}
+		vq = next;
+	}
+	return (total);
+}
+
+/*
+ * Print a one-line description of a vnode: its type, use count, write
+ * count, hold count and the names of the flags that are set.  When the
+ * vnode has private data, VOP_PRINT() adds a filesystem-specific line.
+ * A non-null label is printed first, followed by ": ".
+ */
+static char *typename[] =
+   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
+
+vprint(label, vp)
+	char *label;
+	register struct vnode *vp;
+{
+	/* Flag names in print order, each with its '|' separator. */
+	static struct {
+		unsigned long	mask;
+		char		*name;
+	} flagtab[] = {
+		{ VROOT,	"|VROOT" },
+		{ VTEXT,	"|VTEXT" },
+		{ VSYSTEM,	"|VSYSTEM" },
+		{ VXLOCK,	"|VXLOCK" },
+		{ VXWANT,	"|VXWANT" },
+		{ VBWAIT,	"|VBWAIT" },
+		{ VALIASED,	"|VALIASED" },
+	};
+	char buf[64];
+	int i;
+
+	if (label != NULL)
+		printf("%s: ", label);
+	printf("type %s, usecount %d, writecount %d, refcount %d,",
+	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
+	    vp->v_holdcnt);
+	buf[0] = '\0';
+	for (i = 0; i < sizeof(flagtab) / sizeof(flagtab[0]); i++)
+		if (vp->v_flag & flagtab[i].mask)
+			strcat(buf, flagtab[i].name);
+	/* Skip the leading '|' of the first name. */
+	if (buf[0] != '\0')
+		printf(" flags (%s)", buf + 1);
+	if (vp->v_data != NULL) {
+		printf("\n\t");
+		VOP_PRINT(vp);
+	} else {
+		printf("\n");
+	}
+}
+
+#ifdef DEBUG
+/*
+ * Debugging aid: walk every mount point's vnode list and print each
+ * vnode for which VOP_ISLOCKED() is true.
+ */
+printlockedvnodes()
+{
+	struct mount *mp;
+	struct vnode *vp;
+
+	printf("Locked vnodes\n");
+	mp = mountlist.tqh_first;
+	while (mp != NULL) {
+		vp = mp->mnt_vnodelist.lh_first;
+		while (vp != NULL) {
+			if (VOP_ISLOCKED(vp))
+				vprint((char *)0, vp);
+			vp = vp->v_mntvnodes.le_next;
+		}
+		mp = mp->mnt_list.tqe_next;
+	}
+}
+#endif
+
+int kinfo_vdebug = 1;
+int kinfo_vgetfailed;
+#define KINFO_VNODESLOP	10
+/*
+ * Dump vnode list (via sysctl).
+ * Copyout address of vnode followed by vnode.
+ *
+ * When where is null, only a size estimate is stored in *sizep.
+ * Otherwise *sizep gives the size of the user buffer on entry and the
+ * number of bytes written on return.  Returns ENOMEM when the buffer is
+ * too small, a copyout() error, or 0.  Mount points that cannot be
+ * marked busy are skipped.
+ */
+/* ARGSUSED */
+sysctl_vnode(where, sizep)
+	char *where;
+	size_t *sizep;
+{
+	register struct mount *mp, *nmp;
+	struct vnode *vp;
+	register char *bp = where, *savebp;
+	char *ewhere;
+	int error;
+
+#define VPTRSZ	sizeof (struct vnode *)
+#define VNODESZ	sizeof (struct vnode)
+	if (where == NULL) {
+		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
+		return (0);
+	}
+	ewhere = where + *sizep;
+
+	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+		nmp = mp->mnt_list.tqe_next;
+		if (vfs_busy(mp))
+			continue;
+		savebp = bp;
+again:
+		for (vp = mp->mnt_vnodelist.lh_first;
+		     vp != NULL;
+		     vp = vp->v_mntvnodes.le_next) {
+			/*
+			 * Check that the vp is still associated with
+			 * this filesystem.  RACE: could have been
+			 * recycled onto the same filesystem.
+			 */
+			if (vp->v_mount != mp) {
+				if (kinfo_vdebug)
+					printf("kinfo: vp changed\n");
+				bp = savebp;
+				goto again;
+			}
+			/*
+			 * Every early return below must undo the
+			 * vfs_busy() above, or the mount point would
+			 * stay marked busy.
+			 */
+			if (bp + VPTRSZ + VNODESZ > ewhere) {
+				vfs_unbusy(mp);
+				*sizep = bp - where;
+				return (ENOMEM);
+			}
+			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
+			    (error = copyout((caddr_t)vp, bp + VPTRSZ,
+			    VNODESZ))) {
+				vfs_unbusy(mp);
+				return (error);
+			}
+			bp += VPTRSZ + VNODESZ;
+		}
+		vfs_unbusy(mp);
+	}
+
+	*sizep = bp - where;
+	return (0);
+}
+
+/*
+ * Report whether a filesystem is mounted on a special-device vnode or
+ * on any other vnode for the same device.
+ * Returns EBUSY if SI_MOUNTEDON is set on one of them, otherwise 0.
+ */
+int
+vfs_mountedon(vp)
+	struct vnode *vp;
+{
+	struct vnode *vq;
+
+	if (vp->v_specflags & SI_MOUNTEDON)
+		return (EBUSY);
+	if ((vp->v_flag & VALIASED) == 0)
+		return (0);
+	/* Check every vnode on the hash chain with the same device. */
+	for (vq = *vp->v_hashchain; vq != NULL; vq = vq->v_specnext) {
+		if (vq->v_rdev == vp->v_rdev && vq->v_type == vp->v_type &&
+		    (vq->v_specflags & SI_MOUNTEDON))
+			return (EBUSY);
+	}
+	return (0);
+}
+
+/*
+ * Build hash lists of net addresses and hang them off the mount point.
+ * Called by ufs_mount() to set up the lists of export addresses.
+ *
+ * A zero ex_addrlen sets the mount's default export entry (EPERM if one
+ * is already set).  Otherwise the address and optional mask are copied
+ * in from user space and entered into the radix tree for the address
+ * family, which is attached on first use.
+ *
+ * Returns 0 on success; otherwise a copyin() error, EINVAL for an
+ * address family outside ne_rtable[], ENOBUFS if no tree could be
+ * attached, or EPERM if the entry could not be added (already exists).
+ */
+static int
+vfs_hang_addrlist(mp, nep, argp)
+	struct mount *mp;
+	struct netexport *nep;
+	struct export_args *argp;
+{
+	register struct netcred *np;
+	register struct radix_node_head *rnh;
+	register int i;
+	struct radix_node *rn;
+	struct sockaddr *saddr, *smask = 0;
+	struct domain *dom;
+	int error;
+
+	if (argp->ex_addrlen == 0) {
+		if (mp->mnt_flag & MNT_DEFEXPORTED)
+			return (EPERM);
+		np = &nep->ne_defexported;
+		np->netc_exflags = argp->ex_flags;
+		np->netc_anon = argp->ex_anon;
+		np->netc_anon.cr_ref = 1;
+		mp->mnt_flag |= MNT_DEFEXPORTED;
+		return (0);
+	}
+	/* One allocation holds the netcred, the address and the mask. */
+	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
+	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
+	bzero((caddr_t)np, i);
+	saddr = (struct sockaddr *)(np + 1);
+	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
+		goto out;
+	if (saddr->sa_len > argp->ex_addrlen)
+		saddr->sa_len = argp->ex_addrlen;
+	if (argp->ex_masklen) {
+		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
+		/* The mask comes from ex_mask, not from the address. */
+		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
+		if (error)
+			goto out;
+		if (smask->sa_len > argp->ex_masklen)
+			smask->sa_len = argp->ex_masklen;
+	}
+	/*
+	 * The family comes from user space; ne_rtable[] has entries
+	 * 0..AF_MAX only, so reject anything outside that range.
+	 */
+	i = saddr->sa_family;
+	if (i < 0 || i > AF_MAX) {
+		error = EINVAL;
+		goto out;
+	}
+	if ((rnh = nep->ne_rtable[i]) == 0) {
+		/*
+		 * Seems silly to initialize every AF when most are not
+		 * used, do so on demand here
+		 */
+		for (dom = domains; dom; dom = dom->dom_next)
+			if (dom->dom_family == i && dom->dom_rtattach) {
+				dom->dom_rtattach((void **)&nep->ne_rtable[i],
+				    dom->dom_rtoffset);
+				break;
+			}
+		if ((rnh = nep->ne_rtable[i]) == 0) {
+			error = ENOBUFS;
+			goto out;
+		}
+	}
+	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
+	    np->netc_rnodes);
+	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
+		error = EPERM;
+		goto out;
+	}
+	np->netc_exflags = argp->ex_flags;
+	np->netc_anon = argp->ex_anon;
+	np->netc_anon.cr_ref = 1;
+	return (0);
+out:
+	free(np, M_NETADDR);
+	return (error);
+}
+
+/*
+ * Tree-walk callback used by vfs_free_addrlist(): delete one entry
+ * from the radix tree passed in w and free its storage.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+static int
+vfs_free_netcred(rn, w)
+	struct radix_node *rn;
+	caddr_t w;
+{
+	struct radix_node_head *head;
+
+	head = (struct radix_node_head *)w;
+	(*head->rnh_deladdr)(rn->rn_key, rn->rn_mask, head);
+	free((caddr_t)rn, M_NETADDR);
+	return (0);
+}
+
+/*
+ * Release every per-address-family export tree hanging off nep:
+ * each tree's entries are freed by vfs_free_netcred(), then the tree
+ * head itself is freed and its table slot cleared.
+ */
+static void
+vfs_free_addrlist(nep)
+	struct netexport *nep;
+{
+	struct radix_node_head *rnh;
+	int af;
+
+	for (af = 0; af <= AF_MAX; af++) {
+		rnh = nep->ne_rtable[af];
+		if (rnh == NULL)
+			continue;
+		(*rnh->rnh_walktree)(rnh, vfs_free_netcred, (caddr_t)rnh);
+		free((caddr_t)rnh, M_RTABLE);
+		nep->ne_rtable[af] = 0;
+	}
+}
+
+/*
+ * Apply an export request to a mount point.
+ * MNT_DELEXPORT in ex_flags discards all existing export entries;
+ * MNT_EXPORTED adds the entry described by argp.  Both may be given,
+ * in which case deletion happens first.
+ * Returns 0, or the error from vfs_hang_addrlist().
+ */
+int
+vfs_export(mp, nep, argp)
+	struct mount *mp;
+	struct netexport *nep;
+	struct export_args *argp;
+{
+	int error;
+
+	if (argp->ex_flags & MNT_DELEXPORT) {
+		vfs_free_addrlist(nep);
+		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
+	}
+	if ((argp->ex_flags & MNT_EXPORTED) == 0)
+		return (0);
+	error = vfs_hang_addrlist(mp, nep, argp);
+	if (error == 0)
+		mp->mnt_flag |= MNT_EXPORTED;
+	return (error);
+}
+
+/*
+ * Find the export credentials that apply to a client address.
+ * nam, when non-null, is an mbuf holding the client's sockaddr.
+ * Returns the matching entry, the mount's default entry when there is
+ * no address match and MNT_DEFEXPORTED is set, or NULL.
+ */
+struct netcred *
+vfs_export_lookup(mp, nep, nam)
+	register struct mount *mp;
+	struct netexport *nep;
+	struct mbuf *nam;
+{
+	struct netcred *match = NULL;
+	struct radix_node_head *rnh;
+	struct sockaddr *saddr;
+
+	if ((mp->mnt_flag & MNT_EXPORTED) == 0)
+		return (NULL);
+	/*
+	 * Lookup in the export list first.
+	 */
+	if (nam != NULL) {
+		saddr = mtod(nam, struct sockaddr *);
+		rnh = nep->ne_rtable[saddr->sa_family];
+		if (rnh != NULL) {
+			match = (struct netcred *)
+			    (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
+			/* A hit on a node flagged RNF_ROOT is no match. */
+			if (match != NULL &&
+			    (match->netc_rnodes->rn_flags & RNF_ROOT))
+				match = NULL;
+		}
+	}
+	/*
+	 * If no address match, use the default if it exists.
+	 */
+	if (match == NULL && (mp->mnt_flag & MNT_DEFEXPORTED))
+		match = &nep->ne_defexported;
+	return (match);
+}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
new file mode 100644
index 000000000000..345c7a79bf20
--- /dev/null
+++ b/sys/kern/vfs_syscalls.c
@@ -0,0 +1,2107 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+static int change_dir __P((struct nameidata *ndp, struct proc *p));
+
+/*
+ * Virtual File System System Calls
+ */
+
+/*
+ * Mount a file system.
+ *
+ * uap->type indexes vfssw[], uap->path names the directory to be
+ * covered (or the root of the mounted filesystem for MNT_UPDATE),
+ * uap->flags carries the MNT_* flags and uap->data is passed through
+ * to VFS_MOUNT().  The caller must be the super user.
+ *
+ * Returns 0 on success or an errno value.  Every error return after
+ * namei() releases the locked vnode it returned.
+ */
+struct mount_args {
+	int	type;
+	char	*path;
+	int	flags;
+	caddr_t	data;
+};
+/* ARGSUSED */
+mount(p, uap, retval)
+	struct proc *p;
+	register struct mount_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	register struct mount *mp;
+	int error, flag;
+	struct nameidata nd;
+
+	/*
+	 * Must be super user
+	 */
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	/*
+	 * Get vnode to be covered
+	 */
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (uap->flags & MNT_UPDATE) {
+		if ((vp->v_flag & VROOT) == 0) {
+			vput(vp);
+			return (EINVAL);
+		}
+		mp = vp->v_mount;
+		/* Saved so the flags can be restored if the update fails. */
+		flag = mp->mnt_flag;
+		/*
+		 * We only allow the filesystem to be reloaded if it
+		 * is currently mounted read-only.
+		 */
+		if ((uap->flags & MNT_RELOAD) &&
+		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
+			vput(vp);
+			return (EOPNOTSUPP);	/* Needs translation */
+		}
+		mp->mnt_flag |=
+		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+		VOP_UNLOCK(vp);
+		goto update;
+	}
+	/*
+	 * The vnode is still locked and referenced here, so it must be
+	 * released on failure like on every other error path below.
+	 */
+	if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) {
+		vput(vp);
+		return (error);
+	}
+	if (vp->v_type != VDIR) {
+		vput(vp);
+		return (ENOTDIR);
+	}
+	if ((u_long)uap->type > MOUNT_MAXTYPE || vfssw[uap->type] == NULL) {
+		vput(vp);
+		return (ENODEV);
+	}
+
+	/*
+	 * Allocate and initialize the file system.
+	 */
+	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
+		M_MOUNT, M_WAITOK);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = vfssw[uap->type];
+	if (error = vfs_lock(mp)) {
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+		return (error);
+	}
+	if (vp->v_mountedhere != NULL) {
+		vfs_unlock(mp);
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+		return (EBUSY);
+	}
+	vp->v_mountedhere = mp;
+	mp->mnt_vnodecovered = vp;
+update:
+	/*
+	 * Set the mount level flags.
+	 */
+	if (uap->flags & MNT_RDONLY)
+		mp->mnt_flag |= MNT_RDONLY;
+	else if (mp->mnt_flag & MNT_RDONLY)
+		mp->mnt_flag |= MNT_WANTRDWR;
+	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+	/*
+	 * Mount the filesystem.
+	 */
+	error = VFS_MOUNT(mp, uap->path, uap->data, &nd, p);
+	if (mp->mnt_flag & MNT_UPDATE) {
+		/* The vnode was unlocked before the update; drop the ref. */
+		vrele(vp);
+		if (mp->mnt_flag & MNT_WANTRDWR)
+			mp->mnt_flag &= ~MNT_RDONLY;
+		mp->mnt_flag &=~
+		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
+		if (error)
+			mp->mnt_flag = flag;
+		return (error);
+	}
+	/*
+	 * Put the new filesystem on the mount list after root.
+	 */
+	cache_purge(vp);
+	if (!error) {
+		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+		VOP_UNLOCK(vp);
+		vfs_unlock(mp);
+		error = VFS_START(mp, 0, p);
+	} else {
+		/* Undo the association made above and discard the mount. */
+		mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
+		vfs_unlock(mp);
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+	}
+	return (error);
+}
+
+/*
+ * Unmount a file system.
+ *
+ * Note: unmount takes a path to the vnode mounted on as argument,
+ * not special file (as before).
+ *
+ * The path must name the root of a mounted filesystem (EINVAL
+ * otherwise).  Super-user privilege is required unless the mount has
+ * MNT_USER set.  The work itself is done by dounmount().
+ */
+struct unmount_args {
+	char	*path;
+	int	flags;
+};
+/* ARGSUSED */
+unmount(p, uap, retval)
+	struct proc *p;
+	register struct unmount_args *uap;
+	int *retval;
+{
+	struct vnode *vp;
+	struct mount *mp;
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	vp = nd.ni_vp;
+
+	/*
+	 * Mounts without MNT_USER may only be removed by the super user.
+	 */
+	if ((vp->v_mount->mnt_flag & MNT_USER) == 0) {
+		error = suser(p->p_ucred, &p->p_acflag);
+		if (error) {
+			vput(vp);
+			return (error);
+		}
+	}
+
+	/*
+	 * Must be the root of the filesystem
+	 */
+	if (!(vp->v_flag & VROOT)) {
+		vput(vp);
+		return (EINVAL);
+	}
+	mp = vp->v_mount;
+	vput(vp);
+	return (dounmount(mp, uap->flags, p));
+}
+
+/*
+ * Do the actual file system unmount.
+ *
+ * mp is the mount point, flags are the caller's unmount flags (only
+ * MNT_FORCE is tested here; all are passed to VFS_UNMOUNT()), and p is
+ * the calling process.  Returns EBUSY if the mount point cannot be
+ * marked busy, otherwise the result of vfs_lock(), VFS_SYNC() or
+ * VFS_UNMOUNT().  On success the mount structure is freed.
+ */
+dounmount(mp, flags, p)
+ register struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+ struct vnode *coveredvp;
+ int error;
+
+ coveredvp = mp->mnt_vnodecovered;
+ if (vfs_busy(mp))
+ return (EBUSY);
+ mp->mnt_flag |= MNT_UNMOUNT;
+ /*
+ * NOTE(review): if vfs_lock() fails, we return with the busy mark
+ * from vfs_busy() and MNT_UNMOUNT still set -- confirm whether
+ * vfs_lock() can fail here and whether cleanup is needed.
+ */
+ if (error = vfs_lock(mp))
+ return (error);
+
+ mp->mnt_flag &=~ MNT_ASYNC;
+ vnode_pager_umount(mp); /* release cached vnodes */
+ cache_purgevfs(mp); /* remove cache entries for this file sys */
+ /* A failed sync only blocks the unmount when MNT_FORCE is not set. */
+ if ((error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0 ||
+ (flags & MNT_FORCE))
+ error = VFS_UNMOUNT(mp, flags, p);
+ mp->mnt_flag &= ~MNT_UNMOUNT;
+ vfs_unbusy(mp);
+ if (error) {
+ vfs_unlock(mp);
+ } else {
+ /*
+ * NOTE(review): the covered vnode is released before its
+ * v_mountedhere field is cleared through mp->mnt_vnodecovered
+ * -- confirm this ordering is safe.
+ */
+ vrele(coveredvp);
+ TAILQ_REMOVE(&mountlist, mp, mnt_list);
+ mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
+ vfs_unlock(mp);
+ if (mp->mnt_vnodelist.lh_first != NULL)
+ panic("unmount: dangling vnode");
+ free((caddr_t)mp, M_MOUNT);
+ }
+ return (error);
+}
+
+/*
+ * Sync each mounted filesystem.
+ *
+ * Mount points that are locked, read-only or busy are skipped.  For the
+ * rest, MNT_ASYNC is cleared around the VFS_SYNC(MNT_NOWAIT) call and
+ * restored afterwards.  Always returns 0.
+ */
+#ifdef DIAGNOSTIC
+int syncprt = 0;
+struct ctldebug debug0 = { "syncprt", &syncprt };
+#endif
+
+struct sync_args {
+	int	dummy;
+};
+/* ARGSUSED */
+sync(p, uap, retval)
+	struct proc *p;
+	struct sync_args *uap;
+	int *retval;
+{
+	struct mount *mp, *next;
+	int wasasync;
+
+	for (mp = mountlist.tqh_first; mp != NULL; mp = next) {
+		next = mp->mnt_list.tqe_next;
+		/*
+		 * The lock check below is to avoid races with mount
+		 * and unmount.
+		 */
+		if (mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY))
+			continue;
+		if (vfs_busy(mp))
+			continue;
+		wasasync = mp->mnt_flag & MNT_ASYNC;
+		mp->mnt_flag &= ~MNT_ASYNC;
+		VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
+		if (wasasync)
+			mp->mnt_flag |= MNT_ASYNC;
+		vfs_unbusy(mp);
+	}
+#ifdef DIAGNOSTIC
+	if (syncprt)
+		vfs_bufstats();
+#endif /* DIAGNOSTIC */
+	return (0);
+}
+
+/*
+ * Change filesystem quotas.
+ *
+ * The path is only used to find the filesystem; the command, id and
+ * argument are passed to that filesystem's VFS_QUOTACTL().
+ */
+struct quotactl_args {
+	char	*path;
+	int	cmd;
+	int	uid;
+	caddr_t	arg;
+};
+/* ARGSUSED */
+quotactl(p, uap, retval)
+	struct proc *p;
+	register struct quotactl_args *uap;
+	int *retval;
+{
+	struct mount *mp;
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	mp = nd.ni_vp->v_mount;
+	vrele(nd.ni_vp);
+	return (VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, p));
+}
+
+/*
+ * Get filesystem statistics for the filesystem containing a path.
+ *
+ * The mount point's cached statfs structure is refreshed with
+ * VFS_STATFS(), its f_flags set from the visible mount flags, and the
+ * result copied out to uap->buf.
+ */
+struct statfs_args {
+	char	*path;
+	struct	statfs *buf;
+};
+/* ARGSUSED */
+statfs(p, uap, retval)
+	struct proc *p;
+	register struct statfs_args *uap;
+	int *retval;
+{
+	struct mount *mp;
+	struct statfs *sp;
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	mp = nd.ni_vp->v_mount;
+	vrele(nd.ni_vp);
+	sp = &mp->mnt_stat;
+	error = VFS_STATFS(mp, sp, p);
+	if (error)
+		return (error);
+	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+	return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp)));
+}
+
+/*
+ * Get filesystem statistics for the filesystem containing the file
+ * referred to by an open descriptor.
+ */
+struct fstatfs_args {
+	int	fd;
+	struct	statfs *buf;
+};
+/* ARGSUSED */
+fstatfs(p, uap, retval)
+	struct proc *p;
+	register struct fstatfs_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct mount *mp;
+	struct statfs *sp;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	mp = vp->v_mount;
+	sp = &mp->mnt_stat;
+	error = VFS_STATFS(mp, sp, p);
+	if (error)
+		return (error);
+	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+	return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp)));
+}
+
+/*
+ * Get statistics on all filesystems.
+ *
+ * Up to bufsize / sizeof(struct statfs) entries are copied out to
+ * uap->buf.  *retval is set to the number of mount points counted,
+ * capped at the buffer capacity when a buffer was supplied.  Mount
+ * points whose VFS_STATFS() refresh fails are neither copied nor
+ * counted; locked mount points are counted but not copied.
+ */
+struct getfsstat_args {
+	struct statfs *buf;
+	long bufsize;
+	int flags;
+};
+getfsstat(p, uap, retval)
+	struct proc *p;
+	register struct getfsstat_args *uap;
+	int *retval;
+{
+	struct mount *mp, *next;
+	struct statfs *sp;
+	caddr_t out;
+	long count, maxcount, error;
+	int refresh;
+
+	maxcount = uap->bufsize / sizeof(struct statfs);
+	out = (caddr_t)uap->buf;
+	/*
+	 * If MNT_NOWAIT is specified, do not refresh the
+	 * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
+	 */
+	refresh = (uap->flags & MNT_NOWAIT) == 0 || (uap->flags & MNT_WAIT);
+	count = 0;
+	for (mp = mountlist.tqh_first; mp != NULL; mp = next) {
+		next = mp->mnt_list.tqe_next;
+		if (out && count < maxcount &&
+		    (mp->mnt_flag & MNT_MLOCK) == 0) {
+			sp = &mp->mnt_stat;
+			if (refresh && (error = VFS_STATFS(mp, sp, p)) != 0)
+				continue;
+			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+			error = copyout((caddr_t)sp, out, sizeof(*sp));
+			if (error)
+				return (error);
+			out += sizeof(*sp);
+		}
+		count++;
+	}
+	*retval = (out && count > maxcount) ? maxcount : count;
+	return (0);
+}
+
+/*
+ * Change current working directory to a given file descriptor.
+ *
+ * The descriptor must refer to a directory vnode on which the caller has
+ * VEXEC access; otherwise ENOTDIR or the VOP_ACCESS error is returned and
+ * the current directory is left alone.  retval is not used.
+ */
+struct fchdir_args {
+	int fd;
+};
+/* ARGSUSED */
+fchdir(p, uap, retval)
+	struct proc *p;
+	struct fchdir_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct vnode *dirvp;
+	struct file *fp;
+	int error;
+
+	error = getvnode(fdp, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	dirvp = (struct vnode *)fp->f_data;
+	VOP_LOCK(dirvp);
+	if (dirvp->v_type == VDIR)
+		error = VOP_ACCESS(dirvp, VEXEC, p->p_ucred, p);
+	else
+		error = ENOTDIR;
+	VOP_UNLOCK(dirvp);
+	if (error != 0)
+		return (error);
+	/* Take a reference for fd_cdir before dropping the old directory. */
+	VREF(dirvp);
+	vrele(fdp->fd_cdir);
+	fdp->fd_cdir = dirvp;
+	return (0);
+}
+
+/*
+ * Change current working directory (``.'').
+ *
+ * change_dir() does the lookup and the directory/access checks; on
+ * success the vnode it leaves in nd.ni_vp replaces fd_cdir.
+ * retval is not used.
+ */
+struct chdir_args {
+	char *path;
+};
+/* ARGSUSED */
+chdir(p, uap, retval)
+	struct proc *p;
+	struct chdir_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	register struct filedesc *fdp = p->p_fd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = change_dir(&nd, p);
+	if (error != 0)
+		return (error);
+	vrele(fdp->fd_cdir);
+	fdp->fd_cdir = nd.ni_vp;
+	return (0);
+}
+
+/*
+ * Change notion of root (``/'') directory.
+ *
+ * Requires suser() to succeed.  change_dir() does the lookup and the
+ * directory/access checks; on success the vnode it leaves in nd.ni_vp
+ * replaces fd_rdir, whose previous vnode (if any) is released.
+ * retval is not used.
+ */
+struct chroot_args {
+	char *path;
+};
+/* ARGSUSED */
+chroot(p, uap, retval)
+	struct proc *p;
+	struct chroot_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	register struct filedesc *fdp = p->p_fd;
+	int error;
+
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error != 0)
+		return (error);
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = change_dir(&nd, p);
+	if (error != 0)
+		return (error);
+	if (fdp->fd_rdir != NULL)
+		vrele(fdp->fd_rdir);
+	fdp->fd_rdir = nd.ni_vp;
+	return (0);
+}
+
+/*
+ * Common routine for chroot and chdir.
+ *
+ * Runs namei() on the caller-initialized ndp, then requires the result to
+ * be a directory on which p has VEXEC access.  The vnode is unlocked in
+ * every case; on failure it is also released, on success it stays
+ * referenced in ndp->ni_vp for the caller to install.
+ */
+static int
+change_dir(ndp, p)
+	register struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *dirvp;
+	int error;
+
+	error = namei(ndp);
+	if (error != 0)
+		return (error);
+	dirvp = ndp->ni_vp;
+	if (dirvp->v_type == VDIR)
+		error = VOP_ACCESS(dirvp, VEXEC, p->p_ucred, p);
+	else
+		error = ENOTDIR;
+	VOP_UNLOCK(dirvp);
+	if (error != 0)
+		vrele(dirvp);
+	return (error);
+}
+
+/*
+ * Check permissions, allocate an open file structure,
+ * and call the device open routine if any.
+ *
+ * On success *retval is the new descriptor index.  Before vn_open() is
+ * called p->p_dupfd is set to -indx - 1; if vn_open() then fails with
+ * ENODEV or ENXIO and p_dupfd has become non-negative, the descriptor is
+ * set up through dupfdopen() instead.  ERESTART from the open is reported
+ * as EINTR.  When O_EXLOCK or O_SHLOCK is requested, an advisory lock
+ * (l_start = 0, l_len = 0) is taken through VOP_ADVLOCK before returning;
+ * if that fails the file is closed again and the error returned.
+ */
+struct open_args {
+ char *path;
+ int flags;
+ int mode;
+};
+open(p, uap, retval)
+ struct proc *p;
+ register struct open_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ register struct vnode *vp;
+ int flags, cmode;
+ struct file *nfp;
+ int type, indx, error;
+ struct flock lf;
+ struct nameidata nd;
+ extern struct fileops vnops;
+
+ if (error = falloc(p, &nfp, &indx))
+ return (error);
+ fp = nfp;
+ flags = FFLAGS(uap->flags);
+ cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; /* apply fd_cmask, clear S_ISTXT */
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ p->p_dupfd = -indx - 1; /* XXX check for fdopen */
+ if (error = vn_open(&nd, flags, cmode)) {
+ ffree(fp);
+ if ((error == ENODEV || error == ENXIO) &&
+ p->p_dupfd >= 0 && /* XXX from fdopen */
+ (error =
+ dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) {
+ *retval = indx;
+ return (0);
+ }
+ if (error == ERESTART)
+ error = EINTR;
+ fdp->fd_ofiles[indx] = NULL; /* clear the descriptor slot */
+ return (error);
+ }
+ p->p_dupfd = 0;
+ vp = nd.ni_vp;
+ fp->f_flag = flags & FMASK;
+ fp->f_type = DTYPE_VNODE;
+ fp->f_ops = &vnops;
+ fp->f_data = (caddr_t)vp;
+ if (flags & (O_EXLOCK | O_SHLOCK)) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ if (flags & O_EXLOCK)
+ lf.l_type = F_WRLCK;
+ else
+ lf.l_type = F_RDLCK;
+ type = F_FLOCK;
+ if ((flags & FNONBLOCK) == 0)
+ type |= F_WAIT;
+ VOP_UNLOCK(vp); /* vnode is unlocked across VOP_ADVLOCK */
+ if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) {
+ (void) vn_close(vp, fp->f_flag, fp->f_cred, p);
+ ffree(fp);
+ fdp->fd_ofiles[indx] = NULL;
+ return (error);
+ }
+ VOP_LOCK(vp); /* relock so the common VOP_UNLOCK below balances */
+ fp->f_flag |= FHASLOCK;
+ }
+ VOP_UNLOCK(vp);
+ *retval = indx;
+ return (0);
+}
+
+#ifdef COMPAT_43
+/*
+ * Create a file.
+ *
+ * Compatibility entry point: forwards to open() with the flags fixed to
+ * O_WRONLY | O_CREAT | O_TRUNC and the caller's path and mode.
+ */
+struct ocreat_args {
+	char *path;
+	int mode;
+};
+ocreat(p, uap, retval)
+	struct proc *p;
+	register struct ocreat_args *uap;
+	int *retval;
+{
+	struct open_args oargs;
+
+	oargs.flags = O_WRONLY | O_CREAT | O_TRUNC;
+	oargs.mode = uap->mode;
+	oargs.path = uap->path;
+	return (open(p, &oargs, retval));
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Create a special file.
+ *
+ * Requires suser() to succeed.  The node type comes from
+ * uap->mode & S_IFMT: S_IFCHR gives VCHR, S_IFBLK gives VBLK and S_IFMT
+ * itself gives VBAD; any other value is EINVAL.  Returns EEXIST when the
+ * name already exists.  On any error after the lookup the CREATE
+ * operation is aborted and the vnodes returned by namei() are released.
+ * retval is not used.
+ */
+struct mknod_args {
+ char *path;
+ int mode;
+ int dev;
+};
+/* ARGSUSED */
+mknod(p, uap, retval)
+ struct proc *p;
+ register struct mknod_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp != NULL)
+ error = EEXIST;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+ vattr.va_rdev = uap->dev;
+
+ switch (uap->mode & S_IFMT) {
+ case S_IFMT: /* used by badsect to flag bad sectors */
+ vattr.va_type = VBAD;
+ break;
+ case S_IFCHR:
+ vattr.va_type = VCHR;
+ break;
+ case S_IFBLK:
+ vattr.va_type = VBLK;
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ }
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp) /* parent and target are the same vnode */
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (vp)
+ vrele(vp);
+ }
+ return (error);
+}
+
+/*
+ * Create named pipe.
+ *
+ * When the kernel is built without FIFO this always returns EOPNOTSUPP.
+ * Otherwise the name is looked up for CREATE; an existing entry yields
+ * EEXIST, and a new VFIFO node is made through VOP_MKNOD with the mode
+ * masked by ALLPERMS and the process fd_cmask.  retval is not used.
+ */
+struct mkfifo_args {
+ char *path;
+ int mode;
+};
+/* ARGSUSED */
+mkfifo(p, uap, retval)
+ struct proc *p;
+ register struct mkfifo_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+#ifndef FIFO
+ return (EOPNOTSUPP);
+#else
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ if (nd.ni_vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp) /* parent and target are the same vnode */
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(nd.ni_vp);
+ return (EEXIST);
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_type = VFIFO;
+ vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr));
+#endif /* FIFO */
+}
+
+/*
+ * Make a hard file link.
+ *
+ * uap->path names the existing file and uap->link the new name.  The
+ * existing file is looked up first; the same nameidata is then reused
+ * (CREATE, LOCKPARENT) to look up the new name.  Linking to a directory
+ * is attempted only if suser() succeeds.  An existing target yields
+ * EEXIST.  The reference on the source vnode is released on every path
+ * that gets past the first lookup.  retval is not used.
+ */
+struct link_args {
+ char *path;
+ char *link;
+};
+/* ARGSUSED */
+link(p, uap, retval)
+ struct proc *p;
+ register struct link_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct nameidata nd;
+ int error;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VDIR ||
+ (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+ nd.ni_cnd.cn_nameiop = CREATE; /* reuse nd for the new name */
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ nd.ni_dirp = uap->link;
+ if ((error = namei(&nd)) == 0) {
+ if (nd.ni_vp != NULL)
+ error = EEXIST;
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp,
+ p, p->p_ucred, LEASE_WRITE);
+ LEASE_CHECK(vp,
+ p, p->p_ucred, LEASE_WRITE);
+ error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ }
+ }
+ }
+ vrele(vp); /* reference from the first lookup */
+ return (error);
+}
+
+/*
+ * Make a symbolic link.
+ *
+ * uap->path is the link contents and uap->link the name to create.  The
+ * contents are copied in to a MAXPATHLEN buffer (type M_NAMEI) that is
+ * freed at "out" on every exit path.  An existing target yields EEXIST.
+ * The new link's mode is ACCESSPERMS masked by the process fd_cmask.
+ * retval is not used.
+ */
+struct symlink_args {
+ char *path;
+ char *link;
+};
+/* ARGSUSED */
+symlink(p, uap, retval)
+ struct proc *p;
+ register struct symlink_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ char *path;
+ int error;
+ struct nameidata nd;
+
+ MALLOC(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ if (error = copyinstr(uap->path, path, MAXPATHLEN, NULL))
+ goto out;
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->link, p);
+ if (error = namei(&nd))
+ goto out;
+ if (nd.ni_vp) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp) /* parent and target are the same vnode */
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(nd.ni_vp);
+ error = EEXIST;
+ goto out;
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
+out:
+ FREE(path, M_NAMEI);
+ return (error);
+}
+
+/*
+ * Delete a name from the filesystem.
+ *
+ * Removing a directory is attempted only if suser() succeeds.  A vnode
+ * flagged VROOT yields EBUSY; otherwise vnode_pager_uncache() is called
+ * on it before the removal.  On error the DELETE operation is aborted
+ * and both the parent and the target vnode are released here.
+ * retval is not used.
+ */
+struct unlink_args {
+ char *path;
+};
+/* ARGSUSED */
+unlink(p, uap, retval)
+ struct proc *p;
+ struct unlink_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+
+ if (vp->v_type != VDIR ||
+ (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+ else
+ (void)vnode_pager_uncache(vp);
+ }
+
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp) /* parent and target are the same vnode */
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp); /* vp was locked above */
+ }
+ return (error);
+}
+
+/*
+ * Reposition read/write file offset.
+ *
+ * Returns EBADF for a descriptor that is out of range or not open,
+ * ESPIPE when the file is not vnode-backed, and EINVAL for an unknown
+ * uap->whence.  L_SET sets the offset, L_INCR adds to it, and L_XTND
+ * sets it relative to the size reported by VOP_GETATTR.  The resulting
+ * offset is stored through retval as an off_t.
+ */
+struct lseek_args {
+	int fd;
+	int pad;
+	off_t offset;
+	int whence;
+};
+lseek(p, uap, retval)
+	struct proc *p;
+	register struct lseek_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	struct ucred *cred = p->p_ucred;
+	register struct file *fp;
+	struct vattr va;
+	int error;
+
+	if ((u_int)uap->fd >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[uap->fd];
+	if (fp == NULL)
+		return (EBADF);
+	if (fp->f_type != DTYPE_VNODE)
+		return (ESPIPE);
+
+	if (uap->whence == L_SET) {
+		fp->f_offset = uap->offset;
+	} else if (uap->whence == L_INCR) {
+		fp->f_offset += uap->offset;
+	} else if (uap->whence == L_XTND) {
+		error = VOP_GETATTR((struct vnode *)fp->f_data, &va, cred, p);
+		if (error != 0)
+			return (error);
+		fp->f_offset = uap->offset + va.va_size;
+	} else {
+		return (EINVAL);
+	}
+	*(off_t *)retval = fp->f_offset;
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Reposition read/write file offset.
+ *
+ * Compatibility entry point taking a long offset: repacks the arguments
+ * into a struct lseek_args, calls lseek(), and stores the resulting
+ * offset through retval as a long.
+ */
+struct olseek_args {
+	int fd;
+	long offset;
+	int whence;
+};
+olseek(p, uap, retval)
+	struct proc *p;
+	register struct olseek_args *uap;
+	int *retval;
+{
+	struct lseek_args largs;
+	off_t newoff;
+	int error;
+
+	largs.whence = uap->whence;
+	largs.offset = uap->offset;
+	largs.fd = uap->fd;
+	error = lseek(p, &largs, &newoff);
+	*(long *)retval = newoff;
+	return (error);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Check access permissions.
+ *
+ * The check is made with the real ids: cr_uid and cr_groups[0] of the
+ * process credential are overwritten with p_ruid and p_rgid for the
+ * duration of the call and restored at out1 on every path.  R_OK, W_OK
+ * and X_OK in uap->flags map to VREAD, VWRITE and VEXEC; when VWRITE is
+ * requested vn_writechk() must also succeed.  retval is not used.
+ *
+ * NOTE(review): the credential is modified in place while namei() and
+ * VOP_ACCESS() run; confirm nothing else can observe p->p_ucred
+ * during that window.
+ */
+struct access_args {
+ char *path;
+ int flags;
+};
+access(p, uap, retval)
+ struct proc *p;
+ register struct access_args *uap;
+ int *retval;
+{
+ register struct ucred *cred = p->p_ucred;
+ register struct vnode *vp;
+ int error, flags, t_gid, t_uid;
+ struct nameidata nd;
+
+ t_uid = cred->cr_uid; /* saved for restoration at out1 */
+ t_gid = cred->cr_groups[0];
+ cred->cr_uid = p->p_cred->p_ruid;
+ cred->cr_groups[0] = p->p_cred->p_rgid;
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ goto out1;
+ vp = nd.ni_vp;
+
+ /* Flags == 0 means only check for existence. */
+ if (uap->flags) {
+ flags = 0;
+ if (uap->flags & R_OK)
+ flags |= VREAD;
+ if (uap->flags & W_OK)
+ flags |= VWRITE;
+ if (uap->flags & X_OK)
+ flags |= VEXEC;
+ if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
+ error = VOP_ACCESS(vp, flags, cred, p);
+ }
+ vput(vp);
+out1:
+ cred->cr_uid = t_uid;
+ cred->cr_groups[0] = t_gid;
+ return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Get file status; this version follows links.
+ *
+ * Compatibility entry point: obtains a struct stat with vn_stat(),
+ * converts it with cvtstat(), and copies the resulting struct ostat out
+ * to uap->ub.  retval is not used.
+ */
+struct ostat_args {
+	char *path;
+	struct ostat *ub;
+};
+/* ARGSUSED */
+ostat(p, uap, retval)
+	struct proc *p;
+	register struct ostat_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct stat newsb;
+	struct ostat oldsb;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	error = vn_stat(nd.ni_vp, &newsb, p);
+	vput(nd.ni_vp);
+	if (error != 0)
+		return (error);
+	cvtstat(&newsb, &oldsb);
+	return (copyout((caddr_t)&oldsb, (caddr_t)uap->ub, sizeof (oldsb)));
+}
+
+/*
+ * Get file status; this version does not follow links.
+ *
+ * Compatibility entry point: same as ostat except that the lookup uses
+ * NOFOLLOW.  retval is not used.
+ */
+struct olstat_args {
+	char *path;
+	struct ostat *ub;
+};
+/* ARGSUSED */
+olstat(p, uap, retval)
+	struct proc *p;
+	register struct olstat_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct stat newsb;
+	struct ostat oldsb;
+	int error;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	error = vn_stat(nd.ni_vp, &newsb, p);
+	vput(nd.ni_vp);
+	if (error != 0)
+		return (error);
+	cvtstat(&newsb, &oldsb);
+	return (copyout((caddr_t)&oldsb, (caddr_t)uap->ub, sizeof (oldsb)));
+}
+
+/*
+ * Fill in an old-format stat structure (ost) from a new-format one (st).
+ *
+ * Every field is copied across by name.  The one exception is st_size:
+ * a size of 2^32 or more is reported in the old structure as -2.
+ */
+cvtstat(st, ost)
+	struct stat *st;
+	struct ostat *ost;
+{
+
+	/* Identity and ownership. */
+	ost->st_dev = st->st_dev;
+	ost->st_ino = st->st_ino;
+	ost->st_mode = st->st_mode;
+	ost->st_nlink = st->st_nlink;
+	ost->st_uid = st->st_uid;
+	ost->st_gid = st->st_gid;
+	ost->st_rdev = st->st_rdev;
+
+	/* Size, with the out-of-range marker for large files. */
+	if (st->st_size >= (quad_t)1 << 32)
+		ost->st_size = -2;
+	else
+		ost->st_size = st->st_size;
+
+	/* Timestamps. */
+	ost->st_atime = st->st_atime;
+	ost->st_mtime = st->st_mtime;
+	ost->st_ctime = st->st_ctime;
+
+	/* Remaining fields. */
+	ost->st_blksize = st->st_blksize;
+	ost->st_blocks = st->st_blocks;
+	ost->st_flags = st->st_flags;
+	ost->st_gen = st->st_gen;
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Get file status; this version follows links.
+ *
+ * Looks up uap->path with the leaf locked, fills a struct stat with
+ * vn_stat(), and copies it out to uap->ub.  retval is not used.
+ */
+struct stat_args {
+	char *path;
+	struct stat *ub;
+};
+/* ARGSUSED */
+stat(p, uap, retval)
+	struct proc *p;
+	register struct stat_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct stat statbuf;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	error = vn_stat(nd.ni_vp, &statbuf, p);
+	vput(nd.ni_vp);
+	if (error != 0)
+		return (error);
+	return (copyout((caddr_t)&statbuf, (caddr_t)uap->ub, sizeof (statbuf)));
+}
+
+/*
+ * Get file status; this version does not follow links.
+ *
+ * The lookup returns both the leaf and its parent directory, locked.
+ * For anything but a symbolic link the parent is released at once and
+ * the leaf is stat'ed normally.  For a symbolic link the result is built
+ * from the parent directory's stat, with the file type changed to
+ * S_IFLNK and st_nlink, st_size and st_blocks taken from the link
+ * itself.  retval is not used.
+ */
+struct lstat_args {
+ char *path;
+ struct stat *ub;
+};
+/* ARGSUSED */
+lstat(p, uap, retval)
+ struct proc *p;
+ register struct lstat_args *uap;
+ int *retval;
+{
+ int error;
+ struct vnode *vp, *dvp;
+ struct stat sb, sb1;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE,
+ uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ /*
+ * For symbolic links, always return the attributes of its
+ * containing directory, except for mode, size, and links.
+ */
+ vp = nd.ni_vp;
+ dvp = nd.ni_dvp;
+ if (vp->v_type != VLNK) {
+ if (dvp == vp) /* parent and leaf are the same vnode */
+ vrele(dvp);
+ else
+ vput(dvp);
+ error = vn_stat(vp, &sb, p);
+ vput(vp);
+ if (error)
+ return (error);
+ } else {
+ error = vn_stat(dvp, &sb, p); /* sb: the directory's attributes */
+ vput(dvp);
+ if (error) {
+ vput(vp);
+ return (error);
+ }
+ error = vn_stat(vp, &sb1, p); /* sb1: the link's own attributes */
+ vput(vp);
+ if (error)
+ return (error);
+ sb.st_mode &= ~S_IFDIR;
+ sb.st_mode |= S_IFLNK;
+ sb.st_nlink = sb1.st_nlink;
+ sb.st_size = sb1.st_size;
+ sb.st_blocks = sb1.st_blocks; /* block count also comes from the link */
+ }
+ error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
+ return (error);
+}
+
+/*
+ * Get configurable pathname variables.
+ *
+ * Looks up uap->path with the leaf locked and asks the filesystem for
+ * variable uap->name through VOP_PATHCONF, which stores its answer via
+ * retval.
+ */
+struct pathconf_args {
+	char *path;
+	int name;
+};
+/* ARGSUSED */
+pathconf(p, uap, retval)
+	struct proc *p;
+	register struct pathconf_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	error = VOP_PATHCONF(nd.ni_vp, uap->name, retval);
+	vput(nd.ni_vp);
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link.
+ *
+ * Looks up uap->path without following a trailing link.  If the result
+ * is not a symbolic link, EINVAL is returned and *retval is left
+ * untouched.  Otherwise up to uap->count bytes of the link contents are
+ * read into the user buffer uap->buf through VOP_READLINK and *retval is
+ * set to the number of bytes transferred.
+ */
+struct readlink_args {
+	char *path;
+	char *buf;
+	int count;
+};
+/* ARGSUSED */
+readlink(p, uap, retval)
+	struct proc *p;
+	register struct readlink_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct iovec aiov;
+	struct uio auio;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VLNK)
+		error = EINVAL;
+	else {
+		aiov.iov_base = uap->buf;
+		aiov.iov_len = uap->count;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
+		auio.uio_offset = 0;
+		auio.uio_rw = UIO_READ;
+		auio.uio_segflg = UIO_USERSPACE;
+		auio.uio_procp = p;
+		auio.uio_resid = uap->count;
+		error = VOP_READLINK(vp, &auio, p->p_ucred);
+		/*
+		 * auio is only initialized on this path, so the transfer
+		 * count is computed here and not after the if/else, where
+		 * the EINVAL case would read an uninitialized uio_resid.
+		 */
+		*retval = uap->count - auio.uio_resid;
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Change flags of a file given a path name.
+ *
+ * Returns EROFS when the file's mount has MNT_RDONLY set; otherwise
+ * uap->flags is applied through VOP_SETATTR with the vnode locked.
+ * retval is not used.
+ */
+struct chflags_args {
+	char *path;
+	int flags;
+};
+/* ARGSUSED */
+chflags(p, uap, retval)
+	struct proc *p;
+	register struct chflags_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr va;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&va);
+		va.va_flags = uap->flags;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Change flags of a file given a file descriptor.
+ *
+ * Returns EROFS when the file's mount has MNT_RDONLY set; otherwise
+ * uap->flags is applied through VOP_SETATTR with the vnode locked.
+ * The vnode belongs to the open file, so it is only unlocked here, not
+ * released.  retval is not used.
+ */
+struct fchflags_args {
+	int fd;
+	int flags;
+};
+/* ARGSUSED */
+fchflags(p, uap, retval)
+	struct proc *p;
+	register struct fchflags_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr va;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&va);
+		va.va_flags = uap->flags;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+	} else
+		error = EROFS;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Change mode of a file given path name.
+ *
+ * Returns EROFS when the file's mount has MNT_RDONLY set; otherwise
+ * uap->mode masked with ALLPERMS is applied through VOP_SETATTR with the
+ * vnode locked.  retval is not used.
+ */
+struct chmod_args {
+	char *path;
+	int mode;
+};
+/* ARGSUSED */
+chmod(p, uap, retval)
+	struct proc *p;
+	register struct chmod_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr va;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&va);
+		va.va_mode = uap->mode & ALLPERMS;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Change mode of a file given a file descriptor.
+ *
+ * Returns EROFS when the file's mount has MNT_RDONLY set; otherwise
+ * uap->mode masked with ALLPERMS is applied through VOP_SETATTR with the
+ * vnode locked.  The vnode belongs to the open file, so it is only
+ * unlocked here, not released.  retval is not used.
+ */
+struct fchmod_args {
+	int fd;
+	int mode;
+};
+/* ARGSUSED */
+fchmod(p, uap, retval)
+	struct proc *p;
+	register struct fchmod_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr va;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&va);
+		va.va_mode = uap->mode & ALLPERMS;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+	} else
+		error = EROFS;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Set ownership given a path name.
+ *
+ * Returns EROFS when the file's mount has MNT_RDONLY set; otherwise
+ * uap->uid and uap->gid are applied through VOP_SETATTR with the vnode
+ * locked.  retval is not used.
+ */
+struct chown_args {
+	char *path;
+	int uid;
+	int gid;
+};
+/* ARGSUSED */
+chown(p, uap, retval)
+	struct proc *p;
+	register struct chown_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr va;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&va);
+		va.va_uid = uap->uid;
+		va.va_gid = uap->gid;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set ownership given a file descriptor.
+ *
+ * Returns EROFS when the file's mount has MNT_RDONLY set; otherwise
+ * uap->uid and uap->gid are applied through VOP_SETATTR with the vnode
+ * locked.  The vnode belongs to the open file, so it is only unlocked
+ * here, not released.  retval is not used.
+ */
+struct fchown_args {
+	int fd;
+	int uid;
+	int gid;
+};
+/* ARGSUSED */
+fchown(p, uap, retval)
+	struct proc *p;
+	register struct fchown_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr va;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&va);
+		va.va_uid = uap->uid;
+		va.va_gid = uap->gid;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+	} else
+		error = EROFS;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Set the access and modification times of a file.
+ *
+ * With a NULL uap->tptr both times are set to the value returned by
+ * microtime() and VA_UTIMES_NULL is set in va_vaflags; otherwise two
+ * struct timeval (access time, then modification time) are copied in
+ * from uap->tptr.  The times are fetched before the path lookup.
+ * Returns EROFS when the file's mount has MNT_RDONLY set.
+ * retval is not used.
+ */
+struct utimes_args {
+ char *path;
+ struct timeval *tptr;
+};
+/* ARGSUSED */
+utimes(p, uap, retval)
+ struct proc *p;
+ register struct utimes_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct timeval tv[2];
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ VATTR_NULL(&vattr);
+ if (uap->tptr == NULL) {
+ microtime(&tv[0]);
+ tv[1] = tv[0];
+ vattr.va_vaflags |= VA_UTIMES_NULL;
+ } else if (error = copyin((caddr_t)uap->tptr, (caddr_t)tv, sizeof (tv)))
+ return (error);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ vattr.va_atime.ts_sec = tv[0].tv_sec;
+ vattr.va_atime.ts_nsec = tv[0].tv_usec * 1000; /* microseconds to nanoseconds */
+ vattr.va_mtime.ts_sec = tv[1].tv_sec;
+ vattr.va_mtime.ts_nsec = tv[1].tv_usec * 1000; /* microseconds to nanoseconds */
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Truncate a file given its path name.
+ *
+ * Returns EISDIR for a directory.  Otherwise vn_writechk() and a VWRITE
+ * VOP_ACCESS check must both succeed before the size is set to
+ * uap->length through VOP_SETATTR; the first failing check supplies the
+ * error.  retval is not used.
+ */
+struct truncate_args {
+ char *path;
+ int pad;
+ off_t length;
+};
+/* ARGSUSED */
+truncate(p, uap, retval)
+ struct proc *p;
+ register struct truncate_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_type == VDIR)
+ error = EISDIR;
+ else if ((error = vn_writechk(vp)) == 0 &&
+ (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
+ VATTR_NULL(&vattr);
+ vattr.va_size = uap->length;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp); /* unlock and release the lookup's vnode */
+ return (error);
+}
+
+/*
+ * Truncate a file given a file descriptor.
+ *
+ * Returns EINVAL when the descriptor was not opened with FWRITE and
+ * EISDIR for a directory.  Otherwise, if vn_writechk() succeeds, the
+ * size is set to uap->length through VOP_SETATTR using the credential
+ * stored in the open file.  retval is not used.
+ */
+struct ftruncate_args {
+	int fd;
+	int pad;
+	off_t length;
+};
+/* ARGSUSED */
+ftruncate(p, uap, retval)
+	struct proc *p;
+	register struct ftruncate_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr va;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	if ((fp->f_flag & FWRITE) == 0)
+		return (EINVAL);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_type != VDIR) {
+		error = vn_writechk(vp);
+		if (error == 0) {
+			VATTR_NULL(&va);
+			va.va_size = uap->length;
+			error = VOP_SETATTR(vp, &va, fp->f_cred, p);
+		}
+	} else
+		error = EISDIR;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Truncate a file given its path name.
+ *
+ * Compatibility entry point taking a long length: repacks the arguments
+ * into a struct truncate_args and calls truncate().
+ */
+struct otruncate_args {
+	char *path;
+	long length;
+};
+/* ARGSUSED */
+otruncate(p, uap, retval)
+	struct proc *p;
+	register struct otruncate_args *uap;
+	int *retval;
+{
+	struct truncate_args targs;
+
+	targs.length = uap->length;
+	targs.path = uap->path;
+	return (truncate(p, &targs, retval));
+}
+
+/*
+ * Truncate a file given a file descriptor.
+ *
+ * Compatibility entry point taking a long length: repacks the arguments
+ * into a struct ftruncate_args and calls ftruncate().
+ */
+struct oftruncate_args {
+	int fd;
+	long length;
+};
+/* ARGSUSED */
+oftruncate(p, uap, retval)
+	struct proc *p;
+	register struct oftruncate_args *uap;
+	int *retval;
+{
+	struct ftruncate_args targs;
+
+	targs.length = uap->length;
+	targs.fd = uap->fd;
+	return (ftruncate(p, &targs, retval));
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Sync an open file.
+ *
+ * Calls VOP_FSYNC with MNT_WAIT on the descriptor's vnode, holding the
+ * vnode lock across the call and using the credential stored in the
+ * open file.  retval is not used.
+ */
+struct fsync_args {
+	int fd;
+};
+/* ARGSUSED */
+fsync(p, uap, retval)
+	struct proc *p;
+	struct fsync_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	register struct vnode *vp;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	VOP_LOCK(vp);
+	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Rename files. Source and destination must either both be directories,
+ * or both not be directories. If target is a directory, it must be empty.
+ *
+ * Both names are looked up with SAVESTART, so each nameidata's
+ * ni_startdir and cn_pnbuf are released here on the way out.  ENOTDIR or
+ * EISDIR is returned for a directory/non-directory mismatch, and EINVAL
+ * when the source vnode is the target's parent directory.  Internally
+ * error == -1 means "source and target are the same directory entry":
+ * it skips VOP_RENAME, runs the abort/cleanup path, and is turned into
+ * a return value of 0.  retval is not used.
+ */
+struct rename_args {
+ char *from;
+ char *to;
+};
+/* ARGSUSED */
+rename(p, uap, retval)
+ struct proc *p;
+ register struct rename_args *uap;
+ int *retval;
+{
+ register struct vnode *tvp, *fvp, *tdvp;
+ struct nameidata fromnd, tond;
+ int error;
+
+ NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
+ uap->from, p);
+ if (error = namei(&fromnd))
+ return (error);
+ fvp = fromnd.ni_vp;
+ NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART,
+ UIO_USERSPACE, uap->to, p);
+ if (error = namei(&tond)) {
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ goto out1; /* only the source lookup needs cleaning up */
+ }
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp;
+ if (tvp != NULL) {
+ if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ }
+ }
+ if (fvp == tdvp)
+ error = EINVAL;
+ /*
+ * If source is the same as the destination (that is the
+ * same inode number with the same name in the same directory),
+ * then there is nothing to do.
+ */
+ if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+ fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+ !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+ fromnd.ni_cnd.cn_namelen))
+ error = -1; /* sentinel, mapped to 0 before returning */
+out:
+ if (!error) {
+ LEASE_CHECK(tdvp, p, p->p_ucred, LEASE_WRITE);
+ if (fromnd.ni_dvp != tdvp)
+ LEASE_CHECK(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ if (tvp)
+ LEASE_CHECK(tvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+ tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+ } else {
+ VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
+ if (tdvp == tvp)
+ vrele(tdvp);
+ else
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ }
+ vrele(tond.ni_startdir);
+ FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+out1:
+ if (fromnd.ni_startdir)
+ vrele(fromnd.ni_startdir);
+ FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+ if (error == -1)
+ return (0);
+ return (error);
+}
+
+/*
+ * Make a directory file.
+ *
+ * Returns EEXIST when the name already exists.  The new directory's mode
+ * is uap->mode masked with ACCESSPERMS and the process fd_cmask.  On
+ * success the vnode returned by VOP_MKDIR is released with vput() before
+ * returning.  retval is not used.
+ */
+struct mkdir_args {
+ char *path;
+ int mode;
+};
+/* ARGSUSED */
+mkdir(p, uap, retval)
+ struct proc *p;
+ register struct mkdir_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp) /* parent and target are the same vnode */
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(vp);
+ return (EEXIST);
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_type = VDIR;
+ vattr.va_mode = (uap->mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+ if (!error)
+ vput(nd.ni_vp);
+ return (error);
+}
+
+/*
+ * Remove a directory file.
+ *
+ * Returns ENOTDIR when the name is not a directory, EINVAL when the
+ * target is its own parent in the lookup (rmdir of "."), and EBUSY for a
+ * vnode flagged VROOT.  On error the DELETE operation is aborted and
+ * both the parent and the target vnode are released here.
+ * retval is not used.
+ */
+struct rmdir_args {
+ char *path;
+};
+/* ARGSUSED */
+rmdir(p, uap, retval)
+ struct proc *p;
+ struct rmdir_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+ /*
+ * No rmdir "." please.
+ */
+ if (nd.ni_dvp == vp) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+out:
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp) /* parent and target are the same vnode */
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ return (error);
+}
+
+#ifdef COMPAT_43
+/*
+ * Read a block of directory entries in a file system independent format.
+ *
+ * Compatibility (COMPAT_43) variant.  Except in the one case noted
+ * below, the entries are read into a temporary kernel buffer of
+ * uap->count bytes, the d_type / d_namlen fields of every entry are
+ * rewritten into the layout the old interface expects (see the
+ * byte-order comments in the loop), and the buffer is then moved to the
+ * user buffer with uiomove().  When BYTE_ORDER is not LITTLE_ENDIAN and
+ * the mount's mnt_maxsymlinklen is <= 0, the entries are instead read
+ * straight into the user buffer.  An entry with d_reclen <= 0 ends the
+ * conversion with EIO.  On success the starting offset is copied out to
+ * uap->basep and *retval is the number of bytes transferred.
+ *
+ * NOTE(review): the temporary buffer size is the caller-supplied
+ * uap->count and no upper bound is applied in this function; confirm
+ * whether a limit is enforced elsewhere.
+ */
+struct ogetdirentries_args {
+ int fd;
+ char *buf;
+ u_int count;
+ long *basep;
+};
+ogetdirentries(p, uap, retval)
+ struct proc *p;
+ register struct ogetdirentries_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct file *fp;
+ struct uio auio, kuio;
+ struct iovec aiov, kiov;
+ struct dirent *dp, *edp;
+ caddr_t dirbuf;
+ int error, readcnt;
+ long loff;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ if ((fp->f_flag & FREAD) == 0)
+ return (EBADF);
+ vp = (struct vnode *)fp->f_data;
+ if (vp->v_type != VDIR)
+ return (EINVAL);
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ auio.uio_resid = uap->count;
+ VOP_LOCK(vp);
+ loff = auio.uio_offset = fp->f_offset; /* loff: offset before this read */
+# if (BYTE_ORDER != LITTLE_ENDIAN)
+ if (vp->v_mount->mnt_maxsymlinklen <= 0) {
+ error = VOP_READDIR(vp, &auio, fp->f_cred);
+ fp->f_offset = auio.uio_offset;
+ } else
+# endif
+ {
+ kuio = auio; /* same request, redirected to a kernel buffer */
+ kuio.uio_iov = &kiov;
+ kuio.uio_segflg = UIO_SYSSPACE;
+ kiov.iov_len = uap->count;
+ MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
+ kiov.iov_base = dirbuf;
+ error = VOP_READDIR(vp, &kuio, fp->f_cred);
+ fp->f_offset = kuio.uio_offset;
+ if (error == 0) {
+ readcnt = uap->count - kuio.uio_resid;
+ edp = (struct dirent *)&dirbuf[readcnt]; /* end of the data read */
+ for (dp = (struct dirent *)dirbuf; dp < edp; ) {
+# if (BYTE_ORDER == LITTLE_ENDIAN)
+ /*
+ * The expected low byte of
+ * dp->d_namlen is our dp->d_type.
+ * The high MBZ byte of dp->d_namlen
+ * is our dp->d_namlen.
+ */
+ dp->d_type = dp->d_namlen;
+ dp->d_namlen = 0;
+# else
+ /*
+ * The dp->d_type is the high byte
+ * of the expected dp->d_namlen,
+ * so must be zero'ed.
+ */
+ dp->d_type = 0;
+# endif
+ if (dp->d_reclen > 0) {
+ dp = (struct dirent *)
+ ((char *)dp + dp->d_reclen);
+ } else {
+ error = EIO;
+ break;
+ }
+ }
+ if (dp >= edp) /* loop finished without hitting a bad record */
+ error = uiomove(dirbuf, readcnt, &auio);
+ }
+ FREE(dirbuf, M_TEMP);
+ }
+ VOP_UNLOCK(vp);
+ if (error)
+ return (error);
+ error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+ *retval = uap->count - auio.uio_resid;
+ return (error);
+}
+#endif
+
+/*
+ * Read a block of directory entries in a file system independent format.
+ *
+ * Reads up to uap->count bytes of entries from the directory open on
+ * uap->fd into uap->buf, starting at the file's current offset.  On
+ * success the starting offset is copied out to uap->basep and *retval is
+ * the number of bytes transferred.  Returns EBADF if the descriptor was
+ * not opened with FREAD and EINVAL if the vnode is not a directory.
+ *
+ * When a read transfers nothing, two kinds of union fall-through are
+ * tried, each of which swaps the file's vnode, resets its offset to 0
+ * and restarts at "unionread":
+ *  - with UNION configured, a union vnode is replaced by the vnode
+ *    returned by union_lowervp(), if there is one;
+ *  - the root (VROOT) of a mount flagged MNT_UNION is replaced by the
+ *    vnode that mount covers.
+ */
+struct getdirentries_args {
+	int fd;
+	char *buf;
+	u_int count;
+	long *basep;
+};
+getdirentries(p, uap, retval)
+	struct proc *p;
+	register struct getdirentries_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct file *fp;
+	struct uio auio;
+	struct iovec aiov;
+	long loff;
+	int error;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	if ((fp->f_flag & FREAD) == 0)
+		return (EBADF);
+	vp = (struct vnode *)fp->f_data;
+unionread:
+	if (vp->v_type != VDIR)
+		return (EINVAL);
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->count;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+	auio.uio_resid = uap->count;
+	VOP_LOCK(vp);
+	loff = auio.uio_offset = fp->f_offset;
+	error = VOP_READDIR(vp, &auio, fp->f_cred);
+	fp->f_offset = auio.uio_offset;
+	VOP_UNLOCK(vp);
+	if (error)
+		return (error);
+
+#ifdef UNION
+{
+	extern int (**union_vnodeop_p)();
+	extern struct vnode *union_lowervp __P((struct vnode *));
+
+	if ((uap->count == auio.uio_resid) &&
+	    (vp->v_op == union_vnodeop_p)) {
+		struct vnode *lvp;
+
+		/*
+		 * Keep the lower vnode in its own variable: vp must stay
+		 * valid when there is no lower layer, because the
+		 * MNT_UNION test below dereferences it.
+		 */
+		lvp = union_lowervp(vp);
+		if (lvp != NULLVP) {
+			VOP_LOCK(lvp);
+			/* VOP_OPEN takes the credential and process too. */
+			error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
+			VOP_UNLOCK(lvp);
+
+			if (error) {
+				vrele(lvp);
+				return (error);
+			}
+			fp->f_data = (caddr_t) lvp;
+			fp->f_offset = 0;
+			error = vn_close(vp, FREAD, fp->f_cred, p);
+			vp = lvp;
+			if (error)
+				return (error);
+			goto unionread;
+		}
+	}
+}
+#endif
+
+	if ((uap->count == auio.uio_resid) &&
+	    (vp->v_flag & VROOT) &&
+	    (vp->v_mount->mnt_flag & MNT_UNION)) {
+		struct vnode *tvp = vp;
+		vp = vp->v_mount->mnt_vnodecovered;
+		VREF(vp);
+		fp->f_data = (caddr_t) vp;
+		fp->f_offset = 0;
+		vrele(tvp);
+		goto unionread;
+	}
+	error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+	*retval = uap->count - auio.uio_resid;
+	return (error);
+}
+
+/*
+ * Set the mode mask for creation of filesystem nodes.
+ *
+ * Stores the previous mask through retval and installs
+ * uap->newmask & ALLPERMS as the new one.  Always returns 0.
+ */
+struct umask_args {
+	int newmask;
+};
+mode_t				/* XXX */
+umask(p, uap, retval)
+	struct proc *p;
+	struct umask_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+
+	/* Report the old mask before it is overwritten. */
+	*retval = fdp->fd_cmask;
+	fdp->fd_cmask = uap->newmask & ALLPERMS;
+	return (0);
+}
+
+/*
+ * Void all references to file by ripping underlying filesystem
+ * away from vnode.
+ *
+ * Only character and block device vnodes are accepted (EINVAL
+ * otherwise).  The caller must either own the file, as reported by
+ * VOP_GETATTR, or pass suser().  vgoneall() is called only when the
+ * vnode has more than one user or is flagged VALIASED.  The reference
+ * from the lookup is released on every path.  retval is not used.
+ */
+struct revoke_args {
+ char *path;
+};
+/* ARGSUSED */
+revoke(p, uap, retval)
+ struct proc *p;
+ register struct revoke_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VCHR && vp->v_type != VBLK) {
+ error = EINVAL;
+ goto out;
+ }
+ if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
+ goto out;
+ if (p->p_ucred->cr_uid != vattr.va_uid &&
+ (error = suser(p->p_ucred, &p->p_acflag)))
+ goto out;
+ if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
+ vgoneall(vp);
+out:
+ vrele(vp); /* reference from the lookup */
+ return (error);
+}
+
+/*
+ * Convert a user file descriptor to a kernel file entry.
+ *
+ * Returns EBADF when fd is out of range for fdp or the slot is empty,
+ * and EINVAL when the open file is not of type DTYPE_VNODE.  On success
+ * the file entry is stored through fpp and 0 is returned.
+ */
+getvnode(fdp, fd, fpp)
+	struct filedesc *fdp;
+	struct file **fpp;
+	int fd;
+{
+	struct file *entry;
+
+	/* The unsigned compare also rejects negative descriptors. */
+	if ((u_int)fd >= fdp->fd_nfiles)
+		return (EBADF);
+	entry = fdp->fd_ofiles[fd];
+	if (entry == NULL)
+		return (EBADF);
+	if (entry->f_type != DTYPE_VNODE)
+		return (EINVAL);
+	*fpp = entry;
+	return (0);
+}
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
new file mode 100644
index 000000000000..d104bb9de773
--- /dev/null
+++ b/sys/kern/vfs_vnops.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+#include <vm/vm.h>
+
+struct fileops vnops = {
+	vn_read,	/* file table vnode read routine */
+	vn_write,	/* file table vnode write routine */
+	vn_ioctl,	/* file table vnode ioctl routine */
+	vn_select,	/* file table vnode select routine */
+	vn_closefile	/* file table vnode close routine */
+};
+
+/*
+ * Common code for vnode open operations.
+ * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
+ *
+ * ndp carries the pathname lookup state and, through ni_cnd.cn_proc,
+ * the calling process whose credentials are used.  fmode holds the
+ * open flags; cmode is the mode given to a file that has to be
+ * created.  Returns 0 with the opened vnode in ndp->ni_vp, or an
+ * errno value.  Failures that reach the bad: label vput() the vnode;
+ * namei() and VOP_CREATE() failures return directly.
+ */
+vn_open(ndp, fmode, cmode)
+ register struct nameidata *ndp;
+ int fmode, cmode;
+{
+ register struct vnode *vp;
+ register struct proc *p = ndp->ni_cnd.cn_proc;
+ register struct ucred *cred = p->p_ucred;
+ struct vattr vat;
+ struct vattr *vap = &vat;
+ int error;
+
+ if (fmode & O_CREAT) {
+ ndp->ni_cnd.cn_nameiop = CREATE;
+ ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+ if ((fmode & O_EXCL) == 0) /* FOLLOW is requested only without O_EXCL */
+ ndp->ni_cnd.cn_flags |= FOLLOW;
+ if (error = namei(ndp))
+ return (error);
+ if (ndp->ni_vp == NULL) { /* lookup found no vnode: create one */
+ VATTR_NULL(vap);
+ vap->va_type = VREG;
+ vap->va_mode = cmode;
+ LEASE_CHECK(ndp->ni_dvp, p, cred, LEASE_WRITE);
+ if (error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
+ &ndp->ni_cnd, vap))
+ return (error);
+ fmode &= ~O_TRUNC; /* no truncation pass for a file just created */
+ vp = ndp->ni_vp;
+ } else { /* name already exists: abandon the create */
+ VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
+ if (ndp->ni_dvp == ndp->ni_vp) /* parent and leaf are the same vnode */
+ vrele(ndp->ni_dvp);
+ else
+ vput(ndp->ni_dvp);
+ ndp->ni_dvp = NULL;
+ vp = ndp->ni_vp;
+ if (fmode & O_EXCL) {
+ error = EEXIST;
+ goto bad;
+ }
+ fmode &= ~O_CREAT; /* from here on this is a plain open */
+ }
+ } else {
+ ndp->ni_cnd.cn_nameiop = LOOKUP;
+ ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF;
+ if (error = namei(ndp))
+ return (error);
+ vp = ndp->ni_vp;
+ }
+ if (vp->v_type == VSOCK) {
+ error = EOPNOTSUPP;
+ goto bad;
+ }
+ if ((fmode & O_CREAT) == 0) { /* access checks are skipped for a file created above */
+ if (fmode & FREAD) {
+ if (error = VOP_ACCESS(vp, VREAD, cred, p))
+ goto bad;
+ }
+ if (fmode & (FWRITE | O_TRUNC)) {
+ if (vp->v_type == VDIR) {
+ error = EISDIR;
+ goto bad;
+ }
+ if ((error = vn_writechk(vp)) ||
+ (error = VOP_ACCESS(vp, VWRITE, cred, p)))
+ goto bad;
+ }
+ }
+ if (fmode & O_TRUNC) {
+ VOP_UNLOCK(vp); /* XXX */
+ LEASE_CHECK(vp, p, cred, LEASE_WRITE);
+ VOP_LOCK(vp); /* XXX */
+ VATTR_NULL(vap);
+ vap->va_size = 0; /* truncate by setting the size attribute to zero */
+ if (error = VOP_SETATTR(vp, vap, cred, p))
+ goto bad;
+ }
+ if (error = VOP_OPEN(vp, fmode, cred, p))
+ goto bad;
+ if (fmode & FWRITE) /* vn_close() decrements this again */
+ vp->v_writecount++;
+ return (0);
+bad:
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Decide whether a vnode may be written.
+ *
+ * Returns EROFS for a regular file, directory or symbolic link on a
+ * read-only file system, ETXTBSY when the vnode is marked VTEXT and
+ * cannot be uncached, and 0 otherwise.
+ */
+vn_writechk(vp)
+	register struct vnode *vp;
+{
+	int rdonly = vp->v_mount->mnt_flag & MNT_RDONLY;
+
+	/*
+	 * On a read-only file system only regular files, directories
+	 * and symbolic links are refused; other vnode types resident
+	 * there (sockets, block and character devices) pass this test.
+	 */
+	if (rdonly && (vp->v_type == VREG || vp->v_type == VDIR ||
+	    vp->v_type == VLNK))
+		return (EROFS);
+
+	/*
+	 * A vnode with shared text attached gets a single attempt at
+	 * being freed up; if that fails, writing cannot be allowed.
+	 */
+	if (vp->v_flag & VTEXT) {
+		if (!vnode_pager_uncache(vp))
+			return (ETXTBSY);
+	}
+	return (0);
+}
+
+/*
+ * Vnode close call.
+ * Gives back the writer count when the file was open for writing,
+ * calls VOP_CLOSE and releases the vnode.  Returns the VOP_CLOSE
+ * status.
+ */
+vn_close(vp, flags, cred, p)
+	register struct vnode *vp;
+	int flags;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int error;
+
+	if ((flags & FWRITE) != 0)
+		--vp->v_writecount;
+	error = VOP_CLOSE(vp, flags, cred, p);
+	/* The vnode is released whether or not the close succeeded. */
+	vrele(vp);
+	return (error);
+}
+
+/*
+ * Perform one read or write on a vnode by wrapping the caller's
+ * buffer in a single-element uio.
+ *
+ * The vnode is locked around the transfer unless IO_NODELOCKED is
+ * set in ioflg.  When aresid is non-null the residual byte count is
+ * stored through it; otherwise an incomplete transfer that reported
+ * no error is converted to EIO.
+ */
+vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
+	enum uio_rw rw;
+	struct vnode *vp;
+	caddr_t base;
+	int len;
+	off_t offset;
+	enum uio_seg segflg;
+	int ioflg;
+	struct ucred *cred;
+	int *aresid;
+	struct proc *p;
+{
+	struct iovec aiov;
+	struct uio auio;
+	int dolock = (ioflg & IO_NODELOCKED) == 0;
+	int error;
+
+	if (dolock)
+		VOP_LOCK(vp);
+
+	/* Describe the single buffer ... */
+	aiov.iov_base = base;
+	aiov.iov_len = len;
+
+	/* ... and the request that covers it. */
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_offset = offset;
+	auio.uio_resid = len;
+	auio.uio_segflg = segflg;
+	auio.uio_rw = rw;
+	auio.uio_procp = p;
+
+	error = (rw == UIO_READ) ?
+	    VOP_READ(vp, &auio, ioflg, cred) :
+	    VOP_WRITE(vp, &auio, ioflg, cred);
+
+	if (aresid != NULL)
+		*aresid = auio.uio_resid;
+	else if (error == 0 && auio.uio_resid != 0)
+		error = EIO;
+
+	if (dolock)
+		VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * File table vnode read routine.
+ * Reads at the file's current offset with the vnode locked, then
+ * advances the offset by the number of bytes actually transferred.
+ * FNONBLOCK on the file is passed down as IO_NDELAY.
+ */
+vn_read(fp, uio, cred)
+	struct file *fp;
+	struct uio *uio;
+	struct ucred *cred;
+{
+	register struct vnode *vp = (struct vnode *)fp->f_data;
+	int before, error, ioflag;
+
+	LEASE_CHECK(vp, uio->uio_procp, cred, LEASE_READ);
+	VOP_LOCK(vp);
+	uio->uio_offset = fp->f_offset;
+	before = uio->uio_resid;
+	ioflag = (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0;
+	error = VOP_READ(vp, uio, ioflag, cred);
+	/* Bytes moved = residual before minus residual after. */
+	fp->f_offset += before - uio->uio_resid;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * File table vnode write routine.
+ * Writes at the file's current offset with the vnode locked and then
+ * updates the offset.  O_APPEND on a regular file is passed down as
+ * IO_APPEND and FNONBLOCK as IO_NDELAY.
+ */
+vn_write(fp, uio, cred)
+	struct file *fp;
+	struct uio *uio;
+	struct ucred *cred;
+{
+	register struct vnode *vp = (struct vnode *)fp->f_data;
+	int before, error, ioflag;
+
+	ioflag = 0;
+	if ((fp->f_flag & O_APPEND) && vp->v_type == VREG)
+		ioflag |= IO_APPEND;
+	if (fp->f_flag & FNONBLOCK)
+		ioflag |= IO_NDELAY;
+
+	LEASE_CHECK(vp, uio->uio_procp, cred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	uio->uio_offset = fp->f_offset;
+	before = uio->uio_resid;
+	error = VOP_WRITE(vp, uio, ioflag, cred);
+	/*
+	 * With IO_APPEND the new position is taken from the uio's
+	 * final offset rather than computed from the byte count.
+	 */
+	if ((ioflag & IO_APPEND) != 0)
+		fp->f_offset = uio->uio_offset;
+	else
+		fp->f_offset += before - uio->uio_resid;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * File table vnode stat routine.
+ * Fills in *sb from the attributes VOP_GETATTR reports for vp.
+ * Returns the VOP_GETATTR error, EBADF for a vnode type that has no
+ * S_IF* encoding below (st_dev and st_ino have already been stored
+ * by then), or 0 on success.
+ */
+vn_stat(vp, sb, p)
+	struct vnode *vp;
+	register struct stat *sb;
+	struct proc *p;
+{
+	struct vattr vattr;
+	register struct vattr *vap = &vattr;
+	u_short mode;
+	int error;
+
+	error = VOP_GETATTR(vp, vap, p->p_ucred, p);
+	if (error != 0)
+		return (error);
+
+	/* Identity fields are copied before the type is examined. */
+	sb->st_dev = vap->va_fsid;
+	sb->st_ino = vap->va_fileid;
+
+	/* Merge the file-type bits into the permission bits. */
+	mode = vap->va_mode;
+	switch (vp->v_type) {
+	case VREG:	mode |= S_IFREG;	break;
+	case VDIR:	mode |= S_IFDIR;	break;
+	case VBLK:	mode |= S_IFBLK;	break;
+	case VCHR:	mode |= S_IFCHR;	break;
+	case VLNK:	mode |= S_IFLNK;	break;
+	case VSOCK:	mode |= S_IFSOCK;	break;
+	case VFIFO:	mode |= S_IFIFO;	break;
+	default:
+		return (EBADF);
+	}
+	sb->st_mode = mode;
+
+	/* The remaining fields are straight copies from the vattr. */
+	sb->st_nlink = vap->va_nlink;
+	sb->st_uid = vap->va_uid;
+	sb->st_gid = vap->va_gid;
+	sb->st_rdev = vap->va_rdev;
+	sb->st_size = vap->va_size;
+	sb->st_atimespec = vap->va_atime;
+	sb->st_mtimespec = vap->va_mtime;
+	sb->st_ctimespec = vap->va_ctime;
+	sb->st_blksize = vap->va_blocksize;
+	sb->st_flags = vap->va_flags;
+	sb->st_gen = vap->va_gen;
+	/* st_blocks counts S_BLKSIZE units, va_bytes counts bytes. */
+	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
+	return (0);
+}
+
+/*
+ * File table vnode ioctl routine.
+ *
+ * Regular files and directories: FIONREAD stores, through data, the
+ * distance from the current file offset to the size reported by
+ * VOP_GETATTR; FIONBIO and FIOASYNC are accepted and ignored; every
+ * other request fails with ENOTTY, as do all requests on vnode types
+ * not listed below.
+ *
+ * FIFOs, character and block devices: the request is passed to
+ * VOP_IOCTL.  After a successful TIOCSCTTY the vnode is recorded as
+ * the session's controlling terminal vnode and a reference is taken
+ * on it.  The reference held on a previously recorded vnode is
+ * released, so repeating the ioctl does not leak references.
+ */
+vn_ioctl(fp, com, data, p)
+	struct file *fp;
+	int com;
+	caddr_t data;
+	struct proc *p;
+{
+	register struct vnode *vp = ((struct vnode *)fp->f_data);
+	struct vnode *ottyvp;
+	struct vattr vattr;
+	int error;
+
+	switch (vp->v_type) {
+
+	case VREG:
+	case VDIR:
+		if (com == FIONREAD) {
+			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
+				return (error);
+			*(int *)data = vattr.va_size - fp->f_offset;
+			return (0);
+		}
+		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
+			return (0);			/* XXX */
+		/* fall into ... */
+
+	default:
+		return (ENOTTY);
+
+	case VFIFO:
+	case VCHR:
+	case VBLK:
+		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
+		if (error == 0 && com == TIOCSCTTY) {
+			ottyvp = p->p_session->s_ttyvp;
+
+			/* Same vnode again: the session already holds it. */
+			if (ottyvp == vp)
+				return (0);
+
+			/* Install and reference the new vnode first ... */
+			p->p_session->s_ttyvp = vp;
+			VREF(vp);
+
+			/* ... then drop the session's hold on the old one. */
+			if (ottyvp != NULL)
+				vrele(ottyvp);
+		}
+		return (error);
+	}
+}
+
+/*
+ * File table vnode select routine.
+ * Forwards the request to VOP_SELECT for the vnode behind the file,
+ * passing the file's flags and credentials.
+ */
+vn_select(fp, which, p)
+	struct file *fp;
+	int which;
+	struct proc *p;
+{
+	struct vnode *vp = (struct vnode *)fp->f_data;
+
+	return (VOP_SELECT(vp, which, fp->f_flag, fp->f_cred, p));
+}
+
+/*
+ * File table vnode close routine.
+ * Closes the vnode behind the file through vn_close(), using the
+ * file's own flags and credentials.
+ */
+vn_closefile(fp, p)
+	struct file *fp;
+	struct proc *p;
+{
+	struct vnode *vp = (struct vnode *)fp->f_data;
+
+	return (vn_close(vp, fp->f_flag, fp->f_cred, p));
+}
diff --git a/sys/kern/vnode_if.pl b/sys/kern/vnode_if.pl
new file mode 100644
index 000000000000..e190fa04836d
--- /dev/null
+++ b/sys/kern/vnode_if.pl
@@ -0,0 +1,433 @@
+#!/bin/sh -
+#
+# Copyright (c) 1992, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+#
+
+# Script to produce VFS front-end sugar.
+#
+# usage: vnode_if.sh srcfile
+# (where srcfile is currently /sys/kern/vnode_if.src)
+#
+# These awk scripts are not particularly well written, specifically they
+# don't use arrays well and figure out the same information repeatedly.
+# Please rewrite them if you actually understand how to use awk. Note,
+# they use nawk extensions and gawk's toupper.
+
+if [ $# -ne 1 ] ; then
+ echo 'usage: vnode_if.sh srcfile'
+ exit 1
+fi
+
+# Name of the source file.
+SRC=$1
+
+# Names of the created files.
+CFILE=vnode_if.c
+HEADER=vnode_if.h
+
+# Awk program (must support nawk extensions and gawk's "toupper")
+# Use "awk" at Berkeley, "gawk" elsewhere.
+AWK=awk
+
+# Print out header information for vnode_if.h.
+cat << END_OF_LEADING_COMMENT > $HEADER
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+extern struct vnodeop_desc vop_default_desc;
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.h.
+$AWK '
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Get the function name.
+ name = $1;
+ uname = toupper(name);
+
+ # Get the function arguments.
+ for (c1 = 0;; ++c1) {
+ if (getline <= 0)
+ exit
+ if ($0 ~ "^};")
+ break;
+ a[c1] = $0;
+ }
+
+ # Print out the vop_F_args structure.
+ printf("struct %s_args {\n\tstruct vnodeop_desc *a_desc;\n",
+ name);
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ if (t[2] ~ "WILLRELE")
+ c4 = 3;
+ else
+ c4 = 2;
+ for (; c4 < c3; ++c4)
+ printf("%s ", t[c4]);
+ beg = match(t[c3], "[^*]");
+ printf("%sa_%s\n",
+ substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+ }
+ printf("};\n");
+
+ # Print out extern declaration.
+ printf("extern struct vnodeop_desc %s_desc;\n", name);
+
+ # Print out inline function.
+ printf("static inline int %s(", uname);
+ sep = ", ";
+ for (c2 = 0; c2 < c1; ++c2) {
+ if (c2 == c1 - 1)
+ sep = ")\n";
+ c3 = split(a[c2], t);
+ beg = match(t[c3], "[^*]");
+ end = match(t[c3], ";");
+ printf("%s%s", substr(t[c3], beg, end - beg), sep);
+ }
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ if (t[2] ~ "WILLRELE")
+ c4 = 3;
+ else
+ c4 = 2;
+ for (; c4 < c3; ++c4)
+ printf("%s ", t[c4]);
+ beg = match(t[c3], "[^*]");
+ printf("%s%s\n",
+ substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+ }
+ printf("{\n\tstruct %s_args a;\n\n", name);
+ printf("\ta.a_desc = VDESC(%s);\n", name);
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ beg = match(t[c3], "[^*]");
+ end = match(t[c3], ";");
+ printf("a.a_%s = %s\n",
+ substr(t[c3], beg, end - beg), substr(t[c3], beg));
+ }
+ c1 = split(a[0], t);
+ beg = match(t[c1], "[^*]");
+ end = match(t[c1], ";");
+ printf("\treturn (VCALL(%s, VOFFSET(%s), &a));\n}\n",
+ substr(t[c1], beg, end - beg), name);
+ }' < $SRC >> $HEADER
+
+# Print out header information for vnode_if.c.
+cat << END_OF_LEADING_COMMENT > $CFILE
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+struct vnodeop_desc vop_default_desc = {
+ 0,
+ "default",
+ 0,
+ NULL,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.c.
+$AWK 'function kill_surrounding_ws (s) {
+ sub (/^[ \t]*/, "", s);
+ sub (/[ \t]*$/, "", s);
+ return s;
+ }
+
+	# Read the argument lines of one operation, up to the line holding
+	# the closing brace, and record for each argument its direction
+	# (dirs), WILLRELE/WONTRELE marking (reles), type (types) and
+	# name (args).  numargs is left holding the argument count.
+	function read_args() {
+		numargs = 0;
+		# Compare against 0 so that a read error (getline returns -1)
+		# ends the loop instead of being taken for a line.
+		while ((getline ln) > 0) {
+			if (ln ~ /}/) {
+				break;
+			};
+
+			# Delete comments, if any.
+			gsub (/\/\*.*\*\//, "", ln);
+
+			# Delete leading/trailing space.
+			ln = kill_surrounding_ws(ln);
+
+			# Pick off direction.
+			if (1 == sub(/^INOUT[ \t]+/, "", ln))
+				dir = "INOUT";
+			else if (1 == sub(/^IN[ \t]+/, "", ln))
+				dir = "IN";
+			else if (1 == sub(/^OUT[ \t]+/, "", ln))
+				dir = "OUT";
+			else
+				bail("No IN/OUT direction for \"" ln "\".");
+
+			# check for "WILLRELE"
+			if (1 == sub(/^WILLRELE[ \t]+/, "", ln)) {
+				rele = "WILLRELE";
+			} else {
+				rele = "WONTRELE";
+			};
+
+			# kill trailing ;
+			if (1 != sub (/;$/, "", ln)) {
+				bail("Missing end-of-line ; in \"" ln "\".");
+			};
+
+			# pick off variable name
+			if (!(i = match(ln, /[A-Za-z0-9_]+$/))) {
+				bail("Missing var name \"a_foo\" in \"" ln "\".");
+			};
+			arg = substr (ln, i);
+			# Want to <<substr(ln, i) = "";>>, but nawk cannot.
+			# Hack around this.
+			ln = substr(ln, 1, i-1);
+
+			# what is left must be type
+			# (but clean it up some)
+			type = ln;
+			gsub (/[ \t]+/, " ", type);	# condense whitespace
+			type = kill_surrounding_ws(type);
+
+			# (boy this was easier in Perl)
+
+			numargs++;
+			dirs[numargs] = dir;
+			reles[numargs] = rele;
+			types[numargs] = type;
+			args[numargs] = arg;
+		};
+	}
+
+	# Report a malformed source line on standard error and stop.
+	# read_args calls this; it was previously never defined.
+	function bail(msg) {
+		printf("vnode_if.sh: %s\n", msg) | "cat 1>&2";
+		exit 1;
+	}
+
+ function generate_operation_vp_offsets() {
+ printf ("int %s_vp_offsets[] = {\n", name);
+ # as a side effect, figure out the releflags
+ releflags = "";
+ vpnum = 0;
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode *") {
+ printf ("\tVOPARG_OFFSETOF(struct %s_args,a_%s),\n",
+ name, args[i]);
+ if (reles[i] == "WILLRELE") {
+ releflags = releflags "|VDESC_VP" vpnum "_WILLRELE";
+ };
+ vpnum++;
+ };
+ };
+ sub (/^\|/, "", releflags);
+ print "\tVDESC_NO_OFFSET";
+ print "};";
+ }
+
+ function find_arg_with_type (type) {
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == type) {
+ return "VOPARG_OFFSETOF(struct " name "_args,a_" args[i] ")";
+ };
+ };
+ return "VDESC_NO_OFFSET";
+ }
+
+ function generate_operation_desc() {
+ printf ("struct vnodeop_desc %s_desc = {\n", name);
+ # offset
+ printf ("\t0,\n");
+ # printable name
+ printf ("\t\"%s\",\n", name);
+ # flags
+ vppwillrele = "";
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode **" &&
+ (reles[i] == "WILLRELE")) {
+ vppwillrele = "|VDESC_VPP_WILLRELE";
+ };
+ };
+ if (releflags == "") {
+ printf ("\t0%s,\n", vppwillrele);
+ } else {
+ printf ("\t%s%s,\n", releflags, vppwillrele);
+ };
+ # vp offsets
+ printf ("\t%s_vp_offsets,\n", name);
+ # vpp (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct vnode **"));
+ # cred (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct ucred *"));
+ # proc (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct proc *"));
+ # componentname
+ printf ("\t%s,\n", find_arg_with_type("struct componentname *"));
+ # transport layer information
+ printf ("\tNULL,\n};\n");
+ }
+
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # get the function name
+ name = $1;
+
+ # get the function arguments
+ read_args();
+
+ # Print out the vop_F_vp_offsets structure. This all depends
+ # on naming conventions and nothing else.
+ generate_operation_vp_offsets();
+
+ # Print out the vnodeop_desc structure.
+ generate_operation_desc();
+
+ printf "\n";
+
+ }' < $SRC >> $CFILE
+# THINGS THAT DON'T WORK RIGHT YET.
+#
+# Two existing BSD vnodeops (bwrite and strategy) don't take any vnodes as
+# arguments. This means that these operations can't function successfully
+# through a bypass routine.
+#
+# Bwrite and strategy will be replaced when the VM page/buffer cache
+# integration happens.
+#
+# To get around this problem for now we handle these ops as special cases.
+
+cat << END_OF_SPECIAL_CASES >> $HEADER
+#include <sys/buf.h>
+struct vop_strategy_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_strategy_desc;
+static inline int VOP_STRATEGY(bp)
+ struct buf *bp;
+{
+ struct vop_strategy_args a;
+
+ a.a_desc = VDESC(vop_strategy);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_strategy), &a));
+}
+
+struct vop_bwrite_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_bwrite_desc;
+static inline int VOP_BWRITE(bp)
+ struct buf *bp;
+{
+ struct vop_bwrite_args a;
+
+ a.a_desc = VDESC(vop_bwrite);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_bwrite), &a));
+}
+END_OF_SPECIAL_CASES
+
+cat << END_OF_SPECIAL_CASES >> $CFILE
+int vop_strategy_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_strategy_desc = {
+ 0,
+ "vop_strategy",
+ 0,
+ vop_strategy_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+int vop_bwrite_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_bwrite_desc = {
+ 0,
+ "vop_bwrite",
+ 0,
+ vop_bwrite_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+END_OF_SPECIAL_CASES
+
+# Add the vfs_op_descs array to the C file.
+$AWK '
+ BEGIN {
+ printf("\nstruct vnodeop_desc *vfs_op_descs[] = {\n");
+ printf("\t&vop_default_desc, /* MUST BE FIRST */\n");
+ printf("\t&vop_strategy_desc, /* XXX: SPECIAL CASE */\n");
+ printf("\t&vop_bwrite_desc, /* XXX: SPECIAL CASE */\n");
+ }
+ END {
+ printf("\tNULL\n};\n");
+ }
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Get the function name.
+ printf("\t&%s_desc,\n", $1);
+
+ # Skip the function arguments.
+ for (;;) {
+ if (getline <= 0)
+ exit
+ if ($0 ~ "^};")
+ break;
+ }
+ }' < $SRC >> $CFILE
+
diff --git a/sys/kern/vnode_if.sh b/sys/kern/vnode_if.sh
new file mode 100644
index 000000000000..e190fa04836d
--- /dev/null
+++ b/sys/kern/vnode_if.sh
@@ -0,0 +1,433 @@
+#!/bin/sh -
+#
+# Copyright (c) 1992, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+#
+
+# Script to produce VFS front-end sugar.
+#
+# usage: vnode_if.sh srcfile
+# (where srcfile is currently /sys/kern/vnode_if.src)
+#
+# These awk scripts are not particularly well written, specifically they
+# don't use arrays well and figure out the same information repeatedly.
+# Please rewrite them if you actually understand how to use awk. Note,
+# they use nawk extensions and gawk's toupper.
+
+if [ $# -ne 1 ] ; then
+ echo 'usage: vnode_if.sh srcfile'
+ exit 1
+fi
+
+# Name of the source file.
+SRC=$1
+
+# Names of the created files.
+CFILE=vnode_if.c
+HEADER=vnode_if.h
+
+# Awk program (must support nawk extensions and gawk's "toupper")
+# Use "awk" at Berkeley, "gawk" elsewhere.
+AWK=awk
+
+# Print out header information for vnode_if.h.
+cat << END_OF_LEADING_COMMENT > $HEADER
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+extern struct vnodeop_desc vop_default_desc;
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.h.
+$AWK '
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Get the function name.
+ name = $1;
+ uname = toupper(name);
+
+ # Get the function arguments.
+ for (c1 = 0;; ++c1) {
+ if (getline <= 0)
+ exit
+ if ($0 ~ "^};")
+ break;
+ a[c1] = $0;
+ }
+
+ # Print out the vop_F_args structure.
+ printf("struct %s_args {\n\tstruct vnodeop_desc *a_desc;\n",
+ name);
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ if (t[2] ~ "WILLRELE")
+ c4 = 3;
+ else
+ c4 = 2;
+ for (; c4 < c3; ++c4)
+ printf("%s ", t[c4]);
+ beg = match(t[c3], "[^*]");
+ printf("%sa_%s\n",
+ substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+ }
+ printf("};\n");
+
+ # Print out extern declaration.
+ printf("extern struct vnodeop_desc %s_desc;\n", name);
+
+ # Print out inline function.
+ printf("static inline int %s(", uname);
+ sep = ", ";
+ for (c2 = 0; c2 < c1; ++c2) {
+ if (c2 == c1 - 1)
+ sep = ")\n";
+ c3 = split(a[c2], t);
+ beg = match(t[c3], "[^*]");
+ end = match(t[c3], ";");
+ printf("%s%s", substr(t[c3], beg, end - beg), sep);
+ }
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ if (t[2] ~ "WILLRELE")
+ c4 = 3;
+ else
+ c4 = 2;
+ for (; c4 < c3; ++c4)
+ printf("%s ", t[c4]);
+ beg = match(t[c3], "[^*]");
+ printf("%s%s\n",
+ substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+ }
+ printf("{\n\tstruct %s_args a;\n\n", name);
+ printf("\ta.a_desc = VDESC(%s);\n", name);
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ beg = match(t[c3], "[^*]");
+ end = match(t[c3], ";");
+ printf("a.a_%s = %s\n",
+ substr(t[c3], beg, end - beg), substr(t[c3], beg));
+ }
+ c1 = split(a[0], t);
+ beg = match(t[c1], "[^*]");
+ end = match(t[c1], ";");
+ printf("\treturn (VCALL(%s, VOFFSET(%s), &a));\n}\n",
+ substr(t[c1], beg, end - beg), name);
+ }' < $SRC >> $HEADER
+
+# Print out header information for vnode_if.c.
+cat << END_OF_LEADING_COMMENT > $CFILE
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+struct vnodeop_desc vop_default_desc = {
+ 0,
+ "default",
+ 0,
+ NULL,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.c.
+$AWK 'function kill_surrounding_ws (s) {
+ sub (/^[ \t]*/, "", s);
+ sub (/[ \t]*$/, "", s);
+ return s;
+ }
+
+	# Read the argument lines of one operation, up to the line holding
+	# the closing brace, and record for each argument its direction
+	# (dirs), WILLRELE/WONTRELE marking (reles), type (types) and
+	# name (args).  numargs is left holding the argument count.
+	function read_args() {
+		numargs = 0;
+		# Compare against 0 so that a read error (getline returns -1)
+		# ends the loop instead of being taken for a line.
+		while ((getline ln) > 0) {
+			if (ln ~ /}/) {
+				break;
+			};
+
+			# Delete comments, if any.
+			gsub (/\/\*.*\*\//, "", ln);
+
+			# Delete leading/trailing space.
+			ln = kill_surrounding_ws(ln);
+
+			# Pick off direction.
+			if (1 == sub(/^INOUT[ \t]+/, "", ln))
+				dir = "INOUT";
+			else if (1 == sub(/^IN[ \t]+/, "", ln))
+				dir = "IN";
+			else if (1 == sub(/^OUT[ \t]+/, "", ln))
+				dir = "OUT";
+			else
+				bail("No IN/OUT direction for \"" ln "\".");
+
+			# check for "WILLRELE"
+			if (1 == sub(/^WILLRELE[ \t]+/, "", ln)) {
+				rele = "WILLRELE";
+			} else {
+				rele = "WONTRELE";
+			};
+
+			# kill trailing ;
+			if (1 != sub (/;$/, "", ln)) {
+				bail("Missing end-of-line ; in \"" ln "\".");
+			};
+
+			# pick off variable name
+			if (!(i = match(ln, /[A-Za-z0-9_]+$/))) {
+				bail("Missing var name \"a_foo\" in \"" ln "\".");
+			};
+			arg = substr (ln, i);
+			# Want to <<substr(ln, i) = "";>>, but nawk cannot.
+			# Hack around this.
+			ln = substr(ln, 1, i-1);
+
+			# what is left must be type
+			# (but clean it up some)
+			type = ln;
+			gsub (/[ \t]+/, " ", type);	# condense whitespace
+			type = kill_surrounding_ws(type);
+
+			# (boy this was easier in Perl)
+
+			numargs++;
+			dirs[numargs] = dir;
+			reles[numargs] = rele;
+			types[numargs] = type;
+			args[numargs] = arg;
+		};
+	}
+
+	# Report a malformed source line on standard error and stop.
+	# read_args calls this; it was previously never defined.
+	function bail(msg) {
+		printf("vnode_if.sh: %s\n", msg) | "cat 1>&2";
+		exit 1;
+	}
+
+ function generate_operation_vp_offsets() {
+ printf ("int %s_vp_offsets[] = {\n", name);
+ # as a side effect, figure out the releflags
+ releflags = "";
+ vpnum = 0;
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode *") {
+ printf ("\tVOPARG_OFFSETOF(struct %s_args,a_%s),\n",
+ name, args[i]);
+ if (reles[i] == "WILLRELE") {
+ releflags = releflags "|VDESC_VP" vpnum "_WILLRELE";
+ };
+ vpnum++;
+ };
+ };
+ sub (/^\|/, "", releflags);
+ print "\tVDESC_NO_OFFSET";
+ print "};";
+ }
+
+ function find_arg_with_type (type) {
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == type) {
+ return "VOPARG_OFFSETOF(struct " name "_args,a_" args[i] ")";
+ };
+ };
+ return "VDESC_NO_OFFSET";
+ }
+
+ function generate_operation_desc() {
+ printf ("struct vnodeop_desc %s_desc = {\n", name);
+ # offset
+ printf ("\t0,\n");
+ # printable name
+ printf ("\t\"%s\",\n", name);
+ # flags
+ vppwillrele = "";
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode **" &&
+ (reles[i] == "WILLRELE")) {
+ vppwillrele = "|VDESC_VPP_WILLRELE";
+ };
+ };
+ if (releflags == "") {
+ printf ("\t0%s,\n", vppwillrele);
+ } else {
+ printf ("\t%s%s,\n", releflags, vppwillrele);
+ };
+ # vp offsets
+ printf ("\t%s_vp_offsets,\n", name);
+ # vpp (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct vnode **"));
+ # cred (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct ucred *"));
+ # proc (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct proc *"));
+ # componentname
+ printf ("\t%s,\n", find_arg_with_type("struct componentname *"));
+ # transport layer information
+ printf ("\tNULL,\n};\n");
+ }
+
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # get the function name
+ name = $1;
+
+ # get the function arguments
+ read_args();
+
+ # Print out the vop_F_vp_offsets structure. This all depends
+ # on naming conventions and nothing else.
+ generate_operation_vp_offsets();
+
+ # Print out the vnodeop_desc structure.
+ generate_operation_desc();
+
+ printf "\n";
+
+ }' < $SRC >> $CFILE
+# THINGS THAT DON'T WORK RIGHT YET.
+#
+# Two existing BSD vnodeops (bwrite and strategy) don't take any vnodes as
+# arguments. This means that these operations can't function successfully
+# through a bypass routine.
+#
+# Bwrite and strategy will be replaced when the VM page/buffer cache
+# integration happens.
+#
+# To get around this problem for now we handle these ops as special cases.
+
+cat << END_OF_SPECIAL_CASES >> $HEADER
+#include <sys/buf.h>
+struct vop_strategy_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_strategy_desc;
+static inline int VOP_STRATEGY(bp)
+ struct buf *bp;
+{
+ struct vop_strategy_args a;
+
+ a.a_desc = VDESC(vop_strategy);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_strategy), &a));
+}
+
+struct vop_bwrite_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_bwrite_desc;
+static inline int VOP_BWRITE(bp)
+ struct buf *bp;
+{
+ struct vop_bwrite_args a;
+
+ a.a_desc = VDESC(vop_bwrite);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_bwrite), &a));
+}
+END_OF_SPECIAL_CASES
+
+cat << END_OF_SPECIAL_CASES >> $CFILE
+int vop_strategy_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_strategy_desc = {
+ 0,
+ "vop_strategy",
+ 0,
+ vop_strategy_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+int vop_bwrite_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_bwrite_desc = {
+ 0,
+ "vop_bwrite",
+ 0,
+ vop_bwrite_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+END_OF_SPECIAL_CASES
+
+# Add the vfs_op_descs array to the C file.
+$AWK '
+ BEGIN {
+ printf("\nstruct vnodeop_desc *vfs_op_descs[] = {\n");
+ printf("\t&vop_default_desc, /* MUST BE FIRST */\n");
+ printf("\t&vop_strategy_desc, /* XXX: SPECIAL CASE */\n");
+ printf("\t&vop_bwrite_desc, /* XXX: SPECIAL CASE */\n");
+ }
+ END {
+ printf("\tNULL\n};\n");
+ }
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Get the function name.
+ printf("\t&%s_desc,\n", $1);
+
+ # Skip the function arguments.
+ for (;;) {
+ if (getline <= 0)
+ exit
+ if ($0 ~ "^};")
+ break;
+ }
+ }' < $SRC >> $CFILE
+
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
new file mode 100644
index 000000000000..caee21dce0b7
--- /dev/null
+++ b/sys/kern/vnode_if.src
@@ -0,0 +1,296 @@
+#
+# Copyright (c) 1992, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# @(#)vnode_if.src 8.3 (Berkeley) 2/3/94
+#
+vop_lookup {
+ IN struct vnode *dvp;
+ INOUT struct vnode **vpp;
+ IN struct componentname *cnp;
+};
+
+vop_create {
+ IN WILLRELE struct vnode *dvp;
+ OUT struct vnode **vpp;
+ IN struct componentname *cnp;
+ IN struct vattr *vap;
+};
+
+vop_mknod {
+ IN WILLRELE struct vnode *dvp;
+ OUT WILLRELE struct vnode **vpp;
+ IN struct componentname *cnp;
+ IN struct vattr *vap;
+};
+
+vop_open {
+ IN struct vnode *vp;
+ IN int mode;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_close {
+ IN struct vnode *vp;
+ IN int fflag;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_access {
+ IN struct vnode *vp;
+ IN int mode;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_getattr {
+ IN struct vnode *vp;
+ IN struct vattr *vap;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_setattr {
+ IN struct vnode *vp;
+ IN struct vattr *vap;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_read {
+ IN struct vnode *vp;
+ INOUT struct uio *uio;
+ IN int ioflag;
+ IN struct ucred *cred;
+};
+
+vop_write {
+ IN struct vnode *vp;
+ INOUT struct uio *uio;
+ IN int ioflag;
+ IN struct ucred *cred;
+};
+
+vop_ioctl {
+ IN struct vnode *vp;
+ IN int command;
+ IN caddr_t data;
+ IN int fflag;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+# Needs work? (fflags)
+vop_select {
+ IN struct vnode *vp;
+ IN int which;
+ IN int fflags;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_mmap {
+ IN struct vnode *vp;
+ IN int fflags;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_fsync {
+ IN struct vnode *vp;
+ IN struct ucred *cred;
+ IN int waitfor;
+ IN struct proc *p;
+};
+
+# Needs work: Is newoff right? What's it mean?
+vop_seek {
+ IN struct vnode *vp;
+ IN off_t oldoff;
+ IN off_t newoff;
+ IN struct ucred *cred;
+};
+
+vop_remove {
+ IN WILLRELE struct vnode *dvp;
+ IN WILLRELE struct vnode *vp;
+ IN struct componentname *cnp;
+};
+
+vop_link {
+ IN WILLRELE struct vnode *vp;
+ IN struct vnode *tdvp;
+ IN struct componentname *cnp;
+};
+
+vop_rename {
+ IN WILLRELE struct vnode *fdvp;
+ IN WILLRELE struct vnode *fvp;
+ IN struct componentname *fcnp;
+ IN WILLRELE struct vnode *tdvp;
+ IN WILLRELE struct vnode *tvp;
+ IN struct componentname *tcnp;
+};
+
+vop_mkdir {
+ IN WILLRELE struct vnode *dvp;
+ OUT struct vnode **vpp;
+ IN struct componentname *cnp;
+ IN struct vattr *vap;
+};
+
+vop_rmdir {
+ IN WILLRELE struct vnode *dvp;
+ IN WILLRELE struct vnode *vp;
+ IN struct componentname *cnp;
+};
+
+vop_symlink {
+ IN WILLRELE struct vnode *dvp;
+ OUT WILLRELE struct vnode **vpp;
+ IN struct componentname *cnp;
+ IN struct vattr *vap;
+ IN char *target;
+};
+
+vop_readdir {
+ IN struct vnode *vp;
+ INOUT struct uio *uio;
+ IN struct ucred *cred;
+};
+
+vop_readlink {
+ IN struct vnode *vp;
+ INOUT struct uio *uio;
+ IN struct ucred *cred;
+};
+
+vop_abortop {
+ IN struct vnode *dvp;
+ IN struct componentname *cnp;
+};
+
+vop_inactive {
+ IN struct vnode *vp;
+};
+
+vop_reclaim {
+ IN struct vnode *vp;
+};
+
+vop_lock {
+ IN struct vnode *vp;
+};
+
+vop_unlock {
+ IN struct vnode *vp;
+};
+
+vop_bmap {
+ IN struct vnode *vp;
+ IN daddr_t bn;
+ OUT struct vnode **vpp;
+ IN daddr_t *bnp;
+ OUT int *runp;
+};
+
+#vop_strategy {
+# IN struct buf *bp;
+#};
+
+vop_print {
+ IN struct vnode *vp;
+};
+
+vop_islocked {
+ IN struct vnode *vp;
+};
+
+vop_pathconf {
+ IN struct vnode *vp;
+ IN int name;
+ OUT int *retval;
+};
+
+vop_advlock {
+ IN struct vnode *vp;
+ IN caddr_t id;
+ IN int op;
+ IN struct flock *fl;
+ IN int flags;
+};
+
+vop_blkatoff {
+ IN struct vnode *vp;
+ IN off_t offset;
+ OUT char **res;
+ OUT struct buf **bpp;
+};
+
+vop_valloc {
+ IN struct vnode *pvp;
+ IN int mode;
+ IN struct ucred *cred;
+ OUT struct vnode **vpp;
+};
+
+vop_reallocblks {
+ IN struct vnode *vp;
+ IN struct cluster_save *buflist;
+};
+
+vop_vfree {
+ IN struct vnode *pvp;
+ IN ino_t ino;
+ IN int mode;
+};
+
+vop_truncate {
+ IN struct vnode *vp;
+ IN off_t length;
+ IN int flags;
+ IN struct ucred *cred;
+ IN struct proc *p;
+};
+
+vop_update {
+ IN struct vnode *vp;
+ IN struct timeval *access;
+ IN struct timeval *modify;
+ IN int waitfor;
+};
+
+# Needs work: no vp?
+#vop_bwrite {
+# IN struct buf *bp;
+#};
diff --git a/sys/libkern/Makefile b/sys/libkern/Makefile
new file mode 100644
index 000000000000..991a4350144e
--- /dev/null
+++ b/sys/libkern/Makefile
@@ -0,0 +1,20 @@
+# @(#)Makefile 7.9 (Berkeley) 6/1/93
+
+LIB= kern
+CFLAGS+= -I${.CURDIR} -I${.CURDIR}/..
+SRCS= adddi3.c anddi3.c ashldi3.c ashrdi3.c bcmp.c cmpdi2.c divdi3.c \
+ ffs.c iordi3.c locc.c lshldi3.c lshrdi3.c mcount.c moddi3.c \
+ muldi3.c negdi2.c notdi2.c qdivrem.c random.c rindex.c scanc.c \
+ skpc.c strcat.c strcmp.c strcpy.c strlen.c strncpy.c subdi3.c \
+ ucmpdi2.c udivdi3.c umoddi3.c xordi3.c
+
+.if exists(${.CURDIR}/${MACHINE}/Makefile.inc)
+.PATH: ${.CURDIR}/${MACHINE}
+.include "${.CURDIR}/${MACHINE}/Makefile.inc"
+.endif
+
+# mcount cannot be compiled with profiling
+mcount.po: mcount.o
+ cp mcount.o mcount.po
+
+.include <bsd.lib.mk>
diff --git a/sys/libkern/adddi3.c b/sys/libkern/adddi3.c
new file mode 100644
index 000000000000..d10da47e0cf3
--- /dev/null
+++ b/sys/libkern/adddi3.c
@@ -0,0 +1,60 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)adddi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Add two quads one word at a time.  The low words are summed first;
+ * that single-word addition carried out exactly when the sum is smaller
+ * than one of its operands (either one would do for the test), and the
+ * resulting 0 or 1 is added into the sum of the high words.
+ */
+quad_t
+__adddi3(a, b)
+	quad_t a, b;
+{
+	union uu lhs, rhs, res;
+	int carry;
+
+	lhs.q = a;
+	rhs.q = b;
+	res.ul[L] = lhs.ul[L] + rhs.ul[L];
+	carry = res.ul[L] < rhs.ul[L];
+	res.ul[H] = lhs.ul[H] + rhs.ul[H] + carry;
+	return (res.q);
+}
diff --git a/sys/libkern/anddi3.c b/sys/libkern/anddi3.c
new file mode 100644
index 000000000000..5ae45ac1a86a
--- /dev/null
+++ b/sys/libkern/anddi3.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)anddi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Bitwise AND of two quads, applied to each of the two words of the
+ * union representation in turn.
+ */
+quad_t
+__anddi3(a, b)
+	quad_t a, b;
+{
+	union uu x, y;
+	int i;
+
+	x.q = a;
+	y.q = b;
+	for (i = 0; i < 2; i++)
+		x.ul[i] &= y.ul[i];
+	return (x.q);
+}
diff --git a/sys/libkern/ashldi3.c b/sys/libkern/ashldi3.c
new file mode 100644
index 000000000000..72501adfaedd
--- /dev/null
+++ b/sys/libkern/ashldi3.c
@@ -0,0 +1,66 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)ashldi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Shift a (signed) quad value left by `shift' bits.  An arithmetic
+ * left shift produces the same bits as a logical left shift, so the
+ * words are simply shifted as unsigned values.
+ */
+quad_t
+__ashldi3(a, shift)
+	quad_t a;
+	qshift_t shift;
+{
+	union uu v;
+
+	v.q = a;
+	if (shift >= LONG_BITS) {
+		/*
+		 * The high word is rebuilt from the low word alone; a
+		 * count of QUAD_BITS or more is tested for explicitly
+		 * and yields zero instead of being handed to the shift.
+		 */
+		if (shift >= QUAD_BITS)
+			v.ul[H] = 0;
+		else
+			v.ul[H] = v.ul[L] << (shift - LONG_BITS);
+		v.ul[L] = 0;
+		return (v.q);
+	}
+	if (shift > 0) {
+		/*
+		 * A zero count is skipped: it would turn the right shift
+		 * below into a shift by LONG_BITS.
+		 */
+		v.ul[H] = (v.ul[H] << shift) | (v.ul[L] >> (LONG_BITS - shift));
+		v.ul[L] <<= shift;
+	}
+	return (v.q);
+}
diff --git a/sys/libkern/ashrdi3.c b/sys/libkern/ashrdi3.c
new file mode 100644
index 000000000000..9ffa5ed06b55
--- /dev/null
+++ b/sys/libkern/ashrdi3.c
@@ -0,0 +1,75 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)ashrdi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Shift a (signed) quad value right (arithmetic shift right).
+ *
+ * a is the value to shift and shift is the bit count.  The value is
+ * worked on as two words through union uu: the high word is shifted
+ * through the sl member and the low word through the ul member.
+ * Counts of LONG_BITS or more build the low word from the high word
+ * only; counts of QUAD_BITS or more leave just the sign word in both
+ * halves.  A count of zero returns a unchanged.
+ */
+quad_t
+__ashrdi3(a, shift)
+ quad_t a;
+ qshift_t shift;
+{
+ union uu aa;
+
+ aa.q = a;
+ if (shift >= LONG_BITS) {	/* low word comes from the high word only */
+ long s;
+
+ /*
+ * Smear bits rightward using the machine's right-shift
+ * method, whether that is sign extension or zero fill,
+ * to get the `sign word' s. Note that shifting by
+ * LONG_BITS is undefined, so we shift (LONG_BITS-1),
+ * then 1 more, to get our answer.
+ */
+ s = (aa.sl[H] >> (LONG_BITS - 1)) >> 1;
+ aa.ul[L] = shift >= QUAD_BITS ? s :	/* everything shifted out */
+ aa.sl[H] >> (shift - LONG_BITS);
+ aa.ul[H] = s;	/* high word is now the sign word */
+ } else if (shift > 0) {	/* shift == 0 falls through untouched */
+ aa.ul[L] = (aa.ul[L] >> shift) |
+ (aa.ul[H] << (LONG_BITS - shift));	/* bits leaving the high word */
+ aa.sl[H] >>= shift;	/* shifted via the sl (signed) member */
+ }
+ return (aa.q);
+}
diff --git a/sys/libkern/bcmp.c b/sys/libkern/bcmp.c
new file mode 100644
index 000000000000..5a3ae616800e
--- /dev/null
+++ b/sys/libkern/bcmp.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)bcmp.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <string.h>
+
+/*
+ * bcmp -- vax cmpc3 instruction
+ *
+ * Compare the first `length' bytes of b1 and b2.  Returns 0 when the
+ * two regions are identical (always the case when length is 0) and 1
+ * when any byte differs.
+ *
+ * The result is deliberately not the number of bytes left to compare:
+ * that count is a size_t, and narrowing it to the int return type can
+ * produce 0 for unequal buffers where size_t is wider than int.
+ */
+int
+bcmp(b1, b2, length)
+	const void *b1, *b2;
+	register size_t length;
+{
+	register const char *p1, *p2;
+
+	if (length == 0)
+		return (0);
+	p1 = b1;
+	p2 = b2;
+	do {
+		if (*p1++ != *p2++)
+			return (1);
+	} while (--length);
+	return (0);
+}
diff --git a/sys/libkern/cmpdi2.c b/sys/libkern/cmpdi2.c
new file mode 100644
index 000000000000..f6e4bdd6a4df
--- /dev/null
+++ b/sys/libkern/cmpdi2.c
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)cmpdi2.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Three-way comparison of two signed quads: the result is 0 when
+ * a < b, 1 when a == b and 2 when a > b.  The high words are compared
+ * through the signed member first; only when they are equal do the low
+ * words, compared through the unsigned member, decide the outcome.
+ */
+int
+__cmpdi2(a, b)
+	quad_t a, b;
+{
+	union uu x, y;
+
+	x.q = a;
+	y.q = b;
+	if (x.sl[H] != y.sl[H])
+		return (x.sl[H] < y.sl[H] ? 0 : 2);
+	if (x.ul[L] != y.ul[L])
+		return (x.ul[L] < y.ul[L] ? 0 : 2);
+	return (1);
+}
diff --git a/sys/libkern/divdi3.c b/sys/libkern/divdi3.c
new file mode 100644
index 000000000000..da7b2fccd015
--- /dev/null
+++ b/sys/libkern/divdi3.c
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)divdi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Divide two signed quads.  Each operand is converted to unsigned and
+ * negated there if it was negative, the two are divided by
+ * __qdivrem() with no remainder requested, and the quotient is negated
+ * when exactly one of the operands was negative.
+ * ??? if -1/2 should produce -1 on this machine, this code is wrong
+ */
+quad_t
+__divdi3(a, b)
+	quad_t a, b;
+{
+	u_quad_t num, den, quo;
+	int negate;
+
+	negate = 0;
+	num = a;
+	if (a < 0) {
+		num = -num;
+		negate = !negate;
+	}
+	den = b;
+	if (b < 0) {
+		den = -den;
+		negate = !negate;
+	}
+	quo = __qdivrem(num, den, (u_quad_t *)0);
+	if (negate)
+		quo = -quo;
+	return (quo);
+}
diff --git a/sys/libkern/ffs.c b/sys/libkern/ffs.c
new file mode 100644
index 000000000000..099ff8e4c911
--- /dev/null
+++ b/sys/libkern/ffs.c
@@ -0,0 +1,54 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)ffs.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <string.h>
+
+/*
+ * ffs -- vax ffs instruction
+ *
+ * Returns the position of the least significant set bit of mask,
+ * counting from 1, or 0 when mask has no bits set.
+ */
+int
+ffs(mask)
+	register int mask;
+{
+	register int pos;
+
+	if (mask == 0)
+		return(0);
+	pos = 1;
+	while ((mask & 1) == 0) {
+		mask >>= 1;
+		pos++;
+	}
+	return(pos);
+}
diff --git a/sys/libkern/iordi3.c b/sys/libkern/iordi3.c
new file mode 100644
index 000000000000..e225005d414e
--- /dev/null
+++ b/sys/libkern/iordi3.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)iordi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Bitwise inclusive OR of two quads: start from a copy of a and OR
+ * each of the two words of b into it.
+ */
+quad_t
+__iordi3(a, b)
+	quad_t a, b;
+{
+	union uu lhs, rhs, res;
+
+	lhs.q = a;
+	rhs.q = b;
+	res = lhs;
+	res.ul[0] |= rhs.ul[0];
+	res.ul[1] |= rhs.ul[1];
+	return (res.q);
+}
diff --git a/sys/libkern/libkern.h b/sys/libkern/libkern.h
new file mode 100644
index 000000000000..0e465e03dfde
--- /dev/null
+++ b/sys/libkern/libkern.h
@@ -0,0 +1,98 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)libkern.h 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/types.h>
+
+/*
+ * Minimum/maximum helpers, one pair per integer flavour:
+ *	imax/imin	int
+ *	lmax/lmin	long
+ *	max/min		u_int
+ *	ulmax/ulmin	u_long
+ * When both arguments compare equal, b is the value returned.
+ */
+static inline int
+imax(a, b)
+	int a, b;
+{
+	if (a > b)
+		return (a);
+	return (b);
+}
+static inline int
+imin(a, b)
+	int a, b;
+{
+	if (a < b)
+		return (a);
+	return (b);
+}
+static inline long
+lmax(a, b)
+	long a, b;
+{
+	if (a > b)
+		return (a);
+	return (b);
+}
+static inline long
+lmin(a, b)
+	long a, b;
+{
+	if (a < b)
+		return (a);
+	return (b);
+}
+static inline u_int
+max(a, b)
+	u_int a, b;
+{
+	if (a > b)
+		return (a);
+	return (b);
+}
+static inline u_int
+min(a, b)
+	u_int a, b;
+{
+	if (a < b)
+		return (a);
+	return (b);
+}
+static inline u_long
+ulmax(a, b)
+	u_long a, b;
+{
+	if (a > b)
+		return (a);
+	return (b);
+}
+static inline u_long
+ulmin(a, b)
+	u_long a, b;
+{
+	if (a < b)
+		return (a);
+	return (b);
+}
+
+/* Prototypes for non-quad routines. */
+int bcmp __P((const void *, const void *, size_t));
+int ffs __P((int));
+int locc __P((int, char *, u_int));
+u_long random __P((void));
+char *rindex __P((const char *, int));
+int scanc __P((u_int, u_char *, u_char *, int));
+int skpc __P((int, int, char *));
+char *strcat __P((char *, const char *));
+char *strcpy __P((char *, const char *));
+size_t strlen __P((const char *));
+char *strncpy __P((char *, const char *, size_t));
diff --git a/sys/libkern/locc.c b/sys/libkern/locc.c
new file mode 100644
index 000000000000..3767222c5f0f
--- /dev/null
+++ b/sys/libkern/locc.c
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)locc.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <libkern/libkern.h>
+
+/*
+ * Scan the size bytes starting at cp0 for the first byte equal to
+ * mask0 (narrowed to a u_char).  Returns the number of bytes from that
+ * byte to the end of the buffer, counting the matching byte itself, or
+ * 0 when no byte matches.
+ */
+int
+locc(mask0, cp0, size)
+	int mask0;
+	char *cp0;
+	u_int size;
+{
+	register u_char *p, *limit;
+	register u_char want;
+
+	want = mask0;
+	p = (u_char *)cp0;
+	limit = p + size;
+	while (p < limit) {
+		if (*p == want)
+			break;
+		p++;
+	}
+	/* p stopped on the match, or on limit if nothing matched. */
+	return (limit - p);
+}
diff --git a/sys/libkern/lshldi3.c b/sys/libkern/lshldi3.c
new file mode 100644
index 000000000000..0af6051c1a61
--- /dev/null
+++ b/sys/libkern/lshldi3.c
@@ -0,0 +1,66 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)lshldi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Logical left shift of a quad by `shift' bits (identical to an
+ * arithmetic left shift).
+ *
+ * A shift of LONG_BITS or more moves what is left of the low word into
+ * the high word and clears the low word; a shift of QUAD_BITS or more
+ * yields 0.  A shift that is not greater than 0 returns a unchanged.
+ */
+quad_t
+__lshldi3(a, shift)
+	quad_t a;
+	qshift_t shift;
+{
+	union uu v;
+
+	v.q = a;
+	if (shift >= LONG_BITS) {
+		/* Whole-word case: nothing of the old high word survives. */
+		if (shift >= QUAD_BITS)
+			v.ul[H] = 0;
+		else
+			v.ul[H] = v.ul[L] << (shift - LONG_BITS);
+		v.ul[L] = 0;
+		return (v.q);
+	}
+	if (shift > 0) {
+		/* Bits shifted out of the low word carry into the high word. */
+		v.ul[H] = (v.ul[H] << shift) |
+		    (v.ul[L] >> (LONG_BITS - shift));
+		v.ul[L] <<= shift;
+	}
+	return (v.q);
+}
diff --git a/sys/libkern/lshrdi3.c b/sys/libkern/lshrdi3.c
new file mode 100644
index 000000000000..add2eda988cf
--- /dev/null
+++ b/sys/libkern/lshrdi3.c
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)lshrdi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Logical right shift of an (unsigned) quad by `shift' bits.
+ *
+ * A shift of LONG_BITS or more moves what is left of the high word into
+ * the low word and clears the high word; a shift of QUAD_BITS or more
+ * yields 0.  A shift that is not greater than 0 returns a unchanged.
+ */
+quad_t
+__lshrdi3(a, shift)
+	quad_t a;
+	qshift_t shift;
+{
+	union uu v;
+
+	v.q = a;
+	if (shift >= LONG_BITS) {
+		/* Whole-word case: nothing of the old low word survives. */
+		if (shift >= QUAD_BITS)
+			v.ul[L] = 0;
+		else
+			v.ul[L] = v.ul[H] >> (shift - LONG_BITS);
+		v.ul[H] = 0;
+		return (v.q);
+	}
+	if (shift > 0) {
+		/* Bits shifted out of the high word drop into the low word. */
+		v.ul[L] = (v.ul[L] >> shift) |
+		    (v.ul[H] << (LONG_BITS - shift));
+		v.ul[H] >>= shift;
+	}
+	return (v.q);
+}
diff --git a/sys/libkern/mcount.c b/sys/libkern/mcount.c
new file mode 100644
index 000000000000..523217d1d2d9
--- /dev/null
+++ b/sys/libkern/mcount.c
@@ -0,0 +1,178 @@
+/*-
+ * Copyright (c) 1983, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(lint) && !defined(KERNEL) && defined(LIBC_SCCS)
+static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
+#endif
+
+#include <sys/param.h>
+#include <sys/gmon.h>
+
+/*
+ * mcount is called on entry to each function compiled with the profiling
+ * switch set. _mcount(), which is declared in a machine-dependent way
+ * with _MCOUNT_DECL, does the actual work and is either inlined into a
+ * C routine or called by an assembly stub. In any case, this magic is
+ * taken care of by the MCOUNT definition in <machine/profile.h>.
+ *
+ * _mcount updates data structures that represent traversals of the
+ * program's call graph edges. frompc and selfpc are the return
+ * address and function address that represents the given call graph edge.
+ *
+ * Note: the original BSD code used the same variable (frompcindex) for
+ * both frompcindex and frompc. Any reasonable, modern compiler will
+ * perform this optimization.
+ *
+ * How the tables in _gmonparam are used below:
+ * - p->froms[] is indexed by the caller's pc offset from p->lowpc,
+ * divided by (p->hashfraction * sizeof(*p->froms)). Each slot holds
+ * the index in p->tos[] of the first arc record for that caller,
+ * or 0 if no arc has been recorded yet.
+ * - Arc records for one froms[] slot are chained through their link
+ * field; a link of 0 ends the chain.
+ * - p->tos[0].link counts the records handed out so far. Once it
+ * reaches p->tolimit, p->state is set to GMON_PROF_ERROR, and every
+ * later call returns at the state check at the top.
+ */
+_MCOUNT_DECL(frompc, selfpc) /* _mcount; may be static, inline, etc */
+ register u_long frompc, selfpc;
+{
+ register u_short *frompcindex;
+ register struct tostruct *top, *prevtop;
+ register struct gmonparam *p;
+ register long toindex;
+#ifdef KERNEL
+ register int s; /* not referenced directly here; presumably used by MCOUNT_ENTER/MCOUNT_EXIT -- confirm */
+#endif
+
+ p = &_gmonparam;
+ /*
+ * check that we are profiling
+ * and that we aren't recursively invoked.
+ */
+ if (p->state != GMON_PROF_ON)
+ return;
+#ifdef KERNEL
+ MCOUNT_ENTER;
+#else
+ p->state = GMON_PROF_BUSY; /* restored to GMON_PROF_ON at done: */
+#endif
+ /*
+ * check that frompc is a reasonable pc value.
+ * for example: signal catchers get called from the stack,
+ * not from text space. too bad.
+ */
+ frompc -= p->lowpc; /* u_long: a pc below lowpc wraps to a huge offset */
+ if (frompc > p->textsize)
+ goto done;
+
+ frompcindex = &p->froms[frompc / (p->hashfraction * sizeof(*p->froms))];
+ toindex = *frompcindex;
+ if (toindex == 0) {
+ /*
+ * first time traversing this arc
+ * (the froms[] slot is still empty, so allocate the
+ * first record of its chain)
+ */
+ toindex = ++p->tos[0].link;
+ if (toindex >= p->tolimit)
+ /* halt further profiling */
+ goto overflow;
+
+ *frompcindex = toindex;
+ top = &p->tos[toindex];
+ top->selfpc = selfpc;
+ top->count = 1;
+ top->link = 0; /* sole record: end of chain */
+ goto done;
+ }
+ top = &p->tos[toindex];
+ if (top->selfpc == selfpc) {
+ /*
+ * arc at front of chain; usual case.
+ */
+ top->count++;
+ goto done;
+ }
+ /*
+ * have to go looking down chain for it.
+ * top points to what we are looking at,
+ * prevtop points to previous top.
+ * we know it is not at the head of the chain.
+ */
+ for (; /* goto done */; ) {
+ if (top->link == 0) {
+ /*
+ * top is end of the chain and none of the chain
+ * had top->selfpc == selfpc.
+ * so we allocate a new tostruct
+ * and link it to the head of the chain.
+ */
+ toindex = ++p->tos[0].link;
+ if (toindex >= p->tolimit)
+ goto overflow;
+
+ top = &p->tos[toindex];
+ top->selfpc = selfpc;
+ top->count = 1;
+ top->link = *frompcindex; /* old head becomes second */
+ *frompcindex = toindex; /* new record becomes head */
+ goto done;
+ }
+ /*
+ * otherwise, check the next arc on the chain.
+ */
+ prevtop = top;
+ top = &p->tos[top->link];
+ if (top->selfpc == selfpc) {
+ /*
+ * there it is.
+ * increment its count
+ * move it to the head of the chain.
+ */
+ top->count++;
+ toindex = prevtop->link; /* index of the record found */
+ prevtop->link = top->link; /* unlink it from its place */
+ top->link = *frompcindex;
+ *frompcindex = toindex;
+ goto done;
+ }
+
+ }
+done: /* normal exit: re-enable profiling */
+#ifdef KERNEL
+ MCOUNT_EXIT;
+#else
+ p->state = GMON_PROF_ON;
+#endif
+ return;
+overflow: /* out of tostructs: state stays GMON_PROF_ERROR */
+ p->state = GMON_PROF_ERROR;
+#ifdef KERNEL
+ MCOUNT_EXIT;
+#endif
+ return;
+}
+
+/*
+ * Actual definition of mcount function. Defined in <machine/profile.h>,
+ * which is included by <sys/gmon.h>.
+ */
+MCOUNT
diff --git a/sys/libkern/moddi3.c b/sys/libkern/moddi3.c
new file mode 100644
index 000000000000..f31c6e84f2ba
--- /dev/null
+++ b/sys/libkern/moddi3.c
@@ -0,0 +1,67 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)moddi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Return remainder after dividing two signed quads.
+ *
+ * The remainder takes the sign of the dividend a; the sign of the
+ * divisor b does not affect it.  This matches truncating integer
+ * division, for which (a / b) * b + a % b == a:
+ *	 7 %  2 ==  1	 7 % -2 ==  1
+ *	-7 %  2 == -1	-7 % -2 == -1
+ *
+ * XXX
+ * If -1/2 should produce -1 on this machine, this code is wrong.
+ */
+quad_t
+__moddi3(a, b)
+	quad_t a, b;
+{
+	u_quad_t ua, ub, ur;
+	int neg;
+
+	/* Negate in unsigned arithmetic so the most negative quad is safe. */
+	if (a < 0)
+		ua = -(u_quad_t)a, neg = 1;
+	else
+		ua = a, neg = 0;
+	/* Only the magnitude of the divisor matters for the remainder. */
+	if (b < 0)
+		ub = -(u_quad_t)b;
+	else
+		ub = b;
+	(void)__qdivrem(ua, ub, &ur);
+	return (neg ? -ur : ur);
+}
diff --git a/sys/libkern/muldi3.c b/sys/libkern/muldi3.c
new file mode 100644
index 000000000000..a8d7cfc7eabc
--- /dev/null
+++ b/sys/libkern/muldi3.c
@@ -0,0 +1,246 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)muldi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Multiply two quads.
+ *
+ * Our algorithm is based on the following. Split incoming quad values
+ * u and v (where u,v >= 0) into
+ *
+ * u = 2^n u1 + u0 (n = number of bits in `u_long', usu. 32)
+ *
+ * and
+ *
+ * v = 2^n v1 + v0
+ *
+ * Then
+ *
+ * uv = 2^2n u1 v1 + 2^n u1 v0 + 2^n v1 u0 + u0 v0
+ * = 2^2n u1 v1 + 2^n (u1 v0 + v1 u0) + u0 v0
+ *
+ * Now add 2^n u1 v1 to the first term and subtract it from the middle,
+ * and add 2^n u0 v0 to the last term and subtract it from the middle.
+ * This gives:
+ *
+ * uv = (2^2n + 2^n) (u1 v1) +
+ * (2^n) (u1 v0 - u1 v1 + u0 v1 - u0 v0) +
+ * (2^n + 1) (u0 v0)
+ *
+ * Factoring the middle a bit gives us:
+ *
+ * uv = (2^2n + 2^n) (u1 v1) + [u1v1 = high]
+ * (2^n) (u1 - u0) (v0 - v1) + [(u1-u0)... = mid]
+ * (2^n + 1) (u0 v0) [u0v0 = low]
+ *
+ * The terms (u1 v1), (u1 - u0) (v0 - v1), and (u0 v0) can all be done
+ * in just half the precision of the original. (Note that either or both
+ * of (u1 - u0) or (v0 - v1) may be negative.)
+ *
+ * This algorithm is from Knuth vol. 2 (2nd ed), section 4.3.3, p. 278.
+ *
+ * Since C does not give us a `long * long = quad' operator, we split
+ * our input quads into two longs, then split the two longs into two
+ * shorts. We can then calculate `short * short = long' in native
+ * arithmetic.
+ *
+ * Our product should, strictly speaking, be a `long quad', with 128
+ * bits, but we are going to discard the upper 64. In other words,
+ * we are not interested in uv, but rather in (uv mod 2^2n). This
+ * makes some of the terms above vanish, and we get:
+ *
+ * (2^n)(high) + (2^n)(mid) + (2^n + 1)(low)
+ *
+ * or
+ *
+ * (2^n)(high + mid + low) + low
+ *
+ * Furthermore, `high' and `mid' can be computed mod 2^n, as any factor
+ * of 2^n in either one will also vanish. Only `low' need be computed
+ * mod 2^2n, and only because of the final term above.
+ */
+static quad_t __lmulq(u_long, u_long);
+
+/*
+ * Return a * b, truncated to a quad, following the derivation in the
+ * comment above.  The operands are reduced to magnitudes first and the
+ * sign is reapplied at the end.  All negations are done in unsigned
+ * arithmetic so that the most negative quad does not overflow.
+ */
+quad_t
+__muldi3(a, b)
+	quad_t a, b;
+{
+	union uu u, v, low, prod;
+	register u_long high, mid, udiff, vdiff;
+	register int negall, negmid;
+#define	u1	u.ul[H]
+#define	u0	u.ul[L]
+#define	v1	v.ul[H]
+#define	v0	v.ul[L]
+
+	/*
+	 * Get u and v such that u, v >= 0.  When this is finished,
+	 * u1, u0, v1, and v0 will be directly accessible through the
+	 * longword fields.  negall records whether the final product
+	 * must be negated.
+	 */
+	if (a >= 0)
+		u.q = a, negall = 0;
+	else
+		u.q = -(u_quad_t)a, negall = 1;
+	if (b >= 0)
+		v.q = b;
+	else
+		v.q = -(u_quad_t)b, negall ^= 1;
+
+	if (u1 == 0 && v1 == 0) {
+		/*
+		 * An (I hope) important optimization occurs when u1 and v1
+		 * are both 0.  This should be common since most numbers
+		 * are small.  Here the product is just u0*v0.
+		 */
+		prod.q = __lmulq(u0, v0);
+	} else {
+		/*
+		 * Compute the three intermediate products, remembering
+		 * whether the middle term is negative.  We can discard
+		 * any upper bits in high and mid, so we can use native
+		 * u_long * u_long => u_long arithmetic.
+		 */
+		low.q = __lmulq(u0, v0);
+
+		/* mid = |u1 - u0| * |v0 - v1|; negmid tracks its sign. */
+		if (u1 >= u0)
+			negmid = 0, udiff = u1 - u0;
+		else
+			negmid = 1, udiff = u0 - u1;
+		if (v0 >= v1)
+			vdiff = v0 - v1;
+		else
+			vdiff = v1 - v0, negmid ^= 1;
+		mid = udiff * vdiff;
+
+		high = u1 * v1;
+
+		/*
+		 * Assemble the final product:
+		 * (2^n)(high + mid + low) + low, modulo 2^2n.
+		 */
+		prod.ul[H] = high + (negmid ? -mid : mid) + low.ul[L] +
+		    low.ul[H];
+		prod.ul[L] = low.ul[L];
+	}
+	return (negall ? -(u_quad_t)prod.q : prod.q);
+#undef u1
+#undef u0
+#undef v1
+#undef v0
+}
+
+/*
+ * Multiply two 2N-bit longs to produce a 4N-bit quad, where N is half
+ * the number of bits in a long (whatever that is---the code below
+ * does not care as long as quad.h does its part of the bargain---but
+ * typically N==16).
+ *
+ * We use the same algorithm from Knuth, but this time the modulo refinement
+ * does not apply. On the other hand, since N is half the size of a long,
+ * we can get away with native multiplication---none of our input terms
+ * exceeds (ULONG_MAX >> 1).
+ *
+ * Note that, for u_long l, the quad-precision result
+ *
+ * l << N
+ *
+ * splits into high and low longs as HHALF(l) and LHUP(l) respectively.
+ *
+ * The product is accumulated in the pair (prodh, prodl). Before each
+ * update of prodl its old value is saved in `was'; comparing the new
+ * prodl against `was' detects a carry (after an add) or a borrow (after
+ * a subtract), which is then folded into prodh.
+ */
+static quad_t
+__lmulq(u_long u, u_long v)
+{
+ u_long u1, u0, v1, v0, udiff, vdiff, high, mid, low;
+ u_long prodh, prodl, was;
+ union uu prod;
+ int neg;
+
+ u1 = HHALF(u);
+ u0 = LHALF(u);
+ v1 = HHALF(v);
+ v0 = LHALF(v);
+
+ low = u0 * v0;
+
+ /* This is the same small-number optimization as before. */
+ if (u1 == 0 && v1 == 0)
+ return (low); /* both high halves are zero: u0 * v0 is the whole product */
+
+ if (u1 >= u0)
+ udiff = u1 - u0, neg = 0;
+ else
+ udiff = u0 - u1, neg = 1;
+ if (v0 >= v1)
+ vdiff = v0 - v1;
+ else
+ vdiff = v1 - v0, neg ^= 1;
+ mid = udiff * vdiff; /* magnitude of the middle term; neg holds its sign */
+
+ high = u1 * v1;
+
+ /* prod = (high << 2N) + (high << N); */
+ prodh = high + HHALF(high);
+ prodl = LHUP(high);
+
+ /* if (neg) prod -= mid << N; else prod += mid << N; */
+ if (neg) {
+ was = prodl;
+ prodl -= LHUP(mid);
+ prodh -= HHALF(mid) + (prodl > was); /* prodl grew: the subtract borrowed */
+ } else {
+ was = prodl;
+ prodl += LHUP(mid);
+ prodh += HHALF(mid) + (prodl < was); /* prodl shrank: the add carried */
+ }
+
+ /* prod += low << N */
+ was = prodl;
+ prodl += LHUP(low);
+ prodh += HHALF(low) + (prodl < was); /* carry out of the low word */
+ /* ... + low; */
+ if ((prodl += low) < low)
+ prodh++; /* sum wrapped below the addend: carry */
+
+ /* return 4N-bit product */
+ prod.ul[H] = prodh;
+ prod.ul[L] = prodl;
+ return (prod.q);
+}
diff --git a/sys/libkern/negdi2.c b/sys/libkern/negdi2.c
new file mode 100644
index 000000000000..bb8670d8e2cb
--- /dev/null
+++ b/sys/libkern/negdi2.c
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)negdi2.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Negate a quad: returns -a, i.e. 0 - a, computed a word at a time.
+ * Both words are negated separately, then one is borrowed from the
+ * high word whenever the negated low word is greater than zero.
+ * See subdi3.c.
+ */
+quad_t
+__negdi2(a)
+	quad_t a;
+{
+	union uu in, out;
+
+	in.q = a;
+	out.ul[L] = -in.ul[L];
+	out.ul[H] = -in.ul[H];
+	/* 0 - low borrowed from the high word unless low was 0. */
+	if (out.ul[L] > 0)
+		out.ul[H] -= 1;
+	return (out.q);
+}
diff --git a/sys/libkern/notdi2.c b/sys/libkern/notdi2.c
new file mode 100644
index 000000000000..d6247339a80b
--- /dev/null
+++ b/sys/libkern/notdi2.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)notdi2.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Bitwise complement of a quad: returns ~a.  (gcc names this routine
+ * `one's complement' rather than `not'.)
+ */
+quad_t
+__one_cmpldi2(a)
+	quad_t a;
+{
+	union uu v;
+	int i;
+
+	v.q = a;
+	/* Invert each half in place; the order of the halves is irrelevant. */
+	for (i = 0; i < 2; i++)
+		v.ul[i] = ~v.ul[i];
+	return (v.q);
+}
diff --git a/sys/libkern/qdivrem.c b/sys/libkern/qdivrem.c
new file mode 100644
index 000000000000..34b94ceaab23
--- /dev/null
+++ b/sys/libkern/qdivrem.c
@@ -0,0 +1,279 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)qdivrem.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
+ * section 4.3.1, pp. 257--259.
+ */
+
+#include "quad.h"
+
+#define B (1 << HALF_BITS) /* digit base */
+/* NOTE(review): B has type int, so this relies on HALF_BITS < bits in an int. */
+/* Combine two `digits' (a high, b low) to make a single two-digit number. */
+#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))
+
+/* select a type for digits in base B: use unsigned short if they fit */
+#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff
+typedef unsigned short digit;
+#else
+typedef u_long digit; /* otherwise one full u_long per digit */
+#endif
+
+/*
+ * Shift the len+1 digits p[0]..p[len] (p[0] most significant) left by `sh'
+ * bits, dropping whatever leaves the top of p[0] (callers ensure nothing
+ * does).  len must be >= 0.  NOTE THAT THIS WRITES len+1 DIGITS.
+ */
+static void
+shl(register digit *p, register int len, register int sh)
+{
+	register digit *last = p + len;	/* least significant digit */
+
+	for (; p < last; p++)
+		*p = LHALF(*p << sh) | (p[1] >> (HALF_BITS - sh));
+	*p = LHALF(*p << sh);
+}
+
+/*
+ * __qdivrem(u, v, rem) returns u/v and, if rem is non-null, sets *rem to u%v.
+ * If v is 0 it deliberately evaluates an integer 1/0 (see the first case).
+ * We do this in base 2-sup-HALF_BITS, so that all intermediate products
+ * fit within u_long. As a consequence, the maximum length dividend and
+ * divisor are 4 `digits' in this base (they are shorter if they have
+ * leading zeros).
+ */
+u_quad_t
+__qdivrem(uq, vq, arq)
+ u_quad_t uq, vq, *arq; /* dividend, divisor, optional remainder out */
+{
+ union uu tmp; /* for splitting quads into longs and back */
+ digit *u, *v, *q; /* cursors into uspace, vspace, qspace */
+ register digit v1, v2;
+ u_long qhat, rhat, t;
+ int m, n, d, j, i;
+ digit uspace[5], vspace[5], qspace[5];
+
+ /*
+ * Take care of special cases: divide by zero, and u < v.
+ */
+ if (vq == 0) {
+ /* divide by zero: perform an actual 1/0 (presumably to raise the usual fault). */
+ static volatile const unsigned int zero = 0; /* volatile, presumably to keep the 1/0 from being folded */
+
+ tmp.ul[H] = tmp.ul[L] = 1 / zero;
+ if (arq) /* only reached if the 1/0 did not trap */
+ *arq = uq;
+ return (tmp.q);
+ }
+ if (uq < vq) { /* quotient is 0, remainder is u */
+ if (arq)
+ *arq = uq;
+ return (0);
+ }
+ u = &uspace[0];
+ v = &vspace[0];
+ q = &qspace[0];
+
+ /*
+ * Break dividend and divisor into digits in base B, then
+ * count leading zeros to determine m and n. When done, we
+ * will have:
+ * u = (u[1]u[2]...u[m+n]) sub B
+ * v = (v[1]v[2]...v[n]) sub B
+ * v[1] != 0
+ * 1 < n <= 4 (if n = 1, we use a different division algorithm)
+ * m >= 0 (otherwise u < v, which we already checked)
+ * m + n = 4
+ * and thus
+ * m = 4 - n <= 2
+ */
+ tmp.uq = uq;
+ u[0] = 0; /* extra leading digit; the D1 shift below also rewrites it */
+ u[1] = HHALF(tmp.ul[H]);
+ u[2] = LHALF(tmp.ul[H]);
+ u[3] = HHALF(tmp.ul[L]);
+ u[4] = LHALF(tmp.ul[L]);
+ tmp.uq = vq;
+ v[1] = HHALF(tmp.ul[H]);
+ v[2] = LHALF(tmp.ul[H]);
+ v[3] = HHALF(tmp.ul[L]);
+ v[4] = LHALF(tmp.ul[L]);
+ for (n = 4; v[1] == 0; v++) { /* strip leading zero digits of v */
+ if (--n == 1) {
+ u_long rbj; /* r*B+u[j] (not root boy jim) */
+ digit q1, q2, q3, q4;
+
+ /*
+ * Change of plan, per exercise 16.
+ * r = 0;
+ * for j = 1..4:
+ * q[j] = floor((r*B + u[j]) / v),
+ * r = (r*B + u[j]) % v;
+ * We unroll this completely here.
+ */
+ t = v[2]; /* the one nonzero digit; v has not been advanced yet */
+ q1 = u[1] / t;
+ rbj = COMBINE(u[1] % t, u[2]);
+ q2 = rbj / t;
+ rbj = COMBINE(rbj % t, u[3]);
+ q3 = rbj / t;
+ rbj = COMBINE(rbj % t, u[4]);
+ q4 = rbj / t;
+ if (arq)
+ *arq = rbj % t;
+ tmp.ul[H] = COMBINE(q1, q2);
+ tmp.ul[L] = COMBINE(q3, q4);
+ return (tmp.q);
+ }
+ }
+
+ /*
+ * By adjusting q once we determine m, we can guarantee that
+ * there is a complete four-digit quotient at &qspace[1] when
+ * we finally stop.
+ */
+ for (m = 4 - n; u[1] == 0; u++) /* strip leading zero digits of u */
+ m--;
+ for (i = 4 - m; --i >= 0;)
+ q[i] = 0;
+ q += 4 - m;
+
+ /*
+ * Here we run Program D, translated from MIX to C and acquiring
+ * a few minor changes.
+ *
+ * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
+ */
+ d = 0;
+ for (t = v[1]; t < B / 2; t <<= 1)
+ d++;
+ if (d > 0) {
+ shl(&u[0], m + n, d); /* u <<= d */
+ shl(&v[1], n - 1, d); /* v <<= d */
+ }
+ /*
+ * D2: j = 0.
+ */
+ j = 0;
+ v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
+ v2 = v[2]; /* for D3 */
+ do {
+ register digit uj0, uj1, uj2;
+
+ /*
+ * D3: Calculate qhat (\^q, in TeX notation).
+ * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
+ * let rhat = (u[j]*B + u[j+1]) mod v[1].
+ * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
+ * decrement qhat and increase rhat correspondingly.
+ * Note that if rhat >= B, v[2]*qhat < rhat*B.
+ */
+ uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
+ uj1 = u[j + 1]; /* for D3 only */
+ uj2 = u[j + 2]; /* for D3 only */
+ if (uj0 == v1) {
+ qhat = B;
+ rhat = uj1;
+ goto qhat_too_big; /* enters the loop body below: decrement qhat first */
+ } else {
+ u_long n = COMBINE(uj0, uj1); /* shadows the digit count n in this block only */
+ qhat = n / v1;
+ rhat = n % v1;
+ }
+ while (v2 * qhat > COMBINE(rhat, uj2)) {
+ qhat_too_big:
+ qhat--;
+ if ((rhat += v1) >= B)
+ break;
+ }
+ /*
+ * D4: Multiply and subtract.
+ * The variable `t' holds any borrows across the loop.
+ * We split this up so that we do not require v[0] = 0,
+ * and to eliminate a final special case.
+ */
+ for (t = 0, i = n; i > 0; i--) {
+ t = u[i + j] - v[i] * qhat - t;
+ u[i + j] = LHALF(t);
+ t = (B - HHALF(t)) & (B - 1); /* borrow: negated high half, reduced to one digit */
+ }
+ t = u[j] - t;
+ u[j] = LHALF(t);
+ /*
+ * D5: test remainder.
+ * There is a borrow if and only if HHALF(t) is nonzero;
+ * in that (rare) case, qhat was too large (by exactly 1).
+ * Fix it by adding v[1..n] to u[j..j+n].
+ */
+ if (HHALF(t)) {
+ qhat--;
+ for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
+ t += u[i + j] + v[i];
+ u[i + j] = LHALF(t);
+ t = HHALF(t);
+ }
+ u[j] = LHALF(u[j] + t);
+ }
+ q[j] = qhat;
+ } while (++j <= m); /* D7: loop on j. */
+
+ /*
+ * If caller wants the remainder, we have to calculate it as
+ * u[m..m+n] >> d (this is at most n digits and thus fits in
+ * u[m+1..m+n], but we may need more source digits).
+ */
+ if (arq) {
+ if (d) {
+ for (i = m + n; i > m; --i)
+ u[i] = (u[i] >> d) |
+ LHALF(u[i - 1] << (HALF_BITS - d));
+ u[i] = 0; /* i == m here */
+ }
+ tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
+ tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
+ *arq = tmp.q;
+ }
+
+ tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
+ tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
+ return (tmp.q);
+}
diff --git a/sys/libkern/quad.h b/sys/libkern/quad.h
new file mode 100644
index 000000000000..bc6a2f836327
--- /dev/null
+++ b/sys/libkern/quad.h
@@ -0,0 +1,110 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)quad.h 8.1 (Berkeley) 6/4/93
+ */
+
+/*
+ * Quad arithmetic.
+ *
+ * This library makes the following assumptions:
+ *
+ * - The type long long (aka quad_t) exists.
+ *
+ * - A quad variable is exactly twice as long as `long'.
+ *
+ * - The machine's arithmetic is two's complement.
+ *
+ * This library can provide 128-bit arithmetic on a machine with 128-bit
+ * quads and 64-bit longs, for instance, or 96-bit arithmetic on machines
+ * with 48-bit longs.
+ */
+
+#include <sys/types.h>
+#include <limits.h>
+
+/*
+ * Depending on the desired operation, we view a `long long' (aka quad_t) in
+ * one or more of the following formats; sl[] and ul[] are indexed by H and L.
+ */
+union uu {
+	quad_t q;	/* as a (signed) quad */
+	u_quad_t uq;	/* as an unsigned quad (was mistakenly declared quad_t) */
+	long sl[2];	/* as two signed longs */
+	u_long ul[2];	/* as two unsigned longs */
+};
+
+/*
+ * Indices of the high and low longwords within sl[] and ul[] of union uu.
+ */
+#define H _QUAD_HIGHWORD
+#define L _QUAD_LOWWORD
+
+/*
+ * Total number of bits in a quad_t and in the pieces that make it up.
+ * These are used for shifting, and also below for halfword extraction
+ * and assembly.  Built from sizeof, so each has type size_t (unsigned).
+ */
+#define QUAD_BITS (sizeof(quad_t) * CHAR_BIT)
+#define LONG_BITS (sizeof(long) * CHAR_BIT)
+#define HALF_BITS (sizeof(long) * CHAR_BIT / 2)
+
+/*
+ * Extract high and low shortwords from longword, and move low shortword of
+ * longword to upper half of long, i.e., produce the lower longword of
+ * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.)
+ *
+ * These are used in the multiply code, to split a longword into upper
+ * and lower halves, and to reassemble a product as a quad_t, shifted left
+ * (sizeof(long)*CHAR_BIT/2).
+ */
+#define HHALF(x) ((x) >> HALF_BITS) /* high shortword */
+#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1)) /* low shortword; NOTE(review): mask is built from an int 1 */
+#define LHUP(x) ((x) << HALF_BITS) /* low shortword moved to the upper half */
+/* Returns u / v; when rem is non-null, u % v is also stored through it. */
+extern u_quad_t __qdivrem __P((u_quad_t u, u_quad_t v, u_quad_t *rem));
+
+/*
+ * XXX
+ * Compensate for gcc 1 vs gcc 2. Gcc 1 defines ?sh?di3's second argument
+ * as u_quad_t, while gcc 2 correctly uses int. Unfortunately, we still use
+ * both compilers.
+ */
+#if __GNUC__ >= 2
+typedef unsigned int qshift_t; /* shift-count type for gcc 2 and later */
+#else
+typedef u_quad_t qshift_t; /* gcc 1, or __GNUC__ not defined */
+#endif
diff --git a/sys/libkern/random.c b/sys/libkern/random.c
new file mode 100644
index 000000000000..5153124e3fd3
--- /dev/null
+++ b/sys/libkern/random.c
@@ -0,0 +1,63 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)random.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <libkern/libkern.h>
+
+/*
+ * Pseudo-random number generator for randomizing the profiling clock,
+ * and whatever else we might use it for.  Successive calls step through
+ * the nonzero residues mod 2^31 - 1, so results lie in [1, 2^31 - 2].
+ */
+u_long
+random()
+{
+	static u_long randseed = 1;	/* x[n]; persists across calls */
+	register long quot, rem, next;
+
+	/*
+	 * Compute x[n + 1] = (7^5 * x[n]) mod (2^31 - 1) without needing
+	 * more than 32 bits, using 2^31 - 1 = 16807 * 127773 + 2836.
+	 * From "Random number generators: good ones are hard to find",
+	 * Park and Miller, Communications of the ACM, vol. 31, no. 10,
+	 * October 1988, p. 1195.
+	 */
+	quot = (long)randseed / 127773;
+	rem = (long)randseed % 127773;
+	next = 16807 * rem - 2836 * quot;
+	if (next <= 0)
+		next += 0x7fffffff;
+	randseed = next;
+	return (next);
+}
diff --git a/sys/libkern/rindex.c b/sys/libkern/rindex.c
new file mode 100644
index 000000000000..69dced4c46dc
--- /dev/null
+++ b/sys/libkern/rindex.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)rindex.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <stddef.h>
+#include <string.h>
+
+char *
+#ifdef STRRCHR	/* the same body is built as either strrchr() or rindex() */
+strrchr(p, ch)
+#else
+rindex(p, ch)
+#endif
+	register const char *p;	/* NUL-terminated string to search */
+	register int ch;	/* character to find; '\0' finds the terminator */
+{
+	register char *save, c = ch;	/* c: ch narrowed to char so that values above CHAR_MAX still match */
+
+	for (save = NULL;; ++p) {
+		if (*p == c)
+			save = (char *)p;	/* remember the latest match */
+		if (!*p)
+			return(save);	/* NULL if ch never occurred */
+	}
+	/* NOTREACHED */
+}
diff --git a/sys/libkern/scanc.c b/sys/libkern/scanc.c
new file mode 100644
index 000000000000..2d8b6a06dd68
--- /dev/null
+++ b/sys/libkern/scanc.c
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)scanc.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <libkern/libkern.h>
+
+/* Scan for a byte b with (table[b] & (u_char)mask0) != 0; return bytes left. */
+int
+scanc(size, cp, table, mask0)
+	u_int size;
+	register u_char *cp, table[];
+	int mask0;
+{
+	register u_char *limit = cp + size, bits = mask0;
+
+	while (cp < limit && (table[*cp] & bits) == 0)
+		cp++;
+	return (limit - cp);	/* counts the matching byte; 0 if none matched */
+}
diff --git a/sys/libkern/skpc.c b/sys/libkern/skpc.c
new file mode 100644
index 000000000000..11b269ee7e78
--- /dev/null
+++ b/sys/libkern/skpc.c
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)skpc.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <libkern/libkern.h>
+
+/* Skip leading bytes equal to (u_char)mask0; return the bytes left after them. */
+int
+skpc(mask0, size, cp0)
+	int mask0;
+	int size;
+	char *cp0;
+{
+	register u_char *scan = (u_char *)cp0, *limit = scan + size, skip = mask0;
+
+	while (scan < limit && *scan == skip)
+		scan++;
+	return (limit - scan);	/* 0 when every byte was skipped */
+}
diff --git a/sys/libkern/strcat.c b/sys/libkern/strcat.c
new file mode 100644
index 000000000000..343696719b71
--- /dev/null
+++ b/sys/libkern/strcat.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)strcat.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <string.h>
+
+char *
+strcat(s, append)
+	register char *s;
+	register const char *append;
+{
+	register char *tail = s + strlen(s);	/* s's terminating NUL */
+
+	while ((*tail++ = *append++) != '\0')
+		continue;	/* the NUL of `append' is copied too */
+	return (s);
+}
diff --git a/sys/libkern/strcmp.c b/sys/libkern/strcmp.c
new file mode 100644
index 000000000000..79cfaa831b29
--- /dev/null
+++ b/sys/libkern/strcmp.c
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)strcmp.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/cdefs.h>
+#include <string.h>
+
+/*
+ * Compare two NUL-terminated strings as unsigned chars: <0, 0 or >0.
+ */
+int
+strcmp(s1, s2)
+	register const char *s1, *s2;
+{
+	for (; *s1 == *s2; s1++, s2++)
+		if (*s1 == '\0')
+			return (0);
+	return (*(unsigned char *)s1 - *(unsigned char *)s2);
+}
diff --git a/sys/libkern/strcpy.c b/sys/libkern/strcpy.c
new file mode 100644
index 000000000000..d1791dd00c35
--- /dev/null
+++ b/sys/libkern/strcpy.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)strcpy.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/cdefs.h>
+#include <string.h>
+
+char *
+strcpy(to, from)
+	register char *to;
+	register const char *from;
+{
+	register size_t i;
+
+	for (i = 0; (to[i] = from[i]) != '\0'; i++);	/* stops after copying the NUL */
+	return (to);
+}
diff --git a/sys/libkern/strlen.c b/sys/libkern/strlen.c
new file mode 100644
index 000000000000..323fbe484526
--- /dev/null
+++ b/sys/libkern/strlen.c
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)strlen.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/cdefs.h>
+#include <string.h>
+
+size_t
+strlen(str)
+	const char *str;
+{
+	register size_t n;
+
+	for (n = 0; str[n] != '\0'; n++);	/* count bytes before the NUL */
+	return (n);
+}
+
diff --git a/sys/libkern/strncpy.c b/sys/libkern/strncpy.c
new file mode 100644
index 000000000000..9e72740b8b9f
--- /dev/null
+++ b/sys/libkern/strncpy.c
@@ -0,0 +1,68 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)strncpy.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/cdefs.h>
+#include <string.h>
+
+/*
+ * Copy src to dst, writing exactly n bytes: truncate (leaving dst without
+ * a terminating NUL) or NUL-pad as needed.  Return dst.
+ */
+char *
+strncpy(dst, src, n)
+ char *dst;
+ const char *src;
+ register size_t n;
+{
+ if (n != 0) { /* n == 0: nothing is written */
+ register char *d = dst;
+ register const char *s = src;
+
+ do {
+ if ((*d++ = *s++) == 0) { /* just copied src's terminator */
+ /* NUL pad the remaining n-1 bytes */
+ while (--n != 0)
+ *d++ = 0;
+ break;
+ }
+ } while (--n != 0); /* stop after n bytes even if src is longer */
+ }
+ return (dst);
+}
diff --git a/sys/libkern/subdi3.c b/sys/libkern/subdi3.c
new file mode 100644
index 000000000000..e9763452e4f8
--- /dev/null
+++ b/sys/libkern/subdi3.c
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)subdi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Return a - b, one u_long half at a time.  The low-half difference x-y
+ * borrowed from the high half if and only if (x-y) > x.
+ */
+quad_t
+__subdi3(a, b)
+ quad_t a, b;
+{
+ union uu aa, bb, diff;
+
+ aa.q = a;
+ bb.q = b;
+ diff.ul[L] = aa.ul[L] - bb.ul[L]; /* may wrap around */
+ diff.ul[H] = aa.ul[H] - bb.ul[H] - (diff.ul[L] > aa.ul[L]); /* less borrow */
+ return (diff.q);
+}
diff --git a/sys/libkern/ucmpdi2.c b/sys/libkern/ucmpdi2.c
new file mode 100644
index 000000000000..e5dfc435d9c3
--- /dev/null
+++ b/sys/libkern/ucmpdi2.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)ucmpdi2.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Compare two unsigned quads: return 0, 1, or 2 as a <, =, > b.
+ * The H halves are compared first; the L halves only break a tie.
+ */
+int
+__ucmpdi2(a, b)
+ u_quad_t a, b;
+{
+ union uu aa, bb;
+
+ aa.uq = a;
+ bb.uq = b;
+ return (aa.ul[H] < bb.ul[H] ? 0 : aa.ul[H] > bb.ul[H] ? 2 :
+ aa.ul[L] < bb.ul[L] ? 0 : aa.ul[L] > bb.ul[L] ? 2 : 1);
+}
diff --git a/sys/libkern/udivdi3.c b/sys/libkern/udivdi3.c
new file mode 100644
index 000000000000..8ddd55989115
--- /dev/null
+++ b/sys/libkern/udivdi3.c
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)udivdi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Divide two unsigned quads: return whatever __qdivrem() returns for a, b.
+ */
+u_quad_t
+__udivdi3(a, b)
+ u_quad_t a, b;
+{
+
+ return (__qdivrem(a, b, (u_quad_t *)0)); /* null: no remainder wanted */
+}
diff --git a/sys/libkern/umoddi3.c b/sys/libkern/umoddi3.c
new file mode 100644
index 000000000000..2a85f7699a6b
--- /dev/null
+++ b/sys/libkern/umoddi3.c
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)umoddi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Return remainder after dividing two unsigned quads (a by b).
+ */
+u_quad_t
+__umoddi3(a, b)
+ u_quad_t a, b;
+{
+ u_quad_t r;
+
+ (void)__qdivrem(a, b, &r); /* return value discarded; r is filled in */
+ return (r);
+}
diff --git a/sys/libkern/xordi3.c b/sys/libkern/xordi3.c
new file mode 100644
index 000000000000..e3a858893605
--- /dev/null
+++ b/sys/libkern/xordi3.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)xordi3.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Return a ^ b, in quad, by XORing the two ul[] halves independently.
+ */
+quad_t
+__xordi3(a, b)
+ quad_t a, b;
+{
+ union uu aa, bb;
+
+ aa.q = a;
+ bb.q = b;
+ aa.ul[0] ^= bb.ul[0]; /* literal indices: half order does not matter here */
+ aa.ul[1] ^= bb.ul[1];
+ return (aa.q);
+}
diff --git a/sys/miscfs/deadfs/dead_vnops.c b/sys/miscfs/deadfs/dead_vnops.c
new file mode 100644
index 000000000000..9d04652b7fc8
--- /dev/null
+++ b/sys/miscfs/deadfs/dead_vnops.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)dead_vnops.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/errno.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+
+/*
+ * Prototypes for dead vnode ops; the #defines are casts of shared stubs.
+ */
+int dead_badop(), /* panics with "dead_badop called" */
+ dead_ebadf(); /* returns EBADF */
+int dead_lookup __P((struct vop_lookup_args *));
+#define dead_create ((int (*) __P((struct vop_create_args *)))dead_badop)
+#define dead_mknod ((int (*) __P((struct vop_mknod_args *)))dead_badop)
+int dead_open __P((struct vop_open_args *));
+#define dead_close ((int (*) __P((struct vop_close_args *)))nullop)
+#define dead_access ((int (*) __P((struct vop_access_args *)))dead_ebadf)
+#define dead_getattr ((int (*) __P((struct vop_getattr_args *)))dead_ebadf)
+#define dead_setattr ((int (*) __P((struct vop_setattr_args *)))dead_ebadf)
+int dead_read __P((struct vop_read_args *));
+int dead_write __P((struct vop_write_args *));
+int dead_ioctl __P((struct vop_ioctl_args *));
+int dead_select __P((struct vop_select_args *));
+#define dead_mmap ((int (*) __P((struct vop_mmap_args *)))dead_badop)
+#define dead_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define dead_seek ((int (*) __P((struct vop_seek_args *)))nullop)
+#define dead_remove ((int (*) __P((struct vop_remove_args *)))dead_badop)
+#define dead_link ((int (*) __P((struct vop_link_args *)))dead_badop)
+#define dead_rename ((int (*) __P((struct vop_rename_args *)))dead_badop)
+#define dead_mkdir ((int (*) __P((struct vop_mkdir_args *)))dead_badop)
+#define dead_rmdir ((int (*) __P((struct vop_rmdir_args *)))dead_badop)
+#define dead_symlink ((int (*) __P((struct vop_symlink_args *)))dead_badop)
+#define dead_readdir ((int (*) __P((struct vop_readdir_args *)))dead_ebadf)
+#define dead_readlink ((int (*) __P((struct vop_readlink_args *)))dead_ebadf)
+#define dead_abortop ((int (*) __P((struct vop_abortop_args *)))dead_badop)
+#define dead_inactive ((int (*) __P((struct vop_inactive_args *)))nullop)
+#define dead_reclaim ((int (*) __P((struct vop_reclaim_args *)))nullop)
+int dead_lock __P((struct vop_lock_args *));
+#define dead_unlock ((int (*) __P((struct vop_unlock_args *)))nullop)
+int dead_bmap __P((struct vop_bmap_args *));
+int dead_strategy __P((struct vop_strategy_args *));
+int dead_print __P((struct vop_print_args *));
+#define dead_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+#define dead_pathconf ((int (*) __P((struct vop_pathconf_args *)))dead_ebadf)
+#define dead_advlock ((int (*) __P((struct vop_advlock_args *)))dead_ebadf)
+#define dead_blkatoff ((int (*) __P((struct vop_blkatoff_args *)))dead_badop)
+#define dead_valloc ((int (*) __P((struct vop_valloc_args *)))dead_badop)
+#define dead_vfree ((int (*) __P((struct vop_vfree_args *)))dead_badop)
+#define dead_truncate ((int (*) __P((struct vop_truncate_args *)))nullop)
+#define dead_update ((int (*) __P((struct vop_update_args *)))nullop)
+#define dead_bwrite ((int (*) __P((struct vop_bwrite_args *)))nullop)
+
+int (**dead_vnodeop_p)(); /* op vector; its address is recorded in the opv_desc */
+struct vnodeopv_entry_desc dead_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error }, /* presumably the fallback entry */
+ { &vop_lookup_desc, dead_lookup }, /* lookup */
+ { &vop_create_desc, dead_create }, /* create */
+ { &vop_mknod_desc, dead_mknod }, /* mknod */
+ { &vop_open_desc, dead_open }, /* open */
+ { &vop_close_desc, dead_close }, /* close */
+ { &vop_access_desc, dead_access }, /* access */
+ { &vop_getattr_desc, dead_getattr }, /* getattr */
+ { &vop_setattr_desc, dead_setattr }, /* setattr */
+ { &vop_read_desc, dead_read }, /* read */
+ { &vop_write_desc, dead_write }, /* write */
+ { &vop_ioctl_desc, dead_ioctl }, /* ioctl */
+ { &vop_select_desc, dead_select }, /* select */
+ { &vop_mmap_desc, dead_mmap }, /* mmap */
+ { &vop_fsync_desc, dead_fsync }, /* fsync */
+ { &vop_seek_desc, dead_seek }, /* seek */
+ { &vop_remove_desc, dead_remove }, /* remove */
+ { &vop_link_desc, dead_link }, /* link */
+ { &vop_rename_desc, dead_rename }, /* rename */
+ { &vop_mkdir_desc, dead_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, dead_rmdir }, /* rmdir */
+ { &vop_symlink_desc, dead_symlink }, /* symlink */
+ { &vop_readdir_desc, dead_readdir }, /* readdir */
+ { &vop_readlink_desc, dead_readlink }, /* readlink */
+ { &vop_abortop_desc, dead_abortop }, /* abortop */
+ { &vop_inactive_desc, dead_inactive }, /* inactive */
+ { &vop_reclaim_desc, dead_reclaim }, /* reclaim */
+ { &vop_lock_desc, dead_lock }, /* lock */
+ { &vop_unlock_desc, dead_unlock }, /* unlock */
+ { &vop_bmap_desc, dead_bmap }, /* bmap */
+ { &vop_strategy_desc, dead_strategy }, /* strategy */
+ { &vop_print_desc, dead_print }, /* print */
+ { &vop_islocked_desc, dead_islocked }, /* islocked */
+ { &vop_pathconf_desc, dead_pathconf }, /* pathconf */
+ { &vop_advlock_desc, dead_advlock }, /* advlock */
+ { &vop_blkatoff_desc, dead_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, dead_valloc }, /* valloc */
+ { &vop_vfree_desc, dead_vfree }, /* vfree */
+ { &vop_truncate_desc, dead_truncate }, /* truncate */
+ { &vop_update_desc, dead_update }, /* update */
+ { &vop_bwrite_desc, dead_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* end-of-table marker */
+};
+struct vnodeopv_desc dead_vnodeop_opv_desc = /* pairs the vector with its table */
+ { &dead_vnodeop_p, dead_vnodeop_entries };
+
+/*
+ * Trivial lookup routine that always fails: clears *a_vpp, returns ENOTDIR.
+ */
+/* ARGSUSED */
+int
+dead_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode * a_dvp;
+ struct vnode ** a_vpp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+
+ *ap->a_vpp = NULL; /* no vnode is handed back */
+ return (ENOTDIR);
+}
+
+/*
+ * Open always fails with ENXIO, as if the device did not exist.
+ */
+/* ARGSUSED */
+dead_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ return (ENXIO);
+}
+
+/*
+ * Vnode op for read; panics if chkvnlock() had to wait on the vnode.
+ */
+/* ARGSUSED */
+dead_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+
+ if (chkvnlock(ap->a_vp)) /* nonzero: it slept waiting for VXLOCK */
+ panic("dead_read: lock");
+ /*
+ * Return EOF for character devices, EIO for others
+ */
+ if (ap->a_vp->v_type != VCHR)
+ return (EIO);
+ return (0); /* success with a_uio left untouched */
+}
+
+/*
+ * Vnode op for write: always EIO; panics if chkvnlock() had to wait.
+ */
+/* ARGSUSED */
+dead_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+
+ if (chkvnlock(ap->a_vp)) /* nonzero: it slept waiting for VXLOCK */
+ panic("dead_write: lock");
+ return (EIO);
+}
+
+/*
+ * Device ioctl operation: EBADF, or a re-dispatch after a state change.
+ */
+/* ARGSUSED */
+dead_ioctl(ap)
+ struct vop_ioctl_args /* {
+ struct vnode *a_vp;
+ int a_command;
+ caddr_t a_data;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ if (!chkvnlock(ap->a_vp)) /* did not have to wait: vnode is simply dead */
+ return (EBADF);
+ return (VCALL(ap->a_vp, VOFFSET(vop_ioctl), ap)); /* waited: call vop_ioctl again */
+}
+
+/* ARGSUSED */
+dead_select(ap)
+ struct vop_select_args /* {
+ struct vnode *a_vp;
+ int a_which;
+ int a_fflags;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ /*
+ * Always return 1 so the user finds out that the descriptor is gone.
+ */
+ return (1);
+}
+
+/*
+ * Call the device strategy routine, or fail the buffer with EIO.
+ */
+dead_strategy(ap)
+ struct vop_strategy_args /* {
+ struct buf *a_bp;
+ } */ *ap;
+{
+
+ if (ap->a_bp->b_vp == NULL || !chkvnlock(ap->a_bp->b_vp)) {
+ ap->a_bp->b_flags |= B_ERROR; /* no vnode, or no wait was needed */
+ biodone(ap->a_bp); /* finish the buffer with B_ERROR set */
+ return (EIO);
+ }
+ return (VOP_STRATEGY(ap->a_bp)); /* waited: issue the strategy call again */
+}
+
+/*
+ * Wait until the vnode has finished changing state, then retry the lock.
+ */
+dead_lock(ap)
+ struct vop_lock_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ if (!chkvnlock(ap->a_vp)) /* did not have to wait */
+ return (0);
+ return (VCALL(ap->a_vp, VOFFSET(vop_lock), ap)); /* waited: call vop_lock again */
+}
+
+/*
+ * Map a block: EIO unless chkvnlock() had to wait, else retry VOP_BMAP().
+ */
+dead_bmap(ap)
+ struct vop_bmap_args /* {
+ struct vnode *a_vp;
+ daddr_t a_bn;
+ struct vnode **a_vpp;
+ daddr_t *a_bnp;
+ int *a_runp;
+ } */ *ap;
+{
+
+ if (!chkvnlock(ap->a_vp)) /* did not have to wait */
+ return (EIO);
+ return (VOP_BMAP(ap->a_vp, ap->a_bn, ap->a_vpp, ap->a_bnp, ap->a_runp));
+}
+
+/* Print out the contents of a dead vnode; always returns 0. */
+/* ARGSUSED */
+int
+dead_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ printf("tag VT_NON, dead vnode\n");
+ return (0); /* prototyped as int: do not fall off the end without a value */
+}
+
+/*
+ * Empty vnode failed operation: always returns EBADF.
+ */
+dead_ebadf()
+{
+
+ return (EBADF);
+}
+
+/*
+ * Empty vnode bad operation: always panics.
+ */
+dead_badop()
+{
+
+ panic("dead_badop called");
+ /* NOTREACHED */
+}
+
+/*
+ * Empty vnode null operation: always returns 0.
+ */
+dead_nullop() /* NOTE(review): unreferenced in this file; the macros use nullop */
+{
+
+ return (0);
+}
+
+/*
+ * Sleep while the vnode is in a state of change (VXLOCK set in v_flag).
+ * Return 1 if we had to sleep at least once, 0 otherwise.
+ */
+chkvnlock(vp)
+ register struct vnode *vp;
+{
+ int locked = 0;
+
+ while (vp->v_flag & VXLOCK) { /* re-tested after every wakeup */
+ vp->v_flag |= VXWANT; /* presumably requests a wakeup from the holder */
+ sleep((caddr_t)vp, PINOD); /* the vnode address is the sleep channel */
+ locked = 1;
+ }
+ return (locked);
+}
diff --git a/sys/miscfs/fdesc/fdesc.h b/sys/miscfs/fdesc/fdesc.h
new file mode 100644
index 000000000000..4c682e7bd370
--- /dev/null
+++ b/sys/miscfs/fdesc/fdesc.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fdesc.h 8.5 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc.h,v 1.8 1993/04/06 15:28:33 jsp Exp $
+ */
+
+#ifdef KERNEL
+struct fdescmount { /* what VFSTOFDESC() casts a mount's mnt_data to */
+ struct vnode *f_root; /* Root node */
+};
+
+#define FD_ROOT 2 /* NOTE(review): FD_* are presumably fd_ix values; confirm */
+#define FD_DEVFD 3
+#define FD_STDIN 4
+#define FD_STDOUT 5
+#define FD_STDERR 6
+#define FD_CTTY 7
+#define FD_DESC 8
+#define FD_MAX 12
+
+typedef enum { /* node kinds; this is the type of fdescnode.fd_type */
+ Froot,
+ Fdevfd,
+ Fdesc,
+ Flink,
+ Fctty
+} fdntype;
+
+struct fdescnode { /* what VTOFDESC() casts a vnode's v_data to */
+ struct fdescnode *fd_forw; /* Hash chain */
+ struct fdescnode *fd_back; /* Hash chain, other direction (presumably) */
+ struct vnode *fd_vnode; /* Back ptr to vnode */
+ fdntype fd_type; /* Type of this node */
+ unsigned fd_fd; /* Fd to be dup'ed */
+ char *fd_link; /* Link to fd/n */
+ int fd_ix; /* filesystem index */
+};
+
+#define VFSTOFDESC(mp) ((struct fdescmount *)((mp)->mnt_data)) /* mount's data */
+#define VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data) /* vnode's data */
+
+extern dev_t devctty; /* NOTE(review): presumably the controlling-tty device */
+extern int fdesc_init __P((void));
+extern int fdesc_root __P((struct mount *, struct vnode **));
+extern int fdesc_allocvp __P((fdntype, int, struct mount *, struct vnode **));
+extern int (**fdesc_vnodeop_p)(); /* op vector, same shape as dead_vnodeop_p */
+extern struct vfsops fdesc_vfsops; /* defined outside this header */
+#endif /* KERNEL */
diff --git a/sys/miscfs/fdesc/fdesc_vfsops.c b/sys/miscfs/fdesc/fdesc_vfsops.c
new file mode 100644
index 000000000000..80c543da6550
--- /dev/null
+++ b/sys/miscfs/fdesc/fdesc_vfsops.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fdesc_vfsops.c 8.4 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vfsops.c,v 1.9 1993/04/06 15:28:33 jsp Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/fdesc/fdesc.h>
+
+/*
+ * Mount the per-process file descriptors (/dev/fd); data and ndp are unused
+ */
+int
+fdesc_mount(mp, path, data, ndp, p)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ int error = 0;
+ u_int size;
+ struct fdescmount *fmp;
+ struct vnode *rvp;
+
+ /*
+ * Updating an existing mount is not supported
+ */
+ if (mp->mnt_flag & MNT_UPDATE)
+ return (EOPNOTSUPP);
+
+ error = fdesc_allocvp(Froot, FD_ROOT, mp, &rvp); /* root node of the mount */
+ if (error)
+ return (error);
+
+ MALLOC(fmp, struct fdescmount *, sizeof(struct fdescmount),
+ M_UFSMNT, M_WAITOK); /* XXX */
+ rvp->v_type = VDIR;
+ rvp->v_flag |= VROOT;
+ fmp->f_root = rvp;
+ /* XXX -- don't mark as local to work around fts() problems */
+ /*mp->mnt_flag |= MNT_LOCAL;*/
+ mp->mnt_data = (qaddr_t) fmp;
+ getnewfsid(mp, MOUNT_FDESC);
+
+ (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); /* result ignored */
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); /* zero the tail */
+ bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+ bcopy("fdesc", mp->mnt_stat.f_mntfromname, sizeof("fdesc")); /* fixed "from" name */
+ return (0);
+}
+
+int
+fdesc_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+ return (0); /* no start-up work; all arguments are unused */
+}
+
+int
+fdesc_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ int error;
+ int flags = 0;
+ extern int doforce;
+ struct vnode *rootvp = VFSTOFDESC(mp)->f_root;
+
+ if (mntflags & MNT_FORCE) {
+ /* fdesc can never be rootfs so don't check for it */
+ if (!doforce)
+ return (EINVAL);
+ flags |= FORCECLOSE;
+ }
+
+ /*
+ * Clear out buffer cache. I don't think we
+ * ever get anything cached at this level at the
+ * moment, but who knows...
+ */
+ if (rootvp->v_usecount > 1) /* root vnode has more than one reference */
+ return (EBUSY);
+ if (error = vflush(mp, rootvp, flags)) /* NOTE(review): presumably skips rootvp */
+ return (error);
+
+ /*
+ * Release the reference on the root vnode
+ */
+ vrele(rootvp);
+ /*
+ * And blow it away for future re-use
+ */
+ vgone(rootvp);
+ /*
+ * Finally, throw away the fdescmount structure
+ */
+ free(mp->mnt_data, M_UFSMNT); /* XXX */
+ mp->mnt_data = 0;
+
+ return (0);
+}
+
+int
+fdesc_root(mp, vpp)
+ struct mount *mp;
+ struct vnode **vpp;
+{
+ struct vnode *rootvp = VFSTOFDESC(mp)->f_root;
+
+ /*
+ * Hand back the mount's root vnode with an extra
+ * reference taken and the vnode locked; always succeeds.
+ */
+ VREF(rootvp);
+ VOP_LOCK(rootvp);
+ *vpp = rootvp;
+ return (0);
+}
+
+int
+fdesc_quotactl(mp, cmd, uid, arg, p)
+ struct mount *mp;
+ int cmd;
+ uid_t uid;
+ caddr_t arg;
+ struct proc *p;
+{
+ /* Quota control is not supported here; the arguments are ignored. */
+ return (EOPNOTSUPP);
+}
+
+int
+fdesc_statfs(mp, sbp, p)
+ struct mount *mp;
+ struct statfs *sbp;
+ struct proc *p;
+{
+ struct filedesc *fdp = p->p_fd;
+ int nofile = p->p_rlimit[RLIMIT_NOFILE].rlim_cur;
+ int slot;
+ int scan_end;
+ int nfree = 0;
+
+ /*
+ * Count the unused slots from the first free descriptor
+ * up to the smaller of the table size and the open-file
+ * limit. [ The answer is odd if the limit has been
+ * lowered below the number of files already open. ]
+ */
+ scan_end = min(fdp->fd_nfiles, nofile);
+ slot = fdp->fd_freefile;
+ while (slot < scan_end) {
+ if (fdp->fd_ofiles[slot] == NULL)
+ nfree++;
+ slot++;
+ }
+
+ /*
+ * Slots the descriptor table has not grown into yet
+ * count as free too.
+ */
+ if (nofile > fdp->fd_nfiles)
+ nfree += nofile - fdp->fd_nfiles;
+
+ sbp->f_type = MOUNT_FDESC;
+ sbp->f_flags = 0;
+ sbp->f_bsize = DEV_BSIZE;
+ sbp->f_iosize = DEV_BSIZE;
+ sbp->f_blocks = 2; /* 1K to keep df happy */
+ sbp->f_bfree = 0;
+ sbp->f_bavail = 0;
+ sbp->f_files = nofile + 1; /* one extra for "." */
+ sbp->f_ffree = nfree; /* computed above */
+ if (sbp == &mp->mnt_stat) /* caller passed the mount's own statfs */
+ return (0);
+ bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+ bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+ bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+ return (0);
+}
+
+int
+fdesc_sync(mp, waitfor)
+ struct mount *mp;
+ int waitfor;
+{
+ /* No-op: both arguments are ignored and success is reported. */
+ return (0);
+}
+
+/*
+ * Fdesc flat namespace lookup.
+ * Currently unsupported: always fails with EOPNOTSUPP.
+ */
+int
+fdesc_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+
+ return (EOPNOTSUPP);
+}
+
+int
+fdesc_fhtovp(mp, fhp, setgen, vpp)
+ struct mount *mp;
+ struct fid *fhp;
+ int setgen;
+ struct vnode **vpp;
+{
+ return (EOPNOTSUPP); /* file handle to vnode translation is not supported */
+}
+
+int
+fdesc_vptofh(vp, fhp)
+ struct vnode *vp;
+ struct fid *fhp;
+{
+ /* Vnode to file handle translation is not supported. */
+ return (EOPNOTSUPP);
+}
+
+struct vfsops fdesc_vfsops = {
+ fdesc_mount, /* mount */
+ fdesc_start, /* start */
+ fdesc_unmount, /* unmount */
+ fdesc_root, /* root */
+ fdesc_quotactl, /* quotactl (unsupported) */
+ fdesc_statfs, /* statfs */
+ fdesc_sync, /* sync (no-op) */
+ fdesc_vget, /* vget (unsupported) */
+ fdesc_fhtovp, /* fhtovp (unsupported) */
+ fdesc_vptofh, /* vptofh (unsupported) */
+ fdesc_init, /* init */
+};
diff --git a/sys/miscfs/fdesc/fdesc_vnops.c b/sys/miscfs/fdesc/fdesc_vnops.c
new file mode 100644
index 000000000000..00d8675aea2f
--- /dev/null
+++ b/sys/miscfs/fdesc/fdesc_vnops.c
@@ -0,0 +1,974 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fdesc_vnops.c 8.9 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vnops.c,v 1.12 1993/04/06 16:17:17 jsp Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/kernel.h> /* boottime */
+#include <sys/resourcevar.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/dirent.h>
+#include <miscfs/fdesc/fdesc.h>
+
+#define cttyvp(p) ((p)->p_flag & P_CONTROLT ? (p)->p_session->s_ttyvp : NULL)
+
+#define FDL_WANT 0x01 /* someone is sleeping on fdcache_lock */
+#define FDL_LOCKED 0x02 /* fdcache_lock is held */
+static int fdcache_lock; /* FDL_* bits; taken in fdesc_allocvp() */
+
+dev_t devctty; /* set in fdesc_init(); passed to the ctty* routines */
+/* The bare text inside the #if below is a deliberate compile error. */
+#if (FD_STDIN != FD_STDOUT-1) || (FD_STDOUT != FD_STDERR-1)
+FD_STDIN, FD_STDOUT, FD_STDERR must be a sequence n, n+1, n+2
+#endif
+
+#define NFDCACHE 4 /* number of hash chains; must be a power of two */
+#define FD_NHASH(ix) ((ix) & (NFDCACHE - 1)) /* result is always < NFDCACHE */
+
+/*
+ * Cache head: anchor of one doubly-linked hash chain of fdescnodes
+ */
+struct fdcache {
+ struct fdescnode *fc_forw;
+ struct fdescnode *fc_back;
+};
+
+static struct fdcache fdcache[NFDCACHE]; /* indexed by FD_NHASH(); one head per chain */
+
+/* Initialise the cache headers and devctty; always returns 0. */
+int
+fdesc_init()
+{
+ struct fdcache *fc;
+
+ devctty = makedev(nchrdev, 0);
+ /* An empty chain is a header whose two links point back at itself. */
+ for (fc = fdcache; fc < fdcache + NFDCACHE; fc++)
+ fc->fc_forw = fc->fc_back = (struct fdescnode *) fc;
+ return (0);
+}
+
+/*
+ * Map a node index to the head of its hash chain
+ */
+static struct fdcache *
+fdesc_hash(ix)
+ int ix;
+{
+ struct fdcache *head = fdcache + FD_NHASH(ix);
+ return (head);
+}
+
+int
+fdesc_allocvp(ftype, ix, mp, vpp)
+ fdntype ftype; /* type given to a newly created node */
+ int ix; /* node index: cache key together with mp */
+ struct mount *mp;
+ struct vnode **vpp; /* out: the cached or newly created vnode */
+{
+ struct fdcache *fc;
+ struct fdescnode *fd;
+ int error = 0;
+ /* Return the node for (ix, mp), creating it if it is not cached yet. */
+loop:
+ fc = fdesc_hash(ix);
+ for (fd = fc->fc_forw; fd != (struct fdescnode *) fc; fd = fd->fd_forw) {
+ if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) {
+ if (vget(fd->fd_vnode, 0)) /* vget failed: rescan the chain */
+ goto loop;
+ *vpp = fd->fd_vnode;
+ return (error); /* error is still 0 here */
+ }
+ }
+
+ /*
+ * otherwise lock the array while we call getnewvnode
+ * since that can block.
+ */
+ if (fdcache_lock & FDL_LOCKED) {
+ fdcache_lock |= FDL_WANT;
+ sleep((caddr_t) &fdcache_lock, PINOD);
+ goto loop; /* the node may have been created while we slept */
+ }
+ fdcache_lock |= FDL_LOCKED;
+
+ error = getnewvnode(VT_FDESC, mp, fdesc_vnodeop_p, vpp);
+ if (error)
+ goto out;
+ MALLOC(fd, void *, sizeof(struct fdescnode), M_TEMP, M_WAITOK);
+ (*vpp)->v_data = fd;
+ fd->fd_vnode = *vpp;
+ fd->fd_type = ftype;
+ fd->fd_fd = -1; /* no descriptor yet; fdesc_lookup() sets it for Fdesc */
+ fd->fd_link = 0;
+ fd->fd_ix = ix;
+ fc = fdesc_hash(ix);
+ insque(fd, fc);
+
+out:;
+ fdcache_lock &= ~FDL_LOCKED; /* drop the lock on success and on failure */
+
+ if (fdcache_lock & FDL_WANT) {
+ fdcache_lock &= ~FDL_WANT;
+ wakeup((caddr_t) &fdcache_lock);
+ }
+
+ return (error);
+}
+
+/*
+ * a_dvp is the directory being searched
+ * a_cnp names the component to locate in it; the result goes in *a_vpp
+ */
+int
+fdesc_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode * a_dvp;
+ struct vnode ** a_vpp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+ struct vnode **vpp = ap->a_vpp;
+ struct vnode *dvp = ap->a_dvp;
+ char *pname;
+ struct proc *p;
+ int nfiles;
+ unsigned fd;
+ int error;
+ struct vnode *fvp;
+ char *ln;
+
+ pname = ap->a_cnp->cn_nameptr;
+ if (ap->a_cnp->cn_namelen == 1 && *pname == '.') {
+ *vpp = dvp; /* "." is the directory itself */
+ VREF(dvp);
+ VOP_LOCK(dvp);
+ return (0);
+ }
+
+ p = ap->a_cnp->cn_proc;
+ nfiles = p->p_fd->fd_nfiles; /* upper bound for descriptor numbers below */
+
+ switch (VTOFDESC(dvp)->fd_type) {
+ default:
+ case Flink:
+ case Fdesc:
+ case Fctty:
+ error = ENOTDIR; /* only Froot and Fdevfd can be searched */
+ goto bad;
+
+ case Froot:
+ if (ap->a_cnp->cn_namelen == 2 && bcmp(pname, "fd", 2) == 0) {
+ error = fdesc_allocvp(Fdevfd, FD_DEVFD, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ *vpp = fvp;
+ fvp->v_type = VDIR;
+ VOP_LOCK(fvp);
+ return (0);
+ }
+
+ if (ap->a_cnp->cn_namelen == 3 && bcmp(pname, "tty", 3) == 0) {
+ struct vnode *ttyvp = cttyvp(p);
+ if (ttyvp == NULL) {
+ error = ENXIO; /* process has no controlling terminal vnode */
+ goto bad;
+ }
+ error = fdesc_allocvp(Fctty, FD_CTTY, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ *vpp = fvp;
+ fvp->v_type = VFIFO;
+ VOP_LOCK(fvp);
+ return (0);
+ }
+
+ ln = 0; /* stays 0 unless the name is stdin, stdout or stderr */
+ switch (ap->a_cnp->cn_namelen) {
+ case 5:
+ if (bcmp(pname, "stdin", 5) == 0) {
+ ln = "fd/0";
+ fd = FD_STDIN;
+ }
+ break;
+ case 6:
+ if (bcmp(pname, "stdout", 6) == 0) {
+ ln = "fd/1";
+ fd = FD_STDOUT;
+ } else
+ if (bcmp(pname, "stderr", 6) == 0) {
+ ln = "fd/2";
+ fd = FD_STDERR;
+ }
+ break;
+ }
+
+ if (ln) {
+ error = fdesc_allocvp(Flink, fd, dvp->v_mount, &fvp); /* fd is a node index here */
+ if (error)
+ goto bad;
+ VTOFDESC(fvp)->fd_link = ln;
+ *vpp = fvp;
+ fvp->v_type = VLNK;
+ VOP_LOCK(fvp);
+ return (0);
+ } else {
+ error = ENOENT;
+ goto bad;
+ }
+
+ /* NOTREACHED: both branches above leave the switch */
+
+ case Fdevfd:
+ if (ap->a_cnp->cn_namelen == 2 && bcmp(pname, "..", 2) == 0) {
+ error = fdesc_root(dvp->v_mount, vpp);
+ return (error);
+ }
+
+ fd = 0; /* parse the name as a decimal descriptor number */
+ while (*pname >= '0' && *pname <= '9') {
+ fd = 10 * fd + *pname++ - '0';
+ if (fd >= nfiles) /* already out of range: stop accumulating */
+ break;
+ }
+
+ if (*pname != '\0') { /* trailing characters after the digits */
+ error = ENOENT;
+ goto bad;
+ }
+
+ if (fd >= nfiles || p->p_fd->fd_ofiles[fd] == NULL) {
+ error = EBADF;
+ goto bad;
+ }
+
+ error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ VTOFDESC(fvp)->fd_fd = fd; /* descriptor that fdesc_open() will dup */
+ *vpp = fvp;
+ return (0);
+ }
+
+bad:;
+ *vpp = NULL;
+ return (error);
+}
+
+int
+fdesc_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ int error = 0; /* node types not handled below open successfully */
+
+ switch (VTOFDESC(vp)->fd_type) {
+ case Fdesc:
+ /*
+ * XXX Kludge: set p->p_dupfd to contain the value of
+ * the file descriptor being sought for duplication. The error
+ * return ensures that the vnode for this device will be
+ * released by vn_open. Open will detect this special error and
+ * take the actions in dupfdopen. Other callers of vn_open or
+ * VOP_OPEN will simply report the error.
+ */
+ ap->a_p->p_dupfd = VTOFDESC(vp)->fd_fd; /* XXX */
+ error = ENODEV;
+ break;
+
+ case Fctty:
+ error = cttyopen(devctty, ap->a_mode, 0, ap->a_p);
+ break;
+ }
+
+ return (error);
+}
+
+static int
+fdesc_attr(fd, vap, cred, p)
+ int fd; /* descriptor number in p's descriptor table */
+ struct vattr *vap; /* out: attributes of the file open on fd */
+ struct ucred *cred; /* passed through to VOP_GETATTR */
+ struct proc *p;
+{
+ struct filedesc *fdp = p->p_fd;
+ struct file *fp;
+ struct stat stb;
+ int error;
+ /* fd is signed here: reject negative values as well as ones past the table */
+ if (fd < 0 || fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
+ return (EBADF);
+
+ switch (fp->f_type) {
+ case DTYPE_VNODE:
+ error = VOP_GETATTR((struct vnode *) fp->f_data, vap, cred, p);
+ if (error == 0 && vap->va_type == VDIR) {
+ /*
+ * don't allow directories to show up because
+ * that causes loops in the namespace.
+ */
+ vap->va_type = VFIFO;
+ }
+ break;
+
+ case DTYPE_SOCKET:
+ error = soo_stat((struct socket *)fp->f_data, &stb);
+ if (error == 0) { /* translate the stat result field by field */
+ vattr_null(vap);
+ vap->va_type = VSOCK;
+ vap->va_mode = stb.st_mode;
+ vap->va_nlink = stb.st_nlink;
+ vap->va_uid = stb.st_uid;
+ vap->va_gid = stb.st_gid;
+ vap->va_fsid = stb.st_dev;
+ vap->va_fileid = stb.st_ino;
+ vap->va_size = stb.st_size;
+ vap->va_blocksize = stb.st_blksize;
+ vap->va_atime = stb.st_atimespec;
+ vap->va_mtime = stb.st_mtimespec;
+ vap->va_ctime = stb.st_ctimespec;
+ vap->va_gen = stb.st_gen;
+ vap->va_flags = stb.st_flags;
+ vap->va_rdev = stb.st_rdev;
+ vap->va_bytes = stb.st_blocks * stb.st_blksize;
+ }
+ break;
+
+ default:
+ panic("fdesc attr"); /* only vnode and socket files are handled */
+ break;
+ }
+
+ return (error);
+}
+
+int
+fdesc_getattr(ap)
+ struct vop_getattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct vattr *vap = ap->a_vap;
+ unsigned fd;
+ int error = 0;
+
+ switch (VTOFDESC(vp)->fd_type) {
+ case Froot:
+ case Fdevfd:
+ case Flink:
+ case Fctty:
+ bzero((caddr_t) vap, sizeof(*vap)); /* these nodes get synthesized attributes */
+ vattr_null(vap);
+ vap->va_fileid = VTOFDESC(vp)->fd_ix;
+
+ switch (VTOFDESC(vp)->fd_type) {
+ case Flink:
+ vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+ vap->va_type = VLNK;
+ vap->va_nlink = 1;
+ vap->va_size = strlen(VTOFDESC(vp)->fd_link); /* length of the link text */
+ break;
+
+ case Fctty:
+ vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;
+ vap->va_type = VFIFO;
+ vap->va_nlink = 1;
+ vap->va_size = 0;
+ break;
+
+ default: /* Froot and Fdevfd: directories */
+ vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+ vap->va_type = VDIR;
+ vap->va_nlink = 2;
+ vap->va_size = DEV_BSIZE;
+ break;
+ }
+ vap->va_uid = 0;
+ vap->va_gid = 0;
+ vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+ vap->va_blocksize = DEV_BSIZE;
+ vap->va_atime.ts_sec = boottime.tv_sec; /* all three times report boot time */
+ vap->va_atime.ts_nsec = 0;
+ vap->va_mtime = vap->va_atime;
+ vap->va_ctime = vap->va_mtime;
+ vap->va_gen = 0;
+ vap->va_flags = 0;
+ vap->va_rdev = 0;
+ vap->va_bytes = 0;
+ break;
+
+ case Fdesc:
+ fd = VTOFDESC(vp)->fd_fd;
+ error = fdesc_attr(fd, vap, ap->a_cred, ap->a_p); /* attributes of the open file */
+ break;
+
+ default:
+ panic("fdesc_getattr");
+ break;
+ }
+
+ if (error == 0)
+ vp->v_type = vap->va_type; /* keep the vnode type in step with what was reported */
+
+ return (error);
+}
+
+int
+fdesc_setattr(ap)
+ struct vop_setattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct filedesc *fdp = ap->a_p->p_fd;
+ struct file *fp;
+ unsigned fd;
+ int error;
+
+ /*
+ * Only Fdesc nodes are passed on; Fctty succeeds silently, the rest fail
+ */
+ switch (VTOFDESC(ap->a_vp)->fd_type) {
+ case Fdesc:
+ break;
+
+ case Fctty:
+ return (0);
+
+ default:
+ return (EACCES);
+ }
+
+ fd = VTOFDESC(ap->a_vp)->fd_fd;
+ if (fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) {
+ return (EBADF);
+ }
+
+ /*
+ * Can setattr the underlying vnode, but not sockets!
+ */
+ switch (fp->f_type) {
+ case DTYPE_VNODE:
+ error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap, ap->a_cred, ap->a_p);
+ break;
+
+ case DTYPE_SOCKET:
+ error = 0; /* nothing is changed, but success is reported */
+ break;
+
+ default:
+ panic("fdesc setattr");
+ break;
+ }
+
+ return (error);
+}
+
+#define UIO_MX 16 /* size of each directory record passed to uiomove() */
+
+static struct dirtmp { /* template for one root directory entry */
+ u_long d_fileno;
+ u_short d_reclen;
+ u_short d_namlen;
+ char d_name[8];
+} rootent[] = {
+ { FD_DEVFD, UIO_MX, 2, "fd" },
+ { FD_STDIN, UIO_MX, 5, "stdin" },
+ { FD_STDOUT, UIO_MX, 6, "stdout" },
+ { FD_STDERR, UIO_MX, 6, "stderr" },
+ { FD_CTTY, UIO_MX, 3, "tty" },
+ { 0 } /* sentinel: fdesc_readdir() stops at d_fileno == 0 */
+};
+
+int
+fdesc_readdir(ap)
+ struct vop_readdir_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ struct uio *uio = ap->a_uio;
+ struct filedesc *fdp;
+ int i;
+ int error;
+ /* Emit UIO_MX-sized records for the root or the fd directory. */
+ switch (VTOFDESC(ap->a_vp)->fd_type) {
+ case Fctty:
+ return (0);
+
+ case Fdesc:
+ return (ENOTDIR);
+
+ default:
+ break;
+ }
+
+ fdp = uio->uio_procp->p_fd;
+
+ if (VTOFDESC(ap->a_vp)->fd_type == Froot) {
+ struct dirent d;
+ struct dirent *dp = &d;
+ struct dirtmp *dt;
+ int nroot = sizeof(rootent) / sizeof(rootent[0]);
+
+ i = uio->uio_offset / UIO_MX;
+ error = 0;
+
+ while (uio->uio_resid > 0) {
+ /* uio_offset is caller-supplied: never index outside rootent[] */
+ if (i < 0 || i >= nroot || rootent[i].d_fileno == 0)
+ break; /* end of table; *eofflagp = 1 would go here */
+ dt = &rootent[i];
+ i++;
+
+ switch (dt->d_fileno) {
+ case FD_CTTY:
+ if (cttyvp(uio->uio_procp) == NULL)
+ continue;
+ break;
+
+ case FD_STDIN:
+ case FD_STDOUT:
+ case FD_STDERR:
+ if ((dt->d_fileno-FD_STDIN) >= fdp->fd_nfiles)
+ continue;
+ if (fdp->fd_ofiles[dt->d_fileno-FD_STDIN] == NULL)
+ continue;
+ break;
+ }
+ bzero((caddr_t) dp, UIO_MX);
+ dp->d_fileno = dt->d_fileno;
+ dp->d_namlen = dt->d_namlen;
+ dp->d_type = DT_UNKNOWN;
+ dp->d_reclen = dt->d_reclen;
+ bcopy(dt->d_name, dp->d_name, dp->d_namlen+1);
+ error = uiomove((caddr_t) dp, UIO_MX, uio);
+ if (error)
+ break;
+ }
+ uio->uio_offset = i * UIO_MX;
+ return (error);
+ }
+
+ i = uio->uio_offset / UIO_MX;
+ error = 0;
+ while (uio->uio_resid > 0) {
+ if (i < 0 || i >= fdp->fd_nfiles) /* also guards a negative index */
+ break;
+
+ if (fdp->fd_ofiles[i] != NULL) {
+ struct dirent d;
+ struct dirent *dp = &d;
+
+ bzero((caddr_t) dp, UIO_MX);
+
+ dp->d_namlen = sprintf(dp->d_name, "%d", i);
+ dp->d_reclen = UIO_MX;
+ dp->d_type = DT_UNKNOWN;
+ dp->d_fileno = i + FD_STDIN;
+ /*
+ * And ship to userland
+ */
+ error = uiomove((caddr_t) dp, UIO_MX, uio);
+ if (error)
+ break;
+ }
+ i++;
+ }
+
+ uio->uio_offset = i * UIO_MX;
+ return (error);
+}
+
+int
+fdesc_readlink(ap)
+ struct vop_readlink_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ struct vnode *linkvp = ap->a_vp;
+ char *target;
+
+ /* Only symbolic-link vnodes can be read as links. */
+ if (linkvp->v_type != VLNK)
+ return (EPERM);
+
+ /* Nodes other than Flink have no link text to return. */
+ if (VTOFDESC(linkvp)->fd_type != Flink)
+ return (EOPNOTSUPP);
+
+ /* Copy the link text out, without its terminating NUL. */
+ target = VTOFDESC(linkvp)->fd_link;
+ return (uiomove(target, strlen(target), ap->a_uio));
+}
+
+int
+fdesc_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ fdntype kind = VTOFDESC(ap->a_vp)->fd_type;
+ int error;
+
+ /*
+ * Only the "tty" node can be read, in which case the request
+ * is passed to cttyread(); every other node type refuses it.
+ */
+ if (kind == Fctty)
+ error = cttyread(devctty, ap->a_uio, ap->a_ioflag);
+ else
+ error = EOPNOTSUPP;
+
+ return (error);
+}
+
+int
+fdesc_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ fdntype kind = VTOFDESC(ap->a_vp)->fd_type;
+ int error;
+
+ /*
+ * Only the "tty" node accepts writes, which are passed
+ * to cttywrite(); every other node type refuses them.
+ */
+ if (kind == Fctty)
+ error = cttywrite(devctty, ap->a_uio, ap->a_ioflag);
+ else
+ error = EOPNOTSUPP;
+
+ return (error);
+}
+
+int
+fdesc_ioctl(ap)
+ struct vop_ioctl_args /* {
+ struct vnode *a_vp;
+ int a_command;
+ caddr_t a_data;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ fdntype kind = VTOFDESC(ap->a_vp)->fd_type;
+ int error;
+
+ /*
+ * Only the "tty" node takes ioctls, which are passed
+ * to cttyioctl(); every other node type refuses them.
+ */
+ if (kind == Fctty)
+ error = cttyioctl(devctty, ap->a_command, ap->a_data,
+ ap->a_fflag, ap->a_p);
+ else
+ error = EOPNOTSUPP;
+
+ return (error);
+}
+
+int
+fdesc_select(ap)
+ struct vop_select_args /* {
+ struct vnode *a_vp;
+ int a_which;
+ int a_fflags;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ fdntype kind = VTOFDESC(ap->a_vp)->fd_type;
+ int error;
+
+ /*
+ * Only the "tty" node can be selected on, via cttyselect();
+ * every other node type refuses the request.
+ */
+ if (kind == Fctty)
+ error = cttyselect(devctty, ap->a_fflags, ap->a_p);
+ else
+ error = EOPNOTSUPP;
+
+ return (error);
+}
+
+int
+fdesc_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ /*
+ * Reset the vnode type to VNON so that nothing
+ * nasty happens to it later in vgone().
+ */
+ ap->a_vp->v_type = VNON;
+
+ return (0);
+}
+
+int
+fdesc_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct vnode *dying = ap->a_vp;
+ struct fdescnode *node = VTOFDESC(dying);
+
+ remque(node); /* unhook the node from its hash chain */
+ FREE(node, M_TEMP); /* release the per-vnode fdescnode */
+ dying->v_data = 0;
+ return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to special devices.
+ */
+int
+fdesc_pathconf(ap)
+ struct vop_pathconf_args /* {
+ struct vnode *a_vp;
+ int a_name;
+ int *a_retval;
+ } */ *ap;
+{
+ switch (ap->a_name) { /* the answer goes in *a_retval; a_vp is not consulted */
+ case _PC_LINK_MAX:
+ *ap->a_retval = LINK_MAX;
+ return (0);
+ case _PC_MAX_CANON:
+ *ap->a_retval = MAX_CANON;
+ return (0);
+ case _PC_MAX_INPUT:
+ *ap->a_retval = MAX_INPUT;
+ return (0);
+ case _PC_PIPE_BUF:
+ *ap->a_retval = PIPE_BUF;
+ return (0);
+ case _PC_CHOWN_RESTRICTED:
+ *ap->a_retval = 1;
+ return (0);
+ case _PC_VDISABLE:
+ *ap->a_retval = _POSIX_VDISABLE;
+ return (0);
+ default: /* any other name is rejected */
+ return (EINVAL);
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Print a fixed identifying line for a /dev/fd vnode.
+ */
+/* ARGSUSED */
+int
+fdesc_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ /* The text is constant: nothing is read from a_vp. */
+ printf("tag VT_NON, fdesc vnode\n");
+ return (0);
+}
+
+/*void*/
+int
+fdesc_vfree(ap)
+ struct vop_vfree_args /* {
+ struct vnode *a_pvp;
+ ino_t a_ino;
+ int a_mode;
+ } */ *ap;
+{
+ /* Does nothing and reports success. */
+ return (0);
+}
+
+/*
+ * /dev/fd vnode unsupported operation: always fails with EOPNOTSUPP
+ */
+int
+fdesc_enotsupp()
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * /dev/fd "should never get here" operation: panics when called
+ */
+int
+fdesc_badop()
+{
+
+ panic("fdesc: bad op");
+ /* NOTREACHED */
+}
+
+/*
+ * /dev/fd vnode null operation: does nothing and reports success
+ */
+int
+fdesc_nullop()
+{
+
+ return (0);
+}
+
+#define fdesc_create ((int (*) __P((struct vop_create_args *)))fdesc_enotsupp) /* EOPNOTSUPP */
+#define fdesc_mknod ((int (*) __P((struct vop_mknod_args *)))fdesc_enotsupp)
+#define fdesc_close ((int (*) __P((struct vop_close_args *)))nullop) /* nullop, not fdesc_nullop */
+#define fdesc_access ((int (*) __P((struct vop_access_args *)))nullop)
+#define fdesc_mmap ((int (*) __P((struct vop_mmap_args *)))fdesc_enotsupp)
+#define fdesc_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define fdesc_seek ((int (*) __P((struct vop_seek_args *)))nullop)
+#define fdesc_remove ((int (*) __P((struct vop_remove_args *)))fdesc_enotsupp)
+#define fdesc_link ((int (*) __P((struct vop_link_args *)))fdesc_enotsupp)
+#define fdesc_rename ((int (*) __P((struct vop_rename_args *)))fdesc_enotsupp)
+#define fdesc_mkdir ((int (*) __P((struct vop_mkdir_args *)))fdesc_enotsupp)
+#define fdesc_rmdir ((int (*) __P((struct vop_rmdir_args *)))fdesc_enotsupp)
+#define fdesc_symlink ((int (*) __P((struct vop_symlink_args *)))fdesc_enotsupp)
+#define fdesc_abortop ((int (*) __P((struct vop_abortop_args *)))nullop)
+#define fdesc_lock ((int (*) __P((struct vop_lock_args *)))nullop)
+#define fdesc_unlock ((int (*) __P((struct vop_unlock_args *)))nullop)
+#define fdesc_bmap ((int (*) __P((struct vop_bmap_args *)))fdesc_badop) /* panics if called */
+#define fdesc_strategy ((int (*) __P((struct vop_strategy_args *)))fdesc_badop)
+#define fdesc_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+#define fdesc_advlock ((int (*) __P((struct vop_advlock_args *)))fdesc_enotsupp)
+#define fdesc_blkatoff \
+ ((int (*) __P((struct vop_blkatoff_args *)))fdesc_enotsupp)
+#define fdesc_vget ((int (*) __P((struct vop_vget_args *)))fdesc_enotsupp)
+#define fdesc_valloc ((int(*) __P(( \
+ struct vnode *pvp, \
+ int mode, \
+ struct ucred *cred, \
+ struct vnode **vpp))) fdesc_enotsupp)
+#define fdesc_truncate \
+ ((int (*) __P((struct vop_truncate_args *)))fdesc_enotsupp)
+#define fdesc_update ((int (*) __P((struct vop_update_args *)))fdesc_enotsupp)
+#define fdesc_bwrite ((int (*) __P((struct vop_bwrite_args *)))fdesc_enotsupp)
+
+int (**fdesc_vnodeop_p)(); /* operations vector handed to getnewvnode() */
+struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error }, /* NOTE(review): presumably the fallback entry */
+ { &vop_lookup_desc, fdesc_lookup }, /* lookup */
+ { &vop_create_desc, fdesc_create }, /* create */
+ { &vop_mknod_desc, fdesc_mknod }, /* mknod */
+ { &vop_open_desc, fdesc_open }, /* open */
+ { &vop_close_desc, fdesc_close }, /* close */
+ { &vop_access_desc, fdesc_access }, /* access */
+ { &vop_getattr_desc, fdesc_getattr }, /* getattr */
+ { &vop_setattr_desc, fdesc_setattr }, /* setattr */
+ { &vop_read_desc, fdesc_read }, /* read */
+ { &vop_write_desc, fdesc_write }, /* write */
+ { &vop_ioctl_desc, fdesc_ioctl }, /* ioctl */
+ { &vop_select_desc, fdesc_select }, /* select */
+ { &vop_mmap_desc, fdesc_mmap }, /* mmap */
+ { &vop_fsync_desc, fdesc_fsync }, /* fsync */
+ { &vop_seek_desc, fdesc_seek }, /* seek */
+ { &vop_remove_desc, fdesc_remove }, /* remove */
+ { &vop_link_desc, fdesc_link }, /* link */
+ { &vop_rename_desc, fdesc_rename }, /* rename */
+ { &vop_mkdir_desc, fdesc_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, fdesc_rmdir }, /* rmdir */
+ { &vop_symlink_desc, fdesc_symlink }, /* symlink */
+ { &vop_readdir_desc, fdesc_readdir }, /* readdir */
+ { &vop_readlink_desc, fdesc_readlink }, /* readlink */
+ { &vop_abortop_desc, fdesc_abortop }, /* abortop */
+ { &vop_inactive_desc, fdesc_inactive }, /* inactive */
+ { &vop_reclaim_desc, fdesc_reclaim }, /* reclaim */
+ { &vop_lock_desc, fdesc_lock }, /* lock */
+ { &vop_unlock_desc, fdesc_unlock }, /* unlock */
+ { &vop_bmap_desc, fdesc_bmap }, /* bmap */
+ { &vop_strategy_desc, fdesc_strategy }, /* strategy */
+ { &vop_print_desc, fdesc_print }, /* print */
+ { &vop_islocked_desc, fdesc_islocked }, /* islocked */
+ { &vop_pathconf_desc, fdesc_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fdesc_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fdesc_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fdesc_valloc }, /* valloc */
+ { &vop_vfree_desc, fdesc_vfree }, /* vfree */
+ { &vop_truncate_desc, fdesc_truncate }, /* truncate */
+ { &vop_update_desc, fdesc_update }, /* update */
+ { &vop_bwrite_desc, fdesc_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* all-NULL last entry */
+};
+struct vnodeopv_desc fdesc_vnodeop_opv_desc =
+ { &fdesc_vnodeop_p, fdesc_vnodeop_entries }; /* pairs the vector with its entry table */
diff --git a/sys/miscfs/fifofs/fifo.h b/sys/miscfs/fifofs/fifo.h
new file mode 100644
index 000000000000..e89186d8b896
--- /dev/null
+++ b/sys/miscfs/fifofs/fifo.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fifo.h 8.2 (Berkeley) 2/2/94
+ */
+
+#ifdef FIFO
+/*
+ * Prototypes for fifo operations on vnodes; the #define'd ones are casts of fifo_badop, fifo_ebadf or nullop.
+ */
+int fifo_badop(),
+ fifo_ebadf();
+
+int fifo_lookup __P((struct vop_lookup_args *));
+#define fifo_create ((int (*) __P((struct vop_create_args *)))fifo_badop)
+#define fifo_mknod ((int (*) __P((struct vop_mknod_args *)))fifo_badop)
+int fifo_open __P((struct vop_open_args *));
+int fifo_close __P((struct vop_close_args *));
+#define fifo_access ((int (*) __P((struct vop_access_args *)))fifo_ebadf)
+#define fifo_getattr ((int (*) __P((struct vop_getattr_args *)))fifo_ebadf)
+#define fifo_setattr ((int (*) __P((struct vop_setattr_args *)))fifo_ebadf)
+int fifo_read __P((struct vop_read_args *));
+int fifo_write __P((struct vop_write_args *));
+int fifo_ioctl __P((struct vop_ioctl_args *));
+int fifo_select __P((struct vop_select_args *));
+#define fifo_mmap ((int (*) __P((struct vop_mmap_args *)))fifo_badop)
+#define fifo_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define fifo_seek ((int (*) __P((struct vop_seek_args *)))fifo_badop)
+#define fifo_remove ((int (*) __P((struct vop_remove_args *)))fifo_badop)
+#define fifo_link ((int (*) __P((struct vop_link_args *)))fifo_badop)
+#define fifo_rename ((int (*) __P((struct vop_rename_args *)))fifo_badop)
+#define fifo_mkdir ((int (*) __P((struct vop_mkdir_args *)))fifo_badop)
+#define fifo_rmdir ((int (*) __P((struct vop_rmdir_args *)))fifo_badop)
+#define fifo_symlink ((int (*) __P((struct vop_symlink_args *)))fifo_badop)
+#define fifo_readdir ((int (*) __P((struct vop_readdir_args *)))fifo_badop)
+#define fifo_readlink ((int (*) __P((struct vop_readlink_args *)))fifo_badop)
+#define fifo_abortop ((int (*) __P((struct vop_abortop_args *)))fifo_badop)
+#define fifo_inactive ((int (*) __P((struct vop_inactive_args *)))nullop)
+#define fifo_reclaim ((int (*) __P((struct vop_reclaim_args *)))nullop)
+int fifo_lock __P((struct vop_lock_args *));
+int fifo_unlock __P((struct vop_unlock_args *));
+int fifo_bmap __P((struct vop_bmap_args *));
+#define fifo_strategy ((int (*) __P((struct vop_strategy_args *)))fifo_badop)
+int fifo_print __P((struct vop_print_args *));
+#define fifo_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+int fifo_pathconf __P((struct vop_pathconf_args *));
+int fifo_advlock __P((struct vop_advlock_args *));
+#define fifo_blkatoff ((int (*) __P((struct vop_blkatoff_args *)))fifo_badop)
+#define fifo_valloc ((int (*) __P((struct vop_valloc_args *)))fifo_badop)
+#define fifo_reallocblks \
+ ((int (*) __P((struct vop_reallocblks_args *)))fifo_badop)
+#define fifo_vfree ((int (*) __P((struct vop_vfree_args *)))fifo_badop)
+#define fifo_truncate ((int (*) __P((struct vop_truncate_args *)))nullop)
+#define fifo_update ((int (*) __P((struct vop_update_args *)))nullop)
+#define fifo_bwrite ((int (*) __P((struct vop_bwrite_args *)))nullop)
+#endif /* FIFO */
diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c
new file mode 100644
index 000000000000..bad33a430b62
--- /dev/null
+++ b/sys/miscfs/fifofs/fifo_vnops.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fifo_vnops.c 8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/stat.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <miscfs/fifofs/fifo.h>
+
+/*
+ * This structure is associated with the FIFO vnode and stores
+ * the state associated with the FIFO.  It is allocated by the first
+ * fifo_open() and released by fifo_close() once the vnode's use count
+ * shows no other user.
+ */
+struct fifoinfo {
+ struct socket *fi_readsock; /* socket that fifo_read() receives from */
+ struct socket *fi_writesock; /* socket that fifo_write() sends on */
+ long fi_readers; /* opens for reading not yet closed */
+ long fi_writers; /* opens for writing not yet closed */
+};
+
+/*
+ * Vnode operations vector for fifos.  Each entry pairs an operation
+ * descriptor with the fifo implementation of that operation; the list
+ * is terminated by a NULL/NULL pair.  Names that are not defined in this
+ * file (fifo_create, fifo_remove, ...) come from <miscfs/fifofs/fifo.h>.
+ * fifo_vnodeop_opv_desc ties the table to fifo_vnodeop_p.
+ */
+int (**fifo_vnodeop_p)();
+struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, fifo_lookup }, /* lookup */
+ { &vop_create_desc, fifo_create }, /* create */
+ { &vop_mknod_desc, fifo_mknod }, /* mknod */
+ { &vop_open_desc, fifo_open }, /* open */
+ { &vop_close_desc, fifo_close }, /* close */
+ { &vop_access_desc, fifo_access }, /* access */
+ { &vop_getattr_desc, fifo_getattr }, /* getattr */
+ { &vop_setattr_desc, fifo_setattr }, /* setattr */
+ { &vop_read_desc, fifo_read }, /* read */
+ { &vop_write_desc, fifo_write }, /* write */
+ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */
+ { &vop_select_desc, fifo_select }, /* select */
+ { &vop_mmap_desc, fifo_mmap }, /* mmap */
+ { &vop_fsync_desc, fifo_fsync }, /* fsync */
+ { &vop_seek_desc, fifo_seek }, /* seek */
+ { &vop_remove_desc, fifo_remove }, /* remove */
+ { &vop_link_desc, fifo_link }, /* link */
+ { &vop_rename_desc, fifo_rename }, /* rename */
+ { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */
+ { &vop_symlink_desc, fifo_symlink }, /* symlink */
+ { &vop_readdir_desc, fifo_readdir }, /* readdir */
+ { &vop_readlink_desc, fifo_readlink }, /* readlink */
+ { &vop_abortop_desc, fifo_abortop }, /* abortop */
+ { &vop_inactive_desc, fifo_inactive }, /* inactive */
+ { &vop_reclaim_desc, fifo_reclaim }, /* reclaim */
+ { &vop_lock_desc, fifo_lock }, /* lock */
+ { &vop_unlock_desc, fifo_unlock }, /* unlock */
+ { &vop_bmap_desc, fifo_bmap }, /* bmap */
+ { &vop_strategy_desc, fifo_strategy }, /* strategy */
+ { &vop_print_desc, fifo_print }, /* print */
+ { &vop_islocked_desc, fifo_islocked }, /* islocked */
+ { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fifo_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fifo_valloc }, /* valloc */
+ { &vop_vfree_desc, fifo_vfree }, /* vfree */
+ { &vop_truncate_desc, fifo_truncate }, /* truncate */
+ { &vop_update_desc, fifo_update }, /* update */
+ { &vop_bwrite_desc, fifo_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc fifo_vnodeop_opv_desc =
+ { &fifo_vnodeop_p, fifo_vnodeop_entries };
+
+/*
+ * Name lookup relative to a fifo.  A fifo is never a directory, so the
+ * result pointer is cleared and ENOTDIR is returned unconditionally.
+ */
+/* ARGSUSED */
+int
+fifo_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct vnode **resultp = ap->a_vpp;
+
+	*resultp = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Open called to set up a new instance of a fifo or
+ * to find an active instance of a fifo.
+ *
+ * An open requesting both FREAD and FWRITE is rejected with EINVAL.
+ * The first open of a vnode allocates the fifoinfo and a connected pair
+ * of AF_UNIX stream sockets.  A blocking open then sleeps until the
+ * opposite side has at least one opener; a non-blocking read open
+ * returns at once, and a non-blocking write open with no reader fails
+ * with ENXIO.  On any error after the counters were bumped, VOP_CLOSE
+ * is called to undo the open.
+ */
+/* ARGSUSED */
+fifo_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct fifoinfo *fip;
+ struct socket *rso, *wso;
+ int error;
+ static char openstr[] = "fifo";
+
+ if ((ap->a_mode & (FREAD|FWRITE)) == (FREAD|FWRITE))
+ return (EINVAL);
+ if ((fip = vp->v_fifoinfo) == NULL) {
+ /* First open: build the fifoinfo and its socket pair. */
+ MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK);
+ vp->v_fifoinfo = fip;
+ if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0)) {
+ free(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ return (error);
+ }
+ fip->fi_readsock = rso;
+ if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0)) {
+ (void)soclose(rso);
+ free(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ return (error);
+ }
+ fip->fi_writesock = wso;
+ /* Connect the write socket to the read socket. */
+ if (error = unp_connect2(wso, rso)) {
+ (void)soclose(wso);
+ (void)soclose(rso);
+ free(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ return (error);
+ }
+ fip->fi_readers = fip->fi_writers = 0;
+ /* Until someone opens each side, neither direction can move data. */
+ wso->so_state |= SS_CANTRCVMORE;
+ rso->so_state |= SS_CANTSENDMORE;
+ }
+ error = 0;
+ if (ap->a_mode & FREAD) {
+ fip->fi_readers++;
+ if (fip->fi_readers == 1) {
+ /* First reader: writers may send; wake any sleeping in open. */
+ fip->fi_writesock->so_state &= ~SS_CANTSENDMORE;
+ if (fip->fi_writers > 0)
+ wakeup((caddr_t)&fip->fi_writers);
+ }
+ if (ap->a_mode & O_NONBLOCK)
+ return (0);
+ /* Sleep (interruptibly) until a writer shows up. */
+ while (fip->fi_writers == 0) {
+ VOP_UNLOCK(vp);
+ error = tsleep((caddr_t)&fip->fi_readers,
+ PCATCH | PSOCK, openstr, 0);
+ VOP_LOCK(vp);
+ if (error)
+ break;
+ }
+ } else {
+ fip->fi_writers++;
+ if (fip->fi_readers == 0 && (ap->a_mode & O_NONBLOCK)) {
+ error = ENXIO;
+ } else {
+ if (fip->fi_writers == 1) {
+ /* First writer: readers may receive; wake any sleeping in open. */
+ fip->fi_readsock->so_state &= ~SS_CANTRCVMORE;
+ if (fip->fi_readers > 0)
+ wakeup((caddr_t)&fip->fi_readers);
+ }
+ /* Sleep (interruptibly) until a reader shows up. */
+ while (fip->fi_readers == 0) {
+ VOP_UNLOCK(vp);
+ error = tsleep((caddr_t)&fip->fi_writers,
+ PCATCH | PSOCK, openstr, 0);
+ VOP_LOCK(vp);
+ if (error)
+ break;
+ }
+ }
+ }
+ /* Back out the counter changes made above by closing again. */
+ if (error)
+ VOP_CLOSE(vp, ap->a_mode, ap->a_cred, ap->a_p);
+ return (error);
+}
+
+/*
+ * Vnode op for read: receive data for the caller from the fifo's
+ * read-side socket.  The vnode is unlocked around the receive.
+ */
+/* ARGSUSED */
+int
+fifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio = ap->a_uio;
+	struct socket *rso = vp->v_fifoinfo->fi_readsock;
+	int nodelay = ap->a_ioflag & IO_NDELAY;
+	int error, resid_before;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ)
+		panic("fifo_read mode");
+#endif
+	if (uio->uio_resid == 0)
+		return (0);
+
+	/* Non-blocking mode is applied to the socket only for this call. */
+	if (nodelay)
+		rso->so_state |= SS_NBIO;
+	resid_before = uio->uio_resid;
+
+	VOP_UNLOCK(vp);
+	error = soreceive(rso, (struct mbuf **)0, uio, (int *)0,
+	    (struct mbuf **)0, (struct mbuf **)0);
+	VOP_LOCK(vp);
+
+	/*
+	 * Nothing was transferred: clear the EOF indication so that it is
+	 * reported only once.
+	 */
+	if (uio->uio_resid == resid_before)
+		rso->so_state &= ~SS_CANTRCVMORE;
+	if (nodelay)
+		rso->so_state &= ~SS_NBIO;
+	return (error);
+}
+
+/*
+ * Vnode op for write: send the caller's data on the fifo's write-side
+ * socket.  The vnode is unlocked around the send.
+ */
+/* ARGSUSED */
+int
+fifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct socket *wso = vp->v_fifoinfo->fi_writesock;
+	int nodelay = ap->a_ioflag & IO_NDELAY;
+	int error;
+
+#ifdef DIAGNOSTIC
+	if (ap->a_uio->uio_rw != UIO_WRITE)
+		panic("fifo_write mode");
+#endif
+	/* Non-blocking mode is applied to the socket only for this call. */
+	if (nodelay)
+		wso->so_state |= SS_NBIO;
+
+	VOP_UNLOCK(vp);
+	error = sosend(wso, (struct mbuf *)0, ap->a_uio, 0, (struct mbuf *)0, 0);
+	VOP_LOCK(vp);
+
+	if (nodelay)
+		wso->so_state &= ~SS_NBIO;
+	return (error);
+}
+
+/*
+ * Fifo ioctl operation.  FIONBIO is accepted and ignored; every other
+ * command is handed to the socket ioctl routine, using the read socket
+ * when the descriptor was opened for reading and the write socket
+ * otherwise.
+ */
+/* ARGSUSED */
+int
+fifo_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct file filetmp;	/* only f_data is filled in for soo_ioctl */
+	struct fifoinfo *fip;
+
+	if (ap->a_command == FIONBIO)
+		return (0);
+
+	fip = ap->a_vp->v_fifoinfo;
+	filetmp.f_data = (caddr_t)((ap->a_fflag & FREAD) ?
+	    fip->fi_readsock : fip->fi_writesock);
+	return (soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_p));
+}
+
+/*
+ * Fifo select operation: defer to the socket select routine on the
+ * read socket (descriptor opened for reading) or the write socket.
+ */
+/* ARGSUSED */
+int
+fifo_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int a_which;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct file filetmp;	/* only f_data is filled in for soo_select */
+	struct fifoinfo *fip = ap->a_vp->v_fifoinfo;
+
+	filetmp.f_data = (caddr_t)((ap->a_fflags & FREAD) ?
+	    fip->fi_readsock : fip->fi_writesock);
+	return (soo_select(&filetmp, ap->a_which, ap->a_p));
+}
+
+/*
+ * Identity block map: hand back the vnode and block number that were
+ * passed in.  Either output pointer may be NULL, in which case it is
+ * skipped.
+ */
+int
+fifo_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	daddr_t *bnp = ap->a_bnp;
+
+	if (vpp != NULL)
+		*vpp = ap->a_vp;
+	if (bnp != NULL)
+		*bnp = ap->a_bn;
+	return (0);
+}
+
+/*
+ * Lock a fifo vnode.  No locking is implemented yet, so this always
+ * succeeds without doing anything.
+ */
+/* ARGSUSED */
+int
+fifo_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Unlock a fifo vnode.  Counterpart of fifo_lock(); likewise a no-op
+ * that always succeeds.
+ */
+/* ARGSUSED */
+int
+fifo_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Fifo close routine.  Drops one writer or one reader (chosen by FWRITE
+ * in a_fflag); when a side reaches zero the opposite socket is told that
+ * no more data can flow.  When the vnode has no other user the sockets
+ * are closed and the fifoinfo is released.
+ */
+/* ARGSUSED */
+int
+fifo_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct fifoinfo *fip = vp->v_fifoinfo;
+	int rerror, werror;
+
+	if (ap->a_fflag & FWRITE) {
+		if (--fip->fi_writers == 0)
+			socantrcvmore(fip->fi_readsock);
+	} else {
+		if (--fip->fi_readers == 0)
+			socantsendmore(fip->fi_writesock);
+	}
+
+	/* Someone else still holds the vnode: keep the fifo state. */
+	if (vp->v_usecount > 1)
+		return (0);
+
+	rerror = soclose(fip->fi_readsock);
+	werror = soclose(fip->fi_writesock);
+	FREE(fip, M_VNODE);
+	vp->v_fifoinfo = NULL;
+
+	/* The read socket's error takes precedence. */
+	return (rerror ? rerror : werror);
+}
+
+/*
+ * Print out the contents of a fifo vnode: a tag, the reader/writer
+ * counts (via fifo_printinfo) and a newline.
+ *
+ * Returns 0.  The function is declared to return int in fifo.h, so an
+ * explicit value is returned instead of falling off the end.
+ */
+int
+fifo_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_NON");
+	fifo_printinfo(ap->a_vp);
+	printf("\n");
+	return (0);
+}
+
+/*
+ * Print out internal contents of a fifo vnode: the number of readers
+ * and writers recorded in its fifoinfo.
+ *
+ * fi_readers and fi_writers are longs, so they are printed with %ld.
+ * Returns 0.
+ */
+int
+fifo_printinfo(vp)
+	struct vnode *vp;
+{
+	register struct fifoinfo *fip = vp->v_fifoinfo;
+
+	printf(", fifo with %ld readers and %ld writers",
+	    fip->fi_readers, fip->fi_writers);
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to fifo's.
+ * Supported names are _PC_LINK_MAX, _PC_PIPE_BUF and
+ * _PC_CHOWN_RESTRICTED; anything else yields EINVAL and leaves
+ * *a_retval untouched.
+ */
+int
+fifo_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int value;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		value = LINK_MAX;
+		break;
+	case _PC_PIPE_BUF:
+		value = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		value = 1;
+		break;
+	default:
+		return (EINVAL);
+	}
+	*ap->a_retval = value;
+	return (0);
+}
+
+/*
+ * Fifo failed operation: always reports EBADF.
+ */
+int
+fifo_ebadf()
+{
+
+	return (EBADF);
+}
+
+/*
+ * Fifo advisory byte-level locks.  Not supported on fifos: every
+ * request is refused with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+fifo_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t a_id;
+		int a_op;
+		struct flock *a_fl;
+		int a_flags;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Fifo bad operation: reaching this function is treated as fatal and
+ * panics the system.
+ */
+int
+fifo_badop()
+{
+
+	panic("fifo_badop called");
+	/* NOTREACHED */
+}
diff --git a/sys/miscfs/kernfs/kernfs.h b/sys/miscfs/kernfs/kernfs.h
new file mode 100644
index 000000000000..75ddecc6db1c
--- /dev/null
+++ b/sys/miscfs/kernfs/kernfs.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kernfs.h 8.4 (Berkeley) 1/21/94
+ */
+
+#define _PATH_KERNFS "/kern" /* Default mountpoint */
+
+#ifdef KERNEL
+/* Per-mount private data, stored in the mount's mnt_data. */
+struct kernfs_mount {
+ struct vnode *kf_root; /* Root node */
+};
+
+/* Per-vnode private data, stored in the vnode's v_data. */
+struct kernfs_node {
+ struct kern_target *kf_kt; /* table entry this vnode represents */
+};
+
+/* Convert a mount point / vnode to its kernfs private data. */
+#define VFSTOKERNFS(mp) ((struct kernfs_mount *)((mp)->mnt_data))
+#define VTOKERN(vp) ((struct kernfs_node *)(vp)->v_data)
+
+extern int (**kernfs_vnodeop_p)();
+extern struct vfsops kernfs_vfsops;
+extern struct vnode *rrootvp; /* raw root device vnode; 0 if none was found */
+#endif /* KERNEL */
diff --git a/sys/miscfs/kernfs/kernfs_vfsops.c b/sys/miscfs/kernfs/kernfs_vfsops.c
new file mode 100644
index 000000000000..b68d76eaddfd
--- /dev/null
+++ b/sys/miscfs/kernfs/kernfs_vfsops.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kernfs_vfsops.c 8.4 (Berkeley) 1/21/94
+ */
+
+/*
+ * Kernel params Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/kernfs/kernfs.h>
+
+/*
+ * Vnode for the raw (character) root device.  Set up by kernfs_init();
+ * left as 0 when no matching character device is found.
+ */
+struct vnode *rrootvp;
+
+/*
+ * Create a vnode for a character device.
+ *
+ * dev	device number to wrap
+ * vpp	receives the resulting vnode
+ *
+ * Returns 0 on success, or the error from getnewvnode().  *vpp is always
+ * stored: it is set to 0 when dev is NODEV (which still returns 0) and
+ * when vnode allocation fails, so the caller never sees a stale pointer.
+ * If checkalias() reports an existing alias for the device, the freshly
+ * allocated vnode is released and the alias is returned instead.
+ */
+int
+cdevvp(dev, vpp)
+	dev_t dev;
+	struct vnode **vpp;
+{
+	register struct vnode *vp;
+	struct vnode *nvp;
+	int error;
+
+	if (dev == NODEV) {
+		*vpp = (struct vnode *)0;
+		return (0);
+	}
+	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
+	if (error) {
+		*vpp = (struct vnode *)0;
+		return (error);
+	}
+	vp = nvp;
+	vp->v_type = VCHR;
+	nvp = checkalias(vp, dev, (struct mount *)0);
+	if (nvp != (struct vnode *)0) {
+		vput(vp);
+		vp = nvp;
+	}
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Filesystem initialisation: look for the character device whose open
+ * routine is the same as the block root device's, and create a vnode for
+ * it in rrootvp.  If none can be set up, a message is printed and
+ * rrootvp is cleared.
+ */
+kernfs_init()
+{
+	int error = ENXIO;
+	int bmaj = major(rootdev);
+	int cmaj;
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_init\n"); /* printed during system boot */
+#endif
+
+	for (cmaj = 0; cmaj < nchrdev; cmaj++) {
+		dev_t cdev;
+
+		if (cdevsw[cmaj].d_open != bdevsw[bmaj].d_open)
+			continue;
+		cdev = makedev(cmaj, minor(rootdev));
+		error = cdevvp(cdev, &rrootvp);
+		if (error == 0)
+			break;
+	}
+
+	if (error != 0) {
+		printf("kernfs: no raw boot device\n");
+		rrootvp = 0;
+	}
+}
+
+/*
+ * Mount the Kernel params filesystem.
+ *
+ * Allocates the root vnode and the per-mount kernfs_mount structure,
+ * marks the mount local, assigns a filesystem id and records the
+ * mounted-on path and the fixed mounted-from name "kernfs".
+ * Returns 0 on success, EOPNOTSUPP for an update request, or the error
+ * from getnewvnode().
+ */
+kernfs_mount(mp, path, data, ndp, p)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ int error = 0;
+ u_int size;
+ struct kernfs_mount *fmp;
+ struct vnode *rvp;
+
+#ifdef KERNFS_DIAGNOSTIC
+ printf("kernfs_mount(mp = %x)\n", mp);
+#endif
+
+ /*
+ * Updating an existing mount is not supported.
+ */
+ if (mp->mnt_flag & MNT_UPDATE)
+ return (EOPNOTSUPP);
+
+ error = getnewvnode(VT_KERNFS, mp, kernfs_vnodeop_p, &rvp); /* XXX */
+ if (error)
+ return (error);
+
+ MALLOC(fmp, struct kernfs_mount *, sizeof(struct kernfs_mount),
+ M_UFSMNT, M_WAITOK); /* XXX */
+ rvp->v_type = VDIR;
+ rvp->v_flag |= VROOT;
+#ifdef KERNFS_DIAGNOSTIC
+ printf("kernfs_mount: root vp = %x\n", rvp);
+#endif
+ fmp->kf_root = rvp;
+ mp->mnt_flag |= MNT_LOCAL;
+ mp->mnt_data = (qaddr_t) fmp;
+ getnewfsid(mp, MOUNT_KERNFS);
+
+ /* NOTE(review): the copyinstr() result is ignored here. */
+ (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+ bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+ bcopy("kernfs", mp->mnt_stat.f_mntfromname, sizeof("kernfs"));
+#ifdef KERNFS_DIAGNOSTIC
+ printf("kernfs_mount: at %s\n", mp->mnt_stat.f_mntonname);
+#endif
+ return (0);
+}
+
+/*
+ * Start hook for a kernfs mount.  Nothing needs starting; always
+ * returns 0.
+ */
+/* ARGSUSED */
+int
+kernfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Unmount a kernfs filesystem.
+ *
+ * Returns EINVAL for a forced unmount when doforce is clear, EBUSY when
+ * the root vnode has users beyond the mount's own reference, the error
+ * from vflush() if flushing fails, and 0 once the root vnode and the
+ * kernfs_mount structure have been released.
+ */
+kernfs_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ int error;
+ int flags = 0;
+ extern int doforce;
+ struct vnode *rootvp = VFSTOKERNFS(mp)->kf_root;
+
+#ifdef KERNFS_DIAGNOSTIC
+ printf("kernfs_unmount(mp = %x)\n", mp);
+#endif
+
+ if (mntflags & MNT_FORCE) {
+ /* kernfs can never be rootfs so don't check for it */
+ if (!doforce)
+ return (EINVAL);
+ flags |= FORCECLOSE;
+ }
+
+ /*
+ * Clear out buffer cache. I don't think we
+ * ever get anything cached at this level at the
+ * moment, but who knows...
+ */
+ /* More than one reference means the root is still in use. */
+ if (rootvp->v_usecount > 1)
+ return (EBUSY);
+#ifdef KERNFS_DIAGNOSTIC
+ printf("kernfs_unmount: calling vflush\n");
+#endif
+ /* Flush every vnode on the mount, skipping the root itself. */
+ if (error = vflush(mp, rootvp, flags))
+ return (error);
+
+#ifdef KERNFS_DIAGNOSTIC
+ vprint("kernfs root", rootvp);
+#endif
+ /*
+ * Release reference on underlying root vnode
+ */
+ vrele(rootvp);
+ /*
+ * And blow it away for future re-use
+ */
+ vgone(rootvp);
+ /*
+ * Finally, throw away the kernfs_mount structure
+ */
+ free(mp->mnt_data, M_UFSMNT); /* XXX */
+ mp->mnt_data = 0;
+ return 0;
+}
+
+/*
+ * Return the root vnode of a kernfs mount in *vpp, with a new
+ * reference taken and the vnode locked.  Always returns 0.
+ */
+int
+kernfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootnode;
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_root(mp = %x)\n", mp);
+#endif
+
+	rootnode = VFSTOKERNFS(mp)->kf_root;
+	VREF(rootnode);
+	VOP_LOCK(rootnode);
+	*vpp = rootnode;
+	return (0);
+}
+
+/*
+ * Quota control.  kernfs has no quotas; always returns EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+kernfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Report filesystem statistics for a kernfs mount.  The numbers are
+ * fixed; the fsid and mount names are copied from the mount's own
+ * statfs record unless sbp already is that record.  Always returns 0.
+ */
+int
+kernfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs *msp = &mp->mnt_stat;
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_statfs(mp = %x)\n", mp);
+#endif
+
+	sbp->f_type = MOUNT_KERNFS;
+	sbp->f_flags = 0;
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* 1K to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = 0;
+	sbp->f_ffree = 0;
+
+	if (sbp == msp)
+		return (0);
+
+	bcopy(&msp->f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(msp->f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(msp->f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync a kernfs mount.  There is nothing to write back; always
+ * returns 0.
+ */
+/* ARGSUSED */
+int
+kernfs_sync(mp, waitfor)
+	struct mount *mp;
+	int waitfor;
+{
+
+	return (0);
+}
+
+/*
+ * Kernfs flat namespace lookup (vnode by inode number).
+ * Currently unsupported: always returns EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+kernfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+
+/*
+ * File handle to vnode translation.  Unsupported on kernfs: always
+ * returns EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+kernfs_fhtovp(mp, fhp, setgen, vpp)
+	struct mount *mp;
+	struct fid *fhp;
+	int setgen;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Vnode to file handle translation.  Unsupported on kernfs: always
+ * returns EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+kernfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Filesystem operations vector for kernfs.  The initialiser is
+ * positional, so the order of entries must match the member order of
+ * struct vfsops (declared outside this file — confirm against
+ * <sys/mount.h> before reordering).
+ */
+struct vfsops kernfs_vfsops = {
+ kernfs_mount,
+ kernfs_start,
+ kernfs_unmount,
+ kernfs_root,
+ kernfs_quotactl,
+ kernfs_statfs,
+ kernfs_sync,
+ kernfs_vget,
+ kernfs_fhtovp,
+ kernfs_vptofh,
+ kernfs_init,
+};
diff --git a/sys/miscfs/kernfs/kernfs_vnops.c b/sys/miscfs/kernfs/kernfs_vnops.c
new file mode 100644
index 000000000000..10b7d7c0a64c
--- /dev/null
+++ b/sys/miscfs/kernfs/kernfs_vnops.c
@@ -0,0 +1,759 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kernfs_vnops.c 8.6 (Berkeley) 2/10/94
+ */
+
+/*
+ * Kernel parameter filesystem (/kern)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/vmmeter.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/dirent.h>
+#include <miscfs/kernfs/kernfs.h>
+
+#define KSTRING 256 /* Largest I/O available via this filesystem */
+#define UIO_MX 32 /* NOTE(review): presumably the directory record size; names must be shorter than UIO_MX-16 */
+
+/* Permission bits reported by kernfs_getattr(). */
+#define READ_MODE (S_IRUSR|S_IRGRP|S_IROTH) /* read-only targets */
+#define WRITE_MODE (S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH) /* writable targets */
+#define DIR_MODE (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) /* the root directory */
+
+/*
+ * Table of the names that exist in a kernfs mount.  Each entry gives the
+ * file name, a pointer to the kernel datum behind it (when the tag needs
+ * one), a tag selecting how kernfs_xread()/kernfs_xwrite() format it,
+ * the permitted access (VREAD, optionally VWRITE) and the vnode type.
+ */
+struct kern_target {
+ char *kt_name; /* file name within the mount */
+ void *kt_data; /* datum for KTT_INT / KTT_STRING, else 0 */
+#define KTT_NULL 1 /* no readable contents */
+#define KTT_TIME 5 /* current time, from microtime() */
+#define KTT_INT 17 /* int pointed to by kt_data */
+#define KTT_STRING 31 /* NUL-terminated string at kt_data */
+#define KTT_HOSTNAME 47 /* the system hostname */
+#define KTT_AVENRUN 53 /* load averages and their scale factor */
+ int kt_tag; /* one of the KTT_ values above */
+ int kt_rw; /* VREAD, optionally or'ed with VWRITE */
+ int kt_vtype; /* vnode type given to the file */
+} kern_targets[] = {
+/* NOTE: The name must be less than UIO_MX-16 chars in length */
+ /* name data tag ro/rw vtype */
+ { ".", 0, KTT_NULL, VREAD, VDIR },
+ { "..", 0, KTT_NULL, VREAD, VDIR },
+ { "boottime", &boottime.tv_sec, KTT_INT, VREAD, VREG },
+ { "copyright", copyright, KTT_STRING, VREAD, VREG },
+ { "hostname", 0, KTT_HOSTNAME, VREAD|VWRITE, VREG },
+ { "hz", &hz, KTT_INT, VREAD, VREG },
+ { "loadavg", 0, KTT_AVENRUN, VREAD, VREG },
+ { "pagesize", &cnt.v_page_size, KTT_INT, VREAD, VREG },
+ { "physmem", &physmem, KTT_INT, VREAD, VREG },
+#if 0
+ { "root", 0, KTT_NULL, VREAD, VDIR },
+#endif
+ { "rootdev", 0, KTT_NULL, VREAD, VBLK },
+ { "rrootdev", 0, KTT_NULL, VREAD, VCHR },
+ { "time", 0, KTT_TIME, VREAD, VREG },
+ { "version", version, KTT_STRING, VREAD, VREG },
+};
+
+/* Number of entries in kern_targets[]. */
+static int nkern_targets = sizeof(kern_targets) / sizeof(kern_targets[0]);
+
+/*
+ * Format the current value of a kernfs target as text.
+ *
+ * kt	target to read
+ * buf	destination buffer
+ * len	size of buf in bytes (only checked for the string and hostname
+ *	tags; the numeric tags produce short, fixed-form output)
+ * lenp	receives strlen(buf) on success
+ *
+ * Returns 0 on success, or EINVAL when the value does not fit in buf or
+ * the target's tag has no readable representation.
+ */
+static int
+kernfs_xread(kt, buf, len, lenp)
+	struct kern_target *kt;
+	char *buf;
+	int len;
+	int *lenp;
+{
+	switch (kt->kt_tag) {
+	case KTT_TIME: {
+		struct timeval tv;
+
+		microtime(&tv);
+		/* tv_sec and tv_usec are longs; print them as such. */
+		sprintf(buf, "%ld %ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
+		break;
+	}
+
+	case KTT_INT: {
+		/*
+		 * NOTE(review): "boottime" points kt_data at a tv_sec field
+		 * but is read through an int pointer here; confirm this is
+		 * acceptable where int and long differ in size.
+		 */
+		int *ip = kt->kt_data;
+
+		sprintf(buf, "%d\n", *ip);
+		break;
+	}
+
+	case KTT_STRING: {
+		char *cp = kt->kt_data;
+		int xlen = strlen(cp) + 1;	/* include the NUL */
+
+		if (xlen >= len)
+			return (EINVAL);
+
+		bcopy(cp, buf, xlen);
+		break;
+	}
+
+	case KTT_HOSTNAME: {
+		char *cp = hostname;
+		int xlen = hostnamelen;
+
+		/* Need room for the name, a newline and the NUL. */
+		if (xlen >= (len-2))
+			return (EINVAL);
+
+		bcopy(cp, buf, xlen);
+		buf[xlen] = '\n';
+		buf[xlen+1] = '\0';
+		break;
+	}
+
+	case KTT_AVENRUN:
+		sprintf(buf, "%ld %ld %ld %ld\n",
+		    averunnable.ldavg[0],
+		    averunnable.ldavg[1],
+		    averunnable.ldavg[2],
+		    averunnable.fscale);
+		break;
+
+	default:
+		return (EINVAL);
+	}
+
+	*lenp = strlen(buf);
+	return (0);
+}
+
+/*
+ * Store new contents into a writable kernfs target.
+ *
+ * kt	target to modify
+ * buf	new contents
+ * len	number of bytes in buf
+ *
+ * Only the hostname target can be written: one trailing newline is
+ * stripped and the remainder becomes the new hostname.  Returns 0 on
+ * success, EINVAL if the name would not fit, and EIO for any other
+ * target.
+ */
+static int
+kernfs_xwrite(kt, buf, len)
+	struct kern_target *kt;
+	char *buf;
+	int len;
+{
+	switch (kt->kt_tag) {
+	case KTT_HOSTNAME: {
+		/* Do not look at buf[-1] when nothing was written. */
+		if (len > 0 && buf[len-1] == '\n')
+			--len;
+		/*
+		 * Leave room for the terminating NUL.  hostname[] is taken
+		 * to be MAXHOSTNAMELEN bytes, as in its kernel declaration
+		 * (TODO confirm against <sys/kernel.h>).
+		 */
+		if (len < 0 || len >= MAXHOSTNAMELEN)
+			return (EINVAL);
+		bcopy(buf, hostname, len);
+		hostname[len] = '\0';
+		hostnamelen = len;
+		return (0);
+	}
+
+	default:
+		return (EIO);
+	}
+}
+
+
+/*
+ * Look up one name in the kernfs root directory.
+ *
+ * a_dvp is the directory being searched, a_cnp describes the name to
+ * locate and a_vpp receives the resulting vnode.
+ *
+ * "." yields the directory itself (referenced, not locked here);
+ * "rootdev" and "rrootdev" yield the existing root device vnodes
+ * (referenced and locked); any other name found in kern_targets[] gets a
+ * newly allocated vnode.  Returns 0 on success, ENXIO when there is no
+ * raw root device, ENOENT for an unknown name, or the error from
+ * getnewvnode(); on failure *a_vpp is set to NULL.
+ */
+kernfs_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode * a_dvp;
+ struct vnode ** a_vpp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+ struct vnode **vpp = ap->a_vpp;
+ struct vnode *dvp = ap->a_dvp;
+ struct componentname *cnp = ap->a_cnp;
+ struct vnode *fvp;
+ int error, i;
+ char *pname;
+
+#ifdef KERNFS_DIAGNOSTIC
+ printf("kernfs_lookup(%x)\n", ap);
+ printf("kernfs_lookup(dp = %x, vpp = %x, cnp = %x)\n", dvp, vpp, ap->a_cnp);
+#endif
+ pname = cnp->cn_nameptr;
+#ifdef KERNFS_DIAGNOSTIC
+ printf("kernfs_lookup(%s)\n", pname);
+#endif
+ /* "." is the directory itself. */
+ if (cnp->cn_namelen == 1 && *pname == '.') {
+ *vpp = dvp;
+ VREF(dvp);
+ /*VOP_LOCK(dvp);*/
+ return (0);
+ }
+
+#if 0
+ if (cnp->cn_namelen == 4 && bcmp(pname, "root", 4) == 0) {
+ *vpp = rootdir;
+ VREF(rootdir);
+ VOP_LOCK(rootdir);
+ return (0);
+ }
+#endif
+
+ /*
+ * /kern/rootdev is the root device
+ */
+ if (cnp->cn_namelen == 7 && bcmp(pname, "rootdev", 7) == 0) {
+ *vpp = rootvp;
+ VREF(rootvp);
+ VOP_LOCK(rootvp);
+ return (0);
+ }
+
+ /*
+ * /kern/rrootdev is the raw root device
+ */
+ if (cnp->cn_namelen == 8 && bcmp(pname, "rrootdev", 8) == 0) {
+ if (rrootvp) {
+ *vpp = rrootvp;
+ VREF(rrootvp);
+ VOP_LOCK(rrootvp);
+ return (0);
+ }
+ error = ENXIO;
+ goto bad;
+ }
+
+ error = ENOENT;
+
+ /* Search the target table for an exact name match. */
+ for (i = 0; i < nkern_targets; i++) {
+ struct kern_target *kt = &kern_targets[i];
+ if (cnp->cn_namelen == strlen(kt->kt_name) &&
+ bcmp(kt->kt_name, pname, cnp->cn_namelen) == 0) {
+ error = 0;
+ break;
+ }
+ }
+
+#ifdef KERNFS_DIAGNOSTIC
+ printf("kernfs_lookup: i = %d, error = %d\n", i, error);
+#endif
+
+ if (error)
+ goto bad;
+
+#ifdef KERNFS_DIAGNOSTIC
+ printf("kernfs_lookup: allocate new vnode\n");
+#endif
+ /* Build a fresh vnode whose private data points at the table entry. */
+ error = getnewvnode(VT_KERNFS, dvp->v_mount, kernfs_vnodeop_p, &fvp);
+ if (error)
+ goto bad;
+ MALLOC(fvp->v_data, void *, sizeof(struct kernfs_node), M_TEMP, M_WAITOK);
+ VTOKERN(fvp)->kf_kt = &kern_targets[i];
+ fvp->v_type = VTOKERN(fvp)->kf_kt->kt_vtype;
+ *vpp = fvp;
+#ifdef KERNFS_DIAGNOSTIC
+ printf("kernfs_lookup: newvp = %x\n", fvp);
+#endif
+ return (0);
+
+bad:;
+ *vpp = NULL;
+#ifdef KERNFS_DIAGNOSTIC
+ printf("kernfs_lookup: error = %d\n", error);
+#endif
+ return (error);
+}
+
+/*
+ * Open a kernfs vnode.  The root directory can always be opened; any
+ * other file refuses a write open with EOPNOTSUPP unless its table
+ * entry allows writing.
+ */
+int
+kernfs_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/* Everything below applies to non-root vnodes only. */
+	if ((vp->v_flag & VROOT) == 0) {
+#ifdef KERNFS_DIAGNOSTIC
+		printf("kernfs_open, mode = %x, file = %s\n",
+		    ap->a_mode, VTOKERN(vp)->kf_kt->kt_name);
+#endif
+		if ((ap->a_mode & FWRITE) &&
+		    (VTOKERN(vp)->kf_kt->kt_rw & VWRITE) == 0)
+			return (EOPNOTSUPP);
+	}
+	return (0);
+}
+
+/*
+ * Check access permission on a kernfs vnode.
+ *
+ * Execute (search) is granted on the root directory only.  Uid 0 may
+ * write a file only if its table entry allows it (EROFS otherwise);
+ * every other user is denied write access with EACCES.  Read access is
+ * always granted.
+ */
+static int
+kernfs_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct ucred *cred = ap->a_cred;
+	mode_t mode = ap->a_mode;
+	int isroot = (vp->v_flag & VROOT) != 0;
+
+	if (mode & VEXEC)
+		return (isroot ? 0 : EACCES);
+
+	if (cred->cr_uid != 0)
+		return ((mode & VWRITE) ? EACCES : 0);
+
+	if (!isroot) {
+		struct kern_target *kt = VTOKERN(vp)->kf_kt;
+
+		if ((mode & VWRITE) && (kt->kt_rw & VWRITE) == 0)
+			return (EROFS);
+	}
+	return (0);
+}
+
+
+/*
+ * Return the attributes of a kernfs vnode in *a_vap.
+ *
+ * Ownership is uid/gid 0 and all three timestamps are the current time.
+ * The root is reported as a directory with file id 2; every other vnode
+ * takes its type and mode from its table entry, a file id of 3 plus its
+ * index in kern_targets[], and a size equal to the length of its
+ * formatted contents.
+ *
+ * Returns 0, or the error from kernfs_xread() when the target has no
+ * readable contents (the size is then reported as 0).
+ */
+int
+kernfs_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	int error = 0;
+	char strbuf[KSTRING];
+
+	bzero((caddr_t) vap, sizeof(*vap));
+	vattr_null(vap);
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	/* vap->va_qsize = 0; */
+	vap->va_blocksize = DEV_BSIZE;
+	microtime(&vap->va_atime);
+	vap->va_mtime = vap->va_atime;
+	/* Was a self-assignment, which left ctime at its vattr_null value. */
+	vap->va_ctime = vap->va_atime;
+	vap->va_gen = 0;
+	vap->va_flags = 0;
+	vap->va_rdev = 0;
+	/* vap->va_qbytes = 0; */
+	vap->va_bytes = 0;
+
+	if (vp->v_flag & VROOT) {
+#ifdef KERNFS_DIAGNOSTIC
+		printf("kernfs_getattr: stat rootdir\n");
+#endif
+		vap->va_type = VDIR;
+		vap->va_mode = DIR_MODE;
+		vap->va_nlink = 2;
+		vap->va_fileid = 2;
+		vap->va_size = DEV_BSIZE;
+	} else {
+		struct kern_target *kt = VTOKERN(vp)->kf_kt;
+		int nbytes = 0;	/* stays 0 if kernfs_xread() fails */
+#ifdef KERNFS_DIAGNOSTIC
+		printf("kernfs_getattr: stat target %s\n", kt->kt_name);
+#endif
+		vap->va_type = kt->kt_vtype;
+		vap->va_mode = (kt->kt_rw & VWRITE ? WRITE_MODE : READ_MODE);
+		vap->va_nlink = 1;
+		/*
+		 * Pointer subtraction already yields the element index;
+		 * dividing it by sizeof(*kt) gave every file the same id.
+		 */
+		vap->va_fileid = 3 + (kt - kern_targets);
+		error = kernfs_xread(kt, strbuf, sizeof(strbuf), &nbytes);
+		vap->va_size = nbytes;
+	}
+
+	vp->v_type = vap->va_type;
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_getattr: return error %d\n", error);
+#endif
+	return (error);
+}
+
+/*
+ * Attribute changes are accepted and discarded.
+ *
+ * An open with truncate therefore has no effect until data is actually
+ * written, which is the desired behaviour because all kernfs writes
+ * are atomic.
+ */
+int
+kernfs_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Read from a kernfs target.
+ *
+ * The target's whole value is generated into a stack buffer by
+ * kernfs_xread and the part starting at uio_offset is copied out.
+ *
+ * Returns EOPNOTSUPP for the root directory, EINVAL for a negative
+ * offset, the error from kernfs_xread, or the result of uiomove.
+ * A read starting at or beyond the end of the value transfers nothing
+ * and returns 0.
+ */
+static int
+kernfs_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio = ap->a_uio;
+	struct kern_target *kt;
+	char strbuf[KSTRING];
+	int error, len, off;
+
+	if (vp->v_flag & VROOT)
+		return (EOPNOTSUPP);
+
+	/* A negative offset would index in front of strbuf. */
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	kt = VTOKERN(vp)->kf_kt;
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kern_read %s\n", kt->kt_name);
+#endif
+
+	len = 0;
+	error = kernfs_xread(kt, strbuf, sizeof(strbuf), &len);
+	if (error)
+		return (error);
+
+	/*
+	 * Compare before narrowing the offset to int, so that a large
+	 * offset cannot wrap; at or past the end there is nothing to
+	 * copy (and len - off would be negative).
+	 */
+	if (uio->uio_offset >= len)
+		return (0);
+
+	off = (int) uio->uio_offset;
+	return (uiomove(strbuf + off, len - off, uio));
+}
+
+/*
+ * Write to a kernfs target.
+ *
+ * The write must start at offset 0 and must fit in KSTRING-1 bytes;
+ * anything left over after the copy-in yields EIO.  The data is
+ * NUL-terminated and handed to kernfs_xwrite with its string length.
+ * Writes to the root directory return 0 without doing anything.
+ */
+static int
+kernfs_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio = ap->a_uio;
+	struct kern_target *kt;
+	char strbuf[KSTRING];
+	int nbytes, error;
+
+	if (vp->v_flag & VROOT)
+		return (0);
+
+	kt = VTOKERN(vp)->kf_kt;
+
+	if (uio->uio_offset != 0)
+		return (EINVAL);
+
+	/* Leave room for the terminating NUL. */
+	nbytes = min(uio->uio_resid, KSTRING-1);
+	if ((error = uiomove(strbuf, nbytes, uio)) != 0)
+		return (error);
+
+	/* The caller supplied more than fits in one atomic write. */
+	if (uio->uio_resid != 0)
+		return (EIO);
+
+	strbuf[nbytes] = '\0';
+	nbytes = strlen(strbuf);
+	return (kernfs_xwrite(kt, strbuf, nbytes));
+}
+
+
+/*
+ * Read directory entries from the kernfs root.
+ *
+ * Each entry of kern_targets[] is emitted as one fixed-size record of
+ * UIO_MX bytes; uio_offset / UIO_MX selects the first entry and file
+ * ids are the table index plus 3.
+ *
+ * Returns EINVAL for a negative offset, otherwise 0 or the error from
+ * uiomove.  Only whole records are copied out, so a buffer smaller
+ * than UIO_MX receives nothing.
+ */
+int
+kernfs_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	off_t first;
+	int i;
+	int error;
+
+	/* A negative offset would index in front of kern_targets[]. */
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	/*
+	 * Range-check in off_t before narrowing to int, so that a huge
+	 * offset cannot wrap into a bogus (possibly negative) index.
+	 */
+	first = uio->uio_offset / UIO_MX;
+	if (first >= nkern_targets)
+		return (0);
+
+	i = (int) first;
+	error = 0;
+	while (uio->uio_resid >= UIO_MX && i < nkern_targets) {
+		struct dirent d;
+		struct dirent *dp = &d;
+		struct kern_target *kt = &kern_targets[i];
+#ifdef KERNFS_DIAGNOSTIC
+		printf("kernfs_readdir: i = %d\n", i);
+#endif
+
+		bzero((caddr_t) dp, UIO_MX);
+
+		dp->d_namlen = strlen(kt->kt_name);
+		bcopy(kt->kt_name, dp->d_name, dp->d_namlen+1);
+
+#ifdef KERNFS_DIAGNOSTIC
+		printf("kernfs_readdir: name = %s, len = %d\n",
+				dp->d_name, dp->d_namlen);
+#endif
+		/*
+		 * Fill in the remaining fields
+		 */
+		dp->d_reclen = UIO_MX;
+		dp->d_fileno = i + 3;
+		dp->d_type = DT_UNKNOWN;	/* XXX */
+		/*
+		 * And ship to userland
+		 */
+		error = uiomove((caddr_t) dp, UIO_MX, uio);
+		if (error)
+			break;
+		i++;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+
+	return (error);
+}
+
+/*
+ * Called when the last reference to a kernfs vnode goes away.
+ * The vnode type is reset to VNON to avoid nasty things happening
+ * in vgone().
+ */
+int
+kernfs_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	vp->v_type = VNON;
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_inactive(%x)\n", vp);
+#endif
+
+	return (0);
+}
+
+/*
+ * Reclaim a kernfs vnode: release the private data hanging off
+ * v_data (if any) and clear the pointer.
+ */
+int
+kernfs_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_reclaim(%x)\n", vp);
+#endif
+
+	if (!vp->v_data)
+		return (0);
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information for a kernfs vnode.
+ * The requested limit is stored through a_retval; names that are
+ * not handled yield EINVAL and leave *a_retval untouched.
+ */
+int
+kernfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int limit;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		limit = LINK_MAX;
+		break;
+	case _PC_MAX_CANON:
+		limit = MAX_CANON;
+		break;
+	case _PC_MAX_INPUT:
+		limit = MAX_INPUT;
+		break;
+	case _PC_PIPE_BUF:
+		limit = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		limit = 1;
+		break;
+	case _PC_VDISABLE:
+		limit = _POSIX_VDISABLE;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	*ap->a_retval = limit;
+	return (0);
+}
+
+/*
+ * Print out a description of a kernfs vnode.
+ * Only a fixed tag line is printed; the vnode itself is not examined.
+ */
+/* ARGSUSED */
+int
+kernfs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_KERNFS, kernfs vnode\n");
+	return (0);
+}
+
+/*
+ * vfree operation: nothing to release, always reports success.
+ * (Returns int although the operation is conceptually void.)
+ */
+int
+kernfs_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * kernfs vnode unsupported operation: always fails with EOPNOTSUPP.
+ */
+int
+kernfs_enotsupp()
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * kernfs "should never get here" operation: panics the system.
+ */
+int
+kernfs_badop()
+{
+
+	panic("kernfs: bad op");
+	/* NOTREACHED */
+}
+
+/*
+ * kernfs vnode null operation: does nothing and reports success.
+ */
+int
+kernfs_nullop()
+{
+
+	return (0);
+}
+
+/*
+ * Vnode operations that kernfs does not implement itself.
+ *
+ * Each name is defined as one of three generic handlers, cast to the
+ * function-pointer type of the operation it stands in for:
+ *   kernfs_enotsupp - the operation fails with EOPNOTSUPP
+ *   kernfs_badop    - the operation must never be reached; it panics
+ *   nullop          - the operation silently succeeds
+ * NOTE(review): the "succeeds" entries use nullop, not the
+ * kernfs_nullop defined in this file; nullop is presumably the
+ * kernel-wide no-op -- confirm.
+ */
+#define kernfs_create ((int (*) __P((struct vop_create_args *)))kernfs_enotsupp)
+#define kernfs_mknod ((int (*) __P((struct vop_mknod_args *)))kernfs_enotsupp)
+#define kernfs_close ((int (*) __P((struct vop_close_args *)))nullop)
+#define kernfs_ioctl ((int (*) __P((struct vop_ioctl_args *)))kernfs_enotsupp)
+#define kernfs_select ((int (*) __P((struct vop_select_args *)))kernfs_enotsupp)
+#define kernfs_mmap ((int (*) __P((struct vop_mmap_args *)))kernfs_enotsupp)
+#define kernfs_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define kernfs_seek ((int (*) __P((struct vop_seek_args *)))nullop)
+#define kernfs_remove ((int (*) __P((struct vop_remove_args *)))kernfs_enotsupp)
+#define kernfs_link ((int (*) __P((struct vop_link_args *)))kernfs_enotsupp)
+#define kernfs_rename ((int (*) __P((struct vop_rename_args *)))kernfs_enotsupp)
+#define kernfs_mkdir ((int (*) __P((struct vop_mkdir_args *)))kernfs_enotsupp)
+#define kernfs_rmdir ((int (*) __P((struct vop_rmdir_args *)))kernfs_enotsupp)
+#define kernfs_symlink ((int (*) __P((struct vop_symlink_args *)))kernfs_enotsupp)
+#define kernfs_readlink \
+ ((int (*) __P((struct vop_readlink_args *)))kernfs_enotsupp)
+#define kernfs_abortop ((int (*) __P((struct vop_abortop_args *)))nullop)
+#define kernfs_lock ((int (*) __P((struct vop_lock_args *)))nullop)
+#define kernfs_unlock ((int (*) __P((struct vop_unlock_args *)))nullop)
+#define kernfs_bmap ((int (*) __P((struct vop_bmap_args *)))kernfs_badop)
+#define kernfs_strategy ((int (*) __P((struct vop_strategy_args *)))kernfs_badop)
+#define kernfs_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+#define kernfs_advlock ((int (*) __P((struct vop_advlock_args *)))kernfs_enotsupp)
+#define kernfs_blkatoff \
+ ((int (*) __P((struct vop_blkatoff_args *)))kernfs_enotsupp)
+#define kernfs_valloc ((int(*) __P(( \
+ struct vnode *pvp, \
+ int mode, \
+ struct ucred *cred, \
+ struct vnode **vpp))) kernfs_enotsupp)
+#define kernfs_truncate \
+ ((int (*) __P((struct vop_truncate_args *)))kernfs_enotsupp)
+#define kernfs_update ((int (*) __P((struct vop_update_args *)))kernfs_enotsupp)
+#define kernfs_bwrite ((int (*) __P((struct vop_bwrite_args *)))kernfs_enotsupp)
+
+/*
+ * kernfs vnode operations vector.
+ *
+ * kernfs_vnodeop_entries pairs each vnode operation descriptor with
+ * the kernfs handler for it and is terminated by a NULL/NULL pair;
+ * operations without an entry fall back to vn_default_error via
+ * vop_default_desc.  kernfs_vnodeop_opv_desc ties the table to
+ * kernfs_vnodeop_p, the pointer through which the operations vector
+ * is reached (presumably filled in by the VFS at initialisation --
+ * that code is not in this file).
+ */
+int (**kernfs_vnodeop_p)();
+struct vnodeopv_entry_desc kernfs_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, kernfs_lookup }, /* lookup */
+ { &vop_create_desc, kernfs_create }, /* create */
+ { &vop_mknod_desc, kernfs_mknod }, /* mknod */
+ { &vop_open_desc, kernfs_open }, /* open */
+ { &vop_close_desc, kernfs_close }, /* close */
+ { &vop_access_desc, kernfs_access }, /* access */
+ { &vop_getattr_desc, kernfs_getattr }, /* getattr */
+ { &vop_setattr_desc, kernfs_setattr }, /* setattr */
+ { &vop_read_desc, kernfs_read }, /* read */
+ { &vop_write_desc, kernfs_write }, /* write */
+ { &vop_ioctl_desc, kernfs_ioctl }, /* ioctl */
+ { &vop_select_desc, kernfs_select }, /* select */
+ { &vop_mmap_desc, kernfs_mmap }, /* mmap */
+ { &vop_fsync_desc, kernfs_fsync }, /* fsync */
+ { &vop_seek_desc, kernfs_seek }, /* seek */
+ { &vop_remove_desc, kernfs_remove }, /* remove */
+ { &vop_link_desc, kernfs_link }, /* link */
+ { &vop_rename_desc, kernfs_rename }, /* rename */
+ { &vop_mkdir_desc, kernfs_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, kernfs_rmdir }, /* rmdir */
+ { &vop_symlink_desc, kernfs_symlink }, /* symlink */
+ { &vop_readdir_desc, kernfs_readdir }, /* readdir */
+ { &vop_readlink_desc, kernfs_readlink },/* readlink */
+ { &vop_abortop_desc, kernfs_abortop }, /* abortop */
+ { &vop_inactive_desc, kernfs_inactive },/* inactive */
+ { &vop_reclaim_desc, kernfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, kernfs_lock }, /* lock */
+ { &vop_unlock_desc, kernfs_unlock }, /* unlock */
+ { &vop_bmap_desc, kernfs_bmap }, /* bmap */
+ { &vop_strategy_desc, kernfs_strategy },/* strategy */
+ { &vop_print_desc, kernfs_print }, /* print */
+ { &vop_islocked_desc, kernfs_islocked },/* islocked */
+ { &vop_pathconf_desc, kernfs_pathconf },/* pathconf */
+ { &vop_advlock_desc, kernfs_advlock }, /* advlock */
+ { &vop_blkatoff_desc, kernfs_blkatoff },/* blkatoff */
+ { &vop_valloc_desc, kernfs_valloc }, /* valloc */
+ { &vop_vfree_desc, kernfs_vfree }, /* vfree */
+ { &vop_truncate_desc, kernfs_truncate },/* truncate */
+ { &vop_update_desc, kernfs_update }, /* update */
+ { &vop_bwrite_desc, kernfs_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc kernfs_vnodeop_opv_desc =
+ { &kernfs_vnodeop_p, kernfs_vnodeop_entries };
diff --git a/sys/miscfs/nullfs/null.h b/sys/miscfs/nullfs/null.h
new file mode 100644
index 000000000000..14286ffeee0c
--- /dev/null
+++ b/sys/miscfs/nullfs/null.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null.h 8.2 (Berkeley) 1/21/94
+ *
+ * $Id: lofs.h,v 1.8 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+/*
+ * Arguments passed in from user space when mounting a null layer
+ * (copied in by nullfs_mount).
+ */
+struct null_args {
+ char *target; /* Target of loopback */
+};
+
+/*
+ * Per-mount private data, reached through mp->mnt_data
+ * (see MOUNTTONULLMOUNT).
+ */
+struct null_mount {
+ struct mount *nullm_vfs; /* Mount of the lower filesystem */
+ struct vnode *nullm_rootvp; /* Reference to root null_node */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references.
+ * One null_node hangs off the v_data of every null-layer vnode and
+ * records the lower vnode it aliases.  null_forw/null_back must stay
+ * the first two members: the hash bucket heads in null_subr.c are
+ * cast to struct null_node * and linked through them.
+ */
+struct null_node {
+ struct null_node *null_forw; /* Hash chain */
+ struct null_node *null_back;
+ struct vnode *null_lowervp; /* VREFed once */
+ struct vnode *null_vnode; /* Back pointer */
+};
+
+/* Find or create the null-layer alias vnode for a lower vnode. */
+extern int null_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+
+/* Conversions between mounts/vnodes and their null-layer private data. */
+#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
+#define VTONULL(vp) ((struct null_node *)(vp)->v_data)
+#define NULLTOV(xp) ((xp)->null_vnode)
+/*
+ * NULLVPTOLOWERVP maps a null-layer vnode to its lower vnode.  With
+ * NULLFS_DIAGNOSTIC it goes through null_checkvp, which sanity-checks
+ * the node first; note that the argument may then be evaluated by a
+ * function call rather than a plain member access.
+ */
+#ifdef NULLFS_DIAGNOSTIC
+extern struct vnode *null_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__)
+#else
+#define NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp)
+#endif
+
+extern int (**null_vnodeop_p)();
+extern struct vfsops null_vfsops;
+#endif /* KERNEL */
diff --git a/sys/miscfs/nullfs/null_subr.c b/sys/miscfs/nullfs/null_subr.c
new file mode 100644
index 000000000000..a31723fe4c22
--- /dev/null
+++ b/sys/miscfs/nullfs/null_subr.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null_subr.c 8.4 (Berkeley) 1/21/94
+ *
+ * $Id: lofs_subr.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/nullfs/null.h>
+
+/*
+ * Hashing of lower vnode pointers into the alias cache: the low
+ * LOG2_SIZEVNODE bits of the address are shifted away and the result
+ * is masked to one of NNULLNODECACHE buckets (the mask requires
+ * NNULLNODECACHE to be a power of two).
+ */
+#define LOG2_SIZEVNODE 7 /* log2(sizeof struct vnode) */
+#define NNULLNODECACHE 16
+#define NULL_NHASH(vp) ((((u_long)vp)>>LOG2_SIZEVNODE) & (NNULLNODECACHE-1))
+
+/*
+ * Null layer cache:
+ * Each cache entry holds a reference to the lower vnode
+ * along with a pointer to the alias vnode. When an
+ * entry is added the lower vnode is VREF'd. When the
+ * alias is removed the lower vnode is vrele'd.
+ */
+
+/*
+ * Cache head.
+ * Each bucket is the head of a doubly-linked list of null_nodes.  An
+ * empty bucket points at itself (see nullfs_init), and the head is
+ * cast to struct null_node * when walking or inserting, so ac_forw and
+ * ac_back must line up with null_forw and null_back.
+ */
+struct null_node_cache {
+ struct null_node *ac_forw;
+ struct null_node *ac_back;
+};
+
+/* The hash table itself, indexed by NULL_NHASH(lowervp). */
+static struct null_node_cache null_node_cache[NNULLNODECACHE];
+
+/*
+ * Initialise cache headers.
+ * Every bucket is made an empty circular list by pointing both links
+ * back at the bucket head itself.
+ *
+ * Returns 0.  The function is declared as returning int where it is
+ * placed in null_vfsops, so it must not fall off the end without a
+ * value.
+ */
+int
+nullfs_init()
+{
+	struct null_node_cache *ac;
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_init\n");		/* printed during system boot */
+#endif
+
+	for (ac = null_node_cache; ac < null_node_cache + NNULLNODECACHE; ac++)
+		ac->ac_forw = ac->ac_back = (struct null_node *) ac;
+
+	return (0);
+}
+
+/*
+ * Map a lower vnode to the cache bucket its alias would live on.
+ */
+static struct null_node_cache *
+null_node_hash(lowervp)
+	struct vnode *lowervp;
+{
+	struct null_node_cache *bucket;
+
+	bucket = null_node_cache + NULL_NHASH(lowervp);
+	return (bucket);
+}
+
+/*
+ * Look up the alias vnode for (mp, lowervp) in the cache.
+ *
+ * Returns the alias with a new reference taken by vget(), or NULL if
+ * no alias for that lower vnode exists on this mount.  The lower
+ * vnode's own reference count is not touched.
+ */
+static struct vnode *
+null_node_find(mp, lowervp)
+	struct mount *mp;
+	struct vnode *lowervp;
+{
+	struct null_node_cache *bucket;
+	struct null_node *node;
+	struct vnode *alias;
+
+	/*
+	 * Walk the (two-way) linked list of the bucket; the list is
+	 * circular and ends when we are back at the bucket head.
+	 */
+	bucket = null_node_hash(lowervp);
+	node = bucket->ac_forw;
+	while (node != (struct null_node *) bucket) {
+		if (node->null_lowervp != lowervp ||
+		    NULLTOV(node)->v_mount != mp) {
+			node = node->null_forw;
+			continue;
+		}
+
+		alias = NULLTOV(node);
+		/*
+		 * We need vget for the VXLOCK
+		 * stuff, but we don't want to lock
+		 * the lower node.
+		 */
+		if (vget(alias, 0) == 0)
+			return (alias);
+
+		/* The chain may have changed under us: start over. */
+		printf ("null_node_find: vget failed.\n");
+		node = bucket->ac_forw;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Make a new null_node node.
+ * *vpp receives the alias vnode, lowervp is the lower vnode.
+ * Maintain a reference to (lowervp).
+ *
+ * A fresh vnode and null_node are allocated and linked together.  If
+ * another alias for the same lower vnode appeared while we could have
+ * slept in MALLOC, the new node is discarded and the existing alias
+ * is returned instead.  Otherwise an extra reference is taken on
+ * lowervp and the node is inserted into the hash chain.
+ *
+ * Returns 0, or the error from getnewvnode.
+ */
+static int
+null_node_alloc(mp, lowervp, vpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **vpp;
+{
+	struct null_node_cache *hd;
+	struct null_node *xp;
+	struct vnode *othervp, *vp;
+	int error;
+
+	error = getnewvnode(VT_NULL, mp, null_vnodeop_p, vpp);
+	if (error)
+		return (error);
+	vp = *vpp;
+
+	MALLOC(xp, struct null_node *, sizeof(struct null_node), M_TEMP, M_WAITOK);
+	vp->v_type = lowervp->v_type;
+	xp->null_vnode = vp;
+	vp->v_data = xp;
+	xp->null_lowervp = lowervp;
+	/*
+	 * Before we insert our new node onto the hash chains,
+	 * check to see if someone else has beaten us to it.
+	 * (We could have slept in MALLOC.)
+	 *
+	 * null_node_find takes (mp, lowervp); the mount argument was
+	 * previously omitted, so lowervp was interpreted as the mount
+	 * and the vnode argument was garbage.
+	 */
+	othervp = null_node_find(mp, lowervp);
+	if (othervp != NULL) {
+		/*
+		 * NOTE(review): vp->v_data still points at the freed
+		 * null_node here; confirm the reclaim path copes with
+		 * a VBAD vnode in this state.
+		 */
+		FREE(xp, M_TEMP);
+		vp->v_type = VBAD;		/* node is discarded */
+		vp->v_usecount = 0;		/* XXX */
+		*vpp = othervp;
+		return 0;
+	}
+	VREF(lowervp);   /* Extra VREF will be vrele'd in null_node_create */
+	hd = null_node_hash(lowervp);
+	insque(xp, hd);
+	return 0;
+}
+
+
+/*
+ * Try to find an existing null_node vnode referring
+ * to the lower vnode, otherwise make a new null_node vnode which
+ * contains a reference to the lower vnode.
+ *
+ * On success *newvpp holds a referenced alias vnode and one reference
+ * on lowervp has been dropped with vrele().  On failure the error
+ * from null_node_alloc is returned, *newvpp is left untouched and
+ * lowervp is not released.
+ */
+int
+null_node_create(mp, lowervp, newvpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+
+	aliasvp = null_node_find(mp, lowervp);
+	if (aliasvp != NULL) {
+		/*
+		 * null_node_find has taken another reference
+		 * to the alias vnode.
+		 */
+#ifdef NULLFS_DIAGNOSTIC
+		/* was NULLTOV(ap): "ap" does not exist in this function */
+		vprint("null_node_create: exists", aliasvp);
+#endif
+		/* VREF(aliasvp); --- done in null_node_find */
+	} else {
+		int error;
+
+		/*
+		 * Get new vnode.
+		 */
+#ifdef NULLFS_DIAGNOSTIC
+		printf("null_node_create: create new alias vnode\n");
+#endif
+
+		/*
+		 * Make new vnode reference the null_node.
+		 */
+		error = null_node_alloc(mp, lowervp, &aliasvp);
+		if (error)
+			return error;
+
+		/*
+		 * aliasvp is already VREF'd by getnewvnode()
+		 */
+	}
+
+	vrele(lowervp);
+
+#ifdef DIAGNOSTIC
+	if (lowervp->v_usecount < 1) {
+		/* Should never happen... */
+		/* vprint needs the vnode to print; it was missing. */
+		vprint ("null_node_create: alias ", aliasvp);
+		vprint ("null_node_create: lower ", lowervp);
+		printf ("null_node_create: lower has 0 usecount.\n");
+		panic ("null_node_create: lower has 0 usecount.");
+	}
+#endif
+
+#ifdef NULLFS_DIAGNOSTIC
+	vprint("null_node_create: alias", aliasvp);
+	vprint("null_node_create: lower", lowervp);
+#endif
+
+	*newvpp = aliasvp;
+	return (0);
+}
+#ifdef NULLFS_DIAGNOSTIC
+/*
+ * Print the first eight words found at a null_node, then spin while
+ * null_checkvp_barrier is non-zero so a debugger can be attached.
+ * NOTE(review): null_checkvp_barrier is not declared anywhere in the
+ * visible part of this file or in null.h -- confirm where it lives.
+ */
+static void
+null_checkvp_dump(a)
+	struct null_node *a;
+{
+	u_long *p = (u_long *) a;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		printf(" %x", p[i]);
+	printf("\n");
+	/* wait for debugger */
+	while (null_checkvp_barrier) /*WAIT*/ ;
+}
+
+/*
+ * Diagnostic replacement for the plain member access behind
+ * NULLVPTOLOWERVP: verify that the null_node of vp has a lower vnode
+ * and that the lower vnode is still referenced, panicking otherwise.
+ * fil/lno identify the call site.  Returns the lower vnode.
+ */
+struct vnode *
+null_checkvp(vp, fil, lno)
+	struct vnode *vp;
+	char *fil;
+	int lno;
+{
+	struct null_node *a = VTONULL(vp);
+	struct vnode *lower;
+#ifdef notyet
+	/*
+	 * Can't do this check because vop_reclaim runs
+	 * with a funny vop vector.
+	 */
+	if (vp->v_op != null_vnodeop_p) {
+		printf ("null_checkvp: on non-null-node\n");
+		while (null_checkvp_barrier) /*WAIT*/ ;
+		panic("null_checkvp");
+	};
+#endif
+	lower = a->null_lowervp;
+	if (lower == NULL) {
+		/* Should never happen */
+		printf("vp = %x, ZERO ptr\n", vp);
+		null_checkvp_dump(a);
+		panic("null_checkvp");
+	}
+	if (lower->v_usecount < 1) {
+		printf("vp = %x, unref'ed lowervp\n", vp);
+		null_checkvp_dump(a);
+		panic ("null with unref'ed lowervp");
+	}
+#ifdef notyet
+	printf("null %x/%d -> %x/%d [%s, %d]\n",
+		NULLTOV(a), NULLTOV(a)->v_usecount,
+		a->null_lowervp, a->null_lowervp->v_usecount,
+		fil, lno);
+#endif
+	return lower;
+}
+#endif
diff --git a/sys/miscfs/nullfs/null_vfsops.c b/sys/miscfs/nullfs/null_vfsops.c
new file mode 100644
index 000000000000..b0d2df75cdaf
--- /dev/null
+++ b/sys/miscfs/nullfs/null_vfsops.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null_vfsops.c 8.2 (Berkeley) 1/21/94
+ *
+ * @(#)lofs_vfsops.c 1.2 (Berkeley) 6/18/92
+ * $Id: lofs_vfsops.c,v 1.9 1992/05/30 10:26:24 jsp Exp jsp $
+ */
+
+/*
+ * Null Layer
+ * (See null_vnops.c for a description of what this does.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/nullfs/null.h>
+
+/*
+ * Mount null layer
+ *
+ * Copies in the null_args, looks up args.target, creates (or finds)
+ * the alias vnode for it and records it as the root of the new mount.
+ * The mount keeps a held reference on that root vnode until
+ * nullfs_unmount.
+ *
+ * Returns EOPNOTSUPP for MNT_UPDATE requests, otherwise 0 or the
+ * error from copyin, namei or null_node_create.
+ */
+int
+nullfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	struct null_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *nullm_rootvp;
+	struct null_mount *xmp;
+	u_int size;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Updating an existing null mount is not supported.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	/*
+	 * Get argument
+	 */
+	error = copyin(data, (caddr_t)&args, sizeof(struct null_args));
+	if (error)
+		return (error);
+
+	/*
+	 * Find lower node
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+		UIO_USERSPACE, args.target, p);
+	error = namei(ndp);
+	if (error)
+		return (error);
+
+	/*
+	 * The parent directory (WANTPARENT) is not needed; keep only
+	 * the target itself.
+	 */
+	lowerrootvp = ndp->ni_vp;
+
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULL;
+
+	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
+				M_UFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Save reference to underlying FS
+	 */
+	xmp->nullm_vfs = lowerrootvp->v_mount;
+
+	/*
+	 * Save reference.  Each mount also holds
+	 * a reference on the root vnode.
+	 */
+	error = null_node_create(mp, lowerrootvp, &vp);
+	/*
+	 * Make sure the node alias worked.  On failure
+	 * null_node_create never stores through &vp, so vp must not
+	 * be touched; unlock and release the lower vnode instead
+	 * (it was looked up with LOCKLEAF).
+	 */
+	if (error) {
+		VOP_UNLOCK(lowerrootvp);
+		vrele(lowerrootvp);
+		free(xmp, M_UFSMNT);	/* XXX */
+		return (error);
+	}
+	/*
+	 * Unlock the node (either the lower or the alias)
+	 */
+	VOP_UNLOCK(vp);
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in nullfs_unmount.
+	 */
+	nullm_rootvp = vp;
+	nullm_rootvp->v_flag |= VROOT;
+	xmp->nullm_rootvp = nullm_rootvp;
+	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) xmp;
+	/* NOTE(review): still uses the lofs type constant -- confirm. */
+	getnewfsid(mp, MOUNT_LOFS);
+
+	/* Record both names, zero-padding the remainder of each buffer. */
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_mount: lower %s, alias at %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+}
+
+/*
+ * VFS start.  There is nothing to do at this layer: the start
+ * routine of the underlying filesystem was already called when
+ * that filesystem itself was mounted, so the request is not
+ * forwarded.
+ */
+int
+nullfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Free reference to null layer.
+ *
+ * Fails with EINVAL when a forced unmount is requested but doforce is
+ * clear, with EBUSY when the root alias is referenced by anyone other
+ * than the mount, or with the error from vflush.  On success the root
+ * alias is released and destroyed and the null_mount is freed.
+ */
+int
+nullfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct vnode *rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+	int vflags = 0;
+	int error;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_unmount(mp = %x)\n", mp);
+#endif
+
+	if (mntflags & MNT_FORCE) {
+		/* lofs can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		vflags |= FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 */
+#if 0
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	/* The mount's own reference is the only one allowed to remain. */
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+
+	error = vflush(mp, rootvp, vflags);
+	if (error)
+		return (error);
+
+#ifdef NULLFS_DIAGNOSTIC
+	vprint("alias root of lower", rootvp);
+#endif
+	/*
+	 * Drop the mount's reference on the root alias, then
+	 * blow the vnode away for future re-use.
+	 */
+	vrele(rootvp);
+	vgone(rootvp);
+
+	/*
+	 * Finally, throw away the null_mount structure
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return 0;
+}
+
+/*
+ * Return the root vnode of the null mount in *vpp, with a new
+ * reference taken and the vnode locked.
+ */
+int
+nullfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_root(mp = %x, vp = %x->%x)\n", mp,
+			MOUNTTONULLMOUNT(mp)->nullm_rootvp,
+			NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp)
+			);
+#endif
+
+	rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+
+	/* Reference first, then lock. */
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+
+	*vpp = rootvp;
+	return 0;
+}
+
+/*
+ * Quota control is handed straight to the lower filesystem.
+ */
+int
+nullfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	struct mount *lowermp = MOUNTTONULLMOUNT(mp)->nullm_vfs;
+
+	return VFS_QUOTACTL(lowermp, cmd, uid, arg, p);
+}
+
+/*
+ * Report filesystem statistics for the null mount.
+ *
+ * The numbers come from the lower filesystem; the fsid and the two
+ * mount names are those of the null mount itself and are only copied
+ * when the caller did not pass in mp->mnt_stat directly.
+ *
+ * Returns 0, or the error from the lower filesystem's statfs.
+ */
+int
+nullfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs lower;
+	int error;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_statfs(mp = %x, vp = %x->%x)\n", mp,
+			MOUNTTONULLMOUNT(mp)->nullm_rootvp,
+			NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp)
+			);
+#endif
+
+	bzero(&lower, sizeof(lower));
+
+	error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &lower, p);
+	if (error)
+		return (error);
+
+	/* now copy across the "interesting" information and fake the rest */
+	sbp->f_type   = lower.f_type;
+	sbp->f_flags  = lower.f_flags;
+	sbp->f_bsize  = lower.f_bsize;
+	sbp->f_iosize = lower.f_iosize;
+	sbp->f_blocks = lower.f_blocks;
+	sbp->f_bfree  = lower.f_bfree;
+	sbp->f_bavail = lower.f_bavail;
+	sbp->f_files  = lower.f_files;
+	sbp->f_ffree  = lower.f_ffree;
+
+	/* Caller already owns the mount's own statfs: identity is in place. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync the null mount.  Nothing is written back here.
+ * XXX - Assumes no data cached at null layer.
+ */
+int
+nullfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Look up a vnode by inode number: passed through to the lower
+ * filesystem, whose result is returned unchanged.
+ */
+int
+nullfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	struct mount *lowermp = MOUNTTONULLMOUNT(mp)->nullm_vfs;
+
+	return VFS_VGET(lowermp, ino, vpp);
+}
+
+/*
+ * Translate a file handle to a vnode: passed through to the lower
+ * filesystem, whose result is returned unchanged.
+ */
+int
+nullfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred**credanonp;
+{
+	struct mount *lowermp = MOUNTTONULLMOUNT(mp)->nullm_vfs;
+
+	return VFS_FHTOVP(lowermp, fidp, nam, vpp, exflagsp, credanonp);
+}
+
+/*
+ * Translate a vnode to a file handle by asking the filesystem of
+ * the lower vnode that this null vnode aliases.
+ */
+int
+nullfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	struct vnode *lowervp = NULLVPTOLOWERVP(vp);
+
+	return VFS_VPTOFH(lowervp, fhp);
+}
+
+/* Defined in null_subr.c; sets up the alias cache hash heads. */
+int nullfs_init __P((void));
+
+/*
+ * VFS operations vector for the null layer.  The initialisers are
+ * positional, so their order must match the member order of
+ * struct vfsops.
+ */
+struct vfsops null_vfsops = {
+ nullfs_mount,
+ nullfs_start,
+ nullfs_unmount,
+ nullfs_root,
+ nullfs_quotactl,
+ nullfs_statfs,
+ nullfs_sync,
+ nullfs_vget,
+ nullfs_fhtovp,
+ nullfs_vptofh,
+ nullfs_init,
+};
diff --git a/sys/miscfs/nullfs/null_vnops.c b/sys/miscfs/nullfs/null_vnops.c
new file mode 100644
index 000000000000..115ff6f46432
--- /dev/null
+++ b/sys/miscfs/nullfs/null_vnops.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * John Heidemann of the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null_vnops.c 8.1 (Berkeley) 6/10/93
+ *
+ * Ancestors:
+ * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92
+ * $Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ * ...and...
+ * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
+ */
+
+/*
+ * Null Layer
+ *
+ * (See mount_null(8) for more information.)
+ *
+ * The null layer duplicates a portion of the file system
+ * name space under a new name. In this respect, it is
+ * similar to the loopback file system. It differs from
+ * the loopback fs in two respects: it is implemented using
+ * a stackable layers technique, and its "null-node"s stack above
+ * all lower-layer vnodes, not just over directory vnodes.
+ *
+ * The null layer has two purposes. First, it serves as a demonstration
+ * of layering by providing a layer which does nothing. (It actually
+ * does everything the loopback file system does, which is slightly
+ * more than nothing.) Second, the null layer can serve as a prototype
+ * layer. Since it provides all necessary layer framework,
+ * new file system layers can be created very easily by starting
+ * with a null layer.
+ *
+ * The remainder of this man page examines the null layer as a basis
+ * for constructing new layers.
+ *
+ *
+ * INSTANTIATING NEW NULL LAYERS
+ *
+ * New null layers are created with mount_null(8).
+ * Mount_null(8) takes two arguments, the pathname
+ * of the lower vfs (target-pn) and the pathname where the null
+ * layer will appear in the namespace (alias-pn). After
+ * the null layer is put into place, the contents
+ * of target-pn subtree will be aliased under alias-pn.
+ *
+ *
+ * OPERATION OF A NULL LAYER
+ *
+ * The null layer is the minimum file system layer,
+ * simply bypassing all possible operations to the lower layer
+ * for processing there. The majority of its activity centers
+ * on the bypass routine, through which nearly all vnode operations
+ * pass.
+ *
+ * The bypass routine accepts arbitrary vnode operations for
+ * handling by the lower layer. It begins by examining vnode
+ * operation arguments and replacing any null-nodes by their
+ * lower-layer equivalents. It then invokes the operation
+ * on the lower layer. Finally, it replaces the null-nodes
+ * in the arguments and, if a vnode is returned by the operation,
+ * stacks a null-node on top of the returned vnode.
+ *
+ * Although bypass handles most operations,
+ * vop_getattr, _inactive, _reclaim, and _print are not bypassed.
+ * Vop_getattr must change the fsid being returned.
+ * Vop_inactive and vop_reclaim are not bypassed so that
+ * they can handle freeing null-layer specific data.
+ * Vop_print is not bypassed to avoid excessive debugging
+ * information.
+ *
+ *
+ * INSTANTIATING VNODE STACKS
+ *
+ * Mounting associates the null layer with a lower layer,
+ * effectively stacking two VFSes. Vnode stacks are instead
+ * created on demand as files are accessed.
+ *
+ * The initial mount creates a single vnode stack for the
+ * root of the new null layer. All other vnode stacks
+ * are created as a result of vnode operations on
+ * this or other null vnode stacks.
+ *
+ * New vnode stacks come into existence as a result of
+ * an operation which returns a vnode.
+ * The bypass routine stacks a null-node above the new
+ * vnode before returning it to the caller.
+ *
+ * For example, imagine mounting a null layer with
+ * "mount_null /usr/include /dev/layer/null".
+ * Changing directory to /dev/layer/null will assign
+ * the root null-node (which was created when the null layer was mounted).
+ * Now consider opening "sys". A vop_lookup would be
+ * done on the root null-node. This operation would bypass through
+ * to the lower layer which would return a vnode representing
+ * the UFS "sys". Null_bypass then builds a null-node
+ * aliasing the UFS "sys" and returns this to the caller.
+ * Later operations on the null-node "sys" will repeat this
+ * process when constructing other vnode stacks.
+ *
+ *
+ * CREATING OTHER FILE SYSTEM LAYERS
+ *
+ * One of the easiest ways to construct new file system layers is to make
+ * a copy of the null layer, rename all files and variables, and
+ * then begin modifying the copy. Sed can be used to easily rename
+ * all variables.
+ *
+ * The umap layer is an example of a layer descended from the
+ * null layer.
+ *
+ *
+ * INVOKING OPERATIONS ON LOWER LAYERS
+ *
+ * There are two techniques to invoke operations on a lower layer
+ * when the operation cannot be completely bypassed. Each method
+ * is appropriate in different situations. In both cases,
+ * it is the responsibility of the aliasing layer to make
+ * the operation arguments "correct" for the lower layer
+ * by mapping any vnode arguments to the lower layer.
+ *
+ * The first approach is to call the aliasing layer's bypass routine.
+ * This method is most suitable when you wish to invoke the operation
+ * currently being handled on the lower layer. It has the advantage
+ * that the bypass routine already must do argument mapping.
+ * An example of this is null_getattr in the null layer.
+ *
+ * A second approach is to directly invoke vnode operations on
+ * the lower layer with the VOP_OPERATIONNAME interface.
+ * The advantage of this method is that it is easy to invoke
+ * arbitrary operations on the lower layer. The disadvantage
+ * is that vnode arguments must be manually mapped.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/nullfs/null.h>
+
+
+int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
+
+/*
+ * This is the 10-Apr-92 bypass routine.
+ * This version has been optimized for speed, throwing away some
+ * safety checks. It should still always work, but it's not as
+ * robust to programmer errors.
+ * Define SAFETY to include some error checking code.
+ *
+ * In general, we map all vnodes going down and unmap them on the way back.
+ * As an exception to this, vnodes can be marked "unmapped" by setting
+ * the Nth bit in operation's vdesc_flags.
+ *
+ * Also, some BSD vnode operations have the side effect of vrele'ing
+ * their arguments. With stacking, the reference counts are held
+ * by the upper node, not the lower one, so we must handle these
+ * side-effects here. This is not of concern in Sun-derived systems
+ * since there are no such side-effects.
+ *
+ * This makes the following assumptions:
+ * - only one returned vpp
+ * - no INOUT vpp's (Sun's vop_open has one of these)
+ * - the vnode operation vector of the first vnode should be used
+ * to determine what implementation of the op should be invoked
+ * - all mapped vnodes are of our vnode-type (NEEDSWORK:
+ * problems on rmdir'ing mount points and renaming?)
+ */
+int
+null_bypass(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	extern int (**null_vnodeop_p)();  /* not extern, really "forward" */
+	register struct vnode **this_vp_p;
+	int error;
+	struct vnode *old_vps[VDESC_MAX_VPS];
+	struct vnode **vps_p[VDESC_MAX_VPS];
+	struct vnode ***vppp;
+	struct vnodeop_desc *descp = ap->a_desc;
+	int reles, i;
+
+	if (null_bug_bypass)
+		printf ("null_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+	/*
+	 * We require at least one vp.
+	 */
+	if (descp->vdesc_vp_offsets == NULL ||
+	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+		panic ("null_bypass: no vp's in map.\n");
+#endif
+
+	/*
+	 * Map the vnodes going in.
+	 * Later, we'll invoke the operation based on
+	 * the first mapped vnode's operation vector.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		vps_p[i] = this_vp_p =
+			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
+		/*
+		 * We're not guaranteed that any but the first vnode
+		 * are of our type.  Check for and don't map any
+		 * that aren't.  (We must always map first vp or vclean fails.)
+		 *
+		 * A vnode argument other than the first may also be
+		 * absent (a NULL pointer); test for that before looking
+		 * at v_op, and leave such an argument unmapped.
+		 */
+		if (i && (*this_vp_p == NULL ||
+		    (*this_vp_p)->v_op != null_vnodeop_p)) {
+			/* NULL here tells the restore loop to skip this slot. */
+			old_vps[i] = NULL;
+		} else {
+			old_vps[i] = *this_vp_p;
+			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
+			/*
+			 * XXX - Several operations have the side effect
+			 * of vrele'ing their vp's.  We must account for
+			 * that.  (This should go away in the future.)
+			 */
+			if (reles & 1)
+				VREF(*this_vp_p);
+		}
+
+	}
+
+	/*
+	 * Call the operation on the lower layer
+	 * with the modified argument structure.
+	 */
+	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+	/*
+	 * Maintain the illusion of call-by-value
+	 * by restoring vnodes in the argument structure
+	 * to their original value.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		if (old_vps[i]) {
+			*(vps_p[i]) = old_vps[i];
+			if (reles & 1)
+				vrele(*(vps_p[i]));
+		}
+	}
+
+	/*
+	 * Map the possible out-going vpp
+	 * (Assumes that the lower layer always returns
+	 * a VREF'ed vpp unless it gets an error.)
+	 */
+	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+	    !error) {
+		/*
+		 * XXX - even though some ops have vpp returned vp's,
+		 * several ops actually vrele this before returning.
+		 * We must avoid these ops.
+		 * (This should go away when these ops are regularized.)
+		 */
+		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+			goto out;
+		vppp = VOPARG_OFFSETTO(struct vnode***,
+				 descp->vdesc_vpp_offset,ap);
+		error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+	}
+
+ out:
+	return (error);
+}
+
+
+/*
+ * Getattr is intercepted only so that the reported fsid can be
+ * replaced; the attributes themselves come from null_bypass.
+ */
+int
+null_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error;
+
+	error = null_bypass(ap);
+	if (error == 0) {
+		/*
+		 * This relies on null_bypass having restored the
+		 * arguments, so that a_vp is our own vnode again.
+		 */
+		ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+	}
+	return (error);
+}
+
+
+/*
+ * Inactive handler for null vnodes: deliberately a no-op that is
+ * not passed through the bypass routine.
+ */
+int
+null_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	/*
+	 * Nothing is done here.  The lowervp is only vrele'd at
+	 * reclaim time, so that until then our null_node stays
+	 * in the cache and is reusable.
+	 *
+	 * NEEDSWORK: Someday, consider inactive'ing the lowervp
+	 * and then trying to reactivate it with capabilities (v_id)
+	 * like they do in the name lookup cache code.
+	 * That's too much work for now.
+	 */
+	return (0);
+}
+
+/*
+ * Reclaim a null vnode: forget the lower vnode, take the null_node
+ * off its queue, free it, and finally vrele the lower vnode.
+ */
+int
+null_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *nullvp = ap->a_vp;
+	struct null_node *node = VTONULL(nullvp);
+	struct vnode *lower = node->null_lowervp;
+
+	/*
+	 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
+	 * so we can't call VOPs on ourself.
+	 */
+
+	/* Once the lower vnode is cleared, this node will not be re-used. */
+	node->null_lowervp = NULL;
+	remque(node);
+	FREE(nullvp->v_data, M_TEMP);
+	nullvp->v_data = NULL;
+
+	/* "lower" was saved above, before the node went away. */
+	vrele (lower);
+	return (0);
+}
+
+
+/*
+ * Print a one-line description of a null vnode: its own address
+ * and the address of the lower vnode beneath it.
+ */
+int
+null_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	struct vnode *lowervp = NULLVPTOLOWERVP(vp);
+
+	printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, lowervp);
+	return (0);
+}
+
+
+/*
+ * XXX - vop_strategy is hand coded because its argument structure
+ * holds a buffer rather than a vnode.  The buffer's vnode is swapped
+ * for the lower vnode around the call and put back afterwards.
+ * This goes away with a merged VM/buffer cache.
+ */
+int
+null_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *uppervp = bp->b_vp;
+	int error;
+
+	/* Point the buffer at the lower vnode for the duration of the call. */
+	bp->b_vp = NULLVPTOLOWERVP(uppervp);
+	error = VOP_STRATEGY(bp);
+	bp->b_vp = uppervp;
+
+	return (error);
+}
+
+
+/*
+ * XXX - like vop_strategy, vop_bwrite is hand coded because its
+ * argument structure holds a buffer rather than a vnode.  The
+ * buffer's vnode is swapped for the lower vnode around the call
+ * and put back afterwards.
+ * This goes away with a merged VM/buffer cache.
+ */
+int
+null_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *uppervp = bp->b_vp;
+	int error;
+
+	/* Point the buffer at the lower vnode for the duration of the call. */
+	bp->b_vp = NULLVPTOLOWERVP(uppervp);
+	error = VOP_BWRITE(bp);
+	bp->b_vp = uppervp;
+
+	return (error);
+}
+
+/*
+ * Global vfs data structures
+ *
+ * null_vnodeop_entries pairs operation descriptors with the handlers
+ * defined in this file. The vop_default_desc entry names null_bypass;
+ * the remaining entries are the operations this file handles itself.
+ * null_vnodeop_opv_desc ties the table to null_vnodeop_p.
+ */
+int (**null_vnodeop_p)();
+struct vnodeopv_entry_desc null_vnodeop_entries[] = {
+ { &vop_default_desc, null_bypass },
+
+ { &vop_getattr_desc, null_getattr },
+ { &vop_inactive_desc, null_inactive },
+ { &vop_reclaim_desc, null_reclaim },
+ { &vop_print_desc, null_print },
+
+ { &vop_strategy_desc, null_strategy },
+ { &vop_bwrite_desc, null_bwrite },
+
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* end-of-table marker */
+};
+struct vnodeopv_desc null_vnodeop_opv_desc =
+ { &null_vnodeop_p, null_vnodeop_entries };
diff --git a/sys/miscfs/portal/portal.h b/sys/miscfs/portal/portal.h
new file mode 100644
index 000000000000..38d7ee0cdd27
--- /dev/null
+++ b/sys/miscfs/portal/portal.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)portal.h 8.4 (Berkeley) 1/21/94
+ *
+ * $Id: portal.h,v 1.3 1992/05/30 10:05:24 jsp Exp jsp $
+ */
+
+struct portal_args {
+ char *pa_config; /* Config file name; portal_mount copies it in as the "mounted from" name */
+ int pa_socket; /* Descriptor of the socket to the server (looked up with getsock) */
+};
+
+struct portal_cred {
+ int pcr_flag; /* File open mode (a_mode of the open) */
+ uid_t pcr_uid; /* From ucred (cr_uid) */
+ short pcr_ngroups; /* From ucred (cr_ngroups) */
+ gid_t pcr_groups[NGROUPS]; /* From ucred (cr_groups) */
+};
+
+#ifdef KERNEL
+struct portalmount {
+ struct vnode *pm_root; /* Root node */
+ struct file *pm_server; /* Held reference to server socket (f_count is bumped at mount) */
+};
+
+struct portalnode {
+ int pt_size; /* Length of Arg, counting its terminating NUL; 0 for the root */
+ char *pt_arg; /* Arg to send to server: the pathname saved by lookup; 0 for the root */
+ int pt_fileid; /* cookie: PORTAL_ROOTFILEID for the root, else taken from a counter */
+};
+
+#define VFSTOPORTAL(mp) ((struct portalmount *)((mp)->mnt_data)) /* mount -> private mount data */
+#define VTOPORTAL(vp) ((struct portalnode *)(vp)->v_data) /* vnode -> private node data */
+
+#define PORTAL_ROOTFILEID 2 /* pt_fileid given to the root node */
+
+extern int (**portal_vnodeop_p)();
+extern struct vfsops portal_vfsops;
+#endif /* KERNEL */
diff --git a/sys/miscfs/portal/portal_vfsops.c b/sys/miscfs/portal/portal_vfsops.c
new file mode 100644
index 000000000000..39e8563009b4
--- /dev/null
+++ b/sys/miscfs/portal/portal_vfsops.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)portal_vfsops.c 8.6 (Berkeley) 1/21/94
+ *
+ * $Id: portal_vfsops.c,v 1.5 1992/05/30 10:25:27 jsp Exp jsp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/un.h>
+#include <miscfs/portal/portal.h>
+
+/*
+ * Initialization entry point of the portal file system.
+ * There is nothing to set up, so it just reports success.
+ */
+int
+portal_init()
+{
+	return (0);
+}
+
+/*
+ * Mount a portal file system.
+ *
+ * The mount arguments carry a descriptor for the server's socket,
+ * which must be in the AF_UNIX domain, and the name of the config
+ * file, which is recorded as the "mounted from" name.  A root
+ * directory vnode is created and a reference to the server's file
+ * is kept in the portalmount structure.
+ */
+int
+portal_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct portal_args args;
+	struct portalmount *fmp;
+	struct portalnode *rootpt;
+	struct socket *so;
+	struct vnode *rvp;
+	struct file *fp;
+	u_int size;
+	int error;
+
+	/*
+	 * Updating an existing portal mount is refused.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	error = copyin(data, (caddr_t) &args, sizeof(args));
+	if (error)
+		return (error);
+
+	/*
+	 * Turn the descriptor from the arguments into its file and
+	 * make sure the socket behind it is an AF_UNIX one.
+	 */
+	error = getsock(p->p_fd, args.pa_socket, &fp);
+	if (error)
+		return (error);
+	so = (struct socket *) fp->f_data;
+	if (so->so_proto->pr_domain->dom_family != AF_UNIX)
+		return (ESOCKTNOSUPPORT);
+
+	/*
+	 * Allocate the root vnode, its portalnode and the per-mount data.
+	 */
+	error = getnewvnode(VT_PORTAL, mp, portal_vnodeop_p, &rvp); /* XXX */
+	if (error)
+		return (error);
+	MALLOC(rvp->v_data, void *, sizeof(struct portalnode),
+		M_TEMP, M_WAITOK);
+
+	fmp = (struct portalmount *) malloc(sizeof(*fmp),
+				 M_UFSMNT, M_WAITOK);	/* XXX */
+
+	rvp->v_type = VDIR;
+	rvp->v_flag |= VROOT;
+	rootpt = VTOPORTAL(rvp);
+	rootpt->pt_arg = 0;
+	rootpt->pt_size = 0;
+	rootpt->pt_fileid = PORTAL_ROOTFILEID;
+
+	fmp->pm_root = rvp;
+	fmp->pm_server = fp;
+	fp->f_count++;		/* the mount holds its own reference */
+
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) fmp;
+	getnewfsid(mp, MOUNT_PORTAL);
+
+	/*
+	 * Record the mount point and the config file name, zero-filling
+	 * whatever is left of each name buffer.
+	 */
+	(void)copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void)copyinstr(args.pa_config,
+	    mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+#ifdef notdef
+	bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy("portal", mp->mnt_stat.f_mntfromname, sizeof("portal"));
+#endif
+
+	return (0);
+}
+
+/*
+ * Start entry point of the portal file system; nothing to do.
+ */
+int
+portal_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);
+}
+
+/*
+ * Unmount a portal file system: flush its vnodes, dispose of the
+ * root vnode, shut down and release the server socket, and free
+ * the per-mount data.  Fails with EBUSY while the root vnode has
+ * more than one user.
+ */
+int
+portal_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct portalmount *fmp = VFSTOPORTAL(mp);
+	struct vnode *rootvp = fmp->pm_root;
+	int flags = 0;
+	int error;
+
+	if (mntflags & MNT_FORCE) {
+		/* portal can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 */
+#ifdef notyet
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+	error = vflush(mp, rootvp, flags);
+	if (error)
+		return (error);
+
+	/*
+	 * Drop the reference on the root vnode, then blow the
+	 * vnode away for future re-use.
+	 */
+	vrele(rootvp);
+	vgone(rootvp);
+
+	/*
+	 * Shutdown the socket.  This will cause the select in the
+	 * daemon to wake up, and then the accept will get ECONNABORTED
+	 * which it interprets as a request to go and bury itself.
+	 */
+	soshutdown((struct socket *) fmp->pm_server->f_data, 2);
+
+	/*
+	 * Discard reference to underlying file.  Must call closef because
+	 * this may be the last reference.
+	 */
+	closef(fmp->pm_server, (struct proc *) 0);
+
+	/*
+	 * Finally, throw away the portalmount structure
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+/*
+ * Return the root vnode of a portal mount through vpp, with a new
+ * reference taken and the vnode locked.
+ */
+int
+portal_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp = VFSTOPORTAL(mp)->pm_root;
+
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+	return (0);
+}
+
+/*
+ * Quota control is not supported on portal mounts.
+ */
+int
+portal_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Fill in file system statistics for a portal mount.  All figures
+ * are fixed values; the fsid and the mount names are copied from
+ * the mount's own statfs unless sbp already is that structure.
+ */
+int
+portal_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs *mstat = &mp->mnt_stat;
+
+	sbp->f_type = MOUNT_PORTAL;
+	sbp->f_flags = 0;
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* 1K to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = 1;		/* Allow for "." */
+	sbp->f_ffree = 0;
+
+	/* Nothing to copy when the caller passed the mount's own statfs. */
+	if (sbp == mstat)
+		return (0);
+
+	bcopy(&mstat->f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mstat->f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mstat->f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync entry point of the portal file system; nothing to write back.
+ *
+ * NOTE(review): nullfs_sync, which sits in the same vfsops slot,
+ * is declared with (mp, waitfor, cred, p); this routine declares
+ * only the first two - confirm against struct vfsops.
+ */
+int
+portal_sync(mp, waitfor)
+	struct mount *mp;
+	int waitfor;
+{
+	return (0);
+}
+
+/*
+ * Looking up a vnode by inode number is not supported on portal mounts.
+ */
+int
+portal_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * File handles are not supported on portal mounts.
+ *
+ * NOTE(review): nullfs_fhtovp, which sits in the same vfsops slot,
+ * is declared with (mp, fidp, nam, vpp, exflagsp, credanonp); this
+ * routine declares only three parameters - confirm against
+ * struct vfsops.
+ */
+int
+portal_fhtovp(mp, fhp, vpp)
+	struct mount *mp;
+	struct fid *fhp;
+	struct vnode **vpp;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * File handles are not supported on portal mounts.
+ */
+int
+portal_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	return (EOPNOTSUPP);
+}
+
+struct vfsops portal_vfsops = { /* VFS entry points of the portal file system, in struct vfsops order */
+ portal_mount,
+ portal_start,
+ portal_unmount,
+ portal_root,
+ portal_quotactl,
+ portal_statfs,
+ portal_sync,
+ portal_vget,
+ portal_fhtovp,
+ portal_vptofh,
+ portal_init,
+};
diff --git a/sys/miscfs/portal/portal_vnops.c b/sys/miscfs/portal/portal_vnops.c
new file mode 100644
index 000000000000..5e170261e71f
--- /dev/null
+++ b/sys/miscfs/portal/portal_vnops.c
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)portal_vnops.c 8.8 (Berkeley) 1/21/94
+ *
+ * $Id: portal_vnops.c,v 1.4 1992/05/30 10:05:24 jsp Exp jsp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/un.h>
+#include <sys/unpcb.h>
+#include <miscfs/portal/portal.h>
+
+static int portal_fileid = PORTAL_ROOTFILEID+1; /* next pt_fileid that portal_lookup hands out */
+
+/*
+ * Close descriptor fd of process p by calling close() with a
+ * hand-built argument structure.  A failure is only reported
+ * with a printf.
+ */
+static void
+portal_closefd(p, fd)
+	struct proc *p;
+	int fd;
+{
+	struct {
+		int fd;
+	} uap;
+	int retval;
+	int error;
+
+	uap.fd = fd;
+	error = close(p, &uap, &retval);
+
+	/*
+	 * We should never get an error, and there isn't anything
+	 * we could do if we got one, so just print a message.
+	 */
+	if (error != 0)
+		printf("portal_closefd: error = %d\n", error);
+}
+
+/*
+ * Look up a name in a portal directory.
+ *
+ * ap->a_dvp is the current namei directory and ap->a_cnp is the
+ * name to locate in that directory.  "." returns the directory
+ * itself with a new reference.  Any other name gets a fresh VREG
+ * vnode whose portalnode holds a copy of the whole remaining
+ * pathname, and cn_consume is set so that the rest of the string
+ * is consumed.
+ */
+int
+portal_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	char *pname = cnp->cn_nameptr;
+	struct portalnode *pt;
+	struct vnode *fvp = 0;
+	char *end;
+	int error;
+	int size;
+
+	if (cnp->cn_namelen == 1 && *pname == '.') {
+		*ap->a_vpp = ap->a_dvp;
+		VREF(ap->a_dvp);
+		/*VOP_LOCK(ap->a_dvp);*/
+		return (0);
+	}
+
+	error = getnewvnode(VT_PORTAL, ap->a_dvp->v_mount, portal_vnodeop_p, &fvp);
+	if (error) {
+		if (fvp) {
+			vrele(fvp);
+		}
+		*ap->a_vpp = NULL;
+		return (error);
+	}
+	fvp->v_type = VREG;
+	MALLOC(fvp->v_data, void *, sizeof(struct portalnode),
+		M_TEMP, M_WAITOK);
+	pt = VTOPORTAL(fvp);
+
+	/*
+	 * Measure all of the remaining pathname and tell namei
+	 * how much of it lies beyond the current component.
+	 */
+	for (end = pname; *end != '\0'; end++)
+		continue;
+	size = end - pname;
+	cnp->cn_consume = size - cnp->cn_namelen;
+
+	/* Keep a private copy of the pathname, terminating NUL included. */
+	pt->pt_size = size + 1;
+	pt->pt_arg = malloc(pt->pt_size, M_TEMP, M_WAITOK);
+	bcopy(pname, pt->pt_arg, pt->pt_size);
+	pt->pt_fileid = portal_fileid++;
+
+	*ap->a_vpp = fvp;
+	/*VOP_LOCK(fvp);*/
+	return (0);
+}
+
+/*
+ * Connect socket so to the listening socket so2.
+ * Taken from unp_connect, bypassing the namei stuff: a new socket
+ * is spawned from so2, given a copy of so2's address if it has one,
+ * and then joined to so with unp_connect2.
+ */
+static int
+portal_connect(so, so2)
+	struct socket *so;
+	struct socket *so2;
+{
+	struct socket *newso;
+	struct unpcb *listen_pcb;
+	struct unpcb *new_pcb;
+
+	if (so2 == 0)
+		return (ECONNREFUSED);
+	if (so->so_type != so2->so_type)
+		return (EPROTOTYPE);
+	if ((so2->so_options & SO_ACCEPTCONN) == 0)
+		return (ECONNREFUSED);
+
+	newso = sonewconn(so2, 0);
+	if (newso == 0)
+		return (ECONNREFUSED);
+
+	listen_pcb = sotounpcb(so2);
+	new_pcb = sotounpcb(newso);
+	if (listen_pcb->unp_addr)
+		new_pcb->unp_addr = m_copy(listen_pcb->unp_addr, 0, (int)M_COPYALL);
+
+	return (unp_connect2(so, newso));
+}
+/*
+ * Open a portal node.
+ *
+ * Opening the root vnode is a no-op.  For any other node a UNIX-domain
+ * stream socket is created and connected to the mount's server socket
+ * (fmp->pm_server), the caller's credentials and the portal argument
+ * (pt_arg, pt_size bytes) are sent, and the reply is read back: an int
+ * error code and/or a control message carrying file descriptors.  On
+ * success the first received descriptor is stored in p->p_dupfd and
+ * ENXIO is returned (see the comment near the end of the function);
+ * otherwise an errno value is returned.  Every path taken after the
+ * socket is created leaves through "bad", which frees the control
+ * message and closes the socket.
+ */
+int
+portal_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct socket *so = 0; /* client end of the connection; closed at "bad" */
+ struct portalnode *pt;
+ struct proc *p = ap->a_p;
+ struct vnode *vp = ap->a_vp;
+ int s; /* priority level saved by splnet() */
+ struct uio auio;
+ struct iovec aiov[2];
+ int res;
+ struct mbuf *cm = 0; /* control message from soreceive(); freed at "bad" */
+ struct cmsghdr *cmsg;
+ int newfds;
+ int *ip;
+ int fd;
+ int error;
+ int len;
+ struct portalmount *fmp;
+ struct file *fp;
+ struct portal_cred pcred;
+
+ /*
+ * Nothing to do when opening the root node.
+ */
+ if (vp->v_flag & VROOT)
+ return (0);
+
+ /*
+ * Can't be opened unless the caller is set up
+ * to deal with the side effects. Check for this
+ * by testing whether the p_dupfd has been set.
+ * (A non-negative p_dupfd is rejected with ENODEV.)
+ */
+ if (p->p_dupfd >= 0)
+ return (ENODEV);
+
+ pt = VTOPORTAL(vp);
+ fmp = VFSTOPORTAL(vp->v_mount);
+
+ /*
+ * Create a new socket.
+ */
+ error = socreate(AF_UNIX, &so, SOCK_STREAM, 0);
+ if (error)
+ goto bad;
+
+ /*
+ * Reserve some buffer space
+ * (room for the argument, the credentials and some slack).
+ */
+ res = pt->pt_size + sizeof(pcred) + 512; /* XXX */
+ error = soreserve(so, res, res);
+ if (error)
+ goto bad;
+
+ /*
+ * Kick off connection
+ */
+ error = portal_connect(so, (struct socket *)fmp->pm_server->f_data);
+ if (error)
+ goto bad;
+
+ /*
+ * Wait for connection to complete
+ */
+ /*
+ * XXX: Since the mount point is holding a reference on the
+ * underlying server socket, it is not easy to find out whether
+ * the server process is still running. To handle this problem
+ * we loop waiting for the new socket to be connected (something
+ * which will only happen if the server is still running) or for
+ * the reference count on the server socket to drop to 1, which
+ * will happen if the server dies. Sleep for 5 second intervals
+ * and keep polling the reference count. XXX.
+ */
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ if (fmp->pm_server->f_count == 1) {
+ error = ECONNREFUSED;
+ splx(s);
+ goto bad;
+ }
+ (void) tsleep((caddr_t) &so->so_timeo, PSOCK, "portalcon", 5 * hz);
+ }
+ splx(s);
+
+ if (so->so_error) {
+ error = so->so_error;
+ goto bad;
+ }
+
+ /*
+ * Set miscellaneous flags
+ * (no send/receive timeouts; SB_NOINTR on both buffers).
+ */
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_timeo = 0;
+ so->so_rcv.sb_flags |= SB_NOINTR;
+ so->so_snd.sb_flags |= SB_NOINTR;
+
+ /* Build the request: a portal_cred header followed by the portal argument. */
+ pcred.pcr_flag = ap->a_mode;
+ pcred.pcr_uid = ap->a_cred->cr_uid;
+ pcred.pcr_ngroups = ap->a_cred->cr_ngroups;
+ bcopy(ap->a_cred->cr_groups, pcred.pcr_groups, NGROUPS * sizeof(gid_t));
+ aiov[0].iov_base = (caddr_t) &pcred;
+ aiov[0].iov_len = sizeof(pcred);
+ aiov[1].iov_base = pt->pt_arg;
+ aiov[1].iov_len = pt->pt_size;
+ auio.uio_iov = aiov;
+ auio.uio_iovcnt = 2;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_procp = p;
+ auio.uio_offset = 0;
+ auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len;
+
+ error = sosend(so, (struct mbuf *) 0, &auio,
+ (struct mbuf *) 0, (struct mbuf *) 0, 0);
+ if (error)
+ goto bad;
+
+ /*
+ * Read the reply.  Keep calling soreceive() until a control message
+ * arrives, some of the int-sized data has been consumed, or an
+ * error has been recorded.
+ */
+ len = auio.uio_resid = sizeof(int);
+ do {
+ struct mbuf *m = 0;
+ int flags = MSG_WAITALL;
+ error = soreceive(so, (struct mbuf **) 0, &auio,
+ &m, &cm, &flags);
+ if (error)
+ goto bad;
+
+ /*
+ * Grab an error code from the mbuf.
+ */
+ if (m) {
+ m = m_pullup(m, sizeof(int)); /* Needed? */
+ if (m) {
+ error = *(mtod(m, int *));
+ m_freem(m);
+ } else {
+ error = EINVAL;
+ }
+ } else {
+ if (cm == 0) {
+ error = ECONNRESET; /* XXX */
+#ifdef notdef
+ break;
+#endif
+ }
+ }
+ } while (cm == 0 && auio.uio_resid == len && !error);
+
+ /*
+ * No control message means no descriptor was passed: return the
+ * error collected above.  NOTE(review): if the server sent a zero
+ * error code and no rights, this appears to return 0 without
+ * setting p_dupfd -- confirm that is intended.
+ */
+ if (cm == 0)
+ goto bad;
+
+ if (auio.uio_resid) {
+ error = 0; /* short data is tolerated; the EMSGSIZE check below is compiled out */
+#ifdef notdef
+ error = EMSGSIZE;
+ goto bad;
+#endif
+ }
+
+ /*
+ * XXX: Break apart the control message, and retrieve the
+ * received file descriptor. Note that more than one descriptor
+ * may have been received, or that the rights chain may have more
+ * than a single mbuf in it. What to do?
+ */
+ cmsg = mtod(cm, struct cmsghdr *);
+ newfds = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof (int);
+ if (newfds == 0) {
+ error = ECONNREFUSED;
+ goto bad;
+ }
+ /*
+ * At this point the rights message consists of a control message
+ * header, followed by a data region containing a vector of
+ * integer file descriptors. The fds were allocated by the action
+ * of receiving the control message.
+ */
+ ip = (int *) (cmsg + 1);
+ fd = *ip++; /* the first descriptor is the one handed back to the caller */
+ if (newfds > 1) {
+ /*
+ * Close extra fds.
+ */
+ int i;
+ printf("portal_open: %d extra fds\n", newfds - 1);
+ for (i = 1; i < newfds; i++) {
+ portal_closefd(p, *ip);
+ ip++;
+ }
+ }
+
+ /*
+ * Check that the mode the file is being opened for is a subset
+ * of the mode of the existing descriptor.
+ */
+ fp = p->p_fd->fd_ofiles[fd];
+ if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
+ portal_closefd(p, fd);
+ error = EACCES;
+ goto bad;
+ }
+
+ /*
+ * Save the dup fd in the proc structure then return the
+ * special error code (ENXIO) which causes magic things to
+ * happen in vn_open. The whole concept is, well, hmmm.
+ */
+ p->p_dupfd = fd;
+ error = ENXIO;
+
+bad:;
+ /*
+ * And discard the control message.
+ */
+ if (cm) {
+ m_freem(cm);
+ }
+
+ if (so) {
+ soshutdown(so, 2);
+ soclose(so);
+ }
+ return (error);
+}
+
+/*
+ * Return synthetic attributes for a portal vnode.
+ *
+ * Nothing is stored for portal nodes, so every field is made up here:
+ * owner and group are 0, the size and block size are DEV_BSIZE, and
+ * all three timestamps are the current time.  The root vnode is
+ * reported as a directory (mode rwxrwxrwx, 2 links, file id 2); any
+ * other vnode as a regular file (mode rw-rw-rw-, 1 link, file id
+ * taken from the portalnode).  Always returns 0.
+ */
+int
+portal_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+
+	bzero(vap, sizeof(*vap));
+	vattr_null(vap);
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_size = DEV_BSIZE;
+	vap->va_blocksize = DEV_BSIZE;
+	microtime(&vap->va_atime);
+	vap->va_mtime = vap->va_atime;
+	/*
+	 * This used to read "va_ctime = va_ctime", a self-assignment that
+	 * left ctime at whatever vattr_null() stored in it.
+	 */
+	vap->va_ctime = vap->va_atime;
+	vap->va_gen = 0;
+	vap->va_flags = 0;
+	vap->va_rdev = 0;
+	/* vap->va_qbytes = 0; */
+	vap->va_bytes = 0;
+	/* vap->va_qsize = 0; */
+	if (vp->v_flag & VROOT) {
+		vap->va_type = VDIR;
+		vap->va_mode = S_IRUSR|S_IWUSR|S_IXUSR|
+				S_IRGRP|S_IWGRP|S_IXGRP|
+				S_IROTH|S_IWOTH|S_IXOTH;
+		vap->va_nlink = 2;
+		vap->va_fileid = 2;
+	} else {
+		vap->va_type = VREG;
+		vap->va_mode = S_IRUSR|S_IWUSR|
+				S_IRGRP|S_IWGRP|
+				S_IROTH|S_IWOTH;
+		vap->va_nlink = 1;
+		vap->va_fileid = VTOPORTAL(vp)->pt_fileid;
+	}
+	return (0);
+}
+
+/*
+ * Set attributes on a portal vnode.  The root vnode refuses the
+ * request with EACCES; for every other vnode the request is accepted
+ * and ignored.
+ */
+int
+portal_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/* The root of the portal tree may not be altered. */
+	return ((vp->v_flag & VROOT) ? EACCES : 0);
+}
+
+/*
+ * Directory read stub: transfers nothing and reports success, so a
+ * portal directory always looks empty.  No "." or ".." entries are
+ * produced either, since they are awkward to fake.
+ */
+int
+portal_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	return (0);
+}
+
+/*
+ * Inactive hook: does nothing and returns 0.
+ */
+int
+portal_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return (0);
+}
+
+/*
+ * Reclaim a portal vnode: free the saved portal argument (if any)
+ * and then the portalnode itself, clearing both pointers.
+ * Always returns 0.
+ */
+int
+portal_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct portalnode *node = VTOPORTAL(vp);
+
+	if (node->pt_arg != 0) {
+		free((caddr_t) node->pt_arg, M_TEMP);
+		node->pt_arg = 0;
+	}
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to special devices.
+ *
+ * The value for ap->a_name is stored through ap->a_retval and 0 is
+ * returned; an unrecognised name yields EINVAL and leaves *a_retval
+ * untouched.
+ *
+ * The return type is now spelled out: the definition previously
+ * relied on implicit int, unlike every other operation in this file.
+ */
+int
+portal_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_MAX_CANON:
+		*ap->a_retval = MAX_CANON;
+		return (0);
+	case _PC_MAX_INPUT:
+		*ap->a_retval = MAX_INPUT;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	case _PC_VDISABLE:
+		*ap->a_retval = _POSIX_VDISABLE;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Print a one-line description of a portal vnode.  The vnode itself
+ * is not examined; the message is fixed.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+portal_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	printf("tag VT_PORTAL, portal vnode\n");
+
+	return (0);
+}
+
+/*
+ * vfree stub: there is nothing to release, so just report success.
+ */
+/*void*/
+int
+portal_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+	return (0);
+}
+
+
+/*
+ * Stub for vnode operations the portal filesystem does not support:
+ * always fails with EOPNOTSUPP.
+ */
+int
+portal_enotsupp()
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Stub for vnode operations that must never be invoked on a portal
+ * vnode: panics.
+ */
+int
+portal_badop()
+{
+	panic("portal: bad op");
+	/* NOTREACHED */
+}
+
+/*
+ * Stub for vnode operations that need no work: always succeeds (0).
+ */
+int
+portal_nullop()
+{
+	return (0);
+}
+
+/*
+ * Vnode operations with no portal-specific implementation.  Each name
+ * is a stub cast to the entry-point type for that operation:
+ * portal_enotsupp fails with EOPNOTSUPP, portal_badop panics, and
+ * nullop is defined outside this file (presumably a plain "return 0";
+ * the local portal_nullop is not used here).
+ *
+ * portal_valloc used to be cast to a four-argument prototype; it now
+ * takes a struct vop_valloc_args * like every other entry.
+ */
+#define portal_create ((int (*) __P((struct vop_create_args *)))portal_enotsupp)
+#define portal_mknod ((int (*) __P((struct vop_mknod_args *)))portal_enotsupp)
+#define portal_close ((int (*) __P((struct vop_close_args *)))nullop)
+#define portal_access ((int (*) __P((struct vop_access_args *)))nullop)
+#define portal_read ((int (*) __P((struct vop_read_args *)))portal_enotsupp)
+#define portal_write ((int (*) __P((struct vop_write_args *)))portal_enotsupp)
+#define portal_ioctl ((int (*) __P((struct vop_ioctl_args *)))portal_enotsupp)
+#define portal_select ((int (*) __P((struct vop_select_args *)))portal_enotsupp)
+#define portal_mmap ((int (*) __P((struct vop_mmap_args *)))portal_enotsupp)
+#define portal_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define portal_seek ((int (*) __P((struct vop_seek_args *)))nullop)
+#define portal_remove ((int (*) __P((struct vop_remove_args *)))portal_enotsupp)
+#define portal_link ((int (*) __P((struct vop_link_args *)))portal_enotsupp)
+#define portal_rename ((int (*) __P((struct vop_rename_args *)))portal_enotsupp)
+#define portal_mkdir ((int (*) __P((struct vop_mkdir_args *)))portal_enotsupp)
+#define portal_rmdir ((int (*) __P((struct vop_rmdir_args *)))portal_enotsupp)
+#define portal_symlink \
+	((int (*) __P((struct vop_symlink_args *)))portal_enotsupp)
+#define portal_readlink \
+	((int (*) __P((struct vop_readlink_args *)))portal_enotsupp)
+#define portal_abortop ((int (*) __P((struct vop_abortop_args *)))nullop)
+#define portal_lock ((int (*) __P((struct vop_lock_args *)))nullop)
+#define portal_unlock ((int (*) __P((struct vop_unlock_args *)))nullop)
+#define portal_bmap ((int (*) __P((struct vop_bmap_args *)))portal_badop)
+#define portal_strategy \
+	((int (*) __P((struct vop_strategy_args *)))portal_badop)
+#define portal_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+#define portal_advlock \
+	((int (*) __P((struct vop_advlock_args *)))portal_enotsupp)
+#define portal_blkatoff \
+	((int (*) __P((struct vop_blkatoff_args *)))portal_enotsupp)
+#define portal_valloc \
+	((int (*) __P((struct vop_valloc_args *)))portal_enotsupp)
+#define portal_truncate \
+	((int (*) __P((struct vop_truncate_args *)))portal_enotsupp)
+#define portal_update ((int (*) __P((struct vop_update_args *)))portal_enotsupp)
+#define portal_bwrite ((int (*) __P((struct vop_bwrite_args *)))portal_enotsupp)
+/*
+ * Portal vnode operations table.  Each entry pairs an operation
+ * descriptor with the portal function (or stub macro) implementing
+ * it; the table ends with a NULL/NULL entry.  portal_vnodeop_opv_desc
+ * ties the table to portal_vnodeop_p, which is only declared here
+ * (presumably filled in by the VFS initialisation code -- confirm).
+ */
+int (**portal_vnodeop_p)();
+struct vnodeopv_entry_desc portal_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error }, /* default entry */
+ { &vop_lookup_desc, portal_lookup }, /* lookup */
+ { &vop_create_desc, portal_create }, /* create */
+ { &vop_mknod_desc, portal_mknod }, /* mknod */
+ { &vop_open_desc, portal_open }, /* open */
+ { &vop_close_desc, portal_close }, /* close */
+ { &vop_access_desc, portal_access }, /* access */
+ { &vop_getattr_desc, portal_getattr }, /* getattr */
+ { &vop_setattr_desc, portal_setattr }, /* setattr */
+ { &vop_read_desc, portal_read }, /* read */
+ { &vop_write_desc, portal_write }, /* write */
+ { &vop_ioctl_desc, portal_ioctl }, /* ioctl */
+ { &vop_select_desc, portal_select }, /* select */
+ { &vop_mmap_desc, portal_mmap }, /* mmap */
+ { &vop_fsync_desc, portal_fsync }, /* fsync */
+ { &vop_seek_desc, portal_seek }, /* seek */
+ { &vop_remove_desc, portal_remove }, /* remove */
+ { &vop_link_desc, portal_link }, /* link */
+ { &vop_rename_desc, portal_rename }, /* rename */
+ { &vop_mkdir_desc, portal_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, portal_rmdir }, /* rmdir */
+ { &vop_symlink_desc, portal_symlink }, /* symlink */
+ { &vop_readdir_desc, portal_readdir }, /* readdir */
+ { &vop_readlink_desc, portal_readlink }, /* readlink */
+ { &vop_abortop_desc, portal_abortop }, /* abortop */
+ { &vop_inactive_desc, portal_inactive }, /* inactive */
+ { &vop_reclaim_desc, portal_reclaim }, /* reclaim */
+ { &vop_lock_desc, portal_lock }, /* lock */
+ { &vop_unlock_desc, portal_unlock }, /* unlock */
+ { &vop_bmap_desc, portal_bmap }, /* bmap */
+ { &vop_strategy_desc, portal_strategy }, /* strategy */
+ { &vop_print_desc, portal_print }, /* print */
+ { &vop_islocked_desc, portal_islocked }, /* islocked */
+ { &vop_pathconf_desc, portal_pathconf }, /* pathconf */
+ { &vop_advlock_desc, portal_advlock }, /* advlock */
+ { &vop_blkatoff_desc, portal_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, portal_valloc }, /* valloc */
+ { &vop_vfree_desc, portal_vfree }, /* vfree */
+ { &vop_truncate_desc, portal_truncate }, /* truncate */
+ { &vop_update_desc, portal_update }, /* update */
+ { &vop_bwrite_desc, portal_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* terminating entry */
+};
+struct vnodeopv_desc portal_vnodeop_opv_desc =
+ { &portal_vnodeop_p, portal_vnodeop_entries };
diff --git a/sys/miscfs/procfs/README b/sys/miscfs/procfs/README
new file mode 100644
index 000000000000..38811b3f6e3a
--- /dev/null
+++ b/sys/miscfs/procfs/README
@@ -0,0 +1,113 @@
+saute procfs lyonnais
+
+procfs supports two levels of directory. the filesystem root
+directory contains a representation of the system process table.
+this consists of an entry for each active and zombie process, and
+an additional entry "curproc" which always represents the process
+making the lookup request.
+
+each of the sub-directories contains several files. these files
+are used to control and interrogate processes. the files implemented
+are:
+
+ file - xxx. the exec'ed file.
+
+ status - r/o. returns process status.
+
+ ctl - w/o. sends a control message to the process.
+ for example:
+ echo hup > /proc/curproc/ctl
+ will send a SIGHUP to the shell.
+ whereas
+ echo attach > /proc/1293/ctl
+ would set up process 1293 for debugging.
+ see below for more details.
+
+ mem - r/w. virtual memory image of the process.
+ parts of the address space are readable
+ only if they exist in the target process.
+ a more reasonable alternative might be
+ to return zero pages instead of an error.
+ comments?
+
+ note - w/o. writing a string here sends the
+ equivalent note to the process.
+ [ not implemented. ]
+
+ notepg - w/o. the same as note, but sends to all
+ members of the process group.
+ [ not implemented. ]
+
+ regs - r/w. process register set. this can be read
+ or written any time even if the process
+ is not stopped. since the bsd kernel
+ is single-processor, this implementation
+ will get the "right" register values.
+ a multi-proc kernel would need to do some
+ synchronisation.
+
+this then looks like:
+
+% ls -li /proc
+total 0
+ 9 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 0
+ 17 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 1
+ 89 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 10
+ 25 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 2
+2065 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 257
+2481 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 309
+ 265 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 32
+3129 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 390
+3209 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 400
+3217 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 401
+3273 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 408
+ 393 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 48
+ 409 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 50
+ 465 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 57
+ 481 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 59
+ 537 dr-xr-xr-x 2 root kmem 0 Sep 21 15:06 66
+ 545 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 67
+ 657 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 81
+ 665 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 82
+ 673 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 83
+ 681 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 84
+3273 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 curproc
+% ls -li /proc/curproc
+total 408
+3341 --w------- 1 jsp staff 0 Sep 21 15:06 ctl
+1554 -r-xr-xr-x 1 bin bin 90112 Mar 29 04:52 file
+3339 -rw------- 1 jsp staff 118784 Sep 21 15:06 mem
+3343 --w------- 1 jsp staff 0 Sep 21 15:06 note
+3344 --w------- 1 jsp staff 0 Sep 21 15:06 notepg
+3340 -rw------- 1 jsp staff 0 Sep 21 15:06 regs
+3342 -r--r--r-- 1 jsp staff 0 Sep 21 15:06 status
+% df /proc/curproc /proc/curproc/file
+Filesystem 512-blocks Used Avail Capacity Mounted on
+proc 2 2 0 100% /proc
+/dev/wd0a 16186 13548 1018 93% /
+% cat /proc/curproc/status
+cat 446 439 400 81 12,0 ctty 748620684 270000 0 0 0 20000 nochan 11 20 20 20 0 21 117
+
+
+
+the basic sequence of commands written to "ctl" would be
+
+ attach - this stops the target process and
+ arranges for the sending process
+ to become the debug control process
+ wait - wait for the target process to come to
+ a steady state ready for debugging.
+ step - single step, with no signal delivery.
+ run - continue running, with no signal delivery,
+ until next trap or breakpoint.
+ <signame> - deliver signal <signame> and continue running.
+ detach - continue execution of the target process
+ and remove it from control by the debug process
+
+in a normal debugging environment, where the target is fork/exec'd by
+the debugger, the debugger should fork and the child should stop itself
+(with a self-inflicted SIGSTOP). the parent should do a "wait" then an
+"attach". as before, the child will hit a breakpoint on the first
+instruction in any newly exec'd image.
+
+$Id: README,v 3.1 1993/12/15 09:40:17 jsp Exp $
diff --git a/sys/miscfs/procfs/procfs.h b/sys/miscfs/procfs/procfs.h
new file mode 100644
index 000000000000..f7b8fa3ef0ed
--- /dev/null
+++ b/sys/miscfs/procfs/procfs.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs.h 8.6 (Berkeley) 2/3/94
+ *
+ * From:
+ * $Id: procfs.h,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * The different types of node in a procfs filesystem.
+ * PROCFS_FILENO() adds the enumerator's integer value to a shifted
+ * pid to build a file number, so reordering or inserting values here
+ * changes the file numbers that procfs reports.
+ */
+typedef enum {
+ Proot, /* the filesystem root */
+ Pproc, /* a process-specific sub-directory */
+ Pfile, /* the executable file */
+ Pmem, /* the process's memory image */
+ Pregs, /* the process's register set */
+ Pfpregs, /* the process's FP register set */
+ Pctl, /* process control */
+ Pstatus, /* process status */
+ Pnote, /* process notifier */
+ Pnotepg /* process group notifier */
+} pfstype;
+
+/*
+ * control data for the proc file system.
+ * A vnode's v_data points at its pfsnode (see VTOPFS), and
+ * pfs_vnode points back at the vnode (see PFSTOV).
+ */
+struct pfsnode {
+ struct pfsnode *pfs_next; /* next on list */
+ struct vnode *pfs_vnode; /* vnode associated with this pfsnode */
+ pfstype pfs_type; /* type of procfs node */
+ pid_t pfs_pid; /* associated process */
+ u_short pfs_mode; /* mode bits for stat() */
+ u_long pfs_flags; /* open flags */
+ u_long pfs_fileno; /* unique file id */
+};
+
+#define PROCFS_NOTELEN 64 /* max length of a note (/proc/$pid/note) */
+#define PROCFS_CTLLEN 8 /* max length of a ctl msg (/proc/$pid/ctl) */
+
+/*
+ * Kernel stuff follows
+ */
+#ifdef KERNEL
+/*
+ * True when the name held in the componentname (cnp) is exactly the
+ * (len)-byte string (s).
+ */
+#define CNEQ(cnp, s, len) \
+	((len) == (cnp)->cn_namelen && \
+	 !bcmp((s), (cnp)->cn_nameptr, (len)))
+
+/*
+ * Format of a directory entry in /proc, ...
+ * This must map onto struct dirent (see <dirent.h>)
+ * The name field is limited to PROCFS_NAMELEN bytes, and UIO_MX is
+ * the size of one such fixed-length entry.
+ */
+#define PROCFS_NAMELEN 8
+struct pfsdent {
+ u_long d_fileno;
+ u_short d_reclen;
+ u_char d_type;
+ u_char d_namlen;
+ char d_name[PROCFS_NAMELEN];
+};
+#define UIO_MX sizeof(struct pfsdent)
+/*
+ * Build the file number for node (type) of process (pid).  The root
+ * is always 2.  Every other node gets (pid + 1) shifted left with the
+ * pfstype value in the low bits.  pfstype has ten values, so four
+ * bits are reserved for it: with the previous three-bit shift,
+ * Pnotepg of one pid produced the same number as Pproc of the next.
+ */
+#define PROCFS_FILENO(pid, type) \
+	(((type) == Proot) ? \
+		2 : \
+		((((pid)+1) << 4) + ((int) (type))))
+
+/*
+ * Convert between pfsnode and vnode:
+ * VTOPFS reads the vnode's v_data, PFSTOV reads the pfsnode's pfs_vnode.
+ */
+#define VTOPFS(vp) ((struct pfsnode *)(vp)->v_data)
+#define PFSTOV(pfs) ((pfs)->pfs_vnode)
+/*
+ * Name-to-value table entry, searched with vfs_findname().
+ * NOTE(review): vfs_getuserstr() presumably copies a string out of
+ * the uio into the buffer and updates the length -- confirm against
+ * its definition.
+ */
+typedef struct vfs_namemap vfs_namemap_t;
+struct vfs_namemap {
+ const char *nm_name; /* name to match */
+ int nm_val; /* value associated with the name */
+};
+
+extern int vfs_getuserstr __P((struct uio *, char *, int *));
+extern vfs_namemap_t *vfs_findname __P((vfs_namemap_t *, char *, int));
+
+/* <machine/reg.h>: only named here, for use in the prototypes below */
+struct reg;
+struct fpreg;
+
+#define PFIND(pid) ((pid) ? pfind(pid) : &proc0) /* pid 0 yields proc0 without calling pfind() */
+extern int procfs_freevp __P((struct vnode *));
+extern int procfs_allocvp __P((struct mount *, struct vnode **, long, pfstype));
+extern struct vnode *procfs_findtextvp __P((struct proc *));
+extern int procfs_sstep __P((struct proc *));
+extern void procfs_fix_sstep __P((struct proc *));
+extern int procfs_read_regs __P((struct proc *, struct reg *));
+extern int procfs_write_regs __P((struct proc *, struct reg *));
+extern int procfs_read_fpregs __P((struct proc *, struct fpreg *));
+extern int procfs_write_fpregs __P((struct proc *, struct fpreg *));
+extern int procfs_donote __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_doregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_dofpregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_domem __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_doctl __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_dostatus __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+
+#define PROCFS_LOCKED 0x01 /* NOTE(review): presumably a pfs_flags bit -- confirm */
+#define PROCFS_WANT 0x02 /* NOTE(review): presumably a pfs_flags bit -- confirm */
+
+extern int (**procfs_vnodeop_p)();
+extern struct vfsops procfs_vfsops;
+
+/*
+ * Prototypes for procfs vnode ops
+ * Operations given as macros have no procfs code of their own: they
+ * are casts of procfs_badop or nullop to the operation's entry-point
+ * type.  Read and write both map onto procfs_rw.
+ */
+int procfs_badop(); /* varargs */
+int procfs_rw __P((struct vop_read_args *));
+int procfs_lookup __P((struct vop_lookup_args *));
+#define procfs_create ((int (*) __P((struct vop_create_args *))) procfs_badop)
+#define procfs_mknod ((int (*) __P((struct vop_mknod_args *))) procfs_badop)
+int procfs_open __P((struct vop_open_args *));
+int procfs_close __P((struct vop_close_args *));
+int procfs_access __P((struct vop_access_args *));
+int procfs_getattr __P((struct vop_getattr_args *));
+int procfs_setattr __P((struct vop_setattr_args *));
+#define procfs_read procfs_rw
+#define procfs_write procfs_rw
+int procfs_ioctl __P((struct vop_ioctl_args *));
+#define procfs_select ((int (*) __P((struct vop_select_args *))) procfs_badop)
+#define procfs_mmap ((int (*) __P((struct vop_mmap_args *))) procfs_badop)
+#define procfs_fsync ((int (*) __P((struct vop_fsync_args *))) procfs_badop)
+#define procfs_seek ((int (*) __P((struct vop_seek_args *))) procfs_badop)
+#define procfs_remove ((int (*) __P((struct vop_remove_args *))) procfs_badop)
+#define procfs_link ((int (*) __P((struct vop_link_args *))) procfs_badop)
+#define procfs_rename ((int (*) __P((struct vop_rename_args *))) procfs_badop)
+#define procfs_mkdir ((int (*) __P((struct vop_mkdir_args *))) procfs_badop)
+#define procfs_rmdir ((int (*) __P((struct vop_rmdir_args *))) procfs_badop)
+#define procfs_symlink ((int (*) __P((struct vop_symlink_args *))) procfs_badop)
+int procfs_readdir __P((struct vop_readdir_args *));
+#define procfs_readlink ((int (*) __P((struct vop_readlink_args *))) procfs_badop)
+int procfs_abortop __P((struct vop_abortop_args *));
+int procfs_inactive __P((struct vop_inactive_args *));
+int procfs_reclaim __P((struct vop_reclaim_args *));
+#define procfs_lock ((int (*) __P((struct vop_lock_args *))) nullop)
+#define procfs_unlock ((int (*) __P((struct vop_unlock_args *))) nullop)
+int procfs_bmap __P((struct vop_bmap_args *));
+#define procfs_strategy ((int (*) __P((struct vop_strategy_args *))) procfs_badop)
+int procfs_print __P((struct vop_print_args *));
+#define procfs_islocked ((int (*) __P((struct vop_islocked_args *))) nullop)
+#define procfs_advlock ((int (*) __P((struct vop_advlock_args *))) procfs_badop)
+#define procfs_blkatoff ((int (*) __P((struct vop_blkatoff_args *))) procfs_badop)
+#define procfs_valloc ((int (*) __P((struct vop_valloc_args *))) procfs_badop)
+#define procfs_vfree ((int (*) __P((struct vop_vfree_args *))) nullop)
+#define procfs_truncate ((int (*) __P((struct vop_truncate_args *))) procfs_badop)
+#define procfs_update ((int (*) __P((struct vop_update_args *))) nullop)
+#endif /* KERNEL */
diff --git a/sys/miscfs/procfs/procfs_ctl.c b/sys/miscfs/procfs/procfs_ctl.c
new file mode 100644
index 000000000000..a42a03ce91cd
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_ctl.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_ctl.c 8.3 (Berkeley) 1/21/94
+ *
+ * From:
+ * $Id: procfs_ctl.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * True iff process (p) is in trace wait state
+ * relative to process (curp)
+ * That is: (p) is stopped (SSTOP), its parent is (curp), and it has
+ * the P_TRACED flag set.
+ */
+#define TRACE_WAIT_P(curp, p) \
+ ((p)->p_stat == SSTOP && \
+ (p)->p_pptr == (curp) && \
+ ((p)->p_flag & P_TRACED))
+
+#ifdef notdef
+#define FIX_SSTEP(p) { \
+ procfs_fix_sstep(p); \
+ } \
+}
+#else
+#define FIX_SSTEP(p)
+#endif
+/*
+ * Operation codes understood by procfs_control(), and the table
+ * mapping the command words written to the ctl file onto them.
+ * The table ends with a zeroed entry.
+ */
+#define PROCFS_CTL_ATTACH 1
+#define PROCFS_CTL_DETACH 2
+#define PROCFS_CTL_STEP 3
+#define PROCFS_CTL_RUN 4
+#define PROCFS_CTL_WAIT 5
+
+static vfs_namemap_t ctlnames[] = {
+ /* special /proc commands */
+ { "attach", PROCFS_CTL_ATTACH },
+ { "detach", PROCFS_CTL_DETACH },
+ { "step", PROCFS_CTL_STEP },
+ { "run", PROCFS_CTL_RUN },
+ { "wait", PROCFS_CTL_WAIT },
+ { 0 },
+};
+/*
+ * Signal names accepted on the ctl file, each mapped to its signal
+ * number.  procfs_doctl() consults this table only when the word is
+ * not one of the ctlnames commands.  The table ends with a zeroed
+ * entry.
+ */
+static vfs_namemap_t signames[] = {
+ /* regular signal names */
+ { "hup", SIGHUP }, { "int", SIGINT },
+ { "quit", SIGQUIT }, { "ill", SIGILL },
+ { "trap", SIGTRAP }, { "abrt", SIGABRT },
+ { "iot", SIGIOT }, { "emt", SIGEMT },
+ { "fpe", SIGFPE }, { "kill", SIGKILL },
+ { "bus", SIGBUS }, { "segv", SIGSEGV },
+ { "sys", SIGSYS }, { "pipe", SIGPIPE },
+ { "alrm", SIGALRM }, { "term", SIGTERM },
+ { "urg", SIGURG }, { "stop", SIGSTOP },
+ { "tstp", SIGTSTP }, { "cont", SIGCONT },
+ { "chld", SIGCHLD }, { "ttin", SIGTTIN },
+ { "ttou", SIGTTOU }, { "io", SIGIO },
+ { "xcpu", SIGXCPU }, { "xfsz", SIGXFSZ },
+ { "vtalrm", SIGVTALRM }, { "prof", SIGPROF },
+ { "winch", SIGWINCH }, { "info", SIGINFO },
+ { "usr1", SIGUSR1 }, { "usr2", SIGUSR2 },
+ { 0 },
+};
+/*
+ * Carry out one debugger control operation (op, a PROCFS_CTL_* code)
+ * on process (p) on behalf of process (curp).
+ *
+ * Returns 0 on success; EBUSY if (p) is already traced (ATTACH) or is
+ * not in trace-wait state relative to (curp) (STEP, RUN, and WAIT on a
+ * traced process once the wait is over); EINVAL if (curp) tries to
+ * attach to itself; or the tsleep() error from WAIT.  An op outside
+ * the known set that gets past the trace-wait check panics.
+ */
+static int
+procfs_control(curp, p, op)
+ struct proc *curp;
+ struct proc *p;
+ int op;
+{
+ int error;
+
+ /*
+ * Attach - attaches the target process for debugging
+ * by the calling process.
+ */
+ if (op == PROCFS_CTL_ATTACH) {
+ /* check whether already being traced */
+ if (p->p_flag & P_TRACED)
+ return (EBUSY);
+
+ /* can't trace yourself! */
+ if (p->p_pid == curp->p_pid)
+ return (EINVAL);
+
+ /*
+ * Go ahead and set the trace flag.
+ * Save the old parent (it's reset in
+ * _DETACH, and also in kern_exit.c:wait4()).
+ * Reparent the process so that the tracing
+ * proc gets to see all the action.
+ * Stop the target.
+ */
+ p->p_flag |= P_TRACED;
+ p->p_xstat = 0; /* XXX ? */
+ if (p->p_pptr != curp) {
+ p->p_oppid = p->p_pptr->p_pid;
+ proc_reparent(p, curp);
+ }
+ psignal(p, SIGSTOP);
+ return (0);
+ }
+
+ /*
+ * Target process must be stopped, owned by (curp) and
+ * be set up for tracing (P_TRACED flag set).
+ * Allow DETACH to take place at any time for sanity.
+ * Allow WAIT any time, of course.
+ */
+ switch (op) {
+ case PROCFS_CTL_DETACH:
+ case PROCFS_CTL_WAIT:
+ break;
+
+ default:
+ if (!TRACE_WAIT_P(curp, p))
+ return (EBUSY);
+ }
+
+ /*
+ * do single-step fixup if needed
+ */
+ FIX_SSTEP(p);
+
+ /*
+ * Don't deliver any signal by default.
+ * To continue with a signal, just send
+ * the signal name to the ctl file
+ */
+ p->p_xstat = 0;
+
+ switch (op) {
+ /*
+ * Detach. Cleans up the target process, reparent it if possible
+ * and set it running once more.
+ */
+ case PROCFS_CTL_DETACH:
+ /* if not being traced, then this is a painless no-op */
+ if ((p->p_flag & P_TRACED) == 0)
+ return (0);
+
+ /* not being traced any more */
+ p->p_flag &= ~P_TRACED;
+
+ /* give process back to original parent */
+ if (p->p_oppid != p->p_pptr->p_pid) {
+ struct proc *pp;
+
+ pp = pfind(p->p_oppid);
+ if (pp)
+ proc_reparent(p, pp);
+ }
+
+ p->p_oppid = 0;
+ p->p_flag &= ~P_WAITED; /* XXX ? */
+ wakeup((caddr_t) curp); /* XXX for CTL_WAIT below ? */
+
+ break;
+
+ /*
+ * Step. Let the target process execute a single instruction.
+ */
+ case PROCFS_CTL_STEP:
+ procfs_sstep(p);
+ break;
+
+ /*
+ * Run. Let the target process continue running until a breakpoint
+ * or some other trap.
+ */
+ case PROCFS_CTL_RUN:
+ break;
+
+ /*
+ * Wait for the target process to stop.
+ * If the target is not being traced then just wait
+ * for it to enter the stopped state (SSTOP).
+ * The sleeps are interruptible (PCATCH); a tsleep() error ends
+ * the wait and is returned to the caller.
+ */
+ case PROCFS_CTL_WAIT:
+ error = 0;
+ if (p->p_flag & P_TRACED) {
+ while (error == 0 &&
+ (p->p_stat != SSTOP) &&
+ (p->p_flag & P_TRACED) &&
+ (p->p_pptr == curp)) {
+ error = tsleep((caddr_t) p,
+ PWAIT|PCATCH, "procfsx", 0);
+ }
+ if (error == 0 && !TRACE_WAIT_P(curp, p))
+ error = EBUSY;
+ } else {
+ while (error == 0 && p->p_stat != SSTOP) {
+ error = tsleep((caddr_t) p,
+ PWAIT|PCATCH, "procfs", 0);
+ }
+ }
+ return (error);
+
+ default:
+ panic("procfs_control");
+ }
+
+ /* DETACH, STEP and RUN all finish by restarting a stopped target. */
+ if (p->p_stat == SSTOP)
+ setrunnable(p);
+ return (0);
+}
+
+/*
+ * Handle a write to a process's ctl file.
+ *
+ * The written word is looked up first as a control command and then
+ * as a signal name.  A command is passed to procfs_control().  A
+ * signal is either stored as the exit status of a target that is in
+ * trace-wait state (which is then set running again) or posted with
+ * psignal().  Anything else, including any attempt to read, yields
+ * EOPNOTSUPP.
+ *
+ * Sending a signal to a process under debug therefore also lets it
+ * continue; a signal delivery cannot be single-stepped.
+ */
+int
+procfs_doctl(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char word[PROCFS_CTLLEN+1];
+	int wordlen = PROCFS_CTLLEN;
+	int error;
+	vfs_namemap_t *entry;
+
+	if (uio->uio_rw != UIO_WRITE)
+		return (EOPNOTSUPP);
+
+	error = vfs_getuserstr(uio, word, &wordlen);
+	if (error)
+		return (error);
+
+	/* Debugger control commands take precedence over signal names. */
+	entry = vfs_findname(ctlnames, word, wordlen);
+	if (entry)
+		return (procfs_control(curp, p, entry->nm_val));
+
+	entry = vfs_findname(signames, word, wordlen);
+	if (entry == 0)
+		return (EOPNOTSUPP);
+
+	if (TRACE_WAIT_P(curp, p)) {
+		p->p_xstat = entry->nm_val;
+		FIX_SSTEP(p);
+		setrunnable(p);
+	} else {
+		psignal(p, entry->nm_val);
+	}
+
+	return (0);
+}
diff --git a/sys/miscfs/procfs/procfs_fpregs.c b/sys/miscfs/procfs/procfs_fpregs.c
new file mode 100644
index 000000000000..6d850a6a881b
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_fpregs.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_fpregs.c 8.1 (Berkeley) 1/27/94
+ *
+ * From:
+ * $Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Read or write the floating point register set of process (p)
+ * through its fpregs file.
+ *
+ * The file offset selects the starting byte within struct fpreg.
+ * The current register set is always fetched first; for a write the
+ * user's bytes are then overlaid on it and the result is stored back,
+ * which is only permitted while the target is in SSTOP.
+ *
+ * Returns 0 on success, EINVAL if the offset or the resulting length
+ * lies outside the register image, EBUSY for a write to a target that
+ * is not stopped, or the error returned by procfs_read_fpregs(),
+ * uiomove() or procfs_write_fpregs().  The file offset is reset to 0
+ * on every return.
+ */
+int
+procfs_dofpregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct fpreg r;
+	char *kv;
+	int kl;
+
+	/*
+	 * Validate the offset before it is used for pointer arithmetic.
+	 * A negative offset, or one too large for the int arithmetic
+	 * below, would otherwise move (kv) outside of (r) and let
+	 * uiomove() copy unrelated kernel stack in or out.
+	 */
+	if (uio->uio_offset < 0 || uio->uio_offset > (int) sizeof(r)) {
+		uio->uio_offset = 0;
+		return (EINVAL);
+	}
+
+	kl = (int) sizeof(r) - (int) uio->uio_offset;
+	kv = (char *) &r + (int) uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+
+	if (kl < 0)
+		error = EINVAL;
+	else
+		error = procfs_read_fpregs(p, &r);
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		/* registers may only be stored into a stopped process */
+		if (p->p_stat != SSTOP)
+			error = EBUSY;
+		else
+			error = procfs_write_fpregs(p, &r);
+	}
+
+	/* reset so that a following transfer starts at offset 0 again */
+	uio->uio_offset = 0;
+	return (error);
+}
diff --git a/sys/miscfs/procfs/procfs_mem.c b/sys/miscfs/procfs/procfs_mem.c
new file mode 100644
index 000000000000..039983da09c6
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_mem.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993 Sean Eric Fagan
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry and Sean Eric Fagan.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_mem.c 8.4 (Berkeley) 1/21/94
+ *
+ * From:
+ * $Id: procfs_mem.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * This is a lightly hacked and merged version
+ * of sef's pread/pwrite functions
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+/*
+ * Move uio->uio_resid bytes between (uio) and the address space of
+ * process (p), starting at the user virtual address held in
+ * uio->uio_offset.  The direction is taken from uio->uio_rw.
+ *
+ * Each loop iteration handles at most one page: the page's object is
+ * looked up in the target's map, mapped into kernel_map, copied with
+ * uiomove() and unmapped again.
+ *
+ * Returns 0 when the request completes or the offset runs past
+ * VM_MAXUSER_ADDRESS, otherwise the first non-zero status produced
+ * by one of the vm_*() calls or by uiomove().
+ * NOTE(review): the vm_*() statuses are returned unchanged; confirm
+ * that callers may treat them as errno values.
+ */
+static int
+procfs_rwmem(p, uio)
+ struct proc *p;
+ struct uio *uio;
+{
+ int error;
+ int writing;
+
+ writing = uio->uio_rw == UIO_WRITE;
+
+ /*
+ * Only map in one page at a time. We don't have to, but it
+ * makes things easier. This way is trivial - right?
+ */
+ do {
+ vm_map_t map, tmap;
+ vm_object_t object;
+ vm_offset_t kva;
+ vm_offset_t uva;
+ int page_offset; /* offset into page */
+ vm_offset_t pageno; /* page number */
+ vm_map_entry_t out_entry;
+ vm_prot_t out_prot;
+ vm_page_t m;
+ boolean_t wired, single_use;
+ vm_offset_t off;
+ u_int len;
+ int fix_prot;
+
+ /*
+ * Stop quietly (error 0) once the offset is beyond user space.
+ * NOTE(review): the test is '>', so an offset equal to
+ * VM_MAXUSER_ADDRESS is still attempted - confirm intended.
+ */
+ uva = (vm_offset_t) uio->uio_offset;
+ if (uva > VM_MAXUSER_ADDRESS) {
+ error = 0;
+ break;
+ }
+
+ /*
+ * Get the page number of this segment.
+ */
+ pageno = trunc_page(uva);
+ page_offset = uva - pageno;
+
+ /*
+ * How many bytes to copy
+ */
+ len = min(PAGE_SIZE - page_offset, uio->uio_resid);
+
+ /*
+ * The map we want...
+ */
+ map = &p->p_vmspace->vm_map;
+
+ /*
+ * Check the permissions for the area we're interested
+ * in.
+ */
+ fix_prot = 0;
+ if (writing)
+ fix_prot = !vm_map_check_protection(map, pageno,
+ pageno + PAGE_SIZE, VM_PROT_WRITE);
+
+ if (fix_prot) {
+ /*
+ * If the page is not writable, we make it so.
+ * XXX It is possible that a page may *not* be
+ * read/executable, if a process changes that!
+ * We will assume, for now, that a page is either
+ * VM_PROT_ALL, or VM_PROT_READ|VM_PROT_EXECUTE.
+ */
+ error = vm_map_protect(map, pageno,
+ pageno + PAGE_SIZE, VM_PROT_ALL, 0);
+ if (error)
+ break;
+ }
+
+ /*
+ * Now we need to get the page. out_entry, out_prot, wired,
+ * and single_use aren't used. One would think the vm code
+ * would be a *bit* nicer... We use tmap because
+ * vm_map_lookup() can change the map argument.
+ */
+ tmap = map;
+ error = vm_map_lookup(&tmap, pageno,
+ writing ? VM_PROT_WRITE : VM_PROT_READ,
+ &out_entry, &object, &off, &out_prot,
+ &wired, &single_use);
+ /*
+ * We're done with tmap now.
+ */
+ if (!error)
+ vm_map_lookup_done(tmap, out_entry);
+
+ /*
+ * Fault the page in...
+ * (only done for writes to an object that has a shadow, and
+ * only when the page is absent or flagged PG_COPYONWRITE)
+ */
+ if (!error && writing && object->shadow) {
+ m = vm_page_lookup(object, off);
+ if (m == 0 || (m->flags & PG_COPYONWRITE))
+ error = vm_fault(map, pageno,
+ VM_PROT_WRITE, FALSE);
+ }
+
+ /* Find space in kernel_map for the page we're interested in */
+ if (!error)
+ error = vm_map_find(kernel_map, object, off, &kva,
+ PAGE_SIZE, 1);
+
+ if (!error) {
+ /*
+ * Neither vm_map_lookup() nor vm_map_find() appear
+ * to add a reference count to the object, so we do
+ * that here and now.
+ */
+ vm_object_reference(object);
+
+ /*
+ * Mark the page we just found as pageable.
+ */
+ error = vm_map_pageable(kernel_map, kva,
+ kva + PAGE_SIZE, 0);
+
+ /*
+ * Now do the i/o move.
+ */
+ if (!error)
+ error = uiomove(kva + page_offset, len, uio);
+
+ /* the kernel mapping is dropped whether or not the copy worked */
+ vm_map_remove(kernel_map, kva, kva + PAGE_SIZE);
+ }
+ /*
+ * Undo the temporary protection change. The original protection
+ * was not saved, so this relies on the READ|EXECUTE assumption
+ * described in the XXX comment above.
+ */
+ if (fix_prot)
+ vm_map_protect(map, pageno, pageno + PAGE_SIZE,
+ VM_PROT_READ|VM_PROT_EXECUTE, 0);
+ } while (error == 0 && uio->uio_resid > 0);
+
+ return (error);
+}
+
+/*
+ * Read or write the memory of the target process (p).
+ *
+ * The work is done by procfs_rwmem(), which maps the
+ * target's pages into the kernel one at a time and
+ * uiomove()s directly from the kernel address space.
+ * A zero-length request succeeds without touching the
+ * target at all.
+ */
+int
+procfs_domem(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	/* nothing to transfer */
+	if (uio->uio_resid == 0)
+		return (0);
+
+	return (procfs_rwmem(p, uio));
+}
+
+/*
+ * Given process (p), find the vnode from which
+ * its text segment is being executed.
+ *
+ * It would be nice to grab this information from
+ * the VM system, however, there is no sure-fire
+ * way of doing that.  Instead, fork(), exec() and
+ * wait() all maintain the p_textvp field in the
+ * process proc structure which contains a held
+ * reference to the exec'ed vnode.
+ */
+struct vnode *
+procfs_findtextvp(p)
+	struct proc *p;
+{
+	struct vnode *textvp;
+
+	textvp = p->p_textvp;
+	return (textvp);
+}
+
+
+#ifdef probably_never
+/*
+ * Given process (p), find the vnode from which
+ * its text segment is being mapped.
+ *
+ * (This is here, rather than in procfs_subr in order
+ * to keep all the VM related code in one place.)
+ *
+ * Alternative, VM based implementation which is only
+ * compiled when probably_never is defined.  It walks the
+ * user address range one page at a time and returns the
+ * vnode behind the first object whose pager has type
+ * PG_VNODE, or 0 if no such object is found.  Debugging
+ * printf()s are emitted along the way.
+ */
+struct vnode *
+procfs_findtextvp(p)
+ struct proc *p;
+{
+ int error;
+ vm_object_t object;
+ vm_offset_t pageno; /* page number */
+
+ /* find a vnode pager for the user address space */
+
+ for (pageno = VM_MIN_ADDRESS;
+ pageno < VM_MAXUSER_ADDRESS;
+ pageno += PAGE_SIZE) {
+ vm_map_t map;
+ vm_map_entry_t out_entry;
+ vm_prot_t out_prot;
+ boolean_t wired, single_use;
+ vm_offset_t off;
+
+ /* map is reloaded each time round; it is passed by address below */
+ map = &p->p_vmspace->vm_map;
+ error = vm_map_lookup(&map, pageno,
+ VM_PROT_READ,
+ &out_entry, &object, &off, &out_prot,
+ &wired, &single_use);
+
+ if (!error) {
+ vm_pager_t pager;
+
+ printf("procfs: found vm object\n");
+ vm_map_lookup_done(map, out_entry);
+ printf("procfs: vm object = %x\n", object);
+
+ /*
+ * At this point, assuming no errors, object
+ * is the VM object mapping UVA (pageno).
+ * Ensure it has a vnode pager, then grab
+ * the vnode from that pager's handle.
+ */
+
+ pager = object->pager;
+ printf("procfs: pager = %x\n", pager);
+ if (pager)
+ printf("procfs: found pager, type = %d\n", pager->pg_type);
+ if (pager && pager->pg_type == PG_VNODE) {
+ struct vnode *vp;
+
+ vp = (struct vnode *) pager->pg_handle;
+ printf("procfs: vp = 0x%x\n", vp);
+ return (vp);
+ }
+ }
+ }
+
+ printf("procfs: text object not found\n");
+ return (0);
+}
+#endif /* probably_never */
diff --git a/sys/miscfs/procfs/procfs_note.c b/sys/miscfs/procfs/procfs_note.c
new file mode 100644
index 000000000000..bf2f160baa0d
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_note.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_note.c 8.2 (Berkeley) 1/21/94
+ *
+ * From:
+ * $Id: procfs_note.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/signal.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Handle a write to a process's note file.
+ *
+ * The note string is copied in from userland, but delivery is not
+ * implemented: a successful copy still ends in EOPNOTSUPP.
+ * Anything but a write is rejected with EINVAL, and errors from
+ * vfs_getuserstr() are returned unchanged.
+ */
+int
+procfs_donote(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char text[PROCFS_NOTELEN+1];
+	int len = PROCFS_NOTELEN;
+	int error;
+
+	if (uio->uio_rw != UIO_WRITE)
+		return (EINVAL);
+
+	error = vfs_getuserstr(uio, text, &len);
+	if (error != 0)
+		return (error);
+
+	/* send to process's notify function */
+	return (EOPNOTSUPP);
+}
diff --git a/sys/miscfs/procfs/procfs_regs.c b/sys/miscfs/procfs/procfs_regs.c
new file mode 100644
index 000000000000..fa95fef8f102
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_regs.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_regs.c 8.3 (Berkeley) 1/27/94
+ *
+ * From:
+ * $Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Read or write the general register set of process (p)
+ * through its regs file.
+ *
+ * The file offset selects the starting byte within struct reg.
+ * The current register set is always fetched first; for a write the
+ * user's bytes are then overlaid on it and the result is stored back,
+ * which is only permitted while the target is in SSTOP.
+ *
+ * Returns 0 on success, EINVAL if the offset or the resulting length
+ * lies outside the register image, EBUSY for a write to a target that
+ * is not stopped, or the error returned by procfs_read_regs(),
+ * uiomove() or procfs_write_regs().  The file offset is reset to 0
+ * on every return.
+ */
+int
+procfs_doregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct reg r;
+	char *kv;
+	int kl;
+
+	/*
+	 * Validate the offset before it is used for pointer arithmetic.
+	 * A negative offset, or one too large for the int arithmetic
+	 * below, would otherwise move (kv) outside of (r) and let
+	 * uiomove() copy unrelated kernel stack in or out.
+	 */
+	if (uio->uio_offset < 0 || uio->uio_offset > (int) sizeof(r)) {
+		uio->uio_offset = 0;
+		return (EINVAL);
+	}
+
+	kl = (int) sizeof(r) - (int) uio->uio_offset;
+	kv = (char *) &r + (int) uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+
+	if (kl < 0)
+		error = EINVAL;
+	else
+		error = procfs_read_regs(p, &r);
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		/* registers may only be stored into a stopped process */
+		if (p->p_stat != SSTOP)
+			error = EBUSY;
+		else
+			error = procfs_write_regs(p, &r);
+	}
+
+	/* reset so that a following transfer starts at offset 0 again */
+	uio->uio_offset = 0;
+	return (error);
+}
diff --git a/sys/miscfs/procfs/procfs_status.c b/sys/miscfs/procfs/procfs_status.c
new file mode 100644
index 000000000000..d88aaabdfb05
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_status.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_status.c 8.3 (Berkeley) 2/17/94
+ *
+ * From:
+ * $Id: procfs_status.c,v 3.1 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Produce the one-line textual status of process (p) for a read of
+ * its status file.
+ *
+ * The whole line is formatted into a local buffer and the part
+ * selected by the file offset is then copied out with uiomove().
+ *
+ * Returns EOPNOTSUPP for anything but a read, EINVAL for a negative
+ * file offset, 0 when the offset is at or beyond the end of the
+ * line, and otherwise the result of uiomove().
+ */
+int
+procfs_dostatus(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	struct session *sess;
+	struct tty *tp;
+	struct ucred *cr;
+	char *ps;
+	char *sep;
+	int pid, ppid, pgid, sid;
+	int i;
+	int xlen;
+	int error;
+	/*
+	 * NOTE(review): nothing below bounds the sprintf() output against
+	 * sizeof(psbuf); confirm that the worst case (long group list,
+	 * wide ids) really fits.
+	 */
+	char psbuf[256];		/* XXX - conservative */
+
+	if (uio->uio_rw != UIO_READ)
+		return (EOPNOTSUPP);
+
+	pid = p->p_pid;
+	ppid = p->p_pptr ? p->p_pptr->p_pid : 0;
+	pgid = p->p_pgrp->pg_id;
+	sess = p->p_pgrp->pg_session;
+	sid = sess->s_leader ? sess->s_leader->p_pid : 0;
+
+/* comm pid ppid pgid sid maj,min ctty,sldr start ut st wmsg uid groups ... */
+
+	ps = psbuf;
+	bcopy(p->p_comm, ps, MAXCOMLEN);
+	ps[MAXCOMLEN] = '\0';
+	ps += strlen(ps);
+	ps += sprintf(ps, " %d %d %d %d ", pid, ppid, pgid, sid);
+
+	/* controlling terminal device, or -1,-1 if there is none */
+	if ((p->p_flag&P_CONTROLT) && (tp = sess->s_ttyp))
+		ps += sprintf(ps, "%d,%d ", major(tp->t_dev), minor(tp->t_dev));
+	else
+		ps += sprintf(ps, "%d,%d ", -1, -1);
+
+	/* comma separated flag list, or "noflags" when it is empty */
+	sep = "";
+	if (sess->s_ttyvp) {
+		ps += sprintf(ps, "%sctty", sep);
+		sep = ",";
+	}
+	if (SESS_LEADER(p)) {
+		ps += sprintf(ps, "%ssldr", sep);
+		sep = ",";
+	}
+	if (*sep != ',')
+		ps += sprintf(ps, "noflags");
+
+	/* p_stats is only consulted while the process is P_INMEM */
+	if (p->p_flag & P_INMEM)
+		ps += sprintf(ps, " %d,%d",
+			p->p_stats->p_start.tv_sec,
+			p->p_stats->p_start.tv_usec);
+	else
+		ps += sprintf(ps, " -1,-1");
+
+	{
+		struct timeval ut, st;
+
+		calcru(p, &ut, &st, (void *) 0);
+		ps += sprintf(ps, " %d,%d %d,%d",
+			ut.tv_sec,
+			ut.tv_usec,
+			st.tv_sec,
+			st.tv_usec);
+	}
+
+	ps += sprintf(ps, " %s",
+		(p->p_wchan && p->p_wmesg) ? p->p_wmesg : "nochan");
+
+	cr = p->p_ucred;
+
+	/*
+	 * uid followed by the comma separated group list.  The format
+	 * has a single conversion, so only the uid is passed; the
+	 * groups are all emitted by the loop.
+	 */
+	ps += sprintf(ps, " %d", cr->cr_uid);
+	for (i = 0; i < cr->cr_ngroups; i++)
+		ps += sprintf(ps, ",%d", cr->cr_groups[i]);
+	ps += sprintf(ps, "\n");
+
+	/*
+	 * Validate the file offset before using it to index psbuf, so
+	 * that the copy below can never start outside the formatted
+	 * line regardless of how min() treats negative values.
+	 */
+	xlen = ps - psbuf;
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	if (uio->uio_offset >= xlen)
+		return (0);
+
+	xlen -= (int) uio->uio_offset;
+	ps = psbuf + (int) uio->uio_offset;
+	xlen = min(xlen, uio->uio_resid);
+	if (xlen <= 0)
+		error = 0;
+	else
+		error = uiomove(ps, xlen, uio);
+
+	return (error);
+}
diff --git a/sys/miscfs/procfs/procfs_subr.c b/sys/miscfs/procfs/procfs_subr.c
new file mode 100644
index 000000000000..b371af19af0b
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_subr.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_subr.c 8.4 (Berkeley) 1/27/94
+ *
+ * From:
+ * $Id: procfs_subr.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <miscfs/procfs/procfs.h>
+
+static struct pfsnode *pfshead;
+static int pfsvplock;
+
+/*
+ * allocate a pfsnode/vnode pair. the vnode is
+ * referenced, but not locked.
+ *
+ * the pid, pfs_type, and mount point uniquely
+ * identify a pfsnode. the mount point is needed
+ * because someone might mount this filesystem
+ * twice.
+ *
+ * all pfsnodes are maintained on a singly-linked
+ * list. new nodes are only allocated when they cannot
+ * be found on this list. entries on the list are
+ * removed when the vfs reclaim entry is called.
+ *
+ * a single lock is kept for the entire list. this is
+ * needed because the getnewvnode() function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode. this lock is only taken if we are going to
+ * call getnewvnode, since the kernel itself is single-threaded.
+ *
+ * if an entry is found on the list, then call vget() to
+ * take a reference. this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ */
+int
+procfs_allocvp(mp, vpp, pid, pfs_type)
+	struct mount *mp;
+	struct vnode **vpp;
+	long pid;
+	pfstype pfs_type;
+{
+	struct pfsnode *node;
+	struct pfsnode **tailp;
+	int error;
+
+rescan:
+	/* reuse an existing node for this (pid, type, mount) if there is one */
+	for (node = pfshead; node != 0; node = node->pfs_next) {
+		if (node->pfs_pid != pid)
+			continue;
+		if (node->pfs_type != pfs_type)
+			continue;
+		if (PFSTOV(node)->v_mount != mp)
+			continue;
+
+		/* a failed vget() means the list must be searched afresh */
+		if (vget(node->pfs_vnode, 0))
+			goto rescan;
+		*vpp = node->pfs_vnode;
+		return (0);
+	}
+
+	/*
+	 * nothing found: take the list lock around getnewvnode(),
+	 * which can block.  if somebody else holds the lock, wait
+	 * for it and then search again.
+	 */
+	if (pfsvplock & PROCFS_LOCKED) {
+		pfsvplock |= PROCFS_WANT;
+		sleep((caddr_t) &pfsvplock, PINOD);
+		goto rescan;
+	}
+	pfsvplock |= PROCFS_LOCKED;
+
+	error = getnewvnode(VT_PROCFS, mp, procfs_vnodeop_p, vpp);
+	if (error)
+		goto unlock;
+
+	MALLOC((*vpp)->v_data, void *, sizeof(struct pfsnode),
+		M_TEMP, M_WAITOK);
+
+	/* fill in the new node */
+	node = VTOPFS(*vpp);
+	node->pfs_next = 0;
+	node->pfs_pid = (pid_t) pid;
+	node->pfs_type = pfs_type;
+	node->pfs_vnode = *vpp;
+	node->pfs_flags = 0;
+	node->pfs_fileno = PROCFS_FILENO(pid, pfs_type);
+
+	/* the access mode depends only on the node type */
+	switch (pfs_type) {
+	case Proot:	/* /proc = dr-xr-xr-x */
+	case Pproc:
+		node->pfs_mode = (VREAD|VEXEC) |
+				 (VREAD|VEXEC) >> 3 |
+				 (VREAD|VEXEC) >> 6;
+		break;
+
+	case Pfile:
+	case Pmem:
+	case Pregs:
+	case Pfpregs:
+		node->pfs_mode = (VREAD|VWRITE);
+		break;
+
+	case Pctl:
+	case Pnote:
+	case Pnotepg:
+		node->pfs_mode = (VWRITE);
+		break;
+
+	case Pstatus:
+		node->pfs_mode = (VREAD) |
+				 (VREAD >> 3) |
+				 (VREAD >> 6);
+		break;
+
+	default:
+		panic("procfs_allocvp");
+	}
+
+	/* append to the tail of the procfs vnode list */
+	tailp = &pfshead;
+	while (*tailp)
+		tailp = &(*tailp)->pfs_next;
+	*tailp = node;
+
+unlock:
+	/* release the list lock and wake anybody who asked for it */
+	pfsvplock &= ~PROCFS_LOCKED;
+
+	if (pfsvplock & PROCFS_WANT) {
+		pfsvplock &= ~PROCFS_WANT;
+		wakeup((caddr_t) &pfsvplock);
+	}
+
+	return (error);
+}
+
+/*
+ * Release the pfsnode attached to (vp): unlink it from the
+ * pfshead list if it is on it, free the node memory and clear
+ * vp->v_data.  Always returns 0.
+ */
+int
+procfs_freevp(vp)
+	struct vnode *vp;
+{
+	struct pfsnode *victim = VTOPFS(vp);
+	struct pfsnode **linkp;
+
+	/* walk the link fields so the head needs no special case */
+	linkp = &pfshead;
+	while (*linkp != 0) {
+		if (*linkp == victim) {
+			*linkp = victim->pfs_next;
+			break;
+		}
+		linkp = &(*linkp)->pfs_next;
+	}
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+	return (0);
+}
+
+/*
+ * Common read/write entry point: look up the process the vnode
+ * refers to and hand the request to the handler for the node type.
+ *
+ * Returns EINVAL if the process cannot be found, EOPNOTSUPP for
+ * node types without a handler, otherwise the handler's result.
+ */
+int
+procfs_rw(ap)
+	struct vop_read_args *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct proc *curp = uio->uio_procp;
+	struct proc *p;
+	int error;
+
+	p = PFIND(pfs->pfs_pid);
+	if (p == 0)
+		return (EINVAL);
+
+	switch (pfs->pfs_type) {
+	case Pnote:
+	case Pnotepg:
+		error = procfs_donote(curp, p, pfs, uio);
+		break;
+
+	case Pregs:
+		error = procfs_doregs(curp, p, pfs, uio);
+		break;
+
+	case Pfpregs:
+		error = procfs_dofpregs(curp, p, pfs, uio);
+		break;
+
+	case Pctl:
+		error = procfs_doctl(curp, p, pfs, uio);
+		break;
+
+	case Pstatus:
+		error = procfs_dostatus(curp, p, pfs, uio);
+		break;
+
+	case Pmem:
+		error = procfs_domem(curp, p, pfs, uio);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Get a string from userland into (buf). Strip a trailing
+ * nl character (to allow easy access from the shell).
+ * The buffer should be *buflenp + 1 chars long. vfs_getuserstr
+ * will automatically add a nul char at the end.
+ *
+ * Returns 0 on success or the following errors
+ *
+ * EINVAL: file offset is non-zero.
+ * EMSGSIZE: message is longer than kernel buffer
+ * EFAULT: user i/o buffer is not addressable
+ */
+int
+vfs_getuserstr(uio, buf, buflenp)
+	struct uio *uio;
+	char *buf;
+	int *buflenp;
+{
+	int len;
+	int error;
+
+	/* only writes that start at the beginning are accepted */
+	if (uio->uio_offset != 0)
+		return (EINVAL);
+
+	/* must be able to read the whole string in one go */
+	if (*buflenp < uio->uio_resid)
+		return (EMSGSIZE);
+	len = uio->uio_resid;
+
+	error = uiomove(buf, len, uio);
+	if (error)
+		return (error);
+
+	/* allow multiple writes without seeks */
+	uio->uio_offset = 0;
+
+	/*
+	 * terminate the string, then measure it again so that
+	 * the reported length stops at the first nul
+	 */
+	buf[len] = '\0';
+	len = strlen(buf);
+
+	/* remove a single trailing newline */
+	if (len > 0 && buf[len - 1] == '\n') {
+		len--;
+		buf[len] = '\0';
+	}
+	*buflenp = len;
+
+	return (0);
+}
+
+/*
+ * Look up (buf) in the name table (nm), which ends at the first
+ * entry whose nm_name is null.
+ *
+ * An entry matches when its name agrees with the first buflen+1
+ * bytes of (buf), i.e. including the terminating nul that callers
+ * place at buf[buflen].  Returns the matching entry, or 0 if there
+ * is none.
+ *
+ * The comparison is done a byte at a time and stops at the first
+ * difference or at the end of the table name, so a table name that
+ * is shorter than (buf) is never read beyond its terminating nul.
+ */
+vfs_namemap_t *
+vfs_findname(nm, buf, buflen)
+	vfs_namemap_t *nm;
+	char *buf;
+	int buflen;
+{
+	char *name;
+	int i;
+
+	for (; nm->nm_name; nm++) {
+		name = (char *) nm->nm_name;
+		for (i = 0; i <= buflen; i++) {
+			if (buf[i] != name[i])
+				break;
+			/* name ended early: lengths differ, no match */
+			if (name[i] == '\0' && i < buflen)
+				break;
+		}
+		/* every one of the buflen+1 bytes agreed */
+		if (i > buflen)
+			return (nm);
+	}
+
+	return (0);
+}
diff --git a/sys/miscfs/procfs/procfs_vfsops.c b/sys/miscfs/procfs/procfs_vfsops.c
new file mode 100644
index 000000000000..3938ca123576
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_vfsops.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_vfsops.c 8.4 (Berkeley) 1/21/94
+ *
+ * From:
+ * $Id: procfs_vfsops.c,v 3.1 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * procfs VFS interface
+ */
+
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/syslog.h>
+#include <sys/mount.h>
+#include <sys/signalvar.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h> /* for PAGE_SIZE */
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * (mp) is the mount structure being set up and (path) is the
+ * user-space name of the mount point; (data), (ndp) and (p)
+ * are not used.
+ *
+ * Returns EINVAL if UIO_MX is not a power of two (readdir
+ * relies on masking with UIO_MX-1), EOPNOTSUPP for an update
+ * of an existing mount, and 0 otherwise.
+ */
+/* ARGSUSED */
+procfs_mount(mp, path, data, ndp, p)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ u_int size;
+
+ if (UIO_MX & (UIO_MX-1)) {
+ /* terminate the line so later log output is not run on */
+ log(LOG_ERR, "procfs: invalid directory entry size\n");
+ return (EINVAL);
+ }
+
+ if (mp->mnt_flag & MNT_UPDATE)
+ return (EOPNOTSUPP);
+
+ mp->mnt_flag |= MNT_LOCAL;
+ mp->mnt_data = 0;
+ getnewfsid(mp, MOUNT_PROCFS);
+
+ /*
+ * record the mount point name, zero filling the remainder.
+ * the result of copyinstr is deliberately ignored.
+ */
+ (void) copyinstr(path, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN, &size);
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+ /* the "mounted from" name is always the literal "procfs" */
+ size = sizeof("procfs") - 1;
+ bcopy("procfs", mp->mnt_stat.f_mntfromname, size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+ return (0);
+}
+
+/*
+ * unmount system call
+ *
+ * flush every vnode belonging to (mp). a forced unmount
+ * (MNT_FORCE) is refused with EINVAL unless the global
+ * doforce is set; when allowed, the flush is done with
+ * FORCECLOSE. the result of vflush is returned.
+ */
+procfs_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ extern int doforce;
+ int flushflags = 0;
+
+ if (mntflags & MNT_FORCE) {
+ /* procfs can never be rootfs so don't check for it */
+ if (!doforce)
+ return (EINVAL);
+ flushflags |= FORCECLOSE;
+ }
+
+ return (vflush(mp, 0, flushflags));
+}
+
+/*
+ * return the root vnode of the procfs mounted at (mp) in
+ * (*vpp). the vnode is obtained from procfs_allocvp for
+ * pid 0 and type Proot, and is marked as a directory and
+ * as the root of the filesystem.
+ *
+ * returns 0, or the error from procfs_allocvp.
+ */
+procfs_root(mp, vpp)
+ struct mount *mp;
+ struct vnode **vpp;
+{
+ struct vnode *rootvp;
+ struct pfsnode *pfs;
+ int error;
+
+ if (error = procfs_allocvp(mp, &rootvp, (pid_t) 0, Proot))
+ return (error);
+
+ rootvp->v_type = VDIR;
+ rootvp->v_flag = VROOT;
+ pfs = VTOPFS(rootvp);
+
+ *vpp = rootvp;
+ return (0);
+}
+
+/*
+ * start entry point for procfs.
+ *
+ * (mp), (flags) and (p) are all ignored. nothing is done
+ * here and success (0) is always returned.
+ */
+/* ARGSUSED */
+procfs_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+
+ return (0);
+}
+
+/*
+ * Get file system statistics.
+ *
+ * The numbers stored in (sbp) are invented: one block of
+ * PAGE_SIZE bytes with nothing free, and file counts taken
+ * from the process table limits. When (sbp) is not the
+ * mount's own statfs structure, the filesystem id and the
+ * two mount names are copied across from (mp) as well.
+ * Always returns 0.
+ */
+procfs_statfs(mp, sbp, p)
+ struct mount *mp;
+ struct statfs *sbp;
+ struct proc *p;
+{
+ sbp->f_type = MOUNT_PROCFS;
+ sbp->f_iosize = PAGE_SIZE;
+ sbp->f_bsize = PAGE_SIZE;
+ sbp->f_bavail = 0;
+ sbp->f_bfree = 0;
+ sbp->f_blocks = 1; /* avoid divide by zero in some df's */
+ sbp->f_files = maxproc; /* approx */
+ sbp->f_ffree = maxproc - nprocs; /* approx */
+
+ /* nothing more to copy when filling in the mount's own copy */
+ if (sbp == &mp->mnt_stat)
+ return (0);
+
+ bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+ bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+ bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+
+ return (0);
+}
+
+
+procfs_quotactl(mp, cmds, uid, arg, p)
+ struct mount *mp;
+ int cmds;
+ uid_t uid;
+ caddr_t arg;
+ struct proc *p;
+{
+ /* no quota operation is carried out: always EOPNOTSUPP */
+ return (EOPNOTSUPP);
+}
+
+procfs_sync(mp, waitfor)
+ struct mount *mp;
+ int waitfor;
+{
+ /* nothing is flushed here: always reports success */
+ return (0);
+}
+
+procfs_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+ /* no vnode is ever returned in (*vpp): always EOPNOTSUPP */
+ return (EOPNOTSUPP);
+}
+
+procfs_fhtovp(mp, fhp, vpp)
+ struct mount *mp;
+ struct fid *fhp;
+ struct vnode **vpp;
+{
+ /* no file handle is ever translated: always EINVAL */
+ return (EINVAL);
+}
+
+/*
+ * vnode to file handle conversion.
+ * (fhp) is never filled in; the call always fails with EINVAL.
+ */
+procfs_vptofh(vp, fhp)
+ struct vnode *vp;
+ struct fid *fhp;
+{
+
+ return (EINVAL);
+}
+
+procfs_init()
+{
+ /* nothing is set up here: always succeeds */
+ return (0);
+}
+
+struct vfsops procfs_vfsops = { /* procfs VFS entry points; positional, so the order matters */
+ procfs_mount,
+ procfs_start,
+ procfs_unmount,
+ procfs_root,
+ procfs_quotactl,
+ procfs_statfs,
+ procfs_sync,
+ procfs_vget,
+ procfs_fhtovp,
+ procfs_vptofh,
+ procfs_init,
+};
diff --git a/sys/miscfs/procfs/procfs_vnops.c b/sys/miscfs/procfs/procfs_vnops.c
new file mode 100644
index 000000000000..4e1ee002bb90
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_vnops.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_vnops.c 8.6 (Berkeley) 2/7/94
+ *
+ * From:
+ * $Id: procfs_vnops.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * procfs vnode interface
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h> /* for PAGE_SIZE */
+
+/*
+ * Vnode Operations.
+ *
+ */
+
+/*
+ * This is a list of the valid names in the
+ * process-specific sub-directories. It is
+ * used in procfs_lookup and procfs_readdir
+ *
+ * Each entry pairs a name, and its length, with
+ * the pfstype of the node that the name refers to.
+ * The N() macro expands a string literal into the
+ * "length, string" pair, where the length excludes
+ * the terminating nul. Nprocent is the number of
+ * entries in the table.
+ */
+static struct pfsnames {
+ u_short d_namlen; /* length of d_name, without the nul */
+ char d_name[PROCFS_NAMELEN]; /* name of the entry */
+ pfstype d_pfstype; /* type of node the name maps to */
+} procent[] = {
+#define N(s) sizeof(s)-1, s
+ /* namlen, nam, type */
+ { N("file"), Pfile },
+ { N("mem"), Pmem },
+ { N("regs"), Pregs },
+ { N("fpregs"), Pfpregs },
+ { N("ctl"), Pctl },
+ { N("status"), Pstatus },
+ { N("note"), Pnote },
+ { N("notepg"), Pnotepg },
+#undef N
+};
+#define Nprocent (sizeof(procent)/sizeof(procent[0]))
+
+static pid_t atopid __P((const char *, u_int));
+
+/*
+ * prepare the pfsnode (vp) for i/o. (vp) is
+ * locked on entry and is left locked on exit.
+ *
+ * the only work needed for procfs is to enforce
+ * exclusive open of process memory images: an
+ * open of a Pmem node fails with ENOENT if the
+ * process has gone, and with EBUSY if it clashes
+ * with an existing exclusive or write open. a
+ * successful write open records its FWRITE and
+ * O_EXCL bits in the pfsnode. every other node
+ * type opens without any checks.
+ */
+procfs_open(ap)
+ struct vop_open_args *ap;
+{
+ struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+ /* only memory images need any attention */
+ if (pfs->pfs_type != Pmem)
+ return (0);
+
+ if (PFIND(pfs->pfs_pid) == 0)
+ return (ENOENT); /* was ESRCH, jsp */
+
+ /* an exclusive open and a write open cannot coexist */
+ if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
+ ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
+ return (EBUSY);
+
+ /* remember how the image was opened for writing */
+ if (ap->a_mode & FWRITE)
+ pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
+
+ return (0);
+}
+
+/*
+ * finish with the pfsnode (vp) once i/o is done.
+ * (vp) is not locked on entry or exit.
+ *
+ * all that procfs has to do is drop the exclusive
+ * open state recorded by _open above: when a write
+ * open of a memory image that was marked O_EXCL is
+ * closed, the FWRITE and O_EXCL bits are cleared.
+ * always returns 0.
+ */
+procfs_close(ap)
+ struct vop_close_args *ap;
+{
+ struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+ if (pfs->pfs_type == Pmem &&
+ (ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
+ pfs->pfs_flags &= ~(FWRITE|O_EXCL);
+
+ return (0);
+}
+
+/*
+ * do an ioctl operation on pfsnode (vp).
+ * (vp) is not locked on entry or exit.
+ *
+ * the arguments in (ap) are not examined: every
+ * request fails with ENOTTY.
+ */
+procfs_ioctl(ap)
+ struct vop_ioctl_args *ap;
+{
+
+ return (ENOTTY);
+}
+
+/*
+ * do block mapping for pfsnode (vp).
+ * since we don't use the buffer cache
+ * for procfs this function should never
+ * be called. in any case, it's not clear
+ * what part of the kernel ever makes use
+ * of this function. for sanity, this is the
+ * usual no-op bmap, although returning
+ * (EIO) would be a reasonable alternative.
+ *
+ * the mapping is the identity: if (a_vpp) is
+ * non-null it receives the vnode itself, and if
+ * (a_bnp) is non-null it receives the block
+ * number (a_bn) unchanged. always returns 0.
+ */
+procfs_bmap(ap)
+ struct vop_bmap_args *ap;
+{
+
+ if (ap->a_vpp != NULL)
+ *ap->a_vpp = ap->a_vp;
+ if (ap->a_bnp != NULL)
+ *ap->a_bnp = ap->a_bn;
+ return (0);
+}
+
+/*
+ * _inactive is called when the pfsnode
+ * is vrele'd and the reference count goes
+ * to zero. (vp) will be on the vnode free
+ * list, so to get it back vget() must be
+ * used.
+ *
+ * for procfs, check if the process is still
+ * alive and if it isn't then just throw away
+ * the vnode by calling vgone(). this may
+ * be overkill and a waste of time since the
+ * chances are that the process will still be
+ * there and PFIND is not free.
+ *
+ * (vp) is not locked on entry or exit.
+ *
+ * always returns 0, whether or not the vnode
+ * was thrown away.
+ */
+procfs_inactive(ap)
+ struct vop_inactive_args *ap;
+{
+ struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+ if (PFIND(pfs->pfs_pid) == 0)
+ vgone(ap->a_vp);
+
+ return (0);
+}
+
+/*
+ * _reclaim is invoked when getnewvnode() decides
+ * to reuse an entry from the vnode free list. the
+ * filesystem must then release its private data
+ * and unhook the node from any private lists.
+ *
+ * for procfs all of that is handed to
+ * procfs_freevp(), whose result is returned.
+ */
+procfs_reclaim(ap)
+ struct vop_reclaim_args *ap;
+{
+
+ return (procfs_freevp(ap->a_vp));
+}
+
+/*
+ * Return POSIX pathconf information for a procfs vnode.
+ *
+ * The limit selected by (a_name) is stored in (*a_retval)
+ * and 0 is returned. A name that is not handled here
+ * leaves (*a_retval) untouched and returns EINVAL.
+ */
+procfs_pathconf(ap)
+ struct vop_pathconf_args /* {
+ struct vnode *a_vp;
+ int a_name;
+ int *a_retval;
+ } */ *ap;
+{
+
+ switch (ap->a_name) {
+ case _PC_LINK_MAX:
+ *ap->a_retval = LINK_MAX;
+ break;
+ case _PC_MAX_CANON:
+ *ap->a_retval = MAX_CANON;
+ break;
+ case _PC_MAX_INPUT:
+ *ap->a_retval = MAX_INPUT;
+ break;
+ case _PC_PIPE_BUF:
+ *ap->a_retval = PIPE_BUF;
+ break;
+ case _PC_CHOWN_RESTRICTED:
+ *ap->a_retval = 1;
+ break;
+ case _PC_VDISABLE:
+ *ap->a_retval = _POSIX_VDISABLE;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+/*
+ * _print is used for debugging.
+ * just print a readable description
+ * of (vp): the pid, mode and flags
+ * held in its pfsnode.
+ *
+ * always returns 0, so that the value
+ * seen by a caller is well defined.
+ */
+procfs_print(ap)
+ struct vop_print_args *ap;
+{
+ struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+ printf("tag VT_PROCFS, pid %d, mode %x, flags %x\n",
+ pfs->pfs_pid,
+ pfs->pfs_mode, pfs->pfs_flags);
+
+ return (0);
+}
+
+/*
+ * _abortop is called when operations such as
+ * rename and create fail. this entry is responsible
+ * for undoing any side-effects caused by the lookup.
+ *
+ * for procfs the only side-effect is the pathname
+ * buffer, which is freed here when the component name
+ * has HASBUF set and SAVESTART clear; in every other
+ * case the buffer is left alone. always returns 0.
+ */
+procfs_abortop(ap)
+ struct vop_abortop_args *ap;
+{
+
+ if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
+ FREE(ap->a_cnp->cn_pnbuf, M_NAMEI);
+ return (0);
+}
+
+/*
+ * generic entry point for unsupported operations.
+ * takes no arguments and always fails with EIO.
+ */
+procfs_badop()
+{
+
+ return (EIO);
+}
+
+/*
+ * Invent attributes for pfsnode (vp) and store
+ * them in (vap).
+ * Directory lengths are returned as zero since
+ * any real length would require the genuine size
+ * to be computed, and nothing cares anyway.
+ *
+ * this is relatively minimal for procfs.
+ *
+ * returns 0 on success, ENOENT if the node is not
+ * the root and its process can no longer be found,
+ * and EOPNOTSUPP for a Pfile node. an unknown node
+ * type causes a panic.
+ */
+procfs_getattr(ap)
+ struct vop_getattr_args *ap;
+{
+ struct pfsnode *pfs = VTOPFS(ap->a_vp);
+ struct vattr *vap = ap->a_vap;
+ struct proc *procp;
+ int error;
+
+ /* first check the process still exists */
+ switch (pfs->pfs_type) {
+ case Proot:
+ procp = 0; /* the root belongs to no process */
+ break;
+
+ default:
+ procp = PFIND(pfs->pfs_pid);
+ if (procp == 0)
+ return (ENOENT);
+ }
+
+ error = 0;
+
+ /* start by zeroing out the attributes */
+ VATTR_NULL(vap);
+
+ /* next do all the common fields */
+ vap->va_type = ap->a_vp->v_type;
+ vap->va_mode = pfs->pfs_mode;
+ vap->va_fileid = pfs->pfs_fileno;
+ vap->va_flags = 0;
+ vap->va_blocksize = PAGE_SIZE;
+ vap->va_bytes = vap->va_size = 0;
+
+ /*
+ * If the process has exercised some setuid or setgid
+ * privilege, then rip away read/write permission so
+ * that only root can gain access.
+ * The bits are cleared for owner, group and other alike.
+ */
+ switch (pfs->pfs_type) {
+ case Pregs:
+ case Pfpregs:
+ case Pmem:
+ if (procp->p_flag & P_SUGID)
+ vap->va_mode &= ~((VREAD|VWRITE)|
+ ((VREAD|VWRITE)>>3)|
+ ((VREAD|VWRITE)>>6));
+ break;
+ }
+
+ /*
+ * Make all times be current TOD.
+ * It would be possible to get the process start
+ * time from the p_stat structure, but there's
+ * no "file creation" time stamp anyway, and the
+ * p_stat structure is not addressable if u. gets
+ * swapped out for that process.
+ */
+ microtime(&vap->va_ctime);
+ vap->va_atime = vap->va_mtime = vap->va_ctime;
+
+ /*
+ * now do the object specific fields
+ *
+ * The size could be set from struct reg, but it's hardly
+ * worth the trouble, and it puts some (potentially) machine
+ * dependent data into this machine-independent code. If it
+ * becomes important then this function should break out into
+ * a per-file stat function in the corresponding .c file.
+ */
+
+ switch (pfs->pfs_type) {
+ case Proot:
+ vap->va_nlink = 2;
+ vap->va_uid = 0;
+ vap->va_gid = 0;
+ break;
+
+ case Pproc:
+ vap->va_nlink = 2;
+ vap->va_uid = procp->p_ucred->cr_uid;
+ vap->va_gid = procp->p_ucred->cr_gid;
+ break;
+
+ case Pfile:
+ error = EOPNOTSUPP; /* no attributes are invented for "file" */
+ break;
+
+ case Pmem:
+ vap->va_nlink = 1;
+ /* size is the sum of the text, data and stack sizes */
+ vap->va_bytes = vap->va_size =
+ ctob(procp->p_vmspace->vm_tsize +
+ procp->p_vmspace->vm_dsize +
+ procp->p_vmspace->vm_ssize);
+ vap->va_uid = procp->p_ucred->cr_uid;
+ vap->va_gid = procp->p_ucred->cr_gid;
+ break;
+
+ case Pregs:
+ case Pfpregs:
+ case Pctl:
+ case Pstatus:
+ case Pnote:
+ case Pnotepg:
+ vap->va_nlink = 1;
+ vap->va_uid = procp->p_ucred->cr_uid;
+ vap->va_gid = procp->p_ucred->cr_gid;
+ break;
+
+ default:
+ panic("procfs_getattr");
+ }
+
+ return (error);
+}
+
+procfs_setattr(ap)
+ struct vop_setattr_args *ap;
+{
+ /*
+ * just fake out attribute setting
+ * it's not good to generate an error
+ * return, otherwise things like creat()
+ * will fail when they try to set the
+ * file length to 0. worse, this means
+ * that echo $note > /proc/$pid/note will fail.
+ *
+ * (ap) is not examined and nothing is changed;
+ * success (0) is always returned.
+ */
+
+ return (0);
+}
+
+/*
+ * implement access checking.
+ *
+ * something very similar to this code is duplicated
+ * throughout the 4bsd kernel and should be moved
+ * into kern/vfs_subr.c sometime.
+ *
+ * actually, the check for super-user is slightly
+ * broken since it will allow read access to write-only
+ * objects. this doesn't cause any particular trouble
+ * but does mean that the i/o entry points need to check
+ * that the operation really does make sense.
+ *
+ * returns 0 if (a_cred) may access the vnode in the way
+ * asked for by (a_mode), EACCES if not, or the error from
+ * VOP_GETATTR. note that (a_mode) in the argument
+ * structure is shifted in place while the check is made.
+ */
+procfs_access(ap)
+ struct vop_access_args *ap;
+{
+ struct vattr *vap;
+ struct vattr vattr;
+ int error;
+
+ /*
+ * If you're the super-user,
+ * you always get access.
+ */
+ if (ap->a_cred->cr_uid == (uid_t) 0)
+ return (0);
+ vap = &vattr;
+ if (error = VOP_GETATTR(ap->a_vp, vap, ap->a_cred, ap->a_p))
+ return (error);
+
+ /*
+ * Access check is based on only one of owner, group, public.
+ * If not owner, then check group. If not a member of the
+ * group, then check public access.
+ */
+ if (ap->a_cred->cr_uid != vap->va_uid) {
+ gid_t *gp;
+ int i;
+
+ (ap->a_mode) >>= 3; /* not the owner: line up with the group bits */
+ gp = ap->a_cred->cr_groups;
+ for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++)
+ if (vap->va_gid == *gp)
+ goto found;
+ ap->a_mode >>= 3; /* not in the group: line up with the public bits */
+found:
+ ;
+ }
+
+ /* every requested bit must be present in the file mode */
+ if ((vap->va_mode & ap->a_mode) == ap->a_mode)
+ return (0);
+
+ return (EACCES);
+}
+
+/*
+ * lookup. this is incredibly complicated in the
+ * general case, however for most pseudo-filesystems
+ * very little needs to be done.
+ *
+ * unless you want to get a migraine, just make sure your
+ * filesystem doesn't do any locking of its own. otherwise
+ * read and inwardly digest ufs_lookup().
+ *
+ * the component named by (a_cnp) is looked up in the
+ * directory (a_dvp) and the result is stored in (*a_vpp):
+ *
+ * "." gives back (a_dvp) itself with an extra reference.
+ *
+ * in the root directory, ".." fails with EIO; "curproc"
+ * stands for the pid of the process doing the lookup and
+ * any other name must be a decimal pid. if the name is
+ * not a pid, or no such process exists, ENOENT is
+ * returned. otherwise a Pproc vnode of type VDIR is
+ * allocated.
+ *
+ * in a process directory, ".." gives the root vnode.
+ * other names must appear in procent[], else ENOENT.
+ * "file" yields the vnode found by procfs_findtextvp(),
+ * referenced and locked, or ENXIO if there is none. all
+ * other names allocate a vnode of type VREG.
+ *
+ * any other kind of node is not a directory: ENOTDIR.
+ */
+procfs_lookup(ap)
+ struct vop_lookup_args *ap;
+{
+ struct componentname *cnp = ap->a_cnp;
+ struct vnode **vpp = ap->a_vpp;
+ struct vnode *dvp = ap->a_dvp;
+ char *pname = cnp->cn_nameptr;
+ int error = 0;
+ pid_t pid;
+ struct vnode *nvp;
+ struct pfsnode *pfs;
+ struct proc *procp;
+ pfstype pfs_type;
+ int i;
+
+ if (cnp->cn_namelen == 1 && *pname == '.') {
+ *vpp = dvp;
+ VREF(dvp);
+ /*VOP_LOCK(dvp);*/
+ return (0);
+ }
+
+ *vpp = NULL;
+
+ pfs = VTOPFS(dvp);
+ switch (pfs->pfs_type) {
+ case Proot:
+ if (cnp->cn_flags & ISDOTDOT)
+ return (EIO);
+
+ if (CNEQ(cnp, "curproc", 7))
+ pid = cnp->cn_proc->p_pid;
+ else
+ pid = atopid(pname, cnp->cn_namelen);
+ if (pid == NO_PID)
+ return (ENOENT);
+
+ procp = PFIND(pid);
+ if (procp == 0)
+ return (ENOENT);
+
+ error = procfs_allocvp(dvp->v_mount, &nvp, pid, Pproc);
+ if (error)
+ return (error);
+
+ nvp->v_type = VDIR;
+ pfs = VTOPFS(nvp);
+
+ *vpp = nvp;
+ return (0);
+
+ case Pproc:
+ if (cnp->cn_flags & ISDOTDOT) {
+ error = procfs_root(dvp->v_mount, vpp);
+ return (error);
+ }
+
+ procp = PFIND(pfs->pfs_pid);
+ if (procp == 0)
+ return (ENOENT);
+
+ for (i = 0; i < Nprocent; i++) {
+ struct pfsnames *dp = &procent[i];
+
+ if (cnp->cn_namelen == dp->d_namlen &&
+ bcmp(pname, dp->d_name, dp->d_namlen) == 0) {
+ pfs_type = dp->d_pfstype;
+ goto found;
+ }
+ }
+ return (ENOENT);
+
+ found:
+ if (pfs_type == Pfile) {
+ nvp = procfs_findtextvp(procp);
+ if (nvp) {
+ VREF(nvp);
+ VOP_LOCK(nvp);
+ } else {
+ error = ENXIO; /* (*vpp) is set to the null nvp below */
+ }
+ } else {
+ error = procfs_allocvp(dvp->v_mount, &nvp,
+ pfs->pfs_pid, pfs_type);
+ if (error)
+ return (error);
+
+ nvp->v_type = VREG;
+ pfs = VTOPFS(nvp);
+ }
+ *vpp = nvp;
+ return (error);
+
+ default:
+ return (ENOTDIR);
+ }
+}
+
+/*
+ * readdir returns directory entries from pfsnode (vp).
+ *
+ * the strategy here with procfs is to generate a single
+ * directory entry at a time (struct pfsdent) and then
+ * copy that out to userland using uiomove. a more efficient
+ * though more complex implementation, would try to minimize
+ * the number of calls to uiomove(). for procfs, this is
+ * hardly worth the added code complexity.
+ *
+ * every entry occupies exactly UIO_MX bytes, so the file
+ * offset in (a_uio) must be a non-negative multiple of UIO_MX
+ * and there must be room for at least one entry, otherwise
+ * EINVAL is returned. on return the offset has been advanced
+ * past the entries that were copied out. a node that is not
+ * a directory gives ENOTDIR; errors from uiomove are passed
+ * back unchanged.
+ *
+ * each entry is cleared before it is filled in, so that no
+ * stale bytes from the kernel stack are copied out to userland.
+ *
+ * this should just be done through read()
+ */
+procfs_readdir(ap)
+ struct vop_readdir_args *ap;
+{
+ struct uio *uio = ap->a_uio;
+ struct pfsdent d;
+ struct pfsdent *dp = &d;
+ struct pfsnode *pfs;
+ int error;
+ int count;
+ int i;
+
+ pfs = VTOPFS(ap->a_vp);
+
+ if (uio->uio_resid < UIO_MX)
+ return (EINVAL);
+ if (uio->uio_offset & (UIO_MX-1))
+ return (EINVAL);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+
+ error = 0;
+ count = 0;
+ i = uio->uio_offset / UIO_MX; /* index of the first entry wanted */
+
+ switch (pfs->pfs_type) {
+ /*
+ * this is for the process-specific sub-directories.
+ * all that is needed is to copy out all the entries
+ * from the procent[] table (top of this file).
+ */
+ case Pproc: {
+ while (uio->uio_resid >= UIO_MX) {
+ struct pfsnames *dt;
+
+ if (i >= Nprocent)
+ break;
+
+ dt = &procent[i];
+
+ /* clear the entry first, as the root case below does */
+ bzero((char *) dp, UIO_MX);
+ dp->d_reclen = UIO_MX;
+ dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, dt->d_pfstype);
+ dp->d_type = DT_REG;
+ dp->d_namlen = dt->d_namlen;
+ bcopy(dt->d_name, dp->d_name, sizeof(dt->d_name)-1);
+ error = uiomove((caddr_t) dp, UIO_MX, uio);
+ if (error)
+ break;
+ count += UIO_MX;
+ i++;
+ }
+
+ break;
+
+ }
+
+ /*
+ * this is for the root of the procfs filesystem
+ * what is needed is a special entry for "curproc"
+ * followed by an entry for each process on allproc
+#ifdef PROCFS_ZOMBIE
+ * and zombproc.
+#endif
+ */
+
+ case Proot: {
+ int pcnt;
+#ifdef PROCFS_ZOMBIE
+ int doingzomb = 0;
+#endif
+ volatile struct proc *p;
+
+ p = allproc;
+
+#define PROCFS_XFILES 1 /* number of other entries, like "curproc" */
+ pcnt = PROCFS_XFILES;
+
+ while (p && uio->uio_resid >= UIO_MX) {
+ bzero((char *) dp, UIO_MX);
+ dp->d_type = DT_DIR;
+ dp->d_reclen = UIO_MX;
+
+ switch (i) {
+ case 0:
+ /* ship out entry for "curproc" */
+ dp->d_fileno = PROCFS_FILENO(PID_MAX+1, Pproc);
+ dp->d_namlen = sprintf(dp->d_name, "curproc");
+ break;
+
+ default:
+ /*
+ * (pcnt) is the directory index of process (p).
+ * processes in front of index (i) are stepped
+ * over without being copied out.
+ */
+ if (pcnt >= i) {
+ dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
+ dp->d_namlen = sprintf(dp->d_name, "%ld", (long) p->p_pid);
+ }
+
+ p = p->p_next;
+
+#ifdef PROCFS_ZOMBIE
+ if (p == 0 && doingzomb == 0) {
+ doingzomb = 1;
+ p = zombproc;
+ }
+#endif
+
+ if (pcnt++ < i)
+ continue;
+
+ break;
+ }
+ error = uiomove((caddr_t) dp, UIO_MX, uio);
+ if (error)
+ break;
+ count += UIO_MX;
+ i++;
+ }
+
+ break;
+
+ }
+
+ default:
+ error = ENOTDIR;
+ break;
+ }
+
+ uio->uio_offset = i * UIO_MX;
+
+ return (error);
+}
+
+/*
+ * convert the (len) characters at (b) from decimal
+ * ascii into a pid_t. NO_PID is returned if any
+ * character is not a digit or if the value grows
+ * beyond PID_MAX. an empty string converts to 0.
+ */
+static pid_t
+atopid(b, len)
+ const char *b;
+ u_int len;
+{
+ pid_t pid = 0;
+ u_int n;
+
+ for (n = 0; n < len; n++) {
+ int digit = b[n] - '0';
+
+ if (digit < 0 || digit > 9)
+ return (NO_PID);
+ pid = 10 * pid + digit;
+ if (pid > PID_MAX)
+ return (NO_PID);
+ }
+
+ return (pid);
+}
+
+/*
+ * procfs vnode operations.
+ *
+ * procfs_vnodeop_entries pairs each vnode operation
+ * descriptor with the procfs routine that handles it;
+ * operations with no entry of their own fall to
+ * vn_default_error through vop_default_desc. the list
+ * ends with a null entry. procfs_vnodeop_opv_desc ties
+ * the list to procfs_vnodeop_p.
+ */
+int (**procfs_vnodeop_p)();
+struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, procfs_lookup }, /* lookup */
+ { &vop_create_desc, procfs_create }, /* create */
+ { &vop_mknod_desc, procfs_mknod }, /* mknod */
+ { &vop_open_desc, procfs_open }, /* open */
+ { &vop_close_desc, procfs_close }, /* close */
+ { &vop_access_desc, procfs_access }, /* access */
+ { &vop_getattr_desc, procfs_getattr }, /* getattr */
+ { &vop_setattr_desc, procfs_setattr }, /* setattr */
+ { &vop_read_desc, procfs_read }, /* read */
+ { &vop_write_desc, procfs_write }, /* write */
+ { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
+ { &vop_select_desc, procfs_select }, /* select */
+ { &vop_mmap_desc, procfs_mmap }, /* mmap */
+ { &vop_fsync_desc, procfs_fsync }, /* fsync */
+ { &vop_seek_desc, procfs_seek }, /* seek */
+ { &vop_remove_desc, procfs_remove }, /* remove */
+ { &vop_link_desc, procfs_link }, /* link */
+ { &vop_rename_desc, procfs_rename }, /* rename */
+ { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
+ { &vop_symlink_desc, procfs_symlink }, /* symlink */
+ { &vop_readdir_desc, procfs_readdir }, /* readdir */
+ { &vop_readlink_desc, procfs_readlink }, /* readlink */
+ { &vop_abortop_desc, procfs_abortop }, /* abortop */
+ { &vop_inactive_desc, procfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, procfs_lock }, /* lock */
+ { &vop_unlock_desc, procfs_unlock }, /* unlock */
+ { &vop_bmap_desc, procfs_bmap }, /* bmap */
+ { &vop_strategy_desc, procfs_strategy }, /* strategy */
+ { &vop_print_desc, procfs_print }, /* print */
+ { &vop_islocked_desc, procfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
+ { &vop_advlock_desc, procfs_advlock }, /* advlock */
+ { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, procfs_valloc }, /* valloc */
+ { &vop_vfree_desc, procfs_vfree }, /* vfree */
+ { &vop_truncate_desc, procfs_truncate }, /* truncate */
+ { &vop_update_desc, procfs_update }, /* update */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc procfs_vnodeop_opv_desc =
+ { &procfs_vnodeop_p, procfs_vnodeop_entries };
diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c
new file mode 100644
index 000000000000..111c517b1627
--- /dev/null
+++ b/sys/miscfs/specfs/spec_vnops.c
@@ -0,0 +1,689 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)spec_vnops.c 8.6 (Berkeley) 4/9/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/disklabel.h>
+#include <miscfs/specfs/specdev.h>
+
+/*
+ * Symbolic sleep message strings for devices.
+ * None of them is referenced anywhere else in this file; presumably
+ * device drivers pass them as wait messages -- TODO confirm.
+ */
+char devopn[] = "devopn";
+char devio[] = "devio";
+char devwait[] = "devwait";
+char devin[] = "devin";
+char devout[] = "devout";
+char devioc[] = "devioc";
+char devcls[] = "devcls";
+
+/*
+ * Vnode operations vector for special (device) files.
+ *
+ * spec_vnodeop_p is the vector pointer named by spec_vnodeop_opv_desc
+ * below.  The table lists one handler per operation descriptor and is
+ * terminated by a NULL/NULL pair.  Handlers that are not functions in
+ * this file are macros from specdev.h.  The first entry presumably
+ * supplies the fallback for operations not listed here -- TODO confirm
+ * against the code that builds the vector.
+ */
+int (**spec_vnodeop_p)();
+struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, spec_lookup }, /* lookup */
+ { &vop_create_desc, spec_create }, /* create */
+ { &vop_mknod_desc, spec_mknod }, /* mknod */
+ { &vop_open_desc, spec_open }, /* open */
+ { &vop_close_desc, spec_close }, /* close */
+ { &vop_access_desc, spec_access }, /* access */
+ { &vop_getattr_desc, spec_getattr }, /* getattr */
+ { &vop_setattr_desc, spec_setattr }, /* setattr */
+ { &vop_read_desc, spec_read }, /* read */
+ { &vop_write_desc, spec_write }, /* write */
+ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
+ { &vop_select_desc, spec_select }, /* select */
+ { &vop_mmap_desc, spec_mmap }, /* mmap */
+ { &vop_fsync_desc, spec_fsync }, /* fsync */
+ { &vop_seek_desc, spec_seek }, /* seek */
+ { &vop_remove_desc, spec_remove }, /* remove */
+ { &vop_link_desc, spec_link }, /* link */
+ { &vop_rename_desc, spec_rename }, /* rename */
+ { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
+ { &vop_symlink_desc, spec_symlink }, /* symlink */
+ { &vop_readdir_desc, spec_readdir }, /* readdir */
+ { &vop_readlink_desc, spec_readlink }, /* readlink */
+ { &vop_abortop_desc, spec_abortop }, /* abortop */
+ { &vop_inactive_desc, spec_inactive }, /* inactive */
+ { &vop_reclaim_desc, spec_reclaim }, /* reclaim */
+ { &vop_lock_desc, spec_lock }, /* lock */
+ { &vop_unlock_desc, spec_unlock }, /* unlock */
+ { &vop_bmap_desc, spec_bmap }, /* bmap */
+ { &vop_strategy_desc, spec_strategy }, /* strategy */
+ { &vop_print_desc, spec_print }, /* print */
+ { &vop_islocked_desc, spec_islocked }, /* islocked */
+ { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
+ { &vop_advlock_desc, spec_advlock }, /* advlock */
+ { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, spec_valloc }, /* valloc */
+ { &vop_vfree_desc, spec_vfree }, /* vfree */
+ { &vop_truncate_desc, spec_truncate }, /* truncate */
+ { &vop_update_desc, spec_update }, /* update */
+ { &vop_bwrite_desc, spec_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc spec_vnodeop_opv_desc =
+ { &spec_vnodeop_p, spec_vnodeop_entries };
+
+/*
+ * Lookup on a special file.  There is nothing to search, so the result
+ * vnode pointer is cleared and the call fails with ENOTDIR.
+ */
+int
+spec_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode **resultp = ap->a_vpp;
+
+	*resultp = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Open a special file.
+ *
+ * Fails with ENXIO when the vnode's filesystem is mounted MNT_NODEV or
+ * when the major number is out of range for the matching device switch.
+ * Write opens by callers other than FSCRED are subject to the
+ * securelevel restrictions described below (EPERM, or the error from
+ * vfs_mountedon()).  Otherwise the result of the driver's d_open
+ * routine is returned.  Vnode types other than VCHR and VBLK return 0.
+ */
+/* ARGSUSED */
+spec_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *bvp, *vp = ap->a_vp;
+ dev_t bdev, dev = (dev_t)vp->v_rdev;
+ register int maj = major(dev);
+ int error;
+
+ /*
+ * Don't allow open if fs is mounted -nodev.
+ */
+ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
+ return (ENXIO);
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ if ((u_int)maj >= nchrdev) /* unsigned compare also rejects a negative major */
+ return (ENXIO);
+ if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
+ /*
+ * When running in very secure mode, do not allow
+ * opens for writing of any disk character devices.
+ */
+ if (securelevel >= 2 && isdisk(dev, VCHR))
+ return (EPERM);
+ /*
+ * When running in secure mode, do not allow opens
+ * for writing of /dev/mem, /dev/kmem, or character
+ * devices whose corresponding block devices are
+ * currently mounted.
+ */
+ if (securelevel >= 1) {
+ if ((bdev = chrtoblk(dev)) != NODEV &&
+ vfinddev(bdev, VBLK, &bvp) &&
+ bvp->v_usecount > 0 &&
+ (error = vfs_mountedon(bvp)))
+ return (error);
+ if (iskmemdev(dev))
+ return (EPERM);
+ }
+ }
+ VOP_UNLOCK(vp); /* the driver open runs with the vnode unlocked */
+ error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p);
+ VOP_LOCK(vp);
+ return (error);
+
+ case VBLK:
+ if ((u_int)maj >= nblkdev) /* unsigned compare also rejects a negative major */
+ return (ENXIO);
+ /*
+ * When running in very secure mode, do not allow
+ * opens for writing of any disk block devices.
+ */
+ if (securelevel >= 2 && ap->a_cred != FSCRED &&
+ (ap->a_mode & FWRITE) && isdisk(dev, VBLK))
+ return (EPERM);
+ /*
+ * Do not allow opens of block devices that are
+ * currently mounted.
+ */
+ if (error = vfs_mountedon(vp)) /* assignment is intentional */
+ return (error);
+ return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p));
+ }
+ return (0);
+}
+
+/*
+ * Vnode op for read
+ *
+ * A zero-length request returns 0 immediately.
+ * Character devices: the uio is handed to the driver's d_read routine
+ * with the vnode unlocked.
+ * Block devices: data is copied out through bread()/breadn() in chunks
+ * of bsize bytes.  bsize is BLKDEV_IOSIZE unless the DIOCGPART ioctl
+ * reports an FS_BSDFFS partition with non-zero p_frag and p_fsize, in
+ * which case it is p_frag * p_fsize.  A negative offset yields EINVAL.
+ * Any other vnode type panics.
+ * Returns 0 or the error from the driver, bread()/breadn() or uiomove().
+ */
+/* ARGSUSED */
+spec_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ struct buf *bp;
+ daddr_t bn, nextbn;
+ long bsize, bscale;
+ struct partinfo dpart;
+ int n, on, majordev, (*ioctl)();
+ int error = 0;
+ dev_t dev;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_READ)
+ panic("spec_read mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("spec_read proc");
+#endif
+ if (uio->uio_resid == 0)
+ return (0);
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ VOP_UNLOCK(vp);
+ error = (*cdevsw[major(vp->v_rdev)].d_read)
+ (vp->v_rdev, uio, ap->a_ioflag);
+ VOP_LOCK(vp);
+ return (error);
+
+ case VBLK:
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ bsize = BLKDEV_IOSIZE;
+ dev = vp->v_rdev;
+ if ((majordev = major(dev)) < nblkdev &&
+ (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
+ (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
+ dpart.part->p_fstype == FS_BSDFFS &&
+ dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+ bsize = dpart.part->p_frag * dpart.part->p_fsize;
+ bscale = bsize / DEV_BSIZE; /* DEV_BSIZE units per chunk */
+ do {
+ bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1); /* mask assumes bscale is a power of two -- TODO confirm */
+ on = uio->uio_offset % bsize; /* byte offset inside the chunk */
+ n = min((unsigned)(bsize - on), uio->uio_resid);
+ if (vp->v_lastr + bscale == bn) { /* this chunk directly follows the last one read */
+ nextbn = bn + bscale;
+ error = breadn(vp, bn, (int)bsize, &nextbn,
+ (int *)&bsize, 1, NOCRED, &bp);
+ } else
+ error = bread(vp, bn, (int)bsize, NOCRED, &bp);
+ vp->v_lastr = bn;
+ n = min(n, bsize - bp->b_resid); /* NOTE(review): bp is read before error is tested; assumes bread()/breadn() always set bp */
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ error = uiomove((char *)bp->b_data + on, n, uio);
+ if (n + on == bsize) /* the copy reached the end of the chunk */
+ bp->b_flags |= B_AGE;
+ brelse(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n != 0);
+ return (error);
+
+ default:
+ panic("spec_read type");
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Vnode op for write
+ *
+ * Character devices: the uio is handed to the driver's d_write routine
+ * with the vnode unlocked.
+ * Block devices: a zero-length request returns 0 and a negative offset
+ * yields EINVAL.  Data is otherwise copied in chunks of bsize bytes,
+ * where bsize is BLKDEV_IOSIZE unless the DIOCGPART ioctl reports an
+ * FS_BSDFFS partition with non-zero p_frag and p_fsize.  A chunk that
+ * is written in full is obtained with getblk(); a partial chunk is
+ * first read with bread().  Any other vnode type panics.
+ * Returns 0 or the error from the driver, bread() or uiomove().
+ */
+/* ARGSUSED */
+spec_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ struct buf *bp;
+ daddr_t bn;
+ int bsize, blkmask;
+ struct partinfo dpart;
+ register int n, on;
+ int error = 0;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_WRITE)
+ panic("spec_write mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("spec_write proc");
+#endif
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ VOP_UNLOCK(vp);
+ error = (*cdevsw[major(vp->v_rdev)].d_write)
+ (vp->v_rdev, uio, ap->a_ioflag);
+ VOP_LOCK(vp);
+ return (error);
+
+ case VBLK:
+ if (uio->uio_resid == 0)
+ return (0);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ bsize = BLKDEV_IOSIZE;
+ /* NOTE(review): unlike spec_read, the major number and d_ioctl are used unchecked here */
+ if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART,
+ (caddr_t)&dpart, FREAD, p) == 0) {
+ if (dpart.part->p_fstype == FS_BSDFFS &&
+ dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+ bsize = dpart.part->p_frag *
+ dpart.part->p_fsize;
+ }
+ blkmask = (bsize / DEV_BSIZE) - 1; /* mask assumes bsize / DEV_BSIZE is a power of two -- TODO confirm */
+ do {
+ bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask;
+ on = uio->uio_offset % bsize; /* byte offset inside the chunk */
+ n = min((unsigned)(bsize - on), uio->uio_resid);
+ if (n == bsize) /* whole chunk is overwritten, no read needed */
+ bp = getblk(vp, bn, bsize, 0, 0);
+ else
+ error = bread(vp, bn, bsize, NOCRED, &bp);
+ n = min(n, bsize - bp->b_resid); /* NOTE(review): bp is read before error is tested; assumes bread() always sets bp */
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ error = uiomove((char *)bp->b_data + on, n, uio);
+ if (n + on == bsize) { /* chunk filled to its end: start the write now */
+ bp->b_flags |= B_AGE;
+ bawrite(bp);
+ } else
+ bdwrite(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n != 0);
+ return (error);
+
+ default:
+ panic("spec_write type");
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Device ioctl operation.
+ *
+ * Forwards the request to the d_ioctl entry of the character or block
+ * device switch.  For block devices a request with command 0 and data
+ * B_TAPE is answered locally: 0 when the switch entry has B_TAPE set in
+ * d_flags, 1 otherwise.  Any other vnode type panics.
+ */
+/* ARGSUSED */
+int
+spec_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	dev_t dev = vp->v_rdev;
+
+	if (vp->v_type == VCHR)
+		return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command,
+		    ap->a_data, ap->a_fflag, ap->a_p));
+
+	if (vp->v_type == VBLK) {
+		/* Tape query: answered from the switch flags alone. */
+		if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) {
+			if (bdevsw[major(dev)].d_flags & B_TAPE)
+				return (0);
+			return (1);
+		}
+		return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command,
+		    ap->a_data, ap->a_fflag, ap->a_p));
+	}
+
+	panic("spec_ioctl");
+	/* NOTREACHED */
+}
+
+/*
+ * Select on a special file.  Character devices are asked through the
+ * driver's d_select entry; every other vnode type returns 1.
+ */
+/* ARGSUSED */
+int
+spec_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int a_which;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	dev_t dev;
+
+	if (vp->v_type != VCHR)
+		return (1);		/* XXX */
+
+	dev = vp->v_rdev;
+	return ((*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p));
+}
+/*
+ * Synch buffers associated with a block device
+ *
+ * Character devices return 0 at once.  For block devices every
+ * non-busy buffer on the vnode's dirty list is started with bawrite();
+ * when a_waitfor is MNT_WAIT the routine then sleeps until v_numoutput
+ * drops to zero.  Always returns 0.
+ *
+ * Every pass through "loop" saves the previous priority level in s, so
+ * each jump back to "loop" must be preceded by splx(s).
+ */
+/* ARGSUSED */
+int
+spec_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct buf *bp;
+	struct buf *nbp;
+	int s;
+
+	if (vp->v_type == VCHR)
+		return (0);
+	/*
+	 * Flush all dirty buffers associated with a block device.
+	 */
+loop:
+	s = splbio();
+	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+		nbp = bp->b_vnbufs.le_next;
+		if ((bp->b_flags & B_BUSY))
+			continue;
+		if ((bp->b_flags & B_DELWRI) == 0)
+			panic("spec_fsync: not dirty");
+		bremfree(bp);
+		bp->b_flags |= B_BUSY;
+		/*
+		 * Restore the saved level before starting the write,
+		 * then rescan from the head of the list.
+		 */
+		splx(s);
+		bawrite(bp);
+		goto loop;
+	}
+	if (ap->a_waitfor == MNT_WAIT) {
+		while (vp->v_numoutput) {
+			vp->v_flag |= VBWAIT;
+			sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
+		}
+#ifdef DIAGNOSTIC
+		if (vp->v_dirtyblkhd.lh_first) {
+			vprint("spec_fsync: dirty", vp);
+			/*
+			 * Restore the saved level first.  "loop" calls
+			 * splbio() again; without this, s would be
+			 * overwritten with the already raised level and
+			 * the final splx(s) would never lower it.
+			 */
+			splx(s);
+			goto loop;
+		}
+#endif
+	}
+	splx(s);
+	return (0);
+}
+
+/*
+ * Hand the buffer to the strategy routine of the block device named by
+ * its b_dev field.  Always returns 0.
+ */
+int
+spec_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+
+	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
+	return (0);
+}
+
+/*
+ * Identity block mapping: the vnode and block number passed in are
+ * returned unchanged through whichever result pointers are non-NULL.
+ * Always returns 0.
+ */
+int
+spec_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	daddr_t *bnp = ap->a_bnp;
+
+	if (vpp != NULL)
+		*vpp = ap->a_vp;
+	if (bnp != NULL)
+		*bnp = ap->a_bn;
+	return (0);
+}
+
+/*
+ * Lock operation for special files.  No locking is done at present;
+ * the argument is ignored and 0 is returned.
+ */
+/* ARGSUSED */
+int
+spec_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return (0);
+}
+
+/*
+ * Unlock operation for special files.  The argument is ignored and 0
+ * is returned.
+ */
+/* ARGSUSED */
+int
+spec_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return (0);
+}
+
+/*
+ * Device close routine
+ *
+ * The driver's d_close entry is called only when vcount(vp) is at most
+ * 1 or the vnode has VXLOCK set; otherwise 0 is returned without
+ * calling the driver.  Block devices first have their buffers
+ * invalidated with vinvalbuf(V_SAVE), whose error is returned if it
+ * fails.  Any vnode type other than VCHR or VBLK panics.
+ */
+/* ARGSUSED */
+spec_close(ap)
+ struct vop_close_args /* {
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ dev_t dev = vp->v_rdev;
+ int (*devclose) __P((dev_t, int, int, struct proc *));
+ int mode, error;
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ /*
+ * Hack: a tty device that is a controlling terminal
+ * has a reference from the session structure.
+ * We cannot easily tell that a character device is
+ * a controlling terminal, unless it is the closing
+ * process' controlling terminal. In that case,
+ * if the reference count is 2 (this last descriptor
+ * plus the session), release the reference from the session.
+ */
+ if (vcount(vp) == 2 && ap->a_p &&
+ vp == ap->a_p->p_session->s_ttyvp) {
+ vrele(vp);
+ ap->a_p->p_session->s_ttyvp = NULL;
+ }
+ /*
+ * If the vnode is locked, then we are in the midst
+ * of forcibly closing the device, otherwise we only
+ * close on last reference.
+ */
+ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+ return (0);
+ devclose = cdevsw[major(dev)].d_close;
+ mode = S_IFCHR;
+ break;
+
+ case VBLK:
+ /*
+ * On last close of a block device (that isn't mounted)
+ * we must invalidate any in core blocks, so that
+ * we can, for instance, change floppy disks.
+ */
+ if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0)) /* assignment is intentional */
+ return (error);
+ /*
+ * We do not want to really close the device if it
+ * is still in use unless we are trying to close it
+ * forcibly. Since every use (buffer, vnode, swap, cmap)
+ * holds a reference to the vnode, and because we mark
+ * any other vnodes that alias this device, when the
+ * sum of the reference counts on all the aliased
+ * vnodes descends to one, we are on last close.
+ */
+ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+ return (0);
+ devclose = bdevsw[major(dev)].d_close;
+ mode = S_IFBLK;
+ break;
+
+ default:
+ panic("spec_close: not special");
+ }
+
+ /* mode tells the driver which flavour of device is being closed */
+ return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
+}
+
+/*
+ * Print out the contents of a special device vnode.
+ *
+ * Writes the major and minor numbers of the vnode's device with
+ * printf().  Returns 0: the routine is declared to return int, and
+ * falling off the end would hand the caller an indeterminate value.
+ */
+int
+spec_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	dev_t dev = ap->a_vp->v_rdev;
+
+	printf("tag VT_NON, dev %d, %d\n", major(dev), minor(dev));
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to special devices.
+ *
+ * Stores the value for a_name in *a_retval and returns 0.  An
+ * unrecognized name returns EINVAL and leaves *a_retval untouched.
+ */
+int
+spec_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int value;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		value = LINK_MAX;
+		break;
+	case _PC_MAX_CANON:
+		value = MAX_CANON;
+		break;
+	case _PC_MAX_INPUT:
+		value = MAX_INPUT;
+		break;
+	case _PC_PIPE_BUF:
+		value = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		value = 1;
+		break;
+	case _PC_VDISABLE:
+		value = _POSIX_VDISABLE;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	*ap->a_retval = value;
+	return (0);
+}
+
+/*
+ * Advisory byte-range locking on a special device.  The request is
+ * not examined; EOPNOTSUPP is returned.
+ */
+/* ARGSUSED */
+int
+spec_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t a_id;
+		int a_op;
+		struct flock *a_fl;
+		int a_flags;
+	} */ *ap;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Stub for operations that fail on a special device: takes no
+ * arguments and returns EBADF.
+ */
+int
+spec_ebadf()
+{
+	return (EBADF);
+}
+
+/*
+ * Stub for a bad operation on a special device: it panics instead of
+ * returning.
+ */
+int
+spec_badop()
+{
+	panic("spec_badop called");
+	/* NOTREACHED */
+}
diff --git a/sys/miscfs/specfs/specdev.h b/sys/miscfs/specfs/specdev.h
new file mode 100644
index 000000000000..a13b66e51132
--- /dev/null
+++ b/sys/miscfs/specfs/specdev.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)specdev.h 8.2 (Berkeley) 2/2/94
+ */
+
+/*
+ * This structure defines the information maintained about
+ * special devices. It is allocated in checkalias and freed
+ * in vgone.
+ * A vnode reaches it through its v_specinfo pointer (see the v_*
+ * shorthand macros below).
+ */
+struct specinfo {
+ struct vnode **si_hashchain; /* presumably this device's speclisth bucket -- TODO confirm */
+ struct vnode *si_specnext; /* presumably the next vnode on that chain -- TODO confirm */
+ long si_flags; /* SI_* flags */
+ dev_t si_rdev; /* device number */
+};
+/*
+ * Exported shorthand
+ * Each macro names a specinfo field as if it were a vnode member by
+ * going through the vnode's v_specinfo pointer, e.g. vp->v_rdev.
+ */
+#define v_rdev v_specinfo->si_rdev
+#define v_hashchain v_specinfo->si_hashchain
+#define v_specnext v_specinfo->si_specnext
+#define v_specflags v_specinfo->si_flags
+
+/*
+ * Flags for specinfo
+ * (stored in si_flags, i.e. v_specflags)
+ */
+#define SI_MOUNTEDON 0x0001 /* block special device is mounted on */
+
+/*
+ * Special device management
+ *
+ * SPECHSZ is the number of entries in speclisth.  SPECHASH() maps a
+ * device number to an index in [0, SPECHSZ): a mask is used when
+ * SPECHSZ is a power of two, a modulus otherwise.  The macro argument
+ * is parenthesized so that an expression argument such as (a | b) is
+ * hashed as a whole.
+ */
+#define SPECHSZ 64
+#if ((SPECHSZ&(SPECHSZ-1)) == 0)
+#define SPECHASH(rdev) ((((rdev)>>5)+(rdev))&(SPECHSZ-1))
+#else
+#define SPECHASH(rdev) (((unsigned)(((rdev)>>5)+(rdev)))%SPECHSZ)
+#endif
+
+/* NOTE(review): defined, not just declared, in this header -- confirm every includer is meant to share it */
+struct vnode *speclisth[SPECHSZ];
+
+/*
+ * Prototypes for special file operations on vnodes.
+ *
+ * Operations implemented in spec_vnops.c are declared as functions.
+ * The rest are macros that cast a shared stub to the operation's
+ * signature: spec_badop (panics), spec_ebadf (returns EBADF) or nullop.
+ *
+ * NOTE(review): spec_reallocblks is defined here, but the
+ * spec_vnodeop_entries table in spec_vnops.c has no reallocblks
+ * entry -- confirm whether that is intended.
+ */
+extern int (**spec_vnodeop_p)();
+struct nameidata;
+struct componentname;
+struct ucred;
+struct flock;
+struct buf;
+struct uio;
+
+int spec_badop(),
+ spec_ebadf();
+
+int spec_lookup __P((struct vop_lookup_args *));
+#define spec_create ((int (*) __P((struct vop_create_args *)))spec_badop)
+#define spec_mknod ((int (*) __P((struct vop_mknod_args *)))spec_badop)
+int spec_open __P((struct vop_open_args *));
+int spec_close __P((struct vop_close_args *));
+#define spec_access ((int (*) __P((struct vop_access_args *)))spec_ebadf)
+#define spec_getattr ((int (*) __P((struct vop_getattr_args *)))spec_ebadf)
+#define spec_setattr ((int (*) __P((struct vop_setattr_args *)))spec_ebadf)
+int spec_read __P((struct vop_read_args *));
+int spec_write __P((struct vop_write_args *));
+int spec_ioctl __P((struct vop_ioctl_args *));
+int spec_select __P((struct vop_select_args *));
+#define spec_mmap ((int (*) __P((struct vop_mmap_args *)))spec_badop)
+int spec_fsync __P((struct vop_fsync_args *));
+#define spec_seek ((int (*) __P((struct vop_seek_args *)))spec_badop)
+#define spec_remove ((int (*) __P((struct vop_remove_args *)))spec_badop)
+#define spec_link ((int (*) __P((struct vop_link_args *)))spec_badop)
+#define spec_rename ((int (*) __P((struct vop_rename_args *)))spec_badop)
+#define spec_mkdir ((int (*) __P((struct vop_mkdir_args *)))spec_badop)
+#define spec_rmdir ((int (*) __P((struct vop_rmdir_args *)))spec_badop)
+#define spec_symlink ((int (*) __P((struct vop_symlink_args *)))spec_badop)
+#define spec_readdir ((int (*) __P((struct vop_readdir_args *)))spec_badop)
+#define spec_readlink ((int (*) __P((struct vop_readlink_args *)))spec_badop)
+#define spec_abortop ((int (*) __P((struct vop_abortop_args *)))spec_badop)
+#define spec_inactive ((int (*) __P((struct vop_inactive_args *)))nullop)
+#define spec_reclaim ((int (*) __P((struct vop_reclaim_args *)))nullop)
+int spec_lock __P((struct vop_lock_args *));
+int spec_unlock __P((struct vop_unlock_args *));
+int spec_bmap __P((struct vop_bmap_args *));
+int spec_strategy __P((struct vop_strategy_args *));
+int spec_print __P((struct vop_print_args *));
+#define spec_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+int spec_pathconf __P((struct vop_pathconf_args *));
+int spec_advlock __P((struct vop_advlock_args *));
+#define spec_blkatoff ((int (*) __P((struct vop_blkatoff_args *)))spec_badop)
+#define spec_valloc ((int (*) __P((struct vop_valloc_args *)))spec_badop)
+#define spec_reallocblks \
+ ((int (*) __P((struct vop_reallocblks_args *)))spec_badop)
+#define spec_vfree ((int (*) __P((struct vop_vfree_args *)))spec_badop)
+#define spec_truncate ((int (*) __P((struct vop_truncate_args *)))nullop)
+#define spec_update ((int (*) __P((struct vop_update_args *)))nullop)
+#define spec_bwrite ((int (*) __P((struct vop_bwrite_args *)))nullop)
diff --git a/sys/miscfs/umapfs/umap.h b/sys/miscfs/umapfs/umap.h
new file mode 100644
index 000000000000..9f4d1e7ace53
--- /dev/null
+++ b/sys/miscfs/umapfs/umap.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap.h 8.3 (Berkeley) 1/21/94
+ *
+ * @(#)null_vnops.c 1.5 (Berkeley) 7/10/92
+ */
+
+#define MAPFILEENTRIES 64 /* rows in umap_mount.info_mapdata */
+#define GMAPFILEENTRIES 16 /* rows in umap_mount.info_gmapdata */
+#define NOBODY 32767 /* presumably the uid used when no mapping applies -- TODO confirm */
+#define NULLGROUP 65534 /* presumably the gid used when no mapping applies -- TODO confirm */
+
+/*
+ * Arguments describing a umap mount: the target plus the user and
+ * group mapping arrays and their entry counts.  Each mapping is a pair
+ * of u_long values; presumably {from id, to id} -- TODO confirm which
+ * element is which.
+ */
+struct umap_args {
+ char *target; /* Target of loopback */
+ int nentries; /* # of entries in user map array */
+ int gnentries; /* # of entries in group map array */
+ u_long (*mapdata)[2]; /* pointer to array of user mappings */
+ u_long (*gmapdata)[2]; /* pointer to array of group mappings */
+};
+
+/*
+ * Per-mount umap state, reached from a struct mount through
+ * MOUNTTOUMAPMOUNT().  The mapping tables are fixed-size arrays of
+ * MAPFILEENTRIES and GMAPFILEENTRIES pairs; info_nentries and
+ * info_gnentries count the uid and gid mappings.
+ */
+struct umap_mount {
+ struct mount *umapm_vfs; /* presumably the lower (target) mount -- TODO confirm */
+ struct vnode *umapm_rootvp; /* Reference to root umap_node */
+ int info_nentries; /* number of uid mappings */
+ int info_gnentries; /* number of gid mappings */
+ u_long info_mapdata[MAPFILEENTRIES][2]; /* mapping data for
+ user mapping in ficus */
+ u_long info_gmapdata[GMAPFILEENTRIES][2]; /*mapping data for
+ group mapping in ficus */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references
+ * Each umap_node ties a umap vnode (umap_vnode) to the vnode it
+ * aliases (umap_lowervp) and is linked on a hash chain through
+ * umap_forw and umap_back.
+ */
+struct umap_node {
+ struct umap_node *umap_forw; /* Hash chain */
+ struct umap_node *umap_back;
+ struct vnode *umap_lowervp; /* Aliased vnode - VREFed once */
+ struct vnode *umap_vnode; /* Back pointer to vnode/umap_node */
+};
+
+extern int umap_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+extern u_long umap_reverse_findid __P((u_long id, u_long map[][2], int nentries));
+extern void umap_mapids __P((struct mount *v_mount, struct ucred *credp));
+
+/* Accessors: mount -> umap_mount, vnode -> umap_node, umap_node -> vnode. */
+#define MOUNTTOUMAPMOUNT(mp) ((struct umap_mount *)((mp)->mnt_data))
+#define VTOUMAP(vp) ((struct umap_node *)(vp)->v_data)
+#define UMAPTOV(xp) ((xp)->umap_vnode)
+/*
+ * UMAPVPTOLOWERVP() yields the vnode aliased by a umap vnode.  With
+ * UMAPFS_DIAGNOSTIC it goes through umap_checkvp(), which is also
+ * passed the caller's file name and line number.
+ */
+#ifdef UMAPFS_DIAGNOSTIC
+extern struct vnode *umap_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define UMAPVPTOLOWERVP(vp) umap_checkvp((vp), __FILE__, __LINE__)
+#else
+#define UMAPVPTOLOWERVP(vp) (VTOUMAP(vp)->umap_lowervp)
+#endif
+
+extern int (**umap_vnodeop_p)();
+extern struct vfsops umap_vfsops;
+#endif /* KERNEL */
diff --git a/sys/miscfs/umapfs/umap_subr.c b/sys/miscfs/umapfs/umap_subr.c
new file mode 100644
index 000000000000..6f1f077a6217
--- /dev/null
+++ b/sys/miscfs/umapfs/umap_subr.c
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap_subr.c 8.6 (Berkeley) 1/26/94
+ *
+ * $Id: lofs_subr.c, v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+#define LOG2_SIZEVNODE	7		/* log2(sizeof struct vnode) */
+#define NUMAPNODECACHE	16		/* # of hash buckets; used as a mask, so a power of two */
+/*
+ * Map a lower vnode pointer to a bucket index in [0, NUMAPNODECACHE).
+ * The argument is parenthesised so that any pointer expression may be
+ * passed without the cast binding to only part of it.
+ */
+#define UMAP_NHASH(vp)	((((u_long) (vp))>>LOG2_SIZEVNODE) & (NUMAPNODECACHE-1))
+
+/*
+ * Umap layer cache:
+ * Each cache entry holds a reference to the target (lower) vnode
+ * along with a pointer to the alias vnode. When an
+ * entry is added the target vnode is VREF'd. When the
+ * alias is reclaimed the target vnode is vrele'd.
+ */
+
+/*
+ * Cache head
+ *
+ * A bucket head is cast to struct umap_node * and used as the sentinel
+ * of a circular doubly-linked list, so its two members must stay in the
+ * same order as umap_forw/umap_back at the start of struct umap_node.
+ */
+struct umap_node_cache {
+ struct umap_node *ac_forw;
+ struct umap_node *ac_back;
+};
+
+static struct umap_node_cache umap_node_cache[NUMAPNODECACHE];
+
+/*
+ * Initialise cache headers.
+ *
+ * Turns every hash bucket into an empty circular list by pointing the
+ * head's forward and backward links at the head itself.
+ *
+ * The function is prototyped as returning int and is installed in the
+ * vfsops table, so it is declared int here and always returns 0.
+ */
+int
+umapfs_init()
+{
+	struct umap_node_cache *ac;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_init\n");		/* printed during system boot */
+#endif
+
+	for (ac = umap_node_cache; ac < umap_node_cache + NUMAPNODECACHE; ac++)
+		ac->ac_forw = ac->ac_back = (struct umap_node *) ac;
+
+	return (0);
+}
+
+/*
+ * Return the hash bucket that a given target (lower) vnode belongs to.
+ * The bucket is selected from the vnode's address via UMAP_NHASH.
+ */
+static struct umap_node_cache *
+umap_node_hash(targetvp)
+	struct vnode *targetvp;
+{
+	struct umap_node_cache *bucket;
+
+	bucket = umap_node_cache + UMAP_NHASH(targetvp);
+	return (bucket);
+}
+
+/*
+ * Forward lookup in an id map.
+ *
+ * Scans the first nentries rows of map for one whose column 0 equals id
+ * and returns that row's column 1.  When no row matches (including when
+ * nentries is zero or negative) the result is -1 converted to u_long.
+ */
+static u_long
+umap_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		if (map[row][0] == id)
+			return (map[row][1]);
+	}
+
+	/* No mapping for this id. */
+	return (-1);
+}
+
+/*
+ * Reverse lookup in an id map (used by umap_getattr()).
+ *
+ * Scans the first nentries rows of map for one whose column 1 equals id
+ * and returns that row's column 0.  When no row matches (including when
+ * nentries is zero or negative) the result is -1 converted to u_long.
+ */
+u_long
+umap_reverse_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		if (map[row][1] == id)
+			return (map[row][0]);
+	}
+
+	/* No mapping for this id. */
+	return (-1);
+}
+
+/*
+ * Look up the alias vnode for a target vnode on a given mount.
+ *
+ * Returns the alias vnode, with a reference obtained through vget(), or
+ * 0 when no alias exists yet.
+ */
+static struct vnode *
+umap_node_find(mp, targetvp)
+	struct mount *mp;
+	struct vnode *targetvp;
+{
+	struct umap_node_cache *bucket;
+	struct umap_node *np;
+	struct vnode *aliasvp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umap_node_find(mp = %x, target = %x)\n", mp, targetvp);
+#endif
+
+	/*
+	 * Walk the circular list hanging off the target's hash bucket.
+	 * The bucket head doubles as the end-of-list sentinel.  A hit
+	 * takes a reference on the alias vnode only; the target vnode's
+	 * reference count is left alone.
+	 */
+	bucket = umap_node_hash(targetvp);
+
+restart:
+	np = bucket->ac_forw;
+	while (np != (struct umap_node *) bucket) {
+		if (np->umap_lowervp != targetvp ||
+		    np->umap_vnode->v_mount != mp) {
+			np = np->umap_forw;
+			continue;
+		}
+
+		aliasvp = UMAPTOV(np);
+		/*
+		 * vget is needed for the VXLOCK handling, but is called
+		 * with a zero flag so the lower node is not locked.
+		 */
+		if (vget(aliasvp, 0) == 0)
+			return (aliasvp);
+
+#ifdef UMAPFS_DIAGNOSTIC
+		printf ("umap_node_find: vget failed.\n");
+#endif
+		/* The chain may have changed under us; rescan it. */
+		goto restart;
+	}
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umap_node_find(%x, %x): NOT found\n", mp, targetvp);
+#endif
+
+	return (0);
+}
+
+/*
+ * Make a new umap_node node.
+ *
+ * mp      - the umap mount the alias vnode will belong to
+ * lowervp - the target (lower) vnode to alias
+ * vpp     - receives the alias vnode
+ *
+ * On success the new node is entered in the hash table and holds its own
+ * reference to lowervp.  If another alias for lowervp on this mount
+ * appeared while we were allocating, the freshly obtained vnode is
+ * discarded and the existing alias is returned instead.
+ *
+ * Returns 0 on success, or the error from getnewvnode().
+ */
+static int
+umap_node_alloc(mp, lowervp, vpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **vpp;
+{
+	struct umap_node_cache *hd;
+	struct umap_node *xp;
+	struct vnode *othervp, *vp;
+	int error;
+
+	if (error = getnewvnode(VT_UMAP, mp, umap_vnodeop_p, vpp))
+		return (error);
+	vp = *vpp;
+
+	MALLOC(xp, struct umap_node *, sizeof(struct umap_node),
+	    M_TEMP, M_WAITOK);
+	vp->v_type = lowervp->v_type;
+	xp->umap_vnode = vp;
+	vp->v_data = xp;
+	xp->umap_lowervp = lowervp;
+	/*
+	 * Before we insert our new node onto the hash chains,
+	 * check to see if someone else has beaten us to it.
+	 * (We could have slept in MALLOC.)
+	 *
+	 * umap_node_find() takes (mp, targetvp); both arguments must be
+	 * passed or the lookup compares against garbage.
+	 */
+	if (othervp = umap_node_find(mp, lowervp)) {
+		FREE(xp, M_TEMP);
+		vp->v_type = VBAD;		/* node is discarded */
+		vp->v_usecount = 0;		/* XXX */
+		*vpp = othervp;
+		return (0);
+	}
+	VREF(lowervp);	/* Extra VREF will be vrele'd in umap_node_create */
+	hd = umap_node_hash(lowervp);
+	insque(xp, hd);
+	return (0);
+}
+
+
+/*
+ * Try to find an existing umap_node vnode referring
+ * to the target, otherwise make a new umap_node vnode which
+ * contains a reference to the target vnode.
+ *
+ * mp       - the umap mount
+ * targetvp - the lower vnode; the caller's reference to it is released
+ *            here on success
+ * newvpp   - receives the alias vnode on success
+ *
+ * Returns 0 on success or the error from umap_node_alloc().  On error
+ * *newvpp is not written and targetvp is not released.
+ */
+int
+umap_node_create(mp, targetvp, newvpp)
+	struct mount *mp;
+	struct vnode *targetvp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+
+	if (aliasvp = umap_node_find(mp, targetvp)) {
+		/*
+		 * umap_node_find() already took a reference to the
+		 * alias vnode through vget(), so no VREF is needed.
+		 */
+#ifdef UMAPFS_DIAGNOSTIC
+		vprint("umap_node_create: exists", aliasvp);
+#endif
+		/* VREF(aliasvp); */
+	} else {
+		int error;
+
+		/*
+		 * Get new vnode.
+		 */
+#ifdef UMAPFS_DIAGNOSTIC
+		printf("umap_node_create: create new alias vnode\n");
+#endif
+		/*
+		 * Make new vnode reference the umap_node.
+		 */
+		if (error = umap_node_alloc(mp, targetvp, &aliasvp))
+			return (error);
+
+		/*
+		 * aliasvp is already VREF'd by getnewvnode()
+		 */
+	}
+
+	/*
+	 * The umap_node holds its own reference to the target, so the
+	 * one passed in by the caller is dropped.
+	 */
+	vrele(targetvp);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("umap_node_create: alias", aliasvp);
+	vprint("umap_node_create: target", targetvp);
+#endif
+
+	*newvpp = aliasvp;
+	return (0);
+}
+
+#ifdef UMAPFS_DIAGNOSTIC
+/* While non-zero, a failed check spins here so a debugger can attach. */
+int umap_checkvp_barrier = 1;
+
+/*
+ * Diagnostic replacement for the plain alias -> lower vnode lookup.
+ *
+ * Returns the lower vnode of vp, but first verifies that it is non-NULL
+ * and has a use count of at least one.  On failure the umap_node is
+ * dumped, the barrier loop is entered and the kernel panics.  fil and
+ * lno identify the call site (only used by the disabled trace below).
+ *
+ * NOTE(review): the dump prints 8 words although struct umap_node has
+ * only four pointer members - confirm whether that is intentional.
+ */
+struct vnode *
+umap_checkvp(vp, fil, lno)
+	struct vnode *vp;
+	char *fil;
+	int lno;
+{
+	struct umap_node *xp = VTOUMAP(vp);
+	struct vnode *lowervp;
+	u_long *wp;
+	int n;
+
+#if 0
+	/*
+	 * Can't do this check because vop_reclaim runs
+	 * with funny vop vector.
+	 */
+	if (vp->v_op != umap_vnodeop_p) {
+		printf ("umap_checkvp: on non-umap-node\n");
+		while (umap_checkvp_barrier) /*WAIT*/ ;
+		panic("umap_checkvp");
+	}
+#endif
+	lowervp = xp->umap_lowervp;
+
+	if (lowervp == NULL) {
+		/* Should never happen */
+		printf("vp = %x, ZERO ptr\n", vp);
+		wp = (u_long *) xp;
+		for (n = 0; n < 8; n++)
+			printf(" %x", wp[n]);
+		printf("\n");
+		/* wait for debugger */
+		while (umap_checkvp_barrier) /*WAIT*/ ;
+		panic("umap_checkvp");
+	}
+
+	if (lowervp->v_usecount < 1) {
+		printf("vp = %x, unref'ed lowervp\n", vp);
+		wp = (u_long *) xp;
+		for (n = 0; n < 8; n++)
+			printf(" %x", wp[n]);
+		printf("\n");
+		/* wait for debugger */
+		while (umap_checkvp_barrier) /*WAIT*/ ;
+		panic ("umap with unref'ed lowervp");
+	}
+#if 0
+	printf("umap %x/%d -> %x/%d [%s, %d]\n",
+	    xp->umap_vnode, xp->umap_vnode->v_usecount,
+	    xp->umap_lowervp, xp->umap_lowervp->v_usecount,
+	    fil, lno);
+#endif
+	return (lowervp);
+}
+#endif
+
+/*
+ * umap_mapids maps all of the ids in a credential, both user and group.
+ *
+ * v_mount - the umap mount whose uid/gid maps are to be applied
+ * credp   - credential that is rewritten in place
+ *
+ * The uid is looked up in the user map; an unmapped uid becomes NOBODY.
+ * Each entry of cr_groups is looked up in the group map; an unmapped gid
+ * becomes NULLGROUP.  As before, the group walk stops at the first zero
+ * entry; it is now also bounded by cr_ngroups so that a completely full
+ * group list cannot make it run past the end of the array.
+ */
+
+void
+umap_mapids(v_mount, credp)
+	struct mount *v_mount;
+	struct ucred *credp;
+{
+	struct umap_mount *amp = MOUNTTOUMAPMOUNT(v_mount);
+	u_long (*usermap)[2], (*groupmap)[2];
+	int i, unentries, gnentries;
+	uid_t uid;
+	gid_t gid;
+
+	/* Keep the maps typed as arrays of pairs, as umap_findid expects. */
+	unentries = amp->info_nentries;
+	usermap = amp->info_mapdata;
+	gnentries = amp->info_gnentries;
+	groupmap = amp->info_gmapdata;
+
+	/* Find uid entry in map */
+
+	uid = (uid_t) umap_findid(credp->cr_uid, usermap, unentries);
+
+	if (uid != (uid_t) -1)
+		credp->cr_uid = uid;
+	else
+		credp->cr_uid = (uid_t) NOBODY;
+
+#ifdef notdef
+	/* cr_gid is the same as cr_groups[0] in 4BSD */
+
+	/* Find gid entry in map */
+
+	gid = (gid_t) umap_findid(credp->cr_gid, groupmap, gnentries);
+
+	if (gid != (gid_t) -1)
+		credp->cr_gid = gid;
+	else
+		credp->cr_gid = NULLGROUP;
+#endif
+
+	/* Now we must map each of the set of groups in the cr_groups
+	   structure. */
+
+	for (i = 0; i < credp->cr_ngroups && credp->cr_groups[i] != 0; i++) {
+		gid = (gid_t) umap_findid(credp->cr_groups[i],
+		    groupmap, gnentries);
+
+		if (gid != (gid_t) -1)
+			credp->cr_groups[i] = gid;
+		else
+			credp->cr_groups[i] = NULLGROUP;
+	}
+}
diff --git a/sys/miscfs/umapfs/umap_vfsops.c b/sys/miscfs/umapfs/umap_vfsops.c
new file mode 100644
index 000000000000..2480a85e440c
--- /dev/null
+++ b/sys/miscfs/umapfs/umap_vfsops.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap_vfsops.c 8.3 (Berkeley) 1/21/94
+ *
+ * @(#)null_vfsops.c 1.5 (Berkeley) 7/10/92
+ */
+
+/*
+ * Umap Layer
+ * (See mount_umap(8) for a description of this layer.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+/*
+ * Mount umap layer
+ *
+ * mp   - the mount being created
+ * path - user-space mount point name, recorded in f_mntonname
+ * data - user-space pointer to a struct umap_args
+ * ndp  - nameidata used to look up the lower directory
+ * p    - calling process
+ *
+ * Returns 0 on success.  Errors: EOPNOTSUPP for MNT_UPDATE, EINVAL when
+ * the lower object is not a directory or a map size is out of range,
+ * otherwise whatever copyin(), namei() or umap_node_create() returned.
+ * Every error path after the lookup releases the locked lower vnode, and
+ * every error path after the allocation frees the umap_mount.
+ */
+int
+umapfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct umap_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *umapm_rootvp;
+	struct umap_mount *amp;
+	u_int size;
+	int error;
+#ifdef UMAPFS_DIAGNOSTIC
+	int i;
+#endif
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		return (EOPNOTSUPP);
+		/* return (VFS_MOUNT(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, path, data, ndp, p));*/
+	}
+
+	/*
+	 * Get argument
+	 */
+	if (error = copyin(data, (caddr_t)&args, sizeof(struct umap_args)))
+		return (error);
+
+	/*
+	 * The entry counts come from user space and size the copies into
+	 * the fixed arrays in struct umap_mount, so reject anything that
+	 * would not fit before touching any other resource.
+	 */
+	if (args.nentries < 0 || args.nentries > MAPFILEENTRIES ||
+	    args.gnentries < 0 || args.gnentries > GMAPFILEENTRIES)
+		return (EINVAL);
+
+	/*
+	 * Find lower node
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+	    UIO_USERSPACE, args.target, p);
+	if (error = namei(ndp))
+		return (error);
+
+	/*
+	 * Sanity check on lower vnode
+	 */
+	lowerrootvp = ndp->ni_vp;
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("vp = %x, check for VDIR...\n", lowerrootvp);
+#endif
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = 0;
+
+	if (lowerrootvp->v_type != VDIR) {
+		vput(lowerrootvp);
+		return (EINVAL);
+	}
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("mp = %x\n", mp);
+#endif
+
+	amp = (struct umap_mount *) malloc(sizeof(struct umap_mount),
+	    M_UFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Save reference to underlying FS
+	 */
+	amp->umapm_vfs = lowerrootvp->v_mount;
+
+	/*
+	 * Now copy in the number of entries and maps for umap mapping.
+	 */
+	amp->info_nentries = args.nentries;
+	amp->info_gnentries = args.gnentries;
+	error = copyin(args.mapdata, (caddr_t)amp->info_mapdata,
+	    2*sizeof(u_long)*args.nentries);
+	if (error)
+		goto bad;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umap_mount:nentries %d\n",args.nentries);
+	for (i = 0; i < args.nentries; i++)
+		printf(" %d maps to %d\n", amp->info_mapdata[i][0],
+		    amp->info_mapdata[i][1]);
+#endif
+
+	/* The group map is sized by gnentries, not nentries. */
+	error = copyin(args.gmapdata, (caddr_t)amp->info_gmapdata,
+	    2*sizeof(u_long)*args.gnentries);
+	if (error)
+		goto bad;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umap_mount:gnentries %d\n",args.gnentries);
+	for (i = 0; i < args.gnentries; i++)
+		printf(" group %d maps to %d\n",
+		    amp->info_gmapdata[i][0],
+		    amp->info_gmapdata[i][1]);
+#endif
+
+
+	/*
+	 * Save reference.  Each mount also holds
+	 * a reference on the root vnode.
+	 *
+	 * On failure vp has not been set and the lower root is still
+	 * locked and referenced by us, so check the error before
+	 * unlocking anything.
+	 */
+	error = umap_node_create(mp, lowerrootvp, &vp);
+	if (error)
+		goto bad;
+	/*
+	 * Unlock the node
+	 */
+	VOP_UNLOCK(vp);
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in umapfs_unmount.
+	 */
+	umapm_rootvp = vp;
+	umapm_rootvp->v_flag |= VROOT;
+	amp->umapm_rootvp = umapm_rootvp;
+	if (UMAPVPTOLOWERVP(umapm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) amp;
+	/*
+	 * NOTE(review): the fsid is generated with the LOFS type number;
+	 * this looks like a leftover from the lofs code - confirm whether
+	 * the umap type should be used instead.
+	 */
+	getnewfsid(mp, MOUNT_LOFS);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount: lower %s, alias at %s\n",
+	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+
+bad:
+	/* Undo the lookup (unlock + release) and the allocation. */
+	vput(lowerrootvp);
+	free(amp, M_UFSMNT);	/* XXX */
+	return (error);
+}
+
+/*
+ * VFS start hook.
+ *
+ * There is nothing to do for the umap layer: the lower filesystem's own
+ * start routine ran when that filesystem was mounted.  Always succeeds.
+ */
+int
+umapfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	/* Deliberately not forwarded: */
+	/* return (VFS_START(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, flags, p)); */
+	return (0);
+}
+
+/*
+ * Unmount the umap layer and free its per-mount data.
+ *
+ * Returns EINVAL for a forced unmount when doforce is off, EBUSY while
+ * the alias root vnode has more than one user, the error from vflush()
+ * if flushing fails, and 0 once everything has been torn down.
+ */
+int
+umapfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct vnode *rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+	int error, flushflags;
+	extern int doforce;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_unmount(mp = %x)\n", mp);
+#endif
+
+	flushflags = 0;
+	if (mntflags & MNT_FORCE) {
+		/* lofs can never be rootfs so don't check for it */
+		if (doforce == 0)
+			return (EINVAL);
+		flushflags = FORCECLOSE;
+	}
+
+	/*
+	 * Nothing is believed to be cached in the buffer cache at this
+	 * level at present, so flushing it stays disabled.
+	 */
+#ifdef notyet
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	/* Only the mount's own reference may remain on the root. */
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+
+	error = vflush(mp, rootvp, flushflags);
+	if (error)
+		return (error);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("alias root of lower", rootvp);
+#endif
+	/*
+	 * Drop the mount's reference on the alias root ...
+	 */
+	vrele(rootvp);
+	/*
+	 * ... and destroy it so the vnode can be re-used.
+	 */
+	vgone(rootvp);
+	/*
+	 * Last of all, release the umap_mount structure.
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+/*
+ * Hand back the root vnode of the umap mount through *vpp, with a new
+ * reference taken and the vnode locked.  Always returns 0.
+ */
+int
+umapfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_root(mp = %x, vp = %x->%x)\n", mp,
+	    MOUNTTOUMAPMOUNT(mp)->umapm_rootvp,
+	    UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp)
+	    );
+#endif
+
+	rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+
+	/* Reference first, then lock, then publish. */
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+	return (0);
+}
+
+/*
+ * Quota control: passed straight through to the lower filesystem's
+ * mount with the arguments unchanged.
+ */
+int
+umapfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	struct mount *lowermp = MOUNTTOUMAPMOUNT(mp)->umapm_vfs;
+
+	return (VFS_QUOTACTL(lowermp, cmd, uid, arg, p));
+}
+
+/*
+ * Report filesystem statistics for the umap mount.
+ *
+ * The numbers are obtained from the lower filesystem; the fsid and the
+ * mount names are taken from this mount's own mnt_stat.  Returns 0, or
+ * the error from the lower VFS_STATFS.
+ */
+int
+umapfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs lower;
+	int error;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_statfs(mp = %x, vp = %x->%x)\n", mp,
+	    MOUNTTOUMAPMOUNT(mp)->umapm_rootvp,
+	    UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp)
+	    );
+#endif
+
+	bzero(&lower, sizeof(lower));
+
+	error = VFS_STATFS(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, &lower, p);
+	if (error != 0)
+		return (error);
+
+	/* Take the figures from the lower layer ... */
+	sbp->f_type = lower.f_type;
+	sbp->f_flags = lower.f_flags;
+	sbp->f_bsize = lower.f_bsize;
+	sbp->f_iosize = lower.f_iosize;
+	sbp->f_blocks = lower.f_blocks;
+	sbp->f_bfree = lower.f_bfree;
+	sbp->f_bavail = lower.f_bavail;
+	sbp->f_files = lower.f_files;
+	sbp->f_ffree = lower.f_ffree;
+
+	/* ... and the identity from ourselves, unless sbp already is ours. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync hook.  Does nothing and reports success.
+ */
+int
+umapfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	/* XXX - relies on the umap layer caching no data of its own. */
+	return (0);
+}
+
+/*
+ * Look up a vnode by inode number: forwarded unchanged to the lower
+ * filesystem's mount.
+ */
+int
+umapfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	struct mount *lowermp = MOUNTTOUMAPMOUNT(mp)->umapm_vfs;
+
+	return (VFS_VGET(lowermp, ino, vpp));
+}
+
+/*
+ * File handle to vnode: forwarded unchanged to the lower filesystem's
+ * mount.
+ */
+int
+umapfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred**credanonp;
+{
+	struct mount *lowermp = MOUNTTOUMAPMOUNT(mp)->umapm_vfs;
+
+	return (VFS_FHTOVP(lowermp, fidp, nam, vpp, exflagsp,credanonp));
+}
+
+/*
+ * Vnode to file handle: performed on the lower vnode that vp aliases.
+ */
+int
+umapfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	struct vnode *lowervp = UMAPVPTOLOWERVP(vp);
+
+	return (VFS_VPTOFH(lowervp, fhp));
+}
+
+int umapfs_init __P((void)); /* defined in umap_subr.c */
+
+struct vfsops umap_vfsops = { /* positional initializers: the order of the entries is significant */
+ umapfs_mount,
+ umapfs_start, /* no-op */
+ umapfs_unmount,
+ umapfs_root,
+ umapfs_quotactl, /* forwarded to the lower mount */
+ umapfs_statfs,
+ umapfs_sync, /* no-op */
+ umapfs_vget, /* forwarded to the lower mount */
+ umapfs_fhtovp, /* forwarded to the lower mount */
+ umapfs_vptofh, /* applied to the lower vnode */
+ umapfs_init,
+};
diff --git a/sys/miscfs/umapfs/umap_vnops.c b/sys/miscfs/umapfs/umap_vnops.c
new file mode 100644
index 000000000000..287804e15618
--- /dev/null
+++ b/sys/miscfs/umapfs/umap_vnops.c
@@ -0,0 +1,488 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap_vnops.c 8.3 (Berkeley) 1/5/94
+ */
+
+/*
+ * Umap Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/umapfs/umap.h>
+
+
+int umap_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
+
+/*
+ * This is the 10-Apr-92 bypass routine.
+ * See null_vnops.c:null_bypass for more details.
+ *
+ * Generic pass-through for vnode operations.  In order, it:
+ *   1. replaces every umap vnode in the argument structure by its lower
+ *      vnode (taking an extra reference where the operation's flags say
+ *      the callee will release that vnode);
+ *   2. replaces the operation's credential, and the credential inside
+ *      its componentname if it has one, by a crdup()ed copy whose ids
+ *      have been run through umap_mapids();
+ *   3. invokes the same operation on the first lower vnode;
+ *   4. restores the original vnodes in the argument structure;
+ *   5. wraps a returned vnode (vpp) in an alias via umap_node_create();
+ *   6. frees the duplicated credentials and restores the originals.
+ *
+ * Returns the error from the lower operation, or from umap_node_create()
+ * when mapping the returned vnode fails.
+ */
+int
+umap_bypass(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	extern int (**umap_vnodeop_p)();  /* not extern, really "forward" */
+	struct ucred **credpp = 0, *credp = 0;
+	struct ucred *savecredp = 0, *savecompcredp = 0;
+	struct ucred *compcredp = 0;
+	struct vnode **this_vp_p;
+	int error;
+	struct vnode *old_vps[VDESC_MAX_VPS];
+	struct vnode *vp1 = 0;
+	struct vnode **vps_p[VDESC_MAX_VPS];
+	struct vnode ***vppp;
+	struct vnodeop_desc *descp = ap->a_desc;
+	int reles, i;
+	struct componentname **compnamepp = 0;
+
+	if (umap_bug_bypass)
+		printf ("umap_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+	/*
+	 * We require at least one vp.
+	 */
+	if (descp->vdesc_vp_offsets == NULL ||
+	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+		panic ("umap_bypass: no vp's in map.\n");
+#endif
+
+	/*
+	 * Map the vnodes going in.
+	 * Later, we'll invoke the operation based on
+	 * the first mapped vnode's operation vector.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		vps_p[i] = this_vp_p =
+			VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i], ap);
+
+		/* Remember the first (umap) vnode; its mount selects the maps. */
+		if (i == 0) {
+			vp1 = *vps_p[0];
+		}
+
+		/*
+		 * We're not guaranteed that any but the first vnode
+		 * are of our type.  Check for and don't map any
+		 * that aren't.  (Must map first vp or vclean fails.)
+		 */
+
+		if (i && (*this_vp_p)->v_op != umap_vnodeop_p) {
+			old_vps[i] = NULL;
+		} else {
+			old_vps[i] = *this_vp_p;
+			*(vps_p[i]) = UMAPVPTOLOWERVP(*this_vp_p);
+			/*
+			 * this_vp_p now points at the lower vnode; it gets
+			 * the extra reference the callee will release.
+			 */
+			if (reles & 1)
+				VREF(*this_vp_p);
+		}
+
+	}
+
+	/*
+	 * Fix the credentials.  (That's the purpose of this layer.)
+	 */
+
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+
+		credpp = VOPARG_OFFSETTO(struct ucred**,
+		    descp->vdesc_cred_offset, ap);
+
+		/* Save old values */
+
+		savecredp = (*credpp);
+		(*credpp) = crdup(savecredp);
+		credp = *credpp;
+
+		if (umap_bug_bypass && credp->cr_uid != 0)
+			printf("umap_bypass: user was %d, group %d\n",
+			    credp->cr_uid, credp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		umap_mapids(vp1->v_mount, credp);
+
+		if (umap_bug_bypass && credp->cr_uid != 0)
+			printf("umap_bypass: user now %d, group %d\n",
+			    credp->cr_uid, credp->cr_gid);
+	}
+
+	/* BSD often keeps a credential in the componentname structure
+	 * for speed.  If there is one, it better get mapped, too.
+	 */
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+
+		compnamepp = VOPARG_OFFSETTO(struct componentname**,
+		    descp->vdesc_componentname_offset, ap);
+
+		compcredp = (*compnamepp)->cn_cred;
+		savecompcredp = compcredp;
+		compcredp = (*compnamepp)->cn_cred = crdup(savecompcredp);
+
+		if (umap_bug_bypass && compcredp->cr_uid != 0)
+			printf("umap_bypass: component credit user was %d, group %d\n",
+			    compcredp->cr_uid, compcredp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		umap_mapids(vp1->v_mount, compcredp);
+
+		if (umap_bug_bypass && compcredp->cr_uid != 0)
+			printf("umap_bypass: component credit user now %d, group %d\n",
+			    compcredp->cr_uid, compcredp->cr_gid);
+	}
+
+	/*
+	 * Call the operation on the lower layer
+	 * with the modified argument structure.
+	 */
+	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+	/*
+	 * Maintain the illusion of call-by-value
+	 * by restoring vnodes in the argument structure
+	 * to their original value.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		if (old_vps[i]) {
+			*(vps_p[i]) = old_vps[i];
+			/* The callee released the lower vnode; release ours. */
+			if (reles & 1)
+				vrele(*(vps_p[i]));
+		};
+	};
+
+	/*
+	 * Map the possible out-going vpp
+	 * (Assumes that the lower layer always returns
+	 * a VREF'ed vpp unless it gets an error.)
+	 */
+	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+	    !error) {
+		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+			goto out;
+		vppp = VOPARG_OFFSETTO(struct vnode***,
+				 descp->vdesc_vpp_offset, ap);
+		error = umap_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+	};
+
+ out:
+	/*
+	 * Free duplicate cred structure and restore old one.
+	 */
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && credp && credp->cr_uid != 0)
+			printf("umap_bypass: returning-user was %d\n",
+					credp->cr_uid);
+
+		crfree(credp);
+		(*credpp) = savecredp;
+		if (umap_bug_bypass && credpp && (*credpp)->cr_uid != 0)
+			printf("umap_bypass: returning-user now %d\n\n",
+			    (*credpp)->cr_uid);
+	}
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && compcredp && compcredp->cr_uid != 0)
+			printf("umap_bypass: returning-component-user was %d\n",
+				compcredp->cr_uid);
+
+		crfree(compcredp);
+		(*compnamepp)->cn_cred = savecompcredp;
+		/*
+		 * Report the restored credential.  compcredp has just been
+		 * handed to crfree() and must not be touched any more, and
+		 * credpp is unrelated (it may even be null here).
+		 */
+		if (umap_bug_bypass && savecompcredp &&
+		    savecompcredp->cr_uid != 0)
+			printf("umap_bypass: returning-component-user now %d\n",
+				savecompcredp->cr_uid);
+	}
+
+	return (error);
+}
+
+
+/*
+ * We handle getattr to change the fsid and to translate the owner ids.
+ *
+ * The attributes are fetched from the lower layer through umap_bypass().
+ * Afterwards va_fsid is replaced by this mount's fsid, and va_uid and
+ * va_gid are reverse-mapped through the mount's user and group maps.
+ * An id without a mapping is reported as NOBODY (uid) or NULLGROUP (gid).
+ *
+ * Returns 0, or the error from umap_bypass().
+ */
+int
+umap_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/*
+	 * Ids are kept at their full width.  Holding them in a short
+	 * truncated large values (65534 became -2), so they could never
+	 * match a map entry.
+	 */
+	uid_t uid;
+	gid_t gid;
+	u_long tmpid;
+	int error, nentries, gnentries;
+	u_long (*mapdata)[2], (*gmapdata)[2];
+	struct vnode **vp1p;
+	struct vnodeop_desc *descp = ap->a_desc;
+
+	if (error = umap_bypass(ap))
+		return (error);
+	/* Requires that arguments be restored. */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+
+	/*
+	 * Umap needs to map the uid and gid returned by a stat
+	 * into the proper values for this site.  This involves
+	 * finding the returned id in the mapping information,
+	 * translating it into the id on the other end,
+	 * and filling in the proper field in the vattr
+	 * structure pointed to by ap->a_vap.
+	 */
+
+	/* Find entry in map */
+
+	uid = ap->a_vap->va_uid;
+	gid = ap->a_vap->va_gid;
+	if (umap_bug_bypass)
+		printf("umap_getattr: mapped uid = %d, mapped gid = %d\n",
+		    (int) uid, (int) gid);
+
+	vp1p = VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap);
+	nentries =  MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_nentries;
+	mapdata =  (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_mapdata);
+	gnentries =  MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gnentries;
+	gmapdata =  (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gmapdata);
+
+	/* Reverse map the uid for the vnode.  Since it's a reverse
+		map, we can't use umap_mapids() to do it. */
+
+	tmpid = umap_reverse_findid(uid, mapdata, nentries);
+
+	if (tmpid != (u_long) -1) {
+
+		ap->a_vap->va_uid = (uid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original uid = %d\n", (int) uid);
+	} else
+		ap->a_vap->va_uid = (uid_t) NOBODY;
+
+	/* Reverse map the gid for the vnode. */
+
+	tmpid = umap_reverse_findid(gid, gmapdata, gnentries);
+
+	if (tmpid != (u_long) -1) {
+
+		ap->a_vap->va_gid = (gid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original gid = %d\n", (int) gid);
+	} else
+		ap->a_vap->va_gid = (gid_t) NULLGROUP;
+
+	return (0);
+}
+
+/*
+ * Inactive hook.  Intentionally a no-op that is NOT passed down to the
+ * lower layer.
+ */
+int
+umap_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	/*
+	 * The lower vnode is only vrele'd at reclaim time.  Until then
+	 * the umap_node stays in the cache where it can be found and
+	 * re-used.
+	 */
+	return (0);
+}
+
+/*
+ * Reclaim hook: detach the umap_node from the alias vnode, free it and
+ * drop the node's reference to the lower vnode.  Always returns 0.
+ */
+int
+umap_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *aliasvp = ap->a_vp;
+	struct umap_node *np = VTOUMAP(aliasvp);
+	struct vnode *lowervp;
+
+	/* Remember the lower vnode before the node goes away. */
+	lowervp = np->umap_lowervp;
+
+	/* Once cleared, this node can no longer be matched and re-used. */
+	np->umap_lowervp = NULL;
+	remque(np);
+
+	FREE(aliasvp->v_data, M_TEMP);
+	aliasvp->v_data = NULL;
+
+	vrele(lowervp);
+	return (0);
+}
+
+/*
+ * Strategy: temporarily point the buffer at the lower vnode, run the
+ * strategy operation on it, then restore the buffer's original vnode.
+ * Returns the result of the lower VOP_STRATEGY.
+ */
+int
+umap_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *uppervp = bp->b_vp;
+	int error;
+
+	bp->b_vp = UMAPVPTOLOWERVP(uppervp);
+	error = VOP_STRATEGY(bp);
+	bp->b_vp = uppervp;
+
+	return (error);
+}
+
+/*
+ * Bwrite: temporarily point the buffer at the lower vnode, run the
+ * bwrite operation on it, then restore the buffer's original vnode.
+ * Returns the result of the lower VOP_BWRITE.
+ */
+int
+umap_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *uppervp = bp->b_vp;
+	int error;
+
+	bp->b_vp = UMAPVPTOLOWERVP(uppervp);
+	error = VOP_BWRITE(bp);
+	bp->b_vp = uppervp;
+
+	return (error);
+}
+
+
+/*
+ * Print a one-line description of a umap vnode: its own address and the
+ * address of the lower vnode it aliases.  Always returns 0.
+ */
+int
+umap_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *aliasvp = ap->a_vp;
+	struct vnode *lowervp = UMAPVPTOLOWERVP(aliasvp);
+
+	printf("\ttag VT_UMAPFS, vp=%x, lowervp=%x\n", aliasvp, lowervp);
+	return (0);
+}
+
+/*
+ * Rename.
+ *
+ * Rename carries two componentname structures.  The credential of the
+ * second ("to") one is mapped here; umap_bypass() then handles the rest
+ * of the operation.  The original credential is put back before
+ * returning.  Returns the result of umap_bypass().
+ */
+int
+umap_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	struct vnode *fdvp = ap->a_fdvp;
+	struct componentname *tcnp = ap->a_tcnp;
+	struct ucred *origcred = tcnp->cn_cred;
+	struct ucred *mappedcred;
+	int error;
+
+	/* Work on a private copy so the caller's credential is untouched. */
+	mappedcred = crdup(origcred);
+	tcnp->cn_cred = mappedcred;
+
+	if (umap_bug_bypass && mappedcred->cr_uid != 0)
+		printf("umap_rename: rename component credit user was %d, group %d\n",
+		    mappedcred->cr_uid, mappedcred->cr_gid);
+
+	/* Translate every id in the copy using this mount's maps. */
+	umap_mapids(fdvp->v_mount, mappedcred);
+
+	if (umap_bug_bypass && mappedcred->cr_uid != 0)
+		printf("umap_rename: rename component credit user now %d, group %d\n",
+		    mappedcred->cr_uid, mappedcred->cr_gid);
+
+	error = umap_bypass(ap);
+
+	/* Discard the mapped copy and reinstate the original credential. */
+	crfree(mappedcred);
+	tcnp->cn_cred = origcred;
+
+	return error;
+}
+
+/*
+ * Global vfs data structures
+ */
+/*
+ * XXX - strategy, bwrite are hand coded currently. They should
+ * go away with a merged buffer/block cache.
+ *
+ * Every operation without an entry of its own below is handled by the
+ * default entry, umap_bypass.  The table ends with a NULL/NULL pair.
+ */
+int (**umap_vnodeop_p)();
+struct vnodeopv_entry_desc umap_vnodeop_entries[] = {
+ { &vop_default_desc, umap_bypass },
+
+ { &vop_getattr_desc, umap_getattr },
+ { &vop_inactive_desc, umap_inactive },
+ { &vop_reclaim_desc, umap_reclaim },
+ { &vop_print_desc, umap_print },
+ { &vop_rename_desc, umap_rename },
+
+ { &vop_strategy_desc, umap_strategy },
+ { &vop_bwrite_desc, umap_bwrite },
+
+ { (struct vnodeop_desc*) NULL, (int(*)()) NULL }
+};
+struct vnodeopv_desc umap_vnodeop_opv_desc =
+ { &umap_vnodeop_p, umap_vnodeop_entries };
diff --git a/sys/miscfs/union/README b/sys/miscfs/union/README
new file mode 100644
index 000000000000..14a476987c98
--- /dev/null
+++ b/sys/miscfs/union/README
@@ -0,0 +1,7 @@
+If you plan on using union mounts, then you should consider replacing
+"libc/gen/opendir.c" in the C library with the file "libc.opendir.c"
+in this directory. The replacement version of opendir() automatically
+removes duplicate names when a union stack is encountered. You will
+then need to rebuild the C library and all commands.
+
+@(#)README 8.1 (Berkeley) 2/15/94
diff --git a/sys/miscfs/union/libc.opendir.c b/sys/miscfs/union/libc.opendir.c
new file mode 100644
index 000000000000..99ed58b86fd4
--- /dev/null
+++ b/sys/miscfs/union/libc.opendir.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 1983, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char orig_sccsid[] = "@(#)opendir.c 8.2 (Berkeley) 2/12/94";
+static char sccsid[] = "@(#)libc.opendir.c 8.1 (Berkeley) 2/15/94";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Open a directory.
+ *
+ * Opens (name), marks the descriptor close-on-exec and allocates a DIR
+ * for it.  Returns the new DIR, or NULL if the open, fcntl, fstatfs or
+ * any allocation fails (the descriptor is closed again in that case).
+ *
+ * When MOUNT_UNION is defined and the directory lives on a union
+ * filesystem, the whole directory is read into one buffer up front and
+ * duplicate names are hidden by zeroing their d_fileno.
+ */
+DIR *
+opendir(name)
+	const char *name;
+{
+	DIR *dirp;
+	int fd;
+	int incr;
+	struct statfs sfb;
+
+	if ((fd = open(name, 0)) == -1)
+		return (NULL);
+	if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1 ||
+	    (dirp = (DIR *)malloc(sizeof(DIR))) == NULL) {
+		close(fd);
+		return (NULL);
+	}
+
+	/*
+	 * If CLBYTES is an exact multiple of DIRBLKSIZ, use a CLBYTES
+	 * buffer that is cluster boundary aligned.
+	 * Hopefully this can be a big win someday by allowing page
+	 * trades to user space to be done by getdirentries().
+	 */
+	if ((CLBYTES % DIRBLKSIZ) == 0)
+		incr = CLBYTES;
+	else
+		incr = DIRBLKSIZ;
+
+#ifdef MOUNT_UNION
+	/*
+	 * Determine whether this directory is the top of a union stack.
+	 */
+	if (fstatfs(fd, &sfb) < 0) {
+		free(dirp);
+		close(fd);
+		return (NULL);
+	}
+
+	if (sfb.f_type == MOUNT_UNION) {
+		int len = 0;		/* entry space allocated so far */
+		int space = 0;		/* unused entry space at the tail */
+		char *buf = 0;
+		char *nbuf;
+		char *ddptr = 0;
+		char *ddeptr;		/* end of the data actually read */
+		int n;
+		struct dirent **dpv;
+
+		/*
+		 * The strategy here is to read all the directory
+		 * entries into a buffer, sort the buffer, and
+		 * remove duplicate entries by setting the inode
+		 * number to zero.
+		 */
+
+		/*
+		 * Fixup dd_loc to be non-zero to fake out readdir
+		 */
+		dirp->dd_loc = sizeof(void *);
+
+		do {
+			/*
+			 * Always make at least DIRBLKSIZ bytes
+			 * available to getdirentries
+			 */
+			if (space < DIRBLKSIZ) {
+				space += incr;
+				len += incr;
+				/*
+				 * The dd_loc prefix is allocated on top of
+				 * (len) so that the (space) bytes offered to
+				 * getdirentries really exist.  Keep the old
+				 * pointer until realloc has succeeded so it
+				 * can be released on failure.
+				 */
+				nbuf = realloc(buf, len + dirp->dd_loc);
+				if (nbuf == NULL) {
+					free(buf);
+					free(dirp);
+					close(fd);
+					return (NULL);
+				}
+				buf = nbuf;
+				ddptr = buf + dirp->dd_loc + (len - space);
+			}
+
+			n = getdirentries(fd, ddptr, space, &dirp->dd_seek);
+			if (n > 0) {
+				ddptr += n;
+				space -= n;
+			}
+		} while (n > 0);
+
+		/* Nothing past this point was written by getdirentries. */
+		ddeptr = ddptr;
+
+		/*
+		 * There is now a buffer full of (possibly) duplicate
+		 * names.
+		 */
+		dirp->dd_buf = buf;
+
+		/*
+		 * Go round this loop twice...
+		 *
+		 * Scan through the buffer, counting entries.
+		 * On the second pass, save pointers to each one.
+		 * Then sort the pointers and remove duplicate names.
+		 */
+		for (dpv = 0;;) {
+			n = 0;
+			ddptr = buf + dirp->dd_loc;
+			while (ddptr < ddeptr) {
+				struct dirent *dp;
+
+				dp = (struct dirent *) ddptr;
+				/* stop at a misaligned record */
+				if ((long)dp & 03)
+					break;
+				/* stop at a record that is empty or overruns the data */
+				if ((dp->d_reclen <= 0) ||
+				    (dp->d_reclen > (ddeptr - ddptr)))
+					break;
+				ddptr += dp->d_reclen;
+				if (dp->d_fileno) {
+					if (dpv)
+						dpv[n] = dp;
+					n++;
+				}
+			}
+
+			if (dpv) {
+				struct dirent *xp;
+
+				/*
+				 * If and when whiteouts happen,
+				 * this sort would need to be stable.
+				 */
+				heapsort(dpv, n, sizeof(*dpv), alphasort);
+
+				dpv[n] = NULL;
+				xp = NULL;
+
+				/*
+				 * Scan through the buffer in sort order,
+				 * zapping the inode number of any
+				 * duplicate names.
+				 */
+				for (n = 0; dpv[n]; n++) {
+					struct dirent *dp = dpv[n];
+
+					if ((xp == NULL) ||
+					    strcmp(dp->d_name, xp->d_name))
+						xp = dp;
+					else
+						dp->d_fileno = 0;
+				}
+
+				free(dpv);
+				break;
+			} else {
+				/*
+				 * First pass done: (n) entries counted, plus
+				 * one slot for the NULL terminator.  If this
+				 * fails the duplicates are simply left in.
+				 */
+				dpv = malloc((n+1) * sizeof(struct dirent *));
+				if (dpv == NULL)
+					break;
+			}
+		}
+
+		dirp->dd_len = len;
+		dirp->dd_size = ddptr - dirp->dd_buf;
+	} else
+#endif /* MOUNT_UNION */
+	{
+		dirp->dd_len = incr;
+		dirp->dd_buf = malloc(dirp->dd_len);
+		if (dirp->dd_buf == NULL) {
+			free(dirp);
+			close (fd);
+			return (NULL);
+		}
+		dirp->dd_seek = 0;
+		dirp->dd_loc = 0;
+	}
+
+	dirp->dd_fd = fd;
+
+	/*
+	 * Set up seek point for rewinddir.
+	 */
+	dirp->dd_rewind = telldir(dirp);
+
+	return (dirp);
+}
diff --git a/sys/miscfs/union/union.h b/sys/miscfs/union/union.h
new file mode 100644
index 000000000000..463218ac3ed2
--- /dev/null
+++ b/sys/miscfs/union/union.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union.h 8.2 (Berkeley) 2/17/94
+ */
+
+/*
+ * Argument block for a union mount.
+ * NOTE(review): presumably filled in by the mount request and consumed
+ * by the union mount routine -- confirm against union_vfsops.c.
+ */
+struct union_args {
+ char *target; /* Target of loopback */
+ int mntflags; /* Options on the mount */
+};
+
+/*
+ * Operation modes; UNMNT_OPMASK covers all of the mode values below.
+ */
+#define UNMNT_ABOVE 0x0001 /* Target appears above mount point */
+#define UNMNT_BELOW 0x0002 /* Target appears below mount point */
+#define UNMNT_REPLACE 0x0003 /* Target replaces mount point */
+#define UNMNT_OPMASK 0x0003
+
+/*
+ * Per-mount state of a union filesystem; reached from a struct mount
+ * through MOUNTTOUNIONMOUNT().
+ */
+struct union_mount {
+ struct vnode *um_uppervp; /* upper layer vnode of the mount */
+ struct vnode *um_lowervp; /* lower layer vnode of the mount */
+ struct ucred *um_cred; /* Credentials of user calling mount */
+ int um_cmode; /* cmask from mount process */
+ int um_op; /* Operation mode */
+};
+
+#ifdef KERNEL
+
+/*
+ * UN_DIRMODE is the mode bits used to create a shadow directory and
+ * UN_FILEMODE those used to create a shadow file.  Each is the basic
+ * permission set repeated at the positions 3 and 6 bits lower.
+ */
+#define VRWXMODE (VREAD|VWRITE|VEXEC)
+#define VRWMODE (VREAD|VWRITE)
+#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6))
+#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6))
+
+/*
+ * A cache of vnode references.
+ *
+ * One union_node hangs off each union vnode (see VTOUNION) and records
+ * the upper and lower layer vnodes it stands for, together with the
+ * name and parent directory that union_vn_create() uses to create the
+ * upper layer file later on.
+ */
+struct union_node {
+ LIST_ENTRY(union_node) un_cache; /* Hash chain */
+ struct vnode *un_vnode; /* Back pointer */
+ struct vnode *un_uppervp; /* overlaying object */
+ struct vnode *un_lowervp; /* underlying object */
+ struct vnode *un_dirvp; /* Parent dir of uppervp */
+ char *un_path; /* saved component name */
+ int un_hash; /* saved un_path hash value */
+ int un_openl; /* # of opens on lowervp */
+ int un_flags; /* UN_* flag bits */
+#ifdef DIAGNOSTIC
+ pid_t un_pid; /* pid recorded when the node lock is taken; -1 if no curproc */
+#endif
+};
+
+/*
+ * Flag bits kept in un_flags.  UN_WANT and UN_LOCKED are also used by
+ * union_subr.c for its per-hash-chain lock words.
+ */
+#define UN_WANT 0x01 /* Someone is sleeping, waiting for the lock */
+#define UN_LOCKED 0x02 /* Lock is held */
+#define UN_ULOCK 0x04 /* Upper node is locked */
+#define UN_KLOCK 0x08 /* Keep upper node locked on vput */
+
+/*
+ * Routines exported by union_subr.c.
+ */
+extern int union_allocvp __P((struct vnode **, struct mount *,
+				struct vnode *, struct vnode *,
+				struct componentname *, struct vnode *,
+				struct vnode *));
+extern int union_copyfile __P((struct proc *, struct ucred *,
+				struct vnode *, struct vnode *));
+extern int union_mkshadow __P((struct union_mount *, struct vnode *,
+				struct componentname *, struct vnode **));
+extern int union_vn_create __P((struct vnode **, struct union_node *,
+				struct proc *));
+/*
+ * The routine defined in union_subr.c is union_vn_close; the
+ * union_cn_close spelling is kept only so that nothing which already
+ * mentions it stops compiling.
+ */
+extern int union_cn_close __P((struct vnode *, int, struct ucred *,
+				struct proc *));
+extern int union_vn_close __P((struct vnode *, int, struct ucred *,
+				struct proc *));
+extern void union_removed_upper __P((struct union_node *un));
+extern struct vnode *union_lowervp __P((struct vnode *));
+extern void union_newlower __P((struct union_node *, struct vnode *));
+extern void union_newupper __P((struct union_node *, struct vnode *));
+
+/*
+ * Conversions between the generic mount/vnode structures and the
+ * union-private ones stored in their mnt_data / v_data fields.
+ */
+#define MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data))
+#define VTOUNION(vp) ((struct union_node *)(vp)->v_data)
+#define UNIONTOV(un) ((un)->un_vnode)
+#define LOWERVP(vp) (VTOUNION(vp)->un_lowervp)
+#define UPPERVP(vp) (VTOUNION(vp)->un_uppervp)
+/* The upper layer vnode if there is one, otherwise the lower one. */
+#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp))
+
+/* Union vnode operations vector and filesystem switch entry, defined elsewhere. */
+extern int (**union_vnodeop_p)();
+extern struct vfsops union_vfsops;
+#endif /* KERNEL */
diff --git a/sys/miscfs/union/union_subr.c b/sys/miscfs/union/union_subr.c
new file mode 100644
index 000000000000..77947d1dfbe1
--- /dev/null
+++ b/sys/miscfs/union/union_subr.c
@@ -0,0 +1,744 @@
+/*
+ * Copyright (c) 1994 Jan-Simon Pendry
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union_subr.c 8.4 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+#ifdef DIAGNOSTIC
+#include <sys/proc.h>
+#endif
+
+/* must be power of two, otherwise change UNION_HASH() */
+#define NHASH 32
+
+/*
+ * Hash an (upper, lower) vnode pointer pair onto a chain index in
+ * [0, NHASH): the two addresses are summed, the low 8 bits dropped
+ * and the result masked down.  Both arguments are parenthesized so
+ * that any expression may be passed.
+ */
+#define UNION_HASH(u, l) \
+	(((((unsigned long) (u)) + ((unsigned long) (l))) >> 8) & (NHASH-1))
+
+/* Hash chains of union_nodes, and one UN_LOCKED/UN_WANT lock word per chain. */
+static LIST_HEAD(unhead, union_node) unhead[NHASH];
+static int unvplock[NHASH];
+
+/*
+ * Initialise the union_node cache: empty every hash chain and clear
+ * every chain lock.  Always returns 0.
+ */
+int
+union_init()
+{
+	int i;
+
+	for (i = 0; i < NHASH; i++)
+		LIST_INIT(&unhead[i]);
+	bzero((caddr_t) unvplock, sizeof(unvplock));
+
+	/* Declared int, so hand back a defined value. */
+	return (0);
+}
+
+/*
+ * Make one attempt at locking hash chain (ix).
+ *
+ * Returns 0 with the chain locked.  If the chain is already locked,
+ * registers interest, sleeps until woken and returns 1 WITHOUT the
+ * lock, so callers loop until 0 comes back.
+ */
+static int
+union_list_lock(ix)
+	int ix;
+{
+	int *lockp = &unvplock[ix];
+
+	if ((*lockp & UN_LOCKED) == 0) {
+		/* free: take it */
+		*lockp |= UN_LOCKED;
+		return (0);
+	}
+
+	/* busy: ask to be woken, then let the caller retry */
+	*lockp |= UN_WANT;
+	sleep((caddr_t) lockp, PINOD);
+	return (1);
+}
+
+/*
+ * Release the lock on hash chain (ix) and wake anybody who went to
+ * sleep in union_list_lock() waiting for it.
+ */
+static void
+union_list_unlock(ix)
+	int ix;
+{
+	int *lockp = &unvplock[ix];
+	int wanted = *lockp & UN_WANT;
+
+	*lockp &= ~(UN_LOCKED|UN_WANT);
+
+	if (wanted)
+		wakeup((caddr_t) lockp);
+}
+
+/*
+ * Replace the upper and/or lower vnode recorded in (un) by (uppervp)
+ * and (lowervp), moving (un) to the hash chain matching the new pair.
+ *
+ * A vnode that is replaced is vrele'd.  When the lower vnode is
+ * replaced the saved path name and parent directory are dropped too.
+ * The references on (uppervp) and (lowervp) are taken over by (un).
+ */
+void
+union_updatevp(un, uppervp, lowervp)
+	struct union_node *un;
+	struct vnode *uppervp;
+	struct vnode *lowervp;
+{
+	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
+	int nhash = UNION_HASH(uppervp, lowervp);
+
+	if (ohash != nhash) {
+		int lhash, hhash;
+
+		/*
+		 * Ensure locking is ordered from lower to higher
+		 * to avoid deadlocks.  The ordering is kept in
+		 * separate variables: (ohash) and (nhash) must keep
+		 * meaning "old chain" and "new chain", otherwise the
+		 * node would be re-inserted on the chain it just left.
+		 */
+		if (nhash < ohash) {
+			lhash = nhash;
+			hhash = ohash;
+		} else {
+			lhash = ohash;
+			hhash = nhash;
+		}
+
+		while (union_list_lock(lhash))
+			continue;
+
+		while (union_list_lock(hhash))
+			continue;
+
+		LIST_REMOVE(un, un_cache);
+		union_list_unlock(ohash);
+	} else {
+		while (union_list_lock(nhash))
+			continue;
+	}
+
+	if (un->un_lowervp != lowervp) {
+		if (un->un_lowervp) {
+			vrele(un->un_lowervp);
+			/* the saved name and directory described the old lower vnode */
+			if (un->un_path) {
+				free(un->un_path, M_TEMP);
+				un->un_path = 0;
+			}
+			if (un->un_dirvp) {
+				vrele(un->un_dirvp);
+				un->un_dirvp = NULLVP;
+			}
+		}
+		un->un_lowervp = lowervp;
+	}
+
+	if (un->un_uppervp != uppervp) {
+		if (un->un_uppervp)
+			vrele(un->un_uppervp);
+
+		un->un_uppervp = uppervp;
+	}
+
+	if (ohash != nhash)
+		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
+
+	union_list_unlock(nhash);
+}
+
+/*
+ * Give (un) a new lower layer vnode, leaving its upper vnode alone.
+ */
+void
+union_newlower(un, lowervp)
+	struct union_node *un;
+	struct vnode *lowervp;
+{
+	struct vnode *uppervp = un->un_uppervp;
+
+	union_updatevp(un, uppervp, lowervp);
+}
+
+/*
+ * Give (un) a new upper layer vnode, leaving its lower vnode alone.
+ */
+void
+union_newupper(un, uppervp)
+	struct union_node *un;
+	struct vnode *uppervp;
+{
+	struct vnode *lowervp = un->un_lowervp;
+
+	union_updatevp(un, uppervp, lowervp);
+}
+
+/*
+ * allocate a union_node/vnode pair.  the vnode is
+ * referenced and locked.  the new vnode is returned
+ * via (vpp).  (mp) is the mountpoint of the union filesystem,
+ * (dvp) is the parent directory where the upper layer object
+ * should exist (but doesn't) and (cnp) is the componentname
+ * information which is partially copied to allow the upper
+ * layer object to be created at a later time.  (uppervp)
+ * and (lowervp) reference the upper and lower layer objects
+ * being mapped.  either, but not both, can be nil.
+ * if supplied, (uppervp) is locked.
+ * the reference is either maintained in the new union_node
+ * object which is allocated, or they are vrele'd.
+ * (undvp) is not used by this routine.
+ *
+ * all union_nodes are maintained on NHASH hash chains,
+ * selected by UNION_HASH() of the (uppervp, lowervp) pair.
+ * new nodes are only allocated when they cannot be found
+ * on the chains.  entries are removed by union_freevp().
+ *
+ * one lock is kept per chain.  it is held while a chain is
+ * searched, and across getnewvnode(), which can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode.
+ *
+ * if an entry is found on a chain, then call vget() to
+ * take a reference.  this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ *
+ * returns 0 on success or the error from getnewvnode().
+ */
+int
+union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp)
+	struct vnode **vpp;
+	struct mount *mp;
+	struct vnode *undvp;
+	struct vnode *dvp;		/* may be null */
+	struct componentname *cnp;	/* may be null */
+	struct vnode *uppervp;		/* may be null */
+	struct vnode *lowervp;		/* may be null */
+{
+	int error;
+	struct union_node *un;
+	struct union_node **pp;
+	struct vnode *xlowervp = NULLVP;
+	int hash;
+	int try;
+
+	if (uppervp == NULLVP && lowervp == NULLVP)
+		panic("union: unidentifiable allocation");
+
+	/*
+	 * Layers of differing type are not unioned: set the lower
+	 * vnode aside in (xlowervp) so that its reference can be
+	 * dropped, and carry on with the upper vnode only.
+	 */
+	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
+		xlowervp = lowervp;
+		lowervp = NULLVP;
+	}
+
+loop:
+	/*
+	 * Look for an existing node on up to three chains: the one
+	 * for the (upper, lower) pair, the one for upper alone and
+	 * the one for lower alone.
+	 */
+	for (try = 0; try < 3; try++) {
+		switch (try) {
+		case 0:
+			if (lowervp == NULLVP)
+				continue;
+			hash = UNION_HASH(uppervp, lowervp);
+			break;
+
+		case 1:
+			if (uppervp == NULLVP)
+				continue;
+			hash = UNION_HASH(uppervp, NULLVP);
+			break;
+
+		case 2:
+			if (lowervp == NULLVP)
+				continue;
+			hash = UNION_HASH(NULLVP, lowervp);
+			break;
+		}
+
+		while (union_list_lock(hash))
+			continue;
+
+		for (un = unhead[hash].lh_first; un != 0;
+					un = un->un_cache.le_next) {
+			if ((un->un_lowervp == lowervp ||
+			     un->un_lowervp == NULLVP) &&
+			    (un->un_uppervp == uppervp ||
+			     un->un_uppervp == NULLVP) &&
+			    (UNIONTOV(un)->v_mount == mp)) {
+				if (vget(UNIONTOV(un), 0)) {
+					/* could not get it; start again */
+					union_list_unlock(hash);
+					goto loop;
+				}
+				break;
+			}
+		}
+
+		union_list_unlock(hash);
+
+		if (un)
+			break;
+	}
+
+	if (un) {
+		/*
+		 * Obtain a lock on the union_node.
+		 * uppervp is locked, though un->un_uppervp
+		 * may not be.  this doesn't break the locking
+		 * hierarchy since in the case that un->un_uppervp
+		 * is not yet locked it will be vrele'd and replaced
+		 * with uppervp.
+		 */
+
+		if ((dvp != NULLVP) && (uppervp == dvp)) {
+			/*
+			 * Access ``.'', so (un) will already
+			 * be locked.  Since this process has
+			 * the lock on (uppervp) no other
+			 * process can hold the lock on (un).
+			 */
+#ifdef DIAGNOSTIC
+			if ((un->un_flags & UN_LOCKED) == 0)
+				panic("union: . not locked");
+			else if (curproc && un->un_pid != curproc->p_pid &&
+				    un->un_pid > -1 && curproc->p_pid > -1)
+				panic("union: allocvp not lock owner");
+#endif
+		} else {
+			if (un->un_flags & UN_LOCKED) {
+				/* give back the vget reference and wait */
+				vrele(UNIONTOV(un));
+				un->un_flags |= UN_WANT;
+				sleep((caddr_t) &un->un_flags, PINOD);
+				goto loop;
+			}
+			un->un_flags |= UN_LOCKED;
+
+#ifdef DIAGNOSTIC
+			if (curproc)
+				un->un_pid = curproc->p_pid;
+			else
+				un->un_pid = -1;
+#endif
+		}
+
+		/*
+		 * At this point, the union_node is locked,
+		 * un->un_uppervp may not be locked, and uppervp
+		 * is locked or nil.
+		 */
+
+		/*
+		 * Save information about the upper layer.
+		 */
+		if (uppervp != un->un_uppervp) {
+			union_newupper(un, uppervp);
+		} else if (uppervp) {
+			vrele(uppervp);
+		}
+
+		if (un->un_uppervp) {
+			un->un_flags |= UN_ULOCK;
+			un->un_flags &= ~UN_KLOCK;
+		}
+
+		/*
+		 * Save information about the lower layer.
+		 * This needs to keep track of pathname
+		 * and directory information which union_vn_create
+		 * might need.
+		 */
+		if (lowervp != un->un_lowervp) {
+			union_newlower(un, lowervp);
+			if (cnp && (lowervp != NULLVP) &&
+			    (lowervp->v_type == VREG)) {
+				un->un_hash = cnp->cn_hash;
+				un->un_path = malloc(cnp->cn_namelen+1,
+						M_TEMP, M_WAITOK);
+				bcopy(cnp->cn_nameptr, un->un_path,
+						cnp->cn_namelen);
+				un->un_path[cnp->cn_namelen] = '\0';
+				VREF(dvp);
+				un->un_dirvp = dvp;
+			}
+		} else if (lowervp) {
+			vrele(lowervp);
+		}
+		/*
+		 * NOTE(review): (xlowervp) is not released on this
+		 * path; confirm whether callers expect that.
+		 */
+		*vpp = UNIONTOV(un);
+		return (0);
+	}
+
+	/*
+	 * otherwise lock the vp list while we call getnewvnode
+	 * since that can block.
+	 */
+	hash = UNION_HASH(uppervp, lowervp);
+
+	if (union_list_lock(hash))
+		goto loop;
+
+	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
+	if (error) {
+		if (uppervp) {
+			if (dvp == uppervp)
+				vrele(uppervp);
+			else
+				vput(uppervp);
+		}
+		if (lowervp)
+			vrele(lowervp);
+
+		goto out;
+	}
+
+	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
+		M_TEMP, M_WAITOK);
+
+	if (uppervp)
+		(*vpp)->v_type = uppervp->v_type;
+	else
+		(*vpp)->v_type = lowervp->v_type;
+	un = VTOUNION(*vpp);
+	un->un_vnode = *vpp;
+	un->un_uppervp = uppervp;
+	un->un_lowervp = lowervp;
+	un->un_openl = 0;
+	un->un_flags = UN_LOCKED;
+	if (un->un_uppervp)
+		un->un_flags |= UN_ULOCK;
+#ifdef DIAGNOSTIC
+	if (curproc)
+		un->un_pid = curproc->p_pid;
+	else
+		un->un_pid = -1;
+#endif
+	if (cnp && (lowervp != NULLVP) && (lowervp->v_type == VREG)) {
+		un->un_hash = cnp->cn_hash;
+		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
+		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
+		un->un_path[cnp->cn_namelen] = '\0';
+		VREF(dvp);
+		un->un_dirvp = dvp;
+	} else {
+		un->un_hash = 0;
+		un->un_path = 0;
+		un->un_dirvp = 0;
+	}
+
+	LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
+
+out:
+	/*
+	 * The set-aside lower vnode is never stored in a node, so
+	 * its reference is dropped on the error path as well as on
+	 * success.
+	 */
+	if (xlowervp)
+		vrele(xlowervp);
+
+	union_list_unlock(hash);
+
+	return (error);
+}
+
+/*
+ * Tear down the union_node attached to (vp): unhook it from its hash
+ * chain, drop the vnode references and the saved path name it holds,
+ * and free the node itself.  Always returns 0.
+ */
+int
+union_freevp(vp)
+	struct vnode *vp;
+{
+	struct union_node *un;
+
+	un = VTOUNION(vp);
+	LIST_REMOVE(un, un_cache);
+
+	/* references held on behalf of the node */
+	if (un->un_uppervp != NULLVP)
+		vrele(un->un_uppervp);
+	if (un->un_lowervp != NULLVP)
+		vrele(un->un_lowervp);
+	if (un->un_dirvp != NULLVP)
+		vrele(un->un_dirvp);
+
+	/* saved component name, if any */
+	if (un->un_path != 0)
+		free(un->un_path, M_TEMP);
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+
+	return (0);
+}
+
+/*
+ * copyfile. copy the vnode (fvp) to the vnode (tvp)
+ * using a sequence of reads and writes. both (fvp)
+ * and (tvp) are locked on entry and exit.
+ *
+ * (p) and (cred) are the process and credentials used for
+ * the lease checks and the I/O. returns 0 once a read comes
+ * back with no data, or the first error reported by
+ * VOP_READ or VOP_WRITE.
+ */
+int
+union_copyfile(p, cred, fvp, tvp)
+ struct proc *p;
+ struct ucred *cred;
+ struct vnode *fvp;
+ struct vnode *tvp;
+{
+ char *buf;
+ struct uio uio;
+ struct iovec iov;
+ int error = 0;
+
+ /*
+ * strategy:
+ * allocate a buffer of size MAXBSIZE.
+ * loop doing reads and writes, keeping track
+ * of the current uio offset.
+ * give up at the first sign of trouble.
+ */
+
+ uio.uio_procp = p;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_offset = 0;
+
+ /* each vnode is unlocked around its lease check, then relocked */
+ VOP_UNLOCK(fvp); /* XXX */
+ LEASE_CHECK(fvp, p, cred, LEASE_READ);
+ VOP_LOCK(fvp); /* XXX */
+ VOP_UNLOCK(tvp); /* XXX */
+ LEASE_CHECK(tvp, p, cred, LEASE_WRITE);
+ VOP_LOCK(tvp); /* XXX */
+
+ buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
+
+ /* ugly loop follows... */
+ do {
+ /* remember where this chunk starts so the write can reuse it */
+ off_t offset = uio.uio_offset;
+
+ /* read up to MAXBSIZE bytes into buf */
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ iov.iov_base = buf;
+ iov.iov_len = MAXBSIZE;
+ uio.uio_resid = iov.iov_len;
+ uio.uio_rw = UIO_READ;
+ error = VOP_READ(fvp, &uio, 0, cred);
+
+ if (error == 0) {
+ /* point the same uio at what was read: MAXBSIZE less the residue */
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ iov.iov_base = buf;
+ iov.iov_len = MAXBSIZE - uio.uio_resid;
+ uio.uio_offset = offset;
+ uio.uio_rw = UIO_WRITE;
+ uio.uio_resid = iov.iov_len;
+
+ /* nothing was read: the copy is complete */
+ if (uio.uio_resid == 0)
+ break;
+
+ /* keep writing until the chunk is gone or a write fails */
+ do {
+ error = VOP_WRITE(tvp, &uio, 0, cred);
+ } while ((uio.uio_resid > 0) && (error == 0));
+ }
+
+ } while (error == 0);
+
+ free(buf, M_TEMP);
+ return (error);
+}
+
+/*
+ * Create a shadow directory in the upper layer.
+ * The new vnode is returned locked.
+ *
+ * (um) points to the union mount structure for access to the
+ * the mounting process's credentials.
+ * (dvp) is the directory in which to create the shadow directory.
+ * it is unlocked on entry and exit.
+ * (cnp) is the componentname to be created.
+ * (vpp) is the returned newly created shadow directory, which
+ * is returned locked.
+ *
+ * Returns the error from relookup or VOP_MKDIR, or EEXIST (with
+ * *vpp set to NULLVP) if the name already exists in (dvp).
+ */
+int
+union_mkshadow(um, dvp, cnp, vpp)
+ struct union_mount *um;
+ struct vnode *dvp;
+ struct componentname *cnp;
+ struct vnode **vpp;
+{
+ int error;
+ struct vattr va;
+ struct proc *p = cnp->cn_proc;
+ struct componentname cn;
+
+ /*
+ * policy: when creating the shadow directory in the
+ * upper layer, create it owned by the user who did
+ * the mount, group from parent directory, and mode
+ * 777 modified by umask (ie mostly identical to the
+ * mkdir syscall). (jsp, kb)
+ */
+
+ /*
+ * A new componentname structure must be faked up because
+ * there is no way to know where the upper level cnp came
+ * from or what it is being used for. This must duplicate
+ * some of the work done by NDINIT, some of the work done
+ * by namei, some of the work done by lookup and some of
+ * the work done by VOP_LOOKUP when given a CREATE flag.
+ * Conclusion: Horrible.
+ *
+ * The pathname buffer will be FREEed by VOP_MKDIR.
+ */
+ /* private, NUL-terminated copy of the component name */
+ cn.cn_pnbuf = malloc(cnp->cn_namelen+1, M_NAMEI, M_WAITOK);
+ bcopy(cnp->cn_nameptr, cn.cn_pnbuf, cnp->cn_namelen);
+ cn.cn_pnbuf[cnp->cn_namelen] = '\0';
+
+ cn.cn_nameiop = CREATE;
+ cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+ cn.cn_proc = cnp->cn_proc;
+ /* the caller's credentials for an UNMNT_ABOVE mount, the mounter's otherwise */
+ if (um->um_op == UNMNT_ABOVE)
+ cn.cn_cred = cnp->cn_cred;
+ else
+ cn.cn_cred = um->um_cred;
+ cn.cn_nameptr = cn.cn_pnbuf;
+ cn.cn_namelen = cnp->cn_namelen;
+ cn.cn_hash = cnp->cn_hash;
+ cn.cn_consume = cnp->cn_consume;
+
+ /*
+ * NOTE(review): on a relookup error neither cn.cn_pnbuf nor the
+ * extra reference on dvp is released here -- confirm whether
+ * relookup disposes of them.
+ */
+ VREF(dvp);
+ if (error = relookup(dvp, vpp, &cn))
+ return (error);
+ vrele(dvp);
+
+ /* the name is already there: back out of the create */
+ if (*vpp) {
+ VOP_ABORTOP(dvp, &cn);
+ VOP_UNLOCK(dvp);
+ vrele(*vpp);
+ *vpp = NULLVP;
+ return (EEXIST);
+ }
+
+ VATTR_NULL(&va);
+ va.va_type = VDIR;
+ va.va_mode = um->um_cmode;
+
+ /* LEASE_CHECK: dvp is locked */
+ LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE);
+
+ error = VOP_MKDIR(dvp, vpp, &cn, &va);
+ return (error);
+}
+
+/*
+ * union_vn_create: creates and opens a new shadow file
+ * on the upper union layer.  this function is similar
+ * in spirit to calling vn_open but it avoids calling namei().
+ * the problem with calling namei is that a) it locks too many
+ * things, and b) it doesn't start at the "right" directory,
+ * whereas relookup is told where to start.
+ *
+ * (un) supplies the saved name (un_path), its hash and the
+ * directory (un_dirvp) in which the file is created; (p) is
+ * the process on whose behalf it is created.  on success the
+ * new, opened vnode is returned through (vpp) and 0 is
+ * returned.  otherwise (*vpp) is NULLVP and the result is the
+ * error from relookup, VOP_CREATE or VOP_OPEN, or EEXIST if
+ * the file already exists.
+ */
+int
+union_vn_create(vpp, un, p)
+	struct vnode **vpp;
+	struct union_node *un;
+	struct proc *p;
+{
+	struct vnode *vp;
+	struct ucred *cred = p->p_ucred;
+	struct vattr vat;
+	struct vattr *vap = &vat;
+	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
+	int error;
+	int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
+	struct componentname cn;
+
+	*vpp = NULLVP;
+
+	/*
+	 * Build a new componentname structure (for the same
+	 * reasons outlined in union_mkshadow).
+	 * The difference here is that the file is owned by
+	 * the current user, rather than by the person who
+	 * did the mount, since the current user needs to be
+	 * able to write the file (that's why it is being
+	 * copied in the first place).
+	 */
+	cn.cn_namelen = strlen(un->un_path);
+	/* one extra byte: the terminating NUL is copied as well */
+	cn.cn_pnbuf = (caddr_t) malloc(cn.cn_namelen+1, M_NAMEI, M_WAITOK);
+	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
+	cn.cn_nameiop = CREATE;
+	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+	cn.cn_proc = p;
+	cn.cn_cred = p->p_ucred;
+	cn.cn_nameptr = cn.cn_pnbuf;
+	cn.cn_hash = un->un_hash;
+	cn.cn_consume = 0;
+
+	VREF(un->un_dirvp);
+	if (error = relookup(un->un_dirvp, &vp, &cn))
+		return (error);
+	vrele(un->un_dirvp);
+
+	if (vp) {
+		/* somebody else created it first: back out */
+		VOP_ABORTOP(un->un_dirvp, &cn);
+		if (un->un_dirvp == vp)
+			vrele(un->un_dirvp);
+		else
+			vput(un->un_dirvp);
+		vrele(vp);
+		return (EEXIST);
+	}
+
+	/*
+	 * Good - there was no race to create the file
+	 * so go ahead and create it.  The permissions
+	 * on the file will be 0666 modified by the
+	 * current user's umask.  Access to the file, while
+	 * it is unioned, will require access to the top *and*
+	 * bottom files.  Access when not unioned will simply
+	 * require access to the top-level file.
+	 * TODO: confirm choice of access permissions.
+	 */
+	VATTR_NULL(vap);
+	vap->va_type = VREG;
+	vap->va_mode = cmode;
+	LEASE_CHECK(un->un_dirvp, p, cred, LEASE_WRITE);
+	if (error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap))
+		return (error);
+
+	if (error = VOP_OPEN(vp, fmode, cred, p)) {
+		vput(vp);
+		return (error);
+	}
+
+	/* account for the write open; union_vn_close() undoes this */
+	vp->v_writecount++;
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Close a vnode opened by union_vn_create().
+ *
+ * (fmode) is the mode the vnode was opened with; if it includes
+ * FWRITE the vnode's write count is dropped first.  (cred) and (p)
+ * are handed on to VOP_CLOSE, whose result is returned.
+ */
+int
+union_vn_close(vp, fmode, cred, p)
+	struct vnode *vp;
+	int fmode;
+	struct ucred *cred;
+	struct proc *p;
+{
+	if (fmode & FWRITE)
+		--vp->v_writecount;
+	/* VOP_CLOSE takes the credentials and process as well */
+	return (VOP_CLOSE(vp, fmode, cred, p));
+}
+
+/*
+ * The upper layer object of (un) is gone: unlock it if the node
+ * holds it locked, then detach it from the node.
+ */
+void
+union_removed_upper(un)
+	struct union_node *un;
+{
+	int ulocked = un->un_flags & UN_ULOCK;
+
+	if (ulocked) {
+		un->un_flags &= ~UN_ULOCK;
+		VOP_UNLOCK(un->un_uppervp);
+	}
+
+	/* union_updatevp() drops the reference on the old upper vnode */
+	union_newupper(un, NULLVP);
+}
+
+/*
+ * Return the lower layer vnode behind the union vnode (vp).
+ *
+ * When a lower vnode exists and has the same type as (vp), a
+ * reference is taken with vget() first, and NULLVP is returned if
+ * that fails.  In every other case the stored pointer (possibly
+ * nil) is returned as it is.
+ * NOTE(review): in the type-mismatch case no reference is taken
+ * on the vnode returned -- confirm this is what callers expect.
+ */
+struct vnode *
+union_lowervp(vp)
+	struct vnode *vp;
+{
+	struct union_node *un;
+	int sametype;
+
+	un = VTOUNION(vp);
+	sametype = un->un_lowervp && (vp->v_type == un->un_lowervp->v_type);
+
+	if (sametype && vget(un->un_lowervp, 0))
+		return (NULLVP);
+
+	return (un->un_lowervp);
+}
diff --git a/sys/miscfs/union/union_vfsops.c b/sys/miscfs/union/union_vfsops.c
new file mode 100644
index 000000000000..9fa27460e3d4
--- /dev/null
+++ b/sys/miscfs/union/union_vfsops.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union_vfsops.c 8.7 (Berkeley) 3/5/94
+ */
+
+/*
+ * Union Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+/*
+ * Mount union filesystem
+ *
+ * mp	mount structure being filled in
+ * path	user-space pointer to the mount point name
+ * data	user-space pointer to a struct union_args
+ * ndp	nameidata used to look up args.target
+ * p	calling process
+ *
+ * Returns 0 on success.  On failure the credential copy and any vnode
+ * references taken here are released, as is the union_mount structure
+ * if it had already been allocated, and an errno value is returned.
+ */
+int
+union_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	struct union_args args;
+	struct vnode *lowerrootvp = NULLVP;
+	struct vnode *upperrootvp = NULLVP;
+	struct union_mount *um;
+	struct ucred *cred = 0;
+	struct ucred *scred;
+	struct vattr va;
+	char *cp;
+	int len;
+	u_int size;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		/*
+		 * Need to provide.
+		 * 1. a way to convert between rdonly and rdwr mounts.
+		 * 2. support for nfs exports.
+		 */
+		error = EOPNOTSUPP;
+		goto bad;
+	}
+
+	/*
+	 * Take a copy of the process's credentials.  This isn't
+	 * quite right since the euid will always be zero and we
+	 * want to get the "real" users credentials.  So fix up
+	 * the uid field after taking the copy.
+	 */
+	cred = crdup(p->p_ucred);
+	cred->cr_uid = p->p_cred->p_ruid;
+
+	/*
+	 * Ensure the *real* user has write permission on the
+	 * mounted-on directory.  This allows the mount_union
+	 * command to be made setuid root so allowing anyone
+	 * to do union mounts onto any directory on which they
+	 * have write permission and which they also own.
+	 */
+	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, cred, p);
+	if (error)
+		goto bad;
+	if ((va.va_uid != cred->cr_uid) &&
+	    (cred->cr_uid != 0)) {
+		error = EACCES;
+		goto bad;
+	}
+	error = VOP_ACCESS(mp->mnt_vnodecovered, VWRITE, cred, p);
+	if (error)
+		goto bad;
+
+	/*
+	 * Get argument
+	 */
+	if (error = copyin(data, (caddr_t)&args, sizeof(struct union_args)))
+		goto bad;
+
+	lowerrootvp = mp->mnt_vnodecovered;
+	VREF(lowerrootvp);
+
+	/*
+	 * Find upper node.  Use the real process credentials,
+	 * not the effective ones since this will have come
+	 * through a setuid process (mount_union).  All this
+	 * messing around with permissions is entirely bogus
+	 * and should be removed by allowing any user straight
+	 * past the mount system call.
+	 *
+	 * NOTE(review): the credential swap only brackets NDINIT,
+	 * not namei(); confirm which of the two actually samples
+	 * p->p_ucred.
+	 */
+	scred = p->p_ucred;
+	p->p_ucred = cred;
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT,
+	       UIO_USERSPACE, args.target, p);
+	p->p_ucred = scred;
+
+	if (error = namei(ndp))
+		goto bad;
+
+	upperrootvp = ndp->ni_vp;
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULL;
+
+	if (upperrootvp->v_type != VDIR) {
+		error = EINVAL;
+		goto bad;
+	}
+
+	um = (struct union_mount *) malloc(sizeof(struct union_mount),
+				M_UFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Keep a held reference to the target vnodes.
+	 * They are vrele'd in union_unmount.
+	 *
+	 * Depending on the _BELOW flag, the filesystems are
+	 * viewed in a different order.  In effect, this is the
+	 * same as providing a mount under option to the mount syscall.
+	 */
+
+	um->um_op = args.mntflags & UNMNT_OPMASK;
+	switch (um->um_op) {
+	case UNMNT_ABOVE:
+		um->um_lowervp = lowerrootvp;
+		um->um_uppervp = upperrootvp;
+		break;
+
+	case UNMNT_BELOW:
+		um->um_lowervp = upperrootvp;
+		um->um_uppervp = lowerrootvp;
+		break;
+
+	case UNMNT_REPLACE:
+		vrele(lowerrootvp);
+		lowerrootvp = NULLVP;
+		um->um_uppervp = upperrootvp;
+		um->um_lowervp = lowerrootvp;
+		break;
+
+	default:
+		/*
+		 * Unknown operation.  The mount structure has not been
+		 * attached to mp yet, so release it here; the common
+		 * error path below only knows about cred and the vnodes.
+		 */
+		free((caddr_t) um, M_UFSMNT);	/* XXX */
+		error = EINVAL;
+		goto bad;
+	}
+
+	um->um_cred = cred;
+	um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask;
+
+	/*
+	 * Depending on what you think the MNT_LOCAL flag might mean,
+	 * you may want the && to be || on the conditional below.
+	 * At the moment it has been defined that the filesystem is
+	 * only local if it is all local, ie the MNT_LOCAL flag implies
+	 * that the entire namespace is local.  If you think the MNT_LOCAL
+	 * flag implies that some of the files might be stored locally
+	 * then you will want to change the conditional.
+	 */
+	if (um->um_op == UNMNT_ABOVE) {
+		if (((um->um_lowervp == NULLVP) ||
+		     (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
+		    (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
+			mp->mnt_flag |= MNT_LOCAL;
+	}
+
+	/*
+	 * Copy in the upper layer's RDONLY flag.  This is for the benefit
+	 * of lookup() which explicitly checks the flag, rather than asking
+	 * the filesystem for its own opinion.  This means, that an update
+	 * mount of the underlying filesystem to go from rdonly to rdwr
+	 * will leave the unioned view as read-only.
+	 */
+	mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
+
+	/*
+	 * This is a user mount.  Privilege check for unmount
+	 * will be done in union_unmount.
+	 */
+	mp->mnt_flag |= MNT_USER;
+
+	mp->mnt_data = (qaddr_t) um;
+	getnewfsid(mp, MOUNT_UNION);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+	/*
+	 * The "mounted from" name is the target path prefixed with a
+	 * tag describing the mount operation.  um_op was validated by
+	 * the switch above, so cp is always assigned here.
+	 */
+	switch (um->um_op) {
+	case UNMNT_ABOVE:
+		cp = "<above>";
+		break;
+	case UNMNT_BELOW:
+		cp = "<below>";
+		break;
+	case UNMNT_REPLACE:
+		cp = "";
+		break;
+	}
+	len = strlen(cp);
+	bcopy(cp, mp->mnt_stat.f_mntfromname, len);
+
+	cp = mp->mnt_stat.f_mntfromname + len;
+	len = MNAMELEN - len;
+
+	(void) copyinstr(args.target, cp, len - 1, &size);
+	bzero(cp + size, len - size);
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_mount: from %s, on %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+
+bad:
+	if (cred)
+		crfree(cred);
+	if (upperrootvp)
+		vrele(upperrootvp);
+	if (lowerrootvp)
+		vrele(lowerrootvp);
+	return (error);
+}
+
+/*
+ * VFS start entry point.
+ *
+ * The union layer has nothing to do at start time: the start routine
+ * of each underlying filesystem will already have run when that
+ * filesystem was mounted.  All arguments are ignored and success is
+ * always returned.
+ */
+int
+union_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Free reference to union layer
+ *
+ * Unmount a union filesystem.  Only the user whose real uid matches the
+ * credentials saved in the union_mount, or the superuser, may unmount.
+ * MNT_FORCE is honoured only when the global doforce is non-zero.
+ *
+ * Returns 0 on success, EINVAL for a refused forced unmount, EBUSY when
+ * the union root vnode has more than one user, or the error returned by
+ * suser(), union_root() or vflush().
+ */
+int
+union_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+ struct vnode *um_rootvp;
+ int error;
+ int flags = 0;
+ extern int doforce;
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_unmount(mp = %x)\n", mp);
+#endif
+
+ /* only the mounter, or superuser can unmount */
+ if ((p->p_cred->p_ruid != um->um_cred->cr_uid) &&
+ (error = suser(p->p_ucred, &p->p_acflag)))
+ return (error);
+
+ if (mntflags & MNT_FORCE) {
+ /* union can never be rootfs so don't check for it */
+ if (!doforce)
+ return (EINVAL);
+ flags |= FORCECLOSE;
+ }
+
+ /*
+ * Get the union root vnode; the use count check below treats the
+ * reference obtained here as the single permitted user.
+ */
+ if (error = union_root(mp, &um_rootvp))
+ return (error);
+ if (um_rootvp->v_usecount > 1) {
+ vput(um_rootvp);
+ return (EBUSY);
+ }
+ /* Flush every vnode on this mount except the root. */
+ if (error = vflush(mp, um_rootvp, flags)) {
+ vput(um_rootvp);
+ return (error);
+ }
+
+#ifdef UNION_DIAGNOSTIC
+ vprint("alias root of lower", um_rootvp);
+#endif
+ /*
+ * Discard references to upper and lower target vnodes.
+ */
+ if (um->um_lowervp)
+ vrele(um->um_lowervp);
+ vrele(um->um_uppervp);
+ crfree(um->um_cred);
+ /*
+ * Release reference on underlying root vnode
+ */
+ vput(um_rootvp);
+ /*
+ * And blow it away for future re-use
+ *
+ * NOTE(review): um_rootvp is passed to vgone() after the vput()
+ * above has dropped our reference - confirm this ordering is safe.
+ */
+ vgone(um_rootvp);
+ /*
+ * Finally, throw away the union_mount structure
+ */
+ free(mp->mnt_data, M_UFSMNT); /* XXX */
+ mp->mnt_data = 0;
+ return (0);
+}
+
+/*
+ * Return the root vnode of a union mount in *vpp.
+ *
+ * References are taken on the mount's upper vnode and, when present,
+ * its lower vnode, and both are handed to union_allocvp().  On failure
+ * those references (and the upper lock, if it was taken here) are
+ * dropped again and the error from union_allocvp() is returned.
+ */
+int
+union_root(mp, vpp)
+ struct mount *mp;
+ struct vnode **vpp;
+{
+ struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+ int error;
+ int loselock;
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_root(mp = %x, lvp = %x, uvp = %x)\n", mp,
+ um->um_lowervp,
+ um->um_uppervp);
+#endif
+
+ /*
+ * Return locked reference to root.
+ */
+ VREF(um->um_uppervp);
+ /*
+ * For a BELOW mount whose upper vnode is already locked, do not
+ * try to lock it again; loselock records that the lock was not
+ * taken here.
+ */
+ if ((um->um_op == UNMNT_BELOW) &&
+ VOP_ISLOCKED(um->um_uppervp)) {
+ loselock = 1;
+ } else {
+ VOP_LOCK(um->um_uppervp);
+ loselock = 0;
+ }
+ if (um->um_lowervp)
+ VREF(um->um_lowervp);
+ error = union_allocvp(vpp, mp,
+ (struct vnode *) 0,
+ (struct vnode *) 0,
+ (struct componentname *) 0,
+ um->um_uppervp,
+ um->um_lowervp);
+
+ if (error) {
+ /* Undo the lock (if ours) and the references taken above. */
+ if (!loselock)
+ VOP_UNLOCK(um->um_uppervp);
+ vrele(um->um_uppervp);
+ if (um->um_lowervp)
+ vrele(um->um_lowervp);
+ } else {
+ (*vpp)->v_flag |= VROOT;
+ /* The upper lock is not ours, so the node must not claim it. */
+ if (loselock)
+ VTOUNION(*vpp)->un_flags &= ~UN_ULOCK;
+ }
+
+ return (error);
+}
+
+/*
+ * Quota control is not provided by the union layer; every request is
+ * rejected with EOPNOTSUPP and the arguments are ignored.
+ */
+int
+union_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Report filesystem statistics for a union mount.
+ *
+ * Block and file counts are the sum of the lower layer (if any) and
+ * the upper layer; the type is MOUNT_UNION and the flags, block size
+ * and I/O size are those of the upper layer.  When the result is not
+ * being written into mp->mnt_stat itself, the fsid and mount names are
+ * copied from mp->mnt_stat.
+ *
+ * Returns 0, or the error from VFS_STATFS on either layer.
+ */
+int
+union_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	struct statfs mstat;
+	int lbsize;
+	int error;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_statfs(mp = %x, lvp = %x, uvp = %x)\n", mp,
+			um->um_lowervp,
+	       		um->um_uppervp);
+#endif
+
+	/* Start from zeroes so a missing lower layer contributes nothing. */
+	bzero(&mstat, sizeof(mstat));
+
+	if (um->um_lowervp != NULLVP) {
+		error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p);
+		if (error != 0)
+			return (error);
+	}
+
+	/* Seed the result with the lower layer's counts. */
+	lbsize = mstat.f_bsize;
+	sbp->f_blocks = mstat.f_blocks;
+	sbp->f_bfree = mstat.f_bfree;
+	sbp->f_bavail = mstat.f_bavail;
+	sbp->f_files = mstat.f_files;
+	sbp->f_ffree = mstat.f_ffree;
+
+	/* mstat is reused for the upper layer from here on. */
+	error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, p);
+	if (error != 0)
+		return (error);
+
+	sbp->f_type = MOUNT_UNION;
+	sbp->f_flags = mstat.f_flags;
+	sbp->f_bsize = mstat.f_bsize;
+	sbp->f_iosize = mstat.f_iosize;
+
+	/*
+	 * if the lower and upper blocksizes differ, then frig the
+	 * block counts so that the sizes reported by df make some
+	 * kind of sense.  none of this makes sense though.
+	 */
+	if (lbsize != mstat.f_bsize) {
+		sbp->f_blocks = sbp->f_blocks * lbsize / mstat.f_bsize;
+		sbp->f_bfree = sbp->f_bfree * lbsize / mstat.f_bsize;
+		sbp->f_bavail = sbp->f_bavail * lbsize / mstat.f_bsize;
+	}
+
+	/* Add the upper layer's counts on top. */
+	sbp->f_blocks += mstat.f_blocks;
+	sbp->f_bfree += mstat.f_bfree;
+	sbp->f_bavail += mstat.f_bavail;
+	sbp->f_files += mstat.f_files;
+	sbp->f_ffree += mstat.f_ffree;
+
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync entry point.  Does nothing and reports success.
+ *
+ * XXX - Assumes no data cached at union layer.
+ */
+int
+union_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Lookup by inode number is not provided by the union layer;
+ * always fails with EOPNOTSUPP and leaves *vpp untouched.
+ */
+int
+union_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * File handle to vnode translation is not provided by the union
+ * layer; always fails with EOPNOTSUPP and touches none of its
+ * output arguments.
+ */
+int
+union_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Vnode to file handle translation is not provided by the union
+ * layer; always fails with EOPNOTSUPP and leaves *fhp untouched.
+ */
+int
+union_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/* union_init is defined outside this file; declared here for the table. */
+int union_init __P((void));
+
+/*
+ * VFS operations vector for the union filesystem.
+ *
+ * The initializer is positional, so the entries must stay in the same
+ * order as the members of struct vfsops (declared elsewhere).
+ */
+struct vfsops union_vfsops = {
+ union_mount,
+ union_start,
+ union_unmount,
+ union_root,
+ union_quotactl,
+ union_statfs,
+ union_sync,
+ union_vget,
+ union_fhtovp,
+ union_vptofh,
+ union_init,
+};
diff --git a/sys/miscfs/union/union_vnops.c b/sys/miscfs/union/union_vnops.c
new file mode 100644
index 000000000000..96327b0922d4
--- /dev/null
+++ b/sys/miscfs/union/union_vnops.c
@@ -0,0 +1,1495 @@
+/*
+ * Copyright (c) 1992, 1993, 1994 The Regents of the University of California.
+ * Copyright (c) 1992, 1993, 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union_vnops.c 8.6 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+/*
+ * Make sure the upper vnode of union node (un) is locked: if the node
+ * does not have UN_ULOCK set, call union_fixup() to take the lock and
+ * set the flag.  Expands to a brace-enclosed block and evaluates (un)
+ * more than once, so pass a side-effect-free expression.
+ */
+#define FIXUP(un) { \
+ if (((un)->un_flags & UN_ULOCK) == 0) { \
+ union_fixup(un); \
+ } \
+}
+
+/*
+ * Lock the upper vnode of a union node and record in the node's
+ * flags (UN_ULOCK) that the lock is now held.  Called through the
+ * FIXUP() macro when the flag is found clear.
+ */
+static void
+union_fixup(un)
+	struct union_node *un;
+{
+	struct vnode *uppervp = un->un_uppervp;
+
+	VOP_LOCK(uppervp);
+	un->un_flags |= UN_ULOCK;
+}
+
+/*
+ * Look up one pathname component in a single layer.
+ *
+ * udvp	root vnode of the layer being searched; mount points are not
+ *	followed onto it
+ * dvp	directory in that layer to search
+ * vpp	receives the resulting vnode on success
+ * cnp	component being looked up
+ *
+ * Handles ".." across mount points before the lookup and steps onto
+ * the root of any filesystem mounted on the result afterwards.
+ * Returns 0 or the error from VOP_LOOKUP / VFS_ROOT.
+ */
+static int
+union_lookup1(udvp, dvp, vpp, cnp)
+ struct vnode *udvp;
+ struct vnode *dvp;
+ struct vnode **vpp;
+ struct componentname *cnp;
+{
+ int error;
+ struct vnode *tdvp;
+ struct mount *mp;
+
+ /*
+ * If stepping up the directory tree, check for going
+ * back across the mount point, in which case do what
+ * lookup would do by stepping back down the mount
+ * hierarchy.
+ */
+ if (cnp->cn_flags & ISDOTDOT) {
+ for (;;) {
+ /*
+ * Don't do the NOCROSSMOUNT check
+ * at this level. By definition,
+ * union fs deals with namespaces, not
+ * filesystems.
+ */
+ if ((dvp->v_flag & VROOT) == 0)
+ break;
+
+ /* Swap the root for the vnode its filesystem covers. */
+ tdvp = dvp;
+ dvp = dvp->v_mount->mnt_vnodecovered;
+ vput(tdvp);
+ VREF(dvp);
+ VOP_LOCK(dvp);
+ }
+ }
+
+ error = VOP_LOOKUP(dvp, &tdvp, cnp);
+ if (error)
+ return (error);
+
+ /*
+ * The parent directory will have been unlocked, unless lookup
+ * found the last component. In which case, re-lock the node
+ * here to allow it to be unlocked again (phew) in union_lookup.
+ */
+ if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN))
+ VOP_LOCK(dvp);
+
+ /* From here on dvp names the vnode that was found. */
+ dvp = tdvp;
+
+ /*
+ * Lastly check if the current node is a mount point in
+ * which case walk up the mount hierarchy making sure not to
+ * bump into the root of the mount tree (ie. dvp != udvp).
+ */
+ while (dvp != udvp && (dvp->v_type == VDIR) &&
+ (mp = dvp->v_mountedhere)) {
+
+ /* Mount is locked: ask to be woken, sleep, then re-test. */
+ if (mp->mnt_flag & MNT_MLOCK) {
+ mp->mnt_flag |= MNT_MWAIT;
+ sleep((caddr_t) mp, PVFS);
+ continue;
+ }
+
+ if (error = VFS_ROOT(mp, &tdvp)) {
+ vput(dvp);
+ return (error);
+ }
+
+ vput(dvp);
+ dvp = tdvp;
+ }
+
+ *vpp = dvp;
+ return (0);
+}
+
+/*
+ * Look up a component in a union directory.
+ *
+ * The name is looked up in the upper and lower directories (whichever
+ * exist) via union_lookup1(), and the results are combined into a
+ * union vnode returned through *ap->a_vpp.  If either layer consumes
+ * extra pathname components (cn_consume != 0) that layer's vnode and
+ * error are returned directly.  See the three-case comment in the body
+ * for how the two results are merged.
+ */
+int
+union_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ int error;
+ int uerror, lerror;
+ struct vnode *uppervp, *lowervp;
+ struct vnode *upperdvp, *lowerdvp;
+ struct vnode *dvp = ap->a_dvp;
+ struct union_node *dun = VTOUNION(dvp);
+ struct componentname *cnp = ap->a_cnp;
+ int lockparent = cnp->cn_flags & LOCKPARENT;
+ int rdonly = cnp->cn_flags & RDONLY;
+ struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
+ struct ucred *saved_cred;
+
+ /*
+ * Force LOCKPARENT for the per-layer lookups; the caller's
+ * original setting was saved in lockparent and is restored on
+ * every exit path below.
+ */
+ cnp->cn_flags |= LOCKPARENT;
+
+ upperdvp = dun->un_uppervp;
+ lowerdvp = dun->un_lowervp;
+ uppervp = NULLVP;
+ lowervp = NULLVP;
+
+ /*
+ * do the lookup in the upper level.
+ * if that level consumes additional pathnames,
+ * then assume that something special is going
+ * on and just return that vnode.
+ */
+ if (upperdvp) {
+ FIXUP(dun);
+ uerror = union_lookup1(um->um_uppervp, upperdvp,
+ &uppervp, cnp);
+ /*if (uppervp == upperdvp)
+ dun->un_flags |= UN_KLOCK;*/
+
+ if (cnp->cn_consume != 0) {
+ *ap->a_vpp = uppervp;
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+ return (uerror);
+ }
+ } else {
+ uerror = ENOENT;
+ }
+
+ /*
+ * in a similar way to the upper layer, do the lookup
+ * in the lower layer. this time, if there is some
+ * component magic going on, then vput whatever we got
+ * back from the upper layer and return the lower vnode
+ * instead.
+ */
+ if (lowerdvp) {
+ int nameiop;
+
+ VOP_LOCK(lowerdvp);
+
+ /*
+ * Only do a LOOKUP on the bottom node, since
+ * we won't be making changes to it anyway.
+ */
+ nameiop = cnp->cn_nameiop;
+ cnp->cn_nameiop = LOOKUP;
+ /*
+ * For BELOW mounts the lower lookup is done with the
+ * credentials saved in the union_mount; saved_cred is
+ * only set (and only read back) in that case.
+ */
+ if (um->um_op == UNMNT_BELOW) {
+ saved_cred = cnp->cn_cred;
+ cnp->cn_cred = um->um_cred;
+ }
+ lerror = union_lookup1(um->um_lowervp, lowerdvp,
+ &lowervp, cnp);
+ if (um->um_op == UNMNT_BELOW)
+ cnp->cn_cred = saved_cred;
+ cnp->cn_nameiop = nameiop;
+
+ if (lowervp != lowerdvp)
+ VOP_UNLOCK(lowerdvp);
+
+ if (cnp->cn_consume != 0) {
+ if (uppervp) {
+ if (uppervp == upperdvp)
+ vrele(uppervp);
+ else
+ vput(uppervp);
+ uppervp = NULLVP;
+ }
+ *ap->a_vpp = lowervp;
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+ return (lerror);
+ }
+ } else {
+ lerror = ENOENT;
+ }
+
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+
+ /*
+ * at this point, we have uerror and lerror indicating
+ * possible errors with the lookups in the upper and lower
+ * layers. additionally, uppervp and lowervp are (locked)
+ * references to existing vnodes in the upper and lower layers.
+ *
+ * there are now three cases to consider.
+ * 1. if both layers returned an error, then return whatever
+ * error the upper layer generated.
+ *
+ * 2. if the top layer failed and the bottom layer succeeded
+ * then two subcases occur.
+ * a. the bottom vnode is not a directory, in which
+ * case just return a new union vnode referencing
+ * an empty top layer and the existing bottom layer.
+ * b. the bottom vnode is a directory, in which case
+ * create a new directory in the top-level and
+ * continue as in case 3.
+ *
+ * 3. if the top layer succeeded then return a new union
+ * vnode referencing whatever the new top layer and
+ * whatever the bottom layer returned.
+ */
+
+ *ap->a_vpp = NULLVP;
+
+ /* case 1. */
+ if ((uerror != 0) && (lerror != 0)) {
+ return (uerror);
+ }
+
+ /* case 2. */
+ if (uerror != 0 /* && (lerror == 0) */ ) {
+ if (lowervp->v_type == VDIR) { /* case 2b. */
+ /*
+ * The upper directory is unlocked around the
+ * union_mkshadow() call and re-locked afterwards.
+ * NOTE(review): upperdvp is used here without a
+ * NULL check - confirm it cannot be NULLVP when
+ * only a lower directory exists.
+ */
+ dun->un_flags &= ~UN_ULOCK;
+ VOP_UNLOCK(upperdvp);
+ uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
+ VOP_LOCK(upperdvp);
+ dun->un_flags |= UN_ULOCK;
+
+ if (uerror) {
+ if (lowervp) {
+ vput(lowervp);
+ lowervp = NULLVP;
+ }
+ return (uerror);
+ }
+ }
+ }
+
+ if (lowervp)
+ VOP_UNLOCK(lowervp);
+
+ error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
+ uppervp, lowervp);
+
+ if (error) {
+ if (uppervp)
+ vput(uppervp);
+ if (lowervp)
+ vrele(lowervp);
+ } else {
+ /*
+ * Leave the parent locked only when the caller asked for
+ * LOCKPARENT and this was the last component.
+ */
+ if (*ap->a_vpp != dvp)
+ if (!lockparent || !(cnp->cn_flags & ISLASTCN))
+ VOP_UNLOCK(dvp);
+ }
+
+ return (error);
+}
+
+/*
+ * Create a file in a union directory.
+ *
+ * Creation is only possible when the directory has an upper vnode; the
+ * request is passed to that layer and the new vnode is wrapped in a
+ * union vnode returned through *ap->a_vpp.  The union directory vnode
+ * ap->a_dvp is vput() on every path.
+ *
+ * Returns 0, EROFS if the directory has no upper vnode, or the error
+ * from VOP_CREATE / union_allocvp.
+ */
+int
+union_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/*
+		 * Pick up the mount before giving up our reference on
+		 * the union directory, rather than reading it back from
+		 * the vnode after the vput().
+		 */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_CREATE(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		/*
+		 * NOTE(review): ap->a_dvp is still handed to
+		 * union_allocvp() after the vput() above - confirm what
+		 * union_allocvp() does with it.
+		 */
+		error = union_allocvp(
+				ap->a_vpp,
+				mp,
+				ap->a_dvp,
+				NULLVP,
+				ap->a_cnp,
+				vp,
+				NULLVP);
+		if (error)
+			vput(vp);
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Make a special file in a union directory.
+ *
+ * Only possible when the directory has an upper vnode; the request is
+ * passed to that layer and, if it hands back a vnode, that vnode is
+ * wrapped in a union vnode returned through *ap->a_vpp.  The union
+ * directory vnode ap->a_dvp is vput() on every path.
+ *
+ * Returns 0, EROFS if the directory has no upper vnode, or the error
+ * from VOP_MKNOD / union_allocvp.
+ */
+int
+union_mknod(ap)
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/*
+		 * Pick up the mount before giving up our reference on
+		 * the union directory, rather than reading it back from
+		 * the vnode after the vput().
+		 */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_MKNOD(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		if (vp) {
+			/*
+			 * NOTE(review): ap->a_dvp is still handed to
+			 * union_allocvp() after the vput() above -
+			 * confirm what union_allocvp() does with it.
+			 */
+			error = union_allocvp(
+					ap->a_vpp,
+					mp,
+					ap->a_dvp,
+					NULLVP,
+					ap->a_cnp,
+					vp,
+					NULLVP);
+			if (error)
+				vput(vp);
+		}
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Open a union vnode.
+ *
+ * If an upper vnode exists it is opened.  Otherwise the lower vnode is
+ * opened, except that opening a regular file for writing first creates
+ * the file in the upper layer, copies the lower contents up (unless
+ * O_TRUNC was given), moves any outstanding lower opens across to the
+ * new upper vnode, and then opens the upper vnode.
+ *
+ * Returns 0 or the first error encountered.
+ */
+int
+union_open(ap)
+	struct vop_open_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *tvp;
+	int mode = ap->a_mode;
+	struct ucred *cred = ap->a_cred;
+	struct proc *p = ap->a_p;
+	int error;
+
+	/*
+	 * If there is an existing upper vp then simply open that.
+	 */
+	tvp = un->un_uppervp;
+	if (tvp == NULLVP) {
+		/*
+		 * If the lower vnode is being opened for writing, then
+		 * copy the file contents to the upper vnode and open that,
+		 * otherwise can simply open the lower vnode.
+		 */
+		tvp = un->un_lowervp;
+		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
+			struct vnode *vp;
+			int i;
+
+			/*
+			 * Open the named file in the upper layer.  Note that
+			 * the file may have come into existence *since* the
+			 * lookup was done, since the upper layer may really
+			 * be a loopback mount of some other filesystem...
+			 * so open the file with exclusive create and barf if
+			 * it already exists.
+			 * XXX - perhaps should re-lookup the node (once more
+			 * with feeling) and simply open that.  Who knows.
+			 */
+			error = union_vn_create(&vp, un, p);
+			if (error)
+				return (error);
+
+			/* at this point, uppervp is locked */
+			union_newupper(un, vp);
+			un->un_flags |= UN_ULOCK;
+
+			/*
+			 * Now, if the file is being opened with truncation,
+			 * then the (new) upper vnode is ready to fly,
+			 * otherwise the data from the lower vnode must be
+			 * copied to the upper layer first.  This only works
+			 * for regular files (check is made above).
+			 */
+			if ((mode & O_TRUNC) == 0) {
+				/*
+				 * XXX - should not ignore errors
+				 * from VOP_CLOSE
+				 */
+				VOP_LOCK(tvp);
+				error = VOP_OPEN(tvp, FREAD, cred, p);
+				if (error == 0) {
+					error = union_copyfile(p, cred,
+						       tvp, un->un_uppervp);
+					VOP_UNLOCK(tvp);
+					/*
+					 * Close with the same cred and
+					 * proc the open was done with.
+					 */
+					(void) VOP_CLOSE(tvp, FREAD, cred, p);
+				} else {
+					VOP_UNLOCK(tvp);
+				}
+
+#ifdef UNION_DIAGNOSTIC
+				if (!error)
+					uprintf("union: copied up %s\n",
+								un->un_path);
+#endif
+			}
+
+			/*
+			 * Close the creation-time open of the upper vnode;
+			 * the vnode is unlocked around the close and
+			 * re-locked afterwards.
+			 */
+			un->un_flags &= ~UN_ULOCK;
+			VOP_UNLOCK(un->un_uppervp);
+			union_vn_close(un->un_uppervp, FWRITE, cred, p);
+			VOP_LOCK(un->un_uppervp);
+			un->un_flags |= UN_ULOCK;
+
+			/*
+			 * Subsequent IOs will go to the top layer, so
+			 * call close on the lower vnode and open on the
+			 * upper vnode to ensure that the filesystem keeps
+			 * its references counts right.  This doesn't do
+			 * the right thing with (cred) and (FREAD) though.
+			 * Ignoring error returns is not right, either.
+			 */
+			for (i = 0; i < un->un_openl; i++) {
+				(void) VOP_CLOSE(tvp, FREAD, cred, p);
+				(void) VOP_OPEN(un->un_uppervp, FREAD, cred, p);
+			}
+			un->un_openl = 0;
+
+			if (error == 0)
+				error = VOP_OPEN(un->un_uppervp, mode, cred, p);
+			return (error);
+		}
+
+		/*
+		 * Just open the lower vnode
+		 */
+		un->un_openl++;
+		VOP_LOCK(tvp);
+		error = VOP_OPEN(tvp, mode, cred, p);
+		VOP_UNLOCK(tvp);
+
+		return (error);
+	}
+
+	FIXUP(un);
+
+	error = VOP_OPEN(tvp, mode, cred, p);
+
+	return (error);
+}
+
+/*
+ * Close a union vnode.
+ *
+ * The close goes to the upper vnode when there is one.  Otherwise the
+ * count of opens outstanding on the lower vnode is decremented and the
+ * lower vnode is closed.  Returns the result of VOP_CLOSE.
+ */
+int
+union_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *targetvp = un->un_uppervp;
+
+	if (targetvp == NULLVP) {
+#ifdef UNION_DIAGNOSTIC
+		if (un->un_openl <= 0)
+			panic("union: un_openl cnt");
+#endif
+		un->un_openl--;
+		targetvp = un->un_lowervp;
+	}
+
+	return (VOP_CLOSE(targetvp, ap->a_fflag, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Check access permission on the union vnode.
+ *
+ * If an upper vnode exists, the result of VOP_ACCESS on it is the
+ * answer.  Otherwise the lower vnode is checked with the caller's
+ * credentials and, for a BELOW mount, checked a second time with the
+ * credentials saved in the union_mount, so that no additional file
+ * permissions are given away through the union.
+ *
+ * Returns 0 if access is allowed, the error from VOP_ACCESS otherwise,
+ * or EACCES when the node has neither an upper nor a lower vnode.
+ */
+int
+union_access(ap)
+	struct vop_access_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	int error = EACCES;
+	struct vnode *vp;
+
+	if ((vp = un->un_uppervp) != NULLVP) {
+		FIXUP(un);
+		return (VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p));
+	}
+
+	if ((vp = un->un_lowervp) != NULLVP) {
+		VOP_LOCK(vp);
+		error = VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p);
+		if (error == 0) {
+			/*
+			 * The union_mount hangs off the mount of the union
+			 * vnode itself; the lower vnode belongs to the
+			 * underlying filesystem's mount, whose private data
+			 * is not a union_mount.
+			 */
+			struct union_mount *um =
+			    MOUNTTOUNIONMOUNT(ap->a_vp->v_mount);
+
+			if (um->um_op == UNMNT_BELOW)
+				error = VOP_ACCESS(vp, ap->a_mode,
+						um->um_cred, ap->a_p);
+		}
+		VOP_UNLOCK(vp);
+		if (error)
+			return (error);
+	}
+
+	return (error);
+}
+
+/*
+ * Get the attributes of a union vnode.
+ *
+ * Attributes come from the upper vnode when there is one, otherwise
+ * from the lower vnode.  For a directory present in both layers the
+ * lower directory's link count is added to the upper one's.  In every
+ * case the fsid reported to the caller is replaced with that of the
+ * union mount.
+ *
+ * Returns 0 or the error from VOP_GETATTR on either layer.
+ */
+int
+union_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error;
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *vp;
+	struct vattr *vap;
+	struct vattr va;
+
+	/*
+	 * Some programs walk the filesystem hierarchy by counting
+	 * links to directories to avoid stat'ing all the time.
+	 * This means the link count on directories needs to be "correct".
+	 * The only way to do that is to call getattr on both layers
+	 * and fix up the link count.  The link count will not necessarily
+	 * be accurate but will be large enough to defeat the tree walkers.
+	 */
+
+	vap = ap->a_vap;
+
+	vp = un->un_uppervp;
+	if (vp != NULLVP) {
+		FIXUP(un);
+		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+	}
+
+	if (vp == NULLVP) {
+		vp = un->un_lowervp;
+	} else if (vp->v_type == VDIR) {
+		/*
+		 * Upper directory: also query the lower directory, into
+		 * the scratch vattr, so its link count can be added.
+		 * Only switch to the scratch vattr when there really is
+		 * a lower vnode to fill it in; otherwise it would be
+		 * read uninitialised below.
+		 */
+		vp = un->un_lowervp;
+		if (vp != NULLVP)
+			vap = &va;
+	} else {
+		vp = NULLVP;
+	}
+
+	if (vp != NULLVP) {
+		VOP_LOCK(vp);
+		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+		VOP_UNLOCK(vp);
+		if (error)
+			return (error);
+	}
+
+	if ((vap != ap->a_vap) && (vap->va_type == VDIR))
+		ap->a_vap->va_nlink += vap->va_nlink;
+
+	/*
+	 * Always set the fsid in the caller's vattr, not in whichever
+	 * vattr was used last (which may be the local scratch copy).
+	 */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+	return (0);
+}
+
+/*
+ * Set attributes on a union vnode.
+ *
+ * Attributes can only be set on the upper vnode.  As a special case,
+ * setting the size of a lower-only regular file to zero first creates
+ * an empty file in the upper layer.  Returns EROFS when there is no
+ * upper vnode to operate on, otherwise the result of VOP_SETATTR (or
+ * the error from union_vn_create).
+ */
+int
+union_setattr(ap)
+ struct vop_setattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct union_node *un = VTOUNION(ap->a_vp);
+ int error;
+
+ /*
+ * Handle case of truncating lower object to zero size,
+ * by creating a zero length upper object. This is to
+ * handle the case of open with O_TRUNC and O_CREAT.
+ */
+ if ((un->un_uppervp == NULLVP) &&
+ /* assert(un->un_lowervp != NULLVP) */
+ (un->un_lowervp->v_type == VREG) &&
+ (ap->a_vap->va_size == 0)) {
+ struct vnode *vp;
+
+ error = union_vn_create(&vp, un, ap->a_p);
+ if (error)
+ return (error);
+
+ /* at this point, uppervp is locked */
+ union_newupper(un, vp);
+
+ /*
+ * Close the creation-time open; the vnode is unlocked
+ * around the close, re-locked, and the lock recorded
+ * in the node's flags.
+ */
+ VOP_UNLOCK(vp);
+ union_vn_close(un->un_uppervp, FWRITE, ap->a_cred, ap->a_p);
+ VOP_LOCK(vp);
+ un->un_flags |= UN_ULOCK;
+ }
+
+ /*
+ * Try to set attributes in upper layer,
+ * otherwise return read-only filesystem error.
+ */
+ if (un->un_uppervp != NULLVP) {
+ FIXUP(un);
+ error = VOP_SETATTR(un->un_uppervp, ap->a_vap,
+ ap->a_cred, ap->a_p);
+ } else {
+ error = EROFS;
+ }
+
+ return (error);
+}
+
+/*
+ * Read from a union vnode by reading from the vnode OTHERVP() selects.
+ *
+ * When that is the lower vnode it is locked for the duration of the
+ * read; otherwise FIXUP() makes sure the upper vnode is locked.
+ * Returns the result of VOP_READ.
+ */
+int
+union_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	int is_lower = (targetvp == LOWERVP(ap->a_vp));
+	int error;
+
+	if (is_lower) {
+		VOP_LOCK(targetvp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+	}
+
+	error = VOP_READ(targetvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+
+	if (is_lower) {
+		VOP_UNLOCK(targetvp);
+	}
+
+	return (error);
+}
+
+/*
+ * Write to a union vnode by writing to the vnode OTHERVP() selects.
+ *
+ * When that is the lower vnode it is locked for the duration of the
+ * write; otherwise FIXUP() makes sure the upper vnode is locked.
+ * Returns the result of VOP_WRITE.
+ */
+int
+union_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int error;
+	struct vnode *vp = OTHERVP(ap->a_vp);
+	int dolock = (vp == LOWERVP(ap->a_vp));
+
+	if (dolock)
+		VOP_LOCK(vp);
+	else
+		FIXUP(VTOUNION(ap->a_vp));
+	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+	if (dolock)
+		VOP_UNLOCK(vp);
+
+	return (error);
+}
+
+/*
+ * Pass an ioctl straight through to the vnode OTHERVP() selects and
+ * return its result.
+ */
+int
+union_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_IOCTL(targetvp, ap->a_command, ap->a_data,
+				ap->a_fflag, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Pass a select request straight through to the vnode OTHERVP()
+ * selects and return its result.
+ */
+int
+union_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_SELECT(targetvp, ap->a_which, ap->a_fflags,
+				ap->a_cred, ap->a_p));
+}
+
+/*
+ * Pass an mmap request straight through to the vnode OTHERVP()
+ * selects and return its result.
+ */
+int
+union_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_MMAP(targetvp, ap->a_fflags,
+				ap->a_cred, ap->a_p));
+}
+
+/*
+ * Flush a union vnode by syncing the vnode OTHERVP() selects.
+ *
+ * Nothing is done, and 0 is returned, when there is no such vnode.
+ * A lower vnode is locked around the call; for an upper vnode FIXUP()
+ * makes sure it is locked.  Returns the result of VOP_FSYNC.
+ */
+int
+union_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int  a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = OTHERVP(ap->a_vp);
+	int is_lower;
+	int error;
+
+	if (vp == NULLVP)
+		return (0);
+
+	is_lower = (vp == LOWERVP(ap->a_vp));
+	if (is_lower) {
+		VOP_LOCK(vp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+	}
+
+	error = VOP_FSYNC(vp, ap->a_cred, ap->a_waitfor, ap->a_p);
+
+	if (is_lower) {
+		VOP_UNLOCK(vp);
+	}
+
+	return (error);
+}
+
+/*
+ * Forward a seek to the vnode selected by OTHERVP();
+ * no locking is done here.  Returns the VOP_SEEK result.
+ */
+int
+union_seek(ap)
+	struct vop_seek_args /* {
+		struct vnode *a_vp;
+		off_t  a_oldoff;
+		off_t  a_newoff;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_SEEK(targetvp, ap->a_oldoff, ap->a_newoff,
+	    ap->a_cred));
+}
+
+/*
+ * Remove a file.  The removal is only possible when both the directory
+ * and the file have an upper-layer vnode; otherwise both union vnodes
+ * are released with vput and EROFS is returned.
+ *
+ * In the upper-layer case each union vnode is swapped for a new
+ * reference to its upper vnode (FIXUP, VREF, set UN_KLOCK, vput) and
+ * VOP_REMOVE is called on the upper pair.  On success
+ * union_removed_upper() is called for the file's union node.
+ */
+int
+union_remove(ap)
+	struct vop_remove_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *upperdvp, *uppervp;
+	int error;
+
+	if (!dun->un_uppervp || !un->un_uppervp) {
+		/*
+		 * XXX: should create a whiteout here
+		 */
+		vput(ap->a_dvp);
+		vput(ap->a_vp);
+		return (EROFS);
+	}
+
+	upperdvp = dun->un_uppervp;
+	uppervp = un->un_uppervp;
+
+	FIXUP(dun);
+	VREF(upperdvp);
+	dun->un_flags |= UN_KLOCK;
+	vput(ap->a_dvp);
+	FIXUP(un);
+	VREF(uppervp);
+	un->un_flags |= UN_KLOCK;
+	vput(ap->a_vp);
+
+	error = VOP_REMOVE(upperdvp, uppervp, ap->a_cnp);
+	if (!error)
+		union_removed_upper(un);
+
+	/*
+	 * XXX: should create a whiteout here
+	 */
+	return (error);
+}
+
+/*
+ * Create a hard link.  Both a_vp and a_tdvp must have an upper-layer
+ * vnode; otherwise a_vp is released with vput, a_tdvp with vrele, and
+ * EROFS is returned.
+ *
+ * In the upper-layer case a_vp is swapped for a new reference to its
+ * upper vnode (FIXUP, VREF, set UN_KLOCK, vput); a_tdvp gets FIXUP and
+ * VREF on its upper vnode and is then dropped with vrele.  VOP_LINK is
+ * called on the two upper vnodes and its result returned.
+ */
+int
+union_link(ap)
+	struct vop_link_args /* {
+		struct vnode *a_vp;
+		struct vnode *a_tdvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *vp_un = VTOUNION(ap->a_vp);
+	struct union_node *tdvp_un = VTOUNION(ap->a_tdvp);
+	struct vnode *upper_vp, *upper_tdvp;
+
+	if (!vp_un->un_uppervp || !tdvp_un->un_uppervp) {
+		/*
+		 * XXX: need to copy to upper layer
+		 * and do the link there.
+		 */
+		vput(ap->a_vp);
+		vrele(ap->a_tdvp);
+		return (EROFS);
+	}
+
+	upper_vp = vp_un->un_uppervp;
+	upper_tdvp = tdvp_un->un_uppervp;
+
+	FIXUP(vp_un);
+	VREF(upper_vp);
+	vp_un->un_flags |= UN_KLOCK;
+	vput(ap->a_vp);
+	FIXUP(tdvp_un);
+	VREF(upper_tdvp);
+	vrele(ap->a_tdvp);
+
+	return (VOP_LINK(upper_vp, upper_tdvp, ap->a_cnp));
+}
+/*
+ * Rename, performed on upper-layer vnodes only.
+ *
+ * Each of fdvp, fvp, tdvp and (when non-null) tvp that is a union vnode
+ * is replaced by a new reference to its upper vnode.  If any of them has
+ * no upper vnode the operation fails with EROFS via the bad: label.
+ * The "from" union vnodes are dropped with vrele; the "to" union vnodes
+ * get UN_KLOCK set and are dropped with vput.  The result of VOP_RENAME
+ * on the substituted vnodes is returned.
+ */
+int
+union_rename(ap)
+ struct vop_rename_args /* {
+ struct vnode *a_fdvp;
+ struct vnode *a_fvp;
+ struct componentname *a_fcnp;
+ struct vnode *a_tdvp;
+ struct vnode *a_tvp;
+ struct componentname *a_tcnp;
+ } */ *ap;
+{
+ int error; /* set on every failure path before jumping to bad */
+
+ struct vnode *fdvp = ap->a_fdvp;
+ struct vnode *fvp = ap->a_fvp;
+ struct vnode *tdvp = ap->a_tdvp;
+ struct vnode *tvp = ap->a_tvp;
+
+ if (fdvp->v_op == union_vnodeop_p) { /* always true */
+ struct union_node *un = VTOUNION(fdvp);
+ if (un->un_uppervp == NULLVP) {
+ error = EROFS;
+ goto bad;
+ }
+
+ FIXUP(un);
+ fdvp = un->un_uppervp; /* from here on fdvp is the upper vnode */
+ VREF(fdvp);
+ vrele(ap->a_fdvp);
+ }
+
+ if (fvp->v_op == union_vnodeop_p) { /* always true */
+ struct union_node *un = VTOUNION(fvp);
+ if (un->un_uppervp == NULLVP) {
+ error = EROFS;
+ goto bad;
+ }
+
+ FIXUP(un);
+ fvp = un->un_uppervp; /* from here on fvp is the upper vnode */
+ VREF(fvp);
+ vrele(ap->a_fvp);
+ }
+
+ if (tdvp->v_op == union_vnodeop_p) {
+ struct union_node *un = VTOUNION(tdvp);
+ if (un->un_uppervp == NULLVP) {
+ error = EROFS;
+ goto bad;
+ }
+
+ tdvp = un->un_uppervp;
+ VREF(tdvp);
+ un->un_flags |= UN_KLOCK; /* set before vput, as for tvp below */
+ vput(ap->a_tdvp);
+ }
+
+ if (tvp && tvp->v_op == union_vnodeop_p) { /* tvp may be null */
+ struct union_node *un = VTOUNION(tvp);
+ if (un->un_uppervp == NULLVP) {
+ error = EROFS;
+ goto bad;
+ }
+
+ tvp = un->un_uppervp;
+ VREF(tvp);
+ un->un_flags |= UN_KLOCK;
+ vput(ap->a_tvp);
+ }
+
+ return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
+
+bad: /* release whatever each local currently names (union or upper vnode) */
+ vrele(fdvp);
+ vrele(fvp);
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+
+ return (error);
+}
+
+/*
+ * Create a directory in the upper layer.
+ *
+ * If the parent has no upper-layer vnode the parent is released with
+ * vput and EROFS is returned.  Otherwise VOP_MKDIR is run on the upper
+ * parent and, on success, union_allocvp() builds the union vnode for
+ * the new directory in *a_vpp.  If union_allocvp() fails the new upper
+ * vnode is released with vput.
+ *
+ * The parent union vnode is unlocked before VOP_MKDIR but our reference
+ * to it is kept until union_allocvp() has run, because a_dvp and its
+ * v_mount are still needed as arguments to that call.  (Using vput
+ * before the call would drop the reference while the vnode is still
+ * being used.)
+ */
+int
+union_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+
+		FIXUP(un);
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/* Unlock only; the reference is released further down. */
+		VOP_UNLOCK(ap->a_dvp);
+		error = VOP_MKDIR(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error) {
+			vrele(ap->a_dvp);
+			return (error);
+		}
+
+		error = union_allocvp(
+				ap->a_vpp,
+				ap->a_dvp->v_mount,
+				ap->a_dvp,
+				NULLVP,
+				ap->a_cnp,
+				vp,
+				NULLVP);
+		vrele(ap->a_dvp);
+		if (error)
+			vput(vp);
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Remove a directory.  Both the parent and the directory must have an
+ * upper-layer vnode; otherwise both union vnodes are released with vput
+ * and EROFS is returned.
+ *
+ * In the upper-layer case each union vnode is swapped for a new
+ * reference to its upper vnode (FIXUP, VREF, set UN_KLOCK, vput) and
+ * VOP_RMDIR is called on the upper pair.  On success
+ * union_removed_upper() is called for the directory's union node.
+ */
+int
+union_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *parent_un = VTOUNION(ap->a_dvp);
+	struct union_node *dir_un = VTOUNION(ap->a_vp);
+	struct vnode *upper_parent, *upper_dir;
+	int error;
+
+	if (!(parent_un->un_uppervp && dir_un->un_uppervp)) {
+		/*
+		 * XXX: should create a whiteout here
+		 */
+		vput(ap->a_dvp);
+		vput(ap->a_vp);
+		return (EROFS);
+	}
+
+	upper_parent = parent_un->un_uppervp;
+	upper_dir = dir_un->un_uppervp;
+
+	FIXUP(parent_un);
+	VREF(upper_parent);
+	parent_un->un_flags |= UN_KLOCK;
+	vput(ap->a_dvp);
+	FIXUP(dir_un);
+	VREF(upper_dir);
+	dir_un->un_flags |= UN_KLOCK;
+	vput(ap->a_vp);
+
+	error = VOP_RMDIR(upper_parent, upper_dir, ap->a_cnp);
+	if (!error)
+		union_removed_upper(dir_un);
+
+	/*
+	 * XXX: should create a whiteout here
+	 */
+	return (error);
+}
+
+/*
+ * Create a symbolic link in the upper layer.
+ *
+ * If the parent has no upper-layer vnode the parent is released with
+ * vput and EROFS is returned.  Otherwise the parent union vnode is
+ * swapped for a new reference to its upper vnode (FIXUP, VREF, set
+ * UN_KLOCK, vput) and VOP_SYMLINK is called on it.  No union vnode is
+ * built for the link: *a_vpp is always set to NULLVP and the
+ * VOP_SYMLINK result is returned.
+ */
+int
+union_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;	/* filled in by VOP_SYMLINK, unused here */
+
+		FIXUP(un);
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		vput(ap->a_dvp);
+		error = VOP_SYMLINK(dvp, &vp, ap->a_cnp,
+		    ap->a_vap, ap->a_target);
+		*ap->a_vpp = NULLVP;
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * union_readdir cooperates with getdirentries and readdir(3) to
+ * list the entries of the unioned directories.  Walking down the
+ * union stack is the job of getdirentries; dropping duplicate
+ * names from the returned data stream is the job of readdir(3).
+ *
+ * Here only the upper-layer directory is read: with no upper
+ * vnode nothing is read and 0 is returned, otherwise the
+ * VOP_READDIR result for the upper vnode is returned.
+ */
+int
+union_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct union_node *dir_un = VTOUNION(ap->a_vp);
+
+	if (!dir_un->un_uppervp)
+		return (0);
+
+	FIXUP(dir_un);
+	return (VOP_READDIR(dir_un->un_uppervp, ap->a_uio, ap->a_cred));
+}
+
+/*
+ * Read a symbolic link from the vnode selected by OTHERVP().
+ * A lower-layer vnode is locked around the VOP_READLINK call;
+ * otherwise FIXUP is applied to the union node first.
+ * Returns the VOP_READLINK result.
+ */
+int
+union_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *linkvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (linkvp != LOWERVP(ap->a_vp)) {
+		FIXUP(VTOUNION(ap->a_vp));
+		return (VOP_READLINK(linkvp, ap->a_uio, ap->a_cred));
+	}
+
+	VOP_LOCK(linkvp);
+	error = VOP_READLINK(linkvp, ap->a_uio, ap->a_cred);
+	VOP_UNLOCK(linkvp);
+
+	return (error);
+}
+
+/*
+ * Abort a pending name operation on the vnode selected by OTHERVP().
+ *
+ * Lock handling only happens when the union node is currently flagged
+ * UN_LOCKED: a lower-layer vnode is then locked around the
+ * VOP_ABORTOP call, while for any other vnode FIXUP is applied to the
+ * union node.  Returns the VOP_ABORTOP result.
+ */
+int
+union_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	int waslocked = un->un_flags & UN_LOCKED;
+	int islower = (targetvp == LOWERVP(ap->a_dvp));
+	int locklower = (waslocked && islower);
+	int error;
+
+	if (locklower) {
+		VOP_LOCK(targetvp);
+	} else if (waslocked) {
+		FIXUP(VTOUNION(ap->a_dvp));
+	}
+
+	error = VOP_ABORTOP(targetvp, ap->a_cnp);
+
+	if (locklower)
+		VOP_UNLOCK(targetvp);
+
+	return (error);
+}
+
+int
+union_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ /*
+ * Do nothing (and _don't_ bypass).
+ * Wait to vrele lowervp until reclaim,
+ * so that until then our union_node is in the
+ * cache and reusable.
+ *
+ * NEEDSWORK: Someday, consider inactive'ing
+ * the lowervp and then trying to reactivate it
+ * with capabilities (v_id)
+ * like they do in the name lookup cache code.
+ * That's too much work for now.
+ *
+ * Always returns 0.  When UNION_DIAGNOSTIC is defined,
+ * a node that is still flagged UN_LOCKED at this point
+ * causes a panic.
+ */
+
+#ifdef UNION_DIAGNOSTIC
+ struct union_node *un = VTOUNION(ap->a_vp);
+
+ if (un->un_flags & UN_LOCKED)
+ panic("union: inactivating locked node");
+#endif
+
+ return (0);
+}
+
+/*
+ * Reclaim a union vnode: hand it to union_freevp() and report success.
+ */
+int
+union_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	union_freevp(vp);
+
+	return (0);
+}
+/*
+ * Lock a union vnode.
+ *
+ * Waits for VXLOCK to clear on the vnode, then locks the upper vnode
+ * (if there is one and UN_ULOCK is not already set).  If another holder
+ * has UN_LOCKED set, UN_WANT is flagged, the caller sleeps on
+ * &un->un_flags and the whole sequence restarts.  On return UN_LOCKED
+ * is set.  Always returns 0.
+ *
+ * With DIAGNOSTIC defined the holder's pid is recorded in un_pid and
+ * two inconsistencies panic: UN_KLOCK still set, and a process trying
+ * to lock a node it already holds.
+ */
+int
+union_lock(ap)
+ struct vop_lock_args *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct union_node *un;
+
+start:
+ while (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT; /* flag that someone is waiting on vp */
+ sleep((caddr_t)vp, PINOD);
+ }
+
+ un = VTOUNION(vp); /* fetched only after VXLOCK has cleared */
+
+ if (un->un_uppervp) {
+ if ((un->un_flags & UN_ULOCK) == 0) {
+ un->un_flags |= UN_ULOCK; /* flag is set before the lock call */
+ VOP_LOCK(un->un_uppervp);
+ }
+#ifdef DIAGNOSTIC
+ if (un->un_flags & UN_KLOCK)
+ panic("union: dangling upper lock");
+#endif
+ }
+
+ if (un->un_flags & UN_LOCKED) {
+#ifdef DIAGNOSTIC
+ if (curproc && un->un_pid == curproc->p_pid &&
+ un->un_pid > -1 && curproc->p_pid > -1)
+ panic("union: locking against myself");
+#endif
+ un->un_flags |= UN_WANT; /* union_unlock wakes &un->un_flags when this is set */
+ sleep((caddr_t) &un->un_flags, PINOD);
+ goto start;
+ }
+
+#ifdef DIAGNOSTIC
+ if (curproc)
+ un->un_pid = curproc->p_pid;
+ else
+ un->un_pid = -1; /* no current process to record */
+#endif
+
+ un->un_flags |= UN_LOCKED;
+ return (0);
+}
+
+/*
+ * Unlock a union vnode.
+ *
+ * Clears UN_LOCKED.  The upper vnode is unlocked only when UN_ULOCK is
+ * set and UN_KLOCK is not; both flags are then cleared either way, so
+ * a caller that set UN_KLOCK keeps the upper vnode locked.  Any sleeper
+ * that flagged UN_WANT is woken on &un->un_flags.  Always returns 0.
+ *
+ * With DIAGNOSTIC defined, unlocking a node that is not locked, or one
+ * recorded as held by a different process, panics.
+ *
+ * The argument is a vop_unlock_args (the declaration previously named
+ * vop_lock_args; only a_vp is used).
+ */
+int
+union_unlock(ap)
+	struct vop_unlock_args *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+#ifdef DIAGNOSTIC
+	if ((un->un_flags & UN_LOCKED) == 0)
+		panic("union: unlock unlocked node");
+	if (curproc && un->un_pid != curproc->p_pid &&
+			curproc->p_pid > -1 && un->un_pid > -1)
+		panic("union: unlocking other process's union node");
+#endif
+
+	un->un_flags &= ~UN_LOCKED;
+
+	/* Release the upper lock unless the caller asked to keep it. */
+	if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
+		VOP_UNLOCK(un->un_uppervp);
+
+	un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
+
+	if (un->un_flags & UN_WANT) {
+		un->un_flags &= ~UN_WANT;
+		wakeup((caddr_t) &un->un_flags);
+	}
+
+#ifdef DIAGNOSTIC
+	un->un_pid = 0;
+#endif
+
+	return (0);
+}
+
+/*
+ * Map a logical block through the vnode selected by OTHERVP().
+ * A lower-layer vnode is locked around the VOP_BMAP call; otherwise
+ * FIXUP is applied to the union node first.
+ * Returns the VOP_BMAP result.
+ */
+int
+union_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct vnode *mapvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (mapvp != LOWERVP(ap->a_vp)) {
+		FIXUP(VTOUNION(ap->a_vp));
+		return (VOP_BMAP(mapvp, ap->a_bn, ap->a_vpp, ap->a_bnp,
+		    ap->a_runp));
+	}
+
+	VOP_LOCK(mapvp);
+	error = VOP_BMAP(mapvp, ap->a_bn, ap->a_vpp, ap->a_bnp, ap->a_runp);
+	VOP_UNLOCK(mapvp);
+
+	return (error);
+}
+
+/*
+ * Print the union vnode pointer together with its upper and lower
+ * vnode pointers.  Always returns 0.
+ */
+int
+union_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *unionvp = ap->a_vp;
+	struct vnode *upper = UPPERVP(unionvp);
+	struct vnode *lower = LOWERVP(unionvp);
+
+	printf("\ttag VT_UNION, vp=%x, uppervp=%x, lowervp=%x\n",
+	    unionvp, upper, lower);
+	return (0);
+}
+
+/*
+ * Report whether the union node is flagged UN_LOCKED: 1 if so, else 0.
+ */
+int
+union_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+	if (un->un_flags & UN_LOCKED)
+		return (1);
+	return (0);
+}
+
+/*
+ * Answer a pathconf query from the vnode selected by OTHERVP().
+ * A lower-layer vnode is locked around the VOP_PATHCONF call;
+ * otherwise FIXUP is applied to the union node first.
+ * Returns the VOP_PATHCONF result.
+ */
+int
+union_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	struct vnode *confvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (confvp != LOWERVP(ap->a_vp)) {
+		FIXUP(VTOUNION(ap->a_vp));
+		return (VOP_PATHCONF(confvp, ap->a_name, ap->a_retval));
+	}
+
+	VOP_LOCK(confvp);
+	error = VOP_PATHCONF(confvp, ap->a_name, ap->a_retval);
+	VOP_UNLOCK(confvp);
+
+	return (error);
+}
+
+/*
+ * Forward an advisory-lock request to the vnode selected by OTHERVP();
+ * no vnode locking is done here.  Returns the VOP_ADVLOCK result.
+ */
+int
+union_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_ADVLOCK(targetvp, ap->a_id, ap->a_op,
+	    ap->a_fl, ap->a_flags));
+}
+
+
+/*
+ * XXX - vop_strategy must be hand coded because it has no
+ * vnode in its arguments.
+ * This goes away with a merged VM/buffer cache.
+ *
+ * The buffer's b_vp is temporarily replaced by the vnode that
+ * OTHERVP() selects for it, VOP_STRATEGY is called on the buffer,
+ * and b_vp is restored before the result is returned.
+ * With DIAGNOSTIC defined, a nil replacement vnode or a non-read
+ * request aimed at the lower vnode causes a panic.
+ */
+int
+union_strategy(ap)
+ struct vop_strategy_args /* {
+ struct buf *a_bp;
+ } */ *ap;
+{
+ struct buf *bp = ap->a_bp;
+ int error;
+ struct vnode *savedvp; /* the union vnode, put back after the call */
+
+ savedvp = bp->b_vp;
+ bp->b_vp = OTHERVP(bp->b_vp);
+
+#ifdef DIAGNOSTIC
+ if (bp->b_vp == NULLVP)
+ panic("union_strategy: nil vp");
+ if (((bp->b_flags & B_READ) == 0) &&
+ (bp->b_vp == LOWERVP(savedvp)))
+ panic("union_strategy: writing to lowervp");
+#endif
+
+ error = VOP_STRATEGY(bp);
+ bp->b_vp = savedvp;
+
+ return (error);
+}
+
+/*
+ * Global vfs data structures
+ *
+ * union_vnodeop_entries pairs each vnode operation descriptor with
+ * the union function that implements it; the first entry supplies
+ * vn_default_error for vop_default_desc and the list ends with a
+ * null pair.  The entries inside "#ifdef notdef" are compiled out.
+ * union_vnodeop_opv_desc ties the table to union_vnodeop_p, the
+ * operations vector pointer that union_rename compares against v_op.
+ */
+int (**union_vnodeop_p)();
+struct vnodeopv_entry_desc union_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, union_lookup }, /* lookup */
+ { &vop_create_desc, union_create }, /* create */
+ { &vop_mknod_desc, union_mknod }, /* mknod */
+ { &vop_open_desc, union_open }, /* open */
+ { &vop_close_desc, union_close }, /* close */
+ { &vop_access_desc, union_access }, /* access */
+ { &vop_getattr_desc, union_getattr }, /* getattr */
+ { &vop_setattr_desc, union_setattr }, /* setattr */
+ { &vop_read_desc, union_read }, /* read */
+ { &vop_write_desc, union_write }, /* write */
+ { &vop_ioctl_desc, union_ioctl }, /* ioctl */
+ { &vop_select_desc, union_select }, /* select */
+ { &vop_mmap_desc, union_mmap }, /* mmap */
+ { &vop_fsync_desc, union_fsync }, /* fsync */
+ { &vop_seek_desc, union_seek }, /* seek */
+ { &vop_remove_desc, union_remove }, /* remove */
+ { &vop_link_desc, union_link }, /* link */
+ { &vop_rename_desc, union_rename }, /* rename */
+ { &vop_mkdir_desc, union_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, union_rmdir }, /* rmdir */
+ { &vop_symlink_desc, union_symlink }, /* symlink */
+ { &vop_readdir_desc, union_readdir }, /* readdir */
+ { &vop_readlink_desc, union_readlink }, /* readlink */
+ { &vop_abortop_desc, union_abortop }, /* abortop */
+ { &vop_inactive_desc, union_inactive }, /* inactive */
+ { &vop_reclaim_desc, union_reclaim }, /* reclaim */
+ { &vop_lock_desc, union_lock }, /* lock */
+ { &vop_unlock_desc, union_unlock }, /* unlock */
+ { &vop_bmap_desc, union_bmap }, /* bmap */
+ { &vop_strategy_desc, union_strategy }, /* strategy */
+ { &vop_print_desc, union_print }, /* print */
+ { &vop_islocked_desc, union_islocked }, /* islocked */
+ { &vop_pathconf_desc, union_pathconf }, /* pathconf */
+ { &vop_advlock_desc, union_advlock }, /* advlock */
+#ifdef notdef
+ { &vop_blkatoff_desc, union_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, union_valloc }, /* valloc */
+ { &vop_vfree_desc, union_vfree }, /* vfree */
+ { &vop_truncate_desc, union_truncate }, /* truncate */
+ { &vop_update_desc, union_update }, /* update */
+ { &vop_bwrite_desc, union_bwrite }, /* bwrite */
+#endif
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc union_vnodeop_opv_desc =
+ { &union_vnodeop_p, union_vnodeop_entries };
diff --git a/sys/net/bpf.c b/sys/net/bpf.c
new file mode 100644
index 000000000000..e40b769b9802
--- /dev/null
+++ b/sys/net/bpf.c
@@ -0,0 +1,1316 @@
+/*
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf.c 8.2 (Berkeley) 3/28/94
+ *
+ * static char rcsid[] =
+ * "$Header: bpf.c,v 1.33 91/10/27 21:21:58 mccanne Exp $";
+ */
+
+#include "bpfilter.h"
+
+#if NBPFILTER > 0
+
+#ifndef __GNUC__
+#define inline
+#else
+#define inline __inline
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/buf.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+#include <sys/ioctl.h>
+#include <sys/map.h>
+
+#include <sys/file.h>
+#if defined(sparc) && BSD < 199103
+#include <sys/stream.h>
+#endif
+#include <sys/tty.h>
+#include <sys/uio.h>
+
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <net/if.h>
+
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
+
+#include <sys/errno.h>
+
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <sys/kernel.h>
+
+/*
+ * Older BSDs don't have kernel malloc.
+ */
+#if BSD < 199103
+extern bcopy();
+static caddr_t bpf_alloc();
+#include <net/bpf_compat.h>
+#define BPF_BUFSIZE (MCLBYTES-8)
+#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, code, uio)
+#else
+#define BPF_BUFSIZE 4096
+#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
+#endif
+
+#define PRINET 26 /* interruptible */
+
+/*
+ * The default read buffer size is patchable.
+ */
+int bpf_bufsize = BPF_BUFSIZE;
+
+/*
+ * bpf_iflist is the list of interfaces; each corresponds to an ifnet
+ * bpf_dtab holds the descriptors, indexed by minor device #
+ */
+struct bpf_if *bpf_iflist;
+struct bpf_d bpf_dtab[NBPFILTER];
+
+#if BSD >= 199207
+/*
+ * bpfilterattach() is called at boot time in new systems. We do
+ * nothing here since old systems will not call this.
+ * The argument n is ignored and the body is empty; the function is
+ * only compiled when BSD >= 199207.
+ */
+/* ARGSUSED */
+void
+bpfilterattach(n)
+ int n;
+{
+}
+#endif
+
+/*
+ * Prototypes for the file-local helpers, one declaration per function.
+ */
+static int	bpf_allocbufs __P((struct bpf_d *));
+static void	bpf_freed __P((struct bpf_d *));
+static void	bpf_ifname __P((struct ifnet *, struct ifreq *));
+static void	bpf_mcopy __P((const void *, void *, u_int));
+static int	bpf_movein __P((struct uio *, int,
+		    struct mbuf **, struct sockaddr *, int *));
+static int	bpf_setif __P((struct bpf_d *, struct ifreq *));
+static inline void
+		bpf_wakeup __P((struct bpf_d *));
+static void	catchpacket __P((struct bpf_d *, u_char *, u_int,
+		    u_int, void (*)(const void *, void *, u_int)));
+static void	reset_d __P((struct bpf_d *));
+
+/*
+ * Copy a packet being written to a bpf descriptor from user space
+ * (uio) into a newly allocated mbuf.
+ *
+ * linktype selects the link-header length hlen and the address family
+ * stored in sockp.  The first hlen bytes of the write are copied into
+ * sockp->sa_data, the remainder into the mbuf.  On success *mp is the
+ * mbuf, *datlen is the payload length (total length minus hlen) and 0
+ * is returned.  Errors: EIO for an unknown link type, a write shorter
+ * than the link header or one larger than MCLBYTES; ENOBUFS when no
+ * mbuf or cluster is available; otherwise the uiomove error.  On the
+ * error paths taken after allocation the mbuf is freed here.
+ *
+ * NOTE(review): hlen bytes are copied into sockp->sa_data; confirm
+ * that sa_data is large enough for the DLT_FDDI case (hlen 24).
+ */
+static int
+bpf_movein(uio, linktype, mp, sockp, datlen)
+	register struct uio *uio;
+	int linktype, *datlen;
+	register struct mbuf **mp;
+	register struct sockaddr *sockp;
+{
+	struct mbuf *m;
+	int error;
+	int len;
+	int hlen;
+
+	/*
+	 * Build a sockaddr based on the data link layer type.
+	 * We do this at this level because the ethernet header
+	 * is copied directly into the data field of the sockaddr.
+	 * In the case of SLIP, there is no header and the packet
+	 * is forwarded as is.
+	 * Also, we are careful to leave room at the front of the mbuf
+	 * for the link level header.
+	 */
+	switch (linktype) {
+
+	case DLT_SLIP:
+		sockp->sa_family = AF_INET;
+		hlen = 0;
+		break;
+
+	case DLT_EN10MB:
+		sockp->sa_family = AF_UNSPEC;
+		/* XXX Would MAXLINKHDR be better? */
+		hlen = sizeof(struct ether_header);
+		break;
+
+	case DLT_FDDI:
+		sockp->sa_family = AF_UNSPEC;
+		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
+		hlen = 24;
+		break;
+
+	case DLT_NULL:
+		sockp->sa_family = AF_UNSPEC;
+		hlen = 0;
+		break;
+
+	default:
+		return (EIO);
+	}
+
+	len = uio->uio_resid;
+	*datlen = len - hlen;
+	/*
+	 * Reject a write too short to contain the link header (it would
+	 * make the mbuf length and the second copy length negative) as
+	 * well as one too large for a cluster.
+	 */
+	if (len < hlen || (unsigned)len > MCLBYTES)
+		return (EIO);
+
+	MGET(m, M_WAIT, MT_DATA);
+	if (m == 0)
+		return (ENOBUFS);
+	if (len > MLEN) {
+#if BSD >= 199103
+		MCLGET(m, M_WAIT);
+		if ((m->m_flags & M_EXT) == 0) {
+#else
+		MCLGET(m);
+		if (m->m_len != MCLBYTES) {
+#endif
+			error = ENOBUFS;
+			goto bad;
+		}
+	}
+	m->m_len = len;
+	*mp = m;
+	/*
+	 * Make room for link header.
+	 */
+	if (hlen != 0) {
+		m->m_len -= hlen;
+#if BSD >= 199103
+		m->m_data += hlen; /* XXX */
+#else
+		m->m_off += hlen;
+#endif
+		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
+		if (error)
+			goto bad;
+	}
+	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
+	if (!error)
+		return (0);
+ bad:
+	m_freem(m);
+	return (error);
+}
+
+/*
+ * Make descriptor d a listener on bpf interface bp.
+ * Must be called at splimp.
+ */
+static void
+bpf_attachd(d, bp)
+	struct bpf_d *d;
+	struct bpf_if *bp;
+{
+	struct bpf_d *oldhead = bp->bif_dlist;
+
+	/*
+	 * Push d onto the front of the interface's listener list and
+	 * record the interface in the descriptor.
+	 */
+	d->bd_next = oldhead;
+	bp->bif_dlist = d;
+	d->bd_bif = bp;
+
+	/*
+	 * Point the driver's bpf cookie at the interface so that it
+	 * will divert packets to bpf.
+	 */
+	*bp->bif_driverp = bp;
+}
+
+/*
+ * Detach a file from its interface.
+ *
+ * Turns promiscuous mode off again if this descriptor had requested
+ * it (panicking if ifpromisc fails), unlinks d from the interface's
+ * bif_dlist, clears the driver's cookie (*bif_driverp) when no
+ * listeners remain, and finally clears d->bd_bif.
+ *
+ * NOTE(review): the unlink loop follows (*p)->bd_next before testing
+ * *p for 0, so it relies on the list being non-empty on entry.
+ */
+static void
+bpf_detachd(d)
+ struct bpf_d *d;
+{
+ struct bpf_d **p; /* link that currently points at the candidate entry */
+ struct bpf_if *bp;
+
+ bp = d->bd_bif;
+ /*
+ * Check if this descriptor had requested promiscuous mode.
+ * If so, turn it off.
+ */
+ if (d->bd_promisc) {
+ d->bd_promisc = 0;
+ if (ifpromisc(bp->bif_ifp, 0))
+ /*
+ * Something is really wrong if we were able to put
+ * the driver into promiscuous mode, but can't
+ * take it out.
+ */
+ panic("bpf: ifpromisc failed");
+ }
+ /* Remove d from the interface's descriptor list. */
+ p = &bp->bif_dlist;
+ while (*p != d) {
+ p = &(*p)->bd_next;
+ if (*p == 0)
+ panic("bpf_detachd: descriptor not in list");
+ }
+ *p = (*p)->bd_next; /* splice d out of the list */
+ if (bp->bif_dlist == 0)
+ /*
+ * Let the driver know that there are no more listeners.
+ */
+ *d->bd_bif->bif_driverp = 0;
+ d->bd_bif = 0;
+}
+
+
+/*
+ * Mark a descriptor free by making it point to itself.
+ * This is probably cheaper than marking with a constant since
+ * the address should be in a register anyway.
+ *
+ * D_ISFREE tests for the self-pointing bd_next, D_MARKFREE creates
+ * it, and D_MARKUSED stores 0 in bd_next instead.
+ */
+#define D_ISFREE(d) ((d) == (d)->bd_next)
+#define D_MARKFREE(d) ((d)->bd_next = (d))
+#define D_MARKUSED(d) ((d)->bd_next = 0)
+
+/*
+ * Open ethernet device. Returns ENXIO for illegal minor device number,
+ * EBUSY if file is open by another process.
+ * On success the descriptor for this minor is zeroed, its buffer size
+ * is set from bpf_bufsize, and 0 is returned.  The flag argument is
+ * not used.
+ */
+/* ARGSUSED */
+int
+bpfopen(dev, flag)
+ dev_t dev;
+ int flag;
+{
+ register struct bpf_d *d;
+
+ if (minor(dev) >= NBPFILTER)
+ return (ENXIO);
+ /*
+ * Each minor can be opened by only one process. If the requested
+ * minor is in use, return EBUSY.
+ */
+ d = &bpf_dtab[minor(dev)];
+ if (!D_ISFREE(d))
+ return (EBUSY);
+
+ /*
+ * Mark "in use" and do most initialization: zeroing the descriptor
+ * clears bd_next, so D_ISFREE() no longer holds for it.
+ */
+ bzero((char *)d, sizeof(*d));
+ d->bd_bufsize = bpf_bufsize;
+
+ return (0);
+}
+
+/*
+ * Close a descriptor: detach it from its interface (if it has one)
+ * at splimp, then hand it to bpf_freed().  Always returns 0.
+ */
+/* ARGSUSED */
+int
+bpfclose(dev, flag)
+	dev_t dev;
+	int flag;
+{
+	register struct bpf_d *desc = &bpf_dtab[minor(dev)];
+	register int ipl;
+
+	ipl = splimp();
+	if (desc->bd_bif) {
+		bpf_detachd(desc);
+	}
+	splx(ipl);
+
+	bpf_freed(desc);
+
+	return (0);
+}
+
+/*
+ * Support for SunOS, which does not have tsleep.
+ *
+ * When BSD < 199103, BPF_SLEEP() ignores every argument but the
+ * channel and calls bpf_sleep(), which emulates a timed sleep with
+ * timeout()/sleep(): bpf_timeout() sets bd_timedout and wakes the
+ * sleeper.  bpf_sleep() returns EWOULDBLOCK when the timer fired and
+ * sleep() returned 0, EINTR when sleep() returned non-zero, and 0
+ * otherwise.  On newer systems BPF_SLEEP is simply tsleep.
+ */
+#if BSD < 199103
+static
+bpf_timeout(arg)
+ caddr_t arg;
+{
+ struct bpf_d *d = (struct bpf_d *)arg;
+ d->bd_timedout = 1;
+ wakeup(arg);
+}
+
+#define BPF_SLEEP(chan, pri, s, t) bpf_sleep((struct bpf_d *)chan)
+
+int
+bpf_sleep(d)
+ register struct bpf_d *d;
+{
+ register int rto = d->bd_rtout; /* 0 means no timer is armed */
+ register int st;
+
+ if (rto != 0) {
+ d->bd_timedout = 0;
+ timeout(bpf_timeout, (caddr_t)d, rto);
+ }
+ st = sleep((caddr_t)d, PRINET|PCATCH);
+ if (rto != 0) {
+ if (d->bd_timedout == 0)
+ untimeout(bpf_timeout, (caddr_t)d); /* woken before the timer fired: cancel it */
+ else if (st == 0)
+ return EWOULDBLOCK;
+ }
+ return (st != 0) ? EINTR : 0;
+}
+#else
+#define BPF_SLEEP tsleep
+#endif
+
+/*
+ * Rotate the packet buffers in descriptor d. Move the store buffer
+ * into the hold slot, and the free buffer into the store slot.
+ * Zero the length of the new store buffer.
+ *
+ * The macro expands to several separate statements (it is not wrapped
+ * in do { } while (0)), so it must only be used where a braced block
+ * or a plain statement sequence is acceptable, never as the sole body
+ * of an unbraced if/else or loop.
+ */
+#define ROTATE_BUFFERS(d) \
+ (d)->bd_hbuf = (d)->bd_sbuf; \
+ (d)->bd_hlen = (d)->bd_slen; \
+ (d)->bd_sbuf = (d)->bd_fbuf; \
+ (d)->bd_slen = 0; \
+ (d)->bd_fbuf = 0;
+/*
+ * bpfread - read next chunk of packets from buffers
+ *
+ * The caller's buffer must be exactly bd_bufsize bytes, otherwise
+ * EINVAL is returned.  While the hold buffer is empty the caller
+ * sleeps (with the descriptor's read timeout) unless immediate mode is
+ * set and the store buffer already has data.  An interrupted sleep
+ * returns EINTR or ERESTART; a timeout with nothing buffered returns 0
+ * with no data.  Otherwise the hold buffer is copied out with uiomove,
+ * then recycled as the free buffer, and the uiomove result is
+ * returned.
+ */
+int
+bpfread(dev, uio)
+ dev_t dev;
+ register struct uio *uio;
+{
+ register struct bpf_d *d = &bpf_dtab[minor(dev)];
+ int error;
+ int s;
+
+ /*
+ * Restrict application to use a buffer the same size as
+ * kernel buffers.
+ */
+ if (uio->uio_resid != d->bd_bufsize)
+ return (EINVAL);
+
+ s = splimp();
+ /*
+ * If the hold buffer is empty, then do a timed sleep, which
+ * ends when the timeout expires or when enough packets
+ * have arrived to fill the store buffer.
+ */
+ while (d->bd_hbuf == 0) {
+ if (d->bd_immediate && d->bd_slen != 0) {
+ /*
+ * A packet(s) either arrived since the previous
+ * read or arrived while we were asleep.
+ * Rotate the buffers and return what's here.
+ */
+ ROTATE_BUFFERS(d);
+ break;
+ }
+ error = BPF_SLEEP((caddr_t)d, PRINET|PCATCH, "bpf",
+ d->bd_rtout);
+ if (error == EINTR || error == ERESTART) {
+ splx(s);
+ return (error);
+ }
+ if (error == EWOULDBLOCK) {
+ /*
+ * On a timeout, return what's in the buffer,
+ * which may be nothing. If there is something
+ * in the store buffer, we can rotate the buffers.
+ */
+ if (d->bd_hbuf)
+ /*
+ * We filled up the buffer in between
+ * getting the timeout and arriving
+ * here, so we don't need to rotate.
+ */
+ break;
+
+ if (d->bd_slen == 0) {
+ splx(s);
+ return (0);
+ }
+ ROTATE_BUFFERS(d);
+ break;
+ }
+ }
+ /*
+ * At this point, we know we have something in the hold slot.
+ */
+ splx(s);
+
+ /*
+ * Move data from hold buffer into user space.
+ * We know the entire buffer is transferred since
+ * we checked above that the read buffer is bpf_bufsize bytes.
+ */
+ error = UIOMOVE(d->bd_hbuf, d->bd_hlen, UIO_READ, uio);
+
+ s = splimp();
+ d->bd_fbuf = d->bd_hbuf; /* the drained hold buffer becomes the free buffer */
+ d->bd_hbuf = 0;
+ d->bd_hlen = 0;
+ splx(s);
+
+ return (error);
+}
+
+
+/*
+ * If there are processes sleeping on this descriptor, wake them up.
+ * Sleepers on the descriptor address are woken with wakeup(), and
+ * select waiters with selwakeup().  On BSD >= 199103 the recorded
+ * si_pid is then cleared; on older systems selwakeup() is only called
+ * when bd_selproc is set, after which bd_selcoll and bd_selproc are
+ * cleared.
+ */
+static inline void
+bpf_wakeup(d)
+ register struct bpf_d *d;
+{
+ wakeup((caddr_t)d);
+#if BSD >= 199103
+ selwakeup(&d->bd_sel);
+ /* XXX */
+ d->bd_sel.si_pid = 0;
+#else
+ if (d->bd_selproc) {
+ selwakeup(d->bd_selproc, (int)d->bd_selcoll);
+ d->bd_selcoll = 0;
+ d->bd_selproc = 0;
+ }
+#endif
+}
+
+/*
+ * Write one packet from user space out through the interface the
+ * descriptor is attached to.
+ *
+ * Returns ENXIO when no interface is attached, 0 for an empty write,
+ * the bpf_movein() error if the packet cannot be copied in, EMSGSIZE
+ * when the payload exceeds the interface MTU, and otherwise the
+ * result of the interface's if_output routine (called at splnet).
+ *
+ * The destination sockaddr is a function-static object shared by all
+ * callers.
+ */
+int
+bpfwrite(dev, uio)
+	dev_t dev;
+	struct uio *uio;
+{
+	register struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct ifnet *ifp;
+	struct mbuf *m;
+	int error, s;
+	static struct sockaddr dst;
+	int datlen;
+
+	if (d->bd_bif == 0)
+		return (ENXIO);
+
+	ifp = d->bd_bif->bif_ifp;
+
+	if (uio->uio_resid == 0)
+		return (0);
+
+	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
+	if (error)
+		return (error);
+
+	if (datlen > ifp->if_mtu) {
+		/* The mbuf is still ours here; free it rather than leak it. */
+		m_freem(m);
+		return (EMSGSIZE);
+	}
+
+	s = splnet();
+#if BSD >= 199103
+	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
+#else
+	error = (*ifp->if_output)(ifp, m, &dst);
+#endif
+	splx(s);
+	/*
+	 * The driver frees the mbuf.
+	 */
+	return (error);
+}
+
+/*
+ * Flush a descriptor's packet buffers and zero its receive and drop
+ * counters.  A non-empty hold slot is handed back as the free buffer.
+ * Should be called at splimp.
+ */
+static void
+reset_d(d)
+	struct bpf_d *d;
+{
+	if (d->bd_hbuf != 0) {
+		/* Recycle the hold buffer as the free buffer. */
+		d->bd_fbuf = d->bd_hbuf;
+		d->bd_hbuf = 0;
+	}
+
+	/* Both buffer lengths and both packet counters start over. */
+	d->bd_slen = 0;
+	d->bd_hlen = 0;
+	d->bd_rcount = 0;
+	d->bd_dcount = 0;
+}
+
+/*
+ *  FIONREAD		Check for read packet available.
+ *  SIOCGIFADDR		Get interface address - convenient hook to driver.
+ *  BIOCGBLEN		Get buffer len [for read()].
+ *  BIOCSBLEN		Set buffer len (only before an interface is set).
+ *  BIOCSETF		Set ethernet read filter.
+ *  BIOCFLUSH		Flush read packet buffer.
+ *  BIOCPROMISC		Put interface into promiscuous mode.
+ *  BIOCGDLT		Get link layer type.
+ *  BIOCGETIF		Get interface name.
+ *  BIOCSETIF		Set interface.
+ *  BIOCSRTIMEOUT	Set read timeout.
+ *  BIOCGRTIMEOUT	Get read timeout.
+ *  BIOCGSTATS		Get packet stats.
+ *  BIOCIMMEDIATE	Set immediate mode.
+ *  BIOCVERSION		Get filter language version.
+ *
+ * Returns 0 on success or an errno; unknown commands and requests
+ * that need an attached interface when none is set return EINVAL.
+ * The flag argument is not used.
+ */
+/* ARGSUSED */
+int
+bpfioctl(dev, cmd, addr, flag)
+	dev_t dev;
+	int cmd;
+	caddr_t addr;
+	int flag;
+{
+	register struct bpf_d *d = &bpf_dtab[minor(dev)];
+	int s, error = 0;
+
+	switch (cmd) {
+
+	default:
+		error = EINVAL;
+		break;
+
+	/*
+	 * Check for read packet available.
+	 */
+	case FIONREAD:
+		{
+			int n;
+
+			s = splimp();
+			n = d->bd_slen;
+			if (d->bd_hbuf)
+				n += d->bd_hlen;
+			splx(s);
+
+			*(int *)addr = n;
+			break;
+		}
+
+	case SIOCGIFADDR:
+		{
+			struct ifnet *ifp;
+
+			if (d->bd_bif == 0)
+				error = EINVAL;
+			else {
+				ifp = d->bd_bif->bif_ifp;
+				error = (*ifp->if_ioctl)(ifp, cmd, addr);
+			}
+			break;
+		}
+
+	/*
+	 * Get buffer len [for read()].
+	 */
+	case BIOCGBLEN:
+		*(u_int *)addr = d->bd_bufsize;
+		break;
+
+	/*
+	 * Set buffer length.
+	 */
+	case BIOCSBLEN:
+#if BSD < 199103
+		error = EINVAL;
+#else
+		if (d->bd_bif != 0)
+			error = EINVAL;
+		else {
+			register u_int size = *(u_int *)addr;
+
+			/* Clamp the request and report the value used. */
+			if (size > BPF_MAXBUFSIZE)
+				*(u_int *)addr = size = BPF_MAXBUFSIZE;
+			else if (size < BPF_MINBUFSIZE)
+				*(u_int *)addr = size = BPF_MINBUFSIZE;
+			d->bd_bufsize = size;
+		}
+#endif
+		break;
+
+	/*
+	 * Set link layer read filter.
+	 */
+	case BIOCSETF:
+		error = bpf_setf(d, (struct bpf_program *)addr);
+		break;
+
+	/*
+	 * Flush read packet buffer.
+	 */
+	case BIOCFLUSH:
+		s = splimp();
+		reset_d(d);
+		splx(s);
+		break;
+
+	/*
+	 * Put interface into promiscuous mode.
+	 */
+	case BIOCPROMISC:
+		if (d->bd_bif == 0) {
+			/*
+			 * No interface attached yet.
+			 */
+			error = EINVAL;
+			break;
+		}
+		s = splimp();
+		if (d->bd_promisc == 0) {
+			error = ifpromisc(d->bd_bif->bif_ifp, 1);
+			if (error == 0)
+				d->bd_promisc = 1;
+		}
+		splx(s);
+		break;
+
+	/*
+	 * Get device parameters.
+	 */
+	case BIOCGDLT:
+		if (d->bd_bif == 0)
+			error = EINVAL;
+		else
+			*(u_int *)addr = d->bd_bif->bif_dlt;
+		break;
+
+	/*
+	 * Get interface name.
+	 */
+	case BIOCGETIF:
+		if (d->bd_bif == 0)
+			error = EINVAL;
+		else
+			bpf_ifname(d->bd_bif->bif_ifp, (struct ifreq *)addr);
+		break;
+
+	/*
+	 * Set interface.
+	 */
+	case BIOCSETIF:
+		error = bpf_setif(d, (struct ifreq *)addr);
+		break;
+
+	/*
+	 * Set read timeout.
+	 */
+	case BIOCSRTIMEOUT:
+		{
+			struct timeval *tv = (struct timeval *)addr;
+			u_long msec;
+
+			/* Compute number of milliseconds. */
+			msec = tv->tv_sec * 1000 + tv->tv_usec / 1000;
+			/* Scale milliseconds to ticks.  Assume hard
+			   clock has millisecond or greater resolution
+			   (i.e. tick >= 1000).  For 10ms hardclock,
+			   tick/1000 = 10, so rtout<-msec/10. */
+			d->bd_rtout = msec / (tick / 1000);
+			/*
+			 * A non-zero request shorter than one tick must
+			 * not collapse to 0, which would mean "no timeout";
+			 * round it up to a single tick instead.
+			 */
+			if (d->bd_rtout == 0 &&
+			    (tv->tv_sec != 0 || tv->tv_usec != 0))
+				d->bd_rtout = 1;
+			break;
+		}
+
+	/*
+	 * Get read timeout.
+	 */
+	case BIOCGRTIMEOUT:
+		{
+			struct timeval *tv = (struct timeval *)addr;
+			u_long msec = d->bd_rtout;
+
+			/* Ticks back to milliseconds, then split. */
+			msec *= tick / 1000;
+			tv->tv_sec = msec / 1000;
+			/* tv_usec is in microseconds, not milliseconds. */
+			tv->tv_usec = (msec % 1000) * 1000;
+			break;
+		}
+
+	/*
+	 * Get packet stats.
+	 */
+	case BIOCGSTATS:
+		{
+			struct bpf_stat *bs = (struct bpf_stat *)addr;
+
+			bs->bs_recv = d->bd_rcount;
+			bs->bs_drop = d->bd_dcount;
+			break;
+		}
+
+	/*
+	 * Set immediate mode.
+	 */
+	case BIOCIMMEDIATE:
+		d->bd_immediate = *(u_int *)addr;
+		break;
+
+	case BIOCVERSION:
+		{
+			struct bpf_version *bv = (struct bpf_version *)addr;
+
+			bv->bv_major = BPF_MAJOR_VERSION;
+			bv->bv_minor = BPF_MINOR_VERSION;
+			break;
+		}
+	}
+	return (error);
+}
+
+/*
+ * Set d's packet filter program to fp. If this file already has a filter,
+ * free it and replace it. Returns EINVAL for bogus requests.
+ *
+ * A null bf_insns with bf_len 0 removes the current filter.  Otherwise
+ * the program (at most BPF_MAXINSNS instructions) is copied in from
+ * user space and checked with bpf_validate() before it is installed.
+ * Installing or removing a filter also resets the descriptor's buffers
+ * and counters via reset_d() at splimp.  Returns 0 on success.
+ */
+int
+bpf_setf(d, fp)
+ struct bpf_d *d;
+ struct bpf_program *fp;
+{
+ struct bpf_insn *fcode, *old;
+ u_int flen, size;
+ int s;
+
+ old = d->bd_filter; /* freed only after the replacement is in place */
+ if (fp->bf_insns == 0) {
+ if (fp->bf_len != 0)
+ return (EINVAL);
+ s = splimp();
+ d->bd_filter = 0;
+ reset_d(d);
+ splx(s);
+ if (old != 0)
+ free((caddr_t)old, M_DEVBUF);
+ return (0);
+ }
+ flen = fp->bf_len;
+ if (flen > BPF_MAXINSNS)
+ return (EINVAL);
+
+ size = flen * sizeof(*fp->bf_insns);
+ fcode = (struct bpf_insn *)malloc(size, M_DEVBUF, M_WAITOK);
+ if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
+ bpf_validate(fcode, (int)flen)) {
+ s = splimp();
+ d->bd_filter = fcode;
+ reset_d(d);
+ splx(s);
+ if (old != 0)
+ free((caddr_t)old, M_DEVBUF);
+
+ return (0);
+ }
+ free((caddr_t)fcode, M_DEVBUF); /* copyin or validation failed: discard the copy */
+ return (EINVAL);
+}
+
+/*
+ * Detach a file from its current interface (if attached at all) and attach
+ * to the interface indicated by the name stored in ifr.
+ * Return an errno or 0.
+ */
+static int
+bpf_setif(d, ifr)
+ struct bpf_d *d;
+ struct ifreq *ifr;
+{
+ struct bpf_if *bp;
+ char *cp;
+ int unit, s, error;
+
+ /*
+ * Separate string into name part and unit number. Put a null
+ * byte at the end of the name part, and compute the number.
+ * If a unit number is unspecified, the default is 0,
+ * as initialized below. XXX This should be common code.
+ *
+ * Note that ifr->ifr_name is modified in place: its last byte is
+ * forced to NUL and the first digit found is overwritten with NUL.
+ */
+ unit = 0;
+ cp = ifr->ifr_name;
+ cp[sizeof(ifr->ifr_name) - 1] = '\0';
+ /*
+ * cp is advanced before each digit test, so the first character
+ * of the name is never taken as the start of the unit number.
+ */
+ while (*cp++) {
+ if (*cp >= '0' && *cp <= '9') {
+ unit = *cp - '0';
+ *cp++ = '\0';
+ /*
+ * The remaining characters are accumulated without
+ * checking that they are digits.
+ */
+ while (*cp)
+ unit = 10 * unit + *cp++ - '0';
+ break;
+ }
+ }
+ /*
+ * Look through attached interfaces for the named one.
+ */
+ for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
+ struct ifnet *ifp = bp->bif_ifp;
+
+ if (ifp == 0 || unit != ifp->if_unit
+ || strcmp(ifp->if_name, ifr->ifr_name) != 0)
+ continue;
+ /*
+ * We found the requested interface.
+ * If it's not up, return an error.
+ * Allocate the packet buffers if we need to.
+ * If we're already attached to requested interface,
+ * just flush the buffer.
+ */
+ if ((ifp->if_flags & IFF_UP) == 0)
+ return (ENETDOWN);
+
+ if (d->bd_sbuf == 0) {
+ error = bpf_allocbufs(d);
+ if (error != 0)
+ return (error);
+ }
+ /* Change the attachment with network interrupts blocked. */
+ s = splimp();
+ if (bp != d->bd_bif) {
+ if (d->bd_bif)
+ /*
+ * Detach if attached to something else.
+ */
+ bpf_detachd(d);
+
+ bpf_attachd(d, bp);
+ }
+ reset_d(d);
+ splx(s);
+ return (0);
+ }
+ /* Not found. */
+ return (ENXIO);
+}
+
+/*
+ * Convert an interface name plus unit number of an ifp to a single
+ * name which is returned in the ifr.
+ *
+ * The result is ifp->if_name immediately followed by one decimal digit
+ * for ifp->if_unit and a terminating NUL, stored in ifr->ifr_name.
+ */
+static void
+bpf_ifname(ifp, ifr)
+	struct ifnet *ifp;
+	struct ifreq *ifr;
+{
+	char *s = ifp->if_name;
+	char *d = ifr->ifr_name;
+
+	/*
+	 * Copy the name without its terminating NUL so that the unit
+	 * digit lands directly after the last name character.  Copying
+	 * the NUL as well and then appending would strand the digit
+	 * behind the terminator, where no reader of the string sees it.
+	 */
+	while (*s != '\0')
+		*d++ = *s++;
+	/* XXX Assume that unit number is less than 10. */
+	*d++ = ifp->if_unit + '0';
+	*d = '\0';
+}
+
+/*
+ * The new select interface passes down the proc pointer; the old select
+ * stubs had to grab it out of the user struct. This glue allows either case.
+ */
+#if BSD >= 199103
+/*
+ * Three-argument select interface: the routine written below as
+ * bpf_select is compiled under the name bpfselect.
+ */
+#define bpf_select bpfselect
+#else
+/*
+ * Two-argument select interface: fetch the current process from the
+ * user struct and forward to bpf_select.
+ */
+int
+bpfselect(dev, rw)
+ register dev_t dev;
+ int rw;
+{
+ return (bpf_select(dev, rw, u.u_procp));
+}
+#endif
+
+/*
+ * select() support for bpf descriptors.
+ *
+ * Returns 1 when a read on the descriptor would find data (a hold
+ * buffer is present, or immediate mode is on and the store buffer is
+ * non-empty).  Otherwise returns 0 after noting the selecting process
+ * so that it can be woken up later.  Only FREAD is supported; any other
+ * request returns 0 at once.
+ */
+int
+bpf_select(dev, rw, p)
+	register dev_t dev;
+	int rw;
+	struct proc *p;
+{
+	register struct bpf_d *d;
+	register int s;
+	int ready;
+
+	if (rw != FREAD)
+		return (0);
+
+	/*
+	 * An imitation of the FIONREAD ioctl code.
+	 */
+	d = &bpf_dtab[minor(dev)];
+
+	s = splimp();
+	ready = (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0));
+	if (!ready) {
+#if BSD >= 199103
+		selrecord(p, &d->bd_sel);
+#else
+		/*
+		 * No data ready.  If there's already a select() waiting on
+		 * this minor device then this is a collision.  This
+		 * shouldn't happen because minors really should not be
+		 * shared, but if a process forks while one of these is
+		 * open, it is possible that both processes could select on
+		 * the same descriptor.
+		 */
+		if (d->bd_selproc && d->bd_selproc->p_wchan == (caddr_t)&selwait)
+			d->bd_selcoll = 1;
+		else
+			d->bd_selproc = p;
+#endif
+	}
+	splx(s);
+	return (ready);
+}
+
+/*
+ * Entry point for device drivers that hold the packet in one contiguous
+ * buffer: pkt points at pktlen bytes of data.  Every descriptor listening
+ * on the interface runs its filter over the packet; a non-zero filter
+ * result is the number of bytes to keep, and the packet is then stashed
+ * in that descriptor's buffer.
+ */
+void
+bpf_tap(arg, pkt, pktlen)
+	caddr_t arg;
+	register u_char *pkt;
+	register u_int pktlen;
+{
+	/*
+	 * Note that the ipl does not have to be raised at this point.
+	 * The only problem that could arise here is that if two different
+	 * interfaces shared any data.  This is not the case.
+	 */
+	struct bpf_if *iface = (struct bpf_if *)arg;
+	register struct bpf_d *listener;
+	register u_int snap;
+
+	for (listener = iface->bif_dlist; listener != 0;
+	    listener = listener->bd_next) {
+		++listener->bd_rcount;
+		snap = bpf_filter(listener->bd_filter, pkt, pktlen, pktlen);
+		if (snap == 0)
+			continue;
+		catchpacket(listener, pkt, pktlen, snap, bcopy);
+	}
+}
+
+/*
+ * Flatten the first len bytes of an mbuf chain (src_arg) into the
+ * contiguous buffer dst_arg.  Panics if the chain holds fewer than len
+ * bytes.  This code is derived from m_copydata in sys/uipc_mbuf.c.
+ */
+static void
+bpf_mcopy(src_arg, dst_arg, len)
+	const void *src_arg;
+	void *dst_arg;
+	register u_int len;
+{
+	register const struct mbuf *mb = src_arg;
+	register u_int chunk;
+	u_char *out = dst_arg;
+
+	for (; len > 0; mb = mb->m_next) {
+		if (mb == 0)
+			panic("bpf_mcopy");
+		/* Take whatever this mbuf holds, up to what is still wanted. */
+		chunk = min(mb->m_len, len);
+		bcopy(mtod(mb, caddr_t), (caddr_t)out, chunk);
+		out += chunk;
+		len -= chunk;
+	}
+}
+
+/*
+ * Entry point for device drivers that hold the packet in an mbuf chain.
+ * The packet length is the sum of the m_len fields along the chain.  The
+ * filter is run with a buffer length of 0 and the chain head passed in
+ * place of a data pointer; accepted packets are copied out with bpf_mcopy.
+ */
+void
+bpf_mtap(arg, m)
+	caddr_t arg;
+	struct mbuf *m;
+{
+	struct bpf_if *iface = (struct bpf_if *)arg;
+	struct bpf_d *listener;
+	struct mbuf *seg;
+	u_int total, snap;
+
+	total = 0;
+	seg = m;
+	while (seg != 0) {
+		total += seg->m_len;
+		seg = seg->m_next;
+	}
+
+	for (listener = iface->bif_dlist; listener != 0;
+	    listener = listener->bd_next) {
+		++listener->bd_rcount;
+		snap = bpf_filter(listener->bd_filter, (u_char *)m, total, 0);
+		if (snap == 0)
+			continue;
+		catchpacket(listener, (u_char *)m, total, snap, bpf_mcopy);
+	}
+}
+
+/*
+ * Move the packet data from interface memory (pkt) into the
+ * store buffer, preceded by a bpf header. Nothing is returned; a
+ * listener is woken up when the store buffer has to be rotated because
+ * the packet does not fit, or on every packet in immediate mode. If the
+ * packet does not fit and no free buffer is available, the packet is
+ * dropped and counted in bd_dcount. "cpfn" is the routine called to do
+ * the actual data transfer. bcopy is passed in to copy contiguous
+ * chunks, while bpf_mcopy is passed in to copy mbuf chains. In the
+ * latter case, pkt is really an mbuf.
+ */
+static void
+catchpacket(d, pkt, pktlen, snaplen, cpfn)
+ register struct bpf_d *d;
+ register u_char *pkt;
+ register u_int pktlen, snaplen;
+ register void (*cpfn)(const void *, void *, u_int);
+{
+ register struct bpf_hdr *hp;
+ register int totlen, curlen;
+ register int hdrlen = d->bd_bif->bif_hdrlen;
+ /*
+ * Figure out how many bytes to move. If the packet is
+ * greater or equal to the snapshot length, transfer that
+ * much. Otherwise, transfer the whole packet (unless
+ * we hit the buffer size limit).
+ */
+ totlen = hdrlen + min(snaplen, pktlen);
+ if (totlen > d->bd_bufsize)
+ totlen = d->bd_bufsize;
+
+ /*
+ * Round up the end of the previous packet to the next longword.
+ */
+ curlen = BPF_WORDALIGN(d->bd_slen);
+ if (curlen + totlen > d->bd_bufsize) {
+ /*
+ * This packet will overflow the storage buffer.
+ * Rotate the buffers if we can, then wakeup any
+ * pending reads.
+ */
+ if (d->bd_fbuf == 0) {
+ /*
+ * We haven't completed the previous read yet,
+ * so drop the packet.
+ */
+ ++d->bd_dcount;
+ return;
+ }
+ ROTATE_BUFFERS(d);
+ bpf_wakeup(d);
+ /* The packet goes at the start of the new store buffer. */
+ curlen = 0;
+ }
+ else if (d->bd_immediate)
+ /*
+ * Immediate mode is set. A packet arrived so any
+ * reads should be woken up.
+ */
+ bpf_wakeup(d);
+
+ /*
+ * Append the bpf header.
+ */
+ hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
+#if BSD >= 199103
+ microtime(&hp->bh_tstamp);
+#elif defined(sun)
+ uniqtime(&hp->bh_tstamp);
+#else
+ hp->bh_tstamp = time;
+#endif
+ hp->bh_datalen = pktlen;
+ hp->bh_hdrlen = hdrlen;
+ /*
+ * Copy the packet data into the store buffer and update its length.
+ * bh_caplen is assigned inside the call: it is the number of packet
+ * bytes actually stored, i.e. totlen without the header.
+ */
+ (*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
+ d->bd_slen = curlen + totlen;
+}
+
+/*
+ * Allocate the free and store buffers of a descriptor, each
+ * d->bd_bufsize bytes long, and mark the store and hold areas empty.
+ * Returns 0 on success.  Returns ENOBUFS if either allocation fails; in
+ * that case nothing stays allocated and d->bd_fbuf does not keep a
+ * pointer to freed memory.
+ */
+static int
+bpf_allocbufs(d)
+	register struct bpf_d *d;
+{
+	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
+	if (d->bd_fbuf == 0)
+		return (ENOBUFS);
+
+	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
+	if (d->bd_sbuf == 0) {
+		free(d->bd_fbuf, M_DEVBUF);
+		/*
+		 * Forget the buffer just released: a stale non-null
+		 * bd_fbuf would look like a usable free buffer.
+		 */
+		d->bd_fbuf = 0;
+		return (ENOBUFS);
+	}
+	d->bd_slen = 0;
+	d->bd_hlen = 0;
+	return (0);
+}
+
+/*
+ * Release everything a descriptor owns -- its packet buffers and its
+ * filter program -- and mark the descriptor free.
+ * Called on close.
+ */
+static void
+bpf_freed(d)
+	register struct bpf_d *d;
+{
+	/*
+	 * We don't need to lock out interrupts since this descriptor has
+	 * been detached from its interface and it yet hasn't been marked
+	 * free.
+	 */
+	if (d->bd_sbuf != 0) {
+		/*
+		 * The hold and free buffers are only looked at when a
+		 * store buffer exists.
+		 */
+		if (d->bd_fbuf != 0) {
+			free(d->bd_fbuf, M_DEVBUF);
+		}
+		if (d->bd_hbuf != 0) {
+			free(d->bd_hbuf, M_DEVBUF);
+		}
+		free(d->bd_sbuf, M_DEVBUF);
+	}
+	if (d->bd_filter != 0) {
+		free((caddr_t)d->bd_filter, M_DEVBUF);
+	}
+
+	D_MARKFREE(d);
+}
+
+/*
+ * Attach an interface to bpf. driverp is a pointer to a (struct bpf_if *)
+ * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
+ * size of the link header (variable length headers not yet supported).
+ */
+void
+bpfattach(driverp, ifp, dlt, hdrlen)
+ caddr_t *driverp;
+ struct ifnet *ifp;
+ u_int dlt, hdrlen;
+{
+ struct bpf_if *bp;
+ int i;
+#if BSD < 199103
+ /* Older systems: hand out bpf_if structures from a fixed static table. */
+ static struct bpf_if bpf_ifs[NBPFILTER];
+ static int bpfifno;
+
+ bp = (bpfifno < NBPFILTER) ? &bpf_ifs[bpfifno++] : 0;
+#else
+ bp = (struct bpf_if *)malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT);
+#endif
+ /* Table exhausted or allocation failed: there is no recovery path. */
+ if (bp == 0)
+ panic("bpfattach");
+
+ bp->bif_dlist = 0;
+ bp->bif_driverp = (struct bpf_if **)driverp;
+ bp->bif_ifp = ifp;
+ bp->bif_dlt = dlt;
+
+ /* Push the new interface onto the front of the global list. */
+ bp->bif_next = bpf_iflist;
+ bpf_iflist = bp;
+
+ /*
+ * Clear the driver's pointer slot.
+ * NOTE(review): presumably it is filled in when the first listener
+ * attaches -- that code is not visible here; confirm.
+ */
+ *bp->bif_driverp = 0;
+
+ /*
+ * Compute the length of the bpf header. This is not necessarily
+ * equal to SIZEOF_BPF_HDR because we want to insert spacing such
+ * that the network layer header begins on a longword boundary (for
+ * performance reasons and to alleviate alignment restrictions).
+ */
+ bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
+
+ /*
+ * Mark all the descriptors free if this hasn't been done.
+ * The state of bpf_dtab[0] stands in for the whole table.
+ */
+ if (!D_ISFREE(&bpf_dtab[0]))
+ for (i = 0; i < NBPFILTER; ++i)
+ D_MARKFREE(&bpf_dtab[i]);
+
+ printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit);
+}
+
+#if BSD >= 199103
+/* XXX This routine belongs in net/if.c. */
+/*
+ * Set/clear promiscuous mode on interface ifp based on the truth value
+ * of pswitch.  The calls are reference counted so that only the first
+ * "on" request actually has an effect, as does the final "off" request.
+ * Results are undefined if the "off" and "on" requests are not matched.
+ *
+ * Returns 0 on success, ENETDOWN when asked to turn promiscuous mode on
+ * for an interface that is not up, or whatever the driver's
+ * SIOCSIFFLAGS ioctl returns.
+ */
+int
+ifpromisc(ifp, pswitch)
+	struct ifnet *ifp;
+	int pswitch;
+{
+	struct ifreq ifr;
+
+	if (pswitch) {
+		/*
+		 * If the device is not configured up, we cannot put it in
+		 * promiscuous mode.
+		 */
+		if ((ifp->if_flags & IFF_UP) == 0)
+			return (ENETDOWN);
+		if (ifp->if_pcount++ != 0)
+			return (0);
+		ifp->if_flags |= IFF_PROMISC;
+	} else {
+		/*
+		 * The reference must be dropped even when the interface
+		 * has gone down in the meantime; bailing out before the
+		 * decrement would leave if_pcount and IFF_PROMISC stuck
+		 * and make every matched "off" request fail.
+		 */
+		if (--ifp->if_pcount > 0)
+			return (0);
+		ifp->if_flags &= ~IFF_PROMISC;
+		/*
+		 * With the interface down there is no need to tell the
+		 * driver; the flag has been cleared, so the new state is
+		 * there for it to see the next time it examines if_flags.
+		 */
+		if ((ifp->if_flags & IFF_UP) == 0)
+			return (0);
+	}
+	ifr.ifr_flags = ifp->if_flags;
+	return ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
+}
+#endif
+
+#if BSD < 199103
+/*
+ * Allocate some memory for bpf. This is temporary SunOS support, and
+ * is admittedly a hack.
+ * If resources are unavailable, return 0.
+ *
+ * The memory comes from an mbuf (with a cluster attached for larger
+ * requests). The first 8 bytes of the data area are reserved: the
+ * mbuf's own address is stored there, and the caller receives a
+ * pointer just past them.
+ */
+static caddr_t
+bpf_alloc(size, canwait)
+ register int size;
+ register int canwait;
+{
+ register struct mbuf *m;
+
+ /* Even a cluster cannot hold the request plus the 8 reserved bytes. */
+ if ((unsigned)size > (MCLBYTES-8))
+ return 0;
+
+ MGET(m, canwait, MT_DATA);
+ if (m == 0)
+ return 0;
+ /* Too big for a plain mbuf: a cluster is required. */
+ if ((unsigned)size > (MLEN-8)) {
+ MCLGET(m);
+ /* m_len other than MCLBYTES is treated as a failed cluster allocation. */
+ if (m->m_len != MCLBYTES) {
+ m_freem(m);
+ return 0;
+ }
+ }
+ /* Remember the owning mbuf at the start of the data area. */
+ *mtod(m, struct mbuf **) = m;
+ return mtod(m, caddr_t) + 8;
+}
+#endif
+#endif
diff --git a/sys/net/bpf.h b/sys/net/bpf.h
new file mode 100644
index 000000000000..2e093ac5ce11
--- /dev/null
+++ b/sys/net/bpf.h
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf.h 8.1 (Berkeley) 6/10/93
+ *
+ * @(#) $Header: bpf.h,v 1.24 91/10/27 21:22:32 mccanne Exp $ (LBL)
+ */
+
+/*
+ * Alignment macros. BPF_WORDALIGN rounds up to the next
+ * even multiple of BPF_ALIGNMENT.
+ */
+#define BPF_ALIGNMENT sizeof(long)
+#define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1))
+
+#define BPF_MAXINSNS 512
+#define BPF_MAXBUFSIZE 0x8000
+#define BPF_MINBUFSIZE 32
+
+/*
+ * Structure for BIOCSETF.
+ */
+struct bpf_program {
+ u_int bf_len; /* number of instructions in bf_insns */
+ struct bpf_insn *bf_insns; /* the instructions; null (with bf_len 0)
+ removes the current filter */
+};
+
+/*
+ * Struct returned by BIOCGSTATS.
+ */
+struct bpf_stat {
+ u_int bs_recv; /* number of packets received */
+ u_int bs_drop; /* number of packets dropped */
+};
+
+/*
+ * Struct returned by BIOCVERSION. This represents the version number of
+ * the filter language described by the instruction encodings below.
+ * bpf understands a program iff kernel_major == filter_major &&
+ * kernel_minor >= filter_minor, that is, if the value returned by the
+ * running kernel has the same major number and a minor number equal
+ * to or greater than that of the filter being downloaded. Otherwise, the
+ * results are undefined, meaning an error may be returned or packets
+ * may be accepted haphazardly.
+ * It has nothing to do with the source code version.
+ */
+struct bpf_version {
+ u_short bv_major;
+ u_short bv_minor;
+};
+/* Current version number. */
+#define BPF_MAJOR_VERSION 1
+#define BPF_MINOR_VERSION 1
+
+/*
+ * BPF ioctls
+ *
+ * The first set is for compatibility with Sun's pcc style
+ * header files. If you're using gcc, we assume that you
+ * have run fixincludes so the latter set should work.
+ */
+#if (defined(sun) || defined(ibm032)) && !defined(__GNUC__)
+#define BIOCGBLEN _IOR(B,102, u_int)
+#define BIOCSBLEN _IOWR(B,102, u_int)
+#define BIOCSETF _IOW(B,103, struct bpf_program)
+#define BIOCFLUSH _IO(B,104)
+#define BIOCPROMISC _IO(B,105)
+#define BIOCGDLT _IOR(B,106, u_int)
+#define BIOCGETIF _IOR(B,107, struct ifreq)
+#define BIOCSETIF _IOW(B,108, struct ifreq)
+#define BIOCSRTIMEOUT _IOW(B,109, struct timeval)
+#define BIOCGRTIMEOUT _IOR(B,110, struct timeval)
+#define BIOCGSTATS _IOR(B,111, struct bpf_stat)
+#define BIOCIMMEDIATE _IOW(B,112, u_int)
+#define BIOCVERSION _IOR(B,113, struct bpf_version)
+#else
+#define BIOCGBLEN _IOR('B',102, u_int)
+#define BIOCSBLEN _IOWR('B',102, u_int)
+#define BIOCSETF _IOW('B',103, struct bpf_program)
+#define BIOCFLUSH _IO('B',104)
+#define BIOCPROMISC _IO('B',105)
+#define BIOCGDLT _IOR('B',106, u_int)
+#define BIOCGETIF _IOR('B',107, struct ifreq)
+#define BIOCSETIF _IOW('B',108, struct ifreq)
+#define BIOCSRTIMEOUT _IOW('B',109, struct timeval)
+#define BIOCGRTIMEOUT _IOR('B',110, struct timeval)
+#define BIOCGSTATS _IOR('B',111, struct bpf_stat)
+#define BIOCIMMEDIATE _IOW('B',112, u_int)
+#define BIOCVERSION _IOR('B',113, struct bpf_version)
+#endif
+
+/*
+ * Structure prepended to each packet.
+ */
+struct bpf_hdr {
+ struct timeval bh_tstamp; /* time stamp */
+ u_long bh_caplen; /* length of captured portion */
+ u_long bh_datalen; /* original length of packet */
+ u_short bh_hdrlen; /* length of bpf header (this struct
+ plus alignment padding) */
+};
+/*
+ * Because the structure above is not a multiple of 4 bytes, some compilers
+ * will insist on inserting padding; hence, sizeof(struct bpf_hdr) won't work.
+ * Only the kernel needs to know about it; applications use bh_hdrlen.
+ */
+#ifdef KERNEL
+#define SIZEOF_BPF_HDR 18
+#endif
+
+/*
+ * Data-link level type codes.
+ * Currently, only DLT_EN10MB and DLT_SLIP are supported.
+ */
+#define DLT_NULL 0 /* no link-layer encapsulation */
+#define DLT_EN10MB 1 /* Ethernet (10Mb) */
+#define DLT_EN3MB 2 /* Experimental Ethernet (3Mb) */
+#define DLT_AX25 3 /* Amateur Radio AX.25 */
+#define DLT_PRONET 4 /* Proteon ProNET Token Ring */
+#define DLT_CHAOS 5 /* Chaos */
+#define DLT_IEEE802 6 /* IEEE 802 Networks */
+#define DLT_ARCNET 7 /* ARCNET */
+#define DLT_SLIP 8 /* Serial Line IP */
+#define DLT_PPP 9 /* Point-to-point Protocol */
+#define DLT_FDDI 10 /* FDDI */
+
+/*
+ * The instruction encodings.
+ */
+/* instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00
+#define BPF_H 0x08
+#define BPF_B 0x10
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+/* ret - BPF_K and BPF_X also apply */
+#define BPF_RVAL(code) ((code) & 0x18)
+#define BPF_A 0x10
+
+/* misc */
+#define BPF_MISCOP(code) ((code) & 0xf8)
+#define BPF_TAX 0x00
+#define BPF_TXA 0x80
+
+/*
+ * The instruction data structure.
+ */
+struct bpf_insn {
+ u_short code; /* opcode: instruction class combined with its
+ size, mode, operation and source bits */
+ u_char jt; /* conditional jumps: number of instructions to
+ skip when the test is true */
+ u_char jf; /* conditional jumps: number of instructions to
+ skip when the test is false */
+ long k; /* generic operand: constant, packet offset,
+ scratch memory index or BPF_JA distance */
+};
+
+/*
+ * Macros for insn array initializers.
+ */
+#define BPF_STMT(code, k) { (u_short)(code), 0, 0, k }
+#define BPF_JUMP(code, k, jt, jf) { (u_short)(code), jt, jf, k }
+
+#ifdef KERNEL
+int bpf_validate __P((struct bpf_insn *, int));
+int bpfopen __P((dev_t, int));
+int bpfclose __P((dev_t, int));
+int bpfread __P((dev_t, struct uio *));
+int bpfwrite __P((dev_t, struct uio *));
+int bpfioctl __P((dev_t, int, caddr_t, int));
+int bpf_select __P((dev_t, int, struct proc *));
+void bpf_tap __P((caddr_t, u_char *, u_int));
+void bpf_mtap __P((caddr_t, struct mbuf *));
+void bpfattach __P((caddr_t *, struct ifnet *, u_int, u_int));
+void bpfilterattach __P((int));
+u_int bpf_filter __P((struct bpf_insn *, u_char *, u_int, u_int));
+#endif
+
+/*
+ * Number of scratch memory words (for BPF_LD|BPF_MEM and BPF_ST).
+ */
+#define BPF_MEMWORDS 16
+
diff --git a/sys/net/bpf_compat.h b/sys/net/bpf_compat.h
new file mode 100644
index 000000000000..132a6df64526
--- /dev/null
+++ b/sys/net/bpf_compat.h
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf_compat.h 8.1 (Berkeley) 6/10/93
+ */
+
+/* from: $Header: bpf_compat.h,v 1.1 92/05/22 15:33:20 mccanne Exp $ (LBL) */
+
+/*
+ * Some hacks for compatibility across SunOS and 4.4BSD. We emulate malloc
+ * and free with mbuf clusters. We store a pointer to the mbuf in the first
+ * word of the mbuf and return 8 bytes past the start of data (for double
+ * word alignment). We cannot just use offsets because clusters are not at
+ * a fixed offset from the associated mbuf. Sorry for this kludge.
+ */
+#define malloc(size, type, canwait) bpf_alloc(size, canwait)
+#define free(cp, type) m_free(*(struct mbuf **)(cp - 8))
+#define M_WAITOK M_WAIT
+
+/* This mapping works for our purposes. */
+#define ERESTART EINTR
diff --git a/sys/net/bpf_filter.c b/sys/net/bpf_filter.c
new file mode 100644
index 000000000000..6a30a6657542
--- /dev/null
+++ b/sys/net/bpf_filter.c
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf_filter.c 8.1 (Berkeley) 6/10/93
+ *
+ * static char rcsid[] =
+ * "$Header: bpf_filter.c,v 1.16 91/10/27 21:22:35 mccanne Exp $";
+ */
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/time.h>
+
+#ifdef sun
+#include <netinet/in.h>
+#endif
+
+#if defined(sparc) || defined(mips) || defined(ibm032)
+#define BPF_ALIGN
+#endif
+
+#ifndef BPF_ALIGN
+#define EXTRACT_SHORT(p) ((u_short)ntohs(*(u_short *)p))
+#define EXTRACT_LONG(p) (ntohl(*(u_long *)p))
+#else
+#define EXTRACT_SHORT(p)\
+ ((u_short)\
+ ((u_short)*((u_char *)p+0)<<8|\
+ (u_short)*((u_char *)p+1)<<0))
+#define EXTRACT_LONG(p)\
+ ((u_long)*((u_char *)p+0)<<24|\
+ (u_long)*((u_char *)p+1)<<16|\
+ (u_long)*((u_char *)p+2)<<8|\
+ (u_long)*((u_char *)p+3)<<0)
+#endif
+
+#ifdef KERNEL
+#include <sys/mbuf.h>
+/*
+ * Walk the mbuf chain starting at m until the mbuf containing byte
+ * offset k is reached. Both arguments are modified in place: on
+ * completion m is that mbuf and k is the offset within it.
+ * CAUTION: if the chain ends first, this macro executes "return 0"
+ * in the function that uses it.
+ */
+#define MINDEX(m, k) \
+{ \
+ register int len = m->m_len; \
+ \
+ while (k >= len) { \
+ k -= len; \
+ m = m->m_next; \
+ if (m == 0) \
+ return 0; \
+ len = m->m_len; \
+ } \
+}
+
+/*
+ * Fetch the four-byte, network-order word found at byte offset k of the
+ * mbuf chain m.  The word may lie entirely inside one mbuf or straddle
+ * that mbuf and the next one.  On success *err is set to 0 and the word
+ * is returned; if the chain is too short *err is set to 1 and 0 is
+ * returned.
+ */
+static int
+m_xword(m, k, err)
+	register struct mbuf *m;
+	register int k, *err;
+{
+	register int len;
+	register u_char *cp, *np;
+	register struct mbuf *m0;
+
+	/* Find the mbuf that holds offset k; k becomes relative to it. */
+	len = m->m_len;
+	while (k >= len) {
+		k -= len;
+		m = m->m_next;
+		if (m == 0)
+			goto bad;
+		len = m->m_len;
+	}
+	cp = mtod(m, u_char *) + k;
+	if (len - k >= 4) {
+		*err = 0;
+		return EXTRACT_LONG(cp);
+	}
+	m0 = m->m_next;
+	if (m0 == 0 || m0->m_len + len - k < 4)
+		goto bad;
+	*err = 0;
+	np = mtod(m0, u_char *);
+	/*
+	 * The word straddles two mbufs.  cp already points at the first
+	 * wanted byte, so it is indexed from 0; adding k a second time
+	 * would pick up the wrong bytes.
+	 */
+	switch (len - k) {
+
+	case 1:
+		return (cp[0] << 24) | (np[0] << 16) | (np[1] << 8) | np[2];
+
+	case 2:
+		return (cp[0] << 24) | (cp[1] << 16) | (np[0] << 8) |
+			np[1];
+
+	default:
+		return (cp[0] << 24) | (cp[1] << 16) | (cp[2] << 8) |
+			np[0];
+	}
+    bad:
+	*err = 1;
+	return 0;
+}
+
+/*
+ * Fetch the two-byte, network-order halfword found at byte offset k of
+ * the mbuf chain m.  The halfword may lie inside one mbuf or have its
+ * second byte at the start of the next one.  On success *err is set to
+ * 0 and the value is returned; if the chain is too short *err is set
+ * to 1 and 0 is returned.
+ */
+static int
+m_xhalf(m, k, err)
+	register struct mbuf *m;
+	register int k, *err;
+{
+	register int len;
+	register u_char *cp;
+	register struct mbuf *m0;
+
+	/* Find the mbuf that holds offset k; k becomes relative to it. */
+	len = m->m_len;
+	while (k >= len) {
+		k -= len;
+		m = m->m_next;
+		if (m == 0)
+			goto bad;
+		len = m->m_len;
+	}
+	cp = mtod(m, u_char *) + k;
+	if (len - k >= 2) {
+		*err = 0;
+		return EXTRACT_SHORT(cp);
+	}
+	m0 = m->m_next;
+	if (m0 == 0)
+		goto bad;
+	*err = 0;
+	/*
+	 * cp already points at the last byte of this mbuf, so it is
+	 * indexed from 0; adding k a second time would read the wrong
+	 * byte.
+	 */
+	return (cp[0] << 8) | mtod(m0, u_char *)[0];
+ bad:
+	*err = 1;
+	return 0;
+}
+#endif
+
+#include <net/bpf.h>
+/*
+ * Execute the filter program starting at pc on the packet p
+ * wirelen is the length of the original packet
+ * buflen is the amount of data present
+ *
+ * The return value is the result of the program's BPF_RET instruction;
+ * 0 rejects the packet.  A null pc accepts everything ((u_int)-1).  A
+ * load beyond the available data, a division by a zero X register, or
+ * an unknown opcode (in the kernel) also rejects the packet.
+ *
+ * In the kernel a buflen of 0 means that p is really the head of an
+ * mbuf chain; loads are then performed by walking the chain.
+ */
+u_int
+bpf_filter(pc, p, wirelen, buflen)
+	register struct bpf_insn *pc;
+	register u_char *p;
+	u_int wirelen;
+	register u_int buflen;
+{
+	register u_long A, X;
+	register int k;
+	long mem[BPF_MEMWORDS];
+
+	if (pc == 0)
+		/*
+		 * No filter means accept all.
+		 */
+		return (u_int)-1;
+	/*
+	 * Start from known register contents so that a program which
+	 * reads A or X before loading them cannot observe (or return)
+	 * whatever happened to be on the stack.
+	 */
+	A = 0;
+	X = 0;
+	--pc;
+	while (1) {
+		++pc;
+		switch (pc->code) {
+
+		default:
+#ifdef KERNEL
+			return 0;
+#else
+			abort();
+#endif
+		case BPF_RET|BPF_K:
+			return (u_int)pc->k;
+
+		case BPF_RET|BPF_A:
+			return (u_int)A;
+
+		case BPF_LD|BPF_W|BPF_ABS:
+			k = pc->k;
+			if (k + sizeof(long) > buflen) {
+#ifdef KERNEL
+				int merr;
+
+				if (buflen != 0)
+					return 0;
+				A = m_xword((struct mbuf *)p, k, &merr);
+				if (merr != 0)
+					return 0;
+				continue;
+#else
+				return 0;
+#endif
+			}
+#ifdef BPF_ALIGN
+			if (((int)(p + k) & 3) != 0)
+				A = EXTRACT_LONG(&p[k]);
+			else
+#endif
+				A = ntohl(*(long *)(p + k));
+			continue;
+
+		case BPF_LD|BPF_H|BPF_ABS:
+			k = pc->k;
+			if (k + sizeof(short) > buflen) {
+#ifdef KERNEL
+				int merr;
+
+				if (buflen != 0)
+					return 0;
+				A = m_xhalf((struct mbuf *)p, k, &merr);
+				/*
+				 * As for every other mbuf load, running
+				 * off the end of the chain rejects the
+				 * packet.
+				 */
+				if (merr != 0)
+					return 0;
+				continue;
+#else
+				return 0;
+#endif
+			}
+			A = EXTRACT_SHORT(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_B|BPF_ABS:
+			k = pc->k;
+			if (k >= buflen) {
+#ifdef KERNEL
+				register struct mbuf *m;
+
+				if (buflen != 0)
+					return 0;
+				m = (struct mbuf *)p;
+				MINDEX(m, k);
+				A = mtod(m, u_char *)[k];
+				continue;
+#else
+				return 0;
+#endif
+			}
+			A = p[k];
+			continue;
+
+		case BPF_LD|BPF_W|BPF_LEN:
+			A = wirelen;
+			continue;
+
+		case BPF_LDX|BPF_W|BPF_LEN:
+			X = wirelen;
+			continue;
+
+		case BPF_LD|BPF_W|BPF_IND:
+			k = X + pc->k;
+			if (k + sizeof(long) > buflen) {
+#ifdef KERNEL
+				int merr;
+
+				if (buflen != 0)
+					return 0;
+				A = m_xword((struct mbuf *)p, k, &merr);
+				if (merr != 0)
+					return 0;
+				continue;
+#else
+				return 0;
+#endif
+			}
+#ifdef BPF_ALIGN
+			if (((int)(p + k) & 3) != 0)
+				A = EXTRACT_LONG(&p[k]);
+			else
+#endif
+				A = ntohl(*(long *)(p + k));
+			continue;
+
+		case BPF_LD|BPF_H|BPF_IND:
+			k = X + pc->k;
+			if (k + sizeof(short) > buflen) {
+#ifdef KERNEL
+				int merr;
+
+				if (buflen != 0)
+					return 0;
+				A = m_xhalf((struct mbuf *)p, k, &merr);
+				if (merr != 0)
+					return 0;
+				continue;
+#else
+				return 0;
+#endif
+			}
+			A = EXTRACT_SHORT(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_B|BPF_IND:
+			k = X + pc->k;
+			if (k >= buflen) {
+#ifdef KERNEL
+				register struct mbuf *m;
+
+				if (buflen != 0)
+					return 0;
+				m = (struct mbuf *)p;
+				MINDEX(m, k);
+				/*
+				 * Read the byte as unsigned, like the
+				 * contiguous-buffer path below; a plain
+				 * char may sign-extend values >= 0x80.
+				 */
+				A = mtod(m, u_char *)[k];
+				continue;
+#else
+				return 0;
+#endif
+			}
+			A = p[k];
+			continue;
+
+		case BPF_LDX|BPF_MSH|BPF_B:
+			k = pc->k;
+			if (k >= buflen) {
+#ifdef KERNEL
+				register struct mbuf *m;
+
+				if (buflen != 0)
+					return 0;
+				m = (struct mbuf *)p;
+				MINDEX(m, k);
+				X = (mtod(m, char *)[k] & 0xf) << 2;
+				continue;
+#else
+				return 0;
+#endif
+			}
+			X = (p[pc->k] & 0xf) << 2;
+			continue;
+
+		case BPF_LD|BPF_IMM:
+			A = pc->k;
+			continue;
+
+		case BPF_LDX|BPF_IMM:
+			X = pc->k;
+			continue;
+
+		case BPF_LD|BPF_MEM:
+			A = mem[pc->k];
+			continue;
+
+		case BPF_LDX|BPF_MEM:
+			X = mem[pc->k];
+			continue;
+
+		case BPF_ST:
+			mem[pc->k] = A;
+			continue;
+
+		case BPF_STX:
+			mem[pc->k] = X;
+			continue;
+
+		case BPF_JMP|BPF_JA:
+			pc += pc->k;
+			continue;
+
+		case BPF_JMP|BPF_JGT|BPF_K:
+			pc += (A > pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGE|BPF_K:
+			pc += (A >= pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JEQ|BPF_K:
+			pc += (A == pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JSET|BPF_K:
+			pc += (A & pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGT|BPF_X:
+			pc += (A > X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGE|BPF_X:
+			pc += (A >= X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JEQ|BPF_X:
+			pc += (A == X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JSET|BPF_X:
+			pc += (A & X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_ALU|BPF_ADD|BPF_X:
+			A += X;
+			continue;
+
+		case BPF_ALU|BPF_SUB|BPF_X:
+			A -= X;
+			continue;
+
+		case BPF_ALU|BPF_MUL|BPF_X:
+			A *= X;
+			continue;
+
+		case BPF_ALU|BPF_DIV|BPF_X:
+			if (X == 0)
+				return 0;
+			A /= X;
+			continue;
+
+		case BPF_ALU|BPF_AND|BPF_X:
+			A &= X;
+			continue;
+
+		case BPF_ALU|BPF_OR|BPF_X:
+			A |= X;
+			continue;
+
+		case BPF_ALU|BPF_LSH|BPF_X:
+			A <<= X;
+			continue;
+
+		case BPF_ALU|BPF_RSH|BPF_X:
+			A >>= X;
+			continue;
+
+		case BPF_ALU|BPF_ADD|BPF_K:
+			A += pc->k;
+			continue;
+
+		case BPF_ALU|BPF_SUB|BPF_K:
+			A -= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_MUL|BPF_K:
+			A *= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_DIV|BPF_K:
+			A /= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_AND|BPF_K:
+			A &= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_OR|BPF_K:
+			A |= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_LSH|BPF_K:
+			A <<= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_RSH|BPF_K:
+			A >>= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_NEG:
+			A = -A;
+			continue;
+
+		case BPF_MISC|BPF_TAX:
+			X = A;
+			continue;
+
+		case BPF_MISC|BPF_TXA:
+			A = X;
+			continue;
+		}
+	}
+}
+
+#ifdef KERNEL
+/*
+ * Return true if the 'fcode' is a valid filter program.
+ * The constraints are that each jump be forward and to a valid
+ * code, that every scratch memory load or store use an index inside
+ * the scratch array, and that no instruction divide by a constant 0.
+ * The program must contain at least one instruction and must end with
+ * a return (accept or reject).
+ *
+ * The kernel needs to be able to verify an application's filter code.
+ * Otherwise, a bogus program could easily crash the system.
+ */
+int
+bpf_validate(f, len)
+	struct bpf_insn *f;
+	int len;
+{
+	register int i;
+	register struct bpf_insn *p;
+
+	/*
+	 * An empty program has no final instruction to inspect.
+	 */
+	if (len < 1)
+		return 0;
+
+	for (i = 0; i < len; ++i) {
+		/*
+		 * Check that jumps are forward, and within
+		 * the code block.
+		 */
+		p = &f[i];
+		if (BPF_CLASS(p->code) == BPF_JMP) {
+			register int from = i + 1;
+
+			if (BPF_OP(p->code) == BPF_JA) {
+				/*
+				 * k is signed: a negative offset would be
+				 * a backward jump (and a possible endless
+				 * loop).  Comparing against len - from
+				 * avoids overflowing from + k.
+				 */
+				if (p->k < 0 || p->k >= len - from)
+					return 0;
+			}
+			else if (from + p->jt >= len || from + p->jf >= len)
+				return 0;
+		}
+		/*
+		 * Check that memory operations use valid addresses.
+		 * This covers both the A and the X register forms,
+		 * since the interpreter indexes mem[] with k for
+		 * all four of them.
+		 */
+		if ((BPF_CLASS(p->code) == BPF_ST ||
+		     BPF_CLASS(p->code) == BPF_STX ||
+		     ((BPF_CLASS(p->code) == BPF_LD ||
+		       BPF_CLASS(p->code) == BPF_LDX) &&
+		      BPF_MODE(p->code) == BPF_MEM)) &&
+		    (p->k >= BPF_MEMWORDS || p->k < 0))
+			return 0;
+		/*
+		 * Check for constant division by 0.
+		 */
+		if (p->code == (BPF_ALU|BPF_DIV|BPF_K) && p->k == 0)
+			return 0;
+	}
+	return BPF_CLASS(f[len - 1].code) == BPF_RET;
+}
+#endif
diff --git a/sys/net/bpfdesc.h b/sys/net/bpfdesc.h
new file mode 100644
index 000000000000..a13320e86a6a
--- /dev/null
+++ b/sys/net/bpfdesc.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpfdesc.h 8.1 (Berkeley) 6/10/93
+ *
+ * @(#) $Header: bpfdesc.h,v 1.9 91/10/27 21:22:38 mccanne Exp $ (LBL)
+ */
+
+/*
+ * Descriptor associated with each open bpf file.
+ */
+struct bpf_d {
+ struct bpf_d *bd_next; /* Linked list of descriptors */
+ /*
+ * Buffer slots: two mbuf clusters buffer the incoming packets.
+ * The model has three slots. Sbuf is always occupied.
+ * sbuf (store) - Receive interrupt puts packets here.
+ * hbuf (hold) - When sbuf is full, put cluster here and
+ * wakeup read (replace sbuf with fbuf).
+ * fbuf (free) - When read is done, put cluster here.
+ * On receiving, if sbuf is full and fbuf is 0, packet is dropped.
+ */
+ caddr_t bd_sbuf; /* store slot */
+ caddr_t bd_hbuf; /* hold slot */
+ caddr_t bd_fbuf; /* free slot */
+ int bd_slen; /* current length of store buffer */
+ int bd_hlen; /* current length of hold buffer */
+
+ int bd_bufsize; /* absolute length of buffers */
+
+ struct bpf_if * bd_bif; /* interface descriptor */
+ u_long bd_rtout; /* Read timeout in 'ticks' */
+ struct bpf_insn *bd_filter; /* filter code */
+ u_long bd_rcount; /* number of packets received */
+ u_long bd_dcount; /* number of packets dropped */
+
+ u_char bd_promisc; /* true if listening promiscuously */
+ u_char bd_state; /* idle, waiting, or timed out */
+ u_char bd_immediate; /* true to return on packet arrival */
+ /*
+ * Select support.  Systems with BSD < 199103 keep the selecting
+ * process and a collision flag by hand; later ones embed a
+ * struct selinfo instead.
+ */
+#if BSD < 199103
+ u_char bd_selcoll; /* true if selects collide */
+ int bd_timedout;
+ struct proc * bd_selproc; /* process that last selected us */
+#else
+ u_char bd_pad; /* explicit alignment */
+ struct selinfo bd_sel; /* bsd select info */
+#endif
+};
+
+/*
+ * Descriptor associated with each attached hardware interface.
+ * Descriptors are chained through bif_next; the open bpf files
+ * listening on an interface hang off bif_dlist.
+ */
+struct bpf_if {
+ struct bpf_if *bif_next; /* list of all interfaces */
+ struct bpf_d *bif_dlist; /* descriptor list */
+ struct bpf_if **bif_driverp; /* pointer into softc */
+ u_int bif_dlt; /* link layer type */
+ u_int bif_hdrlen; /* length of header (with padding) */
+ struct ifnet *bif_ifp; /* corresponding interface */
+};
+
+#ifdef KERNEL
+int bpf_setf __P((struct bpf_d *, struct bpf_program *));
+#endif
diff --git a/sys/net/if.c b/sys/net/if.c
new file mode 100644
index 000000000000..36963885cc71
--- /dev/null
+++ b/sys/net/if.c
@@ -0,0 +1,670 @@
+/*
+ * Copyright (c) 1980, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/kernel.h>
+#include <sys/ioctl.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+
+int ifqmaxlen = IFQ_MAXLEN;
+void if_slowtimo __P((void *arg));
+
+/*
+ * Network interface utility routines.
+ *
+ * Routines with ifa_ifwith* names take sockaddr *'s as
+ * parameters.
+ */
+/*
+ * Give every interface on the global list whose send queue has no
+ * limit yet (ifq_maxlen == 0) the default limit ifqmaxlen, then make
+ * the first call to if_slowtimo().
+ */
+void
+ifinit()
+{
+	register struct ifnet *cur = ifnet;
+
+	while (cur != 0) {
+		struct ifqueue *sendq = &cur->if_snd;
+
+		if (sendq->ifq_maxlen == 0)
+			sendq->ifq_maxlen = ifqmaxlen;
+		cur = cur->if_next;
+	}
+	if_slowtimo(0);
+}
+
+#ifdef vax
+/*
+ * Unibus reset: run the if_reset handler of every interface that
+ * provides one, handing it the interface unit and uban.
+ */
+void
+ifubareset(uban)
+	int uban;
+{
+	register struct ifnet *cur;
+
+	for (cur = ifnet; cur != 0; cur = cur->if_next) {
+		if (cur->if_reset == 0)
+			continue;
+		(*cur->if_reset)(cur->if_unit, uban);
+	}
+}
+#endif
+
+int if_index = 0;
+struct ifaddr **ifnet_addrs;
+static char *sprint_d __P((u_int, char *, int));
+
+/*
+ * Attach an interface to the
+ * list of "active" interfaces.
+ *
+ * Appends ifp to the tail of the global ifnet list, assigns it the
+ * next if_index, grows the ifnet_addrs table when needed, and builds
+ * a link-level (AF_LINK) address plus netmask for the interface from
+ * its name and unit number.  That address is put at the head of the
+ * interface's address list and recorded in ifnet_addrs[if_index - 1].
+ */
+void
+if_attach(ifp)
+ struct ifnet *ifp;
+{
+ unsigned socksize, ifasize;
+ int namelen, unitlen, masklen, ether_output();
+ char workbuf[12], *unitname;
+ register struct ifnet **p = &ifnet;
+ register struct sockaddr_dl *sdl;
+ register struct ifaddr *ifa;
+ static int if_indexlim = 8;
+ extern void link_rtrequest();
+
+ /* Walk to the end of the interface list and link ifp in there. */
+ while (*p)
+ p = &((*p)->if_next);
+ *p = ifp;
+ ifp->if_index = ++if_index;
+ /*
+ * (Re)allocate the index table when it does not exist yet or is
+ * full.  The limit is doubled first; only the old half (n/2 bytes)
+ * is copied from the previous table.
+ * NOTE(review): the remainder of the new table is not cleared here.
+ */
+ if (ifnet_addrs == 0 || if_index >= if_indexlim) {
+ unsigned n = (if_indexlim <<= 1) * sizeof(ifa);
+ struct ifaddr **q = (struct ifaddr **)
+ malloc(n, M_IFADDR, M_WAITOK);
+ if (ifnet_addrs) {
+ bcopy((caddr_t)ifnet_addrs, (caddr_t)q, n/2);
+ free((caddr_t)ifnet_addrs, M_IFADDR);
+ }
+ ifnet_addrs = q;
+ }
+ /*
+ * create a Link Level name for this device
+ */
+ unitname = sprint_d((u_int)ifp->if_unit, workbuf, sizeof(workbuf));
+ namelen = strlen(ifp->if_name);
+ unitlen = strlen(unitname);
+#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
+ /* The mask spans the sockaddr_dl header plus the name and unit text. */
+ masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) +
+ unitlen + namelen;
+ socksize = masklen + ifp->if_addrlen;
+ /* Round up to a multiple of sizeof(long). */
+#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
+ socksize = ROUNDUP(socksize);
+ if (socksize < sizeof(*sdl))
+ socksize = sizeof(*sdl);
+ /*
+ * A single allocation holds the ifaddr followed by two sockaddr_dl
+ * areas of socksize bytes each: the address, then its netmask.
+ */
+ ifasize = sizeof(*ifa) + 2 * socksize;
+ if (ifa = (struct ifaddr *)malloc(ifasize, M_IFADDR, M_WAITOK)) {
+ bzero((caddr_t)ifa, ifasize);
+ sdl = (struct sockaddr_dl *)(ifa + 1);
+ sdl->sdl_len = socksize;
+ sdl->sdl_family = AF_LINK;
+ bcopy(ifp->if_name, sdl->sdl_data, namelen);
+ bcopy(unitname, namelen + (caddr_t)sdl->sdl_data, unitlen);
+ /* From here on namelen is the combined name + unit length. */
+ sdl->sdl_nlen = (namelen += unitlen);
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = ifp->if_type;
+ ifnet_addrs[if_index - 1] = ifa;
+ ifa->ifa_ifp = ifp;
+ ifa->ifa_next = ifp->if_addrlist;
+ ifa->ifa_rtrequest = link_rtrequest;
+ ifp->if_addrlist = ifa;
+ ifa->ifa_addr = (struct sockaddr *)sdl;
+ /* Step to the second sockaddr_dl area and fill in the netmask. */
+ sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
+ ifa->ifa_netmask = (struct sockaddr *)sdl;
+ sdl->sdl_len = masklen;
+ /* Set every name/unit byte of the mask to all ones. */
+ while (namelen != 0)
+ sdl->sdl_data[--namelen] = 0xff;
+ }
+ /* XXX -- Temporary fix before changing 10 ethernet drivers */
+ if (ifp->if_output == ether_output)
+ ether_ifattach(ifp);
+}
+/*
+ * Find the interface address that matches addr exactly.  On interfaces
+ * flagged IFF_BROADCAST a match against an address's broadcast address
+ * counts as well.  Returns a null pointer when nothing matches.
+ */
+/*ARGSUSED*/
+struct ifaddr *
+ifa_ifwithaddr(addr)
+	register struct sockaddr *addr;
+{
+	register struct ifnet *netif;
+	register struct ifaddr *cand;
+
+/* True when two sockaddrs agree over the first one's sa_len bytes. */
+#define equal(a1, a2) \
+  (bcmp((caddr_t)(a1), (caddr_t)(a2), ((struct sockaddr *)(a1))->sa_len) == 0)
+	for (netif = ifnet; netif != 0; netif = netif->if_next) {
+		for (cand = netif->if_addrlist; cand != 0;
+		    cand = cand->ifa_next) {
+			if (cand->ifa_addr->sa_family == addr->sa_family) {
+				if (equal(addr, cand->ifa_addr))
+					return (cand);
+				if ((netif->if_flags & IFF_BROADCAST) &&
+				    cand->ifa_broadaddr &&
+				    equal(cand->ifa_broadaddr, addr))
+					return (cand);
+			}
+		}
+	}
+	return ((struct ifaddr *)0);
+}
+/*
+ * Locate the point to point interface with a given destination address.
+ *
+ * Only interfaces flagged IFF_POINTOPOINT are searched.  Addresses with
+ * no destination address recorded are skipped rather than compared:
+ * the link-level address created by if_attach() is zero-filled, so its
+ * ifa_dstaddr is a null pointer.
+ * Returns the matching ifaddr, or a null pointer when there is none.
+ */
+/*ARGSUSED*/
+struct ifaddr *
+ifa_ifwithdstaddr(addr)
+	register struct sockaddr *addr;
+{
+	register struct ifnet *ifp;
+	register struct ifaddr *ifa;
+
+	for (ifp = ifnet; ifp; ifp = ifp->if_next) {
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+			continue;
+		for (ifa = ifp->if_addrlist; ifa; ifa = ifa->ifa_next) {
+			if (ifa->ifa_addr->sa_family != addr->sa_family)
+				continue;
+			/* Never hand a null destination to bcmp(). */
+			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
+				return (ifa);
+		}
+	}
+	return ((struct ifaddr *)0);
+}
+
+/*
+ * Find an interface on a specific network. If many, choice
+ * is most specific found.
+ *
+ * For an AF_LINK address carrying a valid interface index the entry
+ * recorded in ifnet_addrs is returned directly.  Otherwise every
+ * address of the same family that has a netmask is compared with addr
+ * under that mask.  Returns a null pointer when nothing matches.
+ */
+struct ifaddr *
+ifa_ifwithnet(addr)
+ struct sockaddr *addr;
+{
+ register struct ifnet *ifp;
+ register struct ifaddr *ifa;
+ struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
+ u_int af = addr->sa_family;
+ char *addr_data = addr->sa_data, *cplim;
+
+ if (af == AF_LINK) {
+ register struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
+ /* Indices run from 1 to if_index; the table is zero-based. */
+ if (sdl->sdl_index && sdl->sdl_index <= if_index)
+ return (ifnet_addrs[sdl->sdl_index - 1]);
+ }
+ for (ifp = ifnet; ifp; ifp = ifp->if_next)
+ for (ifa = ifp->if_addrlist; ifa; ifa = ifa->ifa_next) {
+ register char *cp, *cp2, *cp3;
+
+ /*
+ * The "next" label sits on this continue so that the compare
+ * loop below can jump here to move on to the next address.
+ */
+ if (ifa->ifa_addr->sa_family != af || ifa->ifa_netmask == 0)
+ next: continue;
+ cp = addr_data;
+ cp2 = ifa->ifa_addr->sa_data;
+ cp3 = ifa->ifa_netmask->sa_data;
+ /* The mask's own sa_len decides how many bytes are compared. */
+ cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
+ while (cp3 < cplim)
+ if ((*cp++ ^ *cp2++) & *cp3++)
+ goto next;
+ /*
+ * Matched under this mask.  Keep it if it is the first match or
+ * if rn_refines() prefers its mask over the current candidate's
+ * (presumably "is more specific" -- confirm against rn_refines).
+ */
+ if (ifa_maybe == 0 ||
+ rn_refines((caddr_t)ifa->ifa_netmask,
+ (caddr_t)ifa_maybe->ifa_netmask))
+ ifa_maybe = ifa;
+ }
+ return (ifa_maybe);
+}
+
+/*
+ * Return the first interface address, in interface-list order, whose
+ * address family is af; a null pointer if no address has that family.
+ */
+struct ifaddr *
+ifa_ifwithaf(af)
+	register int af;
+{
+	register struct ifnet *netif = ifnet;
+	register struct ifaddr *cand;
+
+	while (netif != 0) {
+		cand = netif->if_addrlist;
+		while (cand != 0) {
+			if (cand->ifa_addr->sa_family == af)
+				return (cand);
+			cand = cand->ifa_next;
+		}
+		netif = netif->if_next;
+	}
+	return ((struct ifaddr *)0);
+}
+
+/*
+ * Among the addresses of interface ifp, pick the one that best fits
+ * addr.  An address without a netmask fits when it, or its destination
+ * address, equals addr; one with a netmask fits when addr agrees with
+ * it under that mask.  Failing both, the last address of the right
+ * family is returned, or a null pointer if there is none (or the
+ * family is not below AF_MAX).
+ */
+struct ifaddr *
+ifaof_ifpforaddr(addr, ifp)
+	struct sockaddr *addr;
+	register struct ifnet *ifp;
+{
+	register struct ifaddr *cand;
+	register char *want, *have, *mask;
+	register char *maskend;
+	struct ifaddr *fallback = 0;
+	u_int af = addr->sa_family;
+
+	if (af >= AF_MAX)
+		return (0);
+	for (cand = ifp->if_addrlist; cand; cand = cand->ifa_next) {
+		if (cand->ifa_addr->sa_family == af) {
+			fallback = cand;
+			if (cand->ifa_netmask != 0) {
+				want = addr->sa_data;
+				have = cand->ifa_addr->sa_data;
+				mask = cand->ifa_netmask->sa_data;
+				maskend = cand->ifa_netmask->sa_len +
+				    (char *)cand->ifa_netmask;
+				/* Stop at the first byte differing under the mask. */
+				while (mask < maskend &&
+				    ((*want++ ^ *have++) & *mask) == 0)
+					mask++;
+				if (mask == maskend)
+					return (cand);
+			} else if (equal(addr, cand->ifa_addr) ||
+			    (cand->ifa_dstaddr &&
+			    equal(addr, cand->ifa_dstaddr)))
+				return (cand);
+		}
+	}
+	return (fallback);
+}
+
+#include <net/route.h>
+
+/*
+ * Default action when installing a route with a Link Level gateway.
+ * On RTM_ADD, replace the route's ifaddr with the address of the same
+ * interface that ifaof_ifpforaddr() selects for the route's key, and
+ * pass the request on to that address's own rtrequest handler, if it
+ * has one other than this function.
+ * This should be moved to /sys/net/link.c eventually.
+ */
+void
+link_rtrequest(cmd, rt, sa)
+	int cmd;
+	register struct rtentry *rt;
+	struct sockaddr *sa;
+{
+	register struct ifaddr *ifa;
+	struct sockaddr *dst;
+	struct ifnet *ifp;
+
+	if (cmd != RTM_ADD)
+		return;
+	if ((ifa = rt->rt_ifa) == 0)
+		return;
+	if ((ifp = ifa->ifa_ifp) == 0)
+		return;
+	if ((dst = rt_key(rt)) == 0)
+		return;
+
+	ifa = ifaof_ifpforaddr(dst, ifp);
+	if (ifa == 0)
+		return;
+
+	/* Release the old address, then take a reference on the new one. */
+	IFAFREE(rt->rt_ifa);
+	rt->rt_ifa = ifa;
+	ifa->ifa_refcnt++;
+	if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
+		ifa->ifa_rtrequest(cmd, rt, sa);
+}
+
+/*
+ * Mark an interface down and notify protocols of
+ * the transition: clear IFF_UP, report PRC_IFDOWN for each of the
+ * interface's addresses, flush its send queue and post a routing
+ * message.
+ * NOTE: must be called at splnet or equivalent.
+ */
+void
+if_down(ifp)
+	register struct ifnet *ifp;
+{
+	register struct ifaddr *cur;
+
+	ifp->if_flags &= ~IFF_UP;
+	cur = ifp->if_addrlist;
+	while (cur != 0) {
+		pfctlinput(PRC_IFDOWN, cur->ifa_addr);
+		cur = cur->ifa_next;
+	}
+	if_qflush(&ifp->if_snd);
+	rt_ifmsg(ifp);
+}
+
+/*
+ * Mark an interface up and notify protocols of
+ * the transition: set IFF_UP and post a routing message.  The
+ * per-address PRC_IFUP notification is compiled only under "notyet".
+ * NOTE: must be called at splnet or equivalent.
+ */
+void
+if_up(ifp)
+	register struct ifnet *ifp;
+{
+	register struct ifaddr *ifa;
+
+	ifp->if_flags |= IFF_UP;
+#ifdef notyet
+	/* this has no effect on IP, and will kill all iso connections XXX */
+	ifa = ifp->if_addrlist;
+	while (ifa != 0) {
+		pfctlinput(PRC_IFUP, ifa->ifa_addr);
+		ifa = ifa->ifa_next;
+	}
+#endif
+	rt_ifmsg(ifp);
+}
+
+/*
+ * Flush an interface queue: free every packet chained from the head
+ * through m_act, then reset the queue to empty.
+ */
+void
+if_qflush(ifq)
+	register struct ifqueue *ifq;
+{
+	register struct mbuf *pkt, *following;
+
+	for (pkt = ifq->ifq_head; pkt != 0; pkt = following) {
+		/* Pick up the successor before the packet is freed. */
+		following = pkt->m_act;
+		m_freem(pkt);
+	}
+	ifq->ifq_len = 0;
+	ifq->ifq_tail = 0;
+	ifq->ifq_head = 0;
+}
+
+/*
+ * Handle interface watchdog timer routines.  Called
+ * from softclock, we decrement timers (if set) and
+ * call the appropriate interface routine on expiration.
+ * The routine re-arms itself with timeout() every
+ * hz / IFNET_SLOWHZ ticks.
+ */
+void
+if_slowtimo(arg)
+	void *arg;
+{
+	register struct ifnet *cur;
+	int saved_pri;
+
+	saved_pri = splimp();
+	for (cur = ifnet; cur != 0; cur = cur->if_next) {
+		/* Only a running timer that just reached zero fires. */
+		if (cur->if_timer != 0 && --cur->if_timer == 0 &&
+		    cur->if_watchdog)
+			(*cur->if_watchdog)(cur->if_unit);
+	}
+	splx(saved_pri);
+	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
+}
+
+/*
+ * Map interface name to
+ * interface structure pointer.
+ *
+ * The name is split at its first digit into a driver name and a
+ * decimal unit number.  A null pointer is returned when no digit is
+ * found within the first IFNAMSIZ characters or no interface matches.
+ * The caller's buffer is modified temporarily and restored on return.
+ */
+struct ifnet *
+ifunit(name)
+	register char *name;
+{
+	register char *cp;
+	register struct ifnet *ifp;
+	int unit;
+	unsigned len;
+	char *digits, saved;
+
+	cp = name;
+	while (cp < name + IFNAMSIZ && *cp) {
+		if (*cp >= '0' && *cp <= '9')
+			break;
+		cp++;
+	}
+	if (*cp == '\0' || cp == name + IFNAMSIZ)
+		return ((struct ifnet *)0);
+	/*
+	 * Remember where the unit starts and what character is there:
+	 * a null is planted at that spot for the comparison so that
+	 * initial substrings of interface names do not match.
+	 */
+	digits = cp;
+	saved = *digits;
+	len = cp - name + 1;
+	unit = 0;
+	while (*cp >= '0' && *cp <= '9') {
+		unit = unit * 10 + *cp - '0';
+		cp++;
+	}
+	*digits = 0;
+	for (ifp = ifnet; ifp; ifp = ifp->if_next)
+		if (bcmp(ifp->if_name, name, len) == 0 &&
+		    unit == ifp->if_unit)
+			break;
+	*digits = saved;
+	return (ifp);
+}
+
+/*
+ * Interface ioctls.
+ *
+ * SIOCGIFCONF/OSIOCGIFCONF are passed to ifconf().  Every other
+ * command treats data as a struct ifreq, looks the named interface up
+ * with ifunit() (ENXIO if there is none) and then either handles the
+ * request here or forwards it to the driver or the socket's protocol.
+ * Commands that modify the interface require suser() to succeed.
+ */
+int
+ifioctl(so, cmd, data, p)
+ struct socket *so;
+ int cmd;
+ caddr_t data;
+ struct proc *p;
+{
+ register struct ifnet *ifp;
+ register struct ifreq *ifr;
+ int error;
+
+ /* These commands carry a struct ifconf, not an interface name. */
+ switch (cmd) {
+
+ case SIOCGIFCONF:
+ case OSIOCGIFCONF:
+ return (ifconf(cmd, data));
+ }
+ ifr = (struct ifreq *)data;
+ ifp = ifunit(ifr->ifr_name);
+ if (ifp == 0)
+ return (ENXIO);
+ switch (cmd) {
+
+ case SIOCGIFFLAGS:
+ ifr->ifr_flags = ifp->if_flags;
+ break;
+
+ case SIOCGIFMETRIC:
+ ifr->ifr_metric = ifp->if_metric;
+ break;
+
+ case SIOCSIFFLAGS:
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ /* An up/down transition goes through if_down()/if_up() at splimp. */
+ if (ifp->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) {
+ int s = splimp();
+ if_down(ifp);
+ splx(s);
+ }
+ if (ifr->ifr_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) {
+ int s = splimp();
+ if_up(ifp);
+ splx(s);
+ }
+ /*
+ * Keep the interface's own IFF_CANTCHANGE bits and take all other
+ * bits from the request.
+ */
+ ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
+ (ifr->ifr_flags &~ IFF_CANTCHANGE);
+ /* The driver is told about the change; its result is ignored. */
+ if (ifp->if_ioctl)
+ (void) (*ifp->if_ioctl)(ifp, cmd, data);
+ break;
+
+ case SIOCSIFMETRIC:
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ ifp->if_metric = ifr->ifr_metric;
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ if (ifp->if_ioctl == NULL)
+ return (EOPNOTSUPP);
+ return ((*ifp->if_ioctl)(ifp, cmd, data));
+
+ default:
+ /* Anything else goes to the socket's protocol as PRU_CONTROL. */
+ if (so->so_proto == 0)
+ return (EOPNOTSUPP);
+#ifndef COMPAT_43
+ return ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
+ cmd, data, ifp));
+#else
+ {
+ int ocmd = cmd;
+
+ switch (cmd) {
+
+ /*
+ * Set requests: fill in a missing sa_len.
+ * NOTE(review): presumably this accepts old-style sockaddrs
+ * whose 16-bit family overlays sa_len/sa_family -- confirm.
+ */
+ case SIOCSIFDSTADDR:
+ case SIOCSIFADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCSIFNETMASK:
+#if BYTE_ORDER != BIG_ENDIAN
+ if (ifr->ifr_addr.sa_family == 0 &&
+ ifr->ifr_addr.sa_len < 16) {
+ ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
+ ifr->ifr_addr.sa_len = 16;
+ }
+#else
+ if (ifr->ifr_addr.sa_len == 0)
+ ifr->ifr_addr.sa_len = 16;
+#endif
+ break;
+
+ /* Old get requests are mapped onto their current equivalents. */
+ case OSIOCGIFADDR:
+ cmd = SIOCGIFADDR;
+ break;
+
+ case OSIOCGIFDSTADDR:
+ cmd = SIOCGIFDSTADDR;
+ break;
+
+ case OSIOCGIFBRDADDR:
+ cmd = SIOCGIFBRDADDR;
+ break;
+
+ case OSIOCGIFNETMASK:
+ cmd = SIOCGIFNETMASK;
+ }
+ error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
+ cmd, data, ifp));
+ /*
+ * For the old get requests, store the family as a u_short over
+ * the first two bytes of the returned address.
+ */
+ switch (ocmd) {
+
+ case OSIOCGIFADDR:
+ case OSIOCGIFDSTADDR:
+ case OSIOCGIFBRDADDR:
+ case OSIOCGIFNETMASK:
+ *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
+ }
+ return (error);
+
+ }
+#endif
+ }
+ return (0);
+}
+
+/*
+ * Return interface configuration
+ * of system.  List may be used
+ * in later ioctl's (above) to get
+ * other information.
+ *
+ * data points at a struct ifconf.  One struct ifreq per interface
+ * address (or a single one with a zeroed address for an interface
+ * that has none) is copied out to ifc_req while more than
+ * sizeof(struct ifreq) bytes of ifc_len remain.  An address longer
+ * than a struct sockaddr is copied out in full and the next record
+ * starts right behind it.  On return ifc_len is reduced by the space
+ * left unused.  Returns 0, or the first error reported by copyout().
+ */
+/*ARGSUSED*/
+int
+ifconf(cmd, data)
+	int cmd;
+	caddr_t data;
+{
+	register struct ifconf *ifc = (struct ifconf *)data;
+	register struct ifnet *ifp = ifnet;
+	register struct ifaddr *ifa;
+	register char *cp, *ep;
+	struct ifreq ifr, *ifrp;
+	char workbuf[12], *unitname;
+	int unitlen;
+	int space = ifc->ifc_len, error = 0;
+
+	ifrp = ifc->ifc_req;
+	for (; space > sizeof (ifr) && ifp; ifp = ifp->if_next) {
+		/*
+		 * Build "<name><unit>".  The unit is formatted as a full
+		 * decimal number (units above 9 need more than one digit)
+		 * and the driver name is truncated so that the unit and
+		 * the terminating null always fit.  The name field is
+		 * cleared first so that no stale bytes are copied out.
+		 */
+		unitname = sprint_d((u_int)ifp->if_unit, workbuf,
+		    sizeof(workbuf));
+		unitlen = strlen(unitname);
+		bzero((caddr_t)ifr.ifr_name, sizeof (ifr.ifr_name));
+		ep = ifr.ifr_name + sizeof (ifr.ifr_name) - 1 - unitlen;
+		strncpy(ifr.ifr_name, ifp->if_name, ep - ifr.ifr_name);
+		for (cp = ifr.ifr_name; cp < ep && *cp; cp++)
+			continue;
+		bcopy(unitname, cp, unitlen + 1);
+		if ((ifa = ifp->if_addrlist) == 0) {
+			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
+			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
+			    sizeof (ifr));
+			if (error)
+				break;
+			space -= sizeof (ifr), ifrp++;
+		} else
+		    for ( ; space > sizeof (ifr) && ifa; ifa = ifa->ifa_next) {
+			register struct sockaddr *sa = ifa->ifa_addr;
+#ifdef COMPAT_43
+			if (cmd == OSIOCGIFCONF) {
+				/* Old callers expect a 16-bit family field. */
+				struct osockaddr *osa =
+					 (struct osockaddr *)&ifr.ifr_addr;
+				ifr.ifr_addr = *sa;
+				osa->sa_family = sa->sa_family;
+				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
+						sizeof (ifr));
+				ifrp++;
+			} else
+#endif
+			if (sa->sa_len <= sizeof(*sa)) {
+				ifr.ifr_addr = *sa;
+				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
+						sizeof (ifr));
+				ifrp++;
+			} else {
+				/* Oversized address: name first, then sa_len bytes. */
+				space -= sa->sa_len - sizeof(*sa);
+				if (space < sizeof (ifr))
+					break;
+				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
+						sizeof (ifr.ifr_name));
+				if (error == 0)
+				    error = copyout((caddr_t)sa,
+				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
+				ifrp = (struct ifreq *)
+					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
+			}
+			if (error)
+				break;
+			space -= sizeof (ifr);
+		    }
+		/*
+		 * A failure inside the per-address loop only left that
+		 * loop; stop here so a later copyout cannot mask it.
+		 */
+		if (error)
+			break;
+	}
+	ifc->ifc_len -= space;
+	return (error);
+}
+
+/*
+ * Format n in decimal, right-justified at the end of the buflen-byte
+ * buffer buf and null-terminated.  Returns a pointer to the first
+ * digit, which lies inside buf.  No bounds check is made: buf must be
+ * large enough for every digit of n plus the terminator.
+ */
+static char *
+sprint_d(n, buf, buflen)
+	u_int n;
+	char *buf;
+	int buflen;
+{
+	register char *out = buf + buflen;
+
+	*--out = 0;
+	for (;;) {
+		*--out = "0123456789"[n % 10];
+		n /= 10;
+		if (n == 0)
+			break;
+	}
+	return (out);
+}
diff --git a/sys/net/if.h b/sys/net/if.h
new file mode 100644
index 000000000000..c27c4f9cf632
--- /dev/null
+++ b/sys/net/if.h
@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Structures defining a network interface, providing a packet
+ * transport mechanism (ala level 0 of the PUP protocols).
+ *
+ * Each interface accepts output datagrams of a specified maximum
+ * length, and provides higher level routines with input datagrams
+ * received from its medium.
+ *
+ * Output occurs when the routine if_output is called, with four parameters:
+ * (*ifp->if_output)(ifp, m, dst, rt)
+ * Here m is the mbuf chain to be sent and dst is the destination address.
+ * The output routine encapsulates the supplied datagram if necessary,
+ * and then transmits it on its medium.
+ *
+ * On input, each interface unwraps the data received by it, and either
+ * places it on the input queue of an internetwork datagram routine
+ * and posts the associated software interrupt, or passes the datagram to a raw
+ * packet input routine.
+ *
+ * Routines exist for locating interfaces by their addresses
+ * or for locating an interface on a certain network, as well as more general
+ * routing and gateway routines maintaining information used to locate
+ * interfaces. These routines live in the files if.c and route.c
+ */
+#ifndef _TIME_ /* XXX fast fix for SNMP, going away soon */
+#include <sys/time.h>
+#endif
+
+#ifdef __STDC__
+/*
+ * Forward structure declarations for function prototypes [sic].
+ */
+struct mbuf;
+struct proc;
+struct rtentry;
+struct socket;
+struct ether_header;
+#endif
+/*
+ * Structure defining a network interface.  The embedded struct if_data
+ * holds the information about the interface which may be of interest
+ * to management entities; the embedded struct ifqueue (if_snd) is the
+ * interface's output queue.
+ *
+ * (Would like to call this struct ``if'', but C isn't PL/1.)
+ */
+
+struct ifnet {
+ char *if_name; /* name, e.g. ``en'' or ``lo'' */
+ struct ifnet *if_next; /* all struct ifnets are chained */
+ struct ifaddr *if_addrlist; /* linked list of addresses per if */
+ int if_pcount; /* number of promiscuous listeners */
+ caddr_t if_bpf; /* packet filter structure */
+ u_short if_index; /* numeric abbreviation for this if */
+ short if_unit; /* sub-unit for lower level driver */
+ short if_timer; /* time 'til if_watchdog called */
+ short if_flags; /* up/down, broadcast, etc. */
+ struct if_data {
+/* generic interface information */
+ u_char ifi_type; /* ethernet, tokenring, etc */
+ u_char ifi_addrlen; /* media address length */
+ u_char ifi_hdrlen; /* media header length */
+ u_long ifi_mtu; /* maximum transmission unit */
+ u_long ifi_metric; /* routing metric (external only) */
+ u_long ifi_baudrate; /* linespeed */
+/* volatile statistics */
+ u_long ifi_ipackets; /* packets received on interface */
+ u_long ifi_ierrors; /* input errors on interface */
+ u_long ifi_opackets; /* packets sent on interface */
+ u_long ifi_oerrors; /* output errors on interface */
+ u_long ifi_collisions; /* collisions on csma interfaces */
+ u_long ifi_ibytes; /* total number of octets received */
+ u_long ifi_obytes; /* total number of octets sent */
+ u_long ifi_imcasts; /* packets received via multicast */
+ u_long ifi_omcasts; /* packets sent via multicast */
+ u_long ifi_iqdrops; /* dropped on input, this interface */
+ u_long ifi_noproto; /* destined for unsupported protocol */
+ struct timeval ifi_lastchange;/* last updated */
+ } if_data;
+/* procedure handles */
+ int (*if_init) /* init routine */
+ __P((int));
+ int (*if_output) /* output routine (enqueue) */
+ __P((struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct rtentry *));
+ int (*if_start) /* initiate output routine */
+ __P((struct ifnet *));
+ int (*if_done) /* output complete routine */
+ __P((struct ifnet *)); /* (XXX not used; fake prototype) */
+ int (*if_ioctl) /* ioctl routine */
+ __P((struct ifnet *, int, caddr_t));
+ int (*if_reset)
+ __P((int)); /* new autoconfig will permit removal */
+ int (*if_watchdog) /* timer routine */
+ __P((int));
+ struct ifqueue {
+ struct mbuf *ifq_head;
+ struct mbuf *ifq_tail;
+ int ifq_len;
+ int ifq_maxlen;
+ int ifq_drops;
+ } if_snd; /* output queue */
+};
+#define if_mtu if_data.ifi_mtu
+#define if_type if_data.ifi_type
+#define if_addrlen if_data.ifi_addrlen
+#define if_hdrlen if_data.ifi_hdrlen
+#define if_metric if_data.ifi_metric
+#define if_baudrate if_data.ifi_baudrate
+#define if_ipackets if_data.ifi_ipackets
+#define if_ierrors if_data.ifi_ierrors
+#define if_opackets if_data.ifi_opackets
+#define if_oerrors if_data.ifi_oerrors
+#define if_collisions if_data.ifi_collisions
+#define if_ibytes if_data.ifi_ibytes
+#define if_obytes if_data.ifi_obytes
+#define if_imcasts if_data.ifi_imcasts
+#define if_omcasts if_data.ifi_omcasts
+#define if_iqdrops if_data.ifi_iqdrops
+#define if_noproto if_data.ifi_noproto
+#define if_lastchange if_data.ifi_lastchange
+
+#define IFF_UP 0x1 /* interface is up */
+#define IFF_BROADCAST 0x2 /* broadcast address valid */
+#define IFF_DEBUG 0x4 /* turn on debugging */
+#define IFF_LOOPBACK 0x8 /* is a loopback net */
+#define IFF_POINTOPOINT 0x10 /* interface is point-to-point link */
+#define IFF_NOTRAILERS 0x20 /* avoid use of trailers */
+#define IFF_RUNNING 0x40 /* resources allocated */
+#define IFF_NOARP 0x80 /* no address resolution protocol */
+#define IFF_PROMISC 0x100 /* receive all packets */
+#define IFF_ALLMULTI 0x200 /* receive all multicast packets */
+#define IFF_OACTIVE 0x400 /* transmission in progress */
+#define IFF_SIMPLEX 0x800 /* can't hear own transmissions */
+#define IFF_LINK0 0x1000 /* per link layer defined bit */
+#define IFF_LINK1 0x2000 /* per link layer defined bit */
+#define IFF_LINK2 0x4000 /* per link layer defined bit */
+#define IFF_MULTICAST 0x8000 /* supports multicast */
+
+/* flags set internally only: */
+#define IFF_CANTCHANGE \
+ (IFF_BROADCAST|IFF_POINTOPOINT|IFF_RUNNING|IFF_OACTIVE|\
+ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI)
+
+/*
+ * Output queues (ifp->if_snd) and internetwork datagram level (pup level 1)
+ * input routines have queues of messages stored on ifqueue structures
+ * (defined above). Entries are added to and deleted from these structures
+ * by these macros, which should be called with ipl raised to splimp().
+ *
+ * IF_ENQUEUE, IF_PREPEND and IF_DEQUEUE expand to a bare { } block, not
+ * to an expression, and evaluate their arguments more than once.
+ */
+/* True when the queue already holds ifq_maxlen or more packets. */
+#define IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen)
+/* Count one dropped packet against the queue. */
+#define IF_DROP(ifq) ((ifq)->ifq_drops++)
+/* Append packet m at the tail of the queue. */
+#define IF_ENQUEUE(ifq, m) { \
+ (m)->m_nextpkt = 0; \
+ if ((ifq)->ifq_tail == 0) \
+ (ifq)->ifq_head = m; \
+ else \
+ (ifq)->ifq_tail->m_nextpkt = m; \
+ (ifq)->ifq_tail = m; \
+ (ifq)->ifq_len++; \
+}
+/* Insert packet m at the head of the queue. */
+#define IF_PREPEND(ifq, m) { \
+ (m)->m_nextpkt = (ifq)->ifq_head; \
+ if ((ifq)->ifq_tail == 0) \
+ (ifq)->ifq_tail = (m); \
+ (ifq)->ifq_head = (m); \
+ (ifq)->ifq_len++; \
+}
+/* Remove the head packet into m; m is left 0 when the queue is empty. */
+#define IF_DEQUEUE(ifq, m) { \
+ (m) = (ifq)->ifq_head; \
+ if (m) { \
+ if (((ifq)->ifq_head = (m)->m_nextpkt) == 0) \
+ (ifq)->ifq_tail = 0; \
+ (m)->m_nextpkt = 0; \
+ (ifq)->ifq_len--; \
+ } \
+}
+
+#define IFQ_MAXLEN 50
+#define IFNET_SLOWHZ 1 /* granularity is 1 second */
+
+/*
+ * The ifaddr structure contains information about one address
+ * of an interface. They are maintained by the different address families,
+ * are allocated and attached when an address is set, and are linked
+ * together so all addresses for an interface can be located.
+ */
+struct ifaddr {
+ struct sockaddr *ifa_addr; /* address of interface */
+ struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */
+#define ifa_broadaddr ifa_dstaddr /* broadcast address interface */
+ struct sockaddr *ifa_netmask; /* used to determine subnet */
+ struct ifnet *ifa_ifp; /* back-pointer to interface */
+ struct ifaddr *ifa_next; /* next address for interface */
+ void (*ifa_rtrequest)(); /* check or clean routes (+ or -)'d */
+ u_short ifa_flags; /* mostly rt_flags for cloning */
+ short ifa_refcnt; /* extra to malloc for link info */
+ int ifa_metric; /* cost of going out this interface */
+#ifdef notdef
+ struct rtentry *ifa_rt; /* XXXX for ROUTETOIF ????? */
+#endif
+};
+#define IFA_ROUTE RTF_UP /* route installed */
+
+/*
+ * Message format for use in obtaining information about interfaces
+ * from getkerninfo and the routing socket
+ */
+struct if_msghdr {
+ u_short ifm_msglen; /* to skip over non-understood messages */
+ u_char ifm_version; /* future binary compatibility */
+ u_char ifm_type; /* message type */
+ int ifm_addrs; /* like rtm_addrs */
+ int ifm_flags; /* value of if_flags */
+ u_short ifm_index; /* index for associated ifp */
+ struct if_data ifm_data;/* statistics and other data about if */
+};
+
+/*
+ * Message format for use in obtaining information about interface addresses
+ * from getkerninfo and the routing socket
+ */
+struct ifa_msghdr {
+ u_short ifam_msglen; /* to skip over non-understood messages */
+ u_char ifam_version; /* future binary compatibility */
+ u_char ifam_type; /* message type */
+ int ifam_addrs; /* like rtm_addrs */
+ int ifam_flags; /* value of ifa_flags */
+ u_short ifam_index; /* index for associated ifp */
+ int ifam_metric; /* value of ifa_metric */
+};
+
+/*
+ * Interface request structure used for socket
+ * ioctl's. All interface ioctl's must have parameter
+ * definitions which begin with ifr_name. The
+ * remainder may be interface specific.
+ */
+struct ifreq {
+#define IFNAMSIZ 16
+ char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ union {
+ struct sockaddr ifru_addr;
+ struct sockaddr ifru_dstaddr;
+ struct sockaddr ifru_broadaddr;
+ short ifru_flags;
+ int ifru_metric;
+ caddr_t ifru_data;
+ } ifr_ifru;
+#define ifr_addr ifr_ifru.ifru_addr /* address */
+#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */
+#define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */
+#define ifr_flags ifr_ifru.ifru_flags /* flags */
+#define ifr_metric ifr_ifru.ifru_metric /* metric */
+#define ifr_data ifr_ifru.ifru_data /* for use by interface */
+};
+
+struct ifaliasreq {
+ char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct sockaddr ifra_addr;
+ struct sockaddr ifra_broadaddr;
+ struct sockaddr ifra_mask;
+};
+
+/*
+ * Structure used in SIOCGIFCONF request.
+ * Used to retrieve interface configuration
+ * for machine (useful for programs which
+ * must know all networks accessible).
+ */
+struct ifconf {
+ int ifc_len; /* size of associated buffer */
+ union {
+ caddr_t ifcu_buf;
+ struct ifreq *ifcu_req;
+ } ifc_ifcu;
+#define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */
+#define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */
+};
+
+#include <net/if_arp.h>
+
+#ifdef KERNEL
+/*
+ * Drop one reference to ifa: when the count is already zero or below
+ * the ifaddr is handed to ifafree(), otherwise the count is just
+ * decremented.
+ * NOTE: this expands to a bare if/else that already ends in a
+ * semicolon, so it must not be used as the unbraced body of an if
+ * that has its own else.
+ */
+#define IFAFREE(ifa) \
+ if ((ifa)->ifa_refcnt <= 0) \
+ ifafree(ifa); \
+ else \
+ (ifa)->ifa_refcnt--;
+
+struct ifnet *ifnet;
+
+void ether_ifattach __P((struct ifnet *));
+void ether_input __P((struct ifnet *, struct ether_header *, struct mbuf *));
+int ether_output __P((struct ifnet *,
+ struct mbuf *, struct sockaddr *, struct rtentry *));
+char *ether_sprintf __P((u_char *));
+
+void if_attach __P((struct ifnet *));
+void if_down __P((struct ifnet *));
+void if_qflush __P((struct ifqueue *));
+void if_slowtimo __P((void *));
+void if_up __P((struct ifnet *));
+#ifdef vax
+void ifubareset __P((int));
+#endif
+int ifconf __P((int, caddr_t));
+void ifinit __P((void));
+int ifioctl __P((struct socket *, int, caddr_t, struct proc *));
+int ifpromisc __P((struct ifnet *, int));
+struct ifnet *ifunit __P((char *));
+
+struct ifaddr *ifa_ifwithaddr __P((struct sockaddr *));
+struct ifaddr *ifa_ifwithaf __P((int));
+struct ifaddr *ifa_ifwithdstaddr __P((struct sockaddr *));
+struct ifaddr *ifa_ifwithnet __P((struct sockaddr *));
+struct ifaddr *ifa_ifwithroute __P((int, struct sockaddr *,
+ struct sockaddr *));
+struct ifaddr *ifaof_ifpforaddr __P((struct sockaddr *, struct ifnet *));
+void ifafree __P((struct ifaddr *));
+void link_rtrequest __P((int, struct rtentry *, struct sockaddr *));
+
+int loioctl __P((struct ifnet *, int, caddr_t));
+void loopattach __P((int));
+int looutput __P((struct ifnet *,
+ struct mbuf *, struct sockaddr *, struct rtentry *));
+void lortrequest __P((int, struct rtentry *, struct sockaddr *));
+#endif
diff --git a/sys/net/if_arp.h b/sys/net/if_arp.h
new file mode 100644
index 000000000000..84581cbb98d9
--- /dev/null
+++ b/sys/net/if_arp.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_arp.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Address Resolution Protocol.
+ *
+ * See RFC 826 for protocol description. ARP packets are variable
+ * in size; the arphdr structure defines the fixed-length portion.
+ * Protocol type values are the same as those for 10 Mb/s Ethernet.
+ * It is followed by the variable-sized fields ar_sha, ar_spa,
+ * ar_tha and ar_tpa in that order, according to the lengths
+ * specified. Field names used correspond to RFC 826.
+ */
+struct arphdr {
+ u_short ar_hrd; /* format of hardware address */
+#define ARPHRD_ETHER 1 /* ethernet hardware format */
+#define ARPHRD_FRELAY 15 /* frame relay hardware format */
+ u_short ar_pro; /* format of protocol address */
+ u_char ar_hln; /* length of hardware address */
+ u_char ar_pln; /* length of protocol address */
+ u_short ar_op; /* one of: */
+#define ARPOP_REQUEST 1 /* request to resolve address */
+#define ARPOP_REPLY 2 /* response to previous request */
+#define ARPOP_REVREQUEST 3 /* request protocol address given hardware */
+#define ARPOP_REVREPLY 4 /* response giving protocol address */
+#define ARPOP_INVREQUEST 8 /* request to identify peer */
+#define ARPOP_INVREPLY 9 /* response identifying peer */
+/*
+ * The remaining fields are variable in size,
+ * according to the sizes above.
+ */
+#ifdef COMMENT_ONLY
+ u_char ar_sha[]; /* sender hardware address */
+ u_char ar_spa[]; /* sender protocol address */
+ u_char ar_tha[]; /* target hardware address */
+ u_char ar_tpa[]; /* target protocol address */
+#endif
+};
+
+/*
+ * ARP ioctl request
+ */
+struct arpreq {
+ struct sockaddr arp_pa; /* protocol address */
+ struct sockaddr arp_ha; /* hardware address */
+ int arp_flags; /* flags */
+};
+/* arp_flags and at_flags field values */
+#define ATF_INUSE 0x01 /* entry in use */
+#define ATF_COM 0x02 /* completed entry (enaddr valid) */
+#define ATF_PERM 0x04 /* permanent entry */
+#define ATF_PUBL 0x08 /* publish entry (respond for other host) */
+#define ATF_USETRAILERS 0x10 /* has requested trailers */
diff --git a/sys/net/if_dl.h b/sys/net/if_dl.h
new file mode 100644
index 000000000000..3e53449085aa
--- /dev/null
+++ b/sys/net/if_dl.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_dl.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * A Link-Level Sockaddr may specify the interface in one of two
+ * ways: either by means of a system-provided index number (computed
+ * anew and possibly differently on every reboot), or by a human-readable
+ * string such as "il0" (for managerial convenience).
+ *
+ * Census taking actions, such as something akin to SIOCGCONF would return
+ * both the index and the human name.
+ *
+ * High volume transactions (such as giving a link-level ``from'' address
+ * in a recvfrom or recvmsg call) may be likely only to provide the indexed
+ * form, (which requires fewer copy operations and less space).
+ *
+ * The form and interpretation of the link-level address is purely a matter
+ * of convention between the device driver and its consumers; however, it is
+ * expected that all drivers for an interface of a given if_type will agree.
+ */
+
+/*
+ * Structure of a Link-Level sockaddr:
+ */
+struct sockaddr_dl {
+ u_char sdl_len; /* Total length of sockaddr */
+ u_char sdl_family; /* AF_DLI */
+ u_short sdl_index; /* if != 0, system given index for interface */
+ u_char sdl_type; /* interface type */
+ u_char sdl_nlen; /* interface name length, no trailing 0 reqd. */
+ u_char sdl_alen; /* link level address length */
+ u_char sdl_slen; /* link layer selector length */
+ char sdl_data[12]; /* minimum work area, can be larger;
+ contains both if name and ll address */
+};
+
+/*
+ * Pointer to the link-level address stored in sdl_data, i.e. the byte
+ * that follows the sdl_nlen bytes of interface name.  (s) is evaluated twice.
+ */
+#define LLADDR(s) ((caddr_t)((s)->sdl_data + (s)->sdl_nlen))
+
+#ifndef KERNEL
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+void link_addr __P((const char *, struct sockaddr_dl *));
+char *link_ntoa __P((const struct sockaddr_dl *));
+__END_DECLS
+
+#endif /* !KERNEL */
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
new file mode 100644
index 000000000000..d4d6680fdb02
--- /dev/null
+++ b/sys/net/if_ethersubr.c
@@ -0,0 +1,675 @@
+/*
+ * Copyright (c) 1982, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+
+#include <machine/cpu.h>
+
+#include <net/if.h>
+#include <net/netisr.h>
+#include <net/route.h>
+#include <net/if_llc.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#endif
+#include <netinet/if_ether.h>
+
+#ifdef NS
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#endif
+
+#ifdef ISO
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_snpac.h>
+#endif
+
+#ifdef LLC
+#include <netccitt/dll.h>
+#include <netccitt/llc_var.h>
+#endif
+
+#if defined(LLC) && defined(CCITT)
+extern struct ifqueue pkintrq;
+#endif
+
+u_char etherbroadcastaddr[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+extern struct ifnet loif;
+#define senderr(e) { error = (e); goto bad;}
+
+/*
+ * Ethernet output routine.
+ * Encapsulate a packet of type family for the local net and queue it
+ * on the interface send queue.  No trailer encapsulation is performed
+ * by this code.
+ * Assumes that ifp is actually pointer to arpcom structure.
+ *
+ *	ifp	interface to transmit on (must be IFF_UP and IFF_RUNNING)
+ *	m0	packet to send; m0->m_pkthdr.len is read on entry
+ *	dst	destination; sa_family selects the encapsulation
+ *	rt0	route for dst, or 0 when the caller has none
+ *
+ * Returns 0 when the packet was queued (or was handed to arpresolve(),
+ * which reported the address as not yet resolved), otherwise an errno
+ * value.  On every error that goes through "bad" the mbuf chain is freed.
+ */
+int
+ether_output(ifp, m0, dst, rt0)
+	register struct ifnet *ifp;
+	struct mbuf *m0;
+	struct sockaddr *dst;
+	struct rtentry *rt0;
+{
+	short type;
+	int s, error = 0;
+	u_char edst[6];
+	register struct mbuf *m = m0;
+	register struct rtentry *rt;
+	struct mbuf *mcopy = (struct mbuf *)0;
+	register struct ether_header *eh;
+	int mcast, len = m->m_pkthdr.len;
+	struct arpcom *ac = (struct arpcom *)ifp;
+
+	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
+		senderr(ENETDOWN);
+	ifp->if_lastchange = time;
+	if ((rt = rt0) != 0) {
+		/* Route went down: look the destination up again. */
+		if ((rt->rt_flags & RTF_UP) == 0) {
+			if ((rt0 = rt = rtalloc1(dst, 1)) != 0)
+				rt->rt_refcnt--;
+			else
+				senderr(EHOSTUNREACH);
+		}
+		/* For gateway routes continue with the route to the gateway. */
+		if (rt->rt_flags & RTF_GATEWAY) {
+			if (rt->rt_gwroute == 0)
+				goto lookup;
+			if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) {
+				rtfree(rt); rt = rt0;
+			lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1);
+				if ((rt = rt->rt_gwroute) == 0)
+					senderr(EHOSTUNREACH);
+			}
+		}
+		if (rt->rt_flags & RTF_REJECT)
+			if (rt->rt_rmx.rmx_expire == 0 ||
+			    time.tv_sec < rt->rt_rmx.rmx_expire)
+				senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
+	}
+	switch (dst->sa_family) {
+
+#ifdef INET
+	case AF_INET:
+		if (!arpresolve(ac, rt, m, dst, edst))
+			return (0);	/* if not yet resolved */
+		/* If broadcasting on a simplex interface, loopback a copy */
+		if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX))
+			mcopy = m_copy(m, 0, (int)M_COPYALL);
+		type = ETHERTYPE_IP;
+		break;
+#endif
+#ifdef NS
+	case AF_NS:
+		type = ETHERTYPE_NS;
+		bcopy((caddr_t)&(((struct sockaddr_ns *)dst)->sns_addr.x_host),
+		    (caddr_t)edst, sizeof (edst));
+		if (!bcmp((caddr_t)edst, (caddr_t)&ns_thishost, sizeof(edst)))
+			return (looutput(ifp, m, dst, rt));
+		/* If broadcasting on a simplex interface, loopback a copy */
+		if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX))
+			mcopy = m_copy(m, 0, (int)M_COPYALL);
+		break;
+#endif
+#ifdef	ISO
+	case AF_ISO: {
+		int	snpalen;
+		struct	llc *l;
+		register struct sockaddr_dl *sdl;
+
+		if (rt && (sdl = (struct sockaddr_dl *)rt->rt_gateway) &&
+		    sdl->sdl_family == AF_LINK && sdl->sdl_alen > 0) {
+			bcopy(LLADDR(sdl), (caddr_t)edst, sizeof(edst));
+		} else if (error =
+			    iso_snparesolve(ifp, (struct sockaddr_iso *)dst,
+					    (char *)edst, &snpalen))
+			goto bad; /* Not Resolved */
+		/* If broadcasting on a simplex interface, loopback a copy */
+		if (*edst & 1)
+			m->m_flags |= (M_BCAST|M_MCAST);
+		if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX) &&
+		    (mcopy = m_copy(m, 0, (int)M_COPYALL))) {
+			M_PREPEND(mcopy, sizeof (*eh), M_DONTWAIT);
+			if (mcopy) {
+				eh = mtod(mcopy, struct ether_header *);
+				bcopy((caddr_t)edst,
+				      (caddr_t)eh->ether_dhost, sizeof (edst));
+				bcopy((caddr_t)ac->ac_enaddr,
+				      (caddr_t)eh->ether_shost, sizeof (edst));
+			}
+		}
+		M_PREPEND(m, 3, M_DONTWAIT);
+		if (m == NULL) {
+			/*
+			 * No room for the LLC header: release the loopback
+			 * copy (nothing else will) and report the failure
+			 * the same way as the ether header M_PREPEND below.
+			 */
+			if (mcopy)
+				m_freem(mcopy);
+			senderr(ENOBUFS);
+		}
+		type = m->m_pkthdr.len;
+		l = mtod(m, struct llc *);
+		l->llc_dsap = l->llc_ssap = LLC_ISO_LSAP;
+		l->llc_control = LLC_UI;
+		len += 3;
+		IFDEBUG(D_ETHER)
+			int i;
+			printf("unoutput: sending pkt to: ");
+			for (i=0; i<6; i++)
+				printf("%x ", edst[i] & 0xff);
+			printf("\n");
+		ENDDEBUG
+		} break;
+#endif /* ISO */
+#ifdef	LLC
+/*	case AF_NSAP: */
+	case AF_CCITT: {
+		/* rt is 0 when the caller supplied no route; do not follow it. */
+		register struct sockaddr_dl *sdl = rt ?
+		    (struct sockaddr_dl *)rt->rt_gateway : 0;
+
+		if (sdl && sdl->sdl_family == AF_LINK
+		    && sdl->sdl_alen > 0) {
+			bcopy(LLADDR(sdl), (char *)edst,
+				sizeof(edst));
+		} else goto bad; /* Not a link interface ? Funny ... */
+		/*
+		 * NOTE(review): the goto above reaches "bad" with error
+		 * still 0, so the packet is dropped but 0 is returned.
+		 */
+		if ((ifp->if_flags & IFF_SIMPLEX) && (*edst & 1) &&
+		    (mcopy = m_copy(m, 0, (int)M_COPYALL))) {
+			M_PREPEND(mcopy, sizeof (*eh), M_DONTWAIT);
+			if (mcopy) {
+				eh = mtod(mcopy, struct ether_header *);
+				bcopy((caddr_t)edst,
+				      (caddr_t)eh->ether_dhost, sizeof (edst));
+				bcopy((caddr_t)ac->ac_enaddr,
+				      (caddr_t)eh->ether_shost, sizeof (edst));
+			}
+		}
+		type = m->m_pkthdr.len;
+#ifdef LLC_DEBUG
+		{
+			int i;
+			register struct llc *l = mtod(m, struct llc *);
+
+			printf("ether_output: sending LLC2 pkt to: ");
+			for (i=0; i<6; i++)
+				printf("%x ", edst[i] & 0xff);
+			printf(" len 0x%x dsap 0x%x ssap 0x%x control 0x%x\n",
+			       type & 0xff, l->llc_dsap & 0xff, l->llc_ssap &0xff,
+			       l->llc_control & 0xff);
+
+		}
+#endif /* LLC_DEBUG */
+		} break;
+#endif /* LLC */
+
+	case AF_UNSPEC:
+		/* Caller supplies a ready-made ether header in sa_data. */
+		eh = (struct ether_header *)dst->sa_data;
+		bcopy((caddr_t)eh->ether_dhost, (caddr_t)edst, sizeof (edst));
+		type = eh->ether_type;
+		break;
+
+	default:
+		printf("%s%d: can't handle af%d\n", ifp->if_name, ifp->if_unit,
+			dst->sa_family);
+		senderr(EAFNOSUPPORT);
+	}
+
+
+	if (mcopy)
+		(void) looutput(ifp, mcopy, dst, rt);
+	/*
+	 * Add local net header.  If no space in first mbuf,
+	 * allocate another.
+	 */
+	M_PREPEND(m, sizeof (struct ether_header), M_DONTWAIT);
+	if (m == 0)
+		senderr(ENOBUFS);
+	eh = mtod(m, struct ether_header *);
+	type = htons((u_short)type);
+	bcopy((caddr_t)&type,(caddr_t)&eh->ether_type,
+		sizeof(eh->ether_type));
+	bcopy((caddr_t)edst, (caddr_t)eh->ether_dhost, sizeof (edst));
+	bcopy((caddr_t)ac->ac_enaddr, (caddr_t)eh->ether_shost,
+	    sizeof(eh->ether_shost));
+	s = splimp();
+	/*
+	 * Queue message on interface, and start output if interface
+	 * not yet active.
+	 */
+	if (IF_QFULL(&ifp->if_snd)) {
+		IF_DROP(&ifp->if_snd);
+		splx(s);
+		senderr(ENOBUFS);
+	}
+	/*
+	 * Remember the multicast bit now: once the packet is on the send
+	 * queue the driver's start routine may dequeue and release it, so
+	 * m must not be touched after IF_ENQUEUE.
+	 */
+	mcast = (m->m_flags & M_MCAST) != 0;
+	IF_ENQUEUE(&ifp->if_snd, m);
+	if ((ifp->if_flags & IFF_OACTIVE) == 0)
+		(*ifp->if_start)(ifp);
+	splx(s);
+	ifp->if_obytes += len + sizeof (struct ether_header);
+	if (mcast)
+		ifp->if_omcasts++;
+	return (error);
+
+bad:
+	if (m)
+		m_freem(m);
+	return (error);
+}
+
+/*
+ * Process a received Ethernet packet;
+ * the packet is in the mbuf chain m without
+ * the ether header, which is provided separately.
+ */
+void
+ether_input(ifp, eh, m)
+ struct ifnet *ifp;
+ register struct ether_header *eh;
+ struct mbuf *m;
+{
+ register struct ifqueue *inq;
+ register struct llc *l;
+ struct arpcom *ac = (struct arpcom *)ifp;
+ int s;
+
+ /* Discard anything received on an interface that is not marked up. */
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ m_freem(m);
+ return;
+ }
+ ifp->if_lastchange = time;
+ ifp->if_ibytes += m->m_pkthdr.len + sizeof (*eh);
+ /* Flag the mbuf as broadcast or multicast from the destination address. */
+ if (bcmp((caddr_t)etherbroadcastaddr, (caddr_t)eh->ether_dhost,
+ sizeof(etherbroadcastaddr)) == 0)
+ m->m_flags |= M_BCAST;
+ else if (eh->ether_dhost[0] & 1)
+ m->m_flags |= M_MCAST;
+ if (m->m_flags & (M_BCAST|M_MCAST))
+ ifp->if_imcasts++;
+
+ /*
+ * Select the protocol input queue and schedule its software interrupt.
+ * NOTE(review): ether_type is compared with the ETHERTYPE_* constants
+ * without any byte swapping here -- presumably the caller passes it in
+ * host byte order; confirm against the drivers.
+ */
+ switch (eh->ether_type) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ schednetisr(NETISR_IP);
+ inq = &ipintrq;
+ break;
+
+ case ETHERTYPE_ARP:
+ schednetisr(NETISR_ARP);
+ inq = &arpintrq;
+ break;
+#endif
+#ifdef NS
+ case ETHERTYPE_NS:
+ schednetisr(NETISR_NS);
+ inq = &nsintrq;
+ break;
+
+#endif
+ default:
+#if defined (ISO) || defined (LLC)
+ /*
+ * Values above ETHERMTU are dropped as unknown types; anything else
+ * is used below as the frame length, with an LLC header expected at
+ * the start of the data.
+ */
+ if (eh->ether_type > ETHERMTU)
+ goto dropanyway;
+ l = mtod(m, struct llc *);
+ switch (l->llc_dsap) {
+#ifdef ISO
+ case LLC_ISO_LSAP:
+ switch (l->llc_control) {
+ case LLC_UI:
+ /* LLC_UI_P forbidden in class 1 service */
+ if ((l->llc_dsap == LLC_ISO_LSAP) &&
+ (l->llc_ssap == LLC_ISO_LSAP)) {
+ /* LSAP for ISO */
+ /* m_adj() gets a negative count here -- presumably trimming the tail. */
+ if (m->m_pkthdr.len > eh->ether_type)
+ m_adj(m, eh->ether_type - m->m_pkthdr.len);
+ /* Step over the 3-byte LLC header, then put the ether header back in front. */
+ m->m_data += 3; /* XXX */
+ m->m_len -= 3; /* XXX */
+ m->m_pkthdr.len -= 3; /* XXX */
+ M_PREPEND(m, sizeof *eh, M_DONTWAIT);
+ if (m == 0)
+ return;
+ *mtod(m, struct ether_header *) = *eh;
+ IFDEBUG(D_ETHER)
+ printf("clnp packet");
+ ENDDEBUG
+ schednetisr(NETISR_ISO);
+ inq = &clnlintrq;
+ break;
+ }
+ goto dropanyway;
+
+ case LLC_XID:
+ case LLC_XID_P:
+ /* Fill in the XID fields in place, then share the reply path with TEST. */
+ if(m->m_len < 6)
+ goto dropanyway;
+ l->llc_window = 0;
+ l->llc_fid = 9;
+ l->llc_class = 1;
+ l->llc_dsap = l->llc_ssap = 0;
+ /* Fall through to */
+ case LLC_TEST:
+ case LLC_TEST_P:
+ {
+ /*
+ * Send the frame back to its sender: swap DSAP and SSAP, swap the
+ * Ethernet source and destination, and pass it to if_output with
+ * an AF_UNSPEC sockaddr whose sa_data holds the new addresses.
+ */
+ struct sockaddr sa;
+ register struct ether_header *eh2;
+ int i;
+ u_char c = l->llc_dsap;
+
+ l->llc_dsap = l->llc_ssap;
+ l->llc_ssap = c;
+ /* For broadcast/multicast frames use our own address as the one to swap in. */
+ if (m->m_flags & (M_BCAST | M_MCAST))
+ bcopy((caddr_t)ac->ac_enaddr,
+ (caddr_t)eh->ether_dhost, 6);
+ sa.sa_family = AF_UNSPEC;
+ sa.sa_len = sizeof(sa);
+ eh2 = (struct ether_header *)sa.sa_data;
+ for (i = 0; i < 6; i++) {
+ eh2->ether_shost[i] = c = eh->ether_dhost[i];
+ eh2->ether_dhost[i] =
+ eh->ether_dhost[i] = eh->ether_shost[i];
+ eh->ether_shost[i] = c;
+ }
+ ifp->if_output(ifp, m, &sa, NULL);
+ return;
+ }
+ default:
+ m_freem(m);
+ return;
+ }
+ break;
+#endif /* ISO */
+#ifdef LLC
+ case LLC_X25_LSAP:
+ {
+ /* Trim to the stated length and prepend an sdl_hdr describing both ends. */
+ if (m->m_pkthdr.len > eh->ether_type)
+ m_adj(m, eh->ether_type - m->m_pkthdr.len);
+ M_PREPEND(m, sizeof(struct sdl_hdr) , M_DONTWAIT);
+ if (m == 0)
+ return;
+ if ( !sdl_sethdrif(ifp, eh->ether_shost, LLC_X25_LSAP,
+ eh->ether_dhost, LLC_X25_LSAP, 6,
+ mtod(m, struct sdl_hdr *)))
+ panic("ETHER cons addr failure");
+ mtod(m, struct sdl_hdr *)->sdlhdr_len = eh->ether_type;
+#ifdef LLC_DEBUG
+ printf("llc packet\n");
+#endif /* LLC_DEBUG */
+ schednetisr(NETISR_CCITT);
+ inq = &llcintrq;
+ break;
+ }
+#endif /* LLC */
+ dropanyway:
+ default:
+ m_freem(m);
+ return;
+ }
+#else /* ISO || LLC */
+ m_freem(m);
+ return;
+#endif /* ISO || LLC */
+ }
+
+ /* Hand the packet to the chosen protocol queue, dropping it if the queue is full. */
+ s = splimp();
+ if (IF_QFULL(inq)) {
+ IF_DROP(inq);
+ m_freem(m);
+ } else
+ IF_ENQUEUE(inq, m);
+ splx(s);
+}
+
+/*
+ * Convert Ethernet address to printable (loggable) representation.
+ * The six bytes at ap are written as lower-case hex pairs separated
+ * by ':' ("xx:xx:xx:xx:xx:xx").  The result lives in one static buffer:
+ * every call overwrites the previous result, so copy it if it must
+ * survive another call.
+ */
+static char digits[] = "0123456789abcdef";
+char *
+ether_sprintf(ap)
+	register u_char *ap;
+{
+	register int i;			/* explicit type; implicit int is gone since C99 */
+	static char etherbuf[18];	/* 6 * "xx:" with the last ':' turned into NUL */
+	register char *cp = etherbuf;
+
+	for (i = 0; i < 6; i++) {
+		*cp++ = digits[*ap >> 4];
+		*cp++ = digits[*ap++ & 0xf];
+		*cp++ = ':';
+	}
+	*--cp = 0;	/* replace the trailing ':' with the terminator */
+	return (etherbuf);
+}
+
+/*
+ * Common Ethernet setup done when an interface is attached: fill in the
+ * generic ifnet fields (type, address length 6, header length 14, MTU)
+ * and stamp the first AF_LINK entry on the interface's address list with
+ * the Ethernet type and the hardware address taken from the arpcom.
+ */
+void
+ether_ifattach(ifp)
+	register struct ifnet *ifp;
+{
+	register struct ifaddr *cur;
+	register struct sockaddr_dl *link;
+	struct arpcom *ac = (struct arpcom *)ifp;
+
+	ifp->if_type = IFT_ETHER;
+	ifp->if_addrlen = 6;
+	ifp->if_hdrlen = 14;
+	ifp->if_mtu = ETHERMTU;
+
+	for (cur = ifp->if_addrlist; cur != 0; cur = cur->ifa_next) {
+		link = (struct sockaddr_dl *)cur->ifa_addr;
+		if (link == 0 || link->sdl_family != AF_LINK)
+			continue;
+		/* Only the first link-level entry is updated. */
+		link->sdl_type = IFT_ETHER;
+		link->sdl_alen = ifp->if_addrlen;
+		bcopy((caddr_t)ac->ac_enaddr, LLADDR(link), ifp->if_addrlen);
+		return;
+	}
+}
+
+u_char ether_ipmulticast_min[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 };
+u_char ether_ipmulticast_max[6] = { 0x01, 0x00, 0x5e, 0x7f, 0xff, 0xff };
+/*
+ * Add an Ethernet multicast address or range of addresses to the list for a
+ * given interface.
+ *
+ * The range comes from ifr->ifr_addr: AF_UNSPEC carries a raw 6-byte
+ * address, AF_INET (when INET is configured) is mapped to its Ethernet
+ * multicast address, with INADDR_ANY standing for the whole IP range.
+ *
+ * Returns ENETRESET when a new record was linked in (the driver should
+ * reprogram its reception filter), 0 when an existing record merely
+ * gained a reference, or EAFNOSUPPORT / EINVAL / ENOBUFS on failure.
+ * The whole operation runs at splimp().
+ */
+int
+ether_addmulti(ifr, ac)
+	struct ifreq *ifr;
+	register struct arpcom *ac;
+{
+	register struct ether_multi *enm;
+	struct sockaddr_in *inaddr;
+	u_char addrlo[6];
+	u_char addrhi[6];
+	int error;
+	int spl = splimp();
+
+	if (ifr->ifr_addr.sa_family == AF_UNSPEC) {
+		bcopy(ifr->ifr_addr.sa_data, addrlo, 6);
+		bcopy(addrlo, addrhi, 6);
+	}
+#ifdef INET
+	else if (ifr->ifr_addr.sa_family == AF_INET) {
+		inaddr = (struct sockaddr_in *)&(ifr->ifr_addr);
+		if (inaddr->sin_addr.s_addr != INADDR_ANY) {
+			ETHER_MAP_IP_MULTICAST(&inaddr->sin_addr, addrlo);
+			bcopy(addrlo, addrhi, 6);
+		} else {
+			/*
+			 * INADDR_ANY: listen to every Ethernet multicast
+			 * address used for IP (for IP multicast routers).
+			 */
+			bcopy(ether_ipmulticast_min, addrlo, 6);
+			bcopy(ether_ipmulticast_max, addrhi, 6);
+		}
+	}
+#endif
+	else {
+		error = EAFNOSUPPORT;
+		goto out;
+	}
+
+	/* Both ends of the range must have the multicast bit set. */
+	if ((addrlo[0] & 0x01) != 1 || (addrhi[0] & 0x01) != 1) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/* Already listed?  Then one more reference is all that is needed. */
+	ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm);
+	if (enm != NULL) {
+		++enm->enm_refcount;
+		error = 0;
+		goto out;
+	}
+
+	/* New range: allocate a record and push it on the interface list. */
+	enm = (struct ether_multi *)malloc(sizeof(*enm), M_IFMADDR, M_NOWAIT);
+	if (enm == NULL) {
+		error = ENOBUFS;
+		goto out;
+	}
+	bcopy(addrlo, enm->enm_addrlo, 6);
+	bcopy(addrhi, enm->enm_addrhi, 6);
+	enm->enm_ac = ac;
+	enm->enm_refcount = 1;
+	enm->enm_next = ac->ac_multiaddrs;
+	ac->ac_multiaddrs = enm;
+	ac->ac_multicnt++;
+	/* Tell the driver that the list changed. */
+	error = ENETRESET;
+out:
+	splx(spl);
+	return (error);
+}
+
+/*
+ * Delete a multicast address record.
+ *
+ * The range is derived from ifr->ifr_addr exactly as in ether_addmulti().
+ * Returns ENETRESET when the last reference went away and the record was
+ * unlinked and freed (the driver should reprogram its reception filter),
+ * 0 when other references remain, ENXIO when the range is not listed,
+ * or EAFNOSUPPORT for an unhandled address family.
+ * The whole operation runs at splimp().
+ */
+int
+ether_delmulti(ifr, ac)
+	struct ifreq *ifr;
+	register struct arpcom *ac;
+{
+	register struct ether_multi *enm;
+	register struct ether_multi **link;
+	struct sockaddr_in *inaddr;
+	u_char addrlo[6];
+	u_char addrhi[6];
+	int error;
+	int spl = splimp();
+
+	if (ifr->ifr_addr.sa_family == AF_UNSPEC) {
+		bcopy(ifr->ifr_addr.sa_data, addrlo, 6);
+		bcopy(addrlo, addrhi, 6);
+	}
+#ifdef INET
+	else if (ifr->ifr_addr.sa_family == AF_INET) {
+		inaddr = (struct sockaddr_in *)&(ifr->ifr_addr);
+		if (inaddr->sin_addr.s_addr != INADDR_ANY) {
+			ETHER_MAP_IP_MULTICAST(&inaddr->sin_addr, addrlo);
+			bcopy(addrlo, addrhi, 6);
+		} else {
+			/*
+			 * INADDR_ANY: stop listening to the whole range of
+			 * Ethernet multicast addresses used for IP.
+			 */
+			bcopy(ether_ipmulticast_min, addrlo, 6);
+			bcopy(ether_ipmulticast_max, addrhi, 6);
+		}
+	}
+#endif
+	else {
+		error = EAFNOSUPPORT;
+		goto out;
+	}
+
+	/* Find the record for this range. */
+	ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm);
+	if (enm == NULL) {
+		error = ENXIO;
+		goto out;
+	}
+	if (--enm->enm_refcount != 0) {
+		/* Somebody else still claims this record. */
+		error = 0;
+		goto out;
+	}
+
+	/* Last claim gone: walk to the link that points at enm and splice it out. */
+	link = &enm->enm_ac->ac_multiaddrs;
+	while (*link != enm)
+		link = &(*link)->enm_next;
+	*link = enm->enm_next;
+	free(enm, M_IFMADDR);
+	ac->ac_multicnt--;
+	/* Tell the driver that the list changed. */
+	error = ENETRESET;
+out:
+	splx(spl);
+	return (error);
+}
diff --git a/sys/net/if_llc.h b/sys/net/if_llc.h
new file mode 100644
index 000000000000..90dcb07991de
--- /dev/null
+++ b/sys/net/if_llc.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_llc.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * IEEE 802.2 Link Level Control headers, for use in conjunction with
+ * 802.{3,4,5} media access control methods.
+ *
+ * Headers here do not use bit fields due to shortcomings in many
+ * compilers.
+ */
+
+/*
+ * LLC header: destination and source SAP followed by a union giving the
+ * alternative layouts of the bytes that follow.  The llc_* macros below
+ * are shorthand for the individual union members.
+ */
+struct llc {
+	u_char	llc_dsap;
+	u_char	llc_ssap;
+	union {
+	    struct {
+		u_char control;
+		u_char format_id;
+		u_char class;
+		u_char window_x2;
+	    } type_u;
+	    struct {
+		u_char num_snd_x2;
+		u_char num_rcv_x2;
+	    } type_i;
+	    struct {
+		u_char control;
+		u_char num_rcv_x2;
+	    } type_s;
+	    struct {
+		u_char control;
+		struct frmrinfo {
+			u_char rej_pdu_0;
+			u_char rej_pdu_1;
+			u_char frmr_control;
+			u_char frmr_control_ext;
+			u_char frmr_cause;
+		} frmrinfo;
+	    } type_frmr;
+	    struct {
+		u_char control;
+		u_char org_code[3];
+		u_short ether_type;
+	    } type_snap;
+	    struct {
+		u_char control;
+		u_char control_ext;
+	    } type_raw;
+	} llc_un;
+};
+#define llc_control		llc_un.type_u.control
+#define	llc_control_ext		llc_un.type_raw.control_ext
+#define llc_fid			llc_un.type_u.format_id
+#define llc_class		llc_un.type_u.class
+#define llc_window		llc_un.type_u.window_x2
+#define llc_frmrinfo		llc_un.type_frmr.frmrinfo
+/* The member names carry an underscore before the digit (rej_pdu_0/1). */
+#define llc_frmr_pdu0		llc_un.type_frmr.frmrinfo.rej_pdu_0
+#define llc_frmr_pdu1		llc_un.type_frmr.frmrinfo.rej_pdu_1
+#define llc_frmr_control	llc_un.type_frmr.frmrinfo.frmr_control
+#define llc_frmr_control_ext	llc_un.type_frmr.frmrinfo.frmr_control_ext
+/* The cause byte is its own member, not an alias of frmr_control_ext. */
+#define llc_frmr_cause		llc_un.type_frmr.frmrinfo.frmr_cause
+
+/*
+ * Don't use sizeof(struct llc_un) for LLC header sizes
+ */
+#define LLC_ISFRAMELEN 4
+#define LLC_UFRAMELEN 3
+#define LLC_FRMRLEN 7
+
+/*
+ * Unnumbered LLC format commands
+ */
+#define LLC_UI 0x3
+#define LLC_UI_P 0x13
+#define LLC_DISC 0x43
+#define LLC_DISC_P 0x53
+#define LLC_UA 0x63
+#define LLC_UA_P 0x73
+#define LLC_TEST 0xe3
+#define LLC_TEST_P 0xf3
+#define LLC_FRMR 0x87
+#define LLC_FRMR_P 0x97
+#define LLC_DM 0x0f
+#define LLC_DM_P 0x1f
+#define LLC_XID 0xaf
+#define LLC_XID_P 0xbf
+#define LLC_SABME 0x6f
+#define LLC_SABME_P 0x7f
+
+/*
+ * Supervisory LLC commands
+ */
+#define LLC_RR 0x01
+#define LLC_RNR 0x05
+#define LLC_REJ 0x09
+
+/*
+ * Info format - dummy only
+ */
+#define LLC_INFO 0x00
+
+/*
+ * ISO PDTR 10178 contains among others
+ */
+#define LLC_X25_LSAP 0x7e
+#define LLC_SNAP_LSAP 0xaa
+#define LLC_ISO_LSAP 0xfe
+
+
+
+
+
+
diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c
new file mode 100644
index 000000000000..f09295e34be8
--- /dev/null
+++ b/sys/net/if_loop.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_loop.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Loopback interface driver for protocol testing and timing.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <machine/cpu.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/route.h>
+#include <net/bpf.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#endif
+
+#ifdef NS
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#endif
+
+#ifdef ISO
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#endif
+
+#include "bpfilter.h"
+
+#define LOMTU (1024+512) /* MTU given to the interface and to routes via lortrequest() */
+
+struct ifnet loif; /* the one loopback interface; filled in by loopattach() */
+
+/* ARGSUSED */
+void
+loopattach(n) /* fill in the loopback ifnet (loif) and attach it */
+ int n; /* unused: only the single interface loif is ever set up */
+{
+ register struct ifnet *ifp = &loif;
+
+#ifdef lint
+ n = n; /* Highlander: there can only be one... */
+#endif
+ ifp->if_name = "lo";
+ ifp->if_mtu = LOMTU;
+ ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST;
+ ifp->if_ioctl = loioctl;
+ ifp->if_output = looutput;
+ ifp->if_type = IFT_LOOP;
+ ifp->if_hdrlen = 0; /* no link-level header ... */
+ ifp->if_addrlen = 0; /* ... and no link-level address */
+ if_attach(ifp);
+#if NBPFILTER > 0
+ bpfattach(&ifp->if_bpf, ifp, DLT_NULL, sizeof(u_int)); /* header is the u_int address family that looutput() prepends */
+#endif
+}
+
+int
+looutput(ifp, m, dst, rt) /* hand m straight back to the input queue of dst's protocol */
+ struct ifnet *ifp;
+ register struct mbuf *m; /* must carry a packet header (M_PKTHDR) or we panic */
+ struct sockaddr *dst; /* sa_family selects the protocol input queue */
+ register struct rtentry *rt; /* may be null; REJECT/BLACKHOLE routes drop the packet */
+{
+ int s, isr;
+ register struct ifqueue *ifq = 0;
+
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("looutput no HDR");
+ ifp->if_lastchange = time;
+#if NBPFILTER > 0
+ if (loif.if_bpf) {
+ /*
+ * We need to prepend the address family as
+ * a four byte field. Cons up a dummy header
+ * to pacify bpf. This is safe because bpf
+ * will only read from the mbuf (i.e., it won't
+ * try to free it or keep a pointer to it).
+ */
+ struct mbuf m0;
+ u_int af = dst->sa_family;
+
+ m0.m_next = m;
+ m0.m_len = 4; /* the four byte address-family field */
+ m0.m_data = (char *)&af;
+
+ bpf_mtap(loif.if_bpf, &m0);
+ }
+#endif
+ m->m_pkthdr.rcvif = ifp;
+
+ if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+ m_freem(m);
+ return (rt->rt_flags & RTF_BLACKHOLE ? 0 : /* blackhole: drop silently, report success */
+ rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+ }
+ ifp->if_opackets++;
+ ifp->if_obytes += m->m_pkthdr.len;
+ switch (dst->sa_family) {
+
+#ifdef INET
+ case AF_INET:
+ ifq = &ipintrq;
+ isr = NETISR_IP;
+ break;
+#endif
+#ifdef NS
+ case AF_NS:
+ ifq = &nsintrq;
+ isr = NETISR_NS;
+ break;
+#endif
+#ifdef ISO
+ case AF_ISO:
+ ifq = &clnlintrq;
+ isr = NETISR_ISO;
+ break;
+#endif
+ default: /* family not compiled in or not supported: free the packet */
+ printf("lo%d: can't handle af%d\n", ifp->if_unit,
+ dst->sa_family);
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+ s = splimp();
+ if (IF_QFULL(ifq)) {
+ IF_DROP(ifq);
+ m_freem(m);
+ splx(s);
+ return (ENOBUFS);
+ }
+ IF_ENQUEUE(ifq, m);
+ schednetisr(isr);
+ ifp->if_ipackets++; /* a looped packet is counted as input as well as output */
+ ifp->if_ibytes += m->m_pkthdr.len;
+ splx(s);
+ return (0);
+}
+
+/* ARGSUSED */
+void
+lortrequest(cmd, rt, sa) /* route hook: force the route's MTU metric to LOMTU */
+ int cmd; /* unused */
+ struct rtentry *rt; /* may be null, in which case nothing is done */
+ struct sockaddr *sa; /* unused */
+{
+
+ if (rt)
+ rt->rt_rmx.rmx_mtu = LOMTU;
+}
+
+/*
+ * Process an ioctl request; returns 0, EAFNOSUPPORT or EINVAL.
+ */
+/* ARGSUSED */
+int
+loioctl(ifp, cmd, data)
+ register struct ifnet *ifp;
+ int cmd;
+ caddr_t data; /* struct ifaddr * for SIOCSIFADDR, struct ifreq * for the multicast requests */
+{
+ register struct ifaddr *ifa;
+ register struct ifreq *ifr;
+ register int error = 0;
+
+ switch (cmd) {
+
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ ifa = (struct ifaddr *)data;
+ if (ifa != 0 && ifa->ifa_addr->sa_family == AF_ISO)
+ ifa->ifa_rtrequest = lortrequest; /* only ISO addresses get the MTU-setting route hook */
+ /*
+ * Everything else is done at a higher level.
+ */
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ ifr = (struct ifreq *)data;
+ if (ifr == 0) {
+ error = EAFNOSUPPORT; /* XXX */
+ break;
+ }
+ switch (ifr->ifr_addr.sa_family) {
+
+#ifdef INET
+ case AF_INET: /* accepted; nothing to record for loopback */
+ break;
+#endif
+
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+
+ default: /* every other request is rejected */
+ error = EINVAL;
+ }
+ return (error);
+}
diff --git a/sys/net/if_sl.c b/sys/net/if_sl.c
new file mode 100644
index 000000000000..56ce96f4b9d3
--- /dev/null
+++ b/sys/net/if_sl.c
@@ -0,0 +1,839 @@
+/*
+ * Copyright (c) 1987, 1989, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_sl.c 8.6 (Berkeley) 2/1/94
+ */
+
+/*
+ * Serial Line interface
+ *
+ * Rick Adams
+ * Center for Seismic Studies
+ * 1300 N 17th Street, Suite 1450
+ * Arlington, Virginia 22209
+ * (703)276-7900
+ * rick@seismo.ARPA
+ * seismo!rick
+ *
+ * Pounded on heavily by Chris Torek (chris@mimsy.umd.edu, umcp-cs!chris).
+ * N.B.: this belongs in netinet, not net, the way it stands now.
+ * Should have a link-layer type designation, but wouldn't be
+ * backwards-compatible.
+ *
+ * Converted to 4.3BSD Beta by Chris Torek.
+ * Other changes made at Berkeley, based in part on code by Kirk Smith.
+ * W. Jolitz added slip abort.
+ *
+ * Hacked almost beyond recognition by Van Jacobson (van@helios.ee.lbl.gov).
+ * Added priority queuing for "interactive" traffic; hooks for TCP
+ * header compression; ICMP filtering (at 2400 baud, some cretin
+ * pinging you can use up all your bandwidth). Made low clist behavior
+ * more robust and slightly less likely to hang serial line.
+ * Sped up a bunch of things.
+ *
+ * Note that splimp() is used throughout to block both (tty) input
+ * interrupts and network activity; thus, splimp must be >= spltty.
+ */
+
+#include "sl.h"
+#if NSL > 0
+
+#include "bpfilter.h"
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/buf.h>
+#include <sys/dkstat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/tty.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+
+#include <machine/cpu.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/route.h>
+
+#if INET
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#else
+Huh? Slip without inet?
+#endif
+
+#include <net/slcompress.h>
+#include <net/if_slvar.h>
+#include <net/slip.h>
+
+#if NBPFILTER > 0
+#include <sys/time.h>
+#include <net/bpf.h>
+#endif
+
+/*
+ * SLMAX is a hard limit on input packet size. To simplify the code
+ * and improve performance, we require that packets fit in an mbuf
+ * cluster, and if we get a compressed packet, there's enough extra
+ * room to expand the header into a max length tcp/ip header (128
+ * bytes). So, SLMAX can be at most
+ * MCLBYTES - 128
+ *
+ * SLMTU is a hard limit on output packet size. To ensure good
+ * interactive response, SLMTU wants to be the smallest size that
+ * amortizes the header cost. (Remember that even with
+ * type-of-service queuing, we have to wait for any in-progress
+ * packet to finish, i.e., we wait, on the average, 1/2 * mtu /
+ * cps, where cps is the line speed in characters per second.
+ * E.g., 533ms wait for a 1024 byte MTU on a 9600 baud line.) The
+ * average compressed header size is 6-8 bytes so any MTU > 90
+ * bytes will give us 90% of the line bandwidth. A 100ms wait is
+ * tolerable (500ms is not), so want an MTU around 296. (Since TCP
+ * will send 256 byte segments (to allow for 40 byte headers), the
+ * typical packet size on the wire will be around 260 bytes). In
+ * 4.3tahoe+ systems, we can set an MTU in a route so we do that &
+ * leave the interface MTU relatively high (so we don't IP fragment
+ * when acting as a gateway to someone using a stupid MTU).
+ *
+ * Similar considerations apply to SLIP_HIWAT: It's the amount of
+ * data that will be queued 'downstream' of us (i.e., in clists
+ * waiting to be picked up by the tty output interrupt). If we
+ * queue a lot of data downstream, it's immune to our t.o.s. queuing.
+ * E.g., if SLIP_HIWAT is 1024, the interactive traffic in mixed
+ * telnet/ftp will see a 1 sec wait, independent of the mtu (the
+ * wait is dependent on the ftp window size but that's typically
+ * 1k - 4k). So, we want SLIP_HIWAT just big enough to amortize
+ * the cost (in idle time on the wire) of the tty driver running
+ * off the end of its clists & having to call back slstart for a
+ * new packet. For a tty interface with any buffering at all, this
+ * cost will be zero. Even with a totally brain dead interface (like
+ * the one on a typical workstation), the cost will be <= 1 character
+ * time. So, setting SLIP_HIWAT to ~100 guarantees that we'll lose
+ * at most 1% while maintaining good interactive response.
+ */
+#if NBPFILTER > 0
+#define BUFOFFSET (128+sizeof(struct ifnet **)+SLIP_HDRLEN) /* headroom in front of input data: 128 for header expansion, a pointer, and the BPF pseudo-header */
+#else
+#define BUFOFFSET (128+sizeof(struct ifnet **))
+#endif
+#define SLMAX (MCLBYTES - BUFOFFSET) /* largest input packet accepted */
+#define SLBUFSIZE (SLMAX + BUFOFFSET) /* works out to MCLBYTES: one whole cluster */
+#define SLMTU 296 /* interface MTU set by slattach() */
+#define SLIP_HIWAT roundup(50,CBSIZE) /* slstart() stops queuing while the tty output queue holds more than this */
+#define CLISTRESERVE 1024 /* Can't let clists get too low */
+
+/*
+ * SLIP ABORT ESCAPE MECHANISM:
+ * (inspired by HAYES modem escape arrangement)
+ * 1sec escape 1sec escape 1sec escape { 1sec escape 1sec escape }
+ * within window time signals a "soft" exit from slip mode by remote end
+ * if the IFF_DEBUG flag is on.
+ */
+#define ABT_ESC '\033' /* can't be t_intr - distant host must know it */
+#define ABT_IDLE 1 /* in seconds - idle before an escape */
+#define ABT_COUNT 3 /* count of escapes for abort */
+#define ABT_WINDOW (ABT_COUNT*2+2) /* in seconds - time to count */
+
+struct sl_softc sl_softc[NSL]; /* one softc per configured sl unit, indexed by if_unit */
+
+#define FRAME_END 0xc0 /* Frame End */
+#define FRAME_ESCAPE 0xdb /* Frame Esc */
+#define TRANS_FRAME_END 0xdc /* transposed frame end: follows FRAME_ESCAPE for a data byte of 0xc0 */
+#define TRANS_FRAME_ESCAPE 0xdd /* transposed frame esc: follows FRAME_ESCAPE for a data byte of 0xdb */
+
+extern struct timeval time;
+
+static int slinit __P((struct sl_softc *));
+static struct mbuf *sl_btom __P((struct sl_softc *, int));
+
+/*
+ * Called from boot code to establish sl interfaces: fills in and attaches all NSL units.
+ */
+void
+slattach()
+{
+ register struct sl_softc *sc;
+ register int i = 0;
+
+ for (sc = sl_softc; i < NSL; sc++) { /* i is advanced inside the body, where if_unit is assigned */
+ sc->sc_if.if_name = "sl";
+ sc->sc_if.if_next = NULL;
+ sc->sc_if.if_unit = i++;
+ sc->sc_if.if_mtu = SLMTU;
+ sc->sc_if.if_flags =
+ IFF_POINTOPOINT | SC_AUTOCOMP | IFF_MULTICAST; /* SC_AUTOCOMP: see the auto-enable branch in slinput() */
+ sc->sc_if.if_type = IFT_SLIP;
+ sc->sc_if.if_ioctl = slioctl;
+ sc->sc_if.if_output = sloutput;
+ sc->sc_if.if_snd.ifq_maxlen = 50; /* normal output queue */
+ sc->sc_fastq.ifq_maxlen = 32; /* IPTOS_LOWDELAY queue, see sloutput() */
+ if_attach(&sc->sc_if);
+#if NBPFILTER > 0
+ bpfattach(&sc->sc_bpf, &sc->sc_if, DLT_SLIP, SLIP_HDRLEN);
+#endif
+ }
+}
+
+static int
+slinit(sc) /* set up sc's input buffer and compression state; returns 1 on success, 0 if no cluster */
+ register struct sl_softc *sc;
+{
+ register caddr_t p;
+
+ if (sc->sc_ep == (u_char *) 0) { /* no input cluster attached yet */
+ MCLALLOC(p, M_WAIT);
+ if (p)
+ sc->sc_ep = (u_char *)p + SLBUFSIZE; /* one past the end of the cluster */
+ else {
+ printf("sl%d: can't allocate buffer\n", (int)(sc - sl_softc)); /* pointer difference is not an int; cast to match %d */
+ sc->sc_if.if_flags &= ~IFF_UP;
+ return (0);
+ }
+ }
+ sc->sc_buf = sc->sc_ep - SLMAX; /* data starts BUFOFFSET bytes into the cluster */
+ sc->sc_mp = sc->sc_buf; /* buffer starts out empty */
+ sl_compress_init(&sc->sc_comp);
+ return (1);
+}
+
+/*
+ * Line specific open routine; requires suser() to succeed.
+ * Attach the given tty to the first available sl unit; ENXIO if none is free.
+ */
+/* ARGSUSED */
+int
+slopen(dev, tp)
+ dev_t dev; /* unused */
+ register struct tty *tp;
+{
+ struct proc *p = curproc; /* XXX */
+ register struct sl_softc *sc;
+ register int nsl;
+ int error;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+
+ if (tp->t_line == SLIPDISC) /* already in SLIP discipline: nothing to do */
+ return (0);
+
+ for (nsl = NSL, sc = sl_softc; --nsl >= 0; sc++)
+ if (sc->sc_ttyp == NULL) { /* a unit with no tty is free */
+ if (slinit(sc) == 0)
+ return (ENOBUFS);
+ tp->t_sc = (caddr_t)sc; /* link tty and softc in both directions */
+ sc->sc_ttyp = tp;
+ sc->sc_if.if_baudrate = tp->t_ospeed;
+ ttyflush(tp, FREAD | FWRITE);
+ return (0);
+ }
+ return (ENXIO);
+}
+
+/*
+ * Line specific close routine.
+ * Detach the tty from the sl unit, mark the interface down and free its input cluster.
+ */
+void
+slclose(tp)
+ struct tty *tp;
+{
+ register struct sl_softc *sc;
+ int s;
+
+ ttywflush(tp);
+ s = splimp(); /* actually, max(spltty, splnet) */
+ tp->t_line = 0; /* tty leaves SLIPDISC */
+ sc = (struct sl_softc *)tp->t_sc;
+ if (sc != NULL) {
+ if_down(&sc->sc_if);
+ sc->sc_ttyp = NULL; /* unit becomes available to slopen() again */
+ tp->t_sc = NULL;
+ MCLFREE((caddr_t)(sc->sc_ep - SLBUFSIZE)); /* sc_ep is kept at cluster base + SLBUFSIZE */
+ sc->sc_ep = 0; /* zero sc_ep makes slinit() allocate a fresh cluster */
+ sc->sc_mp = 0;
+ sc->sc_buf = 0;
+ }
+ splx(s);
+}
+
+/*
+ * Line specific (tty) ioctl routine.
+ * Provide a way to get the sl unit number; any other command returns -1.
+ */
+/* ARGSUSED */
+int
+sltioctl(tp, cmd, data, flag)
+ struct tty *tp;
+ int cmd;
+ caddr_t data; /* for SLIOCGUNIT: points at the int that receives the unit number */
+ int flag; /* unused */
+{
+ struct sl_softc *sc = (struct sl_softc *)tp->t_sc; /* NOTE(review): not checked for NULL before use - confirm callers guarantee an attached unit */
+
+ switch (cmd) {
+ case SLIOCGUNIT:
+ *(int *)data = sc->sc_if.if_unit;
+ break;
+
+ default:
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * Queue a packet. Start transmission if not active.
+ * Compression happens in slstart; if we do it here, IP TOS
+ * will cause us to not compress "background" packets, because
+ * ordering gets trashed. It can be done for all packets in slstart.
+ */
+int
+sloutput(ifp, m, dst, rtp) /* returns 0 or an errno; m is freed on every error path */
+ struct ifnet *ifp;
+ register struct mbuf *m;
+ struct sockaddr *dst; /* only AF_INET is accepted */
+ struct rtentry *rtp; /* unused */
+{
+ register struct sl_softc *sc = &sl_softc[ifp->if_unit];
+ register struct ip *ip;
+ register struct ifqueue *ifq;
+ int s;
+
+ /*
+ * `Cannot happen' (see slioctl). Someday we will extend
+ * the line protocol to support other address families.
+ */
+ if (dst->sa_family != AF_INET) {
+ printf("sl%d: af%d not supported\n", sc->sc_if.if_unit,
+ dst->sa_family);
+ m_freem(m);
+ sc->sc_if.if_noproto++;
+ return (EAFNOSUPPORT);
+ }
+
+ if (sc->sc_ttyp == NULL) { /* no tty attached to this unit */
+ m_freem(m);
+ return (ENETDOWN); /* sort of */
+ }
+ if ((sc->sc_ttyp->t_state & TS_CARR_ON) == 0 &&
+ (sc->sc_ttyp->t_cflag & CLOCAL) == 0) { /* carrier is down and the line is not CLOCAL */
+ m_freem(m);
+ return (EHOSTUNREACH);
+ }
+ ifq = &sc->sc_if.if_snd;
+ ip = mtod(m, struct ip *);
+ if (sc->sc_if.if_flags & SC_NOICMP && ip->ip_p == IPPROTO_ICMP) {
+ m_freem(m);
+ return (ENETRESET); /* XXX ? */
+ }
+ if (ip->ip_tos & IPTOS_LOWDELAY)
+ ifq = &sc->sc_fastq; /* low-delay traffic goes on the queue slstart() drains first */
+ s = splimp();
+ if (IF_QFULL(ifq)) {
+ IF_DROP(ifq);
+ m_freem(m);
+ splx(s);
+ sc->sc_if.if_oerrors++;
+ return (ENOBUFS);
+ }
+ IF_ENQUEUE(ifq, m);
+ sc->sc_if.if_lastchange = time;
+ if (sc->sc_ttyp->t_outq.c_cc == 0) /* tty output queue empty: start output ourselves */
+ slstart(sc->sc_ttyp);
+ splx(s);
+ return (0);
+}
+
+/*
+ * Start output on interface. Get another datagram
+ * to send from the interface queue and map it to
+ * the interface before starting output.
+ */
+void
+slstart(tp)
+ register struct tty *tp;
+{
+ register struct sl_softc *sc = (struct sl_softc *)tp->t_sc;
+ register struct mbuf *m;
+ register u_char *cp;
+ register struct ip *ip;
+ int s;
+ struct mbuf *m2;
+#if NBPFILTER > 0
+ u_char bpfbuf[SLMTU + SLIP_HDRLEN]; /* NOTE(review): the copy below is not bounded; presumably queued packets never exceed SLMTU - confirm */
+ register int len;
+#endif
+ extern int cfreecount;
+
+ for (;;) { /* one packet per iteration, until a queue limit or an empty queue returns */
+ /*
+ * If there is more in the output queue, just send it now.
+ * We are being called in lieu of ttstart and must do what
+ * it would.
+ */
+ if (tp->t_outq.c_cc != 0) {
+ (*tp->t_oproc)(tp);
+ if (tp->t_outq.c_cc > SLIP_HIWAT)
+ return;
+ }
+ /*
+ * This happens briefly when the line shuts down.
+ */
+ if (sc == NULL)
+ return;
+
+ /*
+ * Get a packet and send it to the interface.
+ */
+ s = splimp();
+ IF_DEQUEUE(&sc->sc_fastq, m); /* the fast queue is always tried first */
+ if (m)
+ sc->sc_if.if_omcasts++; /* XXX counts fast-queue packets here, not multicasts */
+ else
+ IF_DEQUEUE(&sc->sc_if.if_snd, m);
+ splx(s);
+ if (m == NULL)
+ return;
+
+ /*
+ * We do the header compression here rather than in sloutput
+ * because the packets will be out of order if we are using TOS
+ * queueing, and the connection id compression will get
+ * munged when this happens.
+ */
+#if NBPFILTER > 0
+ if (sc->sc_bpf) {
+ /*
+ * We need to save the TCP/IP header before it's
+ * compressed. To avoid complicated code, we just
+ * copy the entire packet into a stack buffer (since
+ * this is a serial line, packets should be short
+ * and/or the copy should be negligible cost compared
+ * to the packet transmission time).
+ */
+ register struct mbuf *m1 = m;
+ register u_char *cp = bpfbuf + SLIP_HDRLEN; /* leave room for the pseudo-header filled in below */
+
+ len = 0;
+ do {
+ register int mlen = m1->m_len;
+
+ bcopy(mtod(m1, caddr_t), cp, mlen);
+ cp += mlen;
+ len += mlen;
+ } while (m1 = m1->m_next);
+ }
+#endif
+ if ((ip = mtod(m, struct ip *))->ip_p == IPPROTO_TCP) {
+ if (sc->sc_if.if_flags & SC_COMPRESS)
+ *mtod(m, u_char *) |= sl_compress_tcp(m, ip,
+ &sc->sc_comp, 1); /* the return value is OR-ed into the packet's first byte */
+ }
+#if NBPFILTER > 0
+ if (sc->sc_bpf) {
+ /*
+ * Put the SLIP pseudo-"link header" in place. The
+ * compressed header is now at the beginning of the
+ * mbuf.
+ */
+ bpfbuf[SLX_DIR] = SLIPDIR_OUT;
+ bcopy(mtod(m, caddr_t), &bpfbuf[SLX_CHDR], CHDR_LEN);
+ bpf_tap(sc->sc_bpf, bpfbuf, len + SLIP_HDRLEN);
+ }
+#endif
+ sc->sc_if.if_lastchange = time;
+
+ /*
+ * If system is getting low on clists, just flush our
+ * output queue (if the stuff was important, it'll get
+ * retransmitted).
+ */
+ if (cfreecount < CLISTRESERVE + SLMTU) {
+ m_freem(m);
+ sc->sc_if.if_collisions++; /* if_collisions is used to count clist-shortage drops */
+ continue;
+ }
+ /*
+ * The extra FRAME_END will start up a new packet, and thus
+ * will flush any accumulated garbage. We do this whenever
+ * the line may have been idle for some time.
+ */
+ if (tp->t_outq.c_cc == 0) {
+ ++sc->sc_if.if_obytes;
+ (void) putc(FRAME_END, &tp->t_outq);
+ }
+
+ while (m) { /* walk the chain, freeing each mbuf once it is queued */
+ register u_char *ep;
+
+ cp = mtod(m, u_char *); ep = cp + m->m_len;
+ while (cp < ep) {
+ /*
+ * Find out how many bytes in the string we can
+ * handle without doing something special.
+ */
+ register u_char *bp = cp;
+
+ while (cp < ep) {
+ switch (*cp++) {
+ case FRAME_ESCAPE:
+ case FRAME_END:
+ --cp; /* back up so cp points at the special byte */
+ goto out;
+ }
+ }
+ out:
+ if (cp > bp) {
+ /*
+ * Put n characters at once
+ * into the tty output queue.
+ */
+ if (b_to_q((char *)bp, cp - bp,
+ &tp->t_outq))
+ break; /* tty queue full: give up on the rest of this mbuf */
+ sc->sc_if.if_obytes += cp - bp;
+ }
+ /*
+ * If there are characters left in the mbuf,
+ * the first one must be special..
+ * Put it out in a different form.
+ */
+ if (cp < ep) {
+ if (putc(FRAME_ESCAPE, &tp->t_outq))
+ break;
+ if (putc(*cp++ == FRAME_ESCAPE ?
+ TRANS_FRAME_ESCAPE : TRANS_FRAME_END,
+ &tp->t_outq)) {
+ (void) unputc(&tp->t_outq); /* take back the lone FRAME_ESCAPE */
+ break;
+ }
+ sc->sc_if.if_obytes += 2;
+ }
+ }
+ MFREE(m, m2);
+ m = m2;
+ }
+
+ if (putc(FRAME_END, &tp->t_outq)) {
+ /*
+ * Not enough room. Remove a char to make room
+ * and end the packet normally.
+ * If you get many collisions (more than one or two
+ * a day) you probably do not have enough clists
+ * and you should increase "nclist" in param.c.
+ */
+ (void) unputc(&tp->t_outq);
+ (void) putc(FRAME_END, &tp->t_outq);
+ sc->sc_if.if_collisions++;
+ } else {
+ ++sc->sc_if.if_obytes;
+ sc->sc_if.if_opackets++;
+ }
+ }
+}
+
+/*
+ * Copy data buffer to mbuf chain; add ifnet pointer. Returns NULL if no mbuf or cluster can be had.
+ */
+static struct mbuf *
+sl_btom(sc, len)
+ register struct sl_softc *sc;
+ register int len; /* number of packet bytes starting at sc->sc_buf */
+{
+ register struct mbuf *m;
+
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ return (NULL);
+
+ /*
+ * If we have more than MHLEN bytes, it's cheaper to
+ * queue the cluster we just filled & allocate a new one
+ * for the input buffer. Otherwise, fill the mbuf we
+ * allocated above. Note that code in the input routine
+ * guarantees that packet will fit in a cluster.
+ */
+ if (len >= MHLEN) {
+ MCLGET(m, M_DONTWAIT);
+ if ((m->m_flags & M_EXT) == 0) {
+ /*
+ * we couldn't get a cluster - if memory's this
+ * low, it's time to start dropping packets.
+ */
+ (void) m_free(m);
+ return (NULL);
+ }
+ sc->sc_ep = mtod(m, u_char *) + SLBUFSIZE; /* the fresh cluster becomes the next input buffer */
+ m->m_data = (caddr_t)sc->sc_buf; /* the mbuf takes over the cluster that was just filled */
+ m->m_ext.ext_buf = (caddr_t)((u_long)sc->sc_buf &~ MCLOFSET); /* cluster base; u_long so the pointer is not truncated */
+ } else
+ bcopy((caddr_t)sc->sc_buf, mtod(m, caddr_t), len);
+
+ m->m_len = len;
+ m->m_pkthdr.len = len;
+ m->m_pkthdr.rcvif = &sc->sc_if;
+ return (m);
+}
+
+/*
+ * tty interface receiver interrupt: called with each received character.
+ */
+void
+slinput(c, tp)
+ register int c; /* received character, possibly with TTY_ERRORMASK bits set */
+ register struct tty *tp;
+{
+ register struct sl_softc *sc;
+ register struct mbuf *m;
+ register int len;
+ int s;
+#if NBPFILTER > 0
+ u_char chdr[CHDR_LEN];
+#endif
+
+ tk_nin++;
+ sc = (struct sl_softc *)tp->t_sc;
+ if (sc == NULL)
+ return;
+ if (c & TTY_ERRORMASK || ((tp->t_state & TS_CARR_ON) == 0 &&
+ (tp->t_cflag & CLOCAL) == 0)) {
+ sc->sc_flags |= SC_ERROR; /* frame in progress is thrown away at the next FRAME_END */
+ return;
+ }
+ c &= TTY_CHARMASK;
+
+ ++sc->sc_if.if_ibytes;
+
+ if (sc->sc_if.if_flags & IFF_DEBUG) { /* the abort-escape mechanism is active only with IFF_DEBUG */
+ if (c == ABT_ESC) {
+ /*
+ * If we have a previous abort, see whether
+ * this one is within the time limit.
+ */
+ if (sc->sc_abortcount &&
+ time.tv_sec >= sc->sc_starttime + ABT_WINDOW)
+ sc->sc_abortcount = 0;
+ /*
+ * If we see an abort after "idle" time, count it;
+ * record when the first abort escape arrived.
+ */
+ if (time.tv_sec >= sc->sc_lasttime + ABT_IDLE) {
+ if (++sc->sc_abortcount == 1)
+ sc->sc_starttime = time.tv_sec;
+ if (sc->sc_abortcount >= ABT_COUNT) {
+ slclose(tp); /* enough escapes seen: leave SLIP mode */
+ return;
+ }
+ }
+ } else
+ sc->sc_abortcount = 0;
+ sc->sc_lasttime = time.tv_sec;
+ }
+
+ switch (c) {
+
+ case TRANS_FRAME_ESCAPE:
+ if (sc->sc_escape) /* second half of an escape pair; otherwise ordinary data */
+ c = FRAME_ESCAPE;
+ break;
+
+ case TRANS_FRAME_END:
+ if (sc->sc_escape)
+ c = FRAME_END;
+ break;
+
+ case FRAME_ESCAPE:
+ sc->sc_escape = 1; /* remember it; the next character decides the data byte */
+ return;
+
+ case FRAME_END: /* end of frame: deliver what has been collected */
+ if(sc->sc_flags & SC_ERROR) {
+ sc->sc_flags &= ~SC_ERROR;
+ goto newpack;
+ }
+ len = sc->sc_mp - sc->sc_buf;
+ if (len < 3)
+ /* less than min length packet - ignore */
+ goto newpack;
+
+#if NBPFILTER > 0
+ if (sc->sc_bpf) {
+ /*
+ * Save the compressed header, so we
+ * can tack it on later. Note that we
+ * will end up copying garbage in some
+ * cases but this is okay. We remember
+ * where the buffer started so we can
+ * compute the new header length.
+ */
+ bcopy(sc->sc_buf, chdr, CHDR_LEN);
+ }
+#endif
+
+ if ((c = (*sc->sc_buf & 0xf0)) != (IPVERSION << 4)) { /* high nibble of the first byte is not the IP version */
+ if (c & 0x80)
+ c = TYPE_COMPRESSED_TCP;
+ else if (c == TYPE_UNCOMPRESSED_TCP)
+ *sc->sc_buf &= 0x4f; /* XXX */
+ /*
+ * We've got something that's not an IP packet.
+ * If compression is enabled, try to decompress it.
+ * Otherwise, if `auto-enable' compression is on and
+ * it's a reasonable packet, decompress it and then
+ * enable compression. Otherwise, drop it.
+ */
+ if (sc->sc_if.if_flags & SC_COMPRESS) {
+ len = sl_uncompress_tcp(&sc->sc_buf, len,
+ (u_int)c, &sc->sc_comp); /* sc_buf is passed by address and may be moved */
+ if (len <= 0)
+ goto error;
+ } else if ((sc->sc_if.if_flags & SC_AUTOCOMP) &&
+ c == TYPE_UNCOMPRESSED_TCP && len >= 40) {
+ len = sl_uncompress_tcp(&sc->sc_buf, len,
+ (u_int)c, &sc->sc_comp);
+ if (len <= 0)
+ goto error;
+ sc->sc_if.if_flags |= SC_COMPRESS; /* the peer compresses, so start compressing too */
+ } else
+ goto error;
+ }
+#if NBPFILTER > 0
+ if (sc->sc_bpf) {
+ /*
+ * Put the SLIP pseudo-"link header" in place.
+ * We couldn't do this any earlier since
+ * decompression probably moved the buffer
+ * pointer. Then, invoke BPF.
+ */
+ register u_char *hp = sc->sc_buf - SLIP_HDRLEN; /* BUFOFFSET reserves SLIP_HDRLEN bytes when BPF is configured */
+
+ hp[SLX_DIR] = SLIPDIR_IN;
+ bcopy(chdr, &hp[SLX_CHDR], CHDR_LEN);
+ bpf_tap(sc->sc_bpf, hp, len + SLIP_HDRLEN);
+ }
+#endif
+ m = sl_btom(sc, len);
+ if (m == NULL)
+ goto error;
+
+ sc->sc_if.if_ipackets++;
+ sc->sc_if.if_lastchange = time;
+ s = splimp();
+ if (IF_QFULL(&ipintrq)) {
+ IF_DROP(&ipintrq);
+ sc->sc_if.if_ierrors++;
+ sc->sc_if.if_iqdrops++;
+ m_freem(m);
+ } else {
+ IF_ENQUEUE(&ipintrq, m);
+ schednetisr(NETISR_IP);
+ }
+ splx(s);
+ goto newpack;
+ }
+ if (sc->sc_mp < sc->sc_ep) { /* room left: store the data byte */
+ *sc->sc_mp++ = c;
+ sc->sc_escape = 0;
+ return;
+ }
+
+ /* can't put lower; would miss an extra frame */
+ sc->sc_flags |= SC_ERROR; /* buffer overrun: discard the rest of this frame */
+
+error:
+ sc->sc_if.if_ierrors++;
+newpack:
+ sc->sc_mp = sc->sc_buf = sc->sc_ep - SLMAX; /* reset to an empty buffer */
+ sc->sc_escape = 0;
+}
+
+/*
+ * Process an ioctl request at splimp; returns 0, EAFNOSUPPORT or EINVAL.
+ */
+int
+slioctl(ifp, cmd, data)
+ register struct ifnet *ifp;
+ int cmd;
+ caddr_t data; /* struct ifaddr * for the address requests, struct ifreq * for the multicast requests */
+{
+ register struct ifaddr *ifa = (struct ifaddr *)data;
+ register struct ifreq *ifr;
+ register int s = splimp(), error = 0;
+
+ switch (cmd) {
+
+ case SIOCSIFADDR:
+ if (ifa->ifa_addr->sa_family == AF_INET)
+ ifp->if_flags |= IFF_UP; /* setting an AF_INET address brings the interface up */
+ else
+ error = EAFNOSUPPORT;
+ break;
+
+ case SIOCSIFDSTADDR:
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ error = EAFNOSUPPORT;
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ ifr = (struct ifreq *)data;
+ if (ifr == 0) {
+ error = EAFNOSUPPORT; /* XXX */
+ break;
+ }
+ switch (ifr->ifr_addr.sa_family) {
+
+#ifdef INET
+ case AF_INET: /* accepted; nothing further is done here */
+ break;
+#endif
+
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+
+ default: /* every other request is rejected */
+ error = EINVAL;
+ }
+ splx(s);
+ return (error);
+}
+#endif
diff --git a/sys/net/if_slvar.h b/sys/net/if_slvar.h
new file mode 100644
index 000000000000..e7b27647284f
--- /dev/null
+++ b/sys/net/if_slvar.h
@@ -0,0 +1,80 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_slvar.h 8.3 (Berkeley) 2/1/94
+ *
+ * $Header: if_slvar.h,v 1.3 89/05/31 02:25:18 van Exp $
+ */
+
+/*
+ * Definitions for SLIP interface data structures
+ *
+ * (This exists so programs like slstats can get at the definition
+ * of sl_softc.)
+ */
+struct sl_softc {
+ struct ifnet sc_if; /* network-visible interface */
+ struct ifqueue sc_fastq; /* interactive output queue */
+ struct tty *sc_ttyp; /* pointer to tty structure; NULL while the unit is free */
+ u_char *sc_mp; /* pointer to next available buf char */
+ u_char *sc_ep; /* pointer one past the last available buf char */
+ u_char *sc_buf; /* input buffer */
+ u_int sc_flags; /* see below */
+ u_int sc_escape; /* =1 if last char input was FRAME_ESCAPE */
+ long sc_lasttime; /* last time a char arrived */
+ long sc_abortcount; /* number of abort escape chars */
+ long sc_starttime; /* time of first abort in window */
+#ifdef INET /* XXX */
+ struct slcompress sc_comp; /* tcp compression data */
+#endif
+ caddr_t sc_bpf; /* BPF data */
+};
+
+/* internal flags (kept in sc_flags) */
+#define SC_ERROR 0x0001 /* had an input error */
+
+/* visible flags (kept in sc_if.if_flags) */
+#define SC_COMPRESS IFF_LINK0 /* compress TCP traffic */
+#define SC_NOICMP IFF_LINK1 /* suppress ICMP traffic */
+#define SC_AUTOCOMP IFF_LINK2 /* auto-enable TCP compression */
+
+#ifdef KERNEL
+void slattach __P((void));
+void slclose __P((struct tty *));
+void slinput __P((int, struct tty *));
+int slioctl __P((struct ifnet *, int, caddr_t));
+int slopen __P((dev_t, struct tty *));
+int sloutput __P((struct ifnet *,
+ struct mbuf *, struct sockaddr *, struct rtentry *));
+void slstart __P((struct tty *));
+int sltioctl __P((struct tty *, int, caddr_t, int));
+#endif /* KERNEL */
diff --git a/sys/net/if_types.h b/sys/net/if_types.h
new file mode 100644
index 000000000000..030f234fbac9
--- /dev/null
+++ b/sys/net/if_types.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_types.h 8.2 (Berkeley) 4/20/94
+ */
+
+/*
+ * Interface types for benefit of parsing media address headers.
+ * This list is derived from the SNMP list of ifTypes, currently
+ * documented in RFC1573.
+ */
+
+#define IFT_OTHER 0x1 /* none of the following */
+#define IFT_1822 0x2 /* old-style arpanet imp */
+#define IFT_HDH1822 0x3 /* HDH arpanet imp */
+#define IFT_X25DDN 0x4 /* x25 to imp */
+#define IFT_X25 0x5 /* PDN X25 interface (RFC877) */
+#define IFT_ETHER 0x6 /* Ethernet CSMACD */
+#define IFT_ISO88023 0x7 /* CSMA CD */
+#define IFT_ISO88024 0x8 /* Token Bus */
+#define IFT_ISO88025 0x9 /* Token Ring */
+#define IFT_ISO88026 0xa /* MAN */
+#define IFT_STARLAN 0xb
+#define IFT_P10 0xc /* Proteon 10MBit ring */
+#define IFT_P80 0xd /* Proteon 80MBit ring */
+#define IFT_HY 0xe /* Hyperchannel */
+#define IFT_FDDI 0xf
+#define IFT_LAPB 0x10
+#define IFT_SDLC 0x11
+#define IFT_T1 0x12
+#define IFT_CEPT 0x13 /* E1 - european T1 */
+#define IFT_ISDNBASIC 0x14
+#define IFT_ISDNPRIMARY 0x15
+#define IFT_PTPSERIAL 0x16 /* Proprietary PTP serial */
+#define IFT_PPP 0x17 /* RFC 1331 */
+#define IFT_LOOP 0x18 /* loopback */
+#define IFT_EON 0x19 /* ISO over IP */
+#define IFT_XETHER 0x1a /* obsolete 3MB experimental ethernet */
+#define IFT_NSIP 0x1b /* XNS over IP */
+#define IFT_SLIP 0x1c /* IP over generic TTY */
+#define IFT_ULTRA 0x1d /* Ultra Technologies */
+#define IFT_DS3 0x1e /* Generic T3 */
+#define IFT_SIP 0x1f /* SMDS */
+#define IFT_FRELAY 0x20 /* Frame Relay DTE only */
+#define IFT_RS232 0x21
+#define IFT_PARA 0x22 /* parallel-port */
+#define IFT_ARCNET 0x23
+#define IFT_ARCNETPLUS 0x24
+#define IFT_ATM 0x25 /* ATM cells */
+#define IFT_MIOX25 0x26
+#define IFT_SONET 0x27 /* SONET or SDH */
+#define IFT_X25PLE 0x28
+#define IFT_ISO88022LLC 0x29
+#define IFT_LOCALTALK 0x2a
+#define IFT_SMDSDXI 0x2b
+#define IFT_FRELAYDCE 0x2c /* Frame Relay DCE */
+#define IFT_V35 0x2d
+#define IFT_HSSI 0x2e
+#define IFT_HIPPI 0x2f
+#define IFT_MODEM 0x30 /* Generic Modem */
+#define IFT_AAL5 0x31 /* AAL5 over ATM */
+#define IFT_SONETPATH 0x32
+#define IFT_SONETVT 0x33
+#define IFT_SMDSICIP 0x34 /* SMDS InterCarrier Interface */
+#define IFT_PROPVIRTUAL 0x35 /* Proprietary Virtual/internal */
+#define IFT_PROPMUX 0x36 /* Proprietary Multiplexing */
diff --git a/sys/net/netisr.h b/sys/net/netisr.h
new file mode 100644
index 000000000000..e2e465379d3f
--- /dev/null
+++ b/sys/net/netisr.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1980, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)netisr.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * The networking code runs off software interrupts.
+ *
+ * You can switch into the network by doing splnet() and return by splx().
+ * The software interrupt level for the network is higher than the software
+ * level for the clock (so you can enter the network in routines called
+ * at timeout time).
+ */
+#if defined(vax) || defined(tahoe)
+#define setsoftnet() mtpr(SIRR, 12) /* this header supplies setsoftnet() for vax and tahoe only */
+#endif /* vax || tahoe */
+
+/*
+ * Each ``pup-level-1'' input queue has a bit in a ``netisr'' status
+ * word which is used to de-multiplex a single software
+ * interrupt used for scheduling the network code to calls
+ * on the lowest level routine of each protocol.
+ */
+#define NETISR_RAW 0 /* same as AF_UNSPEC */
+#define NETISR_IP 2 /* same as AF_INET */
+#define NETISR_IMP 3 /* same as AF_IMPLINK */
+#define NETISR_NS 6 /* same as AF_NS */
+#define NETISR_ISO 7 /* same as AF_ISO */
+#define NETISR_CCITT 10 /* same as AF_CCITT */
+#define NETISR_ARP 18 /* same as AF_LINK */
+
+#define schednetisr(anisr) { netisr |= 1<<(anisr); setsoftnet(); } /* set bit anisr in netisr, then call setsoftnet(); expands to a bare { } block */
+
+#ifdef i386
+/* XXX Temporary -- soon to vanish - wfj */
+#define NETISR_SCLK 11 /* softclock */
+#define NETISR_AST 12 /* ast -- resched */
+
+#undef schednetisr /* drop the definition above in favour of the i386 one */
+#define schednetisr(anisr) { /* i386 form: no setsoftnet() call */ \
+ if(netisr == 0) { /* no bit was pending before this request */ \
+ softem++; \
+ } \
+ netisr |= 1<<(anisr); \
+}
+#ifndef LOCORE
+#ifdef KERNEL
+int softem; /* incremented by schednetisr() whenever netisr was 0 */
+#endif /* KERNEL */
+#endif /* !LOCORE */
+#endif /* i386 */
+
+#ifndef LOCORE
+#ifdef KERNEL
+int netisr; /* scheduling bits for network; one bit per NETISR_* value, set by schednetisr() */
+#endif /* KERNEL */
+#endif /* !LOCORE */
diff --git a/sys/net/radix.c b/sys/net/radix.c
new file mode 100644
index 000000000000..f182eb77abfa
--- /dev/null
+++ b/sys/net/radix.c
@@ -0,0 +1,757 @@
+/*
+ * Copyright (c) 1988, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)radix.c 8.2 (Berkeley) 1/4/94
+ */
+
+/*
+ * Routines to build and maintain radix trees for routing lookups.
+ */
+#ifndef RNF_NORMAL
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#define M_DONTWAIT M_NOWAIT
+#ifdef KERNEL
+#include <sys/domain.h>
+#endif
+#endif
+
+#include <net/radix.h>
+
+int max_keylen; /* largest key length; rn_init() raises it to the biggest dom_maxrtkey */
+struct radix_mask *rn_mkfreelist; /* free list of radix_mask structures (MKGet/MKFree) */
+struct radix_node_head *mask_rnhead; /* tree in which the netmasks themselves are stored */
+static int gotOddMasks; /* set by rn_addmask() when a mask's first non-0xff byte is not 0 */
+static char *maskedKey; /* scratch buffer in which rn_match() builds key & mask */
+static char *rn_zeros, *rn_ones; /* all-zero and all-ones keys set up by rn_init() */
+
+#define rn_masktop (mask_rnhead->rnh_treetop)
+#undef Bcmp
+#define Bcmp(a, b, l) ((l) == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (u_long)(l))) /* zero-length compare is "equal"; l is parenthesized so expressions such as vlen - off expand as a unit */
+/*
+ * The data structure for the keys is a radix tree with one way
+ * branching removed. The index rn_b at an internal node n represents a bit
+ * position to be tested. The tree is arranged so that all descendants
+ * of a node n have keys whose bits all agree up to position rn_b - 1.
+ * (We say the index of n is rn_b.)
+ *
+ * There is at least one descendant which has a one bit at position rn_b,
+ * and at least one with a zero there.
+ *
+ * A route is determined by a pair of key and mask. We require that the
+ * bit-wise logical AND of the key and mask be the key itself.
+ * We define the index of a route associated with the mask to be
+ * the first bit number in the mask where 0 occurs (with bit number 0
+ * representing the highest order bit).
+ *
+ * We say a mask is normal if every bit is 0, past the index of the mask.
+ * If a node n has a descendant (k, m) with index(m) == index(n) == rn_b,
+ * and m is a normal mask, then the route applies to every descendant of n.
+ * If the index(m) < rn_b, this implies the trailing last few bits of k
+ * before bit b are all 0, (and hence consequently true of every descendant
+ * of n), so the route applies to all descendants of the node as well.
+ *
+ * The present version of the code makes no use of normal routes,
+ * but similar logic shows that a non-normal mask m such that
+ * index(m) <= index(n) could potentially apply to many children of n.
+ * Thus, for each non-host route, we attach its mask to a list at an internal
+ * node as high in the tree as we can go.
+ */
+
+struct radix_node *
+rn_search(v_arg, head) /* descend from head to the leaf selected by the bits of key v_arg */
+ void *v_arg; /* key; only the bytes at each node's rn_off are looked at */
+ struct radix_node *head; /* node at which the descent starts */
+{
+ register struct radix_node *x;
+ register caddr_t v;
+
+ for (x = head, v = v_arg; x->rn_b >= 0;) { /* rn_b < 0 marks a leaf */
+ if (x->rn_bmask & v[x->rn_off])
+ x = x->rn_r; /* tested bit set: go right */
+ else
+ x = x->rn_l; /* tested bit clear: go left */
+ }
+ return (x); /* the key is not compared here; the caller must verify the leaf */
+}
+
+struct radix_node *
+rn_search_m(v_arg, head, m_arg) /* like rn_search(), but a bit counts as set only if it is set in both key and mask */
+ struct radix_node *head; /* node at which the descent starts */
+ void *v_arg, *m_arg; /* key and mask, indexed by each node's rn_off */
+{
+ register struct radix_node *x;
+ register caddr_t v = v_arg, m = m_arg;
+
+ for (x = head; x->rn_b >= 0;) { /* rn_b < 0 marks a leaf */
+ if ((x->rn_bmask & m[x->rn_off]) &&
+ (x->rn_bmask & v[x->rn_off]))
+ x = x->rn_r; /* bit set in mask and in key: go right */
+ else
+ x = x->rn_l; /* otherwise go left */
+ }
+ return x; /* the leaf reached; its key is not compared here */
+}
+
+int
+rn_refines(m_arg, n_arg) /* 1 if every bit set in mask n is also set in mask m and the two differ, else 0 */
+ void *m_arg, *n_arg; /* masks whose first byte holds their length */
+{
+ register caddr_t m = m_arg, n = n_arg;
+ register caddr_t lim, lim2 = lim = n + *(u_char *)n; /* both limits start at the end of n */
+ int longer = (*(u_char *)n++) - (int)(*(u_char *)m++); /* len(n) - len(m); m and n now point past the length byte */
+ int masks_are_equal = 1;
+
+ if (longer > 0)
+ lim -= longer; /* n is the longer one: compare only over m's length */
+ while (n < lim) {
+ if (*n & ~(*m))
+ return 0; /* n has a bit that m lacks */
+ if (*n++ != *m++)
+ masks_are_equal = 0;
+
+ }
+ while (n < lim2)
+ if (*n++)
+ return 0; /* n has non-zero bytes beyond the end of m */
+ if (masks_are_equal && (longer < 0))
+ for (lim2 = m - longer; m < lim2; )
+ if (*m++)
+ return 1; /* m is longer and its extra bytes are not all zero */
+ return (!masks_are_equal);
+}
+
+
+struct radix_node *
+rn_match(v_arg, head) /* find the leaf whose route applies to key v_arg; returns 0 if none does */
+ void *v_arg; /* key; its first byte is its length */
+ struct radix_node_head *head;
+{
+ caddr_t v = v_arg;
+ register struct radix_node *t = head->rnh_treetop, *x;
+ register caddr_t cp = v, cp2, cp3;
+ caddr_t cplim, mstart;
+ struct radix_node *saved_t, *top = t;
+ int off = t->rn_off, vlen = *(u_char *)cp, matched_off;
+
+ /*
+ * Open code rn_search(v, top) to avoid overhead of extra
+ * subroutine call.
+ */
+ for (; t->rn_b >= 0; ) {
+ if (t->rn_bmask & cp[t->rn_off])
+ t = t->rn_r;
+ else
+ t = t->rn_l;
+ }
+ /*
+ * See if we match exactly as a host destination
+ */
+ cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
+ for (; cp < cplim; cp++, cp2++)
+ if (*cp != *cp2)
+ goto on1;
+ /*
+ * This extra grot is in case we are explicitly asked
+ * to look up the default. Ugh!
+ */
+ if ((t->rn_flags & RNF_ROOT) && t->rn_dupedkey)
+ t = t->rn_dupedkey;
+ return t;
+on1:
+ matched_off = cp - v; /* offset of the first byte that differed */
+ saved_t = t;
+ do {
+ if (t->rn_mask) {
+ /*
+ * Even if we don't match exactly as a host,
+ * we may match if the leaf we wound up at is
+ * a route to a net.
+ */
+ cp3 = matched_off + t->rn_mask;
+ cp2 = matched_off + t->rn_key;
+ for (; cp < cplim; cp++)
+ if ((*cp2++ ^ *cp) & *cp3++)
+ break;
+ if (cp == cplim)
+ return t;
+ cp = matched_off + v; /* rewind for the next duplicate */
+ }
+ } while ((t = t->rn_dupedkey) != 0);
+ t = saved_t;
+ /* start searching up the tree */
+ do {
+ register struct radix_mask *m;
+ t = t->rn_p;
+ if ((m = t->rn_mklist) != 0) {
+ /*
+ * After doing measurements here, it may
+ * turn out to be faster to open code
+ * rn_search_m here instead of always
+ * copying and masking.
+ */
+ off = min(t->rn_off, matched_off);
+ mstart = maskedKey + off;
+ do {
+ cp2 = mstart;
+ cp3 = m->rm_mask + off;
+ for (cp = v + off; cp < cplim;)
+ *cp2++ = *cp++ & *cp3++; /* maskedKey = key & mask */
+ x = rn_search(maskedKey, t);
+ while (x && x->rn_mask != m->rm_mask)
+ x = x->rn_dupedkey;
+ if (x &&
+ (Bcmp(mstart, x->rn_key + off,
+ vlen - off) == 0))
+ return x;
+ } while ((m = m->rm_mklist) != 0);
+ }
+ } while (t != top);
+ return 0;
+}
+
+#ifdef RN_DEBUG
+int rn_nodenum; /* next value handed out as rn_info */
+struct radix_node *rn_clist; /* head of the list of created leaves, linked through rn_ybro */
+int rn_saveinfo;
+int rn_debug = 1; /* non-zero: rn_insert() prints the tree before and after */
+#endif /* RN_DEBUG */
+
+struct radix_node *
+rn_newpair(v, b, nodes) /* make nodes[0] a leaf for key v and nodes[1] its parent testing bit b; returns the parent */
+ void *v;
+ int b;
+ struct radix_node nodes[2];
+{
+ register struct radix_node *tt = nodes, *t = tt + 1; /* tt: leaf, t: internal node */
+ t->rn_b = b; t->rn_bmask = 0x80 >> (b & 7); /* bit 0 of a byte is its high-order bit */
+ t->rn_l = tt; t->rn_off = b >> 3; /* rn_off: byte of the key that holds bit b */
+ tt->rn_b = -1; tt->rn_key = (caddr_t)v; tt->rn_p = t;
+ tt->rn_flags = t->rn_flags = RNF_ACTIVE;
+#ifdef RN_DEBUG
+ tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
+ tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
+#endif
+ return t; /* t->rn_r and t->rn_p are not set here; the caller fills them in */
+}
+
+struct radix_node *
+rn_insert(v_arg, head, dupentry, nodes) /* add key v_arg to head's tree using nodes[]; returns the new leaf, or the existing one if the key is already there */
+ void *v_arg;
+ struct radix_node_head *head;
+ int *dupentry; /* out: 1 if the key was already present (nothing inserted), else 0 */
+ struct radix_node nodes[2];
+{
+ caddr_t v = v_arg;
+ struct radix_node *top = head->rnh_treetop;
+ int head_off = top->rn_off, vlen = (int)*((u_char *)v);
+ register struct radix_node *t = rn_search(v_arg, top);
+ register caddr_t cp = v + head_off;
+ register int b;
+ struct radix_node *tt;
+ /*
+ * Find the first bit at which v and t->rn_key differ
+ */
+ {
+ register caddr_t cp2 = t->rn_key + head_off;
+ register int cmp_res;
+ caddr_t cplim = v + vlen;
+
+ while (cp < cplim)
+ if (*cp2++ != *cp++)
+ goto on1;
+ *dupentry = 1;
+ return t;
+on1:
+ *dupentry = 0;
+ cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; /* bits that differ in the first unequal byte */
+ for (b = (cp - v) << 3; cmp_res; b--) /* back up from the end of that byte to its highest differing bit */
+ cmp_res >>= 1;
+ }
+ {
+ register struct radix_node *p, *x = top;
+ cp = v;
+ do { /* walk down until a leaf or a node testing a bit >= b is reached */
+ p = x;
+ if (cp[x->rn_off] & x->rn_bmask)
+ x = x->rn_r;
+ else x = x->rn_l;
+ } while (b > (unsigned) x->rn_b); /* x->rn_b < b && x->rn_b >= 0 */
+#ifdef RN_DEBUG
+ if (rn_debug)
+ printf("Going In:\n"), traverse(p);
+#endif
+ t = rn_newpair(v_arg, b, nodes); tt = t->rn_l;
+ if ((cp[p->rn_off] & p->rn_bmask) == 0) /* splice the new internal node t in between p and x */
+ p->rn_l = t;
+ else
+ p->rn_r = t;
+ x->rn_p = t; t->rn_p = p; /* frees x, p as temp vars below */
+ if ((cp[t->rn_off] & t->rn_bmask) == 0) {
+ t->rn_r = x; /* new leaf stays on the left (set by rn_newpair) */
+ } else {
+ t->rn_r = tt; t->rn_l = x; /* new leaf goes on the right */
+ }
+#ifdef RN_DEBUG
+ if (rn_debug)
+ printf("Coming out:\n"), traverse(p);
+#endif
+ }
+ return (tt);
+}
+
+struct radix_node *
+rn_addmask(n_arg, search, skip) /* enter mask n_arg in the mask tree; returns its leaf, or 0 if allocation fails */
+ int search, skip; /* search: look for an existing entry first; skip: byte offset at which the index scan starts */
+ void *n_arg;
+{
+ caddr_t netmask = (caddr_t)n_arg;
+ register struct radix_node *x;
+ register caddr_t cp, cplim;
+ register int b, mlen, j;
+ int maskduplicated;
+
+ mlen = *(u_char *)netmask; /* first byte of the mask is its length */
+ if (search) {
+ x = rn_search(netmask, rn_masktop);
+ mlen = *(u_char *)netmask;
+ if (Bcmp(netmask, x->rn_key, mlen) == 0)
+ return (x);
+ }
+ R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x)); /* two nodes followed by room for a copy of the mask */
+ if (x == 0)
+ return (0);
+ Bzero(x, max_keylen + 2 * sizeof (*x));
+ cp = (caddr_t)(x + 2);
+ Bcopy(netmask, cp, mlen); /* NOTE(review): mlen is not checked against max_keylen here */
+ netmask = cp;
+ x = rn_insert(netmask, mask_rnhead, &maskduplicated, x); /* NOTE(review): maskduplicated is never tested; on a duplicate the block allocated above is no longer referenced here */
+ /*
+ * Calculate index of mask.
+ */
+ cplim = netmask + mlen;
+ for (cp = netmask + skip; cp < cplim; cp++)
+ if (*(u_char *)cp != 0xff)
+ break;
+ b = (cp - netmask) << 3;
+ if (cp != cplim) {
+ if (*cp != 0) {
+ gotOddMasks = 1;
+ for (j = 0x80; j; b++, j >>= 1) /* count the leading one bits of this byte */
+ if ((j & *cp) == 0)
+ break;
+ }
+ }
+ x->rn_b = -1 - b; /* leaves store the index as -1 - index */
+ return (x);
+}
+
+struct radix_node *
+rn_addroute(v_arg, n_arg, head, treenodes) /* add route (key v_arg, mask n_arg) to head's tree; returns its leaf, or 0 on failure or exact duplicate */
+ void *v_arg, *n_arg; /* n_arg may be 0 for a route without a mask */
+ struct radix_node_head *head;
+ struct radix_node treenodes[2]; /* caller-supplied storage for the leaf and its internal node */
+{
+ caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
+ register struct radix_node *t, *x, *tt;
+ struct radix_node *saved_tt, *top = head->rnh_treetop;
+ short b = 0, b_leaf;
+ int mlen, keyduplicated;
+ caddr_t cplim;
+ struct radix_mask *m, **mp;
+
+ /*
+ * In dealing with non-contiguous masks, there may be
+ * many different routes which have the same mask.
+ * We will find it useful to have a unique pointer to
+ * the mask to speed avoiding duplicate references at
+ * nodes and possibly save time in calculating indices.
+ */
+ if (netmask) {
+ x = rn_search(netmask, rn_masktop);
+ mlen = *(u_char *)netmask;
+ if (Bcmp(netmask, x->rn_key, mlen) != 0) {
+ x = rn_addmask(netmask, 0, top->rn_off);
+ if (x == 0)
+ return (0);
+ }
+ netmask = x->rn_key; /* from here on use the mask tree's own copy */
+ b = -1 - x->rn_b; /* index of the mask */
+ }
+ /*
+ * Deal with duplicated keys: attach node to previous instance
+ */
+ saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
+ if (keyduplicated) {
+ do {
+ if (tt->rn_mask == netmask)
+ return (0); /* same key and same mask already present */
+ t = tt;
+ if (netmask == 0 ||
+ (tt->rn_mask && rn_refines(netmask, tt->rn_mask)))
+ break;
+ } while (tt = tt->rn_dupedkey);
+ /*
+ * If the mask is not duplicated, we wouldn't
+ * find it among possible duplicate key entries
+ * anyway, so the above test doesn't hurt.
+ *
+ * We sort the masks for a duplicated key the same way as
+ * in a masklist -- most specific to least specific.
+ * This may require the unfortunate nuisance of relocating
+ * the head of the list.
+ */
+ if (tt && t == saved_tt) {
+ struct radix_node *xx = x; /* x is borrowed as a temporary below */
+ /* link in at head of list */
+ (tt = treenodes)->rn_dupedkey = t;
+ tt->rn_flags = t->rn_flags;
+ tt->rn_p = x = t->rn_p;
+ if (x->rn_l == t) x->rn_l = tt; else x->rn_r = tt;
+ saved_tt = tt; x = xx;
+ } else {
+ (tt = treenodes)->rn_dupedkey = t->rn_dupedkey; /* link in after t */
+ t->rn_dupedkey = tt;
+ }
+#ifdef RN_DEBUG
+ t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
+ tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
+#endif
+ t = saved_tt;
+ tt->rn_key = (caddr_t) v;
+ tt->rn_b = -1;
+ tt->rn_flags = t->rn_flags & ~RNF_ROOT;
+ }
+ /*
+ * Put mask in tree.
+ */
+ if (netmask) {
+ tt->rn_mask = netmask;
+ tt->rn_b = x->rn_b;
+ }
+ t = saved_tt->rn_p;
+ b_leaf = -1 - t->rn_b;
+ if (t->rn_r == saved_tt) x = t->rn_l; else x = t->rn_r; /* x: the sibling of the new leaf */
+ /* Promote general routes from below */
+ if (x->rn_b < 0) {
+ if (x->rn_mask && (x->rn_b >= b_leaf) && x->rn_mklist == 0) {
+ MKGet(m);
+ if (m) {
+ Bzero(m, sizeof *m);
+ m->rm_b = x->rn_b;
+ m->rm_mask = x->rn_mask;
+ x->rn_mklist = t->rn_mklist = m;
+ }
+ }
+ } else if (x->rn_mklist) {
+ /*
+ * Skip over masks whose index is > that of new node
+ */
+ for (mp = &x->rn_mklist; m = *mp; mp = &m->rm_mklist)
+ if (m->rm_b >= b_leaf)
+ break;
+ t->rn_mklist = m; *mp = 0; /* the remainder of x's list moves up to t */
+ }
+ /* Add new route to highest possible ancestor's list */
+ if ((netmask == 0) || (b > t->rn_b ))
+ return tt; /* can't lift at all */
+ b_leaf = tt->rn_b;
+ do {
+ x = t;
+ t = t->rn_p;
+ } while (b <= t->rn_b && x != top);
+ /*
+ * Search through routes associated with node to
+ * insert new route according to index.
+ * For nodes of equal index, place more specific
+ * masks first.
+ */
+ cplim = netmask + mlen; /* NOTE(review): cplim is not read again in this function */
+ for (mp = &x->rn_mklist; m = *mp; mp = &m->rm_mklist) {
+ if (m->rm_b < b_leaf)
+ continue;
+ if (m->rm_b > b_leaf)
+ break;
+ if (m->rm_mask == netmask) {
+ m->rm_refs++; /* share the existing annotation */
+ tt->rn_mklist = m;
+ return tt;
+ }
+ if (rn_refines(netmask, m->rm_mask))
+ break;
+ }
+ MKGet(m);
+ if (m == 0) {
+ printf("Mask for route not entered\n");
+ return (tt);
+ }
+ Bzero(m, sizeof *m);
+ m->rm_b = b_leaf;
+ m->rm_mask = netmask;
+ m->rm_mklist = *mp; /* insert the new annotation at the position found above */
+ *mp = m;
+ tt->rn_mklist = m;
+ return tt;
+}
+
+struct radix_node *
+rn_delete(v_arg, netmask_arg, head) /* unlink route (key, mask) from head's tree; returns its leaf, or 0 if not found or a root leaf */
+ void *v_arg, *netmask_arg;
+ struct radix_node_head *head;
+{
+ register struct radix_node *t, *p, *x, *tt;
+ struct radix_mask *m, *saved_m, **mp;
+ struct radix_node *dupedkey, *saved_tt, *top;
+ caddr_t v, netmask;
+ int b, head_off, vlen;
+
+ v = v_arg;
+ netmask = netmask_arg;
+ x = head->rnh_treetop;
+ tt = rn_search(v, x);
+ head_off = x->rn_off;
+ vlen = *(u_char *)v;
+ saved_tt = tt;
+ top = x;
+ if (tt == 0 ||
+ Bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
+ return (0); /* key is not in the tree */
+ /*
+ * Delete our route from mask lists.
+ */
+ if (dupedkey = tt->rn_dupedkey) {
+ if (netmask)
+ netmask = rn_search(netmask, rn_masktop)->rn_key; /* switch to the mask tree's copy so pointers can be compared */
+ while (tt->rn_mask != netmask)
+ if ((tt = tt->rn_dupedkey) == 0)
+ return (0);
+ }
+ if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
+ goto on1;
+ if (m->rm_mask != tt->rn_mask) {
+ printf("rn_delete: inconsistent annotation\n");
+ goto on1;
+ }
+ if (--m->rm_refs >= 0)
+ goto on1; /* annotation is still shared by other routes */
+ b = -1 - tt->rn_b;
+ t = saved_tt->rn_p;
+ if (b > t->rn_b)
+ goto on1; /* Wasn't lifted at all */
+ do {
+ x = t;
+ t = t->rn_p;
+ } while (b <= t->rn_b && x != top);
+ for (mp = &x->rn_mklist; m = *mp; mp = &m->rm_mklist)
+ if (m == saved_m) {
+ *mp = m->rm_mklist;
+ MKFree(m);
+ break;
+ }
+ if (m == 0)
+ printf("rn_delete: couldn't find our annotation\n");
+on1:
+ /*
+ * Eliminate us from tree
+ */
+ if (tt->rn_flags & RNF_ROOT)
+ return (0);
+#ifdef RN_DEBUG
+ /* Get us out of the creation list */
+ for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {}
+ if (t) t->rn_ybro = tt->rn_ybro;
+#endif
+ t = tt->rn_p;
+ if (dupedkey) {
+ if (tt == saved_tt) {
+ x = dupedkey; x->rn_p = t; /* the next duplicate takes our place in the tree */
+ if (t->rn_l == tt) t->rn_l = x; else t->rn_r = x;
+ } else {
+ for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
+ p = p->rn_dupedkey;
+ if (p) p->rn_dupedkey = tt->rn_dupedkey;
+ else printf("rn_delete: couldn't find us\n");
+ }
+ t = tt + 1; /* the internal node that shares our storage */
+ if (t->rn_flags & RNF_ACTIVE) {
+#ifndef RN_DEBUG
+ *++x = *t; p = t->rn_p;
+#else
+ b = t->rn_info; *++x = *t; t->rn_info = b; p = t->rn_p;
+#endif
+ if (p->rn_l == t) p->rn_l = x; else p->rn_r = x;
+ x->rn_l->rn_p = x; x->rn_r->rn_p = x;
+ }
+ goto out;
+ }
+ if (t->rn_l == tt) x = t->rn_r; else x = t->rn_l; /* x: our sibling, which replaces parent t */
+ p = t->rn_p;
+ if (p->rn_r == t) p->rn_r = x; else p->rn_l = x;
+ x->rn_p = p;
+ /*
+ * Demote routes attached to us.
+ */
+ if (t->rn_mklist) {
+ if (x->rn_b >= 0) {
+ for (mp = &x->rn_mklist; m = *mp;)
+ mp = &m->rm_mklist;
+ *mp = t->rn_mklist; /* append t's list to the end of x's */
+ } else {
+ for (m = t->rn_mklist; m;) {
+ struct radix_mask *mm = m->rm_mklist;
+ if (m == x->rn_mklist && (--(m->rm_refs) < 0)) {
+ x->rn_mklist = 0;
+ MKFree(m);
+ } else
+ printf("%s %x at %x\n",
+ "rn_delete: Orphaned Mask", m, x);
+ m = mm;
+ }
+ }
+ }
+ /*
+ * We may be holding an active internal node in the tree.
+ */
+ x = tt + 1;
+ if (t != x) {
+#ifndef RN_DEBUG
+ *t = *x; /* reuse the freed parent t to hold our internal node's contents */
+#else
+ b = t->rn_info; *t = *x; t->rn_info = b;
+#endif
+ t->rn_l->rn_p = t; t->rn_r->rn_p = t;
+ p = x->rn_p;
+ if (p->rn_l == x) p->rn_l = t; else p->rn_r = t;
+ }
+out:
+ tt->rn_flags &= ~RNF_ACTIVE;
+ tt[1].rn_flags &= ~RNF_ACTIVE;
+ return (tt);
+}
+
+int
+rn_walktree(h, f, w) /* call f(leaf, w) on each non-root leaf; returns f's first non-zero result, else 0 */
+ struct radix_node_head *h;
+ register int (*f)();
+ void *w; /* passed through to f unchanged */
+{
+ int error;
+ struct radix_node *base, *next;
+ register struct radix_node *rn = h->rnh_treetop;
+ /*
+ * This gets complicated because we may delete the node
+ * while applying the function f to it, so we need to calculate
+ * the successor node in advance.
+ */
+ /* First time through node, go left */
+ while (rn->rn_b >= 0)
+ rn = rn->rn_l;
+ for (;;) {
+ base = rn;
+ /* If at right child go back up, otherwise, go right */
+ while (rn->rn_p->rn_r == rn && (rn->rn_flags & RNF_ROOT) == 0)
+ rn = rn->rn_p;
+ /* Find the next *leaf* since next node might vanish, too */
+ for (rn = rn->rn_p->rn_r; rn->rn_b >= 0;)
+ rn = rn->rn_l;
+ next = rn;
+ /* Process leaves */
+ while (rn = base) { /* base and its chain of duplicated keys */
+ base = rn->rn_dupedkey; /* fetched before f runs, since f may delete rn */
+ if (!(rn->rn_flags & RNF_ROOT) && (error = (*f)(rn, w)))
+ return (error);
+ }
+ rn = next;
+ if (rn->rn_flags & RNF_ROOT)
+ return (0); /* reached a root leaf: walk complete */
+ }
+ /* NOTREACHED */
+}
+
+int
+rn_inithead(head, off) /* allocate and set up an empty tree in *head; returns 1 on success or if *head is already set, 0 if allocation fails */
+ void **head;
+ int off; /* bit number tested by the top node */
+{
+ register struct radix_node_head *rnh;
+ register struct radix_node *t, *tt, *ttt;
+ if (*head)
+ return (1);
+ R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh));
+ if (rnh == 0)
+ return (0);
+ Bzero(rnh, sizeof (*rnh));
+ *head = rnh;
+ t = rn_newpair(rn_zeros, off, rnh->rnh_nodes); /* nodes[0]: all-zero leaf, nodes[1]: top node */
+ ttt = rnh->rnh_nodes + 2; /* nodes[2]: the right-hand leaf */
+ t->rn_r = ttt;
+ t->rn_p = t; /* the top node is its own parent */
+ tt = t->rn_l;
+ tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
+ tt->rn_b = -1 - off;
+ *ttt = *tt; /* right leaf is a copy of the left one ... */
+ ttt->rn_key = rn_ones; /* ... except that its key is all ones */
+ rnh->rnh_addaddr = rn_addroute;
+ rnh->rnh_deladdr = rn_delete;
+ rnh->rnh_matchaddr = rn_match;
+ rnh->rnh_walktree = rn_walktree;
+ rnh->rnh_treetop = t;
+ return (1);
+}
+
+void
+rn_init() /* one-time setup: size max_keylen, build rn_zeros/rn_ones/maskedKey and create the mask tree */
+{
+ char *cp, *cplim;
+#ifdef KERNEL
+ struct domain *dom;
+
+ for (dom = domains; dom; dom = dom->dom_next) /* max_keylen becomes the largest dom_maxrtkey */
+ if (dom->dom_maxrtkey > max_keylen)
+ max_keylen = dom->dom_maxrtkey;
+#endif
+ if (max_keylen == 0) {
+ printf("rn_init: radix functions require max_keylen be set\n");
+ return;
+ }
+ R_Malloc(rn_zeros, char *, 3 * max_keylen); /* one block holding rn_zeros, rn_ones and maskedKey in that order */
+ if (rn_zeros == NULL)
+ panic("rn_init");
+ Bzero(rn_zeros, 3 * max_keylen);
+ rn_ones = cp = rn_zeros + max_keylen;
+ maskedKey = cplim = rn_ones + max_keylen;
+ while (cp < cplim)
+ *cp++ = -1; /* fill the rn_ones section with all-one bits */
+ if (rn_inithead((void **)&mask_rnhead, 0) == 0)
+ panic("rn_init 2");
+}
diff --git a/sys/net/radix.h b/sys/net/radix.h
new file mode 100644
index 000000000000..a11057f0439e
--- /dev/null
+++ b/sys/net/radix.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 1988, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)radix.h 8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _RADIX_H_
+#define _RADIX_H_
+
+/*
+ * Radix search tree node layout.
+ */
+
+struct radix_node {
+ struct radix_mask *rn_mklist; /* list of masks contained in subtree */
+ struct radix_node *rn_p; /* parent */
+ short rn_b; /* node: bit number to test (>= 0); leaf: -1-index(netmask) */
+ char rn_bmask; /* node: mask for bit test */
+ u_char rn_flags; /* enumerated next */
+#define RNF_NORMAL 1 /* leaf contains normal route */
+#define RNF_ROOT 2 /* leaf is root leaf for tree */
+#define RNF_ACTIVE 4 /* This node is alive (for rtfree) */
+ union {
+ struct { /* leaf only data: */
+ caddr_t rn_Key; /* object of search */
+ caddr_t rn_Mask; /* netmask, if present */
+ struct radix_node *rn_Dupedkey; /* next leaf with the same key, if any */
+ } rn_leaf;
+ struct { /* node only data: */
+ int rn_Off; /* where to start compare */
+ struct radix_node *rn_L;/* progeny: taken when the tested bit is clear */
+ struct radix_node *rn_R;/* progeny: taken when the tested bit is set */
+ }rn_node;
+ } rn_u;
+#ifdef RN_DEBUG
+ int rn_info; /* number assigned from rn_nodenum at creation */
+ struct radix_node *rn_twin; /* internal node created together with this leaf */
+ struct radix_node *rn_ybro; /* next entry on the rn_clist creation list */
+#endif
+};
+
+#define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey /* leaf only */
+#define rn_key rn_u.rn_leaf.rn_Key /* leaf only */
+#define rn_mask rn_u.rn_leaf.rn_Mask /* leaf only */
+#define rn_off rn_u.rn_node.rn_Off /* internal node only */
+#define rn_l rn_u.rn_node.rn_L /* internal node only */
+#define rn_r rn_u.rn_node.rn_R /* internal node only */
+
+/*
+ * Annotations to tree concerning potential routes applying to subtrees.
+ */
+
+extern struct radix_mask {
+ short rm_b; /* bit offset; -1-index(netmask) */
+ char rm_unused; /* cf. rn_bmask */
+ u_char rm_flags; /* cf. rn_flags */
+ struct radix_mask *rm_mklist; /* more masks to try; also the link on the free list */
+ caddr_t rm_mask; /* the mask */
+ int rm_refs; /* # of references to this struct */
+} *rn_mkfreelist; /* head of the free list used by MKGet()/MKFree() */
+
+#define MKGet(m) { /* m = a radix_mask from the free list, else from R_Malloc; contents are not cleared */ \
+ if (rn_mkfreelist) {\
+ m = rn_mkfreelist; \
+ rn_mkfreelist = (m)->rm_mklist; \
+ } else \
+ R_Malloc(m, struct radix_mask *, sizeof (*(m))); }\
+
+#define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);} /* push m back on the free list */
+
+struct radix_node_head {
+ struct radix_node *rnh_treetop; /* top internal node of the tree */
+ int rnh_addrsize; /* permit, but not require fixed keys */
+ int rnh_pktsize; /* permit, but not require fixed keys */
+ struct radix_node *(*rnh_addaddr) /* add based on sockaddr */
+ __P((void *v, void *mask,
+ struct radix_node_head *head, struct radix_node nodes[]));
+ struct radix_node *(*rnh_addpkt) /* add based on packet hdr */
+ __P((void *v, void *mask,
+ struct radix_node_head *head, struct radix_node nodes[]));
+ struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */
+ __P((void *v, void *mask, struct radix_node_head *head));
+ struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */
+ __P((void *v, void *mask, struct radix_node_head *head));
+ struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */
+ __P((void *v, struct radix_node_head *head));
+ struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */
+ __P((void *v, struct radix_node_head *head));
+ int (*rnh_walktree) /* traverse tree */
+ __P((struct radix_node_head *head, int (*f)(), void *w));
+ struct radix_node rnh_nodes[3]; /* empty tree for common case: two end leaves and the top node */
+};
+
+
+#ifndef KERNEL /* user-level versions; NOTE(review): no Bcopy is defined on this side */
+#define Bcmp(a, b, n) bcmp(((char *)(a)), ((char *)(b)), (n))
+#define Bzero(p, n) bzero((char *)(p), (int)(n)); /* NOTE(review): expansion already ends in ';' */
+#define R_Malloc(p, t, n) (p = (t) malloc((unsigned int)(n)))
+#define Free(p) free((char *)p); /* NOTE(review): expansion already ends in ';' */
+#else /* KERNEL */
+#define Bcmp(a, b, n) bcmp(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n))
+#define Bcopy(a, b, n) bcopy(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n))
+#define Bzero(p, n) bzero((caddr_t)(p), (unsigned)(n)); /* NOTE(review): expansion already ends in ';' */
+#define R_Malloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_DONTWAIT)) /* M_DONTWAIT allocation of type M_RTABLE */
+#define Free(p) free((caddr_t)p, M_RTABLE); /* NOTE(review): expansion already ends in ';' */
+
+void rn_init __P((void));
+int rn_inithead __P((void **, int));
+int rn_refines __P((void *, void *));
+int rn_walktree __P((struct radix_node_head *, int (*)(), void *));
+struct radix_node
+ *rn_addmask __P((void *, int, int)),
+ *rn_addroute __P((void *, void *, struct radix_node_head *,
+ struct radix_node [2])),
+ *rn_delete __P((void *, void *, struct radix_node_head *)),
+ *rn_insert __P((void *, struct radix_node_head *, int *,
+ struct radix_node [2])),
+ *rn_match __P((void *, struct radix_node_head *)),
+ *rn_newpair __P((void *, int, struct radix_node[2])),
+ *rn_search __P((void *, struct radix_node *)),
+ *rn_search_m __P((void *, struct radix_node *, void *));
+
+#endif /*KERNEL*/
+#endif /* _RADIX_H_ */
+#endif /* _RADIX_H_ */
diff --git a/sys/net/raw_cb.c b/sys/net/raw_cb.c
new file mode 100644
index 000000000000..e44192d2e83f
--- /dev/null
+++ b/sys/net/raw_cb.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 1980, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)raw_cb.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+#include <netinet/in.h>
+
+/*
+ * Routines to manage the raw protocol control blocks.
+ *
+ * TODO:
+ * hash lookups by protocol family/protocol + address family
+ * take care of unique address problems per AF?
+ * redo address binding to allow wildcards
+ */
+
+/* Send and receive buffer sizes that raw_attach() passes to soreserve(). */
+u_long raw_sendspace = RAWSNDQ;
+u_long raw_recvspace = RAWRCVQ;
+
+/*
+ * Bind an already allocated raw control block to its socket.
+ *
+ * The caller must have hung a rawcb off so->so_pcb beforehand; when none
+ * is present ENOBUFS is returned.  Buffer space of raw_sendspace and
+ * raw_recvspace is reserved on the socket, the block is stamped with the
+ * socket's domain family and the given protocol, and it is queued on the
+ * global rawcb list.  Returns 0 on success or the soreserve() error.
+ */
+int
+raw_attach(so, proto)
+	register struct socket *so;
+	int proto;
+{
+	register struct rawcb *cb;
+	int rv;
+
+	cb = sotorawcb(so);
+	if (cb == 0)
+		return (ENOBUFS);
+
+	rv = soreserve(so, raw_sendspace, raw_recvspace);
+	if (rv != 0)
+		return (rv);
+
+	cb->rcb_socket = so;
+	cb->rcb_proto.sp_protocol = proto;
+	cb->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family;
+	insque(cb, &rawcb);
+	return (0);
+}
+
+/*
+ * Tear down a raw control block: sever it from its socket, release the
+ * socket with sofree(), unlink the block from the rawcb list and free
+ * the block itself.
+ */
+void
+raw_detach(rp)
+	register struct rawcb *rp;
+{
+	struct socket *sock;
+
+	sock = rp->rcb_socket;
+	sock->so_pcb = 0;	/* cleared before sofree() is called */
+	sofree(sock);
+	remque(rp);
+#ifdef notdef
+	if (rp->rcb_laddr)
+		m_freem(dtom(rp->rcb_laddr));
+	rp->rcb_laddr = 0;
+#endif
+	free((caddr_t)(rp), M_PCB);
+}
+
+/*
+ * Disconnect a raw control block.  The block is detached (and thereby
+ * freed) only when its socket has SS_NOFDREF set; otherwise nothing is
+ * released here.
+ */
+void
+raw_disconnect(rp)
+	struct rawcb *rp;
+{
+#ifdef notdef
+	if (rp->rcb_faddr)
+		m_freem(dtom(rp->rcb_faddr));
+	rp->rcb_faddr = 0;
+#endif
+	if ((rp->rcb_socket->so_state & SS_NOFDREF) == 0)
+		return;
+	raw_detach(rp);
+}
+
+#ifdef notdef
+/*
+ * Record a local address for a raw socket.  Compiled only when notdef
+ * is defined.
+ *
+ * Returns EADDRNOTAVAIL when ifnet is 0; otherwise duplicates the nam
+ * mbuf, points rcb_laddr at the copy's data and returns 0.
+ *
+ * NOTE(review): "addr" is initialized but never read afterwards, and the
+ * result of m_copym() is handed to mtod() without a check for 0.
+ */
+int
+raw_bind(so, nam)
+ register struct socket *so;
+ struct mbuf *nam;
+{
+ struct sockaddr *addr = mtod(nam, struct sockaddr *);
+ register struct rawcb *rp;
+
+ if (ifnet == 0)
+ return (EADDRNOTAVAIL);
+ rp = sotorawcb(so);
+ nam = m_copym(nam, 0, M_COPYALL, M_WAITOK);
+ rp->rcb_laddr = mtod(nam, struct sockaddr *);
+ return (0);
+}
+#endif
diff --git a/sys/net/raw_cb.h b/sys/net/raw_cb.h
new file mode 100644
index 000000000000..6003e181edb4
--- /dev/null
+++ b/sys/net/raw_cb.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 1980, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)raw_cb.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Raw protocol interface control block. Used
+ * to tie a socket to the generic raw interface.
+ *
+ * Blocks are chained through rcb_next/rcb_prev on a circular list whose
+ * head is the global "rawcb"; raw_input() walks that list and uses
+ * rcb_proto, rcb_laddr and rcb_faddr to select receiving sockets.
+ */
+struct rawcb {
+ struct rawcb *rcb_next; /* doubly linked list: next block */
+ struct rawcb *rcb_prev; /* doubly linked list: previous block */
+ struct socket *rcb_socket; /* back pointer to socket */
+ struct sockaddr *rcb_faddr; /* destination address; 0 if none */
+ struct sockaddr *rcb_laddr; /* socket's address; 0 if none */
+ struct sockproto rcb_proto; /* protocol family, protocol */
+};
+
+/* Fetch the raw control block stored in a socket's so_pcb field. */
+#define sotorawcb(so) ((struct rawcb *)(so)->so_pcb)
+
+/*
+ * Nominal space allocated to a raw socket.
+ */
+#define RAWSNDQ 8192
+#define RAWRCVQ 8192
+
+#ifdef KERNEL
+/*
+ * NOTE(review): this is a definition rather than an extern declaration,
+ * so every file that includes this header with KERNEL defined carries its
+ * own tentative definition of rawcb.
+ */
+struct rawcb rawcb; /* head of list */
+
+int raw_attach __P((struct socket *, int));
+void raw_ctlinput __P((int, struct sockaddr *));
+void raw_detach __P((struct rawcb *));
+void raw_disconnect __P((struct rawcb *));
+void raw_init __P((void));
+void raw_input __P((struct mbuf *,
+ struct sockproto *, struct sockaddr *, struct sockaddr *));
+int raw_usrreq __P((struct socket *,
+ int, struct mbuf *, struct mbuf *, struct mbuf *));
+#endif
diff --git a/sys/net/raw_usrreq.c b/sys/net/raw_usrreq.c
new file mode 100644
index 000000000000..560106ef95cd
--- /dev/null
+++ b/sys/net/raw_usrreq.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 1980, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)raw_usrreq.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/netisr.h>
+#include <net/raw_cb.h>
+
+/*
+ * Set up the raw control block queue as an empty circular list:
+ * the head's forward and backward links both refer to the head.
+ */
+void
+raw_init()
+{
+	rawcb.rcb_prev = &rawcb;
+	rawcb.rcb_next = &rawcb;
+}
+
+
+/*
+ * Raw protocol input routine.
+ *
+ * Hand the packet chain m0 to every raw socket whose control block
+ * matches the packet: same protocol family, same protocol (a block with
+ * protocol 0 accepts any), and, where the block has them set, a local
+ * address equal to dst and a foreign address equal to src.  Every
+ * matching socket but the last receives a copy; the last one receives
+ * the original chain.  If nothing matches, or an append fails, the
+ * affected chain is freed.
+ */
+void
+raw_input(m0, proto, src, dst)
+	struct mbuf *m0;
+	register struct sockproto *proto;
+	struct sockaddr *src, *dst;
+{
+	register struct rawcb *rp;
+	register struct mbuf *m = m0;
+	register int sockets = 0;
+	struct socket *last = 0;
+	struct mbuf *n;
+
+	/*
+	 * The lower level routines are assumed to have put the addresses
+	 * into a canonical form suitable for a byte-wise comparison.
+	 * Differing lengths make the comparison fail at the first byte.
+	 */
+#define equal(a1, a2) \
+	(bcmp((caddr_t)(a1), (caddr_t)(a2), a1->sa_len) == 0)
+
+	for (rp = rawcb.rcb_next; rp != &rawcb; rp = rp->rcb_next) {
+		if (rp->rcb_proto.sp_family != proto->sp_family)
+			continue;
+		if (rp->rcb_proto.sp_protocol != 0 &&
+		    rp->rcb_proto.sp_protocol != proto->sp_protocol)
+			continue;
+		if (rp->rcb_laddr != 0 && !equal(rp->rcb_laddr, dst))
+			continue;
+		if (rp->rcb_faddr != 0 && !equal(rp->rcb_faddr, src))
+			continue;
+
+		/* A previous match exists: it gets a copy, not the original. */
+		if (last != 0 && (n = m_copy(m, 0, (int)M_COPYALL)) != 0) {
+			if (sbappendaddr(&last->so_rcv, src,
+			    n, (struct mbuf *)0) != 0) {
+				sorwakeup(last);
+				sockets++;
+			} else {
+				/* should notify about lost packet */
+				m_freem(n);
+			}
+		}
+		last = rp->rcb_socket;
+	}
+
+	if (last == 0) {
+		m_freem(m);
+		return;
+	}
+	if (sbappendaddr(&last->so_rcv, src, m, (struct mbuf *)0) != 0) {
+		sorwakeup(last);
+		sockets++;
+	} else {
+		m_freem(m);
+	}
+}
+
+/*
+ * Control input for raw sockets.  Only the range of cmd is examined;
+ * no request is acted upon.
+ */
+/*ARGSUSED*/
+void
+raw_ctlinput(cmd, arg)
+	int cmd;
+	struct sockaddr *arg;
+{
+
+	if (cmd >= 0 && cmd <= PRC_NCMDS) {
+		/* INCOMPLETE */
+	}
+}
+
+/*
+ * User request dispatcher for raw sockets.
+ *
+ * so       socket the request applies to
+ * req      PRU_* request code
+ * m        data mbuf chain; freed at "release" unless set to NULL first
+ * nam      address mbuf (for PRU_ATTACH its value is the protocol number)
+ * control  control mbuf; a non-empty one is rejected with EOPNOTSUPP
+ *
+ * Returns 0 or an errno value.  PRU_CONTROL, PRU_SENSE, PRU_RCVOOB and
+ * PRU_RCVD return directly and so never reach the m_freem() at "release".
+ * An unknown request code panics.
+ */
+/*ARGSUSED*/
+int
+raw_usrreq(so, req, m, nam, control)
+ struct socket *so;
+ int req;
+ struct mbuf *m, *nam, *control;
+{
+ register struct rawcb *rp = sotorawcb(so);
+ register int error = 0;
+ int len;
+
+ if (req == PRU_CONTROL)
+ return (EOPNOTSUPP);
+ if (control && control->m_len) {
+ error = EOPNOTSUPP;
+ goto release;
+ }
+ /* Every request, PRU_ATTACH included, needs a control block already. */
+ if (rp == 0) {
+ error = EINVAL;
+ goto release;
+ }
+ switch (req) {
+
+ /*
+ * Allocate a raw control block and fill in the
+ * necessary info to allow packets to be routed to
+ * the appropriate raw interface routine.
+ */
+ case PRU_ATTACH:
+ if ((so->so_state & SS_PRIV) == 0) {
+ error = EACCES;
+ break;
+ }
+ /* nam is not used as an mbuf here: its value is the protocol. */
+ error = raw_attach(so, (int)nam);
+ break;
+
+ /*
+ * Destroy state just before socket deallocation.
+ * Flush data or not depending on the options.
+ */
+ case PRU_DETACH:
+ /* Cannot be true: rp == 0 was already rejected above. */
+ if (rp == 0) {
+ error = ENOTCONN;
+ break;
+ }
+ raw_detach(rp);
+ break;
+
+#ifdef notdef
+ /*
+ * If a socket isn't bound to a single address,
+ * the raw input routine will hand it anything
+ * within that protocol family (assuming there's
+ * nothing else around it should go to).
+ */
+ case PRU_CONNECT:
+ if (rp->rcb_faddr) {
+ error = EISCONN;
+ break;
+ }
+ nam = m_copym(nam, 0, M_COPYALL, M_WAIT);
+ rp->rcb_faddr = mtod(nam, struct sockaddr *);
+ soisconnected(so);
+ break;
+
+ case PRU_BIND:
+ if (rp->rcb_laddr) {
+ error = EINVAL; /* XXX */
+ break;
+ }
+ error = raw_bind(so, nam);
+ break;
+#endif
+
+ case PRU_CONNECT2:
+ error = EOPNOTSUPP;
+ goto release;
+
+ case PRU_DISCONNECT:
+ if (rp->rcb_faddr == 0) {
+ error = ENOTCONN;
+ break;
+ }
+ raw_disconnect(rp);
+ soisdisconnected(so);
+ break;
+
+ /*
+ * Mark the connection as being incapable of further output.
+ */
+ case PRU_SHUTDOWN:
+ socantsendmore(so);
+ break;
+
+ /*
+ * Ship a packet out. The appropriate raw output
+ * routine handles any massaging necessary.
+ */
+ case PRU_SEND:
+ if (nam) {
+ if (rp->rcb_faddr) {
+ error = EISCONN;
+ break;
+ }
+ /* Borrow the caller's address for this one send only. */
+ rp->rcb_faddr = mtod(nam, struct sockaddr *);
+ } else if (rp->rcb_faddr == 0) {
+ error = ENOTCONN;
+ break;
+ }
+ error = (*so->so_proto->pr_output)(m, so);
+ /* m was handed to pr_output; keep "release" from freeing it. */
+ m = NULL;
+ if (nam)
+ rp->rcb_faddr = 0;
+ break;
+
+ /*
+ * NOTE(review): when the socket has SS_NOFDREF set, raw_disconnect()
+ * calls raw_detach(), which frees rp and calls sofree(so); so is then
+ * passed to sofree() and soisdisconnected() again below.  Confirm
+ * that this cannot touch a socket that has already been released.
+ */
+ case PRU_ABORT:
+ raw_disconnect(rp);
+ sofree(so);
+ soisdisconnected(so);
+ break;
+
+ case PRU_SENSE:
+ /*
+ * stat: don't bother with a blocksize.
+ */
+ return (0);
+
+ /*
+ * Not supported.
+ * NOTE(review): returns without going through "release", so m is
+ * not freed on this path -- presumably m is not owned here; confirm.
+ */
+ case PRU_RCVOOB:
+ case PRU_RCVD:
+ return(EOPNOTSUPP);
+
+ case PRU_LISTEN:
+ case PRU_ACCEPT:
+ case PRU_SENDOOB:
+ error = EOPNOTSUPP;
+ break;
+
+ /* Copy the local address into nam and set its length. */
+ case PRU_SOCKADDR:
+ if (rp->rcb_laddr == 0) {
+ error = EINVAL;
+ break;
+ }
+ len = rp->rcb_laddr->sa_len;
+ bcopy((caddr_t)rp->rcb_laddr, mtod(nam, caddr_t), (unsigned)len);
+ nam->m_len = len;
+ break;
+
+ /* Copy the foreign address into nam and set its length. */
+ case PRU_PEERADDR:
+ if (rp->rcb_faddr == 0) {
+ error = ENOTCONN;
+ break;
+ }
+ len = rp->rcb_faddr->sa_len;
+ bcopy((caddr_t)rp->rcb_faddr, mtod(nam, caddr_t), (unsigned)len);
+ nam->m_len = len;
+ break;
+
+ default:
+ panic("raw_usrreq");
+ }
+release:
+ if (m != NULL)
+ m_freem(m);
+ return (error);
+}
diff --git a/sys/net/route.c b/sys/net/route.c
new file mode 100644
index 000000000000..96902dace19e
--- /dev/null
+++ b/sys/net/route.c
@@ -0,0 +1,538 @@
+/*
+ * Copyright (c) 1980, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)route.c 8.2 (Berkeley) 11/15/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/ioctl.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+
+#ifdef NS
+#include <netns/ns.h>
+#endif
+
+/* Cast shorthand; used in this file mostly as SA(0) for a null sockaddr. */
+#define SA(p) ((struct sockaddr *)(p))
+
+int rttrash; /* routes not in table but not freed */
+struct sockaddr wildcard; /* zero valued cookie for wildcard searches */
+
+/*
+ * Walk the domain list and, for each domain that supplies a
+ * dom_rtattach hook, call it on that family's slot in table,
+ * passing the domain's dom_rtoffset.
+ */
+void
+rtable_init(table)
+	void **table;
+{
+	struct domain *dp = domains;
+
+	while (dp != 0) {
+		if (dp->dom_rtattach != 0)
+			dp->dom_rtattach(&table[dp->dom_family],
+			    dp->dom_rtoffset);
+		dp = dp->dom_next;
+	}
+}
+
+/*
+ * Routing start-up: rn_init() initializes the all zeroes, all ones and
+ * mask tables, after which the per-family tables in rt_tables are
+ * attached through rtable_init().
+ */
+void
+route_init()
+{
+
+	rn_init();
+	rtable_init((void **)rt_tables);
+}
+
+/*
+ * Packet routing routines.
+ */
+
+/*
+ * Fill in ro->ro_rt for the destination ro->ro_dst.  A route already
+ * held in ro is left alone if it has an interface and is marked RTF_UP;
+ * otherwise a lookup with reporting enabled replaces it.
+ */
+void
+rtalloc(ro)
+	register struct route *ro;
+{
+	register struct rtentry *held = ro->ro_rt;
+
+	/* XXX */
+	if (held == 0 || held->rt_ifp == 0 || (held->rt_flags & RTF_UP) == 0)
+		ro->ro_rt = rtalloc1(&ro->ro_dst, 1);
+}
+
+/*
+ * Look up a route to dst in the table for dst's address family.
+ *
+ * dst     destination to match
+ * report  when non-zero, cloning routes are resolved and lookup
+ *         failures are announced through rt_missmsg()
+ *
+ * Returns the matching entry with its reference count raised, or 0 when
+ * there is no table for the family or only a root node matched.  Runs at
+ * splnet and restores the previous level before returning.
+ */
+struct rtentry *
+rtalloc1(dst, report)
+ register struct sockaddr *dst;
+ int report;
+{
+ register struct radix_node_head *rnh = rt_tables[dst->sa_family];
+ register struct rtentry *rt;
+ register struct radix_node *rn;
+ struct rtentry *newrt = 0;
+ struct rt_addrinfo info;
+ int s = splnet(), err = 0, msgtype = RTM_MISS;
+
+ if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
+ ((rn->rn_flags & RNF_ROOT) == 0)) {
+ newrt = rt = (struct rtentry *)rn;
+ if (report && (rt->rt_flags & RTF_CLONING)) {
+ /* newrt is both input (route to clone) and output (the clone). */
+ err = rtrequest(RTM_RESOLVE, dst, SA(0),
+ SA(0), 0, &newrt);
+ if (err) {
+ /* Cloning failed: fall back to the cloning route itself. */
+ newrt = rt;
+ rt->rt_refcnt++;
+ goto miss;
+ }
+ if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
+ /* Report the new clone with RTM_RESOLVE instead of RTM_MISS. */
+ msgtype = RTM_RESOLVE;
+ goto miss;
+ }
+ } else
+ rt->rt_refcnt++;
+ } else {
+ rtstat.rts_unreach++;
+ /* Also entered by goto from the cloning branch above. */
+ miss: if (report) {
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_info[RTAX_DST] = dst;
+ rt_missmsg(msgtype, &info, 0, err);
+ }
+ }
+ splx(s);
+ return (newrt);
+}
+
+/*
+ * Drop one reference to a route.  Once the count is no longer positive
+ * and the route is not RTF_UP, the entry is released: its interface
+ * address reference is given back and both the key storage and the
+ * entry are freed.  A count that has gone negative is reported and the
+ * entry is left allocated.  Panics on a null route, or when an entry
+ * about to be released is still marked active or root.
+ */
+void
+rtfree(rt)
+	register struct rtentry *rt;
+{
+	register struct ifaddr *ifa;
+
+	if (rt == 0)
+		panic("rtfree");
+
+	rt->rt_refcnt--;
+	if (rt->rt_refcnt > 0 || (rt->rt_flags & RTF_UP) != 0)
+		return;
+
+	if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
+		panic ("rtfree 2");
+	rttrash--;
+	if (rt->rt_refcnt < 0) {
+		printf("rtfree: %x not freed (neg refs)\n", rt);
+		return;
+	}
+	ifa = rt->rt_ifa;
+	IFAFREE(ifa);
+	Free(rt_key(rt));
+	Free(rt);
+}
+
+/*
+ * Give back one reference to an interface address.  A count of zero
+ * means this was the final reference and the structure is freed;
+ * otherwise the count is simply decremented.  Panics on NULL.
+ */
+void
+ifafree(ifa)
+	register struct ifaddr *ifa;
+{
+	if (ifa == NULL)
+		panic("ifafree");
+
+	if (ifa->ifa_refcnt != 0) {
+		ifa->ifa_refcnt--;
+		return;
+	}
+	free(ifa, M_IFADDR);
+}
+
+/*
+ * Force a routing table entry to the specified
+ * destination to go through the given gateway.
+ * Normally called as a result of a routing redirect
+ * message from the network layer.
+ *
+ * dst      destination being redirected
+ * gateway  new first hop; must be on a directly attached network
+ * netmask  mask to use if a new route has to be created
+ * flags    RTF_* flags from the redirect; RTF_GATEWAY|RTF_DYNAMIC or
+ *          RTF_MODIFIED are added according to the action taken
+ * src      sender of the redirect, checked against the current gateway
+ * rtp      if non-null and no error occurred, receives the route found
+ *          for dst (with the reference taken by the lookup)
+ *
+ * Returns 0 on success or an errno value (ENETUNREACH, EINVAL,
+ * EHOSTUNREACH, or whatever rtrequest() reports).  An RTM_REDIRECT
+ * routing message is emitted in every case.
+ *
+ * N.B.: must be called at splnet
+ *
+ */
+int
+rtredirect(dst, gateway, netmask, flags, src, rtp)
+	struct sockaddr *dst, *gateway, *netmask, *src;
+	int flags;
+	struct rtentry **rtp;
+{
+	register struct rtentry *rt;
+	int error = 0;
+	short *stat = 0;
+	struct rt_addrinfo info;
+	struct ifaddr *ifa;
+
+	/* verify the gateway is directly reachable */
+	if ((ifa = ifa_ifwithnet(gateway)) == 0) {
+		error = ENETUNREACH;
+		goto out;	/* rt has not been looked up yet */
+	}
+	rt = rtalloc1(dst, 0);
+	/*
+	 * If the redirect isn't from our current router for this dst,
+	 * it's either old or wrong.  If it redirects us to ourselves,
+	 * we have a routing loop, perhaps as a result of an interface
+	 * going down recently.
+	 */
+#define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
+	if (!(flags & RTF_DONE) && rt &&
+	    (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
+		error = EINVAL;
+	else if (ifa_ifwithaddr(gateway))
+		error = EHOSTUNREACH;
+	if (error)
+		goto done;
+	/*
+	 * Create a new entry if we just got back a wildcard entry
+	 * or the lookup failed.  This is necessary for hosts
+	 * which use routing redirects generated by smart gateways
+	 * to dynamically build the routing tables.
+	 */
+	if ((rt == 0) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
+		goto create;
+	/*
+	 * Don't listen to the redirect if it's
+	 * for a route to an interface.
+	 */
+	if (rt->rt_flags & RTF_GATEWAY) {
+		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
+			/*
+			 * Changing from route to net => route to host.
+			 * Create new route, rather than smashing route to net.
+			 */
+		create:
+			flags |= RTF_GATEWAY | RTF_DYNAMIC;
+			error = rtrequest((int)RTM_ADD, dst, gateway,
+			    netmask, flags,
+			    (struct rtentry **)0);
+			stat = &rtstat.rts_dynamic;
+		} else {
+			/*
+			 * Smash the current notion of the gateway to
+			 * this destination.  Should check about netmask!!!
+			 */
+			rt->rt_flags |= RTF_MODIFIED;
+			flags |= RTF_MODIFIED;
+			stat = &rtstat.rts_newgateway;
+			rt_setgate(rt, rt_key(rt), gateway);
+		}
+	} else
+		error = EHOSTUNREACH;
+done:
+	if (rt) {
+		/* Hand the looked-up route to the caller, or drop our reference. */
+		if (rtp && !error)
+			*rtp = rt;
+		else
+			rtfree(rt);
+	}
+out:
+	if (error)
+		rtstat.rts_badredirect++;
+	else if (stat != NULL)
+		(*stat)++;
+	bzero((caddr_t)&info, sizeof(info));
+	info.rti_info[RTAX_DST] = dst;
+	info.rti_info[RTAX_GATEWAY] = gateway;
+	info.rti_info[RTAX_NETMASK] = netmask;
+	info.rti_info[RTAX_AUTHOR] = src;
+	rt_missmsg(RTM_REDIRECT, &info, flags, error);
+	/*
+	 * The function is declared int; without this return a caller
+	 * reading the result would get an indeterminate value.
+	 */
+	return (error);
+}
+
+/*
+ * Routing table ioctl entry point.  No request is implemented:
+ * every call fails with EOPNOTSUPP.
+ */
+/*ARGSUSED*/
+int
+rtioctl(req, data, p)
+	int req;
+	caddr_t data;
+	struct proc *p;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Choose the interface address a new route should be attached to.
+ *
+ * For a gateway route the gateway may sit at the far end of a point to
+ * point link, so it is tried as a link destination first.  For an
+ * interface route a host destination is tried as a link destination,
+ * then the gateway as a local address.  If that fails, the gateway's
+ * network is tried, and finally the interface address of whatever route
+ * already leads to dst.  When the address found belongs to another
+ * family than dst, an address on the same interface suited to dst is
+ * preferred if one exists.  Returns 0 when nothing can be found.
+ */
+struct ifaddr *
+ifa_ifwithroute(flags, dst, gateway)
+	int flags;
+	struct sockaddr *dst, *gateway;
+{
+	register struct ifaddr *ifa = 0;
+	struct ifaddr *fallback;
+	struct rtentry *rt;
+
+	if (flags & RTF_GATEWAY) {
+		ifa = ifa_ifwithdstaddr(gateway);
+	} else {
+		if (flags & RTF_HOST)
+			ifa = ifa_ifwithdstaddr(dst);
+		if (ifa == 0)
+			ifa = ifa_ifwithaddr(gateway);
+	}
+	if (ifa == 0)
+		ifa = ifa_ifwithnet(gateway);
+	if (ifa == 0) {
+		rt = rtalloc1(dst, 0);
+		if (rt == 0)
+			return (0);
+		/* Only peeking at the route: undo the lookup's reference. */
+		rt->rt_refcnt--;
+		ifa = rt->rt_ifa;
+		if (ifa == 0)
+			return (0);
+	}
+	if (ifa->ifa_addr->sa_family == dst->sa_family)
+		return (ifa);
+
+	fallback = ifa;
+	ifa = ifaof_ifpforaddr(dst, fallback->ifa_ifp);
+	return (ifa != 0 ? ifa : fallback);
+}
+
+/*
+ * Round a up to the next multiple of sizeof(long); values that are not
+ * positive yield sizeof(long).  The arithmetic relies on sizeof(long)
+ * being a power of two.  Note that "a" is evaluated more than once and
+ * is not parenthesized in the "a>0" test.
+ */
+#define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
+
+/*
+ * Carry out a routing table request.  Runs at splnet and restores the
+ * previous level on every exit path.
+ *
+ * req      RTM_DELETE, RTM_RESOLVE or RTM_ADD; any other value matches
+ *          no case and returns 0
+ * dst      destination; its sa_family selects the table
+ * gateway  gateway for RTM_ADD (RTM_RESOLVE takes it from the parent)
+ * netmask  mask; forced to 0 when RTF_HOST is set in flags
+ * flags    RTF_* flags for the route
+ * ret_nrt  RTM_DELETE: if non-null, receives the removed entry instead
+ *          of having it freed here.  RTM_RESOLVE: must point at the
+ *          route to clone and receives the clone.  RTM_ADD: if non-null,
+ *          receives the new entry with one reference added.
+ *
+ * Returns 0, or ESRCH (no table / nothing to delete), EINVAL (RTM_RESOLVE
+ * without a route), ENETUNREACH (no interface for the route), ENOBUFS
+ * (allocation failure) or EEXIST (the tree rejected the insertion).
+ */
+int
+rtrequest(req, dst, gateway, netmask, flags, ret_nrt)
+ int req, flags;
+ struct sockaddr *dst, *gateway, *netmask;
+ struct rtentry **ret_nrt;
+{
+ int s = splnet(); int error = 0;
+ register struct rtentry *rt;
+ register struct radix_node *rn;
+ register struct radix_node_head *rnh;
+ struct ifaddr *ifa;
+ struct sockaddr *ndst;
+/* Record the error and leave through "bad" so that splx() still runs. */
+#define senderr(x) { error = x ; goto bad; }
+
+ if ((rnh = rt_tables[dst->sa_family]) == 0)
+ senderr(ESRCH);
+ if (flags & RTF_HOST)
+ netmask = 0;
+ switch (req) {
+ case RTM_DELETE:
+ if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == 0)
+ senderr(ESRCH);
+ if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
+ panic ("rtrequest delete");
+ rt = (struct rtentry *)rn;
+ rt->rt_flags &= ~RTF_UP;
+ if (rt->rt_gwroute) {
+ /* Release the cached gateway route, then point rt back at the entry. */
+ rt = rt->rt_gwroute; RTFREE(rt);
+ (rt = (struct rtentry *)rn)->rt_gwroute = 0;
+ }
+ if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
+ ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0));
+ /* The entry is out of the table but not yet freed. */
+ rttrash++;
+ if (ret_nrt)
+ *ret_nrt = rt;
+ else if (rt->rt_refcnt <= 0) {
+ /* rtfree() decrements first, so add the reference it will drop. */
+ rt->rt_refcnt++;
+ rtfree(rt);
+ }
+ break;
+
+ case RTM_RESOLVE:
+ if (ret_nrt == 0 || (rt = *ret_nrt) == 0)
+ senderr(EINVAL);
+ /* Inherit interface, flags, gateway and mask from the parent route. */
+ ifa = rt->rt_ifa;
+ flags = rt->rt_flags & ~RTF_CLONING;
+ gateway = rt->rt_gateway;
+ if ((netmask = rt->rt_genmask) == 0)
+ flags |= RTF_HOST;
+ goto makeroute;
+
+ case RTM_ADD:
+ if ((ifa = ifa_ifwithroute(flags, dst, gateway)) == 0)
+ senderr(ENETUNREACH);
+ makeroute:
+ R_Malloc(rt, struct rtentry *, sizeof(*rt));
+ if (rt == 0)
+ senderr(ENOBUFS);
+ Bzero(rt, sizeof(*rt));
+ rt->rt_flags = RTF_UP | flags;
+ /* Allocates the storage shared by the key and the gateway. */
+ if (rt_setgate(rt, dst, gateway)) {
+ Free(rt);
+ senderr(ENOBUFS);
+ }
+ ndst = rt_key(rt);
+ if (netmask) {
+ rt_maskedcopy(dst, ndst, netmask);
+ } else
+ Bcopy(dst, ndst, dst->sa_len);
+ rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask,
+ rnh, rt->rt_nodes);
+ if (rn == 0) {
+ /* Insertion refused: undo everything allocated above. */
+ if (rt->rt_gwroute)
+ rtfree(rt->rt_gwroute);
+ Free(rt_key(rt));
+ Free(rt);
+ senderr(EEXIST);
+ }
+ ifa->ifa_refcnt++;
+ rt->rt_ifa = ifa;
+ rt->rt_ifp = ifa->ifa_ifp;
+ if (req == RTM_RESOLVE)
+ rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
+ if (ifa->ifa_rtrequest)
+ ifa->ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : 0));
+ if (ret_nrt) {
+ *ret_nrt = rt;
+ rt->rt_refcnt++;
+ }
+ break;
+ }
+bad:
+ splx(s);
+ return (error);
+}
+
+/*
+ * Install gate as the gateway of route rt0.
+ *
+ * The route's key and gateway live in one allocation: the key at the
+ * start and the gateway ROUNDUP(dst->sa_len) bytes in.  A new allocation
+ * is made when the route has no gateway yet or the new gateway is longer
+ * (after rounding) than the current one; otherwise the gateway is
+ * overwritten in place.  Any cached rt_gwroute is released, and for an
+ * RTF_GATEWAY route a fresh one is looked up.
+ *
+ * Returns 0 on success, 1 if the allocation failed.
+ */
+int
+rt_setgate(rt0, dst, gate)
+ struct rtentry *rt0;
+ struct sockaddr *dst, *gate;
+{
+ caddr_t new, old;
+ int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
+ register struct rtentry *rt = rt0;
+
+ if (rt->rt_gateway == 0 || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
+ old = (caddr_t)rt_key(rt);
+ R_Malloc(new, caddr_t, dlen + glen);
+ if (new == 0)
+ return 1;
+ rt->rt_nodes->rn_key = new;
+ } else {
+ new = rt->rt_nodes->rn_key;
+ old = 0;
+ }
+ Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen);
+ /* Only when a previous key buffer existed: copy dst in and free it. */
+ if (old) {
+ Bcopy(dst, new, dlen);
+ Free(old);
+ }
+ if (rt->rt_gwroute) {
+ /* RTFREE is applied to rt, so rt is pointed at the gateway route first. */
+ rt = rt->rt_gwroute; RTFREE(rt);
+ rt = rt0; rt->rt_gwroute = 0;
+ }
+ if (rt->rt_flags & RTF_GATEWAY) {
+ rt->rt_gwroute = rtalloc1(gate, 1);
+ }
+ return 0;
+}
+
+/*
+ * Copy src to dst with netmask applied.
+ *
+ * The first two bytes (sa_len and sa_family) are copied unchanged.  The
+ * bytes after them are ANDed with the matching netmask bytes, up to the
+ * shorter of the netmask's and src's length.  Whatever remains of dst up
+ * to src's length is cleared.  Both lengths are read from the first byte
+ * of the respective structure.
+ */
+void
+rt_maskedcopy(src, dst, netmask)
+	struct sockaddr *src, *dst, *netmask;
+{
+	register u_char *from = (u_char *)src;
+	register u_char *to = (u_char *)dst;
+	register u_char *mask = (u_char *)netmask;
+	u_char *maskend = to + *mask;
+	u_char *dstend = to + *from;
+
+	/* copies sa_len & sa_family */
+	to[0] = from[0];
+	to[1] = from[1];
+	from += 2;
+	to += 2;
+	mask += 2;
+
+	if (maskend > dstend)
+		maskend = dstend;
+	for (; to < maskend; to++)
+		*to = *from++ & *mask++;
+	if (to < dstend)
+		bzero((caddr_t)to, (unsigned)(dstend - to));
+}
+
+/*
+ * Set up a routing table entry, normally
+ * for an interface.
+ *
+ * ifa    interface address the route belongs to
+ * cmd    RTM_ADD or RTM_DELETE, handed on to rtrequest()
+ * flags  RTF_* flags; with RTF_HOST the destination is ifa_dstaddr,
+ *        otherwise ifa_addr
+ *
+ * Returns the rtrequest() result, or EHOSTUNREACH / ENETUNREACH when a
+ * delete is asked for a route that belongs to another interface address.
+ */
+int
+rtinit(ifa, cmd, flags)
+ register struct ifaddr *ifa;
+ int cmd, flags;
+{
+ register struct rtentry *rt;
+ register struct sockaddr *dst;
+ register struct sockaddr *deldst;
+ struct mbuf *m = 0;
+ struct rtentry *nrt = 0;
+ int error;
+
+ dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
+ if (cmd == RTM_DELETE) {
+ if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
+ /* Build the masked network address in a scratch mbuf. */
+ m = m_get(M_WAIT, MT_SONAME);
+ deldst = mtod(m, struct sockaddr *);
+ rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
+ dst = deldst;
+ }
+ if (rt = rtalloc1(dst, 0)) {
+ /* Only inspecting the route: give back the lookup's reference. */
+ rt->rt_refcnt--;
+ if (rt->rt_ifa != ifa) {
+ if (m)
+ (void) m_free(m);
+ return (flags & RTF_HOST ? EHOSTUNREACH
+ : ENETUNREACH);
+ }
+ }
+ }
+ error = rtrequest(cmd, dst, ifa->ifa_addr, ifa->ifa_netmask,
+ flags | ifa->ifa_flags, &nrt);
+ if (m)
+ (void) m_free(m);
+ if (cmd == RTM_DELETE && error == 0 && (rt = nrt)) {
+ rt_newaddrmsg(cmd, ifa, error, nrt);
+ if (rt->rt_refcnt <= 0) {
+ /* rtfree() decrements first, so add the reference it will drop. */
+ rt->rt_refcnt++;
+ rtfree(rt);
+ }
+ }
+ if (cmd == RTM_ADD && error == 0 && (rt = nrt)) {
+ /* Drop the reference rtrequest() added when filling in nrt. */
+ rt->rt_refcnt--;
+ if (rt->rt_ifa != ifa) {
+ /* The route was attached to another address: move it to ifa. */
+ printf("rtinit: wrong ifa (%x) was (%x)\n", ifa,
+ rt->rt_ifa);
+ if (rt->rt_ifa->ifa_rtrequest)
+ rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0));
+ IFAFREE(rt->rt_ifa);
+ rt->rt_ifa = ifa;
+ rt->rt_ifp = ifa->ifa_ifp;
+ ifa->ifa_refcnt++;
+ if (ifa->ifa_rtrequest)
+ ifa->ifa_rtrequest(RTM_ADD, rt, SA(0));
+ }
+ rt_newaddrmsg(cmd, ifa, error, nrt);
+ }
+ return (error);
+}
diff --git a/sys/net/route.h b/sys/net/route.h
new file mode 100644
index 000000000000..2fbed9ea0a14
--- /dev/null
+++ b/sys/net/route.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 1980, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)route.h 8.3 (Berkeley) 4/19/94
+ */
+
+/*
+ * Kernel resident routing tables.
+ *
+ * The routing tables are initialized when interface addresses
+ * are set by making entries for all directly connected interfaces.
+ */
+
+/*
+ * A route consists of a destination address and a reference
+ * to a routing entry. These are often held by protocols
+ * in their control blocks, e.g. inpcb.
+ */
+struct route {
+ struct rtentry *ro_rt; /* reference to the routing entry */
+ struct sockaddr ro_dst; /* destination address */
+};
+
+/*
+ * These numbers are used by reliable protocols for determining
+ * retransmission behavior and are included in the routing structure.
+ */
+struct rt_metrics {
+ u_long rmx_locks; /* Kernel must leave these values alone */
+ u_long rmx_mtu; /* MTU for this path */
+ u_long rmx_hopcount; /* max hops expected */
+ u_long rmx_expire; /* lifetime for route, e.g. redirect */
+ u_long rmx_recvpipe; /* inbound delay-bandwidth product */
+ u_long rmx_sendpipe; /* outbound delay-bandwidth product */
+ u_long rmx_ssthresh; /* outbound gateway buffer limit */
+ u_long rmx_rtt; /* estimated round trip time */
+ u_long rmx_rttvar; /* estimated rtt variance */
+ u_long rmx_pksent; /* packets sent using this route */
+};
+
+/*
+ * rmx_rtt and rmx_rttvar are stored as microseconds
+ * (RTM_RTTUNIT units per second);
+ * RTTTOPRHZ(rtt) converts to a value suitable for use
+ * by a protocol slowtimo counter, i.e. to units of 1/PR_SLOWHZ second.
+ */
+#define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */
+#define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ))
+
+/*
+ * We distinguish between routes to hosts and routes to networks,
+ * preferring the former if available. For each route we infer
+ * the interface to use from the gateway address supplied when
+ * the route was entered. Routes that forward packets through
+ * gateways are marked so that the output routines know to address the
+ * gateway rather than the ultimate destination.
+ *
+ * rt_key() and rt_mask() return the destination and netmask sockaddrs,
+ * which are stored in the first radix node rather than in the rtentry.
+ */
+#ifndef RNF_NORMAL
+#include <net/radix.h>
+#endif
+struct rtentry {
+ struct radix_node rt_nodes[2]; /* tree glue, and other values */
+#define rt_key(r) ((struct sockaddr *)((r)->rt_nodes->rn_key))
+#define rt_mask(r) ((struct sockaddr *)((r)->rt_nodes->rn_mask))
+ struct sockaddr *rt_gateway; /* value */
+ short rt_flags; /* up/down?, host/net */
+ short rt_refcnt; /* # held references */
+ u_long rt_use; /* raw # packets forwarded */
+ struct ifnet *rt_ifp; /* the answer: interface to use */
+ struct ifaddr *rt_ifa; /* the answer: interface address to use */
+ struct sockaddr *rt_genmask; /* for generation of cloned routes */
+ caddr_t rt_llinfo; /* pointer to link level info cache */
+ struct rt_metrics rt_rmx; /* metrics used by rx'ing protocols */
+ struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */
+};
+
+/*
+ * The following structure is necessary for 4.3 compatibility;
+ * we should eventually move it to a compat file.
+ * Unlike struct rtentry it holds the destination and gateway
+ * sockaddrs inline rather than by pointer.
+ */
+struct ortentry {
+ u_long rt_hash; /* to speed lookups */
+ struct sockaddr rt_dst; /* key */
+ struct sockaddr rt_gateway; /* value */
+ short rt_flags; /* up/down?, host/net */
+ short rt_refcnt; /* # held references */
+ u_long rt_use; /* raw # packets forwarded */
+ struct ifnet *rt_ifp; /* the answer: interface to use */
+};
+
+/*
+ * Flag bits carried in rt_flags and rtm_flags.
+ * (0x2000 is not assigned here.)
+ */
+#define RTF_UP 0x1 /* route usable */
+#define RTF_GATEWAY 0x2 /* destination is a gateway */
+#define RTF_HOST 0x4 /* host entry (net otherwise) */
+#define RTF_REJECT 0x8 /* host or net unreachable */
+#define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */
+#define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */
+#define RTF_DONE 0x40 /* message confirmed */
+#define RTF_MASK 0x80 /* subnet mask present */
+#define RTF_CLONING 0x100 /* generate new routes on use */
+#define RTF_XRESOLVE 0x200 /* external daemon resolves name */
+#define RTF_LLINFO 0x400 /* generated by ARP or ESIS */
+#define RTF_STATIC 0x800 /* manually added */
+#define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */
+#define RTF_PROTO2 0x4000 /* protocol specific routing flag */
+#define RTF_PROTO1 0x8000 /* protocol specific routing flag */
+
+
+/*
+ * Routing statistics (event counters).
+ */
+struct rtstat {
+ short rts_badredirect; /* bogus redirect calls */
+ short rts_dynamic; /* routes created by redirects */
+ short rts_newgateway; /* routes modified by redirects */
+ short rts_unreach; /* lookups which failed */
+ short rts_wildcard; /* lookups satisfied by a wildcard */
+};
+/*
+ * Structures for routing messages.
+ *
+ * The header is followed by the sockaddrs selected by the bits in
+ * rtm_addrs, in ascending bit order.
+ */
+struct rt_msghdr {
+ u_short rtm_msglen; /* to skip over non-understood messages */
+ u_char rtm_version; /* future binary compatibility */
+ u_char rtm_type; /* message type */
+ u_short rtm_index; /* index for associated ifp */
+ int rtm_flags; /* flags, incl. kern & message, e.g. DONE */
+ int rtm_addrs; /* bitmask identifying sockaddrs in msg */
+ pid_t rtm_pid; /* identify sender */
+ int rtm_seq; /* for sender to identify action */
+ int rtm_errno; /* why failed */
+ int rtm_use; /* from rtentry */
+ u_long rtm_inits; /* which metrics we are initializing */
+ struct rt_metrics rtm_rmx; /* metrics themselves */
+};
+
+#define RTM_VERSION 3 /* Up the ante and ignore older versions */
+
+/*
+ * Message types (rtm_type).
+ */
+#define RTM_ADD 0x1 /* Add Route */
+#define RTM_DELETE 0x2 /* Delete Route */
+#define RTM_CHANGE 0x3 /* Change Metrics or flags */
+#define RTM_GET 0x4 /* Report Metrics */
+#define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */
+#define RTM_REDIRECT 0x6 /* Told to use different route */
+#define RTM_MISS 0x7 /* Lookup failed on this address */
+#define RTM_LOCK 0x8 /* fix specified metrics */
+#define RTM_OLDADD 0x9 /* caused by SIOCADDRT */
+#define RTM_OLDDEL 0xa /* caused by SIOCDELRT */
+#define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */
+#define RTM_NEWADDR 0xc /* address being added to iface */
+#define RTM_DELADDR 0xd /* address being removed from iface */
+#define RTM_IFINFO 0xe /* iface going up/down etc. */
+
+/*
+ * Bitmask values for rtm_inits and rmx_locks, one per rt_metrics field.
+ */
+#define RTV_MTU 0x1 /* init or lock _mtu */
+#define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */
+#define RTV_EXPIRE 0x4 /* init or lock _expire */
+#define RTV_RPIPE 0x8 /* init or lock _recvpipe */
+#define RTV_SPIPE 0x10 /* init or lock _sendpipe */
+#define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */
+#define RTV_RTT 0x40 /* init or lock _rtt */
+#define RTV_RTTVAR 0x80 /* init or lock _rttvar */
+
+/*
+ * Bitmask values for rtm_addrs.
+ * Bit i here corresponds to index i in the RTAX_* list.
+ */
+#define RTA_DST 0x1 /* destination sockaddr present */
+#define RTA_GATEWAY 0x2 /* gateway sockaddr present */
+#define RTA_NETMASK 0x4 /* netmask sockaddr present */
+#define RTA_GENMASK 0x8 /* cloning mask sockaddr present */
+#define RTA_IFP 0x10 /* interface name sockaddr present */
+#define RTA_IFA 0x20 /* interface addr sockaddr present */
+#define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */
+#define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */
+
+/*
+ * Index offsets for sockaddr array for alternate internal encoding
+ * (the rti_info[] array of struct rt_addrinfo).
+ */
+#define RTAX_DST 0 /* index of destination sockaddr */
+#define RTAX_GATEWAY 1 /* index of gateway sockaddr */
+#define RTAX_NETMASK 2 /* index of netmask sockaddr */
+#define RTAX_GENMASK 3 /* index of cloning mask sockaddr */
+#define RTAX_IFP 4 /* index of interface name sockaddr */
+#define RTAX_IFA 5 /* index of interface addr sockaddr */
+#define RTAX_AUTHOR 6 /* index of sockaddr for author of redirect */
+#define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */
+#define RTAX_MAX 8 /* size of array to allocate */
+
+/*
+ * Internal form of the sockaddrs that accompany a routing message.
+ */
+struct rt_addrinfo {
+ int rti_addrs; /* bitmask (1 << RTAX_*) of sockaddrs in rti_info */
+ struct sockaddr *rti_info[RTAX_MAX]; /* indexed by RTAX_*, 0 if absent */
+};
+
+/*
+ * Counts of attached routing sockets, by the protocol they were
+ * opened with; any_count counts every routing socket.
+ */
+struct route_cb {
+ int ip_count; /* AF_INET listeners */
+ int ns_count; /* AF_NS listeners */
+ int iso_count; /* AF_ISO listeners */
+ int any_count; /* all listeners */
+};
+
+#ifdef KERNEL
+/*
+ * Drop one reference to rt: pass the entry to rtfree() when
+ * rt_refcnt is 1 or less, otherwise just decrement rt_refcnt.
+ * The body is wrapped in do { } while (0) so that the expansion is a
+ * single statement and "if (x) RTFREE(rt); else ..." parses as written.
+ * The argument is evaluated more than once; it must have no side effects.
+ */
+#define RTFREE(rt) \
+	do { \
+		if ((rt)->rt_refcnt <= 1) \
+			rtfree(rt); \
+		else \
+			(rt)->rt_refcnt--; \
+	} while (0)
+
+/* Kernel globals. */
+struct route_cb route_cb; /* routing socket listener counts */
+struct rtstat rtstat; /* routing statistics */
+struct radix_node_head *rt_tables[AF_MAX+1]; /* indexed by address family */
+
+/* Routines shared between the routing table and routing socket code. */
+void route_init __P((void));
+int route_output __P((struct mbuf *, struct socket *));
+int route_usrreq __P((struct socket *,
+ int, struct mbuf *, struct mbuf *, struct mbuf *));
+void rt_ifmsg __P((struct ifnet *));
+void rt_maskedcopy __P((struct sockaddr *,
+ struct sockaddr *, struct sockaddr *));
+void rt_missmsg __P((int, struct rt_addrinfo *, int, int));
+void rt_newaddrmsg __P((int, struct ifaddr *, int, struct rtentry *));
+int rt_setgate __P((struct rtentry *,
+ struct sockaddr *, struct sockaddr *));
+void rt_setmetrics __P((u_long, struct rt_metrics *, struct rt_metrics *));
+void rtable_init __P((void **));
+void rtalloc __P((struct route *));
+struct rtentry *
+ rtalloc1 __P((struct sockaddr *, int));
+void rtfree __P((struct rtentry *));
+int rtinit __P((struct ifaddr *, int, int));
+int rtioctl __P((int, caddr_t, struct proc *));
+int rtredirect __P((struct sockaddr *, struct sockaddr *,
+ struct sockaddr *, int, struct sockaddr *, struct rtentry **));
+int rtrequest __P((int, struct sockaddr *,
+ struct sockaddr *, struct sockaddr *, int, struct rtentry **));
+#endif
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
new file mode 100644
index 000000000000..d128121708d9
--- /dev/null
+++ b/sys/net/rtsock.c
@@ -0,0 +1,833 @@
+/*
+ * Copyright (c) 1988, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)rtsock.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+
+/*
+ * Addresses and protocol descriptor handed to raw_input() with every
+ * routing message; route_proto.sp_protocol is set per message.
+ */
+struct sockaddr route_dst = { 2, PF_ROUTE, };
+struct sockaddr route_src = { 2, PF_ROUTE, };
+struct sockproto route_proto = { PF_ROUTE, };
+
+/*
+ * State carried through a sysctl dump of the routing table or
+ * interface list:
+ *   w_op, w_arg  operation and its argument (name[1], name[2])
+ *   w_given      size of the caller's buffer
+ *   w_needed     running total of output bytes, started at -w_given
+ *   w_where      next position in the caller's buffer (0: size only)
+ *   w_tmem       scratch buffer of w_tmemsize bytes for one message
+ */
+struct walkarg {
+ int w_op, w_arg, w_given, w_needed, w_tmemsize;
+ caddr_t w_where, w_tmem;
+};
+
+static struct mbuf *
+ rt_msg1 __P((int, struct rt_addrinfo *));
+static int rt_msg2 __P((int,
+ struct rt_addrinfo *, caddr_t, struct walkarg *));
+static void rt_xaddrs __P((caddr_t, caddr_t, struct rt_addrinfo *));
+
+/*
+ * Sleazy use of local variables throughout file, warning!!!!
+ * Each name below expands to a slot of a variable called "info", so a
+ * function using them must declare a local "struct rt_addrinfo info",
+ * and none of these names can be used as an ordinary identifier from
+ * here to the end of the file.
+ */
+#define dst info.rti_info[RTAX_DST]
+#define gate info.rti_info[RTAX_GATEWAY]
+#define netmask info.rti_info[RTAX_NETMASK]
+#define genmask info.rti_info[RTAX_GENMASK]
+#define ifpaddr info.rti_info[RTAX_IFP]
+#define ifaaddr info.rti_info[RTAX_IFA]
+#define brdaddr info.rti_info[RTAX_BRD]
+
+/*
+ * User-request handler for routing sockets: a wrapper around
+ * raw_usrreq() that also maintains the route_cb listener counts.
+ *
+ * PRU_ATTACH: a rawcb is allocated and zeroed before raw_usrreq() is
+ * called.  On success the listener is counted by protocol, its foreign
+ * address is pointed at route_src, and the socket is marked connected
+ * with SO_USELOOPBACK on.  On failure the rawcb is freed and so_pcb is
+ * cleared so the socket is not left pointing at freed memory.
+ * PRU_DETACH: the listener is un-counted before raw_usrreq() runs.
+ *
+ * Returns the raw_usrreq() error.
+ */
+/*ARGSUSED*/
+int
+route_usrreq(so, req, m, nam, control)
+	register struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	register int error = 0;
+	register struct rawcb *rp = sotorawcb(so);
+	int s;
+
+	if (req == PRU_ATTACH) {
+		MALLOC(rp, struct rawcb *, sizeof(*rp), M_PCB, M_WAITOK);
+		if (so->so_pcb = (caddr_t)rp)
+			bzero(so->so_pcb, sizeof(*rp));
+
+	}
+	if (req == PRU_DETACH && rp) {
+		int af = rp->rcb_proto.sp_protocol;
+		if (af == AF_INET)
+			route_cb.ip_count--;
+		else if (af == AF_NS)
+			route_cb.ns_count--;
+		else if (af == AF_ISO)
+			route_cb.iso_count--;
+		route_cb.any_count--;
+	}
+	s = splnet();
+	error = raw_usrreq(so, req, m, nam, control);
+	rp = sotorawcb(so);
+	if (req == PRU_ATTACH && rp) {
+		int af = rp->rcb_proto.sp_protocol;
+		if (error) {
+			/* detach the rawcb from the socket before freeing it */
+			so->so_pcb = 0;
+			free((caddr_t)rp, M_PCB);
+			splx(s);
+			return (error);
+		}
+		if (af == AF_INET)
+			route_cb.ip_count++;
+		else if (af == AF_NS)
+			route_cb.ns_count++;
+		else if (af == AF_ISO)
+			route_cb.iso_count++;
+		rp->rcb_faddr = &route_src;
+		route_cb.any_count++;
+		soisconnected(so);
+		so->so_options |= SO_USELOOPBACK;
+	}
+	splx(s);
+	return (error);
+}
+
+/*
+ * Process one routing message written to a routing socket.
+ *
+ * The message is checked (its length must equal rtm_msglen and its
+ * version must be RTM_VERSION), copied out of the mbuf chain into a
+ * contiguous buffer, and its trailing sockaddrs are located with
+ * rt_xaddrs().  RTM_ADD and RTM_DELETE are passed to rtrequest();
+ * RTM_GET, RTM_CHANGE and RTM_LOCK work on the entry rtalloc1() finds.
+ * Afterwards the header receives either rtm_errno or RTF_DONE, is copied
+ * back into the mbuf chain, and the chain is handed to raw_input().
+ * If the sender has SO_USELOOPBACK off it is skipped during delivery,
+ * and nothing is delivered at all when it is the only listener.
+ *
+ * Returns 0 or an errno value.
+ */
+/*ARGSUSED*/
+int
+route_output(m, so)
+ register struct mbuf *m;
+ struct socket *so;
+{
+ register struct rt_msghdr *rtm = 0;
+ register struct rtentry *rt = 0;
+ struct rtentry *saved_nrt = 0;
+ struct rt_addrinfo info;
+ int len, error = 0;
+ struct ifnet *ifp = 0;
+ struct ifaddr *ifa = 0;
+
+/* record the error and jump to the common reply/cleanup code */
+#define senderr(e) { error = e; goto flush;}
+ if (m == 0 || ((m->m_len < sizeof(long)) &&
+ (m = m_pullup(m, sizeof(long))) == 0))
+ return (ENOBUFS);
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("route_output");
+ len = m->m_pkthdr.len;
+ /* info is not filled in yet; clear dst because the flush code tests it */
+ if (len < sizeof(*rtm) ||
+ len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
+ dst = 0;
+ senderr(EINVAL);
+ }
+ R_Malloc(rtm, struct rt_msghdr *, len);
+ if (rtm == 0) {
+ dst = 0;
+ senderr(ENOBUFS);
+ }
+ m_copydata(m, 0, len, (caddr_t)rtm);
+ if (rtm->rtm_version != RTM_VERSION) {
+ dst = 0;
+ senderr(EPROTONOSUPPORT);
+ }
+ rtm->rtm_pid = curproc->p_pid;
+ info.rti_addrs = rtm->rtm_addrs;
+ rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info);
+ if (dst == 0)
+ senderr(EINVAL);
+ if (genmask) {
+ /* replace the caller's genmask by the copy kept in the mask tree */
+ struct radix_node *t;
+ t = rn_addmask((caddr_t)genmask, 1, 2);
+ if (t && Bcmp(genmask, t->rn_key, *(u_char *)genmask) == 0)
+ genmask = (struct sockaddr *)(t->rn_key);
+ else
+ senderr(ENOBUFS);
+ }
+ switch (rtm->rtm_type) {
+
+ case RTM_ADD:
+ if (gate == 0)
+ senderr(EINVAL);
+ error = rtrequest(RTM_ADD, dst, gate, netmask,
+ rtm->rtm_flags, &saved_nrt);
+ if (error == 0 && saved_nrt) {
+ rt_setmetrics(rtm->rtm_inits,
+ &rtm->rtm_rmx, &saved_nrt->rt_rmx);
+ saved_nrt->rt_refcnt--;
+ saved_nrt->rt_genmask = genmask;
+ }
+ break;
+
+ case RTM_DELETE:
+ error = rtrequest(RTM_DELETE, dst, gate, netmask,
+ rtm->rtm_flags, (struct rtentry **)0);
+ break;
+
+ case RTM_GET:
+ case RTM_CHANGE:
+ case RTM_LOCK:
+ rt = rtalloc1(dst, 0);
+ if (rt == 0)
+ senderr(ESRCH);
+ if (rtm->rtm_type != RTM_GET) {/* XXX: too grotty */
+ struct radix_node *rn;
+ extern struct radix_node_head *mask_rnhead;
+
+ /* CHANGE and LOCK need an exact match on key and mask */
+ if (Bcmp(dst, rt_key(rt), dst->sa_len) != 0)
+ senderr(ESRCH);
+ if (netmask && (rn = rn_search(netmask,
+ mask_rnhead->rnh_treetop)))
+ netmask = (struct sockaddr *)rn->rn_key;
+ for (rn = rt->rt_nodes; rn; rn = rn->rn_dupedkey)
+ if (netmask == (struct sockaddr *)rn->rn_mask)
+ break;
+ if (rn == 0)
+ senderr(ETOOMANYREFS);
+ /*
+ * NOTE(review): rt may now name a different entry than the
+ * one rtalloc1() returned, yet it is still passed to rtfree()
+ * at flush -- confirm the reference counting is intended.
+ */
+ rt = (struct rtentry *)rn;
+ }
+ switch(rtm->rtm_type) {
+
+ case RTM_GET:
+ dst = rt_key(rt);
+ gate = rt->rt_gateway;
+ netmask = rt_mask(rt);
+ genmask = rt->rt_genmask;
+ if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
+ if (ifp = rt->rt_ifp) {
+ ifpaddr = ifp->if_addrlist->ifa_addr;
+ ifaaddr = rt->rt_ifa->ifa_addr;
+ rtm->rtm_index = ifp->if_index;
+ } else {
+ ifpaddr = 0;
+ ifaaddr = 0;
+ }
+ }
+ /* first call only sizes the reply; grow the buffer if needed */
+ len = rt_msg2(RTM_GET, &info, (caddr_t)0,
+ (struct walkarg *)0);
+ if (len > rtm->rtm_msglen) {
+ struct rt_msghdr *new_rtm;
+ R_Malloc(new_rtm, struct rt_msghdr *, len);
+ if (new_rtm == 0)
+ senderr(ENOBUFS);
+ Bcopy(rtm, new_rtm, rtm->rtm_msglen);
+ Free(rtm); rtm = new_rtm;
+ }
+ (void)rt_msg2(RTM_GET, &info, (caddr_t)rtm,
+ (struct walkarg *)0);
+ rtm->rtm_flags = rt->rt_flags;
+ rtm->rtm_rmx = rt->rt_rmx;
+ rtm->rtm_addrs = info.rti_addrs;
+ break;
+
+ case RTM_CHANGE:
+ if (gate && rt_setgate(rt, rt_key(rt), gate))
+ senderr(EDQUOT);
+ /* new gateway could require new ifaddr, ifp;
+ flags may also be different; ifp may be specified
+ by ll sockaddr when protocol address is ambiguous */
+ if (ifpaddr && (ifa = ifa_ifwithnet(ifpaddr)) &&
+ (ifp = ifa->ifa_ifp))
+ ifa = ifaof_ifpforaddr(ifaaddr ? ifaaddr : gate,
+ ifp);
+ else if ((ifaaddr && (ifa = ifa_ifwithaddr(ifaaddr))) ||
+ (ifa = ifa_ifwithroute(rt->rt_flags,
+ rt_key(rt), gate)))
+ ifp = ifa->ifa_ifp;
+ if (ifa) {
+ register struct ifaddr *oifa = rt->rt_ifa;
+ if (oifa != ifa) {
+ if (oifa && oifa->ifa_rtrequest)
+ oifa->ifa_rtrequest(RTM_DELETE,
+ rt, gate);
+ IFAFREE(rt->rt_ifa);
+ rt->rt_ifa = ifa;
+ ifa->ifa_refcnt++;
+ rt->rt_ifp = ifp;
+ }
+ }
+ rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
+ &rt->rt_rmx);
+ if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
+ rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, gate);
+ if (genmask)
+ rt->rt_genmask = genmask;
+ /*
+ * Fall into
+ */
+ case RTM_LOCK:
+ /* of the metrics named in rtm_inits, lock exactly those flagged in rmx_locks */
+ rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
+ rt->rt_rmx.rmx_locks |=
+ (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
+ break;
+ }
+ break;
+
+ default:
+ senderr(EOPNOTSUPP);
+ }
+
+flush:
+ if (rtm) {
+ if (error)
+ rtm->rtm_errno = error;
+ else
+ rtm->rtm_flags |= RTF_DONE;
+ }
+ if (rt)
+ rtfree(rt);
+ {
+ register struct rawcb *rp = 0;
+ /*
+ * Check to see if we don't want our own messages.
+ */
+ if ((so->so_options & SO_USELOOPBACK) == 0) {
+ if (route_cb.any_count <= 1) {
+ if (rtm)
+ Free(rtm);
+ m_freem(m);
+ return (error);
+ }
+ /* There is another listener, so construct message */
+ rp = sotorawcb(so);
+ }
+ if (rtm) {
+ m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
+ Free(rtm);
+ }
+ /* a zero family keeps raw_input() from matching the sender's rawcb */
+ if (rp)
+ rp->rcb_proto.sp_family = 0; /* Avoid us */
+ if (dst)
+ route_proto.sp_protocol = dst->sa_family;
+ raw_input(m, &route_proto, &route_src, &route_dst);
+ if (rp)
+ rp->rcb_proto.sp_family = PF_ROUTE;
+ }
+ return (error);
+}
+
+/*
+ * Copy from *in to *out each metric whose RTV_* bit is set in which;
+ * every other field of *out is left untouched.
+ */
+void
+rt_setmetrics(which, in, out)
+	u_long which;
+	register struct rt_metrics *in, *out;
+{
+	if (which & RTV_RPIPE)
+		out->rmx_recvpipe = in->rmx_recvpipe;
+	if (which & RTV_SPIPE)
+		out->rmx_sendpipe = in->rmx_sendpipe;
+	if (which & RTV_SSTHRESH)
+		out->rmx_ssthresh = in->rmx_ssthresh;
+	if (which & RTV_RTT)
+		out->rmx_rtt = in->rmx_rtt;
+	if (which & RTV_RTTVAR)
+		out->rmx_rttvar = in->rmx_rttvar;
+	if (which & RTV_HOPCOUNT)
+		out->rmx_hopcount = in->rmx_hopcount;
+	if (which & RTV_MTU)
+		out->rmx_mtu = in->rmx_mtu;
+	if (which & RTV_EXPIRE)
+		out->rmx_expire = in->rmx_expire;
+}
+
+/*
+ * ROUNDUP(a) rounds a up to a multiple of sizeof(long); a value of
+ * zero or less yields sizeof(long).  ADVANCE(x, n) moves pointer x
+ * past sockaddr n, using the rounded-up sa_len.
+ */
+#define ROUNDUP(a) \
+ ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
+#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
+
+/*
+ * Locate the sockaddrs packed in the buffer [cp, cplim) and record a
+ * pointer to each in rtinfo->rti_info[], using the bits already set in
+ * rtinfo->rti_addrs to tell which slots are present.  Slots that are
+ * absent, or not reached before cplim, are left 0.
+ *
+ * A sockaddr whose sa_len claims more bytes than remain before cplim
+ * makes the whole message malformed: every slot is cleared again so
+ * that the caller's check for a missing destination rejects it,
+ * rather than handing out pointers to data beyond the buffer.
+ */
+static void
+rt_xaddrs(cp, cplim, rtinfo)
+	register caddr_t cp, cplim;
+	register struct rt_addrinfo *rtinfo;
+{
+	register struct sockaddr *sa;
+	register int i;
+
+	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
+	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
+		if ((rtinfo->rti_addrs & (1 << i)) == 0)
+			continue;
+		sa = (struct sockaddr *)cp;
+		/* cp < cplim holds here, so the difference is positive */
+		if (sa->sa_len > cplim - cp) {
+			bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
+			return;
+		}
+		rtinfo->rti_info[i] = sa;
+		ADVANCE(cp, sa);
+	}
+}
+
+/*
+ * Copy data from a buffer back into the indicated mbuf chain,
+ * starting "off" bytes from the beginning, extending the mbuf
+ * chain if necessary.
+ *
+ * New mbufs are obtained with M_DONTWAIT; if one cannot be had the
+ * copy stops early and no error is reported.  When the chain has a
+ * packet header, m_pkthdr.len is raised to the new total length if
+ * the chain grew.
+ */
+void
+m_copyback(m0, off, len, cp)
+ struct mbuf *m0;
+ register int off;
+ register int len;
+ caddr_t cp;
+{
+ register int mlen;
+ register struct mbuf *m = m0, *n;
+ int totlen = 0;
+
+ if (m0 == 0)
+ return;
+ /* skip to the mbuf holding offset "off", appending zeroed mbufs as needed */
+ while (off > (mlen = m->m_len)) {
+ off -= mlen;
+ totlen += mlen;
+ if (m->m_next == 0) {
+ n = m_getclr(M_DONTWAIT, m->m_type);
+ if (n == 0)
+ goto out;
+ n->m_len = min(MLEN, len + off);
+ m->m_next = n;
+ }
+ m = m->m_next;
+ }
+ /* copy the data, one mbuf at a time, appending mbufs as needed */
+ while (len > 0) {
+ mlen = min (m->m_len - off, len);
+ bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
+ cp += mlen;
+ len -= mlen;
+ mlen += off;
+ off = 0;
+ totlen += mlen;
+ if (len == 0)
+ break;
+ if (m->m_next == 0) {
+ n = m_get(M_DONTWAIT, m->m_type);
+ if (n == 0)
+ break;
+ n->m_len = min(MLEN, len);
+ m->m_next = n;
+ }
+ m = m->m_next;
+ }
+out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
+ m->m_pkthdr.len = totlen;
+}
+
+/*
+ * Build a routing message of the given type in a new mbuf chain.
+ *
+ * The header size depends on the type (ifa_msghdr for RTM_NEWADDR and
+ * RTM_DELADDR, if_msghdr for RTM_IFINFO, rt_msghdr otherwise); the
+ * header is zeroed and followed by every sockaddr present in
+ * rtinfo->rti_info[], each padded with ROUNDUP().  The matching bits
+ * are set in rtinfo->rti_addrs.  Only the length, version and type
+ * fields of the header are filled in here.
+ *
+ * Returns the chain, or 0 if an mbuf could not be allocated or the
+ * chain could not be extended to hold all the sockaddrs.
+ */
+static struct mbuf *
+rt_msg1(type, rtinfo)
+	int type;
+	register struct rt_addrinfo *rtinfo;
+{
+	register struct rt_msghdr *rtm;
+	register struct mbuf *m;
+	register int slot;
+	register struct sockaddr *sa;
+	int total, salen;
+
+	m = m_gethdr(M_DONTWAIT, MT_DATA);
+	if (m == 0)
+		return (m);
+	if (type == RTM_DELADDR || type == RTM_NEWADDR)
+		total = sizeof(struct ifa_msghdr);
+	else if (type == RTM_IFINFO)
+		total = sizeof(struct if_msghdr);
+	else
+		total = sizeof(struct rt_msghdr);
+	if (total > MHLEN)
+		panic("rt_msg1");
+	m->m_len = total;
+	m->m_pkthdr.len = total;
+	m->m_pkthdr.rcvif = 0;
+	rtm = mtod(m, struct rt_msghdr *);
+	bzero((caddr_t)rtm, total);
+	for (slot = 0; slot < RTAX_MAX; slot++) {
+		sa = rtinfo->rti_info[slot];
+		if (sa != NULL) {
+			rtinfo->rti_addrs |= (1 << slot);
+			salen = ROUNDUP(sa->sa_len);
+			m_copyback(m, total, salen, (caddr_t)sa);
+			total += salen;
+		}
+	}
+	/* m_copyback() reports failure only through a short packet length */
+	if (m->m_pkthdr.len != total) {
+		m_freem(m);
+		return (NULL);
+	}
+	rtm->rtm_msglen = total;
+	rtm->rtm_version = RTM_VERSION;
+	rtm->rtm_type = type;
+	return (m);
+}
+
+/*
+ * Size, and optionally build, a routing message of the given type
+ * from the sockaddrs in rtinfo->rti_info[].  rtinfo->rti_addrs is
+ * recomputed from the slots that are present.
+ *
+ * cp != 0: the sockaddrs are copied after a header-sized gap at cp and
+ *   the length, version and type fields at cp are filled in; the rest
+ *   of the header is not touched.
+ * cp == 0, w == 0: only the length is computed.
+ * cp == 0, w != 0: the length is added to w->w_needed.  If the total
+ *   still fits the caller's buffer (w_needed <= 0) and w->w_where is
+ *   set, the scratch buffer w->w_tmem is grown if necessary and the
+ *   message is built there on a second pass; if no scratch buffer can
+ *   be had, w->w_where is cleared instead.
+ *
+ * Returns the message length in bytes.
+ */
+static int
+rt_msg2(type, rtinfo, cp, w)
+ int type;
+ register struct rt_addrinfo *rtinfo;
+ caddr_t cp;
+ struct walkarg *w;
+{
+ register int i;
+ int len, dlen, second_time = 0;
+ caddr_t cp0;
+
+ rtinfo->rti_addrs = 0;
+again:
+ switch (type) {
+
+ case RTM_DELADDR:
+ case RTM_NEWADDR:
+ len = sizeof(struct ifa_msghdr);
+ break;
+
+ case RTM_IFINFO:
+ len = sizeof(struct if_msghdr);
+ break;
+
+ default:
+ len = sizeof(struct rt_msghdr);
+ }
+ /* remember the start of the message; the sockaddrs go after the header */
+ if (cp0 = cp)
+ cp += len;
+ for (i = 0; i < RTAX_MAX; i++) {
+ register struct sockaddr *sa;
+
+ if ((sa = rtinfo->rti_info[i]) == 0)
+ continue;
+ rtinfo->rti_addrs |= (1 << i);
+ dlen = ROUNDUP(sa->sa_len);
+ if (cp) {
+ /* NOTE(review): copies the padded length, so up to sizeof(long) - 1 bytes past sa_len are read */
+ bcopy((caddr_t)sa, cp, (unsigned)dlen);
+ cp += dlen;
+ }
+ len += dlen;
+ }
+ if (cp == 0 && w != NULL && !second_time) {
+ register struct walkarg *rw = w;
+
+ rw->w_needed += len;
+ if (rw->w_needed <= 0 && rw->w_where) {
+ if (rw->w_tmemsize < len) {
+ if (rw->w_tmem)
+ free(rw->w_tmem, M_RTABLE);
+ if (rw->w_tmem = (caddr_t)
+ malloc(len, M_RTABLE, M_NOWAIT))
+ rw->w_tmemsize = len;
+ }
+ if (rw->w_tmem) {
+ cp = rw->w_tmem;
+ second_time = 1;
+ goto again;
+ } else
+ rw->w_where = 0;
+ }
+ }
+ if (cp) {
+ register struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
+
+ rtm->rtm_version = RTM_VERSION;
+ rtm->rtm_type = type;
+ rtm->rtm_msglen = len;
+ }
+ return (len);
+}
+
+/*
+ * Generate a message from the routing socket reporting that a
+ * redirect has occurred, that a routing lookup has failed, or that a
+ * protocol has detected timeouts to a particular destination.
+ * Nothing is sent when no routing socket is open or when the
+ * message cannot be built.
+ */
+void
+rt_missmsg(type, rtinfo, flags, error)
+	int type, flags, error;
+	register struct rt_addrinfo *rtinfo;
+{
+	register struct rt_msghdr *hdr;
+	register struct mbuf *m;
+	struct sockaddr *target = rtinfo->rti_info[RTAX_DST];
+
+	if (route_cb.any_count == 0)
+		return;
+	if ((m = rt_msg1(type, rtinfo)) == 0)
+		return;
+	hdr = mtod(m, struct rt_msghdr *);
+	hdr->rtm_flags = RTF_DONE | flags;
+	hdr->rtm_errno = error;
+	hdr->rtm_addrs = rtinfo->rti_addrs;
+	/* deliver by the destination's address family, if there is one */
+	if (target)
+		route_proto.sp_protocol = target->sa_family;
+	else
+		route_proto.sp_protocol = 0;
+	raw_input(m, &route_proto, &route_src, &route_dst);
+}
+
+/*
+ * Generate an RTM_IFINFO message from the routing socket reporting
+ * that the status of a network interface has changed.  The message
+ * carries the interface index, flags and data, and no sockaddrs.
+ */
+void
+rt_ifmsg(ifp)
+	register struct ifnet *ifp;
+{
+	register struct if_msghdr *hdr;
+	struct mbuf *m;
+	struct rt_addrinfo info;
+
+	if (route_cb.any_count == 0)
+		return;
+	bzero((caddr_t)&info, sizeof(info));
+	if ((m = rt_msg1(RTM_IFINFO, &info)) == 0)
+		return;
+	hdr = mtod(m, struct if_msghdr *);
+	hdr->ifm_addrs = 0;
+	hdr->ifm_index = ifp->if_index;
+	hdr->ifm_flags = ifp->if_flags;
+	hdr->ifm_data = ifp->if_data;
+	route_proto.sp_protocol = 0;
+	raw_input(m, &route_proto, &route_src, &route_dst);
+}
+
+/*
+ * Generate messages from the routing socket indicating that a
+ * network interface has had an address added or removed.  If the
+ * logic is ever reversed, so that messages written TO the routing
+ * socket request interface configuration, this will be unnecessary,
+ * as the routing socket will generate copies automatically.
+ *
+ * Two messages are sent: an address message (RTM_NEWADDR or
+ * RTM_DELADDR) and a route message (cmd itself, skipped when rt is 0).
+ * For RTM_ADD the address message goes first; for RTM_DELETE the route
+ * message goes first.  Any other cmd sends nothing.
+ */
+void
+rt_newaddrmsg(cmd, ifa, error, rt)
+	int cmd, error;
+	register struct ifaddr *ifa;
+	register struct rtentry *rt;
+{
+	struct rt_addrinfo info;
+	struct sockaddr *sa;
+	int pass;
+	struct mbuf *m;
+	struct ifnet *ifp = ifa->ifa_ifp;
+
+	if (route_cb.any_count == 0)
+		return;
+	for (pass = 1; pass < 3; pass++) {
+		bzero((caddr_t)&info, sizeof(info));
+		/* nothing built yet for this pass */
+		m = 0;
+		sa = 0;
+		if ((cmd == RTM_ADD && pass == 1) ||
+		    (cmd == RTM_DELETE && pass == 2)) {
+			register struct ifa_msghdr *ifam;
+			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
+
+			ifaaddr = sa = ifa->ifa_addr;
+			ifpaddr = ifp->if_addrlist->ifa_addr;
+			netmask = ifa->ifa_netmask;
+			brdaddr = ifa->ifa_dstaddr;
+			if ((m = rt_msg1(ncmd, &info)) == NULL)
+				continue;
+			ifam = mtod(m, struct ifa_msghdr *);
+			ifam->ifam_index = ifp->if_index;
+			ifam->ifam_metric = ifa->ifa_metric;
+			ifam->ifam_flags = ifa->ifa_flags;
+			ifam->ifam_addrs = info.rti_addrs;
+		}
+		if ((cmd == RTM_ADD && pass == 2) ||
+		    (cmd == RTM_DELETE && pass == 1)) {
+			register struct rt_msghdr *rtm;
+
+			if (rt == 0)
+				continue;
+			netmask = rt_mask(rt);
+			dst = sa = rt_key(rt);
+			gate = rt->rt_gateway;
+			if ((m = rt_msg1(cmd, &info)) == NULL)
+				continue;
+			rtm = mtod(m, struct rt_msghdr *);
+			rtm->rtm_index = ifp->if_index;
+			rtm->rtm_flags |= rt->rt_flags;
+			rtm->rtm_errno = error;
+			rtm->rtm_addrs = info.rti_addrs;
+		}
+		/* cmd was neither RTM_ADD nor RTM_DELETE: no message to send */
+		if (m == 0)
+			continue;
+		route_proto.sp_protocol = sa ? sa->sa_family : 0;
+		raw_input(m, &route_proto, &route_src, &route_dst);
+	}
+}
+
+/*
+ * This is used in dumping the kernel table via sysctl(); it is the
+ * per-entry callback given to the radix tree walker.
+ *
+ * For NET_RT_FLAGS, entries with none of the flags in w->w_arg set are
+ * skipped.  Otherwise an RTM_GET message describing the entry is sized
+ * (and, while the caller's buffer has room, built in w->w_tmem) by
+ * rt_msg2(), completed here and copied out to w->w_where.
+ *
+ * Returns 0, or the copyout() error; on such an error w->w_where is
+ * also cleared.
+ */
+int
+sysctl_dumpentry(rn, w)
+	struct radix_node *rn;
+	register struct walkarg *w;
+{
+	register struct rtentry *rt = (struct rtentry *)rn;
+	int error = 0, size;
+	struct rt_addrinfo info;
+
+	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
+		return 0;
+	bzero((caddr_t)&info, sizeof(info));
+	dst = rt_key(rt);
+	gate = rt->rt_gateway;
+	netmask = rt_mask(rt);
+	genmask = rt->rt_genmask;
+	size = rt_msg2(RTM_GET, &info, 0, w);
+	if (w->w_where && w->w_tmem) {
+		register struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
+
+		rtm->rtm_flags = rt->rt_flags;
+		rtm->rtm_use = rt->rt_use;
+		rtm->rtm_rmx = rt->rt_rmx;
+		rtm->rtm_index = rt->rt_ifp->if_index;
+		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
+		/*
+		 * The scratch buffer comes from malloc() and rt_msg2() fills
+		 * in only length, version and type, so clear rtm_inits too
+		 * instead of copying out whatever the buffer held.
+		 */
+		rtm->rtm_inits = 0;
+		rtm->rtm_addrs = info.rti_addrs;
+		if (error = copyout((caddr_t)rtm, w->w_where, size))
+			w->w_where = NULL;
+		else
+			w->w_where += size;
+	}
+	return (error);
+}
+
+/*
+ * Dump the interface list for sysctl().
+ *
+ * For every interface (or only the one whose index is w->w_arg, when
+ * that is non-zero) an RTM_IFINFO message carrying the address of the
+ * first entry on its address list is produced, followed by one
+ * RTM_NEWADDR message for each remaining address whose family matches
+ * af (af == 0 matches all).  Messages are sized, and built while the
+ * caller's buffer has room, by rt_msg2(), then copied out to w->w_where.
+ *
+ * Returns 0, or the first copyout() error.
+ */
+int
+sysctl_iflist(af, w)
+ int af;
+ register struct walkarg *w;
+{
+ register struct ifnet *ifp;
+ register struct ifaddr *ifa;
+ struct rt_addrinfo info;
+ int len, error = 0;
+
+ bzero((caddr_t)&info, sizeof(info));
+ for (ifp = ifnet; ifp; ifp = ifp->if_next) {
+ if (w->w_arg && w->w_arg != ifp->if_index)
+ continue;
+ ifa = ifp->if_addrlist;
+ ifpaddr = ifa->ifa_addr;
+ len = rt_msg2(RTM_IFINFO, &info, (caddr_t)0, w);
+ /* the interface address belongs only to the RTM_IFINFO message */
+ ifpaddr = 0;
+ if (w->w_where && w->w_tmem) {
+ register struct if_msghdr *ifm;
+
+ ifm = (struct if_msghdr *)w->w_tmem;
+ ifm->ifm_index = ifp->if_index;
+ ifm->ifm_flags = ifp->if_flags;
+ ifm->ifm_data = ifp->if_data;
+ ifm->ifm_addrs = info.rti_addrs;
+ if (error = copyout((caddr_t)ifm, w->w_where, len))
+ return (error);
+ w->w_where += len;
+ }
+ /* the first address was reported above; walk the remaining ones */
+ while (ifa = ifa->ifa_next) {
+ if (af && af != ifa->ifa_addr->sa_family)
+ continue;
+ ifaaddr = ifa->ifa_addr;
+ netmask = ifa->ifa_netmask;
+ brdaddr = ifa->ifa_dstaddr;
+ len = rt_msg2(RTM_NEWADDR, &info, 0, w);
+ if (w->w_where && w->w_tmem) {
+ register struct ifa_msghdr *ifam;
+
+ ifam = (struct ifa_msghdr *)w->w_tmem;
+ ifam->ifam_index = ifa->ifa_ifp->if_index;
+ ifam->ifam_flags = ifa->ifa_flags;
+ ifam->ifam_metric = ifa->ifa_metric;
+ ifam->ifam_addrs = info.rti_addrs;
+ if (error = copyout(w->w_tmem, w->w_where, len))
+ return (error);
+ w->w_where += len;
+ }
+ }
+ /* reset the slots so they do not leak into the next interface */
+ ifaaddr = netmask = brdaddr = 0;
+ }
+ return (0);
+}
+
+/*
+ * sysctl() handler for the routing domain.
+ *
+ * name[] must have exactly three components: the address family
+ * (0 for all), the operation (NET_RT_DUMP, NET_RT_FLAGS or
+ * NET_RT_IFLIST) and an argument for the operation.  The tree is
+ * read-only: a non-null "new" yields EPERM.
+ *
+ * With a buffer ("where" != 0), *given is set to the number of bytes
+ * written and ENOMEM is returned if that is less than was needed.
+ * Without one, *given is set to the space needed plus 10%.
+ *
+ * Returns 0 or an errno value; EINVAL for a bad name length or an
+ * unknown operation.
+ */
+int
+sysctl_rtable(name, namelen, where, given, new, newlen)
+ int *name;
+ int namelen;
+ caddr_t where;
+ size_t *given;
+ caddr_t *new;
+ size_t newlen;
+{
+ register struct radix_node_head *rnh;
+ int i, s, error = EINVAL;
+ u_char af;
+ struct walkarg w;
+
+ if (new)
+ return (EPERM);
+ if (namelen != 3)
+ return (EINVAL);
+ af = name[0];
+ Bzero(&w, sizeof(w));
+ w.w_where = where;
+ w.w_given = *given;
+ /* start at -given: w_needed stays <= 0 for as long as the output fits */
+ w.w_needed = 0 - w.w_given;
+ w.w_op = name[1];
+ w.w_arg = name[2];
+
+ s = splnet();
+ switch (w.w_op) {
+
+ case NET_RT_DUMP:
+ case NET_RT_FLAGS:
+ /* walk every existing table that matches af; stop at the first error */
+ for (i = 1; i <= AF_MAX; i++)
+ if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
+ (error = rnh->rnh_walktree(rnh,
+ sysctl_dumpentry, &w)))
+ break;
+ break;
+
+ case NET_RT_IFLIST:
+ error = sysctl_iflist(af, &w);
+ }
+ splx(s);
+ if (w.w_tmem)
+ free(w.w_tmem, M_RTABLE);
+ /* turn w_needed back into the total number of bytes required */
+ w.w_needed += w.w_given;
+ if (where) {
+ *given = w.w_where - where;
+ if (*given < w.w_needed)
+ return (ENOMEM);
+ } else {
+ *given = (11 * w.w_needed) / 10;
+ }
+ return (error);
+}
+
+/*
+ * Definitions of protocols supported in the ROUTE domain:
+ * a single raw-socket protocol whose handlers are the raw_* routines
+ * plus route_output, route_usrreq and sysctl_rtable from this file.
+ */
+
+extern struct domain routedomain; /* or at least forward */
+
+struct protosw routesw[] = {
+{ SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR,
+ raw_input, route_output, raw_ctlinput, 0,
+ route_usrreq,
+ raw_init, 0, 0, 0,
+ sysctl_rtable,
+}
+};
+
+/* the last two members delimit routesw[]: first entry and one past the end */
+struct domain routedomain =
+ { PF_ROUTE, "route", route_init, 0, 0,
+ routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] };
diff --git a/sys/net/slcompress.c b/sys/net/slcompress.c
new file mode 100644
index 000000000000..70af9358e37b
--- /dev/null
+++ b/sys/net/slcompress.c
@@ -0,0 +1,535 @@
+/*-
+ * Copyright (c) 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)slcompress.c 8.2 (Berkeley) 4/16/94
+ */
+
+/*
+ * Routines to compress and uncompress tcp packets (for transmission
+ * over low speed serial lines).
+ *
+ * Van Jacobson (van@helios.ee.lbl.gov), Dec 31, 1989:
+ * - Initial distribution.
+ *
+ * static char rcsid[] =
+ * "$Header: slcompress.c,v 1.19 89/12/31 08:52:59 van Exp $";
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+
+#include <net/slcompress.h>
+
+/*
+ * Statistics hook.  INCR bumps a counter in the slcompress structure
+ * named `comp' in the calling scope; it expands to nothing when
+ * SL_NO_STATS is defined.  The expansion carries its own trailing
+ * semicolon, so call sites are written without one.
+ */
+#ifndef SL_NO_STATS
+#define INCR(counter) ++comp->counter;
+#else
+#define INCR(counter)
+#endif
+
+/* Cast-wrapping shorthands for bcmp()/bcopy(). */
+#define BCMP(p1, p2, n) bcmp((char *)(p1), (char *)(p2), (int)(n))
+#define BCOPY(p1, p2, n) bcopy((char *)(p1), (char *)(p2), (int)(n))
+/* Outside the kernel, ovbcopy is mapped onto plain bcopy. */
+#ifndef KERNEL
+#define ovbcopy bcopy
+#endif
+
+/*
+ * Reset all compression state for one serial line.
+ *
+ * The structure is zeroed and the transmit states are linked into a
+ * ring in which each slot points at the slot with the next lower id
+ * and slot 0 wraps around to the highest one.  last_recv and last_xmit
+ * are set to 255, which is not a valid connection id (MAX_STATES is
+ * below 256), and SLF_TOSS is raised so that the receiver discards
+ * compressed packets carrying an implicit connection id until one
+ * with an explicit id arrives.
+ */
+void
+sl_compress_init(comp)
+	struct slcompress *comp;
+{
+	register struct cstate *cs;
+	register u_int id;
+
+	bzero((char *)comp, sizeof(*comp));
+
+	/* slot 0 closes the ring by pointing back at the last slot */
+	cs = &comp->tstate[0];
+	cs->cs_id = 0;
+	cs->cs_next = &comp->tstate[MAX_STATES - 1];
+
+	for (id = 1; id < MAX_STATES; id++) {
+		cs = &comp->tstate[id];
+		cs->cs_id = id;
+		cs->cs_next = cs - 1;
+	}
+
+	comp->last_cs = &comp->tstate[0];
+	comp->last_recv = 255;
+	comp->last_xmit = 255;
+	comp->flags = SLF_TOSS;
+}
+
+
+/* ENCODE encodes a number that is known to be non-zero. ENCODEZ
+ * checks for zero (since zero has to be encoded in the long, 3 byte
+ * form).
+ *
+ * Both macros write through, and advance, a u_char pointer named `cp'
+ * in the calling scope: one octet for values 1..255, otherwise a zero
+ * octet followed by the high and low octets of the value.  The
+ * argument is evaluated more than once.
+ */
+#define ENCODE(n) { \
+ if ((u_short)(n) >= 256) { \
+ *cp++ = 0; \
+ cp[1] = (n); \
+ cp[0] = (n) >> 8; \
+ cp += 2; \
+ } else { \
+ *cp++ = (n); \
+ } \
+}
+#define ENCODEZ(n) { \
+ if ((u_short)(n) >= 256 || (u_short)(n) == 0) { \
+ *cp++ = 0; \
+ cp[1] = (n); \
+ cp[0] = (n) >> 8; \
+ cp += 2; \
+ } else { \
+ *cp++ = (n); \
+ } \
+}
+
+/*
+ * The DECODE macros read one encoded number at `cp' (1 or 3 octets,
+ * as produced above) and advance `cp' past it.  They are brace blocks
+ * and are invoked without a trailing semicolon.
+ *
+ * DECODEL adds the decoded delta to a 32-bit field kept in network
+ * byte order.
+ */
+#define DECODEL(f) { \
+ if (*cp == 0) {\
+ (f) = htonl(ntohl(f) + ((cp[1] << 8) | cp[2])); \
+ cp += 3; \
+ } else { \
+ (f) = htonl(ntohl(f) + (u_long)*cp++); \
+ } \
+}
+
+/* DECODES adds the decoded delta to a 16-bit network-order field. */
+#define DECODES(f) { \
+ if (*cp == 0) {\
+ (f) = htons(ntohs(f) + ((cp[1] << 8) | cp[2])); \
+ cp += 3; \
+ } else { \
+ (f) = htons(ntohs(f) + (u_long)*cp++); \
+ } \
+}
+
+/*
+ * DECODEU stores the decoded number itself (not a delta) into a
+ * 16-bit network-order field; it is used for the urgent pointer.
+ */
+#define DECODEU(f) { \
+ if (*cp == 0) {\
+ (f) = htons((cp[1] << 8) | cp[2]); \
+ cp += 3; \
+ } else { \
+ (f) = htons((u_long)*cp++); \
+ } \
+}
+
+/*
+ * Try to compress the TCP/IP header of an outbound packet.
+ *
+ * m            mbuf holding the packet; on a compressed return its
+ *              m_data/m_len are adjusted to cover the compressed header
+ * ip           the IP header at the front of that packet
+ * comp         per-line compression state
+ * compress_cid when non-zero, the connection id octet is left out if it
+ *              equals the id sent last (comp->last_xmit)
+ *
+ * Returns the packet type the caller should put on the wire:
+ *   TYPE_IP                packet left untouched (fragment, fewer than
+ *                          40 bytes in the mbuf, or TCP flags other
+ *                          than a plain ACK)
+ *   TYPE_UNCOMPRESSED_TCP  header saved in the connection state and
+ *                          ip->ip_p overwritten with the connection id
+ *   TYPE_COMPRESSED_TCP    header replaced by the compressed form
+ *
+ * deltaS, deltaA and hlen are reused as temporaries throughout, so the
+ * order of the statements below matters.
+ *
+ * NOTE(review): only m_len >= 40 is verified before hlen bytes (IP and
+ * TCP headers including options) are read from the packet and copied
+ * into cs->cs_ip; confirm that callers guarantee the whole header is
+ * contiguous in the mbuf and no larger than MAX_HDR.
+ */
+u_int
+sl_compress_tcp(m, ip, comp, compress_cid)
+ struct mbuf *m;
+ register struct ip *ip;
+ struct slcompress *comp;
+ int compress_cid;
+{
+ register struct cstate *cs = comp->last_cs->cs_next;
+ register u_int hlen = ip->ip_hl;
+ register struct tcphdr *oth;
+ register struct tcphdr *th;
+ register u_int deltaS, deltaA;
+ register u_int changes = 0;
+ u_char new_seq[16];
+ register u_char *cp = new_seq;
+
+ /*
+ * Bail if this is an IP fragment or if the TCP packet isn't
+ * `compressible' (i.e., ACK isn't set or some other control bit is
+ * set). (We assume that the caller has already made sure the
+ * packet is IP proto TCP).
+ */
+ if ((ip->ip_off & htons(0x3fff)) || m->m_len < 40)
+ return (TYPE_IP);
+
+ th = (struct tcphdr *)&((int *)ip)[hlen];
+ if ((th->th_flags & (TH_SYN|TH_FIN|TH_RST|TH_ACK)) != TH_ACK)
+ return (TYPE_IP);
+ /*
+ * Packet is compressible -- we're going to send either a
+ * COMPRESSED_TCP or UNCOMPRESSED_TCP packet. Either way we need
+ * to locate (or create) the connection state. Special case the
+ * most recently used connection since it's most likely to be used
+ * again & we don't have to do any reordering if it's used.
+ */
+ INCR(sls_packets)
+ if (ip->ip_src.s_addr != cs->cs_ip.ip_src.s_addr ||
+ ip->ip_dst.s_addr != cs->cs_ip.ip_dst.s_addr ||
+ *(int *)th != ((int *)&cs->cs_ip)[cs->cs_ip.ip_hl]) {
+ /*
+ * Wasn't the first -- search for it.
+ *
+ * States are kept in a circularly linked list with
+ * last_cs pointing to the end of the list. The
+ * list is kept in lru order by moving a state to the
+ * head of the list whenever it is referenced. Since
+ * the list is short and, empirically, the connection
+ * we want is almost always near the front, we locate
+ * states via linear search. If we don't find a state
+ * for the datagram, the oldest state is (re-)used.
+ */
+ register struct cstate *lcs;
+ register struct cstate *lastcs = comp->last_cs;
+
+ do {
+ lcs = cs; cs = cs->cs_next;
+ INCR(sls_searches)
+ if (ip->ip_src.s_addr == cs->cs_ip.ip_src.s_addr
+ && ip->ip_dst.s_addr == cs->cs_ip.ip_dst.s_addr
+ && *(int *)th == ((int *)&cs->cs_ip)[cs->cs_ip.ip_hl])
+ goto found;
+ } while (cs != lastcs);
+
+ /*
+ * Didn't find it -- re-use oldest cstate. Send an
+ * uncompressed packet that tells the other side what
+ * connection number we're using for this conversation.
+ * Note that since the state list is circular, the oldest
+ * state points to the newest and we only need to set
+ * last_cs to update the lru linkage.
+ */
+ INCR(sls_misses)
+ comp->last_cs = lcs;
+ hlen += th->th_off;
+ hlen <<= 2;
+ goto uncompressed;
+
+ found:
+ /*
+ * Found it -- move to the front on the connection list.
+ */
+ if (cs == lastcs)
+ comp->last_cs = lcs;
+ else {
+ lcs->cs_next = cs->cs_next;
+ cs->cs_next = lastcs->cs_next;
+ lastcs->cs_next = cs;
+ }
+ }
+
+ /*
+ * Make sure that only what we expect to change changed. The first
+ * line of the `if' checks the IP protocol version, header length &
+ * type of service. The 2nd line checks the "Don't fragment" bit.
+ * The 3rd line checks the time-to-live and protocol (the protocol
+ * check is unnecessary but costless). The 4th line checks the TCP
+ * header length. The 5th line checks IP options, if any. The 6th
+ * line checks TCP options, if any. If any of these things are
+ * different between the previous & current datagram, we send the
+ * current datagram `uncompressed'.
+ */
+ oth = (struct tcphdr *)&((int *)&cs->cs_ip)[hlen];
+ deltaS = hlen;
+ hlen += th->th_off;
+ hlen <<= 2;
+
+ if (((u_short *)ip)[0] != ((u_short *)&cs->cs_ip)[0] ||
+ ((u_short *)ip)[3] != ((u_short *)&cs->cs_ip)[3] ||
+ ((u_short *)ip)[4] != ((u_short *)&cs->cs_ip)[4] ||
+ th->th_off != oth->th_off ||
+ (deltaS > 5 &&
+ BCMP(ip + 1, &cs->cs_ip + 1, (deltaS - 5) << 2)) ||
+ (th->th_off > 5 &&
+ BCMP(th + 1, oth + 1, (th->th_off - 5) << 2)))
+ goto uncompressed;
+
+ /*
+ * Figure out which of the changing fields changed. The
+ * receiver expects changes in the order: urgent, window,
+ * ack, seq (the order minimizes the number of temporaries
+ * needed in this section of code).
+ */
+ if (th->th_flags & TH_URG) {
+ deltaS = ntohs(th->th_urp);
+ ENCODEZ(deltaS);
+ changes |= NEW_U;
+ } else if (th->th_urp != oth->th_urp)
+ /* argh! URG not set but urp changed -- a sensible
+ * implementation should never do this but RFC793
+ * doesn't prohibit the change so we have to deal
+ * with it. */
+ goto uncompressed;
+
+ if (deltaS = (u_short)(ntohs(th->th_win) - ntohs(oth->th_win))) {
+ ENCODE(deltaS);
+ changes |= NEW_W;
+ }
+
+ if (deltaA = ntohl(th->th_ack) - ntohl(oth->th_ack)) {
+ if (deltaA > 0xffff)
+ goto uncompressed;
+ ENCODE(deltaA);
+ changes |= NEW_A;
+ }
+
+ if (deltaS = ntohl(th->th_seq) - ntohl(oth->th_seq)) {
+ if (deltaS > 0xffff)
+ goto uncompressed;
+ ENCODE(deltaS);
+ changes |= NEW_S;
+ }
+
+ switch(changes) {
+
+ case 0:
+ /*
+ * Nothing changed. If this packet contains data and the
+ * last one didn't, this is probably a data packet following
+ * an ack (normal on an interactive connection) and we send
+ * it compressed. Otherwise it's probably a retransmit,
+ * retransmitted ack or window probe. Send it uncompressed
+ * in case the other side missed the compressed version.
+ */
+ if (ip->ip_len != cs->cs_ip.ip_len &&
+ ntohs(cs->cs_ip.ip_len) == hlen)
+ break;
+
+ /* (fall through) */
+
+ case SPECIAL_I:
+ case SPECIAL_D:
+ /*
+ * actual changes match one of our special case encodings --
+ * send packet uncompressed.
+ */
+ goto uncompressed;
+
+ case NEW_S|NEW_A:
+ if (deltaS == deltaA &&
+ deltaS == ntohs(cs->cs_ip.ip_len) - hlen) {
+ /* special case for echoed terminal traffic */
+ changes = SPECIAL_I;
+ cp = new_seq;
+ }
+ break;
+
+ case NEW_S:
+ if (deltaS == ntohs(cs->cs_ip.ip_len) - hlen) {
+ /* special case for data xfer */
+ changes = SPECIAL_D;
+ cp = new_seq;
+ }
+ break;
+ }
+
+ deltaS = ntohs(ip->ip_id) - ntohs(cs->cs_ip.ip_id);
+ if (deltaS != 1) {
+ ENCODEZ(deltaS);
+ changes |= NEW_I;
+ }
+ if (th->th_flags & TH_PUSH)
+ changes |= TCP_PUSH_BIT;
+ /*
+ * Grab the cksum before we overwrite it below. Then update our
+ * state with this packet's header.
+ */
+ deltaA = ntohs(th->th_sum);
+ BCOPY(ip, &cs->cs_ip, hlen);
+
+ /*
+ * We want to use the original packet as our compressed packet.
+ * (cp - new_seq) is the number of bytes we need for compressed
+ * sequence numbers. In addition we need one byte for the change
+ * mask, one for the connection id and two for the tcp checksum.
+ * So, (cp - new_seq) + 4 bytes of header are needed. hlen is how
+ * many bytes of the original packet to toss so subtract the two to
+ * get the new packet size.
+ */
+ deltaS = cp - new_seq;
+ cp = (u_char *)ip;
+ if (compress_cid == 0 || comp->last_xmit != cs->cs_id) {
+ comp->last_xmit = cs->cs_id;
+ hlen -= deltaS + 4;
+ cp += hlen;
+ *cp++ = changes | NEW_C;
+ *cp++ = cs->cs_id;
+ } else {
+ hlen -= deltaS + 3;
+ cp += hlen;
+ *cp++ = changes;
+ }
+ m->m_len -= hlen;
+ m->m_data += hlen;
+ *cp++ = deltaA >> 8;
+ *cp++ = deltaA;
+ BCOPY(new_seq, cp, deltaS);
+ INCR(sls_compressed)
+ return (TYPE_COMPRESSED_TCP);
+
+ /*
+ * Update connection state cs & send uncompressed packet ('uncompressed'
+ * means a regular ip/tcp packet but with the 'conversation id' we hope
+ * to use on future compressed packets in the protocol field).
+ */
+uncompressed:
+ BCOPY(ip, &cs->cs_ip, hlen);
+ ip->ip_p = cs->cs_id;
+ comp->last_xmit = cs->cs_id;
+ return (TYPE_UNCOMPRESSED_TCP);
+}
+
+
+/*
+ * Process an inbound packet of the given type.
+ *
+ * bufp   in: address of the pointer to the received packet
+ *        out: for a compressed packet, moved back to the start of the
+ *        reconstructed IP header
+ * len    number of bytes received at *bufp
+ * type   TYPE_UNCOMPRESSED_TCP or TYPE_COMPRESSED_TCP; anything else is
+ *        treated as an error
+ * comp   per-line compression state
+ *
+ * For TYPE_UNCOMPRESSED_TCP the connection id carried in ip_p selects a
+ * receive state, ip_p is restored to IPPROTO_TCP and the header is
+ * saved as the reference for later compressed packets.  For
+ * TYPE_COMPRESSED_TCP the saved header is updated from the encoded
+ * deltas and written in front of the packet data.
+ *
+ * Returns the length of the resulting packet, or 0 if the packet must
+ * be dropped.  On error SLF_TOSS is set so that compressed packets with
+ * an implicit connection id are discarded until an explicit id is seen.
+ */
+int
+sl_uncompress_tcp(bufp, len, type, comp)
+	u_char **bufp;
+	int len;
+	u_int type;
+	struct slcompress *comp;
+{
+	register u_char *cp;
+	register u_int hlen, changes;
+	register struct tcphdr *th;
+	register struct cstate *cs;
+	register struct ip *ip;
+
+	switch (type) {
+
+	case TYPE_UNCOMPRESSED_TCP:
+		ip = (struct ip *) *bufp;
+		if (ip->ip_p >= MAX_STATES)
+			goto bad;
+		cs = &comp->rstate[comp->last_recv = ip->ip_p];
+		comp->flags &=~ SLF_TOSS;
+		ip->ip_p = IPPROTO_TCP;
+		hlen = ip->ip_hl;
+		hlen += ((struct tcphdr *)&((int *)ip)[hlen])->th_off;
+		hlen <<= 2;
+		/*
+		 * hlen comes straight off the wire (up to 120 bytes).
+		 * Refuse headers that would overflow the saved-header
+		 * buffer or that extend past the data actually received.
+		 */
+		if (hlen > MAX_HDR || (int)hlen > len)
+			goto bad;
+		BCOPY(ip, &cs->cs_ip, hlen);
+		/* kept zero so the checksum can be recomputed later */
+		cs->cs_ip.ip_sum = 0;
+		cs->cs_hlen = hlen;
+		INCR(sls_uncompressedin)
+		return (len);
+
+	default:
+		goto bad;
+
+	case TYPE_COMPRESSED_TCP:
+		break;
+	}
+	/* We've got a compressed packet. */
+	INCR(sls_compressedin)
+	cp = *bufp;
+	changes = *cp++;
+	if (changes & NEW_C) {
+		/* Make sure the state index is in range, then grab the state.
+		 * If we have a good state index, clear the 'discard' flag. */
+		if (*cp >= MAX_STATES)
+			goto bad;
+
+		comp->flags &=~ SLF_TOSS;
+		comp->last_recv = *cp++;
+	} else {
+		/* this packet has an implicit state index.  If we've
+		 * had a line error since the last time we got an
+		 * explicit state index, we have to toss the packet. */
+		if (comp->flags & SLF_TOSS) {
+			INCR(sls_tossed)
+			return (0);
+		}
+	}
+	cs = &comp->rstate[comp->last_recv];
+	hlen = cs->cs_ip.ip_hl << 2;
+	th = (struct tcphdr *)&((u_char *)&cs->cs_ip)[hlen];
+	th->th_sum = htons((*cp << 8) | cp[1]);
+	cp += 2;
+	if (changes & TCP_PUSH_BIT)
+		th->th_flags |= TH_PUSH;
+	else
+		th->th_flags &=~ TH_PUSH;
+
+	switch (changes & SPECIALS_MASK) {
+	case SPECIAL_I:
+		{
+		/* seq and ack both advance by the previous packet's data length */
+		register u_int i = ntohs(cs->cs_ip.ip_len) - cs->cs_hlen;
+		th->th_ack = htonl(ntohl(th->th_ack) + i);
+		th->th_seq = htonl(ntohl(th->th_seq) + i);
+		}
+		break;
+
+	case SPECIAL_D:
+		/* seq advances by the previous packet's data length */
+		th->th_seq = htonl(ntohl(th->th_seq) + ntohs(cs->cs_ip.ip_len)
+				   - cs->cs_hlen);
+		break;
+
+	default:
+		/* the DECODE macros are brace blocks: no trailing semicolon */
+		if (changes & NEW_U) {
+			th->th_flags |= TH_URG;
+			DECODEU(th->th_urp)
+		} else
+			th->th_flags &=~ TH_URG;
+		if (changes & NEW_W)
+			DECODES(th->th_win)
+		if (changes & NEW_A)
+			DECODEL(th->th_ack)
+		if (changes & NEW_S)
+			DECODEL(th->th_seq)
+		break;
+	}
+	if (changes & NEW_I) {
+		DECODES(cs->cs_ip.ip_id)
+	} else
+		cs->cs_ip.ip_id = htons(ntohs(cs->cs_ip.ip_id) + 1);
+
+	/*
+	 * At this point, cp points to the first byte of data in the
+	 * packet.  If we're not aligned on a 4-byte boundary, copy the
+	 * data down so the ip & tcp headers will be aligned.  Then back up
+	 * cp by the tcp/ip header length to make room for the reconstructed
+	 * header (we assume the packet we were handed has enough space to
+	 * prepend 128 bytes of header).  Adjust the length to account for
+	 * the new header & fill in the IP total length.
+	 */
+	len -= (cp - *bufp);
+	if (len < 0)
+		/* we must have dropped some characters (crc should detect
+		 * this but the old slip framing won't) */
+		goto bad;
+
+	if ((int)cp & 3) {
+		if (len > 0)
+			(void) ovbcopy(cp, (caddr_t)((int)cp &~ 3), len);
+		cp = (u_char *)((int)cp &~ 3);
+	}
+	cp -= cs->cs_hlen;
+	len += cs->cs_hlen;
+	cs->cs_ip.ip_len = htons(len);
+	BCOPY(&cs->cs_ip, cp, cs->cs_hlen);
+	*bufp = cp;
+
+	/* recompute the ip header checksum */
+	{
+		/* hlen is still the IP header length in bytes; `changes' is reused as the sum */
+		register u_short *bp = (u_short *)cp;
+		for (changes = 0; hlen > 0; hlen -= 2)
+			changes += *bp++;
+		changes = (changes & 0xffff) + (changes >> 16);
+		changes = (changes & 0xffff) + (changes >> 16);
+		((struct ip *)cp)->ip_sum = ~ changes;
+	}
+	return (len);
+bad:
+	comp->flags |= SLF_TOSS;
+	INCR(sls_errorin)
+	return (0);
+}
diff --git a/sys/net/slcompress.h b/sys/net/slcompress.h
new file mode 100644
index 000000000000..cefe940f1981
--- /dev/null
+++ b/sys/net/slcompress.h
@@ -0,0 +1,157 @@
+/* slcompress.h 8.1 93/06/10 */
+/*
+ * Definitions for tcp compression routines.
+ *
+ * $Header: slcompress.h,v 1.10 89/12/31 08:53:02 van Exp $
+ *
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Van Jacobson (van@helios.ee.lbl.gov), Dec 31, 1989:
+ * - Initial distribution.
+ */
+
+/*
+ * MAX_STATES is the number of connection-state slots kept in each
+ * direction (it sizes tstate[] and rstate[] in struct slcompress).
+ * MAX_HDR is the size of the buffer that holds a saved IP/TCP header
+ * in struct cstate.
+ */
+#define MAX_STATES 16 /* must be > 2 and < 256 */
+#define MAX_HDR MLEN /* XXX 4bsd-ism: should really be 128 */
+
+/*
+ * Compressed packet format:
+ *
+ * The first octet contains the packet type (top 3 bits), TCP
+ * 'push' bit, and flags that indicate which of the 4 TCP sequence
+ * numbers have changed (bottom 5 bits). The next octet is a
+ * conversation number that associates a saved IP/TCP header with
+ * the compressed packet. The next two octets are the TCP checksum
+ * from the original datagram. The next 0 to 15 octets are
+ * sequence number changes, one change per bit set in the header
+ * (there may be no changes and there are two special cases where
+ * the receiver implicitly knows what changed -- see below).
+ *
+ * There are 5 numbers which can change (they are always inserted
+ * in the following order): TCP urgent pointer, window,
+ * acknowledgement, sequence number and IP ID. (The urgent pointer
+ * is different from the others in that its value is sent, not the
+ * change in value.) Since typical use of SLIP links is biased
+ * toward small packets (see comments on MTU/MSS below), changes
+ * use a variable length coding with one octet for numbers in the
+ * range 1 - 255 and 3 octets (0, MSB, LSB) for numbers in the
+ * range 256 - 65535 or 0. (If the change in sequence number or
+ * ack is more than 65535, an uncompressed packet is sent.)
+ */
+
+/*
+ * Packet types (must not conflict with IP protocol version)
+ *
+ * The top nibble of the first octet is the packet type. There are
+ * three possible types: IP (not proto TCP or tcp with one of the
+ * control flags set); uncompressed TCP (a normal IP/TCP packet but
+ * with the 8-bit protocol field replaced by an 8-bit connection id --
+ * this type of packet syncs the sender & receiver); and compressed
+ * TCP (described above).
+ *
+ * LSB of 4-bit field is TCP "PUSH" bit (a worthless anachronism) and
+ * is logically part of the 4-bit "changes" field that follows. Top
+ * three bits are actual packet type. For backward compatibility
+ * and in the interest of conserving bits, numbers are chosen so the
+ * IP protocol version number (4) which normally appears in this nibble
+ * means "IP packet".
+ */
+
+/* packet types */
+#define TYPE_IP 0x40
+#define TYPE_UNCOMPRESSED_TCP 0x70
+#define TYPE_COMPRESSED_TCP 0x80
+#define TYPE_ERROR 0x00
+
+/* Bits in first octet of compressed packet */
+#define NEW_C 0x40 /* flag bits for what changed in a packet */ /* explicit connection id octet follows */
+#define NEW_I 0x20 /* IP id delta present (otherwise id is incremented by one) */
+#define NEW_S 0x08 /* sequence number delta present */
+#define NEW_A 0x04 /* ack delta present */
+#define NEW_W 0x02 /* window delta present */
+#define NEW_U 0x01 /* urgent pointer value present */
+
+/* reserved, special-case values of above */
+/* (a packet whose real changes equal one of these is sent uncompressed) */
+#define SPECIAL_I (NEW_S|NEW_W|NEW_U) /* echoed interactive traffic */
+#define SPECIAL_D (NEW_S|NEW_A|NEW_W|NEW_U) /* unidirectional data */
+#define SPECIALS_MASK (NEW_S|NEW_A|NEW_W|NEW_U)
+
+#define TCP_PUSH_BIT 0x10
+
+
+/*
+ * "state" data for each active tcp conversation on the wire. This is
+ * basically a copy of the entire IP/TCP header from the last packet
+ * we saw from the conversation together with a small identifier
+ * the transmit & receive ends of the line use to locate saved header.
+ */
+struct cstate {
+ struct cstate *cs_next; /* next most recently used cstate (xmit only) */
+ u_short cs_hlen; /* size of hdr (receive only) */
+ u_char cs_id; /* connection # associated with this state */
+ u_char cs_filler; /* presumably padding -- not referenced by slcompress.c */
+ union {
+ char csu_hdr[MAX_HDR]; /* raw storage; bounds the saved header size */
+ struct ip csu_ip; /* ip/tcp hdr from most recent packet */
+ } slcs_u;
+};
+/* shorthands for the two views of the saved header */
+#define cs_ip slcs_u.csu_ip
+#define cs_hdr slcs_u.csu_hdr
+
+/*
+ * all the state data for one serial line (we need one of these
+ * per line).
+ */
+struct slcompress {
+ struct cstate *last_cs; /* most recently used tstate */
+ u_char last_recv; /* last rcvd conn. id */
+ u_char last_xmit; /* last sent conn. id */
+ u_short flags; /* SLF_* values */
+#ifndef SL_NO_STATS
+ /* counters, bumped through the INCR() macro in slcompress.c */
+ int sls_packets; /* outbound packets */
+ int sls_compressed; /* outbound compressed packets */
+ int sls_searches; /* searches for connection state */
+ int sls_misses; /* times couldn't find conn. state */
+ int sls_uncompressedin; /* inbound uncompressed packets */
+ int sls_compressedin; /* inbound compressed packets */
+ int sls_errorin; /* inbound unknown type packets */
+ int sls_tossed; /* inbound packets tossed because of error */
+#endif
+ struct cstate tstate[MAX_STATES]; /* xmit connection states */
+ struct cstate rstate[MAX_STATES]; /* receive connection states */
+};
+/* flag values */
+#define SLF_TOSS 1 /* tossing rcvd frames because of input err */
+
+/*
+ * Entry points: reset the per-line state, compress an outbound packet
+ * (returns one of the TYPE_* packet types) and process an inbound
+ * packet (returns the resulting length, 0 if it must be dropped).
+ */
+void sl_compress_init __P((struct slcompress *));
+u_int sl_compress_tcp __P((struct mbuf *,
+ struct ip *, struct slcompress *, int));
+int sl_uncompress_tcp __P((u_char **, int, u_int, struct slcompress *));
diff --git a/sys/net/slip.h b/sys/net/slip.h
new file mode 100644
index 000000000000..4caeb464df33
--- /dev/null
+++ b/sys/net/slip.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)slip.h 8.1 (Berkeley) 2/12/94
+ */
+
+/* Ioctls operating on SLIP ttys. */
+#define SLIOCGUNIT _IOR('t', 88, int) /* get slip unit number */
+
+/*
+ * Definitions of the pseudo-link-level header attached to slip
+ * packets grabbed by the packet filter (bpf) traffic monitor.
+ * The header is one direction octet followed by CHDR_LEN octets of
+ * compressed header data (1 + 15 = SLIP_HDRLEN).
+ */
+#define SLIP_HDRLEN 16 /* BPF SLIP header length */
+
+/* Offsets into BPF SLIP header. */
+#define SLX_DIR 0 /* direction; see below */
+#define SLX_CHDR 1 /* compressed header data */
+#define CHDR_LEN 15 /* length of compressed header data */
+
+/* Values stored at offset SLX_DIR. */
+#define SLIPDIR_IN 0 /* incoming */
+#define SLIPDIR_OUT 1 /* outgoing */
diff --git a/sys/netccitt/README.hdlc b/sys/netccitt/README.hdlc
new file mode 100644
index 000000000000..24b5fef96df9
--- /dev/null
+++ b/sys/netccitt/README.hdlc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ *
+ * @(#)README.hdlc 8.1 (Berkeley) 6/10/93
+ *
+ * X.25 HDLC DATA LINK LEVEL:
+ *
+ *
+ * This module implements the Link Level of the Open Systems Interconnect
+ * Model. The implementation is based on the ISO High-Level Data Link
+ * Control (HDLC). These procedures subscribe to the principles of the
+ * ISO-Class of Procedures for point-to-point. These procedures implement
+ * two-way asynchronous balanced mode (LAPB) as recommended by the CCITT.
+ *
+ * The HDLC protocol layer interface consists of the following procedures:
+ * Hd_init (pr_init)
+ * Hd_output (pr_output)
+ * Hd_input (pr_input)
+ * Hd_timer (pr_slowtimo)
+ *
+ * Note: Supervisory commands RR, RNR and REJ are not transmitted by this
+ * station.
+ *
+ * This station never enters a busy (RNR) condition.
+ *
+ * The "Generate_rr" variable can be set to FALSE. This means that
+ * we NEVER send an RR. This works just fine if the network level
+ * is X.25 packet protocol -- which it is.
+ *
+ * Currently, this is only a DTE implementation.
+ *
+ * Think about:
+ * If the remote is busy, no iframes are sent. The remote sends a RR
+ * to clear this condition. However, this RR may be damaged, causing
+ * a possible deadlock. A solution is to poll with iframe (P(S)==P(R)
+ * of RNR) indefinitely.
+ *
+ *
+ * Date: February 1984
+ *
+ * Author: Gerald W. Neufeld
+ *
+ * Installation: Department of Computer Science
+ * University of British Columbia
+ * Vancouver, BC, CANADA.
+ *
+ * History:
+ *
+ *
+ */
diff --git a/sys/netccitt/README.packet b/sys/netccitt/README.packet
new file mode 100644
index 000000000000..858d75cccdc8
--- /dev/null
+++ b/sys/netccitt/README.packet
@@ -0,0 +1,36 @@
+/*
+ * @(#)README.packet 8.1 (Berkeley) 6/10/93
+ *
+ * X.25 NETWORK PACKET LEVEL:
+ *
+ * This implementation is based on Recommendation X.25 as agreed at the
+ * March 1976 and the February 1980 meetings of CCITT Study Group VII.
+ * However, not all aspects are implemented. The following is a list of
+ * features which are not yet or may never be implemented:
+ *
+ * 1. D bit
+ * 2. PVC
+ * 3. fast select
+ *
+ *
+ * Note: This implementation is for DTEs only.
+ *
+ * Currently, only the 1976 version is implemented.
+ *
+ *
+ * Date: February, 1984
+ *
+ * Author: Gerald W. Neufeld
+ *
+ * Installation: Department of Computer Science
+ * University of British Columbia
+ * Vancouver, BC, CANADA
+ *
+ * To Do: Find some reasonable heuristic for piggybacking packet
+ * level acks.
+ *
+ * Bugs: Clear might be sent before data is all out.
+ *
+ * History:
+ *
+ */
diff --git a/sys/netccitt/ccitt_proto.c b/sys/netccitt/ccitt_proto.c
new file mode 100644
index 000000000000..d832fd38f926
--- /dev/null
+++ b/sys/netccitt/ccitt_proto.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ccitt_proto.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+
+#include <netccitt/x25.h>
+
+#include <net/radix.h>
+
+/*
+ * Definitions of protocols supported in the CCITT domain.
+ */
+
+extern struct domain ccittdomain; /* defined at the end of this file */
+#define DOMAIN &ccittdomain
+
+/*
+ * Forward declarations of the protocol entry points referenced by the
+ * switch below.  The LLC and HDLC link layers are compiled in only
+ * when the corresponding option is defined; the X.25 packet layer
+ * (pk_*) is always present.
+ */
+#ifdef LLC
+int llc_output();
+void llc_ctlinput(), llc_init(), llc_timer();
+#endif
+#ifdef HDLC
+int hd_output();
+void hd_ctlinput(), hd_init(), hd_timer();
+#endif
+int pk_usrreq(), pk_ctloutput();
+void pk_timer(), pk_init(), pk_input(), pk_ctlinput();
+
+/*
+ * Protocol switch for the CCITT domain.  The initializers are
+ * positional; the slot meanings come from struct protosw, which is
+ * declared elsewhere (see <sys/protosw.h>).  The LLC and HDLC entries
+ * have no socket type and no user-request routine; only the X.25
+ * packet layer is a SOCK_STREAM protocol with pk_usrreq.
+ */
+struct protosw ccittsw[] = {
+#ifdef LLC
+ { 0, DOMAIN, IEEEPROTO_802LLC,0,
+ 0, llc_output, llc_ctlinput, 0,
+ 0,
+ llc_init, 0, llc_timer, 0,
+ },
+#endif
+#ifdef HDLC
+ { 0, DOMAIN, CCITTPROTO_HDLC,0,
+ 0, hd_output, hd_ctlinput, 0,
+ 0,
+ hd_init, 0, hd_timer, 0,
+ },
+#endif
+ { SOCK_STREAM, DOMAIN, CCITTPROTO_X25, PR_CONNREQUIRED|PR_ATOMIC|PR_WANTRCVD,
+ pk_input, 0, pk_ctlinput, pk_ctloutput,
+ pk_usrreq,
+ pk_init, 0, pk_timer, 0,
+ }
+};
+
+/*
+ * Domain descriptor for AF_CCITT.  The switch is bracketed by its
+ * first entry and one past its last entry; the trailing initializers
+ * supply rn_inithead, the constant 32 and the size of a sockaddr_x25
+ * (presumably the routing-table attach routine, key offset and key
+ * size -- verify against struct domain).
+ */
+struct domain ccittdomain =
+ { AF_CCITT, "ccitt", 0, 0, 0, ccittsw,
+ &ccittsw[sizeof(ccittsw)/sizeof(ccittsw[0])], 0,
+ rn_inithead, 32, sizeof (struct sockaddr_x25) };
diff --git a/sys/netccitt/dll.h b/sys/netccitt/dll.h
new file mode 100644
index 000000000000..46ded88eda88
--- /dev/null
+++ b/sys/netccitt/dll.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) Dirk Husemann, Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)dll.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Additional PRC_* codes, visible to kernel builds only.
+ */
+#ifdef KERNEL
+#ifndef PRC_IFUP /* keep an existing definition if some other header already supplied one */
+#define PRC_IFUP 3
+#endif
+#define PRC_CONNECT_INDICATION 8
+#define PRC_CONNECT_REQUEST 9
+#define PRC_DISCONNECT_REQUEST 10
+#define PRC_DISCONNECT_INDICATION 11
+#define PRC_RESET_REQUEST 12
+#endif
+
+/*
+ * Data link layer configuration --- basically a copy of the relevant parts
+ * of x25config, implemented to become a little bit more network
+ * layer independent. (Probably only used for casting et al.)
+ */
+struct dllconfig { /* two u_short words of bit-fields */
+ u_short dllcfg_unused0:4, /* placeholder bits; presumably keep positions aligned with x25config -- confirm */
+ dllcfg_unused1:4, /* placeholder bits */
+ dllcfg_trace:1, /* link level tracing flag */
+ dllcfg_window:7; /* link level window size */
+ u_short dllcfg_xchxid:1, /* exchange XID (not yet) */
+ dllcfg_unused2:7; /* here be dragons */
+};
+
+struct dll_ctlinfo { /* control information; the "up" and "down" views share storage */
+ union {
+ struct {
+ struct dllconfig *dctli_up_cfg; /* link configuration */
+ u_char dctli_up_lsap; /* presumably the LLC service access point -- confirm */
+ } CTLI_UP;
+ struct {
+ caddr_t dctli_down_pcb; /* opaque control block pointer */
+ struct rtentry *dctli_down_rt; /* route entry */
+ struct dllconfig *dctli_down_llconf; /* link configuration */
+ } CTLI_DOWN;
+ } CTLIun; /* only one of CTLI_UP / CTLI_DOWN is meaningful at a time */
+};
+#define dlcti_cfg CTLIun.CTLI_UP.dctli_up_cfg /* shorthand accessors; note the dlcti_ spelling vs. the dctli_ member prefix */
+#define dlcti_lsap CTLIun.CTLI_UP.dctli_up_lsap
+#define dlcti_pcb CTLIun.CTLI_DOWN.dctli_down_pcb
+#define dlcti_rt CTLIun.CTLI_DOWN.dctli_down_rt
+#define dlcti_conf CTLIun.CTLI_DOWN.dctli_down_llconf
diff --git a/sys/netccitt/hd_debug.c b/sys/netccitt/hd_debug.c
new file mode 100644
index 000000000000..b8a45a3f59a3
--- /dev/null
+++ b/sys/netccitt/hd_debug.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)hd_debug.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+
+#include <netccitt/hdlc.h>
+#include <netccitt/hd_var.h>
+#include <netccitt/x25.h>
+
+#ifdef HDLCDEBUG
+#define NTRACE 32
+/* Ring of the last NTRACE frames: filled by hd_savetrace, printed and emptied by hd_dumptrace. */
+struct hdlctrace {
+ struct hdcb *ht_hdp; /* control block the frame was traced for */
+ short ht_dir; /* direction code passed to hd_trace */
+ struct mbuf *ht_frame; /* private copy of the frame, 0 when the slot is empty */
+ struct timeval ht_time; /* value of `time' when the frame was recorded */
+} hdtrace[NTRACE];
+
+int lasttracelogged, freezetrace; /* next slot to overwrite; nonzero freezetrace suspends recording */
+#endif
+/* Print a decoded one-line summary of an HDLC frame when xc_ltrace is set; under HDLCDEBUG the frame is recorded first. */
+hd_trace (hdp, direction, frame) /* direction has no declaration below, so it defaults to int */
+struct hdcb *hdp;
+register struct Hdlc_frame *frame;
+{
+ register char *s;
+ register int nr, pf, ns, i;
+ struct Hdlc_iframe *iframe = (struct Hdlc_iframe *) frame;
+
+#ifdef HDLCDEBUG
+ hd_savetrace (hdp, direction, frame); /* recorded whether or not printing is enabled */
+#endif
+ if (hdp -> hd_xcp -> xc_ltrace) {
+ if (direction == RX)
+ printf ("F-In: ");
+ else if (direction == 2) /* NOTE(review): bare 2 is presumably a direction code from hd_var.h -- confirm */
+ printf ("F-Xmt: ");
+ else
+ printf ("F-Out: ");
+
+ nr = iframe -> nr; /* fields are read through the I-frame layout whatever the real frame type */
+ pf = iframe -> pf;
+ ns = iframe -> ns;
+
+ switch (hd_decode (hdp, frame)) {
+ case SABM:
+ printf ("SABM : PF=%d\n", pf);
+ break;
+
+ case DISC:
+ printf ("DISC : PF=%d\n", pf);
+ break;
+
+ case DM:
+ printf ("DM : PF=%d\n", pf);
+ break;
+
+ case FRMR:
+ {
+ register struct Frmr_frame *f = (struct Frmr_frame *)frame;
+
+ printf ("FRMR : PF=%d, TEXT=", pf);
+ for (s = (char *) frame, i = 0; i < 5; ++i, ++s) /* first 5 bytes of the frame in hex */
+ printf ("%x ", (int) * s & 0xff);
+ printf ("\n");
+ printf ("control=%x v(s)=%d v(r)=%d w%d x%d y%d z%d\n",
+ f->frmr_control, f->frmr_ns, f->frmr_nr,
+ f->frmr_w, f->frmr_x, f->frmr_y, f->frmr_z);
+ break;
+ }
+
+ case UA:
+ printf ("UA : PF=%d\n", pf);
+ break;
+
+ case RR:
+ printf ("RR : N(R)=%d, PF=%d\n", nr, pf);
+ break;
+
+ case RNR:
+ printf ("RNR : N(R)=%d, PF=%d\n", nr, pf);
+ break;
+
+ case REJ:
+ printf ("REJ : N(R)=%d, PF=%d\n", nr, pf);
+ break;
+
+ case IFRAME:
+ {
+ register struct mbuf *m;
+ register int len = 0;
+
+ for(m = dtom (frame); m; m = m -> m_next) /* total bytes in the whole mbuf chain */
+ len += m -> m_len;
+ len -= HDHEADERLN; /* report the length without the HDLC header */
+ printf ("IFRAME : N(R)=%d, PF=%d, N(S)=%d, DATA(%d)=",
+ nr, pf, ns, len);
+ for (s = (char *)iframe->i_field, i = 0; i < 3; ++i, ++s) /* always 3 bytes, independent of len */
+ printf ("%x ", (int) *s & 0xff);
+ printf ("\n");
+ break;
+ }
+
+ default:
+ printf ("ILLEGAL: ");
+ for (s = (char *) frame, i = 0; i < 5; ++i, ++s)
+ printf ("%x ", (int) *s & 0xff);
+ printf ("\n");
+ }
+
+ }
+}
+
+#ifdef HDLCDEBUG
+static
+hd_savetrace (hdp, dir, frame) /* dir has no declaration below, so it defaults to int */
+struct hdcb *hdp;
+struct Hdlc_frame *frame;
+{
+ register struct hdlctrace *htp;
+ register struct mbuf *m;
+ /* Record a copy of the frame in the next ring slot, releasing whatever the slot held before. */
+ if (freezetrace) /* set by hd_dumptrace while it walks the ring */
+ return;
+ htp = &hdtrace[lasttracelogged];
+ lasttracelogged = (lasttracelogged + 1) % NTRACE;
+ if (m = htp->ht_frame) /* assignment intended: free the old copy if there is one */
+ m_freem (m);
+ m = dtom (frame);
+ htp->ht_frame = m_copy (m, 0, m->m_len); /* copies only the first mbuf's m_len bytes */
+ htp->ht_hdp = hdp;
+ htp->ht_dir = dir;
+ htp->ht_time = time;
+}
+/* Print hd_status, the retransmit queue and every recorded frame belonging to hdp, emptying those trace slots. */
+hd_dumptrace (hdp)
+struct hdcb *hdp;
+{
+ register int i, ltrace;
+ register struct hdlctrace *htp;
+
+ freezetrace = 1; /* hd_trace below calls hd_savetrace; keep it from recording the replay */
+ hd_status (hdp);
+ printf ("retransmit queue:");
+ for (i = 0; i < 8; i++)
+ printf (" %x", hdp -> hd_retxq[i]);
+ printf ("\n");
+ ltrace = hdp -> hd_xcp -> xc_ltrace; /* saved so the flag can be restored at the end */
+ hdp -> hd_xcp -> xc_ltrace = 1; /* force hd_trace to print */
+ for (i = 0; i < NTRACE; i++) {
+ htp = &hdtrace[(lasttracelogged + i) % NTRACE]; /* start at the next slot to be overwritten, i.e. the oldest */
+ if (htp->ht_hdp != hdp || htp->ht_frame == 0) /* skip other links and empty slots */
+ continue;
+ printf ("%d/%d ", htp->ht_time.tv_sec & 0xff,
+ htp->ht_time.tv_usec / 10000);
+ hd_trace (htp->ht_hdp, htp->ht_dir,
+ mtod (htp->ht_frame, struct Hdlc_frame *));
+ m_freem (htp->ht_frame); /* a dumped entry is consumed */
+ htp->ht_frame = 0;
+ }
+ hdp -> hd_xcp -> xc_ltrace = ltrace;
+ freezetrace = 0;
+}
+#endif
diff --git a/sys/netccitt/hd_input.c b/sys/netccitt/hd_input.c
new file mode 100644
index 000000000000..eb939d031991
--- /dev/null
+++ b/sys/netccitt/hd_input.c
@@ -0,0 +1,669 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)hd_input.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+
+#include <netccitt/hdlc.h>
+#include <netccitt/hd_var.h>
+#include <netccitt/x25.h>
+
+static frame_reject();
+static rej_routine();
+static free_iframes();
+/*
+ * HDLC INPUT INTERFACE
+ *
+ * Called when the HDLC physical device has completed reading a frame.
+ * Drains hdintrq, maps each frame to its hdcb and runs process_rxframe.
+ */
+
+hdintr ()
+{
+ register struct mbuf *m;
+ register struct hdcb *hdp;
+ register struct ifnet *ifp;
+ register int s;
+ static struct ifnet *lastifp;
+ static struct hdcb *lasthdp;
+
+ for (;;) {
+ s = splimp (); /* the queue is only touched between splimp and splx */
+ IF_DEQUEUE (&hdintrq, m);
+ splx (s);
+ if (m == 0) /* queue empty: done */
+ break;
+ if (m->m_len < HDHEADERLN) {
+ printf ("hdintr: packet too short (len=%d)\n",
+ m->m_len);
+ m_freem (m);
+ continue;
+ }
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("hdintr");
+ ifp = m->m_pkthdr.rcvif;
+
+ /*
+ * look up the appropriate hdlc control block; the last
+ * (ifp, hdcb) pair found is cached in the two statics.
+ */
+ if (ifp == lastifp)
+ hdp = lasthdp;
+ else {
+ for (hdp = hdcbhead; hdp; hdp = hdp->hd_next)
+ if (hdp->hd_ifp == ifp)
+ break;
+ if (hdp == 0) {
+ printf ("hdintr: unknown interface %x\n", ifp);
+ m_freem (m);
+ continue;
+ }
+ lastifp = ifp;
+ lasthdp = hdp;
+ }
+ /* NOTE(review): a NULL rcvif before any successful lookup hits the cache with hdp == 0 -- confirm drivers always set rcvif. */
+ /* Process_rxframe returns FALSE if the frame was NOT queued
+ for the next higher layers. */
+ if (process_rxframe (hdp, m) == FALSE)
+ m_freem (m);
+ }
+}
+/* Run the link-level state machine on one received frame; returns TRUE only if process_iframe passed the mbuf upward. */
+process_rxframe (hdp, fbuf)
+register struct hdcb *hdp;
+register struct mbuf *fbuf;
+{
+ register int queued = FALSE, frametype, pf;
+ register struct Hdlc_frame *frame;
+
+ frame = mtod (fbuf, struct Hdlc_frame *);
+ pf = ((struct Hdlc_iframe *) frame) -> pf;
+
+ hd_trace (hdp, RX, frame);
+ if (frame -> address != ADDRESS_A && frame -> address != ADDRESS_B)
+ return (queued); /* unknown address: FALSE, so the caller frees the mbuf */
+
+ switch ((frametype = hd_decode (hdp, frame)) + hdp->hd_state) { /* NOTE(review): relies on type + state sums being unique -- confirm in hdlc.h / hd_var.h */
+ case DM + DISC_SENT:
+ case UA + DISC_SENT:
+ /*
+ * Link now closed. Leave timer running
+ * so hd_timer() can periodically check the
+ * status of interface driver flag bit IFF_UP.
+ */
+ hdp->hd_state = DISCONNECTED;
+ break;
+
+ case DM + INIT:
+ case UA + INIT:
+ /*
+ * This is a non-standard state change needed for DCEs
+ * that do dynamic link selection. We can't go into the
+ * usual "SEND DM" state because a DM is a SARM in LAP.
+ */
+ hd_writeinternal (hdp, SABM, POLLOFF);
+ hdp->hd_state = SABM_SENT;
+ SET_TIMER (hdp);
+ break;
+
+ case SABM + DM_SENT:
+ case SABM + WAIT_SABM:
+ hd_writeinternal (hdp, UA, pf); /* FALLTHROUGH: after answering with UA the link comes up as below */
+ case UA + SABM_SENT:
+ case UA + WAIT_UA:
+ KILL_TIMER (hdp);
+ hd_initvars (hdp);
+ hdp->hd_state = ABM;
+ hd_message (hdp, "Link level operational");
+ /* Notify the packet level - to send RESTART. */
+ (void) pk_ctlinput (PRC_LINKUP, hdp->hd_pkp);
+ break;
+
+ case SABM + SABM_SENT:
+ /* Got a SABM collision. Acknowledge the remote's SABM
+ via UA but still wait for UA. */
+ hd_writeinternal (hdp, UA, pf);
+ break;
+
+ case SABM + ABM:
+ /* Request to reset the link from the remote. */
+ KILL_TIMER (hdp);
+ hd_message (hdp, "Link reset");
+#ifdef HDLCDEBUG
+ hd_dumptrace (hdp);
+#endif
+ hd_flush (hdp->hd_ifp);
+ hd_writeinternal (hdp, UA, pf);
+ hd_initvars (hdp);
+ (void) pk_ctlinput (PRC_LINKRESET, hdp->hd_pkp);
+ hdp->hd_resets++;
+ break;
+
+ case SABM + WAIT_UA:
+ hd_writeinternal (hdp, UA, pf);
+ break;
+
+ case DM + ABM:
+ hd_message (hdp, "DM received: link down");
+#ifdef HDLCDEBUG
+ hd_dumptrace (hdp);
+#endif
+ (void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);
+ hd_flush (hdp->hd_ifp); /* FALLTHROUGH: then try to bring the link back up with a SABM */
+ case DM + DM_SENT:
+ case DM + WAIT_SABM:
+ case DM + WAIT_UA:
+ hd_writeinternal (hdp, SABM, pf);
+ hdp->hd_state = SABM_SENT;
+ SET_TIMER (hdp);
+ break;
+
+ case DISC + INIT:
+ case DISC + DM_SENT:
+ case DISC + SABM_SENT:
+ /* Note: This is a non-standard state change. */
+ hd_writeinternal (hdp, UA, pf);
+ hd_writeinternal (hdp, SABM, POLLOFF);
+ hdp->hd_state = SABM_SENT;
+ SET_TIMER (hdp);
+ break;
+
+ case DISC + WAIT_UA:
+ hd_writeinternal (hdp, DM, pf);
+ SET_TIMER (hdp);
+ hdp->hd_state = DM_SENT;
+ break;
+
+ case DISC + ABM:
+ hd_message (hdp, "DISC received: link down");
+ (void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp); /* FALLTHROUGH: acknowledge the DISC with UA */
+ case DISC + WAIT_SABM:
+ hd_writeinternal (hdp, UA, pf);
+ hdp->hd_state = DM_SENT;
+ SET_TIMER (hdp);
+ break;
+
+ case UA + ABM:
+ hd_message (hdp, "UA received: link down");
+ (void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp); /* FALLTHROUGH: answer with DM */
+ case UA + WAIT_SABM:
+ hd_writeinternal (hdp, DM, pf);
+ hdp->hd_state = DM_SENT;
+ SET_TIMER (hdp);
+ break;
+
+ case FRMR + DM_SENT:
+ hd_writeinternal (hdp, SABM, pf);
+ hdp->hd_state = SABM_SENT;
+ SET_TIMER (hdp);
+ break;
+
+ case FRMR + WAIT_SABM:
+ hd_writeinternal (hdp, DM, pf);
+ hdp->hd_state = DM_SENT;
+ SET_TIMER (hdp);
+ break;
+
+ case FRMR + ABM:
+ hd_message (hdp, "FRMR received: link down");
+ (void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);
+#ifdef HDLCDEBUG
+ hd_dumptrace (hdp);
+#endif
+ hd_flush (hdp->hd_ifp);
+ hd_writeinternal (hdp, SABM, pf);
+ hdp->hd_state = WAIT_UA; /* unlike the other SABM senders above, this path waits in WAIT_UA */
+ SET_TIMER (hdp);
+ break;
+
+ case RR + ABM:
+ case RNR + ABM:
+ case REJ + ABM:
+ process_sframe (hdp, (struct Hdlc_sframe *)frame, frametype);
+ break;
+
+ case IFRAME + ABM:
+ queued = process_iframe (hdp, fbuf, (struct Hdlc_iframe *)frame); /* the only path that can return TRUE */
+ break;
+
+ case IFRAME + SABM_SENT:
+ case RR + SABM_SENT:
+ case RNR + SABM_SENT:
+ case REJ + SABM_SENT:
+ hd_writeinternal (hdp, DM, POLLON);
+ hdp->hd_state = DM_SENT;
+ SET_TIMER (hdp);
+ break;
+
+ case IFRAME + WAIT_SABM:
+ case RR + WAIT_SABM:
+ case RNR + WAIT_SABM:
+ case REJ + WAIT_SABM:
+ hd_writeinternal (hdp, FRMR, POLLOFF);
+ SET_TIMER (hdp);
+ break;
+
+ case ILLEGAL + SABM_SENT:
+ hdp->hd_unknown++;
+ hd_writeinternal (hdp, DM, POLLOFF);
+ hdp->hd_state = DM_SENT;
+ SET_TIMER (hdp);
+ break;
+
+ case ILLEGAL + ABM:
+ hd_message (hdp, "Unknown frame received: link down");
+ (void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp); /* FALLTHROUGH: count it and send FRMR */
+ case ILLEGAL + WAIT_SABM:
+ hdp->hd_unknown++;
+#ifdef HDLCDEBUG
+ hd_dumptrace (hdp);
+#endif
+ hd_writeinternal (hdp, FRMR, POLLOFF);
+ hdp->hd_state = WAIT_SABM;
+ SET_TIMER (hdp);
+ break;
+ } /* no default: any other frame/state combination is silently ignored */
+
+ return (queued);
+}
+/* Validate a received I-frame in ABM state and, if acceptable, strip the HDLC header and pass it to pk_input; returns TRUE only then. */
+process_iframe (hdp, fbuf, frame)
+register struct hdcb *hdp;
+struct mbuf *fbuf;
+register struct Hdlc_iframe *frame;
+{
+ register int nr = frame -> nr,
+ ns = frame -> ns,
+ pf = frame -> pf;
+ register int queued = FALSE;
+
+ /*
+ * Validate the iframe's N(R) value. Its N(R) value must be in
+ * sync with our V(S) value and our "last received nr".
+ */
+
+ if (valid_nr (hdp, nr, FALSE) == FALSE) {
+ frame_reject (hdp, Z, frame);
+ return (queued);
+ }
+
+
+ /*
+ * This section tests the IFRAME for proper sequence. That is, its
+ * sequence number N(S) MUST be equal to our V(R) (hd_vr).
+ */
+
+ if (ns != hdp->hd_vr) {
+ hdp->hd_invalid_ns++;
+ if (pf || (hdp->hd_condition & REJ_CONDITION) == 0) {
+ hdp->hd_condition |= REJ_CONDITION;
+ /*
+ * Flush the transmit queue. This is ugly but we
+ * have no choice. A reject response must be
+ * immediately sent to the DCE. Failure to do so
+ * may result in another out of sequence iframe
+ * arriving (and thus sending another reject)
+ * before the first reject is transmitted. This
+ * will cause the DCE to receive two or more
+ * rejects back to back, which must never happen.
+ */
+ hd_flush (hdp->hd_ifp);
+ hd_writeinternal (hdp, REJ, pf);
+ }
+ return (queued);
+ }
+ hdp->hd_condition &= ~REJ_CONDITION; /* an in-sequence frame ends the reject condition */
+
+ /*
+ * This section finally tests the IFRAME's sequence number against
+ * the window size (K) and the sequence number of the last frame
+ * we have acknowledged. If the IFRAME is completely correct then
+ * it is queued for the packet level.
+ */
+
+ if (ns != (hdp -> hd_lasttxnr + hdp -> hd_xcp -> xc_lwsize) % MODULUS) {
+ hdp -> hd_vr = (hdp -> hd_vr + 1) % MODULUS;
+ if (pf == 1) {
+ /* Must generate a RR or RNR with final bit on. */
+ hd_writeinternal (hdp, RR, POLLON);
+ } else
+ /*
+ * Hopefully we can piggyback the RR, if not we will generate
+ * a RR when T3 timer expires.
+ */
+ if (hdp -> hd_rrtimer == 0)
+ hdp->hd_rrtimer = hd_t3;
+
+ /* Forward iframe to packet level of X.25. */
+ fbuf -> m_data += HDHEADERLN; /* strip the HDLC header in place */
+ fbuf -> m_len -= HDHEADERLN;
+ fbuf -> m_pkthdr.len -= HDHEADERLN;
+ fbuf -> m_pkthdr.rcvif = (struct ifnet *)hdp -> hd_pkp; /* rcvif is reused to carry the packet-level handle */
+#ifdef BSD4_3
+ fbuf->m_act = 0; /* probably not necessary */
+#else
+ {
+ register struct mbuf *m;
+
+ for (m = fbuf; m -> m_next; m = m -> m_next) /* m_act is 0 on every mbuf but the last ... */
+ m -> m_act = (struct mbuf *) 0;
+ m -> m_act = (struct mbuf *) 1; /* ... which is tagged with 1 */
+ }
+#endif
+ pk_input (fbuf);
+ queued = TRUE;
+ hd_start (hdp);
+ } else {
+ /*
+ * Here if the remote station has transmitted more iframes than
+ * the number which have been acknowledged plus K.
+ */
+ hdp->hd_invalid_ns++;
+ frame_reject (hdp, W, frame);
+ }
+ return (queued);
+}
+
+/*
+ * Modular window test: returns TRUE when `value' lies in the closed
+ * interval that starts at `rear' and runs forward to `front'.  The
+ * values are sequence numbers modulo some base, so when front is below
+ * rear the interval wraps and is tested as two pieces.  front == rear
+ * is a one-element interval and must take the non-wrapping branch;
+ * the wrapping test would accept every value in that case.
+ */
+
+bool
+range_check (rear, value, front)
+int rear,
+	value,
+	front;
+{
+	register bool result = FALSE;
+
+	if (front >= rear)
+		result = (rear <= value) && (value <= front);
+	else
+		result = (rear <= value) || (value <= front);
+
+	return (result);
+}
+
+/*
+ * This routine handles all the frame reject conditions which can
+ * arise as a result of secondary processing. The frame reject
+ * condition Y (frame length error) is handled elsewhere.
+ */
+
+static
+frame_reject (hdp, rejectcode, frame) /* rejectcode has no declaration below, so it defaults to int */
+struct hdcb *hdp;
+struct Hdlc_iframe *frame;
+{
+ register struct Frmr_frame *frmr = &hd_frmr; /* fills in the shared hd_frmr object, not a per-link one */
+
+ frmr -> frmr_control = ((struct Hdlc_frame *) frame) -> control;
+
+ frmr -> frmr_ns = frame -> ns;
+ frmr -> frmr_f1_0 = 0;
+ frmr -> frmr_nr = frame -> nr;
+ frmr -> frmr_f2_0 = 0;
+
+ frmr -> frmr_0000 = 0;
+ frmr -> frmr_w = frmr -> frmr_x = frmr -> frmr_y =
+ frmr -> frmr_z = 0;
+ switch (rejectcode) { /* no default: an unknown code leaves all four bits clear */
+ case Z:
+ frmr -> frmr_z = 1;/* invalid N(R). */
+ break;
+
+ case Y:
+ frmr -> frmr_y = 1;/* iframe length error. */
+ break;
+
+ case X:
+ frmr -> frmr_x = 1;/* invalid information field. */
+ frmr -> frmr_w = 1; /* X is reported together with W */
+ break;
+
+ case W:
+ frmr -> frmr_w = 1;/* invalid N(S). */
+ }
+
+ hd_writeinternal (hdp, FRMR, POLLOFF);
+
+ hdp->hd_state = WAIT_SABM; /* after sending FRMR, wait for the peer's SABM */
+ SET_TIMER (hdp);
+}
+
+/*
+ * This procedure is invoked whenever we receive a supervisory
+ * frame such as RR, RNR and REJ. All processing for these
+ * frames is done here.
+ */
+
+process_sframe (hdp, frame, frametype)
+register struct hdcb *hdp;
+register struct Hdlc_sframe *frame;
+int frametype;
+{
+ register int nr = frame -> nr, pf = frame -> pf, pollbit = 0; /* pollbit is never changed from 0 */
+
+ if (valid_nr (hdp, nr, pf) == TRUE) { /* also frees the iframes that nr acknowledges */
+ switch (frametype) {
+ case RR:
+ hdp->hd_condition &= ~REMOTE_RNR_CONDITION;
+ break;
+
+ case RNR:
+ hdp->hd_condition |= REMOTE_RNR_CONDITION;
+ hdp->hd_retxcnt = 0;
+ break;
+
+ case REJ:
+ hdp->hd_condition &= ~REMOTE_RNR_CONDITION;
+ rej_routine (hdp, nr);
+ }
+
+ if (pf == 1) {
+ hdp->hd_retxcnt = 0;
+ hdp->hd_condition &= ~TIMER_RECOVERY_CONDITION;
+
+ if (frametype == RR && hdp->hd_lastrxnr == hdp->hd_vs
+ && hdp->hd_timer == 0 && hdp->hd_txq.head == 0)
+ hd_writeinternal(hdp, RR, pf);
+ else
+ /* If any iframes have been queued because of the
+ timer condition, transmit them now. */
+ if (hdp->hd_condition & REMOTE_RNR_CONDITION) {
+ /* Remote is busy or timer condition, so only
+ send one. */
+ if (hdp->hd_vs != hdp->hd_retxqi)
+ hd_send_iframe (hdp, hdp->hd_retxq[hdp->hd_vs], pollbit);
+ }
+ else /* Flush the retransmit list first. */
+ while (hdp->hd_vs != hdp->hd_retxqi)
+ hd_send_iframe (hdp, hdp->hd_retxq[hdp->hd_vs], POLLOFF);
+ }
+
+ hd_start (hdp);
+ } else
+ frame_reject (hdp, Z, (struct Hdlc_iframe *)frame); /* Invalid N(R). */
+}
+
+/*
+ * This routine tests the validity of the N(R) which we have received.
+ * If it is ok, then all the iframes which it acknowledges (if any)
+ * will be freed.
+ */
+
+bool
+valid_nr (hdp, nr, finalbit) /* nr has no declaration below, so it defaults to int */
+register struct hdcb *hdp;
+register int finalbit;
+{
+ /* Make sure it really does acknowledge something. */
+ if (hdp->hd_lastrxnr == nr)
+ return (TRUE);
+
+ /*
+ * This section validates the frame's N(R) value. Its N(R) value
+ * must be in synchronization with our V(S) value and our "last
+ * received nr" variable. If it is correct then we are able to send
+ * more IFRAME's, else frame reject condition is entered.
+ */
+
+ if (range_check (hdp->hd_lastrxnr, nr, hdp->hd_vs) == FALSE) {
+ if ((hdp->hd_condition & TIMER_RECOVERY_CONDITION) &&
+ range_check (hdp->hd_vs, nr, hdp->hd_xx) == TRUE)
+ hdp->hd_vs = nr; /* in timer recovery an N(R) between V(S) and hd_xx moves V(S) forward */
+
+ else {
+ hdp->hd_invalid_nr++;
+ return (FALSE);
+ }
+ }
+
+ /*
+ * If we get to here, we do have a valid frame but it might be out
+ * of sequence. However, we should still accept the receive state
+ * number N(R) since it has already passed our previous test and it
+ * does acknowledge frames which we are sending.
+ */
+
+ KILL_TIMER (hdp);
+ free_iframes (hdp, &nr, finalbit);/* Free all acknowledged iframes */
+ if (nr != hdp->hd_vs) /* nr may have been stepped back by free_iframes; frames still outstanding keep the timer running */
+ SET_TIMER (hdp);
+
+ return (TRUE);
+}
+
+/*
+ * This routine determines how many iframes need to be retransmitted.
+ * It then resets the Send State Variable V(S) to accomplish this.
+ */
+
+static
+rej_routine (hdp, rejnr)
+register struct hdcb *hdp;
+register int rejnr;
+{
+ register int anchor;
+
+ /*
+ * Flush the output queue. Any iframes queued for
+ * transmission will be out of sequence.
+ */
+
+ hd_flush (hdp->hd_ifp);
+
+ /*
+ * Determine how many frames should be re-transmitted. In the case
+ * of a normal REJ this should be 1 to K. In the case of a timer
+ * recovery REJ (ie. a REJ with the Final Bit on) this could be 0.
+ */
+
+ anchor = hdp->hd_vs;
+ if (hdp->hd_condition & TIMER_RECOVERY_CONDITION)
+ anchor = hdp->hd_xx; /* in timer recovery, count from the V(S) saved in hd_xx */
+
+ anchor = (anchor - rejnr + 8) % MODULUS; /* + 8 keeps the left operand non-negative, assuming sequence numbers below 8 */
+
+ if (anchor > 0) {
+
+ /* There is at least one iframe to retransmit. */
+ KILL_TIMER (hdp);
+ hdp->hd_vs = rejnr;
+
+ while (hdp->hd_vs != hdp->hd_retxqi) /* hd_send_iframe advances hd_vs on each call */
+ hd_send_iframe (hdp, hdp->hd_retxq[hdp->hd_vs], POLLOFF);
+
+ }
+ hd_start (hdp);
+}
+
+/*
+ * Drop acknowledged iframes from the retransmit queue.  Called when an
+ * N(R) acknowledges frames written earlier.  The caller's N(R) is passed
+ * by reference because it may be stepped back by one (see below).
+ */
+
+static
+free_iframes (hdp, nr, finalbit)
+register struct hdcb *hdp;
+int *nr;
+register int finalbit;
+
+{
+	register int acked;
+
+	/*
+	 * Protocol quirk: in timer recovery condition an N(R) that
+	 * acknowledges every outstanding iframe, but arrives with the
+	 * Final Bit off, must not release the last of them.  That frame
+	 * may still have to be retransmitted even though it has already
+	 * been acknowledged, so N(R) is backed off by one here.
+	 */
+
+	if (finalbit == 0 && (hdp->hd_condition & TIMER_RECOVERY_CONDITION)) {
+		if (*nr == hdp->hd_xx)
+			*nr = (*nr - 1 + 8) % MODULUS;
+	}
+
+	/* Number of slots between the previous N(R) and this one. */
+	acked = (*nr - hdp->hd_lastrxnr + 8) % MODULUS;
+
+	/* Free each acknowledged slot, clear it, and advance hd_lastrxnr. */
+	while (acked-- > 0) {
+		m_freem (hdp->hd_retxq[hdp->hd_lastrxnr]);
+		hdp->hd_retxq[hdp->hd_lastrxnr] = 0;
+		hdp->hd_lastrxnr = (hdp->hd_lastrxnr + 1) % MODULUS;
+	}
+}
diff --git a/sys/netccitt/hd_output.c b/sys/netccitt/hd_output.c
new file mode 100644
index 000000000000..05992e1deb1c
--- /dev/null
+++ b/sys/netccitt/hd_output.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)hd_output.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/syslog.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+
+#include <netccitt/hdlc.h>
+#include <netccitt/hd_var.h>
+#include <netccitt/x25.h>
+
+/*
+ * HDLC OUTPUT INTERFACE
+ *
+ * This routine is called when the X.25 packet layer output routine
+ * has an information frame (iframe) to write. It is also called
+ * by the input and control routines of the HDLC layer.
+ */
+
+hd_output (hdp, m0)
+register struct hdcb *hdp;
+struct mbuf *m0;
+{
+	register struct mbuf *m = m0;
+	int len;
+
+	if (m == NULL)
+		panic ("hd_output");
+	if ((m->m_flags & M_PKTHDR) == 0)
+		panic ("hd_output 2");
+
+	if (hdp->hd_state != ABM) {	/* link not up: the frame is dropped */
+		m_freem (m);
+		return;
+	}
+
+	/*
+	 * Make room for the hdlc header either by prepending another mbuf,
+	 * or by adjusting the offset and length of the first mbuf in the
+	 * chain.  In the first case m becomes a new head, so m0 is
+	 * re-pointed at it before the chain is walked to total its length.
+	 */
+
+	M_PREPEND(m, HDHEADERLN, M_DONTWAIT);
+	if (m == NULL)
+		return;
+	for (len = 0, m0 = m; m; m = m->m_next)
+		len += m->m_len;
+	m = m0;
+	m->m_pkthdr.len = len;
+
+	hd_append (&hdp->hd_txq, m);
+	hd_start (hdp);
+}
+/* Send queued iframes from hd_txq for as long as the link conditions and the window allow. */
+hd_start (hdp)
+register struct hdcb *hdp;
+{
+	register struct mbuf *next;
+
+	for (;;) {
+		/*
+		 * Nothing is sent while any of these conditions holds; frames
+		 * stay on hdp->hd_txq until a later call finds them cleared.
+		 */
+		if (hdp->hd_condition & (TIMER_RECOVERY_CONDITION | REMOTE_RNR_CONDITION | REJ_CONDITION))
+			break;
+
+		/*
+		 * Window full: the maximum number of iframes is already
+		 * outstanding, so at least one must be acknowledged
+		 * before another is written.  Count the stall and stop.
+		 */
+		if (hdp->hd_vs == (hdp->hd_lastrxnr + hdp->hd_xcp->xc_lwsize) % MODULUS) {
+			hdp->hd_window_condition++;
+			break;
+		}
+
+		/* Take the top iframe off the transmit queue; stop when it is empty. */
+		next = hd_remove (&hdp->hd_txq);
+		if (next == NULL)
+			break;
+		hd_send_iframe (hdp, next, POLLOFF);
+	}
+}
+
+/*
+ * This procedure is passed a buffer descriptor for an iframe. It builds
+ * the rest of the control part of the frame and then writes it out. It
+ * also starts the acknowledgement timer and keeps the iframe in the
+ * Retransmit queue (Retxq) just in case we have to do this again.
+ *
+ * Note: This routine is also called from hd_input.c when retransmission
+ * of old frames is required.
+ */
+
+hd_send_iframe (hdp, buf, poll_bit)
+register struct hdcb *hdp;
+register struct mbuf *buf;
+int poll_bit;
+{
+	register struct Hdlc_iframe *iframe;
+	struct mbuf *m;
+
+	KILL_TIMER (hdp);
+
+	if (buf == 0) {
+		printf ("hd_send_iframe: zero arg\n");
+#ifdef HDLCDEBUG
+		hd_status (hdp);
+		hd_dumptrace (hdp);
+#endif
+		hdp->hd_vs = (hdp->hd_vs + 7) % MODULUS;	/* i.e. step V(S) back by one */
+		return;
+	}
+	iframe = mtod (buf, struct Hdlc_iframe *);
+
+	iframe -> hdlc_0 = 0;
+	iframe -> nr = hdp->hd_vr;
+	iframe -> pf = poll_bit;
+	iframe -> ns = hdp->hd_vs;
+	iframe -> address = ADDRESS_B;
+	hdp->hd_lasttxnr = hdp->hd_vr;	/* this frame carries our N(R) */
+	hdp->hd_rrtimer = 0;		/* so no separate RR is pending */
+
+	if (hdp->hd_vs == hdp->hd_retxqi) {
+		/* Check for retransmissions. */
+		/* Put iframe only once in the Retransmission queue. */
+		hdp->hd_retxq[hdp->hd_retxqi] = buf;
+		hdp->hd_retxqi = (hdp->hd_retxqi + 1) % MODULUS;
+		hdp->hd_iframes_out++;
+	}
+
+	hdp->hd_vs = (hdp->hd_vs + 1) % MODULUS;
+
+	hd_trace (hdp, TX, (struct Hdlc_frame *)iframe);
+
+	/* Write buffer on device.  The ack timer is re-armed even when the */
+	/* copy fails, so the frame kept in the retransmit queue is retried. */
+	m = hdp->hd_dontcopy ? buf : m_copy(buf, 0, (int)M_COPYALL);
+	if (m == 0)
+		printf("hdlc: out of mbufs\n");
+	else
+		(*hdp->hd_output)(hdp, m);
+	SET_TIMER (hdp);
+}
+
+hd_ifoutput(hdp, m)
+register struct mbuf *m;
+register struct hdcb *hdp;
+{
+	register struct ifnet *ifp;
+	int ipl;
+	/*
+	 * Queue message on the interface send queue at splimp, and
+	 * start output if the interface is not yet active.
+	 */
+	ifp = hdp->hd_ifp;
+	ipl = splimp();
+	if (IF_QFULL(&ifp->if_snd)) {
+		/* Send queue full: count the drop and discard the frame. */
+		IF_DROP(&ifp->if_snd);
+		m_freem(m);
+	} else {
+		IF_ENQUEUE(&ifp->if_snd, m);
+		if (!(ifp->if_flags & IFF_OACTIVE))
+			(*ifp->if_start)(ifp);
+	}
+	splx(ipl);
+}
+
+
+/*
+ * Runs when the timer expires because no acknowledgement arrived for an
+ * iframe: retransmit with the poll bit on, or reset the link after hd_n2 tries.
+ */
+
+hd_resend_iframe (hdp)
+register struct hdcb *hdp;
+{
+	if (hdp->hd_retxcnt++ >= hd_n2) {
+		/* No RR received even after N2 retries - attempt to reset link. */
+		hd_initvars (hdp);
+		hd_writeinternal (hdp, SABM, POLLOFF);
+		hdp->hd_state = WAIT_UA;
+		SET_TIMER (hdp);
+		hd_message (hdp, "Timer recovery failed: link down");
+		(void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);
+		return;
+	}
+
+	/* On entering timer recovery, remember the current V(S) in hd_xx. */
+	if ((hdp->hd_condition & TIMER_RECOVERY_CONDITION) == 0) {
+		hdp->hd_condition |= TIMER_RECOVERY_CONDITION;
+		hdp->hd_xx = hdp->hd_vs;
+	}
+
+	/* Rewind V(S) to hd_lastrxnr and resend that frame with the poll bit on. */
+	hdp->hd_vs = hdp->hd_lastrxnr;
+	hd_send_iframe (hdp, hdp->hd_retxq[hdp->hd_vs], POLLON);
+}
diff --git a/sys/netccitt/hd_subr.c b/sys/netccitt/hd_subr.c
new file mode 100644
index 000000000000..c75ab07568e9
--- /dev/null
+++ b/sys/netccitt/hd_subr.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)hd_subr.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+
+#include <netccitt/hdlc.h>
+#include <netccitt/hd_var.h>
+#include <netccitt/x25.h>
+#include <netccitt/pk_var.h>
+
+/* Set the length limit of the HDLC input queue (hdintrq) to IFQ_MAXLEN. */
+hd_init ()
+{
+	hdintrq.ifq_maxlen = IFQ_MAXLEN;
+}
+
+/*
+ * Control input for the HDLC (LAPB) link level.
+ *
+ * prc  - request code.  PRC_IFUP, PRC_IFDOWN and PRC_DISCONNECT_REQUEST
+ *        are acted on; any other value only performs the control block
+ *        lookup/setup below.
+ * addr - actually a struct x25config describing the interface.
+ *
+ * Returns 0 on success, EAFNOSUPPORT / EPROTONOSUPPORT if the
+ * configuration is not AF_CCITT / LAPB, and ENOBUFS if a control block
+ * or packet-level link cannot be allocated.  Panics if no AF_CCITT
+ * interface address matches addr.
+ */
+hd_ctlinput (prc, addr)
+struct sockaddr *addr;
+{
+	register struct x25config *xcp = (struct x25config *)addr;
+	register struct hdcb *hdp;
+	register struct ifaddr *ifa;
+	struct ifnet *ifp;
+	caddr_t pk_newlink();
+
+	if (addr->sa_family != AF_CCITT)
+		return (EAFNOSUPPORT);
+	if (xcp->xc_lptype != HDLCPROTO_LAPB)
+		return (EPROTONOSUPPORT);
+	ifa = ifa_ifwithaddr(addr);
+	if (ifa == 0 || ifa->ifa_addr->sa_family != AF_CCITT ||
+	    (ifp = ifa->ifa_ifp) == 0)
+		panic ("hd_ctlinput");
+
+	/* Look for an existing control block for this interface. */
+	for (hdp = hdcbhead; hdp; hdp = hdp->hd_next)
+		if (hdp->hd_ifp == ifp)
+			break;
+
+	if (hdp == 0) {		/* new interface */
+		int hd_ifoutput();
+
+		/* an hdcb is now too big to fit in an mbuf */
+		MALLOC(hdp, struct hdcb *, sizeof (*hdp), M_PCB, M_DONTWAIT);
+		if (hdp == 0)
+			return (ENOBUFS);
+		bzero((caddr_t)hdp, sizeof(*hdp));
+		hdp->hd_pkp =
+			(caddr_t) pk_newlink ((struct x25_ifaddr *) ifa,
+					      (caddr_t) hdp);
+		((struct x25_ifaddr *)ifa)->ia_pkcb =
+			(struct pkcb *) hdp->hd_pkp;
+		if (hdp -> hd_pkp == 0) {
+			/* Not yet on the hdcbhead list, so freeing is safe. */
+			free(hdp, M_PCB);
+			return (ENOBUFS);
+		}
+		hdp->hd_ifp = ifp;
+		hdp->hd_ifa = ifa;
+		hdp->hd_xcp = xcp;
+		hdp->hd_state = INIT;
+		hdp->hd_output = hd_ifoutput;
+		hdp->hd_next = hdcbhead;
+		hdcbhead = hdp;
+	} else if (hdp->hd_pkp == 0) {	/* interface got reconfigured */
+		hdp->hd_pkp =
+			(caddr_t) pk_newlink ((struct x25_ifaddr *) ifa,
+					      (caddr_t) hdp);
+		((struct x25_ifaddr *)ifa)->ia_pkcb =
+			(struct pkcb *) hdp->hd_pkp;
+		if (hdp -> hd_pkp == 0) {
+			/*
+			 * This hdcb is still linked on hdcbhead, which
+			 * hd_timer() walks, so it must not be freed here.
+			 * It is left with hd_pkp == 0, exactly as it was on
+			 * entry, so a later call can retry pk_newlink().
+			 */
+			return (ENOBUFS);
+		}
+	}
+
+	switch (prc) {
+	case PRC_IFUP:
+		/* Clamp an unset or oversized window to the maximum. */
+		if (xcp->xc_lwsize == 0 ||
+		    xcp->xc_lwsize > MAX_WINDOW_SIZE)
+			xcp->xc_lwsize = MAX_WINDOW_SIZE;
+		if (hdp->hd_state == INIT)
+			SET_TIMER (hdp);
+		break;
+
+	case PRC_IFDOWN:
+		if (hdp->hd_state == ABM)
+			hd_message (hdp, "Operator shutdown: link closed");
+		(void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);
+
+		/* fall thru to ... */
+
+	case PRC_DISCONNECT_REQUEST:
+		/* drop reference to pkcb --- it's dead meat */
+		hdp->hd_pkp = (caddr_t) 0;
+		((struct x25_ifaddr *)ifa)->ia_pkcb = (struct pkcb *) 0;
+
+		hd_writeinternal (hdp, DISC, POLLON);
+		hdp->hd_state = DISC_SENT;
+		SET_TIMER (hdp);
+	}
+	return (0);
+}
+
+/*
+ * Return a link to its initial state: free every frame on the transmit
+ * queue and every frame held for retransmission (from the last received
+ * N(R) up to, but not including, hd_retxqi), then zero the sequence
+ * variables, timers, retry count and condition flags.
+ */
+hd_initvars (hdp)
+register struct hdcb *hdp;
+{
+	register struct mbuf *m;
+	register int i;
+
+	/* Clear Transmit queue. */
+	for (;;) {
+		m = hd_remove (&hdp->hd_txq);
+		if (m == NULL)
+			break;
+		m_freem (m);
+	}
+
+	/* Clear Retransmit queue. */
+	for (i = hdp->hd_lastrxnr; i != hdp->hd_retxqi; i = (i + 1) % MODULUS)
+		m_freem (hdp->hd_retxq[i]);
+	hdp->hd_retxqi = 0;
+
+	hdp->hd_vr = 0;
+	hdp->hd_vs = 0;
+	hdp->hd_lastrxnr = 0;
+	hdp->hd_lasttxnr = 0;
+	hdp->hd_rrtimer = 0;
+	KILL_TIMER(hdp);
+	hdp->hd_retxcnt = 0;
+	hdp->hd_condition = 0;
+}
+
+/*
+ * Classify a received frame from its control field and bump the
+ * matching input counter in hdp.  Returns one of IFRAME, RR, RNR, REJ,
+ * DM, SABM, DISC, UA, FRMR, or ILLEGAL when the control field matches
+ * none of the recognised encodings.
+ */
+hd_decode (hdp, frame)
+register struct hdcb *hdp;
+struct Hdlc_frame *frame;
+{
+	register struct Hdlc_iframe *iframe = (struct Hdlc_iframe *) frame;
+	register struct Hdlc_sframe *sframe = (struct Hdlc_sframe *) frame;
+	register struct Hdlc_uframe *uframe = (struct Hdlc_uframe *) frame;
+
+	/* Information format. */
+	if (iframe -> hdlc_0 == 0) {
+		hdp->hd_iframes_in++;
+		return (IFRAME);
+	}
+
+	/* Supervisory format. */
+	if (sframe -> hdlc_01 == 1) {
+		switch (sframe -> s2) {
+		case 0:
+			hdp->hd_rrs_in++;
+			return (RR);
+
+		case 1:
+			hdp->hd_rnrs_in++;
+			return (RNR);
+
+		case 2:
+			hdp->hd_rejs_in++;
+			return (REJ);
+		}
+		return (ILLEGAL);
+	}
+
+	/* Unnumbered format. */
+	if (uframe -> hdlc_11 == 3) {
+		switch (uframe -> m3) {
+		case 0:
+			return (DM);
+
+		case 1:
+			return (SABM);
+
+		case 2:
+			return (DISC);
+
+		case 3:
+			return (UA);
+
+		case 4:
+			hdp->hd_frmrs_in++;
+			return (FRMR);
+		}
+	}
+	return (ILLEGAL);
+}
+
+/*
+ * This routine is called when the HDLC layer internally generates a
+ * command or response for the remote machine ( eg. RR, UA etc. ).
+ * Only supervisory or unnumbered frames are processed.
+ *
+ * hdp       - link control block
+ * frametype - one of RR, RNR, REJ, SABM, DISC, DM, UA, FRMR
+ * pf        - value for the poll/final bit (POLLON or POLLOFF)
+ *
+ * A header mbuf is allocated without waiting; if none is available the
+ * frame is silently not sent.  The finished frame is traced and handed
+ * to the link's hd_output routine.
+ */
+
+hd_writeinternal (hdp, frametype, pf)
+register struct hdcb *hdp;
+register int frametype, pf;
+{
+ register struct mbuf *buf;
+ struct Hdlc_frame *frame;
+ register struct Hdlc_sframe *sframe;
+ register struct Hdlc_uframe *uframe;
+
+ MGETHDR (buf, M_DONTWAIT, MT_HEADER);
+ if (buf == 0)
+ return;
+ /* Three views of the same mbuf data: raw bytes and the two bit-field layouts. */
+ frame = mtod (buf, struct Hdlc_frame *);
+ sframe = mtod (buf, struct Hdlc_sframe *);
+ uframe = mtod (buf, struct Hdlc_uframe *);
+
+ /* Assume a response - address structure for DTE */
+ frame -> address = ADDRESS_A;
+ buf -> m_len = 2;
+ buf -> m_act = buf -> m_next = NULL;
+
+ switch (frametype) {
+ case RR:
+ frame -> control = RR_CONTROL;
+ hdp->hd_rrs_out++;
+ break;
+
+ case RNR:
+ frame -> control = RNR_CONTROL;
+ hdp->hd_rnrs_out++;
+ break;
+
+ case REJ:
+ frame -> control = REJ_CONTROL;
+ hdp->hd_rejs_out++;
+ break;
+
+ case SABM:
+ frame -> control = SABM_CONTROL;
+ frame -> address = ADDRESS_B;
+ break;
+
+ case DISC:
+ /*
+ * Interface is not up: nothing is transmitted.  The link is marked
+ * DISCONNECTED, the frame is freed and the interface send queue
+ * is emptied.
+ */
+ if ((hdp->hd_ifp->if_flags & IFF_UP) == 0) {
+ hdp->hd_state = DISCONNECTED;
+ (void) m_freem (buf);
+ hd_flush (hdp->hd_ifp);
+ return;
+ }
+ frame -> control = DISC_CONTROL;
+ frame -> address = ADDRESS_B;
+ break;
+
+ case DM:
+ frame -> control = DM_CONTROL;
+ break;
+
+ case UA:
+ frame -> control = UA_CONTROL;
+ break;
+
+ case FRMR:
+ frame -> control = FRMR_CONTROL;
+ /*
+ * NOTE(review): this copies the FIRST three bytes of hd_frmr, which
+ * per struct Frmr_frame are address, control and frmr_control.  If
+ * the intent is the three FRMR information octets (frmr_control and
+ * the two bit-field octets) the source should probably be
+ * &hd_frmr.frmr_control -- confirm against the code that fills in
+ * hd_frmr before changing.
+ */
+ bcopy ((caddr_t)&hd_frmr, (caddr_t)frame -> info, 3);
+ buf -> m_len = 5;
+ hdp->hd_frmrs_out++;
+
+ }
+
+ /* Re-read the control byte just written through the bit-field view. */
+ if (sframe -> hdlc_01 == 1) {
+ /* Supervisory format - RR, REJ, or RNR. */
+ sframe -> nr = hdp->hd_vr;
+ sframe -> pf = pf;
+ /* Sending N(R) acknowledges up to V(R), so the pending RR timer is cleared. */
+ hdp->hd_lasttxnr = hdp->hd_vr;
+ hdp->hd_rrtimer = 0;
+ }
+ else
+ uframe -> pf = pf;
+
+ hd_trace (hdp, TX, frame);
+ buf -> m_pkthdr.len = buf -> m_len;
+ (*hdp->hd_output) (hdp, buf);
+}
+
+/*
+ * Unlink and return the frame at the head of transmit queue q, or a
+ * null pointer if the queue is empty.  Frames are chained through
+ * m_act; the returned frame has its m_act link cleared.
+ */
+struct mbuf *
+hd_remove (q)
+struct hdtxq *q;
+{
+	register struct mbuf *first = q -> head;
+
+	if (first == NULL)
+		return (first);
+
+	q -> head = first -> m_act;
+	if (q -> head == NULL)
+		q -> tail = NULL;	/* queue is now empty */
+	first -> m_act = 0;
+	return (first);
+}
+
+/*
+ * Add frame m to the tail of transmit queue q.  Frames are chained
+ * through their m_act field.
+ */
+hd_append (q, m)
+register struct hdtxq *q;
+register struct mbuf *m;
+{
+
+	m -> m_act = NULL;
+	if (q -> tail != NULL)
+		q -> tail -> m_act = m;
+	else
+		q -> head = m;		/* queue was empty */
+	q -> tail = m;
+}
+
+/*
+ * Empty the send queue of interface ifp, freeing every queued packet.
+ * Each dequeue is bracketed by splimp()/splx(); the free is done after
+ * splx().
+ */
+hd_flush (ifp)
+struct ifnet *ifp;
+{
+	register struct mbuf *m;
+	register int s;
+
+	for (;;) {
+		s = splimp ();
+		IF_DEQUEUE (&ifp->if_snd, m);
+		splx (s);
+		if (m == 0)
+			return;
+		m_freem (m);
+	}
+}
+
+/*
+ * Print a link-level message.  When more than one control block is on
+ * the hdcbhead list the message is tagged with the formatted address of
+ * the link's configuration; with a single block it is printed bare.
+ */
+hd_message (hdp, msg)
+struct hdcb *hdp;
+char *msg;
+{
+	char *format_ntn ();
+
+	if (hdcbhead -> hd_next == 0) {
+		printf ("HDLC: %s\n", msg);
+		return;
+	}
+	printf ("HDLC(%s): %s\n", format_ntn (hdp->hd_xcp), msg);
+}
+
+#ifdef HDLCDEBUG
+/*
+ * Debugging aid: print the send/receive sequence variables, the
+ * retransmit queue index, the last N(R) values and the condition flags
+ * of one link.
+ */
+hd_status (hdp)
+struct hdcb *hdp;
+{
+	printf ("HDLC STATUS:\n V(S)=%d, V(R)=%d, retxqi=%d,\n",
+	    hdp->hd_vs,
+	    hdp->hd_vr,
+	    hdp->hd_retxqi);
+	printf ("Last_rx_nr=%d, Last_tx_nr=%d,\n Condition=%d, Xx=%d\n",
+	    hdp->hd_lastrxnr,
+	    hdp->hd_lasttxnr,
+	    hdp->hd_condition,
+	    hdp->hd_xx);
+}
+#endif
diff --git a/sys/netccitt/hd_timer.c b/sys/netccitt/hd_timer.c
new file mode 100644
index 000000000000..a3bf12addf00
--- /dev/null
+++ b/sys/netccitt/hd_timer.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)hd_timer.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+
+#include <netccitt/hdlc.h>
+#include <netccitt/hd_var.h>
+#include <netccitt/x25.h>
+
+/*
+ * Run-time copies of the link timing parameters, initialised from the
+ * T1, T3 and N2 constants.  These can be patched with adb if the
+ * default values are inappropriate.
+ */
+
+int hd_t1 = T1; /* timer T1 value (loaded into hd_timer by SET_TIMER) */
+int hd_t3 = T3; /* RR send timer */
+int hd_n2 = N2; /* frame retransmission limit */
+
+/*
+ * HDLC TIMER
+ *
+ * This routine is called every 500ms by the kernel. Decrement timer by this
+ * amount - if expired then process the event.
+ *
+ * Every control block on the hdcbhead list is visited.  Each has two
+ * independent countdowns: hd_rrtimer (delayed RR) and hd_timer (the
+ * state-dependent recovery timer).  The whole walk runs between
+ * splimp() and splx().
+ */
+
+hd_timer ()
+{
+ register struct hdcb *hdp;
+ register int s = splimp ();
+
+ for (hdp = hdcbhead; hdp; hdp = hdp->hd_next) {
+ /* RR timer ran out: send an RR only if V(R) has not been reported yet. */
+ if (hdp->hd_rrtimer && (--hdp->hd_rrtimer == 0)) {
+ if (hdp->hd_lasttxnr != hdp->hd_vr)
+ hd_writeinternal (hdp, RR, POLLOFF);
+ }
+
+ /* Skip this link unless its main timer was running and has just reached zero. */
+ if (!(hdp->hd_timer && --hdp->hd_timer == 0))
+ continue;
+
+ switch (hdp->hd_state) {
+ case INIT:
+ case DISC_SENT:
+ /* Both states (re)send DISC with the poll bit on. */
+ hd_writeinternal (hdp, DISC, POLLON);
+ break;
+
+ case ABM:
+ /* Unacknowledged I-frames outstanding: count the timeout and retransmit. */
+ if (hdp->hd_lastrxnr != hdp->hd_vs) { /* XXX */
+ hdp->hd_timeouts++;
+ hd_resend_iframe (hdp);
+ }
+ break;
+
+ case WAIT_SABM:
+ /* Repeat the FRMR; on the hd_n2'th expiry send SABM and wait for UA. */
+ hd_writeinternal (hdp, FRMR, POLLOFF);
+ if (++hdp->hd_retxcnt == hd_n2) {
+ hdp->hd_retxcnt = 0;
+ hd_writeinternal (hdp, SABM, POLLOFF);
+ hdp->hd_state = WAIT_UA;
+ }
+ break;
+
+ case DM_SENT:
+ /* Repeat DM until the hd_n2'th expiry, then report link down and start sending SABM. */
+ if (++hdp->hd_retxcnt == hd_n2) {
+ /* Notify the packet level. */
+ (void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);
+ hdp->hd_retxcnt = 0;
+ hdp->hd_state = SABM_SENT;
+ hd_writeinternal (hdp, SABM, POLLOFF);
+ } else
+ hd_writeinternal (hdp, DM, POLLOFF);
+ break;
+
+ case WAIT_UA:
+ /* Repeat SABM until the hd_n2'th expiry, then fall back to sending DM. */
+ if (++hdp->hd_retxcnt == hd_n2) {
+ hdp->hd_retxcnt = 0;
+ hd_writeinternal (hdp, DM, POLLOFF);
+ hdp->hd_state = DM_SENT;
+ } else
+ hd_writeinternal (hdp, SABM, POLLOFF);
+ break;
+
+ case SABM_SENT:
+ /* Do this indefinitely. */
+ hd_writeinternal (hdp, SABM, POLLON);
+ break;
+
+ case DISCONNECTED:
+ /*
+ * Poll the interface driver flags waiting
+ * for the IFF_UP bit to come on.
+ */
+ if (hdp->hd_ifp->if_flags & IFF_UP)
+ hdp->hd_state = INIT;
+
+ }
+ /* Re-arm the timer for every link whose timer expired on this pass. */
+ SET_TIMER (hdp);
+ }
+
+ splx (s);
+}
diff --git a/sys/netccitt/hd_var.h b/sys/netccitt/hd_var.h
new file mode 100644
index 000000000000..5fefe0869a4d
--- /dev/null
+++ b/sys/netccitt/hd_var.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)hd_var.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ *
+ * hdlc control block
+ *
+ */
+
+/*
+ * Transmit queue: a singly linked list of frames chained through the
+ * mbuf m_act field (see hd_append and hd_remove).
+ */
+struct hdtxq {
+ struct mbuf *head; /* first frame, or NULL when the queue is empty */
+ struct mbuf *tail; /* last frame, or NULL when the queue is empty */
+};
+
+/*
+ * Per-link HDLC control block.  All blocks are chained from hdcbhead
+ * through hd_next.
+ */
+struct hdcb {
+	struct	hdcb *hd_next;	/* pointer to next hdlc control block */
+	char	hd_state;	/* link state */
+	char	hd_vs;		/* send state variable */
+	char	hd_vr;		/* receive state variable */
+	char	hd_lastrxnr;	/* last received N(R) */
+	char	hd_lasttxnr;	/* last transmitted N(R) */
+	char	hd_condition;	/* bit mask of the *_CONDITION flags below */
+#define TIMER_RECOVERY_CONDITION 0x01
+#define REJ_CONDITION 0x02
+#define REMOTE_RNR_CONDITION 0X04
+	char	hd_retxcnt;	/* retry counter, compared against hd_n2 */
+	char	hd_xx;		/* V(S) saved on entry to timer recovery */
+	struct	hdtxq hd_txq;	/* frames waiting to be transmitted */
+	struct	mbuf *hd_retxq[MODULUS]; /* sent frames kept for retransmission */
+	char	hd_retxqi;	/* next free slot in hd_retxq */
+	char	hd_rrtimer;	/* countdown until an RR is sent (0 = idle) */
+	char	hd_timer;	/* main countdown (0 = idle) */
+/*
+ * Argument and expansion are parenthesized so the macros behave as a
+ * single expression with any pointer expression as argument.
+ */
+#define SET_TIMER(hdp) ((hdp)->hd_timer = hd_t1)
+#define KILL_TIMER(hdp) ((hdp)->hd_timer = 0)
+	char	hd_dontcopy;	/* if-driver doesn't free I-frames */
+	struct	ifnet *hd_ifp;	/* device's network visible interface */
+	struct	ifaddr *hd_ifa;	/* device's X.25 network address */
+	struct	x25config *hd_xcp; /* configuration passed to hd_ctlinput */
+	caddr_t	hd_pkp;		/* Level III junk */
+	int	(*hd_output)();	/* separate entry for HDLC direct output */
+
+	/* link statistics */
+
+	long	hd_iframes_in;
+	long	hd_iframes_out;
+	long	hd_rrs_in;
+	long	hd_rrs_out;
+	short	hd_rejs_in;
+	short	hd_rejs_out;
+	long	hd_window_condition;
+	short	hd_invalid_ns;
+	short	hd_invalid_nr;
+	short	hd_timeouts;
+	short	hd_resets;
+	short	hd_unknown;
+	short	hd_frmrs_in;
+	short	hd_frmrs_out;
+	short	hd_rnrs_in;
+	short	hd_rnrs_out;
+};
+
+/*
+ * Kernel-only globals of the HDLC layer.
+ * NOTE(review): these are declared without `extern', so every file that
+ * includes this header carries a tentative definition; this relies on
+ * the linker merging them -- confirm before building with a toolchain
+ * that does not.
+ */
+#ifdef KERNEL
+struct hdcb *hdcbhead; /* head of linked list of hdcb's */
+struct Frmr_frame hd_frmr; /* rejected frame diagnostic info */
+struct ifqueue hdintrq; /* hdlc packet input queue */
+
+int hd_t1; /* timer T1 value */
+int hd_t3; /* RR send timer */
+int hd_n2; /* frame retransmission limit */
+#endif
diff --git a/sys/netccitt/hdlc.h b/sys/netccitt/hdlc.h
new file mode 100644
index 000000000000..60cf7adf0722
--- /dev/null
+++ b/sys/netccitt/hdlc.h
@@ -0,0 +1,156 @@
+/*-
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)hdlc.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Basic types plus the ORDER4/ORDER5 helpers.  The whole group is
+ * skipped if ORDER4 is already defined.
+ */
+#ifndef ORDER4
+#define FALSE 0
+#define TRUE 1
+typedef u_char octet;
+typedef char bool;
+
+/*
+ * HDLC Packet format definitions
+ * This will eventually have to be rewritten without reference
+ * to bit fields, to be compliant with ANSI C and alignment safe.
+ */
+
+/*
+ * ORDERn lists its bit-field declarators as written on big-endian
+ * machines and in reverse on little-endian machines.  Presumably this
+ * keeps the on-the-wire bit layout the same on both -- the compiler's
+ * bit-field allocation order is not visible here.
+ * Neither macro is defined if BYTE_ORDER is some other value.
+ */
+#if BYTE_ORDER == BIG_ENDIAN
+#define ORDER4(a, b, c, d) a , b , c , d
+#define ORDER5(a, b, c, d, e) a , b , c , d , e
+#endif
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define ORDER4(a, b, c, d) d , c , b , a
+#define ORDER5(a, b, c, d, e) e , d , c , b , a
+#endif
+#endif
+
+/* Size of the I-frame information field; parenthesized so it is safe in any expression. */
+#define MAX_INFO_LEN (4096+3+4)
+#define ADDRESS_A 3 /* B'00000011' */
+#define ADDRESS_B 1 /* B'00000001' */
+
+/*
+ * Frame layouts.  Each starts with an address octet followed by one
+ * control octet; the structures below are different views of that
+ * control octet.  hd_decode() tells the formats apart by the hdlc_0,
+ * hdlc_01 and hdlc_11 fields.
+ */
+
+/* Information frame: hdlc_0 == 0. */
+struct Hdlc_iframe {
+ octet address;
+ octet ORDER4(nr:3, pf:1, ns:3, hdlc_0:1);
+ octet i_field[MAX_INFO_LEN];
+};
+
+/* Supervisory frame: hdlc_01 == 1; s2 selects RR (0), RNR (1) or REJ (2). */
+struct Hdlc_sframe {
+ octet address;
+ octet ORDER4(nr:3, pf:1, s2:2, hdlc_01:2);
+};
+
+/* Unnumbered frame: hdlc_11 == 3; m3 selects DM, SABM, DISC, UA or FRMR. */
+struct Hdlc_uframe {
+ octet address;
+ octet ORDER4(m3:3, pf:1, m2:2, hdlc_11:2);
+};
+
+/* Frame-reject frame: address, control, then three diagnostic octets. */
+struct Frmr_frame {
+ octet address;
+ octet control;
+ octet frmr_control;
+ octet ORDER4(frmr_nr:3, frmr_f1_0:1, frmr_ns:3, frmr_f2_0:1);
+ octet ORDER5(frmr_0000:4, frmr_z:1, frmr_y:1, frmr_x:1, frmr_w:1);
+};
+
+#define HDHEADERLN 2
+#define MINFRLN 2 /* Minimum frame length. */
+
+/* Untyped view: address, raw control octet, and room for the FRMR info field. */
+struct Hdlc_frame {
+ octet address;
+ octet control;
+ octet info[3]; /* min for FRMR */
+};
+
+#define SABM_CONTROL 057 /* B'00101111' */
+#define UA_CONTROL 0143 /* B'01100011' */
+#define DISC_CONTROL 0103 /* B'01000011' */
+#define DM_CONTROL 017 /* B'00001111' */
+#define FRMR_CONTROL 0207 /* B'10000111' */
+#define RR_CONTROL 01 /* B'00000001' */
+#define RNR_CONTROL 05 /* B'00000101' */
+#define REJ_CONTROL 011 /* B'00001001' */
+
+#define POLLOFF 0
+#define POLLON 1
+
+/* Define Link State constants. */
+
+#define INIT 0
+#define DM_SENT 1
+#define SABM_SENT 2
+#define ABM 3
+#define WAIT_SABM 4
+#define WAIT_UA 5
+#define DISC_SENT 6
+#define DISCONNECTED 7
+#define MAXSTATE 8
+
+/*
+ * The following constants are used in a switch statement to process
+ * frames read from the communications line.  Each is a multiple of
+ * MAXSTATE; the products are parenthesized so the constants keep their
+ * value inside any larger expression.
+ */
+
+#define SABM (0 * MAXSTATE)
+#define DM (1 * MAXSTATE)
+#define DISC (2 * MAXSTATE)
+#define UA (3 * MAXSTATE)
+#define FRMR (4 * MAXSTATE)
+#define RR (5 * MAXSTATE)
+#define RNR (6 * MAXSTATE)
+#define REJ (7 * MAXSTATE)
+#define IFRAME (8 * MAXSTATE)
+#define ILLEGAL (9 * MAXSTATE)
+
+#define T1 (3 * PR_SLOWHZ) /* IFRAME TIMEOUT - 3 seconds */
+#define T3 (T1 / 2) /* RR generate timeout - 1.5 seconds */
+#define N2 10
+#define MODULUS 8
+#define MAX_WINDOW_SIZE 7
+
+#define Z 0
+#define Y 1
+#define X 2
+#define W 3
+#define A 4
+
+#define TX 0
+#define RX 1
+
+bool range_check ();
+bool valid_nr ();
+struct mbuf *hd_remove ();
diff --git a/sys/netccitt/if_x25subr.c b/sys/netccitt/if_x25subr.c
new file mode 100644
index 000000000000..6f00496a18b3
--- /dev/null
+++ b/sys/netccitt/if_x25subr.c
@@ -0,0 +1,801 @@
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_x25subr.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+
+#include <machine/mtpr.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/route.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/x25err.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#endif
+
+#ifdef NS
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#endif
+
+#ifdef ISO
+int tp_incoming();
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#endif
+
+extern struct ifnet loif;
+/* List head, initialised with both links pointing at itself (empty list). */
+struct llinfo_x25 llinfo_x25 = {&llinfo_x25, &llinfo_x25};
+#ifndef _offsetof
+#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
+#endif
+/* Set on first use in x25_rtrequest() from x25_dgmask via rn_addmask(). */
+struct sockaddr *x25_dgram_sockmask;
+struct sockaddr_x25 x25_dgmask = {
+ _offsetof(struct sockaddr_x25, x25_udata[1]), /* _len */
+ 0, /* _family */
+ 0, /* _net */
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* _addr */
+ {0}, /* opts */
+ -1, /* _udlen */
+ {-1} /* _udata */
+};
+
+/* Counters bumped by x25_ifoutput() when it has to repair a packet header. */
+struct if_x25stats {
+ int ifx_wrongplen; /* m_pkthdr.len disagreed with the summed m_len */
+ int ifx_nophdr; /* first mbuf had no M_PKTHDR */
+} if_x25stats;
+/* When nonzero, x25_ifoutput() calls pk_connect() for a circuit in READY state. */
+int x25_autoconnect = 0;
+
+/*
+ * Set `error' and jump to the enclosing function's `bad' label.
+ * Expands to a braced block, so it must not be used as the unbraced
+ * body of an `if' that is followed by `else'.
+ */
+#define senderr(x) {error = x; goto bad;}
+/*
+ * Ancillary routines
+ */
+/*
+ * Allocate a zeroed llinfo_x25 for route rt (without waiting), take a
+ * reference on the route and link the new entry in: after the route's
+ * existing llinfo if it has one, otherwise it becomes the route's
+ * llinfo and is inserted after the global llinfo_x25 head.  lx_ia is
+ * set to the last AF_CCITT address found on the route's interface, and
+ * stays zero if there is none.  Returns 0 if no memory is available.
+ */
+static struct llinfo_x25 *
+x25_lxalloc(rt)
+register struct rtentry *rt;
+{
+	register struct llinfo_x25 *lx;
+	register struct ifaddr *ifa;
+
+	MALLOC(lx, struct llinfo_x25 *, sizeof (*lx), M_PCB, M_NOWAIT);
+	if (lx == 0)
+		return lx;
+	Bzero(lx, sizeof(*lx));
+	lx->lx_rt = rt;
+	lx->lx_family = rt_key(rt)->sa_family;
+	rt->rt_refcnt++;
+	if (rt->rt_llinfo == 0) {
+		rt->rt_llinfo = (caddr_t)lx;
+		insque(lx, &llinfo_x25);
+	} else
+		insque(lx, (struct llinfo_x25 *)rt->rt_llinfo);
+
+	/* No early exit: the last matching address wins. */
+	ifa = rt->rt_ifp->if_addrlist;
+	while (ifa) {
+		if (ifa->ifa_addr->sa_family == AF_CCITT)
+			lx->lx_ia = (struct x25_ifaddr *)ifa;
+		ifa = ifa->ifa_next;
+	}
+	return lx;
+}
+/*
+ * Release an llinfo_x25: detach and disconnect its circuit if it has
+ * one, repoint the route's llinfo, drop the route reference, unlink
+ * the entry and free it.
+ *
+ * NOTE(review): the route's rt_llinfo is cleared whenever lx is not
+ * the route's current llinfo with a successor on the same route --
+ * including when lx is a secondary entry and another entry is still
+ * the route's llinfo.  Confirm that callers only free entries in an
+ * order where this is harmless.
+ */
+x25_lxfree(lx)
+register struct llinfo_x25 *lx;
+{
+	register struct rtentry *rt = lx->lx_rt;
+	register struct pklcd *lcp = lx->lx_lcd;
+	caddr_t successor = 0;
+
+	if (lcp) {
+		/* Detach first so the disconnect does not call back. */
+		lcp->lcd_upper = 0;
+		pk_disconnect(lcp);
+	}
+	if (rt->rt_llinfo == (caddr_t)lx && lx->lx_next->lx_rt == rt)
+		successor = (caddr_t)lx->lx_next;
+	rt->rt_llinfo = successor;
+	RTFREE(rt);
+	remque(lx);
+	FREE(lx, M_PCB);
+}
+/*
+ * Process an X.25 packet as a datagram.
+ *
+ * With no mbuf, or with the circuit not in DATA_TRANSFER, the event is
+ * passed to x25_connect_callback(lcp, 0).  Otherwise an RR is
+ * generated, the receiving interface's last-change time is updated,
+ * and an MT_DATA packet is queued on the input queue of the protocol
+ * family recorded in the circuit's llinfo, with the matching network
+ * software interrupt scheduled.  Packets of any other mbuf type, for
+ * an unsupported family, or arriving at a full queue are freed.
+ */
+x25_ifinput(lcp, m)
+struct pklcd *lcp;
+register struct mbuf *m;
+{
+	struct llinfo_x25 *lx = (struct llinfo_x25 *)lcp->lcd_upnext;
+	register struct ifnet *ifp;
+	struct ifqueue *inq;
+	extern struct timeval time;
+	int s, isr;
+
+	if (m == 0 || lcp->lcd_state != DATA_TRANSFER) {
+		x25_connect_callback(lcp, 0);
+		return;
+	}
+	pk_flowcontrol(lcp, 0, 1);	/* Generate RR */
+	ifp = m->m_pkthdr.rcvif;
+	ifp->if_lastchange = time;
+
+	/* Only data mbufs are passed up. */
+	if (m->m_type != MT_DATA) {
+		m_freem(m);
+		return;
+	}
+
+	switch (lx->lx_family) {
+#ifdef INET
+	case AF_INET:
+		isr = NETISR_IP;
+		inq = &ipintrq;
+		break;
+
+#endif
+#ifdef NS
+	case AF_NS:
+		isr = NETISR_NS;
+		inq = &nsintrq;
+		break;
+
+#endif
+#ifdef ISO
+	case AF_ISO:
+		isr = NETISR_ISO;
+		inq = &clnlintrq;
+		break;
+#endif
+	default:
+		m_freem(m);
+		ifp->if_noproto++;
+		return;
+	}
+
+	s = splimp();
+	schednetisr(isr);
+	if (!IF_QFULL(inq)) {
+		IF_ENQUEUE(inq, m);
+		ifp->if_ibytes += m->m_pkthdr.len;
+	} else {
+		IF_DROP(inq);
+		m_freem(m);
+	}
+	splx(s);
+}
+/*
+ * Upper-layer callback used while a datagram circuit is being set up.
+ *
+ * A CALL_ACCEPTED control packet switches the circuit's upper-layer
+ * handler to x25_ifinput and starts any packets already queued on the
+ * circuit.  Everything else detaches the circuit from its llinfo; the
+ * circuit is also disconnected when there was no packet at all or the
+ * packet was not a control mbuf, but not for other decoded packet
+ * types.
+ */
+x25_connect_callback(lcp, m)
+register struct pklcd *lcp;
+register struct mbuf *m;
+{
+	register struct llinfo_x25 *lx = (struct llinfo_x25 *)lcp->lcd_upnext;
+	int do_clear;
+
+	if (m == 0)
+		do_clear = 1;
+	else if (m->m_type != MT_CONTROL) {
+		printf("x25_connect_callback: should panic\n");
+		do_clear = 1;
+	} else if (pk_decode(mtod(m, struct x25_packet *)) == CALL_ACCEPTED) {
+		lcp->lcd_upper = x25_ifinput;
+		if (lcp->lcd_sb.sb_mb)
+			lcp->lcd_send(lcp);	/* XXX start queued packets */
+		return;
+	} else
+		do_clear = 0;
+
+	/* refused */
+	lcp->lcd_upper = 0;
+	lx->lx_lcd = 0;
+	if (do_clear)
+		pk_disconnect(lcp);
+}
+#define SA(p) ((struct sockaddr *)(p))
+#define RT(p) ((struct rtentry *)(p))
+
+/*
+ * Handle an incoming call on a datagram circuit.
+ *
+ * The caller's address (lcd_faddr) is looked up in the routing table.
+ * The call is refused -- upper handler cleared and circuit closed --
+ * unless the route found carries an llinfo route, uses the X.25
+ * datagram mask, and that llinfo route's interface address is managed
+ * by x25_rtrequest.  Otherwise the call is confirmed and the circuit
+ * is attached to the llinfo route.
+ *
+ * NOTE(review): only m0->m_next is freed, and only on the success
+ * path; m0 itself and the refuse paths free nothing here.  Presumably
+ * the caller owns m0 -- confirm.
+ * NOTE(review): x25_rtrequest is declared `void' here but its
+ * definition later in this file has no return type (implicit int).
+ */
+x25_dgram_incoming(lcp, m0)
+register struct pklcd *lcp;
+struct mbuf *m0;
+{
+ register struct rtentry *rt, *nrt;
+ register struct mbuf *m = m0->m_next; /* m0 has calling sockaddr_x25 */
+ void x25_rtrequest();
+
+ rt = rtalloc1(SA(&lcp->lcd_faddr), 0);
+ if (rt == 0) {
+refuse: lcp->lcd_upper = 0;
+ pk_close(lcp);
+ return;
+ }
+ /* presumably gives back the reference taken by rtalloc1 -- TODO confirm */
+ rt->rt_refcnt--;
+ /* Here rt_llinfo is used as a pointer to another route entry. */
+ if ((nrt = RT(rt->rt_llinfo)) == 0 || rt_mask(rt) != x25_dgram_sockmask)
+ goto refuse;
+ /* Cached llinfo route is no longer up: look it up again via the gateway. */
+ if ((nrt->rt_flags & RTF_UP) == 0) {
+ rt->rt_llinfo = (caddr_t)rtalloc1(rt->rt_gateway, 0);
+ rtfree(nrt);
+ if ((nrt = RT(rt->rt_llinfo)) == 0)
+ goto refuse;
+ nrt->rt_refcnt--;
+ }
+ if (nrt->rt_ifa == 0 || nrt->rt_ifa->ifa_rtrequest != x25_rtrequest)
+ goto refuse;
+ lcp->lcd_send(lcp); /* confirm call */
+ x25_rtattach(lcp, nrt);
+ m_freem(m);
+}
+
+/*
+ * X.25 output routine.
+ *
+ * ifp - interface to send on
+ * m0  - packet to send; consumed on every path (queued or freed)
+ * dst - destination address
+ * rt  - route to use, or 0 to have one looked up from dst
+ *
+ * Resolves the route down to a non-gateway entry carrying an
+ * llinfo_x25, makes sure the packet starts with a packet header whose
+ * length matches the data in the chain, picks (or creates) a circuit
+ * and queues the packet on it.  Returns 0 or an errno value.
+ */
+x25_ifoutput(ifp, m0, dst, rt)
+struct ifnet *ifp;
+struct mbuf *m0;
+struct sockaddr *dst;
+register struct rtentry *rt;
+{
+	register struct mbuf *m = m0;
+	register struct llinfo_x25 *lx;
+	struct pklcd *lcp;
+	int error = 0;
+	int plen;
+
+	/* Add up the data bytes actually present in the chain. */
+	for (plen = 0; m; m = m->m_next)
+		plen += m->m_len;
+	m = m0;
+
+	if ((ifp->if_flags & IFF_UP) == 0)
+		senderr(ENETDOWN);
+	/* Follow gateway routes until a direct one is reached. */
+	while (rt == 0 || (rt->rt_flags & RTF_GATEWAY)) {
+		if (rt) {
+			if (rt->rt_llinfo) {
+				rt = (struct rtentry *)rt->rt_llinfo;
+				continue;
+			}
+			dst = rt->rt_gateway;
+		}
+		if ((rt = rtalloc1(dst, 1)) == 0)
+			senderr(EHOSTUNREACH);
+		rt->rt_refcnt--;
+	}
+	/*
+	 * Sanity checks.
+	 */
+	if ((rt->rt_ifp != ifp) ||
+	    (rt->rt_flags & (RTF_CLONING | RTF_GATEWAY)) ||
+	    ((lx = (struct llinfo_x25 *)rt->rt_llinfo) == 0)) {
+		senderr(ENETUNREACH);
+	}
+	if ((m->m_flags & M_PKTHDR) == 0) {
+		struct mbuf *mh;
+
+		if_x25stats.ifx_nophdr++;
+		/*
+		 * Allocate into a separate variable: if this fails, m must
+		 * still point at the caller's chain so that `bad' frees it
+		 * instead of leaking it.
+		 */
+		mh = m_gethdr(M_NOWAIT, MT_HEADER);
+		if (mh == 0)
+			senderr(ENOBUFS);
+		mh->m_pkthdr.len = plen;
+		mh->m_next = m0;
+		m = mh;
+	}
+	if (plen != m->m_pkthdr.len) {
+		if_x25stats.ifx_wrongplen++;
+		m->m_pkthdr.len = plen;
+	}
+next_circuit:
+	lcp = lx->lx_lcd;
+	if (lcp == 0) {
+		lx->lx_lcd = lcp = pk_attach((struct socket *)0);
+		if (lcp == 0)
+			senderr(ENOBUFS);
+		lcp->lcd_upper = x25_connect_callback;
+		lcp->lcd_upnext = (caddr_t)lx;
+		/*
+		 * NOTE(review): lx_ia is dereferenced unconditionally here
+		 * but tested for zero further down; x25_lxalloc() can leave
+		 * it zero.  Confirm which is intended.
+		 */
+		lcp->lcd_packetsize = lx->lx_ia->ia_xc.xc_psize;
+		lcp->lcd_flags = X25_MBS_HOLD;
+	}
+	switch (lcp->lcd_state) {
+	case READY:
+		if (dst->sa_family == AF_INET &&
+		    ifp->if_type == IFT_X25DDN &&
+		    rt->rt_gateway->sa_family != AF_CCITT)
+			x25_ddnip_to_ccitt(dst, rt);
+		if (rt->rt_gateway->sa_family != AF_CCITT) {
+			if ((rt->rt_flags & RTF_XRESOLVE) == 0)
+				senderr(EHOSTUNREACH);
+		} else if (x25_autoconnect)
+			error = pk_connect(lcp,
+					(struct sockaddr_x25 *)rt->rt_gateway);
+		if (error)
+			senderr(error);
+		/* FALLTHROUGH */
+	case SENT_CALL:
+	case DATA_TRANSFER:
+		/* Circuit full: try the next circuit of the same route. */
+		if (sbspace(&lcp->lcd_sb) < 0) {
+			lx = lx->lx_next;
+			if (lx->lx_rt != rt)
+				senderr(ENOSPC);
+			goto next_circuit;
+		}
+		if (lx->lx_ia)
+			lcp->lcd_dg_timer =
+				       lx->lx_ia->ia_xc.xc_dg_idletimo;
+		pk_send(lcp, m);
+		break;
+	default:
+		/*
+		 * We count on the timer routine to close idle
+		 * connections, if there are not enough circuits to go
+		 * around.
+		 *
+		 * So throw away data for now.
+		 * After we get it all working, we'll rewrite to handle
+		 * actively closing connections (other than by timers),
+		 * when circuits get tight.
+		 *
+		 * In the DDN case, the imp itself closes connections
+		 * under heavy load.
+		 */
+		error = ENOBUFS;
+	bad:
+		if (m)
+			m_freem(m);
+	}
+	return (error);
+}
+
+/*
+ * Simpleminded timer routine.
+ *
+ * For every packet-level control block (pkcb) whose interface address is
+ * attached to ifp, scan the logical channel table from
+ * pk_chan[pk_maxlcn - 1] down to pk_chan[1].  A circuit that is in
+ * DATA_TRANSFER state, is flagged X25_DG_CIRCUIT and has a non-zero
+ * lcd_dg_timer gets that timer decremented; when the timer reaches zero
+ * the circuit's lcd_upper routine is called with a null second argument.
+ * The whole scan runs between splimp() and splx().
+ */
+x25_iftimeout(ifp)
+struct ifnet *ifp;
+{
+ register struct pkcb *pkcb = 0;
+ register struct pklcd **lcpp, *lcp;
+ int s = splimp();	/* saved priority level, restored by splx() below */
+
+ FOR_ALL_PKCBS(pkcb)
+ if (pkcb->pk_ia->ia_ifp == ifp)
+ for (lcpp = pkcb->pk_chan + pkcb->pk_maxlcn;
+ --lcpp > pkcb->pk_chan;)	/* pre-decrement: pk_chan[0] is never examined */
+ if ((lcp = *lcpp) &&
+ lcp->lcd_state == DATA_TRANSFER &&
+ (lcp->lcd_flags & X25_DG_CIRCUIT) &&
+ (lcp->lcd_dg_timer && --lcp->lcd_dg_timer == 0)) {
+ lcp->lcd_upper(lcp, 0);	/* idle timer just ran out */
+ }
+ splx(s);
+}
+/*
+ * This routine gets called when validating additions of new routes
+ * or deletions of old ones.
+ *
+ * cmd is compared against RTM_ADD and RTM_DELETE; rt is the route being
+ * changed.  The dst argument is not referenced in the body.
+ *
+ *  - Gateway routes: a route cached in rt_llinfo is released and, for
+ *    RTM_ADD, replaced by the result of rtalloc1() on rt_gateway.
+ *  - Routes without RTF_HOST are otherwise ignored.
+ *  - RTM_DELETE of a host route calls x25_lxfree() until rt_llinfo is
+ *    empty and then removes the inverse route.
+ *  - Any other request makes sure an llinfo_x25 exists, disconnects a
+ *    circuit whose remote address no longer matches rt_gateway, and
+ *    installs the inverse route through x25_rtinvert().
+ */
+x25_rtrequest(cmd, rt, dst)
+register struct rtentry *rt;
+struct sockaddr *dst;
+{
+ register struct llinfo_x25 *lx = (struct llinfo_x25 *)rt->rt_llinfo;
+ register struct sockaddr_x25 *sa =(struct sockaddr_x25 *)rt->rt_gateway;	/* not used below */
+ register struct pklcd *lcp;
+
+ /* One-time set-up of the datagram socket mask.  This would be done
+ in pk_init, except that the routing table doesn't exist yet. */
+ if (x25_dgram_sockmask == 0) {
+ struct radix_node *rn_addmask();
+ x25_dgram_sockmask =
+ SA(rn_addmask((caddr_t)&x25_dgmask, 0, 4)->rn_key);
+ }
+ if (rt->rt_flags & RTF_GATEWAY) {
+ if (rt->rt_llinfo)
+ RTFREE((struct rtentry *)rt->rt_llinfo);	/* drop the previously cached route */
+ rt->rt_llinfo = (cmd == RTM_ADD) ?
+ (caddr_t)rtalloc1(rt->rt_gateway, 1) : 0;
+ return;
+ }
+ if ((rt->rt_flags & RTF_HOST) == 0)
+ return;
+ if (cmd == RTM_DELETE) {
+ while (rt->rt_llinfo)	/* presumably x25_lxfree() unlinks the entry from rt - confirm */
+ x25_lxfree((struct llinfo *)rt->rt_llinfo);
+ x25_rtinvert(RTM_DELETE, rt->rt_gateway, rt);
+ return;
+ }
+ if (lx == 0 && (lx = x25_lxalloc(rt)) == 0)
+ return;	/* no llinfo and none could be allocated */
+ if ((lcp = lx->lx_lcd) && lcp->lcd_state != READY) {
+ /*
+ * This can only happen on a RTM_CHANGE operation
+ * though cmd will be RTM_ADD.
+ */
+ if (lcp->lcd_ceaddr &&
+ Bcmp(rt->rt_gateway, lcp->lcd_ceaddr,
+ lcp->lcd_ceaddr->x25_len) != 0) {
+ x25_rtinvert(RTM_DELETE, lcp->lcd_ceaddr, rt);
+ lcp->lcd_upper = 0;	/* detach the upper-layer routine before disconnecting */
+ pk_disconnect(lcp);
+ }
+ lcp = 0;	/* NOTE(review): lcp is not read again after this store */
+ }
+ x25_rtinvert(RTM_ADD, rt->rt_gateway, rt);
+}
+
+int x25_dont_rtinvert = 0;	/* when non-zero, x25_rtinvert() returns without doing anything */
+
+x25_rtinvert(cmd, sa, rt)
+register struct sockaddr *sa;
+register struct rtentry *rt;
+{
+ struct rtentry *rt2 = 0;
+ /*
+ * Maintain the "inverse" routing entry that leads from the X.25
+ * address sa back to the route rt.  Nothing is done for X25DDN
+ * interfaces, when x25_dont_rtinvert is set, or when sa is not an
+ * AF_CCITT address.
+ *
+ * Any cmd other than RTM_DELETE adds an RTF_PROTO2 entry for sa
+ * (masked by x25_dgram_sockmask), stores rt in its rt_llinfo and
+ * takes a reference on rt.  RTM_DELETE looks the entry up again,
+ * verifies that it points back at rt, gives the reference back and
+ * deletes the entry.
+ *
+ * rt_gateway contains PID indicating which proto
+ * family on the other end, so will be different
+ * from general host route via X.25.
+ */
+ if (rt->rt_ifp->if_type == IFT_X25DDN || x25_dont_rtinvert)
+ return;
+ if (sa->sa_family != AF_CCITT)
+ return;
+ if (cmd != RTM_DELETE) {
+ rtrequest(RTM_ADD, sa, rt_key(rt), x25_dgram_sockmask,
+ RTF_PROTO2, &rt2);
+ if (rt2) {
+ rt2->rt_llinfo = (caddr_t) rt;	/* back pointer checked on deletion */
+ rt->rt_refcnt++;
+ }
+ return;
+ }
+ rt2 = rt;	/* from here on rt2 is the forward route, rt the inverse entry */
+ if ((rt = rtalloc1(sa, 0)) == 0 ||
+ (rt->rt_flags & RTF_PROTO2) == 0 ||
+ rt->rt_llinfo != (caddr_t)rt2) {
+ printf("x25_rtchange: inverse route screwup\n");
+ return;	/* NOTE(review): a non-null rt from rtalloc1() is not released here - confirm */
+ } else
+ rt2->rt_refcnt--;	/* undo the reference taken when the inverse entry was added */
+ rtrequest(RTM_DELETE, sa, rt_key(rt2), x25_dgram_sockmask,
+ 0, (struct rtentry **) 0);
+}
+
+static struct sockaddr_x25 blank_x25 = {sizeof blank_x25, AF_CCITT};	/* template: length and family set, remaining members zero */
+/*
+ * IP to X25 address routine copyright ACC, used by permission.
+ *
+ * union imp_addr overlays an IP address with four single-byte fields so
+ * that the translation routines can pick the address apart byte by byte.
+ */
+union imp_addr {
+ struct in_addr ip;	/* the address as a whole */
+ struct imp {
+ u_char s_net;	/* first byte; its top bits are tested to tell class A/B/C apart */
+ u_char s_host;	/* second byte; used as the port for class A addresses */
+ u_char s_lh;	/* third byte; used as the port for class B addresses */
+ u_char s_impno;	/* fourth byte; IMP number (class C: IMP number and port combined) */
+ } imp;
+};
+
+/*
+ * The following is totally bogus and here only to preserve
+ * the IP to X.25 translation.
+ *
+ * Build the X.25 address that corresponds to the IP address in src and
+ * store it in rt->rt_gateway.  The IMP number and port are extracted from
+ * the IP address according to its class and written, one decimal digit
+ * per byte, into x25_addr[6..10]; x25_addr[5] tells physical (0) from
+ * logical (1) addressing and x25_addr[0] holds the length.
+ */
+x25_ddnip_to_ccitt(src, rt)
+struct sockaddr_in *src;
+register struct rtentry *rt;
+{
+	register struct sockaddr_x25 *gw = (struct sockaddr_x25 *)rt->rt_gateway;
+	char *digits = gw->x25_addr;
+	union imp_addr ipa;
+	int imp_no, imp_port, logical, pos;
+
+	/* Pick up the IP address before the gateway gets overwritten. */
+	ipa.ip = src->sin_addr;
+	*gw = blank_x25;
+
+	imp_no = ipa.imp.s_impno;
+	if ((ipa.imp.s_net & 0x80) == 0x00) {
+		/* class A */
+		imp_port = ipa.imp.s_host;
+	} else if ((ipa.imp.s_net & 0xc0) == 0x80) {
+		/* class B */
+		imp_port = ipa.imp.s_lh;
+	} else {
+		/* class C: the last byte carries both values */
+		imp_port = imp_no % 32;
+		imp_no = imp_no / 32;
+	}
+
+	digits[0] = 12;		/* length */
+	/* DNIC is cleared by struct copy above */
+
+	if (imp_port < 64) {
+		/*
+		 * Physical: 0000 0 IIIHH00 [SS]
+		 * s_impno -> III, s_host -> HH
+		 */
+		digits[5] = 0;	/* set flag bit */
+		digits[6] = imp_no / 100;
+		digits[7] = (imp_no % 100) / 10;
+		digits[8] = imp_no % 10;
+		digits[9] = imp_port / 10;
+		digits[10] = imp_port % 10;
+	} else {
+		/*
+		 * Logical: 0000 1 RRRRR00 [SS]
+		 * s_host * 256 + s_impno -> RRRRR
+		 */
+		logical = (imp_port << 8) + imp_no;
+		digits[5] = 1;
+		/* peel off the five decimal digits, least significant first */
+		for (pos = 10; pos >= 6; pos--) {
+			digits[pos] = logical % 10;
+			logical /= 10;
+		}
+	}
+}
+
+/*
+ * This routine is a sketch and is not to be believed!!!!!
+ *
+ * This is a utility routine to be called by x25 devices when a
+ * call request is honored with the intent of starting datagram forwarding.
+ *
+ * dst is the caller's X.25 address, ia the X.25 interface address the
+ * call arrived on and af the address family to forward.  For AF_INET on
+ * an X25DDN interface the IP address of the caller is reconstructed from
+ * the digits of dst and the network part of our own address on that
+ * interface; otherwise the routing table is asked for the entry matching
+ * dst and its gateway is used.  A final rtalloc1() on the resulting
+ * address creates the reverse route.
+ *
+ * Always returns 0.
+ */
+x25_dg_rtinit(dst, ia, af)
+struct sockaddr_x25 *dst;
+register struct x25_ifaddr *ia;
+{
+	struct sockaddr *sa = 0;
+	struct rtentry *rt;
+	struct in_addr my_addr;
+	static struct sockaddr_in sin = {sizeof(sin), AF_INET};
+
+	if (ia->ia_ifp->if_type == IFT_X25DDN && af == AF_INET) {
+		/*
+		 * Inverse X25 to IP mapping copyright and courtesy ACC.
+		 */
+		int imp_no, imp_port, temp;
+		union imp_addr imp_addr;
+		register struct in_ifaddr *ina;
+		extern struct in_ifaddr *in_ifaddr;
+		register char *x25addr = dst->x25_addr;
+
+		/*
+		 * First determine our IP addr for network
+		 */
+		for (ina = in_ifaddr; ina; ina = ina->ia_next)
+			if (ina->ia_ifp == ia->ia_ifp) {
+				my_addr = ina->ia_addr.sin_addr;
+				break;
+			}
+		/*
+		 * Without an inet address on this interface there is no
+		 * network part to build on; give up rather than use an
+		 * uninitialised my_addr.
+		 */
+		if (ina == 0)
+			return (0);
+
+		switch (x25addr[5] & 0x0f) {
+		case 0:		/* Physical: 0000 0 IIIHH00 [SS] */
+			imp_no =
+			    ((int) (x25addr[6] & 0x0f) * 100) +
+			    ((int) (x25addr[7] & 0x0f) * 10) +
+			    ((int) (x25addr[8] & 0x0f));
+
+			imp_port =
+			    ((int) (x25addr[9] & 0x0f) * 10) +
+			    ((int) (x25addr[10] & 0x0f));
+			break;
+		case 1:		/* Logical: 0000 1 RRRRR00 [SS] */
+			temp = ((int) (x25addr[6] & 0x0f) * 10000)
+			    + ((int) (x25addr[7] & 0x0f) * 1000)
+			    + ((int) (x25addr[8] & 0x0f) * 100)
+			    + ((int) (x25addr[9] & 0x0f) * 10)
+			    + ((int) (x25addr[10] & 0x0f));
+
+			imp_port = temp >> 8;
+			imp_no = temp & 0xff;
+			break;
+		default:
+			return (0);
+		}
+		imp_addr.ip = my_addr;
+		if ((imp_addr.imp.s_net & 0x80) == 0x00) {
+			/* class A */
+			imp_addr.imp.s_host = imp_port;
+			imp_addr.imp.s_impno = imp_no;
+			imp_addr.imp.s_lh = 0;
+		} else if ((imp_addr.imp.s_net & 0xc0) == 0x80) {
+			/* class B */
+			imp_addr.imp.s_lh = imp_port;
+			imp_addr.imp.s_impno = imp_no;
+		} else {
+			/* class C */
+			imp_addr.imp.s_impno = (imp_no << 5) + imp_port;
+		}
+		sin.sin_addr = imp_addr.ip;
+		sa = (struct sockaddr *)&sin;
+	} else {
+		/*
+		 * This uses the X25 routing table to do inverse
+		 * lookup of x25 address to sockaddr.
+		 */
+		if ((rt = rtalloc1(SA(dst), 0)) != 0) {
+			sa = rt->rt_gateway;
+			rt->rt_refcnt--;
+		}
+	}
+	/*
+	 * Call to rtalloc1 will create rtentry for reverse path
+	 * to callee by virtue of cloning magic and will allocate
+	 * space for local control block.
+	 */
+	if (sa && (rt = rtalloc1(sa, 1)))
+		rt->rt_refcnt--;
+	return (0);
+}
+int x25_startproto = 1;
+
+/*
+ * Register the datagram protocol listeners, unless x25_startproto has
+ * been cleared.
+ */
+pk_init()
+{
+	/*
+	 * warning, sizeof (struct sockaddr_x25) > 32,
+	 * but contains no data of interest beyond 32
+	 */
+	if (x25_startproto == 0)
+		return;
+
+	pk_protolisten(0xcc, 1, x25_dgram_incoming);
+	pk_protolisten(0x81, 1, x25_dgram_incoming);
+}
+
+struct x25_dgproto {
+ u_char spi;	/* protocol identifier byte; pk_user_protolisten() matches it against info[0] */
+ u_char spilen;	/* length handed to pk_protolisten() and compared with x25_udlen */
+ int (*f)();	/* routine handed to pk_protolisten() for this identifier */
+} x25_dgprototab[] = {
+#if defined(ISO) && defined(TPCONS)
+{ 0x0, 0, tp_incoming},
+#endif
+{ 0xcc, 1, x25_dgram_incoming},
+{ 0xcd, 1, x25_dgram_incoming},
+{ 0x81, 1, x25_dgram_incoming},
+};
+
+/*
+ * Start or stop listening for one of the protocols in x25_dgprototab.
+ *
+ * info[0] selects the table entry by its spi byte.  A non-zero info[1]
+ * registers the entry through pk_protolisten() and returns its result;
+ * a zero info[1] disconnects the first listener whose user data matches
+ * the entry and returns 0.  ESRCH is returned when either the table
+ * entry or the listener cannot be found.
+ */
+pk_user_protolisten(info)
+register u_char *info;
+{
+	register struct x25_dgproto *dp, *entry = 0;
+	register struct pklcd *lcp;
+	int nentries = sizeof x25_dgprototab / sizeof x25_dgprototab[0];
+
+	/* Scan the table from its last entry towards the first. */
+	for (dp = x25_dgprototab + nentries;
+	    entry == 0 && dp > x25_dgprototab; )
+		if ((--dp)->spi == info[0])
+			entry = dp;
+	if (entry == 0)
+		return ESRCH;
+
+	if (info[1] != 0)
+		return pk_protolisten(entry->spi, entry->spilen, entry->f);
+
+	for (lcp = pk_listenhead; lcp; lcp = lcp->lcd_listen) {
+		if (lcp->lcd_laddr.x25_udlen != entry->spilen)
+			continue;
+		if (Bcmp(&entry->spi, lcp->lcd_laddr.x25_udata,
+		    entry->spilen) != 0)
+			continue;
+		pk_disconnect(lcp);
+		return 0;
+	}
+	return ESRCH;
+}
+
+/*
+ * This routine transfers an X.25 circuit to or from a routing entry.
+ * If the supplied circuit is in DATA_TRANSFER state, it is added to the
+ * routing entry. If freshly allocated, it glues back the vc from
+ * the rtentry to the socket.
+ *
+ * so is the socket whose so_pcb holds the circuit; m0 starts with the
+ * destination sockaddr, optionally followed (rounded up to a multiple of
+ * sizeof(long)) by a netmask that selects among routes with the same key.
+ * Returns 0 on success, ESRCH when no suitable host route with X.25
+ * link-level info exists, and ENOTCONN when there is no circuit or it is
+ * in the wrong state.
+ *
+ * The ROUNDUP() and transfer_sockbuf() macros defined here stay defined
+ * for the rest of the file; transfer_sockbuf() relies on a local mbuf
+ * pointer named m.
+ */
+pk_rtattach(so, m0)
+register struct socket *so;
+struct mbuf *m0;
+{
+ register struct pklcd *lcp = (struct pklcd *)so->so_pcb;
+ register struct mbuf *m = m0;
+ struct sockaddr *dst = mtod(m, struct sockaddr *);
+ register struct rtentry *rt = rtalloc1(dst, 0);
+ register struct llinfo_x25 *lx;
+ caddr_t cp;	/* netmask following dst in the mbuf, or 0 if none was supplied */
+#define ROUNDUP(a) \
+ ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
+#define transfer_sockbuf(s, f, l) \
+ while (m = (s)->sb_mb)\
+ {(s)->sb_mb = m->m_act; m->m_act = 0; sbfree((s), m); f(l, m);}
+
+ if (rt)
+ rt->rt_refcnt--;	/* give back the reference taken by rtalloc1() */
+ cp = (dst->sa_len < m->m_len) ? ROUNDUP(dst->sa_len) + (caddr_t)dst : 0;
+ while (rt &&	/* walk the duplicate-key chain until the netmask matches */
+ ((cp == 0 && rt_mask(rt) != 0) ||
+ (cp != 0 && (rt_mask(rt) == 0 ||
+ Bcmp(cp, rt_mask(rt), rt_mask(rt)->sa_len)) != 0)))
+ rt = (struct rtentry *)rt->rt_nodes->rn_dupedkey;
+ if (rt == 0 || (rt->rt_flags & RTF_GATEWAY) ||
+ (lx = (struct llinfo_x25 *)rt->rt_llinfo) == 0)
+ return ESRCH;
+ if (lcp == 0)
+ return ENOTCONN;
+ switch (lcp->lcd_state) {
+ default:
+ return ENOTCONN;
+
+ case READY:
+ /* Detach VC from rtentry */
+ if (lx->lx_lcd == 0)
+ return ENOTCONN;
+ lcp->lcd_so = 0;
+ pk_close(lcp);	/* the socket's own, unused circuit goes away */
+ lcp = lx->lx_lcd;	/* continue with the circuit held by the route */
+ if (lx->lx_next->lx_rt == rt)	/* the next llinfo belongs to the same route */
+ x25_lxfree(lx);
+ lcp->lcd_so = so;
+ lcp->lcd_upper = 0;
+ lcp->lcd_upnext = 0;
+ transfer_sockbuf(&lcp->lcd_sb, sbappendrecord, &so->so_snd);	/* move queued data to the socket's send buffer */
+ soisconnected(so);
+ return 0;
+
+ case DATA_TRANSFER:
+ /* Add VC to rtentry */
+ lcp->lcd_so = 0;
+ lcp->lcd_sb = so->so_snd; /* structure copy */
+ bzero((caddr_t)&so->so_snd, sizeof(so->so_snd)); /* XXXXXX */
+ so->so_pcb = 0;
+ x25_rtattach(lcp, rt);
+ transfer_sockbuf(&so->so_rcv, x25_ifinput, lcp);	/* feed already received data to x25_ifinput() */
+ soisdisconnected(so);
+ }
+ return 0;
+}
+/*
+ * Attach the virtual circuit lcp0 to the X.25 link-level info of route rt.
+ *
+ * If the route already holds a circuit that is still in READY state, the
+ * data queued on it is pushed out over lcp0 with pk_output() and the old
+ * circuit is closed.  If the route holds a circuit in any other state,
+ * lcp0 becomes an additional circuit and gets a freshly allocated
+ * llinfo_x25.  In every case lcp0 ends up delivering to x25_ifinput().
+ *
+ * Returns 0 on success, ENOBUFS if no llinfo_x25 could be allocated.
+ * Relies on the transfer_sockbuf() macro defined earlier in this file.
+ */
+x25_rtattach(lcp0, rt)
+register struct pklcd *lcp0;
+struct rtentry *rt;
+{
+	register struct llinfo_x25 *lx = (struct llinfo_x25 *)rt->rt_llinfo;
+	register struct pklcd *lcp;
+	register struct mbuf *m;	/* scratch for transfer_sockbuf() */
+
+	if ((lcp = lx->lx_lcd) != 0) {	/* adding an additional VC */
+		if (lcp->lcd_state == READY) {
+			transfer_sockbuf(&lcp->lcd_sb, pk_output, lcp0);
+			lcp->lcd_upper = 0;
+			pk_close(lcp);
+		} else {
+			lx = x25_lxalloc(rt);
+			if (lx == 0)
+				return ENOBUFS;
+		}
+	}
+	lx->lx_lcd = lcp = lcp0;
+	lcp->lcd_upper = x25_ifinput;
+	lcp->lcd_upnext = (caddr_t)lx;
+	/* success used to fall off the end without a defined value */
+	return 0;
+}
diff --git a/sys/netccitt/llc_input.c b/sys/netccitt/llc_input.c
new file mode 100644
index 000000000000..7a01973d9794
--- /dev/null
+++ b/sys/netccitt/llc_input.c
@@ -0,0 +1,468 @@
+/*
+ * Copyright (C) Dirk Husemann, Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Dirk Husemann and the Computer Science Department (IV) of
+ * the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)llc_input.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/route.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/llc_var.h>
+
+/*
+ * This module implements LLC as specified by ISO 8802-2.
+ */
+
+
+/*
+ * llcintr() handles all LLC frames (except ISO CLNS ones for the time being)
+ * and tries to pass them on to the appropriate network layer entity.
+ *
+ * Frames are taken off llcintrq one at a time (at splimp) until the queue
+ * is empty.  XID and TEST frames are answered directly through the
+ * receiving interface's if_output routine; every other frame is handed to
+ * llc_input() for the link it belongs to, creating the link control block
+ * first when the addressed SAP is known.  Frames that cannot be matched
+ * to a route or a SAP are dropped.
+ */
+void
+llcintr()
+{
+	register struct mbuf *m;
+	register int i;
+	register int frame_kind;
+	register u_char cmdrsp;
+	struct llc_linkcb *linkp;
+	struct npaidbentry *sapinfo;
+	struct sdl_hdr *sdlhdr;
+	struct llc *frame;
+	long expected_len;
+
+	struct ifnet *ifp;
+	struct rtentry *llrt;
+	struct rtentry *nlrt;
+
+	for (;;) {
+		i = splimp();
+		IF_DEQUEUE(&llcintrq, m);
+		splx(i);
+		if (m == 0)
+			break;
+#ifdef DIAGNOSTIC
+		if ((m->m_flags & M_PKTHDR) == 0)
+			panic("llcintr no HDR");
+#endif
+		/*
+		 * Nothing is known about this frame's SAP yet; never let
+		 * a value from a previous frame (or stack garbage) leak
+		 * into the tests below.
+		 */
+		sapinfo = (struct npaidbentry *) 0;
+
+		/*
+		 * Get ifp this packet was received on
+		 */
+		ifp = m->m_pkthdr.rcvif;
+
+		sdlhdr = mtod(m, struct sdl_hdr *);
+
+		/*
+		 * [Copied from net/ip_input.c]
+		 *
+		 * Check that the amount of data in the buffers is
+		 * at least as much as the LLC header tells us.
+		 * Trim mbufs if longer than expected.
+		 * Drop packets if shorter than we think they are.
+		 *
+		 * Layout of mbuf chain at this point:
+		 *
+		 *  +-------------------------------+----+ -\
+		 *  | sockaddr_dl src - sdlhdr_src  | 20 |   \
+		 *  +-------------------------------+----+    |
+		 *  | sockaddr_dl dst - sdlhdr_dst  | 20 |     > sizeof(struct sdl_hdr) == 44
+		 *  +-------------------------------+----+    |
+		 *  | LLC frame len - sdlhdr_len    | 04 |   /
+		 *  +-------------------------------+----+ -/
+		 *     /
+		 *     | m_next
+		 *     \
+		 *  +----------------------------+----+ -\
+		 *  | llc DSAP                   | 01 |   \
+		 *  +----------------------------+----+    |
+		 *  | llc SSAP                   | 01 |    |
+		 *  +----------------------------+----+     > sdlhdr_len
+		 *  | llc control                | 01 |    |
+		 *  +----------------------------+----+    |
+		 *  | ...                        |    |   /
+		 *                                      -/
+		 *
+		 * Thus we expect to have exactly
+		 * (sdlhdr->sdlhdr_len+sizeof(struct sdl_hdr)) in the mbuf chain
+		 */
+		expected_len = sdlhdr->sdlhdr_len + sizeof(struct sdl_hdr);
+
+		if (m->m_pkthdr.len < expected_len) {
+			m_freem(m);
+			continue;
+		}
+		if (m->m_pkthdr.len > expected_len) {
+			if (m->m_len == m->m_pkthdr.len) {
+				m->m_len = expected_len;
+				m->m_pkthdr.len = expected_len;
+			} else
+				/* negative count: trim from the tail */
+				m_adj(m, expected_len - m->m_pkthdr.len);
+		}
+
+		/*
+		 * Get llc header.  It either follows the SNPA header in the
+		 * same mbuf or starts the next mbuf of the chain.
+		 */
+		if (m->m_len > sizeof(struct sdl_hdr))
+			frame = (struct llc *)(sdlhdr + 1);
+		else if (m->m_next)
+			frame = mtod(m->m_next, struct llc *);
+		else {
+			/* nothing behind the SNPA header --- drop it */
+			m_freem(m);
+			continue;
+		}
+		if (frame == (struct llc *) NULL)
+			panic("llcintr no llc header");
+
+		/*
+		 * Now check for bogus I/S frame, i.e. those with a control
+		 * field telling us they're an I/S frame yet their length
+		 * is less than the established I/S frame length (DSAP + SSAP +
+		 * control + N(R)&P/F = 4) --- we drop those suckers
+		 */
+		if (((frame->llc_control & 0x03) != 0x03)
+		    && ((expected_len - sizeof(struct sdl_hdr)) < LLC_ISFRAMELEN)) {
+			m_freem(m);
+			printf("llc: hurz error\n");
+			continue;
+		}
+
+		/*
+		 * Get link control block for the addressed link connection.
+		 * If there is none we take care of it later on.
+		 */
+		cmdrsp = (frame->llc_ssap & 0x01);
+		frame->llc_ssap &= ~0x01;
+		if (llrt = rtalloc1((struct sockaddr *)&sdlhdr->sdlhdr_src, 0))
+			llrt->rt_refcnt--;
+#ifdef notyet
+		else llrt = npaidb_enter(&sdlhdr->sdlhdr_src, 0, 0, 0);
+#endif /* notyet */
+		else {
+			/*
+			 * We cannot do anything currently here as we
+			 * don't `know' this link --- drop it
+			 */
+			m_freem(m);
+			continue;
+		}
+		linkp = ((struct npaidbentry *)(llrt->rt_llinfo))->np_link;
+		nlrt = ((struct npaidbentry *)(llrt->rt_llinfo))->np_rt;
+
+		/*
+		 * If the link is not existing right now, we can try and look up
+		 * the SAP info block.
+		 */
+		if ((linkp == 0) && frame->llc_ssap)
+			sapinfo = llc_getsapinfo(frame->llc_dsap, ifp);
+
+		/*
+		 * Handle XID and TEST frames
+		 * XID:	if DLSAP == 0, return	type-of-services
+		 *				window-0
+		 *				DLSAP-0
+		 *				format-identifier-?
+		 *	if DLSAP != 0, locate sapcb and return
+		 *				type-of-services
+		 *				SAP-window
+		 *				format-identifier-?
+		 * TEST:	swap (snpah_dst, snpah_src) and return frame
+		 *
+		 * Also toggle the CMD/RESP bit
+		 *
+		 * Is this behaviour correct? Check ISO 8802-2 (90)!
+		 */
+		frame_kind = llc_decode(frame, (struct llc_linkcb *)0);
+		switch(frame_kind) {
+		case LLCFT_XID:
+			if (linkp) {
+				/* existing link: answer from its own SAP info */
+				frame->llc_window = linkp->llcl_window;
+				frame->llc_fid = 9;			/* XXX */
+				frame->llc_class = linkp->llcl_sapinfo->si_class;
+				frame->llc_ssap = frame->llc_dsap;
+			} else if (sapinfo) {
+				frame->llc_window = sapinfo->si_window;
+				frame->llc_fid = 9;			/* XXX */
+				frame->llc_class = sapinfo->si_class;
+				frame->llc_ssap = frame->llc_dsap;
+			} else {
+				frame->llc_window = 0;
+				frame->llc_fid = 9;
+				frame->llc_class = 1;
+				frame->llc_dsap = frame->llc_ssap = 0;
+			}
+
+			/* fall thru to */
+		case LLCFT_TEST:
+			sdl_swapaddr(&(mtod(m, struct sdl_hdr *)->sdlhdr_dst),
+				     &(mtod(m, struct sdl_hdr *)->sdlhdr_src));
+
+			/* Now set the CMD/RESP bit */
+			frame->llc_ssap |= (cmdrsp == 0x0 ? 0x1 : 0x0);
+
+			/* Ship it out again */
+			(*ifp->if_output)(ifp, m,
+					  (struct sockaddr *) &(mtod(m, struct sdl_hdr *)->sdlhdr_dst),
+					  (struct rtentry *) 0);
+			continue;
+		}
+
+		/*
+		 * Create link control block in case it is not existing
+		 */
+		if (linkp == 0 && sapinfo) {
+			if ((linkp = llc_newlink(&sdlhdr->sdlhdr_src, ifp, nlrt,
+						 (nlrt == 0) ? 0 : nlrt->rt_llinfo,
+						 llrt)) == 0) {
+				printf("llcintr: couldn't create new link\n");
+				m_freem(m);
+				continue;
+			}
+			((struct npaidbentry *)llrt->rt_llinfo)->np_link = linkp;
+		} else if (linkp == 0) {
+			/* The link is not known to us, drop the frame and continue */
+			m_freem(m);
+			continue;
+		}
+
+		/*
+		 * Drop SNPA header and get rid of empty mbuf at the
+		 * front of the mbuf chain (I don't like 'em)
+		 */
+		m_adj(m, sizeof(struct sdl_hdr));
+		/*
+		 * LLC_UFRAMELEN is sufficient, m_pullup() will pull up
+		 * the min(m->m_len, maxprotohdr_len [=40]) thus doing
+		 * the trick ...
+		 */
+		if ((m = m_pullup(m, LLC_UFRAMELEN)))
+			/*
+			 * Pass it on thru the elements of procedure
+			 */
+			llc_input(linkp, m, cmdrsp);
+	}
+	return;
+}
+
+/*
+ * llc_input() --- Process one incoming frame for the link linkp.
+ *
+ *		   The frame is decoded and handed to llc_statehandler()
+ *		   together with its command/response and poll/final bits.
+ *		   The handler answers with an action code:
+ *		    o LLC_DATA_INDICATION: the LLC header is stripped and
+ *		      the payload is passed to the input routine of the
+ *		      link's SAP;
+ *		    o anything else: the mbuf is released here.
+ *		   Finally llc_start() is called to push out frames that
+ *		   are waiting on the link.  Always returns 0.
+ */
+llc_input(struct llc_linkcb *linkp, struct mbuf *m, u_char cmdrsp)
+{
+	struct ifnet *ifp = linkp->llcl_if;
+	struct llc *frame = mtod(m, struct llc *);
+	int frame_kind, pollfinal, action;
+
+	if (frame == (struct llc *) 0) {
+		m_freem(m);
+		return 0;
+	}
+
+	/* U frames keep the P/F bit in the first control byte */
+	if ((frame->llc_control & 0x03) == 0x03)
+		pollfinal = LLCGBITS(frame->llc_control, u_pf);
+	else
+		pollfinal = LLCGBITS(frame->llc_control_ext, s_pf);
+
+	/*
+	 * decode the frame, then let the state machine have it
+	 */
+	frame_kind = llc_decode(frame, linkp);
+	action = llc_statehandler(linkp, frame, frame_kind, cmdrsp,
+				  pollfinal);
+
+	if (action == LLC_DATA_INDICATION) {
+		m_adj(m, LLC_ISFRAMELEN);
+		if (m = m_pullup(m, NLHDRSIZEGUESS)) {
+			m->m_pkthdr.rcvif = (struct ifnet *)linkp->llcl_nlnext;
+			(*linkp->llcl_sapinfo->si_input)(m);
+		}
+	} else if (m) {
+		/* not an info frame: nobody else wants the mbuf */
+		m_freem(m);
+	}
+
+	/* try to get frames out ... */
+	llc_start(linkp);
+
+	return 0;
+}
+
+/*
+ * This routine is called by configuration setup. It sets up a station control
+ * block and notifies all registered upper level protocols.
+ *
+ * prc selects the request, addr is the interface address (PRC_IFUP and
+ * PRC_IFDOWN only) and info points to a struct dll_ctlinfo:
+ *
+ *  PRC_IFUP		register the SAP described by info on the interface
+ *  PRC_IFDOWN		request a disconnect on every link of the interface
+ *  PRC_CONNECT_REQUEST	create the link for info's route if necessary and
+ *			return its control block
+ *  PRC_DISCONNECT_REQUEST / PRC_RESET_REQUEST
+ *			pass the request to the link's state handler
+ *
+ * Returns the link control block for PRC_CONNECT_REQUEST and 0 in every
+ * other case, including all failures.
+ */
+caddr_t
+llc_ctlinput(int prc, struct sockaddr *addr, caddr_t info)
+{
+	struct ifnet *ifp = 0;
+	struct ifaddr *ifa;
+	struct dll_ctlinfo *ctlinfo = (struct dll_ctlinfo *)info;
+	u_char sap;
+	struct dllconfig *config;
+	caddr_t pcb;
+	struct rtentry *nlrt;
+	struct rtentry *llrt = 0;
+	struct llc_linkcb *linkp = 0;
+	register int i;
+
+	/* info must point to something valid at all times */
+	if (info == 0)
+		return 0;
+
+	if (prc == PRC_IFUP || prc == PRC_IFDOWN) {
+		/* we use either this set ... */
+		ifa = ifa_ifwithaddr(addr);
+		ifp = ifa ? ifa->ifa_ifp : 0;
+		if (ifp == 0)
+			return 0;
+
+		sap = ctlinfo->dlcti_lsap;
+		config = ctlinfo->dlcti_cfg;
+		pcb = (caddr_t) 0;
+		nlrt = (struct rtentry *) 0;
+	} else {
+		/* or this one */
+		sap = 0;
+		config = (struct dllconfig *) 0;
+		pcb = ctlinfo->dlcti_pcb;
+		nlrt = ctlinfo->dlcti_rt;
+
+		/* without a network layer route there is no link to find */
+		if (nlrt == 0)
+			return 0;
+
+		if ((llrt = rtalloc1(nlrt->rt_gateway, 0)))
+			llrt->rt_refcnt--;
+		else return 0;
+
+		linkp = ((struct npaidbentry *)llrt->rt_llinfo)->np_link;
+	}
+
+	switch (prc) {
+	case PRC_IFUP:
+		(void) llc_setsapinfo(ifp, addr->sa_family, sap, config);
+		return 0;
+
+	case PRC_IFDOWN: {
+		register struct llc_linkcb *nlinkp;
+
+		/*
+		 * All links are accessible over the doubly linked list llccb_q
+		 */
+		if (!LQEMPTY) {
+			/*
+			 * A for-loop is not that great an idea as the linkp
+			 * will get deleted by llc_timer()
+			 */
+			linkp = LQFIRST;
+			while (LQVALID(linkp)) {
+				nlinkp = LQNEXT(linkp);
+				/* only links on the interface going down */
+				if (linkp->llcl_if == ifp) {
+					i = splimp();
+					(void)llc_statehandler(linkp, (struct llc *)0,
+							       NL_DISCONNECT_REQUEST,
+							       0, 1);
+					splx(i);
+				}
+				linkp = nlinkp;
+			}
+		}
+		/*
+		 * Done; the connect request code below needs a route,
+		 * which this request does not have.
+		 */
+		return 0;
+	}
+
+	case PRC_CONNECT_REQUEST:
+		if (linkp == 0) {
+			if ((linkp = llc_newlink((struct sockaddr_dl *) nlrt->rt_gateway,
+						 nlrt->rt_ifp, nlrt,
+						 pcb, llrt)) == 0)
+				return (0);
+			((struct npaidbentry *)llrt->rt_llinfo)->np_link = linkp;
+			i = splimp();
+			(void)llc_statehandler(linkp, (struct llc *) 0,
+					       NL_CONNECT_REQUEST, 0, 1);
+			splx(i);
+		}
+		return ((caddr_t)linkp);
+
+	case PRC_DISCONNECT_REQUEST:
+		if (linkp == 0)
+			panic("no link control block!");
+
+		i = splimp();
+		(void)llc_statehandler(linkp, (struct llc *) 0,
+				       NL_DISCONNECT_REQUEST, 0, 1);
+		splx(i);
+
+		/*
+		 * The actual removal of the link control block is done by the
+		 * cleaning neutrum (i.e. llc_timer()).
+		 */
+		break;
+
+	case PRC_RESET_REQUEST:
+		if (linkp == 0)
+			panic("no link control block!");
+
+		i = splimp();
+		(void)llc_statehandler(linkp, (struct llc *) 0,
+				       NL_RESET_REQUEST, 0, 1);
+		splx(i);
+
+		break;
+
+	}
+
+	return 0;
+}
diff --git a/sys/netccitt/llc_output.c b/sys/netccitt/llc_output.c
new file mode 100644
index 000000000000..98d0328a5f55
--- /dev/null
+++ b/sys/netccitt/llc_output.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) Dirk Husemann, Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Dirk Husemann and the Computer Science Department (IV) of
+ * the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)llc_output.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/route.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/llc_var.h>
+
+/*
+ * llc_output() --- entry point for an upper layer (network layer) entity
+ *		    that has an INFO frame to transmit on link linkp.
+ *		    The mbuf is put on the link's queue and llc_start()
+ *		    is asked to send whatever can go out; both steps run
+ *		    at splimp.
+ */
+
+llc_output(struct llc_linkcb *linkp, struct mbuf *m)
+{
+	register int opri = splimp();
+
+	LLC_ENQUEUE(linkp, m);
+	llc_start(linkp);
+
+	splx(opri);
+}
+
+
+/*
+ * llc_start() --- Dequeue and send frames for as long as the link allows:
+ *		   the link must be in NORMAL, BUSY or REJECT state, have
+ *		   a free output slot and must not be marked REMOTE_BUSY.
+ *		   Each dequeued frame is stored with LLC_SETFRAME() and
+ *		   announced to the state handler as NL_DATA_REQUEST.
+ */
+void
+llc_start(struct llc_linkcb *linkp)
+{
+	register int i;
+	register struct mbuf *m;
+	int action;
+
+	for (;;) {
+		/* sending is only possible in one of these states ... */
+		if (!(LLC_STATEEQ(linkp, NORMAL) || LLC_STATEEQ(linkp, BUSY) ||
+		      LLC_STATEEQ(linkp, REJECT)))
+			break;
+		/* ... with room in the window ... */
+		if (!(linkp->llcl_slotsfree > 0))
+			break;
+		/* ... and a peer that is willing to listen */
+		if (!(LLC_GETFLAG(linkp, REMOTE_BUSY) == 0))
+			break;
+
+		LLC_DEQUEUE(linkp, m);
+		if (m == NULL)
+			break;
+
+		LLC_SETFRAME(linkp, m);
+		(void)llc_statehandler(linkp, (struct llc *) 0, NL_DATA_REQUEST,
+				       0, 0);
+	}
+}
+
+
+/*
+ * llc_send() --- Send a single frame of the given kind.  An INFO frame is
+ *		  taken from the output buffer slot belonging to V(S) and
+ *		  V(S) is advanced afterwards; for every other kind
+ *		  LLC_GETHDR() is handed a null mbuf pointer.  The frame
+ *		  itself is built and sent by llc_rawsend().
+ *		  Always returns 0.
+ */
+llc_send(struct llc_linkcb *linkp, int frame_kind, int cmdrsp, int pollfinal)
+{
+	register struct mbuf *m;
+	register struct llc *frame;
+	int is_info = (frame_kind == LLCFT_INFO);
+
+	m = is_info ?
+	    linkp->llcl_output_buffers[llc_seq2slot(linkp, linkp->llcl_vs)] :
+	    (struct mbuf *)0;
+	LLC_GETHDR(frame, m);
+
+	/* llc_rawsend() does the real work */
+	llc_rawsend(linkp, m, frame, frame_kind, linkp->llcl_vs, cmdrsp,
+		    pollfinal);
+
+	if (is_info)
+		LLC_INC(linkp->llcl_vs);
+
+	return 0;
+}
+
+/*
+ * llc_resend() --- Retransmit every INFO frame from V(S) up to (but not
+ *		    including) the first free output slot.  Only the first
+ *		    retransmitted frame carries the caller's poll/final
+ *		    bit.  Always returns 0.
+ */
+llc_resend(struct llc_linkcb *linkp, int cmdrsp, int pollfinal)
+{
+	register struct llc *frame;
+	register struct mbuf *m;
+	register int seq, slot;
+
+	/* assert lock between nr_received & V(S) */
+	if (linkp->llcl_slotsfree < linkp->llcl_window &&
+	    linkp->llcl_nr_received != linkp->llcl_vs)
+		panic("llc: V(S) != N(R) received\n");
+
+	slot = llc_seq2slot(linkp, linkp->llcl_vs);
+	while (slot != linkp->llcl_freeslot) {
+		m = linkp->llcl_output_buffers[slot];
+		LLC_GETHDR(frame, m);
+		llc_rawsend(linkp, m, frame, LLCFT_INFO, linkp->llcl_vs,
+			    cmdrsp, pollfinal);
+		pollfinal = 0;
+
+		/* on to the next sequence number and its slot */
+		LLC_INC(linkp->llcl_vs);
+		slot = llc_seq2slot(linkp, linkp->llcl_vs);
+	}
+
+	return 0;
+}
+
+/*
+ * llc_rawsend() --- constructs an LLC frame and sends it out via the
+ *		     associated interface of the link control block.
+ *
+ * We need to make sure that outgoing frames have the correct length,
+ * in particular the 4 byte ones (RR, RNR, REJ) as LLC_GETHDR() will
+ * set the mbuf len to 3 as default len for non INFO frames ...
+ *
+ * Frame kind		Length (w/o MAC header, {D,S}SAP incl.)
+ * --------------------------------------------------------------
+ * DISC, SABME, UA, DM	3 bytes ({D,S}SAP + CONTROL)
+ * RR, RNR, REJ		4 bytes ({D,S}SAP + CONTROL0 + CONTROL1)
+ * XID			6 bytes ({D,S}SAP + CONTROL0 + FI,CLASS,WINDOW)
+ * FRMR			7 bytes ({D,S}SAP + CONTROL0 + REJ CONTROL,V(S),V(R),CAUSE)
+ * INFO			4 -- MTU
+ * UI, TEST		3 -- MTU
+ *
+ * linkp is the link, m the mbuf holding the frame and frame its LLC
+ * header; vs is the send sequence number used for INFO frames, cmdrsp
+ * the command/response bit and pollfinal the P/F bit.  XID and TEST
+ * frames are not sent from here: the mbuf is freed and nothing happens.
+ *
+ * For INFO frames a copy is transmitted and the original, with the LLC
+ * header trimmed off again, stays in the link's output buffers.  If the
+ * copy cannot be allocated the frame is simply not transmitted this time.
+ */
+#define LLC_SETLEN(m, l) (m)->m_pkthdr.len = (m)->m_len = (l)
+
+llc_rawsend(struct llc_linkcb *linkp, struct mbuf *m, struct llc *frame,
+	    int frame_kind, int vs, int cmdrsp, int pollfinal)
+{
+	register short adjust = LLC_UFRAMELEN;
+	struct ifnet *ifp;
+
+	switch (frame_kind) {
+	/* supervisory and information frames */
+	case LLCFT_INFO:
+		frame->llc_control = LLC_INFO;
+		LLCSBITS(frame->llc_control, i_ns, vs);
+		LLCSBITS(frame->llc_control_ext, i_nr, linkp->llcl_vr);
+		adjust = LLC_ISFRAMELEN;
+		break;
+	case LLCFT_RR:
+		frame->llc_control = LLC_RR;
+		LLC_SETLEN(m, LLC_ISFRAMELEN);
+		LLCSBITS(frame->llc_control_ext, s_nr, linkp->llcl_vr);
+		adjust = LLC_ISFRAMELEN;
+		break;
+	case LLCFT_RNR:
+		frame->llc_control = LLC_RNR;
+		LLC_SETLEN(m, LLC_ISFRAMELEN);
+		LLCSBITS(frame->llc_control_ext, s_nr, linkp->llcl_vr);
+		adjust = LLC_ISFRAMELEN;
+		break;
+	case LLCFT_REJ:
+		frame->llc_control = LLC_REJ;
+		LLC_SETLEN(m, LLC_ISFRAMELEN);
+		LLCSBITS(frame->llc_control_ext, s_nr, linkp->llcl_vr);
+		adjust = LLC_ISFRAMELEN;
+		break;
+	/* unnumbered frames */
+	case LLCFT_DM:
+		frame->llc_control = LLC_DM;
+		break;
+	case LLCFT_SABME:
+		frame->llc_control = LLC_SABME;
+		break;
+	case LLCFT_DISC:
+		frame->llc_control = LLC_DISC;
+		break;
+	case LLCFT_UA:
+		frame->llc_control = LLC_UA;
+		break;
+	case LLCFT_UI:
+		frame->llc_control = LLC_UI;
+		break;
+	case LLCFT_FRMR:
+		frame->llc_control = LLC_FRMR;
+		/* get more space --- FRMR frame are longer then usual */
+		LLC_SETLEN(m, LLC_FRMRLEN);
+		bcopy((caddr_t) &linkp->llcl_frmrinfo,
+		      (caddr_t) &frame->llc_frmrinfo,
+		      sizeof(struct frmrinfo));
+		break;
+	default:
+		/*
+		 * We don't send {XID, TEST} frames
+		 */
+		if (m)
+			m_freem(m);
+		return;
+	}
+
+	/*
+	 * Fill in DSAP/SSAP
+	 */
+	frame->llc_dsap = frame->llc_ssap = LLSAPADDR(&linkp->llcl_addr);
+	frame->llc_ssap |= cmdrsp;
+
+	/*
+	 * Check for delayed action pending. ISO 8802-2, 7.9.2 (5)
+	 * and ISO 8802-2, 7.9.2.3 (32), (34), (36) pertain to this
+	 * piece of code --- hopefully we got it right here (i.e.
+	 * in the spirit of (32), (34), and (36) ...
+	 */
+	switch (frame_kind) {
+	case LLCFT_RR:
+	case LLCFT_RNR:
+	case LLCFT_REJ:
+	case LLCFT_INFO:
+		switch (LLC_GETFLAG(linkp, DACTION)) {
+		case LLC_DACKCMD:
+		case LLC_DACKRSP:
+			LLC_STOPTIMER(linkp, DACTION);
+			break;
+		case LLC_DACKCMDPOLL:
+			if (cmdrsp == LLC_CMD) {
+				pollfinal = 1;
+				LLC_STOPTIMER(linkp, DACTION);
+			}
+			break;
+		case LLC_DACKRSPFINAL:
+			if (cmdrsp == LLC_RSP) {
+				pollfinal = 1;
+				LLC_STOPTIMER(linkp, DACTION);
+			}
+			break;
+		}
+		break;
+	}
+
+	/* the P/F bit lives in a different byte for U frames */
+	if (adjust == LLC_UFRAMELEN)
+		LLCSBITS(frame->llc_control, u_pf, pollfinal);
+	else LLCSBITS(frame->llc_control_ext, s_pf, pollfinal);
+
+	/*
+	 * Get interface to send frame onto
+	 */
+	ifp = linkp->llcl_if;
+	if (frame_kind == LLCFT_INFO) {
+		struct mbuf *copy;
+
+		/*
+		 * send out a copy of the frame, retain the
+		 * original
+		 */
+		copy = m_copy(m, 0, (int)M_COPYALL);
+		/* never hand a null chain to the interface */
+		if (copy)
+			(*ifp->if_output)(ifp, copy,
+					  rt_key(linkp->llcl_nlrt),
+					  linkp->llcl_nlrt);
+		/*
+		 * Account for the LLC header and let it ``disappear''
+		 * as the raw info frame payload is what we hold in
+		 * the output_buffers of the link.
+		 */
+		m_adj(m, LLC_ISFRAMELEN);
+	} else (*ifp->if_output)(ifp, m,
+				 rt_key(linkp->llcl_nlrt),
+				 linkp->llcl_nlrt);
+}
+
diff --git a/sys/netccitt/llc_subr.c b/sys/netccitt/llc_subr.c
new file mode 100644
index 000000000000..46848fdf5bce
--- /dev/null
+++ b/sys/netccitt/llc_subr.c
@@ -0,0 +1,2356 @@
+/*
+ * Copyright (C) Dirk Husemann, Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Dirk Husemann and the Computer Science Department (IV) of
+ * the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)llc_subr.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/route.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/llc_var.h>
+
+/*
+ * Frame names for diagnostic messages.  NOTE(review): presumably indexed
+ * by the frame kind code (LLCFT_* and friends) --- confirm against the
+ * definitions in llc_var.h.
+ */
+char *frame_names[] = { "INFO", "RR", "RNR", "REJ", "DM", "SABME", "DISC",
+ "UA", "FRMR", "UI", "XID", "TEST", "ILLEGAL", "TIMER", "N2xT1"};
+
+
+/*
+ * Trace level.  Starts out as LLCTR_URGENT; llc_setsapinfo() decrements
+ * it when a SAP is configured with tracing turned on and increments it
+ * otherwise.
+ */
+int llc_tracelevel = LLCTR_URGENT;
+
+/*
+ * Values for accessing various bitfields.  Each entry pairs a mask with
+ * the shift count belonging to it.  NOTE(review): presumably consumed by
+ * the LLCGBITS()/LLCSBITS() macros --- confirm in llc_var.h.
+ */
+struct bitslice llc_bitslice[] = {
+/* mask, shift value */
+ { 0x1, 0x0 }, /* bit 0 */
+ { 0xfe, 0x1 }, /* bits 1-7 */
+ { 0x3, 0x0 }, /* bits 0-1 */
+ { 0xc, 0x2 }, /* bits 2-3 */
+ { 0x10, 0x4 }, /* bit 4 */
+ { 0xe0, 0x5 }, /* bits 5-7 */
+ { 0x1f, 0x0 } /* bits 0-4 */
+};
+
+/*
+ * We keep the link control blocks on a doubly linked list -
+ * primarily for checking in llc_time().  The head is initialized
+ * with both of its members pointing back at the head itself.
+ */
+
+struct llccb_q llccb_q = { &llccb_q, &llccb_q };
+
+/*
+ * Flag for signalling whether the route tree for AF_LINK has been
+ * initialized yet.
+ */
+
+int af_link_rts_init_done = 0;
+
+
+/*
+ * Functions dealing with struct sockaddr_dl */
+
+/*
+ * sdl_cmp --- compare sdl_a w/ sdl_b.
+ *
+ * Returns nonzero when the two differ in LLADDRLEN(); otherwise returns
+ * the result of bcmp() over the first LLADDRLEN() bytes of sdl_data,
+ * i.e. 0 when they match.
+ */
+
+sdl_cmp(struct sockaddr_dl *sdl_a, struct sockaddr_dl *sdl_b)
+{
+	if (LLADDRLEN(sdl_a) == LLADDRLEN(sdl_b))
+		return(bcmp((caddr_t) sdl_a->sdl_data,
+		    (caddr_t) sdl_b->sdl_data, LLADDRLEN(sdl_a)));
+
+	/* unequal lengths never compare equal */
+	return(1);
+}
+
+/*
+ * sdl_copy --- copy sdl_f to sdl_t.  The number of bytes moved is taken
+ * from the sdl_len field of the source.
+ */
+
+sdl_copy(struct sockaddr_dl *sdl_f, struct sockaddr_dl *sdl_t)
+{
+	caddr_t from = (caddr_t) sdl_f;
+	caddr_t to = (caddr_t) sdl_t;
+
+	bcopy(from, to, sdl_f->sdl_len);
+}
+
+/*
+ * sdl_swapaddr --- exchange the contents of sdl_a and sdl_b, going
+ * through a scratch sockaddr_dl on the stack.  Each of the three copies
+ * moves as many bytes as the sdl_len field of its source says.
+ */
+
+sdl_swapaddr(struct sockaddr_dl *sdl_a, struct sockaddr_dl *sdl_b)
+{
+	struct sockaddr_dl scratch;
+
+	/* scratch <- a */
+	bcopy((caddr_t) sdl_a, (caddr_t) &scratch, sdl_a->sdl_len);
+	/* a <- b */
+	bcopy((caddr_t) sdl_b, (caddr_t) sdl_a, sdl_b->sdl_len);
+	/* b <- saved a */
+	bcopy((caddr_t) &scratch, (caddr_t) sdl_b, scratch.sdl_len);
+}
+
+/*
+ * sdl_getaddrif --- fetch the sdl of the associated if.
+ *
+ * Walks the interface's address list and returns the first address
+ * whose family is AF_LINK, or a null pointer if there is none.
+ */
+
+struct sockaddr_dl *
+sdl_getaddrif(struct ifnet *ifp)
+{
+	register struct ifaddr *ifa = ifp->if_addrlist;
+
+	while (ifa) {
+		if (ifa->ifa_addr->sa_family == AF_LINK)
+			return((struct sockaddr_dl *)(ifa->ifa_addr));
+		ifa = ifa->ifa_next;
+	}
+
+	return((struct sockaddr_dl *)0);
+}
+
+/*
+ * sdl_checkaddrif --- check addr of interface with the one given.
+ *
+ * Returns 1 if any AF_LINK address on the interface's address list
+ * compares equal (per sdl_cmp()) to sdl_c, 0 otherwise.
+ */
+
+sdl_checkaddrif(struct ifnet *ifp, struct sockaddr_dl *sdl_c)
+{
+	register struct ifaddr *ifa;
+
+	for (ifa = ifp->if_addrlist; ifa; ifa = ifa->ifa_next) {
+		if (ifa->ifa_addr->sa_family != AF_LINK)
+			continue;
+		if (sdl_cmp((struct sockaddr_dl *)(ifa->ifa_addr), sdl_c) == 0)
+			return(1);
+	}
+
+	return(0);
+}
+
+/*
+ * sdl_setaddrif --- build an sdl from MAC addr, DLSAP addr, and interface.
+ *
+ * The interface's AF_LINK address serves as the template: it is copied
+ * to sdl_to, after which mac_len bytes of mac_addr are stored at
+ * LLADDR(sdl_to), followed by the single byte dlsap_addr; sdl_alen is
+ * set to mac_len + 1.  Returns 1 on success, 0 if the interface has no
+ * AF_LINK address.
+ */
+
+sdl_setaddrif(struct ifnet *ifp, u_char *mac_addr, u_char dlsap_addr,
+    u_char mac_len, struct sockaddr_dl *sdl_to)
+{
+	register struct sockaddr_dl *if_sdl = sdl_getaddrif(ifp);
+
+	if (if_sdl == 0)
+		return(0);
+
+	sdl_copy(if_sdl, sdl_to);
+	bcopy((caddr_t) mac_addr, (caddr_t) LLADDR(sdl_to), mac_len);
+	*(LLADDR(sdl_to) + mac_len) = dlsap_addr;
+	sdl_to->sdl_alen = mac_len + 1;
+
+	return(1);
+}
+
+/*
+ * sdl_sethdrif --- fill out the sdl header aggregate.
+ *
+ * Builds the source address first and the destination address second,
+ * both via sdl_setaddrif().  Returns 1 if both succeed; returns 0 as
+ * soon as one of them fails (the destination is not attempted when the
+ * source fails).
+ */
+
+sdl_sethdrif(struct ifnet *ifp, u_char *mac_src, u_char dlsap_src, u_char *mac_dst,
+    u_char dlsap_dst, u_char mac_len, struct sdl_hdr *sdlhdr_to)
+{
+	if (!sdl_setaddrif(ifp, mac_src, dlsap_src, mac_len,
+	    &sdlhdr_to->sdlhdr_src))
+		return(0);
+
+	if (!sdl_setaddrif(ifp, mac_dst, dlsap_dst, mac_len,
+	    &sdlhdr_to->sdlhdr_dst))
+		return(0);
+
+	return(1);
+}
+
+static struct sockaddr_dl sap_saddr; /* destination passed to rtrequest() by llc_setsapinfo() */
+static struct sockaddr_dl sap_sgate = { /* gateway passed to rtrequest() by llc_setsapinfo() */
+ sizeof(struct sockaddr_dl), /* _len */
+ AF_LINK /* _af */
+};
+
+/*
+ * Set sapinfo for SAP address, llcconfig, af, and interface
+ *
+ * Looks up the SOCK_STREAM protosw entry of address family af, enters a
+ * "route" for the interface's link address extended by the SAP byte, and
+ * hangs a freshly allocated, zeroed struct npaidbentry off the route's
+ * rt_llinfo.  The entry records LLC class II, the window and trace
+ * settings from llconf, and the protocol's input/ctlinput routines.
+ *
+ * Returns the new entry, or 0 if the protosw entry is missing or lacks
+ * pr_input/pr_ctlinput, if the interface has no AF_LINK address, if no
+ * route entry comes back from rtrequest(), or if the allocation fails.
+ */
+struct npaidbentry *
+llc_setsapinfo(struct ifnet *ifp, u_char af, u_char sap, struct dllconfig *llconf)
+{
+	struct protosw *pp;
+	struct sockaddr_dl *ifdl_addr;
+	struct rtentry *sirt = (struct rtentry *)0;
+	struct npaidbentry *sapinfo;
+	u_char saploc;
+	int size = sizeof(struct npaidbentry);
+
+	USES_AF_LINK_RTS;
+
+	/*
+	 * We rely/assume that only STREAM protocols will make use of
+	 * connection oriented LLC2. If this will one day not be the
+	 * case this will obviously fail.
+	 */
+	pp = pffindtype (af, SOCK_STREAM);
+	if (pp == 0 || pp->pr_input == 0 || pp->pr_ctlinput == 0) {
+		printf("network level protosw error");
+		return 0;
+	}
+
+	/*
+	 * We need a way to jot down the LLC2 configuration for
+	 * a certain LSAP address. To do this we enter
+	 * a "route" for the SAP.
+	 */
+	ifdl_addr = sdl_getaddrif(ifp);
+	/*
+	 * sdl_getaddrif() hands back a null pointer for an interface
+	 * without an AF_LINK address; sdl_copy() would dereference it.
+	 */
+	if (ifdl_addr == 0)
+		return 0;
+	sdl_copy(ifdl_addr, &sap_saddr);
+	sdl_copy(ifdl_addr, &sap_sgate);
+	saploc = LLSAPLOC(&sap_saddr, ifp);
+	sap_saddr.sdl_data[saploc] = sap;
+	sap_saddr.sdl_alen++;
+
+	/* now enter it */
+	rtrequest(RTM_ADD, (struct sockaddr *)&sap_saddr,
+	    (struct sockaddr *)&sap_sgate, 0, 0, &sirt);
+	if (sirt == 0)
+		return 0;
+
+	/* Plug in config information in rt->rt_llinfo */
+
+	sirt->rt_llinfo = malloc(size , M_PCB, M_WAITOK);
+	sapinfo = (struct npaidbentry *) sirt->rt_llinfo;
+	if (sapinfo) {
+		bzero ((caddr_t)sapinfo, size);
+		/*
+		 * For the time being we support LLC CLASS II here
+		 * only
+		 */
+		sapinfo->si_class = LLC_CLASS_II;
+		sapinfo->si_window = llconf->dllcfg_window;
+		sapinfo->si_trace = llconf->dllcfg_trace;
+		/* a traced SAP lowers the global trace level, an untraced one raises it */
+		if (sapinfo->si_trace)
+			llc_tracelevel--;
+		else llc_tracelevel++;
+		sapinfo->si_input = pp->pr_input;
+		sapinfo->si_ctlinput = (caddr_t (*)())pp->pr_ctlinput;
+
+		return (sapinfo);
+	}
+
+	return 0;
+}
+
+/*
+ * Get sapinfo for SAP address and interface
+ *
+ * Builds the same key llc_setsapinfo() uses (the interface's link
+ * address extended by the SAP byte), looks it up with rtalloc1() and
+ * returns the rt_llinfo of the route found.  The reference rtalloc1()
+ * took is dropped again before returning.
+ *
+ * Returns 0 if the interface has no AF_LINK address or if no route is
+ * found.
+ */
+struct npaidbentry *
+llc_getsapinfo(u_char sap, struct ifnet *ifp)
+{
+	struct sockaddr_dl *ifdl_addr;
+	struct sockaddr_dl si_addr;
+	struct rtentry *sirt;
+	u_char saploc;
+
+	USES_AF_LINK_RTS;
+
+	/*
+	 * sdl_getaddrif() hands back a null pointer for an interface
+	 * without an AF_LINK address; sdl_copy() would dereference it.
+	 */
+	if ((ifdl_addr = sdl_getaddrif(ifp)) == 0)
+		return(0);
+
+	sdl_copy(ifdl_addr, &si_addr);
+	saploc = LLSAPLOC(&si_addr, ifp);
+	si_addr.sdl_data[saploc] = sap;
+	si_addr.sdl_alen++;
+
+	if ((sirt = rtalloc1((struct sockaddr *)&si_addr, 0)))
+		sirt->rt_refcnt--;
+	else return(0);
+
+	return((struct npaidbentry *)sirt->rt_llinfo);
+}
+
+/*
+ * llc_seq2slot() --- We only allocate enough memory to hold the window. This
+ * introduces the necessity to keep track of two ``pointers''
+ *
+ * o llcl_freeslot the next free slot to be used
+ * this one advances modulo llcl_window
+ * o llcl_projvs the V(S) associated with the next frame
+ * to be set via llcl_freeslot
+ * this one advances modulo LLC_MAX_SEQUENCE
+ *
+ * A new frame is inserted at llcl_output_buffers[llcl_freeslot], after
+ * which both llcl_freeslot and llcl_projvs are incremented.
+ *
+ * The slot sl(sn) for any given sequence number sn is given by
+ *
+ * sl(sn) = (llcl_freeslot + llcl_window - (llcl_projvs +
+ * LLC_MAX_SEQUENCE - sn) % LLC_MAX_SEQUENCE) %
+ * llcl_window
+ *
+ * i.e. we first calculate the number of frames we need to ``go back''
+ * from the next frame to be sent (llcl_projvs) and subtract that from
+ * the next free slot (llcl_freeslot). Both ``pointers'' are off by
+ * plus one relative to the most recently taken frame, so the two
+ * offsets cancel and no further correction is needed.
+ */
+
+short
+llc_seq2slot(struct llc_linkcb *linkp, short seqn)
+{
+	register int back, slot;
+
+	/* distance from seqn up to llcl_projvs, modulo LLC_MAX_SEQUENCE */
+	back = (linkp->llcl_projvs + LLC_MAX_SEQUENCE - seqn) %
+	    LLC_MAX_SEQUENCE;
+
+	/* step that far back from llcl_freeslot, modulo llcl_window */
+	slot = (linkp->llcl_freeslot + linkp->llcl_window - back) %
+	    linkp->llcl_window;
+
+	return slot;
+}
+
+/*
+ * LLC2 link state handler
+ *
+ * There is in most cases one function per LLC2 state. The LLC2 standard
+ * ISO 8802-2 allows in some cases for ambiguities, i.e. we have the choice
+ * to do one thing or the other. Right now I have just chosen one but have also
+ * indicated the spot by "multiple possibilities". One could make the behavior
+ * in those cases configurable, allowing the superuser to enter a profile word
+ * (32/64 bits, whatever is needed) that would suit her needs [I quite like
+ * that idea, perhaps I'll get around to it].
+ *
+ * [Preceding each state handler function is the description as taken from
+ * ISO 8802-2, section 7.9.2.1]
+ */
+
+/*
+ * ADM --- The connection component is in the asynchronous disconnected mode.
+ *         It can accept an SABME PDU from a remote LLC SSAP or, at the request
+ *         of the service access point user, can initiate an SABME PDU
+ *         transmission to a remote LLC DSAP, to establish a data link
+ *         connection. It also responds to a DISC command PDU and to any
+ *         command PDU with the P bit set to ``1''.
+ *
+ * The event is selected by frame_kind + cmdrsp; the frame argument is not
+ * looked at here.  Returns 0, or LLC_CONNECT_INDICATION when an SABME
+ * command moves the link to CONN.
+ */
+int
+llc_state_ADM(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+    int cmdrsp, int pollfinal)
+{
+	int action = 0;
+
+	switch(frame_kind + cmdrsp) {
+	case NL_CONNECT_REQUEST:
+		llc_send(linkp, LLCFT_SABME, LLC_CMD, pollfinal);
+		LLC_SETFLAG(linkp, P, pollfinal);
+		/*
+		 * The SETUP state retransmits the SABME from its
+		 * LLC_ACK_TIMER_EXPIRED case, so the acknowledgement
+		 * timer has to be running when we enter it (ISO 8802-2
+		 * lists START_ACK_TIMER for this transition).
+		 */
+		LLC_START_ACK_TIMER(linkp);
+		LLC_SETFLAG(linkp, S, 0);
+		linkp->llcl_retry = 0;
+		LLC_NEWSTATE(linkp, SETUP);
+		break;
+	case LLCFT_SABME + LLC_CMD:
+		/*
+		 * ISO 8802-2, table 7-1, ADM state says to set
+		 * the P flag, yet this will cause an SABME [P] to be
+		 * answered with an UA only, not an UA [F], all
+		 * other `disconnected' states set the F flag, so ...
+		 */
+		LLC_SETFLAG(linkp, F, pollfinal);
+		LLC_NEWSTATE(linkp, CONN);
+		action = LLC_CONNECT_INDICATION;
+		break;
+	case LLCFT_DISC + LLC_CMD:
+		llc_send(linkp, LLCFT_DM, LLC_RSP, pollfinal);
+		break;
+	default:
+		/* any other command with the P bit set is answered with DM [F] */
+		if (cmdrsp == LLC_CMD && pollfinal == 1)
+			llc_send(linkp, LLCFT_DM, LLC_RSP, 1);
+		/* remain in ADM state */
+	}
+
+	return action;
+}
+
+/*
+ * CONN --- The local connection component has received an SABME PDU from a
+ * remote LLC SSAP, and it is waiting for the local user to accept or
+ * refuse the connection.
+ *
+ * The event is selected by frame_kind + cmdrsp; the frame argument is
+ * not looked at here. Returns 0, or LLC_DISCONNECT_INDICATION when a
+ * DM response takes the link back to ADM.
+ */
+int
+llc_state_CONN(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+ int cmdrsp, int pollfinal)
+{
+ int action = 0;
+
+ switch(frame_kind + cmdrsp) {
+ case NL_CONNECT_RESPONSE: /* user accepts: answer with UA, go to NORMAL */
+ llc_send(linkp, LLCFT_UA, LLC_RSP, LLC_GETFLAG(linkp, F));
+ LLC_RESETCOUNTER(linkp);
+ LLC_SETFLAG(linkp, P, 0);
+ LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+ LLC_NEWSTATE(linkp, NORMAL);
+ break;
+ case NL_DISCONNECT_REQUEST: /* user refuses: answer with DM, back to ADM */
+ llc_send(linkp, LLCFT_DM, LLC_RSP, LLC_GETFLAG(linkp, F));
+ LLC_NEWSTATE(linkp, ADM);
+ break;
+ case LLCFT_SABME + LLC_CMD: /* repeated SABME: just remember its P bit as F */
+ LLC_SETFLAG(linkp, F, pollfinal);
+ break;
+ case LLCFT_DM + LLC_RSP:
+ LLC_NEWSTATE(linkp, ADM);
+ action = LLC_DISCONNECT_INDICATION;
+ break;
+ /* all other frames effect nothing here */
+ }
+
+ return action;
+}
+
+/*
+ * RESET_WAIT --- The local connection component is waiting for the local user
+ * to indicate a RESET_REQUEST or a DISCONNECT_REQUEST.
+ *
+ * The event is selected by frame_kind + cmdrsp; the frame argument is
+ * not looked at here. The S flag, set here when an SABME command
+ * arrives, decides whether a user request is carried out by sending a
+ * command of our own (S == 0) or by answering the remote side (S != 0).
+ * Returns 0, LLC_RESET_CONFIRM or LLC_DISCONNECT_INDICATION.
+ */
+int
+llc_state_RESET_WAIT(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+ int cmdrsp, int pollfinal)
+{
+ int action = 0;
+
+ switch(frame_kind + cmdrsp) {
+ case NL_RESET_REQUEST:
+ if (LLC_GETFLAG(linkp, S) == 0) { /* no SABME seen: send one ourselves */
+ llc_send(linkp, LLCFT_SABME, LLC_CMD, pollfinal);
+ LLC_SETFLAG(linkp, P, pollfinal);
+ LLC_START_ACK_TIMER(linkp);
+ linkp->llcl_retry = 0;
+ LLC_NEWSTATE(linkp, RESET);
+ } else { /* SABME already received: acknowledge it with UA */
+ llc_send(linkp, LLCFT_UA, LLC_RSP,
+ LLC_GETFLAG(linkp, F));
+ LLC_RESETCOUNTER(linkp);
+ LLC_SETFLAG(linkp, P, 0);
+ LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+ LLC_NEWSTATE(linkp, NORMAL);
+ action = LLC_RESET_CONFIRM;
+ }
+ break;
+ case NL_DISCONNECT_REQUEST:
+ if (LLC_GETFLAG(linkp, S) == 0) { /* no SABME seen: send DISC */
+ llc_send(linkp, LLCFT_DISC, LLC_CMD, pollfinal);
+ LLC_SETFLAG(linkp, P, pollfinal);
+ LLC_START_ACK_TIMER(linkp);
+ linkp->llcl_retry = 0;
+ LLC_NEWSTATE(linkp, D_CONN);
+ } else { /* SABME already received: refuse it with DM */
+ llc_send(linkp, LLCFT_DM, LLC_RSP,
+ LLC_GETFLAG(linkp, F));
+ LLC_NEWSTATE(linkp, ADM);
+ }
+ break;
+ case LLCFT_DM + LLC_RSP:
+ LLC_NEWSTATE(linkp, ADM);
+ action = LLC_DISCONNECT_INDICATION;
+ break;
+ case LLCFT_SABME + LLC_CMD: /* remember the SABME and its P bit for later */
+ LLC_SETFLAG(linkp, S, 1);
+ LLC_SETFLAG(linkp, F, pollfinal);
+ break;
+ case LLCFT_DISC + LLC_CMD:
+ llc_send(linkp, LLCFT_DM, LLC_RSP, pollfinal);
+ LLC_NEWSTATE(linkp, ADM);
+ action = LLC_DISCONNECT_INDICATION;
+ break;
+ }
+
+ return action;
+}
+
+/*
+ * RESET_CHECK --- The local connection component is waiting for the local user
+ *                 to accept or refuse a remote reset request.
+ *
+ * The event is selected by frame_kind + cmdrsp; the frame argument is not
+ * looked at here.  Returns 0, or LLC_DISCONNECT_INDICATION when a DM
+ * response or a DISC command takes the link back to ADM.
+ */
+int
+llc_state_RESET_CHECK(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+    int cmdrsp, int pollfinal)
+{
+	int action = 0;
+
+	switch(frame_kind + cmdrsp) {
+	case NL_RESET_RESPONSE:
+		/* user accepts the reset: answer with UA, go to NORMAL */
+		llc_send(linkp, LLCFT_UA, LLC_RSP, LLC_GETFLAG(linkp, F));
+		LLC_RESETCOUNTER(linkp);
+		LLC_SETFLAG(linkp, P, 0);
+		LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+		LLC_NEWSTATE(linkp, NORMAL);
+		break;
+	case NL_DISCONNECT_REQUEST:
+		/* user refuses the reset: answer with DM, back to ADM */
+		llc_send(linkp, LLCFT_DM, LLC_RSP, LLC_GETFLAG(linkp, F));
+		LLC_NEWSTATE(linkp, ADM);
+		break;
+	case LLCFT_DM + LLC_RSP:
+		/*
+		 * The remote side is disconnected: leave RESET_CHECK as
+		 * well, as every other state handler does when it
+		 * reports a disconnect for a received DM.
+		 */
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	case LLCFT_SABME + LLC_CMD:
+		/* repeated SABME: just remember its P bit as F */
+		LLC_SETFLAG(linkp, F, pollfinal);
+		break;
+	case LLCFT_DISC + LLC_CMD:
+		llc_send(linkp, LLCFT_DM, LLC_RSP, pollfinal);
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	}
+
+	return action;
+}
+
+/*
+ * SETUP --- The connection component has transmitted an SABME command PDU to a
+ *           remote LLC DSAP and is waiting for a reply.
+ *
+ * The event is selected by frame_kind + cmdrsp; the frame argument is not
+ * looked at here.  Returns 0, LLC_CONNECT_CONFIRM once the link reaches
+ * NORMAL, or LLC_DISCONNECT_INDICATION when it falls back to ADM.
+ */
+int
+llc_state_SETUP(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+    int cmdrsp, int pollfinal)
+{
+	int action = 0;
+
+	switch(frame_kind + cmdrsp) {
+	case LLCFT_SABME + LLC_CMD:
+		/* both sides sent SABME: acknowledge theirs and note it in S */
+		LLC_RESETCOUNTER(linkp);
+		llc_send(linkp, LLCFT_UA, LLC_RSP, pollfinal);
+		LLC_SETFLAG(linkp, S, 1);
+		break;
+	case LLCFT_UA + LLC_RSP:
+		/* only a UA whose F bit matches our P flag completes the setup */
+		if (LLC_GETFLAG(linkp, P) == pollfinal) {
+			LLC_STOP_ACK_TIMER(linkp);
+			LLC_RESETCOUNTER(linkp);
+			LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+			LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+			LLC_NEWSTATE(linkp, NORMAL);
+			action = LLC_CONNECT_CONFIRM;
+		}
+		break;
+	case LLC_ACK_TIMER_EXPIRED:
+		if (LLC_GETFLAG(linkp, S) == 1) {
+			/* we already acknowledged the peer's SABME */
+			LLC_SETFLAG(linkp, P, 0);
+			LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+			LLC_NEWSTATE(linkp, NORMAL);
+			action = LLC_CONNECT_CONFIRM;
+		} else if (linkp->llcl_retry < llc_n2) {
+			/* retransmit the SABME */
+			llc_send(linkp, LLCFT_SABME, LLC_CMD, pollfinal);
+			LLC_SETFLAG(linkp, P, pollfinal);
+			LLC_START_ACK_TIMER(linkp);
+			linkp->llcl_retry++;
+		} else {
+			/* retries exhausted */
+			LLC_NEWSTATE(linkp, ADM);
+			action = LLC_DISCONNECT_INDICATION;
+		}
+		break;
+	case LLCFT_DISC + LLC_CMD:
+		llc_send(linkp, LLCFT_DM, LLC_RSP, pollfinal);
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	case LLCFT_DM + LLC_RSP:
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	}
+
+	return action;
+}
+
+/*
+ * RESET --- As a result of a service access point user request or the receipt
+ * of a FRMR response PDU, the local connection component has sent an
+ * SABME command PDU to the remote LLC DSAP to reset the data link
+ * connection and is waiting for a reply.
+ *
+ * The event is selected by frame_kind + cmdrsp; the frame argument is
+ * not looked at here. Returns 0, LLC_RESET_CONFIRM once the link
+ * reaches NORMAL, or LLC_DISCONNECT_INDICATION when it falls back to
+ * ADM.
+ */
+int
+llc_state_RESET(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+ int cmdrsp, int pollfinal)
+{
+ int action = 0;
+
+ switch(frame_kind + cmdrsp) {
+ case LLCFT_SABME + LLC_CMD: /* both sides sent SABME: acknowledge, note it in S */
+ LLC_RESETCOUNTER(linkp);
+ LLC_SETFLAG(linkp, S, 1);
+ llc_send(linkp, LLCFT_UA, LLC_RSP, pollfinal);
+ break;
+ case LLCFT_UA + LLC_RSP:
+ if (LLC_GETFLAG(linkp, P) == pollfinal) { /* F bit must match our P flag */
+ LLC_STOP_ACK_TIMER(linkp);
+ LLC_RESETCOUNTER(linkp);
+ LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+ LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+ LLC_NEWSTATE(linkp, NORMAL);
+ action = LLC_RESET_CONFIRM;
+ }
+ break;
+ case LLC_ACK_TIMER_EXPIRED:
+ if (LLC_GETFLAG(linkp, S) == 1) { /* peer's SABME was already acknowledged */
+ LLC_SETFLAG(linkp, P, 0);
+ LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+ LLC_NEWSTATE(linkp, NORMAL);
+ action = LLC_RESET_CONFIRM;
+ } else if (linkp->llcl_retry < llc_n2) { /* retransmit the SABME */
+ llc_send(linkp, LLCFT_SABME, LLC_CMD, pollfinal);
+ LLC_SETFLAG(linkp, P, pollfinal);
+ LLC_START_ACK_TIMER(linkp);
+ linkp->llcl_retry++;
+ } else { /* retries exhausted */
+ LLC_NEWSTATE(linkp, ADM);
+ action = LLC_DISCONNECT_INDICATION;
+ }
+ break;
+ case LLCFT_DISC + LLC_CMD:
+ llc_send(linkp, LLCFT_DM, LLC_RSP, pollfinal);
+ LLC_STOP_ACK_TIMER(linkp);
+ LLC_NEWSTATE(linkp, ADM);
+ action = LLC_DISCONNECT_INDICATION;
+ break;
+ case LLCFT_DM + LLC_RSP:
+ LLC_STOP_ACK_TIMER(linkp);
+ LLC_NEWSTATE(linkp, ADM);
+ action = LLC_DISCONNECT_INDICATION;
+ break;
+ }
+
+ return action;
+}
+
+/*
+ * D_CONN --- At the request of the service access point user, the local LLC
+ * has sent a DISC command PDU to the remote LLC DSAP and is waiting
+ * for a reply.
+ *
+ * The event is selected by frame_kind + cmdrsp; the frame argument is
+ * not looked at here. No branch assigns to action, so this handler
+ * always returns 0.
+ */
+int
+llc_state_D_CONN(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+ int cmdrsp, int pollfinal)
+{
+ int action = 0;
+
+ switch(frame_kind + cmdrsp) {
+ case LLCFT_SABME + LLC_CMD:
+ llc_send(linkp, LLCFT_DM, LLC_RSP, pollfinal);
+ LLC_STOP_ACK_TIMER(linkp);
+ LLC_NEWSTATE(linkp, ADM);
+ break;
+ case LLCFT_UA + LLC_RSP:
+ if (LLC_GETFLAG(linkp, P) == pollfinal) { /* F bit must match our P flag */
+ LLC_STOP_ACK_TIMER(linkp);
+ LLC_NEWSTATE(linkp, ADM);
+ }
+ break;
+ case LLCFT_DISC + LLC_CMD: /* acknowledge, but keep waiting in D_CONN */
+ llc_send(linkp, LLCFT_UA, LLC_RSP, pollfinal);
+ break;
+ case LLCFT_DM + LLC_RSP:
+ LLC_STOP_ACK_TIMER(linkp);
+ LLC_NEWSTATE(linkp, ADM);
+ break;
+ case LLC_ACK_TIMER_EXPIRED:
+ if (linkp->llcl_retry < llc_n2) { /* retransmit the DISC */
+ llc_send(linkp, LLCFT_DISC, LLC_CMD, pollfinal);
+ LLC_SETFLAG(linkp, P, pollfinal);
+ LLC_START_ACK_TIMER(linkp);
+ linkp->llcl_retry++;
+ } else LLC_NEWSTATE(linkp, ADM); /* retries exhausted */
+ break;
+ }
+
+ return action;
+}
+
+/*
+ * ERROR --- The local connection component has detected an error in a received
+ *           PDU and has sent a FRMR response PDU. It is waiting for a reply from
+ *           the remote connection component.
+ *
+ * The event is selected by frame_kind + cmdrsp; the frame argument is not
+ * looked at here.  Returns 0, LLC_RESET_INDICATION_REMOTE,
+ * LLC_RESET_INDICATION_LOCAL, LLC_DISCONNECT_INDICATION or
+ * LLC_FRMR_RECEIVED.
+ */
+int
+llc_state_ERROR(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+    int cmdrsp, int pollfinal)
+{
+	int action = 0;
+
+	switch(frame_kind + cmdrsp) {
+	case LLCFT_SABME + LLC_CMD:
+		/*
+		 * RESET_CHECK answers the SABME with a UA or DM carrying
+		 * the F flag, so record the P bit of this SABME just as
+		 * the common NORMAL/BUSY/... core handler does for the
+		 * same transition.
+		 */
+		LLC_SETFLAG(linkp, F, pollfinal);
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_NEWSTATE(linkp, RESET_CHECK);
+		action = LLC_RESET_INDICATION_REMOTE;
+		break;
+	case LLCFT_DISC + LLC_CMD:
+		llc_send(linkp, LLCFT_UA, LLC_RSP, pollfinal);
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	case LLCFT_DM + LLC_RSP:
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	case LLCFT_FRMR + LLC_RSP:
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_SETFLAG(linkp, S, 0);
+		LLC_NEWSTATE(linkp, RESET_WAIT);
+		action = LLC_FRMR_RECEIVED;
+		break;
+	case LLC_ACK_TIMER_EXPIRED:
+		if (linkp->llcl_retry < llc_n2) {
+			/* retransmit the FRMR */
+			llc_send(linkp, LLCFT_FRMR, LLC_RSP, 0);
+			LLC_START_ACK_TIMER(linkp);
+			linkp->llcl_retry++;
+		} else {
+			/* retries exhausted: let the local user decide */
+			LLC_SETFLAG(linkp, S, 0);
+			LLC_NEWSTATE(linkp, RESET_WAIT);
+			action = LLC_RESET_INDICATION_LOCAL;
+		}
+		break;
+	default:
+		/* every other command is answered with the FRMR again */
+		if (cmdrsp == LLC_CMD){
+			llc_send(linkp, LLCFT_FRMR, LLC_RSP, pollfinal);
+			LLC_START_ACK_TIMER(linkp);
+		}
+		break;
+
+	}
+
+	return action;
+}
+
+/*
+ * NORMAL, BUSY, REJECT, AWAIT, AWAIT_BUSY, and AWAIT_REJECT all share
+ * a common core state handler.
+ *
+ * The event is selected by frame_kind + cmdrsp.  This core covers
+ * disconnect and reset requests from the local user, SABME/DISC
+ * commands, FRMR/DM responses, the conditions that make us send a FRMR
+ * of our own, and the timer expiries once the retry count has reached
+ * llc_n2.  Returns 0 or the action code to report.
+ */
+int
+llc_state_NBRAcore(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+    int cmdrsp, int pollfinal)
+{
+	int action = 0;
+
+	switch(frame_kind + cmdrsp) {
+	case NL_DISCONNECT_REQUEST:
+		llc_send(linkp, LLCFT_DISC, LLC_CMD, pollfinal);
+		LLC_SETFLAG(linkp, P, pollfinal);
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_START_ACK_TIMER(linkp);
+		linkp->llcl_retry = 0;
+		LLC_NEWSTATE(linkp, D_CONN);
+		break;
+	case NL_RESET_REQUEST:
+		llc_send(linkp, LLCFT_SABME, LLC_CMD, pollfinal);
+		LLC_SETFLAG(linkp, P, pollfinal);
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_START_ACK_TIMER(linkp);
+		linkp->llcl_retry = 0;
+		LLC_SETFLAG(linkp, S, 0);
+		LLC_NEWSTATE(linkp, RESET);
+		break;
+	case LLCFT_SABME + LLC_CMD:
+		LLC_SETFLAG(linkp, F, pollfinal);
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_NEWSTATE(linkp, RESET_CHECK);
+		action = LLC_RESET_INDICATION_REMOTE;
+		break;
+	case LLCFT_DISC + LLC_CMD:
+		llc_send(linkp, LLCFT_UA, LLC_RSP, pollfinal);
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	case LLCFT_FRMR + LLC_RSP:
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_SETFLAG(linkp, S, 0);
+		LLC_NEWSTATE(linkp, RESET_WAIT);
+		action = LLC_FRMR_RECEIVED;
+		break;
+	case LLCFT_DM + LLC_RSP:
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	case LLC_INVALID_NR + LLC_CMD:
+	case LLC_INVALID_NS + LLC_CMD:
+		/* bad sequence number in a command: FRMR carries its P bit as F */
+		LLC_SETFRMR(linkp, frame, cmdrsp,
+		    (frame_kind == LLC_INVALID_NR ? LLC_FRMR_Z :
+		    (LLC_FRMR_V | LLC_FRMR_W)));
+		llc_send(linkp, LLCFT_FRMR, LLC_RSP, pollfinal);
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_START_ACK_TIMER(linkp);
+		linkp->llcl_retry = 0;
+		LLC_NEWSTATE(linkp, ERROR);
+		action = LLC_FRMR_SENT;
+		break;
+	case LLC_INVALID_NR + LLC_RSP:
+	case LLC_INVALID_NS + LLC_RSP:
+	case LLCFT_UA + LLC_RSP:
+	case LLC_BAD_PDU: {
+		char frmrcause = 0;
+
+		switch (frame_kind) {
+		case LLC_INVALID_NR: frmrcause = LLC_FRMR_Z; break;
+		case LLC_INVALID_NS: frmrcause = LLC_FRMR_V | LLC_FRMR_W; break;
+		default: frmrcause = LLC_FRMR_W;
+		}
+		LLC_SETFRMR(linkp, frame, cmdrsp, frmrcause);
+		llc_send(linkp, LLCFT_FRMR, LLC_RSP, 0);
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_START_ACK_TIMER(linkp);
+		linkp->llcl_retry = 0;
+		LLC_NEWSTATE(linkp, ERROR);
+		action = LLC_FRMR_SENT;
+		break;
+	}
+	case LLC_P_TIMER_EXPIRED:
+	case LLC_ACK_TIMER_EXPIRED:
+	case LLC_REJ_TIMER_EXPIRED:
+	case LLC_BUSY_TIMER_EXPIRED:
+		/* retries exhausted: hand the decision to the local user */
+		if (linkp->llcl_retry >= llc_n2) {
+			LLC_STOP_ALL_TIMERS(linkp);
+			LLC_SETFLAG(linkp, S, 0);
+			LLC_NEWSTATE(linkp, RESET_WAIT);
+			action = LLC_RESET_INDICATION_LOCAL;
+		}
+		break;
+	default:
+		/* a response with F set although we have no poll outstanding */
+		if (cmdrsp == LLC_RSP && pollfinal == 1 &&
+		    LLC_GETFLAG(linkp, P) == 0) {
+			LLC_SETFRMR(linkp, frame, cmdrsp, LLC_FRMR_W);
+			/*
+			 * Actually transmit the FRMR we are about to
+			 * report as sent, like the other FRMR branches
+			 * of this handler do.
+			 */
+			llc_send(linkp, LLCFT_FRMR, LLC_RSP, 0);
+			LLC_STOP_ALL_TIMERS(linkp);
+			LLC_START_ACK_TIMER(linkp);
+			linkp->llcl_retry = 0;
+			LLC_NEWSTATE(linkp, ERROR);
+			action = LLC_FRMR_SENT;
+		}
+		break;
+	}
+
+	return action;
+}
+
+/*
+ * NORMAL --- A data link connection exists between the local LLC service access
+ * point and the remote LLC service access point. Sending and
+ * reception of information and supervisory PDUs can be performed.
+ *
+ * The event is selected by frame_kind + cmdrsp. action starts out as
+ * LLC_PASSITON; every branch that deals with the event replaces it.
+ * If it is still LLC_PASSITON after the switch the event is handed to
+ * llc_state_NBRAcore() and that handler's result is returned.
+ */
+int
+llc_state_NORMAL(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+ int cmdrsp, int pollfinal)
+{
+ int action = LLC_PASSITON;
+
+ switch(frame_kind + cmdrsp) {
+ case NL_DATA_REQUEST:
+ if (LLC_GETFLAG(linkp, REMOTE_BUSY) == 0) {
+#ifdef not_now
+ if (LLC_GETFLAG(linkp, P) == 0) {
+ /* multiple possibilities */
+ llc_send(linkp, LLCFT_INFO, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ if (LLC_TIMERXPIRED(linkp, ACK) != LLC_TIMER_RUNNING)
+ LLC_START_ACK_TIMER(linkp);
+ } else {
+#endif
+ /* multiple possibilities */
+ llc_send(linkp, LLCFT_INFO, LLC_CMD, 0);
+ if (LLC_TIMERXPIRED(linkp, ACK) != LLC_TIMER_RUNNING)
+ LLC_START_ACK_TIMER(linkp);
+#ifdef not_now
+ }
+#endif
+ action = 0;
+ }
+ break;
+ case LLC_LOCAL_BUSY_DETECTED:
+ if (LLC_GETFLAG(linkp, P) == 0) {
+ /* multiple possibilities --- action-wise */
+ /* multiple possibilities --- CMD/RSP-wise */
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+ LLC_START_P_TIMER(linkp);
+ LLC_SETFLAG(linkp, DATA, 0);
+ LLC_NEWSTATE(linkp, BUSY);
+ action = 0;
+ } else {
+ /* multiple possibilities --- CMD/RSP-wise */
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+ LLC_SETFLAG(linkp, DATA, 0);
+ LLC_NEWSTATE(linkp, BUSY);
+ action = 0;
+ }
+ break;
+ case LLC_INVALID_NS + LLC_CMD:
+ case LLC_INVALID_NS + LLC_RSP: { /* unexpected N(S): ask for retransmission with REJ */
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_REJ, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_START_REJ_TIMER(linkp);
+ LLC_NEWSTATE(linkp, REJECT);
+ action = 0;
+ } else if (pollfinal == 0 && p == 1) {
+ llc_send(linkp, LLCFT_REJ, LLC_CMD, 0);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_START_REJ_TIMER(linkp);
+ LLC_NEWSTATE(linkp, REJECT);
+ action = 0;
+ } else if ((pollfinal == 0 && p == 0) ||
+ (pollfinal == 1 && p == 1 && cmdrsp == LLC_RSP)) {
+ llc_send(linkp, LLCFT_REJ, LLC_CMD, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_START_P_TIMER(linkp);
+ LLC_START_REJ_TIMER(linkp);
+ if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else action = 0;
+ LLC_NEWSTATE(linkp, REJECT);
+ }
+ break;
+ }
+ case LLCFT_INFO + LLC_CMD:
+ case LLCFT_INFO + LLC_RSP: { /* in-sequence I PDU: advance V(R), acknowledge, pass data up */
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ LLC_INC(linkp->llcl_vr);
+ LLC_SENDACKNOWLEDGE(linkp, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ action = LLC_DATA_INDICATION;
+ } else if (pollfinal == 0 && p == 1) {
+ LLC_INC(linkp->llcl_vr);
+ LLC_SENDACKNOWLEDGE(linkp, LLC_CMD, 0);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ action = LLC_DATA_INDICATION;
+ } else if ((pollfinal == 0 && p == 0 && cmdrsp == LLC_CMD) ||
+ (pollfinal == p && cmdrsp == LLC_RSP)) {
+ LLC_INC(linkp->llcl_vr);
+ LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+ LLC_SENDACKNOWLEDGE(linkp, LLC_CMD, 0);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ if (cmdrsp == LLC_RSP && pollfinal == 1)
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ action = LLC_DATA_INDICATION; /* NOTE(review): replaces whatever the macro above may have put into action */
+ }
+ break;
+ }
+ case LLCFT_RR + LLC_CMD:
+ case LLCFT_RR + LLC_RSP: { /* RR: take over N(R), remote side is no longer busy */
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ LLC_SENDACKNOWLEDGE(linkp, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else if ((pollfinal == 0) ||
+ (cmdrsp == LLC_RSP && pollfinal == 1 && p == 1)) {
+ LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ }
+ break;
+ }
+ case LLCFT_RNR + LLC_CMD:
+ case LLCFT_RNR + LLC_RSP: { /* RNR: take over N(R), remote side is busy */
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ } else if ((pollfinal == 0) ||
+ (cmdrsp == LLC_RSP && pollfinal == 1 && p == 1)) {
+ LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ }
+ break;
+ }
+ case LLCFT_REJ + LLC_CMD:
+ case LLCFT_REJ + LLC_RSP: { /* REJ: wind V(S) back to N(R) and resend from there */
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ linkp->llcl_vs = nr;
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ llc_resend(linkp, LLC_RSP, 1);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else if (pollfinal == 0 && p == 1) {
+ linkp->llcl_vs = nr;
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else if ((pollfinal == 0 && p == 0 && cmdrsp == LLC_CMD) ||
+ (pollfinal == p && cmdrsp == LLC_RSP)) {
+ linkp->llcl_vs = nr;
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_START_P_TIMER(linkp);
+ llc_resend(linkp, LLC_CMD, 1);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ }
+ break;
+ }
+ case NL_INITIATE_PF_CYCLE:
+ if (LLC_GETFLAG(linkp, P) == 0) {
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ action = 0;
+ }
+ break;
+ case LLC_P_TIMER_EXPIRED:
+ if (linkp->llcl_retry < llc_n2) { /* otherwise left to llc_state_NBRAcore() */
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ linkp->llcl_retry++;
+ LLC_NEWSTATE(linkp, AWAIT);
+ action = 0;
+ }
+ break;
+ case LLC_ACK_TIMER_EXPIRED:
+ case LLC_BUSY_TIMER_EXPIRED:
+ if ((LLC_GETFLAG(linkp, P) == 0)
+ && (linkp->llcl_retry < llc_n2)) {
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ linkp->llcl_retry++;
+ LLC_NEWSTATE(linkp, AWAIT);
+ action = 0;
+ }
+ break;
+ }
+ if (action == LLC_PASSITON) /* not dealt with above: try the common core */
+ action = llc_state_NBRAcore(linkp, frame, frame_kind,
+ cmdrsp, pollfinal);
+
+ return action;
+}
+
+/*
+ * BUSY --- A data link connection exists between the local LLC service access
+ * point and the remote LLC service access point. I PDUs may be sent.
+ * Local conditions make it likely that the information field of
+ * received I PDUs will be ignored. Supervisory PDUs may be both sent
+ * and received.
+ */
+/*
+ * llc_state_BUSY() --- event handler for the BUSY state.
+ *
+ * linkp	link control block the event applies to
+ * frame	received PDU; N(R) is extracted from it for I and
+ *		supervisory PDUs
+ * frame_kind	event or frame type; dispatched on frame_kind + cmdrsp
+ * cmdrsp	LLC_CMD or LLC_RSP for received PDUs
+ * pollfinal	P/F bit of the received PDU
+ *
+ * Returns the resulting action code. Events that leave the action at
+ * LLC_PASSITON are handed on to llc_state_NBRAcore().
+ */
+int
+llc_state_BUSY(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+ int cmdrsp, int pollfinal)
+{
+ int action = LLC_PASSITON;
+
+ switch(frame_kind + cmdrsp) {
+ case NL_DATA_REQUEST:
+ /* braces make explicit that the else pairs with the P flag test */
+ if (LLC_GETFLAG(linkp, REMOTE_BUSY) == 0) {
+ if (LLC_GETFLAG(linkp, P) == 0) {
+ llc_send(linkp, LLCFT_INFO, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ if (LLC_TIMERXPIRED(linkp, ACK) != LLC_TIMER_RUNNING)
+ LLC_START_ACK_TIMER(linkp);
+ action = 0;
+ } else {
+ llc_send(linkp, LLCFT_INFO, LLC_CMD, 0);
+ if (LLC_TIMERXPIRED(linkp, ACK) != LLC_TIMER_RUNNING)
+ LLC_START_ACK_TIMER(linkp);
+ action = 0;
+ }
+ }
+ break;
+ case LLC_LOCAL_BUSY_CLEARED: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int df = LLC_GETFLAG(linkp, DATA);
+
+ /* the DATA flag value selects the PDU to send and the next state */
+ switch (df) {
+ case 1:
+ if (p == 0) {
+ /* multiple possibilities */
+ llc_send(linkp, LLCFT_REJ, LLC_CMD, 1);
+ LLC_START_REJ_TIMER(linkp);
+ LLC_START_P_TIMER(linkp);
+ LLC_NEWSTATE(linkp, REJECT);
+ action = 0;
+ } else {
+ llc_send(linkp, LLCFT_REJ, LLC_CMD, 0);
+ LLC_START_REJ_TIMER(linkp);
+ LLC_NEWSTATE(linkp, REJECT);
+ action = 0;
+ }
+ break;
+ case 0:
+ if (p == 0) {
+ /* multiple possibilities */
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ LLC_NEWSTATE(linkp, NORMAL);
+ action = 0;
+ } else {
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 0);
+ LLC_NEWSTATE(linkp, NORMAL);
+ action = 0;
+ }
+ break;
+ case 2:
+ if (p == 0) {
+ /* multiple possibilities */
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ LLC_NEWSTATE(linkp, REJECT);
+ action = 0;
+ } else {
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 0);
+ LLC_NEWSTATE(linkp, REJECT);
+ action =0;
+ }
+ break;
+ }
+ break;
+ }
+ case LLC_INVALID_NS + LLC_CMD:
+ case LLC_INVALID_NS + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ if (LLC_GETFLAG(linkp, DATA) == 0)
+ LLC_SETFLAG(linkp, DATA, 1);
+ action = 0;
+ } else if ((cmdrsp == LLC_CMD && pollfinal == 0 && p == 0) ||
+ (cmdrsp == LLC_RSP && pollfinal == p)) {
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+ LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ if (LLC_GETFLAG(linkp, DATA) == 0)
+ LLC_SETFLAG(linkp, DATA, 1);
+ if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else action = 0;
+ } else if (pollfinal == 0 && p == 1) {
+ llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ if (LLC_GETFLAG(linkp, DATA) == 0)
+ LLC_SETFLAG(linkp, DATA, 1);
+ action = 0;
+ }
+ break;
+ }
+ case LLCFT_INFO + LLC_CMD:
+ case LLCFT_INFO + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ LLC_INC(linkp->llcl_vr);
+ llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ if (LLC_GETFLAG(linkp, DATA) == 2)
+ LLC_STOP_REJ_TIMER(linkp);
+ LLC_SETFLAG(linkp, DATA, 0);
+ action = LLC_DATA_INDICATION;
+ } else if ((cmdrsp == LLC_CMD && pollfinal == 0 && p == 0) ||
+ (cmdrsp == LLC_RSP && pollfinal == p)) {
+ LLC_INC(linkp->llcl_vr);
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ if (LLC_GETFLAG(linkp, DATA) == 2)
+ LLC_STOP_REJ_TIMER(linkp);
+ if (cmdrsp == LLC_RSP && pollfinal == 1)
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ /* the data indication overrides whatever the macro stored */
+ action = LLC_DATA_INDICATION;
+ } else if (pollfinal == 0 && p == 1) {
+ LLC_INC(linkp->llcl_vr);
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ if (LLC_GETFLAG(linkp, DATA) == 2)
+ LLC_STOP_REJ_TIMER(linkp);
+ LLC_SETFLAG(linkp, DATA, 0);
+ action = LLC_DATA_INDICATION;
+ }
+ break;
+ }
+ case LLCFT_RR + LLC_CMD:
+ case LLCFT_RR + LLC_RSP:
+ case LLCFT_RNR + LLC_CMD:
+ case LLCFT_RNR + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ if (frame_kind == LLCFT_RR) {
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else {
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ }
+ } else if (pollfinal == 0 ||
+ (cmdrsp == LLC_RSP && pollfinal == 1)) {
+ /*
+ * The test above must compare pollfinal with 0. Written as
+ * an assignment it overwrote pollfinal with the result of
+ * the || expression, so PDUs with P/F == 0 never got here.
+ */
+ LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ if (frame_kind == LLCFT_RR) {
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else {
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ }
+ }
+ break;
+ }
+ case LLCFT_REJ + LLC_CMD:
+ case LLCFT_REJ + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ /* every branch rewinds V(S) to the received N(R) and resends */
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ linkp->llcl_vs = nr;
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else if ((cmdrsp == LLC_CMD && pollfinal == 0 && p == 0) ||
+ (cmdrsp == LLC_RSP && pollfinal == p)) {
+ linkp->llcl_vs = nr;
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else if (pollfinal == 0 && p == 1) {
+ linkp->llcl_vs = nr;
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ }
+ break;
+ }
+ case NL_INITIATE_PF_CYCLE:
+ if (LLC_GETFLAG(linkp, P) == 0) {
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ action = 0;
+ }
+ break;
+ case LLC_P_TIMER_EXPIRED:
+ /* multiple possibilities */
+ if (linkp->llcl_retry < llc_n2) {
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ linkp->llcl_retry++;
+ LLC_NEWSTATE(linkp, AWAIT_BUSY);
+ action = 0;
+ }
+ break;
+ case LLC_ACK_TIMER_EXPIRED:
+ case LLC_BUSY_TIMER_EXPIRED:
+ if (LLC_GETFLAG(linkp, P) == 0 && linkp->llcl_retry < llc_n2) {
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ linkp->llcl_retry++;
+ LLC_NEWSTATE(linkp, AWAIT_BUSY);
+ action = 0;
+ }
+ break;
+ case LLC_REJ_TIMER_EXPIRED:
+ /* braces make explicit that the else pairs with the P flag test */
+ if (linkp->llcl_retry < llc_n2) {
+ if (LLC_GETFLAG(linkp, P) == 0) {
+ /* multiple possibilities */
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ linkp->llcl_retry++;
+ LLC_SETFLAG(linkp, DATA, 1);
+ LLC_NEWSTATE(linkp, AWAIT_BUSY);
+ action = 0;
+ } else{
+ LLC_SETFLAG(linkp, DATA, 1);
+ LLC_NEWSTATE(linkp, BUSY);
+ action = 0;
+ }
+ }
+
+ break;
+ }
+ /* anything not handled above goes to the handler shared by several states */
+ if (action == LLC_PASSITON)
+ action = llc_state_NBRAcore(linkp, frame, frame_kind,
+ cmdrsp, pollfinal);
+
+ return action;
+}
+
+/*
+ * REJECT --- A data link connection exists between the local LLC service
+ * access point and the remote LLC service access point. The local
+ * connection component has requested that the remote connection
+ * component resend a specific I PDU that the local connection
+ * component has detected as being out of sequence. Both I PDUs and
+ * supervisory PDUs may be sent and received.
+ *
+ * linkp	link control block the event applies to
+ * frame	received PDU; N(R) is extracted from it for I and
+ *		supervisory PDUs
+ * frame_kind	event or frame type; dispatched on frame_kind + cmdrsp
+ * cmdrsp	LLC_CMD or LLC_RSP for received PDUs
+ * pollfinal	P/F bit of the received PDU
+ *
+ * Returns the resulting action code. Events that leave the action at
+ * LLC_PASSITON are handed on to llc_state_NBRAcore().
+ */
+int
+llc_state_REJECT(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+ int cmdrsp, int pollfinal)
+{
+ int action = LLC_PASSITON;
+
+ switch(frame_kind + cmdrsp) {
+ case NL_DATA_REQUEST:
+ if (LLC_GETFLAG(linkp, P) == 0) {
+ llc_send(linkp, LLCFT_INFO, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ if (LLC_TIMERXPIRED(linkp, ACK) != LLC_TIMER_RUNNING)
+ LLC_START_ACK_TIMER(linkp);
+ LLC_NEWSTATE(linkp, REJECT);
+ action = 0;
+ } else {
+ llc_send(linkp, LLCFT_INFO, LLC_CMD, 0);
+ if (LLC_TIMERXPIRED(linkp, ACK) != LLC_TIMER_RUNNING)
+ LLC_START_ACK_TIMER(linkp);
+ LLC_NEWSTATE(linkp, REJECT);
+ action = 0;
+ }
+ break;
+ case NL_LOCAL_BUSY_DETECTED:
+ if (LLC_GETFLAG(linkp, P) == 0) {
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ LLC_SETFLAG(linkp, DATA, 2);
+ LLC_NEWSTATE(linkp, BUSY);
+ action = 0;
+ } else {
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+ LLC_SETFLAG(linkp, DATA, 2);
+ LLC_NEWSTATE(linkp, BUSY);
+ action = 0;
+ }
+ break;
+ case LLC_INVALID_NS + LLC_CMD:
+ case LLC_INVALID_NS + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ action = 0;
+ } else if (pollfinal == 0 ||
+ (cmdrsp == LLC_RSP && pollfinal == 1 && p == 1)) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+ if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else action = 0;
+ }
+ break;
+ }
+ case LLCFT_INFO + LLC_CMD:
+ case LLCFT_INFO + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ LLC_INC(linkp->llcl_vr);
+ LLC_SENDACKNOWLEDGE(linkp, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_STOP_REJ_TIMER(linkp);
+ LLC_NEWSTATE(linkp, NORMAL);
+ action = LLC_DATA_INDICATION;
+ } else if ((cmdrsp == LLC_RSP && pollfinal == p) ||
+ (cmdrsp == LLC_CMD && pollfinal == 0 && p == 0)) {
+ /*
+ * The first test above must compare cmdrsp with LLC_RSP.
+ * Written as an assignment it overwrote cmdrsp with a 0/1
+ * truth value, which broke every later test of cmdrsp.
+ */
+ LLC_INC(linkp->llcl_vr);
+ LLC_SENDACKNOWLEDGE(linkp, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ if (cmdrsp == LLC_RSP && pollfinal == 1)
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ LLC_STOP_REJ_TIMER(linkp);
+ LLC_NEWSTATE(linkp, NORMAL);
+ action = LLC_DATA_INDICATION;
+ } else if (pollfinal == 0 && p == 1) {
+ LLC_INC(linkp->llcl_vr);
+ LLC_SENDACKNOWLEDGE(linkp, LLC_CMD, 0);
+ LLC_STOP_REJ_TIMER(linkp);
+ LLC_NEWSTATE(linkp, NORMAL);
+ action = LLC_DATA_INDICATION;
+ }
+ break;
+ }
+ case LLCFT_RR + LLC_CMD:
+ case LLCFT_RR + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ LLC_SENDACKNOWLEDGE(linkp, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else if (pollfinal == 0 ||
+ (cmdrsp == LLC_RSP && pollfinal == 1 && p == 1)) {
+ LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ }
+ break;
+ }
+ case LLCFT_RNR + LLC_CMD:
+ case LLCFT_RNR + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ } else if (pollfinal == 0 ||
+ (cmdrsp == LLC_RSP && pollfinal == 1 && p == 1)) {
+ LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ action = 0;
+ }
+ break;
+ }
+ case LLCFT_REJ + LLC_CMD:
+ case LLCFT_REJ + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ /* every branch rewinds V(S) to the received N(R) and resends */
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ linkp->llcl_vs = nr;
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ llc_resend(linkp, LLC_RSP, 1);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else if ((cmdrsp == LLC_CMD && pollfinal == 0 && p == 0) ||
+ (cmdrsp == LLC_RSP && pollfinal == p)) {
+ linkp->llcl_vs = nr;
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else if (pollfinal == 0 && p == 1) {
+ linkp->llcl_vs = nr;
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ }
+ break;
+ }
+ case NL_INITIATE_PF_CYCLE:
+ if (LLC_GETFLAG(linkp, P) == 0) {
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ action = 0;
+ }
+ break;
+ case LLC_REJ_TIMER_EXPIRED:
+ if (LLC_GETFLAG(linkp, P) == 0 && linkp->llcl_retry < llc_n2) {
+ llc_send(linkp, LLCFT_REJ, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ LLC_START_REJ_TIMER(linkp);
+ linkp->llcl_retry++;
+ action = 0;
+ }
+ /*
+ * Stop here: falling into the P timer case would send a second
+ * PDU, count the retry twice and switch to AWAIT_REJECT.
+ */
+ break;
+ case LLC_P_TIMER_EXPIRED:
+ if (linkp->llcl_retry < llc_n2) {
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ LLC_START_REJ_TIMER(linkp);
+ linkp->llcl_retry++;
+ LLC_NEWSTATE(linkp, AWAIT_REJECT);
+ action = 0;
+ }
+ break;
+ case LLC_ACK_TIMER_EXPIRED:
+ case LLC_BUSY_TIMER_EXPIRED:
+ if (LLC_GETFLAG(linkp, P) == 0 && linkp->llcl_retry < llc_n2) {
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ LLC_START_REJ_TIMER(linkp);
+ linkp->llcl_retry++;
+ /*
+ * I cannot locate the description of RESET_V(S)
+ * in ISO 8802-2, table 7-1, state REJECT, last event,
+ * and assume they meant to set V(S) to 0 ...
+ */
+ linkp->llcl_vs = 0; /* XXX */
+ LLC_NEWSTATE(linkp, AWAIT_REJECT);
+ action = 0;
+ }
+
+ break;
+ }
+ /* anything not handled above goes to the handler shared by several states */
+ if (action == LLC_PASSITON)
+ action = llc_state_NBRAcore(linkp, frame, frame_kind,
+ cmdrsp, pollfinal);
+
+ return action;
+}
+
+/*
+ * AWAIT --- A data link connection exists between the local LLC service access
+ * point and the remote LLC service access point. The local LLC is
+ * performing a timer recovery operation and has sent a command PDU
+ * with the P bit set to ``1'', and is awaiting an acknowledgement
+ * from the remote LLC. I PDUs may be received but not sent.
+ * Supervisory PDUs may be both sent and received.
+ *
+ * The event is selected by frame_kind + cmdrsp. For received PDUs the
+ * branches distinguish a command with P == 1, a response with F == 1
+ * and any PDU with P/F == 0. The P flag is fetched in each PDU case
+ * but none of the branches consults it.
+ *
+ * Returns the resulting action code. Events that leave the action at
+ * LLC_PASSITON are handed on to llc_state_NBRAcore().
+ */
+int
+llc_state_AWAIT(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+ int cmdrsp, int pollfinal)
+{
+ int action = LLC_PASSITON;
+
+ switch(frame_kind + cmdrsp) {
+ case LLC_LOCAL_BUSY_DETECTED:
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+ LLC_SETFLAG(linkp, DATA, 0);
+ LLC_NEWSTATE(linkp, AWAIT_BUSY);
+ action = 0;
+ break;
+ case LLC_INVALID_NS + LLC_CMD:
+ case LLC_INVALID_NS + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ /* an out-of-sequence I PDU is answered with REJ in every branch */
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_REJ, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_START_REJ_TIMER(linkp);
+ LLC_NEWSTATE(linkp, AWAIT_REJECT);
+ action = 0;
+ } else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ llc_send(linkp, LLCFT_REJ, LLC_CMD, 0);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ LLC_STOP_P_TIMER(linkp);
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_START_REJ_TIMER(linkp);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ LLC_NEWSTATE(linkp, REJECT);
+ } else if (pollfinal == 0) {
+ llc_send(linkp, LLCFT_REJ, LLC_CMD, 0);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_START_REJ_TIMER(linkp);
+ LLC_NEWSTATE(linkp, AWAIT_REJECT);
+ action = 0;
+ }
+ break;
+ }
+ case LLCFT_INFO + LLC_RSP:
+ case LLCFT_INFO + LLC_CMD: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ /* V(R) is advanced before the branches, i.e. for every I PDU seen here */
+ LLC_INC(linkp->llcl_vr);
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ action = LLC_DATA_INDICATION;
+ } else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ llc_resend(linkp, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ LLC_NEWSTATE(linkp, NORMAL);
+ /* the data indication overrides whatever the macro stored */
+ action = LLC_DATA_INDICATION;
+ } else if (pollfinal == 0) {
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 0);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ action = LLC_DATA_INDICATION;
+ }
+ break;
+ }
+ case LLCFT_RR + LLC_CMD:
+ case LLCFT_RR + LLC_RSP:
+ case LLCFT_REJ + LLC_CMD:
+ case LLCFT_REJ + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ /* F == 1 response: rewind V(S), resend and return to NORMAL */
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ LLC_STOP_P_TIMER(linkp);
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ LLC_NEWSTATE(linkp, NORMAL);
+ } else if (pollfinal == 0) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ }
+ break;
+ }
+ case LLCFT_RNR + LLC_CMD:
+ case LLCFT_RNR + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (pollfinal == 1 && cmdrsp == LLC_CMD) {
+ llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ } else if (pollfinal == 1 && cmdrsp == LLC_RSP) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ LLC_STOP_P_TIMER(linkp);
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ LLC_NEWSTATE(linkp, NORMAL);
+ } else if (pollfinal == 0) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ }
+ break;
+ }
+ case LLC_P_TIMER_EXPIRED:
+ /* poll again with RR while the retry count is below llc_n2 */
+ if (linkp->llcl_retry < llc_n2) {
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ linkp->llcl_retry++;
+ action = 0;
+ }
+ break;
+ }
+ /* anything not handled above goes to the handler shared by several states */
+ if (action == LLC_PASSITON)
+ action = llc_state_NBRAcore(linkp, frame, frame_kind,
+ cmdrsp, pollfinal);
+
+ return action;
+}
+
+/*
+ * AWAIT_BUSY --- A data link connection exists between the local LLC service
+ * access point and the remote LLC service access point. The
+ * local LLC is performing a timer recovery operation and has
+ * sent a command PDU with the P bit set to ``1'', and is
+ * awaiting an acknowledgement from the remote LLC. I PDUs may
+ * not be sent. Local conditions make it likely that the
+ * information field of received I PDUs will be ignored.
+ * Supervisory PDUs may be both sent and received.
+ *
+ * The event is selected by frame_kind + cmdrsp. For received PDUs the
+ * branches distinguish a command with P == 1, a response with F == 1
+ * and any PDU with P/F == 0.
+ *
+ * Returns the resulting action code. Events that leave the action at
+ * LLC_PASSITON are handed on to llc_state_NBRAcore().
+ */
+int
+llc_state_AWAIT_BUSY(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+ int cmdrsp, int pollfinal)
+{
+ int action = LLC_PASSITON;
+
+ switch(frame_kind + cmdrsp) {
+ case LLC_LOCAL_BUSY_CLEARED:
+ /* the DATA flag value selects the PDU to send and the next state */
+ switch (LLC_GETFLAG(linkp, DATA)) {
+ case 1:
+ llc_send(linkp, LLCFT_REJ, LLC_CMD, 0);
+ LLC_START_REJ_TIMER(linkp);
+ LLC_NEWSTATE(linkp, AWAIT_REJECT);
+ action = 0;
+ break;
+ case 0:
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 0);
+ LLC_NEWSTATE(linkp, AWAIT);
+ action = 0;
+ break;
+ case 2:
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 0);
+ LLC_NEWSTATE(linkp, AWAIT_REJECT);
+ action = 0;
+ break;
+ }
+ break;
+ case LLC_INVALID_NS + LLC_CMD:
+ case LLC_INVALID_NS + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SETFLAG(linkp, DATA, 1);
+ action = 0;
+ } else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ /* optionally */
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ LLC_STOP_P_TIMER(linkp);
+ LLC_SETFLAG(linkp, DATA, 1);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_NEWSTATE(linkp, BUSY);
+ } else if (pollfinal == 0) {
+ /* optionally */
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SETFLAG(linkp, DATA, 1);
+ action = 0;
+ }
+ /*
+ * Stop here: falling into the I PDU case would advance V(R)
+ * and deliver an out-of-sequence PDU as if it were valid.
+ */
+ break;
+ }
+ case LLCFT_INFO + LLC_CMD:
+ case LLCFT_INFO + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+ LLC_INC(linkp->llcl_vr);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SETFLAG(linkp, DATA, 0);
+ action = LLC_DATA_INDICATION;
+ } else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+ LLC_INC(linkp->llcl_vr);
+ LLC_START_P_TIMER(linkp);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ LLC_SETFLAG(linkp, DATA, 0);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_NEWSTATE(linkp, BUSY);
+ /* the data indication overrides whatever the macro stored */
+ action = LLC_DATA_INDICATION;
+ } else if (pollfinal == 0) {
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+ LLC_INC(linkp->llcl_vr);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SETFLAG(linkp, DATA, 0);
+ action = LLC_DATA_INDICATION;
+ }
+ break;
+ }
+ case LLCFT_RR + LLC_CMD:
+ case LLCFT_REJ + LLC_CMD:
+ case LLCFT_RR + LLC_RSP:
+ case LLCFT_REJ + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ LLC_STOP_P_TIMER(linkp);
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ LLC_NEWSTATE(linkp, BUSY);
+ } else if (pollfinal == 0) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ LLC_STOP_P_TIMER(linkp);
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ }
+ break;
+ }
+ case LLCFT_RNR + LLC_CMD:
+ case LLCFT_RNR + LLC_RSP: {
+ register int p = LLC_GETFLAG(linkp, P);
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ } else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ LLC_STOP_P_TIMER(linkp);
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ LLC_NEWSTATE(linkp, BUSY);
+ } else if (pollfinal == 0) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ }
+ break;
+ }
+ case LLC_P_TIMER_EXPIRED:
+ /* poll again with RNR while the retry count is below llc_n2 */
+ if (linkp->llcl_retry < llc_n2) {
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ linkp->llcl_retry++;
+ action = 0;
+ }
+ break;
+ }
+ /* anything not handled above goes to the handler shared by several states */
+ if (action == LLC_PASSITON)
+ action = llc_state_NBRAcore(linkp, frame, frame_kind,
+ cmdrsp, pollfinal);
+
+ return action;
+}
+
+/*
+ * AWAIT_REJECT --- A data link connection exists between the local LLC service
+ * access point and the remote LLC service access point. The
+ * local connection component has requested that the remote
+ * connection component re-transmit a specific I PDU that the
+ * local connection component has detected as being out of
+ * sequence. Before the local LLC entered this state it was
+ * performing a timer recovery operation and had sent a
+ * command PDU with the P bit set to ``1'', and is still
+ * awaiting an acknowledgment from the remote LLC. I PDUs may
+ * be received but not transmitted. Supervisory PDUs may be
+ * both transmitted and received.
+ *
+ * The event is selected by frame_kind + cmdrsp. For received PDUs the
+ * branches distinguish a command with P == 1, a response with F == 1
+ * and any PDU with P/F == 0.
+ *
+ * Returns the resulting action code. Events that leave the action at
+ * LLC_PASSITON are handed on to llc_state_NBRAcore().
+ */
+int
+llc_state_AWAIT_REJECT(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+ int cmdrsp, int pollfinal)
+{
+ int action = LLC_PASSITON;
+
+ switch(frame_kind + cmdrsp) {
+ case LLC_LOCAL_BUSY_DETECTED:
+ llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+ LLC_SETFLAG(linkp, DATA, 2);
+ LLC_NEWSTATE(linkp, AWAIT_BUSY);
+ action = 0;
+ break;
+ case LLC_INVALID_NS + LLC_CMD:
+ case LLC_INVALID_NS + LLC_RSP: {
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ action = 0;
+ } else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ /* F == 1 response: rewind V(S), resend with P == 1, go to REJECT */
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ llc_resend(linkp, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ LLC_NEWSTATE(linkp, REJECT);
+ } else if (pollfinal == 0) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ action = 0;
+ }
+ break;
+ }
+ case LLCFT_INFO + LLC_CMD:
+ case LLCFT_INFO + LLC_RSP: {
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ /* an in-sequence I PDU stops the REJ timer in every branch */
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ LLC_INC(linkp->llcl_vr);
+ llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+ LLC_STOP_REJ_TIMER(linkp);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_NEWSTATE(linkp, AWAIT);
+ action = LLC_DATA_INDICATION;
+ } else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ LLC_INC(linkp->llcl_vr);
+ LLC_STOP_P_TIMER(linkp);
+ LLC_STOP_REJ_TIMER(linkp);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ llc_resend(linkp, LLC_CMD, 0);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ LLC_NEWSTATE(linkp, NORMAL);
+ /* the data indication overrides whatever the macro stored */
+ action = LLC_DATA_INDICATION;
+ } else if (pollfinal == 0) {
+ LLC_INC(linkp->llcl_vr);
+ llc_send(linkp, LLCFT_RR, LLC_CMD, 0);
+ LLC_STOP_REJ_TIMER(linkp);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_NEWSTATE(linkp, AWAIT);
+ action = LLC_DATA_INDICATION;
+ }
+ break;
+ }
+ case LLCFT_RR + LLC_CMD:
+ case LLCFT_REJ + LLC_CMD:
+ case LLCFT_RR + LLC_RSP:
+ case LLCFT_REJ + LLC_RSP: {
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ } else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ llc_resend(linkp, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ LLC_NEWSTATE(linkp, REJECT);
+ } else if (pollfinal == 0) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_CLEAR_REMOTE_BUSY(linkp, action);
+ }
+ break;
+ }
+ case LLCFT_RNR + LLC_CMD:
+ case LLCFT_RNR + LLC_RSP: {
+ register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+ if (cmdrsp == LLC_CMD && pollfinal == 1) {
+ llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ } else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ linkp->llcl_vs = nr;
+ LLC_STOP_P_TIMER(linkp);
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ LLC_NEWSTATE(linkp, REJECT);
+ } else if (pollfinal == 0) {
+ LLC_UPDATE_NR_RECEIVED(linkp, nr);
+ LLC_SET_REMOTE_BUSY(linkp, action);
+ }
+ break;
+ }
+ case LLC_P_TIMER_EXPIRED:
+ /* poll again with REJ while the retry count is below llc_n2 */
+ if (linkp->llcl_retry < llc_n2) {
+ llc_send(linkp, LLCFT_REJ, LLC_CMD, 1);
+ LLC_START_P_TIMER(linkp);
+ linkp->llcl_retry++;
+ action = 0;
+ }
+ break;
+ }
+ /* anything not handled above goes to the handler shared by several states */
+ if (action == LLC_PASSITON)
+ action = llc_state_NBRAcore(linkp, frame, frame_kind,
+ cmdrsp, pollfinal);
+
+ return action;
+}
+
+
+/*
+ * llc_statehandler() --- Wrapper for llc_state_*() functions.
+ * Deals with action codes and checks for
+ * ``stuck'' links.
+ *
+ * The arguments are passed unchanged to the handler currently stored
+ * in linkp->llcl_statehandler. Some action codes cause the handler to
+ * be invoked again with a follow-up event; the action code of the last
+ * invocation is returned.
+ */
+
+int
+llc_statehandler(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+ int cmdrsp, int pollfinal)
+{
+ register int action = 0;
+
+ /*
+ * To check for ``zombie'' links each time llc_statehandler() gets called
+ * the AGE timer of linkp is reset. If it expires llc_timer() will
+ * take care of the link --- i.e. kill it 8=)
+ */
+ LLC_STARTTIMER(linkp, AGE);
+
+ /*
+ * Now call the current statehandler function.
+ */
+ action = (*linkp->llcl_statehandler)(linkp, frame, frame_kind,
+ cmdrsp, pollfinal);
+once_more_and_again:
+ switch (action) {
+ case LLC_CONNECT_INDICATION: {
+ int naction;
+
+ LLC_TRACE(linkp, LLCTR_INTERESTING, "CONNECT INDICATION");
+ /* the upcall result becomes the link's network layer handle */
+ linkp->llcl_nlnext =
+ (*linkp->llcl_sapinfo->si_ctlinput)
+ (PRC_CONNECT_INDICATION,
+ (struct sockaddr *) &linkp->llcl_addr, (caddr_t) linkp);
+ /* a zero handle is answered with a disconnect request */
+ if (linkp->llcl_nlnext == 0)
+ naction = NL_DISCONNECT_REQUEST;
+ else naction = NL_CONNECT_RESPONSE;
+ action = (*linkp->llcl_statehandler)(linkp, frame, naction, 0, 0);
+ goto once_more_and_again;
+ }
+ case LLC_CONNECT_CONFIRM:
+ /* llc_resend(linkp, LLC_CMD, 0); */
+ llc_start(linkp);
+ break;
+ case LLC_DISCONNECT_INDICATION:
+ LLC_TRACE(linkp, LLCTR_INTERESTING, "DISCONNECT INDICATION");
+ (*linkp->llcl_sapinfo->si_ctlinput)
+ (PRC_DISCONNECT_INDICATION,
+ (struct sockaddr *) &linkp->llcl_addr, linkp->llcl_nlnext);
+ break;
+ /* internally visible only */
+ case LLC_RESET_CONFIRM:
+ case LLC_RESET_INDICATION_LOCAL:
+ /*
+ * not much we can do here, the state machine either makes it or
+ * breaks it ...
+ */
+ break;
+ case LLC_RESET_INDICATION_REMOTE:
+ LLC_TRACE(linkp, LLCTR_SHOULDKNOW, "RESET INDICATION (REMOTE)");
+ action = (*linkp->llcl_statehandler)(linkp, frame,
+ NL_RESET_RESPONSE, 0, 0);
+ goto once_more_and_again;
+ case LLC_FRMR_SENT:
+ LLC_TRACE(linkp, LLCTR_URGENT, "FRMR SENT");
+ break;
+ case LLC_FRMR_RECEIVED:
+ /* same trace level as FRMR SENT; was misspelled LLCTR_URGEN */
+ LLC_TRACE(linkp, LLCTR_URGENT, "FRMR RECEIVED");
+ action = (*linkp->llcl_statehandler)(linkp, frame,
+ NL_RESET_REQUEST, 0, 0);
+
+ goto once_more_and_again;
+ case LLC_REMOTE_BUSY:
+ LLC_TRACE(linkp, LLCTR_SHOULDKNOW, "REMOTE BUSY");
+ break;
+ case LLC_REMOTE_NOT_BUSY:
+ LLC_TRACE(linkp, LLCTR_SHOULDKNOW, "REMOTE BUSY CLEARED");
+ /*
+ * try to get queued frames out
+ */
+ llc_start(linkp);
+ break;
+ }
+
+ /*
+ * Only LLC_DATA_INDICATION is for the time being
+ * passed up to the network layer entity.
+ * The remaining action codes are for the time
+ * being visible internally only.
+ * However, this can/may be changed if necessary.
+ */
+
+ return action;
+}
+
+
+/*
+ * Core LLC2 routines
+ */
+
+/*
+ * The INIT call. This routine is called once after the system is booted.
+ *
+ * Sets the maximum length of the LLC input queue (llcintrq) to
+ * IFQ_MAXLEN. Declared without a return type and returns no value.
+ */
+
+llc_init()
+{
+ llcintrq.ifq_maxlen = IFQ_MAXLEN;
+}
+
+
+/*
+ * In case of a link reset we need to shuffle the frames queued inside the
+ * LLC2 window.
+ *
+ * The frames between the slot of llcl_nr_received and llcl_freeslot are
+ * chained through m_nextpkt and then stored again starting at slot 0.
+ * llcl_freeslot and llcl_projvs end up as the number of frames kept and
+ * llcl_nr_received is reset to 0. Panics if a slot inside that range is
+ * empty.
+ */
+
+void
+llc_resetwindow(struct llc_linkcb *linkp)
+{
+ register struct mbuf *mptr = (struct mbuf *) 0;
+ register struct mbuf *anchor = (struct mbuf *)0;
+ register short i;
+
+ /* Pick up all queued frames and collect them in a linked mbuf list */
+ if (linkp->llcl_slotsfree != linkp->llcl_window) {
+ i = llc_seq2slot(linkp, linkp->llcl_nr_received);
+ anchor = mptr = linkp->llcl_output_buffers[i];
+ /*
+ * The first pass revisits the anchor slot, so the anchor briefly
+ * points at itself; the next pass or the closure below overwrites
+ * that link.
+ * NOTE(review): i is a slot index here but is passed to
+ * llc_seq2slot(), which the call above feeds a sequence number ---
+ * confirm this is intended.
+ */
+ for (; i != linkp->llcl_freeslot;
+ i = llc_seq2slot(linkp, i+1)) {
+ if (linkp->llcl_output_buffers[i]) {
+ mptr->m_nextpkt = linkp->llcl_output_buffers[i];
+ mptr = mptr->m_nextpkt;
+ } else panic("LLC2 window broken");
+ }
+ }
+ /* clean closure */
+ if (mptr)
+ mptr->m_nextpkt = (struct mbuf *) 0;
+
+ /* Now --- plug 'em in again */
+ if (anchor != (struct mbuf *)0) {
+ /*
+ * NOTE(review): slots at or beyond the new llcl_freeslot are not
+ * cleared here and may still hold the old pointers --- confirm
+ * that nothing walks them afterwards.
+ */
+ for (i = 0, mptr = anchor; mptr != (struct mbuf *) 0; i++) {
+ linkp->llcl_output_buffers[i] = mptr;
+ mptr = mptr->m_nextpkt;
+ linkp->llcl_output_buffers[i]->m_nextpkt = (struct mbuf *)0;
+ }
+ linkp->llcl_freeslot = i;
+ } else linkp->llcl_freeslot = 0;
+
+ /* We're resetting the link, the next frame to be acknowledged is 0 */
+ linkp->llcl_nr_received = 0;
+
+ /* set distance between LLC2 sequence number and the top of window to 0 */
+ linkp->llcl_projvs = linkp->llcl_freeslot;
+
+ return;
+}
+
+/*
+ * llc_newlink() --- We allocate enough memory to contain a link control block
+ * and initialize it properly. We don't initiate the actual setup
+ * of the LLC2 link here.
+ *
+ * dst		link address, copied into the control block; its SAP selects
+ *		the SAP info block
+ * ifp		interface the link runs over
+ * nlrt		network layer route entry, stored in the control block
+ * nlnext	network layer control block, stored in the control block
+ * llrt		link layer route entry, stored in the control block
+ *
+ * Returns the new control block, already entered into llccb_q with
+ * llc_state_ADM as its state handler, or NULL if memory could not be
+ * allocated or no SAP info block was found.
+ */
+struct llc_linkcb *
+llc_newlink(struct sockaddr_dl *dst, struct ifnet *ifp, struct rtentry *nlrt,
+ caddr_t nlnext, struct rtentry *llrt)
+{
+ struct llc_linkcb *nlinkp;
+ u_char sap = LLSAPADDR(dst);
+ short llcwindow;
+
+
+ /* allocate memory for link control block */
+ MALLOC(nlinkp, struct llc_linkcb *, sizeof(struct llc_linkcb),
+ M_PCB, M_DONTWAIT);
+ if (nlinkp == 0)
+ return (NULL);
+ bzero((caddr_t)nlinkp, sizeof(struct llc_linkcb));
+
+ /* copy link address */
+ sdl_copy(dst, &nlinkp->llcl_addr);
+
+ /* hold on to the network layer route entry */
+ nlinkp->llcl_nlrt = nlrt;
+
+ /* likewise the network layer control block */
+ nlinkp->llcl_nlnext = nlnext;
+
+ /* jot down the link layer route entry */
+ nlinkp->llcl_llrt = llrt;
+
+ /* reset writeq */
+ nlinkp->llcl_writeqh = nlinkp->llcl_writeqt = NULL;
+
+ /* setup initial state handler function */
+ nlinkp->llcl_statehandler = llc_state_ADM;
+
+ /* hold on to interface pointer */
+ nlinkp->llcl_if = ifp;
+
+ /* get service access point information */
+ nlinkp->llcl_sapinfo = llc_getsapinfo(sap, ifp);
+
+ /*
+ * Without a SAP info block there is neither a window size nor an
+ * upcall to use; fail the same way as on an allocation failure
+ * instead of dereferencing a null pointer below.
+ */
+ if (nlinkp->llcl_sapinfo == 0) {
+ FREE(nlinkp, M_PCB);
+ return (NULL);
+ }
+
+ /* get window size from SAP info block */
+ if ((llcwindow = nlinkp->llcl_sapinfo->si_window) == 0)
+ llcwindow = LLC_MAX_WINDOW;
+
+ /* allocate memory for window buffer */
+ MALLOC(nlinkp->llcl_output_buffers, struct mbuf **,
+ llcwindow*sizeof(struct mbuf *), M_PCB, M_DONTWAIT);
+ if (nlinkp->llcl_output_buffers == 0) {
+ FREE(nlinkp, M_PCB);
+ return(NULL);
+ }
+ bzero((caddr_t)nlinkp->llcl_output_buffers,
+ llcwindow*sizeof(struct mbuf *));
+
+ /* set window size & slotsfree */
+ nlinkp->llcl_slotsfree = nlinkp->llcl_window = llcwindow;
+
+ /* enter into linked list of link control blocks */
+ insque(nlinkp, &llccb_q);
+
+ return(nlinkp);
+}
+
+/*
+ * llc_dellink() --- farewell to link control block
+ *
+ * Tells the upper layer about the disconnect, detaches the control
+ * block from its link layer route entry and from the control block
+ * queue, frees every mbuf still held in the write queue and in the
+ * window, and finally frees the window array and the control block
+ * itself. linkp must not be used after this call.
+ * Declared without a return type and returns no value.
+ */
+llc_dellink(struct llc_linkcb *linkp)
+{
+ register struct mbuf *m;
+ register struct mbuf *n;
+ register struct npaidbentry *sapinfo = linkp->llcl_sapinfo;
+ register i;
+
+ /* notify upper layer of imminent death */
+ /* NOTE(review): sapinfo is dereferenced unchecked --- confirm it is never null here */
+ if (linkp->llcl_nlnext && sapinfo->si_ctlinput)
+ (*sapinfo->si_ctlinput)
+ (PRC_DISCONNECT_INDICATION,
+ (struct sockaddr *)&linkp->llcl_addr, linkp->llcl_nlnext);
+
+ /* pull the plug */
+ if (linkp->llcl_llrt)
+ ((struct npaidbentry *)(linkp->llcl_llrt->rt_llinfo))->np_link
+ = (struct llc_linkcb *) 0;
+
+ /* leave link control block queue */
+ remque(linkp);
+
+ /* drop queued packets */
+ /* the successor is saved first because m_freem() releases m */
+ for (m = linkp->llcl_writeqh; m;) {
+ n = m->m_act;
+ m_freem(m);
+ m = n;
+ }
+
+ /* drop packets in the window */
+ for(i = 0; i < linkp->llcl_window; i++)
+ if (linkp->llcl_output_buffers[i])
+ m_freem(linkp->llcl_output_buffers[i]);
+
+ /* return the window space */
+ FREE((caddr_t)linkp->llcl_output_buffers, M_PCB);
+
+ /* return the control block space --- now it's gone ... */
+ FREE((caddr_t)linkp, M_PCB);
+}
+
+/*
+ * llc_decode() --- classify a received PDU.
+ *
+ * frame	PDU to classify by its control field
+ * linkp	link the PDU belongs to, or null to skip the sequence
+ *		number checks
+ *
+ * Returns the frame type, LLC_BAD_PDU if the control field matches none
+ * of the cases, or, when linkp is given, LLC_INVALID_NS / LLC_INVALID_NR
+ * if a sequence number check fails.
+ * Declared without a return type, so it returns int.
+ */
+llc_decode(struct llc* frame, struct llc_linkcb * linkp)
+{
+ register int ft = LLC_BAD_PDU;
+
+ /* a clear low-order bit in the control field marks an I PDU */
+ if ((frame->llc_control & 01) == 0) {
+ ft = LLCFT_INFO;
+ /* S or U frame ? */
+ } else switch (frame->llc_control) {
+
+ /* U frames */
+ /*
+ * NOTE(review): UI yields LLC_UI, the same constant used as the case
+ * label, while every other case yields an LLCFT_* value --- confirm
+ * this is intended.
+ */
+ case LLC_UI:
+ case LLC_UI_P: ft = LLC_UI; break;
+ case LLC_DM:
+ case LLC_DM_P: ft =LLCFT_DM; break;
+ case LLC_DISC:
+ case LLC_DISC_P: ft = LLCFT_DISC; break;
+ case LLC_UA:
+ case LLC_UA_P: ft = LLCFT_UA; break;
+ case LLC_SABME:
+ case LLC_SABME_P: ft = LLCFT_SABME; break;
+ case LLC_FRMR:
+ case LLC_FRMR_P: ft = LLCFT_FRMR; break;
+ case LLC_XID:
+ case LLC_XID_P: ft = LLCFT_XID; break;
+ case LLC_TEST:
+ case LLC_TEST_P: ft = LLCFT_TEST; break;
+
+ /* S frames */
+ case LLC_RR: ft = LLCFT_RR; break;
+ case LLC_RNR: ft = LLCFT_RNR; break;
+ case LLC_REJ: ft = LLCFT_REJ; break;
+ } /* switch */
+
+ /* sequence numbers can only be checked against a known link */
+ if (linkp) {
+ switch (ft) {
+ case LLCFT_INFO:
+ /* N(S) must equal the link's V(R) */
+ if (LLCGBITS(frame->llc_control, i_ns) != linkp->llcl_vr) {
+ ft = LLC_INVALID_NS;
+ break;
+ }
+ /* fall thru --- yeeeeeee */
+ case LLCFT_RR:
+ case LLCFT_RNR:
+ case LLCFT_REJ:
+ /* splash! */
+ /* N(R) is validated for I PDUs with a good N(S) and for all S PDUs */
+ if (LLC_NR_VALID(linkp, LLCGBITS(frame->llc_control_ext,
+ s_nr)) == 0)
+ ft = LLC_INVALID_NR;
+ break;
+ }
+ }
+
+ return ft;
+}
+
+/*
+ * llc_anytimersup() --- Tells whether the timer scan over a link stopped
+ * early: 1 if it ended anywhere but at
+ * LLC_AGE_SHIFT, 0 otherwise.
+ */
+int
+llc_anytimersup(struct llc_linkcb * linkp)
+{
+ register int timer;
+
+ /* leave the scan at the first timer holding a positive value */
+ FOR_ALL_LLC_TIMERS(timer) {
+ if (linkp->llcl_timers[timer] > 0)
+ break;
+ }
+
+ /* ending on LLC_AGE_SHIFT is reported as ``no timer up'' */
+ return (timer == LLC_AGE_SHIFT) ? 0 : 1;
+}
+
+/*
+ * llc_link_dump() - dump link info
+ */
+
+/* SAL(s): the link address of control block s, viewed as a sockaddr_dl */
+#define SAL(s) ((struct sockaddr_dl *)&(s)->llcl_addr)
+/* CHECK(l, s): return the name of state s as a string if link l is in it */
+#define CHECK(l, s) if (LLC_STATEEQ(l, s)) return #s
+
+/* timer names, indexed by timer number when llc_link_dump() prints them */
+char *timer_names[] = {"ACK", "P", "BUSY", "REJ", "AGE"};
+
+/*
+ * llc_getstatename() --- name of the state a link is in.
+ *
+ * Returns the state name as a string constant, or "UNKNOWN - eh?" if
+ * none of the states listed below matches.
+ */
+char *
+llc_getstatename(struct llc_linkcb *linkp)
+{
+ CHECK(linkp, ADM);
+ CHECK(linkp, CONN);
+ CHECK(linkp, RESET_WAIT);
+ CHECK(linkp, RESET_CHECK);
+ CHECK(linkp, SETUP);
+ CHECK(linkp, RESET);
+ CHECK(linkp, D_CONN);
+ CHECK(linkp, ERROR);
+ CHECK(linkp, NORMAL);
+ CHECK(linkp, BUSY);
+ CHECK(linkp, REJECT);
+ CHECK(linkp, AWAIT);
+ CHECK(linkp, AWAIT_BUSY);
+ CHECK(linkp, AWAIT_REJECT);
+
+ return "UNKNOWN - eh?";
+}
+
+/*
+ * Prints the interface, the caller's message, the link address, the
+ * current state with all timers, the flag values and the sequence
+ * state of linkp on the console.
+ *
+ * linkp	link control block to dump
+ * message	text printed between ``>>'' and ``<<''
+ */
+void
+llc_link_dump(struct llc_linkcb* linkp, const char *message)
+{
+ register int i;
+
+ /* print interface */
+ printf("if %s%d\n", linkp->llcl_if->if_name, linkp->llcl_if->if_unit);
+
+ /* print message */
+ printf(">> %s <<\n", message);
+
+ /* print MAC and LSAP */
+ /* all but the last two address bytes get a ':' separator */
+ printf("llc addr ");
+ for (i = 0; i < (SAL(linkp)->sdl_alen)-2; i++)
+ printf("%x:", (char)*(LLADDR(SAL(linkp))+i) & 0xff);
+ printf("%x,", (char)*(LLADDR(SAL(linkp))+i) & 0xff);
+ printf("%x\n", (char)*(LLADDR(SAL(linkp))+i+1) & 0xff);
+
+ /* print state we're in and timers */
+ printf("state %s, ", llc_getstatename(linkp));
+ for (i = LLC_ACK_SHIFT; i < LLC_AGE_SHIFT; i++)
+ printf("%s-%c %d/", timer_names[i],
+ (linkp->llcl_timerflags & (1<<i) ? 'R' : 'S'),
+ linkp->llcl_timers[i]);
+ /* the loop leaves i at LLC_AGE_SHIFT; that timer ends the line */
+ printf("%s-%c %d\n", timer_names[i], (linkp->llcl_timerflags & (1<<i) ?
+ 'R' : 'S'), linkp->llcl_timers[i]);
+
+ /*
+ * print flag values --- one conversion per argument. The format used
+ * to carry an extra ``F %d'' without a matching argument, which
+ * shifted every later value under the wrong label.
+ */
+ printf("flags P %d/S %d/DATA %d/REMOTE_BUSY %d\n",
+ LLC_GETFLAG(linkp, P), LLC_GETFLAG(linkp, S),
+ LLC_GETFLAG(linkp, DATA), LLC_GETFLAG(linkp, REMOTE_BUSY));
+
+ /* print send and receive state variables, ack, and window */
+ /* arguments follow the order of the labels: V(R) first, then V(S) */
+ printf("V(R) %d/V(S) %d/N(R) received %d/window %d/freeslot %d\n",
+ linkp->llcl_vr, linkp->llcl_vs, linkp->llcl_nr_received,
+ linkp->llcl_window, linkp->llcl_freeslot);
+
+ /* further expansions can follow here */
+
+}
+
+/*
+ * Dumps the link together with message, provided tracing is switched
+ * on in the link's SAP info block and level is above llc_tracelevel.
+ */
+void
+llc_trace(struct llc_linkcb *linkp, int level, const char *message)
+{
+ /* tracing not enabled for this SAP */
+ if (!linkp->llcl_sapinfo->si_trace)
+ return;
+
+ /* not important enough for the current trace level */
+ if (level <= llc_tracelevel)
+ return;
+
+ llc_link_dump(linkp, message);
+}
diff --git a/sys/netccitt/llc_timer.c b/sys/netccitt/llc_timer.c
new file mode 100644
index 000000000000..0aecd08b68d6
--- /dev/null
+++ b/sys/netccitt/llc_timer.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) Dirk Husemann, Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Dirk Husemann and the Computer Science Department (IV) of
+ * the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)llc_timer.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/llc_var.h>
+
+
+/*
+ * Various timer values. They can be adjusted
+ * by patching the binary with adb if necessary.
+ *
+ * Each llc_<NAME>_timer is the start value LLC_STARTTIMER() loads into
+ * a link's timer slot; llc_timer() counts the slot down by one per call,
+ * so the unit is one llc_timer() tick (500ms according to the comment
+ * on llc_timer()).  llc_n2 is not a timer: LLC_N2_VALUE is documented
+ * as a retry count.
+ */
+/* ISO 8802-2 timers */
+int llc_n2 = LLC_N2_VALUE;
+int llc_ACK_timer = LLC_ACK_TIMER;
+int llc_P_timer = LLC_P_TIMER;
+int llc_BUSY_timer = LLC_BUSY_TIMER;
+int llc_REJ_timer = LLC_REJ_TIMER;
+/* Implementation specific timers */
+int llc_AGE_timer = LLC_AGE_TIMER;
+int llc_DACTION_timer = LLC_DACTION_TIMER;
+
+/*
+ * The timer routine. We are called every 500ms by the kernel.
+ * Handle the various virtual timers.
+ *
+ * For every link on llccb_q, in this order:
+ *   1. the delayed action timer (DACTION): when it expires the delayed
+ *      RR acknowledge recorded in the DACTION flag is sent;
+ *   2. the link age timer (AGE): when it expires and no ISO 8802-2
+ *      timer is up any more the link is deleted, otherwise the age
+ *      timer is restarted;
+ *   3. the ISO 8802-2 timers (ACK, P, BUSY, REJ): an expired timer is
+ *      turned into the matching *_TIMER_EXPIRED event for the state
+ *      handler, a running one is counted down.
+ *
+ * The whole walk runs at splimp().
+ */
+void
+llc_timer()
+{
+	register struct llc_linkcb *linkp;
+	register struct llc_linkcb *nlinkp;
+	register int timer;
+	register int action;
+	register int s = splimp();
+
+	/*
+	 * All links are accessible over the doubly linked list llccb_q
+	 */
+	if (!LQEMPTY) {
+		/*
+		 * A for-loop is not that great an idea as the linkp
+		 * might get deleted if the age timer has expired ...
+		 * so the successor is fetched before anything else.
+		 */
+		linkp = LQFIRST;
+		while (LQVALID(linkp)) {
+			nlinkp = LQNEXT(linkp);
+			/*
+			 * Check implementation specific timers first
+			 */
+			/* The delayed action/acknowledge idle timer */
+			switch (LLC_TIMERXPIRED(linkp, DACTION)) {
+			case LLC_TIMER_RUNNING:
+				LLC_AGETIMER(linkp, DACTION);
+				break;
+			case LLC_TIMER_EXPIRED: {
+				register int cmdrsp = LLC_RSP;
+				register int pollfinal = 0;
+				register int sendrr = 1;
+
+				switch (LLC_GETFLAG(linkp, DACTION)) {
+				case LLC_DACKCMD:
+					cmdrsp = LLC_CMD, pollfinal = 0;
+					break;
+				case LLC_DACKCMDPOLL:
+					cmdrsp = LLC_CMD, pollfinal = 1;
+					break;
+				case LLC_DACKRSP:
+					cmdrsp = LLC_RSP, pollfinal = 0;
+					break;
+				case LLC_DACKRSPFINAL:
+					cmdrsp = LLC_RSP, pollfinal = 1;
+					break;
+				default:
+					/*
+					 * No delayed acknowledge has been
+					 * recorded (the timer is also started
+					 * by LLC_UPDATE_NR_RECEIVED); do not
+					 * send an RR built from uninitialized
+					 * cmd/rsp and poll/final values.
+					 */
+					sendrr = 0;
+					break;
+				}
+				if (sendrr)
+					llc_send(linkp, LLCFT_RR, cmdrsp,
+					    pollfinal);
+				LLC_STOPTIMER(linkp, DACTION);
+				break;
+			}
+			}
+			/* The link idle timer */
+			switch (LLC_TIMERXPIRED(linkp, AGE)) {
+			case LLC_TIMER_RUNNING:
+				LLC_AGETIMER(linkp, AGE);
+				break;
+			case LLC_TIMER_EXPIRED:
+				/*
+				 * Only crunch the link when really no
+				 * timers are running any more.
+				 */
+				if (llc_anytimersup(linkp) == 0) {
+					/*
+					 * Stop the timer first: linkp must
+					 * not be touched any more once
+					 * llc_dellink() has disposed of it.
+					 */
+					LLC_STOPTIMER(linkp, AGE);
+					llc_dellink(linkp);
+					goto gone;
+				} else {
+					LLC_STARTTIMER(linkp, AGE);
+				}
+				break;
+			}
+			/*
+			 * Now, check all the ISO 8802-2 timers
+			 */
+			FOR_ALL_LLC_TIMERS(timer) {
+				action = 0;
+				if ((linkp->llcl_timerflags & (1<<timer)) &&
+				    (linkp->llcl_timers[timer] == 0)) {
+					switch (timer) {
+					case LLC_ACK_SHIFT:
+						action = LLC_ACK_TIMER_EXPIRED;
+						break;
+					case LLC_P_SHIFT:
+						action = LLC_P_TIMER_EXPIRED;
+						break;
+					case LLC_BUSY_SHIFT:
+						action = LLC_BUSY_TIMER_EXPIRED;
+						break;
+					case LLC_REJ_SHIFT:
+						action = LLC_REJ_TIMER_EXPIRED;
+						break;
+					}
+					/* mark the timer stopped, then report */
+					linkp->llcl_timerflags &= ~(1<<timer);
+					(void)llc_statehandler(linkp,
+					    (struct llc *)0, action, 0, 1);
+				} else if (linkp->llcl_timers[timer] > 0)
+					linkp->llcl_timers[timer]--;
+			}
+
+gone:			linkp = nlinkp;
+		}
+	}
+	splx (s);
+}
diff --git a/sys/netccitt/llc_var.h b/sys/netccitt/llc_var.h
new file mode 100644
index 000000000000..a27db52d37ab
--- /dev/null
+++ b/sys/netccitt/llc_var.h
@@ -0,0 +1,659 @@
+/*
+ * Copyright (C) Dirk Husemann, Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Dirk Husemann and the Computer Science Department (IV) of
+ * the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)llc_var.h 8.1 (Berkeley) 6/10/93
+ */
+
+#ifdef __STDC__
+/*
+ * Forward structure declarations for function prototypes [sic].
+ */
+struct llc;
+#endif
+
+#define NPAIDB_LINK 0
+
+/*
+ * One entry of the NPAI database.  The entry is a union with two
+ * layouts: NE (a link control block plus a routing entry) and SI
+ * (per-SAP configuration and upcall pointers).  The np_* and si_*
+ * macros below are shorthands for the nested member paths.
+ */
+struct npaidbentry {
+ union {
+ /* MAC,DLSAP -> CONS */
+ struct {
+ struct llc_linkcb *NE_link;
+ struct rtentry *NE_rt;
+ } NE;
+ /* SAP info for unconfigured incoming calls */
+ struct {
+ u_short SI_class;
+#define LLC_CLASS_I 0x1
+#define LLC_CLASS_II 0x3
+#define LLC_CLASS_III 0x4 /* Future */
+#define LLC_CLASS_IV 0x7 /* Future */
+ u_short SI_window;
+ /* non-zero enables llc_trace() dumps for links using this SAP */
+ u_short SI_trace;
+ u_short SI_xchxid;
+ /* upcall taking an mbuf chain */
+ void (*SI_input)
+ __P((struct mbuf *));
+ /* upcall with the same argument list as llc_ctlinput() */
+ caddr_t (*SI_ctlinput)
+ __P((int, struct sockaddr *, caddr_t));
+ } SI;
+ } NESIun;
+};
+/* shorthands for the NE layout */
+#define np_link NESIun.NE.NE_link
+#define np_rt NESIun.NE.NE_rt
+/* shorthands for the SI layout */
+#define si_class NESIun.SI.SI_class
+#define si_window NESIun.SI.SI_window
+#define si_trace NESIun.SI.SI_trace
+#define si_xchxid NESIun.SI.SI_xchxid
+#define si_input NESIun.SI.SI_input
+#define si_ctlinput NESIun.SI.SI_ctlinput
+
+#define NPDL_SAPNETMASK 0x7e
+
+/*
+ * Definitions for accessing bitfields/bitslices inside
+ * LLC2 headers
+ *
+ * A bitslice is a (mask, shift) pair; llc_bitslice[] (defined
+ * elsewhere) is indexed by the i_*, s_*, u_* and f_* names below.
+ * Several names deliberately map to the same index, i.e. they share
+ * one mask/shift pair.
+ */
+struct bitslice {
+ unsigned int bs_mask;
+ unsigned int bs_shift;
+};
+
+
+#define i_z 0
+#define i_ns 1
+#define i_pf 0
+#define i_nr 1
+#define s_oz 2
+#define s_selector 3
+#define s_pf 0
+#define s_nr 1
+#define u_bb 2
+#define u_select_other 3
+#define u_pf 4
+#define u_select 5
+#define f_vs 1
+#define f_cr 0
+#define f_vr 1
+#define f_wxyzv 6
+
+/*
+ * LLCGBITS  - extract slice Index from Arg (mask, then shift down).
+ * LLCSBITS  - OR Val, shifted and masked into place, into Arg; bits
+ *             already set in Arg stay set.
+ * LLCCSBITS - assign Arg the shifted and masked Val; all other bits
+ *             of Arg end up zero.
+ * Arg must be an lvalue for the two setters; Index is evaluated twice.
+ */
+#define LLCGBITS(Arg, Index) (((Arg) & llc_bitslice[(Index)].bs_mask) >> llc_bitslice[(Index)].bs_shift)
+#define LLCSBITS(Arg, Index, Val) (Arg) |= (((Val) << llc_bitslice[(Index)].bs_shift) & llc_bitslice[(Index)].bs_mask)
+#define LLCCSBITS(Arg, Index, Val) (Arg) = (((Val) << llc_bitslice[(Index)].bs_shift) & llc_bitslice[(Index)].bs_mask)
+
+extern struct bitslice llc_bitslice[];
+
+#define LLC_CMD 0
+#define LLC_RSP 1
+#define LLC_MAXCMDRSP 2
+
+/*
+ * LLC events --- These events may either be frames received from the
+ *		  remote LLC DSAP, request from the network layer user,
+ *		  timer events from llc_timer(), or diagnostic events from
+ *		  llc_input().
+ *
+ * Every event code is a multiple of LLC_MAXCMDRSP.  The expansions are
+ * parenthesized so that a code keeps its value inside any surrounding
+ * expression (e.g. as the right hand operand of '/' or '%').
+ */
+
+/* LLC frame types */
+#define LLCFT_INFO			(0 * LLC_MAXCMDRSP)
+#define LLCFT_RR			(1 * LLC_MAXCMDRSP)
+#define LLCFT_RNR			(2 * LLC_MAXCMDRSP)
+#define LLCFT_REJ			(3 * LLC_MAXCMDRSP)
+#define LLCFT_DM			(4 * LLC_MAXCMDRSP)
+#define LLCFT_SABME			(5 * LLC_MAXCMDRSP)
+#define LLCFT_DISC			(6 * LLC_MAXCMDRSP)
+#define LLCFT_UA			(7 * LLC_MAXCMDRSP)
+#define LLCFT_FRMR			(8 * LLC_MAXCMDRSP)
+#define LLCFT_UI			(9 * LLC_MAXCMDRSP)
+#define LLCFT_XID			(10 * LLC_MAXCMDRSP)
+#define LLCFT_TEST			(11 * LLC_MAXCMDRSP)
+
+/* LLC2 timer events */
+#define LLC_ACK_TIMER_EXPIRED		(12 * LLC_MAXCMDRSP)
+#define LLC_P_TIMER_EXPIRED		(13 * LLC_MAXCMDRSP)
+#define LLC_REJ_TIMER_EXPIRED		(14 * LLC_MAXCMDRSP)
+#define LLC_BUSY_TIMER_EXPIRED		(15 * LLC_MAXCMDRSP)
+
+/* LLC2 diagnostic events */
+#define LLC_INVALID_NR			(16 * LLC_MAXCMDRSP)
+#define LLC_INVALID_NS			(17 * LLC_MAXCMDRSP)
+#define LLC_BAD_PDU			(18 * LLC_MAXCMDRSP)
+#define LLC_LOCAL_BUSY_DETECTED		(19 * LLC_MAXCMDRSP)
+#define LLC_LOCAL_BUSY_CLEARED		(20 * LLC_MAXCMDRSP)
+
+/* Network layer user requests */
+/*
+ * NL_CONNECT_REQUEST --- The user has requested that a data link connection
+ *			  be established with a remote LLC DSAP.
+ */
+#define NL_CONNECT_REQUEST		(21 * LLC_MAXCMDRSP)
+/*
+ * NL_CONNECT_RESPONSE --- The user has accepted the data link connection.
+ */
+#define NL_CONNECT_RESPONSE		(22 * LLC_MAXCMDRSP)
+/*
+ * NL_RESET_REQUEST --- The user has requested that the data link with the
+ *			remote LLC DSAP be reset.
+ */
+#define NL_RESET_REQUEST		(23 * LLC_MAXCMDRSP)
+/*
+ * NL_RESET_RESPONSE --- The user has accepted the reset of the data link
+ *			 connection.
+ */
+#define NL_RESET_RESPONSE		(24 * LLC_MAXCMDRSP)
+/*
+ * NL_DISCONNECT_REQUEST --- The user has requested that the data link
+ *			     connection with remote LLC DSAP be terminated.
+ */
+#define NL_DISCONNECT_REQUEST		(25 * LLC_MAXCMDRSP)
+/*
+ * NL_DATA_REQUEST --- The user has requested that a data unit be sent to the
+ *		       remote LLC DSAP.
+ */
+#define NL_DATA_REQUEST			(26 * LLC_MAXCMDRSP)
+/*
+ * NL_INITIATE_PF_CYCLE --- The local LLC wants to initiate a P/F cycle.
+ */
+#define NL_INITIATE_PF_CYCLE		(27 * LLC_MAXCMDRSP)
+/*
+ * NL_LOCAL_BUSY_DETECTED --- The local entity has encountered a busy condition
+ */
+#define NL_LOCAL_BUSY_DETECTED		(28 * LLC_MAXCMDRSP)
+
+/* not a multiple of LLC_MAXCMDRSP: marks "no frame type" */
+#define LLCFT_NONE			255
+
+/* return message from state handlers */
+
+/*
+ * LLC_CONNECT_INDICATION --- Inform the user that a connection has been
+ * requested by a remote LLC SSAP.
+ */
+#define LLC_CONNECT_INDICATION 1
+/*
+ * LLC_CONNECT_CONFIRM --- The connection service component indicates that the
+ * remote network entity has accepted the connection.
+ */
+#define LLC_CONNECT_CONFIRM 2
+/*
+ * LLC_DISCONNECT_INDICATION --- Inform the user that the remote network
+ * entity has initiated disconnection of the data
+ * link connection.
+ */
+#define LLC_DISCONNECT_INDICATION 3
+/*
+ * LLC_RESET_CONFIRM --- The connection service component indicates that the
+ * remote network entity has accepted the reset.
+ */
+#define LLC_RESET_CONFIRM 4
+/*
+ * LLC_RESET_INDICATION_REMOTE --- The remote network entity or remote peer
+ * has initiated a reset of the data link
+ * connection.
+ */
+#define LLC_RESET_INDICATION_REMOTE 5
+/*
+ * LLC_RESET_INDICATION_LOCAL --- The local LLC has determined that the data
+ * link connection is in need of
+ * reinitialization.
+ */
+#define LLC_RESET_INDICATION_LOCAL 6
+/*
+ * LLC_FRMR_RECEIVED --- The local connection service component has received a
+ * FRMR response PDU.
+ */
+#define LLC_FRMR_RECEIVED 7
+/*
+ * LLC_FRMR_SENT --- The local connection component has received an invalid
+ * PDU, and has sent a FRMR response PDU.
+ */
+#define LLC_FRMR_SENT 8
+/*
+ * LLC_DATA_INDICATION --- The connection service component passes the data
+ * unit from the received I PDU to the user.
+ */
+#define LLC_DATA_INDICATION 9
+/*
+ * LLC_REMOTE_NOT_BUSY --- The remote LLC DSAP is no longer busy. The local
+ * connection service component will now accept a
+ * DATA_REQUEST.
+ */
+#define LLC_REMOTE_NOT_BUSY 10
+/*
+ * LLC_REMOTE_BUSY --- The remote LLC DSAP is busy. The local connection
+ * service component will not accept a DATA_REQUEST.
+ */
+#define LLC_REMOTE_BUSY 11
+
+/* Internal return code */
+#define LLC_PASSITON 255
+
+#define INFORMATION_CONTROL 0x00
+#define SUPERVISORY_CONTROL 0x02
+#define UNUMBERED_CONTROL 0x03
+
+/*
+ * Other necessary definitions
+ */
+
+#define LLC_MAX_SEQUENCE 128
+#define LLC_MAX_WINDOW 127
+#define LLC_WINDOW_SIZE 7
+
+/*
+ * Don't we love this one? CCITT likes to suck on bits 8=)
+ */
+#define NLHDRSIZEGUESS 3
+
+/*
+ * LLC control block
+ */
+
+/*
+ * Per-link control block.  llcl_q must stay the first member: the
+ * LQ* macros cast between struct llccb_q * and struct llc_linkcb *.
+ */
+struct llc_linkcb {
+ struct llccb_q {
+ struct llccb_q *q_forw; /* admin chain */
+ struct llccb_q *q_backw;
+ } llcl_q;
+ struct npaidbentry *llcl_sapinfo; /* SAP information */
+ struct sockaddr_dl llcl_addr; /* link snpa address */
+ struct rtentry *llcl_nlrt; /* layer 3 -> LLC */
+ struct rtentry *llcl_llrt; /* LLC -> layer 3 */
+ struct ifnet *llcl_if; /* our interface */
+ caddr_t llcl_nlnext; /* cb for network layer */
+ struct mbuf *llcl_writeqh; /* Write queue head */
+ struct mbuf *llcl_writeqt; /* Write queue tail */
+ /* sent frames, indexed by window slot (see LLC_SETFRAME) */
+ struct mbuf **llcl_output_buffers;
+ /* one slot per LLC_*_SHIFT index (ACK, P, BUSY, REJ, AGE, DACTION) */
+ short llcl_timers[6]; /* timer array */
+ /* bit (1 << LLC_*_SHIFT) is set while that timer is started */
+ long llcl_timerflags; /* flags signalling running timers */
+ /* current state; set by LLC_NEWSTATE to one of llc_state_<NAME> */
+ int (*llcl_statehandler)
+ __P((struct llc_linkcb *, struct llc *, int, int, int));
+ /* llcl_<NAME>_flag members are accessed via LLC_SETFLAG/LLC_GETFLAG */
+ int llcl_P_flag;
+ int llcl_F_flag;
+ int llcl_S_flag;
+ int llcl_DATA_flag;
+ int llcl_REMOTE_BUSY_flag;
+ int llcl_DACTION_flag; /* delayed action */
+ int llcl_retry;
+ /*
+ * The following components deal --- in one way or the other ---
+ * with the LLC2 window. Indicated by either [L] or [W] is the
+ * domain of the specific component:
+ *
+ * [L] The domain is 0--LLC_MAX_WINDOW
+ * [W] The domain is 0--llcl_window
+ */
+ short llcl_vr; /* next to receive [L] */
+ short llcl_vs; /* next to send [L] */
+ short llcl_nr_received; /* next frame to be ack'd [L] */
+ short llcl_freeslot; /* next free slot [W] */
+ short llcl_projvs; /* V(S) associated with freeslot */
+ short llcl_slotsfree; /* free slots [W] */
+ short llcl_window; /* window size */
+ /*
+ * In llcl_frmrinfo we jot down the last frmr info field, which we
+ * need to do as we need to be able to resend it in the ERROR state.
+ */
+ struct frmrinfo llcl_frmrinfo; /* last FRMR info field */
+};
+/* shorthands for the members of llcl_frmrinfo */
+#define llcl_frmr_pdu0 llcl_frmrinfo.rej_pdu_0
+#define llcl_frmr_pdu1 llcl_frmrinfo.rej_pdu_1
+#define llcl_frmr_control llcl_frmrinfo.frmr_control
+#define llcl_frmr_control_ext llcl_frmrinfo.frmr_control_ext
+#define llcl_frmr_cause llcl_frmrinfo.frmr_cause
+
+/*
+ * Access to the list of all links, anchored at llccb_q.
+ *
+ * LQNEXT(l)  - successor of link l
+ * LQEMPTY    - true when the list holds no link
+ * LQFIRST    - first link on the list
+ * LQVALID(l) - false once a walk has come back around to the anchor
+ *
+ * The cast expansions are wrapped in parentheses so that a postfix
+ * operator applied to the result (LQNEXT(l)->member) binds to the
+ * converted pointer and not to the operand of the cast.
+ */
+#define LQNEXT(l)	((struct llc_linkcb *)((l)->llcl_q.q_forw))
+#define LQEMPTY		(llccb_q.q_forw == &llccb_q)
+#define LQFIRST		((struct llc_linkcb *)(llccb_q.q_forw))
+#define LQVALID(l)	(!((struct llccb_q *)(l) == &llccb_q))
+
+/*
+ * Append packet m to the write queue of link l (chained via m_nextpkt).
+ * The macro expands to a bare if/else statement, so it must not be used
+ * as the unbraced body of an outer if that has its own else.
+ * m is evaluated more than once.
+ */
+#define LLC_ENQUEUE(l, m) if ((l)->llcl_writeqh == NULL) { \
+ (l)->llcl_writeqh = (m); \
+ (l)->llcl_writeqt = (m); \
+ } else { \
+ (l)->llcl_writeqt->m_nextpkt = (m); \
+ (l)->llcl_writeqt = (m); \
+ }
+
+/*
+ * Remove the head of the write queue of link l and store it in m; m is
+ * set to NULL when the queue is empty.  llcl_writeqt is left untouched
+ * (LLC_ENQUEUE only consults it while llcl_writeqh is non-NULL) and the
+ * m_nextpkt field of the returned packet is not cleared.
+ * Same bare if/else caveat as LLC_ENQUEUE.
+ */
+#define LLC_DEQUEUE(l, m) if ((l)->llcl_writeqh == NULL) \
+ (m) = NULL; \
+ else { \
+ (m) = (l)->llcl_writeqh; \
+ (l)->llcl_writeqh = (l)->llcl_writeqh->m_nextpkt; \
+ }
+
+/*
+ * Store frame m in the next free window slot of link l, advance
+ * llcl_freeslot modulo llcl_window and advance llcl_projvs with LLC_INC.
+ * When llcl_slotsfree is 0 the macro does nothing at all: m is neither
+ * stored nor freed.
+ */
+#define LLC_SETFRAME(l, m) { \
+ if ((l)->llcl_slotsfree > 0) { \
+ (l)->llcl_slotsfree--; \
+ (l)->llcl_output_buffers[(l)->llcl_freeslot] = (m); \
+ (l)->llcl_freeslot = ((l)->llcl_freeslot+1) % (l)->llcl_window; \
+ LLC_INC((l)->llcl_projvs); \
+ } \
+ }
+
+/*
+ * handling of sockaddr_dl's
+ */
+
+#define LLADDRLEN(s) ((s)->sdl_alen + (s)->sdl_nlen)
+#define LLSAPADDR(s) ((s)->sdl_data[LLADDRLEN(s)-1] & 0xff)
+#define LLSAPLOC(s, if) ((s)->sdl_nlen + (if)->if_addrlen)
+
+struct sdl_hdr {
+ struct sockaddr_dl sdlhdr_dst;
+ struct sockaddr_dl sdlhdr_src;
+ long sdlhdr_len;
+};
+
+/*
+ * Make room for an LLC header and point f at it.
+ *
+ * m != NULL: LLC_ISFRAMELEN zeroed bytes are prepended to the chain m.
+ * m == NULL: a fresh packet header mbuf holding LLC_UFRAMELEN zeroed
+ *	      bytes is allocated.
+ * On success m is the (possibly new) head of the chain and f points at
+ * its data, viewed as struct llc.
+ *
+ * CAUTION: hidden control flow.  If either allocation fails the macro
+ * executes a plain "return;" in the calling function.  M_PREPEND may
+ * fail as well (it is called with M_DONTWAIT) and then leaves its mbuf
+ * argument NULL, so that case has to be checked before touching the
+ * data; m is set to NULL before returning because the old pointer no
+ * longer refers to a usable chain.
+ */
+#define LLC_GETHDR(f,m) { \
+	struct mbuf *_m = (struct mbuf *) (m); \
+	if (_m) { \
+		M_PREPEND(_m, LLC_ISFRAMELEN, M_DONTWAIT); \
+		if (_m == NULL) { \
+			(m) = NULL; \
+			return; \
+		} \
+		bzero(mtod(_m, caddr_t), LLC_ISFRAMELEN); \
+	} else { \
+		MGETHDR (_m, M_DONTWAIT, MT_HEADER); \
+		if (_m != NULL) { \
+			_m->m_pkthdr.len = _m->m_len = LLC_UFRAMELEN; \
+			_m->m_next = _m->m_act = NULL; \
+			bzero(mtod(_m, caddr_t), LLC_UFRAMELEN); \
+		} else return; \
+	} \
+	(m) = _m; \
+	(f) = mtod(m, struct llc *); \
+	}
+
+#define LLC_NEWSTATE(l, LLCstate) (l)->llcl_statehandler = llc_state_##LLCstate
+#define LLC_STATEEQ(l, LLCstate) ((l)->llcl_statehandler == llc_state_##LLCstate ? 1 : 0)
+
+/*
+ * Timer indices: LLC_<NAME>_SHIFT is both the index into llcl_timers[]
+ * and the bit position of the timer's flag in llcl_timerflags.
+ */
+#define LLC_ACK_SHIFT 0
+#define LLC_P_SHIFT 1
+#define LLC_BUSY_SHIFT 2
+#define LLC_REJ_SHIFT 3
+#define LLC_AGE_SHIFT 4
+#define LLC_DACTION_SHIFT 5
+
+/* values produced by LLC_TIMERXPIRED() */
+#define LLC_TIMER_NOTRUNNING 0
+#define LLC_TIMER_RUNNING 1
+#define LLC_TIMER_EXPIRED 2
+
+/*
+ * LLCtimer is the bare timer name (ACK, P, BUSY, REJ, AGE, DACTION);
+ * it is token-pasted into LLC_<NAME>_SHIFT and llc_<NAME>_timer.
+ *
+ * LLC_STARTTIMER - load the slot from llc_<NAME>_timer and set the flag.
+ * LLC_STOPTIMER  - zero the slot and clear the flag.
+ */
+#define LLC_STARTTIMER(l, LLCtimer) { \
+ (l)->llcl_timers[LLC_##LLCtimer##_SHIFT] = llc_##LLCtimer##_timer; \
+ (l)->llcl_timerflags |= (1<<LLC_##LLCtimer##_SHIFT); \
+ }
+#define LLC_STOPTIMER(l, LLCtimer) { \
+ (l)->llcl_timers[LLC_##LLCtimer##_SHIFT] = 0; \
+ (l)->llcl_timerflags &= ~(1<<LLC_##LLCtimer##_SHIFT); \
+ }
+/*
+ * Count the slot down by one unless it is already 0.  Expands to an
+ * unbraced if statement that already ends in ';'.
+ */
+#define LLC_AGETIMER(l, LLCtimer) if ((l)->llcl_timers[LLC_##LLCtimer##_SHIFT] > 0) \
+ (l)->llcl_timers[LLC_##LLCtimer##_SHIFT]--;
+
+/*
+ * Classify a timer: NOTRUNNING when its flag is clear, otherwise
+ * EXPIRED when the slot has reached 0 and RUNNING when it has not.
+ */
+#define LLC_TIMERXPIRED(l, LLCtimer) \
+ (((l)->llcl_timerflags & (1<<LLC_##LLCtimer##_SHIFT)) ? \
+ (((l)->llcl_timers[LLC_##LLCtimer##_SHIFT] == 0 ) ? \
+ LLC_TIMER_EXPIRED : LLC_TIMER_RUNNING) : LLC_TIMER_NOTRUNNING)
+
+/*
+ * Loop header over the indices LLC_ACK_SHIFT .. LLC_REJ_SHIFT; the AGE
+ * and DACTION timers are not visited.
+ */
+#define FOR_ALL_LLC_TIMERS(t) for ((t) = LLC_ACK_SHIFT; (t) < LLC_AGE_SHIFT; (t)++)
+
+#define LLC_SETFLAG(l, LLCflag, v) (l)->llcl_##LLCflag##_flag = (v)
+#define LLC_GETFLAG(l, LLCflag) (l)->llcl_##LLCflag##_flag
+
+#define LLC_RESETCOUNTER(l) { \
+ (l)->llcl_vs = (l)->llcl_vr = (l)->llcl_retry = 0; \
+ llc_resetwindow((l)); \
+ }
+
+/*
+ * LLC2 macro definitions
+ */
+
+
+#define LLC_START_ACK_TIMER(l) LLC_STARTTIMER((l), ACK)
+#define LLC_STOP_ACK_TIMER(l) LLC_STOPTIMER((l), ACK)
+#define LLC_START_REJ_TIMER(l) LLC_STARTTIMER((l), REJ)
+#define LLC_STOP_REJ_TIMER(l) LLC_STOPTIMER((l), REJ)
+#define LLC_START_P_TIMER(l) { \
+ LLC_STARTTIMER((l), P); \
+ if (LLC_GETFLAG((l), P) == 0) \
+ (l)->llcl_retry = 0; \
+ LLC_SETFLAG((l), P, 1); \
+ }
+#define LLC_STOP_P_TIMER(l) { \
+ LLC_STOPTIMER((l), P); \
+ LLC_SETFLAG((l), P, 0); \
+ }
+#define LLC_STOP_ALL_TIMERS(l) { \
+ LLC_STOPTIMER((l), ACK); \
+ LLC_STOPTIMER((l), REJ); \
+ LLC_STOPTIMER((l), BUSY); \
+ LLC_STOPTIMER((l), P); \
+ }
+
+
+/* advance sequence number i by one, modulo LLC_MAX_SEQUENCE */
+#define LLC_INC(i) (i) = ((i)+1) % LLC_MAX_SEQUENCE
+
+/*
+ * 1 when nr lies in the range llcl_nr_received .. llcl_vs (inclusive),
+ * else 0.  The first arm handles the case where llcl_vs is numerically
+ * below llcl_nr_received, i.e. the range wraps around the sequence
+ * space.
+ */
+#define LLC_NR_VALID(l, nr) ((l)->llcl_vs < (l)->llcl_nr_received ? \
+ (((nr) >= (l)->llcl_nr_received) || \
+ ((nr) <= (l)->llcl_vs) ? 1 : 0) : \
+ (((nr) <= (l)->llcl_vs) && \
+ ((nr) >= (l)->llcl_nr_received) ? 1 : 0))
+
+/*
+ * A response (cr == LLC_RSP) with pf == 1 clears the P flag and stops
+ * the P timer; any other combination leaves both untouched.
+ */
+#define LLC_UPDATE_P_FLAG(l, cr, pf) { \
+ if ((cr) == LLC_RSP && (pf) == 1) { \
+ LLC_SETFLAG((l), P, 0); \
+ LLC_STOPTIMER((l), P); \
+ } \
+ }
+
+/*
+ * Advance llcl_nr_received up to nr.  For every sequence number passed
+ * the output buffer in its window slot (llc_seq2slot) is freed, the
+ * slot is set to NULL and llcl_slotsfree is incremented.  Afterwards
+ * the retry counter is reset, the ACK timer is started while window
+ * slots are still occupied (stopped otherwise), and the DACTION timer
+ * is started unconditionally.
+ */
+#define LLC_UPDATE_NR_RECEIVED(l, nr) { \
+ while ((l)->llcl_nr_received != (nr)) { \
+ struct mbuf *_m; \
+ register short seq; \
+ if (_m = (l)->llcl_output_buffers[seq = llc_seq2slot((l), (l)->llcl_nr_received)]) \
+ m_freem(_m); \
+ (l)->llcl_output_buffers[seq] = NULL; \
+ LLC_INC((l)->llcl_nr_received); \
+ (l)->llcl_slotsfree++; \
+ } \
+ (l)->llcl_retry = 0; \
+ if ((l)->llcl_slotsfree < (l)->llcl_window) { \
+ LLC_START_ACK_TIMER(l); \
+ } else LLC_STOP_ACK_TIMER(l); \
+ LLC_STARTTIMER((l), DACTION); \
+ }
+
+/*
+ * Enter the remote busy condition on link l.
+ * If the REMOTE_BUSY flag was clear it is set, the BUSY timer is
+ * started and a becomes LLC_REMOTE_BUSY; if the flag was already set
+ * nothing changes and a becomes 0.
+ */
+#define LLC_SET_REMOTE_BUSY(l,a) { \
+	if (LLC_GETFLAG((l), REMOTE_BUSY) == 0) { \
+		LLC_SETFLAG((l), REMOTE_BUSY, 1); \
+		LLC_STARTTIMER((l), BUSY); \
+		(a) = LLC_REMOTE_BUSY; \
+	} else { \
+		(a) = 0; \
+	} \
+	}
+/*
+ * Leave the remote busy condition on link l.
+ * If the REMOTE_BUSY flag was set it is cleared (it used to be set to
+ * 1 again, so the condition could never be left), the BUSY timer is
+ * stopped, outstanding frames are resent as commands when the link is
+ * in the NORMAL, REJECT or BUSY state, and a becomes
+ * LLC_REMOTE_NOT_BUSY; if the flag was already clear nothing changes
+ * and a becomes 0.
+ */
+#define LLC_CLEAR_REMOTE_BUSY(l,a) { \
+	if (LLC_GETFLAG((l), REMOTE_BUSY) == 1) { \
+		LLC_SETFLAG((l), REMOTE_BUSY, 0); \
+		LLC_STOPTIMER((l), BUSY); \
+		if (LLC_STATEEQ((l), NORMAL) || \
+		    LLC_STATEEQ((l), REJECT) || \
+		    LLC_STATEEQ((l), BUSY)) \
+			llc_resend((l), LLC_CMD, 0); \
+		(a) = LLC_REMOTE_NOT_BUSY; \
+	} else { \
+		(a) = 0; \
+	} \
+	}
+
+#define LLC_DACKCMD 0x1
+#define LLC_DACKCMDPOLL 0x2
+#define LLC_DACKRSP 0x3
+#define LLC_DACKRSPFINAL 0x4
+
+#define LLC_SENDACKNOWLEDGE(l, cmd, pf) { \
+ if ((cmd) == LLC_CMD) { \
+ LLC_SETFLAG((l), DACTION, ((pf) == 0 ? LLC_DACKCMD : LLC_DACKCMDPOLL)); \
+ } else { \
+ LLC_SETFLAG((l), DACTION, ((pf) == 0 ? LLC_DACKRSP : LLC_DACKRSPFINAL)); \
+ } \
+ }
+
+#define LLC_FRMR_W (1<<0)
+#define LLC_FRMR_X (1<<1)
+#define LLC_FRMR_Y (1<<2)
+#define LLC_FRMR_Z (1<<3)
+#define LLC_FRMR_V (1<<4)
+
+#define LLC_SETFRMR(l, f, cr, c) { \
+ if ((f)->llc_control & 0x3) { \
+ (l)->llcl_frmr_pdu0 = (f)->llc_control; \
+ (l)->llcl_frmr_pdu1 = 0; \
+ } else { \
+ (l)->llcl_frmr_pdu0 = (f)->llc_control; \
+ (l)->llcl_frmr_pdu1 = (f)->llc_control_ext; \
+ } \
+ LLCCSBITS((l)->llcl_frmr_control, f_vs, (l)->llcl_vs); \
+ LLCCSBITS((l)->llcl_frmr_control_ext, f_cr, (cr)); \
+ LLCSBITS((l)->llcl_frmr_control_ext, f_vr, (l)->llcl_vr); \
+ LLCCSBITS((l)->llcl_frmr_cause, f_wxyzv, (c)); \
+ }
+
+/*
+ * LLC tracing levels:
+ * LLCTR_INTERESTING interesting event, we might care to know about
+ * it, but then again, we might not ...
+ * LLCTR_SHOULDKNOW we probably should know about this event
+ * LLCTR_URGENT something has gone utterly wrong ...
+ */
+#define LLCTR_INTERESTING 1
+#define LLCTR_SHOULDKNOW 2
+#define LLCTR_URGENT 3
+
+#ifdef LLCDEBUG
+#define LLC_TRACE(lp, l, msg) llc_trace((lp), (l), (msg))
+#else /* LLCDEBUG */
+#define LLC_TRACE(lp, l, msg) /* NOOP */
+#endif /* LLCDEBUG */
+
+#define LLC_N2_VALUE 15 /* up to 15 retries */
+#define LLC_ACK_TIMER 10 /* 5 secs */
+#define LLC_P_TIMER 4 /* 2 secs */
+#define LLC_BUSY_TIMER 12 /* 6 secs */
+#define LLC_REJ_TIMER 12 /* 6 secs */
+#define LLC_AGE_TIMER 40 /* 20 secs */
+#define LLC_DACTION_TIMER 2 /* 1 secs */
+
+#if defined (KERNEL) && defined(LLC)
+extern int llc_n2;
+extern int llc_ACK_timer;
+extern int llc_P_timer;
+extern int llc_REJ_timer;
+extern int llc_BUSY_timer;
+extern int llc_AGE_timer;
+extern int llc_DACTION_timer;
+
+extern int af_link_rts_init_done;
+
+#define USES_AF_LINK_RTS { \
+ if (!af_link_rts_init_done) { \
+ rn_inithead((void **)&rt_tables[AF_LINK], 32); \
+ af_link_rts_init_done++; \
+ } \
+ }
+
+struct ifqueue llcintrq;
+
+extern struct llccb_q llccb_q;
+extern char *frame_names[];
+
+/*
+ * Function prototypes
+ */
+int sdl_cmp __P((struct sockaddr_dl *, struct sockaddr_dl *));
+int sdl_copy __P((struct sockaddr_dl *, struct sockaddr_dl *));
+int sdl_swapaddr __P((struct sockaddr_dl *, struct sockaddr_dl *));
+int sdl_checkaddrif __P((struct ifnet *, struct sockaddr_dl *));
+int sdl_setaddrif __P((struct ifnet *, u_char *, u_char, u_char,
+ struct sockaddr_dl *));
+int sdl_sethdrif __P((struct ifnet *, u_char *, u_char, u_char *, u_char, u_char,
+ struct sdl_hdr *));
+struct npaidbentry *llc_setsapinfo __P((struct ifnet *, u_char, u_char,
+ struct dllconfig *));
+struct npaidbentry *llc_getsapinfo __P((u_char, struct ifnet *));
+struct rtentry *npaidb_enrich __P((short, caddr_t, struct sockaddr_dl *));
+int npaidb_destroy __P((struct rtentry *));
+short llc_seq2slot __P((struct llc_linkcb *, short));
+int llc_state_ADM __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_CONN __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_RESET_WAIT __P((struct llc_linkcb *, struct llc *,
+ int, int, int));
+int llc_state_RESET_CHECK __P((struct llc_linkcb *, struct llc *,
+ int, int, int));
+int llc_state_SETUP __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_RESET __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_D_CONN __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_ERROR __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_NBRAcore __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_NORMAL __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_BUSY __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_REJECT __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_AWAIT __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_AWAIT_BUSY __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_AWAIT_REJECT __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_statehandler __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_init __P((void));
+struct llc_linkcb *llc_newlink __P((struct sockaddr_dl *, struct ifnet *,
+ struct rtentry *, caddr_t, struct rtentry *));
+int llc_dellink __P((struct llc_linkcb *));
+int llc_anytimersup __P((struct llc_linkcb *));
+char * llc_getstatename __P((struct llc_linkcb *));
+void llc_link_dump __P((struct llc_linkcb *, const char *));
+void llc_trace __P((struct llc_linkcb *, int, const char *));
+void llc_resetwindow __P((struct llc_linkcb *));
+int llc_decode __P((struct llc *, struct llc_linkcb *));
+void llc_timer __P((void));
+void llcintr __P((void));
+int llc_input __P((struct llc_linkcb *, struct mbuf *, u_char));
+caddr_t llc_ctlinput __P((int, struct sockaddr *, caddr_t));
+int llc_output __P((struct llc_linkcb *, struct mbuf *));
+void llc_start __P((struct llc_linkcb *));
+int llc_send __P((struct llc_linkcb *, int, int, int));
+int llc_resend __P((struct llc_linkcb *, int, int));
+int llc_rawsend __P((struct llc_linkcb *, struct mbuf *, struct llc *, int, int,
+ int, int));
+int cons_rtrequest __P((int, struct rtentry *, struct sockaddr *));
+int x25_llcglue __P((int, struct sockaddr *));
+
+#endif
+
+
diff --git a/sys/netccitt/pk.h b/sys/netccitt/pk.h
new file mode 100644
index 000000000000..528e0a680803
--- /dev/null
+++ b/sys/netccitt/pk.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pk.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ *
+ * X.25 Packet Level Definitions:
+ *
+ */
+
+/* Packet type identifier field definitions. */
+
+#define X25_CALL 11
+#define X25_CALL_ACCEPTED 15
+#define X25_CLEAR 19
+#define X25_CLEAR_CONFIRM 23
+#define X25_DATA 0
+#define X25_INTERRUPT 35
+#define X25_INTERRUPT_CONFIRM 39
+
+#define X25_RR 1
+#define X25_RNR 5
+#define X25_REJECT 9
+#define X25_RESET 27
+#define X25_RESET_CONFIRM 31
+#define X25_DIAGNOSTIC 241
+
+#define X25_RESTART 251
+#define X25_RESTART_CONFIRM 255
+
+/* Restart cause field definitions. */
+
+#define X25_RESTART_DTE_ORIGINATED 0
+#define X25_RESTART_LOCAL_PROCEDURE_ERROR 1
+#define X25_RESTART_NETWORK_CONGESTION 3
+#define X25_RESTART_NETWORK_OPERATIONAL 7
+#define X25_RESTART_DTE_ORIGINATED2 128
+
+
+/* Miscellaneous definitions. */
+
+#define DATA_PACKET_DESIGNATOR 0x01
+#define RR_OR_RNR_PACKET_DESIGNATOR 0x02
+#define RR_PACKET_DESIGNATOR 0x04
+
+#define DEFAULT_WINDOW_SIZE 2
+#define MODULUS 8
+
+/* lengths, in octets, of the pieces of a call packet's information part */
+#define ADDRLN		1
+#define MAXADDRLN	15
+#define FACILITIESLN	1
+#define MAXFACILITIESLN	10
+#define MAXUSERDATA	16
+/*
+ * Upper bound for the whole call information (1+15+1+10+16 = 43).
+ * Spelled with the named parts and parenthesized so the sum survives
+ * use inside a larger expression.
+ */
+#define MAXCALLINFOLN	(ADDRLN + MAXADDRLN + FACILITIESLN + \
+			    MAXFACILITIESLN + MAXUSERDATA)
+
+#define PACKET_OK 0
+#define IGNORE_PACKET 1
+#define ERROR_PACKET 2
+
+typedef char bool;
+#define FALSE 0
+#define TRUE 1
+
+/*
+ * X.25 Packet format definitions
+ * This will eventually have to be rewritten without reference
+ * to bit fields, to be ANSI C compliant and alignment safe.
+ */
+
+typedef u_char octet;
+
+struct x25_calladdr {
+ octet addrlens;
+ octet address_field[MAXADDRLN];
+};
+
+struct x25_packet {
+ octet bits;
+ octet logical_channel_number;
+ octet packet_type;
+ octet packet_data;
+};
+#define packet_cause packet_data
+
+struct data_packet {
+ octet bits;
+};
+
+#define FACILITIES_REVERSE_CHARGE 0x1
+#define FACILITIES_THROUGHPUT 0x2
+#define FACILITIES_PACKETSIZE 0x42
+#define FACILITIES_WINDOWSIZE 0x43
+
+#define PKHEADERLN 3
+
+#define DP(xp) (((struct data_packet *)&(xp) -> packet_type) -> bits)
+#define PS(xp) X25GBITS(DP(xp), p_s)
+#define PR(xp) X25GBITS(DP(xp), p_r)
+#define MBIT(xp) X25GBITS(DP(xp), m_bit)
+#define SPR(xp, v) X25SBITS(DP(xp), p_r, (v))
+#define SPS(xp, v) X25SBITS(DP(xp), p_s, (v))
+#define SMBIT(xp, v) X25SBITS(DP(xp), m_bit, (v))
+
+/*
+ * LCN(xp)	    - logical channel number of packet xp: the
+ *		      logical_channel_number octet plus the lc_group_number
+ *		      bits of xp->bits shifted left by 8.
+ * SET_LCN(xp, lcn) - store lcn into packet xp: the
+ *		      logical_channel_number octet is assigned lcn and the
+ *		      group number bits are given lcn >> 8 (0 when
+ *		      lcn <= 255).
+ *
+ * Both arguments are parenthesized in the expansion so that expression
+ * arguments are safe; they are still evaluated more than once, so do
+ * not pass expressions with side effects.
+ */
+#define LCN(xp)	((xp) -> logical_channel_number + \
+	(X25GBITS((xp) -> bits, lc_group_number) ? (X25GBITS((xp) -> bits, lc_group_number) << 8) : 0))
+#define SET_LCN(xp, lcn) (((xp) -> logical_channel_number = (lcn)), \
+	(X25SBITS((xp) -> bits, lc_group_number, (lcn) > 255 ? (lcn) >> 8 : 0)))
+
+struct mbuf *pk_template ();
+
+/* Define X.25 packet level states. */
+
+/* Call setup and clearing substates. */
+
+#define LISTEN 0
+#define READY 1
+#define RECEIVED_CALL 2
+#define SENT_CALL 3
+#define DATA_TRANSFER 4
+#define RECEIVED_CLEAR 5
+#define SENT_CLEAR 6
+
+/* DTE states. */
+
+#define DTE_WAITING 7
+#define DTE_RECEIVED_RESTART 8
+#define DTE_SENT_RESTART 9
+#define DTE_READY 0
+
+/* Cleaning out ... */
+
+#define LCN_ZOMBIE 10
+
+#define MAXSTATES 11
+
+/*
+ * The following definitions are used in a switch statement after
+ * determining the packet type. These values are returned by the
+ * pk_decode procedure.
+ *
+ * Every code is a multiple of MAXSTATES.  The expansions are
+ * parenthesized so that a code keeps its value inside any surrounding
+ * expression.
+ */
+
+#define CALL			(0 * MAXSTATES)
+#define CALL_ACCEPTED		(1 * MAXSTATES)
+#define CLEAR			(2 * MAXSTATES)
+#define CLEAR_CONF		(3 * MAXSTATES)
+#define DATA			(4 * MAXSTATES)
+#define INTERRUPT		(5 * MAXSTATES)
+#define INTERRUPT_CONF		(6 * MAXSTATES)
+#define RR			(7 * MAXSTATES)
+#define RNR			(8 * MAXSTATES)
+#define RESET			(9 * MAXSTATES)
+#define RESET_CONF		(10 * MAXSTATES)
+#define RESTART			(11 * MAXSTATES)
+#define RESTART_CONF		(12 * MAXSTATES)
+#define REJECT			(13 * MAXSTATES)
+#define DIAG_TYPE		(14 * MAXSTATES)
+#define INVALID_PACKET		(15 * MAXSTATES)
+#define DELETE_PACKET		INVALID_PACKET
+
+/*
+ * The following definitions are used by the restart procedures
+ * for noting whether the PLE is supposed to behave as DTE or DCE
+ * (essentially necessary for operation over LLC2)
+ */
+#define DTE_DXERESOLVING 0x0001
+#define DTE_PLAYDTE 0x0002
+#define DTE_PLAYDCE 0x0004
+#define DTE_CONNECTPENDING 0x0010
+#define DTE_PRETENDDTE 0x0020
+
+#define MAXRESTARTCOLLISIONS 10
diff --git a/sys/netccitt/pk_acct.c b/sys/netccitt/pk_acct.c
new file mode 100644
index 000000000000..fccd875285e0
--- /dev/null
+++ b/sys/netccitt/pk_acct.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pk_acct.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+#include <netccitt/x25acct.h>
+
+
+struct vnode *pkacctp;
+/*
+ * Turn packet accounting on, off, or over to a new file.  A null path
+ * closes the current accounting file; otherwise path (a user-space
+ * pathname) is opened for writing and replaces it.  Returns 0 or errno.
+ */
+pk_accton (path)
+	char *path;
+{
+	register struct vnode *vp = NULL;
+	struct nameidata nd;
+	struct vnode *oacctp = pkacctp;
+	struct proc *p = curproc;
+	int error = 0;
+
+	if (path != 0) {
+		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p);
+		if (error = vn_open (&nd, FWRITE, 0644))
+			return (error);
+		vp = nd.ni_vp;
+		VOP_UNLOCK(vp);
+		if (vp -> v_type != VREG) {
+			/* Opened with FWRITE, so undo it with vn_close. */
+			(void) vn_close (vp, FWRITE, p -> p_ucred, p);
+			return (EACCES);
+		}
+	}
+	pkacctp = vp;		/* null when turning accounting off */
+	if (oacctp)		/* never hand vn_close a null vnode */
+		error = vn_close (oacctp, FWRITE, p -> p_ucred, p);
+	return (error);
+}
+
+/*
+ * Append one x25acct record describing circuit lcp to the accounting
+ * file; does nothing when accounting is off or lcp has no address.
+ */
+pk_acct (lcp)
+register struct pklcd *lcp;
+{
+	register struct vnode *vp;
+	register struct sockaddr_x25 *sa;
+	register char *src, *dst;
+	register int len;
+	static struct x25acct acbuf;
+
+	if ((vp = pkacctp) == 0)
+		return;
+	bzero ((caddr_t)&acbuf, sizeof (acbuf));
+	if (lcp -> lcd_ceaddr != 0)
+		sa = lcp -> lcd_ceaddr;
+	else if (lcp -> lcd_craddr != 0) {
+		sa = lcp -> lcd_craddr;
+		acbuf.x25acct_callin = 1;
+	} else
+		return;
+
+	if (sa -> x25_opts.op_flags & X25_REVERSE_CHARGE)
+		acbuf.x25acct_revcharge = 1;
+	acbuf.x25acct_stime = lcp -> lcd_stime;
+	acbuf.x25acct_etime = time.tv_sec - acbuf.x25acct_stime;
+	acbuf.x25acct_uid = curproc -> p_cred -> p_ruid;
+	acbuf.x25acct_psize = sa -> x25_opts.op_psize;
+	acbuf.x25acct_net = sa -> x25_net;
+	/*
+	 * Convert address to bcd: two digits per byte, first digit in the
+	 * high nibble.  Stop at the end of x25acct_addr in any case.
+	 */
+	src = sa -> x25_addr;
+	dst = acbuf.x25acct_addr;
+	for (len = 0; *src && len < (int) (2 * sizeof (acbuf.x25acct_addr)); len++)
+		if (len & 01)
+			*dst++ |= *src++ & 0xf;
+		else
+			*dst = *src++ << 4;
+	acbuf.x25acct_addrlen = len;
+
+	bcopy (sa -> x25_udata, acbuf.x25acct_udata,
+		sizeof (acbuf.x25acct_udata));
+	acbuf.x25acct_txcnt = lcp -> lcd_txcnt;
+	acbuf.x25acct_rxcnt = lcp -> lcd_rxcnt;
+
+	/* Best effort: the result of the write is deliberately discarded. */
+	(void) vn_rdwr(UIO_WRITE, vp, (caddr_t)&acbuf, sizeof (acbuf),
+		(off_t)0, UIO_SYSSPACE, IO_UNIT|IO_APPEND,
+		curproc -> p_ucred, (int *)0, (struct proc *)0);
+}
diff --git a/sys/netccitt/pk_debug.c b/sys/netccitt/pk_debug.c
new file mode 100644
index 000000000000..b5103557c569
--- /dev/null
+++ b/sys/netccitt/pk_debug.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pk_debug.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+char *pk_state[] = { /* printable names for the lcd_state values LISTEN .. SENT_CLEAR */
+ "Listen", "Ready", "Received-Call",
+ "Sent-Call", "Data-Transfer","Received-Clear",
+ "Sent-Clear", /* NOTE(review): 7 names but MAXSTATES is 11; confirm nothing indexes past SENT_CLEAR */
+};
+/* Printable names for the packet types from pk_decode(), indexed by type / MAXSTATES. */
+char *pk_name[] = {
+ "Call", "Call-Conf", "Clear",
+ "Clear-Conf", "Data", "Intr", "Intr-Conf",
+ "Rr", "Rnr", "Reset", "Reset-Conf",
+ "Restart", "Restart-Conf", "Reject", "Diagnostic",
+ "Invalid"
+};
+
+/*
+ * If xcp has tracing on, print a summary line for chain m: LCN, tag dir,
+ * packet type, mbuf count, byte count and the first five octets in hex.
+ */
+pk_trace (xcp, m, dir)
+struct x25config *xcp;
+register struct mbuf *m;
+char *dir;
+{
+	struct x25_packet *xp = mtod(m, struct x25_packet *);
+	register int i, type, nbytes = 0, nmbufs = 0;
+
+	if (xcp -> xc_ptrace == 0)
+		return;
+	type = pk_decode (xp) / MAXSTATES;
+	for (; m; m = m -> m_next, ++nmbufs)
+		nbytes += m -> m_len;
+	printf ("LCN=%d %s: %s #=%d, len=%d ",
+		LCN(xp), dir, pk_name[type], nmbufs, nbytes);
+	for (i = 0; i < 5; ++i)
+		printf ("%x ", (int) ((char *) xp)[i] & 0xff);
+	printf ("\n");
+}
+/* Save a copy of chain m in the ring of mbuf pointers c, resizing the ring first if mbc_size changed. */
+mbuf_cache(c, m)
+register struct mbuf_cache *c;
+struct mbuf *m;
+{
+ register struct mbuf **mp;
+
+ if (c->mbc_size != c->mbc_oldsize) { /* requested ring size differs from the allocated one */
+ unsigned zero_size, copy_size;
+ unsigned new_size = c->mbc_size * sizeof(m); /* bytes: one pointer per slot */
+ caddr_t cache = (caddr_t)c->mbc_cache; /* the old vector */
+
+ if (new_size) {
+ c->mbc_cache = (struct mbuf **)
+ malloc(new_size, M_MBUF, M_NOWAIT);
+ if (c->mbc_cache == 0) { /* no memory: keep the old vector, record nothing */
+ c->mbc_cache = (struct mbuf **)cache;
+ return;
+ }
+ c->mbc_num %= c->mbc_size; /* keep the next-slot index inside the new ring */
+ } else
+ c->mbc_cache = 0;
+ if (c->mbc_size < c->mbc_oldsize) { /* shrinking: free the copies held in the dropped slots */
+ register struct mbuf **mplim;
+ mp = c->mbc_size + (struct mbuf **)cache;
+ mplim = c->mbc_oldsize + (struct mbuf **)cache;
+ while (mp < mplim)
+ m_freem(*mp++);
+ zero_size = 0;
+ } else
+ zero_size = (c->mbc_size - c->mbc_oldsize) * sizeof(m); /* growing: bytes of new, empty slots */
+ copy_size = new_size - zero_size; /* bytes of slots carried over from the old vector */
+ c->mbc_oldsize = c->mbc_size;
+ if (copy_size)
+ bcopy(cache, (caddr_t)c->mbc_cache, copy_size);
+ if (cache)
+ free(cache, M_MBUF);
+ if (zero_size)
+ bzero(copy_size + (caddr_t)c->mbc_cache, zero_size);
+ }
+ if (c->mbc_size == 0) /* no slots: nothing to record */
+ return;
+ mp = c->mbc_cache + c->mbc_num;
+ c->mbc_num = (1 + c->mbc_num) % c->mbc_size; /* advance, wrapping around */
+ if (*mp) /* free the copy this slot held before */
+ m_freem(*mp);
+ if (*mp = m_copym(m, 0, M_COPYALL, M_DONTWAIT))
+ (*mp)->m_flags |= m->m_flags & 0x08; /* NOTE(review): 0x08 is an unnamed m_flags bit; confirm which flag is meant */
+}
diff --git a/sys/netccitt/pk_input.c b/sys/netccitt/pk_input.c
new file mode 100644
index 000000000000..1f8f0bc71277
--- /dev/null
+++ b/sys/netccitt/pk_input.c
@@ -0,0 +1,1119 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (C) Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1992
+ * Copyright (c) 1991, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pk_input.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/route.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+#include <netccitt/llc_var.h>
+
+struct pkcb_q pkcb_q = {&pkcb_q, &pkcb_q};
+
+/*
+ * ccittintr() is the common interrupt handler for HDLC, LLC2 and X.25.
+ * The HDLC and LLC2 handlers are compiled in only under HDLC / LLC, so
+ * a kernel can run X.25 without either (e.g. on boards that do all the
+ * stuff themselves, such as ADAX X.25 or TPS ISDN).  Each handler is
+ * called only when its input queue is non-empty.
+ */
+void
+ccittintr ()
+{
+	extern struct ifqueue hdintrq, llcintrq, pkintrq;
+
+#ifdef HDLC
+	if (hdintrq.ifq_len != 0)
+		hdintr ();
+#endif
+#ifdef LLC
+	if (llcintrq.ifq_len != 0)
+		llcintr ();
+#endif
+	if (pkintrq.ifq_len != 0)
+		pkintr ();
+}
+
+/*
+ * Create the pkcb for ia, bound to the link level protocol named in its
+ * configuration, and queue it on pkcb_q.  Returns null on failure.
+ */
+struct pkcb *
+pk_newlink (ia, llnext)
+struct x25_ifaddr *ia;
+caddr_t llnext;
+{
+	register struct x25config *xcp = &ia -> ia_xc;
+	register struct pkcb *pkp;
+	register struct protosw *pp;
+	unsigned size;
+
+	pp = pffindproto (AF_CCITT, (int) xcp -> xc_lproto, 0);
+	if (pp == 0 || pp -> pr_output == 0) {
+		pk_message (0, xcp, "link level protosw error");
+		return ((struct pkcb *)0);
+	}
+	/* Allocate a zeroed network control block structure. */
+	size = sizeof (struct pkcb);
+	pkp = (struct pkcb *) malloc (size, M_PCB, M_WAITOK);
+	if (pkp == 0)
+		return ((struct pkcb *)0);
+	bzero ((caddr_t) pkp, size);
+	pkp -> pk_lloutput = pp -> pr_output;
+	pkp -> pk_llctlinput = (caddr_t (*)()) pp -> pr_ctlinput;
+	pkp -> pk_xcp = xcp;
+	pkp -> pk_ia = ia;
+	pkp -> pk_state = DTE_WAITING;
+	pkp -> pk_llnext = llnext;
+	insque (pkp, &pkcb_q);
+	/* Set defaults for whatever the configuration left at zero. */
+	if (xcp -> xc_pwsize == 0)
+		xcp -> xc_pwsize = DEFAULT_WINDOW_SIZE;
+	if (xcp -> xc_psize == 0)
+		xcp -> xc_psize = X25_PS128;
+	/* Allocate the channel vector; on failure undo everything above. */
+	if (pk_resize (pkp)) {
+		remque (pkp);
+		if (pkp -> pk_chan)
+			free ((caddr_t) pkp -> pk_chan, M_IFADDR);
+		free ((caddr_t) pkp, M_PCB);
+		return ((struct pkcb *)0);
+	}
+	return (pkp);
+}
+
+
+/*
+ * Detach from a link: disconnect every logical channel of pkp, drop one
+ * reference and, when none remain, unhook the pkcb from its route and
+ * queue, notify the link level and free it.  Returns 0 or an errno.
+ */
+pk_dellink (pkp)
+register struct pkcb *pkp;
+{
+	register int i;
+	register struct protosw *pp;
+
+	/*
+	 * Essentially we have the choice to
+	 * (a) go ahead and let the route be deleted and
+	 *     leave the pkcb associated with that route
+	 *     as it is, i.e. the connections stay open
+	 * (b) do a pk_disconnect() on all channels associated
+	 *     with the route via the pkcb and then proceed.
+	 *
+	 * For the time being we stick with (b).  The vector holds entries
+	 * 0 .. pk_maxlcn; entry 0 is the descriptor pk_resize() creates.
+	 */
+	for (i = 1; i <= pkp -> pk_maxlcn; ++i)
+		if (pkp -> pk_chan[i])
+			pk_disconnect (pkp -> pk_chan[i]);
+
+	/*
+	 * Find the protoswitch to get hold of the link level protocol
+	 * to be notified that the packet level entity is dissolving ...
+	 */
+	pp = pffindproto (AF_CCITT, (int) pkp -> pk_xcp -> xc_lproto, 0);
+	if (pp == 0 || pp -> pr_output == 0) {
+		pk_message (0, pkp -> pk_xcp, "link level protosw error");
+		return (EPROTONOSUPPORT);
+	}
+
+	pkp -> pk_refcount--;
+	if (!pkp -> pk_refcount) {
+		struct dll_ctlinfo ctlinfo;
+
+		remque (pkp);
+		if (pkp -> pk_rt && pkp -> pk_rt -> rt_llinfo == (caddr_t) pkp)
+			pkp -> pk_rt -> rt_llinfo = (caddr_t) NULL;
+
+		/*
+		 * Tell the link level that the pkcb is dissolving
+		 */
+		if (pp -> pr_ctlinput && pkp -> pk_llnext) {
+			ctlinfo.dlcti_pcb = pkp -> pk_llnext;
+			ctlinfo.dlcti_rt = pkp -> pk_rt;
+			(pp -> pr_ctlinput)(PRC_DISCONNECT_REQUEST,
+				pkp -> pk_xcp, &ctlinfo);
+		}
+		free ((caddr_t) pkp -> pk_chan, M_IFADDR);
+		free ((caddr_t) pkp, M_PCB);
+	}
+
+	return (0);
+}
+
+
+/*
+ * (Re)build the logical channel vector of pkp to hold xc_maxlcn + 1
+ * entries.  An existing vector of another size is discarded after a
+ * restart; its channel 0 descriptor is carried over.  0 or ENOBUFS.
+ */
+pk_resize (pkp)
+register struct pkcb *pkp;
+{
+	struct x25config *xcp = pkp -> pk_xcp;
+	struct pklcd *lcd0 = 0;
+	unsigned nbytes;
+
+	if (pkp -> pk_chan && pkp -> pk_maxlcn != xcp -> xc_maxlcn) {
+		pk_restart (pkp, X25_RESTART_NETWORK_CONGESTION);
+		lcd0 = pkp -> pk_chan[0];
+		free ((caddr_t) pkp -> pk_chan, M_IFADDR);
+		pkp -> pk_chan = 0;
+	}
+	if (pkp -> pk_chan)
+		return 0;		/* vector already has the right size */
+	pkp -> pk_maxlcn = xcp -> xc_maxlcn;
+	nbytes = (pkp -> pk_maxlcn + 1) * sizeof (struct pklcd *);
+	pkp -> pk_chan = (struct pklcd **) malloc (nbytes, M_IFADDR, M_WAITOK);
+	if (pkp -> pk_chan == 0) {
+		if (lcd0)
+			pk_close (lcd0);
+		return (ENOBUFS);
+	}
+	bzero ((caddr_t) pkp -> pk_chan, nbytes);
+	/* Logical channel 0 needs a descriptor of its own. */
+	if (lcd0 == 0 && (lcd0 = pk_attach ((struct socket *)0)) == 0)
+		return (ENOBUFS);
+	lcd0 -> lcd_state = READY;
+	lcd0 -> lcd_pkp = pkp;
+	pkp -> pk_chan[0] = lcd0;
+	return 0;
+}
+
+/*
+ * Called by the link level when the link comes up, goes down or is reset
+ * and on connect/disconnect indications.  addr is the pkcb, except for a
+ * connect indication, which finds it via the route for src and returns it.
+ */
+/*VARARGS*/
+caddr_t
+pk_ctlinput (code, src, addr)
+	struct sockaddr *src;
+	caddr_t addr;
+{
+	register struct pkcb *pkp = (struct pkcb *) addr;
+	struct npaidbentry *np;
+	struct rtentry *llrt;
+
+	switch (code) {
+	case PRC_LINKRESET:
+		pk_restart (pkp, X25_RESTART_NETWORK_CONGESTION);
+		break;
+
+	case PRC_LINKUP:
+		if (pkp -> pk_state == DTE_WAITING)
+			pk_restart (pkp, X25_RESTART_NETWORK_CONGESTION);
+		break;
+
+	case PRC_LINKDOWN:
+		pk_restart (pkp, -1);	/* Clear all active circuits */
+		pkp -> pk_state = DTE_WAITING;
+		break;
+
+	case PRC_DISCONNECT_INDICATION:
+		pk_restart (pkp, -1);	/* Clear all active circuits */
+		pkp -> pk_state = DTE_WAITING;
+		pkp -> pk_llnext = (caddr_t) 0;
+		break;
+
+	case PRC_CONNECT_INDICATION:
+		if ((llrt = rtalloc1(src, 0)) == 0)
+			return 0;
+		llrt -> rt_refcnt--;
+		np = (struct npaidbentry *) llrt -> rt_llinfo;
+		if (np -> np_rt == 0 ||
+		    (pkp = (struct pkcb *) np -> np_rt -> rt_llinfo) == 0)
+			return 0;
+		pkp -> pk_llnext = addr;	/* addr is the link level's handle here */
+		return ((caddr_t) pkp);
+	}
+	return (0);
+}
+struct ifqueue pkintrq;
+/*
+ * Called when semi-smart devices that do HDLC in hardware have queued
+ * packets for level 3 directly: take packets off pkintrq one at a time
+ * (at splimp) and pass them to pk_input().  A packet whose first mbuf
+ * holds fewer than PKHEADERLN bytes is reported and dropped.
+ */
+pkintr ()
+{
+	register struct mbuf *pkt;
+	register int ipl;
+
+	for (;;) {
+		ipl = splimp ();
+		IF_DEQUEUE (&pkintrq, pkt);
+		splx (ipl);
+		if (pkt == 0)
+			return;
+		if (pkt -> m_len >= PKHEADERLN)
+			pk_input (pkt);
+		else {
+			printf ("pkintr: packet too short (len=%d)\n",
+				pkt -> m_len);
+			m_freem (pkt);
+		}
+	}
+}
+struct mbuf *pk_bad_packet;
+struct mbuf_cache pk_input_cache = {0 };
+/*
+ * X.25 PACKET INPUT
+ *
+ * This procedure is called by a link level procedure whenever
+ * an information frame is received. It decodes the packet and
+ * demultiplexes based on the logical channel number.
+ *
+ * We change the original conventions of the UBC code here --
+ * since there may be multiple pkcb's for a given interface
+ * of type 802.2 class 2, we retrieve which one it is from
+ * m_pkthdr.rcvif (which has been overwritten by lower layers);
+ * That field is then restored for the benefit of upper layers which
+ * may make use of it, such as CLNP.
+ *
+ */
+
+#define RESTART_DTE_ORIGINATED(xp) (((xp) -> packet_cause == X25_RESTART_DTE_ORIGINATED) || \
+ ((xp) -> packet_cause >= X25_RESTART_DTE_ORIGINATED2))
+
+/*
+ * Decode one received packet and dispatch on (packet type + channel
+ * state).  m must have M_PKTHDR set, with the pkcb stored in rcvif.
+ */
+pk_input (m)
+register struct mbuf *m;
+{
+ register struct x25_packet *xp;
+ register struct pklcd *lcp;
+ register struct socket *so = 0;
+ register struct pkcb *pkp;
+ int ptype, lcn, lcdstate = LISTEN;
+
+ if (pk_input_cache.mbc_size || pk_input_cache.mbc_oldsize)
+ mbuf_cache (&pk_input_cache, m);
+ if ((m -> m_flags & M_PKTHDR) == 0)
+ panic ("pkintr");
+ if ((pkp = (struct pkcb *) m -> m_pkthdr.rcvif) == 0) {
+ m_freem (m); /* no pkcb: drop the packet instead of leaking it */
+ return;
+ }
+ xp = mtod (m, struct x25_packet *);
+ ptype = pk_decode (xp);
+ lcn = LCN(xp);
+ if (lcn < 0 || lcn > pkp -> pk_maxlcn) { /* range-check before indexing pk_chan */
+ pk_message (lcn, pkp -> pk_xcp, "illegal lcn");
+ m_freem (m);
+ return;
+ }
+ lcp = pkp -> pk_chan[lcn];
+ pk_trace (pkp -> pk_xcp, m, "P-In");
+
+ /*
+ * If the DTE is in Restart state, then it will ignore data,
+ * interrupt, call setup and clearing, flow control and reset
+ * packets.
+ */
+ if (pkp -> pk_state != DTE_READY && ptype != RESTART && ptype != RESTART_CONF) {
+ m_freem (m);
+ return;
+ }
+ if (lcp) {
+ so = lcp -> lcd_so;
+ lcdstate = lcp -> lcd_state;
+ } else {
+ if (ptype == CLEAR) { /* idle line probe (Datapac specific) */
+ /* send response on lcd 0's output queue */
+ lcp = pkp -> pk_chan[0];
+ lcp -> lcd_template = pk_template (lcn, X25_CLEAR_CONFIRM);
+ pk_output (lcp);
+ m_freem (m);
+ return;
+ }
+ if (ptype != CALL)
+ ptype = INVALID_PACKET;
+ }
+
+ if (lcn == 0 && ptype != RESTART && ptype != RESTART_CONF) {
+ pk_message (0, pkp -> pk_xcp, "illegal ptype (%d, %s) on lcn 0",
+ ptype, pk_name[ptype / MAXSTATES]);
+ if (pk_bad_packet)
+ m_freem (pk_bad_packet);
+ pk_bad_packet = m;
+ return;
+ }
+
+ m -> m_pkthdr.rcvif = pkp -> pk_ia -> ia_ifp;
+
+ switch (ptype + lcdstate) {
+ /*
+ * Incoming Call packet received.
+ */
+ case CALL + LISTEN:
+ pk_incoming_call (pkp, m);
+ break;
+
+ /*
+ * Call collision: Just throw this "incoming call" away since
+ * the DCE will ignore it anyway.
+ */
+ case CALL + SENT_CALL:
+ pk_message ((int) lcn, pkp -> pk_xcp, "incoming call collision");
+ break;
+
+ /*
+ * Call confirmation packet received. This usually means our
+ * previous connect request is now complete.
+ */
+ case CALL_ACCEPTED + SENT_CALL:
+ MCHTYPE(m, MT_CONTROL);
+ pk_call_accepted (lcp, m);
+ break;
+
+ /*
+ * This condition can only happen if the previous state was
+ * SENT_CALL. Just ignore the packet, eventually a clear
+ * confirmation should arrive.
+ */
+ case CALL_ACCEPTED + SENT_CLEAR:
+ break;
+
+ /*
+ * Clear packet received. This requires a complete tear down
+ * of the virtual circuit. Free buffers and control blocks.
+ * and send a clear confirmation.
+ */
+ case CLEAR + READY:
+ case CLEAR + RECEIVED_CALL:
+ case CLEAR + SENT_CALL:
+ case CLEAR + DATA_TRANSFER:
+ lcp -> lcd_state = RECEIVED_CLEAR;
+ lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_CLEAR_CONFIRM);
+ pk_output (lcp);
+ pk_clearcause (pkp, xp);
+ if (lcp -> lcd_upper) {
+ MCHTYPE(m, MT_CONTROL);
+ lcp -> lcd_upper (lcp, m);
+ }
+ pk_close (lcp);
+ lcp = 0;
+ break;
+
+ /*
+ * Clear collision: Treat this clear packet as a confirmation.
+ */
+ case CLEAR + SENT_CLEAR:
+ pk_close (lcp);
+ break;
+
+ /*
+ * Clear confirmation received. This usually means the virtual
+ * circuit is now completely removed.
+ */
+ case CLEAR_CONF + SENT_CLEAR:
+ pk_close (lcp);
+ break;
+
+ /*
+ * A clear confirmation on an unassigned logical channel - just
+ * ignore it. Note: All other packets on an unassigned channel
+ * results in a clear.
+ */
+ case CLEAR_CONF + READY:
+ case CLEAR_CONF + LISTEN:
+ break;
+
+ /*
+ * Data packet received. Pass on to next level. Move the Q and M
+ * bits into the data portion for the next level.
+ */
+ case DATA + DATA_TRANSFER:
+ if (lcp -> lcd_reset_condition) {
+ ptype = DELETE_PACKET;
+ break;
+ }
+
+ /*
+ * Process the P(S) flow control information in this Data packet.
+ * Check that the packets arrive in the correct sequence and that
+ * they are within the "lcd_input_window". Input window rotation is
+ * initiated by the receive interface.
+ */
+
+ if (PS(xp) != ((lcp -> lcd_rsn + 1) % MODULUS) ||
+ PS(xp) == ((lcp -> lcd_input_window + lcp -> lcd_windowsize) % MODULUS)) {
+ m_freem (m);
+ pk_procerror (RESET, lcp, "p(s) flow control error", 1);
+ return; /* m is freed: must not reach the lcd_upper call at the end */
+ }
+ lcp -> lcd_rsn = PS(xp);
+
+ if (pk_ack (lcp, PR(xp)) != PACKET_OK) {
+ m_freem (m);
+ return; /* m is freed: must not reach the lcd_upper call at the end */
+ }
+ m -> m_data += PKHEADERLN;
+ m -> m_len -= PKHEADERLN;
+ m -> m_pkthdr.len -= PKHEADERLN;
+
+ lcp -> lcd_rxcnt++;
+ if (lcp -> lcd_flags & X25_MBS_HOLD) {
+ register struct mbuf *n = lcp -> lcd_cps;
+ int mbit = MBIT(xp);
+ octet q_and_d_bits;
+
+ if (n) {
+ n -> m_pkthdr.len += m -> m_pkthdr.len;
+ while (n -> m_next)
+ n = n -> m_next;
+ n -> m_next = m;
+ m = lcp -> lcd_cps;
+
+ if (lcp -> lcd_cpsmax &&
+ m -> m_pkthdr.len > lcp -> lcd_cpsmax) { /* m is the chain head; n is now its last mbuf */
+ pk_procerror (RESET, lcp,
+ "C.P.S. overflow", 128);
+ return;
+ }
+ q_and_d_bits = 0xc0 & *(octet *) xp;
+ xp = (struct x25_packet *)
+ (mtod (m, octet *) - PKHEADERLN);
+ *(octet *) xp |= q_and_d_bits;
+ }
+ if (mbit) {
+ lcp -> lcd_cps = m;
+ pk_flowcontrol (lcp, 0, 1);
+ return;
+ }
+ lcp -> lcd_cps = 0;
+ }
+ if (so == 0)
+ break;
+ if (lcp -> lcd_flags & X25_MQBIT) {
+ octet t = (X25GBITS(xp -> bits, q_bit)) ? 0x80 : 0;
+
+ if (MBIT(xp))
+ t |= 0x40;
+ m -> m_data -= 1;
+ m -> m_len += 1;
+ m -> m_pkthdr.len += 1;
+ *mtod (m, octet *) = t;
+ }
+
+ /*
+ * Discard Q-BIT packets if the application
+ * doesn't want to be informed of M and Q bit status
+ */
+ if (X25GBITS(xp -> bits, q_bit)
+ && (lcp -> lcd_flags & X25_MQBIT) == 0) {
+ m_freem (m);
+ /*
+ * NB. This is dangerous: sending a RR here can
+ * cause sequence number errors if a previous data
+ * packet has not yet been passed up to the application
+ * (RR's are normally generated via PRU_RCVD).
+ */
+ pk_flowcontrol (lcp, 0, 1);
+ } else {
+ sbappendrecord (&so -> so_rcv, m);
+ sorwakeup (so);
+ }
+ break;
+
+ /*
+ * Interrupt packet received.
+ */
+ case INTERRUPT + DATA_TRANSFER:
+ if (lcp -> lcd_reset_condition)
+ break; /* NOTE(review): unlike DATA, ptype is not set to DELETE_PACKET, so m is not freed below - confirm */
+ lcp -> lcd_intrdata = xp -> packet_data;
+ lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_INTERRUPT_CONFIRM);
+ pk_output (lcp);
+ m -> m_data += PKHEADERLN;
+ m -> m_len -= PKHEADERLN;
+ m -> m_pkthdr.len -= PKHEADERLN;
+ MCHTYPE(m, MT_OOBDATA);
+ if (so) {
+ if (so -> so_options & SO_OOBINLINE)
+ sbinsertoob (&so -> so_rcv, m);
+ else
+ m_freem (m);
+ sohasoutofband (so);
+ }
+ break;
+
+ /*
+ * Interrupt confirmation packet received.
+ */
+ case INTERRUPT_CONF + DATA_TRANSFER:
+ if (lcp -> lcd_reset_condition)
+ break;
+ if (lcp -> lcd_intrconf_pending == TRUE)
+ lcp -> lcd_intrconf_pending = FALSE;
+ else
+ pk_procerror (RESET, lcp, "unexpected packet", 43);
+ break;
+
+ /*
+ * Receiver ready received. Rotate the output window and output
+ * any data packets waiting transmission.
+ */
+ case RR + DATA_TRANSFER:
+ if (lcp -> lcd_reset_condition ||
+ pk_ack (lcp, PR(xp)) != PACKET_OK) {
+ ptype = DELETE_PACKET;
+ break;
+ }
+ if (lcp -> lcd_rnr_condition == TRUE)
+ lcp -> lcd_rnr_condition = FALSE;
+ pk_output (lcp);
+ break;
+
+ /*
+ * Receiver Not Ready received. Packets up to the P(R) can be
+ * sent. Condition is cleared with a RR.
+ */
+ case RNR + DATA_TRANSFER:
+ if (lcp -> lcd_reset_condition ||
+ pk_ack (lcp, PR(xp)) != PACKET_OK) {
+ ptype = DELETE_PACKET;
+ break;
+ }
+ lcp -> lcd_rnr_condition = TRUE;
+ break;
+
+ /*
+ * Reset packet received. Set state to FLOW_OPEN. The Input and
+ * Output window edges are set to zero. Both the send and receive
+ * numbers are reset. A confirmation is returned.
+ */
+ case RESET + DATA_TRANSFER:
+ if (lcp -> lcd_reset_condition)
+ /* Reset collision. Just ignore packet. */
+ break;
+
+ pk_resetcause (pkp, xp);
+ lcp -> lcd_window_condition = lcp -> lcd_rnr_condition =
+ lcp -> lcd_intrconf_pending = FALSE;
+ lcp -> lcd_output_window = lcp -> lcd_input_window =
+ lcp -> lcd_last_transmitted_pr = 0;
+ lcp -> lcd_ssn = 0;
+ lcp -> lcd_rsn = MODULUS - 1;
+
+ lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_RESET_CONFIRM);
+ pk_output (lcp);
+
+ pk_flush (lcp);
+ if (so == 0)
+ break;
+ wakeup ((caddr_t) & so -> so_timeo);
+ sorwakeup (so);
+ sowwakeup (so);
+ break;
+
+ /*
+ * Reset confirmation received.
+ */
+ case RESET_CONF + DATA_TRANSFER:
+ if (lcp -> lcd_reset_condition) {
+ lcp -> lcd_reset_condition = FALSE;
+ pk_output (lcp);
+ }
+ else
+ pk_procerror (RESET, lcp, "unexpected packet", 32);
+ break;
+
+ case DATA + SENT_CLEAR:
+ ptype = DELETE_PACKET; /* FALLTHROUGH */
+ case RR + SENT_CLEAR:
+ case RNR + SENT_CLEAR:
+ case INTERRUPT + SENT_CLEAR:
+ case INTERRUPT_CONF + SENT_CLEAR:
+ case RESET + SENT_CLEAR:
+ case RESET_CONF + SENT_CLEAR:
+ /* Just ignore p if we have sent a CLEAR already. */
+ break;
+
+ /*
+ * Restart sets all the permanent virtual circuits to the "Data
+ * Transfer" state and all the switched virtual circuits to the
+ * "Ready" state.
+ */
+ case RESTART + READY:
+ switch (pkp -> pk_state) {
+ case DTE_SENT_RESTART:
+ /*
+ * Restart collision.
+ * In case the restart cause is "DTE originated" we
+ * have a DTE-DTE situation and are trying to resolve
+ * who is going to play DTE/DCE [ISO 8208:4.2-4.5]
+ */
+ if (RESTART_DTE_ORIGINATED(xp)) {
+ pk_restart (pkp, X25_RESTART_DTE_ORIGINATED);
+ pk_message (0, pkp -> pk_xcp, "RESTART collision");
+ if ((pkp -> pk_restartcolls++) > MAXRESTARTCOLLISIONS) {
+ pk_message (0, pkp -> pk_xcp,
+ "excessive RESTART collisions");
+ pkp -> pk_restartcolls = 0;
+ }
+ break;
+ }
+ pkp -> pk_state = DTE_READY;
+ pkp -> pk_dxerole |= DTE_PLAYDTE;
+ pkp -> pk_dxerole &= ~DTE_PLAYDCE;
+ pk_message (0, pkp -> pk_xcp,
+ "Packet level operational");
+ pk_message (0, pkp -> pk_xcp,
+ "Assuming DTE role");
+ if (pkp -> pk_dxerole & DTE_CONNECTPENDING)
+ pk_callcomplete (pkp);
+ break;
+
+ default:
+ pk_restart (pkp, -1);
+ pk_restartcause (pkp, xp);
+ pkp -> pk_chan[0] -> lcd_template = pk_template (0,
+ X25_RESTART_CONFIRM);
+ pk_output (pkp -> pk_chan[0]);
+ pkp -> pk_state = DTE_READY;
+ pkp -> pk_dxerole |= RESTART_DTE_ORIGINATED(xp) ? DTE_PLAYDCE :
+ DTE_PLAYDTE;
+ if (pkp -> pk_dxerole & DTE_PLAYDTE) {
+ pkp -> pk_dxerole &= ~DTE_PLAYDCE;
+ pk_message (0, pkp -> pk_xcp,
+ "Assuming DTE role");
+ } else {
+ pkp -> pk_dxerole &= ~DTE_PLAYDTE;
+ pk_message (0, pkp -> pk_xcp,
+ "Assuming DCE role");
+ }
+ if (pkp -> pk_dxerole & DTE_CONNECTPENDING)
+ pk_callcomplete (pkp);
+ }
+ break;
+
+ /*
+ * Restart confirmation received. All logical channels are set
+ * to READY.
+ */
+ case RESTART_CONF + READY:
+ switch (pkp -> pk_state) {
+ case DTE_SENT_RESTART:
+ pkp -> pk_state = DTE_READY;
+ pkp -> pk_dxerole |= DTE_PLAYDTE;
+ pkp -> pk_dxerole &= ~DTE_PLAYDCE;
+ pk_message (0, pkp -> pk_xcp,
+ "Packet level operational");
+ pk_message (0, pkp -> pk_xcp,
+ "Assuming DTE role");
+ if (pkp -> pk_dxerole & DTE_CONNECTPENDING)
+ pk_callcomplete (pkp);
+ break;
+
+ default:
+ /* Restart local procedure error. */
+ pk_restart (pkp, X25_RESTART_LOCAL_PROCEDURE_ERROR);
+ pkp -> pk_state = DTE_SENT_RESTART;
+ pkp -> pk_dxerole &= ~(DTE_PLAYDTE | DTE_PLAYDCE);
+ }
+ break;
+
+ default:
+ if (lcp) {
+ pk_procerror (CLEAR, lcp, "unknown packet error", 33);
+ pk_message (lcn, pkp -> pk_xcp,
+ "\"%s\" unexpected in \"%s\" state",
+ pk_name[ptype/MAXSTATES], pk_state[lcdstate]);
+ } else
+ pk_message (lcn, pkp -> pk_xcp,
+ "packet arrived on unassigned lcn");
+ break;
+ }
+ if (so == 0 && lcdstate == DATA_TRANSFER && lcp && lcp -> lcd_upper) { /* state first: pk_close() may have released lcp in other states */
+ if (ptype != DATA && ptype != INTERRUPT)
+ MCHTYPE(m, MT_CONTROL);
+ lcp -> lcd_upper (lcp, m);
+ } else if (ptype != DATA && ptype != INTERRUPT)
+ m_freem (m);
+}
+
+/*
+ * Copy address from to to (at most MAXADDRLN digits): a '0' prepended
+ * per xc_prepnd0 is dropped, else per xc_nodnic dnicname goes in front.
+ */
+static
+prune_dnic (from, to, dnicname, xcp)
+char *from, *to, *dnicname;
+register struct x25config *xcp;
+{
+	register char *src = from, *dst = to, *lim = to + MAXADDRLN;	/* NOTE(review): to assumed >= MAXADDRLN + 1 bytes - confirm */
+	if (xcp -> xc_prepnd0 && *src == '0')
+		src++;			/* skip the prepended zero */
+	else if (xcp -> xc_nodnic)
+		while (dst < lim && *dnicname)
+			*dst++ = *dnicname++;
+	while (dst < lim && *src)
+		*dst++ = *src++;
+	*dst = 0;
+}
+/*
+ * (Would-be static.)  Expand len BCD digits of from, starting at nibble
+ * offset lower (even = high nibble, odd = low), into the ASCII string to.
+ */
+pk_simple_bsd (from, to, lower, len)
+register octet *from, *to;
+register len, lower;
+{
+	for (; len > 0; len--, lower++) {
+		if (lower & 0x01)
+			*to++ = (*from++ & 0x0f) | 0x30;
+		else
+			*to++ = ((*from >> 4) & 0x0f) | 0x30;
+	}
+	*to = 0;
+}
+
+/*
+ * Fill in sa as an AF_CCITT address holding the calling (iscalling != 0)
+ * or called digits unpacked from a, run through prune_dnic() if needed.
+ */
+pk_from_bcd (a, iscalling, sa, xcp)
+register struct x25_calladdr *a;
+register struct sockaddr_x25 *sa;
+register struct x25config *xcp;
+{
+	octet digits[MAXADDRLN+1], *src = a -> address_field;
+	unsigned skip = 0, count = X25GBITS(a -> addrlens, called_addrlen);
+
+	bzero ((caddr_t) sa, sizeof (*sa));
+	sa -> x25_len = sizeof (*sa);
+	sa -> x25_family = AF_CCITT;
+	if (iscalling) {
+		skip = count;		/* calling digits follow the called ones */
+		src += skip / 2;
+		count = X25GBITS(a -> addrlens, calling_addrlen);
+	}
+	pk_simple_bsd (src, digits, (int) skip, count);
+	if (xcp -> xc_addr.x25_net == 0 || !(xcp -> xc_nodnic || xcp -> xc_prepnd0))
+		bcopy ((caddr_t) digits, (caddr_t) sa -> x25_addr, count + 1);
+	else {
+		octet dnicname[sizeof (long) * NBBY/3 + 2];
+		sprintf ((char *) dnicname, "%d", xcp -> xc_addr.x25_net);
+		prune_dnic ((char *) digits, sa -> x25_addr, dnicname, xcp);
+	}
+}
+
+/*
+ * Append a copy of the call packet m0, from the facilities onward, to
+ * the receive buffer of `so' as a control record.  `fp' points at the
+ * facilities inside the data of the first mbuf of m0.  The record is
+ * headed by a cmsghdr with level AF_CCITT and type PK_FACILITIES.
+ * m0 itself is only read; nothing is queued if no mbufs can be had.
+ */
+static
+save_extra (m0, fp, so)
+struct mbuf *m0;
+octet *fp;
+struct socket *so;
+{
+	register struct mbuf *m;
+	struct cmsghdr cmsghdr;
+
+	/*
+	 * Copy the packet that was handed in.  (This used to pass the
+	 * still uninitialised `m' to m_copy.)
+	 */
+	if (m = m_copy (m0, 0, (int)M_COPYALL)) {
+		int off = fp - mtod (m0, octet *);
+		int len = m -> m_pkthdr.len - off + sizeof (cmsghdr);
+
+		cmsghdr.cmsg_len = len;
+		cmsghdr.cmsg_level = AF_CCITT;
+		cmsghdr.cmsg_type = PK_FACILITIES;
+		/* Drop everything in front of the facilities ... */
+		m_adj (m, off);
+		/* ... and make room for the header in its place. */
+		M_PREPEND (m, sizeof (cmsghdr), M_DONTWAIT);
+		if (m == 0)
+			return;
+		bcopy ((caddr_t)&cmsghdr, mtod (m, caddr_t), sizeof (cmsghdr));
+		MCHTYPE(m, MT_CONTROL);
+		sbappendrecord (&so -> so_rcv, m);
+	}
+}
+
+/*
+ * This routine handles incoming call packets.  It matches the protocol
+ * field on the Call User Data field (usually the first four bytes) with
+ * sockets awaiting connections.
+ *
+ * pkp is the packet level control block the call came in on, m0 the
+ * call packet.  On a match a new logical channel descriptor is set up
+ * in RECEIVED_CALL state with a CALL ACCEPTED template; for a socket
+ * listener the acceptance is sent right away, otherwise the packet is
+ * handed to the listener's lcd_upper routine.  If nothing matches, or
+ * the match is refused, a skeleton descriptor is created and the call
+ * is cleared.  m0 is not freed by this routine itself.
+ */
+
+pk_incoming_call (pkp, m0)
+struct mbuf *m0;
+struct pkcb *pkp;
+{
+	register struct pklcd *lcp = 0, *l;
+	register struct sockaddr_x25 *sa;
+	register struct x25_calladdr *a;
+	register struct socket *so = 0;
+	struct x25_packet *xp = mtod (m0, struct x25_packet *);
+	struct mbuf *m;
+	struct x25config *xcp = pkp -> pk_xcp;
+	int len = m0 -> m_pkthdr.len;
+	int avail;
+	unsigned udlen;
+	char *errstr = "server unavailable";
+	octet *u, *facp;
+	int lcn = LCN(xp);
+
+	/* First, copy the data from the incoming call packet to a X25 address
+	   descriptor. It is to be regretted that you have
+	   to parse the facilities into a sockaddr to determine
+	   if reverse charging is being requested */
+	if ((m = m_get (M_DONTWAIT, MT_SONAME)) == 0)
+		return;
+	sa = mtod (m, struct sockaddr_x25 *);
+	a = (struct x25_calladdr *) &xp -> packet_data;
+	/* The facilities follow the packed called and calling digits. */
+	facp = u = (octet *) (a -> address_field +
+		((X25GBITS(a -> addrlens, called_addrlen) + X25GBITS(a -> addrlens, calling_addrlen) + 1) / 2));
+	/* Step over the facilities (length octet included) to the user data. */
+	u += *u + 1;
+	/*
+	 * Work out how much user data is really there.  The distance is
+	 * taken as a signed quantity so that a packet too short to hold
+	 * its own facilities yields no user data; udlen is unsigned, so
+	 * testing it for < 0 after the fact could never catch that.
+	 */
+	avail = ((octet *) xp) + len - u;
+	udlen = (avail > 0) ? min (16, avail) : 0;
+	pk_from_bcd (a, 1, sa, pkp -> pk_xcp); /* get calling address */
+	pk_parse_facilities (facp, sa);
+	bcopy ((caddr_t) u, sa -> x25_udata, udlen);
+	sa -> x25_udlen = udlen;
+
+	/*
+	 * Now, loop through the listen sockets looking for a match on the
+	 * PID. That is the first few octets of the user data field.
+	 * This is the closest thing to a port number for X.25 packets.
+	 * It does provide a way of multiplexing services at the user level.
+	 */
+
+	for (l = pk_listenhead; l; l = l -> lcd_listen) {
+		struct sockaddr_x25 *sxp = l -> lcd_ceaddr;
+
+		if (bcmp (sxp -> x25_udata, u, sxp -> x25_udlen))
+			continue;
+		if (sxp -> x25_net &&
+		    sxp -> x25_net != xcp -> xc_addr.x25_net)
+			continue;
+		/*
+		 * don't accept incoming calls with the D-Bit on
+		 * unless the server agrees
+		 */
+		if (X25GBITS(xp -> bits, d_bit) && !(sxp -> x25_opts.op_flags & X25_DBIT)) {
+			errstr = "incoming D-Bit mismatch";
+			break;
+		}
+		/*
+		 * don't accept incoming collect calls unless
+		 * the server sets the reverse charging option.
+		 */
+		if ((sxp -> x25_opts.op_flags & (X25_OLDSOCKADDR|X25_REVERSE_CHARGE)) == 0 &&
+			sa -> x25_opts.op_flags & X25_REVERSE_CHARGE) {
+			errstr = "incoming collect call refused";
+			break;
+		}
+		if (l -> lcd_so) {
+			if (so = sonewconn (l -> lcd_so, SS_ISCONNECTED))
+				lcp = (struct pklcd *) so -> so_pcb;
+		} else
+			lcp = pk_attach ((struct socket *) 0);
+		if (lcp == 0) {
+			/*
+			 * Insufficient space or too many unaccepted
+			 * connections.  Just throw the call away.
+			 */
+			errstr = "server malfunction";
+			break;
+		}
+		lcp -> lcd_upper = l -> lcd_upper;
+		lcp -> lcd_upnext = l -> lcd_upnext;
+		lcp -> lcd_lcn = lcn;
+		lcp -> lcd_state = RECEIVED_CALL;
+		sa -> x25_opts.op_flags |= (sxp -> x25_opts.op_flags &
+			~X25_REVERSE_CHARGE) | l -> lcd_flags;
+		pk_assoc (pkp, lcp, sa);
+		lcp -> lcd_faddr = *sa;
+		lcp -> lcd_laddr.x25_udlen = sxp -> x25_udlen;
+		lcp -> lcd_craddr = &lcp -> lcd_faddr;
+		lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_CALL_ACCEPTED);
+		/* Echo the D-bit only if both sides asked for it. */
+		if (lcp -> lcd_flags & X25_DBIT) {
+			if (X25GBITS(xp -> bits, d_bit))
+				X25SBITS(mtod (lcp -> lcd_template,
+					struct x25_packet *) -> bits, d_bit, 1);
+			else
+				lcp -> lcd_flags &= ~X25_DBIT;
+		}
+		if (so) {
+			pk_output (lcp);
+			soisconnected (so);
+			if (so -> so_options & SO_OOBINLINE)
+				save_extra (m0, facp, so);
+		} else if (lcp -> lcd_upper) {
+			(*lcp -> lcd_upper) (lcp, m0);
+		}
+		(void) m_free (m);
+		return;
+	}
+
+	/*
+	 * If the call fails for whatever reason, we still need to build a
+	 * skeleton LCD in order to be able to properly receive the CLEAR
+	 * CONFIRMATION.
+	 */
+#ifdef WATERLOO		/* be explicit */
+	if (l == 0 && bcmp (sa -> x25_udata, "ean", 3) == 0)
+		pk_message (lcn, pkp -> pk_xcp, "host=%s ean%c: %s",
+			sa -> x25_addr, sa -> x25_udata[3] & 0xff, errstr);
+	else if (l == 0 && bcmp (sa -> x25_udata, "\1\0\0\0", 4) == 0)
+		pk_message (lcn, pkp -> pk_xcp, "host=%s x29d: %s",
+			sa -> x25_addr, errstr);
+	else
+#endif
+	pk_message (lcn, pkp -> pk_xcp, "host=%s pid=%x %x %x %x: %s",
+		sa -> x25_addr, sa -> x25_udata[0] & 0xff,
+		sa -> x25_udata[1] & 0xff, sa -> x25_udata[2] & 0xff,
+		sa -> x25_udata[3] & 0xff, errstr);
+	if ((lcp = pk_attach ((struct socket *)0)) == 0) {
+		(void) m_free (m);
+		return;
+	}
+	lcp -> lcd_lcn = lcn;
+	lcp -> lcd_state = RECEIVED_CALL;
+	pk_assoc (pkp, lcp, sa);
+	(void) m_free (m);
+	pk_clear (lcp, 0, 1);
+}
+
+/*
+ * A CALL ACCEPTED packet `m' has arrived for logical channel `lcp'.
+ * Move the channel to DATA_TRANSFER, tell the socket (if there is one)
+ * that it is connected, drop our D-bit request if the packet does not
+ * carry the D-bit, pick up any facilities in the packet, and finally
+ * pass the packet to lcd_upper when the channel has no socket.
+ */
+pk_call_accepted (lcp, m)
+struct pklcd *lcp;
+struct mbuf *m;
+{
+	register struct x25_calladdr *addrs;
+	register octet *facilities;
+	struct x25_packet *pkt = mtod (m, struct x25_packet *);
+	int pktlen = m -> m_len;
+	octet *pktend;
+
+	lcp -> lcd_state = DATA_TRANSFER;
+	if (lcp -> lcd_so)
+		soisconnected (lcp -> lcd_so);
+
+	if (X25GBITS(pkt -> bits, d_bit) == 0 && (lcp -> lcd_flags & X25_DBIT))
+		lcp -> lcd_flags &= ~X25_DBIT;
+
+	/* Anything longer than three octets has an address block. */
+	if (pktlen > 3) {
+		pktend = ((octet *) pkt) + pktlen;
+		addrs = (struct x25_calladdr *) &pkt -> packet_data;
+		/* The facilities come after the packed address digits. */
+		facilities = (octet *) addrs -> address_field +
+			(X25GBITS(addrs -> addrlens, calling_addrlen) +
+			 X25GBITS(addrs -> addrlens, called_addrlen) + 1) / 2;
+		if (facilities + *facilities <= pktend)
+			pk_parse_facilities (facilities, lcp -> lcd_ceaddr);
+	}
+
+	pk_assoc (lcp -> lcd_pkp, lcp, lcp -> lcd_ceaddr);
+	if (lcp -> lcd_so == 0 && lcp -> lcd_upper)
+		lcp -> lcd_upper (lcp, m);
+}
+
+/*
+ * Walk the facilities field starting at `fcp' (a length octet followed
+ * by that many octets of facilities) and record the ones we know in
+ * sa -> x25_opts: window size, packet size, throughput class and
+ * reverse charging.  Facilities we do not know are stepped over
+ * according to the class encoded in the top two bits of the facility
+ * code.  Parsing stops at the first 0x00 or 0xff code.
+ */
+pk_parse_facilities (fcp, sa)
+register octet *fcp;
+register struct sockaddr_x25 *sa;
+{
+	register octet *maxfcp;
+
+	maxfcp = fcp + *fcp;
+	fcp++;
+	while (fcp < maxfcp) {
+		/*
+		 * Ignore national DCE or DTE facilities
+		 */
+		if (*fcp == 0 || *fcp == 0xff)
+			break;
+		switch (*fcp) {
+		case FACILITIES_WINDOWSIZE:
+			sa -> x25_opts.op_wsize = fcp[1];
+			fcp += 3;
+			break;
+
+		case FACILITIES_PACKETSIZE:
+			sa -> x25_opts.op_psize = fcp[1];
+			fcp += 3;
+			break;
+
+		case FACILITIES_THROUGHPUT:
+			sa -> x25_opts.op_speed = fcp[1];
+			fcp += 2;
+			break;
+
+		case FACILITIES_REVERSE_CHARGE:
+			if (fcp[1] & 01)
+				sa -> x25_opts.op_flags |= X25_REVERSE_CHARGE;
+			/*
+			 * Datapac specific: for a X.25(1976) DTE, bit 2
+			 * indicates a "hi priority" (eg. international) call.
+			 */
+			if (fcp[1] & 02 && sa -> x25_opts.op_psize == 0)
+				sa -> x25_opts.op_psize = X25_PS128;
+			fcp += 2;
+			break;
+
+		default:
+/*printf("unknown facility %x, class=%d\n", *fcp, (*fcp & 0xc0) >> 6);*/
+			switch ((*fcp & 0xc0) >> 6) {
+			case 0:			/* class A: one parameter octet */
+				fcp += 2;
+				break;
+
+			case 1:			/* class B: two parameter octets */
+				fcp += 3;
+				break;
+
+			case 2:			/* class C: three parameter octets */
+				fcp += 4;
+				break;
+
+			case 3:			/* class D: length octet, then parameters */
+				/*
+				 * Step over the code, the length octet and
+				 * the parameters it counts.  Adding just the
+				 * length to the address of the length octet
+				 * would stop on the last parameter octet.
+				 */
+				fcp += fcp[1] + 2;
+				break;
+			}
+		}
+	}
+}
diff --git a/sys/netccitt/pk_llcsubr.c b/sys/netccitt/pk_llcsubr.c
new file mode 100644
index 000000000000..d8cc5016a28d
--- /dev/null
+++ b/sys/netccitt/pk_llcsubr.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) Dirk Husemann, Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Dirk Husemann and the Computer Science Department (IV) of
+ * the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pk_llcsubr.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+#include <netccitt/llc_var.h>
+
+
+/*
+ * Routing support for X.25
+ *
+ * We distinguish between two cases:
+ * RTF_HOST:
+ * rt_key(rt) X.25 address of host
+ * rt_gateway SNPA (MAC+DLSAP) address of host
+ * rt_llinfo pkcb for rt_key(rt)
+ *
+ * RTF_GATEWAY
+ * rt_key(rt) X.25 address of host or suitably masked network
+ * rt_gateway X.25 address of next X.25 gateway (switch)
+ * rt_llinfo rtentry for rt_gateway address
+ * ought to be of type RTF_HOST
+ *
+ *
+ * Mapping of X.121 to pkcbs:
+ *
+ * HDLC uses the DTE-DCE model of X.25, therefore we need a many-to-one
+ * relationship, i.e.:
+ *
+ * {X.121_a, X.121_b, X.121_c, ..., X.121_i} -> pkcb_0
+ *
+ * LLC2 utilizes the DTE-DTE model of X.25, resulting effectively in a
+ * one-to-one relationship, i.e.:
+ *
+ * {X.121_j} -> pkcb_1a
+ * {X.121_k} -> pkcb_1b
+ * ...
+ * {X.121_q} -> pkcb_1q
+ *
+ * It might make sense to allow a many-to-one relation for LLC2 also,
+ *
+ * {X.121_r, X.121_s, X.121_t, X.121_u} -> pkcb_2a
+ *
+ * This would make addresses X.121_[r-u] essentially aliases of one
+ * address ({X.121_[r-u]} would constitute a representative set).
+ *
+ * Each one-to-one relation must obviously be entered individually with
+ * a route add command, whereas a many-to-one relationship can be
+ * either entered individually or generated by using a netmask.
+ *
+ * To facilitate dealings the many-to-one case for LLC2 can only be
+ * established via a netmask.
+ *
+ */
+
+/*
+ * Yield the pkcb behind route `rt'.  For a gateway route rt_llinfo
+ * holds another rtentry and the pkcb is that entry's rt_llinfo (NULL
+ * if there is no such entry); for any other route rt_llinfo is the
+ * pkcb itself.
+ */
+#define XTRACTPKP(rt)	((rt)->rt_flags & RTF_GATEWAY ? \
+			 ((rt)->rt_llinfo ? \
+			  (struct pkcb *) ((struct rtentry *)((rt)->rt_llinfo))->rt_llinfo : \
+			  (struct pkcb *) NULL) : \
+			 (struct pkcb *)((rt)->rt_llinfo))
+
+/* True if the two sockaddrs match over the first a1->sa_len octets. */
+#define equal(a1, a2) (bcmp((caddr_t)(a1), \
+			    (caddr_t)(a2), \
+			    (a1)->sa_len) == 0)
+/* The route's interface address viewed as an x25_ifaddr. */
+#define XIFA(rt) ((struct x25_ifaddr *)((rt)->rt_ifa))
+/* Cast to a generic sockaddr pointer; the argument is parenthesised. */
+#define SA(s) ((struct sockaddr *)(s))
+
+/*
+ * Routing hook for X.25 routes.
+ *
+ * cmd is the routing request (RTM_ADD, RTM_RESOLVE or RTM_DELETE are
+ * acted upon), rt the route entry concerned; dst is not used here.
+ *
+ * On add/resolve a gateway route gets a reference to the route of its
+ * gateway in rt_llinfo, a host route gets a pkcb there.  On delete the
+ * respective reference or pkcb is released again.
+ *
+ * Returns 0, or EEXIST if the route already has a pkcb, or ENETDOWN if
+ * the route has no interface address.
+ */
+int
+cons_rtrequest(int cmd, struct rtentry *rt, struct sockaddr *dst)
+{
+	register struct pkcb *pkp;
+	register char one_to_one;
+	struct pkcb *pk_newlink();
+	struct rtentry *npaidb_enter();
+
+	pkp = XTRACTPKP(rt);
+
+	switch(cmd) {
+	case RTM_RESOLVE:
+	case RTM_ADD:
+		if (pkp)
+			return(EEXIST);
+
+		if (rt->rt_flags & RTF_GATEWAY) {
+			if (rt->rt_llinfo)
+				RTFREE((struct rtentry *)rt->rt_llinfo);
+			rt->rt_llinfo = (caddr_t) rtalloc1(rt->rt_gateway, 1);
+			return(0);
+		}
+		/*
+		 * Assumptions:	(1) ifnet structure is filled in
+		 *		(2) at least the pkcb created via
+		 *		    x25config (ifconfig?) has been
+		 *		    set up already.
+		 *		(3) HDLC interfaces have an if_type of
+		 *		    IFT_X25{,DDN}, LLC2 interfaces
+		 *		    anything else (any better way to
+		 *		    do this?)
+		 *
+		 */
+		if (!rt->rt_ifa)
+			return (ENETDOWN);
+
+		/*
+		 * We differentiate between dealing with a many-to-one
+		 * (HDLC: DTE-DCE) and a one-to-one (LLC2: DTE-DTE)
+		 * relationship (by looking at the if type).
+		 *
+		 * Only in case of the many-to-one relationship (HDLC)
+		 * we set the ia->ia_pkcb pointer to the pkcb allocated
+		 * via pk_newlink() as we will use just that one pkcb for
+		 * future route additions (the rtentry->rt_llinfo pointer
+		 * points to the pkcb allocated for that route).
+		 *
+		 * In case of the one-to-one relationship (LLC2) we
+		 * create a new pkcb (via pk_newlink()) for each new rtentry.
+		 *
+		 * NOTE: Only in case of HDLC does ia->ia_pkcb point
+		 * to a pkcb, in the LLC2 case it doesn't (as we don't
+		 * need it here)!
+		 */
+		one_to_one = ISISO8802(rt->rt_ifp);
+
+		if (!(pkp = XIFA(rt)->ia_pkcb) && !one_to_one)
+			XIFA(rt)->ia_pkcb = pkp =
+				pk_newlink(XIFA(rt), (caddr_t) 0);
+		else if (one_to_one &&
+			 !equal(rt->rt_gateway, rt->rt_ifa->ifa_addr)) {
+			pkp = pk_newlink(XIFA(rt), (caddr_t) 0);
+			/*
+			 * We also need another route entry for mapping
+			 *	MAC+LSAP->X.25 address
+			 *
+			 * Only if we actually got a pkcb, though; without
+			 * one the route is simply left with no llinfo.
+			 */
+			if (pkp)
+				pkp->pk_llrt = npaidb_enter(rt->rt_gateway,
+							    rt_key(rt), rt, 0);
+		}
+		if (pkp) {
+			if (!pkp->pk_rt)
+				pkp->pk_rt = rt;
+			pkp->pk_refcount++;
+		}
+		rt->rt_llinfo = (caddr_t) pkp;
+
+		return(0);
+
+	case RTM_DELETE:
+	{
+		/*
+		 * The pkp might be empty if we are dealing
+		 * with an interface route entry for LLC2, in this
+		 * case we don't need to do anything ...
+		 */
+		if (pkp) {
+			if ( rt->rt_flags & RTF_GATEWAY ) {
+				if (rt->rt_llinfo)
+					RTFREE((struct rtentry *)rt->rt_llinfo);
+				return(0);
+			}
+
+			if (pkp->pk_llrt)
+				npaidb_destroy(pkp->pk_llrt);
+
+			pk_dellink (pkp);
+
+			return(0);
+		}
+	}
+	}
+
+	/*
+	 * Nothing to do: a delete without a pkcb, or a request we take
+	 * no interest in.  Return a definite value instead of running
+	 * off the end of an int function.
+	 */
+	return(0);
+}
+
+/*
+ * Network Protocol Addressing Information DataBase (npaidb)
+ *
+ * To speed up locating the entity dealing with an LLC packet use is made
+ * of a routing tree. This npaidb routing tree is handled
+ * by the normal rn_*() routines just like (almost) any other routing tree.
+ *
+ * The mapping being done by the npaidb_*() routines is as follows:
+ *
+ * Key: MAC,LSAP (enhancing struct sockaddr_dl)
+ * Gateway: sockaddr_x25 (i.e. X.25 address - X.121 or NSAP)
+ * Llinfo: npaidbentry {
+ * struct llc_linkcb *np_link;
+ * struct rtentry *np_rt;
+ * }
+ *
+ * Using the npaidbentry provided by llinfo we can then access
+ *
+ * o the pkcb by using (struct pkcb *) (np_rt->rt_llinfo)
+ * o the linkcb via np_link
+ *
+ * The following functions are provided
+ *
+ * o npaidb_enter(struct sockaddr_dl *key, struct sockaddr *value,
+ * struct rtentry *rt, struct llc_linkcb *link)
+ *
+ * o npaidb_enrich(short type, caddr_t info, struct sockaddr_dl *sdl)
+ *
+ * o npaidb_destroy(struct rtentry *rt)
+ *
+ */
+
+/*
+ * Netmask handed to rtrequest() by npaidb_enter().  At rest the three
+ * length fields and the first twelve data octets are all ones;
+ * npaidb_enter() patches the octets from the SAP position onward for
+ * the duration of one request and then sets them back to -1.
+ */
+struct sockaddr_dl npdl_netmask = {
+ sizeof(struct sockaddr_dl), /* _len */
+ 0, /* _family */
+ 0, /* _index */
+ 0, /* _type */
+ -1, /* _nlen */
+ -1, /* _alen */
+ -1, /* _slen */
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* _data */
+};
+/* Stand-in gateway address used by npaidb_enter() when it is given none. */
+struct sockaddr npdl_dummy;
+
+/*
+ * Size of struct sockaddr_dl minus the offset of sdl_data within it,
+ * i.e. the number of octets from sdl_data[0] to the end of the struct.
+ */
+int npdl_datasize = sizeof(struct sockaddr_dl)-
+ ((int)((caddr_t)&((struct sockaddr_dl *)0)->sdl_data[0]));
+
+/*
+ * Look up, or create, the npaidb route for the link level address
+ * `key'.
+ *
+ * key    MAC+LSAP address to enter
+ * value  gateway to store with it (npdl_dummy is used if this is 0)
+ * rt     X.25 route the new entry refers back to (stored as np_rt)
+ * link   not used by this routine
+ *
+ * If an entry for key already exists it is returned as it is.
+ * Otherwise a route is added and given a zeroed npaidbentry as llinfo.
+ * Returns the route entry, or 0 if the route could not be added.
+ */
+struct rtentry *
+npaidb_enter(struct sockaddr_dl *key, struct sockaddr *value,
+	     struct rtentry *rt, struct llc_linkcb *link)
+{
+	struct rtentry *nprt; register int i;
+
+	USES_AF_LINK_RTS;
+
+	if ((nprt = rtalloc1(SA(key), 0)) == 0) {
+		register u_int size = sizeof(struct npaidbentry);
+		register u_char saploc = LLSAPLOC(key, rt->rt_ifp);
+		int error;
+
+		/*
+		 * set up netmask: LLC2 packets have the lowest bit set in
+		 * response packets (e.g. 0x7e for command packets, 0x7f for
+		 * response packets), to facilitate the lookup we use a netmask
+		 * of 11111110 for the SAP position. The remaining positions
+		 * are zeroed out.
+		 */
+		npdl_netmask.sdl_data[saploc] = NPDL_SAPNETMASK;
+		bzero((caddr_t)&npdl_netmask.sdl_data[saploc+1],
+		      npdl_datasize-saploc-1);
+
+		if (value == 0)
+			value = &npdl_dummy;
+
+		/* now enter it */
+		error = rtrequest(RTM_ADD, SA(key), SA(value),
+				  SA(&npdl_netmask), 0, &nprt);
+
+		/* and reset npdl_netmask */
+		for (i = saploc; i < npdl_datasize; i++)
+			npdl_netmask.sdl_data[i] = -1;
+
+		/*
+		 * If the add failed there is no entry to dress up; nprt is
+		 * still 0 from the lookup above in that case.
+		 */
+		if (error || nprt == 0)
+			return ((struct rtentry *) 0);
+
+		nprt->rt_llinfo = malloc(size , M_PCB, M_WAITOK);
+		if (nprt->rt_llinfo) {
+			bzero (nprt->rt_llinfo, size);
+			((struct npaidbentry *) (nprt->rt_llinfo))->np_rt = rt;
+		}
+	} else nprt->rt_refcnt--;	/* give back the reference rtalloc1 took */
+	return nprt;
+}
+
+/*
+ * Attach further information to the npaidb entry for the link level
+ * address `sdl'.  The only type handled is NPAIDB_LINK, for which
+ * `info' is stored as the entry's llc_linkcb pointer.  Returns the
+ * route entry found, or 0 if there is none for sdl.
+ */
+struct rtentry *
+npaidb_enrich(short type, caddr_t info, struct sockaddr_dl *sdl)
+{
+	struct rtentry *entry;
+
+	USES_AF_LINK_RTS;
+
+	entry = rtalloc1((struct sockaddr *)sdl, 0);
+	if (entry == 0)
+		return ((struct rtentry *) 0);
+
+	/* Drop the reference the lookup took. */
+	entry->rt_refcnt--;
+	if (type == NPAIDB_LINK)
+		((struct npaidbentry *)(entry->rt_llinfo))->np_link =
+			(struct llc_linkcb *) info;
+	return entry;
+}
+
+/*
+ * Remove the npaidb route `rt': free the llinfo hanging off it, if
+ * any, and delete the route.  Returns what rtrequest() returns.
+ */
+npaidb_destroy(struct rtentry *rt)
+{
+	USES_AF_LINK_RTS;
+
+	if (rt->rt_llinfo != 0)
+		free((caddr_t) rt->rt_llinfo, M_PCB);
+
+	return(rtrequest(RTM_DELETE,
+			 rt_key(rt), rt->rt_gateway, rt_mask(rt),
+			 0, 0));
+}
+
+
+#ifdef LLC
+/*
+ * Glue between X.25 and LLC2
+ */
+/*
+ * Pass control request `prc' for the X.25 interface address `addr' on
+ * to LLC2.  Returns 0 if no interface has that address, otherwise the
+ * result of llc_ctlinput().
+ */
+int
+x25_llcglue(int prc, struct sockaddr *addr)
+{
+	struct dll_ctlinfo ctl;
+	struct x25_ifaddr *ia;
+	struct sockaddr_x25 *xcaddr;
+
+	ia = (struct x25_ifaddr *)ifa_ifwithaddr(addr);
+	if (ia == 0)
+		return 0;
+
+	/*
+	 * The dllconfig is taken to start one sockaddr_x25 past the
+	 * beginning of the interface's x25config.
+	 */
+	xcaddr = (struct sockaddr_x25 *)(&ia->ia_xc);
+	ctl.dlcti_cfg = (struct dllconfig *)(xcaddr + 1);
+	ctl.dlcti_lsap = LLC_X25_LSAP;
+
+	return ((int)llc_ctlinput(prc, addr, (caddr_t)&ctl));
+}
+#endif /* LLC */
diff --git a/sys/netccitt/pk_output.c b/sys/netccitt/pk_output.c
new file mode 100644
index 000000000000..ccc02a4c3274
--- /dev/null
+++ b/sys/netccitt/pk_output.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (C) Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1992
+ * Copyright (c) 1991, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pk_output.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+/*
+ * Packet caches.  pk_output() below files outgoing packets in
+ * pk_input_cache when that cache has a size set.
+ */
+struct mbuf_cache pk_output_cache = {0 }, pk_input_cache;
+/* Defined further down; hands out the next packet to send. */
+struct mbuf *nextpk ();
+
+/*
+ * Send whatever is ready to go on logical channel `lcp'.
+ *
+ * Packets are taken from nextpk() one at a time.  The sum of the
+ * decoded packet type and the channel state selects the state
+ * transition and any header fields to fill in (send/receive sequence
+ * numbers for DATA, RR and RNR); the packet is then traced and handed
+ * to the link level output routine of the channel's pkcb.  A packet
+ * that makes no sense in the current state is freed and output stops.
+ */
+pk_output (lcp)
+register struct pklcd *lcp;
+{
+	register struct x25_packet *xp;
+	register struct mbuf *m;
+	register struct pkcb *pkp;
+
+	/* Look at lcp before reaching through it for the pkcb. */
+	if (lcp == 0 || (pkp = lcp -> lcd_pkp) == 0) {
+		printf ("pk_output: zero arg\n");
+		return;
+	}
+
+	while ((m = nextpk (lcp)) != NULL) {
+		xp = mtod (m, struct x25_packet *);
+
+		switch (pk_decode (xp) + lcp -> lcd_state) {
+		/*
+		 *  All the work is already done - just set the state and
+		 *  pass to peer.
+		 */
+		case CALL + READY:
+			lcp -> lcd_state = SENT_CALL;
+			lcp -> lcd_timer = pk_t21;
+			break;
+
+		/*
+		 *  Just set the state to allow packet to flow and send the
+		 *  confirmation.
+		 */
+		case CALL_ACCEPTED + RECEIVED_CALL:
+			lcp -> lcd_state = DATA_TRANSFER;
+			break;
+
+		/*
+		 *  Just set the state. Keep the LCD around till the clear
+		 *  confirmation is returned.
+		 */
+		case CLEAR + RECEIVED_CALL:
+		case CLEAR + SENT_CALL:
+		case CLEAR + DATA_TRANSFER:
+			lcp -> lcd_state = SENT_CLEAR;
+			lcp -> lcd_retry = 0;
+			/* fall through */
+
+		case CLEAR + SENT_CLEAR:
+			lcp -> lcd_timer = pk_t23;
+			lcp -> lcd_retry++;
+			break;
+
+		case CLEAR_CONF + RECEIVED_CLEAR:
+		case CLEAR_CONF + SENT_CLEAR:
+		case CLEAR_CONF + READY:
+			lcp -> lcd_state = READY;
+			break;
+
+		case DATA + DATA_TRANSFER:
+			SPS(xp, lcp -> lcd_ssn);
+			lcp -> lcd_input_window =
+				(lcp -> lcd_rsn + 1) % MODULUS;
+			SPR(xp, lcp -> lcd_input_window);
+			lcp -> lcd_last_transmitted_pr = lcp -> lcd_input_window;
+			lcp -> lcd_ssn = (lcp -> lcd_ssn + 1) % MODULUS;
+			/* Stop sending once the send window is used up. */
+			if (lcp -> lcd_ssn == ((lcp -> lcd_output_window + lcp -> lcd_windowsize) % MODULUS))
+				lcp -> lcd_window_condition = TRUE;
+			break;
+
+		case INTERRUPT + DATA_TRANSFER:
+#ifdef ancient_history
+			xp -> packet_data = 0;
+#endif
+			lcp -> lcd_intrconf_pending = TRUE;
+			break;
+
+		case INTERRUPT_CONF + DATA_TRANSFER:
+			break;
+
+		case RR + DATA_TRANSFER:
+		case RNR + DATA_TRANSFER:
+			lcp -> lcd_input_window =
+				(lcp -> lcd_rsn + 1) % MODULUS;
+			SPR(xp, lcp -> lcd_input_window);
+			lcp -> lcd_last_transmitted_pr = lcp -> lcd_input_window;
+			break;
+
+		case RESET + DATA_TRANSFER:
+			lcp -> lcd_reset_condition = TRUE;
+			break;
+
+		case RESET_CONF + DATA_TRANSFER:
+			lcp -> lcd_reset_condition = FALSE;
+			break;
+
+		/*
+		 *  A restart should be only generated internally. Therefore
+		 *  all logic for restart is in the pk_restart routine.
+		 */
+		case RESTART + READY:
+			lcp -> lcd_timer = pk_t20;
+			break;
+
+		/*
+		 *  Restarts are all handled internally.  Therefore all the
+		 *  logic for the incoming restart packet is handled in the
+		 *  pk_input routine.
+		 */
+		case RESTART_CONF + READY:
+			break;
+
+		default:
+			m_freem (m);
+			return;
+		}
+
+		/* Trace the packet. */
+		pk_trace (pkp -> pk_xcp, m, "P-Out");
+
+		/* Pass the packet on down to the link layer */
+		/*
+		 * NOTE(review): outgoing packets are flagged and filed in
+		 * pk_input_cache, not pk_output_cache - presumably one
+		 * shared trace cache; confirm.
+		 */
+		if (pk_input_cache.mbc_size || pk_input_cache.mbc_oldsize) {
+			m->m_flags |= 0x08;
+			mbuf_cache(&pk_input_cache, m);
+		}
+		(*pkp -> pk_lloutput) (pkp -> pk_llnext, m, pkp -> pk_rt);
+	}
+}
+
+/*
+ * This procedure returns the next packet to send or null.  A
+ * packet is composed of one or more mbufs.
+ *
+ * A pending template packet always goes first.  Otherwise, unless the
+ * channel is held up by an RNR, window or reset condition, the first
+ * record is taken off the send buffer - the socket's so_snd, or the
+ * channel's own lcd_sb when there is no socket - and its mbufs are
+ * subtracted from the buffer's accounting.
+ */
+
+struct mbuf *
+nextpk (lcp)
+struct pklcd *lcp;
+{
+	register struct mbuf *m, *n;
+	struct socket *so = lcp -> lcd_so;
+	register struct sockbuf *sb;
+
+	/*
+	 * Choose the address, not the object: a conditional expression
+	 * is not an lvalue, so "& (so ? a : b)" is not valid C.
+	 */
+	sb = so ? &so -> so_snd : &lcp -> lcd_sb;
+
+	if (lcp -> lcd_template) {
+		m = lcp -> lcd_template;
+		lcp -> lcd_template = NULL;
+	} else {
+		if (lcp -> lcd_rnr_condition || lcp -> lcd_window_condition ||
+				lcp -> lcd_reset_condition)
+			return (NULL);
+
+		if ((m = sb -> sb_mb) == 0)
+			return (NULL);
+
+		/* Unlink the record from the buffer ... */
+		sb -> sb_mb = m -> m_nextpkt;
+		m->m_act = 0;
+		/* ... and take its mbufs out of the space accounting. */
+		for (n = m; n; n = n -> m_next)
+			sbfree (sb, n);
+	}
+	return (m);
+}
diff --git a/sys/netccitt/pk_subr.c b/sys/netccitt/pk_subr.c
new file mode 100644
index 000000000000..44c43b6f3f6b
--- /dev/null
+++ b/sys/netccitt/pk_subr.c
@@ -0,0 +1,1192 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (C) Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1992
+ * Copyright (c) 1991, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pk_subr.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/x25.h>
+#include <netccitt/x25err.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+/* Buffer space reserved per channel by pk_attach(): send and receive. */
+int pk_sendspace = 1024 * 2 + 8;
+int pk_recvspace = 1024 * 2 + 8;
+
+/* Head of the queue of all logical channel descriptors; starts out empty (points at itself). */
+struct pklcd_q pklcd_q = {&pklcd_q, &pklcd_q};
+
+/*
+ * One mask/shift pair per bit field.
+ * NOTE(review): presumably indexed by the field names given to
+ * X25GBITS/X25SBITS - confirm against their definitions.
+ */
+struct x25bitslice x25_bitslice[] = {
+/* mask, shift value */
+ { 0xf0, 0x4 },
+ { 0xf, 0x0 },
+ { 0x80, 0x7 },
+ { 0x40, 0x6 },
+ { 0x30, 0x4 },
+ { 0xe0, 0x5 },
+ { 0x10, 0x4 },
+ { 0xe, 0x1 },
+ { 0x1, 0x0 }
+};
+
+
+/*
+ * Attach the X.25 protocol to a socket: allocate a logical channel
+ * descriptor, queue it on pklcd_q and reserve buffer space.  The
+ * channel starts out READY, or in LISTEN state if the socket is
+ * accepting connections (i.e. is to receive incoming call packets).
+ *
+ * `so' may be 0, in which case the descriptor gets send space of its
+ * own.  When there is a socket, its so_pcb is pointed at the new
+ * descriptor and so_error is set to the result of the reservation
+ * (ENOBUFS if no descriptor could be allocated).
+ *
+ * Returns the descriptor, or 0 if none could be allocated.
+ */
+
+struct pklcd *
+pk_attach (so)
+struct socket *so;
+{
+	register struct pklcd *lcp;
+	register int error = ENOBUFS;
+	int pk_output ();
+
+	MALLOC(lcp, struct pklcd *, sizeof (*lcp), M_PCB, M_NOWAIT);
+	if (lcp == 0) {
+		/* Out of memory: leave the socket without a pcb. */
+		if (so) {
+			so -> so_pcb = (caddr_t) lcp;
+			so -> so_error = error;
+		}
+		return (lcp);
+	}
+
+	bzero ((caddr_t)lcp, sizeof (*lcp));
+	insque (&lcp -> lcd_q, &pklcd_q);
+	lcp -> lcd_state = READY;
+	lcp -> lcd_send = pk_output;
+
+	if (so == 0) {
+		/* No socket: the channel queues output on its own buffer. */
+		sbreserve (&lcp -> lcd_sb, pk_sendspace);
+		return (lcp);
+	}
+
+	error = soreserve (so, pk_sendspace, pk_recvspace);
+	lcp -> lcd_so = so;
+	if (so -> so_options & SO_ACCEPTCONN)
+		lcp -> lcd_state = LISTEN;
+	so -> so_pcb = (caddr_t) lcp;
+	so -> so_error = error;
+	return (lcp);
+}
+
+/*
+ * Disconnect the X.25 protocol from a socket.  What has to be done
+ * depends on the state of the channel: a listener is taken off the
+ * listen list and closed, an idle channel is closed, a channel that
+ * is already clearing is left alone, and anything else is cleared.
+ */
+
+pk_disconnect (lcp)
+register struct pklcd *lcp;
+{
+	register struct socket *so = lcp -> lcd_so;
+	register struct pklcd **linkp;
+
+	switch (lcp -> lcd_state) {
+	case LISTEN:
+		/* Find the link that points at lcp and route it around. */
+		for (linkp = &pk_listenhead; *linkp; linkp = &(*linkp) -> lcd_listen)
+			if (*linkp == lcp) {
+				*linkp = lcp -> lcd_listen;
+				break;
+			}
+		pk_close (lcp);
+		break;
+
+	case READY:
+		pk_acct (lcp);
+		pk_close (lcp);
+		break;
+
+	case SENT_CLEAR:
+	case RECEIVED_CLEAR:
+		/* A clear is under way already. */
+		break;
+
+	default:
+		pk_acct (lcp);
+		if (so) {
+			soisdisconnecting (so);
+			sbflush (&so -> so_rcv);
+		}
+		pk_clear (lcp, 241, 0); /* Normal Disconnect */
+		break;
+	}
+}
+
+/*
+ * Close an X.25 Logical Channel.  Discard all space held by the
+ * connection and internal descriptors.  Wake up any sleepers.
+ */
+
+pk_close (lcp)
+struct pklcd *lcp;
+{
+	register struct socket *so = lcp -> lcd_so;
+
+	if (so == NULL) {
+		pk_freelcd (lcp);
+		return;
+	}
+
+	/*
+	 * The X.25 connection may be torn down by a link level failure
+	 * (e.g. LLC2 FRMR) while the user level is still filling the
+	 * socket send buffer.  That buffer is locked then, and trying to
+	 * sbflush () it would panic.  So check whether the send buffer
+	 * is locked; if it is, just mark the lcp as a zombie and let
+	 * pk_timer () do the cleaning.
+	 */
+	if (so -> so_snd.sb_flags & SB_LOCK)
+		lcp -> lcd_state = LCN_ZOMBIE;
+	else
+		pk_freelcd (lcp);
+
+	so -> so_pcb = 0;
+	soisdisconnected (so);
+	/* No sofree (so) here - gak!!! you can't do that here. */
+}
+
+/*
+ * Create a template to be used to send X.25 packets on a logical
+ * channel.  An mbuf is allocated and a skeletal packet header of the
+ * given type is filled in for channel `lcn'.  The packet is later
+ * passed to pk_output, where the remainder is filled in.
+ * Panics if no mbuf is available.
+ */
+
+struct mbuf *
+pk_template (lcn, type)
+int lcn, type;
+{
+	register struct mbuf *pkt;
+	register struct x25_packet *hdr;
+
+	MGETHDR (pkt, M_DONTWAIT, MT_HEADER);
+	if (pkt == 0)
+		panic ("pk_template");
+	pkt -> m_act = 0;
+
+	/*
+	 * Efficiency hack: leave a gap of max_linkhdr octets in front of
+	 * the packet level header in the hope that the link level can
+	 * insert its own header there.
+	 */
+	pkt -> m_data += max_linkhdr;
+	pkt -> m_len = PKHEADERLN;
+	pkt -> m_pkthdr.len = pkt -> m_len;
+
+	hdr = mtod (pkt, struct x25_packet *);
+	/* Clear the header in one store (ugly, but fast) ... */
+	*(long *)hdr = 0;
+	/* ... which leaves the Q bit and the channel group number zero. */
+	X25SBITS(hdr -> bits, fmt_identifier, 1);
+	SET_LCN(hdr, lcn);
+	hdr -> packet_type = type;
+
+	return (pkt);
+}
+
+/*
+ * This routine restarts all the virtual circuits.  Actually,
+ * the virtual circuits are not "restarted" as such.  Instead,
+ * any active switched circuit is simply returned to READY
+ * state (channels with a socket are closed with ENETRESET).
+ *
+ * A negative restart_cause only resets the channels; otherwise a
+ * RESTART packet carrying restart_cause is sent on channel 0.
+ */
+
+pk_restart (pkp, restart_cause)
+register struct pkcb *pkp;
+int restart_cause;
+{
+	register struct mbuf *m;
+	register struct pklcd *lcp;
+	register int chan;
+
+	/* Nothing to restart without a channel table. */
+	if (pkp -> pk_chan == 0)
+		return;
+
+	/*
+	 * Leave the channels alone when the restart was issued from
+	 * inside pk_connect () --- that happens if and only if the
+	 * X.25 link is down, i.e. a RESTART is needed to get it up.
+	 */
+	if ((pkp -> pk_dxerole & DTE_CONNECTPENDING) == 0) {
+		for (chan = 1; chan <= pkp -> pk_maxlcn; ++chan) {
+			lcp = pkp -> pk_chan[chan];
+			if (lcp == NULL)
+				continue;
+			if (lcp -> lcd_so) {
+				lcp -> lcd_so -> so_error = ENETRESET;
+				pk_close (lcp);
+				continue;
+			}
+			pk_flush (lcp);
+			lcp -> lcd_state = READY;
+			if (lcp -> lcd_upper)
+				lcp -> lcd_upper (lcp, 0);
+		}
+	}
+
+	if (restart_cause < 0)
+		return;
+
+	pkp -> pk_state = DTE_SENT_RESTART;
+	pkp -> pk_dxerole &= ~(DTE_PLAYDCE | DTE_PLAYDTE);
+
+	/* Build the RESTART on channel 0: header plus two more octets. */
+	lcp = pkp -> pk_chan[0];
+	m = pk_template (lcp -> lcd_lcn, X25_RESTART);
+	lcp -> lcd_template = m;
+	m -> m_len += 2;
+	m -> m_pkthdr.len = m -> m_len;
+	mtod (m, struct x25_packet *) -> packet_data = 0;	/* DTE only */
+	mtod (m, octet *)[4] = restart_cause;
+	pk_output (lcp);
+}
+
+
+/*
+ * Release a Logical Channel Descriptor: give up its slot in the
+ * channel table, discard whatever is still queued on it, unlink it
+ * from its queue and free the storage. A null lcp is ignored.
+ */
+
+pk_freelcd (lcp)
+register struct pklcd *lcp;
+{
+	if (lcp != NULL) {
+		/* Only descriptors with a positive lcn occupy a pk_chan slot. */
+		if (lcp -> lcd_lcn > 0)
+			lcp -> lcd_pkp -> pk_chan[lcp -> lcd_lcn] = NULL;
+
+		pk_flush (lcp);
+		remque (&lcp -> lcd_q);
+		free ((caddr_t)lcp, M_PCB);
+	}
+}
+
+/*
+ * Search every address of every interface for an AF_CCITT address
+ * whose first 16 x25_addr octets equal those of sx. Returns the
+ * matching x25_ifaddr, or a null pointer when there is none.
+ */
+static struct x25_ifaddr *
+pk_ifwithaddr (sx)
+	struct sockaddr_x25 *sx;
+{
+	struct ifnet *ifp;
+	struct ifaddr *ifa;
+	register struct x25_ifaddr *ia;
+	char *addr = sx -> x25_addr;
+
+	for (ifp = ifnet; ifp; ifp = ifp -> if_next) {
+		for (ifa = ifp -> if_addrlist; ifa; ifa = ifa -> ifa_next) {
+			if (ifa -> ifa_addr -> sa_family != AF_CCITT)
+				continue;
+			ia = (struct x25_ifaddr *)ifa;
+			if (bcmp (addr, ia -> ia_xc.xc_addr.x25_addr, 16) == 0)
+				return (ia);
+		}
+	}
+	return ((struct x25_ifaddr *)0);
+}
+
+
+/*
+ * XTRACTPKP: fetch the pkcb recorded on a route. For a gateway route
+ * (RTF_GATEWAY) rt_llinfo is treated as another rtentry whose own
+ * rt_llinfo is the pkcb (NULL when rt_llinfo is unset); for any other
+ * route rt_llinfo itself is the pkcb.
+ */
+
+#define XTRACTPKP(rt) ((rt) -> rt_flags & RTF_GATEWAY ? \
+ ((rt) -> rt_llinfo ? \
+ (struct pkcb *) ((struct rtentry *)((rt) -> rt_llinfo)) -> rt_llinfo : \
+ (struct pkcb *) NULL) : \
+ (struct pkcb *)((rt) -> rt_llinfo))
+
+/*
+ * Bind an address and protocol value to a socket. The important
+ * part is the protocol value - the first four characters of the
+ * Call User Data field.
+ *
+ * nam is an mbuf holding a sockaddr_x25. Returns 0 after copying
+ * the address into lcd_laddr, or EADDRNOTAVAIL (no nam), EADDRINUSE
+ * (already bound, or a listener with the same user data exists),
+ * EINVAL (pk_checksockaddr rejected nam) or ENETUNREACH.
+ */
+
+pk_bind (lcp, nam)
+struct pklcd *lcp;
+struct mbuf *nam;
+{
+ register struct pklcd *pp;
+ register struct sockaddr_x25 *sa;
+
+ if (nam == NULL)
+ return (EADDRNOTAVAIL);
+ if (lcp -> lcd_ceaddr) /* XXX an address is already attached */
+ return (EADDRINUSE);
+ if (pk_checksockaddr (nam))
+ return (EINVAL);
+ sa = mtod (nam, struct sockaddr_x25 *);
+
+ /*
+ * If the user wishes to accept calls only from a particular
+ * net (net != 0), make sure the net is known. An explicit
+ * address must instead match one of our interface addresses.
+ */
+
+ if (sa -> x25_addr[0]) {
+ if (!pk_ifwithaddr (sa))
+ return (ENETUNREACH);
+ } else if (sa -> x25_net) {
+ if (!ifa_ifwithnet ((struct sockaddr *)sa))
+ return (ENETUNREACH);
+ }
+
+ /*
+ * For ISO's sake permit default listeners, but only one such . . .
+ * A clash is an existing listener whose user data has the same
+ * length and, when that length is non-zero, the same contents.
+ */
+ for (pp = pk_listenhead; pp; pp = pp -> lcd_listen) {
+ register struct sockaddr_x25 *sa2 = pp -> lcd_ceaddr;
+ if ((sa2 -> x25_udlen == sa -> x25_udlen) &&
+ (sa2 -> x25_udlen == 0 ||
+ (bcmp (sa2 -> x25_udata, sa -> x25_udata,
+ min (sa2 -> x25_udlen, sa -> x25_udlen)) == 0)))
+ return (EADDRINUSE);
+ }
+ lcp -> lcd_laddr = *sa;
+ lcp -> lcd_ceaddr = &lcp -> lcd_laddr;
+ return (0);
+}
+
+/*
+ * Put a bound control block on the list of listeners.
+ * Returns EDESTADDRREQ when no address has been bound, 0 otherwise.
+ */
+pk_listen (lcp)
+register struct pklcd *lcp;
+{
+	register struct pklcd **tailp;
+
+	if (lcp -> lcd_ceaddr == 0)
+		return (EDESTADDRREQ);
+
+	lcp -> lcd_state = LISTEN;
+
+	if (lcp -> lcd_ceaddr -> x25_udlen != 0) {
+		/* Listeners with user data are pushed on the front. */
+		lcp -> lcd_listen = pk_listenhead;
+		pk_listenhead = lcp;
+		return (0);
+	}
+
+	/* A default listener (no user data) goes after all the others. */
+	tailp = &pk_listenhead;
+	while (*tailp)
+		tailp = &((*tailp) -> lcd_listen);
+	*tailp = lcp;
+	return (0);
+}
+/*
+ * Include a listening control block for the benefit of other protocols.
+ *
+ * A socket-less descriptor is attached, bound to an address whose
+ * user data is the single octet spi with length spilen, and put on
+ * the listen list with callee as its lcd_upper hook. Returns 0 on
+ * success, ENOBUFS when the descriptor or the address mbuf cannot be
+ * obtained, or the error from pk_bind ()/pk_listen (); the descriptor
+ * is freed again on any error.
+ */
+pk_protolisten (spi, spilen, callee)
+int (*callee) ();
+{
+ register struct pklcd *lcp = pk_attach ((struct socket *)0);
+ register struct mbuf *nam;
+ register struct sockaddr_x25 *sa;
+ int error = ENOBUFS;
+
+ if (lcp) {
+ if (nam = m_getclr (MT_SONAME, M_DONTWAIT)) {
+ sa = mtod (nam, struct sockaddr_x25 *);
+ sa -> x25_family = AF_CCITT;
+ sa -> x25_len = nam -> m_len = sizeof (*sa);
+ sa -> x25_udlen = spilen;
+ sa -> x25_udata[0] = spi;
+ lcp -> lcd_upper = callee;
+ lcp -> lcd_flags = X25_MBS_HOLD;
+ if ((error = pk_bind (lcp, nam)) == 0)
+ error = pk_listen (lcp);
+ (void) m_free (nam); /* pk_bind copied the address, nam is no longer needed */
+ }
+ if (error)
+ pk_freelcd (lcp);
+ }
+ return error; /* Hopefully Zero !*/
+}
+
+/*
+ * Tie a logical channel descriptor to a network (pkcb).
+ * The descriptor is entered in the channel table under its lcn and
+ * receives the network's default packet and window sizes unless sa
+ * names its own; whichever value is used is also left in sa, so that
+ * sa describes the parameters actually in force. The network number,
+ * option flags and start time are recorded as well.
+ */
+
+pk_assoc (pkp, lcp, sa)
+register struct pkcb *pkp;
+register struct pklcd *lcp;
+register struct sockaddr_x25 *sa;
+{
+	register struct x25config *xcp = pkp -> pk_xcp;
+
+	lcp -> lcd_pkp = pkp;
+	lcp -> lcd_rsn = MODULUS - 1;
+	pkp -> pk_chan[lcp -> lcd_lcn] = lcp;
+
+	/* Packet size: caller's choice if given, else the network default. */
+	if (sa -> x25_opts.op_psize) {
+		lcp -> lcd_packetsize = sa -> x25_opts.op_psize;
+	} else {
+		lcp -> lcd_packetsize = xcp -> xc_psize;
+		sa -> x25_opts.op_psize = lcp -> lcd_packetsize;
+	}
+
+	/* Window size: same rule. */
+	if (sa -> x25_opts.op_wsize) {
+		lcp -> lcd_windowsize = sa -> x25_opts.op_wsize;
+	} else {
+		lcp -> lcd_windowsize = xcp -> xc_pwsize;
+		sa -> x25_opts.op_wsize = lcp -> lcd_windowsize;
+	}
+
+	sa -> x25_net = xcp -> xc_addr.x25_net;
+	lcp -> lcd_flags |= sa -> x25_opts.op_flags;
+	lcp -> lcd_stime = time.tv_sec;
+}
+
+/*
+ * Start an outgoing call on lcp to the address in sa.
+ *
+ * Returns EDESTADDRREQ for an empty address, ENETUNREACH when no
+ * route is found, ENETDOWN when the packet level is neither
+ * DTE_READY nor DTE_WAITING, EMFILE when no logical channel is free.
+ * If the link is still DTE_WAITING the call is left pending (see
+ * DTE_CONNECTPENDING) and 0 is returned; otherwise the CALL packet
+ * is built and the result of the interface start routine returned.
+ */
+pk_connect (lcp, sa)
+register struct pklcd *lcp;
+register struct sockaddr_x25 *sa;
+{
+ register struct pkcb *pkp;
+ register struct rtentry *rt;
+ register struct rtentry *nrt;
+
+ struct rtentry *npaidb_enter ();
+ struct pkcb *pk_newlink ();
+
+ if (sa -> x25_addr[0] == '\0')
+ return (EDESTADDRREQ);
+
+ /*
+ * Is the destination address known?
+ * NOTE(review): nothing visible here releases rt on the error
+ * returns below - confirm who owns the route reference.
+ */
+ if (!(rt = rtalloc1 ((struct sockaddr *)sa, 1)))
+ return (ENETUNREACH);
+
+ /* NOTE(review): pkp is used unchecked below - confirm pk_newlink cannot return null. */
+ if (!(pkp = XTRACTPKP(rt)))
+ pkp = pk_newlink ((struct x25_ifaddr *) (rt -> rt_ifa),
+ (caddr_t) 0);
+
+ /*
+ * Have we entered the LLC address?
+ */
+ if (nrt = npaidb_enter (rt -> rt_gateway, rt_key (rt), rt, 0))
+ pkp -> pk_llrt = nrt;
+
+ /*
+ * Have we allocated an LLC2 link yet?
+ */
+ if (pkp -> pk_llnext == (caddr_t)0 && pkp -> pk_llctlinput) {
+ struct dll_ctlinfo ctlinfo;
+
+ ctlinfo.dlcti_rt = rt;
+ ctlinfo.dlcti_pcb = (caddr_t) pkp;
+ ctlinfo.dlcti_conf =
+ (struct dllconfig *) (&((struct x25_ifaddr *)(rt -> rt_ifa)) -> ia_xc);
+ pkp -> pk_llnext =
+ (pkp -> pk_llctlinput) (PRC_CONNECT_REQUEST, 0, &ctlinfo);
+ }
+
+ if (pkp -> pk_state != DTE_READY && pkp -> pk_state != DTE_WAITING)
+ return (ENETDOWN);
+ if ((lcp -> lcd_lcn = pk_getlcn (pkp)) == 0)
+ return (EMFILE);
+
+ /* Keep a private copy of the destination; pk_assoc may fill in defaults. */
+ lcp -> lcd_faddr = *sa;
+ lcp -> lcd_ceaddr = & lcp -> lcd_faddr;
+ pk_assoc (pkp, lcp, lcp -> lcd_ceaddr);
+
+ /*
+ * If the link is not up yet, initiate an X.25 RESTART
+ */
+ if (pkp -> pk_state == DTE_WAITING) {
+ pkp -> pk_dxerole |= DTE_CONNECTPENDING;
+ pk_ctlinput (PRC_LINKUP, (struct sockaddr *)0, pkp);
+ if (lcp -> lcd_so)
+ soisconnecting (lcp -> lcd_so);
+ return 0;
+ }
+
+ if (lcp -> lcd_so)
+ soisconnecting (lcp -> lcd_so);
+ lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_CALL);
+ pk_callrequest (lcp, lcp -> lcd_ceaddr, pkp -> pk_xcp);
+ return (*pkp -> pk_ia -> ia_start) (lcp);
+}
+
+/*
+ * Complete all pending X.25 call requests --- this gets called after
+ * the X.25 link has been restarted.
+ *
+ * Nothing happens unless DTE_CONNECTPENDING is set in pk_dxerole;
+ * the flag is cleared here. When we turn out to be playing DCE the
+ * channel table is mirrored first, then a CALL is built and started
+ * on every allocated channel.
+ */
+#define RESHUFFLELCN(maxlcn, lcn) ((maxlcn) - (lcn) + 1)
+
+pk_callcomplete (pkp)
+	register struct pkcb *pkp;
+{
+	register struct pklcd *lcp;
+	register int i;
+	register int ni;
+
+	if ((pkp -> pk_dxerole & DTE_CONNECTPENDING) == 0)
+		return;
+	pkp -> pk_dxerole &= ~DTE_CONNECTPENDING;
+
+	if (pkp -> pk_chan == 0)
+		return;
+
+	/*
+	 * We pretended to be a DTE for allocating lcns, if
+	 * it turns out that we are in reality performing as a
+	 * DCE we need to reshuffle the lcps.
+	 *
+	 * /+---------------+-------- -
+	 * / | a (maxlcn-1) | \
+	 * / +---------------+ \
+	 * +--- * | b (maxlcn-2) | \
+	 * | \ +---------------+ \
+	 * r | \ | c (maxlcn-3) | \
+	 * e | \+---------------+ |
+	 * s | | . |
+	 * h | | . | m
+	 * u | | . | a
+	 * f | | . | x
+	 * f | | . | l
+	 * l | /+---------------+ | c
+	 * e | / | c' ( 3 ) | | n
+	 * | / +---------------+ |
+	 * +--> * | b' ( 2 ) | /
+	 * \ +---------------+ /
+	 * \ | a' ( 1 ) | /
+	 * \+---------------+ /
+	 * | 0 | /
+	 * +---------------+-------- -
+	 *
+	 * Each slot in the upper half is exchanged with its mirror in
+	 * the lower half exactly once. Moving entries one way while
+	 * scanning the whole table would visit the destination slot
+	 * later in the same scan and move the descriptor straight back,
+	 * and would overwrite a descriptor already in the mirror slot.
+	 */
+	if (pkp -> pk_dxerole & DTE_PLAYDCE) {
+		/* Sigh, reshuffle it */
+		for (i = pkp -> pk_maxlcn;
+		    i > (ni = RESHUFFLELCN(pkp -> pk_maxlcn, i)); --i) {
+			lcp = pkp -> pk_chan[i];
+			pkp -> pk_chan[i] = pkp -> pk_chan[ni];
+			pkp -> pk_chan[ni] = lcp;
+			if (pkp -> pk_chan[i])
+				pkp -> pk_chan[i] -> lcd_lcn = i;
+			if (pkp -> pk_chan[ni])
+				pkp -> pk_chan[ni] -> lcd_lcn = ni;
+		}
+	}
+
+	/* Issue the deferred CALL on every channel that is in use. */
+	for (i = 1; i <= pkp -> pk_maxlcn; ++i)
+		if ((lcp = pkp -> pk_chan[i]) != NULL) {
+			/* if (lcp -> lcd_so)
+				soisconnecting (lcp -> lcd_so); */
+			lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_CALL);
+			pk_callrequest (lcp, lcp -> lcd_ceaddr, pkp -> pk_xcp);
+			(*pkp -> pk_ia -> ia_start) (lcp);
+		}
+}
+
+struct bcdinfo { /* output cursor used by to_bcd () */
+ octet *cp; /* octet currently being filled */
+ unsigned posn; /* digits written so far; odd means the low nibble of *cp is next */
+};
+/*
+ * Build the rest of the CALL REQUEST packet. Fill in calling
+ * address, facilities fields and the user data field.
+ *
+ * lcp -> lcd_template must already hold the packet header. sa is
+ * the called address, xcp supplies the calling address (xc_addr)
+ * and the X.25 flavour passed to pk_build_facilities ().
+ */
+
+pk_callrequest (lcp, sa, xcp)
+struct pklcd *lcp;
+register struct sockaddr_x25 *sa;
+register struct x25config *xcp;
+{
+ register struct x25_calladdr *a;
+ register struct mbuf *m = lcp -> lcd_template;
+ register struct x25_packet *xp = mtod (m, struct x25_packet *);
+ struct bcdinfo b;
+
+ if (lcp -> lcd_flags & X25_DBIT)
+ X25SBITS(xp -> bits, d_bit, 1);
+ a = (struct x25_calladdr *) &xp -> packet_data;
+ b.cp = (octet *) a -> address_field;
+ b.posn = 0;
+ /* Called digits first, calling digits packed directly behind them. */
+ X25SBITS(a -> addrlens, called_addrlen, to_bcd (&b, sa, xcp));
+ X25SBITS(a -> addrlens, calling_addrlen, to_bcd (&b, &xcp -> xc_addr, xcp));
+ if (b.posn & 0x01)
+ *b.cp++ &= 0xf0; /* odd digit count: zero the unused low nibble */
+ m -> m_pkthdr.len = m -> m_len += b.cp - (octet *) a;
+
+ if (lcp -> lcd_facilities) {
+ /* Caller supplied facilities: chain them on and take ownership. */
+ m -> m_pkthdr.len +=
+ (m -> m_next = lcp -> lcd_facilities) -> m_pkthdr.len;
+ lcp -> lcd_facilities = 0;
+ } else
+ pk_build_facilities (m, sa, (int)xcp -> xc_type);
+
+ /* Call user data goes at the current end of the packet. */
+ m_copyback (m, m -> m_pkthdr.len, sa -> x25_udlen, sa -> x25_udata);
+}
+
+/*
+ * Append a facilities field to the packet in m: one length octet
+ * followed by the facilities derived from sa -> x25_opts and the
+ * X.25 flavour in type. m_len and m_pkthdr.len are advanced past
+ * the field.
+ * NOTE(review): nothing here checks that the mbuf has room for the
+ * octets written - confirm the callers guarantee it.
+ */
+pk_build_facilities (m, sa, type)
+register struct mbuf *m;
+struct sockaddr_x25 *sa;
+{
+ register octet *cp;
+ register octet *fcp;
+ register int revcharge;
+
+ cp = mtod (m, octet *) + m -> m_len; /* length octet, filled in last */
+ fcp = cp + 1; /* first facility octet */
+ revcharge = sa -> x25_opts.op_flags & X25_REVERSE_CHARGE ? 1 : 0;
+ /*
+ * This is specific to Datapac X.25(1976) DTEs. International
+ * calls must have the "hi priority" bit on.
+ */
+ if (type == X25_1976 && sa -> x25_opts.op_psize == X25_PS128)
+ revcharge |= 02;
+ if (revcharge) {
+ *fcp++ = FACILITIES_REVERSE_CHARGE;
+ *fcp++ = revcharge;
+ }
+ switch (type) {
+ case X25_1980:
+ case X25_1984:
+ /* The size and window values are each written twice. */
+ *fcp++ = FACILITIES_PACKETSIZE;
+ *fcp++ = sa -> x25_opts.op_psize;
+ *fcp++ = sa -> x25_opts.op_psize;
+
+ *fcp++ = FACILITIES_WINDOWSIZE;
+ *fcp++ = sa -> x25_opts.op_wsize;
+ *fcp++ = sa -> x25_opts.op_wsize;
+ }
+ *cp = fcp - cp - 1; /* number of facility octets, excluding the length octet */
+ m -> m_pkthdr.len = (m -> m_len += *cp + 1);
+}
+
+/*
+ * Append the digits of sa -> x25_addr at the cursor b, two digits
+ * per octet (high nibble first), and return how many digits were
+ * added. Depending on xcp the network's DNIC may be stripped from
+ * the front of the address or a leading zero digit emitted.
+ */
+to_bcd (b, sa, xcp)
+register struct bcdinfo *b;
+struct sockaddr_x25 *sa;
+register struct x25config *xcp;
+{
+ register char *x = sa -> x25_addr;
+ unsigned start = b -> posn;
+ /*
+ * The nodnic and prepnd0 stuff looks tedious,
+ * but it does allow full X.121 addresses to be used,
+ * which is handy for routing info (& OSI type 37 addresses).
+ */
+ if (xcp -> xc_addr.x25_net && (xcp -> xc_nodnic || xcp -> xc_prepnd0)) {
+ char dnicname[sizeof (long) * NBBY/3 + 2];
+ register char *p = dnicname;
+
+ /* Compare the decimal DNIC with the leading address digits. */
+ sprintf (p, "%d", xcp -> xc_addr.x25_net & 0x7fff);
+ for (; *p; p++) /* *p == 0 means dnic matched */
+ if ((*p ^ *x++) & 0x0f)
+ break;
+ /* Keep x past the DNIC only when it matched and xc_nodnic is set. */
+ if (*p || xcp -> xc_nodnic == 0)
+ x = sa -> x25_addr;
+ /* DNIC did not match and xc_prepnd0 is set: emit a zero digit. */
+ if (*p && xcp -> xc_prepnd0) {
+ if ((b -> posn)++ & 0x01)
+ *(b -> cp)++; /* value unused: only the pointer advances */
+ else
+ *(b -> cp) = 0;
+ }
+ }
+ while (*x)
+ if ((b -> posn)++ & 0x01)
+ *(b -> cp)++ |= *x++ & 0x0F; /* odd digit: low nibble, then next octet */
+ else
+ *(b -> cp) = *x++ << 4; /* even digit: high nibble, low nibble cleared */
+ return ((b -> posn) - start);
+}
+
+/*
+ * Return the first free logical channel number, or 0 when the
+ * channel table is missing or full. The table is scanned
+ * - downwards from pk_maxlcn when playing DTE, and
+ * - upwards from 1 when playing DCE.
+ */
+
+pk_getlcn (pkp)
+register struct pkcb *pkp;
+{
+	register int lcn;
+
+	if (pkp -> pk_chan == 0)
+		return (0);
+
+	if (pkp -> pk_dxerole & DTE_PLAYDCE) {
+		for (lcn = 1; lcn <= pkp -> pk_maxlcn; lcn++)
+			if (pkp -> pk_chan[lcn] == NULL)
+				return (lcn);
+		return (0);
+	}
+
+	/* Playing DTE: the downward scan stops at 0 by itself when full. */
+	for (lcn = pkp -> pk_maxlcn; lcn > 0; lcn--)
+		if (pkp -> pk_chan[lcn] == NULL)
+			break;
+	return (lcn);
+}
+
+/*
+ * This procedure sends a CLEAR request packet. The lc state is
+ * set to "SENT_CLEAR".
+ * NOTE(review): the state is not changed in this routine itself;
+ * presumably pk_output () does it - confirm.
+ *
+ * diagnostic is stored in octet 4 of the packet. When abortive is
+ * non-zero the packet becomes the channel's template; otherwise it
+ * is appended as a record to the send buffer.
+ */
+
+pk_clear (lcp, diagnostic, abortive)
+register struct pklcd *lcp;
+{
+ register struct mbuf *m = pk_template (lcp -> lcd_lcn, X25_CLEAR);
+
+ m -> m_len += 2;
+ m -> m_pkthdr.len += 2;
+ mtod (m, struct x25_packet *) -> packet_data = 0;
+ mtod (m, octet *)[4] = diagnostic;
+ if (lcp -> lcd_facilities) {
+ /* Pending facilities ride along behind the header. */
+ m -> m_next = lcp -> lcd_facilities;
+ m -> m_pkthdr.len += m -> m_next -> m_len;
+ lcp -> lcd_facilities = 0;
+ }
+ if (abortive)
+ lcp -> lcd_template = m;
+ else {
+ struct socket *so = lcp -> lcd_so;
+ /* Socket send buffer if a socket is attached, else the lcd's own. */
+ struct sockbuf *sb = so ? & so -> so_snd : & lcp -> lcd_sb;
+ sbappendrecord (sb, m);
+ }
+ pk_output (lcp);
+
+}
+
+/*
+ * Send an RNR (inhibit) or RR (enable) to throttle or reopen inward
+ * data flow. Nothing is sent unless the channel is in DATA_TRANSFER,
+ * and, unless forced is set, only when the requested condition
+ * differs from the recorded one. One forces RNR's to ack data.
+ */
+pk_flowcontrol (lcp, inhibit, forced)
+register struct pklcd *lcp;
+{
+	/* Normalise to 0/1 so it compares with the stored condition. */
+	inhibit = inhibit ? 1 : 0;
+
+	if (lcp == 0 || lcp -> lcd_state != DATA_TRANSFER)
+		return;
+	if (forced == 0 && lcp -> lcd_rxrnr_condition == inhibit)
+		return;
+
+	lcp -> lcd_rxrnr_condition = inhibit;
+	if (inhibit)
+		lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_RNR);
+	else
+		lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_RR);
+	pk_output (lcp);
+}
+
+/*
+ * This procedure sends a RESET request packet. It re-initializes
+ * the virtual circuit.
+ *
+ * Does nothing unless the channel is in DATA_TRANSFER. diagnostic
+ * is stored in octet 4 of the packet; an attached socket gets
+ * ECONNRESET as its pending error.
+ */
+
+static
+pk_reset (lcp, diagnostic)
+register struct pklcd *lcp;
+{
+ register struct mbuf *m;
+ register struct socket *so = lcp -> lcd_so;
+
+ if (lcp -> lcd_state != DATA_TRANSFER)
+ return;
+
+ if (so)
+ so -> so_error = ECONNRESET;
+ lcp -> lcd_reset_condition = TRUE;
+
+ /* Reset all the control variables for the channel. */
+ pk_flush (lcp);
+ lcp -> lcd_window_condition = lcp -> lcd_rnr_condition =
+ lcp -> lcd_intrconf_pending = FALSE;
+ lcp -> lcd_rsn = MODULUS - 1;
+ lcp -> lcd_ssn = 0;
+ lcp -> lcd_output_window = lcp -> lcd_input_window =
+ lcp -> lcd_last_transmitted_pr = 0;
+ m = lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_RESET);
+ m -> m_pkthdr.len = m -> m_len += 2; /* room for the two octets stored below */
+ mtod (m, struct x25_packet *) -> packet_data = 0;
+ mtod (m, octet *)[4] = diagnostic;
+ pk_output (lcp);
+
+}
+
+/*
+ * This procedure frees all data queued for output or delivery on a
+ * virtual circuit.
+ *
+ * The pending template, the lcd_cps chain and any saved facilities
+ * are released and their pointers cleared; then the send buffer is
+ * flushed - the socket's so_snd when a socket is attached, the
+ * descriptor's own lcd_sb otherwise.
+ */
+
+pk_flush (lcp)
+register struct pklcd *lcp;
+{
+	register struct socket *so;
+
+	if (lcp -> lcd_template) {
+		m_freem (lcp -> lcd_template);
+		/*
+		 * Clear the pointer like the other fields below.
+		 * pk_restart () flushes a descriptor and keeps using it,
+		 * so a stale template could be sent or freed again later.
+		 */
+		lcp -> lcd_template = 0;
+	}
+
+	if (lcp -> lcd_cps) {
+		m_freem (lcp -> lcd_cps);
+		lcp -> lcd_cps = 0;
+	}
+	if (lcp -> lcd_facilities) {
+		m_freem (lcp -> lcd_facilities);
+		lcp -> lcd_facilities = 0;
+	}
+	if ((so = lcp -> lcd_so) != 0)
+		sbflush (&so -> so_snd);
+	else
+		sbflush (&lcp -> lcd_sb);
+}
+
+/*
+ * Handle a local protocol procedure error: log errstr for the
+ * channel, then either clear the call (error == CLEAR, the socket,
+ * if any, is marked ECONNABORTED and disconnecting) or reset the
+ * circuit (error == RESET). Other values only produce the message.
+ */
+
+pk_procerror (error, lcp, errstr, diagnostic)
+register struct pklcd *lcp;
+char *errstr;
+{
+	pk_message (lcp -> lcd_lcn, lcp -> lcd_pkp -> pk_xcp, errstr);
+
+	if (error == RESET) {
+		pk_reset (lcp, diagnostic);
+		return;
+	}
+	if (error != CLEAR)
+		return;
+
+	if (lcp -> lcd_so) {
+		lcp -> lcd_so -> so_error = ECONNABORTED;
+		soisdisconnecting (lcp -> lcd_so);
+	}
+	pk_clear (lcp, diagnostic, 1);
+}
+
+/*
+ * Called in the DATA TRANSFER state to validate and apply the P(R)
+ * value carried by a DATA, RR or RNR packet. A P(R) outside the
+ * range from lcd_output_window to lcd_ssn (which may wrap) resets
+ * the circuit and yields ERROR_PACKET; otherwise the window is
+ * rotated to pr, a waiting writer is woken and PACKET_OK returned.
+ */
+
+pk_ack (lcp, pr)
+struct pklcd *lcp;
+unsigned pr;
+{
+	register struct socket *so = lcp -> lcd_so;
+	char *why = 0;
+
+	if (lcp -> lcd_output_window == pr)
+		return (PACKET_OK);
+
+	if (lcp -> lcd_output_window < lcp -> lcd_ssn) {
+		/* Window does not wrap: pr must lie inside it. */
+		if (pr < lcp -> lcd_output_window || pr > lcp -> lcd_ssn)
+			why = "p(r) flow control error";
+	} else {
+		/* Window wraps: only the gap between ssn and the window is bad. */
+		if (pr < lcp -> lcd_output_window && pr > lcp -> lcd_ssn)
+			why = "p(r) flow control error #2";
+	}
+	if (why) {
+		pk_procerror (RESET, lcp, why, 2);
+		return (ERROR_PACKET);
+	}
+
+	lcp -> lcd_output_window = pr;		/* Rotate window. */
+	if (lcp -> lcd_window_condition == TRUE)
+		lcp -> lcd_window_condition = FALSE;
+
+	if (so && (so -> so_snd.sb_flags & (SB_WAIT | SB_NOTIFY)))
+		sowwakeup (so);
+
+	return (PACKET_OK);
+}
+
+/*
+ * Classify an X.25 level 3 packet, returning a code to be used in
+ * switches or arrays. Packets with a wrong format identifier or an
+ * unrecognised type yield INVALID_PACKET.
+ */
+
+pk_decode (xp)
+register struct x25_packet *xp;
+{
+	if (X25GBITS(xp -> bits, fmt_identifier) != 1)
+		return (INVALID_PACKET);
+#ifdef ancient_history
+	/*
+	 * Make sure that the logical channel group number is 0.
+	 * This restriction may be removed at some later date.
+	 */
+	if (xp -> lc_group_number != 0)
+		return (INVALID_PACKET);
+#endif
+	/* Data packets are recognised first. */
+	if ((xp -> packet_type & DATA_PACKET_DESIGNATOR) == 0)
+		return (DATA);
+
+	/* Then the flow control packets (RR, RNR, REJECT). */
+	if ((xp -> packet_type & RR_OR_RNR_PACKET_DESIGNATOR) == 0) {
+		switch (xp -> packet_type & 0x1f) {
+		case X25_RR:
+			return (RR);
+		case X25_RNR:
+			return (RNR);
+		case X25_REJECT:
+			return (REJECT);
+		}
+		/* No match: fall through to the full type table. */
+	}
+
+	/* Everything else is identified by the whole type octet. */
+	switch (xp -> packet_type) {
+	case X25_CALL:
+		return (CALL);
+	case X25_CALL_ACCEPTED:
+		return (CALL_ACCEPTED);
+	case X25_CLEAR:
+		return (CLEAR);
+	case X25_CLEAR_CONFIRM:
+		return (CLEAR_CONF);
+	case X25_INTERRUPT:
+		return (INTERRUPT);
+	case X25_INTERRUPT_CONFIRM:
+		return (INTERRUPT_CONF);
+	case X25_RESET:
+		return (RESET);
+	case X25_RESET_CONFIRM:
+		return (RESET_CONF);
+	case X25_RESTART:
+		return (RESTART);
+	case X25_RESTART_CONFIRM:
+		return (RESTART_CONF);
+	case X25_DIAGNOSTIC:
+		return (DIAG_TYPE);
+	default:
+		break;
+	}
+	return (INVALID_PACKET);
+}
+
+/*
+ * A restart packet has been received. Log the reason given in its
+ * cause octet (packet_data) against the packet's channel number.
+ */
+
+pk_restartcause (pkp, xp)
+struct pkcb *pkp;
+register struct x25_packet *xp;
+{
+	register struct x25config *xcp = pkp -> pk_xcp;
+	register int lcn = LCN(xp);
+	char *why;
+
+	switch (xp -> packet_data) {
+	case X25_RESTART_LOCAL_PROCEDURE_ERROR:
+		why = "restart: local procedure error";
+		break;
+	case X25_RESTART_NETWORK_CONGESTION:
+		why = "restart: network congestion";
+		break;
+	case X25_RESTART_NETWORK_OPERATIONAL:
+		why = "restart: network operational";
+		break;
+	default:
+		why = "restart: unknown cause";
+		break;
+	}
+	pk_message (lcn, xcp, why);
+}
+
+#define MAXRESETCAUSE 7
+
+/* Socket error for each reset cause code, indexed 0 .. MAXRESETCAUSE. */
+int Reset_cause[] = {
+	EXRESET, EXROUT, 0, EXRRPE, 0, EXRLPE, 0, EXRNCG
+};
+
+/*
+ * A reset packet has arrived. Return the cause to the user.
+ *
+ * The cause octet (packet_data) and the diagnostic in octet 4 are
+ * logged; the cause, clamped to MAXRESETCAUSE, selects the error
+ * stored in the socket attached to the packet's channel, if any.
+ */
+
+pk_resetcause (pkp, xp)
+struct pkcb *pkp;
+register struct x25_packet *xp;
+{
+	register struct pklcd *lcp = pkp -> pk_chan[LCN(xp)];
+	register int code = xp -> packet_data;
+
+	if (code > MAXRESETCAUSE)
+		code = MAXRESETCAUSE;	/* EXRNCG */
+
+	/*
+	 * pk_message () wants the x25config, not the pkcb. lcp was
+	 * taken from pkp's own channel table, so pkp's pk_xcp is used.
+	 */
+	pk_message (LCN(xp), pkp -> pk_xcp, "reset code 0x%x, diagnostic 0x%x",
+	    xp -> packet_data, ((u_char *)xp)[4]);
+
+	if (lcp -> lcd_so)
+		lcp -> lcd_so -> so_error = Reset_cause[code];
+}
+
+#define MAXCLEARCAUSE 25
+
+/* Socket error for each clear cause code, indexed 0 .. MAXCLEARCAUSE. */
+int Clear_cause[] = {
+	EXCLEAR, EXCBUSY, 0, EXCINV, 0, EXCNCG, 0,
+	0, 0, EXCOUT, 0, EXCAB, 0, EXCNOB, 0, 0, 0, EXCRPE,
+	0, EXCLPE, 0, 0, 0, 0, 0, EXCRRC
+};
+
+/*
+ * A clear packet has arrived. Hand the cause to the user by storing
+ * the matching error in the socket attached to the packet's channel,
+ * if there is one. Cause codes beyond the table map to entry 5.
+ */
+
+pk_clearcause (pkp, xp)
+struct pkcb *pkp;
+register struct x25_packet *xp;
+{
+	register struct pklcd *lcp = pkp -> pk_chan[LCN(xp)];
+	register int code = xp -> packet_data;
+
+	if (lcp -> lcd_so)
+		lcp -> lcd_so -> so_error =
+		    Clear_cause[code > MAXCLEARCAUSE ? 5 /* EXCNCG */ : code];
+}
+
+/*
+ * Return the address string of an X.25 configuration for use in
+ * messages; this is simply the x25_addr member of xc_addr.
+ */
+char *
+format_ntn (xcp)
+register struct x25config *xcp;
+{
+	char *ntn = xcp -> xc_addr.x25_addr;
+
+	return (ntn);
+}
+
+/*
+ * Print a packet level message: a prefix naming the link (when
+ * !PQEMPTY) and the channel (when lcn is non-zero), then fmt with
+ * up to six arguments, then a newline.
+ */
+/* VARARGS1 */
+pk_message (lcn, xcp, fmt, a1, a2, a3, a4, a5, a6)
+struct x25config *xcp;
+char *fmt;
+{
+	if (!PQEMPTY) {
+		if (lcn)
+			printf ("X.25(%s): lcn %d: ", format_ntn (xcp), lcn);
+		else
+			printf ("X.25(%s): ", format_ntn (xcp));
+	} else {
+		if (lcn)
+			printf ("X.25: lcn %d: ", lcn);
+		else
+			printf ("X.25: ");
+	}
+
+	printf (fmt, a1, a2, a3, a4, a5, a6);
+	printf ("\n");
+}
+
+/*
+ * Cut the packet chain m0 into X.25 DATA packets of at most
+ * 1 << lcd_packetsize octets, give each a packet header and append
+ * them, one record per packet, to the channel's send buffer.
+ *
+ * qbit requests the Q bit on every fragment. The M bit is set on
+ * every fragment that has a successor, and on the last one when mbit
+ * is set and the fragment is full-sized or the channel uses the D bit.
+ * wait is passed on to m_split () and M_PREPEND.
+ *
+ * Returns 0 on success (also for a null m0) or ENOBUFS when an mbuf
+ * could not be obtained; m0 must carry a packet header.
+ * NOTE(review): when m_split () fails the unsplit remainder in m is
+ * not freed here - confirm whether the caller does so.
+ */
+pk_fragment (lcp, m0, qbit, mbit, wait)
+struct mbuf *m0;
+register struct pklcd *lcp;
+{
+	register struct mbuf *m = m0;
+	register struct x25_packet *xp;
+	register struct sockbuf *sb;
+	struct mbuf *head = 0, *next, **mp = &head, *m_split ();
+	int totlen, psize = 1 << (lcp -> lcd_packetsize);
+
+	if (m == 0)
+		return 0;
+	/*
+	 * The parentheses matter: == binds tighter than &, so without
+	 * them the test is always false and the check never fires.
+	 */
+	if ((m -> m_flags & M_PKTHDR) == 0)
+		panic ("pk_fragment");
+	totlen = m -> m_pkthdr.len;
+	m -> m_act = 0;
+	sb = lcp -> lcd_so ? &lcp -> lcd_so -> so_snd : & lcp -> lcd_sb;
+	do {
+		if (totlen > psize) {
+			if ((next = m_split (m, psize, wait)) == 0)
+				goto abort;
+			totlen -= psize;
+		} else
+			next = 0;
+		M_PREPEND(m, PKHEADERLN, wait);
+		if (m == 0)
+			goto abort;
+		/* Collect the fragments on a private m_act list first. */
+		*mp = m;
+		mp = & m -> m_act;
+		*mp = 0;
+		xp = mtod (m, struct x25_packet *);
+		*(char *)xp = 0;
+		if (qbit)
+			X25SBITS(xp -> bits, q_bit, 1);
+		if (lcp -> lcd_flags & X25_DBIT)
+			X25SBITS(xp -> bits, d_bit, 1);
+		X25SBITS(xp -> bits, fmt_identifier, 1);
+		xp -> packet_type = X25_DATA;
+		SET_LCN(xp, lcp -> lcd_lcn);
+		if (next || (mbit && (totlen == psize ||
+		    (lcp -> lcd_flags & X25_DBIT))))
+			SMBIT(xp, 1);
+	} while (m = next);
+	/* Everything was built: queue the fragments in order. */
+	for (m = head; m; m = next) {
+		next = m -> m_act;
+		m -> m_act = 0;
+		sbappendrecord (sb, m);
+	}
+	return 0;
+abort:
+	if (wait)
+		panic ("pk_fragment null mbuf after wait");
+	if (next)
+		m_freem (next);
+	/* Throw away the fragments completed so far. */
+	for (m = head; m; m = next) {
+		next = m -> m_act;
+		m_freem (m);
+	}
+	return ENOBUFS;
+}
diff --git a/sys/netccitt/pk_timer.c b/sys/netccitt/pk_timer.c
new file mode 100644
index 000000000000..52c1860b4b4e
--- /dev/null
+++ b/sys/netccitt/pk_timer.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) Computing Centre, University of British Columbia, 1984
+ * Copyright (C) Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1990, 1992
+ * Copyright (c) 1990, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pk_timer.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+/*
+ * Various timer values. They can be adjusted
+ * by patching the binary with adb if necessary.
+ * All are expressed as multiples of PR_SLOWHZ.
+ */
+int pk_t20 = 18 * PR_SLOWHZ; /* restart timer */
+int pk_t21 = 20 * PR_SLOWHZ; /* call timer */
+/* XXX pk_t22 is never used */
+int pk_t22 = 18 * PR_SLOWHZ; /* reset timer */
+int pk_t23 = 18 * PR_SLOWHZ; /* clear timer */
+
+/*
+ * Packet level timer routine: for every pkcb, age the restart timer
+ * while a RESTART is outstanding, or, when the link is ready, age
+ * the per-channel call and clear timers, reap zombie channels and
+ * restart the link if too many channels are stuck clearing.
+ * NOTE(review): presumably run from the protocol slow timeout, given
+ * the PR_SLOWHZ scaling above - confirm against the protosw entry.
+ */
+pk_timer ()
+{
+ register struct pkcb *pkp;
+ register struct pklcd *lcp, **pp;
+ register int lcns_jammed, cant_restart;
+
+ FOR_ALL_PKCBS(pkp) {
+ switch (pkp -> pk_state) {
+ case DTE_SENT_RESTART:
+ lcp = pkp -> pk_chan[0];
+ /*
+ * If restart failures are common, a link level
+ * reset should be initiated here.
+ */
+ if (lcp -> lcd_timer && --lcp -> lcd_timer == 0) {
+ pk_message (0, pkp -> pk_xcp,
+ "packet level restart failed");
+ pkp -> pk_state = DTE_WAITING;
+ }
+ break;
+
+ case DTE_READY:
+ lcns_jammed = cant_restart = 0;
+ for (pp = &pkp -> pk_chan[1]; pp <= &pkp -> pk_chan[pkp -> pk_maxlcn]; pp++) {
+ if ((lcp = *pp) == 0)
+ continue;
+ switch (lcp -> lcd_state) {
+ case SENT_CALL:
+ /* Fires only on the tick that brings the counter to exactly 0. */
+ if (--lcp -> lcd_timer == 0) {
+ if (lcp -> lcd_so)
+ lcp -> lcd_so -> so_error = ETIMEDOUT;
+ pk_clear (lcp, 49, 1);
+ }
+ break;
+
+ case SENT_CLEAR:
+ /* Three or more retries: count the channel as jammed. */
+ if (lcp -> lcd_retry >= 3)
+ lcns_jammed++;
+ else
+ if (--lcp -> lcd_timer == 0)
+ pk_clear (lcp, 50, 1);
+ break;
+
+ case DATA_TRANSFER: /* lcn active */
+ cant_restart++;
+ break;
+
+ case LCN_ZOMBIE: /* zombie state */
+ pk_freelcd (lcp);
+ break;
+ }
+ }
+ /* Restart only if over half the channels are jammed and none is active. */
+ if (lcns_jammed > pkp -> pk_maxlcn / 2 && cant_restart == 0) {
+ pk_message (0, pkp -> pk_xcp, "%d lcns jammed: attempting restart", lcns_jammed);
+ pk_restart (pkp, 0);
+ }
+ }
+ }
+}
diff --git a/sys/netccitt/pk_usrreq.c b/sys/netccitt/pk_usrreq.c
new file mode 100644
index 000000000000..d0dc42c0d403
--- /dev/null
+++ b/sys/netccitt/pk_usrreq.c
@@ -0,0 +1,604 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (C) Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1992
+ * Copyright (c) 1991, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pk_usrreq.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+/*
+ * Forward declarations of the converters defined later in this file; each
+ * rewrites, in place, the socket address held in an mbuf between the
+ * obsolete struct x25_sockaddr and struct sockaddr_x25.
+ */
+static old_to_new();
+static new_to_old();
+/*
+ *
+ * X.25 Packet level protocol interface to socket abstraction.
+ *
+ * Process an X.25 user request on a logical channel. If this is a send
+ * request then m is the mbuf chain of the send data. If this is a timer
+ * expiration (called from the software clock routine) then timertype is
+ * the particular timer.
+ *
+ */
+
+/*
+ * pk_usrreq -- dispatch one PRU_* request for an X.25 socket.
+ *
+ *	so	socket the request is for; so_pcb holds its struct pklcd
+ *	req	PRU_* request code
+ *	m	request dependent mbuf: data to send, or the buffer that
+ *		PRU_RCVOOB fills in
+ *	nam	mbuf holding a socket address (bind, connect, accept,
+ *		sockaddr, peeraddr)
+ *	control	ancillary data; always freed here before returning
+ *
+ * PRU_CONTROL is special: its three "mbuf" arguments really carry the ioctl
+ * command, the ioctl data pointer and the ifnet, and are passed straight to
+ * pk_control() without any mbuf handling.
+ *
+ * Returns 0 on success or an errno value.  Panics on an unknown request.
+ */
+pk_usrreq (so, req, m, nam, control)
+struct socket *so;
+int req;
+register struct mbuf *m, *nam;
+struct mbuf *control;
+{
+	register struct pklcd *lcp = (struct pklcd *) so -> so_pcb;
+	register int error = 0;
+
+	/* Must come first: control is an ifnet here and must not be freed. */
+	if (req == PRU_CONTROL)
+		return (pk_control (so, (int)m, (caddr_t)nam,
+			(struct ifnet *)control));
+	if (control && control -> m_len) {
+		error = EINVAL;
+		goto release;
+	}
+	if (lcp == NULL && req != PRU_ATTACH) {
+		error = EINVAL;
+		goto release;
+	}
+
+/*
+	pk_trace (pkcbhead, TR_USER, (struct pklcd *)0,
+		req, (struct x25_packet *)0);
+*/
+
+	switch (req) {
+	/*
+	 *  X.25 attaches to socket via PRU_ATTACH and allocates a logical
+	 *  channel descriptor.  If the socket is to receive connections,
+	 *  then the LISTEN state is entered.
+	 */
+	case PRU_ATTACH:
+		if (lcp) {
+			error = EISCONN;
+			/* Socket already connected. */
+			break;
+		}
+		lcp = pk_attach (so);
+		if (lcp == 0)
+			error = ENOBUFS;
+		break;
+
+	/*
+	 *  Detach a logical channel from the socket.  If the state of the
+	 *  channel is embryonic, simply discard it.  Otherwise we have to
+	 *  initiate a PRU_DISCONNECT which will finish later.
+	 */
+	case PRU_DETACH:
+		pk_disconnect (lcp);
+		break;
+
+	/*
+	 *  Give the socket an address.  An old style address is converted
+	 *  to the new layout first.
+	 */
+	case PRU_BIND:
+		if (nam -> m_len == sizeof (struct x25_sockaddr))
+			old_to_new (nam);
+		error = pk_bind (lcp, nam);
+		break;
+
+	/*
+	 *  Prepare to accept connections.
+	 */
+	case PRU_LISTEN:
+		error = pk_listen (lcp);
+		break;
+
+	/*
+	 *  Initiate a CALL REQUEST to peer entity.  Enter state SENT_CALL
+	 *  and mark the socket as connecting.  Set timer waiting for
+	 *  CALL ACCEPT or CLEAR.
+	 */
+	case PRU_CONNECT:
+		if (nam -> m_len == sizeof (struct x25_sockaddr))
+			old_to_new (nam);
+		if (pk_checksockaddr (nam)) {
+			/* leave through release: so control is not leaked */
+			error = EINVAL;
+			break;
+		}
+		error = pk_connect (lcp, mtod (nam, struct sockaddr_x25 *));
+		break;
+
+	/*
+	 *  Initiate a disconnect to peer entity via a CLEAR REQUEST packet.
+	 *  The socket will be disconnected when we receive a confirmation
+	 *  or a clear collision.
+	 */
+	case PRU_DISCONNECT:
+		pk_disconnect (lcp);
+		break;
+
+	/*
+	 *  Accept an INCOMING CALL.  Most of the work has already been done
+	 *  by pk_input.  Just return the caller's address to the user.
+	 */
+	case PRU_ACCEPT:
+		if (lcp -> lcd_craddr == NULL)
+			break;
+		bcopy ((caddr_t)lcp -> lcd_craddr, mtod (nam, caddr_t),
+			sizeof (struct sockaddr_x25));
+		nam -> m_len = sizeof (struct sockaddr_x25);
+		if (lcp -> lcd_flags & X25_OLDSOCKADDR)
+			new_to_old (nam);
+		break;
+
+	/*
+	 *  After a receive, we should send a RR.
+	 */
+	case PRU_RCVD:
+		pk_flowcontrol (lcp, /*sbspace (&so -> so_rcv) <= */ 0, 1);
+		break;
+
+	/*
+	 *  Send INTERRUPT packet.  With no data supplied a single zero
+	 *  octet is sent; more than 32 octets is refused.
+	 */
+	case PRU_SENDOOB:
+		if (m == 0) {
+			MGETHDR(m, M_WAITOK, MT_OOBDATA);
+			m -> m_pkthdr.len = m -> m_len = 1;
+			*mtod (m, octet *) = 0;
+		}
+		if (m -> m_pkthdr.len > 32) {
+			m_freem (m);
+			error = EMSGSIZE;
+			break;
+		}
+		MCHTYPE(m, MT_OOBDATA);
+		/* FALLTHROUGH */
+
+	/*
+	 *  Do send by placing data on the socket output queue.
+	 */
+	case PRU_SEND:
+		if (control) {
+			register struct cmsghdr *ch;
+
+			/*
+			 * The option header lives in the control mbuf, not
+			 * in the data mbuf, and must be completely present
+			 * before it is stripped off.
+			 *
+			 * NOTE(review): the check at the top of this
+			 * function rejects every non-empty control mbuf, so
+			 * only an empty one can get here; confirm whether
+			 * options on send are meant to be supported.
+			 */
+			if ((unsigned)control -> m_len < sizeof (*ch))
+				error = EINVAL;
+			else {
+				ch = mtod (control, struct cmsghdr *);
+				control -> m_len -= sizeof (*ch);
+				control -> m_data += sizeof (*ch);
+				error = pk_ctloutput (PRCO_SETOPT, so,
+					ch -> cmsg_level, ch -> cmsg_type,
+					&control);
+			}
+		}
+		if (m) {
+			if (error == 0)
+				error = pk_send (lcp, m);
+			else
+				m_freem (m);	/* data is ours to dispose of */
+		}
+		break;
+
+	/*
+	 *  Abort a virtual circuit.  For example all completed calls
+	 *  waiting acceptance.
+	 */
+	case PRU_ABORT:
+		pk_disconnect (lcp);
+		break;
+
+	/* Begin unimplemented hooks. */
+
+	case PRU_SHUTDOWN:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_CONTROL:
+		/* not reached: PRU_CONTROL returns before the switch */
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_SENSE:
+#ifdef BSD4_3
+		((struct stat *)m) -> st_blksize = so -> so_snd.sb_hiwat;
+#else
+		error = EOPNOTSUPP;
+#endif
+		break;
+
+	/* End unimplemented hooks. */
+
+	case PRU_SOCKADDR:
+		if (lcp -> lcd_ceaddr == 0) {
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		nam -> m_len = sizeof (struct sockaddr_x25);
+		bcopy ((caddr_t)lcp -> lcd_ceaddr, mtod (nam, caddr_t),
+			sizeof (struct sockaddr_x25));
+		if (lcp -> lcd_flags & X25_OLDSOCKADDR)
+			new_to_old (nam);
+		break;
+
+	case PRU_PEERADDR:
+		if (lcp -> lcd_state != DATA_TRANSFER) {
+			error = ENOTCONN;
+			break;
+		}
+		nam -> m_len = sizeof (struct sockaddr_x25);
+		bcopy (lcp -> lcd_craddr ? (caddr_t)lcp -> lcd_craddr :
+			(caddr_t)lcp -> lcd_ceaddr,
+			mtod (nam, caddr_t), sizeof (struct sockaddr_x25));
+		if (lcp -> lcd_flags & X25_OLDSOCKADDR)
+			new_to_old (nam);
+		break;
+
+	/*
+	 *  Receive INTERRUPT packet.  With SO_OOBINLINE the interrupt data
+	 *  is taken from the head of the receive queue; otherwise the one
+	 *  octet saved in the lcd is returned.
+	 */
+	case PRU_RCVOOB:
+		if (so -> so_options & SO_OOBINLINE) {
+			register struct mbuf *n = so -> so_rcv.sb_mb;
+			if (n && n -> m_type == MT_OOBDATA) {
+				unsigned len = n -> m_pkthdr.len;
+				so -> so_rcv.sb_mb = n -> m_nextpkt;
+				if (len != n -> m_len &&
+				    (n = m_pullup (n, len)) == 0)
+					break;
+				m -> m_len = len;
+				/* bcopy is (from, to): queued data -> caller's mbuf */
+				bcopy (mtod (n, caddr_t), mtod (m, caddr_t), len);
+				m_freem (n);
+			}
+			break;
+		}
+		m -> m_len = 1;
+		*mtod (m, char *) = lcp -> lcd_intrdata;
+		break;
+
+	default:
+		panic ("pk_usrreq");
+	}
+release:
+	if (control != NULL)
+		m_freem (control);
+	return (error);
+}
+
+/*
+ * If you want to use UBC X.25 level 3 in conjunction with some
+ * other X.25 level 2 driver, have the ifp -> if_ioctl routine
+ * assign pk_start to ia -> ia_start when called with SIOCSIFCONF_X25.
+ */
+/* ARGSUSED */
+/*
+ * Start method installed in ia_start by pk_control(): kick output on the
+ * given logical channel.  Always reports success.
+ */
+pk_start (lcp)
+register struct pklcd *lcp;
+{
+	/* XXX pk_output should return a value; until then claim success */
+	(void) pk_output (lcp);
+
+	return (0);
+}
+
+/* Byte offset of member m within type t, computed from a null base pointer. */
+#ifndef _offsetof
+#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
+#endif
+/*
+ * Netmask installed as ifa_netmask for every X.25 interface address by
+ * pk_control().  Its length stops at the start of x25_addr, so it spans only
+ * the len, family and net id fields, and only the net id has its bits set.
+ * The remaining members are left zero by the initializer.
+ */
+struct sockaddr_x25 pk_sockmask = {
+ _offsetof(struct sockaddr_x25, x25_addr[0]), /* x25_len */
+ 0, /* x25_family */
+ -1, /* x25_net id */
+};
+
+/*ARGSUSED*/
+/*
+ * pk_control -- X.25 interface configuration requests (from PRU_CONTROL).
+ *
+ *	so	requesting socket; must have SS_PRIV set to change anything
+ *	cmd	ioctl command
+ *	data	ioctl argument; a struct ifreq_x25 for the two X.25 commands
+ *	ifp	interface the request names, may be null
+ *
+ * SIOCGIFCONF_X25 copies the interface's x25config out to the caller.
+ * SIOCSIFCONF_X25 installs a new x25config, creating the interface's X.25
+ * address on first use, resizing the channel tables of any pkcb that uses
+ * it, letting the driver validate the change and (except on ISO 8802 type
+ * interfaces) replacing the interface route.
+ * Anything else is handed to the driver's own ioctl routine.
+ *
+ * Returns 0 or an errno value; panics if asked to set the configuration
+ * without an interface.
+ */
+pk_control (so, cmd, data, ifp)
+struct socket *so;
+int cmd;
+caddr_t data;
+register struct ifnet *ifp;
+{
+	register struct ifreq_x25 *ifr = (struct ifreq_x25 *)data;
+	register struct ifaddr *ifa = 0;
+	register struct x25_ifaddr *ia = 0;
+	/*
+	 * error starts out as success: a driver without an if_ioctl routine
+	 * has nothing to object to, and the value is tested further down.
+	 */
+	int error = 0, s, old_maxlcn;
+
+	/*
+	 * Find address for this interface, if it exists.
+	 */
+	if (ifp)
+		for (ifa = ifp -> if_addrlist; ifa; ifa = ifa -> ifa_next)
+			if (ifa -> ifa_addr -> sa_family == AF_CCITT)
+				break;
+
+	/* valid because ia_ifa is the first member of struct x25_ifaddr */
+	ia = (struct x25_ifaddr *)ifa;
+	switch (cmd) {
+	case SIOCGIFCONF_X25:
+		if (ifa == 0)
+			return (EADDRNOTAVAIL);
+		ifr -> ifr_xc = ia -> ia_xc;
+		return (0);
+
+	case SIOCSIFCONF_X25:
+		if ((so->so_state & SS_PRIV) == 0)
+			return (EPERM);
+		if (ifp == 0)
+			panic ("pk_control");
+		if (ifa == (struct ifaddr *)0) {
+			/* First X.25 address: allocate it and append it. */
+			MALLOC(ia, struct x25_ifaddr *, sizeof (*ia),
+				M_IFADDR, M_WAITOK);
+			if (ia == 0)
+				return (ENOBUFS);
+			bzero ((caddr_t)ia, sizeof (*ia));
+			if ((ifa = ifp -> if_addrlist) != 0) {
+				for ( ; ifa -> ifa_next; ifa = ifa -> ifa_next)
+					;
+				ifa -> ifa_next = &ia -> ia_ifa;
+			} else
+				ifp -> if_addrlist = &ia -> ia_ifa;
+			ifa = &ia -> ia_ifa;
+			ifa -> ifa_netmask = (struct sockaddr *)&pk_sockmask;
+			ifa -> ifa_addr = (struct sockaddr *)&ia -> ia_xc.xc_addr;
+			ifa -> ifa_dstaddr = (struct sockaddr *)&ia -> ia_dstaddr; /* XXX */
+			ia -> ia_ifp = ifp;
+			ia -> ia_dstaddr.x25_family = AF_CCITT;
+			ia -> ia_dstaddr.x25_len = pk_sockmask.x25_len;
+		} else if (ISISO8802(ifp) == 0) {
+			/* Existing address: drop its route before changing it. */
+			rtinit (ifa, (int)RTM_DELETE, 0);
+		}
+		old_maxlcn = ia -> ia_maxlcn;
+		ia -> ia_xc = ifr -> ifr_xc;
+		ia -> ia_dstaddr.x25_net = ia -> ia_xc.xc_addr.x25_net;
+		if (ia -> ia_maxlcn != old_maxlcn && old_maxlcn != 0) {
+			/* VERY messy XXX */
+			register struct pkcb *pkp;
+			FOR_ALL_PKCBS(pkp)
+				if (pkp -> pk_ia == ia)
+					pk_resize (pkp);
+		}
+		/*
+		 * Give the interface a chance to initialize if this
+		 * is its first address, and to validate the address.
+		 */
+		ia -> ia_start = pk_start;
+		s = splimp();
+		if (ifp -> if_ioctl)
+			error = (*ifp -> if_ioctl)(ifp, SIOCSIFCONF_X25,
+				(caddr_t) ifa);
+		if (error)
+			ifp -> if_flags &= ~IFF_UP;
+		else if (ISISO8802(ifp) == 0)
+			error = rtinit (ifa, (int)RTM_ADD, RTF_UP);
+		splx (s);
+		return (error);
+
+	default:
+		if (ifp == 0 || ifp -> if_ioctl == 0)
+			return (EOPNOTSUPP);
+		return ((*ifp -> if_ioctl)(ifp, cmd, data));
+	}
+}
+
+/*
+ * pk_ctloutput -- X.25 socket option handler.
+ *
+ *	cmd	PRCO_* operation; only PRCO_SETOPT does anything
+ *	so	socket whose so_pcb is the logical channel descriptor
+ *	level	not examined here
+ *	optname	one of the PK_* options
+ *	mp	points at the mbuf holding the option value
+ *
+ * The option mbuf is always consumed: PK_FACILITIES keeps it on the lcd,
+ * every other path frees it, and *mp is cleared either way.
+ *
+ * Returns EINVAL when no option mbuf is supplied, EOPNOTSUPP for anything
+ * not handled, otherwise the result of the option specific routine.
+ */
+pk_ctloutput (cmd, so, level, optname, mp)
+struct socket *so;
+struct mbuf **mp;
+int cmd, level, optname;
+{
+	register struct mbuf *opt = *mp;
+	register struct pklcd *lcp = (struct pklcd *) so -> so_pcb;
+	int error = EOPNOTSUPP;
+
+	if (opt == 0)
+		return (EINVAL);
+
+	if (cmd == PRCO_SETOPT) {
+		switch (optname) {
+		case PK_FACILITIES:
+			/* the lcd takes over the mbuf; nothing to free */
+			lcp -> lcd_facilities = opt;
+			*mp = 0;
+			return (0);
+
+		case PK_ACCTFILE:
+			/* an empty value is passed on as a null file name */
+			if ((so->so_state & SS_PRIV) == 0)
+				error = EPERM;
+			else
+				error = pk_accton (opt -> m_len ?
+					mtod (opt, char *) : (char *)0);
+			break;
+
+		case PK_RTATTACH:
+			error = pk_rtattach (so, opt);
+			break;
+
+		case PK_PRLISTEN:
+			error = pk_user_protolisten (mtod (opt, u_char *));
+			break;
+		}
+	}
+
+	if (*mp) {
+		(void) m_freem (*mp);
+		*mp = 0;
+	}
+	return (error);
+
+}
+
+
+/*
+ * Do an in-place conversion of an "old style"
+ * socket address to the new style
+ */
+
+/*
+ * old_to_new -- m holds a struct x25_sockaddr; on return it holds the
+ * equivalent struct sockaddr_x25 and m_len has been updated to match.
+ *
+ * The result is built in a local copy first because the two layouts
+ * overlap.  It is always flagged X25_MQBIT | X25_OLDSOCKADDR.  The user
+ * data length is left at zero unless the protocol id or the user data is
+ * non-zero.
+ */
+static
+old_to_new (m)
+register struct mbuf *m;
+{
+	register struct x25_sockaddr *oldp;
+	register struct sockaddr_x25 *newp;
+	register char *ocp, *ncp;
+	struct sockaddr_x25 new;
+
+	oldp = mtod (m, struct x25_sockaddr *);
+	newp = &new;
+	bzero ((caddr_t)newp, sizeof (*newp));
+
+	newp -> x25_family = AF_CCITT;
+	newp -> x25_len = sizeof(*newp);
+	newp -> x25_opts.op_flags = (oldp -> xaddr_facilities & X25_REVERSE_CHARGE)
+		| X25_MQBIT | X25_OLDSOCKADDR;
+	if (oldp -> xaddr_facilities & XS_HIPRIO)	/* Datapac specific */
+		newp -> x25_opts.op_psize = X25_PS128;
+	/* leave room for the terminating null in x25_addr */
+	bcopy ((caddr_t)oldp -> xaddr_addr, newp -> x25_addr,
+		(unsigned)min (oldp -> xaddr_len, sizeof (newp -> x25_addr) - 1));
+	/* x25_udata is still all zero, so this asks "is the protocol id set?" */
+	if (bcmp ((caddr_t)oldp -> xaddr_proto, newp -> x25_udata, 4) != 0) {
+		bcopy ((caddr_t)oldp -> xaddr_proto, newp -> x25_udata, 4);
+		newp -> x25_udlen = 4;
+	}
+	ocp = (caddr_t)oldp -> xaddr_userdata;
+	ncp = newp -> x25_udata + 4;
+	/*
+	 * Copy user data up to the first zero octet.  The bound is tested
+	 * before the octet is read so that a completely filled field does
+	 * not cause a read past the end of xaddr_userdata.
+	 */
+	while (ocp < (caddr_t)oldp -> xaddr_userdata + 12 && *ocp) {
+		if (newp -> x25_udlen == 0)
+			newp -> x25_udlen = 4;
+		*ncp++ = *ocp++;
+		newp -> x25_udlen++;
+	}
+	bcopy ((caddr_t)newp, mtod (m, char *), sizeof (*newp));
+	m -> m_len = sizeof (*newp);
+}
+
+/*
+ * Do an in-place conversion of a new style
+ * socket address to the old style
+ */
+
+/*
+ * new_to_old -- m holds a struct sockaddr_x25; on return it holds the
+ * equivalent struct x25_sockaddr and m_len has been updated to match.
+ *
+ * The result is built in a local copy first because the two layouts
+ * overlap.  Both variable length copies are clamped to the size of the old
+ * style fields, so an unterminated address or an oversized user data length
+ * cannot write beyond the local copy.
+ */
+static
+new_to_old (m)
+register struct mbuf *m;
+{
+	register struct x25_sockaddr *oldp;
+	register struct sockaddr_x25 *newp;
+	register char *ocp, *ncp;
+	struct x25_sockaddr old;
+	int extra;	/* user data octets beyond the 4 byte protocol id */
+
+	oldp = &old;
+	newp = mtod (m, struct sockaddr_x25 *);
+	bzero ((caddr_t)oldp, sizeof (*oldp));
+
+	oldp -> xaddr_facilities = newp -> x25_opts.op_flags & X25_REVERSE_CHARGE;
+	if (newp -> x25_opts.op_psize == X25_PS128)
+		oldp -> xaddr_facilities |= XS_HIPRIO;	/* Datapac specific */
+	ocp = (char *)oldp -> xaddr_addr;
+	ncp = newp -> x25_addr;
+	/* xaddr_addr is one octet shorter than x25_addr: stop when it is full */
+	while (oldp -> xaddr_len < (int) sizeof (oldp -> xaddr_addr) && *ncp) {
+		*ocp++ = *ncp++;
+		oldp -> xaddr_len++;
+	}
+
+	bcopy (newp -> x25_udata, (caddr_t)oldp -> xaddr_proto, 4);
+	extra = newp -> x25_udlen - 4;
+	if (extra > (int) sizeof (oldp -> xaddr_userdata))
+		extra = sizeof (oldp -> xaddr_userdata);
+	if (extra > 0)
+		bcopy (newp -> x25_udata + 4, (caddr_t)oldp -> xaddr_userdata,
+			(unsigned)extra);
+
+	bcopy ((caddr_t)oldp, mtod (m, char *), sizeof (*oldp));
+	m -> m_len = sizeof (*oldp);
+}
+
+
+/*
+ * pk_checksockaddr -- sanity check the struct sockaddr_x25 held in m.
+ *
+ * Returns 0 when the address is acceptable and 1 otherwise.  To pass, the
+ * mbuf must be exactly one sockaddr_x25 long, the family must be AF_CCITT,
+ * the user data length must fit x25_udata, and x25_addr must consist of
+ * decimal digits only and be null terminated within the array.
+ */
+pk_checksockaddr (m)
+struct mbuf *m;
+{
+	register struct sockaddr_x25 *sa = mtod (m, struct sockaddr_x25 *);
+	register int i;
+
+	if (m -> m_len != sizeof (struct sockaddr_x25))
+		return (1);
+	if (sa -> x25_family != AF_CCITT)
+		return (1);
+	if (sa -> x25_udlen > sizeof (sa -> x25_udata))
+		return (1);
+
+	/* the last slot of x25_addr is reserved for the terminator */
+	for (i = 0; sa -> x25_addr[i] != 0; i++) {
+		if (i >= (int) sizeof (sa -> x25_addr) - 1)
+			return (1);
+		if (sa -> x25_addr[i] < '0' || sa -> x25_addr[i] > '9')
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * pk_send -- queue the mbuf chain m for transmission on channel lcp.
+ *
+ * MT_OOBDATA chains become an INTERRUPT packet: a packet header is
+ * prepended and the packet is inserted on the send queue of the socket, or
+ * on lcd_sb when the channel has no socket.  It is refused with
+ * ETOOMANYREFS while an interrupt confirmation is outstanding, and with
+ * EMSGSIZE when it carries more than 32 octets.
+ *
+ * Ordinary data is handed to pk_fragment().  When the channel was set up
+ * with X25_MQBIT the first data octet is removed and interpreted as the
+ * M bit (0x80) and Q bit (0x40) for the packet.
+ *
+ * On success, and only in DATA_TRANSFER state, the channel's lcd_send
+ * routine is called to start output.  Returns 0 or an errno value.
+ */
+pk_send (lcp, m)
+struct pklcd *lcp;
+register struct mbuf *m;
+{
+	int mqbit = 0, error = 0;
+	register struct x25_packet *xp;
+	register struct socket *so;
+
+	if (m -> m_type != MT_OOBDATA) {
+		/*
+		 * Application has elected (at call setup time) to prepend
+		 * a control byte to each packet written indicating m-bit
+		 * and q-bit status.  Examine and then discard this byte.
+		 */
+		if (lcp -> lcd_flags & X25_MQBIT) {
+			if (m -> m_len < 1) {
+				m_freem (m);
+				return (EMSGSIZE);
+			}
+			mqbit = *(mtod (m, u_char *));
+			m -> m_data++;
+			m -> m_len--;
+			m -> m_pkthdr.len--;
+		}
+		error = pk_fragment (lcp, m, mqbit & 0x80, mqbit & 0x40, 1);
+	} else {
+		if (lcp -> lcd_intrconf_pending)
+			error = ETOOMANYREFS;
+		if (m -> m_pkthdr.len > 32)
+			error = EMSGSIZE;
+		/* the header is prepended even on error; m may become null */
+		M_PREPEND(m, PKHEADERLN, M_WAITOK);
+		if (m == 0 || error) {
+			if (m)
+				m_freem (m);
+			return (error);
+		}
+		*(mtod (m, octet *)) = 0;
+		xp = mtod (m, struct x25_packet *);
+		X25SBITS(xp -> bits, fmt_identifier, 1);
+		xp -> packet_type = X25_INTERRUPT;
+		SET_LCN(xp, lcp -> lcd_lcn);
+		so = lcp -> lcd_so;
+		sbinsertoob (so ? &so -> so_snd : &lcp -> lcd_sb, m);
+	}
+
+	if (error == 0 && lcp -> lcd_state == DATA_TRANSFER)
+		lcp -> lcd_send (lcp);		/* XXXXXXXXX fix pk_output!!! */
+	return (error);
+}
diff --git a/sys/netccitt/pk_var.h b/sys/netccitt/pk_var.h
new file mode 100644
index 000000000000..beda05dc3752
--- /dev/null
+++ b/sys/netccitt/pk_var.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) Computing Centre, University of British Columbia, 1985
+ * Copyright (C) Computer Science Department IV,
+ * University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pk_var.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ *
+ * X.25 Logical Channel Descriptor
+ *
+ */
+
+/*
+ * One per logical channel.  A socket reaches its descriptor through so_pcb
+ * (pk_usrreq), a network reaches its descriptors through pkcb.pk_chan[]
+ * (pk_timer scans slots 1..pk_maxlcn and uses slot 0 while a restart is
+ * pending).
+ *
+ * Notes on fields whose use is visible in pk_timer.c / pk_usrreq.c:
+ *   lcd_timer		counted down by one on each pk_timer() pass while
+ *			the channel is in SENT_CALL or SENT_CLEAR
+ *   lcd_retry		a channel in SENT_CLEAR with lcd_retry >= 3 is
+ *			counted as jammed instead of being timed
+ *   lcd_send		called by pk_send() after queueing, in DATA_TRANSFER
+ *   lcd_sb		takes INTERRUPT packets when lcd_so is null
+ *   lcd_facilities	receives the option mbuf of a PK_FACILITIES setsockopt
+ *   lcd_intrdata	returned by PRU_RCVOOB when SO_OOBINLINE is not set
+ */
+struct pklcd {
+ struct pklcd_q {
+ struct pklcd_q *q_forw; /* debugging chain */
+ struct pklcd_q *q_back; /* debugging chain */
+ } lcd_q;
+ int (*lcd_upper)(); /* switch to socket vs datagram vs ...*/
+ caddr_t lcd_upnext; /* reference for lcd_upper() */
+ int (*lcd_send)(); /* if X.25 front end, direct connect */
+ caddr_t lcd_downnext; /* reference for lcd_send() */
+ short lcd_lcn; /* Logical channel number */
+ short lcd_state; /* Logical Channel state */
+ short lcd_timer; /* Various timer values */
+ short lcd_dg_timer; /* to reclaim idle datagram circuits */
+ bool lcd_intrconf_pending; /* Interrupt confirmation pending */
+ octet lcd_intrdata; /* Octet of incoming intr data */
+ char lcd_retry; /* Timer retry count */
+ char lcd_rsn; /* Seq no of last received packet */
+ char lcd_ssn; /* Seq no of next packet to send */
+ char lcd_output_window; /* Output flow control window */
+ char lcd_input_window; /* Input flow control window */
+ char lcd_last_transmitted_pr;/* Last Pr value transmitted */
+ bool lcd_rnr_condition; /* Remote in busy condition */
+ bool lcd_window_condition; /* Output window size exceeded */
+ bool lcd_reset_condition; /* True, if waiting reset confirm */
+ bool lcd_rxrnr_condition; /* True, if we have sent rnr */
+ char lcd_packetsize; /* Maximum packet size */
+ char lcd_windowsize; /* Window size - both directions */
+ octet lcd_closed_user_group; /* Closed user group specification */
+ char lcd_flags; /* copy of sockaddr_x25 op_flags */
+ struct mbuf *lcd_facilities; /* user supplied facilities for cr */
+ struct mbuf *lcd_template; /* Address of response packet */
+ struct socket *lcd_so; /* Socket addr for connection */
+ struct sockaddr_x25 *lcd_craddr;/* Calling address pointer */
+ struct sockaddr_x25 *lcd_ceaddr;/* Called address pointer */
+ time_t lcd_stime; /* time circuit established */
+ long lcd_txcnt; /* Data packet transmit count */
+ long lcd_rxcnt; /* Data packet receive count */
+ short lcd_intrcnt; /* Interrupt packet transmit count */
+ struct pklcd *lcd_listen; /* Next lcd on listen queue */
+ struct pkcb *lcd_pkp; /* Network this lcd is attached to */
+ struct mbuf *lcd_cps; /* Complete Packet Sequence reassembly*/
+ long lcd_cpsmax; /* Max length for CPS */
+ struct sockaddr_x25 lcd_faddr; /* Remote Address (Calling) */
+ struct sockaddr_x25 lcd_laddr; /* Local Address (Called) */
+ struct sockbuf lcd_sb; /* alternate for datagram service */
+};
+
+/*
+ * Per network information, allocated dynamically
+ * when a new network is configured.
+ */
+
+/*
+ * pk_q must remain the first member: FOR_ALL_PKCBS casts queue links
+ * directly to struct pkcb pointers.
+ * pk_chan[0] is the channel pk_timer() uses to time a pending restart;
+ * pk_chan[1..pk_maxlcn] are the ordinary channels, null when unallocated.
+ */
+struct pkcb {
+ struct pkcb_q {
+ struct pkcb_q *q_forw;
+ struct pkcb_q *q_backw;
+ } pk_q;
+ short pk_state; /* packet level status */
+ short pk_maxlcn; /* local copy of xc_maxlcn */
+ int (*pk_lloutput) (); /* link level output procedure */
+ caddr_t (*pk_llctlinput) (); /* link level ctloutput procedure */
+ caddr_t pk_llnext; /* handle for next level down */
+ struct x25config *pk_xcp; /* network specific configuration */
+ struct x25_ifaddr *pk_ia; /* backpointer to ifaddr */
+ struct pklcd **pk_chan; /* actual size == xc_maxlcn+1 */
+ short pk_dxerole; /* DXE role of PLE over LLC2 */
+ short pk_restartcolls; /* counting RESTART collisions til resolved */
+ struct rtentry *pk_rt; /* back pointer to route */
+ struct rtentry *pk_llrt; /* pointer to reverse mapping */
+ u_short pk_refcount; /* ref count */
+};
+
+/*
+ * Loop header that visits every pkcb on the circular queue headed by pkcb_q.
+ * Iteration starts at the head's forward link and stops when it comes back
+ * round to the head itself; an empty queue (head linked to itself) yields no
+ * iterations.  The casts rely on pk_q being the first member of struct pkcb.
+ * Use it like a for statement: FOR_ALL_PKCBS(pkp) { ... }
+ */
+#define FOR_ALL_PKCBS(p) for((p) = (struct pkcb *)(pkcb_q.q_forw); \
+ (pkcb_q.q_forw != &pkcb_q) && ((struct pkcb_q *)(p) != &pkcb_q); \
+ (p) = (struct pkcb *)((p) -> pk_q.q_forw))
+
+/* True when no pkcb is queued. */
+#define PQEMPTY (pkcb_q.q_forw == &pkcb_q)
+
+/*
+ * Interface address, x25 version. Exactly one of these structures is
+ * allocated for each interface with an x25 address.
+ *
+ * The ifaddr structure contains the protocol-independent part
+ * of the structure, and is assumed to be first.
+ */
+/*
+ * Allocated and linked onto the interface's address list by pk_control()
+ * on the first SIOCSIFCONF_X25, which also points ifa_addr at ia_xc.xc_addr,
+ * ifa_dstaddr at ia_dstaddr, and sets ia_start to pk_start.
+ */
+struct x25_ifaddr {
+ struct ifaddr ia_ifa; /* protocol-independent info */
+#define ia_ifp ia_ifa.ifa_ifp
+#define ia_flags ia_ifa.ifa_flags
+ struct x25config ia_xc; /* network specific configuration */
+ struct pkcb *ia_pkcb;
+#define ia_maxlcn ia_xc.xc_maxlcn
+ int (*ia_start) (); /* connect, confirm method */
+ struct sockaddr_x25 ia_dstaddr; /* reserve space for route dst */
+};
+
+/*
+ * ``Link-Level'' extension to Routing Entry for upper level
+ * packet switching via X.25 virtual circuits.
+ */
+/*
+ * NOTE(review): lx_state presumably holds one of the LXS_* values and
+ * lx_flags a mask of the LXF_* bits defined just below -- nothing in this
+ * header's visible users confirms it.
+ */
+struct llinfo_x25 {
+ struct llinfo_x25 *lx_next; /* chain together in linked list */
+ struct llinfo_x25 *lx_prev; /* chain together in linked list */
+ struct rtentry *lx_rt; /* back pointer to route */
+ struct pklcd *lx_lcd; /* local connection block */
+ struct x25_ifaddr *lx_ia; /* may not be same as rt_ifa */
+ int lx_state; /* can't trust lcd->lcd_state */
+ int lx_flags;
+ int lx_timer; /* for idle timeout */
+ int lx_family; /* for dispatch */
+};
+
+/* States for lx_state */
+#define LXS_NEWBORN 0
+#define LXS_RESOLVING 1
+#define LXS_FREE 2
+#define LXS_CONNECTING 3
+#define LXS_CONNECTED 4
+#define LXS_DISCONNECTING 5
+#define LXS_LISTENING 6
+
+/* flags */
+#define LXF_VALID 0x1 /* Circuit is live, etc. */
+#define LXF_RTHELD 0x2 /* this lcb references rtentry */
+#define LXF_LISTEN 0x4 /* accepting incoming calls */
+
+/*
+ * Definitions for accessing bitfields/bitslices inside X.25 structs
+ */
+
+
+/* Describes one bit field: which bits it occupies and how far it is shifted. */
+struct x25bitslice {
+ unsigned int bs_mask;
+ unsigned int bs_shift;
+};
+
+/*
+ * Indices into x25_bitslice[], used as the Index argument of the macros
+ * below.  Note that called_addrlen and lc_group_number share index 1.
+ */
+#define calling_addrlen 0
+#define called_addrlen 1
+#define q_bit 2
+#define d_bit 3
+#define fmt_identifier 4
+#define lc_group_number 1
+#define p_r 5
+#define m_bit 6
+#define p_s 7
+#define zilch 8
+
+/*
+ * X25GBITS  yields the value of the field, shifted down.
+ * X25SBITS  ORs the field value into Arg; bits already set in the field are
+ *           not cleared first.
+ * X25CSBITS assigns Arg, so every bit outside the field ends up zero.
+ * X25SBITS and X25CSBITS expand to an unparenthesized assignment and are
+ * only suitable as statements.
+ */
+#define X25GBITS(Arg, Index) (((Arg) & x25_bitslice[(Index)].bs_mask) >> x25_bitslice[(Index)].bs_shift)
+#define X25SBITS(Arg, Index, Val) (Arg) |= (((Val) << x25_bitslice[(Index)].bs_shift) & x25_bitslice[(Index)].bs_mask)
+#define X25CSBITS(Arg, Index, Val) (Arg) = (((Val) << x25_bitslice[(Index)].bs_shift) & x25_bitslice[(Index)].bs_mask)
+
+extern struct x25bitslice x25_bitslice[];
+
+
+/*
+ * ISOFIFTTYPE(i,t) is true when interface i has if_type t.
+ * ISISO8802(i) is true when i has any of the interface types listed below.
+ * pk_control() skips its rtinit() calls for such interfaces.  The argument
+ * is evaluated once per listed type, so it must be free of side effects.
+ */
+#define ISOFIFTTYPE(i,t) ((i)->if_type == (t))
+#define ISISO8802(i) ((ISOFIFTTYPE(i, IFT_ETHER) || \
+ ISOFIFTTYPE(i, IFT_ISO88023) || \
+ ISOFIFTTYPE(i, IFT_ISO88024) || \
+ ISOFIFTTYPE(i, IFT_ISO88025) || \
+ ISOFIFTTYPE(i, IFT_ISO88026) || \
+ ISOFIFTTYPE(i, IFT_P10) || \
+ ISOFIFTTYPE(i, IFT_P80) || \
+ ISOFIFTTYPE(i, IFT_FDDI)))
+
+/*
+ * miscellaneous debugging info
+ *
+ * NOTE(review): no user of this structure is visible alongside this header;
+ * document the individual fields from the code that fills them in.
+ */
+struct mbuf_cache {
+ int mbc_size;
+ int mbc_num;
+ int mbc_oldsize;
+ struct mbuf **mbc_cache;
+};
+
+/*
+ * Kernel-only declarations, compiled in when the CCITT option is configured.
+ *
+ * NOTE(review): pk_listenhead and the pk_t2x timers are declared without
+ * extern, so every file including this header gets a tentative definition
+ * and the build relies on the linker merging them.  The timers do have an
+ * initialized definition in pk_timer.c; where pk_listenhead is defined
+ * should be confirmed before adding extern here.
+ */
+#if defined(KERNEL) && defined(CCITT)
+extern struct pkcb_q pkcb_q;
+struct pklcd *pk_listenhead;
+struct pklcd *pk_attach();
+
+extern char *pk_name[], *pk_state[];
+int pk_t20, pk_t21, pk_t22, pk_t23;
+#endif
diff --git a/sys/netccitt/x25.h b/sys/netccitt/x25.h
new file mode 100644
index 000000000000..e86af39a1a67
--- /dev/null
+++ b/sys/netccitt/x25.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ * University of Erlangen-Nuremberg, Germany, 1992
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)x25.h 8.1 (Berkeley) 6/10/93
+ */
+
+#ifdef KERNEL
+#define PRC_IFUP 3
+#define PRC_LINKUP 4
+#define PRC_LINKDOWN 5
+#define PRC_LINKRESET 6
+#define PRC_LINKDONTCOPY 7
+#ifndef PRC_DISCONNECT_REQUEST
+#define PRC_DISCONNECT_REQUEST 10
+#endif
+#endif
+
+#define CCITTPROTO_HDLC 1
+#define CCITTPROTO_X25 2 /* packet level protocol */
+#define IEEEPROTO_802LLC 3 /* doesn't belong here */
+
+#define HDLCPROTO_LAP 1
+#define HDLCPROTO_LAPB 2
+#define HDLCPROTO_UNSET 3
+#define HDLCPROTO_LAPD 4
+
+/*
+ * socket options
+ *
+ * All four are acted on by pk_ctloutput() for PRCO_SETOPT only:
+ * PK_ACCTFILE passes the option data to pk_accton() and requires SS_PRIV,
+ * PK_FACILITIES stores the option mbuf in lcd_facilities,
+ * PK_RTATTACH calls pk_rtattach(), PK_PRLISTEN calls pk_user_protolisten().
+ */
+#define PK_ACCTFILE 1 /* use level = CCITTPROTO_X25 */
+#define PK_FACILITIES 2 /* use level = CCITTPROTO_X25 */
+#define PK_RTATTACH 3 /* use level = CCITTPROTO_X25 */
+#define PK_PRLISTEN 4 /* use level = CCITTPROTO_X25 */
+
+#define MAX_FACILITIES 109 /* maximum size for facilities */
+
+/*
+ * X.25 Socket address structure. It contains the X.121 or variation of
+ * X.121, facilities information, higher level protocol value (first four
+ * bytes of the User Data field), and the last 12 characters of the User
+ * Data field.
+ */
+
+/*
+ * pk_usrreq() recognizes this layout by its size alone (nam->m_len ==
+ * sizeof (struct x25_sockaddr)) on bind and connect, and converts it with
+ * old_to_new(); addresses are handed back in this layout when the channel
+ * carries X25_OLDSOCKADDR.
+ */
+struct x25_sockaddr { /* obsolete - use sockaddr_x25 */
+ short xaddr_len; /* Length of xaddr_addr. */
+ u_char xaddr_addr[15]; /* Network dependent or X.121 address. */
+ u_char xaddr_facilities; /* Facilities information. */
+#define XS_REVERSE_CHARGE 0x01
+#define XS_HIPRIO 0x02
+ u_char xaddr_proto[4]; /* Protocol ID (4 bytes of user data). */
+ u_char xaddr_userdata[12]; /* Remaining User data field. */
+};
+
+/*
+ * X.25 Socket address structure. It contains the network id, X.121
+ * address, facilities information, higher level protocol value (first four
+ * bytes of the User Data field), and up to 12 characters of User Data.
+ */
+
+/*
+ * What pk_checksockaddr() requires of an address passed to connect:
+ * x25_family == AF_CCITT, x25_udlen no larger than x25_udata, and x25_addr
+ * made of the characters '0'..'9' only, null terminated within the array
+ * (so at most 15 digits).
+ * The X25_* option flags fit in a char because lcd_flags, which copies
+ * op_flags, is declared char in pk_var.h.
+ */
+struct sockaddr_x25 {
+ u_char x25_len;
+ u_char x25_family; /* must be AF_CCITT */
+ short x25_net; /* network id code (usually a dnic) */
+ char x25_addr[16]; /* X.121 address (null terminated) */
+ struct x25opts {
+ char op_flags; /* miscellaneous options */
+ /* pk_var.h defines other lcd_flags */
+#define X25_REVERSE_CHARGE 0x01 /* remote DTE pays for call */
+#define X25_DBIT 0x02 /* not yet supported */
+#define X25_MQBIT 0x04 /* prepend M&Q bit status byte to packet data */
+#define X25_OLDSOCKADDR 0x08 /* uses old sockaddr structure */
+#define X25_DG_CIRCUIT 0x10 /* lcd_flag: used for datagrams */
+#define X25_DG_ROUTING 0x20 /* lcd_flag: peer addr not yet known */
+#define X25_MBS_HOLD 0x40 /* lcd_flag: collect m-bit sequences */
+ char op_psize; /* requested packet size */
+#define X25_PS128 7
+#define X25_PS256 8
+#define X25_PS512 9
+ char op_wsize; /* window size (1 .. 7) */
+ char op_speed; /* throughput class */
+ } x25_opts;
+ short x25_udlen; /* user data field length */
+ char x25_udata[16]; /* user data field */
+};
+
+/*
+ * network configuration info
+ * this structure must be 16 bytes long
+ *
+ * NOTE(review): the size remark above cannot hold for the layout below,
+ * because xc_addr alone embeds two 16 byte arrays.  Presumably it predates
+ * xc_addr becoming a full sockaddr_x25 -- confirm and correct.
+ */
+
+struct x25config {
+ struct sockaddr_x25 xc_addr;
+ /* link level parameters */
+ u_short xc_lproto:4, /* link level protocol eg. CCITTPROTO_HDLC */
+ xc_lptype:4, /* protocol type eg. HDLCPROTO_LAPB */
+ xc_ltrace:1, /* link level tracing flag */
+ xc_lwsize:7; /* link level window size */
+ u_short xc_lxidxchg:1, /* link level XID exchange flag - NOT YET */
+ /* packet level parameters */
+ xc_rsvd1:2,
+ xc_pwsize:3, /* default window size */
+ xc_psize:4, /* default packet size 7=128, 8=256, ... */
+ xc_type:3, /* network type */
+#define X25_1976 0
+#define X25_1980 1
+#define X25_1984 2
+#define X25_DDN 3
+#define X25_BASIC 4
+ xc_ptrace:1, /* packet level tracing flag */
+ xc_nodnic:1, /* remove our dnic when calling on net */
+ xc_prepnd0:1; /* prepend 0 when making offnet calls */
+ u_short xc_maxlcn; /* max logical channels */
+ u_short xc_dg_idletimo; /* timeout for idle datagram circuits. */
+};
+
+/*
+ * Argument of the two X.25 interface ioctls handled by pk_control():
+ * SIOCGIFCONF_X25 copies the interface's configuration into ifr_xc,
+ * SIOCSIFCONF_X25 installs ifr_xc as the new configuration (needs SS_PRIV).
+ * Only defined when IFNAMSIZ is already defined by the including file.
+ */
+#ifdef IFNAMSIZ
+struct ifreq_x25 {
+ char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct x25config ifr_xc;
+};
+#define SIOCSIFCONF_X25 _IOW('i', 12, struct ifreq_x25) /* set ifnet config */
+#define SIOCGIFCONF_X25 _IOWR('i',13, struct ifreq_x25) /* get ifnet config */
+#endif
diff --git a/sys/netccitt/x25acct.h b/sys/netccitt/x25acct.h
new file mode 100644
index 000000000000..71f3fd89603a
--- /dev/null
+++ b/sys/netccitt/x25acct.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)x25acct.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Format of X.25 accounting record written
+ * to X25ACCTF whenever a circuit is closed.
+ */
+
+/* Pathname of the accounting file; the "waterloo" build uses a logs/ subdirectory. */
+#ifdef waterloo
+#define X25ACCTF "/usr/adm/logs/x25acct"
+#else
+#define X25ACCTF "/usr/adm/x25acct"
+#endif
+
+/*
+ * One accounting record.  Note that the width of x25acct_etime, and
+ * therefore the record layout, depends on whether "waterloo" is defined.
+ */
+struct x25acct {
+ time_t x25acct_stime; /* start time */
+#ifdef waterloo
+ u_long x25acct_etime; /* elapsed time (seconds) */
+#else
+ u_short x25acct_etime; /* elapsed time (seconds) */
+#endif
+ short x25acct_uid; /* user id */
+ short x25acct_net; /* network id */
+ /* the five bit-fields below add up to 16 bits (4+4+1+1+6) */
+ u_short x25acct_psize:4, /* packet size */
+ x25acct_addrlen:4, /* x25acct_addr length */
+ x25acct_revcharge:1, /* reverse charging */
+ x25acct_callin:1, /* incoming call */
+ x25acct_unused:6; /* filler for the remaining bits */
+ char x25acct_addr[8]; /* remote DTE address (in bcd) */
+ char x25acct_udata[4]; /* protocol id */
+ long x25acct_txcnt; /* packets transmitted */
+ long x25acct_rxcnt; /* packets received */
+};
diff --git a/sys/netccitt/x25err.h b/sys/netccitt/x25err.h
new file mode 100644
index 000000000000..44d5490b422d
--- /dev/null
+++ b/sys/netccitt/x25err.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)x25err.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ *
+ * X.25 Reset and Clear errors and diagnostics. These values are
+ * returned in the u_error field of the u structure.
+ *
+ */
+
+/* Reset diagnostics occupy 100-104. */
+#define EXRESET 100 /* Reset: call reset */
+#define EXROUT 101 /* Reset: out of order */
+#define EXRRPE 102 /* Reset: remote procedure error */
+#define EXRLPE 103 /* Reset: local procedure error */
+#define EXRNCG 104 /* Reset: network congestion */
+
+/* Clear diagnostics occupy 110-119. */
+#define EXCLEAR 110 /* Clear: call cleared */
+#define EXCBUSY 111 /* Clear: number busy */
+#define EXCOUT 112 /* Clear: out of order */
+#define EXCRPE 113 /* Clear: remote procedure error */
+#define EXCRRC 114 /* Clear: collect call refused */
+#define EXCINV 115 /* Clear: invalid call */
+#define EXCAB 116 /* Clear: access barred */
+#define EXCLPE 117 /* Clear: local procedure error */
+#define EXCNCG 118 /* Clear: network congestion */
+#define EXCNOB 119 /* Clear: not obtainable */
+
diff --git a/sys/netinet/icmp_var.h b/sys/netinet/icmp_var.h
new file mode 100644
index 000000000000..beef16e1836d
--- /dev/null
+++ b/sys/netinet/icmp_var.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)icmp_var.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Variables related to this implementation
+ * of the internet control message protocol.
+ */
+/*
+ * ICMP counters.  ICMP_MAXTYPE must already be defined when this header
+ * is included, since it sizes the two histogram arrays.
+ */
+struct icmpstat {
+/* statistics related to icmp packets generated */
+ u_long icps_error; /* # of calls to icmp_error */
+ u_long icps_oldshort; /* no error 'cuz old ip too short */
+ u_long icps_oldicmp; /* no error 'cuz old was icmp */
+ u_long icps_outhist[ICMP_MAXTYPE + 1]; /* one slot per type 0..ICMP_MAXTYPE; presumably output counts by ICMP type */
+/* statistics related to input messages processed */
+ u_long icps_badcode; /* icmp_code out of range */
+ u_long icps_tooshort; /* packet < ICMP_MINLEN */
+ u_long icps_checksum; /* bad checksum */
+ u_long icps_badlen; /* calculated bound mismatch */
+ u_long icps_reflect; /* number of responses */
+ u_long icps_inhist[ICMP_MAXTYPE + 1]; /* one slot per type 0..ICMP_MAXTYPE; presumably input counts by ICMP type */
+};
+
+/*
+ * Names for ICMP sysctl objects
+ */
+#define ICMPCTL_MASKREPL 1 /* allow replies to netmask requests */
+#define ICMPCTL_MAXID 2 /* one past the highest id defined above */
+
+/*
+ * Initializer holding one { name, type } pair per id below ICMPCTL_MAXID.
+ * Slot 0 is an empty placeholder, so the "maskrepl" entry lands at index
+ * ICMPCTL_MASKREPL.
+ */
+#define ICMPCTL_NAMES { \
+ { 0, 0 }, \
+ { "maskrepl", CTLTYPE_INT }, \
+}
+
+#ifdef KERNEL
+/*
+ * The kernel's ICMP counters.  Written without "extern", so every kernel
+ * source file including this header carries a tentative definition of
+ * the same object.
+ */
+struct icmpstat icmpstat;
+#endif
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
new file mode 100644
index 000000000000..41f07c017b8a
--- /dev/null
+++ b/sys/netinet/if_ether.c
@@ -0,0 +1,554 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_ether.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Ethernet address resolution protocol.
+ * TODO:
+ * add "inuse/lock" bit (or ref. count) along with valid bit
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/if_ether.h>
+
+/*
+ * Pointer casts to the concrete socket-address types used in this file.
+ * The argument is parenthesized so that an expression argument (for
+ * example SIN(p + 1)) is cast as a whole instead of only its first operand.
+ */
+#define SIN(s) ((struct sockaddr_in *)(s))
+#define SDL(s) ((struct sockaddr_dl *)(s))
+#define SRP(s) ((struct sockaddr_inarp *)(s))
+
+/*
+ * ARP trailer negotiation. Trailer protocol is not IP specific,
+ * but ARP request/response use IP addresses.
+ */
+#define ETHERTYPE_IPTRAILERS ETHERTYPE_TRAIL
+
+
+/* timer values, all in seconds */
+int arpt_prune = (5*60*1); /* walk list every 5 minutes */
+int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */
+int arpt_down = 20; /* once declared down, don't send for 20 secs */
+/* shorthand for the expiry time kept in a route's metrics */
+#define rt_expire rt_rmx.rmx_expire
+
+/* helpers private to this file */
+static void arprequest __P((struct arpcom *, u_long *, u_long *, u_char *));
+static void arptfree __P((struct llinfo_arp *));
+static void arptimer __P((void *));
+static struct llinfo_arp *arplookup __P((u_long, int, int));
+static void in_arpinput __P((struct mbuf *));
+
+extern struct ifnet loif;
+extern struct timeval time;
+/* list head for all ARP entries; starts out linked to itself, i.e. empty */
+struct llinfo_arp llinfo_arp = {&llinfo_arp, &llinfo_arp};
+/* ARP input queue drained by arpintr(); presumably the 50 is the queue length limit -- confirm against struct ifqueue */
+struct ifqueue arpintrq = {0, 0, 0, 50};
+/*
+ * arp_inuse: entries currently allocated (bumped in arp_rtrequest on
+ * allocation, dropped on RTM_DELETE); arp_allocated: entries ever
+ * allocated; arp_intimer: not referenced anywhere in this file.
+ */
+int arp_inuse, arp_allocated, arp_intimer;
+int arp_maxtries = 5; /* requests arpresolve sends before marking the route RTF_REJECT */
+int useloopback = 1; /* use loopback interface for local traffic */
+int arpinit_done = 0; /* set once arp_rtrequest has started arptimer */
+
+/*
+ * Periodic aging pass over the ARP entries.  Schedules its own next run
+ * arpt_prune seconds ahead, then walks the llinfo_arp list at splnet and
+ * passes to arptfree() every entry whose route has a non-zero expiry
+ * time that is not later than the current time.
+ */
+/* ARGSUSED */
+static void
+arptimer(ignored_arg)
+	void *ignored_arg;
+{
+	register struct llinfo_arp *cur, *following;
+	register struct rtentry *route;
+	int spl;
+
+	spl = splnet();
+	timeout(arptimer, (caddr_t)0, arpt_prune * hz);
+	for (cur = llinfo_arp.la_next; cur != &llinfo_arp; cur = following) {
+		/* pick up the successor before cur can be taken off the list */
+		following = cur->la_next;
+		route = cur->la_rt;
+		if (route->rt_expire != 0 && route->rt_expire <= time.tv_sec)
+			arptfree(cur);	/* timer has expired, clear */
+	}
+	splx(spl);
+}
+
+/*
+ * Routing-table callback for routes handled by ARP (parallel to
+ * llc_rtrequest).
+ *
+ *	req	RTM_ADD, RTM_RESOLVE or RTM_DELETE; other requests are ignored.
+ *	rt	route being added, resolved or deleted.
+ *	sa	not used.
+ *
+ * The first call also starts arptimer().  Routes carrying RTF_GATEWAY
+ * are left untouched.
+ */
+void
+arp_rtrequest(req, rt, sa)
+	int req;
+	register struct rtentry *rt;
+	struct sockaddr *sa;
+{
+	register struct sockaddr *gate = rt->rt_gateway;
+	register struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
+	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+
+	if (!arpinit_done) {
+		/* start the aging timer lazily, hz ticks from now */
+		arpinit_done = 1;
+		timeout(arptimer, (caddr_t)0, hz);
+	}
+	if (rt->rt_flags & RTF_GATEWAY)
+		return;
+	switch (req) {
+
+	case RTM_ADD:
+		/*
+		 * XXX: If this is a manually added route to interface
+		 * such as older version of routed or gated might provide,
+		 * restore cloning bit.
+		 */
+		if ((rt->rt_flags & RTF_HOST) == 0 &&
+		    SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
+			rt->rt_flags |= RTF_CLONING;
+		if (rt->rt_flags & RTF_CLONING) {
+			/*
+			 * Case 1: This route should come from a route to iface.
+			 */
+			rt_setgate(rt, rt_key(rt),
+			    (struct sockaddr *)&null_sdl);
+			/* rt_setgate installed a new gateway; re-read it */
+			gate = rt->rt_gateway;
+			SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+			SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+			rt->rt_expire = time.tv_sec;
+			break;
+		}
+		/* Announce a new entry if requested. */
+		if (rt->rt_flags & RTF_ANNOUNCE)
+			arprequest((struct arpcom *)rt->rt_ifp,
+			    &SIN(rt_key(rt))->sin_addr.s_addr,
+			    &SIN(rt_key(rt))->sin_addr.s_addr,
+			    (u_char *)LLADDR(SDL(gate)));
+		/*FALLTHROUGH*/
+	case RTM_RESOLVE:
+		if (gate->sa_family != AF_LINK ||
+		    gate->sa_len < sizeof(null_sdl)) {
+			/* newline added to match the other log calls here */
+			log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n");
+			break;
+		}
+		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+		if (la != 0)
+			break; /* This happens on a route change */
+		/*
+		 * Case 2: This route may come from cloning, or a manual route
+		 * add with a LL address.
+		 */
+		R_Malloc(la, struct llinfo_arp *, sizeof(*la));
+		rt->rt_llinfo = (caddr_t)la;
+		if (la == 0) {
+			log(LOG_DEBUG, "arp_rtrequest: malloc failed\n");
+			break;
+		}
+		arp_inuse++, arp_allocated++;
+		Bzero(la, sizeof(*la));
+		la->la_rt = rt;
+		rt->rt_flags |= RTF_LLINFO;
+		insque(la, &llinfo_arp);
+		if (SIN(rt_key(rt))->sin_addr.s_addr ==
+		    (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
+		    /*
+		     * This test used to be
+		     *	if (loif.if_flags & IFF_UP)
+		     * It allowed local traffic to be forced
+		     * through the hardware by configuring the loopback down.
+		     * However, it causes problems during network configuration
+		     * for boards that can't receive packets they send.
+		     * It is now necessary to clear "useloopback" and remove
+		     * the route to force traffic out to the hardware.
+		     */
+			/* entry for our own address: never expires */
+			rt->rt_expire = 0;
+			Bcopy(((struct arpcom *)rt->rt_ifp)->ac_enaddr,
+				LLADDR(SDL(gate)), SDL(gate)->sdl_alen = 6);
+			if (useloopback)
+				rt->rt_ifp = &loif;
+
+		}
+		break;
+
+	case RTM_DELETE:
+		if (la == 0)
+			break;
+		arp_inuse--;
+		remque(la);
+		rt->rt_llinfo = 0;
+		rt->rt_flags &= ~RTF_LLINFO;
+		/* drop any packet still waiting for resolution */
+		if (la->la_hold)
+			m_freem(la->la_hold);
+		Free((caddr_t)la);
+	}
+}
+
+/*
+ * Ask, by broadcast on interface ac, which station owns IP address addr.
+ * The interface's own IP and ethernet addresses are used as the sender
+ * fields of the request.
+ */
+void
+arpwhohas(ac, addr)
+	register struct arpcom *ac;
+	register struct in_addr *addr;
+{
+	u_long *sender_ip = &ac->ac_ipaddr.s_addr;
+	u_long *target_ip = &addr->s_addr;
+
+	arprequest(ac, sender_ip, target_ip, ac->ac_enaddr);
+}
+
+/*
+ * Build an ARP request and hand it to the interface's output routine
+ * for broadcast.
+ *
+ *	ac	interface to send on
+ *	sip	source IP address placed in the ARP header
+ *	tip	target IP address placed in the ARP header
+ *	enaddr	source ethernet address placed in the ARP header
+ *
+ * Nothing is sent if no mbuf can be obtained without waiting.
+ */
+static void
+arprequest(ac, sip, tip, enaddr)
+	register struct arpcom *ac;
+	register u_long *sip, *tip;
+	register u_char *enaddr;
+{
+	register struct mbuf *pkt;
+	register struct ether_arp *arp;
+	register struct ether_header *ehdr;
+	struct sockaddr dest;
+
+	pkt = m_gethdr(M_DONTWAIT, MT_DATA);
+	if (pkt == NULL)
+		return;
+
+	/* the mbuf holds exactly one ether_arp */
+	pkt->m_len = sizeof(*arp);
+	pkt->m_pkthdr.len = sizeof(*arp);
+	MH_ALIGN(pkt, sizeof(*arp));
+	arp = mtod(pkt, struct ether_arp *);
+	bzero((caddr_t)arp, sizeof (*arp));
+
+	/* the ethernet header travels in the data area of an AF_UNSPEC sockaddr */
+	ehdr = (struct ether_header *)dest.sa_data;
+	bcopy((caddr_t)etherbroadcastaddr, (caddr_t)ehdr->ether_dhost,
+	    sizeof(ehdr->ether_dhost));
+	ehdr->ether_type = ETHERTYPE_ARP;	/* if_output will swap */
+
+	arp->arp_hrd = htons(ARPHRD_ETHER);
+	arp->arp_pro = htons(ETHERTYPE_IP);
+	arp->arp_hln = sizeof(arp->arp_sha);	/* hardware address length */
+	arp->arp_pln = sizeof(arp->arp_spa);	/* protocol address length */
+	arp->arp_op = htons(ARPOP_REQUEST);
+	bcopy((caddr_t)enaddr, (caddr_t)arp->arp_sha, sizeof(arp->arp_sha));
+	bcopy((caddr_t)sip, (caddr_t)arp->arp_spa, sizeof(arp->arp_spa));
+	bcopy((caddr_t)tip, (caddr_t)arp->arp_tpa, sizeof(arp->arp_tpa));
+
+	dest.sa_family = AF_UNSPEC;
+	dest.sa_len = sizeof(dest);
+	(*ac->ac_if.if_output)(&ac->ac_if, pkt, &dest, (struct rtentry *)0);
+}
+
+/*
+ * Resolve an IP address into an ethernet address.  If success,
+ * desten is filled in.  If there is no entry in arptab,
+ * set one up and broadcast a request for the IP address.
+ * Hold onto this mbuf and resend it once the address
+ * is finally resolved.  A return value of 1 indicates
+ * that desten has been filled in and the packet should be sent
+ * normally; a 0 return indicates that the packet has been
+ * taken over here, either now or for later transmission.
+ *
+ *	ac	interface the packet is leaving on
+ *	rt	route for dst, or null to have the entry looked up (and
+ *		created if necessary) here
+ *	m	the packet; freed here if no entry can be obtained
+ *	dst	destination address, read as a sockaddr_in
+ *	desten	buffer that receives the ethernet address
+ */
+int
+arpresolve(ac, rt, m, dst, desten)
+	register struct arpcom *ac;
+	register struct rtentry *rt;
+	struct mbuf *m;
+	register struct sockaddr *dst;
+	register u_char *desten;
+{
+	register struct llinfo_arp *la;
+	struct sockaddr_dl *sdl;
+
+	if (m->m_flags & M_BCAST) {	/* broadcast */
+		bcopy((caddr_t)etherbroadcastaddr, (caddr_t)desten,
+		    sizeof(etherbroadcastaddr));
+		return (1);
+	}
+	if (m->m_flags & M_MCAST) {	/* multicast */
+		ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
+		return(1);
+	}
+	if (rt)
+		la = (struct llinfo_arp *)rt->rt_llinfo;
+	else {
+		la = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0);
+		if (la != 0)
+			rt = la->la_rt;
+	}
+	if (la == 0 || rt == 0) {
+		/* newline added to match the other log calls in this file */
+		log(LOG_DEBUG, "arpresolve: can't allocate llinfo\n");
+		m_freem(m);
+		return (0);
+	}
+	sdl = SDL(rt->rt_gateway);
+	/*
+	 * Check the address family and length is valid, the address
+	 * is resolved; otherwise, try to resolve.
+	 */
+	if ((rt->rt_expire == 0 || rt->rt_expire > time.tv_sec) &&
+	    sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
+		bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
+		return 1;
+	}
+	/*
+	 * There is an arptab entry, but no ethernet address
+	 * response yet.  Replace the held mbuf with this
+	 * latest one.
+	 */
+	if (la->la_hold)
+		m_freem(la->la_hold);
+	la->la_hold = m;
+	if (rt->rt_expire) {
+		rt->rt_flags &= ~RTF_REJECT;
+		/* send at most one request per second for this entry */
+		if (la->la_asked == 0 || rt->rt_expire != time.tv_sec) {
+			rt->rt_expire = time.tv_sec;
+			if (la->la_asked++ < arp_maxtries)
+				arpwhohas(ac, &(SIN(dst)->sin_addr));
+			else {
+				/* give up for arpt_down seconds */
+				rt->rt_flags |= RTF_REJECT;
+				rt->rt_expire += arpt_down;
+				la->la_asked = 0;
+			}
+
+		}
+	}
+	return (0);
+}
+
+/*
+ * Drain the ARP input queue.  Each packet gets the checks common to all
+ * protocols (room for an arphdr, ethernet hardware type, room for the
+ * address fields whose lengths the header announces); packets for the IP
+ * or IP-trailer protocol type go on to in_arpinput(), all others are freed.
+ */
+void
+arpintr()
+{
+	register struct mbuf *pkt;
+	register struct arphdr *hdr;
+	register int proto;
+	int spl;
+
+	while (arpintrq.ifq_head) {
+		spl = splimp();
+		IF_DEQUEUE(&arpintrq, pkt);
+		splx(spl);
+		if (pkt == 0 || (pkt->m_flags & M_PKTHDR) == 0)
+			panic("arpintr");
+
+		if (pkt->m_len < sizeof(struct arphdr))
+			goto discard;
+		hdr = mtod(pkt, struct arphdr *);
+		if (hdr == 0 || ntohs(hdr->ar_hrd) != ARPHRD_ETHER)
+			goto discard;
+		if (pkt->m_len <
+		    sizeof(struct arphdr) + 2 * hdr->ar_hln + 2 * hdr->ar_pln)
+			goto discard;
+
+		proto = ntohs(hdr->ar_pro);
+		if (proto == ETHERTYPE_IP || proto == ETHERTYPE_IPTRAILERS) {
+			/* in_arpinput() takes over the mbuf */
+			in_arpinput(pkt);
+			continue;
+		}
+discard:
+		m_freem(pkt);
+	}
+}
+
+/*
+ * ARP for Internet protocols on 10 Mb/s Ethernet.
+ * Algorithm is that given in RFC 826.
+ * In addition, a sanity check is performed on the sender
+ * protocol address, to catch impersonators.
+ * We no longer handle negotiations for use of trailer protocol:
+ * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
+ * along with IP replies if we wanted trailers sent to us,
+ * and also sent them in response to IP replies.
+ * This allowed either end to announce the desire to receive
+ * trailer packets.
+ * We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
+ * but formerly didn't normally send requests.
+ *
+ * The mbuf m is always consumed: it is either freed or reused for the
+ * reply handed to the interface's output routine.
+ */
+static void
+in_arpinput(m)
+	struct mbuf *m;
+{
+	register struct ether_arp *ea;
+	register struct arpcom *ac = (struct arpcom *)m->m_pkthdr.rcvif;
+	struct ether_header *eh;
+	register struct llinfo_arp *la = 0;
+	register struct rtentry *rt;
+	struct in_ifaddr *ia, *maybe_ia = 0;
+	struct sockaddr_dl *sdl;
+	struct sockaddr sa;
+	struct in_addr isaddr, itaddr, myaddr;
+	int op;
+
+	/*
+	 * arpintr() sizes the packet from the ar_hln/ar_pln bytes it
+	 * carries, so a header announcing short addresses can arrive here
+	 * with fewer bytes than the fixed struct ether_arp read below.
+	 */
+	if (m->m_len < sizeof(struct ether_arp))
+		goto out;
+	ea = mtod(m, struct ether_arp *);
+	op = ntohs(ea->arp_op);
+	bcopy((caddr_t)ea->arp_spa, (caddr_t)&isaddr, sizeof (isaddr));
+	bcopy((caddr_t)ea->arp_tpa, (caddr_t)&itaddr, sizeof (itaddr));
+	/*
+	 * Look for one of our addresses on the receiving interface that
+	 * matches the sender or the target; maybe_ia remembers the last
+	 * address seen on that interface in case none matches.
+	 */
+	for (ia = in_ifaddr; ia; ia = ia->ia_next)
+		if (ia->ia_ifp == &ac->ac_if) {
+			maybe_ia = ia;
+			if ((itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) ||
+			     (isaddr.s_addr == ia->ia_addr.sin_addr.s_addr))
+				break;
+		}
+	if (maybe_ia == 0)
+		goto out;
+	myaddr = ia ? ia->ia_addr.sin_addr : maybe_ia->ia_addr.sin_addr;
+	if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)ac->ac_enaddr,
+	    sizeof (ea->arp_sha)))
+		goto out;	/* it's from me, ignore it. */
+	if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)etherbroadcastaddr,
+	    sizeof (ea->arp_sha))) {
+		log(LOG_ERR,
+		    "arp: ether address is broadcast for IP address %x!\n",
+		    ntohl(isaddr.s_addr));
+		goto out;
+	}
+	if (isaddr.s_addr == myaddr.s_addr) {
+		log(LOG_ERR,
+		   "duplicate IP address %x!! sent from ethernet address: %s\n",
+		   ntohl(isaddr.s_addr), ether_sprintf(ea->arp_sha));
+		itaddr = myaddr;
+		goto reply;
+	}
+	/* create an entry for the sender only when we are the target */
+	la = arplookup(isaddr.s_addr, itaddr.s_addr == myaddr.s_addr, 0);
+	if (la && (rt = la->la_rt) && (sdl = SDL(rt->rt_gateway))) {
+		if (sdl->sdl_alen &&
+		    bcmp((caddr_t)ea->arp_sha, LLADDR(sdl), sdl->sdl_alen))
+			/* ntohl() as in the other address messages above */
+			log(LOG_INFO, "arp info overwritten for %x by %s\n",
+			    ntohl(isaddr.s_addr), ether_sprintf(ea->arp_sha));
+		bcopy((caddr_t)ea->arp_sha, LLADDR(sdl),
+			    sdl->sdl_alen = sizeof(ea->arp_sha));
+		if (rt->rt_expire)
+			rt->rt_expire = time.tv_sec + arpt_keep;
+		rt->rt_flags &= ~RTF_REJECT;
+		la->la_asked = 0;
+		if (la->la_hold) {
+			/* send the packet that was waiting on this address */
+			(*ac->ac_if.if_output)(&ac->ac_if, la->la_hold,
+				rt_key(rt), rt);
+			la->la_hold = 0;
+		}
+	}
+reply:
+	if (op != ARPOP_REQUEST) {
+	out:
+		m_freem(m);
+		return;
+	}
+	if (itaddr.s_addr == myaddr.s_addr) {
+		/* I am the target */
+		bcopy((caddr_t)ea->arp_sha, (caddr_t)ea->arp_tha,
+		    sizeof(ea->arp_sha));
+		bcopy((caddr_t)ac->ac_enaddr, (caddr_t)ea->arp_sha,
+		    sizeof(ea->arp_sha));
+	} else {
+		/* answer only if a proxy entry exists for the target */
+		la = arplookup(itaddr.s_addr, 0, SIN_PROXY);
+		if (la == NULL)
+			goto out;
+		rt = la->la_rt;
+		bcopy((caddr_t)ea->arp_sha, (caddr_t)ea->arp_tha,
+		    sizeof(ea->arp_sha));
+		sdl = SDL(rt->rt_gateway);
+		bcopy(LLADDR(sdl), (caddr_t)ea->arp_sha, sizeof(ea->arp_sha));
+	}
+
+	/* turn the request around in place and send it back */
+	bcopy((caddr_t)ea->arp_spa, (caddr_t)ea->arp_tpa, sizeof(ea->arp_spa));
+	bcopy((caddr_t)&itaddr, (caddr_t)ea->arp_spa, sizeof(ea->arp_spa));
+	ea->arp_op = htons(ARPOP_REPLY);
+	ea->arp_pro = htons(ETHERTYPE_IP); /* let's be sure! */
+	eh = (struct ether_header *)sa.sa_data;
+	bcopy((caddr_t)ea->arp_tha, (caddr_t)eh->ether_dhost,
+	    sizeof(eh->ether_dhost));
+	eh->ether_type = ETHERTYPE_ARP;
+	sa.sa_family = AF_UNSPEC;
+	sa.sa_len = sizeof(sa);
+	(*ac->ac_if.if_output)(&ac->ac_if, m, &sa, (struct rtentry *)0);
+	return;
+}
+
+/*
+ * Retire an ARP entry.  When the route is still referenced and has an
+ * AF_LINK gateway, the entry is only invalidated (link-level address
+ * length, request counter and RTF_REJECT are cleared); otherwise the
+ * route is deleted through rtrequest().  Panics if the entry has no route.
+ */
+static void
+arptfree(la)
+	register struct llinfo_arp *la;
+{
+	register struct rtentry *route;
+	register struct sockaddr_dl *gw;
+
+	route = la->la_rt;
+	if (route == 0)
+		panic("arptfree");
+
+	gw = 0;
+	if (route->rt_refcnt > 0)
+		gw = SDL(route->rt_gateway);
+	if (gw == 0 || gw->sdl_family != AF_LINK) {
+		rtrequest(RTM_DELETE, rt_key(route), (struct sockaddr *)0,
+		    rt_mask(route), 0, (struct rtentry **)0);
+		return;
+	}
+
+	/* still in use: mark it unresolved and keep it */
+	gw->sdl_alen = 0;
+	la->la_asked = 0;
+	route->rt_flags &= ~RTF_REJECT;
+}
+/*
+ * Find the ARP entry for an IP address through the routing table.
+ *
+ *	addr	IP address to look up
+ *	create	passed to rtalloc1() as its second argument; also enables
+ *		the log message when no usable entry results
+ *	proxy	non-zero to look up the SIN_PROXY variant of the address
+ *
+ * Returns the route's llinfo_arp, or 0 when there is no route or the
+ * route is a gateway route, lacks RTF_LLINFO, or has a non-AF_LINK gateway.
+ * The reference taken by rtalloc1() is dropped again before returning.
+ */
+static struct llinfo_arp *
+arplookup(addr, create, proxy)
+	u_long addr;
+	int create, proxy;
+{
+	static struct sockaddr_inarp key = {sizeof(key), AF_INET };
+	register struct rtentry *route;
+
+	key.sin_addr.s_addr = addr;
+	if (proxy)
+		key.sin_other = SIN_PROXY;
+	else
+		key.sin_other = 0;
+
+	route = rtalloc1((struct sockaddr *)&key, create);
+	if (route == 0)
+		return (0);
+	route->rt_refcnt--;
+
+	if ((route->rt_flags & RTF_GATEWAY) == 0 &&
+	    (route->rt_flags & RTF_LLINFO) != 0 &&
+	    route->rt_gateway->sa_family == AF_LINK)
+		return ((struct llinfo_arp *)route->rt_llinfo);
+
+	if (create)
+		log(LOG_DEBUG, "arptnew failed on %x\n", ntohl(addr));
+	return (0);
+}
+
+/*
+ * ARP ioctl entry point.  No request is implemented: both arguments
+ * are ignored and EOPNOTSUPP is returned every time.
+ */
+int
+arpioctl(cmd, data)
+	int cmd;
+	caddr_t data;
+{
+	int error = EOPNOTSUPP;
+
+	return (error);
+}
diff --git a/sys/netinet/if_ether.h b/sys/netinet/if_ether.h
new file mode 100644
index 000000000000..6b4def054f78
--- /dev/null
+++ b/sys/netinet/if_ether.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_ether.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Structure of a 10Mb/s Ethernet header.
+ * The three fields occupy 6 + 6 + 2 = 14 bytes.
+ */
+struct ether_header {
+ u_char ether_dhost[6]; /* destination hardware address */
+ u_char ether_shost[6]; /* presumably the source hardware address */
+ u_short ether_type; /* packet type, one of ETHERTYPE_* */
+};
+
+/* Values for the ether_type field. */
+#define ETHERTYPE_PUP 0x0200 /* PUP protocol */
+#define ETHERTYPE_IP 0x0800 /* IP protocol */
+#define ETHERTYPE_ARP 0x0806 /* Addr. resolution protocol */
+#define ETHERTYPE_REVARP 0x8035 /* reverse Addr. resolution protocol */
+
+/*
+ * The ETHERTYPE_NTRAILER packet types starting at ETHERTYPE_TRAIL have
+ * (type-ETHERTYPE_TRAIL)*512 bytes of data followed
+ * by an ETHER type (as given above) and then the (variable-length) header.
+ */
+#define ETHERTYPE_TRAIL 0x1000 /* Trailer packet */
+#define ETHERTYPE_NTRAILER 16 /* number of trailer packet types */
+
+/* presumably the largest and smallest data lengths of a frame, in bytes -- confirm */
+#define ETHERMTU 1500
+#define ETHERMIN (60-14) /* the 14 matches the size of the ether_header fields */
+
+#ifdef KERNEL
+/*
+ * Macro to map an IP multicast address to an Ethernet multicast address.
+ * The high-order 25 bits of the Ethernet address are statically assigned,
+ * and the low-order 23 bits are taken from the low end of the IP address.
+ *
+ * "ipaddr" is read as an array of bytes, of which bytes 1..3 are used, so
+ * the address must be stored most-significant byte first.  It is
+ * parenthesized before the cast so that any pointer expression may be
+ * passed.  "enaddr" is evaluated several times.  The expansion is a
+ * brace-enclosed block, not an expression.
+ */
+#define ETHER_MAP_IP_MULTICAST(ipaddr, enaddr) \
+	/* struct in_addr *ipaddr; */ \
+	/* u_char enaddr[6];	   */ \
+{ \
+	(enaddr)[0] = 0x01; \
+	(enaddr)[1] = 0x00; \
+	(enaddr)[2] = 0x5e; \
+	(enaddr)[3] = ((u_char *)(ipaddr))[1] & 0x7f; \
+	(enaddr)[4] = ((u_char *)(ipaddr))[2]; \
+	(enaddr)[5] = ((u_char *)(ipaddr))[3]; \
+}
+#endif
+
+/*
+ * Ethernet Address Resolution Protocol.
+ *
+ * See RFC 826 for protocol description. Structure below is adapted
+ * to resolving internet addresses. Field names used correspond to
+ * RFC 826.
+ *
+ * The address fields have fixed sizes (6-byte hardware, 4-byte protocol
+ * addresses); struct arphdr must be declared before this header is used.
+ */
+struct ether_arp {
+ struct arphdr ea_hdr; /* fixed-size header */
+ u_char arp_sha[6]; /* sender hardware address */
+ u_char arp_spa[4]; /* sender protocol address */
+ u_char arp_tha[6]; /* target hardware address */
+ u_char arp_tpa[4]; /* target protocol address */
+};
+/* shorthands for reaching the embedded header's fields */
+#define arp_hrd ea_hdr.ar_hrd
+#define arp_pro ea_hdr.ar_pro
+#define arp_hln ea_hdr.ar_hln
+#define arp_pln ea_hdr.ar_pln
+#define arp_op ea_hdr.ar_op
+
+
+/*
+ * Structure shared between the ethernet driver modules and
+ * the address resolution code. For example, each ec_softc or il_softc
+ * begins with this structure.
+ *
+ * ac_if has to stay the first member: the ARP code converts a
+ * struct ifnet pointer into a struct arpcom pointer with a plain cast.
+ */
+struct arpcom {
+ struct ifnet ac_if; /* network-visible interface */
+ u_char ac_enaddr[6]; /* ethernet hardware address */
+ struct in_addr ac_ipaddr; /* copy of ip address- XXX */
+ struct ether_multi *ac_multiaddrs; /* list of ether multicast addrs */
+ int ac_multicnt; /* length of ac_multiaddrs list */
+};
+
+/*
+ * Per-route ARP state.  Entries are kept on a doubly linked list whose
+ * head is the global llinfo_arp; la_next/la_prev are the list links.
+ */
+struct llinfo_arp {
+ struct llinfo_arp *la_next;
+ struct llinfo_arp *la_prev;
+ struct rtentry *la_rt; /* route this entry belongs to */
+ struct mbuf *la_hold; /* last packet until resolved/timeout */
+ long la_asked; /* count of requests sent; arpresolve() compares it with arp_maxtries, a reply resets it */
+#define la_timer la_rt->rt_rmx.rmx_expire /* deletion time in seconds */
+};
+
+/*
+ * Socket address used as the routing-table key for ARP lookups
+ * (see arplookup() in if_ether.c, which fills in sin_len, sin_family,
+ * sin_addr and sin_other).
+ */
+struct sockaddr_inarp {
+ u_char sin_len;
+ u_char sin_family;
+ u_short sin_port;
+ struct in_addr sin_addr; /* IP address being looked up */
+ struct in_addr sin_srcaddr;
+ u_short sin_tos;
+ u_short sin_other; /* SIN_PROXY for a proxy lookup, else 0 */
+#define SIN_PROXY 1
+};
+/*
+ * IP and ethernet specific routing flags
+ * (aliases for the protocol-specific route flag bits).
+ */
+#define RTF_USETRAILERS RTF_PROTO1 /* use trailers */
+#define RTF_ANNOUNCE RTF_PROTO2 /* announce new arp entry; tested by arp_rtrequest() on RTM_ADD */
+
+#ifdef KERNEL
+/*
+ * Kernel-wide ethernet/ARP objects.  These are written without "extern",
+ * so each file including this header carries a tentative definition.
+ */
+u_char	etherbroadcastaddr[6];
+u_char	ether_ipmulticast_min[6];
+u_char	ether_ipmulticast_max[6];
+struct	ifqueue arpintrq;	/* ARP input queue, drained by arpintr() */
+
+/* NOTE(review): if_ether.c contains no arptnew() definition -- confirm this prototype is still needed. */
+struct	llinfo_arp *arptnew __P((struct in_addr *));
+struct	llinfo_arp llinfo_arp;		/* head of the llinfo queue */
+
+/* ARP entry points (the duplicated arpwhohas prototype is given once) */
+void	arpwhohas __P((struct arpcom *, struct in_addr *));
+void	arpintr __P((void));
+int	arpresolve __P((struct arpcom *,
+	   struct rtentry *, struct mbuf *, struct sockaddr *, u_char *));
+void	arp_rtrequest __P((int, struct rtentry *, struct sockaddr *));
+
+/* ethernet multicast list maintenance */
+int	ether_addmulti __P((struct ifreq *, struct arpcom *));
+int	ether_delmulti __P((struct ifreq *, struct arpcom *));
+
+/*
+ * Ethernet multicast address structure. There is one of these for each
+ * multicast address or range of multicast addresses that we are supposed
+ * to listen to on a particular interface. They are kept in a linked list,
+ * rooted in the interface's arpcom structure. (This really has nothing to
+ * do with ARP, or with the Internet address family, but this appears to be
+ * the minimally-disrupting place to put it.)
+ *
+ * The list is singly linked through enm_next and starts at the arpcom's
+ * ac_multiaddrs member.
+ */
+struct ether_multi {
+ u_char enm_addrlo[6]; /* low or only address of range */
+ u_char enm_addrhi[6]; /* high or only address of range */
+ struct arpcom *enm_ac; /* back pointer to arpcom */
+ u_int enm_refcount; /* no. claims to this addr/range */
+ struct ether_multi *enm_next; /* ptr to next ether_multi */
+};
+
+/*
+ * Structure used by macros below to remember position when stepping through
+ * all of the ether_multi records.
+ */
+struct ether_multistep {
+ struct ether_multi *e_enm; /* record the next ETHER_NEXT_MULTI() will return; NULL when done */
+};
+
+/*
+ * Macro for looking up the ether_multi record for a given range of Ethernet
+ * multicast addresses connected to a given arpcom structure. If no matching
+ * record is found, "enm" returns NULL.
+ *
+ * A record matches only when both its low and its high address equal the
+ * ones given.  "enm" must be an assignable pointer; the arguments are
+ * evaluated repeatedly while the list is scanned.  The expansion is a
+ * brace-enclosed block, not an expression.
+ */
+#define ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm) \
+ /* u_char addrlo[6]; */ \
+ /* u_char addrhi[6]; */ \
+ /* struct arpcom *ac; */ \
+ /* struct ether_multi *enm; */ \
+{ \
+ for ((enm) = (ac)->ac_multiaddrs; \
+ (enm) != NULL && \
+ (bcmp((enm)->enm_addrlo, (addrlo), 6) != 0 || \
+ bcmp((enm)->enm_addrhi, (addrhi), 6) != 0); \
+ (enm) = (enm)->enm_next); \
+}
+
+/*
+ * Macro to step through all of the ether_multi records, one at a time.
+ * The current position is remembered in "step", which the caller must
+ * provide. ETHER_FIRST_MULTI(), below, must be called to initialize "step"
+ * and get the first record. Both macros return a NULL "enm" when there
+ * are no remaining records.
+ *
+ * "enm" receives the record saved in "step"; when that record is not
+ * NULL, "step" is moved on to its successor.
+ */
+#define ETHER_NEXT_MULTI(step, enm) \
+ /* struct ether_multistep step; */ \
+ /* struct ether_multi *enm; */ \
+{ \
+ if (((enm) = (step).e_enm) != NULL) \
+ (step).e_enm = (enm)->enm_next; \
+}
+
+/*
+ * Start a walk over the multicast records of "ac": point "step" at the
+ * head of the ac_multiaddrs list, then fetch the first record into "enm"
+ * with ETHER_NEXT_MULTI().
+ */
+#define ETHER_FIRST_MULTI(step, ac, enm) \
+ /* struct ether_multistep step; */ \
+ /* struct arpcom *ac; */ \
+ /* struct ether_multi *enm; */ \
+{ \
+ (step).e_enm = (ac)->ac_multiaddrs; \
+ ETHER_NEXT_MULTI((step), (enm)); \
+}
+
+#endif
diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c
new file mode 100644
index 000000000000..78b426c49eaf
--- /dev/null
+++ b/sys/netinet/igmp.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)igmp.c 8.1 (Berkeley) 7/19/93
+ */
+
+/* Internet Group Management Protocol (IGMP) routines. */
+
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/igmp.h>
+#include <netinet/igmp_var.h>
+
+extern struct ifnet loif;
+
+static int igmp_timers_are_running = 0; /* nonzero while some inm_timer may be pending */
+static u_long igmp_all_hosts_group; /* INADDR_ALLHOSTS_GROUP in network order; set by igmp_init() */
+
+static void igmp_sendreport __P((struct in_multi *));
+
+void
+igmp_init()
+{
+ /*
+ * Cache the all-hosts group in network order, to avoid byte-swapping it again.
+ */
+ igmp_all_hosts_group = htonl(INADDR_ALLHOSTS_GROUP);
+}
+
+void
+igmp_input(m, iphlen)
+ register struct mbuf *m;
+ register int iphlen;
+{
+ register struct igmp *igmp;
+ register struct ip *ip;
+ register int igmplen;
+ register struct ifnet *ifp = m->m_pkthdr.rcvif; /* interface the packet arrived on */
+ register int minlen;
+ register struct in_multi *inm;
+ register struct in_ifaddr *ia;
+ struct in_multistep step;
+
+ ++igmpstat.igps_rcv_total;
+
+ ip = mtod(m, struct ip *);
+ igmplen = ip->ip_len; /* NOTE(review): presumably already excludes the IP header -- confirm */
+
+ /*
+ * Validate lengths
+ */
+ if (igmplen < IGMP_MINLEN) {
+ ++igmpstat.igps_rcv_tooshort;
+ m_freem(m);
+ return;
+ }
+ minlen = iphlen + IGMP_MINLEN;
+ if ((m->m_flags & M_EXT || m->m_len < minlen) &&
+ (m = m_pullup(m, minlen)) == 0) {
+ ++igmpstat.igps_rcv_tooshort;
+ return; /* NOTE(review): no m_freem here; presumably m_pullup freed the chain -- confirm */
+ }
+
+ /*
+ * Validate checksum
+ */
+ m->m_data += iphlen; /* step past the IP header so only IGMP is summed */
+ m->m_len -= iphlen;
+ igmp = mtod(m, struct igmp *);
+ if (in_cksum(m, igmplen)) {
+ ++igmpstat.igps_rcv_badsum;
+ m_freem(m);
+ return;
+ }
+ m->m_data -= iphlen; /* put the IP header back */
+ m->m_len += iphlen;
+ ip = mtod(m, struct ip *); /* re-derive: m may have been replaced by m_pullup */
+
+ switch (igmp->igmp_type) {
+
+ case IGMP_HOST_MEMBERSHIP_QUERY:
+ ++igmpstat.igps_rcv_queries;
+
+ if (ifp == &loif)
+ break; /* no timers started, but still passed to rip_input below */
+
+ if (ip->ip_dst.s_addr != igmp_all_hosts_group) {
+ ++igmpstat.igps_rcv_badqueries;
+ m_freem(m);
+ return;
+ }
+
+ /*
+ * Start the timers in all of our membership records for
+ * the interface on which the query arrived, except those
+ * that are already running and those that belong to the
+ * "all-hosts" group.
+ */
+ IN_FIRST_MULTI(step, inm);
+ while (inm != NULL) {
+ if (inm->inm_ifp == ifp && inm->inm_timer == 0 &&
+ inm->inm_addr.s_addr != igmp_all_hosts_group) {
+ inm->inm_timer =
+ IGMP_RANDOM_DELAY(inm->inm_addr);
+ igmp_timers_are_running = 1;
+ }
+ IN_NEXT_MULTI(step, inm);
+ }
+
+ break;
+
+ case IGMP_HOST_MEMBERSHIP_REPORT:
+ ++igmpstat.igps_rcv_reports;
+
+ if (ifp == &loif)
+ break; /* no timer handling, but still passed to rip_input below */
+
+ if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
+ igmp->igmp_group.s_addr != ip->ip_dst.s_addr) {
+ ++igmpstat.igps_rcv_badreports;
+ m_freem(m);
+ return;
+ }
+
+ /*
+ * KLUDGE: if the IP source address of the report has an
+ * unspecified (i.e., zero) subnet number, as is allowed for
+ * a booting host, replace it with the correct subnet number
+ * so that a process-level multicast routing demon can
+ * determine which subnet it arrived from. This is necessary
+ * to compensate for the lack of any way for a process to
+ * determine the arrival interface of an incoming packet.
+ */
+ if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0) {
+ IFP_TO_IA(ifp, ia);
+ if (ia) ip->ip_src.s_addr = htonl(ia->ia_subnet);
+ }
+
+ /*
+ * If we belong to the group being reported, stop
+ * our timer for that group.
+ */
+ IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
+ if (inm != NULL) {
+ inm->inm_timer = 0;
+ ++igmpstat.igps_rcv_ourreports;
+ }
+
+ break;
+ }
+
+ /*
+ * Pass all valid IGMP packets up to any process(es) listening
+ * on a raw IGMP socket.
+ */
+ rip_input(m);
+}
+
+void
+igmp_joingroup(inm)
+ struct in_multi *inm;
+{
+ register int s = splnet();
+
+ if (inm->inm_addr.s_addr == igmp_all_hosts_group ||
+ inm->inm_ifp == &loif)
+ inm->inm_timer = 0; /* all-hosts group or loopback: no report is sent */
+ else {
+ igmp_sendreport(inm); /* report at once ... */
+ inm->inm_timer = IGMP_RANDOM_DELAY(inm->inm_addr); /* ... and again when this expires */
+ igmp_timers_are_running = 1;
+ }
+ splx(s);
+}
+
+void
+igmp_leavegroup(inm)
+ struct in_multi *inm;
+{
+ /*
+ * No action required on leaving a group; "inm" is not referenced.
+ */
+}
+
+void
+igmp_fasttimo()
+{
+ register struct in_multi *inm;
+ register int s;
+ struct in_multistep step;
+
+ /*
+ * Quick check to see if any work needs to be done, in order
+ * to minimize the overhead of fasttimo processing.
+ */
+ if (!igmp_timers_are_running)
+ return;
+
+ s = splnet();
+ igmp_timers_are_running = 0; /* set again below if any timer is still counting */
+ IN_FIRST_MULTI(step, inm);
+ while (inm != NULL) {
+ if (inm->inm_timer == 0) {
+ /* timer not running for this record */
+ } else if (--inm->inm_timer == 0) {
+ igmp_sendreport(inm); /* timer just expired */
+ } else {
+ igmp_timers_are_running = 1;
+ }
+ IN_NEXT_MULTI(step, inm);
+ }
+ splx(s);
+}
+
+/* Build an IGMP host membership report for "inm" and pass it to ip_output(). */
+static void
+igmp_sendreport(inm)
+ register struct in_multi *inm;
+{
+ register struct mbuf *m;
+ register struct igmp *igmp;
+ register struct ip *ip;
+ register struct ip_moptions *imo;
+ struct ip_moptions simo;
+
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ return;
+ /* Assume max_linkhdr + sizeof(struct ip) + IGMP_MINLEN fits in this mbuf. */
+ m->m_data += max_linkhdr;
+ m->m_len = sizeof(struct ip) + IGMP_MINLEN;
+ m->m_pkthdr.len = sizeof(struct ip) + IGMP_MINLEN;
+
+ ip = mtod(m, struct ip *);
+ ip->ip_tos = 0;
+ ip->ip_len = sizeof(struct ip) + IGMP_MINLEN;
+ ip->ip_off = 0;
+ ip->ip_p = IPPROTO_IGMP;
+ ip->ip_src.s_addr = INADDR_ANY;
+ ip->ip_dst = inm->inm_addr;
+
+ igmp = (struct igmp *)(ip + 1);
+ igmp->igmp_type = IGMP_HOST_MEMBERSHIP_REPORT;
+ igmp->igmp_code = 0;
+ igmp->igmp_group = inm->inm_addr;
+ igmp->igmp_cksum = 0;
+ /* in_cksum() sums from m_data: hide the IP header while checksumming, */
+ /* as igmp_input() does, so that only the IGMP message is covered. */
+ m->m_data += sizeof(struct ip);
+ m->m_len -= sizeof(struct ip);
+ igmp->igmp_cksum = in_cksum(m, IGMP_MINLEN);
+ m->m_data -= sizeof(struct ip);
+ m->m_len += sizeof(struct ip);
+
+ imo = &simo;
+ bzero((caddr_t)imo, sizeof(*imo));
+ imo->imo_multicast_ifp = inm->inm_ifp;
+ imo->imo_multicast_ttl = 1;
+ /* If acting as a multicast router, loop the report back for the routing demon. */
+#ifdef MROUTING
+ {
+ extern struct socket *ip_mrouter;
+ imo->imo_multicast_loop = (ip_mrouter != NULL);
+ }
+#endif
+ ip_output(m, NULL, NULL, 0, imo);
+ ++igmpstat.igps_snd_reports;
+}
diff --git a/sys/netinet/igmp.h b/sys/netinet/igmp.h
new file mode 100644
index 000000000000..29ce21dee6f0
--- /dev/null
+++ b/sys/netinet/igmp.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)igmp.h 8.1 (Berkeley) 6/10/93
+ */
+
+/* Internet Group Management Protocol (IGMP) definitions. */
+
+/*
+ * IGMP packet format.
+ */
+struct igmp {
+ u_char igmp_type; /* version & type of IGMP message */
+ u_char igmp_code; /* unused, should be zero */
+ u_short igmp_cksum; /* IP-style checksum */
+ struct in_addr igmp_group; /* group address being reported */
+}; /* (zero for queries) */
+
+#define IGMP_MINLEN 8 /* shortest acceptable IGMP message */
+
+#define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* message types, incl. version */
+#define IGMP_HOST_MEMBERSHIP_REPORT 0x12
+#define IGMP_DVMRP 0x13 /* for experimental multicast */
+ /* routing protocol */
+
+#define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay (in seconds) for response to a query */
diff --git a/sys/netinet/igmp_var.h b/sys/netinet/igmp_var.h
new file mode 100644
index 000000000000..ff70f70e2b34
--- /dev/null
+++ b/sys/netinet/igmp_var.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)igmp_var.h 8.1 (Berkeley) 7/19/93
+ */
+
+/*
+ * Internet Group Management Protocol (IGMP),
+ * implementation-specific definitions.
+ *
+ * Written by Steve Deering, Stanford, May 1988.
+ *
+ * MULTICAST 1.1
+ */
+
+struct igmpstat {
+ u_long igps_rcv_total; /* total IGMP messages received */
+ u_long igps_rcv_tooshort; /* received with too few bytes */
+ u_long igps_rcv_badsum; /* received with bad checksum */
+ u_long igps_rcv_queries; /* received membership queries */
+ u_long igps_rcv_badqueries; /* received invalid queries */
+ u_long igps_rcv_reports; /* received membership reports */
+ u_long igps_rcv_badreports; /* received invalid reports */
+ u_long igps_rcv_ourreports; /* received reports for our groups */
+ u_long igps_snd_reports; /* sent membership reports */
+};
+
+#ifdef KERNEL
+struct igmpstat igmpstat;
+
+/*
+ * Macro to compute a random timer value between 1 and
+ * (IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ). We generate a "random" number by
+ * adding the total number of IP packets received, our primary IP address, and
+ * the multicast address being timed-out. The 4.3 random() routine really
+ * ought to be available in the kernel!
+ */
+#define IGMP_RANDOM_DELAY(multiaddr) \
+ /* struct in_addr multiaddr; */ \
+ ( (ipstat.ips_total + \
+ ntohl(IA_SIN(in_ifaddr)->sin_addr.s_addr) + \
+ ntohl((multiaddr).s_addr) \
+ ) \
+ % (IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ) + 1 \
+ )
+
+void igmp_init __P(());
+void igmp_input __P((struct mbuf *, int));
+void igmp_joingroup __P((struct in_multi *));
+void igmp_leavegroup __P((struct in_multi *));
+void igmp_fasttimo __P(());
+#endif
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
new file mode 100644
index 000000000000..e8b481b4005c
--- /dev/null
+++ b/sys/netinet/in.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in.c 8.2 (Berkeley) 11/15/93
+ */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/if_ether.h>
+
+#ifdef INET
+/*
+ * Return the network number (in host byte order) from an internet address.
+ */
+u_long
+in_netof(in)
+ struct in_addr in;
+{
+ register u_long i = ntohl(in.s_addr); /* address in host byte order */
+ register u_long net;
+ register struct in_ifaddr *ia;
+
+ if (IN_CLASSA(i))
+ net = i & IN_CLASSA_NET;
+ else if (IN_CLASSB(i))
+ net = i & IN_CLASSB_NET;
+ else if (IN_CLASSC(i))
+ net = i & IN_CLASSC_NET;
+ else if (IN_CLASSD(i))
+ net = i & IN_CLASSD_NET;
+ else
+ return (0); /* none of classes A through D */
+
+ /*
+ * Check whether network is a subnet;
+ * if so, return subnet number.
+ */
+ for (ia = in_ifaddr; ia; ia = ia->ia_next)
+ if (net == ia->ia_net)
+ return (i & ia->ia_subnetmask);
+ return (net);
+}
+
+#ifndef SUBNETSARELOCAL
+#define SUBNETSARELOCAL 1
+#endif
+int subnetsarelocal = SUBNETSARELOCAL; /* defaults to 1 unless SUBNETSARELOCAL is predefined */
+/*
+ * Return 1 if an internet address is for a ``local'' host
+ * (one to which we have a connection). If subnetsarelocal
+ * is true, this includes other subnets of the local net.
+ * Otherwise, it includes only the directly-connected (sub)nets.
+ */
+in_localaddr(in)
+ struct in_addr in;
+{
+ register u_long i = ntohl(in.s_addr);
+ register struct in_ifaddr *ia;
+
+ if (subnetsarelocal) {
+ for (ia = in_ifaddr; ia; ia = ia->ia_next)
+ if ((i & ia->ia_netmask) == ia->ia_net) /* same net as one of our addresses */
+ return (1);
+ } else {
+ for (ia = in_ifaddr; ia; ia = ia->ia_next)
+ if ((i & ia->ia_subnetmask) == ia->ia_subnet) /* same subnet as one of our addresses */
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * Determine whether an IP address is in a reserved set of addresses
+ * that may not be forwarded (returns 0), or whether datagrams to that
+ * destination may be forwarded (returns 1).
+ */
+in_canforward(in)
+ struct in_addr in;
+{
+ register u_long i = ntohl(in.s_addr);
+ register u_long net;
+
+ if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i))
+ return (0);
+ if (IN_CLASSA(i)) {
+ net = i & IN_CLASSA_NET;
+ if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT)) /* net 0 or the loopback net */
+ return (0);
+ }
+ return (1);
+}
+
+/*
+ * Trim a mask in a sockaddr: sin_len ends at the last nonzero address byte.
+ */
+void
+in_socktrim(ap)
+struct sockaddr_in *ap;
+{
+ register char *cplim = (char *) &ap->sin_addr; /* first byte of the address */
+ register char *cp = (char *) (&ap->sin_addr + 1); /* one past its last byte */
+
+ ap->sin_len = 0; /* stays 0 when every address byte is zero */
+ while (--cp >= cplim) /* scan backwards, including the first address byte */
+ if (*cp) {
+ (ap)->sin_len = cp - (char *) (ap) + 1;
+ break;
+ }
+}
+
+int in_interfaces; /* number of external internet interfaces */
+extern struct ifnet loif;
+
+/*
+ * Generic internet control operations (ioctl's). Ifp is 0 if not an
+ * interface-specific ioctl. The first switch validates, the second acts.
+ */
+/* ARGSUSED */
+in_control(so, cmd, data, ifp)
+ struct socket *so;
+ int cmd;
+ caddr_t data;
+ register struct ifnet *ifp;
+{
+ register struct ifreq *ifr = (struct ifreq *)data;
+ register struct in_ifaddr *ia = 0;
+ register struct ifaddr *ifa;
+ struct in_ifaddr *oia;
+ struct in_aliasreq *ifra = (struct in_aliasreq *)data;
+ struct sockaddr_in oldaddr;
+ int error, hostIsNew, maskIsNew;
+ u_long i;
+
+ /*
+ * Find address for this interface, if it exists.
+ */
+ if (ifp)
+ for (ia = in_ifaddr; ia; ia = ia->ia_next)
+ if (ia->ia_ifp == ifp)
+ break;
+
+ switch (cmd) {
+
+ case SIOCAIFADDR:
+ case SIOCDIFADDR:
+ if (ifra->ifra_addr.sin_family == AF_INET)
+ for (oia = ia; ia; ia = ia->ia_next) { /* look for this exact address on ifp */
+ if (ia->ia_ifp == ifp &&
+ ia->ia_addr.sin_addr.s_addr ==
+ ifra->ifra_addr.sin_addr.s_addr)
+ break;
+ }
+ if (cmd == SIOCDIFADDR && ia == 0)
+ return (EADDRNOTAVAIL);
+ /* FALLTHROUGH */
+ case SIOCSIFADDR:
+ case SIOCSIFNETMASK:
+ case SIOCSIFDSTADDR:
+ if ((so->so_state & SS_PRIV) == 0)
+ return (EPERM);
+
+ if (ifp == 0)
+ panic("in_control");
+ if (ia == (struct in_ifaddr *)0) { /* no matching address: allocate and link a new one */
+ oia = (struct in_ifaddr *)
+ malloc(sizeof *oia, M_IFADDR, M_WAITOK);
+ if (oia == (struct in_ifaddr *)NULL)
+ return (ENOBUFS);
+ bzero((caddr_t)oia, sizeof *oia);
+ if (ia = in_ifaddr) { /* append to the global in_ifaddr list */
+ for ( ; ia->ia_next; ia = ia->ia_next)
+ continue;
+ ia->ia_next = oia;
+ } else
+ in_ifaddr = oia;
+ ia = oia;
+ if (ifa = ifp->if_addrlist) { /* append to the interface's own address list */
+ for ( ; ifa->ifa_next; ifa = ifa->ifa_next)
+ continue;
+ ifa->ifa_next = (struct ifaddr *) ia;
+ } else
+ ifp->if_addrlist = (struct ifaddr *) ia;
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+ ia->ia_ifa.ifa_dstaddr
+ = (struct sockaddr *)&ia->ia_dstaddr;
+ ia->ia_ifa.ifa_netmask
+ = (struct sockaddr *)&ia->ia_sockmask;
+ ia->ia_sockmask.sin_len = 8;
+ if (ifp->if_flags & IFF_BROADCAST) {
+ ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
+ ia->ia_broadaddr.sin_family = AF_INET;
+ }
+ ia->ia_ifp = ifp;
+ if (ifp != &loif)
+ in_interfaces++;
+ }
+ break;
+
+ case SIOCSIFBRDADDR:
+ if ((so->so_state & SS_PRIV) == 0)
+ return (EPERM);
+ /* FALLTHROUGH */
+
+ case SIOCGIFADDR:
+ case SIOCGIFNETMASK:
+ case SIOCGIFDSTADDR:
+ case SIOCGIFBRDADDR:
+ if (ia == (struct in_ifaddr *)0)
+ return (EADDRNOTAVAIL);
+ break;
+ }
+ switch (cmd) { /* second pass: carry out the request */
+
+ case SIOCGIFADDR:
+ *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
+ break;
+
+ case SIOCGIFBRDADDR:
+ if ((ifp->if_flags & IFF_BROADCAST) == 0)
+ return (EINVAL);
+ *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
+ break;
+
+ case SIOCGIFDSTADDR:
+ if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+ return (EINVAL);
+ *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
+ break;
+
+ case SIOCGIFNETMASK:
+ *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
+ break;
+
+ case SIOCSIFDSTADDR:
+ if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+ return (EINVAL);
+ oldaddr = ia->ia_dstaddr;
+ ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
+ if (ifp->if_ioctl && (error = (*ifp->if_ioctl)
+ (ifp, SIOCSIFDSTADDR, (caddr_t)ia))) {
+ ia->ia_dstaddr = oldaddr; /* driver refused: restore the old destination */
+ return (error);
+ }
+ if (ia->ia_flags & IFA_ROUTE) {
+ ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr; /* point at the old destination for the delete */
+ rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
+ ia->ia_ifa.ifa_dstaddr =
+ (struct sockaddr *)&ia->ia_dstaddr;
+ rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+ }
+ break;
+
+ case SIOCSIFBRDADDR:
+ if ((ifp->if_flags & IFF_BROADCAST) == 0)
+ return (EINVAL);
+ ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
+ break;
+
+ case SIOCSIFADDR:
+ return (in_ifinit(ifp, ia,
+ (struct sockaddr_in *) &ifr->ifr_addr, 1));
+
+ case SIOCSIFNETMASK:
+ i = ifra->ifra_addr.sin_addr.s_addr;
+ ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr = i);
+ break;
+
+ case SIOCAIFADDR:
+ maskIsNew = 0;
+ hostIsNew = 1;
+ error = 0;
+ if (ia->ia_addr.sin_family == AF_INET) {
+ if (ifra->ifra_addr.sin_len == 0) {
+ ifra->ifra_addr = ia->ia_addr;
+ hostIsNew = 0;
+ } else if (ifra->ifra_addr.sin_addr.s_addr ==
+ ia->ia_addr.sin_addr.s_addr)
+ hostIsNew = 0;
+ }
+ if (ifra->ifra_mask.sin_len) {
+ in_ifscrub(ifp, ia);
+ ia->ia_sockmask = ifra->ifra_mask;
+ ia->ia_subnetmask =
+ ntohl(ia->ia_sockmask.sin_addr.s_addr);
+ maskIsNew = 1;
+ }
+ if ((ifp->if_flags & IFF_POINTOPOINT) &&
+ (ifra->ifra_dstaddr.sin_family == AF_INET)) {
+ in_ifscrub(ifp, ia);
+ ia->ia_dstaddr = ifra->ifra_dstaddr;
+ maskIsNew = 1; /* We lie; but the effect's the same */
+ }
+ if (ifra->ifra_addr.sin_family == AF_INET &&
+ (hostIsNew || maskIsNew))
+ error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+ if ((ifp->if_flags & IFF_BROADCAST) &&
+ (ifra->ifra_broadaddr.sin_family == AF_INET))
+ ia->ia_broadaddr = ifra->ifra_broadaddr;
+ return (error);
+
+ case SIOCDIFADDR:
+ in_ifscrub(ifp, ia);
+ if ((ifa = ifp->if_addrlist) == (struct ifaddr *)ia) /* unlink from the interface's list */
+ ifp->if_addrlist = ifa->ifa_next;
+ else {
+ while (ifa->ifa_next &&
+ (ifa->ifa_next != (struct ifaddr *)ia))
+ ifa = ifa->ifa_next;
+ if (ifa->ifa_next)
+ ifa->ifa_next = ((struct ifaddr *)ia)->ifa_next;
+ else
+ printf("Couldn't unlink inifaddr from ifp\n");
+ }
+ oia = ia;
+ if (oia == (ia = in_ifaddr)) /* unlink from the global in_ifaddr list */
+ in_ifaddr = ia->ia_next;
+ else {
+ while (ia->ia_next && (ia->ia_next != oia))
+ ia = ia->ia_next;
+ if (ia->ia_next)
+ ia->ia_next = oia->ia_next;
+ else
+ printf("Didn't unlink inifadr from list\n");
+ }
+ IFAFREE((&oia->ia_ifa));
+ break;
+
+ default:
+ if (ifp == 0 || ifp->if_ioctl == 0)
+ return (EOPNOTSUPP);
+ return ((*ifp->if_ioctl)(ifp, cmd, data)); /* anything else goes to the driver */
+ }
+ return (0);
+}
+
+/*
+ * Delete any existing route for an interface address and clear IFA_ROUTE.
+ */
+void
+in_ifscrub(ifp, ia)
+ register struct ifnet *ifp;
+ register struct in_ifaddr *ia;
+{
+
+ if ((ia->ia_flags & IFA_ROUTE) == 0)
+ return; /* no route recorded for this address */
+ if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
+ rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST); /* host route, matching in_ifinit */
+ else
+ rtinit(&(ia->ia_ifa), (int)RTM_DELETE, 0);
+ ia->ia_flags &= ~IFA_ROUTE;
+}
+
+/*
+ * Initialize an interface's internet address
+ * and routing table entry.
+ */
+in_ifinit(ifp, ia, sin, scrub)
+ register struct ifnet *ifp;
+ register struct in_ifaddr *ia;
+ struct sockaddr_in *sin;
+ int scrub;
+{
+ register u_long i = ntohl(sin->sin_addr.s_addr); /* new address, host byte order */
+ struct sockaddr_in oldaddr;
+ int s = splimp(), flags = RTF_UP, error, ether_output();
+
+ oldaddr = ia->ia_addr;
+ ia->ia_addr = *sin;
+ /*
+ * Give the interface a chance to initialize
+ * if this is its first address,
+ * and to validate the address if necessary.
+ */
+ if (ifp->if_ioctl &&
+ (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) {
+ splx(s);
+ ia->ia_addr = oldaddr; /* driver refused: restore the previous address */
+ return (error);
+ }
+ if (ifp->if_output == ether_output) { /* XXX: Another Kludge */
+ ia->ia_ifa.ifa_rtrequest = arp_rtrequest;
+ ia->ia_ifa.ifa_flags |= RTF_CLONING;
+ }
+ splx(s);
+ if (scrub) {
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr; /* scrub the route of the old address */
+ in_ifscrub(ifp, ia);
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+ }
+ if (IN_CLASSA(i))
+ ia->ia_netmask = IN_CLASSA_NET;
+ else if (IN_CLASSB(i))
+ ia->ia_netmask = IN_CLASSB_NET;
+ else
+ ia->ia_netmask = IN_CLASSC_NET;
+ /*
+ * The subnet mask usually includes at least the standard network part,
+ * but may be smaller in the case of supernetting.
+ * If it is set, we believe it.
+ */
+ if (ia->ia_subnetmask == 0) {
+ ia->ia_subnetmask = ia->ia_netmask;
+ ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
+ } else
+ ia->ia_netmask &= ia->ia_subnetmask;
+ ia->ia_net = i & ia->ia_netmask;
+ ia->ia_subnet = i & ia->ia_subnetmask;
+ in_socktrim(&ia->ia_sockmask);
+ /*
+ * Add route for the network.
+ */
+ ia->ia_ifa.ifa_metric = ifp->if_metric;
+ if (ifp->if_flags & IFF_BROADCAST) {
+ ia->ia_broadaddr.sin_addr.s_addr =
+ htonl(ia->ia_subnet | ~ia->ia_subnetmask);
+ ia->ia_netbroadcast.s_addr =
+ htonl(ia->ia_net | ~ ia->ia_netmask);
+ } else if (ifp->if_flags & IFF_LOOPBACK) {
+ ia->ia_ifa.ifa_dstaddr = ia->ia_ifa.ifa_addr;
+ flags |= RTF_HOST;
+ } else if (ifp->if_flags & IFF_POINTOPOINT) {
+ if (ia->ia_dstaddr.sin_family != AF_INET)
+ return (0); /* no destination set yet: skip the route and the group join */
+ flags |= RTF_HOST;
+ }
+ if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, flags)) == 0)
+ ia->ia_flags |= IFA_ROUTE;
+ /*
+ * If the interface supports multicast, join the "all hosts"
+ * multicast group on that interface.
+ */
+ if (ifp->if_flags & IFF_MULTICAST) {
+ struct in_addr addr;
+
+ addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
+ in_addmulti(&addr, ifp); /* result ignored; a failed join is not reported */
+ }
+ return (error);
+}
+
+
+/*
+ * Return 1 if the address might be a local broadcast address, else 0.
+ */
+in_broadcast(in, ifp)
+ struct in_addr in;
+ struct ifnet *ifp;
+{
+ register struct ifaddr *ifa;
+ u_long t;
+
+ if (in.s_addr == INADDR_BROADCAST ||
+ in.s_addr == INADDR_ANY)
+ return 1;
+ if ((ifp->if_flags & IFF_BROADCAST) == 0)
+ return 0;
+ t = ntohl(in.s_addr); /* host order, for comparison with ia_subnet / ia_net */
+ /*
+ * Look through the list of addresses for a match
+ * with a broadcast address.
+ */
+#define ia ((struct in_ifaddr *)ifa)
+ for (ifa = ifp->if_addrlist; ifa; ifa = ifa->ifa_next)
+ if (ifa->ifa_addr->sa_family == AF_INET &&
+ (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
+ in.s_addr == ia->ia_netbroadcast.s_addr ||
+ /*
+ * Check for old-style (host 0) broadcast.
+ */
+ t == ia->ia_subnet || t == ia->ia_net))
+ return 1;
+ return (0);
+#undef ia
+}
+/*
+ * Add an address to the list of IP multicast addresses for a given interface.
+ * Returns the (new or existing) record, or NULL on failure.
+ */
+struct in_multi *
+in_addmulti(ap, ifp)
+ register struct in_addr *ap;
+ register struct ifnet *ifp;
+{
+ register struct in_multi *inm;
+ struct ifreq ifr;
+ struct in_ifaddr *ia;
+ int s = splnet();
+
+ /*
+ * See if address already in list.
+ */
+ IN_LOOKUP_MULTI(*ap, ifp, inm);
+ if (inm != NULL) {
+ /*
+ * Found it; just increment the reference count.
+ */
+ ++inm->inm_refcount;
+ }
+ else {
+ /*
+ * New address; allocate a new multicast record
+ * and link it into the interface's multicast list.
+ */
+ inm = (struct in_multi *)malloc(sizeof(*inm),
+ M_IPMADDR, M_NOWAIT);
+ if (inm == NULL) {
+ splx(s);
+ return (NULL);
+ }
+ inm->inm_addr = *ap;
+ inm->inm_ifp = ifp;
+ inm->inm_refcount = 1;
+ IFP_TO_IA(ifp, ia);
+ if (ia == NULL) { /* no in_ifaddr for ifp to hang the record on */
+ free(inm, M_IPMADDR);
+ splx(s);
+ return (NULL);
+ }
+ inm->inm_ia = ia;
+ inm->inm_next = ia->ia_multiaddrs; /* push onto the head of ia's list */
+ ia->ia_multiaddrs = inm;
+ /*
+ * Ask the network driver to update its multicast reception
+ * filter appropriately for the new address.
+ */
+ ((struct sockaddr_in *)&ifr.ifr_addr)->sin_family = AF_INET;
+ ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr = *ap;
+ if ((ifp->if_ioctl == NULL) ||
+ (*ifp->if_ioctl)(ifp, SIOCADDMULTI,(caddr_t)&ifr) != 0) {
+ ia->ia_multiaddrs = inm->inm_next; /* undo the list insertion */
+ free(inm, M_IPMADDR);
+ splx(s);
+ return (NULL);
+ }
+ /*
+ * Let IGMP know that we have joined a new IP multicast group.
+ */
+ igmp_joingroup(inm);
+ }
+ splx(s);
+ return (inm);
+}
+
+/*
+ * Delete a multicast address record: drop one reference and, when none
+ * remain, unlink and free the record. Always returns 0.
+ */
+int
+in_delmulti(inm)
+ register struct in_multi *inm;
+{
+ register struct in_multi **p;
+ struct ifreq ifr;
+ int s = splnet();
+
+ if (--inm->inm_refcount == 0) {
+ /*
+ * No remaining claims to this record; let IGMP know that
+ * we are leaving the multicast group.
+ */
+ igmp_leavegroup(inm);
+ /* Unlink from list. */
+ for (p = &inm->inm_ia->ia_multiaddrs;
+ *p != inm;
+ p = &(*p)->inm_next)
+ continue;
+ *p = (*p)->inm_next;
+ /*
+ * Notify the network driver to update its multicast reception
+ * filter.
+ */
+ ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
+ ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr =
+ inm->inm_addr;
+ (*inm->inm_ifp->if_ioctl)(inm->inm_ifp, SIOCDELMULTI,
+ (caddr_t)&ifr);
+ free(inm, M_IPMADDR);
+ }
+ splx(s);
+ return (0); /* declared int: give callers a defined value */
+}
+#endif
diff --git a/sys/netinet/in.h b/sys/netinet/in.h
new file mode 100644
index 000000000000..1ce9948f6e34
--- /dev/null
+++ b/sys/netinet/in.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in.h 8.3 (Berkeley) 1/3/94
+ */
+
+/*
+ * Constants and structures defined by the internet system,
+ * Per RFC 790, September 1981, and numerous additions.
+ */
+
+/*
+ * Protocols
+ */
+#define IPPROTO_IP 0 /* dummy for IP */
+#define IPPROTO_ICMP 1 /* control message protocol */
+#define IPPROTO_IGMP 2 /* group mgmt protocol */
+#define IPPROTO_GGP 3 /* gateway^2 (deprecated) */
+#define IPPROTO_TCP 6 /* tcp */
+#define IPPROTO_EGP 8 /* exterior gateway protocol */
+#define IPPROTO_PUP 12 /* pup */
+#define IPPROTO_UDP 17 /* user datagram protocol */
+#define IPPROTO_IDP 22 /* xns idp */
+#define IPPROTO_TP 29 /* tp-4 w/ class negotiation */
+#define IPPROTO_EON 80 /* ISO cnlp */
+#define IPPROTO_ENCAP 98 /* encapsulation header */
+
+#define IPPROTO_RAW 255 /* raw IP packet */
+#define IPPROTO_MAX 256
+
+
+/*
+ * Local port number conventions:
+ * Ports < IPPORT_RESERVED are reserved for
+ * privileged processes (e.g. root).
+ * Ports > IPPORT_USERRESERVED are reserved
+ * for servers, not necessarily privileged.
+ *
+ * in_pcbbind() enforces the first rule with a suser() check and, when
+ * it has to choose a port itself, cycles through the range
+ * IPPORT_RESERVED..IPPORT_USERRESERVED.
+ */
+#define IPPORT_RESERVED 1024
+#define IPPORT_USERRESERVED 5000
+
+/*
+ * Internet address (a structure for historical reasons)
+ *
+ * in_pcb.c applies ntohl() to s_addr before handing it to the
+ * IN_MULTICAST() class test, i.e. the value is kept in network
+ * byte order.
+ */
+struct in_addr {
+ u_long s_addr;
+};
+
+/*
+ * Definitions of bits in internet address integers.
+ * On subnets, the decomposition of addresses to host and net parts
+ * is done according to subnet mask, not the masks here.
+ *
+ * The IN_CLASSx()/IN_MULTICAST() tests look at the most significant
+ * bits of their argument, so they expect the address as a host byte
+ * order integer; in_pcb.c converts with ntohl() before using them.
+ * IN_EXPERIMENTAL() and IN_BADCLASS() are currently the same test.
+ */
+#define IN_CLASSA(i) (((long)(i) & 0x80000000) == 0)
+#define IN_CLASSA_NET 0xff000000
+#define IN_CLASSA_NSHIFT 24
+#define IN_CLASSA_HOST 0x00ffffff
+#define IN_CLASSA_MAX 128
+
+#define IN_CLASSB(i) (((long)(i) & 0xc0000000) == 0x80000000)
+#define IN_CLASSB_NET 0xffff0000
+#define IN_CLASSB_NSHIFT 16
+#define IN_CLASSB_HOST 0x0000ffff
+#define IN_CLASSB_MAX 65536
+
+#define IN_CLASSC(i) (((long)(i) & 0xe0000000) == 0xc0000000)
+#define IN_CLASSC_NET 0xffffff00
+#define IN_CLASSC_NSHIFT 8
+#define IN_CLASSC_HOST 0x000000ff
+
+#define IN_CLASSD(i) (((long)(i) & 0xf0000000) == 0xe0000000)
+#define IN_CLASSD_NET 0xf0000000 /* These ones aren't really */
+#define IN_CLASSD_NSHIFT 28 /* net and host fields, but */
+#define IN_CLASSD_HOST 0x0fffffff /* routing needn't know. */
+#define IN_MULTICAST(i) IN_CLASSD(i)
+
+#define IN_EXPERIMENTAL(i) (((long)(i) & 0xf0000000) == 0xf0000000)
+#define IN_BADCLASS(i) (((long)(i) & 0xf0000000) == 0xf0000000)
+
+#define INADDR_ANY (u_long)0x00000000
+#define INADDR_BROADCAST (u_long)0xffffffff /* must be masked */
+#ifndef KERNEL
+#define INADDR_NONE 0xffffffff /* -1 return */
+#endif
+
+#define INADDR_UNSPEC_GROUP (u_long)0xe0000000 /* 224.0.0.0 */
+#define INADDR_ALLHOSTS_GROUP (u_long)0xe0000001 /* 224.0.0.1 */
+#define INADDR_MAX_LOCAL_GROUP (u_long)0xe00000ff /* 224.0.0.255 */
+
+#define IN_LOOPBACKNET 127 /* official! */
+
+/*
+ * Socket address, internet style.
+ */
+struct sockaddr_in {
+ u_char sin_len; /* in_setsockaddr()/in_setpeeraddr() store sizeof(struct sockaddr_in) here */
+ u_char sin_family; /* AF_INET */
+ u_short sin_port; /* port, network byte order (in_pcbbind() applies ntohs() before range checks) */
+ struct in_addr sin_addr; /* IP address */
+ char sin_zero[8]; /* NOTE(review): not referenced by the code visible here; presumably padding */
+};
+
+/*
+ * Structure used to describe IP options.
+ * Used to store options internally, to pass them to a process,
+ * or to restore options retrieved earlier.
+ * The ip_dst is used for the first-hop gateway when using a source route
+ * (this gets put into the header proper).
+ */
+struct ip_opts {
+ struct in_addr ip_dst; /* first hop, 0 w/o src rt */
+ char ip_opts[40]; /* actually variable in size */
+};
+
+/*
+ * Options for use with [gs]etsockopt at the IP level.
+ * First word of comment is data type; bool is stored in int.
+ */
+#define IP_OPTIONS 1 /* buf/ip_opts; set/get IP options */
+#define IP_HDRINCL 2 /* int; header is included with data */
+#define IP_TOS 3 /* int; IP type of service and preced. */
+#define IP_TTL 4 /* int; IP time to live */
+#define IP_RECVOPTS 5 /* bool; receive all IP opts w/dgram */
+#define IP_RECVRETOPTS 6 /* bool; receive IP opts for response */
+#define IP_RECVDSTADDR 7 /* bool; receive IP dst addr w/dgram */
+#define IP_RETOPTS 8 /* ip_opts; set/get IP options */
+#define IP_MULTICAST_IF 9 /* u_char; set/get IP multicast i/f */
+#define IP_MULTICAST_TTL 10 /* u_char; set/get IP multicast ttl */
+#define IP_MULTICAST_LOOP 11 /* u_char; set/get IP multicast loopback */
+#define IP_ADD_MEMBERSHIP 12 /* ip_mreq; add an IP group membership */
+#define IP_DROP_MEMBERSHIP 13 /* ip_mreq; drop an IP group membership */
+
+/*
+ * Defaults and limits for options
+ */
+#define IP_DEFAULT_MULTICAST_TTL 1 /* normally limit m'casts to 1 hop */
+#define IP_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */
+#define IP_MAX_MEMBERSHIPS 20 /* per socket; must fit in one mbuf */
+
+/*
+ * Argument structure for IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP.
+ */
+struct ip_mreq {
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_interface; /* local IP address of interface */
+};
+
+/*
+ * Definitions for inet sysctl operations.
+ *
+ * Third level is protocol number.
+ * Fourth level is desired variable within that protocol.
+ *
+ * CTL_IPPROTO_NAMES is positional: entry N describes protocol number N
+ * and { 0, 0 } fills the unassigned numbers, so the table has exactly
+ * IPPROTO_MAXID entries and ends with "idp" (IPPROTO_IDP).  Keep the
+ * filler entries in step when adding a protocol.
+ */
+#define IPPROTO_MAXID (IPPROTO_IDP + 1) /* don't list to IPPROTO_MAX */
+
+#define CTL_IPPROTO_NAMES { \
+ { "ip", CTLTYPE_NODE }, \
+ { "icmp", CTLTYPE_NODE }, \
+ { "igmp", CTLTYPE_NODE }, \
+ { "ggp", CTLTYPE_NODE }, \
+ { 0, 0 }, \
+ { 0, 0 }, \
+ { "tcp", CTLTYPE_NODE }, \
+ { 0, 0 }, \
+ { "egp", CTLTYPE_NODE }, \
+ { 0, 0 }, \
+ { 0, 0 }, \
+ { 0, 0 }, \
+ { "pup", CTLTYPE_NODE }, \
+ { 0, 0 }, \
+ { 0, 0 }, \
+ { 0, 0 }, \
+ { 0, 0 }, \
+ { "udp", CTLTYPE_NODE }, \
+ { 0, 0 }, \
+ { 0, 0 }, \
+ { 0, 0 }, \
+ { 0, 0 }, \
+ { "idp", CTLTYPE_NODE }, \
+}
+
+/*
+ * Names for IP sysctl objects
+ *
+ * IPCTL_NAMES below is positional as well: the entry at index N names
+ * the object whose IPCTL_* value is N, and index 0 is unused.  The
+ * "mtu" slot (4) is listed even though IPCTL_DEFMTU is only defined
+ * under "notyet"; IPCTL_MAXID already counts it.
+ */
+#define IPCTL_FORWARDING 1 /* act as router */
+#define IPCTL_SENDREDIRECTS 2 /* may send redirects when forwarding */
+#define IPCTL_DEFTTL 3 /* default TTL */
+#ifdef notyet
+#define IPCTL_DEFMTU 4 /* default MTU */
+#endif
+#define IPCTL_MAXID 5
+
+#define IPCTL_NAMES { \
+ { 0, 0 }, \
+ { "forwarding", CTLTYPE_INT }, \
+ { "redirect", CTLTYPE_INT }, \
+ { "ttl", CTLTYPE_INT }, \
+ { "mtu", CTLTYPE_INT }, \
+}
+
+
+#ifdef KERNEL
+int in_broadcast __P((struct in_addr, struct ifnet *));
+int in_canforward __P((struct in_addr));
+int in_cksum __P((struct mbuf *, int));
+int in_localaddr __P((struct in_addr));
+u_long in_netof __P((struct in_addr));
+void in_socktrim __P((struct sockaddr_in *));
+#endif
diff --git a/sys/netinet/in_cksum.c b/sys/netinet/in_cksum.c
new file mode 100644
index 000000000000..c19a92008360
--- /dev/null
+++ b/sys/netinet/in_cksum.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 1988, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+/*
+ * Checksum routine for Internet Protocol family headers (Portable Version).
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ *
+ * in_cksum(m, len) adds up the first len bytes of the mbuf chain m as
+ * 16-bit words and returns the low 16 bits of the complement of the
+ * folded sum.  If the chain ends before len bytes have been consumed
+ * it prints "cksum: out of data" and still returns a checksum of the
+ * data that was present.
+ *
+ * ADDCARRY and REDUCE are helpers for the function body only: REDUCE
+ * uses the locals "sum" and "l_util" by name.
+ */
+
+#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) /* fold a carry out of the low 16 bits back in */
+#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} /* add the two u_short halves of sum together */
+
+int
+in_cksum(m, len)
+ register struct mbuf *m;
+ register int len;
+{
+ register u_short *w;
+ register int sum = 0;
+ register int mlen = 0; /* bytes left in the current mbuf; -1 means a byte is pending in s_util.c[0] */
+ int byte_swapped = 0; /* set while sum is held shifted by 8 bits for odd-aligned data */
+
+ /* Assembles a 16-bit word from two bytes that are not adjacent in memory. */
+ union {
+ char c[2];
+ u_short s;
+ } s_util;
+ /*
+  * Lets REDUCE read sum as two u_shorts.
+  * NOTE(review): s[] overlays only the first four bytes of l; this
+  * looks like it assumes a 32-bit long -- confirm for other ABIs.
+  */
+ union {
+ u_short s[2];
+ long l;
+ } l_util;
+
+ for (;m && len; m = m->m_next) {
+ if (m->m_len == 0)
+ continue;
+ w = mtod(m, u_short *);
+ if (mlen == -1) {
+ /*
+ * The first byte of this mbuf is the continuation
+ * of a word spanning between this mbuf and the
+ * last mbuf.
+ *
+ * s_util.c[0] is already saved when scanning previous
+ * mbuf.
+ */
+ s_util.c[1] = *(char *)w;
+ sum += s_util.s;
+ w = (u_short *)((char *)w + 1);
+ mlen = m->m_len - 1;
+ len--;
+ } else
+ mlen = m->m_len;
+ /* Never look at more than the caller asked for. */
+ if (len < mlen)
+ mlen = len;
+ len -= mlen;
+ /*
+ * Force to even boundary.
+ */
+ /* NOTE(review): the pointer is cast to int; only its low bit is tested. */
+ if ((1 & (int) w) && (mlen > 0)) {
+ REDUCE;
+ sum <<= 8;
+ s_util.c[0] = *(u_char *)w;
+ w = (u_short *)((char *)w + 1);
+ mlen--;
+ byte_swapped = 1;
+ }
+ /*
+ * Unroll the loop to make overhead from
+ * branches &c small.
+ */
+ /* 32 bytes (16 words) per iteration. */
+ while ((mlen -= 32) >= 0) {
+ sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
+ sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
+ sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11];
+ sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15];
+ w += 16;
+ }
+ mlen += 32;
+ /* Then 8 bytes (4 words) per iteration. */
+ while ((mlen -= 8) >= 0) {
+ sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
+ w += 4;
+ }
+ mlen += 8;
+ if (mlen == 0 && byte_swapped == 0)
+ continue;
+ REDUCE;
+ /* Remaining whole words; leaves mlen at -1 if one byte is left over, else -2. */
+ while ((mlen -= 2) >= 0) {
+ sum += *w++;
+ }
+ if (byte_swapped) {
+ /* Undo the 8-bit shift and pair the byte saved in s_util.c[0]. */
+ REDUCE;
+ sum <<= 8;
+ byte_swapped = 0;
+ if (mlen == -1) {
+ s_util.c[1] = *(char *)w;
+ sum += s_util.s;
+ mlen = 0;
+ } else
+ mlen = -1;
+ } else if (mlen == -1)
+ s_util.c[0] = *(char *)w;
+ }
+ if (len)
+ printf("cksum: out of data\n");
+ if (mlen == -1) {
+ /* The last mbuf has odd # of bytes. Follow the
+ standard (the odd byte may be shifted left by 8 bits
+ or not as determined by endian-ness of the machine) */
+ s_util.c[1] = 0;
+ sum += s_util.s;
+ }
+ REDUCE;
+ return (~sum & 0xffff);
+}
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
new file mode 100644
index 000000000000..01b6b17961c3
--- /dev/null
+++ b/sys/netinet/in_pcb.c
@@ -0,0 +1,497 @@
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+/* Never assigned in this file, so it stays all-zero; passed to in_pcblookup() as the "no foreign address" argument. */
+struct in_addr zeroin_addr;
+
+/*
+ * Allocate a zero-filled internet protocol control block for socket so,
+ * queue it on the pcb list headed by head and attach it to so->so_pcb.
+ * Returns 0 on success, ENOBUFS if no memory was obtained.
+ */
+int
+in_pcballoc(so, head)
+	struct socket *so;
+	struct inpcb *head;
+{
+	register struct inpcb *pcb;
+
+	MALLOC(pcb, struct inpcb *, sizeof(struct inpcb), M_PCB, M_WAITOK);
+	if (pcb == NULL)
+		return (ENOBUFS);
+	bzero((caddr_t)pcb, sizeof(struct inpcb));
+
+	/* Fill in the back pointers, then make the pcb reachable. */
+	pcb->inp_socket = so;
+	pcb->inp_head = head;
+	insque(pcb, head);
+	so->so_pcb = (caddr_t)pcb;
+	return (0);
+}
+
+/*
+ * Give inp a local address and/or port.
+ *
+ * nam, when non-null, is an mbuf holding a sockaddr_in with the
+ * requested address and port; a null nam or a zero port makes the
+ * routine choose a port itself.
+ *
+ * Returns 0 on success; EADDRNOTAVAIL if in_ifaddr is null or
+ * ifa_ifwithaddr() does not find the requested address; EINVAL if inp
+ * already has a local port or address, or nam has the wrong length;
+ * the suser() error for a port below IPPORT_RESERVED; EADDRINUSE if
+ * in_pcblookup() finds a pcb that does not share the reuse option.
+ */
+int
+in_pcbbind(inp, nam)
+ register struct inpcb *inp;
+ struct mbuf *nam;
+{
+ register struct socket *so = inp->inp_socket;
+ register struct inpcb *head = inp->inp_head;
+ register struct sockaddr_in *sin;
+ struct proc *p = curproc; /* XXX */
+ u_short lport = 0;
+ int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
+ int error;
+
+ if (in_ifaddr == 0)
+ return (EADDRNOTAVAIL);
+ if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
+ return (EINVAL);
+ /*
+  * Wildcard lookups are used only when neither reuse option is set
+  * and the socket is not a connection-oriented one that is accepting
+  * connections.
+  */
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
+ ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
+ (so->so_options & SO_ACCEPTCONN) == 0))
+ wild = INPLOOKUP_WILDCARD;
+ if (nam) {
+ sin = mtod(nam, struct sockaddr_in *);
+ if (nam->m_len != sizeof (*sin))
+ return (EINVAL);
+#ifdef notdef
+ /*
+ * We should check the family, but old programs
+ * incorrectly fail to initialize it.
+ */
+ if (sin->sin_family != AF_INET)
+ return (EAFNOSUPPORT);
+#endif
+ lport = sin->sin_port;
+ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
+ /*
+ * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
+ * allow complete duplication of binding if
+ * SO_REUSEPORT is set, or if SO_REUSEADDR is set
+ * and a multicast address is bound on both
+ * new and duplicated sockets.
+ */
+ if (so->so_options & SO_REUSEADDR)
+ reuseport = SO_REUSEADDR|SO_REUSEPORT;
+ } else if (sin->sin_addr.s_addr != INADDR_ANY) {
+ /* The caller's sockaddr is modified here; the port was saved in lport above. */
+ sin->sin_port = 0; /* yech... */
+ if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
+ return (EADDRNOTAVAIL);
+ }
+ if (lport) {
+ struct inpcb *t;
+
+ /* GROSS */
+ if (ntohs(lport) < IPPORT_RESERVED &&
+ (error = suser(p->p_ucred, &p->p_acflag)))
+ return (error);
+ t = in_pcblookup(head, zeroin_addr, 0,
+ sin->sin_addr, lport, wild);
+ /* An existing pcb only blocks the bind if it lacks the matching reuse option. */
+ if (t && (reuseport & t->inp_socket->so_options) == 0)
+ return (EADDRINUSE);
+ }
+ inp->inp_laddr = sin->sin_addr;
+ }
+ /*
+  * No port requested: step head->inp_lport (kept in host order),
+  * wrapping back to IPPORT_RESERVED once it leaves the range
+  * IPPORT_RESERVED..IPPORT_USERRESERVED, until in_pcblookup() reports
+  * the port unused.
+  * NOTE(review): there is no exit if every port in the range is taken.
+  */
+ if (lport == 0)
+ do {
+ if (head->inp_lport++ < IPPORT_RESERVED ||
+ head->inp_lport > IPPORT_USERRESERVED)
+ head->inp_lport = IPPORT_RESERVED;
+ lport = htons(head->inp_lport);
+ } while (in_pcblookup(head,
+ zeroin_addr, 0, inp->inp_laddr, lport, wild));
+ inp->inp_lport = lport;
+ return (0);
+}
+
+/*
+ * Connect from a socket to a specified address.
+ * Both address and port must be specified in argument sin.
+ * If don't have a local address for this socket yet,
+ * then pick one.
+ *
+ * nam is an mbuf holding the destination sockaddr_in.  Its address is
+ * rewritten in place when it is INADDR_ANY or INADDR_BROADCAST.
+ *
+ * Returns 0 on success; EINVAL for a wrong-sized nam; EAFNOSUPPORT for
+ * a family other than AF_INET; EADDRNOTAVAIL for a zero port or when
+ * no usable local address is found; EADDRINUSE when in_pcblookup()
+ * already finds the resulting address pair; or the error from the
+ * implicit in_pcbbind() (previously that error was discarded and the
+ * pcb could be marked connected without a local port).
+ */
+int
+in_pcbconnect(inp, nam)
+	register struct inpcb *inp;
+	struct mbuf *nam;
+{
+	struct in_ifaddr *ia;
+	struct sockaddr_in *ifaddr;
+	register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
+	int error;
+
+	if (nam->m_len != sizeof (*sin))
+		return (EINVAL);
+	if (sin->sin_family != AF_INET)
+		return (EAFNOSUPPORT);
+	if (sin->sin_port == 0)
+		return (EADDRNOTAVAIL);
+	if (in_ifaddr) {
+		/*
+		 * If the destination address is INADDR_ANY,
+		 * use the primary local address.
+		 * If the supplied address is INADDR_BROADCAST,
+		 * and the primary interface supports broadcast,
+		 * choose the broadcast address for that interface.
+		 */
+#define	satosin(sa)	((struct sockaddr_in *)(sa))
+#define	sintosa(sin)	((struct sockaddr *)(sin))
+#define	ifatoia(ifa)	((struct in_ifaddr *)(ifa))
+		if (sin->sin_addr.s_addr == INADDR_ANY)
+			sin->sin_addr = IA_SIN(in_ifaddr)->sin_addr;
+		else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
+		    (in_ifaddr->ia_ifp->if_flags & IFF_BROADCAST))
+			sin->sin_addr = satosin(&in_ifaddr->ia_broadaddr)->sin_addr;
+	}
+	if (inp->inp_laddr.s_addr == INADDR_ANY) {
+		register struct route *ro;
+
+		ia = (struct in_ifaddr *)0;
+		/*
+		 * If route is known or can be allocated now,
+		 * our src addr is taken from the i/f, else punt.
+		 */
+		ro = &inp->inp_route;
+		/* Drop a cached route that is for another destination or must not be used. */
+		if (ro->ro_rt &&
+		    (satosin(&ro->ro_dst)->sin_addr.s_addr !=
+		    sin->sin_addr.s_addr ||
+		    inp->inp_socket->so_options & SO_DONTROUTE)) {
+			RTFREE(ro->ro_rt);
+			ro->ro_rt = (struct rtentry *)0;
+		}
+		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
+		    (ro->ro_rt == (struct rtentry *)0 ||
+		    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
+			/* No route yet, so try to acquire one */
+			ro->ro_dst.sa_family = AF_INET;
+			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
+			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+			    sin->sin_addr;
+			rtalloc(ro);
+		}
+		/*
+		 * If we found a route, use the address
+		 * corresponding to the outgoing interface
+		 * unless it is the loopback (in case a route
+		 * to our address on another net goes to loopback).
+		 */
+		if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
+			ia = ifatoia(ro->ro_rt->rt_ifa);
+		if (ia == 0) {
+			u_short fport = sin->sin_port;
+
+			/* The ifa_ifwith*() lookups are done with the port zeroed. */
+			sin->sin_port = 0;
+			ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
+			if (ia == 0)
+				ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
+			sin->sin_port = fport;
+			if (ia == 0)
+				ia = in_ifaddr;
+			if (ia == 0)
+				return (EADDRNOTAVAIL);
+		}
+		/*
+		 * If the destination address is multicast and an outgoing
+		 * interface has been set as a multicast option, use the
+		 * address of that interface as our source address.
+		 */
+		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
+		    inp->inp_moptions != NULL) {
+			struct ip_moptions *imo;
+			struct ifnet *ifp;
+
+			imo = inp->inp_moptions;
+			if (imo->imo_multicast_ifp != NULL) {
+				ifp = imo->imo_multicast_ifp;
+				for (ia = in_ifaddr; ia; ia = ia->ia_next)
+					if (ia->ia_ifp == ifp)
+						break;
+				if (ia == 0)
+					return (EADDRNOTAVAIL);
+			}
+		}
+		ifaddr = (struct sockaddr_in *)&ia->ia_addr;
+	}
+	/* ifaddr is only read when inp_laddr is still INADDR_ANY, i.e. when it was set above. */
+	if (in_pcblookup(inp->inp_head,
+	    sin->sin_addr,
+	    sin->sin_port,
+	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
+	    inp->inp_lport,
+	    0))
+		return (EADDRINUSE);
+	if (inp->inp_laddr.s_addr == INADDR_ANY) {
+		if (inp->inp_lport == 0) {
+			/* Implicit bind to get a local port; do not ignore failure. */
+			error = in_pcbbind(inp, (struct mbuf *)0);
+			if (error)
+				return (error);
+		}
+		inp->inp_laddr = ifaddr->sin_addr;
+	}
+	inp->inp_faddr = sin->sin_addr;
+	inp->inp_fport = sin->sin_port;
+	return (0);
+}
+
+/*
+ * Forget the foreign address and port of inp.  If the socket no longer
+ * has a file descriptor reference (SS_NOFDREF) the pcb is detached as
+ * well, after which inp must not be used.
+ * Always returns 0 (the function is declared int and previously
+ * returned no value).
+ */
+int
+in_pcbdisconnect(inp)
+	struct inpcb *inp;
+{
+
+	inp->inp_faddr.s_addr = INADDR_ANY;
+	inp->inp_fport = 0;
+	if (inp->inp_socket->so_state & SS_NOFDREF)
+		in_pcbdetach(inp);
+	return (0);
+}
+
+/*
+ * Tear down inp: clear so_pcb and call sofree() on the socket, release
+ * the IP options mbuf, the cached route and the multicast options,
+ * take the pcb off its list and free it.
+ * Always returns 0 (the function is declared int and previously
+ * returned no value).
+ */
+int
+in_pcbdetach(inp)
+	struct inpcb *inp;
+{
+	struct socket *so = inp->inp_socket;
+
+	so->so_pcb = 0;
+	sofree(so);
+	if (inp->inp_options)
+		(void)m_free(inp->inp_options);
+	if (inp->inp_route.ro_rt)
+		rtfree(inp->inp_route.ro_rt);
+	ip_freemoptions(inp->inp_moptions);
+	remque(inp);
+	FREE(inp, M_PCB);
+	return (0);
+}
+
+/*
+ * Store the local address and port of inp into nam as a sockaddr_in
+ * and set nam->m_len to match.
+ * Always returns 0 (the function is declared int and previously
+ * returned no value).
+ */
+int
+in_setsockaddr(inp, nam)
+	register struct inpcb *inp;
+	struct mbuf *nam;
+{
+	register struct sockaddr_in *sin;
+
+	nam->m_len = sizeof (*sin);
+	sin = mtod(nam, struct sockaddr_in *);
+	bzero((caddr_t)sin, sizeof (*sin));
+	sin->sin_family = AF_INET;
+	sin->sin_len = sizeof(*sin);
+	sin->sin_port = inp->inp_lport;
+	sin->sin_addr = inp->inp_laddr;
+	return (0);
+}
+
+/*
+ * Store the foreign address and port of inp into nam as a sockaddr_in
+ * and set nam->m_len to match.
+ * Always returns 0 (the function is declared int and previously
+ * returned no value).
+ */
+int
+in_setpeeraddr(inp, nam)
+	struct inpcb *inp;
+	struct mbuf *nam;
+{
+	register struct sockaddr_in *sin;
+
+	nam->m_len = sizeof (*sin);
+	sin = mtod(nam, struct sockaddr_in *);
+	bzero((caddr_t)sin, sizeof (*sin));
+	sin->sin_family = AF_INET;
+	sin->sin_len = sizeof(*sin);
+	sin->sin_port = inp->inp_fport;
+	sin->sin_addr = inp->inp_faddr;
+	return (0);
+}
+
+/*
+ * Pass some notification to all connections of a protocol
+ * associated with address dst. The local address and/or port numbers
+ * may be specified to limit the search. The "usual action" will be
+ * taken, depending on the ctlinput cmd. The caller must filter any
+ * cmds that are uninteresting (e.g., no error in the map).
+ * Call the protocol specific routine (if any) to report
+ * any errors for each matching socket.
+ *
+ * Must be called at splnet.
+ *
+ * Requests with cmd outside 0..PRC_NCMDS-1, a non-AF_INET dst or an
+ * INADDR_ANY destination are ignored.  Always returns 0.
+ */
+int
+in_pcbnotify(head, dst, fport_arg, laddr, lport_arg, cmd, notify)
+	struct inpcb *head;
+	struct sockaddr *dst;
+	u_int fport_arg, lport_arg;
+	struct in_addr laddr;
+	int cmd;
+	void (*notify) __P((struct inpcb *, int));
+{
+	extern u_char inetctlerrmap[];
+	register struct inpcb *inp, *oinp;
+	struct in_addr faddr;
+	u_short fport = fport_arg, lport = lport_arg;
+	int errno;
+
+	/*
+	 * cmd indexes inetctlerrmap[] below, so cmd == PRC_NCMDS must be
+	 * rejected too (the old test used '>').
+	 * NOTE(review): assumes the table has PRC_NCMDS entries; it is
+	 * defined outside this file -- confirm.
+	 */
+	if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET)
+		return (0);
+	faddr = ((struct sockaddr_in *)dst)->sin_addr;
+	if (faddr.s_addr == INADDR_ANY)
+		return (0);
+
+	/*
+	 * Redirects go to all references to the destination,
+	 * and use in_rtchange to invalidate the route cache.
+	 * Dead host indications: notify all references to the destination.
+	 * Otherwise, if we have knowledge of the local port and address,
+	 * deliver only to that socket.
+	 */
+	if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
+		fport = 0;
+		lport = 0;
+		laddr.s_addr = 0;
+		if (cmd != PRC_HOSTDEAD)
+			notify = in_rtchange;
+	}
+	errno = inetctlerrmap[cmd];
+	for (inp = head->inp_next; inp != head;) {
+		if (inp->inp_faddr.s_addr != faddr.s_addr ||
+		    inp->inp_socket == 0 ||
+		    (lport && inp->inp_lport != lport) ||
+		    (laddr.s_addr && inp->inp_laddr.s_addr != laddr.s_addr) ||
+		    (fport && inp->inp_fport != fport)) {
+			inp = inp->inp_next;
+			continue;
+		}
+		/* Step to the next pcb before the callback runs on this one. */
+		oinp = inp;
+		inp = inp->inp_next;
+		if (notify)
+			(*notify)(oinp, errno);
+	}
+	return (0);
+}
+
+/*
+ * Check for alternatives when higher level complains
+ * about service problems. For now, invalidate cached
+ * routing information. If the route was created dynamically
+ * (by a redirect), time to try a default gateway again.
+ *
+ * The cached route of inp is detached, an RTM_LOSING message is
+ * generated for it, and the route is then deleted (RTF_DYNAMIC) or
+ * released with rtfree().  Always returns 0 (the function is declared
+ * int and previously returned no value).
+ */
+int
+in_losing(inp)
+	struct inpcb *inp;
+{
+	register struct rtentry *rt;
+	struct rt_addrinfo info;
+
+	if ((rt = inp->inp_route.ro_rt)) {
+		inp->inp_route.ro_rt = 0;
+		bzero((caddr_t)&info, sizeof(info));
+		info.rti_info[RTAX_DST] =
+		    (struct sockaddr *)&inp->inp_route.ro_dst;
+		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
+		if (rt->rt_flags & RTF_DYNAMIC)
+			(void) rtrequest(RTM_DELETE, rt_key(rt),
+			    rt->rt_gateway, rt_mask(rt), rt->rt_flags,
+			    (struct rtentry **)0);
+		else
+			/*
+			 * A new route can be allocated
+			 * the next time output is attempted.
+			 */
+			rtfree(rt);
+	}
+	return (0);
+}
+
+/*
+ * Routing has changed: release the route cached in the pcb, if any,
+ * so that a (hopefully) better one is allocated the next time output
+ * is attempted.  The errno argument is not used.
+ */
+void
+in_rtchange(inp, errno)
+	register struct inpcb *inp;
+	int errno;
+{
+	/* Nothing cached, nothing to flush. */
+	if (inp->inp_route.ro_rt == 0)
+		return;
+	rtfree(inp->inp_route.ro_rt);
+	inp->inp_route.ro_rt = 0;
+}
+
+/*
+ * Search the pcb list headed by head for the entry that best matches
+ * the given foreign and local address/port pairs.
+ *
+ * The local port must match exactly.  For the local address, and for
+ * the foreign address and port, a pcb either matches exactly or one
+ * side is INADDR_ANY, which counts as one wildcard.  Entries needing
+ * a wildcard are considered only when flags has INPLOOKUP_WILDCARD.
+ * The entry with the fewest wildcards wins (the first one found on a
+ * tie); the search stops early on an exact match.  Returns the pcb,
+ * or 0 if nothing matched.
+ */
+struct inpcb *
+in_pcblookup(head, faddr, fport_arg, laddr, lport_arg, flags)
+	struct inpcb *head;
+	struct in_addr faddr, laddr;
+	u_int fport_arg, lport_arg;
+	int flags;
+{
+	register struct inpcb *pcb, *best = 0;
+	u_short fport = fport_arg, lport = lport_arg;
+	int fewest = 3, nwild;
+
+	for (pcb = head->inp_next; pcb != head; pcb = pcb->inp_next) {
+		if (pcb->inp_lport != lport)
+			continue;
+		nwild = 0;
+
+		/* Local address. */
+		if (pcb->inp_laddr.s_addr == INADDR_ANY) {
+			if (laddr.s_addr != INADDR_ANY)
+				nwild++;
+		} else if (laddr.s_addr == INADDR_ANY)
+			nwild++;
+		else if (pcb->inp_laddr.s_addr != laddr.s_addr)
+			continue;
+
+		/* Foreign address and port. */
+		if (pcb->inp_faddr.s_addr == INADDR_ANY) {
+			if (faddr.s_addr != INADDR_ANY)
+				nwild++;
+		} else if (faddr.s_addr == INADDR_ANY)
+			nwild++;
+		else if (pcb->inp_faddr.s_addr != faddr.s_addr ||
+		    pcb->inp_fport != fport)
+			continue;
+
+		if (nwild != 0 && !(flags & INPLOOKUP_WILDCARD))
+			continue;
+		if (nwild >= fewest)
+			continue;
+		best = pcb;
+		fewest = nwild;
+		if (fewest == 0)
+			break;
+	}
+	return (best);
+}
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
new file mode 100644
index 000000000000..c85324702a7f
--- /dev/null
+++ b/sys/netinet/in_pcb.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Common structure pcb for internet protocol implementation.
+ * Here are stored pointers to local and foreign host table
+ * entries, local and foreign socket numbers, and pointers
+ * up (to a socket structure) and down (to a protocol-specific)
+ * control block.
+ *
+ * The pcbs of one protocol are linked through inp_next/inp_prev with
+ * insque()/remque() onto a list whose head is itself a struct inpcb;
+ * list walks in in_pcb.c stop when they arrive back at the head.
+ * in_pcbbind() additionally uses the head's inp_lport to remember the
+ * last port it assigned automatically.
+ */
+struct inpcb {
+ struct inpcb *inp_next,*inp_prev;
+ /* pointers to other pcb's */
+ struct inpcb *inp_head; /* pointer back to chain of inpcb's
+ for this protocol */
+ struct in_addr inp_faddr; /* foreign host table entry */
+ u_short inp_fport; /* foreign port */
+ struct in_addr inp_laddr; /* local host table entry */
+ u_short inp_lport; /* local port */
+ struct socket *inp_socket; /* back pointer to socket */
+ caddr_t inp_ppcb; /* pointer to per-protocol pcb */
+ struct route inp_route; /* placeholder for routing entry */
+ int inp_flags; /* generic IP/datagram flags */
+ struct ip inp_ip; /* header prototype; should have more */
+ struct mbuf *inp_options; /* IP options */
+ struct ip_moptions *inp_moptions; /* IP multicast options */
+};
+
+/* flags in inp_flags: */
+#define INP_RECVOPTS 0x01 /* receive incoming IP options */
+#define INP_RECVRETOPTS 0x02 /* receive IP options for reply */
+#define INP_RECVDSTADDR 0x04 /* receive IP dst address */
+#define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR)
+#define INP_HDRINCL 0x08 /* user supplies entire IP header */
+
+/* values for the flags argument of in_pcblookup(): */
+#define INPLOOKUP_WILDCARD 1 /* also accept pcbs that match only through INADDR_ANY */
+#define INPLOOKUP_SETLOCAL 2 /* NOTE(review): not tested by in_pcblookup() in in_pcb.c; purpose not visible here */
+
+#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) /* socket -> its inpcb (so_pcb is set by in_pcballoc()) */
+
+#ifdef KERNEL
+int in_losing __P((struct inpcb *));
+int in_pcballoc __P((struct socket *, struct inpcb *));
+int in_pcbbind __P((struct inpcb *, struct mbuf *));
+int in_pcbconnect __P((struct inpcb *, struct mbuf *));
+int in_pcbdetach __P((struct inpcb *));
+int in_pcbdisconnect __P((struct inpcb *));
+struct inpcb *
+ in_pcblookup __P((struct inpcb *,
+ struct in_addr, u_int, struct in_addr, u_int, int));
+int in_pcbnotify __P((struct inpcb *, struct sockaddr *,
+ u_int, struct in_addr, u_int, int, void (*)(struct inpcb *, int)));
+void in_rtchange __P((struct inpcb *, int));
+int in_setpeeraddr __P((struct inpcb *, struct mbuf *));
+int in_setsockaddr __P((struct inpcb *, struct mbuf *));
+#endif
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
new file mode 100644
index 000000000000..00916b4ce1a1
--- /dev/null
+++ b/sys/netinet/in_proto.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_proto.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <net/if.h>
+#include <net/radix.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/in_pcb.h>
+#include <netinet/igmp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+/*
+ * TCP/IP protocol family: IP, ICMP, UDP, TCP.
+ */
+
+#ifdef NSIP
+void idpip_input(), nsip_ctlinput();
+#endif
+
+#ifdef TPIP
+void tpip_input(), tpip_ctlinput(), tp_ctloutput();
+int tp_init(), tp_slowtimo(), tp_drain(), tp_usrreq();
+#endif
+
+#ifdef EON
+void eoninput(), eonctlinput(), eonprotoinit();
+#endif /* EON */
+
+extern struct domain inetdomain;
+
+struct protosw inetsw[] = { /* protocol switch table referenced by inetdomain below */
+{ 0, &inetdomain, 0, 0, /* entry with socket type and protocol 0; only ip_* hooks */
+ 0, ip_output, 0, 0,
+ 0,
+ ip_init, 0, ip_slowtimo, ip_drain, ip_sysctl
+},
+{ SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR, /* UDP */
+ udp_input, 0, udp_ctlinput, ip_ctloutput,
+ udp_usrreq,
+ udp_init, 0, 0, 0, udp_sysctl
+},
+{ SOCK_STREAM, &inetdomain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD, /* TCP */
+ tcp_input, 0, tcp_ctlinput, tcp_ctloutput,
+ tcp_usrreq,
+ tcp_init, tcp_fasttimo, tcp_slowtimo, tcp_drain,
+},
+{ SOCK_RAW, &inetdomain, IPPROTO_RAW, PR_ATOMIC|PR_ADDR, /* raw IP: rip_* hooks throughout */
+ rip_input, rip_output, 0, rip_ctloutput,
+ rip_usrreq,
+ 0, 0, 0, 0,
+},
+{ SOCK_RAW, &inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR, /* ICMP: icmp_input, otherwise the rip_* hooks */
+ icmp_input, rip_output, 0, rip_ctloutput,
+ rip_usrreq,
+ 0, 0, 0, 0, icmp_sysctl
+},
+{ SOCK_RAW, &inetdomain, IPPROTO_IGMP, PR_ATOMIC|PR_ADDR, /* IGMP: igmp_input, otherwise the rip_* hooks */
+ igmp_input, rip_output, 0, rip_ctloutput,
+ rip_usrreq,
+ igmp_init, igmp_fasttimo, 0, 0,
+},
+#ifdef TPIP
+{ SOCK_SEQPACKET,&inetdomain, IPPROTO_TP, PR_CONNREQUIRED|PR_WANTRCVD, /* only when TPIP is defined */
+ tpip_input, 0, tpip_ctlinput, tp_ctloutput,
+ tp_usrreq,
+ tp_init, 0, tp_slowtimo, tp_drain,
+},
+#endif
+/* EON (ISO CLNL over IP) */
+#ifdef EON
+{ SOCK_RAW, &inetdomain, IPPROTO_EON, 0,
+ eoninput, 0, eonctlinput, 0,
+ 0,
+ eonprotoinit, 0, 0, 0,
+},
+#endif
+#ifdef NSIP
+{ SOCK_RAW, &inetdomain, IPPROTO_IDP, PR_ATOMIC|PR_ADDR, /* only when NSIP is defined */
+ idpip_input, rip_output, nsip_ctlinput, 0,
+ rip_usrreq,
+ 0, 0, 0, 0,
+},
+#endif
+ /* raw wildcard: protocol 0; the only raw entry that supplies rip_init */
+{ SOCK_RAW, &inetdomain, 0, PR_ATOMIC|PR_ADDR,
+ rip_input, rip_output, 0, rip_ctloutput,
+ rip_usrreq,
+ rip_init, 0, 0, 0,
+},
+};
+
+struct domain inetdomain = /* domain record for AF_INET */
+ { AF_INET, "internet", 0, 0, 0,
+ inetsw, &inetsw[sizeof(inetsw)/sizeof(inetsw[0])], 0, /* first entry of inetsw[] and the address one past its last entry */
+ rn_inithead, 32, sizeof(struct sockaddr_in) };
+
+#include "imp.h"
+#if NIMP > 0
+extern struct domain impdomain; /* defined below; needed by the impsw initializer */
+int rimp_output(), hostslowtimo();
+
+struct protosw impsw[] = { /* compiled only when NIMP > 0 */
+{ SOCK_RAW, &impdomain, 0, PR_ATOMIC|PR_ADDR, /* single raw entry */
+ 0, rimp_output, 0, 0,
+ rip_usrreq,
+ 0, 0, hostslowtimo, 0,
+},
+};
+
+struct domain impdomain = /* domain record for AF_IMPLINK */
+ { AF_IMPLINK, "imp", 0, 0, 0,
+ impsw, &impsw[sizeof (impsw)/sizeof(impsw[0])] }; /* first entry of impsw[] and one past its last */
+#endif
+
+#include "hy.h"
+#if NHY > 0
+/*
+ * HYPERchannel protocol family: raw interface.
+ * Compiled only when NHY > 0.  The family has a single raw entry,
+ * which uses rhy_output for output and rip_usrreq for user requests.
+ */
+int rhy_output();
+extern struct domain hydomain; /* defined below; needed by the hysw initializer */
+
+struct protosw hysw[] = {
+{ SOCK_RAW, &hydomain, 0, PR_ATOMIC|PR_ADDR,
+ 0, rhy_output, 0, 0,
+ rip_usrreq,
+ 0, 0, 0, 0,
+},
+};
+
+struct domain hydomain = /* domain record for AF_HYLINK; hysw[] bounds as first entry and one past last */
+ { AF_HYLINK, "hy", 0, 0, 0, hysw, &hysw[sizeof (hysw)/sizeof(hysw[0])] };
+#endif
diff --git a/sys/netinet/in_systm.h b/sys/netinet/in_systm.h
new file mode 100644
index 000000000000..cbd8e539a1ef
--- /dev/null
+++ b/sys/netinet/in_systm.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_systm.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Miscellaneous internetwork
+ * definitions for kernel.
+ */
+
+/*
+ * Network types.
+ *
+ * Internally the system keeps counters in the headers with the bytes
+ * swapped so that VAX instructions will work on them. It reverses
+ * the bytes before transmission at each protocol level. The n_ types
+ * represent the types with the bytes in ``high-ender'' order.
+ */
+typedef u_short n_short; /* short as received from the net */
+typedef u_long n_long; /* long as received from the net */
+
+typedef u_long n_time; /* ms since 00:00 GMT, byte rev */
+
+#ifdef KERNEL
+n_time iptime __P((void));
+#endif
diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h
new file mode 100644
index 000000000000..8218f0b74a38
--- /dev/null
+++ b/sys/netinet/in_var.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 1985, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_var.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Interface address, Internet version. One of these structures
+ * is allocated for each interface with an Internet address.
+ * The ifaddr structure contains the protocol-independent part
+ * of the structure and is assumed to be first.
+ *
+ * The structures are linked through ia_next; the macros in this
+ * file walk that list starting from the global in_ifaddr pointer.
+ */
+struct in_ifaddr {
+ struct ifaddr ia_ifa; /* protocol-independent info */
+#define ia_ifp ia_ifa.ifa_ifp /* shorthand: interface (ifnet) this address belongs to */
+#define ia_flags ia_ifa.ifa_flags /* shorthand for the generic ifaddr flags */
+ /* ia_{,sub}net{,mask} in host order */
+ u_long ia_net; /* network number of interface */
+ u_long ia_netmask; /* mask of net part */
+ u_long ia_subnet; /* subnet number, including net */
+ u_long ia_subnetmask; /* mask of subnet part */
+ struct in_addr ia_netbroadcast; /* to recognize net broadcasts */
+ struct in_ifaddr *ia_next; /* next in list of internet addresses */
+ struct sockaddr_in ia_addr; /* reserve space for interface name; this is what IA_SIN() returns */
+ struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */
+#define ia_broadaddr ia_dstaddr /* same storage under a second name */
+ struct sockaddr_in ia_sockmask; /* reserve space for general netmask */
+ struct in_multi *ia_multiaddrs; /* list of multicast addresses */
+};
+
+struct in_aliasreq {
+ char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct sockaddr_in ifra_addr;
+ struct sockaddr_in ifra_broadaddr;
+#define ifra_dstaddr ifra_broadaddr
+ struct sockaddr_in ifra_mask;
+};
+/*
+ * Given a pointer to an in_ifaddr (ifaddr),
+ * return a pointer to the addr as a sockaddr_in.
+ */
+#define IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr))
+
+/*
+ * Given an in_addr and a pointer to an in_ifaddr (ifaddr), return the
+ * bits of the address, in host order, that lie outside the interface's
+ * subnet mask.  The cast is applied to "ifa" itself before the member
+ * is selected, and the parentheses balance.
+ */
+#define IN_LNAOF(in, ifa) \
+ ((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa))->ia_subnetmask))
+
+
+#ifdef KERNEL
+extern struct in_ifaddr *in_ifaddr;
+extern struct ifqueue ipintrq; /* ip packet input queue */
+void in_socktrim __P((struct sockaddr_in *));
+
+
+/*
+ * Macro for finding the interface (ifnet structure) corresponding to one
+ * of our IP addresses.
+ *
+ * Walks the in_ifaddr list comparing each entry's address with "addr".
+ * "ifp" is set to the matching entry's ia_ifp, or to NULL when no entry
+ * matches.  Expands to a brace-enclosed block that declares its own
+ * local variable named "ia".
+ */
+#define INADDR_TO_IFP(addr, ifp) \
+ /* struct in_addr addr; */ \
+ /* struct ifnet *ifp; */ \
+{ \
+ register struct in_ifaddr *ia; \
+\
+ for (ia = in_ifaddr; \
+ ia != NULL && IA_SIN(ia)->sin_addr.s_addr != (addr).s_addr; \
+ ia = ia->ia_next) \
+ continue; /* empty body: the loop condition does the search */ \
+ (ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \
+}
+
+/*
+ * Macro for finding the internet address structure (in_ifaddr) corresponding
+ * to a given interface (ifnet structure).
+ *
+ * "ia" is left pointing at the first entry on the in_ifaddr list whose
+ * ia_ifp equals "ifp", or is NULL if the list holds no such entry.
+ */
+#define IFP_TO_IA(ifp, ia) \
+ /* struct ifnet *ifp; */ \
+ /* struct in_ifaddr *ia; */ \
+{ \
+ for ((ia) = in_ifaddr; \
+ (ia) != NULL && (ia)->ia_ifp != (ifp); \
+ (ia) = (ia)->ia_next) \
+ continue; /* empty body: the loop condition does the search */ \
+}
+#endif
+
+/*
+ * Internet multicast address structure. There is one of these for each IP
+ * multicast group to which this host belongs on a given network interface.
+ * They are kept in a linked list, rooted in the interface's in_ifaddr
+ * structure.
+ */
+struct in_multi {
+ struct in_addr inm_addr; /* IP multicast address */
+ struct ifnet *inm_ifp; /* back pointer to ifnet */
+ struct in_ifaddr *inm_ia; /* back pointer to in_ifaddr */
+ u_int inm_refcount; /* no. membership claims by sockets */
+ u_int inm_timer; /* IGMP membership report timer */
+ struct in_multi *inm_next; /* ptr to next multicast address */
+};
+
+#ifdef KERNEL
+/*
+ * Structure used by macros below to remember position when stepping through
+ * all of the in_multi records.
+ */
+struct in_multistep {
+ struct in_ifaddr *i_ia; /* next in_ifaddr whose multicast list has not been visited yet */
+ struct in_multi *i_inm; /* next record on the list being visited, or NULL */
+};
+
+/*
+ * Macro for looking up the in_multi record for a given IP multicast address
+ * on a given interface. If no matching record is found, "inm" returns NULL.
+ *
+ * The interface's in_ifaddr is located with IFP_TO_IA(); when the
+ * interface has none, "inm" is set to NULL as well.  Expands to a
+ * brace-enclosed block that declares its own local variable named "ia".
+ * Every use of the "inm" argument is parenthesized so that any lvalue
+ * expression may be passed.
+ */
+#define IN_LOOKUP_MULTI(addr, ifp, inm) \
+ /* struct in_addr addr; */ \
+ /* struct ifnet *ifp; */ \
+ /* struct in_multi *inm; */ \
+{ \
+ register struct in_ifaddr *ia; \
+\
+ IFP_TO_IA((ifp), ia); \
+ if (ia == NULL) \
+ (inm) = NULL; \
+ else \
+ for ((inm) = ia->ia_multiaddrs; \
+ (inm) != NULL && (inm)->inm_addr.s_addr != (addr).s_addr; \
+ (inm) = (inm)->inm_next) \
+ continue; \
+}
+
+/*
+ * Macro to step through all of the in_multi records, one at a time.
+ * The current position is remembered in "step", which the caller must
+ * provide. IN_FIRST_MULTI(), below, must be called to initialize "step"
+ * and get the first record. Both macros return a NULL "inm" when there
+ * are no remaining records.
+ *
+ * If a record is pending in step.i_inm it is returned and the position
+ * moves to its successor.  Otherwise the remaining in_ifaddr entries
+ * are scanned until one with a non-empty multicast list is found.
+ */
+#define IN_NEXT_MULTI(step, inm) \
+ /* struct in_multistep step; */ \
+ /* struct in_multi *inm; */ \
+{ \
+ if (((inm) = (step).i_inm) != NULL) \
+ (step).i_inm = (inm)->inm_next; \
+ else \
+ while ((step).i_ia != NULL) { \
+ (inm) = (step).i_ia->ia_multiaddrs; \
+ (step).i_ia = (step).i_ia->ia_next; /* advance before testing, so the next call resumes after this entry */ \
+ if ((inm) != NULL) { \
+ (step).i_inm = (inm)->inm_next; \
+ break; \
+ } \
+ } \
+}
+
+#define IN_FIRST_MULTI(step, inm) \
+ /* struct in_multistep step; */ \
+ /* struct in_multi *inm; */ \
+{ \
+ (step).i_ia = in_ifaddr; /* start at the head of the address list */ \
+ (step).i_inm = NULL; /* no record pending yet */ \
+ IN_NEXT_MULTI((step), (inm)); /* fetch the first record, if any */ \
+}
+
+int in_ifinit __P((struct ifnet *,
+ struct in_ifaddr *, struct sockaddr_in *, int));
+struct in_multi *in_addmulti __P((struct in_addr *, struct ifnet *));
+int in_delmulti __P((struct in_multi *));
+void in_ifscrub __P((struct ifnet *, struct in_ifaddr *));
+int in_control __P((struct socket *, int, caddr_t, struct ifnet *));
+#endif
diff --git a/sys/netinet/ip.h b/sys/netinet/ip.h
new file mode 100644
index 000000000000..8a31dfaf13dc
--- /dev/null
+++ b/sys/netinet/ip.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for internet protocol version 4.
+ * Per RFC 791, September 1981.
+ */
+#define IPVERSION 4
+
+/*
+ * Structure of an internet header, naked of options.
+ *
+ * We declare ip_len and ip_off to be short, rather than u_short
+ * pragmatically since otherwise unsigned comparisons can result
+ * against negative integers quite easily, and fail in subtle ways.
+ *
+ * The declaration order of the ip_hl and ip_v bit-fields is selected
+ * by BYTE_ORDER.
+ */
+struct ip {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ u_char ip_hl:4, /* header length, in 32-bit words */
+ ip_v:4; /* version */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ u_char ip_v:4, /* version */
+ ip_hl:4; /* header length, in 32-bit words */
+#endif
+ u_char ip_tos; /* type of service */
+ short ip_len; /* total length */
+ u_short ip_id; /* identification */
+ short ip_off; /* fragment offset field; also holds the IP_DF and IP_MF flag bits */
+#define IP_DF 0x4000 /* dont fragment flag */
+#define IP_MF 0x2000 /* more fragments flag */
+#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */
+ u_char ip_ttl; /* time to live */
+ u_char ip_p; /* protocol */
+ u_short ip_sum; /* checksum */
+ struct in_addr ip_src,ip_dst; /* source and dest address */
+};
+
+#define IP_MAXPACKET 65535 /* maximum packet size */
+
+/*
+ * Definitions for IP type of service (ip_tos)
+ */
+#define IPTOS_LOWDELAY 0x10
+#define IPTOS_THROUGHPUT 0x08
+#define IPTOS_RELIABILITY 0x04
+
+/*
+ * Definitions for IP precedence (also in ip_tos) (hopefully unused).
+ *
+ * Precedence occupies the three high-order bits of ip_tos (RFC 791),
+ * so the values step by 0x20 from routine (0x00) up to network
+ * control (0xe0).  Routine is 0x00, not 0x10: 0x10 is IPTOS_LOWDELAY
+ * and has no precedence bits set.
+ */
+#define IPTOS_PREC_NETCONTROL 0xe0
+#define IPTOS_PREC_INTERNETCONTROL 0xc0
+#define IPTOS_PREC_CRITIC_ECP 0xa0
+#define IPTOS_PREC_FLASHOVERRIDE 0x80
+#define IPTOS_PREC_FLASH 0x60
+#define IPTOS_PREC_IMMEDIATE 0x40
+#define IPTOS_PREC_PRIORITY 0x20
+#define IPTOS_PREC_ROUTINE 0x00
+
+/*
+ * Definitions for options.
+ */
+#define IPOPT_COPIED(o) ((o)&0x80)
+#define IPOPT_CLASS(o) ((o)&0x60)
+#define IPOPT_NUMBER(o) ((o)&0x1f)
+
+#define IPOPT_CONTROL 0x00
+#define IPOPT_RESERVED1 0x20
+#define IPOPT_DEBMEAS 0x40
+#define IPOPT_RESERVED2 0x60
+
+#define IPOPT_EOL 0 /* end of option list */
+#define IPOPT_NOP 1 /* no operation */
+
+#define IPOPT_RR 7 /* record packet route */
+#define IPOPT_TS 68 /* timestamp */
+#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */
+#define IPOPT_LSRR 131 /* loose source route */
+#define IPOPT_SATID 136 /* satnet id */
+#define IPOPT_SSRR 137 /* strict source route */
+
+/*
+ * Offsets to fields in options other than EOL and NOP.
+ */
+#define IPOPT_OPTVAL 0 /* option ID */
+#define IPOPT_OLEN 1 /* option length */
+#define IPOPT_OFFSET 2 /* offset within option */
+#define IPOPT_MINOFF 4 /* min value of above */
+
+/*
+ * Time stamp option structure.
+ */
+struct ip_timestamp {
+ u_char ipt_code; /* IPOPT_TS */
+ u_char ipt_len; /* size of structure (variable) */
+ u_char ipt_ptr; /* index of current entry */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ u_char ipt_flg:4, /* flags, see below */
+ ipt_oflw:4; /* overflow counter */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ u_char ipt_oflw:4, /* overflow counter */
+ ipt_flg:4; /* flags, see below */
+#endif
+ union ipt_timestamp {
+ n_long ipt_time[1];
+ struct ipt_ta {
+ struct in_addr ipt_addr;
+ n_long ipt_time;
+ } ipt_ta[1];
+ } ipt_timestamp;
+};
+
+/* flag bits for ipt_flg */
+#define IPOPT_TS_TSONLY 0 /* timestamps only */
+#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */
+#define IPOPT_TS_PRESPEC 3 /* specified modules only */
+
+/* bits for security (not byte swapped) */
+#define IPOPT_SECUR_UNCLASS 0x0000
+#define IPOPT_SECUR_CONFID 0xf135
+#define IPOPT_SECUR_EFTO 0x789a
+#define IPOPT_SECUR_MMMM 0xbc4d
+#define IPOPT_SECUR_RESTR 0xaf13
+#define IPOPT_SECUR_SECRET 0xd788
+#define IPOPT_SECUR_TOPSECRET 0x6bc5
+
+/*
+ * Internet implementation parameters.
+ */
+#define MAXTTL 255 /* maximum time to live (seconds) */
+#define IPDEFTTL 64 /* default ttl, from RFC 1340 */
+#define IPFRAGTTL 60 /* time to live for frags, slowhz */
+#define IPTTLDEC 1 /* subtracted when forwarding */
+
+#define IP_MSS 576 /* default maximum segment size */
diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
new file mode 100644
index 000000000000..c9b82bca9085
--- /dev/null
+++ b/sys/netinet/ip_icmp.c
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/icmp_var.h>
+
+/*
+ * ICMP routines: error generation, receive packet processing, and
+ * routines to turnaround packets back to the originator, and
+ * host table maintenance routines.
+ */
+
+int icmpmaskrepl = 0; /* when 0, icmp_input does not answer ICMP_MASKREQ */
+#ifdef ICMPPRINTFS
+int icmpprintfs = 0; /* when nonzero, icmp_error and icmp_input print trace messages */
+#endif
+
+extern struct protosw inetsw[];
+
+/*
+ * Generate an ICMP error packet of the given type and code
+ * in response to the bad IP packet held in mbuf chain n.
+ *
+ * n       - offending packet; its data starts with the IP header.
+ *           It is freed before returning on every non-panic path.
+ * type    - ICMP type; a value above ICMP_MAXTYPE causes a panic.
+ * code    - ICMP code; for ICMP_PARAMPROB it is stored in icmp_pptr
+ *           and the code actually sent is 0.
+ * dest    - stored as the gateway address when type is ICMP_REDIRECT.
+ * destifp - if non-null, its if_mtu is reported as the next-hop MTU
+ *           for ICMP_UNREACH with code ICMP_UNREACH_NEEDFRAG.
+ *
+ * Nothing is sent for a non-first fragment, for an old ICMP packet
+ * whose type fails ICMP_INFOTYPE() (unless a redirect is being
+ * generated), for a packet flagged M_BCAST or M_MCAST, or when no
+ * mbuf can be allocated.
+ */
+void
+icmp_error(n, type, code, dest, destifp)
+ struct mbuf *n;
+ int type, code;
+ n_long dest;
+ struct ifnet *destifp;
+{
+ register struct ip *oip = mtod(n, struct ip *), *nip;
+ register unsigned oiplen = oip->ip_hl << 2; /* old header length in bytes */
+ register struct icmp *icp;
+ register struct mbuf *m;
+ unsigned icmplen;
+
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("icmp_error(%x, %d, %d)\n", oip, type, code);
+#endif
+ if (type != ICMP_REDIRECT)
+ icmpstat.icps_error++;
+ /*
+ * Don't send an error if this is not the first fragment of a message.
+ * Don't send an error if the old packet was itself an ICMP
+ * error message; only known informational types qualify.
+ */
+ if (oip->ip_off &~ (IP_MF|IP_DF))
+ goto freeit;
+ if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
+ n->m_len >= oiplen + ICMP_MINLEN &&
+ !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
+ icmpstat.icps_oldicmp++;
+ goto freeit;
+ }
+ /* Don't send error in response to a multicast or broadcast packet */
+ if (n->m_flags & (M_BCAST|M_MCAST))
+ goto freeit;
+ /*
+ * First, formulate icmp message
+ */
+ m = m_gethdr(M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ goto freeit;
+ icmplen = oiplen + min(8, oip->ip_len); /* old header plus at most 8 further bytes */
+ m->m_len = icmplen + ICMP_MINLEN;
+ MH_ALIGN(m, m->m_len);
+ icp = mtod(m, struct icmp *);
+ if ((u_int)type > ICMP_MAXTYPE)
+ panic("icmp_error");
+ icmpstat.icps_outhist[type]++;
+ icp->icmp_type = type;
+ if (type == ICMP_REDIRECT)
+ icp->icmp_gwaddr.s_addr = dest;
+ else {
+ icp->icmp_void = 0;
+ /*
+ * The following assignments assume an overlay with the
+ * zeroed icmp_void field.
+ */
+ if (type == ICMP_PARAMPROB) {
+ icp->icmp_pptr = code;
+ code = 0;
+ } else if (type == ICMP_UNREACH &&
+ code == ICMP_UNREACH_NEEDFRAG && destifp) {
+ icp->icmp_nextmtu = htons(destifp->if_mtu);
+ }
+ }
+
+ icp->icmp_code = code;
+ bcopy((caddr_t)oip, (caddr_t)&icp->icmp_ip, icmplen); /* quote the start of the offending packet */
+ nip = &icp->icmp_ip;
+ nip->ip_len = htons((u_short)(nip->ip_len + oiplen)); /* quoted copy: add header length back, store via htons */
+
+ /*
+ * Now, copy old ip header (without options)
+ * in front of icmp message.
+ */
+ if (m->m_data - sizeof(struct ip) < m->m_pktdat)
+ panic("icmp len");
+ m->m_data -= sizeof(struct ip);
+ m->m_len += sizeof(struct ip);
+ m->m_pkthdr.len = m->m_len;
+ m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
+ nip = mtod(m, struct ip *); /* nip now refers to the new outer header */
+ bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
+ nip->ip_len = m->m_len;
+ nip->ip_hl = sizeof(struct ip) >> 2;
+ nip->ip_p = IPPROTO_ICMP;
+ nip->ip_tos = 0;
+ icmp_reflect(m); /* the new packet is passed on; only n remains to be freed here */
+
+freeit:
+ m_freem(n);
+}
+
+static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET }; /* sin_addr is filled in by icmp_input before use */
+static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET }; /* sin_addr is filled in by icmp_input and icmp_reflect */
+static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET }; /* sin_addr is filled in when handling ICMP_REDIRECT */
+struct sockaddr_in icmpmask = { 8, 0 }; /* NOTE(review): not static and not referenced in the visible code -- confirm users */
+
+/*
+ * Process a received ICMP message.
+ *
+ * m    - mbuf chain whose data starts with the IP header.
+ * hlen - length of that IP header; it is skipped to reach the ICMP header.
+ *
+ * Messages that are too short or that fail the checksum are counted
+ * and dropped.  Unreachable, time-exceeded, parameter-problem and
+ * source-quench messages are translated to a PRC_* code and passed to
+ * the pr_ctlinput routine of the protocol named in the quoted IP
+ * header.  Echo, timestamp and (when icmpmaskrepl is nonzero)
+ * address-mask requests are rewritten in place as replies and sent
+ * back through icmp_reflect().  Redirects are applied with
+ * rtredirect().  Every message that is neither dropped nor reflected
+ * is finally handed to rip_input().
+ */
+void
+icmp_input(m, hlen)
+ register struct mbuf *m;
+ int hlen;
+{
+ register struct icmp *icp;
+ register struct ip *ip = mtod(m, struct ip *);
+ int icmplen = ip->ip_len; /* length of the ICMP message; hlen is already deducted (see reflect:) */
+ register int i;
+ struct in_ifaddr *ia;
+ void (*ctlfunc) __P((int, struct sockaddr *, struct ip *));
+ int code;
+ extern u_char ip_protox[];
+
+ /*
+ * Locate icmp structure in mbuf, and check
+ * that not corrupted and of at least minimum length.
+ */
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("icmp_input from %x to %x, len %d\n",
+ ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
+ icmplen);
+#endif
+ if (icmplen < ICMP_MINLEN) {
+ icmpstat.icps_tooshort++;
+ goto freeit;
+ }
+ i = hlen + min(icmplen, ICMP_ADVLENMIN);
+ if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
+ icmpstat.icps_tooshort++;
+ return; /* m is null here, so there is nothing to free */
+ }
+ ip = mtod(m, struct ip *); /* reload: m_pullup may have returned a different mbuf */
+ m->m_len -= hlen; /* temporarily step past the IP header */
+ m->m_data += hlen;
+ icp = mtod(m, struct icmp *);
+ if (in_cksum(m, icmplen)) {
+ icmpstat.icps_checksum++;
+ goto freeit;
+ }
+ m->m_len += hlen; /* make the IP header visible again */
+ m->m_data -= hlen;
+
+#ifdef ICMPPRINTFS
+ /*
+ * Message type specific processing.
+ */
+ if (icmpprintfs)
+ printf("icmp_input, type %d code %d\n", icp->icmp_type,
+ icp->icmp_code);
+#endif
+ if (icp->icmp_type > ICMP_MAXTYPE)
+ goto raw; /* type outside the statistics table: no kernel processing */
+ icmpstat.icps_inhist[icp->icmp_type]++;
+ code = icp->icmp_code;
+ switch (icp->icmp_type) {
+
+ case ICMP_UNREACH:
+ switch (code) {
+ case ICMP_UNREACH_NET:
+ case ICMP_UNREACH_HOST:
+ case ICMP_UNREACH_PROTOCOL:
+ case ICMP_UNREACH_PORT:
+ case ICMP_UNREACH_SRCFAIL:
+ code += PRC_UNREACH_NET; /* offset mapping: ICMP code N becomes PRC_UNREACH_NET + N */
+ break;
+
+ case ICMP_UNREACH_NEEDFRAG:
+ code = PRC_MSGSIZE;
+ break;
+
+ case ICMP_UNREACH_NET_UNKNOWN:
+ case ICMP_UNREACH_NET_PROHIB:
+ case ICMP_UNREACH_TOSNET:
+ code = PRC_UNREACH_NET;
+ break;
+
+ case ICMP_UNREACH_HOST_UNKNOWN:
+ case ICMP_UNREACH_ISOLATED:
+ case ICMP_UNREACH_HOST_PROHIB:
+ case ICMP_UNREACH_TOSHOST:
+ code = PRC_UNREACH_HOST;
+ break;
+
+ default:
+ goto badcode;
+ }
+ goto deliver;
+
+ case ICMP_TIMXCEED:
+ if (code > 1)
+ goto badcode;
+ code += PRC_TIMXCEED_INTRANS;
+ goto deliver;
+
+ case ICMP_PARAMPROB:
+ if (code > 1)
+ goto badcode;
+ code = PRC_PARAMPROB;
+ goto deliver;
+
+ case ICMP_SOURCEQUENCH:
+ if (code)
+ goto badcode;
+ code = PRC_QUENCH;
+ deliver:
+ /*
+ * Problem with datagram; advise higher level routines.
+ * The message must be long enough to hold the quoted IP
+ * header, and that header must claim at least the minimum
+ * header length.
+ */
+ if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
+ icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
+ icmpstat.icps_badlen++;
+ goto freeit;
+ }
+ NTOHS(icp->icmp_ip.ip_len); /* NOTE(review): presumably converts the quoted length to host order in place -- macro defined elsewhere */
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
+#endif
+ icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
+ if (ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput)
+ (*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
+ &icp->icmp_ip);
+ break; /* falls out of the switch to raw: below */
+
+ badcode:
+ icmpstat.icps_badcode++;
+ break; /* counted, then still passed to rip_input() at raw: */
+
+ case ICMP_ECHO:
+ icp->icmp_type = ICMP_ECHOREPLY;
+ goto reflect;
+
+ case ICMP_TSTAMP:
+ if (icmplen < ICMP_TSLEN) {
+ icmpstat.icps_badlen++;
+ break;
+ }
+ icp->icmp_type = ICMP_TSTAMPREPLY;
+ icp->icmp_rtime = iptime();
+ icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
+ goto reflect;
+
+ case ICMP_MASKREQ:
+#define satosin(sa) ((struct sockaddr_in *)(sa)) /* stays defined for the rest of the file; icmp_reflect uses it too */
+ if (icmpmaskrepl == 0)
+ break;
+ /*
+ * We are not able to respond with all ones broadcast
+ * unless we receive it over a point-to-point interface.
+ */
+ if (icmplen < ICMP_MASKLEN)
+ break;
+ switch (ip->ip_dst.s_addr) {
+
+ case INADDR_BROADCAST:
+ case INADDR_ANY:
+ icmpdst.sin_addr = ip->ip_src;
+ break;
+
+ default:
+ icmpdst.sin_addr = ip->ip_dst;
+ }
+ ia = (struct in_ifaddr *)ifaof_ifpforaddr(
+ (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
+ if (ia == 0)
+ break;
+ icp->icmp_type = ICMP_MASKREPLY;
+ icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
+ if (ip->ip_src.s_addr == 0) { /* requester gave no source address: substitute one from the interface */
+ if (ia->ia_ifp->if_flags & IFF_BROADCAST)
+ ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
+ else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
+ ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
+ }
+reflect:
+ ip->ip_len += hlen; /* since ip_input deducts this */
+ icmpstat.icps_reflect++;
+ icmpstat.icps_outhist[icp->icmp_type]++;
+ icmp_reflect(m);
+ return; /* m was passed to icmp_reflect(); it is not given to rip_input() */
+
+ case ICMP_REDIRECT:
+ if (code > 3)
+ goto badcode;
+ if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
+ icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
+ icmpstat.icps_badlen++;
+ break;
+ }
+ /*
+ * Short circuit routing redirects to force
+ * immediate change in the kernel's routing
+ * tables. The message is also handed to anyone
+ * listening on a raw socket (e.g. the routing
+ * daemon for use in updating its tables).
+ */
+ icmpgw.sin_addr = ip->ip_src;
+ icmpdst.sin_addr = icp->icmp_gwaddr;
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
+ icp->icmp_gwaddr);
+#endif
+ icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
+ rtredirect((struct sockaddr *)&icmpsrc,
+ (struct sockaddr *)&icmpdst,
+ (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
+ (struct sockaddr *)&icmpgw, (struct rtentry **)0);
+ pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
+ break;
+
+ /*
+ * No kernel processing for the following;
+ * just fall through to send to raw listener.
+ */
+ case ICMP_ECHOREPLY:
+ case ICMP_ROUTERADVERT:
+ case ICMP_ROUTERSOLICIT:
+ case ICMP_TSTAMPREPLY:
+ case ICMP_IREQREPLY:
+ case ICMP_MASKREPLY:
+ default:
+ break;
+ }
+
+raw:
+ rip_input(m);
+ return;
+
+freeit:
+ m_freem(m);
+}
+
+/*
+ * Reflect the ip packet back to the source
+ *
+ * The destination of the reply becomes the original source, and a local
+ * address is chosen as the new source.  Any IP options present in the
+ * incoming header are stripped from the packet itself; an option mbuf
+ * (saved source route, if any, plus copies of the record-route, timestamp
+ * and security options) is built and passed to icmp_send() instead.
+ *
+ * The packet mbuf m is always consumed: it is either freed here (unusable
+ * return address) or handed to icmp_send().  The option mbuf, if one was
+ * obtained, is freed before returning.
+ */
+void
+icmp_reflect(m)
+ struct mbuf *m;
+{
+ register struct ip *ip = mtod(m, struct ip *);
+ register struct in_ifaddr *ia;
+ struct in_addr t;
+ struct mbuf *opts = 0, *ip_srcroute();
+ int optlen = (ip->ip_hl << 2) - sizeof(struct ip); /* bytes of IP options in the header */
+
+ if (!in_canforward(ip->ip_src) &&
+ ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
+ (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
+ m_freem(m); /* Bad return address */
+ goto done; /* Ip_output() will check for broadcast */
+ }
+ t = ip->ip_dst; /* remember the address the packet was sent to */
+ ip->ip_dst = ip->ip_src;
+ /*
+ * If the incoming packet was addressed directly to us,
+ * use dst as the src for the reply. Otherwise (broadcast
+ * or anonymous), use the address which corresponds
+ * to the incoming interface.
+ */
+ for (ia = in_ifaddr; ia; ia = ia->ia_next) {
+ if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
+ break;
+ if ((ia->ia_ifp->if_flags & IFF_BROADCAST) &&
+ t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr)
+ break;
+ }
+ icmpdst.sin_addr = t;
+ if (ia == (struct in_ifaddr *)0)
+ ia = (struct in_ifaddr *)ifaof_ifpforaddr(
+ (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
+ /*
+ * The following happens if the packet was not addressed to us,
+ * and was received on an interface with no IP address.
+ * NOTE(review): in_ifaddr itself is dereferenced below without a
+ * null check; this relies on at least one address being configured.
+ */
+ if (ia == (struct in_ifaddr *)0)
+ ia = in_ifaddr;
+ t = IA_SIN(ia)->sin_addr; /* t now holds the chosen local source address */
+ ip->ip_src = t;
+ ip->ip_ttl = MAXTTL;
+
+ if (optlen > 0) {
+ register u_char *cp;
+ int opt, cnt;
+ u_int len;
+
+ /*
+ * Retrieve any source routing from the incoming packet;
+ * add on any record-route or timestamp options.
+ * When there is no saved source route, a fresh mbuf is
+ * started with a zeroed in_addr in the leading slot.
+ */
+ cp = (u_char *) (ip + 1);
+ if ((opts = ip_srcroute()) == 0 &&
+ (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
+ opts->m_len = sizeof(struct in_addr);
+ mtod(opts, struct in_addr *)->s_addr = 0;
+ }
+ if (opts) {
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("icmp_reflect optlen %d rt %d => ",
+ optlen, opts->m_len);
+#endif
+ for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ len = 1;
+ else {
+ len = cp[IPOPT_OLEN];
+ if (len <= 0 || len > cnt) /* len is unsigned: rejects len == 0 or an option overrunning the header */
+ break;
+ }
+ /*
+ * Should check for overflow, but it "can't happen"
+ */
+ if (opt == IPOPT_RR || opt == IPOPT_TS ||
+ opt == IPOPT_SECURITY) {
+ bcopy((caddr_t)cp,
+ mtod(opts, caddr_t) + opts->m_len, len);
+ opts->m_len += len;
+ }
+ }
+ /* Terminate & pad, if necessary */
+ if (cnt = opts->m_len % 4) { /* assignment intended: cnt = bytes past the last 4-byte boundary */
+ for (; cnt < 4; cnt++) {
+ *(mtod(opts, caddr_t) + opts->m_len) =
+ IPOPT_EOL;
+ opts->m_len++;
+ }
+ }
+#ifdef ICMPPRINTFS
+ if (icmpprintfs)
+ printf("%d\n", opts->m_len);
+#endif
+ }
+ /*
+ * Now strip out original options by copying rest of first
+ * mbuf's data back, and adjust the IP length.
+ * This is done even when no option mbuf could be obtained.
+ */
+ ip->ip_len -= optlen;
+ ip->ip_hl = sizeof(struct ip) >> 2;
+ m->m_len -= optlen;
+ if (m->m_flags & M_PKTHDR)
+ m->m_pkthdr.len -= optlen;
+ optlen += sizeof(struct ip); /* optlen is now the old full header length */
+ bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
+ (unsigned)(m->m_len - sizeof(struct ip)));
+ }
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ icmp_send(m, opts);
+done:
+ if (opts)
+ (void)m_free(opts);
+}
+
+/*
+ * Checksum the ICMP portion of the packet in m and pass the
+ * datagram, together with the option mbuf opts, down to ip_output().
+ */
+void
+icmp_send(m, opts)
+	register struct mbuf *m;
+	struct mbuf *opts;
+{
+	struct ip *iph = mtod(m, struct ip *);
+	int iphlen = iph->ip_hl << 2;
+	int icmplen = iph->ip_len - iphlen;
+	struct icmp *icmph;
+
+	/* Temporarily hide the IP header so the mbuf starts at the ICMP header. */
+	m->m_len -= iphlen;
+	m->m_data += iphlen;
+
+	/* The checksum field must be zero while the sum is computed. */
+	icmph = mtod(m, struct icmp *);
+	icmph->icmp_cksum = 0;
+	icmph->icmp_cksum = in_cksum(m, icmplen);
+
+	/* Expose the IP header again. */
+	m->m_len += iphlen;
+	m->m_data -= iphlen;
+#ifdef ICMPPRINTFS
+	if (icmpprintfs)
+		printf("icmp_send dst %x src %x\n", iph->ip_dst, iph->ip_src);
+#endif
+	(void) ip_output(m, opts, NULL, 0, NULL);
+}
+
+/*
+ * Return the number of milliseconds elapsed in the current
+ * 24-hour period, converted with htonl().
+ */
+n_time
+iptime()
+{
+	struct timeval now;
+	u_long since_midnight;
+
+	microtime(&now);
+	since_midnight = now.tv_usec / 1000 + (now.tv_sec % (24*60*60)) * 1000;
+	return (htonl(since_midnight));
+}
+
+/*
+ * sysctl handler for the ICMP level.  Only ICMPCTL_MASKREPL is
+ * recognized; it reads or sets icmpmaskrepl through sysctl_int().
+ * Returns ENOTDIR when the name has more than one component and
+ * ENOPROTOOPT for any other name.
+ */
+int
+icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+{
+
+	/* Every name at this level is a leaf. */
+	if (namelen != 1)
+		return (ENOTDIR);
+
+	if (name[0] == ICMPCTL_MASKREPL)
+		return (sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl));
+
+	return (ENOPROTOOPT);
+}
diff --git a/sys/netinet/ip_icmp.h b/sys/netinet/ip_icmp.h
new file mode 100644
index 000000000000..3c3462d32667
--- /dev/null
+++ b/sys/netinet/ip_icmp.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Internet Control Message Protocol Definitions.
+ * Per RFC 792, September 1981.
+ */
+
+/*
+ * Structure of an icmp header.
+ *
+ * The fixed type/code/checksum fields are followed by two unions:
+ * icmp_hun overlays the alternatives for the next header word and
+ * icmp_dun overlays the alternatives for the data that follows it.
+ * The icmp_xxx macros defined after each union are shorthand for
+ * reaching the individual members.
+ */
+struct icmp {
+ u_char icmp_type; /* type of message, see below */
+ u_char icmp_code; /* type sub code */
+ u_short icmp_cksum; /* ones complement cksum of struct */
+ union {
+ u_char ih_pptr; /* ICMP_PARAMPROB */
+ struct in_addr ih_gwaddr; /* ICMP_REDIRECT */
+ struct ih_idseq {
+ n_short icd_id; /* reached as icmp_id */
+ n_short icd_seq; /* reached as icmp_seq */
+ } ih_idseq;
+ int ih_void; /* reached as icmp_void */
+
+ /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */
+ struct ih_pmtu {
+ n_short ipm_void; /* reached as icmp_pmvoid */
+ n_short ipm_nextmtu; /* reached as icmp_nextmtu */
+ } ih_pmtu;
+ } icmp_hun;
+#define icmp_pptr icmp_hun.ih_pptr
+#define icmp_gwaddr icmp_hun.ih_gwaddr
+#define icmp_id icmp_hun.ih_idseq.icd_id
+#define icmp_seq icmp_hun.ih_idseq.icd_seq
+#define icmp_void icmp_hun.ih_void
+#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void
+#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu
+ union {
+ struct id_ts {
+ n_time its_otime; /* presumably the originate timestamp -- TODO confirm against RFC 792 */
+ n_time its_rtime; /* filled from iptime() when a timestamp request is answered */
+ n_time its_ttime; /* set equal to its_rtime when a timestamp request is answered */
+ } id_ts;
+ struct id_ip {
+ struct ip idi_ip;
+ /* options and then 64 bits of data */
+ } id_ip;
+ u_long id_mask; /* filled with the interface netmask for ICMP_MASKREPLY */
+ char id_data[1]; /* presumably the first byte of variable-length data -- TODO confirm */
+ } icmp_dun;
+#define icmp_otime icmp_dun.id_ts.its_otime
+#define icmp_rtime icmp_dun.id_ts.its_rtime
+#define icmp_ttime icmp_dun.id_ts.its_ttime
+#define icmp_ip icmp_dun.id_ip.idi_ip
+#define icmp_mask icmp_dun.id_mask
+#define icmp_data icmp_dun.id_data
+};
+
+/*
+ * Lower bounds on packet lengths for various types.
+ * For the error advice packets we must first ensure that the
+ * packet is large enough to contain the returned ip header.
+ * Only then can we do the check to see if 64 bits of packet
+ * data have been returned, since we need to check the returned
+ * ip header length.
+ */
+#define ICMP_MINLEN 8 /* abs minimum */
+#define ICMP_TSLEN (8 + 3 * sizeof (n_time)) /* timestamp */
+#define ICMP_MASKLEN 12 /* address mask */
+#define ICMP_ADVLENMIN (8 + sizeof (struct ip) + 8) /* min */
+#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8)
+ /* N.B.: must separately check that ip_hl >= 5 */
+
+/*
+ * Definition of type and code field values.
+ */
+#define ICMP_ECHOREPLY 0 /* echo reply */
+#define ICMP_UNREACH 3 /* dest unreachable, codes: */
+#define ICMP_UNREACH_NET 0 /* bad net */
+#define ICMP_UNREACH_HOST 1 /* bad host */
+#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */
+#define ICMP_UNREACH_PORT 3 /* bad port */
+#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */
+#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */
+#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */
+#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */
+#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */
+#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */
+#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */
+#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */
+#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */
+#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */
+#define ICMP_REDIRECT 5 /* shorter route, codes: */
+#define ICMP_REDIRECT_NET 0 /* for network */
+#define ICMP_REDIRECT_HOST 1 /* for host */
+#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */
+#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */
+#define ICMP_ECHO 8 /* echo service */
+#define ICMP_ROUTERADVERT 9 /* router advertisement */
+#define ICMP_ROUTERSOLICIT 10 /* router solicitation */
+#define ICMP_TIMXCEED 11 /* time exceeded, code: */
+#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */
+#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */
+#define ICMP_PARAMPROB 12 /* ip header bad */
+#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. absent */
+#define ICMP_TSTAMP 13 /* timestamp request */
+#define ICMP_TSTAMPREPLY 14 /* timestamp reply */
+#define ICMP_IREQ 15 /* information request */
+#define ICMP_IREQREPLY 16 /* information reply */
+#define ICMP_MASKREQ 17 /* address mask request */
+#define ICMP_MASKREPLY 18 /* address mask reply */
+
+#define ICMP_MAXTYPE 18
+
+#define ICMP_INFOTYPE(type) \
+ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \
+ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \
+ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \
+ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \
+ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) /* nonzero only for the request/reply/advertisement types listed; evaluates (type) several times */
+
+#ifdef KERNEL
+void icmp_error __P((struct mbuf *, int, int, n_long, struct ifnet *));
+void icmp_input __P((struct mbuf *, int));
+void icmp_reflect __P((struct mbuf *));
+void icmp_send __P((struct mbuf *, struct mbuf *));
+int icmp_sysctl __P((int *, u_int, void *, size_t *, void *, size_t));
+#endif
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
new file mode 100644
index 000000000000..d3bfeac4b19e
--- /dev/null
+++ b/sys/netinet/ip_input.c
@@ -0,0 +1,1166 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+
+#ifndef IPFORWARDING
+#ifdef GATEWAY
+#define IPFORWARDING 1 /* forward IP packets not for us */
+#else /* GATEWAY */
+#define IPFORWARDING 0 /* don't forward IP packets not for us */
+#endif /* GATEWAY */
+#endif /* IPFORWARDING */
+#ifndef IPSENDREDIRECTS
+#define IPSENDREDIRECTS 1
+#endif
+int ipforwarding = IPFORWARDING; /* nonzero: ipintr() forwards packets not addressed to us */
+int ipsendredirects = IPSENDREDIRECTS; /* presumably enables sending ICMP redirects -- not used in this part of the file */
+int ip_defttl = IPDEFTTL; /* presumably the default time-to-live for outgoing packets -- TODO confirm */
+#ifdef DIAGNOSTIC
+int ipprintfs = 0; /* nonzero enables the debugging printf in save_rte() */
+#endif
+
+extern struct domain inetdomain;
+extern struct protosw inetsw[];
+u_char ip_protox[IPPROTO_MAX]; /* IP protocol number -> index into inetsw[], filled by ip_init() */
+int ipqmaxlen = IFQ_MAXLEN; /* copied into ipintrq.ifq_maxlen by ip_init() */
+struct in_ifaddr *in_ifaddr; /* first inet address */
+struct ifqueue ipintrq; /* input queue drained by ipintr() */
+
+/*
+ * We need to save the IP options in case a protocol wants to respond
+ * to an incoming packet over the same route if the packet got here
+ * using IP source routing. This allows connection establishment and
+ * maintenance when the remote end is on a network that is not known
+ * to us.
+ */
+int ip_nhops = 0;
+static struct ip_srcrt {
+ struct in_addr dst; /* final destination */
+ char nop; /* one NOP to align */
+ char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
+ struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
+} ip_srcrt;
+
+#ifdef GATEWAY
+extern int if_index;
+u_long *ip_ifmatrix;
+#endif
+
+static void save_rte __P((u_char *, struct in_addr));
+/*
+ * IP initialization.
+ *
+ * Builds ip_protox[], the table mapping an IP protocol number to its
+ * slot in inetsw[]: every entry first points at the raw IP handler and
+ * is then overridden for each protocol the kernel implements.  Also
+ * sets up the empty fragment reassembly queue, seeds ip_id from the
+ * clock and sets the input queue limit.
+ */
+void
+ip_init()
+{
+	struct protosw *rawpr, *psw;
+	int n;
+
+	rawpr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
+	if (rawpr == 0)
+		panic("ip_init");
+
+	/* Default: everything goes to the raw IP protocol handler. */
+	n = 0;
+	while (n < IPPROTO_MAX) {
+		ip_protox[n] = rawpr - inetsw;
+		n++;
+	}
+
+	/* Override the slots of protocols that have their own entry. */
+	for (psw = inetdomain.dom_protosw;
+	    psw < inetdomain.dom_protoswNPROTOSW; psw++) {
+		if (psw->pr_domain->dom_family != PF_INET)
+			continue;
+		if (psw->pr_protocol == 0 || psw->pr_protocol == IPPROTO_RAW)
+			continue;
+		ip_protox[psw->pr_protocol] = psw - inetsw;
+	}
+
+	/* Empty reassembly queue: the head points at itself. */
+	ipq.prev = &ipq;
+	ipq.next = &ipq;
+
+	ip_id = time.tv_sec & 0xffff;
+	ipintrq.ifq_maxlen = ipqmaxlen;
+#ifdef GATEWAY
+	n = (if_index + 1) * (if_index + 1) * sizeof (u_long);
+	ip_ifmatrix = (u_long *) malloc(n, M_RTABLE, M_WAITOK);
+	bzero((char *)ip_ifmatrix, n);
+#endif
+}
+
+struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
+struct route ipforward_rt;
+
+/*
+ * Ip input routine. Checksum and byte swap header. If fragmented
+ * try to reassemble. Process options. Pass to next level.
+ *
+ * Runs as a loop (label `next') that dequeues packets from ipintrq
+ * until the queue is empty.  Each packet ends up in exactly one place:
+ * the protocol input routine selected through ip_protox[], ip_forward(),
+ * the reassembly queue, or m_freem() (label `bad' and the drop paths).
+ *
+ * NOTE(review): nothing in this routine checks that a fragment's offset
+ * plus length stays within the maximum datagram size before it is given
+ * to ip_reass(); confirm whether that is bounded elsewhere.
+ */
+void
+ipintr()
+{
+ register struct ip *ip;
+ register struct mbuf *m;
+ register struct ipq *fp;
+ register struct in_ifaddr *ia;
+ int hlen, s;
+
+next:
+ /*
+ * Get next datagram off input queue and get IP header
+ * in first mbuf.
+ */
+ s = splimp();
+ IF_DEQUEUE(&ipintrq, m);
+ splx(s);
+ if (m == 0) /* queue drained: this is the only normal exit */
+ return;
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("ipintr no HDR");
+#endif
+ /*
+ * If no IP addresses have been set yet but the interfaces
+ * are receiving, can't do anything with incoming packets yet.
+ */
+ if (in_ifaddr == NULL)
+ goto bad;
+ ipstat.ips_total++;
+ if (m->m_len < sizeof (struct ip) &&
+ (m = m_pullup(m, sizeof (struct ip))) == 0) {
+ ipstat.ips_toosmall++;
+ goto next; /* not `bad': m is already 0 here, nothing left to free */
+ }
+ ip = mtod(m, struct ip *);
+ if (ip->ip_v != IPVERSION) {
+ ipstat.ips_badvers++;
+ goto bad;
+ }
+ hlen = ip->ip_hl << 2;
+ if (hlen < sizeof(struct ip)) { /* minimum header length */
+ ipstat.ips_badhlen++;
+ goto bad;
+ }
+ if (hlen > m->m_len) {
+ if ((m = m_pullup(m, hlen)) == 0) {
+ ipstat.ips_badhlen++;
+ goto next;
+ }
+ ip = mtod(m, struct ip *); /* m_pullup may have returned a different mbuf */
+ }
+ if (ip->ip_sum = in_cksum(m, hlen)) { /* assignment intended: a nonzero result marks a bad header checksum */
+ ipstat.ips_badsum++;
+ goto bad;
+ }
+
+ /*
+ * Convert fields to host representation.
+ */
+ NTOHS(ip->ip_len);
+ if (ip->ip_len < hlen) {
+ ipstat.ips_badlen++;
+ goto bad;
+ }
+ NTOHS(ip->ip_id);
+ NTOHS(ip->ip_off);
+
+ /*
+ * Check that the amount of data in the buffers
+ * is at least as much as the IP header would have us expect.
+ * Trim mbufs if longer than we expect.
+ * Drop packet if shorter than we expect.
+ */
+ if (m->m_pkthdr.len < ip->ip_len) {
+ ipstat.ips_tooshort++;
+ goto bad;
+ }
+ if (m->m_pkthdr.len > ip->ip_len) {
+ if (m->m_len == m->m_pkthdr.len) { /* whole packet is in this one mbuf: just shorten it */
+ m->m_len = ip->ip_len;
+ m->m_pkthdr.len = ip->ip_len;
+ } else
+ m_adj(m, ip->ip_len - m->m_pkthdr.len); /* argument is negative here; presumably trims from the tail -- confirm against m_adj */
+ }
+
+ /*
+ * Process options and, if not destined for us,
+ * ship it on. ip_dooptions returns 1 when an
+ * error was detected (causing an icmp message
+ * to be sent and the original packet to be freed).
+ */
+ ip_nhops = 0; /* for source routed packets */
+ if (hlen > sizeof (struct ip) && ip_dooptions(m))
+ goto next;
+
+ /*
+ * Check our list of addresses, to see if the packet is for us.
+ */
+ for (ia = in_ifaddr; ia; ia = ia->ia_next) {
+#define satosin(sa) ((struct sockaddr_in *)(sa))
+
+ if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
+ goto ours;
+ if (
+#ifdef DIRECTED_BROADCAST
+ ia->ia_ifp == m->m_pkthdr.rcvif &&
+#endif
+ (ia->ia_ifp->if_flags & IFF_BROADCAST)) {
+ u_long t;
+
+ if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
+ ip->ip_dst.s_addr)
+ goto ours;
+ if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
+ goto ours;
+ /*
+ * Look for all-0's host part (old broadcast addr),
+ * either for subnet or net.
+ */
+ t = ntohl(ip->ip_dst.s_addr);
+ if (t == ia->ia_subnet)
+ goto ours;
+ if (t == ia->ia_net)
+ goto ours;
+ }
+ }
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+ struct in_multi *inm;
+#ifdef MROUTING
+ extern struct socket *ip_mrouter;
+
+ if (ip_mrouter) {
+ /*
+ * If we are acting as a multicast router, all
+ * incoming multicast packets are passed to the
+ * kernel-level multicast forwarding function.
+ * The packet is returned (relatively) intact; if
+ * ip_mforward() returns a non-zero value, the packet
+ * must be discarded, else it may be accepted below.
+ *
+ * (The IP ident field is put in the same byte order
+ * as expected when ip_mforward() is called from
+ * ip_output().)
+ */
+ ip->ip_id = htons(ip->ip_id);
+ if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
+ ipstat.ips_cantforward++;
+ m_freem(m);
+ goto next;
+ }
+ ip->ip_id = ntohs(ip->ip_id);
+
+ /*
+ * The process-level routing demon needs to receive
+ * all multicast IGMP packets, whether or not this
+ * host belongs to their destination groups.
+ */
+ if (ip->ip_p == IPPROTO_IGMP)
+ goto ours;
+ ipstat.ips_forward++;
+ }
+#endif
+ /*
+ * See if we belong to the destination multicast group on the
+ * arrival interface.
+ */
+ IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
+ if (inm == NULL) {
+ ipstat.ips_cantforward++;
+ m_freem(m);
+ goto next;
+ }
+ goto ours;
+ }
+ if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
+ goto ours;
+ if (ip->ip_dst.s_addr == INADDR_ANY)
+ goto ours;
+
+ /*
+ * Not for us; forward if possible and desirable.
+ */
+ if (ipforwarding == 0) {
+ ipstat.ips_cantforward++;
+ m_freem(m);
+ } else
+ ip_forward(m, 0);
+ goto next;
+
+ours:
+ /*
+ * If offset or IP_MF are set, must reassemble.
+ * Otherwise, nothing need be done.
+ * (We could look in the reassembly queue to see
+ * if the packet was previously fragmented,
+ * but it's not worth the time; just let them time out.)
+ */
+ if (ip->ip_off &~ IP_DF) {
+ if (m->m_flags & M_EXT) { /* XXX */
+ if ((m = m_pullup(m, sizeof (struct ip))) == 0) {
+ ipstat.ips_toosmall++;
+ goto next;
+ }
+ ip = mtod(m, struct ip *);
+ }
+ /*
+ * Look for queue of fragments
+ * of this datagram.
+ * A queue matches on id, source, destination and protocol.
+ */
+ for (fp = ipq.next; fp != &ipq; fp = fp->next)
+ if (ip->ip_id == fp->ipq_id &&
+ ip->ip_src.s_addr == fp->ipq_src.s_addr &&
+ ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
+ ip->ip_p == fp->ipq_p)
+ goto found;
+ fp = 0; /* no queue yet for this datagram */
+found:
+
+ /*
+ * Adjust ip_len to not reflect header,
+ * set ip_mff if more fragments are expected,
+ * convert offset of this to bytes.
+ */
+ ip->ip_len -= hlen;
+ ((struct ipasfrag *)ip)->ipf_mff &= ~1;
+ if (ip->ip_off & IP_MF)
+ ((struct ipasfrag *)ip)->ipf_mff |= 1;
+ ip->ip_off <<= 3; /* the shift also discards the flag bits held in the top of ip_off */
+
+ /*
+ * If datagram marked as having more fragments
+ * or if this is not the first fragment,
+ * attempt reassembly; if it succeeds, proceed.
+ */
+ if (((struct ipasfrag *)ip)->ipf_mff & 1 || ip->ip_off) {
+ ipstat.ips_fragments++;
+ ip = ip_reass((struct ipasfrag *)ip, fp);
+ if (ip == 0) /* fragment was queued or dropped; nothing to deliver yet */
+ goto next;
+ ipstat.ips_reassembled++;
+ m = dtom(ip);
+ } else
+ if (fp)
+ ip_freef(fp);
+ } else
+ ip->ip_len -= hlen;
+
+ /*
+ * Switch out to protocol's input routine.
+ * At this point ip_len no longer includes the header length.
+ */
+ ipstat.ips_delivered++;
+ (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
+ goto next;
+bad:
+ m_freem(m);
+ goto next;
+}
+
+/*
+ * Take incoming datagram fragment and try to
+ * reassemble it into whole datagram. If a chain for
+ * reassembly of this datagram already exists, then it
+ * is given as fp; otherwise have to make a chain.
+ *
+ * The caller (ipintr) has already subtracted the header length from
+ * ip_len, converted ip_off to bytes and recorded the more-fragments
+ * flag in bit 0 of ipf_mff.
+ *
+ * Returns the header of the reassembled datagram once every fragment
+ * is present, or 0 when the fragment was only queued or was dropped
+ * (dropped fragments are freed here and counted in ips_fragdropped).
+ */
+struct ip *
+ip_reass(ip, fp)
+ register struct ipasfrag *ip;
+ register struct ipq *fp;
+{
+ register struct mbuf *m = dtom(ip);
+ register struct ipasfrag *q;
+ struct mbuf *t;
+ int hlen = ip->ip_hl << 2;
+ int i, next;
+
+ /*
+ * Presence of header sizes in mbufs
+ * would confuse code below.
+ */
+ m->m_data += hlen;
+ m->m_len -= hlen;
+
+ /*
+ * If first fragment to arrive, create a reassembly queue.
+ * The queue header lives in its own mbuf and starts out as
+ * an empty ring pointing at itself.
+ */
+ if (fp == 0) {
+ if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
+ goto dropfrag;
+ fp = mtod(t, struct ipq *);
+ insque(fp, &ipq);
+ fp->ipq_ttl = IPFRAGTTL;
+ fp->ipq_p = ip->ip_p;
+ fp->ipq_id = ip->ip_id;
+ fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
+ fp->ipq_src = ((struct ip *)ip)->ip_src;
+ fp->ipq_dst = ((struct ip *)ip)->ip_dst;
+ q = (struct ipasfrag *)fp;
+ goto insert;
+ }
+
+ /*
+ * Find a segment which begins after this one does.
+ * If none does, q ends up at the queue header itself.
+ */
+ for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
+ if (q->ip_off > ip->ip_off)
+ break;
+
+ /*
+ * If there is a preceding segment, it may provide some of
+ * our data already. If so, drop the data from the incoming
+ * segment. If it provides all of our data, drop us.
+ */
+ if (q->ipf_prev != (struct ipasfrag *)fp) {
+ i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off; /* bytes of overlap with the predecessor */
+ if (i > 0) {
+ if (i >= ip->ip_len)
+ goto dropfrag;
+ m_adj(dtom(ip), i);
+ ip->ip_off += i;
+ ip->ip_len -= i;
+ }
+ }
+
+ /*
+ * While we overlap succeeding segments trim them or,
+ * if they are completely covered, dequeue them.
+ */
+ while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
+ i = (ip->ip_off + ip->ip_len) - q->ip_off;
+ if (i < q->ip_len) {
+ q->ip_len -= i;
+ q->ip_off += i;
+ m_adj(dtom(q), i);
+ break;
+ }
+ q = q->ipf_next; /* advance first, then discard the fully covered segment behind q */
+ m_freem(dtom(q->ipf_prev));
+ ip_deq(q->ipf_prev);
+ }
+
+insert:
+ /*
+ * Stick new segment in its place;
+ * check for complete reassembly.
+ * Complete means: offsets are contiguous from 0 and the
+ * last queued segment does not have the more-fragments bit.
+ */
+ ip_enq(ip, q->ipf_prev);
+ next = 0;
+ for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
+ if (q->ip_off != next)
+ return (0);
+ next += q->ip_len;
+ }
+ if (q->ipf_prev->ipf_mff & 1)
+ return (0);
+
+ /*
+ * Reassembly is complete; concatenate fragments.
+ */
+ q = fp->ipq_next;
+ m = dtom(q);
+ t = m->m_next;
+ m->m_next = 0;
+ m_cat(m, t);
+ q = q->ipf_next;
+ while (q != (struct ipasfrag *)fp) {
+ t = dtom(q);
+ q = q->ipf_next; /* step before m_cat: q's storage is inside mbuf t */
+ m_cat(m, t);
+ }
+
+ /*
+ * Create header for new ip packet by
+ * modifying header of first packet;
+ * dequeue and discard fragment reassembly header.
+ * Make header visible.
+ */
+ ip = fp->ipq_next;
+ ip->ip_len = next; /* total data length, header not included */
+ ip->ipf_mff &= ~1;
+ ((struct ip *)ip)->ip_src = fp->ipq_src;
+ ((struct ip *)ip)->ip_dst = fp->ipq_dst;
+ remque(fp);
+ (void) m_free(dtom(fp));
+ m = dtom(ip);
+ m->m_len += (ip->ip_hl << 2);
+ m->m_data -= (ip->ip_hl << 2);
+ /* some debugging cruft by sklower, below, will go away soon */
+ if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
+ register int plen = 0;
+ for (t = m; m; m = m->m_next) /* t keeps the head while m walks the chain */
+ plen += m->m_len;
+ t->m_pkthdr.len = plen;
+ }
+ return ((struct ip *)ip);
+
+dropfrag:
+ ipstat.ips_fragdropped++;
+ m_freem(m);
+ return (0);
+}
+
+/*
+ * Release a reassembly queue: unlink and free every fragment
+ * hanging off fp, then unlink fp itself and free the mbuf
+ * that holds it.
+ */
+void
+ip_freef(fp)
+	struct ipq *fp;
+{
+	struct ipasfrag *frag, *following;
+
+	frag = fp->ipq_next;
+	while (frag != (struct ipasfrag *)fp) {
+		/* Fetch the successor before frag's mbuf is freed. */
+		following = frag->ipf_next;
+		ip_deq(frag);
+		m_freem(dtom(frag));
+		frag = following;
+	}
+	remque(fp);
+	(void) m_free(dtom(fp));
+}
+
+/*
+ * Link fragment p into a reassembly chain directly after prev.
+ * Same job as insque, except that the link pointers sit in the
+ * middle of the structure.
+ */
+void
+ip_enq(p, prev)
+	register struct ipasfrag *p, *prev;
+{
+	register struct ipasfrag *after = prev->ipf_next;
+
+	p->ipf_next = after;
+	p->ipf_prev = prev;
+	after->ipf_prev = p;
+	prev->ipf_next = p;
+}
+
+/*
+ * Unlink fragment p from its reassembly chain
+ * (the counterpart of ip_enq, as remque is of insque).
+ */
+void
+ip_deq(p)
+	register struct ipasfrag *p;
+{
+	register struct ipasfrag *before = p->ipf_prev;
+	register struct ipasfrag *after = p->ipf_next;
+
+	before->ipf_next = after;
+	after->ipf_prev = before;
+}
+
+/*
+ * IP timer processing: age every reassembly queue by one tick
+ * and discard those whose time to live has reached zero.
+ */
+void
+ip_slowtimo()
+{
+	register struct ipq *fp, *aged;
+	int s = splnet();
+
+	fp = ipq.next;
+	if (fp != 0) {
+		while (fp != &ipq) {
+			--fp->ipq_ttl;
+			/* Move on first: the queue just aged may be freed. */
+			fp = fp->next;
+			aged = fp->prev;
+			if (aged->ipq_ttl != 0)
+				continue;
+			ipstat.ips_fragtimeout++;
+			ip_freef(aged);
+		}
+	}
+	splx(s);
+}
+
+/*
+ * Throw away every pending reassembly queue, counting each
+ * one as a dropped fragment.
+ */
+void
+ip_drain()
+{
+	struct ipq *head;
+
+	for (head = ipq.next; head != &ipq; head = ipq.next) {
+		ipstat.ips_fragdropped++;
+		ip_freef(head);
+	}
+}
+
+/*
+ * Do option processing on a datagram,
+ * possibly discarding it if bad options are encountered,
+ * or forwarding it if source-routed.
+ * Returns 1 if packet has been forwarded/freed,
+ * 0 if the packet should be processed further.
+ *
+ * Option lengths, offsets and pointers come straight off the wire.
+ * Every option that carries an offset byte is therefore checked to be
+ * long enough to contain that byte before it is read, the "space left"
+ * tests are done in signed arithmetic so that a short option cannot
+ * wrap the comparison, and the timestamp pointer is required to lie
+ * beyond the four-byte option header.
+ *
+ * On a malformed option an ICMP error of the selected type/code is
+ * generated through icmp_error() and ips_badoptions is counted.
+ */
+int
+ip_dooptions(m)
+	struct mbuf *m;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register u_char *cp;
+	register struct ip_timestamp *ipt;
+	register struct in_ifaddr *ia;
+	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
+	struct in_addr *sin, dst;
+	n_time ntime;
+
+	dst = ip->ip_dst;
+	cp = (u_char *)(ip + 1);
+	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[IPOPT_OPTVAL];
+		if (opt == IPOPT_EOL)
+			break;
+		if (opt == IPOPT_NOP)
+			optlen = 1;
+		else {
+			optlen = cp[IPOPT_OLEN];
+			if (optlen <= 0 || optlen > cnt) {
+				code = &cp[IPOPT_OLEN] - (u_char *)ip;
+				goto bad;
+			}
+		}
+		switch (opt) {
+
+		default:
+			break;
+
+		/*
+		 * Source routing with record.
+		 * Find interface with current destination address.
+		 * If none on this machine then drop if strictly routed,
+		 * or do nothing if loosely routed.
+		 * Record interface address and bring up next address
+		 * component.  If strictly routed make sure next
+		 * address is on directly accessible net.
+		 */
+		case IPOPT_LSRR:
+		case IPOPT_SSRR:
+			/* The option must at least contain its offset byte. */
+			if (optlen < IPOPT_OFFSET + 1) {
+				code = &cp[IPOPT_OLEN] - (u_char *)ip;
+				goto bad;
+			}
+			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+				goto bad;
+			}
+			ipaddr.sin_addr = ip->ip_dst;
+			ia = (struct in_ifaddr *)
+				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
+			if (ia == 0) {
+				if (opt == IPOPT_SSRR) {
+					type = ICMP_UNREACH;
+					code = ICMP_UNREACH_SRCFAIL;
+					goto bad;
+				}
+				/*
+				 * Loose routing, and not at next destination
+				 * yet; nothing to do except forward.
+				 */
+				break;
+			}
+			off--;			/* 0 origin */
+			/*
+			 * Signed comparison: with an unsigned sizeof an
+			 * option shorter than one address would wrap and
+			 * be treated as having room for another hop.
+			 */
+			if (off > optlen - (int)sizeof(struct in_addr)) {
+				/*
+				 * End of source route.  Should be for us.
+				 */
+				save_rte(cp, ip->ip_src);
+				break;
+			}
+			/*
+			 * locate outgoing interface
+			 */
+			bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
+			    sizeof(ipaddr.sin_addr));
+			if (opt == IPOPT_SSRR) {
+#define INA struct in_ifaddr *
+#define SA struct sockaddr *
+			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
+				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
+			} else
+				ia = ip_rtaddr(ipaddr.sin_addr);
+			if (ia == 0) {
+				type = ICMP_UNREACH;
+				code = ICMP_UNREACH_SRCFAIL;
+				goto bad;
+			}
+			ip->ip_dst = ipaddr.sin_addr;
+			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
+			    (caddr_t)(cp + off), sizeof(struct in_addr));
+			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+			/*
+			 * Let ip_intr's mcast routing check handle mcast pkts
+			 */
+			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
+			break;
+
+		case IPOPT_RR:
+			/* The option must at least contain its offset byte. */
+			if (optlen < IPOPT_OFFSET + 1) {
+				code = &cp[IPOPT_OLEN] - (u_char *)ip;
+				goto bad;
+			}
+			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+				goto bad;
+			}
+			/*
+			 * If no space remains, ignore.
+			 * (Signed comparison, see the source route case.)
+			 */
+			off--;			/* 0 origin */
+			if (off > optlen - (int)sizeof(struct in_addr))
+				break;
+			bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr,
+			    sizeof(ipaddr.sin_addr));
+			/*
+			 * locate outgoing interface; if we're the destination,
+			 * use the incoming interface (should be same).
+			 */
+			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
+			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
+				type = ICMP_UNREACH;
+				code = ICMP_UNREACH_HOST;
+				goto bad;
+			}
+			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
+			    (caddr_t)(cp + off), sizeof(struct in_addr));
+			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+			break;
+
+		case IPOPT_TS:
+			code = cp - (u_char *)ip;
+			ipt = (struct ip_timestamp *)cp;
+			if (ipt->ipt_len < 5)
+				goto bad;
+			/*
+			 * The pointer is 1-origin and must lie beyond the
+			 * four-byte option header; a smaller value would
+			 * make the stores below land on the header itself
+			 * or in front of the option.
+			 */
+			if (ipt->ipt_ptr < 5) {
+				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+				goto bad;
+			}
+			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
+				/* No room left: count the overflow instead. */
+				if (++ipt->ipt_oflw == 0)
+					goto bad;
+				break;
+			}
+			sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
+			switch (ipt->ipt_flg) {
+
+			case IPOPT_TS_TSONLY:
+				break;
+
+			case IPOPT_TS_TSANDADDR:
+				if (ipt->ipt_ptr + sizeof(n_time) +
+				    sizeof(struct in_addr) > ipt->ipt_len)
+					goto bad;
+				ipaddr.sin_addr = dst;
+				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
+							    m->m_pkthdr.rcvif);
+				if (ia == 0)
+					continue;	/* next option, no stamp */
+				bcopy((caddr_t)&IA_SIN(ia)->sin_addr,
+				    (caddr_t)sin, sizeof(struct in_addr));
+				ipt->ipt_ptr += sizeof(struct in_addr);
+				break;
+
+			case IPOPT_TS_PRESPEC:
+				if (ipt->ipt_ptr + sizeof(n_time) +
+				    sizeof(struct in_addr) > ipt->ipt_len)
+					goto bad;
+				bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr,
+				    sizeof(struct in_addr));
+				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
+					continue;	/* next option, no stamp */
+				ipt->ipt_ptr += sizeof(struct in_addr);
+				break;
+
+			default:
+				goto bad;
+			}
+			ntime = iptime();
+			bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1,
+			    sizeof(n_time));
+			ipt->ipt_ptr += sizeof(n_time);
+		}
+	}
+	if (forward) {
+		ip_forward(m, 1);
+		return (1);
+	}
+	return (0);
+bad:
+	ip->ip_len -= ip->ip_hl << 2;   /* XXX icmp_error adds in hdr length */
+	icmp_error(m, type, code, 0, 0);
+	ipstat.ips_badoptions++;
+	return (1);
+}
+
+/*
+ * Return the interface address record through which `dst' (a final
+ * destination or a next hop) would be reached, or a null pointer when
+ * no route is found.  The lookup goes through ipforward_rt, which is
+ * refreshed whenever it holds no route or a route for another address.
+ */
+struct in_ifaddr *
+ip_rtaddr(dst)
+	struct in_addr dst;
+{
+	register struct sockaddr_in *cached_dst =
+	    (struct sockaddr_in *)&ipforward_rt.ro_dst;
+	int stale;
+
+	stale = (ipforward_rt.ro_rt == 0 ||
+	    cached_dst->sin_addr.s_addr != dst.s_addr);
+	if (stale) {
+		/* Release the old route, if any, before looking up anew. */
+		if (ipforward_rt.ro_rt != 0) {
+			RTFREE(ipforward_rt.ro_rt);
+			ipforward_rt.ro_rt = 0;
+		}
+		cached_dst->sin_family = AF_INET;
+		cached_dst->sin_len = sizeof(*cached_dst);
+		cached_dst->sin_addr = dst;
+
+		rtalloc(&ipforward_rt);
+	}
+	return (ipforward_rt.ro_rt != 0 ?
+	    (struct in_ifaddr *)ipforward_rt.ro_rt->rt_ifa :
+	    (struct in_ifaddr *)0);
+}
+
+/*
+ * Remember the source-route option of an incoming packet, together with
+ * the address `dst', so that ip_srcroute can build a reply route from it
+ * later.  An option too long for the ip_srcrt save area is ignored.
+ */
+void
+save_rte(option, dst)
+	u_char *option;
+	struct in_addr dst;
+{
+	unsigned optbytes = option[IPOPT_OLEN];
+
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf("save_rte: olen %d\n", optbytes);
+#endif
+	if (optbytes <= sizeof(ip_srcrt) - (1 + sizeof(dst))) {
+		bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, optbytes);
+		/* hop count = address bytes that follow the option header */
+		ip_nhops = (optbytes - IPOPT_OFFSET - 1) /
+		    sizeof(struct in_addr);
+		ip_srcrt.dst = dst;
+	}
+}
+
+/*
+ * Retrieve the source route saved by save_rte for use in replies,
+ * in the same form used by setsockopt.  The route is reversed and
+ * placed in a newly allocated mbuf; the first hop of the return route
+ * is stored ahead of the option bytes and is removed later.
+ *
+ * Returns a null pointer when no route is saved (ip_nhops == 0) or
+ * when no mbuf can be obtained.
+ */
+struct mbuf *
+ip_srcroute()
+{
+	register struct in_addr *p, *q;
+	register struct mbuf *m;
+
+	if (ip_nhops == 0)
+		return ((struct mbuf *)0);
+	m = m_get(M_DONTWAIT, MT_SOOPTS);
+	if (m == 0)
+		return ((struct mbuf *)0);
+
+#define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
+
+	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
+	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
+	    OPTSIZ;
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
+#endif
+
+	/*
+	 * First save first hop for return route
+	 */
+	p = &ip_srcrt.route[ip_nhops - 1];
+	*(mtod(m, struct in_addr *)) = *p--;
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf(" hops %lx", ntohl(mtod(m, struct in_addr *)->s_addr));
+#endif
+
+	/*
+	 * Copy option fields and padding (nop) to mbuf.
+	 */
+	ip_srcrt.nop = IPOPT_NOP;
+	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
+	bcopy((caddr_t)&ip_srcrt.nop,
+	    mtod(m, caddr_t) + sizeof(struct in_addr), OPTSIZ);
+	q = (struct in_addr *)(mtod(m, caddr_t) +
+	    sizeof(struct in_addr) + OPTSIZ);
+#undef OPTSIZ
+	/*
+	 * Record return path as an IP source route,
+	 * reversing the path (pointers are now aligned).
+	 */
+	while (p >= ip_srcrt.route) {
+#ifdef DIAGNOSTIC
+		/*
+		 * Print the hop about to be copied (*p); the slot at q
+		 * has not been written yet at this point.
+		 */
+		if (ipprintfs)
+			printf(" %lx", ntohl(p->s_addr));
+#endif
+		*q++ = *p--;
+	}
+	/*
+	 * Last hop goes to final destination.
+	 */
+	*q = ip_srcrt.dst;
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf(" %lx\n", ntohl(q->s_addr));
+#endif
+	return (m);
+}
+
+/*
+ * Strip the IP options from the header at the front of mbuf `m'.
+ * Whatever follows the options in this mbuf is slid down over them,
+ * the mbuf length (and the packet-header length, when present) is
+ * reduced, and ip_hl is reset to the size of an option-less header.
+ * XXX should be deleted; the second argument is currently ignored.
+ */
+void
+ip_stripoptions(m, mopt)
+	register struct mbuf *m;
+	struct mbuf *mopt;
+{
+	struct ip *iph = mtod(m, struct ip *);
+	register caddr_t optstart = (caddr_t)(iph + 1);
+	int optbytes = (iph->ip_hl << 2) - sizeof (struct ip);
+	register int tail;
+
+	/* number of bytes in this mbuf that lie beyond the options */
+	tail = m->m_len - (sizeof (struct ip) + optbytes);
+	bcopy(optstart + optbytes, optstart, (unsigned)tail);
+
+	m->m_len -= optbytes;
+	if (m->m_flags & M_PKTHDR)
+		m->m_pkthdr.len -= optbytes;
+	iph->ip_hl = sizeof(struct ip) >> 2;
+}
+
+u_char inetctlerrmap[PRC_NCMDS] = { /* table of errno values, PRC_NCMDS entries; 0 means no error is mapped.
+ * NOTE(review): presumably indexed by the PRC_* control command code --
+ * confirm against the PRC_* definitions. */
+ 0, 0, 0, 0, /* entries 0-3 */
+ 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, /* entries 4-7 */
+ EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, /* entries 8-11 */
+ EMSGSIZE, EHOSTUNREACH, 0, 0, /* entries 12-15 */
+ 0, 0, 0, 0, /* entries 16-19 */
+ ENOPROTOOPT /* entry 20 */
+};
+
+/*
+ * Forward a packet. If some error occurs return the sender
+ * an icmp packet. Note we can't always generate a meaningful
+ * icmp message because icmp doesn't have a large enough repertoire
+ * of codes and types.
+ *
+ * If not forwarding, just drop the packet. This could be confusing
+ * if ipforwarding was zero but some routing protocol was advancing
+ * us as a gateway to somewhere. However, we must let the routing
+ * protocol deal with that.
+ *
+ * The srcrt parameter indicates whether the packet is being forwarded
+ * via a source route.
+ *
+ * The packet m is disposed of on every path through this function:
+ * it is freed here, or handed on to icmp_error() or ip_output().
+ * A copy of its first bytes (mcopy) is kept so that an ICMP message
+ * can still be sent to the source after ip_output() has been called.
+ */
+void
+ip_forward(m, srcrt)
+ struct mbuf *m;
+ int srcrt;
+{
+ register struct ip *ip = mtod(m, struct ip *);
+ register struct sockaddr_in *sin;
+ register struct rtentry *rt;
+ int error, type = 0, code; /* type stays 0 unless a redirect is chosen */
+ struct mbuf *mcopy;
+ n_long dest;
+ struct ifnet *destifp;
+
+ dest = 0; /* changed only when a redirect is chosen below */
+#ifdef DIAGNOSTIC
+ if (ipprintfs)
+ printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
+ ip->ip_dst, ip->ip_ttl);
+#endif
+ if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) {
+ ipstat.ips_cantforward++;
+ m_freem(m);
+ return;
+ }
+ HTONS(ip->ip_id);
+ if (ip->ip_ttl <= IPTTLDEC) {
+ icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
+ return;
+ }
+ ip->ip_ttl -= IPTTLDEC;
+
+ sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
+ if ((rt = ipforward_rt.ro_rt) == 0 ||
+ ip->ip_dst.s_addr != sin->sin_addr.s_addr) { /* cached route missing or for another destination */
+ if (ipforward_rt.ro_rt) {
+ RTFREE(ipforward_rt.ro_rt);
+ ipforward_rt.ro_rt = 0;
+ }
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = ip->ip_dst;
+
+ rtalloc(&ipforward_rt);
+ if (ipforward_rt.ro_rt == 0) {
+ icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
+ return;
+ }
+ rt = ipforward_rt.ro_rt;
+ }
+
+ /*
+ * Save at most 64 bytes of the packet in case
+ * we need to generate an ICMP message to the src.
+ */
+ mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
+
+#ifdef GATEWAY
+ ip_ifmatrix[rt->rt_ifp->if_index +
+ if_index * m->m_pkthdr.rcvif->if_index]++;
+#endif
+ /*
+ * If forwarding packet using same interface that it came in on,
+ * perhaps should send a redirect to sender to shortcut a hop.
+ * Only send redirect if source is sending directly to us,
+ * and if packet was not source routed (or has any options).
+ * Also, don't send redirect if forwarding using a default route
+ * or a route modified by a redirect.
+ */
+#define satosin(sa) ((struct sockaddr_in *)(sa))
+ if (rt->rt_ifp == m->m_pkthdr.rcvif &&
+ (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
+ satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
+ ipsendredirects && !srcrt) {
+#define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa))
+ u_long src = ntohl(ip->ip_src.s_addr);
+
+ if (RTA(rt) &&
+ (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
+ if (rt->rt_flags & RTF_GATEWAY)
+ dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
+ else
+ dest = ip->ip_dst.s_addr;
+ /* Router requirements says to only send host redirects */
+ type = ICMP_REDIRECT;
+ code = ICMP_REDIRECT_HOST;
+#ifdef DIAGNOSTIC
+ if (ipprintfs)
+ printf("redirect (%d) to %lx\n", code, (u_long)dest);
+#endif
+ }
+ }
+
+ error = ip_output(m, (struct mbuf *)0, &ipforward_rt, IP_FORWARDING
+#ifdef DIRECTED_BROADCAST
+ | IP_ALLOWBROADCAST
+#endif
+ , 0);
+ if (error)
+ ipstat.ips_cantforward++;
+ else {
+ ipstat.ips_forward++;
+ if (type) /* a redirect was selected above */
+ ipstat.ips_redirectsent++;
+ else {
+ if (mcopy)
+ m_freem(mcopy);
+ return; /* forwarded and nothing to report to the source */
+ }
+ }
+ if (mcopy == NULL) /* no saved copy to hand to icmp_error() */
+ return;
+ destifp = NULL;
+
+ switch (error) {
+
+ case 0: /* forwarded, but need redirect */
+ /* type, code set above */
+ break;
+
+ case ENETUNREACH: /* shouldn't happen, checked above */
+ case EHOSTUNREACH:
+ case ENETDOWN:
+ case EHOSTDOWN:
+ default:
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_HOST;
+ break;
+
+ case EMSGSIZE:
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_NEEDFRAG;
+ if (ipforward_rt.ro_rt)
+ destifp = ipforward_rt.ro_rt->rt_ifp;
+ ipstat.ips_cantfrag++;
+ break;
+
+ case ENOBUFS:
+ type = ICMP_SOURCEQUENCH;
+ code = 0;
+ break;
+ }
+ icmp_error(mcopy, type, code, dest, destifp);
+}
+
+/*
+ * sysctl handler for the IP-level integer variables.  Names at this
+ * level have no children, so any name that is not exactly one component
+ * long yields ENOTDIR; an unrecognized name yields EOPNOTSUPP.
+ * Otherwise the result of sysctl_int() on the selected variable is
+ * returned.
+ */
+int
+ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+{
+	int *valp;
+
+	/* All sysctl names at this level are terminal. */
+	if (namelen != 1)
+		return (ENOTDIR);
+
+	/* Pick the variable the name refers to. */
+	if (name[0] == IPCTL_FORWARDING)
+		valp = &ipforwarding;
+	else if (name[0] == IPCTL_SENDREDIRECTS)
+		valp = &ipsendredirects;
+	else if (name[0] == IPCTL_DEFTTL)
+		valp = &ip_defttl;
+#ifdef notyet
+	else if (name[0] == IPCTL_DEFMTU)
+		valp = &ip_mtu;
+#endif
+	else
+		return (EOPNOTSUPP);
+
+	return (sysctl_int(oldp, oldlenp, newp, newlen, valp));
+}
diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
new file mode 100644
index 000000000000..1744ec17fb65
--- /dev/null
+++ b/sys/netinet/ip_mroute.c
@@ -0,0 +1,834 @@
+/*
+ * Copyright (c) 1989 Stephen Deering
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
+ */
+
+/*
+ * Procedures for the kernel part of DVMRP,
+ * a Distance-Vector Multicast Routing Protocol.
+ * (See RFC-1075.)
+ *
+ * Written by David Waitzman, BBN Labs, August 1988.
+ * Modified by Steve Deering, Stanford, February 1989.
+ *
+ * MROUTING 1.1
+ */
+
+#ifndef MROUTING
+int ip_mrtproto; /* for netstat only */
+#else
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+#include <netinet/igmp.h>
+#include <netinet/igmp_var.h>
+#include <netinet/ip_mroute.h>
+
+/* Static forwards */
+static int ip_mrouter_init __P((struct socket *));
+static int add_vif __P((struct vifctl *));
+static int del_vif __P((vifi_t *vifip));
+static int add_lgrp __P((struct lgrplctl *));
+static int del_lgrp __P((struct lgrplctl *));
+static int grplst_member __P((struct vif *, struct in_addr));
+static u_long nethash __P((struct in_addr in));
+static int add_mrt __P((struct mrtctl *));
+static int del_mrt __P((struct in_addr *));
+static struct mrt *mrtfind __P((struct in_addr));
+static void phyint_send __P((struct mbuf *, struct vif *));
+static void tunnel_send __P((struct mbuf *, struct vif *));
+
+#define INSIZ sizeof(struct in_addr) /* size in bytes of an IP address */
+#define same(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) /* true when the addresses pointed to are equal */
+#define satosin(sa) ((struct sockaddr_in *)(sa)) /* view a sockaddr as a sockaddr_in */
+
+/*
+ * Globals. All but ip_mrouter and ip_mrtproto could be static,
+ * except for netstat or debugging purposes.
+ */
+struct socket *ip_mrouter = NULL; /* socket that issued DVMRP_INIT; NULL while multicast routing is off */
+int ip_mrtproto = IGMP_DVMRP; /* for netstat only */
+
+struct mrt *mrttable[MRTHASHSIZ]; /* hash chains of routes, indexed by nethash() */
+struct vif viftable[MAXVIFS]; /* a slot is in use when its v_lcl_addr is non-zero */
+struct mrtstat mrtstat;
+
+/*
+ * Private variables.
+ */
+static vifi_t numvifs = 0; /* one more than the highest vif index in use */
+static struct mrt *cached_mrt = NULL; /* route last matched by mrtfind(), or NULL */
+static u_long cached_origin; /* mrt_origin of cached_mrt */
+static u_long cached_originmask; /* mrt_originmask of cached_mrt */
+
+/*
+ * Dispatch a DVMRP setsockopt command that modifies the multicast
+ * routing tables.  Apart from DVMRP_INIT, commands are accepted only
+ * from the socket recorded in ip_mrouter (EACCES otherwise).  Commands
+ * that take an argument fail with EINVAL when the mbuf `m' is missing
+ * or shorter than the argument; unknown commands yield EOPNOTSUPP.
+ */
+int
+ip_mrouter_cmd(cmd, so, m)
+	register int cmd;
+	register struct socket *so;
+	register struct mbuf *m;
+{
+	if (cmd != DVMRP_INIT && so != ip_mrouter)
+		return (EACCES);
+
+	switch (cmd) {
+
+	case DVMRP_INIT:
+		return (ip_mrouter_init(so));
+
+	case DVMRP_DONE:
+		return (ip_mrouter_done());
+
+	case DVMRP_ADD_VIF:
+		if (m == NULL || m->m_len < sizeof(struct vifctl))
+			return (EINVAL);
+		return (add_vif(mtod(m, struct vifctl *)));
+
+	case DVMRP_DEL_VIF:
+		if (m == NULL || m->m_len < sizeof(short))
+			return (EINVAL);
+		return (del_vif(mtod(m, vifi_t *)));
+
+	case DVMRP_ADD_LGRP:
+		if (m == NULL || m->m_len < sizeof(struct lgrplctl))
+			return (EINVAL);
+		return (add_lgrp(mtod(m, struct lgrplctl *)));
+
+	case DVMRP_DEL_LGRP:
+		if (m == NULL || m->m_len < sizeof(struct lgrplctl))
+			return (EINVAL);
+		return (del_lgrp(mtod(m, struct lgrplctl *)));
+
+	case DVMRP_ADD_MRT:
+		if (m == NULL || m->m_len < sizeof(struct mrtctl))
+			return (EINVAL);
+		return (add_mrt(mtod(m, struct mrtctl *)));
+
+	case DVMRP_DEL_MRT:
+		if (m == NULL || m->m_len < sizeof(struct in_addr))
+			return (EINVAL);
+		return (del_mrt(mtod(m, struct in_addr *)));
+
+	default:
+		return (EOPNOTSUPP);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Enable multicast routing by recording `so' as the router socket.
+ * The socket must be a raw IGMP socket (EOPNOTSUPP otherwise), and no
+ * router socket may already be registered (EADDRINUSE otherwise).
+ */
+static int
+ip_mrouter_init(so)
+	register struct socket *so;
+{
+	int is_raw_igmp = (so->so_type == SOCK_RAW &&
+	    so->so_proto->pr_protocol == IPPROTO_IGMP);
+
+	if (!is_raw_igmp)
+		return (EOPNOTSUPP);
+	if (ip_mrouter != NULL)
+		return (EADDRINUSE);
+
+	ip_mrouter = so;
+	return (0);
+}
+
+/*
+ * Disable multicast routing: release every vif and every multicast
+ * route, clear the route cache and forget the router socket.
+ * Runs at splnet.  Always returns 0.
+ */
+int
+ip_mrouter_done()
+{
+	register vifi_t vifi;
+	register int i;
+	register struct ifnet *ifp;
+	register struct mrt *rt, *next;
+	register int s;
+	struct ifreq ifr;
+
+	s = splnet();
+
+	/*
+	 * For each phyint in use, free its local group list and
+	 * disable promiscuous reception of all IP multicasts.
+	 */
+	for (vifi = 0; vifi < numvifs; vifi++) {
+		if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
+		    !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
+			if (viftable[vifi].v_lcl_grps)
+				free(viftable[vifi].v_lcl_grps, M_MRTABLE);
+			satosin(&ifr.ifr_addr)->sin_family = AF_INET;
+			satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
+			ifp = viftable[vifi].v_ifp;
+			(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
+		}
+	}
+	bzero((caddr_t)viftable, sizeof(viftable));
+	numvifs = 0;
+
+	/*
+	 * Free any multicast route entries.  Each bucket is a chain
+	 * linked through mrt_next (see add_mrt), so the whole chain must
+	 * be walked; freeing only the head would leak the rest.  The
+	 * successor is fetched before the entry is freed.
+	 */
+	for (i = 0; i < MRTHASHSIZ; i++) {
+		for (rt = mrttable[i]; rt != NULL; rt = next) {
+			next = rt->mrt_next;
+			free(rt, M_MRTABLE);
+		}
+	}
+	bzero((caddr_t)mrttable, sizeof(mrttable));
+	cached_mrt = NULL;
+
+	ip_mrouter = NULL;
+
+	splx(s);
+	return (0);
+}
+
+/*
+ * Install the vif described by `vifcp' in viftable.
+ *
+ * Fails with EINVAL for an index of MAXVIFS or more, EADDRINUSE when
+ * the slot is already occupied, EADDRNOTAVAIL when no interface has
+ * the given local address, and EOPNOTSUPP when a non-tunnel vif names
+ * an interface without IFF_MULTICAST.  An error from the interface's
+ * SIOCADDMULTI ioctl is passed back unchanged.
+ */
+static int
+add_vif(vifcp)
+	register struct vifctl *vifcp;
+{
+	static struct sockaddr_in sin = { sizeof(sin), AF_INET };
+	register struct vif *slot;
+	register struct ifaddr *lcl_ifa;
+	register struct ifnet *nic;
+	struct ifreq req;
+	register int err, pri;
+
+	if (vifcp->vifc_vifi >= MAXVIFS)
+		return (EINVAL);
+	slot = &viftable[vifcp->vifc_vifi];
+	if (slot->v_lcl_addr.s_addr != 0)
+		return (EADDRINUSE);
+
+	/* Find the interface with an address in AF_INET family */
+	sin.sin_addr = vifcp->vifc_lcl_addr;
+	lcl_ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
+	if (lcl_ifa == 0)
+		return (EADDRNOTAVAIL);
+
+	pri = splnet();
+
+	if ((vifcp->vifc_flags & VIFF_TUNNEL) == 0) {
+		/* A physical vif needs a multicast-capable interface. */
+		nic = lcl_ifa->ifa_ifp;
+		if ((nic->if_flags & IFF_MULTICAST) == 0) {
+			splx(pri);
+			return (EOPNOTSUPP);
+		}
+		/*
+		 * Ask the interface to receive all IP multicasts
+		 * (INADDR_ANY stands for every group).
+		 */
+		satosin(&req.ifr_addr)->sin_family = AF_INET;
+		satosin(&req.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
+		err = (*nic->if_ioctl)(nic, SIOCADDMULTI, (caddr_t)&req);
+		if (err) {
+			splx(pri);
+			return (err);
+		}
+	} else
+		slot->v_rmt_addr = vifcp->vifc_rmt_addr;
+
+	slot->v_flags = vifcp->vifc_flags;
+	slot->v_threshold = vifcp->vifc_threshold;
+	slot->v_lcl_addr = vifcp->vifc_lcl_addr;
+	slot->v_ifp = lcl_ifa->ifa_ifp;
+
+	/* Keep numvifs one past the highest index in use. */
+	if (numvifs <= vifcp->vifc_vifi)
+		numvifs = vifcp->vifc_vifi + 1;
+
+	splx(pri);
+	return (0);
+}
+
+/*
+ * Remove the vif whose index is `*vifip' from viftable.
+ * Returns EINVAL when the index is not below numvifs and EADDRNOTAVAIL
+ * when the slot is not in use.  For a physical vif the local group
+ * list is freed and the interface is sent SIOCDELMULTI for INADDR_ANY.
+ */
+static int
+del_vif(vifip)
+	register vifi_t *vifip;
+{
+	register struct vif *slot;
+	register struct ifnet *nic;
+	register int top, pri;
+	struct ifreq req;
+
+	if (*vifip >= numvifs)
+		return (EINVAL);
+	slot = &viftable[*vifip];
+	if (slot->v_lcl_addr.s_addr == 0)
+		return (EADDRNOTAVAIL);
+
+	pri = splnet();
+
+	if ((slot->v_flags & VIFF_TUNNEL) == 0) {
+		if (slot->v_lcl_grps)
+			free(slot->v_lcl_grps, M_MRTABLE);
+		satosin(&req.ifr_addr)->sin_family = AF_INET;
+		satosin(&req.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
+		nic = slot->v_ifp;
+		(*nic->if_ioctl)(nic, SIOCDELMULTI, (caddr_t)&req);
+	}
+
+	bzero((caddr_t)slot, sizeof (*slot));
+
+	/* Shrink numvifs to one past the highest vif still in use. */
+	top = numvifs - 1;
+	while (top >= 0 && viftable[top].v_lcl_addr.s_addr == 0)
+		top--;
+	numvifs = top + 1;
+
+	splx(pri);
+	return (0);
+}
+
+/*
+ * Add the multicast group in the lgrplctl to the list of local multicast
+ * group memberships associated with the vif indexed by gcp->lgc_vifi.
+ *
+ * Returns EINVAL when the index is not below numvifs, EADDRNOTAVAIL
+ * when the vif is unused or is a tunnel, and ENOBUFS when the group
+ * list cannot be enlarged.  The whole update, including the append and
+ * the membership-cache fixup, is done at splnet, and splx() is called
+ * exactly once on each path.
+ */
+static int
+add_lgrp(gcp)
+	register struct lgrplctl *gcp;
+{
+	register struct vif *vifp;
+	register int s;
+
+	if (gcp->lgc_vifi >= numvifs)
+		return (EINVAL);
+
+	vifp = viftable + gcp->lgc_vifi;
+	if (vifp->v_lcl_addr.s_addr == 0 || (vifp->v_flags & VIFF_TUNNEL))
+		return (EADDRNOTAVAIL);
+
+	/* If not enough space in existing list, allocate a larger one */
+	s = splnet();
+	if (vifp->v_lcl_grps_n + 1 >= vifp->v_lcl_grps_max) {
+		register int num;
+		register struct in_addr *ip;
+
+		num = vifp->v_lcl_grps_max;
+		if (num <= 0)
+			num = 32;	/* initial number */
+		else
+			num += num;	/* double last number */
+		ip = (struct in_addr *)malloc(num * sizeof(*ip),
+		    M_MRTABLE, M_NOWAIT);
+		if (ip == NULL) {
+			splx(s);
+			return (ENOBUFS);
+		}
+
+		bzero((caddr_t)ip, num * sizeof(*ip));	/* XXX paranoid */
+		/* Carry over and release the old list only if there is one. */
+		if (vifp->v_lcl_grps) {
+			bcopy((caddr_t)vifp->v_lcl_grps, (caddr_t)ip,
+			    vifp->v_lcl_grps_n * sizeof(*ip));
+			free(vifp->v_lcl_grps, M_MRTABLE);
+		}
+
+		vifp->v_lcl_grps_max = num;
+		vifp->v_lcl_grps = ip;
+		/* Remain at splnet: the list is still modified below. */
+	}
+
+	vifp->v_lcl_grps[vifp->v_lcl_grps_n++] = gcp->lgc_gaddr;
+
+	/* Keep the one-entry membership cache consistent with the list. */
+	if (gcp->lgc_gaddr.s_addr == vifp->v_cached_group)
+		vifp->v_cached_result = 1;
+
+	splx(s);
+	return (0);
+}
+
+/*
+ * Delete the local multicast group gcp->lgc_gaddr from the membership
+ * list of the vif indexed by gcp->lgc_vifi.
+ *
+ * Returns EINVAL when the index is not below numvifs, and
+ * EADDRNOTAVAIL when the vif is unused, is a tunnel, or does not list
+ * the group.
+ */
+static int
+del_lgrp(gcp)
+	register struct lgrplctl *gcp;
+{
+	register struct vif *vifp;
+	register int idx, found, pri;
+
+	if (gcp->lgc_vifi >= numvifs)
+		return (EINVAL);
+	vifp = &viftable[gcp->lgc_vifi];
+	if (vifp->v_lcl_addr.s_addr == 0 || (vifp->v_flags & VIFF_TUNNEL))
+		return (EADDRNOTAVAIL);
+
+	pri = splnet();
+
+	/* The cached lookup result for this group is reset to "no". */
+	if (gcp->lgc_gaddr.s_addr == vifp->v_cached_group)
+		vifp->v_cached_result = 0;
+
+	found = 0;
+	for (idx = 0; idx < vifp->v_lcl_grps_n; idx++) {
+		if (!same(&gcp->lgc_gaddr, &vifp->v_lcl_grps[idx]))
+			continue;
+		/* Close the gap by sliding the later entries down one. */
+		vifp->v_lcl_grps_n--;
+		bcopy((caddr_t)&vifp->v_lcl_grps[idx + 1],
+		    (caddr_t)&vifp->v_lcl_grps[idx],
+		    (vifp->v_lcl_grps_n - idx) * sizeof(struct in_addr));
+		found = 1;
+		break;
+	}
+
+	splx(pri);
+	return (found ? 0 : EADDRNOTAVAIL);
+}
+
+/*
+ * Return 1 if gaddr is in the local group list of vifp, else 0.
+ * The most recent lookup is remembered in v_cached_group and
+ * v_cached_result, so a repeated query for the same group is answered
+ * without scanning the list.  Lookup and miss counts go to mrtstat.
+ */
+static int
+grplst_member(vifp, gaddr)
+	register struct vif *vifp;
+	struct in_addr gaddr;
+{
+	register u_long group = gaddr.s_addr;
+	register int idx, member, pri;
+
+	mrtstat.mrts_grp_lookups++;
+	if (group == vifp->v_cached_group)
+		return (vifp->v_cached_result);
+	mrtstat.mrts_grp_misses++;
+
+	member = 0;
+	for (idx = 0; idx < vifp->v_lcl_grps_n; idx++)
+		if (vifp->v_lcl_grps[idx].s_addr == group) {
+			member = 1;
+			break;
+		}
+
+	/* Remember the answer, whichever way it came out. */
+	pri = splnet();
+	vifp->v_cached_group = group;
+	vifp->v_cached_result = member;
+	splx(pri);
+	return (member);
+}
+
+/*
+ * A simple hash function: returns MRTHASHMOD of the low-order non-zero
+ * octet position of the argument's network or subnet number, i.e. the
+ * number returned by in_netof() with trailing zero octets shifted out.
+ *
+ * A network number of zero is hashed as zero.  Without the explicit
+ * test the shift loop would never terminate for that value, since
+ * zero has no non-zero octet to stop on.
+ */
+static u_long
+nethash(in)
+	struct in_addr in;
+{
+	register u_long n;
+
+	n = in_netof(in);
+	while (n != 0 && (n & 0xff) == 0)
+		n >>= 8;
+	return (MRTHASHMOD(n));
+}
+
+/*
+ * Add a multicast route, or update an existing one.
+ * If mrtfind() already yields a route for mrtcp->mrtc_origin, only its
+ * parent, children and leaves are replaced.  Otherwise a new entry is
+ * allocated and put at the head of its hash chain.
+ * Returns 0, or ENOBUFS when no memory is available.
+ */
+static int
+add_mrt(mrtcp)
+	register struct mrtctl *mrtcp;
+{
+	struct mrt *rt;
+	u_long bucket;
+	int pri;
+
+	rt = mrtfind(mrtcp->mrtc_origin);
+	if (rt != NULL) {
+		/* Just update the route */
+		pri = splnet();
+		rt->mrt_parent = mrtcp->mrtc_parent;
+		VIFM_COPY(mrtcp->mrtc_children, rt->mrt_children);
+		VIFM_COPY(mrtcp->mrtc_leaves, rt->mrt_leaves);
+		splx(pri);
+		return (0);
+	}
+
+	pri = splnet();
+
+	rt = (struct mrt *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
+	if (rt != NULL) {
+		rt->mrt_origin = mrtcp->mrtc_origin;
+		rt->mrt_originmask = mrtcp->mrtc_originmask;
+		rt->mrt_parent = mrtcp->mrtc_parent;
+		VIFM_COPY(mrtcp->mrtc_children, rt->mrt_children);
+		VIFM_COPY(mrtcp->mrtc_leaves, rt->mrt_leaves);
+
+		/* new entry becomes the head of its hash chain */
+		bucket = nethash(mrtcp->mrtc_origin);
+		rt->mrt_next = mrttable[bucket];
+		mrttable[bucket] = rt;
+	}
+
+	splx(pri);
+	return (rt != NULL ? 0 : ENOBUFS);
+}
+
+/*
+ * Delete the multicast route whose origin equals *origin.
+ * Returns ESRCH when the hash chain for that origin holds no such
+ * entry.  The route cache is cleared if it refers to the deleted entry.
+ */
+static int
+del_mrt(origin)
+	register struct in_addr *origin;
+{
+	register struct mrt **linkp, *rt;
+	register u_long hash = nethash(*origin);
+	register int pri;
+
+	/* linkp trails the scan: it is the link that points at rt. */
+	for (linkp = &mrttable[hash]; (rt = *linkp) != NULL;
+	    linkp = &rt->mrt_next)
+		if (rt->mrt_origin.s_addr == origin->s_addr)
+			break;
+	if (rt == NULL)
+		return (ESRCH);
+
+	pri = splnet();
+
+	if (cached_mrt == rt)
+		cached_mrt = NULL;
+
+	*linkp = rt->mrt_next;
+	free(rt, M_MRTABLE);
+
+	splx(pri);
+	return (0);
+}
+
+/*
+ * Find a route for a given origin IP address, or return NULL.
+ * The last route found is cached together with its origin and mask and
+ * is tried first; otherwise the hash chain for the origin is searched
+ * for an entry whose masked origin matches.  Lookup and miss counts go
+ * to mrtstat.
+ */
+static struct mrt *
+mrtfind(origin)
+	struct in_addr origin;
+{
+	register struct mrt *rt;
+	register int pri;
+
+	mrtstat.mrts_mrt_lookups++;
+	if (cached_mrt != NULL &&
+	    cached_origin == (origin.s_addr & cached_originmask))
+		return (cached_mrt);
+	mrtstat.mrts_mrt_misses++;
+
+	rt = mrttable[nethash(origin)];
+	while (rt != NULL &&
+	    (origin.s_addr & rt->mrt_originmask.s_addr) !=
+	    rt->mrt_origin.s_addr)
+		rt = rt->mrt_next;
+
+	if (rt != NULL) {
+		/* remember the hit for the next lookup */
+		pri = splnet();
+		cached_mrt = rt;
+		cached_origin = rt->mrt_origin.s_addr;
+		cached_originmask = rt->mrt_originmask.s_addr;
+		splx(pri);
+	}
+	return (rt);
+}
+
+/*
+ * IP multicast forwarding function. This function assumes that the packet
+ * pointed to by "ip" has arrived on (or is about to be sent to) the interface
+ * pointed to by "ifp", and the packet is to be relayed to other networks
+ * that have members of the packet's destination IP multicast group.
+ *
+ * The packet is returned unscathed to the caller, unless it is tunneled
+ * or erroneous, in which case a non-zero return value tells the caller to
+ * discard it.
+ *
+ * Return value: 0 for a packet that arrived on a physical interface;
+ * 1 when tunnel options are present but malformed; otherwise the
+ * (non-zero) tunnel source address taken from the LSRR option, cast
+ * to int.
+ */
+
+#define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */
+#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */
+
+int
+ip_mforward(m, ifp)
+ register struct mbuf *m;
+ register struct ifnet *ifp;
+{
+ register struct ip *ip = mtod(m, struct ip *);
+ register struct mrt *rt;
+ register struct vif *vifp;
+ register int vifi;
+ register u_char *ipoptions;
+ u_long tunnel_src; /* 0 means the packet did not come through a tunnel */
+
+ if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
+ (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { /* header too short for the tunnel options, or no LSRR in second option byte */
+ /*
+ * Packet arrived via a physical interface.
+ */
+ tunnel_src = 0;
+ } else {
+ /*
+ * Packet arrived through a tunnel.
+ *
+ * A tunneled packet has a single NOP option and a
+ * two-element loose-source-and-record-route (LSRR)
+ * option immediately following the fixed-size part of
+ * the IP header. At this point in processing, the IP
+ * header should contain the following IP addresses:
+ *
+ * original source - in the source address field
+ * destination group - in the destination address field
+ * remote tunnel end-point - in the first element of LSRR
+ * one of this host's addrs - in the second element of LSRR
+ *
+ * NOTE: RFC-1075 would have the original source and
+ * remote tunnel end-point addresses swapped. However,
+ * that could cause delivery of ICMP error messages to
+ * innocent applications on intermediate routing
+ * hosts! Therefore, we hereby change the spec.
+ */
+
+ /*
+ * Verify that the tunnel options are well-formed.
+ */
+ if (ipoptions[0] != IPOPT_NOP ||
+ ipoptions[2] != 11 || /* LSRR option length */
+ ipoptions[3] != 12 || /* LSRR address pointer */
+ (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) {
+ mrtstat.mrts_bad_tunnel++;
+ return (1);
+ }
+
+ /*
+ * Delete the tunnel options from the packet.
+ * The rest of this mbuf is moved down over them and the
+ * mbuf length, ip_len and ip_hl are reduced to match.
+ */
+ ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions,
+ (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN)));
+ m->m_len -= TUNNEL_LEN;
+ ip->ip_len -= TUNNEL_LEN;
+ ip->ip_hl -= TUNNEL_LEN >> 2;
+ }
+
+ /*
+ * Don't forward a packet with time-to-live of zero or one,
+ * or a packet destined to a local-only group.
+ */
+ if (ip->ip_ttl <= 1 ||
+ ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
+ return ((int)tunnel_src);
+
+ /*
+ * Don't forward if we don't have a route for the packet's origin.
+ */
+ if (!(rt = mrtfind(ip->ip_src))) {
+ mrtstat.mrts_no_route++;
+ return ((int)tunnel_src);
+ }
+
+ /*
+ * Don't forward if it didn't arrive from the parent vif for its origin.
+ */
+ vifi = rt->mrt_parent;
+ if (tunnel_src == 0 ) {
+ if ((viftable[vifi].v_flags & VIFF_TUNNEL) ||
+ viftable[vifi].v_ifp != ifp )
+ return ((int)tunnel_src);
+ } else {
+ if (!(viftable[vifi].v_flags & VIFF_TUNNEL) ||
+ viftable[vifi].v_rmt_addr.s_addr != tunnel_src )
+ return ((int)tunnel_src);
+ }
+
+ /*
+ * For each vif, decide if a copy of the packet should be forwarded.
+ * Forward if:
+ * - the ttl exceeds the vif's threshold AND
+ * - the vif is a child in the origin's route AND
+ * - ( the vif is not a leaf in the origin's route OR
+ * the destination group has members on the vif )
+ *
+ * (This might be speeded up with some sort of cache -- someday.)
+ */
+ for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) {
+ if (ip->ip_ttl > vifp->v_threshold &&
+ VIFM_ISSET(vifi, rt->mrt_children) &&
+ (!VIFM_ISSET(vifi, rt->mrt_leaves) ||
+ grplst_member(vifp, ip->ip_dst))) {
+ if (vifp->v_flags & VIFF_TUNNEL)
+ tunnel_send(m, vifp);
+ else
+ phyint_send(m, vifp);
+ }
+ }
+
+ return ((int)tunnel_src);
+}
+
+/*
+ * Send a copy of packet `m' out the interface of the physical vif
+ * `vifp'.  The copy is passed to ip_output() with multicast options
+ * naming that interface, a ttl one less than the packet's, and
+ * loopback enabled.  Nothing is sent if the packet cannot be copied;
+ * the result of ip_output() is not examined.
+ */
+static void
+phyint_send(m, vifp)
+	register struct mbuf *m;
+	register struct vif *vifp;
+{
+	register struct ip *iph = mtod(m, struct ip *);
+	register struct mbuf *dup;
+	struct ip_moptions mopts;
+
+	dup = m_copy(m, 0, M_COPYALL);
+	if (dup != NULL) {
+		mopts.imo_multicast_ifp = vifp->v_ifp;
+		mopts.imo_multicast_ttl = iph->ip_ttl - 1;
+		mopts.imo_multicast_loop = 1;
+
+		(void)ip_output(dup, NULL, NULL, IP_FORWARDING, &mopts);
+	}
+}
+
+/*
+ * Send a copy of packet `m' through the tunnel described by `vifp'.
+ *
+ * Everything beyond the fixed IP header is copied, and a new leading
+ * mbuf is built holding a copy of the fixed header followed by the
+ * tunnel options (NOP + two-element LSRR).  In the new header the ttl
+ * is decremented, the destination becomes the remote tunnel end-point,
+ * and ip_hl/ip_len grow by the size of the tunnel options.  The
+ * original packet is not modified.
+ *
+ * Nothing is sent (and mrts_cant_tunnel is counted) when the header
+ * has no room for the tunnel options, and nothing is sent when an mbuf
+ * cannot be obtained.  The result of ip_output() is not examined.
+ */
+static void
+tunnel_send(m, vifp)
+	register struct mbuf *m;
+	register struct vif *vifp;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register struct mbuf *mb_copy, *mb_opts;
+	register struct ip *ip_copy;
+	register u_char *cp;
+
+	/*
+	 * Make sure that adding the tunnel options won't exceed the
+	 * maximum allowed number of option bytes.
+	 */
+	if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) {
+		mrtstat.mrts_cant_tunnel++;
+		return;
+	}
+
+	/*
+	 * Copy the part of the packet that follows the fixed IP header.
+	 * The fixed header itself is rebuilt in a separate mbuf below, so
+	 * that changes to some of the IP fields don't damage the original
+	 * header, which is examined later in ip_input.c.
+	 */
+	mb_copy = m_copy(m, IP_HDR_LEN, M_COPYALL);
+	if (mb_copy == NULL)
+		return;
+	MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER);
+	if (mb_opts == NULL) {
+		m_freem(mb_copy);
+		return;
+	}
+	/*
+	 * Make mb_opts be the new head of the packet chain.
+	 * Any options of the packet were left in the old packet chain head
+	 *
+	 * The header is placed at the very end of the data area of the
+	 * new mbuf.  The mbuf comes from MGETHDR, so the usable area is
+	 * MHLEN bytes; advancing by MSIZE - m_len would put the header
+	 * past the end of the mbuf.
+	 */
+	mb_opts->m_next = mb_copy;
+	mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN;
+	mb_opts->m_data += MHLEN - mb_opts->m_len;
+
+	ip_copy = mtod(mb_opts, struct ip *);
+	/*
+	 * Copy the base ip header to the new head mbuf.
+	 */
+	*ip_copy = *ip;
+	ip_copy->ip_ttl--;
+	ip_copy->ip_dst = vifp->v_rmt_addr;	/* remote tunnel end-point */
+	/*
+	 * Adjust the ip header length to account for the tunnel options.
+	 */
+	ip_copy->ip_hl += TUNNEL_LEN >> 2;
+	ip_copy->ip_len += TUNNEL_LEN;
+	/*
+	 * Add the NOP and LSRR after the base ip header
+	 */
+	cp = (u_char *)(ip_copy + 1);
+	*cp++ = IPOPT_NOP;
+	*cp++ = IPOPT_LSRR;
+	*cp++ = 11;		/* LSRR option length */
+	*cp++ = 8;		/* LSRR pointer to second element */
+	*(u_long*)cp = vifp->v_lcl_addr.s_addr;	/* local tunnel end-point */
+	cp += 4;
+	*(u_long*)cp = ip->ip_dst.s_addr;	/* destination group */
+
+	(void)ip_output(mb_opts, NULL, NULL, IP_FORWARDING, NULL);
+}
+#endif
diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h
new file mode 100644
index 000000000000..adb40be9552d
--- /dev/null
+++ b/sys/netinet/ip_mroute.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 1989 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_mroute.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for the kernel part of DVMRP,
+ * a Distance-Vector Multicast Routing Protocol.
+ * (See RFC-1075.)
+ *
+ * Written by David Waitzman, BBN Labs, August 1988.
+ * Modified by Steve Deering, Stanford, February 1989.
+ *
+ * MROUTING 1.0
+ */
+
+
+/*
+ * DVMRP-specific setsockopt commands.
+ */
+#define DVMRP_INIT 100
+#define DVMRP_DONE 101
+#define DVMRP_ADD_VIF 102
+#define DVMRP_DEL_VIF 103
+#define DVMRP_ADD_LGRP 104
+#define DVMRP_DEL_LGRP 105
+#define DVMRP_ADD_MRT 106
+#define DVMRP_DEL_MRT 107
+
+
+/*
+ * Types and macros for handling bitmaps with one bit per virtual interface.
+ */
+#define MAXVIFS 32
+typedef u_long vifbitmap_t;
+typedef u_short vifi_t; /* type of a vif index */
+
+/*
+ * The shifted constant is a vifbitmap_t rather than a plain int so that
+ * bit (MAXVIFS - 1) can be addressed without overflowing a signed int
+ * (and without sign-extending into the upper half of a wider u_long).
+ * n must be in the range 0 .. MAXVIFS-1.
+ */
+#define VIFM_SET(n, m) ((m) |= ((vifbitmap_t)1 << (n)))
+#define VIFM_CLR(n, m) ((m) &= ~((vifbitmap_t)1 << (n)))
+#define VIFM_ISSET(n, m) ((m) & ((vifbitmap_t)1 << (n)))
+#define VIFM_CLRALL(m) ((m) = 0x00000000)
+#define VIFM_COPY(mfrom, mto) ((mto) = (mfrom))
+#define VIFM_SAME(m1, m2) ((m1) == (m2))
+
+
+/*
+ * Argument structure for DVMRP_ADD_VIF.
+ * (DVMRP_DEL_VIF takes a single vifi_t argument.)
+ */
+struct vifctl {
+ vifi_t vifc_vifi; /* the index of the vif to be added */
+ u_char vifc_flags; /* VIFF_ flags defined below */
+ u_char vifc_threshold; /* min ttl required to forward on vif */
+ struct in_addr vifc_lcl_addr; /* local interface address */
+ struct in_addr vifc_rmt_addr; /* remote address (tunnels only) */
+};
+
+#define VIFF_TUNNEL 0x1 /* vif represents a tunnel end-point */
+
+
+/*
+ * Argument structure for DVMRP_ADD_LGRP and DVMRP_DEL_LGRP.
+ */
+struct lgrplctl {
+ vifi_t lgc_vifi; /* a vif index; presumably the vif the group is on -- confirm in ip_mroute.c */
+ struct in_addr lgc_gaddr; /* presumably the local group's address -- confirm in ip_mroute.c */
+};
+
+
+/*
+ * Argument structure for DVMRP_ADD_MRT.
+ * (DVMRP_DEL_MRT takes a single struct in_addr argument, containing origin.)
+ */
+struct mrtctl {
+ struct in_addr mrtc_origin; /* subnet origin of multicasts */
+ struct in_addr mrtc_originmask; /* subnet mask for origin */
+ vifi_t mrtc_parent; /* incoming vif */
+ vifbitmap_t mrtc_children; /* outgoing children vifs */
+ vifbitmap_t mrtc_leaves; /* subset of outgoing children vifs */
+};
+
+
+#ifdef KERNEL
+
+/*
+ * The kernel's virtual-interface structure.
+ */
+struct vif {
+ u_char v_flags; /* VIFF_ flags defined above */
+ u_char v_threshold; /* min ttl required to forward on vif */
+ struct in_addr v_lcl_addr; /* local interface address */
+ struct in_addr v_rmt_addr; /* remote address (tunnels only) */
+ struct ifnet *v_ifp; /* pointer to interface */
+ struct in_addr *v_lcl_grps; /* list of local grps (phyints only) */
+ int v_lcl_grps_max; /* malloc'ed number of v_lcl_grps */
+ int v_lcl_grps_n; /* used number of v_lcl_grps */
+ u_long v_cached_group; /* last grp looked-up (phyints only) */
+ int v_cached_result; /* last look-up result (phyints only) */
+};
+
+/*
+ * The kernel's multicast route structure.
+ */
+struct mrt {
+ struct in_addr mrt_origin; /* subnet origin of multicasts */
+ struct in_addr mrt_originmask; /* subnet mask for origin */
+ vifi_t mrt_parent; /* incoming vif */
+ vifbitmap_t mrt_children; /* outgoing children vifs */
+ vifbitmap_t mrt_leaves; /* subset of outgoing children vifs */
+ struct mrt *mrt_next; /* forward link */
+};
+
+
+/*
+ * MRTHASHMOD(h) reduces h to the range 0 .. MRTHASHSIZ-1.  When
+ * MRTHASHSIZ is a power of two the bit-mask form is selected at
+ * preprocessing time; otherwise the general modulo form is used.
+ */
+#define MRTHASHSIZ 64
+#if (MRTHASHSIZ & (MRTHASHSIZ - 1)) == 0 /* from sys:route.h */
+#define MRTHASHMOD(h) ((h) & (MRTHASHSIZ - 1))
+#else
+#define MRTHASHMOD(h) ((h) % MRTHASHSIZ)
+#endif
+
+/*
+ * The kernel's multicast routing statistics.
+ */
+struct mrtstat {
+ u_long mrts_mrt_lookups; /* # multicast route lookups */
+ u_long mrts_mrt_misses; /* # multicast route cache misses */
+ u_long mrts_grp_lookups; /* # group address lookups */
+ u_long mrts_grp_misses; /* # group address cache misses */
+ u_long mrts_no_route; /* no route for packet's origin */
+ u_long mrts_bad_tunnel; /* malformed tunnel options */
+ u_long mrts_cant_tunnel; /* no room for tunnel options */
+};
+
+
+int ip_mrouter_cmd __P((int, struct socket *, struct mbuf *));
+int ip_mrouter_done __P((void));
+
+#endif /* KERNEL */
+
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
new file mode 100644
index 000000000000..4c22a5e53ec3
--- /dev/null
+++ b/sys/netinet/ip_output.c
@@ -0,0 +1,1064 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+#ifdef vax
+#include <machine/mtpr.h>
+#endif
+
+static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
+static void ip_mloopback
+ __P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
+
+/*
+ * IP output.  The packet in mbuf chain m0 contains a skeletal IP
+ * header (with len, off, ttl, proto, tos, src, dst).
+ * The mbuf chain containing the packet will be freed.
+ * The mbuf opt, if present, will not be freed.
+ *
+ *	opt	optional mbuf of IP options, inserted by ip_insertoptions()
+ *	ro	optional cached route; when null a local route is used and
+ *		released again before returning
+ *	flags	IP_FORWARDING or IP_RAWOUTPUT: header is already complete;
+ *		IP_ROUTETOIF: bypass the routing table;
+ *		IP_ALLOWBROADCAST: caller may send to a broadcast address
+ *	imo	optional multicast options (ttl, interface, loopback)
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+ip_output(m0, opt, ro, flags, imo)
+	struct mbuf *m0;
+	struct mbuf *opt;
+	struct route *ro;
+	int flags;
+	struct ip_moptions *imo;
+{
+	register struct ip *ip, *mhip;
+	register struct ifnet *ifp;
+	register struct mbuf *m = m0;
+	register int hlen = sizeof (struct ip);
+	int len, off, error = 0;
+	struct route iproute;
+	struct sockaddr_in *dst;
+	struct in_ifaddr *ia;
+
+#ifdef DIAGNOSTIC
+	if ((m->m_flags & M_PKTHDR) == 0)
+		panic("ip_output no HDR");
+#endif
+	if (opt) {
+		/*
+		 * Preset len so that hlen stays valid even when
+		 * ip_insertoptions() gives up without storing a length.
+		 */
+		len = hlen;
+		m = ip_insertoptions(m, opt, &len);
+		hlen = len;
+	}
+	ip = mtod(m, struct ip *);
+	/*
+	 * Fill in IP header.
+	 */
+	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
+		ip->ip_v = IPVERSION;
+		ip->ip_off &= IP_DF;
+		ip->ip_id = htons(ip_id++);
+		ip->ip_hl = hlen >> 2;
+		ipstat.ips_localout++;
+	} else {
+		hlen = ip->ip_hl << 2;
+	}
+	/*
+	 * Route packet.
+	 */
+	if (ro == 0) {
+		ro = &iproute;
+		bzero((caddr_t)ro, sizeof (*ro));
+	}
+	dst = (struct sockaddr_in *)&ro->ro_dst;
+	/*
+	 * If there is a cached route,
+	 * check that it is to the same destination
+	 * and is still up.  If not, free it and try again.
+	 */
+	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+	    dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+		RTFREE(ro->ro_rt);
+		ro->ro_rt = (struct rtentry *)0;
+	}
+	if (ro->ro_rt == 0) {
+		dst->sin_family = AF_INET;
+		dst->sin_len = sizeof(*dst);
+		dst->sin_addr = ip->ip_dst;
+	}
+	/*
+	 * If routing to interface only,
+	 * short circuit routing lookup.
+	 */
+#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
+#define sintosa(sin) ((struct sockaddr *)(sin))
+	if (flags & IP_ROUTETOIF) {
+		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
+		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
+			ipstat.ips_noroute++;
+			error = ENETUNREACH;
+			goto bad;
+		}
+		ifp = ia->ia_ifp;
+		ip->ip_ttl = 1;
+	} else {
+		if (ro->ro_rt == 0)
+			rtalloc(ro);
+		if (ro->ro_rt == 0) {
+			ipstat.ips_noroute++;
+			error = EHOSTUNREACH;
+			goto bad;
+		}
+		ia = ifatoia(ro->ro_rt->rt_ifa);
+		ifp = ro->ro_rt->rt_ifp;
+		ro->ro_rt->rt_use++;
+		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
+			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+	}
+	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+		struct in_multi *inm;
+		extern struct ifnet loif;
+
+		m->m_flags |= M_MCAST;
+		/*
+		 * IP destination address is multicast.  Make sure "dst"
+		 * still points to the address in "ro".  (It may have been
+		 * changed to point to a gateway address, above.)
+		 */
+		dst = (struct sockaddr_in *)&ro->ro_dst;
+		/*
+		 * See if the caller provided any multicast options
+		 */
+		if (imo != NULL) {
+			ip->ip_ttl = imo->imo_multicast_ttl;
+			if (imo->imo_multicast_ifp != NULL)
+				ifp = imo->imo_multicast_ifp;
+		} else
+			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
+		/*
+		 * Confirm that the outgoing interface supports multicast.
+		 */
+		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+			ipstat.ips_noroute++;
+			error = ENETUNREACH;
+			goto bad;
+		}
+		/*
+		 * If source address not specified yet, use address
+		 * of outgoing interface.
+		 */
+		if (ip->ip_src.s_addr == INADDR_ANY) {
+			register struct in_ifaddr *ia;
+
+			for (ia = in_ifaddr; ia; ia = ia->ia_next)
+				if (ia->ia_ifp == ifp) {
+					ip->ip_src = IA_SIN(ia)->sin_addr;
+					break;
+				}
+		}
+
+		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
+		if (inm != NULL &&
+		    (imo == NULL || imo->imo_multicast_loop)) {
+			/*
+			 * If we belong to the destination multicast group
+			 * on the outgoing interface, and the caller did not
+			 * forbid loopback, loop back a copy.
+			 */
+			ip_mloopback(ifp, m, dst);
+		}
+#ifdef MROUTING
+		else {
+			/*
+			 * If we are acting as a multicast router, perform
+			 * multicast forwarding as if the packet had just
+			 * arrived on the interface to which we are about
+			 * to send.  The multicast forwarding function
+			 * recursively calls this function, using the
+			 * IP_FORWARDING flag to prevent infinite recursion.
+			 *
+			 * Multicasts that are looped back by ip_mloopback(),
+			 * above, will be forwarded by the ip_input() routine,
+			 * if necessary.
+			 */
+			extern struct socket *ip_mrouter;
+			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
+				if (ip_mforward(m, ifp) != 0) {
+					m_freem(m);
+					goto done;
+				}
+			}
+		}
+#endif
+		/*
+		 * Multicasts with a time-to-live of zero may be looped-
+		 * back, above, but must not be transmitted on a network.
+		 * Also, multicasts addressed to the loopback interface
+		 * are not sent -- the above call to ip_mloopback() will
+		 * loop back a copy if this host actually belongs to the
+		 * destination group on the loopback interface.
+		 */
+		if (ip->ip_ttl == 0 || ifp == &loif) {
+			m_freem(m);
+			goto done;
+		}
+
+		goto sendit;
+	}
+#ifndef notdef
+	/*
+	 * If source address not specified yet, use address
+	 * of outgoing interface.
+	 */
+	if (ip->ip_src.s_addr == INADDR_ANY)
+		ip->ip_src = IA_SIN(ia)->sin_addr;
+#endif
+	/*
+	 * Look for broadcast address and
+	 * verify user is allowed to send
+	 * such a packet.
+	 */
+	if (in_broadcast(dst->sin_addr, ifp)) {
+		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		if ((flags & IP_ALLOWBROADCAST) == 0) {
+			error = EACCES;
+			goto bad;
+		}
+		/* don't allow broadcast messages to be fragmented */
+		if ((u_short)ip->ip_len > ifp->if_mtu) {
+			error = EMSGSIZE;
+			goto bad;
+		}
+		m->m_flags |= M_BCAST;
+	} else
+		m->m_flags &= ~M_BCAST;
+
+sendit:
+	/*
+	 * If small enough for interface, can just send directly.
+	 */
+	if ((u_short)ip->ip_len <= ifp->if_mtu) {
+		ip->ip_len = htons((u_short)ip->ip_len);
+		ip->ip_off = htons((u_short)ip->ip_off);
+		ip->ip_sum = 0;
+		ip->ip_sum = in_cksum(m, hlen);
+		error = (*ifp->if_output)(ifp, m,
+		    (struct sockaddr *)dst, ro->ro_rt);
+		goto done;
+	}
+	/*
+	 * Too large for interface; fragment if possible.
+	 * Must be able to put at least 8 bytes per fragment.
+	 */
+	if (ip->ip_off & IP_DF) {
+		error = EMSGSIZE;
+		ipstat.ips_cantfrag++;
+		goto bad;
+	}
+	len = (ifp->if_mtu - hlen) &~ 7;
+	if (len < 8) {
+		error = EMSGSIZE;
+		goto bad;
+	}
+
+    {
+	int mhlen, firstlen = len;
+	struct mbuf **mnext = &m->m_nextpkt;
+
+	/*
+	 * Loop through length of segment after first fragment,
+	 * make new header and copy data of each part and link onto chain.
+	 */
+	m0 = m;
+	mhlen = sizeof (struct ip);
+	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
+		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		if (m == 0) {
+			error = ENOBUFS;
+			ipstat.ips_odropped++;
+			goto sendorfree;
+		}
+		m->m_data += max_linkhdr;
+		mhip = mtod(m, struct ip *);
+		*mhip = *ip;
+		if (hlen > sizeof (struct ip)) {
+			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
+			mhip->ip_hl = mhlen >> 2;
+		}
+		m->m_len = mhlen;
+		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
+		if (ip->ip_off & IP_MF)
+			mhip->ip_off |= IP_MF;
+		if (off + len >= (u_short)ip->ip_len)
+			len = (u_short)ip->ip_len - off;
+		else
+			mhip->ip_off |= IP_MF;
+		mhip->ip_len = htons((u_short)(len + mhlen));
+		m->m_next = m_copy(m0, off, len);
+		if (m->m_next == 0) {
+			(void) m_free(m);
+			error = ENOBUFS;	/* ??? */
+			ipstat.ips_odropped++;
+			goto sendorfree;
+		}
+		m->m_pkthdr.len = mhlen + len;
+		m->m_pkthdr.rcvif = (struct ifnet *)0;
+		mhip->ip_off = htons((u_short)mhip->ip_off);
+		mhip->ip_sum = 0;
+		mhip->ip_sum = in_cksum(m, mhlen);
+		*mnext = m;
+		mnext = &m->m_nextpkt;
+		ipstat.ips_ofragments++;
+	}
+	/*
+	 * Update first fragment by trimming what's been copied out
+	 * and updating header, then send each fragment (in order).
+	 */
+	m = m0;
+	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
+	m->m_pkthdr.len = hlen + firstlen;
+	ip->ip_len = htons((u_short)m->m_pkthdr.len);
+	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
+	ip->ip_sum = 0;
+	ip->ip_sum = in_cksum(m, hlen);
+sendorfree:
+	/*
+	 * Once any fragment fails, the remaining ones are freed
+	 * rather than sent.
+	 */
+	for (m = m0; m; m = m0) {
+		m0 = m->m_nextpkt;
+		m->m_nextpkt = 0;
+		if (error == 0)
+			error = (*ifp->if_output)(ifp, m,
+			    (struct sockaddr *)dst, ro->ro_rt);
+		else
+			m_freem(m);
+	}
+
+	if (error == 0)
+		ipstat.ips_fragmented++;
+    }
+done:
+	if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt)
+		RTFREE(ro->ro_rt);
+	return (error);
+bad:
+	/*
+	 * Free the current head of the chain, not the caller's m0:
+	 * ip_insertoptions() may have prepended a new mbuf, in which
+	 * case m0 is only the tail and freeing it would leak the head.
+	 */
+	m_freem(m);
+	goto done;
+}
+
+/*
+ * Insert IP options into preformed packet.
+ * Adjust IP destination as required for IP source routing,
+ * as indicated by a non-zero in_addr at the start of the options.
+ *
+ * Returns the (possibly new) head of the packet chain and stores the
+ * resulting IP header length in *phlen.  If the options cannot be
+ * inserted (packet would exceed IP_MAXPACKET, or no mbuf is available)
+ * the packet is returned without options and *phlen is set to the
+ * length of a bare IP header, so the caller never sees an unset length.
+ */
+static struct mbuf *
+ip_insertoptions(m, opt, phlen)
+	register struct mbuf *m;
+	struct mbuf *opt;
+	int *phlen;
+{
+	register struct ipoption *p = mtod(opt, struct ipoption *);
+	struct mbuf *n;
+	register struct ip *ip = mtod(m, struct ip *);
+	unsigned optlen;
+
+	/* opt holds the first-hop address followed by the real options. */
+	optlen = opt->m_len - sizeof(p->ipopt_dst);
+	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) {
+		*phlen = sizeof(struct ip);
+		return (m);		/* XXX should fail */
+	}
+	if (p->ipopt_dst.s_addr)
+		ip->ip_dst = p->ipopt_dst;
+	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
+		/*
+		 * No room in front of the header: put the header and
+		 * options into a fresh mbuf placed ahead of the chain.
+		 */
+		MGETHDR(n, M_DONTWAIT, MT_HEADER);
+		if (n == 0) {
+			*phlen = sizeof(struct ip);
+			return (m);
+		}
+		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
+		m->m_len -= sizeof(struct ip);
+		m->m_data += sizeof(struct ip);
+		n->m_next = m;
+		m = n;
+		m->m_len = optlen + sizeof(struct ip);
+		m->m_data += max_linkhdr;
+		bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
+	} else {
+		/* Slide the header back to open a gap for the options. */
+		m->m_data -= optlen;
+		m->m_len += optlen;
+		m->m_pkthdr.len += optlen;
+		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
+	}
+	ip = mtod(m, struct ip *);
+	bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
+	*phlen = sizeof(struct ip) + optlen;
+	ip->ip_len += optlen;
+	return (m);
+}
+
+/*
+ * Copy options from ip to jp, omitting those not copied during
+ * fragmentation.  The copied options are padded with IPOPT_EOL to a
+ * multiple of four bytes; the padded length is returned.
+ */
+int
+ip_optcopy(ip, jp)
+	struct ip *ip, *jp;
+{
+	register u_char *src, *dst;
+	int type, len, left;
+
+	src = (u_char *)(ip + 1);
+	dst = (u_char *)(jp + 1);
+	left = (ip->ip_hl << 2) - sizeof (struct ip);
+	while (left > 0) {
+		type = src[0];
+		if (type == IPOPT_EOL)
+			break;
+		if (type == IPOPT_NOP) {
+			/* Preserve for IP mcast tunnel's LSRR alignment. */
+			*dst++ = IPOPT_NOP;
+			len = 1;
+		} else {
+			len = src[IPOPT_OLEN];
+			/* bogus lengths should have been caught by ip_dooptions */
+			if (len > left)
+				len = left;
+			if (IPOPT_COPIED(type)) {
+				bcopy((caddr_t)src, (caddr_t)dst, (unsigned)len);
+				dst += len;
+			}
+		}
+		left -= len;
+		src += len;
+	}
+	/* Pad the copied options out to a 4-byte boundary. */
+	len = dst - (u_char *)(jp+1);
+	while (len & 0x3) {
+		*dst++ = IPOPT_EOL;
+		len++;
+	}
+	return (len);
+}
+
+/*
+ * IP socket option processing.
+ *
+ *	op	PRCO_SETOPT or PRCO_GETOPT
+ *	so	socket whose inpcb holds the option state
+ *	level	must be IPPROTO_IP, otherwise EINVAL is returned
+ *	mp	PRCO_SETOPT: *mp is the option value; it is freed here or
+ *		handed to ip_pcbopts().  PRCO_GETOPT: *mp receives a newly
+ *		allocated mbuf holding the value.
+ *
+ * Returns 0 or an errno value.
+ */
+int
+ip_ctloutput(op, so, level, optname, mp)
+	int op;
+	struct socket *so;
+	int level, optname;
+	struct mbuf **mp;
+{
+	register struct inpcb *inp = sotoinpcb(so);
+	register struct mbuf *m = *mp;
+	register int optval;
+	int error = 0;
+
+	if (level != IPPROTO_IP) {
+		error = EINVAL;
+		if (op == PRCO_SETOPT && *mp)
+			(void) m_free(*mp);
+	} else switch (op) {
+
+	case PRCO_SETOPT:
+		switch (optname) {
+		case IP_OPTIONS:
+#ifdef notyet
+		case IP_RETOPTS:
+			return (ip_pcbopts(optname, &inp->inp_options, m));
+#else
+			return (ip_pcbopts(&inp->inp_options, m));
+#endif
+
+		case IP_TOS:
+		case IP_TTL:
+		case IP_RECVOPTS:
+		case IP_RECVRETOPTS:
+		case IP_RECVDSTADDR:
+			/* A missing value must be rejected, not dereferenced. */
+			if (m == NULL || m->m_len != sizeof(int))
+				error = EINVAL;
+			else {
+				optval = *mtod(m, int *);
+				switch (optname) {
+
+				case IP_TOS:
+					inp->inp_ip.ip_tos = optval;
+					break;
+
+				case IP_TTL:
+					inp->inp_ip.ip_ttl = optval;
+					break;
+#define	OPTSET(bit) \
+	if (optval) \
+		inp->inp_flags |= bit; \
+	else \
+		inp->inp_flags &= ~bit;
+
+				case IP_RECVOPTS:
+					OPTSET(INP_RECVOPTS);
+					break;
+
+				case IP_RECVRETOPTS:
+					OPTSET(INP_RECVRETOPTS);
+					break;
+
+				case IP_RECVDSTADDR:
+					OPTSET(INP_RECVDSTADDR);
+					break;
+				}
+			}
+			break;
+#undef OPTSET
+
+		case IP_MULTICAST_IF:
+		case IP_MULTICAST_TTL:
+		case IP_MULTICAST_LOOP:
+		case IP_ADD_MEMBERSHIP:
+		case IP_DROP_MEMBERSHIP:
+			error = ip_setmoptions(optname, &inp->inp_moptions, m);
+			break;
+
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		if (m)
+			(void)m_free(m);
+		break;
+
+	case PRCO_GETOPT:
+		switch (optname) {
+		case IP_OPTIONS:
+		case IP_RETOPTS:
+			*mp = m = m_get(M_WAIT, MT_SOOPTS);
+			if (inp->inp_options) {
+				m->m_len = inp->inp_options->m_len;
+				bcopy(mtod(inp->inp_options, caddr_t),
+				    mtod(m, caddr_t), (unsigned)m->m_len);
+			} else
+				m->m_len = 0;
+			break;
+
+		case IP_TOS:
+		case IP_TTL:
+		case IP_RECVOPTS:
+		case IP_RECVRETOPTS:
+		case IP_RECVDSTADDR:
+			*mp = m = m_get(M_WAIT, MT_SOOPTS);
+			m->m_len = sizeof(int);
+			switch (optname) {
+
+			case IP_TOS:
+				optval = inp->inp_ip.ip_tos;
+				break;
+
+			case IP_TTL:
+				optval = inp->inp_ip.ip_ttl;
+				break;
+
+#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
+
+			case IP_RECVOPTS:
+				optval = OPTBIT(INP_RECVOPTS);
+				break;
+
+			case IP_RECVRETOPTS:
+				optval = OPTBIT(INP_RECVRETOPTS);
+				break;
+
+			case IP_RECVDSTADDR:
+				optval = OPTBIT(INP_RECVDSTADDR);
+				break;
+			}
+			*mtod(m, int *) = optval;
+			break;
+
+		case IP_MULTICAST_IF:
+		case IP_MULTICAST_TTL:
+		case IP_MULTICAST_LOOP:
+		case IP_ADD_MEMBERSHIP:
+		case IP_DROP_MEMBERSHIP:
+			error = ip_getmoptions(optname, inp->inp_moptions, mp);
+			break;
+
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		break;
+	}
+	return (error);
+}
+
+/*
+ * Set up IP options in pcb for insertion in output packets.
+ * Store in mbuf with pointer in pcbopt, adding pseudo-option
+ * with destination address if source routed.
+ *
+ * Any options previously attached to *pcbopt are freed first.  The
+ * mbuf m is consumed: on success it is stored in *pcbopt (or freed if
+ * it is empty); on failure it is freed and EINVAL is returned.
+ */
+int
+#ifdef notyet
+ip_pcbopts(optname, pcbopt, m)
+	int optname;
+#else
+ip_pcbopts(pcbopt, m)
+#endif
+	struct mbuf **pcbopt;
+	register struct mbuf *m;
+{
+	register int cnt, optlen;
+	register u_char *cp;
+	u_char opt;
+
+	/* turn off any old options */
+	if (*pcbopt)
+		(void)m_free(*pcbopt);
+	*pcbopt = 0;
+	if (m == (struct mbuf *)0 || m->m_len == 0) {
+		/*
+		 * Only turning off any previous options.
+		 */
+		if (m)
+			(void)m_free(m);
+		return (0);
+	}
+
+#ifndef	vax
+	if (m->m_len % sizeof(long))
+		goto bad;
+#endif
+	/*
+	 * IP first-hop destination address will be stored before
+	 * actual options; move other options back
+	 * and clear it when none present.
+	 */
+	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
+		goto bad;
+	cnt = m->m_len;
+	m->m_len += sizeof(struct in_addr);
+	cp = mtod(m, u_char *) + sizeof(struct in_addr);
+	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
+	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
+
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[IPOPT_OPTVAL];
+		if (opt == IPOPT_EOL)
+			break;
+		if (opt == IPOPT_NOP)
+			optlen = 1;
+		else {
+			optlen = cp[IPOPT_OLEN];
+			if (optlen <= IPOPT_OLEN || optlen > cnt)
+				goto bad;
+		}
+		switch (opt) {
+
+		default:
+			break;
+
+		case IPOPT_LSRR:
+		case IPOPT_SSRR:
+			/*
+			 * user process specifies route as:
+			 *	->A->B->C->D
+			 * D must be our final destination (but we can't
+			 * check that since we may not have connected yet).
+			 * A is first hop destination, which doesn't appear in
+			 * actual IP option, but is stored before the options.
+			 */
+			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
+				goto bad;
+			m->m_len -= sizeof(struct in_addr);
+			cnt -= sizeof(struct in_addr);
+			optlen -= sizeof(struct in_addr);
+			cp[IPOPT_OLEN] = optlen;
+			/*
+			 * Move first hop before start of options.
+			 */
+			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
+			    sizeof(struct in_addr));
+			/*
+			 * Then copy rest of options back
+			 * to close up the deleted entry.
+			 * cnt now counts the bytes from cp to the end of
+			 * the options after the removal; the type, length
+			 * and pointer bytes (IPOPT_MINOFF - 1 of them) stay
+			 * in place, so only the remainder is moved.
+			 */
+			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
+			    sizeof(struct in_addr)),
+			    (caddr_t)&cp[IPOPT_OFFSET+1],
+			    (unsigned)cnt - (IPOPT_MINOFF - 1));
+			break;
+		}
+	}
+	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
+		goto bad;
+	*pcbopt = m;
+	return (0);
+
+bad:
+	(void)m_free(m);
+	return (EINVAL);
+}
+
+/*
+ * Set the IP multicast options in response to user setsockopt().
+ *
+ *	optname	IP_MULTICAST_IF, IP_MULTICAST_TTL, IP_MULTICAST_LOOP,
+ *		IP_ADD_MEMBERSHIP or IP_DROP_MEMBERSHIP
+ *	imop	address of the pcb's multicast option pointer; an option
+ *		block is allocated on demand and released again (with
+ *		*imop reset to NULL) whenever every option has its
+ *		default value
+ *	m	mbuf holding the option value; it is not freed here
+ *
+ * Returns 0 or an errno value.
+ */
+int
+ip_setmoptions(optname, imop, m)
+	int optname;
+	struct ip_moptions **imop;
+	struct mbuf *m;
+{
+	register int error = 0;
+	u_char loop;
+	register int i;
+	struct in_addr addr;
+	register struct ip_mreq *mreq;
+	register struct ifnet *ifp;
+	register struct ip_moptions *imo = *imop;
+	struct route ro;
+	register struct sockaddr_in *dst;
+
+	if (imo == NULL) {
+		/*
+		 * No multicast option buffer attached to the pcb;
+		 * allocate one and initialize to default values.
+		 */
+		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
+		    M_WAITOK);
+
+		if (imo == NULL)
+			return (ENOBUFS);
+		*imop = imo;
+		imo->imo_multicast_ifp = NULL;
+		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
+		imo->imo_num_memberships = 0;
+	}
+
+	switch (optname) {
+
+	case IP_MULTICAST_IF:
+		/*
+		 * Select the interface for outgoing multicast packets.
+		 */
+		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
+			error = EINVAL;
+			break;
+		}
+		addr = *(mtod(m, struct in_addr *));
+		/*
+		 * INADDR_ANY is used to remove a previous selection.
+		 * When no interface is selected, a default one is
+		 * chosen every time a multicast packet is sent.
+		 */
+		if (addr.s_addr == INADDR_ANY) {
+			imo->imo_multicast_ifp = NULL;
+			break;
+		}
+		/*
+		 * The selected interface is identified by its local
+		 * IP address.  Find the interface and confirm that
+		 * it supports multicasting.
+		 */
+		INADDR_TO_IFP(addr, ifp);
+		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		imo->imo_multicast_ifp = ifp;
+		break;
+
+	case IP_MULTICAST_TTL:
+		/*
+		 * Set the IP time-to-live for outgoing multicast packets.
+		 */
+		if (m == NULL || m->m_len != 1) {
+			error = EINVAL;
+			break;
+		}
+		imo->imo_multicast_ttl = *(mtod(m, u_char *));
+		break;
+
+	case IP_MULTICAST_LOOP:
+		/*
+		 * Set the loopback flag for outgoing multicast packets.
+		 * Must be zero or one.
+		 */
+		if (m == NULL || m->m_len != 1 ||
+		    (loop = *(mtod(m, u_char *))) > 1) {
+			error = EINVAL;
+			break;
+		}
+		imo->imo_multicast_loop = loop;
+		break;
+
+	case IP_ADD_MEMBERSHIP:
+		/*
+		 * Add a multicast group membership.
+		 * Group must be a valid IP multicast address.
+		 */
+		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
+			error = EINVAL;
+			break;
+		}
+		mreq = mtod(m, struct ip_mreq *);
+		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
+			error = EINVAL;
+			break;
+		}
+		/*
+		 * If no interface address was provided, use the interface of
+		 * the route to the given multicast address.
+		 */
+		if (mreq->imr_interface.s_addr == INADDR_ANY) {
+			/*
+			 * Clear the whole route, not just ro_rt, so the
+			 * lookup key carries no stack garbage in the
+			 * sockaddr bytes that are not set below.
+			 */
+			bzero((caddr_t)&ro, sizeof(ro));
+			dst = (struct sockaddr_in *)&ro.ro_dst;
+			dst->sin_len = sizeof(*dst);
+			dst->sin_family = AF_INET;
+			dst->sin_addr = mreq->imr_multiaddr;
+			rtalloc(&ro);
+			if (ro.ro_rt == NULL) {
+				error = EADDRNOTAVAIL;
+				break;
+			}
+			ifp = ro.ro_rt->rt_ifp;
+			rtfree(ro.ro_rt);
+		}
+		else {
+			INADDR_TO_IFP(mreq->imr_interface, ifp);
+		}
+		/*
+		 * See if we found an interface, and confirm that it
+		 * supports multicast.
+		 */
+		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		/*
+		 * See if the membership already exists or if all the
+		 * membership slots are full.
+		 */
+		for (i = 0; i < imo->imo_num_memberships; ++i) {
+			if (imo->imo_membership[i]->inm_ifp == ifp &&
+			    imo->imo_membership[i]->inm_addr.s_addr
+						== mreq->imr_multiaddr.s_addr)
+				break;
+		}
+		if (i < imo->imo_num_memberships) {
+			error = EADDRINUSE;
+			break;
+		}
+		if (i == IP_MAX_MEMBERSHIPS) {
+			error = ETOOMANYREFS;
+			break;
+		}
+		/*
+		 * Everything looks good; add a new record to the multicast
+		 * address list for the given interface.
+		 */
+		if ((imo->imo_membership[i] =
+		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
+			error = ENOBUFS;
+			break;
+		}
+		++imo->imo_num_memberships;
+		break;
+
+	case IP_DROP_MEMBERSHIP:
+		/*
+		 * Drop a multicast group membership.
+		 * Group must be a valid IP multicast address.
+		 */
+		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
+			error = EINVAL;
+			break;
+		}
+		mreq = mtod(m, struct ip_mreq *);
+		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
+			error = EINVAL;
+			break;
+		}
+		/*
+		 * If an interface address was specified, get a pointer
+		 * to its ifnet structure.
+		 */
+		if (mreq->imr_interface.s_addr == INADDR_ANY)
+			ifp = NULL;
+		else {
+			INADDR_TO_IFP(mreq->imr_interface, ifp);
+			if (ifp == NULL) {
+				error = EADDRNOTAVAIL;
+				break;
+			}
+		}
+		/*
+		 * Find the membership in the membership array.
+		 */
+		for (i = 0; i < imo->imo_num_memberships; ++i) {
+			if ((ifp == NULL ||
+			     imo->imo_membership[i]->inm_ifp == ifp) &&
+			     imo->imo_membership[i]->inm_addr.s_addr ==
+			     mreq->imr_multiaddr.s_addr)
+				break;
+		}
+		if (i == imo->imo_num_memberships) {
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		/*
+		 * Give up the multicast address record to which the
+		 * membership points.
+		 */
+		in_delmulti(imo->imo_membership[i]);
+		/*
+		 * Remove the gap in the membership array.
+		 */
+		for (++i; i < imo->imo_num_memberships; ++i)
+			imo->imo_membership[i-1] = imo->imo_membership[i];
+		--imo->imo_num_memberships;
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	/*
+	 * If all options have default values, no need to keep the
+	 * option block.
+	 */
+	if (imo->imo_multicast_ifp == NULL &&
+	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
+	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
+	    imo->imo_num_memberships == 0) {
+		free(*imop, M_IPMOPTS);
+		*imop = NULL;
+	}
+
+	return (error);
+}
+
+/*
+ * Return the IP multicast options in response to user getsockopt().
+ * A fresh MT_SOOPTS mbuf is always allocated and stored in *mp, even
+ * when the option name is not recognised (EOPNOTSUPP).
+ */
+int
+ip_getmoptions(optname, imo, mp)
+	int optname;
+	register struct ip_moptions *imo;
+	register struct mbuf **mp;
+{
+	struct in_ifaddr *ia;
+	struct in_addr *ifaddr;
+
+	*mp = m_get(M_WAIT, MT_SOOPTS);
+
+	if (optname == IP_MULTICAST_IF) {
+		/* Report the selected interface by its address, if any. */
+		ifaddr = mtod(*mp, struct in_addr *);
+		(*mp)->m_len = sizeof(struct in_addr);
+		if (imo != NULL && imo->imo_multicast_ifp != NULL) {
+			IFP_TO_IA(imo->imo_multicast_ifp, ia);
+			if (ia == NULL)
+				ifaddr->s_addr = INADDR_ANY;
+			else
+				ifaddr->s_addr = IA_SIN(ia)->sin_addr.s_addr;
+		} else {
+			ifaddr->s_addr = INADDR_ANY;
+		}
+		return (0);
+	}
+
+	if (optname == IP_MULTICAST_TTL) {
+		(*mp)->m_len = 1;
+		if (imo == NULL)
+			*mtod(*mp, u_char *) = IP_DEFAULT_MULTICAST_TTL;
+		else
+			*mtod(*mp, u_char *) = imo->imo_multicast_ttl;
+		return (0);
+	}
+
+	if (optname == IP_MULTICAST_LOOP) {
+		(*mp)->m_len = 1;
+		if (imo == NULL)
+			*mtod(*mp, u_char *) = IP_DEFAULT_MULTICAST_LOOP;
+		else
+			*mtod(*mp, u_char *) = imo->imo_multicast_loop;
+		return (0);
+	}
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Discard the IP multicast options: give up every group membership
+ * recorded in imo, then release the option block itself.
+ * A null imo is accepted and ignored.
+ */
+void
+ip_freemoptions(imo)
+	register struct ip_moptions *imo;
+{
+	register int slot;
+
+	if (imo == NULL)
+		return;
+
+	slot = 0;
+	while (slot < imo->imo_num_memberships) {
+		in_delmulti(imo->imo_membership[slot]);
+		++slot;
+	}
+	free(imo, M_IPMOPTS);
+}
+
+/*
+ * Routine called from ip_output() to loop back a copy of an IP multicast
+ * packet to the input queue of a specified interface.  Note that this
+ * calls the output routine of the loopback "driver", but with an interface
+ * pointer that might NOT be &loif -- easier than replicating that code here.
+ *
+ * The original chain m is left untouched; if no copy can be made the
+ * packet is simply not looped back.
+ */
+static void
+ip_mloopback(ifp, m, dst)
+	struct ifnet *ifp;
+	register struct mbuf *m;
+	register struct sockaddr_in *dst;
+{
+	register struct ip *hdr;
+	struct mbuf *dup;
+
+	dup = m_copy(m, 0, M_COPYALL);
+	if (dup == NULL)
+		return;
+
+	/*
+	 * We don't bother to fragment if the IP length is greater
+	 * than the interface's MTU.  Can this possibly matter?
+	 */
+	hdr = mtod(dup, struct ip *);
+	hdr->ip_len = htons((u_short)hdr->ip_len);
+	hdr->ip_off = htons((u_short)hdr->ip_off);
+	hdr->ip_sum = 0;
+	hdr->ip_sum = in_cksum(dup, hdr->ip_hl << 2);
+	(void) looutput(ifp, dup, (struct sockaddr *)dst, NULL);
+}
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
new file mode 100644
index 000000000000..27eda5e67cdc
--- /dev/null
+++ b/sys/netinet/ip_var.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_var.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Overlay for ip header used by other protocols (tcp, udp).
+ */
+struct ipovly {
+ caddr_t ih_next, ih_prev; /* for protocol sequence q's */
+ u_char ih_x1; /* (unused) */
+ u_char ih_pr; /* protocol */
+ short ih_len; /* protocol length */
+ struct in_addr ih_src; /* source internet address */
+ struct in_addr ih_dst; /* destination internet address */
+};
+
+/*
+ * Ip reassembly queue structure. Each fragment
+ * being reassembled is attached to one of these structures.
+ * They are timed out after ipq_ttl drops to 0, and may also
+ * be reclaimed if memory becomes tight.
+ */
+struct ipq {
+ struct ipq *next,*prev; /* to other reass headers */
+ u_char ipq_ttl; /* time for reass q to live */
+ u_char ipq_p; /* protocol of this fragment */
+ u_short ipq_id; /* sequence id for reassembly */
+ struct ipasfrag *ipq_next,*ipq_prev;
+ /* to ip headers of fragments */
+ struct in_addr ipq_src,ipq_dst;
+};
+
+/*
+ * Ip header, when holding a fragment.
+ *
+ * Note: ipf_next must be at same offset as ipq_next above
+ */
+struct ipasfrag {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ u_char ip_hl:4,
+ ip_v:4;
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ u_char ip_v:4,
+ ip_hl:4;
+#endif
+ u_char ipf_mff; /* XXX overlays ip_tos: use low bit
+ * to avoid destroying tos;
+ * copied from (ip_off&IP_MF) */
+ short ip_len;
+ u_short ip_id;
+ short ip_off;
+ u_char ip_ttl;
+ u_char ip_p;
+ u_short ip_sum;
+ struct ipasfrag *ipf_next; /* next fragment */
+ struct ipasfrag *ipf_prev; /* previous fragment */
+};
+
+/*
+ * Structure stored in mbuf in inpcb.ip_options
+ * and passed to ip_output when ip options are in use.
+ * The actual length of the options (including ipopt_dst)
+ * is in m_len.
+ */
+#define MAX_IPOPTLEN 40
+
+struct ipoption {
+ struct in_addr ipopt_dst; /* first-hop dst if source routed */
+ char ipopt_list[MAX_IPOPTLEN]; /* options proper */
+};
+
+/*
+ * Structure attached to inpcb.ip_moptions and
+ * passed to ip_output when IP multicast options are in use.
+ */
+struct ip_moptions {
+ struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */
+ u_char imo_multicast_ttl; /* TTL for outgoing multicasts */
+ u_char imo_multicast_loop; /* 1 => hear sends if a member */
+ u_short imo_num_memberships; /* no. memberships this socket */
+ struct in_multi *imo_membership[IP_MAX_MEMBERSHIPS];
+};
+
+/* Counters of IP-layer events, one u_long per event. */
+struct ipstat {
+ u_long ips_total; /* total packets received */
+ u_long ips_badsum; /* checksum bad */
+ u_long ips_tooshort; /* packet too short */
+ u_long ips_toosmall; /* not enough data */
+ u_long ips_badhlen; /* ip header length < data size */
+ u_long ips_badlen; /* ip length < ip header length */
+ u_long ips_fragments; /* fragments received */
+ u_long ips_fragdropped; /* frags dropped (dups, out of space) */
+ u_long ips_fragtimeout; /* fragments timed out */
+ u_long ips_forward; /* packets forwarded */
+ u_long ips_cantforward; /* packets rcvd for unreachable dest */
+ u_long ips_redirectsent; /* packets forwarded on same net */
+ u_long ips_noproto; /* unknown or unsupported protocol */
+ u_long ips_delivered; /* datagrams delivered to upper level*/
+ u_long ips_localout; /* total ip packets generated here */
+ u_long ips_odropped; /* lost packets due to nobufs, etc. */
+ u_long ips_reassembled; /* total packets reassembled ok */
+ u_long ips_fragmented; /* datagrams successfully fragmented */
+ u_long ips_ofragments; /* output fragments created */
+ u_long ips_cantfrag; /* don't fragment flag was set, etc. */
+ u_long ips_badoptions; /* error in option processing */
+ u_long ips_noroute; /* packets discarded due to no route */
+ u_long ips_badvers; /* ip version != 4 */
+ u_long ips_rawout; /* total raw ip packets generated */
+};
+
+#ifdef KERNEL
+/* flags passed to ip_output as last parameter */
+#define IP_FORWARDING 0x1 /* most of ip header exists */
+#define IP_RAWOUTPUT 0x2 /* raw ip header exists */
+#define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables */
+#define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */
+
+struct ipstat ipstat;
+struct ipq ipq; /* ip reass. queue */
+u_short ip_id; /* ip packet ctr, for ids */
+int ip_defttl; /* default IP ttl */
+
+int in_control __P((struct socket *, int, caddr_t, struct ifnet *));
+int ip_ctloutput __P((int, struct socket *, int, int, struct mbuf **));
+void ip_deq __P((struct ipasfrag *));
+int ip_dooptions __P((struct mbuf *));
+void ip_drain __P((void));
+void ip_enq __P((struct ipasfrag *, struct ipasfrag *));
+void ip_forward __P((struct mbuf *, int));
+void ip_freef __P((struct ipq *));
+void ip_freemoptions __P((struct ip_moptions *));
+int ip_getmoptions __P((int, struct ip_moptions *, struct mbuf **));
+void ip_init __P((void));
+int ip_mforward __P((struct mbuf *, struct ifnet *));
+int ip_optcopy __P((struct ip *, struct ip *));
+int ip_output __P((struct mbuf *,
+ struct mbuf *, struct route *, int, struct ip_moptions *));
+int ip_pcbopts __P((struct mbuf **, struct mbuf *));
+struct ip *
+ ip_reass __P((struct ipasfrag *, struct ipq *));
+struct in_ifaddr *
+ ip_rtaddr __P((struct in_addr));
+int ip_setmoptions __P((int, struct ip_moptions **, struct mbuf *));
+void ip_slowtimo __P((void));
+struct mbuf *
+ ip_srcroute __P((void));
+void ip_stripoptions __P((struct mbuf *, struct mbuf *));
+int ip_sysctl __P((int *, u_int, void *, size_t *, void *, size_t));
+void ipintr __P((void));
+int rip_ctloutput __P((int, struct socket *, int, int, struct mbuf **));
+void rip_init __P((void));
+void rip_input __P((struct mbuf *));
+int rip_output __P((struct mbuf *, struct socket *, u_long));
+int rip_usrreq __P((struct socket *,
+ int, struct mbuf *, struct mbuf *, struct mbuf *));
+#endif
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
new file mode 100644
index 000000000000..c8092ee9ec75
--- /dev/null
+++ b/sys/netinet/raw_ip.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_mroute.h>
+#include <netinet/in_pcb.h>
+
+struct inpcb rawinpcb;
+
+/*
+ * Nominal space allocated to a raw ip socket.
+ */
+#define RIPSNDQ 8192
+#define RIPRCVQ 8192
+
+/*
+ * Raw interface to IP protocol.
+ */
+
+/*
+ * Make the raw PCB list head point at itself, i.e. an empty circular queue.
+ */
+void
+rip_init()
+{
+	rawinpcb.inp_next = &rawinpcb;
+	rawinpcb.inp_prev = &rawinpcb;
+}
+
+struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
+/*
+ * Hand an incoming datagram to every raw socket whose non-zero protocol,
+ * local address and foreign address all equal the packet's (zero is a
+ * wildcard).  Consumes m; the last matching socket gets the original chain.
+ */
+void
+rip_input(m)
+	struct mbuf *m;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register struct inpcb *inp;
+	struct socket *last = 0;
+
+	ripsrc.sin_addr = ip->ip_src;
+	for (inp = rawinpcb.inp_next; inp != &rawinpcb; inp = inp->inp_next) {
+		/* Skip sockets attached to a different protocol. */
+		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
+			continue;
+		/* Skip sockets bound to some other local address. */
+		if (inp->inp_laddr.s_addr &&
+		    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
+			continue;
+		/* Skip sockets connected to some other foreign address. */
+		if (inp->inp_faddr.s_addr &&
+		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
+			continue;
+		/*
+		 * Delivery runs one match behind: the previous match gets
+		 * a copy, so that the final one can take m itself.
+		 */
+		if (last) {
+			struct mbuf *n;
+			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
+				if (sbappendaddr(&last->so_rcv, &ripsrc,
+				    n, (struct mbuf *)0) == 0)
+					/* should notify about lost packet */
+					m_freem(n);
+				else
+					sorwakeup(last);
+			}
+		}
+		last = inp->inp_socket;
+	}
+	if (last) {
+		if (sbappendaddr(&last->so_rcv, &ripsrc,
+		    m, (struct mbuf *)0) == 0)
+			m_freem(m);
+		else
+			sorwakeup(last);
+	} else {
+		/* Nobody wanted it: count it as an unknown protocol instead. */
+		m_freem(m);
+		ipstat.ips_noproto++;
+		ipstat.ips_delivered--;
+	}
+}
+
+/*
+ * Generate IP header and pass packet to ip_output.
+ * Tack on options user may have setup with control call.
+ *
+ * m is the datagram, so the sending raw socket and dst the destination
+ * address stored into ip_dst when the header is built here.  Returns
+ * whatever ip_output() returns.
+ */
+int
+rip_output(m, so, dst)
+ register struct mbuf *m;
+ struct socket *so;
+ u_long dst;
+{
+ register struct ip *ip;
+ register struct inpcb *inp = sotoinpcb(so);
+ struct mbuf *opts;
+ int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
+
+ /*
+ * If the user handed us a complete IP packet, use it.
+ * Otherwise, allocate an mbuf for a header and fill it in.
+ */
+ if ((inp->inp_flags & INP_HDRINCL) == 0) {
+ M_PREPEND(m, sizeof(struct ip), M_WAIT);
+ ip = mtod(m, struct ip *);
+ ip->ip_tos = 0;
+ ip->ip_off = 0;
+ ip->ip_p = inp->inp_ip.ip_p;
+ ip->ip_len = m->m_pkthdr.len;
+ ip->ip_src = inp->inp_laddr;
+ ip->ip_dst.s_addr = dst;
+ ip->ip_ttl = MAXTTL;
+ opts = inp->inp_options;
+ } else {
+ ip = mtod(m, struct ip *);
+ /* a zero id is replaced with the next value of the global ip_id */
+ if (ip->ip_id == 0)
+ ip->ip_id = htons(ip_id++);
+ /* the socket's stored options are not passed on in this mode */
+ opts = NULL;
+ /* XXX prevent ip_output from overwriting header fields */
+ flags |= IP_RAWOUTPUT;
+ ipstat.ips_rawout++;
+ }
+ return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions));
+}
+
+/*
+ * Raw IP socket option processing.
+ *
+ * IP_HDRINCL and the DVMRP_* multicast routing commands are handled here;
+ * every other option is passed on to ip_ctloutput().  op is PRCO_SETOPT or
+ * PRCO_GETOPT.  For a set, *m carries the option value and is freed here on
+ * every path this routine handles itself; for a get, *m receives the reply.
+ * Returns 0 or an errno value.
+ */
+int
+rip_ctloutput(op, so, level, optname, m)
+	int op;
+	struct socket *so;
+	int level, optname;
+	struct mbuf **m;
+{
+	register struct inpcb *inp = sotoinpcb(so);
+	register int error;
+
+	if (level != IPPROTO_IP) {
+		/* Do not leak the option mbuf on the error return. */
+		if (op == PRCO_SETOPT && m && *m)
+			(void)m_free(*m);
+		return (EINVAL);
+	}
+
+	switch (optname) {
+
+	case IP_HDRINCL:
+		if (op == PRCO_SETOPT) {
+			if (m == 0 || *m == 0)
+				return (EINVAL);
+			error = 0;
+			if ((*m)->m_len < sizeof (int))
+				error = EINVAL;
+			else if (*mtod(*m, int *))
+				inp->inp_flags |= INP_HDRINCL;
+			else
+				inp->inp_flags &= ~INP_HDRINCL;
+			/* Freed whether or not the value was acceptable. */
+			(void)m_free(*m);
+			return (error);
+		}
+		if (op == PRCO_GETOPT) {
+			if (m == 0)
+				return (EINVAL);
+			/*
+			 * The caller normally supplies no mbuf for a get,
+			 * so allocate the reply here when none was given.
+			 */
+			if (*m == 0)
+				*m = m_get(M_WAIT, MT_SOOPTS);
+			(*m)->m_len = sizeof (int);
+			*mtod(*m, int *) = inp->inp_flags & INP_HDRINCL;
+			return (0);
+		}
+		break;
+
+	case DVMRP_INIT:
+	case DVMRP_DONE:
+	case DVMRP_ADD_VIF:
+	case DVMRP_DEL_VIF:
+	case DVMRP_ADD_LGRP:
+	case DVMRP_DEL_LGRP:
+	case DVMRP_ADD_MRT:
+	case DVMRP_DEL_MRT:
+#ifdef MROUTING
+		/* Multicast routing commands can only be set, never read. */
+		if (op == PRCO_SETOPT) {
+			error = ip_mrouter_cmd(optname, so, *m);
+			if (*m)
+				(void)m_free(*m);
+		} else
+			error = EINVAL;
+		return (error);
+#else
+		if (op == PRCO_SETOPT && *m)
+			(void)m_free(*m);
+		return (EOPNOTSUPP);
+#endif
+	}
+	return (ip_ctloutput(op, so, level, optname, m));
+}
+
+u_long rip_sendspace = RIPSNDQ;
+u_long rip_recvspace = RIPRCVQ;
+
+/*ARGSUSED*/
+int
+rip_usrreq(so, req, m, nam, control)
+ register struct socket *so;
+ int req;
+ struct mbuf *m, *nam, *control;
+{
+ register int error = 0;
+ register struct inpcb *inp = sotoinpcb(so);
+#ifdef MROUTING
+ extern struct socket *ip_mrouter;
+#endif
+ switch (req) {
+
+ case PRU_ATTACH:
+ if (inp)
+ panic("rip_attach");
+ if ((so->so_state & SS_PRIV) == 0) {
+ error = EACCES;
+ break;
+ }
+ if ((error = soreserve(so, rip_sendspace, rip_recvspace)) ||
+ (error = in_pcballoc(so, &rawinpcb)))
+ break;
+ inp = (struct inpcb *)so->so_pcb;
+ inp->inp_ip.ip_p = (int)nam;
+ break;
+
+ case PRU_DISCONNECT:
+ if ((so->so_state & SS_ISCONNECTED) == 0) {
+ error = ENOTCONN;
+ break;
+ }
+ /* FALLTHROUGH */
+ case PRU_ABORT:
+ soisdisconnected(so);
+ /* FALLTHROUGH */
+ case PRU_DETACH:
+ if (inp == 0)
+ panic("rip_detach");
+#ifdef MROUTING
+ if (so == ip_mrouter)
+ ip_mrouter_done();
+#endif
+ in_pcbdetach(inp);
+ break;
+
+ case PRU_BIND:
+ {
+ struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
+
+ if (nam->m_len != sizeof(*addr)) {
+ error = EINVAL;
+ break;
+ }
+ if ((ifnet == 0) ||
+ ((addr->sin_family != AF_INET) &&
+ (addr->sin_family != AF_IMPLINK)) ||
+ (addr->sin_addr.s_addr &&
+ ifa_ifwithaddr((struct sockaddr *)addr) == 0)) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ inp->inp_laddr = addr->sin_addr;
+ break;
+ }
+ case PRU_CONNECT:
+ {
+ struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
+
+ if (nam->m_len != sizeof(*addr)) {
+ error = EINVAL;
+ break;
+ }
+ if (ifnet == 0) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ if ((addr->sin_family != AF_INET) &&
+ (addr->sin_family != AF_IMPLINK)) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ inp->inp_faddr = addr->sin_addr;
+ soisconnected(so);
+ break;
+ }
+
+ case PRU_CONNECT2:
+ error = EOPNOTSUPP;
+ break;
+
+ /*
+ * Mark the connection as being incapable of further output.
+ */
+ case PRU_SHUTDOWN:
+ socantsendmore(so);
+ break;
+
+ /*
+ * Ship a packet out. The appropriate raw output
+ * routine handles any massaging necessary.
+ */
+ case PRU_SEND:
+ {
+ register u_long dst;
+
+ if (so->so_state & SS_ISCONNECTED) {
+ if (nam) {
+ error = EISCONN;
+ break;
+ }
+ dst = inp->inp_faddr.s_addr;
+ } else {
+ if (nam == NULL) {
+ error = ENOTCONN;
+ break;
+ }
+ dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+ }
+ error = rip_output(m, so, dst);
+ m = NULL;
+ break;
+ }
+
+ case PRU_SENSE:
+ /*
+ * stat: don't bother with a blocksize.
+ */
+ return (0);
+
+ /*
+ * Not supported.
+ */
+ case PRU_RCVOOB:
+ case PRU_RCVD:
+ case PRU_LISTEN:
+ case PRU_ACCEPT:
+ case PRU_SENDOOB:
+ error = EOPNOTSUPP;
+ break;
+
+ case PRU_SOCKADDR:
+ in_setsockaddr(inp, nam);
+ break;
+
+ case PRU_PEERADDR:
+ in_setpeeraddr(inp, nam);
+ break;
+
+ default:
+ panic("rip_usrreq");
+ }
+ if (m != NULL)
+ m_freem(m);
+ return (error);
+}
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
new file mode 100644
index 000000000000..6b77ff663a41
--- /dev/null
+++ b/sys/netinet/tcp.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp.h 8.1 (Berkeley) 6/10/93
+ */
+
+typedef u_long tcp_seq;
+/*
+ * TCP header.
+ * Per RFC 793, September, 1981.
+ */
+struct tcphdr {
+ u_short th_sport; /* source port */
+ u_short th_dport; /* destination port */
+ tcp_seq th_seq; /* sequence number */
+ tcp_seq th_ack; /* acknowledgement number */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ u_char th_x2:4, /* (unused) */
+ th_off:4; /* data offset */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ u_char th_off:4, /* data offset */
+ th_x2:4; /* (unused) */
+#endif
+ u_char th_flags;
+#define TH_FIN 0x01
+#define TH_SYN 0x02
+#define TH_RST 0x04
+#define TH_PUSH 0x08
+#define TH_ACK 0x10
+#define TH_URG 0x20
+ u_short th_win; /* window */
+ u_short th_sum; /* checksum */
+ u_short th_urp; /* urgent pointer */
+};
+
+#define TCPOPT_EOL 0
+#define TCPOPT_NOP 1
+#define TCPOPT_MAXSEG 2
+#define TCPOLEN_MAXSEG 4
+#define TCPOPT_WINDOW 3
+#define TCPOLEN_WINDOW 3
+#define TCPOPT_SACK_PERMITTED 4 /* Experimental */
+#define TCPOLEN_SACK_PERMITTED 2
+#define TCPOPT_SACK 5 /* Experimental */
+#define TCPOPT_TIMESTAMP 8
+#define TCPOLEN_TIMESTAMP 10
+#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */
+
+#define TCPOPT_TSTAMP_HDR \
+ (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)
+
+/*
+ * Default maximum segment size for TCP.
+ * With an IP MSS of 576, this is 536,
+ * but 512 is probably more convenient.
+ * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)).
+ */
+#define TCP_MSS 512
+
+#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */
+
+#define TCP_MAX_WINSHIFT 14 /* maximum window shift */
+
+/*
+ * User-settable options (used with setsockopt).
+ */
+#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */
+#define TCP_MAXSEG 0x02 /* set maximum segment size */
diff --git a/sys/netinet/tcp_debug.c b/sys/netinet/tcp_debug.c
new file mode 100644
index 000000000000..ddb30927b4ac
--- /dev/null
+++ b/sys/netinet/tcp_debug.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_debug.c 8.1 (Berkeley) 6/10/93
+ */
+
+#ifdef TCPDEBUG
+/* load symbolic names */
+#define PRUREQUESTS
+#define TCPSTATES
+#define TCPTIMERS
+#define TANAMES
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+#ifdef TCPDEBUG
+int tcpconsdebug = 0;
+#endif
+/*
+ * Tcp debug routines
+ */
+void
+tcp_trace(act, ostate, tp, ti, req)
+ short act, ostate;
+ struct tcpcb *tp;
+ struct tcpiphdr *ti;
+ int req;
+{
+ tcp_seq seq, ack;
+ int len, flags;
+ struct tcp_debug *td = &tcp_debug[tcp_debx++];
+
+ if (tcp_debx == TCP_NDEBUG)
+ tcp_debx = 0;
+ td->td_time = iptime();
+ td->td_act = act;
+ td->td_ostate = ostate;
+ td->td_tcb = (caddr_t)tp;
+ if (tp)
+ td->td_cb = *tp;
+ else
+ bzero((caddr_t)&td->td_cb, sizeof (*tp));
+ if (ti)
+ td->td_ti = *ti;
+ else
+ bzero((caddr_t)&td->td_ti, sizeof (*ti));
+ td->td_req = req;
+#ifdef TCPDEBUG
+ if (tcpconsdebug == 0)
+ return;
+ if (tp)
+ printf("%x %s:", tp, tcpstates[ostate]);
+ else
+ printf("???????? ");
+ printf("%s ", tanames[act]);
+ switch (act) {
+
+ case TA_INPUT:
+ case TA_OUTPUT:
+ case TA_DROP:
+ if (ti == 0)
+ break;
+ seq = ti->ti_seq;
+ ack = ti->ti_ack;
+ len = ti->ti_len;
+ if (act == TA_OUTPUT) {
+ seq = ntohl(seq);
+ ack = ntohl(ack);
+ len = ntohs((u_short)len);
+ }
+ if (act == TA_OUTPUT)
+ len -= sizeof (struct tcphdr);
+ if (len)
+ printf("[%x..%x)", seq, seq+len);
+ else
+ printf("%x", seq);
+ printf("@%x, urp=%x", ack, ti->ti_urp);
+ flags = ti->ti_flags;
+ if (flags) {
+#ifndef lint
+ char *cp = "<";
+#define pf(f) { if (ti->ti_flags&TH_/**/f) { printf("%s%s", cp, "f"); cp = ","; } }
+ pf(SYN); pf(ACK); pf(FIN); pf(RST); pf(PUSH); pf(URG);
+#endif
+ printf(">");
+ }
+ break;
+
+ case TA_USER:
+ printf("%s", prurequests[req&0xff]);
+ if ((req & 0xff) == PRU_SLOWTIMO)
+ printf("<%s>", tcptimers[req>>8]);
+ break;
+ }
+ if (tp)
+ printf(" -> %s", tcpstates[tp->t_state]);
+ /* print out internal state of tp !?! */
+ printf("\n");
+ if (tp == 0)
+ return;
+ printf("\trcv_(nxt,wnd,up) (%x,%x,%x) snd_(una,nxt,max) (%x,%x,%x)\n",
+ tp->rcv_nxt, tp->rcv_wnd, tp->rcv_up, tp->snd_una, tp->snd_nxt,
+ tp->snd_max);
+ printf("\tsnd_(wl1,wl2,wnd) (%x,%x,%x)\n",
+ tp->snd_wl1, tp->snd_wl2, tp->snd_wnd);
+#endif /* TCPDEBUG */
+}
diff --git a/sys/netinet/tcp_debug.h b/sys/netinet/tcp_debug.h
new file mode 100644
index 000000000000..c02c0cd521d2
--- /dev/null
+++ b/sys/netinet/tcp_debug.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_debug.h 8.1 (Berkeley) 6/10/93
+ */
+
+struct tcp_debug {
+ n_time td_time;
+ short td_act;
+ short td_ostate;
+ caddr_t td_tcb;
+ struct tcpiphdr td_ti;
+ short td_req;
+ struct tcpcb td_cb;
+};
+
+#define TA_INPUT 0
+#define TA_OUTPUT 1
+#define TA_USER 2
+#define TA_RESPOND 3
+#define TA_DROP 4
+
+#ifdef TANAMES
+char *tanames[] =
+ { "input", "output", "user", "respond", "drop" };
+#endif
+
+#define TCP_NDEBUG 100
+struct tcp_debug tcp_debug[TCP_NDEBUG];
+int tcp_debx;
diff --git a/sys/netinet/tcp_fsm.h b/sys/netinet/tcp_fsm.h
new file mode 100644
index 000000000000..c5da7fc32d91
--- /dev/null
+++ b/sys/netinet/tcp_fsm.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_fsm.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * TCP FSM state definitions.
+ * Per RFC793, September, 1981.
+ */
+
+#define TCP_NSTATES 11
+
+#define TCPS_CLOSED 0 /* closed */
+#define TCPS_LISTEN 1 /* listening for connection */
+#define TCPS_SYN_SENT 2 /* active, have sent syn */
+#define TCPS_SYN_RECEIVED 3 /* have sent and received syn */
+/* states < TCPS_ESTABLISHED are those where connections not established */
+#define TCPS_ESTABLISHED 4 /* established */
+#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */
+/* states > TCPS_CLOSE_WAIT are those where user has closed */
+#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */
+#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */
+#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */
+/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */
+#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */
+#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */
+
+#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED)
+/*
+ * NOTE(review): only TCPS_TIME_WAIT satisfies the test below, although the
+ * state comments above say CLOSE_WAIT, CLOSING and LAST_ACK have also seen
+ * a fin -- confirm that this is intended.
+ */
+#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT)
+
+#ifdef TCPOUTFLAGS
+/*
+ * Flags used when sending segments in tcp_output.
+ * Basic flags (TH_RST,TH_ACK,TH_SYN,TH_FIN) are totally
+ * determined by state, with the proviso that TH_FIN is sent only
+ * if all data queued for output is included in the segment.
+ */
+u_char tcp_outflags[TCP_NSTATES] = {
+ TH_RST|TH_ACK, 0, TH_SYN, TH_SYN|TH_ACK,
+ TH_ACK, TH_ACK,
+ TH_FIN|TH_ACK, TH_FIN|TH_ACK, TH_FIN|TH_ACK, TH_ACK, TH_ACK,
+};
+#endif
+
+#ifdef KPROF
+int tcp_acounts[TCP_NSTATES][PRU_NREQ];
+#endif
+
+#ifdef TCPSTATES
+char *tcpstates[] = {
+ "CLOSED", "LISTEN", "SYN_SENT", "SYN_RCVD",
+ "ESTABLISHED", "CLOSE_WAIT", "FIN_WAIT_1", "CLOSING",
+ "LAST_ACK", "FIN_WAIT_2", "TIME_WAIT",
+};
+#endif
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
new file mode 100644
index 000000000000..2dd1d749c409
--- /dev/null
+++ b/sys/netinet/tcp_input.c
@@ -0,0 +1,1647 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
+ */
+
+#ifndef TUBA_INCLUDE
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+int tcprexmtthresh = 3; /* t_dupacks value at which tcp_input retransmits the missing segment */
+struct tcpiphdr tcp_saveti; /* copy of the incoming header, taken in tcp_input when SO_DEBUG is set */
+struct inpcb *tcp_last_inpcb = &tcb; /* one-entry cache of the last PCB tcp_input matched; starts at the list head tcb */
+
+extern u_long sb_max; /* defined elsewhere; tcp_input rejects urgent offsets that would exceed it */
+
+#endif /* TUBA_INCLUDE */
+/*
+ * Age limit for ts_recent in the PAWS check: 24 days' worth of
+ * seconds, scaled by PR_SLOWHZ.
+ */
+#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ)
+
+/*
+ * Ordering tests for timestamps that may wrap around: the
+ * difference is converted to int and only its sign is examined.
+ */
+#define TSTMP_LT(x,y) (0 > (int)((x)-(y)))
+#define TSTMP_GEQ(x,y) (!TSTMP_LT(x,y))
+
+
+/*
+ * Insert segment ti into reassembly queue of tcp with
+ * control block tp, and store in flags TH_FIN if the data handed
+ * to the socket ended with a segment carrying FIN (0 otherwise).
+ * The macro form does the common case inline
+ * (segment is the next to be received on an established connection,
+ * and the queue is empty), avoiding linkage into and removal
+ * from the queue and repetition of various conversions;
+ * everything else is passed to tcp_reass().
+ * Set DELACK for segments received in order, but ack immediately
+ * when segments are out of order (so fast retransmit can work).
+ *
+ *	tp	TCP control block of the connection
+ *	ti	header of the arriving segment
+ *	m	mbuf chain holding the segment data
+ *	so	socket whose receive buffer takes in-order data
+ *	flags	lvalue that receives the result
+ *
+ * Every argument is parenthesized at each use, but several are
+ * evaluated more than once, so they must be free of side effects.
+ * The expansion is a brace-enclosed compound statement.
+ */
+#define TCP_REASS(tp, ti, m, so, flags) { \
+	if ((ti)->ti_seq == (tp)->rcv_nxt && \
+	    (tp)->seg_next == (struct tcpiphdr *)(tp) && \
+	    (tp)->t_state == TCPS_ESTABLISHED) { \
+		(tp)->t_flags |= TF_DELACK; \
+		(tp)->rcv_nxt += (ti)->ti_len; \
+		(flags) = (ti)->ti_flags & TH_FIN; \
+		tcpstat.tcps_rcvpack++; \
+		tcpstat.tcps_rcvbyte += (ti)->ti_len; \
+		sbappend(&(so)->so_rcv, (m)); \
+		sorwakeup(so); \
+	} else { \
+		(flags) = tcp_reass((tp), (ti), (m)); \
+		(tp)->t_flags |= TF_ACKNOW; \
+	} \
+}
+#ifndef TUBA_INCLUDE
+
+int /* (ti_flags & TH_FIN) of the last segment handed to the socket, or 0 if nothing was delivered */
+tcp_reass(tp, ti, m)
+ register struct tcpcb *tp; /* control block owning the reassembly queue */
+ register struct tcpiphdr *ti; /* header of the segment to queue, or 0 to only deliver queued data */
+ struct mbuf *m; /* mbuf chain for ti; freed here when ti is a complete duplicate */
+{
+ register struct tcpiphdr *q;
+ struct socket *so = tp->t_inpcb->inp_socket;
+ int flags;
+
+ /*
+ * Call with ti==0 after becoming established to
+ * force pre-ESTABLISHED data up to the user socket.
+ */
+ if (ti == 0)
+ goto present;
+
+ /*
+ * Find a segment which begins after this one does.
+ * The queue is circular and the control block itself acts as
+ * the list head, so q == tp means the end was reached.
+ */
+ for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
+ q = (struct tcpiphdr *)q->ti_next)
+ if (SEQ_GT(q->ti_seq, ti->ti_seq))
+ break;
+
+ /*
+ * If there is a preceding segment, it may provide some of
+ * our data already. If so, drop the data from the incoming
+ * segment. If it provides all of our data, drop us
+ * (count it as a duplicate, free m and return 0).
+ */
+ if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
+ register int i;
+ q = (struct tcpiphdr *)q->ti_prev;
+ /* conversion to int (in i) handles seq wraparound */
+ i = q->ti_seq + q->ti_len - ti->ti_seq;
+ if (i > 0) {
+ if (i >= ti->ti_len) {
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += ti->ti_len;
+ m_freem(m);
+ return (0);
+ }
+ m_adj(m, i); /* discard the i bytes the predecessor already holds */
+ ti->ti_len -= i;
+ ti->ti_seq += i;
+ }
+ q = (struct tcpiphdr *)(q->ti_next); /* step back to the first segment after ti */
+ }
+ tcpstat.tcps_rcvoopack++;
+ tcpstat.tcps_rcvoobyte += ti->ti_len;
+ REASS_MBUF(ti) = m; /* XXX keep the mbuf chain with the header; it is fetched again on dequeue */
+
+ /*
+ * While we overlap succeeding segments trim them or,
+ * if they are completely covered, dequeue them.
+ * Here i is the number of bytes of q that ti covers.
+ */
+ while (q != (struct tcpiphdr *)tp) {
+ register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
+ if (i <= 0)
+ break;
+ if (i < q->ti_len) {
+ q->ti_seq += i;
+ q->ti_len -= i;
+ m_adj(REASS_MBUF(q), i);
+ break;
+ }
+ q = (struct tcpiphdr *)q->ti_next; /* advance first: the covered segment is now q->ti_prev */
+ m = REASS_MBUF((struct tcpiphdr *)q->ti_prev); /* m is reused; the caller's chain is already saved with ti */
+ remque(q->ti_prev);
+ m_freem(m);
+ }
+
+ /*
+ * Stick new segment in its place
+ * (immediately before q).
+ */
+ insque(ti, q->ti_prev);
+
+present:
+ /*
+ * Present data to user, advancing rcv_nxt through
+ * completed sequence space.
+ * Nothing is delivered (return 0) before a SYN has been
+ * received, when the queue is empty or does not start at
+ * rcv_nxt, or in SYN_RECEIVED when the head carries data.
+ */
+ if (TCPS_HAVERCVDSYN(tp->t_state) == 0)
+ return (0);
+ ti = tp->seg_next;
+ if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)
+ return (0);
+ if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
+ return (0);
+ do {
+ tp->rcv_nxt += ti->ti_len;
+ flags = ti->ti_flags & TH_FIN; /* overwritten every pass: only the last segment's FIN bit survives */
+ remque(ti);
+ m = REASS_MBUF(ti);
+ ti = (struct tcpiphdr *)ti->ti_next; /* NOTE(review): read after remque(ti); relies on remque leaving the removed element's links intact */
+ if (so->so_state & SS_CANTRCVMORE)
+ m_freem(m); /* socket can no longer receive: discard the data */
+ else
+ sbappend(&so->so_rcv, m);
+ } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
+ sorwakeup(so);
+ return (flags);
+}
+
+/*
+ * TCP input routine, follows pages 65-76 of the
+ * protocol specification dated September, 1981 very closely.
+ */
+void
+tcp_input(m, iphlen)
+ register struct mbuf *m;
+ int iphlen;
+{
+ register struct tcpiphdr *ti;
+ register struct inpcb *inp;
+ caddr_t optp = NULL;
+ int optlen;
+ int len, tlen, off;
+ register struct tcpcb *tp = 0;
+ register int tiflags;
+ struct socket *so;
+ int todrop, acked, ourfinisacked, needoutput = 0;
+ short ostate;
+ struct in_addr laddr;
+ int dropsocket = 0;
+ int iss = 0;
+ u_long tiwin, ts_val, ts_ecr;
+ int ts_present = 0;
+
+ tcpstat.tcps_rcvtotal++;
+ /*
+ * Get IP and TCP header together in first mbuf.
+ * Note: IP leaves IP header in first mbuf.
+ */
+ ti = mtod(m, struct tcpiphdr *);
+ if (iphlen > sizeof (struct ip))
+ ip_stripoptions(m, (struct mbuf *)0);
+ if (m->m_len < sizeof (struct tcpiphdr)) {
+ if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
+ tcpstat.tcps_rcvshort++;
+ return;
+ }
+ ti = mtod(m, struct tcpiphdr *);
+ }
+
+ /*
+ * Checksum extended TCP header and data.
+ */
+ tlen = ((struct ip *)ti)->ip_len;
+ len = sizeof (struct ip) + tlen;
+ ti->ti_next = ti->ti_prev = 0;
+ ti->ti_x1 = 0;
+ ti->ti_len = (u_short)tlen;
+ HTONS(ti->ti_len);
+ if (ti->ti_sum = in_cksum(m, len)) {
+ tcpstat.tcps_rcvbadsum++;
+ goto drop;
+ }
+#endif /* TUBA_INCLUDE */
+
+ /*
+ * Check that TCP offset makes sense,
+ * pull out TCP options and adjust length. XXX
+ */
+ off = ti->ti_off << 2;
+ if (off < sizeof (struct tcphdr) || off > tlen) {
+ tcpstat.tcps_rcvbadoff++;
+ goto drop;
+ }
+ tlen -= off;
+ ti->ti_len = tlen;
+ if (off > sizeof (struct tcphdr)) {
+ if (m->m_len < sizeof(struct ip) + off) {
+ if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
+ tcpstat.tcps_rcvshort++;
+ return;
+ }
+ ti = mtod(m, struct tcpiphdr *);
+ }
+ optlen = off - sizeof (struct tcphdr);
+ optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
+ /*
+ * Do quick retrieval of timestamp options ("options
+ * prediction?"). If timestamp is the only option and it's
+ * formatted as recommended in RFC 1323 appendix A, we
+ * quickly get the values now and not bother calling
+ * tcp_dooptions(), etc.
+ */
+ if ((optlen == TCPOLEN_TSTAMP_APPA ||
+ (optlen > TCPOLEN_TSTAMP_APPA &&
+ optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
+ *(u_long *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
+ (ti->ti_flags & TH_SYN) == 0) {
+ ts_present = 1;
+ ts_val = ntohl(*(u_long *)(optp + 4));
+ ts_ecr = ntohl(*(u_long *)(optp + 8));
+ optp = NULL; /* we've parsed the options */
+ }
+ }
+ tiflags = ti->ti_flags;
+
+ /*
+ * Convert TCP protocol specific fields to host format.
+ */
+ NTOHL(ti->ti_seq);
+ NTOHL(ti->ti_ack);
+ NTOHS(ti->ti_win);
+ NTOHS(ti->ti_urp);
+
+ /*
+ * Locate pcb for segment.
+ */
+findpcb:
+ inp = tcp_last_inpcb;
+ if (inp->inp_lport != ti->ti_dport ||
+ inp->inp_fport != ti->ti_sport ||
+ inp->inp_faddr.s_addr != ti->ti_src.s_addr ||
+ inp->inp_laddr.s_addr != ti->ti_dst.s_addr) {
+ inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport,
+ ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD);
+ if (inp)
+ tcp_last_inpcb = inp;
+ ++tcpstat.tcps_pcbcachemiss;
+ }
+
+ /*
+ * If the state is CLOSED (i.e., TCB does not exist) then
+ * all data in the incoming segment is discarded.
+ * If the TCB exists but is in CLOSED state, it is embryonic,
+ * but should either do a listen or a connect soon.
+ */
+ if (inp == 0)
+ goto dropwithreset;
+ tp = intotcpcb(inp);
+ if (tp == 0)
+ goto dropwithreset;
+ if (tp->t_state == TCPS_CLOSED)
+ goto drop;
+
+ /* Unscale the window into a 32-bit value. */
+ if ((tiflags & TH_SYN) == 0)
+ tiwin = ti->ti_win << tp->snd_scale;
+ else
+ tiwin = ti->ti_win;
+
+ so = inp->inp_socket;
+ if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
+ if (so->so_options & SO_DEBUG) {
+ ostate = tp->t_state;
+ tcp_saveti = *ti;
+ }
+ if (so->so_options & SO_ACCEPTCONN) {
+ so = sonewconn(so, 0);
+ if (so == 0)
+ goto drop;
+ /*
+ * This is ugly, but ....
+ *
+ * Mark socket as temporary until we're
+ * committed to keeping it. The code at
+ * ``drop'' and ``dropwithreset'' check the
+ * flag dropsocket to see if the temporary
+ * socket created here should be discarded.
+ * We mark the socket as discardable until
+ * we're committed to it below in TCPS_LISTEN.
+ */
+ dropsocket++;
+ inp = (struct inpcb *)so->so_pcb;
+ inp->inp_laddr = ti->ti_dst;
+ inp->inp_lport = ti->ti_dport;
+#if BSD>=43
+ inp->inp_options = ip_srcroute();
+#endif
+ tp = intotcpcb(inp);
+ tp->t_state = TCPS_LISTEN;
+
+ /* Compute proper scaling value from buffer space
+ */
+ while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+ TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat)
+ tp->request_r_scale++;
+ }
+ }
+
+ /*
+ * Segment received on connection.
+ * Reset idle time and keep-alive timer.
+ */
+ tp->t_idle = 0;
+ tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+
+ /*
+ * Process options if not in LISTEN state,
+ * else do it below (after getting remote address).
+ */
+ if (optp && tp->t_state != TCPS_LISTEN)
+ tcp_dooptions(tp, optp, optlen, ti,
+ &ts_present, &ts_val, &ts_ecr);
+
+ /*
+ * Header prediction: check for the two common cases
+ * of a uni-directional data xfer. If the packet has
+ * no control flags, is in-sequence, the window didn't
+ * change and we're not retransmitting, it's a
+ * candidate. If the length is zero and the ack moved
+ * forward, we're the sender side of the xfer. Just
+ * free the data acked & wake any higher level process
+ * that was blocked waiting for space. If the length
+ * is non-zero and the ack didn't move, we're the
+ * receiver side. If we're getting packets in-order
+ * (the reassembly queue is empty), add the data to
+ * the socket buffer and note that we need a delayed ack.
+ */
+ if (tp->t_state == TCPS_ESTABLISHED &&
+ (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+ (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) &&
+ ti->ti_seq == tp->rcv_nxt &&
+ tiwin && tiwin == tp->snd_wnd &&
+ tp->snd_nxt == tp->snd_max) {
+
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record the timestamp.
+ */
+ if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
+ SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) {
+ tp->ts_recent_age = tcp_now;
+ tp->ts_recent = ts_val;
+ }
+
+ if (ti->ti_len == 0) {
+ if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
+ SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
+ tp->snd_cwnd >= tp->snd_wnd) {
+ /*
+ * this is a pure ack for outstanding data.
+ */
+ ++tcpstat.tcps_predack;
+ if (ts_present)
+ tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
+ else if (tp->t_rtt &&
+ SEQ_GT(ti->ti_ack, tp->t_rtseq))
+ tcp_xmit_timer(tp, tp->t_rtt);
+ acked = ti->ti_ack - tp->snd_una;
+ tcpstat.tcps_rcvackpack++;
+ tcpstat.tcps_rcvackbyte += acked;
+ sbdrop(&so->so_snd, acked);
+ tp->snd_una = ti->ti_ack;
+ m_freem(m);
+
+ /*
+ * If all outstanding data are acked, stop
+ * retransmit timer, otherwise restart timer
+ * using current (possibly backed-off) value.
+ * If process is waiting for space,
+ * wakeup/selwakeup/signal. If data
+ * are ready to send, let tcp_output
+ * decide between more output or persist.
+ */
+ if (tp->snd_una == tp->snd_max)
+ tp->t_timer[TCPT_REXMT] = 0;
+ else if (tp->t_timer[TCPT_PERSIST] == 0)
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+
+ if (so->so_snd.sb_flags & SB_NOTIFY)
+ sowwakeup(so);
+ if (so->so_snd.sb_cc)
+ (void) tcp_output(tp);
+ return;
+ }
+ } else if (ti->ti_ack == tp->snd_una &&
+ tp->seg_next == (struct tcpiphdr *)tp &&
+ ti->ti_len <= sbspace(&so->so_rcv)) {
+ /*
+ * this is a pure, in-sequence data packet
+ * with nothing on the reassembly queue and
+ * we have enough buffer space to take it.
+ */
+ ++tcpstat.tcps_preddat;
+ tp->rcv_nxt += ti->ti_len;
+ tcpstat.tcps_rcvpack++;
+ tcpstat.tcps_rcvbyte += ti->ti_len;
+ /*
+ * Drop TCP, IP headers and TCP options then add data
+ * to socket buffer.
+ */
+ m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+ m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+ sbappend(&so->so_rcv, m);
+ sorwakeup(so);
+ tp->t_flags |= TF_DELACK;
+ return;
+ }
+ }
+
+ /*
+ * Drop TCP, IP headers and TCP options.
+ */
+ m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+ m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+
+ /*
+ * Calculate amount of space in receive window,
+ * and then do TCP input processing.
+ * Receive window is amount of space in rcv queue,
+ * but not less than advertised window.
+ */
+ { int win;
+
+ win = sbspace(&so->so_rcv);
+ if (win < 0)
+ win = 0;
+ tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
+ }
+
+ switch (tp->t_state) {
+
+ /*
+ * If the state is LISTEN then ignore segment if it contains an RST.
+ * If the segment contains an ACK then it is bad and send a RST.
+ * If it does not contain a SYN then it is not interesting; drop it.
+ * Don't bother responding if the destination was a broadcast.
+ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
+ * tp->iss, and send a segment:
+ * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
+ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
+ * Fill in remote peer address fields if not previously specified.
+ * Enter SYN_RECEIVED state, and process any other fields of this
+ * segment in this state.
+ */
+ case TCPS_LISTEN: {
+ struct mbuf *am;
+ register struct sockaddr_in *sin;
+
+ if (tiflags & TH_RST)
+ goto drop;
+ if (tiflags & TH_ACK)
+ goto dropwithreset;
+ if ((tiflags & TH_SYN) == 0)
+ goto drop;
+ /*
+ * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
+ * in_broadcast() should never return true on a received
+ * packet with M_BCAST not set.
+ */
+ if (m->m_flags & (M_BCAST|M_MCAST) ||
+ IN_MULTICAST(ti->ti_dst.s_addr))
+ goto drop;
+ am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */
+ if (am == NULL)
+ goto drop;
+ am->m_len = sizeof (struct sockaddr_in);
+ sin = mtod(am, struct sockaddr_in *);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = ti->ti_src;
+ sin->sin_port = ti->ti_sport;
+ bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
+ laddr = inp->inp_laddr;
+ if (inp->inp_laddr.s_addr == INADDR_ANY)
+ inp->inp_laddr = ti->ti_dst;
+ if (in_pcbconnect(inp, am)) {
+ inp->inp_laddr = laddr;
+ (void) m_free(am);
+ goto drop;
+ }
+ (void) m_free(am);
+ tp->t_template = tcp_template(tp);
+ if (tp->t_template == 0) {
+ tp = tcp_drop(tp, ENOBUFS);
+ dropsocket = 0; /* socket is already gone */
+ goto drop;
+ }
+ if (optp)
+ tcp_dooptions(tp, optp, optlen, ti,
+ &ts_present, &ts_val, &ts_ecr);
+ if (iss)
+ tp->iss = iss;
+ else
+ tp->iss = tcp_iss;
+ tcp_iss += TCP_ISSINCR/2;
+ tp->irs = ti->ti_seq;
+ tcp_sendseqinit(tp);
+ tcp_rcvseqinit(tp);
+ tp->t_flags |= TF_ACKNOW;
+ tp->t_state = TCPS_SYN_RECEIVED;
+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+ dropsocket = 0; /* committed to socket */
+ tcpstat.tcps_accepts++;
+ goto trimthenstep6;
+ }
+
+ /*
+ * If the state is SYN_SENT:
+ * if seg contains an ACK, but not for our SYN, drop the input.
+ * if seg contains a RST, then drop the connection.
+ * if seg does not contain SYN, then drop it.
+ * Otherwise this is an acceptable SYN segment
+ * initialize tp->rcv_nxt and tp->irs
+ * if seg contains ack then advance tp->snd_una
+ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state
+ * arrange for segment to be acked (eventually)
+ * continue processing rest of data/controls, beginning with URG
+ */
+ case TCPS_SYN_SENT:
+ if ((tiflags & TH_ACK) &&
+ (SEQ_LEQ(ti->ti_ack, tp->iss) ||
+ SEQ_GT(ti->ti_ack, tp->snd_max)))
+ goto dropwithreset;
+ if (tiflags & TH_RST) {
+ if (tiflags & TH_ACK)
+ tp = tcp_drop(tp, ECONNREFUSED);
+ goto drop;
+ }
+ if ((tiflags & TH_SYN) == 0)
+ goto drop;
+ if (tiflags & TH_ACK) {
+ tp->snd_una = ti->ti_ack;
+ if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+ tp->snd_nxt = tp->snd_una;
+ }
+ tp->t_timer[TCPT_REXMT] = 0;
+ tp->irs = ti->ti_seq;
+ tcp_rcvseqinit(tp);
+ tp->t_flags |= TF_ACKNOW;
+ if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
+ tcpstat.tcps_connects++;
+ soisconnected(so);
+ tp->t_state = TCPS_ESTABLISHED;
+ /* Do window scaling on this connection? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->snd_scale = tp->requested_s_scale;
+ tp->rcv_scale = tp->request_r_scale;
+ }
+ (void) tcp_reass(tp, (struct tcpiphdr *)0,
+ (struct mbuf *)0);
+ /*
+ * if we didn't have to retransmit the SYN,
+ * use its rtt as our initial srtt & rtt var.
+ */
+ if (tp->t_rtt)
+ tcp_xmit_timer(tp, tp->t_rtt);
+ } else
+ tp->t_state = TCPS_SYN_RECEIVED;
+
+trimthenstep6:
+ /*
+ * Advance ti->ti_seq to correspond to first data byte.
+ * If data, trim to stay within window,
+ * dropping FIN if necessary.
+ */
+ ti->ti_seq++;
+ if (ti->ti_len > tp->rcv_wnd) {
+ todrop = ti->ti_len - tp->rcv_wnd;
+ m_adj(m, -todrop);
+ ti->ti_len = tp->rcv_wnd;
+ tiflags &= ~TH_FIN;
+ tcpstat.tcps_rcvpackafterwin++;
+ tcpstat.tcps_rcvbyteafterwin += todrop;
+ }
+ tp->snd_wl1 = ti->ti_seq - 1;
+ tp->rcv_up = ti->ti_seq;
+ goto step6;
+ }
+
+ /*
+ * States other than LISTEN or SYN_SENT.
+ * First check timestamp, if present.
+ * Then check that at least some bytes of segment are within
+ * receive window. If segment begins before rcv_nxt,
+ * drop leading data (and SYN); if nothing left, just ack.
+ *
+ * RFC 1323 PAWS: If this segment carries a timestamp whose value
+ * (ts_val) is less than ts_recent, drop it.
+ */
+ if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
+ TSTMP_LT(ts_val, tp->ts_recent)) {
+
+ /* Check to see if ts_recent is over 24 days old. */
+ if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
+ /*
+ * Invalidate ts_recent. If this segment updates
+ * ts_recent, the age will be reset later and ts_recent
+ * will get a valid value. If it does not, setting
+ * ts_recent to zero will at least satisfy the
+ * requirement that zero be placed in the timestamp
+ * echo reply when ts_recent isn't valid. The
+ * age isn't reset until we get a valid ts_recent
+ * because we don't want out-of-order segments to be
+ * dropped when ts_recent is old.
+ */
+ tp->ts_recent = 0;
+ } else {
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += ti->ti_len;
+ tcpstat.tcps_pawsdrop++;
+ goto dropafterack;
+ }
+ }
+
+ todrop = tp->rcv_nxt - ti->ti_seq;
+ if (todrop > 0) {
+ if (tiflags & TH_SYN) {
+ tiflags &= ~TH_SYN;
+ ti->ti_seq++;
+ if (ti->ti_urp > 1)
+ ti->ti_urp--;
+ else
+ tiflags &= ~TH_URG;
+ todrop--;
+ }
+ if (todrop >= ti->ti_len) {
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += ti->ti_len;
+ /*
+ * If segment is just one to the left of the window,
+ * check two special cases:
+ * 1. Don't toss RST in response to 4.2-style keepalive.
+ * 2. If the only thing to drop is a FIN, we can drop
+ * it, but check the ACK or we will get into FIN
+ * wars if our FINs crossed (both CLOSING).
+ * In either case, send ACK to resynchronize,
+ * but keep on processing for RST or ACK.
+ */
+ if ((tiflags & TH_FIN && todrop == ti->ti_len + 1)
+#ifdef TCP_COMPAT_42
+ || (tiflags & TH_RST && ti->ti_seq == tp->rcv_nxt - 1)
+#endif
+ ) {
+ todrop = ti->ti_len;
+ tiflags &= ~TH_FIN;
+ tp->t_flags |= TF_ACKNOW;
+ } else {
+ /*
+ * Handle the case when a bound socket connects
+ * to itself. Allow packets with a SYN and
+ * an ACK to continue with the processing.
+ */
+ if (todrop != 0 || (tiflags & TH_ACK) == 0)
+ goto dropafterack;
+ }
+ } else {
+ tcpstat.tcps_rcvpartduppack++;
+ tcpstat.tcps_rcvpartdupbyte += todrop;
+ }
+ m_adj(m, todrop);
+ ti->ti_seq += todrop;
+ ti->ti_len -= todrop;
+ if (ti->ti_urp > todrop)
+ ti->ti_urp -= todrop;
+ else {
+ tiflags &= ~TH_URG;
+ ti->ti_urp = 0;
+ }
+ }
+
+ /*
+ * If new data are received on a connection after the
+ * user processes are gone, then RST the other end.
+ */
+ if ((so->so_state & SS_NOFDREF) &&
+ tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
+ tp = tcp_close(tp);
+ tcpstat.tcps_rcvafterclose++;
+ goto dropwithreset;
+ }
+
+ /*
+ * If segment ends after window, drop trailing data
+ * (and PUSH and FIN); if nothing left, just ACK.
+ */
+ todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
+ if (todrop > 0) {
+ tcpstat.tcps_rcvpackafterwin++;
+ if (todrop >= ti->ti_len) {
+ tcpstat.tcps_rcvbyteafterwin += ti->ti_len;
+ /*
+ * If a new connection request is received
+ * while in TIME_WAIT, drop the old connection
+ * and start over if the sequence numbers
+ * are above the previous ones.
+ */
+ if (tiflags & TH_SYN &&
+ tp->t_state == TCPS_TIME_WAIT &&
+ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
+ iss = tp->rcv_nxt + TCP_ISSINCR;
+ tp = tcp_close(tp);
+ goto findpcb;
+ }
+ /*
+ * If window is closed can only take segments at
+ * window edge, and have to drop data and PUSH from
+ * incoming segments. Continue processing, but
+ * remember to ack. Otherwise, drop segment
+ * and ack.
+ */
+ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
+ tp->t_flags |= TF_ACKNOW;
+ tcpstat.tcps_rcvwinprobe++;
+ } else
+ goto dropafterack;
+ } else
+ tcpstat.tcps_rcvbyteafterwin += todrop;
+ m_adj(m, -todrop);
+ ti->ti_len -= todrop;
+ tiflags &= ~(TH_PUSH|TH_FIN);
+ }
+
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record its timestamp.
+ */
+ if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
+ SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len +
+ ((tiflags & (TH_SYN|TH_FIN)) != 0))) {
+ tp->ts_recent_age = tcp_now;
+ tp->ts_recent = ts_val;
+ }
+
+ /*
+ * If the RST bit is set examine the state:
+ * SYN_RECEIVED STATE:
+ * If passive open, return to LISTEN state.
+ * If active open, inform user that connection was refused.
+ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT STATES:
+ * Inform user that connection was reset, and close tcb.
+ * CLOSING, LAST_ACK, TIME_WAIT STATES
+ * Close the tcb.
+ */
+ if (tiflags&TH_RST) switch (tp->t_state) {
+
+ case TCPS_SYN_RECEIVED:
+ so->so_error = ECONNREFUSED;
+ goto close;
+
+ case TCPS_ESTABLISHED:
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ so->so_error = ECONNRESET;
+ close:
+ tp->t_state = TCPS_CLOSED;
+ tcpstat.tcps_drops++;
+ tp = tcp_close(tp);
+ goto drop;
+
+ case TCPS_CLOSING:
+ case TCPS_LAST_ACK:
+ case TCPS_TIME_WAIT:
+ tp = tcp_close(tp);
+ goto drop;
+ }
+
+ /*
+ * If a SYN is in the window, then this is an
+ * error and we send an RST and drop the connection.
+ */
+ if (tiflags & TH_SYN) {
+ tp = tcp_drop(tp, ECONNRESET);
+ goto dropwithreset;
+ }
+
+ /*
+ * If the ACK bit is off we drop the segment and return.
+ */
+ if ((tiflags & TH_ACK) == 0)
+ goto drop;
+
+ /*
+ * Ack processing.
+ */
+ switch (tp->t_state) {
+
+ /*
+ * In SYN_RECEIVED state if the ack ACKs our SYN then enter
+ * ESTABLISHED state and continue processing, otherwise
+ * send an RST.
+ */
+ case TCPS_SYN_RECEIVED:
+ if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
+ SEQ_GT(ti->ti_ack, tp->snd_max))
+ goto dropwithreset;
+ tcpstat.tcps_connects++;
+ soisconnected(so);
+ tp->t_state = TCPS_ESTABLISHED;
+ /* Do window scaling? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->snd_scale = tp->requested_s_scale;
+ tp->rcv_scale = tp->request_r_scale;
+ }
+ (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
+ tp->snd_wl1 = ti->ti_seq - 1;
+ /* fall into ... */
+
+ /*
+ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
+ * ACKs. If the ack is in the range
+ * tp->snd_una < ti->ti_ack <= tp->snd_max
+ * then advance tp->snd_una to ti->ti_ack and drop
+ * data from the retransmission queue. If this ACK reflects
+ * more up to date window information we update our window information.
+ */
+ case TCPS_ESTABLISHED:
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ case TCPS_CLOSING:
+ case TCPS_LAST_ACK:
+ case TCPS_TIME_WAIT:
+
+ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
+ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
+ tcpstat.tcps_rcvdupack++;
+ /*
+ * If we have outstanding data (other than
+ * a window probe), this is a completely
+ * duplicate ack (ie, window info didn't
+ * change), the ack is the biggest we've
+ * seen and we've seen exactly our rexmt
+ * threshold of them, assume a packet
+ * has been dropped and retransmit it.
+ * Kludge snd_nxt & the congestion
+ * window so we send only this one
+ * packet.
+ *
+ * We know we're losing at the current
+ * window size so do congestion avoidance
+ * (set ssthresh to half the current window
+ * and pull our congestion window back to
+ * the new ssthresh).
+ *
+ * Dup acks mean that packets have left the
+ * network (they're now cached at the receiver)
+ * so bump cwnd by the amount in the receiver
+ * to keep a constant cwnd packets in the
+ * network.
+ */
+ if (tp->t_timer[TCPT_REXMT] == 0 ||
+ ti->ti_ack != tp->snd_una)
+ tp->t_dupacks = 0;
+ else if (++tp->t_dupacks == tcprexmtthresh) {
+ tcp_seq onxt = tp->snd_nxt;
+ u_int win =
+ min(tp->snd_wnd, tp->snd_cwnd) / 2 /
+ tp->t_maxseg;
+
+ if (win < 2)
+ win = 2;
+ tp->snd_ssthresh = win * tp->t_maxseg;
+ tp->t_timer[TCPT_REXMT] = 0;
+ tp->t_rtt = 0;
+ tp->snd_nxt = ti->ti_ack;
+ tp->snd_cwnd = tp->t_maxseg;
+ (void) tcp_output(tp);
+ tp->snd_cwnd = tp->snd_ssthresh +
+ tp->t_maxseg * tp->t_dupacks;
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ goto drop;
+ } else if (tp->t_dupacks > tcprexmtthresh) {
+ tp->snd_cwnd += tp->t_maxseg;
+ (void) tcp_output(tp);
+ goto drop;
+ }
+ } else
+ tp->t_dupacks = 0;
+ break;
+ }
+ /*
+ * If the congestion window was inflated to account
+ * for the other side's cached packets, retract it.
+ */
+ if (tp->t_dupacks > tcprexmtthresh &&
+ tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
+ tp->t_dupacks = 0;
+ if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
+ tcpstat.tcps_rcvacktoomuch++;
+ goto dropafterack;
+ }
+ acked = ti->ti_ack - tp->snd_una;
+ tcpstat.tcps_rcvackpack++;
+ tcpstat.tcps_rcvackbyte += acked;
+
+ /*
+ * If we have a timestamp reply, update smoothed
+ * round trip time. If no timestamp is present but
+ * transmit timer is running and timed sequence
+ * number was acked, update smoothed round trip time.
+ * Since we now have an rtt measurement, cancel the
+ * timer backoff (cf., Phil Karn's retransmit alg.).
+ * Recompute the initial retransmit timer.
+ */
+ if (ts_present)
+ tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
+ else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
+ tcp_xmit_timer(tp,tp->t_rtt);
+
+ /*
+ * If all outstanding data is acked, stop retransmit
+ * timer and remember to restart (more output or persist).
+ * If there is more data to be acked, restart retransmit
+ * timer, using current (possibly backed-off) value.
+ */
+ if (ti->ti_ack == tp->snd_max) {
+ tp->t_timer[TCPT_REXMT] = 0;
+ needoutput = 1;
+ } else if (tp->t_timer[TCPT_PERSIST] == 0)
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+ /*
+ * When new data is acked, open the congestion window.
+ * If the window gives us less than ssthresh packets
+ * in flight, open exponentially (maxseg per packet).
+ * Otherwise open linearly: maxseg per window
+ * (maxseg^2 / cwnd per packet), plus a constant
+ * fraction of a packet (maxseg/8) to help larger windows
+ * open quickly enough.
+ */
+ {
+ register u_int cw = tp->snd_cwnd;
+ register u_int incr = tp->t_maxseg;
+
+ if (cw > tp->snd_ssthresh)
+ incr = incr * incr / cw + incr / 8;
+ tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
+ }
+ if (acked > so->so_snd.sb_cc) {
+ tp->snd_wnd -= so->so_snd.sb_cc;
+ sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
+ ourfinisacked = 1;
+ } else {
+ sbdrop(&so->so_snd, acked);
+ tp->snd_wnd -= acked;
+ ourfinisacked = 0;
+ }
+ if (so->so_snd.sb_flags & SB_NOTIFY)
+ sowwakeup(so);
+ tp->snd_una = ti->ti_ack;
+ if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+ tp->snd_nxt = tp->snd_una;
+
+ switch (tp->t_state) {
+
+ /*
+ * In FIN_WAIT_1 STATE in addition to the processing
+ * for the ESTABLISHED state if our FIN is now acknowledged
+ * then enter FIN_WAIT_2.
+ */
+ case TCPS_FIN_WAIT_1:
+ if (ourfinisacked) {
+ /*
+ * If we can't receive any more
+ * data, then closing user can proceed.
+ * Starting the timer is contrary to the
+ * specification, but if we don't get a FIN
+ * we'll hang forever.
+ */
+ if (so->so_state & SS_CANTRCVMORE) {
+ soisdisconnected(so);
+ tp->t_timer[TCPT_2MSL] = tcp_maxidle;
+ }
+ tp->t_state = TCPS_FIN_WAIT_2;
+ }
+ break;
+
+ /*
+ * In CLOSING STATE in addition to the processing for
+ * the ESTABLISHED state if the ACK acknowledges our FIN
+ * then enter the TIME-WAIT state, otherwise ignore
+ * the segment.
+ */
+ case TCPS_CLOSING:
+ if (ourfinisacked) {
+ tp->t_state = TCPS_TIME_WAIT;
+ tcp_canceltimers(tp);
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ soisdisconnected(so);
+ }
+ break;
+
+ /*
+ * In LAST_ACK, we may still be waiting for data to drain
+ * and/or to be acked, as well as for the ack of our FIN.
+ * If our FIN is now acknowledged, delete the TCB,
+ * enter the closed state and return.
+ */
+ case TCPS_LAST_ACK:
+ if (ourfinisacked) {
+ tp = tcp_close(tp);
+ goto drop;
+ }
+ break;
+
+ /*
+ * In TIME_WAIT state the only thing that should arrive
+ * is a retransmission of the remote FIN. Acknowledge
+ * it and restart the finack timer.
+ */
+ case TCPS_TIME_WAIT:
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ goto dropafterack;
+ }
+ }
+
+step6:
+ /*
+ * Update window information.
+ * Don't look at window if no ACK: TAC's send garbage on first SYN.
+ */
+ if ((tiflags & TH_ACK) &&
+ (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq &&
+ (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
+ tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))) {
+ /* keep track of pure window updates */
+ if (ti->ti_len == 0 &&
+ tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
+ tcpstat.tcps_rcvwinupd++;
+ tp->snd_wnd = tiwin;
+ tp->snd_wl1 = ti->ti_seq;
+ tp->snd_wl2 = ti->ti_ack;
+ if (tp->snd_wnd > tp->max_sndwnd)
+ tp->max_sndwnd = tp->snd_wnd;
+ needoutput = 1;
+ }
+
+ /*
+ * Process segments with URG.
+ */
+ if ((tiflags & TH_URG) && ti->ti_urp &&
+ TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ /*
+ * This is a kludge, but if we receive and accept
+ * random urgent pointers, we'll crash in
+ * soreceive. It's hard to imagine someone
+ * actually wanting to send this much urgent data.
+ */
+ if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) {
+ ti->ti_urp = 0; /* XXX */
+ tiflags &= ~TH_URG; /* XXX */
+ goto dodata; /* XXX */
+ }
+ /*
+ * If this segment advances the known urgent pointer,
+ * then mark the data stream. This should not happen
+ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
+ * a FIN has been received from the remote side.
+ * In these states we ignore the URG.
+ *
+ * According to RFC961 (Assigned Protocols),
+ * the urgent pointer points to the last octet
+ * of urgent data. We continue, however,
+ * to consider it to indicate the first octet
+ * of data past the urgent section as the original
+ * spec states (in one of two places).
+ */
+ if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
+ tp->rcv_up = ti->ti_seq + ti->ti_urp;
+ so->so_oobmark = so->so_rcv.sb_cc +
+ (tp->rcv_up - tp->rcv_nxt) - 1;
+ if (so->so_oobmark == 0)
+ so->so_state |= SS_RCVATMARK;
+ sohasoutofband(so);
+ tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+ }
+ /*
+ * Remove out of band data so doesn't get presented to user.
+ * This can happen independent of advancing the URG pointer,
+ * but if two URG's are pending at once, some out-of-band
+ * data may creep in... ick.
+ */
+ if (ti->ti_urp <= ti->ti_len
+#ifdef SO_OOBINLINE
+ && (so->so_options & SO_OOBINLINE) == 0
+#endif
+ )
+ tcp_pulloutofband(so, ti, m);
+ } else
+ /*
+ * If no out of band data is expected,
+ * pull receive urgent pointer along
+ * with the receive window.
+ */
+ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
+ tp->rcv_up = tp->rcv_nxt;
+dodata: /* XXX */
+
+ /*
+ * Process the segment text, merging it into the TCP sequencing queue,
+ * and arranging for acknowledgment of receipt if necessary.
+ * This process logically involves adjusting tp->rcv_wnd as data
+ * is presented to the user (this happens in tcp_usrreq.c,
+ * case PRU_RCVD). If a FIN has already been received on this
+ * connection then we just ignore the text.
+ */
+ if ((ti->ti_len || (tiflags&TH_FIN)) &&
+ TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ TCP_REASS(tp, ti, m, so, tiflags);
+ /*
+ * Note the amount of data that peer has sent into
+ * our window, in order to estimate the sender's
+ * buffer size.
+ */
+ len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
+ } else {
+ m_freem(m);
+ tiflags &= ~TH_FIN;
+ }
+
+ /*
+ * If FIN is received ACK the FIN and let the user know
+ * that the connection is closing.
+ */
+ if (tiflags & TH_FIN) {
+ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ socantrcvmore(so);
+ tp->t_flags |= TF_ACKNOW;
+ tp->rcv_nxt++;
+ }
+ switch (tp->t_state) {
+
+ /*
+ * In SYN_RECEIVED and ESTABLISHED STATES
+ * enter the CLOSE_WAIT state.
+ */
+ case TCPS_SYN_RECEIVED:
+ case TCPS_ESTABLISHED:
+ tp->t_state = TCPS_CLOSE_WAIT;
+ break;
+
+ /*
+ * If still in FIN_WAIT_1 STATE FIN has not been acked so
+ * enter the CLOSING state.
+ */
+ case TCPS_FIN_WAIT_1:
+ tp->t_state = TCPS_CLOSING;
+ break;
+
+ /*
+ * In FIN_WAIT_2 state enter the TIME_WAIT state,
+ * starting the time-wait timer, turning off the other
+ * standard timers.
+ */
+ case TCPS_FIN_WAIT_2:
+ tp->t_state = TCPS_TIME_WAIT;
+ tcp_canceltimers(tp);
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ soisdisconnected(so);
+ break;
+
+ /*
+ * In TIME_WAIT state restart the 2 MSL time_wait timer.
+ */
+ case TCPS_TIME_WAIT:
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ break;
+ }
+ }
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0);
+
+ /*
+ * Return any desired output.
+ */
+ if (needoutput || (tp->t_flags & TF_ACKNOW))
+ (void) tcp_output(tp);
+ return;
+
+dropafterack:
+ /*
+ * Generate an ACK dropping incoming segment if it occupies
+ * sequence space, where the ACK reflects our state.
+ */
+ if (tiflags & TH_RST)
+ goto drop;
+ m_freem(m);
+ tp->t_flags |= TF_ACKNOW;
+ (void) tcp_output(tp);
+ return;
+
+dropwithreset:
+ /*
+ * Generate a RST, dropping incoming segment.
+ * Make ACK acceptable to originator of segment.
+ * Don't bother to respond if destination was broadcast/multicast.
+ */
+ if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) ||
+ IN_MULTICAST(ti->ti_dst.s_addr))
+ goto drop;
+ if (tiflags & TH_ACK)
+ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
+ else {
+ if (tiflags & TH_SYN)
+ ti->ti_len++;
+ tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
+ TH_RST|TH_ACK);
+ }
+ /* destroy temporarily created socket */
+ if (dropsocket)
+ (void) soabort(so);
+ return;
+
+drop:
+ /*
+ * Drop space held by incoming segment and return.
+ */
+ if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
+ m_freem(m);
+ /* destroy temporarily created socket */
+ if (dropsocket)
+ (void) soabort(so);
+ return;
+#ifndef TUBA_INCLUDE
+}
+/* Parse the cnt bytes of TCP options at cp; update tp and the ts_* outputs for those recognized. */
+void
+tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr)
+ struct tcpcb *tp; /* connection whose option state is updated */
+ u_char *cp; /* first option byte */
+ int cnt; /* number of option bytes at cp */
+ struct tcpiphdr *ti; /* segment header; only ti_flags is examined here */
+ int *ts_present; /* out: set to 1 when a timestamp option is parsed */
+ u_long *ts_val, *ts_ecr; /* out: timestamp value and echo reply, after NTOHL */
+{
+ u_short mss;
+ int opt, optlen;
+
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[0];
+ if (opt == TCPOPT_EOL) /* end of option list: stop */
+ break;
+ if (opt == TCPOPT_NOP) /* one-byte option with no length field */
+ optlen = 1;
+ else {
+ optlen = cp[1]; /* NOTE(review): read without checking cnt >= 2, and optlen is never compared with cnt -- confirm the caller bounds this */
+ if (optlen <= 0) /* a zero length would never advance cp; give up */
+ break;
+ }
+ switch (opt) {
+
+ default: /* unrecognized option: step over it via the loop increment */
+ continue;
+
+ case TCPOPT_MAXSEG:
+ if (optlen != TCPOLEN_MAXSEG)
+ continue;
+ if (!(ti->ti_flags & TH_SYN)) /* only honored on a SYN segment */
+ continue;
+ bcopy((char *) cp + 2, (char *) &mss, sizeof(mss)); /* byte copy: option data may be unaligned */
+ NTOHS(mss);
+ (void) tcp_mss(tp, mss); /* sets t_maxseg */
+ break;
+
+ case TCPOPT_WINDOW:
+ if (optlen != TCPOLEN_WINDOW)
+ continue;
+ if (!(ti->ti_flags & TH_SYN)) /* only honored on a SYN segment */
+ continue;
+ tp->t_flags |= TF_RCVD_SCALE;
+ tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT); /* clamp the peer's shift count */
+ break;
+
+ case TCPOPT_TIMESTAMP:
+ if (optlen != TCPOLEN_TIMESTAMP)
+ continue;
+ *ts_present = 1;
+ bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val)); /* NOTE(review): copies sizeof(u_long) bytes -- presumably 4; confirm on this platform */
+ NTOHL(*ts_val);
+ bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr));
+ NTOHL(*ts_ecr);
+
+ /*
+ * A timestamp received in a SYN makes
+ * it ok to send timestamp requests and replies.
+ */
+ if (ti->ti_flags & TH_SYN) {
+ tp->t_flags |= TF_RCVD_TSTMP;
+ tp->ts_recent = *ts_val;
+ tp->ts_recent_age = tcp_now;
+ }
+ break;
+ }
+ }
+}
+
+/*
+ * Pull the out of band byte (located by ti->ti_urp) out of a segment so
+ * it doesn't appear in the user's data queue; it is saved in tp->t_iobc.
+ * It is still reflected in the segment length for
+ * sequencing purposes. Panics if the byte is not within the mbuf chain m.
+ */
+void
+tcp_pulloutofband(so, ti, m)
+ struct socket *so; /* socket whose tcpcb receives the byte */
+ struct tcpiphdr *ti; /* segment header; only ti_urp is read */
+ register struct mbuf *m; /* first mbuf of the chain to search */
+{
+ int cnt = ti->ti_urp - 1; /* offset of the out of band byte within the chain's data */
+
+ while (cnt >= 0) {
+ if (m->m_len > cnt) { /* the byte lies in this mbuf */
+ char *cp = mtod(m, caddr_t) + cnt;
+ struct tcpcb *tp = sototcpcb(so);
+
+ tp->t_iobc = *cp;
+ tp->t_oobflags |= TCPOOB_HAVEDATA;
+ bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); /* slide the rest of this mbuf down over the byte */
+ m->m_len--;
+ return;
+ }
+ cnt -= m->m_len; /* not here: make the offset relative to the next mbuf */
+ m = m->m_next;
+ if (m == 0)
+ break;
+ }
+ panic("tcp_pulloutofband");
+}
+
+/*
+ * Collect new round-trip time estimate rtt for tp
+ * and update averages (t_srtt, t_rttvar) and current timeout (t_rxtcur).
+ */
+void
+tcp_xmit_timer(tp, rtt)
+ register struct tcpcb *tp; /* connection whose estimators are updated */
+ short rtt; /* new measurement; the smoothing path subtracts 1 ("origin 0") */
+{
+ register short delta;
+
+ tcpstat.tcps_rttupdated++;
+ if (tp->t_srtt != 0) {
+ /*
+ * srtt is stored as fixed point with 3 bits after the
+ * binary point (i.e., scaled by 8). The following magic
+ * is equivalent to the smoothing algorithm in rfc793 with
+ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
+ * point). Adjust rtt to origin 0.
+ */
+ delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
+ if ((tp->t_srtt += delta) <= 0) /* keep srtt nonzero: zero means "no estimate yet" above */
+ tp->t_srtt = 1;
+ /*
+ * We accumulate a smoothed rtt variance (actually, a
+ * smoothed mean difference), then set the retransmit
+ * timer to smoothed rtt + 4 times the smoothed variance.
+ * rttvar is stored as fixed point with 2 bits after the
+ * binary point (scaled by 4). The following is
+ * equivalent to rfc793 smoothing with an alpha of .75
+ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces
+ * rfc793's wired-in beta.
+ */
+ if (delta < 0)
+ delta = -delta;
+ delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
+ if ((tp->t_rttvar += delta) <= 0)
+ tp->t_rttvar = 1;
+ } else {
+ /*
+ * No rtt measurement yet - use the unsmoothed rtt.
+ * Set the variance to half the rtt (so our first
+ * retransmit happens at 3*rtt).
+ */
+ tp->t_srtt = rtt << TCP_RTT_SHIFT;
+ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+ }
+ tp->t_rtt = 0; /* zero means no segment is currently being timed */
+ tp->t_rxtshift = 0; /* reset the backoff shift counter */
+
+ /*
+ * The retransmit should happen at rtt + 4 * rttvar.
+ * Because of the way we do the smoothing, srtt and rttvar
+ * will each average +1/2 tick of bias. When we compute
+ * the retransmit timer, we want 1/2 tick of rounding and
+ * 1 extra tick because of +-1/2 tick uncertainty in the
+ * firing of the timer. The bias will give us exactly the
+ * 1.5 tick we need. But, because the bias is
+ * statistical, we have to test that we don't drop below
+ * the minimum feasible timer (which is 2 ticks).
+ */
+ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+ tp->t_rttmin, TCPTV_REXMTMAX);
+
+ /*
+ * We received an ack for a packet that wasn't retransmitted;
+ * it is probably safe to discard any error indications we've
+ * received recently. This isn't quite right, but close enough
+ * for now (a route might have failed after we sent a segment,
+ * and the return path might not be symmetrical).
+ */
+ tp->t_softerror = 0;
+}
+
+/*
+ * Determine a reasonable value for maxseg size and return it.
+ * If the route is known, check route for mtu.
+ * If none, use an mss that can be handled on the outgoing
+ * interface without forcing IP to fragment; if bigger than
+ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
+ * to utilize large mbufs. If the route has no mtu and the destination
+ * isn't local, cap the mss at the default (tcp_mssdflt), as we can't
+ * discover anything about intervening gateways or networks. If no
+ * route can be found at all, return tcp_mssdflt without changing tp.
+ * A nonzero offer (the peer's MSS option) caps the result, but not below 32.
+ * We also initialize the congestion/slow start window to a single segment.
+ * While looking at the routing entry, we also initialize other path-dependent
+ * parameters from pre-set or cached values in the routing entry.
+ */
+int
+tcp_mss(tp, offer)
+ register struct tcpcb *tp; /* connection being configured */
+ u_int offer; /* peer's MSS option value; 0 means no offer (tcp_output passes 0) */
+{
+ struct route *ro;
+ register struct rtentry *rt;
+ struct ifnet *ifp;
+ register int rtt, mss;
+ u_long bufsize;
+ struct inpcb *inp;
+ struct socket *so;
+ extern int tcp_mssdflt;
+
+ inp = tp->t_inpcb;
+ ro = &inp->inp_route;
+
+ if ((rt = ro->ro_rt) == (struct rtentry *)0) {
+ /* No route yet, so try to acquire one */
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ ro->ro_dst.sa_family = AF_INET;
+ ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+ ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+ inp->inp_faddr;
+ rtalloc(ro);
+ }
+ if ((rt = ro->ro_rt) == (struct rtentry *)0)
+ return (tcp_mssdflt); /* still no route: nothing in tp is modified */
+ }
+ ifp = rt->rt_ifp;
+ so = inp->inp_socket;
+
+#ifdef RTV_MTU /* if route characteristics exist ... */
+ /*
+ * While we're here, check if there's an initial rtt
+ * or rttvar. Convert from the route-table units
+ * to scaled multiples of the slow timeout timer.
+ */
+ if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
+ /*
+ * XXX the lock bit for RTT indicates that the value
+ * is also a minimum value; this is subject to time.
+ */
+ if (rt->rt_rmx.rmx_locks & RTV_RTT)
+ tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ);
+ tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
+ if (rt->rt_rmx.rmx_rttvar)
+ tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
+ (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
+ else
+ /* default variation is +- 1 rtt */
+ tp->t_rttvar =
+ tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+ TCPT_RANGESET(tp->t_rxtcur,
+ ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+ tp->t_rttmin, TCPTV_REXMTMAX);
+ }
+ /*
+ * if there's an mtu associated with the route, use it
+ */
+ if (rt->rt_rmx.rmx_mtu)
+ mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
+ else
+#endif /* RTV_MTU */
+ {
+ mss = ifp->if_mtu - sizeof(struct tcpiphdr);
+#if (MCLBYTES & (MCLBYTES - 1)) == 0
+ if (mss > MCLBYTES)
+ mss &= ~(MCLBYTES-1); /* MCLBYTES is a power of two: round down with a mask */
+#else
+ if (mss > MCLBYTES)
+ mss = mss / MCLBYTES * MCLBYTES;
+#endif
+ if (!in_localaddr(inp->inp_faddr))
+ mss = min(mss, tcp_mssdflt);
+ }
+ /*
+ * The current mss, t_maxseg, is initialized to the default value.
+ * If we compute a smaller value, reduce the current mss.
+ * If we compute a larger value, return it for use in sending
+ * a max seg size option, but don't store it for use
+ * unless we received an offer at least that large from peer.
+ * However, do not accept offers under 32 bytes.
+ */
+ if (offer)
+ mss = min(mss, offer);
+ mss = max(mss, 32); /* sanity */
+ if (mss < tp->t_maxseg || offer != 0) {
+ /*
+ * If there's a pipesize, change the socket buffer
+ * to that size. Make the socket buffers an integral
+ * number of mss units; if the mss is larger than
+ * the socket buffer, decrease the mss.
+ */
+#ifdef RTV_SPIPE
+ if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
+#endif
+ bufsize = so->so_snd.sb_hiwat; /* no route send pipe size: use the socket's high-water mark */
+ if (bufsize < mss)
+ mss = bufsize;
+ else {
+ bufsize = roundup(bufsize, mss);
+ if (bufsize > sb_max)
+ bufsize = sb_max;
+ (void)sbreserve(&so->so_snd, bufsize); /* result deliberately ignored */
+ }
+ tp->t_maxseg = mss;
+
+#ifdef RTV_RPIPE
+ if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
+#endif
+ bufsize = so->so_rcv.sb_hiwat; /* no route receive pipe size: use the socket's high-water mark */
+ if (bufsize > mss) {
+ bufsize = roundup(bufsize, mss);
+ if (bufsize > sb_max)
+ bufsize = sb_max;
+ (void)sbreserve(&so->so_rcv, bufsize);
+ }
+ }
+ tp->snd_cwnd = mss; /* congestion window starts at one segment */
+
+#ifdef RTV_SSTHRESH
+ if (rt->rt_rmx.rmx_ssthresh) {
+ /*
+ * There's some sort of gateway or interface
+ * buffer limit on the path. Use this to set
+ * the slow start threshold, but set the
+ * threshold to no less than 2*mss.
+ */
+ tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
+ }
+#endif /* RTV_SSTHRESH */
+ return (mss);
+}
+#endif /* TUBA_INCLUDE */
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
new file mode 100644
index 000000000000..667579fc0ed8
--- /dev/null
+++ b/sys/netinet/tcp_output.c
@@ -0,0 +1,599 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#define TCPOUTFLAGS
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+#ifdef notyet
+extern struct mbuf *m_copypack();
+#endif
+
+
+#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */
+
+/*
+ * Tcp output routine: figure out what should be sent and send it. Returns 0, or an error code from the send path.
+ */
+int
+tcp_output(tp)
+ register struct tcpcb *tp; /* connection to generate output for */
+{
+ register struct socket *so = tp->t_inpcb->inp_socket;
+ register long len, win; /* len: data bytes to send; win: send window first, receive window later */
+ int off, flags, error; /* off: offset of snd_nxt from snd_una in the send buffer */
+ register struct mbuf *m;
+ register struct tcpiphdr *ti;
+ u_char opt[MAX_TCPOPTLEN];
+ unsigned optlen, hdrlen;
+ int idle, sendalot; /* sendalot: more than one segment is ready, loop back to "again" */
+
+ /*
+ * Determine length of data that should be transmitted,
+ * and flags that will be used.
+ * If there is some data or critical controls (SYN, RST)
+ * to send, then transmit; otherwise, investigate further.
+ */
+ idle = (tp->snd_max == tp->snd_una);
+ if (idle && tp->t_idle >= tp->t_rxtcur)
+ /*
+ * We have been idle for "a while" and no acks are
+ * expected to clock out any data we send --
+ * slow start to get ack "clock" running again.
+ */
+ tp->snd_cwnd = tp->t_maxseg;
+again:
+ sendalot = 0;
+ off = tp->snd_nxt - tp->snd_una;
+ win = min(tp->snd_wnd, tp->snd_cwnd);
+
+ flags = tcp_outflags[tp->t_state];
+ /*
+ * If in persist timeout with window of 0, send 1 byte.
+ * Otherwise, if window is small but nonzero
+ * and timer expired, we will send what we can
+ * and go to transmit state.
+ */
+ if (tp->t_force) {
+ if (win == 0) {
+ /*
+ * If we still have some data to send, then
+ * clear the FIN bit. Usually this would
+ * happen below when it realizes that we
+ * aren't sending all the data. However,
+ * if we have exactly 1 byte of unsent data,
+ * then it won't clear the FIN bit below,
+ * and if we are in persist state, we wind
+ * up sending the packet without recording
+ * that we sent the FIN bit.
+ *
+ * We can't just blindly clear the FIN bit,
+ * because if we don't have any more data
+ * to send then the probe will be the FIN
+ * itself.
+ */
+ if (off < so->so_snd.sb_cc)
+ flags &= ~TH_FIN;
+ win = 1;
+ } else {
+ tp->t_timer[TCPT_PERSIST] = 0;
+ tp->t_rxtshift = 0;
+ }
+ }
+
+ len = min(so->so_snd.sb_cc, win) - off;
+
+ if (len < 0) {
+ /*
+ * If FIN has been sent but not acked,
+ * but we haven't been called to retransmit,
+ * len will be -1. Otherwise, window shrank
+ * after we sent into it. If window shrank to 0,
+ * cancel pending retransmit and pull snd_nxt
+ * back to (closed) window. We will enter persist
+ * state below. If the window didn't close completely,
+ * just wait for an ACK.
+ */
+ len = 0;
+ if (win == 0) {
+ tp->t_timer[TCPT_REXMT] = 0;
+ tp->snd_nxt = tp->snd_una;
+ }
+ }
+ if (len > tp->t_maxseg) {
+ len = tp->t_maxseg;
+ sendalot = 1;
+ }
+ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) /* data remains after this segment: no FIN yet */
+ flags &= ~TH_FIN;
+
+ win = sbspace(&so->so_rcv); /* from here on win is our receive window */
+
+ /*
+ * Sender silly window avoidance. Send if we have a full
+ * maximum segment; or if the connection is idle (or TF_NODELAY
+ * is set) and this empties the send buffer; or if we
+ * are forced; otherwise don't bother.
+ * If peer's buffer is tiny, then send
+ * when window is at least half open.
+ * If retransmitting (possibly after persist timer forced us
+ * to send into a small window), then must resend.
+ */
+ if (len) {
+ if (len == tp->t_maxseg)
+ goto send;
+ if ((idle || tp->t_flags & TF_NODELAY) &&
+ len + off >= so->so_snd.sb_cc)
+ goto send;
+ if (tp->t_force)
+ goto send;
+ if (len >= tp->max_sndwnd / 2)
+ goto send;
+ if (SEQ_LT(tp->snd_nxt, tp->snd_max)) /* snd_nxt behind snd_max: retransmission */
+ goto send;
+ }
+
+ /*
+ * Compare available window to amount of window
+ * known to peer (as advertised window less
+ * next expected input). If the difference is at least two
+ * max size segments, or at least 50% of the maximum possible
+ * window, then want to send a window update to peer.
+ */
+ if (win > 0) {
+ /*
+ * "adv" is the amount we can increase the window,
+ * taking into account that we are limited by
+ * TCP_MAXWIN << tp->rcv_scale.
+ */
+ long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) -
+ (tp->rcv_adv - tp->rcv_nxt);
+
+ if (adv >= (long) (2 * tp->t_maxseg))
+ goto send;
+ if (2 * adv >= (long) so->so_rcv.sb_hiwat)
+ goto send;
+ }
+
+ /*
+ * Send if we owe peer an ACK.
+ */
+ if (tp->t_flags & TF_ACKNOW)
+ goto send;
+ if (flags & (TH_SYN|TH_RST))
+ goto send;
+ if (SEQ_GT(tp->snd_up, tp->snd_una))
+ goto send;
+ /*
+ * If our state indicates that FIN should be sent
+ * and we have not yet done so, or we're retransmitting the FIN,
+ * then we need to send.
+ */
+ if (flags & TH_FIN &&
+ ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
+ goto send;
+
+ /*
+ * TCP window updates are not reliable, rather a polling protocol
+ * using ``persist'' packets is used to ensure receipt of window
+ * updates. The three ``states'' for the output side are:
+ * idle not doing retransmits or persists
+ * persisting to move a small or zero window
+ * (re)transmitting and thereby not persisting
+ *
+ * tp->t_timer[TCPT_PERSIST]
+ * is set when we are in persist state.
+ * tp->t_force
+ * is set when we are called to send a persist packet.
+ * tp->t_timer[TCPT_REXMT]
+ * is set when we are retransmitting
+ * The output side is idle when both timers are zero.
+ *
+ * If send window is too small, there is data to transmit, and no
+ * retransmit or persist is pending, then go to persist state.
+ * If nothing happens soon, send when timer expires:
+ * if window is nonzero, transmit what we can,
+ * otherwise force out a byte.
+ */
+ if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
+ tp->t_timer[TCPT_PERSIST] == 0) {
+ tp->t_rxtshift = 0;
+ tcp_setpersist(tp);
+ }
+
+ /*
+ * No reason to send a segment, just return.
+ */
+ return (0);
+
+send:
+ /*
+ * Before ESTABLISHED, force sending of initial options
+ * unless TCP set not to do any options.
+ * NOTE: we assume that the IP/TCP header plus TCP options
+ * always fit in a single mbuf, leaving room for a maximum
+ * link header, i.e.
+ * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
+ */
+ optlen = 0;
+ hdrlen = sizeof (struct tcpiphdr);
+ if (flags & TH_SYN) {
+ tp->snd_nxt = tp->iss;
+ if ((tp->t_flags & TF_NOOPT) == 0) {
+ u_short mss;
+
+ opt[0] = TCPOPT_MAXSEG;
+ opt[1] = 4;
+ mss = htons((u_short) tcp_mss(tp, 0)); /* offer 0: compute our own value to advertise */
+ bcopy((caddr_t)&mss, (caddr_t)(opt + 2), sizeof(mss));
+ optlen = 4;
+
+ if ((tp->t_flags & TF_REQ_SCALE) &&
+ ((flags & TH_ACK) == 0 ||
+ (tp->t_flags & TF_RCVD_SCALE))) {
+ *((u_long *) (opt + optlen)) = htonl( /* NOTE(review): stores a u_long into a u_char array but advances optlen by 4 -- assumes 32-bit u_long and suitable alignment; confirm */
+ TCPOPT_NOP << 24 |
+ TCPOPT_WINDOW << 16 |
+ TCPOLEN_WINDOW << 8 |
+ tp->request_r_scale);
+ optlen += 4;
+ }
+ }
+ }
+
+ /*
+ * Send a timestamp and echo-reply if this is a SYN and our side
+ * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
+ * and our peer have sent timestamps in our SYN's.
+ */
+ if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+ (flags & TH_RST) == 0 &&
+ ((flags & (TH_SYN|TH_ACK)) == TH_SYN ||
+ (tp->t_flags & TF_RCVD_TSTMP))) {
+ u_long *lp = (u_long *)(opt + optlen);
+
+ /* Form timestamp option as shown in appendix A of RFC 1323. */
+ *lp++ = htonl(TCPOPT_TSTAMP_HDR);
+ *lp++ = htonl(tcp_now);
+ *lp = htonl(tp->ts_recent);
+ optlen += TCPOLEN_TSTAMP_APPA;
+ }
+
+ hdrlen += optlen;
+
+ /*
+ * Adjust data length if insertion of options will
+ * bump the packet length beyond the t_maxseg length.
+ */
+ if (len > tp->t_maxseg - optlen) {
+ len = tp->t_maxseg - optlen;
+ sendalot = 1;
+ }
+
+
+#ifdef DIAGNOSTIC
+ if (max_linkhdr + hdrlen > MHLEN)
+ panic("tcphdr too big");
+#endif
+
+ /*
+ * Grab a header mbuf, attaching a copy of data to
+ * be transmitted, and initialize the header from
+ * the template for sends on this connection.
+ */
+ if (len) {
+ if (tp->t_force && len == 1)
+ tcpstat.tcps_sndprobe++;
+ else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ tcpstat.tcps_sndrexmitpack++;
+ tcpstat.tcps_sndrexmitbyte += len;
+ } else {
+ tcpstat.tcps_sndpack++;
+ tcpstat.tcps_sndbyte += len;
+ }
+#ifdef notyet
+ if ((m = m_copypack(so->so_snd.sb_mb, off,
+ (int)len, max_linkhdr + hdrlen)) == 0) {
+ error = ENOBUFS;
+ goto out;
+ }
+ /*
+ * m_copypack left space for our hdr; use it.
+ */
+ m->m_len += hdrlen;
+ m->m_data -= hdrlen;
+#else
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto out;
+ }
+ m->m_data += max_linkhdr; /* leave room in front for the link-level header */
+ m->m_len = hdrlen;
+ if (len <= MHLEN - hdrlen - max_linkhdr) { /* data fits in the header mbuf: copy it in */
+ m_copydata(so->so_snd.sb_mb, off, (int) len,
+ mtod(m, caddr_t) + hdrlen);
+ m->m_len += len;
+ } else {
+ m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
+ if (m->m_next == 0)
+ len = 0; /* NOTE(review): copy failed, yet a header-only segment is still sent and the stats above already counted len bytes -- confirm intended */
+ }
+#endif
+ /*
+ * If we're sending everything we've got, set PUSH.
+ * (This will keep happy those implementations which only
+ * give data to the user when a buffer fills or
+ * a PUSH comes in.)
+ */
+ if (off + len == so->so_snd.sb_cc)
+ flags |= TH_PUSH;
+ } else {
+ if (tp->t_flags & TF_ACKNOW)
+ tcpstat.tcps_sndacks++;
+ else if (flags & (TH_SYN|TH_FIN|TH_RST))
+ tcpstat.tcps_sndctrl++;
+ else if (SEQ_GT(tp->snd_up, tp->snd_una))
+ tcpstat.tcps_sndurg++;
+ else
+ tcpstat.tcps_sndwinup++;
+
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto out;
+ }
+ m->m_data += max_linkhdr;
+ m->m_len = hdrlen;
+ }
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+ ti = mtod(m, struct tcpiphdr *);
+ if (tp->t_template == 0)
+ panic("tcp_output");
+ bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr));
+
+ /*
+ * Fill in fields, remembering maximum advertised
+ * window for use in delaying messages about window sizes.
+ * If resending a FIN, be sure not to use a new sequence number.
+ */
+ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
+ tp->snd_nxt == tp->snd_max)
+ tp->snd_nxt--;
+ /*
+ * If we are doing retransmissions, then snd_nxt will
+ * not reflect the first unsent octet. For ACK only
+ * packets, we do not want the sequence number of the
+ * retransmitted packet, we want the sequence number
+ * of the next unsent octet. So, if there is no data
+ * (and no SYN or FIN), use snd_max instead of snd_nxt
+ * when filling in ti_seq. But if we are in persist
+ * state, snd_max might reflect one byte beyond the
+ * right edge of the window, so use snd_nxt in that
+ * case, since we know we aren't doing a retransmission.
+ * (retransmit and persist are mutually exclusive...)
+ */
+ if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST])
+ ti->ti_seq = htonl(tp->snd_nxt);
+ else
+ ti->ti_seq = htonl(tp->snd_max);
+ ti->ti_ack = htonl(tp->rcv_nxt);
+ if (optlen) {
+ bcopy((caddr_t)opt, (caddr_t)(ti + 1), optlen); /* options go directly after the fixed header */
+ ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; /* header length in 32-bit words */
+ }
+ ti->ti_flags = flags;
+ /*
+ * Calculate receive window. Don't shrink window,
+ * but avoid silly window syndrome.
+ */
+ if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg)
+ win = 0;
+ if (win > (long)TCP_MAXWIN << tp->rcv_scale)
+ win = (long)TCP_MAXWIN << tp->rcv_scale;
+ if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) /* never advertise less than already promised */
+ win = (long)(tp->rcv_adv - tp->rcv_nxt);
+ ti->ti_win = htons((u_short) (win>>tp->rcv_scale));
+ if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
+ ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
+ ti->ti_flags |= TH_URG;
+ } else
+ /*
+ * If no urgent pointer to send, then we pull
+ * the urgent pointer to the left edge of the send window
+ * so that it doesn't drift into the send window on sequence
+ * number wraparound.
+ */
+ tp->snd_up = tp->snd_una; /* drag it along */
+
+ /*
+ * Put TCP length in extended header, and then
+ * checksum extended header and data.
+ */
+ if (len + optlen) /* otherwise the ti_len copied from the template is kept */
+ ti->ti_len = htons((u_short)(sizeof (struct tcphdr) +
+ optlen + len));
+ ti->ti_sum = in_cksum(m, (int)(hdrlen + len));
+
+ /*
+ * In transmit state, time the transmission and arrange for
+ * the retransmit. In persist state, just set snd_max.
+ */
+ if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
+ tcp_seq startseq = tp->snd_nxt;
+
+ /*
+ * Advance snd_nxt over sequence space of this segment.
+ */
+ if (flags & (TH_SYN|TH_FIN)) {
+ if (flags & TH_SYN)
+ tp->snd_nxt++;
+ if (flags & TH_FIN) {
+ tp->snd_nxt++;
+ tp->t_flags |= TF_SENTFIN;
+ }
+ }
+ tp->snd_nxt += len;
+ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
+ tp->snd_max = tp->snd_nxt;
+ /*
+ * Time this transmission if not a retransmission and
+ * not currently timing anything.
+ */
+ if (tp->t_rtt == 0) {
+ tp->t_rtt = 1;
+ tp->t_rtseq = startseq;
+ tcpstat.tcps_segstimed++;
+ }
+ }
+
+ /*
+ * Set retransmit timer if not currently set,
+ * and not doing an ack or a keep-alive probe.
+ * Initial value for retransmit timer is tp->t_rxtcur,
+ * the current retransmit timeout.
+ * If the persist timer was running, cancel it and
+ * reset the backoff shift counter.
+ */
+ if (tp->t_timer[TCPT_REXMT] == 0 &&
+ tp->snd_nxt != tp->snd_una) {
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+ if (tp->t_timer[TCPT_PERSIST]) {
+ tp->t_timer[TCPT_PERSIST] = 0;
+ tp->t_rxtshift = 0;
+ }
+ }
+ } else
+ if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
+ tp->snd_max = tp->snd_nxt + len;
+
+ /*
+ * Trace.
+ */
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
+
+ /*
+ * Fill in IP length and desired time to live and
+ * send to IP level. There should be a better way
+ * to handle ttl and tos; we could keep them in
+ * the template, but need a way to checksum without them.
+ */
+ m->m_pkthdr.len = hdrlen + len;
+#ifdef TUBA
+ if (tp->t_tuba_pcb)
+ error = tuba_output(m, tp);
+ else
+#endif
+ {
+ ((struct ip *)ti)->ip_len = m->m_pkthdr.len;
+ ((struct ip *)ti)->ip_ttl = tp->t_inpcb->inp_ip.ip_ttl; /* XXX */
+ ((struct ip *)ti)->ip_tos = tp->t_inpcb->inp_ip.ip_tos; /* XXX */
+#if BSD >= 43
+ error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
+ so->so_options & SO_DONTROUTE, 0);
+#else
+ error = ip_output(m, (struct mbuf *)0, &tp->t_inpcb->inp_route,
+ so->so_options & SO_DONTROUTE);
+#endif
+ }
+ if (error) {
+out:
+ if (error == ENOBUFS) { /* buffer shortage: quench the connection and report no error */
+ tcp_quench(tp->t_inpcb, 0);
+ return (0);
+ }
+ if ((error == EHOSTUNREACH || error == ENETDOWN)
+ && TCPS_HAVERCVDSYN(tp->t_state)) {
+ tp->t_softerror = error; /* remembered as a soft error rather than returned */
+ return (0);
+ }
+ return (error);
+ }
+ tcpstat.tcps_sndtotal++;
+
+ /*
+ * Data sent (as far as we can tell).
+ * If this advertises a larger window than any other segment,
+ * then remember the size of the advertised window.
+ * Any pending ACK has now been sent.
+ */
+ if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
+ tp->rcv_adv = tp->rcv_nxt + win;
+ tp->last_ack_sent = tp->rcv_nxt;
+ tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
+ if (sendalot)
+ goto again;
+ return (0);
+}
+/* Start or restart the persist timer for tp, backing off by tcp_backoff[t_rxtshift]. */
+void
+tcp_setpersist(tp)
+ register struct tcpcb *tp; /* connection entering or continuing persist state */
+{
+ register t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; /* implicit int; base interval from the smoothed rtt and variance */
+
+ if (tp->t_timer[TCPT_REXMT]) /* the retransmit timer must not be running here */
+ panic("tcp_output REXMT");
+ /*
+ * Start/restart persistence timer.
+ */
+ TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
+ t * tcp_backoff[tp->t_rxtshift],
+ TCPTV_PERSMIN, TCPTV_PERSMAX);
+ if (tp->t_rxtshift < TCP_MAXRXTSHIFT) /* advance the backoff index, up to TCP_MAXRXTSHIFT */
+ tp->t_rxtshift++;
+}
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
new file mode 100644
index 000000000000..2dd1d749c409
--- /dev/null
+++ b/sys/netinet/tcp_reass.c
@@ -0,0 +1,1647 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
+ */
+
+#ifndef TUBA_INCLUDE
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+int tcprexmtthresh = 3; /* t_dupacks value at which tcp_input retransmits */
+struct tcpiphdr tcp_saveti; /* copy of the incoming header, saved when SO_DEBUG is set */
+struct inpcb *tcp_last_inpcb = &tcb; /* last pcb found; tried first by tcp_input before in_pcblookup */
+
+extern u_long sb_max; /* tcp_input rejects urgent offsets that would exceed this */
+
+#endif /* TUBA_INCLUDE */
+#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) /* 24 days, scaled by PR_SLOWHZ */
+
+/* for modulo comparisons of timestamps */
+#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0)
+#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
+
+
+/*
+ * Insert segment ti into reassembly queue of tcp with control block tp.
+ * Yields TH_FIN (in flags) if reassembly now includes a segment with FIN.
+ * The macro form does the common case inline (segment is the next to be
+ * received on an established connection, and the queue is empty), avoiding
+ * linkage into and removal from the queue and repetition of conversions.
+ * Set DELACK for segments received in order, but ack immediately when
+ * segments are out of order (so fast retransmit can work). Arguments are
+ * expanded more than once, so pass side-effect-free expressions.
+ */
+#define TCP_REASS(tp, ti, m, so, flags) { \
+ if ((ti)->ti_seq == (tp)->rcv_nxt && \
+ (tp)->seg_next == (struct tcpiphdr *)(tp) && \
+ (tp)->t_state == TCPS_ESTABLISHED) { \
+ (tp)->t_flags |= TF_DELACK; \
+ (tp)->rcv_nxt += (ti)->ti_len; \
+ (flags) = (ti)->ti_flags & TH_FIN; \
+ tcpstat.tcps_rcvpack++;\
+ tcpstat.tcps_rcvbyte += (ti)->ti_len;\
+ sbappend(&(so)->so_rcv, (m)); \
+ sorwakeup(so); \
+ } else { \
+ (flags) = tcp_reass((tp), (ti), (m)); \
+ (tp)->t_flags |= TF_ACKNOW; \
+ } \
+}
+#ifndef TUBA_INCLUDE
+
+int /* TH_FIN if the last segment presented carried FIN, otherwise 0 */
+tcp_reass(tp, ti, m) /* queue segment ti on tp, then present in-order data to the socket */
+ register struct tcpcb *tp; /* control block that heads the reassembly queue */
+ register struct tcpiphdr *ti; /* header of the incoming segment, or 0 to only present */
+ struct mbuf *m; /* mbuf for the segment; kept with ti or freed here */
+{
+ register struct tcpiphdr *q;
+ struct socket *so = tp->t_inpcb->inp_socket;
+ int flags; /* TH_FIN bit of the most recently presented segment */
+
+ /*
+ * Call with ti==0 after becoming established to
+ * force pre-ESTABLISHED data up to user socket.
+ */
+ if (ti == 0)
+ goto present;
+
+ /*
+ * Find a segment which begins after this one does.
+ */
+ for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
+ q = (struct tcpiphdr *)q->ti_next)
+ if (SEQ_GT(q->ti_seq, ti->ti_seq))
+ break;
+
+ /*
+ * If there is a preceding segment, it may provide some of
+ * our data already. If so, drop the data from the incoming
+ * segment. If it provides all of our data, drop us.
+ */
+ if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
+ register int i;
+ q = (struct tcpiphdr *)q->ti_prev;
+ /* conversion to int (in i) handles seq wraparound */
+ i = q->ti_seq + q->ti_len - ti->ti_seq;
+ if (i > 0) {
+ if (i >= ti->ti_len) { /* predecessor already covers the whole segment */
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += ti->ti_len;
+ m_freem(m);
+ return (0);
+ }
+ m_adj(m, i); /* drop the i bytes the predecessor already holds */
+ ti->ti_len -= i;
+ ti->ti_seq += i;
+ }
+ q = (struct tcpiphdr *)(q->ti_next); /* step forward again to the successor */
+ }
+ tcpstat.tcps_rcvoopack++;
+ tcpstat.tcps_rcvoobyte += ti->ti_len;
+ REASS_MBUF(ti) = m; /* XXX */
+
+ /*
+ * While we overlap succeeding segments trim them or,
+ * if they are completely covered, dequeue them.
+ */
+ while (q != (struct tcpiphdr *)tp) {
+ register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
+ if (i <= 0)
+ break;
+ if (i < q->ti_len) { /* partial overlap: trim q and stop */
+ q->ti_seq += i;
+ q->ti_len -= i;
+ m_adj(REASS_MBUF(q), i);
+ break;
+ }
+ q = (struct tcpiphdr *)q->ti_next; /* advance first, then unlink the covered entry */
+ m = REASS_MBUF((struct tcpiphdr *)q->ti_prev);
+ remque(q->ti_prev);
+ m_freem(m);
+ }
+
+ /*
+ * Stick new segment in its place.
+ */
+ insque(ti, q->ti_prev);
+
+present:
+ /*
+ * Present data to user, advancing rcv_nxt through
+ * completed sequence space.
+ */
+ if (TCPS_HAVERCVDSYN(tp->t_state) == 0)
+ return (0);
+ ti = tp->seg_next;
+ if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) /* queue empty or first entry not at rcv_nxt */
+ return (0);
+ if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) /* keep data queued while still in SYN_RECEIVED */
+ return (0);
+ do {
+ tp->rcv_nxt += ti->ti_len;
+ flags = ti->ti_flags & TH_FIN;
+ remque(ti);
+ m = REASS_MBUF(ti);
+ ti = (struct tcpiphdr *)ti->ti_next;
+ if (so->so_state & SS_CANTRCVMORE) /* socket can't receive more: free instead of appending */
+ m_freem(m);
+ else
+ sbappend(&so->so_rcv, m);
+ } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
+ sorwakeup(so);
+ return (flags);
+}
+
+/*
+ * TCP input routine, follows pages 65-76 of the
+ * protocol specification dated September, 1981 very closely.
+ */
+void
+tcp_input(m, iphlen)
+ register struct mbuf *m;
+ int iphlen;
+{
+ register struct tcpiphdr *ti;
+ register struct inpcb *inp;
+ caddr_t optp = NULL;
+ int optlen;
+ int len, tlen, off;
+ register struct tcpcb *tp = 0;
+ register int tiflags;
+ struct socket *so;
+ int todrop, acked, ourfinisacked, needoutput = 0;
+ short ostate;
+ struct in_addr laddr;
+ int dropsocket = 0;
+ int iss = 0;
+ u_long tiwin, ts_val, ts_ecr;
+ int ts_present = 0;
+
+ tcpstat.tcps_rcvtotal++;
+ /*
+ * Get IP and TCP header together in first mbuf.
+ * Note: IP leaves IP header in first mbuf.
+ */
+ ti = mtod(m, struct tcpiphdr *);
+ if (iphlen > sizeof (struct ip))
+ ip_stripoptions(m, (struct mbuf *)0);
+ if (m->m_len < sizeof (struct tcpiphdr)) {
+ if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
+ tcpstat.tcps_rcvshort++;
+ return;
+ }
+ ti = mtod(m, struct tcpiphdr *);
+ }
+
+ /*
+ * Checksum extended TCP header and data.
+ */
+ tlen = ((struct ip *)ti)->ip_len;
+ len = sizeof (struct ip) + tlen;
+ ti->ti_next = ti->ti_prev = 0;
+ ti->ti_x1 = 0;
+ ti->ti_len = (u_short)tlen;
+ HTONS(ti->ti_len);
+ if (ti->ti_sum = in_cksum(m, len)) {
+ tcpstat.tcps_rcvbadsum++;
+ goto drop;
+ }
+#endif /* TUBA_INCLUDE */
+
+ /*
+ * Check that TCP offset makes sense,
+ * pull out TCP options and adjust length. XXX
+ */
+ off = ti->ti_off << 2;
+ if (off < sizeof (struct tcphdr) || off > tlen) {
+ tcpstat.tcps_rcvbadoff++;
+ goto drop;
+ }
+ tlen -= off;
+ ti->ti_len = tlen;
+ if (off > sizeof (struct tcphdr)) {
+ if (m->m_len < sizeof(struct ip) + off) {
+ if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
+ tcpstat.tcps_rcvshort++;
+ return;
+ }
+ ti = mtod(m, struct tcpiphdr *);
+ }
+ optlen = off - sizeof (struct tcphdr);
+ optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
+ /*
+ * Do quick retrieval of timestamp options ("options
+ * prediction?"). If timestamp is the only option and it's
+ * formatted as recommended in RFC 1323 appendix A, we
+ * quickly get the values now and not bother calling
+ * tcp_dooptions(), etc.
+ */
+ if ((optlen == TCPOLEN_TSTAMP_APPA ||
+ (optlen > TCPOLEN_TSTAMP_APPA &&
+ optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
+ *(u_long *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
+ (ti->ti_flags & TH_SYN) == 0) {
+ ts_present = 1;
+ ts_val = ntohl(*(u_long *)(optp + 4));
+ ts_ecr = ntohl(*(u_long *)(optp + 8));
+ optp = NULL; /* we've parsed the options */
+ }
+ }
+ tiflags = ti->ti_flags;
+
+ /*
+ * Convert TCP protocol specific fields to host format.
+ */
+ NTOHL(ti->ti_seq);
+ NTOHL(ti->ti_ack);
+ NTOHS(ti->ti_win);
+ NTOHS(ti->ti_urp);
+
+ /*
+ * Locate pcb for segment.
+ */
+findpcb:
+ inp = tcp_last_inpcb;
+ if (inp->inp_lport != ti->ti_dport ||
+ inp->inp_fport != ti->ti_sport ||
+ inp->inp_faddr.s_addr != ti->ti_src.s_addr ||
+ inp->inp_laddr.s_addr != ti->ti_dst.s_addr) {
+ inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport,
+ ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD);
+ if (inp)
+ tcp_last_inpcb = inp;
+ ++tcpstat.tcps_pcbcachemiss;
+ }
+
+ /*
+ * If the state is CLOSED (i.e., TCB does not exist) then
+ * all data in the incoming segment is discarded.
+ * If the TCB exists but is in CLOSED state, it is embryonic,
+ * but should either do a listen or a connect soon.
+ */
+ if (inp == 0)
+ goto dropwithreset;
+ tp = intotcpcb(inp);
+ if (tp == 0)
+ goto dropwithreset;
+ if (tp->t_state == TCPS_CLOSED)
+ goto drop;
+
+ /* Unscale the window into a 32-bit value. */
+ if ((tiflags & TH_SYN) == 0)
+ tiwin = ti->ti_win << tp->snd_scale;
+ else
+ tiwin = ti->ti_win;
+
+ so = inp->inp_socket;
+ if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
+ if (so->so_options & SO_DEBUG) {
+ ostate = tp->t_state;
+ tcp_saveti = *ti;
+ }
+ if (so->so_options & SO_ACCEPTCONN) {
+ so = sonewconn(so, 0);
+ if (so == 0)
+ goto drop;
+ /*
+ * This is ugly, but ....
+ *
+ * Mark socket as temporary until we're
+ * committed to keeping it. The code at
+ * ``drop'' and ``dropwithreset'' check the
+ * flag dropsocket to see if the temporary
+ * socket created here should be discarded.
+ * We mark the socket as discardable until
+ * we're committed to it below in TCPS_LISTEN.
+ */
+ dropsocket++;
+ inp = (struct inpcb *)so->so_pcb;
+ inp->inp_laddr = ti->ti_dst;
+ inp->inp_lport = ti->ti_dport;
+#if BSD>=43
+ inp->inp_options = ip_srcroute();
+#endif
+ tp = intotcpcb(inp);
+ tp->t_state = TCPS_LISTEN;
+
+ /* Compute proper scaling value from buffer space
+ */
+ while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+ TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat)
+ tp->request_r_scale++;
+ }
+ }
+
+ /*
+ * Segment received on connection.
+ * Reset idle time and keep-alive timer.
+ */
+ tp->t_idle = 0;
+ tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+
+ /*
+ * Process options if not in LISTEN state,
+ * else do it below (after getting remote address).
+ */
+ if (optp && tp->t_state != TCPS_LISTEN)
+ tcp_dooptions(tp, optp, optlen, ti,
+ &ts_present, &ts_val, &ts_ecr);
+
+ /*
+ * Header prediction: check for the two common cases
+ * of a uni-directional data xfer. If the packet has
+ * no control flags, is in-sequence, the window didn't
+ * change and we're not retransmitting, it's a
+ * candidate. If the length is zero and the ack moved
+ * forward, we're the sender side of the xfer. Just
+ * free the data acked & wake any higher level process
+ * that was blocked waiting for space. If the length
+ * is non-zero and the ack didn't move, we're the
+ * receiver side. If we're getting packets in-order
+ * (the reassembly queue is empty), add the data to
+ * the socket buffer and note that we need a delayed ack.
+ */
+ if (tp->t_state == TCPS_ESTABLISHED &&
+ (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+ (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) &&
+ ti->ti_seq == tp->rcv_nxt &&
+ tiwin && tiwin == tp->snd_wnd &&
+ tp->snd_nxt == tp->snd_max) {
+
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record the timestamp.
+ */
+ if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
+ SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) {
+ tp->ts_recent_age = tcp_now;
+ tp->ts_recent = ts_val;
+ }
+
+ if (ti->ti_len == 0) {
+ if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
+ SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
+ tp->snd_cwnd >= tp->snd_wnd) {
+ /*
+ * this is a pure ack for outstanding data.
+ */
+ ++tcpstat.tcps_predack;
+ if (ts_present)
+ tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
+ else if (tp->t_rtt &&
+ SEQ_GT(ti->ti_ack, tp->t_rtseq))
+ tcp_xmit_timer(tp, tp->t_rtt);
+ acked = ti->ti_ack - tp->snd_una;
+ tcpstat.tcps_rcvackpack++;
+ tcpstat.tcps_rcvackbyte += acked;
+ sbdrop(&so->so_snd, acked);
+ tp->snd_una = ti->ti_ack;
+ m_freem(m);
+
+ /*
+ * If all outstanding data are acked, stop
+ * retransmit timer, otherwise restart timer
+ * using current (possibly backed-off) value.
+ * If process is waiting for space,
+ * wakeup/selwakeup/signal. If data
+ * are ready to send, let tcp_output
+ * decide between more output or persist.
+ */
+ if (tp->snd_una == tp->snd_max)
+ tp->t_timer[TCPT_REXMT] = 0;
+ else if (tp->t_timer[TCPT_PERSIST] == 0)
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+
+ if (so->so_snd.sb_flags & SB_NOTIFY)
+ sowwakeup(so);
+ if (so->so_snd.sb_cc)
+ (void) tcp_output(tp);
+ return;
+ }
+ } else if (ti->ti_ack == tp->snd_una &&
+ tp->seg_next == (struct tcpiphdr *)tp &&
+ ti->ti_len <= sbspace(&so->so_rcv)) {
+ /*
+ * this is a pure, in-sequence data packet
+ * with nothing on the reassembly queue and
+ * we have enough buffer space to take it.
+ */
+ ++tcpstat.tcps_preddat;
+ tp->rcv_nxt += ti->ti_len;
+ tcpstat.tcps_rcvpack++;
+ tcpstat.tcps_rcvbyte += ti->ti_len;
+ /*
+ * Drop TCP, IP headers and TCP options then add data
+ * to socket buffer.
+ */
+ m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+ m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+ sbappend(&so->so_rcv, m);
+ sorwakeup(so);
+ tp->t_flags |= TF_DELACK;
+ return;
+ }
+ }
+
+ /*
+ * Drop TCP, IP headers and TCP options.
+ */
+ m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+ m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+
+ /*
+ * Calculate amount of space in receive window,
+ * and then do TCP input processing.
+ * Receive window is amount of space in rcv queue,
+ * but not less than advertised window.
+ */
+ { int win;
+
+ win = sbspace(&so->so_rcv);
+ if (win < 0)
+ win = 0;
+ tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
+ }
+
+ switch (tp->t_state) {
+
+ /*
+ * If the state is LISTEN then ignore segment if it contains an RST.
+ * If the segment contains an ACK then it is bad and send a RST.
+ * If it does not contain a SYN then it is not interesting; drop it.
+ * Don't bother responding if the destination was a broadcast.
+ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
+ * tp->iss, and send a segment:
+ * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
+ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
+ * Fill in remote peer address fields if not previously specified.
+ * Enter SYN_RECEIVED state, and process any other fields of this
+ * segment in this state.
+ */
+ case TCPS_LISTEN: {
+ struct mbuf *am;
+ register struct sockaddr_in *sin;
+
+ if (tiflags & TH_RST)
+ goto drop;
+ if (tiflags & TH_ACK)
+ goto dropwithreset;
+ if ((tiflags & TH_SYN) == 0)
+ goto drop;
+ /*
+ * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
+ * in_broadcast() should never return true on a received
+ * packet with M_BCAST not set.
+ */
+ if (m->m_flags & (M_BCAST|M_MCAST) ||
+ IN_MULTICAST(ti->ti_dst.s_addr))
+ goto drop;
+ am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */
+ if (am == NULL)
+ goto drop;
+ am->m_len = sizeof (struct sockaddr_in);
+ sin = mtod(am, struct sockaddr_in *);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = ti->ti_src;
+ sin->sin_port = ti->ti_sport;
+ bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
+ laddr = inp->inp_laddr;
+ if (inp->inp_laddr.s_addr == INADDR_ANY)
+ inp->inp_laddr = ti->ti_dst;
+ if (in_pcbconnect(inp, am)) {
+ inp->inp_laddr = laddr;
+ (void) m_free(am);
+ goto drop;
+ }
+ (void) m_free(am);
+ tp->t_template = tcp_template(tp);
+ if (tp->t_template == 0) {
+ tp = tcp_drop(tp, ENOBUFS);
+ dropsocket = 0; /* socket is already gone */
+ goto drop;
+ }
+ if (optp)
+ tcp_dooptions(tp, optp, optlen, ti,
+ &ts_present, &ts_val, &ts_ecr);
+ if (iss)
+ tp->iss = iss;
+ else
+ tp->iss = tcp_iss;
+ tcp_iss += TCP_ISSINCR/2;
+ tp->irs = ti->ti_seq;
+ tcp_sendseqinit(tp);
+ tcp_rcvseqinit(tp);
+ tp->t_flags |= TF_ACKNOW;
+ tp->t_state = TCPS_SYN_RECEIVED;
+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+ dropsocket = 0; /* committed to socket */
+ tcpstat.tcps_accepts++;
+ goto trimthenstep6;
+ }
+
+ /*
+ * If the state is SYN_SENT:
+ * if seg contains an ACK, but not for our SYN, drop the input.
+ * if seg contains a RST, then drop the connection.
+ * if seg does not contain SYN, then drop it.
+ * Otherwise this is an acceptable SYN segment
+ * initialize tp->rcv_nxt and tp->irs
+ * if seg contains ack then advance tp->snd_una
+ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state
+ * arrange for segment to be acked (eventually)
+ * continue processing rest of data/controls, beginning with URG
+ */
+ case TCPS_SYN_SENT:
+ if ((tiflags & TH_ACK) &&
+ (SEQ_LEQ(ti->ti_ack, tp->iss) ||
+ SEQ_GT(ti->ti_ack, tp->snd_max)))
+ goto dropwithreset;
+ if (tiflags & TH_RST) {
+ if (tiflags & TH_ACK)
+ tp = tcp_drop(tp, ECONNREFUSED);
+ goto drop;
+ }
+ if ((tiflags & TH_SYN) == 0)
+ goto drop;
+ if (tiflags & TH_ACK) {
+ tp->snd_una = ti->ti_ack;
+ if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+ tp->snd_nxt = tp->snd_una;
+ }
+ tp->t_timer[TCPT_REXMT] = 0;
+ tp->irs = ti->ti_seq;
+ tcp_rcvseqinit(tp);
+ tp->t_flags |= TF_ACKNOW;
+ if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
+ tcpstat.tcps_connects++;
+ soisconnected(so);
+ tp->t_state = TCPS_ESTABLISHED;
+ /* Do window scaling on this connection? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->snd_scale = tp->requested_s_scale;
+ tp->rcv_scale = tp->request_r_scale;
+ }
+ (void) tcp_reass(tp, (struct tcpiphdr *)0,
+ (struct mbuf *)0);
+ /*
+ * if we didn't have to retransmit the SYN,
+ * use its rtt as our initial srtt & rtt var.
+ */
+ if (tp->t_rtt)
+ tcp_xmit_timer(tp, tp->t_rtt);
+ } else
+ tp->t_state = TCPS_SYN_RECEIVED;
+
+trimthenstep6:
+ /*
+ * Advance ti->ti_seq to correspond to first data byte.
+ * If data, trim to stay within window,
+ * dropping FIN if necessary.
+ */
+ ti->ti_seq++;
+ if (ti->ti_len > tp->rcv_wnd) {
+ todrop = ti->ti_len - tp->rcv_wnd;
+ m_adj(m, -todrop);
+ ti->ti_len = tp->rcv_wnd;
+ tiflags &= ~TH_FIN;
+ tcpstat.tcps_rcvpackafterwin++;
+ tcpstat.tcps_rcvbyteafterwin += todrop;
+ }
+ tp->snd_wl1 = ti->ti_seq - 1;
+ tp->rcv_up = ti->ti_seq;
+ goto step6;
+ }
+
+ /*
+ * States other than LISTEN or SYN_SENT.
+ * First check timestamp, if present.
+ * Then check that at least some bytes of segment are within
+ * receive window. If segment begins before rcv_nxt,
+ * drop leading data (and SYN); if nothing left, just ack.
+ *
+ * RFC 1323 PAWS: If we have a timestamp reply on this segment
+ * and it's less than ts_recent, drop it.
+ */
+ if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
+ TSTMP_LT(ts_val, tp->ts_recent)) {
+
+ /* Check to see if ts_recent is over 24 days old. */
+ if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
+ /*
+ * Invalidate ts_recent. If this segment updates
+ * ts_recent, the age will be reset later and ts_recent
+ * will get a valid value. If it does not, setting
+ * ts_recent to zero will at least satisfy the
+ * requirement that zero be placed in the timestamp
+ * echo reply when ts_recent isn't valid. The
+ * age isn't reset until we get a valid ts_recent
+ * because we don't want out-of-order segments to be
+ * dropped when ts_recent is old.
+ */
+ tp->ts_recent = 0;
+ } else {
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += ti->ti_len;
+ tcpstat.tcps_pawsdrop++;
+ goto dropafterack;
+ }
+ }
+
+ todrop = tp->rcv_nxt - ti->ti_seq;
+ if (todrop > 0) {
+ if (tiflags & TH_SYN) {
+ tiflags &= ~TH_SYN;
+ ti->ti_seq++;
+ if (ti->ti_urp > 1)
+ ti->ti_urp--;
+ else
+ tiflags &= ~TH_URG;
+ todrop--;
+ }
+ if (todrop >= ti->ti_len) {
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += ti->ti_len;
+ /*
+ * If segment is just one to the left of the window,
+ * check two special cases:
+ * 1. Don't toss RST in response to 4.2-style keepalive.
+ * 2. If the only thing to drop is a FIN, we can drop
+ * it, but check the ACK or we will get into FIN
+ * wars if our FINs crossed (both CLOSING).
+ * In either case, send ACK to resynchronize,
+ * but keep on processing for RST or ACK.
+ */
+ if ((tiflags & TH_FIN && todrop == ti->ti_len + 1)
+#ifdef TCP_COMPAT_42
+ || (tiflags & TH_RST && ti->ti_seq == tp->rcv_nxt - 1)
+#endif
+ ) {
+ todrop = ti->ti_len;
+ tiflags &= ~TH_FIN;
+ tp->t_flags |= TF_ACKNOW;
+ } else {
+ /*
+ * Handle the case when a bound socket connects
+ * to itself. Allow packets with a SYN and
+ * an ACK to continue with the processing.
+ */
+ if (todrop != 0 || (tiflags & TH_ACK) == 0)
+ goto dropafterack;
+ }
+ } else {
+ tcpstat.tcps_rcvpartduppack++;
+ tcpstat.tcps_rcvpartdupbyte += todrop;
+ }
+ m_adj(m, todrop);
+ ti->ti_seq += todrop;
+ ti->ti_len -= todrop;
+ if (ti->ti_urp > todrop)
+ ti->ti_urp -= todrop;
+ else {
+ tiflags &= ~TH_URG;
+ ti->ti_urp = 0;
+ }
+ }
+
+ /*
+ * If new data are received on a connection after the
+ * user processes are gone, then RST the other end.
+ */
+ if ((so->so_state & SS_NOFDREF) &&
+ tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
+ tp = tcp_close(tp);
+ tcpstat.tcps_rcvafterclose++;
+ goto dropwithreset;
+ }
+
+ /*
+ * If segment ends after window, drop trailing data
+ * (and PUSH and FIN); if nothing left, just ACK.
+ */
+ todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
+ if (todrop > 0) {
+ tcpstat.tcps_rcvpackafterwin++;
+ if (todrop >= ti->ti_len) {
+ tcpstat.tcps_rcvbyteafterwin += ti->ti_len;
+ /*
+ * If a new connection request is received
+ * while in TIME_WAIT, drop the old connection
+ * and start over if the sequence numbers
+ * are above the previous ones.
+ */
+ if (tiflags & TH_SYN &&
+ tp->t_state == TCPS_TIME_WAIT &&
+ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
+ iss = tp->rcv_nxt + TCP_ISSINCR;
+ tp = tcp_close(tp);
+ goto findpcb;
+ }
+ /*
+ * If window is closed can only take segments at
+ * window edge, and have to drop data and PUSH from
+ * incoming segments. Continue processing, but
+ * remember to ack. Otherwise, drop segment
+ * and ack.
+ */
+ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
+ tp->t_flags |= TF_ACKNOW;
+ tcpstat.tcps_rcvwinprobe++;
+ } else
+ goto dropafterack;
+ } else
+ tcpstat.tcps_rcvbyteafterwin += todrop;
+ m_adj(m, -todrop);
+ ti->ti_len -= todrop;
+ tiflags &= ~(TH_PUSH|TH_FIN);
+ }
+
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record its timestamp.
+ */
+ if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
+ SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len +
+ ((tiflags & (TH_SYN|TH_FIN)) != 0))) {
+ tp->ts_recent_age = tcp_now;
+ tp->ts_recent = ts_val;
+ }
+
+ /*
+ * If the RST bit is set examine the state:
+ * SYN_RECEIVED STATE:
+ * If passive open, return to LISTEN state.
+ * If active open, inform user that connection was refused.
+ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
+ * Inform user that connection was reset, and close tcb.
+ * CLOSING, LAST_ACK, TIME_WAIT STATES
+ * Close the tcb.
+ */
+ if (tiflags&TH_RST) switch (tp->t_state) {
+
+ case TCPS_SYN_RECEIVED:
+ so->so_error = ECONNREFUSED;
+ goto close;
+
+ case TCPS_ESTABLISHED:
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ so->so_error = ECONNRESET;
+ close:
+ tp->t_state = TCPS_CLOSED;
+ tcpstat.tcps_drops++;
+ tp = tcp_close(tp);
+ goto drop;
+
+ case TCPS_CLOSING:
+ case TCPS_LAST_ACK:
+ case TCPS_TIME_WAIT:
+ tp = tcp_close(tp);
+ goto drop;
+ }
+
+ /*
+ * If a SYN is in the window, then this is an
+ * error and we send an RST and drop the connection.
+ */
+ if (tiflags & TH_SYN) {
+ tp = tcp_drop(tp, ECONNRESET);
+ goto dropwithreset;
+ }
+
+ /*
+ * If the ACK bit is off we drop the segment and return.
+ */
+ if ((tiflags & TH_ACK) == 0)
+ goto drop;
+
+ /*
+ * Ack processing.
+ */
+ switch (tp->t_state) {
+
+ /*
+ * In SYN_RECEIVED state if the ack ACKs our SYN then enter
+ * ESTABLISHED state and continue processing, otherwise
+ * send an RST.
+ */
+ case TCPS_SYN_RECEIVED:
+ if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
+ SEQ_GT(ti->ti_ack, tp->snd_max))
+ goto dropwithreset;
+ tcpstat.tcps_connects++;
+ soisconnected(so);
+ tp->t_state = TCPS_ESTABLISHED;
+ /* Do window scaling? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->snd_scale = tp->requested_s_scale;
+ tp->rcv_scale = tp->request_r_scale;
+ }
+ (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
+ tp->snd_wl1 = ti->ti_seq - 1;
+ /* fall into ... */
+
+ /*
+ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
+ * ACKs. If the ack is in the range
+ * tp->snd_una < ti->ti_ack <= tp->snd_max
+ * then advance tp->snd_una to ti->ti_ack and drop
+ * data from the retransmission queue. If this ACK reflects
+ * more up to date window information we update our window information.
+ */
+ case TCPS_ESTABLISHED:
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ case TCPS_CLOSING:
+ case TCPS_LAST_ACK:
+ case TCPS_TIME_WAIT:
+
+ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
+ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
+ tcpstat.tcps_rcvdupack++;
+ /*
+ * If we have outstanding data (other than
+ * a window probe), this is a completely
+ * duplicate ack (ie, window info didn't
+ * change), the ack is the biggest we've
+ * seen and we've seen exactly our rexmt
+ * threshold of them, assume a packet
+ * has been dropped and retransmit it.
+ * Kludge snd_nxt & the congestion
+ * window so we send only this one
+ * packet.
+ *
+ * We know we're losing at the current
+ * window size so do congestion avoidance
+ * (set ssthresh to half the current window
+ * and pull our congestion window back to
+ * the new ssthresh).
+ *
+ * Dup acks mean that packets have left the
+ * network (they're now cached at the receiver)
+ * so bump cwnd by the amount in the receiver
+ * to keep a constant cwnd packets in the
+ * network.
+ */
+ if (tp->t_timer[TCPT_REXMT] == 0 ||
+ ti->ti_ack != tp->snd_una)
+ tp->t_dupacks = 0;
+ else if (++tp->t_dupacks == tcprexmtthresh) {
+ tcp_seq onxt = tp->snd_nxt;
+ u_int win =
+ min(tp->snd_wnd, tp->snd_cwnd) / 2 /
+ tp->t_maxseg;
+
+ if (win < 2)
+ win = 2;
+ tp->snd_ssthresh = win * tp->t_maxseg;
+ tp->t_timer[TCPT_REXMT] = 0;
+ tp->t_rtt = 0;
+ tp->snd_nxt = ti->ti_ack;
+ tp->snd_cwnd = tp->t_maxseg;
+ (void) tcp_output(tp);
+ tp->snd_cwnd = tp->snd_ssthresh +
+ tp->t_maxseg * tp->t_dupacks;
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ goto drop;
+ } else if (tp->t_dupacks > tcprexmtthresh) {
+ tp->snd_cwnd += tp->t_maxseg;
+ (void) tcp_output(tp);
+ goto drop;
+ }
+ } else
+ tp->t_dupacks = 0;
+ break;
+ }
+ /*
+ * If the congestion window was inflated to account
+ * for the other side's cached packets, retract it.
+ */
+ if (tp->t_dupacks > tcprexmtthresh &&
+ tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
+ tp->t_dupacks = 0;
+ if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
+ tcpstat.tcps_rcvacktoomuch++;
+ goto dropafterack;
+ }
+ acked = ti->ti_ack - tp->snd_una;
+ tcpstat.tcps_rcvackpack++;
+ tcpstat.tcps_rcvackbyte += acked;
+
+ /*
+ * If we have a timestamp reply, update smoothed
+ * round trip time. If no timestamp is present but
+ * transmit timer is running and timed sequence
+ * number was acked, update smoothed round trip time.
+ * Since we now have an rtt measurement, cancel the
+ * timer backoff (cf., Phil Karn's retransmit alg.).
+ * Recompute the initial retransmit timer.
+ */
+ if (ts_present)
+ tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
+ else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
+ tcp_xmit_timer(tp,tp->t_rtt);
+
+ /*
+ * If all outstanding data is acked, stop retransmit
+ * timer and remember to restart (more output or persist).
+ * If there is more data to be acked, restart retransmit
+ * timer, using current (possibly backed-off) value.
+ */
+ if (ti->ti_ack == tp->snd_max) {
+ tp->t_timer[TCPT_REXMT] = 0;
+ needoutput = 1;
+ } else if (tp->t_timer[TCPT_PERSIST] == 0)
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+ /*
+ * When new data is acked, open the congestion window.
+ * If the window gives us less than ssthresh packets
+ * in flight, open exponentially (maxseg per packet).
+ * Otherwise open linearly: maxseg per window
+ * (maxseg^2 / cwnd per packet), plus a constant
+ * fraction of a packet (maxseg/8) to help larger windows
+ * open quickly enough.
+ */
+ {
+ register u_int cw = tp->snd_cwnd;
+ register u_int incr = tp->t_maxseg;
+
+ if (cw > tp->snd_ssthresh)
+ incr = incr * incr / cw + incr / 8;
+ tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
+ }
+ if (acked > so->so_snd.sb_cc) {
+ tp->snd_wnd -= so->so_snd.sb_cc;
+ sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
+ ourfinisacked = 1;
+ } else {
+ sbdrop(&so->so_snd, acked);
+ tp->snd_wnd -= acked;
+ ourfinisacked = 0;
+ }
+ if (so->so_snd.sb_flags & SB_NOTIFY)
+ sowwakeup(so);
+ tp->snd_una = ti->ti_ack;
+ if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+ tp->snd_nxt = tp->snd_una;
+
+ switch (tp->t_state) {
+
+ /*
+ * In FIN_WAIT_1 STATE in addition to the processing
+ * for the ESTABLISHED state if our FIN is now acknowledged
+ * then enter FIN_WAIT_2.
+ */
+ case TCPS_FIN_WAIT_1:
+ if (ourfinisacked) {
+ /*
+ * If we can't receive any more
+ * data, then closing user can proceed.
+ * Starting the timer is contrary to the
+ * specification, but if we don't get a FIN
+ * we'll hang forever.
+ */
+ if (so->so_state & SS_CANTRCVMORE) {
+ soisdisconnected(so);
+ tp->t_timer[TCPT_2MSL] = tcp_maxidle;
+ }
+ tp->t_state = TCPS_FIN_WAIT_2;
+ }
+ break;
+
+ /*
+ * In CLOSING STATE in addition to the processing for
+ * the ESTABLISHED state if the ACK acknowledges our FIN
+ * then enter the TIME-WAIT state, otherwise ignore
+ * the segment.
+ */
+ case TCPS_CLOSING:
+ if (ourfinisacked) {
+ tp->t_state = TCPS_TIME_WAIT;
+ tcp_canceltimers(tp);
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ soisdisconnected(so);
+ }
+ break;
+
+ /*
+ * In LAST_ACK, we may still be waiting for data to drain
+ * and/or to be acked, as well as for the ack of our FIN.
+ * If our FIN is now acknowledged, delete the TCB,
+ * enter the closed state and return.
+ */
+ case TCPS_LAST_ACK:
+ if (ourfinisacked) {
+ tp = tcp_close(tp);
+ goto drop;
+ }
+ break;
+
+ /*
+ * In TIME_WAIT state the only thing that should arrive
+ * is a retransmission of the remote FIN. Acknowledge
+ * it and restart the finack timer.
+ */
+ case TCPS_TIME_WAIT:
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ goto dropafterack;
+ }
+ }
+
+step6:
+ /*
+ * Update window information.
+ * Don't look at window if no ACK: TAC's send garbage on first SYN.
+ */
+ if ((tiflags & TH_ACK) &&
+ (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq &&
+ (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
+ tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))) {
+ /* keep track of pure window updates */
+ if (ti->ti_len == 0 &&
+ tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
+ tcpstat.tcps_rcvwinupd++;
+ tp->snd_wnd = tiwin;
+ tp->snd_wl1 = ti->ti_seq;
+ tp->snd_wl2 = ti->ti_ack;
+ if (tp->snd_wnd > tp->max_sndwnd)
+ tp->max_sndwnd = tp->snd_wnd;
+ needoutput = 1;
+ }
+
+ /*
+ * Process segments with URG.
+ */
+ if ((tiflags & TH_URG) && ti->ti_urp &&
+ TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ /*
+ * This is a kludge, but if we receive and accept
+ * random urgent pointers, we'll crash in
+ * soreceive. It's hard to imagine someone
+ * actually wanting to send this much urgent data.
+ */
+ if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) {
+ ti->ti_urp = 0; /* XXX */
+ tiflags &= ~TH_URG; /* XXX */
+ goto dodata; /* XXX */
+ }
+ /*
+ * If this segment advances the known urgent pointer,
+ * then mark the data stream. This should not happen
+ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
+ * a FIN has been received from the remote side.
+ * In these states we ignore the URG.
+ *
+ * According to RFC961 (Assigned Protocols),
+ * the urgent pointer points to the last octet
+ * of urgent data. We continue, however,
+ * to consider it to indicate the first octet
+ * of data past the urgent section as the original
+ * spec states (in one of two places).
+ */
+ if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
+ tp->rcv_up = ti->ti_seq + ti->ti_urp;
+ so->so_oobmark = so->so_rcv.sb_cc +
+ (tp->rcv_up - tp->rcv_nxt) - 1;
+ if (so->so_oobmark == 0)
+ so->so_state |= SS_RCVATMARK;
+ sohasoutofband(so);
+ tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+ }
+ /*
+ * Remove out of band data so doesn't get presented to user.
+ * This can happen independent of advancing the URG pointer,
+ * but if two URG's are pending at once, some out-of-band
+ * data may creep in... ick.
+ */
+ if (ti->ti_urp <= ti->ti_len
+#ifdef SO_OOBINLINE
+ && (so->so_options & SO_OOBINLINE) == 0
+#endif
+ )
+ tcp_pulloutofband(so, ti, m);
+ } else
+ /*
+ * If no out of band data is expected,
+ * pull receive urgent pointer along
+ * with the receive window.
+ */
+ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
+ tp->rcv_up = tp->rcv_nxt;
+dodata: /* XXX */
+
+ /*
+ * Process the segment text, merging it into the TCP sequencing queue,
+ * and arranging for acknowledgment of receipt if necessary.
+ * This process logically involves adjusting tp->rcv_wnd as data
+ * is presented to the user (this happens in tcp_usrreq.c,
+ * case PRU_RCVD). If a FIN has already been received on this
+ * connection then we just ignore the text.
+ */
+ if ((ti->ti_len || (tiflags&TH_FIN)) &&
+ TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ TCP_REASS(tp, ti, m, so, tiflags);
+ /*
+ * Note the amount of data that peer has sent into
+ * our window, in order to estimate the sender's
+ * buffer size.
+ */
+ len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
+ } else {
+ m_freem(m);
+ tiflags &= ~TH_FIN;
+ }
+
+ /*
+ * If FIN is received ACK the FIN and let the user know
+ * that the connection is closing.
+ */
+ if (tiflags & TH_FIN) {
+ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ socantrcvmore(so);
+ tp->t_flags |= TF_ACKNOW;
+ tp->rcv_nxt++;
+ }
+ switch (tp->t_state) {
+
+ /*
+ * In SYN_RECEIVED and ESTABLISHED STATES
+ * enter the CLOSE_WAIT state.
+ */
+ case TCPS_SYN_RECEIVED:
+ case TCPS_ESTABLISHED:
+ tp->t_state = TCPS_CLOSE_WAIT;
+ break;
+
+ /*
+ * If still in FIN_WAIT_1 STATE FIN has not been acked so
+ * enter the CLOSING state.
+ */
+ case TCPS_FIN_WAIT_1:
+ tp->t_state = TCPS_CLOSING;
+ break;
+
+ /*
+ * In FIN_WAIT_2 state enter the TIME_WAIT state,
+ * starting the time-wait timer, turning off the other
+ * standard timers.
+ */
+ case TCPS_FIN_WAIT_2:
+ tp->t_state = TCPS_TIME_WAIT;
+ tcp_canceltimers(tp);
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ soisdisconnected(so);
+ break;
+
+ /*
+ * In TIME_WAIT state restart the 2 MSL time_wait timer.
+ */
+ case TCPS_TIME_WAIT:
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ break;
+ }
+ }
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0);
+
+ /*
+ * Return any desired output.
+ */
+ if (needoutput || (tp->t_flags & TF_ACKNOW))
+ (void) tcp_output(tp);
+ return;
+
+dropafterack:
+ /*
+ * Generate an ACK dropping incoming segment if it occupies
+ * sequence space, where the ACK reflects our state.
+ */
+ if (tiflags & TH_RST)
+ goto drop;
+ m_freem(m);
+ tp->t_flags |= TF_ACKNOW;
+ (void) tcp_output(tp);
+ return;
+
+dropwithreset:
+ /*
+ * Generate a RST, dropping incoming segment.
+ * Make ACK acceptable to originator of segment.
+ * Don't bother to respond if destination was broadcast/multicast.
+ */
+ if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) ||
+ IN_MULTICAST(ti->ti_dst.s_addr))
+ goto drop;
+ if (tiflags & TH_ACK)
+ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
+ else {
+ if (tiflags & TH_SYN)
+ ti->ti_len++;
+ tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
+ TH_RST|TH_ACK);
+ }
+ /* destroy temporarily created socket */
+ if (dropsocket)
+ (void) soabort(so);
+ return;
+
+drop:
+ /*
+ * Drop space held by incoming segment and return.
+ */
+ if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
+ m_freem(m);
+ /* destroy temporarily created socket */
+ if (dropsocket)
+ (void) soabort(so);
+ return;
+#ifndef TUBA_INCLUDE
+}
+
+/*
+ * Parse the options area of a received TCP header.
+ *
+ *	tp		connection the segment belongs to
+ *	cp, cnt		start and byte length of the options area
+ *	ti		header of the segment (only ti_flags is examined)
+ *	ts_present	set to 1 when a timestamp option of the right
+ *			length is seen; otherwise left untouched
+ *	ts_val, ts_ecr	receive the timestamp value and echo reply,
+ *			converted to host byte order
+ *
+ * MAXSEG and WINDOW options are honoured only on segments carrying SYN.
+ * Parsing stops at EOL, or at the first option whose length octet is
+ * missing, is smaller than the two-octet kind/length prefix, or claims
+ * more bytes than remain in the options area.  Without that check a
+ * forged length would make the copies below read past the header.
+ */
+void
+tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr)
+	struct tcpcb *tp;
+	u_char *cp;
+	int cnt;
+	struct tcpiphdr *ti;
+	int *ts_present;
+	u_long *ts_val, *ts_ecr;
+{
+	u_short mss;
+	int opt, optlen;
+
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[0];
+		if (opt == TCPOPT_EOL)
+			break;
+		if (opt == TCPOPT_NOP)
+			optlen = 1;
+		else {
+			/* the length octet itself must be inside the area */
+			if (cnt < 2)
+				break;
+			optlen = cp[1];
+			/* length counts kind + length, and must fit */
+			if (optlen < 2 || optlen > cnt)
+				break;
+		}
+		switch (opt) {
+
+		default:
+			/* unknown option: skip over it */
+			continue;
+
+		case TCPOPT_MAXSEG:
+			if (optlen != TCPOLEN_MAXSEG)
+				continue;
+			if (!(ti->ti_flags & TH_SYN))
+				continue;
+			/* bcopy: the option value may be unaligned */
+			bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
+			NTOHS(mss);
+			(void) tcp_mss(tp, mss);	/* sets t_maxseg */
+			break;
+
+		case TCPOPT_WINDOW:
+			if (optlen != TCPOLEN_WINDOW)
+				continue;
+			if (!(ti->ti_flags & TH_SYN))
+				continue;
+			tp->t_flags |= TF_RCVD_SCALE;
+			tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+			break;
+
+		case TCPOPT_TIMESTAMP:
+			if (optlen != TCPOLEN_TIMESTAMP)
+				continue;
+			*ts_present = 1;
+			bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val));
+			NTOHL(*ts_val);
+			bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr));
+			NTOHL(*ts_ecr);
+
+			/*
+			 * A timestamp received in a SYN makes
+			 * it ok to send timestamp requests and replies.
+			 */
+			if (ti->ti_flags & TH_SYN) {
+				tp->t_flags |= TF_RCVD_TSTMP;
+				tp->ts_recent = *ts_val;
+				tp->ts_recent_age = tcp_now;
+			}
+			break;
+		}
+	}
+}
+
+/*
+ * Lift the out-of-band byte (the one at offset ti_urp - 1) out of
+ * the segment's mbuf chain so that it does not show up in the
+ * user's data queue.  The byte is parked in t_iobc and flagged with
+ * TCPOOB_HAVEDATA; ti_len is not touched here, so the byte is
+ * still reflected in the segment length for sequencing purposes.
+ * Panics if the chain ends before that offset is reached.
+ */
+void
+tcp_pulloutofband(so, ti, m)
+	struct socket *so;
+	struct tcpiphdr *ti;
+	register struct mbuf *m;
+{
+	int off;
+
+	for (off = ti->ti_urp - 1; off >= 0; off -= m->m_len, m = m->m_next) {
+		char *oob;
+		struct tcpcb *tp;
+
+		if (m->m_len <= off) {
+			/* the byte is further down the chain, if any is left */
+			if (m->m_next == 0)
+				break;
+			continue;
+		}
+
+		oob = mtod(m, caddr_t) + off;
+		tp = sototcpcb(so);
+		tp->t_iobc = *oob;
+		tp->t_oobflags |= TCPOOB_HAVEDATA;
+		/* close the one-byte gap left behind in this mbuf */
+		bcopy(oob + 1, oob, (unsigned)(m->m_len - off - 1));
+		m->m_len--;
+		return;
+	}
+	panic("tcp_pulloutofband");
+}
+
+/*
+ * Collect new round-trip time estimate
+ * and update averages and current timeout.
+ *
+ * tp is the connection being timed and rtt is the new sample; the
+ * smoothing code below subtracts 1 from it ("origin 0") before use.
+ * Besides refreshing t_srtt and t_rttvar this counts the update in
+ * tcpstat.tcps_rttupdated, zeroes t_rtt and t_rxtshift, recomputes
+ * t_rxtcur through TCPT_RANGESET with t_rttmin and TCPTV_REXMTMAX as
+ * the bounds, and clears t_softerror.
+ */
+void
+tcp_xmit_timer(tp, rtt)
+ register struct tcpcb *tp;
+ short rtt;
+{
+ register short delta;
+
+ tcpstat.tcps_rttupdated++;
+ if (tp->t_srtt != 0) {
+ /*
+ * srtt is stored as fixed point with 3 bits after the
+ * binary point (i.e., scaled by 8). The following magic
+ * is equivalent to the smoothing algorithm in rfc793 with
+ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
+ * point). Adjust rtt to origin 0.
+ */
+ delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
+ /* never let the estimate reach 0: 0 means "no estimate yet" above */
+ if ((tp->t_srtt += delta) <= 0)
+ tp->t_srtt = 1;
+ /*
+ * We accumulate a smoothed rtt variance (actually, a
+ * smoothed mean difference), then set the retransmit
+ * timer to smoothed rtt + 4 times the smoothed variance.
+ * rttvar is stored as fixed point with 2 bits after the
+ * binary point (scaled by 4). The following is
+ * equivalent to rfc793 smoothing with an alpha of .75
+ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces
+ * rfc793's wired-in beta.
+ */
+ if (delta < 0)
+ delta = -delta;
+ delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
+ if ((tp->t_rttvar += delta) <= 0)
+ tp->t_rttvar = 1;
+ } else {
+ /*
+ * No rtt measurement yet - use the unsmoothed rtt.
+ * Set the variance to half the rtt (so our first
+ * retransmit happens at 3*rtt).
+ */
+ tp->t_srtt = rtt << TCP_RTT_SHIFT;
+ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+ }
+ /* the sample has been consumed; also reset the retransmit shift */
+ tp->t_rtt = 0;
+ tp->t_rxtshift = 0;
+
+ /*
+ * the retransmit should happen at rtt + 4 * rttvar.
+ * Because of the way we do the smoothing, srtt and rttvar
+ * will each average +1/2 tick of bias. When we compute
+ * the retransmit timer, we want 1/2 tick of rounding and
+ * 1 extra tick because of +-1/2 tick uncertainty in the
+ * firing of the timer. The bias will give us exactly the
+ * 1.5 tick we need. But, because the bias is
+ * statistical, we have to test that we don't drop below
+ * the minimum feasible timer (which is 2 ticks).
+ */
+ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+ tp->t_rttmin, TCPTV_REXMTMAX);
+
+ /*
+ * We received an ack for a packet that wasn't retransmitted;
+ * it is probably safe to discard any error indications we've
+ * received recently. This isn't quite right, but close enough
+ * for now (a route might have failed after we sent a segment,
+ * and the return path might not be symmetrical).
+ */
+ tp->t_softerror = 0;
+}
+
+/*
+ * Determine a reasonable value for maxseg size.
+ * If the route is known, check route for mtu.
+ * If none, use an mss that can be handled on the outgoing
+ * interface without forcing IP to fragment; if bigger than
+ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
+ * to utilize large mbufs. If no route is found, route has no mtu,
+ * or the destination isn't local, use a default, hopefully conservative
+ * size (usually 512 or the default IP max size, but no more than the mtu
+ * of the interface), as we can't discover anything about intervening
+ * gateways or networks. We also initialize the congestion/slow start
+ * window to be a single segment if the destination isn't local.
+ * While looking at the routing entry, we also initialize other path-dependent
+ * parameters from pre-set or cached values in the routing entry.
+ *
+ * tp is the connection; offer is the mss offered by the peer, with 0
+ * meaning "no offer" (see the tests on offer below). The return value is
+ * the computed mss, or tcp_mssdflt when no route can be obtained; in that
+ * case nothing in tp is modified.
+ * NOTE(review): the code below sets snd_cwnd = mss unconditionally, not
+ * only when the destination isn't local as the paragraph above says.
+ */
+int
+tcp_mss(tp, offer)
+ register struct tcpcb *tp;
+ u_int offer;
+{
+ struct route *ro;
+ register struct rtentry *rt;
+ struct ifnet *ifp;
+ register int rtt, mss;
+ u_long bufsize;
+ struct inpcb *inp;
+ struct socket *so;
+ extern int tcp_mssdflt;
+
+ inp = tp->t_inpcb;
+ ro = &inp->inp_route;
+
+ if ((rt = ro->ro_rt) == (struct rtentry *)0) {
+ /* No route yet, so try to acquire one */
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ ro->ro_dst.sa_family = AF_INET;
+ ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+ ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+ inp->inp_faddr;
+ rtalloc(ro);
+ }
+ if ((rt = ro->ro_rt) == (struct rtentry *)0)
+ return (tcp_mssdflt);
+ }
+ ifp = rt->rt_ifp;
+ so = inp->inp_socket;
+
+#ifdef RTV_MTU /* if route characteristics exist ... */
+ /*
+ * While we're here, check if there's an initial rtt
+ * or rttvar. Convert from the route-table units
+ * to scaled multiples of the slow timeout timer.
+ */
+ if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
+ /*
+ * XXX the lock bit for RTT indicates that the value
+ * is also a minimum value; this is subject to time.
+ */
+ if (rt->rt_rmx.rmx_locks & RTV_RTT)
+ tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ);
+ tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
+ if (rt->rt_rmx.rmx_rttvar)
+ tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
+ (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
+ else
+ /* default variation is +- 1 rtt */
+ tp->t_rttvar =
+ tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+ TCPT_RANGESET(tp->t_rxtcur,
+ ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+ tp->t_rttmin, TCPTV_REXMTMAX);
+ }
+ /*
+ * if there's an mtu associated with the route, use it
+ */
+ if (rt->rt_rmx.rmx_mtu)
+ mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
+ else
+#endif /* RTV_MTU */
+ /*
+ * Without RTV_MTU the block below runs unconditionally; with it,
+ * the block is the else-arm of the route-mtu test just above.
+ */
+ {
+ mss = ifp->if_mtu - sizeof(struct tcpiphdr);
+#if (MCLBYTES & (MCLBYTES - 1)) == 0
+ if (mss > MCLBYTES)
+ mss &= ~(MCLBYTES-1);
+#else
+ if (mss > MCLBYTES)
+ mss = mss / MCLBYTES * MCLBYTES;
+#endif
+ if (!in_localaddr(inp->inp_faddr))
+ mss = min(mss, tcp_mssdflt);
+ }
+ /*
+ * The current mss, t_maxseg, is initialized to the default value.
+ * If we compute a smaller value, reduce the current mss.
+ * If we compute a larger value, return it for use in sending
+ * a max seg size option, but don't store it for use
+ * unless we received an offer at least that large from peer.
+ * However, do not accept offers under 32 bytes.
+ */
+ if (offer)
+ mss = min(mss, offer);
+ mss = max(mss, 32); /* sanity */
+ if (mss < tp->t_maxseg || offer != 0) {
+ /*
+ * If there's a pipesize, change the socket buffer
+ * to that size. Make the socket buffers an integral
+ * number of mss units; if the mss is larger than
+ * the socket buffer, decrease the mss.
+ */
+#ifdef RTV_SPIPE
+ if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
+#endif
+ bufsize = so->so_snd.sb_hiwat;
+ if (bufsize < mss)
+ mss = bufsize;
+ else {
+ bufsize = roundup(bufsize, mss);
+ if (bufsize > sb_max)
+ bufsize = sb_max;
+ (void)sbreserve(&so->so_snd, bufsize);
+ }
+ tp->t_maxseg = mss;
+
+#ifdef RTV_RPIPE
+ if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
+#endif
+ bufsize = so->so_rcv.sb_hiwat;
+ if (bufsize > mss) {
+ bufsize = roundup(bufsize, mss);
+ if (bufsize > sb_max)
+ bufsize = sb_max;
+ (void)sbreserve(&so->so_rcv, bufsize);
+ }
+ }
+ tp->snd_cwnd = mss;
+
+#ifdef RTV_SSTHRESH
+ if (rt->rt_rmx.rmx_ssthresh) {
+ /*
+ * There's some sort of gateway or interface
+ * buffer limit on the path. Use this to set
+ * the slow start threshhold, but set the
+ * threshold to no less than 2*mss.
+ */
+ tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
+ }
+#endif /* RTV_SSTHRESH */
+ return (mss);
+}
+#endif /* TUBA_INCLUDE */
diff --git a/sys/netinet/tcp_seq.h b/sys/netinet/tcp_seq.h
new file mode 100644
index 000000000000..8912299ff799
--- /dev/null
+++ b/sys/netinet/tcp_seq.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_seq.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * TCP sequence numbers are 32 bit integers operated
+ * on with modular arithmetic.  These macros compare two such
+ * numbers by taking the sign of their difference, so the cast
+ * relies on int being 32 bits wide, and the answer is only
+ * meaningful while the two values are less than 2^31 apart.
+ */
+#define	SEQ_LT(a,b)	((int)((a)-(b)) < 0)
+#define	SEQ_LEQ(a,b)	((int)((a)-(b)) <= 0)
+#define	SEQ_GT(a,b)	((int)((a)-(b)) > 0)
+#define	SEQ_GEQ(a,b)	((int)((a)-(b)) >= 0)
+
+/*
+ * Macros to initialize tcp sequence numbers for
+ * send and receive from initial send and receive
+ * sequence numbers.  Each expands to one fully parenthesized
+ * assignment expression, so it is safe inside a larger
+ * expression as well as when used as a statement.
+ */
+#define	tcp_rcvseqinit(tp) \
+	((tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1)
+
+#define	tcp_sendseqinit(tp) \
+	((tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \
+	    (tp)->iss)
+
+#define	TCP_ISSINCR	(125*1024)	/* increment for tcp_iss each second */
+
+#ifdef KERNEL
+tcp_seq	tcp_iss;		/* tcp initial send seq # */
+#endif
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
new file mode 100644
index 000000000000..8edb853bedea
--- /dev/null
+++ b/sys/netinet/tcp_subr.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+
+/* patchable/settable parameters for tcp */
+/* default segment size: initial t_maxseg in tcp_newtcpcb, fallback return of tcp_mss */
+int tcp_mssdflt = TCP_MSS;
+/* default rtt; tcp_newtcpcb scales it by PR_SLOWHZ << 2 to seed t_rttvar */
+int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
+/* nonzero: new control blocks start with TF_REQ_SCALE|TF_REQ_TSTMP set */
+int tcp_do_rfc1323 = 1;
+
+/* defined elsewhere; tcp_close resets it to &tcb when the cached pcb goes away */
+extern struct inpcb *tcp_last_inpcb;
+
+/*
+ * Tcp initialization: seed the initial send sequence number, make
+ * the pcb list head an empty circular list, raise max_protohdr to
+ * at least the size of a TCP/IP header, and panic if max_linkhdr
+ * plus a TCP/IP header would exceed MHLEN.
+ */
+void
+tcp_init()
+{
+
+	tcp_iss = 1;		/* wrong */
+
+	/* empty list: the head is its own neighbour in both directions */
+	tcb.inp_prev = &tcb;
+	tcb.inp_next = tcb.inp_prev;
+
+	if (sizeof(struct tcpiphdr) > max_protohdr)
+		max_protohdr = sizeof(struct tcpiphdr);
+
+	if (MHLEN < max_linkhdr + sizeof(struct tcpiphdr))
+		panic("tcp_init");
+}
+
+/*
+ * Create template to be used to send tcp packets on a connection.
+ * If the connection already has a template (tp->t_template) it is
+ * refilled in place; otherwise an mbuf is allocated without waiting
+ * and the skeletal tcp/ip header is built in it.  Addresses and
+ * ports come from the connection's inpcb; everything else is zeroed
+ * apart from the protocol, the length and a header offset of 5 words.
+ *
+ * Returns the header, or 0 if no mbuf could be had.  The result is
+ * not stored in tp->t_template here; that is left to the caller.
+ */
+struct tcpiphdr *
+tcp_template(tp)
+	struct tcpcb *tp;
+{
+	register struct inpcb *inp = tp->t_inpcb;
+	register struct tcpiphdr *hdr = tp->t_template;
+
+	if (hdr == 0) {
+		register struct mbuf *mb = m_get(M_DONTWAIT, MT_HEADER);
+
+		if (mb == NULL)
+			return (0);
+		mb->m_len = sizeof (struct tcpiphdr);
+		hdr = mtod(mb, struct tcpiphdr *);
+	}
+
+	/* overlay / pseudo-header part */
+	hdr->ti_next = hdr->ti_prev = 0;
+	hdr->ti_x1 = 0;
+	hdr->ti_pr = IPPROTO_TCP;
+	hdr->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
+
+	/* endpoints of this connection */
+	hdr->ti_src = inp->inp_laddr;
+	hdr->ti_dst = inp->inp_faddr;
+	hdr->ti_sport = inp->inp_lport;
+	hdr->ti_dport = inp->inp_fport;
+
+	/* per-segment fields start out cleared */
+	hdr->ti_seq = 0;
+	hdr->ti_ack = 0;
+	hdr->ti_x2 = 0;
+	hdr->ti_off = 5;
+	hdr->ti_flags = 0;
+	hdr->ti_win = 0;
+	hdr->ti_sum = 0;
+	hdr->ti_urp = 0;
+
+	return (hdr);
+}
+
+/*
+ * Send a single message to the TCP at address specified by
+ * the given TCP/IP header. If m == 0, then we make a copy
+ * of the tcpiphdr at ti and send directly to the addressed host.
+ * This is used to force keep alive messages out using the TCP
+ * template for a connection tp->t_template. If flags are given
+ * then we send a message back to the TCP which originated the
+ * segment ti, and discard the mbuf containing it and any other
+ * attached mbufs.
+ *
+ * In any case the ack and sequence number of the transmitted
+ * segment are as specified by the parameters.
+ *
+ * Notes on the code below: the branch is actually taken on m, not on
+ * flags. With m == 0 the flags argument is overwritten with TH_ACK;
+ * with m != 0 the mbuf holding ti is reused for the reply and the
+ * addresses and ports in ti are swapped. tp may be null, in which
+ * case a zero window is advertised and no cached route is passed to
+ * ip_output. Mind the argument order: ack comes before seq.
+ */
+void
+tcp_respond(tp, ti, m, ack, seq, flags)
+ struct tcpcb *tp;
+ register struct tcpiphdr *ti;
+ register struct mbuf *m;
+ tcp_seq ack, seq;
+ int flags;
+{
+ register int tlen;
+ int win = 0;
+ struct route *ro = 0;
+
+ if (tp) {
+ win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
+ ro = &tp->t_inpcb->inp_route;
+ }
+ if (m == 0) {
+ /* no incoming segment: build the packet in a fresh header mbuf */
+ m = m_gethdr(M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ return;
+#ifdef TCP_COMPAT_42
+ tlen = 1;
+#else
+ tlen = 0;
+#endif
+ /* leave max_linkhdr bytes free in front of the header */
+ m->m_data += max_linkhdr;
+ *mtod(m, struct tcpiphdr *) = *ti;
+ ti = mtod(m, struct tcpiphdr *);
+ flags = TH_ACK;
+ } else {
+ /* reuse the mbuf that carried ti; drop everything chained after it */
+ m_freem(m->m_next);
+ m->m_next = 0;
+ m->m_data = (caddr_t)ti;
+ m->m_len = sizeof (struct tcpiphdr);
+ tlen = 0;
+#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
+ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
+ xchg(ti->ti_dport, ti->ti_sport, u_short);
+#undef xchg
+ }
+ ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
+ tlen += sizeof (struct tcpiphdr);
+ m->m_len = tlen;
+ m->m_pkthdr.len = tlen;
+ m->m_pkthdr.rcvif = (struct ifnet *) 0;
+ ti->ti_next = ti->ti_prev = 0;
+ ti->ti_x1 = 0;
+ ti->ti_seq = htonl(seq);
+ ti->ti_ack = htonl(ack);
+ ti->ti_x2 = 0;
+ ti->ti_off = sizeof (struct tcphdr) >> 2;
+ ti->ti_flags = flags;
+ if (tp)
+ ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
+ else
+ ti->ti_win = htons((u_short)win);
+ ti->ti_urp = 0;
+ /* the sum field must be zero while the checksum is computed */
+ ti->ti_sum = 0;
+ ti->ti_sum = in_cksum(m, tlen);
+ /* these stores overwrite overlay fields, so they follow the checksum */
+ ((struct ip *)ti)->ip_len = tlen;
+ ((struct ip *)ti)->ip_ttl = ip_defttl;
+ (void) ip_output(m, NULL, ro, 0, NULL);
+}
+
+/*
+ * Create a new TCP control block for the protocol control block
+ * inp: allocate it without waiting, zero it, give it an empty
+ * reassembly queue and default timing values, and link the two
+ * blocks to each other.  Returns a null pointer when the
+ * allocation fails; inp is left untouched in that case.
+ */
+struct tcpcb *
+tcp_newtcpcb(inp)
+	struct inpcb *inp;
+{
+	register struct tcpcb *tp;
+
+	if ((tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT)) == NULL)
+		return ((struct tcpcb *)0);
+	bzero((char *) tp, sizeof(struct tcpcb));
+
+	/* empty reassembly queue: both links point back at the block */
+	tp->seg_prev = (struct tcpiphdr *)tp;
+	tp->seg_next = tp->seg_prev;
+	tp->t_maxseg = tcp_mssdflt;
+
+	if (tcp_do_rfc1323)
+		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
+	else
+		tp->t_flags = 0;
+	tp->t_inpcb = inp;
+
+	/*
+	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
+	 * rtt estimate.  Set rttvar so that srtt + 2 * rttvar gives
+	 * reasonable initial retransmit time.
+	 */
+	tp->t_srtt = TCPTV_SRTTBASE;
+	tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << 2;
+	tp->t_rttmin = TCPTV_MIN;
+	TCPT_RANGESET(tp->t_rxtcur,
+	    ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1,
+	    TCPTV_MIN, TCPTV_REXMTMAX);
+
+	/* congestion window and slow-start threshold start at the maximum */
+	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+
+	inp->inp_ip.ip_ttl = ip_defttl;
+	inp->inp_ppcb = (caddr_t)tp;
+	return (tp);
+}
+
+/*
+ * Drop a TCP connection, reporting the specified error on its
+ * socket.  If a SYN has been received on the connection, the state
+ * is forced to CLOSED and tcp_output is called so the peer is told
+ * (a RST, per the original description).  A timeout is reported as
+ * the remembered soft error when there is one.  The control block
+ * is then closed; the value returned is whatever tcp_close returns.
+ */
+struct tcpcb *
+tcp_drop(tp, errno)
+	register struct tcpcb *tp;
+	int errno;
+{
+	struct socket *so = tp->t_inpcb->inp_socket;
+
+	if (!TCPS_HAVERCVDSYN(tp->t_state)) {
+		tcpstat.tcps_conndrops++;
+	} else {
+		tp->t_state = TCPS_CLOSED;
+		(void) tcp_output(tp);
+		tcpstat.tcps_drops++;
+	}
+
+	if (errno == ETIMEDOUT && tp->t_softerror)
+		so->so_error = tp->t_softerror;
+	else
+		so->so_error = errno;
+
+	return (tcp_close(tp));
+}
+
+/*
+ * Close a TCP control block:
+ *	discard all space held by the tcp
+ *	discard internet protocol block
+ *	wake up any sleepers
+ *
+ * When RTV_RTT is defined and enough data was sent, the smoothed rtt,
+ * its variance and the slow-start threshold are first folded into the
+ * cached route, skipping every metric the user has locked.
+ * Always returns a null tcpcb pointer; tp is freed and must not be
+ * used by the caller afterwards.
+ */
+struct tcpcb *
+tcp_close(tp)
+	register struct tcpcb *tp;
+{
+	register struct tcpiphdr *t;
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
+	register struct mbuf *m;
+#ifdef RTV_RTT
+	register struct rtentry *rt;
+
+	/*
+	 * If we sent enough data to get some meaningful characteristics,
+	 * save them in the routing entry.  'Enough' is arbitrarily
+	 * defined as the sendpipesize (default 4K) * 16.  This would
+	 * give us 16 rtt samples assuming we only get one sample per
+	 * window (the usual case on a long haul net).  16 samples is
+	 * enough for the srtt filter to converge to within 5% of the correct
+	 * value; fewer samples and we could save a very bogus rtt.
+	 *
+	 * Don't update the default route's characteristics and don't
+	 * update anything that the user "locked".
+	 */
+	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
+	    (rt = inp->inp_route.ro_rt) &&
+	    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
+		register u_long i;
+
+		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
+			i = tp->t_srtt *
+			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
+			if (rt->rt_rmx.rmx_rtt && i)
+				/*
+				 * filter this update to half the old & half
+				 * the new values, converting scale.
+				 * See route.h and tcp_var.h for a
+				 * description of the scaling constants.
+				 */
+				rt->rt_rmx.rmx_rtt =
+				    (rt->rt_rmx.rmx_rtt + i) / 2;
+			else
+				rt->rt_rmx.rmx_rtt = i;
+		}
+		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
+			i = tp->t_rttvar *
+			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
+			if (rt->rt_rmx.rmx_rttvar && i)
+				rt->rt_rmx.rmx_rttvar =
+				    (rt->rt_rmx.rmx_rttvar + i) / 2;
+			else
+				rt->rt_rmx.rmx_rttvar = i;
+		}
+		/*
+		 * update the pipelimit (ssthresh) if it has been updated
+		 * already or if a pipesize was specified & the threshhold
+		 * got below half the pipesize.  I.e., wait for bad news
+		 * before we start updating, then update on both good
+		 * and bad news.
+		 *
+		 * The lock test and the load of i guard BOTH alternatives.
+		 * Left ungrouped, the "below half the pipesize" arm is
+		 * evaluated even when the metric is locked, using an i
+		 * that was never loaded from snd_ssthresh.
+		 */
+		if ((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
+		    (i = tp->snd_ssthresh) != 0 &&
+		    (rt->rt_rmx.rmx_ssthresh ||
+		     i < (rt->rt_rmx.rmx_sendpipe / 2))) {
+			/*
+			 * convert the limit from user data bytes to
+			 * packets then to packet data bytes.
+			 */
+			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
+			if (i < 2)
+				i = 2;
+			i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
+			if (rt->rt_rmx.rmx_ssthresh)
+				rt->rt_rmx.rmx_ssthresh =
+				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
+			else
+				rt->rt_rmx.rmx_ssthresh = i;
+		}
+	}
+#endif /* RTV_RTT */
+	/* free the reassembly queue, if any */
+	t = tp->seg_next;
+	while (t != (struct tcpiphdr *)tp) {
+		/* step forward first, then unlink and free the entry behind */
+		t = (struct tcpiphdr *)t->ti_next;
+		m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
+		remque(t->ti_prev);
+		m_freem(m);
+	}
+	if (tp->t_template)
+		(void) m_free(dtom(tp->t_template));
+	free(tp, M_PCB);
+	inp->inp_ppcb = 0;
+	soisdisconnected(so);
+	/* clobber input pcb cache if we're closing the cached connection */
+	if (inp == tcp_last_inpcb)
+		tcp_last_inpcb = &tcb;
+	in_pcbdetach(inp);
+	tcpstat.tcps_closed++;
+	return ((struct tcpcb *)0);
+}
+
+/*
+ * Empty on purpose: this implementation releases nothing when called.
+ * NOTE(review): presumably the protocol's drain hook - confirm against
+ * the protocol switch table.
+ */
+void
+tcp_drain()
+{
+
+}
+
+/*
+ * Notify a tcp user of an asynchronous error;
+ * store error as soft error, but wake up user
+ * (for now, won't do anything until can select for soft error).
+ *
+ * inp is the pcb of the affected connection, error the errno value.
+ */
+void
+tcp_notify(inp, error)
+	struct inpcb *inp;
+	int error;
+{
+	register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
+	register struct socket *so = inp->inp_socket;
+	int unreachable;
+
+	unreachable = (error == EHOSTUNREACH || error == ENETUNREACH ||
+	    error == EHOSTDOWN);
+
+	/*
+	 * An established connection ignores unreachable/down reports
+	 * altogether: nothing is recorded and nobody is woken.
+	 */
+	if (tp->t_state == TCPS_ESTABLISHED && unreachable)
+		return;
+
+	/*
+	 * A connection that hasn't completed, has retransmitted several
+	 * times, and already holds a soft error gives up now: the error
+	 * goes to the socket.  This is better than waiting a long time
+	 * to establish a connection that can never complete.  Otherwise
+	 * the error is only remembered as the soft error.
+	 */
+	if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
+	    tp->t_softerror)
+		so->so_error = error;
+	else
+		tp->t_softerror = error;
+
+	wakeup((caddr_t) &so->so_timeo);
+	sorwakeup(so);
+	sowwakeup(so);
+}
+
+/*
+ * Control input for TCP.
+ *
+ *	cmd	PRC_* request code
+ *	sa	address the request refers to; passed on to in_pcbnotify
+ *	ip	IP header of the packet that provoked the request, or
+ *		null; when present the TCP header is taken to follow
+ *		it at ip_hl << 2 and its ports narrow the notification
+ *
+ * PRC_QUENCH is delivered through tcp_quench, everything else through
+ * tcp_notify.  Requests that are neither a redirect nor a code with a
+ * nonzero inetctlerrmap[] entry are ignored.  The range test uses >=
+ * because cmd indexes inetctlerrmap[]: cmd == PRC_NCMDS would already
+ * be one past its last entry.
+ */
+void
+tcp_ctlinput(cmd, sa, ip)
+	int cmd;
+	struct sockaddr *sa;
+	register struct ip *ip;
+{
+	register struct tcphdr *th;
+	extern struct in_addr zeroin_addr;
+	extern u_char inetctlerrmap[];
+	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
+
+	if (cmd == PRC_QUENCH)
+		notify = tcp_quench;
+	else if (!PRC_IS_REDIRECT(cmd) &&
+	    ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0))
+		return;
+	if (ip) {
+		th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+		in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
+		    cmd, notify);
+	} else
+		in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
+}
+
+/*
+ * Source quench handler: shrink the congestion window of the
+ * connection behind inp to one segment (t_maxseg).  We will
+ * gradually open it again as we proceed.  Does nothing when the
+ * pcb has no tcpcb attached; the errno argument is not used.
+ */
+void
+tcp_quench(inp, errno)
+	struct inpcb *inp;
+	int errno;
+{
+	struct tcpcb *tp = intotcpcb(inp);
+
+	if (tp == 0)
+		return;
+	tp->snd_cwnd = tp->t_maxseg;
+}
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
new file mode 100644
index 000000000000..0c0f0f8c2f19
--- /dev/null
+++ b/sys/netinet/tcp_timer.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef TUBA_INCLUDE
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+
+int tcp_keepidle = TCPTV_KEEP_IDLE; /* time before keepalive probes begin */
+int tcp_keepintvl = TCPTV_KEEPINTVL; /* time between keepalive probes */
+int tcp_maxidle; /* set by tcp_slowtimo() to TCPTV_KEEPCNT * tcp_keepintvl */
+#endif /* TUBA_INCLUDE */
+/*
+ * Fast timeout routine: push out the ACKs that were being delayed.
+ * Each tcpcb with TF_DELACK set has that flag replaced by TF_ACKNOW
+ * and is handed to tcp_output().  The list walk is done at splnet.
+ */
+void
+tcp_fasttimo()
+{
+	register struct inpcb *pcb;
+	register struct tcpcb *conn;
+	int ipl = splnet();
+
+	pcb = tcb.inp_next;
+	if (pcb != 0) {
+		for (; pcb != &tcb; pcb = pcb->inp_next) {
+			conn = (struct tcpcb *)pcb->inp_ppcb;
+			/* Skip pcbs with no tcpcb or no delayed ACK pending. */
+			if (conn == 0 || (conn->t_flags & TF_DELACK) == 0)
+				continue;
+			conn->t_flags &= ~TF_DELACK;
+			conn->t_flags |= TF_ACKNOW;
+			tcpstat.tcps_delack++;
+			(void) tcp_output(conn);
+		}
+	}
+	splx(ipl);
+}
+
+/*
+ * Tcp protocol timeout routine called every 500 ms.
+ * Updates the timers in all active tcb's and
+ * causes finite state machine actions if timers expire.
+ * Also ages each connection's idle and RTT counters, and advances
+ * the global tcp_iss and tcp_now.  The whole pass runs at splnet.
+ */
+void
+tcp_slowtimo()
+{
+ register struct inpcb *ip, *ipnxt;
+ register struct tcpcb *tp;
+ int s = splnet();
+ register int i;
+
+ /* Recomputed on every tick from the current tcp_keepintvl. */
+ tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
+ /*
+ * Search through tcb's and update active timers.
+ */
+ ip = tcb.inp_next;
+ if (ip == 0) {
+ splx(s);
+ return;
+ }
+ for (; ip != &tcb; ip = ipnxt) {
+ /* Save the successor first: ip may be unlinked by a timer action. */
+ ipnxt = ip->inp_next;
+ tp = intotcpcb(ip);
+ if (tp == 0)
+ continue;
+ for (i = 0; i < TCPT_NTIMERS; i++) {
+ /* A zero timer is not running; act when one counts down to 0. */
+ if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
+ /* The timer index travels in a pointer-typed argument. */
+ (void) tcp_usrreq(tp->t_inpcb->inp_socket,
+ PRU_SLOWTIMO, (struct mbuf *)0,
+ (struct mbuf *)i, (struct mbuf *)0);
+ /*
+ * ip is no longer linked in front of ipnxt, so the
+ * pcb was removed during the request (presumably
+ * with its tcpcb); do not touch tp again.
+ */
+ if (ipnxt->inp_prev != ip)
+ goto tpgone;
+ }
+ }
+ tp->t_idle++;
+ /* t_rtt is nonzero only while a segment is being timed. */
+ if (tp->t_rtt)
+ tp->t_rtt++;
+tpgone:
+ ;
+ }
+ tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */
+#ifdef TCP_COMPAT_42
+ if ((int)tcp_iss < 0)
+ tcp_iss = 0; /* XXX */
+#endif
+ tcp_now++; /* for timestamps */
+ splx(s);
+}
+#ifndef TUBA_INCLUDE
+
+/*
+ * Stop every timer of connection tp by zeroing all TCPT_NTIMERS
+ * slots of tp->t_timer[].
+ */
+void
+tcp_canceltimers(tp)
+	struct tcpcb *tp;
+{
+	register int slot = 0;
+
+	while (slot < TCPT_NTIMERS)
+		tp->t_timer[slot++] = 0;
+}
+
+/* Retransmit interval multipliers, indexed by tp->t_rxtshift in tcp_timers(). */
+int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
+ { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
+
+/*
+ * TCP timer processing.
+ *
+ * Performs the action for expired timer `timer' (one of the TCPT_*
+ * indices) on connection tp.  Returns tp, or the value returned by
+ * tcp_close()/tcp_drop() when the action tore the connection down,
+ * so callers must continue with the returned pointer.
+ * A timer value with no case below leaves tp untouched.
+ */
+struct tcpcb *
+tcp_timers(tp, timer)
+ register struct tcpcb *tp;
+ int timer;
+{
+ register int rexmt;
+
+ switch (timer) {
+
+ /*
+ * 2 MSL timeout in shutdown went off. If we're closed but
+ * still waiting for peer to close and connection has been idle
+ * too long, or if 2MSL time is up from TIME_WAIT, delete connection
+ * control block. Otherwise, check again in a bit.
+ */
+ case TCPT_2MSL:
+ if (tp->t_state != TCPS_TIME_WAIT &&
+ tp->t_idle <= tcp_maxidle)
+ tp->t_timer[TCPT_2MSL] = tcp_keepintvl;
+ else
+ tp = tcp_close(tp);
+ break;
+
+ /*
+ * Retransmission timer went off. Message has not
+ * been acked within retransmit interval. Back off
+ * to a longer retransmit interval and retransmit one segment.
+ */
+ case TCPT_REXMT:
+ /*
+ * Too many consecutive timeouts: drop the connection,
+ * reporting the recorded soft error if there is one.
+ */
+ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+ tp->t_rxtshift = TCP_MAXRXTSHIFT;
+ tcpstat.tcps_timeoutdrop++;
+ tp = tcp_drop(tp, tp->t_softerror ?
+ tp->t_softerror : ETIMEDOUT);
+ break;
+ }
+ tcpstat.tcps_rexmttimeo++;
+ /*
+ * New interval: base value scaled by the backoff table,
+ * clamped to [t_rttmin, TCPTV_REXMTMAX].
+ */
+ rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
+ TCPT_RANGESET(tp->t_rxtcur, rexmt,
+ tp->t_rttmin, TCPTV_REXMTMAX);
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+ /*
+ * If losing, let the lower level know and try for
+ * a better route. Also, if we backed off this far,
+ * our srtt estimate is probably bogus. Clobber it
+ * so we'll take the next rtt measurement as our srtt;
+ * move the current srtt into rttvar to keep the current
+ * retransmit times until then.
+ */
+ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
+ in_losing(tp->t_inpcb);
+ tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
+ tp->t_srtt = 0;
+ }
+ /* Send again starting from the first unacknowledged byte. */
+ tp->snd_nxt = tp->snd_una;
+ /*
+ * If timing a segment in this window, stop the timer.
+ */
+ tp->t_rtt = 0;
+ /*
+ * Close the congestion window down to one segment
+ * (we'll open it by one segment for each ack we get).
+ * Since we probably have a window's worth of unacked
+ * data accumulated, this "slow start" keeps us from
+ * dumping all that data as back-to-back packets (which
+ * might overwhelm an intermediate gateway).
+ *
+ * There are two phases to the opening: Initially we
+ * open by one mss on each ack. This makes the window
+ * size increase exponentially with time. If the
+ * window is larger than the path can handle, this
+ * exponential growth results in dropped packet(s)
+ * almost immediately. To get more time between
+ * drops but still "push" the network to take advantage
+ * of improving conditions, we switch from exponential
+ * to linear window opening at some threshold size.
+ * For a threshold, we use half the current window
+ * size, truncated to a multiple of the mss.
+ *
+ * (the minimum cwnd that will give us exponential
+ * growth is 2 mss. We don't allow the threshold
+ * to go below this.)
+ */
+ {
+ /* win is counted in segments, not bytes. */
+ u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+ if (win < 2)
+ win = 2;
+ tp->snd_cwnd = tp->t_maxseg;
+ tp->snd_ssthresh = win * tp->t_maxseg;
+ tp->t_dupacks = 0;
+ }
+ (void) tcp_output(tp);
+ break;
+
+ /*
+ * Persistence timer into zero window.
+ * Force a byte to be output, if possible.
+ */
+ case TCPT_PERSIST:
+ tcpstat.tcps_persisttimeo++;
+ tcp_setpersist(tp);
+ /* t_force is raised only for the duration of this output call. */
+ tp->t_force = 1;
+ (void) tcp_output(tp);
+ tp->t_force = 0;
+ break;
+
+ /*
+ * Keep-alive timer went off; send something
+ * or drop connection if idle for too long.
+ */
+ case TCPT_KEEP:
+ tcpstat.tcps_keeptimeo++;
+ /* The connection was never established before the timer ran out. */
+ if (tp->t_state < TCPS_ESTABLISHED)
+ goto dropit;
+ if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE &&
+ tp->t_state <= TCPS_CLOSE_WAIT) {
+ /* Idle through the whole probing period: give up. */
+ if (tp->t_idle >= tcp_keepidle + tcp_maxidle)
+ goto dropit;
+ /*
+ * Send a packet designed to force a response
+ * if the peer is up and reachable:
+ * either an ACK if the connection is still alive,
+ * or an RST if the peer has closed the connection
+ * due to timeout or reboot.
+ * Using sequence number tp->snd_una-1
+ * causes the transmitted zero-length segment
+ * to lie outside the receive window;
+ * by the protocol spec, this requires the
+ * correspondent TCP to respond.
+ */
+ tcpstat.tcps_keepprobe++;
+#ifdef TCP_COMPAT_42
+ /*
+ * The keepalive packet must have nonzero length
+ * to get a 4.2 host to respond.
+ */
+ tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
+ tp->rcv_nxt - 1, tp->snd_una - 1, 0);
+#else
+ tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
+ tp->rcv_nxt, tp->snd_una - 1, 0);
+#endif
+ tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
+ } else
+ /* Keepalives not in use: just check again after tcp_keepidle. */
+ tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+ break;
+ /* Reached only by goto from the TCPT_KEEP case above. */
+ dropit:
+ tcpstat.tcps_keepdrops++;
+ tp = tcp_drop(tp, ETIMEDOUT);
+ break;
+ }
+ return (tp);
+}
+#endif /* TUBA_INCLUDE */
diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h
new file mode 100644
index 000000000000..301a10f4034c
--- /dev/null
+++ b/sys/netinet/tcp_timer.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions of the TCP timers. These timers are counted
+ * down PR_SLOWHZ times a second.
+ * Each TCPT_* value below is an index into tp->t_timer[].
+ */
+#define TCPT_NTIMERS 4
+
+#define TCPT_REXMT 0 /* retransmit */
+#define TCPT_PERSIST 1 /* retransmit persistence */
+#define TCPT_KEEP 2 /* keep alive */
+#define TCPT_2MSL 3 /* 2*msl quiet time timer */
+
+/*
+ * The TCPT_REXMT timer is used to force retransmissions.
+ * The TCP has the TCPT_REXMT timer set whenever segments
+ * have been sent for which ACKs are expected but not yet
+ * received. If an ACK is received which advances tp->snd_una,
+ * then the retransmit timer is cleared (if there are no more
+ * outstanding segments) or reset to the base value (if there
+ * are more ACKs expected). Whenever the retransmit timer goes off,
+ * we retransmit one unacknowledged segment, and do a backoff
+ * on the retransmit timer.
+ *
+ * The TCPT_PERSIST timer is used to keep window size information
+ * flowing even if the window goes shut. If all previous transmissions
+ * have been acknowledged (so that there are no retransmissions in progress),
+ * and the window is too small to bother sending anything, then we start
+ * the TCPT_PERSIST timer. When it expires, if the window is nonzero,
+ * we go to transmit state. Otherwise, at intervals send a single byte
+ * into the peer's window to force him to update our window information.
+ * We do this at most as often as TCPTV_PERSMIN time intervals,
+ * but no more frequently than the current estimate of round-trip
+ * packet time. The TCPT_PERSIST timer is cleared whenever we receive
+ * a window update from the peer.
+ *
+ * The TCPT_KEEP timer is used to keep connections alive. If a
+ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time,
+ * but not yet established, then we drop the connection. Once the connection
+ * is established, if the connection is idle for TCPTV_KEEP_IDLE time
+ * (and keepalives have been enabled on the socket), we begin to probe
+ * the connection. We force the peer to send us a segment by sending:
+ * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
+ * This segment is (deliberately) outside the window, and should elicit
+ * an ack segment in response from the peer. If, despite the TCPT_KEEP
+ * initiated segments we cannot elicit a response from a peer in tcp_maxidle
+ * amount of time probing, then we drop the connection.
+ */
+
+/*
+ * Time constants.
+ * The TCPTV_* values written as n*PR_SLOWHZ are n seconds expressed
+ * in slow-timeout ticks.
+ */
+#define TCPTV_MSL ( 30*PR_SLOWHZ) /* max seg lifetime (hah!) */
+#define TCPTV_SRTTBASE 0 /* base roundtrip time;
+ if 0, no idea yet */
+#define TCPTV_SRTTDFLT ( 3*PR_SLOWHZ) /* assumed RTT if no info */
+
+#define TCPTV_PERSMIN ( 5*PR_SLOWHZ) /* minimum persist interval */
+#define TCPTV_PERSMAX ( 60*PR_SLOWHZ) /* maximum persist interval */
+
+#define TCPTV_KEEP_INIT ( 75*PR_SLOWHZ) /* initial connect keep alive */
+#define TCPTV_KEEP_IDLE (120*60*PR_SLOWHZ) /* dflt time before probing */
+#define TCPTV_KEEPINTVL ( 75*PR_SLOWHZ) /* default probe interval */
+#define TCPTV_KEEPCNT 8 /* max probes before drop */
+
+#define TCPTV_MIN ( 1*PR_SLOWHZ) /* minimum allowable value */
+#define TCPTV_REXMTMAX ( 64*PR_SLOWHZ) /* max allowable REXMT value */
+
+#define TCP_LINGERTIME 120 /* linger at most 2 minutes */
+
+#define TCP_MAXRXTSHIFT 12 /* maximum retransmits; tcp_backoff[] has this + 1 entries */
+
+#ifdef TCPTIMERS
+/* Printable timer names, listed in TCPT_* index order. */
+char *tcptimers[] =
+ { "REXMT", "PERSIST", "KEEP", "2MSL" };
+#endif
+
+/*
+ * Force a time value to be in a certain range: store value in tv,
+ * then clamp tv to [tvmin, tvmax].
+ *
+ * tv must be an lvalue; it and the bounds are evaluated more than
+ * once, so none of the arguments may have side effects.
+ * The body is wrapped in do { } while (0) so that the invocation plus
+ * its trailing `;' is exactly one statement, which keeps the macro
+ * safe as the body of an unbraced if that has an else.
+ */
+#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
+	(tv) = (value); \
+	if ((tv) < (tvmin)) \
+		(tv) = (tvmin); \
+	else if ((tv) > (tvmax)) \
+		(tv) = (tvmax); \
+} while (0)
+
+#ifdef KERNEL
+extern int tcp_keepidle; /* time before keepalive probes begin */
+extern int tcp_keepintvl; /* time between keepalive probes */
+extern int tcp_maxidle; /* time to drop after starting probes */
+extern int tcp_ttl; /* time to live for TCP segs */
+extern int tcp_backoff[]; /* retransmit backoff multipliers, TCP_MAXRXTSHIFT + 1 entries */
+#endif
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
new file mode 100644
index 000000000000..8edb853bedea
--- /dev/null
+++ b/sys/netinet/tcp_timewait.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+
+/* patchable/settable parameters for tcp */
+int tcp_mssdflt = TCP_MSS; /* initial t_maxseg given to a new tcpcb */
+int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; /* default RTT in seconds; seeds t_rttvar */
+int tcp_do_rfc1323 = 1; /* nonzero: new tcpcbs get TF_REQ_SCALE|TF_REQ_TSTMP */
+
+extern struct inpcb *tcp_last_inpcb; /* input pcb cache; reset by tcp_close() */
+
+/*
+ * Tcp initialization: seed the initial send sequence number, make
+ * the tcb list empty (head linked to itself), raise max_protohdr to
+ * at least the size of a TCP/IP header, and panic if a link header
+ * plus a TCP/IP header cannot fit in MHLEN bytes.
+ */
+void
+tcp_init()
+{
+
+	tcp_iss = 1;		/* wrong */
+	tcb.inp_prev = &tcb;
+	tcb.inp_next = &tcb;
+	if (sizeof(struct tcpiphdr) > max_protohdr)
+		max_protohdr = sizeof(struct tcpiphdr);
+	if (MHLEN < max_linkhdr + sizeof(struct tcpiphdr))
+		panic("tcp_init");
+}
+
+/*
+ * Build (or refresh) the skeletal tcp/ip header used to send packets
+ * on connection tp, so that little work is left per packet.
+ * If tp has no template yet an mbuf is allocated to hold one;
+ * otherwise the existing template is refilled in place.
+ * Returns the template, or 0 if no mbuf could be allocated.
+ * tp->t_template itself is not assigned here.
+ */
+struct tcpiphdr *
+tcp_template(tp)
+	struct tcpcb *tp;
+{
+	register struct inpcb *pcb = tp->t_inpcb;
+	register struct tcpiphdr *hdr = tp->t_template;
+	register struct mbuf *mb;
+
+	if (hdr == 0) {
+		/* No template yet: get an mbuf to hold the header. */
+		if ((mb = m_get(M_DONTWAIT, MT_HEADER)) == NULL)
+			return (0);
+		mb->m_len = sizeof (struct tcpiphdr);
+		hdr = mtod(mb, struct tcpiphdr *);
+	}
+
+	/* Overlaid IP part: protocol, length and the two addresses. */
+	hdr->ti_next = hdr->ti_prev = 0;
+	hdr->ti_x1 = 0;
+	hdr->ti_pr = IPPROTO_TCP;
+	hdr->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
+	hdr->ti_src = pcb->inp_laddr;
+	hdr->ti_dst = pcb->inp_faddr;
+
+	/* TCP part: ports from the pcb, everything else cleared. */
+	hdr->ti_sport = pcb->inp_lport;
+	hdr->ti_dport = pcb->inp_fport;
+	hdr->ti_seq = 0;
+	hdr->ti_ack = 0;
+	hdr->ti_x2 = 0;
+	hdr->ti_off = 5;	/* header length in 32-bit words, no options */
+	hdr->ti_flags = 0;
+	hdr->ti_win = 0;
+	hdr->ti_sum = 0;
+	hdr->ti_urp = 0;
+	return (hdr);
+}
+
+/*
+ * Send a single message to the TCP at address specified by
+ * the given TCP/IP header. If m == 0, then we make a copy
+ * of the tcpiphdr at ti and send directly to the addressed host.
+ * This is used to force keep alive messages out using the TCP
+ * template for a connection tp->t_template. If flags are given
+ * then we send a message back to the TCP which originated the
+ * segment ti, and discard the mbuf containing it and any other
+ * attached mbufs.
+ *
+ * In any case the ack and sequence number of the transmitted
+ * segment are as specified by the parameters.
+ *
+ * Notes on the arguments:
+ * - the choice between the two modes is made on m, not on flags;
+ *   with m == 0 the flags argument is overridden with TH_ACK.
+ * - ack comes before seq in the argument list.
+ * - tp may be 0; then a zero window is advertised and no cached
+ *   route is passed to ip_output().
+ */
+void
+tcp_respond(tp, ti, m, ack, seq, flags)
+ struct tcpcb *tp;
+ register struct tcpiphdr *ti;
+ register struct mbuf *m;
+ tcp_seq ack, seq;
+ int flags;
+{
+ register int tlen;
+ int win = 0;
+ struct route *ro = 0;
+
+ /* With a connection, use its receive buffer space and its route. */
+ if (tp) {
+ win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
+ ro = &tp->t_inpcb->inp_route;
+ }
+ if (m == 0) {
+ /* No mbuf supplied: build the segment from a copy of *ti. */
+ m = m_gethdr(M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ return;
+#ifdef TCP_COMPAT_42
+ tlen = 1;
+#else
+ tlen = 0;
+#endif
+ /* Leave room in front for a link-level header. */
+ m->m_data += max_linkhdr;
+ *mtod(m, struct tcpiphdr *) = *ti;
+ ti = mtod(m, struct tcpiphdr *);
+ flags = TH_ACK;
+ } else {
+ /*
+ * Reuse the supplied mbuf: free everything chained after it,
+ * keep only the header at ti, and swap source and destination
+ * so the reply goes back to the sender.
+ */
+ m_freem(m->m_next);
+ m->m_next = 0;
+ m->m_data = (caddr_t)ti;
+ m->m_len = sizeof (struct tcpiphdr);
+ tlen = 0;
+#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
+ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
+ xchg(ti->ti_dport, ti->ti_sport, u_short);
+#undef xchg
+ }
+ /* ti_len covers the TCP header plus tlen bytes of data. */
+ ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
+ tlen += sizeof (struct tcpiphdr);
+ m->m_len = tlen;
+ m->m_pkthdr.len = tlen;
+ m->m_pkthdr.rcvif = (struct ifnet *) 0;
+ ti->ti_next = ti->ti_prev = 0;
+ ti->ti_x1 = 0;
+ ti->ti_seq = htonl(seq);
+ ti->ti_ack = htonl(ack);
+ ti->ti_x2 = 0;
+ ti->ti_off = sizeof (struct tcphdr) >> 2;
+ ti->ti_flags = flags;
+ /* The advertised window is shifted by the connection's rcv_scale. */
+ if (tp)
+ ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
+ else
+ ti->ti_win = htons((u_short)win);
+ ti->ti_urp = 0;
+ /* Checksum is taken before ip_len and ip_ttl are stored below. */
+ ti->ti_sum = 0;
+ ti->ti_sum = in_cksum(m, tlen);
+ ((struct ip *)ti)->ip_len = tlen;
+ ((struct ip *)ti)->ip_ttl = ip_defttl;
+ (void) ip_output(m, NULL, ro, 0, NULL);
+}
+
+/*
+ * Allocate and initialize a TCP control block for protocol control
+ * block inp, with an empty reassembly queue, and link the two to
+ * each other.  Returns the new tcpcb, or a null pointer if the
+ * (non-waiting) allocation fails.
+ */
+struct tcpcb *
+tcp_newtcpcb(inp)
+	struct inpcb *inp;
+{
+	register struct tcpcb *conn;
+
+	if ((conn = malloc(sizeof(*conn), M_PCB, M_NOWAIT)) == NULL)
+		return ((struct tcpcb *)0);
+	bzero((char *) conn, sizeof(struct tcpcb));
+
+	/* Empty reassembly queue: both links point back at the tcpcb. */
+	conn->seg_next = conn->seg_prev = (struct tcpiphdr *)conn;
+	conn->t_maxseg = tcp_mssdflt;
+
+	if (tcp_do_rfc1323)
+		conn->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
+	else
+		conn->t_flags = 0;
+	conn->t_inpcb = inp;
+
+	/*
+	 * srtt starts at TCPTV_SRTTBASE (0), which marks "no estimate
+	 * yet"; rttvar is chosen so that srtt + 2 * rttvar yields a
+	 * reasonable first retransmit time.
+	 */
+	conn->t_srtt = TCPTV_SRTTBASE;
+	conn->t_rttvar = (tcp_rttdflt * PR_SLOWHZ) << 2;
+	conn->t_rttmin = TCPTV_MIN;
+	TCPT_RANGESET(conn->t_rxtcur,
+	    ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1,
+	    TCPTV_MIN, TCPTV_REXMTMAX);
+
+	/* Both windows start at the largest value that can be scaled to. */
+	conn->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	conn->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+
+	inp->inp_ip.ip_ttl = ip_defttl;
+	inp->inp_ppcb = (caddr_t)conn;
+	return (conn);
+}
+
+/*
+ * Drop a TCP connection and report the given error on its socket.
+ * If the connection is synchronized (a SYN has been received), the
+ * state is forced to CLOSED and tcp_output() is called so that a RST
+ * goes to the peer.  A recorded soft error takes the place of
+ * ETIMEDOUT.  Returns the result of tcp_close().
+ */
+struct tcpcb *
+tcp_drop(tp, errno)
+	register struct tcpcb *tp;
+	int errno;
+{
+	struct socket *sock = tp->t_inpcb->inp_socket;
+
+	if (!TCPS_HAVERCVDSYN(tp->t_state))
+		tcpstat.tcps_conndrops++;
+	else {
+		tp->t_state = TCPS_CLOSED;
+		(void) tcp_output(tp);
+		tcpstat.tcps_drops++;
+	}
+	sock->so_error = (errno == ETIMEDOUT && tp->t_softerror) ?
+	    tp->t_softerror : errno;
+	return (tcp_close(tp));
+}
+
+/*
+ * Close a TCP control block:
+ *	discard all space held by the tcp
+ *	discard internet protocol block
+ *	wake up any sleepers
+ *
+ * When RTV_RTT is defined and enough data was sent, the connection's
+ * smoothed RTT, RTT variance and slow-start threshold are first
+ * folded into the metrics of the cached route, except for metrics
+ * the user has locked.
+ *
+ * tp is freed here; the return value is always a null tcpcb pointer.
+ */
+struct tcpcb *
+tcp_close(tp)
+	register struct tcpcb *tp;
+{
+	register struct tcpiphdr *t;
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
+	register struct mbuf *m;
+#ifdef RTV_RTT
+	register struct rtentry *rt;
+
+	/*
+	 * If we sent enough data to get some meaningful characteristics,
+	 * save them in the routing entry.  'Enough' is arbitrarily
+	 * defined as the sendpipesize (default 4K) * 16.  This would
+	 * give us 16 rtt samples assuming we only get one sample per
+	 * window (the usual case on a long haul net).  16 samples is
+	 * enough for the srtt filter to converge to within 5% of the correct
+	 * value; fewer samples and we could save a very bogus rtt.
+	 *
+	 * Don't update the default route's characteristics and don't
+	 * update anything that the user "locked".
+	 */
+	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
+	    (rt = inp->inp_route.ro_rt) &&
+	    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
+		register u_long i;
+
+		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
+			i = tp->t_srtt *
+			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
+			if (rt->rt_rmx.rmx_rtt && i)
+				/*
+				 * filter this update to half the old & half
+				 * the new values, converting scale.
+				 * See route.h and tcp_var.h for a
+				 * description of the scaling constants.
+				 */
+				rt->rt_rmx.rmx_rtt =
+				    (rt->rt_rmx.rmx_rtt + i) / 2;
+			else
+				rt->rt_rmx.rmx_rtt = i;
+		}
+		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
+			i = tp->t_rttvar *
+			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
+			if (rt->rt_rmx.rmx_rttvar && i)
+				rt->rt_rmx.rmx_rttvar =
+				    (rt->rt_rmx.rmx_rttvar + i) / 2;
+			else
+				rt->rt_rmx.rmx_rttvar = i;
+		}
+		/*
+		 * update the pipelimit (ssthresh) if it has been updated
+		 * already or if a pipesize was specified & the threshold
+		 * got below half the pipesize.  I.e., wait for bad news
+		 * before we start updating, then update on both good
+		 * and bad news.
+		 *
+		 * The "already updated or below half the pipesize" test
+		 * is parenthesized as a unit.  && binds tighter than ||,
+		 * so without the grouping the half-pipesize test alone
+		 * could override the RTV_SSTHRESH lock, and could read i
+		 * before this statement had assigned it.
+		 */
+		if ((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
+		    (i = tp->snd_ssthresh) != 0 &&
+		    (rt->rt_rmx.rmx_ssthresh ||
+		    i < (rt->rt_rmx.rmx_sendpipe / 2))) {
+			/*
+			 * convert the limit from user data bytes to
+			 * packets then to packet data bytes.
+			 */
+			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
+			if (i < 2)
+				i = 2;
+			i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
+			if (rt->rt_rmx.rmx_ssthresh)
+				rt->rt_rmx.rmx_ssthresh =
+				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
+			else
+				rt->rt_rmx.rmx_ssthresh = i;
+		}
+	}
+#endif /* RTV_RTT */
+	/* free the reassembly queue, if any */
+	t = tp->seg_next;
+	while (t != (struct tcpiphdr *)tp) {
+		/* Step forward first, then unlink and free the one behind. */
+		t = (struct tcpiphdr *)t->ti_next;
+		m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
+		remque(t->ti_prev);
+		m_freem(m);
+	}
+	if (tp->t_template)
+		(void) m_free(dtom(tp->t_template));
+	free(tp, M_PCB);
+	inp->inp_ppcb = 0;
+	soisdisconnected(so);
+	/* clobber input pcb cache if we're closing the cached connection */
+	if (inp == tcp_last_inpcb)
+		tcp_last_inpcb = &tcb;
+	in_pcbdetach(inp);
+	tcpstat.tcps_closed++;
+	return ((struct tcpcb *)0);
+}
+
+/* tcp_drain: empty placeholder; this implementation releases nothing. */
+void
+tcp_drain()
+{
+
+}
+
+/*
+ * Tell a tcp user about an asynchronous error.
+ * The error is normally just recorded as the connection's soft
+ * error; the user is woken up either way
+ * (for now, won't do anything until can select for soft error).
+ */
+void
+tcp_notify(inp, error)
+	struct inpcb *inp;
+	int error;
+{
+	register struct socket *sock = inp->inp_socket;
+	register struct tcpcb *conn = (struct tcpcb *)inp->inp_ppcb;
+	int unreachable;
+
+	/* Unreachability reports are ignored once the connection is up. */
+	unreachable = (error == EHOSTUNREACH || error == ENETUNREACH ||
+	    error == EHOSTDOWN);
+	if (conn->t_state == TCPS_ESTABLISHED && unreachable)
+		return;
+
+	/*
+	 * A connection that has not completed, has already backed off
+	 * more than three times and already holds a soft error gets
+	 * this one posted on the socket so the attempt gives up now.
+	 * Anything else is only remembered as the soft error.
+	 */
+	if (conn->t_state < TCPS_ESTABLISHED && conn->t_rxtshift > 3 &&
+	    conn->t_softerror)
+		sock->so_error = error;
+	else
+		conn->t_softerror = error;
+
+	/* Wake sleepers on so_timeo, then the read and write sides. */
+	wakeup((caddr_t) &sock->so_timeo);
+	sorwakeup(sock);
+	sowwakeup(sock);
+}
+
+/*
+ * Dispatch a PRC_* control request to the affected TCP connections.
+ *
+ *	cmd	PRC_* request code
+ *	sa	address passed to in_pcbnotify() to select the pcbs
+ *	ip	if nonzero, an IP header immediately followed by a TCP
+ *		header; its source address and the two TCP ports are
+ *		passed to in_pcbnotify() as well
+ *
+ * PRC_QUENCH is delivered through tcp_quench(), every other accepted
+ * request through tcp_notify().  A non-redirect request that is out
+ * of range, or that has a zero entry in inetctlerrmap[], is ignored.
+ */
+void
+tcp_ctlinput(cmd, sa, ip)
+	int cmd;
+	struct sockaddr *sa;
+	register struct ip *ip;
+{
+	register struct tcphdr *th;
+	extern struct in_addr zeroin_addr;
+	extern u_char inetctlerrmap[];
+	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
+
+	if (cmd == PRC_QUENCH)
+		notify = tcp_quench;
+	/*
+	 * inetctlerrmap[] is defined with PRC_NCMDS entries
+	 * (netinet/ip_input.c), so its valid indices are
+	 * 0 .. PRC_NCMDS - 1.  cmd == PRC_NCMDS must be rejected as well;
+	 * a `>' test would let it index one element past the table.
+	 */
+	else if (!PRC_IS_REDIRECT(cmd) &&
+	    ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0))
+		return;
+	if (ip) {
+		/* The TCP header starts ip_hl 32-bit words into the packet. */
+		th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+		in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
+		    cmd, notify);
+	} else
+		in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
+}
+
+/*
+ * Source quench handler: collapse the congestion window to a single
+ * segment.  It is opened again gradually as the connection proceeds.
+ * The second argument is not used.
+ */
+void
+tcp_quench(inp, errno)
+	struct inpcb *inp;
+	int errno;
+{
+	struct tcpcb *conn = intotcpcb(inp);
+
+	if (conn == 0)
+		return;
+	conn->snd_cwnd = conn->t_maxseg;
+}
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
new file mode 100644
index 000000000000..38a08d6d0c2e
--- /dev/null
+++ b/sys/netinet/tcp_usrreq.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+/*
+ * TCP protocol interface to socket abstraction.
+ */
+extern char *tcpstates[];
+
+/*
+ * Process a TCP user request (PRU_*) for socket so.  The meaning of the
+ * three mbuf pointer arguments depends on req:
+ *  - PRU_SEND, PRU_SENDOOB: m is the mbuf chain of data to send;
+ *  - PRU_RCVOOB: m receives the out-of-band byte and nam carries the
+ *    receive flags (only MSG_PEEK is examined);
+ *  - PRU_SLOWTIMO: nam is not an mbuf but the timer index, cast to int;
+ *  - PRU_SENSE: m is really a struct stat * whose st_blksize is set;
+ *  - PRU_CONTROL: m, nam and control are cast and handed to in_control().
+ * A non-empty control chain is rejected with EINVAL for every other
+ * request.  Returns 0 or an errno value.  Apart from those two early
+ * exits the request runs at splnet().
+ */
+/*ARGSUSED*/
+int
+tcp_usrreq(so, req, m, nam, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	register struct inpcb *inp;
+	/*
+	 * Start out null: a failed PRU_ATTACH on a socket without an inpcb
+	 * reaches the trace test at the bottom without ever assigning tp.
+	 */
+	register struct tcpcb *tp = 0;
+	int s;
+	int error = 0;
+	int ostate;
+
+	if (req == PRU_CONTROL)
+		return (in_control(so, (int)m, (caddr_t)nam,
+			(struct ifnet *)control));
+	/* Control data is not accepted; release both chains and fail. */
+	if (control && control->m_len) {
+		m_freem(control);
+		if (m)
+			m_freem(m);
+		return (EINVAL);
+	}
+
+	s = splnet();
+	inp = sotoinpcb(so);
+	/*
+	 * When a TCP is attached to a socket, then there will be
+	 * a (struct inpcb) pointed at by the socket, and this
+	 * structure will point at a subsidiary (struct tcpcb).
+	 */
+	if (inp == 0 && req != PRU_ATTACH) {
+		splx(s);
+		return (EINVAL);		/* XXX */
+	}
+	if (inp) {
+		tp = intotcpcb(inp);
+		/*
+		 * An inpcb without a tcpcb cannot be serviced: every
+		 * request below dereferences tp.  Fail the same way as
+		 * the missing-inpcb case instead of faulting on a null
+		 * pointer.
+		 */
+		if (tp == 0) {
+			splx(s);
+			return (EINVAL);	/* XXX */
+		}
+#ifdef KPROF
+		tcp_acounts[tp->t_state][req]++;
+#endif
+		/* Remember the state on entry for the debug trace. */
+		ostate = tp->t_state;
+	} else
+		ostate = 0;
+	switch (req) {
+
+	/*
+	 * TCP attaches to socket via PRU_ATTACH, reserving space,
+	 * and an internet control block.
+	 */
+	case PRU_ATTACH:
+		if (inp) {
+			error = EISCONN;
+			break;
+		}
+		error = tcp_attach(so);
+		if (error)
+			break;
+		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
+			so->so_linger = TCP_LINGERTIME;
+		tp = sototcpcb(so);
+		break;
+
+	/*
+	 * PRU_DETACH detaches the TCP protocol from the socket.
+	 * If the protocol state is non-embryonic, then can't
+	 * do this directly: have to initiate a PRU_DISCONNECT,
+	 * which may finish later; embryonic TCB's can just
+	 * be discarded here.
+	 */
+	case PRU_DETACH:
+		if (tp->t_state > TCPS_LISTEN)
+			tp = tcp_disconnect(tp);
+		else
+			tp = tcp_close(tp);
+		break;
+
+	/*
+	 * Give the socket an address.
+	 */
+	case PRU_BIND:
+		error = in_pcbbind(inp, nam);
+		break;
+
+	/*
+	 * Prepare to accept connections.
+	 */
+	case PRU_LISTEN:
+		/* Bind to a port first if the socket has none yet. */
+		if (inp->inp_lport == 0)
+			error = in_pcbbind(inp, (struct mbuf *)0);
+		if (error == 0)
+			tp->t_state = TCPS_LISTEN;
+		break;
+
+	/*
+	 * Initiate connection to peer.
+	 * Create a template for use in transmissions on this connection.
+	 * Enter SYN_SENT state, and mark socket as connecting.
+	 * Start keep-alive timer, and seed output sequence space.
+	 * Send initial segment on connection.
+	 */
+	case PRU_CONNECT:
+		if (inp->inp_lport == 0) {
+			error = in_pcbbind(inp, (struct mbuf *)0);
+			if (error)
+				break;
+		}
+		error = in_pcbconnect(inp, nam);
+		if (error)
+			break;
+		tp->t_template = tcp_template(tp);
+		if (tp->t_template == 0) {
+			/* Undo the connect done just above. */
+			in_pcbdisconnect(inp);
+			error = ENOBUFS;
+			break;
+		}
+		/* Compute window scaling to request. */
+		while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+		    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
+			tp->request_r_scale++;
+		soisconnecting(so);
+		tcpstat.tcps_connattempt++;
+		tp->t_state = TCPS_SYN_SENT;
+		tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+		tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
+		tcp_sendseqinit(tp);
+		error = tcp_output(tp);
+		break;
+
+	/*
+	 * Create a TCP connection between two sockets.
+	 */
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP;
+		break;
+
+	/*
+	 * Initiate disconnect from peer.
+	 * If connection never passed embryonic stage, just drop;
+	 * else if don't need to let data drain, then can just drop anyways,
+	 * else have to begin TCP shutdown process: mark socket disconnecting,
+	 * drain unread data, state switch to reflect user close, and
+	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
+	 * when peer sends FIN and acks ours.
+	 *
+	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
+	 */
+	case PRU_DISCONNECT:
+		tp = tcp_disconnect(tp);
+		break;
+
+	/*
+	 * Accept a connection.  Essentially all the work is
+	 * done at higher levels; just return the address
+	 * of the peer, storing through addr.
+	 */
+	case PRU_ACCEPT:
+		in_setpeeraddr(inp, nam);
+		break;
+
+	/*
+	 * Mark the connection as being incapable of further output.
+	 */
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		tp = tcp_usrclosed(tp);
+		if (tp)
+			error = tcp_output(tp);
+		break;
+
+	/*
+	 * After a receive, possibly send window update to peer.
+	 */
+	case PRU_RCVD:
+		(void) tcp_output(tp);
+		break;
+
+	/*
+	 * Do a send by putting data in output queue and updating urgent
+	 * marker if URG set.  Possibly send more data.
+	 */
+	case PRU_SEND:
+		sbappend(&so->so_snd, m);
+		error = tcp_output(tp);
+		break;
+
+	/*
+	 * Abort the TCP.
+	 */
+	case PRU_ABORT:
+		tp = tcp_drop(tp, ECONNABORTED);
+		break;
+
+	/*
+	 * Report the send buffer high-water mark as the block size;
+	 * m is really a struct stat * here.  No trace is generated.
+	 */
+	case PRU_SENSE:
+		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
+		(void) splx(s);
+		return (0);
+
+	/*
+	 * Hand back the single out-of-band byte, unless there is no
+	 * mark, the data is delivered inline, or it was already read.
+	 */
+	case PRU_RCVOOB:
+		if ((so->so_oobmark == 0 &&
+		    (so->so_state & SS_RCVATMARK) == 0) ||
+		    so->so_options & SO_OOBINLINE ||
+		    tp->t_oobflags & TCPOOB_HADDATA) {
+			error = EINVAL;
+			break;
+		}
+		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
+			error = EWOULDBLOCK;
+			break;
+		}
+		m->m_len = 1;
+		*mtod(m, caddr_t) = tp->t_iobc;
+		/* Unless peeking, flip HAVEDATA off and HADDATA on. */
+		if (((int)nam & MSG_PEEK) == 0)
+			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+		break;
+
+	case PRU_SENDOOB:
+		/* Urgent data may overrun the send buffer by up to 512 bytes. */
+		if (sbspace(&so->so_snd) < -512) {
+			m_freem(m);
+			error = ENOBUFS;
+			break;
+		}
+		/*
+		 * According to RFC961 (Assigned Protocols),
+		 * the urgent pointer points to the last octet
+		 * of urgent data.  We continue, however,
+		 * to consider it to indicate the first octet
+		 * of data past the urgent section.
+		 * Otherwise, snd_up should be one lower.
+		 */
+		sbappend(&so->so_snd, m);
+		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
+		/* t_force is raised only for the duration of this output call. */
+		tp->t_force = 1;
+		error = tcp_output(tp);
+		tp->t_force = 0;
+		break;
+
+	case PRU_SOCKADDR:
+		in_setsockaddr(inp, nam);
+		break;
+
+	case PRU_PEERADDR:
+		in_setpeeraddr(inp, nam);
+		break;
+
+	/*
+	 * TCP slow timer went off; going through this
+	 * routine for tracing's sake.
+	 */
+	case PRU_SLOWTIMO:
+		tp = tcp_timers(tp, (int)nam);
+		req |= (int)nam << 8;		/* for debug's sake */
+		break;
+
+	default:
+		panic("tcp_usrreq");
+	}
+	/* tp is null here if the request freed the tcpcb or attach failed. */
+	if (tp && (so->so_options & SO_DEBUG))
+		tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
+	splx(s);
+	return (error);
+}
+
+/*
+ * Get or set a socket option on a TCP socket.
+ *
+ * op is PRCO_SETOPT or PRCO_GETOPT.  Options at a level other than
+ * IPPROTO_TCP are passed on to ip_ctloutput().  For PRCO_SETOPT, *mp is
+ * the mbuf holding the new value and is freed here; for PRCO_GETOPT a
+ * fresh mbuf holding an int is stored through mp.  TCP_NODELAY and
+ * TCP_MAXSEG are the only TCP-level options handled.
+ *
+ * Returns 0 or an errno value: ECONNRESET when the socket has no
+ * inpcb, EINVAL for a bad value, ENOPROTOOPT for an unknown option.
+ */
+int
+tcp_ctloutput(op, so, level, optname, mp)
+	int op;
+	struct socket *so;
+	int level, optname;
+	struct mbuf **mp;
+{
+	int error = 0, s;
+	struct inpcb *inp;
+	register struct tcpcb *tp;
+	register struct mbuf *m;
+	register int i;
+
+	s = splnet();
+	inp = sotoinpcb(so);
+	if (inp == NULL) {
+		splx(s);
+		/* The option mbuf is ours to release even on failure. */
+		if (op == PRCO_SETOPT && *mp)
+			(void) m_free(*mp);
+		return (ECONNRESET);
+	}
+	if (level != IPPROTO_TCP) {
+		error = ip_ctloutput(op, so, level, optname, mp);
+		splx(s);
+		return (error);
+	}
+	tp = intotcpcb(inp);
+
+	switch (op) {
+
+	case PRCO_SETOPT:
+		m = *mp;
+		switch (optname) {
+
+		case TCP_NODELAY:
+			if (m == NULL || m->m_len < sizeof (int))
+				error = EINVAL;
+			else if (*mtod(m, int *))
+				tp->t_flags |= TF_NODELAY;
+			else
+				tp->t_flags &= ~TF_NODELAY;
+			break;
+
+		case TCP_MAXSEG:
+			/*
+			 * Require a full int in the mbuf, as TCP_NODELAY
+			 * does, before reading the value.  The segment size
+			 * may only be lowered, never raised or set to <= 0.
+			 */
+			if (m && m->m_len >= sizeof (int) &&
+			    (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg)
+				tp->t_maxseg = i;
+			else
+				error = EINVAL;
+			break;
+
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		if (m)
+			(void) m_free(m);
+		break;
+
+	case PRCO_GETOPT:
+		/* The mbuf is handed back through mp even for unknown options. */
+		*mp = m = m_get(M_WAIT, MT_SOOPTS);
+		m->m_len = sizeof(int);
+
+		switch (optname) {
+		case TCP_NODELAY:
+			/* Reports the raw flag bit, not a normalized 0/1. */
+			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
+			break;
+		case TCP_MAXSEG:
+			*mtod(m, int *) = tp->t_maxseg;
+			break;
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		break;
+	}
+	splx(s);
+	return (error);
+}
+
+u_long tcp_sendspace = 1024*8;
+u_long tcp_recvspace = 1024*8;
+
+/*
+ * Attach TCP to a socket: reserve the default send/receive buffer space
+ * if either high-water mark is still zero, allocate an internet pcb on
+ * the tcb list and a tcp control block for it, and leave the new
+ * connection in the CLOSED state.
+ * Returns 0, the error from soreserve() or in_pcballoc(), or ENOBUFS
+ * when no tcpcb could be obtained.
+ */
+int
+tcp_attach(so)
+	struct socket *so;
+{
+	struct inpcb *inp;
+	register struct tcpcb *tp;
+	int error;
+	int nofd;
+
+	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+		error = soreserve(so, tcp_sendspace, tcp_recvspace);
+		if (error)
+			return (error);
+	}
+	if ((error = in_pcballoc(so, &tcb)) != 0)
+		return (error);
+
+	inp = sotoinpcb(so);
+	tp = tcp_newtcpcb(inp);
+	if (tp != 0) {
+		tp->t_state = TCPS_CLOSED;
+		return (0);
+	}
+
+	/*
+	 * No tcpcb: back out the inpcb.  SS_NOFDREF is masked off around
+	 * the detach so that the socket is not freed yet, then put back
+	 * exactly as it was.  (XXX)
+	 */
+	nofd = so->so_state & SS_NOFDREF;
+	so->so_state &= ~SS_NOFDREF;
+	in_pcbdetach(inp);
+	so->so_state |= nofd;
+	return (ENOBUFS);
+}
+
+/*
+ * Initiate (or continue) a disconnect.
+ * A connection that never reached ESTABLISHED is simply closed.
+ * With SO_LINGER set and a zero linger time the connection is dropped
+ * (with error 0).  Otherwise the socket is marked disconnecting, pending
+ * input is flushed, the state is advanced as for a user close, and a
+ * segment is sent if the control block still exists.
+ * Returns the control block, which is whatever tcp_close(), tcp_drop()
+ * or tcp_usrclosed() handed back and so may be null.
+ */
+struct tcpcb *
+tcp_disconnect(tp)
+	register struct tcpcb *tp;
+{
+	struct socket *sock = tp->t_inpcb->inp_socket;
+
+	if (tp->t_state < TCPS_ESTABLISHED)
+		return (tcp_close(tp));
+
+	if ((sock->so_options & SO_LINGER) && sock->so_linger == 0)
+		return (tcp_drop(tp, 0));
+
+	soisdisconnecting(sock);
+	sbflush(&sock->so_rcv);
+	tp = tcp_usrclosed(tp);
+	if (tp != 0)
+		(void) tcp_output(tp);
+	return (tp);
+}
+
+/*
+ * The user has closed; step the connection through the shutdown states.
+ *  - CLOSED, LISTEN, SYN_SENT: nothing to shut down, close the tcpcb.
+ *  - SYN_RECEIVED, ESTABLISHED: move to FIN_WAIT_1 (a FIN is still owed).
+ *  - CLOSE_WAIT: the peer's FIN was already seen, move to LAST_ACK.
+ *  - any other state is left as it is.
+ * Once the resulting state is FIN_WAIT_2 or beyond, the socket is
+ * marked disconnected.
+ * Returns the control block as handed back by tcp_close() where that
+ * was called (callers test it for null), otherwise tp itself.
+ */
+struct tcpcb *
+tcp_usrclosed(tp)
+	register struct tcpcb *tp;
+{
+	int state = tp->t_state;
+
+	if (state == TCPS_CLOSED || state == TCPS_LISTEN ||
+	    state == TCPS_SYN_SENT) {
+		tp->t_state = TCPS_CLOSED;
+		tp = tcp_close(tp);
+	} else if (state == TCPS_SYN_RECEIVED || state == TCPS_ESTABLISHED)
+		tp->t_state = TCPS_FIN_WAIT_1;
+	else if (state == TCPS_CLOSE_WAIT)
+		tp->t_state = TCPS_LAST_ACK;
+
+	if (tp != 0 && tp->t_state >= TCPS_FIN_WAIT_2)
+		soisdisconnected(tp->t_inpcb->inp_socket);
+	return (tp);
+}
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
new file mode 100644
index 000000000000..8a8e75121141
--- /dev/null
+++ b/sys/netinet/tcp_var.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 1982, 1986, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94
+ */
+
+/*
+ * Kernel variables for tcp.
+ */
+
+/*
+ * Tcp control block, one per tcp; fields:
+ */
+struct tcpcb {
+ struct tcpiphdr *seg_next; /* sequencing queue */
+ struct tcpiphdr *seg_prev;
+ short t_state; /* state of this connection */
+ short t_timer[TCPT_NTIMERS]; /* tcp timers */
+ short t_rxtshift; /* log(2) of rexmt exp. backoff */
+ short t_rxtcur; /* current retransmit value */
+ short t_dupacks; /* consecutive dup acks recd */
+ u_short t_maxseg; /* maximum segment size */
+ char t_force; /* 1 if forcing out a byte */
+ u_short t_flags; /* TF_* bits defined below */
+#define TF_ACKNOW 0x0001 /* ack peer immediately */
+#define TF_DELACK 0x0002 /* ack, but try to delay it */
+#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */
+#define TF_NOOPT 0x0008 /* don't use tcp options */
+#define TF_SENTFIN 0x0010 /* have sent FIN */
+#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */
+#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */
+#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */
+#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */
+#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */
+
+ struct tcpiphdr *t_template; /* skeletal packet for transmit */
+ struct inpcb *t_inpcb; /* back pointer to internet pcb */
+/*
+ * The following fields are used as in the protocol specification.
+ * See RFC793, Sep. 1981, page 21.
+ */
+/* send sequence variables */
+ tcp_seq snd_una; /* send unacknowledged */
+ tcp_seq snd_nxt; /* send next */
+ tcp_seq snd_up; /* send urgent pointer */
+ tcp_seq snd_wl1; /* window update seg seq number */
+ tcp_seq snd_wl2; /* window update seg ack number */
+ tcp_seq iss; /* initial send sequence number */
+ u_long snd_wnd; /* send window */
+/* receive sequence variables */
+ u_long rcv_wnd; /* receive window */
+ tcp_seq rcv_nxt; /* receive next */
+ tcp_seq rcv_up; /* receive urgent pointer */
+ tcp_seq irs; /* initial receive sequence number */
+/*
+ * Additional variables for this implementation.
+ */
+/* receive variables */
+ tcp_seq rcv_adv; /* advertised window */
+/* retransmit variables */
+ tcp_seq snd_max; /* highest sequence number sent;
+ * used to recognize retransmits
+ */
+/* congestion control (for slow start, source quench, retransmit after loss) */
+ u_long snd_cwnd; /* congestion-controlled window */
+ u_long snd_ssthresh; /* snd_cwnd size threshold for
+ * slow start exponential to
+ * linear switch
+ */
+/*
+ * transmit timing stuff. See below for scale of srtt and rttvar.
+ * "Variance" is actually smoothed difference.
+ */
+ short t_idle; /* inactivity time */
+ short t_rtt; /* round trip time */
+ tcp_seq t_rtseq; /* sequence number being timed */
+ short t_srtt; /* smoothed round-trip time */
+ short t_rttvar; /* variance in round-trip time */
+ u_short t_rttmin; /* minimum rtt allowed */
+ u_long max_sndwnd; /* largest window peer has offered */
+
+/* out-of-band data */
+ char t_oobflags; /* out-of-band state; TCPOOB_* bits below */
+ char t_iobc; /* out-of-band input character */
+#define TCPOOB_HAVEDATA 0x01
+#define TCPOOB_HADDATA 0x02
+ short t_softerror; /* possible error not yet reported */
+
+/* RFC 1323 variables */
+ u_char snd_scale; /* window scaling for send window */
+ u_char rcv_scale; /* window scaling for recv window */
+ u_char request_r_scale; /* pending window scaling */
+ u_char requested_s_scale;
+ u_long ts_recent; /* timestamp echo data */
+ u_long ts_recent_age; /* when last updated */
+ tcp_seq last_ack_sent;
+
+/* TUBA stuff */
+ caddr_t t_tuba_pcb; /* next level down pcb for TCP over z */
+};
+
+/*
+ * Map an internet pcb (through its inp_ppcb pointer) or a socket
+ * (through its internet pcb) to the tcp control block.
+ */
+#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
+#define sototcpcb(so) (intotcpcb(sotoinpcb(so)))
+
+/*
+ * The smoothed round-trip time and estimated variance
+ * are stored as fixed point numbers scaled by the values below.
+ * For convenience, these scales are also used in smoothing the average
+ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
+ * With these scales, srtt has 3 bits to the right of the binary point,
+ * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the
+ * binary point, and is smoothed with an ALPHA of 0.75.
+ */
+#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */
+#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */
+#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */
+#define TCP_RTTVAR_SHIFT 2 /* shift for rttvar; 2 bits */
+
+/*
+ * The initial retransmission should happen at rtt + 4 * rttvar.
+ * Because of the way we do the smoothing, srtt and rttvar
+ * will each average +1/2 tick of bias. When we compute
+ * the retransmit timer, we want 1/2 tick of rounding and
+ * 1 extra tick because of +-1/2 tick uncertainty in the
+ * firing of the timer. The bias will give us exactly the
+ * 1.5 tick we need. But, because the bias is
+ * statistical, we have to test that we don't drop below
+ * the minimum feasible timer (which is 2 ticks).
+ * This macro assumes that the value of TCP_RTTVAR_SCALE
+ * is the same as the multiplier for rttvar.
+ *
+ * Concretely: t_srtt is shifted down by TCP_RTT_SHIFT to drop its
+ * fraction bits, while t_rttvar is added exactly as stored (still
+ * scaled), which is what supplies the "4 *" factor.  The macro does
+ * not itself apply the 2-tick minimum mentioned above.
+ */
+#define TCP_REXMTVAL(tp) \
+ (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar)
+
+/* XXX
+ * We want to avoid doing m_pullup on incoming packets but that
+ * means avoiding dtom in the tcp reassembly code. That in turn means
+ * keeping an mbuf pointer in the reassembly queue (since we might
+ * have a cluster). As a quick hack, the source & destination
+ * port numbers (which are no longer needed once we've located the
+ * tcpcb) are overlaid with an mbuf pointer: the macro reinterprets
+ * the start of the TCP header (ti_t) as a struct mbuf * lvalue.
+ */
+#define REASS_MBUF(ti) (*(struct mbuf **)&((ti)->ti_t))
+
+/*
+ * TCP statistics.
+ * Many of these should be kept per connection,
+ * but that's inconvenient at the moment.
+ */
+struct tcpstat {
+ /* connection life-cycle and timer counters */
+ u_long tcps_connattempt; /* connections initiated */
+ u_long tcps_accepts; /* connections accepted */
+ u_long tcps_connects; /* connections established */
+ u_long tcps_drops; /* connections dropped */
+ u_long tcps_conndrops; /* embryonic connections dropped */
+ u_long tcps_closed; /* conn. closed (includes drops) */
+ u_long tcps_segstimed; /* segs where we tried to get rtt */
+ u_long tcps_rttupdated; /* times we succeeded */
+ u_long tcps_delack; /* delayed acks sent */
+ u_long tcps_timeoutdrop; /* conn. dropped in rxmt timeout */
+ u_long tcps_rexmttimeo; /* retransmit timeouts */
+ u_long tcps_persisttimeo; /* persist timeouts */
+ u_long tcps_keeptimeo; /* keepalive timeouts */
+ u_long tcps_keepprobe; /* keepalive probes sent */
+ u_long tcps_keepdrops; /* connections dropped in keepalive */
+
+ /* output counters */
+ u_long tcps_sndtotal; /* total packets sent */
+ u_long tcps_sndpack; /* data packets sent */
+ u_long tcps_sndbyte; /* data bytes sent */
+ u_long tcps_sndrexmitpack; /* data packets retransmitted */
+ u_long tcps_sndrexmitbyte; /* data bytes retransmitted */
+ u_long tcps_sndacks; /* ack-only packets sent */
+ u_long tcps_sndprobe; /* window probes sent */
+ u_long tcps_sndurg; /* packets sent with URG only */
+ u_long tcps_sndwinup; /* window update-only packets sent */
+ u_long tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */
+
+ /* input counters */
+ u_long tcps_rcvtotal; /* total packets received */
+ u_long tcps_rcvpack; /* packets received in sequence */
+ u_long tcps_rcvbyte; /* bytes received in sequence */
+ u_long tcps_rcvbadsum; /* packets received with checksum errs */
+ u_long tcps_rcvbadoff; /* packets received with bad offset */
+ u_long tcps_rcvshort; /* packets received too short */
+ u_long tcps_rcvduppack; /* duplicate-only packets received */
+ u_long tcps_rcvdupbyte; /* duplicate-only bytes received */
+ u_long tcps_rcvpartduppack; /* packets with some duplicate data */
+ u_long tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */
+ u_long tcps_rcvoopack; /* out-of-order packets received */
+ u_long tcps_rcvoobyte; /* out-of-order bytes received */
+ u_long tcps_rcvpackafterwin; /* packets with data after window */
+ u_long tcps_rcvbyteafterwin; /* bytes rcvd after window */
+ u_long tcps_rcvafterclose; /* packets rcvd after "close" */
+ u_long tcps_rcvwinprobe; /* rcvd window probe packets */
+ u_long tcps_rcvdupack; /* rcvd duplicate acks */
+ u_long tcps_rcvacktoomuch; /* rcvd acks for unsent data */
+ u_long tcps_rcvackpack; /* rcvd ack packets */
+ u_long tcps_rcvackbyte; /* bytes acked by rcvd acks */
+ u_long tcps_rcvwinupd; /* rcvd window update packets */
+ u_long tcps_pawsdrop; /* segments dropped due to PAWS */
+ u_long tcps_predack; /* times hdr predict ok for acks */
+ u_long tcps_preddat; /* times hdr predict ok for data pkts */
+ u_long tcps_pcbcachemiss; /* presumably misses of a cached-pcb lookup -- confirm */
+};
+
+/*
+ * Kernel-only globals and function prototypes for TCP.
+ * Note that tcb, tcpstat and tcp_now are declared here without
+ * "extern", so every file including this header under KERNEL carries a
+ * tentative definition of them.
+ * Parameter lists are wrapped in __P(()) (presumably so that pre-ANSI
+ * compilers can drop them -- confirm against the __P definition).
+ */
+#ifdef KERNEL
+struct inpcb tcb; /* head of queue of active tcpcb's */
+struct tcpstat tcpstat; /* tcp statistics */
+u_long tcp_now; /* for RFC 1323 timestamps */
+
+int tcp_attach __P((struct socket *));
+void tcp_canceltimers __P((struct tcpcb *));
+struct tcpcb *
+ tcp_close __P((struct tcpcb *));
+void tcp_ctlinput __P((int, struct sockaddr *, struct ip *));
+int tcp_ctloutput __P((int, struct socket *, int, int, struct mbuf **));
+struct tcpcb *
+ tcp_disconnect __P((struct tcpcb *));
+struct tcpcb *
+ tcp_drop __P((struct tcpcb *, int));
+void tcp_dooptions __P((struct tcpcb *,
+ u_char *, int, struct tcpiphdr *, int *, u_long *, u_long *));
+void tcp_drain __P((void));
+void tcp_fasttimo __P((void));
+void tcp_init __P((void));
+void tcp_input __P((struct mbuf *, int));
+int tcp_mss __P((struct tcpcb *, u_int));
+struct tcpcb *
+ tcp_newtcpcb __P((struct inpcb *));
+void tcp_notify __P((struct inpcb *, int));
+int tcp_output __P((struct tcpcb *));
+void tcp_pulloutofband __P((struct socket *,
+ struct tcpiphdr *, struct mbuf *));
+void tcp_quench __P((struct inpcb *, int));
+int tcp_reass __P((struct tcpcb *, struct tcpiphdr *, struct mbuf *));
+void tcp_respond __P((struct tcpcb *,
+ struct tcpiphdr *, struct mbuf *, u_long, u_long, int));
+void tcp_setpersist __P((struct tcpcb *));
+void tcp_slowtimo __P((void));
+struct tcpiphdr *
+ tcp_template __P((struct tcpcb *));
+struct tcpcb *
+ tcp_timers __P((struct tcpcb *, int));
+void tcp_trace __P((int, int, struct tcpcb *, struct tcpiphdr *, int));
+struct tcpcb *
+ tcp_usrclosed __P((struct tcpcb *));
+int tcp_usrreq __P((struct socket *,
+ int, struct mbuf *, struct mbuf *, struct mbuf *));
+void tcp_xmit_timer __P((struct tcpcb *, int));
+#endif
diff --git a/sys/netinet/tcpip.h b/sys/netinet/tcpip.h
new file mode 100644
index 000000000000..5000ae303ce3
--- /dev/null
+++ b/sys/netinet/tcpip.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcpip.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tcp+ip header, after ip options removed.
+ */
+struct tcpiphdr {
+ struct ipovly ti_i; /* overlaid ip structure */
+ struct tcphdr ti_t; /* tcp header */
+};
+/* Shorthand names for the members of the embedded ip overlay (ti_i) ... */
+#define ti_next ti_i.ih_next
+#define ti_prev ti_i.ih_prev
+#define ti_x1 ti_i.ih_x1
+#define ti_pr ti_i.ih_pr
+#define ti_len ti_i.ih_len
+#define ti_src ti_i.ih_src
+#define ti_dst ti_i.ih_dst
+/* ... and of the embedded tcp header (ti_t). */
+#define ti_sport ti_t.th_sport
+#define ti_dport ti_t.th_dport
+#define ti_seq ti_t.th_seq
+#define ti_ack ti_t.th_ack
+#define ti_x2 ti_t.th_x2
+#define ti_off ti_t.th_off
+#define ti_flags ti_t.th_flags
+#define ti_win ti_t.th_win
+#define ti_sum ti_t.th_sum
+#define ti_urp ti_t.th_urp
diff --git a/sys/netinet/udp.h b/sys/netinet/udp.h
new file mode 100644
index 000000000000..354a213cbc2a
--- /dev/null
+++ b/sys/netinet/udp.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)udp.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Udp protocol header.
+ * Per RFC 768, September, 1981.
+ */
+/*
+ * NOTE(review): uh_ulen is declared as a signed short while the other
+ * three fields are u_short; confirm that users of this header cope with
+ * length values above 32767.
+ */
+struct udphdr {
+ u_short uh_sport; /* source port */
+ u_short uh_dport; /* destination port */
+ short uh_ulen; /* udp length */
+ u_short uh_sum; /* udp checksum */
+};
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
new file mode 100644
index 000000000000..95b1895ac0a2
--- /dev/null
+++ b/sys/netinet/udp_usrreq.c
@@ -0,0 +1,640 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+
+/*
+ * UDP protocol implementation.
+ * Per RFC 768, August, 1980.
+ */
+/*
+ * udpcksum: when nonzero, udp_input() verifies nonzero checksums and
+ * udp_output() generates them.  Adjustable at run time through
+ * udp_sysctl() (UDPCTL_CHECKSUM).  Off by default under COMPAT_42.
+ */
+#ifndef COMPAT_42
+int udpcksum = 1;
+#else
+int udpcksum = 0; /* XXX */
+#endif
+
+/*
+ * udp_in: scratch sockaddr that udp_input() fills with the source
+ * port/address of each datagram before calling sbappendaddr().
+ * udp_last_inpcb: one-entry cache of the pcb found by the last unicast
+ * lookup in udp_input(); udp_detach() resets it to the list head.
+ */
+struct sockaddr_in udp_in = { sizeof(udp_in), AF_INET };
+struct inpcb *udp_last_inpcb = &udb;
+
+/* File-local helpers defined further down. */
+static void udp_detach __P((struct inpcb *));
+static void udp_notify __P((struct inpcb *, int));
+static struct mbuf *udp_saveopt __P((caddr_t, int, int));
+
+/*
+ * Set up the UDP pcb list head: an empty circular list whose
+ * forward and backward links both point back at udb itself.
+ */
+void
+udp_init()
+{
+	udb.inp_prev = &udb;
+	udb.inp_next = &udb;
+}
+
+/*
+ * Receive one UDP datagram from IP.
+ *
+ *	m	mbuf chain starting at the IP header
+ *	iphlen	length of the IP header in bytes, options included
+ *
+ * The datagram is length- and checksum-validated and then queued with
+ * sbappendaddr() on the matching socket(s).  Multicast and broadcast
+ * datagrams are delivered to every matching pcb; a unicast datagram is
+ * delivered to a single pcb, or answered with an ICMP port-unreachable
+ * when no pcb matches.  Ownership of m passes to this routine.
+ */
+void
+udp_input(m, iphlen)
+	register struct mbuf *m;
+	int iphlen;
+{
+	register struct ip *ip;
+	register struct udphdr *uh;
+	register struct inpcb *inp;
+	struct mbuf *opts = 0;
+	int len;
+	struct ip save_ip;
+
+	udpstat.udps_ipackets++;
+
+	/*
+	 * Strip IP options, if any; should skip this,
+	 * make available to user, and use on returned packets,
+	 * but we don't yet have a way to check the checksum
+	 * with options still present.
+	 */
+	if (iphlen > sizeof (struct ip)) {
+		ip_stripoptions(m, (struct mbuf *)0);
+		iphlen = sizeof(struct ip);
+	}
+
+	/*
+	 * Get IP and UDP header together in first mbuf.
+	 */
+	ip = mtod(m, struct ip *);
+	if (m->m_len < iphlen + sizeof(struct udphdr)) {
+		if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
+			udpstat.udps_hdrops++;
+			return;
+		}
+		ip = mtod(m, struct ip *);
+	}
+	uh = (struct udphdr *)((caddr_t)ip + iphlen);
+
+	/*
+	 * Make mbuf data length reflect UDP length.
+	 * If not enough data to reflect UDP length, drop.
+	 * Also drop a datagram whose claimed length is shorter than the
+	 * UDP header itself: trimming to such a length would cut into the
+	 * header that the code below still reads and strips.
+	 */
+	len = ntohs((u_short)uh->uh_ulen);
+	if (ip->ip_len != len) {
+		if (len > ip->ip_len || len < (int)sizeof(struct udphdr)) {
+			udpstat.udps_badlen++;
+			goto bad;
+		}
+		m_adj(m, len - ip->ip_len);
+		/* ip->ip_len = len; */
+	}
+	/*
+	 * Save a copy of the IP header in case we want to restore it
+	 * for sending an ICMP error message in response.
+	 */
+	save_ip = *ip;
+
+	/*
+	 * Checksum extended UDP header and data.
+	 * The IP header is overwritten in place with the pseudo-header
+	 * (struct ipovly); save_ip above keeps the original.
+	 */
+	if (udpcksum && uh->uh_sum) {
+		((struct ipovly *)ip)->ih_next = 0;
+		((struct ipovly *)ip)->ih_prev = 0;
+		((struct ipovly *)ip)->ih_x1 = 0;
+		((struct ipovly *)ip)->ih_len = uh->uh_ulen;
+		if (uh->uh_sum = in_cksum(m, len + sizeof (struct ip))) {
+			udpstat.udps_badsum++;
+			m_freem(m);
+			return;
+		}
+	}
+
+	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
+		struct socket *last;
+		/*
+		 * Deliver a multicast or broadcast datagram to *all* sockets
+		 * for which the local and remote addresses and ports match
+		 * those of the incoming datagram.  This allows more than
+		 * one process to receive multi/broadcasts on the same port.
+		 * (This really ought to be done for unicast datagrams as
+		 * well, but that would cause problems with existing
+		 * applications that open both address-specific sockets and
+		 * a wildcard socket listening to the same port -- they would
+		 * end up receiving duplicates of every unicast datagram.
+		 * Those applications open the multiple sockets to overcome an
+		 * inadequacy of the UDP socket interface, but for backwards
+		 * compatibility we avoid the problem here rather than
+		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
+		 */
+
+		/*
+		 * Construct sockaddr format source address.
+		 */
+		udp_in.sin_port = uh->uh_sport;
+		udp_in.sin_addr = ip->ip_src;
+		m->m_len -= sizeof (struct udpiphdr);
+		m->m_data += sizeof (struct udpiphdr);
+		/*
+		 * Locate pcb(s) for datagram.
+		 * (Algorithm copied from raw_intr().)
+		 * Delivery to a match is deferred until the next match is
+		 * found, so that the final match can take the original
+		 * chain instead of a copy.
+		 */
+		last = NULL;
+		for (inp = udb.inp_next; inp != &udb; inp = inp->inp_next) {
+			if (inp->inp_lport != uh->uh_dport)
+				continue;
+			if (inp->inp_laddr.s_addr != INADDR_ANY) {
+				if (inp->inp_laddr.s_addr !=
+				    ip->ip_dst.s_addr)
+					continue;
+			}
+			if (inp->inp_faddr.s_addr != INADDR_ANY) {
+				if (inp->inp_faddr.s_addr !=
+				    ip->ip_src.s_addr ||
+				    inp->inp_fport != uh->uh_sport)
+					continue;
+			}
+
+			if (last != NULL) {
+				struct mbuf *n;
+
+				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
+					if (sbappendaddr(&last->so_rcv,
+					    (struct sockaddr *)&udp_in,
+					    n, (struct mbuf *)0) == 0) {
+						m_freem(n);
+						udpstat.udps_fullsock++;
+					} else
+						sorwakeup(last);
+				}
+			}
+			last = inp->inp_socket;
+			/*
+			 * Don't look for additional matches if this one does
+			 * not have either the SO_REUSEPORT or SO_REUSEADDR
+			 * socket options set.  This heuristic avoids searching
+			 * through all pcbs in the common case of a non-shared
+			 * port.  It assumes that an application will never
+			 * clear these options after setting them.
+			 *
+			 * The mask must be parenthesized: == binds tighter
+			 * than &, and without the parentheses the test is
+			 * always false, so the loop never stops early.
+			 */
+			if ((last->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0)
+				break;
+		}
+
+		if (last == NULL) {
+			/*
+			 * No matching pcb found; discard datagram.
+			 * (No need to send an ICMP Port Unreachable
+			 * for a broadcast or multicast datagram.)
+			 */
+			udpstat.udps_noportbcast++;
+			goto bad;
+		}
+		if (sbappendaddr(&last->so_rcv, (struct sockaddr *)&udp_in,
+		    m, (struct mbuf *)0) == 0) {
+			udpstat.udps_fullsock++;
+			goto bad;
+		}
+		sorwakeup(last);
+		return;
+	}
+	/*
+	 * Locate pcb for datagram.
+	 * Try the one-entry cache first; fall back to a full lookup
+	 * (wildcards allowed) when the cached pcb is not an exact match.
+	 */
+	inp = udp_last_inpcb;
+	if (inp->inp_lport != uh->uh_dport ||
+	    inp->inp_fport != uh->uh_sport ||
+	    inp->inp_faddr.s_addr != ip->ip_src.s_addr ||
+	    inp->inp_laddr.s_addr != ip->ip_dst.s_addr) {
+		inp = in_pcblookup(&udb, ip->ip_src, uh->uh_sport,
+		    ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD);
+		if (inp)
+			udp_last_inpcb = inp;
+		udpstat.udpps_pcbcachemiss++;
+	}
+	if (inp == 0) {
+		udpstat.udps_noport++;
+		if (m->m_flags & (M_BCAST | M_MCAST)) {
+			udpstat.udps_noportbcast++;
+			goto bad;
+		}
+		/* Put back the real IP header for the ICMP error. */
+		*ip = save_ip;
+		ip->ip_len += iphlen;
+		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
+		return;
+	}
+
+	/*
+	 * Construct sockaddr format source address.
+	 * Stuff source address and datagram in user buffer.
+	 */
+	udp_in.sin_port = uh->uh_sport;
+	udp_in.sin_addr = ip->ip_src;
+	if (inp->inp_flags & INP_CONTROLOPTS) {
+		struct mbuf **mp = &opts;
+
+		if (inp->inp_flags & INP_RECVDSTADDR) {
+			*mp = udp_saveopt((caddr_t) &ip->ip_dst,
+			    sizeof(struct in_addr), IP_RECVDSTADDR);
+			if (*mp)
+				mp = &(*mp)->m_next;
+		}
+#ifdef notyet
+		/* options were tossed above */
+		if (inp->inp_flags & INP_RECVOPTS) {
+			*mp = udp_saveopt((caddr_t) opts_deleted_above,
+			    sizeof(struct in_addr), IP_RECVOPTS);
+			if (*mp)
+				mp = &(*mp)->m_next;
+		}
+		/* ip_srcroute doesn't do what we want here, need to fix */
+		if (inp->inp_flags & INP_RECVRETOPTS) {
+			*mp = udp_saveopt((caddr_t) ip_srcroute(),
+			    sizeof(struct in_addr), IP_RECVRETOPTS);
+			if (*mp)
+				mp = &(*mp)->m_next;
+		}
+#endif
+	}
+	/* Strip the IP and UDP headers; only the payload is queued. */
+	iphlen += sizeof(struct udphdr);
+	m->m_len -= iphlen;
+	m->m_pkthdr.len -= iphlen;
+	m->m_data += iphlen;
+	if (sbappendaddr(&inp->inp_socket->so_rcv, (struct sockaddr *)&udp_in,
+	    m, opts) == 0) {
+		udpstat.udps_fullsock++;
+		goto bad;
+	}
+	sorwakeup(inp->inp_socket);
+	return;
+bad:
+	m_freem(m);
+	if (opts)
+		m_freem(opts);
+}
+
+/*
+ * Create a "control" mbuf containing the specified data
+ * with the specified type for presentation with a datagram.
+ *
+ *	p	data to copy in behind the cmsghdr
+ *	size	number of bytes at p
+ *	type	value stored in cmsg_type (cmsg_level is IPPROTO_IP)
+ *
+ * Returns the new MT_CONTROL mbuf, or NULL when no mbuf is available
+ * or when header plus data would not fit in the mbuf's data area.
+ * Declared static to match the forward declaration at the top of the file.
+ */
+static struct mbuf *
+udp_saveopt(p, size, type)
+	caddr_t p;
+	register int size;
+	int type;
+{
+	register struct cmsghdr *cp;
+	struct mbuf *m;
+
+	/* Never let the copy below run past the end of a plain mbuf. */
+	if (size < 0 || (u_int)size > MLEN - sizeof(*cp))
+		return ((struct mbuf *) NULL);
+	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
+		return ((struct mbuf *) NULL);
+	cp = mtod(m, struct cmsghdr *);
+	bcopy(p, CMSG_DATA(cp), size);
+	/* From here on size counts the cmsghdr as well as the data. */
+	size += sizeof(*cp);
+	m->m_len = size;
+	cp->cmsg_len = size;
+	cp->cmsg_level = IPPROTO_IP;
+	cp->cmsg_type = type;
+	return (m);
+}
+
+/*
+ * Asynchronous error notification for a UDP socket: post the error
+ * on the socket, then wake up both the receive and the send side
+ * so that whoever is waiting can collect it.
+ */
+static void
+udp_notify(inp, errno)
+	register struct inpcb *inp;
+	int errno;
+{
+	struct socket *so = inp->inp_socket;
+
+	so->so_error = errno;
+	sorwakeup(so);
+	sowwakeup(so);
+}
+
+/*
+ * Control input: pass a protocol control request on to the UDP pcbs.
+ *
+ *	cmd	PRC_* request code
+ *	sa	address the request refers to
+ *	ip	IP header that is followed by the UDP header of the
+ *		datagram concerned, or null when there is none
+ *
+ * Requests that are neither redirects nor mapped to an error in
+ * inetctlerrmap[] are ignored.  With an IP header the notification is
+ * narrowed to the ports and source address found in it; without one,
+ * zero ports and zeroin_addr are passed to in_pcbnotify().
+ */
+void
+udp_ctlinput(cmd, sa, ip)
+	int cmd;
+	struct sockaddr *sa;
+	register struct ip *ip;
+{
+	register struct udphdr *uh;
+	extern struct in_addr zeroin_addr;
+	extern u_char inetctlerrmap[];
+
+	if (!PRC_IS_REDIRECT(cmd)) {
+		if ((unsigned)cmd >= PRC_NCMDS)
+			return;
+		if (inetctlerrmap[cmd] == 0)
+			return;
+	}
+
+	if (ip == 0) {
+		in_pcbnotify(&udb, sa, 0, zeroin_addr, 0, cmd, udp_notify);
+		return;
+	}
+
+	/* The UDP header sits right behind the variable-length IP header. */
+	uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+	in_pcbnotify(&udb, sa, uh->uh_dport, ip->ip_src, uh->uh_sport,
+	    cmd, udp_notify);
+}
+
+/*
+ * Send one UDP datagram.
+ *
+ *	inp	pcb of the sending socket
+ *	m	payload (packet-header mbuf chain); always consumed
+ *	addr	destination for an unconnected send, or null to use the
+ *		pcb's connected peer
+ *	control	control data; currently just freed
+ *
+ * Returns 0 or an errno value: EMSGSIZE when headers plus payload
+ * exceed IP_MAXPACKET, EISCONN when addr is given on a connected pcb,
+ * ENOTCONN when addr is missing on an unconnected one, ENOBUFS when no
+ * room for the headers can be allocated, otherwise whatever
+ * in_pcbconnect() or ip_output() return.
+ *
+ * For a send with addr the pcb is connected only for the duration of
+ * the call, at splnet; every exit taken after that connect goes through
+ * the disconnect label so the pcb and the priority level are restored.
+ */
+int
+udp_output(inp, m, addr, control)
+	register struct inpcb *inp;
+	register struct mbuf *m;
+	struct mbuf *addr, *control;
+{
+	register struct udpiphdr *ui;
+	register int len = m->m_pkthdr.len;
+	struct in_addr laddr;
+	int s = 0, error = 0;
+
+	if (control)
+		m_freem(control);		/* XXX */
+
+	/*
+	 * The lengths stored below are 16 bits wide; refuse anything
+	 * that would be silently truncated by the (u_short) cast.
+	 */
+	if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
+		error = EMSGSIZE;
+		goto release;
+	}
+
+	if (addr) {
+		laddr = inp->inp_laddr;
+		if (inp->inp_faddr.s_addr != INADDR_ANY) {
+			error = EISCONN;
+			goto release;
+		}
+		/*
+		 * Must block input while temporarily connected.
+		 */
+		s = splnet();
+		error = in_pcbconnect(inp, addr);
+		if (error) {
+			splx(s);
+			goto release;
+		}
+	} else {
+		if (inp->inp_faddr.s_addr == INADDR_ANY) {
+			error = ENOTCONN;
+			goto release;
+		}
+	}
+	/*
+	 * Calculate data length and get a mbuf
+	 * for UDP and IP headers.
+	 * On failure m comes back null, so there is nothing left to free,
+	 * but a temporary connection still has to be undone.
+	 */
+	M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT);
+	if (m == 0) {
+		error = ENOBUFS;
+		goto disconnect;
+	}
+
+	/*
+	 * Fill in mbuf with extended UDP header
+	 * and addresses and length put into network format.
+	 */
+	ui = mtod(m, struct udpiphdr *);
+	ui->ui_next = ui->ui_prev = 0;
+	ui->ui_x1 = 0;
+	ui->ui_pr = IPPROTO_UDP;
+	ui->ui_len = htons((u_short)len + sizeof (struct udphdr));
+	ui->ui_src = inp->inp_laddr;
+	ui->ui_dst = inp->inp_faddr;
+	ui->ui_sport = inp->inp_lport;
+	ui->ui_dport = inp->inp_fport;
+	ui->ui_ulen = ui->ui_len;
+
+	/*
+	 * Stuff checksum and output datagram.
+	 * A computed checksum of zero is sent as 0xffff.
+	 */
+	ui->ui_sum = 0;
+	if (udpcksum) {
+		if ((ui->ui_sum = in_cksum(m, sizeof (struct udpiphdr) + len)) == 0)
+			ui->ui_sum = 0xffff;
+	}
+	((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
+	((struct ip *)ui)->ip_ttl = inp->inp_ip.ip_ttl;	/* XXX */
+	((struct ip *)ui)->ip_tos = inp->inp_ip.ip_tos;	/* XXX */
+	udpstat.udps_opackets++;
+	error = ip_output(m, inp->inp_options, &inp->inp_route,
+	    inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST),
+	    inp->inp_moptions);
+
+disconnect:
+	if (addr) {
+		in_pcbdisconnect(inp);
+		inp->inp_laddr = laddr;
+		splx(s);
+	}
+	return (error);
+
+release:
+	m_freem(m);
+	return (error);
+}
+
+/*
+ * Default send and receive buffer reservations; udp_usrreq() passes
+ * both to soreserve() when a socket is attached (PRU_ATTACH).
+ */
+u_long udp_sendspace = 9216; /* really max datagram size */
+u_long udp_recvspace = 40 * (1024 + sizeof(struct sockaddr_in));
+ /* 40 1K datagrams */
+
+/*
+ * User-request entry point for UDP sockets.
+ *
+ *	so	socket the request is for
+ *	req	PRU_* request code
+ *	m, addr, control
+ *		request arguments; for PRU_CONTROL they are recast and
+ *		handed to in_control() instead of being treated as mbufs
+ *
+ * Returns 0 or an errno value.  Except for the requests that return
+ * directly (PRU_CONTROL, PRU_SEND, PRU_SENSE, PRU_RCVD, PRU_RCVOOB),
+ * m and any leftover control mbuf are freed before returning.
+ */
+/*ARGSUSED*/
+int
+udp_usrreq(so, req, m, addr, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *addr, *control;
+{
+	struct inpcb *inp = sotoinpcb(so);
+	int error = 0;
+	int s;
+
+	if (req == PRU_CONTROL)
+		return (in_control(so, (int)m, (caddr_t)addr,
+		    (struct ifnet *)control));
+
+	/* Only an attach may arrive before a pcb exists. */
+	if (req != PRU_ATTACH && inp == NULL) {
+		error = EINVAL;
+		goto release;
+	}
+
+	/*
+	 * Note: need to block udp_input while changing
+	 * the udp pcb queue and/or pcb addresses.
+	 */
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (inp != NULL) {
+			error = EINVAL;
+			break;
+		}
+		s = splnet();
+		error = in_pcballoc(so, &udb);
+		splx(s);
+		if (error == 0)
+			error = soreserve(so, udp_sendspace, udp_recvspace);
+		if (error == 0)
+			((struct inpcb *) so->so_pcb)->inp_ip.ip_ttl = ip_defttl;
+		break;
+
+	case PRU_ABORT:
+		soisdisconnected(so);
+		/* FALLTHROUGH */
+	case PRU_DETACH:
+		udp_detach(inp);
+		break;
+
+	case PRU_BIND:
+		s = splnet();
+		error = in_pcbbind(inp, addr);
+		splx(s);
+		break;
+
+	case PRU_CONNECT:
+		if (inp->inp_faddr.s_addr == INADDR_ANY) {
+			s = splnet();
+			error = in_pcbconnect(inp, addr);
+			splx(s);
+			if (error == 0)
+				soisconnected(so);
+		} else
+			error = EISCONN;
+		break;
+
+	case PRU_DISCONNECT:
+		if (inp->inp_faddr.s_addr != INADDR_ANY) {
+			s = splnet();
+			in_pcbdisconnect(inp);
+			inp->inp_laddr.s_addr = INADDR_ANY;
+			splx(s);
+			so->so_state &= ~SS_ISCONNECTED;	/* XXX */
+		} else
+			error = ENOTCONN;
+		break;
+
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		break;
+
+	case PRU_SEND:
+		/* udp_output() disposes of m and control itself. */
+		return (udp_output(inp, m, addr, control));
+
+	case PRU_SOCKADDR:
+		in_setsockaddr(inp, addr);
+		break;
+
+	case PRU_PEERADDR:
+		in_setpeeraddr(inp, addr);
+		break;
+
+	case PRU_SENSE:
+		/*
+		 * stat: don't bother with a blocksize.
+		 */
+		return (0);
+
+	/* Requests that make no sense for a datagram protocol. */
+	case PRU_LISTEN:
+	case PRU_CONNECT2:
+	case PRU_ACCEPT:
+	case PRU_SENDOOB:
+	case PRU_FASTTIMO:
+	case PRU_SLOWTIMO:
+	case PRU_PROTORCV:
+	case PRU_PROTOSEND:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_RCVD:
+	case PRU_RCVOOB:
+		return (EOPNOTSUPP);	/* do not free mbuf's */
+
+	default:
+		panic("udp_usrreq");
+	}
+
+release:
+	if (control != NULL) {
+		printf("udp control data unexpectedly retained\n");
+		m_freem(control);
+	}
+	if (m != NULL)
+		m_freem(m);
+	return (error);
+}
+
+/*
+ * Detach a pcb from UDP at splnet.  If the pcb is the one remembered
+ * in the udp_input() lookup cache, the cache is pointed back at the
+ * list head first so that it never refers to a detached pcb.
+ */
+static void
+udp_detach(inp)
+	struct inpcb *inp;
+{
+	int s;
+
+	s = splnet();
+	if (udp_last_inpcb == inp)
+		udp_last_inpcb = &udb;
+	in_pcbdetach(inp);
+	splx(s);
+}
+
+/*
+ * Sysctl for udp variables.
+ *
+ *	name, namelen	remaining components of the sysctl name
+ *	oldp, oldlenp	buffer and length for the current value
+ *	newp, newlen	buffer and length for a new value
+ *
+ * Returns ENOTDIR when the name has more than one component left,
+ * ENOPROTOOPT for an unknown name, otherwise the result of
+ * sysctl_int() on udpcksum.
+ *
+ * The return type is spelled out to agree with the prototype in
+ * udp_var.h instead of relying on implicit int.
+ */
+int
+udp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+{
+	/* All sysctl names at this level are terminal. */
+	if (namelen != 1)
+		return (ENOTDIR);
+
+	switch (name[0]) {
+	case UDPCTL_CHECKSUM:
+		return (sysctl_int(oldp, oldlenp, newp, newlen, &udpcksum));
+	default:
+		return (ENOPROTOOPT);
+	}
+	/* NOTREACHED */
+}
diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h
new file mode 100644
index 000000000000..e8a21d261c50
--- /dev/null
+++ b/sys/netinet/udp_var.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)udp_var.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * UDP kernel structures and variables.
+ */
+/*
+ * Overlaid IP header followed by the UDP header.  udp_output() prepends
+ * one of these to the payload, fills it in and checksums over it.
+ * The ui_* macros below are shorthands for members of the two
+ * embedded structures.
+ */
+struct udpiphdr {
+ struct ipovly ui_i; /* overlaid ip structure */
+ struct udphdr ui_u; /* udp header */
+};
+#define ui_next ui_i.ih_next
+#define ui_prev ui_i.ih_prev
+#define ui_x1 ui_i.ih_x1
+#define ui_pr ui_i.ih_pr
+#define ui_len ui_i.ih_len
+#define ui_src ui_i.ih_src
+#define ui_dst ui_i.ih_dst
+#define ui_sport ui_u.uh_sport
+#define ui_dport ui_u.uh_dport
+#define ui_ulen ui_u.uh_ulen
+#define ui_sum ui_u.uh_sum
+
+/*
+ * UDP statistics; the counters are incremented by udp_input() and
+ * udp_output() in udp_usrreq.c.
+ * NOTE(review): udpps_pcbcachemiss carries a doubled 'p' unlike the
+ * other udps_ members.  udp_input() refers to it under that exact
+ * name, so it cannot be renamed here alone.
+ */
+struct udpstat {
+ /* input statistics: */
+ u_long udps_ipackets; /* total input packets */
+ u_long udps_hdrops; /* packet shorter than header */
+ u_long udps_badsum; /* checksum error */
+ u_long udps_badlen; /* data length larger than packet */
+ u_long udps_noport; /* no socket on port */
+ u_long udps_noportbcast; /* of above, arrived as broadcast */
+ u_long udps_fullsock; /* not delivered, input socket full */
+ u_long udpps_pcbcachemiss; /* input packets missing pcb cache */
+ /* output statistics: */
+ u_long udps_opackets; /* total output packets */
+};
+
+/*
+ * Names for UDP sysctl objects
+ *
+ * UDPCTL_CHECKSUM is the only name udp_sysctl() accepts.
+ * In UDPCTL_NAMES the entry at index 0 is an empty placeholder and the
+ * entry at index 1 (the value of UDPCTL_CHECKSUM) is "checksum".
+ * Presumably UDPCTL_MAXID is one past the highest valid id -- confirm
+ * against the sysctl name tables of the other protocols.
+ */
+#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */
+#define UDPCTL_MAXID 2
+
+#define UDPCTL_NAMES { \
+ { 0, 0 }, \
+ { "checksum", CTLTYPE_INT }, \
+}
+
+#ifdef KERNEL
+/*
+ * udb is the head of the UDP pcb list (set up by udp_init());
+ * udpstat holds the counters declared above.
+ * NOTE(review): both are written without extern, so every file that
+ * includes this header under KERNEL carries its own tentative
+ * definition.  Moving the definitions into one .c file would need a
+ * coordinated change outside this header.
+ */
+struct inpcb udb;
+struct udpstat udpstat;
+
+/* Entry points implemented in udp_usrreq.c. */
+void udp_ctlinput __P((int, struct sockaddr *, struct ip *));
+void udp_init __P((void));
+void udp_input __P((struct mbuf *, int));
+int udp_output __P((struct inpcb *,
+ struct mbuf *, struct mbuf *, struct mbuf *));
+int udp_sysctl __P((int *, u_int, void *, size_t *, void *, size_t));
+int udp_usrreq __P((struct socket *,
+ int, struct mbuf *, struct mbuf *, struct mbuf *));
+#endif
diff --git a/sys/netiso/argo_debug.h b/sys/netiso/argo_debug.h
new file mode 100644
index 000000000000..653982f005af
--- /dev/null
+++ b/sys/netiso/argo_debug.h
@@ -0,0 +1,296 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)argo_debug.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*****************************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: argo_debug.h,v 4.6 88/07/19 15:53:40 hagens Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/argo_debug.h,v $
+ */
+
+#ifndef __ARGO_DEBUG__
+#define __ARGO_DEBUG__
+
+#define dump_buf(a, b) Dump_buf((caddr_t)(a), (int)(b))
+
+/***********************************************
+ * Lint stuff
+ **********************************************/
+#if defined(lint)
+/*
+ * lint can't handle the flaky vacuous definitions
+ * of IFDEBUG, ENDDEBUG, etc.
+ */
+#endif /* defined(lint) */
+
+/***********************************************
+ * DEBUG ON:
+ *
+ * ARGO_DEBUG is defined here unconditionally, so as this header
+ * stands the DEBUG OFF branch further down is never compiled.
+ **********************************************/
+#ifndef ARGO_DEBUG
+#define ARGO_DEBUG
+#endif /* ARGO_DEBUG */
+
+
+#ifdef ARGO_DEBUG
+/*
+ #ifndef TPPT
+ #define TPPT
+ #endif TPPT
+
+ #ifndef TP_PERF_MEAS
+ #define TP_PERF_MEAS
+ #endif TP_PERF_MEAS
+*/
+
+/*
+ * One flag byte per debug option.  IFDEBUG(D_xxx) indexes this array
+ * with the option's character code; the D_* codes defined below run up
+ * to '\177', which is the last of the 128 entries.
+ * NOTE(review): the array is defined, not just declared, in this header.
+ */
+unsigned char argo_debug[128];
+
+/*
+ * IFDEBUG(opt) ... ENDDEBUG brackets code that runs only while
+ * argo_debug[opt] is nonzero.  IFDEBUG opens a brace that ENDDEBUG
+ * closes, so the two must always be used as a pair.
+ */
+#define IFDEBUG(ascii) \
+ if(argo_debug[ascii]) {
+#define ENDDEBUG ; }
+
+#else /* ARGO_DEBUG */
+
+/***********************************************
+ * DEBUG OFF:
+ *
+ * The apparent intent is for IFDEBUG/ENDDEBUG to expand into comment
+ * delimiters so that the bracketed code disappears.
+ * NOTE(review): this relies on the preprocessor gluing a slash and
+ * STAR into a comment token, which an ISO C preprocessor will not
+ * do -- confirm before ever enabling this branch.
+ **********************************************/
+
+#ifndef STAR
+#define STAR *
+#endif /* STAR */
+#define IFDEBUG(ascii) //*beginning of comment*/STAR
+#define ENDDEBUG STAR/*end of comment*//
+
+#endif /* ARGO_DEBUG */
+
+/***********************************************
+ * ASSERT
+ **********************************************/
+#ifdef ARGO_DEBUG
+
+#ifndef lint
+#define ASSERT(phrase) \
+if( !(phrase) ) printf("ASSERTION NOT VALID at line %d file %s\n",__LINE__,__FILE__)
+#else /* lint */
+#define ASSERT(phrase) /* phrase */
+#endif /* lint */
+
+#else /* ARGO_DEBUG */
+
+#define ASSERT(phrase) /* phrase */
+
+#endif /* ARGO_DEBUG */
+
+
+/***********************************************
+ * CLNP DEBUG OPTIONS
+ **********************************************/
+#define D_INPUT '\1'
+/* clnp input */
+#define D_OUTPUT '\2'
+/* clnp output */
+#define D_ROUTE '\3'
+/* clnp routing */
+#define D_CTLINPUT '\4'
+/* clnp control input */
+#define D_CTLOUTPUT '\5'
+/* clnp control output */
+#define D_OPTIONS '\6'
+/* clnp options */
+#define D_IOCTL '\7'
+/* iso ioctls */
+#define D_ETHER '\10'
+/* clnp over ethernet */
+#define D_TOKEN '\11'
+/* clnp over token ring */
+#define D_ADCOM '\12'
+/* clnp over the adcom */
+#define D_ISO '\13'
+/* iso address family */
+#define D_FORWARD '\14'
+/* clnp forwarding */
+#define D_DUMPOUT '\15'
+/* dump clnp outgoing packets */
+#define D_DUMPIN '\16'
+/* dump clnp input packets */
+#define D_DISCARD '\17'
+/* debug clnp packet discard/er function */
+#define D_FRAG '\20'
+/* clnp fragmentation */
+#define D_REASS '\21'
+/* clnp reassembly */
+
+char *clnp_iso_addrp();
+
+/***********************************************
+ * ESIS DEBUG OPTIONS
+ **********************************************/
+#define D_ESISOUTPUT '\30'
+#define D_ESISINPUT '\31'
+#define D_SNPA '\32'
+
+/***********************************************
+ * ISIS DEBUG OPTIONS
+ **********************************************/
+#define D_ISISOUTPUT '\40'
+#define D_ISISINPUT '\41'
+
+/***********************************************
+ * EON DEBUG OPTION
+ **********************************************/
+#define D_EON '\57'
+
+/***********************************************
+ * CONS DEBUG OPTIONS
+ **********************************************/
+
+#define D_ECNWORK '\60'
+#define D_ECNOUT '\61'
+#define D_ECNFIN '\62'
+#define D_ECNDWN '\63'
+#define D_ECNUTIL '\64'
+
+#define D_INCOMING '\70'
+#define D_CDATA '\71'
+#define D_CFIND '\72'
+#define D_CDUMP_REQ '\73'
+#define D_CADDR '\74'
+#define D_CCONS '\75'
+#define D_CCONN '\76'
+
+
+/***********************************************
+ * TP DEBUG OPTIONS
+ **********************************************/
+
+#define D_SETPARAMS '\137'
+#define D_RTT '\140'
+
+#define D_ACKRECV '\141'
+#define D_ACKSEND '\142'
+#define D_CONN '\143'
+#define D_CREDIT '\144'
+#define D_DATA '\145'
+#define D_DRIVER '\146'
+
+#define D_EMIT '\147'
+#define D_ERROR_EMIT '\150'
+#define D_TPINPUT '\151'
+#define D_INDICATION '\152'
+#define D_CHKSUM '\153'
+
+#define D_RENEG '\154'
+#define D_PERF_MEAS '\155'
+#define D_MBUF_MEAS '\156'
+#define D_RTC '\157'
+#define D_SB '\160'
+
+#define D_DISASTER_CHECK '\161'
+#define D_REQUEST '\162'
+#define D_STASH '\163'
+#define D_NEWSOCK '\164'
+#define D_TIMER '\165'
+
+#define D_TPIOCTL '\166'
+#define D_SIZE_CHECK '\167'
+#define D_2ER '\170'
+#define D_DISASTER_CHECK_W '\171'
+
+#define D_XPD '\172'
+#define D_SYSCALL '\173'
+#define D_DROP '\174'
+#define D_ZDREF '\175'
+#define D_TPISO '\176'
+#define D_QUENCH '\177'
+
+void dump_mbuf();
+
+/***********************************************
+ * New mbuf types for debugging w/ netstat -m
+ * This messes up 4.4 malloc for now. need bigger
+ * mbtypes array for now.
+ *
+ * The distinct type codes are therefore disabled (notdef) and the
+ * TPMT_* names fall back to the standard MT_* types in the #else
+ * branch.  TPMT_EOT has no definition in that branch.
+ **********************************************/
+#ifdef notdef
+
+#define TPMT_DATA 0x21
+#define TPMT_RCVRTC 0x42
+#define TPMT_SNDRTC 0x41
+#define TPMT_TPHDR 0x22
+#define TPMT_IPHDR 0x32
+#define TPMT_SONAME 0x28
+#define TPMT_EOT 0x40
+#define TPMT_XPD 0x44
+#define TPMT_PCB 0x23
+#define TPMT_PERF 0x45
+
+#else /* notdef */
+
+#define TPMT_DATA MT_DATA
+#define TPMT_RCVRTC MT_DATA
+#define TPMT_SNDRTC MT_DATA
+#define TPMT_IPHDR MT_HEADER
+#define TPMT_TPHDR MT_HEADER
+#define TPMT_SONAME MT_SONAME
+/* MT_EOT and MT_XPD are defined in tp_param.h */
+#define TPMT_XPD MT_OOBDATA
+#define TPMT_PCB MT_PCB
+#define TPMT_PERF MT_PCB
+
+#endif /* notdef */
+
+#endif /* __ARGO_DEBUG__ */
diff --git a/sys/netiso/clnl.h b/sys/netiso/clnl.h
new file mode 100644
index 000000000000..87227dc273bb
--- /dev/null
+++ b/sys/netiso/clnl.h
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clnl.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * Protocol switch entry holding a single input handler.
+ * The handler is declared without a parameter list, so the compiler
+ * does not check the arguments passed through it.
+ */
+struct clnl_protosw {
+ int (*clnl_input)(); /* input routine */
+};
diff --git a/sys/netiso/clnp.h b/sys/netiso/clnp.h
new file mode 100644
index 000000000000..4c81ba37d9cc
--- /dev/null
+++ b/sys/netiso/clnp.h
@@ -0,0 +1,463 @@
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clnp.h 8.2 (Berkeley) 4/16/94
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /big/BSD4.4/isis-usr/src/sys/netiso/RCS/clnp.h,v 1.1 1992/02/07 18:14:59 hagens Exp hagens $ */
+/* $Source: /big/BSD4.4/isis-usr/src/sys/netiso/RCS/clnp.h,v $ */
+
+/* should be config option but cpp breaks with too many #defines */
+#define DECBIT
+
+/*
+ * Return nonzero if the mbuf has M_EXT set in m_flags (a cluster mbuf)
+ */
+#define IS_CLUSTER(m) ((m)->m_flags & M_EXT)
+
+/*
+ * Move the halfword into the two characters (single expression: if/else safe)
+ */
+#define HTOC(msb, lsb, hword)\
+	((msb) = (u_char)((hword) >> 8),\
+	(lsb) = (u_char)((hword) & 0xff))
+/*
+ * Move the two characters into the halfword
+ */
+#define CTOH(msb, lsb, hword)\
+	((hword) = ((msb) << 8) | (lsb))
+
+/*
+ * Return true if the checksum has been set - i.e. at least one of the
+ * two checksum bytes of the fixed header is not zero
+ */
+#define CKSUM_REQUIRED(clnp)\
+ (((clnp)->cnf_cksum_msb != 0) || ((clnp)->cnf_cksum_lsb != 0))
+
+/*
+ * Fixed part of clnp header
+ */
+struct clnp_fixed {
+ u_char cnf_proto_id; /* network layer protocol identifier */
+ u_char cnf_hdr_len; /* length indicator (octets) */
+ u_char cnf_vers; /* version/protocol identifier extension */
+ u_char cnf_ttl; /* lifetime (units of 500 milliseconds) */
+ u_char cnf_type; /* type code in the CNF_TYPE bits; the high bits are flags: */
+ /* CNF_ERR_OK, CNF_MORE_SEGS, and CNF_SEG_OK */
+ u_char cnf_seglen_msb; /* pdu segment length (octets) high byte */
+ u_char cnf_seglen_lsb; /* pdu segment length (octets) low byte */
+ u_char cnf_cksum_msb; /* checksum high byte */
+ u_char cnf_cksum_lsb; /* checksum low byte */
+};
+#define CNF_TYPE 0x1f /* mask for the pdu type code */
+#define CNF_ERR_OK 0x20 /* clnp_discard only emits an ER when this is set */
+#define CNF_MORE_SEGS 0x40
+#define CNF_SEG_OK 0x80
+
+#define CLNP_CKSUM_OFF 0x07 /* offset of checksum */
+
+#define clnl_fixed clnp_fixed
+
+/*
+ * Segmentation part of clnp header
+ */
+struct clnp_segment {
+ u_short cng_id; /* data unit identifier */
+ u_short cng_off; /* segment offset */
+ u_short cng_tot_len; /* total length */
+};
+
+/*
+ * Clnp fragment reassembly structures:
+ *
+ * All packets undergoing reassembly are linked together in
+ * clnp_fragl structures. Each clnp_fragl structure contains a
+ * pointer to the original clnp packet header, as well as a
+ * list of packet fragments. Each packet fragment
+ * is headed by a clnp_frag structure. This structure contains the
+ * offset of the first and last byte of the fragment, as well as
+ * a pointer to the data (an mbuf chain) of the fragment.
+ */
+
+/*
+ * NOTE:
+ * The clnp_frag structure is stored in an mbuf immediately preceding
+ * the fragment data. Since there are words in this struct,
+ * it must be word aligned.
+ *
+ * NOTE:
+ * All the fragment code assumes that the entire clnp header is
+ * contained in the first mbuf.
+ */
+struct clnp_frag {
+ u_int cfr_first; /* offset of first byte of this frag */
+ u_int cfr_last; /* offset of last byte of this frag */
+ u_int cfr_bytes; /* bytes to shave to get to data */
+ struct mbuf *cfr_data; /* ptr to data for this frag */
+ struct clnp_frag *cfr_next; /* next fragment in list */
+};
+
+struct clnp_fragl {
+ struct iso_addr cfl_src; /* source of the pkt */
+ struct iso_addr cfl_dst; /* destination of the pkt */
+ u_short cfl_id; /* id of the pkt */
+ u_char cfl_ttl; /* current ttl of pkt */
+ u_short cfl_last; /* offset of last byte of packet */
+ struct mbuf *cfl_orighdr; /* ptr to original header */
+ struct clnp_frag *cfl_frags; /* linked list of fragments for pkt */
+ struct clnp_fragl *cfl_next; /* next pkt being reassembled */
+};
+
+/*
+ * The following structure is used to index into an options section
+ * of a clnp datagram. These values can be used without worry that
+ * offset or length fields are invalid or too big, etc. That is,
+ * the consistency of the options will be guaranteed before this
+ * structure is filled in. Any pointer (field ending in p) is
+ * actually the offset from the beginning of the mbuf the option
+ * is contained in. A value of NULL for any pointer
+ * means that the option is not present. The length of any option
+ * does not include the option code or option length fields.
+ */
+struct clnp_optidx {
+ u_short cni_securep; /* ptr to beginning of security option */
+ char cni_secure_len; /* length of entire security option */
+
+ u_short cni_srcrt_s; /* offset of start of src rt option */
+ u_short cni_srcrt_len; /* length of entire src rt option */
+
+ u_short cni_recrtp; /* ptr to beginning of recrt option */
+ char cni_recrt_len; /* length of entire recrt option */
+
+ char cni_priorp; /* ptr to priority option (NOTE(review): a char, unlike the u_short offsets) */
+
+ u_short cni_qos_formatp; /* ptr to format of qos option */
+ char cni_qos_len; /* length of entire qos option */
+
+ u_char cni_er_reason; /* reason from ER pdu option */
+
+ /* ESIS options */
+
+ u_short cni_esct; /* value from ISH ESCT option */
+
+ u_short cni_netmaskp; /* ptr to beginning of netmask option */
+ char cni_netmask_len; /* length of entire netmask option */
+
+ u_short cni_snpamaskp; /* ptr to beginning of snpamask option */
+ char cni_snpamask_len; /* length of entire snpamask option */
+
+};
+
+#define ER_INVALREAS 0xff /* code for invalid ER pdu discard reason */
+
+/* given an mbuf and addr of option, return offset from data of mbuf */
+#define CLNP_OPTTOOFF(m, opt)\
+	((u_short) ((opt) - mtod(m, caddr_t)))
+
+/* given an mbuf and offset of option, return address of option */
+#define CLNP_OFFTOOPT(m, off)\
+	((caddr_t) (mtod(m, caddr_t) + (off)))
+
+/* return true iff src route is valid (oidx non-null and a src rt offset recorded) */
+#define CLNPSRCRT_VALID(oidx)\
+	((oidx) && ((oidx)->cni_srcrt_s))
+
+/* return the offset field of the src rt (second octet of the option) */
+#define CLNPSRCRT_OFF(oidx, options)\
+	(*((u_char *)(CLNP_OFFTOOPT(options, (oidx)->cni_srcrt_s) + 1)))
+
+/* return the type field of the src rt (first octet of the option) */
+#define CLNPSRCRT_TYPE(oidx, options)\
+	((u_char)(*(CLNP_OFFTOOPT(options, (oidx)->cni_srcrt_s))))
+
+/* return the length of the current address (octet just before that address) */
+#define CLNPSRCRT_CLEN(oidx, options)\
+	((u_char)(*(CLNP_OFFTOOPT(options, (oidx)->cni_srcrt_s) + CLNPSRCRT_OFF(oidx, options) - 1)))
+
+/* return the address of the current address */
+#define CLNPSRCRT_CADDR(oidx, options)\
+	((caddr_t)(CLNP_OFFTOOPT(options, (oidx)->cni_srcrt_s) + CLNPSRCRT_OFF(oidx, options)))
+
+/*
+ * return true if the src route has run out of routes
+ * this is true if the offset of next route is greater than the end of the rt
+ */
+#define CLNPSRCRT_TERM(oidx, options)\
+	(CLNPSRCRT_OFF(oidx, options) > (oidx)->cni_srcrt_len)
+
+/*
+ * Options a user can set/get
+ */
+#define CLNPOPT_FLAGS 0x01 /* flags: seg permitted, no er xmit, etc */
+#define CLNPOPT_OPTS 0x02 /* datagram options */
+
+/*
+ * Values for particular datagram options
+ */
+#define CLNPOVAL_PAD 0xcc /* padding */
+#define CLNPOVAL_SECURE 0xc5 /* security */
+#define CLNPOVAL_SRCRT 0xc8 /* source routing */
+#define CLNPOVAL_RECRT 0xcb /* record route */
+#define CLNPOVAL_QOS 0xc3 /* quality of service */
+#define CLNPOVAL_PRIOR 0xcd /* priority */
+#define CLNPOVAL_ERREAS 0xc1 /* ER PDU ONLY: reason for discard */
+
+#define CLNPOVAL_SRCSPEC 0x40 /* source address specific */
+#define CLNPOVAL_DSTSPEC 0x80 /* destination address specific */
+#define CLNPOVAL_GLOBAL 0xc0 /* globally unique */
+
+/* Globally Unique QOS */
+#define CLNPOVAL_SEQUENCING 0x10 /* sequencing preferred */
+#define CLNPOVAL_CONGESTED 0x08 /* congestion experienced */
+#define CLNPOVAL_LOWDELAY 0x04 /* low transit delay */
+
+#define CLNPOVAL_PARTRT 0x00 /* partial source routing */
+#define CLNPOVAL_COMPRT 0x01 /* complete source routing */
+
+/*
+ * Clnp flags used in a control block flags field.
+ * NOTE: these must be out of the range of bits defined in ../net/raw_cb.h
+ */
+#define CLNP_NO_SEG 0x010 /* segmentation not permitted */
+#define CLNP_NO_ER 0x020 /* do not generate ERs */
+#define CLNP_SEND_RAW 0x080 /* send pkt as RAW DT rather than TP DT */
+#define CLNP_NO_CKSUM 0x100 /* don't use clnp checksum */
+#define CLNP_ECHO 0x200 /* send echo request */
+#define CLNP_NOCACHE 0x400 /* don't store cache information */
+#define CLNP_ECHOR 0x800 /* send echo reply */
+
+/* valid clnp flags */
+#define CLNP_VFLAGS (CLNP_SEND_RAW|CLNP_NO_SEG|CLNP_NO_ER|CLNP_NO_CKSUM\
+ |CLNP_ECHO|CLNP_NOCACHE|CLNP_ECHOR)
+
+/*
+ * Constants used by clnp
+ */
+#define CLNP_HDR_MIN (sizeof (struct clnp_fixed))
+#define CLNP_HDR_MAX (254)
+#define CLNP_TTL_UNITS 2 /* lifetime units per second (one unit = 500 milliseconds) */
+#define CLNP_TTL (15*CLNP_TTL_UNITS) /* time to live: 15 seconds, in 500 ms units */
+#define ISO8473_V1 0x01
+
+/*
+ * Clnp packet types
+ * In order to test raw clnp and tp/clnp simultaneously, a third type of
+ * packet has been defined: CLNP_RAW. This is done so that the input
+ * routine can switch to the correct input routine (rclnp_input or
+ * tpclnp_input) based on the type field. If clnp had a higher level protocol
+ * field, this would not be necessary.
+ */
+#define CLNP_DT 0x1C /* normal data */
+#define CLNP_ER 0x01 /* error report */
+#define CLNP_RAW 0x1D /* debug only */
+#define CLNP_EC 0x1E /* echo packet */
+#define CLNP_ECR 0x1F /* echo reply */
+
+/*
+ * ER pdu error codes
+ */
+#define GEN_NOREAS 0x00 /* reason not specified */
+#define GEN_PROTOERR 0x01 /* protocol procedure error */
+#define GEN_BADCSUM 0x02 /* incorrect checksum */
+#define GEN_CONGEST 0x03 /* pdu discarded due to congestion */
+#define GEN_HDRSYNTAX 0x04 /* header syntax error */
+#define GEN_SEGNEEDED 0x05 /* segmentation needed, but not permitted */
+#define GEN_INCOMPLETE 0x06 /* incomplete pdu received */
+#define GEN_DUPOPT 0x07 /* duplicate option */
+
+/* address errors */
+#define ADDR_DESTUNREACH 0x80 /* destination address unreachable */
+#define ADDR_DESTUNKNOWN 0x81 /* destination address unknown */
+
+/* source routing */
+#define SRCRT_UNSPECERR 0x90 /* unspecified src rt error */
+#define SRCRT_SYNTAX 0x91 /* syntax error in src rt field */
+#define SRCRT_UNKNOWNADDR 0x92 /* unknown addr in src rt field */
+#define SRCRT_BADPATH 0x93 /* path not acceptable */
+
+/* lifetime */
+#define TTL_EXPTRANSIT 0xa0 /* lifetime expired during transit */
+#define TTL_EXPREASS 0xa1 /* lifetime expired during reassembly */
+
+/* pdu discarded */
+#define DISC_UNSUPPOPT 0xb0 /* unsupported option not specified? */
+#define DISC_UNSUPPVERS 0xb1 /* unsupported protocol version */
+#define DISC_UNSUPPSECURE 0xb2 /* unsupported security option */
+#define DISC_UNSUPPSRCRT 0xb3 /* unsupported src rt option */
+#define DISC_UNSUPPRECRT 0xb4 /* unsupported rec rt option */
+
+/* reassembly */
+#define REASS_INTERFERE 0xc0 /* reassembly interference */
+#define CLNP_ERRORS 22 /* number of codes listed in clnp_er_codes[] */
+
+
+#ifdef KERNEL
+int clnp_er_index();
+#endif
+
+#ifdef CLNP_ER_CODES /* clnp_er.c defines this before including clnp.h */
+u_char clnp_er_codes[CLNP_ERRORS] = {
+GEN_NOREAS, GEN_PROTOERR, GEN_BADCSUM, GEN_CONGEST,
+GEN_HDRSYNTAX, GEN_SEGNEEDED, GEN_INCOMPLETE, GEN_DUPOPT,
+ADDR_DESTUNREACH, ADDR_DESTUNKNOWN,
+SRCRT_UNSPECERR, SRCRT_SYNTAX, SRCRT_UNKNOWNADDR, SRCRT_BADPATH,
+TTL_EXPTRANSIT, TTL_EXPREASS,
+DISC_UNSUPPOPT, DISC_UNSUPPVERS, DISC_UNSUPPSECURE,
+DISC_UNSUPPSRCRT, DISC_UNSUPPRECRT, REASS_INTERFERE };
+#endif
+
+#ifdef TROLL
+
+#define TR_DUPEND 0x01 /* duplicate end of fragment */
+#define TR_DUPPKT 0x02 /* duplicate entire packet */
+#define TR_DROPPKT 0x04 /* drop packet on output */
+#define TR_TRIM 0x08 /* trim bytes from packet */
+#define TR_CHANGE 0x10 /* change bytes in packet */
+#define TR_MTU 0x20 /* delta to change device mtu */
+#define TR_CHUCK 0x40 /* drop packet in rclnp_input */
+#define TR_BLAST 0x80 /* force rclnp_output to blast many packets */
+#define TR_RAWLOOP 0x100 /* make if_loop call clnpintr directly */
+struct troll {
+ int tr_ops; /* operations to perform */
+ float tr_dup_size; /* % to duplicate */
+ float tr_dup_freq; /* frequency to duplicate packets */
+ float tr_drop_freq; /* frequency to drop packets */
+ int tr_mtu_adj; /* delta to adjust if mtu */
+ int tr_blast_cnt; /* # of pkts to blast out */
+};
+
+#define SN_OUTPUT(clcp, m)\
+ troll_output(clcp->clc_ifp, m, clcp->clc_firsthop, clcp->clc_rt)
+
+#define SN_MTU(ifp, rt) (((rt && rt->rt_rmx.rmx_mtu) ?\
+ rt->rt_rmx.rmx_mtu : clnp_badmtu(ifp, rt, __LINE__, __FILE__))\
+ - trollctl.tr_mtu_adj)
+
+#ifdef KERNEL
+extern float troll_random;
+#endif
+
+#else /* NO TROLL */
+
+#define SN_OUTPUT(clcp, m)\
+ (*clcp->clc_ifp->if_output)(clcp->clc_ifp, m, clcp->clc_firsthop, clcp->clc_rt)
+
+#define SN_MTU(ifp, rt) (((rt && rt->rt_rmx.rmx_mtu) ?\
+ rt->rt_rmx.rmx_mtu : clnp_badmtu(ifp, rt, __LINE__, __FILE__)))
+
+#endif /* TROLL */
+
+/*
+ * Macro to remove an address from a clnp header; hoff becomes 0 on a bad length
+ */
+#define CLNP_EXTRACT_ADDR(isoa, hoff, hend)\
+ {\
+ isoa.isoa_len = (u_char)*hoff;\
+ if ((((++hoff) + isoa.isoa_len) > hend) ||\
+ (isoa.isoa_len > 20) || (isoa.isoa_len == 0)) {\
+ hoff = (caddr_t)0;\
+ } else {\
+ (void) bcopy(hoff, (caddr_t)isoa.isoa_genaddr, isoa.isoa_len);\
+ hoff += isoa.isoa_len;\
+ }\
+ }
+
+/*
+ * Macro to insert an address into a clnp header (expands to three statements)
+ */
+#define CLNP_INSERT_ADDR(hoff, isoa)\
+ *hoff++ = (isoa).isoa_len;\
+ (void) bcopy((caddr_t)((isoa).isoa_genaddr), hoff, (isoa).isoa_len);\
+ hoff += (isoa).isoa_len;
+
+/*
+ * Clnp hdr cache. Whenever a clnp packet is sent, a copy of the
+ * header is made and kept in this cache. In addition to a copy of
+ * the cached clnp hdr, the cache contains
+ * information necessary to determine whether the new packet
+ * to send requires a new header to be built.
+ */
+struct clnp_cache {
+ /* these fields are used to check the validity of the cache */
+ struct iso_addr clc_dst; /* destination of packet */
+ struct mbuf *clc_options; /* ptr to options mbuf */
+ int clc_flags; /* flags passed to clnp_output */
+
+ /* these fields are state that clnp_output requires to finish the pkt */
+ int clc_segoff; /* offset of seg part of header */
+ struct rtentry *clc_rt; /* ptr to rtentry (points into
+ the route structure) */
+ struct sockaddr *clc_firsthop; /* first hop of packet */
+ struct ifnet *clc_ifp; /* ptr to interface structure */
+ struct iso_ifaddr *clc_ifa; /* ptr to interface address */
+ struct mbuf *clc_hdr; /* cached pkt hdr (finally)! */
+};
+
+#ifndef satosiso
+#define satosiso(sa)\
+ ((struct sockaddr_iso *)(sa))
+#endif
+
+#ifdef KERNEL
+caddr_t clnp_insert_addr();
+struct iso_addr *clnp_srcaddr();
+struct mbuf *clnp_reass();
+#ifdef TROLL
+struct troll trollctl;
+#endif /* TROLL */
+#endif /* KERNEL */
diff --git a/sys/netiso/clnp_debug.c b/sys/netiso/clnp_debug.c
new file mode 100644
index 000000000000..964638e244b6
--- /dev/null
+++ b/sys/netiso/clnp_debug.c
@@ -0,0 +1,260 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clnp_debug.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: clnp_debug.c,v 4.2 88/06/29 14:58:34 hagens Exp $ */
+/* $Source: /usr/argo/sys/netargo/RCS/clnp_debug.c,v $ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+#ifdef ARGO_DEBUG
+
+#ifdef TESTDEBUG
+#ifdef notdef
+struct addr_37 u_37 = {
+ {0x00, 0x02, 0x00, 0x10, 0x20, 0x30, 0x35},
+ {0x01, 0x02, 0x03, 0x04, 0x50, 0x60, 0x70, 0x80, 0x90}
+};
+struct addr_osinet u_osinet = {
+ {0x00, 0x04},
+ {0x00, 0x02, 0x00, 0x01, 0x23, 0x42, 0x78, 0x20, 0x01, 0x05, 0x00}
+};
+#endif /* notdef */
+struct addr_rfc986 u_rfc986 = {
+ {0x00, 0x06},
+ {0x01, 0xc0, 0x0c, 0x0c, 0xab, 0x11}
+};
+struct addr_rfc986 u_bad = {
+ {0x00, 0x01},
+ {0x01, 0xc0, 0x0c, 0x0c, 0xab, 0x11}
+};
+#include <stdio.h>
+main() /* TESTDEBUG self-test; u_37 and u_osinet exist only under notdef */
+{
+ struct iso_addr a;
+
+ a.isoa_afi = AFI_37;
+ a.isoa_u.addr_37 = u_37;
+ a.isoa_len = 17;
+ printf("type 37: %s\n", clnp_iso_addrp(&a));
+
+ a.isoa_afi = AFI_OSINET;
+ a.isoa_u.addr_osinet = u_osinet;
+ a.isoa_len = 14;
+ printf("type osinet: %s\n", clnp_iso_addrp(&a));
+
+ a.isoa_afi = AFI_RFC986;
+ a.isoa_u.addr_rfc986 = u_rfc986;
+ a.isoa_len = 9;
+ printf("type rfc986: %s\n", clnp_iso_addrp(&a));
+
+ a.isoa_afi = 12;
+ a.isoa_u.addr_rfc986 = u_rfc986;
+ a.isoa_len = 9;
+ printf("type bad afi: %s\n", clnp_iso_addrp(&a));
+
+ a.isoa_afi = AFI_RFC986;
+ a.isoa_u.addr_rfc986 = u_bad;
+ a.isoa_len = 9;
+ printf("type bad idi: %s\n", clnp_iso_addrp(&a));
+}
+#endif /* TESTDEBUG */
+
+unsigned int clnp_debug;
+static char letters[] = "0123456789abcdef";
+
+/*
+ * Write len bytes starting at src as lowercase hex, two digits per
+ * byte, beginning at where.  Returns the position just past the last
+ * digit written; no terminating NUL is stored.
+ */
+char *
+clnp_hexp(src, len, where)
+char *src; /* bytes to format */
+int len; /* number of bytes at src */
+char *where; /* output position */
+{
+	register u_char *bp = (u_char *)src;
+	register int left, byte;
+
+	for (left = len; left > 0; left--) {
+		/* fetch the byte before storing, exactly one read per byte */
+		byte = *bp++;
+		*where++ = letters[byte >> 4];
+		*where++ = letters[byte & 0x0f];
+	}
+	return where;
+}
+
+/*
+ * Return a ptr to a human readable form of an iso addr (in a static buffer)
+ */
+static char iso_addr_b[50];	/* result buffer, overwritten by every call */
+#define DELIM '.'	/* field separator; no ';' so it also works inside expressions */
+
+char *
+clnp_iso_addrp(isoa)
+struct iso_addr *isoa;
+{
+ char *cp;
+
+ /* print length as a "[n] " prefix */
+ sprintf(iso_addr_b, "[%d] ", isoa->isoa_len);
+
+ /* set cp to end of what we have */
+ cp = iso_addr_b;
+ while (*cp)
+ cp++;
+
+ /* print all isoa_len address bytes in hex; NOTE(review): not bounded by sizeof iso_addr_b */
+ cp = clnp_hexp(isoa->isoa_genaddr, (int)isoa->isoa_len, cp);
+#ifdef notdef /* disabled: per-AFI formatted output */
+ *cp++ = DELIM;
+
+ /* print type specific part */
+ switch(isoa->isoa_afi) {
+ case AFI_37:
+ cp = clnp_hexp(isoa->t37_idi, ADDR37_IDI_LEN, cp);
+ *cp++ = DELIM;
+ cp = clnp_hexp(isoa->t37_dsp, ADDR37_DSP_LEN, cp);
+ break;
+
+/* case AFI_OSINET:*/
+ case AFI_RFC986: {
+ u_short idi;
+
+ /* osinet and rfc986 have idi in the same place */
+ /* print idi */
+ cp = clnp_hexp(isoa->rfc986_idi, ADDROSINET_IDI_LEN, cp);
+ *cp++ = DELIM;
+ CTOH(isoa->rfc986_idi[0], isoa->rfc986_idi[1], idi);
+
+ if (idi == IDI_OSINET) {
+ struct ovl_osinet *oosi = (struct ovl_osinet *)isoa;
+ cp = clnp_hexp(oosi->oosi_orgid, OVLOSINET_ORGID_LEN, cp);
+ *cp++ = DELIM;
+ cp = clnp_hexp(oosi->oosi_snetid, OVLOSINET_SNETID_LEN, cp);
+ *cp++ = DELIM;
+ cp = clnp_hexp(oosi->oosi_snpa, OVLOSINET_SNPA_LEN, cp);
+ *cp++ = DELIM;
+ cp = clnp_hexp(oosi->oosi_nsap, OVLOSINET_NSAP_LEN, cp);
+ } else if (idi == IDI_RFC986) {
+ struct ovl_rfc986 *o986 = (struct ovl_rfc986 *)isoa;
+ cp = clnp_hexp(&o986->o986_vers, 1, cp);
+ *cp++ = DELIM;
+#ifdef vax
+ sprintf(cp, "%d.%d.%d.%d.%d",
+ o986->o986_inetaddr[0] & 0xff,
+ o986->o986_inetaddr[1] & 0xff,
+ o986->o986_inetaddr[2] & 0xff,
+ o986->o986_inetaddr[3] & 0xff,
+ o986->o986_upid & 0xff);
+ return(iso_addr_b);
+#else
+ cp = clnp_hexp(&o986->o986_inetaddr[0], 1, cp);
+ *cp++ = DELIM;
+ cp = clnp_hexp(&o986->o986_inetaddr[1], 1, cp);
+ *cp++ = DELIM;
+ cp = clnp_hexp(&o986->o986_inetaddr[2], 1, cp);
+ *cp++ = DELIM;
+ cp = clnp_hexp(&o986->o986_inetaddr[3], 1, cp);
+ *cp++ = DELIM;
+ cp = clnp_hexp(&o986->o986_upid, 1, cp);
+#endif /* vax */
+ }
+
+ } break;
+
+ default:
+ *cp++ = '?';
+ break;
+ }
+#endif /* notdef */
+ *cp = (char)0;
+
+ return(iso_addr_b);
+}
+
+/*
+ * Return clnp_iso_addrp()'s text for the address in s followed by the
+ * transport selector as "(hex digits)".  The result lives in the static
+ * iso_addr_b, so the selector is cut short (or the suffix left off
+ * entirely) rather than written past the end of that buffer.
+ */
+char *
+clnp_saddr_isop(s)
+register struct sockaddr_iso *s;
+{
+	register char *cp = clnp_iso_addrp(&s->siso_addr);
+	int nsel = (int)s->siso_tlen;	/* selector bytes to print */
+	int room;			/* bytes free once "()" + NUL are reserved */
+
+	while (*cp) cp++;
+	room = (int)((iso_addr_b + sizeof(iso_addr_b)) - cp) - 3;
+	if (room < 0)
+		return (iso_addr_b);
+	if (nsel > room / 2)		/* two hex digits per selector byte */
+		nsel = room / 2;
+
+	*cp++ = '(';
+	cp = clnp_hexp(TSEL(s), nsel, cp);
+	*cp++ = ')';
+	*cp = 0;
+	return (iso_addr_b);
+}
+
+#endif /* ARGO_DEBUG */
diff --git a/sys/netiso/clnp_er.c b/sys/netiso/clnp_er.c
new file mode 100644
index 000000000000..8b7f45b77a47
--- /dev/null
+++ b/sys/netiso/clnp_er.c
@@ -0,0 +1,375 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clnp_er.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_er.c,v 5.1 89/02/09 16:20:18 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_er.c,v $ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_pcb.h>
+#define CLNP_ER_CODES
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+static struct clnp_fixed er_template = {
+ ISO8473_CLNP, /* network identifier */
+ 0, /* length */
+ ISO8473_V1, /* version */
+ CLNP_TTL, /* ttl */
+ CLNP_ER, /* type */
+ 0, /* segment length, high byte */
+ 0 /* segment length, low byte; the checksum bytes are implicitly zero */
+};
+
+/*
+ * FUNCTION: clnp_er_input
+ *
+ * PURPOSE: Process an ER pdu.
+ *
+ * RETURNS: nothing (implicit int function with no return statement)
+ *
+ * SIDE EFFECTS: INCSTATs a cns_er_inhist slot, may call tpclnp_ctlinput1, frees m
+ *
+ * NOTES: GEN_NOREAS, GEN_PROTOERR and unlisted reasons leave cmd at -1 (no upcall)
+ */
+clnp_er_input(m, src, reason)
+struct mbuf *m; /* ptr to packet itself */
+struct iso_addr *src; /* ptr to src of er */
+u_char reason; /* reason code of er */
+{
+ int cmd = -1;
+ extern u_char clnp_protox[];
+
+ IFDEBUG(D_CTLINPUT)
+ printf("clnp_er_input: m x%x, src %s, reason x%x\n", m,
+ clnp_iso_addrp(src), reason);
+ ENDDEBUG
+
+ INCSTAT(cns_er_inhist[clnp_er_index(reason)]);
+ switch (reason) {
+ case GEN_NOREAS:
+ case GEN_PROTOERR:
+ break;
+ case GEN_BADCSUM:
+ cmd = PRC_PARAMPROB;
+ break;
+ case GEN_CONGEST:
+ cmd = PRC_QUENCH;
+ break;
+ case GEN_HDRSYNTAX:
+ cmd = PRC_PARAMPROB;
+ break;
+ case GEN_SEGNEEDED:
+ cmd = PRC_MSGSIZE;
+ break;
+ case GEN_INCOMPLETE:
+ cmd = PRC_PARAMPROB;
+ break;
+ case GEN_DUPOPT:
+ cmd = PRC_PARAMPROB;
+ break;
+ case ADDR_DESTUNREACH:
+ cmd = PRC_UNREACH_HOST;
+ break;
+ case ADDR_DESTUNKNOWN:
+ cmd = PRC_UNREACH_PROTOCOL;
+ break;
+ case SRCRT_UNSPECERR:
+ case SRCRT_SYNTAX:
+ case SRCRT_UNKNOWNADDR:
+ case SRCRT_BADPATH:
+ cmd = PRC_UNREACH_SRCFAIL;
+ break;
+ case TTL_EXPTRANSIT:
+ cmd = PRC_TIMXCEED_INTRANS;
+ break;
+ case TTL_EXPREASS:
+ cmd = PRC_TIMXCEED_REASS;
+ break;
+ case DISC_UNSUPPOPT:
+ case DISC_UNSUPPVERS:
+ case DISC_UNSUPPSECURE:
+ case DISC_UNSUPPSRCRT:
+ case DISC_UNSUPPRECRT:
+ cmd = PRC_PARAMPROB;
+ break;
+ case REASS_INTERFERE:
+ cmd = PRC_TIMXCEED_REASS;
+ break;
+ }
+
+ /*
+ * tpclnp_ctlinput1 is called directly so that we don't
+ * have to build an iso_sockaddr out of src.
+ */
+ if (cmd >= 0)
+ tpclnp_ctlinput1(cmd, src);
+
+ m_freem(m);
+}
+
+/*
+ * FUNCTION: clnp_discard
+ *
+ * PURPOSE: Drop a clnp datagram, reporting the drop to its sender
+ *          when the datagram allows it.
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: Either hands m to clnp_emit_er or frees it with
+ *          m_freem.  A NULL m is ignored.
+ *
+ * NOTES: An ER is attempted only when the first mbuf holds at least
+ *          a struct clnp_fixed, the pdu is not itself an ER, and its
+ *          CNF_ERR_OK bit is set.  This code assumes that we have
+ *          previously tried to pull up the header of the datagram
+ *          into one mbuf.
+ */
+clnp_discard(m, reason)
+struct mbuf *m; /* header of packet to discard */
+char reason; /* reason for discard */
+{
+	register struct clnp_fixed *hdr;
+
+	IFDEBUG(D_DISCARD)
+		printf("clnp_discard: m x%x, reason x%x\n", m, reason);
+	ENDDEBUG
+
+	if (m == NULL)
+		return;
+
+	if (m->m_len >= sizeof(struct clnp_fixed)) {
+		hdr = mtod(m, struct clnp_fixed *);
+		if ((hdr->cnf_type & CNF_ERR_OK) &&
+		    ((hdr->cnf_type & CNF_TYPE) != CLNP_ER)) {
+			/* sender asked for error reports and this is not an ER */
+			clnp_emit_er(m, reason);
+			return;
+		}
+	}
+	m_freem(m);
+}
+
+/*
+ * FUNCTION: clnp_emit_er
+ *
+ * PURPOSE: Send an ER pdu.
+ * The src of the ER pdu is the host that is sending
+ * the ER (ie. us), *not* the original destination of the
+ * packet.
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: routes to the original source (clnp_route) and calls ifp->if_output
+ *
+ * NOTES: Takes responsibility for freeing mbuf passed
+ * This function may be called with a packet that
+ * was created by us; in this case, do not send
+ * an ER.
+ */
+clnp_emit_er(m, reason)
+struct mbuf *m; /* header of packet to discard */
+char reason; /* reason for discard */
+{
+ register struct clnp_fixed *clnp = mtod(m, struct clnp_fixed *);
+ register struct clnp_fixed *er;
+ struct route_iso route;
+ struct ifnet *ifp;
+ struct sockaddr *first_hop;
+ struct iso_addr src, dst, *our_addr;
+ caddr_t hoff, hend;
+ int total_len; /* total len of dg */
+ struct mbuf *m0; /* contains er pdu hdr */
+ struct iso_ifaddr *ia = 0;
+
+ IFDEBUG(D_DISCARD)
+ printf("clnp_emit_er: m x%x, hdr len %d\n", m, clnp->cnf_hdr_len);
+ ENDDEBUG
+
+ bzero((caddr_t)&route, sizeof(route));
+
+ /*
+ * If header length is incorrect, or entire header is not contained
+ * in this mbuf, we punt
+ */
+ if ((clnp->cnf_hdr_len < CLNP_HDR_MIN) ||
+ (clnp->cnf_hdr_len > CLNP_HDR_MAX) ||
+ (clnp->cnf_hdr_len > m->m_len))
+ goto bad;
+
+ /* extract src, dest address (the destination comes first in the header) */
+ hend = (caddr_t)clnp + clnp->cnf_hdr_len;
+ hoff = (caddr_t)clnp + sizeof(struct clnp_fixed);
+ CLNP_EXTRACT_ADDR(dst, hoff, hend);
+ if (hoff == (caddr_t)0) {
+ goto bad;
+ }
+ CLNP_EXTRACT_ADDR(src, hoff, hend);
+ if (hoff == (caddr_t)0) {
+ goto bad;
+ }
+
+ /*
+ * Do not send ER if we generated the packet.
+ */
+ if (clnp_ours(&src))
+ goto bad;
+
+ /*
+ * Trim mbuf to hold only the header.
+ * This mbuf will be the 'data' of the er pdu
+ */
+ if (m->m_next != NULL) {
+ m_freem(m->m_next);
+ m->m_next = NULL;
+ }
+
+ if (m->m_len > clnp->cnf_hdr_len)
+ m_adj(m, (int)-(m->m_len - (int)clnp->cnf_hdr_len));
+
+ /* route er pdu: note we send pkt to src of original packet */
+ if (clnp_route(&src, &route, /* flags */0, &first_hop, &ia) != 0)
+ goto bad;
+
+ /* compute our address based upon firsthop/ifp */
+ if (ia)
+ our_addr = &ia->ia_addr.siso_addr;
+ else
+ goto bad;
+ ifp = ia->ia_ifp;
+
+ IFDEBUG(D_DISCARD)
+ printf("clnp_emit_er: to %s", clnp_iso_addrp(&src));
+ printf(" from %s\n", clnp_iso_addrp(our_addr));
+ ENDDEBUG
+
+ IFDEBUG(D_DISCARD)
+ printf("clnp_emit_er: packet routed to %s\n",
+ clnp_iso_addrp(&((struct sockaddr_iso *)first_hop)->siso_addr));
+ ENDDEBUG
+
+ /* allocate mbuf for er pdu header: punt on no space */
+ MGET(m0, M_DONTWAIT, MT_HEADER);
+ if (m0 == 0)
+ goto bad;
+
+ m0->m_next = m;
+ er = mtod(m0, struct clnp_fixed *);
+ *er = er_template;
+
+ /* setup src/dst on er pdu */
+ /* NOTE REVERSAL OF SRC/DST */
+ hoff = (caddr_t)er + sizeof(struct clnp_fixed);
+ CLNP_INSERT_ADDR(hoff, src);
+ CLNP_INSERT_ADDR(hoff, *our_addr);
+
+ /*
+ * TODO: if complete src rt was specified, then reverse path, and
+ * copy into er as option.
+ */
+
+ /* add er option */
+ *hoff++ = CLNPOVAL_ERREAS; /* code */
+ *hoff++ = 2; /* length */
+ *hoff++ = reason; /* discard reason */
+ *hoff++ = 0; /* error localization = not specified */
+
+ /* set length */
+ er->cnf_hdr_len = m0->m_len = (u_char)(hoff - (caddr_t)er);
+ total_len = m0->m_len + m->m_len;
+ HTOC(er->cnf_seglen_msb, er->cnf_seglen_lsb, total_len);
+
+ /* compute checksum (on header only) */
+ iso_gen_csum(m0, CLNP_CKSUM_OFF, (int)er->cnf_hdr_len);
+
+ /* trim packet if too large for interface; NOTE(review): the seglen bytes set above keep the untrimmed total_len */
+ if (total_len > ifp->if_mtu)
+ m_adj(m0, -(total_len - ifp->if_mtu));
+
+ /* send packet */
+ INCSTAT(cns_er_outhist[clnp_er_index(reason)]);
+ (void) (*ifp->if_output)(ifp, m0, first_hop, route.ro_rt);
+ goto done;
+
+bad:
+ m_freem(m);
+
+done:
+ /* free route if it is a temp */
+ if (route.ro_rt != NULL)
+ RTFREE(route.ro_rt);
+}
+
+/*
+ * FUNCTION: clnp_er_index
+ *
+ * PURPOSE: Map an ER reason code to its position in clnp_er_codes[]
+ *
+ * RETURNS: 0 .. CLNP_ERRORS-1 for a listed code, CLNP_ERRORS for any
+ *          other value
+ *
+ * NOTES: The result is used directly as an index into the
+ *          cns_er_inhist/cns_er_outhist counters.  The "not found"
+ *          value used to be CLNP_ERRORS + 1, which skipped a slot.
+ *          NOTE(review): this presumes those arrays have
+ *          CLNP_ERRORS + 1 entries (they are declared outside this
+ *          file) -- confirm against clnp_stat.h.
+ */
+clnp_er_index(p)
+u_char p;
+{
+	register int i;
+
+	/* scan from the end, as before; the codes are distinct */
+	for (i = CLNP_ERRORS - 1; i >= 0; i--)
+		if (clnp_er_codes[i] == p)
+			return (i);
+	return (CLNP_ERRORS);
+}
diff --git a/sys/netiso/clnp_frag.c b/sys/netiso/clnp_frag.c
new file mode 100644
index 000000000000..546a592ccf70
--- /dev/null
+++ b/sys/netiso/clnp_frag.c
@@ -0,0 +1,859 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clnp_frag.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_frag.c,v 5.1 89/02/09 16:20:26 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_frag.c,v $ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+/*
+ * Head of the list of datagrams currently being reassembled; entries
+ * are chained through cfl_next (clnp_newpkt pushes new entries on the
+ * front, clnp_comp_pdu unlinks completed ones).
+ */
+/* all fragments are hung off this list */
+struct clnp_fragl *clnp_frags = NULL;
+
+/* declared here because clnp_reass calls it before its definition */
+struct mbuf *clnp_comp_pdu();
+
+
+/*
+ * FUNCTION:		clnp_fragment
+ *
+ * PURPOSE:		Fragment a datagram, and send the itty bitty pieces
+ *			out over an interface.
+ *
+ * RETURNS:		success - 0
+ *			failure - unix error code:
+ *				EMSGSIZE  segmentation is not permitted or the
+ *					  data cannot be cut into legal pieces
+ *				ENOBUFS   an mbuf copy failed
+ *				otherwise the error from the output routine
+ *
+ * SIDE EFFECTS:	The packet (m) is never handed back to the caller:
+ *			on every path it is passed to the output routine,
+ *			passed to clnp_discard, or freed.
+ *
+ * NOTES:		If there is an error sending the packet, clnp_discard
+ *			is called to discard the packet and send an ER. If
+ *			clnp_fragment was called from clnp_output, then
+ *			we generated the packet, and should not send an
+ *			ER -- clnp_emit_er will check for this. Otherwise,
+ *			the packet was fragmented during forwarding. In this
+ *			case, we ought to send an ER back.
+ *
+ *			Every fragment except the last carries a multiple of
+ *			8 data bytes, and no fragment carries fewer than 8.
+ */
+clnp_fragment(ifp, m, first_hop, total_len, segoff, flags, rt)
+struct ifnet	*ifp;		/* ptr to outgoing interface */
+struct mbuf	*m;		/* ptr to packet */
+struct sockaddr	*first_hop;	/* ptr to first hop */
+int		total_len;	/* length of datagram */
+int		segoff;		/* offset of segpart in hdr */
+int		flags;		/* flags passed to clnp_output */
+struct rtentry	*rt;		/* route if direct ether */
+{
+	struct clnp_fixed	*clnp = mtod(m, struct clnp_fixed *);
+	int			hdr_len = (int)clnp->cnf_hdr_len;
+	/* most data one fragment may carry, rounded down to a multiple of 8 */
+	int			max_frag_size = (SN_MTU(ifp, rt) - hdr_len) & ~7;
+	int			frag_size = max_frag_size;
+	struct mbuf		*hdr = NULL;		/* save copy of clnp hdr */
+	struct mbuf		*frag_hdr = NULL;
+	struct mbuf		*frag_data = NULL;
+	struct clnp_segment	seg_part;		/* segmentation header */
+	int			frag_base;
+	int			error = 0;
+
+	/* from here on total_len counts data bytes only */
+	total_len -= hdr_len;
+
+	/*
+	 * Refuse if segmentation is not permitted, if there are fewer than
+	 * 8 data bytes, or if the interface cannot carry a legal fragment.
+	 * A fragment size of exactly 8 is only usable when the data length
+	 * is a multiple of 8, since otherwise the tail would be shorter
+	 * than 8 bytes and there is no room to shorten its predecessor.
+	 */
+	if (!(clnp->cnf_type & CNF_SEG_OK) ||
+		(total_len < 8) ||
+		!(frag_size > 8 || (frag_size == 8 && !(total_len & 7)))) {
+		INCSTAT(cns_cantfrag);
+		clnp_discard(m, GEN_SEGNEEDED);
+		return(EMSGSIZE);
+	}
+
+	INCSTAT(cns_fragmented);
+	(void) bcopy(segoff + mtod(m, caddr_t), (caddr_t)&seg_part,
+		sizeof(seg_part));
+	frag_base = ntohs(seg_part.cng_off);
+
+	/*
+	 *	Duplicate header, and remove from packet
+	 */
+	if ((hdr = m_copy(m, 0, hdr_len)) == NULL) {
+		clnp_discard(m, GEN_CONGEST);
+		return(ENOBUFS);
+	}
+	m_adj(m, hdr_len);
+
+	while (total_len > 0) {
+		int		remaining, last_frag;
+
+		IFDEBUG(D_FRAG)
+			struct mbuf *mdump = frag_hdr;
+			int tot_mlen = 0;
+			printf("clnp_fragment: total_len %d:\n", total_len);
+			while (mdump != NULL) {
+				printf("\tmbuf x%x, m_len %d\n",
+					mdump, mdump->m_len);
+				tot_mlen += mdump->m_len;
+				mdump = mdump->m_next;
+			}
+			printf("clnp_fragment: sum of mbuf chain %d:\n", tot_mlen);
+		ENDDEBUG
+
+		/*
+		 *	Size each fragment from the interface maximum rather than
+		 *	from the previous fragment's size. Carrying a shortened
+		 *	size forward could shrink it to zero (maximum of 16 with a
+		 *	tail of 1..7 bytes) and make the data copy below fail.
+		 */
+		frag_size = min(total_len, max_frag_size);
+		if ((remaining = total_len - frag_size) == 0)
+			last_frag = 1;
+		else {
+			/*
+			 *	If this fragment will cause the last one to
+			 *	be less than 8 bytes, shorten this fragment a bit.
+			 *	The test made before entering the loop guarantees
+			 *	that max_frag_size is at least 16 whenever this
+			 *	happens, so frag_size stays positive.
+			 */
+			last_frag = 0;
+			if (remaining < 8)
+				frag_size -= 8;
+		}
+
+		IFDEBUG(D_FRAG)
+			printf("clnp_fragment: seg off %d, size %d, remaining %d\n",
+				ntohs(seg_part.cng_off), frag_size, total_len-frag_size);
+			if (last_frag)
+				printf("clnp_fragment: last fragment\n");
+		ENDDEBUG
+
+		if (last_frag) {
+			/*
+			 *	this is the last fragment; we don't need to get any other
+			 *	mbufs.
+			 */
+			frag_hdr = hdr;
+			frag_data = m;
+		} else {
+			/* duplicate header and data mbufs */
+			if ((frag_hdr = m_copy(hdr, 0, (int)M_COPYALL)) == NULL) {
+				clnp_discard(hdr, GEN_CONGEST);
+				m_freem(m);
+				return(ENOBUFS);
+			}
+			if ((frag_data = m_copy(m, 0, frag_size)) == NULL) {
+				clnp_discard(hdr, GEN_CONGEST);
+				m_freem(m);
+				m_freem(frag_hdr);
+				return(ENOBUFS);
+			}
+			INCSTAT(cns_fragments);
+		}
+		clnp = mtod(frag_hdr, struct clnp_fixed *);
+
+		if (!last_frag)
+			clnp->cnf_type |= CNF_MORE_SEGS;
+
+		/* link together */
+		m_cat(frag_hdr, frag_data);
+
+		/* insert segmentation part; its offset is advanced below */
+		bcopy((caddr_t)&seg_part, mtod(frag_hdr, caddr_t) + segoff,
+			sizeof(struct clnp_segment));
+
+		{
+			int	derived_len = hdr_len + frag_size;
+			HTOC(clnp->cnf_seglen_msb, clnp->cnf_seglen_lsb, derived_len);
+			if ((frag_hdr->m_flags & M_PKTHDR) == 0)
+				panic("clnp_frag:lost header");
+			frag_hdr->m_pkthdr.len = derived_len;
+		}
+		/* compute clnp checksum (on header only) */
+		if (flags & CLNP_NO_CKSUM) {
+			HTOC(clnp->cnf_cksum_msb, clnp->cnf_cksum_lsb, 0);
+		} else {
+			iso_gen_csum(frag_hdr, CLNP_CKSUM_OFF, hdr_len);
+		}
+
+		IFDEBUG(D_DUMPOUT)
+			struct mbuf *mdump = frag_hdr;
+			printf("clnp_fragment: sending dg:\n");
+			while (mdump != NULL) {
+				printf("\tmbuf x%x, m_len %d\n", mdump, mdump->m_len);
+				mdump = mdump->m_next;
+			}
+		ENDDEBUG
+
+#ifdef	TROLL
+		error = troll_output(ifp, frag_hdr, first_hop, rt);
+#else
+		error = (*ifp->if_output)(ifp, frag_hdr, first_hop, rt);
+#endif	/* TROLL */
+
+		/*
+		 *	Tough situation: if the error occurred on the last
+		 *	fragment, we can not send an ER, as the if_output
+		 *	routine consumed the packet. If the error occurred
+		 *	on any intermediate packets, we can send an ER
+		 *	because we still hold a copy of the original header
+		 *	in (hdr).
+		 */
+		if (error) {
+			if (frag_hdr != hdr) {
+				/*
+				 *	The error was not on the last fragment. We must
+				 *	free hdr and m before returning
+				 */
+				clnp_discard(hdr, GEN_NOREAS);
+				m_freem(m);
+			}
+			return(error);
+		}
+
+		/* bump segment offset, trim data mbuf, and decrement count left */
+#ifdef	TROLL
+		/*
+		 *	Decrement frag_size by some fraction. This will cause the
+		 *	next fragment to start 'early', thus duplicating the end
+		 *	of the current fragment.  troll.tr_dup_size controls
+		 *	the fraction. If positive, it specifies the fraction. If
+		 *	negative, a random fraction is used.
+		 */
+		if ((trollctl.tr_ops & TR_DUPEND) && (!last_frag)) {
+			int num_bytes = frag_size;
+
+			if (trollctl.tr_dup_size > 0)
+				num_bytes *= trollctl.tr_dup_size;
+			else
+				num_bytes *= troll_random();
+			frag_size -= num_bytes;
+		}
+#endif	/* TROLL */
+		total_len -= frag_size;
+		if (!last_frag) {
+			frag_base += frag_size;
+			seg_part.cng_off = htons(frag_base);
+			m_adj(m, frag_size);
+		}
+	}
+	return(0);
+}
+
+/*
+ * FUNCTION:		clnp_reass
+ *
+ * PURPOSE:		Try to complete a clnp datagram using the fragment
+ *			just received. The fragment is filed with the other
+ *			fragments of the same datagram (same data unit id,
+ *			source and destination); if that completes the
+ *			datagram, the whole packet is returned in an mbuf
+ *			chain, looking as if it had arrived in one piece.
+ *
+ *			If no fragments of the datagram are held yet, a new
+ *			reassembly entry is started with this fragment.
+ *
+ * RETURNS:		Ptr to assembled packet, or 0 if more fragments are
+ *			still needed (or the fragment had to be dropped).
+ *
+ * SIDE EFFECTS:	The fragment (m) is always taken over: it is either
+ *			stored, freed, or discarded.
+ *
+ * NOTES:
+ *	clnp_slowtimo can not affect this code because clnpintr, and thus
+ *	this code, is called at a higher priority than clnp_slowtimo.
+ */
+struct mbuf *
+clnp_reass(m, src, dst, seg)
+struct mbuf		*m;	/* new fragment */
+struct iso_addr		*src;	/* src of new fragment */
+struct iso_addr		*dst;	/* dst of new fragment */
+struct clnp_segment	*seg;	/* segment part of fragment header */
+{
+	register struct clnp_fragl	*fl;
+	register struct clnp_fixed	*whole;
+	struct mbuf			*pdu;
+
+	/* is a reassembly of this datagram already in progress? */
+	for (fl = clnp_frags; fl != NULL; fl = fl->cfl_next) {
+		if (seg->cng_id != fl->cfl_id)
+			continue;
+		if (!iso_addrmatch1(src, &fl->cfl_src))
+			continue;
+		if (!iso_addrmatch1(dst, &fl->cfl_dst))
+			continue;
+
+		IFDEBUG(D_REASS)
+			printf("clnp_reass: found packet\n");
+		ENDDEBUG
+
+		/* file the fragment, then see whether the datagram is whole */
+		clnp_insert_frag(fl, m, seg);
+		pdu = clnp_comp_pdu(fl);
+		if (pdu != NULL) {
+			/* the rebuilt header must carry the full datagram length */
+			whole = mtod(pdu, struct clnp_fixed *);
+			HTOC(whole->cnf_seglen_msb, whole->cnf_seglen_lsb,
+				seg->cng_tot_len);
+		}
+		return (pdu);
+	}
+
+	IFDEBUG(D_REASS)
+		printf("clnp_reass: new packet!\n");
+	ENDDEBUG
+
+	/*
+	 *	First fragment seen for this datagram: start a new fragment
+	 *	list holding it. If that cannot be done, drop the fragment.
+	 */
+	/* TODO: don't let one src hog all the reassembly buffers */
+	if (clnp_newpkt(m, src, dst, seg) == 0 /* || this src is a hog */) {
+		INCSTAT(cns_fragdropped);
+		clnp_discard(m, GEN_CONGEST);
+	}
+
+	return(NULL);
+}
+
+/*
+ * FUNCTION:		clnp_newpkt
+ *
+ * PURPOSE:		Start reassembly of a datagram: build the list header
+ *			that will collect its fragments, put it on clnp_frags,
+ *			and file the first fragment under it.
+ *
+ * RETURNS:		non-zero if it succeeds, zero if fails.
+ *
+ * SIDE EFFECTS:	On success the fragment (m) is owned by the new
+ *			list. On failure m is left untouched for the caller.
+ *
+ * NOTES:		Failure is only due to insufficient resources.
+ */
+clnp_newpkt(m, src, dst, seg)
+struct mbuf		*m;	/* new fragment */
+struct iso_addr		*src;	/* src of new fragment */
+struct iso_addr		*dst;	/* dst of new fragment */
+struct clnp_segment	*seg;	/* segment part of fragment header */
+{
+	register struct clnp_fixed	*clnp = mtod(m, struct clnp_fixed *);
+	register struct clnp_fragl	*fl;
+	struct mbuf			*fl_mbuf;
+
+	/* the list header lives in the data area of an mbuf of its own */
+	MGET(fl_mbuf, M_DONTWAIT, MT_FTABLE);
+	if (fl_mbuf == NULL)
+		return (0);
+	fl = mtod(fl_mbuf, struct clnp_fragl *);
+
+	/* keep a private copy of this fragment's clnp header */
+	fl->cfl_orighdr = m_copy(m, 0, (int)clnp->cnf_hdr_len);
+	if (fl->cfl_orighdr == NULL) {
+		m_freem(fl_mbuf);
+		return (0);
+	}
+
+	/* identify the datagram this list belongs to */
+	fl->cfl_id = seg->cng_id;
+	bcopy((caddr_t)src, (caddr_t)&fl->cfl_src, sizeof(struct iso_addr));
+	bcopy((caddr_t)dst, (caddr_t)&fl->cfl_dst, sizeof(struct iso_addr));
+	fl->cfl_ttl = clnp->cnf_ttl;
+
+	/* offset of the final data byte of the complete datagram */
+	fl->cfl_last = (seg->cng_tot_len - clnp->cnf_hdr_len) - 1;
+	fl->cfl_frags = NULL;
+
+	/* push onto the front of the list of datagrams in reassembly */
+	fl->cfl_next = clnp_frags;
+	clnp_frags = fl;
+
+	/* the fragment in hand becomes the first member of the list */
+	clnp_insert_frag(fl, m, seg);
+	return(1);
+}
+
+/*
+ * FUNCTION:		clnp_insert_frag
+ *
+ * PURPOSE:		Insert fragment into list headed by 'cfh'.
+ *
+ * RETURNS:		nothing
+ *
+ * SIDE EFFECTS:	The fragment (m) is consumed: it is linked into the
+ *			list, or freed when an earlier fragment already covers
+ *			all of its data. An existing fragment may be trimmed,
+ *			or freed when the new one supersedes it.
+ *
+ * NOTES:		This is the 'guts' of the reassembly algorithm.
+ *			Each fragment in this list contains a clnp_frag
+ *			structure followed by the data of the fragment.
+ *			The clnp_frag structure actually lies on top of
+ *			part of the old clnp header.
+ *
+ *			The list is kept ordered by cfr_first and free of
+ *			overlaps. cfr_first and cfr_last are inclusive byte
+ *			offsets, so two fragments overlap as soon as one's
+ *			cfr_last is >= the other's cfr_first.
+ */
+clnp_insert_frag(cfh, m, seg)
+struct clnp_fragl	*cfh;	/* header of list of packet fragments */
+struct mbuf		*m;	/* new fragment */
+struct clnp_segment	*seg;	/* segment part of fragment header */
+{
+	register struct clnp_fixed	*clnp;	/* clnp hdr of fragment */
+	register struct clnp_frag	*cf;	/* generic fragment ptr */
+	register struct clnp_frag	*cf_sub = NULL;	/* frag subsequent to new one */
+	register struct clnp_frag	*cf_prev = NULL; /* frag previous to new one */
+	register struct clnp_frag	*cf_pprev = NULL; /* frag previous to cf_prev */
+	u_short				first;	/* offset of first byte of initial pdu*/
+	u_short				last;	/* offset of last byte of initial pdu */
+	u_short				fraglen;/* length of fragment */
+	int				pad;	/* bytes to skip to reach alignment */
+	u_int				bytes;	/* bytes left in front of the data */
+
+	clnp = mtod(m, struct clnp_fixed *);
+	first = seg->cng_off;
+	CTOH(clnp->cnf_seglen_msb, clnp->cnf_seglen_lsb, fraglen);
+	fraglen -= clnp->cnf_hdr_len;
+	last = (first + fraglen) - 1;
+
+	IFDEBUG(D_REASS)
+		printf("clnp_insert_frag: New fragment: [%d ... %d], len %d\n",
+			first, last, fraglen);
+		printf("clnp_insert_frag: current fragments:\n");
+		for (cf = cfh->cfl_frags; cf != NULL; cf = cf->cfr_next) {
+			printf("\tcf x%x: [%d ... %d]\n", cf, cf->cfr_first, cf->cfr_last);
+		}
+	ENDDEBUG
+
+	if (cfh->cfl_frags != NULL) {
+		/*
+		 *	Find the fragment which begins after the new one, keeping
+		 *	track of the two fragments in front of it.
+		 */
+		for (cf = cfh->cfl_frags; cf != NULL; cf = cf->cfr_next) {
+			if (cf->cfr_first > first) {
+				cf_sub = cf;
+				break;
+			}
+			cf_pprev = cf_prev;
+			cf_prev = cf;
+		}
+
+		IFDEBUG(D_REASS)
+			printf("clnp_insert_frag: Previous frag is ");
+			if (cf_prev == NULL)
+				printf("NULL\n");
+			else
+				printf("[%d ... %d]\n", cf_prev->cfr_first, cf_prev->cfr_last);
+			printf("clnp_insert_frag: Subsequent frag is ");
+			if (cf_sub == NULL)
+				printf("NULL\n");
+			else
+				printf("[%d ... %d]\n", cf_sub->cfr_first, cf_sub->cfr_last);
+		ENDDEBUG
+
+		/*
+		 *	If there is a fragment before the new one, check if it
+		 *	overlaps the new one. The previous fragment never starts
+		 *	after the new one, so there are three cases.
+		 */
+		if ((cf_prev != NULL) && (cf_prev->cfr_last >= first)) {
+			if (cf_prev->cfr_last >= last) {
+				/*
+				 *	The new fragment is entirely contained in the
+				 *	preceding one (an exact duplicate lands here).
+				 *	We can punt on the new frag completely.
+				 */
+				m_freem(m);
+				return;
+			}
+			if (cf_prev->cfr_first == first) {
+				/*
+				 *	Same starting offset, but the new fragment
+				 *	reaches further. Trimming the previous one
+				 *	would leave it empty, so the new fragment
+				 *	takes its place: free it and link the new
+				 *	fragment after its predecessor instead.
+				 */
+				m_freem(cf_prev->cfr_data);
+				cf_prev = cf_pprev;
+			} else {
+				/* number of bytes present in both fragments */
+				u_short overlap = (cf_prev->cfr_last - first) + 1;
+
+				IFDEBUG(D_REASS)
+					printf("clnp_insert_frag: previous overlaps by %d\n",
+						overlap);
+				ENDDEBUG
+
+				/* Trim data off of end of previous fragment */
+				m_adj(cf_prev->cfr_data, -(int)overlap);
+				cf_prev->cfr_last -= overlap;
+			}
+		}
+
+		/*
+		 *	If the new fragment runs into the subsequent one, trim the
+		 *	extra data off the end of the new one. The subsequent
+		 *	fragment starts strictly after 'first', so at least one
+		 *	byte of the new fragment always survives; and because the
+		 *	list is ordered, no later fragment can overlap once the
+		 *	new one ends in front of cf_sub.
+		 */
+		if ((cf_sub != NULL) && (cf_sub->cfr_first <= last)) {
+			u_short overlap = (last - cf_sub->cfr_first) + 1;
+
+			IFDEBUG(D_REASS)
+				printf("clnp_insert_frag: subsequent overlaps by %d\n",
+					overlap);
+			ENDDEBUG
+
+			/* Trim data off of end of new fragment */
+			m_adj(m, -(int)overlap);
+			last -= overlap;
+		}
+	}
+
+	/*
+	 *	Insert the new fragment between cf_prev and cf_sub
+	 *
+	 *	Note: the clnp hdr is still in the mbuf.
+	 *	If the data of the mbuf is not word aligned, shave off enough
+	 *	so that it is. Then, cast the clnp_frag structure on top
+	 *	of the clnp header.
+	 *	The clnp_hdr will not be used again (as we already have
+	 *	saved a copy of it).
+	 *
+	 *	Save in cfr_bytes the number of bytes to shave off to get to
+	 *	the data of the packet. This is used when we coalesce fragments;
+	 *	the clnp_frag structure must be removed before joining mbufs.
+	 */
+
+	/*
+	 *	Distance from the header up to the next 4 byte boundary.
+	 *	Computed on the unsigned address so the result does not
+	 *	depend on whether the address looks negative as an int.
+	 */
+	pad = (int)((4 - ((u_long)clnp & 3)) & 3);
+
+	/* bytes is number of bytes left in front of data */
+	bytes = clnp->cnf_hdr_len - pad;
+
+	IFDEBUG(D_REASS)
+		printf("clnp_insert_frag: clnp x%x requires %d alignment\n",
+			clnp, pad);
+	ENDDEBUG
+
+	/* make it word aligned if necessary */
+	if (pad)
+		m_adj(m, pad);
+
+	cf = mtod(m, struct clnp_frag *);
+	cf->cfr_bytes = bytes;
+
+	IFDEBUG(D_REASS)
+		printf("clnp_insert_frag: cf now x%x, cfr_bytes %d\n", cf,
+			cf->cfr_bytes);
+	ENDDEBUG
+
+	cf->cfr_first = first;
+	cf->cfr_last = last;
+
+	/*
+	 *	The data is the mbuf itself, although we must remember that the
+	 *	first few bytes are actually a clnp_frag structure
+	 */
+	cf->cfr_data = m;
+
+	/* link into place */
+	cf->cfr_next = cf_sub;
+	if (cf_prev == NULL)
+		cfh->cfl_frags = cf;
+	else
+		cf_prev->cfr_next = cf;
+}
+
+/*
+ * FUNCTION:		clnp_comp_pdu
+ *
+ * PURPOSE:		Walk the fragments filed under cfh and join every
+ *			pair of neighbours whose byte ranges are adjacent.
+ *			If the first fragment then spans the whole datagram,
+ *			hand back the datagram with its saved header in
+ *			front. Otherwise, return NULL.
+ *
+ * RETURNS:		NULL, or a pointer to the assembled pdu in an mbuf chain.
+ *
+ * SIDE EFFECTS:	Collapses contiguous fragments into one. When the
+ *			datagram is complete, cfh is unlinked from clnp_frags
+ *			and the mbuf holding it is freed.
+ *
+ * NOTES:		This code assumes that there are no overlaps of
+ *			fragment pdus, and that cfh holds at least one
+ *			fragment.
+ */
+struct mbuf *
+clnp_comp_pdu(cfh)
+struct clnp_fragl	*cfh;		/* fragment header */
+{
+	register struct clnp_frag	*cur = cfh->cfl_frags;
+	register struct clnp_frag	*nxt;
+	struct clnp_frag		nxt_copy;	/* nxt's header, saved */
+	struct clnp_fragl		**linkp;
+	struct mbuf			*data;
+	struct mbuf			*hdr;
+
+	while ((nxt = cur->cfr_next) != NULL) {
+		IFDEBUG(D_REASS)
+			printf("clnp_comp_pdu: comparing: [%d ... %d] to [%d ... %d]\n",
+				cur->cfr_first, cur->cfr_last, nxt->cfr_first,
+				nxt->cfr_last);
+		ENDDEBUG
+
+		if (cur->cfr_last != (nxt->cfr_first - 1)) {
+			/* a gap separates the two: move on */
+			cur = nxt;
+			continue;
+		}
+
+		/*
+		 *	The two are adjacent: fold nxt into cur. The header of
+		 *	nxt is copied out first, because it sits in the very
+		 *	bytes that are about to be trimmed away.
+		 */
+		nxt_copy = *nxt;
+
+		IFDEBUG(D_REASS)
+			struct mbuf *mdump;
+			int l;
+			printf("clnp_comp_pdu: merging fragments\n");
+			printf("clnp_comp_pdu: 1st: [%d ... %d] (bytes %d)\n",
+				cur->cfr_first, cur->cfr_last, cur->cfr_bytes);
+			l = 0;
+			for (mdump = cur->cfr_data; mdump != NULL;
+				mdump = mdump->m_next) {
+				printf("\tmbuf x%x, m_len %d\n", mdump, mdump->m_len);
+				l += mdump->m_len;
+			}
+			printf("\ttotal len: %d\n", l);
+			printf("clnp_comp_pdu: 2nd: [%d ... %d] (bytes %d)\n",
+				nxt->cfr_first, nxt->cfr_last, nxt->cfr_bytes);
+			l = 0;
+			for (mdump = nxt->cfr_data; mdump != NULL;
+				mdump = mdump->m_next) {
+				printf("\tmbuf x%x, m_len %d\n", mdump, mdump->m_len);
+				l += mdump->m_len;
+			}
+			printf("\ttotal len: %d\n", l);
+		ENDDEBUG
+
+		cur->cfr_last = nxt_copy.cfr_last;
+
+		IFDEBUG(D_REASS)
+			printf("clnp_comp_pdu: shaving off %d bytes\n",
+				nxt_copy.cfr_bytes);
+		ENDDEBUG
+
+		/* from here on only nxt_copy may be used, never nxt itself */
+		m_adj(nxt_copy.cfr_data, (int)nxt_copy.cfr_bytes);
+		m_cat(cur->cfr_data, nxt_copy.cfr_data);
+		cur->cfr_next = nxt_copy.cfr_next;
+	}
+
+	cur = cfh->cfl_frags;
+
+	IFDEBUG(D_REASS)
+		struct mbuf *mdump;
+		printf("clnp_comp_pdu: first frag now: [%d ... %d]\n", cur->cfr_first,
+			cur->cfr_last);
+		printf("clnp_comp_pdu: data for frag:\n");
+		for (mdump = cur->cfr_data; mdump != NULL; mdump = mdump->m_next) {
+			printf("mbuf x%x, m_len %d\n", mdump, mdump->m_len);
+		}
+	ENDDEBUG
+
+	/* incomplete unless the first fragment covers byte 0 through cfl_last */
+	if ((cur->cfr_first != 0) || (cur->cfr_last != cfh->cfl_last))
+		return(NULL);
+
+	/*
+	 *	We have a complete pdu!
+	 *	- Strip the frag header from the single remaining fragment.
+	 *	- Put the saved clnp header in front of the data.
+	 */
+	data = cur->cfr_data;
+	hdr = cfh->cfl_orighdr;
+
+	IFDEBUG(D_REASS)
+		printf("clnp_comp_pdu: complete pdu!\n");
+	ENDDEBUG
+
+	m_adj(data, (int)cur->cfr_bytes);
+	m_cat(hdr, data);
+
+	IFDEBUG(D_DUMPIN)
+		struct mbuf *mdump;
+		printf("clnp_comp_pdu: pdu is:\n");
+		for (mdump = hdr; mdump != NULL; mdump = mdump->m_next) {
+			printf("mbuf x%x, m_len %d\n", mdump, mdump->m_len);
+		}
+	ENDDEBUG
+
+	/*
+	 *	Take cfh off the list of fragmented pdus: find the link
+	 *	that points at it and make that link skip over it.
+	 */
+	for (linkp = &clnp_frags; *linkp != NULL; linkp = &(*linkp)->cfl_next) {
+		if (*linkp == cfh) {
+			*linkp = cfh->cfl_next;
+			break;
+		}
+	}
+
+	/* cfh lives inside an mbuf; release that mbuf */
+	m_freem(dtom(cfh));
+
+	return(hdr);
+}
+#ifdef TROLL
+static int troll_cnt;
+#include <sys/time.h>
+/*
+ * FUNCTION:		troll_random
+ *
+ * PURPOSE:		produce a crude pseudo-random fraction
+ *
+ * RETURNS:		a value from 0.00 through 0.99, in steps of 0.01
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:		The value is simply the microsecond field of the
+ *			global 'time' taken modulo 100 and scaled down.
+ */
+float troll_random()
+{
+	extern struct timeval time;
+
+	return((float)(long)(time.tv_usec % 100) / (float) 100);
+}
+
+/*
+ * FUNCTION:		troll_output
+ *
+ * PURPOSE:		Do something sneaky with the datagram passed. Possible
+ *			operations are:
+ *				Duplicate the packet
+ *				Drop the packet
+ *				Trim some number of bytes from the packet
+ *				Munge some byte in the packet
+ *
+ *			Of these, this routine acts on TR_DUPPKT (send an
+ *			extra copy of some packets), TR_DROPPKT (throw the
+ *			packet away) and TR_CHANGE (clear the high byte of
+ *			the checksum); with none of them set the packet is
+ *			sent unchanged.
+ *
+ * RETURNS:		0, or unix error code
+ *
+ * SIDE EFFECTS:	The packet (m) is always consumed, exactly as if
+ *			it had been handed to ifp->if_output: callers such
+ *			as clnp_fragment rely on that and never free it.
+ *
+ * NOTES:		The operation of this procedure is regulated by the
+ *			troll control structure (Troll).
+ */
+troll_output(ifp, m, dst, rt)
+struct ifnet	*ifp;
+struct mbuf	*m;
+struct sockaddr	*dst;
+struct rtentry *rt;
+{
+	int	err = 0;
+	troll_cnt++;
+
+	if (trollctl.tr_ops & TR_DUPPKT) {
+		/*
+		 *	Duplicate every Nth packet
+		 *	TODO: random?
+		 */
+		float	f_freq = troll_cnt * trollctl.tr_dup_freq;
+		int		i_freq = troll_cnt * trollctl.tr_dup_freq;
+		if (i_freq == f_freq) {
+			struct mbuf *dup = m_copy(m, 0, (int)M_COPYALL);
+			if (dup != NULL)
+				err = (*ifp->if_output)(ifp, dup, dst, rt);
+		}
+		if (!err)
+			err = (*ifp->if_output)(ifp, m, dst, rt);
+		else
+			m_freem(m);	/* the copy failed to go out; don't leak m */
+		return(err);
+	} else if (trollctl.tr_ops & TR_DROPPKT) {
+		/*
+		 *	Simulate loss on the wire: release the packet and
+		 *	tell the caller all went well.
+		 */
+		m_freem(m);
+		return(0);
+	} else if (trollctl.tr_ops & TR_CHANGE) {
+		struct clnp_fixed *clnp = mtod(m, struct clnp_fixed *);
+		clnp->cnf_cksum_msb = 0;
+		err = (*ifp->if_output)(ifp, m, dst, rt);
+		return(err);
+	} else {
+		err = (*ifp->if_output)(ifp, m, dst, rt);
+		return(err);
+	}
+}
+
+#endif /* TROLL */
diff --git a/sys/netiso/clnp_input.c b/sys/netiso/clnp_input.c
new file mode 100644
index 000000000000..c49de95e5fac
--- /dev/null
+++ b/sys/netiso/clnp_input.c
@@ -0,0 +1,551 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clnp_input.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_input.c,v 5.1 89/02/09 16:20:32 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_input.c,v $ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_snpac.h>
+#include <netiso/clnp.h>
+#include <netiso/clnl.h>
+#include <netiso/esis.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/if_ether.h>
+#include <netiso/eonvar.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+#ifdef ISO
+/* per ISO protocol index into isosw; filled in by clnp_init */
+u_char clnp_protox[ISOPROTO_MAX];
+/* input switch consulted by clnlintr, indexed by the header's cnf_proto_id */
+struct clnl_protosw clnl_protox[256];
+/* copied into clnlintrq.ifq_maxlen by clnp_init */
+int clnpqmaxlen = IFQ_MAXLEN; /* RAH? why is this a variable */
+struct mbuf *clnp_data_ck();
+
+/* installed as the ISO8473_CLNP input routine by clnp_init */
+int clnp_input();
+
+int esis_input();
+
+#ifdef ISO_X25ESIS
+int x25esis_input();
+#endif /* ISO_X25ESIS */
+
+/*
+ * FUNCTION:		clnp_init
+ *
+ * PURPOSE:		clnp initialization. Fill in clnp switch tables.
+ *
+ * RETURNS:		none
+ *
+ * SIDE EFFECTS:	records in clnp_protox the isosw offsets of the raw
+ *			and tp protocols (a missing protocol is reported on
+ *			the console and its slot left alone), installs
+ *			clnp_input in clnl_protox, and sets the length limit
+ *			of the clnl input queue.
+ *
+ * NOTES:
+ */
+clnp_init()
+{
+	register struct protosw	*pr;
+
+	/*
+	 *	CLNP protox initialization
+	 */
+	pr = pffindproto(PF_ISO, ISOPROTO_RAW, SOCK_RAW);
+	if (pr != 0)
+		clnp_protox[ISOPROTO_RAW] = pr - isosw;
+	else
+		printf("clnl_init: no raw CLNP\n");
+
+	pr = pffindproto(PF_ISO, ISOPROTO_TP, SOCK_SEQPACKET);
+	if (pr != 0)
+		clnp_protox[ISOPROTO_TP] = pr - isosw;
+	else
+		printf("clnl_init: no tp/clnp\n");
+
+	/*
+	 *	CLNL protox initialization
+	 */
+	clnl_protox[ISO8473_CLNP].clnl_input = clnp_input;
+
+	clnlintrq.ifq_maxlen = clnpqmaxlen;
+}
+
+/*
+ * FUNCTION:		clnlintr
+ *
+ * PURPOSE:		Drain the clnl input queue, passing each packet to
+ *			the input routine registered for its protocol id.
+ *
+ * RETURNS:		nothing.
+ *
+ * SIDE EFFECTS:	Every dequeued packet is either handed to a
+ *			protocol input routine or freed. The subnetwork
+ *			header is stripped from the front of the packet
+ *			after its addresses have been saved in a snpa_hdr.
+ *
+ * NOTES:		Returns only when the queue is empty.
+ */
+clnlintr()
+{
+	extern int			ether_output();
+	register struct mbuf		*m;	/* ptr to first mbuf of pkt */
+	register struct clnl_fixed	*clnl;	/* ptr to fixed part of clnl hdr */
+	register struct ifaddr		*ifa;	/* scans receiving if's addresses */
+	int				s;	/* save and restore priority */
+	struct clnl_protosw		*clnlsw;/* ptr to protocol switch */
+	struct snpa_hdr			sh;	/* subnetwork hdr */
+
+	for (;;) {
+		/*
+		 *	Get next datagram off clnl input queue
+		 */
+		s = splimp();
+		IF_DEQUEUE(&clnlintrq, m);
+		splx(s);
+
+		if (m == 0)		/* nothing to do */
+			return;
+
+		/* the packet must say which interface it came in on */
+		if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.rcvif == 0) {
+			m_freem(m);
+			continue;
+		}
+
+		/* and that interface must have an ISO address configured */
+		for (ifa = m->m_pkthdr.rcvif->if_addrlist; ifa; ifa = ifa->ifa_next)
+			if (ifa->ifa_addr->sa_family == AF_ISO)
+				break;
+		if (ifa == 0) {
+			m_freem(m);
+			continue;
+		}
+
+		/*
+		 *	Save the subnetwork addresses in sh, then step over the
+		 *	subnetwork header.
+		 */
+		bzero((caddr_t)&sh, sizeof(sh));
+		sh.snh_flags = m->m_flags & (M_MCAST|M_BCAST);
+		sh.snh_ifp = m->m_pkthdr.rcvif;
+		if (sh.snh_ifp->if_type == IFT_EON) {
+			bcopy(mtod(m, caddr_t), (caddr_t)sh.snh_dhost, sizeof(u_long));
+			bcopy(sizeof(u_long) + mtod(m, caddr_t),
+				(caddr_t)sh.snh_shost, sizeof(u_long));
+			sh.snh_dhost[4] = mtod(m, u_char *)[sizeof(struct ip) +
+				_offsetof(struct eon_hdr, eonh_class)];
+			m->m_data += EONIPLEN;
+			m->m_len -= EONIPLEN;
+			m->m_pkthdr.len -= EONIPLEN;
+		} else if (sh.snh_ifp->if_output == ether_output) {
+			/* one copy, twice the size of snh_dhost, starting at ether_dhost */
+			bcopy((caddr_t)(mtod(m, struct ether_header *)->ether_dhost),
+				(caddr_t)sh.snh_dhost, 2*sizeof(sh.snh_dhost));
+			m->m_data += sizeof (struct ether_header);
+			m->m_len -= sizeof (struct ether_header);
+			m->m_pkthdr.len -= sizeof (struct ether_header);
+		}
+
+		IFDEBUG(D_INPUT)
+			int i;
+			printf("clnlintr: src:");
+			for (i=0; i<6; i++)
+				printf("%x%c", sh.snh_shost[i] & 0xff, (i<5) ? ':' : ' ');
+			printf(" dst:");
+			for (i=0; i<6; i++)
+				printf("%x%c", sh.snh_dhost[i] & 0xff, (i<5) ? ':' : ' ');
+			printf("\n");
+		ENDDEBUG
+
+		/*
+		 *	Get the fixed part of the clnl header into the first mbuf.
+		 *	m_pullup is called when the mbuf is a cluster or holds
+		 *	too little data; drop the packet if it fails.
+		 */
+		if (IS_CLUSTER(m) || (m->m_len < sizeof(struct clnl_fixed))) {
+			m = m_pullup(m, sizeof(struct clnl_fixed));
+			if (m == 0) {
+				INCSTAT(cns_toosmall);	/* TODO: use clnl stats */
+				continue;		/* m_pullup discards mbuf */
+			}
+		}
+
+		clnl = mtod(m, struct clnl_fixed *);
+
+		/*
+		 *	Drop packet if the length of the header is not reasonable.
+		 */
+		if ((clnl->cnf_hdr_len < CLNP_HDR_MIN) ||
+			(clnl->cnf_hdr_len > CLNP_HDR_MAX)) {
+			INCSTAT(cns_badhlen);	/* TODO: use clnl stats */
+			m_freem(m);
+			continue;
+		}
+
+		/*
+		 *	If the header is not contained in this mbuf, make it so.
+		 *	Drop packet if this fails.
+		 */
+		if (clnl->cnf_hdr_len > m->m_len) {
+			m = m_pullup(m, (int)clnl->cnf_hdr_len);
+			if (m == 0) {
+				INCSTAT(cns_badhlen);	/* TODO: use clnl stats */
+				continue;		/* m_pullup discards mbuf */
+			}
+			clnl = mtod(m, struct clnl_fixed *);
+		}
+
+		/* hand the packet to its protocol, or drop it if none is set */
+		clnlsw = &clnl_protox[clnl->cnf_proto_id];
+		if (clnlsw->clnl_input)
+			(*clnlsw->clnl_input) (m, &sh);
+		else
+			m_freem(m);
+	}
+}
+
+/*
+ * FUNCTION: clnp_input
+ *
+ * PURPOSE: process an incoming clnp packet: check the checksum,
+ * version and data length, pull the destination and source
+ * addresses, the segmentation part and the options out of
+ * the header, then either forward the packet (not for us),
+ * hand it to reassembly, or dispatch it on its pdu type.
+ *
+ * ARGUMENTS: m - first mbuf of the packet
+ * shp - subnetwork header recorded for the packet
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: increments fields of clnp_stat structure.
+ * Every path passes m on to another routine (clnp_discard,
+ * clnp_data_ck, clnp_forward, clnp_reass or a pdu type
+ * handler); it is never handed back to the caller.
+ *
+ * NOTES:
+ * TODO: I would like to make seg_part a pointer into the mbuf, but
+ * will it be correctly aligned?
+ */
+clnp_input(m, shp)
+struct mbuf *m; /* ptr to first mbuf of pkt */
+struct snpa_hdr *shp; /* subnetwork header */
+{
+ register struct clnp_fixed *clnp; /* ptr to fixed part of header */
+ struct sockaddr_iso source; /* source address of pkt */
+ struct sockaddr_iso target; /* destination address of pkt */
+#define src source.siso_addr
+#define dst target.siso_addr
+ caddr_t hoff; /* current offset in packet */
+ caddr_t hend; /* address of end of header info */
+ struct clnp_segment seg_part; /* segment part of hdr; only filled in for non-ER pdus with CNF_SEG_OK */
+ int seg_off=0; /* offset of segment part of hdr; stays 0 when there is none */
+ int seg_len;/* length of packet data&hdr in bytes */
+ struct clnp_optidx oidx, *oidxp = NULL; /* option index; oidxp stays NULL when hdr has no options */
+ extern int iso_systype; /* used by ESIS config resp */
+ extern struct sockaddr_iso blank_siso; /* used for initializing */
+ int need_afrin = 0;
+ /* true if congestion experienced */
+ /* which means you need afrin nose */
+ /* spray. How clever! */
+
+ IFDEBUG(D_INPUT)
+ printf(
+ "clnp_input: proccessing dg; First mbuf m_len %d, m_type x%x, %s\n",
+ m->m_len, m->m_type, IS_CLUSTER(m) ? "cluster" : "normal");
+ ENDDEBUG
+ need_afrin = 0; /* redundant: already zeroed by its initializer */
+
+ /*
+ * If no iso addresses have been set, there is nothing
+ * to do with the packet.
+ */
+ if (iso_ifaddr == NULL) {
+ clnp_discard(m, ADDR_DESTUNREACH);
+ return;
+ }
+
+ INCSTAT(cns_total);
+ clnp = mtod(m, struct clnp_fixed *);
+
+ IFDEBUG(D_DUMPIN)
+ struct mbuf *mhead;
+ int total_len = 0;
+ printf("clnp_input: clnp header:\n");
+ dump_buf(mtod(m, caddr_t), clnp->cnf_hdr_len);
+ printf("clnp_input: mbuf chain:\n");
+ for (mhead = m; mhead != NULL; mhead=mhead->m_next) {
+ printf("m x%x, len %d\n", mhead, mhead->m_len);
+ total_len += mhead->m_len;
+ }
+ printf("clnp_input: total length of mbuf chain %d:\n", total_len);
+ ENDDEBUG
+
+ /*
+ * Compute checksum (if necessary) and drop packet if
+ * checksum does not match
+ */
+ if (CKSUM_REQUIRED(clnp) && iso_check_csum(m, (int)clnp->cnf_hdr_len)) {
+ INCSTAT(cns_badcsum);
+ clnp_discard(m, GEN_BADCSUM);
+ return;
+ }
+
+ if (clnp->cnf_vers != ISO8473_V1) { /* only this protocol version is accepted */
+ INCSTAT(cns_badvers);
+ clnp_discard(m, DISC_UNSUPPVERS);
+ return;
+ }
+
+
+ /* check mbuf data length: clnp_data_ck will free mbuf upon error */
+ CTOH(clnp->cnf_seglen_msb, clnp->cnf_seglen_lsb, seg_len);
+ if ((m = clnp_data_ck(m, seg_len)) == 0)
+ return;
+
+ clnp = mtod(m, struct clnp_fixed *); /* m was reassigned above, so re-derive the header ptr */
+ hend = (caddr_t)clnp + clnp->cnf_hdr_len;
+
+ /*
+ * extract the destination and then the source address;
+ * drop packet on failure (hoff comes back as 0)
+ */
+ source = target = blank_siso;
+
+ hoff = (caddr_t)clnp + sizeof(struct clnp_fixed);
+ CLNP_EXTRACT_ADDR(dst, hoff, hend);
+ if (hoff == (caddr_t)0) {
+ INCSTAT(cns_badaddr);
+ clnp_discard(m, GEN_INCOMPLETE);
+ return;
+ }
+ CLNP_EXTRACT_ADDR(src, hoff, hend);
+ if (hoff == (caddr_t)0) {
+ INCSTAT(cns_badaddr);
+ clnp_discard(m, GEN_INCOMPLETE);
+ return;
+ }
+
+ IFDEBUG(D_INPUT)
+ printf("clnp_input: from %s", clnp_iso_addrp(&src));
+ printf(" to %s\n", clnp_iso_addrp(&dst));
+ ENDDEBUG
+
+ /*
+ * extract the segmentation information, if it is present
+ * (any pdu other than an ER that has CNF_SEG_OK set).
+ * drop packet on failure
+ */
+ if (((clnp->cnf_type & CNF_TYPE) != CLNP_ER) &&
+ (clnp->cnf_type & CNF_SEG_OK)) {
+ if (hoff + sizeof(struct clnp_segment) > hend) { /* segment part would run past the header */
+ INCSTAT(cns_noseg);
+ clnp_discard(m, GEN_INCOMPLETE);
+ return;
+ } else {
+ (void) bcopy(hoff, (caddr_t)&seg_part, sizeof(struct clnp_segment));
+ /* make sure segmentation fields are in host order */
+ seg_part.cng_id = ntohs(seg_part.cng_id);
+ seg_part.cng_off = ntohs(seg_part.cng_off);
+ seg_part.cng_tot_len = ntohs(seg_part.cng_tot_len);
+ seg_off = hoff - (caddr_t)clnp;
+ hoff += sizeof(struct clnp_segment);
+ }
+ }
+
+ /*
+ * process options if present (anything left between hoff and
+ * hend). If clnp_opt_sanity returns non-zero (an error was
+ * found in the options) or an unsupported option was found,
+ * then drop the packet via clnp_discard.
+ */
+ if (hoff < hend) {
+ int errcode;
+
+ oidxp = &oidx;
+ errcode = clnp_opt_sanity(m, hoff, hend-hoff, oidxp);
+
+ /* we do not support security */
+ if ((errcode == 0) && (oidxp->cni_securep))
+ errcode = DISC_UNSUPPSECURE;
+
+ /* the er option is valid with ER pdus only */
+ if ((errcode == 0) && (oidxp->cni_er_reason != ER_INVALREAS) &&
+ ((clnp->cnf_type & CNF_TYPE) != CLNP_ER))
+ errcode = DISC_UNSUPPOPT;
+
+#ifdef DECBIT
+ /* check if the congestion experienced bit is set (done even when errcode is already non-zero) */
+ if (oidxp->cni_qos_formatp) {
+ caddr_t qosp = CLNP_OFFTOOPT(m, oidxp->cni_qos_formatp);
+ u_char qos = *qosp;
+
+ need_afrin = ((qos & (CLNPOVAL_GLOBAL|CLNPOVAL_CONGESTED)) ==
+ (CLNPOVAL_GLOBAL|CLNPOVAL_CONGESTED));
+ if (need_afrin)
+ INCSTAT(cns_congest_rcvd);
+ }
+#endif /* DECBIT */
+
+ if (errcode != 0) {
+ clnp_discard(m, (char)errcode);
+ IFDEBUG(D_INPUT)
+ printf("clnp_input: dropped (err x%x) due to bad options\n",
+ errcode);
+ ENDDEBUG
+ return;
+ }
+ }
+
+ /*
+ * check if this packet is for us. if not, then forward
+ */
+ if (clnp_ours(&dst) == 0) {
+ IFDEBUG(D_INPUT)
+ printf("clnp_input: forwarding packet not for us\n");
+ ENDDEBUG
+ clnp_forward(m, seg_len, &dst, oidxp, seg_off, shp);
+ return;
+ }
+
+ /*
+ * ESIS Configuration Response Function
+ *
+ * If the packet received was sent to the multicast address
+ * all end systems, then send an esh to the source
+ */
+ if ((shp->snh_flags & M_MCAST) && (iso_systype == SNPA_ES)) {
+ extern short esis_holding_time;
+
+ esis_shoutput(shp->snh_ifp, ESIS_ESH, esis_holding_time,
+ shp->snh_shost, 6, &dst);
+ }
+
+ /*
+ * If this is a fragment, then try to reassemble it. If clnp_reass
+ * returns non NULL, the packet has been reassembled, and should
+ * be given to TP. Otherwise the fragment has been dealt with
+ * by the reassembly code (either stored or deleted). In either case
+ * we should have nothing more to do with it.
+ */
+ if (((clnp->cnf_type & CNF_TYPE) != CLNP_ER) &&
+ (clnp->cnf_type & CNF_SEG_OK) &&
+ (seg_len != seg_part.cng_tot_len)) { /* this segment is shorter than the whole pdu */
+ struct mbuf *m0;
+
+ if ((m0 = clnp_reass(m, &src, &dst, &seg_part)) != NULL) {
+ m = m0;
+ clnp = mtod(m, struct clnp_fixed *); /* header now lives in the reassembled chain */
+ INCSTAT(cns_reassembled);
+ } else {
+ return;
+ }
+ }
+
+ /*
+ * give the packet to the higher layer
+ *
+ * Note: the total length of packet
+ * is the total length field of the segmentation part,
+ * or, if absent, the segment length field of the
+ * header.
+ */
+ INCSTAT(cns_delivered);
+ switch (clnp->cnf_type & CNF_TYPE) {
+ case CLNP_ER:
+ /*
+ * This ER must have the er option.
+ * If the option is not present, discard datagram.
+ */
+ if (oidxp == NULL || oidxp->cni_er_reason == ER_INVALREAS) {
+ clnp_discard(m, GEN_HDRSYNTAX);
+ } else {
+ clnp_er_input(m, &src, oidxp->cni_er_reason);
+ }
+ break;
+
+ case CLNP_DT:
+ (*isosw[clnp_protox[ISOPROTO_TP]].pr_input)(m, &source, &target,
+ clnp->cnf_hdr_len, need_afrin);
+ break;
+
+ case CLNP_RAW:
+ case CLNP_ECR:
+ IFDEBUG(D_INPUT)
+ printf("clnp_input: raw input of %d bytes\n",
+ clnp->cnf_type & CNF_SEG_OK ? seg_part.cng_tot_len : seg_len);
+ ENDDEBUG
+ (*isosw[clnp_protox[ISOPROTO_RAW]].pr_input)(m, &source, &target,
+ clnp->cnf_hdr_len);
+ break;
+
+ case CLNP_EC:
+ IFDEBUG(D_INPUT)
+ printf("clnp_input: echoing packet\n");
+ ENDDEBUG
+ (void)clnp_echoreply(m,
+ (clnp->cnf_type & CNF_SEG_OK ? (int)seg_part.cng_tot_len : seg_len),
+ &source, &target, oidxp);
+ break;
+
+ default:
+ printf("clnp_input: unknown clnp pkt type %d\n",
+ clnp->cnf_type & CNF_TYPE);
+ clnp_stat.cns_delivered--; /* undo the INCSTAT(cns_delivered) done before the switch */
+ clnp_stat.cns_noproto++;
+ clnp_discard(m, GEN_HDRSYNTAX);
+ break;
+ }
+}
+#endif /* ISO */
diff --git a/sys/netiso/clnp_options.c b/sys/netiso/clnp_options.c
new file mode 100644
index 000000000000..250b438664f9
--- /dev/null
+++ b/sys/netiso/clnp_options.c
@@ -0,0 +1,532 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clnp_options.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_options.c,v 5.1 89/02/09 16:20:37 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_options.c,v $ */
+
+#ifdef ISO
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+/*
+ * FUNCTION:		clnp_update_srcrt
+ *
+ * PURPOSE:		Process src rt option accompanying a clnp datagram.
+ *				- bump src route ptr if src routing and
+ *					we appear current in src route list.
+ *
+ * ARGUMENTS:	options - mbuf holding the options
+ *				oidx - option index describing that mbuf
+ *
+ * RETURNS:		none
+ *
+ * SIDE EFFECTS:	When the current source route entry is one of our
+ *				addresses, the offset field of the src rt option stored
+ *				in the options mbuf is advanced past that entry.
+ *
+ * NOTES:		If source routing has been terminated, do nothing.
+ *				The length of the current entry is a byte taken from the
+ *				option itself. An entry that would not fit in the local
+ *				struct iso_addr is ignored instead of being copied past
+ *				the end of that buffer.
+ */
+clnp_update_srcrt(options, oidx)
+struct mbuf			*options;	/* ptr to options mbuf */
+struct clnp_optidx	*oidx;		/* ptr to option index */
+{
+	u_char			len;	/* length of current address */
+	struct iso_addr	isoa;	/* copy current address into here */
+
+	if (CLNPSRCRT_TERM(oidx, options)) {
+		IFDEBUG(D_OPTIONS)
+			printf("clnp_update_srcrt: src rt terminated\n");
+		ENDDEBUG
+		return;
+	}
+
+	len = CLNPSRCRT_CLEN(oidx, options);
+
+	/*
+	 * len comes straight from the packet and may be as large as 255,
+	 * while isoa is a fixed size local. Refuse anything that bcopy
+	 * would write beyond the end of isoa.
+	 */
+	if (len > sizeof(isoa)) {
+		IFDEBUG(D_OPTIONS)
+			printf("clnp_update_srcrt: src rt addr len %d too large\n", len);
+		ENDDEBUG
+		return;
+	}
+
+	bcopy(CLNPSRCRT_CADDR(oidx, options), (caddr_t)&isoa, len);
+	isoa.isoa_len = len;
+
+	IFDEBUG(D_OPTIONS)
+		printf("clnp_update_srcrt: current src rt: %s\n",
+			clnp_iso_addrp(&isoa));
+	ENDDEBUG
+
+	if (clnp_ours(&isoa)) {
+		IFDEBUG(D_OPTIONS)
+			printf("clnp_update_srcrt: updating src rt\n");
+		ENDDEBUG
+
+		/* update pointer to next src route */
+		len++;	/* count length byte too! */
+		CLNPSRCRT_OFF(oidx, options) += len;
+	}
+}
+
+/*
+ * FUNCTION:		clnp_dooptions
+ *
+ * PURPOSE:		Process options accompanying a clnp datagram.
+ *				Processing includes
+ *					- log our address if recording route
+ *
+ * ARGUMENTS:	options - mbuf holding the options
+ *				oidx - option index describing that mbuf
+ *				ifp - interface the packet is leaving on (not used here)
+ *				isoa - our address for that interface
+ *
+ * RETURNS:		none
+ *
+ * SIDE EFFECTS:	May append isoa (length byte followed by the address) to
+ *				the record route option and advance its offset byte, or
+ *				set the offset byte to 0xff to terminate recording when
+ *				the address does not fit.
+ *
+ * NOTES:		The option value starts with a type byte and an offset
+ *				byte, so the first slot that can hold a record is at
+ *				offset 3. A smaller offset would make the copy land on
+ *				the option's own header bytes; such an option is left
+ *				untouched.
+ */
+clnp_dooptions(options, oidx, ifp, isoa)
+struct mbuf			*options;	/* ptr to options mbuf */
+struct clnp_optidx	*oidx;		/* ptr to option index */
+struct ifnet		*ifp;		/* ptr to interface pkt is leaving on */
+struct iso_addr		*isoa;		/* ptr to our address for this ifp */
+{
+	/*
+	 * If record route is specified, move all
+	 * existing records over, and insert the address of
+	 * interface passed
+	 */
+	if (oidx->cni_recrtp) {
+		char 	*opt;			/* ptr to beginning of recrt option */
+		u_char	off;			/* offset from opt of first free byte */
+		char	*rec_start;		/* beginning of new rt recorded */
+
+		opt = CLNP_OFFTOOPT(options, oidx->cni_recrtp);
+		off = *(opt + 1);
+		rec_start = opt + off - 1;
+
+		IFDEBUG(D_OPTIONS)
+			printf("clnp_dooptions: record route: option x%x for %d bytes\n",
+				opt, oidx->cni_recrt_len);
+			printf("\tfree slot offset x%x\n", off);
+			printf("clnp_dooptions: recording %s\n", clnp_iso_addrp(isoa));
+			printf("clnp_dooptions: option dump:\n");
+			dump_buf(opt, oidx->cni_recrt_len);
+		ENDDEBUG
+
+		/*
+		 * An offset below 3 points at (or before) the type and offset
+		 * bytes of this very option; recording there would corrupt
+		 * the option header, so leave the option alone.
+		 */
+		if (off < 3) {
+			IFDEBUG(D_OPTIONS)
+				printf("clnp_dooptions: bad record route offset x%x\n", off);
+			ENDDEBUG
+			return;
+		}
+
+		/* proceed only if recording has not been terminated */
+		if (off != 0xff) {
+			int new_addrlen = isoa->isoa_len + 1;
+			/*
+			 * if there is insufficient room to store the next address,
+			 * then terminate recording. Plus 1 on isoa_len is for the
+			 * length byte itself
+			 */
+			if (oidx->cni_recrt_len - (off - 1) < new_addrlen) {
+				*(opt + 1) = 0xff;	/* terminate recording */
+			} else {
+				IFDEBUG(D_OPTIONS)
+					printf("clnp_dooptions: new addr at x%x for %d\n",
+						rec_start, new_addrlen);
+				ENDDEBUG
+
+				/* copies isoa_len and the address bytes that follow it */
+				bcopy((caddr_t)isoa, rec_start, new_addrlen);
+
+				/* update offset field */
+				*(opt + 1) += new_addrlen;
+
+				IFDEBUG(D_OPTIONS)
+					printf("clnp_dooptions: new option dump:\n");
+					dump_buf(opt, oidx->cni_recrt_len);
+				ENDDEBUG
+			}
+		}
+	}
+}
+
+/*
+ * FUNCTION:		clnp_set_opts
+ *
+ * PURPOSE:		Replace the option mbuf at *options with the one at
+ *				*data, provided the new options pass the sanity check.
+ *				Whatever was at *options beforehand is always freed, so
+ *				old options are lost even when the new ones are
+ *				rejected. With *data NULL the call only drops the old
+ *				options.
+ *
+ * ARGUMENTS:	options - target for option information
+ *				data - source of option information
+ *
+ * RETURNS:		unix error code: 0 on success, EINVAL when the new
+ *				options are rejected
+ *
+ * SIDE EFFECTS:	On success *data is set to NULL so that the caller will
+ *				not free the mbuf that *options now refers to. On
+ *				failure *data is left as it was.
+ *
+ * NOTES:		Security, priority and ER options are not accepted here.
+ */
+clnp_set_opts(options, data)
+struct mbuf	**options;	/* target for option information */
+struct mbuf	**data;		/* source of option information */
+{
+	struct clnp_optidx	scratch;	/* index filled in by the sanity check */
+	struct mbuf			*candidate;	/* the mbuf offered by the caller */
+
+	/* drop whatever options were installed before */
+	if (*options != NULL) {
+		m_freem(*options);
+		*options = NULL;
+	}
+
+	candidate = *data;
+	if (candidate == NULL)
+		return 0;
+
+	/* the options themselves must be well formed ... */
+	if (clnp_opt_sanity(candidate, mtod(candidate, caddr_t),
+		candidate->m_len, &scratch) != 0)
+		return EINVAL;
+
+	/* ... and must not carry security, priority or an ER reason */
+	if (scratch.cni_securep)
+		return EINVAL;
+	if (scratch.cni_priorp)
+		return EINVAL;
+	if (scratch.cni_er_reason != ER_INVALREAS)
+		return EINVAL;
+
+	/* take ownership of the mbuf; the caller must not free it now */
+	*options = candidate;
+	*data = NULL;
+	return 0;
+}
+
+/*
+ * FUNCTION:		clnp_opt_sanity
+ *
+ * PURPOSE:		Check the options (beginning at opts for len bytes) for
+ *				sanity. In addition, fill in the option index structure
+ *				in with information about each option discovered.
+ *
+ * ARGUMENTS:	m - mbuf the options reside in
+ *				opts - first byte of the options
+ *				len - number of bytes of options
+ *				oidx - option index to fill in
+ *
+ * RETURNS:		success (options check out) - 0
+ *				failure - an ER pdu error code describing failure
+ *
+ * SIDE EFFECTS:	oidx is zeroed and then filled in as options are
+ *				recognized; on failure it holds whatever had been
+ *				recorded up to the point of the error.
+ *
+ * NOTES:		Each pointer field of the option index is filled in with
+ *				the offset from the beginning of the mbuf data, not the
+ *				actual address.
+ *				Security and QOS options must carry at least the format
+ *				octet; a zero length one is rejected before that octet
+ *				is read.
+ */
+clnp_opt_sanity(m, opts, len, oidx)
+struct mbuf 		*m;		/* mbuf options reside in */
+caddr_t				opts;	/* ptr to buffer containing options */
+int					len;	/* length of buffer */
+struct clnp_optidx	*oidx;	/* RETURN: filled in with option idx info */
+{
+	u_char	opcode;			/* code of particular option */
+	u_char	oplen;			/* length of a particular option */
+	caddr_t	opts_end;		/* ptr to end of options */
+	u_char	pad = 0, secure = 0, srcrt = 0, recrt = 0, qos = 0, prior = 0;
+							/* flags for catching duplicate options */
+
+	IFDEBUG(D_OPTIONS)
+		printf("clnp_opt_sanity: checking %d bytes of data:\n", len);
+		dump_buf(opts, len);
+	ENDDEBUG
+
+	/* clear option index field */
+	bzero((caddr_t)oidx, sizeof(struct clnp_optidx));
+
+	/*
+	 *	We need to indicate whether the ER option is present. This is done
+	 *	by overloading the er_reason field to also indicate presence of
+	 *	the option along with the option value. I would like ER_INVALREAS
+	 *	to have value 0, but alas, 0 is a valid er reason...
+	 */
+	oidx->cni_er_reason = ER_INVALREAS;
+
+	opts_end = opts + len;
+	while (opts < opts_end) {
+		/* must have at least 2 bytes per option (opcode and len) */
+		if (opts + 2 > opts_end)
+			return(GEN_INCOMPLETE);
+
+		opcode = *opts++;
+		oplen = *opts++;
+		IFDEBUG(D_OPTIONS)
+			printf("clnp_opt_sanity: opcode is %x and oplen %d\n",
+				opcode, oplen);
+			printf("clnp_opt_sanity: clnpoval_SRCRT is %x\n", CLNPOVAL_SRCRT);
+
+				switch (opcode) {
+					case CLNPOVAL_PAD: {
+						printf("CLNPOVAL_PAD\n");
+					} break;
+					case CLNPOVAL_SECURE: {
+						printf("CLNPOVAL_SECURE\n");
+					} break;
+					case CLNPOVAL_SRCRT: {
+							printf("CLNPOVAL_SRCRT\n");
+					} break;
+					case CLNPOVAL_RECRT: {
+						printf("CLNPOVAL_RECRT\n");
+					} break;
+					case CLNPOVAL_QOS: {
+						printf("CLNPOVAL_QOS\n");
+					} break;
+					case CLNPOVAL_PRIOR: {
+						printf("CLNPOVAL_PRIOR\n");
+					} break;
+					case CLNPOVAL_ERREAS: {
+						printf("CLNPOVAL_ERREAS\n");
+					} break;
+					default:
+						printf("UKNOWN option %x\n", opcode);
+				}
+		ENDDEBUG
+
+		/* don't allow crazy length values */
+		if (opts + oplen > opts_end)
+			return(GEN_INCOMPLETE);
+
+		switch (opcode) {
+			case CLNPOVAL_PAD:
+				/*
+				 *	Padding: increment pointer by length of padding
+				 */
+				if (pad++)						/* duplicate ? */
+					return(GEN_DUPOPT);
+				opts += oplen;
+				break;
+
+			case CLNPOVAL_SECURE: {
+				u_char	format;
+
+				if (secure++)					/* duplicate ? */
+					return(GEN_DUPOPT);
+				/*
+				 *	The format octet must be there before it is read;
+				 *	with oplen 0, opts may already sit at opts_end.
+				 */
+				if (oplen < 1)
+					return(GEN_HDRSYNTAX);
+				format = *opts;
+				/*
+				 *	Security: high 2 bits of first octet indicate format
+				 *	(00 in high bits is reserved).
+				 *	Remaining bits must be 0. Remaining octets indicate
+				 *	actual security
+				 */
+				if (((format & 0x3f) > 0) ||	/* low 6 bits set ? */
+					((format & 0xc0) == 0))		/* high 2 bits zero ? */
+					return(GEN_HDRSYNTAX);
+
+				oidx->cni_securep = CLNP_OPTTOOFF(m, opts);
+				oidx->cni_secure_len = oplen;
+				opts += oplen;
+			} break;
+
+			case CLNPOVAL_SRCRT: {
+				u_char	type, offset;	/* type of rt, offset of start */
+				caddr_t	route_end;		/* address of end of route option */
+
+				IFDEBUG(D_OPTIONS)
+				printf("clnp_opt_sanity: SRC RT\n");
+				ENDDEBUG
+
+				if (srcrt++)					/* duplicate ? */
+					return(GEN_DUPOPT);
+				/*
+				 *	source route: There must be 2 bytes following the length
+				 *	field: type and offset. The type must be either
+				 *	partial route or complete route. The offset field must
+				 *	be within the option. A single exception is made, however.
+				 *	The offset may be 1 greater than the length. This case
+				 *	occurs when the last source route record is consumed.
+				 *	In this case, we ignore the source route option.
+				 *	RAH? You should be able to set offset to 'ff' like in record
+				 *	route!
+				 *	Following this is a series of address fields.
+				 *	Each address field is composed of a (length, address) pair.
+				 *	Insure that the offset and each address length is reasonable
+				 */
+				route_end = opts + oplen;
+
+				if (opts + 2 > route_end)
+					return(SRCRT_SYNTAX);
+
+				type = *opts;
+				offset = *(opts+1);
+
+
+				/* type must be partial or complete */
+				if (!((type == CLNPOVAL_PARTRT) || (type == CLNPOVAL_COMPRT)))
+					return(SRCRT_SYNTAX);
+
+				oidx->cni_srcrt_s = CLNP_OPTTOOFF(m, opts);
+				oidx->cni_srcrt_len = oplen;
+
+				opts += offset-1;	/*set opts to first addr in rt */
+
+				/*
+				 *	Offset must be reasonable:
+				 *	less than end of options, or equal to end of options
+				 */
+				if (opts >= route_end) {
+					if (opts == route_end) {
+						IFDEBUG(D_OPTIONS)
+							printf("clnp_opt_sanity: end of src route info\n");
+						ENDDEBUG
+						break;
+					} else
+						return(SRCRT_SYNTAX);
+				}
+
+				/* walk the (length, address) pairs up to the end of the option */
+				while (opts < route_end) {
+					u_char	addrlen = *opts++;
+					if (opts + addrlen > route_end)
+						return(SRCRT_SYNTAX);
+					opts += addrlen;
+				}
+			} break;
+			case CLNPOVAL_RECRT: {
+				u_char	type, offset;	/* type of rt, offset of start */
+				caddr_t	record_end;		/* address of end of record option */
+
+				if (recrt++)					/* duplicate ? */
+					return(GEN_DUPOPT);
+				/*
+				 *	record route: after the length field, expect a
+				 *	type and offset. Type must be partial or complete.
+				 *	Offset indicates where to start recording. Insure it
+				 *	is within the option. All ones for offset means
+				 *	recording is terminated.
+				 */
+				record_end = opts + oplen;
+
+				oidx->cni_recrtp = CLNP_OPTTOOFF(m, opts);
+				oidx->cni_recrt_len = oplen;
+
+				if (opts + 2 > record_end)
+					return(GEN_INCOMPLETE);
+
+				type = *opts;
+				offset = *(opts+1);
+
+				/* type must be partial or complete */
+				if (!((type == CLNPOVAL_PARTRT) || (type == CLNPOVAL_COMPRT)))
+					return(GEN_HDRSYNTAX);
+
+				/* offset must be reasonable */
+				if ((offset < 0xff) && (opts + offset > record_end))
+					return(GEN_HDRSYNTAX);
+				opts += oplen;
+			} break;
+			case CLNPOVAL_QOS: {
+				u_char	format;
+
+				if (qos++)					/* duplicate ? */
+					return(GEN_DUPOPT);
+				/*
+				 *	The format octet must be there before it is read;
+				 *	with oplen 0, opts may already sit at opts_end.
+				 */
+				if (oplen < 1)
+					return(GEN_HDRSYNTAX);
+				format = *opts;
+				/*
+				 *	qos: high 2 bits of first octet indicate format
+				 *	(00 in high bits is reserved).
+				 *	Remaining bits must be 0 (unless format indicates
+				 *	globally unique qos, in which case remaining bits indicate
+				 *	qos (except bit 6 which is reserved)).  Otherwise,
+				 *	remaining octets indicate actual qos.
+				 */
+				if (((format & 0xc0) == 0) ||	/* high 2 bits zero ? */
+					(((format & 0xc0) != CLNPOVAL_GLOBAL) &&
+						((format & 0x3f) > 0))) /* not global,low bits used ? */
+					return(GEN_HDRSYNTAX);
+
+				oidx->cni_qos_formatp = CLNP_OPTTOOFF(m, opts);
+				oidx->cni_qos_len = oplen;
+
+				opts += oplen;
+			} break;
+
+			case CLNPOVAL_PRIOR: {
+				if (prior++)				/* duplicate ? */
+					return(GEN_DUPOPT);
+				/*
+				 *	priority: value must be one byte long
+				 */
+				if (oplen != 1)
+					return(GEN_HDRSYNTAX);
+
+				oidx->cni_priorp = CLNP_OPTTOOFF(m, opts);
+
+				opts += oplen;
+			} break;
+
+			case CLNPOVAL_ERREAS: {
+				/*
+				 *	er reason: value must be two bytes long
+				 */
+				if (oplen != 2)
+					return(GEN_HDRSYNTAX);
+
+				/* only the first of the two bytes is kept in the index */
+				oidx->cni_er_reason = *opts;
+
+				opts += oplen;
+			} break;
+
+			default: {
+				IFDEBUG(D_OPTIONS)
+					printf("clnp_opt_sanity: UNKNOWN OPTION 0x%x\n", opcode);
+				ENDDEBUG
+				return(DISC_UNSUPPOPT);
+			}
+		}
+	}
+	IFDEBUG(D_OPTIONS)
+		/* no argument: the format has no conversion, and opcode is unset when len is 0 */
+		printf("clnp_opt_sanity: return(0)\n");
+	ENDDEBUG
+	return(0);
+}
+#endif /* ISO */
diff --git a/sys/netiso/clnp_output.c b/sys/netiso/clnp_output.c
new file mode 100644
index 000000000000..aba9f6e00bdf
--- /dev/null
+++ b/sys/netiso/clnp_output.c
@@ -0,0 +1,561 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clnp_output.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_output.c,v 5.0 89/02/08 12:00:15 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_output.c,v $ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+static struct clnp_fixed dt_template = { /* default header: copied by clnp_output when no RAW/ECHO/ECHOR flag is set */
+ ISO8473_CLNP, /* network identifier */
+ 0, /* length: header length, filled in per packet by clnp_output */
+ ISO8473_V1, /* version */
+ CLNP_TTL, /* ttl */
+ CLNP_DT|CNF_SEG_OK|CNF_ERR_OK, /* type; SEG_OK/ERR_OK are cleared for CLNP_NO_SEG/CLNP_NO_ER */
+ 0, /* segment length: written per packet just before transmission */
+ 0 /* checksum: generated per packet unless CLNP_NO_CKSUM is given */
+};
+
+static struct clnp_fixed raw_template = { /* header copied by clnp_output when CLNP_SEND_RAW is set */
+ ISO8473_CLNP, /* network identifier */
+ 0, /* length: filled in per packet */
+ ISO8473_V1, /* version */
+ CLNP_TTL, /* ttl */
+ CLNP_RAW|CNF_SEG_OK|CNF_ERR_OK, /* type */
+ 0, /* segment length: filled in per packet */
+ 0 /* checksum: filled in per packet */
+};
+
+static struct clnp_fixed echo_template = { /* header copied by clnp_output when CLNP_ECHO is set */
+ ISO8473_CLNP, /* network identifier */
+ 0, /* length: filled in per packet */
+ ISO8473_V1, /* version */
+ CLNP_TTL, /* ttl */
+ CLNP_EC|CNF_SEG_OK|CNF_ERR_OK, /* type */
+ 0, /* segment length: filled in per packet */
+ 0 /* checksum: filled in per packet */
+};
+
+static struct clnp_fixed echor_template = { /* header copied by clnp_output when CLNP_ECHOR is set */
+ ISO8473_CLNP, /* network identifier */
+ 0, /* length: filled in per packet */
+ ISO8473_V1, /* version */
+ CLNP_TTL, /* ttl */
+ CLNP_ECR|CNF_SEG_OK|CNF_ERR_OK, /* type */
+ 0, /* segment length: filled in per packet */
+ 0 /* checksum: filled in per packet */
+};
+
+#ifdef DECBIT
+u_char qos_option[] = {CLNPOVAL_QOS, 1, /* option code, then a value length of 1 */
+ CLNPOVAL_GLOBAL|CLNPOVAL_SEQUENCING|CLNPOVAL_LOWDELAY}; /* the single value octet; clnp_output appends this array to each new header */
+#endif /* DECBIT */
+
+int clnp_id = 0; /* id for segmented dgrams; post-incremented by clnp_output for every packet sent with CNF_SEG_OK */
+
+/*
+ * FUNCTION:       clnp_output
+ *
+ * PURPOSE:        output the data in the mbuf as a clnp datagram
+ *
+ *                 The data specified by m0 is sent as a clnp datagram.
+ *                 The mbuf chain m0 is always consumed: it is either
+ *                 handed to the subnetwork output / fragmentation code
+ *                 or freed here on every error path.
+ *
+ *                 If isop->isop_options is non-null, it points to an mbuf
+ *                 which contains options to be sent with the datagram.
+ *                 The options must be formatted in the mbuf according to
+ *                 clnp rules.  The options mbuf is copied, never freed.
+ *
+ *                 datalen specifies the length of the data in m0.
+ *
+ *                 The destination is taken from isop->isop_faddr.  The
+ *                 source is taken from isop->isop_laddr; when that is
+ *                 null, the address of the interface chosen by
+ *                 clnp_route() is used instead.
+ *
+ *                 isop->isop_route is used (and filled in) as the route
+ *                 for the packet.
+ *
+ *                 By default, a DT is sent.  If flags & CLNP_SEND_RAW the
+ *                 packet is sent as raw clnp; if flags & CLNP_ECHO or
+ *                 flags & CLNP_ECHOR it is sent as an echo request or an
+ *                 echo reply.
+ *
+ * RETURNS:        0 success
+ *                 appropriate error code (EINVAL, ENAMETOOLONG, ENOBUFS,
+ *                 EMSGSIZE, ENETUNREACH, or whatever the routing and
+ *                 subnetwork output routines return)
+ *
+ * SIDE EFFECTS:   updates the header cache hanging off isop->isop_clnpcache
+ *                 (unless CLNP_NOCACHE is set) and the clnp_stat counters.
+ *
+ * NOTES:
+ *                 Flags are interpreted as follows:
+ *                     CLNP_NO_SEG - do not allow this pkt to be segmented.
+ *                     CLNP_NO_ER  - have pkt request ER suppression.
+ *                     CLNP_SEND_RAW - send pkt as RAW DT rather than TP DT
+ *                     CLNP_NO_CKSUM - don't compute clnp checksum
+ *                     CLNP_ECHO - send as ECHO packet
+ *                     CLNP_ECHOR - send as ECHO reply packet
+ *                     CLNP_NOCACHE - build the header from scratch and do
+ *                         not store it in the pcb
+ *
+ *                 When checking for a cached packet, clnp compares the
+ *                 destination address, options mbuf, flags and route
+ *                 pointer against the cached values and checks that the
+ *                 route is still up.  It does not check that the route
+ *                 itself still leads to the same destination.  This means
+ *                 that any entity that alters an existing route for an
+ *                 isopcb (such as when a redirect arrives) must
+ *                 invalidate the clnp cache.
+ *
+ *                 NOTE(review): cns_sent is only incremented on the
+ *                 cache-miss path, yet the error epilogue decrements it
+ *                 unconditionally; confirm whether cached sends are meant
+ *                 to be counted.
+ */
+clnp_output(m0, isop, datalen, flags)
+struct mbuf *m0;		/* data for the packet */
+struct isopcb *isop;		/* iso pcb */
+int datalen;			/* number of bytes of data in m0 */
+int flags;			/* flags */
+{
+	int error = 0;				/* return value of function */
+	register struct mbuf *m = m0;		/* mbuf for clnp header chain */
+	register struct clnp_fixed *clnp;	/* ptr to fixed part of hdr */
+	register caddr_t hoff;			/* offset into header */
+	int total_len;				/* total length of packet */
+	struct iso_addr *src;			/* ptr to source address */
+	struct iso_addr *dst;			/* ptr to destination address */
+	struct clnp_cache clc;			/* storage for cache information */
+	struct clnp_cache *clcp = NULL;		/* ptr to clc */
+	int hdrlen = 0;
+
+	dst = &isop->isop_faddr->siso_addr;
+	if (isop->isop_laddr == 0) {
+		struct iso_ifaddr *ia = 0;
+		clnp_route(dst, &isop->isop_route, flags, 0, &ia);
+		if (ia == 0 || ia->ia_ifa.ifa_addr->sa_family != AF_ISO) {
+			/*
+			 * We own m0 (see PURPOSE); the original code returned
+			 * here without releasing it.
+			 */
+			m_freem(m0);
+			return (ENETUNREACH);
+		}
+		src = &ia->ia_addr.siso_addr;
+	} else
+		src = &isop->isop_laddr->siso_addr;
+
+	IFDEBUG(D_OUTPUT)
+		printf("clnp_output: to %s", clnp_iso_addrp(dst));
+		printf(" from %s of %d bytes\n", clnp_iso_addrp(src), datalen);
+		printf("\toptions x%x, flags x%x, isop_clnpcache x%x\n",
+			isop->isop_options, flags, isop->isop_clnpcache);
+	ENDDEBUG
+
+	if (isop->isop_clnpcache != NULL) {
+		clcp = mtod(isop->isop_clnpcache, struct clnp_cache *);
+	}
+
+	/*
+	 * Check if cache is valid ...
+	 */
+	IFDEBUG(D_OUTPUT)
+		printf("clnp_output: ck cache: clcp %x\n", clcp);
+		if (clcp != NULL) {
+			printf("\tclc_dst %s\n", clnp_iso_addrp(&clcp->clc_dst));
+			printf("\tisop_opts x%x, clc_opts x%x\n", isop->isop_options,
+				clcp->clc_options);
+			if (isop->isop_route.ro_rt)
+				printf("\tro_rt x%x, rt_flags x%x\n",
+					isop->isop_route.ro_rt, isop->isop_route.ro_rt->rt_flags);
+			printf("\tflags x%x, clc_flags x%x\n", flags, clcp->clc_flags);
+			printf("\tclc_hdr x%x\n", clcp->clc_hdr);
+		}
+	ENDDEBUG
+	if ((clcp != NULL) &&					/* cache exists */
+		(isop->isop_options == clcp->clc_options) &&	/* same options */
+		(iso_addrmatch1(dst, &clcp->clc_dst)) &&	/* dst still same */
+		(isop->isop_route.ro_rt != NULL) &&		/* route exists */
+		(isop->isop_route.ro_rt == clcp->clc_rt) &&	/* and is cached */
+		(isop->isop_route.ro_rt->rt_flags & RTF_UP) &&	/* route still up */
+		(flags == clcp->clc_flags) &&			/* same flags */
+		(clcp->clc_hdr != NULL)) {			/* hdr mbuf exists */
+		/*
+		 * The cache is valid
+		 */
+		struct mbuf *tail;	/* last mbuf of the header chain */
+
+		IFDEBUG(D_OUTPUT)
+			printf("clnp_output: using cache\n");
+		ENDDEBUG
+
+		m = m_copy(clcp->clc_hdr, 0, (int)M_COPYALL);
+		if (m == NULL) {
+			/*
+			 * No buffers left to copy cached packet header. Use
+			 * the cached packet header this time, and
+			 * mark the hdr as vacant
+			 */
+			m = clcp->clc_hdr;
+			clcp->clc_hdr = NULL;
+		}
+		/*
+		 * The cached header is a copy of cnf_hdr_len bytes, which
+		 * spans a second mbuf when options were present.  Append the
+		 * data after the last header mbuf rather than after the
+		 * first, so the options are neither dropped nor leaked.
+		 */
+		for (tail = m; tail->m_next != NULL; tail = tail->m_next)
+			continue;
+		tail->m_next = m0;
+		clnp = mtod(m, struct clnp_fixed *);
+	} else {
+		struct clnp_optidx *oidx = NULL;	/* index to clnp options */
+
+		/*
+		 * The cache is not valid. Allocate an mbuf (if necessary)
+		 * to hold cached info. If one is not available, then
+		 * don't bother with the cache
+		 */
+		INCSTAT(cns_cachemiss);
+		if (flags & CLNP_NOCACHE) {
+			clcp = &clc;
+		} else {
+			if (isop->isop_clnpcache == NULL) {
+				/*
+				 * There is no clnpcache. Allocate an mbuf to hold one
+				 */
+				if ((isop->isop_clnpcache = m_get(M_DONTWAIT, MT_HEADER))
+					== NULL) {
+					/*
+					 * No mbufs available. Pretend that we don't want
+					 * caching this time.
+					 */
+					IFDEBUG(D_OUTPUT)
+						printf("clnp_output: no mbufs to allocate to cache\n");
+					ENDDEBUG
+					flags |= CLNP_NOCACHE;
+					clcp = &clc;
+				} else {
+					clcp = mtod(isop->isop_clnpcache, struct clnp_cache *);
+				}
+			} else {
+				/*
+				 * A clnpcache mbuf exists. If the clc_hdr is not null,
+				 * we must free it, as a new one is about to be created.
+				 */
+				clcp = mtod(isop->isop_clnpcache, struct clnp_cache *);
+				if (clcp->clc_hdr != NULL) {
+					/*
+					 * The clc_hdr is not null but a clnpcache mbuf exists.
+					 * This means that there was a cache, but the existing
+					 * copy of the hdr is no longer valid. Free it now
+					 * before we lose the pointer to it.  The header may
+					 * be a chain (fixed part + options), so release the
+					 * whole chain.
+					 */
+					IFDEBUG(D_OUTPUT)
+						printf("clnp_output: freeing old clc_hdr 0x%x\n",
+							clcp->clc_hdr);
+					ENDDEBUG
+					m_freem(clcp->clc_hdr);
+					IFDEBUG(D_OUTPUT)
+						printf("clnp_output: freed old clc_hdr (done)\n");
+					ENDDEBUG
+				}
+			}
+		}
+		IFDEBUG(D_OUTPUT)
+			printf("clnp_output: NEW clcp x%x\n",clcp);
+		ENDDEBUG
+		bzero((caddr_t)clcp, sizeof(struct clnp_cache));
+
+		if (isop->isop_optindex)
+			oidx = mtod(isop->isop_optindex, struct clnp_optidx *);
+
+		/*
+		 * Don't allow packets with security, quality of service,
+		 * priority, or error report options to be sent.
+		 */
+		if ((isop->isop_options) && (oidx)) {
+			if ((oidx->cni_securep) ||
+				(oidx->cni_priorp) ||
+				(oidx->cni_qos_formatp) ||
+				(oidx->cni_er_reason != ER_INVALREAS)) {
+				IFDEBUG(D_OUTPUT)
+					printf("clnp_output: pkt dropped - option unsupported\n");
+				ENDDEBUG
+				m_freem(m0);
+				return(EINVAL);
+			}
+		}
+
+		/*
+		 * Don't allow any invalid flags to be set
+		 */
+		if ((flags & (CLNP_VFLAGS)) != flags) {
+			IFDEBUG(D_OUTPUT)
+				printf("clnp_output: packet dropped - flags unsupported\n");
+			ENDDEBUG
+			INCSTAT(cns_odropped);
+			m_freem(m0);
+			return(EINVAL);
+		}
+
+		/*
+		 * Don't allow funny lengths on dst; src may be zero in which
+		 * case we insert the source address based upon the interface
+		 */
+		if ((src->isoa_len > sizeof(struct iso_addr)) ||
+			(dst->isoa_len == 0) ||
+			(dst->isoa_len > sizeof(struct iso_addr))) {
+			m_freem(m0);
+			INCSTAT(cns_odropped);
+			return(ENAMETOOLONG);
+		}
+
+		/*
+		 * Grab mbuf to contain header
+		 */
+		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		if (m == 0) {
+			m_freem(m0);
+			INCSTAT(cns_odropped);
+			return(ENOBUFS);
+		}
+		INCSTAT(cns_sent);
+		m->m_next = m0;
+		clnp = mtod(m, struct clnp_fixed *);
+		clcp->clc_segoff = 0;
+
+		/*
+		 * Fill in all of fixed hdr except lengths and checksum
+		 */
+		if (flags & CLNP_SEND_RAW) {
+			*clnp = raw_template;
+		} else if (flags & CLNP_ECHO) {
+			*clnp = echo_template;
+		} else if (flags & CLNP_ECHOR) {
+			*clnp = echor_template;
+		} else {
+			*clnp = dt_template;
+		}
+		if (flags & CLNP_NO_SEG)
+			clnp->cnf_type &= ~CNF_SEG_OK;
+		if (flags & CLNP_NO_ER)
+			clnp->cnf_type &= ~CNF_ERR_OK;
+
+		/*
+		 * Route packet; special case for source rt
+		 */
+		if ((isop->isop_options) && CLNPSRCRT_VALID(oidx)) {
+			IFDEBUG(D_OUTPUT)
+				printf("clnp_output: calling clnp_srcroute\n");
+			ENDDEBUG
+			error = clnp_srcroute(isop->isop_options, oidx, &isop->isop_route,
+				&clcp->clc_firsthop, &clcp->clc_ifa, dst);
+		} else {
+			error = clnp_route(dst, &isop->isop_route, flags,
+				&clcp->clc_firsthop, &clcp->clc_ifa);
+		}
+		/*
+		 * A "successful" route without an interface address is still
+		 * unusable.  Report it the same way the source-address lookup
+		 * at the top of this function does, instead of dropping the
+		 * packet and returning 0.
+		 */
+		if (error == 0 && clcp->clc_ifa == 0)
+			error = ENETUNREACH;
+		if (error) {
+			IFDEBUG(D_OUTPUT)
+				printf("clnp_output: route failed, errno %d\n", error);
+				printf("@clcp:\n");
+				dump_buf(clcp, sizeof (struct clnp_cache));
+			ENDDEBUG
+			goto bad;
+		}
+		clcp->clc_rt = isop->isop_route.ro_rt;	/* XXX */
+		clcp->clc_ifp = clcp->clc_ifa->ia_ifp;	/* XXX */
+
+		IFDEBUG(D_OUTPUT)
+			printf("clnp_output: packet routed to %s\n",
+				clnp_iso_addrp(
+					&((struct sockaddr_iso *)clcp->clc_firsthop)->siso_addr));
+		ENDDEBUG
+
+		/*
+		 * If src address is not yet specified, use address of
+		 * interface. NOTE: this will now update the laddr field in
+		 * the isopcb. Is this desirable? RAH?
+		 */
+		if (src->isoa_len == 0) {
+			src = &(clcp->clc_ifa->ia_addr.siso_addr);
+			IFDEBUG(D_OUTPUT)
+				printf("clnp_output: new src %s\n", clnp_iso_addrp(src));
+			ENDDEBUG
+		}
+
+		/*
+		 * Insert the source and destination address,
+		 */
+		hoff = (caddr_t)clnp + sizeof(struct clnp_fixed);
+		CLNP_INSERT_ADDR(hoff, *dst);
+		CLNP_INSERT_ADDR(hoff, *src);
+
+		/*
+		 * Leave room for the segment part, if segmenting is selected
+		 */
+		if (clnp->cnf_type & CNF_SEG_OK) {
+			clcp->clc_segoff = hoff - (caddr_t)clnp;
+			hoff += sizeof(struct clnp_segment);
+		}
+
+		clnp->cnf_hdr_len = m->m_len = (u_char)(hoff - (caddr_t)clnp);
+		hdrlen = clnp->cnf_hdr_len;
+
+#ifdef DECBIT
+		/*
+		 * Add the globally unique QOS (with room for congestion experienced
+		 * bit). I can safely assume that this option is not in the options
+		 * mbuf below because I checked that the option was not specified
+		 * previously
+		 */
+		if ((m->m_len + sizeof(qos_option)) < MLEN) {
+			bcopy((caddr_t)qos_option, hoff, sizeof(qos_option));
+			clnp->cnf_hdr_len += sizeof(qos_option);
+			hdrlen += sizeof(qos_option);
+			m->m_len += sizeof(qos_option);
+		}
+#endif /* DECBIT */
+
+		/*
+		 * If an options mbuf is present, concatenate a copy to the hdr mbuf.
+		 */
+		if (isop->isop_options) {
+			struct mbuf *opt_copy = m_copy(isop->isop_options, 0, (int)M_COPYALL);
+			if (opt_copy == NULL) {
+				error = ENOBUFS;
+				goto bad;
+			}
+			/* Link in place */
+			opt_copy->m_next = m->m_next;
+			m->m_next = opt_copy;
+
+			/* update size of header */
+			clnp->cnf_hdr_len += opt_copy->m_len;
+			hdrlen += opt_copy->m_len;
+		}
+
+		/*
+		 * cnf_hdr_len is a single octet; hdrlen tracks the same sum in
+		 * an int so that an oversized header is detected.
+		 */
+		if (hdrlen > CLNP_HDR_MAX) {
+			error = EMSGSIZE;
+			goto bad;
+		}
+
+		/*
+		 * Now set up the cache entry in the pcb
+		 */
+		if ((flags & CLNP_NOCACHE) == 0) {
+			clcp->clc_hdr = m_copy(m, 0, (int)clnp->cnf_hdr_len);
+			if (clcp->clc_hdr != NULL) {
+				clcp->clc_dst = *dst;
+				clcp->clc_flags = flags;
+				clcp->clc_options = isop->isop_options;
+			}
+		}
+	}
+	/*
+	 * If small enough for interface, send directly
+	 * Fill in segmentation part of hdr if using the full protocol
+	 */
+	total_len = clnp->cnf_hdr_len + datalen;
+	if (clnp->cnf_type & CNF_SEG_OK) {
+		struct clnp_segment seg_part;	/* segment part of hdr */
+		seg_part.cng_id = htons(clnp_id++);
+		seg_part.cng_off = htons(0);
+		seg_part.cng_tot_len = htons(total_len);
+		(void) bcopy((caddr_t)&seg_part, (caddr_t) clnp + clcp->clc_segoff,
+			sizeof(seg_part));
+	}
+	if (total_len <= SN_MTU(clcp->clc_ifp, clcp->clc_rt)) {
+		HTOC(clnp->cnf_seglen_msb, clnp->cnf_seglen_lsb, total_len);
+		m->m_pkthdr.len = total_len;
+		/*
+		 * Compute clnp checksum (on header only)
+		 */
+		if (flags & CLNP_NO_CKSUM) {
+			HTOC(clnp->cnf_cksum_msb, clnp->cnf_cksum_lsb, 0);
+		} else {
+			iso_gen_csum(m, CLNP_CKSUM_OFF, (int)clnp->cnf_hdr_len);
+		}
+
+		IFDEBUG(D_DUMPOUT)
+			struct mbuf *mdump = m;
+			printf("clnp_output: sending dg:\n");
+			while (mdump != NULL) {
+				dump_buf(mtod(mdump, caddr_t), mdump->m_len);
+				mdump = mdump->m_next;
+			}
+		ENDDEBUG
+
+		error = SN_OUTPUT(clcp, m);
+		goto done;
+	} else {
+		/*
+		 * Too large for interface; fragment if possible.
+		 */
+		error = clnp_fragment(clcp->clc_ifp, m, clcp->clc_firsthop,
+			total_len, clcp->clc_segoff, flags, clcp->clc_rt);
+		goto done;
+	}
+bad:
+	/* m heads the header chain, which already links to m0 */
+	m_freem(m);
+done:
+	if (error) {
+		clnp_stat.cns_sent--;
+		clnp_stat.cns_odropped++;
+	}
+	return (error);
+}
+
+/*
+ * Placeholder control-output routine: no options are implemented.
+ * Returns 0 explicitly; the original body fell off the end of an int
+ * function, handing the caller an indeterminate value.
+ */
+int clnp_ctloutput()
+{
+	return (0);
+}
diff --git a/sys/netiso/clnp_raw.c b/sys/netiso/clnp_raw.c
new file mode 100644
index 000000000000..0bc3dbac4b17
--- /dev/null
+++ b/sys/netiso/clnp_raw.c
@@ -0,0 +1,352 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clnp_raw.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: clnp_raw.c,v 4.2 88/06/29 14:58:56 hagens Exp $ */
+/* $Source: /usr/argo/sys/netiso/RCS/clnp_raw.c,v $ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+#include <netiso/tp_user.h> /* XXX -- defines SOL_NETWORK */
+
+struct sockproto rclnp_proto = { PF_ISO, 0 }; /* family PF_ISO, protocol 0 (no protocol); handed to raw_input() by rclnp_input */
+/*
+ * FUNCTION:       rclnp_input
+ *
+ * PURPOSE:        Hand a received clnp packet to the generic raw input
+ *                 routine, presenting the iso source and destination
+ *                 addresses as generic sockaddrs together with the
+ *                 rclnp_proto protocol structure.
+ *
+ * RETURNS:        none
+ *
+ * SIDE EFFECTS:   When TROLL is compiled in and TR_CHUCK is set in
+ *                 trollctl.tr_ops, the packet is freed and not delivered.
+ *
+ * NOTES:          The protocol field of rclnp_proto is zero, indicating
+ *                 no protocol.  hdrlen is accepted but not used here.
+ */
+rclnp_input(m, src, dst, hdrlen)
+struct mbuf *m;			/* ptr to packet */
+struct sockaddr_iso *src;	/* ptr to src address */
+struct sockaddr_iso *dst;	/* ptr to dest address */
+int hdrlen;			/* length (in bytes) of clnp header */
+{
+	struct sockaddr *from = (struct sockaddr *)src;	/* generic view of src */
+	struct sockaddr *to = (struct sockaddr *)dst;	/* generic view of dst */
+
+#ifdef TROLL
+	if ((trollctl.tr_ops & TR_CHUCK) != 0) {
+		m_freem(m);
+		return;
+	}
+#endif /* TROLL */
+
+	raw_input(m, &rclnp_proto, from, to);
+}
+
+/*
+ * FUNCTION:       rclnp_output
+ *
+ * PURPOSE:        Prepare to send a raw clnp packet.  Validate the bound
+ *                 source address (if any), set up the destination
+ *                 address in the embedded isopcb, and call clnp_output
+ *                 with the length recorded in the packet header.
+ *
+ * RETURNS:        success - 0
+ *                 failure - EINVAL if m0 carries no packet header,
+ *                           EAFNOSUPPORT if the bound source address is
+ *                           not AF_ISO or no destination has been set,
+ *                           otherwise whatever clnp_output returns
+ *
+ * SIDE EFFECTS:   m0 is consumed on every path: it is freed here on
+ *                 error, or passed on to clnp_output.
+ *
+ * NOTES:          The cache is always bypassed (CLNP_NOCACHE).
+ */
+rclnp_output(m0, so)
+struct mbuf *m0;		/* packet to send */
+struct socket *so;		/* socket to send from */
+{
+	struct rawisopcb *rp = sotorawisopcb(so);	/* ptr to raw cb */
+	int error;					/* return value of function */
+	int flags;					/* flags for clnp_output */
+
+	if (0 == (m0->m_flags & M_PKTHDR)) {
+		/*
+		 * Every other failure path releases m0; do the same here so
+		 * the chain is not leaked.
+		 */
+		m_freem(m0);
+		return (EINVAL);
+	}
+	/*
+	 * Set up src address. If user has bound socket to an address, use it.
+	 * Otherwise, do not specify src (clnp_output will fill it in).
+	 */
+	if (rp->risop_rcb.rcb_laddr) {
+		if (rp->risop_isop.isop_sladdr.siso_family != AF_ISO)
+			goto bad;
+	}
+	/* set up dest address */
+	if (rp->risop_rcb.rcb_faddr == 0)
+		goto bad;
+	rp->risop_isop.isop_sfaddr =
+		*(struct sockaddr_iso *)rp->risop_rcb.rcb_faddr;
+	rp->risop_isop.isop_faddr = &rp->risop_isop.isop_sfaddr;
+
+	/* get flags and ship it off */
+	flags = rp->risop_flags & CLNP_VFLAGS;
+
+	error = clnp_output(m0, &rp->risop_isop, m0->m_pkthdr.len,
+		flags|CLNP_NOCACHE);
+
+	return (error);
+
+bad:
+	m_freem(m0);
+	return (EAFNOSUPPORT);
+}
+
+/*
+ * FUNCTION:       rclnp_ctloutput
+ *
+ * PURPOSE:        Raw clnp socket option processing.
+ *                 All options are stored inside an mbuf.
+ *
+ *                 PRCO_SETOPT / CLNPOPT_FLAGS: the mbuf must hold exactly
+ *                 one short; its bits are OR-ed into the pcb flags after
+ *                 being checked against CLNP_VFLAGS.
+ *                 PRCO_SETOPT / CLNPOPT_OPTS: the options are installed
+ *                 with clnp_set_opts() and a fresh option index is built
+ *                 with clnp_opt_sanity().
+ *                 PRCO_GETOPT: nothing is returned.
+ *
+ * RETURNS:        success - 0
+ *                 failure - unix error code
+ *
+ * SIDE EFFECTS:   If the options mbuf does not exist, the mbuf passed
+ *                 is used.  For PRCO_SETOPT, *m is freed and set to NULL
+ *                 before returning.
+ *
+ * NOTES:          The result of clnp_opt_sanity() is deliberately
+ *                 discarded.  NOTE(review): options that fail the sanity
+ *                 check therefore stay installed; confirm this is wanted.
+ */
+rclnp_ctloutput(op, so, level, optname, m)
+int op;				/* type of operation */
+struct socket *so;		/* ptr to socket */
+int level;			/* level of option */
+int optname;			/* name of option */
+struct mbuf **m;		/* ptr to ptr to option data */
+{
+	int error = 0;
+	register struct rawisopcb *rp = sotorawisopcb(so);	/* raw cb ptr */
+
+	IFDEBUG(D_CTLOUTPUT)
+		printf("rclnp_ctloutput: op = x%x, level = x%x, name = x%x\n",
+			op, level, optname);
+		if (*m != NULL) {
+			printf("rclnp_ctloutput: %d bytes of mbuf data\n", (*m)->m_len);
+			dump_buf(mtod((*m), caddr_t), (*m)->m_len);
+		}
+	ENDDEBUG
+
+#ifdef SOL_NETWORK
+	if (level != SOL_NETWORK)
+		error = EINVAL;
+	else switch (op) {
+#else
+	switch (op) {
+#endif /* SOL_NETWORK */
+		case PRCO_SETOPT:
+			switch (optname) {
+				case CLNPOPT_FLAGS: {
+					u_short usr_flags;
+					/*
+					 * Ensure that the data passed has exactly one short in it
+					 */
+					if ((*m == NULL) || ((*m)->m_len != sizeof(short))) {
+						error = EINVAL;
+						break;
+					}
+
+					/*
+					 * Don't allow invalid flags to be set
+					 */
+					usr_flags = (*mtod((*m), short *));
+
+					if ((usr_flags & (CLNP_VFLAGS)) != usr_flags) {
+						error = EINVAL;
+					} else
+						rp->risop_flags |= usr_flags;
+
+				} break;
+
+				case CLNPOPT_OPTS:
+					if (error = clnp_set_opts(&rp->risop_isop.isop_options, m))
+						break;
+					/*
+					 * Release the index that described the previous
+					 * option set; the original code overwrote the
+					 * pointer and leaked one mbuf per call.
+					 */
+					if (rp->risop_isop.isop_optindex != NULL)
+						m_free(rp->risop_isop.isop_optindex);
+					rp->risop_isop.isop_optindex = m_get(M_WAIT, MT_SOOPTS);
+					(void) clnp_opt_sanity(rp->risop_isop.isop_options,
+						mtod(rp->risop_isop.isop_options, caddr_t),
+						rp->risop_isop.isop_options->m_len,
+						mtod(rp->risop_isop.isop_optindex,
+							struct clnp_optidx *));
+					break;
+			}
+			break;
+
+		case PRCO_GETOPT:
+#ifdef notdef
+			/* commented out to keep hi C quiet */
+			switch (optname) {
+				default:
+					error = EINVAL;
+					break;
+			}
+#endif /* notdef */
+			break;
+		default:
+			error = EINVAL;
+			break;
+	}
+	if (op == PRCO_SETOPT) {
+		/* note: m_freem does not barf if *m is NULL */
+		m_freem(*m);
+		*m = NULL;
+	}
+
+	return error;
+}
+
+/*
+ * User-request entry point for raw clnp sockets.
+ *
+ * PRU_ATTACH allocates and zeroes the rawisopcb and then falls through to
+ * raw_usrreq(); PRU_DETACH releases everything hanging off the embedded
+ * isopcb and then falls through to raw_usrreq(); PRU_BIND and PRU_CONNECT
+ * are handled completely here.  Every other request goes straight to
+ * raw_usrreq().
+ *
+ * Returns 0 on success or a unix error code.
+ *
+ * NOTE(review): when raw_usrreq() fails for PRU_ATTACH the pcb is freed
+ * but so->so_pcb still points at it; confirm callers never look at it.
+ */
+/*ARGSUSED*/
+clnp_usrreq(so, req, m, nam, control)
+	register struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	register int error = 0;
+	register struct rawisopcb *rp = sotorawisopcb(so);
+
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (rp)
+			panic("rip_attach");
+		MALLOC(rp, struct rawisopcb *, sizeof *rp, M_PCB, M_WAITOK);
+		if (rp == 0)
+			return (ENOBUFS);
+		bzero((caddr_t)rp, sizeof *rp);
+		so->so_pcb = (caddr_t)rp;
+		break;
+
+	case PRU_DETACH:
+		if (rp == 0)
+			panic("rip_detach");
+		if (rp->risop_isop.isop_options)
+			m_freem(rp->risop_isop.isop_options);
+		if (rp->risop_isop.isop_route.ro_rt)
+			RTFREE(rp->risop_isop.isop_route.ro_rt);
+		if (rp->risop_rcb.rcb_laddr)
+			rp->risop_rcb.rcb_laddr = 0;
+		/* free clnp cached hdr if necessary */
+		if (rp->risop_isop.isop_clnpcache != NULL) {
+			struct clnp_cache *clcp =
+				mtod(rp->risop_isop.isop_clnpcache, struct clnp_cache *);
+			if (clcp->clc_hdr != NULL) {
+				/*
+				 * The cached header may be a chain (fixed
+				 * part + options), so free all of it.
+				 */
+				m_freem(clcp->clc_hdr);
+			}
+			m_free(rp->risop_isop.isop_clnpcache);
+		}
+		if (rp->risop_isop.isop_optindex != NULL)
+			m_free(rp->risop_isop.isop_optindex);
+
+		break;
+
+	case PRU_BIND:
+	    {
+		struct sockaddr_iso *addr = mtod(nam, struct sockaddr_iso *);
+
+		if (nam->m_len != sizeof(*addr))
+			return (EINVAL);
+		if ((ifnet == 0) ||
+			(addr->siso_family != AF_ISO) ||
+			(addr->siso_addr.isoa_len &&
+			ifa_ifwithaddr((struct sockaddr *)addr) == 0))
+			return (EADDRNOTAVAIL);
+		rp->risop_isop.isop_sladdr = *addr;
+		rp->risop_rcb.rcb_laddr = (struct sockaddr *)
+			(rp->risop_isop.isop_laddr = &rp->risop_isop.isop_sladdr);
+		return (0);
+	    }
+	case PRU_CONNECT:
+	    {
+		struct sockaddr_iso *addr = mtod(nam, struct sockaddr_iso *);
+
+		if ((nam->m_len > sizeof(*addr)) || (addr->siso_len > sizeof(*addr)))
+			return (EINVAL);
+		if (ifnet == 0)
+			return (EADDRNOTAVAIL);
+		/*
+		 * Reject foreign address families.  The original test had no
+		 * statement of its own, so it guarded the assignment below:
+		 * the destination was stored only when the family was NOT
+		 * AF_ISO, and a valid address was silently ignored.
+		 */
+		if (addr->siso_family != AF_ISO)
+			return (EAFNOSUPPORT);
+		rp->risop_isop.isop_sfaddr = *addr;
+		rp->risop_rcb.rcb_faddr = (struct sockaddr *)
+			(rp->risop_isop.isop_faddr = &rp->risop_isop.isop_sfaddr);
+		soisconnected(so);
+		return (0);
+	    }
+	}
+	error = raw_usrreq(so, req, m, nam, control);
+
+	if (error && req == PRU_ATTACH && so->so_pcb)
+		free((caddr_t)rp, M_PCB);
+	return (error);
+}
diff --git a/sys/netiso/clnp_stat.h b/sys/netiso/clnp_stat.h
new file mode 100644
index 000000000000..07cd72c63e41
--- /dev/null
+++ b/sys/netiso/clnp_stat.h
@@ -0,0 +1,103 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clnp_stat.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_stat.h,v 5.1 89/02/09 16:20:42 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_stat.h,v $ */
+
+
+#ifndef __CLNP_STAT__
+#define __CLNP_STAT__
+
+struct clnp_stat { /* clnp event counters; bumped through INCSTAT() or directly */
+ int cns_total; /* total pkts received */
+ int cns_toosmall; /* fixed part of header too small */
+ int cns_badhlen; /* header length is not reasonable */
+ int cns_badcsum; /* checksum on packet failed */
+ int cns_badaddr; /* address fields were not reasonable */
+ int cns_badvers; /* incorrect version */
+ int cns_noseg; /* segment information forgotten */
+ int cns_noproto; /* incorrect protocol id */
+ int cns_delivered; /* packets consumed by protocol */
+ int cns_ttlexpired; /* ttl has expired */
+ int cns_forward; /* forwarded packets */
+ int cns_sent; /* total packets sent */
+ int cns_odropped; /* o.k. packets discarded, e.g. ENOBUFS */
+ int cns_cantforward; /* non-forwarded packets */
+ int cns_fragmented; /* packets fragmented */
+ int cns_fragments; /* fragments received */
+ int cns_fragdropped; /* fragments discarded */
+ int cns_fragtimeout; /* fragments timed out */
+ int cns_ofragments; /* fragments generated */
+ int cns_cantfrag; /* fragmentation prohibited */
+ int cns_reassembled; /* packets reconstructed */
+ int cns_cachemiss; /* cache misses */
+ int cns_congest_set; /* congestion experienced bit set */
+ int cns_congest_rcvd; /* congestion experienced bit received */
+ int cns_er_inhist[CLNP_ERRORS + 1]; /* presumably a per-reason histogram of ER pdus received -- TODO confirm */
+ int cns_er_outhist[CLNP_ERRORS + 1]; /* presumably a per-reason histogram of ER pdus sent -- TODO confirm */
+} clnp_stat ; /* NOTE(review): this defines the object in a header; every file including it gets a (tentative) definition */
+
+#ifdef INCSTAT /* drop any earlier definition so the clnp version below takes effect */
+#undef INCSTAT
+#endif /* INCSTAT */
+#define INCSTAT(x) clnp_stat./**/x/**/++ /* increment counter x of clnp_stat; the empty comments are a pre-ANSI pasting idiom */
+
+#endif /* __CLNP_STAT__ */
diff --git a/sys/netiso/clnp_subr.c b/sys/netiso/clnp_subr.c
new file mode 100644
index 000000000000..c877811be169
--- /dev/null
+++ b/sys/netiso/clnp_subr.c
@@ -0,0 +1,658 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clnp_subr.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_subr.c,v 5.1 89/02/09 16:20:46 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_subr.c,v $ */
+
+#ifdef ISO
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/if_dl.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_snpac.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+/*
+ * FUNCTION:		clnp_data_ck
+ *
+ * PURPOSE:		Compare the number of bytes actually held in the mbuf
+ *			chain with the packet length the caller expects.
+ *			Surplus bytes are trimmed from the tail of the chain;
+ *			a chain that is too short is discarded.
+ *
+ * RETURNS:		success - ptr to head of the mbuf chain
+ *			failure - 0 (chain has been passed to clnp_discard)
+ *
+ * SIDE EFFECTS:	Short chains are counted in cns_toosmall.
+ *
+ * NOTES:
+ */
+struct mbuf *
+clnp_data_ck(m, length)
+register struct mbuf	*m;		/* ptr to mbuf chain containing hdr & data */
+int			length;		/* length (in bytes) of packet */
+{
+	register struct mbuf	*head = m;	/* first mbuf of the chain */
+	register int		excess;		/* bytes in chain beyond length */
+
+	/* total up the chain; m is left pointing at the last mbuf */
+	excess = -length;
+	excess += m->m_len;
+	while (m->m_next != 0) {
+		m = m->m_next;
+		excess += m->m_len;
+	}
+
+	if (excess == 0)
+		return head;
+
+	if (excess < 0) {
+		/* less data than expected */
+		INCSTAT(cns_toosmall);
+		clnp_discard(head, GEN_INCOMPLETE);
+		return 0;
+	}
+
+	/* too much data: shorten the last mbuf, or let m_adj span several */
+	if (excess > m->m_len)
+		m_adj(head, -excess);
+	else
+		m->m_len -= excess;
+	return head;
+}
+
+#ifdef notdef
+/*
+ * FUNCTION:		clnp_extract_addr
+ *
+ * PURPOSE:		Pull the destination and then the source address out
+ *			of the address part found at bufp and store them in
+ *			the caller's address buffers.  Fails when the buffer
+ *			is too short for either address.
+ *
+ * RETURNS:		success - address of the first byte following the
+ *				address part
+ *			failure - 0
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:		Compiled out (notdef).
+ *			NOTE(review): the address octets are copied to the
+ *			start of the iso_addr structure rather than to its
+ *			address field, and buflen is not reduced for the
+ *			length octet -- confirm before reviving this code.
+ */
+caddr_t
+clnp_extract_addr(bufp, buflen, srcp, destp)
+caddr_t				bufp;	/* ptr to buffer containing addresses */
+int				buflen;	/* length of buffer */
+register struct iso_addr	*srcp;	/* ptr to source address buffer */
+register struct iso_addr	*destp;	/* ptr to destination address buffer */
+{
+	int	len;		/* number of address octets to copy */
+
+	/* destination: length octet plus that many address octets */
+	if ((u_char)*bufp + 1 > buflen)
+		return (caddr_t)0;
+	destp->isoa_len = (u_char)*bufp++;
+	len = destp->isoa_len;
+	(void) bcopy(bufp, (caddr_t)destp, len);
+	buflen -= len;
+	bufp += len;
+
+	/* source: same layout, directly after the destination */
+	if ((u_char)*bufp + 1 > buflen)
+		return (caddr_t)0;
+	srcp->isoa_len = (u_char)*bufp++;
+	len = srcp->isoa_len;
+	(void) bcopy(bufp, (caddr_t)srcp, len);
+	bufp += len;
+
+	/* both addresses must pass iso_ck_addr */
+	if (!iso_ck_addr(srcp) || !iso_ck_addr(destp))
+		return (caddr_t)0;
+	return bufp;
+}
+#endif /* notdef */
+
+/*
+ * FUNCTION:		clnp_ours
+ *
+ * PURPOSE:		Tell whether a destination address names this system
+ *			by comparing it with the address of every configured
+ *			ISO interface.
+ *
+ * RETURNS:		packet is for us - 1
+ *			packet is not for us - 0
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:		The trailing siso_tlen octets of each interface
+ *			address are left out of the comparison.
+ */
+clnp_ours(dst)
+register struct iso_addr	*dst;	/* ptr to destination address */
+{
+	register struct iso_ifaddr	*ifa;	/* interface address under test */
+
+	ifa = iso_ifaddr;
+	while (ifa != 0) {
+		IFDEBUG(D_ROUTE)
+			printf("clnp_ours: ia_sis x%x, dst x%x\n", &ifa->ia_addr,
+				dst);
+		ENDDEBUG
+		/*
+		 * XXX Warning:
+		 * siso_tlen in the interface address is overloaded as an
+		 * nsel length.
+		 */
+		if (dst->isoa_len == ifa->ia_addr.siso_nlen) {
+			if (bcmp((caddr_t)ifa->ia_addr.siso_addr.isoa_genaddr,
+				(caddr_t)dst->isoa_genaddr,
+				ifa->ia_addr.siso_nlen - ifa->ia_addr.siso_tlen) == 0)
+				return 1;
+		}
+		ifa = ifa->ia_next;
+	}
+	return 0;
+}
+
+/* Under DECBIT, clnp_forward sets the congestion bit when the outgoing interface's send queue length exceeds congest_threshold */
+int congest_threshold = 0;
+
+/*
+ * FUNCTION: clnp_forward
+ *
+ * PURPOSE: Forward the datagram passed
+ * clnpintr guarantees that the header will be
+ * contiguous (a cluster mbuf will be used if necessary).
+ *
+ * If oidx is NULL, no options are present.
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: Decrements the ttl in the header in place and
+ * updates cns_cantforward, cns_ttlexpired and cns_forward
+ * (plus cns_congest_set when DECBIT is defined).
+ *
+ * NOTES: On every path m is handed to clnp_discard, to the
+ * interface output routine, or to clnp_fragment, so
+ * the caller should not touch it afterwards. All exits
+ * pass through done:, which releases the route if one
+ * was obtained.
+ */
+clnp_forward(m, len, dst, oidx, seg_off, inbound_shp)
+struct mbuf *m; /* pkt to forward */
+int len; /* length of pkt */
+struct iso_addr *dst; /* destination address */
+struct clnp_optidx *oidx; /* option index */
+int seg_off;/* offset of segmentation part */
+struct snpa_hdr *inbound_shp; /* subnetwork header of inbound packet */
+{
+ struct clnp_fixed *clnp; /* ptr to fixed part of header */
+ int error; /* return value of route function */
+ struct sockaddr *next_hop; /* next hop for dgram */
+ struct ifnet *ifp; /* ptr to outgoing interface */
+ struct iso_ifaddr *ia = 0;/* ptr to iso name for ifp */
+ struct route_iso route; /* filled in by clnp_route */
+ extern int iso_systype;
+
+ clnp = mtod(m, struct clnp_fixed *);
+ bzero((caddr_t)&route, sizeof(route)); /* MUST be done before "bad:" */
+
+ /*
+ * Don't forward multicast or broadcast packets
+ */
+ if ((inbound_shp) && (IS_MULTICAST(inbound_shp->snh_dhost))) {
+ IFDEBUG(D_FORWARD)
+ printf("clnp_forward: dropping multicast packet\n");
+ ENDDEBUG
+ clnp->cnf_type &= ~CNF_ERR_OK; /* so we don't generate an ER */
+ clnp_discard(m, 0);
+ INCSTAT(cns_cantforward);
+ goto done;
+ }
+
+ IFDEBUG(D_FORWARD)
+ printf("clnp_forward: %d bytes, to %s, options x%x\n", len,
+ clnp_iso_addrp(dst), oidx);
+ ENDDEBUG
+
+ /*
+ * Decrement ttl, and if zero drop datagram
+ * Can't compare ttl as less than zero because it is unsigned
+ */
+ if ((clnp->cnf_ttl == 0) || (--clnp->cnf_ttl == 0)) {
+ IFDEBUG(D_FORWARD)
+ printf("clnp_forward: discarding datagram because ttl is zero\n");
+ ENDDEBUG
+ INCSTAT(cns_ttlexpired);
+ clnp_discard(m, TTL_EXPTRANSIT);
+ goto done;
+ }
+ /*
+ * Route packet; special case for source rt
+ */
+ if CLNPSRCRT_VALID(oidx) {
+ /*
+ * Update src route first
+ */
+ clnp_update_srcrt(m, oidx);
+ error = clnp_srcroute(m, oidx, &route, &next_hop, &ia, dst);
+ } else {
+ error = clnp_route(dst, &route, 0, &next_hop, &ia);
+ }
+ if (error || ia == 0) {
+ IFDEBUG(D_FORWARD)
+ printf("clnp_forward: can't route packet (errno %d)\n", error);
+ ENDDEBUG
+ clnp_discard(m, ADDR_DESTUNREACH);
+ INCSTAT(cns_cantforward);
+ goto done;
+ }
+ ifp = ia->ia_ifp;
+
+ IFDEBUG(D_FORWARD)
+ printf("clnp_forward: packet routed to %s\n",
+ clnp_iso_addrp(&((struct sockaddr_iso *)next_hop)->siso_addr));
+ ENDDEBUG
+
+ INCSTAT(cns_forward);
+
+ /*
+ * If we are an intermediate system and
+ * we are routing outbound on the same ifp that the packet
+ * arrived upon, and we know the next hop snpa,
+ * then generate a redirect request
+ */
+ if ((iso_systype & SNPA_IS) && (inbound_shp) &&
+ (ifp == inbound_shp->snh_ifp))
+ esis_rdoutput(inbound_shp, m, oidx, dst, route.ro_rt);
+ /*
+ * If options are present, update them
+ */
+ if (oidx) {
+ struct iso_addr *mysrc = &ia->ia_addr.siso_addr;
+ if (mysrc == NULL) { /* NOTE(review): mysrc is the address of a member of *ia, and ia was checked non-zero above, so this branch looks unreachable -- confirm */
+ clnp_discard(m, ADDR_DESTUNREACH);
+ INCSTAT(cns_cantforward);
+ clnp_stat.cns_forward--; /* take back the INCSTAT(cns_forward) done above */
+ goto done;
+ } else {
+ (void) clnp_dooptions(m, oidx, ifp, mysrc);
+ }
+ }
+
+#ifdef DECBIT
+ if (ifp->if_snd.ifq_len > congest_threshold) {
+ /*
+ * Congestion! Set the Dec Bit and thank Dave Oran
+ */
+ IFDEBUG(D_FORWARD)
+ printf("clnp_forward: congestion experienced\n");
+ ENDDEBUG
+ if ((oidx) && (oidx->cni_qos_formatp)) {
+ caddr_t qosp = CLNP_OFFTOOPT(m, oidx->cni_qos_formatp);
+ u_char qos = *qosp;
+ IFDEBUG(D_FORWARD)
+ printf("clnp_forward: setting congestion bit (qos x%x)\n", qos);
+ ENDDEBUG
+ if ((qos & CLNPOVAL_GLOBAL) == CLNPOVAL_GLOBAL) {
+ qos |= CLNPOVAL_CONGESTED;
+ INCSTAT(cns_congest_set);
+ *qosp = qos;
+ }
+ }
+ }
+#endif /* DECBIT */
+
+ /*
+ * Dispatch the datagram if it is small enough, otherwise fragment
+ */
+ if (len <= SN_MTU(ifp, route.ro_rt)) {
+ iso_gen_csum(m, CLNP_CKSUM_OFF, (int)clnp->cnf_hdr_len);
+ (void) (*ifp->if_output)(ifp, m, next_hop, route.ro_rt);
+ } else {
+ (void) clnp_fragment(ifp, m, next_hop, len, seg_off, /* flags */0, route.ro_rt);
+ }
+
+done:
+ /*
+ * Free route
+ */
+ if (route.ro_rt != NULL) {
+ RTFREE(route.ro_rt);
+ }
+}
+
+#ifdef notdef
+/*
+ * FUNCTION:		clnp_insert_addr
+ *
+ * PURPOSE:		Write the address part of a clnp datagram: the
+ *			destination (length octet, then octets) followed by
+ *			the source in the same layout.
+ *
+ * RETURNS:		Address of first byte after address part in datagram.
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:		The caller must have left enough room at bufp.
+ *			Compiled out (notdef).
+ */
+caddr_t
+clnp_insert_addr(bufp, srcp, dstp)
+caddr_t				bufp;	/* address of where addr part goes */
+register struct iso_addr	*srcp;	/* ptr to src addr */
+register struct iso_addr	*dstp;	/* ptr to dst addr */
+{
+	register caddr_t	cp = bufp;	/* running output position */
+
+	/* destination goes first */
+	*cp = dstp->isoa_len;
+	cp++;
+	(void) bcopy((caddr_t)dstp, cp, dstp->isoa_len);
+	cp += dstp->isoa_len;
+
+	/* then the source */
+	*cp = srcp->isoa_len;
+	cp++;
+	(void) bcopy((caddr_t)srcp, cp, srcp->isoa_len);
+	cp += srcp->isoa_len;
+
+	return cp;
+}
+
+#endif /* notdef */
+
+/*
+ * FUNCTION:		clnp_route
+ *
+ * PURPOSE:		Find the first hop toward dst.  The caller supplies a
+ *			route structure that may already hold a cached entry;
+ *			that entry is reused when it is still up and still
+ *			leads to dst, otherwise it is released and a fresh
+ *			one is allocated.  With SO_DONTROUTE no routing entry
+ *			is used at all: the destination must be reachable
+ *			through a local interface address.
+ *
+ * RETURNS:		route found - 0
+ *			unix error code
+ *
+ * SIDE EFFECTS:	Bumps rt_use on the routing entry that is used.
+ *
+ * NOTES:		It is up to the caller to free the routing entry
+ *			left in ro.
+ */
+clnp_route(dst, ro, flags, first_hop, ifa)
+	struct iso_addr			*dst;		/* ptr to datagram destination */
+	register struct route_iso	*ro;		/* existing route structure */
+	int				flags;		/* flags for routing */
+	struct sockaddr			**first_hop;	/* result: fill in with ptr to firsthop */
+	struct iso_ifaddr		**ifa;		/* result: fill in with ptr to interface */
+{
+	register struct rtentry	*rt;	/* entry finally chosen */
+
+	if (flags & SO_DONTROUTE) {
+		struct iso_ifaddr	*local;
+
+		/* a cached entry is of no use here */
+		if (ro->ro_rt) {
+			RTFREE(ro->ro_rt);
+			ro->ro_rt = 0;
+		}
+		bzero((caddr_t)&ro->ro_dst, sizeof(ro->ro_dst));
+		bcopy((caddr_t)dst, (caddr_t)&ro->ro_dst.siso_addr,
+			1 + (unsigned)dst->isoa_len);
+		ro->ro_dst.siso_family = AF_ISO;
+		ro->ro_dst.siso_len = sizeof(ro->ro_dst);
+
+		local = iso_localifa(&ro->ro_dst);
+		if (local == 0)
+			return EADDRNOTAVAIL;
+		if (ifa)
+			*ifa = local;
+		if (first_hop)
+			*first_hop = (struct sockaddr *)&ro->ro_dst;
+		return 0;
+	}
+
+	/*
+	 * A cached entry is dropped when it has gone down or when it was
+	 * made for a different destination.
+	 */
+	if (ro->ro_rt != 0 &&
+	    (!(ro->ro_rt->rt_flags & RTF_UP) ||
+	     (Bcmp(ro->ro_dst.siso_data, dst->isoa_genaddr, dst->isoa_len)) != 0)) {
+		IFDEBUG(D_ROUTE)
+			printf("clnp_route: freeing old route: ro->ro_rt 0x%x\n",
+				ro->ro_rt);
+			printf("clnp_route: old route refcnt: 0x%x\n",
+				ro->ro_rt->rt_refcnt);
+		ENDDEBUG
+
+		RTFREE(ro->ro_rt);
+		ro->ro_rt = (struct rtentry *)0;
+	} else {
+		IFDEBUG(D_ROUTE)
+			printf("clnp_route: OK route exists\n");
+		ENDDEBUG
+	}
+
+	if (ro->ro_rt == 0) {
+		/* describe the destination, then ask for an entry */
+		bzero((caddr_t)&ro->ro_dst, sizeof(ro->ro_dst));
+		ro->ro_dst.siso_len = sizeof(ro->ro_dst);
+		ro->ro_dst.siso_family = AF_ISO;
+		Bcopy(dst, &ro->ro_dst.siso_addr, 1 + dst->isoa_len);
+		IFDEBUG(D_ROUTE)
+			printf("clnp_route: allocating new route to %s\n",
+				clnp_iso_addrp(dst));
+		ENDDEBUG
+		rtalloc((struct route *)ro);
+		if (ro->ro_rt == 0)
+			return(ENETUNREACH);	/* rtalloc failed */
+	}
+
+	rt = ro->ro_rt;
+	rt->rt_use++;
+	if (ifa) {
+		*ifa = (struct iso_ifaddr *)rt->rt_ifa;
+		if (*ifa == 0)
+			panic("clnp_route");
+	}
+	if (first_hop) {
+		/* via the gateway if there is one, else straight to dst */
+		*first_hop = (rt->rt_flags & RTF_GATEWAY) ?
+			rt->rt_gateway : (struct sockaddr *)&ro->ro_dst;
+	}
+	return(0);
+}
+
+/*
+ * FUNCTION:		clnp_srcroute
+ *
+ * PURPOSE:		Route a datagram that carries a source route option.
+ *			The next address in the option is used as the routing
+ *			target; once the option is exhausted the final
+ *			destination is used instead.  For a complete source
+ *			route the first hop found must be that target itself.
+ *			first_hop, ifa and the error return behave exactly as
+ *			for clnp_route.
+ *
+ * RETURNS:		0 or unix error code
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:		Remember that option index pointers are really
+ *			offsets from the beginning of the mbuf.
+ */
+clnp_srcroute(options, oidx, ro, first_hop, ifa, final_dst)
+struct mbuf		*options;	/* ptr to options */
+struct clnp_optidx	*oidx;		/* index to options */
+struct route_iso	*ro;		/* route structure */
+struct sockaddr		**first_hop;	/* RETURN: fill in with ptr to firsthop */
+struct iso_ifaddr	**ifa;		/* RETURN: fill in with ptr to interface */
+struct iso_addr		*final_dst;	/* final destination */
+{
+	struct iso_addr	hop;	/* address we route toward */
+	int		error;	/* return code */
+
+	if (CLNPSRCRT_TERM(oidx, options)) {
+		/* source route used up: fall back to the real destination */
+		hop.isoa_len = final_dst->isoa_len;
+		bcopy(final_dst->isoa_genaddr, hop.isoa_genaddr, hop.isoa_len);
+	} else {
+		/* take the current entry of the source route */
+		hop.isoa_len = CLNPSRCRT_CLEN(oidx, options);
+		bcopy(CLNPSRCRT_CADDR(oidx, options), hop.isoa_genaddr, hop.isoa_len);
+	}
+
+	error = clnp_route(&hop, ro, 0, first_hop, ifa);
+
+	/* a complete source route allows no intermediate hop */
+	if (error == 0 &&
+	    (CLNPSRCRT_TYPE(oidx, options) == CLNPOVAL_COMPRT) &&
+	    (!iso_addrmatch1(&(*(struct sockaddr_iso **)first_hop)->siso_addr,&hop))) {
+		IFDEBUG(D_OPTIONS)
+			printf("clnp_srcroute: complete src route failed\n");
+		ENDDEBUG
+		error = EHOSTUNREACH;	/* RAH? would like ESRCRTFAILED */
+	}
+	return error;
+}
+
+/*
+ * FUNCTION:		clnp_echoreply
+ *
+ * PURPOSE:		Send an echo request back to its originator as an
+ *			echo reply, using a throw-away isopcb whose local and
+ *			foreign addresses are the request's destination and
+ *			source.
+ *
+ * RETURNS:		result of clnp_output
+ *
+ * SIDE EFFECTS:
+ */
+clnp_echoreply(ec_m, ec_len, ec_src, ec_dst, ec_oidxp)
+struct mbuf		*ec_m;		/* echo request */
+int			ec_len;		/* length of ec */
+struct sockaddr_iso	*ec_src;	/* src of ec */
+struct sockaddr_iso	*ec_dst;	/* destination of ec (i.e., us) */
+struct clnp_optidx	*ec_oidxp;	/* options index to ec packet */
+{
+	struct isopcb	pcb;		/* stand-in pcb for clnp_output */
+	int		oflags = CLNP_NOCACHE|CLNP_ECHOR;
+	int		rc;
+
+	bzero(&pcb, sizeof(pcb));
+	pcb.isop_laddr = ec_dst;
+	pcb.isop_faddr = ec_src;
+
+	/*
+	 * Options are not copied for now.  If that is ever implemented,
+	 * only the record route option needs copying, and it must be
+	 * reset to zero length.
+	 */
+	rc = clnp_output(ec_m, &pcb, ec_len, oflags);
+
+	IFDEBUG(D_OUTPUT)
+		printf("clnp_echoreply: output returns %d\n", rc);
+	ENDDEBUG
+	return rc;
+}
+
+/*
+ * FUNCTION:		clnp_badmtu
+ *
+ * PURPOSE:		Complain about a route whose mtu was never set and
+ *			supply the interface mtu in its place.
+ *
+ * RETURNS:		mtu of ifp.
+ *
+ * SIDE EFFECTS:	prints notice, slows down system.
+ */
+clnp_badmtu(ifp, rt, line, file)
+struct ifnet	*ifp;	/* outgoing interface */
+struct rtentry	*rt;	/* dst route */
+int		line;	/* source line of the offending use */
+char		*file;	/* source file of the offending use */
+{
+	int	mtu;
+
+	printf("sending on route 0x%x with no mtu, line %d of file %s\n",
+		rt, line, file);
+#ifdef ARGO_DEBUG
+	printf("route dst is ");
+	dump_isoaddr(rt_key(rt));
+#endif
+	mtu = ifp->if_mtu;
+	return mtu;
+}
+
+/*
+ * FUNCTION:		clnp_ypocb - backwards bcopy
+ *
+ * PURPOSE:		Copy len bytes from one buffer to another, last byte
+ *			first and first byte last.
+ *
+ * RETURNS:		none
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:		No attempt has been made to make this efficient
+ */
+clnp_ypocb(from, to, len)
+caddr_t	from;	/* src buffer */
+caddr_t	to;	/* dst buffer */
+u_int	len;	/* number of bytes */
+{
+	register caddr_t	src = from + len;	/* one past last src byte */
+	register caddr_t	dst = to + len;		/* one past last dst byte */
+
+	while (len-- > 0)
+		*--dst = *--src;
+}
+#endif /* ISO */
diff --git a/sys/netiso/clnp_timer.c b/sys/netiso/clnp_timer.c
new file mode 100644
index 000000000000..718d5302f77c
--- /dev/null
+++ b/sys/netiso/clnp_timer.c
@@ -0,0 +1,180 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clnp_timer.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: clnp_timer.c,v 4.2 88/06/29 14:59:05 hagens Exp $ */
+/* $Source: /usr/argo/sys/netiso/RCS/clnp_timer.c,v $ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+extern struct clnp_fragl *clnp_frags;
+
+/*
+ * FUNCTION:		clnp_freefrags
+ *
+ * PURPOSE:		Release everything held for one datagram under
+ *			reassembly: its queued fragments, the saved copy of
+ *			the header, and the fragment list entry itself, which
+ *			is also unlinked from clnp_frags.
+ *
+ * RETURNS:		pointer to the entry that followed cfh in the list
+ *
+ * SIDE EFFECTS:	cns_fragdropped is bumped once per fragment and once
+ *			for the header copy.
+ *
+ * NOTES:
+ *	TODO: send ER back to source
+ */
+struct clnp_fragl *
+clnp_freefrags(cfh)
+register struct clnp_fragl	*cfh;	/* fragment header to delete */
+{
+	struct clnp_fragl	*successor = cfh->cfl_next;
+	struct clnp_fragl	**linkp;	/* link that may point at cfh */
+	struct clnp_frag	*frag, *frag_next;
+
+	/* throw away the fragments collected so far */
+	for (frag = cfh->cfl_frags; frag != NULL; frag = frag_next) {
+		/* fetch the link before the data is freed */
+		frag_next = frag->cfr_next;
+		INCSTAT(cns_fragdropped);
+		m_freem(frag->cfr_data);
+	}
+
+	/* and the saved header */
+	INCSTAT(cns_fragdropped);
+	m_freem(cfh->cfl_orighdr);
+
+	/* take cfh out of the list, if it is on it */
+	for (linkp = &clnp_frags; *linkp != NULL; linkp = &(*linkp)->cfl_next) {
+		if (*linkp == cfh) {
+			*linkp = cfh->cfl_next;
+			break;
+		}
+	}
+
+	/* finally the list entry itself */
+	m_freem(dtom(cfh));
+
+	return(successor);
+}
+
+/*
+ * FUNCTION:		clnp_slowtimo
+ *
+ * PURPOSE:		clnp timer processing; age every datagram on the
+ *			reassembly queue and discard those whose ttl has
+ *			run out.
+ *
+ * RETURNS:		none
+ *
+ * SIDE EFFECTS:	Expired entries are freed through clnp_freefrags and
+ *			counted in cns_fragtimeout.
+ *
+ * NOTES:		The whole walk, including the initial read of
+ *			clnp_frags, runs at splnet so the list cannot change
+ *			under us.
+ */
+clnp_slowtimo()
+{
+	register struct clnp_fragl	*cfh;
+	int				s;
+
+	/* raise the priority first, then look at the list head */
+	s = splnet();
+	cfh = clnp_frags;
+
+	while (cfh != NULL) {
+		if (--cfh->cfl_ttl == 0) {
+			/* clnp_freefrags hands back the following entry */
+			cfh = clnp_freefrags(cfh);
+			INCSTAT(cns_fragtimeout);
+		} else {
+			cfh = cfh->cfl_next;
+		}
+	}
+	splx(s);
+}
+
+/*
+ * FUNCTION:		clnp_drain
+ *
+ * PURPOSE:		Empty the reassembly queue, freeing every datagram
+ *			fragment held on it.
+ *
+ * RETURNS:		none
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ *	TODO: should send back ER
+ */
+clnp_drain()
+{
+	register struct clnp_fragl	*cfh;
+
+	/* clnp_freefrags returns the entry after the one it frees */
+	for (cfh = clnp_frags; cfh != NULL; )
+		cfh = clnp_freefrags(cfh);
+}
diff --git a/sys/netiso/cltp_usrreq.c b/sys/netiso/cltp_usrreq.c
new file mode 100644
index 000000000000..93f8d1c398ee
--- /dev/null
+++ b/sys/netiso/cltp_usrreq.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cltp_usrreq.c 8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef CLTPOVAL_SRC /* XXX -- till files gets changed */
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_var.h>
+#include <netiso/clnp.h>
+#include <netiso/cltp_var.h>
+#endif
+
+/*
+ * CLTP protocol implementation.
+ * Per ISO 8602, December, 1987.
+ */
+
+/*
+ * Start with an empty pcb list: the list head cltb points at itself in
+ * both directions.
+ */
+cltp_init()
+{
+
+	cltb.isop_prev = &cltb;
+	cltb.isop_next = &cltb;
+}
+
+int cltp_cksum = 1; /* non-zero: cltp_output appends a checksum option to each datagram */
+
+
+/*
+ * Deliver an incoming CLTP unit-data TPDU.
+ *
+ * The first octet of the packet gives the header length (less one); the
+ * options that follow the two fixed octets are parsed for the source
+ * transport selector, the destination transport selector and the
+ * checksum.  The datagram is then appended, together with the completed
+ * source address, to the receive buffer of the socket whose local
+ * transport selector matches the destination selector.
+ *
+ * m0 is always consumed.  srcsa is updated in place with the source
+ * selector; when the selector does not fit, a private copy is built in
+ * an mbuf and released again before returning.
+ *
+ * Everything read from the header is checked against the header limit
+ * before use, the header must lie within the first mbuf, and a source
+ * address that would not fit into one mbuf is rejected; packets failing
+ * these checks are counted in cltps_hdrops and dropped.
+ *
+ * Always returns 0.
+ */
+/* ARGSUSED */
+cltp_input(m0, srcsa, dstsa, cons_channel, output)
+	struct mbuf *m0;
+	struct sockaddr *srcsa, *dstsa;
+	u_int cons_channel;
+	int (*output)();
+{
+	register struct isopcb *isop;
+	register struct mbuf *m = m0;
+	register u_char *up = mtod(m, u_char *);
+	register struct sockaddr_iso *src = (struct sockaddr_iso *)srcsa;
+	int len, hdrlen = *up + 1, dlen = 0;
+	int slen;			/* source address length incl. selector */
+	u_char *uplim = up + hdrlen;
+	caddr_t dtsap = 0;
+
+	for (len = 0; m; m = m->m_next)
+		len += m->m_len;
+	/* the options are read through a plain pointer into the first mbuf */
+	if (hdrlen < 2 || hdrlen > m0->m_len) {
+		cltpstat.cltps_hdrops++;
+		goto bad;
+	}
+	up += 2; /* skip header */
+	while (up < uplim) {	/* process options */
+		/* code and length octets, then the value, must be in the header */
+		if (uplim - up < 2 || uplim - up - 2 < up[1]) {
+			cltpstat.cltps_hdrops++;
+			goto bad;
+		}
+		switch (*up) {
+		case CLTPOVAL_SRC:
+			slen = up[1] + (TSEL(src) - (caddr_t)src);
+			/* must fit both siso_len (one octet) and one mbuf */
+			if (slen > 0xff || slen > MLEN) {
+				cltpstat.cltps_hdrops++;
+				goto bad;
+			}
+			src->siso_tlen = up[1];
+			src->siso_len = slen;
+			if (src->siso_len < sizeof(*src))
+				src->siso_len = sizeof(*src);
+			else if (src->siso_len > sizeof(*src)) {
+				if (src != (struct sockaddr_iso *)srcsa) {
+					/* repeated option: reuse the copy we already own */
+					dtom(src)->m_len = src->siso_len;
+				} else {
+					MGET(m, M_DONTWAIT, MT_SONAME);
+					if (m == 0)
+						goto bad;
+					m->m_len = src->siso_len;
+					src = mtod(m, struct sockaddr_iso *);
+					bcopy((caddr_t)srcsa, (caddr_t)src, srcsa->sa_len);
+				}
+			}
+			bcopy((caddr_t)up + 2, TSEL(src), up[1]);
+			up += 2 + src->siso_tlen;
+			continue;
+
+		case CLTPOVAL_DST:
+			dtsap = 2 + (caddr_t)up;
+			dlen = up[1];
+			up += 2 + dlen;
+			continue;
+
+		case CLTPOVAL_CSM:
+			/* the checksum option always occupies four octets */
+			if (uplim - up < 4) {
+				cltpstat.cltps_hdrops++;
+				goto bad;
+			}
+			if (iso_check_csum(m0, len)) {
+				cltpstat.cltps_badsum++;
+				goto bad;
+			}
+			up += 4;
+			continue;
+
+		default:
+			printf("clts: unknown option (%x)\n", up[0]);
+			cltpstat.cltps_hdrops++;
+			goto bad;
+		}
+	}
+	/* both selectors are required */
+	if (dlen == 0 || src->siso_tlen == 0)
+		goto bad;
+	for (isop = cltb.isop_next;; isop = isop->isop_next) {
+		if (isop == &cltb) {
+			cltpstat.cltps_noport++;
+			goto bad;
+		}
+		if (isop->isop_laddr &&
+		    bcmp(TSEL(isop->isop_laddr), dtsap, dlen) == 0)
+			break;
+	}
+	/* strip the CLTP header before queueing the data */
+	m = m0;
+	m->m_len -= hdrlen;
+	m->m_data += hdrlen;
+	if (sbappendaddr(&isop->isop_socket->so_rcv, (struct sockaddr *)src,
+	    m, (struct mbuf *)0) == 0)
+		goto bad;
+	cltpstat.cltps_ipackets++;
+	sorwakeup(isop->isop_socket);
+	m0 = 0;		/* now owned by the socket buffer */
+bad:
+	if (src != (struct sockaddr_iso *)srcsa)
+		m_freem(dtom(src));
+	if (m0)
+		m_freem(m0);
+	return 0;
+}
+
+/*
+ * An asynchronous error has been reported for this cltp user: wake up
+ * anything sleeping on either side of the socket so that the error
+ * status can be collected.
+ */
+cltp_notify(isop)
+	register struct isopcb *isop;
+{
+	register struct socket *so = isop->isop_socket;
+
+	sorwakeup(so);
+	sowwakeup(so);
+}
+
+/*
+ * Control input: translate a PRC_* command concerning the address sa
+ * into a notification of the cltp pcbs through iso_pcbnotify.
+ * Route-death and redirect commands are reported with iso_rtchange,
+ * everything else with cltp_notify; commands that map to no error in
+ * inetctlerrmap are ignored.
+ *
+ * Requests are dropped silently when cmd is not a valid index into
+ * inetctlerrmap (valid values are below PRC_NCMDS), when sa is null or
+ * not an ISO/CCITT address, or when the address has no NSAP part.
+ */
+cltp_ctlinput(cmd, sa)
+	int cmd;
+	struct sockaddr *sa;
+{
+	extern u_char inetctlerrmap[];
+	struct sockaddr_iso *siso;
+	int iso_rtchange();
+
+	/* cmd == PRC_NCMDS must be rejected too, it is used as an array index */
+	if ((unsigned)cmd >= PRC_NCMDS)
+		return;
+	/* test the pointer before looking through it */
+	if (sa == 0)
+		return;
+	if (sa->sa_family != AF_ISO && sa->sa_family != AF_CCITT)
+		return;
+	siso = (struct sockaddr_iso *)sa;
+	if (siso->siso_nlen == 0)
+		return;
+
+	switch (cmd) {
+	case PRC_ROUTEDEAD:
+	case PRC_REDIRECT_NET:
+	case PRC_REDIRECT_HOST:
+	case PRC_REDIRECT_TOSNET:
+	case PRC_REDIRECT_TOSHOST:
+		iso_pcbnotify(&cltb, siso,
+				(int)inetctlerrmap[cmd], iso_rtchange);
+		break;
+
+	default:
+		if (inetctlerrmap[cmd] == 0)
+			return;		/* XXX */
+		iso_pcbnotify(&cltb, siso, (int)inetctlerrmap[cmd],
+			cltp_notify);
+	}
+}
+
+/*
+ * Prepend a CLTP header to the data in m and pass the datagram to
+ * tpclnp_output.  The header consists of a length octet, the TPDU type,
+ * the source and destination transport selector options taken from the
+ * pcb's local and foreign addresses and, when cltp_cksum is non-zero, a
+ * checksum option.
+ *
+ * Returns ENOTCONN (after freeing m) when the pcb lacks a local or a
+ * foreign address, otherwise whatever tpclnp_output returns.
+ */
+cltp_output(isop, m)
+	register struct isopcb *isop;
+	register struct mbuf *m;
+{
+	register struct sockaddr_iso *laddr = isop->isop_laddr;
+	register struct sockaddr_iso *faddr = isop->isop_faddr;
+	register u_char *up;
+	register int len;
+	int hdrlen, docsum;
+
+	if (laddr == 0 || faddr == 0) {
+		m_freem(m);
+		return (ENOTCONN);
+	}
+
+	/*
+	 * Work out the header size and make room for it in front of
+	 * the data.
+	 */
+	docsum = /*isop->isop_flags & CLNP_NO_CKSUM*/ cltp_cksum;
+	hdrlen = 2 + 2 + laddr->siso_tlen
+		   + 2 + faddr->siso_tlen;
+	if (docsum)
+		hdrlen += 4;
+	M_PREPEND(m, hdrlen, M_WAIT);
+	len = m->m_pkthdr.len;
+
+	/*
+	 * Fixed part, then the source and destination selector options.
+	 */
+	up = mtod(m, u_char *);
+	*up++ = hdrlen - 1;
+	*up++ = UD_TPDU_type;
+	*up++ = CLTPOVAL_SRC;
+	*up++ = laddr->siso_tlen;
+	bcopy(TSEL(laddr), (caddr_t)up, laddr->siso_tlen);
+	up += laddr->siso_tlen;
+	*up++ = CLTPOVAL_DST;
+	*up++ = faddr->siso_tlen;
+	bcopy(TSEL(faddr), (caddr_t)up, faddr->siso_tlen);
+
+	/*
+	 * Checksum option last; iso_gen_csum is given the offset of its
+	 * value field.
+	 */
+	if (docsum) {
+		up += faddr->siso_tlen;
+		up[0] = CLTPOVAL_CSM;
+		up[1] = 2;
+		iso_gen_csum(m, 2 + up - mtod(m, u_char *), len);
+	}
+	cltpstat.cltps_opackets++;
+	return (tpclnp_output(isop, m, len, !docsum));
+}
+
+u_long cltp_sendspace = 9216; /* really max datagram size */
+u_long cltp_recvspace = 40 * (1024 + sizeof(struct sockaddr_iso));
+ /* 40 1K datagrams */
+
+
+/*
+ * User request entry point for CLTP sockets.
+ *
+ * so is the socket, req the PRU_* request; the meaning of m, nam and
+ * control depends on the request.  PRU_CONTROL is passed straight to
+ * iso_control.  A request other than PRU_ATTACH on a socket without a
+ * pcb, or any request carrying control data, fails with EINVAL.
+ *
+ * Returns 0 or an errno value.  Except for PRU_CONTROL, PRU_SENSE,
+ * PRU_RCVD and PRU_RCVOOB, which return directly, control and m are
+ * freed here unless the request has already disposed of m.
+ *
+ * For PRU_SOCKADDR and PRU_PEERADDR the address is returned in nam.
+ */
+/*ARGSUSED*/
+cltp_usrreq(so, req, m, nam, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	register struct isopcb *isop = sotoisopcb(so);
+	int s, error = 0;
+
+	if (req == PRU_CONTROL)
+		return (iso_control(so, (int)m, (caddr_t)nam,
+			(struct ifnet *)control));
+	if ((isop == NULL && req != PRU_ATTACH) ||
+	    (control && control->m_len)) {
+		error = EINVAL;
+		goto release;
+	}
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (isop != NULL) {
+			error = EINVAL;
+			break;
+		}
+		error = iso_pcballoc(so, &cltb);
+		if (error)
+			break;
+		error = soreserve(so, cltp_sendspace, cltp_recvspace);
+		break;
+
+	case PRU_DETACH:
+		iso_pcbdetach(isop);
+		break;
+
+	case PRU_BIND:
+		error = iso_pcbbind(isop, nam);
+		break;
+
+	case PRU_LISTEN:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_CONNECT:
+		if (isop->isop_faddr) {
+			error = EISCONN;
+			break;
+		}
+		error = iso_pcbconnect(isop, nam);
+		if (error == 0)
+			soisconnected(so);
+		break;
+
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_ACCEPT:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_DISCONNECT:
+		if (isop->isop_faddr == 0) {
+			error = ENOTCONN;
+			break;
+		}
+		iso_pcbdisconnect(isop);
+		so->so_state &= ~SS_ISCONNECTED;		/* XXX */
+		break;
+
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		break;
+
+	case PRU_SEND:
+		if (nam) {
+			if (isop->isop_faddr) {
+				error = EISCONN;
+				break;
+			}
+			/*
+			 * Must block input while temporarily connected.
+			 */
+			s = splnet();
+			error = iso_pcbconnect(isop, nam);
+			if (error) {
+				splx(s);
+				break;
+			}
+		} else {
+			if (isop->isop_faddr == 0) {
+				error = ENOTCONN;
+				break;
+			}
+		}
+		error = cltp_output(isop, m);
+		m = 0;		/* cltp_output has disposed of the data */
+		if (nam) {
+			/* undo the temporary connection */
+			iso_pcbdisconnect(isop);
+			splx(s);
+		}
+		break;
+
+	case PRU_ABORT:
+		soisdisconnected(so);
+		iso_pcbdetach(isop);
+		break;
+
+	case PRU_SOCKADDR:
+		/* the address goes into nam, whose length is set to match */
+		if (isop->isop_laddr) {
+			nam->m_len = isop->isop_laddr->siso_len;
+			bcopy((caddr_t)isop->isop_laddr, mtod(nam, caddr_t),
+				nam->m_len);
+		}
+		break;
+
+	case PRU_PEERADDR:
+		/* as above, for the foreign address */
+		if (isop->isop_faddr) {
+			nam->m_len = isop->isop_faddr->siso_len;
+			bcopy((caddr_t)isop->isop_faddr, mtod(nam, caddr_t),
+				nam->m_len);
+		}
+		break;
+
+	case PRU_SENSE:
+		/*
+		 * stat: don't bother with a blocksize.
+		 */
+		return (0);
+
+	case PRU_SENDOOB:
+	case PRU_FASTTIMO:
+	case PRU_SLOWTIMO:
+	case PRU_PROTORCV:
+	case PRU_PROTOSEND:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_RCVD:
+	case PRU_RCVOOB:
+		return (EOPNOTSUPP);	/* do not free mbuf's */
+
+	default:
+		panic("cltp_usrreq");
+	}
+release:
+	if (control != NULL)
+		m_freem(control);
+	if (m != NULL)
+		m_freem(m);
+	return (error);
+}
diff --git a/sys/netiso/cltp_var.h b/sys/netiso/cltp_var.h
new file mode 100644
index 000000000000..b4e08f2c99b5
--- /dev/null
+++ b/sys/netiso/cltp_var.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cltp_var.h 8.1 (Berkeley) 6/10/93
+ */
+
+#define UD_TPDU_type 0x40 /* packet type */
+
+#define CLTPOVAL_SRC 0xc1 /* Source TSAP -- required */
+#define CLTPOVAL_DST 0xc2 /* Destination TSAP -- required */
+#define CLTPOVAL_CSM 0xc3 /* Checksum parameter -- optional */
+
+struct cltpstat { /* CLTP event counters, one int per event; the code that bumps them is not in this header */
+ int cltps_hdrops; /* presumably packets dropped for header problems -- TODO confirm against the CLTP input code */
+ int cltps_badsum; /* presumably checksum failures -- TODO confirm */
+ int cltps_badlen; /* presumably length mismatches -- TODO confirm */
+ int cltps_noport; /* presumably no matching destination TSAP -- TODO confirm */
+ int cltps_ipackets; /* presumably total packets received -- TODO confirm */
+ int cltps_opackets; /* presumably total packets sent -- TODO confirm */
+};
+
+#ifdef KERNEL
+struct isopcb cltb; /* NOTE(review): defined (no extern) in a header, so each including file gets a tentative definition; presumably the head of the CLTP pcb list -- confirm */
+struct cltpstat cltpstat; /* the single instance of the CLTP counters; same header-definition caveat */
+#endif
diff --git a/sys/netiso/cons.h b/sys/netiso/cons.h
new file mode 100644
index 000000000000..b0739de1eac0
--- /dev/null
+++ b/sys/netiso/cons.h
@@ -0,0 +1,92 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cons.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: cons.h,v 4.4 88/09/09 19:01:28 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/cons.h,v $
+ *
+ * interface between TP and CONS
+ */
+
+#define CONSOPT_X25CRUD 0x01 /* set x.25 call request user data */
+
+struct dte_addr { /* DTE address: 7 address bytes plus a length byte */
+ u_char dtea_addr[7]; /* address bytes; presumably packed BCD, two digits per byte -- confirm */
+ u_char dtea_niblen; /* presumably the number of valid nibbles in dtea_addr -- confirm */
+};
+
+#ifdef KERNEL
+
+#define CONN_OPEN 0x33 /* CONN_*: four distinct codes 0x30..0x33; their users are not in this header, presumably TP<->CONS connection events -- confirm */
+#define CONN_CONFIRM 0x30
+#define CONN_REFUSE 0x31
+#define CONN_CLOSE 0x32
+
+#define CONS_IS_DGM 0x1 /* boolean pair; presumably "datagram use" yes/no -- confirm at the callers */
+#define CONS_NOT_DGM 0x0
+
+#ifndef PRC_NCMDS /* PRC_NCMDS is used here as the "protosw.h already included" marker */
+#include <sys/protosw.h>
+#endif /* PRC_NCMDS */
+
+#define PRC_CONS_SEND_DONE 2 /* something unused in protosw.h */
+
+#endif /* KERNEL */
diff --git a/sys/netiso/cons_pcb.h b/sys/netiso/cons_pcb.h
new file mode 100644
index 000000000000..b8adc373947c
--- /dev/null
+++ b/sys/netiso/cons_pcb.h
@@ -0,0 +1,193 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cons_pcb.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: cons_pcb.h,v 4.2 88/06/29 14:59:08 hagens Exp $ */
+/* $Source: /usr/argo/sys/netiso/RCS/cons_pcb.h,v $ */
+
+/*
+ * protocol control block for the connection oriented network service
+ */
+
+/*
+ * legit port #s for cons "transport" are 0..23 for su users only, and
+ * 1024..1099 for public users
+ */
+#define X25_SBSIZE 512 /* presumably a socket buffer size in bytes -- confirm at the users */
+#define X25_PORT_RESERVED 24 /* first port number past the 0..23 superuser-only range */
+#define X25_PORT_USERMAX 1099 /* highest port in the public 1024..1099 range */
+#define X25_FACIL_LEN_MAX 109 /* presumably the longest X.25 facilities field, in bytes -- confirm */
+#define X25_PARTIAL_PKT_LEN_MAX (MLEN - sizeof(struct cons_pcb)) /* MLEN minus the size of one struct cons_pcb */
+
+#ifndef ARGO_DEBUG
+#define X25_TTL 600 /* 5 min; 600 -> 5 min and 120 -> 1 min imply 2 ticks per second, presumably slow-timeout ticks -- confirm */
+#else /* ARGO_DEBUG */
+#define X25_TTL 120 /* 1 min */
+#endif /* ARGO_DEBUG */
+
+struct cons_pcb { /* CONS pcb: a struct isopcb followed by CONS-specific state */
+ struct isopcb _co_isopcb; /* must stay first: the co_* macros below alias its members */
+#define co_next _co_isopcb.isop_next
+/* prev used for netstat only */
+#define co_prev _co_isopcb.isop_prev
+#define co_head _co_isopcb.isop_head
+#define co_laddr _co_isopcb.isop_laddr
+#define co_faddr _co_isopcb.isop_faddr
+#define co_lport _co_isopcb.isop_laddr.siso_tsuffix /* NOTE(review): cltp_usrreq uses isop_laddr as a pointer (isop->isop_laddr->siso_len), so this '.' access looks stale -- confirm before using */
+#define co_fport _co_isopcb.isop_faddr.siso_tsuffix /* NOTE(review): same concern as co_lport */
+#define co_route _co_isopcb.isop_route
+#define co_socket _co_isopcb.isop_socket
+#define co_chanmask _co_isopcb.isop_chanmask
+#define co_negchanmask _co_isopcb.isop_negchanmask
+#define co_x25crud _co_isopcb.isop_x25crud
+#define co_x25crud_len _co_isopcb.isop_x25crud_len
+ u_short co_state; /* presumably one of the pcb state values (CLOSED..OPEN) defined after this struct -- confirm */
+ u_char co_flags; /* presumably a mask of the CONSF_* bits defined after this struct -- confirm */
+ u_short co_ttl; /* time to live timer */
+ u_short co_init_ttl; /* initial value of ttl */
+ int co_channel; /* logical channel */
+ struct ifnet * co_ifp; /* interface */
+ struct protosw *co_proto; /* protocol switch entry; which one is not visible here */
+
+ struct ifqueue co_pending; /* queue data to send when connection
+ completes*/
+#define MAX_DTE_LEN 0x7 /* 17 bcd digits -- NOTE(review): 7 bytes hold at most 14 BCD digits; "17" looks like a typo, confirm */
+ struct dte_addr co_peer_dte; /* DTE address of the peer */
+ struct cons_pcb *co_myself; /* DEBUGGING AID */
+};
+
+/*
+ * X.25 Packet types
+ */
+#define XPKT_DATA 1
+#define XPKT_INTERRUPT 2
+#define XPKT_FLOWCONTROL 3 /* not delivered? */
+
+/*
+ * pcb states
+ */
+
+#define CLOSED 0x0
+#define LISTENING 0x1
+#define CLOSING 0x2
+/* USABLE STATES MUST BE LAST */
+#define CONNECTING 0x3
+#define ACKWAIT 0x4
+#define OPEN 0x5
+#define MIN_USABLE_STATE CONNECTING /* states >= this value (CONNECTING, ACKWAIT, OPEN) are the "usable" ones */
+
+#define cons_NSTATES 0x6 /* number of states above (0x0 .. 0x5) */
+
+
+/* type */
+#define CONSF_OCRE 0x40 /* created on OUTPUT */
+#define CONSF_ICRE 0x20 /* created on INPUT */
+#define CONSF_unused 0x10 /* not used */
+#define CONSF_unused2 0x08 /* not used */
+#define CONSF_DGM 0x04 /* for dgm use only */
+#define CONSF_XTS 0x02 /* for cons-as-transport-service */
+#define CONSF_LOOPBACK 0x01 /* loopback was on when connection commenced */
+
+#define X_NOCHANNEL 0x80 /* presumably a "no logical channel assigned" marker -- confirm where it is compared */
+
+
+struct cons_stat { /* CONS event counters; the instance cons_stat is defined at the closing brace */
+ u_int co_intr; /* input from eicon board */
+ u_int co_restart; /* ecn_restart() request issued to board */
+ u_int co_slowtimo; /* times slowtimo called */
+ u_int co_timedout; /* connections closed by slowtimo */
+ u_int co_ack; /* ECN_ACK indication came from eicon board */
+ u_int co_receive; /* ECN_RECEIVE indication came from eicon board */
+ u_int co_send; /* ECN_SEND request issued to board */
+ u_int co_reset_in; /* ECN_RESET indication came from eicon board */
+ u_int co_reset_out; /* ECN_RESET issued to the eicon board */
+ u_int co_clear_in; /* ECN_CLEAR indication came from eicon board */
+ u_int co_clear_out; /* ECN_CLEAR request issued to board */
+ u_int co_refuse; /* ECN_REFUSE indication came from eicon board */
+ u_int co_accept; /* ECN_ACCEPT indication came from eicon board */
+ u_int co_connect; /* ECN_CONNECT indication came from eicon board */
+ u_int co_call; /* ECN_CALL request issued to board */
+ u_int co_Rdrops; /* bad pkt came from ll */
+ u_int co_Xdrops; /* can't keep up */
+
+ u_int co_intrpt_pkts_in; /* interrupt packets in */
+ u_int co_avg_qlen; /* presumably an average queue length -- TODO confirm how it is maintained */
+ u_int co_avg_qdrop; /* presumably an average of queue drops -- TODO confirm */
+ u_int co_active; /* presumably a count of active connections -- TODO confirm */
+
+ u_int co_noresources; /* presumably allocation failures -- TODO confirm */
+ u_int co_parse_facil_err; /* presumably errors parsing X.25 facilities -- TODO confirm */
+ u_int co_addr_proto_consist_err; /* presumably address/protocol consistency errors -- TODO confirm */
+ u_int co_no_copcb; /* presumably "no cons_pcb found" events -- TODO confirm */
+} cons_stat;
+
+u_char x25_error_stats[CONL_ERROR_MAX + 1]; /* one u_char slot per index 0..CONL_ERROR_MAX; CONL_ERROR_MAX must be defined before this header is included */
+
+struct ifqueue consintrq; /* presumably the CONS input queue -- confirm; defined (not extern) in this header */
+
+/* reasons for clear are in a data mbuf chained to a clear ecn_request */
+struct e_clear_data {
+ u_char ecd_cause; /* presumably the X.25 clearing cause code -- confirm */
+ u_char ecd_diagnostic; /* presumably the X.25 diagnostic code -- confirm */
+};
+
+#ifdef KERNEL
+#define IncStat(XYZ) cons_stat.XYZ++ /* bump member XYZ of cons_stat; eonvar.h #undefs and redefines IncStat for eonstat */
+#endif /* KERNEL */
diff --git a/sys/netiso/eonvar.h b/sys/netiso/eonvar.h
new file mode 100644
index 000000000000..93f99172f749
--- /dev/null
+++ b/sys/netiso/eonvar.h
@@ -0,0 +1,170 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)eonvar.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+
+#define EON_986_VERSION 0x3 /* same value as the seon_vers byte ("03") of struct sockaddr_eon */
+#define EON_VERSION 0x1 /* same value as eonh_vers ("value 1") of struct eon_hdr */
+
+#define EON_CACHESIZE 30 /* presumably the number of cache entries -- confirm at the user */
+
+#define E_FREE 1 /* E_*: four small codes; presumably cache-entry kinds (free / link / ES / IS) -- confirm */
+#define E_LINK 2
+#define E_ES 3
+#define E_IS 4
+
+
+/*
+ * this overlays a sockaddr_iso
+ *
+ * Layout note: the members from seon_len through seon_pad3 are all
+ * u_char and add up to 20 bytes, so seon_ipaddr starts at offset 20,
+ * which is why it is naturally aligned today (see the warning below).
+ * seon_afi through seon_protoid is also 20 (0x14) bytes, the value
+ * eon_template stores in seon_adrlen.
+ */
+
+struct sockaddr_eon {
+ u_char seon_len; /* Length */
+ u_char seon_family; /* AF_ISO */
+ u_char seon_status; /* overlays session suffixlen */
+#define EON_ESLINK_UP 0x1
+#define EON_ESLINK_DOWN 0x2
+#define EON_ISLINK_UP 0x10
+#define EON_ISLINK_DOWN 0x20
+/* no change is neither up or down */
+ u_char seon_pad1; /* 0, overlays tsfxlen */
+ u_char seon_adrlen; /* eon_template sets this to 0x14 */
+ u_char seon_afi; /* 47 */
+ u_char seon_idi[2]; /* 0006 */
+ u_char seon_vers; /* 03 */
+ u_char seon_glbnum[2]; /* see RFC 1069 */
+ u_char seon_RDN[2]; /* see RFC 1070 */
+ u_char seon_pad2[3]; /* see RFC 1070 */
+ u_char seon_LAREA[2]; /* see RFC 1070 */
+ u_char seon_pad3[2]; /* see RFC 1070 */
+ /* right now ip addr is aligned -- be careful --
+ * future revisions may have it u_char[4]
+ */
+ u_int seon_ipaddr; /* a.b.c.d */
+ u_char seon_protoid; /* NSEL */
+};
+
+#ifdef EON_TEMPLATE /* only files that define EON_TEMPLATE before including this header get the definition */
+struct sockaddr_eon eon_template = {
+ sizeof (eon_template), AF_ISO, 0, 0, 0x14, /* seon_len, seon_family, seon_status, seon_pad1, seon_adrlen (0x14 = 20 bytes, seon_afi through seon_protoid) */
+ 0x47, 0x0, 0x6, 0x3, 0 /* seon_afi, seon_idi[0], seon_idi[1], seon_vers, seon_glbnum[0]; the remaining members are zero-initialised */
+};
+#endif
+
+#define DOWNBITS ( EON_ESLINK_DOWN | EON_ISLINK_DOWN ) /* both "link down" bits of seon_status */
+#define UPBITS ( EON_ESLINK_UP | EON_ISLINK_UP ) /* both "link up" bits of seon_status */
+
+#define SIOCSEONCORE _IOWR('i',10, struct iso_ifreq) /* EON core member; presumably the "set" ioctl (S in the name) -- confirm */
+#define SIOCGEONCORE _IOWR('i',11, struct iso_ifreq) /* EON core member; presumably the "get" ioctl (G in the name) -- confirm */
+
+struct eon_hdr { /* EON header; placed directly after the IP header in struct eon_iphdr */
+ u_char eonh_vers; /* value 1 */
+ u_char eonh_class; /* address multicast class, below */
+#define EON_NORMAL_ADDR 0x0
+#define EON_MULTICAST_ES 0x1
+#define EON_MULTICAST_IS 0x2
+#define EON_BROADCAST 0x3
+ u_short eonh_csum; /* osi checksum (choke)*/
+};
+struct eon_iphdr { /* IP header followed immediately by the EON header */
+ struct ip ei_ip;
+ struct eon_hdr ei_eh;
+};
+#define EONIPLEN (sizeof(struct eon_hdr) + sizeof(struct ip)) /* sum of the two member sizes of struct eon_iphdr */
+
+/* stole these 2 bits of the flags for I-am-ES and I-am-IS (presumably the interface flags word, given the IFF_ prefix -- confirm) */
+#define IFF_ES 0x400
+#define IFF_IS 0x800
+
+struct eon_stat { /* EON counters; the instance eonstat is defined at the closing brace */
+ int es_in_multi_es; /* es_in_* / es_out_*: one counter per eonh_class value, presumably for input and output respectively -- confirm */
+ int es_in_multi_is;
+ int es_in_broad;
+ int es_in_normal;
+ int es_out_multi_es;
+ int es_out_multi_is;
+ int es_out_broad;
+ int es_out_normal;
+ int es_ipout; /* presumably packets handed to IP for output -- TODO confirm */
+
+ int es_icmp[PRC_NCMDS]; /* one slot per PRC_* command code; presumably control-input notifications -- confirm */
+ /* errors */
+ int es_badcsum;
+ int es_badhdr;
+} eonstat;
+
+#undef IncStat /* drop any earlier IncStat (cons_pcb.h defines one for cons_stat) */
+#define IncStat(xxx) eonstat.xxx++ /* from here on IncStat bumps members of eonstat */
+
+typedef struct qhdr { /* two-pointer list header; note queue_t is a POINTER to struct qhdr */
+ struct qhdr *link, *rlink; /* presumably forward and reverse links -- confirm */
+} *queue_t;
+
+struct eon_llinfo { /* per-route EON link-level info */
+ struct qhdr el_qhdr; /* keep all in a list */
+ int el_flags; /* cache valid ? */
+ int el_snpaoffset; /* IP address contained in dst nsap */
+ struct rtentry *el_rt; /* back pointer to parent route */
+ struct eon_iphdr el_ei; /* precomputed portion of hdr */
+ struct route el_iproute; /* if direct route cache IP info */
+ /* if gateway, cache secondary route */
+};
+#define el_iphdr el_ei.ei_ip /* shorthand for the IP header inside el_ei */
+#define el_eonhdr el_ei.ei_eh /* shorthand for the EON header inside el_ei */
diff --git a/sys/netiso/esis.c b/sys/netiso/esis.c
new file mode 100644
index 000000000000..f4ade0f4fc9e
--- /dev/null
+++ b/sys/netiso/esis.c
@@ -0,0 +1,1063 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)esis.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+
+#ifdef ISO
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_snpac.h>
+#include <netiso/clnl.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/esis.h>
+#include <netiso/argo_debug.h>
+
+/*
+ * Global variables to esis implementation
+ *
+ * esis_holding_time - the holding time (sec) parameter for outgoing pdus
+ * esis_config_time - the frequency (sec) that hellos are generated
+ * esis_esconfig_time - suggested es configuration time placed in the
+ * ish.
+ * esis_pcb - head of the doubly linked list of esis raw sockets:
+ * self-linked by esis_init, extended by PRU_ATTACH in
+ * esis_usrreq, tested for emptiness in esis_input.
+ * esis_sendspace, esis_recvspace - buffer reservations handed to
+ * soreserve() when a socket attaches.
+ *
+ */
+struct rawcb esis_pcb;
+void esis_config(), snpac_age(); /* both are scheduled with timeout() from esis_init */
+int esis_sendspace = 2048;
+int esis_recvspace = 2048;
+short esis_holding_time = ESIS_HT;
+short esis_config_time = ESIS_CONFIG;
+short esis_esconfig_time = ESIS_CONFIG;
+extern int iso_systype;
+struct sockaddr_dl esis_dl = { sizeof(esis_dl), AF_LINK }; /* first two members set to its own size and AF_LINK; the rest are zero */
+extern char all_es_snpa[], all_is_snpa[];
+
+#define EXTEND_PACKET(m, mhdr, cp)\
+ if (((m)->m_next = m_getclr(M_DONTWAIT, MT_HEADER)) == NULL) {\
+ esis_stat.es_nomem++;\
+ m_freem(mhdr);\
+ return;\
+ } else {\
+ (m) = (m)->m_next;\
+ (cp) = mtod((m), caddr_t);\
+ } /* EXTEND_PACKET: chain a new MT_HEADER mbuf from m_getclr() after m, then advance m to it and point cp at its data. On allocation failure it counts es_nomem, frees the chain mhdr and RETURNS from the enclosing function with no value, so it is only usable where a bare "return;" is acceptable. It expands to a bare if/else, not a do-while(0) statement. */
+/*
+ * FUNCTION:		esis_init
+ *
+ * PURPOSE:		One-time setup of the kernel part of ES-IS: make the
+ *			esis_pcb and llinfo_llc lists empty (self-linked),
+ *			schedule snpac_age and esis_config to run hz ticks
+ *			from now, and install the input handlers in
+ *			clnl_protox[].
+ *
+ * RETURNS:		nothing
+ *
+ * SIDE EFFECTS:	Writes esis_pcb, llinfo_llc and clnl_protox[]; arms
+ *			two timeouts.
+ *
+ * NOTES:		isis_input is registered for ISO10589_ISIS here too.
+ *			x25esis_input is registered only under ISO_X25ESIS.
+ */
+esis_init()
+{
+	extern struct clnl_protosw clnl_protox[256];
+	int esis_input(), isis_input();
+#ifdef	ISO_X25ESIS
+	int x25esis_input();
+#endif	/* ISO_X25ESIS */
+
+	/* An empty doubly linked list points at its own head. */
+	esis_pcb.rcb_prev = &esis_pcb;
+	esis_pcb.rcb_next = &esis_pcb;
+	llinfo_llc.lc_prev = &llinfo_llc;
+	llinfo_llc.lc_next = &llinfo_llc;
+
+	/* First run of the periodic routines, hz ticks from now. */
+	timeout(snpac_age, (caddr_t)0, hz);
+	timeout(esis_config, (caddr_t)0, hz);
+
+	/* Hook the protocol input routines into the CLNL switch. */
+	clnl_protox[ISO9542_ESIS].clnl_input = esis_input;
+	clnl_protox[ISO10589_ISIS].clnl_input = isis_input;
+#ifdef	ISO_X25ESIS
+	clnl_protox[ISO9542X25_ESIS].clnl_input = x25esis_input;
+#endif	/* ISO_X25ESIS */
+}
+
+/*
+ * FUNCTION:		esis_usrreq
+ *
+ * PURPOSE:		Handle user level esis requests
+ *
+ * RETURNS:		0 or appropriate errno:
+ *			EACCES     - socket lacks SS_PRIV
+ *			EINVAL     - no pcb (except for PRU_ATTACH), pcb
+ *				     already present on PRU_ATTACH, or
+ *				     PRU_SEND without a name
+ *			ENOBUFS    - pcb allocation failed
+ *			EOPNOTSUPP - PRU_SENDOOB and every request not
+ *				     listed in the switch
+ *			otherwise the result of soreserve()/isis_output()
+ *
+ * SIDE EFFECTS:	PRU_ATTACH allocates a rawcb and links it onto
+ *			esis_pcb.  PRU_SEND hands m to isis_output().
+ *			Every path that reaches "release" frees m if it is
+ *			still set.  For PRU_SEND and PRU_SENDOOB the control
+ *			chain is freed as well, as cltp_usrreq does;
+ *			previously it was leaked, and PRU_SENDOOB also
+ *			leaked m by falling into the default case.
+ *
+ * NOTES:		The default case and PRU_SENSE return directly and
+ *			free nothing, as before.  control is released only
+ *			for the send requests because that is the only place
+ *			this function can rely on it being an mbuf chain
+ *			owned by the protocol -- other requests are
+ *			deliberately left untouched.
+ */
+/*ARGSUSED*/
+esis_usrreq(so, req, m, nam, control)
+struct socket	*so;		/* socket: used only to get to this code */
+int		req;		/* request */
+struct mbuf	*m;		/* data for request */
+struct mbuf	*nam;		/* optional name */
+struct mbuf	*control;	/* optional control */
+{
+	struct rawcb *rp = sotorawcb(so);
+	int error = 0;
+
+	if ((so->so_state & SS_PRIV) == 0) {
+		error = EACCES;
+		goto release;
+	}
+	if (rp == NULL && req != PRU_ATTACH) {
+		error = EINVAL;
+		goto release;
+	}
+
+	switch (req) {
+	case PRU_ATTACH:
+		if (rp != NULL) {
+			error = EINVAL;
+			break;
+		}
+		MALLOC(rp, struct rawcb *, sizeof(*rp), M_PCB, M_WAITOK);
+		so->so_pcb = (caddr_t)rp;
+		if (rp != NULL) {
+			bzero(so->so_pcb, sizeof(*rp));
+			insque(rp, &esis_pcb);
+			rp->rcb_socket = so;
+			error = soreserve(so, esis_sendspace, esis_recvspace);
+		} else
+			error = ENOBUFS;
+		break;
+
+	case PRU_SEND:
+		if (nam == NULL) {
+			error = EINVAL;
+			break;
+		}
+		/* error checking here */
+		error = isis_output(mtod(nam,struct sockaddr_dl *), m);
+		m = NULL;	/* isis_output now owns the data chain */
+		break;
+
+	case PRU_SENDOOB:
+		/* no out-of-band data; fall out so m and control are freed */
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_DETACH:
+		raw_detach(rp);
+		break;
+
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		break;
+
+	case PRU_ABORT:
+		soisdisconnected(so);
+		raw_detach(rp);
+		break;
+
+	case PRU_SENSE:
+		return (0);
+
+	default:
+		return (EOPNOTSUPP);
+	}
+release:
+	/* control is never consumed here; drop it for the send requests */
+	if (control != NULL && (req == PRU_SEND || req == PRU_SENDOOB))
+		m_freem(control);
+	if (m != NULL)
+		m_freem(m);
+
+	return (error);
+}
+
+/*
+ * FUNCTION:		esis_input
+ *
+ * PURPOSE:		Validate an incoming esis packet and dispatch it by
+ *			PDU type (low five bits of esis_type) to the ESH, ISH
+ *			or RD input routine.
+ *
+ * RETURNS:		nothing
+ *
+ * SIDE EFFECTS:	Counts es_badcsum, es_badvers or es_badtype for
+ *			packets that fail the matching check.  Whatever the
+ *			outcome, the packet is finally handed to isis_input()
+ *			if any socket is on the esis_pcb list, and freed
+ *			otherwise.
+ *
+ * NOTES:		A failed checksum or version check skips the
+ *			dispatch but not the final hand-off.
+ */
+esis_input(m0, shp)
+struct mbuf		*m0;		/* ptr to first mbuf of pkt */
+struct snpa_hdr	*shp;	/* subnetwork header */
+{
+	register struct esis_fixed *pdu = mtod(m0, struct esis_fixed *);
+
+	if (ESIS_CKSUM_REQUIRED(pdu) &&
+	    iso_check_csum(m0, (int)pdu->esis_hdr_len)) {
+		/* checksum was required and did not verify */
+		esis_stat.es_badcsum++;
+	} else if (pdu->esis_vers != ESIS_VERSION) {
+		esis_stat.es_badvers++;
+	} else {
+		switch (pdu->esis_type & 0x1f) {
+		case ESIS_ESH:
+			esis_eshinput(m0, shp);
+			break;
+
+		case ESIS_ISH:
+			esis_ishinput(m0, shp);
+			break;
+
+		case ESIS_RD:
+			esis_rdinput(m0, shp);
+			break;
+
+		default:
+			esis_stat.es_badtype++;
+			break;
+		}
+	}
+
+	/* Listeners on the raw socket list get every packet, good or bad. */
+	if (esis_pcb.rcb_next == &esis_pcb)
+		m_freem(m0);
+	else
+		isis_input(m0, shp);
+}
+
+/*
+ * FUNCTION:		esis_rdoutput
+ *
+ * PURPOSE:		Transmit a redirect pdu
+ *
+ * RETURNS:		nothing
+ *
+ * SIDE EFFECTS:	Counts es_rdsent for every redirect attempted,
+ *			es_badtype when no AF_LINK gateway can be found for
+ *			the next hop, and es_nomem on mbuf shortage.  The
+ *			finished pdu is passed to the if_output routine of
+ *			the interface the original packet arrived on,
+ *			addressed to that packet's source snpa.
+ *
+ * NOTES:		Assumes there is enough space for fixed part of header,
+ *			DA, BSNPA and NET in first mbuf.
+ *
+ *			When the route is a gateway route, the gateway's own
+ *			route is looked up with rtalloc1(), which returns a
+ *			referenced entry.  That reference is dropped again
+ *			right after the lookup (it used to be leaked on
+ *			every redirect); the entry is only read, and only
+ *			before if_output is called.
+ */
+esis_rdoutput(inbound_shp, inbound_m, inbound_oidx, rd_dstnsap, rt)
+struct snpa_hdr		*inbound_shp;	/* snpa hdr from incoming packet */
+struct mbuf			*inbound_m;		/* incoming pkt itself */
+struct clnp_optidx	*inbound_oidx;	/* clnp options assoc with incoming pkt */
+struct iso_addr		*rd_dstnsap;	/* ultimate destination of pkt */
+struct rtentry		*rt;			/* snpa cache info regarding next hop of
+										pkt */
+{
+	struct mbuf			*m, *m0;
+	caddr_t				cp;
+	struct esis_fixed	*pdu;
+	int					len;
+	struct sockaddr_iso	siso;
+	struct ifnet		*ifp = inbound_shp->snh_ifp;
+	struct sockaddr_dl	*sdl;
+	struct iso_addr		*rd_gwnsap;
+
+	if (rt->rt_flags & RTF_GATEWAY) {
+		rd_gwnsap = &((struct sockaddr_iso *)rt->rt_gateway)->siso_addr;
+		rt = rtalloc1(rt->rt_gateway, 0);
+		/* give back the reference rtalloc1 took; we only peek at rt */
+		if (rt)
+			rt->rt_refcnt--;
+	} else
+		rd_gwnsap = &((struct sockaddr_iso *)rt_key(rt))->siso_addr;
+	if (rt == 0 || (sdl = (struct sockaddr_dl *)rt->rt_gateway) == 0 ||
+		sdl->sdl_family != AF_LINK) {
+		/* maybe we should have a function that you
+		   could put in the iso_ifaddr structure
+		   which could translate iso_addrs into snpa's
+		   where there is a known mapping for that address type */
+		esis_stat.es_badtype++;
+		return;
+	}
+	esis_stat.es_rdsent++;
+	IFDEBUG(D_ESISOUTPUT)
+		printf("esis_rdoutput: ifp x%x (%s%d), ht %d, m x%x, oidx x%x\n",
+			ifp, ifp->if_name, ifp->if_unit, esis_holding_time, inbound_m,
+			inbound_oidx);
+		printf("\tdestination: %s\n", clnp_iso_addrp(rd_dstnsap));
+		printf("\tredirected toward:%s\n", clnp_iso_addrp(rd_gwnsap));
+	ENDDEBUG
+
+	if ((m0 = m = m_gethdr(M_DONTWAIT, MT_HEADER)) == NULL) {
+		esis_stat.es_nomem++;
+		return;
+	}
+	bzero(mtod(m, caddr_t), MHLEN);
+
+	pdu = mtod(m, struct esis_fixed *);
+	cp = (caddr_t)(pdu + 1); /*pointer arith.; 1st byte after header */
+	len = sizeof(struct esis_fixed);
+
+	/*
+	 *	Build fixed part of header
+	 */
+	pdu->esis_proto_id = ISO9542_ESIS;
+	pdu->esis_vers = ESIS_VERSION;
+	pdu->esis_type = ESIS_RD;
+	HTOC(pdu->esis_ht_msb, pdu->esis_ht_lsb, esis_holding_time);
+
+	/* Insert destination address */
+	(void) esis_insert_addr(&cp, &len, rd_dstnsap, m, 0);
+
+	/* Insert the snpa of better next hop */
+	*cp++ = sdl->sdl_alen;
+	bcopy(LLADDR(sdl), cp, sdl->sdl_alen);
+	cp += sdl->sdl_alen;
+	len += (sdl->sdl_alen + 1);
+
+	/*
+	 *	If the next hop is not the destination, then it ought to be
+	 *	an IS and it should be inserted next. Else, set the
+	 *	NETL to 0
+	 */
+	/* PHASE2 use mask from ifp of outgoing interface */
+	if (!iso_addrmatch1(rd_dstnsap, rd_gwnsap)) {
+		/* this should not happen:
+		if ((nhop_sc->sc_flags & SNPA_IS) == 0) {
+			printf("esis_rdoutput: next hop is not dst and not an IS\n");
+			m_freem(m0);
+			return;
+		} */
+		(void) esis_insert_addr(&cp, &len, rd_gwnsap, m, 0);
+	} else {
+		*cp++ = 0;	/* NETL */
+		len++;
+	}
+	/* esis_insert_addr also bumped m_len; len is the authoritative total */
+	m->m_len = len;
+
+	/*
+	 *	PHASE2
+	 *	If redirect is to an IS, add an address mask. The mask to be
+	 *	used should be the mask present in the routing entry used to
+	 *	forward the original data packet.
+	 */
+
+	/*
+	 *	Copy Qos, priority, or security options present in original npdu
+	 */
+	if (inbound_oidx) {
+		/* THIS CODE IS CURRENTLY (mostly) UNTESTED */
+		int optlen = 0;
+		if (inbound_oidx->cni_qos_formatp)
+			optlen += (inbound_oidx->cni_qos_len + 2);
+		if (inbound_oidx->cni_priorp)	/* priority option is 1 byte long */
+			optlen += 3;
+		if (inbound_oidx->cni_securep)
+			optlen += (inbound_oidx->cni_secure_len + 2);
+		if (M_TRAILINGSPACE(m) < optlen) {
+			/* NB: EXTEND_PACKET frees m0 and returns on failure */
+			EXTEND_PACKET(m, m0, cp);
+			m->m_len = 0;
+			/* assumes MLEN > optlen */
+		}
+		/* assume MLEN-len > optlen */
+		/*
+		 *	When copying options, copy from ptr - 2 in order to grab
+		 *	the option code and length
+		 */
+		if (inbound_oidx->cni_qos_formatp) {
+			bcopy(mtod(inbound_m, caddr_t) + inbound_oidx->cni_qos_formatp - 2,
+				cp, (unsigned)(inbound_oidx->cni_qos_len + 2));
+			cp += inbound_oidx->cni_qos_len + 2;
+		}
+		if (inbound_oidx->cni_priorp) {
+			bcopy(mtod(inbound_m, caddr_t) + inbound_oidx->cni_priorp - 2,
+					cp, 3);
+			cp += 3;
+		}
+		if (inbound_oidx->cni_securep) {
+			bcopy(mtod(inbound_m, caddr_t) + inbound_oidx->cni_securep - 2, cp,
+				(unsigned)(inbound_oidx->cni_secure_len + 2));
+			cp += inbound_oidx->cni_secure_len + 2;
+		}
+		m->m_len += optlen;
+		len += optlen;
+	}
+
+	pdu->esis_hdr_len = m0->m_pkthdr.len = len;
+	iso_gen_csum(m0, ESIS_CKSUM_OFF, (int)pdu->esis_hdr_len);
+
+	/* Address the redirect to the snpa the original packet came from. */
+	bzero((caddr_t)&siso, sizeof(siso));
+	siso.siso_family = AF_ISO;
+	siso.siso_data[0] = AFI_SNA;
+	siso.siso_nlen = 6 + 1;	/* should be taken from snpa_hdr */
+										/* +1 is for AFI */
+	bcopy(inbound_shp->snh_shost, siso.siso_data + 1, 6);
+	(ifp->if_output)(ifp, m0, (struct sockaddr *)&siso, 0);
+}
+
+/*
+ * FUNCTION: esis_insert_addr
+ *
+ * PURPOSE: Insert an iso_addr into a buffer
+ *
+ * RETURNS: 1 if the address fit in the trailing space of m, else 0
+ *
+ * SIDE EFFECTS: On success advance *buf, *len and m->m_len by the bytes copied
+ *
+ * NOTES: Plus 1 here is for length byte; isoa_len is restored before return
+ */
+esis_insert_addr(buf, len, isoa, m, nsellen)
+register caddr_t *buf; /* ptr to buffer to put address into */
+int *len; /* ptr to length of buffer so far */
+register struct iso_addr *isoa; /* ptr to address */
+register struct mbuf *m; /* determine if there remains space */
+int nsellen; /* bytes to leave off the end of the address */
+{
+ register int newlen, result = 0;
+
+ isoa->isoa_len -= nsellen; /* shorten for the copy only */
+ newlen = isoa->isoa_len + 1;
+ if (newlen <= M_TRAILINGSPACE(m)) {
+ bcopy((caddr_t)isoa, *buf, newlen);
+ *len += newlen;
+ *buf += newlen;
+ m->m_len += newlen;
+ result = 1;
+ }
+ isoa->isoa_len += nsellen; /* undo the temporary change */
+ return (result);
+}
+
+#define ESIS_EXTRACT_ADDR(d, b) { d = (struct iso_addr *)(b); b += (1 + *b); \
+ if (b > buflim) {esis_stat.es_toosmall++; goto bad;}} /* needs caller's buflim and bad: */
+#define ESIS_NEXT_OPTION(b) { b += (2 + b[1]); \
+ if (b > buflim) {esis_stat.es_toosmall++; goto bad;}} /* needs caller's buflim and bad: */
+int ESHonly = 0; /* nonzero: esis_ishinput and esis_rdinput skip ISH and RD processing */
+
+
+/*
+ * FUNCTION: esis_eshinput
+ *
+ * PURPOSE: Process an incoming ESH pdu
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: Hands each NSAP in the pdu to snpac_add() as an SNPA_ES;
+ * an IS that sees a new entry replies to the sender with an ISH
+ * NOTES: m is only read here, never freed
+ */
+esis_eshinput(m, shp)
+struct mbuf *m; /* esh pdu */
+struct snpa_hdr *shp; /* subnetwork header */
+{
+ struct esis_fixed *pdu = mtod(m, struct esis_fixed *);
+ u_short ht; /* holding time */
+ struct iso_addr *nsap;
+ int naddr;
+ u_char *buf = (u_char *)(pdu + 1); /* first byte after the fixed header */
+ u_char *buflim = pdu->esis_hdr_len + (u_char *)pdu; /* end per length indicator */
+ int new_entry = 0;
+
+ esis_stat.es_eshrcvd++;
+
+ CTOH(pdu->esis_ht_msb, pdu->esis_ht_lsb, ht);
+
+ naddr = *buf++; /* count of addresses that follow */
+ if (buf >= buflim)
+ goto bad;
+ if (naddr == 1) {
+ ESIS_EXTRACT_ADDR(nsap, buf);
+ new_entry = snpac_add(shp->snh_ifp,
+ nsap, shp->snh_shost, SNPA_ES, ht, 0);
+ } else {
+ int nsellength = 0, nlen = 0;
+ {
+ /* See if we want to compress out multiple nsaps differing
+ only by nsel */
+ register struct ifaddr *ifa = shp->snh_ifp->if_addrlist;
+ for (; ifa; ifa = ifa->ifa_next)
+ if (ifa->ifa_addr->sa_family == AF_ISO) {
+ nsellength = ((struct iso_ifaddr *)ifa)->ia_addr.siso_tlen;
+ break; /* first AF_ISO address on the receiving ifp decides */
+ }
+ }
+ IFDEBUG(D_ESISINPUT)
+ printf("esis_eshinput: esh: ht %d, naddr %d nsellength %d\n",
+ ht, naddr, nsellength);
+ ENDDEBUG
+ while (naddr-- > 0) {
+ struct iso_addr *nsap2; u_char *buf2;
+ ESIS_EXTRACT_ADDR(nsap, buf);
+ /* see if there is at least one more nsap in ESH differing
+ only by nsel */
+ if (nsellength != 0) for (buf2 = buf; buf2 < buflim;) {
+ ESIS_EXTRACT_ADDR(nsap2, buf2);
+ IFDEBUG(D_ESISINPUT)
+ printf("esis_eshinput: comparing %s ",
+ clnp_iso_addrp(nsap));
+ printf("and %s\n", clnp_iso_addrp(nsap2));
+ ENDDEBUG
+ if (Bcmp(nsap->isoa_genaddr, nsap2->isoa_genaddr,
+ nsap->isoa_len - nsellength) == 0) {
+ nlen = nsellength; /* a later NSAP shares this prefix */
+ break;
+ }
+ }
+ new_entry |= snpac_add(shp->snh_ifp,
+ nsap, shp->snh_shost, SNPA_ES, ht, nlen);
+ nlen = 0; /* reset for the next address */
+ }
+ }
+ IFDEBUG(D_ESISINPUT)
+ printf("esis_eshinput: nsap %s is %s\n",
+ clnp_iso_addrp(nsap), new_entry ? "new" : "old"); /* NOTE(review): nsap is unset here if naddr was 0 */
+ ENDDEBUG
+ if (new_entry && (iso_systype & SNPA_IS))
+ esis_shoutput(shp->snh_ifp, ESIS_ISH, esis_holding_time,
+ shp->snh_shost, 6, (struct iso_addr *)0);
+bad:
+ return;
+}
+
+/*
+ * FUNCTION: esis_ishinput
+ *
+ * PURPOSE: process an incoming ISH pdu
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: Hands the NET to snpac_add() as an SNPA_IS and answers a new
+ * entry with a hello; an ESCT option may change esis_config_time
+ * NOTES: Nothing is recorded when ESHonly is set; m is not freed here
+ */
+esis_ishinput(m, shp)
+struct mbuf *m; /* ish pdu */
+struct snpa_hdr *shp; /* subnetwork header */
+{
+ struct esis_fixed *pdu = mtod(m, struct esis_fixed *);
+ u_short ht, newct; /* holding time */
+ struct iso_addr *nsap; /* Network Entity Title */
+ register u_char *buf = (u_char *) (pdu + 1);
+ register u_char *buflim = pdu->esis_hdr_len + (u_char *)pdu;
+ int new_entry;
+
+ esis_stat.es_ishrcvd++;
+ CTOH(pdu->esis_ht_msb, pdu->esis_ht_lsb, ht);
+
+ IFDEBUG(D_ESISINPUT)
+ printf("esis_ishinput: ish: ht %d\n", ht);
+ ENDDEBUG
+ if (ESHonly)
+ goto bad;
+
+ ESIS_EXTRACT_ADDR(nsap, buf);
+
+ while (buf < buflim) {
+ switch (*buf) {
+ case ESISOVAL_ESCT:
+ if (iso_systype & SNPA_IS)
+ break; /* an IS does not adopt the suggested timer */
+ if (buf[1] != 2)
+ goto bad;
+ CTOH(buf[2], buf[3], newct); /* NOTE(review): read before the bounds check in ESIS_NEXT_OPTION */
+ if (esis_config_time != newct) {
+ untimeout(esis_config,0);
+ esis_config_time = newct;
+ esis_config(); /* reports now and re-arms with the new period */
+ }
+ break;
+
+ default:
+ printf("Unknown ISH option: %x\n", *buf);
+ }
+ ESIS_NEXT_OPTION(buf);
+ }
+ new_entry = snpac_add(shp->snh_ifp, nsap, shp->snh_shost, SNPA_IS, ht, 0);
+ IFDEBUG(D_ESISINPUT)
+ printf("esis_ishinput: nsap %s is %s\n",
+ clnp_iso_addrp(nsap), new_entry ? "new" : "old");
+ ENDDEBUG
+
+ if (new_entry)
+ esis_shoutput(shp->snh_ifp,
+ iso_systype & SNPA_ES ? ESIS_ESH : ESIS_ISH,
+ esis_holding_time, shp->snh_shost, 6, (struct iso_addr *)0);
+bad:
+ return;
+}
+
+/*
+ * FUNCTION: esis_rdinput
+ *
+ * PURPOSE: Process an incoming RD pdu
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: Adds an snpa cache entry for the destination (redirect to an
+ * ES) or for the NET plus a route via snpac_addrt() (redirect to an IS)
+ * NOTES: Ignored by an IS and when ESHonly is set; m0 is not freed here
+ */
+esis_rdinput(m0, shp)
+struct mbuf *m0; /* rd pdu */
+struct snpa_hdr *shp; /* subnetwork header */
+{
+ struct esis_fixed *pdu = mtod(m0, struct esis_fixed *);
+ u_short ht; /* holding time */
+ struct iso_addr *da, *net = 0, *netmask = 0, *snpamask = 0;
+ register struct iso_addr *bsnpa;
+ register u_char *buf = (u_char *)(pdu + 1);
+ register u_char *buflim = pdu->esis_hdr_len + (u_char *)pdu;
+
+ esis_stat.es_rdrcvd++;
+
+ /* intermediate systems ignore redirects */
+ if (iso_systype & SNPA_IS)
+ return;
+ if (ESHonly)
+ return;
+
+ CTOH(pdu->esis_ht_msb, pdu->esis_ht_lsb, ht);
+ if (buf >= buflim)
+ return;
+
+ /* Extract DA */
+ ESIS_EXTRACT_ADDR(da, buf);
+
+ /* Extract better snpa */
+ ESIS_EXTRACT_ADDR(bsnpa, buf);
+
+ /* Extract NET if present */
+ if (buf < buflim) {
+ if (*buf == 0)
+ buf++; /* no NET present, skip NETL anyway */
+ else
+ ESIS_EXTRACT_ADDR(net, buf);
+ }
+
+ /* process options */
+ while (buf < buflim) {
+ switch (*buf) {
+ case ESISOVAL_SNPAMASK:
+ if (snpamask) /* duplicate */
+ return;
+ snpamask = (struct iso_addr *)(buf + 1); /* starts at the option length byte */
+ break;
+
+ case ESISOVAL_NETMASK:
+ if (netmask) /* duplicate */
+ return;
+ netmask = (struct iso_addr *)(buf + 1); /* starts at the option length byte */
+ break;
+
+ default: /* buf still points at the option code, so report buf[0] */
+ printf("Unknown option in ESIS RD (0x%x)\n", buf[0]);
+ }
+ ESIS_NEXT_OPTION(buf);
+ }
+
+ IFDEBUG(D_ESISINPUT)
+ printf("esis_rdinput: rd: ht %d, da %s\n", ht, clnp_iso_addrp(da));
+ if (net)
+ printf("\t: net %s\n", clnp_iso_addrp(net));
+ ENDDEBUG
+ /*
+ * If netl is zero, then redirect is to an ES. We need to add an entry
+ * to the snpa cache for (destination, better snpa).
+ * If netl is not zero, then the redirect is to an IS. In this
+ * case, add an snpa cache entry for (net, better snpa).
+ *
+ * If the redirect is to an IS, add a route entry towards that
+ * IS.
+ */
+ if (net == 0 || net->isoa_len == 0 || snpamask) {
+ /* redirect to an ES */
+ snpac_add(shp->snh_ifp, da,
+ bsnpa->isoa_genaddr, SNPA_ES, ht, 0);
+ } else {
+ snpac_add(shp->snh_ifp, net,
+ bsnpa->isoa_genaddr, SNPA_IS, ht, 0);
+ snpac_addrt(shp->snh_ifp, da, net, netmask);
+ }
+bad: ; /* Needed by ESIS_NEXT_OPTION */
+}
+
+/*
+ * FUNCTION: esis_config
+ *
+ * PURPOSE: Report configuration
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: Re-arms itself with timeout() and sends one hello per
+ * qualifying interface through esis_shoutput()
+ * NOTES: Called every esis_config_time seconds
+ */
+void
+esis_config()
+{
+ register struct ifnet *ifp;
+
+ timeout(esis_config, (caddr_t)0, hz * esis_config_time); /* schedule the next run */
+
+ /*
+ * Report configuration for each interface that
+ * - is UP
+ * - has BROADCAST capability
+ * - has an ISO address
+ */
+ /* Todo: a better way would be to construct the esh or ish
+ * once and copy it out for all devices, possibly calling
+ * a method in the iso_ifaddr structure to encapsulate and
+ * transmit it. This could work to advantage for non-broadcast media
+ */
+
+ for (ifp = ifnet; ifp; ifp = ifp->if_next) {
+ if ((ifp->if_flags & IFF_UP) &&
+ (ifp->if_flags & IFF_BROADCAST)) {
+ /* search for an ISO address family */
+ struct ifaddr *ia;
+
+ for (ia = ifp->if_addrlist; ia; ia = ia->ifa_next) {
+ if (ia->ifa_addr->sa_family == AF_ISO) {
+ esis_shoutput(ifp, /* an ES sends ESH to all_is_snpa, otherwise ISH to all_es_snpa */
+ iso_systype & SNPA_ES ? ESIS_ESH : ESIS_ISH,
+ esis_holding_time,
+ (caddr_t)(iso_systype & SNPA_ES ? all_is_snpa :
+ all_es_snpa), 6, (struct iso_addr *)0);
+ break; /* one hello per interface is enough */
+ }
+ }
+ }
+ }
+}
+
+/*
+ * FUNCTION: esis_shoutput
+ *
+ * PURPOSE: Transmit an esh or ish pdu
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: Allocates an mbuf chain, fills it with the hello and hands
+ * it to ifp->if_output; bumps es_eshsent, es_ishsent or es_nomem
+ * NOTES: Any type other than ESIS_ESH or ESIS_ISH is rejected with a printf
+ */
+esis_shoutput(ifp, type, ht, sn_addr, sn_len, isoa)
+struct ifnet *ifp;
+int type;
+short ht;
+caddr_t sn_addr;
+int sn_len;
+struct iso_addr *isoa;
+{
+ struct mbuf *m, *m0;
+ caddr_t cp, naddrp;
+ int naddr = 0;
+ struct esis_fixed *pdu;
+ struct iso_ifaddr *ia;
+ int len;
+ struct sockaddr_iso siso;
+
+ if (type == ESIS_ESH)
+ esis_stat.es_eshsent++;
+ else if (type == ESIS_ISH)
+ esis_stat.es_ishsent++;
+ else {
+ printf("esis_shoutput: bad pdu type\n");
+ return;
+ }
+
+ IFDEBUG(D_ESISOUTPUT)
+ int i;
+ printf("esis_shoutput: ifp x%x (%s%d), %s, ht %d, to: [%d] ",
+ ifp, ifp->if_name, ifp->if_unit, type == ESIS_ESH ? "esh" : "ish",
+ ht, sn_len);
+ for (i=0; i<sn_len; i++)
+ printf("%x%c", *(sn_addr+i), i < (sn_len-1) ? ':' : ' ');
+ printf("\n");
+ ENDDEBUG
+
+ if ((m0 = m = m_gethdr(M_DONTWAIT, MT_HEADER)) == NULL) {
+ esis_stat.es_nomem++;
+ return;
+ }
+ bzero(mtod(m, caddr_t), MHLEN);
+
+ pdu = mtod(m, struct esis_fixed *);
+ naddrp = cp = (caddr_t)(pdu + 1); /* first byte after the fixed header */
+ len = sizeof(struct esis_fixed);
+
+ /*
+ * Build fixed part of header
+ */
+ pdu->esis_proto_id = ISO9542_ESIS;
+ pdu->esis_vers = ESIS_VERSION;
+ pdu->esis_type = type;
+ HTOC(pdu->esis_ht_msb, pdu->esis_ht_lsb, ht);
+
+ if (type == ESIS_ESH) {
+ cp++; /* leave room for the address count stored through naddrp below */
+ len++;
+ }
+
+ m->m_len = len;
+ if (isoa) {
+ /*
+ * Here we are responding to a clnp packet sent to an NSAP
+ * that is ours which was sent to the MAC addr all_es's.
+ * It is possible that we did not specifically advertise this
+ * NSAP, even though it is ours, so we will respond
+ * directly to the sender that we are here. If we do have
+ * multiple NSEL's we'll tack them on so he can compress them out.
+ */
+ (void) esis_insert_addr(&cp, &len, isoa, m, 0);
+ naddr = 1;
+ }
+ for (ia = iso_ifaddr; ia; ia = ia->ia_next) {
+ int nsellen = (type == ESIS_ISH ? ia->ia_addr.siso_tlen : 0);
+ int n = ia->ia_addr.siso_nlen;
+ register struct iso_ifaddr *ia2;
+
+ if (type == ESIS_ISH && naddr > 0)
+ break; /* an ISH carries a single address */
+ for (ia2 = iso_ifaddr; ia2 != ia; ia2 = ia2->ia_next)
+ if (Bcmp(ia->ia_addr.siso_data, ia2->ia_addr.siso_data, n) == 0)
+ break;
+ if (ia2 != ia)
+ continue; /* Means we have previously copied this nsap */
+ if (isoa && Bcmp(ia->ia_addr.siso_data, isoa->isoa_genaddr, n) == 0) {
+ isoa = 0;
+ continue; /* Ditto */
+ }
+ IFDEBUG(D_ESISOUTPUT)
+ printf("esis_shoutput: adding NSAP %s\n",
+ clnp_iso_addrp(&ia->ia_addr.siso_addr));
+ ENDDEBUG
+ if (!esis_insert_addr(&cp, &len,
+ &ia->ia_addr.siso_addr, m, nsellen)) {
+ EXTEND_PACKET(m, m0, cp);
+ (void) esis_insert_addr(&cp, &len, &ia->ia_addr.siso_addr, m,
+ nsellen); /* NOTE(review): result of the retry is not checked */
+ }
+ naddr++;
+ }
+
+ if (type == ESIS_ESH)
+ *naddrp = naddr;
+ else {
+ /* add suggested es config timer option to ISH */
+ if (M_TRAILINGSPACE(m) < 4) {
+ printf("esis_shoutput: extending packet\n");
+ EXTEND_PACKET(m, m0, cp);
+ }
+ *cp++ = ESISOVAL_ESCT;
+ *cp++ = 2;
+ HTOC(*cp, *(cp+1), esis_esconfig_time); /* cp is not advanced past these two bytes */
+ len += 4; /* code + length + two value bytes */
+ m->m_len += 4;
+ IFDEBUG(D_ESISOUTPUT)
+ printf("m0 0x%x, m 0x%x, data 0x%x, len %d, cp 0x%x\n",
+ m0, m, m->m_data, m->m_len, cp);
+ ENDDEBUG
+ }
+
+ m0->m_pkthdr.len = len;
+ pdu->esis_hdr_len = len; /* NOTE(review): int stored into a header field; confirm len cannot exceed its range */
+ iso_gen_csum(m0, ESIS_CKSUM_OFF, (int)pdu->esis_hdr_len);
+
+ bzero((caddr_t)&siso, sizeof(siso));
+ siso.siso_family = AF_ISO;
+ siso.siso_data[0] = AFI_SNA;
+ siso.siso_nlen = sn_len + 1; /* +1 for the AFI byte in siso_data[0] */
+ bcopy(sn_addr, siso.siso_data + 1, (unsigned)sn_len);
+ (ifp->if_output)(ifp, m0, (struct sockaddr *)&siso, 0);
+}
+
+/*
+ * FUNCTION: isis_input
+ *
+ * PURPOSE: Process an incoming isis packet
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: Overwrites esis_dl with the sender's link address and appends
+ * the packet to the receive buffer of every socket on the esis_pcb list
+ * NOTES: The first socket gets m0 itself, the others get copies; else m0 is freed
+ */
+isis_input(m0, shp)
+struct mbuf *m0; /* ptr to first mbuf of pkt */
+struct snpa_hdr *shp; /* subnetwork header */
+{
+ register int type; /* not used in this function */
+ register struct rawcb *rp, *first_rp = 0;
+ struct ifnet *ifp = shp->snh_ifp;
+ char workbuf[16]; /* not used in this function */
+ struct mbuf *mm;
+
+ IFDEBUG(D_ISISINPUT)
+ int i;
+
+ printf("isis_input: pkt on ifp x%x (%s%d): from:", ifp,
+ ifp->if_name, ifp->if_unit);
+ for (i=0; i<6; i++)
+ printf("%x%c", shp->snh_shost[i]&0xff, (i<5) ? ':' : ' ');
+ printf(" to:");
+ for (i=0; i<6; i++)
+ printf("%x%c", shp->snh_dhost[i]&0xff, (i<5) ? ':' : ' ');
+ printf("\n");
+ ENDDEBUG
+ esis_dl.sdl_alen = ifp->if_addrlen;
+ esis_dl.sdl_index = ifp->if_index;
+ bcopy(shp->snh_shost, (caddr_t)esis_dl.sdl_data, esis_dl.sdl_alen);
+ for (rp = esis_pcb.rcb_next; rp != &esis_pcb; rp = rp->rcb_next) {
+ if (first_rp == 0) {
+ first_rp = rp; /* served last, with the original chain */
+ continue;
+ }
+ if (mm = m_copy(m0, 0, M_COPYALL)) { /*can't block at interrupt level */
+ if (sbappendaddr(&rp->rcb_socket->so_rcv,
+ &esis_dl, mm, (struct mbuf *)0) != 0) {
+ sorwakeup(rp->rcb_socket);
+ } else {
+ IFDEBUG(D_ISISINPUT)
+ printf("Error in sbappenaddr, mm = 0x%x\n", mm);
+ ENDDEBUG
+ m_freem(mm);
+ }
+ }
+ }
+ if (first_rp && sbappendaddr(&first_rp->rcb_socket->so_rcv,
+ &esis_dl, m0, (struct mbuf *)0) != 0) {
+ sorwakeup(first_rp->rcb_socket);
+ return; /* m0 now belongs to the socket buffer */
+ }
+ m_freem(m0); /* no listener, or the append failed */
+}
+
+isis_output(sdl, m) /* send m to link address sdl; returns 0 or an errno value */
+register struct sockaddr_dl *sdl; /* destination; also selects the interface */
+struct mbuf *m; /* packet; freed here only when no interface is found */
+{
+ register struct ifnet *ifp;
+ struct ifaddr *ifa, *ifa_ifwithnet();
+ struct sockaddr_iso siso;
+ int error = 0;
+ unsigned sn_len;
+
+ ifa = ifa_ifwithnet((struct sockaddr *)sdl); /* get ifp from sdl */
+ if (ifa == 0) {
+ IFDEBUG(D_ISISOUTPUT)
+ printf("isis_output: interface not found\n");
+ ENDDEBUG
+ error = EINVAL;
+ goto release;
+ }
+ ifp = ifa->ifa_ifp;
+ sn_len = sdl->sdl_alen;
+ IFDEBUG(D_ISISOUTPUT)
+ u_char *cp = (u_char *)LLADDR(sdl), *cplim = cp + sn_len;
+ printf("isis_output: ifp 0x%x (%s%d), to: ",
+ ifp, ifp->if_name, ifp->if_unit);
+ while (cp < cplim) {
+ printf("%x", *cp++);
+ printf("%c", (cp < cplim) ? ':' : ' ');
+ }
+ printf("\n");
+ ENDDEBUG
+ bzero((caddr_t)&siso, sizeof(siso));
+ siso.siso_family = AF_ISO; /* This convention may be useful for X.25 */
+ siso.siso_data[0] = AFI_SNA;
+ siso.siso_nlen = sn_len + 1; /* +1 for the AFI byte in siso_data[0] */
+ bcopy(LLADDR(sdl), siso.siso_data + 1, sn_len);
+ error = (ifp->if_output)(ifp, m, (struct sockaddr *)&siso, 0);
+ if (error) {
+ IFDEBUG(D_ISISOUTPUT)
+ printf("isis_output: error from ether_output is %d\n", error);
+ ENDDEBUG
+ }
+ return (error); /* m was handed to if_output; it is not freed here */
+
+release:
+ if (m != NULL)
+ m_freem(m);
+ return(error);
+}
+
+
+/*
+ * FUNCTION: esis_ctlinput
+ *
+ * PURPOSE: Handle the PRC_IFDOWN transition
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: Calls snpac_flushifp() once per matching iso_ifaddr entry
+ *
+ * NOTES: Calls snpac_flush for interface specified.
+ * The loop through iso_ifaddr is stupid because
+ * back in if_down, we knew the ifp...
+ */
+esis_ctlinput(req, siso)
+int req; /* request: we handle only PRC_IFDOWN */
+struct sockaddr_iso *siso; /* address of ifp */
+{
+ register struct iso_ifaddr *ia; /* scan through interface addresses */
+
+ if (req == PRC_IFDOWN) /* every other request is ignored */
+ for (ia = iso_ifaddr; ia; ia = ia->ia_next) {
+ if (iso_addrmatch(IA_SIS(ia), siso))
+ snpac_flushifp(ia->ia_ifp);
+ }
+}
+
+#endif /* ISO */
diff --git a/sys/netiso/esis.h b/sys/netiso/esis.h
new file mode 100644
index 000000000000..81dd74ac3104
--- /dev/null
+++ b/sys/netiso/esis.h
@@ -0,0 +1,135 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)esis.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: esis.h,v 4.7 88/09/15 11:24:18 hagens Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/esis.h,v $
+ */
+
+#ifndef BYTE_ORDER /* fallback, used only when nothing earlier defined it */
+/*
+ * Definitions for byte order,
+ * according to byte significance from low address to high.
+ */
+#define LITTLE_ENDIAN 1234 /* least-significant byte first (vax) */
+#define BIG_ENDIAN 4321 /* most-significant byte first (IBM, net) */
+#define PDP_ENDIAN 3412 /* LSB first in word, MSW first in long (pdp) */
+
+#ifdef vax
+#define BYTE_ORDER LITTLE_ENDIAN
+#else
+#define BYTE_ORDER BIG_ENDIAN /* mc68000, tahoe, most others */
+#endif
+#endif /* BYTE_ORDER */
+
+#define SNPAC_AGE 60 /* seconds */
+#define ESIS_CONFIG 60 /* seconds */
+#define ESIS_HT (ESIS_CONFIG * 2) /* seconds; twice ESIS_CONFIG */
+
+/*
+ * Fixed part of an ESIS header: nine single-octet fields, multi-octet values split msb/lsb
+ */
+struct esis_fixed {
+ u_char esis_proto_id; /* network layer protocol identifier */
+ u_char esis_hdr_len; /* length indicator (octets) */
+ u_char esis_vers; /* version/protocol identifier extension */
+ u_char esis_res1; /* reserved */
+ u_char esis_type; /* type code */
+/* technically, type should be &='d 0x1f */
+#define ESIS_ESH 0x02 /* End System Hello */
+#define ESIS_ISH 0x04 /* Intermediate System Hello */
+#define ESIS_RD 0x06 /* Redirect */
+ u_char esis_ht_msb; /* holding time (seconds) high byte */
+ u_char esis_ht_lsb; /* holding time (seconds) low byte */
+ u_char esis_cksum_msb; /* checksum high byte (eighth field, index 7) */
+ u_char esis_cksum_lsb; /* checksum low byte */
+};
+/*
+ * Values for ESIS datagram options
+ */
+#define ESISOVAL_NETMASK 0xe1 /* address mask option, RD PDU only */
+#define ESISOVAL_SNPAMASK 0xe2 /* snpa mask option, RD PDU only */
+#define ESISOVAL_ESCT 0xc6 /* end system conf. timer, ISH PDU only */
+
+/* Checksum: index of esis_cksum_msb in the header, and a nonzero-checksum test */
+#define ESIS_CKSUM_OFF 0x07
+#define ESIS_CKSUM_REQUIRED(pdu)\
+ (((pdu)->esis_cksum_msb != 0) || ((pdu)->esis_cksum_lsb != 0)) /* arg parenthesized so any pointer expression works */
+
+#define ESIS_VERSION 1
+
+struct esis_stat { /* ES-IS event counters */
+ u_short es_nomem; /* insufficient memory to send hello */
+ u_short es_badcsum; /* incorrect checksum */
+ u_short es_badvers; /* incorrect version number */
+ u_short es_badtype; /* unknown pdu type field */
+ u_short es_toosmall; /* packet too small */
+ u_short es_eshsent; /* ESH sent */
+ u_short es_eshrcvd; /* ESH rcvd */
+ u_short es_ishsent; /* ISH sent */
+ u_short es_ishrcvd; /* ISH rcvd */
+ u_short es_rdsent; /* RD sent */
+ u_short es_rdrcvd; /* RD rcvd */
+};
+
+#ifdef KERNEL
+struct esis_stat esis_stat; /* NOTE(review): a definition, not an extern, in every kernel file that includes this */
+#endif /* KERNEL */
diff --git a/sys/netiso/idrp_usrreq.c b/sys/netiso/idrp_usrreq.c
new file mode 100644
index 000000000000..3109936b4153
--- /dev/null
+++ b/sys/netiso/idrp_usrreq.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)idrp_usrreq.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/clnl.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_var.h>
+
+int idrp_input();
+struct isopcb idrp_isop; /* the one pcb used by every routine in this file */
+static struct sockaddr_iso idrp_addrs[2] = /* [0] source, [1] destination, filled by idrp_input */
+{ { sizeof(idrp_addrs), AF_ISO, }, { sizeof(idrp_addrs[1]), AF_ISO, } }; /* first size spans both entries */
+/*
+ * IDRP initialization: set up idrp_isop and install idrp_input in clnl_protox
+ */
+idrp_init()
+{
+ extern struct clnl_protosw clnl_protox[256];
+
+ idrp_isop.isop_next = idrp_isop.isop_prev = &idrp_isop; /* list of one, linked to itself */
+ idrp_isop.isop_faddr = &idrp_isop.isop_sfaddr; /* address pointers use the pcb's own storage */
+ idrp_isop.isop_laddr = &idrp_isop.isop_sladdr;
+ idrp_isop.isop_sladdr = idrp_addrs[1]; /* both start as a copy of the [1] template */
+ idrp_isop.isop_sfaddr = idrp_addrs[1];
+ clnl_protox[ISO10747_IDRP].clnl_input = idrp_input;
+}
+
+/*
+ * CALLED FROM:
+ * whatever dispatches through clnl_protox[ISO10747_IDRP].clnl_input.
+ * FUNCTION and ARGUMENTS:
+ * Take a packet (m) with its source and destination addresses
+ * and make it suitable for the idrp socket.
+ * Always returns 0; m is freed if it cannot be queued.
+ */
+idrp_input(m, src, dst)
+ register struct mbuf *m;
+ struct sockaddr_iso *src, *dst;
+{
+ if (idrp_isop.isop_socket == 0) { /* no socket attached */
+ bad: m_freem(m);
+ return 0;
+ }
+ bzero(idrp_addrs[0].siso_data, sizeof(idrp_addrs[0].siso_data));
+ bcopy((caddr_t)&(src->siso_addr), (caddr_t)&idrp_addrs[0].siso_addr,
+ 1 + src->siso_nlen); /* NOTE(review): siso_nlen is trusted as the copy length */
+ bzero(idrp_addrs[1].siso_data, sizeof(idrp_addrs[1].siso_data));
+ bcopy((caddr_t)&(dst->siso_addr), (caddr_t)&idrp_addrs[1].siso_addr,
+ 1 + dst->siso_nlen);
+ if (sbappendaddr(&idrp_isop.isop_socket->so_rcv,
+ (struct sockaddr *)idrp_addrs, m, (struct mbuf *)0) == 0)
+ goto bad; /* append failed: drop the packet */
+ sorwakeup(idrp_isop.isop_socket);
+ return 0;
+}
+
+idrp_output(m, addr) /* send m through clnp; returns clnp_output()'s result */
+ struct mbuf *m, *addr; /* addr data: two sockaddr_iso entries back to back */
+{
+ register struct sockaddr_iso *siso = mtod(addr, struct sockaddr_iso *);
+ int s = splnet(), i; /* hold splnet while the shared pcb addresses are rewritten */
+
+ bcopy((caddr_t)&(siso->siso_addr), /* first entry becomes isop_sfaddr */
+ (caddr_t)&idrp_isop.isop_sfaddr.siso_addr, 1 + siso->siso_nlen);
+ siso++;
+ bcopy((caddr_t)&(siso->siso_addr), /* second entry becomes isop_sladdr */
+ (caddr_t)&idrp_isop.isop_sladdr.siso_addr, 1 + siso->siso_nlen);
+ i = clnp_output(m, &idrp_isop, m->m_pkthdr.len, 0); /* pcb is passed by pointer, not by value */
+ splx(s);
+ return (i);
+}
+
+u_long idrp_sendspace = 3072; /* really max datagram size */
+u_long idrp_recvspace = 40 * 1024; /* 40 1K datagrams */
+
+/*ARGSUSED*/
+idrp_usrreq(so, req, m, addr, control) /* socket request handler; returns 0 or an errno value */
+ struct socket *so;
+ int req;
+ struct mbuf *m, *addr, *control;
+{
+ int error = 0;
+
+ /* Note: need to block idrp_input while changing
+ * the udp pcb queue and/or pcb addresses.
+ */
+ switch (req) {
+
+ case PRU_ATTACH:
+ if (idrp_isop.isop_socket != NULL) { /* only one socket may be attached */
+ error = ENXIO;
+ break;
+ }
+ idrp_isop.isop_socket = so;
+ error = soreserve(so, idrp_sendspace, idrp_recvspace); /* NOTE(review): isop_socket stays set if this fails */
+ break;
+
+ case PRU_SHUTDOWN:
+ socantsendmore(so);
+ break;
+
+ case PRU_SEND:
+ return (idrp_output(m, addr)); /* returns directly: m and control are not freed here */
+
+ case PRU_ABORT:
+ soisdisconnected(so); /* no break: continues into PRU_DETACH */
+ case PRU_DETACH:
+ idrp_isop.isop_socket = 0;
+ break;
+
+
+ case PRU_SENSE:
+ /*
+ * stat: don't bother with a blocksize.
+ */
+ return (0);
+
+ default:
+ return (EOPNOTSUPP); /* do not free mbuf's */
+ }
+
+release: /* no goto targets this label; reached by falling out of the switch */
+ if (control) {
+ printf("idrp control data unexpectedly retained\n");
+ m_freem(control);
+ }
+ if (m)
+ m_freem(m);
+ return (error);
+}
diff --git a/sys/netiso/if_cons.c b/sys/netiso/if_cons.c
new file mode 100644
index 000000000000..7724b048be59
--- /dev/null
+++ b/sys/netiso/if_cons.c
@@ -0,0 +1,960 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_cons.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: if_cons.c,v 4.7 88/08/11 15:52:55 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/if_cons.c,v $
+ *
+ * cons.c - Connection Oriented Network Service:
+ * including support for a) user transport-level service,
+ * b) COSNS below CLNP, and c) CONS below TP.
+ */
+
+#ifdef TPCONS
+#ifdef KERNEL
+#ifdef ARGO_DEBUG
+#define Static
+unsigned LAST_CALL_PCB;
+#else /* ARGO_DEBUG */
+#define Static static
+#endif /* ARGO_DEBUG */
+
+#ifndef SOCK_STREAM
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/tsleep.h>
+
+#include <net/if.h>
+#include <net/netisr.h>
+#include <net/route.h>
+
+#include <netiso/iso_errno.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_trace.h>
+#include <netiso/iso.h>
+#include <netiso/cons.h>
+#include <netiso/iso_pcb.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+#endif
+
+#ifdef ARGO_DEBUG
+#define MT_XCONN 0x50
+#define MT_XCLOSE 0x51
+#define MT_XCONFIRM 0x52
+#define MT_XDATA 0x53
+#define MT_XHEADER 0x54
+#else
+#define MT_XCONN MT_DATA
+#define MT_XCLOSE MT_DATA
+#define MT_XCONFIRM MT_DATA
+#define MT_XDATA MT_DATA
+#define MT_XHEADER MT_HEADER
+#endif /* ARGO_DEBUG */
+
+#define DONTCLEAR -1
+
+/*********************************************************************
+ * cons.c - CONS interface to the x.25 layer
+ *
+ * TODO: figure out what resources we might run out of besides mbufs.
+ * If we run out of any of them (including mbufs) close and recycle
+ * lru x% of the connections, for some parameter x.
+ *
+ * There are 2 interfaces from above:
+ * 1) from TP0:
+ * cons CO network service
+ * TP associates a transport connection with a network connection.
+ * cons_output( isop, m, len, isdgm==0 )
+ * co_flags == 0
+ * 2) from TP4:
+ * It's a datagram service, like clnp is. - even though it calls
+ * cons_output( isop, m, len, isdgm==1 )
+ * it eventually goes through
+ * cosns_output(ifp, m, dst).
+ * TP4 permits multiplexing (reuse, possibly simultaneously) of the
+ * network connections.
+ * This means that many sockets (many tpcbs) may be associated with
+ * this pklcd, hence cannot have a back ptr from pklcd to a tpcb.
+ * co_flags & CONSF_DGM
+ * co_socket is null since there may be many sockets that use this pklcd.
+ *
+NOTE:
+ streams would really be nice. sigh.
+NOTE:
+ PVCs could be handled by config-ing a cons with an address and with the
+ IFF_POINTTOPOINT flag on. This code would then have to skip the
+ connection setup stuff for pt-to-pt links.
+
+
+ *********************************************************************/
+
+
+#define CONS_IFQMAXLEN 5
+
+
+/* protosw pointers for getting to higher layer */
+/* (each is filled in by cons_init() with the result of pffindproto()) */
+Static struct protosw *CLNP_proto;
+Static struct protosw *TP_proto;
+Static struct protosw *X25_proto;
+Static int issue_clear_req();
+
+#ifndef PHASEONE
+extern struct ifaddr *ifa_ifwithnet();
+#endif /* PHASEONE */
+
+extern struct ifaddr *ifa_ifwithaddr();
+
+extern struct isopcb tp_isopcb; /* chain of all TP pcbs */
+
+
+/*
+ * Forward declarations of file-local helpers (not all of them are
+ * defined in this part of the file).
+ */
+Static int parse_facil(), NSAPtoDTE(), make_partial_x25_packet();
+Static int FACILtoNSAP(), DTEtoNSAP();
+Static struct pklcd *cons_chan_to_pcb();
+
+#define HIGH_NIBBLE 1
+#define LOW_NIBBLE 0
+
+/*
+ * NAME: nibble_copy()
+ * FUNCTION and ARGUMENTS:
+ * copies (len) nibbles from (src_octet), high or low nibble
+ * to (dst_octet), high or low nibble,
+ * src_nibble & dst_nibble should be:
+ * HIGH_NIBBLE (1) if leftmost 4 bits/ most significant nibble
+ * LOW_NIBBLE (0) if rightmost 4 bits/ least significant nibble
+ * RETURNS: VOID
+ */
+void
+nibble_copy(src_octet, src_nibble, dst_octet, dst_nibble, len)
+	register char *src_octet;
+	register char *dst_octet;
+	register unsigned src_nibble;
+	register unsigned dst_nibble;
+	int len;
+{
+	register int remaining;
+	register int digit;
+	/* a nibble selector of 1 (high) means a shift of 4 bits, 0 means none */
+	register unsigned from_shift = src_nibble << 2;
+	register unsigned to_shift = dst_nibble << 2;
+
+	IFDEBUG(D_CADDR)
+		printf("nibble_copy ( 0x%x, 0x%x, 0x%x, 0x%x 0x%x)\n",
+		 src_octet, src_nibble, dst_octet, dst_nibble, len);
+	ENDDEBUG
+#define SHIFT 0x4
+
+	for (remaining = len; remaining > 0; remaining--) {
+		/* wipe the target nibble first, then drop the source digit in */
+		*dst_octet &= ~(0xf << to_shift);
+		digit = 0xf & (*src_octet >> from_shift);
+		*dst_octet |= digit << to_shift;
+
+		/* flip both cursors to the other half of their octet */
+		from_shift ^= SHIFT;
+		to_shift ^= SHIFT;
+
+		/*
+		 * A cursor that has just wrapped from the low nibble back to
+		 * the high one (selector becomes 1) moves on to the next octet.
+		 */
+		src_nibble = 1 - src_nibble;
+		src_octet += src_nibble;
+		dst_nibble = 1 - dst_nibble;
+		dst_octet += dst_nibble;
+	}
+	IFDEBUG(D_CADDR)
+		printf("nibble_copy DONE\n");
+	ENDDEBUG
+}
+
+/*
+ * NAME: nibble_match()
+ * FUNCTION and ARGUMENTS:
+ * compares src_octet/src_nibble and dst_octet/dst_nibble for len nibbles.
+ * RETURNS: 0 if they differ, 1 if they are the same.
+ */
+int
+nibble_match( src_octet, src_nibble, dst_octet, dst_nibble, len)
+	register char *src_octet;
+	register char *dst_octet;
+	register unsigned src_nibble;
+	register unsigned dst_nibble;
+	int len;
+{
+	register int remaining;
+	/* a nibble selector of 1 (high) means a shift of 4 bits, 0 means none */
+	register unsigned from_shift = src_nibble << 2;
+	register unsigned to_shift = dst_nibble << 2;
+	u_char theirs, ours;
+
+	IFDEBUG(D_CADDR)
+		printf("nibble_match ( 0x%x, 0x%x, 0x%x, 0x%x 0x%x)\n",
+		 src_octet, src_nibble, dst_octet, dst_nibble, len);
+	ENDDEBUG
+#define SHIFT 0x4
+
+	for (remaining = len; remaining > 0; remaining--) {
+		theirs = ((*dst_octet) >> to_shift) & 0xf;
+		ours = (0xf & (*src_octet >> from_shift));
+		if (ours != theirs)
+			return 0;	/* first differing digit ends the comparison */
+
+		/* flip both cursors to the other half of their octet */
+		from_shift ^= SHIFT;
+		to_shift ^= SHIFT;
+
+		/* stepping from low back to high nibble advances the octet */
+		src_nibble = 1 - src_nibble;
+		src_octet += src_nibble;
+		dst_nibble = 1 - dst_nibble;
+		dst_octet += dst_nibble;
+	}
+	IFDEBUG(D_CADDR)
+		printf("nibble_match DONE\n");
+	ENDDEBUG
+	return 1;
+}
+
+/*
+ **************************** NET PROTOCOL cons ***************************
+ */
+/*
+ * NAME: cons_init()
+ * CALLED FROM:
+ * autoconf
+ * FUNCTION:
+ * initialize the protocol
+ */
+cons_init()
+{
+	int tp_incoming(), clnp_incoming();
+
+	/* look up the protosw entries of the protocols cons talks to */
+	TP_proto = pffindproto(AF_ISO, ISOPROTO_TP0, SOCK_SEQPACKET);
+	X25_proto = pffindproto(AF_ISO, ISOPROTO_X25, SOCK_STREAM);
+	CLNP_proto = pffindproto(AF_ISO, ISOPROTO_CLNP, SOCK_DGRAM);
+
+	IFDEBUG(D_CCONS)
+		printf("cons_init end : cnlp_proto 0x%x cons proto 0x%x tp proto 0x%x\n",
+			CLNP_proto, X25_proto, TP_proto);
+	ENDDEBUG
+#ifdef notdef
+	/* disabled: registration of the per-protocol incoming-call handlers */
+	pk_protolisten(0x81, 0, clnp_incoming);
+	pk_protolisten(0x82, 0, esis_incoming);
+	pk_protolisten(0x84, 0, tp8878_A_incoming);
+	pk_protolisten(0, 0, tp_incoming);
+#endif
+}
+
+/*
+ * Handle an incoming X.25 call on logical channel lcp whose call packet
+ * is in m: allocate an isopcb on the tp_isopcb chain, bind it to the
+ * channel, confirm the call, and fill in the pcb's local and foreign
+ * addresses from the channel's DTE addresses and the packet's facilities.
+ * If no pcb can be allocated the channel is closed instead.
+ */
+tp_incoming(lcp, m)
+struct pklcd *lcp;
+register struct mbuf *m;
+{
+ register struct isopcb *isop;
+ int cons_tpinput();
+
+ if (iso_pcballoc((struct socket *)0, &tp_isopcb)) {
+ pk_close(lcp);
+ return;
+ }
+ /* NOTE(review): presumably iso_pcballoc() links the new pcb first on the chain -- confirm */
+ isop = tp_isopcb.isop_next;
+ /* bind channel -> pcb so later packets reach cons_tpinput() with this pcb */
+ lcp->lcd_upper = cons_tpinput;
+ lcp->lcd_upnext = (caddr_t)isop;
+ lcp->lcd_send(lcp); /* Confirms call */
+ /* bind pcb -> channel and point the address pointers at the pcb's own storage */
+ isop->isop_chan = (caddr_t)lcp;
+ isop->isop_laddr = &isop->isop_sladdr;
+ isop->isop_faddr = &isop->isop_sfaddr;
+ /* start from NSAPs derived from the DTE addresses ... */
+ DTEtoNSAP(isop->isop_laddr, &lcp->lcd_laddr);
+ DTEtoNSAP(isop->isop_faddr, &lcp->lcd_faddr);
+ /* ... then let any NSAP facilities in the call packet refine them (result ignored) */
+ parse_facil(lcp, isop, &(mtod(m, struct x25_packet *)->packet_data),
+ m->m_pkthdr.len - PKHEADERLN);
+}
+
+/*
+ * Upper-layer input routine installed in lcd_upper for TP-over-CONS
+ * channels; lcd_upnext carries the isopcb bound to the channel.
+ *
+ * A null m0 is handled like a CLEAR.  Data mbufs are passed to
+ * tpcons_input().  Control packets are decoded with pk_decode(): RR,
+ * CLEAR, CLEAR_CONF and RESET are reported through tpcons_ctlinput(),
+ * everything else is ignored.  After a PRC_ROUTEDEAD report the pcb is
+ * detached if nothing references it any more.
+ */
+cons_tpinput(lcp, m0)
+struct mbuf *m0;
+struct pklcd *lcp;
+{
+	register struct isopcb *isop = (struct isopcb *)lcp->lcd_upnext;
+	int cmd, ptype = CLEAR;
+
+	if (isop == 0)
+		return;
+	if (m0 == 0)
+		goto dead;
+	switch(m0->m_type) {
+	case MT_DATA:
+	case MT_OOBDATA:
+		tpcons_input(m0, isop->isop_faddr, isop->isop_laddr, (caddr_t)lcp);
+		return;
+
+	case MT_CONTROL:
+		switch (ptype = pk_decode(mtod(m0, struct x25_packet *))) {
+
+		case RR:
+			cmd = PRC_CONS_SEND_DONE;
+			break;
+
+		case CALL_ACCEPTED:
+			if (lcp->lcd_sb.sb_mb)
+				lcp->lcd_send(lcp); /* XXX - fix this */
+			/*FALLTHROUGH*/
+		default:
+			return;
+
+		dead:
+		case CLEAR:
+		case CLEAR_CONF:
+			/* the channel is gone: sever the binding in both directions */
+			lcp->lcd_upper = 0;
+			lcp->lcd_upnext = 0;
+			isop->isop_chan = 0;
+			/*FALLTHROUGH*/
+		case RESET:
+			cmd = PRC_ROUTEDEAD;
+		}
+		tpcons_ctlinput(cmd, isop->isop_faddr, isop);
+		/*
+		 * This must be a comparison: with '=' the test degenerated to
+		 * "refcnt == 0" and an unreferenced pcb was torn down even
+		 * after a plain RR.
+		 */
+		if (cmd == PRC_ROUTEDEAD && isop->isop_refcnt == 0)
+			iso_pcbdetach(isop);
+	}
+}
+
+/*
+ * NAME: cons_connect()
+ * CALLED FROM:
+ * tpcons_pcbconnect() when opening a new connection.
+ * FUNCTION and ARGUMENTS:
+ * Figures out which device to use, finding a route if one doesn't
+ * already exist.
+ * RETURN VALUE:
+ * returns E*
+ */
+cons_connect(isop)
+	register struct isopcb *isop;
+{
+	/* the X.25 logical channel already associated with this pcb */
+	register struct pklcd *lcp = (struct pklcd *)isop->isop_chan;
+	int error;
+
+	IFDEBUG(D_CCONN)
+		printf("cons_connect(0x%x): ", isop);
+		dump_isoaddr(isop->isop_faddr);
+		printf("myaddr: ");
+		dump_isoaddr(isop->isop_laddr);
+		printf("\n" );
+	ENDDEBUG
+	/* derive the called DTE address from the foreign NSAP */
+	NSAPtoDTE(isop->isop_faddr, &lcp->lcd_faddr);
+	/* route packets arriving on the channel back to this pcb */
+	lcp->lcd_upper = cons_tpinput;
+	lcp->lcd_upnext = (caddr_t)isop;
+	IFDEBUG(D_CCONN)
+		printf(
+		"calling make_partial_x25_packet( 0x%x, 0x%x, 0x%x)\n",
+			&lcp->lcd_faddr, &lcp->lcd_laddr,
+			isop->isop_socket->so_proto->pr_protocol);
+	ENDDEBUG
+	/*
+	 * make_partial_x25_packet() takes exactly (isop, lcp); the former
+	 * third argument was an uninitialized local and has been dropped.
+	 */
+	if ((error = make_partial_x25_packet(isop, lcp)) == 0)
+		error = pk_connect(lcp, &lcp->lcd_faddr);
+	return error;
+}
+
+/*
+ **************************** DEVICE cons ***************************
+ */
+
+
+/*
+ * NAME: cons_ctlinput()
+ * CALLED FROM:
+ * lower layer when ECN_CLEAR occurs : this routine is here
+ * for consistency - cons subnet service calls its higher layer
+ * through the protosw entry.
+ * FUNCTION & ARGUMENTS:
+ * cmd is a PRC_* command, list found in ../sys/protosw.h
+ * copcb is the obvious.
+ * This serves the higher-layer cons service.
+ * NOTE: this takes 3rd arg. because cons uses it to inform itself
+ * of things (timeouts, etc) but has a pcb instead of an address.
+ */
+cons_ctlinput(cmd, sa, copcb)
+ int cmd;
+ struct sockaddr *sa;
+ register struct pklcd *copcb;
+{
+ /* Empty body: every cmd is ignored and no value is returned. */
+}
+
+
+/*
+ * Map the cause octet of the X.25 packet xp to a CONS error code.
+ *
+ * Recognized causes are folded into CONL_ERROR_MASK-based codes.  For
+ * any other cause the packet_data octet is returned, or E_CO_HLI_DISCA
+ * when that octet is zero.  A null xp also yields E_CO_HLI_DISCA.
+ */
+find_error_reason( xp )
+	register struct x25_packet *xp;
+{
+	extern u_char x25_error_stats[];
+	int error, cause;
+
+	/* nothing to inspect: report the generic fallback instead of faulting */
+	if (xp == 0)
+		return E_CO_HLI_DISCA;
+
+	/*
+	 * Fetch the cause as an unsigned octet; through a plain (possibly
+	 * signed) char the 0x80..0xff cases below could never match.
+	 */
+	cause = ((u_char *)xp)[4];
+	switch (cause) {
+		case 0x00:
+		case 0x80:
+			/* DTE originated; look at the diagnostic */
+			error = (CONL_ERROR_MASK | cause);
+			goto done;
+
+		case 0x01: /* number busy */
+		case 0x81:
+		case 0x09: /* Out of order */
+		case 0x89:
+		case 0x11: /* Remote Procedure Error */
+		case 0x91:
+		case 0x19: /* reverse charging accept not subscribed */
+		case 0x99:
+		case 0x21: /* Incompatible destination */
+		case 0xa1:
+		case 0x29: /* fast select accept not subscribed */
+		case 0xa9:
+		case 0x39: /* ship absent */
+		case 0xb9:
+		case 0x03: /* invalid facil request */
+		case 0x83:
+		case 0x0b: /* access barred */
+		case 0x8b:
+		case 0x13: /* local procedure error */
+		case 0x93:
+		case 0x05: /* network congestion */
+		case 0x85:
+		case 0x8d: /* not obtainable */
+		case 0x0d:
+		case 0x95: /* RPOA out of order */
+		case 0x15:
+			/* take out bit 8
+			 * so we don't have to have so many perror entries
+			 */
+			error = (CONL_ERROR_MASK | 0x100 | (cause & ~0x80));
+			goto done;
+
+		case 0xc1: /* gateway-detected proc error */
+		case 0xc3: /* gateway congestion */
+
+			error = (CONL_ERROR_MASK | 0x100 | cause);
+			goto done;
+	}
+	/* otherwise, a *hopefully* valid perror exists in the e_reason field */
+	error = xp->packet_data;
+	/* compare, don't assign: '=' made this function return 0 here */
+	if (error == 0) {
+		printf("Incoming PKT TYPE 0x%x with reason 0x%x\n",
+			pk_decode(xp),
+			cause);
+		error = E_CO_HLI_DISCA;
+	}
+
+done:
+	return error;
+}
+
+
+
+#endif /* KERNEL */
+
+/*
+ * NAME: make_partial_x25_packet()
+ *
+ * FUNCTION and ARGUMENTS:
+ * Makes part of an X.25 call packet, for use by x25.
+ * (src) and (dst) are the NSAP-addresses of source and destination.
+ * (buf) is a ptr to a buffer into which to write this partial header.
+ *
+ * 0 Facility length (in octets)
+ * 1 Facility field, which is a set of:
+ * m facil code
+ * m+1 facil param len (for >2-byte facilities) in octets
+ * m+2..p facil param field
+ * q user data (protocol identification octet)
+ *
+ *
+ * RETURNS:
+ * 0 if OK
+ * E* if failed.
+ *
+ * SIDE EFFECTS:
+ * Stores facilities mbuf in X.25 control block, where the connect
+ * routine knows where to look for it.
+ */
+
+/*
+ * cons_use_facils: when zero, make_partial_x25_packet() builds no
+ * facilities mbuf and clears lcd_facilities.  It defaults to on only
+ * when X25_1984 is defined.
+ */
+#ifdef X25_1984
+int cons_use_facils = 1;
+#else /* X25_1984 */
+int cons_use_facils = 0;
+#endif /* X25_1984 */
+
+/*
+ * cons_use_udata: when nonzero, make_partial_x25_packet() copies the
+ * pcb's isop_x25crud bytes (if any) into the channel's x25_udata.
+ */
+int cons_use_udata = 1; /* KLUDGE FOR DEBUGGING */
+
+Static int
+make_partial_x25_packet(isop, lcp)
+	struct isopcb *isop;
+	struct pklcd *lcp;
+{
+	caddr_t buf;
+	register caddr_t ptr;
+	register int len = 0;
+	int buflen = 0;
+	int facil_octets;
+	caddr_t facil_len;
+	struct mbuf *m;
+
+
+	IFDEBUG(D_CCONN)
+		printf("make_partial_x25_packet(0x%x, 0x%x)\n",
+			isop->isop_laddr, isop->isop_faddr);
+	ENDDEBUG
+	if (cons_use_udata) {
+		if (isop->isop_x25crud_len > 0) {
+			/*
+			 *	The user specified something. Stick it in
+			 */
+			bcopy(isop->isop_x25crud, lcp->lcd_faddr.x25_udata,
+					isop->isop_x25crud_len);
+			lcp->lcd_faddr.x25_udlen = isop->isop_x25crud_len;
+		}
+	}
+
+	if (cons_use_facils == 0) {
+		lcp->lcd_facilities = 0;
+		return 0;
+	}
+	MGETHDR(m, MT_DATA, M_WAITOK);
+	if (m == 0)
+		return ENOBUFS;
+
+	/*
+	 * Refuse before writing anything if the result would not fit in the
+	 * header mbuf: one facility-length octet, two marker octets, and for
+	 * each NSAP a code octet, two length octets and the address itself.
+	 * The mbuf is released on every error return so it cannot leak.
+	 */
+	buflen = 1 + 2 + (3 + isop->isop_laddr->siso_addr.isoa_len) +
+		(3 + isop->isop_faddr->siso_nlen);
+	if (buflen > MHLEN) {
+		m_freem(m);
+		return E_CO_PNA_LONG;
+	}
+
+	buf = mtod(m, caddr_t);
+	ptr = buf;
+
+	/* ptr now points to facil length (len of whole facil field in OCTETS */
+	facil_len = ptr ++;
+	m->m_len = 0;
+	pk_build_facilities(m, &lcp->lcd_faddr, 0);
+
+	IFDEBUG(D_CADDR)
+		printf("make_partial calling: ptr 0x%x, len 0x%x\n", ptr,
+				isop->isop_laddr->siso_addr.isoa_len);
+	ENDDEBUG
+
+	/* cons_use_facils is known to be nonzero from here on */
+	*ptr++ = 0; /* Marker to separate X.25 facilities from CCITT ones */
+	*ptr++ = 0x0f;
+	*ptr = 0xcb; /* calling facility code */
+	ptr ++;
+	ptr ++; /* leave room for facil param len (in OCTETS + 1) */
+	ptr ++; /* leave room for the facil param len (in nibbles),
+			 * high two bits of which indicate full/partial NSAP
+			 */
+	len = isop->isop_laddr->siso_addr.isoa_len;
+	bcopy( isop->isop_laddr->siso_data, ptr, len);
+	*(ptr-2) = len+1; /* facil param len in octets */
+	*(ptr-1) = len<<1; /* facil param len in nibbles */
+	ptr += len;
+
+	IFDEBUG(D_CADDR)
+		printf("make_partial called: ptr 0x%x, len 0x%x\n", ptr,
+				isop->isop_faddr->siso_addr.isoa_len);
+	ENDDEBUG
+	*ptr = 0xc9; /* called facility code */
+	ptr ++;
+	ptr ++; /* leave room for facil param len (in OCTETS + 1) */
+	ptr ++; /* leave room for the facil param len (in nibbles),
+			 * high two bits of which indicate full/partial NSAP
+			 */
+	len = isop->isop_faddr->siso_nlen;
+	bcopy(isop->isop_faddr->siso_data, ptr, len);
+	*(ptr-2) = len+1; /* facil param len = addr len + 1 for each of these
+					  * two length fields, in octets */
+	*(ptr-1) = len<<1; /* facil param len in nibbles */
+	ptr += len;
+
+	/*
+	 * Check the facility length as an int before storing it: read back
+	 * through a (possibly signed) char a large value could turn
+	 * negative and slip past the limit.
+	 */
+	facil_octets = ptr - facil_len - 1;
+	if (facil_octets > MAX_FACILITIES) {
+		m_freem(m);
+		return E_CO_PNA_LONG;
+	}
+	*facil_len = facil_octets;
+
+	buflen = (int)(ptr - buf);
+
+	IFDEBUG(D_CDUMP_REQ)
+		register int i;
+
+		printf("ECN_CONNECT DATA buf 0x%x len %d (0x%x)\n",
+			buf, buflen, buflen);
+		for( i=0; i < buflen; ) {
+			printf("+%d: %x %x %x %x    %x %x %x %x\n",
+				i,
+				*(buf+i), *(buf+i+1), *(buf+i+2), *(buf+i+3),
+				*(buf+i+4), *(buf+i+5), *(buf+i+6), *(buf+i+7));
+			i+=8;
+		}
+	ENDDEBUG
+	IFDEBUG(D_CADDR)
+		printf("make_partial returns buf 0x%x size 0x%x bytes\n",
+			mtod(m, caddr_t), buflen);
+	ENDDEBUG
+
+	m->m_pkthdr.len = m->m_len = buflen;
+	/* hand the finished facilities mbuf to the channel */
+	lcp->lcd_facilities = m;
+	return 0;
+}
+
+/*
+ * NAME: NSAPtoDTE()
+ * CALLED FROM:
+ * make_partial_x25_packet()
+ * FUNCTION and ARGUMENTS:
+ * get a DTE address from an NSAP-address (struct sockaddr_iso)
+ * (dst_octet) is the octet into which to begin stashing the DTE addr
+ * (dst_nibble) takes 0 or 1. 1 means begin filling in the DTE addr
+ * in the high-order nibble of dst_octet. 0 means low-order nibble.
+ * (addr) is the NSAP-address
+ * (flag) is true if the transport suffix is to become the
+ * last two digits of the DTE address
+ * A DTE address is a series of ASCII digits
+ *
+ * A DTE address may have leading zeros. They are significant.
+ * 1 digit per nibble, may be an odd number of nibbles.
+ *
+ * An NSAP-address has the DTE address in the IDI. Leading zeros are
+ * significant. Trailing hex f indicates the end of the DTE address.
+ * The IDI is a series of BCD digits, one per nibble.
+ *
+ * RETURNS
+ * # significant digits in the DTE address, -1 if error.
+ */
+
+Static int
+NSAPtoDTE(siso, sx25)
+	register struct sockaddr_iso *siso;
+	register struct sockaddr_x25 *sx25;
+{
+	int dtelen = -1;
+
+	IFDEBUG(D_CADDR)
+		printf("NSAPtoDTE: nsap: %s\n", clnp_iso_addrp(&siso->siso_addr));
+	ENDDEBUG
+
+	if (siso->siso_data[0] == AFI_37) {
+		/*
+		 * The DTE digits follow the AFI octet, one BCD digit per
+		 * nibble; turn each into ASCII, dropping 0xf filler nibbles
+		 * and anything beyond 15 digits.
+		 */
+		register char *out = sx25->x25_addr;
+		register char *in = siso->siso_data + 1;
+		register int nibble;
+		char *lim = siso->siso_data + siso->siso_nlen;
+		char *olim = out+15;
+		int lowNibble = 0;
+
+		while (in < lim) {
+			nibble = ((lowNibble ? *in++ : (*in >> 4)) & 0xf) | 0x30;
+			lowNibble ^= 1;
+			if (nibble != 0x3f && out < olim)
+				*out++ = nibble;
+		}
+		dtelen = out - sx25->x25_addr;
+		*out++ = 0;
+	} else {
+		/* error = iso_8208snparesolve(addr, x121string, &x121strlen);*/
+		register struct rtentry *rt;
+		extern struct sockaddr_iso blank_siso;
+		struct sockaddr_iso nsiso;
+
+		/*
+		 * Build the lookup key: a blank sockaddr_iso carrying the
+		 * caller's NSAP.  bcopy() is (src, dst, len); the arguments
+		 * used to be swapped, which wiped the caller's address and
+		 * looked up a blank one.
+		 */
+		nsiso = blank_siso;
+		nsiso.siso_nlen = siso->siso_nlen;
+		bcopy(siso->siso_data, nsiso.siso_data, nsiso.siso_nlen);
+		/* NOTE(review): the route returned by rtalloc1() is never released here -- confirm whether a reference is held */
+		if (rt = rtalloc1(&nsiso, 1)) {
+			register struct sockaddr_x25 *sxx =
+				(struct sockaddr_x25 *)rt->rt_gateway;
+
+			rt->rt_use--;
+			if (sxx && sxx->x25_family == AF_CCITT) {
+				register char *in = sx25->x25_addr;
+				char *lim = in + sizeof(sx25->x25_addr);
+
+				/* copy the gateway's DTE address out to the caller */
+				bcopy(sxx->x25_addr, sx25->x25_addr,
+					sizeof(sx25->x25_addr));
+				/* count digits only, not the terminating NUL */
+				while (in < lim && *in)
+					in++;
+				dtelen = in - sx25->x25_addr;
+			}
+		}
+	}
+	return dtelen;
+}
+
+/*
+ * NAME: FACILtoNSAP()
+ * CALLED FROM:
+ * parse_facil()
+ * FUNCTION and ARGUMENTS:
+ * Creates an NSAP in the sockaddr_iso (addr) from the
+ * x.25 facility found at buf - 1.
+ * RETURNS:
+ * 0 if ok, -1 if error.
+ */
+
+Static int
+FACILtoNSAP(addr, buf)
+	register u_char *buf;
+	register struct sockaddr_iso *addr;
+{
+	/*
+	 * buf points at the octet just before the nibble-count octet; step
+	 * onto it.  Its low six bits give the address length in nibbles and
+	 * its top two bits select full vs. partial NSAP.
+	 */
+	int len_in_nibbles = *++buf & 0x3f;
+	u_char buf_len = (len_in_nibbles + 1) >> 1; /* in bytes */
+
+	IFDEBUG(D_CADDR)
+		printf("FACILtoNSAP( 0x%x, 0x%x, 0x%x )\n",
+			buf, buf_len, addr );
+	ENDDEBUG
+
+	/* despite the fact that X.25 makes us put a length in nibbles
+	 * here, the NSAP-addrs are always in full octets
+	 */
+	switch (*buf++ & 0xc0) {
+	case 0:
+		/* Entire OSI NSAP address */
+		/* same limit as the partial case below, so the copy stays bounded */
+		if (buf_len > sizeof(addr->siso_addr))
+			return -1;
+		addr->siso_nlen = buf_len;
+		bcopy((caddr_t)buf, addr->siso_data, buf_len);
+		break;
+
+	case 0x40:
+		/*
+		 * Partial OSI NSAP address, assume trailing.
+		 * (The label must be hex: the value is masked with 0xc0, so
+		 * decimal 40 could never match.)
+		 */
+		if (buf_len + addr->siso_nlen > sizeof(addr->siso_addr))
+			return -1;
+		bcopy((caddr_t)buf, TSEL(addr), buf_len);
+		addr->siso_nlen += buf_len;
+		break;
+
+	default:
+		/* Rather than blow away the connection, just ignore and use
+			NSAP from DTE */;
+	}
+	return 0;
+}
+
+/*
+ * Prime siso as an AF_ISO sockaddr whose NSAP starts with the AFI_37
+ * octet and has a provisional length of 8.
+ */
+Static
+init_siso(siso)
+register struct sockaddr_iso *siso;
+{
+	/* sockaddr header */
+	siso->siso_family = AF_ISO;
+	siso->siso_len = sizeof (*siso);
+
+	/* NSAP part */
+	siso->siso_nlen = 8;
+	siso->siso_data[0] = AFI_37;
+}
+
+/*
+ * NAME: DTEtoNSAP()
+ * CALLED FROM:
+ * parse_facil()
+ * FUNCTION and ARGUMENTS:
+ * Creates a type 37 NSAP in the sockaddr_iso (addr)
+ * from a DTE address found in a sockaddr_x25.
+ *
+ * RETURNS:
+ * 0 if ok; E* otherwise.
+ */
+
+Static int
+DTEtoNSAP(addr, sx)
+	struct sockaddr_iso *addr;
+	struct sockaddr_x25 *sx;
+{
+	register char *digits;
+	register char *nsap;
+	register int hi, lo;
+	int ndigits, odd, pairs;
+
+	init_siso(addr);
+	digits = sx->x25_addr;
+	ndigits = strlen(digits);
+	odd = ndigits & 1;
+
+	/* one AFI octet plus two digits per octet, rounded up */
+	addr->siso_nlen = (ndigits + 3) / 2;
+	nsap = addr->siso_data;
+	*nsap++ = 0x37;
+
+	/*
+	 * Pack the digits two to an octet, first digit in the high nibble.
+	 * With an odd count the last pair's low digit is taken from the
+	 * string terminator and then overwritten with the 0xf filler below.
+	 */
+	for (pairs = (ndigits + odd) / 2; pairs > 0; pairs--) {
+		hi = 0xf & *digits++;
+		lo = 0xf & *digits++;
+		*nsap++ = (hi << 4) | lo;
+	}
+	if (odd)
+		nsap[-1] |= 0xf;
+	return 0; /* ok */
+}
+
+/*
+ * FUNCTION and ARGUMENTS:
+ * parses (buf_len) bytes beginning at (buf) and finds
+ * a called nsap, a calling nsap, and protocol identifier.
+ * RETURNS:
+ * 0 if ok, E* otherwise.
+ */
+
+Static int
+parse_facil(lcp, isop, buf, buf_len)
+ caddr_t buf;
+ u_char buf_len; /* in bytes */
+ struct isopcb *isop;
+ struct pklcd *lcp;
+{
+ register int i;
+ register u_char *ptr = (u_char *)buf;
+ /* NOTE(review): ptr_lim is never used, and buf_len is only used for the debug dump -- the walk below is bounded by the packet's own facility length alone */
+ u_char *ptr_lim, *facil_lim;
+ int facil_param_len, facil_len;
+
+ IFDEBUG(D_CADDR)
+ printf("parse_facil(0x%x, 0x%x, 0x%x, 0x%x)\n",
+ lcp, isop, buf, buf_len);
+ dump_buf(buf, buf_len);
+ ENDDEBUG
+
+ /* find the beginnings of the facility fields in buf
+ * by skipping over the called & calling DTE addresses
+ * i <- # nibbles in called + # nibbles in calling
+ * i += 1 so that an odd nibble gets rounded up to even
+ * before dividing by 2, then divide by two to get # octets
+ */
+ i = (int)(*ptr >> 4) + (int)(*ptr&0xf);
+ i++;
+ ptr += i >> 1;
+ ptr ++; /* plus one for the DTE lengths byte */
+
+ /* ptr now is at facil_length field */
+ facil_len = *ptr++;
+ facil_lim = ptr + facil_len;
+ IFDEBUG(D_CADDR)
+ printf("parse_facils: facil length is 0x%x\n", (int) facil_len);
+ ENDDEBUG
+
+ /*
+ * Walk the facilities one at a time.  Each case sets facil_param_len
+ * to the fixed parameter size, to 0 for "length octet follows", or
+ * (via FACILtoNSAP) to -1 for a malformed NSAP facility.
+ */
+ while (ptr < facil_lim) {
+ /* get NSAP addresses from facilities */
+ switch (*ptr++) {
+ case 0xcb:
+ /* calling NSAP */
+ facil_param_len = FACILtoNSAP(isop->isop_faddr, ptr);
+ break;
+ case 0xc9:
+ /* called NSAP */
+ facil_param_len = FACILtoNSAP(isop->isop_laddr, ptr);
+ break;
+
+ /* from here to default are legit cases that I ignore */
+ /* variable length */
+ case 0xca: /* end-to-end transit delay negot */
+ case 0xc6: /* network user id */
+ case 0xc5: /* charging info : indicating monetary unit */
+ case 0xc2: /* charging info : indicating segment count */
+ case 0xc1: /* charging info : indicating call duration */
+ case 0xc4: /* RPOA extended format */
+ case 0xc3: /* call redirection notification */
+ facil_param_len = 0;
+ break;
+
+ /* 1 octet */
+ case 0x0a: /* min. throughput class negot */
+ case 0x02: /* throughput class */
+ case 0x03: case 0x47: /* CUG shit */
+ case 0x0b: /* expedited data negot */
+ case 0x01: /* Fast select or reverse charging
+ (example of intelligent protocol design) */
+ case 0x04: /* charging info : requesting service */
+ case 0x08: /* called line addr modified notification */
+ case 0x00: /* marker to indicate beginning of CCITT facils */
+ facil_param_len = 1;
+ break;
+
+ /* any 2 octets */
+ case 0x42: /* pkt size */
+ case 0x43: /* win size */
+ case 0x44: /* RPOA basic format */
+ case 0x41: /* bilateral CUG shit */
+ case 0x49: /* transit delay selection and indication */
+ facil_param_len = 2;
+ break;
+
+ default:
+ printf(
+"BOGUS FACILITY CODE facil_lim 0x%x facil_len %d, ptr 0x%x *ptr 0x%x\n",
+ facil_lim, facil_len, ptr - 1, ptr[-1]);
+ /* facil that we don't handle
+ return E_CO_HLI_REJI; */
+ /* unknown code: take the parameter size from the code's top two bits */
+ switch (ptr[-1] & 0xc0) {
+ case 0x00: facil_param_len = 1; break;
+ case 0x40: facil_param_len = 2; break;
+ case 0x80: facil_param_len = 3; break;
+ case 0xc0: facil_param_len = 0; break;
+ }
+ }
+ if (facil_param_len == -1)
+ return E_CO_REG_ICDA;
+ if (facil_param_len == 0) /* variable length */
+ facil_param_len = (int)*ptr++; /* 1 + the real facil param */
+ ptr += facil_param_len;
+ }
+ return 0;
+}
+
+#endif /* TPCONS */
diff --git a/sys/netiso/if_eon.c b/sys/netiso/if_eon.c
new file mode 100644
index 000000000000..3c05133040a2
--- /dev/null
+++ b/sys/netiso/if_eon.c
@@ -0,0 +1,609 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_eon.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: if_eon.c,v 1.4 88/07/19 15:53:59 hagens Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/if_eon.c,v $
+ *
+ * EON rfc
+ * Layer between IP and CLNL
+ *
+ * TODO:
+ * Put together a current rfc986 address format and get the right offset
+ * for the nsel
+ */
+
+#ifdef EON
+#define NEON 1
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/buf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/types.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_dl.h>
+#include <net/netisr.h>
+#include <net/route.h>
+#include <machine/mtpr.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/if_ether.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_snpac.h>
+#include <netiso/argo_debug.h>
+#include <netiso/iso_errno.h>
+#include <netiso/eonvar.h>
+
+extern struct timeval time; /* copied into if_lastchange on output and input */
+extern struct ifnet loif; /* its if_mtu seeds rmx_mtu in eonrtrequest() */
+
+#define EOK 0 /* value the iso_check_csum() result is compared against */
+
+int eoninput();
+int eonoutput();
+int eonioctl();
+int eonattach();
+int eoninit();
+void eonrtrequest();
+struct ifnet eonif[1]; /* the single eon interface, filled in by eonattach() */
+
+eonprotoinit() { /* Protocol initialisation hook: its only action is to
+ * call eonattach(), discarding the result. */
+ (void) eonattach();
+}
+
+struct eon_llinfo eon_llinfo;	/* head of the queue of per-route eon_llinfo entries */
+#define PROBE_OK 0	/* no trailing ';' so the macro is usable inside expressions */
+
+
+/*
+ * FUNCTION:		eonattach
+ *
+ * PURPOSE:		attach routine: fills in the single eon ifnet,
+ *			hands it to if_attach(), issues SIOCSIFADDR on its
+ *			address list and makes the eon_llinfo queue head
+ *			point at itself (empty queue).
+ *
+ * RETURNS:		no value is returned
+ */
+
+eonattach()
+{
+	register struct ifnet *eifp = &eonif[0];
+
+	IFDEBUG(D_EON)
+		printf("eonattach()\n");
+	ENDDEBUG
+
+	/* identity */
+	eifp->if_name = "eon";
+	eifp->if_unit = 0;
+	eifp->if_type = IFT_EON;
+	eifp->if_flags = IFF_BROADCAST;
+
+	/* sizes: everything will go out over ether or token ring */
+	eifp->if_mtu = ETHERMTU;
+	eifp->if_hdrlen = EONIPLEN;
+	eifp->if_addrlen = 5;
+
+	/* entry points */
+	eifp->if_output = eonoutput;
+	eifp->if_ioctl = eonioctl;
+	eifp->if_init = eoninit;
+
+	if_attach(eifp);
+	eonioctl(eifp, SIOCSIFADDR, (caddr_t)eifp->if_addrlist);
+
+	/* empty queue: the head is its own successor and predecessor */
+	eon_llinfo.el_qhdr.rlink = &(eon_llinfo.el_qhdr);
+	eon_llinfo.el_qhdr.link = eon_llinfo.el_qhdr.rlink;
+
+	IFDEBUG(D_EON)
+		printf("eonattach()\n");
+	ENDDEBUG
+}
+
+
+/*
+ * FUNCTION:		eonioctl
+ *
+ * PURPOSE:		io controls - ifconfig.
+ *			Only SIOCSIFADDR is acted upon: with a non-null
+ *			(data), taken as a struct ifaddr *, the interface is
+ *			marked IFF_UP and, unless the address family is
+ *			AF_LINK, eonrtrequest is installed as ifa_rtrequest.
+ *			All other commands are accepted and ignored.
+ *			Must be callable from kernel or user.
+ *
+ * RETURNS:		always 0
+ */
+eonioctl(ifp, cmd, data)
+	register struct ifnet *ifp;
+	int cmd;
+	register caddr_t data;
+{
+	register struct ifaddr *addr;
+	int ipl = splimp();
+
+	IFDEBUG(D_EON)
+		printf("eonioctl (cmd 0x%x) \n", cmd);
+	ENDDEBUG
+
+	if (cmd == SIOCSIFADDR && (addr = (struct ifaddr *)data) != 0) {
+		ifp->if_flags |= IFF_UP;
+		if (addr->ifa_addr->sa_family != AF_LINK)
+			addr->ifa_rtrequest = eonrtrequest;
+	}
+
+	splx(ipl);
+	return (0);
+}
+
+
+/*
+ * FUNCTION:		eoniphdr
+ *
+ * PURPOSE:		Build the IP + EON header template in *hdr for the
+ *			IP address (sizeof(struct in_addr) bytes) found at
+ *			loc, and revalidate / allocate the IP route cached
+ *			in *ro for that destination.
+ *
+ * ARGUMENTS:		(class) is stored in the EON header class field;
+ *			(zero) non-zero means clear *hdr and *ro first.
+ *
+ * RETURNS:		no value is returned
+ *
+ * NOTES:		NOTE(review): with (zero) set, *ro is cleared without
+ *			releasing a route it may already reference -- confirm
+ *			that callers never pass a live cached route that way.
+ */
+eoniphdr(hdr, loc, ro, class, zero)
+struct route *ro;
+register struct eon_iphdr *hdr;
+caddr_t loc;
+{
+	struct mbuf csum_mb;	/* fake mbuf wrapped around the EON header */
+	register struct sockaddr_in *to = (struct sockaddr_in *)&ro->ro_dst;
+	register struct rtentry *cached;
+
+	if (zero) {
+		bzero((caddr_t)hdr, sizeof (*hdr));
+		bzero((caddr_t)ro, sizeof (*ro));
+	}
+	to->sin_len = sizeof (*to);
+	to->sin_family = AF_INET;
+	bcopy(loc, (caddr_t)&to->sin_addr, sizeof(struct in_addr));
+
+	/*
+	 * Keep a cached route only while it is up and still leads to
+	 * the same destination; otherwise drop it and look up afresh.
+	 */
+	if ((cached = ro->ro_rt) != 0) {
+		struct sockaddr_in *key = (struct sockaddr_in *)rt_key(cached);
+
+		if (!(cached->rt_flags & RTF_UP) ||
+		    key->sin_addr.s_addr != to->sin_addr.s_addr) {
+			RTFREE(cached);
+			ro->ro_rt = (struct rtentry *)0;
+		}
+	}
+	rtalloc(ro);
+	if (ro->ro_rt)
+		ro->ro_rt->rt_use++;
+
+	/* IP part of the template */
+	hdr->ei_ip.ip_ttl = MAXTTL;
+	hdr->ei_ip.ip_p = IPPROTO_EON;
+	hdr->ei_ip.ip_dst = to->sin_addr;
+
+	/* EON part; the checksum field must be zero while it is computed */
+	hdr->ei_eh.eonh_vers = EON_VERSION;
+	hdr->ei_eh.eonh_class = class;
+	hdr->ei_eh.eonh_csum = 0;
+
+	csum_mb.m_next = 0;
+	csum_mb.m_len = sizeof(struct eon_hdr);
+	csum_mb.m_data = (caddr_t) &hdr->ei_eh;
+	IFDEBUG(D_EON)
+		printf("eonoutput : gen csum (0x%x, offset %d, datalen %d)\n",
+			&csum_mb,
+			_offsetof(struct eon_hdr, eonh_csum), sizeof(struct eon_hdr));
+	ENDDEBUG
+	iso_gen_csum(&csum_mb,
+		_offsetof(struct eon_hdr, eonh_csum), sizeof(struct eon_hdr));
+}
+/*
+ * FUNCTION:		eonrtrequest
+ *
+ * PURPOSE:		maintains list of direct eon recipients.
+ *			sets up IP route for rest.
+ *
+ * ARGUMENTS:		(cmd) is the routing request (RTM_*);
+ *			(rt) is the route being added, resolved, changed or
+ *			deleted; its rt_llinfo holds the struct eon_llinfo;
+ *			(gate) is the gateway, or 0 to use rt->rt_gateway.
+ *
+ * RETURNS:		nothing
+ *
+ * NOTES:		RTM_DELETE unlinks and frees the llinfo.
+ *			RTM_ADD / RTM_RESOLVE allocate and enqueue a new one.
+ *			Any other request refreshes the existing llinfo and
+ *			is ignored when the route has none.
+ */
+void
+eonrtrequest(cmd, rt, gate)
+register struct rtentry *rt;
+register struct sockaddr *gate;
+{
+	unsigned long zerodst = 0;
+	caddr_t	ipaddrloc = (caddr_t) &zerodst;
+	register struct eon_llinfo *el = (struct eon_llinfo *)rt->rt_llinfo;
+
+	/*
+	 * Common Housekeeping
+	 */
+	switch (cmd) {
+	case RTM_DELETE:
+		if (el) {
+			remque(&(el->el_qhdr));
+			if (el->el_iproute.ro_rt)
+				RTFREE(el->el_iproute.ro_rt);
+			Free(el);
+			rt->rt_llinfo = 0;
+		}
+		return;
+
+	case RTM_ADD:
+	case RTM_RESOLVE:
+		rt->rt_rmx.rmx_mtu = loif.if_mtu; /* unless better below */
+		R_Malloc(el, struct eon_llinfo *, sizeof(*el));
+		rt->rt_llinfo = (caddr_t)el;
+		if (el == 0)
+			return;
+		Bzero(el, sizeof(*el));
+		insque(&(el->el_qhdr), &eon_llinfo.el_qhdr);
+		el->el_rt = rt;
+		break;
+	}
+	/*
+	 * Every statement below dereferences el; a request other than
+	 * add/resolve on a route without llinfo has nothing to update.
+	 */
+	if (el == 0)
+		return;
+	if (gate || (gate = rt->rt_gateway)) switch (gate->sa_family) {
+		case AF_LINK:
+#define SDL(x) ((struct sockaddr_dl *)x)
+			/* a one byte link address is an offset, not an IP address */
+			if (SDL(gate)->sdl_alen == 1)
+				el->el_snpaoffset = *(u_char *)LLADDR(SDL(gate));
+			else
+				ipaddrloc = LLADDR(SDL(gate));
+			break;
+		case AF_INET:
+#define SIN(x) ((struct sockaddr_in *)x)
+			ipaddrloc = (caddr_t) &SIN(gate)->sin_addr;
+			break;
+		default:
+			return;
+	}
+	el->el_flags |= RTF_UP;
+	eoniphdr(&el->el_ei, ipaddrloc, &el->el_iproute, EON_NORMAL_ADDR, 0);
+	/* leave room for the prepended IP + EON header */
+	if (el->el_iproute.ro_rt)
+		rt->rt_rmx.rmx_mtu = el->el_iproute.ro_rt->rt_rmx.rmx_mtu
+							- sizeof(el->el_ei);
+}
+
+/*
+ * FUNCTION: eoninit
+ *
+ * PURPOSE: initialization; the body only prints a message naming
+ * the unit. Installed as if_init by eonattach().
+ *
+ * RETURNS: nothing
+ */
+
+eoninit(unit)
+ int unit;
+{
+ printf("eon driver-init eon%d\n", unit);
+}
+
+
+/*
+ * FUNCTION:		eonoutput
+ *
+ * PURPOSE:		prepend an eon header and hand to IP
+ * ARGUMENTS:		(ifp) points to the ifnet structure for this unit/device
+ *			(m)  is an mbuf *, *m is a CLNL packet; it is always
+ *			     consumed (sent or freed)
+ *			(dst) is a destination address - have to interp. as
+ *			     multicast or broadcast or real address.
+ *			(rt) is the route; when it is 0 or carries no llinfo
+ *			     the IP address is taken straight from (dst)
+ *
+ * RETURNS:		unix error code
+ *
+ * NOTES:		The unrouted path uses one static header and one
+ *			static route; the reference held by that route is
+ *			released before it is rebuilt for the next packet.
+ */
+eonoutput(ifp, m, dst, rt)
+	struct ifnet	*ifp;
+	register struct mbuf *m;		/* packet */
+	struct sockaddr_iso *dst;		/* destination addr */
+	struct rtentry *rt;
+{
+	register struct eon_llinfo *el;
+	register struct eon_iphdr *ei;
+	struct route *ro;
+	int	datalen;
+	struct mbuf *mh;
+	int	error = 0, class = 0, alen = 0;
+	caddr_t ipaddrloc;
+	static struct eon_iphdr eon_iphdr;
+	static struct route route;
+
+	IFDEBUG(D_EON)
+		printf("eonoutput \n" );
+	ENDDEBUG
+
+	ifp->if_lastchange = time;
+	ifp->if_opackets++;
+	if (rt == 0 || (el = (struct eon_llinfo *)rt->rt_llinfo) == 0) {
+		/* m_pkthdr.len is read at send:, so insist on a packet header */
+		if ((m->m_flags & M_PKTHDR) == 0) {
+			printf("eon: got non headered packet\n");
+			goto einval;
+		}
+		if (dst->siso_family == AF_LINK) {
+			register struct sockaddr_dl *sdl = (struct sockaddr_dl *)dst;
+
+			ipaddrloc = LLADDR(sdl);
+			alen = sdl->sdl_alen;
+		} else if (dst->siso_family == AF_ISO && dst->siso_data[0] == AFI_SNA) {
+			alen = dst->siso_nlen - 1;
+			ipaddrloc = (caddr_t) dst->siso_data + 1;
+		}
+		switch (alen) {
+		case 5:
+			/* fifth byte carries the EON class */
+			class =  4[(u_char *)ipaddrloc];
+			/* FALLTHROUGH */
+		case 4:
+			ro = &route;
+			ei = &eon_iphdr;
+			/*
+			 * eoniphdr() clears *ro when asked to zero it, so
+			 * give back the route cached by the previous packet
+			 * first or its reference is lost.
+			 */
+			if (ro->ro_rt) {
+				RTFREE(ro->ro_rt);
+				ro->ro_rt = (struct rtentry *)0;
+			}
+			eoniphdr(ei, ipaddrloc, ro, class, 1);
+			goto send;
+		}
+einval:
+		error = EINVAL;
+		goto flush;
+	}
+	if ((el->el_flags & RTF_UP) == 0) {
+		eonrtrequest(RTM_CHANGE, rt, (struct sockaddr *)0);
+		if ((el->el_flags & RTF_UP) == 0) {
+			error = EHOSTUNREACH;
+			goto flush;
+		}
+	}
+	if ((m->m_flags & M_PKTHDR) == 0) {
+		printf("eon: got non headered packet\n");
+		goto einval;
+	}
+	ei = &el->el_ei;
+	ro = &el->el_iproute;
+	if (el->el_snpaoffset) {
+		/* IP address is embedded in the ISO address at this offset */
+		if (dst->siso_family == AF_ISO) {
+			bcopy((caddr_t) &dst->siso_data[el->el_snpaoffset],
+					(caddr_t) &ei->ei_ip.ip_dst, sizeof(ei->ei_ip.ip_dst));
+		} else
+			goto einval;
+	}
+send:
+	/* put an eon_hdr in the buffer, prepended by an ip header */
+	datalen = m->m_pkthdr.len + EONIPLEN;
+	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+	if(mh == (struct mbuf *)0) {
+		/* the packet is dropped: tell the caller so */
+		error = ENOBUFS;
+		goto flush;
+	}
+	mh->m_next = m;
+	m = mh;
+	MH_ALIGN(m, sizeof(struct eon_iphdr));
+	m->m_len = sizeof(struct eon_iphdr);
+	ifp->if_obytes +=
+		(ei->ei_ip.ip_len = (u_short)(m->m_pkthdr.len = datalen));
+	*mtod(m, struct eon_iphdr *) = *ei;
+
+	IFDEBUG(D_EON)
+		printf("eonoutput dst ip addr : %x\n",  ei->ei_ip.ip_dst.s_addr);
+		printf("eonoutput ip_output : eonip header:\n");
+		dump_buf(ei, sizeof(struct eon_iphdr));
+	ENDDEBUG
+
+	error = ip_output(m, (struct mbuf *)0, ro, 0, NULL);
+	m = 0;		/* ip_output owns the chain now */
+	if (error) {
+		ifp->if_oerrors++;
+		ifp->if_opackets--;
+		ifp->if_obytes -= datalen;
+	}
+flush:
+	if (m)
+		m_freem(m);
+	return error;
+}
+
+eoninput(m, iphlen) /* Input side: takes an IP datagram carrying an EON
+ * packet, checks the IP protocol number, the EON checksum and the EON
+ * version, flags broadcast/multicast on the mbuf, and queues it on
+ * clnlintrq. (m) is the packet; (iphlen) is the IP header length, and
+ * IP options are stripped when it exceeds sizeof(struct ip).
+ * No value is returned. */
+ register struct mbuf *m;
+ int iphlen;
+{
+ register struct eon_hdr *eonhdr;
+ register struct ip *iphdr;
+ struct ifnet *eonifp;
+ int s;
+
+ eonifp = &eonif[0]; /* kludge - really want to give CLNP
+ * the ifp for eon, not for the real device
+ */
+
+ IFDEBUG(D_EON)
+ printf("eoninput() 0x%x m_data 0x%x m_len 0x%x dequeued\n",
+ m, m?m->m_data:0, m?m->m_len:0);
+ ENDDEBUG
+
+ if (m == 0)
+ return;
+ if (iphlen > sizeof (struct ip)) /* IP options present */
+ ip_stripoptions(m, (struct mbuf *)0);
+ if (m->m_len < EONIPLEN) { /* need IP + EON header contiguous */
+ if ((m = m_pullup(m, EONIPLEN)) == 0) {
+ IncStat(es_badhdr);
+drop: /* shared error exit, also reached by goto from the checks below */
+ IFDEBUG(D_EON)
+ printf("eoninput: DROP \n" );
+ ENDDEBUG
+ eonifp->if_ierrors ++;
+ m_freem(m); /* m is 0 here on the m_pullup path; NOTE(review):
+ * assumes m_freem(0) is harmless -- confirm */
+ return;
+ }
+ }
+ eonif->if_ibytes += m->m_pkthdr.len;
+ eonif->if_lastchange = time;
+ iphdr = mtod(m, struct ip *);
+ /* do a few checks for debugging */
+ if( iphdr->ip_p != IPPROTO_EON ) {
+ IncStat(es_badhdr);
+ goto drop;
+ }
+ /* temporarily drop ip header from the mbuf */
+ m->m_data += sizeof(struct ip);
+ eonhdr = mtod(m, struct eon_hdr *);
+ if( iso_check_csum( m, sizeof(struct eon_hdr) ) != EOK ) {
+ IncStat(es_badcsum);
+ goto drop;
+ }
+ m->m_data -= sizeof(struct ip); /* undo the adjustment made above */
+
+ IFDEBUG(D_EON)
+ printf("eoninput csum ok class 0x%x\n", eonhdr->eonh_class );
+ printf("eoninput: eon header:\n");
+ dump_buf(eonhdr, sizeof(struct eon_hdr));
+ ENDDEBUG
+
+ /* checks for debugging */
+ if( eonhdr->eonh_vers != EON_VERSION) {
+ IncStat(es_badhdr);
+ goto drop;
+ }
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ switch( eonhdr->eonh_class) { /* other class values: counted nowhere, flags stay clear */
+ case EON_BROADCAST:
+ IncStat(es_in_broad);
+ m->m_flags |= M_BCAST;
+ break;
+ case EON_NORMAL_ADDR:
+ IncStat(es_in_normal);
+ break;
+ case EON_MULTICAST_ES:
+ IncStat(es_in_multi_es);
+ m->m_flags |= M_MCAST;
+ break;
+ case EON_MULTICAST_IS:
+ IncStat(es_in_multi_is);
+ m->m_flags |= M_MCAST;
+ break;
+ }
+ eonifp->if_ipackets++;
+
+ {
+ /* put it on the CLNP queue and set soft interrupt */
+ struct ifqueue *ifq;
+ extern struct ifqueue clnlintrq;
+
+ m->m_pkthdr.rcvif = eonifp; /* KLUDGE */
+ IFDEBUG(D_EON)
+ printf("eoninput to clnl IFQ\n");
+ ENDDEBUG
+ ifq = &clnlintrq;
+ s = splimp();
+ if (IF_QFULL(ifq)) {
+ IF_DROP(ifq);
+ m_freem(m);
+ eonifp->if_iqdrops++;
+ eonifp->if_ipackets--; /* take back the increment made above */
+ splx(s);
+ return;
+ }
+ IF_ENQUEUE(ifq, m);
+ IFDEBUG(D_EON)
+ printf(
+ "0x%x enqueued on clnp Q: m_len 0x%x m_type 0x%x m_data 0x%x\n",
+ m, m->m_len, m->m_type, m->m_data);
+ dump_buf(mtod(m, caddr_t), m->m_len);
+ ENDDEBUG
+ schednetisr(NETISR_ISO);
+ splx(s);
+ }
+}
+
+/*
+ * FUNCTION:		eonctlinput
+ *
+ * PURPOSE:		control input: counts the request in the per-command
+ *			es_icmp statistics; no further action is taken yet
+ *			(see the TODOs below).
+ *
+ * ARGUMENTS:		(cmd) is a PRC_* request code;
+ *			(sin) is the address the request refers to (only
+ *			printed when debugging).
+ *
+ * RETURNS:		always 0
+ */
+int
+eonctlinput(cmd, sin)
+	int cmd;
+	struct sockaddr_in *sin;
+{
+	extern u_char inetctlerrmap[];
+
+	IFDEBUG(D_EON)
+		printf("eonctlinput: cmd 0x%x addr: ", cmd);
+		dump_isoaddr(sin);
+		printf("\n");
+	ENDDEBUG
+
+	/*
+	 * cmd indexes es_icmp[] below; PRC_NCMDS is the number of
+	 * commands, so PRC_NCMDS itself is already out of range.
+	 */
+	if (cmd < 0 || cmd >= PRC_NCMDS)
+		return 0;
+
+	IncStat(es_icmp[cmd]);
+	switch (cmd) {
+
+		case	PRC_QUENCH:
+		case	PRC_QUENCH2:
+			/* TODO: set the dec bit */
+			break;
+		case	PRC_TIMXCEED_REASS:
+		case	PRC_ROUTEDEAD:
+		case	PRC_HOSTUNREACH:
+		case	PRC_UNREACH_NET:
+		case	PRC_IFDOWN:
+		case	PRC_UNREACH_HOST:
+		case	PRC_HOSTDEAD:
+		case	PRC_TIMXCEED_INTRANS:
+			/* TODO: mark the link down */
+			break;
+
+		case	PRC_UNREACH_PROTOCOL:
+		case	PRC_UNREACH_PORT:
+		case	PRC_UNREACH_SRCFAIL:
+		case	PRC_REDIRECT_NET:
+		case	PRC_REDIRECT_HOST:
+		case	PRC_REDIRECT_TOSNET:
+		case	PRC_REDIRECT_TOSHOST:
+		case	PRC_MSGSIZE:
+		case	PRC_PARAMPROB:
+			/* printf("eonctlinput: ICMP cmd 0x%x\n", cmd );*/
+			break;
+	}
+	return 0;
+}
+
+#endif
diff --git a/sys/netiso/iso.c b/sys/netiso/iso.c
new file mode 100644
index 000000000000..cd64e6871528
--- /dev/null
+++ b/sys/netiso/iso.c
@@ -0,0 +1,919 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso.c 8.2 (Berkeley) 11/15/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: iso.c,v 4.11 88/09/19 14:58:35 root Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/iso.c,v $
+ *
+ * iso.c: miscellaneous routines to support the iso address family
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_snpac.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/clnp.h>
+#include <netiso/argo_debug.h>
+#ifdef TUBA
+#include <netiso/tuba_table.h>
+#endif
+
+#ifdef ISO
+
+int iso_interfaces = 0; /* number of external interfaces; bumped in
+ * iso_control() for each new address whose
+ * interface is not loif */
+extern struct ifnet loif; /* loopback interface */
+int ether_output(); /* compared against if_output in iso_ifinit() */
+void llc_rtrequest(); /* installed as ifa_rtrequest in iso_ifinit() */
+
+/*
+ * FUNCTION: iso_addrmatch1
+ *
+ * PURPOSE: decide if the two iso_addrs passed are equal
+ *
+ * RETURNS: true if the addrs match, false if they do not
+ *
+ * SIDE EFFECTS: prints both addresses and the comparison from the
+ * IFDEBUG(D_ROUTE) blocks (presumably only when that
+ * debug flag is enabled; IFDEBUG is defined elsewhere)
+ *
+ * NOTES: The lengths must be identical; then isoa_len bytes
+ * of isoa_genaddr are compared. The last debug block
+ * does its own byte-by-byte comparison and returns
+ * from inside the block, so the bcmp() at the end is
+ * reached only when that block is not executed.
+ */
+iso_addrmatch1(isoaa, isoab)
+register struct iso_addr *isoaa, *isoab; /* addresses to check */
+{
+ u_int compare_len;
+
+ IFDEBUG(D_ROUTE)
+ printf("iso_addrmatch1: comparing lengths: %d to %d\n", isoaa->isoa_len,
+ isoab->isoa_len);
+ printf("a:\n");
+ dump_buf(isoaa->isoa_genaddr, isoaa->isoa_len);
+ printf("b:\n");
+ dump_buf(isoab->isoa_genaddr, isoab->isoa_len);
+ ENDDEBUG
+
+ if ((compare_len = isoaa->isoa_len) != isoab->isoa_len) {
+ IFDEBUG(D_ROUTE)
+ printf("iso_addrmatch1: returning false because of lengths\n");
+ ENDDEBUG
+ return 0;
+ }
+
+#ifdef notdef
+ /* TODO : generalize this to all afis with masks */
+ if( isoaa->isoa_afi == AFI_37 ) {
+ /* must not compare 2 least significant digits, or for
+ * that matter, the DSP
+ */
+ compare_len = ADDR37_IDI_LEN - 1;
+ }
+#endif
+
+ IFDEBUG(D_ROUTE)
+ int i;
+ char *a, *b;
+
+ a = isoaa->isoa_genaddr;
+ b = isoab->isoa_genaddr;
+
+ for (i=0; i<compare_len; i++) {
+ printf("<%x=%x>", a[i]&0xff, b[i]&0xff);
+ if (a[i] != b[i]) {
+ printf("\naddrs are not equal at byte %d\n", i);
+ return(0);
+ }
+ }
+ printf("\n");
+ printf("addrs are equal\n");
+ return (1);
+ ENDDEBUG
+ return (!bcmp(isoaa->isoa_genaddr, isoab->isoa_genaddr, compare_len));
+}
+
+/*
+ * FUNCTION: iso_addrmatch
+ *
+ * PURPOSE: decide if the two sockaddr_isos passed are equal
+ *
+ * RETURNS: true if the addrs match, false if they do not
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES: Thin wrapper: passes the siso_addr members of both
+ * arguments to iso_addrmatch1() and returns its result.
+ */
+iso_addrmatch(sisoa, sisob)
+struct sockaddr_iso *sisoa, *sisob; /* addresses to check */
+{
+ return(iso_addrmatch1(&sisoa->siso_addr, &sisob->siso_addr));
+}
+#ifdef notdef
+/*
+ * FUNCTION:		iso_netmatch
+ *
+ * PURPOSE:		compare the network portions (as extracted by
+ *			iso_netof) of two sockaddr_iso addresses.
+ *
+ * RETURNS:		true if same net, false if not
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:		Two nets match when their extracted lengths are
+ *			equal and the extracted bytes are identical.
+ */
+iso_netmatch(sisoa, sisob)
+struct sockaddr_iso	*sisoa, *sisob;
+{
+	u_char	neta[sizeof(struct sockaddr_iso)];
+	u_char	netb[sizeof(struct sockaddr_iso)];
+	register int	na = iso_netof(&sisoa->siso_addr, neta);
+	register int	nb = iso_netof(&sisob->siso_addr, netb);
+
+	IFDEBUG(D_ROUTE)
+		printf("iso_netmatch: comparing lengths: %d to %d\n", na, nb);
+		printf("a:\n");
+		dump_buf(neta, na);
+		printf("b:\n");
+		dump_buf(netb, nb);
+	ENDDEBUG
+
+	if (na != nb)
+		return (0);
+	return (bcmp(neta, netb, na) == 0);
+}
+#endif /* notdef */
+
+/*
+ * FUNCTION:		iso_hashchar
+ *
+ * PURPOSE:		Hash all character in the buffer specified into
+ *			a long. Return the long.
+ *
+ * RETURNS:		The hash value.
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:		The hash is achieved by exclusive ORing 4 byte
+ *			quantities, first byte in bits 31-24.  A trailing
+ *			group of 1-3 bytes is padded with zero in the low
+ *			positions, and (len % 4) is folded in at the end.
+ *			Bytes are read as unsigned so that values of 0x80
+ *			and above are not sign extended into the word.
+ */
+u_long
+iso_hashchar(buf, len)
+register caddr_t	buf;		/* buffer to pack from */
+register int		len;		/* length of buffer */
+{
+	register u_char	*p = (u_char *)buf;
+	register u_long	h = 0;
+	register int	i;
+
+	for (i = 0; i < len; i += 4) {
+		register u_long	l = 0;
+
+		/* len - i is at least 1 here; each case adds one more byte */
+		switch (len - i) {
+		default:	/* a full group of 4 bytes remains */
+			l |= (u_long)p[i+3];
+			/* FALLTHROUGH */
+		case 3:
+			l |= (u_long)p[i+2] << 8;
+			/* FALLTHROUGH */
+		case 2:
+			l |= (u_long)p[i+1] << 16;
+			/* FALLTHROUGH */
+		case 1:
+			l |= (u_long)p[i] << 24;
+			break;
+		}
+
+		h ^= l;
+	}
+
+	h ^= (u_long) (len % 4);
+
+	return(h);
+}
+#ifdef notdef
+/*
+ * FUNCTION:		iso_hash
+ *
+ * PURPOSE:		Fill in fields of afhash structure based upon addr passed.
+ *			afh_nethash is the hash of the network portion (as
+ *			extracted by iso_netof); afh_hosthash is the hash of
+ *			the first isoa_len bytes of the address.
+ *
+ * RETURNS:		none
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:		The scratch buffer is long aligned and holds at
+ *			least sizeof(struct sockaddr_iso) bytes.
+ */
+iso_hash(siso, hp)
+struct sockaddr_iso	*siso;		/* address to perform hash on */
+struct afhash		*hp;		/* RETURN: hash info here */
+{
+	/* round the byte size up to whole u_longs */
+	u_long	buf[(sizeof(struct sockaddr_iso) + sizeof(u_long) - 1) /
+			sizeof(u_long)];
+	register int	bufsize;
+
+
+	bzero((caddr_t)buf, sizeof(buf));
+
+	bufsize = iso_netof(&siso->siso_addr, (caddr_t)buf);
+	hp->afh_nethash = iso_hashchar((caddr_t)buf, bufsize);
+
+	IFDEBUG(D_ROUTE)
+		printf("iso_hash: iso_netof: bufsize = %d\n", bufsize);
+	ENDDEBUG
+
+	hp->afh_hosthash = iso_hashchar((caddr_t)&siso->siso_addr,
+		siso->siso_addr.isoa_len);
+
+	IFDEBUG(D_ROUTE)
+		printf("iso_hash: %s: nethash = x%x, hosthash = x%x\n",
+			clnp_iso_addrp(&siso->siso_addr), hp->afh_nethash,
+			hp->afh_hosthash);
+	ENDDEBUG
+}
+/*
+ * FUNCTION: iso_netof
+ *
+ * PURPOSE: Extract the network portion of the iso address.
+ * The network portion of the iso address varies depending
+ * on the type of address. The network portion of the
+ * address will include the IDP. The network portion is:
+ *
+ * TYPE DESC
+ * t37 The AFI and x.121 (IDI)
+ * osinet The AFI, orgid, snetid
+ * rfc986 The AFI, vers and network part of
+ * internet address.
+ *
+ * RETURNS: number of bytes placed into buf.
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES: Buf is assumed to be big enough.
+ * As coded, AFI_37 yields the AFI byte only and an
+ * OSINET idi yields the AFI plus the IDI; an unknown
+ * AFI or idi yields length 0 (nothing is copied).
+ */
+iso_netof(isoa, buf)
+struct iso_addr *isoa; /* address */
+caddr_t buf; /* RESULT: network portion of address here */
+{
+ u_int len = 1; /* length of afi */
+
+ switch (isoa->isoa_afi) {
+ case AFI_37:
+ /*
+ * Due to classic x.25 tunnel vision, there is no
+ * net portion of an x.121 address. For our purposes
+ * the AFI will do, so that all x.25 -type addresses
+ * map to the single x.25 SNPA. (Cannot have more than
+ * one, obviously).
+ */
+
+ break;
+
+/* case AFI_OSINET:*/
+ case AFI_RFC986: {
+ u_short idi; /* value of idi */
+
+ /* osinet and rfc986 have idi in the same place */
+ CTOH(isoa->rfc986_idi[0], isoa->rfc986_idi[1], idi);
+
+ if (idi == IDI_OSINET)
+/*
+ * Network portion of OSINET address can only be the IDI. Clearly,
+ * with one x25 interface, one could get to several orgids, and
+ * several snetids.
+ len += (ADDROSINET_IDI_LEN + OVLOSINET_ORGID_LEN +
+ OVLOSINET_SNETID_LEN);
+ */
+ len += ADDROSINET_IDI_LEN;
+ else if (idi == IDI_RFC986) {
+ u_long inetaddr;
+ struct ovl_rfc986 *o986 = (struct ovl_rfc986 *)isoa;
+
+ /* bump len to include idi and version (1 byte) */
+ len += ADDRRFC986_IDI_LEN + 1;
+
+ /* get inet addr long aligned */
+ bcopy(o986->o986_inetaddr, &inetaddr, sizeof(inetaddr));
+ inetaddr = ntohl(inetaddr); /* convert to host byte order */
+
+ IFDEBUG(D_ROUTE)
+ printf("iso_netof: isoa ");
+ dump_buf(isoa, sizeof(*isoa));
+ printf("iso_netof: inetaddr 0x%x ", inetaddr);
+ ENDDEBUG
+
+ /* bump len by size of network portion of inet address */
+ if (IN_CLASSA(inetaddr)) {
+ len += 4-IN_CLASSA_NSHIFT/8;
+ IFDEBUG(D_ROUTE)
+ printf("iso_netof: class A net len is now %d\n", len);
+ ENDDEBUG
+ } else if (IN_CLASSB(inetaddr)) {
+ len += 4-IN_CLASSB_NSHIFT/8;
+ IFDEBUG(D_ROUTE)
+ printf("iso_netof: class B net len is now %d\n", len);
+ ENDDEBUG
+ } else { /* everything that is neither class A nor B is sized as class C */
+ len += 4-IN_CLASSC_NSHIFT/8;
+ IFDEBUG(D_ROUTE)
+ printf("iso_netof: class C net len is now %d\n", len);
+ ENDDEBUG
+ }
+ } else
+ len = 0;
+ } break;
+
+ default:
+ len = 0;
+ }
+
+ bcopy((caddr_t)isoa, buf, len); /* the net portion is the leading len bytes of *isoa */
+ IFDEBUG(D_ROUTE)
+ printf("iso_netof: isoa ");
+ dump_buf(isoa, len);
+ printf("iso_netof: net ");
+ dump_buf(buf, len);
+ ENDDEBUG
+ return len;
+}
+#endif /* notdef */
+/*
+ * Generic iso control operations (ioctl's).
+ * Ifp is 0 if not an interface-specific ioctl.
+ *
+ * so is the requesting socket (SS_PRIV is required for add/delete),
+ * cmd the ioctl command, and data the argument, viewed as a
+ * struct iso_ifreq or a struct iso_aliasreq depending on cmd.
+ * Returns 0 or an errno value.
+ *
+ * The work is done in two passes over cmd: the first switch finds
+ * (or, for SIOCAIFADDR_ISO, creates and links) the iso_ifaddr to
+ * operate on; the second switch carries out the request.
+ */
+/* ARGSUSED */
+iso_control(so, cmd, data, ifp)
+ struct socket *so;
+ int cmd;
+ caddr_t data;
+ register struct ifnet *ifp;
+{
+ register struct iso_ifreq *ifr = (struct iso_ifreq *)data;
+ register struct iso_ifaddr *ia = 0;
+ register struct ifaddr *ifa;
+ struct iso_ifaddr *oia;
+ struct iso_aliasreq *ifra = (struct iso_aliasreq *)data;
+ int error, hostIsNew, maskIsNew;
+
+ /*
+ * Find address for this interface, if it exists.
+ */
+ if (ifp)
+ for (ia = iso_ifaddr; ia; ia = ia->ia_next)
+ if (ia->ia_ifp == ifp)
+ break;
+
+ switch (cmd) {
+
+ case SIOCAIFADDR_ISO:
+ case SIOCDIFADDR_ISO:
+ if (ifra->ifra_addr.siso_family == AF_ISO)
+ for (oia = ia; ia; ia = ia->ia_next) { /* narrow ia to the entry matching ifp and the given address */
+ if (ia->ia_ifp == ifp &&
+ SAME_ISOADDR(&ia->ia_addr, &ifra->ifra_addr))
+ break;
+ }
+ if ((so->so_state & SS_PRIV) == 0)
+ return (EPERM);
+ if (ifp == 0)
+ panic("iso_control");
+ if (ia == (struct iso_ifaddr *)0) { /* no such address yet: create one (add only) */
+ struct iso_ifaddr *nia;
+ if (cmd == SIOCDIFADDR_ISO)
+ return (EADDRNOTAVAIL);
+#ifdef TUBA
+ /* XXXXXX can't be done in the proto init routines */
+ if (tuba_tree == 0)
+ tuba_table_init();
+#endif
+ MALLOC(nia, struct iso_ifaddr *, sizeof(*nia),
+ M_IFADDR, M_WAITOK);
+ if (nia == (struct iso_ifaddr *)0)
+ return (ENOBUFS);
+ bzero((caddr_t)nia, sizeof(*nia));
+ if (ia = iso_ifaddr) { /* append to the tail of the global iso_ifaddr list */
+ for ( ; ia->ia_next; ia = ia->ia_next)
+ ;
+ ia->ia_next = nia;
+ } else
+ iso_ifaddr = nia;
+ ia = nia;
+ if (ifa = ifp->if_addrlist) { /* append to the tail of the interface's address list */
+ for ( ; ifa->ifa_next; ifa = ifa->ifa_next)
+ ;
+ ifa->ifa_next = (struct ifaddr *) ia;
+ } else
+ ifp->if_addrlist = (struct ifaddr *) ia;
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+ ia->ia_ifa.ifa_dstaddr
+ = (struct sockaddr *)&ia->ia_dstaddr;
+ ia->ia_ifa.ifa_netmask
+ = (struct sockaddr *)&ia->ia_sockmask;
+ ia->ia_ifp = ifp;
+ if (ifp != &loif)
+ iso_interfaces++;
+ }
+ break;
+
+#define cmdbyte(x) (((x) >> 8) & 0xff) /* second-lowest byte of the command word */
+ default:
+ if (cmdbyte(cmd) == 'a') /* these commands are handed to snpac_ioctl() */
+ return (snpac_ioctl(so, cmd, data));
+ if (ia == (struct iso_ifaddr *)0)
+ return (EADDRNOTAVAIL);
+ break;
+ }
+ switch (cmd) { /* second pass: ia is non-null from here on */
+
+ case SIOCGIFADDR_ISO:
+ ifr->ifr_Addr = ia->ia_addr;
+ break;
+
+ case SIOCGIFDSTADDR_ISO:
+ if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+ return (EINVAL);
+ ifr->ifr_Addr = ia->ia_dstaddr;
+ break;
+
+ case SIOCGIFNETMASK_ISO:
+ ifr->ifr_Addr = ia->ia_sockmask;
+ break;
+
+ case SIOCAIFADDR_ISO:
+ maskIsNew = 0; hostIsNew = 1; error = 0;
+ if (ia->ia_addr.siso_family == AF_ISO) {
+ if (ifra->ifra_addr.siso_len == 0) { /* no address supplied: keep the current one */
+ ifra->ifra_addr = ia->ia_addr;
+ hostIsNew = 0;
+ } else if (SAME_ISOADDR(&ia->ia_addr, &ifra->ifra_addr))
+ hostIsNew = 0;
+ }
+ if (ifra->ifra_mask.siso_len) {
+ iso_ifscrub(ifp, ia);
+ ia->ia_sockmask = ifra->ifra_mask;
+ maskIsNew = 1;
+ }
+ if ((ifp->if_flags & IFF_POINTOPOINT) &&
+ (ifra->ifra_dstaddr.siso_family == AF_ISO)) {
+ iso_ifscrub(ifp, ia);
+ ia->ia_dstaddr = ifra->ifra_dstaddr;
+ maskIsNew = 1; /* We lie; but the effect's the same */
+ }
+ if (ifra->ifra_addr.siso_family == AF_ISO &&
+ (hostIsNew || maskIsNew)) {
+ error = iso_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+ }
+ if (ifra->ifra_snpaoffset)
+ ia->ia_snpaoffset = ifra->ifra_snpaoffset;
+ return (error);
+
+ case SIOCDIFADDR_ISO:
+ iso_ifscrub(ifp, ia);
+ if ((ifa = ifp->if_addrlist) == (struct ifaddr *)ia) /* unlink from the interface's list */
+ ifp->if_addrlist = ifa->ifa_next;
+ else {
+ while (ifa->ifa_next &&
+ (ifa->ifa_next != (struct ifaddr *)ia))
+ ifa = ifa->ifa_next;
+ if (ifa->ifa_next)
+ ifa->ifa_next = ((struct ifaddr *)ia)->ifa_next;
+ else
+ printf("Couldn't unlink isoifaddr from ifp\n");
+ }
+ oia = ia;
+ if (oia == (ia = iso_ifaddr)) { /* unlink from the global iso_ifaddr list */
+ iso_ifaddr = ia->ia_next;
+ } else {
+ while (ia->ia_next && (ia->ia_next != oia)) {
+ ia = ia->ia_next;
+ }
+ if (ia->ia_next)
+ ia->ia_next = oia->ia_next;
+ else
+ printf("Didn't unlink isoifadr from list\n");
+ }
+ IFAFREE((&oia->ia_ifa));
+ break;
+
+ default: /* anything else goes to the interface's own ioctl routine */
+ if (ifp == 0 || ifp->if_ioctl == 0)
+ return (EOPNOTSUPP);
+ return ((*ifp->if_ioctl)(ifp, cmd, data));
+ }
+ return (0);
+}
+
+/*
+ * Delete any existing route for an interface.
+ *
+ * Nothing happens unless the address is flagged IFA_ROUTE.  The route
+ * is removed as a host route on loopback and point-to-point interfaces
+ * and as a plain route otherwise; siso_tlen is zeroed for the duration
+ * of the rtinit() call and then restored.
+ */
+iso_ifscrub(ifp, ia)
+	register struct ifnet *ifp;
+	register struct iso_ifaddr *ia;
+{
+	int saved_tlen = ia->ia_addr.siso_tlen;
+	int rtflags;
+
+	if (!(ia->ia_flags & IFA_ROUTE))
+		return;
+
+	rtflags = (ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) ?
+			RTF_HOST : 0;
+
+	ia->ia_addr.siso_tlen = 0;
+	rtinit(&(ia->ia_ifa), (int)RTM_DELETE, rtflags);
+	ia->ia_addr.siso_tlen = saved_tlen;
+
+	ia->ia_flags &= ~IFA_ROUTE;
+}
+
+/*
+ * Initialize an interface's internet address
+ * and routing table entry.
+ *
+ * ifp is the interface, ia its iso_ifaddr, siso the new address, and
+ * scrub non-zero asks for the route of the old address to be deleted
+ * first. Returns the error from the interface's SIOCSIFADDR ioctl
+ * (after restoring the old address) or the result of rtinit().
+ */
+iso_ifinit(ifp, ia, siso, scrub)
+ register struct ifnet *ifp;
+ register struct iso_ifaddr *ia;
+ struct sockaddr_iso *siso;
+{
+ struct sockaddr_iso oldaddr;
+ int s = splimp(), error, nsellength;
+
+ oldaddr = ia->ia_addr;
+ ia->ia_addr = *siso;
+ /*
+ * Give the interface a chance to initialize
+ * if this is its first address,
+ * and to validate the address if necessary.
+ */
+ if (ifp->if_ioctl &&
+ (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) {
+ splx(s);
+ ia->ia_addr = oldaddr; /* NOTE(review): restored after splx() -- confirm that is intended */
+ return (error);
+ }
+ if (scrub) { /* point ifa_addr at the old address while its route is removed */
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
+ iso_ifscrub(ifp, ia);
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+ }
+ /* XXX -- The following is here temporarily out of laziness
+ in not changing every ethernet driver's if_ioctl routine */
+ if (ifp->if_output == ether_output) {
+ ia->ia_ifa.ifa_rtrequest = llc_rtrequest;
+ ia->ia_ifa.ifa_flags |= RTF_CLONING;
+ }
+ /*
+ * Add route for the network.
+ */
+ nsellength = ia->ia_addr.siso_tlen; /* siso_tlen is zeroed around rtinit() and restored below */
+ ia->ia_addr.siso_tlen = 0;
+ if (ifp->if_flags & IFF_LOOPBACK) {
+ ia->ia_ifa.ifa_dstaddr = ia->ia_ifa.ifa_addr;
+ error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+ } else if (ifp->if_flags & IFF_POINTOPOINT &&
+ ia->ia_dstaddr.siso_family == AF_ISO)
+ error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+ else {
+ rt_maskedcopy(ia->ia_ifa.ifa_addr, ia->ia_ifa.ifa_dstaddr,
+ ia->ia_ifa.ifa_netmask);
+ ia->ia_dstaddr.siso_nlen =
+ min(ia->ia_addr.siso_nlen, (ia->ia_sockmask.siso_len - 6));
+ error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_UP);
+ }
+ ia->ia_addr.siso_tlen = nsellength;
+ ia->ia_flags |= IFA_ROUTE; /* NOTE(review): set even when rtinit() returned an error -- confirm */
+ splx(s);
+ return (error);
+}
+#ifdef notdef
+
+struct ifaddr *
+iso_ifwithidi(addr) /* first interface address whose ISO address iso_eqtype() matches addr, else 0 */
+ register struct sockaddr *addr;
+{
+ register struct ifnet *ifp;
+ register struct ifaddr *ifa;
+ register u_int af = addr->sa_family;
+
+ if (af != AF_ISO)
+ return (0); /* only AF_ISO addresses are looked up */
+ IFDEBUG(D_ROUTE)
+ printf(">>> iso_ifwithidi addr\n");
+ dump_isoaddr( (struct sockaddr_iso *)(addr));
+ printf("\n");
+ ENDDEBUG
+ for (ifp = ifnet; ifp; ifp = ifp->if_next) { /* every interface on the ifnet list */
+ IFDEBUG(D_ROUTE)
+ printf("iso_ifwithidi ifnet %s\n", ifp->if_name);
+ ENDDEBUG
+ for (ifa = ifp->if_addrlist; ifa; ifa = ifa->ifa_next) { /* every address on that interface */
+ IFDEBUG(D_ROUTE)
+ printf("iso_ifwithidi address ");
+ dump_isoaddr( (struct sockaddr_iso *)(ifa->ifa_addr));
+ ENDDEBUG
+ if (ifa->ifa_addr->sa_family != addr->sa_family)
+ continue; /* different address family: skip */
+
+#define IFA_SIS(ifa)\
+ ((struct sockaddr_iso *)((ifa)->ifa_addr))
+
+ IFDEBUG(D_ROUTE)
+ printf(" af same, args to iso_eqtype:\n");
+ printf("0x%x ", IFA_SIS(ifa)->siso_addr); /* NOTE(review): passes the struct itself, unlike the address-of in the next printf -- confirm */
+ printf(" 0x%x\n",
+ &(((struct sockaddr_iso *)addr)->siso_addr));
+ ENDDEBUG
+
+ if (iso_eqtype(&(IFA_SIS(ifa)->siso_addr),
+ &(((struct sockaddr_iso *)addr)->siso_addr))) {
+ IFDEBUG(D_ROUTE)
+ printf("ifa_ifwithidi: ifa found\n");
+ ENDDEBUG
+ return (ifa);
+ }
+ IFDEBUG(D_ROUTE)
+ printf(" iso_eqtype failed\n");
+ ENDDEBUG
+ }
+ }
+ return ((struct ifaddr *)0); /* no interface address matched */
+}
+
+#endif /* notdef */
+/*
+ * FUNCTION: iso_ck_addr
+ *
+ * PURPOSE: return true if the iso_addr passed is
+ * within the legal size limit for an iso address.
+ *
+ * RETURNS: nonzero if isoa->isoa_len <= 20, zero otherwise
+ *
+ * SIDE EFFECTS: none
+ *
+ */
+iso_ck_addr(isoa)
+struct iso_addr *isoa; /* address to check */
+{
+ return (isoa->isoa_len <= 20); /* 20 matches the size of isoa_genaddr[] in iso.h */
+
+}
+
+#ifdef notdef
+/*
+ * FUNCTION: iso_eqtype
+ *
+ * PURPOSE: Determine if two iso addresses are of the same type.
+ * This is flaky. Really we should consider all type 47 addrs to be the
+ * same - but there do exist different structures for 47 addrs.
+ * Gosip adds a 3rd.
+ *
+ * RETURNS: 1 (or nonzero) if the addresses are the same type, 0 otherwise
+ *
+ * SIDE EFFECTS: none
+ *
+ * NOTES: By type, I mean rfc986, t37, or osinet
+ *
+ * This will first compare afis. If they match, then
+ * if the addr is not t37, the idis must be compared.
+ */
+iso_eqtype(isoaa, isoab)
+struct iso_addr *isoaa; /* first addr to check */
+struct iso_addr *isoab; /* other addr to check */
+{
+ if (isoaa->isoa_afi == isoab->isoa_afi) { /* NOTE(review): isoa_afi and isoa_u are not members of struct iso_addr as declared in iso.h -- confirm */
+ if (isoaa->isoa_afi == AFI_37)
+ return(1); /* t37: equal AFIs are sufficient */
+ else
+ return (!bcmp(&isoaa->isoa_u, &isoab->isoa_u, 2)); /* otherwise compare the first 2 bytes at isoa_u */
+ }
+ return(0); /* AFIs differ */
+}
+#endif /* notdef */
+/*
+ * FUNCTION: iso_localifa()
+ *
+ * PURPOSE: Find an interface address having a given destination
+ * or at least matching the net.
+ *
+ * RETURNS: ptr to an interface address, or 0 if nothing matched
+ *
+ * SIDE EFFECTS: none
+ *
+ * NOTES: Only addresses whose interface is IFF_UP are considered.
+ */
+struct iso_ifaddr *
+iso_localifa(siso)
+ register struct sockaddr_iso *siso;
+{
+ register struct iso_ifaddr *ia;
+ register char *cp1, *cp2, *cp3;
+ register struct ifnet *ifp;
+ struct iso_ifaddr *ia_maybe = 0; /* most recent non-exact candidate; returned if no exact match is found */
+ /*
+ * We make one pass looking for both net matches and an exact
+ * dst addr.
+ */
+ for (ia = iso_ifaddr; ia; ia = ia->ia_next) {
+ if ((ifp = ia->ia_ifp) == 0 || ((ifp->if_flags & IFF_UP) == 0))
+ continue; /* no interface attached, or interface is down */
+ if (ifp->if_flags & IFF_POINTOPOINT) {
+ if ((ia->ia_dstaddr.siso_family == AF_ISO) &&
+ SAME_ISOADDR(&ia->ia_dstaddr, siso))
+ return (ia); /* siso is the far end of this point-to-point link */
+ else
+ if (SAME_ISOADDR(&ia->ia_addr, siso))
+ ia_maybe = ia; /* siso is the local end: remember it but keep looking */
+ continue;
+ }
+ if (ia->ia_sockmask.siso_len) {
+ char *cplim = ia->ia_sockmask.siso_len + (char *)&ia->ia_sockmask; /* one past the last mask byte */
+ cp1 = ia->ia_sockmask.siso_data;
+ cp2 = siso->siso_data;
+ cp3 = ia->ia_addr.siso_data;
+ while (cp1 < cplim)
+ if (*cp1++ & (*cp2++ ^ *cp3++)) /* a bit covered by the mask differs */
+ goto next;
+ ia_maybe = ia; /* all masked bits agree: net match */
+ }
+ if (SAME_ISOADDR(&ia->ia_addr, siso))
+ return ia; /* exact address match */
+ next:;
+ }
+ return ia_maybe;
+}
+
+#ifdef TPCONS
+#include <netiso/cons.h>
+#endif /* TPCONS */
+/*
+ * FUNCTION: iso_nlctloutput
+ *
+ * PURPOSE: Set options at the network level
+ *
+ * RETURNS: 0 on success, otherwise EOPNOTSUPP or EINVAL
+ *
+ * SIDE EFFECTS: frees m when cmd is PRCO_SETOPT (after the option switch)
+ *
+ * NOTES: This could embody some of the functions of
+ * rclnp_ctloutput and cons_ctloutput.
+ */
+iso_nlctloutput(cmd, optname, pcb, m)
+int cmd; /* command:set or get */
+int optname; /* option of interest */
+caddr_t pcb; /* nl pcb */
+struct mbuf *m; /* data for set, buffer for get */
+{
+ struct isopcb *isop = (struct isopcb *)pcb;
+ int error = 0; /* return value */
+ caddr_t data; /* data for option */
+ int data_len; /* data's length */
+
+ IFDEBUG(D_ISO)
+ printf("iso_nlctloutput: cmd %x, opt %x, pcb %x, m %x\n",
+ cmd, optname, pcb, m);
+ ENDDEBUG
+
+ if ((cmd != PRCO_GETOPT) && (cmd != PRCO_SETOPT))
+ return(EOPNOTSUPP); /* this early return does not free m */
+
+ data = mtod(m, caddr_t); /* NOTE(review): m is dereferenced without a null check -- confirm callers always pass an mbuf */
+ data_len = (m)->m_len;
+
+ IFDEBUG(D_ISO)
+ printf("iso_nlctloutput: data is:\n");
+ dump_buf(data, data_len);
+ ENDDEBUG
+
+ switch (optname) {
+
+#ifdef TPCONS
+ case CONSOPT_X25CRUD:
+ if (cmd == PRCO_GETOPT) {
+ error = EOPNOTSUPP; /* this option can only be set */
+ break;
+ }
+
+ if (data_len > MAXX25CRUDLEN) {
+ error = EINVAL; /* longer than MAXX25CRUDLEN: rejected before the copy */
+ break;
+ }
+
+ IFDEBUG(D_ISO)
+ printf("iso_nlctloutput: setting x25 crud\n");
+ ENDDEBUG
+
+ bcopy(data, (caddr_t)isop->isop_x25crud, (unsigned)data_len);
+ isop->isop_x25crud_len = data_len;
+ break;
+#endif /* TPCONS */
+
+ default:
+ error = EOPNOTSUPP; /* without TPCONS this is the only case */
+ }
+ if (cmd == PRCO_SETOPT)
+ m_freem(m);
+ return error;
+}
+#endif /* ISO */
+
+#ifdef ARGO_DEBUG
+
+/*
+ * FUNCTION: dump_isoaddr
+ *
+ * PURPOSE: debugging; printf an AF_ISO or AF_INET address (other families print nothing)
+ *
+ * RETURNS: nada
+ *
+ */
+dump_isoaddr(s)
+ struct sockaddr_iso *s;
+{
+ char *clnp_saddr_isop(); /* local declaration of the formatter used below */
+ register int i; /* not referenced anywhere in this function */
+
+ if( s->siso_family == AF_ISO) {
+ printf("ISO address: suffixlen %d, %s\n",
+ s->siso_tlen, clnp_saddr_isop(s));
+ } else if( s->siso_family == AF_INET) {
+ /* hack */
+ struct sockaddr_in *sin = (struct sockaddr_in *)s; /* reinterpret the same storage as an internet sockaddr */
+
+ printf("%d.%d.%d.%d: %d",
+ (sin->sin_addr.s_addr>>24)&0xff,
+ (sin->sin_addr.s_addr>>16)&0xff,
+ (sin->sin_addr.s_addr>>8)&0xff,
+ (sin->sin_addr.s_addr)&0xff,
+ sin->sin_port); /* s_addr and sin_port are printed as stored, with no byte-order conversion */
+ }
+}
+
+#endif /* ARGO_DEBUG */
diff --git a/sys/netiso/iso.h b/sys/netiso/iso.h
new file mode 100644
index 000000000000..9237e6aaa733
--- /dev/null
+++ b/sys/netiso/iso.h
@@ -0,0 +1,195 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: iso.h,v 4.9 88/09/11 18:06:38 hagens Exp $ */
+/* $Source: /usr/argo/sys/netiso/RCS/iso.h,v $ */
+
+#ifndef __ISO__
+#define __ISO__
+
+/*
+ * Return true if this is a multicast address
+ * This assumes that the bit transmission is lsb first. This
+ * assumption is valid for 802.3 but not 802.5. There is a
+ * kludge to get around this for 802.5 -- see if_lan.c
+ * where subnetwork header is setup.
+ */
+#define IS_MULTICAST(snpa)\
+ ((snpa)[0] & 0x01)
+
+/*
+ * Protocols
+ */
+#define ISOPROTO_TCP 6 /* IETF experiment */
+#define ISOPROTO_UDP 17 /* IETF experiment */
+#define ISOPROTO_TP0 25 /* connection oriented transport protocol */
+#define ISOPROTO_TP1 26 /* not implemented */
+#define ISOPROTO_TP2 27 /* not implemented */
+#define ISOPROTO_TP3 28 /* not implemented */
+#define ISOPROTO_TP4 29 /* connection oriented transport protocol */
+#define ISOPROTO_TP ISOPROTO_TP4 /* tp-4 with negotiation */
+#define ISOPROTO_CLTP 30 /* connectionless transport (not yet impl.) */
+#define ISOPROTO_CLNP 31 /* connectionless internetworking protocol */
+#define ISOPROTO_X25 32 /* cons */
+#define ISOPROTO_INACT_NL 33 /* inactive network layer! */
+#define ISOPROTO_ESIS 34 /* ES-IS protocol */
+#define ISOPROTO_INTRAISIS 35 /* IS-IS protocol */
+#define ISOPROTO_IDRP 36 /* Interdomain Routing Protocol */
+
+#define ISOPROTO_RAW 255 /* raw clnp */
+#define ISOPROTO_MAX 256
+
+#define ISO_PORT_RESERVED 1024
+#define ISO_PORT_USERRESERVED 5000
+/*
+ * Port/socket numbers: standard network functions
+ * NOT PRESENTLY USED
+ */
+#define ISO_PORT_MAINT 501
+#define ISO_PORT_ECHO 507
+#define ISO_PORT_DISCARD 509
+#define ISO_PORT_SYSTAT 511
+#define ISO_PORT_NETSTAT 515
+/*
+ * Port/socket numbers: non-standard application functions
+ */
+#define ISO_PORT_LOGIN 513
+/*
+ * Port/socket numbers: public use
+ */
+#define ISO_PORT_PUBLIC 1024 /* high bit set --> public */
+
+/*
+ * Network layer protocol identifiers
+ */
+#define ISO8473_CLNP 0x81
+#define ISO9542_ESIS 0x82
+#define ISO9542X25_ESIS 0x8a
+#define ISO10589_ISIS 0x83
+#define ISO8878A_CONS 0x84
+#define ISO10747_IDRP 0x85
+
+
+#ifndef IN_CLASSA_NET
+#include <netinet/in.h>
+#endif /* IN_CLASSA_NET */
+
+
+
+/* The following looks like a sockaddr
+ * to facilitate using tree lookup routines */
+struct iso_addr {
+ u_char isoa_len; /* length (in bytes) */
+ char isoa_genaddr[20]; /* general opaque address */
+};
+
+struct sockaddr_iso {
+ u_char siso_len; /* length */
+ u_char siso_family; /* family */
+ u_char siso_plen; /* presentation selector length */
+ u_char siso_slen; /* session selector length */
+ u_char siso_tlen; /* transport selector length */
+ struct iso_addr siso_addr; /* network address */
+ u_char siso_pad[6]; /* space for gosip v2 sels */
+ /* makes struct 32 bytes long */
+};
+#define siso_nlen siso_addr.isoa_len /* network address length, via the embedded iso_addr */
+#define siso_data siso_addr.isoa_genaddr /* network address bytes, via the embedded iso_addr */
+
+#define TSEL(s) ((caddr_t)((s)->siso_data + (s)->siso_nlen)) /* first byte after the siso_nlen address bytes */
+
+#define SAME_ISOADDR(a, b) \
+ (bcmp((a)->siso_data, (b)->siso_data, (unsigned)(a)->siso_nlen)==0) /* compares siso_nlen of (a) bytes; the length of (b) is not examined */
+/*
+ * The following are specific values for siso->siso_data[0],
+ * otherwise known as the AFI:
+ */
+#define AFI_37 0x37 /* bcd of "37" */
+#define AFI_OSINET 0x47 /* bcd of "47" */
+#define AFI_RFC986 0x47 /* bcd of "47" */
+#define AFI_SNA 0x00 /* SubNetwork Address; invalid really...*/
+
+#ifdef KERNEL
+
+extern int iso_netmatch();
+extern int iso_hash();
+extern int iso_addrmatch();
+extern struct iso_ifaddr *iso_iaonnetof();
+extern struct domain isodomain;
+extern struct protosw isosw[];
+
+#else
+/* user utilities definitions from the iso library */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+struct iso_addr *iso_addr __P((const char *));
+char *iso_ntoa __P((const struct iso_addr *));
+
+/* THESE DON'T EXIST YET */
+struct hostent *iso_gethostbyname(), *iso_gethostbyaddr();
+__END_DECLS
+
+#endif /* KERNEL */
+
+#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
+#endif /* __ISO__ */
diff --git a/sys/netiso/iso_chksum.c b/sys/netiso/iso_chksum.c
new file mode 100644
index 000000000000..5b1aae59e16c
--- /dev/null
+++ b/sys/netiso/iso_chksum.c
@@ -0,0 +1,360 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso_chksum.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: iso_chksum.c,v 4.7 88/07/29 15:31:26 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/iso_chksum.c,v $
+ *
+ * ISO CHECKSUM
+ *
+ * The checksum generation and check routines are here.
+ * The checksum is 2 bytes such that the sum of all the bytes b(i) == 0
+ * and the sum of i * b(i) == 0.
+ * The whole thing is complicated by the fact that the data are in mbuf
+ * chains.
+ * Furthermore, there is the possibility of wraparound in the running
+ * sums after adding up 4102 octets. In order to avoid doing a mod
+ * operation after EACH add, we have restricted this implementation to
+ * negotiating a maximum of 4096-octets per TPDU (for the transport layer).
+ * The routine iso_check_csum doesn't need to know where the checksum
+ * octets are.
+ * The routine iso_gen_csum takes a pointer to an mbuf chain (logically
+ * a chunk of data), an offset into the chunk at which the 2 octets are to
+ * be stuffed, and the length of the chunk. The 2 octets have to be
+ * logically adjacent, but may be physically located in separate mbufs.
+ */
+
+#ifdef ISO
+#include <netiso/argo_debug.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#endif /* ISO */
+
+#ifndef MNULL
+#define MNULL (struct mbuf *)0
+#endif /* MNULL */
+
+/*
+ * FUNCTION: iso_check_csum
+ *
+ * PURPOSE: To check the checksum of the packet in the mbuf chain (m).
+ * The total length of the packet is (len).
+ * Called from tp_input() and clnp_intr()
+ *
+ * RETURNS: non-zero, (c0 % 255) << 8 | (c1 % 255), if there is a checksum error,
+ * 0 if there was NO checksum error.
+ *
+ * SIDE EFFECTS: none
+ *
+ * NOTES: It might be possible to gain something by optimizing
+ * this routine (unrolling loops, etc). But it is such
+ * a horrible thing to fiddle with anyway, it probably
+ * isn't worth it.
+ */
+int
+iso_check_csum(m, len)
+ struct mbuf *m;
+ int len;
+{
+ register u_char *p = mtod(m, u_char *);
+ register u_long c0=0, c1=0; /* c0: running sum of the bytes; c1: running sum of c0 */
+ register int i=0; /* number of bytes summed so far */
+ int cum = 0; /* cumulative length */
+ int l; /* total length asked for by the caller */
+
+ l = len;
+ len = min(m->m_len, len); /* from here on len is the byte count taken from the current mbuf */
+ i = 0;
+
+ IFDEBUG(D_CHKSUM)
+ printf("iso_check_csum: m x%x, l x%x, m->m_len x%x\n", m, l, m->m_len);
+ ENDDEBUG
+
+ while( i<l ) {
+ cum += len;
+ while (i<cum) { /* sum the bytes of the current mbuf */
+ c0 = c0 + *(p++);
+ c1 += c0;
+ i++;
+ }
+ if(i < l) { /* more bytes wanted: advance to the next mbuf */
+ m = m->m_next;
+ IFDEBUG(D_CHKSUM)
+ printf("iso_check_csum: new mbuf\n");
+ if(l-i < m->m_len)
+ printf(
+ "bad mbuf chain in check csum l 0x%x i 0x%x m_data 0x%x",
+ l,i,m->m_data);
+ ENDDEBUG
+ ASSERT( m != MNULL); /* NOTE(review): the debug block above already dereferenced m before this check */
+ len = min( m->m_len, l-i);
+ p = mtod(m, u_char *);
+ }
+ }
+ if ( ((int)c0 % 255) || ((int)c1 % 255) ) { /* a good packet leaves both sums at 0 mod 255 */
+ IFDEBUG(D_CHKSUM)
+ printf("BAD iso_check_csum l 0x%x cum 0x%x len 0x%x, i 0x%x",
+ l, cum, len, i);
+ ENDDEBUG
+ return ((int)c0 % 255)<<8 | ((int)c1 % 255);
+ }
+ return 0;
+}
+
+/*
+ * FUNCTION: iso_gen_csum
+ *
+ * PURPOSE: To generate the checksum of the packet in the mbuf chain (m).
+ * The first of the 2 (logically) adjacent checksum bytes
+ * (x and y) go at offset (n).
+ * (n) is an offset relative to the beginning of the data,
+ * not the beginning of the mbuf.
+ * (l) is the length of the total mbuf chain's data.
+ * Called from tp_emit(), tp_error_emit()
+ * clnp_emit_er(), clnp_forward(), clnp_output().
+ *
+ * RETURNS: Rien (nothing)
+ *
+ * SIDE EFFECTS: Zeroes, then fills in, the 2 checksum bytes in the packet.
+ *
+ * NOTES: Ditto the note for iso_check_csum().
+ */
+
+void
+iso_gen_csum(m,n,l)
+ struct mbuf *m;
+ int n; /* offset of 2 checksum bytes */
+ int l;
+{
+ register u_char *p = mtod(m, u_char *);
+ register int c0=0, c1=0; /* c0: running sum of the bytes; c1: running sum of c0 */
+ register int i=0;
+ int loc = n++, len=0; /* n is position, loc is offset */
+ u_char *xloc; /* address of the first checksum byte, once located */
+ u_char *yloc; /* address of the second checksum byte, once located */
+ int cum=0; /* cum == cumulative length */
+
+ IFDEBUG(D_CHKSUM)
+ printf("enter gen csum m 0x%x n 0x%x l 0x%x\n",m, n-1 ,l );
+ ENDDEBUG
+
+ while(i < l) {
+ len = min(m->m_len, CLBYTES);
+ /* RAH: don't cksum more than l bytes */
+ len = min(len, l - i);
+
+ cum +=len;
+ p = mtod(m, u_char *);
+
+ if(loc>=0) { /* checksum bytes not yet passed; loc is relative to this mbuf */
+ if (loc < len) {
+ xloc = loc + mtod(m, u_char *);
+ IFDEBUG(D_CHKSUM)
+ printf("1: zeroing xloc 0x%x loc 0x%x\n",xloc, loc );
+ ENDDEBUG
+ *xloc = (u_char)0; /* zeroed before the summing loop below reads it */
+ if (loc+1 < len) {
+ /* both xloc and yloc are in same mbuf */
+ yloc = 1 + xloc;
+ IFDEBUG(D_CHKSUM)
+ printf("2: zeroing yloc 0x%x loc 0x%x\n",yloc, loc );
+ ENDDEBUG
+ *yloc = (u_char)0;
+ } else {
+ /* crosses boundary of mbufs */
+ yloc = mtod(m->m_next, u_char *); /* NOTE(review): m->m_next is not checked for null here -- confirm */
+ IFDEBUG(D_CHKSUM)
+ printf("3: zeroing yloc 0x%x \n",yloc );
+ ENDDEBUG
+ *yloc = (u_char)0;
+ }
+ }
+ loc -= len; /* goes negative once the checksum bytes have been located */
+ }
+
+ while(i < cum) { /* sum the bytes of the current mbuf */
+ c0 = (c0 + *p);
+ c1 += c0 ;
+ i++;
+ p++;
+ }
+ m = m->m_next;
+ }
+ IFDEBUG(D_CHKSUM)
+ printf("gen csum final xloc 0x%x yloc 0x%x\n",xloc, yloc );
+ ENDDEBUG
+
+ c1 = (((c0 * (l-n))-c1)%255) ; /* n was incremented at loc's initialisation, so this is l - (offset + 1) */
+ *xloc = (u_char) ((c1 < 0)? c1+255 : c1); /* NOTE(review): xloc and yloc are only assigned if the offset fell inside the summed data -- confirm callers guarantee it */
+
+ c1 = (-(int)(c1+c0))%255;
+ *yloc = (u_char) (c1 < 0? c1 + 255 : c1);
+
+ IFDEBUG(D_CHKSUM)
+ printf("gen csum end \n");
+ ENDDEBUG
+}
+
+/*
+ * FUNCTION: m_datalen
+ *
+ * PURPOSE: returns length of the mbuf chain (the sum of m_len along m_next).
+ * used all over the iso code.
+ *
+ * RETURNS: integer; 0 when m is null
+ *
+ * SIDE EFFECTS: none
+ *
+ * NOTES: the chain is only read, never modified.
+ */
+
+int
+m_datalen (m)
+ register struct mbuf *m;
+{
+ register int datalen;
+
+ for (datalen = 0; m; m = m->m_next) /* walk the chain to its end */
+ datalen += m->m_len;
+ return datalen;
+}
+
+int
+m_compress(in, out) /* copy the data of chain in into newly allocated mbuf(s) at *out; returns the byte count, or -1 on allocation failure */
+ register struct mbuf *in, **out;
+{
+ register int datalen = 0; /* bytes copied so far */
+ int s = splimp(); /* restored with splx() on every return path */
+
+ if( in->m_next == MNULL ) { /* single mbuf: hand it back unchanged */
+ *out = in;
+ IFDEBUG(D_REQUEST)
+ printf("m_compress returning 0x%x: A\n", in->m_len);
+ ENDDEBUG
+ splx(s);
+ return in->m_len;
+ }
+ MGET((*out), M_DONTWAIT, MT_DATA);
+ if((*out) == MNULL) {
+ *out = in; /* allocation failed: give the caller its original chain back */
+ IFDEBUG(D_REQUEST)
+ printf("m_compress returning -1: B\n");
+ ENDDEBUG
+ splx(s);
+ return -1;
+ }
+ (*out)->m_len = 0;
+ (*out)->m_act = MNULL;
+
+ while (in) {
+ IFDEBUG(D_REQUEST)
+ printf("m_compress in 0x%x *out 0x%x\n", in, *out);
+ printf("m_compress in: len 0x%x, off 0x%x\n", in->m_len, in->m_data);
+ printf("m_compress *out: len 0x%x, off 0x%x\n", (*out)->m_len,
+ (*out)->m_data);
+ ENDDEBUG
+ if (in->m_flags & M_EXT) {
+ ASSERT(in->m_len == 0);
+ }
+ if ( in->m_len == 0) {
+ in = in->m_next; /* drained or empty mbuf: step past it (it is not freed here) */
+ continue;
+ }
+ if (((*out)->m_flags & M_EXT) == 0) {
+ int len;
+
+ len = M_TRAILINGSPACE(*out);
+ len = min(len, in->m_len); /* NOTE(review): if no trailing space is left len is 0 and neither mbuf changes -- confirm the loop still terminates */
+ datalen += len;
+
+ IFDEBUG(D_REQUEST)
+ printf("m_compress copying len %d\n", len);
+ ENDDEBUG
+ bcopy(mtod(in, caddr_t), mtod((*out), caddr_t) + (*out)->m_len,
+ (unsigned)len);
+
+ (*out)->m_len += len;
+ in->m_len -= len; /* NOTE(review): in->m_data is not advanced, so a partial copy would re-read from the start -- confirm */
+ continue;
+ } else {
+ /* (*out) is full */
+ if(( (*out)->m_next = m_get(M_DONTWAIT, MT_DATA) ) == MNULL) {
+ m_freem(*out);
+ *out = in;
+ IFDEBUG(D_REQUEST)
+ printf("m_compress returning -1: B\n");
+ ENDDEBUG
+ splx(s);
+ return -1;
+ }
+ (*out)->m_len = 0; /* NOTE(review): this resets the current *out, not the mbuf just allocated -- confirm */
+ (*out)->m_act = MNULL;
+ *out = (*out)->m_next; /* the caller's pointer now refers to the newest mbuf */
+ }
+ }
+ m_freem(in); /* the loop above only exits once in is null */
+ IFDEBUG(D_REQUEST)
+ printf("m_compress returning 0x%x: A\n", datalen);
+ ENDDEBUG
+ splx(s);
+ return datalen;
+}
diff --git a/sys/netiso/iso_errno.h b/sys/netiso/iso_errno.h
new file mode 100644
index 000000000000..0d75589ca0eb
--- /dev/null
+++ b/sys/netiso/iso_errno.h
@@ -0,0 +1,274 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso_errno.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+
+#ifndef __ISO_ERRNO__
+#define __ISO_ERRNO__
+
+#define ISO_ERROR_MASK 0x8000
+#define BSD_ERROR_MASK 0x0000
+#define TP_ERROR_MASK 0x8800 /* transport layer */
+#define CONL_ERROR_MASK 0x8400 /* co network layer */
+#define CLNL_ERROR_MASK 0x8200 /* cl network layer */
+#define TP_ERROR_SNDC 0x10000 /* kludge to force DC's on certain errors */
+
+#define E_CO_NOERROR (CONL_ERROR_MASK | 0x0) /* no add'l info */
+
+/******************************************************************************/
+/* */
+/* */
+/* Transport Layer */
+/* */
+/* */
+/******************************************************************************/
+
+#define E_TP_DR_NO_REAS (TP_ERROR_MASK | 0x0) /* dr reason not specified*/
+#define E_TP_CONGEST (TP_ERROR_MASK | 0x1) /* dr reason congestion */
+#define E_TP_NO_SESSION (TP_ERROR_MASK | 0x2) /* dr reason no sess ent */
+#define E_TP_ADDR_UNK (TP_ERROR_MASK | 0x3) /* dr reason addr unknown */
+
+#define E_TP_ER_NO_REAS (TP_ERROR_MASK | 0x40) /* er reas not specified */
+#define E_TP_INV_PCODE (TP_ERROR_MASK | 0x41) /* er reas invalid parm code */
+#define E_TP_INV_TPDU (TP_ERROR_MASK | 0x42) /* er reas invalid tpdu type */
+#define E_TP_INV_PVAL (TP_ERROR_MASK | 0x43) /* er reas invalid parm value*/
+
+#define E_TP_NORMAL_DISC (TP_ERROR_MASK | 0x80) /* dr reas normal disc */
+#define E_TP_CONGEST_2 (TP_ERROR_MASK | 0x81) /* dr reason congestion */
+#define E_TP_NEGOT_FAILED (TP_ERROR_MASK | 0x82) /* dr negotiation failed */
+#define E_TP_DUPL_SRCREF (TP_ERROR_MASK | 0x83) /* dr duplicate src ref */
+#define E_TP_MISM_REFS (TP_ERROR_MASK | 0x84) /* dr mismatched references*/
+#define E_TP_PROTO_ERR (TP_ERROR_MASK | 0x85) /* dr protocol error*/
+/* 0x86 not used */
+#define E_TP_REF_OVERFLOW (TP_ERROR_MASK | 0x87) /* dr reference overflow */
+#define E_TP_NO_CR_ON_NC (TP_ERROR_MASK | 0x88) /* dr cr refused on this nc */
+/* 0x89 not used */
+#define E_TP_LENGTH_INVAL (TP_ERROR_MASK | 0x8a) /* dr inval length in hdr*/
+
+/******************************************************************************/
+/* */
+/* */
+/* Connection Less Network Layer */
+/* */
+/* */
+/******************************************************************************/
+
+#define E_CLNL_??? (CLNL_ERROR_MASK | 0x1) /* NOTE(review): template entry only -- the ??? suffix is not valid in a macro name, so no usable CLNL code is defined by this line */
+
+/******************************************************************************/
+/* */
+/* */
+/* Connection Oriented Network Layer */
+/* */
+/* */
+/******************************************************************************/
+ /* see p. 149 of ISO 8208 */
+#define E_CO_NOERROR (CONL_ERROR_MASK | 0x0) /* no add'l info */
+#define E_CO_INV_PS (CONL_ERROR_MASK | 0x1) /* invalid p(s) */
+#define E_CO_INV_PR (CONL_ERROR_MASK | 0x2) /* invalid p(r) */
+ /* dot dot dot */
+#define E_CO_INV_PKT_TYPE (CONL_ERROR_MASK | 0x10) /* packet type invalid*/
+#define E_CO_INV_PKT_R1 (CONL_ERROR_MASK | 0x11) /* for state r1 */
+#define E_CO_INV_PKT_R2 (CONL_ERROR_MASK | 0x12) /* for state r2 */
+#define E_CO_INV_PKT_R3 (CONL_ERROR_MASK | 0x13) /* for state r3 */
+#define E_CO_INV_PKT_P1 (CONL_ERROR_MASK | 0x14) /* for state p1 */
+#define E_CO_INV_PKT_P2 (CONL_ERROR_MASK | 0x15) /* for state p2 */
+#define E_CO_INV_PKT_P3 (CONL_ERROR_MASK | 0x16) /* for state p3 */
+#define E_CO_INV_PKT_P4 (CONL_ERROR_MASK | 0x17) /* for state p4 */
+#define E_CO_INV_PKT_P5 (CONL_ERROR_MASK | 0x18) /* for state p5 */
+#define E_CO_INV_PKT_P6 (CONL_ERROR_MASK | 0x19) /* for state p6 */
+#define E_CO_INV_PKT_P7 (CONL_ERROR_MASK | 0x1a) /* for state p7 */
+#define E_CO_INV_PKT_D1 (CONL_ERROR_MASK | 0x1b) /* for state d1 */
+#define E_CO_INV_PKT_D2 (CONL_ERROR_MASK | 0x1c) /* for state d2 */
+#define E_CO_INV_PKT_D3 (CONL_ERROR_MASK | 0x1d) /* for state d3 */
+ /* dot dot dot */
+#define E_CO_PKT_NOT_ALWD (CONL_ERROR_MASK | 0x20) /* packet not allowed */
+#define E_CO_PNA_UNIDENT (CONL_ERROR_MASK | 0x21) /* unidentifiable pkt */
+#define E_CO_PNA_ONEWAY (CONL_ERROR_MASK | 0x22) /* call on 1-way lc */
+#define E_CO_PNA_PVC (CONL_ERROR_MASK | 0x23) /* inv pkt type on a pvc */
+#define E_CO_PNA_UNASSLC (CONL_ERROR_MASK | 0x24) /* pkt on unassigned lc */
+#define E_CO_PNA_REJECT (CONL_ERROR_MASK | 0x25) /* REJ not subscribed to*/
+#define E_CO_PNA_SHORT (CONL_ERROR_MASK | 0x26) /* pkt too short */
+#define E_CO_PNA_LONG (CONL_ERROR_MASK | 0x27) /* pkt too long */
+#define E_CO_PNA_INVGFI (CONL_ERROR_MASK | 0x28) /* inv gen format id */
+#define E_CO_PNA_NZLCI (CONL_ERROR_MASK | 0x29) \
+ /* restart or reg pkt with nonzero logical channel identifier */
+#define E_CO_PNA_FACIL (CONL_ERROR_MASK | 0x2a) \
+ /* pkt type not compat with facility */
+#define E_CO_PNA_UINTCON (CONL_ERROR_MASK | 0x2b) /* unauthor intrpt conf */
+#define E_CO_PNA_UINTRPT (CONL_ERROR_MASK | 0x2c) /* unauthorized intrpt */
+#define E_CO_PNA_UREJECT (CONL_ERROR_MASK | 0x2d) /* unauthorized reject */
+
+#define E_CO_TMR_EXP (CONL_ERROR_MASK | 0x30) /* timer expired */
+#define E_CO_TMR_CALR (CONL_ERROR_MASK | 0x31) /* inc. call or call req */
+#define E_CO_TMR_CLRI (CONL_ERROR_MASK | 0x32) /* clear indication */
+#define E_CO_TMR_RSTI (CONL_ERROR_MASK | 0x33) /* reset indication */
+#define E_CO_TMR_RRTI (CONL_ERROR_MASK | 0x34) /* restart indication */
+
+#define E_CO_REG_PROB (CONL_ERROR_MASK | 0x40)\
+ /* call setup, clear, or registration problem */
+#define E_CO_REG_CODE (CONL_ERROR_MASK | 0x41) /* code not allowed */
+#define E_CO_REG_PARM (CONL_ERROR_MASK | 0x42) /* parameter not allowed */
+#define E_CO_REG_ICDA (CONL_ERROR_MASK | 0x43) /* invalid called addr */
+#define E_CO_REG_ICGA (CONL_ERROR_MASK | 0x44) /* invalid calling addr */
+#define E_CO_REG_ILEN (CONL_ERROR_MASK | 0x45) /* invalid facil length */
+#define E_CO_REG_IBAR (CONL_ERROR_MASK | 0x46) /* incoming call barred */
+#define E_CO_REG_NOLC (CONL_ERROR_MASK | 0x47) /* no logical chan avail*/
+#define E_CO_REG_COLL (CONL_ERROR_MASK | 0x48) /* call collision */
+#define E_CO_REG_DUPF (CONL_ERROR_MASK | 0x49) /* dupl facil requested */
+#define E_CO_REG_NZAL (CONL_ERROR_MASK | 0x4a) /* non-zero addr length */
+#define E_CO_REG_NZFL (CONL_ERROR_MASK | 0x4b) /* non-zero facil length */
+#define E_CO_REG_EFNP (CONL_ERROR_MASK | 0x4c) \
+ /* expected facil not provided */
+#define E_CO_REG_ICCITT (CONL_ERROR_MASK | 0x4d) \
+ /* invalid CCITT-specified DTE facil */
+
+#define E_CO_MISC (CONL_ERROR_MASK | 0x50) /* miscellaneous */
+#define E_CO_MISC_CAUSE (CONL_ERROR_MASK | 0x51) /* improper cause code */
+#define E_CO_MISC_ALIGN (CONL_ERROR_MASK | 0x52) /* not octet-aligned */
+#define E_CO_MISC_IQBS (CONL_ERROR_MASK | 0x53) \
+ /* inconsistent Q bit settings */
+
+#define E_CO_INTL (CONL_ERROR_MASK | 0x70) /* international problem */
+#define E_CO_IREMNWK (CONL_ERROR_MASK | 0x71) /* remote network problem */
+#define E_CO_INPROTO (CONL_ERROR_MASK | 0x72) /* int'l protocol problem */
+#define E_CO_ILINKDWN (CONL_ERROR_MASK | 0x73) /* int'l link down */
+#define E_CO_ILINKBSY (CONL_ERROR_MASK | 0x74) /* int'l link busy */
+#define E_CO_IXNETFAC (CONL_ERROR_MASK | 0x75) /* transit netwk facil */
+#define E_CO_IRNETFAC (CONL_ERROR_MASK | 0x76) /* remote netwk facil */
+#define E_CO_IROUTING (CONL_ERROR_MASK | 0x77) /* int'l routing prob */
+#define E_CO_ITMPRTG (CONL_ERROR_MASK | 0x78) /* temporary routing prob */
+#define E_CO_IUNKDNIC (CONL_ERROR_MASK | 0x79) /* unknown called DNIC */
+#define E_CO_IMAINT (CONL_ERROR_MASK | 0x7a) /* maintenance action */
+
+#define E_CO_TIMO (CONL_ERROR_MASK | 0x90) \
+ /* timer expired or retransmission count surpassed */
+#define E_CO_TIM_INTRP (CONL_ERROR_MASK | 0x91) /* for interrupt */
+#define E_CO_TIM_DATA (CONL_ERROR_MASK | 0x92) /* for data */
+#define E_CO_TIM_REJ (CONL_ERROR_MASK | 0x93) /* for reject */
+
+#define E_CO_DTE_SPEC (CONL_ERROR_MASK | 0xa0) /* DTE-specific */
+#define E_CO_DTE_OK (CONL_ERROR_MASK | 0xa1) /* DTE operational */
+#define E_CO_DTE_NOK (CONL_ERROR_MASK | 0xa2) /* DTE not operational */
+#define E_CO_DTE_RSRC (CONL_ERROR_MASK | 0xa3) /* DTE resource constraint*/
+#define E_CO_DTE_FSLCT (CONL_ERROR_MASK | 0xa4) /* fast select not subsc */
+#define E_CO_DTE_PFPKT (CONL_ERROR_MASK | 0xa5) /* partially full pkt */
+#define E_CO_DTE_DBIT (CONL_ERROR_MASK | 0xa6) /* D-bit proc not supp */
+#define E_CO_DTE_RCCON (CONL_ERROR_MASK | 0xa7) /* reg/cancel confirmed */
+
+#define E_CO_OSI_NSP (CONL_ERROR_MASK | 0xe0) /* OSI net svc problem */
+#define E_CO_OSI_DISCT (CONL_ERROR_MASK | 0xe1) /* disconnect transient */
+#define E_CO_OSI_DISCP (CONL_ERROR_MASK | 0xe2) /* disconnect permanent */
+#define E_CO_OSI_REJT (CONL_ERROR_MASK | 0xe3) /* reject transient */
+#define E_CO_OSI_REJP (CONL_ERROR_MASK | 0xe4) /* reject permanent */
+#define E_CO_OSI_QOST (CONL_ERROR_MASK | 0xe5) /* reject QOS transient */
+#define E_CO_OSI_QOSP (CONL_ERROR_MASK | 0xe6) /* reject QOS permanent */
+#define E_CO_OSI_NSAPT (CONL_ERROR_MASK | 0xe7) /* NSAP unreach transient */
+#define E_CO_OSI_NSAPP (CONL_ERROR_MASK | 0xe8) /* NSAP unreach permanent */
+#define E_CO_OSI_RESET (CONL_ERROR_MASK | 0xe9) /* reset no reason */
+#define E_CO_OSI_CONGEST (CONL_ERROR_MASK | 0xea) /* reset congestion */
+#define E_CO_OSI_UNSAP (CONL_ERROR_MASK | 0xeb) /* unknown NSAP permanent */
+
+#define E_CO_HLI_INIT (CONL_ERROR_MASK | 0xf0) /* higher level initiated*/
+#define E_CO_HLI_DISCN (CONL_ERROR_MASK | 0xf1) /* disconnect normal */
+#define E_CO_HLI_DISCA (CONL_ERROR_MASK | 0xf2) /* disconnect abnormal */
+#define E_CO_HLI_DISCI (CONL_ERROR_MASK | 0xf3) /* disconnect incompatible*/
+#define E_CO_HLI_REJT (CONL_ERROR_MASK | 0xf4) /* reject transient */
+#define E_CO_HLI_REJP (CONL_ERROR_MASK | 0xf5) /* reject permanent */
+#define E_CO_HLI_QOST (CONL_ERROR_MASK | 0xf6) /* reject QOS transient */
+#define E_CO_HLI_QOSP (CONL_ERROR_MASK | 0xf7) /* reject QOS permanent */
+#define E_CO_HLI_REJI (CONL_ERROR_MASK | 0xf8) /* reject incompatible */
+#define E_CO_HLI_PROTOID (CONL_ERROR_MASK | 0xf9) /* unrecog proto id */
+#define E_CO_HLI_RESYNC (CONL_ERROR_MASK | 0xfa) /* reset - user resync */
+
+/* Cause on 8208 CLEAR field */
+#define E_CO_NUMBERBUSY (CONL_ERROR_MASK | 0x101) /* Number busy */
+#define E_CO_INVFACREQ (CONL_ERROR_MASK | 0x103) /* invalid facil req */
+#define E_CO_NETCONGEST (CONL_ERROR_MASK | 0x105) /* Network congestion */
+#define E_CO_OUTOFORDER (CONL_ERROR_MASK | 0x109) /* Out of order */
+#define E_CO_ACCESSBAR (CONL_ERROR_MASK | 0x10b) /* access barred */
+#define E_CO_NOTOBTAIN (CONL_ERROR_MASK | 0x10d) /* not obtainable */
+#define E_CO_REMPROCERR (CONL_ERROR_MASK | 0x111) /* Remote procedure err */
+#define E_CO_LOCPROCERR (CONL_ERROR_MASK | 0x113) /* Local procedure err */
+#define E_CO_RPOAOOO (CONL_ERROR_MASK | 0x115) /* RPOA out of order */
+#define E_CO_NOREVCHG (CONL_ERROR_MASK | 0x119) /* Revs chg not accepted*/
+#define E_CO_INCOMPAT (CONL_ERROR_MASK | 0x121) /* Incompatible dest */
+#define E_CO_NOFASTSEL (CONL_ERROR_MASK | 0x129)
+ /* Fast select accpt not subscribed */
+#define E_CO_NOSHIP (CONL_ERROR_MASK | 0x139) /* ship absent */
+#define E_CO_GWPROCERR (CONL_ERROR_MASK | 0x1c1) /* Gateway-detected err*/
+#define E_CO_GWCONGEST (CONL_ERROR_MASK | 0x1c3) /* Gateway congestion*/
+
+/* ARGO only */
+#define E_CO_QFULL (CONL_ERROR_MASK | 0x100) /* dropped packet - queue full*/
+#define E_CO_AIWP (CONL_ERROR_MASK | 0x102) /* addr incompat w/proto */
+#define E_CO_CHAN (CONL_ERROR_MASK | 0x104) /* bad channel number */
+
+/* ARGO only; driver specific */
+#define E_CO_NORESOURCES (CONL_ERROR_MASK | 0x1b0) /* eicon clogged */
+#define E_CO_PDNDOWN (CONL_ERROR_MASK | 0x1b1) /* physical net down */
+#define E_CO_DRVRCLRESET (CONL_ERROR_MASK | 0x1b2) /* driver clear/reset */
+#define E_CO_PDNCLRESET (CONL_ERROR_MASK | 0x1b3) /* PDN clear/reset */
+#define E_CO_DTECLRESET (CONL_ERROR_MASK | 0x1b4) /* board clear/reset */
+#define E_CO_UNKCLRESET (CONL_ERROR_MASK | 0x1b5) /* unexpected clr/rst */
+
+#define CONL_ERROR_MAX 0x1c3
+
+#endif /* __ISO_ERRNO__ */
diff --git a/sys/netiso/iso_pcb.c b/sys/netiso/iso_pcb.c
new file mode 100644
index 000000000000..0b50c603422f
--- /dev/null
+++ b/sys/netiso/iso_pcb.c
@@ -0,0 +1,617 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso_pcb.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: iso_pcb.c,v 4.5 88/06/29 14:59:56 hagens Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/iso_pcb.c,v $
+ *
+ * Iso address family net-layer(s) pcb stuff. NEH 1/29/87
+ */
+
+#ifdef ISO
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netinet/in_systm.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_var.h>
+#include <sys/protosw.h>
+
+#ifdef TPCONS
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+#endif
+
+#define PCBNULL (struct isopcb *)0 /* null isopcb pointer constant */
+struct iso_addr zeroiso_addr = { /* first member set to 0; C zero-fills the rest */
+ 0
+};
+
+
+/*
+ * FUNCTION: iso_pcballoc
+ *
+ * PURPOSE: allocates a zeroed isopcb with MALLOC (type M_PCB, M_NOWAIT),
+ * records socket (so) and queue head (head) in it, and
+ * puts it in the queue with head (head)
+ *
+ * RETURNS: 0 if OK, ENOBUFS if the MALLOC fails
+ */
+int
+iso_pcballoc(so, head)
+ struct socket *so;
+ struct isopcb *head;
+{
+ register struct isopcb *isop;
+
+ IFDEBUG(D_ISO)
+ printf("iso_pcballoc(so 0x%x)\n", so);
+ ENDDEBUG
+ MALLOC(isop, struct isopcb *, sizeof(*isop), M_PCB, M_NOWAIT);
+ if (isop == NULL)
+ return ENOBUFS;
+ bzero((caddr_t)isop, sizeof(*isop));
+ isop->isop_head = head;
+ isop->isop_socket = so;
+ insque(isop, head);
+ if (so) /* a null (so) is tolerated; so_pcb is set only for a real socket */
+ so->so_pcb = (caddr_t)isop;
+ return 0;
+}
+
+/*
+ * FUNCTION: iso_pcbbind
+ *
+ * PURPOSE: binds the address given in *(nam) to the socket
+ * specified by the isopcb in *(isop)
+ * If the given address is non-zero, it makes sure the
+ * address isn't already in use and if it's got a network
+ * portion, we look for an interface with that network
+ * address. If (nam) is null or has no transport selector, we allocate
+ * a port and stuff it in isop->isop_laddr (as written, also when siso_tlen > 2).
+ *
+ * RETURNS: errno E* or 0 if ok (EADDRNOTAVAIL, EADDRINUSE, ENAMETOOLONG, ENOBUFS, EACCES).
+ *
+ * SIDE EFFECTS: increments head->isop_lport if it allocates a port #
+ *
+ * NOTES: a selector below ISO_PORT_RESERVED needs SS_PRIV set in the socket's so_state.
+ */
+#define satosiso(sa) ((struct sockaddr_iso *)(sa))
+int
+iso_pcbbind(isop, nam)
+ register struct isopcb *isop;
+ struct mbuf *nam;
+{
+ register struct isopcb *head = isop->isop_head;
+ register struct sockaddr_iso *siso;
+ struct iso_ifaddr *ia;
+ union { /* lets the 2-byte selector be read as bytes or as a u_short */
+ char data[2];
+ u_short s;
+ } suf;
+
+ IFDEBUG(D_ISO)
+ printf("iso_pcbbind(isop 0x%x, nam 0x%x)\n", isop, nam);
+ ENDDEBUG
+ suf.s = 0;
+ if (iso_ifaddr == 0) /* any interfaces attached? */
+ return EADDRNOTAVAIL;
+ if (isop->isop_laddr) /* already bound */
+ return EADDRINUSE;
+ if(nam == (struct mbuf *)0) { /* no name: build an empty local addr with a 2-byte selector */
+ isop->isop_laddr = &isop->isop_sladdr;
+ isop->isop_sladdr.siso_len = sizeof(struct sockaddr_iso);
+ isop->isop_sladdr.siso_family = AF_ISO;
+ isop->isop_sladdr.siso_tlen = 2;
+ isop->isop_sladdr.siso_nlen = 0;
+ isop->isop_sladdr.siso_slen = 0;
+ isop->isop_sladdr.siso_plen = 0;
+ goto noname;
+ }
+ siso = mtod(nam, struct sockaddr_iso *);
+ IFDEBUG(D_ISO)
+ printf("iso_pcbbind(name len 0x%x)\n", nam->m_len);
+ printf("The address is %s\n", clnp_iso_addrp(&siso->siso_addr));
+ ENDDEBUG
+ /*
+ * We would like sort of length check but since some OSI addrs
+ * do not have fixed length, we can't really do much.
+ * The ONLY thing we can say is that an osi addr has to have
+ * at LEAST an afi and one more byte and had better fit into
+ * a struct iso_addr.
+ * However, in fact the size of the whole thing is a struct
+ * sockaddr_iso, so probably this is what we should check for.
+ */
+ if( (nam->m_len < 2) || (nam->m_len < siso->siso_len)) { /* note: a too-short name also yields ENAMETOOLONG */
+ return ENAMETOOLONG;
+ }
+ if (siso->siso_nlen) {
+ /* non-zero net addr- better match one of our interfaces */
+ IFDEBUG(D_ISO)
+ printf("iso_pcbbind: bind to NOT zeroisoaddr\n");
+ ENDDEBUG
+ for (ia = iso_ifaddr; ia; ia = ia->ia_next)
+ if (SAME_ISOADDR(siso, &ia->ia_addr))
+ break;
+ if (ia == 0)
+ return EADDRNOTAVAIL;
+ }
+ if (siso->siso_len <= sizeof (isop->isop_sladdr)) { /* fits the preallocated slot */
+ isop->isop_laddr = &isop->isop_sladdr;
+ } else { /* too big: keep a private mbuf copy of the name */
+ if ((nam = m_copy(nam, 0, (int)M_COPYALL)) == 0)
+ return ENOBUFS;
+ isop->isop_laddr = mtod(nam, struct sockaddr_iso *);
+ }
+ bcopy((caddr_t)siso, (caddr_t)isop->isop_laddr, siso->siso_len);
+ if (siso->siso_tlen == 0)
+ goto noname;
+ if ((isop->isop_socket->so_options & SO_REUSEADDR) == 0 &&
+ iso_pcblookup(head, 0, (caddr_t)0, isop->isop_laddr))
+ return EADDRINUSE;
+ if (siso->siso_tlen <= 2) {
+ bcopy(TSEL(siso), suf.data, sizeof(suf.data));
+ suf.s = ntohs(suf.s);
+ if((suf.s < ISO_PORT_RESERVED) && /* SS_PRIV is a flag bit: test it with &, not && */
+ (isop->isop_socket->so_state & SS_PRIV) == 0)
+ return EACCES;
+ } else {
+ register char *cp;
+noname:
+ cp = TSEL(isop->isop_laddr);
+ IFDEBUG(D_ISO)
+ printf("iso_pcbbind noname\n");
+ ENDDEBUG
+ do { /* step isop_lport, wrapping into the allowed range, until the selector is unused */
+ if (head->isop_lport++ < ISO_PORT_RESERVED ||
+ head->isop_lport > ISO_PORT_USERRESERVED)
+ head->isop_lport = ISO_PORT_RESERVED;
+ suf.s = htons(head->isop_lport);
+ cp[0] = suf.data[0];
+ cp[1] = suf.data[1];
+ } while (iso_pcblookup(head, 0, (caddr_t)0, isop->isop_laddr));
+ }
+ IFDEBUG(D_ISO)
+ printf("iso_pcbbind returns 0, suf 0x%x\n", suf.s);
+ ENDDEBUG
+ return 0;
+}
+/*
+ * FUNCTION: iso_pcbconnect
+ *
+ * PURPOSE: Make the isopcb (isop) look like it's connected.
+ * In other words, give it the peer address given in
+ * the mbuf * (nam). Make sure such a combination
+ * of local, peer addresses doesn't already exist
+ * for this protocol. Internet mentality prevails here,
+ * wherein a src,dst pair uniquely identifies a connection.
+ * Both net address and port must be specified in argument
+ * (nam).
+ * If we don't have a local address for this socket yet,
+ * we pick one by calling iso_pcbbind().
+ *
+ * RETURNS: errno E* or 0 if ok.
+ *
+ * SIDE EFFECTS: Looks up a route, which may cause one to be left
+ * in the isopcb.
+ *
+ * NOTES: if (nam) has siso_nlen == 0, the net address of the first iso_ifaddr entry is copied into it.
+ */
+int
+iso_pcbconnect(isop, nam)
+ register struct isopcb *isop;
+ struct mbuf *nam;
+{
+ register struct sockaddr_iso *siso = mtod(nam, struct sockaddr_iso *);
+ int local_zero, error = 0;
+ struct iso_ifaddr *ia;
+
+ IFDEBUG(D_ISO)
+ printf("iso_pcbconnect(isop 0x%x sock 0x%x nam 0x%x",
+ isop, isop->isop_socket, nam);
+ printf("nam->m_len 0x%x), addr:\n", nam->m_len);
+ dump_isoaddr(siso);
+ ENDDEBUG
+ if (nam->m_len < siso->siso_len)
+ return EINVAL;
+ if (siso->siso_family != AF_ISO)
+ return EAFNOSUPPORT;
+ if (siso->siso_nlen == 0) {
+ if (ia = iso_ifaddr) { /* assignment intended: take the first interface address */
+ int nlen = ia->ia_addr.siso_nlen;
+ ovbcopy(TSEL(siso), nlen + TSEL(siso), /* shift the selectors up to make room for the net addr */
+ siso->siso_plen + siso->siso_tlen + siso->siso_slen);
+ bcopy((caddr_t)&ia->ia_addr.siso_addr,
+ (caddr_t)&siso->siso_addr, nlen + 1); /* NOTE(review): siso_len is not adjusted here - confirm */
+ /* includes siso->siso_nlen = nlen; */
+ } else
+ return EADDRNOTAVAIL;
+ }
+ /*
+ * Local zero means either not bound, or bound to a TSEL, but no
+ * particular local interface. So, if we want to send somebody
+ * we need to choose a return address.
+ */
+ local_zero =
+ ((isop->isop_laddr == 0) || (isop->isop_laddr->siso_nlen == 0));
+ if (local_zero) {
+ int flags;
+
+ IFDEBUG(D_ISO)
+ printf("iso_pcbconnect localzero 1\n");
+ ENDDEBUG
+ /*
+ * If route is known or can be allocated now,
+ * our src addr is taken from the i/f, else punt.
+ */
+ flags = isop->isop_socket->so_options & SO_DONTROUTE;
+ if (error = clnp_route(&siso->siso_addr, &isop->isop_route, flags,
+ (struct sockaddr **)0, &ia))
+ return error;
+ IFDEBUG(D_ISO)
+ printf("iso_pcbconnect localzero 2, ro->ro_rt 0x%x",
+ isop->isop_route.ro_rt);
+ printf(" ia 0x%x\n", ia);
+ ENDDEBUG
+ }
+ IFDEBUG(D_ISO)
+ printf("in iso_pcbconnect before lookup isop 0x%x isop->sock 0x%x\n",
+ isop, isop->isop_socket);
+ ENDDEBUG
+ if (local_zero) {
+ int nlen, tlen, totlen; caddr_t oldtsel, newtsel;
+ siso = isop->isop_laddr;
+ if (siso == 0 || siso->siso_tlen == 0)
+ (void)iso_pcbbind(isop, (struct mbuf *)0); /* NOTE(review): result discarded; isop_laddr is dereferenced below */
+ /*
+ * Here we have the problem of squeezing a definite network address
+ * into an existing sockaddr_iso, which in fact may not have room
+ * for it. This gets messy.
+ */
+ siso = isop->isop_laddr;
+ oldtsel = TSEL(siso);
+ tlen = siso->siso_tlen;
+ nlen = ia->ia_addr.siso_nlen;
+ totlen = tlen + nlen + _offsetof(struct sockaddr_iso, siso_data[0]);
+ if ((siso == &isop->isop_sladdr) &&
+ (totlen > sizeof(isop->isop_sladdr))) { /* preallocated slot too small: move to an mbuf */
+ struct mbuf *m = m_get(MT_SONAME, M_DONTWAIT);
+ if (m == 0)
+ return ENOBUFS;
+ m->m_len = totlen;
+ isop->isop_laddr = siso = mtod(m, struct sockaddr_iso *);
+ }
+ siso->siso_nlen = ia->ia_addr.siso_nlen;
+ newtsel = TSEL(siso); /* taken after siso_nlen is set, unlike oldtsel */
+ ovbcopy(oldtsel, newtsel, tlen);
+ bcopy(ia->ia_addr.siso_data, siso->siso_data, nlen);
+ siso->siso_tlen = tlen;
+ siso->siso_family = AF_ISO;
+ siso->siso_len = totlen;
+ siso = mtod(nam, struct sockaddr_iso *); /* back to the peer address for the copy below */
+ }
+ IFDEBUG(D_ISO)
+ printf("in iso_pcbconnect before bcopy isop 0x%x isop->sock 0x%x\n",
+ isop, isop->isop_socket);
+ ENDDEBUG
+ /*
+ * If we had to allocate space to a previous big foreign address,
+ * and for some reason we didn't free it, we reuse it knowing
+ * that is going to be big enough, as sockaddrs are delivered in
+ * 128 byte mbufs.
+ * If the foreign address is small enough, we use default space;
+ * otherwise, we grab an mbuf to copy into.
+ */
+ if (isop->isop_faddr == 0 || isop->isop_faddr == &isop->isop_sfaddr) {
+ if (siso->siso_len <= sizeof(isop->isop_sfaddr))
+ isop->isop_faddr = &isop->isop_sfaddr;
+ else {
+ struct mbuf *m = m_get(MT_SONAME, M_DONTWAIT);
+ if (m == 0)
+ return ENOBUFS;
+ isop->isop_faddr = mtod(m, struct sockaddr_iso *);
+ }
+ }
+ bcopy((caddr_t)siso, (caddr_t)isop->isop_faddr, siso->siso_len);
+ IFDEBUG(D_ISO)
+ printf("in iso_pcbconnect after bcopy isop 0x%x isop->sock 0x%x\n",
+ isop, isop->isop_socket);
+ printf("iso_pcbconnect connected to addr:\n");
+ dump_isoaddr(isop->isop_faddr);
+ printf("iso_pcbconnect end: src addr:\n");
+ dump_isoaddr(isop->isop_laddr);
+ ENDDEBUG
+ return 0;
+}
+
+/*
+ * FUNCTION: iso_pcbdisconnect()
+ *
+ * PURPOSE: washes away the peer address info so the socket
+ * appears to be disconnected.
+ * If there's no file descriptor associated with the socket
+ * it detaches the pcb.
+ *
+ * RETURNS: Nada.
+ *
+ * SIDE EFFECTS: May detach the pcb.
+ *
+ * NOTES:
+ */
+void
+iso_pcbdisconnect(isop)
+ struct isopcb *isop;
+{
+ void iso_pcbdetach();
+ register struct sockaddr_iso *siso;
+
+ IFDEBUG(D_ISO)
+ printf("iso_pcbdisconnect(isop 0x%x)\n", isop);
+ ENDDEBUG
+ /*
+ * Preserve binding information if already bound.
+ */
+ if ((siso = isop->isop_laddr) && siso->siso_nlen && siso->siso_tlen) {
+ caddr_t otsel = TSEL(siso);
+ siso->siso_nlen = 0; /* drop the net address part ... */
+ ovbcopy(otsel, TSEL(siso), siso->siso_tlen); /* ... and move the selector to its new position */
+ }
+ if (isop->isop_faddr && isop->isop_faddr != &isop->isop_sfaddr) /* faddr lives in an mbuf, not the preallocated slot */
+ m_freem(dtom(isop->isop_faddr));
+ isop->isop_faddr = 0;
+ if (isop->isop_socket->so_state & SS_NOFDREF) /* NOTE(review): isop_socket is not null-checked here; iso_pcbdetach does check it */
+ iso_pcbdetach(isop);
+}
+
+/*
+ * FUNCTION: iso_pcbdetach
+ *
+ * PURPOSE: detach the pcb at *(isop) from its socket and free
+ * the mbufs associated with the pcb, then the pcb itself.
+ * Dequeues (isop) from its head.
+ *
+ * RETURNS: Nada.
+ *
+ * SIDE EFFECTS: calls sofree() on the socket (if any) and rtfree() on a held route.
+ *
+ * NOTES: under TPCONS, with isop_chan set, returns early while isop_refcnt is still > 0 after the decrement.
+ */
+void
+iso_pcbdetach(isop)
+ struct isopcb *isop;
+{
+ struct socket *so = isop->isop_socket;
+
+ IFDEBUG(D_ISO)
+ printf("iso_pcbdetach(isop 0x%x socket 0x%x so 0x%x)\n",
+ isop, isop->isop_socket, so);
+ ENDDEBUG
+#ifdef TPCONS
+ if (isop->isop_chan) {
+ register struct pklcd *lcp = (struct pklcd *)isop->isop_chan;
+ if (--isop->isop_refcnt > 0) /* still referenced: keep the pcb */
+ return;
+ if (lcp && lcp->lcd_state == DATA_TRANSFER) {
+ lcp->lcd_upper = 0;
+ lcp->lcd_upnext = 0;
+ pk_disconnect(lcp);
+ }
+ isop->isop_chan = 0;
+ }
+#endif
+ if (so) { /* in the x.25 domain, we sometimes have no socket */
+ so->so_pcb = 0;
+ sofree(so);
+ }
+ IFDEBUG(D_ISO)
+ printf("iso_pcbdetach 2 \n");
+ ENDDEBUG
+ if (isop->isop_options)
+ (void)m_free(isop->isop_options);
+ IFDEBUG(D_ISO)
+ printf("iso_pcbdetach 3 \n");
+ ENDDEBUG
+ if (isop->isop_route.ro_rt)
+ rtfree(isop->isop_route.ro_rt);
+ IFDEBUG(D_ISO)
+ printf("iso_pcbdetach 3.1\n");
+ ENDDEBUG
+ if (isop->isop_clnpcache != NULL) { /* free the cached header first, then the cache mbuf */
+ struct clnp_cache *clcp =
+ mtod(isop->isop_clnpcache, struct clnp_cache *);
+ IFDEBUG(D_ISO)
+ printf("iso_pcbdetach 3.2: clcp 0x%x freeing clc_hdr x%x\n",
+ clcp, clcp->clc_hdr);
+ ENDDEBUG
+ if (clcp->clc_hdr != NULL)
+ m_free(clcp->clc_hdr);
+ IFDEBUG(D_ISO)
+ printf("iso_pcbdetach 3.3: freeing cache x%x\n",
+ isop->isop_clnpcache);
+ ENDDEBUG
+ m_free(isop->isop_clnpcache);
+ }
+ IFDEBUG(D_ISO)
+ printf("iso_pcbdetach 4 \n");
+ ENDDEBUG
+ remque(isop);
+ IFDEBUG(D_ISO)
+ printf("iso_pcbdetach 5 \n");
+ ENDDEBUG
+ if (isop->isop_laddr && (isop->isop_laddr != &isop->isop_sladdr)) /* laddr lives in an mbuf */
+ m_freem(dtom(isop->isop_laddr));
+ free((caddr_t)isop, M_PCB);
+}
+
+
+/*
+ * FUNCTION: iso_pcbnotify
+ *
+ * PURPOSE: notify all connections in this protocol's queue (head)
+ * that have peer address (siso) of the problem (errno)
+ * by calling (notify) on the connections' isopcbs.
+ *
+ * RETURNS: Rien.
+ *
+ * SIDE EFFECTS: a non-zero (errno) is stored in so_error of each matching socket.
+ *
+ * NOTES: (notify) is called at splimp!
+ */
+void
+iso_pcbnotify(head, siso, errno, notify)
+ struct isopcb *head;
+ register struct sockaddr_iso *siso;
+ int errno, (*notify)();
+{
+ register struct isopcb *isop;
+ int s = splimp();
+
+ IFDEBUG(D_ISO)
+ printf("iso_pcbnotify(head 0x%x, notify 0x%x) dst:\n", head, notify);
+ ENDDEBUG
+ for (isop = head->isop_next; isop != head; isop = isop->isop_next) {
+ if (isop->isop_socket == 0 || isop->isop_faddr == 0 || /* skip pcbs with no socket, no peer, or another peer */
+ !SAME_ISOADDR(siso, isop->isop_faddr)) {
+ IFDEBUG(D_ISO)
+ printf("iso_pcbnotify: CONTINUE isop 0x%x, sock 0x%x\n" ,
+ isop, isop->isop_socket);
+ printf("addrmatch cmp'd with (0x%x):\n", isop->isop_faddr);
+ dump_isoaddr(isop->isop_faddr);
+ ENDDEBUG
+ continue;
+ }
+ if (errno)
+ isop->isop_socket->so_error = errno;
+ if (notify) /* a null (notify) only records the error */
+ (*notify)(isop);
+ }
+ splx(s);
+ IFDEBUG(D_ISO)
+ printf("END OF iso_pcbnotify\n" );
+ ENDDEBUG
+}
+
+
+/*
+ * FUNCTION: iso_pcblookup
+ *
+ * PURPOSE: looks in the queue named by (head) for a pcb whose local
+ * transport selector equals that of (laddr) and, when (fportlen) is
+ * non-zero, whose foreign selector (if any) matches (fport) over (fportlen) bytes.
+ *
+ * RETURNS: ptr to the first isopcb that matches these
+ * arguments, o.w. returns zero.
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES: a pcb whose isop_laddr is (laddr) itself is skipped; net addrs are compared only if laddr->siso_nlen != 0.
+ */
+struct isopcb *
+iso_pcblookup(head, fportlen, fport, laddr)
+ struct isopcb *head;
+ register struct sockaddr_iso *laddr;
+ caddr_t fport;
+ int fportlen;
+{
+ register struct isopcb *isop;
+ register caddr_t lp = TSEL(laddr);
+ unsigned int llen = laddr->siso_tlen;
+
+ IFDEBUG(D_ISO)
+ printf("iso_pcblookup(head 0x%x laddr 0x%x fport 0x%x)\n",
+ head, laddr, fport);
+ ENDDEBUG
+ for (isop = head->isop_next; isop != head; isop = isop->isop_next) {
+ if (isop->isop_laddr == 0 || isop->isop_laddr == laddr) /* unbound, or the caller's own address */
+ continue;
+ if (isop->isop_laddr->siso_tlen != llen)
+ continue;
+ if (bcmp(lp, TSEL(isop->isop_laddr), llen))
+ continue;
+ if (fportlen && isop->isop_faddr &&
+ bcmp(fport, TSEL(isop->isop_faddr), (unsigned)fportlen))
+ continue;
+ /* PHASE2
+ * addrmatch1 should be iso_addrmatch(a, b, mask)
+ * where mask is taken from isop->isop_laddrmask (new field)
+ * isop_lnetmask will also be available in isop
+ if (laddr != &zeroiso_addr &&
+ !iso_addrmatch1(laddr, &(isop->isop_laddr.siso_addr)))
+ continue;
+ */
+ if (laddr->siso_nlen && (!SAME_ISOADDR(laddr, isop->isop_laddr)))
+ continue;
+ return (isop);
+ }
+ return (struct isopcb *)0;
+}
+#endif /* ISO */
diff --git a/sys/netiso/iso_pcb.h b/sys/netiso/iso_pcb.h
new file mode 100644
index 000000000000..aad76bcc065c
--- /dev/null
+++ b/sys/netiso/iso_pcb.h
@@ -0,0 +1,113 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso_pcb.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: iso_pcb.h,v 4.3 88/06/29 15:00:01 hagens Exp $ */
+/* $Source: /usr/argo/sys/netiso/RCS/iso_pcb.h,v $ */
+
+#define MAXX25CRUDLEN 16 /* 16 bytes of call request user data */
+
+/*
+ * Common structure pcb for argo protocol implementation.
+ */
+struct isopcb {
+ struct isopcb *isop_next,*isop_prev; /* pointers to other pcb's */
+ struct isopcb *isop_head; /* pointer back to chain of pcbs for
+ this protocol */
+ struct socket *isop_socket; /* back pointer to socket */
+ struct sockaddr_iso *isop_laddr; /* local addr, 0 if unbound; may point at isop_sladdr */
+ struct sockaddr_iso *isop_faddr; /* foreign addr, 0 if none; may point at isop_sfaddr */
+ struct route_iso {
+ struct rtentry *ro_rt;
+ struct sockaddr_iso ro_dst;
+ } isop_route; /* CLNP routing entry */
+ struct mbuf *isop_options; /* CLNP options */
+ struct mbuf *isop_optindex; /* CLNP options index */
+ struct mbuf *isop_clnpcache; /* CLNP cached hdr */
+ caddr_t isop_chan; /* actually struct pklcd * */
+ u_short isop_refcnt; /* mult TP4 tpcb's -> here */
+ u_short isop_lport; /* MISLEADING work var */
+ u_short isop_tuba_cached; /* for tuba address ref cnts */
+ int isop_x25crud_len; /* x25 call request ud */
+ char isop_x25crud[MAXX25CRUDLEN];
+ struct ifaddr *isop_ifa; /* ESIS interface assoc w/sock */
+ struct sockaddr_iso isop_sladdr, /* preallocated laddr */
+ isop_sfaddr; /* preallocated faddr */
+};
+
+#ifdef sotorawcb
+/*
+ * Common structure pcb for raw clnp protocol access.
+ * Here are the clnp-specific extensions to the raw control block;
+ * space for the necessary sockaddrs comes with the embedded isopcb.
+ */
+struct rawisopcb {
+ struct rawcb risop_rcb; /* common control block prefix */
+ int risop_flags; /* flags, e.g. raw sockopts */
+ struct isopcb risop_isop; /* space for bound addresses, routes, etc. */
+};
+#endif /* sotorawcb */
+
+#define sotoisopcb(so) ((struct isopcb *)(so)->so_pcb) /* socket's so_pcb viewed as an isopcb */
+#define sotorawisopcb(so) ((struct rawisopcb *)(so)->so_pcb) /* socket's so_pcb viewed as a rawisopcb */
+
+#ifdef KERNEL
+struct isopcb *iso_pcblookup();
+#endif /* KERNEL */
diff --git a/sys/netiso/iso_proto.c b/sys/netiso/iso_proto.c
new file mode 100644
index 000000000000..59575c7513bf
--- /dev/null
+++ b/sys/netiso/iso_proto.c
@@ -0,0 +1,197 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso_proto.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: iso_proto.c,v 4.4 88/09/08 08:38:42 hagens Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/iso_proto.c,v $
+ *
+ * iso_proto.c : protocol switch tables in the ISO domain
+ *
+ * ISO protocol family includes TP, CLTP, CLNP, 8208
+ * TP and CLNP are implemented here.
+ */
+
+#ifdef ISO
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <net/radix.h>
+
+#include <netiso/iso.h>
+
+int clnp_output(), clnp_init(),clnp_slowtimo(),clnp_drain();
+int rclnp_input(), rclnp_output(), rclnp_ctloutput(), raw_usrreq();
+int clnp_usrreq();
+
+int tp_ctloutput(), tpclnp_ctlinput(), tpclnp_input(), tp_usrreq();
+int tp_init(), tp_fasttimo(), tp_slowtimo(), tp_drain();
+int cons_init(), tpcons_input();
+
+int isis_input();
+int esis_input(), esis_ctlinput(), esis_init(), esis_usrreq();
+int idrp_input(), idrp_init(), idrp_usrreq();
+int cltp_input(), cltp_ctlinput(), cltp_init(), cltp_usrreq(), cltp_output();
+
+#ifdef TUBA
+int tuba_usrreq(), tuba_ctloutput(), tuba_init(), tuba_tcpinput();
+int tuba_slowtimo(), tuba_fasttimo();
+#endif
+
+/*
+ * Protocol switch for the ISO domain.
+ *
+ * Judging by the routines placed in each position, every entry reads:
+ *	type, domain, protocol number, flags,
+ *	input, output, ctlinput, ctloutput,
+ *	usrreq,
+ *	init, fasttimo, slowtimo, drain
+ * A 0 means the protocol does not supply that hook.
+ */
+struct protosw isosw[] = {
+/*
+ * We need a datagram entry through which net mgmt programs can get
+ * to the iso_control procedure (iso ioctls). Thus, a minimal
+ * SOCK_DGRAM interface is provided here.
+ * THIS ONE MUST BE FIRST: Kludge city : socket() says if(!proto) call
+ * pffindtype, which gets the first entry that matches the type.
+ * sigh.
+ */
+{ SOCK_DGRAM, &isodomain, ISOPROTO_CLTP, PR_ATOMIC|PR_ADDR,
+ 0, cltp_output, 0, 0,
+ cltp_usrreq,
+ cltp_init, 0, 0, 0
+},
+
+/*
+ * A datagram interface for clnp cannot co-exist with TP/CLNP
+ * because CLNP has no way to discriminate incoming TP packets from
+ * packets coming in for any other higher layer protocol.
+ * Old way: set it up so that pffindproto(... dgm, clnp) fails.
+ * New way: let pffindproto work (for x.25, thank you) but create
+ * a clnp_usrreq() that returns error on PRU_ATTACH.
+ */
+{SOCK_DGRAM, &isodomain, ISOPROTO_CLNP, 0,
+ 0, clnp_output, 0, 0,
+ clnp_usrreq,
+ clnp_init, 0, clnp_slowtimo, clnp_drain,
+},
+
+/* raw clnp */
+{ SOCK_RAW, &isodomain, ISOPROTO_RAW, PR_ATOMIC|PR_ADDR,
+ rclnp_input, rclnp_output, 0, rclnp_ctloutput,
+ clnp_usrreq,
+ 0, 0, 0, 0
+},
+
+/* ES-IS protocol */
+{ SOCK_DGRAM, &isodomain, ISOPROTO_ESIS, PR_ATOMIC|PR_ADDR,
+ esis_input, 0, esis_ctlinput, 0,
+ esis_usrreq,
+ esis_init, 0, 0, 0
+},
+
+/* ISOPROTO_INTRAISIS */
+{ SOCK_DGRAM, &isodomain, ISOPROTO_INTRAISIS, PR_ATOMIC|PR_ADDR,
+ isis_input, 0, 0, 0,
+ esis_usrreq,
+ 0, 0, 0, 0
+},
+
+/* ISOPROTO_IDRP */
+{ SOCK_DGRAM, &isodomain, ISOPROTO_IDRP, PR_ATOMIC|PR_ADDR,
+ idrp_input, 0, 0, 0,
+ idrp_usrreq,
+ idrp_init, 0, 0, 0
+},
+
+/* ISOPROTO_TP */
+{ SOCK_SEQPACKET, &isodomain, ISOPROTO_TP, PR_CONNREQUIRED|PR_WANTRCVD,
+ tpclnp_input, 0, tpclnp_ctlinput, tp_ctloutput,
+ tp_usrreq,
+ tp_init, tp_fasttimo, tp_slowtimo, tp_drain,
+},
+
+#ifdef TUBA
+/*
+ * ISOPROTO_TCP (TUBA).  The slow-timeout slot must carry tuba_slowtimo;
+ * it previously repeated tuba_fasttimo, so the slow timer routine was
+ * never run and the fast one ran from both timers.
+ */
+{ SOCK_STREAM, &isodomain, ISOPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD,
+ tuba_tcpinput, 0, 0, tuba_ctloutput,
+ tuba_usrreq,
+ tuba_init, tuba_fasttimo, tuba_slowtimo, 0
+},
+#endif
+
+#ifdef TPCONS
+/* ISOPROTO_TP0 (input arrives via tpcons_input rather than tpclnp_input) */
+{ SOCK_SEQPACKET, &isodomain, ISOPROTO_TP0, PR_CONNREQUIRED|PR_WANTRCVD,
+ tpcons_input, 0, 0, tp_ctloutput,
+ tp_usrreq,
+ cons_init, 0, 0, 0,
+},
+#endif
+
+};
+
+
+/*
+ * Domain descriptor for AF_ISO.  It publishes the isosw[] protocol
+ * switch (first entry and one-past-the-last entry) and names
+ * rn_inithead as the routing-table attach routine.
+ */
+struct domain isodomain = {
+ AF_ISO, /* family */
+ "iso-domain", /* name */
+ 0, /* initialize routine */
+ 0, /* externalize access rights */
+ 0, /* dispose of internalized rights */
+ isosw, /* protosw */
+ &isosw[sizeof(isosw)/sizeof(isosw[0])], /* NPROTOSW */
+ 0, /* next */
+ rn_inithead, /* rtattach */
+ /* NOTE(review): magic number; presumably the bit offset of the
+ address bytes within a sockaddr_iso key -- confirm */
+ 48, /* rtoffset */
+ sizeof(struct sockaddr_iso) /* maxkeylen */
+};
+#endif /* ISO */
diff --git a/sys/netiso/iso_snpac.c b/sys/netiso/iso_snpac.c
new file mode 100644
index 000000000000..2473ae7a12d3
--- /dev/null
+++ b/sys/netiso/iso_snpac.c
@@ -0,0 +1,736 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso_snpac.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: iso_snpac.c,v 1.8 88/09/19 13:51:36 hagens Exp $ */
+/* $Source: /usr/argo/sys/netiso/RCS/iso_snpac.c,v $ */
+
+#ifdef ISO
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_snpac.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/esis.h>
+#include <netiso/argo_debug.h>
+
+int iso_systype = SNPA_ES; /* default to be an ES */
+extern short esis_holding_time, esis_config_time, esis_esconfig_time;
+extern struct timeval time;
+extern void esis_config();
+extern int hz;
+static void snpac_fixdstandmask();
+
+/* An AF_ISO sockaddr with only the length and family filled in. */
+struct sockaddr_iso blank_siso = {sizeof(blank_siso), AF_ISO};
+extern u_long iso_hashchar();
+/*
+ * File-wide scratch sockaddrs used to build arguments for the routing
+ * code.  They are static and shared by every routine in this file, so
+ * a routine that fills one in overwrites whatever was left there by a
+ * previous call.  All are the same type, hence sizeof(dst) throughout.
+ */
+static struct sockaddr_iso
+ dst = {sizeof(dst), AF_ISO},
+ gte = {sizeof(dst), AF_ISO},
+ src = {sizeof(dst), AF_ISO},
+ msk = {sizeof(dst), AF_ISO},
+ zmk = {0};
+#define zsi blank_siso
+#define zero_isoa zsi.siso_addr
+/*
+ * zap_isoaddr(a, b): clear the address part of sockaddr_iso `a', then
+ * copy in the iso_addr pointed to by `b' (its length byte plus isoa_len
+ * bytes).  The macro uses a variable named `r' of type
+ * struct iso_addr * that the CALLER must have declared.  It expands to
+ * a braced block, so it is not safe as the unbraced body of an
+ * if/else.
+ */
+#define zap_isoaddr(a, b) {Bzero(&a.siso_addr, sizeof(*r)); r = b; \
+ Bcopy(r, &a.siso_addr, 1 + (r)->isoa_len);}
+/* View any sockaddr-like object as a generic struct sockaddr pointer. */
+#define S(x) ((struct sockaddr *)&(x))
+
+/* An AF_LINK sockaddr with only the length and family filled in. */
+static struct sockaddr_dl blank_dl = {sizeof(blank_dl), AF_LINK};
+/* Scratch link-level gateway address handed to rtrequest(). */
+static struct sockaddr_dl gte_dl;
+/*
+ * zap_linkaddr(a, b, c, i): reset *a to blank_dl, copy c bytes of link
+ * address from b into a->sdl_data, record c as sdl_alen and i as
+ * sdl_index.  Arguments are not parenthesized in the expansion, so
+ * pass simple expressions (note the callers write (&gte_dl)).
+ */
+#define zap_linkaddr(a, b, c, i) \
+ (*a = blank_dl, bcopy(b, a->sdl_data, a->sdl_alen = c), a->sdl_index = i)
+
+/*
+ * We only keep track of a single IS at a time.
+ */
+struct rtentry *known_is;
+
+/*
+ * Addresses taken from NBS agreements, December 1987.
+ *
+ * These addresses assume on-the-wire transmission of least significant
+ * bit first. This is the method used by 802.3. When these
+ * addresses are passed to the token ring driver, (802.5), they
+ * must be bit-swapped because 802.5 transmission order is MSb first.
+ *
+ * Furthermore, according to IBM Austin, these addresses are not
+ * true token ring multicast addresses. More work is necessary
+ * to get multicast to work right on token ring.
+ *
+ * Currently, the token ring driver does not handle multicast, so
+ * these addresses are converted into the broadcast address in
+ * lan_output() That means that if these multicast addresses change
+ * the token ring driver must be altered.
+ */
+char all_es_snpa[] = { 0x09, 0x00, 0x2b, 0x00, 0x00, 0x04 };
+char all_is_snpa[] = { 0x09, 0x00, 0x2b, 0x00, 0x00, 0x05 };
+char all_l1is_snpa[] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x14};
+char all_l2is_snpa[] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x15};
+
+/*
+ * Lets one pointer to a route's gateway address be examined as an ISO
+ * address, a link-level address or a generic sockaddr.
+ */
+union sockunion {
+ struct sockaddr_iso siso;
+ struct sockaddr_dl sdl;
+ struct sockaddr sa;
+};
+
+/*
+ * FUNCTION: llc_rtrequest
+ *
+ * PURPOSE: Manage routing table entries specific to LLC for ISO.
+ *
+ * ARGUMENTS: req - RTM_ADD, RTM_RESOLVE or RTM_DELETE
+ * rt - the route being added, resolved or deleted
+ * sa - only printed in the debug trace
+ *
+ * RETURNS: nothing useful (every return is a bare "return;")
+ *
+ * SIDE EFFECTS: May allocate or free the llinfo_llc hung off
+ * rt->rt_llinfo, link/unlink it on the global llinfo_llc
+ * list, set/clear RTF_LLINFO, rewrite the gateway address,
+ * enable/disable multicast reception, and default the
+ * route's MTU.
+ *
+ * NOTES: This does a lot of obscure magic;
+ * Routes with RTF_GATEWAY set are ignored entirely.
+ */
+llc_rtrequest(req, rt, sa)
+int req;
+register struct rtentry *rt;
+struct sockaddr *sa;
+{
+ register union sockunion *gate = (union sockunion *)rt->rt_gateway;
+ /* lc2 and rt2 are declared but not used in this function */
+ register struct llinfo_llc *lc = (struct llinfo_llc *)rt->rt_llinfo, *lc2;
+ struct rtentry *rt2;
+ struct ifnet *ifp = rt->rt_ifp;
+ int addrlen = ifp->if_addrlen;
+#define LLC_SIZE 3 /* XXXXXX do this right later */
+
+ IFDEBUG (D_SNPA)
+ printf("llc_rtrequest(%d, %x, %x)\n", req, rt, sa);
+ ENDDEBUG
+ if (rt->rt_flags & RTF_GATEWAY)
+ return;
+ else switch (req) {
+ case RTM_ADD:
+ /*
+ * Case 1: This route may come from a route to iface with mask
+ * or from a default route.
+ */
+ if (rt->rt_flags & RTF_CLONING) {
+ /* turn on multicast reception and install an empty
+ link-level gateway; no llinfo is attached here */
+ iso_setmcasts(ifp, req);
+ rt_setgate(rt, rt_key(rt), &blank_dl);
+ return;
+ }
+ if (lc != 0)
+ return; /* happens on a route change */
+ /* FALLTHROUGH */
+ case RTM_RESOLVE:
+ /*
+ * Case 2: This route may come from cloning, or a manual route
+ * add with a LL address.
+ */
+ if (gate->sdl.sdl_family != AF_LINK) {
+ log(LOG_DEBUG, "llc_rtrequest: got non-link non-gateway route\n");
+ break;
+ }
+ R_Malloc(lc, struct llinfo_llc *, sizeof (*lc));
+ /* stored before the null check; on failure rt_llinfo is simply 0 */
+ rt->rt_llinfo = (caddr_t)lc;
+ if (lc == 0) {
+ log(LOG_DEBUG, "llc_rtrequest: malloc failed\n");
+ break;
+ }
+ Bzero(lc, sizeof(*lc));
+ lc->lc_rt = rt;
+ rt->rt_flags |= RTF_LLINFO;
+ insque(lc, &llinfo_llc);
+ /*
+ * A gateway whose address length is the interface address length
+ * plus sizeof(struct esis_req) carries an esis_req after the link
+ * address: trim it off the sockaddr and keep it in lc_er.  A
+ * gateway holding exactly a link address is recorded as a
+ * permanent, valid ES entry.
+ */
+ if (gate->sdl.sdl_alen == sizeof(struct esis_req) + addrlen) {
+ gate->sdl.sdl_alen -= sizeof(struct esis_req);
+ bcopy(addrlen + LLADDR(&gate->sdl),
+ (caddr_t)&lc->lc_er, sizeof(lc->lc_er));
+ } else if (gate->sdl.sdl_alen == addrlen)
+ lc->lc_flags = (SNPA_ES | SNPA_VALID | SNPA_PERM);
+ break;
+ case RTM_DELETE:
+ if (rt->rt_flags & RTF_CLONING)
+ iso_setmcasts(ifp, req);
+ if (lc == 0)
+ return;
+ /* unlink from the llinfo_llc list and release the entry */
+ remque(lc);
+ Free(lc);
+ rt->rt_llinfo = 0;
+ rt->rt_flags &= ~RTF_LLINFO;
+ break;
+ }
+ /* reached by every path that left the switch with "break" (and by
+ unhandled req values): default the MTU to the interface MTU less
+ the LLC header */
+ if (rt->rt_rmx.rmx_mtu == 0) {
+ rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu - LLC_SIZE;
+ }
+}
+/*
+ * FUNCTION:		iso_setmcasts
+ *
+ * PURPOSE:		Enable/Disable ESIS/ISIS multicast reception on interfaces.
+ *
+ * ARGUMENTS:		ifp - interface to (de)register the addresses on
+ *			req - RTM_ADD joins the groups; any other value
+ *			      (callers pass RTM_DELETE) leaves them
+ *
+ * SIDE EFFECTS:	Calls ether_addmulti()/ether_delmulti() once for each
+ *			of the all-ES, all-IS, all-L1-IS and all-L2-IS
+ *			addresses.  If any call returns ENETRESET the
+ *			interface is reset through ifp->if_reset, or a
+ *			message is printed when no reset routine exists.
+ *
+ * NOTES:		This also does a lot of obscure magic;
+ *			The add/delete branches are braced explicitly: without
+ *			the braces the "else" paired with the inner
+ *			ENETRESET test, so an add that did not need a reset
+ *			immediately deleted the address again and a delete
+ *			request did nothing at all.
+ */
+iso_setmcasts(ifp, req)
+	struct ifnet *ifp;
+	int req;
+{
+	static char *addrlist[] =
+		{ all_es_snpa, all_is_snpa, all_l1is_snpa, all_l2is_snpa, 0};
+	struct ifreq ifr;
+	register caddr_t *cpp;
+	int doreset = 0;
+
+	bzero((caddr_t)&ifr, sizeof(ifr));
+	for (cpp = (caddr_t *)addrlist; *cpp; cpp++) {
+		/* each list entry is a 6 byte 802 address */
+		bcopy(*cpp, (caddr_t)ifr.ifr_addr.sa_data, 6);
+		if (req == RTM_ADD) {
+			if (ether_addmulti(&ifr, (struct arpcom *)ifp) == ENETRESET)
+				doreset++;
+		} else {
+			if (ether_delmulti(&ifr, (struct arpcom *)ifp) == ENETRESET)
+				doreset++;
+		}
+	}
+	if (doreset) {
+		if (ifp->if_reset)
+			(*ifp->if_reset)(ifp->if_unit);
+		else
+			printf("iso_setmcasts: %s%d needs reseting to receive iso mcasts\n",
+					ifp->if_name, ifp->if_unit);
+	}
+}
+/*
+ * FUNCTION: iso_snparesolve
+ *
+ * PURPOSE: Resolve an iso address into snpa address
+ *
+ * RETURNS: 0 if addr is resolved (snpa and *snpa_len filled in)
+ * ENETUNREACH if no address could be chosen
+ *
+ * SIDE EFFECTS: Writes *snpa_len bytes through snpa; the size of the
+ * caller's buffer is not checked here.
+ *
+ * NOTES: Now that we have folded the snpa cache into the routing
+ * table, we know there is no snpa address known for this
+ * destination. If we know of a default IS, then the address
+ * of the IS is returned. If no IS is known, then return the
+ * multi-cast address for "all ES" for this interface
+ * (only when the interface has IFF_BROADCAST set).
+ *
+ * NB: the last case described above constitutes the
+ * query configuration function 9542, sec 6.5
+ * A mechanism is needed to prevent this function from
+ * being invoked if the system is an IS.
+ */
+iso_snparesolve(ifp, dest, snpa, snpa_len)
+struct ifnet *ifp; /* outgoing interface */
+struct sockaddr_iso *dest; /* destination */
+caddr_t snpa; /* RESULT: snpa to be used */
+int *snpa_len; /* RESULT: length of snpa */
+{
+ struct llinfo_llc *sc; /* ptr to snpa table entry */
+ caddr_t found_snpa;
+ int addrlen;
+
+ /*
+ * This hack allows us to send esis packets that have the destination snpa
+ * address embedded in the destination nsap address
+ */
+ if (dest->siso_data[0] == AFI_SNA) {
+ /*
+ * This is a subnetwork address. Return it immediately
+ */
+ IFDEBUG(D_SNPA)
+ printf("iso_snparesolve: return SN address\n");
+ ENDDEBUG
+ addrlen = dest->siso_nlen - 1; /* subtract size of AFI */
+ found_snpa = (caddr_t) dest->siso_data + 1;
+ /*
+ * (Describes the branch below.) Unless we are an IS ourselves,
+ * use the link address of the known default IS, provided its
+ * llinfo entry is marked valid.
+ */
+ } else if (iso_systype != SNPA_IS && known_is != 0 &&
+ (sc = (struct llinfo_llc *)known_is->rt_llinfo) &&
+ (sc->lc_flags & SNPA_VALID)) {
+ register struct sockaddr_dl *sdl =
+ (struct sockaddr_dl *)(known_is->rt_gateway);
+ found_snpa = LLADDR(sdl);
+ addrlen = sdl->sdl_alen;
+ } else if (ifp->if_flags & IFF_BROADCAST) {
+ /*
+ * no IS, no match. Return "all es" multicast address for this
+ * interface, as per Query Configuration Function (9542 sec 6.5)
+ *
+ * Note: there is a potential problem here. If the destination
+ * is on the subnet and it does not respond with a ESH, but
+ * does send back a TP CC, a connection could be established
+ * where we always transmit the CLNP packet to "all es"
+ */
+ addrlen = ifp->if_addrlen;
+ found_snpa = (caddr_t)all_es_snpa;
+ } else
+ return (ENETUNREACH);
+ /* the length is stored through snpa_len as part of the bcopy call */
+ bcopy(found_snpa, snpa, *snpa_len = addrlen);
+ return (0);
+}
+
+
+/*
+ * FUNCTION: snpac_free
+ *
+ * PURPOSE: free an entry in the iso address map table
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: Clears known_is if it refers to this entry's route.
+ *
+ * NOTES: If there is a route entry associated with cache
+ * entry, then delete that as well
+ *
+ * As written, this routine never touches lc itself: it only
+ * asks the routing code to delete the associated route, and
+ * only when that route is up and was created dynamically or
+ * modified (RTF_DYNAMIC | RTF_MODIFIED). Any release of
+ * the llinfo_llc presumably happens as a consequence of the
+ * RTM_DELETE -- confirm against the rtrequest path.
+ */
+snpac_free(lc)
+register struct llinfo_llc *lc; /* entry to free */
+{
+ register struct rtentry *rt = lc->lc_rt;
+ register struct iso_addr *r; /* unused in this function */
+
+ if (known_is == rt)
+ known_is = 0;
+ if (rt && (rt->rt_flags & RTF_UP) &&
+ (rt->rt_flags & (RTF_DYNAMIC | RTF_MODIFIED))) {
+ /*
+ * NOTE(review): RTFREE is applied both before and after the
+ * delete request, and rt is still dereferenced after the first
+ * one. Whether two references are really owed here cannot be
+ * told from this file -- verify the reference counting.
+ */
+ RTFREE(rt);
+ rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt),
+ rt->rt_flags, (struct rtentry **)0);
+ RTFREE(rt);
+ }
+}
+
+/*
+ * FUNCTION: snpac_add
+ *
+ * PURPOSE: Add an entry to the snpa cache
+ *
+ * RETURNS: 1 if a new route was created or an existing route was
+ * given a link-level address for the first time;
+ * 0 otherwise (entry merely refreshed, or the add failed)
+ *
+ * SIDE EFFECTS: Overwrites the file-scope scratch addresses dst, msk
+ * and gte_dl. May add or delete routes. Sets the route's
+ * expiry time and llinfo flags, and may record the route
+ * as the default IS via snpac_logdefis().
+ *
+ * NOTES: If entry already exists, then update holding time.
+ * Panics if the route ends up without an llinfo_llc.
+ */
+snpac_add(ifp, nsap, snpa, type, ht, nsellength)
+struct ifnet *ifp; /* interface info is related to */
+struct iso_addr *nsap; /* nsap to add */
+caddr_t snpa; /* translation */
+char type; /* SNPA_IS or SNPA_ES */
+u_short ht; /* holding time (in seconds) */
+int nsellength; /* nsaps may differ only in trailing bytes */
+{
+ register struct llinfo_llc *lc;
+ register struct rtentry *rt;
+ struct rtentry *mrt = 0;
+ register struct iso_addr *r; /* for zap_isoaddr macro */
+ int snpalen = min(ifp->if_addrlen, MAX_SNPALEN);
+ int new_entry = 0, index = ifp->if_index, iftype = ifp->if_type;
+
+ IFDEBUG(D_SNPA)
+ printf("snpac_add(%x, %x, %x, %x, %x, %x)\n",
+ ifp, nsap, snpa, type, ht, nsellength);
+ ENDDEBUG
+ /* look for an existing route to this NSAP */
+ zap_isoaddr(dst, nsap);
+ rt = rtalloc1(S(dst), 0);
+ IFDEBUG(D_SNPA)
+ printf("snpac_add: rtalloc1 returns %x\n", rt);
+ ENDDEBUG
+ if (rt == 0) {
+ struct sockaddr *netmask;
+ int flags;
+ /* also entered by "goto add" from the else branch below */
+ add:
+ if (nsellength) {
+ /* network route: mask off the trailing nsellength bytes */
+ netmask = S(msk); flags = RTF_UP;
+ snpac_fixdstandmask(nsellength);
+ } else {
+ netmask = 0; flags = RTF_UP | RTF_HOST;
+ }
+ new_entry = 1;
+ /* gateway is the link-level address on this interface */
+ zap_linkaddr((&gte_dl), snpa, snpalen, index);
+ gte_dl.sdl_type = iftype;
+ if (rtrequest(RTM_ADD, S(dst), S(gte_dl), netmask, flags, &mrt) ||
+ mrt == 0)
+ return (0);
+ rt = mrt;
+ /* give back the reference taken on our behalf by the add */
+ rt->rt_refcnt--;
+ } else {
+ register struct sockaddr_dl *sdl = (struct sockaddr_dl *)rt->rt_gateway;
+ /* give back the reference from the lookup */
+ rt->rt_refcnt--;
+ /* not a link-level route: create a separate entry */
+ if ((rt->rt_flags & RTF_LLINFO) == 0)
+ goto add;
+ if (nsellength && (rt->rt_flags & RTF_HOST)) {
+ /* a host route exists where a masked route is wanted */
+ if (rt->rt_refcnt == 0) {
+ /* unreferenced: replace it with the masked route */
+ /* NOTE(review): last argument is cast to
+ struct rtentry * although the other rtrequest calls in
+ this file pass a struct rtentry ** */
+ rtrequest(RTM_DELETE, S(dst), (struct sockaddr *)0,
+ (struct sockaddr *)0, 0, (struct rtentry *)0);
+ rt = 0;
+ goto add;
+ } else {
+ /*
+ * Still referenced: keep it, and additionally enter a copy
+ * of the NSAP whose bytes from (isoa_len - nsellength) to
+ * the end of the structure are zeroed. nsap2 is static,
+ * so it is shared across calls.
+ */
+ static struct iso_addr nsap2; register char *cp;
+ nsap2 = *nsap;
+ cp = nsap2.isoa_genaddr + nsap->isoa_len - nsellength;
+ while (cp < (char *)(1 + &nsap2))
+ *cp++ = 0;
+ (void) snpac_add(ifp, &nsap2, snpa, type, ht, nsellength);
+ }
+ }
+ if (sdl->sdl_family != AF_LINK || sdl->sdl_alen == 0) {
+ /* gateway has no link address yet: fill it in place, keeping
+ the sockaddr's original length */
+ int old_sdl_len = sdl->sdl_len;
+ if (old_sdl_len < sizeof(*sdl)) {
+ log(LOG_DEBUG, "snpac_add: cant make room for lladdr\n");
+ return (0);
+ }
+ zap_linkaddr(sdl, snpa, snpalen, index);
+ sdl->sdl_len = old_sdl_len;
+ sdl->sdl_type = iftype;
+ new_entry = 1;
+ }
+ }
+ /* NOTE(review): the panic string names snpac_rtrequest, not snpac_add */
+ if ((lc = (struct llinfo_llc *)rt->rt_llinfo) == 0)
+ panic("snpac_rtrequest");
+ /* absolute expiry time; snpac_age() compares it with time.tv_sec */
+ rt->rt_rmx.rmx_expire = ht + time.tv_sec;
+ /* replaces all previous flag bits on the entry */
+ lc->lc_flags = SNPA_VALID | type;
+ /* an ES that hears from an IS remembers it as the default IS */
+ if ((type & SNPA_IS) && !(iso_systype & SNPA_IS))
+ snpac_logdefis(rt);
+ return (new_entry);
+}
+
+/*
+ * Turn the scratch destination `dst' into a network prefix and build the
+ * matching netmask in `msk'.
+ *
+ * The last nsellength bytes are dropped from dst.siso_nlen.  msk gets
+ * 0xff (-1) in each byte of the remaining prefix and zero from there up
+ * to siso_pad; its siso_len is set so the sockaddr ends at the last
+ * mask byte and its siso_nlen is zeroed.  Finally the bytes of dst past
+ * the shortened prefix, up to siso_pad, are cleared.
+ */
+static void
+snpac_fixdstandmask(nsellength)
+{
+	register char *p, *prefix_end;
+
+	dst.siso_nlen -= nsellength;
+	prefix_end = msk.siso_data + dst.siso_nlen;
+
+	msk.siso_len = prefix_end - (char *)&msk;
+	msk.siso_nlen = 0;
+
+	for (p = msk.siso_data; p < prefix_end; p++)
+		*p = -1;
+	for (; p < (char *)msk.siso_pad; p++)
+		*p = 0;
+
+	for (p = dst.siso_data + dst.siso_nlen; p < (char *)dst.siso_pad; p++)
+		*p = 0;
+}
+
+/*
+ * FUNCTION:		snpac_ioctl
+ *
+ * PURPOSE:		Set/Get the system type and esis parameters
+ *
+ * RETURNS:		0 on success
+ *			EPERM  - SIOCSSTYPE on a socket without SS_PRIV
+ *			EINVAL - unknown cmd, or sr_type not exactly one of
+ *				 SNPA_ES / SNPA_IS
+ *
+ * SIDE EFFECTS:	SIOCSSTYPE updates iso_systype, esis_holding_time and
+ *			esis_config_time; when the ES configuration timer
+ *			value changes, the pending esis_config timeout is
+ *			cancelled and esis_config() is called directly.
+ *			SIOCGSTYPE copies the current values into *data.
+ *
+ * NOTES:
+ */
+snpac_ioctl (so, cmd, data)
+struct socket *so;
+int cmd;		/* ioctl to process */
+caddr_t data;	/* data for the cmd */
+{
+	register struct systype_req *req = (struct systype_req *)data;
+
+	IFDEBUG(D_IOCTL)
+		if (cmd != SIOCSSTYPE)
+			printf("snpac_ioctl: cmd get\n");
+		else
+			printf("snpac_ioctl: cmd set, type x%x, ht %d, ct %d\n",
+				req->sr_type, req->sr_holdt, req->sr_configt);
+	ENDDEBUG
+
+	/* report the current settings */
+	if (cmd == SIOCGSTYPE) {
+		req->sr_type = iso_systype;
+		req->sr_holdt = esis_holding_time;
+		req->sr_configt = esis_config_time;
+		req->sr_esconfigt = esis_esconfig_time;
+		return (0);
+	}
+
+	if (cmd != SIOCSSTYPE)
+		return (EINVAL);
+
+	/* changing the settings needs a privileged socket */
+	if (!(so->so_state & SS_PRIV))
+		return (EPERM);
+
+	/* exactly one of the ES / IS bits must be requested */
+	switch (req->sr_type & (SNPA_ES|SNPA_IS)) {
+	case SNPA_ES:
+		iso_systype = SNPA_ES;
+		break;
+	case SNPA_IS:
+		iso_systype = SNPA_IS;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	esis_holding_time = req->sr_holdt;
+	esis_config_time = req->sr_configt;
+	if (req->sr_esconfigt != esis_esconfig_time) {
+		/* restart the configuration timer with the new period */
+		untimeout(esis_config, (caddr_t)0);
+		esis_esconfig_time = req->sr_esconfigt;
+		esis_config();
+	}
+	return (0);
+}
+
+/*
+ * FUNCTION: snpac_logdefis
+ *
+ * PURPOSE: Mark the IS passed as the default IS
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: Replaces known_is (releasing the old route and taking a
+ * reference on the new one) and adds or re-points the
+ * dynamic default route so that it goes via this IS.
+ *
+ * NOTES: Does nothing if sc is already the default IS or is not
+ * a host route.
+ */
+snpac_logdefis(sc)
+register struct rtentry *sc;
+{
+ /* r and sdl are not used in this function */
+ register struct iso_addr *r;
+ register struct sockaddr_dl *sdl = (struct sockaddr_dl *)sc->rt_gateway;
+ register struct rtentry *rt;
+
+ if (known_is == sc || !(sc->rt_flags & RTF_HOST))
+ return;
+ if (known_is) {
+ RTFREE(known_is);
+ }
+ known_is = sc;
+ sc->rt_refcnt++;
+ /* look up the route for the blank (all-zero address) sockaddr_iso,
+ i.e. the default route */
+ /* NOTE(review): snpac_add() decrements rt_refcnt after rtalloc1();
+ nothing equivalent is done for rt here -- confirm this is intended */
+ rt = rtalloc1((struct sockaddr *)&zsi, 0);
+ if (rt == 0)
+ rtrequest(RTM_ADD, S(zsi), rt_key(sc), S(zmk),
+ RTF_DYNAMIC|RTF_GATEWAY, 0);
+ else {
+ /* only re-point a default route that was created dynamically,
+ goes through a gateway and has a zero-length mask */
+ if ((rt->rt_flags & RTF_DYNAMIC) &&
+ (rt->rt_flags & RTF_GATEWAY) && rt_mask(rt)->sa_len == 0)
+ rt_setgate(rt, rt_key(rt), rt_key(sc));
+ }
+}
+
+/*
+ * FUNCTION:		snpac_age
+ *
+ * PURPOSE:		Time out snpac entries
+ *
+ * RETURNS:		nothing
+ *
+ * SIDE EFFECTS:	Re-arms itself to run again after SNPAC_AGE seconds.
+ *			Calls snpac_free() on every valid entry whose route
+ *			has expired.
+ *
+ * NOTES:		An entry has expired when its route carries a
+ *			non-zero rmx_expire that is earlier than the current
+ *			time (time.tv_sec).  Entries with rmx_expire == 0
+ *			are never timed out here.
+ *
+ *			The successor of each entry is fetched before the
+ *			entry is handed to snpac_free(), so the walk does
+ *			not depend on the entry afterwards.
+ */
+void
+snpac_age()
+{
+	register struct llinfo_llc *cur, *following;
+	register struct rtentry *route;
+
+	timeout(snpac_age, (caddr_t)0, SNPAC_AGE * hz);
+
+	cur = llinfo_llc.lc_next;
+	while (cur != & llinfo_llc) {
+		following = cur->lc_next;
+		if (cur->lc_flags & SNPA_VALID) {
+			route = cur->lc_rt;
+			if (route->rt_rmx.rmx_expire != 0 &&
+				route->rt_rmx.rmx_expire < time.tv_sec)
+				snpac_free(cur);
+		}
+		cur = following;
+	}
+}
+
+/*
+ * FUNCTION:		snpac_ownmulti
+ *
+ * PURPOSE:		Determine if the snpa address is a multicast address
+ *			of the same type as the system.
+ *
+ * RETURNS:		1 if this system is an ES and the first len bytes of
+ *			snpa equal the "all ES" address, or it is an IS and
+ *			they equal the "all IS" address; 0 otherwise
+ *
+ * SIDE EFFECTS:	none
+ *
+ * NOTES:		Used by interface drivers when not in eavesdrop mode
+ *			as interim kludge until
+ *			real multicast addresses can be configured
+ */
+snpac_ownmulti(snpa, len)
+caddr_t	snpa;
+u_int	len;
+{
+	if ((iso_systype & SNPA_ES) &&
+		bcmp(snpa, (caddr_t)all_es_snpa, len) == 0)
+		return (1);
+	if ((iso_systype & SNPA_IS) &&
+		bcmp(snpa, (caddr_t)all_is_snpa, len) == 0)
+		return (1);
+	return (0);
+}
+
+/*
+ * FUNCTION:		snpac_flushifp
+ *
+ * PURPOSE:		Flush entries associated with specific ifp
+ *
+ * RETURNS:		nothing
+ *
+ * SIDE EFFECTS:	Calls snpac_free() on every valid entry whose route
+ *			uses ifp.
+ *
+ * NOTES:		The successor of each entry is saved before the entry
+ *			is passed to snpac_free(), exactly as snpac_age()
+ *			does: deleting the route can unlink and release the
+ *			entry, after which its lc_next must not be read.
+ */
+snpac_flushifp(ifp)
+struct ifnet	*ifp;
+{
+	register struct llinfo_llc	*lc, *nlc;
+
+	for (lc = llinfo_llc.lc_next; lc != & llinfo_llc; lc = nlc) {
+		nlc = lc->lc_next;
+		if (lc->lc_rt->rt_ifp == ifp && (lc->lc_flags & SNPA_VALID))
+			snpac_free(lc);
+	}
+}
+
+/*
+ * FUNCTION: snpac_rtrequest
+ *
+ * PURPOSE: Make a routing request
+ *
+ * ARGUMENTS: req - request code handed to rtrequest()
+ * (RTM_ADD and RTM_DELETE are the ones named in
+ * the debug trace)
+ * host - destination NSAP
+ * gateway - gateway NSAP
+ * netmask - optional mask (may be null)
+ * flags - route flags handed to rtrequest()
+ * ret_nrt - handed through to rtrequest()
+ *
+ * RETURNS: nothing (the result of rtrequest() is discarded)
+ *
+ * SIDE EFFECTS: Overwrites the file-scope scratch addresses dst, gte
+ * and, when a netmask is given, msk.
+ *
+ * NOTES: In the future, this should make a request of a user
+ * level routing daemon.
+ */
+snpac_rtrequest(req, host, gateway, netmask, flags, ret_nrt)
+int req;
+struct iso_addr *host;
+struct iso_addr *gateway;
+struct iso_addr *netmask;
+short flags;
+struct rtentry **ret_nrt;
+{
+ register struct iso_addr *r; /* needed by the zap_isoaddr macro */
+
+ IFDEBUG(D_SNPA)
+ printf("snpac_rtrequest: ");
+ if (req == RTM_ADD)
+ printf("add");
+ else if (req == RTM_DELETE)
+ printf("delete");
+ else
+ printf("unknown command");
+ printf(" dst: %s\n", clnp_iso_addrp(host));
+ printf("\tgateway: %s\n", clnp_iso_addrp(gateway));
+ ENDDEBUG
+
+
+ /* wrap the bare iso_addrs in sockaddr_iso's for the routing code */
+ zap_isoaddr(dst, host);
+ zap_isoaddr(gte, gateway);
+ if (netmask) {
+ zap_isoaddr(msk, netmask);
+ /* for a mask, zero the address-length byte and make the
+ sockaddr length run up to siso_pad */
+ msk.siso_nlen = 0;
+ msk.siso_len = msk.siso_pad - (u_char *)&msk;
+ }
+
+ rtrequest(req, S(dst), S(gte), (netmask ? S(msk) : (struct sockaddr *)0),
+ flags, ret_nrt);
+}
+
+/*
+ * FUNCTION: snpac_addrt
+ *
+ * PURPOSE: Associate a routing entry with an snpac entry
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS: Overwrites the file-scope scratch addresses dst, gte
+ * and, when a netmask is given, msk.
+ *
+ * NOTES: If a cache entry exists for gateway, then
+ * make a routing entry (host, gateway) and associate
+ * with gateway.
+ *
+ * If a route already exists and is different, first delete
+ * it.
+ *
+ * This could be made more efficient by checking
+ * the existing route before adding a new one.
+ *
+ * NOTE(review): the body below only builds the addresses
+ * and calls rtredirect(); any cache lookup or deletion of
+ * an existing route described above is not done in this
+ * function and would have to happen inside rtredirect().
+ * The ifp argument is not used.
+ */
+snpac_addrt(ifp, host, gateway, netmask)
+struct ifnet *ifp;
+struct iso_addr *host, *gateway, *netmask;
+{
+ register struct iso_addr *r; /* needed by the zap_isoaddr macro */
+
+ zap_isoaddr(dst, host);
+ zap_isoaddr(gte, gateway);
+ if (netmask) {
+ /* masked redirect: build the mask the same way
+ snpac_rtrequest() does */
+ zap_isoaddr(msk, netmask);
+ msk.siso_nlen = 0;
+ msk.siso_len = msk.siso_pad - (u_char *)&msk;
+ rtredirect(S(dst), S(gte), S(msk), RTF_DONE, S(gte), 0);
+ } else
+ /* no mask: redirect for a single host */
+ rtredirect(S(dst), S(gte), (struct sockaddr *)0,
+ RTF_DONE | RTF_HOST, S(gte), 0);
+}
+#endif /* ISO */
diff --git a/sys/netiso/iso_snpac.h b/sys/netiso/iso_snpac.h
new file mode 100644
index 000000000000..105e8dd11d67
--- /dev/null
+++ b/sys/netiso/iso_snpac.h
@@ -0,0 +1,112 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso_snpac.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+
+#define MAX_SNPALEN 8 /* curiously equal to sizeof x.121 addr
+ (plus 1 for nibble len) */
+/*
+ * One NSAP-to-SNPA translation: the network address, the subnetwork
+ * (link-level) address it maps to, and its flags and holding time.
+ */
+struct snpa_req {
+ struct iso_addr sr_isoa; /* nsap address */
+ u_char sr_len; /* length of snpa */
+ u_char sr_snpa[MAX_SNPALEN]; /* snpa associated
+ with nsap address */
+ u_char sr_flags; /* true if entry is valid
+ (presumably SNPA_* bits -- confirm) */
+ u_short sr_ht; /* holding time */
+};
+
+#define SNPA_VALID 0x01
+#define SNPA_ES 0x02
+#define SNPA_IS 0x04
+#define SNPA_PERM 0x10
+
+/*
+ * Argument of the SIOCSSTYPE / SIOCGSTYPE ioctls: the system type and
+ * the three ES-IS timer values.
+ */
+struct systype_req {
+ short sr_holdt; /* holding timer */
+ short sr_configt; /* configuration timer */
+ short sr_esconfigt; /* suggested ES configuration timer */
+ char sr_type; /* SNPA_ES or SNPA_IS */
+};
+
+/*
+ * Per-entry ES-IS state; embedded in struct llinfo_llc as lc_er.
+ */
+struct esis_req {
+ short er_ht; /* holding time */
+ u_char er_flags; /* type and validity (SNPA_* bits) */
+};
+/*
+ * Space for this structure gets added onto the end of a route
+ * going to an ethernet or other 802.[45x] device.
+ *
+ * All instances are kept on a doubly linked list (lc_next/lc_prev)
+ * headed by the global llinfo_llc, and each points back at the route
+ * it belongs to.
+ */
+
+struct llinfo_llc {
+ struct llinfo_llc *lc_next; /* keep all llc routes linked */
+ struct llinfo_llc *lc_prev; /* keep all llc routes linked */
+ struct rtentry *lc_rt; /* backpointer to route */
+ struct esis_req lc_er; /* holding time, etc */
+/* shorthands for the members of the embedded esis_req */
+#define lc_ht lc_er.er_ht
+#define lc_flags lc_er.er_flags
+};
+
+
+/* ISO arp IOCTL data structures */
+
+#define SIOCSSTYPE _IOW('a', 39, struct systype_req) /* set system type */
+#define SIOCGSTYPE _IOR('a', 40, struct systype_req) /* get system type */
+
+#ifdef KERNEL
+struct llinfo_llc llinfo_llc; /* head for linked lists */
+#endif /* KERNEL */
diff --git a/sys/netiso/iso_var.h b/sys/netiso/iso_var.h
new file mode 100644
index 000000000000..946aeea93fe8
--- /dev/null
+++ b/sys/netiso/iso_var.h
@@ -0,0 +1,137 @@
+/*-
+ * Copyright (c) 1988, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso_var.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: iso_var.h,v 4.2 88/06/29 15:00:08 hagens Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/iso_var.h,v $
+ */
+
+/*
+ * Interface address, iso version. One of these structures is
+ * allocated for each interface with an osi address. The ifaddr
+ * structure contains the protocol-independent part
+ * of the structure, and is assumed to be first.
+ */
+struct iso_ifaddr {
+ struct ifaddr ia_ifa; /* protocol-independent info */
+#define ia_ifp ia_ifa.ifa_ifp
+#define ia_flags ia_ifa.ifa_flags
+ int ia_snpaoffset;
+ struct iso_ifaddr *ia_next; /* next in list of iso addresses */
+ struct sockaddr_iso ia_addr; /* reserve space for interface name */
+ struct sockaddr_iso ia_dstaddr; /* reserve space for broadcast addr */
+#define ia_broadaddr ia_dstaddr
+ struct sockaddr_iso ia_sockmask; /* reserve space for general netmask */
+};
+
+struct iso_aliasreq {
+ char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct sockaddr_iso ifra_addr;
+ struct sockaddr_iso ifra_dstaddr;
+ struct sockaddr_iso ifra_mask;
+ int ifra_snpaoffset;
+};
+
+struct iso_ifreq {
+ char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct sockaddr_iso ifr_Addr;
+};
+
+/*
+ * Given a pointer to an iso_ifaddr (ifaddr),
+ * return a pointer to the addr as a sockaddr_iso
+ */
+/*
+#define IA_SIS(ia) ((struct sockaddr_iso *)(ia.ia_ifa->ifa_addr))
+ * works if sockaddr_iso becomes variable sized.
+ */
+#define IA_SIS(ia) (&(((struct iso_ifaddr *)(ia))->ia_addr)) /* ia is parenthesized so the cast covers an expression argument as a whole */
+
+#define SIOCDIFADDR_ISO _IOW('i',25, struct iso_ifreq) /* delete IF addr */
+#define SIOCAIFADDR_ISO _IOW('i',26, struct iso_aliasreq)/* add/chg IFalias */
+#define SIOCGIFADDR_ISO _IOWR('i',33, struct iso_ifreq) /* get ifnet address */
+#define SIOCGIFDSTADDR_ISO _IOWR('i',34, struct iso_ifreq) /* get dst address */
+#define SIOCGIFNETMASK_ISO _IOWR('i',37, struct iso_ifreq) /* get netmask */
+
+/*
+ * This stuff should go in if.h or if_llc.h or someplace else,
+ * but for now . . .
+ */
+
+struct llc_etherhdr {
+ char dst[6];
+ char src[6];
+ char len[2];
+ char llc_dsap;
+ char llc_ssap;
+ char llc_ui_byte;
+};
+
+struct snpa_hdr {
+ struct ifnet *snh_ifp;
+ char snh_dhost[6];
+ char snh_shost[6];
+ short snh_flags;
+};
+#ifdef KERNEL
+struct iso_ifaddr *iso_ifaddr; /* linked list of iso address ifaces */
+struct iso_ifaddr *iso_localifa(); /* function returning an iso_ifaddr pointer; arguments are not declared here */
+struct ifqueue clnlintrq; /* clnl packet input queue */
+#endif /* KERNEL */
diff --git a/sys/netiso/tp.trans b/sys/netiso/tp.trans
new file mode 100644
index 000000000000..edefc769b816
--- /dev/null
+++ b/sys/netiso/tp.trans
@@ -0,0 +1,1342 @@
+/* NEW */
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp.trans 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: tp.trans,v 5.1 88/10/12 12:22:07 root Exp $
+ *
+ * Transition file for TP.
+ *
+ * DO NOT:
+ * - change the order of any of the events or states. to do so will
+ * make tppt, netstat, etc. cease working.
+ *
+ * NOTE:
+ * some hooks exist for data on (dis)connect, but it's ***NOT***SUPPORTED***
+ * (read: may not work!)
+ *
+ * I tried to put everything that causes a change of state in here, hence
+ * there are some seemingly trivial events like T_DETACH and T_LISTEN_req.
+ *
+ * Almost everything having to do w/ setting & cancelling timers is here
+ * but once it was debugged, I moved the setting of the
+ * keepalive (sendack) timer to tp_emit(), where an AK_TPDU is sent.
+ * This is so the code wouldn't be duplicated all over creation in here.
+ *
+ */
+*PROTOCOL tp
+
+*INCLUDE
+{
+/* @(#)tp.trans 8.1 (Berkeley) 6/10/93 */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/mbuf.h>
+#include <sys/time.h>
+#include <sys/errno.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_trace.h>
+#include <netiso/iso_errno.h>
+#include <netiso/tp_seq.h>
+#include <netiso/cons.h>
+
+#define DRIVERTRACE TPPTdriver
+/*
+ * NOTE(review): sbwakeup expands with a trailing semicolon and refers to
+ * a variable named p with a tp_sock member, so it only works as a full
+ * statement inside code where such a p is in scope.
+ */
+#define sbwakeup(sb) sowakeup(p->tp_sock, sb);
+/* Copy the whole mbuf chain d with wait flag w; yields 0 when d is null. */
+#define MCPY(d, w) ((d) ? m_copym((d), 0, (int)M_COPYALL, (w)) : 0)
+
+/* Starts at 1; tested before a return inside a transition action. */
+static int trick_hc = 1;
+
+int tp_emit(),
+ tp_goodack(), tp_goodXack(),
+ tp_stash()
+;
+void tp_indicate(), tp_getoptions(),
+ tp_soisdisconnecting(), tp_soisdisconnected(),
+ tp_recycle_tsuffix(),
+#ifdef TP_DEBUG_TIMERS
+ tp_etimeout(), tp_euntimeout(),
+ tp_ctimeout(), tp_cuntimeout(),
+ tp_ctimeout_MIN(),
+#endif
+ tp_freeref(), tp_detach(),
+ tp0_stash(), tp0_send(),
+ tp_netcmd(), tp_send()
+;
+
+typedef struct tp_pcb tpcb_struct;
+
+
+}
+
+*PCB tpcb_struct SYNONYM P
+
+*STATES
+
+TP_CLOSED
+TP_CRSENT
+TP_AKWAIT
+TP_OPEN
+TP_CLOSING
+TP_REFWAIT
+TP_LISTENING /* Local to this implementation */
+TP_CONFIRMING /* Local to this implementation */
+
+*EVENTS { struct timeval e_time; } SYNONYM E
+
+ /*
+ * C (typically cancelled) timers -
+ *
+ * let these be the first ones so for the sake of convenience
+ * their values are 0--> n-1
+ * DO NOT CHANGE THE ORDER OF THESE TIMER EVENTS!!
+ */
+ TM_inact
+ TM_retrans
+ /* TM_retrans is used for all
+ * simple retransmissions - CR,CC,XPD,DR
+ */
+
+ TM_sendack
+ /* TM_sendack does dual duty - keepalive AND closed-window
+ * Probes.
+ * It's set w/ keepalive-ticks every time an ack is sent.
+ * (this is done in (void) tp_emit() ).
+ * Whenever a DT arrives which doesn't require immediate acking,
+ * a separate fast-timeout flag is set ensuring 200ms response.
+ */
+ TM_notused
+
+ /*
+ * E (typically expired) timers - these may be in any order.
+ * These cause procedures to be executed directly; may not
+ * cause an 'event' as we know them here.
+ */
+ TM_reference { SeqNum e_low; SeqNum e_high; int e_retrans; }
+ TM_data_retrans { SeqNum e_low; SeqNum e_high; int e_retrans; }
+
+/* NOTE: in tp_input is a minor optimization that assumes that
+ * for all tpdu types that can take e_data and e_datalen, these
+ * fields fall in the same place in the event structure, that is,
+ * e_data is the first field and e_datalen is the 2nd field.
+ */
+
+ ER_TPDU {
+ u_char e_reason;
+ }
+ CR_TPDU { struct mbuf *e_data; /* first field */
+ int e_datalen; /* 2nd field */
+ u_int e_cdt;
+ }
+ DR_TPDU { struct mbuf *e_data; /* first field */
+ int e_datalen; /* 2nd field */
+ u_short e_sref;
+ u_char e_reason;
+ }
+ DC_TPDU
+ CC_TPDU { struct mbuf *e_data; /* first field */
+ int e_datalen; /* 2nd field */
+ u_short e_sref;
+ u_int e_cdt;
+ }
+ AK_TPDU { u_int e_cdt;
+ SeqNum e_seq;
+ SeqNum e_subseq;
+ u_char e_fcc_present;
+ }
+ DT_TPDU { struct mbuf *e_data; /* first field */
+ int e_datalen; /* 2nd field */
+ u_int e_eot;
+ SeqNum e_seq;
+ }
+ XPD_TPDU { struct mbuf *e_data; /* first field */
+ int e_datalen; /* 2nd field */
+ SeqNum e_seq;
+ }
+ XAK_TPDU { SeqNum e_seq; }
+
+ T_CONN_req
+ T_DISC_req { u_char e_reason; }
+ T_LISTEN_req
+ T_DATA_req
+ T_XPD_req
+ T_USR_rcvd
+ T_USR_Xrcvd
+ T_DETACH
+ T_NETRESET
+ T_ACPT_req
+
+
+*TRANSITIONS
+
+
+/* TP_AKWAIT doesn't exist in TP 0 */
+SAME <== TP_AKWAIT [ CC_TPDU, DC_TPDU, XAK_TPDU ]
+ DEFAULT
+ NULLACTION
+;
+
+
+/* applicable in TP4, TP0 */
+SAME <== TP_REFWAIT DR_TPDU
+ ( $$.e_sref != 0 )
+ {
+ (void) tp_emit(DC_TPDU_type, $P, 0, 0, MNULL);
+ }
+;
+
+/* applicable in TP4, TP0 */
+SAME <== TP_REFWAIT [ CR_TPDU, CC_TPDU, DT_TPDU,
+ DR_TPDU, XPD_TPDU, AK_TPDU, XAK_TPDU, DC_TPDU, ER_TPDU ]
+ DEFAULT
+ { /* state is unchanged; with TP_DEBUG defined, any event other than AK is logged */
+# ifdef TP_DEBUG
+ if( $E.ev_number != AK_TPDU )
+ printf("TPDU 0x%x in REFWAIT!!!!\n", $E.ev_number);
+# endif /* TP_DEBUG */
+ }
+;
+
+/* applicable in TP4, TP0 */
+SAME <== TP_REFWAIT [ T_DETACH, T_DISC_req ]
+ DEFAULT
+ NULLACTION
+;
+
+/* applicable in TP4, TP0 */
+SAME <== TP_CRSENT AK_TPDU
+ ($P.tp_class == TP_CLASS_0)
+ {
+ /* oh, man is this grotesque or what? */
+ (void) tp_goodack($P, $$.e_cdt, $$.e_seq, $$.e_subseq);
+ /* but it's necessary because this pseudo-ack may happen
+ * before the CC arrives, but we HAVE to adjust the
+ * snduna as a result of the ack, WHENEVER it arrives
+ */
+ }
+;
+
+/* applicable in TP4, TP0 */
+SAME <== TP_CRSENT
+ [ CR_TPDU, DC_TPDU, DT_TPDU, XPD_TPDU, XAK_TPDU ]
+ DEFAULT
+ NULLACTION
+;
+
+/* applicable in TP4, TP0 */
+SAME <== TP_CLOSED [ DT_TPDU, XPD_TPDU,
+ ER_TPDU, DC_TPDU, AK_TPDU, XAK_TPDU ]
+ DEFAULT
+ NULLACTION
+;
+
+/* TP_CLOSING doesn't exist in TP 0 */
+SAME <== TP_CLOSING
+ [ CC_TPDU, CR_TPDU, DT_TPDU, XPD_TPDU, AK_TPDU, XAK_TPDU ]
+ DEFAULT
+ NULLACTION
+;
+
+
+/* DC_TPDU doesn't exist in TP 0 */
+SAME <== TP_OPEN DC_TPDU
+ DEFAULT
+ NULLACTION
+;
+
+/* applicable in TP4, TP0 */
+SAME <== TP_LISTENING [DR_TPDU, CC_TPDU, DT_TPDU, XPD_TPDU,
+ ER_TPDU, DC_TPDU, AK_TPDU, XAK_TPDU ]
+ DEFAULT
+ NULLACTION
+;
+
+/* applicable in TP4, TP0 */
+TP_LISTENING <== TP_CLOSED T_LISTEN_req
+ DEFAULT
+ NULLACTION
+;
+
+/* applicable in TP4, TP0 */
+TP_CLOSED <== [ TP_LISTENING, TP_CLOSED ] T_DETACH
+ DEFAULT
+ {
+ tp_detach($P);
+ }
+;
+
+TP_CONFIRMING <== TP_LISTENING CR_TPDU
+ ( $P.tp_class == TP_CLASS_0)
+ {
+ $P.tp_refstate = REF_OPEN; /* has timers ??? */
+ }
+;
+
+TP_CONFIRMING <== TP_LISTENING CR_TPDU
+ DEFAULT
+ {
+ IFTRACE(D_CONN)
+ tptrace(TPPTmisc, "CR datalen data", $$.e_datalen, $$.e_data,0,0);
+ ENDTRACE
+ IFDEBUG(D_CONN)
+ printf("CR datalen 0x%x data 0x%x", $$.e_datalen, $$.e_data);
+ ENDDEBUG
+ $P.tp_refstate = REF_OPEN; /* has timers */
+ $P.tp_fcredit = $$.e_cdt;
+
+ if ($$.e_datalen > 0) {
+ /* n/a for class 0 */
+ ASSERT($P.tp_Xrcv.sb_cc == 0);
+ sbappendrecord(&$P.tp_Xrcv, $$.e_data);
+ $$.e_data = MNULL;
+ }
+ }
+;
+
+TP_OPEN <== TP_CONFIRMING T_ACPT_req
+ ( $P.tp_class == TP_CLASS_0 )
+ {
+ IncStat(ts_tp0_conn);
+ IFTRACE(D_CONN)
+ tptrace(TPPTmisc, "Confiming", $P, 0,0,0);
+ ENDTRACE
+ IFDEBUG(D_CONN)
+ printf("Confirming connection: $P" );
+ ENDDEBUG
+ soisconnected($P.tp_sock);
+ (void) tp_emit(CC_TPDU_type, $P, 0,0, MNULL) ;
+ $P.tp_fcredit = 1;
+ }
+;
+
+TP_AKWAIT <== TP_CONFIRMING T_ACPT_req
+ (tp_emit(CC_TPDU_type, $P, 0,0, MCPY($P.tp_ucddata, M_NOWAIT)) == 0)
+ {
+ IncStat(ts_tp4_conn); /* even though not quite open */
+ IFTRACE(D_CONN)
+ tptrace(TPPTmisc, "Confiming", $P, 0,0,0);
+ ENDTRACE
+ IFDEBUG(D_CONN)
+ printf("Confirming connection: $P" );
+ ENDDEBUG
+ tp_getoptions($P);
+ soisconnecting($P.tp_sock);
+ if (($P.tp_rx_strat & TPRX_FASTSTART) && ($P.tp_fcredit > 0))
+ $P.tp_cong_win = $P.tp_fcredit * $P.tp_l_tpdusize;
+ $P.tp_retrans = $P.tp_Nretrans;
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_cc_ticks);
+ }
+;
+
+/* TP4 only */
+TP_CLOSED <== TP_CONFIRMING T_ACPT_req
+ DEFAULT /* emit failed */
+ {
+ IFDEBUG(D_CONN)
+ printf("event: CR_TPDU emit CC failed done " );
+ ENDDEBUG
+ soisdisconnected($P.tp_sock);
+ tp_recycle_tsuffix($P);
+ tp_freeref($P.tp_lref);
+ tp_detach($P);
+ }
+;
+
+/* applicable in TP4, TP0 */
+TP_CRSENT <== TP_CLOSED T_CONN_req
+ DEFAULT
+ {
+ int error;
+ struct mbuf *data = MNULL;
+
+ IFTRACE(D_CONN)
+ tptrace(TPPTmisc, "T_CONN_req flags ucddata", (int)$P.tp_flags,
+ $P.tp_ucddata, 0, 0);
+ ENDTRACE
+ data = MCPY($P.tp_ucddata, M_WAIT);
+ if (data) {
+ IFDEBUG(D_CONN)
+ printf("T_CONN_req.trans m_copy cc 0x%x\n",
+ $P.tp_ucddata);
+ dump_mbuf(data, "sosnd @ T_CONN_req");
+ ENDDEBUG
+ }
+
+ if (error = tp_emit(CR_TPDU_type, $P, 0, 0, data) )
+ return error; /* driver WON'T change state; will return error */
+
+ $P.tp_refstate = REF_OPEN; /* has timers */
+ if($P.tp_class != TP_CLASS_0) {
+ $P.tp_retrans = $P.tp_Nretrans;
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_cr_ticks);
+ }
+ }
+;
+
+/* applicable in TP4, TP0, but state TP_AKWAIT doesn't apply to TP0 */
+TP_REFWAIT <== [ TP_CRSENT, TP_AKWAIT, TP_OPEN ] DR_TPDU
+ DEFAULT
+ {
+ sbflush(&$P.tp_Xrcv); /* purge non-delivered data */
+ if ($$.e_datalen > 0) {
+ sbappendrecord(&$P.tp_Xrcv, $$.e_data); /* queue the data carried by the DR on tp_Xrcv */
+ $$.e_data = MNULL; /* the event no longer refers to the appended chain */
+ }
+ if ($P.tp_state == TP_OPEN)
+ tp_indicate(T_DISCONNECT, $P, 0);
+ else {
+ int so_error = ECONNREFUSED; /* kept only for the three reasons tested next, otherwise ECONNABORTED */
+ if ($$.e_reason != (E_TP_NO_SESSION ^ TP_ERROR_MASK) &&
+ $$.e_reason != (E_TP_NO_CR_ON_NC ^ TP_ERROR_MASK) &&
+ $$.e_reason != (E_TP_REF_OVERFLOW ^ TP_ERROR_MASK))
+ so_error = ECONNABORTED;
+ tp_indicate(T_DISCONNECT, $P, so_error);
+ }
+ tp_soisdisconnected($P);
+ if ($P.tp_class != TP_CLASS_0) {
+ if ($P.tp_state == TP_OPEN ) {
+ tp_euntimeout($P, TM_data_retrans); /* all */
+ tp_cuntimeout($P, TM_retrans);
+ tp_cuntimeout($P, TM_inact);
+ tp_cuntimeout($P, TM_sendack);
+ $P.tp_flags &= ~TPF_DELACK;
+ }
+ tp_cuntimeout($P, TM_retrans); /* NOTE(review): already cancelled a few lines up when the state is TP_OPEN */
+ if( $$.e_sref != 0 )
+ (void) tp_emit(DC_TPDU_type, $P, 0, 0, MNULL); /* a DC is sent only when e_sref is nonzero */
+ }
+ }
+;
+
+SAME <== TP_CLOSED DR_TPDU
+ DEFAULT
+ {
+ if( $$.e_sref != 0 )
+ (void) tp_emit(DC_TPDU_type, $P, 0, 0, MNULL);
+ /* reference timer already set - reset it to be safe (???) */
+ tp_euntimeout($P, TM_reference); /* all */
+ tp_etimeout($P, TM_reference, (int)$P.tp_refer_ticks);
+ }
+;
+
+/* NBS(34) */
+TP_REFWAIT <== TP_CRSENT ER_TPDU
+ DEFAULT
+ {
+ tp_cuntimeout($P, TM_retrans);
+ tp_indicate(ER_TPDU, $P, $$.e_reason);
+ tp_soisdisconnected($P);
+ }
+;
+
+/* NBS(27) */
+TP_REFWAIT <== TP_CLOSING DR_TPDU
+ DEFAULT
+ {
+ tp_cuntimeout($P, TM_retrans);
+ tp_soisdisconnected($P);
+ }
+;
+/* these two transitions are the same but can't be combined because xebec
+ * can't handle the use of $$.e_reason if they're combined
+ */
+/* NBS(27) */
+TP_REFWAIT <== TP_CLOSING ER_TPDU
+ DEFAULT
+ {
+ tp_indicate(ER_TPDU, $P, $$.e_reason);
+ tp_cuntimeout($P, TM_retrans);
+ tp_soisdisconnected($P);
+ }
+;
+/* NBS(27) */
+TP_REFWAIT <== TP_CLOSING DC_TPDU
+ DEFAULT
+ {
+ tp_cuntimeout($P, TM_retrans);
+ tp_soisdisconnected($P);
+ }
+;
+
+/* NBS(21) */
+SAME <== TP_CLOSED [ CC_TPDU, CR_TPDU ]
+ DEFAULT
+ { /* don't ask me why we have to do this - spec says so */
+ (void) tp_emit(DR_TPDU_type, $P, 0, E_TP_NO_SESSION, MNULL);
+ /* don't bother with retransmissions of the DR */
+ }
+;
+
+/* NBS(34) */
+TP_REFWAIT <== TP_OPEN ER_TPDU
+ ($P.tp_class == TP_CLASS_0)
+ {
+ tp_soisdisconnecting($P.tp_sock);
+ tp_indicate(ER_TPDU, $P, $$.e_reason);
+ tp_soisdisconnected($P);
+ tp_netcmd( $P, CONN_CLOSE );
+ }
+;
+
+TP_CLOSING <== [ TP_AKWAIT, TP_OPEN ] ER_TPDU
+ DEFAULT
+ {
+ if ($P.tp_state == TP_OPEN) {
+ tp_euntimeout($P, TM_data_retrans); /* all */
+ tp_cuntimeout($P, TM_inact);
+ tp_cuntimeout($P, TM_sendack);
+ }
+ tp_soisdisconnecting($P.tp_sock);
+ tp_indicate(ER_TPDU, $P, $$.e_reason);
+ $P.tp_retrans = $P.tp_Nretrans;
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_dr_ticks);
+ (void) tp_emit(DR_TPDU_type, $P, 0, E_TP_PROTO_ERR, MNULL);
+ }
+;
+/* NBS(6) */
+TP_OPEN <== TP_CRSENT CC_TPDU
+ ($P.tp_class == TP_CLASS_0)
+ {
+ tp_cuntimeout($P, TM_retrans);
+ IncStat(ts_tp0_conn);
+ $P.tp_fcredit = 1;
+ soisconnected($P.tp_sock);
+ }
+;
+
+TP_OPEN <== TP_CRSENT CC_TPDU
+ DEFAULT
+ {
+ IFDEBUG(D_CONN)
+ printf("trans: CC_TPDU in CRSENT state flags 0x%x\n",
+ (int)$P.tp_flags);
+ ENDDEBUG
+ IncStat(ts_tp4_conn);
+ $P.tp_fref = $$.e_sref;
+ $P.tp_fcredit = $$.e_cdt;
+ if (($P.tp_rx_strat & TPRX_FASTSTART) && ($$.e_cdt > 0))
+ $P.tp_cong_win = $$.e_cdt * $P.tp_l_tpdusize;
+ tp_getoptions($P);
+ tp_cuntimeout($P, TM_retrans);
+ if ($P.tp_ucddata) {
+ IFDEBUG(D_CONN)
+ printf("dropping user connect data cc 0x%x\n",
+ $P.tp_ucddata->m_len);
+ ENDDEBUG
+ m_freem($P.tp_ucddata);
+ $P.tp_ucddata = 0;
+ }
+ soisconnected($P.tp_sock);
+ if ($$.e_datalen > 0) {
+ ASSERT($P.tp_Xrcv.sb_cc == 0); /* should be empty */
+ sbappendrecord(&$P.tp_Xrcv, $$.e_data);
+ $$.e_data = MNULL;
+ }
+
+ (void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL);
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+ }
+;
+
+/* TP4 only */
+SAME <== TP_CRSENT TM_retrans
+ ( $P.tp_retrans > 0 )
+ { /* retries remain: send the CR again and re-arm TM_retrans */
+ struct mbuf *data = MNULL;
+ int error;
+
+ IncStat(ts_retrans_cr);
+ $P.tp_cong_win = 1 * $P.tp_l_tpdusize; /* congestion window back to one TPDU size */
+ data = MCPY($P.tp_ucddata, M_NOWAIT); /* 0 when there is no connect data */
+ if($P.tp_ucddata) {
+ IFDEBUG(D_CONN)
+ printf("TM_retrans.trans m_copy cc 0x%x\n", data);
+ dump_mbuf($P.tp_ucddata, "sosnd @ TM_retrans");
+ ENDDEBUG
+ if( data == MNULL )
+ return ENOBUFS; /* NOTE(review): returns before tp_retrans is decremented and before TM_retrans is re-armed */
+ }
+
+ $P.tp_retrans --;
+ if( error = tp_emit(CR_TPDU_type, $P, 0, 0, data) ) {
+ $P.tp_sock->so_error = error; /* emit failure is recorded on the socket, not returned */
+ }
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_cr_ticks); /* re-armed whether or not the emit succeeded */
+ }
+;
+
+/* TP4 only */
+TP_REFWAIT <== TP_CRSENT TM_retrans
+ DEFAULT /* no more CR retransmissions */
+ {
+ IncStat(ts_conn_gaveup);
+ $P.tp_sock->so_error = ETIMEDOUT;
+ tp_indicate(T_DISCONNECT, $P, ETIMEDOUT);
+ tp_soisdisconnected($P);
+ }
+;
+
+/* TP4 only */
+SAME <== TP_AKWAIT CR_TPDU
+ DEFAULT
+ /* duplicate CR (which doesn't really exist in the context of
+ * a connectionless network layer)
+ * Doesn't occur in class 0.
+ */
+ {
+ int error;
+ struct mbuf *data = MCPY($P.tp_ucddata, M_WAIT);
+
+ if( error = tp_emit(CC_TPDU_type, $P, 0, 0, data) ) {
+ $P.tp_sock->so_error = error;
+ }
+ $P.tp_retrans = $P.tp_Nretrans;
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_cc_ticks);
+ }
+;
+
+/* TP4 only */
+TP_OPEN <== TP_AKWAIT DT_TPDU
+ ( IN_RWINDOW( $P, $$.e_seq,
+ $P.tp_rcvnxt, SEQ($P, $P.tp_rcvnxt + $P.tp_lcredit)) )
+ {
+ int doack; /* result of tp_stash; nonzero selects the immediate AK branch */
+
+ /*
+ * Get rid of any confirm or connect data, so that if we
+ * crash or close, it isn't thought of as disconnect data.
+ */
+ if ($P.tp_ucddata) {
+ m_freem($P.tp_ucddata);
+ $P.tp_ucddata = 0;
+ }
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+ tp_cuntimeout($P, TM_retrans);
+ soisconnected($P.tp_sock);
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks); /* NOTE(review): identical to the call three lines up; looks redundant */
+
+ /* see also next 2 transitions, if you make any changes */
+
+ doack = tp_stash($P, $E);
+ IFDEBUG(D_DATA)
+ printf("tp_stash returns %d\n",doack);
+ ENDDEBUG
+
+ if (doack) {
+ (void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL );
+ tp_ctimeout($P, TM_sendack, (int)$P.tp_keepalive_ticks); /* AK sent: TM_sendack runs with the keepalive interval */
+ } else
+ tp_ctimeout( $P, TM_sendack, (int)$P.tp_sendack_ticks); /* no AK sent: TM_sendack runs with the sendack interval */
+
+ IFDEBUG(D_DATA)
+ printf("after stash calling sbwakeup\n");
+ ENDDEBUG
+ } /* NOTE(review): despite the debug text, no sbwakeup() call appears in this action */
+;
+
+SAME <== TP_OPEN DT_TPDU
+ ( $P.tp_class == TP_CLASS_0 )
+ {
+ tp0_stash($P, $E);
+ sbwakeup( &$P.tp_sock->so_rcv );
+
+ IFDEBUG(D_DATA)
+ printf("after stash calling sbwakeup\n");
+ ENDDEBUG
+ }
+;
+
+/* TP4 only */
+SAME <== TP_OPEN DT_TPDU
+ ( IN_RWINDOW( $P, $$.e_seq,
+ $P.tp_rcvnxt, SEQ($P, $P.tp_rcvnxt + $P.tp_lcredit)) )
+ {
+ int doack; /* tells if we must ack immediately */
+
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+ sbwakeup( &$P.tp_sock->so_rcv );
+
+ doack = tp_stash($P, $E);
+ IFDEBUG(D_DATA)
+ printf("tp_stash returns %d\n",doack);
+ ENDDEBUG
+
+ if(doack)
+ (void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL );
+ else
+ tp_ctimeout_MIN( $P, TM_sendack, (int)$P.tp_sendack_ticks);
+
+ IFDEBUG(D_DATA)
+ printf("after stash calling sbwakeup\n");
+ ENDDEBUG
+ }
+;
+
+/* Not in window - we must ack under certain circumstances, namely
+ * a) if the seq number is below lwe but > lwe - (max credit ever given)
+ * (to handle lost acks) Can use max-possible-credit for this ^^^.
+ * and
+ * b) seq number is > uwe but < uwe + previously sent & withdrawn credit
+ *
+ * (see 12.2.3.8.1 of ISO spec, p. 73)
+ * We just always ack.
+ */
+/* TP4 only */
+SAME <== [ TP_OPEN, TP_AKWAIT ] DT_TPDU
+ DEFAULT /* Not in window */
+ {
+ IFTRACE(D_DATA)
+ tptrace(TPPTmisc, "NIW seq rcvnxt lcredit ",
+ $$.e_seq, $P.tp_rcvnxt, $P.tp_lcredit, 0);
+ ENDTRACE
+ IncStat(ts_dt_niw);
+ m_freem($$.e_data);
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+ (void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL );
+ }
+;
+
+/* TP4 only */
+TP_OPEN <== TP_AKWAIT AK_TPDU
+ DEFAULT
+ {
+ if ($P.tp_ucddata) {
+ m_freem($P.tp_ucddata);
+ $P.tp_ucddata = 0;
+ }
+ (void) tp_goodack($P, $$.e_cdt, $$.e_seq, $$.e_subseq);
+ tp_cuntimeout($P, TM_retrans);
+
+ soisconnected($P.tp_sock);
+ IFTRACE(D_CONN)
+ struct socket *so = $P.tp_sock;
+ tptrace(TPPTmisc,
+ "called sosiconn: so so_state rcv.sb_sel rcv.sb_flags",
+ so, so->so_state, so->so_rcv.sb_sel, so->so_rcv.sb_flags);
+ tptrace(TPPTmisc,
+ "called sosiconn 2: so_qlen so_error so_rcv.sb_cc so_head",
+ so->so_qlen, so->so_error, so->so_rcv.sb_cc, so->so_head);
+ ENDTRACE
+
+ tp_ctimeout($P, TM_sendack, (int)$P.tp_keepalive_ticks);
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+ }
+;
+
+/* TP4 only */
+TP_OPEN <== [ TP_OPEN, TP_AKWAIT ] XPD_TPDU
+ ($P.tp_Xrcvnxt == $$.e_seq)
+ {
+ if( $P.tp_state == TP_AKWAIT ) {
+ if ($P.tp_ucddata) {
+ m_freem($P.tp_ucddata);
+ $P.tp_ucddata = 0;
+ }
+ tp_cuntimeout($P, TM_retrans);
+ soisconnected($P.tp_sock);
+ tp_ctimeout($P, TM_sendack, (int)$P.tp_keepalive_ticks);
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+ }
+ IFTRACE(D_XPD)
+ tptrace(TPPTmisc, "XPD tpdu accepted Xrcvnxt, e_seq datalen m_len\n",
+ $P.tp_Xrcvnxt,$$.e_seq, $$.e_datalen, $$.e_data->m_len);
+ ENDTRACE
+
+ $P.tp_sock->so_state |= SS_RCVATMARK;
+ $$.e_data->m_flags |= M_EOR;
+ sbinsertoob(&$P.tp_Xrcv, $$.e_data);
+ IFDEBUG(D_XPD)
+ dump_mbuf($$.e_data, "XPD TPDU: tp_Xrcv");
+ ENDDEBUG
+ tp_indicate(T_XDATA, $P, 0);
+ sbwakeup( &$P.tp_Xrcv );
+
+ (void) tp_emit(XAK_TPDU_type, $P, $P.tp_Xrcvnxt, 0, MNULL);
+ SEQ_INC($P, $P.tp_Xrcvnxt);
+ }
+;
+
+/* TP4 only */
+SAME <== TP_OPEN T_USR_Xrcvd
+ DEFAULT
+ {
+ if( $P.tp_Xrcv.sb_cc == 0 ) {
+ /* kludge for select(): */
+ /* $P.tp_sock->so_state &= ~SS_OOBAVAIL; */
+ }
+ }
+ /* OLD WAY:
+ * Ack only after the user receives the XPD. This is better for
+ * users that use one XPD right after another.
+ * Acking right away (the NEW WAY, see the prev. transition) is
+ * better for occasional XPD, when the receiving user doesn't
+ * want to read the XPD immediately (which is session's behavior).
+ *
+ int error = tp_emit(XAK_TPDU_type, $P, $P.tp_Xrcvnxt, 0, MNULL);
+ SEQ_INC($P, $P.tp_Xrcvnxt);
+ return error;
+ */
+;
+
+/* NOTE: presently if the user doesn't read the connection data
+ * before an expedited data PDU comes in, the connection data will
+ * be dropped. This is a bug. To avoid it, we need somewhere else
+ * to put the connection data.
+ * On the other hand, we need not to have it sitting around forever.
+ * This is a problem with the idea of trying to accommodate
+ * data on connect w/ a passive-open user interface.
+ */
+/* TP4 only */
+
+SAME <== [ TP_AKWAIT, TP_OPEN ] XPD_TPDU
+ DEFAULT /* not in window or cdt==0 */
+ {
+ IFTRACE(D_XPD)
+ tptrace(TPPTmisc, "XPD tpdu niw (Xrcvnxt, e_seq) or not cdt (cc)\n",
+ $P.tp_Xrcvnxt, $$.e_seq, $P.tp_Xrcv.sb_cc , 0);
+ ENDTRACE
+ if( $P.tp_Xrcvnxt != $$.e_seq )
+ IncStat(ts_xpd_niw);
+ if( $P.tp_Xrcv.sb_cc ) {
+ /* might as well kick 'em again */
+ tp_indicate(T_XDATA, $P, 0);
+ IncStat(ts_xpd_dup);
+ }
+ m_freem($$.e_data);
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+ /* don't send an xack because the xak gives "last one received", not
+ * "next one i expect" (dumb)
+ */
+ }
+;
+
+/* Occurs (AKWAIT, OPEN) when parent (listening) socket gets aborted, and tries
+ * to detach all its "children"
+ * Also (CRSENT) when user kills a job that's doing a connect()
+ */
+TP_REFWAIT <== TP_CRSENT T_DETACH
+ ($P.tp_class == TP_CLASS_0)
+ {
+ struct socket *so = $P.tp_sock;
+
+ /* detach from parent socket so it can finish closing */
+ if (so->so_head) {
+ if (!soqremque(so, 0) && !soqremque(so, 1))
+ panic("tp: T_DETACH");
+ so->so_head = 0;
+ }
+ tp_soisdisconnecting($P.tp_sock);
+ tp_netcmd( $P, CONN_CLOSE);
+ tp_soisdisconnected($P);
+ }
+;
+
+/* TP4 only */
+TP_CLOSING <== [ TP_CLOSING, TP_AKWAIT, TP_CRSENT, TP_CONFIRMING ] T_DETACH
+ DEFAULT
+ {
+ struct socket *so = $P.tp_sock;
+ struct mbuf *data = MNULL;
+
+ /* detach from parent socket so it can finish closing */
+ if (so->so_head) {
+ if (!soqremque(so, 0) && !soqremque(so, 1))
+ panic("tp: T_DETACH");
+ so->so_head = 0;
+ }
+ if ($P.tp_state != TP_CLOSING) {
+ tp_soisdisconnecting($P.tp_sock);
+ data = MCPY($P.tp_ucddata, M_NOWAIT);
+ (void) tp_emit(DR_TPDU_type, $P, 0, E_TP_NORMAL_DISC, data);
+ $P.tp_retrans = $P.tp_Nretrans;
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_dr_ticks);
+ }
+ }
+;
+
+TP_REFWAIT <== [ TP_OPEN, TP_CRSENT ] T_DISC_req
+ ( $P.tp_class == TP_CLASS_0 )
+ {
+ tp_soisdisconnecting($P.tp_sock);
+ tp_netcmd( $P, CONN_CLOSE);
+ tp_soisdisconnected($P);
+ }
+;
+
+/* TP4 only */
+TP_CLOSING <== [ TP_AKWAIT, TP_OPEN, TP_CRSENT, TP_CONFIRMING ] T_DISC_req
+ DEFAULT
+ {
+ struct mbuf *data = MCPY($P.tp_ucddata, M_WAIT); /* copy of tp_ucddata to send with the DR; 0 if there is none */
+
+ if($P.tp_state == TP_OPEN) {
+ tp_euntimeout($P, TM_data_retrans); /* all */
+ tp_cuntimeout($P, TM_inact);
+ tp_cuntimeout($P, TM_sendack);
+ $P.tp_flags &= ~TPF_DELACK;
+ }
+ if (data) {
+ IFDEBUG(D_CONN)
+ printf("T_DISC_req.trans tp_ucddata 0x%x\n",
+ $P.tp_ucddata);
+ dump_mbuf(data, "ucddata @ T_DISC_req");
+ ENDDEBUG
+ }
+ tp_soisdisconnecting($P.tp_sock);
+ $P.tp_retrans = $P.tp_Nretrans; /* fresh retry budget for the DR */
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_dr_ticks); /* timer is armed before the DR is emitted */
+
+ if( trick_hc ) /* trick_hc is a file static set to 1; presumably the test only hides the early return from the compiler -- confirm */
+ return tp_emit(DR_TPDU_type, $P, 0, $$.e_reason, data);
+ }
+;
+
+/* TP4 only */
+SAME <== TP_AKWAIT TM_retrans
+ ( $P.tp_retrans > 0 )
+ {
+ int error;
+ struct mbuf *data = MCPY($P.tp_ucddata, M_WAIT);
+
+ IncStat(ts_retrans_cc);
+ $P.tp_retrans --;
+ $P.tp_cong_win = 1 * $P.tp_l_tpdusize;
+
+ if( error = tp_emit(CC_TPDU_type, $P, 0, 0, data) )
+ $P.tp_sock->so_error = error;
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_cc_ticks);
+ }
+;
+
+/* TP4 only */
+TP_CLOSING <== TP_AKWAIT TM_retrans
+ DEFAULT /* out of time */
+ {
+ IncStat(ts_conn_gaveup);
+ tp_soisdisconnecting($P.tp_sock);
+ $P.tp_sock->so_error = ETIMEDOUT;
+ tp_indicate(T_DISCONNECT, $P, ETIMEDOUT);
+ (void) tp_emit(DR_TPDU_type, $P, 0, E_TP_CONGEST, MNULL);
+ $P.tp_retrans = $P.tp_Nretrans;
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_dr_ticks);
+ }
+;
+
+/* the retrans timers had better go off BEFORE the inactivity timer does,
+ * if transmissions are going on.
+ * (i.e., TM_inact should be greater than timer for all retrans plus ack
+ * turnaround)
+ */
+/* TP4 only */
+TP_CLOSING <== TP_OPEN [ TM_inact, TM_retrans, TM_data_retrans ]
+ DEFAULT
+ {
+ tp_euntimeout($P, TM_data_retrans); /* all */
+ tp_cuntimeout($P, TM_inact);
+ tp_cuntimeout($P, TM_sendack);
+
+ IncStat(ts_conn_gaveup);
+ tp_soisdisconnecting($P.tp_sock);
+ $P.tp_sock->so_error = ETIMEDOUT;
+ tp_indicate(T_DISCONNECT, $P, ETIMEDOUT);
+ (void) tp_emit(DR_TPDU_type, $P, 0, E_TP_CONGEST_2, MNULL);
+ $P.tp_retrans = $P.tp_Nretrans;
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_dr_ticks);
+ }
+;
+
+/* TP4 only: TM_retrans fired in OPEN with retries left -- resend pending XPD */
+SAME <== TP_OPEN TM_retrans
+	( $P.tp_retrans > 0 )
+	{
+		$P.tp_cong_win = 1 * $P.tp_l_tpdusize;
+		if ( $P.tp_Xsnd.sb_mb ) {	/* resume XPD */
+			struct mbuf *m = m_copy($P.tp_Xsnd.sb_mb, 0, (int)$P.tp_Xsnd.sb_cc);
+			int shift;
+
+			IFTRACE(D_XPD)
+				tptrace(TPPTmisc, "XPD retrans: Xuna Xsndnxt sndnxt snduna",
+					$P.tp_Xuna, $P.tp_Xsndnxt, $P.tp_sndnxt, $P.tp_snduna);
+			ENDTRACE
+			IFDEBUG(D_XPD)
+				dump_mbuf(m, "XPD retrans emitting M");
+			ENDDEBUG
+			IncStat(ts_retrans_xpd);
+			$P.tp_retrans --;
+			shift = $P.tp_Nretrans - $P.tp_retrans;	/* retransmissions so far */
+			if (shift > 6)	/* cap the backoff; the old max(..., 6) made 6 a floor */
+				shift = 6;
+			(void) tp_emit(XPD_TPDU_type, $P, $P.tp_Xuna, 1, m);
+			tp_ctimeout($P, TM_retrans, ((int)$P.tp_dt_ticks) << shift);
+		}
+	}
+;
+
+/* TP4 only */
+SAME <== TP_OPEN TM_data_retrans
+ ($P.tp_rxtshift < TP_NRETRANS)
+ {
+ $P.tp_rxtshift++;
+ (void) tp_data_retrans($P);
+ }
+;
+
+/* TP4 only */
+SAME <== TP_CLOSING TM_retrans
+ ( $P.tp_retrans > 0 )
+ {
+ $P.tp_retrans --;
+ (void) tp_emit(DR_TPDU_type, $P, 0, E_TP_DR_NO_REAS, MNULL);
+ IncStat(ts_retrans_dr);
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_dr_ticks);
+ }
+;
+
+/* TP4 only */
+TP_REFWAIT <== TP_CLOSING TM_retrans
+ DEFAULT /* no more retrans - gave up */
+ {
+ $P.tp_sock->so_error = ETIMEDOUT;
+ $P.tp_refstate = REF_FROZEN;
+ tp_recycle_tsuffix( $P );
+ tp_etimeout($P, TM_reference, (int)$P.tp_refer_ticks);
+ }
+;
+
+/*
+ * The resources are kept around until the ref timer goes off.
+ * The suffices are wiped out sooner so they can be reused right away.
+ */
+/* applicable in TP4, TP0 */
+TP_CLOSED <== TP_REFWAIT TM_reference
+ DEFAULT
+ {
+ tp_freeref($P.tp_lref);
+ tp_detach($P);
+ }
+;
+
+/* applicable in TP4, TP0 */
+/* A duplicate CR from connectionless network layer can't happen */
+SAME <== TP_OPEN [ CR_TPDU, CC_TPDU ]
+ DEFAULT
+ {
+ if( $P.tp_class != TP_CLASS_0) {
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+ if ( $E.ev_number == CC_TPDU )
+ (void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL);
+ }
+ /* ignore it if class 0 - state tables are blank for this */
+ }
+;
+
+/* applicable in TP4, TP0 */
+SAME <== TP_OPEN T_DATA_req
+ DEFAULT
+ {
+ IFTRACE(D_DATA)
+ tptrace(TPPTmisc, "T_DATA_req sndnxt snduna fcredit, tpcb",
+ $P.tp_sndnxt, $P.tp_snduna, $P.tp_fcredit, $P);
+ ENDTRACE
+
+ tp_send($P);
+ }
+;
+
+/* TP4 only */
+SAME <== TP_OPEN T_XPD_req
+ DEFAULT
+ /* T_XPD_req was issued by sosend iff xpd socket buf was empty
+ * at time of sosend(),
+ * AND (which means) there were no unacknowledged XPD tpdus outstanding!
+ */
+ {
+ int error = 0;
+
+ /* resume XPD */
+ if ( $P.tp_Xsnd.sb_mb ) {
+ struct mbuf *m = m_copy($P.tp_Xsnd.sb_mb, 0, (int)$P.tp_Xsnd.sb_cc);
+ /* m_copy doesn't preserve the m_xlink field, but at this pt.
+ * that doesn't matter
+ */
+
+ IFTRACE(D_XPD)
+ tptrace(TPPTmisc, "XPD req: Xuna Xsndnxt sndnxt snduna",
+ $P.tp_Xuna, $P.tp_Xsndnxt, $P.tp_sndnxt,
+ $P.tp_snduna);
+ ENDTRACE
+ IFDEBUG(D_XPD)
+ printf("T_XPD_req: sb_cc 0x%x\n", $P.tp_Xsnd.sb_cc);
+ dump_mbuf(m, "XPD req emitting M");
+ ENDDEBUG
+ error =
+ tp_emit(XPD_TPDU_type, $P, $P.tp_Xuna, 1, m);
+ $P.tp_retrans = $P.tp_Nretrans;
+
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_rxtcur);
+ SEQ_INC($P, $P.tp_Xsndnxt);
+ }
+ if(trick_hc)
+ return error;
+ }
+;
+
+/* TP4, faked ack in TP0 when cons send completes */
+SAME <== TP_OPEN AK_TPDU
+ ( tp_goodack($P, $$.e_cdt, $$.e_seq, $$.e_subseq) )
+
+ /* tp_goodack == true means
+ * EITHER it actually acked something heretofore unacknowledged
+ * OR no news but the credit should be processed.
+ */
+ {
+ struct sockbuf *sb = &$P.tp_sock->so_snd;
+
+ IFDEBUG(D_ACKRECV)
+ printf("GOOD ACK seq 0x%x cdt 0x%x\n", $$.e_seq, $$.e_cdt);
+ ENDDEBUG
+ if( $P.tp_class != TP_CLASS_0) {
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+ }
+ sbwakeup(sb);
+ IFDEBUG(D_ACKRECV)
+ printf("GOOD ACK new sndnxt 0x%x\n", $P.tp_sndnxt);
+ ENDDEBUG
+ }
+;
+
+/* TP4, and TP0 after sending a CC or possibly a CR */
+SAME <== TP_OPEN AK_TPDU
+ DEFAULT
+ {
+ IFTRACE(D_ACKRECV)
+ tptrace(TPPTmisc, "BOGUS ACK fcc_present, tp_r_subseq e_subseq",
+ $$.e_fcc_present, $P.tp_r_subseq, $$.e_subseq, 0);
+ ENDTRACE
+ if( $P.tp_class != TP_CLASS_0 ) {
+
+ if ( !$$.e_fcc_present ) {
+ /* send ACK with FCC */
+ IncStat( ts_ackreason[_ACK_FCC_] );
+ (void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 1, MNULL);
+ }
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+ }
+ }
+;
+
+/* NBS(47) */
+ /* goes in at *** */
+ /* just so happens that this is never true now, because we allow
+ * only 1 packet in the queue at once (this could be changed)
+ if ( $P.tp_Xsnd.sb_mb ) {
+ struct mbuf *m = m_copy($P.tp_Xsnd.sb_mb, 0, ??);
+
+ (void) tp_emit(XPD_TPDU_type, $P, $P.tp_Xuna, 1, m);
+ $P.tp_retrans = $P.tp_Nretrans;
+ tp_ctimeout($P, TM_retrans, (int)$P.tp_xpd_ticks);
+ SEQ_INC($P, $P.tp_Xsndnxt);
+ }
+ */
+ /* end of the above hack */
+
+/* TP4 only */
+SAME <== TP_OPEN XAK_TPDU
+ ( tp_goodXack($P, $$.e_seq) )
+ /* tp_goodXack checks for good ack, removes the correct
+ * tpdu from the queue and returns 1 if ack was legit, 0 if not.
+ * also updates tp_Xuna
+ */
+ {
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+ tp_cuntimeout($P, TM_retrans);
+
+ sbwakeup( &$P.tp_sock->so_snd );
+
+ /* resume normal data */
+ tp_send($P);
+ }
+;
+
+/* TP4, and TP0 after sending a CC or possibly a CR */
+SAME <== TP_OPEN XAK_TPDU
+ DEFAULT
+ {
+ IFTRACE(D_ACKRECV)
+ tptrace(TPPTmisc, "BOGUS XACK eventtype ", $E.ev_number, 0, 0,0);
+ ENDTRACE
+ if( $P.tp_class != TP_CLASS_0 ) {
+ tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+ }
+ }
+;
+
+/* TP4 only */
+SAME <== TP_OPEN TM_sendack
+ DEFAULT
+ {
+ int timo;
+ IFTRACE(D_TIMER)
+ tptrace(TPPTsendack, -1, $P.tp_lcredit, $P.tp_sent_uwe,
+ $P.tp_sent_lcdt, 0);
+ ENDTRACE
+ IncPStat($P, tps_n_TMsendack);
+ (void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL);
+ if ($P.tp_fcredit == 0) {
+ if ($P.tp_rxtshift < TP_MAXRXTSHIFT)
+ $P.tp_rxtshift++;
+ timo = ($P.tp_dt_ticks) << $P.tp_rxtshift;
+ } else
+ timo = $P.tp_sendack_ticks;
+ tp_ctimeout($P, TM_sendack, timo);
+ }
+;
+
+/* TP0 only */
+SAME <== TP_OPEN T_USR_rcvd
+ ($P.tp_class == TP_CLASS_0)
+ {
+ if (sbspace(&$P.tp_sock->so_rcv) > 0)
+ tp0_openflow($P);
+ }
+;
+
+/* TP4 only */
+ /* If old credit was zero,
+ * we'd better inform other side that we now have space
+ * But this is not enough. Sender might not yet have
+ * seen an ack with cdt 0 but it might still think the
+ * window is closed, so it's going to wait.
+ * Best to send an ack each time.
+ * Strictly speaking, this ought to be a function of the
+ * general ack strategy.
+ */
+SAME <== TP_OPEN T_USR_rcvd
+ DEFAULT
+ {
+ if( trick_hc ) {
+ SeqNum ack_thresh;
+ /*
+ * If the upper window edge has advanced a reasonable
+ * amount beyond what was known, send an ACK.
+ * A reasonable amount is 2 packets, unless the max window
+ * is only 1 or 2 packets, in which case we
+ * should send an ack for any advance in the upper window edge.
+ */
+ LOCAL_CREDIT($P);
+ ack_thresh = SEQ_SUB($P, $P.tp_lcredit + $P.tp_rcvnxt,
+ ($P.tp_maxlcredit > 2 ? 2 : 1));
+ if (SEQ_GT($P, ack_thresh, $P.tp_sent_uwe)) {
+ IncStat(ts_ackreason[_ACK_USRRCV_]);
+ $P.tp_flags &= ~TPF_DELACK;
+ return tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL);
+ }
+ }
+ }
+;
+
+/* applicable in TP4, TP0 */
+SAME <== TP_REFWAIT [ T_USR_rcvd, T_USR_Xrcvd ]
+ DEFAULT
+ /* This happens if other end sent a DR when the user was waiting
+ * on a receive.
+ * Processing the DR includes putting us in REFWAIT state.
+ */
+ {
+ if(trick_hc)
+ return ECONNABORTED;
+ }
+;
+
+/* TP0 only */
+TP_REFWAIT <== [ TP_OPEN, TP_CRSENT, TP_LISTENING ] T_NETRESET
+ ( $P.tp_class != TP_CLASS_4 )
+ /* 0 or (4 and 0) */
+ /* in OPEN class will be 0 or 4 but not both */
+ /* in CRSENT or LISTENING it could be in negotiation, hence both */
+ /* Actually, this shouldn't ever happen in LISTENING */
+ {
+ ASSERT( $P.tp_state != TP_LISTENING );
+ tp_indicate(T_DISCONNECT, $P, ECONNRESET);
+ tp_soisdisconnected($P);
+ }
+;
+
+/* TP4: ignore resets */
+SAME <== [ TP_OPEN, TP_CRSENT, TP_AKWAIT,
+ TP_CLOSING, TP_LISTENING ] T_NETRESET
+ DEFAULT
+ NULLACTION
+;
+
+/* applicable in TP4, TP0 */
+SAME <== [ TP_CLOSED, TP_REFWAIT ] T_NETRESET
+ DEFAULT
+ NULLACTION
+;
+
+/* C'EST TOUT */
diff --git a/sys/netiso/tp_astring.c b/sys/netiso/tp_astring.c
new file mode 100644
index 000000000000..af08cebbc869
--- /dev/null
+++ b/sys/netiso/tp_astring.c
@@ -0,0 +1,74 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_astring.c 8.1 (Berkeley) 6/10/93
+ */
+
+char *tp_sstring[] = { /* printable state names; each entry carries its own array index in hex */
+"ST_ERROR(0x0)",
+"TP_CLOSED(0x1)",
+"TP_CRSENT(0x2)",
+"TP_AKWAIT(0x3)",
+"TP_OPEN(0x4)",
+"TP_CLOSING(0x5)",
+"TP_REFWAIT(0x6)",
+"TP_LISTENING(0x7)",
+"TP_CONFIRMING(0x8)",
+};
+
+char *tp_estring[] = { /* printable event names; each entry carries its own array index in hex */
+"TM_inact(0x0)",
+"TM_retrans(0x1)",
+"TM_sendack(0x2)",
+"TM_notused(0x3)",
+"TM_reference(0x4)",
+"TM_data_retrans(0x5)",
+"ER_TPDU(0x6)",
+"CR_TPDU(0x7)",
+"DR_TPDU(0x8)",
+"DC_TPDU(0x9)",
+"CC_TPDU(0xa)",
+"AK_TPDU(0xb)",
+"DT_TPDU(0xc)",
+"XPD_TPDU(0xd)",
+"XAK_TPDU(0xe)",
+"T_CONN_req(0xf)",
+"T_DISC_req(0x10)",
+"T_LISTEN_req(0x11)",
+"T_DATA_req(0x12)",
+"T_XPD_req(0x13)",
+"T_USR_rcvd(0x14)",
+"T_USR_Xrcvd(0x15)",
+"T_DETACH(0x16)",
+"T_NETRESET(0x17)",
+"T_ACPT_req(0x18)",
+};
diff --git a/sys/netiso/tp_clnp.h b/sys/netiso/tp_clnp.h
new file mode 100644
index 000000000000..81a7cffc13ea
--- /dev/null
+++ b/sys/netiso/tp_clnp.h
@@ -0,0 +1,94 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_clnp.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_clnp.h,v 5.1 88/10/12 12:16:36 root Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_clnp.h,v $
+ *
+ * AF_ISO net-dependent structures and include files
+ *
+ */
+
+
+#ifndef __TP_CLNP__
+#define __TP_CLNP__
+
+#ifndef SOCK_STREAM
+#include <sys/socket.h>
+#endif /* SOCK_STREAM */
+
+#ifndef RTFREE
+#include <net/route.h>
+#endif
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/iso_pcb.h>
+#ifndef IF_DEQUEUE
+#include <net/if.h>
+#endif
+#include <netiso/iso_var.h>
+
+struct isopcb tp_isopcb;
+ /* queue of active isopcbs for tp; for tp over clnp */
+
+#endif /* __TP_CLNP__ */
diff --git a/sys/netiso/tp_cons.c b/sys/netiso/tp_cons.c
new file mode 100644
index 000000000000..797ee9ef829e
--- /dev/null
+++ b/sys/netiso/tp_cons.c
@@ -0,0 +1,308 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_cons.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ * $Header: tp_cons.c,v 5.6 88/11/18 17:27:13 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_cons.c,v $
+ *
+ * Here is where you find the iso- and cons-dependent code. We've tried to
+ * keep all net-level and (primarily) address-family-dependent stuff
+ * out of the tp source, and everything here is reached indirectly
+ * through a switch table (struct nl_protosw *) tpcb->tp_nlproto
+ * (see tp_pcb.c).
+ * The routines here are:
+ * tpcons_input: pullup and call tp_input w/ correct arguments
+ * tpcons_output: package a pkt for cons given an isopcb & some data
+ * cons_chan_to_tpcb: find a tpcb based on the channel #
+ */
+
+#ifdef ISO
+#ifdef TPCONS
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/iso.h>
+#include <netiso/iso_errno.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/cons.h>
+#include <netiso/tp_seq.h>
+
+#undef FALSE
+#undef TRUE
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+#include <netiso/if_cons.c>
+int tpcons_output();
+
+/*
+ * CALLED FROM:
+ *	tp_route_to() for PRU_CONNECT
+ * FUNCTION, ARGUMENTS, SIDE EFFECTS and RETURN VALUE:
+ *	Calls iso_pcbconnect(isop, nam), then attaches a pklcd as isop->isop_chan
+ *	and cons_connect()s it.  Returns 0 (isop_refcnt set to 1) or the error.
+ */
+tpcons_pcbconnect(isop, nam)
+struct isopcb *isop;
+register struct mbuf *nam;
+{
+	int error;
+	if (error = iso_pcbconnect(isop, nam))
+		return error;
+	if ((isop->isop_chan = (caddr_t) pk_attach((struct socket *)0)) == 0) {
+		IFDEBUG(D_CCONS)
+			printf("tpcons_pcbconnect: no pklcd; returns 0x%x\n", ENOBUFS);
+		ENDDEBUG
+		return ENOBUFS;	/* error is still 0 here, so it must not be what gets logged */
+	}
+	if (error = cons_connect(isop)) { /* if it doesn't work */
+		/* give the freshly attached channel back and forget it */
+		pk_disconnect((struct pklcd *)isop->isop_chan);
+		isop->isop_chan = 0;
+	} else
+		isop->isop_refcnt = 1;
+	return error;
+}
+
+
+/*
+ * CALLED FROM:
+ * cons
+ * FUNCTION and ARGUMENTS:
+ * Act on control command cmd for isop: fake an AK on PRC_CONS_SEND_DONE (class 0
+ * only), tpiso_reset() on PRC_ROUTEDEAD (class 0 only), else tpclnp_ctlinput().
+ */
+ProtoHook
+tpcons_ctlinput(cmd, siso, isop)
+ int cmd;
+ struct sockaddr_iso *siso;
+ struct isopcb *isop;
+{
+ register struct tp_pcb *tpcb = 0; /* stays 0 when isop has no socket */
+
+ if (isop->isop_socket)
+ tpcb = (struct tp_pcb *)isop->isop_socket->so_pcb;
+ switch (cmd) {
+
+ case PRC_CONS_SEND_DONE:
+ if (tpcb) {
+ struct tp_event E;
+ int error = 0;
+
+ if (tpcb->tp_class == TP_CLASS_0) {
+ /* only if class is exactly class zero, not
+ * still in class negotiation
+ */
+ /* fake an ack for the sequence number one past tp_snduna */
+ register SeqNum seq = SEQ_ADD(tpcb, tpcb->tp_snduna, 1);
+
+ IFTRACE(D_DATA)
+ tptrace(TPPTmisc, "FAKE ACK seq cdt 1",
+ seq, 0,0,0);
+ ENDTRACE
+ IFDEBUG(D_DATA)
+ printf("FAKE ACK seq 0x%x cdt 1\n", seq );
+ ENDDEBUG
+ E.ATTR(AK_TPDU).e_cdt = 1;
+ E.ATTR(AK_TPDU).e_seq = seq;
+ E.ATTR(AK_TPDU).e_subseq = 0;
+ E.ATTR(AK_TPDU).e_fcc_present = 0;
+ error = DoEvent(AK_TPDU); /* NOTE(review): DoEvent presumably picks up E and tpcb by name -- confirm before renaming either */
+ if( error ) {
+ tpcb->tp_sock->so_error = error;
+ }
+ } /* else ignore it */
+ }
+ break;
+ case PRC_ROUTEDEAD:
+ if (tpcb && tpcb->tp_class == TP_CLASS_0) {
+ tpiso_reset(isop);
+ break;
+ } /* else fall through to the default handling */
+ default:
+ (void) tpclnp_ctlinput(cmd, siso);
+ break;
+ }
+ return 0; /* always 0; a DoEvent error is only recorded in so_error */
+}
+
+/*
+ * CALLED FROM:
+ *	cons's intr routine
+ * FUNCTION and ARGUMENTS:
+ *	Run a packet (m) from cons through tp_inputprep() and pass the result
+ *	to tp_input() together with faddr, laddr, channel and tpcons_output.
+ *	A null m is ignored.  Always returns 0.
+ */
+ProtoHook
+tpcons_input(m, faddr, laddr, channel)
+	struct mbuf *m;
+	struct sockaddr_iso *faddr, *laddr;
+	caddr_t channel;
+{
+	if (m != MNULL) {
+		/* tp_input() gets the prepared chain, not the one we were given */
+		m = (struct mbuf *)tp_inputprep(m);
+
+		IFDEBUG(D_TPINPUT)
+			printf("tpcons_input before tp_input(m 0x%x)\n", m);
+			dump_buf( m, 12+ m->m_len);
+		ENDDEBUG
+		tp_input(m, faddr, laddr, channel, tpcons_output, 0);
+	}
+	return 0;
+}
+
+
+/*
+ * CALLED FROM:
+ *	tp_emit()
+ * FUNCTION and ARGUMENTS:
+ *	Take a packet (m0) from tp and hand it to cons: make sure the chain
+ *	starts with a packet header, record datalen in it, and pk_send() it
+ *	on isop's channel.  If isop has no channel, only try to re-attach
+ *	and reconnect one; m0 is not sent on that path.  nochksum is unused.
+ * RETURN VALUE:
+ *	0 if m0 is MNULL, ENOBUFS if no mbuf or pklcd can be had, otherwise
+ *	whatever (E*) is returned from cons_connect() or pk_send().
+ */
+int
+tpcons_output(isop, m0, datalen, nochksum)
+	struct isopcb *isop;
+	struct mbuf *m0;
+	int datalen;
+	int nochksum;
+{
+	register struct mbuf *m = m0;
+	int error;
+
+	IFDEBUG(D_EMIT)
+		printf(
+		"tpcons_output(isop 0x%x, m 0x%x, len 0x%x socket 0x%x\n",
+			isop, m0, datalen, isop->isop_socket);
+	ENDDEBUG
+	if (m == MNULL)
+		return 0;
+	if ((m->m_flags & M_PKTHDR) == 0) {
+		MGETHDR(m, M_DONTWAIT, MT_DATA);
+		if (m == 0)
+			return ENOBUFS;
+		m->m_next = m0;
+	}
+	m->m_pkthdr.len = datalen;
+	if (isop->isop_chan == 0) {
+		/* got a restart maybe? */
+		if ((isop->isop_chan = (caddr_t) pk_attach((struct socket *)0)) == 0) {
+			IFDEBUG(D_CCONS)
+				printf("tpcons_output: no pklcd\n");
+			ENDDEBUG
+			error = ENOBUFS;	/* no channel: do not go on to cons_connect() */
+		} else if (error = cons_connect(isop)) {
+			/* connect failed: give the fresh channel back */
+			pk_disconnect((struct pklcd *)isop->isop_chan);
+			isop->isop_chan = 0;
+			IFDEBUG(D_CCONS)
+				printf("tpcons_output: can't reconnect\n");
+			ENDDEBUG
+		}
+	} else {
+		error = pk_send(isop->isop_chan, m);
+		IncStat(ts_tpdu_sent);
+	}
+	return error;
+}
+/*
+ * CALLED FROM:
+ * tp_error_emit()
+ * FUNCTION and ARGUMENTS:
+ * Send m0 via tpcons_output(), passing the lcd_upnext of chan (taken as a
+ * struct pklcd) in place of the isopcb and 0 for nochksum; datalen is
+ * handed through unchanged.
+ * RETURN VALUE:
+ * whatever tpcons_output() returns.
+ */
+
+int
+tpcons_dg_output(chan, m0, datalen)
+ caddr_t chan;
+ struct mbuf *m0;
+ int datalen;
+{
+ return tpcons_output(((struct pklcd *)chan)->lcd_upnext, m0, datalen, 0);
+}
+#endif /* TPCONS */
+#endif /* ISO */
diff --git a/sys/netiso/tp_driver.c b/sys/netiso/tp_driver.c
new file mode 100644
index 000000000000..586ef4e2ade8
--- /dev/null
+++ b/sys/netiso/tp_driver.c
@@ -0,0 +1,999 @@
+/* $Header$ */
+/* $Source$ */
+#ifndef lint
+static char *rcsid = "$Header/**/$";	/* version-control id string, left out of lint builds */
+#endif /* lint */
+#define _XEBEC_PG static
+
+#include "tp_states.h"
+
+static struct act_ent {
+ int a_newstate; /* presumably the state to enter -- confirm against the driver loop */
+ int a_action; /* presumably the case selector for _Xebec_action() -- confirm */
+} statetable[] = { {0,0}, /* slot 0 is an explicit zero entry; the rest is pulled in below */
+#include "tp_states.init"
+};
+
+/* @(#)tp.trans 8.1 (Berkeley) 6/10/93 */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/mbuf.h>
+#include <sys/time.h>
+#include <sys/errno.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_trace.h>
+#include <netiso/iso_errno.h>
+#include <netiso/tp_seq.h>
+#include <netiso/cons.h>
+
+#define DRIVERTRACE TPPTdriver
+#define sbwakeup(sb) sowakeup(p->tp_sock, sb); /* uses the caller's `p'; the expansion already ends in ';' */
+#define MCPY(d, w) (d ? m_copym(d, 0, (int)M_COPYALL, w): 0) /* copy of chain d, or 0 if d is null; d is expanded twice, unparenthesized */
+
+static trick_hc = 1; /* implicit int; tested as `if (trick_hc)' around returns in the tp.trans action code */
+
+int tp_emit(),
+ tp_goodack(), tp_goodXack(),
+ tp_stash()
+;
+void tp_indicate(), tp_getoptions(),
+ tp_soisdisconnecting(), tp_soisdisconnected(),
+ tp_recycle_tsuffix(),
+#ifdef TP_DEBUG_TIMERS
+ tp_etimeout(), tp_euntimeout(),
+ tp_ctimeout(), tp_cuntimeout(),
+ tp_ctimeout_MIN(),
+#endif
+ tp_freeref(), tp_detach(),
+ tp0_stash(), tp0_send(),
+ tp_netcmd(), tp_send()
+;
+
+typedef struct tp_pcb tpcb_struct;
+
+
+
+typedef tpcb_struct tp_PCB_;
+
+#include "tp_events.h"
+
+_XEBEC_PG int _Xebec_action(a,e,p)
+int a;
+struct tp_event *e;
+tp_PCB_ *p;
+{
+switch(a) {
+case -1: return tp_protocol_error(e,p);
+case 0x1:
+ {
+ (void) tp_emit(DC_TPDU_type, p, 0, 0, MNULL);
+ }
+ break;
+case 0x2:
+ {
+# ifdef TP_DEBUG
+ if( e->ev_number != AK_TPDU )
+ printf("TPDU 0x%x in REFWAIT!!!!\n", e->ev_number);
+# endif TP_DEBUG
+ }
+ break;
+case 0x3:
+ {
+ /* oh, man is this grotesque or what? */
+ (void) tp_goodack(p, e->ev_union.EV_AK_TPDU.e_cdt, e->ev_union.EV_AK_TPDU.e_seq, e->ev_union.EV_AK_TPDU.e_subseq);
+ /* but it's necessary because this pseudo-ack may happen
+ * before the CC arrives, but we HAVE to adjust the
+ * snduna as a result of the ack, WHENEVER it arrives
+ */
+ }
+ break;
+case 0x4:
+ {
+ tp_detach(p);
+ }
+ break;
+case 0x5:
+ {
+ p->tp_refstate = REF_OPEN; /* has timers ??? */
+ }
+ break;
+case 0x6:
+ {
+ IFTRACE(D_CONN)
+ tptrace(TPPTmisc, "CR datalen data", e->ev_union.EV_CR_TPDU.e_datalen, e->ev_union.EV_CR_TPDU.e_data,0,0);
+ ENDTRACE
+ IFDEBUG(D_CONN)
+ printf("CR datalen 0x%x data 0x%x", e->ev_union.EV_CR_TPDU.e_datalen, e->ev_union.EV_CR_TPDU.e_data);
+ ENDDEBUG
+ p->tp_refstate = REF_OPEN; /* has timers */
+ p->tp_fcredit = e->ev_union.EV_CR_TPDU.e_cdt;
+
+ if (e->ev_union.EV_CR_TPDU.e_datalen > 0) {
+ /* n/a for class 0 */
+ ASSERT(p->tp_Xrcv.sb_cc == 0);
+ sbappendrecord(&p->tp_Xrcv, e->ev_union.EV_CR_TPDU.e_data);
+ e->ev_union.EV_CR_TPDU.e_data = MNULL;
+ }
+ }
+ break;
+case 0x7:
+ {
+ IncStat(ts_tp0_conn);
+ IFTRACE(D_CONN)
+ tptrace(TPPTmisc, "Confiming", p, 0,0,0);
+ ENDTRACE
+ IFDEBUG(D_CONN)
+ printf("Confirming connection: p" );
+ ENDDEBUG
+ soisconnected(p->tp_sock);
+ (void) tp_emit(CC_TPDU_type, p, 0,0, MNULL) ;
+ p->tp_fcredit = 1;
+ }
+ break;
+case 0x8:
+ {
+ IncStat(ts_tp4_conn); /* even though not quite open */
+ IFTRACE(D_CONN)
+ tptrace(TPPTmisc, "Confiming", p, 0,0,0);
+ ENDTRACE
+ IFDEBUG(D_CONN)
+ printf("Confirming connection: p" );
+ ENDDEBUG
+ tp_getoptions(p);
+ soisconnecting(p->tp_sock);
+ if ((p->tp_rx_strat & TPRX_FASTSTART) && (p->tp_fcredit > 0))
+ p->tp_cong_win = p->tp_fcredit * p->tp_l_tpdusize;
+ p->tp_retrans = p->tp_Nretrans;
+ tp_ctimeout(p, TM_retrans, (int)p->tp_cc_ticks);
+ }
+ break;
+case 0x9:
+ {
+ IFDEBUG(D_CONN)
+ printf("event: CR_TPDU emit CC failed done " );
+ ENDDEBUG
+ soisdisconnected(p->tp_sock);
+ tp_recycle_tsuffix(p);
+ tp_freeref(p->tp_lref);
+ tp_detach(p);
+ }
+ break;
+case 0xa:
+ {
+ int error;
+ struct mbuf *data = MNULL;
+
+ IFTRACE(D_CONN)
+ tptrace(TPPTmisc, "T_CONN_req flags ucddata", (int)p->tp_flags,
+ p->tp_ucddata, 0, 0);
+ ENDTRACE
+ data = MCPY(p->tp_ucddata, M_WAIT);
+ if (data) {
+ IFDEBUG(D_CONN)
+ printf("T_CONN_req.trans m_copy cc 0x%x\n",
+ p->tp_ucddata);
+ dump_mbuf(data, "sosnd @ T_CONN_req");
+ ENDDEBUG
+ }
+
+ if (error = tp_emit(CR_TPDU_type, p, 0, 0, data) )
+ return error; /* driver WON'T change state; will return error */
+
+ p->tp_refstate = REF_OPEN; /* has timers */
+ if(p->tp_class != TP_CLASS_0) {
+ p->tp_retrans = p->tp_Nretrans;
+ tp_ctimeout(p, TM_retrans, (int)p->tp_cr_ticks);
+ }
+ }
+ break;
+case 0xb:
+ {
+ sbflush(&p->tp_Xrcv); /* purge non-delivered data data */
+ if (e->ev_union.EV_DR_TPDU.e_datalen > 0) {
+ sbappendrecord(&p->tp_Xrcv, e->ev_union.EV_DR_TPDU.e_data);
+ e->ev_union.EV_DR_TPDU.e_data = MNULL;
+ }
+ if (p->tp_state == TP_OPEN)
+ tp_indicate(T_DISCONNECT, p, 0);
+ else {
+ int so_error = ECONNREFUSED;
+ if (e->ev_union.EV_DR_TPDU.e_reason != (E_TP_NO_SESSION ^ TP_ERROR_MASK) &&
+ e->ev_union.EV_DR_TPDU.e_reason != (E_TP_NO_CR_ON_NC ^ TP_ERROR_MASK) &&
+ e->ev_union.EV_DR_TPDU.e_reason != (E_TP_REF_OVERFLOW ^ TP_ERROR_MASK))
+ so_error = ECONNABORTED;
+ tp_indicate(T_DISCONNECT, p, so_error);
+ }
+ tp_soisdisconnected(p);
+ if (p->tp_class != TP_CLASS_0) {
+ if (p->tp_state == TP_OPEN ) {
+ tp_euntimeout(p, TM_data_retrans); /* all */
+ tp_cuntimeout(p, TM_retrans);
+ tp_cuntimeout(p, TM_inact);
+ tp_cuntimeout(p, TM_sendack);
+ p->tp_flags &= ~TPF_DELACK;
+ }
+ tp_cuntimeout(p, TM_retrans);
+ if( e->ev_union.EV_DR_TPDU.e_sref != 0 )
+ (void) tp_emit(DC_TPDU_type, p, 0, 0, MNULL);
+ }
+ }
+ break;
+case 0xc:
+ {
+ if( e->ev_union.EV_DR_TPDU.e_sref != 0 )
+ (void) tp_emit(DC_TPDU_type, p, 0, 0, MNULL);
+ /* reference timer already set - reset it to be safe (???) */
+ tp_euntimeout(p, TM_reference); /* all */
+ tp_etimeout(p, TM_reference, (int)p->tp_refer_ticks);
+ }
+ break;
+case 0xd:
+ {
+ tp_cuntimeout(p, TM_retrans);
+ tp_indicate(ER_TPDU, p, e->ev_union.EV_ER_TPDU.e_reason);
+ tp_soisdisconnected(p);
+ }
+ break;
+case 0xe:
+ {
+ tp_cuntimeout(p, TM_retrans);
+ tp_soisdisconnected(p);
+ }
+ break;
+case 0xf:
+ {
+ tp_indicate(ER_TPDU, p, e->ev_union.EV_ER_TPDU.e_reason);
+ tp_cuntimeout(p, TM_retrans);
+ tp_soisdisconnected(p);
+ }
+ break;
+case 0x10:
+ {
+ tp_cuntimeout(p, TM_retrans);
+ tp_soisdisconnected(p);
+ }
+ break;
+case 0x11:
+ { /* don't ask me why we have to do this - spec says so */
+ (void) tp_emit(DR_TPDU_type, p, 0, E_TP_NO_SESSION, MNULL);
+ /* don't bother with retransmissions of the DR */
+ }
+ break;
+case 0x12:
+ {
+ tp_soisdisconnecting(p->tp_sock);
+ tp_indicate(ER_TPDU, p, e->ev_union.EV_ER_TPDU.e_reason);
+ tp_soisdisconnected(p);
+ tp_netcmd( p, CONN_CLOSE );
+ }
+ break;
+case 0x13:
+ {
+ if (p->tp_state == TP_OPEN) {
+ tp_euntimeout(p, TM_data_retrans); /* all */
+ tp_cuntimeout(p, TM_inact);
+ tp_cuntimeout(p, TM_sendack);
+ }
+ tp_soisdisconnecting(p->tp_sock);
+ tp_indicate(ER_TPDU, p, e->ev_union.EV_ER_TPDU.e_reason);
+ p->tp_retrans = p->tp_Nretrans;
+ tp_ctimeout(p, TM_retrans, (int)p->tp_dr_ticks);
+ (void) tp_emit(DR_TPDU_type, p, 0, E_TP_PROTO_ERR, MNULL);
+ }
+ break;
+case 0x14:
+ {
+ tp_cuntimeout(p, TM_retrans);
+ IncStat(ts_tp0_conn);
+ p->tp_fcredit = 1;
+ soisconnected(p->tp_sock);
+ }
+ break;
+case 0x15:
+ {
+ IFDEBUG(D_CONN)
+ printf("trans: CC_TPDU in CRSENT state flags 0x%x\n",
+ (int)p->tp_flags);
+ ENDDEBUG
+ IncStat(ts_tp4_conn);
+ p->tp_fref = e->ev_union.EV_CC_TPDU.e_sref;
+ p->tp_fcredit = e->ev_union.EV_CC_TPDU.e_cdt;
+ if ((p->tp_rx_strat & TPRX_FASTSTART) && (e->ev_union.EV_CC_TPDU.e_cdt > 0))
+ p->tp_cong_win = e->ev_union.EV_CC_TPDU.e_cdt * p->tp_l_tpdusize;
+ tp_getoptions(p);
+ tp_cuntimeout(p, TM_retrans);
+ if (p->tp_ucddata) {
+ IFDEBUG(D_CONN)
+ printf("dropping user connect data cc 0x%x\n",
+ p->tp_ucddata->m_len);
+ ENDDEBUG
+ m_freem(p->tp_ucddata);
+ p->tp_ucddata = 0;
+ }
+ soisconnected(p->tp_sock);
+ if (e->ev_union.EV_CC_TPDU.e_datalen > 0) {
+ ASSERT(p->tp_Xrcv.sb_cc == 0); /* should be empty */
+ sbappendrecord(&p->tp_Xrcv, e->ev_union.EV_CC_TPDU.e_data);
+ e->ev_union.EV_CC_TPDU.e_data = MNULL;
+ }
+
+ (void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL);
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+ }
+ break;
+case 0x16:
+ {
+ struct mbuf *data = MNULL;
+ int error;
+
+ IncStat(ts_retrans_cr);
+ p->tp_cong_win = 1 * p->tp_l_tpdusize;
+ data = MCPY(p->tp_ucddata, M_NOWAIT);
+ if(p->tp_ucddata) {
+ IFDEBUG(D_CONN)
+ printf("TM_retrans.trans m_copy cc 0x%x\n", data);
+ dump_mbuf(p->tp_ucddata, "sosnd @ TM_retrans");
+ ENDDEBUG
+ if( data == MNULL )
+ return ENOBUFS;
+ }
+
+ p->tp_retrans --;
+ if( error = tp_emit(CR_TPDU_type, p, 0, 0, data) ) {
+ p->tp_sock->so_error = error;
+ }
+ tp_ctimeout(p, TM_retrans, (int)p->tp_cr_ticks);
+ }
+ break;
+case 0x17:
+ {
+ IncStat(ts_conn_gaveup);
+ p->tp_sock->so_error = ETIMEDOUT;
+ tp_indicate(T_DISCONNECT, p, ETIMEDOUT);
+ tp_soisdisconnected(p);
+ }
+ break;
+case 0x18:
+ {
+ int error;
+ struct mbuf *data = MCPY(p->tp_ucddata, M_WAIT);
+
+ if( error = tp_emit(CC_TPDU_type, p, 0, 0, data) ) {
+ p->tp_sock->so_error = error;
+ }
+ p->tp_retrans = p->tp_Nretrans;
+ tp_ctimeout(p, TM_retrans, (int)p->tp_cc_ticks);
+ }
+ break;
+case 0x19:
+ {
+ int doack;
+
+ /*
+ * Get rid of any confirm or connect data, so that if we
+ * crash or close, it isn't thought of as disconnect data.
+ */
+ if (p->tp_ucddata) {
+ m_freem(p->tp_ucddata);
+ p->tp_ucddata = 0;
+ }
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+ tp_cuntimeout(p, TM_retrans);
+ soisconnected(p->tp_sock);
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+
+ /* see also next 2 transitions, if you make any changes */
+
+ doack = tp_stash(p, e);
+ IFDEBUG(D_DATA)
+ printf("tp_stash returns %d\n",doack);
+ ENDDEBUG
+
+ if (doack) {
+ (void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL );
+ tp_ctimeout(p, TM_sendack, (int)p->tp_keepalive_ticks);
+ } else
+ tp_ctimeout( p, TM_sendack, (int)p->tp_sendack_ticks);
+
+ IFDEBUG(D_DATA)
+ printf("after stash calling sbwakeup\n");
+ ENDDEBUG
+ }
+ break;
+case 0x1a:
+ {
+ tp0_stash(p, e);
+ sbwakeup( &p->tp_sock->so_rcv );
+
+ IFDEBUG(D_DATA)
+ printf("after stash calling sbwakeup\n");
+ ENDDEBUG
+ }
+ break;
+case 0x1b:
+ {
+ int doack; /* tells if we must ack immediately */
+
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+ sbwakeup( &p->tp_sock->so_rcv );
+
+ doack = tp_stash(p, e);
+ IFDEBUG(D_DATA)
+ printf("tp_stash returns %d\n",doack);
+ ENDDEBUG
+
+ if(doack)
+ (void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL );
+ else
+ tp_ctimeout_MIN( p, TM_sendack, (int)p->tp_sendack_ticks);
+
+ IFDEBUG(D_DATA)
+ printf("after stash calling sbwakeup\n");
+ ENDDEBUG
+ }
+ break;
+case 0x1c:
+ {
+ IFTRACE(D_DATA)
+ tptrace(TPPTmisc, "NIW seq rcvnxt lcredit ",
+ e->ev_union.EV_DT_TPDU.e_seq, p->tp_rcvnxt, p->tp_lcredit, 0);
+ ENDTRACE
+ IncStat(ts_dt_niw);
+ m_freem(e->ev_union.EV_DT_TPDU.e_data);
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+ (void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL );
+ }
+ break;
+case 0x1d:
+ {
+ if (p->tp_ucddata) {
+ m_freem(p->tp_ucddata);
+ p->tp_ucddata = 0;
+ }
+ (void) tp_goodack(p, e->ev_union.EV_AK_TPDU.e_cdt, e->ev_union.EV_AK_TPDU.e_seq, e->ev_union.EV_AK_TPDU.e_subseq);
+ tp_cuntimeout(p, TM_retrans);
+
+ soisconnected(p->tp_sock);
+ IFTRACE(D_CONN)
+ struct socket *so = p->tp_sock;
+ tptrace(TPPTmisc,
+ "called sosiconn: so so_state rcv.sb_sel rcv.sb_flags",
+ so, so->so_state, so->so_rcv.sb_sel, so->so_rcv.sb_flags);
+ tptrace(TPPTmisc,
+ "called sosiconn 2: so_qlen so_error so_rcv.sb_cc so_head",
+ so->so_qlen, so->so_error, so->so_rcv.sb_cc, so->so_head);
+ ENDTRACE
+
+ tp_ctimeout(p, TM_sendack, (int)p->tp_keepalive_ticks);
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+ }
+ break;
+case 0x1e:
+ {
+ if( p->tp_state == TP_AKWAIT ) {
+ if (p->tp_ucddata) {
+ m_freem(p->tp_ucddata);
+ p->tp_ucddata = 0;
+ }
+ tp_cuntimeout(p, TM_retrans);
+ soisconnected(p->tp_sock);
+ tp_ctimeout(p, TM_sendack, (int)p->tp_keepalive_ticks);
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+ }
+ IFTRACE(D_XPD)
+ tptrace(TPPTmisc, "XPD tpdu accepted Xrcvnxt, e_seq datalen m_len\n",
+ p->tp_Xrcvnxt,e->ev_union.EV_XPD_TPDU.e_seq, e->ev_union.EV_XPD_TPDU.e_datalen, e->ev_union.EV_XPD_TPDU.e_data->m_len);
+ ENDTRACE
+
+ p->tp_sock->so_state |= SS_RCVATMARK;
+ e->ev_union.EV_XPD_TPDU.e_data->m_flags |= M_EOR;
+ sbinsertoob(&p->tp_Xrcv, e->ev_union.EV_XPD_TPDU.e_data);
+ IFDEBUG(D_XPD)
+ dump_mbuf(e->ev_union.EV_XPD_TPDU.e_data, "XPD TPDU: tp_Xrcv");
+ ENDDEBUG
+ tp_indicate(T_XDATA, p, 0);
+ sbwakeup( &p->tp_Xrcv );
+
+ (void) tp_emit(XAK_TPDU_type, p, p->tp_Xrcvnxt, 0, MNULL);
+ SEQ_INC(p, p->tp_Xrcvnxt);
+ }
+ break;
+case 0x1f:
+ {
+ if( p->tp_Xrcv.sb_cc == 0 ) {
+ /* kludge for select(): */
+ /* p->tp_sock->so_state &= ~SS_OOBAVAIL; */
+ }
+ }
+ break;
+case 0x20:
+ {
+ IFTRACE(D_XPD)
+ tptrace(TPPTmisc, "XPD tpdu niw (Xrcvnxt, e_seq) or not cdt (cc)\n",
+ p->tp_Xrcvnxt, e->ev_union.EV_XPD_TPDU.e_seq, p->tp_Xrcv.sb_cc , 0);
+ ENDTRACE
+ if( p->tp_Xrcvnxt != e->ev_union.EV_XPD_TPDU.e_seq )
+ IncStat(ts_xpd_niw);
+ if( p->tp_Xrcv.sb_cc ) {
+ /* might as well kick 'em again */
+ tp_indicate(T_XDATA, p, 0);
+ IncStat(ts_xpd_dup);
+ }
+ m_freem(e->ev_union.EV_XPD_TPDU.e_data);
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+ /* don't send an xack because the xak gives "last one received", not
+ * "next one i expect" (dumb)
+ */
+ }
+ break;
+case 0x21:
+ {
+ struct socket *so = p->tp_sock;
+
+ /* detach from parent socket so it can finish closing */
+ if (so->so_head) {
+ if (!soqremque(so, 0) && !soqremque(so, 1))
+ panic("tp: T_DETACH");
+ so->so_head = 0;
+ }
+ tp_soisdisconnecting(p->tp_sock);
+ tp_netcmd( p, CONN_CLOSE);
+ tp_soisdisconnected(p);
+ }
+ break;
+case 0x22:
+ {
+ struct socket *so = p->tp_sock;
+ struct mbuf *data = MNULL;
+
+ /* detach from parent socket so it can finish closing */
+ if (so->so_head) {
+ if (!soqremque(so, 0) && !soqremque(so, 1))
+ panic("tp: T_DETACH");
+ so->so_head = 0;
+ }
+ if (p->tp_state != TP_CLOSING) {
+ tp_soisdisconnecting(p->tp_sock);
+ data = MCPY(p->tp_ucddata, M_NOWAIT);
+ (void) tp_emit(DR_TPDU_type, p, 0, E_TP_NORMAL_DISC, data);
+ p->tp_retrans = p->tp_Nretrans;
+ tp_ctimeout(p, TM_retrans, (int)p->tp_dr_ticks);
+ }
+ }
+ break;
+case 0x23:
+ {
+ tp_soisdisconnecting(p->tp_sock);
+ tp_netcmd( p, CONN_CLOSE);
+ tp_soisdisconnected(p);
+ }
+ break;
+case 0x24:
+ {
+ struct mbuf *data = MCPY(p->tp_ucddata, M_WAIT);
+
+ if(p->tp_state == TP_OPEN) {
+ tp_euntimeout(p, TM_data_retrans); /* all */
+ tp_cuntimeout(p, TM_inact);
+ tp_cuntimeout(p, TM_sendack);
+ p->tp_flags &= ~TPF_DELACK;
+ }
+ if (data) {
+ IFDEBUG(D_CONN)
+ printf("T_DISC_req.trans tp_ucddata 0x%x\n",
+ p->tp_ucddata);
+ dump_mbuf(data, "ucddata @ T_DISC_req");
+ ENDDEBUG
+ }
+ tp_soisdisconnecting(p->tp_sock);
+ p->tp_retrans = p->tp_Nretrans;
+ tp_ctimeout(p, TM_retrans, (int)p->tp_dr_ticks);
+
+ if( trick_hc )
+ return tp_emit(DR_TPDU_type, p, 0, e->ev_union.EV_T_DISC_req.e_reason, data);
+ }
+ break;
+case 0x25:
+ {
+ int error;
+ struct mbuf *data = MCPY(p->tp_ucddata, M_WAIT);
+
+ IncStat(ts_retrans_cc);
+ p->tp_retrans --;
+ p->tp_cong_win = 1 * p->tp_l_tpdusize;
+
+ if( error = tp_emit(CC_TPDU_type, p, 0, 0, data) )
+ p->tp_sock->so_error = error;
+ tp_ctimeout(p, TM_retrans, (int)p->tp_cc_ticks);
+ }
+ break;
+case 0x26:
+ {
+ IncStat(ts_conn_gaveup);
+ tp_soisdisconnecting(p->tp_sock);
+ p->tp_sock->so_error = ETIMEDOUT;
+ tp_indicate(T_DISCONNECT, p, ETIMEDOUT);
+ (void) tp_emit(DR_TPDU_type, p, 0, E_TP_CONGEST, MNULL);
+ p->tp_retrans = p->tp_Nretrans;
+ tp_ctimeout(p, TM_retrans, (int)p->tp_dr_ticks);
+ }
+ break;
+case 0x27:
+ {
+ tp_euntimeout(p, TM_data_retrans); /* all */
+ tp_cuntimeout(p, TM_inact);
+ tp_cuntimeout(p, TM_sendack);
+
+ IncStat(ts_conn_gaveup);
+ tp_soisdisconnecting(p->tp_sock);
+ p->tp_sock->so_error = ETIMEDOUT;
+ tp_indicate(T_DISCONNECT, p, ETIMEDOUT);
+ (void) tp_emit(DR_TPDU_type, p, 0, E_TP_CONGEST_2, MNULL);
+ p->tp_retrans = p->tp_Nretrans;
+ tp_ctimeout(p, TM_retrans, (int)p->tp_dr_ticks);
+ }
+ break;
+case 0x28:
+ {
+ p->tp_cong_win = 1 * p->tp_l_tpdusize;
+ /* resume XPD */
+ if ( p->tp_Xsnd.sb_mb ) {
+ struct mbuf *m = m_copy(p->tp_Xsnd.sb_mb, 0, (int)p->tp_Xsnd.sb_cc);
+ int shift;
+
+ IFTRACE(D_XPD)
+ tptrace(TPPTmisc, "XPD retrans: Xuna Xsndnxt sndnxt snduna",
+ p->tp_Xuna, p->tp_Xsndnxt, p->tp_sndnxt,
+ p->tp_snduna);
+ ENDTRACE
+ IFDEBUG(D_XPD)
+ dump_mbuf(m, "XPD retrans emitting M");
+ ENDDEBUG
+ IncStat(ts_retrans_xpd);
+ p->tp_retrans --;
+ shift = max(p->tp_Nretrans - p->tp_retrans, 6);
+ (void) tp_emit(XPD_TPDU_type, p, p->tp_Xuna, 1, m);
+ tp_ctimeout(p, TM_retrans, ((int)p->tp_dt_ticks) << shift);
+ }
+ }
+ break;
+case 0x29:
+ {
+ p->tp_rxtshift++;
+ (void) tp_data_retrans(p);
+ }
+ break;
+case 0x2a:
+ {
+ p->tp_retrans --;
+ (void) tp_emit(DR_TPDU_type, p, 0, E_TP_DR_NO_REAS, MNULL);
+ IncStat(ts_retrans_dr);
+ tp_ctimeout(p, TM_retrans, (int)p->tp_dr_ticks);
+ }
+ break;
+case 0x2b:
+ {
+ p->tp_sock->so_error = ETIMEDOUT;
+ p->tp_refstate = REF_FROZEN;
+ tp_recycle_tsuffix( p );
+ tp_etimeout(p, TM_reference, (int)p->tp_refer_ticks);
+ }
+ break;
+case 0x2c:
+ {
+ tp_freeref(p->tp_lref);
+ tp_detach(p);
+ }
+ break;
+case 0x2d:
+ {
+ if( p->tp_class != TP_CLASS_0) {
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+ if ( e->ev_number == CC_TPDU )
+ (void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL);
+ }
+ /* ignore it if class 0 - state tables are blank for this */
+ }
+ break;
+case 0x2e:
+ {
+ IFTRACE(D_DATA)
+ tptrace(TPPTmisc, "T_DATA_req sndnxt snduna fcredit, tpcb",
+ p->tp_sndnxt, p->tp_snduna, p->tp_fcredit, p);
+ ENDTRACE
+
+ tp_send(p);
+ }
+ break;
+case 0x2f:
+ {
+ int error = 0;
+
+ /* resume XPD */
+ if ( p->tp_Xsnd.sb_mb ) {
+ struct mbuf *m = m_copy(p->tp_Xsnd.sb_mb, 0, (int)p->tp_Xsnd.sb_cc);
+ /* m_copy doesn't preserve the m_xlink field, but at this pt.
+ * that doesn't matter
+ */
+
+ IFTRACE(D_XPD)
+ tptrace(TPPTmisc, "XPD req: Xuna Xsndnxt sndnxt snduna",
+ p->tp_Xuna, p->tp_Xsndnxt, p->tp_sndnxt,
+ p->tp_snduna);
+ ENDTRACE
+ IFDEBUG(D_XPD)
+ printf("T_XPD_req: sb_cc 0x%x\n", p->tp_Xsnd.sb_cc);
+ dump_mbuf(m, "XPD req emitting M");
+ ENDDEBUG
+ error =
+ tp_emit(XPD_TPDU_type, p, p->tp_Xuna, 1, m);
+ p->tp_retrans = p->tp_Nretrans;
+
+ tp_ctimeout(p, TM_retrans, (int)p->tp_rxtcur);
+ SEQ_INC(p, p->tp_Xsndnxt);
+ }
+ if(trick_hc)
+ return error;
+ }
+ break;
+case 0x30:
+ {
+ struct sockbuf *sb = &p->tp_sock->so_snd;
+
+ IFDEBUG(D_ACKRECV)
+ printf("GOOD ACK seq 0x%x cdt 0x%x\n", e->ev_union.EV_AK_TPDU.e_seq, e->ev_union.EV_AK_TPDU.e_cdt);
+ ENDDEBUG
+ if( p->tp_class != TP_CLASS_0) {
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+ }
+ sbwakeup(sb);
+ IFDEBUG(D_ACKRECV)
+ printf("GOOD ACK new sndnxt 0x%x\n", p->tp_sndnxt);
+ ENDDEBUG
+ }
+ break;
+case 0x31:
+ {
+ IFTRACE(D_ACKRECV)
+ tptrace(TPPTmisc, "BOGUS ACK fcc_present, tp_r_subseq e_subseq",
+ e->ev_union.EV_AK_TPDU.e_fcc_present, p->tp_r_subseq, e->ev_union.EV_AK_TPDU.e_subseq, 0);
+ ENDTRACE
+ if( p->tp_class != TP_CLASS_0 ) {
+
+ if ( !e->ev_union.EV_AK_TPDU.e_fcc_present ) {
+ /* send ACK with FCC */
+ IncStat( ts_ackreason[_ACK_FCC_] );
+ (void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 1, MNULL);
+ }
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+ }
+ }
+ break;
+case 0x32:
+ {
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+ tp_cuntimeout(p, TM_retrans);
+
+ sbwakeup( &p->tp_sock->so_snd );
+
+ /* resume normal data */
+ tp_send(p);
+ }
+ break;
+case 0x33:
+ {
+ IFTRACE(D_ACKRECV)
+ tptrace(TPPTmisc, "BOGUS XACK eventtype ", e->ev_number, 0, 0,0);
+ ENDTRACE
+ if( p->tp_class != TP_CLASS_0 ) {
+ tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+ }
+ }
+ break;
+case 0x34:
+ {
+ int timo;
+ IFTRACE(D_TIMER)
+ tptrace(TPPTsendack, -1, p->tp_lcredit, p->tp_sent_uwe,
+ p->tp_sent_lcdt, 0);
+ ENDTRACE
+ IncPStat(p, tps_n_TMsendack);
+ (void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL);
+ if (p->tp_fcredit == 0) {
+ if (p->tp_rxtshift < TP_MAXRXTSHIFT)
+ p->tp_rxtshift++;
+ timo = (p->tp_dt_ticks) << p->tp_rxtshift;
+ } else
+ timo = p->tp_sendack_ticks;
+ tp_ctimeout(p, TM_sendack, timo);
+ }
+ break;
+case 0x35:
+ {
+ if (sbspace(&p->tp_sock->so_rcv) > 0)
+ tp0_openflow(p);
+ }
+ break;
+case 0x36:
+ {
+ if( trick_hc ) {
+ SeqNum ack_thresh;
+ /*
+ * If the upper window edge has advanced a reasonable
+ * amount beyond what was known, send an ACK.
+ * A reasonable amount is 2 packets, unless the max window
+ * is only 1 or 2 packets, in which case we
+ * should send an ack for any advance in the upper window edge.
+ */
+ LOCAL_CREDIT(p);
+ ack_thresh = SEQ_SUB(p, p->tp_lcredit + p->tp_rcvnxt,
+ (p->tp_maxlcredit > 2 ? 2 : 1));
+ if (SEQ_GT(p, ack_thresh, p->tp_sent_uwe)) {
+ IncStat(ts_ackreason[_ACK_USRRCV_]);
+ p->tp_flags &= ~TPF_DELACK;
+ return tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL);
+ }
+ }
+ }
+ break;
+case 0x37:
+ {
+ if(trick_hc)
+ return ECONNABORTED;
+ }
+ break;
+case 0x38:
+ {
+ ASSERT( p->tp_state != TP_LISTENING );
+ tp_indicate(T_DISCONNECT, p, ECONNRESET);
+ tp_soisdisconnected(p);
+ }
+ break;
+ }
+return 0;
+}
+
+/*
+ * _Xebec_index --
+ *	Pick the transition-table slot for an (event, state) pair whose
+ *	outcome depends on a run-time predicate.  The pair is packed as
+ *	(e->ev_number << 4) + p->tp_state and each arm below evaluates the
+ *	predicate for that pair and returns the matching slot number.
+ *
+ *	tp_driver() calls this only for pairs whose inx[][] entry is
+ *	negative, and treats a return of 0 as "use the error action".
+ *
+ *	Arms 0xb4, 0xe4 and 0x188 evaluate their predicate by calling
+ *	tp_goodack(), tp_goodXack() and tp_emit() respectively; whatever
+ *	those routines do to the pcb happens as part of the lookup, so the
+ *	calls must stay conditional exactly as written.
+ */
+_XEBEC_PG int
+_Xebec_index( e,p )
+	struct tp_event *e;
+	tp_PCB_ *p;
+{
+	switch( (e->ev_number<<4)+(p->tp_state) ) {
+
+	/* choices keyed on the remaining retransmission budget */
+	case 0x12:
+		return (p->tp_retrans > 0) ? 0x1e : 0x1f;
+	case 0x13:
+		return (p->tp_retrans > 0) ? 0x2f : 0x30;
+	case 0x14:
+		return (p->tp_retrans > 0) ? 0x32 : 0x31;
+	case 0x15:
+		return (p->tp_retrans > 0) ? 0x34 : 0x35;
+
+	case 0x54:
+		return (p->tp_rxtshift < TP_NRETRANS) ? 0x33 : 0x31;
+
+	case 0x64:
+		return (p->tp_class == TP_CLASS_0) ? 0x1a : 0x1b;
+
+	case 0x77:
+		return (p->tp_class == TP_CLASS_0) ? 0xd : 0xe;
+
+	case 0x86:
+		return (e->ev_union.EV_DR_TPDU.e_sref != 0) ? 0x2 : 0x3;
+
+	case 0xa2:
+		return (p->tp_class == TP_CLASS_0) ? 0x1c : 0x1d;
+
+	case 0xb2:
+		/* any class other than 0 yields 0, i.e. the error action */
+		return (p->tp_class == TP_CLASS_0) ? 0x5 : 0x0;
+
+	case 0xb4:
+		return tp_goodack(p, e->ev_union.EV_AK_TPDU.e_cdt,
+		    e->ev_union.EV_AK_TPDU.e_seq,
+		    e->ev_union.EV_AK_TPDU.e_subseq) ? 0x3a : 0x3b;
+
+	case 0xc3:
+		return IN_RWINDOW( p, e->ev_union.EV_DT_TPDU.e_seq,
+		    p->tp_rcvnxt, SEQ(p, p->tp_rcvnxt + p->tp_lcredit)) ?
+		    0x21 : 0x24;
+
+	case 0xc4:
+		if (p->tp_class == TP_CLASS_0)
+			return 0x22;
+		return IN_RWINDOW( p, e->ev_union.EV_DT_TPDU.e_seq,
+		    p->tp_rcvnxt, SEQ(p, p->tp_rcvnxt + p->tp_lcredit)) ?
+		    0x23 : 0x25;
+
+	case 0xd3:
+		return (p->tp_Xrcvnxt == e->ev_union.EV_XPD_TPDU.e_seq) ?
+		    0x27 : 0x2a;
+	case 0xd4:
+		return (p->tp_Xrcvnxt == e->ev_union.EV_XPD_TPDU.e_seq) ?
+		    0x27 : 0x29;
+
+	case 0xe4:
+		return tp_goodXack(p, e->ev_union.EV_XAK_TPDU.e_seq) ?
+		    0x3c : 0x3d;
+
+	/* both pairs resolve identically */
+	case 0x102:
+	case 0x104:
+		return (p->tp_class == TP_CLASS_0) ? 0x2d : 0x2e;
+
+	case 0x144:
+		return (p->tp_class == TP_CLASS_0) ? 0x3f : 0x40;
+
+	case 0x162:
+		return (p->tp_class == TP_CLASS_0) ? 0x2b : 0x2c;
+
+	/* everything except class 4 shares slot 0x42 */
+	case 0x172:
+		return (p->tp_class != TP_CLASS_4) ? 0x42 : 0x46;
+	case 0x174:
+		return (p->tp_class != TP_CLASS_4) ? 0x42 : 0x47;
+	case 0x177:
+		return (p->tp_class != TP_CLASS_4) ? 0x42 : 0x43;
+
+	case 0x188:
+		if (p->tp_class == TP_CLASS_0)
+			return 0xf;
+		/* the CC is emitted here; its result selects the slot */
+		return (tp_emit(CC_TPDU_type, p, 0,0,
+		    MCPY(p->tp_ucddata, M_NOWAIT)) == 0) ? 0x10 : 0x11;
+
+	default:
+		return 0;
+	} /* end switch */
+} /* _Xebec_index() */
+/*
+ * inx[][] -- first-level lookup table for tp_driver().
+ *
+ * tp_driver() reads inx[1 + e->ev_number][p->tp_state], so each row
+ * corresponds to one event number (offset by one) and each of the nine
+ * columns to one value of tp_state.  An entry is interpreted as follows:
+ *	 0	no transition for this pair; tp_driver() uses its error action
+ *	-1	the slot depends on a predicate; tp_driver() calls
+ *		_Xebec_index() to compute it
+ *	>0	index into statetable[]
+ */
+static int inx[26][9] = { {0,0,0,0,0,0,0,0,0,},
+ {0x0,0x0,0x0,0x0,0x31,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,-1,-1,-1,-1,0x0,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,0x3e,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,0x0,0x0,0x36,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,-1,0x0,0x0,0x0,0x0, },
+ {0x0,0x7,0x15,0x1b,-1,0x17,0x3,0xa,0x0, },
+ {0x0,0x19,0x6,0x20,0x37,0x8,0x3,-1,0x0, },
+ {0x0,0x14,0x13,0x13,0x13,0x16,-1,0xa,0x0, },
+ {0x0,0x7,0x6,0x1,0x9,0x18,0x3,0xa,0x0, },
+ {0x0,0x19,-1,0x1,0x37,0x8,0x3,0xa,0x0, },
+ {0x0,0x7,-1,0x26,-1,0x8,0x3,0xa,0x0, },
+ {0x0,0x7,0x6,-1,-1,0x8,0x3,0xa,0x0, },
+ {0x0,0x7,0x6,-1,-1,0x8,0x3,0xa,0x0, },
+ {0x0,0x7,0x6,0x1,-1,0x8,0x3,0xa,0x0, },
+ {0x0,0x12,0x0,0x0,0x0,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,-1,0x2e,-1,0x0,0x4,0x0,0x2e, },
+ {0x0,0xb,0x0,0x0,0x0,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,0x38,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,0x39,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,-1,0x0,0x41,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,0x28,0x0,0x41,0x0,0x0, },
+ {0x0,0xc,-1,0x2c,0x0,0x2c,0x4,0xc,0x2c, },
+ {0x0,0x49,-1,0x45,-1,0x44,0x48,-1,0x0, },
+ {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,-1, },
+};
+/*
+ * tp_driver --
+ *	Run one step of the TP state machine for pcb (p) and event (e).
+ *
+ *	The (event, state) pair is looked up in inx[][]; a negative entry
+ *	is resolved through _Xebec_index().  A resulting index of 0 selects
+ *	the local error action, anything else selects statetable[index].
+ *	If the chosen entry has an action it is run through _Xebec_action(),
+ *	and only when that returns 0 is p->tp_state advanced to the entry's
+ *	new state.
+ *
+ *	An event number or state that falls outside inx[][] is treated the
+ *	same as a pair with no table entry (index 0) instead of reading
+ *	beyond the array.
+ *
+ * RETURNS:
+ *	0 on success, otherwise the value returned by _Xebec_action()
+ *	(in which case the state is left unchanged).
+ */
+int
+tp_driver(p, e)
+	register tp_PCB_ *p;
+	register struct tp_event *e;
+{
+	register int index, error=0;
+	struct act_ent *a;
+	/*
+	 * Fallback used when no transition is defined.
+	 * NOTE(review): the initialiser relies on the member order of
+	 * struct act_ent, which is declared outside this function -
+	 * presumably {a_newstate, a_action}; confirm.
+	 */
+	static struct act_ent erroraction = {0,-1};
+	int row = 1 + (int)e->ev_number;	/* rows are offset by one */
+	int col = (int)p->tp_state;
+
+	if (row < 0 || row >= (int)(sizeof(inx) / sizeof(inx[0])) ||
+	    col < 0 || col >= (int)(sizeof(inx[0]) / sizeof(inx[0][0])))
+		index = 0;		/* outside the table: no transition */
+	else
+		index = inx[row][col];
+
+	/* negative entries need a predicate evaluated to pick the slot */
+	if (index < 0)
+		index = _Xebec_index(e, p);
+
+	if (index == 0)
+		a = &erroraction;
+	else
+		a = &statetable[index];
+
+	if (a->a_action)
+		error = _Xebec_action( a->a_action, e, p );
+	IFTRACE(D_DRIVER)
+	tptrace(DRIVERTRACE, a->a_newstate, p->tp_state, e->ev_number, a->a_action, 0);
+	ENDTRACE
+	/* commit the state change only if the action succeeded */
+	if (error == 0)
+		p->tp_state = a->a_newstate;
+	return error;
+}
diff --git a/sys/netiso/tp_emit.c b/sys/netiso/tp_emit.c
new file mode 100644
index 000000000000..16ed5bc7b7b2
--- /dev/null
+++ b/sys/netiso/tp_emit.c
@@ -0,0 +1,996 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_emit.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_emit.c,v 5.5 88/11/18 17:27:20 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_emit.c,v $
+ *
+ * This file contains tp_emit() and tp_error_emit(), which
+ * form TPDUs and hand them to ip.
+ * They take data in the form of mbuf chain, allocate mbufs as
+ * necessary for headers, and set the fields as appropriate from
+ * information found in the tpcb and net-level pcb.
+ *
+ * The worst thing about this code is adding the variable-length
+ * options on a machine that requires alignment for any memory access
+ * that isn't of size 1. See the macro ADDOPTION() below.
+ *
+ * We don't do any concatenation. (There's a kludge to test the
+ * basic mechanism of separation under the 'w' tpdebug option, that's all.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_meas.h>
+#include <netiso/tp_seq.h>
+#include <netiso/iso_errno.h>
+
+#include <net/if.h>
+#ifdef TRUE
+#undef FALSE
+#undef TRUE
+#endif
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+/*
+ * Old-style forward declaration (no parameter list) of a routine
+ * defined outside this file.
+ * NOTE(review): presumably the checksum generator used when a TPDU
+ * carries the checksum option - confirm against the call site.
+ */
+void iso_gen_csum();
+
+
+/* Here is a mighty kludge. The token ring misorders packets if you
+ * fire them at it too fast, and TP sans checksum is "too fast", so
+ * we have introduced a delay when checksumming isn't used.
+ */
+char tp_delay = 0x00; /* delay to keep token ring from blowing it */
+
+/*
+ * NAME: tp_emit()
+ *
+ * CALLED FROM: tp.trans and from tp_sbsend()
+ *
+ * FUNCTION and ARGUMENTS:
+ * Emits one tpdu of the type (dutype), of the format appropriate
+ * to the connection described by the pcb (tpcb), with sequence
+ * number (seq) (where appropriate), end-of-tsdu bit (eot) where
+ * appropriate, and with the data in the mbuf chain (data).
+ * For DR and ER tpdus, the argument (eot) is
+ * the reason for issuing the tpdu rather than an end-of-tsdu indicator.
+ *
+ * RETURNS:
+ * 0 OK
+ * ENOBUFS
+ * E* returned from net layer output rtn
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ *
+ * WE ASSUME that the tp header + all options will fit in ONE mbuf.
+ * If mbufs are 256 this will most likely be true, but if they are 128 it's
+ * possible that they won't.
+ * If you used every option on the CR + max. user data you'd overrun
+ * 112 but unless you used > 115 bytes for the security
+ * parameter, it would fit in a 256-byte mbuf (240 bytes for the header)
+ * We don't support the security parameter, so this isn't a problem.
+ * If security is added, we ought to remove this assumption.
+ *
+ * We do not implement the flow control confirmation "element of procedure".
+ * A) it should not affect interoperability,
+ * B) it should not be necessary - the protocol will eventually
+ * straighten things out w/o FCC, as long as we don't have severely
+ * mismatched keepalive and inactivity timers, and
+ * C) it appears not to be REQUIRED, and
+ * D) it's incredibly grotesque, and no doubt will lengthen a few
+ * critical paths.
+ * HOWEVER, we're thinking about putting it in anyway, for
+ * completeness, just like we did with ack subsequencing.
+ */
+
+int
+tp_emit(dutype, tpcb, seq, eot, data)
+ int dutype;
+ struct tp_pcb *tpcb;
+ SeqNum seq;
+ u_int eot;
+ struct mbuf *data;
+{
+ register struct tpdu *hdr;
+ register struct mbuf *m;
+ int csum_offset=0;
+ int datalen = 0;
+ int error = 0;
+ SeqNum olduwe;
+ int acking_ooo;
+
+ /* NOTE:
+ * here we treat tpdu_li as if it DID include the li field, up until
+ * the end, at which time we subtract 1
+ * This is because if we subtract 1 right away, we end up adding
+ * one every time we add an option.
+ */
+ IFDEBUG(D_EMIT)
+ printf(
+ "tp_emit dutype 0x%x, tpcb 0x%x, eot 0x%x, seq 0x%x, data 0x%x",
+ dutype, tpcb, eot, seq, data);
+ ENDDEBUG
+
+ if (dutype == CR_TPDU || dutype == CC_TPDU) {
+ m = (struct mbuf *) malloc((u_long)256, M_MBUF, M_DONTWAIT);
+ if (m) {
+ m->m_type = TPMT_TPHDR;
+ mbstat.m_mtypes[TPMT_TPHDR]++;
+ m->m_next = MNULL;
+ m->m_nextpkt = MNULL;
+ m->m_data = m->m_pktdat;
+ m->m_flags = M_PKTHDR;
+ }
+ } else {
+ MGETHDR(m, M_DONTWAIT, TPMT_TPHDR);
+ }
+ m->m_data += max_hdr;
+ if (m == NULL) {
+ if(data != (struct mbuf *)0)
+ m_freem(data);
+ error = ENOBUFS;
+ goto done;
+ }
+ m->m_len = sizeof(struct tpdu);
+ m->m_act = MNULL;
+
+ hdr = mtod(m, struct tpdu *);
+ bzero((caddr_t)hdr, sizeof(struct tpdu));
+
+ {
+ int tp_headersize();
+
+ hdr->tpdu_type = dutype;
+ hdr->tpdu_li = tp_headersize(dutype, tpcb);
+ /*
+ * class 0 doesn't use this for DT
+ * it'll just get overwritten below
+ */
+ hdr->tpdu_dref = htons(tpcb->tp_fref);
+ if( tpcb->tp_use_checksum ||
+ (dutype == CR_TPDU_type && (tpcb->tp_class & TP_CLASS_4) )) {
+ csum_offset = hdr->tpdu_li + 2; /* DOESN'T include csum */
+ ADDOPTION(TPP_checksum, hdr, 2, eot /* dummy arg */);
+ IFDEBUG(D_CHKSUM)
+ printf(
+ "tp_emit: csum_offset 0x%x, hdr->tpdu_li 0x%x\n",
+ csum_offset, hdr->tpdu_li);
+ ENDDEBUG
+ }
+ /*
+ * VARIABLE PARTS...
+ */
+ switch( dutype ) {
+
+ case CR_TPDU_type:
+ hdr->tpdu_CRdref_0 = 0; /* must be zero */
+ case CC_TPDU_type:
+ if (!tpcb->tp_cebit_off) {
+ tpcb->tp_win_recv = tp_start_win << 8;
+ LOCAL_CREDIT(tpcb);
+ CONG_INIT_SAMPLE(tpcb);
+ } else
+ LOCAL_CREDIT(tpcb);
+
+/* Case CC_TPDU_type used to be here */
+ {
+ u_char x;
+
+ hdr->tpdu_CCsref = htons(tpcb->tp_lref); /* same as CRsref */
+
+ if( tpcb->tp_class > TP_CLASS_1 ) {
+ tpcb->tp_sent_uwe = tpcb->tp_lcredit -1;
+ tpcb->tp_sent_rcvnxt = 1;
+ tpcb->tp_sent_lcdt = tpcb->tp_lcredit;
+ hdr->tpdu_cdt = tpcb->tp_lcredit;
+ } else {
+#ifdef TPCONS
+ if (tpcb->tp_netservice == ISO_CONS) {
+ struct isopcb *isop = (struct isopcb *)tpcb->tp_npcb;
+ struct pklcd *lcp = (struct pklcd *)(isop->isop_chan);
+ lcp->lcd_flags &= ~X25_DG_CIRCUIT;
+ }
+#endif
+ hdr->tpdu_cdt = 0;
+ }
+ hdr->tpdu_CCclass = tp_mask_to_num(tpcb->tp_class);
+ hdr->tpdu_CCoptions =
+ (tpcb->tp_xtd_format? TPO_XTD_FMT:0) |
+ (tpcb->tp_use_efc? TPO_USE_EFC:0);
+
+ IFPERF(tpcb)
+ u_char perf_meas = tpcb->tp_perf_on;
+ ADDOPTION(TPP_perf_meas, hdr, sizeof(perf_meas), perf_meas);
+ ENDPERF
+
+ if( dutype == CR_TPDU_type ) {
+ IncStat(ts_CR_sent);
+
+ ASSERT( tpcb->tp_lsuffixlen > 0 );
+ ASSERT( tpcb->tp_fsuffixlen > 0 );
+
+ ADDOPTION(TPP_calling_sufx, hdr,
+ tpcb->tp_lsuffixlen, tpcb->tp_lsuffix[0]);
+ ADDOPTION(TPP_called_sufx, hdr,
+ tpcb->tp_fsuffixlen, tpcb->tp_fsuffix[0]);
+ } else {
+ IncStat(ts_CC_sent);
+ }
+
+ ADDOPTION(TPP_tpdu_size, hdr,
+ sizeof(tpcb->tp_tpdusize), tpcb->tp_tpdusize);
+
+ if (tpcb->tp_class != TP_CLASS_0) {
+ short millisec = 500*(tpcb->tp_sendack_ticks);
+
+ millisec = htons(millisec);
+ ADDOPTION(TPP_acktime, hdr, sizeof(short), millisec);
+
+ x = (tpcb->tp_use_nxpd? TPAO_USE_NXPD: 0)
+ | (tpcb->tp_use_rcc? TPAO_USE_RCC : 0)
+ | (tpcb->tp_use_checksum?0: TPAO_NO_CSUM)
+ | (tpcb->tp_xpd_service? TPAO_USE_TXPD: 0);
+ ADDOPTION(TPP_addl_opt, hdr, 1, x);
+
+ if ((tpcb->tp_l_tpdusize ^ (1 << tpcb->tp_tpdusize)) != 0) {
+ u_short size_s = tpcb->tp_l_tpdusize >> 7;
+ u_char size_c = size_s;
+ ASSERT(tpcb->tp_l_tpdusize < 65536 * 128);
+ if (dutype == CR_TPDU_type)
+ tpcb->tp_ptpdusize = size_s;
+ if (size_s < 256) {
+ ADDOPTION(TPP_ptpdu_size, hdr, 1, size_c);
+ } else {
+ size_s = htons(size_s);
+ ADDOPTION(TPP_ptpdu_size, hdr, 2, size_s);
+ }
+ }
+ }
+
+ if( (dutype == CR_TPDU_type) && (tpcb->tp_class != TP_CLASS_0)){
+
+ ASSERT( 1 == sizeof(tpcb->tp_vers) );
+ ADDOPTION(TPP_vers, hdr, 1, tpcb->tp_vers);
+
+ /* for each alt protocol class x,
+ * x = x<<4;
+ * option = concat(option, x);
+ * Well, for now we only have TP0 for an
+ * alternative so... this is easy.
+ *
+ * HOWEVER... There should be NO alt protocol
+ * class over CLNS. Need to see if the route suggests
+ * CONS, and iff so add alt class.
+ */
+ x = 0;
+ ADDOPTION(TPP_alt_class, hdr, 1, x);
+ }
+
+ if( hdr->tpdu_li > MLEN)
+ panic("tp_emit CR/CC");
+ }
+ break;
+
+ case DR_TPDU_type:
+ if( hdr->tpdu_DRdref == 0 ) {
+ /* don't issue the DR */
+ goto done;
+ }
+ hdr->tpdu_cdt = 0;
+ hdr->tpdu_DRsref = htons(tpcb->tp_lref);
+ hdr->tpdu_DRreason = (u_char)eot; /* WHICH BYTE OF THIS??? */
+
+ /* forget the add'l information variable part */
+ IncStat(ts_DR_sent);
+ break;
+
+ case DC_TPDU_type: /* not used in class 0 */
+ ASSERT( tpcb->tp_class != TP_CLASS_0);
+ hdr->tpdu_DCsref = htons(tpcb->tp_lref);
+ hdr->tpdu_cdt = 0;
+ data = (struct mbuf *)0;
+ IncStat(ts_DC_sent);
+ break;
+
+ case XAK_TPDU_type: /* xak not used in class 0 */
+ ASSERT( tpcb->tp_class != TP_CLASS_0); /* fall through */
+ hdr->tpdu_cdt = 0;
+
+ IFTRACE(D_XPD)
+ tptraceTPCB(TPPTXack, seq, 0, 0, 0, 0);
+ ENDTRACE
+ data = (struct mbuf *)0;
+ if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+ union seq_type seqeotX;
+
+ seqeotX.s_seq = seq;
+ seqeotX.s_eot = 1;
+ hdr->tpdu_seqeotX = htonl(seqeotX.s_seqeot);
+#else
+ hdr->tpdu_XAKseqX = seq;
+#endif /* BYTE_ORDER */
+ } else {
+ hdr->tpdu_XAKseq = seq;
+ }
+ IncStat(ts_XAK_sent);
+ IncPStat(tpcb, tps_XAK_sent);
+ break;
+
+ case XPD_TPDU_type: /* xpd not used in class 0 */
+ ASSERT( tpcb->tp_class != TP_CLASS_0); /* fall through */
+ hdr->tpdu_cdt = 0;
+ if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+ union seq_type seqeotX;
+
+ seqeotX.s_seq = seq;
+ seqeotX.s_eot = 1;
+ hdr->tpdu_seqeotX = htonl(seqeotX.s_seqeot);
+#else
+ hdr->tpdu_XPDseqX = seq;
+ hdr->tpdu_XPDeotX = 1; /* always 1 for XPD tpdu */
+#endif /* BYTE_ORDER */
+ } else {
+ hdr->tpdu_XPDseq = seq;
+ hdr->tpdu_XPDeot = 1; /* always 1 for XPD tpdu */
+ }
+ IncStat(ts_XPD_sent);
+ IncPStat(tpcb, tps_XPD_sent);
+
+ /* kludge to test the input size checking */
+ IFDEBUG(D_SIZE_CHECK)
+ /*if(data->m_len <= 16 && data->m_off < (MLEN-18) ) {
+ printf("Sending too much data on XPD: 18 bytes\n");
+ data->m_len = 18;
+ }*/
+ ENDDEBUG
+ break;
+
+ case DT_TPDU_type:
+ hdr->tpdu_cdt = 0;
+ IFTRACE(D_DATA)
+ tptraceTPCB(TPPTmisc, "emit DT: eot seq tpdu_li", eot, seq,
+ hdr->tpdu_li, 0);
+ ENDTRACE
+ if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+ union seq_type seqeotX;
+
+ seqeotX.s_seq = seq;
+ seqeotX.s_eot = eot;
+ hdr->tpdu_seqeotX = htonl(seqeotX.s_seqeot);
+#else
+ hdr->tpdu_DTseqX = seq;
+ hdr->tpdu_DTeotX = eot;
+#endif /* BYTE_ORDER */
+ } else if (tpcb->tp_class == TP_CLASS_0) {
+ IFDEBUG(D_EMIT)
+ printf("DT tpdu: class 0 m 0x%x hdr 0x%x\n", m, hdr);
+ dump_buf( hdr, hdr->tpdu_li + 1 );
+ ENDDEBUG
+ ((struct tp0du *)hdr)->tp0du_eot = eot;
+ ((struct tp0du *)hdr)->tp0du_mbz = 0;
+ IFDEBUG(D_EMIT)
+ printf("DT 2 tpdu: class 0 m 0x%x hdr 0x%x\n", m, hdr);
+ dump_buf( hdr, hdr->tpdu_li + 1 );
+ ENDDEBUG
+ } else {
+ hdr->tpdu_DTseq = seq;
+ hdr->tpdu_DTeot = eot;
+ }
+ if(eot) {
+ IncStat(ts_EOT_sent);
+ }
+ IncStat(ts_DT_sent);
+ IncPStat(tpcb, tps_DT_sent);
+ break;
+
+ case AK_TPDU_type:/* ak not used in class 0 */
+ ASSERT( tpcb->tp_class != TP_CLASS_0);
+ data = (struct mbuf *)0;
+ olduwe = tpcb->tp_sent_uwe;
+
+ if (seq != tpcb->tp_sent_rcvnxt || tpcb->tp_rsycnt == 0) {
+ LOCAL_CREDIT( tpcb );
+ tpcb->tp_sent_uwe =
+ SEQ(tpcb,tpcb->tp_rcvnxt + tpcb->tp_lcredit -1);
+ tpcb->tp_sent_lcdt = tpcb->tp_lcredit;
+ acking_ooo = 0;
+ } else
+ acking_ooo = 1;
+
+ IFDEBUG(D_RENEG)
+ /* occasionally fake a reneging so
+ you can test subsequencing */
+ if( olduwe & 0x1 ) {
+ tpcb->tp_reneged = 1;
+ IncStat(ts_ldebug);
+ }
+ ENDDEBUG
+ /* Are we about to reneg on credit?
+ * When might we do so?
+ * a) when using optimistic credit (which we no longer do).
+ * b) when drain() gets implemented (not in the plans).
+ * c) when D_RENEG is on.
+ * d) when DEC BIT response is implemented.
+ * (not- when we do this, we'll need to implement flow control
+ * confirmation)
+ */
+ if( SEQ_LT(tpcb, tpcb->tp_sent_uwe, olduwe) ) {
+ tpcb->tp_reneged = 1;
+ IncStat(ts_lcdt_reduced);
+ IFTRACE(D_CREDIT)
+ tptraceTPCB(TPPTmisc,
+ "RENEG: olduwe newuwe lcredit rcvnxt",
+ olduwe,
+ tpcb->tp_sent_uwe, tpcb->tp_lcredit,
+ tpcb->tp_rcvnxt);
+ ENDTRACE
+ }
+ IFPERF(tpcb)
+ /* new lwe is less than old uwe means we're
+ * acking before we received a whole window full
+ */
+ if( SEQ_LT( tpcb, tpcb->tp_rcvnxt, olduwe) ) {
+ /* tmp1 = number of pkts fewer than the full window */
+ register int tmp1 =
+ (int) SEQ_SUB( tpcb, olduwe, tpcb->tp_rcvnxt);
+
+ if(tmp1 > TP_PM_MAX)
+ tmp1 = TP_PM_MAX;
+ IncPStat( tpcb, tps_ack_early[tmp1] );
+
+ /* tmp1 = amt of new cdt we're advertising */
+ tmp1 = SEQ_SUB( tpcb, seq, tpcb->tp_sent_rcvnxt);
+ if(tmp1 > TP_PM_MAX )
+ tmp1 = TP_PM_MAX;
+
+ IncPStat( tpcb,
+ tps_cdt_acked [ tmp1 ]
+ [ ((tpcb->tp_lcredit > TP_PM_MAX)?
+ TP_PM_MAX:tpcb->tp_lcredit) ] );
+
+ }
+ ENDPERF
+
+ IFTRACE(D_ACKSEND)
+ tptraceTPCB(TPPTack, seq, tpcb->tp_lcredit, tpcb->tp_sent_uwe,
+ tpcb->tp_r_subseq, 0);
+ ENDTRACE
+ if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+ union seq_type seqeotX;
+
+ seqeotX.s_seq = seq;
+ seqeotX.s_eot = 0;
+ hdr->tpdu_seqeotX = htonl(seqeotX.s_seqeot);
+ hdr->tpdu_AKcdtX = htons(tpcb->tp_lcredit);
+#else
+ hdr->tpdu_cdt = 0;
+ hdr->tpdu_AKseqX = seq;
+ hdr->tpdu_AKcdtX = tpcb->tp_lcredit;
+#endif /* BYTE_ORDER */
+ } else {
+ hdr->tpdu_AKseq = seq;
+ hdr->tpdu_AKcdt = tpcb->tp_lcredit;
+ }
+ if ((tpcb->tp_class == TP_CLASS_4) &&
+ (tpcb->tp_reneged || acking_ooo)) {
+ /*
+ * Ack subsequence parameter req'd if WE reneged on
+ * credit offered. (ISO 8073, 12.2.3.8.2, p. 74)
+ */
+ IFDEBUG(D_RENEG)
+ printf("Adding subseq 0x%x\n", tpcb->tp_s_subseq);
+ ENDDEBUG
+ tpcb->tp_s_subseq++;
+ /*
+ * add tmp subseq and do a htons on it.
+ */
+ ADDOPTION(TPP_subseq, hdr,
+ sizeof(tpcb->tp_s_subseq), tpcb->tp_s_subseq);
+ } else
+ tpcb->tp_s_subseq = 0;
+
+ if ( tpcb->tp_sendfcc || eot ) /* overloaded to mean SEND FCC */ {
+ /*
+ * Rules for sending FCC ("should" send when) :
+ * %a) received an ack from peer with NO NEWS whatsoever,
+ * and it did not contain an FCC
+ * b) received an ack from peer that opens its closed window.
+ * c) received an ack from peer after it reneged on its
+ * offered credit, AND this ack raises UWE but LWE is same
+ * and below UWE at time of reneging (reduction)
+ * Now, ISO 8073 12.2.3.8.3 says
+ * that a retransmitted AK shall not contain the FCC
+ * parameter. Now, how the hell you tell the difference
+ * between a retransmitted ack and an ack that's sent in
+ * response to a received ack, I don't know, because without
+ * any local activity, and w/o any received DTs, they
+ * will contain exactly the same credit/seq# information.
+ * Anyway, given that the "retransmission of acks"
+ * procedure (ISO 8073 12.2.3.8.3) is optional, and we
+ * don't do it (although the peer can't tell that), we
+ * ignore this last rule.
+ *
+ * We send FCC for reasons a) and b) only.
+ * To add reason c) would require a ridiculous amount of state.
+ *
+ */
+ u_short bogus[4]; /* lwe(32), subseq(16), cdt(16) */
+ SeqNum lwe;
+ u_short subseq, fcredit;
+
+ tpcb->tp_sendfcc = 0;
+
+ lwe = (SeqNum) htonl(tpcb->tp_snduna);
+ subseq = htons(tpcb->tp_r_subseq);
+ fcredit = htons(tpcb->tp_fcredit);
+
+ bcopy((caddr_t) &lwe, (caddr_t)&bogus[0], sizeof(SeqNum));
+ bcopy((caddr_t) &subseq, (caddr_t)&bogus[2], sizeof(u_short));
+ bcopy((caddr_t) &fcredit, (caddr_t)&bogus[3], sizeof(u_short));
+
+ IFTRACE(D_ACKSEND)
+ tptraceTPCB(TPPTmisc,
+ "emit w/FCC: snduna r_subseq fcredit",
+ tpcb->tp_snduna, tpcb->tp_r_subseq,
+ tpcb->tp_fcredit, 0);
+ ENDTRACE
+
+ IFDEBUG(D_ACKSEND)
+ printf("Calling ADDOPTION 0x%x, 0x%x, 0x%x,0x%x\n",
+ TPP_flow_cntl_conf,
+ hdr, sizeof(bogus), bogus[0]);
+ ENDDEBUG
+ ADDOPTION(TPP_flow_cntl_conf, hdr, sizeof(bogus), bogus[0]);
+ IFDEBUG(D_ACKSEND)
+ printf("after ADDOPTION hdr 0x%x hdr->tpdu_li 0x%x\n",
+ hdr, hdr->tpdu_li);
+ printf(
+ "after ADDOPTION csum_offset 0x%x, hdr->tpdu_li 0x%x\n",
+ csum_offset, hdr->tpdu_li);
+ ENDDEBUG
+
+ }
+ tpcb->tp_reneged = 0;
+ tpcb->tp_sent_rcvnxt = seq;
+ if (tpcb->tp_fcredit == 0) {
+ int timo = tpcb->tp_keepalive_ticks;
+ if (tpcb->tp_rxtshift < TP_MAXRXTSHIFT)
+ tpcb->tp_rxtshift++;
+ timo = min(timo, ((int)tpcb->tp_dt_ticks) << tpcb->tp_rxtshift);
+ tp_ctimeout(tpcb, TM_sendack, timo);
+ } else
+ tp_ctimeout(tpcb, TM_sendack, tpcb->tp_keepalive_ticks);
+ IncStat(ts_AK_sent);
+ IncPStat(tpcb, tps_AK_sent);
+ IFDEBUG(D_ACKSEND)
+ printf(
+ "2 after rADDOPTION csum_offset 0x%x, hdr->tpdu_li 0x%x\n",
+ csum_offset, hdr->tpdu_li);
+ ENDDEBUG
+ break;
+
+ case ER_TPDU_type:
+ hdr->tpdu_ERreason = eot;
+ hdr->tpdu_cdt = 0;
+ /* no user data */
+ data = (struct mbuf *)0;
+ IncStat(ts_ER_sent);
+ break;
+ }
+
+ }
+ ASSERT( ((int)hdr->tpdu_li > 0) && ((int)hdr->tpdu_li < MLEN) );
+
+ m->m_next = data;
+
+ ASSERT( hdr->tpdu_li < MLEN ); /* leave this in */
+ ASSERT( hdr->tpdu_li != 0 ); /* leave this in */
+
+ m->m_len = hdr->tpdu_li ;
+ hdr->tpdu_li --; /* doesn't include the li field */
+
+ datalen = m_datalen( m ); /* total len */
+
+ ASSERT( datalen <= tpcb->tp_l_tpdusize ); /* may become a problem
+ when CLNP is used; leave in here for the time being */
+ IFDEBUG(D_ACKSEND)
+ printf(
+ "4 after rADDOPTION csum_offset 0x%x, hdr->tpdu_li 0x%x\n",
+ csum_offset, hdr->tpdu_li);
+ ENDDEBUG
+ if( datalen > tpcb->tp_l_tpdusize ) {
+ printf("data len 0x%x tpcb->tp_l_tpdusize 0x%x\n",
+ datalen, tpcb->tp_l_tpdusize);
+ }
+ IFDEBUG(D_EMIT)
+ printf(
+ "tp_emit before gen_csum m_len 0x%x, csum_offset 0x%x, datalen 0x%x\n",
+ m->m_len, csum_offset, datalen);
+ ENDDEBUG
+ if( tpcb->tp_use_checksum ||
+ (dutype == CR_TPDU_type && (tpcb->tp_class & TP_CLASS_4)) ) {
+ iso_gen_csum(m, csum_offset, datalen);
+ }
+
+ IFDEBUG(D_EMIT)
+ printf("tp_emit before tpxxx_output tpcb 0x%x, dutype 0x%x, datalen 0x%x\n",
+ tpcb, dutype, datalen);
+ dump_buf(mtod(m, caddr_t), datalen);
+ ENDDEBUG
+
+ IFPERF(tpcb)
+ if( dutype == DT_TPDU_type ) {
+ PStat(tpcb, Nb_to_ll) += (datalen - m->m_len);
+ tpmeas( tpcb->tp_lref, TPtime_to_ll, (struct timeval *)0,
+ seq, PStat(tpcb, Nb_to_ll), (datalen - m->m_len));
+ }
+ ENDPERF
+
+ IFTRACE(D_EMIT)
+ tptraceTPCB(TPPTtpduout, dutype, hdr, hdr->tpdu_li+1, datalen, 0);
+ ENDTRACE
+ IFDEBUG(D_EMIT)
+ printf("OUTPUT: tpcb 0x%x, isop 0x%x, so 0x%x\n",
+ tpcb, tpcb->tp_npcb, tpcb->tp_sock);
+ ENDDEBUG
+
+ { extern char tp_delay;
+
+ if( tp_delay )
+ if( tpcb->tp_use_checksum == 0 ) {
+ register u_int i = tp_delay;
+ for (; i!= 0; i--)
+ (void) iso_check_csum(m, datalen);
+ }
+ }
+ ASSERT( m->m_len > 0 );
+ error = (tpcb->tp_nlproto->nlp_output)(tpcb->tp_npcb, m, datalen,
+ !tpcb->tp_use_checksum);
+ IFDEBUG(D_EMIT)
+ printf("OUTPUT: returned 0x%x\n", error);
+ ENDDEBUG
+ IFTRACE(D_EMIT)
+ tptraceTPCB(TPPTmisc,
+ "tp_emit nlproto->output netservice returns datalen",
+ tpcb->tp_nlproto->nlp_output, tpcb->tp_netservice, error, datalen);
+ ENDTRACE
+done:
+ if (error) {
+ if (dutype == AK_TPDU_type)
+ tp_ctimeout(tpcb, TM_sendack, 1);
+ if (error == E_CO_QFULL) {
+ tp_quench(tpcb, PRC_QUENCH);
+ return 0;
+ }
+ }
+ return error;
+}
+/*
+ * NAME: tp_error_emit()
+ * CALLED FROM: tp_input() when a DR or ER is to be issued in
+ * response to an input error.
+ * FUNCTION and ARGUMENTS:
+ * The error type is the first argument.
+ * The argument (sref) is the source reference on the bad incoming tpdu,
+ * and is used for a destination reference on the outgoing packet.
+ * (faddr) and (laddr) are the foreign and local addresses for this connection.
+ * (erdata) is a ptr to the errant incoming tpdu, and is copied into the
+ * outgoing ER, if an ER is to be issued.
+ * (erlen) is the number of octets of the errant tpdu that we should
+ * try to copy.
+ * (tpcb) is the pcb for the connection the bad tpdu arrived on; may be null.
+ * (cons_channel), if non-null, selects output through tpcons_dg_output().
+ * (dgout_routine) is the datagram output routine used when (tpcb) is null.
+ * RETURN VALUES:
+ * 0 OK (also when sent on a cons channel, or dropped for lack of a route out)
+ * ENOBUFS
+ * E* from net layer datagram output routine
+ * SIDE EFFECTS:
+ * (erdata) is freed on every path except the early ENOBUFS return.
+ * NOTES:
+ */
+
+int
+tp_error_emit(error, sref, faddr, laddr, erdata, erlen, tpcb, cons_channel,
+ dgout_routine)
+ int error;
+ u_long sref;
+ struct sockaddr_iso *faddr, *laddr;
+ struct mbuf *erdata;
+ int erlen;
+ struct tp_pcb *tpcb;
+ caddr_t cons_channel;
+ int (*dgout_routine)();
+{
+ int dutype;
+ int datalen = 0;
+ register struct tpdu *hdr;
+ register struct mbuf *m;
+ int csum_offset;
+
+ IFTRACE(D_ERROR_EMIT)
+ tptrace(TPPTmisc, "tp_error_emit error sref tpcb erlen",
+ error, sref, tpcb, erlen);
+ ENDTRACE
+ IFDEBUG(D_ERROR_EMIT)
+ printf(
+ "tp_error_emit error 0x%x sref 0x%x tpcb 0x%x erlen 0x%x chan 0x%x\n",
+ error, sref, tpcb, erlen, cons_channel);
+ ENDDEBUG
+
+ MGET(m, M_DONTWAIT, TPMT_TPHDR);
+ if (m == NULL) {
+ return ENOBUFS; /* NOTE(review): erdata is not freed here -- confirm callers cope */
+ }
+ m->m_len = sizeof(struct tpdu);
+ m->m_act = MNULL;
+
+ hdr = mtod(m, struct tpdu *);
+
+ IFDEBUG(D_ERROR_EMIT)
+ printf("[error 0x%x] [error&0xff 0x%x] [(char)error 0x%x]\n",
+ error, error&0xff, (char)error);
+ ENDDEBUG
+
+ if (error & TP_ERROR_SNDC) /* flag bits in (error) pick the tpdu type */
+ dutype = DC_TPDU_type;
+ else if (error & 0x40) {
+ error &= ~0x40;
+ dutype = ER_TPDU_type;
+ } else
+ dutype = DR_TPDU_type;
+ error &= 0xff; /* only the low octet goes out as the reason code */
+
+ hdr->tpdu_type = dutype;
+ hdr->tpdu_cdt = 0;
+
+ switch( dutype ) {
+
+ case DC_TPDU_type:
+ IncStat(ts_DC_sent);
+ hdr->tpdu_li = 6;
+ hdr->tpdu_DCdref = htons(sref);
+ hdr->tpdu_DCsref = tpcb ? htons(tpcb->tp_lref) : 0;
+ IFDEBUG(D_ERROR_EMIT)
+ printf("DC case:\n");
+ dump_buf( hdr, 6);
+ ENDDEBUG
+ /* forget the add'l information variable part */
+ break;
+
+ case DR_TPDU_type:
+ IncStat(ts_DR_sent);
+ hdr->tpdu_li = 7;
+ hdr->tpdu_DRdref = htons(sref);
+ hdr->tpdu_DRsref = 0;
+ hdr->tpdu_DRreason = (char)error;
+ IFDEBUG(D_ERROR_EMIT)
+ printf("DR case:\n");
+ dump_buf( hdr, 7);
+ ENDDEBUG
+ /* forget the add'l information variable part */
+ break;
+
+ case ER_TPDU_type:
+ IncStat(ts_ER_sent);
+ hdr->tpdu_li = 5;
+ hdr->tpdu_ERreason = (char)error;
+ hdr->tpdu_ERdref = htons(sref);
+ break;
+
+ default:
+ ASSERT(0);
+ printf("TP PANIC: bad dutype 0x%x\n", dutype);
+ }
+
+ if(tpcb)
+ if( tpcb->tp_use_checksum ) {
+ ADDOPTION(TPP_checksum, hdr, 2, csum_offset /* dummy argument */);
+ csum_offset = hdr->tpdu_li - 2;
+ }
+
+ ASSERT( hdr->tpdu_li < MLEN );
+
+ if (dutype == ER_TPDU_type) {
+ /* copy the errant tpdu into another 'variable part' */
+ register caddr_t P;
+
+ IFTRACE(D_ERROR_EMIT)
+ tptrace(TPPTmisc, "error_emit ER len tpduli", erlen, hdr->tpdu_li,
+ 0,0);
+ ENDTRACE
+ IFDEBUG(D_ERROR_EMIT)
+ printf("error_emit ER len 0x%x tpduli 0x%x\n", erlen, hdr->tpdu_li);
+ ENDDEBUG
+
+ /* copy at most as many octets for which you have room */
+ if (erlen + hdr->tpdu_li + 2 > TP_MAX_HEADER_LEN)
+ erlen = TP_MAX_HEADER_LEN - hdr->tpdu_li - 2;
+
+ /* add the "invalid tpdu" parameter : required in class 0 */
+ P = (caddr_t)hdr + (int)(hdr->tpdu_li);
+ vbptr(P)->tpv_code = TPP_invalid_tpdu; /* parameter code */
+ vbptr(P)->tpv_len = erlen; /* parameter length */
+ m->m_len = hdr->tpdu_li + 2; /* 1 for code, 1 for length */
+
+ /* tp_input very likely handed us an mbuf chain w/ nothing in
+ * the first mbuf and the data following the empty mbuf
+ */
+ if(erdata->m_len == 0) {
+ erdata = m_free(erdata); /* returns the next mbuf on the chain */
+ }
+ /*
+ * copy only up to the bad octet
+ * (or max that will fit in a header
+ */
+ m->m_next = m_copy(erdata, 0, erlen);
+ hdr->tpdu_li += erlen + 2;
+ m_freem(erdata);
+ } else {
+ IFDEBUG(D_ERROR_EMIT)
+ printf("error_emit DR error 0x%x tpduli 0x%x\n", error, hdr->tpdu_li);
+ dump_buf( (char *)hdr, hdr->tpdu_li );
+ ENDDEBUG
+ m->m_len = hdr->tpdu_li ;
+ m_freem(erdata);
+ }
+
+ hdr->tpdu_li --;
+ IFTRACE(D_ERROR_EMIT)
+ tptrace(TPPTtpduout, 2, hdr, hdr->tpdu_li+1, 0, 0);
+ ENDTRACE
+
+ datalen = m_datalen( m);
+ if (tpcb) {
+ if( tpcb->tp_use_checksum ) {
+ IFTRACE(D_ERROR_EMIT)
+ tptrace(TPPTmisc, "before gen csum datalen", datalen,0,0,0);
+ ENDTRACE
+ IFDEBUG(D_ERROR_EMIT)
+ printf("before gen csum datalen 0x%x, csum_offset 0x%x\n",
+ datalen, csum_offset);
+ ENDDEBUG
+
+ iso_gen_csum(m, csum_offset, datalen);
+ }
+
+ IFDEBUG(D_ERROR_EMIT)
+ printf("OUTPUT: tpcb 0x%x, isop 0x%x, so 0x%x\n",
+ tpcb, tpcb->tp_npcb, tpcb->tp_sock);
+ ENDDEBUG
+ }
+ if (cons_channel) {
+#ifdef TPCONS
+ struct pklcd *lcp = (struct pklcd *)cons_channel;
+ struct isopcb *isop = (struct isopcb *)lcp->lcd_upnext;
+
+ tpcons_dg_output(cons_channel, m, datalen);
+ /* was if (tpcb == 0) iso_pcbdetach(isop); */
+ /* but other side may want to try again over same VC,
+ so, we'll depend on him closing it, but in case it gets forgotten
+ we'll mark it for garbage collection */
+ lcp->lcd_flags |= X25_DG_CIRCUIT;
+ IFDEBUG(D_ERROR_EMIT)
+ printf("OUTPUT: dutype 0x%x channel 0x%x\n",
+ dutype, cons_channel);
+ ENDDEBUG
+#else
+ printf("TP panic! cons channel 0x%x but not cons configured\n",
+ cons_channel);
+ m_freem(m); /* nothing will send it; don't leak the header chain */
+#endif
+ return 0; /* this branch used to fall off the end of an int function */
+ } else if (tpcb) {
+ IFDEBUG(D_ERROR_EMIT)
+ printf("tp_error_emit 1 sending DG: Laddr\n");
+ dump_addr((struct sockaddr *)laddr);
+ printf("Faddr\n");
+ dump_addr((struct sockaddr *)faddr);
+ ENDDEBUG
+ return (tpcb->tp_nlproto->nlp_dgoutput)(
+ &laddr->siso_addr,
+ &faddr->siso_addr,
+ m, datalen,
+ /* no route */ (caddr_t)0, !tpcb->tp_use_checksum);
+ } else if (dgout_routine) {
+ IFDEBUG(D_ERROR_EMIT)
+ printf("tp_error_emit sending DG: Laddr\n");
+ dump_addr((struct sockaddr *)laddr);
+ printf("Faddr\n");
+ dump_addr((struct sockaddr *)faddr);
+ ENDDEBUG
+ return (*dgout_routine)( &laddr->siso_addr, &faddr->siso_addr,
+ m, datalen, /* no route */
+ (caddr_t)0, /* nochecksum==false */0);
+ } else {
+ IFDEBUG(D_ERROR_EMIT)
+ printf("tp_error_emit DROPPING m 0x%x\n", m);
+ ENDDEBUG
+ IncStat(ts_send_drop);
+ m_freem(m);
+ return 0;
+ }
+}
diff --git a/sys/netiso/tp_events.h b/sys/netiso/tp_events.h
new file mode 100644
index 000000000000..48222830a0d2
--- /dev/null
+++ b/sys/netiso/tp_events.h
@@ -0,0 +1,84 @@
+/* $Header$ */
+/* $Source$ */
+struct tp_event {
+ int ev_number; /* one of the event codes #defined in this struct (e.g. ER_TPDU) */
+ struct timeval e_time;
+#define TM_inact 0x0
+#define TM_retrans 0x1
+#define TM_sendack 0x2
+#define TM_notused 0x3
+
+ union{ /* per-event attributes; members are named EV_<event> and reached via ATTR() */
+struct { SeqNum e_low; SeqNum e_high; int e_retrans; } EV_TM_reference;
+
+#define TM_reference 0x4
+struct { SeqNum e_low; SeqNum e_high; int e_retrans; } EV_TM_data_retrans;
+
+#define TM_data_retrans 0x5
+struct {
+ u_char e_reason;
+ } EV_ER_TPDU;
+
+#define ER_TPDU 0x6
+struct { struct mbuf *e_data; /* first field */
+ int e_datalen; /* 2nd field */
+ u_int e_cdt;
+ } EV_CR_TPDU;
+
+#define CR_TPDU 0x7
+struct { struct mbuf *e_data; /* first field */
+ int e_datalen; /* 2nd field */
+ u_short e_sref;
+ u_char e_reason;
+ } EV_DR_TPDU;
+
+#define DR_TPDU 0x8
+#define DC_TPDU 0x9
+struct { struct mbuf *e_data; /* first field */
+ int e_datalen; /* 2nd field */
+ u_short e_sref;
+ u_int e_cdt;
+ } EV_CC_TPDU;
+
+#define CC_TPDU 0xa
+struct { u_int e_cdt;
+ SeqNum e_seq;
+ SeqNum e_subseq;
+ u_char e_fcc_present;
+ } EV_AK_TPDU;
+
+#define AK_TPDU 0xb
+struct { struct mbuf *e_data; /* first field */
+ int e_datalen; /* 2nd field */
+ u_int e_eot;
+ SeqNum e_seq;
+ } EV_DT_TPDU;
+
+#define DT_TPDU 0xc
+struct { struct mbuf *e_data; /* first field */
+ int e_datalen; /* 2nd field */
+ SeqNum e_seq;
+ } EV_XPD_TPDU;
+
+#define XPD_TPDU 0xd
+struct { SeqNum e_seq; } EV_XAK_TPDU;
+
+#define XAK_TPDU 0xe
+#define T_CONN_req 0xf
+struct { u_char e_reason; } EV_T_DISC_req;
+
+#define T_DISC_req 0x10
+#define T_LISTEN_req 0x11
+#define T_DATA_req 0x12
+#define T_XPD_req 0x13
+#define T_USR_rcvd 0x14
+#define T_USR_Xrcvd 0x15
+#define T_DETACH 0x16
+#define T_NETRESET 0x17
+#define T_ACPT_req 0x18
+ }ev_union;
+};/* end struct tp_event */
+
+#define tp_NEVENTS 0x19
+
+#define ATTR(X)ev_union.EV_/**/X/**/
diff --git a/sys/netiso/tp_inet.c b/sys/netiso/tp_inet.c
new file mode 100644
index 000000000000..fb013718ba24
--- /dev/null
+++ b/sys/netiso/tp_inet.c
@@ -0,0 +1,688 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_inet.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ * $Header: tp_inet.c,v 5.3 88/11/18 17:27:29 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_inet.c,v $
+ *
+ * Here is where you find the inet-dependent code. We've tried to
+ * keep all net-level and (primarily) address-family-dependent stuff
+ * out of the tp source, and everything here is reached indirectly
+ * through a switch table (struct nl_protosw *) tpcb->tp_nlproto
+ * (see tp_pcb.c).
+ * The routines here are:
+ * in_getsufx: gets transport suffix out of an inpcb structure.
+ * in_putsufx: put transport suffix into an inpcb structure.
+ * in_putnetaddr: put a whole net addr into an inpcb.
+ * in_getnetaddr: get a whole net addr from an inpcb.
+ * in_cmpnetaddr: compare a whole net addr with that in an inpcb.
+ * in_recycle_tsuffix: clear suffix for reuse in inpcb
+ * tpip_mtu: figure out what size tpdu to use
+ * tpip_input: take a pkt from ip, strip off its ip header, give to tp
+ * tpip_output_dg: package a pkt for ip given 2 addresses & some data
+ * tpip_output: package a pkt for ip given an inpcb & some data
+ */
+
+#ifdef INET
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_ip.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_tpdu.h>
+#include <netinet/in_var.h>
+
+#ifndef ISO
+#include <netiso/iso_chksum.c>
+#endif
+
+/*
+ * NAME: in_getsufx()
+ *
+ * CALLED FROM: pr_usrreq() on PRU_BIND,
+ * PRU_CONNECT, PRU_ACCEPT, and PRU_PEERADDR
+ *
+ * FUNCTION, ARGUMENTS, and RETURN VALUE:
+ * Get a transport suffix from an inpcb structure (inp).
+ * The argument (which) takes the value TP_LOCAL or TP_FOREIGN.
+ * The port is stored through (data_out) and its size through (lenp).
+ *
+ * RETURNS: nothing meaningful; the result is delivered via the pointers.
+ *
+ * SIDE EFFECTS: writes *lenp always, *data_out only for a known (which).
+ *
+ * NOTES:
+ */
+in_getsufx(inp, lenp, data_out, which)
+    struct inpcb *inp;
+    u_short *lenp;
+    caddr_t data_out;
+    int which;
+{
+    u_short *sufx = (u_short *)data_out;
+
+    /* the suffix is a port, so its length never varies */
+    *lenp = sizeof(u_short);
+
+    if (which == TP_LOCAL) {
+        *sufx = inp->inp_lport;
+    } else if (which == TP_FOREIGN) {
+        *sufx = inp->inp_fport;
+    }
+}
+
+/*
+ * NAME: in_putsufx()
+ *
+ * CALLED FROM: tp_newsocket(); i.e., when a connection
+ * is being established by an incoming CR_TPDU.
+ *
+ * FUNCTION, ARGUMENTS:
+ * Put a transport suffix (found in name) into an inpcb structure (inp).
+ * The argument (which) takes the value TP_LOCAL or TP_FOREIGN.
+ *
+ * RETURNS: Nada
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+/*ARGSUSED*/
+void
+in_putsufx(inp, sufxloc, sufxlen, which)
+    struct inpcb *inp;
+    caddr_t sufxloc;
+    int which;
+{
+    if (which != TP_FOREIGN)    /* only the foreign suffix is stored here */
+        return;
+    bcopy(sufxloc, (caddr_t)&inp->inp_fport, sizeof(inp->inp_fport));
+}
+
+/*
+ * NAME: in_recycle_tsuffix()
+ *
+ * CALLED FROM: tp.trans whenever we go into REFWAIT state.
+ *
+ * FUNCTION and ARGUMENT:
+ * Called when a ref is frozen, to allow the suffix to be reused.
+ * (inp) is the net level pcb.
+ *
+ * RETURNS: Nada
+ *
+ * SIDE EFFECTS: both the local and the foreign port in (inp) are zeroed.
+ * NOTES: This really shouldn't have to be done in a NET level pcb
+ * but... for the internet world that's just the way it is done in BSD...
+ * The alternative is to have the port unusable until the reference
+ * timer goes off.
+ */
+void
+in_recycle_tsuffix(inp)
+    struct inpcb *inp;
+{
+    inp->inp_lport = 0;
+    inp->inp_fport = 0;
+}
+
+/*
+ * NAME: in_putnetaddr()
+ *
+ * CALLED FROM:
+ * tp_newsocket(); i.e., when a connection is being established by an
+ * incoming CR_TPDU.
+ *
+ * FUNCTION and ARGUMENTS:
+ * Copy a whole net addr from a struct sockaddr (name).
+ * into an inpcb (inp).
+ * The argument (which) takes values TP_LOCAL or TP_FOREIGN
+ *
+ * RETURNS: Nada
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+void
+in_putnetaddr(inp, name, which)
+    register struct inpcb *inp;
+    struct sockaddr_in *name;
+    int which;
+{
+    caddr_t dst;
+    if (which == TP_LOCAL) {
+        /* won't work if the dst address (name) is INADDR_ANY */
+        dst = (caddr_t)&inp->inp_laddr;
+    } else if (which == TP_FOREIGN) {
+        /* a null (name) leaves the foreign address untouched */
+        if (name == (struct sockaddr_in *)0)
+            return;
+        dst = (caddr_t)&inp->inp_faddr;
+    } else {
+        return;
+    }
+    bcopy((caddr_t)&name->sin_addr, dst, sizeof(struct in_addr));
+}
+
+/*
+ * NAME: in_cmpnetaddr()
+ *
+ * CALLED FROM:
+ * tp_input() when a connection is being established by an
+ * incoming CR_TPDU, and considered for interception.
+ *
+ * FUNCTION and ARGUMENTS:
+ * Compare a whole net addr from a struct sockaddr (name),
+ * with that implicitly stored in an inpcb (inp).
+ * The argument (which) takes values TP_LOCAL or TP_FOREIGN
+ *
+ * RETURNS: nonzero if the addresses (and ports, when given) match, else 0.
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES: any (which) other than TP_LOCAL is compared against the foreign side.
+ */
+in_cmpnetaddr(inp, name, which)
+    register struct inpcb *inp;
+    register struct sockaddr_in *name;
+    int which;
+{
+    int local = (which == TP_LOCAL);
+    struct in_addr *pcbaddr = local ? &inp->inp_laddr : &inp->inp_faddr;
+
+    /* a zero port in (name) skips the port comparison */
+    if (name->sin_port != 0 &&
+        name->sin_port != (local ? inp->inp_lport : inp->inp_fport))
+        return 0;
+    return (name->sin_addr.s_addr == pcbaddr->s_addr);
+}
+
+/*
+ * NAME: in_getnetaddr()
+ *
+ * CALLED FROM:
+ * pr_usrreq() PRU_SOCKADDR, PRU_ACCEPT, PRU_PEERADDR
+ * FUNCTION and ARGUMENTS:
+ * Copy a whole net addr from an inpcb (inp) into
+ * an mbuf (name);
+ * The argument (which) takes values TP_LOCAL or TP_FOREIGN.
+ *
+ * RETURNS: Nada
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+
+void
+in_getnetaddr( inp, name, which)
+    register struct mbuf *name;
+    struct inpcb *inp;
+    int which;
+{
+    register struct sockaddr_in *sa_in;
+
+    sa_in = mtod(name, struct sockaddr_in *);
+    bzero((caddr_t)sa_in, sizeof(*sa_in));
+    if (which == TP_LOCAL) {
+        sa_in->sin_addr = inp->inp_laddr;
+        sa_in->sin_port = inp->inp_lport;
+    } else if (which == TP_FOREIGN) {
+        sa_in->sin_addr = inp->inp_faddr;
+        sa_in->sin_port = inp->inp_fport;
+    } else {
+        return;    /* unknown (which): sockaddr stays zeroed, m_len is not set */
+    }
+    sa_in->sin_family = AF_INET;
+    sa_in->sin_len = sizeof (*sa_in);
+    name->m_len = sa_in->sin_len;
+}
+
+/*
+ * NAME: tpip_mtu()
+ *
+ * CALLED FROM:
+ * tp_route_to() on incoming CR, CC, and pr_usrreq() for PRU_CONNECT
+ *
+ * FUNCTION, ARGUMENTS, and RETURN VALUE:
+ *
+ * Perform subnetwork dependent part of determining MTU information.
+ * It appears that setting a double pointer to the rtentry associated with
+ * the destination, and returning the header size for the network protocol
+ * suffices.
+ *
+ * SIDE EFFECTS:
+ * Sets tp_routep pointer in pcb.
+ *
+ * NOTES:
+ */
+
+tpip_mtu(tpcb)
+register struct tp_pcb *tpcb;
+{
+ struct inpcb *inp = (struct inpcb *)tpcb->tp_npcb;
+
+ IFDEBUG(D_CONN)
+ printf("tpip_mtu(tpcb 0x%x)\n", tpcb); /* format now consumes the argument it is given */
+ printf("tpip_mtu routing to addr 0x%x\n", inp->inp_faddr.s_addr);
+ ENDDEBUG
+ tpcb->tp_routep = &(inp->inp_route.ro_rt); /* point the tp pcb at the route entry slot in the inpcb */
+ return (sizeof (struct ip)); /* network header size: a plain ip header */
+
+}
+
+/*
+ * NAME: tpip_output()
+ *
+ * CALLED FROM: tp_emit()
+ *
+ * FUNCTION and ARGUMENTS:
+ * Take a packet(m0) from tp and package it so that ip will accept it.
+ * This means prepending space for the ip header and filling in a few
+ * of the fields.
+ * inp is the inpcb structure; datalen is the length of the data in the
+ * mbuf string m0.
+ * RETURNS:
+ * whatever (E*) is returned from the net layer output routine.
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+
+int
+tpip_output(inp, m0, datalen, nochksum)
+    struct inpcb *inp;
+    struct mbuf *m0;
+    int datalen;
+    int nochksum;
+{
+    struct route *ro = &inp->inp_route;    /* the pcb's own route structure is passed along */
+    return tpip_output_dg(&inp->inp_laddr, &inp->inp_faddr, m0, datalen, ro, nochksum);
+}
+
+/*
+ * NAME: tpip_output_dg()
+ *
+ * CALLED FROM: tp_error_emit()
+ *
+ * FUNCTION and ARGUMENTS:
+ * This is a copy of tpip_output that takes the addresses
+ * instead of a pcb. It's used by the tp_error_emit, when we
+ * don't have an in_pcb with which to call the normal output rtn.
+ *
+ * RETURNS: ENOBUFS or whatever (E*) is
+ * returned from the net layer output routine.
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+
+/*ARGSUSED*/
+int
+tpip_output_dg(laddr, faddr, m0, datalen, ro, nochksum)
+ struct in_addr *laddr, *faddr;
+ struct mbuf *m0;
+ int datalen;
+ struct route *ro;
+ int nochksum;
+{
+ register struct mbuf *m;
+ register struct ip *ip;
+ int error;
+
+ IFDEBUG(D_EMIT)
+ printf("tpip_output_dg datalen 0x%x m0 0x%x\n", datalen, m0);
+ ENDDEBUG
+
+ /* prepend a fresh mbuf holding a zeroed ip header, then hand the chain to ip_output */
+ MGETHDR(m, M_DONTWAIT, TPMT_IPHDR);
+ if (m == 0) {
+ error = ENOBUFS;
+ goto bad;
+ }
+ m->m_next = m0;
+ MH_ALIGN(m, sizeof(struct ip));
+ m->m_len = sizeof(struct ip);
+
+ ip = mtod(m, struct ip *);
+ bzero((caddr_t)ip, sizeof *ip);
+
+ ip->ip_p = IPPROTO_TP;
+ m->m_pkthdr.len = ip->ip_len = sizeof(struct ip) + datalen;
+ ip->ip_ttl = MAXTTL;
+ /* don't know why you need to set ttl;
+ * overlay doesn't even make this available
+ */
+
+ ip->ip_src = *laddr;
+ ip->ip_dst = *faddr;
+
+ IncStat(ts_tpdu_sent);
+ IFDEBUG(D_EMIT)
+ dump_mbuf(m, "tpip_output_dg before ip_output\n");
+ ENDDEBUG
+
+ error = ip_output(m, (struct mbuf *)0, ro, IP_ALLOWBROADCAST, NULL);
+
+ IFDEBUG(D_EMIT)
+ printf("tpip_output_dg after ip_output\n");
+ ENDDEBUG
+
+ return error;
+
+bad:
+ m_freem(m0); /* only reached with m == 0: free the caller's chain, which was never linked in */
+ IncStat(ts_send_drop);
+ return error;
+}
+
+/*
+ * NAME: tpip_input()
+ *
+ * CALLED FROM:
+ * ip's input routine, indirectly through the protosw.
+ *
+ * FUNCTION and ARGUMENTS:
+ * Take a packet (m) from ip, strip off the ip header and give it to tp
+ *
+ * RETURNS: No return value.
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+ProtoHook
+tpip_input(m, iplen)
+ struct mbuf *m;
+ int iplen;
+{
+ struct sockaddr_in src, dst;
+ register struct ip *ip;
+ int s = splnet(), hdrlen; /* s is restored with splx() on every exit */
+
+ IncStat(ts_pkt_rcvd);
+
+ /*
+ * IP layer has already pulled up the IP header,
+ * but the first byte after the IP header may not be there,
+ * e.g. if you came in via loopback, so you have to do an
+ * m_pullup before you can even look to see how much you
+ * really need. The good news is that m_pullup will round
+ * up to almost the next mbuf's worth.
+ */
+
+ if((m = m_pullup(m, iplen + 1)) == MNULL)
+ goto discard;
+ CHANGE_MTYPE(m, TPMT_DATA);
+
+ /*
+ * Now pull up the whole tp header:
+ * Unfortunately, there may be IP options to skip past so we
+ * just fetch it as an unsigned char.
+ */
+ hdrlen = iplen + 1 + mtod(m, u_char *)[iplen];
+
+ if( m->m_len < hdrlen ) {
+ if((m = m_pullup(m, hdrlen)) == MNULL){
+ IFDEBUG(D_TPINPUT)
+ printf("tp_input, pullup 2!\n");
+ ENDDEBUG
+ goto discard;
+ }
+ }
+ /*
+ * cannot use tp_inputprep() here 'cause you don't
+ * have quite the same situation
+ */
+
+ IFDEBUG(D_TPINPUT)
+ dump_mbuf(m, "after tpip_input both pullups");
+ ENDDEBUG
+ /*
+ * m_pullup may have returned a different mbuf
+ */
+ ip = mtod(m, struct ip *);
+
+ /*
+ * drop the ip header from the front of the mbuf
+ * this is necessary for the tp checksum
+ */
+ m->m_len -= iplen;
+ m->m_data += iplen;
+
+ src.sin_addr = *(struct in_addr *)&(ip->ip_src);
+ src.sin_family = AF_INET;
+ src.sin_len = sizeof(src);
+ dst.sin_addr = *(struct in_addr *)&(ip->ip_dst);
+ dst.sin_family = AF_INET;
+ dst.sin_len = sizeof(dst);
+
+ (void) tp_input(m, (struct sockaddr *)&src, (struct sockaddr *)&dst,
+ 0, tpip_output_dg, 0);
+ splx(s); /* the success path used to return with the priority still raised */
+ return 0;
+
+discard:
+ IFDEBUG(D_TPINPUT)
+ printf("tpip_input DISCARD\n");
+ ENDDEBUG
+ IFTRACE(D_TPINPUT)
+ tptrace(TPPTmisc, "tpip_input DISCARD m", m,0,0,0);
+ ENDTRACE
+ m_freem(m);
+ IncStat(ts_recv_drop);
+ splx(s);
+ return 0;
+}
+
+
+#include <sys/protosw.h>
+#include <netinet/ip_icmp.h>
+
+extern void tp_quench();
+/*
+ * NAME: tpin_quench()
+ *
+ * CALLED FROM: tpip_ctlinput()
+ *
+ * FUNCTION and ARGUMENTS: find the tpcb pointer and pass it to tp_quench
+ *
+ * RETURNS: Nada
+ *
+ * SIDE EFFECTS: whatever tp_quench() does with PRC_QUENCH.
+ * NOTES: the tpcb is taken from so_pcb of the socket that (inp) points at.
+ */
+
+void
+tpin_quench(inp)
+    struct inpcb *inp;
+{
+    struct socket *so = inp->inp_socket;
+    tp_quench((struct tp_pcb *)so->so_pcb, PRC_QUENCH);
+}
+
+/*
+ * NAME: tpip_ctlinput()
+ *
+ * CALLED FROM:
+ * The network layer through the protosw table.
+ *
+ * FUNCTION and ARGUMENTS:
+ * When the network layer gets an ICMP msg this gets called.
+ * It either returns an error status to the user or
+ * causes all connections on this address to be aborted
+ * by calling the appropriate xx_notify() routine.
+ * (cmd) is the type of ICMP error.
+ * (sin) the address of the sender
+ *
+ * RETURNS: 0, always
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+ProtoHook
+tpip_ctlinput(cmd, sin)
+ int cmd;
+ struct sockaddr_in *sin;
+{
+ extern u_char inetctlerrmap[];
+ extern struct in_addr zeroin_addr;
+ /* NOTE(review): the prototype below must agree with tpin_abort()'s definition -- confirm ProtoHook */
+ void tpin_abort __P((struct inpcb *,int));
+
+ if (sin->sin_family != AF_INET && sin->sin_family != AF_IMPLINK)
+ return 0;
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ return 0;
+ if (cmd < 0 || cmd > PRC_NCMDS)
+ return 0;
+ switch (cmd) {
+
+ case PRC_QUENCH:
+ in_pcbnotify(&tp_inpcb, (struct sockaddr *)sin, 0,
+ zeroin_addr, 0, cmd, tpin_quench); /* callback gets an inpcb; tp_quench wants a tp_pcb */
+ break;
+
+ case PRC_ROUTEDEAD:
+ case PRC_HOSTUNREACH:
+ case PRC_UNREACH_NET:
+ case PRC_IFDOWN:
+ case PRC_HOSTDEAD:
+ in_pcbnotify(&tp_inpcb, (struct sockaddr *)sin, 0,
+ zeroin_addr, 0, cmd, in_rtchange);
+ break;
+
+ default:
+ /*
+ case PRC_MSGSIZE:
+ case PRC_UNREACH_HOST:
+ case PRC_UNREACH_PROTOCOL:
+ case PRC_UNREACH_PORT:
+ case PRC_UNREACH_NEEDFRAG:
+ case PRC_UNREACH_SRCFAIL:
+ case PRC_REDIRECT_NET:
+ case PRC_REDIRECT_HOST:
+ case PRC_REDIRECT_TOSNET:
+ case PRC_REDIRECT_TOSHOST:
+ case PRC_TIMXCEED_INTRANS:
+ case PRC_TIMXCEED_REASS:
+ case PRC_PARAMPROB:
+ */
+ in_pcbnotify(&tp_inpcb, (struct sockaddr *)sin, 0,
+ zeroin_addr, 0, cmd, tpin_abort);
+ }
+ return 0;
+}
+
+/*
+ * NAME: tpin_abort()
+ *
+ * CALLED FROM:
+ * xxx_notify() from tp_ctlinput() when
+ * net level gets some ICMP-equiv. type event.
+ *
+ * FUNCTION and ARGUMENTS:
+ * Cause the connection to be aborted with some sort of error
+ * reason indicating that the network layer caused the abort.
+ * Fakes an ER TPDU so we can go through the driver.
+ *
+ * RETURNS: Nothing
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+
+ProtoHook
+tpin_abort(inp)
+    struct inpcb *inp;
+{
+    struct tp_event ev;
+    struct tp_pcb *tpcb = (struct tp_pcb *)inp->inp_ppcb;
+    ev.ATTR(ER_TPDU).e_reason = ENETRESET;    /* reason carried by the faked ER */
+    ev.ev_number = ER_TPDU;
+    (void) tp_driver(tpcb, &ev);
+    return 0;
+}
+
+#ifdef ARGO_DEBUG
+dump_inaddr(addr) /* debug helper: print the port and address of (addr) in hex */
+ register struct sockaddr_in *addr;
+{
+ printf("INET: port 0x%x; addr 0x%x\n", addr->sin_port, addr->sin_addr.s_addr); /* pass the integer member, not the whole struct */
+}
+#endif /* ARGO_DEBUG */
+#endif /* INET */
diff --git a/sys/netiso/tp_input.c b/sys/netiso/tp_input.c
new file mode 100644
index 000000000000..a071a5d4addf
--- /dev/null
+++ b/sys/netiso/tp_input.c
@@ -0,0 +1,1624 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_input.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_input.c,v 5.6 88/11/18 17:27:38 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_input.c,v $
+ *
+ * tp_input() gets an mbuf chain from ip. Actually, not directly
+ * from ip, because ip calls a net-level routine that strips off
+ * the net header and then calls tp_input(), passing the proper type
+ * of addresses for the address family in use (how it figures out
+ * which AF is not yet determined.)
+ *
+ * Decomposing the tpdu is some of the most laughable code. The variable-length
+ * parameters and the problem of non-aligned memory references
+ * necessitates such abominations as the macros WHILE_OPTIONS (q.v. below)
+ * to loop through the header and decompose it.
+ *
+ * The routine tp_newsocket() is called when a CR comes in for a listening
+ * socket. tp_input calls sonewconn() and tp_newsocket() to set up the
+ * "child" socket. Most tpcb values are copied from the parent tpcb into
+ * the child.
+ *
+ * Also in here is tp_headersize() (grot) which tells the expected size
+ * of a tp header, to be used by other layers. It's in here because it
+ * uses the static structure tpdu_info.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_errno.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_tpdu.h>
+
+#include <net/if.h>
+#ifdef TRUE
+#undef FALSE
+#undef TRUE
+#endif
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+int iso_check_csum(), tp_driver(), tp_headersize(), tp_error_emit();
+
+/*
+ #ifdef lint
+ #undef ATTR
+ #define ATTR(X)ev_number
+ #endif lint
+*/
+/*
+ * NAME: tp_inputprep()
+ *
+ * FUNCTION and ARGUMENTS:
+ * Get the mbuf chain (m) into shape for header parsing:
+ * - make sure the first mbuf holds at least one byte (m_pullup),
+ * - if m_data is not 4-byte aligned, slide the first mbuf's data
+ * down to the previous 4-byte boundary,
+ * - retype the mbuf as TPMT_DATA,
+ * - pull up the whole TP header, whose size is taken to be one more
+ * than the value of the first data byte.
+ *
+ * RETURN VALUE:
+ * the (possibly replaced) head of the chain, or a null mbuf pointer
+ * if either m_pullup() fails. Only the second failure is counted in
+ * ts_recv_drop.
+ */
+struct mbuf *
+tp_inputprep(m)
+ register struct mbuf *m;
+{
+ int hdrlen; /* length-indicator byte plus the length it announces */
+
+ IFDEBUG(D_TPINPUT)
+ printf("tp_inputprep: m 0x%x\n", m) ;
+ ENDDEBUG
+
+ while( m->m_len < 1 ) {
+ /* The "m_free" logic
+ * if( (m = m_free(m)) == MNULL )
+ * return (struct mbuf *)0;
+ * would cause a system crash if ever executed.
+ * This logic will be executed if the first mbuf
+ * in the chain only contains a CLNP header. The m_free routine
+ * will release the mbuf containing the CLNP header from the
+ * chain and the new head of the chain will not have the
+ * M_PKTHDR bit set. This routine, tp_inputprep, will
+ * eventually call the "sbappendaddr" routine. "sbappendaddr"
+ * calls "panic" if M_PKTHDR is not set. m_pullup is a cheap
+ * way of keeping the head of the chain from being freed.
+ */
+ if((m = m_pullup(m, 1)) == MNULL)
+ return (MNULL); /* note: ts_recv_drop is not bumped on this path */
+ }
+ if(((int)m->m_data) & 0x3) {
+ /* If we are not 4-byte aligned, we have to be
+ * above the beginning of the mbuf, and it is ok just
+ * to slide it back.
+ */
+ caddr_t ocp = m->m_data;
+
+ m->m_data = (caddr_t)(((int)m->m_data) & ~0x3);
+ bcopy(ocp, m->m_data, (unsigned)m->m_len); /* moves data down 1-3 bytes; source and destination can overlap */
+ }
+ CHANGE_MTYPE(m, TPMT_DATA);
+
+ /* we KNOW that there is at least 1 byte in this mbuf
+ and that it is hdr->tpdu_li XXXXXXX! */
+
+ hdrlen = 1 + *mtod( m, u_char *);
+
+ /*
+ * now pull up the whole tp header
+ * (skipped when the first mbuf already holds hdrlen bytes)
+ */
+ if ( m->m_len < hdrlen) {
+ if ((m = m_pullup(m, hdrlen)) == MNULL ) {
+ IncStat(ts_recv_drop);
+ return (struct mbuf *)0;
+ }
+ }
+ IFDEBUG(D_INPUT)
+ printf(
+ " at end: m 0x%x hdr->tpdu_li 0x%x m_len 0x%x\n",m,
+ hdrlen, m->m_len);
+ ENDDEBUG
+ return m;
+}
+
+/* begin groan
+ * -- this array and the following macros allow you to step through the
+ * parameters of the variable part of a header
+ * note that if for any reason the values of the **_TPDU macros (in tp_events.h)
+ * should change, this array has to be rearranged
+ */
+
+#define TP_LEN_CLASS_0_INDEX 2 /* tpdu_info column: length, class 0 */
+#define TP_MAX_DATA_INDEX 3 /* tpdu_info column: max data len */
+
+/*
+ * tpdu_info[type][column], one row per TPDU type code 0x0 - 0xf:
+ * column 0 length, regular format
+ * column 1 length, extended format
+ * column 2 length, class 0
+ * column 3 max data len
+ * WHILE_OPTIONS uses the length columns as the offset from the start
+ * of the header at which it begins stepping through parameters.
+ */
+static u_char tpdu_info[][4] = {
+ { 0x0, 0x0, 0x0, 0x0 }, /* 0x0 UNUSED */
+ { 0x5, 0x8, 0x0, TP_MAX_XPD_DATA }, /* 0x1 XPD_TPDU_type */
+ { 0x5, 0x8, 0x0, 0x0 }, /* 0x2 XAK_TPDU_type */
+ { 0x0, 0x0, 0x0, 0x0 }, /* 0x3 GR_TPDU_type */
+ { 0x0, 0x0, 0x0, 0x0 }, /* 0x4 UNUSED */
+ { 0x0, 0x0, 0x0, 0x0 }, /* 0x5 UNUSED */
+ { 0x5, 0xa, 0x0, 0x0 }, /* 0x6 AK_TPDU_type */
+ { 0x5, 0x5, 0x0, 0x0 }, /* 0x7 ER_TPDU_type */
+ { 0x7, 0x7, 0x7, TP_MAX_DR_DATA }, /* 0x8 DR_TPDU_type */
+ { 0x0, 0x0, 0x0, 0x0 }, /* 0x9 UNUSED */
+ { 0x0, 0x0, 0x0, 0x0 }, /* 0xa UNUSED */
+ { 0x0, 0x0, 0x0, 0x0 }, /* 0xb UNUSED */
+ { 0x6, 0x6, 0x0, 0x0 }, /* 0xc DC_TPDU_type */
+ { 0x7, 0x7, 0x7, TP_MAX_CC_DATA }, /* 0xd CC_TPDU_type */
+ { 0x7, 0x7, 0x7, TP_MAX_CR_DATA }, /* 0xe CR_TPDU_type */
+ { 0x5, 0x8, 0x3, 0x0 }, /* 0xf DT_TPDU_type */
+};
+/*
+ * CHECK: if (Phrase) is true, record the failure and bail out:
+ * error = (Erval), errlen = (int)(Loc), bump the statistic (Stat)
+ * through IncStat(), then goto the label (Whattodo).
+ * It expands to a bare if-statement, so `error', `errlen' and the
+ * target label must be in scope at the point of use, and no trailing
+ * semicolon is required after the invocation.
+ */
+#define CHECK(Phrase, Erval, Stat, Whattodo, Loc)\
+ if (Phrase) {error = (Erval); errlen = (int)(Loc); IncStat(Stat);\
+ goto Whattodo; }
+
+/*
+ * WHENEVER YOU USE THE FOLLOWING MACRO,
+ * BE SURE THE TPDUTYPE IS A LEGIT VALUE FIRST!
+ * (hdr->tpdu_type is used unchecked as the row index into tpdu_info.)
+ *
+ * WHILE_OPTIONS opens a block and a for-loop that steps through the
+ * variable part of the header (hdr). (P) is declared here as the
+ * cursor: it starts at hdr + tpdu_info[type][format] and advances by
+ * 2 + tpv_len of the parameter it currently points at. The limit,
+ * PLIM, is hdr + 1 + hdr->tpdu_li. The loop ends when P lands exactly
+ * on PLIM; if P has gone past it, CHECK sets E_TP_LENGTH_INVAL and
+ * jumps to the label `respond', which the using function must supply.
+ * The statements written after the macro become the loop body and
+ * must be closed with END_WHILE_OPTIONS.
+ */
+
+#define WHILE_OPTIONS(P, hdr, format)\
+{ register caddr_t P = tpdu_info[(hdr)->tpdu_type][(format)] + (caddr_t)hdr;\
+ caddr_t PLIM = 1 + hdr->tpdu_li + (caddr_t)hdr;\
+ for (;; P += 2 + ((struct tp_vbp *)P)->tpv_len) {\
+ CHECK((P > PLIM), E_TP_LENGTH_INVAL, ts_inv_length,\
+ respond, P - (caddr_t)hdr);\
+ if (P == PLIM) break;
+/*
+ * Closes the for-loop and the block opened by WHILE_OPTIONS;
+ * the argument does not appear in the expansion.
+ */
+#define END_WHILE_OPTIONS(P) } }
+
+/* end groan */
+
+/*
+ * NAME: tp_newsocket()
+ *
+ * CALLED FROM:
+ * tp_input() on incoming CR, when a socket w/ the called suffix
+ * is awaiting a connection request
+ *
+ * FUNCTION and ARGUMENTS:
+ * Create a new socket structure, attach to it a new transport pcb,
+ * using a copy of the net level pcb for the parent socket.
+ * (so) is the parent socket.
+ * (fname) is the foreign address (all that's used is the nsap portion)
+ * (cons_channel) is handed unchanged to tp_route_to().
+ * (class_to_use) and (netservice) are stored in the new tpcb as
+ * tp_class and tp_netservice.
+ *
+ * The child tpcb inherits _tp_param, tp_flags, tp_lcredit,
+ * tp_l_tpdusize and the local suffix from the parent tpcb, plus a
+ * copy (m_copy) of the parent's tp_ucddata chain if there is one,
+ * and its state is set to TP_LISTENING.
+ *
+ * RETURN VALUE:
+ * a new socket structure, being this end of the newly formed connection.
+ * 0 if sonewconn() fails, or if (fname) is non-null and either no
+ * mbuf could be obtained for it or tp_route_to() returned nonzero.
+ *
+ * SIDE EFFECTS:
+ * Sets a few things in the tpcb and net level pcb
+ * On the mbuf/routing failure path the new tpcb is passed to
+ * tp_detach().
+ *
+ * NOTES:
+ * tp_dont_change_params is cleared in the new tpcb before routing.
+ */
+static struct socket *
+tp_newsocket(so, fname, cons_channel, class_to_use, netservice)
+ struct socket *so;
+ struct sockaddr *fname;
+ caddr_t cons_channel;
+ u_char class_to_use;
+ u_int netservice;
+{
+ register struct tp_pcb *tpcb = sototpcb(so); /* old tpcb, needed below */
+ register struct tp_pcb *newtpcb;
+
+ /*
+ * sonewconn() gets a new socket structure,
+ * a new lower layer pcb and a new tpcb,
+ * but the pcbs are unnamed (not bound)
+ */
+ IFTRACE(D_NEWSOCK)
+ tptraceTPCB(TPPTmisc, "newsock: listg_so, _tpcb, so_head",
+ so, tpcb, so->so_head, 0);
+ ENDTRACE
+
+ if ((so = sonewconn(so, SS_ISCONFIRMING)) == (struct socket *)0)
+ return so; /* so is 0 here: no child socket was created */
+ IFTRACE(D_NEWSOCK)
+ tptraceTPCB(TPPTmisc, "newsock: after newconn so, so_head",
+ so, so->so_head, 0, 0);
+ ENDTRACE
+
+ IFDEBUG(D_NEWSOCK)
+ printf("tp_newsocket(channel 0x%x) after sonewconn so 0x%x \n",
+ cons_channel, so);
+ dump_addr(fname);
+ {
+ struct socket *t, *head ;
+
+ head = so->so_head;
+ t = so;
+ printf("so 0x%x so_head 0x%x so_q0 0x%x, q0len %d\n",
+ t, t->so_head, t->so_q0, t->so_q0len);
+ while( (t=t->so_q0) && t!= so && t!= head)
+ printf("so 0x%x so_head 0x%x so_q0 0x%x, q0len %d\n",
+ t, t->so_head, t->so_q0, t->so_q0len);
+ }
+ ENDDEBUG
+
+ /*
+ * before we clobber the old tpcb ptr, get these items from the parent pcb
+ * (so now names the child socket; tpcb still names the parent's pcb)
+ */
+ newtpcb = sototpcb(so);
+ newtpcb->_tp_param = tpcb->_tp_param;
+ newtpcb->tp_flags = tpcb->tp_flags;
+ newtpcb->tp_lcredit = tpcb->tp_lcredit;
+ newtpcb->tp_l_tpdusize = tpcb->tp_l_tpdusize;
+ newtpcb->tp_lsuffixlen = tpcb->tp_lsuffixlen;
+ bcopy( tpcb->tp_lsuffix, newtpcb->tp_lsuffix, newtpcb->tp_lsuffixlen);
+
+ if( /* old */ tpcb->tp_ucddata) {
+ /*
+ * These data are the connect- , confirm- or disconnect- data.
+ */
+ struct mbuf *conndata;
+
+ conndata = m_copy(tpcb->tp_ucddata, 0, (int)M_COPYALL);
+ IFDEBUG(D_CONN)
+ dump_mbuf(conndata, "conndata after mcopy");
+ ENDDEBUG
+ newtpcb->tp_ucddata = conndata; /* NOTE(review): stored without a null check - confirm a failed m_copy() is tolerated */
+ }
+ /* from here on, tpcb names the child's pcb, not the parent's */
+ tpcb = newtpcb;
+ tpcb->tp_state = TP_LISTENING;
+ tpcb->tp_class = class_to_use;
+ tpcb->tp_netservice = netservice;
+
+
+ ASSERT( fname != 0 ) ; /* just checking */
+ if ( fname ) {
+ /*
+ * tp_route_to takes its address argument in the form of an mbuf.
+ * Every path through this block either jumps to ok or returns 0.
+ */
+ struct mbuf *m;
+ int err;
+
+ MGET(m, M_DONTWAIT, MT_SONAME); /* mbuf type used is confusing */
+ if (m) {
+ /*
+ * this seems a bit grotesque, but tp_route_to expects
+ * an mbuf * instead of simply a sockaddr; it calls the ll
+ * pcb_connect, which expects the name/addr in an mbuf as well.
+ * sigh.
+ * NOTE(review): sa_len bytes are copied with no check against
+ * the mbuf's capacity - confirm callers bound sa_len.
+ */
+ bcopy((caddr_t)fname, mtod(m, caddr_t), fname->sa_len);
+ m->m_len = fname->sa_len;
+
+ /* grot : have to say the kernel can override params in
+ * the passive open case
+ */
+ tpcb->tp_dont_change_params = 0;
+ err = tp_route_to( m, tpcb, cons_channel);
+ m_free(m); /* the address mbuf is released whether or not routing worked */
+
+ if (!err)
+ goto ok;
+ }
+ IFDEBUG(D_CONN)
+ printf("tp_route_to FAILED! detaching tpcb 0x%x, so 0x%x\n",
+ tpcb, so);
+ ENDDEBUG
+ (void) tp_detach(tpcb); /* reached when MGET failed or tp_route_to() returned nonzero */
+ return 0;
+ }
+ok:
+ IFDEBUG(D_TPINPUT)
+ printf("tp_newsocket returning so 0x%x, sototpcb(so) 0x%x\n",
+ so, sototpcb(so));
+ ENDDEBUG
+ return so;
+}
+
+#ifndef TPCONS
+/*
+ * Stand-in defined when TPCONS is not configured. It takes no
+ * arguments and always yields 0; tp_input() compares dgout_routine
+ * against its address, so the symbol has to exist either way.
+ */
+tpcons_output()
+{
+ return 0;
+}
+#endif /* !TPCONS */
+
+/*
+ * NAME: tp_input()
+ *
+ * CALLED FROM:
+ * net layer input routine
+ *
+ * FUNCTION and ARGUMENTS:
+ * Process an incoming TPDU (m), finding the associated tpcb if there
+ * is one. Create the appropriate type of event and call the driver.
+ * (faddr) and (laddr) are the foreign and local addresses.
+ *
+ * When tp_input() is called we KNOW that the ENTIRE TP HEADER
+ * has been m_pullup-ed.
+ *
+ * RETURN VALUE: Nada
+ *
+ * SIDE EFFECTS:
+ * When using COSNS it may affect the state of the net-level pcb
+ *
+ * NOTE:
+ * The initial value of acktime is 2 so that we will never
+ * have a 0 value for tp_peer_acktime. It gets used in the
+ * computation of the retransmission timer value, and so it
+ * mustn't be zero.
+ * 2 seems like a reasonable minimum.
+ */
+ProtoHook
+tp_input(m, faddr, laddr, cons_channel, dgout_routine, ce_bit)
+ register struct mbuf *m;
+ struct sockaddr *faddr, *laddr; /* NSAP addresses */
+ caddr_t cons_channel;
+ int (*dgout_routine)();
+ int ce_bit;
+
+{
+ register struct tp_pcb *tpcb;
+ register struct tpdu *hdr;
+ struct socket *so;
+ struct tp_event e;
+ int error;
+ unsigned dutype;
+ u_short dref, sref, acktime, subseq;
+ u_char preferred_class, class_to_use, pdusize;
+ u_char opt, dusize, addlopt, version;
+#ifdef TP_PERF_MEAS
+ u_char perf_meas;
+#endif /* TP_PERF_MEAS */
+ u_char fsufxlen, lsufxlen;
+ caddr_t fsufxloc, lsufxloc;
+ int tpdu_len;
+ u_int takes_data;
+ u_int fcc_present;
+ int errlen;
+ struct tp_conn_param tpp;
+ int tpcons_output();
+
+again:
+ hdr = mtod(m, struct tpdu *);
+ tpcb = 0;
+ error = errlen = tpdu_len = 0;
+ takes_data = fcc_present = FALSE;
+ acktime = 2; sref = subseq = 0;
+ fsufxloc = lsufxloc = NULL;
+ fsufxlen = lsufxlen =
+ preferred_class = class_to_use = pdusize = addlopt = 0;
+ dusize = TP_DFL_TPDUSIZE;
+#ifdef TP_PERF_MEAS
+ GET_CUR_TIME( &e.e_time ); perf_meas = 0;
+#endif /* TP_PERF_MEAS */
+
+ IFDEBUG(D_TPINPUT)
+ printf("tp_input(0x%x, ... 0x%x)\n", m, cons_channel);
+ ENDDEBUG
+
+
+ /*
+ * get the actual tpdu length - necessary for monitoring
+ * and for checksumming
+ *
+ * Also, maybe measure the mbuf chain lengths and sizes.
+ */
+
+ { register struct mbuf *n=m;
+# ifdef ARGO_DEBUG
+ int chain_length = 0;
+# endif ARGO_DEBUG
+
+ for(;;) {
+ tpdu_len += n->m_len;
+ IFDEBUG(D_MBUF_MEAS)
+ if( n->m_flags & M_EXT) {
+ IncStat(ts_mb_cluster);
+ } else {
+ IncStat(ts_mb_small);
+ }
+ chain_length ++;
+ ENDDEBUG
+ if (n->m_next == MNULL ) {
+ break;
+ }
+ n = n->m_next;
+ }
+ IFDEBUG(D_MBUF_MEAS)
+ if(chain_length > 16)
+ chain_length = 0; /* zero used for anything > 16 */
+ tp_stat.ts_mb_len_distr[chain_length] ++;
+ ENDDEBUG
+ }
+ IFTRACE(D_TPINPUT)
+ tptraceTPCB(TPPTtpduin, hdr->tpdu_type, hdr, hdr->tpdu_li+1, tpdu_len,
+ 0);
+ ENDTRACE
+
+ dref = ntohs((short)hdr->tpdu_dref);
+ sref = ntohs((short)hdr->tpdu_sref);
+ dutype = (int)hdr->tpdu_type;
+
+ IFDEBUG(D_TPINPUT)
+ printf("input: dutype 0x%x cons_channel 0x%x dref 0x%x\n", dutype,
+ cons_channel, dref);
+ printf("input: dref 0x%x sref 0x%x\n", dref, sref);
+ ENDDEBUG
+ IFTRACE(D_TPINPUT)
+ tptrace(TPPTmisc, "channel dutype dref ",
+ cons_channel, dutype, dref, 0);
+ ENDTRACE
+
+
+#ifdef ARGO_DEBUG
+ if( (dutype < TP_MIN_TPDUTYPE) || (dutype > TP_MAX_TPDUTYPE)) {
+ printf("BAD dutype! 0x%x, channel 0x%x dref 0x%x\n",
+ dutype, cons_channel, dref);
+ dump_buf (m, sizeof( struct mbuf ));
+
+ IncStat(ts_inv_dutype);
+ goto discard;
+ }
+#endif /* ARGO_DEBUG */
+
+ CHECK( (dutype < TP_MIN_TPDUTYPE || dutype > TP_MAX_TPDUTYPE),
+ E_TP_INV_TPDU, ts_inv_dutype, respond,
+ 2 );
+ /* unfortunately we can't take the address of the tpdu_type field,
+ * since it's a bit field - so we just use the constant offset 2
+ */
+
+ /* Now this isn't very neat but since you locate a pcb one way
+ * at the beginning of connection establishment, and by
+ * the dref for each tpdu after that, we have to treat CRs differently
+ */
+ if ( dutype == CR_TPDU_type ) {
+ u_char alt_classes = 0;
+
+ preferred_class = 1 << hdr->tpdu_CRclass;
+ opt = hdr->tpdu_CRoptions;
+
+ WHILE_OPTIONS(P, hdr, 1 ) /* { */
+
+ switch( vbptr(P)->tpv_code ) {
+
+ case TPP_tpdu_size:
+ vb_getval(P, u_char, dusize);
+ IFDEBUG(D_TPINPUT)
+ printf("CR dusize 0x%x\n", dusize);
+ ENDDEBUG
+ /* COS tests: NBS IA (Dec. 1987) Sec. 4.5.2.1 */
+ if (dusize < TP_MIN_TPDUSIZE || dusize > TP_MAX_TPDUSIZE)
+ dusize = TP_DFL_TPDUSIZE;
+ break;
+ case TPP_ptpdu_size:
+ switch (vbptr(P)->tpv_len) {
+ case 1: pdusize = vbval(P, u_char); break;
+ case 2: pdusize = ntohs(vbval(P, u_short)); break;
+ default: ;
+ IFDEBUG(D_TPINPUT)
+ printf("malformed prefered TPDU option\n");
+ ENDDEBUG
+ }
+ break;
+ case TPP_addl_opt:
+ vb_getval(P, u_char, addlopt);
+ break;
+ case TPP_calling_sufx:
+ /* could use vb_getval, but we want to save the loc & len
+ * for later use
+ */
+ fsufxloc = (caddr_t) &vbptr(P)->tpv_val;
+ fsufxlen = vbptr(P)->tpv_len;
+ IFDEBUG(D_TPINPUT)
+ printf("CR fsufx:");
+ { register int j;
+ for(j=0; j<fsufxlen; j++ ) {
+ printf(" 0x%x. ", *((caddr_t)(fsufxloc+j)) );
+ }
+ printf("\n");
+ }
+ ENDDEBUG
+ break;
+ case TPP_called_sufx:
+ /* could use vb_getval, but we want to save the loc & len
+ * for later use
+ */
+ lsufxloc = (caddr_t) &vbptr(P)->tpv_val;
+ lsufxlen = vbptr(P)->tpv_len;
+ IFDEBUG(D_TPINPUT)
+ printf("CR lsufx:");
+ { register int j;
+ for(j=0; j<lsufxlen; j++ ) {
+ printf(" 0x%x. ", *((u_char *)(lsufxloc+j)) );
+ }
+ printf("\n");
+ }
+ ENDDEBUG
+ break;
+
+#ifdef TP_PERF_MEAS
+ case TPP_perf_meas:
+ vb_getval(P, u_char, perf_meas);
+ break;
+#endif /* TP_PERF_MEAS */
+
+ case TPP_vers:
+ /* not in class 0; 1 octet; in CR_TPDU only */
+ /* COS tests says if version wrong, use default version!?XXX */
+ CHECK( (vbval(P, u_char) != TP_VERSION ),
+ E_TP_INV_PVAL, ts_inv_pval, setversion,
+ (1 + (caddr_t)&vbptr(P)->tpv_val - (caddr_t)hdr) );
+ setversion:
+ version = vbval(P, u_char);
+ break;
+ case TPP_acktime:
+ vb_getval(P, u_short, acktime);
+ acktime = ntohs(acktime);
+ acktime = acktime/500; /* convert to slowtimo ticks */
+ if((short)acktime <=0 )
+ acktime = 2; /* don't allow a bad peer to screw us up */
+ IFDEBUG(D_TPINPUT)
+ printf("CR acktime 0x%x\n", acktime);
+ ENDDEBUG
+ break;
+
+ case TPP_alt_class:
+ {
+ u_char *aclass = 0;
+ register int i;
+ static u_char bad_alt_classes[5] =
+ { ~0, ~3, ~5, ~0xf, ~0x1f};
+
+ aclass =
+ (u_char *) &(((struct tp_vbp *)P)->tpv_val);
+ for (i = ((struct tp_vbp *)P)->tpv_len; i>0; i--) {
+ alt_classes |= (1<<((*aclass++)>>4));
+ }
+ CHECK( (bad_alt_classes[hdr->tpdu_CRclass] & alt_classes),
+ E_TP_INV_PVAL, ts_inv_aclass, respond,
+ ((caddr_t)aclass) - (caddr_t)hdr);
+ IFDEBUG(D_TPINPUT)
+ printf("alt_classes 0x%x\n", alt_classes);
+ ENDDEBUG
+ }
+ break;
+
+ case TPP_security:
+ case TPP_residER:
+ case TPP_priority:
+ case TPP_transdelay:
+ case TPP_throughput:
+ case TPP_addl_info:
+ case TPP_subseq:
+ default:
+ IFDEBUG(D_TPINPUT)
+ printf("param ignored CR_TPDU code= 0x%x\n",
+ vbptr(P)->tpv_code);
+ ENDDEBUG
+ IncStat(ts_param_ignored);
+ break;
+
+ case TPP_checksum:
+ IFDEBUG(D_TPINPUT)
+ printf("CR before cksum\n");
+ ENDDEBUG
+
+ CHECK( iso_check_csum(m, tpdu_len),
+ E_TP_INV_PVAL, ts_bad_csum, discard, 0)
+
+ IFDEBUG(D_TPINPUT)
+ printf("CR before cksum\n");
+ ENDDEBUG
+ break;
+ }
+
+ /* } */ END_WHILE_OPTIONS(P)
+
+ if (lsufxlen == 0) {
+ /* can't look for a tpcb w/o any called sufx */
+ error = E_TP_LENGTH_INVAL;
+ IncStat(ts_inv_sufx);
+ goto respond;
+ } else {
+ register struct tp_pcb *t;
+ /*
+ * The intention here is to trap all CR requests
+ * to a given nsap, for constructing transport
+ * service bridges at user level; so these
+ * intercepts should precede the normal listens.
+ * Phrasing the logic in this way also allows for
+ * mop-up listeners, which we don't currently implement.
+ * We also wish to have a single socket be able to
+ * listen over any network service provider,
+ * (cons or clns or ip).
+ */
+ for (t = tp_listeners; t ; t = t->tp_nextlisten)
+ if ((t->tp_lsuffixlen == 0 ||
+ (lsufxlen == t->tp_lsuffixlen &&
+ bcmp(lsufxloc, t->tp_lsuffix, lsufxlen) == 0)) &&
+ ((t->tp_flags & TPF_GENERAL_ADDR) ||
+ (laddr->sa_family == t->tp_domain &&
+ (*t->tp_nlproto->nlp_cmpnetaddr)
+ (t->tp_npcb, laddr, TP_LOCAL))))
+ break;
+
+ CHECK(t == 0, E_TP_NO_SESSION, ts_inv_sufx, respond,
+ (1 + 2 + (caddr_t)&hdr->_tpduf - (caddr_t)hdr))
+ /* _tpduf is the fixed part; add 2 to get the dref bits of
+ * the fixed part (can't take the address of a bit field)
+ */
+ IFDEBUG(D_TPINPUT)
+ printf("checking if dup CR\n");
+ ENDDEBUG
+ tpcb = t;
+ for (t = tpcb->tp_next; t != tpcb; t = t->tp_next) {
+ if (sref != t->tp_fref)
+ continue;
+ if ((*tpcb->tp_nlproto->nlp_cmpnetaddr)(
+ t->tp_npcb, faddr, TP_FOREIGN)) {
+ IFDEBUG(D_TPINPUT)
+ printf("duplicate CR discarded\n");
+ ENDDEBUG
+ goto discard;
+ }
+ }
+ IFTRACE(D_TPINPUT)
+ tptrace(TPPTmisc, "tp_input: tpcb *lsufxloc tpstate",
+ tpcb, *lsufxloc, tpcb->tp_state, 0);
+ ENDTRACE
+ }
+
+ /*
+ * WE HAVE A TPCB
+ * already know that the classes in the CR match at least
+ * one class implemented, but we don't know yet if they
+ * include any classes permitted by this server.
+ */
+
+ IFDEBUG(D_TPINPUT)
+ printf("HAVE A TPCB 1: 0x%x\n", tpcb);
+ ENDDEBUG
+ IFDEBUG(D_CONN)
+ printf(
+"CR: bef CHKS: flags 0x%x class_to_use 0x%x alt 0x%x opt 0x%x tp_class 0x%x\n",
+ tpcb->tp_flags, class_to_use, alt_classes, opt, tpcb->tp_class);
+ ENDDEBUG
+ /* tpcb->tp_class doesn't include any classes not implemented */
+ class_to_use = (preferred_class & tpcb->tp_class);
+ if( (class_to_use = preferred_class & tpcb->tp_class) == 0 )
+ class_to_use = alt_classes & tpcb->tp_class;
+
+ class_to_use = 1 << tp_mask_to_num(class_to_use);
+
+ {
+ tpp = tpcb->_tp_param;
+ tpp.p_class = class_to_use;
+ tpp.p_tpdusize = dusize;
+ tpp.p_ptpdusize = pdusize;
+ tpp.p_xtd_format = (opt & TPO_XTD_FMT) == TPO_XTD_FMT;
+ tpp.p_xpd_service = (addlopt & TPAO_USE_TXPD) == TPAO_USE_TXPD;
+ tpp.p_use_checksum = (tpp.p_class == TP_CLASS_0)?0:
+ (addlopt & TPAO_NO_CSUM) == 0;
+ tpp.p_version = version;
+#ifdef notdef
+ tpp.p_use_efc = (opt & TPO_USE_EFC) == TPO_USE_EFC;
+ tpp.p_use_nxpd = (addlopt & TPAO_USE_NXPD) == TPAO_USE_NXPD;
+ tpp.p_use_rcc = (addlopt & TPAO_USE_RCC) == TPAO_USE_RCC;
+#endif /* notdef */
+
+ CHECK(
+ tp_consistency(tpcb, 0 /* not force or strict */, &tpp) != 0,
+ E_TP_NEGOT_FAILED, ts_negotfailed, clear_parent_tcb,
+ (1 + 2 + (caddr_t)&hdr->_tpdufr.CRCC - (caddr_t)hdr)
+ /* ^ more or less the location of class */
+ )
+ }
+ IFTRACE(D_CONN)
+ tptrace(TPPTmisc,
+ "after 1 consist class_to_use class, out, tpconsout",
+ class_to_use,
+ tpcb->tp_class, dgout_routine, tpcons_output
+ );
+ ENDTRACE
+ CHECK(
+ ((class_to_use == TP_CLASS_0)&&(dgout_routine != tpcons_output)),
+ E_TP_NEGOT_FAILED, ts_negotfailed, clear_parent_tcb,
+ (1 + 2 + (caddr_t)&hdr->_tpdufr.CRCC - (caddr_t)hdr)
+ /* ^ more or less the location of class */
+ )
+ IFDEBUG(D_CONN)
+ printf("CR: after CRCCCHECKS: tpcb 0x%x, flags 0x%x\n",
+ tpcb, tpcb->tp_flags);
+ ENDDEBUG
+ takes_data = TRUE;
+ e.ATTR(CR_TPDU).e_cdt = hdr->tpdu_CRcdt;
+ e.ev_number = CR_TPDU;
+
+ so = tpcb->tp_sock;
+ if (so->so_options & SO_ACCEPTCONN) {
+ struct tp_pcb *parent_tpcb = tpcb;
+ /*
+ * Create a socket, tpcb, ll pcb, etc.
+ * for this newborn connection, and fill in all the values.
+ */
+ IFDEBUG(D_CONN)
+ printf("abt to call tp_newsocket(0x%x, 0x%x, 0x%x, 0x%x)\n",
+ so, laddr, faddr, cons_channel);
+ ENDDEBUG
+ if( (so =
+ tp_newsocket(so, faddr, cons_channel,
+ class_to_use,
+ ((tpcb->tp_netservice == IN_CLNS) ? IN_CLNS :
+ (dgout_routine == tpcons_output)?ISO_CONS:ISO_CLNS))
+ ) == (struct socket *)0 ) {
+ /* note - even if netservice is IN_CLNS, as far as
+ * the tp entity is concerned, the only differences
+ * are CO vs CL
+ */
+ IFDEBUG(D_CONN)
+ printf("tp_newsocket returns 0\n");
+ ENDDEBUG
+ goto discard;
+ clear_parent_tcb:
+ tpcb = 0;
+ goto respond;
+ }
+ tpcb = sototpcb(so);
+ insque(tpcb, parent_tpcb);
+
+ /*
+ * Stash the addresses in the net level pcb
+ * kind of like a pcbconnect() but don't need
+ * or want all those checks.
+ */
+ (tpcb->tp_nlproto->nlp_putnetaddr)(tpcb->tp_npcb, faddr, TP_FOREIGN);
+ (tpcb->tp_nlproto->nlp_putnetaddr)(tpcb->tp_npcb, laddr, TP_LOCAL);
+
+ /* stash the f suffix in the new tpcb */
+ if (tpcb->tp_fsuffixlen = fsufxlen) {
+ bcopy(fsufxloc, tpcb->tp_fsuffix, fsufxlen);
+ (tpcb->tp_nlproto->nlp_putsufx)
+ (tpcb->tp_npcb, fsufxloc, fsufxlen, TP_FOREIGN);
+ }
+ /* stash the l suffix in the new tpcb */
+ tpcb->tp_lsuffixlen = lsufxlen;
+ bcopy(lsufxloc, tpcb->tp_lsuffix, lsufxlen);
+ (tpcb->tp_nlproto->nlp_putsufx)
+ (tpcb->tp_npcb, lsufxloc, lsufxlen, TP_LOCAL);
+#ifdef TP_PERF_MEAS
+ if( tpcb->tp_perf_on = perf_meas ) { /* assignment */
+ /* ok, let's create an mbuf for stashing the
+ * statistics if one doesn't already exist
+ */
+ (void) tp_setup_perf(tpcb);
+ }
+#endif /* TP_PERF_MEAS */
+ tpcb->tp_fref = sref;
+
+ /* We've already checked for consistency with the options
+ * set in tpp, but we couldn't set them earlier because
+ * we didn't want to change options in the LISTENING tpcb.
+ * Now we set the options in the new socket's tpcb.
+ */
+ (void) tp_consistency( tpcb, TP_FORCE, &tpp);
+
+ if(!tpcb->tp_use_checksum)
+ IncStat(ts_csum_off);
+ if(tpcb->tp_xpd_service)
+ IncStat(ts_use_txpd);
+ if(tpcb->tp_xtd_format)
+ IncStat(ts_xtd_fmt);
+
+ tpcb->tp_peer_acktime = acktime;
+
+ /*
+ * The following kludge is used to test retransmissions and
+ * timeout during connection establishment.
+ */
+ IFDEBUG(D_ZDREF)
+ IncStat(ts_zdebug);
+ /*tpcb->tp_fref = 0;*/
+ ENDDEBUG
+ }
+ LOCAL_CREDIT(tpcb);
+ IncStat(ts_CR_rcvd);
+ if (!tpcb->tp_cebit_off) {
+ tpcb->tp_win_recv = tp_start_win << 8;
+ tpcb->tp_cong_sample.cs_size = 0;
+ CONG_INIT_SAMPLE(tpcb);
+ CONG_UPDATE_SAMPLE(tpcb, ce_bit);
+ }
+ } else if ( dutype == ER_TPDU_type ) {
+ /*
+ * ER TPDUs have to be recognized separately
+ * because they don't necessarily have a tpcb
+ * with them and we don't want err out looking for such
+ * a beast.
+ * We could put a bunch of little kludges in the
+ * next section of code so it would avoid references to tpcb
+ * if dutype == ER_TPDU_type but we don't want code for ERs to
+ * mess up code for data transfer.
+ */
+ IncStat(ts_ER_rcvd);
+ e.ev_number = ER_TPDU;
+ e.ATTR(ER_TPDU).e_reason = (u_char)hdr->tpdu_ERreason;
+ CHECK (((int)dref <= 0 || dref >= tp_refinfo.tpr_size ||
+ (tpcb = tp_ref[dref].tpr_pcb ) == (struct tp_pcb *) 0 ||
+ tpcb->tp_refstate == REF_FREE ||
+ tpcb->tp_refstate == REF_FROZEN),
+ E_TP_MISM_REFS, ts_inv_dref, discard, 0)
+
+ } else {
+ /* tpdu type is CC, XPD, XAK, GR, AK, DR, DC, or DT */
+
+ /* In the next 4 checks,
+ * _tpduf is the fixed part; add 2 to get the dref bits of
+ * the fixed part (can't take the address of a bit field)
+ */
+#ifdef TPCONS
+ if (cons_channel && dutype == DT_TPDU_type) {
+ struct isopcb *isop = ((struct isopcb *)
+ ((struct pklcd *)cons_channel)->lcd_upnext);
+ if (isop && isop->isop_refcnt == 1 && isop->isop_socket &&
+ (tpcb = sototpcb(isop->isop_socket)) &&
+ (tpcb->tp_class == TP_CLASS_0/* || == CLASS_1 */)) {
+ IFDEBUG(D_TPINPUT)
+ printf("tpinput_dt: class 0 short circuit\n");
+ ENDDEBUG
+ dref = tpcb->tp_lref;
+ sref = tpcb->tp_fref;
+ CHECK( (tpcb->tp_refstate == REF_FREE),
+ E_TP_MISM_REFS,ts_inv_dref, nonx_dref,
+ (1 + 2 + (caddr_t)&hdr->_tpduf - (caddr_t)hdr))
+ goto tp0_data;
+ }
+
+ }
+#endif
+ {
+
+ CHECK( ((int)dref <= 0 || dref >= tp_refinfo.tpr_size) ,
+ E_TP_MISM_REFS,ts_inv_dref, nonx_dref,
+ (1 + 2 + (caddr_t)&hdr->_tpduf - (caddr_t)hdr))
+ CHECK( ((tpcb = tp_ref[dref].tpr_pcb ) == (struct tp_pcb *) 0 ),
+ E_TP_MISM_REFS,ts_inv_dref, nonx_dref,
+ (1 + 2 + (caddr_t)&hdr->_tpduf - (caddr_t)hdr))
+ CHECK( (tpcb->tp_refstate == REF_FREE),
+ E_TP_MISM_REFS,ts_inv_dref, nonx_dref,
+ (1 + 2 + (caddr_t)&hdr->_tpduf - (caddr_t)hdr))
+ }
+
+ IFDEBUG(D_TPINPUT)
+ printf("HAVE A TPCB 2: 0x%x\n", tpcb);
+ ENDDEBUG
+
+ /* causes a DR to be sent for CC; ER for all else */
+ CHECK( (tpcb->tp_refstate == REF_FROZEN),
+ (dutype == CC_TPDU_type?E_TP_NO_SESSION:E_TP_MISM_REFS),
+ ts_inv_dref, respond,
+ (1 + 2 + (caddr_t)&hdr->_tpduf - (caddr_t)hdr))
+
+ IFDEBUG(D_TPINPUT)
+ printf("state of dref %d ok, tpcb 0x%x\n", dref,tpcb);
+ ENDDEBUG
+ /*
+ * At this point the state of the dref could be
+ * FROZEN: tpr_pcb == NULL, has ( reference only) timers
+ * for example, DC may arrive after the close() has detached
+ * the tpcb (e.g., if user turned off SO_LISTEN option)
+ * OPENING : a tpcb exists but no timers yet
+ * OPEN : tpcb exists & timers are outstanding
+ */
+
+ if (!tpcb->tp_cebit_off)
+ CONG_UPDATE_SAMPLE(tpcb, ce_bit);
+
+ dusize = tpcb->tp_tpdusize;
+ pdusize = tpcb->tp_ptpdusize;
+
+ dutype = hdr->tpdu_type << 8; /* for the switch below */
+
+ WHILE_OPTIONS(P, hdr, tpcb->tp_xtd_format) /* { */
+
+#define caseof(x,y) case (((x)<<8)+(y))
+ switch( dutype | vbptr(P)->tpv_code ) {
+
+ caseof( CC_TPDU_type, TPP_addl_opt ):
+ /* not in class 0; 1 octet */
+ vb_getval(P, u_char, addlopt);
+ break;
+ caseof( CC_TPDU_type, TPP_tpdu_size ):
+ {
+ u_char odusize = dusize;
+ vb_getval(P, u_char, dusize);
+ CHECK( (dusize < TP_MIN_TPDUSIZE ||
+ dusize > TP_MAX_TPDUSIZE || dusize > odusize),
+ E_TP_INV_PVAL, ts_inv_pval, respond,
+ (1 + (caddr_t)&vbptr(P)->tpv_val - (caddr_t)hdr) )
+ IFDEBUG(D_TPINPUT)
+ printf("CC dusize 0x%x\n", dusize);
+ ENDDEBUG
+ }
+ break;
+ caseof( CC_TPDU_type, TPP_ptpdu_size ):
+ {
+ u_short opdusize = pdusize;
+ switch (vbptr(P)->tpv_len) {
+ case 1: pdusize = vbval(P, u_char); break;
+ case 2: pdusize = ntohs(vbval(P, u_short)); break;
+ default: ;
+ IFDEBUG(D_TPINPUT)
+ printf("malformed prefered TPDU option\n");
+ ENDDEBUG
+ }
+ CHECK( (pdusize == 0 ||
+ (opdusize && (pdusize > opdusize))),
+ E_TP_INV_PVAL, ts_inv_pval, respond,
+ (1 + (caddr_t)&vbptr(P)->tpv_val - (caddr_t)hdr) )
+ }
+ break;
+ caseof( CC_TPDU_type, TPP_calling_sufx):
+ IFDEBUG(D_TPINPUT)
+ printf("CC calling (local) sufxlen 0x%x\n", lsufxlen);
+ ENDDEBUG
+ lsufxloc = (caddr_t) &vbptr(P)->tpv_val;
+ lsufxlen = vbptr(P)->tpv_len;
+ break;
+ caseof( CC_TPDU_type, TPP_acktime ):
+ /* class 4 only, 2 octets */
+ vb_getval(P, u_short, acktime);
+ acktime = ntohs(acktime);
+ acktime = acktime/500; /* convert to slowtimo ticks */
+ if( (short)acktime <=0 )
+ acktime = 2;
+ break;
+ caseof( CC_TPDU_type, TPP_called_sufx):
+ fsufxloc = (caddr_t) &vbptr(P)->tpv_val;
+ fsufxlen = vbptr(P)->tpv_len;
+ IFDEBUG(D_TPINPUT)
+ printf("CC called (foreign) sufx len %d\n", fsufxlen);
+ ENDDEBUG
+ break;
+
+ caseof( CC_TPDU_type, TPP_checksum):
+ caseof( DR_TPDU_type, TPP_checksum):
+ caseof( DT_TPDU_type, TPP_checksum):
+ caseof( XPD_TPDU_type, TPP_checksum):
+ if( tpcb->tp_use_checksum ) {
+ CHECK( iso_check_csum(m, tpdu_len),
+ E_TP_INV_PVAL, ts_bad_csum, discard, 0)
+ }
+ break;
+
+ /* this is different from the above because in the context
+ * of concat/ sep tpdu_len might not be the same as hdr len
+ */
+ caseof( AK_TPDU_type, TPP_checksum):
+ caseof( XAK_TPDU_type, TPP_checksum):
+ caseof( DC_TPDU_type, TPP_checksum):
+ if( tpcb->tp_use_checksum ) {
+ CHECK( iso_check_csum(m, (int)hdr->tpdu_li + 1),
+ E_TP_INV_PVAL, ts_bad_csum, discard, 0)
+ }
+ break;
+#ifdef notdef
+ caseof( DR_TPDU_type, TPP_addl_info ):
+ /* ignore - its length and meaning are
+ * user defined and there's no way
+ * to pass this info to the user anyway
+ */
+ break;
+#endif /* notdef */
+
+ caseof( AK_TPDU_type, TPP_subseq ):
+ /* used after reduction of window */
+ vb_getval(P, u_short, subseq);
+ subseq = ntohs(subseq);
+ IFDEBUG(D_ACKRECV)
+ printf("AK dref 0x%x Subseq 0x%x\n", dref, subseq);
+ ENDDEBUG
+ break;
+
+ caseof( AK_TPDU_type, TPP_flow_cntl_conf ):
+ {
+ u_int ylwe;
+ u_short ysubseq, ycredit;
+
+ fcc_present = TRUE;
+ vb_getval(P, u_int, ylwe);
+ vb_getval(P, u_short, ysubseq);
+ vb_getval(P, u_short, ycredit);
+ ylwe = ntohl(ylwe);
+ ysubseq = ntohs(ysubseq);
+ ycredit = ntohs(ycredit);
+ IFDEBUG(D_ACKRECV)
+ printf("%s%x, subseq 0x%x, cdt 0x%x dref 0x%x\n",
+ "AK FCC lwe 0x", ylwe, ysubseq, ycredit, dref);
+ ENDDEBUG
+ }
+ break;
+
+ default:
+ IFDEBUG(D_TPINPUT)
+ printf("param ignored dutype 0x%x, code 0x%x\n",
+ dutype, vbptr(P)->tpv_code);
+ ENDDEBUG
+ IFTRACE(D_TPINPUT)
+ tptrace(TPPTmisc, "param ignored dutype code ",
+ dutype, vbptr(P)->tpv_code ,0,0);
+ ENDTRACE
+ IncStat(ts_param_ignored);
+ break;
+#undef caseof
+ }
+ /* } */ END_WHILE_OPTIONS(P)
+
+ /* NOTE: the variable dutype has been shifted left! */
+
+ switch( hdr->tpdu_type ) {
+ case CC_TPDU_type:
+ /* If CC comes back with an unacceptable class
+ * respond with a DR or ER
+ */
+
+ opt = hdr->tpdu_CCoptions; /* 1 byte */
+
+ {
+ tpp = tpcb->_tp_param;
+ tpp.p_class = (1<<hdr->tpdu_CCclass);
+ tpp.p_tpdusize = dusize;
+ tpp.p_ptpdusize = pdusize;
+ tpp.p_dont_change_params = 0;
+ tpp.p_xtd_format = (opt & TPO_XTD_FMT) == TPO_XTD_FMT;
+ tpp.p_xpd_service = (addlopt & TPAO_USE_TXPD) == TPAO_USE_TXPD;
+ tpp.p_use_checksum = (addlopt & TPAO_NO_CSUM) == 0;
+#ifdef notdef
+ tpp.p_use_efc = (opt & TPO_USE_EFC) == TPO_USE_EFC;
+ tpp.p_use_nxpd = (addlopt & TPAO_USE_NXPD) == TPAO_USE_NXPD;
+ tpp.p_use_rcc = (addlopt & TPAO_USE_RCC) == TPAO_USE_RCC;
+#endif /* notdef */
+
+ CHECK(
+ tp_consistency(tpcb, TP_FORCE, &tpp) != 0,
+ E_TP_NEGOT_FAILED, ts_negotfailed, respond,
+ (1 + 2 + (caddr_t)&hdr->_tpdufr.CRCC - (caddr_t)hdr)
+ /* ^ more or less the location of class */
+ )
+ IFTRACE(D_CONN)
+ tptrace(TPPTmisc,
+ "after 1 consist class, out, tpconsout",
+ tpcb->tp_class, dgout_routine, tpcons_output, 0
+ );
+ ENDTRACE
+ CHECK(
+ ((class_to_use == TP_CLASS_0)&&
+ (dgout_routine != tpcons_output)),
+ E_TP_NEGOT_FAILED, ts_negotfailed, respond,
+ (1 + 2 + (caddr_t)&hdr->_tpdufr.CRCC - (caddr_t)hdr)
+ /* ^ more or less the location of class */
+ )
+#ifdef TPCONS
+ if (tpcb->tp_netservice == ISO_CONS &&
+ class_to_use == TP_CLASS_0) {
+ struct isopcb *isop = (struct isopcb *)tpcb->tp_npcb;
+ struct pklcd *lcp = (struct pklcd *)isop->isop_chan;
+ lcp->lcd_flags &= ~X25_DG_CIRCUIT;
+ }
+#endif
+ }
+ if( ! tpcb->tp_use_checksum)
+ IncStat(ts_csum_off);
+ if(tpcb->tp_xpd_service)
+ IncStat(ts_use_txpd);
+ if(tpcb->tp_xtd_format)
+ IncStat(ts_xtd_fmt);
+
+ IFTRACE(D_CONN)
+ tptrace(TPPTmisc, "after CC class flags dusize CCclass",
+ tpcb->tp_class, tpcb->tp_flags, tpcb->tp_tpdusize,
+ hdr->tpdu_CCclass);
+ ENDTRACE
+
+ /* if called or calling suffices appeared on the CC,
+ * they'd better jive with what's in the pcb
+ */
+ if( fsufxlen ) {
+ CHECK( ((tpcb->tp_fsuffixlen != fsufxlen) ||
+ bcmp(fsufxloc, tpcb->tp_fsuffix, fsufxlen)),
+ E_TP_INV_PVAL,ts_inv_sufx, respond,
+ (1+fsufxloc - (caddr_t)hdr))
+ }
+ if( lsufxlen ) {
+ CHECK( ((tpcb->tp_lsuffixlen != lsufxlen) ||
+ bcmp(lsufxloc, tpcb->tp_lsuffix, lsufxlen)),
+ E_TP_INV_PVAL,ts_inv_sufx, respond,
+ (1+lsufxloc - (caddr_t)hdr))
+ }
+
+ e.ATTR(CC_TPDU).e_sref = sref;
+ e.ATTR(CC_TPDU).e_cdt = hdr->tpdu_CCcdt;
+ takes_data = TRUE;
+ e.ev_number = CC_TPDU;
+ IncStat(ts_CC_rcvd);
+ break;
+
+ case DC_TPDU_type:
+ if (sref != tpcb->tp_fref)
+ printf("INPUT: inv sufx DCsref 0x%x, tp_fref 0x%x\n",
+ sref, tpcb->tp_fref);
+
+ CHECK( (sref != tpcb->tp_fref),
+ E_TP_MISM_REFS, ts_inv_sufx, discard,
+ (1 + (caddr_t)&hdr->tpdu_DCsref - (caddr_t)hdr))
+
+ e.ev_number = DC_TPDU;
+ IncStat(ts_DC_rcvd);
+ break;
+
+ case DR_TPDU_type:
+ IFTRACE(D_TPINPUT)
+ tptrace(TPPTmisc, "DR recvd", hdr->tpdu_DRreason, 0, 0, 0);
+ ENDTRACE
+ if (sref != tpcb->tp_fref) {
+ printf("INPUT: inv sufx DRsref 0x%x tp_fref 0x%x\n",
+ sref, tpcb->tp_fref);
+ }
+
+ CHECK( (sref != 0 && sref != tpcb->tp_fref &&
+ tpcb->tp_state != TP_CRSENT),
+ (TP_ERROR_SNDC | E_TP_MISM_REFS),ts_inv_sufx, respond,
+ (1 + (caddr_t)&hdr->tpdu_DRsref - (caddr_t)hdr))
+
+ e.ATTR(DR_TPDU).e_reason = hdr->tpdu_DRreason;
+ e.ATTR(DR_TPDU).e_sref = (u_short)sref;
+ takes_data = TRUE;
+ e.ev_number = DR_TPDU;
+ IncStat(ts_DR_rcvd);
+ break;
+
+ case ER_TPDU_type:
+ IFTRACE(D_TPINPUT)
+ tptrace(TPPTmisc, "ER recvd", hdr->tpdu_ERreason,0,0,0);
+ ENDTRACE
+ e.ev_number = ER_TPDU;
+ e.ATTR(ER_TPDU).e_reason = hdr->tpdu_ERreason;
+ IncStat(ts_ER_rcvd);
+ break;
+
+ case AK_TPDU_type:
+
+ e.ATTR(AK_TPDU).e_subseq = subseq;
+ e.ATTR(AK_TPDU).e_fcc_present = fcc_present;
+
+ if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+ union seq_type seqeotX;
+
+ seqeotX.s_seqeot = ntohl(hdr->tpdu_seqeotX);
+ e.ATTR(AK_TPDU).e_seq = seqeotX.s_seq;
+ e.ATTR(AK_TPDU).e_cdt = ntohs(hdr->tpdu_AKcdtX);
+#else
+ e.ATTR(AK_TPDU).e_cdt = hdr->tpdu_AKcdtX;
+ e.ATTR(AK_TPDU).e_seq = hdr->tpdu_AKseqX;
+#endif /* BYTE_ORDER */
+ } else {
+ e.ATTR(AK_TPDU).e_cdt = hdr->tpdu_AKcdt;
+ e.ATTR(AK_TPDU).e_seq = hdr->tpdu_AKseq;
+ }
+ IFTRACE(D_TPINPUT)
+ tptrace(TPPTmisc, "AK recvd seq cdt subseq fcc_pres",
+ e.ATTR(AK_TPDU).e_seq, e.ATTR(AK_TPDU).e_cdt,
+ subseq, fcc_present);
+ ENDTRACE
+
+ e.ev_number = AK_TPDU;
+ IncStat(ts_AK_rcvd);
+ IncPStat(tpcb, tps_AK_rcvd);
+ break;
+
+ case XAK_TPDU_type:
+ if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+ union seq_type seqeotX;
+
+ seqeotX.s_seqeot = ntohl(hdr->tpdu_seqeotX);
+ e.ATTR(XAK_TPDU).e_seq = seqeotX.s_seq;
+#else
+ e.ATTR(XAK_TPDU).e_seq = hdr->tpdu_XAKseqX;
+#endif /* BYTE_ORDER */
+ } else {
+ e.ATTR(XAK_TPDU).e_seq = hdr->tpdu_XAKseq;
+ }
+ e.ev_number = XAK_TPDU;
+ IncStat(ts_XAK_rcvd);
+ IncPStat(tpcb, tps_XAK_rcvd);
+ break;
+
+ case XPD_TPDU_type:
+ if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+ union seq_type seqeotX;
+
+ seqeotX.s_seqeot = ntohl(hdr->tpdu_seqeotX);
+ e.ATTR(XPD_TPDU).e_seq = seqeotX.s_seq;
+#else
+ e.ATTR(XPD_TPDU).e_seq = hdr->tpdu_XPDseqX;
+#endif /* BYTE_ORDER */
+ } else {
+ e.ATTR(XPD_TPDU).e_seq = hdr->tpdu_XPDseq;
+ }
+ takes_data = TRUE;
+ e.ev_number = XPD_TPDU;
+ IncStat(ts_XPD_rcvd);
+ IncPStat(tpcb, tps_XPD_rcvd);
+ break;
+
+ case DT_TPDU_type:
+ { /* the y option will cause occasional packets to be dropped.
+ * A little crude but it works.
+ */
+
+ IFDEBUG(D_DROP)
+ if(time.tv_usec & 0x4 && hdr->tpdu_DTseq & 0x1) {
+ IncStat(ts_ydebug);
+ goto discard;
+ }
+ ENDDEBUG
+ }
+ if (tpcb->tp_class == TP_CLASS_0) {
+ tp0_data:
+ e.ATTR(DT_TPDU).e_seq = 0; /* actually don't care */
+ e.ATTR(DT_TPDU).e_eot = (((struct tp0du *)hdr)->tp0du_eot);
+ } else if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+ union seq_type seqeotX;
+
+ seqeotX.s_seqeot = ntohl(hdr->tpdu_seqeotX);
+ e.ATTR(DT_TPDU).e_seq = seqeotX.s_seq;
+ e.ATTR(DT_TPDU).e_eot = seqeotX.s_eot;
+#else
+ e.ATTR(DT_TPDU).e_seq = hdr->tpdu_DTseqX;
+ e.ATTR(DT_TPDU).e_eot = hdr->tpdu_DTeotX;
+#endif /* BYTE_ORDER */
+ } else {
+ e.ATTR(DT_TPDU).e_seq = hdr->tpdu_DTseq;
+ e.ATTR(DT_TPDU).e_eot = hdr->tpdu_DTeot;
+ }
+ if(e.ATTR(DT_TPDU).e_eot)
+ IncStat(ts_eot_input);
+ takes_data = TRUE;
+ e.ev_number = DT_TPDU;
+ IncStat(ts_DT_rcvd);
+ IncPStat(tpcb, tps_DT_rcvd);
+ break;
+
+ case GR_TPDU_type:
+ tp_indicate(T_DISCONNECT, tpcb, ECONNABORTED);
+ /* drop through */
+ default:
+ /* this should NEVER happen because there is a
+ * check for dutype well above here
+ */
+ error = E_TP_INV_TPDU; /* causes an ER */
+ IFDEBUG(D_TPINPUT)
+ printf("INVALID dutype 0x%x\n", hdr->tpdu_type);
+ ENDDEBUG
+ IncStat(ts_inv_dutype);
+ goto respond;
+ }
+ }
+ /* peel off the tp header;
+ * remember that the du_li doesn't count itself.
+ * This may leave us w/ an empty mbuf at the front of a chain.
+ * We can't just throw away the empty mbuf because hdr still points
+ * into the mbuf's data area and we're still using hdr (the tpdu header)
+ */
+ m->m_len -= ((int)hdr->tpdu_li + 1);
+ m->m_data += ((int)hdr->tpdu_li + 1);
+
+ if (takes_data) {
+ int max = tpdu_info[ hdr->tpdu_type ] [TP_MAX_DATA_INDEX];
+ int datalen = tpdu_len - hdr->tpdu_li - 1, mbtype = MT_DATA;
+ struct {
+ struct tp_disc_reason dr;
+ struct cmsghdr x_hdr;
+ } x;
+#define c_hdr x.x_hdr
+ register struct mbuf *n;
+
+ CHECK( (max && datalen > max), E_TP_LENGTH_INVAL,
+ ts_inv_length, respond, (max + hdr->tpdu_li + 1) );
+ switch( hdr->tpdu_type ) {
+
+ case CR_TPDU_type:
+ c_hdr.cmsg_type = TPOPT_CONN_DATA;
+ goto make_control_msg;
+
+ case CC_TPDU_type:
+ c_hdr.cmsg_type = TPOPT_CFRM_DATA;
+ goto make_control_msg;
+
+ case DR_TPDU_type:
+ x.dr.dr_hdr.cmsg_len = sizeof(x) - sizeof(c_hdr);
+ x.dr.dr_hdr.cmsg_type = TPOPT_DISC_REASON;
+ x.dr.dr_hdr.cmsg_level = SOL_TRANSPORT;
+ x.dr.dr_reason = hdr->tpdu_DRreason;
+ c_hdr.cmsg_type = TPOPT_DISC_DATA;
+ make_control_msg:
+ datalen += sizeof(c_hdr);
+ c_hdr.cmsg_len = datalen;
+ c_hdr.cmsg_level = SOL_TRANSPORT;
+ mbtype = MT_CONTROL;
+ MGET(n, M_DONTWAIT, MT_DATA);
+ if (n == 0)
+ {m_freem(m); m = 0; datalen = 0; goto invoke; }
+ if (hdr->tpdu_type == DR_TPDU_type) {
+ datalen += sizeof(x) - sizeof(c_hdr);
+ bcopy((caddr_t)&x, mtod(n, caddr_t), n->m_len = sizeof(x));
+ } else
+ bcopy((caddr_t)&c_hdr, mtod(n, caddr_t),
+ n->m_len = sizeof(c_hdr));
+ n->m_next = m;
+ m = n;
+ /* FALLTHROUGH */
+
+ case XPD_TPDU_type:
+ if (mbtype != MT_CONTROL)
+ mbtype = MT_OOBDATA;
+ m->m_flags |= M_EOR;
+ /* FALLTHROUGH */
+
+ case DT_TPDU_type:
+ for (n = m; n; n = n->m_next) {
+ MCHTYPE(n, mbtype);
+ }
+ invoke:
+ e.ATTR(DT_TPDU).e_datalen = datalen;
+ e.ATTR(DT_TPDU).e_data = m;
+ break;
+
+ default:
+ printf(
+ "ERROR in tp_input! hdr->tpdu_type 0x%x takes_data 0x%x m 0x%x\n",
+ hdr->tpdu_type, takes_data, m);
+ break;
+ }
+ /* prevent m_freem() after tp_driver() from throwing it all away */
+ m = MNULL;
+ }
+
+ IncStat(ts_tpdu_rcvd);
+
+ IFDEBUG(D_TPINPUT)
+ printf( "tp_input: before driver, state 0x%x event 0x%x m 0x%x",
+ tpcb->tp_state, e.ev_number, m );
+ printf(" e.e_data 0x%x\n", e.ATTR(DT_TPDU).e_data);
+ printf("takes_data 0x%x m_len 0x%x, tpdu_len 0x%x\n",
+ takes_data, (m==MNULL)?0:m->m_len, tpdu_len);
+ ENDDEBUG
+
+ error = tp_driver(tpcb, &e);
+
+ ASSERT(tpcb != (struct tp_pcb *)0);
+ ASSERT(tpcb->tp_sock != (struct socket *)0);
+ if( tpcb->tp_sock->so_error == 0 )
+ tpcb->tp_sock->so_error = error;
+
+ /* Kludge to keep the state tables under control (adding
+ * data on connect & disconnect & freeing the mbuf containing
+ * the data would have exploded the tables and made a big mess ).
+ */
+ switch(e.ev_number) {
+ case CC_TPDU:
+ case DR_TPDU:
+ case CR_TPDU:
+ m = e.ATTR(CC_TPDU).e_data; /* same field for all three dutypes */
+ IFDEBUG(D_TPINPUT)
+ printf("after driver, restoring m to 0x%x, takes_data 0x%x\n",
+ m, takes_data);
+ ENDDEBUG
+ break;
+ default:
+ break;
+ }
+ /* Concatenated sequences are terminated by any tpdu that
+ * carries data: CR, CC, DT, XPD, DR.
+ * All other tpdu types may be concatenated: AK, XAK, DC, ER.
+ */
+
+separate:
+ if ( takes_data == 0 ) {
+ ASSERT( m != MNULL );
+ /*
+ * we already peeled off the prev. tp header so
+ * we can just pull up some more and repeat
+ */
+
+ if( m = tp_inputprep(m) ) {
+ IFDEBUG(D_TPINPUT)
+ hdr = mtod(m, struct tpdu *);
+ printf("tp_input @ separate: hdr 0x%x size %d m 0x%x\n",
+ hdr, (int) hdr->tpdu_li + 1, m);
+ dump_mbuf(m, "tp_input after driver, at separate");
+ ENDDEBUG
+
+ IncStat(ts_concat_rcvd);
+ goto again;
+ }
+ }
+ if ( m != MNULL ) {
+ IFDEBUG(D_TPINPUT)
+ printf("tp_input : m_freem(0x%x)\n", m);
+ ENDDEBUG
+ m_freem(m);
+ IFDEBUG(D_TPINPUT)
+ printf("tp_input : after m_freem 0x%x\n", m);
+ ENDDEBUG
+ }
+ return (ProtoHook) tpcb;
+
+discard:
+ /* class 4: drop the tpdu */
+ /* class 2,0: Should drop the net connection, if you can figure out
+ * to which connection it applies
+ */
+ IFDEBUG(D_TPINPUT)
+ printf("tp_input DISCARD\n");
+ ENDDEBUG
+ IFTRACE(D_TPINPUT)
+ tptrace(TPPTmisc, "tp_input DISCARD m", m,0,0,0);
+ ENDTRACE
+ m_freem(m);
+ IncStat(ts_recv_drop);
+ return (ProtoHook)0;
+
+nonx_dref:
+ switch (dutype) {
+ default:
+ goto discard;
+ case CC_TPDU_type:
+ /* error = E_TP_MISM_REFS; */
+ break;
+ case DR_TPDU_type:
+ error |= TP_ERROR_SNDC;
+ }
+respond:
+ IFDEBUG(D_TPINPUT)
+ printf("RESPOND: error 0x%x, errlen 0x%x\n", error, errlen);
+ ENDDEBUG
+ IFTRACE(D_TPINPUT)
+ tptrace(TPPTmisc, "tp_input RESPOND m error sref", m, error, sref, 0);
+ ENDTRACE
+ if (sref == 0)
+ goto discard;
+ (void) tp_error_emit(error, (u_long)sref, (struct sockaddr_iso *)faddr,
+ (struct sockaddr_iso *)laddr, m, errlen, tpcb,
+ cons_channel, dgout_routine);
+ IFDEBUG(D_ERROR_EMIT)
+ printf("tp_input after error_emit\n");
+ ENDDEBUG
+
+#ifdef lint
+ printf("",sref,opt);
+#endif /* lint */
+ IncStat(ts_recv_drop);
+ return (ProtoHook)0;
+}
+
+
+/*
+ * NAME: tp_headersize()
+ *
+ * CALLED FROM:
+ * tp_emit() and tp_sbsend()
+ * TP needs to know the header size so it can figure out how
+ * much data to put in each tpdu.
+ *
+ * FUNCTION, ARGUMENTS, and RETURN VALUE:
+ * For a given connection, represented by (tpcb), and
+ * tpdu type (dutype), return the size of a tp header.
+ *
+ * RETURNS: the expected size of the header in bytes
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES: It would be nice if it got the network header size as well.
+ */
+int
+tp_headersize(dutype, tpcb)
+	int dutype;
+	struct tp_pcb *tpcb;
+{
+	register int column;	/* column of tpdu_info[dutype] to read */
+
+	IFTRACE(D_CONN)
+		tptrace(TPPTmisc, "tp_headersize dutype class xtd_format",
+			dutype, tpcb->tp_class, tpcb->tp_xtd_format, 0);
+	ENDTRACE
+
+	/*
+	 * Sanity check: we expect a class 0 or class 4 connection, or else
+	 * a DR or CR tpdu.  Anything else is reported here and then trips
+	 * the ASSERT below.
+	 */
+	if( (tpcb->tp_class != TP_CLASS_0) &&
+		(tpcb->tp_class != TP_CLASS_4) &&
+		(dutype != DR_TPDU_type) &&
+		(dutype != CR_TPDU_type) ) {
+		printf("tp_headersize:dutype 0x%x, class 0x%x",
+			dutype, tpcb->tp_class);
+		/* TODO: identify this and GET RID OF IT */
+	}
+	ASSERT( (tpcb->tp_class == TP_CLASS_0) ||
+		(tpcb->tp_class == TP_CLASS_4) ||
+		(dutype == DR_TPDU_type) ||
+		(dutype == CR_TPDU_type) );
+
+	/*
+	 * Class 0 has its own column in the table; every other class is
+	 * indexed by the extended-format flag.
+	 */
+	column = (tpcb->tp_class == TP_CLASS_0) ?
+		TP_LEN_CLASS_0_INDEX : tpcb->tp_xtd_format;
+
+	/* caller must get network level header size separately */
+	return tpdu_info[ dutype ] [column];
+}
diff --git a/sys/netiso/tp_ip.h b/sys/netiso/tp_ip.h
new file mode 100644
index 000000000000..f2777676e133
--- /dev/null
+++ b/sys/netiso/tp_ip.h
@@ -0,0 +1,91 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_ip.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_ip.h,v 5.1 88/10/12 12:19:47 root Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_ip.h,v $
+ *
+ * internet IP-dependent structures and include files
+ *
+ */
+
+
+#ifndef __TP_IP__
+#define __TP_IP__
+
+#ifndef SOCK_STREAM
+#include <sys/socket.h>
+#endif
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <net/route.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+
+
+struct inpcb tp_inpcb;
+ /* queue of active inpcbs for tp ; for tp with dod ip
+  * NOTE(review): this line defines the object (no "extern") inside a
+  * header, so every file that includes tp_ip.h gets its own tentative
+  * definition; confirm the build relies on those being merged, or
+  * move the definition into a single .c file.
+  */
+
+#endif /* __TP_IP__ */
diff --git a/sys/netiso/tp_iso.c b/sys/netiso/tp_iso.c
new file mode 100644
index 000000000000..1cf67f86648f
--- /dev/null
+++ b/sys/netiso/tp_iso.c
@@ -0,0 +1,693 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_iso.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ * $Header: /var/src/sys/netiso/RCS/tp_iso.c,v 5.1 89/02/09 16:20:51 hagens Exp $
+ * $Source: /var/src/sys/netiso/RCS/tp_iso.c,v $
+ *
+ * Here is where you find the iso-dependent code. We've tried to
+ * keep all net-level and (primarily) address-family-dependent stuff
+ * out of the tp source, and everything here is reached indirectly
+ * through a switch table (struct nl_protosw *) tpcb->tp_nlproto
+ * (see tp_pcb.c).
+ * The routines here are:
+ * iso_getsufx: gets transport suffix out of an isopcb structure.
+ * iso_putsufx: put transport suffix into an isopcb structure.
+ * iso_putnetaddr: put a whole net addr into an isopcb.
+ * iso_getnetaddr: get a whole net addr from an isopcb.
+ * iso_cmpnetaddr: compare a whole net addr from an isopcb.
+ * iso_recycle_tsuffix: clear suffix for reuse in isopcb
+ * tpclnp_ctlinput: handle ER CLNPdu : icmp-like stuff
+ * tpclnp_mtu: figure out what size tpdu to use
+ * tpclnp_input: take a pkt from clnp, strip off its clnp header,
+ * give to tp
+ * tpclnp_output_dg: package a pkt for clnp given 2 addresses & some data
+ * tpclnp_output: package a pkt for clnp given an isopcb & some data
+ */
+
+#ifdef ISO
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/protosw.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_clnp.h>
+#include <netiso/cltp_var.h>
+
+/*
+ * CALLED FROM:
+ * pr_usrreq() on PRU_BIND, PRU_CONNECT, PRU_ACCEPT, and PRU_PEERADDR
+ * FUNCTION, ARGUMENTS:
+ * The argument (which) takes the value TP_LOCAL or TP_FOREIGN.
+ */
+
+iso_getsufx(isop, lenp, data_out, which)
+	struct isopcb *isop;
+	u_short *lenp;
+	caddr_t data_out;
+	int which;
+{
+	register struct sockaddr_iso *siso;
+
+	/* Pick the local or foreign address; any other selector gets none. */
+	if (which == TP_LOCAL)
+		siso = isop->isop_laddr;
+	else if (which == TP_FOREIGN)
+		siso = isop->isop_faddr;
+	else
+		siso = 0;
+
+	/*
+	 * Report the suffix length through lenp and copy the suffix bytes
+	 * out.  With no address, *lenp and data_out are left untouched.
+	 */
+	if (siso != 0) {
+		*lenp = siso->siso_tlen;
+		bcopy(TSEL(siso), data_out, siso->siso_tlen);
+	}
+}
+
+/* CALLED FROM:
+ * tp_newsocket(); i.e., when a connection is being established by an
+ * incoming CR_TPDU.
+ *
+ * FUNCTION, ARGUMENTS:
+ * Put a transport suffix (found in name) into an isopcb structure (isop).
+ * The argument (which) takes the value TP_LOCAL or TP_FOREIGN.
+ */
+void
+iso_putsufx(isop, sufxloc, sufxlen, which)
+	struct isopcb *isop;
+	caddr_t sufxloc;
+	int sufxlen, which;
+{
+	struct sockaddr_iso **slot, *builtin;
+	register struct sockaddr_iso *sa;
+	struct mbuf *mb;
+	int total;
+
+	/*
+	 * slot is the pcb's pointer for the chosen side; builtin is the
+	 * sockaddr storage embedded in the pcb for that side.
+	 */
+	if (which == TP_LOCAL) {
+		slot = &isop->isop_laddr;
+		builtin = &isop->isop_sladdr;
+	} else if (which == TP_FOREIGN) {
+		slot = &isop->isop_faddr;
+		builtin = &isop->isop_sfaddr;
+	} else
+		return;
+
+	sa = *slot;
+	if (sa == 0) {
+		/* No address yet: start from an empty one in the pcb itself. */
+		sa = *slot = builtin;
+		sa->siso_nlen = 0;
+		sa->siso_slen = 0;
+		sa->siso_plen = 0;
+		printf("iso_putsufx on un-initialized isopcb\n");
+	}
+
+	/* Total sockaddr length once the suffix follows the net address. */
+	total = sufxlen + sa->siso_nlen +
+		(sizeof(*sa) - sizeof(sa->siso_data));
+
+	if (sa == builtin && total > sizeof(*sa)) {
+		/*
+		 * Too big for the embedded sockaddr: move the address into
+		 * an mbuf.  If none is available the suffix is not stored.
+		 */
+		mb = m_getclr(M_DONTWAIT, MT_SONAME);
+		if (mb == 0)
+			return;
+		sa = *slot = mtod(mb, struct sockaddr_iso *);
+		*sa = *builtin;
+		mb->m_len = total;
+	}
+
+	bcopy(sufxloc, TSEL(sa), sufxlen);
+	sa->siso_tlen = sufxlen;
+	sa->siso_len = total;
+}
+
+/*
+ * CALLED FROM:
+ * tp.trans whenever we go into REFWAIT state.
+ * FUNCTION and ARGUMENT:
+ * Called when a ref is frozen, to allow the suffix to be reused.
+ * (isop) is the net level pcb. This really shouldn't have to be
+ * done in a NET level pcb but... for the internet world that's just
+ * the way it is done in BSD...
+ * The alternative is to have the port unusable until the reference
+ * timer goes off.
+ */
+void
+iso_recycle_tsuffix(isop)
+	struct isopcb *isop;
+{
+	/*
+	 * Zero the transport-suffix length on both addresses so the suffix
+	 * can be reused.  Either pointer may still be unset (the other
+	 * routines in this file test isop_laddr/isop_faddr for 0 before
+	 * using them), so skip whichever side has no address.
+	 */
+	if (isop->isop_laddr)
+		isop->isop_laddr->siso_tlen = 0;
+	if (isop->isop_faddr)
+		isop->isop_faddr->siso_tlen = 0;
+}
+
+/*
+ * CALLED FROM:
+ * tp_newsocket(); i.e., when a connection is being established by an
+ * incoming CR_TPDU.
+ *
+ * FUNCTION and ARGUMENTS:
+ * Copy a whole net addr from a struct sockaddr (name).
+ * into an isopcb (isop).
+ * The argument (which) takes values TP_LOCAL or TP_FOREIGN
+ */
+void
+iso_putnetaddr(isop, name, which)
+	register struct isopcb *isop;
+	struct sockaddr_iso *name;
+	int which;
+{
+	struct sockaddr_iso **sisop, *backup;
+	register struct sockaddr_iso *siso;
+
+	/*
+	 * sisop is the pcb's pointer for the chosen side; backup is the
+	 * sockaddr storage embedded in the pcb, used if none is set yet.
+	 */
+	switch (which) {
+	default:
+		printf("iso_putnetaddr: should panic\n");
+		return;
+	case TP_LOCAL:
+		sisop = &isop->isop_laddr;
+		backup = &isop->isop_sladdr;
+		break;
+	case TP_FOREIGN:
+		sisop = &isop->isop_faddr;
+		backup = &isop->isop_sfaddr;
+	}
+	siso = ((*sisop == 0) ? (*sisop = backup) : *sisop);
+	IFDEBUG(D_TPISO)
+		printf("ISO_PUTNETADDR\n");
+		/*
+		 * Dump the address being updated (as iso_cmpnetaddr does),
+		 * not isop_faddr unconditionally: for TP_LOCAL that was
+		 * the wrong address and could still be a null pointer.
+		 */
+		dump_isoaddr(siso);
+	ENDDEBUG
+	/* Only the network address part is copied from name. */
+	siso->siso_addr = name->siso_addr;
+}
+
+/*
+ * CALLED FROM:
+ * tp_input() when a connection is being established by an
+ * incoming CR_TPDU, and considered for interception.
+ *
+ * FUNCTION and ARGUMENTS:
+ * compare a whole net addr from a struct sockaddr (name),
+ * with that implicitly stored in an isopcb (isop).
+ * The argument (which) takes values TP_LOCAL or TP_FOREIGN.
+ */
+iso_cmpnetaddr(isop, name, which)
+	register struct isopcb *isop;
+	register struct sockaddr_iso *name;
+	int which;
+{
+	struct sockaddr_iso **slot, *builtin;
+	register struct sockaddr_iso *mine;
+
+	if (which == TP_LOCAL) {
+		slot = &isop->isop_laddr;
+		builtin = &isop->isop_sladdr;
+	} else if (which == TP_FOREIGN) {
+		slot = &isop->isop_faddr;
+		builtin = &isop->isop_sfaddr;
+	} else {
+		printf("iso_cmpnetaddr: should panic\n");
+		return 0;
+	}
+
+	/* An unset address is pointed at the pcb's embedded storage first. */
+	if (*slot == 0)
+		*slot = builtin;
+	mine = *slot;
+
+	IFDEBUG(D_TPISO)
+		printf("ISO_CMPNETADDR\n");
+		dump_isoaddr(mine);
+	ENDDEBUG
+
+	/* A transport selector in name, if present, must match ours. */
+	if (name->siso_tlen != 0 &&
+		bcmp(TSEL(name), TSEL(mine), name->siso_tlen) != 0)
+		return (0);
+
+	/* Then compare the first siso_nlen bytes of the address data. */
+	if (bcmp((caddr_t)name->siso_data,
+		(caddr_t)mine->siso_data, name->siso_nlen) != 0)
+		return (0);
+	return (1);
+}
+
+/*
+ * CALLED FROM:
+ * pr_usrreq() PRU_SOCKADDR, PRU_ACCEPT, PRU_PEERADDR
+ * FUNCTION and ARGUMENTS:
+ * Copy a whole net addr from an isopcb (isop) into
+ * a struct sockaddr (name).
+ * The argument (which) takes values TP_LOCAL or TP_FOREIGN.
+ */
+
+void
+iso_getnetaddr( isop, name, which)
+	struct isopcb *isop;
+	struct mbuf *name;
+	int which;
+{
+	struct sockaddr_iso *siso;
+
+	/* TP_LOCAL selects the local address; anything else the foreign one. */
+	if (which == TP_LOCAL)
+		siso = isop->isop_laddr;
+	else
+		siso = isop->isop_faddr;
+
+	/* No address on that side: hand back an empty mbuf. */
+	if (siso == 0) {
+		name->m_len = 0;
+		return;
+	}
+
+	/* Copy the whole sockaddr into the mbuf and record its length. */
+	name->m_len = siso->siso_len;
+	bcopy((caddr_t)siso, mtod(name, caddr_t), (unsigned)name->m_len);
+}
+/*
+ * NAME: tpclnp_mtu()
+ *
+ * CALLED FROM:
+ * tp_route_to() on incoming CR, CC, and pr_usrreq() for PRU_CONNECT
+ *
+ * FUNCTION, ARGUMENTS, and RETURN VALUE:
+ *
+ * Perform subnetwork dependent part of determining MTU information.
+ * It appears that setting a double pointer to the rtentry associated with
+ * the destination, and returning the header size for the network protocol
+ * suffices.
+ *
+ * SIDE EFFECTS:
+ * Sets tp_routep pointer in pcb.
+ *
+ * NOTES:
+ */
+tpclnp_mtu(tpcb)
+register struct tp_pcb *tpcb;
+{
+	struct isopcb *isop = (struct isopcb *)tpcb->tp_npcb;
+
+	IFDEBUG(D_CONN)
+		/*
+		 * The format used to have no conversion for the tpcb
+		 * argument; print it the way the other debug messages in
+		 * this file print pointers.
+		 */
+		printf("tpclnp_mtu(tpcb 0x%x)\n", tpcb);
+	ENDDEBUG
+
+	/* Point the transport pcb at the route cached in the net pcb. */
+	tpcb->tp_routep = &(isop->isop_route.ro_rt);
+
+	/* Over CONS no network header size is reported here. */
+	if (tpcb->tp_netservice == ISO_CONS)
+		return 0;
+
+	/* Otherwise: fixed part + segmentation part + two addresses. */
+	return (sizeof(struct clnp_fixed) + sizeof(struct clnp_segment) +
+		2 * sizeof(struct iso_addr));
+}
+
+/*
+ * CALLED FROM:
+ * tp_emit()
+ * FUNCTION and ARGUMENTS:
+ * Take a packet(m0) from tp and package it so that clnp will accept it.
+ * This means prepending space for the clnp header and filling in a few
+ * of the fields.
+ * isop is the isopcb structure; datalen is the length of the data in the
+ * mbuf string m0.
+ * RETURN VALUE:
+ * whatever (E*) is returned from the net layer output routine.
+ */
+
+int
+tpclnp_output(isop, m0, datalen, nochksum)
+	struct isopcb *isop;
+	struct mbuf *m0;
+	int datalen;
+	int nochksum;
+{
+	int flags;	/* flags argument for clnp_output() */
+
+	IncStat(ts_tpdu_sent);
+
+	IFDEBUG(D_TPISO)
+		struct tpdu *hdr = mtod(m0, struct tpdu *);
+
+		printf(
+"abt to call clnp_output: datalen 0x%x, hdr.li 0x%x, hdr.dutype 0x%x nocsum x%x dst addr:\n",
+			datalen,
+			(int)hdr->tpdu_li, (int)hdr->tpdu_type, nochksum);
+		dump_isoaddr(isop->isop_faddr);
+		printf("\nsrc addr:\n");
+		dump_isoaddr(isop->isop_laddr);
+		dump_mbuf(m0, "at tpclnp_output");
+	ENDDEBUG
+
+	/* The only flag passed down is the "no checksum" request. */
+	if (nochksum)
+		flags = CLNP_NO_CKSUM;
+	else
+		flags = 0;
+
+	return clnp_output(m0, isop, datalen, flags);
+}
+
+/*
+ * CALLED FROM:
+ * tp_error_emit()
+ * FUNCTION and ARGUMENTS:
+ * This is a copy of tpclnp_output that takes the addresses
+ * instead of a pcb. It's used by the tp_error_emit, when we
+ * don't have an iso_pcb with which to call the normal output rtn.
+ * RETURN VALUE:
+ * ENOBUFS or
+ * whatever (E*) is returned from the net layer output routine.
+ */
+
+int
+tpclnp_output_dg(laddr, faddr, m0, datalen, ro, nochksum)
+	struct iso_addr *laddr, *faddr;
+	struct mbuf *m0;
+	int datalen;
+	struct route *ro;
+	int nochksum;
+{
+	struct isopcb scratch;	/* throw-away pcb handed to clnp_output() */
+	int flags;
+	int result;
+
+	IFDEBUG(D_TPISO)
+		printf("tpclnp_output_dg datalen 0x%x m0 0x%x\n", datalen, m0);
+	ENDDEBUG
+
+	/*
+	 * Build just enough of an isopcb for clnp to send the packet:
+	 * both address pointers refer to the sockaddrs embedded in the
+	 * scratch pcb, which receive the caller's addresses.
+	 */
+	bzero((caddr_t)&scratch, sizeof(scratch));
+	scratch.isop_laddr = &scratch.isop_sladdr;
+	scratch.isop_faddr = &scratch.isop_sfaddr;
+	scratch.isop_sladdr.siso_addr = *laddr;
+	scratch.isop_sfaddr.siso_addr = *faddr;
+
+	IFDEBUG(D_TPISO)
+		printf("tpclnp_output_dg faddr: \n");
+		dump_isoaddr(&scratch.isop_sfaddr);
+		printf("\ntpclnp_output_dg laddr: \n");
+		dump_isoaddr(&scratch.isop_sladdr);
+		printf("\n");
+	ENDDEBUG
+
+	/*
+	 * This is a one shot error packet, so bypass the packet cache.
+	 */
+	flags = CLNP_NOCACHE;
+	if (nochksum)
+		flags |= CLNP_NO_CKSUM;
+
+	IncStat(ts_tpdu_sent);
+
+	result = clnp_output(m0, &scratch, datalen, flags);
+
+	/*
+	 * Release the route clnp may have attached to the scratch pcb.
+	 */
+	if (scratch.isop_route.ro_rt)
+		RTFREE(scratch.isop_route.ro_rt);
+
+	return(result);
+}
+/*
+ * CALLED FROM:
+ * clnp's input routine, indirectly through the protosw.
+ * FUNCTION and ARGUMENTS:
+ * Take a packet (m) from clnp, strip off the clnp header and give it to tp
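+ * Returns 0 once the packet has been given to tp or cltp (or dropped);
+ * a packet diverted to tuba_tcpinput() or idrp_input() returns that
+ * routine's result instead.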
+ */
+ProtoHook
+tpclnp_input(m, src, dst, clnp_len, ce_bit)
+ register struct mbuf *m;
+ struct sockaddr_iso *src, *dst;
+ int clnp_len, ce_bit;
+{ /* Entry point for packets handed up by CLNP: skips the CLNP header,
+   * diverts TUBA and IDRP traffic, and passes everything else to
+   * tp_input() or cltp_input().
+   */
+ struct mbuf *tp_inputprep();
+ int tp_input(), cltp_input(), (*input)() = tp_input; /* tp_input unless the tpdu turns out to be a UD tpdu */
+
+ IncStat(ts_pkt_rcvd);
+
+ IFDEBUG(D_TPINPUT)
+ printf("tpclnp_input: m 0x%x clnp_len 0x%x\n", m, clnp_len);
+ dump_mbuf(m, "at tpclnp_input");
+ ENDDEBUG
+ /*
+ * CLNP gives us an mbuf chain WITH the clnp header pulled up,
+ * and the length of the clnp header.
+ * First, strip off the Clnp header. leave the mbuf there for the
+ * pullup that follows.
+ */
+ m->m_len -= clnp_len;
+ m->m_data += clnp_len;
+ m->m_pkthdr.len -= clnp_len;
+ /* XXXX: should probably be in clnp_input */
+ switch (dst->siso_data[dst->siso_nlen - 1]) { /* dispatch on the last octet of the destination address; NOTE(review): this indexes siso_data[-1] if siso_nlen is 0 - confirm clnp never delivers that */
+#ifdef TUBA
+ case ISOPROTO_TCP:
+ return (tuba_tcpinput(m, src, dst));
+#endif
+ case 0:
+ if (m->m_len == 0 && (m = m_pullup(m, 1)) == 0) /* need one readable byte before peeking at it; give up if the pullup fails */
+ return 0;
+ if (*(mtod(m, u_char *)) == ISO10747_IDRP) /* first octet identifies IDRP */
+ return (idrp_input(m, src, dst));
+ } /* anything not claimed above continues on to TP */
+ m = tp_inputprep(m);
+ if (m == 0)
+ return 0;
+ if (mtod(m, u_char *)[1] == UD_TPDU_type) /* second octet is compared against the UD tpdu type */
+ input = cltp_input;
+
+ IFDEBUG(D_TPINPUT)
+ dump_mbuf(m, "after tpclnp_input both pullups");
+ ENDDEBUG
+
+ IFDEBUG(D_TPISO)
+ printf("calling %sinput : src 0x%x, dst 0x%x, src addr:\n",
+ (input == tp_input ? "tp_" : "clts_"), src, dst); /* NOTE(review): prints "clts_" although the alternative handler is cltp_input */
+ dump_isoaddr(src);
+ printf(" dst addr:\n");
+ dump_isoaddr(dst);
+ ENDDEBUG
+
+ (void) (*input)(m, (struct sockaddr *)src, (struct sockaddr *)dst,
+ 0, tpclnp_output_dg, ce_bit); /* tpclnp_output_dg is the datagram output routine the handler is given; the 0 is presumably "no CONS channel" - TODO confirm against tp_input's parameter list */
+
+ IFDEBUG(D_QUENCH) /* debug aid only: occasionally fake a quench indication, keyed off the clock */
+ {
+ if(time.tv_usec & 0x4 && time.tv_usec & 0x40) {
+ printf("tpclnp_input: FAKING %s\n",
+ tp_stat.ts_pkt_rcvd & 0x1?"QUENCH":"QUENCH2");
+ if(tp_stat.ts_pkt_rcvd & 0x1) {
+ tpclnp_ctlinput(PRC_QUENCH, &src); /* NOTE(review): &src is the address of the local pointer variable, not of the sockaddr; verify against tpclnp_ctlinput's parameter type */
+ } else {
+ tpclnp_ctlinput(PRC_QUENCH2, &src); /* NOTE(review): same &src concern as above */
+ }
+ }
+ }
+ ENDDEBUG
+
+ return 0;
+}
+
+/* Notify hook that takes no action of its own; it only reports 0. */
+ProtoHook iso_rtchange()
+{
+ return (0);
+}
+
+/*
+ * CALLED FROM:
+ * handed to iso_pcbnotify() by tpclnp_ctlinput() for PRC_QUENCH2
+ * FUNCTION and ARGUMENTS:
+ * Take the tp_pcb hanging off (isop)'s socket and give it to tp_quench().
+ */
+void tpiso_decbit(isop)
+ struct isopcb *isop;
+{
+ struct tp_pcb *tpcb = (struct tp_pcb *)isop->isop_socket->so_pcb;
+ tp_quench(tpcb, PRC_QUENCH2);
+}
+/*
+ * CALLED FROM:
+ * handed to iso_pcbnotify() by tpclnp_ctlinput() for PRC_QUENCH
+ * FUNCTION and ARGUMENTS:
+ * Take the tp_pcb hanging off (isop)'s socket and give it to tp_quench().
+ */
+void tpiso_quench(isop)
+ struct isopcb *isop;
+{
+ struct tp_pcb *tpcb = (struct tp_pcb *)isop->isop_socket->so_pcb;
+ tp_quench(tpcb, PRC_QUENCH);
+}
+
+/*
+ * CALLED FROM:
+ * The network layer through the protosw table.
+ * FUNCTION and ARGUMENTS:
+ * When clnp gets an ICMP-like msg this gets called.
+ * It either returns an error status to the user or
+ * it causes all connections on this address to be aborted
+ * by calling the appropriate xx_notify() routine.
+ * (cmd) is the type of ICMP error; out-of-range values are ignored.
+ * (siso) is the address of the guy who sent the ER CLNPDU. Returns 0.
+ */
+ProtoHook
+tpclnp_ctlinput(cmd, siso)
+ int cmd;
+ struct sockaddr_iso *siso;
+{
+ extern u_char inetctlerrmap[];
+ extern ProtoHook tpiso_abort();
+ extern ProtoHook iso_rtchange();
+ extern ProtoHook tpiso_reset();
+ void iso_pcbnotify();
+
+ IFDEBUG(D_TPINPUT)
+ printf("tpclnp_ctlinput1: cmd 0x%x addr: \n", cmd);
+ dump_isoaddr(siso);
+ ENDDEBUG
+
+ if (cmd < 0 || cmd >= PRC_NCMDS) /* cmd indexes inetctlerrmap[] below */
+ return 0;
+ if (siso->siso_family != AF_ISO)
+ return 0;
+ switch (cmd) {
+
+ case PRC_QUENCH2:
+ iso_pcbnotify(&tp_isopcb, siso, 0, (int (*)())tpiso_decbit);
+ break;
+
+ case PRC_QUENCH:
+ iso_pcbnotify(&tp_isopcb, siso, 0, (int (*)())tpiso_quench);
+ break;
+
+ case PRC_TIMXCEED_REASS:
+ case PRC_ROUTEDEAD:
+ iso_pcbnotify(&tp_isopcb, siso, 0, tpiso_reset);
+ break;
+
+ case PRC_HOSTUNREACH:
+ case PRC_UNREACH_NET:
+ case PRC_IFDOWN:
+ case PRC_HOSTDEAD:
+ iso_pcbnotify(&tp_isopcb, siso,
+ (int)inetctlerrmap[cmd], iso_rtchange);
+ break;
+
+ default:
+ /*
+ case PRC_MSGSIZE:
+ case PRC_UNREACH_HOST:
+ case PRC_UNREACH_PROTOCOL:
+ case PRC_UNREACH_PORT:
+ case PRC_UNREACH_NEEDFRAG:
+ case PRC_UNREACH_SRCFAIL:
+ case PRC_REDIRECT_NET:
+ case PRC_REDIRECT_HOST:
+ case PRC_REDIRECT_TOSNET:
+ case PRC_REDIRECT_TOSHOST:
+ case PRC_TIMXCEED_INTRANS:
+ case PRC_PARAMPROB:
+ */
+ iso_pcbnotify(&tp_isopcb, siso, (int)inetctlerrmap[cmd], tpiso_abort);
+ break;
+ }
+ return 0;
+}
+/*
+ * XXX - Variant called by clnp_er.c with an isoaddr rather than a sockaddr_iso.
+ * Copies the length byte plus isoa_len address bytes; too-long ones are ignored.
+ */
+static struct sockaddr_iso siso = {sizeof(siso), AF_ISO};
+int tpclnp_ctlinput1(cmd, isoa)
+ int cmd;
+ struct iso_addr *isoa;
+{
+ if (isoa->isoa_len > sizeof(siso.siso_addr.isoa_genaddr)) return 0;
+ bzero((caddr_t)&siso.siso_addr, sizeof(siso.siso_addr));
+ bcopy((caddr_t)isoa, (caddr_t)&siso.siso_addr, 1 + isoa->isoa_len);
+ return tpclnp_ctlinput(cmd, &siso);
+}
+
+/*
+ * These next 2 routines are
+ * CALLED FROM:
+ * xxx_notify() from tp_ctlinput() when
+ * net level gets some ICMP-equiv. type event.
+ * FUNCTION and ARGUMENTS:
+ * Cause the connection to be aborted with some sort of error
+ * reason indicating that the network layer caused the abort.
+ * Fakes an ER TPDU so we can go through the driver.
+ * abort always aborts the TP connection.
+ * reset may or may not, depending on the TP class that's in use.
+ */
+ProtoHook tpiso_abort(isop)
+ struct isopcb *isop;
+{
+ struct tp_event fake_er; /* stand-in ER TPDU event for tp_driver() */
+ struct tp_pcb *tpcb;
+ IFDEBUG(D_CONN)
+ printf("tpiso_abort 0x%x\n", isop);
+ ENDDEBUG
+ tpcb = (struct tp_pcb *)isop->isop_socket->so_pcb;
+ fake_er.ATTR(ER_TPDU).e_reason = ECONNABORTED;
+ fake_er.ev_number = ER_TPDU;
+ return tp_driver(tpcb, &fake_er);
+}
+
+ProtoHook
+tpiso_reset(isop)
+ struct isopcb *isop;
+{
+ struct tp_pcb *tpcb = (struct tp_pcb *)isop->isop_socket->so_pcb;
+ struct tp_event netreset;
+
+ netreset.ev_number = T_NETRESET; /* the only event field filled in here */
+ return tp_driver(tpcb, &netreset);
+}
+
+#endif /* ISO */
diff --git a/sys/netiso/tp_meas.c b/sys/netiso/tp_meas.c
new file mode 100644
index 000000000000..f8bbbe6dceb6
--- /dev/null
+++ b/sys/netiso/tp_meas.c
@@ -0,0 +1,127 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_meas.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: tp_meas.c,v 5.2 88/11/18 17:28:04 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_meas.c,v $
+ *
+ * tp_meas.c : create a performance measurement event
+ * in the circular buffer tp_Meas[]
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/tp_meas.h>
+
+extern struct timeval time;
+
+#ifdef TP_PERF_MEAS
+int tp_Measn = 0; /* index of the tp_Meas[] slot Tpmeas() fills next */
+struct tp_Meas tp_Meas[TPMEASN]; /* circular buffer of measurement events */
+
+/*
+ * NAME: Tpmeas() (reached through the tpmeas() macro in tp_meas.h)
+ *
+ * CALLED FROM: tp_emit(), tp_soisdisconnecting(), tp_soisdisconnected()
+ * tp0_stash(), tp_stash(), tp_send(), tp_goodack(), tp_usrreq()
+ *
+ * FUNCTION and ARGUMENTS:
+ * Records one performance-measurement event in the next slot of the
+ * circular buffer tp_Meas[]. (ref) is the reference the event belongs
+ * to and (kind) tells which kind of event it is. (timev) is stored as
+ * the event time for TPtime_from_ll events only; every other kind is
+ * stamped from the global (time). (seq), (win), (size) go in the event too.
+ *
+ * RETURNS: Nada
+ *
+ * SIDE EFFECTS: advances tp_Measn and a private running event count
+ *
+ * NOTES:
+ */
+void
+Tpmeas(ref, kind, timev, seq, win, size)
+ u_int ref;
+ u_int kind;
+ struct timeval *timev;
+ u_int seq, win, size;
+{
+ static int mseq;
+ register struct tp_Meas *slot = &tp_Meas[tp_Measn];
+ struct timeval *stamp;
+
+ /* step the cursor, wrapping at the end of the circular buffer */
+ tp_Measn = (tp_Measn + 1) % TPMEASN;
+
+ /* TPtime_from_ll events use the caller's time, the rest use (time) */
+ stamp = (kind == TPtime_from_ll) ? timev : &time;
+
+ slot->tpm_tseq = mseq++;
+ slot->tpm_kind = kind;
+ slot->tpm_ref = ref;
+ slot->tpm_seq = seq;
+ slot->tpm_window = win;
+ slot->tpm_size = size;
+ bcopy((caddr_t)stamp, (caddr_t)&slot->tpm_time, sizeof(struct timeval));
+}
+
+#endif /* TP_PERF_MEAS */
diff --git a/sys/netiso/tp_meas.h b/sys/netiso/tp_meas.h
new file mode 100644
index 000000000000..10ef93d350b6
--- /dev/null
+++ b/sys/netiso/tp_meas.h
@@ -0,0 +1,94 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_meas.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+#ifdef TP_PERF_MEAS
+#define tpmeas(a, b, t, c, d, e) \
+ Tpmeas((u_int)(a), (u_int)(b), t, (u_int)(c), (u_int)(d), (u_int)(e)) /* casts all but (t) to u_int */
+
+struct tp_Meas { /* one recorded measurement event */
+ int tpm_tseq; /* running event count kept by Tpmeas() */
+ u_char tpm_kind; /* one of the event kinds defined below */
+ u_short tpm_ref; /* reference the event belongs to */
+ u_short tpm_size; /* (size) argument of Tpmeas() */
+ u_short tpm_window; /* (win) argument of Tpmeas() */
+ u_int tpm_seq; /* (seq) argument of Tpmeas() */
+ struct timeval tpm_time; /* time stored with the event */
+};
+
+#define TPMEASN 4000 /* number of slots in tp_Meas[] */
+extern int tp_Measn;
+extern struct tp_Meas tp_Meas[];
+
+/*
+ * the kinds of events for packet tracing are:
+ */
+#define TPtime_from_session 0x01
+#define TPtime_to_session 0x02
+#define TPtime_ack_rcvd 0x03
+#define TPtime_ack_sent 0x04
+#define TPtime_from_ll 0x05 /* the only kind Tpmeas() stamps with the caller's time */
+#define TPtime_to_ll 0x06
+#define TPsbsend 0x07
+#define TPtime_open 0x08
+#define TPtime_open_X 0x28 /* xtd format */
+#define TPtime_close 0x09
+
+#endif /* TP_PERF_MEAS */
diff --git a/sys/netiso/tp_output.c b/sys/netiso/tp_output.c
new file mode 100644
index 000000000000..cdd7c4fe76b3
--- /dev/null
+++ b/sys/netiso/tp_output.c
@@ -0,0 +1,712 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_output.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_output.c,v 5.4 88/11/18 17:28:08 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_output.c,v $
+ *
+ * In here are tp_consistency() and tp_ctloutput(), the guy called by [sg]etsockopt().
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/tp_user.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_ip.h>
+#include <netiso/tp_clnp.h>
+#include <netiso/tp_timer.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_trace.h>
+
+#define TPDUSIZESHIFT 24 /* NOTE(review): not referenced anywhere in this file */
+#define CLASSHIFT 16 /* NOTE(review): not referenced anywhere in this file */
+
+/*
+ * NAME: tp_consistency()
+ *
+ * CALLED FROM:
+ * tp_ctloutput(), tp_input()
+ *
+ * FUNCTION and ARGUMENTS:
+ * Checks the consistency of options and tpdusize with class,
+ * using the parameters passed in via (param).
+ * (cmd) may be TP_STRICT or TP_FORCE or both.
+ * Force means it will set all the values in (tpcb) to those in
+ * the input arguments iff no errors were encountered.
+ * Strict means that no inconsistency will be tolerated. If it's
+ * not used, the inconsistent values in (param) are overwritten with legal ones.
+ * The reason for this is that in some cases, when we're negotiating down
+ * from class 4, these options should be changed but should not
+ * cause negotiation to fail.
+ * Force is ignored when (param)->p_dont_change_params is set.
+ * RETURNS
+ * E* or EOK
+ * E* (EINVAL) if the various parms aren't ok for a given class
+ * EOK if they are ok for a given class
+ */
+
+int
+tp_consistency( tpcb, cmd, param )
+ u_int cmd;
+ struct tp_conn_param *param;
+ struct tp_pcb *tpcb;
+{
+ register int error = EOK;
+ int class_to_use = tp_mask_to_num(param->p_class);
+
+ IFTRACE(D_SETPARAMS)
+ tptrace(TPPTmisc,
+ "tp_consist enter class_to_use dontchange param.class cmd",
+ class_to_use, param->p_dont_change_params, param->p_class, cmd);
+ ENDTRACE
+ IFDEBUG(D_SETPARAMS)
+ printf("tp_consistency %s %s\n",
+ cmd& TP_FORCE? "TP_FORCE": "",
+ cmd& TP_STRICT? "TP_STRICT":"");
+ ENDDEBUG
+ if ((cmd & TP_FORCE) && (param->p_dont_change_params)) {
+ cmd &= ~TP_FORCE; /* check only: (tpcb) will not be updated below */
+ }
+ /* can switch net services within a domain, but
+ * cannot switch domains
+ */
+ switch( param->p_netservice) {
+ case ISO_CONS:
+ case ISO_CLNS:
+ case ISO_COSNS:
+ /* param->p_netservice in ISO DOMAIN */
+ if(tpcb->tp_domain != AF_ISO ) {
+ error = EINVAL; goto done;
+ }
+ break;
+ case IN_CLNS:
+ /* param->p_netservice in INET DOMAIN */
+ if( tpcb->tp_domain != AF_INET ) {
+ error = EINVAL; goto done;
+ }
+ break;
+ /* no other values are possible -> netservice is a 2-bit field! */
+ }
+
+ IFDEBUG(D_SETPARAMS)
+ printf("p_class 0x%x, class_to_use 0x%x\n", param->p_class,
+ class_to_use);
+ ENDDEBUG
+ if((param->p_netservice < 0) || (param->p_netservice > TP_MAX_NETSERVICES)){
+ error = EINVAL; goto done;
+ }
+ if( (param->p_class & TP_CLASSES_IMPLEMENTED) == 0 ) {
+ error = EINVAL; goto done;
+ }
+ IFDEBUG(D_SETPARAMS)
+ printf("Nretrans 0x%x\n", param->p_Nretrans );
+ ENDDEBUG
+ if( ( param->p_Nretrans < 1 ) ||
+ (param->p_cr_ticks < 1) || (param->p_cc_ticks < 1) ) {
+ /* bad for any class because negot has to be done a la class 4 */
+ error = EINVAL; goto done;
+ }
+ IFDEBUG(D_SETPARAMS)
+ printf("use_csum 0x%x\n", param->p_use_checksum );
+ printf("xtd_format 0x%x\n", param->p_xtd_format );
+ printf("xpd_service 0x%x\n", param->p_xpd_service );
+ printf("tpdusize 0x%x\n", param->p_tpdusize );
+ printf("tpcb->flags 0x%x\n", tpcb->tp_flags );
+ ENDDEBUG
+ switch( class_to_use ) {
+
+ case 0:
+ /* do not use checksums, xtd format, or XPD */
+
+ if( param->p_use_checksum | param->p_xtd_format | param->p_xpd_service ) {
+ if(cmd & TP_STRICT) {
+ error = EINVAL;
+ } else {
+ param->p_use_checksum = 0;
+ param->p_xtd_format = 0;
+ param->p_xpd_service = 0;
+ }
+ break; /* the first inconsistency found ends the class 0 checks */
+ }
+
+ if (param->p_tpdusize < TP_MIN_TPDUSIZE) {
+ if(cmd & TP_STRICT) {
+ error = EINVAL;
+ } else {
+ param->p_tpdusize = TP_MIN_TPDUSIZE;
+ }
+ break;
+ }
+ if (param->p_tpdusize > TP0_TPDUSIZE) {
+ if (cmd & TP_STRICT) {
+ error = EINVAL;
+ } else {
+ param->p_tpdusize = TP0_TPDUSIZE;
+ }
+ break;
+ }
+
+ /* connect/disc data not allowed for class 0 */
+ if (tpcb->tp_ucddata) {
+ if(cmd & TP_STRICT) {
+ error = EINVAL;
+ } else if(cmd & TP_FORCE) {
+ m_freem(tpcb->tp_ucddata);
+ tpcb->tp_ucddata = 0;
+ }
+ }
+ break;
+
+ case 4:
+ IFDEBUG(D_SETPARAMS)
+ printf("dt_ticks 0x%x\n", param->p_dt_ticks );
+ printf("x_ticks 0x%x\n", param->p_x_ticks );
+ printf("dr_ticks 0x%x\n", param->p_dr_ticks );
+ printf("keepalive 0x%x\n", param->p_keepalive_ticks );
+ printf("sendack 0x%x\n", param->p_sendack_ticks );
+ printf("inact 0x%x\n", param->p_inact_ticks );
+ printf("ref 0x%x\n", param->p_ref_ticks );
+ ENDDEBUG
+ if( (param->p_class & TP_CLASS_4 ) && (
+ (param->p_dt_ticks < 1) || (param->p_dr_ticks < 1) ||
+ (param->p_x_ticks < 1) || (param->p_keepalive_ticks < 1) ||
+ (param->p_sendack_ticks < 1) || (param->p_ref_ticks < 1) ||
+ (param->p_inact_ticks < 1) ) ) {
+ error = EINVAL; /* bad timer values are an error even without TP_STRICT */
+ break;
+ }
+ IFDEBUG(D_SETPARAMS)
+ printf("rx_strat 0x%x\n", param->p_rx_strat );
+ ENDDEBUG
+ if(param->p_rx_strat >
+ ( TPRX_USE_CW | TPRX_EACH | TPRX_FASTSTART) ) {
+ if(cmd & TP_STRICT) {
+ error = EINVAL;
+ } else {
+ param->p_rx_strat = TPRX_USE_CW;
+ }
+ break; /* as in class 0, the first inconsistency found ends the checks */
+ }
+ IFDEBUG(D_SETPARAMS)
+ printf("ack_strat 0x%x\n", param->p_ack_strat );
+ ENDDEBUG
+ if((param->p_ack_strat != 0) && (param->p_ack_strat != 1)) {
+ if(cmd & TP_STRICT) {
+ error = EINVAL;
+ } else {
+ param->p_ack_strat = TPACK_WINDOW;
+ }
+ break;
+ }
+ if (param->p_tpdusize < TP_MIN_TPDUSIZE) {
+ if(cmd & TP_STRICT) {
+ error = EINVAL;
+ } else {
+ param->p_tpdusize = TP_MIN_TPDUSIZE;
+ }
+ break;
+ }
+ if (param->p_tpdusize > TP_TPDUSIZE) {
+ if(cmd & TP_STRICT) {
+ error = EINVAL;
+ } else {
+ param->p_tpdusize = TP_TPDUSIZE;
+ }
+ break;
+ }
+ break;
+ }
+
+ if ((error==0) && (cmd & TP_FORCE)) {
+ long dusize = ((long)param->p_ptpdusize) << 7; /* p_ptpdusize times 128 */
+ /* Enforce Negotiation rules below */
+ tpcb->tp_class = param->p_class;
+ if (tpcb->tp_use_checksum || param->p_use_checksum)
+ tpcb->tp_use_checksum = 1;
+ if (!tpcb->tp_xpd_service || !param->p_xpd_service)
+ tpcb->tp_xpd_service = 0;
+ if (!tpcb->tp_xtd_format || !param->p_xtd_format)
+ tpcb->tp_xtd_format = 0;
+ if (dusize) {
+ if (tpcb->tp_l_tpdusize > dusize)
+ tpcb->tp_l_tpdusize = dusize;
+ if (tpcb->tp_ptpdusize == 0 ||
+ tpcb->tp_ptpdusize > param->p_ptpdusize)
+ tpcb->tp_ptpdusize = param->p_ptpdusize;
+ } else {
+ if (param->p_tpdusize != 0 &&
+ tpcb->tp_tpdusize > param->p_tpdusize)
+ tpcb->tp_tpdusize = param->p_tpdusize;
+ tpcb->tp_l_tpdusize = 1 << tpcb->tp_tpdusize; /* tp_tpdusize is used as a power of two */
+ }
+ }
+done:
+
+ IFTRACE(D_CONN)
+ tptrace(TPPTmisc, "tp_consist returns class xtdfmt cmd",
+ error, tpcb->tp_class, tpcb->tp_xtd_format, cmd);
+ ENDTRACE
+ IFDEBUG(D_CONN)
+ printf(
+ "tp_consist rtns 0x%x class 0x%x xtd_fmt 0x%x cmd 0x%x\n",
+ error, tpcb->tp_class, tpcb->tp_xtd_format, cmd);
+ ENDDEBUG
+ return error;
+}
+
+/*
+ * NAME: tp_ctloutput()
+ *
+ * CALLED FROM:
+ * [sg]etsockopt(), via so[sg]etopt().
+ *
+ * FUNCTION and ARGUMENTS:
+ * Implements the socket options at transport level.
+ * (cmd) is either PRCO_SETOPT or PRCO_GETOPT (see ../sys/protosw.h).
+ * (so) is the socket.
+ * (level) is SOL_TRANSPORT (see ../sys/socket.h)
+ * (optname) is the particular command or option to be set.
+ * (**mp) is an mbuf structure.
+ *
+ * RETURN VALUE:
+ * ENOTSOCK if the socket hasn't got an associated tpcb
+ * EINVAL if
+ * trying to set window too big
+ * trying to set illegal max tpdu size
+ * trying to set illegal credit fraction
+ * trying to use unknown or unimplemented class of TP
+ * structure passed to set timer values is wrong size
+ * illegal combination of command/GET-SET option,
+ * e.g., GET w/ TPOPT_CDDATA_CLEAR:
+ * EOPNOTSUPP if the level isn't transport, or command is neither GET nor SET
+ * or if the transport-specific command is not implemented
+ * EISCONN if trying a command that isn't allowed after a connection
+ * is established
+ * ENOTCONN if trying a command that is allowed only if a connection is
+ * established
+ * EMSGSIZE if trying to give too much data on connect/disconnect
+ *
+ * SIDE EFFECTS:
+ * runs at splnet(); allocates *mp if none was passed, frees it on SETOPT
+ * NOTES: ENOBUFS, EPERM and EADDRINUSE can also be returned (see the code).
+ */
+ProtoHook
+tp_ctloutput(cmd, so, level, optname, mp)
+ int cmd, level, optname;
+ struct socket *so;
+ struct mbuf **mp;
+{
+ struct tp_pcb *tpcb = sototpcb(so);
+ int s = splnet();
+ caddr_t value;
+ unsigned val_len;
+ int error = 0;
+
+ IFTRACE(D_REQUEST)
+ tptrace(TPPTmisc, "tp_ctloutput cmd so optname mp",
+ cmd, so, optname, mp);
+ ENDTRACE
+ IFDEBUG(D_REQUEST)
+ printf(
+ "tp_ctloutput so 0x%x cmd 0x%x optname 0x%x, mp 0x%x *mp 0x%x tpcb 0x%x\n",
+ so, cmd, optname, mp, mp?*mp:0, tpcb);
+ ENDDEBUG
+ if( tpcb == (struct tp_pcb *)0 ) {
+ error = ENOTSOCK; goto done;
+ }
+ if(*mp == MNULL) {
+ register struct mbuf *m;
+
+ MGET(m, M_DONTWAIT, TPMT_SONAME); /* does off, type, next */
+ if (m == NULL) {
+ splx(s);
+ return ENOBUFS;
+ }
+ m->m_len = 0;
+ m->m_act = 0;
+ *mp = m;
+ }
+
+ /*
+ * Hook so one can set network options via a tp socket.
+ */
+ if ( level == SOL_NETWORK ) {
+ if ((tpcb->tp_nlproto == NULL) || (tpcb->tp_npcb == NULL)) {
+ error = ENOTSOCK; goto done;
+ } else if (tpcb->tp_nlproto->nlp_ctloutput == NULL) {
+ error = EOPNOTSUPP; goto done;
+ }
+ error = (tpcb->tp_nlproto->nlp_ctloutput)(cmd, optname, tpcb->tp_npcb, *mp);
+ splx(s); /* this return bypasses done:, so restore the spl here */
+ return error;
+ } else if ( level == SOL_SOCKET) {
+ if (optname == SO_RCVBUF && cmd == PRCO_SETOPT) {
+ u_long old_credit = tpcb->tp_maxlcredit;
+ tp_rsyset(tpcb);
+ if (tpcb->tp_rhiwat != so->so_rcv.sb_hiwat &&
+ tpcb->tp_state == TP_OPEN &&
+ (old_credit < tpcb->tp_maxlcredit))
+ tp_emit(AK_TPDU_type, tpcb,
+ tpcb->tp_rcvnxt, 0, MNULL);
+ tpcb->tp_rhiwat = so->so_rcv.sb_hiwat;
+ }
+ goto done;
+ } else if ( level != SOL_TRANSPORT ) {
+ error = EOPNOTSUPP; goto done;
+ }
+ if (cmd != PRCO_GETOPT && cmd != PRCO_SETOPT) {
+ error = EOPNOTSUPP; goto done;
+ }
+ if ( so->so_error ) {
+ error = so->so_error; goto done;
+ }
+
+ /* The only options allowed after connection is established
+ * are GET (anything) and SET DISC DATA and SET PERF MEAS
+ */
+ if ( ((so->so_state & SS_ISCONNECTING)||(so->so_state & SS_ISCONNECTED))
+ &&
+ (cmd == PRCO_SETOPT &&
+ optname != TPOPT_DISC_DATA &&
+ optname != TPOPT_CFRM_DATA &&
+ optname != TPOPT_PERF_MEAS &&
+ optname != TPOPT_CDDATA_CLEAR ) ) {
+ error = EISCONN; goto done;
+ }
+ /* The only options allowed after disconnection are GET DISC DATA,
+ * and TPOPT_PSTATISTICS
+ * and they're not allowed if the ref timer has gone off, because
+ * the tpcb is gone
+ */
+ if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
+ if ( so->so_pcb == (caddr_t)0 ) {
+ error = ENOTCONN; goto done;
+ }
+ if ( (tpcb->tp_state == TP_REFWAIT || tpcb->tp_state == TP_CLOSING) &&
+ (optname != TPOPT_DISC_DATA && optname != TPOPT_PSTATISTICS)) {
+ error = ENOTCONN; goto done;
+ }
+ }
+
+ value = mtod(*mp, caddr_t); /* it's aligned, don't worry,
+ * but lint complains about it
+ */
+ val_len = (*mp)->m_len;
+
+ switch (optname) {
+
+ case TPOPT_INTERCEPT:
+#define INA(t) (((struct inpcb *)(t->tp_npcb))->inp_laddr.s_addr)
+#define ISOA(t) (((struct isopcb *)(t->tp_npcb))->isop_laddr->siso_addr)
+
+ if ((so->so_state & SS_PRIV) == 0) {
+ error = EPERM;
+ } else if (cmd != PRCO_SETOPT || tpcb->tp_state != TP_CLOSED ||
+ (tpcb->tp_flags & TPF_GENERAL_ADDR) ||
+ tpcb->tp_next == 0)
+ error = EINVAL;
+ else {
+ register struct tp_pcb *t;
+ error = EADDRINUSE; /* what a goto done out of the loop below reports */
+ for (t = tp_listeners; t; t = t->tp_nextlisten)
+ if ((t->tp_flags & TPF_GENERAL_ADDR) == 0 &&
+ t->tp_domain == tpcb->tp_domain)
+ switch (tpcb->tp_domain) {
+ default:
+ goto done;
+#ifdef INET
+ case AF_INET:
+ if (INA(t) == INA(tpcb))
+ goto done;
+ continue;
+#endif
+#ifdef ISO
+ case AF_ISO:
+ if (bcmp(ISOA(t).isoa_genaddr, ISOA(tpcb).isoa_genaddr,
+ ISOA(t).isoa_len) == 0)
+ goto done;
+ continue;
+#endif
+ }
+ tpcb->tp_lsuffixlen = 0;
+ tpcb->tp_state = TP_LISTENING;
+ error = 0;
+ remque(tpcb);
+ tpcb->tp_next = tpcb->tp_prev = tpcb;
+ tpcb->tp_nextlisten = tp_listeners;
+ tp_listeners = tpcb;
+ }
+ break;
+
+ case TPOPT_MY_TSEL:
+ if ( cmd == PRCO_GETOPT ) {
+ ASSERT( tpcb->tp_lsuffixlen <= MAX_TSAP_SEL_LEN );
+ bcopy((caddr_t)tpcb->tp_lsuffix, value, tpcb->tp_lsuffixlen);
+ (*mp)->m_len = tpcb->tp_lsuffixlen;
+ } else /* cmd == PRCO_SETOPT */ {
+ if( (val_len > MAX_TSAP_SEL_LEN) || (val_len <= 0 )) {
+ printf("val_len 0x%x (*mp)->m_len 0x%x\n", val_len, (*mp)->m_len);
+ error = EINVAL;
+ } else {
+ bcopy(value, (caddr_t)tpcb->tp_lsuffix, val_len);
+ tpcb->tp_lsuffixlen = val_len;
+ }
+ }
+ break;
+
+ case TPOPT_PEER_TSEL:
+ if ( cmd == PRCO_GETOPT ) {
+ ASSERT( tpcb->tp_fsuffixlen <= MAX_TSAP_SEL_LEN );
+ bcopy((caddr_t)tpcb->tp_fsuffix, value, tpcb->tp_fsuffixlen);
+ (*mp)->m_len = tpcb->tp_fsuffixlen;
+ } else /* cmd == PRCO_SETOPT */ {
+ if( (val_len > MAX_TSAP_SEL_LEN) || (val_len <= 0 )) {
+ printf("val_len 0x%x (*mp)->m_len 0x%x\n", val_len, (*mp)->m_len);
+ error = EINVAL;
+ } else {
+ bcopy(value, (caddr_t)tpcb->tp_fsuffix, val_len);
+ tpcb->tp_fsuffixlen = val_len;
+ }
+ }
+ break;
+
+ case TPOPT_FLAGS:
+ IFDEBUG(D_REQUEST)
+ printf("%s TPOPT_FLAGS value 0x%x *value 0x%x, flags 0x%x \n",
+ cmd==PRCO_GETOPT?"GET":"SET",
+ value,
+ *value,
+ tpcb->tp_flags);
+ ENDDEBUG
+
+ if ( cmd == PRCO_GETOPT ) {
+ *(int *)value = (int)tpcb->tp_flags;
+ (*mp)->m_len = sizeof(u_int);
+ } else /* cmd == PRCO_SETOPT */ {
+ error = EINVAL; goto done;
+ }
+ break;
+
+ case TPOPT_PARAMS:
+ /* This handles:
+ * timer values,
+ * class, use of transport expedited data,
+ * max tpdu size, checksum, xtd format and
+ * disconnect indications, and may get rid of connect/disc data
+ */
+ IFDEBUG(D_SETPARAMS)
+ printf("TPOPT_PARAMS value 0x%x, cmd %s \n", value,
+ cmd==PRCO_GETOPT?"GET":"SET");
+ ENDDEBUG
+ IFDEBUG(D_REQUEST)
+ printf("TPOPT_PARAMS value 0x%x, cmd %s \n", value,
+ cmd==PRCO_GETOPT?"GET":"SET");
+ ENDDEBUG
+
+ if ( cmd == PRCO_GETOPT ) {
+ *(struct tp_conn_param *)value = tpcb->_tp_param;
+ (*mp)->m_len = sizeof(tpcb->_tp_param);
+ } else if (val_len < sizeof(struct tp_conn_param)) {
+ error = EINVAL; /* SETOPT: the structure passed in is too short */
+ } else if( (error = /* cmd == PRCO_SETOPT */
+ tp_consistency(tpcb, TP_STRICT | TP_FORCE,
+ (struct tp_conn_param *)value))==0) {
+ /*
+ * tp_consistency doesn't copy the whole set of params
+ */
+ tpcb->_tp_param = *(struct tp_conn_param *)value;
+ (*mp)->m_len = sizeof(tpcb->_tp_param);
+ }
+ break;
+
+ case TPOPT_PSTATISTICS:
+#ifdef TP_PERF_MEAS
+ if (cmd == PRCO_SETOPT) {
+ error = EINVAL; goto done;
+ }
+ IFPERF(tpcb)
+ if (*mp) {
+ struct mbuf * n;
+ do {
+ MFREE(*mp, n);
+ *mp = n;
+ } while (n);
+ }
+ *mp = m_copym(tpcb->tp_p_mbuf, (int)M_COPYALL, M_WAITOK);
+ ENDPERF
+ else {
+ error = EINVAL; goto done;
+ }
+ break;
+#else
+ error = EOPNOTSUPP;
+ goto done;
+#endif /* TP_PERF_MEAS */
+
+ case TPOPT_CDDATA_CLEAR:
+ if (cmd == PRCO_GETOPT) {
+ error = EINVAL;
+ } else {
+ if (tpcb->tp_ucddata) {
+ m_freem(tpcb->tp_ucddata);
+ tpcb->tp_ucddata = 0;
+ }
+ }
+ break;
+
+ case TPOPT_CFRM_DATA:
+ case TPOPT_DISC_DATA:
+ case TPOPT_CONN_DATA:
+ if( tpcb->tp_class == TP_CLASS_0 ) {
+ error = EOPNOTSUPP;
+ break;
+ }
+ IFDEBUG(D_REQUEST)
+ printf("%s\n", optname==TPOPT_DISC_DATA?"DISC data":"CONN data");
+ printf("m_len 0x%x, vallen 0x%x so_snd.cc 0x%x\n",
+ (*mp)->m_len, val_len, so->so_snd.sb_cc);
+ dump_mbuf(so->so_snd.sb_mb, "tp_ctloutput: sosnd ");
+ ENDDEBUG
+ if (cmd == PRCO_SETOPT) {
+ int len = tpcb->tp_ucddata ? tpcb->tp_ucddata->m_len : 0;
+ /* can append connect data in several calls */
+ if (len + val_len >
+ (optname==TPOPT_CONN_DATA?TP_MAX_CR_DATA:TP_MAX_DR_DATA) ) {
+ error = EMSGSIZE; goto done;
+ }
+ (*mp)->m_next = MNULL;
+ (*mp)->m_act = 0;
+ if (tpcb->tp_ucddata)
+ m_cat(tpcb->tp_ucddata, *mp);
+ else
+ tpcb->tp_ucddata = *mp;
+ IFDEBUG(D_REQUEST)
+ dump_mbuf(tpcb->tp_ucddata, "tp_ctloutput after CONN_DATA");
+ ENDDEBUG
+ IFTRACE(D_REQUEST)
+ tptrace(TPPTmisc,"C/D DATA: flags snd.sbcc val_len",
+ tpcb->tp_flags, so->so_snd.sb_cc,val_len,0);
+ ENDTRACE
+ *mp = MNULL; /* the mbuf now belongs to tp_ucddata; keep done: from freeing it */
+ if (optname == TPOPT_CFRM_DATA && (so->so_state & SS_ISCONFIRMING))
+ (void) tp_confirm(tpcb);
+ }
+ break;
+
+ case TPOPT_PERF_MEAS:
+#ifdef TP_PERF_MEAS
+ if (cmd == PRCO_GETOPT) {
+ *(u_int *)value = (u_int)tpcb->tp_perf_on; /* fill all m_len bytes reported below */
+ (*mp)->m_len = sizeof(u_int);
+ } else if (cmd == PRCO_SETOPT) {
+ (*mp)->m_len = 0;
+ if ((*value) != 0 && (*value) != 1 )
+ error = EINVAL;
+ else tpcb->tp_perf_on = (*value);
+ }
+ if( error == 0 && tpcb->tp_perf_on ) /* do not let setup mask an EINVAL */
+ error = tp_setup_perf(tpcb);
+#else /* TP_PERF_MEAS */
+ error = EOPNOTSUPP;
+#endif /* TP_PERF_MEAS */
+ break;
+
+ default:
+ error = EOPNOTSUPP;
+ }
+
+done:
+ IFDEBUG(D_REQUEST)
+ dump_mbuf(so->so_snd.sb_mb, "tp_ctloutput sosnd at end");
+ dump_mbuf(*mp, "tp_ctloutput *mp");
+ ENDDEBUG
+ /*
+ * sigh: getsockopt looks only at m_len : all output data must
+ * reside in the first mbuf
+ */
+ if (*mp) {
+ if (cmd == PRCO_SETOPT) {
+ m_freem(*mp);
+ *mp = MNULL;
+ } else {
+ ASSERT ( m_compress(*mp, mp) <= MLEN );
+ if (error)
+ (*mp)->m_len = 0;
+ IFDEBUG(D_REQUEST)
+ dump_mbuf(*mp, "tp_ctloutput *mp after compress");
+ ENDDEBUG
+ }
+ }
+ splx(s);
+ return error;
+}
diff --git a/sys/netiso/tp_param.h b/sys/netiso/tp_param.h
new file mode 100644
index 000000000000..f1862a243924
--- /dev/null
+++ b/sys/netiso/tp_param.h
@@ -0,0 +1,367 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_param.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_param.h,v 5.3 88/11/18 17:28:18 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_param.h,v $
+ *
+ */
+
+#ifndef __TP_PARAM__
+#define __TP_PARAM__
+
+
+/******************************************************
+ * compile time parameters that can be changed
+ *****************************************************/
+
+#define TP_CLASSES_IMPLEMENTED 0x11 /* zero and 4 */
+
+#define TP_DECBIT_CLEAR_COUNT 3
+
+/*#define N_TPREF 100 */
+#ifdef KERNEL
+extern int N_TPREF;
+#endif
+
+#define TP_SOCKBUFSIZE ((u_long)4096)
+#define TP0_SOCKBUFSIZE ((u_long)512)
+#define MAX_TSAP_SEL_LEN 64
+
+/* maximum tpdu size we'll accept: */
+#define TP_TPDUSIZE 0xc /* 4096 octets for classes 1-4*/
+#define TP0_TPDUSIZE 0xb /* 2048 octets for class 0 */
+#define TP_DFL_TPDUSIZE 0x7 /* 128 octets default */
+ /* NOTE: don't ever negotiate 8192 because could get
+ * wraparound in checksumming
+ * (No mtu is likely to be larger than 4K anyway...)
+ */
+#define TP_NRETRANS 12 /* TCP_MAXRXTSHIFT + 1 */
+#define TP_MAXRXTSHIFT 6 /* factor of 64 */
+#define TP_MAXPORT 0xefff
+
+/* ALPHA: to be used in the context: gain= 1/(2**alpha), or
+ * put another way, gaintimes(x) (x)>>alpha (forgetting the case alpha==0)
+ */
+#define TP_RTT_ALPHA 3
+#define TP_RTV_ALPHA 2
+/*
+ * Retransmission timer value computed from the pcb's tp_rtt and tp_rtv and
+ * the tunables tp_rttadd and tp_rttdiv.
+ * In C, binary + binds tighter than <<, so the whole sum
+ * (tp_rttadd + tp_rtt + tp_rtv) is shifted left by 2 before the division;
+ * the grouping below spells that out.
+ * NOTE(review): the historical parenthesization around tp_rtv suggests that
+ * only tp_rtv was meant to be shifted -- confirm before changing the formula.
+ */
+#define TP_REXMTVAL(tpcb) \
+	(((tp_rttadd + (tpcb)->tp_rtt + (tpcb)->tp_rtv) << 2) / tp_rttdiv)
+/*
+ * Assign value to tv, clamped to the range [min, max].  The macro is an
+ * expression whose value is what was stored in tv.
+ * Every argument is parenthesized so that compound expressions can be passed
+ * safely; tv is still evaluated more than once, so it must be free of side
+ * effects.
+ */
+#define TP_RANGESET(tv, value, min, max) \
+	(((tv) = (value)) > (max) ? ((tv) = (max)) : \
+	    ((tv) < (min) ? ((tv) = (min)) : (tv)))
+
+/*
+ * not sure how to treat data on disconnect
+ */
+#define T_CONN_DATA 0x1
+#define T_DISCONNECT 0x2
+#define T_DISC_DATA 0x4
+#define T_XDATA 0x8
+
+#define ISO_CLNS 0
+#define IN_CLNS 1
+#define ISO_CONS 2
+#define ISO_COSNS 3
+#define TP_MAX_NETSERVICES 3
+
+/* Indices into tp stats ackreason[i] */
+#define _ACK_DONT_ 0
+#define _ACK_STRAT_EACH_ 0x1
+#define _ACK_STRAT_FULLWIN_ 0x2
+#define _ACK_DUP_ 0x3
+#define _ACK_EOT_ 0x4
+#define _ACK_REORDER_ 0x5
+#define _ACK_USRRCV_ 0x6
+#define _ACK_FCC_ 0x7
+#define _ACK_NUM_REASONS_ 0x8
+
+/* masks for use in tp_stash() */
+#define ACK_DONT 0
+#define ACK_STRAT_EACH (1<< _ACK_STRAT_EACH_)
+#define ACK_STRAT_FULLWIN (1<< _ACK_STRAT_FULLWIN_)
+#define ACK_DUP (1<< _ACK_DUP_)
+#define ACK_EOT (1<< _ACK_EOT_)
+#define ACK_REORDER (1<< _ACK_REORDER_)
+
+/******************************************************
+ * constants used in the protocol
+ *****************************************************/
+
+#define TP_VERSION 0x1
+
+#define TP_MAX_HEADER_LEN 256
+
+#define TP_MIN_TPDUSIZE 0x7 /* 128 octets */
+#define TP_MAX_TPDUSIZE 0xd /* 8192 octets */
+
+#define TP_MAX_XPD_DATA 0x10 /* 16 octets */
+#define TP_MAX_CC_DATA 0x20 /* 32 octets */
+#define TP_MAX_CR_DATA TP_MAX_CC_DATA
+#define TP_MAX_DR_DATA 0x40 /* 64 octets */
+
+#define TP_XTD_FMT_BIT 0x80000000
+#define TP_XTD_FMT_MASK 0x7fffffff
+#define TP_NML_FMT_BIT 0x80
+#define TP_NML_FMT_MASK 0x7f
+
+/*
+ * values for the tpdu_type field, 2nd byte in a tpdu
+ */
+
+#define TP_MIN_TPDUTYPE 0x1
+
+#define XPD_TPDU_type 0x1
+#define XAK_TPDU_type 0x2
+#define GR_TPDU_type 0x3
+#define AK_TPDU_type 0x6
+#define ER_TPDU_type 0x7
+#define DR_TPDU_type 0x8
+#define DC_TPDU_type 0xc
+#define CC_TPDU_type 0xd
+#define CR_TPDU_type 0xe
+#define DT_TPDU_type 0xf
+
+#define TP_MAX_TPDUTYPE 0xf
+
+/*
+ * identifiers for the variable-length options in tpdus
+ */
+
+#define TPP_acktime 0x85
+#define TPP_residER 0x86
+#define TPP_priority 0x87
+#define TPP_transdelay 0x88
+#define TPP_throughput 0x89
+#define TPP_subseq 0x8a
+#define TPP_flow_cntl_conf 0x8c /* not implemented */
+#define TPP_addl_info 0xe0
+#define TPP_tpdu_size 0xc0
+#define TPP_calling_sufx 0xc1
+#define TPP_invalid_tpdu 0xc1 /* the bozos used a value twice */
+#define TPP_called_sufx 0xc2
+#define TPP_checksum 0xc3
+#define TPP_vers 0xc4
+#define TPP_security 0xc5
+#define TPP_addl_opt 0xc6
+#define TPP_alt_class 0xc7
+#define TPP_perf_meas 0xc8 /* local item : perf meas on, svp */
+#define TPP_ptpdu_size 0xf0 /* preferred TPDU size */
+#define TPP_inact_time 0xf2 /* inactivity time exchanged */
+
+
+/******************************************************
+ * Some fundamental data types
+ *****************************************************/
+#ifndef TRUE
+#define TRUE 1
+#endif /* TRUE */
+
+#ifndef FALSE
+#define FALSE 0
+#endif /* FALSE */
+
+#define TP_LOCAL 22
+#define TP_FOREIGN 33
+
+#ifndef EOK
+#define EOK 0
+#endif /* EOK */
+
+#define TP_CLASS_0 (1<<0)
+#define TP_CLASS_1 (1<<1)
+#define TP_CLASS_2 (1<<2)
+#define TP_CLASS_3 (1<<3)
+#define TP_CLASS_4 (1<<4)
+
+#define TP_FORCE 0x1
+#define TP_STRICT 0x2
+
+#ifndef MNULL
+#define MNULL (struct mbuf *)0
+#endif /* MNULL */
+ /* if ../sys/mbuf.h gets MT_types up to 0x40, these will
+ * have to be changed:
+ */
+#define MT_XPD 0x44
+#define MT_EOT 0x40
+
+#define TP_ENOREF 0x80000000
+
+typedef unsigned int SeqNum;
+typedef unsigned short RefNum;
+typedef int ProtoHook;
+
+/******************************************************
+ * Macro used all over, for driver
+ *****************************************************/
+
+/*
+ * Store the event number x in E.ev_number and hand the event to tp_driver().
+ * Expands to a comma expression, so its value is tp_driver()'s result.
+ * The expansion refers to the identifiers `tpcb' and `E' directly; both must
+ * be in scope wherever the macro is used.
+ */
+#define DoEvent(x) \
+ ((E.ev_number=(x)),(tp_driver(tpcb,&E)))
+
+/******************************************************
+ * Some macros used all over, for timestamping
+ *****************************************************/
+
+/* Copy the global `time' into *tvalp. */
+#define GET_CUR_TIME(tvalp) (*(tvalp) = time)
+
+/*
+ * Store in *diffp the difference between the global `time' and *oldtvalp.
+ * The two fields are subtracted independently; when the microsecond
+ * difference comes out negative, one second is borrowed: tv_sec is
+ * decremented and 1000000 is added to tv_usec, which brings tv_usec back
+ * into the range [0, 999999].  (Subtracting the negative value from 1000000
+ * instead would yield more than one second's worth of microseconds.)
+ * Expands to a brace-enclosed block, not an expression.
+ */
+#define GET_TIME_SINCE(oldtvalp, diffp) {\
+	(diffp)->tv_sec = time.tv_sec - (oldtvalp)->tv_sec;\
+	(diffp)->tv_usec = time.tv_usec - (oldtvalp)->tv_usec;\
+	if ((diffp)->tv_usec < 0) {\
+		(diffp)->tv_sec--;\
+		(diffp)->tv_usec += 1000000;\
+	}\
+}
+
+/******************************************************
+ * Some macros used for address families
+ *****************************************************/
+
+#define satosiso(ADDR) ((struct sockaddr_iso *)(ADDR))
+#define satosin(ADDR) ((struct sockaddr_in *)(ADDR))
+
+/******************************************************
+ * Macro used for changing types of mbufs
+ *****************************************************/
+
+/*
+ * Change the type of mbuf m to TYPE, keeping the per-type counters in
+ * mbstat.m_mtypes[] in step: the old type's count is decremented and the new
+ * type's count incremented.  Nothing happens if m already has type TYPE.
+ * The expansion is a bare `if' statement (no enclosing braces or do/while),
+ * so take care when using it directly before an `else'.  TYPE and m are
+ * evaluated more than once.
+ */
+#define CHANGE_MTYPE(m, TYPE)\
+ if((m)->m_type != TYPE) { \
+ mbstat.m_mtypes[(m)->m_type]--; mbstat.m_mtypes[TYPE]++; \
+ (m)->m_type = TYPE; \
+ }
+
+/******************************************************
+ * Macros used for adding options to a tpdu header and for
+ * parsing the headers.
+ * Options are variable-length and must be bcopy-d because on the
+ * RT your assignments must be N-word aligned for objects of length
+ * N. Such a drag.
+ *****************************************************/
+
+/*
+ * Overlay for one variable-length option inside a tpdu header:
+ * a one-byte option code, a one-byte length, then the value bytes.
+ * tpv_val only marks where the value starts; the value itself may be longer
+ * than one byte.
+ * NOTE(review): tpv_len is a plain char, so whether lengths above 127 read
+ * back as negative depends on the platform's char signedness -- confirm.
+ */
+struct tp_vbp {
+ u_char tpv_code;
+ char tpv_len;
+ char tpv_val;
+};
+/* View the bytes at x as an option. */
+#define vbptr(x) ((struct tp_vbp *)(x))
+/* Read the option value at x directly as an object of the given type
+ * (a direct dereference, unlike vb_getval below which copies with bcopy). */
+#define vbval(x,type) (*((type *)&(((struct tp_vbp *)(x))->tpv_val)))
+/* Option code and option length of the option at x. */
+#define vbcode(x) (vbptr(x)->tpv_code)
+#define vblen(x) (vbptr(x)->tpv_len)
+
+/* Copy sizeof(type) bytes from the object src into the value of option dst. */
+#define vb_putval(dst,type,src)\
+ bcopy((caddr_t)&(src),(caddr_t)&(((struct tp_vbp *)(dst))->tpv_val),\
+ sizeof(type))
+
+/* Copy sizeof(type) bytes from the value of option src into the object dst. */
+#define vb_getval(src,type,dst)\
+bcopy((caddr_t)&(((struct tp_vbp *)(src))->tpv_val),(caddr_t)&(dst),sizeof(type))
+
+/*
+ * Append one variable-length option to the tpdu header DU.
+ * The option is written at offset DU->tpdu_li from the start of DU:
+ * `type' goes into the code byte, `len' into the length byte, and `len'
+ * bytes are copied with bcopy from the object `src' (its address is taken,
+ * so src must be an lvalue).  DU->tpdu_li is then advanced past the option.
+ * Expands to a brace-enclosed block that declares a local named P.
+ * All arguments are parenthesized so that expressions may be passed; they
+ * are evaluated more than once.
+ */
+#define ADDOPTION(type, DU, len, src)\
+{	register caddr_t P;\
+	P = (caddr_t)(DU) + (int)((DU)->tpdu_li);\
+	vbptr(P)->tpv_code = (type);\
+	vbptr(P)->tpv_len = (len);\
+	bcopy((caddr_t)&(src), (caddr_t)&(vbptr(P)->tpv_val), (unsigned)(len));\
+	(DU)->tpdu_li += (len) + 2; /* 1 for code, 1 for length */\
+}
+/******************************************************
+ * Macro for the local credit:
+ * uses max transmission unit for the ll
+ * (as modified by the max TPDU size negotiated)
+ *****************************************************/
+
+/*
+ * With ARGO_DEBUG defined (and LOCAL_CREDIT_EXPAND not defined) the work is
+ * delegated to the function tp_local_credit(); otherwise it is expanded
+ * inline as below.
+ *
+ * Inline form: does nothing unless tp_rsycnt is 0.  Otherwise the free space
+ * in the socket's receive buffer is converted to a count of tpdus of size
+ * tp_l_tpdusize (negative space counts as 0) and capped at tp_maxlcredit.
+ * If tp_cebit_off is clear, tp_lcredit becomes ROUND(tp_win_recv), lowered
+ * to that count when the count is smaller; if tp_cebit_off is set,
+ * tp_lcredit is simply the count.
+ * The macro argument is parenthesized at every use.
+ */
+#if defined(ARGO_DEBUG)&&!defined(LOCAL_CREDIT_EXPAND)
+#define LOCAL_CREDIT(tpcb) tp_local_credit(tpcb)
+#else
+#define LOCAL_CREDIT(tpcb) { if ((tpcb)->tp_rsycnt == 0) {\
+	register struct sockbuf *xxsb = &((tpcb)->tp_sock->so_rcv);\
+	register int xxi = sbspace(xxsb);\
+	xxi = (xxi<0) ? 0 : ((xxi) / (tpcb)->tp_l_tpdusize);\
+	xxi = min(xxi, (tpcb)->tp_maxlcredit); \
+	if (!((tpcb)->tp_cebit_off)) { \
+		(tpcb)->tp_lcredit = ROUND((tpcb)->tp_win_recv); \
+		if (xxi < (tpcb)->tp_lcredit) { \
+			(tpcb)->tp_lcredit = xxi; \
+		} \
+	} else \
+		(tpcb)->tp_lcredit = xxi; \
+} }
+#endif /* ARGO_DEBUG */
+
+#ifdef KERNEL
+extern int tp_rttadd, tp_rttdiv;
+#include <sys/syslog.h>
+#define printf logpri(LOG_DEBUG),addlog
+
+#ifndef tp_NSTATES
+
+#include <netiso/tp_states.h>
+#include <netiso/tp_events.h>
+#if defined(__STDC__) || defined(__cplusplus)
+#undef ATTR
+#define ATTR(X) ev_union.EV_ ## X
+#endif /* defined(__STDC__) || defined(__cplusplus) */
+
+#endif /* tp_NSTATES */
+#endif /* KERNEL */
+
+#endif /* __TP_PARAM__ */
diff --git a/sys/netiso/tp_pcb.c b/sys/netiso/tp_pcb.c
new file mode 100644
index 000000000000..de345c1e377d
--- /dev/null
+++ b/sys/netiso/tp_pcb.c
@@ -0,0 +1,999 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_pcb.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_pcb.c,v 5.4 88/11/18 17:28:24 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_pcb.c,v $
+ *
+ *
+ * This is the initialization and cleanup stuff -
+ * for the tp machine in general as well as for the individual pcbs.
+ * tp_init() is called at system startup. tp_attach() and tp_getref() are
+ * called when a socket is created. tp_detach() and tp_freeref()
+ * are called during the closing stage and/or when the reference timer
+ * goes off.
+ * tp_soisdisconnecting() and tp_soisdisconnected() are tp-specific
+ * versions of soisdisconnect*
+ * and are called (obviously) during the closing phase.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_ip.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_meas.h>
+#include <netiso/tp_seq.h>
+#include <netiso/tp_clnp.h>
+
+/* ticks are in units of:
+ * 500 nano-fortnights ;-) or
+ * 500 ms or
+ * 1/2 second
+ */
+
+/*
+ * Default connection parameters, one entry per network service, in the order
+ * ISO_CLNS, IN_CLNS, ISO_CONS, ISO_COSNS (the last field of each entry names
+ * the service it belongs to).  The initializers are positional: the trailing
+ * comment on each line names the struct tp_conn_param field being set.
+ * Timer values are in ticks of 1/2 second; the leading comment on each timer
+ * line gives the equivalent wall-clock time.
+ */
+struct tp_conn_param tp_conn_param[] = {
+ /* ISO_CLNS: TP4 CONNECTION LESS */
+ {
+ TP_NRETRANS, /* short p_Nretrans; */
+ 20, /* 10 sec */ /* short p_dr_ticks; */
+
+ 20, /* 10 sec */ /* short p_cc_ticks; */
+ 20, /* 10 sec */ /* short p_dt_ticks; */
+
+ 40, /* 20 sec */ /* short p_x_ticks; */
+ 80, /* 40 sec */ /* short p_cr_ticks;*/
+
+ 240, /* 2 min */ /* short p_keepalive_ticks;*/
+ 10, /* 5 sec */ /* short p_sendack_ticks; */
+
+ 600, /* 5 min */ /* short p_ref_ticks; */
+ 360, /* 3 min */ /* short p_inact_ticks; */
+
+ (short) 100, /* short p_lcdtfract */
+ (short) TP_SOCKBUFSIZE, /* short p_winsize */
+ TP_TPDUSIZE, /* u_char p_tpdusize */
+
+ TPACK_WINDOW, /* 4 bits p_ack_strat */
+ TPRX_USE_CW | TPRX_FASTSTART,
+ /* 4 bits p_rx_strat*/
+ TP_CLASS_4 | TP_CLASS_0,/* 5 bits p_class */
+ 1, /* 1 bit xtd format */
+ 1, /* 1 bit xpd service */
+ 1, /* 1 bit use_checksum */
+ 0, /* 1 bit use net xpd */
+ 0, /* 1 bit use rcc */
+ 0, /* 1 bit use efc */
+ 1, /* no disc indications */
+ 0, /* don't change params */
+ ISO_CLNS, /* p_netservice */
+ },
+ /* IN_CLNS: TP4 CONNECTION LESS */
+ {
+ TP_NRETRANS, /* short p_Nretrans; */
+ 20, /* 10 sec */ /* short p_dr_ticks; */
+
+ 20, /* 10 sec */ /* short p_cc_ticks; */
+ 20, /* 10 sec */ /* short p_dt_ticks; */
+
+ 40, /* 20 sec */ /* short p_x_ticks; */
+ 80, /* 40 sec */ /* short p_cr_ticks;*/
+
+ 240, /* 2 min */ /* short p_keepalive_ticks;*/
+ 10, /* 5 sec */ /* short p_sendack_ticks; */
+
+ 600, /* 5 min */ /* short p_ref_ticks; */
+ 360, /* 3 min */ /* short p_inact_ticks; */
+
+ (short) 100, /* short p_lcdtfract */
+ (short) TP_SOCKBUFSIZE, /* short p_winsize */
+ TP_TPDUSIZE, /* u_char p_tpdusize */
+
+ TPACK_WINDOW, /* 4 bits p_ack_strat */
+ TPRX_USE_CW | TPRX_FASTSTART,
+ /* 4 bits p_rx_strat*/
+ TP_CLASS_4, /* 5 bits p_class */
+ 1, /* 1 bit xtd format */
+ 1, /* 1 bit xpd service */
+ 1, /* 1 bit use_checksum */
+ 0, /* 1 bit use net xpd */
+ 0, /* 1 bit use rcc */
+ 0, /* 1 bit use efc */
+ 1, /* no disc indications */
+ 0, /* don't change params */
+ IN_CLNS, /* p_netservice */
+ },
+ /* ISO_CONS: TP0 CONNECTION MODE */
+ {
+ TP_NRETRANS, /* short p_Nretrans; */
+ 0, /* n/a */ /* short p_dr_ticks; */
+
+ 40, /* 20 sec */ /* short p_cc_ticks; */
+ 0, /* n/a */ /* short p_dt_ticks; */
+
+ 0, /* n/a */ /* short p_x_ticks; */
+ 360, /* 3 min */ /* short p_cr_ticks;*/
+
+ 0, /* n/a */ /* short p_keepalive_ticks;*/
+ 0, /* n/a */ /* short p_sendack_ticks; */
+
+ 600, /* for cr/cc to clear *//* short p_ref_ticks; */
+ 0, /* n/a */ /* short p_inact_ticks; */
+
+ /* Use tp4 defaults just in case the user changes ONLY
+ * the class
+ */
+ (short) 100, /* short p_lcdtfract */
+ (short) TP0_SOCKBUFSIZE, /* short p_winsize */
+ TP0_TPDUSIZE, /* 8 bits p_tpdusize */
+
+ 0, /* 4 bits p_ack_strat */
+ 0, /* 4 bits p_rx_strat*/
+ TP_CLASS_0, /* 5 bits p_class */
+ 0, /* 1 bit xtd format */
+ 0, /* 1 bit xpd service */
+ 0, /* 1 bit use_checksum */
+ 0, /* 1 bit use net xpd */
+ 0, /* 1 bit use rcc */
+ 0, /* 1 bit use efc */
+ 0, /* no disc indications */
+ 0, /* don't change params */
+ ISO_CONS, /* p_netservice */
+ },
+ /* ISO_COSNS: TP4 CONNECTION LESS SERVICE over CONSNS */
+ {
+ TP_NRETRANS, /* short p_Nretrans; */
+ 40, /* 20 sec */ /* short p_dr_ticks; */
+
+ 40, /* 20 sec */ /* short p_cc_ticks; */
+ 80, /* 40 sec */ /* short p_dt_ticks; */
+
+ 120, /* 1 min */ /* short p_x_ticks; */
+ 360, /* 3 min */ /* short p_cr_ticks;*/
+
+ 360, /* 3 min */ /* short p_keepalive_ticks;*/
+ 20, /* 10 sec */ /* short p_sendack_ticks; */
+
+ 600, /* 5 min */ /* short p_ref_ticks; */
+ 480, /* 4 min */ /* short p_inact_ticks; */
+
+ (short) 100, /* short p_lcdtfract */
+ (short) TP0_SOCKBUFSIZE, /* short p_winsize */
+ TP0_TPDUSIZE, /* u_char p_tpdusize */
+
+ TPACK_WINDOW, /* 4 bits p_ack_strat */
+ TPRX_USE_CW , /* No fast start */
+ /* 4 bits p_rx_strat*/
+ TP_CLASS_4 | TP_CLASS_0,/* 5 bits p_class */
+ 0, /* 1 bit xtd format */
+ 1, /* 1 bit xpd service */
+ 1, /* 1 bit use_checksum */
+ 0, /* 1 bit use net xpd */
+ 0, /* 1 bit use rcc */
+ 0, /* 1 bit use efc */
+ 0, /* no disc indications */
+ 0, /* don't change params */
+ ISO_COSNS, /* p_netservice */
+ },
+};
+
+#ifdef INET
+int in_putnetaddr();
+int in_getnetaddr();
+int in_cmpnetaddr();
+int in_putsufx();
+int in_getsufx();
+int in_recycle_tsuffix();
+int tpip_mtu();
+int in_pcbbind();
+int in_pcbconnect();
+int in_pcbdisconnect();
+int in_pcbdetach();
+int in_pcballoc();
+int tpip_output();
+int tpip_output_dg();
+struct inpcb tp_inpcb;
+#endif /* INET */
+#ifdef ISO
+int iso_putnetaddr();
+int iso_getnetaddr();
+int iso_cmpnetaddr();
+int iso_putsufx();
+int iso_getsufx();
+int iso_recycle_tsuffix();
+int tpclnp_mtu();
+int iso_pcbbind();
+int iso_pcbconnect();
+int iso_pcbdisconnect();
+int iso_pcbdetach();
+int iso_pcballoc();
+int tpclnp_output();
+int tpclnp_output_dg();
+int iso_nlctloutput();
+struct isopcb tp_isopcb;
+#endif /* ISO */
+#ifdef TPCONS
+int iso_putnetaddr();
+int iso_getnetaddr();
+int iso_cmpnetaddr();
+int iso_putsufx();
+int iso_getsufx();
+int iso_recycle_tsuffix();
+int iso_pcbbind();
+int tpcons_pcbconnect();
+int tpclnp_mtu();
+int iso_pcbdisconnect();
+int iso_pcbdetach();
+int iso_pcballoc();
+int tpcons_output();
+struct isopcb tp_isopcb;
+#endif /* TPCONS */
+
+
+/*
+ * Network-layer switch: one entry of address-handling, pcb-management and
+ * output routines per network service, in the order ISO_CLNS, IN_CLNS,
+ * ISO_CONS, followed by an all-zero end marker.  An entry whose support is
+ * not compiled in (ISO, INET, or ISO together with TPCONS) is replaced by an
+ * all-zero placeholder so that the remaining entries keep their positions.
+ * The last member of each entry is the address of the list head for that
+ * family's pcbs (tp_isopcb or tp_inpcb).
+ * NOTE(review): index 3 (the value of ISO_COSNS) is the zeroed end marker,
+ * not a usable entry -- confirm that no pcb with that netservice is ever
+ * used to index this table.
+ */
+struct nl_protosw nl_protosw[] = {
+ /* ISO_CLNS */
+#ifdef ISO
+ { AF_ISO, iso_putnetaddr, iso_getnetaddr, iso_cmpnetaddr,
+ iso_putsufx, iso_getsufx,
+ iso_recycle_tsuffix,
+ tpclnp_mtu, iso_pcbbind, iso_pcbconnect,
+ iso_pcbdisconnect, iso_pcbdetach,
+ iso_pcballoc,
+ tpclnp_output, tpclnp_output_dg, iso_nlctloutput,
+ (caddr_t) &tp_isopcb,
+ },
+#else
+ { 0 },
+#endif /* ISO */
+ /* IN_CLNS */
+#ifdef INET
+ { AF_INET, in_putnetaddr, in_getnetaddr, in_cmpnetaddr,
+ in_putsufx, in_getsufx,
+ in_recycle_tsuffix,
+ tpip_mtu, in_pcbbind, in_pcbconnect,
+ in_pcbdisconnect, in_pcbdetach,
+ in_pcballoc,
+ tpip_output, tpip_output_dg, /* nl_ctloutput */ NULL,
+ (caddr_t) &tp_inpcb,
+ },
+#else
+ { 0 },
+#endif /* INET */
+ /* ISO_CONS */
+#if defined(ISO) && defined(TPCONS)
+ { AF_ISO, iso_putnetaddr, iso_getnetaddr, iso_cmpnetaddr,
+ iso_putsufx, iso_getsufx,
+ iso_recycle_tsuffix,
+ tpclnp_mtu, iso_pcbbind, tpcons_pcbconnect,
+ iso_pcbdisconnect, iso_pcbdetach,
+ iso_pcballoc,
+ tpcons_output, tpcons_output, iso_nlctloutput,
+ (caddr_t) &tp_isopcb,
+ },
+#else
+ { 0 },
+#endif /* ISO_CONS */
+ /* End of protosw marker */
+ { 0 }
+};
+
+u_long tp_sendspace = 1024 * 4;
+u_long tp_recvspace = 1024 * 4;
+
+/*
+ * NAME: tp_init()
+ *
+ * CALLED FROM:
+ * autoconf through the protosw structure
+ *
+ * FUNCTION:
+ * initialize tp machine
+ *
+ * RETURNS: 0, always
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+int
+tp_init()
+{
+ static int init_done=0;
+ void tp_timerinit();
+
+ if (init_done++)
+ return 0;
+
+
+ /* FOR INET */
+ tp_inpcb.inp_next = tp_inpcb.inp_prev = &tp_inpcb;
+ /* FOR ISO */
+ tp_isopcb.isop_next = tp_isopcb.isop_prev = &tp_isopcb;
+
+ tp_start_win = 2;
+
+ tp_timerinit();
+ bzero((caddr_t)&tp_stat, sizeof(struct tp_stat));
+ return 0;
+}
+
+/*
+ * NAME: tp_soisdisconnecting()
+ *
+ * CALLED FROM:
+ * tp.trans
+ *
+ * FUNCTION and ARGUMENTS:
+ * Set state of the socket (so) to reflect the fact that we're disconnectING
+ *
+ * RETURNS: Nada
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ * This differs from the regular soisdisconnecting() in that the latter
+ * also sets the SS_CANTRECVMORE and SS_CANTSENDMORE flags.
+ * We don't want to set those flags because those flags will cause
+ * a SIGPIPE to be delivered in sosend() and we don't like that.
+ * If anyone else is sleeping on this socket, wake 'em up.
+ */
+void
+tp_soisdisconnecting(so)
+	register struct socket *so;
+{
+	/*
+	 * Let the generic socket routine do the marking, then take
+	 * SS_CANTSENDMORE back off again.
+	 */
+	soisdisconnecting(so);
+	so->so_state &= ~SS_CANTSENDMORE;
+
+	/* Performance measurement: record the close, then switch it off. */
+	IFPERF(sototpcb(so))
+		register struct tp_pcb *tpcb = sototpcb(so);
+		u_int foreign_sufx, local_sufx;
+
+		/* the suffixes are copied out bytewise into u_int temporaries */
+		bcopy((caddr_t)tpcb->tp_lsuffix, (caddr_t)&local_sufx,
+		    sizeof(u_int));
+		bcopy((caddr_t)tpcb->tp_fsuffix, (caddr_t)&foreign_sufx,
+		    sizeof(u_int));
+
+		tpmeas(tpcb->tp_lref, TPtime_close, &time,
+		    foreign_sufx, local_sufx, tpcb->tp_fref);
+		tpcb->tp_perf_on = 0;	/* turn perf off */
+	ENDPERF
+}
+
+
+/*
+ * NAME: tp_soisdisconnected()
+ *
+ * CALLED FROM:
+ * tp.trans
+ *
+ * FUNCTION and ARGUMENTS:
+ * Set state of the socket (so) to reflect the fact that we're disconnectED
+ * Set the state of the reference structure to closed, and
+ * recycle the suffix.
+ * Start a reference timer.
+ *
+ * RETURNS: Nada
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ * This differs from the regular soisdisconnected() in that the latter
+ * also sets the SS_CANTRECVMORE and SS_CANTSENDMORE flags.
+ * We don't want to set those flags because those flags will cause
+ * a SIGPIPE to be delivered in sosend() and we don't like that.
+ * If anyone else is sleeping on this socket, wake 'em up.
+ */
+void
+tp_soisdisconnected(tpcb)
+	register struct tp_pcb *tpcb;
+{
+	register struct socket *so = tpcb->tp_sock;
+
+	/*
+	 * Mark the socket via the generic routine, then clear
+	 * SS_CANTSENDMORE again.
+	 * NOTE(review): this calls soisdisconnecting(), not
+	 * soisdisconnected() -- confirm that is intended.
+	 */
+	soisdisconnecting(so);
+	so->so_state &= ~SS_CANTSENDMORE;
+
+	/* Performance measurement: record the close, then switch it off. */
+	IFPERF(tpcb)
+		register struct tp_pcb *ttpcb = sototpcb(so);
+		u_int fsufx, lsufx;
+
+		/* the suffixes are copied out bytewise into u_int temporaries */
+		bcopy((caddr_t)ttpcb->tp_fsuffix, (caddr_t)&fsufx,
+		    sizeof(u_int));
+		bcopy((caddr_t)ttpcb->tp_lsuffix, (caddr_t)&lsufx,
+		    sizeof(u_int));
+
+		/*
+		 * Pass the suffix values themselves, foreign first, exactly
+		 * as tp_soisdisconnecting() does, rather than the addresses
+		 * of the local temporaries.
+		 */
+		tpmeas(ttpcb->tp_lref, TPtime_close,
+		    &time, fsufx, lsufx, ttpcb->tp_fref);
+		tpcb->tp_perf_on = 0;	/* turn perf off */
+	ENDPERF
+
+	/* freeze the reference, recycle the suffix, start the reference timer */
+	tpcb->tp_refstate = REF_FROZEN;
+	tp_recycle_tsuffix(tpcb);
+	tp_etimeout(tpcb, TM_reference, (int)tpcb->tp_refer_ticks);
+}
+
+/*
+ * NAME: tp_freeref()
+ *
+ * CALLED FROM:
+ * tp.trans when the reference timer goes off, and
+ * from tp_attach() and tp_detach() when a tpcb is partially set up but not
+ * set up enough to have a ref timer set for it, and it's discarded
+ * due to some sort of error or an early close()
+ *
+ * FUNCTION and ARGUMENTS:
+ * Frees the reference represented by (r) for re-use.
+ *
+ * RETURNS: Nothing
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES: better be called at clock priority !!!!!
+ */
+void
+tp_freeref(n)
+RefNum n;
+{
+	register struct tp_ref *slot = tp_ref + n;
+	register struct tp_pcb *tpcb = slot->tpr_pcb;
+
+	IFDEBUG(D_TIMER)
+		printf("tp_freeref called for ref %d pcb %x maxrefopen %d\n",
+		    n, tpcb, tp_refinfo.tpr_maxopen);
+	ENDDEBUG
+	IFTRACE(D_TIMER)
+		tptrace(TPPTmisc, "tp_freeref ref maxrefopen pcb",
+		    n, tp_refinfo.tpr_maxopen, tpcb, 0);
+	ENDTRACE
+
+	/* A slot with no pcb attached is already free: nothing to do. */
+	if (tpcb == 0)
+		return;
+
+	IFDEBUG(D_CONN)
+		printf("tp_freeref: CLEARING tpr_pcb 0x%x\n", tpcb);
+	ENDDEBUG
+
+	/* Break the link in both directions. */
+	slot->tpr_pcb = (struct tp_pcb *)0;
+	tpcb->tp_refstate = REF_FREE;
+
+	/*
+	 * Lower tpr_maxopen to the highest slot that still holds a pcb,
+	 * scanning downward from the old maximum; it ends up 0 when no
+	 * slot above tp_ref[0] is occupied.
+	 */
+	slot = tp_ref + tp_refinfo.tpr_maxopen;
+	while (slot > tp_ref && slot->tpr_pcb == 0)
+		slot--;
+	tp_refinfo.tpr_maxopen = slot - tp_ref;
+	tp_refinfo.tpr_numopen--;
+
+	IFDEBUG(D_TIMER)
+		printf("tp_freeref ends w/ maxrefopen %d\n", tp_refinfo.tpr_maxopen);
+	ENDDEBUG
+}
+
+/*
+ * NAME: tp_getref()
+ *
+ * CALLED FROM:
+ * tp_attach()
+ *
+ * FUNCTION and ARGUMENTS:
+ * obtains the next free reference and allocates the appropriate
+ * ref structure, links that structure to (tpcb)
+ *
+ * RETURN VALUE:
+ * a reference number
+ * or TP_ENOREF
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+u_long
+tp_getref(tpcb)
+	register struct tp_pcb *tpcb;
+{
+	register struct tp_ref *slot, *limit;
+	register int idx;
+	caddr_t old_base;
+	unsigned old_bytes;
+
+	/*
+	 * Count the new reference first.  While the count stays below the
+	 * table size, look for an unused slot; tp_ref[0] is never used, so
+	 * the search starts at index 1.
+	 */
+	if (++tp_refinfo.tpr_numopen < tp_refinfo.tpr_size) {
+		limit = tp_refinfo.tpr_base + tp_refinfo.tpr_size;
+		for (slot = tp_refinfo.tpr_base + 1; slot < limit; slot++)
+			if (slot->tpr_pcb == 0)
+				goto got_one;
+	}
+
+	/*
+	 * No free slot: double the table.  The old contents are copied into
+	 * the first half of the new allocation, the second half is zeroed,
+	 * and the first entry of the second half is handed out.
+	 */
+	old_base = (caddr_t)tp_refinfo.tpr_base;
+	old_bytes = tp_refinfo.tpr_size * sizeof(struct tp_ref);
+	slot = (struct tp_ref *) malloc(old_bytes + old_bytes, M_PCB, M_NOWAIT);
+	if (slot == 0) {
+		/* allocation failed: undo the count and report no reference */
+		--tp_refinfo.tpr_numopen;
+		return (TP_ENOREF);
+	}
+	tp_refinfo.tpr_base = tp_ref = slot;
+	tp_refinfo.tpr_size *= 2;
+	bcopy(old_base, (caddr_t)slot, old_bytes);
+	free(old_base, M_PCB);
+	slot = (struct tp_ref *)(old_bytes + (caddr_t)slot);
+	bzero((caddr_t)slot, old_bytes);
+
+got_one:
+	/* Link slot and pcb, and raise tpr_maxopen if this index is higher. */
+	slot->tpr_pcb = tpcb;
+	tpcb->tp_refstate = REF_OPENING;
+	idx = slot - tp_refinfo.tpr_base;
+	if (tp_refinfo.tpr_maxopen < idx)
+		tp_refinfo.tpr_maxopen = idx;
+	return (u_long)idx;
+}
+
+/*
+ * NAME: tp_set_npcb()
+ *
+ * CALLED FROM:
+ * tp_attach(), tp_route_to()
+ *
+ * FUNCTION and ARGUMENTS:
+ * given a tpcb, allocate an appropriate lower-level npcb, freeing
+ * any old ones that might need re-assigning.
+ */
+int
+tp_set_npcb(tpcb)
+	register struct tp_pcb *tpcb;
+{
+	register struct socket *so = tpcb->tp_sock;
+	int error;
+
+	/*
+	 * If a network-layer pcb is already attached, detach it first.
+	 * SS_NOFDREF is cleared for the duration of the detach call and the
+	 * saved socket state is restored afterwards.
+	 * NOTE(review): presumably this keeps the detach routine from
+	 * disposing of the socket itself -- confirm against nlp_pcbdetach.
+	 */
+	if (tpcb->tp_nlproto && tpcb->tp_npcb) {
+		short so_state = so->so_state;
+
+		so->so_state &= ~SS_NOFDREF;
+		tpcb->tp_nlproto->nlp_pcbdetach(tpcb->tp_npcb);
+		so->so_state = so_state;
+	}
+
+	/* pick the switch entry for this pcb's network service */
+	tpcb->tp_nlproto = &nl_protosw[tpcb->tp_netservice];
+
+	/*
+	 * xx_pcballoc sets so_pcb: take the new network-layer pcb from
+	 * there, then point so_pcb back at the tpcb.
+	 */
+	error = tpcb->tp_nlproto->nlp_pcballoc(so, tpcb->tp_nlproto->nlp_pcblist);
+	tpcb->tp_npcb = so->so_pcb;
+	so->so_pcb = (caddr_t)tpcb;
+	return (error);
+}
+/*
+ * NAME: tp_attach()
+ *
+ * CALLED FROM:
+ * tp_usrreq, PRU_ATTACH
+ *
+ * FUNCTION and ARGUMENTS:
+ * given a socket (so) and a protocol family (dom), allocate a tpcb
+ * and ref structure, initialize everything in the structures that
+ * needs to be initialized.
+ *
+ * RETURN VALUE:
+ * 0 ok
+ * EINVAL if DEBUG(X) is on and a disaster has occurred
+ * ENOPROTOOPT if TP hasn't been configured or if the
+ * socket wasn't created with tp as its protocol
+ * EISCONN if this socket is already part of a connection
+ * ETOOMANYREFS if ran out of tp reference numbers.
+ * E* whatever error is returned from soreserve()
+ * or from the network-layer pcb allocation routine
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+int
+tp_attach(so, protocol)
+	struct socket *so;
+	int protocol;
+{
+	register struct tp_pcb *tpcb;
+	int error = 0;
+	int dom = so->so_proto->pr_domain->dom_family;
+	u_long lref;
+	extern struct tp_conn_param tp_conn_param[];
+
+	IFDEBUG(D_CONN)
+		printf("tp_attach:dom 0x%x so 0x%x ", dom, so);
+	ENDDEBUG
+	IFTRACE(D_CONN)
+		tptrace(TPPTmisc, "tp_attach:dom so", dom, so, 0, 0);
+	ENDTRACE
+
+	if (so->so_pcb != NULL) {
+		return EISCONN;	/* socket already part of a connection*/
+	}
+
+	/* reserve default buffer space unless both high-water marks are set */
+	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0)
+		error = soreserve(so, tp_sendspace, tp_recvspace);
+		/* later an ioctl will allow reallocation IF still in closed state */
+
+	if (error)
+		goto bad2;
+
+	MALLOC(tpcb, struct tp_pcb *, sizeof(*tpcb), M_PCB, M_NOWAIT);
+	if (tpcb == NULL) {
+		error = ENOBUFS;
+		goto bad2;
+	}
+	bzero( (caddr_t)tpcb, sizeof (struct tp_pcb) );
+
+	/* tp_getref() reports failure by returning TP_ENOREF */
+	if ( ((lref = tp_getref(tpcb)) & TP_ENOREF) != 0 ) {
+		error = ETOOMANYREFS;
+		goto bad3;
+	}
+	tpcb->tp_lref = lref;
+	tpcb->tp_sock = so;
+	tpcb->tp_domain = dom;
+	tpcb->tp_rhiwat = so->so_rcv.sb_hiwat;
+	/* tpcb->tp_proto = protocol; someday maybe? */
+	if (protocol && protocol<ISOPROTO_TP4) {
+		tpcb->tp_netservice = ISO_CONS;
+		tpcb->tp_snduna = (SeqNum) -1;/* kludge so the pseudo-ack from the CR/CC
+						* will generate correct fake-ack values
+						*/
+	} else {
+		tpcb->tp_netservice = (dom== AF_INET)?IN_CLNS:ISO_CLNS;
+		/* the default */
+	}
+	/* start from the default parameters for the chosen network service */
+	tpcb->_tp_param = tp_conn_param[tpcb->tp_netservice];
+
+	tpcb->tp_state = TP_CLOSED;
+	tpcb->tp_vers = TP_VERSION;
+	tpcb->tp_notdetached = 1;
+
+	/* Spec says default is 128 octets,
+	 * that is, if the tpdusize argument never appears, use 128.
+	 * As the initiator, we will always "propose" the 2048
+	 * size, that is, we will put this argument in the CR
+	 * always, but accept what the other side sends on the CC.
+	 * If the initiator sends us something larger on a CR,
+	 * we'll respond w/ this.
+	 * Our maximum is 4096.  See tp_chksum.c comments.
+	 */
+	tpcb->tp_cong_win =
+		tpcb->tp_l_tpdusize = 1 << tpcb->tp_tpdusize;
+
+	/* begin with the normal (non-extended) sequence number format */
+	tpcb->tp_seqmask = TP_NML_FMT_MASK;
+	tpcb->tp_seqbit = TP_NML_FMT_BIT;
+	tpcb->tp_seqhalf = tpcb->tp_seqbit >> 1;
+
+	/* attach to a network-layer protoswitch */
+	if ((error = tp_set_npcb(tpcb)) != 0)
+		goto bad4;
+	ASSERT( tpcb->tp_nlproto->nlp_afamily == tpcb->tp_domain);
+
+	/* nothing to do for iso case */
+	/*
+	 * NOTE(review): tp_set_npcb() leaves so->so_pcb pointing at the
+	 * tpcb and keeps the network-layer pcb in tpcb->tp_npcb; if
+	 * sotoinpcb(so) simply casts so->so_pcb, this store lands in the
+	 * tpcb rather than in the inpcb -- confirm.
+	 */
+	if( dom == AF_INET )
+		sotoinpcb(so)->inp_ppcb = (caddr_t) tpcb;
+
+	return 0;
+
+	/*
+	 * Error exits, in reverse order of setup: each label undoes one
+	 * step and falls through to the next.
+	 */
+bad4:
+	IFDEBUG(D_CONN)
+		printf("BAD4 in tp_attach, so 0x%x\n", so);
+	ENDDEBUG
+	tp_freeref(tpcb->tp_lref);
+
+bad3:
+	IFDEBUG(D_CONN)
+		printf("BAD3 in tp_attach, so 0x%x\n", so);
+	ENDDEBUG
+
+	free((caddr_t)tpcb, M_PCB); /* never a cluster  */
+
+bad2:
+	IFDEBUG(D_CONN)
+		printf("BAD2 in tp_attach, so 0x%x\n", so);
+	ENDDEBUG
+	so->so_pcb = 0;
+
+/*bad:*/
+	IFDEBUG(D_CONN)
+		printf("BAD in tp_attach, so 0x%x\n", so);
+	ENDDEBUG
+	return error;
+}
+
+/*
+ * NAME: tp_detach()
+ *
+ * CALLED FROM:
+ *	tp.trans, on behalf of a user close request
+ *	and when the reference timer goes off
+ *	(if the disconnect was initiated by the protocol entity
+ *	rather than by the user)
+ *
+ * FUNCTION and ARGUMENTS:
+ *	(tpcb) is the transport pcb being detached.
+ *	Remove the tpcb structure from the list of active or
+ *	partially active connections, recycle all the mbufs
+ *	associated with the pcb, ref structure, sockbufs, etc.
+ *	Only free the ref structure if you know that a ref timer
+ *	wasn't set for this tpcb (tp_refstate == REF_OPENING).
+ *
+ * RETURNS: Nada
+ *
+ * SIDE EFFECTS:
+ *	Clears tp_notdetached, calls the network layer's pcbdetach
+ *	routine, and unlinks the pcb from tp_listeners if it was in
+ *	state TP_LISTENING.  The tpcb itself is NOT freed here.
+ *
+ * NOTES:
+ *	tp_soisdisconnected() was already called when this is called
+ */
+void
+tp_detach(tpcb)
+	register struct tp_pcb *tpcb;
+{
+	void tp_freeref(), tp_rsyflush();
+	register struct socket *so = tpcb->tp_sock;
+
+	IFDEBUG(D_CONN)
+		printf("tp_detach(tpcb 0x%x, so 0x%x)\n",
+			tpcb,so);
+	ENDDEBUG
+	IFTRACE(D_CONN)
+		tptraceTPCB(TPPTmisc, "tp_detach tpcb so lsufx",
+			tpcb, so, *(u_short *)(tpcb->tp_lsuffix), 0);
+	ENDTRACE
+
+	IFDEBUG(D_CONN)
+		printf("so_snd at 0x%x so_rcv at 0x%x\n", &so->so_snd, &so->so_rcv);
+		dump_mbuf(so->so_snd.sb_mb, "so_snd at detach ");
+		printf("about to call LL detach, nlproto 0x%x, nl_detach 0x%x\n",
+			tpcb->tp_nlproto, tpcb->tp_nlproto->nlp_pcbdetach);
+	ENDDEBUG
+
+	/* drop any expedited data that never made it out */
+	if (tpcb->tp_Xsnd.sb_mb) {
+		printf("Unsent Xdata on detach; would panic\n");
+		sbflush(&tpcb->tp_Xsnd);
+	}
+	/* release user connect/disconnect data, if any */
+	if (tpcb->tp_ucddata)
+		m_freem(tpcb->tp_ucddata);
+
+	IFDEBUG(D_CONN)
+		printf("reassembly info cnt %d rsyq 0x%x\n",
+			tpcb->tp_rsycnt, tpcb->tp_rsyq);
+	ENDDEBUG
+	if (tpcb->tp_rsyq)
+		tp_rsyflush(tpcb);
+
+	/* a non-null tp_next means the pcb is still linked on a queue */
+	if (tpcb->tp_next) {
+		remque(tpcb);
+		tpcb->tp_next = tpcb->tp_prev = 0;
+	}
+	tpcb->tp_notdetached = 0;
+
+	IFDEBUG(D_CONN)
+		printf("calling (...nlproto->...)(0x%x, so 0x%x)\n",
+			tpcb->tp_npcb, so);
+		/* arguments are passed in the order the labels name them */
+		printf("so 0x%x so_head 0x%x, qlen %d q0len %d qlimit %d\n",
+			so, so->so_head,
+			so->so_qlen, so->so_q0len, so->so_qlimit);
+	ENDDEBUG
+
+	(tpcb->tp_nlproto->nlp_pcbdetach)(tpcb->tp_npcb);
+				/* does an so->so_pcb = 0; sofree(so) */
+
+	IFDEBUG(D_CONN)
+		printf("after xxx_pcbdetach\n");
+	ENDDEBUG
+
+	if (tpcb->tp_state == TP_LISTENING) {
+		register struct tp_pcb **tt;
+
+		/* find the link that points at this pcb and splice it out */
+		for (tt = &tp_listeners; *tt; tt = &((*tt)->tp_nextlisten))
+			if (*tt == tpcb)
+				break;
+		if (*tt)
+			*tt = tpcb->tp_nextlisten;
+		else
+			printf("tp_detach from listen: should panic\n");
+	}
+	if (tpcb->tp_refstate == REF_OPENING ) {
+		/* no connection existed here so no reference timer will be called */
+		IFDEBUG(D_CONN)
+			printf("SETTING ref %d to REF_FREE\n", tpcb->tp_lref);
+		ENDDEBUG
+
+		tp_freeref(tpcb->tp_lref);
+	}
+#ifdef TP_PERF_MEAS
+	/*
+	 * Get rid of the cluster mbuf allocated for performance measurements, if
+	 * there is one.  Note that tpcb->tp_perf_on says nothing about whether or
+	 * not a cluster mbuf was allocated, so you have to check for a pointer
+	 * to one (that is, we need the TP_PERF_MEASs around the following section
+	 * of code, not the IFPERFs)
+	 */
+	if (tpcb->tp_p_mbuf) {
+		register struct mbuf *m = tpcb->tp_p_mbuf;
+		struct mbuf *n;
+		IFDEBUG(D_PERF_MEAS)
+			printf("freeing tp_p_meas 0x%x  ", tpcb->tp_p_meas);
+		ENDDEBUG
+		/* free the chain one mbuf at a time; MFREE leaves the successor in n */
+		do {
+			MFREE(m, n);
+			m = n;
+		} while (n);
+		tpcb->tp_p_meas = 0;
+		tpcb->tp_p_mbuf = 0;
+	}
+#endif /* TP_PERF_MEAS */
+
+	IFDEBUG(D_CONN)
+		printf( "end of detach, NOT single, tpcb 0x%x\n", tpcb);
+	ENDDEBUG
+	/* free((caddr_t)tpcb, M_PCB); WHere to put this ? */
+}
+
+struct que { /* bare list head: next/prev mirror the leading tp_next/tp_prev of struct tp_pcb */
+ struct tp_pcb *next;
+ struct tp_pcb *prev;
+} tp_bound_pcbs = /* bound pcbs (tp_pcbbind insque()s here); both links start out pointing at the head itself, i.e. an empty list */
+{(struct tp_pcb *)&tp_bound_pcbs, (struct tp_pcb *)&tp_bound_pcbs};
+
+u_short tp_unique; /* candidate counter tp_pcbbind steps through when it has to pick a local selector itself */
+
+/*
+ * NAME: tp_tselinuse()
+ *
+ * FUNCTION and ARGUMENTS:
+ *	Decide whether the transport selector (tsel), of length (tlen),
+ *	collides with one already held by a pcb on tp_bound_pcbs or on
+ *	the tp_listeners chain.
+ *	(siso) is the local network address being asked for, or 0 when
+ *	no specific address was given.
+ *	(reuseaddr) non-zero relaxes the check as described below.
+ *
+ *	For each pcb whose local suffix matches:
+ *	 - pcb has TPF_GENERAL_ADDR: in use unless a specific address
+ *	   was given AND reuseaddr is set;
+ *	 - otherwise, specific address given: in use if the families
+ *	   match and nlp_cmpnetaddr() reports a match with TP_LOCAL;
+ *	 - otherwise (no address given): in use unless reuseaddr is set.
+ *
+ * RETURNS: 1 if the selector is in use, 0 if not.
+ */
+int
+tp_tselinuse(tlen, tsel, siso, reuseaddr)
+	int tlen;
+	caddr_t tsel;
+	register struct sockaddr_iso *siso;
+	int reuseaddr;
+{
+	struct tp_pcb *b = tp_bound_pcbs.next, *l = tp_listeners;
+	register struct tp_pcb *t;
+
+	for (;;) {
+		/* walk the circular bound list first, then the listener chain */
+		if (b != (struct tp_pcb *)&tp_bound_pcbs) {
+			t = b; b = t->tp_next;
+		} else if (l) {
+			t = l; l = t->tp_nextlisten;
+		} else
+			break;
+		if (tlen == t->tp_lsuffixlen && bcmp(tsel, t->tp_lsuffix, tlen) == 0) {
+			if (t->tp_flags & TPF_GENERAL_ADDR) {
+				if (siso == 0 || reuseaddr == 0)
+					return 1;
+			} else if (siso) {
+				if (siso->siso_family == t->tp_domain &&
+					t->tp_nlproto->nlp_cmpnetaddr(t->tp_npcb, siso, TP_LOCAL))
+						return 1;
+			} else if (reuseaddr == 0)
+				return 1;
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * NAME: tp_pcbbind()
+ *
+ * FUNCTION and ARGUMENTS:
+ *	Give (tpcb) a local transport selector and, optionally, a local
+ *	network address.  (nam), if non-null, is an mbuf holding a
+ *	sockaddr_iso (AF_ISO) or a sockaddr_in (AF_INET).
+ *	A zero-length ISO network address or a zero INET address is
+ *	treated as "no specific address".
+ *	If the pcb has no suffix yet and the caller supplied none, one is
+ *	picked by stepping tp_unique, and copied back into the caller's
+ *	sockaddr when a specific address was given.
+ *	If the pcb already has a suffix, the call may only add a specific
+ *	network address.
+ *
+ * RETURNS:
+ *	EINVAL		pcb not in TP_CLOSED, selector in use, or an attempt
+ *			to rebind the suffix / bind nothing new
+ *	EAFNOSUPPORT	address family not handled
+ *	ESRCH		no unused selector found after one full wrap
+ *	0		bound with no specific address (TPF_GENERAL_ADDR set)
+ *	otherwise	whatever the network layer's nlp_pcbbind returns
+ *
+ * SIDE EFFECTS:
+ *	On a first bind the pcb is put on tp_bound_pcbs before the
+ *	network layer bind is attempted.
+ */
+int
+tp_pcbbind(tpcb, nam)
+	register struct tp_pcb *tpcb;
+	register struct mbuf *nam;
+{
+	register struct sockaddr_iso *siso = 0;
+	int tlen = 0, wrapped = 0;
+	caddr_t tsel;
+	u_short tutil;
+
+	if (tpcb->tp_state != TP_CLOSED)
+		return (EINVAL);
+	if (nam) {
+		siso = mtod(nam, struct sockaddr_iso *);
+		switch (siso->siso_family) {
+		default:
+			return (EAFNOSUPPORT);
+#ifdef ISO
+		case AF_ISO:
+			tlen = siso->siso_tlen;
+			tsel = TSEL(siso);
+			if (siso->siso_nlen == 0)
+				siso = 0;
+			break;
+#endif
+#ifdef INET
+		case AF_INET:
+			tsel = (caddr_t)&tutil;
+			if ((tutil = ((struct sockaddr_in *)siso)->sin_port) != 0) {
+				tlen = 2;
+			}
+			if (((struct sockaddr_in *)siso)->sin_addr.s_addr == 0)
+				siso = 0;
+			break;
+#endif
+		/*
+		 * The brace below closes the switch and must stay outside the
+		 * INET conditional, or the braces no longer balance when INET
+		 * is not configured.
+		 */
+		}
+	}
+	if (tpcb->tp_lsuffixlen == 0) {
+		if (tlen) {
+			if (tp_tselinuse(tlen, tsel, siso,
+								tpcb->tp_sock->so_options & SO_REUSEADDR))
+				return (EINVAL);
+		} else {
+			/* no selector supplied: hunt for an unused 2-byte one */
+			for (tsel = (caddr_t)&tutil, tlen = 2;;){
+				if (tp_unique++ < ISO_PORT_RESERVED ||
+					tp_unique > ISO_PORT_USERRESERVED) {
+						/* second trip past the end: give up */
+						if (wrapped++)
+							return ESRCH;
+						tp_unique = ISO_PORT_RESERVED;
+				}
+				tutil = htons(tp_unique);
+				if (tp_tselinuse(tlen, tsel, siso, 0) == 0)
+					break;
+			}
+			/* report the chosen selector back through the caller's sockaddr */
+			if (siso) switch (siso->siso_family) {
+#ifdef ISO
+				case AF_ISO:
+					bcopy(tsel, TSEL(siso), tlen);
+					siso->siso_tlen = tlen;
+					break;
+#endif
+#ifdef INET
+				case AF_INET:
+					((struct sockaddr_in *)siso)->sin_port = tutil;
+					break;
+#endif
+				}
+		}
+		bcopy(tsel, tpcb->tp_lsuffix, (tpcb->tp_lsuffixlen = tlen));
+		insque(tpcb, &tp_bound_pcbs);
+	} else {
+		/* suffix already set: only a specific address, and no selector, is acceptable */
+		if (tlen || siso == 0)
+			return (EINVAL);
+	}
+	if (siso == 0) {
+		tpcb->tp_flags |= TPF_GENERAL_ADDR;
+		return (0);
+	}
+	return tpcb->tp_nlproto->nlp_pcbbind(tpcb->tp_npcb, nam);
+}
diff --git a/sys/netiso/tp_pcb.h b/sys/netiso/tp_pcb.h
new file mode 100644
index 000000000000..0353cb47b205
--- /dev/null
+++ b/sys/netiso/tp_pcb.h
@@ -0,0 +1,356 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_pcb.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_pcb.h,v 5.2 88/11/18 17:09:32 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_pcb.h,v $
+ *
+ *
+ * This file defines the transport protocol control block (tpcb).
+ * and a bunch of #define values that are used in the tpcb.
+ */
+
+#ifndef __TP_PCB__
+#define __TP_PCB__
+
+#include <netiso/tp_param.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_user.h>
+#ifndef sblock
+#include <sys/socketvar.h>
+#endif /* sblock */
+
+/* NOTE: the code depends on REF_CLOSED > REF_OPEN > the rest, and
+ * on REF_FREE being zero
+ *
+ * Possible improvement:
+ * think about merging the tp_ref w/ the tpcb and doing a search
+ * through the tpcb list, from tpb. This would slow down lookup
+ * during data transfer
+ * It would be a little nicer also to have something based on the
+ * clock (like top n bits of the reference is part of the clock, to
+ * minimize the likelihood of reuse after a crash)
+ * also, need to keep the timer servicing part to a minimum (although
+ * the cost of this is probably independent of whether the timers are
+ * in the pcb or in an array..
+ * Last, would have to make the number of timers a function of the amount of
+ * mbufs available, plus some for the frozen references.
+ *
+ * Possible improvement:
+ * Might not need the ref_state stuff either...
+ * REF_FREE could correspond to tp_state == CLOSED or nonexistent tpcb,
+ * REF_OPEN to tp_state anywhere from AK_WAIT or CR_SENT to CLOSING
+ * REF_OPENING could correspond to LISTENING, because that's the
+ * way it's used, not because the correspondence is exact.
+ * REF_CLOSED could correspond to REFWAIT
+ */
+#define REF_FROZEN 3 /* has ref timer only */
+#define REF_OPEN 2 /* has timers, possibly active */
+#define REF_OPENING 1 /* in use (has a pcb) but no timers */
+#define REF_FREE 0 /* free to reallocate */
+
+#define TM_NTIMERS 6
+
+struct tp_ref { /* one reference slot; its only member points back at the owning pcb */
+ struct tp_pcb *tpr_pcb; /* back ptr to PCB */
+};
+
+/* PER system stuff (one static structure instead of a bunch of names) */
+struct tp_refinfo {
+ struct tp_ref *tpr_base; /* presumably the start of the tp_ref array -- TODO confirm where it is allocated */
+ int tpr_size; /* presumably the number of slots at tpr_base -- TODO confirm */
+ int tpr_maxopen; /* NOTE(review): name suggests a bound or high-water mark on refs in use -- confirm */
+ int tpr_numopen; /* NOTE(review): name suggests a count of refs currently in use -- confirm */
+};
+
+struct nl_protosw { /* table of network-layer routines; a tpcb reaches its entry through tp_nlproto and passes tp_npcb to the pcb routines */
+ int nlp_afamily; /* address family */
+ int (*nlp_putnetaddr)(); /* puts addresses in nl pcb */
+ int (*nlp_getnetaddr)(); /* gets addresses from nl pcb */
+ int (*nlp_cmpnetaddr)(); /* compares address in pcb with sockaddr */
+ int (*nlp_putsufx)(); /* puts transport suffixes in nl pcb */
+ int (*nlp_getsufx)(); /* gets transport suffixes from nl pcb */
+ int (*nlp_recycle_suffix)();/* clears suffix from nl pcb */
+ int (*nlp_mtu)(); /* figures out mtu based on nl used */
+ int (*nlp_pcbbind)(); /* bind to pcb for net level */
+ int (*nlp_pcbconn)(); /* connect for net level */
+ int (*nlp_pcbdisc)(); /* disconnect net level */
+ int (*nlp_pcbdetach)(); /* detach net level pcb */
+ int (*nlp_pcballoc)(); /* allocate a net level pcb */
+ int (*nlp_output)(); /* prepare a packet to give to nl */
+ int (*nlp_dgoutput)(); /* prepare a packet to give to nl; NOTE(review): same wording as nlp_output, presumably the datagram variant -- confirm */
+ int (*nlp_ctloutput)(); /* hook for network set/get options */
+ caddr_t nlp_pcblist; /* list of xx_pcb's for connections */
+};
+
+
+struct tp_pcb {
+ struct tp_pcb *tp_next;
+ struct tp_pcb *tp_prev;
+ struct tp_pcb *tp_nextlisten; /* chain all listeners */
+ struct socket *tp_sock; /* back ptr */
+ u_short tp_state; /* state of fsm */
+ short tp_retrans; /* # times can still retrans */
+ caddr_t tp_npcb; /* to lower layer pcb */
+ struct nl_protosw *tp_nlproto; /* lower-layer dependent routines */
+ struct rtentry **tp_routep; /* obtain mtu; inside npcb */
+
+
+ RefNum tp_lref; /* local reference */
+ RefNum tp_fref; /* foreign reference */
+
+ u_int tp_seqmask; /* mask for seq space */
+ u_int tp_seqbit; /* bit for seq number wraparound */
+ u_int tp_seqhalf; /* half the seq space */
+
+ struct mbuf *tp_ucddata; /* user connect/disconnect data */
+
+ /* credit & sequencing info for SENDING */
+ u_short tp_fcredit; /* current remote credit in # packets */
+ u_short tp_maxfcredit; /* max remote credit in # packets */
+ u_short tp_dupacks; /* intuit packet loss before rxt timo */
+ u_long tp_cong_win; /* congestion window in bytes.
+ * see profuse comments in TCP code
+ */
+ u_long tp_ssthresh; /* cong_win threshold for slow start
+ * exponential to linear switch
+ */
+ SeqNum tp_snduna; /* seq # of lowest unacked DT */
+ SeqNum tp_sndnew; /* seq # of lowest unsent DT */
+ SeqNum tp_sndnum; /* next seq # to be assigned */
+ SeqNum tp_sndnxt; /* what to do next; poss. rxt */
+ struct mbuf *tp_sndnxt_m; /* packet corres. to sndnxt*/
+ int tp_Nwindow; /* for perf. measurement */
+
+ /* credit & sequencing info for RECEIVING */
+ SeqNum tp_rcvnxt; /* next DT seq # expect to recv */
+ SeqNum tp_sent_lcdt; /* cdt according to last ack sent */
+ SeqNum tp_sent_uwe; /* uwe according to last ack sent */
+ SeqNum tp_sent_rcvnxt; /* rcvnxt according to last ack sent
+ * needed for perf measurements only
+ */
+ u_short tp_lcredit; /* current local credit in # packets */
+ u_short tp_maxlcredit; /* needed for reassembly queue */
+ struct mbuf **tp_rsyq; /* unacked stuff recvd out of order */
+ int tp_rsycnt; /* number of packets "" "" "" "" */
+ u_long tp_rhiwat; /* remember original RCVBUF size */
+
+ /* receiver congestion state stuff ... */
+ u_int tp_win_recv;
+
+ /* receive window as a scaled int (8 bit fraction part) */
+
+ struct cong_sample {
+ ushort cs_size; /* current window size */
+ ushort cs_received; /* PDUs received in this sample */
+ ushort cs_ce_set; /* PDUs received in this sample with CE bit set */
+ } tp_cong_sample;
+
+
+ /* parameters per-connection controllable by user */
+ struct tp_conn_param _tp_param;
+
+#define tp_Nretrans _tp_param.p_Nretrans
+#define tp_dr_ticks _tp_param.p_dr_ticks
+#define tp_cc_ticks _tp_param.p_cc_ticks
+#define tp_dt_ticks _tp_param.p_dt_ticks
+#define tp_xpd_ticks _tp_param.p_x_ticks
+#define tp_cr_ticks _tp_param.p_cr_ticks
+#define tp_keepalive_ticks _tp_param.p_keepalive_ticks
+#define tp_sendack_ticks _tp_param.p_sendack_ticks
+#define tp_refer_ticks _tp_param.p_ref_ticks
+#define tp_inact_ticks _tp_param.p_inact_ticks
+#define tp_xtd_format _tp_param.p_xtd_format
+#define tp_xpd_service _tp_param.p_xpd_service
+#define tp_ack_strat _tp_param.p_ack_strat
+#define tp_rx_strat _tp_param.p_rx_strat
+#define tp_use_checksum _tp_param.p_use_checksum
+#define tp_use_efc _tp_param.p_use_efc
+#define tp_use_nxpd _tp_param.p_use_nxpd
+#define tp_use_rcc _tp_param.p_use_rcc
+#define tp_tpdusize _tp_param.p_tpdusize
+#define tp_class _tp_param.p_class
+#define tp_winsize _tp_param.p_winsize
+#define tp_no_disc_indications _tp_param.p_no_disc_indications
+#define tp_dont_change_params _tp_param.p_dont_change_params
+#define tp_netservice _tp_param.p_netservice
+#define tp_version _tp_param.p_version
+#define tp_ptpdusize _tp_param.p_ptpdusize
+
+ int tp_l_tpdusize;
+ /* whereas tp_tpdusize is log2(the negotiated max size)
+ * l_tpdusize is the size we'll use when sending, in # chars
+ */
+
+ int tp_rtv; /* max round-trip time variance */
+ int tp_rtt; /* smoothed round-trip time */
+ SeqNum tp_rttseq; /* packet being timed */
+ int tp_rttemit; /* when emitted, in ticks */
+ int tp_idle; /* last activity, in ticks */
+ short tp_rxtcur; /* current retransmit value */
+ short tp_rxtshift; /* log(2) of rexmt exp. backoff */
+ u_char tp_cebit_off; /* real DEC bit algorithms not in use */
+ u_char tp_oktonagle; /* Last unsent pckt may be append to */
+ u_char tp_flags; /* values: */
+#define TPF_NLQOS_PDN TPFLAG_NLQOS_PDN
+#define TPF_PEER_ON_SAMENET TPFLAG_PEER_ON_SAMENET
+#define TPF_GENERAL_ADDR TPFLAG_GENERAL_ADDR
+#define TPF_DELACK 0x8
+#define TPF_ACKNOW 0x10
+
+/*
+ * Predicates on tp_flags; (t) is a struct tp_pcb pointer.
+ * PEER_IS_LOCAL tests TPF_PEER_ON_SAMENET, spelled exactly as the flag
+ * is defined above, so that the macro expands to a defined name.
+ */
+#define PEER_IS_LOCAL(t) (((t)->tp_flags & TPF_PEER_ON_SAMENET) != 0)
+#define USES_PDN(t) (((t)->tp_flags & TPF_NLQOS_PDN) != 0)
+
+
+ unsigned
+ tp_sendfcc:1, /* shall next ack include FCC parameter? */
+ tp_trace:1, /* is this pcb being traced? (not used yet) */
+ tp_perf_on:1, /* 0/1 -> performance measuring on */
+ tp_reneged:1, /* have we reneged on cdt since last ack? */
+ tp_decbit:3, /* dec bit was set, we're in reneg mode */
+ tp_notdetached:1; /* Call tp_detach before freeing XXXXXXX */
+
+#ifdef TP_PERF_MEAS
+ /* performance stats - see tp_stat.h */
+ struct tp_pmeas *tp_p_meas;
+ struct mbuf *tp_p_mbuf;
+#endif /* TP_PERF_MEAS */
+
+ /* addressing */
+ u_short tp_domain; /* domain (INET, ISO) */
+ /* for compatibility with the *old* way and with INET, be sure that
+ * that lsuffix and fsuffix are aligned to a short addr.
+ * having them follow the u_short *suffixlen should suffice (choke)
+ */
+ u_short tp_fsuffixlen; /* foreign suffix */
+ char tp_fsuffix[MAX_TSAP_SEL_LEN];
+ u_short tp_lsuffixlen; /* local suffix */
+ char tp_lsuffix[MAX_TSAP_SEL_LEN];
+#define SHORT_LSUFXP(tpcb) ((short *)((tpcb)->tp_lsuffix))
+#define SHORT_FSUFXP(tpcb) ((short *)((tpcb)->tp_fsuffix))
+
+ /* Timer stuff */
+ u_char tp_vers; /* protocol version */
+ u_char tp_peer_acktime; /* used for DT retrans time */
+ u_char tp_refstate; /* values REF_FROZEN, etc. above */
+ struct tp_pcb *tp_fasttimeo; /* limit pcbs to examine */
+ u_int tp_timer[TM_NTIMERS]; /* C timers */
+
+ struct sockbuf tp_Xsnd; /* for expedited data */
+/* struct sockbuf tp_Xrcv; /* for expedited data */
+#define tp_Xrcv tp_sock->so_rcv
+ SeqNum tp_Xsndnxt; /* next XPD seq # to send */
+ SeqNum tp_Xuna; /* seq # of unacked XPD */
+ SeqNum tp_Xrcvnxt; /* next XPD seq # expect to recv */
+
+ /* AK subsequencing */
+ u_short tp_s_subseq; /* next subseq to send */
+ u_short tp_r_subseq; /* highest recv subseq */
+
+};
+
+u_int tp_start_win;
+
+/*
+ * ROUND: round off a scaled int with an 8 bit fraction part to the
+ * nearest integer (a fraction of 0x80 or more rounds up).
+ */
+#define ROUND(scaled_int) (((scaled_int) >> 8) + (((scaled_int) & 0x80) ? 1:0))
+
+/*
+ * CONG_INIT_SAMPLE: start a new congestion sample on (pcb): zero the
+ * received and CE-bit counters and set the sample size to twice the
+ * local credit (at least 2).
+ *
+ * BEWARE: this and CONG_UPDATE_SAMPLE expand to several statements.
+ * Wrap a use in {} if it is the body of an if/else or a loop.
+ * (pcb) is evaluated more than once; pass an expression without
+ * side effects.
+ */
+#define CONG_INIT_SAMPLE(pcb) \
+	(pcb)->tp_cong_sample.cs_received = \
+	(pcb)->tp_cong_sample.cs_ce_set = 0; \
+	(pcb)->tp_cong_sample.cs_size = max((pcb)->tp_lcredit, 1) << 1;
+
+/*
+ * CONG_UPDATE_SAMPLE: account for one received PDU on (pcb); (ce_bit)
+ * is non-zero if the PDU carried the CE bit.  Once the sample is full,
+ * shrink tp_win_recv to 7/8 (never below 1.0 scaled) if at least half
+ * of the sample had CE set, otherwise grow it by 1.0 scaled; then
+ * recompute tp_lcredit from the window and start a new sample.
+ */
+#define CONG_UPDATE_SAMPLE(pcb, ce_bit) \
+	(pcb)->tp_cong_sample.cs_received++; \
+	if (ce_bit) { \
+		(pcb)->tp_cong_sample.cs_ce_set++; \
+	} \
+	if ((pcb)->tp_cong_sample.cs_size <= (pcb)->tp_cong_sample.cs_received) { \
+		if (((pcb)->tp_cong_sample.cs_ce_set << 1) >= \
+			(pcb)->tp_cong_sample.cs_size ) { \
+			(pcb)->tp_win_recv -= (pcb)->tp_win_recv >> 3; /* multiply by .875 */ \
+			(pcb)->tp_win_recv = max(1 << 8, (pcb)->tp_win_recv); \
+		} \
+		else { \
+			(pcb)->tp_win_recv += (1 << 8); /* add one to the scaled int */ \
+		} \
+		(pcb)->tp_lcredit = ROUND((pcb)->tp_win_recv); \
+		CONG_INIT_SAMPLE(pcb); \
+	}
+
+#ifdef KERNEL
+extern struct tp_refinfo tp_refinfo;
+extern struct timeval time;
+extern struct tp_ref *tp_ref;
+extern struct tp_param tp_param;
+extern struct nl_protosw nl_protosw[];
+extern struct tp_pcb *tp_listeners;
+extern struct tp_pcb *tp_ftimeolist;
+#endif
+
+/*
+ * Conversions between a socket, its transport pcb and its reference.
+ * Every macro argument is parenthesized so an arbitrary pointer
+ * expression may be passed.
+ *
+ * NOTE(review): sototpref and tpcbtoref name a tp_ref member, but
+ * struct tp_pcb as declared in this header has no such field; unless
+ * tp_ref is supplied elsewhere these two cannot compile if used.
+ * They are kept unchanged for source compatibility -- confirm and retire.
+ */
+#define sototpcb(so) ((struct tp_pcb *)((so)->so_pcb))
+#define sototpref(so) ((sototpcb(so)->tp_ref))
+#define tpcbtoso(tp) ((struct socket *)((tp)->tp_sock))
+#define tpcbtoref(tp) ((struct tp_ref *)((tp)->tp_ref))
+
+#endif /* __TP_PCB__ */
diff --git a/sys/netiso/tp_seq.h b/sys/netiso/tp_seq.h
new file mode 100644
index 000000000000..f14e5ae7c7d8
--- /dev/null
+++ b/sys/netiso/tp_seq.h
@@ -0,0 +1,124 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_seq.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_seq.h,v 5.1 88/10/12 12:20:59 root Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_seq.h,v $
+ *
+ * These macros perform sequence number arithmetic modulo (2**7 or 2**31).
+ * The relevant fields in the tpcb are:
+ * tp_seqmask : the mask of bits that define the sequence space.
+ * tp_seqbit : 1 + tp_seqmask
+ * tp_seqhalf : tp_seqbit / 2 or half the sequence space (rounded up)
+ * Not exactly fast, but at least it's maintainable.
+ */
+
+#ifndef __TP_SEQ__
+#define __TP_SEQ__
+
+/* reduce x into the sequence space of tpcb */
+#define SEQ(tpcb,x) \
+	((x) & (tpcb)->tp_seqmask)
+
+/*
+ * Circular comparisons of (seq) against (operand): the two are ordered
+ * by their difference, and a difference of tp_seqhalf or more is taken
+ * to mean the sequence space wrapped, which inverts the answer.
+ * Arguments are evaluated more than once.
+ */
+#define SEQ_GT(tpcb, seq, operand ) \
+( ((int)((seq)-(operand)) > 0)\
+? ((int)((seq)-(operand)) < (int)(tpcb)->tp_seqhalf)\
+: !(-((int)(seq)-(operand)) < (int)(tpcb)->tp_seqhalf))
+
+#define SEQ_GEQ(tpcb, seq, operand ) \
+( ((int)((seq)-(operand)) >= 0)\
+? ((int)((seq)-(operand)) < (int)(tpcb)->tp_seqhalf)\
+: !((-((int)(seq)-(operand))) < (int)(tpcb)->tp_seqhalf))
+
+#define SEQ_LEQ(tpcb, seq, operand ) \
+( ((int)((seq)-(operand)) <= 0)\
+? ((-(int)((seq)-(operand))) < (int)(tpcb)->tp_seqhalf)\
+: !(((int)(seq)-(operand)) < (int)(tpcb)->tp_seqhalf))
+
+#define SEQ_LT(tpcb, seq, operand ) \
+( ((int)((seq)-(operand)) < 0)\
+? ((-(int)((seq)-(operand))) < (int)(tpcb)->tp_seqhalf)\
+: !(((int)(seq)-(operand)) < (int)(tpcb)->tp_seqhalf))
+
+/* the results are parenthesized so that an expression argument stays intact */
+#define SEQ_MIN(tpcb, a, b) ( SEQ_GT(tpcb, a, b) ? (b) : (a))
+
+#define SEQ_MAX(tpcb, a, b) ( SEQ_GT(tpcb, a, b) ? (a) : (b))
+
+/* step Seq (an lvalue) forward by one, in place, wrapping with tp_seqmask */
+#define SEQ_INC(tpcb, Seq) ((++(Seq)), ((Seq) &= (tpcb)->tp_seqmask))
+
+/* step Seq (an lvalue) back by one, in place: add (tp_seqbit - 1) and mask */
+#define SEQ_DEC(tpcb, Seq)\
+	((Seq) = (((Seq)+(unsigned)((int)(tpcb)->tp_seqbit - 1))&(tpcb)->tp_seqmask))
+
+/* (amt) had better be less than the seq bit ! */
+
+/*
+ * Value of Seq moved back / forward by amt, masked with tp_seqmask;
+ * Seq itself is not modified.  amt is parenthesized so that an
+ * expression such as (a + b) is applied as a whole.
+ */
+#define SEQ_SUB(tpcb, Seq, amt)\
+	(((Seq) + (unsigned)((int)(tpcb)->tp_seqbit - (amt))) & (tpcb)->tp_seqmask)
+#define SEQ_ADD(tpcb, Seq, amt) (((Seq) + (unsigned)(amt)) & (tpcb)->tp_seqmask)
+
+
+/* receive window: lwe <= seq < uwe, circularly */
+#define IN_RWINDOW(tpcb, seq, lwe, uwe)\
+	( SEQ_GEQ(tpcb, seq, lwe) && SEQ_LT(tpcb, seq, uwe) )
+
+/* send window: lwe < seq <= uwe, circularly */
+#define IN_SWINDOW(tpcb, seq, lwe, uwe)\
+	( SEQ_GT(tpcb, seq, lwe) && SEQ_LEQ(tpcb, seq, uwe) )
+
+#endif /* __TP_SEQ__ */
diff --git a/sys/netiso/tp_stat.h b/sys/netiso/tp_stat.h
new file mode 100644
index 000000000000..bf6e1a5e1244
--- /dev/null
+++ b/sys/netiso/tp_stat.h
@@ -0,0 +1,283 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_stat.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_stat.h,v 5.4 88/11/18 17:28:38 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_stat.h,v $
+ *
+ * Here are the data structures in which the global
+ * statistics(counters) are gathered.
+ */
+
+#ifndef __TP_STAT__
+#define __TP_STAT__
+
+struct tp_stat {
+ u_long ts_param_ignored;
+ u_long ts_unused3;
+ u_long ts_bad_csum;
+
+ u_long ts_inv_length;
+ u_long ts_inv_pcode;
+ u_long ts_inv_dutype;
+ u_long ts_negotfailed;
+ u_long ts_inv_dref;
+ u_long ts_inv_pval;
+ u_long ts_inv_sufx;
+ u_long ts_inv_aclass;
+
+ u_long ts_xtd_fmt;
+ u_long ts_use_txpd;
+ u_long ts_csum_off;
+ u_long ts_send_drop;
+ u_long ts_recv_drop;
+
+ u_long ts_xpd_intheway;/* xpd mark caused data flow to stop */
+ u_long ts_xpdmark_del; /* xpd markers thrown away */
+ u_long ts_dt_ooo; /* dt tpdus received out of order */
+ u_long ts_dt_niw; /* dt tpdus received & not in window */
+ u_long ts_xpd_niw; /* xpd tpdus received & not in window */
+ u_long ts_xpd_dup;
+ u_long ts_dt_dup; /* dt tpdus received & are duplicates */
+
+ u_long ts_zfcdt; /* # times f credit went down to 0 */
+ u_long ts_lcdt_reduced; /*
+ # times local cdt reduced on an acknowledgement.
+ */
+
+ u_long ts_pkt_rcvd; /* from ip */
+ u_long ts_tpdu_rcvd; /* accepted as a TPDU in tp_input */
+ u_long ts_tpdu_sent;
+ u_long ts_unused2;
+
+ u_long ts_retrans_cr;
+ u_long ts_retrans_cc;
+ u_long ts_retrans_dr;
+ u_long ts_retrans_dt;
+ u_long ts_retrans_xpd;
+ u_long ts_conn_gaveup;
+
+ u_long ts_ER_sent;
+ u_long ts_DT_sent;
+ u_long ts_XPD_sent;
+ u_long ts_AK_sent;
+ u_long ts_XAK_sent;
+ u_long ts_DR_sent;
+ u_long ts_DC_sent;
+ u_long ts_CR_sent;
+ u_long ts_CC_sent;
+
+ u_long ts_ER_rcvd;
+ u_long ts_DT_rcvd;
+ u_long ts_XPD_rcvd;
+ u_long ts_AK_rcvd;
+ u_long ts_XAK_rcvd;
+ u_long ts_DR_rcvd;
+ u_long ts_DC_rcvd;
+ u_long ts_CR_rcvd;
+ u_long ts_CC_rcvd;
+
+ u_long ts_Eticks;
+ u_long ts_Eexpired;
+ u_long ts_Eset;
+ u_long ts_Ecan_act;
+ u_long ts_Cticks;
+ u_long ts_Cexpired;
+ u_long ts_Cset;
+ u_long ts_Ccan_act;
+ u_long ts_Ccan_inact;
+ u_long ts_Fdelack;
+ u_long ts_Fpruned;
+
+ u_long ts_concat_rcvd;
+
+ u_long ts_zdebug; /* zero dref to test timeout on conn estab tp_input.c */
+ u_long ts_ydebug; /* throw away pseudo-random pkts tp_input.c */
+ u_long ts_unused5;
+ u_long ts_unused; /* kludged concat to test separation tp_emit.c */
+ u_long ts_vdebug; /* kludge to test input size checking tp_emit.c */
+ u_long ts_unused4;
+ u_long ts_ldebug; /* faked a renegging of credit */
+
+ u_long ts_mb_small;
+ u_long ts_mb_cluster;
+ u_long ts_mb_len_distr[17];
+
+ u_long ts_eot_input;
+ u_long ts_eot_user;
+ u_long ts_EOT_sent;
+ u_long ts_tp0_conn;
+ u_long ts_tp4_conn;
+ u_long ts_quench;
+ u_long ts_rcvdecbit;
+
+#define NRTT_CATEGORIES 4
+ /* The 4 categories are:
+ * 0 --> tp_flags: ~TPF_PEER_ON_SAMENET | TPF_NL_PDN
+ * 1 --> tp_flags: ~TPF_PEER_ON_SAMENET | ~TPF_NL_PDN
+ * 2 --> tp_flags: TPF_PEER_ON_SAMENET | ~TPF_NL_PDN
+ * 3 --> tp_flags: TPF_PEER_ON_SAMENET | TPF_NL_PDN
+ */
+ int ts_rtt[NRTT_CATEGORIES];
+ int ts_rtv[NRTT_CATEGORIES];
+
+ u_long ts_ackreason[_ACK_NUM_REASONS_];
+ /* ACK_DONT 0 / ACK_STRAT_EACH 0x1 / ACK_STRAT_FULLWIN 0x4
+ * ACK_DUP 0x8 / ACK_EOT 0x10 / ACK_REORDER 0x20
+ * ACK_USRRCV **
+ * ACK_FCC **
+ */
+} tp_stat ;
+#define TP_PM_MAX 0xa /* 10 decimal */
+
+#define IncStat(x) tp_stat./**/x/**/++
+
+#ifdef TP_PERF_MEAS
+
+#define PStat(Tpcb, X) (Tpcb)->tp_p_meas->/**/X/**/
+#define IncPStat(Tpcb, X) if((Tpcb)->tp_perf_on) (Tpcb)->tp_p_meas->/**/X/**/++
+
+/* BEWARE OF MACROS like this ^^^ must be sure it's surrounded by {} if
+ * it's used in an if-else statement.
+ */
+
+
+/* for perf measurement stuff: maximum window size it can handle */
+
+struct tp_pmeas {
+ /* the first few are distributions as a fn of window size
+ * only keep enough space for normal format plus 1 slot for
+ * extended format, in case any windows larger than 15 are used
+ */
+
+ /*
+ * tps_npdusent: for each call to tp_sbsend, we inc the
+ * element representing the number of pdus sent in this call
+ */
+ int tps_win_lim_by_cdt[TP_PM_MAX+1];
+ int tps_win_lim_by_data[TP_PM_MAX+1];
+ /*
+ * tps_sendtime: Each call to tp_sbsend() is timed. For
+ * Each window size, we keep the running average of the time
+ * taken by tp_sbsend() for each window size.
+ */
+ int tps_sendtime[TP_PM_MAX+1];
+ /*
+ * n_TMsendack: # times ack sent because timer went off
+ * n_ack_cuz_eot: # times ack sent due to EOTSDU on incoming packet
+ * n_ack_cuz_dup: # times ack sent for receiving a duplicate pkt.
+ * n_ack_cuz_fullwin: # times ack sent for receiving the full window.
+ * n_ack_cuz_doack: # times ack sent for having just reordered data.
+ */
+ int tps_n_TMsendack;
+ int tps_n_ack_cuz_eot;
+ int tps_n_ack_cuz_fullwin;
+ int tps_n_ack_cuz_reorder;
+ int tps_n_ack_cuz_dup;
+ int tps_n_ack_cuz_strat;
+ /*
+ * when we send an ack: how much less than the "expected" window
+ * did we actually ack. For example: if we last sent a credit
+ * of 10, and we're acking now for whatever reason, and have
+ * only received 6 since our last credit advertisement, we'll
+ * keep the difference, 4, in this variable.
+ */
+ int tps_ack_early[TP_PM_MAX+1];
+ /*
+ * when we ack, for the # pkts we actually acked w/ this ack,
+ * how much cdt are we advertising?
+ * [ size of window acknowledged ] [ cdt we're giving ]
+ */
+ int tps_cdt_acked[TP_PM_MAX+1][TP_PM_MAX+1];
+
+ int tps_AK_sent;
+ int tps_XAK_sent;
+ int tps_DT_sent;
+ int tps_XPD_sent;
+ int tps_AK_rcvd;
+ int tps_XAK_rcvd;
+ int tps_DT_rcvd;
+ int tps_XPD_rcvd;
+
+ int Nb_from_sess;
+ int Nb_to_sess;
+ int Nb_to_ll;
+ int Nb_from_ll;
+};
+
+#define IFPERF(tpcb) if (tpcb->tp_perf_on && tpcb->tp_p_meas) {
+#define ENDPERF }
+
+#else
+
+int PStat_Junk;
+#define PStat(tpcb, x) PStat_Junk
+#define IncPStat(tpcb, x) /* no-op */
+#define tpmeas(a,b,c,d,e,f) 0
+
+#define IFPERF(x) if (0) {
+#define ENDPERF }
+
+#endif /* TP_PERF_MEAS */
+
+#endif /* __TP_STAT__ */
diff --git a/sys/netiso/tp_states.h b/sys/netiso/tp_states.h
new file mode 100644
index 000000000000..ac6213a64d72
--- /dev/null
+++ b/sys/netiso/tp_states.h
@@ -0,0 +1,13 @@
+/* $Header$ */
+/* $Source$ */
+#define ST_ERROR 0x0
+#define TP_CLOSED 0x1
+#define TP_CRSENT 0x2
+#define TP_AKWAIT 0x3
+#define TP_OPEN 0x4
+#define TP_CLOSING 0x5
+#define TP_REFWAIT 0x6
+#define TP_LISTENING 0x7
+#define TP_CONFIRMING 0x8
+/* one more than the highest state code above (codes run 0x0 through 0x8) */
+#define tp_NSTATES 0x9
diff --git a/sys/netiso/tp_states.init b/sys/netiso/tp_states.init
new file mode 100644
index 000000000000..89e53453866a
--- /dev/null
+++ b/sys/netiso/tp_states.init
@@ -0,0 +1,75 @@
+/* $Header$ */
+/* $Source$ */
+{0x3,0x0},
+{0x6,0x1},
+{0x6,0x2},
+{0x6,0x0},
+{0x2,0x3},
+{0x2,0x0},
+{0x1,0x0},
+{0x5,0x0},
+{0x4,0x0},
+{0x7,0x0},
+{0x7,0x0},
+{0x1,0x4},
+{0x8,0x5},
+{0x8,0x6},
+{0x4,0x7},
+{0x3,0x8},
+{0x1,0x9},
+{0x2,0xa},
+{0x6,0xb},
+{0x1,0xc},
+{0x6,0xd},
+{0x6,0xe},
+{0x6,0xf},
+{0x6,0x10},
+{0x1,0x11},
+{0x6,0x12},
+{0x5,0x13},
+{0x4,0x14},
+{0x4,0x15},
+{0x2,0x16},
+{0x6,0x17},
+{0x3,0x18},
+{0x4,0x19},
+{0x4,0x1a},
+{0x4,0x1b},
+{0x3,0x1c},
+{0x4,0x1c},
+{0x4,0x1d},
+{0x4,0x1e},
+{0x4,0x1f},
+{0x4,0x20},
+{0x3,0x20},
+{0x6,0x21},
+{0x5,0x22},
+{0x6,0x23},
+{0x5,0x24},
+{0x3,0x25},
+{0x5,0x26},
+{0x5,0x27},
+{0x4,0x28},
+{0x4,0x29},
+{0x5,0x2a},
+{0x6,0x2b},
+{0x1,0x2c},
+{0x4,0x2d},
+{0x4,0x2e},
+{0x4,0x2f},
+{0x4,0x30},
+{0x4,0x31},
+{0x4,0x32},
+{0x4,0x33},
+{0x4,0x34},
+{0x4,0x35},
+{0x4,0x36},
+{0x6,0x37},
+{0x6,0x38},
+{0x7,0x0},
+{0x5,0x0},
+{0x3,0x0},
+{0x2,0x0},
+{0x4,0x0},
+{0x6,0x0},
+{0x1,0x0},
diff --git a/sys/netiso/tp_subr.c b/sys/netiso/tp_subr.c
new file mode 100644
index 000000000000..1259ee412532
--- /dev/null
+++ b/sys/netiso/tp_subr.c
@@ -0,0 +1,947 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_subr.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_subr.c,v 5.3 88/11/18 17:28:43 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_subr.c,v $
+ *
+ * The main work of data transfer is done here.
+ * These routines are called from tp.trans.
+ * They include the routines that check the validity of acks and Xacks,
+ * (tp_goodack() and tp_goodXack() )
+ * take packets from socket buffers and send them (tp_send()),
+ * drop the data from the socket buffers (tp_sbdrop()),
+ * and put incoming packet data into socket buffers (tp_stash()).
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <netiso/tp_ip.h>
+#include <netiso/iso.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_meas.h>
+#include <netiso/tp_seq.h>
+
+int tp_emit(), tp_sbdrop();
+int tprexmtthresh = 3; /* duplicate-AK count at which tp_goodack() retransmits early */
+extern int ticks; /* sampled into tp_rttemit by tp_send() to time round trips */
+void tp_send();
+
+/*
+ * CALLED FROM:
+ *	tp.trans, on arrival of an XAK
+ * FUNCTION and ARGUMENTS:
+ *	Checks whether (seq), the sequence number carried by the XAK,
+ *	acks anything new, i.e. whether it equals tpcb->tp_Xuna.  If so,
+ *	the acked tpdu is dropped from the XPD send queue (tp_Xsnd).
+ * RETURN VALUE:
+ *	1 if a tpdu was acked and dropped, 0 if the XAK caused no action.
+ */
+int
+tp_goodXack(tpcb, seq)
+	struct tp_pcb *tpcb;
+	SeqNum seq;
+{
+	IFTRACE(D_XPD)
+		tptraceTPCB(TPPTgotXack,
+			seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt,
+			tpcb->tp_sndnew, tpcb->tp_snduna);
+	ENDTRACE
+
+	/* only an XAK naming tp_Xuna acknowledges anything */
+	if (tpcb->tp_Xuna != seq)
+		return 0;
+
+	tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
+
+	/*
+	 * Drop one record from the Xsnd socket buffer.  Only one XPD
+	 * packet can be queued there at any time, so this removes the
+	 * whole thing.  If more than one packet were ever allowed in
+	 * the socket buffer, we would have to keep track of how many
+	 * characters went with each XPD tpdu, which would be messier.
+	 */
+	IFDEBUG(D_XPD)
+		dump_mbuf(tpcb->tp_Xsnd.sb_mb,
+			"tp_goodXack Xsnd before sbdrop");
+	ENDDEBUG
+
+	IFTRACE(D_XPD)
+		tptraceTPCB(TPPTmisc, "goodXack: dropping cc ",
+			(int)(tpcb->tp_Xsnd.sb_cc), 0, 0, 0);
+	ENDTRACE
+
+	/* the XAK matched: release the queued XPD record */
+	sbdroprecord(&tpcb->tp_Xsnd);
+	return 1;
+}
+
+/*
+ * CALLED FROM:
+ * tp_goodack()
+ * FUNCTION and ARGUMENTS:
+ * Uses the ticks elapsed since tpcb->tp_rttemit to update
+ * the smoothed average round trip time (tp_rtt) and the
+ * roundtrip time variance (tp_rtv) - actually deviation, not variance;
+ * then recomputes the retransmit interval (tp_dt_ticks, tp_rxtcur).
+ * RETURN VALUE:
+ * void
+ */
+
+void
+tp_rtt_rtv(tpcb)
+register struct tp_pcb *tpcb;
+{
+ int old = tpcb->tp_rtt;
+ int delta = 0, elapsed = ticks - tpcb->tp_rttemit; /* delta is printed below even on the first sample */
+
+ if (tpcb->tp_rtt != 0) {
+ /*
+ * rtt is the smoothed round trip time in machine clock ticks (hz).
+ * It is stored as a fixed point number, unscaled (unlike the tcp
+ * srtt). The rationale here is that it is only significant to the
+ * nearest unit of slowtimo, which is at least 8 machine clock ticks
+ * so there is no need to scale. The smoothing is done according
+ * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8).
+ */
+ delta = elapsed - tpcb->tp_rtt;
+ if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
+ tpcb->tp_rtt = 1;
+ /*
+ * rtv is a smoothed accumulated mean difference, unscaled
+ * for reasons expressed above.
+ * It is smoothed with an alpha of .75, and the round trip timer
+ * will be set to rtt + 4*rtv, also as TCP does.
+ */
+ if (delta < 0)
+ delta = -delta;
+ if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
+ tpcb->tp_rtv = 1;
+ } else {
+ /*
+ * No rtt measurement yet - use the unsmoothed rtt.
+ * Set the variance to half the rtt (so our first
+ * retransmit happens at 3*rtt)
+ */
+ tpcb->tp_rtt = elapsed;
+ tpcb->tp_rtv = elapsed >> 1;
+ }
+ tpcb->tp_rttemit = 0;
+ tpcb->tp_rxtshift = 0;
+ /*
+ * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
+ * Because of the way we do the smoothing, srtt and rttvar
+ * will each average +1/2 tick of bias. When we compute
+ * the retransmit timer, we want 1/2 tick of rounding and
+ * 1 extra tick because of +-1/2 tick uncertainty in the
+ * firing of the timer. The bias will give us exactly the
+ * 1.5 tick we need. But, because the bias is
+ * statistical, we have to test that we don't drop below
+ * the minimum feasible timer (which is 2 ticks)."
+ */
+ TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
+ tpcb->tp_peer_acktime, 128 /* XXX */);
+ IFDEBUG(D_RTT)
+ printf("%s tpcb 0x%x, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
+ "tp_rtt_rtv:",tpcb,elapsed,delta,tpcb->tp_rtt,tpcb->tp_rtv,old);
+ ENDDEBUG
+ tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
+}
+
+/*
+ * CALLED FROM:
+ * tp.trans when an AK arrives
+ * FUNCTION and ARGUMENTS:
+ * Given (cdt), the credit from the AK tpdu,
+ * (seq), the sequence number from the AK tpdu, and
+ * (subseq), the subsequence number from the AK tpdu,
+ * tp_goodack() determines if the AK acknowledges something in the send
+ * window, and if so, drops the appropriate packets from the retransmission
+ * list, computes the round trip time, and updates the retransmission timer
+ * based on the new smoothed round trip time.
+ * RETURN VALUE:
+ * Returns 1 if
+ * EITHER it actually acked something heretofore unacknowledged
+ * OR no news but the credit should be processed.
+ * If something heretofore unacked was acked with this sequence number,
+ * the appropriate tpdus are dropped from the retransmission control list,
+ * by calling tp_sbdrop(). No need to see the tpdu itself.
+ */
+int
+tp_goodack(tpcb, cdt, seq, subseq)
+ register struct tp_pcb *tpcb;
+ u_int cdt;
+ register SeqNum seq;
+ u_int subseq;
+{
+ int old_fcredit = tpcb->tp_fcredit; /* also printed at done: on the early-exit paths */
+ int bang = 0; /* bang --> ack for something heretofore unacked */
+ u_int bytes_acked;
+
+ IFDEBUG(D_ACKRECV)
+ printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
+ tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
+ ENDDEBUG
+ IFTRACE(D_ACKRECV)
+ tptraceTPCB(TPPTgotack,
+ seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq);
+ ENDTRACE
+
+ IFPERF(tpcb)
+ tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0);
+ ENDPERF
+
+ if (seq == tpcb->tp_snduna) {
+ if (subseq < tpcb->tp_r_subseq ||
+ (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
+ discard_the_ack:
+ IFDEBUG(D_ACKRECV)
+ printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n",
+ tpcb, subseq, tpcb->tp_r_subseq);
+ ENDDEBUG
+ goto done;
+ }
+ if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) {
+ tpcb->tp_r_subseq = subseq;
+ if (tpcb->tp_timer[TM_data_retrans] == 0)
+ tpcb->tp_dupacks = 0;
+ else if (++tpcb->tp_dupacks == tprexmtthresh) {
+ /* partner went out of his way to signal with different
+ subsequences that he has the same lack of an expected
+ packet. This may be an early indication of a loss */
+
+ SeqNum onxt = tpcb->tp_sndnxt;
+ struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
+ u_int win = min(tpcb->tp_fcredit,
+ tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
+ IFDEBUG(D_ACKRECV)
+ printf("%s tpcb 0x%x seq 0x%x rttseq 0x%x onxt 0x%x\n",
+ "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
+ ENDDEBUG
+ if (win < 2)
+ win = 2;
+ tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
+ tpcb->tp_timer[TM_data_retrans] = 0;
+ tpcb->tp_rttemit = 0;
+ tpcb->tp_sndnxt = tpcb->tp_snduna;
+ tpcb->tp_sndnxt_m = 0;
+ tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
+ tp_send(tpcb);
+ tpcb->tp_cong_win = tpcb->tp_ssthresh +
+ tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
+ if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
+ tpcb->tp_sndnxt = onxt;
+ tpcb->tp_sndnxt_m = onxt_m;
+ }
+
+ } else if (tpcb->tp_dupacks > tprexmtthresh) {
+ tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
+ }
+ goto done;
+ }
+ } else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
+ goto discard_the_ack;
+ /*
+ * If the congestion window was inflated to account
+ * for the other side's cached packets, retract it.
+ */
+ if (tpcb->tp_dupacks > tprexmtthresh &&
+ tpcb->tp_cong_win > tpcb->tp_ssthresh)
+ tpcb->tp_cong_win = tpcb->tp_ssthresh;
+ tpcb->tp_r_subseq = subseq;
+ old_fcredit = tpcb->tp_fcredit;
+ tpcb->tp_fcredit = cdt;
+ if (cdt > tpcb->tp_maxfcredit)
+ tpcb->tp_maxfcredit = cdt;
+ tpcb->tp_dupacks = 0;
+
+ if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
+
+ tpsbcheck(tpcb, 0);
+ bytes_acked = tp_sbdrop(tpcb, seq);
+ tpsbcheck(tpcb, 1);
+ /*
+ * If transmit timer is running and timed sequence
+ * number was acked, update smoothed round trip time.
+ * Since we now have an rtt measurement, cancel the
+ * timer backoff (cf., Phil Karn's retransmit alg.).
+ * Recompute the initial retransmit timer.
+ */
+ if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
+ tp_rtt_rtv(tpcb);
+ /*
+ * If all outstanding data is acked, stop retransmit timer.
+ * If there is more data to be acked, restart retransmit
+ * timer, using current (possibly backed-off) value.
+ * OSI combines the keepalive and persistence functions.
+ * So, there is no persistence timer per se, to restart.
+ */
+ if (tpcb->tp_class != TP_CLASS_0)
+ tpcb->tp_timer[TM_data_retrans] =
+ (seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
+ /*
+ * When new data is acked, open the congestion window.
+ * If the window gives us less than ssthresh packets
+ * in flight, open exponentially (maxseg per packet).
+ * Otherwise open linearly: maxseg per window
+ * (maxseg^2 / cwnd per packet), plus a constant
+ * fraction of a packet (maxseg/8) to help larger windows
+ * open quickly enough.
+ */
+ {
+ u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
+
+ incr = min(incr, bytes_acked);
+ if (cw > tpcb->tp_ssthresh)
+ incr = incr * incr / cw + incr / 8;
+ tpcb->tp_cong_win =
+ min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
+ }
+ tpcb->tp_snduna = seq;
+ if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
+ tpcb->tp_sndnxt = seq;
+ tpcb->tp_sndnxt_m = 0;
+ }
+ bang++;
+ }
+
+ if( cdt != 0 && old_fcredit == 0 ) {
+ tpcb->tp_sendfcc = 1;
+ }
+ if (cdt == 0) {
+ if (old_fcredit != 0)
+ IncStat(ts_zfcdt);
+ /* The following might mean that the window shrunk */
+ if (tpcb->tp_timer[TM_data_retrans]) {
+ tpcb->tp_timer[TM_data_retrans] = 0;
+ tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
+ if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
+ tpcb->tp_sndnxt = tpcb->tp_snduna;
+ tpcb->tp_sndnxt_m = 0;
+ }
+ }
+ }
+ tpcb->tp_fcredit = cdt;
+ bang |= (old_fcredit < cdt);
+
+done:
+ IFDEBUG(D_ACKRECV)
+ printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n",
+ bang, cdt, old_fcredit, tpcb->tp_cong_win);
+ ENDDEBUG
+ /* if (bang) XXXXX Very bad to remove this test, but something's broken */
+ tp_send(tpcb);
+ return (bang);
+}
+
+/*
+ * CALLED FROM:
+ *	tp_goodack()
+ * FUNCTION and ARGUMENTS:
+ *	drops everything up TO but not INCLUDING seq # (seq)
+ *	from the retransmission queue; returns the # of bytes dropped.
+ */
+tp_sbdrop(tpcb, seq)
+	register struct tp_pcb *tpcb;
+	SeqNum seq;
+{
+	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
+	register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
+	int oldcc = sb->sb_cc, oldi = i;
+	/* i counts sequence numbers from tp_snduna to seq; one record is dropped for each */
+	if (i >= tpcb->tp_seqhalf)
+		printf("tp_sbdrop: dropping too much -- should panic\n");
+	while (i-- > 0)
+		sbdroprecord(sb);
+	IFDEBUG(D_ACKRECV)
+		printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n",
+			oldi, oldcc - sb->sb_cc, tpcb, seq);
+	ENDDEBUG
+	if (sb->sb_flags & SB_NOTIFY)
+		sowwakeup(tpcb->tp_sock);
+	return (oldcc - sb->sb_cc);
+}
+
+/*
+ * CALLED FROM:
+ * tp.trans on user send request, arrival of AK and arrival of XAK
+ * FUNCTION and ARGUMENTS:
+ * Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
+ * Emits until a) runs out of data, or b) runs into an XPD mark, or
+ * c) it hits seq number (highseq) limited by cong or credit.
+ *
+ * If you want XPD to buffer > 1 du per socket buffer, you can
+ * modify this to issue XPD tpdus also, but then it'll have
+ * to take some argument(s) to distinguish between the type of DU to
+ * hand tp_emit.
+ *
+ * When something is sent for the first time, its time-of-send
+ * is stashed (in system clock ticks rather than pf_slowtimo ticks).
+ * When the ack arrives, the smoothed round-trip time is figured
+ * using this value.
+ */
+void
+tp_send(tpcb)
+ register struct tp_pcb *tpcb;
+{
+ register int len;
+ register struct mbuf *m;
+ struct mbuf *mb = 0;
+ struct sockbuf *sb = &tpcb->tp_sock->so_snd;
+ unsigned int eotsdu = 0;
+ SeqNum highseq, checkseq;
+ int idle, idleticks, off, cong_win;
+#ifdef TP_PERF_MEAS
+ int send_start_time = ticks;
+ SeqNum oldnxt = tpcb->tp_sndnxt;
+#endif /* TP_PERF_MEAS */
+
+ idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
+ if (idle) {
+ idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
+ if (idleticks > tpcb->tp_dt_ticks)
+ /*
+ * We have been idle for "a while" and no acks are
+ * expected to clock out any data we send --
+ * slow start to get ack "clock" running again.
+ */
+ tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
+ }
+
+ cong_win = tpcb->tp_cong_win;
+ highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
+ if (tpcb->tp_Xsnd.sb_mb)
+ highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
+
+ IFDEBUG(D_DATA)
+ printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n",
+ tpcb, tpcb->tp_sndnxt, cong_win, highseq);
+ ENDDEBUG
+ IFTRACE(D_DATA)
+ tptraceTPCB( TPPTmisc, "tp_send sndnew snduna",
+ tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0);
+ tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
+ tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
+ ENDTRACE
+ IFTRACE(D_DATA)
+ tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin",
+ tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
+ ENDTRACE
+
+ if (tpcb->tp_sndnxt_m)
+ m = tpcb->tp_sndnxt_m;
+ else {
+ off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
+ for (m = sb->sb_mb; m && off > 0; m = m->m_nextpkt) /* one record per seq #: skip records, not mbufs */
+ off--;
+ }
+send:
+ /*
+ * Avoid silly window syndrome here . . . figure out how!
+ */
+ checkseq = tpcb->tp_sndnum;
+ if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
+ checkseq = highseq; /* i.e. DON'T retain highest assigned packet */
+
+ while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
+
+ eotsdu = (m->m_flags & M_EOR) != 0;
+ len = m->m_pkthdr.len;
+ if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
+ len < (tpcb->tp_l_tpdusize / 2))
+ break; /* Nagle . . . . . */
+ cong_win -= len;
+ /* make a copy - mb goes into the retransmission list
+ * while m gets emitted. m_copy won't copy a zero-length mbuf.
+ */
+ mb = m;
+ m = m_copy(mb, 0, M_COPYALL);
+ if (m == MNULL)
+ break;
+ IFTRACE(D_STASH)
+ tptraceTPCB( TPPTmisc,
+ "tp_send mcopy nxt high eotsdu len",
+ tpcb->tp_sndnxt, highseq, eotsdu, len);
+ ENDTRACE
+
+ IFDEBUG(D_DATA)
+ printf("tp_sending tpcb 0x%x nxt 0x%x\n",
+ tpcb, tpcb->tp_sndnxt);
+ ENDDEBUG
+ /* when headers are precomputed, may need to fill
+ in checksum here */
+ if (tpcb->tp_sock->so_error =
+ tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) {
+ /* error */
+ break;
+ }
+ m = mb->m_nextpkt;
+ tpcb->tp_sndnxt_m = m;
+ if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
+ SEQ_INC(tpcb, tpcb->tp_sndnew);
+ /*
+ * Time this transmission if not a retransmission and
+ * not currently timing anything.
+ */
+ if (tpcb->tp_rttemit == 0) {
+ tpcb->tp_rttemit = ticks;
+ tpcb->tp_rttseq = tpcb->tp_sndnxt;
+ }
+ tpcb->tp_sndnxt = tpcb->tp_sndnew;
+ } else
+ SEQ_INC(tpcb, tpcb->tp_sndnxt);
+ /*
+ * Set retransmit timer if not currently set.
+ * Initial value for retransmit timer is smoothed
+ * round-trip time + 2 * round-trip time variance.
+ * Initialize shift counter which is used for backoff
+ * of retransmit time.
+ */
+ if (tpcb->tp_timer[TM_data_retrans] == 0 &&
+ tpcb->tp_class != TP_CLASS_0) {
+ tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
+ tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
+ tpcb->tp_rxtshift = 0;
+ }
+ }
+ if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
+ tpcb->tp_oktonagle = 0;
+#ifdef TP_PERF_MEAS
+ IFPERF(tpcb)
+ {
+ register int npkts;
+ int elapsed = ticks - send_start_time, *t;
+ struct timeval now;
+
+ npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
+
+ if (npkts > 0)
+ tpcb->tp_Nwindow++;
+
+ if (npkts > TP_PM_MAX)
+ npkts = TP_PM_MAX;
+
+ t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
+ *t += (elapsed - *t) >> TP_RTT_ALPHA; /* running average: move *t toward elapsed */
+
+ if (mb == 0) {
+ IncPStat(tpcb, tps_win_lim_by_data[npkts] );
+ } else {
+ IncPStat(tpcb, tps_win_lim_by_cdt[npkts] );
+ /* not true with congestion-window being used */
+ }
+ now.tv_sec = elapsed / hz;
+ now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
+ tpmeas( tpcb->tp_lref, /* NOTE(review): seq arg presumably the new tp_sndnxt - confirm */
+ TPsbsend, &now, tpcb->tp_sndnxt, tpcb->tp_Nwindow, npkts);
+ }
+ ENDPERF
+#endif /* TP_PERF_MEAS */
+
+
+ IFTRACE(D_DATA)
+ tptraceTPCB( TPPTmisc,
+ "tp_send at end: new nxt eotsdu error",
+ tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error);
+
+ ENDTRACE
+}
+
+int TPNagleok; /* # times tp_packetize() left tp_oktonagle set (no eotsdu) */
+int TPNagled; /* # times tp_packetize() merged new data into the last queued packet */
+
+tp_packetize(tpcb, m, eotsdu)
+register struct tp_pcb *tpcb;
+register struct mbuf *m;
+int eotsdu;
+{
+ register struct mbuf *n;
+ register struct sockbuf *sb = &tpcb->tp_sock->so_snd;
+ int maxsize = tpcb->tp_l_tpdusize
+ - tp_headersize(DT_TPDU_type, tpcb)
+ - (tpcb->tp_use_checksum?4:0) ;
+ int totlen = m->m_pkthdr.len;
+ struct mbuf *m_split();
+ /*
+ * Pre-packetize the data in the sockbuf according to negotiated mtu:
+ * (m) is split into records of at most maxsize bytes, each appended
+ * to so_snd and counted in tp_sndnum. Do it here where we can
+ * safely wait for mbufs. (eotsdu) sets M_EOR; always returns 0.
+ * This presumes knowledge of sockbuf conventions.
+ * TODO: allocate space for header and fill it in (once!).
+ */
+ IFDEBUG(D_DATA)
+ printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
+ maxsize, totlen, eotsdu, tpcb->tp_sndnum);
+ ENDDEBUG
+ if (tpcb->tp_oktonagle) {
+ if ((n = sb->sb_mb) == 0)
+ panic("tp_packetize");
+ while (n->m_act)
+ n = n->m_act;
+ if (n->m_flags & M_EOR)
+ panic("tp_packetize 2");
+ SEQ_INC(tpcb, tpcb->tp_sndnum);
+ if (totlen + n->m_pkthdr.len < maxsize) {
+ /* There is an unsent packet with space, combine data */
+ struct mbuf *old_n = n;
+ tpsbcheck(tpcb,3);
+ n->m_pkthdr.len += totlen;
+ while (n->m_next)
+ n = n->m_next;
+ sbcompress(sb, m, n);
+ tpsbcheck(tpcb,4);
+ n = old_n;
+ TPNagled++;
+ goto out;
+ }
+ }
+ while (m) {
+ n = m;
+ if (totlen > maxsize) {
+ if ((m = m_split(n, maxsize, M_WAIT)) == 0)
+ panic("tp_packetize");
+ } else
+ m = 0;
+ totlen -= maxsize;
+ tpsbcheck(tpcb, 5);
+ sbappendrecord(sb, n);
+ tpsbcheck(tpcb, 6);
+ SEQ_INC(tpcb, tpcb->tp_sndnum);
+ }
+out:
+ if (eotsdu) {
+ n->m_flags |= M_EOR; /* XXX belongs at end */
+ tpcb->tp_oktonagle = 0;
+ } else {
+ SEQ_DEC(tpcb, tpcb->tp_sndnum);
+ tpcb->tp_oktonagle = 1;
+ TPNagleok++;
+ }
+ IFDEBUG(D_DATA)
+ printf("SEND out: oktonagle %d sndnum 0x%x\n",
+ tpcb->tp_oktonagle, tpcb->tp_sndnum);
+ ENDDEBUG
+ return 0;
+}
+
+
+/*
+ * NAME: tp_stash()
+ * CALLED FROM:
+ * tp.trans on arrival of a DT tpdu
+ * FUNCTION, ARGUMENTS, and RETURN VALUE:
+ * Stashes incoming DT data: in sequence -> so_rcv, else -> tp_rsyq.
+ * Returns 1 if
+ * a) something new arrived and it's got eotsdu_reached bit on,
+ * b) this arrival caused other out-of-sequence things to be
+ * accepted, or
+ * c) this arrival is the highest seq # for which we last gave credit
+ * (sender just sent a whole window)
+ * In other words, returns 1 if tp should send an ack immediately, 0 if
+ * the ack can wait a while.
+ * Note: this implementation no longer renegs on credit, (except
+ * when debugging option D_RENEG is on, for the purpose of testing
+ * ack subsequencing), so we don't need to check for incoming tpdus
+ * being in a reneged portion of the window.
+ */
+
+tp_stash(tpcb, e)
+ register struct tp_pcb *tpcb;
+ register struct tp_event *e;
+{
+ register int ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH;
+ /* 0--> delay acks until full window */
+ /* 1--> ack each tpdu */
+#ifndef lint
+#define E e->ATTR(DT_TPDU)
+#else /* lint */
+#define E e->ev_union.EV_DT_TPDU
+#endif /* lint */
+ /* E is shorthand for the DT_TPDU attributes of event e */
+ if ( E.e_eot ) {
+ register struct mbuf *n = E.e_data;
+ n->m_flags |= M_EOR;
+ n->m_act = 0;
+ }
+ IFDEBUG(D_STASH)
+ dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
+ "stash: so_rcv before appending");
+ dump_mbuf(E.e_data,
+ "stash: e_data before appending");
+ ENDDEBUG
+
+ IFPERF(tpcb)
+ PStat(tpcb, Nb_from_ll) += E.e_datalen;
+ tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
+ E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen);
+ ENDPERF
+
+ if (E.e_seq == tpcb->tp_rcvnxt) {
+
+ IFDEBUG(D_STASH)
+ printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
+ E.e_seq, E.e_datalen, E.e_eot);
+ ENDDEBUG
+
+ IFTRACE(D_STASH)
+ tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
+ E.e_seq, E.e_datalen, E.e_eot, 0);
+ ENDTRACE
+
+ SET_DELACK(tpcb);
+
+ sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
+
+ SEQ_INC( tpcb, tpcb->tp_rcvnxt );
+ /*
+ * move chains from the reassembly queue to the socket buffer
+ */
+ if (tpcb->tp_rsycnt) {
+ register struct mbuf **mp;
+ struct mbuf **mplim;
+
+ mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit);
+ mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
+ /* the queue is circular: wrap mp back to tp_rsyq at mplim */
+ while (tpcb->tp_rsycnt && *mp) {
+ sbappend(&tpcb->tp_sock->so_rcv, *mp);
+ tpcb->tp_rsycnt--;
+ *mp = 0;
+ SEQ_INC(tpcb, tpcb->tp_rcvnxt);
+ ack_reason |= ACK_REORDER;
+ if (++mp == mplim)
+ mp = tpcb->tp_rsyq;
+ }
+ }
+ IFDEBUG(D_STASH)
+ dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
+ "stash: so_rcv after appending");
+ ENDDEBUG
+
+ } else {
+ register struct mbuf **mp;
+ SeqNum uwe;
+
+ IFTRACE(D_STASH)
+ tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
+ E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0);
+ ENDTRACE
+
+ if (tpcb->tp_rsyq == 0)
+ tp_rsyset(tpcb);
+ uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
+ if (tpcb->tp_rsyq == 0 ||
+ !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
+ ack_reason = ACK_DONT;
+ m_freem(E.e_data);
+ } else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) {
+ IFDEBUG(D_STASH)
+ printf("tp_stash - drop & ack\n");
+ ENDDEBUG
+
+ /* retransmission - drop it and force an ack */
+ IncStat(ts_dt_dup);
+ IFPERF(tpcb)
+ IncPStat(tpcb, tps_n_ack_cuz_dup);
+ ENDPERF
+
+ m_freem(E.e_data);
+ ack_reason |= ACK_DUP;
+ } else {
+ *mp = E.e_data;
+ tpcb->tp_rsycnt++;
+ ack_reason = ACK_DONT;
+ }
+ }
+ /* decide whether an ack is due now, and count every reason that applies */
+ {
+ LOCAL_CREDIT(tpcb);
+
+ if ( E.e_seq == tpcb->tp_sent_uwe )
+ ack_reason |= ACK_STRAT_FULLWIN;
+
+ IFTRACE(D_STASH)
+ tptraceTPCB(TPPTmisc,
+ "end of stash, eot, ack_reason, sent_uwe ",
+ E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
+ ENDTRACE
+
+ if ( ack_reason == ACK_DONT ) {
+ IncStat( ts_ackreason[ACK_DONT] );
+ return 0;
+ } else {
+ IFPERF(tpcb)
+ if(ack_reason & ACK_STRAT_EACH) {
+ IncPStat(tpcb, tps_n_ack_cuz_strat);
+ } else if(ack_reason & ACK_STRAT_FULLWIN) {
+ IncPStat(tpcb, tps_n_ack_cuz_fullwin);
+ } else if(ack_reason & ACK_REORDER) {
+ IncPStat(tpcb, tps_n_ack_cuz_reorder);
+ }
+ tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
+ SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
+ ENDPERF
+ {
+ register int i;
+
+ /* keep track of all reasons that apply */
+ for( i=1; i<_ACK_NUM_REASONS_ ;i++) {
+ if( ack_reason & (1<<i) )
+ IncStat( ts_ackreason[i] );
+ }
+ }
+ return 1;
+ }
+ }
+}
+
+/*
+ * tp_rsyflush - drop all the packets on the reassembly queue, then
+ * free the queue itself.  Do this when closing the socket, or when
+ * somebody has changed the space available in the receive socket (XXX).
+ */
+tp_rsyflush(tpcb)
+register struct tp_pcb *tpcb;
+{
+	register struct mbuf **mp;
+	if (tpcb->tp_rsycnt) {
+		for (mp = tpcb->tp_rsyq + tpcb->tp_maxlcredit; /* one past the last slot */
+		    --mp >= tpcb->tp_rsyq; )
+			if (*mp) {
+				tpcb->tp_rsycnt--;
+				m_freem(*mp);
+			}
+		if (tpcb->tp_rsycnt) { /* count disagreed with the chains found */
+			printf("tp_rsyflush %x\n", tpcb);
+			tpcb->tp_rsycnt = 0;
+		}
+	}
+	free((caddr_t)tpcb->tp_rsyq, M_PCB);
+	tpcb->tp_rsyq = 0;
+}
+
+tp_rsyset(tpcb)
+register struct tp_pcb *tpcb;
+{
+	/* (re)allocate tp_rsyq with one struct mbuf * slot per unit of tp_maxlcredit */
+	register struct socket *sock = tpcb->tp_sock;
+	int prev_credit = tpcb->tp_maxlcredit;
+	int qsize = tpcb->tp_xtd_format ? 0xffff : 0xf;
+	caddr_t newq;
+	tpcb->tp_maxlcredit = qsize = min(qsize,
+		(sock->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize) / tpcb->tp_l_tpdusize);
+	if (tpcb->tp_rsyq != 0 && prev_credit == tpcb->tp_maxlcredit)
+		return;	/* credit unchanged and a queue already exists */
+	if (tpcb->tp_rsyq)
+		tp_rsyflush(tpcb);
+	qsize *= sizeof(struct mbuf *);	/* slot count -> byte count */
+	newq = (caddr_t)malloc(qsize, M_PCB, M_NOWAIT);
+	if (newq)
+		bzero(newq, qsize);
+	tpcb->tp_rsyq = (struct mbuf **)newq;	/* left 0 if malloc failed */
+}
+
+tpsbcheck(tpcb, i)
+struct tp_pcb *tpcb;
+{
+	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
+	register struct mbuf *pkt, *mb;
+	register int bytes = 0, space = 0, expect;
+	/* audit so_snd: record lengths vs. headers, totals vs. sb_cc/sb_mbcnt */
+	for (pkt = sb->sb_mb; pkt != 0; pkt = pkt->m_nextpkt) {
+		if (!(pkt->m_flags & M_PKTHDR))
+			panic("tpsbcheck nohdr");
+		expect = bytes + pkt->m_pkthdr.len;
+		for (mb = pkt; mb != 0; mb = mb->m_next) {
+			space += MSIZE;
+			if (mb->m_flags & M_EXT)
+				space += mb->m_ext.ext_size;
+			bytes += mb->m_len;
+		}
+		if (bytes != expect) {
+			printf("test %d; len %d != pktlen %d on mbuf 0x%x\n",
+				i, bytes, expect, pkt);
+			panic("tpsbcheck short");
+		}
+	}
+	if (!(bytes == sb->sb_cc && space == sb->sb_mbcnt)) {
+		printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, bytes, sb->sb_cc,
+			space, sb->sb_mbcnt);
+		panic("tpsbcheck");
+	}
+}
diff --git a/sys/netiso/tp_subr2.c b/sys/netiso/tp_subr2.c
new file mode 100644
index 000000000000..60c7ce2a50b9
--- /dev/null
+++ b/sys/netiso/tp_subr2.c
@@ -0,0 +1,880 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_subr2.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_subr2.c,v 5.5 88/11/18 17:28:55 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_subr2.c,v $
+ *
+ * Some auxiliary routines:
+ * tp_protocol_error: required by xebec- called when a combo of state,
+ * event, predicate isn't covered for by the transition file.
+ * tp_indicate: gives indications(signals) to the user process
+ * tp_getoptions: initializes variables that are affected by the options
+ * chosen.
+ */
+
+/* this def'n is to cause the expansion of this macro in the
+ * routine tp_local_credit :
+ */
+#define LOCAL_CREDIT_EXPAND
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#undef MNULL
+#include <netiso/argo_debug.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_ip.h>
+#include <netiso/iso.h>
+#include <netiso/iso_errno.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_seq.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_user.h>
+#include <netiso/cons.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#ifdef TRUE
+#undef FALSE
+#undef TRUE
+#endif
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+void tp_rsyset();
+
+/*
+ * NAME: tp_local_credit()
+ *
+ * CALLED FROM:
+ * tp_emit(), tp_usrreq()
+ *
+ * FUNCTION and ARGUMENTS:
+ * Computes the local credit and stashes it in tpcb->tp_lcredit.
+ * It's a macro in the production system rather than a procedure.
+ *
+ * RETURNS:
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ * This doesn't actually get called in a production system -
+ * the macro gets expanded instead in place of calls to this proc.
+ * But for debugging, we call this and that allows us to add
+ * debugging messages easily here.
+ */
+void
+tp_local_credit(tpcb)
+	struct tp_pcb *tpcb;
+{
+	/* the actual computation lives in the LOCAL_CREDIT macro */
+	LOCAL_CREDIT(tpcb);
+	IFDEBUG(D_CREDIT)
+		/*
+		 * One conversion per value passed; tp_cong_win used to be
+		 * passed without a matching conversion and was never shown.
+		 */
+		printf("ref 0x%x lcdt 0x%x l_tpdusize 0x%x decbit 0x%x cong_win 0x%x\n",
+			tpcb->tp_lref,
+			tpcb->tp_lcredit,
+			tpcb->tp_l_tpdusize,
+			tpcb->tp_decbit,
+			tpcb->tp_cong_win
+		);
+	ENDDEBUG
+	IFTRACE(D_CREDIT)
+		tptraceTPCB(TPPTmisc,
+			"lcdt tpdusz \n",
+			 tpcb->tp_lcredit, tpcb->tp_l_tpdusize, 0, 0);
+	ENDTRACE
+}
+
+/*
+ * NAME: tp_protocol_error()
+ *
+ * CALLED FROM:
+ * tp_driver(), when it doesn't know what to do with
+ * a combo of event, state, predicate
+ *
+ * FUNCTION and ARGUMENTS:
+ * (e) is the event that could not be handled and (tpcb) the connection
+ * it was delivered to.  Prints an error message giving the tpcb
+ * address, the event number and the current tp_state, and records the
+ * same three values in the trace when D_DRIVER tracing is enabled.
+ *
+ * RETURN VALUE:
+ * EIO - always
+ *
+ * SIDE EFFECTS:
+ * Only the message/trace output; neither argument is modified here.
+ *
+ * NOTES:
+ */
+int
+tp_protocol_error(e,tpcb)
+ struct tp_event *e;
+ struct tp_pcb *tpcb;
+{
+ printf("TP PROTOCOL ERROR! tpcb 0x%x event 0x%x, state 0x%x\n",
+ tpcb, e->ev_number, tpcb->tp_state);
+ IFTRACE(D_DRIVER)
+ tptraceTPCB(TPPTmisc, "PROTOCOL ERROR tpcb event state",
+ tpcb, e->ev_number, tpcb->tp_state, 0 );
+ ENDTRACE
+ return EIO; /* for lack of anything better */
+}
+
+
+/*
+ * Drain hook for the protocol.  Not used at the moment: it takes no
+ * arguments, does nothing, and always returns 0.
+ */
+ProtoHook
+tp_drain()
+{
+ return 0;
+}
+
+
+/*
+ * NAME: tp_indicate()
+ *
+ * CALLED FROM:
+ * tp.trans when XPD arrive, when a connection is being disconnected by
+ * the arrival of a DR or ER, and when a connection times out.
+ *
+ * FUNCTION and ARGUMENTS:
+ * (ind) is the type of indication : T_DISCONNECT, T_XPD
+ * (the code below also handles ER_TPDU)
+ * (tpcb) is the connection the indication is for.
+ * (error) is an E* value that will be put in the socket structure
+ * to be passed along to the user later.
+ * Gives a SIGURG to the user process or group indicated by the socket
+ * attached to the tpcb (done by calling sohasoutofband()).
+ *
+ * RETURNS: nothing
+ *
+ * SIDE EFFECTS:
+ * Sets so->so_error.  For ER_TPDU a tp_disc_reason record may be
+ * appended to tpcb->tp_Xrcv.
+ *
+ * NOTES:
+ * For T_DISCONNECT no out-of-band notification is given when
+ * tp_no_disc_indications is set.
+ */
+void
+tp_indicate(ind, tpcb, error)
+ int ind;
+ u_short error;
+ register struct tp_pcb *tpcb;
+{
+ register struct socket *so = tpcb->tp_sock;
+ IFTRACE(D_INDICATION)
+ tptraceTPCB(TPPTindicate, ind, *(u_short *)(tpcb->tp_lsuffix),
+ *(u_short *)(tpcb->tp_fsuffix), error,so->so_pgid);
+ ENDTRACE
+ IFDEBUG(D_INDICATION)
+ char *ls, *fs;
+ ls = tpcb->tp_lsuffix,
+ fs = tpcb->tp_fsuffix,
+
+ printf(
+"indicate 0x%x lsuf 0x%02x%02x fsuf 0x%02x%02x err 0x%x noind 0x%x ref 0x%x\n",
+ ind,
+ *ls, *(ls+1), *fs, *(fs+1),
+ error, /*so->so_pgrp,*/
+ tpcb->tp_no_disc_indications,
+ tpcb->tp_lref);
+ ENDDEBUG
+
+ if (ind == ER_TPDU) { /* pass the reason to the user as a control record */
+ register struct mbuf *m;
+ struct tp_disc_reason x;
+
+ if ((so->so_state & SS_CANTRCVMORE) == 0 &&
+ (m = m_get(M_DONTWAIT, MT_OOBDATA)) != 0) {
+
+ x.dr_hdr.cmsg_len = m->m_len = sizeof(x);
+ x.dr_hdr.cmsg_level = SOL_TRANSPORT;
+ x.dr_hdr.cmsg_type= TPOPT_DISC_REASON;
+ x.dr_reason = error;
+ *mtod(m, struct tp_disc_reason *) = x;
+ sbappendrecord(&tpcb->tp_Xrcv, m);
+ error = 0; /* reason was queued, so no socket error is left pending */
+ } else
+ error = ECONNRESET; /* socket can't receive, or no mbuf was available */
+ }
+ so->so_error = error;
+
+ if (ind == T_DISCONNECT) {
+ if (error == 0)
+ so->so_error = ENOTCONN;
+ if ( tpcb->tp_no_disc_indications )
+ return; /* so_error stays set, but no notification is given */
+ }
+ IFTRACE(D_INDICATION)
+ tptraceTPCB(TPPTmisc, "doing sohasoutofband(so)", so,0,0,0);
+ ENDTRACE
+ sohasoutofband(so);
+}
+
+/*
+ * NAME : tp_getoptions()
+ *
+ * CALLED FROM:
+ * tp.trans whenever we go into OPEN state
+ *
+ * FUNCTION and ARGUMENTS:
+ * sets the proper flags and values in the tpcb, to control
+ * the appropriate actions for the given class, options,
+ * sequence space, etc, etc.
+ *
+ * RETURNS: Nada
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+void
+tp_getoptions(tpcb)
+struct tp_pcb *tpcb;
+{
+	/* mask and bit are chosen by whether the extended format is in use */
+	if (tpcb->tp_xtd_format) {
+		tpcb->tp_seqmask = TP_XTD_FMT_MASK;
+		tpcb->tp_seqbit = TP_XTD_FMT_BIT;
+	} else {
+		tpcb->tp_seqmask = TP_NML_FMT_MASK;
+		tpcb->tp_seqbit = TP_NML_FMT_BIT;
+	}
+	tpcb->tp_seqhalf = tpcb->tp_seqbit >> 1;
+
+	/* tp_dt_ticks is never left below the peer's ack time plus 2 */
+	tpcb->tp_dt_ticks =
+	    max(tpcb->tp_dt_ticks, (tpcb->tp_peer_acktime + 2));
+
+	/* size the resequencing queue for the chosen format */
+	tp_rsyset(tpcb);
+}
+
+/*
+ * NAME: tp_recycle_tsuffix()
+ *
+ * CALLED FROM:
+ * Called when a ref is frozen.
+ *
+ * FUNCTION and ARGUMENTS:
+ * allows the suffix to be reused.
+ *
+ * RETURNS: zilch
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+void
+tp_recycle_tsuffix(tpcb)
+	struct tp_pcb *tpcb;
+{
+	/* wipe both transport suffixes held in the tpcb ... */
+	bzero((caddr_t)tpcb->tp_lsuffix, sizeof(tpcb->tp_lsuffix));
+	tpcb->tp_lsuffixlen = 0;
+	bzero((caddr_t)tpcb->tp_fsuffix, sizeof(tpcb->tp_fsuffix));
+	tpcb->tp_fsuffixlen = 0;
+
+	/* ... then hand the network-layer pcb to that layer's recycle hook */
+	(*tpcb->tp_nlproto->nlp_recycle_suffix)(tpcb->tp_npcb);
+}
+
+/*
+ * NAME: tp_quench()
+ *
+ * CALLED FROM:
+ * tp{af}_quench() when ICMP source quench or similar thing arrives.
+ *
+ * FUNCTION and ARGUMENTS:
+ * Drop the congestion window back to 1.
+ * Congestion window scheme:
+ * Initial value is 1. ("slow start" as Nagle, et. al. call it)
+ * For each good ack that arrives, the congestion window is increased
+ * by 1 (up to max size of logical infinity, which is to say,
+ * it doesn't wrap around).
+ * Source quench causes it to drop back to 1.
+ * tp_send() uses the smaller of (regular window, congestion window).
+ * One retransmission strategy option is to have any retransmission
+ * cause reset the congestion window back to 1.
+ *
+ * (cmd) is either PRC_QUENCH: source quench, or
+ * PRC_QUENCH2: dest. quench (dec bit)
+ *
+ * RETURNS:
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+void
+tp_quench( tpcb, cmd )
+	struct tp_pcb *tpcb;
+	int cmd;
+{
+	IFDEBUG(D_QUENCH)
+		printf("tp_quench tpcb 0x%x ref 0x%x sufx 0x%x\n",
+			tpcb, tpcb->tp_lref, *(u_short *)(tpcb->tp_lsuffix));
+		printf("cong_win 0x%x decbit 0x%x \n",
+			tpcb->tp_cong_win, tpcb->tp_decbit);
+	ENDDEBUG
+
+	/* any other command is ignored */
+	if (cmd == PRC_QUENCH) {
+		/* source quench: congestion window back to one tpdu */
+		tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
+		IncStat(ts_quench);
+	} else if (cmd == PRC_QUENCH2) {
+		/* dec bit: shrink the window too and restart the decbit count */
+		tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
+		tpcb->tp_decbit = TP_DECBIT_CLEAR_COUNT;
+		IncStat(ts_rcvdecbit);
+	}
+}
+
+
+/*
+ * NAME: tp_netcmd()
+ *
+ * CALLED FROM:
+ * (not visible in this file)
+ *
+ * FUNCTION and ARGUMENTS:
+ * Carries out the network-level command (cmd) for connection (tpcb).
+ * When TPCONS is defined, only connections whose tp_netservice is
+ * ISO_CONS are acted upon.  CONN_CLOSE and CONN_REFUSE disconnect
+ * the X.25 channel if this is the only reference to the isopcb; any
+ * other command just prints a "NOT IMPLEMENTED" message.
+ * When TPCONS is not defined, a message saying X25 is not configured
+ * is printed instead.
+ *
+ * RETURNS:
+ * No meaningful value (no return type is declared and no value is
+ * returned).
+ *
+ * SIDE EFFECTS:
+ * May clear isop_chan and isop_refcnt and call pk_disconnect().
+ *
+ * NOTES:
+ */
+tp_netcmd( tpcb, cmd )
+ struct tp_pcb *tpcb;
+ int cmd;
+{
+#ifdef TPCONS
+ struct isopcb *isop;
+ struct pklcd *lcp;
+
+ if (tpcb->tp_netservice != ISO_CONS)
+ return;
+ isop = (struct isopcb *)tpcb->tp_npcb;
+ lcp = (struct pklcd *)isop->isop_chan;
+ switch (cmd) {
+
+ case CONN_CLOSE:
+ case CONN_REFUSE:
+ if (isop->isop_refcnt == 1) { /* this tpcb holds the only reference */
+ /* This is really superfluous, since it would happen
+ anyway in iso_pcbdetach, although it is a courtesy
+ to free up the x.25 channel before the refwait timer
+ expires. */
+ lcp->lcd_upper = 0;
+ lcp->lcd_upnext = 0;
+ pk_disconnect(lcp);
+ isop->isop_chan = 0;
+ isop->isop_refcnt = 0;
+ }
+ break;
+
+ default:
+ printf("tp_netcmd(0x%x, 0x%x) NOT IMPLEMENTED\n", tpcb, cmd);
+ break;
+ }
+#else /* TPCONS */
+ printf("tp_netcmd(): X25 NOT CONFIGURED!!\n");
+#endif
+}
+/*
+ * CALLED FROM:
+ * tp_ctloutput() and tp_emit()
+ * FUNCTION and ARGUMENTS:
+ * Convert a class mask to the highest numeric value it represents.
+ */
+
+int
+tp_mask_to_num(x)
+	u_char x;
+{
+	register int j = 4;
+
+	/*
+	 * Search downward from bit 4 for the highest bit set in the mask;
+	 * j ends up as -1 when none of bits 4..0 is set.
+	 */
+	while (j >= 0 && (x & (1<<j)) == 0)
+		j--;
+
+	ASSERT( (j == 4) || (j == 0) ); /* for now */
+	if (!(j == 4 || j == 0)) {
+		printf("ASSERTION ERROR: tp_mask_to_num: x 0x%x j %d\n",
+			x, j);
+	}
+	IFTRACE(D_TPINPUT)
+		tptrace(TPPTmisc, "tp_mask_to_num(x) returns j", x, j, 0, 0);
+	ENDTRACE
+	IFDEBUG(D_TPINPUT)
+		printf("tp_mask_to_num(0x%x) returns 0x%x\n", x, j);
+	ENDDEBUG
+	return j;
+}
+
+static
+copyQOSparms(src, dst)
+	struct tp_conn_param *src, *dst;
+{
+	/*
+	 * The first 12 shorts are copied in bulk (everything but the bits
+	 * stuff at the end); the three fields below are copied one by one.
+	 */
+	bcopy((caddr_t)src, (caddr_t)dst, 12 * sizeof(short));
+
+	dst->p_rx_strat = src->p_rx_strat;
+	dst->p_ack_strat = src->p_ack_strat;
+	dst->p_tpdusize = src->p_tpdusize;
+}
+/*
+ * Determine a reasonable value for maxseg size.
+ * If the route is known, check route for mtu.
+ * We also initialize the congestion/slow start
+ * window to be a single segment if the destination isn't local.
+ * While looking at the routing entry, we also initialize other path-dependent
+ * parameters from pre-set or cached values in the routing entry.
+ *
+ * (tpcb) is the connection being set up; (nhdr_size) is subtracted from
+ * the route or interface mtu to obtain the usable size.
+ *
+ * Results are left in the tpcb: tp_l_tpdusize, tp_tpdusize (log2 form),
+ * tp_cong_win and tp_ssthresh are set, tp_rtt/tp_rtv may be seeded from
+ * the route, and tp_rsyset() is called.  The socket's send and receive
+ * buffers may be re-reserved with sbreserve().
+ *
+ * NOTE(review): the conditional compilation here is fragile.  The
+ * "if (...) {" lines guarded by RTV_SPIPE have their closing braces
+ * outside the guard (and the second guard tests RTV_SPIPE although it
+ * reads rmx_recvpipe), and the region opened by "#ifdef RTV_SSTHRESH" is
+ * closed by an #endif labelled RTV_MTU and contains the punt_route label
+ * that an unconditional goto targets.  The function looks as if it only
+ * builds when all of these RTV_* macros are defined; confirm before
+ * touching it.
+ */
+void
+tp_mss(tpcb, nhdr_size)
+ register struct tp_pcb *tpcb;
+ int nhdr_size;
+{
+ register struct rtentry *rt;
+ struct ifnet *ifp;
+ register int rtt, mss;
+ u_long bufsize;
+ int i, ssthresh = 0, rt_mss;
+ struct socket *so;
+
+ if (tpcb->tp_ptpdusize)
+ mss = tpcb->tp_ptpdusize << 7; /* value is scaled by 128 */
+ else
+ mss = 1 << tpcb->tp_tpdusize; /* value is a power-of-two exponent */
+ so = tpcb->tp_sock;
+ if ((rt = *(tpcb->tp_routep)) == 0) { /* no route: skip all route-derived settings */
+ bufsize = so->so_rcv.sb_hiwat;
+ goto punt_route;
+ }
+ ifp = rt->rt_ifp;
+
+#ifdef RTV_MTU /* if route characteristics exist ... */
+ /*
+ * While we're here, check if there's an initial rtt
+ * or rttvar. Convert from the route-table units
+ * to hz ticks for the smoothed timers and slow-timeout units
+ * for other initial variables.
+ */
+ if (tpcb->tp_rtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
+ tpcb->tp_rtt = rtt * hz / RTM_RTTUNIT;
+ if (rt->rt_rmx.rmx_rttvar)
+ tpcb->tp_rtv = rt->rt_rmx.rmx_rttvar
+ * hz / RTM_RTTUNIT;
+ else
+ tpcb->tp_rtv = tpcb->tp_rtt;
+ }
+ /*
+ * if there's an mtu associated with the route, use it
+ */
+ if (rt->rt_rmx.rmx_mtu)
+ rt_mss = rt->rt_rmx.rmx_mtu - nhdr_size;
+ else
+#endif /* RTV_MTU */
+ rt_mss = (ifp->if_mtu - nhdr_size);
+ if (tpcb->tp_ptpdusize == 0 || /* assume application doesn't care */
+ mss > rt_mss /* network won't support what was asked for */)
+ mss = rt_mss;
+ /* can propose mtu which are multiples of 128 */
+ mss &= ~0x7f;
+ /*
+ * If there's a pipesize, change the socket buffer
+ * to that size.
+ */
+#ifdef RTV_SPIPE
+ if ((bufsize = rt->rt_rmx.rmx_sendpipe) > 0) {
+#endif
+ bufsize = min(bufsize, so->so_snd.sb_hiwat);
+ (void) sbreserve(&so->so_snd, bufsize);
+ }
+#ifdef RTV_SPIPE
+ if ((bufsize = rt->rt_rmx.rmx_recvpipe) > 0) {
+#endif
+ bufsize = min(bufsize, so->so_rcv.sb_hiwat);
+ (void) sbreserve(&so->so_rcv, bufsize);
+ } else
+ bufsize = so->so_rcv.sb_hiwat;
+#ifdef RTV_SSTHRESH
+ /*
+ * There's some sort of gateway or interface
+ * buffer limit on the path. Use this to set
+ * the slow start threshold, but set the
+ * threshold to no less than 2*mss.
+ */
+ ssthresh = rt->rt_rmx.rmx_ssthresh;
+punt_route:
+ /*
+ * The current mss is initialized to the default value.
+ * If we compute a smaller value, reduce the current mss.
+ * If we compute a larger value, return it for use in sending
+ * a max seg size option.
+ * If we received an offer, don't exceed it.
+ * However, do not accept offers under 128 bytes.
+ */
+ if (tpcb->tp_l_tpdusize)
+ mss = min(mss, tpcb->tp_l_tpdusize);
+ /*
+ * We want a minimum recv window of 4 packets to
+ * signal packet loss by duplicate acks.
+ */
+ mss = min(mss, bufsize >> 2) & ~0x7f;
+ mss = max(mss, 128); /* sanity */
+ tpcb->tp_cong_win =
+ (rt == 0 || (rt->rt_flags & RTF_GATEWAY)) ? mss : bufsize;
+ tpcb->tp_l_tpdusize = mss;
+ tp_rsyset(tpcb); /* queue size depends on tp_l_tpdusize, which just changed */
+ tpcb->tp_ssthresh = max(2 * mss, ssthresh);
+ /* Calculate log2 of mss */
+ for (i = TP_MIN_TPDUSIZE + 1; i <= TP_MAX_TPDUSIZE; i++)
+ if ((1 << i) > mss)
+ break;
+ i--;
+ tpcb->tp_tpdusize = i;
+#endif /* RTV_MTU */
+}
+
+/*
+ * CALLED FROM:
+ * tp_usrreq on PRU_CONNECT and tp_input on receipt of CR
+ *
+ * FUNCTION and ARGUMENTS:
+ * -- (m) An mbuf containing the peer's network address.
+ * -- (tpcb) Our control block, which will be modified
+ * -- (channel) In the case of cons, a control block for that layer;
+ * zero otherwise.
+ *
+ * With a (channel), the tpcb's own network pcb is freed and replaced
+ * by the isopcb attached to the channel.  Without one, the network
+ * service is chosen from the address family of the peer address, the
+ * network pcb is changed if its family does not match, and the
+ * network layer's connect routine is called.  On success tp_mss() is
+ * then run with the header size reported by the network layer.
+ *
+ * RETURNS:
+ * errno value :
+ * EAFNOSUPPORT if can't find an nl_protosw for x.25 (really could panic)
+ * (returned below for any address family that is not handled)
+ * ECONNREFUSED if trying to run TP0 with non-type 37 address
+ * possibly other E* returned from cons_netcmd()
+ * (errors from tp_set_npcb() and nlp_pcbconn are passed through)
+ *
+ * SIDE EFFECTS:
+ * Determines recommended tpdusize, buffering and initial delays
+ * based on information cached on the route.
+ */
+int
+tp_route_to( m, tpcb, channel)
+ struct mbuf *m;
+ register struct tp_pcb *tpcb;
+ caddr_t channel;
+{
+ register struct sockaddr_iso *siso; /* NOTE: this may be a sockaddr_in */
+ extern struct tp_conn_param tp_conn_param[];
+ int error = 0, save_netservice = tpcb->tp_netservice;
+ register struct rtentry *rt = 0;
+ int nhdr_size, mtu, bufsize; /* mtu and bufsize are not referenced below */
+
+ siso = mtod(m, struct sockaddr_iso *);
+ IFTRACE(D_CONN)
+ tptraceTPCB(TPPTmisc,
+ "route_to: so afi netservice class",
+ tpcb->tp_sock, siso->siso_addr.isoa_genaddr[0], tpcb->tp_netservice,
+ tpcb->tp_class);
+ ENDTRACE
+ IFDEBUG(D_CONN)
+ printf("tp_route_to( m x%x, channel 0x%x, tpcb 0x%x netserv 0x%x)\n",
+ m, channel, tpcb, tpcb->tp_netservice);
+ printf("m->mlen x%x, m->m_data:\n", m->m_len);
+ dump_buf(mtod(m, caddr_t), m->m_len);
+ ENDDEBUG
+ if (channel) {
+#ifdef TPCONS
+ struct pklcd *lcp = (struct pklcd *)channel;
+ struct isopcb *isop = (struct isopcb *)lcp->lcd_upnext,
+ *isop_new = (struct isopcb *)tpcb->tp_npcb;
+ /* The next 2 lines believe that you haven't
+ set any network level options or done a pcbconnect
+ and XXXXXXX'edly apply to both inpcb's and isopcb's */
+ remque(isop_new);
+ free(isop_new, M_PCB);
+ tpcb->tp_npcb = (caddr_t)isop;
+ tpcb->tp_netservice = ISO_CONS;
+ tpcb->tp_nlproto = nl_protosw + ISO_CONS;
+ if (isop->isop_refcnt++ == 0) { /* first user of this isopcb */
+ iso_putsufx(isop, tpcb->tp_lsuffix, tpcb->tp_lsuffixlen, TP_LOCAL);
+ isop->isop_socket = tpcb->tp_sock;
+ } else
+ /* there are already connections sharing this */;
+#endif
+ } else {
+ switch (siso->siso_family) {
+ default:
+ error = EAFNOSUPPORT;
+ goto done;
+#ifdef ISO
+ case AF_ISO:
+ {
+ struct isopcb *isop = (struct isopcb *)tpcb->tp_npcb;
+ int flags = tpcb->tp_sock->so_options & SO_DONTROUTE;
+ tpcb->tp_netservice = ISO_CLNS;
+ if (clnp_route(&siso->siso_addr, &isop->isop_route,
+ flags, (void **)0, (void **)0) == 0) {
+ rt = isop->isop_route.ro_rt;
+ if (rt && rt->rt_flags & RTF_PROTO1) /* route flagged RTF_PROTO1: use ISO_CONS instead */
+ tpcb->tp_netservice = ISO_CONS;
+ }
+ } break;
+#endif
+#ifdef INET
+ case AF_INET:
+ tpcb->tp_netservice = IN_CLNS;
+#endif
+ }
+ if (tpcb->tp_nlproto->nlp_afamily != siso->siso_family) {
+ IFDEBUG(D_CONN)
+ printf("tp_route_to( CHANGING nlproto old 0x%x new 0x%x)\n",
+ save_netservice, tpcb->tp_netservice);
+ ENDDEBUG
+ if (error = tp_set_npcb(tpcb))
+ goto done;
+ }
+ IFDEBUG(D_CONN)
+ printf("tp_route_to calling nlp_pcbconn, netserv %d\n",
+ tpcb->tp_netservice);
+ ENDDEBUG
+ tpcb->tp_nlproto = nl_protosw + tpcb->tp_netservice;
+ error = (tpcb->tp_nlproto->nlp_pcbconn)(tpcb->tp_npcb, m);
+ }
+ if (error)
+ goto done;
+ nhdr_size = tpcb->tp_nlproto->nlp_mtu(tpcb); /* only gets common info */
+ tp_mss(tpcb, nhdr_size);
+done:
+ IFDEBUG(D_CONN)
+ printf("tp_route_to returns 0x%x\n", error);
+ ENDDEBUG
+ IFTRACE(D_CONN)
+ tptraceTPCB(TPPTmisc, "route_to: returns: error netserv class", error,
+ tpcb->tp_netservice, tpcb->tp_class, 0);
+ ENDTRACE
+ return error;
+}
+
+
+/*
+ * class zero version
+ *
+ * Appends the data carried by the DT TPDU event (e) to the receive
+ * buffer of the socket attached to (tpcb).  When the event is marked
+ * end-of-TSDU, the first mbuf is flagged M_EOR first.  Afterwards the
+ * X.25 channel is told, through pk_flowcontrol(), whether the receive
+ * buffer has run out of space; if the connection is not running over
+ * ISO_CONS a complaint is printed instead.
+ */
+void
+tp0_stash( tpcb, e )
+ register struct tp_pcb *tpcb;
+ register struct tp_event *e;
+{
+#ifndef lint
+#define E e->ATTR(DT_TPDU)
+#else /* lint */
+#define E e->ev_union.EV_DT_TPDU
+#endif /* lint */
+
+ register struct sockbuf *sb = &tpcb->tp_sock->so_rcv;
+ register struct isopcb *isop = (struct isopcb *)tpcb->tp_npcb;
+
+ IFPERF(tpcb)
+ PStat(tpcb, Nb_from_ll) += E.e_datalen;
+ tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
+ E.e_seq, PStat(tpcb, Nb_from_ll), E.e_datalen);
+ ENDPERF
+
+ IFDEBUG(D_STASH)
+ printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x",
+ E.e_seq, E.e_datalen, E.e_eot);
+ ENDDEBUG
+
+ IFTRACE(D_STASH)
+ tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
+ E.e_seq, E.e_datalen, E.e_eot, 0);
+ ENDTRACE
+
+ if ( E.e_eot ) {
+ register struct mbuf *n = E.e_data;
+ n->m_flags |= M_EOR;
+ n->m_act = MNULL; /* set on tp_input */
+ }
+ sbappend(sb, E.e_data);
+ IFDEBUG(D_STASH)
+ dump_mbuf(sb->sb_mb, "stash 0: so_rcv after appending");
+ ENDDEBUG
+ if (tpcb->tp_netservice != ISO_CONS)
+ printf("tp0_stash: tp running over something wierd\n");
+ else {
+ register struct pklcd *lcp = (struct pklcd *)isop->isop_chan;
+ pk_flowcontrol(lcp, sbspace(sb) <= 0, 1); /* second argument is true when no buffer space is left */
+ }
+}
+
+void
+tp0_openflow(tpcb)
+register struct tp_pcb *tpcb;
+{
+	register struct isopcb *isop;
+	register struct pklcd *lcp;
+
+	/* only meaningful when running over ISO_CONS */
+	if (tpcb->tp_netservice != ISO_CONS) {
+		printf("tp0_openflow: tp running over something wierd\n");
+		return;
+	}
+
+	isop = (struct isopcb *)tpcb->tp_npcb;
+	lcp = (struct pklcd *)isop->isop_chan;
+	/* call pk_flowcontrol(lcp, 0, 0) only if the channel's lcd_rxrnr_condition is set */
+	if (lcp->lcd_rxrnr_condition)
+		pk_flowcontrol(lcp, 0, 0);
+}
+#ifndef TPCONS
+static
+pk_flowcontrol() {} /* empty stand-in, compiled only when TPCONS is not defined, so the pk_flowcontrol() calls in this file still resolve */
+#endif
+
+#ifdef TP_PERF_MEAS
+/*
+ * CALLED FROM:
+ * tp_ctloutput() when the user sets TPOPT_PERF_MEAS on
+ * and tp_newsocket() when a new connection is made from
+ * a listening socket with tp_perf_on == true.
+ * FUNCTION and ARGUMENTS:
+ * (tpcb) is the usual; this procedure gets a clear cluster mbuf for
+ * a tp_pmeas structure, and makes tpcb->tp_p_meas point to it.
+ * The mbuf itself is remembered in tpcb->tp_p_mbuf and tp_perf_on
+ * is set to 1.  Nothing is done if tp_p_meas is already set.
+ * RETURN VALUE:
+ * ENOBUFS if it cannot get a cluster mbuf.
+ * 0 otherwise.
+ */
+
+int
+tp_setup_perf(tpcb)
+ register struct tp_pcb *tpcb;
+{
+ register struct mbuf *q;
+
+ if( tpcb->tp_p_meas == 0 ) {
+ MGET(q, M_WAITOK, MT_PCB);
+ if (q == 0)
+ return ENOBUFS;
+ MCLGET(q, M_WAITOK);
+ if ((q->m_flags & M_EXT) == 0) { /* no cluster was attached */
+ (void) m_free(q);
+ return ENOBUFS;
+ }
+ q->m_len = sizeof (struct tp_pmeas);
+ tpcb->tp_p_mbuf = q;
+ tpcb->tp_p_meas = mtod(q, struct tp_pmeas *);
+ bzero( (caddr_t)tpcb->tp_p_meas, sizeof (struct tp_pmeas) );
+ IFDEBUG(D_PERF_MEAS)
+ printf(
+ "tpcb 0x%x so 0x%x ref 0x%x tp_p_meas 0x%x tp_perf_on 0x%x\n",
+ tpcb, tpcb->tp_sock, tpcb->tp_lref,
+ tpcb->tp_p_meas, tpcb->tp_perf_on); /* printed before tp_perf_on is set below */
+ ENDDEBUG
+ tpcb->tp_perf_on = 1;
+ }
+ return 0;
+}
+#endif /* TP_PERF_MEAS */
+
+#ifdef ARGO_DEBUG
+dump_addr (addr)
+	register struct sockaddr *addr;
+{
+	/* dispatch on the address family to the matching dump routine */
+	if (addr->sa_family == AF_INET)
+		dump_inaddr((struct sockaddr_in *)addr);
+#ifdef ISO
+	else if (addr->sa_family == AF_ISO)
+		dump_isoaddr((struct sockaddr_iso *)addr);
+#endif /* ISO */
+	else
+		printf("BAD AF: 0x%x\n", addr->sa_family);
+}
+
+#define MAX_COLUMNS 8
+/*
+ * Dump the buffer to the screen in a readable format. Format is:
+ *
+ * hex/dec where hex is the hex format, dec is the decimal format.
+ * columns of hex/dec numbers will be printed, followed by the
+ * character representations (if printable).
+ */
+Dump_buf(buf, len)
+caddr_t buf;
+int len;
+{
+	int row, col;
+#define Buf ((u_char *)buf)
+	printf("Dump buf 0x%x len 0x%x\n", buf, len);
+
+	/* one output line per MAX_COLUMNS bytes, prefixed by the offset */
+	for (row = 0; row < len; row += MAX_COLUMNS) {
+		printf("+%d:\t", row);
+
+		/* hex/dec pairs; positions past the end of the buffer are padded */
+		for (col = 0; col < MAX_COLUMNS; col++) {
+			if (row + col >= len) {
+				printf(" ");
+				continue;
+			}
+			printf("%x/%d\t", Buf[row+col], Buf[row+col]);
+		}
+
+		/* the same bytes as characters, '.' for anything outside 32..127 */
+		for (col = 0; col < MAX_COLUMNS && row + col < len; col++) {
+			if (Buf[row+col] > 31 && Buf[row+col] < 128)
+				printf("%c", Buf[row+col]);
+			else
+				printf(".");
+		}
+		printf("\n");
+	}
+}
+#endif /* ARGO_DEBUG */
diff --git a/sys/netiso/tp_timer.c b/sys/netiso/tp_timer.c
new file mode 100644
index 000000000000..b3a0be3a9453
--- /dev/null
+++ b/sys/netiso/tp_timer.c
@@ -0,0 +1,377 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_timer.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_timer.c,v 5.2 88/11/18 17:29:07 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_timer.c,v $
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/malloc.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_seq.h>
+
+struct tp_ref *tp_ref; /* reference table; allocated and zeroed in tp_timerinit() */
+int tp_rttdiv, tp_rttadd, N_TPREF = 127; /* tp_rttdiv and tp_rttadd are derived from hz and PR_SLOWHZ in
+ * tp_timerinit(); N_TPREF + 1 is the initial number of table entries */
+struct tp_refinfo tp_refinfo; /* tpr_base and tpr_size describe the table above */
+struct tp_pcb *tp_ftimeolist = (struct tp_pcb *)&tp_ftimeolist; /* head of the list walked by tp_fasttimo();
+ * it points at itself when the list is empty */
+
+/*
+ * CALLED FROM:
+ * at autoconfig time from tp_init()
+ * a combo of event, state, predicate
+ * FUNCTION and ARGUMENTS:
+ * initialize data structures for the timers
+ */
+void
+tp_timerinit()
+{
+	register int nbytes;
+
+	/* already initialized: the reference table exists */
+	if (tp_refinfo.tpr_base != 0)
+		return;
+
+	/*
+	 * Allocate and clear the reference table.
+	 */
+	tp_refinfo.tpr_size = N_TPREF + 1;  /* Need to start somewhere */
+	nbytes = sizeof(*tp_ref) * tp_refinfo.tpr_size;
+	tp_ref = (struct tp_ref *) malloc(nbytes, M_PCB, M_NOWAIT);
+	if (tp_ref == 0)
+		panic("tp_timerinit");
+	bzero((caddr_t)tp_ref, (unsigned) nbytes);
+	tp_refinfo.tpr_base = tp_ref;
+
+	/* constants derived from the clock rate and the slow-timeout rate */
+	tp_rttdiv = hz / PR_SLOWHZ;
+	tp_rttadd = (2 * tp_rttdiv) - 1;
+}
+#ifdef TP_DEBUG_TIMERS
+/********************** e timers *************************/
+
+/*
+ * CALLED FROM:
+ * tp.trans all over
+ * FUNCTION and ARGUMENTS:
+ * Set an E type timer.
+ */
+void
+tp_etimeout(tpcb, fun, ticks)
+	register struct tp_pcb *tpcb;
+	int fun; 										/* function to be called */
+	int ticks;
+{
+	register u_int *callp;
+
+	/*
+	 * A null tpcb is tolerated.  The test has to come before the
+	 * debug/trace code, which dereferences tpcb.
+	 */
+	if (tpcb == 0)
+		return;
+	IFDEBUG(D_TIMER)
+		printf("etimeout pcb 0x%x state 0x%x\n", tpcb, tpcb->tp_state);
+	ENDDEBUG
+	IFTRACE(D_TIMER)
+		tptrace(TPPTmisc, "tp_etimeout ref refstate tks Etick", tpcb->tp_lref,
+		tpcb->tp_state, ticks, tp_stat.ts_Eticks);
+	ENDTRACE
+	IncStat(ts_Eset);
+	/* a timer value of 0 means "not running", so never arm with 0 */
+	if (ticks == 0)
+		ticks = 1;
+	/* arm the timer, or shorten it if it is already set to expire later */
+	callp = tpcb->tp_timer + fun;
+	if (*callp == 0 || *callp > ticks)
+		*callp = ticks;
+}
+
+/*
+ * CALLED FROM:
+ * tp.trans all over
+ * FUNCTION and ARGUMENTS:
+ * Cancel all occurrences of E-timer function (fun) for the connection (tpcb)
+ */
+void
+tp_euntimeout(tpcb, fun)
+	register struct tp_pcb *tpcb;
+	int fun;
+{
+	/*
+	 * A null tpcb is tolerated.  Test it before the trace code,
+	 * which dereferences tpcb.
+	 */
+	if (tpcb == 0)
+		return;
+
+	IFTRACE(D_TIMER)
+		tptrace(TPPTmisc, "tp_euntimeout ref", tpcb->tp_lref, 0, 0, 0);
+	ENDTRACE
+
+	/* zero means "not running" */
+	tpcb->tp_timer[fun] = 0;
+}
+
+/**************** c timers **********************
+ *
+ * These are not chained together; they sit
+ * in the tp_ref structure. they are the kind that
+ * are typically cancelled so it's faster not to
+ * mess with the chains
+ */
+#endif
+/*
+ * CALLED FROM:
+ * the clock, every 500 ms
+ * FUNCTION and ARGUMENTS:
+ * Look for open references with active timers.
+ * If they exist, call the appropriate timer routines to update
+ * the timers and possibly generate events.
+ */
+ProtoHook
+tp_slowtimo()
+{
+	register u_int 	*cp;
+	register struct tp_ref		*rp;
+	struct tp_pcb		*tpcb;
+	struct tp_event		E;
+	int 				s = splnet(), t;
+
+	/* check only open reference structures */
+	IncStat(ts_Cticks);
+	/* tp_ref[0] is never used */
+	for (rp = tp_ref + tp_refinfo.tpr_maxopen; rp > tp_ref; rp--) {
+		if ((tpcb = rp->tpr_pcb) == 0 || tpcb->tp_refstate < REF_OPEN)
+			continue;
+		/* check the timers */
+		for (t = 0; t < TM_NTIMERS; t++) {
+			cp = tpcb->tp_timer + t;
+			/* count a running timer down; act only when it reaches 0 */
+			if (*cp && --(*cp) <= 0 ) {
+				*cp = 0;
+				/* the timer index doubles as the event number */
+				E.ev_number = t;
+				IFDEBUG(D_TIMER)
+					printf("tp_slowtimo: pcb 0x%x t %d\n",
+							tpcb, t);
+				ENDDEBUG
+				IncStat(ts_Cexpired);
+				tp_driver(tpcb, &E);
+				if (t == TM_reference && tpcb->tp_state == TP_CLOSED) {
+					if (tpcb->tp_notdetached) {
+						IFDEBUG(D_CONN)
+							printf("PRU_DETACH: not detached\n");
+						ENDDEBUG
+						tp_detach(tpcb);
+					}
+					/* XXX wart; where else to do it? */
+					free((caddr_t)tpcb, M_PCB);
+					/*
+					 * The pcb is gone: stop scanning its
+					 * remaining timers, which would read and
+					 * write freed memory.
+					 */
+					break;
+				}
+			}
+		}
+	}
+	splx(s);
+	return 0;
+}
+
+/*
+ * Called From: tp.trans from tp_slowtimo() -- retransmission timer went off.
+ *
+ * Rewinds tp_sndnxt to tp_snduna so that unacknowledged data is sent
+ * again, shrinks the congestion window to one tpdu, recomputes the slow
+ * start threshold, backs off and re-arms the retransmission timer, and
+ * calls tp_send().  If the peer's credit (tp_fcredit) is 0 nothing is
+ * retransmitted; the retransmission timer is cleared and the sendack
+ * timer is armed instead.
+ */
+tp_data_retrans(tpcb)
+register struct tp_pcb *tpcb;
+{
+ int rexmt, win;
+ tpcb->tp_rttemit = 0; /* cancel current round trip time */
+ tpcb->tp_dupacks = 0;
+ tpcb->tp_sndnxt = tpcb->tp_snduna;
+ if (tpcb->tp_fcredit == 0) {
+ /*
+ * We transmitted new data, started timing it and the window
+ * got shrunk under us. This can only happen if all data
+ * that they wanted us to send got acked, so don't
+ * bother shrinking the congestion windows, et. al.
+ * The retransmission timer should have been reset in goodack()
+ */
+ IFDEBUG(D_ACKRECV)
+ printf("tp_data_retrans: 0 window tpcb 0x%x una 0x%x\n",
+ tpcb, tpcb->tp_snduna);
+ ENDDEBUG
+ tpcb->tp_rxtshift = 0;
+ tpcb->tp_timer[TM_data_retrans] = 0;
+ tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
+ return;
+ }
+ rexmt = tpcb->tp_dt_ticks << min(tpcb->tp_rxtshift, TP_MAXRXTSHIFT); /* backoff: doubles per earlier retransmission, capped */
+ win = min(tpcb->tp_fcredit, (tpcb->tp_cong_win / tpcb->tp_l_tpdusize / 2)); /* half the congestion window, counted in tpdus */
+ win = max(win, 2);
+ tpcb->tp_cong_win = tpcb->tp_l_tpdusize; /* slow start again. */
+ tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
+ /* We're losing; our srtt estimate is probably bogus.
+ * Clobber it so we'll take the next rtt measurement as our srtt;
+ * Maintain current rxt times until then.
+ */
+ if (++tpcb->tp_rxtshift > TP_NRETRANS / 4) {
+ /* tpcb->tp_nlprotosw->nlp_losing(tpcb->tp_npcb) someday */
+ tpcb->tp_rtt = 0;
+ }
+ TP_RANGESET(tpcb->tp_rxtcur, rexmt, tpcb->tp_peer_acktime, 128);
+ tpcb->tp_timer[TM_data_retrans] = tpcb->tp_rxtcur;
+ tp_send(tpcb);
+}
+
+int
+tp_fasttimo()
+{
+	register struct tp_pcb *t;
+	int s = splnet();
+	struct tp_event		E;
+
+	/*
+	 * Empty the fast-timeout list.  Each pcb on it that still has
+	 * TPF_DELACK set gets a TM_sendack event; the others are just
+	 * unlinked.  The list ends at the address of tp_ftimeolist itself.
+	 * Always returns 0.
+	 */
+	E.ev_number = TM_sendack;
+	while ((t = tp_ftimeolist) != (struct tp_pcb *)&tp_ftimeolist) {
+		if (t == 0) {
+			/* corrupt list: reset it to empty, which ends the loop */
+			printf("tp_fasttimeo: should panic");
+			tp_ftimeolist = (struct tp_pcb *)&tp_ftimeolist;
+		} else {
+			if (t->tp_flags & TPF_DELACK) {
+				IncStat(ts_Fdelack);
+				tp_driver(t, &E);
+				t->tp_flags &= ~TPF_DELACK;
+			} else
+				IncStat(ts_Fpruned);
+			/* unlink this pcb and advance to the next one */
+			tp_ftimeolist = t->tp_fasttimeo;
+			t->tp_fasttimeo = 0;
+		}
+	}
+	splx(s);
+	/* the function is declared int, so give callers a defined value */
+	return 0;
+}
+
+#ifdef TP_DEBUG_TIMERS
+/*
+ * CALLED FROM:
+ * tp.trans, tp_emit()
+ * FUNCTION and ARGUMENTS:
+ * Set a C type timer of type (which) to go off after (ticks) time.
+ */
+void
+tp_ctimeout(tpcb, which, ticks)
+	register struct tp_pcb *tpcb;
+	int which, ticks;
+{
+	IFTRACE(D_TIMER)
+		tptrace(TPPTmisc, "tp_ctimeout ref which tpcb active",
+			tpcb->tp_lref, which, tpcb, tpcb->tp_timer[which]);
+	ENDTRACE
+
+	/* overwriting a running timer is counted as cancelling an active one */
+	if (tpcb->tp_timer[which] != 0)
+		IncStat(ts_Ccan_act);
+	IncStat(ts_Cset);
+
+	/* 0 means "not running", so the smallest value ever stored is 1 */
+	if (ticks > 0)
+		tpcb->tp_timer[which] = ticks;
+	else
+		tpcb->tp_timer[which] = 1;
+}
+
+/*
+ * CALLED FROM:
+ * tp.trans
+ * FUNCTION and ARGUMENTS:
+ * Version of tp_ctimeout that, when the timer is already running, resets
+ * it only if the parameter (ticks) is < the current value of the timer.
+ */
+void
+tp_ctimeout_MIN(tpcb, which, ticks)
+	register struct tp_pcb *tpcb;
+	int which, ticks;
+{
+	IFTRACE(D_TIMER)
+		tptrace(TPPTmisc, "tp_ctimeout_MIN ref which tpcb active",
+			tpcb->tp_lref, which, tpcb, tpcb->tp_timer[which]);
+	ENDTRACE
+	IncStat(ts_Cset);
+
+	/* timer not running: simply arm it */
+	if (tpcb->tp_timer[which] == 0) {
+		tpcb->tp_timer[which] = ticks;
+		return;
+	}
+
+	/* timer running: keep whichever expiry comes first */
+	tpcb->tp_timer[which] = min(ticks, tpcb->tp_timer[which]);
+	IncStat(ts_Ccan_act);
+}
+
+/*
+ * CALLED FROM:
+ * tp.trans
+ * FUNCTION and ARGUMENTS:
+ * Cancel the (which) timer of the connection indicated by (tpcb).
+ */
+void
+tp_cuntimeout(tpcb, which)
+	register struct tp_pcb *tpcb;
+	int which;
+{
+	IFDEBUG(D_TIMER)
+		printf("tp_cuntimeout(0x%x, %d) active %d\n",
+				tpcb, which, tpcb->tp_timer[which]);
+	ENDDEBUG
+
+	/*
+	 * The reference is reported as tpcb->tp_lref, as the other timer
+	 * routines do; there is no "refp" in this function.
+	 */
+	IFTRACE(D_TIMER)
+		tptrace(TPPTmisc, "tp_cuntimeout ref which, active", tpcb->tp_lref,
+			which, tpcb->tp_timer[which], 0);
+	ENDTRACE
+
+	/* count whether the cancelled timer was running or not */
+	if (tpcb->tp_timer[which])
+		IncStat(ts_Ccan_act);
+	else
+		IncStat(ts_Ccan_inact);
+	tpcb->tp_timer[which] = 0;
+}
+#endif
diff --git a/sys/netiso/tp_timer.h b/sys/netiso/tp_timer.h
new file mode 100644
index 000000000000..a6f7735586b9
--- /dev/null
+++ b/sys/netiso/tp_timer.h
@@ -0,0 +1,93 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_timer.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_timer.h,v 5.1 88/10/12 12:21:41 root Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_timer.h,v $
+ *
+ * ARGO TP
+ * The callout structures used by the tp timers.
+ */
+
+#ifndef __TP_TIMER__
+#define __TP_TIMER__
+/* Set TPF_DELACK on (t); if its tp_fasttimeo is 0, link (t) at the head of tp_ftimeolist. */
+#define SET_DELACK(t) {\
+ (t)->tp_flags |= TPF_DELACK; \
+ if ((t)->tp_fasttimeo == 0)\
+ { (t)->tp_fasttimeo = tp_ftimeolist; tp_ftimeolist = (t); } }
+/* With ARGO_DEBUG the timer routines are the functions guarded by TP_DEBUG_TIMERS. */
+#ifdef ARGO_DEBUG
+#define TP_DEBUG_TIMERS
+#endif
+/* Otherwise they are these macros; tp_ctimeout_MIN leaves a timer holding 0 unset for (t) >= 0. */
+#ifndef TP_DEBUG_TIMERS
+#define tp_ctimeout(tpcb, which, timo) ((tpcb)->tp_timer[which] = (timo))
+#define tp_cuntimeout(tpcb, which) ((tpcb)->tp_timer[which] = 0)
+#define tp_etimeout tp_ctimeout
+#define tp_euntimeout tp_cuntimeout
+#define tp_ctimeout_MIN(p, w, t) \
+ { if((p)->tp_timer[w] > (t)) (p)->tp_timer[w] = (t);}
+#endif /* TP_DEBUG_TIMERS */
+
+#endif /* __TP_TIMER__ */
diff --git a/sys/netiso/tp_tpdu.h b/sys/netiso/tp_tpdu.h
new file mode 100644
index 000000000000..15f130d1703a
--- /dev/null
+++ b/sys/netiso/tp_tpdu.h
@@ -0,0 +1,296 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_tpdu.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_tpdu.h,v 4.4 88/07/26 16:45:40 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_tpdu.h,v $
+ *
+ * This ghastly set of macros makes it possible to
+ * refer to tpdu structures without going mad.
+ */
+
+#ifndef __TP_TPDU__
+#define __TP_TPDU__
+
+#ifndef BYTE_ORDER
+/*
+ * Definitions for byte order,
+ * according to byte significance from low address to high.
+ */
+#define LITTLE_ENDIAN 1234 /* least-significant byte first (vax) */
+#define BIG_ENDIAN 4321 /* most-significant byte first (IBM, net) */
+#define PDP_ENDIAN 3412 /* LSB first in word, MSW first in long (pdp) */
+
+#ifdef vax
+#define BYTE_ORDER LITTLE_ENDIAN
+#else
+#define BYTE_ORDER BIG_ENDIAN /* mc68000, tahoe, most others */
+#endif
+#endif /* BYTE_ORDER */
+
+/* This much of a tpdu is the same for all types of tpdus (except
+ * DT tpdus in class 0; their exceptions are handled by the data
+ * structure below).
+ */
+struct tpdu_fixed {
+ u_char _tpduf_li:8, /* length indicator */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ _tpduf_cdt: 4, /* credit */
+ _tpduf_type: 4; /* type of tpdu (DT, CR, etc.) */
+#endif /* LITTLE_ENDIAN */
+#if BYTE_ORDER == BIG_ENDIAN
+ _tpduf_type: 4, /* type of tpdu (DT, CR, etc.) */
+ _tpduf_cdt: 4; /* credit */
+#endif /* BIG_ENDIAN */
+ u_short _tpduf_dref; /* destination ref; not in DT in class 0 */
+};
+/* Accessors for use on a struct tpdu, whose _tpduf member is a tpdu_fixed. */
+#define tpdu_li _tpduf._tpduf_li
+#define tpdu_type _tpduf._tpduf_type
+#define tpdu_cdt _tpduf._tpduf_cdt
+#define tpdu_dref _tpduf._tpduf_dref
+/* NOTE(review): presumably the class 0 DT layout referred to above tpdu_fixed - confirm. */
+struct tp0du {
+ u_char _tp0_li,
+ _tp0_cdt_type, /* same as in tpdu_fixed */
+#if BYTE_ORDER == BIG_ENDIAN
+ _tp0_eot: 1, /* eot */
+ _tp0_mbz: 7, /* must be zero */
+#endif /* BIG_ENDIAN */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ _tp0_mbz: 7, /* must be zero */
+ _tp0_eot: 1, /* eot */
+#endif /* LITTLE_ENDIAN */
+ _tp0_notused: 8; /* data begins on this octet */
+};
+/* These accessors name the member only; they carry no struct path. */
+#define tp0du_eot _tp0_eot
+#define tp0du_mbz _tp0_mbz
+
+/*
+ * This is used when the extended format sequence numbers are
+ * being sent and received.
+ */
+ /*
+ * the s_seqeot field is an int that overlays the seq
+ * and eot fields; this allows the htonl operation
+ * to be applied to the entire 32 bit quantity, and
+ * simplifies the structure definitions.
+ */
+union seq_type {
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN
+ unsigned int st_eot:1, /* end-of-tsdu */
+ st_seq:31; /* 31 bit sequence number */
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+ unsigned int st_seq:31, /* 31 bit sequence number */
+ st_eot:1; /* end-of-tsdu */
+#endif
+ } st;
+ unsigned int s_seqeot; /* st_seq and st_eot viewed as one word */
+#define s_eot st.st_eot
+#define s_seq st.st_seq
+};
+
+/* Then most tpdu types have a portion that is always present but
+ * differs among the tpdu types :
+ */
+union tpdu_fixed_rest {
+ /* CR and CC tpdus share one layout (see the tpdu_CR* and tpdu_CC* accessors) */
+ struct {
+ u_short _tpdufr_sref, /* source reference */
+#if BYTE_ORDER == BIG_ENDIAN
+ _tpdufr_class: 4, /* class [ ISO 8073 13.3.3.e ] */
+ _tpdufr_opt: 4, /* options [ ISO 8073 13.3.3.e ] */
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+ _tpdufr_opt: 4, /* options [ ISO 8073 13.3.3.e ] */
+ _tpdufr_class: 4, /* class [ ISO 8073 13.3.3.e ] */
+#endif
+ _tpdufr_xx: 8; /* unused */
+ } CRCC;
+
+#define tpdu_CRli _tpduf._tpduf_li
+#define tpdu_CRtype _tpduf._tpduf_type
+#define tpdu_CRcdt _tpduf._tpduf_cdt
+#define tpdu_CRdref_0 _tpduf._tpduf_dref
+#define tpdu_CRsref _tpdufr.CRCC._tpdufr_sref
+#define tpdu_sref _tpdufr.CRCC._tpdufr_sref
+#define tpdu_CRclass _tpdufr.CRCC._tpdufr_class
+#define tpdu_CRoptions _tpdufr.CRCC._tpdufr_opt
+
+#define tpdu_CCli _tpduf._tpduf_li
+#define tpdu_CCtype _tpduf._tpduf_type
+#define tpdu_CCcdt _tpduf._tpduf_cdt
+#define tpdu_CCdref _tpduf._tpduf_dref
+#define tpdu_CCsref _tpdufr.CRCC._tpdufr_sref
+#define tpdu_CCclass _tpdufr.CRCC._tpdufr_class
+#define tpdu_CCoptions _tpdufr.CRCC._tpdufr_opt
+
+/* OPTIONS and ADDL OPTIONS bits */
+#define TPO_USE_EFC 0x1
+#define TPO_XTD_FMT 0x2
+#define TPAO_USE_TXPD 0x1
+#define TPAO_NO_CSUM 0x2
+#define TPAO_USE_RCC 0x4
+#define TPAO_USE_NXPD 0x8
+ /* DR tpdus (see the tpdu_DR* accessors) */
+ struct {
+ unsigned short _tpdufr_sref; /* source reference */
+ unsigned char _tpdufr_reason; /* [ ISO 8073 13.5.3.d ] */
+ } DR;
+#define tpdu_DRli _tpduf._tpduf_li
+#define tpdu_DRtype _tpduf._tpduf_type
+#define tpdu_DRdref _tpduf._tpduf_dref
+#define tpdu_DRsref _tpdufr.DR._tpdufr_sref
+#define tpdu_DRreason _tpdufr.DR._tpdufr_reason
+ /* DC tpdus use only this source reference (see tpdu_DCsref) */
+ unsigned short _tpdufr_sref; /* source reference */
+
+#define tpdu_DCli _tpduf._tpduf_li
+#define tpdu_DCtype _tpduf._tpduf_type
+#define tpdu_DCdref _tpduf._tpduf_dref
+#define tpdu_DCsref _tpdufr._tpdufr_sref
+ /* sequence number and end-of-tsdu bit: 7 bit form, then 31 bit form */
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN
+ unsigned char _tpdufr_eot:1, /* end-of-tsdu */
+ _tpdufr_seq:7; /* 7 bit sequence number */
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+ unsigned char _tpdufr_seq:7, /* 7 bit sequence number */
+ _tpdufr_eot:1; /* end-of-tsdu */
+#endif
+ }SEQEOT;
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN
+ unsigned int _tpdufr_Xeot:1, /* end-of-tsdu */
+ _tpdufr_Xseq:31; /* 31 bit sequence number */
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+ unsigned int _tpdufr_Xseq:31, /* 31 bit sequence number */
+ _tpdufr_Xeot:1; /* end-of-tsdu */
+#endif
+ }SEQEOT31;
+ unsigned int _tpdufr_Xseqeot; /* overlays SEQEOT31 as a single unsigned int */
+#define tpdu_seqeotX _tpdufr._tpdufr_Xseqeot
+
+#define tpdu_DTli _tpduf._tpduf_li
+#define tpdu_DTtype _tpduf._tpduf_type
+#define tpdu_DTdref _tpduf._tpduf_dref
+#define tpdu_DTseq _tpdufr.SEQEOT._tpdufr_seq
+#define tpdu_DTeot _tpdufr.SEQEOT._tpdufr_eot
+#define tpdu_DTseqX _tpdufr.SEQEOT31._tpdufr_Xseq
+#define tpdu_DTeotX _tpdufr.SEQEOT31._tpdufr_Xeot
+
+#define tpdu_XPDli _tpduf._tpduf_li
+#define tpdu_XPDtype _tpduf._tpduf_type
+#define tpdu_XPDdref _tpduf._tpduf_dref
+#define tpdu_XPDseq _tpdufr.SEQEOT._tpdufr_seq
+#define tpdu_XPDeot _tpdufr.SEQEOT._tpdufr_eot
+#define tpdu_XPDseqX _tpdufr.SEQEOT31._tpdufr_Xseq
+#define tpdu_XPDeotX _tpdufr.SEQEOT31._tpdufr_Xeot
+ /* AK tpdus, 31 bit form: the sequence number is followed by a separate credit field */
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN
+ unsigned _tpdufr_yrseq0:1, /* always zero */
+ _tpdufr_yrseq:31; /* [ ISO 8073 13.9.3.d ] */
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+ unsigned _tpdufr_yrseq:31, /* [ ISO 8073 13.9.3.d ] */
+ _tpdufr_yrseq0:1; /* always zero */
+#endif
+ unsigned short _tpdufr_cdt; /* [ ISO 8073 13.9.3.b ] */
+ } AK31;
+
+#define tpdu_AKli _tpduf._tpduf_li
+#define tpdu_AKtype _tpduf._tpduf_type
+#define tpdu_AKdref _tpduf._tpduf_dref
+#define tpdu_AKseq _tpdufr.SEQEOT._tpdufr_seq
+#define tpdu_AKseqX _tpdufr.AK31._tpdufr_yrseq
+/* location of cdt depends on size of seq. numbers */
+#define tpdu_AKcdt _tpduf._tpduf_cdt
+#define tpdu_AKcdtX _tpdufr.AK31._tpdufr_cdt
+
+#define tpdu_XAKli _tpduf._tpduf_li
+#define tpdu_XAKtype _tpduf._tpduf_type
+#define tpdu_XAKdref _tpduf._tpduf_dref
+#define tpdu_XAKseq _tpdufr.SEQEOT._tpdufr_seq
+#define tpdu_XAKseqX _tpdufr.SEQEOT31._tpdufr_Xseq
+ /* ER tpdus (see tpdu_ERreason) */
+ unsigned char _tpdu_ERreason; /* [ ISO 8073 13.12.3.c ] */
+
+#define tpdu_ERli _tpduf._tpduf_li
+#define tpdu_ERtype _tpduf._tpduf_type
+#define tpdu_ERdref _tpduf._tpduf_dref
+#define tpdu_ERreason _tpdufr._tpdu_ERreason
+
+};
+/* A tpdu header: the part common to all types, then the type-specific part. */
+struct tpdu {
+ struct tpdu_fixed _tpduf;
+ union tpdu_fixed_rest _tpdufr;
+};
+
+#endif /* __TP_TPDU__ */
diff --git a/sys/netiso/tp_trace.c b/sys/netiso/tp_trace.c
new file mode 100644
index 000000000000..115597bf4720
--- /dev/null
+++ b/sys/netiso/tp_trace.c
@@ -0,0 +1,175 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_trace.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_trace.c,v 5.3 88/11/18 17:29:14 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_trace.c,v $
+ *
+ * The whole protocol trace module.
+ * We keep a circular buffer of trace structures, which are big
+ * unions of different structures we might want to see.
+ * Unfortunately this gets too big pretty easily. Pcbs were removed
+ * from the tracing when the kernel got too big to boot.
+ */
+
+#define TP_TRACEFILE
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_ip.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_trace.h>
+
+#ifdef TPPT
+static int tp_seq = 0; /* sequence number stamped on each trace entry */
+u_char tp_traceflags[128];
+
+/*
+ * Record one event in the circular buffer tp_Trace[]; tpcb may be null.
+ * event here is just the type of trace event - TPPTmisc, etc.
+ * The rest of the arguments have different uses depending
+ * on the type of trace event (see the switch below).
+ */
+/*ARGSUSED*/
+/*VARARGS*/
+
+void
+tpTrace(tpcb, event, arg, src, len, arg4, arg5)
+ struct tp_pcb *tpcb;
+ u_int event, arg;
+ u_int src;
+ u_int len;
+ u_int arg4;
+ u_int arg5;
+{
+ register struct tp_Trace *tp;
+ /* claim the next slot, wrapping at TPTRACEN */
+ tp = &tp_Trace[tp_Tracen++];
+ tp_Tracen %= TPTRACEN;
+
+ tp->tpt_event = event;
+ tp->tpt_tseq = tp_seq++;
+ tp->tpt_arg = arg;
+ /* slots are reused: store 0 rather than keep the previous entry's value */
+ tp->tpt_arg2 = tpcb ? tpcb->tp_lref : 0;
+ bcopy( (caddr_t)&time, (caddr_t)&tp->tpt_time, sizeof(struct timeval) );
+
+ switch(event) {
+
+ case TPPTertpdu:
+ bcopy((caddr_t)src, (caddr_t)&tp->tpt_ertpdu,
+ (unsigned)MIN((int)len, sizeof(tp->tpt_stuff))); /* dest is the union, not the whole entry */
+ break;
+
+ case TPPTusrreq:
+ case TPPTmisc:
+
+ /* arg is a string */
+ bcopy((caddr_t)arg, (caddr_t)tp->tpt_str,
+ (unsigned)MIN(1+strlen((caddr_t) arg), TPTRACE_STRLEN));
+ tp->tpt_m2 = src;
+ tp->tpt_m3 = len;
+ tp->tpt_m4 = arg4;
+ tp->tpt_m1 = arg5;
+ break;
+
+ case TPPTgotXack:
+ case TPPTXack:
+ case TPPTsendack:
+ case TPPTgotack:
+ case TPPTack:
+ case TPPTindicate:
+ default:
+ case TPPTdriver:
+ tp->tpt_m2 = arg;
+ tp->tpt_m3 = src;
+ tp->tpt_m4 = len;
+ tp->tpt_m5 = arg4;
+ tp->tpt_m1 = arg5;
+ break;
+ case TPPTparam:
+ bcopy((caddr_t)src, (caddr_t)&tp->tpt_param, sizeof(struct tp_param));
+ break;
+ case TPPTref:
+ bcopy((caddr_t)src, (caddr_t)&tp->tpt_ref, sizeof(struct tp_ref));
+ break;
+
+ case TPPTtpduin:
+ case TPPTtpduout:
+ tp->tpt_arg2 = arg4;
+ bcopy((caddr_t)src, (caddr_t)&tp->tpt_tpdu,
+ (unsigned)MIN((int)len, sizeof(tp->tpt_stuff))); /* dest is the union, not the whole entry */
+ break;
+ }
+}
+#endif /* TPPT */
diff --git a/sys/netiso/tp_trace.h b/sys/netiso/tp_trace.h
new file mode 100644
index 000000000000..885730549e58
--- /dev/null
+++ b/sys/netiso/tp_trace.h
@@ -0,0 +1,198 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_trace.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_trace.h,v 5.1 88/10/12 12:21:51 root Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_trace.h,v $
+ *
+ *
+ * Definitions needed for the protocol trace mechanism.
+ */
+
+#ifndef __TP_TRACE__
+#define __TP_TRACE__
+
+
+#define TPPTsendack 1
+#define TPPTgotack 2
+#define TPPTXack 3
+#define TPPTgotXack 4
+#define TPPTack 5
+#define TPPTindicate 6
+#define TPPTusrreq 7
+#define TPPTmisc 8
+#define TPPTpcb 9
+#define TPPTref 10
+#define TPPTtpduin 11
+#define TPPTparam 12
+#define TPPTertpdu 13
+#define TPPTdriver 14
+#define TPPTtpduout 15
+
+#include <netiso/tp_pcb.h>
+
+/* this #if is to avoid lint */
+
+#if defined(TP_TRACEFILE)||!defined(KERNEL)
+
+#include <netiso/tp_tpdu.h>
+
+#define TPTRACE_STRLEN 50
+
+
+/* for packet tracing; reached through the tpt_seq, tpt_kind, tpt_window and tpt_size accessors */
+struct tp_timeval {
+ SeqNum tptv_seq;
+ u_int tptv_kind;
+ u_int tptv_window;
+ u_int tptv_size;
+};
+/* One trace entry: a fixed header followed by a union holding the event-specific data. */
+struct tp_Trace {
+ u_int tpt_event; /* one of the TPPT* event values */
+ u_int tpt_arg;
+ u_int tpt_arg2;
+ int tpt_tseq; /* sequence number of the entry */
+ struct timeval tpt_time; /* time the entry was recorded */
+ union {
+ struct inpcb tpt_Inpcb; /* protocol control block */
+ struct tp_ref tpt_Ref; /* ref part of pcb */
+ struct tpdu tpt_Tpdu; /* header*/
+ struct tp_refinfo tpt_Param; /* ?? bytes, make sure < 128??*/
+ struct tp_timeval tpt_Time;
+ struct {
+ u_int tptm_2;
+ u_int tptm_3;
+ u_int tptm_4;
+ u_int tptm_5;
+ char tpt_Str[TPTRACE_STRLEN];
+ u_int tptm_1;
+ } tptmisc;
+ u_char tpt_Ertpdu; /* use rest of structure */
+ } tpt_stuff;
+};
+#define tpt_inpcb tpt_stuff.tpt_Inpcb
+#define tpt_pcb tpt_stuff.tpt_Pcb /* NOTE(review): tpt_stuff has no tpt_Pcb member */
+#define tpt_ref tpt_stuff.tpt_Ref
+#define tpt_tpdu tpt_stuff.tpt_Tpdu
+#define tpt_param tpt_stuff.tpt_Param /* NOTE(review): a tp_refinfo, but tpTrace copies sizeof(struct tp_param) - confirm */
+#define tpt_ertpdu tpt_stuff.tpt_Ertpdu
+#define tpt_str tpt_stuff.tptmisc.tpt_Str
+#define tpt_m1 tpt_stuff.tptmisc.tptm_1
+#define tpt_m2 tpt_stuff.tptmisc.tptm_2
+#define tpt_m3 tpt_stuff.tptmisc.tptm_3
+#define tpt_m4 tpt_stuff.tptmisc.tptm_4
+#define tpt_m5 tpt_stuff.tptmisc.tptm_5
+
+#define tpt_seq tpt_stuff.tpt_Time.tptv_seq
+#define tpt_kind tpt_stuff.tpt_Time.tptv_kind
+#define tpt_window tpt_stuff.tpt_Time.tptv_window
+#define tpt_size tpt_stuff.tpt_Time.tptv_size
+
+#endif /* defined(TP_TRACEFILE)||!defined(KERNEL) */
+
+
+#ifdef TPPT
+/* TPTRACEN: tpTrace() wraps its slot index tp_Tracen modulo this value */
+#define TPTRACEN 300
+/* tptrace: record an event, passing a null pcb pointer to tpTrace() */
+#define tptrace(A,B,C,D,E,F) \
+ tpTrace((struct tp_pcb *)0,\
+ (u_int)(A),(u_int)(B),(u_int)(C),(u_int)(D),(u_int)(E),(u_int)(F))
+/* tptraceTPCB: same, but passes the identifier tpcb from the scope where it is used */
+#define tptraceTPCB(A,B,C,D,E,F) \
+ tpTrace(tpcb,\
+ (u_int)(A),(u_int)(B),(u_int)(C),(u_int)(D),(u_int)(E),(u_int)(F))
+
+extern void tpTrace();
+extern struct tp_Trace tp_Trace[];
+extern u_char tp_traceflags[];
+int tp_Tracen = 0; /* next tp_Trace[] slot; NOTE(review): a definition in a header, repeated in every includer */
+/* IFTRACE(flag) ... ENDTRACE brackets code that runs only if tp_traceflags[flag] is nonzero */
+#define IFTRACE(ascii)\
+ if(tp_traceflags[ascii]) {
+/*
+ * for some reason lint complains about tp_param being undefined no
+ * matter where or how many times I define it.
+ */
+#define ENDTRACE }
+
+
+#else /* TPPT */
+
+/***********************************************
+ * NO TPPT TRACE STUFF
+ **********************************************/
+#define TPTRACEN 1
+/* without TPPT both trace macros expand to the constant 0 and ignore their arguments */
+#define tptrace(A,B,C,D,E,F) 0
+#define tptraceTPCB(A,B,C,D,E,F) 0
+/* and code bracketed by IFTRACE ... ENDTRACE sits under if (0) */
+#define IFTRACE(ascii) if (0) {
+#define ENDTRACE }
+
+#endif /* TPPT */
+
+
+
+#endif /* __TP_TRACE__ */
diff --git a/sys/netiso/tp_user.h b/sys/netiso/tp_user.h
new file mode 100644
index 000000000000..b81491b76501
--- /dev/null
+++ b/sys/netiso/tp_user.h
@@ -0,0 +1,162 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_user.h 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_user.h,v 5.2 88/11/04 15:44:44 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_user.h,v $
+ *
+ * These are the values a real-live user ;-) needs.
+ */
+
+#ifndef _TYPES_
+#include <sys/types.h>
+#endif
+
+#ifndef __TP_USER__
+#define __TP_USER__
+
+struct tp_conn_param {
+ /* PER CONNECTION parameters */
+ short p_Nretrans;
+ short p_dr_ticks;
+ /* NOTE(review): the p_*_ticks fields are presumably timer values in ticks - confirm */
+ short p_cc_ticks;
+ short p_dt_ticks;
+
+ short p_x_ticks;
+ short p_cr_ticks;
+
+ short p_keepalive_ticks;
+ short p_sendack_ticks;
+
+ short p_ref_ticks;
+ short p_inact_ticks;
+
+ short p_ptpdusize; /* preferred tpdusize/128 */
+ short p_winsize;
+
+ u_char p_tpdusize; /* log 2 of size */
+
+ u_char p_ack_strat; /* see comments in tp_pcb.h */
+ u_char p_rx_strat; /* see comments in tp_pcb.h */
+ u_char p_class; /* class bitmask */
+ u_char p_xtd_format;
+ u_char p_xpd_service;
+ u_char p_use_checksum;
+ u_char p_use_nxpd; /* netwk expedited data: not implemented */
+ u_char p_use_rcc; /* receipt confirmation: not implemented */
+ u_char p_use_efc; /* explicit flow control: not implemented */
+ u_char p_no_disc_indications; /* don't deliver indic on disc */
+ u_char p_dont_change_params; /* use these params as they are */
+ u_char p_netservice;
+ u_char p_version; /* only here for checking */
+};
+
+/*
+ * These sockopt level definitions should be considered for socket.h
+ */
+#define SOL_TRANSPORT 0xfffe
+#define SOL_NETWORK 0xfffd
+
+/* get/set socket opt commands */
+#define TPACK_WINDOW 0x0 /* ack only on full window */
+#define TPACK_EACH 0x1 /* ack every packet */
+
+#define TPRX_USE_CW 0x8 /* use congestion window transmit */
+#define TPRX_EACH 0x4 /* retrans each packet of a set */
+#define TPRX_FASTSTART 0x1 /* don't use slow start */
+
+#define TPOPT_INTERCEPT 0x200
+#define TPOPT_FLAGS 0x300
+#define TPOPT_CONN_DATA 0x400
+#define TPOPT_DISC_DATA 0x500
+#define TPOPT_CFRM_DATA 0x600
+#define TPOPT_CDDATA_CLEAR 0x700
+#define TPOPT_MY_TSEL 0x800
+#define TPOPT_PEER_TSEL 0x900
+#define TPOPT_PERF_MEAS 0xa00
+#define TPOPT_PSTATISTICS 0xb00
+#define TPOPT_PARAMS 0xc00 /* to replace a bunch of the others */
+#define TPOPT_DISC_REASON 0xe00
+/* A cmsghdr followed by a reason code; NOTE(review): presumably used with TPOPT_DISC_REASON - confirm */
+struct tp_disc_reason {
+ struct cmsghdr dr_hdr;
+ u_int dr_reason;
+};
+
+/*
+ ***********************flags**********************************
+ */
+
+/* read only flags */
+#define TPFLAG_NLQOS_PDN (u_char)0x01
+#define TPFLAG_PEER_ON_SAMENET (u_char)0x02
+#define TPFLAG_GENERAL_ADDR (u_char)0x04 /* bound to wildcard addr */
+
+
+/*
+ ***********************end flags******************************
+ */
+
+
+#endif /* __TP_USER__ */
diff --git a/sys/netiso/tp_usrreq.c b/sys/netiso/tp_usrreq.c
new file mode 100644
index 000000000000..8060c947f544
--- /dev/null
+++ b/sys/netiso/tp_usrreq.c
@@ -0,0 +1,756 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_usrreq.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * ARGO TP
+ *
+ * $Header: tp_usrreq.c,v 5.4 88/11/18 17:29:18 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_usrreq.c,v $
+ *
+ * tp_usrreq(), the fellow that gets called from most of the socket code.
+ * Pretty straightforward.
+ * The only really awful stuff here is the OOB processing, which is done
+ * wholly here.
+ * tp_rcvoob() and tp_sendoob() are contained here and called by tp_usrreq().
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_seq.h>
+#include <netiso/tp_ip.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_meas.h>
+#include <netiso/iso.h>
+#include <netiso/iso_errno.h>
+
+/* routines defined elsewhere; tp_attach() and tp_pcbbind() are called below */
+int tp_attach(), tp_driver(), tp_pcbbind();
+/* NOTE(review): TNew, TPNagle1 and TPNagle2 are not referenced in this file */
+int TNew;
+int TPNagle1, TPNagle2;
+/*
+ * tp_listeners heads the list (linked through tp_nextlisten) onto which
+ * PRU_LISTEN inserts a pcb; tp_intercepts is not referenced in this file.
+ */
+struct tp_pcb *tp_listeners, *tp_intercepts;
+
+#ifdef ARGO_DEBUG
+/*
+ * CALLED FROM:
+ *	anywhere you want to debug...
+ * FUNCTION and ARGUMENTS:
+ *	Print the label (str), then walk the mbuf chain (n) one record at a
+ *	time (records are linked through m_act, the mbufs of a record through
+ *	m_next), printing the control fields of every mbuf.
+ *	Gives up with "LOOP!" if an mbuf's m_next points back at itself.
+ */
+void
+dump_mbuf(n, str)
+	struct mbuf *n;
+	char *str;
+{
+	register struct mbuf *cur;
+	struct mbuf *following;
+
+	printf("dump %s\n", str);
+
+	if (n == MNULL) {
+		printf("EMPTY:\n");
+		return;
+	}
+
+	for (; n != MNULL; n = following) {
+		/* note the next record before walking this one */
+		following = n->m_act;
+		printf("RECORD:\n");
+		for (cur = n; cur != MNULL; cur = cur->m_next) {
+			printf("%x : Len %x Data %x A %x Nx %x Tp %x\n",
+				cur, cur->m_len, cur->m_data, cur->m_act,
+				cur->m_next, cur->m_type);
+#ifdef notdef
+			{
+				register char *p = mtod(cur, char *);
+				register int i;
+
+				printf("data: ");
+				for (i = 0; i < cur->m_len; i++) {
+					if (i%8 == 0)
+						printf("\n");
+					printf("0x%x ", *(p+i));
+				}
+				printf("\n");
+			}
+#endif /* notdef */
+			/* a self-referencing mbuf would never terminate */
+			if (cur->m_next == cur) {
+				printf("LOOP!\n");
+				return;
+			}
+		}
+	}
+	printf("\n");
+}
+
+#endif /* ARGO_DEBUG */
+
+/*
+ * CALLED FROM:
+ *	tp_usrreq(), PRU_RCVOOB
+ * FUNCTION and ARGUMENTS:
+ *	Copy data from the expedited data record queued on the socket's
+ *	receive buffer into the pre-allocated mbuf m, and (unless MSG_PEEK
+ *	is set in inflags) remove that record from the buffer.
+ *	There is an isomorphism between XPD TPDUs and expedited data TSDUs.
+ *	XPD tpdus are limited to 16 bytes of data so they fit in one mbuf.
+ *	(outflags) is not used.
+ * RETURN VALUE:
+ *	ENOBUFS if no mbuf (m) was supplied
+ *	ENOTCONN if the socket isn't connected
+ *	EWOULDBLOCK if the socket is in non-blocking mode and there's no
+ *		xpd data in the buffer
+ *	whatever sblock() or sbwait() returns if taking the buffer lock or
+ *		waiting for data fails (e.g. the sleep was interrupted)
+ *	E* whatever is returned from the fsm.
+ */
+tp_rcvoob(tpcb, so, m, outflags, inflags)
+	struct tp_pcb *tpcb;
+	register struct socket *so;
+	register struct mbuf *m;
+	int *outflags;
+	int inflags;
+{
+	register struct mbuf *n;
+	register struct sockbuf *sb = &so->so_rcv;
+	struct tp_event E;
+	int error = 0;
+	register struct mbuf **nn;
+
+	IFDEBUG(D_XPD)
+		printf("PRU_RCVOOB, sostate 0x%x\n", so->so_state);
+	ENDDEBUG
+
+	/* if you use soreceive */
+	if (m == MNULL)
+		return ENOBUFS;
+
+restart:
+	if ((((so->so_state & SS_ISCONNECTED) == 0)
+		 || (so->so_state & SS_ISDISCONNECTING) != 0) &&
+		(so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+			return ENOTCONN;
+	}
+
+	/* Take the first mbuf off the chain.
+	 * Each XPD TPDU gives you a complete TSDU so the chains don't get
+	 * coalesced, but one TSDU may span several mbufs.
+	 * Nevertheless, since n should have at most 16 bytes, it
+	 * will fit into m.  (size was checked in tp_input() )
+	 */
+
+	/*
+	 * Code for excision of OOB data should be added to
+	 * uipc_socket2.c (like sbappend).
+	 */
+
+	/*
+	 * Do not carry on without the lock: if sblock() fails we would
+	 * otherwise scan the buffer unlocked and then release a lock
+	 * we never held.
+	 */
+	if (error = sblock(sb, M_WAITOK))
+		return error;
+	/* find the first record whose leading mbuf is expedited data */
+	for (nn = &sb->sb_mb; n = *nn; nn = &n->m_act)
+		if (n->m_type == MT_OOBDATA)
+			break;
+
+	if (n == 0) {
+		IFDEBUG(D_XPD)
+			printf("RCVOOB: empty queue!\n");
+		ENDDEBUG
+		sbunlock(sb);
+		if (so->so_state & SS_NBIO) {
+			return EWOULDBLOCK;
+		}
+		/*
+		 * Propagate a failed wait (interrupted sleep or timeout)
+		 * rather than looping back as if data had arrived.
+		 */
+		if (error = sbwait(sb))
+			return error;
+		goto restart;
+	}
+	m->m_len = 0;
+
+	/* Assuming at most one xpd tpdu is in the buffer at once */
+	while (n != MNULL) {
+		m->m_len += n->m_len;
+		bcopy(mtod(n, caddr_t), mtod(m, caddr_t), (unsigned)n->m_len);
+		m->m_data += n->m_len; /* so mtod() in bcopy() above gives right addr */
+		n = n->m_next;
+	}
+	/* undo the pointer advance used as a write cursor above */
+	m->m_data = m->m_dat;
+	m->m_flags |= M_EOR;
+
+	IFDEBUG(D_XPD)
+		printf("tp_rcvoob: xpdlen 0x%x\n", m->m_len);
+		dump_mbuf(so->so_rcv.sb_mb, "RCVOOB: Rcv socketbuf");
+		dump_mbuf(sb->sb_mb, "RCVOOB: Xrcv socketbuf");
+	ENDDEBUG
+
+	if ((inflags & MSG_PEEK) == 0) {
+		/* unlink the record, then account for and free each mbuf */
+		n = *nn;
+		*nn = n->m_act;
+		for (; n; n = m_free(n))
+			sbfree(sb, n);
+	}
+
+	sbunlock(sb);
+
+	IFTRACE(D_XPD)
+		tptraceTPCB(TPPTmisc, "PRU_RCVOOB @ release sb_cc m_len",
+			tpcb->tp_Xrcv.sb_cc, m->m_len, 0, 0);
+	ENDTRACE
+	/* tell the protocol machine the user has taken the expedited data */
+	return DoEvent(T_USR_Xrcvd);
+}
+
+/*
+ * CALLED FROM:
+ * tp_usrreq(), PRU_SENDOOB
+ * FUNCTION and ARGUMENTS:
+ * Send what's in the mbuf chain (xdata) as an XPD TPDU.
+ * The chain may not contain more than 16 bytes of data.
+ * XPD TSDUs aren't segmented, so they translate into
+ * exactly one XPD TPDU, with EOT bit set.
+ * A null (xdata) is sent as an empty XPD packet.
+ * RETURN VALUE:
+ * EWOULDBLOCK if socket is in non-blocking mode and the previous
+ * xpd data haven't been acked yet.
+ * EMSGSIZE if trying to send > max-xpd bytes (16)
+ * ENOBUFS if ran out of mbufs
+ * E* otherwise whatever is returned from the fsm.
+ */
+tp_sendoob(tpcb, so, xdata, outflags)
+ struct tp_pcb *tpcb;
+ register struct socket *so;
+ register struct mbuf *xdata;
+ int *outflags; /* not used */
+{
+ /*
+ * Each mbuf chain represents a sequence # in the XPD seq space.
+ * The first one in the queue has sequence # tp_Xuna.
+ * When we add to the XPD queue, we stuff a zero-length
+ * mbuf (mark) into the DATA queue, with its sequence number in m_next
+ * to be assigned to this XPD tpdu, so data xfer can stop
+ * when it reaches the zero-length mbuf if this XPD TPDU hasn't
+ * yet been acknowledged.
+ * NOTE(review): no such mark is created in this function; xmark
+ * below is only used as a cursor for totalling the length.
+ */
+ register struct sockbuf *sb = &(tpcb->tp_Xsnd);
+ register struct mbuf *xmark;
+ register int len=0;
+ struct tp_event E;
+
+ IFDEBUG(D_XPD)
+ printf("tp_sendoob:");
+ if (xdata)
+ printf("xdata len 0x%x\n", xdata->m_len);
+ ENDDEBUG
+ /* DO NOT LOCK the Xsnd buffer!!!! You can have at MOST one
+ * socket buf locked at any time!!! (otherwise you might
+ * sleep() in sblock() w/ a signal pending and cause the
+ * system call to be aborted w/ a locked socketbuf, which
+ * is a problem. So the so_snd buffer lock
+ * (done in sosend()) serves as the lock for Xpd.
+ */
+ if (sb->sb_mb) { /* Anything already in eXpedited data sockbuf? */
+ if (so->so_state & SS_NBIO) {
+ /* NOTE(review): xdata is not freed here -- confirm who owns it */
+ return EWOULDBLOCK;
+ }
+ /*
+ * Wait until the previous expedited data has left the Xsnd buffer.
+ * NOTE(review): the results of sbwait() and sblock() are ignored,
+ * so an interrupted sleep simply goes round the loop again --
+ * confirm this is intended.
+ */
+ while (sb->sb_mb) {
+ sbunlock(&so->so_snd); /* already locked by sosend */
+ sbwait(&so->so_snd);
+ sblock(&so->so_snd, M_WAITOK); /* sosend will unlock on return */
+ }
+ }
+
+ if (xdata == (struct mbuf *)0) {
+ /* empty xpd packet */
+ MGETHDR(xdata, M_WAIT, MT_OOBDATA);
+ if (xdata == NULL) {
+ return ENOBUFS;
+ }
+ xdata->m_len = 0;
+ xdata->m_pkthdr.len = 0;
+ }
+ IFDEBUG(D_XPD)
+ printf("tp_sendoob 1:");
+ if (xdata)
+ printf("xdata len 0x%x\n", xdata->m_len);
+ ENDDEBUG
+ /* total the bytes in the chain to enforce the XPD size limit */
+ xmark = xdata; /* temporary use of variable xmark */
+ while (xmark) {
+ len += xmark->m_len;
+ xmark = xmark->m_next;
+ }
+ if (len > TP_MAX_XPD_DATA) {
+ /* NOTE(review): xdata is not freed here either -- confirm ownership */
+ return EMSGSIZE;
+ }
+ IFDEBUG(D_XPD)
+ printf("tp_sendoob 2:");
+ if (xdata)
+ printf("xdata len 0x%x\n", len);
+ ENDDEBUG
+
+
+ IFTRACE(D_XPD)
+ tptraceTPCB(TPPTmisc, "XPD mark m_next ", xdata->m_next, 0, 0, 0);
+ ENDTRACE
+
+ /* queue the chain as one record on the expedited send buffer */
+ sbappendrecord(sb, xdata);
+
+ IFDEBUG(D_XPD)
+ printf("tp_sendoob len 0x%x\n", len);
+ dump_mbuf(so->so_snd.sb_mb, "XPD request Regular sndbuf:");
+ dump_mbuf(tpcb->tp_Xsnd.sb_mb, "XPD request Xsndbuf:");
+ ENDDEBUG
+ /* hand the request to the protocol machine */
+ return DoEvent(T_XPD_req);
+}
+
+/*
+ * CALLED FROM:
+ *	the socket routines
+ * FUNCTION and ARGUMENTS:
+ *	Handles all "user requests" except the get/setsockopt() requests.
+ *	The argument (req) is the request type (PRU*),
+ *	(m) is an mbuf chain, generally used for send and
+ *	receive type requests only.
+ *	(nam) is used for addresses usually, in particular for the bind request;
+ *	for PRU_ATTACH, PRU_RCVD and PRU_RCVOOB it is cast to an int instead.
+ *	(controlp) is control data accompanying PRU_SEND/PRU_SENDOOB; it is
+ *	handed to tp_snd_control() there and freed (with a complaint) before
+ *	returning if any other request supplied it.
+ * RETURN VALUE:
+ *	0 or an errno value.  ENOTCONN is returned for any request other
+ *	than PRU_ATTACH made on a socket with no tpcb; unsupported or
+ *	unknown requests yield EOPNOTSUPP.
+ *
+ * Runs at splnet() from entry until just before returning.
+ */
+/*ARGSUSED*/
+ProtoHook
+tp_usrreq(so, req, m, nam, controlp)
+	struct socket *so;
+	u_int req;
+	struct mbuf *m, *nam, *controlp;
+{
+	register struct tp_pcb *tpcb = sototpcb(so);
+	int s = splnet();
+	int error = 0;
+	int flags, *outflags = &flags;
+	u_long eotsdu = 0;
+	struct tp_event E;
+
+	IFDEBUG(D_REQUEST)
+		printf("usrreq(0x%x,%d,0x%x,0x%x,0x%x)\n",so,req,m,nam,outflags);
+		if (so->so_error)
+			printf("WARNING!!! so->so_error is 0x%x\n", so->so_error);
+	ENDDEBUG
+	IFTRACE(D_REQUEST)
+		tptraceTPCB(TPPTusrreq, "req so m state [", req, so, m,
+			tpcb?tpcb->tp_state:0);
+	ENDTRACE
+
+	/*
+	 * Test the pointer itself: casting it to u_int first would discard
+	 * the upper bits wherever pointers are wider than an int.
+	 */
+	if (tpcb == 0 && req != PRU_ATTACH) {
+		IFTRACE(D_REQUEST)
+			tptraceTPCB(TPPTusrreq, "req failed NO TPCB[", 0, 0, 0, 0);
+		ENDTRACE
+		splx(s);
+		return ENOTCONN;
+	}
+
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (tpcb) {
+			error = EISCONN;
+		} else if ((error = tp_attach(so, (int)nam)) == 0)
+			tpcb = sototpcb(so);
+		break;
+
+	case PRU_ABORT: 	/* called from close() */
+		/* called for each incoming connect queued on the
+		 *	parent (accepting) socket
+		 */
+		if (tpcb->tp_state == TP_OPEN || tpcb->tp_state == TP_CONFIRMING) {
+			E.ATTR(T_DISC_req).e_reason = E_TP_NO_SESSION;
+			error = DoEvent(T_DISC_req); /* pretend it was a close() */
+			break;
+		} /* else DROP THROUGH */
+
+	case PRU_DETACH: 	/* called from close() */
+		/* called only after disconnect was called */
+		error = DoEvent(T_DETACH);
+		if (tpcb->tp_state == TP_CLOSED) {
+			if (tpcb->tp_notdetached) {
+				IFDEBUG(D_CONN)
+					printf("PRU_DETACH: not detached\n");
+				ENDDEBUG
+				tp_detach(tpcb);
+			}
+			free((caddr_t)tpcb, M_PCB);
+			/* the exit debug/trace code checks tpcb before using it */
+			tpcb = 0;
+		}
+		break;
+
+	case PRU_SHUTDOWN:
+		/* recv end may have been released; local credit might be zero  */
+	case PRU_DISCONNECT:
+		E.ATTR(T_DISC_req).e_reason = E_TP_NORMAL_DISC;
+		error = DoEvent(T_DISC_req);
+		break;
+
+	case PRU_BIND:
+		error = tp_pcbbind(tpcb, nam);
+		break;
+
+	case PRU_LISTEN:
+		if (tpcb->tp_state != TP_CLOSED || tpcb->tp_lsuffixlen == 0 ||
+				tpcb->tp_next == 0)
+			error = EINVAL;
+		else {
+			register struct tp_pcb **tt;
+			/* take the pcb off its queue and make it self-linked */
+			remque(tpcb);
+			tpcb->tp_next = tpcb->tp_prev = tpcb;
+			/* insert ahead of the first listener with a local suffix */
+			for (tt = &tp_listeners; *tt; tt = &((*tt)->tp_nextlisten))
+				if ((*tt)->tp_lsuffixlen)
+					break;
+			tpcb->tp_nextlisten = *tt;
+			*tt = tpcb;
+			error = DoEvent(T_LISTEN_req);
+		}
+		break;
+
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP; /* for unix domain sockets */
+		break;
+
+	case PRU_CONNECT:
+		IFTRACE(D_CONN)
+			tptraceTPCB(TPPTmisc,
+			"PRU_CONNECT: so 0x%x *SHORT_LSUFXP(tpcb) 0x%x lsuflen 0x%x, class 0x%x",
+			tpcb->tp_sock, *SHORT_LSUFXP(tpcb), tpcb->tp_lsuffixlen,
+				tpcb->tp_class);
+		ENDTRACE
+		IFDEBUG(D_CONN)
+			/* one conversion per argument, as in the trace text above */
+			printf("PRU_CONNECT: so 0x%x *SHORT_LSUFXP(tpcb) 0x%x lsuflen 0x%x, class 0x%x",
+			tpcb->tp_sock, *SHORT_LSUFXP(tpcb), tpcb->tp_lsuffixlen,
+				tpcb->tp_class);
+		ENDDEBUG
+		/* bind to a default local suffix if the user never bound one */
+		if (tpcb->tp_lsuffixlen == 0) {
+			if (error = tp_pcbbind(tpcb, MNULL)) {
+				IFDEBUG(D_CONN)
+					printf("pcbbind returns error 0x%x\n", error);
+				ENDDEBUG
+				break;
+			}
+		}
+		IFDEBUG(D_CONN)
+			printf("isop 0x%x isop->isop_socket offset 12 :\n", tpcb->tp_npcb);
+			dump_buf(tpcb->tp_npcb, 16);
+		ENDDEBUG
+		if (error = tp_route_to(nam, tpcb, /* channel */0))
+			break;
+		IFDEBUG(D_CONN)
+			printf(
+"PRU_CONNECT after tpcb 0x%x so 0x%x npcb 0x%x flags 0x%x\n",
+				tpcb, so, tpcb->tp_npcb, tpcb->tp_flags);
+			printf("isop 0x%x isop->isop_socket offset 12 :\n", tpcb->tp_npcb);
+			dump_buf(tpcb->tp_npcb, 16);
+		ENDDEBUG
+		if (tpcb->tp_fsuffixlen == 0) {
+			/* didn't set peer extended suffix */
+			(tpcb->tp_nlproto->nlp_getsufx)(tpcb->tp_npcb, &tpcb->tp_fsuffixlen,
+				tpcb->tp_fsuffix, TP_FOREIGN);
+		}
+		if (tpcb->tp_state == TP_CLOSED) {
+			soisconnecting(so);
+			error = DoEvent(T_CONN_req);
+		} else {
+			(tpcb->tp_nlproto->nlp_pcbdisc)(tpcb->tp_npcb);
+			error = EISCONN;
+		}
+		IFPERF(tpcb)
+			u_int lsufx, fsufx;
+			lsufx = *(u_short *)(tpcb->tp_lsuffix);
+			fsufx = *(u_short *)(tpcb->tp_fsuffix);
+
+			tpmeas(tpcb->tp_lref,
+				TPtime_open | (tpcb->tp_xtd_format << 4),
+				&time, lsufx, fsufx, tpcb->tp_fref);
+		ENDPERF
+		break;
+
+	case PRU_ACCEPT:
+		(tpcb->tp_nlproto->nlp_getnetaddr)(tpcb->tp_npcb, nam, TP_FOREIGN);
+		IFDEBUG(D_REQUEST)
+			printf("ACCEPT PEERADDDR:");
+			dump_buf(mtod(nam, char *), nam->m_len);
+		ENDDEBUG
+		IFPERF(tpcb)
+			u_int lsufx, fsufx;
+			lsufx = *(u_short *)(tpcb->tp_lsuffix);
+			fsufx = *(u_short *)(tpcb->tp_fsuffix);
+
+			tpmeas(tpcb->tp_lref, TPtime_open,
+				&time, lsufx, fsufx, tpcb->tp_fref);
+		ENDPERF
+		break;
+
+	case PRU_RCVD:
+		if (so->so_state & SS_ISCONFIRMING) {
+			if (tpcb->tp_state == TP_CONFIRMING)
+				error = tp_confirm(tpcb);
+			break;
+		}
+		IFTRACE(D_DATA)
+			tptraceTPCB(TPPTmisc,
+			"RCVD BF: lcredit sent_lcdt cc hiwat \n",
+				tpcb->tp_lcredit, tpcb->tp_sent_lcdt,
+				so->so_rcv.sb_cc, so->so_rcv.sb_hiwat);
+			LOCAL_CREDIT(tpcb);
+			tptraceTPCB(TPPTmisc,
+				"PRU_RCVD AF sbspace lcredit hiwat cc",
+				sbspace(&so->so_rcv), tpcb->tp_lcredit,
+				so->so_rcv.sb_cc, so->so_rcv.sb_hiwat);
+		ENDTRACE
+		IFDEBUG(D_REQUEST)
+			printf("RCVD: cc %d space %d hiwat %d\n",
+				so->so_rcv.sb_cc, sbspace(&so->so_rcv),
+				so->so_rcv.sb_hiwat);
+		ENDDEBUG
+		/* nam carries the receive flags for this request */
+		if (((int)nam) & MSG_OOB)
+			error = DoEvent(T_USR_Xrcvd);
+		else
+			error = DoEvent(T_USR_rcvd);
+		break;
+
+	case PRU_RCVOOB:
+		if ((so->so_state & SS_ISCONNECTED) == 0) {
+			error = ENOTCONN;
+			break;
+		}
+		if (! tpcb->tp_xpd_service) {
+			error = EOPNOTSUPP;
+			break;
+		}
+		/* kludge - nam is really flags here */
+		error = tp_rcvoob(tpcb, so, m, outflags, (int)nam);
+		break;
+
+	case PRU_SEND:
+	case PRU_SENDOOB:
+		if (controlp) {
+			/* tp_snd_control() frees the control chain itself */
+			error = tp_snd_control(controlp, so, &m);
+			controlp = NULL;
+			if (error)
+				break;
+		}
+		if ((so->so_state & SS_ISCONFIRMING) &&
+			(tpcb->tp_state == TP_CONFIRMING) &&
+			(error = tp_confirm(tpcb)))
+				break;
+		if (req == PRU_SENDOOB) {
+			error = (tpcb->tp_xpd_service == 0) ?
+						EOPNOTSUPP : tp_sendoob(tpcb, so, m, outflags);
+			break;
+		}
+		if (m == 0)
+			break;
+		if (m->m_flags & M_EOR) {
+			eotsdu = 1;
+			m->m_flags &= ~M_EOR;
+		}
+		/* NOTE(review): m is not freed on the two early exits below */
+		if (eotsdu == 0 && m->m_pkthdr.len == 0)
+			break;
+		if (tpcb->tp_state != TP_AKWAIT && tpcb->tp_state != TP_OPEN) {
+			error = ENOTCONN;
+			break;
+		}
+		/*
+		 * The protocol machine copies mbuf chains,
+		 * prepends headers, assigns seq numbers, and
+		 * puts the packets on the device.
+		 * When they are acked they are removed from the socket buf.
+		 *
+		 * sosend calls this up until sbspace goes negative.
+		 * Sbspace may be made negative by appending this mbuf chain,
+		 * possibly by a whole cluster.
+		 */
+		{
+			/*
+			 * Could have eotsdu and no data.(presently MUST have
+			 * an mbuf though, even if its length == 0)
+			 */
+			int totlen = m->m_pkthdr.len;
+			struct sockbuf *sb = &so->so_snd;
+			IFPERF(tpcb)
+				PStat(tpcb, Nb_from_sess) += totlen;
+				tpmeas(tpcb->tp_lref, TPtime_from_session, 0, 0,
+					PStat(tpcb, Nb_from_sess), totlen);
+			ENDPERF
+			IFDEBUG(D_SYSCALL)
+				printf(
+"PRU_SEND: eot %d before sbappend 0x%x len 0x%x to sb @ 0x%x\n",
+					eotsdu, m, totlen, sb);
+				dump_mbuf(sb->sb_mb, "so_snd.sb_mb");
+				dump_mbuf(m, "m : to be added");
+			ENDDEBUG
+			tp_packetize(tpcb, m, eotsdu);
+			IFDEBUG(D_SYSCALL)
+				printf("PRU_SEND: eot %d after sbappend 0x%x\n", eotsdu, m);
+				dump_mbuf(sb->sb_mb, "so_snd.sb_mb");
+			ENDDEBUG
+			/* in TP_AKWAIT the data is only queued, not yet driven out */
+			if (tpcb->tp_state == TP_OPEN)
+				error = DoEvent(T_DATA_req);
+			IFDEBUG(D_SYSCALL)
+				printf("PRU_SEND: after driver error 0x%x \n",error);
+				printf("so_snd 0x%x cc 0t%d mbcnt 0t%d\n",
+						sb, sb->sb_cc, sb->sb_mbcnt);
+				dump_mbuf(sb->sb_mb, "so_snd.sb_mb after driver");
+			ENDDEBUG
+		}
+		break;
+
+	case PRU_SOCKADDR:
+		(tpcb->tp_nlproto->nlp_getnetaddr)(tpcb->tp_npcb, nam, TP_LOCAL);
+		break;
+
+	case PRU_PEERADDR:
+		(tpcb->tp_nlproto->nlp_getnetaddr)(tpcb->tp_npcb, nam, TP_FOREIGN);
+		break;
+
+	case PRU_CONTROL:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_PROTOSEND:
+	case PRU_PROTORCV:
+	case PRU_SENSE:
+	case PRU_SLOWTIMO:
+	case PRU_FASTTIMO:
+		error = EOPNOTSUPP;
+		break;
+
+	default:
+#ifdef ARGO_DEBUG
+		printf("tp_usrreq UNKNOWN PRU %d\n", req);
+#endif /* ARGO_DEBUG */
+		error = EOPNOTSUPP;
+	}
+
+	IFDEBUG(D_REQUEST)
+		printf("%s, so 0x%x, tpcb 0x%x, error %d, state %d\n",
+			"returning from tp_usrreq", so, tpcb, error,
+			tpcb ? tpcb->tp_state : 0);
+	ENDDEBUG
+	IFTRACE(D_REQUEST)
+		tptraceTPCB(TPPTusrreq, "END req so m state [", req, so, m,
+			tpcb ? tpcb->tp_state : 0);
+	ENDTRACE
+	/* only the send requests consume control data; don't leak it */
+	if (controlp) {
+		m_freem(controlp);
+		printf("control data unexpectedly retained in tp_usrreq()");
+	}
+	splx(s);
+	return error;
+}
+/*
+ * Record a trace entry ("sosend so resid iovcnt") holding the socket and
+ * the uio's residual count and iovec count, provided the socket has a tpcb.
+ * Everything happens inside the IFTRACE(D_DATA) ... ENDTRACE bracket.
+ * No value is returned.
+ */
+tp_ltrace(so, uio)
+struct socket *so;
+struct uio *uio;
+{
+ IFTRACE(D_DATA)
+ register struct tp_pcb *tpcb = sototpcb(so);
+ if (tpcb) {
+ tptraceTPCB(TPPTmisc, "sosend so resid iovcnt", so,
+ uio->uio_resid, uio->uio_iovcnt, 0);
+ }
+ ENDTRACE
+}
+
+/*
+ * Accept a connection that is awaiting user confirmation.
+ * When (tpcb) is in state TP_CONFIRMING, the T_ACPT_req event is given to
+ * the protocol machine and its result returned.  In any other state a
+ * complaint is printed and 0 is returned.
+ */
+tp_confirm(tpcb)
+	register struct tp_pcb *tpcb;
+{
+	struct tp_event E;
+
+	if (tpcb->tp_state != TP_CONFIRMING) {
+		printf("Tp confirm called when not confirming; tpcb 0x%x, state 0x%x\n",
+			tpcb, tpcb->tp_state);
+		return 0;
+	}
+	return DoEvent(T_ACPT_req);
+}
+
+/*
+ * Process control data sent with sendmsg()
+ *
+ * (m) is the control mbuf, which must begin with a struct cmsghdr; the
+ * bytes after the header are applied as a socket option through
+ * tp_ctloutput(PRCO_SETOPT) using the header's level and type.
+ * If the type is TPOPT_DISC_DATA, any user data in (*data) is discarded
+ * and a PRU_DISCONNECT is issued; its result replaces the option result.
+ * (m) is always consumed.
+ *
+ * RETURN VALUE:
+ *	0 if there was no control data or it was processed successfully
+ *	EINVAL if the control mbuf is too short to hold a cmsghdr
+ *	E* whatever tp_ctloutput() or tp_usrreq() returns
+ */
+tp_snd_control(m, so, data)
+	struct mbuf *m;
+	struct socket *so;
+	register struct mbuf **data;
+{
+	register struct cmsghdr *ch;
+	int error = 0;
+	int level, type;
+
+	if (m && m->m_len) {
+		/*
+		 * Stepping past a header that is not entirely present
+		 * would leave m_len negative and m_data beyond the data.
+		 */
+		if ((u_int)m->m_len < sizeof (*ch)) {
+			m_freem(m);
+			return EINVAL;
+		}
+		ch = mtod(m, struct cmsghdr *);
+		/*
+		 * Copy what we need out of the header first: tp_ctloutput()
+		 * is given &m and so may release or replace the mbuf that
+		 * ch points into.
+		 */
+		level = ch->cmsg_level;
+		type = ch->cmsg_type;
+		m->m_len -= sizeof (*ch);
+		m->m_data += sizeof (*ch);
+		error = tp_ctloutput(PRCO_SETOPT, so, level, type, &m);
+		if (type == TPOPT_DISC_DATA) {
+			if (data && *data) {
+				m_freem(*data);
+				*data = 0;
+			}
+			error = tp_usrreq(so, PRU_DISCONNECT, (struct mbuf *)0,
+				(struct mbuf *)0, (struct mbuf *)0);
+		}
+	}
+	if (m)
+		m_freem(m);
+	return error;
+}
diff --git a/sys/netiso/tuba_subr.c b/sys/netiso/tuba_subr.c
new file mode 100644
index 000000000000..d346927255c2
--- /dev/null
+++ b/sys/netiso/tuba_subr.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tuba_subr.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_var.h>
+#include <netiso/tuba_table.h>
+
+/* NOTE(review): null_siso is not referenced in the code visible in this file */
+static struct sockaddr_iso null_siso = { sizeof(null_siso), AF_ISO, };
+extern int tuba_table_size, tcp_keepidle, tcp_keepintvl, tcp_maxidle;
+extern int tcppcbcachemiss, tcppredack, tcppreddat, tcprexmtthresh;
+extern struct tcpiphdr tcp_saveti;
+/*
+ * TUBA's own pcb list head and cached last pcb; tuba_tcpinput() #defines
+ * tcb and tcp_last_inpcb to these before including tcp_input.c.
+ */
+struct inpcb tuba_inpcb;
+struct inpcb *tuba_last_inpcb = &tuba_inpcb;
+/* list head, and the isopcb tuba_output() uses when a packet has none */
+struct isopcb tuba_isopcb;
+/*
+ * Tuba initialization
+ *
+ * Makes the inpcb and isopcb list heads self-linked, points the static
+ * isopcb's address pointers at its own embedded address storage, raises
+ * max_protohdr to at least the TUBA header size, and panics if a link
+ * header plus the TUBA headers cannot fit in MHLEN bytes.
+ */
+tuba_init()
+{
+#define TUBAHDRSIZE (3 /*LLC*/ + 9 /*CLNP Fixed*/ + 42 /*Addresses*/ \
+	+ 6 /*CLNP Segment*/ + 20 /*TCP*/)
+	register struct inpcb *inp = &tuba_inpcb;
+	register struct isopcb *isop = &tuba_isopcb;
+
+	/* empty circular lists: each head points at itself */
+	inp->inp_prev = inp;
+	inp->inp_next = inp;
+	isop->isop_prev = isop;
+	isop->isop_next = isop;
+
+	isop->isop_faddr = &isop->isop_sfaddr;
+	isop->isop_laddr = &isop->isop_sladdr;
+
+	if (TUBAHDRSIZE > max_protohdr)
+		max_protohdr = TUBAHDRSIZE;
+	if (max_linkhdr + TUBAHDRSIZE > MHLEN)
+		panic("tuba_init");
+}
+
+/*
+ * Running state handed to tuba_getaddr() while the two addresses of a
+ * packet are processed; callers zero all three fields first.
+ */
+struct addr_arg {
+ int error; /* set to 1 when an index has no table entry */
+ int offset; /* grows by siso_nlen + 1 per address; its low bit picks tc_ssum or tc_sum */
+ u_long sum; /* accumulated checksum contribution */
+};
+
+/*
+ * Calculate contribution to fudge factor for TCP checksum,
+ * and coincidentally set pointer for convenience of clnp_output
+ * if we are responding when there is no isopcb around.
+ *
+ * (index) selects an entry of tuba_table.  If it is out of range or the
+ * slot is empty, arg->error is set and nothing else is touched.
+ * Otherwise *siso (when siso is non-null) is pointed at the entry's
+ * address, and arg->sum and arg->offset are advanced.
+ */
+static void
+tuba_getaddr(arg, siso, index)
+	register struct addr_arg *arg;
+	struct sockaddr_iso **siso;
+	u_long index;
+{
+	register struct tuba_cache *tc;
+
+	if (index > tuba_table_size || (tc = tuba_table[index]) == 0) {
+		arg->error = 1;
+		return;
+	}
+	if (siso)
+		*siso = &tc->tc_siso;
+	/* an odd running offset selects tc_ssum, an even one tc_sum */
+	arg->sum += ((arg->offset & 1) ? tc->tc_ssum : tc->tc_sum)
+			+ (0xffff ^ index);
+	arg->offset += tc->tc_siso.siso_nlen + 1;
+}
+
+/*
+ * Send the TCP segment in (m) over CLNP.
+ * The addresses in the tcpiphdr are indices into tuba_table rather than
+ * IP addresses; the TCP checksum is adjusted by a value derived from the
+ * corresponding table entries, the leading sizeof(struct ip) bytes are
+ * skipped, and the rest is handed to clnp_output().
+ * Returns EADDRNOTAVAIL (after freeing m) if an index has no table entry,
+ * otherwise whatever clnp_output() returns.
+ * NOTE(review): REDUCE is defined elsewhere; presumably it folds a sum
+ * into 16 bits -- confirm.
+ */
+tuba_output(m, tp)
+ register struct mbuf *m;
+ struct tcpcb *tp;
+{
+ register struct tcpiphdr *n;
+ struct isopcb *isop;
+ struct addr_arg arg;
+
+ /*
+ * No tcpcb, template or isopcb to work from: use the static
+ * tuba_isopcb, pointing its foreign and local addresses at the
+ * table entries named by the packet's own header.
+ */
+ if (tp == 0 || (n = tp->t_template) == 0 ||
+ (isop = (struct isopcb *)tp->t_tuba_pcb) == 0) {
+ isop = &tuba_isopcb;
+ n = mtod(m, struct tcpiphdr *);
+ arg.error = arg.sum = arg.offset = 0;
+ tuba_getaddr(&arg, &tuba_isopcb.isop_faddr, n->ti_dst.s_addr);
+ tuba_getaddr(&arg, &tuba_isopcb.isop_laddr, n->ti_src.s_addr);
+ REDUCE(arg.sum, arg.sum);
+ goto adjust;
+ }
+ /*
+ * The template's ti_sum is still zero: compute the value once, store
+ * it in the template, then adjust the checksum of this packet.
+ */
+ if (n->ti_sum == 0) {
+ arg.error = arg.sum = arg.offset = 0;
+ tuba_getaddr(&arg, (struct sockaddr_iso **)0, n->ti_dst.s_addr);
+ tuba_getaddr(&arg, (struct sockaddr_iso **)0, n->ti_src.s_addr);
+ REDUCE(arg.sum, arg.sum);
+ n->ti_sum = arg.sum;
+ n = mtod(m, struct tcpiphdr *);
+ adjust: /* also entered by the goto above, with n at the packet header */
+ if (arg.error) {
+ m_freem(m);
+ return (EADDRNOTAVAIL);
+ }
+ REDUCE(n->ti_sum, n->ti_sum + (0xffff ^ arg.sum));
+ }
+ /* skip the leading sizeof(struct ip) bytes of the tcpiphdr */
+ m->m_len -= sizeof (struct ip);
+ m->m_pkthdr.len -= sizeof (struct ip);
+ m->m_data += sizeof (struct ip);
+ return (clnp_output(m, isop, m->m_pkthdr.len, 0));
+}
+
+/*
+ * Take (delta == 1) or drop (any other delta) one reference on the
+ * tuba_table entries for the foreign and local addresses of (isop).
+ * isop_tuba_cached records whether references are currently held, so a
+ * repeated take or a repeated drop is ignored.  A drop never lowers a
+ * reference count that is not above zero.  No value is returned.
+ */
+tuba_refcnt(isop, delta)
+	struct isopcb *isop;
+{
+	struct sockaddr_iso *addr[2];
+	register struct tuba_cache *tc;
+	unsigned index;
+	int taking, i;
+
+	taking = (delta == 1);
+	delta = taking ? 1 : -1;
+
+	if (isop == 0 || isop->isop_faddr == 0 || isop->isop_laddr == 0)
+		return;
+	/* already in the requested state: nothing to do */
+	if (taking ? (isop->isop_tuba_cached != 0)
+		   : (isop->isop_tuba_cached == 0))
+		return;
+	isop->isop_tuba_cached = taking;
+
+	/* foreign address first, then local, as separate lookups */
+	addr[0] = isop->isop_faddr;
+	addr[1] = isop->isop_laddr;
+	for (i = 0; i < 2; i++) {
+		index = tuba_lookup(addr[i], M_DONTWAIT);
+		if (index == 0)
+			continue;
+		tc = tuba_table[index];
+		if (tc != 0 && (taking || tc->tc_refcnt > 0))
+			tc->tc_refcnt += delta;
+	}
+}
+
+/*
+ * Detach a TUBA isopcb: drop its tuba_table references, clear its socket
+ * pointer and pass it to iso_pcbdetach().  A null (isop) is ignored.
+ */
+tuba_pcbdetach(isop)
+	struct isopcb *isop;
+{
+	if (isop != 0) {
+		tuba_refcnt(isop, -1);
+		/* cleared before iso_pcbdetach() sees the pcb */
+		isop->isop_socket = 0;
+		iso_pcbdetach(isop);
+	}
+}
+
+/*
+ * Avoid in_pcbconnect in faked out tcp_input()
+ *
+ * (nam) arrives holding a sockaddr_in whose address is a tuba_table index.
+ * The local and foreign ISO addresses are built from the table entries for
+ * inp_laddr and inp_faddr, with the TCP ports as transport selectors, and
+ * the isopcb is connected.  On success the table references are taken.
+ * Returns the result of iso_pcbconnect().
+ * NOTE(review): the table entries are dereferenced without a range or
+ * null check -- presumably the caller guarantees valid indices.
+ */
+tuba_pcbconnect(inp, nam)
+ register struct inpcb *inp;
+ struct mbuf *nam;
+{
+ register struct sockaddr_iso *siso;
+ struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
+ struct tcpcb *tp = intotcpcb(inp);
+ struct isopcb *isop = (struct isopcb *)tp->t_tuba_pcb;
+ int error;
+
+ /* hardwire iso_pcbbind() here */
+ siso = isop->isop_laddr = &isop->isop_sladdr;
+ *siso = tuba_table[inp->inp_laddr.s_addr]->tc_siso;
+ siso->siso_tlen = sizeof(inp->inp_lport);
+ bcopy((caddr_t)&inp->inp_lport, TSEL(siso), sizeof(inp->inp_lport));
+
+ /* hardwire in_pcbconnect() here without assigning route */
+ /* must be done before nam is overwritten below: sin points into nam */
+ inp->inp_fport = sin->sin_port;
+ inp->inp_faddr = sin->sin_addr;
+
+ /* reuse nam argument to call iso_pcbconnect() */
+ nam->m_len = sizeof(*siso);
+ siso = mtod(nam, struct sockaddr_iso *);
+ *siso = tuba_table[inp->inp_faddr.s_addr]->tc_siso;
+ siso->siso_tlen = sizeof(inp->inp_fport);
+ bcopy((caddr_t)&inp->inp_fport, TSEL(siso), sizeof(inp->inp_fport));
+
+ if ((error = iso_pcbconnect(isop, nam)) == 0)
+ tuba_refcnt(isop, 1);
+ return (error);
+}
+
+/*
+ * CALLED FROM:
+ * clnp's input routine, indirectly through the protosw.
+ * FUNCTION and ARGUMENTS:
+ * Take a packet (m) from clnp, strip off the clnp header
+ * and do tcp input processing.
+ * (src) and (dst) are the CLNP source and destination addresses; they
+ * are mapped to tuba_table indices, which stand in for IP addresses
+ * in the tcpiphdr given to the TCP input code.
+ * The body of netinet/tcp_input.c is textually included at the end,
+ * with a few names redefined to their TUBA counterparts.
+ * No return value.
+ */
+tuba_tcpinput(m, src, dst)
+ register struct mbuf *m;
+ struct sockaddr_iso *src, *dst;
+{
+ /*
+ * NOTE(review): many of these locals are not referenced in the code
+ * visible here; presumably they are the ones the included
+ * tcp_input.c body expects -- confirm against that file.
+ */
+ unsigned long sum, lindex, findex;
+ register struct tcpiphdr *ti;
+ register struct inpcb *inp;
+ caddr_t optp = NULL;
+ int optlen;
+ int len, tlen, off;
+ register struct tcpcb *tp = 0;
+ int tiflags;
+ struct socket *so;
+ int todrop, acked, ourfinisacked, needoutput = 0;
+ short ostate;
+ struct in_addr laddr;
+ int dropsocket = 0, iss = 0;
+ u_long tiwin, ts_val, ts_ecr;
+ int ts_present = 0;
+
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("tuba_tcpinput");
+ /*
+ * Do some housekeeping looking up CLNP addresses.
+ * If we are out of space might as well drop the packet now.
+ */
+ tcpstat.tcps_rcvtotal++;
+ lindex = tuba_lookup(dst, M_DONTWAIT);
+ findex = tuba_lookup(src, M_DONTWAIT);
+ /* the drop label is not defined in this file; it comes with the include */
+ if (lindex == 0 || findex == 0)
+ goto drop;
+ /*
+ * CLNP gave us an mbuf chain WITH the clnp header pulled up,
+ * but the data pointer pushed past it.
+ */
+ len = m->m_len;
+ tlen = m->m_pkthdr.len;
+ /* back up over sizeof(struct ip) bytes to make room for a tcpiphdr */
+ m->m_data -= sizeof(struct ip);
+ m->m_len += sizeof(struct ip);
+ m->m_pkthdr.len += sizeof(struct ip);
+ m->m_flags &= ~(M_MCAST|M_BCAST); /* XXX should do this in clnp_input */
+ /*
+ * The reassembly code assumes it will be overwriting a useless
+ * part of the packet, which is why we need to have it point
+ * into the packet itself.
+ *
+ * Check to see if the data is properly aligned
+ * so that we can save copying the tcp header.
+ * This code knows way too much about the structure of mbufs!
+ */
+ off = ((sizeof (long) - 1) & ((m->m_flags & M_EXT) ?
+ (m->m_data - m->m_ext.ext_buf) : (m->m_data - m->m_pktdat)));
+ if (off || len < sizeof(struct tcphdr)) {
+ /* misaligned or short first mbuf: put a fresh header mbuf in front */
+ struct mbuf *m0 = m;
+
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m == 0) {
+ m = m0;
+ goto drop;
+ }
+ m->m_next = m0;
+ m->m_data += max_linkhdr;
+ m->m_pkthdr = m0->m_pkthdr;
+ m->m_flags = m0->m_flags & M_COPYFLAGS;
+ if (len < sizeof(struct tcphdr)) {
+ /* header spans mbufs: let m_pullup() gather it into the new one */
+ m->m_len = 0;
+ if ((m = m_pullup(m, sizeof(struct tcpiphdr))) == 0) {
+ tcpstat.tcps_rcvshort++;
+ return;
+ }
+ } else {
+ /* copy just the TCP header forward and trim it from the old mbuf */
+ bcopy(mtod(m0, caddr_t) + sizeof(struct ip),
+ mtod(m, caddr_t) + sizeof(struct ip),
+ sizeof(struct tcphdr));
+ m0->m_len -= sizeof(struct tcpiphdr);
+ m0->m_data += sizeof(struct tcpiphdr);
+ m->m_len = sizeof(struct tcpiphdr);
+ }
+ }
+ /*
+ * Calculate checksum of extended TCP header and data,
+ * replacing what would have been IP addresses by
+ * the IP checksum of the CLNP addresses.
+ */
+ ti = mtod(m, struct tcpiphdr *);
+ ti->ti_dst.s_addr = tuba_table[lindex]->tc_sum;
+ /* the parity of the destination address length picks the source sum */
+ if (dst->siso_nlen & 1)
+ ti->ti_src.s_addr = tuba_table[findex]->tc_sum;
+ else
+ ti->ti_src.s_addr = tuba_table[findex]->tc_ssum;
+ ti->ti_prev = ti->ti_next = 0;
+ ti->ti_x1 = 0; ti->ti_pr = ISOPROTO_TCP;
+ ti->ti_len = htons((u_short)tlen);
+ /* a non-zero result means the checksum did not verify */
+ if (ti->ti_sum = in_cksum(m, m->m_pkthdr.len)) {
+ tcpstat.tcps_rcvbadsum++;
+ goto drop;
+ }
+ /* from here on the address fields carry tuba_table indices */
+ ti->ti_src.s_addr = findex;
+ ti->ti_dst.s_addr = lindex;
+ /*
+ * Now include the rest of TCP input
+ */
+#define TUBA_INCLUDE
+#define in_pcbconnect tuba_pcbconnect
+#define tcb tuba_inpcb
+#define tcp_last_inpcb tuba_last_inpcb
+
+#include <netinet/tcp_input.c>
+}
+
+#define tcp_slowtimo tuba_slowtimo
+#define tcp_fasttimo tuba_fasttimo
+
+#include <netinet/tcp_timer.c>
diff --git a/sys/netiso/tuba_table.c b/sys/netiso/tuba_table.c
new file mode 100644
index 000000000000..a1bf5f98de0f
--- /dev/null
+++ b/sys/netiso/tuba_table.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tuba_table.c 8.2 (Berkeley) 11/15/93
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/radix.h>
+
+#include <netiso/iso.h>
+#include <netiso/tuba_table.h>
+
+int tuba_table_size; /* highest usable index in tuba_table; the array holds tuba_table_size + 1 pointers */
+struct tuba_cache **tuba_table; /* cache entries by index; tuba_lookup() only hands out slots 1..tuba_table_size */
+struct radix_node_head *tuba_tree; /* radix tree over the same entries, keyed by tc_siso.siso_addr */
+extern int arpt_keep, arpt_prune; /* use same values as arp cache */
+
+/*
+ * Periodic sweep of the TUBA address cache.
+ * Re-arms itself to run again in arpt_prune * hz ticks, then walks the
+ * table from the top slot down to slot 1 and discards every entry that
+ * has no references and was last looked up more than arpt_keep seconds
+ * ago.  A discarded entry is removed from both the index table and the
+ * radix tree before its memory is released.  Runs at splnet.
+ */
+void
+tuba_timer()
+{
+	register struct tuba_cache *tc;
+	long timelimit;
+	int s, i;
+
+	s = splnet();
+	timelimit = time.tv_sec - arpt_keep;
+
+	timeout(tuba_timer, (caddr_t)0, arpt_prune * hz);
+	for (i = tuba_table_size; i > 0; i--) {
+		tc = tuba_table[i];
+		if (tc == 0)
+			continue;		/* empty slot */
+		if (tc->tc_refcnt != 0)
+			continue;		/* still referenced */
+		if (!(tc->tc_time < timelimit))
+			continue;		/* used recently enough */
+		tuba_table[i] = 0;
+		rn_delete(&tc->tc_siso.siso_addr, NULL, tuba_tree);
+		free((caddr_t)tc, M_RTABLE);
+	}
+	splx(s);
+}
+
+/*
+ * Set up the TUBA address cache: create the radix tree head and
+ * schedule the first tuba_timer() sweep arpt_prune * hz ticks from now.
+ */
+tuba_table_init()
+{
+	int first_sweep = arpt_prune * hz;	/* delay in clock ticks */
+
+	rn_inithead((void **)&tuba_tree, 40);
+	timeout(tuba_timer, (caddr_t)0, first_sweep);
+}
+
+/*
+ * Translate an NSAP into its small-integer cache index, creating a new
+ * cache entry if the address has not been seen before.
+ *
+ *	siso	address to look up; only siso_addr (length + data) is used.
+ *	wait	malloc wait flag (callers in this code pass M_WAITOK).
+ *
+ * Returns the entry's index in tuba_table (always >= 1), or 0 if memory
+ * could not be obtained or the table may not grow any further.
+ *
+ * A hit refreshes tc_time so that tuba_timer() keeps the entry.
+ * Every failure after the entry has been inserted in the radix tree
+ * removes it again and frees it; otherwise an entry with index 0 would
+ * remain in the tree and make all later lookups of that NSAP fail.
+ */
+int
+tuba_lookup(siso, wait)
+	register struct sockaddr_iso *siso;
+	int wait;
+{
+	struct radix_node *rn, *rn_match();
+	register struct tuba_cache *tc;
+	struct tuba_cache **new;
+	int dupentry = 0, sum_a = 0, sum_b = 0, old_size, i;
+
+	if ((rn = rn_match((caddr_t)&siso->siso_addr, tuba_tree->rnh_treetop))
+	    && ((rn->rn_flags & RNF_ROOT) == 0)) {
+		/* tc_nodes is the first member, so the node is the entry */
+		tc = (struct tuba_cache *)rn;
+		tc->tc_time = time.tv_sec;
+		return (tc->tc_index);
+	}
+	if ((tc = (struct tuba_cache *)malloc(sizeof(*tc), M_RTABLE, wait))
+	    == NULL)
+		return (0);
+	bzero((caddr_t)tc, sizeof (*tc));
+	bcopy(siso->siso_data, tc->tc_siso.siso_data,
+	    tc->tc_siso.siso_nlen = siso->siso_nlen);
+	rn_insert(&tc->tc_siso.siso_addr, tuba_tree, &dupentry, tc->tc_nodes);
+	if (dupentry)
+		panic("tuba_lookup 1");
+	tc->tc_siso.siso_family = AF_ISO;
+	tc->tc_siso.siso_len = sizeof(tc->tc_siso);
+	tc->tc_time = time.tv_sec;
+	/*
+	 * Checksum of the NSAP including its length byte: the length seeds
+	 * sum_a, odd-offset bytes add to sum_a and even-offset bytes to
+	 * sum_b.
+	 */
+	for (i = sum_a = tc->tc_siso.siso_nlen; --i >= 0; ) {
+		if (i & 1)
+			sum_a += (u_char)tc->tc_siso.siso_data[i];
+		else
+			sum_b += (u_char)tc->tc_siso.siso_data[i];
+	}
+	REDUCE(tc->tc_sum, (sum_a << 8) + sum_b);
+	HTONS(tc->tc_sum);
+	SWAB(tc->tc_ssum, tc->tc_sum);
+
+	/* Reuse the highest-numbered free slot if there is one. */
+	for (i = tuba_table_size; i > 0; i--)
+		if (tuba_table[i] == 0)
+			goto fixup;
+
+	/* Table is full (or not yet allocated): grow it. */
+	old_size = tuba_table_size;
+	if (tuba_table_size == 0)
+		tuba_table_size = 15;
+	if (tuba_table_size > 0x7fff)
+		goto fail;		/* refuse to grow past the limit */
+	tuba_table_size = 1 + 2 * tuba_table_size;
+	i = (tuba_table_size + 1) * sizeof(tc);
+	new = (struct tuba_cache **)malloc((unsigned)i, M_RTABLE, wait);
+	if (new == 0)
+		goto fail;
+	bzero((caddr_t)new, (unsigned)i);
+	if (tuba_table) {
+		/* the old array is exactly half the size of the new one */
+		bcopy((caddr_t)tuba_table, (caddr_t)new, i >> 1);
+		free((caddr_t)tuba_table, M_RTABLE);
+	}
+	tuba_table = new;
+	i = tuba_table_size;
+fixup:
+	tuba_table[i] = tc;
+	tc->tc_index = i;
+	return (tc->tc_index);
+fail:
+	/* Undo the size change and the radix tree insertion. */
+	tuba_table_size = old_size;
+	rn_delete(&tc->tc_siso.siso_addr, NULL, tuba_tree);
+	free((caddr_t)tc, M_RTABLE);
+	return (0);
+}
diff --git a/sys/netiso/tuba_table.h b/sys/netiso/tuba_table.h
new file mode 100644
index 000000000000..6be8afaf523c
--- /dev/null
+++ b/sys/netiso/tuba_table.h
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tuba_table.h 8.1 (Berkeley) 6/10/93
+ */
+
+struct tuba_cache {
+ struct radix_node tc_nodes[2]; /* convenient lookup; must stay first: tuba_lookup() casts the matched radix_node to the entry */
+ int tc_refcnt; /* tuba_timer() only reclaims entries whose count is 0 */
+ int tc_time; /* last looked up */
+ int tc_flags;
+#define TCF_PERM 1 /* NOTE(review): presumably marks a permanent entry; not consulted by the code visible here */
+ int tc_index; /* slot of this entry in tuba_table[]; value returned by tuba_lookup() */
+ u_short tc_sum; /* cksum of nsap inc. length */
+ u_short tc_ssum; /* swab(tc_sum) */
+ struct sockaddr_iso tc_siso; /* for responding */
+};
+
+/*
+ * One's-complement checksum helpers.
+ *
+ * ADDCARRY(x)	 subtract 65535 from the lvalue x if it has reached 65535.
+ * REDUCE(a, b)	 fold the low 32 bits of b into 16 bits by adding the two
+ *		 16-bit halves, apply ADDCARRY, map a zero result to
+ *		 0xffff, and store the result in a.  The halves are taken
+ *		 arithmetically rather than through a union, so the result
+ *		 does not depend on sizeof(long) or on byte order.
+ * SWAB(a, b)	 store in a the 16-bit value b with its two bytes exchanged.
+ *
+ * REDUCE and SWAB expand to a brace block; their temporaries carry a
+ * prefix so they are unlikely to shadow names used in the arguments.
+ */
+#define ADDCARRY(x) ((x) >= 65535 ? ((x) -= 65535) : (x))
+#define REDUCE(a, b) { unsigned long reduce_v_ = (unsigned long)(b); long reduce_x_; \
+	reduce_x_ = (long)(reduce_v_ & 0xffff) + (long)((reduce_v_ >> 16) & 0xffff); \
+	ADDCARRY(reduce_x_); \
+	if (reduce_x_ == 0) reduce_x_ = 0xffff; (a) = reduce_x_;}
+#define SWAB(a, b) { union { u_char c[2]; u_short s;} swab_u_; u_char swab_t_; \
+	swab_u_.s = (b); swab_t_ = swab_u_.c[0]; swab_u_.c[0] = swab_u_.c[1]; \
+	swab_u_.c[1] = swab_t_; (a) = swab_u_.s;}
+
+#ifdef KERNEL
+extern int tuba_table_size; /* defined in tuba_table.c */
+extern struct tuba_cache **tuba_table; /* defined in tuba_table.c */
+extern struct radix_node_head *tuba_tree; /* defined in tuba_table.c */
+#endif /* KERNEL */
diff --git a/sys/netiso/tuba_usrreq.c b/sys/netiso/tuba_usrreq.c
new file mode 100644
index 000000000000..2d9211707a45
--- /dev/null
+++ b/sys/netiso/tuba_usrreq.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tuba_usrreq.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_var.h>
+#include <netiso/tuba_table.h>
+/*
+ * TCP protocol interface to socket abstraction, for TCP sockets that
+ * carry an ISO pcb alongside the usual internet pcb.
+ */
+extern char *tcpstates[];
+extern struct inpcb tuba_inpcb; /* queue head that tuba_usrreq() moves each new inpcb onto at PRU_ATTACH */
+extern struct isopcb tuba_isopcb; /* list head handed to iso_pcballoc() at PRU_ATTACH */
+
+/*
+ * Process a TCP user request for a TUBA socket, i.e. a socket whose tcpcb
+ * is paired with an ISO pcb (tp->t_tuba_pcb) in addition to its inpcb.
+ * Requests that deal with addresses or with pcb lifetime are handled here;
+ * PRU_CONTROL is handed to iso_control() and any request not listed in the
+ * switch is passed unchanged to tcp_usrreq().
+ * If this is a send request then m is the mbuf chain of send data.
+ * Returns 0 on success or an errno value.
+ */
+/*ARGSUSED*/
+tuba_usrreq(so, req, m, nam, control)
+ struct socket *so;
+ int req;
+ struct mbuf *m, *nam, *control;
+{
+ register struct inpcb *inp;
+ register struct isopcb *isop;
+ register struct tcpcb *tp;
+ int s;
+ int error = 0;
+ int ostate;
+ struct sockaddr_iso *siso;
+
+ if (req == PRU_CONTROL) /* for this request the three mbuf arguments are recast, not used as mbufs */
+ return (iso_control(so, (int)m, (caddr_t)nam,
+ (struct ifnet *)control));
+
+ s = splnet();
+ inp = sotoinpcb(so);
+ /*
+ * When a TCP is attached to a socket, then there will be
+ * a (struct inpcb) pointed at by the socket, and this
+ * structure will point at a subsidiary (struct tcpcb).
+ * For a TUBA socket the tcpcb in turn points at the isopcb.
+ */
+ if (inp == 0 && req != PRU_ATTACH) {
+ splx(s);
+ return (EINVAL); /* XXX */
+ }
+ if (inp) {
+ tp = intotcpcb(inp);
+ if (tp == 0)
+ panic("tuba_usrreq");
+ ostate = tp->t_state; /* remembered for tcp_trace() at the end */
+ isop = (struct isopcb *)tp->t_tuba_pcb;
+ if (isop == 0)
+ panic("tuba_usrreq 2");
+ } else
+ ostate = 0; /* only PRU_ATTACH gets here; tp and isop are not set yet */
+ switch (req) {
+
+ /*
+ * TCP attaches to socket via PRU_ATTACH, reserving space,
+ * and an internet control block. We also need to
+ * allocate an isopcb and separate the control block from
+ * tcp/ip ones.
+ * The isopcb is allocated first through so->so_pcb, which is then
+ * cleared so that tcp_usrreq() can attach its own inpcb there; on
+ * success the inpcb is moved onto the tuba_inpcb queue and the
+ * tcpcb is linked to the isopcb.
+ */
+ case PRU_ATTACH:
+ if (error = iso_pcballoc(so, &tuba_isopcb))
+ break;
+ isop = (struct isopcb *)so->so_pcb;
+ so->so_pcb = 0;
+ if (error = tcp_usrreq(so, req, m, nam, control)) {
+ isop->isop_socket = 0;
+ iso_pcbdetach(isop);
+ } else {
+ inp = sotoinpcb(so);
+ remque(inp);
+ insque(inp, &tuba_inpcb);
+ inp->inp_head = &tuba_inpcb;
+ tp = intotcpcb(inp);
+ if (tp == 0)
+ panic("tuba_usrreq 3");
+ tp->t_tuba_pcb = (caddr_t) isop;
+ }
+ goto notrace;
+
+ /*
+ * PRU_DETACH detaches the TCP protocol from the socket.
+ * If the protocol state is non-embryonic, then can't
+ * do this directly: have to initiate a PRU_DISCONNECT,
+ * which may finish later; embryonic TCB's can just
+ * be discarded here.
+ * The isopcb is released only once the tcpcb is gone (tp == 0).
+ */
+ case PRU_DETACH:
+ if (tp->t_state > TCPS_LISTEN)
+ tp = tcp_disconnect(tp);
+ else
+ tp = tcp_close(tp);
+ if (tp == 0)
+ tuba_pcbdetach(isop);
+ break;
+
+ /*
+ * Give the socket an address.
+ * The transport selector, if given, must be 2 bytes long; it is
+ * copied into inp_lport.  A non-empty NSAP is entered in the
+ * TUBA cache and its index stored in inp_laddr.
+ */
+ case PRU_BIND:
+ siso = mtod(nam, struct sockaddr_iso *);
+ if (siso->siso_tlen && siso->siso_tlen != 2) {
+ error = EINVAL;
+ break;
+ }
+ if ((error = iso_pcbbind(isop, nam)) ||
+ (siso = isop->isop_laddr) == 0)
+ break;
+ bcopy(TSEL(siso), &inp->inp_lport, 2);
+ if (siso->siso_nlen &&
+ !(inp->inp_laddr.s_addr = tuba_lookup(siso, M_WAITOK)))
+ error = ENOBUFS;
+ break;
+
+ /*
+ * Prepare to accept connections.
+ * PRU_CONNECT shares the implicit bind below and then continues
+ * into the connect code; PRU_LISTEN stops after setting the state.
+ */
+ case PRU_CONNECT:
+ case PRU_LISTEN:
+ if (inp->inp_lport == 0 &&
+ (error = iso_pcbbind(isop, (struct mbuf *)0)))
+ break;
+ bcopy(TSEL(isop->isop_laddr), &inp->inp_lport, 2);
+ if (req == PRU_LISTEN) {
+ tp->t_state = TCPS_LISTEN;
+ break;
+ }
+ /*FALLTHROUGH*/
+ /*
+ * Initiate connection to peer.
+ * Create a template for use in transmissions on this connection.
+ * Enter SYN_SENT state, and mark socket as connecting.
+ * Start keep-alive timer, and seed output sequence space.
+ * Send initial segment on connection.
+ * Every failure after iso_pcbconnect() goes through the
+ * "unconnect" label, which undoes the ISO connect and reports
+ * ENOBUFS.
+ */
+ /* case PRU_CONNECT: */
+ if (error = iso_pcbconnect(isop, nam))
+ break;
+ if ((siso = isop->isop_laddr) && siso->siso_nlen > 1)
+ siso->siso_data[siso->siso_nlen - 1] = ISOPROTO_TCP; /* last NSAP byte of the local address carries the protocol id */
+ else
+ panic("tuba_usrreq: connect");
+ siso = mtod(nam, struct sockaddr_iso *);
+ if (!(inp->inp_faddr.s_addr = tuba_lookup(siso, M_WAITOK))) {
+ unconnect:
+ iso_pcbdisconnect(isop);
+ error = ENOBUFS;
+ break;
+ }
+ bcopy(TSEL(isop->isop_faddr), &inp->inp_fport, 2);
+ if (inp->inp_laddr.s_addr == 0 &&
+ (inp->inp_laddr.s_addr =
+ tuba_lookup(isop->isop_laddr, M_WAITOK)) == 0)
+ goto unconnect;
+ if ((tp->t_template = tcp_template(tp)) == 0)
+ goto unconnect;
+ soisconnecting(so);
+ tcpstat.tcps_connattempt++;
+ tp->t_state = TCPS_SYN_SENT;
+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+ tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
+ tcp_sendseqinit(tp);
+ error = tcp_output(tp);
+ tuba_refcnt(isop, 1); /* NOTE(review): presumably takes references on the cache entries for this connection; defined outside this view */
+ break;
+
+ /*
+ * Initiate disconnect from peer.
+ * If connection never passed embryonic stage, just drop;
+ * else if don't need to let data drain, then can just drop anyways,
+ * else have to begin TCP shutdown process: mark socket disconnecting,
+ * drain unread data, state switch to reflect user close, and
+ * send segment (e.g. FIN) to peer. Socket will be really disconnected
+ * when peer sends FIN and acks ours.
+ *
+ * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
+ */
+ case PRU_DISCONNECT:
+ if ((tp = tcp_disconnect(tp)) == 0)
+ tuba_pcbdetach(isop);
+ break;
+
+ /*
+ * Accept a connection. Essentially all the work is
+ * done at higher levels; just return the address
+ * of the peer, storing through addr.
+ * NOTE(review): isop_faddr is dereferenced here without the null
+ * check that PRU_PEERADDR performs; presumably it is always set on
+ * an accepted socket -- confirm.
+ */
+ case PRU_ACCEPT:
+ bcopy((caddr_t)isop->isop_faddr, mtod(nam, caddr_t),
+ nam->m_len = isop->isop_faddr->siso_len);
+ break;
+
+ /*
+ * Mark the connection as being incapable of further output.
+ */
+ case PRU_SHUTDOWN:
+ socantsendmore(so);
+ tp = tcp_usrclosed(tp);
+ if (tp)
+ error = tcp_output(tp);
+ else
+ tuba_pcbdetach(isop);
+ break;
+ /*
+ * Abort the TCP.
+ */
+ case PRU_ABORT:
+ if ((tp = tcp_drop(tp, ECONNABORTED)) == 0)
+ tuba_pcbdetach(isop);
+ break;
+
+
+ case PRU_SOCKADDR: /* copy out the local ISO address, if bound */
+ if (isop->isop_laddr)
+ bcopy((caddr_t)isop->isop_laddr, mtod(nam, caddr_t),
+ nam->m_len = isop->isop_laddr->siso_len);
+ break;
+
+ case PRU_PEERADDR: /* copy out the peer's ISO address, if connected */
+ if (isop->isop_faddr)
+ bcopy((caddr_t)isop->isop_faddr, mtod(nam, caddr_t),
+ nam->m_len = isop->isop_faddr->siso_len);
+ break;
+
+ default: /* everything else is plain TCP; tcp_usrreq() is left to do its own tracing */
+ error = tcp_usrreq(so, req, m, nam, control);
+ goto notrace;
+ }
+ if (tp && (so->so_options & SO_DEBUG))
+ tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
+notrace:
+ splx(s);
+ return(error);
+}
+
+/*
+ * Socket option entry point for TUBA sockets.
+ * Options at level IPPROTO_TCP are handled by tcp_ctloutput(); options
+ * at any other level are handled by clnp_ctloutput().  The arguments are
+ * passed through untouched and the callee's result is returned.
+ */
+tuba_ctloutput(op, so, level, optname, mp)
+	int op;
+	struct socket *so;
+	int level, optname;
+	struct mbuf **mp;
+{
+	int clnp_ctloutput(), tcp_ctloutput();
+
+	if (level == IPPROTO_TCP)
+		return (tcp_ctloutput(op, so, level, optname, mp));
+	return (clnp_ctloutput(op, so, level, optname, mp));
+}
diff --git a/sys/netiso/xebec/Makefile b/sys/netiso/xebec/Makefile
new file mode 100644
index 000000000000..fa05f9cc47ed
--- /dev/null
+++ b/sys/netiso/xebec/Makefile
@@ -0,0 +1,8 @@
+# @(#)Makefile 5.16 (Berkeley) 4/26/91
+
+PROG= xebec
+SRCS= llparse.c llscan.c main.c malloc.c procs.c putdriver.c sets.c xebec.c
+CFLAGS+= -DDEBUG -traditional
+NOMAN = noman
+
+.include <bsd.prog.mk>
diff --git a/sys/netiso/xebec/debug.h b/sys/netiso/xebec/debug.h
new file mode 100644
index 000000000000..2e3f16794d6b
--- /dev/null
+++ b/sys/netiso/xebec/debug.h
@@ -0,0 +1,22 @@
+/* $Header: debug.h,v 2.1 88/09/19 12:56:16 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/debug.h,v $ */
+
+#define OUT stdout
+
+extern int debug[128]; /* debug switches, indexed by a character constant (see IFDEBUG) */
+
+#ifdef DEBUG /* IFDEBUG(x) ... ENDDEBUG brackets statements that run only when debug['x'] is set;
+ * ENDDEBUG also flushes stdout before closing the block.
+ * NOTE(review): the argument is named inside a character constant, which
+ * appears to rely on the -traditional preprocessor selected in the xebec
+ * Makefile to substitute it -- confirm before building with an ISO cpp.
+ */
+extern int column;
+
+#define IFDEBUG(letter) \
+ if(debug['letter']) {
+#define ENDDEBUG ; (void) fflush(stdout);}
+
+#else /* !DEBUG: the two macros are meant to expand to comment delimiters so the
+ * bracketed statements disappear.
+ * NOTE(review): this depends on how the preprocessor treats comment
+ * characters inside macro bodies -- verify with the compiler in use.
+ */
+
+#define STAR *
+#define IFDEBUG(letter) //*beginning of comment*/STAR
+#define ENDDEBUG STAR/*end of comment*//
+
+#endif DEBUG
+
diff --git a/sys/netiso/xebec/llparse.c b/sys/netiso/xebec/llparse.c
new file mode 100644
index 000000000000..fee7a9f7e47d
--- /dev/null
+++ b/sys/netiso/xebec/llparse.c
@@ -0,0 +1,366 @@
+/* $Header: llparse.c,v 2.2 88/09/19 12:54:59 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/llparse.c,v $ */
+/*
+ * ************************* NOTICE *******************************
+ * This code is in the public domain. It cannot be copyrighted.
+ * This ll parser was originally written by Keith Thompson for the
+ * University of Wisconsin Crystal project.
+ * It was based on an FMQ lr parser written by Jon Mauney at the
+ * University of Wisconsin.
+ * It was subsequently modified very slightly by Nancy Hall at the
+ * University of Wisconsin for the Crystal project.
+ * ****************************************************************
+ */
+#include "xebec.h"
+#include "llparse.h"
+#include "main.h"
+#include <stdio.h>
+
+#include "debug.h"
+
+#define LLMINACTION -LLINF /* stack value marking the start of a production; smaller values stand for several adjacent markers (see llpushprod) */
+
+short llparsestack[STACKSIZE]; /* the parse stack: symbols >= 0, action symbols and markers < 0 */
+short llstackptr = 0; /* index of the current top element of llparsestack */
+LLtoken lltoken; /* the lookahead token llparse() works with */
+
+/*
+ * Table-driven LL parser main loop.
+ * Starts by pushing the last production (the one deriving the start
+ * symbol) and then repeatedly inspects the top of the parse stack:
+ *   - a negative value is an action symbol or a run of production
+ *     markers; the matching action(s) are executed and it is popped;
+ *   - a value below llnterms is a terminal and must match the lookahead
+ *     token, which is then accepted;
+ *   - anything else is a nonterminal and is replaced by the right-hand
+ *     side chosen from the parse table for the lookahead token.
+ * A mismatch is reported through llparsererror().  The loop ends when
+ * the end symbol (terminal llnterms-1) has been accepted.  Returns 0.
+ */
+llparse()
+{
+	register tokready = FALSE;	/* is *tok a valid lookahead? */
+	register top;
+	register LLtoken *tok = &lltoken;
+	register prod;
+	register done = FALSE;
+
+	llpushprod(llnprods-1); /* $$$ ::= <start symbol> */
+
+	while(!done) {
+		top = llparsestack[llstackptr];
+		IFDEBUG(L)
+			printf("llparse() top of loop, llstackptr=%d, sym=%d\n",
+				llstackptr, top);
+		ENDDEBUG
+
+		if(top < 0) {
+			/* action symbol, or one or more production markers */
+			if(top > LLMINACTION) {
+				llaction(-top, tok);
+			} else {
+				/* one llaction(1) per marker folded into this entry */
+				for(; top <= LLMINACTION; top++)
+					llaction(1, tok); /* calls llfinprod */
+			}
+			llstackptr--;
+			continue;
+		}
+
+		/* both remaining cases need a lookahead token */
+		if(!tokready) {
+			llgettoken(tok);
+			tokready = TRUE;
+		}
+
+		if(top < llnterms) {
+			/* terminal symbol */
+			if(top != tok->llterm) {
+				llparsererror(tok); /* wrong terminal on input */
+				tokready = FALSE;
+				continue;
+			}
+			llpushattr(tok->llattrib);
+			llaccept(tok);
+			llstackptr--; /* pop terminal */
+			if(tok->llterm == llnterms-1) /* end symbol $$$ */
+				done = TRUE;
+			else
+				tokready = FALSE;
+			continue;
+		}
+
+		/* nonterminal: consult parse table for new production */
+		prod = llfindaction(top, tok->llterm);
+
+		/*
+		 * No table entry, or an epsilon production that the rest of
+		 * the stack cannot follow with this token: syntax error.
+		 */
+		if(prod == 0 || (llepsilon[prod] && !llepsilonok(tok->llterm))) {
+			llparsererror(tok);
+			tokready = FALSE;
+			continue;
+		}
+
+		llstackptr--; /* pop nonterminal */
+		llpushprod(prod); /* push rhs of production */
+	}
+
+	return(0);
+}
+
+/*
+ * Recognize production prod: push its right-hand side on the parse stack.
+ *
+ * A marker (LLMINACTION) is placed below the pushed symbols to mark the
+ * beginning of the production.  If the current top of stack is already a
+ * marker it is decremented instead, so that one entry represents all of
+ * the adjacent markers.  An attribute descriptor for the production is
+ * opened with llsetattr() before anything is pushed.
+ *
+ * The overflow check accounts for the marker slot as well as the
+ * right-hand side, so the highest index written is STACKSIZE-1.  On
+ * overflow a message is printed to stderr and Exit(-1) is called.
+ */
+llpushprod(prod)
+short prod;
+{
+	register start;
+	register length;
+	register count;
+	register marker;	/* 1 if a new marker entry must be pushed */
+
+	start = llprodindex[prod].llprodstart;
+	length = llprodindex[prod].llprodlength;
+
+	IFDEBUG(L)
+		printf("llpushprod(%d) llstackptr=0x%x(%d), length = 0x%x(%d)\n",
+		prod, llstackptr, llstackptr, length , length);
+		/*
+		dump_parse_stack();
+		*/
+	ENDDEBUG
+
+	marker = (llparsestack[llstackptr] <= LLMINACTION) ? 0 : 1;
+	if(llstackptr + marker + length >= STACKSIZE) {
+		fprintf(stderr,"Parse stack overflow. llstackptr=0x%x, length=0x%x\n",
+		llstackptr, length);
+		Exit(-1);
+	}
+
+	llsetattr(llprodindex[prod].llprodtlen);
+
+	/* put a marker on the stack to mark beginning of production */
+	if(marker) {
+		llstackptr++;
+		llparsestack[llstackptr] = LLMINACTION;
+	}
+	else {
+		/* there's already one there: don't put another on; just let
+		   it represent all of the adjacent markers */
+		(llparsestack[llstackptr]) --;
+	}
+
+	for(count=0; count<length; count++) {
+		llstackptr++;
+		llparsestack[llstackptr] = llproductions[start++];
+	}
+}
+
+
+/*
+ * Decide whether an epsilon production may be applied when the lookahead
+ * token is term.  The parse stack is scanned downward from the top:
+ * negative entries are skipped; a terminal ends the scan and the answer
+ * is whether it equals term; a nonterminal with no table entry for term
+ * ends it with FALSE; a nonterminal whose production is not an epsilon
+ * production ends it with TRUE; otherwise the scan continues one entry
+ * lower.  Reaching index 0 of the stack yields TRUE.
+ */
+llepsilonok(term)
+{
+	register depth;
+	register top;
+	register act;
+
+	IFDEBUG(L)
+		printf("llepsilonok() enter\n");
+	ENDDEBUG
+
+	for(depth = llstackptr; ; ) {
+		top = llparsestack[depth];
+
+		if(top >= 0) {
+			if(top < llnterms)
+				return(top == term);	/* terminal decides */
+
+			act = llfindaction(top, term);
+			if(act == 0)
+				return(FALSE);
+			if(llepsilon[act] != TRUE)
+				return(TRUE);
+		}
+
+		/* negative entry or epsilon production: look one deeper */
+		if(--depth == 0)
+			return(TRUE);
+	}
+}
+
+
+/*
+ * Look up the production to use for nonterminal sym when the lookahead
+ * terminal is term.  The rows for sym start at llparsetable[
+ * llparseindex[sym]] and end at the first row whose llterm is 0.
+ * Returns the production number, or 0 if no row matches.
+ */
+short llfindaction(sym, term)
+{
+	register struct llparsetable *row;
+
+	IFDEBUG(L)
+		printf("llfindaction(sym=%d, term=%d) enter \n", sym, term);
+	ENDDEBUG
+
+	for(row = &llparsetable[llparseindex[sym]]; row->llterm != 0; row++) {
+		if(row->llterm == term)
+			return(row->llprod);
+	}
+	return(0);
+}
+
+
+/*
+ * Report a syntax error at token: print "Syntax error: " on stderr,
+ * describe the token with prt_token(), dump the input buffer and
+ * call Exit(-1).
+ */
+llparsererror(token)
+LLtoken *token;
+{
+	IFDEBUG(L)
+		fprintf(stderr,"llparsererror() enter\n");
+		prt_token(token);
+	ENDDEBUG
+
+	fprintf(stderr, "Syntax error: ");
+	prt_token(token);
+
+	dump_buffer();
+
+	Exit(-1);
+}
+
+
+/*
+ * Fetch the next input token into *token by calling the scanner and
+ * mark it as a NORMAL token.
+ */
+llgettoken(token)
+LLtoken *token;
+{
+	llscan(token);
+	token->llstate = NORMAL;
+
+	IFDEBUG(L)
+		printf("llgettoken(): ");
+		prt_token(token);
+	ENDDEBUG
+}
+
+
+/******************************************************************************
+
+ Attribute support routines
+
+******************************************************************************/
+/*
+** attribute stack
+**
+** AttrStack = stack of record
+** values : array of values;
+** ptr : index;
+** end;
+**
+*/
+
+LLattrib llattributes[LLMAXATTR]; /* the attribute value stack */
+int llattrtop = 0; /* index of the next free slot in llattributes */
+
+struct llattr llattrdesc[LLMAXDESC]; /* one descriptor per production opened by llsetattr() */
+
+int lldescindex = 1; /* index of the next free descriptor; llattrdesc[lldescindex-1] is the current one */
+
+
+/*
+ * Open an attribute descriptor for a production with n right-hand-side
+ * attributes.  The descriptor records where the production's attributes
+ * start on the attribute stack and reserves one slot there for the
+ * left-hand side.  Prints a message on stdout and calls Exit(-1) when
+ * the descriptor table is full.
+ */
+llsetattr(n)
+{
+	register struct llattr *desc;
+
+	IFDEBUG(L)
+		printf("llsetattr(%d) enter\n",n);
+	ENDDEBUG
+
+	if(lldescindex >= LLMAXDESC) {
+		fprintf(stdout, "llattribute stack overflow: desc\n");
+		fprintf(stdout,
+			"lldescindex=0x%x, llattrtop=0x%x\n",lldescindex, llattrtop);
+		Exit(-1);
+	}
+
+	desc = &llattrdesc[lldescindex++];
+	desc->llabase = &llattributes[llattrtop];
+	llattrtop++;			/* slot reserved for the lhs */
+	desc->lloldtop = llattrtop;
+	desc->llaindex = 1;
+	/* the lhs ALWAYS uses an attr; it remains on the stack when the
+	   production is recognized */
+	desc->llacnt = n+1;
+}
+
+/*
+ * Push attribute value attr on the attribute stack and count it against
+ * the production currently being recognized.  Prints a message on stderr
+ * and calls Exit(-1) when the attribute stack is full.
+ */
+llpushattr(attr)
+LLattrib attr;
+{
+	struct llattr *cur;
+
+	IFDEBUG(L)
+		printf("llpushattr() enter\n");
+	ENDDEBUG
+
+	if(llattrtop >= LLMAXATTR) {
+		fprintf(stderr, "ATTRIBUTE STACK OVERFLOW!\n");
+		Exit(-1);
+	}
+
+	cur = &llattrdesc[lldescindex-1];
+	llattributes[llattrtop] = attr;
+	llattrtop++;
+	cur->llaindex++; /* inc count of attrs on the stack for this prod */
+}
+
+/*
+ * Close the attribute descriptor of the production that has just been
+ * recognized: drop the descriptor and cut the attribute stack back to
+ * the position saved when the descriptor was opened.  The left-hand-side
+ * attribute stays on the stack and is counted as one more right-hand-side
+ * attribute of the production that is now on top.
+ */
+llfinprod()
+{
+	register struct llattr *finished;
+
+	IFDEBUG(L)
+		printf("llfinprod() enter\n");
+	ENDDEBUG
+
+	lldescindex--;
+	finished = &llattrdesc[lldescindex];
+	llattrtop = finished->lloldtop;
+	llattrdesc[lldescindex-1].llaindex++;
+}
+
+#ifndef LINT
+#ifdef DEBUG
+/*
+ * Debugging aid: print the parse stack from the top entry down to
+ * index 0, one line per entry with its index, its value and either the
+ * symbol's name or "Action symbol" for negative entries.
+ */
+dump_parse_stack()
+{
+	int slot;
+	char *label;
+
+	printf("PARSE STACK:\n");
+	slot = llstackptr;
+	while(slot >= 0) {
+		if(llparsestack[slot] < 0)
+			label = "Action symbol";
+		else
+			label = llstrings[llparsestack[slot]];
+		printf("%d\t%d\t%s\n", slot, llparsestack[slot], label);
+		slot--;
+	}
+}
+
+#endif /* DEBUG */
+#endif /* LINT */
+
+/*
+ * Print a description of token t on stdout: its address, its terminal
+ * number in hex, and the terminal's name from llstrings[].
+ * The address is converted to unsigned long and printed with %lx; the
+ * previous %x conversion did not match a pointer argument.
+ */
+prt_token(t)
+LLtoken *t;
+{
+	fprintf(stdout, "t at 0x%lx\n", (unsigned long)t);
+	fprintf(stdout, "t->llterm=0x%x\n", t->llterm); (void) fflush(stdout);
+	fprintf(stdout, "TOK: %s\n", llstrings[t->llterm]);
+	(void) fflush(stdout);
+#ifdef LINT
+	/* to make lint shut up */
+	fprintf(stdout, "", llnterms, llnsyms, llnprods, llinfinite);
+#endif /* LINT */
+}
diff --git a/sys/netiso/xebec/llparse.h b/sys/netiso/xebec/llparse.h
new file mode 100644
index 000000000000..1b6133b1b7bb
--- /dev/null
+++ b/sys/netiso/xebec/llparse.h
@@ -0,0 +1,145 @@
+/* $Header: llparse.h,v 2.1 88/09/19 12:56:20 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/llparse.h,v $ */
+
+ /************************************************************
+ attributes stack garbage
+ ************************************************************/
+
+#define LLMAXATTR 512 /* number of slots in llattributes[] */
+#define LLMAXDESC 256 /* number of slots in llattrdesc[] */
+#define LLATTR /* build an attribute stack */
+
+ /*
+ ** attribute stack
+ **
+ ** AttrStack = stack of record
+ ** values : array of values;
+ ** ptr : index;
+ ** end;
+ **
+ */
+
+ typedef union llattrib LLattrib;
+
+ extern LLattrib llattributes[LLMAXATTR];
+ extern int llattrtop; /* index of the next free slot in llattributes */
+
+ extern struct llattr {
+ LLattrib *llabase; /* ptr into the attr stack (llattributes) */
+ int llaindex;/* # attrs on the stack so far for this prod */
+ int llacnt;/* total # ever to go on for this prod */
+
+ int lloldtop;/* when popping this prod, restore stack to here ;
+ one attr will remain on the stack (for the lhs) */
+ } llattrdesc[LLMAXDESC];
+
+ extern int lldescindex; /* index of the next free slot in llattrdesc */
+
+ /************************************************************
+ attributes stack garbage
+ ************************************************************/
+
+ extern struct lltoken {
+ short llterm; /* token number; also the index of the token's name in llstrings[] */
+ short llstate; /* inserted deleted normal (INSERT, DELETE, NORMAL) */
+ LLattrib llattrib; /* attribute value; llparse() pushes it when the token is accepted */
+ } lltoken;
+ typedef struct lltoken LLtoken;
+
+/************************************************************
+ constants used in llparse.c
+************************************************************/
+
+#define STACKSIZE 500 /* number of entries in llparsestack[] */
+#define MAXCORR 16 /* capacity of llinsert.llinsert[] */
+
+#define NORMAL 0 /* values for lltoken.llstate */
+#define DELETE 1
+#define INSERT 2
+
+/************************************************************
+ datatypes used to communicate with the parser
+************************************************************/
+
+struct llinsert { /* NOTE(review): presumably describes an insertion made by error correction; not used by the parser code visible here */
+ short llinscost;
+ short llinslength;
+ short llinsert[MAXCORR];
+};
+typedef struct llinsert LLinsert;
+
+extern short llparsestack[]; /* the parse stack (STACKSIZE entries, defined in llparse.c) */
+extern short llstackptr; /* index of the top entry of llparsestack */
+extern short llinfinite;
+
+/************************************************************
+ variables used to pass information
+ specific to each grammar
+************************************************************/
+
+extern short llnterms; /* symbols numbered below this are terminals; llnterms-1 is the end symbol */
+extern short llnsyms;
+extern short llnprods; /* llnprods-1 is the production llparse() pushes first */
+
+extern char *llefile;
+
+extern struct llparsetable { /* parse table rows; each nonterminal's rows end with llterm == 0 */
+ short llterm;
+ short llprod;
+} llparsetable[];
+
+extern short llparseindex[]; /* per nonterminal: index of its first row in llparsetable */
+
+extern short llepsilon[]; /* per production: nonzero if it is an epsilon production */
+
+extern short llproductions[]; /* right-hand-side symbols of all productions, located via llprodindex */
+
+extern struct llprodindex { /* per production: where its rhs lives in llproductions */
+ short llprodstart;
+ short llprodlength;
+ short llprodtlen; /* passed to llsetattr() when the production is pushed */
+} llprodindex[];
+
+extern struct llcosts { /* NOTE(review): this and the tables below look like error-correction data; none is referenced by the parser code visible here */
+ short llinsert;
+ short lldelete;
+} llcosts[];
+
+extern struct llstable {
+ short llsstart;
+ short llslength;
+} llstable[];
+
+extern short llsspace[];
+
+extern struct lletable {
+ short llecost;
+ short llelength;
+ short llestart;
+} lletable[];
+
+extern long lleindex[];
+
+extern short llespace[];
+
+extern char *llstrings[]; /* printable name of each symbol, indexed by symbol number */
+
+/************************************************************
+ routines defined in llparse.c
+************************************************************/
+
+extern llparse(); /* main parse loop; returns 0 */
+extern llcopye(); /* NOTE(review): llcopye, llcopys, llcorrector, llexpand, llgetprefix, llinsert*, llreadetab have no definition in the llparse.c visible here -- confirm they are still needed */
+extern llcopys();
+extern llcorrector();
+extern llepsilonok(); /* may an epsilon production be applied for this lookahead? */
+extern llexpand();
+extern short llfindaction(); /* parse table lookup: (nonterminal, terminal) -> production or 0 */
+extern llgetprefix();
+extern llgettoken(); /* read the next token through llscan() */
+extern llinsert();
+extern llinsertsym();
+extern llinserttokens();
+extern llparsererror(); /* report a syntax error and exit */
+extern llpushprod(); /* push a production's right-hand side on the parse stack */
+extern llreadetab();
diff --git a/sys/netiso/xebec/llscan.c b/sys/netiso/xebec/llscan.c
new file mode 100644
index 000000000000..ffdb9a92a60a
--- /dev/null
+++ b/sys/netiso/xebec/llscan.c
@@ -0,0 +1,430 @@
+/* $Header: llscan.c,v 2.2 88/09/19 12:55:06 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/llscan.c,v $ */
+/*
+ * ************************* NOTICE *******************************
+ * This code is in the public domain. It cannot be copyrighted.
+ * This scanner was originally written by Keith Thompson for the
+ * University of Wisconsin Crystal project.
+ * It was subsequently modified significantly by Nancy Hall at the
+ * University of Wisconsin for the ARGO project.
+ * ****************************************************************
+ */
+#include "xebec.h"
+#include "llparse.h"
+
+#include "main.h"
+#include <stdio.h>
+#include "procs.h"
+#include "debug.h"
+
+#define EOFILE 0x01
+#define UNUSED 0x02
+#define IGNORE 0x04
+#define OPCHAR 0x8
+#define DIGITS 0x10
+#define LETTER 0x20
+
+int chtype[128] = {
+/* null, soh ^a, stx ^b etx ^c eot ^d enq ^e ack ^f bel ^g */
+ EOFILE, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+/* bs ^h ht ^i lf ^j vt ^k ff ^l cr ^m so ^n si ^o */
+ UNUSED, IGNORE, IGNORE, UNUSED, IGNORE, IGNORE, UNUSED, UNUSED,
+/* dle ^p dc1 ^q dc2 ^r dc3 ^s dc4 ^t nak ^u syn ^v etb ^w */
+ UNUSED, UNUSED, UNUSED, UNUSED, EOFILE, UNUSED, UNUSED, UNUSED,
+/* can ^x em ^y sub ^z esc ^] fs ^\ gs ^} rs ^` us ^/ */
+ UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+
+/* ! " # $ % & ' */
+ IGNORE, UNUSED, OPCHAR, UNUSED, OPCHAR, UNUSED, OPCHAR, OPCHAR,
+/* ( ) * + , - . / */
+ OPCHAR, OPCHAR, OPCHAR, OPCHAR, OPCHAR, OPCHAR, OPCHAR, OPCHAR,
+/* 0 1 2 3 4 5 6 7 */
+ DIGITS, DIGITS, DIGITS, DIGITS, DIGITS, DIGITS, DIGITS, DIGITS,
+/* 8 9 : ; < = > ? */
+ DIGITS, DIGITS, OPCHAR, OPCHAR, OPCHAR, OPCHAR, OPCHAR, OPCHAR,
+
+/* @ A B C D E F G */
+ UNUSED, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
+/* H I J K L M N O */
+ LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
+/* P Q R S T U V W */
+ LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
+/* X Y Z [ \ ] ^ _ */
+ LETTER, LETTER, LETTER, OPCHAR, UNUSED, OPCHAR, OPCHAR, LETTER,
+
+/* ` a b c d e f g */
+ UNUSED, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
+/* h i j k l m n o */
+ LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
+/* p q r s t u v w */
+ LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
+/* x y z { | } ~ del */
+ LETTER, LETTER, LETTER, OPCHAR, UNUSED, OPCHAR, UNUSED, UNUSED
+};
+
+
+extern FILE *astringfile;
+static char *buffptr;
+static char buffer[2][LINELEN];
+static int currentbuf = 1;
+
+#define addbuf(x) *buffptr++ = x
+
+static int ch = ' ';
+
+/*
+ * skip - advance the one-character lookahead `ch' until it holds a
+ * character whose chtype[] class is not IGNORE.
+ */
+skip()
+{
+	for (;;) {
+		if (chtype[ch] != IGNORE)
+			return;
+		ch = getch();
+	}
+}
+
+/*
+ * llaccept - report on stderr any token the error corrector inserted or
+ * deleted.  Tokens in the NORMAL state (or any other state) are silent.
+ */
+llaccept(t)
+LLtoken *t;
+{
+	if (t->llstate == INSERT) {
+		fprintf(stderr,"Insert %s\n", llstrings[t->llterm]);
+	} else if (t->llstate == DELETE) {
+		fprintf(stderr,"Delete %s\n", llstrings[t->llterm]);
+	}
+}
+
+#define TVAL (t->llattrib)
+
+
+/*
+ * dump_buffer - write the first 20 bytes of the current token buffer,
+ * followed by a newline, to stderr and flush it.  All 20 bytes are
+ * written regardless of any NUL terminator inside them.
+ */
+dump_buffer()
+{
+	register char *p = buffer[currentbuf];
+	register char *stop = p + 20;
+
+	while (p < stop)
+		(void) fputc(*p++, stderr);
+	(void) fputc('\n', stderr);
+	(void) fflush(stderr);
+}
+
+/*
+ * iskey - look the word `c' up in the keyword table.
+ * On a match the terminal code is returned and *buf is left pointing at
+ * the table's own copy of the keyword; otherwise *buf is set to a null
+ * pointer and 0 is returned.  The comparison is case sensitive.
+ */
+int iskey(c, buf)
+char *c;
+char **buf;
+{
+	static struct { char *key_word; int term_type; } keys[] = {
+		{ "SAME", T_SAME },
+		{ "DEFAULT", T_DEFAULT },
+		{ "NULLACTION", T_NULLACTION },
+		{ "STRUCT", T_STRUCT },
+		{ "SYNONYM", T_SYNONYM },
+		{ "TRANSITIONS", T_TRANSITIONS },
+		{ "STATES", T_STATES },
+		{ "EVENTS", T_EVENTS },
+		{ "PCB", T_PCB },
+		{ "INCLUDE", T_INCLUDE },
+		{ "PROTOCOL", T_PROTOCOL },
+		{ 0, 0},
+	};
+	register int k = 0;
+
+	while (keys[k].key_word != (char *)0) {
+		*buf = keys[k].key_word;
+		if (strcmp(c, *buf) == 0)
+			return keys[k].term_type;
+		k++;
+	}
+	*buf = (char *)0;
+	return 0;
+}
+
+/*
+ * getstr - collect a delimited string into the current token buffer.
+ *
+ *	o	opening delimiter
+ *	c	closing delimiter; when o == c the string does not nest and
+ *		the next unescaped delimiter closes it
+ *
+ * On entry `ch' is the first character to examine.  On return the text up
+ * to and including the closing delimiter, NUL terminated, has been
+ * appended at buffptr and `ch' still holds the closing delimiter.
+ *
+ * Translations done on the way:
+ *	\o  \c  \$	the backslash is dropped, the character is kept
+ *	\x		(anything else) passed through with its backslash
+ *	$$		"e->ev_union." followed by whatever
+ *			AddCurrentEventName() appends
+ *	$NAME		NAME must start with the PCB or EVENT synonym and is
+ *			replaced by "p" or "e"; a following '.' becomes "->"
+ *
+ * Calls Exit(-1) on end of input inside the string, on an unknown
+ * synonym, or when the string does not fit in the buffer.
+ */
+getstr(o,c)
+char o,c;
+{
+	register int nested = 1;
+	register int allow_nesting = (o==c)?-1:1;
+
+	IFDEBUG(S)
+		fprintf(stdout,"getstr: ch=%c, delimiters %c %c\n",
+			ch,o, c);
+		fprintf(stdout,"getstr: buffptr 0x%x, currentbuf 0x%x\n",
+			buffptr, currentbuf);
+	ENDDEBUG
+
+	if( ch == c ) nested--;
+	while(nested) {
+		if(ch == '\0') {
+			fprintf(stderr,
+			"Eof inside of a string, delims= %c,%c, nesting %d",c,o, nested);
+			Exit(-1);
+			/* notreached */
+		} else if(ch == '$') {
+			/* might be an attribute */
+			IFDEBUG(S)
+				fprintf(stdout,"getstr: atttribute?\n");
+			ENDDEBUG
+
+			/* assume it's an event */
+			/* addbuf is a macro so this isn't as bad as
+			 * it looks
+			 * add "e->ev_union."
+			 */
+			if( (ch = getch()) == '$' ) {
+				addbuf('e'); addbuf('-'); addbuf('>');
+				addbuf('e'); addbuf('v'); addbuf('_');
+				addbuf('u'); addbuf('n'); addbuf('i');
+				addbuf('o'); addbuf('n');
+				addbuf('.');
+				AddCurrentEventName(& buffptr);
+			} else {
+				char *obufp = buffptr;
+
+				/* copy the synonym name so it can be compared */
+				do {
+					addbuf(ch);
+					ch = getch();
+				} while(chtype[ch] & LETTER);
+				addbuf('\0');
+				if( !strncmp(obufp, synonyms[PCB_SYN],
+					strlen(synonyms[PCB_SYN]) )) {
+					buffptr = obufp;
+					addbuf('p');
+				} else if( !strncmp(obufp, synonyms[EVENT_SYN],
+					strlen(synonyms[EVENT_SYN]))) {
+					buffptr = obufp;
+					addbuf('e');
+				} else {
+					fprintf(stderr, "Unknown synonym %s\n", obufp);
+					Exit(-1);
+				}
+				if(ch == '.') {
+					addbuf('-'); addbuf('>');
+				} else {
+					/* needs to be checked for nesting */
+					goto check;
+				}
+			}
+			/* end of attribute handling */
+			goto skip;
+		} else if(ch == '\\') {
+			/* possible escape - this is kludgy beyond belief:
+			 * \ is used to escape open and closing delimiters
+			 * and '$'
+			 * otherwise it's passed through to be compiled by C
+			 */
+			ch = getch();
+			if( (ch != o ) && (ch != c) && (ch != '$') ) {
+				/* may need to handle case where \ is last char in file... */
+				/* don't treat is as escape; not open or close so
+				 * don't have to worry about nesting either
+				 */
+				addbuf('\\');
+			}
+		}
+		addbuf(ch);
+	skip:
+		ch = getch();
+	check:
+		if( ch == o ) nested += allow_nesting;
+		else if( ch == c ) nested--;
+		/*
+		 * Keep two bytes in hand: an ordinary pass of the loop stores at
+		 * most two characters, and the closing delimiter plus the NUL are
+		 * stored after the loop.  The former test ("> LINELEN") only
+		 * fired once the buffer had already been overrun.
+		 * NOTE(review): the '$' expansions above are still checked only
+		 * after they have been stored; a very long expansion next to the
+		 * limit can still run past the end.
+		 */
+		if ( (buffptr - buffer[currentbuf]) > LINELEN - 2) {
+			fprintf(stderr,
+			"%s too long.\n", (o=='{')?"Action":"Predicate"); /*}*/
+			fprintf(stderr,
+			"buffptr, currentbuf 0x%x, 0x%x\n",buffptr,currentbuf );
+			Exit(-1);
+		}
+		IFDEBUG(S)
+			fprintf(stdout,"loop in getstr: ch 0x%x,%c o=%c,c=%c nested=%d\n",
+				ch,ch,o,c,nested);
+		ENDDEBUG
+	}
+	addbuf(ch);
+	addbuf('\0');
+
+	IFDEBUG(S)
+		fprintf(stdout,"exit getstr: got %s\n", buffer[currentbuf]);
+		fprintf(stdout,"exit getstr: buffptr 0x%x, currentbuf 0x%x\n",
+			buffptr, currentbuf);
+	ENDDEBUG
+}
+
+/*
+ * getch - read the next input character from `infile'.
+ * Returns the character, or 0 at end of file or on a read error, and
+ * bumps `lineno' for every newline read.
+ *
+ * The result of fgetc() is kept in an int: stored in a char, EOF is
+ * indistinguishable from the byte 0xff where char is signed, and never
+ * compares equal to EOF where char is unsigned.
+ * NOTE(review): callers index chtype[128] with the returned value, so
+ * input bytes above 0x7f are still outside that table.
+ */
+getch()
+{
+	int c;
+	extern FILE *infile;
+	extern int lineno;
+
+	c = fgetc(infile) ;
+	if (c == '\n') lineno++;
+	if (c == EOF || feof(infile)) c = 0;
+	IFDEBUG(e)
+		fprintf(stdout, "getch: 0x%x\n", c);
+		(void) fputc( c, stdout);
+		fflush(stdout);
+	ENDDEBUG
+
+	return c;
+}
+
+llscan(t) /* Scan the next token from the input into *t.
+ * t->llstate is set to NORMAL and t->llterm to the terminal code.
+ * Identifier, predicate and action text is left in the current static
+ * buffer and its address stored in the token attribute; keywords get the
+ * address iskey() supplies; quoted strings (only when T_FSTRING is
+ * defined) are copied with stash().  The buffer index is flipped on each
+ * call, so the text scanned by the previous call is not overwritten by
+ * this one.  Comments and illegal characters are skipped with a message
+ * where applicable.  No value is returned.
+ */
+LLtoken *t;
+{
+ char c;
+
+ t->llstate = NORMAL;
+
+ ++currentbuf; /* switch to the other token buffer ... */
+ currentbuf&=1; /* ... keeping the index 0 or 1 */
+again:
+ buffptr = &buffer[currentbuf][0];
+
+ skip();
+
+ switch(chtype[ch]) {
+
+ case EOFILE:
+ t->llterm = T_ENDMARKER;
+ break;
+
+ case UNUSED:
+ fprintf(stderr, "Illegal character in input - 0x%x ignored.", ch);
+ ch = getch();
+ goto again;
+
+ case OPCHAR:
+
+ switch(ch) {
+
+ case '/':
+ /* possible comment : elide ; kludge */
+ IFDEBUG(S)
+ fprintf(stdout, "Comment ch=%c\n", ch);
+ ENDDEBUG
+ c = getch();
+ if (c != '*') {
+ fprintf(stderr,"Syntax error : character(0x%x) ignored", ch);
+ ch = c; /* rescan starting at the character after the '/' */
+ goto again;
+ } else {
+ register int state = 2, whatchar=0; /* state 2 is "middle" in the table below */
+ static int dfa[3][3] = {
+ /* done seen-star middle */
+ /* star */ { 0, 1, 1 },
+ /* / */ { 0, 0, 2 },
+ /* other */ { 0, 2, 2 }
+ };
+
+ while( state ) {
+ if( (c = getch()) == (char)0) /* getch() returns 0 at end of file */
+ break;
+ whatchar = (c=='*')?0:(c=='/'?1:2);
+ IFDEBUG(S)
+ fprintf(stdout,
+ "comment: whatchar = %d, c = 0x%x,%c, oldstate=%d",
+ whatchar, c,c, state);
+ ENDDEBUG
+ state = dfa[whatchar][state];
+ IFDEBUG(S)
+ fprintf(stdout, ", newstate=%d\n", state);
+ ENDDEBUG
+ }
+ if(state) { /* left the loop before reaching the "done" state */
+ fprintf(stderr,
+ "Syntax error: end of file inside a comment");
+ Exit(-1);
+ } else ch = getch();
+ }
+ IFDEBUG(S)
+ fprintf(stdout, "end of comment at 0x%x,%c\n",ch,ch);
+ ENDDEBUG
+ goto again;
+
+
+ case '*':
+ t->llterm = T_STAR;
+ break;
+
+ case ',':
+ t->llterm = T_COMMA;
+ break;
+
+ case ';':
+ t->llterm = T_SEMI;
+ break;
+
+ case '<':
+ t->llterm = T_LANGLE;
+ break;
+
+ case '=':
+ t->llterm = T_EQUAL;
+ break;
+
+ case '[':
+ t->llterm = T_LBRACK;
+ break;
+
+ case ']':
+ t->llterm = T_RBRACK;
+ break;
+
+#ifdef T_FSTRING
+ case '"':
+ t->llterm = T_FSTRING;
+ addbuf(ch);
+ ch = getch();
+ getstr('"', '"');
+ TVAL.FSTRING.address = stash(buffer[currentbuf]);
+ break;
+#endif T_FSTRING
+
+ case '(':
+ t->llterm = T_PREDICATE;
+ getstr(ch, ')' );
+ TVAL.PREDICATE.address = buffer[currentbuf];
+ break;
+
+ case '{':
+ t->llterm = T_ACTION;
+ getstr(ch, '}');
+ TVAL.ACTION.address = buffer[currentbuf];
+ break;
+
+ default:
+ fprintf(stderr,"Syntax error : character(0x%x) ignored", ch);
+ ch = getch();
+ goto again;
+
+ }
+ ch = getch(); /* step past the character that ended this token */
+ break;
+
+ case LETTER:
+ do {
+ addbuf(ch);
+ ch = getch();
+ } while(chtype[ch] & (LETTER | DIGITS));
+
+ addbuf('\0');
+
+ t->llterm = iskey(buffer[currentbuf], &TVAL.ID.address);
+ if(!t->llterm) { /* not a keyword: plain identifier */
+ t->llterm = T_ID;
+ TVAL.ID.address = buffer[currentbuf];
+ }
+ IFDEBUG(S)
+ fprintf(stdout, "llscan: id or keyword 0x%x, %s\n",
+ TVAL.ID.address, TVAL.ID.address);
+ ENDDEBUG
+ break;
+
+ default: /* e.g. DIGITS: no case above handles a token starting with a digit */
+ fprintf(stderr, "Snark in llscan: chtype=0x%x, ch=0x%x\n",
+ chtype[ch], ch);
+ }
+}
diff --git a/sys/netiso/xebec/main.c b/sys/netiso/xebec/main.c
new file mode 100644
index 000000000000..a0b4842f30ca
--- /dev/null
+++ b/sys/netiso/xebec/main.c
@@ -0,0 +1,410 @@
+/* $Header: main.c,v 2.4 88/09/19 12:55:13 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/main.c,v $ */
+/*
+ * TODO:
+ * rewrite the command line stuff altogether - it's kludged beyond
+ * belief (as is the rest of the code...)
+ *
+ * DISCLAIMER DISCLAIMER DISCLAIMER
+ * This code is such a kludge that I don't want to put my name on it.
+ * It was a ridiculously fast hack and needs rewriting.
+ * However it does work...
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include "malloc.h"
+#include "debug.h"
+#include "main.h"
+
+int debug[128];
+
+int lineno = 1;
+
+FILE *statefile, *actfile, *eventfile_h, *statevalfile;
+FILE *infile, *astringfile;
+char *Transfilename;
+char *astringfile_name = DEBUGFILE;
+char *actfile_name = ACTFILE;
+char *statefile_name = STATEFILE;
+char *statevalfile_name = STATEVALFILE;
+char *eventfile_h_name = EVENTFILE_H;
+int print_trans = 0;
+int print_protoerrs = 0;
+int pgoption = 0;
+char kerneldirname[50] = "\0";
+
+char protocol[50];
+
+char *synonyms[] = {
+ "EVENT",
+ "PCB",
+ 0
+};
+
+/*
+ * usage - print the command line synopsis on stderr and terminate
+ * through Exit(-1).  `a' is the program name to show in the first line.
+ */
+usage(a)
+char *a;
+{
+	static char *optlines[] = {
+		"\t<other options> is any combination of:\n",
+		"\t\t-A<action file name>\n",
+		"\t\t-E<event file name>\n",
+		"\t\t-S<state file name>\n",
+		"\t\t-I<initial values file name>\n",
+		"\t\t-X<debugging file name>\n",
+		"\t\t-K<directory name>\n",
+		"\tThese names do NOT include the suffices (.c, .h)\n",
+		"\t\t-D<options> to turn on debug options for xebec itself\n",
+		"\t-<nn> for levels of debugging output\n",
+		"\t\t<nn> ranges from 1 to 3, 1 is default(everything)\n",
+		"\t\t-T to print transitions\n",
+		"\t\t-e to print list of combinations of\n",
+		"\t\t\t [event,old_state] that produce protocol errors\n",
+		"\t\t-g include profiling code in driver\n",
+		(char *)0
+	};
+	register char **lp;
+
+	fprintf(stderr,
+	"usage: %s <transition file> {-D<debug options>} <other options>\n",
+		a);
+	/* none of the option lines contains a conversion, so fputs suffices */
+	for (lp = optlines; *lp != (char *)0; lp++)
+		fputs(*lp, stderr);
+	Exit(-1);
+}
+
+openfiles(proto) /* Open the generated output files for protocol `proto'.
+ * Each file name becomes "<proto>_<current name>": DOIT(X) allocates the
+ * new name, stores it back in X_name, opens it for writing into X (calling
+ * Exit(-1) if that fails) and writes two RCS keyword comment lines.  The
+ * fixed opening parts of the driver are then written to actfile with
+ * putdriver(), and initsets() is called with the event and state files.
+ * The X / * * / _name spelling in DOIT relies on old-style preprocessor
+ * token pasting through an empty comment.
+ */
+register char *proto;
+{
+ register char *junk;
+ register int lenp = strlen(proto);
+
+ IFDEBUG(b)
+ fprintf(OUT, "openfiles %s\n",proto);
+ ENDDEBUG
+
+#define HEADER Header
+#define SOURCE Source
+#define DOIT(X)\
+ /* GAG */\
+ junk = Malloc( 2 + lenp + strlen(X/**/_name) );\
+ (void) sprintf(junk, "%s_", proto);\
+ X/**/_name = strcat(junk, X/**/_name);\
+ X = fopen(X/**/_name, "w");\
+ if((X)==(FILE *)0)\
+ { fprintf(stderr,"Open failed: %s\n", "X"); Exit(-1); }\
+ fprintf(X, "/* %cHeader%c */\n",'$', '$' );\
+ fprintf(X, "/* %cSource%c */\n",'$', '$' );
+
+ DOIT(eventfile_h);
+
+ IFDEBUG(X)
+#ifdef DEBUG
+ DOIT(astringfile);
+#endif DEBUG
+ fprintf(astringfile,
+ "#ifndef _NFILE\n#include <stdio.h>\n#endif _NFILE\n" ); /* NOTE(review): astringfile is only opened above when DEBUG is defined -- confirm */
+ ENDDEBUG
+
+ DOIT(statevalfile);
+ DOIT(statefile);
+ DOIT(actfile);
+ fprintf(actfile,
+ "#ifndef lint\nstatic char *rcsid = \"$Header/**/$\";\n#endif lint\n");
+
+ if(pgoption) /* driver part 15 with -g, part 14 otherwise */
+ putdriver(actfile, 15);
+ else
+ putdriver(actfile, 14);
+
+ FakeFilename(actfile, Transfilename, lineno);
+ putdriver(actfile, 1);
+ FakeFilename(actfile, Transfilename, lineno);
+ putdriver(actfile, 12);
+ fprintf(actfile, "#include \"%s%s\"\n", kerneldirname, statevalfile_name);
+ FakeFilename(actfile, Transfilename, lineno);
+ putdriver(actfile, 2);
+
+ initsets(eventfile_h, statefile);
+}
+
+/*
+ * includecode - copy the body of a brace-delimited block to `file'.
+ *
+ *	file	output stream
+ *	f	text beginning with the opening '{'; it is modified in
+ *		place: the matching '}' is overwritten with a NUL
+ *
+ * Everything between the outer braces is written, then FakeFilename()
+ * is called for the same stream.  If the braces do not balance, the scan
+ * now stops at the end of the string (it used to run past the
+ * terminating NUL) and the whole remainder is written unchanged.
+ */
+includecode(file, f)
+FILE *file;
+register char *f;
+{
+	register int count=1;
+	static char o='{';
+	static char c='}';
+	register char *g;
+
+	IFDEBUG(a)
+		fprintf(stdout, "including: %s, f=0x%x", f,f);
+	ENDDEBUG
+	/* step over the opening brace, but never over the terminator */
+	if(*f != '\0')
+		f++;
+	g = f;
+	while(count>0 && *g != '\0') {
+		if(*g == o) count++;
+		if(*g == c) count--;
+		g++;
+	}
+	/* g is one past the matching brace only when one was found */
+	if(count == 0)
+		*(--g) = '\0';
+	IFDEBUG(a)
+		fprintf(stdout, "derived: %s", f);
+	ENDDEBUG
+	fprintf(file, "%s", f);
+	FakeFilename(file, Transfilename, lineno);
+}
+
+/*
+ * putincludes - write to actfile the #include line for the events
+ * header and, when debug option X is on and option K is off, the one
+ * for the action-string file.
+ */
+putincludes()
+{
+	FakeFilename(actfile, Transfilename, lineno);
+	fprintf(actfile, "\n#include \"%s%s\"\n", kerneldirname, eventfile_h_name);
+	IFDEBUG(X)
+		if (debug['K'] == 0) {
+			/* not in kernel mode */
+			fprintf(actfile, "\n#include \"%s\"\n", astringfile_name);
+		}
+	ENDDEBUG
+	FakeFilename(actfile, Transfilename, lineno);
+}
+
+/*
+ * main - xebec entry point.
+ *
+ *	argv[1]		transition file to read
+ *	argv[2..]	options, see usage()
+ *
+ * Opens the transition file, processes the options, runs the parser
+ * (llparse) and then writes the closing parts of the event header, the
+ * driver source, the action-string file and the state header.  Returns 0
+ * on completion; errors end the program through exit()/Exit().
+ *
+ * Fixes over the previous version:
+ *  - a transition file that cannot be opened is reported instead of
+ *    handing a null stream to the scanner;
+ *  - the -I name buffer has room for ".init";
+ *  - the -K directory is length checked before it is copied into
+ *    kerneldirname[], and the "../" prefix is NUL terminated before
+ *    strcat() is applied to it;
+ *  - -D letters outside 0..127 no longer index past debug[];
+ *  - the allocation-failure message uses strlen(kerneldirname)+6.
+ */
+main(argc, argv)
+int argc;
+char *argv[];
+{
+	register int i = 2;
+	extern char *strcpy();
+	int start, finish;
+	extern int FirstEventAttribute;
+	extern int Nevents, Nstates;
+
+	start = time(0);
+	if(argc < 2) {
+		usage(argv[0]);
+	}
+	IFDEBUG(a)
+		fprintf(stdout, "infile = %s\n",argv[1]);
+	ENDDEBUG
+	Transfilename = argv[1];
+	infile = fopen(argv[1], "r");
+	if(infile == (FILE *)0) {
+		fprintf(stderr, "Open failed: %s\n", argv[1]);
+		exit(-1);
+	}
+
+	if(argc > 2) while(i < argc) {
+		register int j=0;
+		char c;
+		char *name;
+
+		if(argv[i][j] == '-') j++;
+		switch(c = argv[i][j]) {
+
+		/* GROT */
+		case 'A':
+			name = &argv[i][++j];
+			actfile_name = Malloc( strlen(name)+4);
+			actfile_name = (char *)strcpy(actfile_name,name);
+#ifdef LINT
+			name =
+#endif /* LINT */
+			strcat(actfile_name, ".c");
+			fprintf(stdout, "debugging file is %s\n",actfile_name);
+			break;
+		case 'K':
+			debug[c]=1;
+			fprintf(OUT, "option %c file %s\n",c, &argv[i][j+1]);
+			name = &argv[i][++j];
+			/* "../" + name + "/" + NUL must fit kerneldirname[] below */
+			if(strlen(name) + 5 > sizeof(kerneldirname)) {
+				fprintf(OUT, "K option: dir name too long!\n");
+				exit(-1);
+			}
+			(void) strcpy(kerneldirname, name);
+			break;
+		case 'X':
+			debug[c]=1;
+			name = &argv[i][++j];
+			astringfile_name = Malloc( strlen(name)+4);
+			astringfile_name = (char *)strcpy(astringfile_name,name);
+#ifdef LINT
+			name =
+#endif /* LINT */
+			strcat(astringfile_name, ".c");
+			fprintf(OUT, "option %c, astringfile name %s\n",c, name);
+			break;
+		case 'E':
+			name = &argv[i][++j];
+			eventfile_h_name = Malloc( strlen(name)+4);
+			eventfile_h_name = (char *)strcpy(eventfile_h_name,name);
+#ifdef LINT
+			name =
+#endif /* LINT */
+			strcat(eventfile_h_name, ".h");
+			fprintf(stdout, "event files is %s\n",eventfile_h_name);
+			break;
+		case 'I':
+			name = &argv[i][++j];
+			/* ".init" is five characters plus the NUL */
+			statevalfile_name = Malloc( strlen(name)+6 );
+			statevalfile_name = (char *)strcpy(statevalfile_name,name);
+#ifdef LINT
+			name =
+#endif /* LINT */
+			strcat(statevalfile_name, ".init");
+			fprintf(stdout, "state table initial values file is %s\n",statevalfile_name);
+			break;
+		case 'S':
+			name = &argv[i][++j];
+			statefile_name = Malloc( strlen(name)+4);
+			statefile_name = (char *)strcpy(statefile_name,name);
+#ifdef LINT
+			name =
+#endif /* LINT */
+			strcat(statefile_name, ".h");
+			fprintf(stdout, "state file is %s\n",statefile_name);
+			break;
+		/* END GROT */
+		case '1':
+		case '2':
+		case '3':
+			debug['X']= (int)argv[i][j] - (int) '0';
+			fprintf(OUT, "value of debug['X'] is 0x%x,%d\n", debug['X'],
+				debug['X']);
+			break;
+		case 'D':
+			while( c = argv[i][++j] ) {
+				if(c == 'X') {
+					fprintf(OUT, "debugging on");
+					if(debug['X']) fprintf(OUT,
+						" - overrides any -%d flags used\n", debug['X']);
+				}
+				/* debug[] has 128 slots; skip anything outside 0..127 */
+				if((c & ~0x7f) == 0)
+					debug[c]=1;
+				fprintf(OUT, "debug %c\n",c);
+			}
+			break;
+		case 'g':
+			pgoption = 1;
+			fprintf(stdout, "Profiling\n");
+			break;
+		case 'e':
+			print_protoerrs = 1;
+			fprintf(stdout, "Protocol error table:\n");
+			break;
+
+		case 'T':
+			print_trans = 1;
+			fprintf(stdout, "Transitions:\n");
+			break;
+		default:
+			usage(argv[0]);
+			break;
+		}
+		i++;
+	}
+	if(kerneldirname[0]) {
+		char *c;
+#ifdef notdef
+		if(debug['X']) {
+			fprintf(OUT, "Option K overrides option X\n");
+			debug['X'] = 0;
+		}
+#endif /* notdef */
+		if(strlen(kerneldirname)<1) {
+			fprintf(OUT, "K option: dir name too short!\n");
+			exit(-1);
+		}
+		/* add ../name/ */
+		c = (char *) Malloc(strlen(kerneldirname)+6) ;
+		if(c == (char *)0) {
+			fprintf(OUT, "Cannot allocate %d bytes for kerneldirname\n",
+				(int)strlen(kerneldirname) + 6 );
+			fprintf(OUT, "kerneldirname is %s\n", kerneldirname );
+			exit(-1);
+		}
+		*c = '.';
+		*(c+1) = '.';
+		*(c+2) = '/';
+		*(c+3) = '\0';	/* strcat() needs a terminated destination */
+		(void) strcat(c, kerneldirname);
+		(void) strcat(c, "/\0");
+		/* fits: the -K case above rejected names longer than size-5 */
+		strcpy(kerneldirname, c);
+	}
+
+	init_alloc();
+
+	(void) llparse();
+
+	/* {{ */
+	if( !FirstEventAttribute )
+		fprintf(eventfile_h, "\t}ev_union;\n");
+	fprintf(eventfile_h, "};/* end struct event */\n");
+	fprintf(eventfile_h, "\n#define %s_NEVENTS 0x%x\n", protocol, Nevents);
+	fprintf(eventfile_h,
+		"\n#define ATTR(X)ev_union.%s/**/X/**/\n",EV_PREFIX);
+	(void) fclose(eventfile_h);
+
+	/* {{ */ fprintf(actfile, "\t}\nreturn 0;\n}\n"); /* end switch; end action() */
+	dump_predtable(actfile);
+
+	putdriver(actfile, 3);
+	IFDEBUG(X)
+		if(!debug['K'])
+			putdriver(actfile, 4);
+	ENDDEBUG
+	putdriver(actfile, 6);
+	IFDEBUG(X)
+		/*
+		putdriver(actfile, 10);
+		*/
+		if(debug['K']) {
+			putdriver(actfile, 11);
+		} else {
+			switch(debug['X']) {
+			case 1:
+			default:
+				putdriver(actfile, 7);
+				break;
+			case 2:
+				putdriver(actfile, 13);
+				break;
+			case 3:
+				break;
+			}
+		}
+	ENDDEBUG
+	putdriver(actfile, 8);
+	(void) fclose(actfile);
+	IFDEBUG(X)
+		/* { */
+		fprintf(astringfile, "};\n");
+		(void) fclose(astringfile);
+	ENDDEBUG
+
+	(void) fclose(statevalfile);
+
+	fprintf(statefile, "\n#define %s_NSTATES 0x%x\n", protocol, Nstates);
+	(void) fclose(statefile);
+
+	finish = time(0);
+	fprintf(stdout, "%d seconds\n", finish - start);
+	if( print_protoerrs )
+		printprotoerrs();
+	return 0;
+}
+
+int transno = 0;
+
+/*
+ * Exit - report the current input line (and the transition number, once
+ * one has been set) on stderr, flush stdout and the state, event and
+ * action output files, and leave the program with status `n'.
+ */
+Exit(n)
+{
+	fprintf(stderr, "Error at line %d\n",lineno);
+	if (transno != 0) {
+		fprintf(stderr, "Transition number %d\n",transno);
+	}
+
+	(void) fflush(stdout);
+	(void) fflush(statefile);
+	(void) fflush(eventfile_h);
+	(void) fflush(actfile);
+
+	exit(n);
+}
+
+syntax() /* Placeholder: declares a table of strings describing the
+ * sections of a transition file but does nothing with it; the function
+ * has no statements and returns no value.
+ */
+{
+ static char *synt[] = {
+ "*PROTOCOL <string>\n",
+ "*PCB <string> <optional: SYNONYM synonymstring>\n",
+ "<optional: *INCLUDE {\n<C source>\n} >\n",
+ "*STATES <string>\n",
+ "*EVENTS <string>\n",
+ "*TRANSITIONS <string>\n",
+ };
+}
+
+FakeFilename(outfile, name, l) /* Currently a no-op: the only candidate
+ * statement, a "# line" directive written to outfile, is commented out
+ * below as not working.  All three arguments are ignored.
+ */
+FILE *outfile;
+char *name;
+int l;
+{
+ /*
+ doesn't work
+ fprintf(outfile, "\n\n\n\n# line %d \"%s\"\n", l, name);
+ */
+}
diff --git a/sys/netiso/xebec/main.h b/sys/netiso/xebec/main.h
new file mode 100644
index 000000000000..cb5bd74f31e4
--- /dev/null
+++ b/sys/netiso/xebec/main.h
@@ -0,0 +1,32 @@
+/* $Header: main.h,v 2.1 88/09/19 12:56:24 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/main.h,v $ */
+
+#define TRUE 1
+#define FALSE 0
+#define LINELEN 2350
+ /* approx limit on token size for C compiler
+ * which matters for the purpose of debugging (astring.c...)
+ */
+
+#define MSIZE 4000
+#define DEBUGFILE "astring.c"
+#define ACTFILE "driver.c"
+#define EVENTFILE_H "events.h"
+#define STATEFILE "states.h"
+#define STATEVALFILE "states.init"
+
+#define EV_PREFIX "EV_"
+#define ST_PREFIX "ST_"
+
+#define PCBNAME "_PCB_"
+
+extern char kerneldirname[];
+extern char protocol[];
+extern char *synonyms[];
+#define EVENT_SYN 0
+#define PCB_SYN 1
+
+extern int transno;
+extern int print_trans;
+extern char *stash();
+
diff --git a/sys/netiso/xebec/malloc.c b/sys/netiso/xebec/malloc.c
new file mode 100644
index 000000000000..5cdfc147a703
--- /dev/null
+++ b/sys/netiso/xebec/malloc.c
@@ -0,0 +1,136 @@
+/* $Header: malloc.c,v 2.2 88/09/19 12:55:18 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/malloc.c,v $ */
+/*
+ * This code is such a kludge that I don't want to put my name on it.
+ * It was a ridiculously fast hack and needs rewriting.
+ * However it does work...
+ */
+
+/*
+ * a simple malloc
+ * it might be brain-damaged but for the purposes of xebec
+ * it's a whole lot faster than the c library malloc
+ */
+
+#include <stdio.h>
+#include "malloc.h"
+#include "debug.h"
+#define CHUNKSIZE 4096*2
+
+static char *hiwat, *highend;
+int bytesmalloced=0;
+int byteswasted = 0;
+
+
+/*
+ * init_alloc - reset the allocator's marks to the current program break,
+ * rounded up to a multiple of four.  highend is set equal to hiwat, so
+ * the next Malloc() call has to fetch a fresh chunk.
+ *
+ * The rounding is done in unsigned long rather than unsigned so that the
+ * address is not truncated where pointers are wider than int.
+ */
+init_alloc()
+{
+#ifdef LINT
+	hiwat = 0;
+	highend = 0;
+#else /* !LINT */
+	extern char *sbrk();
+
+	hiwat = (char *) sbrk(0);
+	hiwat = (char *)(((unsigned long)hiwat + 3) & ~(unsigned long)0x3);
+	highend = hiwat;
+#endif /* LINT */
+}
+
+/*
+ * HIWAT - when debug option M is on, print the allocator's current
+ * high-water mark together with the caller's tag `s' on stdout.
+ */
+HIWAT(s)
+char *s;
+{
+	IFDEBUG(M)
+		fprintf(stdout, "HIWAT 0x%x %s\n", hiwat,s);
+		fflush(stdout);
+	ENDDEBUG
+}
+
+#define MIN(x,y) ((x<y)?x:y)
+
+/*
+ * Malloc - allocate `x' bytes from a private sbrk() arena.
+ *
+ * Space is carved from CHUNKSIZE pieces obtained with sbrk(); nothing is
+ * ever freed.  After each call the high-water mark is rounded up to a
+ * multiple of four.  Returns the start of the block; calls Exit(-1) when
+ * sbrk() fails, so the result is never null.
+ *
+ * Fixes over the previous version:
+ *  - a zero-byte request returns the current high-water mark instead of
+ *    an uninitialized pointer;
+ *  - a negative request is refused instead of moving the mark backwards;
+ *  - on the first call the mark is placed at the aligned break (sbrk()
+ *    returns the previous break, which is the unaligned one);
+ *  - address arithmetic uses unsigned long so the pointer is not
+ *    truncated where pointers are wider than int.
+ *
+ * NOTE(review): a request larger than CHUNKSIZE is only contiguous when
+ * successive sbrk() calls return adjacent memory; this is reported (debug
+ * option m) but not enforced.
+ */
+char *Malloc(x)
+int x;
+{
+	char *c;
+	extern char *sbrk();
+	static int firsttime=1;
+	int total = x;
+	int first_iter = 1;
+	char *returnvalue;
+
+	IFDEBUG(N)
+		fprintf(stdout, "Malloc 0x%x, %d, bytesmalloced %d\n",
+			total,total, bytesmalloced);
+		fflush(stdout);
+	ENDDEBUG
+	IFDEBUG(M)
+		fprintf(stdout, "Malloc 0x%x, %d, hiwat 0x%x\n",
+			total,total, hiwat);
+		fflush(stdout);
+	ENDDEBUG
+	if(x < 0) {
+		fprintf(stderr, "Ran out of memory!\n");
+		Exit(-1);
+	}
+	if(firsttime) {
+		hiwat = sbrk(0);
+		if(((unsigned long)(hiwat) & 0x3)) {
+			bytesmalloced = 4 - (int) ((unsigned long)(hiwat) & 0x3);
+			c = sbrk( bytesmalloced );
+			if( c == (char *) -1 ) {
+				fprintf(stderr, "Ran out of memory!\n");
+				Exit(-1);
+			}
+			/* sbrk() gave back the old break; the aligned one follows it */
+			hiwat = c + bytesmalloced;
+		} else
+			bytesmalloced = 0;
+		firsttime = 0;
+		highend = hiwat;
+	}
+	/* value handed back when the loop below does not run (x == 0) */
+	returnvalue = hiwat;
+	while( total ) {
+		x = MIN(CHUNKSIZE, total);
+		if(total != x) {
+			IFDEBUG(N)
+				fprintf(stdout, "BIG Malloc tot %d, x %d, left %d net %d\n",
+					total,x, total-x, bytesmalloced);
+				fflush(stdout);
+			ENDDEBUG
+		}
+		if ( (hiwat + x) > highend) {
+			/* current chunk exhausted: abandon its tail, get another */
+			c = sbrk(CHUNKSIZE);
+			IFDEBUG(M)
+				fprintf(stdout, "hiwat 0x%x, x 0x%x, highend 0x%x, c 0x%x\n",
+					hiwat, x, highend, c);
+				fflush(stdout);
+			ENDDEBUG
+			if( c == (char *) -1 ) {
+				fprintf(stderr, "Ran out of memory!\n");
+				Exit(-1);
+			}
+			if(first_iter) {
+				returnvalue = c;
+				first_iter = 0;
+			}
+			bytesmalloced += CHUNKSIZE;
+			IFDEBUG(m)
+				if (highend != c) {
+					fprintf(OUT, "warning: %d wasted bytes!\n", highend - hiwat);
+					fprintf(OUT, " chunksize 0x%x, x 0x%x \n", CHUNKSIZE, x);
+				}
+			ENDDEBUG
+			highend = c + CHUNKSIZE;
+			hiwat = c;
+		}
+		c = hiwat;
+		if(first_iter) {
+			returnvalue = c;
+			first_iter = 0;
+		}
+		hiwat += x;
+		total -= x;
+	}
+	if((unsigned long)hiwat & 0x3) {
+		byteswasted += (int)((unsigned long)(hiwat) & 0x3);
+		hiwat = (char *)(((unsigned long)hiwat + 3) & ~(unsigned long)0x3);
+	}
+	IFDEBUG(M)
+		fprintf(stdout, "Malloc = 0x%x, bytesm 0x%x, wasted 0x%x, hiwat 0x%x\n",
+			returnvalue, bytesmalloced, byteswasted, hiwat);
+	ENDDEBUG
+	IFDEBUG(N)
+		fprintf(stdout, "Malloc returns 0x%x, sbrk(0) 0x%x\n", returnvalue, sbrk(0));
+		fflush(stdout);
+	ENDDEBUG
+	return(returnvalue);
+}
+
diff --git a/sys/netiso/xebec/malloc.h b/sys/netiso/xebec/malloc.h
new file mode 100644
index 000000000000..53d865bf47b5
--- /dev/null
+++ b/sys/netiso/xebec/malloc.h
@@ -0,0 +1,4 @@
+/* $Header: malloc.h,v 2.1 88/09/19 12:56:27 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/malloc.h,v $ */
+
+char *Malloc();
diff --git a/sys/netiso/xebec/procs.c b/sys/netiso/xebec/procs.c
new file mode 100644
index 000000000000..49d862ac5b9e
--- /dev/null
+++ b/sys/netiso/xebec/procs.c
@@ -0,0 +1,437 @@
+/* $Header: procs.c,v 2.3 88/09/19 12:55:22 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/procs.c,v $ */
+/*
+ * This code is such a kludge that I don't want to put my name on it.
+ * It was a ridiculously fast hack and needs rewriting.
+ * However it does work...
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include "malloc.h"
+#include "main.h"
+#include "debug.h"
+#include "sets.h"
+#include "procs.h"
+
+struct Predicate {
+ int p_index;
+ int p_transno;
+ char *p_str;
+ struct Predicate *p_next;
+};
+
+struct Stateent {
+ int s_index;
+ int s_newstate;
+ int s_action;
+ struct Stateent *s_next;
+};
+
+struct Object *SameState = (struct Object *)-1;
+int Index = 0;
+int Nstates = 0;
+int Nevents = 0;
+struct Predicate **Predlist;
+struct Stateent **Statelist;
+extern FILE *astringfile;
+
+/*
+ * end_events - called once the counts Nstates and Nevents are final.
+ * Under debug option X (level below 2) it closes the event-string array
+ * in astringfile and opens the action-string array.  It then allocates
+ * Statelist (Nstates+1 pointers) and Predlist, one pointer for every
+ * (event << Eventshift) + state index, and clears Predlist.
+ *
+ * The element size for Statelist previously named a misspelled,
+ * undeclared tag ("struct Statent"); the Predlist size is now computed
+ * once and used for both the allocation and the clearing.
+ */
+end_events() {
+	int size, part;
+	char *addr;
+
+	IFDEBUG(X)
+		/* finish estring[], start astring[] */
+		if(debug['X'] < 2 )
+			fprintf(astringfile, "};\n\nchar *%s_astring[] = {\n\"NULLACTION\",\n",
+				protocol);
+	ENDDEBUG
+	/* NOSTRICT */
+	Statelist =
+		(struct Stateent **) Malloc((Nstates+1) * sizeof(struct Stateent *));
+
+	size = (((Nevents)<<Eventshift)+Nstates)*sizeof(struct Predicate *) ;
+	/* NOSTRICT */
+	Predlist = (struct Predicate **) Malloc ( size );
+	addr = (char *)Predlist;
+	IFDEBUG(N)
+		fprintf(OUT, "Predlist at 0x%x, sbrk 0x%x bzero size %d at addr 0x%x\n",
+			Predlist, sbrk(0), size, addr);
+	ENDDEBUG
+#define BZSIZE 8192
+	/* clear in BZSIZE pieces, tracing each piece under debug option N */
+	while(size) {
+		part = size>BZSIZE?BZSIZE:size;
+		IFDEBUG(N)
+			fprintf(OUT, "bzero addr 0x%x part %d size %d\n",addr, part, size);
+		ENDDEBUG
+		bzero(addr, part);
+		IFDEBUG(N)
+			fprintf(OUT, "after bzero addr 0x%x part %d size %d\n",addr, part, size);
+		ENDDEBUG
+		addr += part;
+		size -= part;
+
+	}
+	IFDEBUG(N)
+		fprintf(OUT, "endevents..done \n");
+	ENDDEBUG
+}
+
+/*
+ * acttable - emit one action as the next "case" of the generated switch.
+ *
+ *	f		driver source being written
+ *	actstring	C text of the action
+ *
+ * Returns the index assigned to the action (indices start at 1).  With
+ * the profiling option a block of asm() statements precedes the action.
+ * Under debug option X (level below 2) the action text is also written
+ * to astringfile as a C string literal with newline, backslash and
+ * double quote escaped; if the escaped length exceeds LINELEN the
+ * program is ended through Exit(-1).
+ */
+int acttable(f,actstring)
+char *actstring;
+FILE *f;
+{
+	static Actindex = 0;
+	extern FILE *astringfile;
+	extern int pgoption;
+
+	IFDEBUG(a)
+		fprintf(OUT,"acttable()\n");
+	ENDDEBUG
+	fprintf(f, "case 0x%x: \n", ++Actindex);
+
+	if(pgoption) {
+		fprintf(f, "asm(\" # dummy statement\");\n");
+		fprintf(f, "asm(\"_Xebec_action_%x: \");\n", Actindex );
+		fprintf(f, "asm(\".data\");\n");
+		fprintf(f, "asm(\".globl _Xebec_action_%x# X profiling\");\n",
+			Actindex );
+		fprintf(f, "asm(\".long 0 # X profiling\");\n");
+		fprintf(f, "asm(\".text # X profiling\");\n");
+		fprintf(f, "asm(\"cas r0,r15,r0 # X profiling\");\n");
+		fprintf(f, "asm(\"bali r15,mcount # X profiling\");\n");
+	}
+
+	fprintf(f, "\t\t%s\n\t\t break;\n", actstring);
+	IFDEBUG(X)
+		if(debug['X']<2) {
+			register int len = 0;
+
+			fputc('"',astringfile);
+			for ( ; *actstring != '\0'; actstring++, len++) {
+				switch (*actstring) {
+				case '\n':
+					/* written as the two characters \ and n */
+					fputc('\\', astringfile);
+					fputc('n', astringfile);
+					len++;
+					break;
+				case '\\':
+				case '\"':
+					/* escaped with a leading backslash */
+					fputc('\\', astringfile);
+					fputc(*actstring, astringfile);
+					len++;
+					break;
+				default:
+					fputc(*actstring, astringfile);
+					break;
+				}
+			}
+			fprintf(astringfile,"\",\n");
+			if (len > LINELEN) {
+				fprintf(stderr, "Action too long: %d\n",len); Exit(-1);
+			}
+		}
+	ENDDEBUG
+
+	return(Actindex);
+}
+
+static int Npred=0, Ndefpred=0, Ntrans=0, Ndefevent=0, Nnulla=0;
+
+/*
+ * statetable - record one transition.
+ *
+ *	string		predicate text (passed on to predtable)
+ *	oldstate	state, or set of states, the transition starts in
+ *	newstate	resulting state; must not be a set
+ *	action		action index; 0 is counted as a null action
+ *	event		event, or set of events, that triggers it
+ *
+ * The work is done by predtable(), which is given newstate's number, or
+ * 0 when newstate is the SameState marker.
+ * NOTE(review): newstate is dereferenced before it is compared with
+ * SameState, so SameState presumably points at a real object by the time
+ * this runs -- confirm against the code that sets it.
+ */
+statetable(string, oldstate, newstate, action, event)
+char *string;
+int action;
+struct Object *oldstate, *newstate, *event;
+{
+	register int target;
+
+	IFDEBUG(a)
+		fprintf(OUT,"statetable(0x%x, 0x%x,0x%x, 0x%x)\n",
+			string, oldstate, newstate, action);
+		fprintf(OUT,"statetable(%s, %s,%s, 0x%x)\n",
+			string, oldstate->obj_name, newstate->obj_name, action);
+	ENDDEBUG
+
+	if (action == 0)
+		Nnulla++;
+	if (newstate->obj_kind == OBJ_SET) {
+		fprintf(stderr, "Newstate cannot be a set\n");
+		Exit(-1);
+	}
+	target = newstate->obj_number;
+	if (newstate == SameState)
+		target = 0;
+
+	(void) predtable( oldstate, event, string, action, target );
+	IFDEBUG(a)
+		fprintf(OUT,"EXIT statetable\n");
+	ENDDEBUG
+}
+
+/*
+ * stateentry - append one "{newstate,action}," initializer line to the
+ * state values file.  `index' and `oldstate' only appear in the debug
+ * trace.
+ */
+stateentry(index, oldstate, newstate, action)
+int index, action;
+int oldstate, newstate;
+{
+	extern FILE *statevalfile;
+
+	IFDEBUG(a)
+		fprintf(OUT,"stateentry(0x%x,0x%x,0x%x,0x%x) Statelist@0x%x, val 0x%x\n",
+			index, oldstate, newstate,action, &Statelist, Statelist);
+	ENDDEBUG
+
+	fprintf(statevalfile, "{0x%x,0x%x},\n", newstate, action);
+}
+
+/*
+ * predtable - enter a transition in the predicate table.
+ *
+ *	os		old state, or a set of states
+ *	oe		event, or a set of events; a null pointer (DEFAULT
+ *			event) is not implemented and the call is ignored
+ *	str		predicate text, or a null pointer for a default
+ *	action		action index
+ *	newstate	number of the new state, or 0 for "same state"
+ *
+ * A "{newstate,action}," line is written to the state values file -- one
+ * for the whole transition when newstate is non-zero, one per old state
+ * otherwise -- and Index is advanced for each line.  For every
+ * (event, state) pair a Predicate record carrying the current Index and
+ * transition number is appended to that pair's list in Predlist.
+ *
+ * Returns the current Index, or 0 when the transition was ignored (the
+ * ignored case used to "return;" with no value).  statevalfile is now
+ * declared here; it was used without a declaration in scope.
+ */
+int predtable(os, oe, str, action, newstate)
+struct Object *os, *oe;
+char *str;
+int action, newstate;
+{
+	register struct Predicate *p, **q;
+	register int event, state;
+	register struct Object *e, *s;
+	struct Object *firste;
+	extern FILE *statevalfile;
+
+	if (oe == (struct Object *)0 ) {
+		Ndefevent ++;
+		fprintf(stderr, "DEFAULT EVENTS aren't implemented; trans ignored\n");
+		return 0;
+	}
+	Ntrans++;
+	IFDEBUG(g)
+		fprintf(stdout,
+		"PREDTAB: s %5s; e %5s\n", os->obj_kind==OBJ_SET?"SET":"item",
+		oe->obj_kind==OBJ_SET?"SET":"item");
+	ENDDEBUG
+	/* a set is walked through its obj_members chain */
+	if (os->obj_kind == OBJ_SET) s = os->obj_members;
+	else s = os;
+	if (oe->obj_kind == OBJ_SET) firste = oe->obj_members;
+	else firste = oe;
+	if(newstate) {
+		fprintf(statevalfile, "{0x%x,0x%x},\n",newstate, action);
+		Index++;
+	}
+	while (s) {
+		if( !newstate ) { /* !newstate --> SAME */
+			/* i.e., use old obj_number */
+			fprintf(statevalfile, "{0x%x,0x%x},\n",s->obj_number, action);
+			Index++;
+		}
+		e = firste;
+		while (e) {
+			event = e->obj_number; state = s->obj_number;
+			IFDEBUG(g)
+				fprintf(stdout,"pred table event=0x%x, state 0x%x\n",
+				event, state);
+				fflush(stdout);
+			ENDDEBUG
+			if( !str /* DEFAULT PREDICATE */) {
+				Ndefpred++;
+				IFDEBUG(g)
+					fprintf(stdout,
+					"DEFAULT pred state 0x%x, event 0x%x, Index 0x%x\n",
+					state, event, Index);
+					fflush(stdout);
+				ENDDEBUG
+			} else
+				Npred++;
+			/* put at END of list */
+#ifndef LINT
+			IFDEBUG(g)
+				fprintf(stdout,
+				"predicate for event 0x%x, state 0x%x is 0x%x, %s\n",
+				event, state, Index, str);
+				fflush(stdout);
+			ENDDEBUG
+#endif /* LINT */
+			/* leave q at the link that must receive the new record */
+			for( ((q = &Predlist[(event<<Eventshift)+state]),
+				 (p = Predlist[(event<<Eventshift)+state]));
+							p ; p = p->p_next ) {
+				q = &p->p_next;
+			}
+
+			p = (struct Predicate *)Malloc(sizeof(struct Predicate));
+			p->p_next = (struct Predicate *)0;
+			p->p_str = str;
+			p->p_index = Index;
+			p->p_transno = transno;
+			*q = p;
+
+			IFDEBUG(g)
+				fprintf(stdout,
+				  "predtable index 0x%x, transno %d, E 0x%x, S 0x%x\n",
+					 Index, transno, e, s);
+			ENDDEBUG
+
+			e = e->obj_members;
+		}
+		s = s->obj_members;
+	}
+	return Index ;
+}
+
+/*
+ * printprotoerrs - list on stderr, event by event, every state whose
+ * Predlist slot for that event is zero.
+ */
+printprotoerrs()
+{
+	register int e,s;
+
+	fprintf(stderr, "[ Event, State ] without any transitions :\n");
+	e = 0;
+	while (e < Nevents) {
+		fprintf(stderr, "Event 0x%x: states ", e);
+		for (s = 0; s < Nstates; s++) {
+			if (Predlist[(e<<Eventshift)+s] != 0)
+				continue;
+			fprintf(stderr, "0x%x ", s);
+		}
+		fprintf(stderr, "\n");
+		e++;
+	}
+}
+
+#ifndef LINT
+dump_predtable(f) /* Write the predicate dispatch code and the index
+ * table to the driver source `f'.
+ * First the state/event/transition/predicate counts are printed on
+ * stdout.  Then, for every (event, state) pair that has at least one
+ * predicate with text, a "case" is emitted that tests each predicate in
+ * list order and returns its index, falling back to the default index
+ * (0 if there is none).  Two defaults for the same pair are reported as
+ * a conflict and end the program through Exit(-1).  While doing so each
+ * Predlist slot is overwritten: -1 for pairs that got a case, otherwise
+ * the default index.  Finally those values are written out as the
+ * initializer of "static int inx[Nevents+1][Nstates]", preceded by one
+ * all-zero row.
+ */
+FILE *f;
+{
+ struct Predicate *p;
+ register int e,s, hadapred;
+ int defaultindex;
+ int defaultItrans;
+ extern int bytesmalloced;
+ extern int byteswasted;
+
+#ifdef notdef
+ fprintf(stdout,
+ " Xebec used %8d bytes of storage, wasted %8d bytes\n",
+ bytesmalloced, byteswasted);
+#endif notdef
+ fprintf(stdout,
+ " %8d states\n %8d events\n %8d transitions\n",
+ Nstates, Nevents, Ntrans);
+ fprintf(stdout,
+ " %8d predicates\n %8d default predicates used\n",
+ Npred, Ndefpred);
+ fprintf(stdout,
+ " %8d null actions\n",
+ Nnulla);
+
+ putdriver(f, 5);
+ for(e = 0; e < Nevents; e++) { for(s = 0; s < Nstates; s++) {
+ p = Predlist[(e<<Eventshift)+s];
+ hadapred=0;
+ defaultindex=0;
+ defaultItrans=0;
+ if(p) {
+ IFDEBUG(d)
+ fflush(f);
+ ENDDEBUG
+ while(p) {
+ if(p->p_str) {
+ if(!hadapred) /* first predicate for this pair opens the case */
+ fprintf(f, "case 0x%x:\n\t", (e<<Eventshift) + s);
+ hadapred = 1;
+ fprintf(f, "if %s return 0x%x;\n\t else ",
+ p->p_str, p->p_index);
+ } else {
+ if(defaultindex) { /* a default was already recorded for this pair */
+ fprintf(stderr,
+"\nConflict between transitions %d and %d: duplicate default \n",
+ p->p_transno, defaultItrans);
+ Exit(-1);
+ }
+ defaultindex = p->p_index;
+ defaultItrans = p->p_transno;
+ }
+ p = p->p_next;
+ }
+ if( hadapred) { /* completes the trailing "else" of the chain */
+ fprintf(f, "return 0x%x;\n", defaultindex);
+ }
+ IFDEBUG(d)
+ fflush(f);
+ ENDDEBUG
+ }
+ IFDEBUG(g)
+ fprintf(stdout,
+ "loop: e 0x%x s 0x%x hadapred 0x%x dindex 0x%x for trans 0x%x\n",
+ e, s, hadapred, defaultindex, defaultItrans);
+ ENDDEBUG
+ if ( hadapred ) {
+ /* put a -1 in the array - Predlist is temporary storage */
+ Predlist[(e<<Eventshift)+s] = (struct Predicate *)(-1);
+ } else {
+ /* put defaultindex in the array */
+ /* if defaultindex is zero, then the driver will
+ * cause an erroraction (same as if no default
+ * were given and none of the predicates were true;
+ * also same as if no preds or defaults were given
+ * for this combo)
+ */
+ Predlist[(e<<Eventshift)+s] = (struct Predicate *)(defaultindex);
+ }
+ } }
+ fprintf(f, "default: return 0;\n} /* end switch */\n");
+#ifdef notdef
+ fprintf(f, "/*NOTREACHED*/return 0;\n} /* _Xebec_index() */\n");
+#else notdef
+ fprintf(f, "} /* _Xebec_index() */\n");
+#endif notdef
+ fprintf(f, "static int inx[%d][%d] = { {", Nevents+1,Nstates);
+ for(s = 0; s< Nstates; s++) fprintf(f, "0,"); /* event 0 */
+ fprintf(f, "},\n");
+
+ for(e = 0; e < Nevents; e++) {
+ fprintf(f, " {");
+ for(s = 0; s < Nstates; s++) {
+ register struct Predicate *xyz = Predlist[(e<<Eventshift)+s];
+ /* this kludge is to avoid a lint msg. concerning
+ * loss of bits
+ */
+ if (xyz == (struct Predicate *)(-1))
+ fprintf(f, "-1,");
+ else
+ fprintf(f, "0x%x,", Predlist[(e<<Eventshift)+s]); /* NOTE(review): a pointer is passed for %x; relies on pointer and int having the same size */
+ }
+ fprintf(f, " },\n");
+ }
+ fprintf(f, "};");
+}
+#endif LINT
+
+/*
+ * stash - return a private copy of the NUL-terminated string `buf',
+ * allocated with Malloc().
+ */
+char *
+stash(buf)
+char *buf;
+{
+	register int nbytes;
+	register char *copy;
+
+	/* grot */
+	nbytes = strlen(buf);
+	copy = Malloc(nbytes + 1);	/* one more for the terminator */
+#ifdef LINT
+	copy =
+#endif LINT
+	strcpy(copy, buf);
+
+	IFDEBUG(z)
+		fprintf(stdout,"stash %s at 0x%x\n", copy,copy);
+	ENDDEBUG
+	return(copy);
+}
+
+#ifdef notdef
+dump_pentry(event,state) /* Debug aid, compiled only when `notdef' is
+ * defined: walks the Predlist chain for (event, state) and, under debug
+ * option a, prints the address of every record on it.
+ */
+int event,state;
+{
+ register struct Predicate *p, **q;
+
+ for(
+ ((q = &Predlist[(event<<Eventshift) +state]),
+ (p = Predlist[(event<<Eventshift) + state]));
+ p!= (struct Predicate *)0 ; p = p->p_next ) {
+#ifndef LINT
+ IFDEBUG(a)
+ fprintf(OUT,
+ "dump_pentry for event 0x%x, state 0x%x is 0x%x\n",
+ event, state, p);
+ ENDDEBUG
+#endif LINT
+ q = &p->p_next; /* q trails p; its final value is not used */
+ }
+}
+#endif notdef
diff --git a/sys/netiso/xebec/procs.h b/sys/netiso/xebec/procs.h
new file mode 100644
index 000000000000..e41ae75995ce
--- /dev/null
+++ b/sys/netiso/xebec/procs.h
@@ -0,0 +1,5 @@
+/* $Header: procs.h,v 2.1 88/09/19 12:56:30 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/procs.h,v $ */
+
+extern char *stash();
+extern struct Object *SameState;
diff --git a/sys/netiso/xebec/putdriver.c b/sys/netiso/xebec/putdriver.c
new file mode 100644
index 000000000000..996ac643d10c
--- /dev/null
+++ b/sys/netiso/xebec/putdriver.c
@@ -0,0 +1,244 @@
+/* $Header: putdriver.c,v 2.2 88/09/19 12:55:27 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/putdriver.c,v $ */
+
+/*
+ * This code is such a kludge that I don't want to put my name on it.
+ * It was a ridiculously fast hack and needs rewriting.
+ * However it does work...
+ */
+
+/* The original idea was to put all the driver code
+ * in one place so it would be easy to modify
+ * but as hacks got thrown in it got worse and worse...
+ * It's to the point where a user would be better off
+ * writing his own driver and xebec should JUST produce
+ * the tables.
+ */
+
+#include <stdio.h>
+#include "main.h"
+#include "debug.h"
+
+extern char protocol[];
+char Eventshiftstring[10];
+static char statename[] = {'_', 's', 't', 'a', 't', 'e', 0 };
+
+static char *strings[] = {
+
+#define PART1 { 0,3 }
+
+ "\n#include \"",
+ kerneldirname,
+ protocol,
+ "_states.h\"",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+
+#define PART12 { 10,12 }
+ "\n\nstatic struct act_ent {\n",
+ "\tint a_newstate;\n\tint a_action;\n",
+ "} statetable[] = { {0,0},\n",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+
+#define PART2 { 20,20 }
+ "};\n",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+
+#define PART3 { 30,41 }
+ "\n",
+ protocol,
+ "_driver(p, e)\nregister ",
+ protocol,
+ PCBNAME,
+ " *p;\nregister struct ",
+ protocol,
+ "_event *e;\n",
+ "{\n",
+ "\tregister int index, error=0;\n",
+ "\tstruct act_ent *a;\n",
+ "\tstatic struct act_ent erroraction = {0,-1};\n",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+
+#define PART4 { 50,54 }
+
+ "\textern int ",
+ protocol,
+ "_debug;\n\textern FILE *",
+ protocol,
+ "_astringfile;\n",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+
+#define PART6 { 60, 65 }
+ "\n\tindex = inx[1 + e->ev_number][p->",
+ protocol,
+ statename,
+ "];\n\tif(index<0) index=_Xebec_index(e, p);\n",
+ "\tif (index==0) {\n\t\ta = &erroraction;\n",
+ "\t} else\n\t\ta = &statetable[index];\n\n",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+
+#define PART7 {70, 77 }
+ "\tif(",
+ protocol,
+ "_debug) fprintf(",
+ protocol,
+ "_astringfile, \"%15s <-- %15s [%15s] \\n\\t%s\\n\",\n",
+ "\t\tsstring[a->a_newstate], sstring[p->",
+ protocol,
+ "_state], estring[e->ev_number], astring[a->a_action]);\n\n",
+ (char *)0,
+ (char *)0,
+
+#define PART8 { 80, 84 }
+ "\tif(a->a_action)\n",
+ "\t\terror = _Xebec_action( a->a_action, e, p );\n",
+ "\tif(error==0)\n\tp->",
+ protocol,
+ "_state = a->a_newstate;\n\treturn error;\n}\n",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+
+#define PART9 { 90, 99 }
+ "\n_XEBEC_PG int _Xebec_action(a,e,p)\nint a;\nstruct ",
+ protocol,
+ "_event *e;\n",
+ protocol,
+ PCBNAME,
+ " *p;\n{\n",
+ "switch(a) {\n",
+ "case -1: return ",
+ protocol,
+ "_protocol_error(e,p);\n",
+ (char *)0,
+
+#define PART10 { 101, 105 }
+ "\tif(",
+ protocol,
+ "_debug) fprintf(",
+ protocol,
+ "_astringfile, \"index 0x%5x\\n\", index);\n",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+
+#define PART5 { 110, 121 }
+ "\n_XEBEC_PG int\n_Xebec_index( e,p )\n",
+ "\tstruct ",
+ protocol,
+ "_event *e;\n\t",
+ protocol,
+ PCBNAME,
+ " *p;\n{\nswitch( (e->ev_number<<",
+ Eventshiftstring,
+ ")+(p->",
+ protocol,
+ statename,
+ ") ) {\n",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+
+#define PART11 {130, 137 }
+ "\tIFTRACE(D_DRIVER)\n",
+ "\t",
+ protocol,
+ "trace(DRIVERTRACE,",
+ "\t\ta->a_newstate, p->",
+ protocol,
+ "_state, e->ev_number, a->a_action, 0);\n\n",
+ "\tENDTRACE\n",
+ (char *)0,
+ (char *)0,
+
+#define PART13 {140, 147 }
+ "\tif(",
+ protocol,
+ "_debug) fprintf(",
+ protocol,
+ "_astringfile, \"%15s <-- %15s [%15s] \\n\",\n",
+ "\t\tsstring[a->a_newstate], sstring[p->",
+ protocol,
+ "_state], estring[e->ev_number]);\n\n",
+ (char *)0,
+ (char *)0,
+
+#define PART14 { 150,150 }
+ "#define _XEBEC_PG static\n",
+
+#define PART15 { 151,151 }
+ "#define _XEBEC_PG \n",
+
+};
+
+/*
+ * parts[n] is the inclusive range { start, finish } of subscripts into
+ * strings[] that makes up PARTn; the PARTn macros are defined next to the
+ * text they delimit.  Entry 0 is { 0,0 }, apparently a placeholder so that
+ * the part number passed to putdriver() can be used directly as the index.
+ */
+static struct { int start; int finish; } parts[] = {
+ { 0,0 },
+ PART1,
+ PART2,
+ PART3,
+ PART4,
+ PART5,
+ PART6,
+ PART7,
+ PART8,
+ PART9,
+ PART10,
+ PART11,
+ PART12,
+ PART13,
+ PART14,
+ PART15,
+};
+
+/*
+ * putdriver: write part number x of the canned driver text to f.
+ *
+ * f	stream the generated C text is appended to
+ * x	subscript into parts[]; strings[parts[x].start..parts[x].finish]
+ *	are written in order
+ *
+ * A part number outside parts[] is reported on stderr and nothing is
+ * written.  Inside the IFDEBUG(d) section the stream is flushed after
+ * the part has been written.
+ */
+putdriver(f, x)
+FILE *f;
+int x;
+{
+	register int i;
+
+	if (x >= 0 && x < (int)(sizeof(parts) / sizeof(parts[0]))) {
+		for (i = parts[x].start; i <= parts[x].finish; i++) {
+			/* the (char *)0 filler slots must never reach "%s" */
+			if (strings[i] != (char *)0)
+				fprintf(f, "%s", strings[i]);
+		}
+	} else {
+		/* do not index past the end of parts[] */
+		fprintf(stderr, "putdriver: no such part %d\n", x);
+	}
+	IFDEBUG(d)
+		fflush(f);
+	ENDDEBUG
+}
diff --git a/sys/netiso/xebec/sets.c b/sys/netiso/xebec/sets.c
new file mode 100644
index 000000000000..3bb74ed8d291
--- /dev/null
+++ b/sys/netiso/xebec/sets.c
@@ -0,0 +1,472 @@
+/* $Header: sets.c,v 2.3 88/09/19 12:55:30 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/sets.c,v $ */
+/*
+ * This code is such a kludge that I don't want to put my name on it.
+ * It was a ridiculously fast hack and needs rewriting.
+ * However it does work...
+ */
+#include "main.h"
+#include "malloc.h"
+#include "sets.h"
+#include "debug.h"
+#include <stdio.h>
+
+struct Object *CurrentEvent = (struct Object *)0;
+struct Object *Objtree;
+struct Object dummy;
+/*
+ * define a set w/ type and name
+ * return a set number
+ */
+#undef NULL
+#define NULL (struct Object *)0
+
+static FILE *Sfile, *Efile;
+extern FILE *astringfile;
+char *Noname = "Unnamed set\0";
+
+/*
+ * initsets: set up the module before any set or item is defined.
+ *
+ * f	stream remembered as Efile (event definitions are written to it)
+ * s	stream remembered as Sfile (state definitions are written to it)
+ *
+ * Defines the "<ST_PREFIX>ERROR" state item first, then builds the
+ * stand-alone SameState object named "SAME"; SameState is not inserted
+ * into the object tree here.
+ */
+initsets(f,s)
+FILE *f, *s;
+{
+	extern struct Object *SameState;
+	static char errorstring[20];
+	register struct Object *same;
+
+	Sfile = s;
+	Efile = f;
+
+	IFDEBUG(X)
+		fprintf(astringfile, "char *%s_sstring[] = {\n", protocol);
+	ENDDEBUG
+
+	/* the error state is the first state defined (state 0) */
+	/* NOTE(review): errorstring holds 20 bytes; assumes ST_PREFIX is short */
+	sprintf(errorstring, "%sERROR\0", ST_PREFIX);
+	defineitem(STATESET, errorstring, (char *)0);
+
+	same = (struct Object *) Malloc( sizeof (struct Object) );
+	same->obj_name = "SAME";
+	same->obj_struc = (char *)0;
+	same->obj_kind = OBJ_ITEM;
+	same->obj_type = STATESET;
+	same->obj_number = 0;
+	same->obj_parent = (struct Object *)0;
+	same->obj_right = (struct Object *)0;
+	same->obj_left = (struct Object *)0;
+	same->obj_members = (struct Object *)0;
+	SameState = same;
+}
+
+/*
+ * get a set based on its type and name
+ * returns address of an Object, may be set or item
+ */
+
+/*
+ * lookup: search the object tree rooted at Objtree for an entry called name.
+ *
+ * type	STATESET or EVENTSET; the entry found must have this obj_type
+ * name	NUL-terminated name to search for
+ *
+ * Returns the matching Object (a set or an item), or NULL when no entry
+ * has that name or the entry found has a different type; the type
+ * mismatch is reported on stdout.  An entry without a name in the tree
+ * is treated as fatal.
+ */
+struct Object *lookup(type, name)
+unsigned char type;
+char *name;
+{
+	register struct Object *p = Objtree;
+	int val = 1 ;
+
+	IFDEBUG(o)
+		fprintf(stdout,"lookup 0x%x,%s \n",
+			type, name);
+	ENDDEBUG
+
+	/* plain binary-tree descent keyed on strcmp(); stops with val == 0 on a hit */
+	while( p && val ) {
+		IFDEBUG(o)
+			/* pointers are widened explicitly; "%x" only takes an int */
+			fprintf(OUT, "lookup strcmp 0x%lx,%s, 0x%lx,%s\n",
+				(unsigned long)name, name,
+				(unsigned long)OBJ_NAME(p), OBJ_NAME(p));
+		ENDDEBUG
+		if( p->obj_name == (char *)0 ) {
+			fprintf(stderr, "Unnamed set in table!\n");
+			Exit(-1);
+		}
+		val = (int) strcmp(name, OBJ_NAME(p));
+		if(val < 0) {
+			/* left */
+			p = p->obj_left;
+		} else if (val > 0) {
+			/* right */
+			p = p->obj_right;
+		}
+	}
+	if( p && ( p->obj_type != type)) {
+		fprintf(stdout, "lookup(0x%x,%s) found wrong obj type 0x%x\n",
+			type,name, p->obj_type);
+		p = NULL;
+	}
+	IFDEBUG(o)
+		fprintf(stdout,"lookup 0x%x,%s returning 0x%lx\n",type, name,
+			(unsigned long)p);
+	ENDDEBUG
+	return(p);
+}
+
+static int states_done = 0;
+
+/*
+ * end_states: called once the last state has been defined.
+ *
+ * f	stream that receives the opening of "struct <protocol>_event"
+ *
+ * Marks the state list as closed, derives Eventshift from Nstates (one more
+ * than the number of right shifts that still leave Nstates nonzero, i.e. the
+ * number of bits needed to hold Nstates, and 1 when Nstates is 0 or 1),
+ * stores its decimal form in Eventshiftstring and starts the event
+ * structure on f.
+ */
+end_states(f)
+FILE *f;
+{
+	extern char Eventshiftstring[];
+	register unsigned remaining = Nstates;
+	register int shifts = 0;
+
+	states_done = 1;
+
+	while( (remaining >>= 1) != 0 )
+		shifts++;
+	Eventshift = shifts + 1;
+
+	IFDEBUG(d)
+		fprintf(OUT, "Eventshift=%d\n", Eventshift);
+	ENDDEBUG
+	sprintf(Eventshiftstring, "%d\0",Eventshift);
+	fprintf(f, "struct %s_event {\n\tint ev_number;\n", protocol);
+	IFDEBUG(X)
+		/* close sstring[] and open estring[] in the debug string file */
+		fprintf(astringfile,
+			"};\n\nchar *%s_estring[] = {\n", protocol);
+	ENDDEBUG
+}
+
+int FirstEventAttribute = 1;
+
+/*
+ * insert: number a new object, emit its #define, and link it into the
+ * name-ordered binary tree rooted at Objtree.
+ *
+ * o	object to add; obj_name must be set and obj_left/obj_right clear
+ *
+ * States get the next value of Nstates and are written to Sfile; anything
+ * else is treated as an event, gets the next value of Nevents and is
+ * written to Efile (with its attribute structure, if it has one).  States
+ * may only be added before end_states() has run and events only after.
+ * Exceeding MAXSTATES/MAXEVENTS, a missing name, or a duplicate name is
+ * fatal.
+ *
+ * obj_parent is set to the node the object hangs from, or to
+ * (struct Object *)&Objtree for the root, which is the form delete()
+ * dereferences and tests for.
+ */
+static
+insert(o)
+struct Object *o;
+{
+	struct Object *p = Objtree;
+	struct Object **q = &Objtree;
+	struct Object *parent = (struct Object *)&Objtree;
+	int val;
+
+	if (o->obj_name == (char *)0) {
+		fprintf(stderr, "Internal Error: inserting unnamed object\n");
+		Exit(-1);
+	}
+	if( o->obj_type == STATESET) {
+		if( states_done ) {
+			fprintf(stderr, "No states may be defined after *TRANSITIONS\n");
+			Exit(-1);
+		}
+		o->obj_number = Nstates++ ;
+		if(Nstates > MAXSTATES) {
+			fprintf(stderr, "Too many states\n");
+			Exit(-1);
+		}
+		fprintf(Sfile, "#define %s 0x%x\n", o->obj_name, o->obj_number);
+		IFDEBUG(X)
+			fprintf(astringfile, "\"%s(0x%x)\",\n", o->obj_name, o->obj_number);
+		ENDDEBUG
+	} else {
+		/* EVENTSET */
+		if( ! states_done ) {
+			fprintf(stderr, "states must precede events\n");
+			Exit(-1);
+		}
+		o->obj_number = Nevents++ ;
+		if(Nevents > MAXEVENTS) {
+			fprintf(stderr, "Too many events\n");
+			Exit(-1);
+		}
+		if(o->obj_struc) {
+			/* the first event with attributes opens the union */
+			if( FirstEventAttribute ) {
+				fprintf(Efile, "\n\tunion{\n"); /*} */
+				FirstEventAttribute = 0;
+			}
+			fprintf(Efile,
+				"struct %s %s%s;\n\n", o->obj_struc, EV_PREFIX, o->obj_name);
+		}
+		fprintf(Efile, "#define %s 0x%x\n", o->obj_name, o->obj_number);
+		IFDEBUG(X)
+			fprintf(astringfile, "\"%s(0x%x)\",\n", o->obj_name, o->obj_number);
+		ENDDEBUG
+	}
+	IFDEBUG(o)
+		fprintf(OUT, "insert(%s)\n", OBJ_NAME(o) );
+		if(o->obj_right != NULL) {
+			fprintf(OUT, "insert: unclean Object right\n");
+			exit(-1);
+		}
+		if(o->obj_left != NULL) {
+			fprintf(OUT, "insert: unclean Object left\n");
+			exit(-1);
+		}
+		fflush(OUT);
+	ENDDEBUG
+
+	/*
+	 * Descend to the empty link where o belongs.  q is the link that
+	 * will receive o, parent the node that owns that link.
+	 */
+	while( p != NULL ) {
+		val = strcmp(o->obj_name, p->obj_name);
+		if( val == 0 ) {
+			/* equal */
+			fprintf(stderr, "re-inserting %s\n",o->obj_name);
+			exit(-1);
+		}
+		parent = p;
+		if(val < 0) {
+			/* left */
+			q = &p->obj_left;
+			p = p->obj_left;
+		} else {
+			/* right */
+			q = &p->obj_right;
+			p = p->obj_right;
+		}
+	}
+	*q = o;
+	/* record the owning node itself, not the address of its link field */
+	o->obj_parent = parent;
+
+	IFDEBUG(a)
+		dumptree(Objtree,0);
+	ENDDEBUG
+}
+
+/*
+ * delete: unlink object o from the binary tree rooted at Objtree.
+ *
+ * o	node to remove; o->obj_parent must be the node it hangs from, or
+ *	(struct Object *)&Objtree when o is the root
+ *
+ * If o has a right subtree, that subtree takes o's place and o's left
+ * subtree is hung under its lowest-valued node.  Otherwise o's left
+ * subtree (possibly empty) takes o's place.  The storage of o is not
+ * released and o's own link fields are left as they were.
+ */
+delete(o)
+struct Object *o;
+{
+	register struct Object *p = o->obj_right;
+	register struct Object *q = NULL;	/* stays NULL without a right subtree */
+	register struct Object *newparent;
+	register struct Object **np_childlink;
+
+	IFDEBUG(T)
+		fprintf(stdout, "delete(0x%lx)\n", (unsigned long)o);
+		dumptree(Objtree,0);
+	ENDDEBUG
+
+	/* q <== lowest valued node of the right subtree */
+	while( p ) {
+		q = p;
+		p = p->obj_left;
+	}
+
+	/* find the link in the parent (or the root pointer) that refers to o */
+	if (o->obj_parent == (struct Object *)&Objtree) {
+		newparent = (struct Object *)&Objtree;
+		np_childlink = (struct Object **)&Objtree;
+	} else if(o->obj_parent->obj_left == o) {
+		newparent = o->obj_parent;
+		np_childlink = &(o->obj_parent->obj_left);
+	} else {
+		newparent = o->obj_parent;
+		np_childlink = &(o->obj_parent->obj_right);
+	}
+	IFDEBUG(T)
+		fprintf(OUT, "newparent=0x%lx\n", (unsigned long)newparent);
+	ENDDEBUG
+
+	if (q) { /* q gets the left, parent gets the right */
+		IFDEBUG(T)
+			fprintf(OUT, "delete: q not null\n");
+		ENDDEBUG
+		/* q has no left child by construction, so o's left subtree fits */
+		q->obj_left = o->obj_left;
+		if(o->obj_left)
+			o->obj_left->obj_parent = q;
+		p = o->obj_right;
+	} else { /* parent(instead of q) gets the left ; there is no right */
+		IFDEBUG(T)
+			fprintf(OUT, "delete: q null\n");
+		ENDDEBUG
+		p = o->obj_left;
+	}
+	*np_childlink = p;
+	if(p)
+		p->obj_parent = newparent;
+
+	IFDEBUG(T)
+		fprintf(OUT, "After deleting 0x%lx\n", (unsigned long)o);
+		dumptree(Objtree,0);
+	ENDDEBUG
+}
+
+/*
+ * defineset: create a new, empty set object.
+ *
+ * type	STATESET or EVENTSET
+ * adr	name of the set, used as is (the caller has already stashed it);
+ *	may be (char *)0 for an anonymous set, which must then not be kept
+ * keep	nonzero to insert the set into the object tree
+ *
+ * Returns the new object; members are added afterwards with member().
+ */
+struct Object *
+defineset(type, adr, keep)
+unsigned char type;
+char *adr;
+int keep;
+{
+	struct Object *onew;
+	/* anonymous sets have no name; never hand a null pointer to "%s" */
+	char *shown = adr ? adr : Noname;
+
+	IFDEBUG(o)
+		printf("defineset(0x%x,%s, %s)\n", type , shown, keep?"KEEP":"NO_KEEP");
+	ENDDEBUG
+
+	onew = (struct Object *)Malloc(sizeof (struct Object));
+	bzero(onew, sizeof(struct Object));
+	onew->obj_name = adr;
+	onew->obj_kind = OBJ_SET;
+	onew->obj_type = type;
+	if(keep)
+		insert( onew );
+	/* address already stashed before calling defineset */
+	IFDEBUG(o)
+		printf("defineset(0x%x,%s) returning 0x%lx\n", type , shown,
+			(unsigned long)onew);
+		dumptree(Objtree,0);
+	ENDDEBUG
+	return(onew);
+}
+
+/*
+ * dumpit: debugging aid that hex-dumps the storage of one Object.
+ *
+ * o	address of the object, viewed as raw bytes
+ * s	label printed with the address
+ *
+ * Everything happens inside the IFDEBUG(o) section: one header line, then
+ * one line per 4-byte group showing the group read as an int followed by
+ * its four bytes.
+ */
+dumpit(o, s)
+char *o;
+char *s;
+{
+	register int i;
+
+IFDEBUG(o)
+	fprintf(OUT, "object 0x%lx, %s\n", (unsigned long)o, s);
+	/* step through the object; each line covers bytes i .. i+3 */
+	for(i=0; i + 4 <= (int)sizeof(struct Object); i+=4) {
+		/* mask the bytes so a negative char is not sign-extended */
+		fprintf(OUT, "0x%x: 0x%x 0x%x 0x%x 0x%x\n",
+			*((int *)(o+i)), *(o+i) & 0xff, *(o+i+1) & 0xff,
+			*(o+i+2) & 0xff, *(o+i+3) & 0xff );
+	}
+ENDDEBUG
+}
+
+/*
+ * defineitem: define a single state or event and insert it into the tree.
+ *
+ * type	STATESET or EVENTSET
+ * adr	name of the item; a private copy is made with stash()
+ * struc	attribute structure text for the item, or (char *)0 if it has
+ *	none; copied with stash() when present
+ *
+ * Defining a name that lookup() already finds with this type is fatal.
+ */
+defineitem(type, adr, struc)
+unsigned char type;
+char *adr;
+char *struc;
+{
+	struct Object *onew;
+	IFDEBUG(o)
+		/* struc is optional: never hand a null pointer to "%s" */
+		printf("defineitem(0x%x, %s at 0x%lx, %s)\n", type, adr,
+			(unsigned long)adr, struc ? struc : "(null)");
+	ENDDEBUG
+
+	if( onew = lookup( type, adr ) ) {
+		fprintf(stderr,
+	"Internal error at defineitem: trying to redefine obj type 0x%x, adr %s\n",
+			type, adr);
+		exit(-1);
+	} else {
+		onew = (struct Object *)Malloc(sizeof (struct Object));
+		bzero(onew, sizeof(struct Object));
+		onew->obj_name = stash(adr);
+		onew->obj_kind = OBJ_ITEM;
+		onew->obj_type = type;
+		onew->obj_struc = struc?stash(struc):struc;
+		insert( onew );
+	}
+	IFDEBUG(o)
+		fprintf(OUT, "defineitem(0x%x, %s) returning 0x%lx\n", type, adr,
+			(unsigned long)onew);
+	ENDDEBUG
+}
+
+/*
+ * member: add the item called adr to the front of set o's member list.
+ *
+ * o	the set being built; its obj_type selects states or events
+ * adr	name of the member
+ *
+ * If the name is already defined it must be an item (sets cannot contain
+ * sets); a private copy of that item, with its tree links cleared, is
+ * linked into the list.  If the name is unknown a warning is printed and
+ * a new item is defined on the spot.  Members are chained through
+ * obj_members.
+ */
+member(o, adr)
+struct Object *o;
+char *adr;
+{
+	struct Object *onew, *oold;
+	IFDEBUG(o)
+		/* "%x" only takes an int; widen the pointer explicitly */
+		printf("member(0x%lx, %s)\n", (unsigned long)o, adr);
+	ENDDEBUG
+
+	oold = lookup( o->obj_type, adr );
+
+	onew = (struct Object *)Malloc(sizeof (struct Object));
+	if( oold == NULL ) {
+		extern int lineno;
+
+		fprintf(stderr,
+		"Warning at line %d: set definition of %s causes definition of\n",
+			lineno, OBJ_NAME(o));
+		fprintf(stderr, "\t (previously undefined) member %s\n", adr);
+		bzero(onew, sizeof(struct Object));
+		onew->obj_name = stash(adr);
+		onew->obj_kind = OBJ_ITEM;
+		onew->obj_type = o->obj_type;
+		onew->obj_members = NULL;
+		/*
+		 * NOTE(review): the object inserted into the tree here is the
+		 * same one linked into the member list below, so the tree item
+		 * ends up with a non-null obj_members -- confirm this is
+		 * intended.
+		 */
+		insert( onew );
+	} else {
+		if(oold->obj_kind != OBJ_ITEM) {
+			fprintf(stderr, "Sets cannot be members of sets; %s\n", adr);
+			exit(-1);
+		}
+		/* work on a copy so the tree entry itself is left alone */
+		bcopy(oold, onew, sizeof(struct Object));
+		onew->obj_members = onew->obj_left = onew->obj_right = NULL;
+	}
+	/* push onto the front of the set's member chain */
+	onew->obj_members = o->obj_members;
+	o->obj_members = onew;
+}
+
+/*
+ * Lookup: like lookup(), but a name that cannot be found is an error.
+ *
+ * type	STATESET or EVENTSET
+ * name	name of the state or event wanted
+ *
+ * Returns the object found.  When lookup() yields nothing, a diagnostic
+ * naming the kind ("state" or "event") is printed and Exit(-1) is called.
+ */
+struct Object *Lookup(type, name)
+unsigned char type;
+char *name;
+{
+	register struct Object *found;
+
+	found = lookup(type,name);
+	if(found != NULL)
+		return(found);
+
+	fprintf(stderr, "Trying to use undefined %s: %s\n",
+		type==STATESET?"state":"event", name);
+	Exit(-1);
+	return(found);
+}
+
+/*
+ * AddCurrentEventName: append EV_PREFIX followed by the name of
+ * CurrentEvent to the buffer that *x points into.
+ *
+ * x	address of the caller's write cursor; it is advanced past the
+ *	characters written
+ *
+ * No terminating NUL is written and no bound is checked.  It is fatal if
+ * there is no current event or the current event has no attribute
+ * structure.
+ */
+AddCurrentEventName(x)
+register char **x;
+{
+	register char *src;
+
+	if( CurrentEvent == (struct Object *)0 ) {
+		fprintf(stderr, "No event named! BARF!\n");
+		Exit(-1);
+	}
+	if( CurrentEvent->obj_struc == (char *)0 ) {
+		fprintf(stderr, "No attributes for current event!\n");
+		Exit(-1);
+	}
+
+	/* prefix first ... */
+	for( src = EV_PREFIX; *src; src++ )
+		*(*x)++ = *src;
+
+	/* ... then the event's own name */
+	for( src = CurrentEvent->obj_name; *src; src++ )
+		*(*x)++ = *src;
+}
+
+/*
+ * dumptree: print the subtree rooted at o on stdout, in order (left
+ * subtree, node, right subtree).
+ *
+ * o	root of the subtree, or NULL
+ * i	depth of o; each line is indented by i spaces and shows i
+ *
+ * Empty links are shown as "NULL" lines, so the shape of the tree can be
+ * read off the output.
+ */
+dumptree(o,i)
+	register struct Object *o;
+	int i;
+{
+	register int j;
+
+	if(o == NULL) {
+		for(j=0; j<i; j++)
+			fputc(' ', stdout);
+		fprintf(stdout, "%3d NULL\n", i);
+	} else {
+		dumptree(o->obj_left, i+1);
+		for(j=0; j<i; j++)
+			fputc(' ', stdout);
+		/* "%x" only takes an int; widen the pointer explicitly */
+		fprintf(stdout, "%3d 0x%lx: %s\n", i, (unsigned long)o, OBJ_NAME(o));
+		dumptree(o->obj_right, i+1);
+	}
+}
+
+/*
+ * dump: print the two (implicitly int) arguments on stderr and then bring
+ * the program down on purpose.
+ */
+dump(c,a)
+{
+ register int x = 8;
+ int zero = 0;
+#include <sys/signal.h>
+
+ fprintf(stderr, "dump: c 0x%x, a 0x%x\n",c,a);
+
+ /* deliberate division by zero -- presumably to force a fault and a core file */
+ x = x/zero;
+ /* if that did not stop us, send SIGQUIT to the caller's whole process group */
+ kill(0, SIGQUIT);
+}
+
+/*
+ * dump_trans: print one transition on stdout as
+ *	<transno>: newstate <== oldstate event
+ * followed by the predicate text (or "DEFAULT" when pred is null) and the
+ * action text.  A set is shown as the bracketed list of its members'
+ * names, an item by its own name.
+ */
+dump_trans( pred, oldstate, newstate, action, event )
+struct Object *oldstate, *newstate, *event;
+char *pred, *action;
+{
+ extern int transno;
+ struct Object *o;
+
+ fprintf(stdout, "\n%d: ", transno);
+/*
+ * Local helper macro; uses the local variable o to walk obj_members.
+ * It reuses the name of the dumpit() function defined earlier in this
+ * file and is not #undef'd afterwards.
+ */
+#define dumpit(x)\
+ if((x)->obj_kind == OBJ_SET) {\
+ o = (x)->obj_members; fprintf( stdout, "[ " );\
+ while(o) { fprintf(stdout, "%s ", o->obj_name); o = o->obj_members; }\
+ fprintf( stdout, " ] ");\
+ } else { fprintf(stdout, "%s ", (x)->obj_name); }
+
+ dumpit(newstate);
+ fprintf(stdout, " <== ");
+ dumpit(oldstate);
+ dumpit(event);
+ fprintf(stdout, "\n\t\t%s\n\t\t%s\n", pred?pred:"DEFAULT",
+ action);
+}
diff --git a/sys/netiso/xebec/sets.h b/sys/netiso/xebec/sets.h
new file mode 100644
index 000000000000..96eb791edc2d
--- /dev/null
+++ b/sys/netiso/xebec/sets.h
@@ -0,0 +1,36 @@
+/* $Header: sets.h,v 2.1 88/09/19 12:56:33 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/sets.h,v $ */
+
+#define MAXEVENTS 200
+#define MAXSTATES 200
+
+#define STATESET 10
+#define EVENTSET 5
+
+#define OBJ_ITEM 2
+#define OBJ_SET 3
+
+/*
+ * One named state or event, or a set of them.  Named objects are kept in
+ * a binary tree ordered by obj_name; the members of a set are chained
+ * through obj_members.
+ */
+struct Object {
+ unsigned char obj_kind; /* OBJ_ITEM or OBJ_SET */
+ unsigned char obj_type; /* state or event */
+ char *obj_name; /* may be null for a set that is not kept in the tree */
+ char *obj_struc; /* attribute structure text, or null if none */
+ int obj_number; /* value emitted in the object's #define */
+ struct Object *obj_members; /* must be null for kind==item */
+ /* for the tree */
+ struct Object *obj_left;
+ struct Object *obj_right;
+ struct Object *obj_parent;
+} ;
+
+extern char *Noname;
+
+#define OBJ_NAME(o) (((o)->obj_name)?(o)->obj_name:Noname)
+
+extern int Nevents, Nstates;
+int Eventshift;
+extern struct Object *CurrentEvent;
+
+extern struct Object *Lookup();
+extern struct Object *defineset();
+
diff --git a/sys/netiso/xebec/test.trans b/sys/netiso/xebec/test.trans
new file mode 100644
index 000000000000..49db36109943
--- /dev/null
+++ b/sys/netiso/xebec/test.trans
@@ -0,0 +1,64 @@
+/* $Header: test.trans,v 0.2 88/09/19 12:58:29 nhall Exp $
+ */
+*PROTOCOL test
+
+*INCLUDE
+
+{
+#include "test_def.h"
+}
+
+*PCB test_pcbstruct SYNONYM P
+
+*STATES
+
+STATE_A
+STATE_B
+STATE_C
+ALL_STATES = [STATE_A, STATE_B, STATE_C]
+
+*EVENTS { int ev_all; } SYNONYM E
+
+EV_1 { char *ev1_char; }
+EV_2 { int ev2_int; char ev2_char; }
+EV_3
+EV_4 { struct blah *ev4_blahptr;
+ unsigned int ev4_uint;
+ int ev4_int;
+ }
+
+*TRANSITIONS
+
+SAME <== [ STATE_A, STATE_B ] [ EV_1, EV_2, EV_3 ]
+ ( $E.ev_all > 0 )
+ {
+ if( $P.test_state == STATE_A )
+ printf("state is STATE_A\n");
+ else
+ printf("state is STATE_B\n");
+ printf("action first transition\n");
+ }
+
+;
+STATE_C <== [ STATE_A, STATE_B ] [ EV_1, EV_2, EV_3 ]
+ DEFAULT
+ {
+ printf("default - transition 2\n");
+ MACRO1( $P.test_pcbfield );
+ }
+;
+
+STATE_C <== [ STATE_A, STATE_B ] EV_4
+ ( $$.ev4_blahptr->blahfield & 0x1 )
+ NULLACTION
+;
+
+STATE_C <== ALL_STATES EV_4
+ DEFAULT
+ {
+ printf("default - transition 4\n");
+ printf("pcb is 0x%x, event is 0x%x \n", $P, $E);
+ printf("ev4 values are : blahptr 0x%x uint 0x%x int 0x%x\n",
+ $$.ev4_blahptr, $$.ev4_uint, $$.ev4_int);
+ }
+;
diff --git a/sys/netiso/xebec/test_def.h b/sys/netiso/xebec/test_def.h
new file mode 100644
index 000000000000..6faa2dfce81f
--- /dev/null
+++ b/sys/netiso/xebec/test_def.h
@@ -0,0 +1,13 @@
+
+struct blah {
+ unsigned int blahfield;
+ int dummyi;
+ char dummyc;
+};
+
+struct test_pcbstruct {
+ int test_pcbfield;
+ int test_state;
+};
+
+#define MACRO1(arg) if(arg != 0) { printf("macro1\n"); }
diff --git a/sys/netiso/xebec/xebec.bnf b/sys/netiso/xebec/xebec.bnf
new file mode 100644
index 000000000000..d7406d9d5c28
--- /dev/null
+++ b/sys/netiso/xebec/xebec.bnf
@@ -0,0 +1,315 @@
+{
+#include "main.h"
+#include "sets.h"
+#include <stdio.h>
+
+extern FILE *eventfile_h, *actfile;
+}
+
+*fmq
+
+ novocab
+ nobnf
+ nofirst
+ nofollow
+ noparsetable
+ noerrortables
+ nos
+ noe
+
+*terminals
+
+ID 0 0 { char *address; }
+STRUCT 0 0
+SYNONYM 0 0
+PREDICATE 0 0 { char *address; }
+ACTION 0 0 { char *address; }
+/*
+FSTRING 0 0 { char *address; }
+*/
+PROTOCOL 0 0
+LBRACK 0 0
+RBRACK 0 0
+LANGLE 0 0
+EQUAL 0 0
+COMMA 0 0
+STAR 0 0
+EVENTS 0 0
+TRANSITIONS 0 0
+INCLUDE 0 0
+STATES 0 0
+SEMI 0 0
+PCB 0 0 { char *address; }
+DEFAULT 0 0
+NULLACTION 0 0
+SAME 0 0
+
+*nonterminals
+
+pcb { char *address; int isevent; }
+syn { int type; }
+setlist { struct Object *setnum; }
+setlisttail { struct Object *setnum; }
+part { unsigned char type; }
+parttail { unsigned char type; }
+partrest { unsigned char type; char *address; }
+setstruct { struct Object *object; }
+setdef { unsigned char type,keep; char *address; struct Object *object; }
+translist
+transition
+event { struct Object *object; }
+oldstate { struct Object *object; }
+newstate { struct Object *object; }
+predicatepart { char *string; }
+actionpart { char *string; struct Object *oldstate; struct Object *newstate; }
+
+*productions
+
+program ::=
+ STAR PROTOCOL ID
+ {
+ if(strlen($ID.address) > 50 ) {
+ fprintf(stderr,
+ "Protocol name may not exceed 50 chars in length.\n");
+ Exit(-1);
+ }
+ strcpy(protocol, $ID.address);
+ openfiles(protocol);
+ }
+ STAR includelist
+ PCB
+ {
+ $$pcb.isevent = 0;
+ }
+ pcb
+ {
+ fprintf(actfile, "\ntypedef %s %s%s;\n",
+ $pcb[7].address,protocol, PCBNAME);
+ $$syn.type = PCB_SYN;
+ }
+ syn
+ STAR STATES { $$part.type = (unsigned char) STATESET; } part
+ STAR { end_states(eventfile_h); } EVENTS
+ { $$pcb.isevent = 1; }
+ pcb
+ {
+ fprintf(eventfile_h, "\t"); /* fmq gags on single chars */
+ includecode(eventfile_h, $pcb[14].address);
+ fprintf(eventfile_h, "\n"); /* fmq gags on single chars */
+ $$syn.type = EVENT_SYN;
+ }
+ syn
+ {
+ $$part.type = (unsigned char)EVENTSET;
+ }
+ part
+ STAR { end_events(); }
+ TRANSITIONS
+ {
+ putincludes();
+ putdriver(actfile, 9);
+ }
+ translist
+;
+pcb ::= STRUCT
+ { if($pcb.isevent) {
+ fprintf(stderr,
+ "Event is a list of objects enclosed by \"{}\"\n");
+ Exit(-1);
+ }
+ fprintf(eventfile_h, "struct ");
+ }
+ ACTION { $pcb.address = $ACTION.address; }
+ optsemi
+ ::= ACTION
+ { if( ! $pcb.isevent) {
+ fprintf(stderr,
+ "Pcb requires a type or structure definition.\"{}\"\n");
+ Exit(-1);
+ }
+ $pcb.address = $ACTION.address;
+ }
+ optsemi
+ ::= ID { $pcb.address = $ID.address; } optsemi
+;
+
+syn ::= SYNONYM ID { synonyms[$syn.type] = stash( $ID.address ); }
+ ::=
+;
+
+optsemi ::= SEMI
+ ::=
+;
+includelist ::= INCLUDE ACTION { includecode(actfile, $ACTION.address);} STAR
+ ::=
+;
+part ::= ID
+ {
+ $$partrest.address = $ID.address;
+ $$partrest.type = $part.type;
+ }
+ partrest
+ { $$parttail.type = $part.type; }
+ parttail
+;
+parttail ::= { $$part.type = $parttail.type; } part
+ ::=
+;
+partrest ::= EQUAL
+ {
+ if( lookup( $partrest.type, $partrest.address ) ) {
+ fprintf(stderr, "bnf:trying to redefine obj type 0x%x, adr %s\n",
+ $partrest.type, $partrest.address);
+ Exit(-1);
+ }
+ $$setdef.type = $partrest.type;
+ $$setdef.address = stash( $partrest.address );
+ $$setdef.keep = 1;
+ } setdef { $$setstruct.object = $setdef.object; } setstruct
+
+ ::= ACTION
+ {
+ defineitem($partrest.type,
+ $partrest.address, $ACTION.address);
+ }
+
+ ::= {
+ defineitem($partrest.type, $partrest.address, (char *)0);
+ }
+;
+
+setstruct ::= ACTION
+ {
+ if($setstruct.object) {
+ /* WHEN COULD THIS BE FALSE??
+ * isn't it supposed to be setstruct.object???
+ * (it used to be $ACTION.address)
+ */
+
+ $setstruct.object->obj_struc = $ACTION.address;
+ fprintf(eventfile_h,
+ "struct %s %s%s;\n\n", $ACTION.address,
+ EV_PREFIX, $setstruct.object->obj_name);
+ }
+ }
+ ::=
+;
+
+setdef ::= LBRACK
+ {
+ $$setlist.setnum =
+ defineset($setdef.type, $setdef.address, $setdef.keep);
+ } setlist RBRACK { $setdef.object = $setlist.setnum; }
+;
+
+setlist ::= ID
+ {
+ member($setlist.setnum, $ID.address);
+ $$setlisttail.setnum = $setlist.setnum;
+ } setlisttail
+;
+
+setlisttail ::= COMMA { $$setlist.setnum = $setlisttail.setnum; } setlist
+ ::=
+;
+translist ::= transition translisttail
+;
+translisttail ::= translist
+ ::=
+;
+transition ::= newstate { transno ++; } LANGLE EQUAL EQUAL oldstate
+ event
+ {
+ CurrentEvent /* GAG! */ = $event.object;
+ }
+ predicatepart
+ {
+ $$actionpart.string = $predicatepart.string;
+ $$actionpart.newstate = $newstate.object;
+ $$actionpart.oldstate = $oldstate.object;
+ }
+ actionpart
+ SEMI
+;
+
+predicatepart ::= PREDICATE
+ {
+ $predicatepart.string = stash ( $PREDICATE.address );
+ }
+ ::= DEFAULT
+ {
+ $predicatepart.string = (char *)0;
+ }
+;
+
+actionpart ::=
+ ACTION
+ {
+ statetable( $actionpart.string, $actionpart.oldstate,
+ $actionpart.newstate,
+ acttable(actfile, $ACTION.address ),
+ CurrentEvent );
+ if( print_trans ) {
+ dump_trans( $actionpart.string, $actionpart.oldstate,
+ $actionpart.newstate,
+ $ACTION.address, CurrentEvent );
+ }
+ }
+ ::= NULLACTION
+ {
+ statetable($actionpart.string, $actionpart.oldstate, $actionpart.newstate,
+ 0, CurrentEvent ); /* KLUDGE - remove this */
+ if( print_trans ) {
+ dump_trans( $actionpart.string, $actionpart.oldstate,
+ $actionpart.newstate,
+ "NULLACTION", CurrentEvent );
+ }
+ }
+;
+
+oldstate ::= ID
+ {
+ $oldstate.object = Lookup(STATESET, $ID.address);
+ }
+ ::= {
+ $$setdef.address = (char *)0;
+ $$setdef.type = (unsigned char)STATESET;
+ $$setdef.keep = 0;
+ }
+ setdef
+ {
+ $oldstate.object = $setdef.object;
+ }
+;
+
+newstate ::= ID
+ {
+ $newstate.object = Lookup(STATESET, $ID.address);
+ }
+;
+
+newstate ::= SAME
+ {
+ extern struct Object *SameState;
+
+ $newstate.object = SameState;
+ }
+;
+
+event ::= ID
+ {
+ $event.object = Lookup(EVENTSET, $ID.address);
+ }
+ ::=
+ {
+ $$setdef.address = (char *)0;
+ $$setdef.type = (unsigned char)EVENTSET;
+ $$setdef.keep = 0;
+ }
+ setdef
+ {
+ $event.object = $setdef.object;
+ }
+;
+
+*end
diff --git a/sys/netiso/xebec/xebec.c b/sys/netiso/xebec/xebec.c
new file mode 100644
index 000000000000..132bcb8487a3
--- /dev/null
+++ b/sys/netiso/xebec/xebec.c
@@ -0,0 +1,451 @@
+/* $Header: xebec.c,v 2.2 88/09/19 12:55:37 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/xebec.c,v $ */
+
+#include "xebec.h"
+#include "llparse.h"
+#ifndef E_TABLE
+#define E_TABLE "xebec.e"
+#endif E_TABLE
+
+#include "main.h"
+#include "sets.h"
+#include <stdio.h>
+
+extern FILE *eventfile_h, *actfile;
+
+llaction(lln,token)
+LLtoken *token;
+{
+ struct llattr *llattr;
+ llattr = &llattrdesc[lldescindex-1];
+switch(lln) {
+case 1:
+ llfinprod();
+ break;
+
+case 10: {
+
+ if(strlen(llattr->llabase[3].ID.address) > 50 ) {
+ fprintf(stderr,
+ "Protocol name may not exceed 50 chars in length.\n");
+ Exit(-1);
+ }
+ strcpy(protocol, llattr->llabase[3].ID.address);
+ openfiles(protocol);
+
+} break;
+
+case 11: {
+
+ llattr->llabase[7].pcb.isevent = 0;
+
+} break;
+
+case 12: {
+
+ fprintf(actfile, "\ntypedef %s %s%s;\n",
+ llattr->llabase[7].pcb.address,protocol, PCBNAME);
+ llattr->llabase[8].syn.type = PCB_SYN;
+
+} break;
+
+case 13: {
+ llattr->llabase[11].part.type = (unsigned char) STATESET;
+} break;
+
+case 14: {
+ end_states(eventfile_h);
+} break;
+
+case 15: {
+ llattr->llabase[14].pcb.isevent = 1;
+} break;
+
+case 16: {
+
+ fprintf(eventfile_h, "\t"); /* fmq gags on single chars */
+ includecode(eventfile_h, llattr->llabase[14].pcb.address);
+ fprintf(eventfile_h, "\n"); /* fmq gags on single chars */
+ llattr->llabase[15].syn.type = EVENT_SYN;
+
+} break;
+
+case 17: {
+
+ llattr->llabase[16].part.type = (unsigned char)EVENTSET;
+
+} break;
+
+case 18: {
+ end_events();
+} break;
+
+case 19: {
+
+ putincludes();
+ putdriver(actfile, 9);
+
+} break;
+
+case 20: {
+ if(llattr->llabase[0].pcb.isevent) {
+ fprintf(stderr,
+ "Event is a list of objects enclosed by \"{}\"\n");
+ Exit(-1);
+ }
+ fprintf(eventfile_h, "struct ");
+
+} break;
+
+case 21: {
+ llattr->llabase[0].pcb.address = llattr->llabase[2].ACTION.address;
+} break;
+
+case 22: {
+ if( ! llattr->llabase[0].pcb.isevent) {
+ fprintf(stderr,
+ "Pcb requires a type or structure definition.\"{}\"\n");
+ Exit(-1);
+ }
+ llattr->llabase[0].pcb.address = llattr->llabase[1].ACTION.address;
+
+} break;
+
+case 23: {
+ llattr->llabase[0].pcb.address = llattr->llabase[1].ID.address;
+} break;
+
+case 24: {
+ synonyms[llattr->llabase[0].syn.type] = stash( llattr->llabase[2].ID.address );
+} break;
+
+case 25: {
+ includecode(actfile, llattr->llabase[2].ACTION.address);
+} break;
+
+case 26: {
+
+ llattr->llabase[2].partrest.address = llattr->llabase[1].ID.address;
+ llattr->llabase[2].partrest.type = llattr->llabase[0].part.type;
+
+} break;
+
+case 27: {
+ llattr->llabase[3].parttail.type = llattr->llabase[0].part.type;
+} break;
+
+case 28: {
+ llattr->llabase[1].part.type = llattr->llabase[0].parttail.type;
+} break;
+
+case 29: {
+
+ if( lookup( llattr->llabase[0].partrest.type, llattr->llabase[0].partrest.address ) ) {
+ fprintf(stderr, "bnf:trying to redefine obj type 0x%x, adr %s\n",
+ llattr->llabase[0].partrest.type, llattr->llabase[0].partrest.address);
+ Exit(-1);
+ }
+ llattr->llabase[2].setdef.type = llattr->llabase[0].partrest.type;
+ llattr->llabase[2].setdef.address = stash( llattr->llabase[0].partrest.address );
+ llattr->llabase[2].setdef.keep = 1;
+
+} break;
+
+case 30: {
+ llattr->llabase[3].setstruct.object = llattr->llabase[2].setdef.object;
+} break;
+
+case 31: {
+
+ defineitem(llattr->llabase[0].partrest.type,
+ llattr->llabase[0].partrest.address, llattr->llabase[1].ACTION.address);
+
+} break;
+
+case 32: {
+
+ defineitem(llattr->llabase[0].partrest.type, llattr->llabase[0].partrest.address, (char *)0);
+
+} break;
+
+case 33: {
+
+ if(llattr->llabase[0].setstruct.object) {
+ /* WHEN COULD THIS BE FALSE??
+ * isn't it supposed to be setstruct.object???
+ * (it used to be $ACTION.address)
+ */
+
+ llattr->llabase[0].setstruct.object->obj_struc = llattr->llabase[1].ACTION.address;
+ fprintf(eventfile_h,
+ "struct %s %s%s;\n\n", llattr->llabase[1].ACTION.address,
+ EV_PREFIX, llattr->llabase[0].setstruct.object->obj_name);
+ }
+
+} break;
+
+case 34: {
+
+ llattr->llabase[2].setlist.setnum =
+ defineset(llattr->llabase[0].setdef.type, llattr->llabase[0].setdef.address, llattr->llabase[0].setdef.keep);
+
+} break;
+
+case 35: {
+ llattr->llabase[0].setdef.object = llattr->llabase[2].setlist.setnum;
+} break;
+
+case 36: {
+
+ member(llattr->llabase[0].setlist.setnum, llattr->llabase[1].ID.address);
+ llattr->llabase[2].setlisttail.setnum = llattr->llabase[0].setlist.setnum;
+
+} break;
+
+case 37: {
+ llattr->llabase[2].setlist.setnum = llattr->llabase[0].setlisttail.setnum;
+} break;
+
+case 38: {
+ transno ++;
+} break;
+
+case 39: {
+
+ CurrentEvent /* GAG! */ = llattr->llabase[6].event.object;
+
+} break;
+
+case 40: {
+
+ llattr->llabase[8].actionpart.string = llattr->llabase[7].predicatepart.string;
+ llattr->llabase[8].actionpart.newstate = llattr->llabase[1].newstate.object;
+ llattr->llabase[8].actionpart.oldstate = llattr->llabase[5].oldstate.object;
+
+} break;
+
+case 41: {
+
+ llattr->llabase[0].predicatepart.string = stash ( llattr->llabase[1].PREDICATE.address );
+
+} break;
+
+case 42: {
+
+ llattr->llabase[0].predicatepart.string = (char *)0;
+
+} break;
+
+case 43: {
+
+ statetable( llattr->llabase[0].actionpart.string, llattr->llabase[0].actionpart.oldstate,
+ llattr->llabase[0].actionpart.newstate,
+ acttable(actfile, llattr->llabase[1].ACTION.address ),
+ CurrentEvent );
+ if( print_trans ) {
+ dump_trans( llattr->llabase[0].actionpart.string, llattr->llabase[0].actionpart.oldstate,
+ llattr->llabase[0].actionpart.newstate,
+ llattr->llabase[1].ACTION.address, CurrentEvent );
+ }
+
+} break;
+
+case 44: {
+
+ statetable(llattr->llabase[0].actionpart.string, llattr->llabase[0].actionpart.oldstate, llattr->llabase[0].actionpart.newstate,
+ 0, CurrentEvent ); /* KLUDGE - remove this */
+ if( print_trans ) {
+ dump_trans( llattr->llabase[0].actionpart.string, llattr->llabase[0].actionpart.oldstate,
+ llattr->llabase[0].actionpart.newstate,
+ "NULLACTION", CurrentEvent );
+ }
+
+} break;
+
+case 45: {
+
+ llattr->llabase[0].oldstate.object = Lookup(STATESET, llattr->llabase[1].ID.address);
+
+} break;
+
+case 46: {
+
+ llattr->llabase[1].setdef.address = (char *)0;
+ llattr->llabase[1].setdef.type = (unsigned char)STATESET;
+ llattr->llabase[1].setdef.keep = 0;
+
+} break;
+
+case 47: {
+
+ llattr->llabase[0].oldstate.object = llattr->llabase[1].setdef.object;
+
+} break;
+
+case 48: {
+
+ llattr->llabase[0].newstate.object = Lookup(STATESET, llattr->llabase[1].ID.address);
+
+} break;
+
+case 49: {
+
+ extern struct Object *SameState;
+
+ llattr->llabase[0].newstate.object = SameState;
+
+} break;
+
+case 50: {
+
+ llattr->llabase[0].event.object = Lookup(EVENTSET, llattr->llabase[1].ID.address);
+
+} break;
+
+case 51: {
+
+ llattr->llabase[1].setdef.address = (char *)0;
+ llattr->llabase[1].setdef.type = (unsigned char)EVENTSET;
+ llattr->llabase[1].setdef.keep = 0;
+
+} break;
+
+case 52: {
+
+ llattr->llabase[0].event.object = llattr->llabase[1].setdef.object;
+
+} break;
+}
+}
+char *llstrings[] = {
+ "<null>",
+ "ID",
+ "STRUCT",
+ "SYNONYM",
+ "PREDICATE",
+ "ACTION",
+ "PROTOCOL",
+ "LBRACK",
+ "RBRACK",
+ "LANGLE",
+ "EQUAL",
+ "COMMA",
+ "STAR",
+ "EVENTS",
+ "TRANSITIONS",
+ "INCLUDE",
+ "STATES",
+ "SEMI",
+ "PCB",
+ "DEFAULT",
+ "NULLACTION",
+ "SAME",
+ "ENDMARKER",
+ "pcb",
+ "syn",
+ "setlist",
+ "setlisttail",
+ "part",
+ "parttail",
+ "partrest",
+ "setstruct",
+ "setdef",
+ "translist",
+ "transition",
+ "event",
+ "oldstate",
+ "newstate",
+ "predicatepart",
+ "actionpart",
+ "program",
+ "includelist",
+ "optsemi",
+ "translisttail",
+ "$goal$",
+ (char *) 0
+};
+short llnterms = 23;
+short llnsyms = 44;
+short llnprods = 38;
+short llinfinite = 10000;
+short llproductions[] = {
+41, -21, 5, -20, 2,
+41, -22, 5,
+41, -23, 1,
+-24, 1, 3,
+
+26, -36, 1,
+25, -37, 11,
+
+28, -27, 29, -26, 1,
+27, -28,
+
+30, -30, 31, -29, 10,
+-31, 5,
+-32,
+-33, 5,
+
+-35, 8, 25, -34, 7,
+42, 33,
+17, 38, -40, 37, -39, 34, 35, 10, 10, 9, -38, 36,
+-50, 1,
+-52, 31, -51,
+-45, 1,
+-47, 31, -46,
+-48, 1,
+-49, 21,
+-41, 4,
+-42, 19,
+-43, 5,
+-44, 20,
+32, -19, 14, -18, 12, 27, -17, 24, -16, 23, -15, 13, -14, 12, 27, -13, 16, 12, 24, -12, 23, -11, 18, 40, 12, -10, 1, 6, 12,
+12, -25, 5, 15,
+
+17,
+
+32,
+
+22, 39,
+0
+};
+struct llprodindex llprodindex[] = {
+{ 0, 0, 0 }, { 0, 5, 19 }, { 5, 3, 3 }, { 8, 3, 2 },
+{ 11, 3, 2 }, { 14, 0, 2 }, { 14, 3, 0 }, { 17, 3, 1 },
+{ 20, 0, 0 }, { 20, 5, 3 }, { 25, 2, 0 }, { 27, 0, 3 },
+{ 27, 5, 1 }, { 32, 2, 0 }, { 34, 1, 3 }, { 35, 2, 1 },
+{ 37, 0, 0 }, { 37, 5, 1 }, { 42, 2, 0 }, { 44, 12, 3 },
+{ 56, 2, 2 }, { 58, 3, 2 }, { 61, 2, 0 }, { 63, 3, 2 },
+{ 66, 2, 1 }, { 68, 2, 0 }, { 70, 2, 9 }, { 72, 2, 1 },
+{ 74, 2, 1 }, { 76, 2, 1 }, { 78, 29, 1 }, { 107, 4, 1 },
+{ 111, 0, 1 }, { 111, 1, 1 }, { 112, 0, 1 }, { 112, 1, 1 },
+{ 113, 0, 1 }, { 113, 2, 2 }, { 0, 0, 0 }
+};
+short llepsilon[] = {
+ 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
+ 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 0, 1, 0, 1, 0, 0
+};
+struct llparsetable llparsetable[] = {
+{ 1, 3 }, { 2, 1 }, { 5, 2 }, { 0, 23 }, { 1, 5 },
+{ 3, 4 }, { 12, 5 }, { 0, 24 }, { 1, 6 }, { 0, 25 },
+{ 8, 8 }, { 11, 7 }, { 0, 26 }, { 1, 9 }, { 0, 27 },
+{ 1, 10 }, { 12, 11 }, { 0, 28 }, { 1, 14 }, { 5, 13 },
+{ 10, 12 }, { 12, 14 }, { 0, 29 }, { 1, 16 }, { 5, 15 },
+{ 12, 16 }, { 0, 30 }, { 7, 17 }, { 0, 31 }, { 1, 18 },
+{ 21, 18 }, { 0, 32 }, { 1, 19 }, { 21, 19 }, { 0, 33 },
+{ 1, 20 }, { 7, 21 }, { 0, 34 }, { 1, 22 }, { 7, 23 },
+{ 0, 35 }, { 1, 24 }, { 21, 25 }, { 0, 36 }, { 4, 26 },
+{ 19, 27 }, { 0, 37 }, { 5, 28 }, { 20, 29 }, { 0, 38 },
+{ 12, 30 }, { 0, 39 }, { 15, 31 }, { 18, 32 }, { 0, 40 },
+{ 1, 34 }, { 3, 34 }, { 12, 34 }, { 17, 33 }, { 0, 41 },
+{ 1, 35 }, { 21, 35 }, { 22, 36 }, { 0, 42 }, { 12, 37 },
+{ 0, 43 }, { 0, 0 }
+};
+short llparseindex[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 4, 8, 10, 13, 15, 18,
+ 23, 27, 29, 32, 35, 38, 41, 44, 47, 50,
+ 52, 55, 60, 64, 0
+};
diff --git a/sys/netiso/xebec/xebec.h b/sys/netiso/xebec/xebec.h
new file mode 100644
index 000000000000..168bb77b249d
--- /dev/null
+++ b/sys/netiso/xebec/xebec.h
@@ -0,0 +1,88 @@
+/* $Header: xebec.h,v 2.1 88/09/19 12:56:35 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/xebec.h,v $ */
+
+/*
+ * Per-symbol attribute storage for the parser.  The upper-case members
+ * ID through SAME appear in the same order as the T_ID through T_SAME
+ * token codes defined below.
+ * NOTE(review): the lower-case members presumably belong to grammar
+ * nonterminals -- confirm against the grammar source.
+ */
+union llattrib {
+ struct {
+ char *address; } ID;
+ int STRUCT;
+ int SYNONYM;
+ struct {
+ char *address; } PREDICATE;
+ struct {
+ char *address; } ACTION;
+ int PROTOCOL;
+ int LBRACK;
+ int RBRACK;
+ int LANGLE;
+ int EQUAL;
+ int COMMA;
+ int STAR;
+ int EVENTS;
+ int TRANSITIONS;
+ int INCLUDE;
+ int STATES;
+ int SEMI;
+ struct {
+ char *address; } PCB;
+ int DEFAULT;
+ int NULLACTION;
+ int SAME;
+ struct {
+ char *address; int isevent; } pcb;
+ struct {
+ int type; } syn;
+ struct {
+ struct Object *setnum; } setlist;
+ struct {
+ struct Object *setnum; } setlisttail;
+ struct {
+ unsigned char type; } part;
+ struct {
+ unsigned char type; } parttail;
+ struct {
+ unsigned char type; char *address; } partrest;
+ struct {
+ struct Object *object; } setstruct;
+ struct {
+ unsigned char type,keep; char *address; struct Object *object; } setdef;
+ int translist;
+ int transition;
+ struct {
+ struct Object *object; } event;
+ struct {
+ struct Object *object; } oldstate;
+ struct {
+ struct Object *object; } newstate;
+ struct {
+ char *string; } predicatepart;
+ struct {
+ char *string; struct Object *oldstate; struct Object *newstate; } actionpart;
+};
+/*
+ * Grammar dimensions.
+ * NOTE(review): LLTERM is one more than the highest T_* code below (22),
+ * so code 0 is presumably counted as well; LLSYM and LLPROD presumably
+ * count all grammar symbols and all productions -- confirm against the
+ * parser generator.
+ */
+#define LLTERM 23
+#define LLSYM 44
+#define LLPROD 38
+
+#define LLINF 10000
+
+/* Token codes, numbered consecutively from 1. */
+#define T_ID 1
+#define T_STRUCT 2
+#define T_SYNONYM 3
+#define T_PREDICATE 4
+#define T_ACTION 5
+#define T_PROTOCOL 6
+#define T_LBRACK 7
+#define T_RBRACK 8
+#define T_LANGLE 9
+#define T_EQUAL 10
+#define T_COMMA 11
+#define T_STAR 12
+#define T_EVENTS 13
+#define T_TRANSITIONS 14
+#define T_INCLUDE 15
+#define T_STATES 16
+#define T_SEMI 17
+#define T_PCB 18
+#define T_DEFAULT 19
+#define T_NULLACTION 20
+#define T_SAME 21
+#define T_ENDMARKER 22
diff --git a/sys/netns/idp.h b/sys/netns/idp.h
new file mode 100644
index 000000000000..254208dfad80
--- /dev/null
+++ b/sys/netns/idp.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)idp.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for NS(tm) Internet Datagram Protocol
+ */
+struct idp {
+ u_short idp_sum; /* Checksum */
+ u_short idp_len; /* Length, in bytes, including header */
+ u_char idp_tc; /* Transport Control (i.e. hop count) */
+ u_char idp_pt; /* Packet Type (i.e. level 2 protocol) */
+ struct ns_addr idp_dna; /* Destination Network Address */
+ struct ns_addr idp_sna; /* Source Network Address */
+};
diff --git a/sys/netns/idp_usrreq.c b/sys/netns/idp_usrreq.c
new file mode 100644
index 000000000000..b548a12574b3
--- /dev/null
+++ b/sys/netns/idp_usrreq.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)idp_usrreq.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netns/ns.h>
+#include <netns/ns_pcb.h>
+#include <netns/ns_if.h>
+#include <netns/idp.h>
+#include <netns/idp_var.h>
+#include <netns/ns_error.h>
+
+/*
+ * IDP protocol implementation.
+ */
+
+/*
+ * Source-address sockaddr handed to sbappendaddr() by idp_input(), which
+ * rewrites sns_addr for every packet it delivers.
+ */
+struct sockaddr_ns idp_ns = { sizeof(idp_ns), AF_NS };
+
+/*
+ * Queue a received IDP packet on the socket that owns nsp.
+ *
+ * The sender's address is copied into the shared idp_ns sockaddr.  If the
+ * packet carries a zero source network and the receiving interface is
+ * known, the network number of that interface's first AF_NS address is
+ * substituted.  The packet type is recorded in the pcb, and unless the
+ * pcb has NSP_RAWIN set the IDP header is trimmed from the front of the
+ * mbuf.  If the receive buffer will not accept the packet, the mbuf chain
+ * is freed; otherwise the reader is woken up.
+ *
+ * This may also be called for raw listeners.
+ */
+idp_input(m, nsp)
+	struct mbuf *m;
+	register struct nspcb *nsp;
+{
+	register struct idp *idp = mtod(m, struct idp *);
+	struct ifnet *ifp = m->m_pkthdr.rcvif;
+	register struct ifaddr *ifa;
+
+	if (nsp == 0)
+		panic("No nspcb");
+
+	/*
+	 * Build the sockaddr-format source address that is delivered
+	 * to the user together with the datagram.
+	 */
+	idp_ns.sns_addr = idp->idp_sna;
+	if (ns_neteqnn(idp->idp_sna.x_net, ns_zeronet) && ifp) {
+		/* Find the first NS address configured on the interface. */
+		ifa = ifp->if_addrlist;
+		while (ifa && ifa->ifa_addr->sa_family != AF_NS)
+			ifa = ifa->ifa_next;
+		if (ifa)
+			idp_ns.sns_addr.x_net = IA_SNS(ifa)->sns_addr.x_net;
+	}
+	nsp->nsp_rpt = idp->idp_pt;
+
+	/* Ordinary listeners do not get to see the IDP header. */
+	if ((nsp->nsp_flags & NSP_RAWIN) == 0) {
+		m->m_len -= sizeof (struct idp);
+		m->m_pkthdr.len -= sizeof (struct idp);
+		m->m_data += sizeof (struct idp);
+	}
+
+	if (sbappendaddr(&nsp->nsp_socket->so_rcv, (struct sockaddr *)&idp_ns,
+	    m, (struct mbuf *)0) == 0) {
+		/* No room in the receive buffer: drop the packet. */
+		m_freem(m);
+		return;
+	}
+	sorwakeup(nsp->nsp_socket);
+}
+
+/*
+ * Abort the association held by nsp: disconnect the pcb, then mark its
+ * socket disconnected.  The socket pointer is fetched first so that nsp
+ * is not referenced again after ns_pcbdisconnect() has run.
+ */
+idp_abort(nsp)
+	struct nspcb *nsp;
+{
+	struct socket *sock;
+
+	sock = nsp->nsp_socket;
+	ns_pcbdisconnect(nsp);
+	soisdisconnected(sock);
+}
+/*
+ * Drop connection, reporting
+ * the specified error.
+ *
+ * The error is stored in the socket's so_error, the pcb is disconnected
+ * and the socket is marked disconnected.
+ *
+ * Returns a null pointer.  The function is declared to return a pcb
+ * pointer but previously fell off the end without returning anything, so
+ * any caller that looked at the result received an indeterminate value.
+ * NOTE(review): a null pointer is returned rather than nsp because
+ * ns_pcbdisconnect() is not visible here and may release the pcb --
+ * confirm.
+ */
+struct nspcb *
+idp_drop(nsp, errno)
+	register struct nspcb *nsp;
+	int errno;
+{
+	struct socket *so = nsp->nsp_socket;
+
+	/*
+	 * someday, in the xerox world
+	 * we will generate error protocol packets
+	 * announcing that the socket has gone away.
+	 */
+	so->so_error = errno;
+	ns_pcbdisconnect(nsp);
+	soisdisconnected(so);
+	return ((struct nspcb *)0);
+}
+
+/*
+ * When nonzero, idp_output() passes a null route to ns_output() instead
+ * of the route cached in the pcb.
+ */
+int noIdpRoute;
+
+/*
+ * Send the mbuf chain m0 as one IDP datagram on behalf of nsp.
+ *
+ * The chain is padded to an even number of bytes with a zero byte.
+ * Unless the pcb has NSP_RAWOUT set (the caller supplied the header), an
+ * IDP header is prepended and filled in from the pcb.  idp_len is set to
+ * the unpadded length; the checksum, when idpcksum is enabled, is taken
+ * over the length rounded up to even.
+ *
+ * Returns ENOBUFS if an mbuf cannot be obtained, otherwise whatever
+ * ns_output() returns.
+ */
+idp_output(nsp, m0)
+	struct nspcb *nsp;
+	struct mbuf *m0;
+{
+	register struct mbuf *m;
+	register struct idp *idp;
+	register struct socket *so;
+	register int len = 0;
+	register struct route *ro;
+	struct mbuf *mprev;
+	extern int idpcksum;
+
+	/*
+	 * Calculate data length.
+	 */
+	for (m = m0; m; m = m->m_next) {
+		mprev = m;
+		len += m->m_len;
+	}
+	/*
+	 * Make sure packet is actually of even length.
+	 */
+
+	if (len & 1) {
+		m = mprev;
+		if ((m->m_flags & M_EXT) == 0 &&
+			(m->m_len + m->m_data < &m->m_dat[MLEN])) {
+			/*
+			 * Room is left in the last mbuf: extend it by one
+			 * byte.  The pad byte is zeroed so that stale mbuf
+			 * contents are not sent, matching the separate-mbuf
+			 * case below.
+			 */
+			mtod(m, char *)[m->m_len++] = 0;
+		} else {
+			struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);
+
+			if (m1 == 0) {
+				m_freem(m0);
+				return (ENOBUFS);
+			}
+			m1->m_len = 1;
+			* mtod(m1, char *) = 0;
+			m->m_next = m1;
+		}
+		m0->m_pkthdr.len++;
+	}
+
+	/*
+	 * Fill in mbuf with extended IDP header
+	 * and addresses and length put into network format.
+	 */
+	m = m0;
+	if (nsp->nsp_flags & NSP_RAWOUT) {
+		idp = mtod(m, struct idp *);
+	} else {
+		M_PREPEND(m, sizeof (struct idp), M_DONTWAIT);
+		if (m == 0)
+			return (ENOBUFS);
+		idp = mtod(m, struct idp *);
+		idp->idp_tc = 0;
+		idp->idp_pt = nsp->nsp_dpt;
+		idp->idp_sna = nsp->nsp_laddr;
+		idp->idp_dna = nsp->nsp_faddr;
+		len += sizeof (struct idp);
+	}
+
+	idp->idp_len = htons((u_short)len);
+
+	if (idpcksum) {
+		idp->idp_sum = 0;
+		/* round len up to the next even value for the checksum */
+		len = ((len - 1) | 1) + 1;
+		idp->idp_sum = ns_cksum(m, len);
+	} else
+		idp->idp_sum = 0xffff;
+
+	/*
+	 * Output datagram.
+	 */
+	so = nsp->nsp_socket;
+	if (so->so_options & SO_DONTROUTE)
+		return (ns_output(m, (struct route *)0,
+		    (so->so_options & SO_BROADCAST) | NS_ROUTETOIF));
+	/*
+	 * Use cached route for previous datagram if
+	 * possible.  If the previous net was the same
+	 * and the interface was a broadcast medium, or
+	 * if the previous destination was identical,
+	 * then we are ok.
+	 *
+	 * NB: We don't handle broadcasts because that
+	 *     would require 3 subroutine calls.
+	 */
+	ro = &nsp->nsp_route;
+#ifdef ancient_history
+	/*
+	 * I think that this will all be handled in ns_pcbconnect!
+	 */
+	if (ro->ro_rt) {
+		if(ns_neteq(nsp->nsp_lastdst, idp->idp_dna)) {
+			/*
+			 * This assumes we have no GH type routes
+			 */
+			if (ro->ro_rt->rt_flags & RTF_HOST) {
+				if (!ns_hosteq(nsp->nsp_lastdst, idp->idp_dna))
+					goto re_route;
+
+			}
+			if ((ro->ro_rt->rt_flags & RTF_GATEWAY) == 0) {
+				register struct ns_addr *dst =
+						&satons_addr(ro->ro_dst);
+				dst->x_host = idp->idp_dna.x_host;
+			}
+			/*
+			 * Otherwise, we go through the same gateway
+			 * and dst is already set up.
+			 */
+		} else {
+		re_route:
+			RTFREE(ro->ro_rt);
+			ro->ro_rt = (struct rtentry *)0;
+		}
+	}
+	nsp->nsp_lastdst = idp->idp_dna;
+#endif /* ancient_history */
+	if (noIdpRoute) ro = 0;
+	return (ns_output(m, ro, so->so_options & SO_BROADCAST));
+}
+/*
+ * Get or set an IDP-level socket option.
+ *
+ * req is PRCO_GETOPT or PRCO_SETOPT and name selects the option.
+ *
+ * PRCO_GETOPT: a fresh mbuf holding the option value is stored through
+ * value.  The mbuf is stored even when name is not recognised; in that
+ * case EINVAL is returned and the mbuf's contents are not filled in.
+ *
+ * PRCO_SETOPT: the option value is read from *value, which is then
+ * freed.  Every option that reads *value now returns EINVAL when no
+ * value mbuf is supplied; previously only the flag options checked
+ * before dereferencing.
+ *
+ * Returns 0 on success, EINVAL for a missing pcb, a missing value or an
+ * unknown option, and ENOBUFS when no mbuf is available.
+ */
+/* ARGSUSED */
+idp_ctloutput(req, so, level, name, value)
+	int req, level;
+	struct socket *so;
+	int name;
+	struct mbuf **value;
+{
+	register struct mbuf *m;
+	struct nspcb *nsp = sotonspcb(so);
+	int mask, error = 0;
+	extern long ns_pexseq;
+
+	if (nsp == NULL)
+		return (EINVAL);
+
+	switch (req) {
+
+	case PRCO_GETOPT:
+		if (value==NULL)
+			return (EINVAL);
+		m = m_get(M_DONTWAIT, MT_DATA);
+		if (m==NULL)
+			return (ENOBUFS);
+		switch (name) {
+
+		case SO_ALL_PACKETS:
+			mask = NSP_ALL_PACKETS;
+			goto get_flags;
+
+		case SO_HEADERS_ON_INPUT:
+			mask = NSP_RAWIN;
+			goto get_flags;
+
+		case SO_HEADERS_ON_OUTPUT:
+			mask = NSP_RAWOUT;
+		get_flags:
+			/* report the selected flag bit(s) as a short */
+			m->m_len = sizeof(short);
+			*mtod(m, short *) = nsp->nsp_flags & mask;
+			break;
+
+		case SO_DEFAULT_HEADERS:
+			m->m_len = sizeof(struct idp);
+			{
+				register struct idp *idp = mtod(m, struct idp *);
+				idp->idp_len = 0;
+				idp->idp_sum = 0;
+				idp->idp_tc = 0;
+				idp->idp_pt = nsp->nsp_dpt;
+				idp->idp_dna = nsp->nsp_faddr;
+				idp->idp_sna = nsp->nsp_laddr;
+			}
+			break;
+
+		case SO_SEQNO:
+			/* each query consumes one sequence number */
+			m->m_len = sizeof(long);
+			*mtod(m, long *) = ns_pexseq++;
+			break;
+
+		default:
+			error = EINVAL;
+		}
+		*value = m;
+		break;
+
+	case PRCO_SETOPT:
+		switch (name) {
+			int *ok;
+
+		case SO_ALL_PACKETS:
+			mask = NSP_ALL_PACKETS;
+			goto set_head;
+
+		case SO_HEADERS_ON_INPUT:
+			mask = NSP_RAWIN;
+			goto set_head;
+
+		case SO_HEADERS_ON_OUTPUT:
+			mask = NSP_RAWOUT;
+		set_head:
+			if (value && *value) {
+				ok = mtod(*value, int *);
+				if (*ok)
+					nsp->nsp_flags |= mask;
+				else
+					nsp->nsp_flags &= ~mask;
+			} else error = EINVAL;
+			break;
+
+		case SO_DEFAULT_HEADERS:
+			/* only the packet type is taken from the header */
+			if (value && *value) {
+				register struct idp *idp
+				    = mtod(*value, struct idp *);
+				nsp->nsp_dpt = idp->idp_pt;
+			} else
+				error = EINVAL;
+			break;
+#ifdef NSIP
+
+		case SO_NSIP_ROUTE:
+			if (value && *value)
+				error = nsip_route(*value);
+			else
+				error = EINVAL;
+			break;
+#endif /* NSIP */
+		default:
+			error = EINVAL;
+		}
+		if (value && *value)
+			m_freem(*value);
+		break;
+	}
+	return (error);
+}
+
+/*
+ * Handle a PRU_* user request for an IDP datagram socket.
+ *
+ * PRU_CONTROL is forwarded to ns_control() with the mbuf arguments
+ * reinterpreted as (cmd, data, ifp).  A non-empty control mbuf, or a
+ * missing pcb on any request other than PRU_ATTACH, yields EINVAL.
+ *
+ * Requests that leave through the "release" label free control and, if
+ * it is still set, m.  PRU_SENSE, PRU_CONTROL, PRU_RCVD and PRU_RCVOOB
+ * return directly and free nothing.
+ *
+ * Returns 0 or an errno value.
+ */
+/*ARGSUSED*/
+idp_usrreq(so, req, m, nam, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	struct nspcb *nsp = sotonspcb(so);
+	int error = 0;
+
+	if (req == PRU_CONTROL)
+		return (ns_control(so, (int)m, (caddr_t)nam,
+			(struct ifnet *)control));
+	if (control && control->m_len) {
+		error = EINVAL;
+		goto release;
+	}
+	if (nsp == NULL && req != PRU_ATTACH) {
+		error = EINVAL;
+		goto release;
+	}
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (nsp != NULL) {
+			error = EINVAL;
+			break;
+		}
+		error = ns_pcballoc(so, &nspcb);
+		if (error)
+			break;
+		error = soreserve(so, (u_long) 2048, (u_long) 2048);
+		if (error)
+			break;
+		break;
+
+	case PRU_DETACH:
+		if (nsp == NULL) {
+			error = ENOTCONN;
+			break;
+		}
+		ns_pcbdetach(nsp);
+		break;
+
+	case PRU_BIND:
+		error = ns_pcbbind(nsp, nam);
+		break;
+
+	case PRU_LISTEN:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_CONNECT:
+		if (!ns_nullhost(nsp->nsp_faddr)) {
+			error = EISCONN;
+			break;
+		}
+		error = ns_pcbconnect(nsp, nam);
+		if (error == 0)
+			soisconnected(so);
+		break;
+
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_ACCEPT:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_DISCONNECT:
+		if (ns_nullhost(nsp->nsp_faddr)) {
+			error = ENOTCONN;
+			break;
+		}
+		ns_pcbdisconnect(nsp);
+		soisdisconnected(so);
+		break;
+
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		break;
+
+	case PRU_SEND:
+	{
+		struct ns_addr laddr;
+		int s;
+
+		if (nam) {
+			/* remember the local address; it is restored below */
+			laddr = nsp->nsp_laddr;
+			if (!ns_nullhost(nsp->nsp_faddr)) {
+				error = EISCONN;
+				break;
+			}
+			/*
+			 * Must block input while temporarily connected.
+			 */
+			s = splnet();
+			error = ns_pcbconnect(nsp, nam);
+			if (error) {
+				splx(s);
+				break;
+			}
+		} else {
+			if (ns_nullhost(nsp->nsp_faddr)) {
+				error = ENOTCONN;
+				break;
+			}
+		}
+		error = idp_output(nsp, m);
+		/* idp_output() has taken the chain; do not free it below */
+		m = NULL;
+		if (nam) {
+			ns_pcbdisconnect(nsp);
+			splx(s);
+			nsp->nsp_laddr.x_host = laddr.x_host;
+			nsp->nsp_laddr.x_port = laddr.x_port;
+		}
+	}
+		break;
+
+	case PRU_ABORT:
+		/*
+		 * Mark the socket disconnected before the pcb is detached
+		 * and the socket released, so that so is not touched after
+		 * it may have been freed.
+		 * NOTE(review): PRU_DETACH above calls only ns_pcbdetach();
+		 * check whether the extra sofree() here is still wanted.
+		 */
+		soisdisconnected(so);
+		ns_pcbdetach(nsp);
+		sofree(so);
+		break;
+
+	case PRU_SOCKADDR:
+		ns_setsockaddr(nsp, nam);
+		break;
+
+	case PRU_PEERADDR:
+		ns_setpeeraddr(nsp, nam);
+		break;
+
+	case PRU_SENSE:
+		/*
+		 * stat: don't bother with a blocksize.
+		 */
+		return (0);
+
+	case PRU_SENDOOB:
+	case PRU_FASTTIMO:
+	case PRU_SLOWTIMO:
+	case PRU_PROTORCV:
+	case PRU_PROTOSEND:
+		error =  EOPNOTSUPP;
+		break;
+
+	case PRU_CONTROL:
+	case PRU_RCVD:
+	case PRU_RCVOOB:
+		return (EOPNOTSUPP);	/* do not free mbuf's */
+
+	default:
+		panic("idp_usrreq");
+	}
+release:
+	if (control != NULL)
+		m_freem(control);
+	if (m != NULL)
+		m_freem(m);
+	return (error);
+}
+/*
+ * User-request entry point for raw IDP sockets.
+ *
+ * Only PRU_ATTACH is handled here; every other request is passed to
+ * idp_usrreq() unchanged.  Attaching requires SS_PRIV on the socket and
+ * no existing pcb (EINVAL otherwise).  The new pcb is allocated on the
+ * nsrawpcb list, given 2048 bytes of send and receive buffer space, the
+ * broadcast host as foreign host, and both NSP_RAWIN and NSP_RAWOUT.
+ *
+ * Returns 0 or an errno value.
+ */
+/*ARGSUSED*/
+idp_raw_usrreq(so, req, m, nam, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	extern struct nspcb nsrawpcb;
+	struct nspcb *nsp = sotonspcb(so);
+	int error;
+
+	if (req != PRU_ATTACH)
+		return (idp_usrreq(so, req, m, nam, control));
+
+	if ((so->so_state & SS_PRIV) == 0 || nsp != NULL)
+		return (EINVAL);
+
+	error = ns_pcballoc(so, &nsrawpcb);
+	if (error)
+		return (error);
+	error = soreserve(so, (u_long) 2048, (u_long) 2048);
+	if (error)
+		return (error);
+
+	/* Raw sockets see and supply complete IDP headers. */
+	nsp = sotonspcb(so);
+	nsp->nsp_faddr.x_host = ns_broadhost;
+	nsp->nsp_flags = NSP_RAWIN | NSP_RAWOUT;
+	return (error);
+}
+
diff --git a/sys/netns/idp_var.h b/sys/netns/idp_var.h
new file mode 100644
index 000000000000..fc9a4f45d818
--- /dev/null
+++ b/sys/netns/idp_var.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)idp_var.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * IDP Kernel Structures and Variables
+ */
+/*
+ * Counters for rejected IDP packets.
+ * NOTE(review): the code that increments these is not in this header,
+ * and the last two comments speak of "ip"; they look carried over from
+ * the IP statistics structure -- confirm the intended IDP meaning.
+ */
+struct idpstat {
+ int idps_badsum; /* checksum bad */
+ int idps_tooshort; /* packet too short */
+ int idps_toosmall; /* not enough data */
+ int idps_badhlen; /* ip header length < data size */
+ int idps_badlen; /* ip length < ip header length */
+};
+
+#ifdef KERNEL
+struct idpstat idpstat;
+#endif
diff --git a/sys/netns/ns.c b/sys/netns/ns.c
new file mode 100644
index 000000000000..8b76543fce36
--- /dev/null
+++ b/sys/netns/ns.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ns.c 8.2 (Berkeley) 11/15/93
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/ioctl.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+
+#ifdef NS
+
+/* Head of the list of NS interface addresses, linked through ia_next. */
+struct ns_ifaddr *ns_ifaddr;
+/* Incremented when ns_control() allocates an address, decremented on delete. */
+int ns_interfaces;
+extern struct sockaddr_ns ns_netmask, ns_hostmask;
+
+/*
+ * Generic internet control operations (ioctl's).
+ *
+ * so    socket the request arrived on; the set/add/delete requests
+ *       require SS_PRIV (EPERM otherwise)
+ * cmd   SIOC* request code
+ * data  request argument, viewed as a struct ifreq or, for the alias
+ *       requests, a struct ns_aliasreq
+ * ifp   interface the request applies to; EADDRNOTAVAIL when null
+ *
+ * The SIOCGIF* requests copy out the address found for the interface.
+ * The set/add requests allocate and link a new ns_ifaddr when the
+ * interface has none yet.  Requests not handled here are passed to the
+ * interface's own ioctl routine, if it has one.
+ *
+ * Returns 0 or an errno value.  SIOCAIFADDR returns 0 when the request
+ * changes neither the host nor the destination address; previously that
+ * path returned an uninitialized variable.
+ */
+/* ARGSUSED */
+ns_control(so, cmd, data, ifp)
+	struct socket *so;
+	int cmd;
+	caddr_t data;
+	register struct ifnet *ifp;
+{
+	register struct ifreq *ifr = (struct ifreq *)data;
+	register struct ns_aliasreq *ifra = (struct ns_aliasreq *)data;
+	register struct ns_ifaddr *ia;
+	struct ifaddr *ifa;
+	struct ns_ifaddr *oia;
+	int error = 0, dstIsNew, hostIsNew;
+
+	/*
+	 * Find address for this interface, if it exists.
+	 */
+	if (ifp == 0)
+		return (EADDRNOTAVAIL);
+	for (ia = ns_ifaddr; ia; ia = ia->ia_next)
+		if (ia->ia_ifp == ifp)
+			break;
+
+	switch (cmd) {
+
+	case SIOCGIFADDR:
+		if (ia == (struct ns_ifaddr *)0)
+			return (EADDRNOTAVAIL);
+		*(struct sockaddr_ns *)&ifr->ifr_addr = ia->ia_addr;
+		return (0);
+
+
+	case SIOCGIFBRDADDR:
+		if (ia == (struct ns_ifaddr *)0)
+			return (EADDRNOTAVAIL);
+		if ((ifp->if_flags & IFF_BROADCAST) == 0)
+			return (EINVAL);
+		*(struct sockaddr_ns *)&ifr->ifr_dstaddr = ia->ia_broadaddr;
+		return (0);
+
+	case SIOCGIFDSTADDR:
+		if (ia == (struct ns_ifaddr *)0)
+			return (EADDRNOTAVAIL);
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+			return (EINVAL);
+		*(struct sockaddr_ns *)&ifr->ifr_dstaddr = ia->ia_dstaddr;
+		return (0);
+	}
+
+	/* Everything below changes state and needs privilege. */
+	if ((so->so_state & SS_PRIV) == 0)
+		return (EPERM);
+
+	switch (cmd) {
+	case SIOCAIFADDR:
+	case SIOCDIFADDR:
+		/* look for an entry on this interface with the same net */
+		if (ifra->ifra_addr.sns_family == AF_NS)
+		    for (oia = ia; ia; ia = ia->ia_next) {
+			if (ia->ia_ifp == ifp  &&
+			    ns_neteq(ia->ia_addr.sns_addr,
+				  ifra->ifra_addr.sns_addr))
+			    break;
+		    }
+		if (cmd == SIOCDIFADDR && ia == 0)
+			return (EADDRNOTAVAIL);
+		/* FALLTHROUGH */
+
+	case SIOCSIFADDR:
+	case SIOCSIFDSTADDR:
+		if (ia == (struct ns_ifaddr *)0) {
+			oia = (struct ns_ifaddr *)
+				malloc(sizeof *ia, M_IFADDR, M_WAITOK);
+			if (oia == (struct ns_ifaddr *)NULL)
+				return (ENOBUFS);
+			bzero((caddr_t)oia, sizeof(*oia));
+			/* append to the global NS address list ... */
+			if (ia = ns_ifaddr) {
+				for ( ; ia->ia_next; ia = ia->ia_next)
+					;
+				ia->ia_next = oia;
+			} else
+				ns_ifaddr = oia;
+			ia = oia;
+			/* ... and to the interface's own address list */
+			if (ifa = ifp->if_addrlist) {
+				for ( ; ifa->ifa_next; ifa = ifa->ifa_next)
+					;
+				ifa->ifa_next = (struct ifaddr *) ia;
+			} else
+				ifp->if_addrlist = (struct ifaddr *) ia;
+			ia->ia_ifp = ifp;
+			ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+
+			ia->ia_ifa.ifa_netmask =
+				(struct sockaddr *)&ns_netmask;
+
+			ia->ia_ifa.ifa_dstaddr =
+				(struct sockaddr *)&ia->ia_dstaddr;
+			if (ifp->if_flags & IFF_BROADCAST) {
+				ia->ia_broadaddr.sns_family = AF_NS;
+				ia->ia_broadaddr.sns_len = sizeof(ia->ia_addr);
+				ia->ia_broadaddr.sns_addr.x_host = ns_broadhost;
+			}
+			ns_interfaces++;
+		}
+	}
+
+	switch (cmd) {
+
+	case SIOCSIFDSTADDR:
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+			return (EINVAL);
+		if (ia->ia_flags & IFA_ROUTE) {
+			rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
+			ia->ia_flags &= ~IFA_ROUTE;
+		}
+		if (ifp->if_ioctl) {
+			error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR, ia);
+			if (error)
+				return (error);
+		}
+		*(struct sockaddr *)&ia->ia_dstaddr = ifr->ifr_dstaddr;
+		return (0);
+
+	case SIOCSIFADDR:
+		return (ns_ifinit(ifp, ia,
+				(struct sockaddr_ns *)&ifr->ifr_addr, 1));
+
+	case SIOCDIFADDR:
+		ns_ifscrub(ifp, ia);
+		/* unlink from the interface's address list */
+		if ((ifa = ifp->if_addrlist) == (struct ifaddr *)ia)
+			ifp->if_addrlist = ifa->ifa_next;
+		else {
+			while (ifa->ifa_next &&
+			       (ifa->ifa_next != (struct ifaddr *)ia))
+				    ifa = ifa->ifa_next;
+			if (ifa->ifa_next)
+			    ifa->ifa_next = ((struct ifaddr *)ia)->ifa_next;
+			else
+				printf("Couldn't unlink nsifaddr from ifp\n");
+		}
+		/* unlink from the global NS address list */
+		oia = ia;
+		if (oia == (ia = ns_ifaddr)) {
+			ns_ifaddr = ia->ia_next;
+		} else {
+			while (ia->ia_next && (ia->ia_next != oia)) {
+				ia = ia->ia_next;
+			}
+			if (ia->ia_next)
+			    ia->ia_next = oia->ia_next;
+			else
+				printf("Didn't unlink nsifadr from list\n");
+		}
+		IFAFREE((&oia->ia_ifa));
+		if (0 == --ns_interfaces) {
+			/*
+			 * We reset to virginity and start all over again
+			 */
+			ns_thishost = ns_zerohost;
+		}
+		return (0);
+
+	case SIOCAIFADDR:
+		dstIsNew = 0; hostIsNew = 1;
+		if (ia->ia_addr.sns_family == AF_NS) {
+			if (ifra->ifra_addr.sns_len == 0) {
+				ifra->ifra_addr = ia->ia_addr;
+				hostIsNew = 0;
+			} else if (ns_neteq(ifra->ifra_addr.sns_addr,
+					 ia->ia_addr.sns_addr))
+				hostIsNew = 0;
+		}
+		if ((ifp->if_flags & IFF_POINTOPOINT) &&
+		    (ifra->ifra_dstaddr.sns_family == AF_NS)) {
+			if (hostIsNew == 0)
+				ns_ifscrub(ifp, ia);
+			ia->ia_dstaddr = ifra->ifra_dstaddr;
+			dstIsNew  = 1;
+		}
+		/* error stays 0 when there is nothing to (re)initialize */
+		if (ifra->ifra_addr.sns_family == AF_NS &&
+					    (hostIsNew || dstIsNew))
+			error = ns_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+		return (error);
+
+	default:
+		if (ifp->if_ioctl == 0)
+			return (EOPNOTSUPP);
+		return ((*ifp->if_ioctl)(ifp, cmd, data));
+	}
+}
+
+/*
+ * Remove the route installed for an interface address, if there is one.
+ * Nothing happens unless IFA_ROUTE is set in ia->ia_flags.  The route is
+ * deleted as a host route on point-to-point interfaces and with no flags
+ * otherwise, and IFA_ROUTE is then cleared.
+ */
+ns_ifscrub(ifp, ia)
+	register struct ifnet *ifp;
+	register struct ns_ifaddr *ia;
+{
+	int rtflags;
+
+	if ((ia->ia_flags & IFA_ROUTE) == 0)
+		return;
+
+	rtflags = (ifp->if_flags & IFF_POINTOPOINT) ? RTF_HOST : 0;
+	rtinit(&(ia->ia_ifa), (int)RTM_DELETE, rtflags);
+	ia->ia_flags &= ~IFA_ROUTE;
+}
+/*
+ * Initialize an interface's internet address
+ * and routing table entry.
+ *
+ * ifp    interface being configured
+ * ia     its NS address record, updated in place
+ * sns    the new address
+ * scrub  when nonzero, the route for the old address is removed first
+ *
+ * Returns 0 on success.  On failure the old address is put back and the
+ * error from the interface's ioctl routine, or EINVAL for an address
+ * that conflicts with the host address already in use, is returned.
+ *
+ * The priority level raised by splimp() is restored on every path; the
+ * success path previously returned without calling splx().
+ */
+ns_ifinit(ifp, ia, sns, scrub)
+	register struct ifnet *ifp;
+	register struct ns_ifaddr *ia;
+	register struct sockaddr_ns *sns;
+	int scrub;
+{
+	struct sockaddr_ns oldaddr;
+	register union ns_host *h = &ia->ia_addr.sns_addr.x_host;
+	int s = splimp(), error;
+
+	/*
+	 * Set up new addresses.
+	 */
+	oldaddr = ia->ia_addr;
+	ia->ia_addr = *sns;
+	/*
+	 * The convention we shall adopt for naming is that
+	 * a supplied address of zero means that "we don't care".
+	 * if there is a single interface, use the address of that
+	 * interface as our 6 byte host address.
+	 * if there are multiple interfaces, use any address already
+	 * used.
+	 *
+	 * Give the interface a chance to initialize
+	 * if this is its first address,
+	 * and to validate the address if necessary.
+	 */
+	if (ns_hosteqnh(ns_thishost, ns_zerohost)) {
+		if (ifp->if_ioctl &&
+		     (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, ia))) {
+			ia->ia_addr = oldaddr;
+			splx(s);
+			return (error);
+		}
+		ns_thishost = *h;
+	} else if (ns_hosteqnh(sns->sns_addr.x_host, ns_zerohost)
+	    || ns_hosteqnh(sns->sns_addr.x_host, ns_thishost)) {
+		*h = ns_thishost;
+		if (ifp->if_ioctl &&
+		     (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, ia))) {
+			ia->ia_addr = oldaddr;
+			splx(s);
+			return (error);
+		}
+		/* the driver must not have changed the host part */
+		if (!ns_hosteqnh(ns_thishost,*h)) {
+			ia->ia_addr = oldaddr;
+			splx(s);
+			return (EINVAL);
+		}
+	} else {
+		ia->ia_addr = oldaddr;
+		splx(s);
+		return (EINVAL);
+	}
+	/* The address is settled; drop back to the caller's priority. */
+	splx(s);
+	ia->ia_ifa.ifa_metric = ifp->if_metric;
+	/*
+	 * Add route for the network.
+	 */
+	if (scrub) {
+		/* point ifa_addr at the old address while its route goes */
+		ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
+		ns_ifscrub(ifp, ia);
+		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+	}
+	if (ifp->if_flags & IFF_POINTOPOINT)
+		rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+	else {
+		ia->ia_broadaddr.sns_addr.x_net = ia->ia_addr.sns_addr.x_net;
+		rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_UP);
+	}
+	ia->ia_flags |= IFA_ROUTE;
+	return (0);
+}
+
+/*
+ * Find the interface address to use for reaching dst.
+ *
+ * The NS address list is scanned in order.  An entry on a point-to-point
+ * interface is returned at once when its destination address has the
+ * same host as dst; if only its network number matches, it is remembered
+ * as a fallback.  An entry on any other interface is returned at once
+ * when its network number matches.  Entries with no interface are
+ * skipped.
+ *
+ * Returns the matching entry, else the last remembered fallback, else a
+ * null pointer.
+ */
+struct ns_ifaddr *
+ns_iaonnetof(dst)
+	register struct ns_addr *dst;
+{
+	register struct ns_ifaddr *ia;
+	register struct ns_addr *compare;
+	register struct ifnet *ifp;
+	struct ns_ifaddr *fallback = 0;
+	union ns_net net = dst->x_net;
+
+	for (ia = ns_ifaddr; ia; ia = ia->ia_next) {
+		ifp = ia->ia_ifp;
+		if (ifp == 0)
+			continue;
+
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
+			if (ns_neteqnn(net, ia->ia_addr.sns_addr.x_net))
+				return (ia);
+			continue;
+		}
+
+		/* point-to-point link: prefer an exact match on the peer */
+		compare = &satons_addr(ia->ia_dstaddr);
+		if (ns_hosteq(*dst, *compare))
+			return (ia);
+		if (ns_neteqnn(net, ia->ia_addr.sns_addr.x_net))
+			fallback = ia;
+	}
+	return (fallback);
+}
+#endif
diff --git a/sys/netns/ns.h b/sys/netns/ns.h
new file mode 100644
index 000000000000..cf51f0047e92
--- /dev/null
+++ b/sys/netns/ns.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ns.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Constants and Structures defined by the Xerox Network Software
+ * per "Internet Transport Protocols", XSIS 028112, December 1981
+ */
+
+/*
+ * Protocols
+ */
+#define NSPROTO_RI 1 /* Routing Information */
+#define NSPROTO_ECHO 2 /* Echo Protocol */
+#define NSPROTO_ERROR 3 /* Error Protocol */
+#define NSPROTO_PE 4 /* Packet Exchange */
+#define NSPROTO_SPP 5 /* Sequenced Packet */
+#define NSPROTO_RAW 255 /* Placemarker*/
+#define NSPROTO_MAX 256 /* Placemarker*/
+
+
+/*
+ * Port/Socket numbers: network standard functions
+ */
+
+#define NSPORT_RI 1 /* Routing Information */
+#define NSPORT_ECHO 2 /* Echo */
+#define NSPORT_RE 3 /* Router Error */
+
+/*
+ * Ports < NSPORT_RESERVED are reserved for privileged
+ * processes (e.g. root).
+ */
+#define NSPORT_RESERVED 3000
+
+/* flags passed to ns_output as last parameter */
+
+#define NS_FORWARDING 0x1 /* most of idp header exists */
+#define NS_ROUTETOIF 0x10 /* same as SO_DONTROUTE */
+#define NS_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */
+
+#define NS_MAXHOPS 15
+
+/* flags passed to get/set socket option */
+#define SO_HEADERS_ON_INPUT 1
+#define SO_HEADERS_ON_OUTPUT 2
+#define SO_DEFAULT_HEADERS 3
+#define SO_LAST_HEADER 4
+#define SO_NSIP_ROUTE 5
+#define SO_SEQNO 6
+#define SO_ALL_PACKETS 7
+#define SO_MTU 8
+
+
+/*
+ * NS addressing
+ */
+union ns_host {
+ u_char c_host[6]; /* host number viewed as six bytes */
+ u_short s_host[3]; /* same storage viewed as three shorts (used by ns_hosteqnh) */
+};
+
+union ns_net {
+ u_char c_net[4]; /* network number viewed as four bytes */
+ u_short s_net[2]; /* same storage viewed as two shorts (used by ns_neteqnn) */
+};
+
+union ns_net_u {
+ union ns_net net_e; /* network number in its union form */
+ u_long long_e; /* overlay of the same storage as one u_long */
+};
+
+struct ns_addr {
+ union ns_net x_net; /* network part of the address */
+ union ns_host x_host; /* host part of the address */
+ u_short x_port; /* port (socket) number; see the NSPORT_* values */
+};
+
+/*
+ * Socket address, Xerox style
+ */
+struct sockaddr_ns {
+ u_char sns_len;
+ u_char sns_family;
+ struct ns_addr sns_addr;
+ char sns_zero[2];
+};
+#define sns_port sns_addr.x_port
+
+#ifdef vax
+#define ns_netof(a) (*(long *) & ((a).x_net)) /* XXX - not needed */
+#endif
+#define ns_neteqnn(a,b) (((a).s_net[0]==(b).s_net[0]) && \
+ ((a).s_net[1]==(b).s_net[1]))
+#define ns_neteq(a,b) ns_neteqnn((a).x_net, (b).x_net)
+#define satons_addr(sa) (((struct sockaddr_ns *)&(sa))->sns_addr)
+#define ns_hosteqnh(s,t) ((s).s_host[0] == (t).s_host[0] && \
+ (s).s_host[1] == (t).s_host[1] && (s).s_host[2] == (t).s_host[2])
+#define ns_hosteq(s,t) (ns_hosteqnh((s).x_host,(t).x_host))
+#define ns_nullhost(x) (((x).x_host.s_host[0]==0) && \
+ ((x).x_host.s_host[1]==0) && ((x).x_host.s_host[2]==0))
+
+#ifdef KERNEL
+extern struct domain nsdomain;
+union ns_host ns_thishost;
+union ns_host ns_zerohost;
+union ns_host ns_broadhost;
+union ns_net ns_zeronet;
+union ns_net ns_broadnet;
+u_short ns_cksum();
+#else
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+extern struct ns_addr ns_addr __P((const char *));
+extern char *ns_ntoa __P((struct ns_addr));
+__END_DECLS
+
+#endif
diff --git a/sys/netns/ns_cksum.c b/sys/netns/ns_cksum.c
new file mode 100644
index 000000000000..52eba8bce816
--- /dev/null
+++ b/sys/netns/ns_cksum.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 1982, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ns_cksum.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+/*
+ * Checksum routine for Network Systems Protocol Packets (Big-Endian).
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ */
+
+#define ADDCARRY(x) { if ((x) > 65535) (x) -= 65535; }
+#define FOLD(x) {l_util.l = (x); (x) = l_util.s[0] + l_util.s[1]; ADDCARRY(x);}
+
+u_short
+ns_cksum(m, len)
+ register struct mbuf *m;
+ register int len;
+{
+ register u_short *w;
+ register int sum = 0; /* running checksum accumulator */
+ register int mlen = 0; /* bytes left in this mbuf; -1 marks a dangling odd byte */
+ register int sum2; /* second accumulator, used only on the odd-address path */
+
+ union {
+ u_short s[2];
+ long l;
+ } l_util;
+
+ for (;m && len; m = m->m_next) {
+ if (m->m_len == 0)
+ continue;
+ /*
+ * Each trip around loop adds in
+ * word from one mbuf segment.
+ */
+ w = mtod(m, u_short *);
+ if (mlen == -1) {
+ /*
+ * There is a byte left from the last segment;
+ * ones-complement add it into the checksum.
+ */
+#if BYTE_ORDER == BIG_ENDIAN
+ sum += *(u_char *)w;
+#else
+ sum += *(u_char *)w << 8;
+#endif
+ sum += sum; /* doubling, combined with FOLD below, acts as a 16-bit rotate left by one */
+ w = (u_short *)(1 + (char *)w);
+ mlen = m->m_len - 1;
+ len--;
+ FOLD(sum);
+ } else
+ mlen = m->m_len;
+ if (len < mlen)
+ mlen = len;
+ len -= mlen;
+ /*
+ * We can do a 16 bit ones complement sum using
+ * 32 bit arithmetic registers for adding,
+ * with carries from the low added
+ * into the high (by normal carry-chaining)
+ * so long as we fold back before 16 carries have occurred.
+ */
+ if (1 & (int) w) /* data starts on an odd address: use the byte-wise path */
+ goto uuuuglyy;
+#ifndef TINY
+/* -DTINY reduces the size from 1250 to 550, but slows it down by 22% */
+ while ((mlen -= 32) >= 0) {
+ sum += w[0]; sum += sum; sum += w[1]; sum += sum;
+ sum += w[2]; sum += sum; sum += w[3]; sum += sum;
+ sum += w[4]; sum += sum; sum += w[5]; sum += sum;
+ sum += w[6]; sum += sum; sum += w[7]; sum += sum;
+ FOLD(sum);
+ sum += w[8]; sum += sum; sum += w[9]; sum += sum;
+ sum += w[10]; sum += sum; sum += w[11]; sum += sum;
+ sum += w[12]; sum += sum; sum += w[13]; sum += sum;
+ sum += w[14]; sum += sum; sum += w[15]; sum += sum;
+ FOLD(sum);
+ w += 16;
+ }
+ mlen += 32;
+#endif
+ while ((mlen -= 8) >= 0) {
+ sum += w[0]; sum += sum; sum += w[1]; sum += sum;
+ sum += w[2]; sum += sum; sum += w[3]; sum += sum;
+ FOLD(sum);
+ w += 4;
+ }
+ mlen += 8;
+ while ((mlen -= 2) >= 0) {
+ sum += *w++; sum += sum;
+ }
+ goto commoncase;
+uuuuglyy:
+#if BYTE_ORDER == BIG_ENDIAN
+#define ww(n) (((u_char *)w)[n + n + 1])
+#define vv(n) (((u_char *)w)[n + n])
+#else
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define vv(n) (((u_char *)w)[n + n + 1])
+#define ww(n) (((u_char *)w)[n + n])
+#endif
+#endif
+ sum2 = 0;
+#ifndef TINY
+ while ((mlen -= 32) >= 0) {
+ sum += ww(0); sum += sum; sum += ww(1); sum += sum;
+ sum += ww(2); sum += sum; sum += ww(3); sum += sum;
+ sum += ww(4); sum += sum; sum += ww(5); sum += sum;
+ sum += ww(6); sum += sum; sum += ww(7); sum += sum;
+ FOLD(sum);
+ sum += ww(8); sum += sum; sum += ww(9); sum += sum;
+ sum += ww(10); sum += sum; sum += ww(11); sum += sum;
+ sum += ww(12); sum += sum; sum += ww(13); sum += sum;
+ sum += ww(14); sum += sum; sum += ww(15); sum += sum;
+ FOLD(sum);
+ sum2 += vv(0); sum2 += sum2; sum2 += vv(1); sum2 += sum2;
+ sum2 += vv(2); sum2 += sum2; sum2 += vv(3); sum2 += sum2;
+ sum2 += vv(4); sum2 += sum2; sum2 += vv(5); sum2 += sum2;
+ sum2 += vv(6); sum2 += sum2; sum2 += vv(7); sum2 += sum2;
+ FOLD(sum2);
+ sum2 += vv(8); sum2 += sum2; sum2 += vv(9); sum2 += sum2;
+ sum2 += vv(10); sum2 += sum2; sum2 += vv(11); sum2 += sum2;
+ sum2 += vv(12); sum2 += sum2; sum2 += vv(13); sum2 += sum2;
+ sum2 += vv(14); sum2 += sum2; sum2 += vv(15); sum2 += sum2;
+ FOLD(sum2);
+ w += 16;
+ }
+ mlen += 32;
+#endif
+ while ((mlen -= 8) >= 0) {
+ sum += ww(0); sum += sum; sum += ww(1); sum += sum;
+ sum += ww(2); sum += sum; sum += ww(3); sum += sum;
+ FOLD(sum);
+ sum2 += vv(0); sum2 += sum2; sum2 += vv(1); sum2 += sum2;
+ sum2 += vv(2); sum2 += sum2; sum2 += vv(3); sum2 += sum2;
+ FOLD(sum2);
+ w += 4;
+ }
+ mlen += 8;
+ while ((mlen -= 2) >= 0) {
+ sum += ww(0); sum += sum;
+ sum2 += vv(0); sum2 += sum2;
+ w++;
+ }
+ sum += (sum2 << 8); /* merge the vv() bytes, accumulated separately, into the upper byte position */
+commoncase:
+ if (mlen == -1) {
+#if BYTE_ORDER == BIG_ENDIAN
+ sum += *(u_char *)w << 8;
+#else
+ sum += *(u_char *)w;
+#endif
+ }
+ FOLD(sum);
+ }
+ if (mlen == -1) {
+ /* We had an odd number of bytes to sum; assume a garbage
+ byte of zero and clean up */
+ sum += sum;
+ FOLD(sum);
+ }
+ /*
+ * sum has already been kept to low sixteen bits.
+ * just examine result and exit.
+ */
+ if(sum==0xffff) sum = 0;
+ return (sum);
+}
diff --git a/sys/netns/ns_error.c b/sys/netns/ns_error.c
new file mode 100644
index 000000000000..03473a236805
--- /dev/null
+++ b/sys/netns/ns_error.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 1984, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ns_error.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/route.h>
+
+#include <netns/ns.h>
+#include <netns/ns_pcb.h>
+#include <netns/idp.h>
+#include <netns/ns_error.h>
+
+#ifdef lint
+#define NS_ERRPRINTFS 1
+#endif
+
+#ifdef NS_ERRPRINTFS
+/*
+ * NS_ERR routines: error generation, receive packet processing, and
+ * routines to turnaround packets back to the originator.
+ */
+int ns_errprintfs = 0;
+#endif
+
+ns_err_x(c)
+{
+	register u_short *codes = ns_errstat.ns_es_codes;
+	register int slot;
+	u_short code = c;
+
+	/*
+	 * Error code zero is valid and always maps to histogram slot 0.
+	 */
+	if (code == 0)
+		return (0);
+	for (slot = 1; slot < NS_ERR_MAX - 1; slot++) {
+		/* Claim the first empty slot for a code not seen before. */
+		if (codes[slot] == 0)
+			codes[slot] = code;
+		if (codes[slot] == code)
+			break;
+	}
+	return (slot);	/* NS_ERR_MAX - 1 when no slot matched or was free */
+}
+
+/*
+ * Generate an error packet of type `type' (parameter `param') in response to
+ * the bad packet om.  om is consumed: it is either echoed back by ns_echo or freed here.
+ */
+
+ns_error(om, type, param)
+ struct mbuf *om;
+ int type;
+{
+ register struct ns_epidp *ep;
+ struct mbuf *m;
+ struct idp *nip;
+ register struct idp *oip = mtod(om, struct idp *);
+ extern int idpcksum;
+
+ /*
+ * If this packet was sent to the echo port,
+ * and nobody was there, just echo it.
+ * (Yes, this is a wart!)  If ns_echo declines, its return value becomes the error type.
+ */
+ if (type == NS_ERR_NOSOCK &&
+ oip->idp_dna.x_port == htons(NSPORT_ECHO) &&
+ (type = ns_echo(om))==0)
+ return;
+
+#ifdef NS_ERRPRINTFS
+ if (ns_errprintfs)
+ printf("ns_err_error(%x, %d, %d)\n", oip, type, param);
+#endif
+ /*
+ * Don't Generate error packets in response to multicasts.
+ */
+ if (oip->idp_dna.x_host.c_host[0] & 1)
+ goto freeit;
+
+ ns_errstat.ns_es_error++;
+ /*
+ * Make sure that the old IDP packet had 30 bytes of data to return;
+ * if not, don't bother. Also don't EVER error if the old
+ * packet protocol was NS_ERR.
+ */
+ if (oip->idp_len < sizeof(struct idp)) { /* NOTE(review): no ntohs here -- confirm idp_len byte order at this point */
+ ns_errstat.ns_es_oldshort++;
+ goto freeit;
+ }
+ if (oip->idp_pt == NSPROTO_ERROR) {
+ ns_errstat.ns_es_oldns_err++;
+ goto freeit;
+ }
+
+ /*
+ * First, formulate ns_err message
+ */
+ m = m_gethdr(M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ goto freeit;
+ m->m_len = sizeof(*ep);
+ MH_ALIGN(m, m->m_len);
+ ep = mtod(m, struct ns_epidp *);
+ if ((u_int)type > NS_ERR_TOO_BIG)
+ panic("ns_err_error");
+ ns_errstat.ns_es_outhist[ns_err_x(type)]++;
+ ep->ns_ep_errp.ns_err_num = htons((u_short)type);
+ ep->ns_ep_errp.ns_err_param = htons((u_short)param);
+ bcopy((caddr_t)oip, (caddr_t)&ep->ns_ep_errp.ns_err_idp, 42); /* presumably 30-byte idp header + 12 bytes of ns_err_lev2 -- verify */
+ nip = &ep->ns_ep_idp;
+ nip->idp_len = sizeof(*ep);
+ nip->idp_len = htons((u_short)nip->idp_len);
+ nip->idp_pt = NSPROTO_ERROR;
+ nip->idp_tc = 0;
+ nip->idp_dna = oip->idp_sna; /* reply goes back to the original sender */
+ nip->idp_sna = oip->idp_dna;
+ if (idpcksum) {
+ nip->idp_sum = 0;
+ nip->idp_sum = ns_cksum(m, sizeof(*ep));
+ } else
+ nip->idp_sum = 0xffff;
+ (void) ns_output(m, (struct route *)0, 0);
+
+freeit:
+ m_freem(om);
+}
+
+ns_printhost(p)
+	register struct ns_addr *p;
+{
+	register u_short *net = p->x_net.s_net;
+	register u_short *host = p->x_host.s_host;
+
+	/*
+	 * Print the address pointed to by p.  Every 16-bit word is shown
+	 * in hex exactly as stored: no byte swapping, no zero padding.
+	 */
+	printf("<net:%x%x,host:%x%x%x,port:%x>",
+	    net[0], net[1], host[0], host[1], host[2], p->x_port);
+}
+
+/*
+ * Process a received NS_ERR message: count it, map its error number to a PRC_* code, notify the quoted packet's protocol, then free the mbuf chain.
+ */
+ns_err_input(m)
+ struct mbuf *m;
+{
+ register struct ns_errp *ep;
+ register struct ns_epidp *epidp = mtod(m, struct ns_epidp *);
+ register int i;
+ int type, code, param;
+
+ /*
+ * Locate ns_err structure in mbuf, and check
+ * that not corrupted and of at least minimum length.
+ */
+#ifdef NS_ERRPRINTFS
+ if (ns_errprintfs) {
+ printf("ns_err_input from ");
+ ns_printhost(&epidp->ns_ep_idp.idp_sna);
+ printf("len %d\n", ntohs(epidp->ns_ep_idp.idp_len));
+ }
+#endif
+ i = sizeof (struct ns_epidp);
+ if (((m->m_flags & M_EXT) || m->m_len < i) &&
+ (m = m_pullup(m, i)) == 0) {
+ ns_errstat.ns_es_tooshort++;
+ return;
+ }
+ ep = &(mtod(m, struct ns_epidp *)->ns_ep_errp);
+ type = ntohs(ep->ns_err_num);
+ param = ntohs(ep->ns_err_param);
+ ns_errstat.ns_es_inhist[ns_err_x(type)]++;
+
+#ifdef NS_ERRPRINTFS
+ /*
+ * Message type specific processing.
+ */
+ if (ns_errprintfs)
+ printf("ns_err_input, type %d param %d\n", type, param);
+#endif
+ if (type > NS_ERR_TOO_BIG) { /* was >=, which left case NS_ERR_TOO_BIG below unreachable */
+ goto badcode;
+ }
+ ns_errstat.ns_es_outhist[ns_err_x(type)]++; /* NOTE(review): receive path also bumps outhist -- confirm intended */
+ switch (type) {
+
+ case NS_ERR_UNREACH_HOST:
+ code = PRC_UNREACH_NET;
+ goto deliver;
+
+ case NS_ERR_TOO_OLD:
+ code = PRC_TIMXCEED_INTRANS;
+ goto deliver;
+
+ case NS_ERR_TOO_BIG:
+ code = PRC_MSGSIZE;
+ goto deliver;
+
+ case NS_ERR_FULLUP:
+ code = PRC_QUENCH;
+ goto deliver;
+
+ case NS_ERR_NOSOCK:
+ code = PRC_UNREACH_PORT;
+ goto deliver;
+
+ case NS_ERR_UNSPEC_T:
+ case NS_ERR_BADSUM_T:
+ case NS_ERR_BADSUM:
+ case NS_ERR_UNSPEC:
+ code = PRC_PARAMPROB;
+ goto deliver;
+
+ deliver:
+ /*
+ * Problem with datagram; advise higher level routines.
+ */
+#ifdef NS_ERRPRINTFS
+ if (ns_errprintfs)
+ printf("deliver to protocol %d\n",
+ ep->ns_err_idp.idp_pt);
+#endif
+ switch(ep->ns_err_idp.idp_pt) {
+ case NSPROTO_SPP:
+ spp_ctlinput(code, (caddr_t)ep);
+ break;
+
+ default:
+ idp_ctlinput(code, (caddr_t)ep);
+ }
+
+ goto freeit;
+
+ default:
+ badcode:
+ ns_errstat.ns_es_badcode++;
+ goto freeit;
+
+ }
+freeit:
+ m_freem(m);
+}
+
+#ifdef notdef
+u_long
+nstime()
+{
+ int s = splclock(); /* priority level is restored by splx(s) below */
+ u_long t;
+
+ t = (time.tv_sec % (24*60*60)) * 1000 + time.tv_usec / 1000; /* milliseconds into the current 24-hour period of time.tv_sec */
+ splx(s);
+ return (htonl(t)); /* returned in network byte order */
+}
+#endif
+
+ns_echo(m)
+	struct mbuf *m;
+{
+	register struct idp *hdr = mtod(m, struct idp *);
+	register struct echo {
+		struct idp ec_idp;
+		u_short ec_op;	/* Operation, 1 = request, 2 = reply */
+	} *req = (struct echo *)hdr;
+	struct ns_addr dest;
+
+	if (hdr->idp_pt != NSPROTO_ECHO)
+		return (NS_ERR_NOSOCK);
+	if (req->ec_op != htons(1))
+		return (NS_ERR_UNSPEC);
+	req->ec_op = htons(2);
+	/* Turn the request around: swap destination and source addresses. */
+	dest = hdr->idp_sna;
+	hdr->idp_sna = hdr->idp_dna;
+	hdr->idp_dna = dest;
+	if (hdr->idp_sum != 0xffff) {
+		/* Recompute the checksum over idp_len rounded up to an even count. */
+		hdr->idp_sum = 0;
+		hdr->idp_sum = ns_cksum(m, (int)(((ntohs(hdr->idp_len) - 1)|1)+1));
+	}
+	(void) ns_output(m, (struct route *)0, NS_FORWARDING);
+	return (0);
+}
diff --git a/sys/netns/ns_error.h b/sys/netns/ns_error.h
new file mode 100644
index 000000000000..992911f1552c
--- /dev/null
+++ b/sys/netns/ns_error.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 1984, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ns_error.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Xerox NS error messages
+ */
+
+struct ns_errp {
+ u_short ns_err_num; /* Error Number */
+ u_short ns_err_param; /* Error Parameter */
+ struct idp ns_err_idp; /* Initial segment of offending
+ packet */
+ u_char ns_err_lev2[12]; /* at least this much higher
+ level protocol */
+};
+struct ns_epidp {
+ struct idp ns_ep_idp; /* IDP header of the error packet itself */
+ struct ns_errp ns_ep_errp; /* error number, parameter and start of the offending packet */
+};
+
+#define NS_ERR_UNSPEC 0 /* Unspecified Error detected at dest. */
+#define NS_ERR_BADSUM 1 /* Bad Checksum detected at dest */
+#define NS_ERR_NOSOCK 2 /* Specified socket does not exist at dest*/
+#define NS_ERR_FULLUP 3 /* Dest. refuses packet due to resource lim.*/
+#define NS_ERR_UNSPEC_T 0x200 /* Unspec. Error occurred before reaching dest*/
+#define NS_ERR_BADSUM_T 0x201 /* Bad Checksum detected in transit */
+#define NS_ERR_UNREACH_HOST 0x202 /* Dest cannot be reached from here*/
+#define NS_ERR_TOO_OLD 0x203 /* Packet x'd 15 routers without delivery*/
+#define NS_ERR_TOO_BIG 0x204 /* Packet too large to be forwarded through
+ some intermediate gateway. The error
+ parameter field contains the max packet
+ size that can be accommodated */
+#define NS_ERR_MAX 20
+
+/*
+ * Variables related to this implementation
+ * of the network systems error message protocol.
+ */
+struct ns_errstat {
+/* statistics related to ns_err packets generated */
+ int ns_es_error; /* # of calls to ns_error */
+ int ns_es_oldshort; /* no error 'cuz old ip too short */
+ int ns_es_oldns_err; /* no error 'cuz old was ns_err */
+ int ns_es_outhist[NS_ERR_MAX];
+/* statistics related to input messages processed */
+ int ns_es_badcode; /* ns_err_code out of range */
+ int ns_es_tooshort; /* packet < IDP_MINLEN */
+ int ns_es_checksum; /* bad checksum */
+ int ns_es_badlen; /* calculated bound mismatch */
+ int ns_es_reflect; /* number of responses */
+ int ns_es_inhist[NS_ERR_MAX];
+ u_short ns_es_codes[NS_ERR_MAX];/* which error code for outhist
+ since we might not know all */
+};
+
+#ifdef KERNEL
+struct ns_errstat ns_errstat;
+#endif
diff --git a/sys/netns/ns_if.h b/sys/netns/ns_if.h
new file mode 100644
index 000000000000..3abb284a1dd7
--- /dev/null
+++ b/sys/netns/ns_if.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ns_if.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Interface address, xerox version. One of these structures
+ * is allocated for each interface with an internet address.
+ * The ifaddr structure contains the protocol-independent part
+ * of the structure and is assumed to be first.
+ */
+
+struct ns_ifaddr {
+ struct ifaddr ia_ifa; /* protocol-independent info */
+#define ia_ifp ia_ifa.ifa_ifp
+#define ia_flags ia_ifa.ifa_flags
+ struct ns_ifaddr *ia_next; /* next in list of xerox addresses */
+ struct sockaddr_ns ia_addr; /* reserve space for my address */
+ struct sockaddr_ns ia_dstaddr; /* space for my broadcast address */
+#define ia_broadaddr ia_dstaddr
+ struct sockaddr_ns ia_netmask; /* space for my network mask */
+};
+
+struct ns_aliasreq {
+ char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct sockaddr_ns ifra_addr;
+ struct sockaddr_ns ifra_broadaddr;
+#define ifra_dstaddr ifra_broadaddr
+};
+/*
+ * Given a pointer to an ns_ifaddr (ifaddr),
+ * return a pointer to the addr as a sockaddr_ns.
+ */
+
+#define IA_SNS(ia) (&(((struct ns_ifaddr *)(ia))->ia_addr))
+
+/* This is not the right place for this but where is? */
+#define ETHERTYPE_NS 0x0600
+
+#ifdef NSIP
+struct nsip_req {
+ struct sockaddr rq_ns; /* must be ns format destination */
+ struct sockaddr rq_ip; /* must be ip format gateway */
+ short rq_flags;
+};
+#endif
+
+#ifdef KERNEL
+struct ns_ifaddr *ns_ifaddr;
+struct ns_ifaddr *ns_iaonnetof();
+struct ifqueue nsintrq; /* XNS input packet queue */
+#endif
diff --git a/sys/netns/ns_input.c b/sys/netns/ns_input.c
new file mode 100644
index 000000000000..7a6e1babc7c8
--- /dev/null
+++ b/sys/netns/ns_input.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ns_input.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#include <netns/ns_pcb.h>
+#include <netns/idp.h>
+#include <netns/idp_var.h>
+#include <netns/ns_error.h>
+
+/*
+ * NS initialization.
+ *
+ * Well-known host and network values and the two sockaddr masks.
+ * ns_broadhost, ns_broadnet, ns_netmask and ns_hostmask are filled in
+ * by ns_init(); ns_zerohost and ns_zeronet are never assigned in this
+ * file and keep their static zero initialization.
+ */
+union ns_host ns_thishost;
+union ns_host ns_zerohost;
+union ns_host ns_broadhost;
+union ns_net ns_zeronet;
+union ns_net ns_broadnet;
+struct sockaddr_ns ns_netmask, ns_hostmask;
+
+/* all-ones pattern copied into ns_broadhost and ns_broadnet by ns_init() */
+static u_short allones[] = {-1, -1, -1};
+
+/* heads of the circular pcb lists; ns_init() points each at itself */
+struct nspcb nspcb;
+struct nspcb nsrawpcb;
+
+/* input queue drained by nsintr(); its limit is taken from nsqmaxlen */
+struct ifqueue nsintrq;
+int nsqmaxlen = IFQ_MAXLEN;
+
+int idpcksum = 1; /* nonzero: nsintr() verifies IDP checksums */
+long ns_pexseq; /* seeded from time.tv_usec by ns_init() */
+
+/*
+ * One-time setup of the NS protocol family: broadcast constants,
+ * empty pcb lists, input queue limit, packet-exchange sequence seed
+ * and the network/host sockaddr masks.
+ */
+ns_init()
+{
+	extern struct timeval time;
+
+	/* Broadcast host and network are all ones. */
+	ns_broadnet = * (union ns_net *) allones;
+	ns_broadhost = * (union ns_host *) allones;
+
+	/* An empty pcb list is a head that points at itself. */
+	nspcb.nsp_prev = &nspcb;
+	nspcb.nsp_next = &nspcb;
+	nsrawpcb.nsp_prev = &nsrawpcb;
+	nsrawpcb.nsp_next = &nsrawpcb;
+
+	nsintrq.ifq_maxlen = nsqmaxlen;
+	ns_pexseq = time.tv_usec;
+
+	/* The net mask covers the network part only. */
+	ns_netmask.sns_addr.x_net = ns_broadnet;
+	ns_netmask.sns_len = 6;
+
+	/* The host mask covers network and host parts. */
+	ns_hostmask.sns_addr.x_host = ns_broadhost;
+	ns_hostmask.sns_addr.x_net = ns_broadnet;
+	ns_hostmask.sns_len = 12;
+}
+
+/*
+ * Idp input routine. Pass to next level.
+ *
+ * nsintr() drains nsintrq.  Each packet is offered to every raw
+ * listener, checked against the length and checksum carried in its
+ * IDP header, and then forwarded, handed to a protocol input routine,
+ * answered with an NS error, or dropped.  The routine returns once the
+ * queue is empty.
+ */
+int nsintr_getpck = 0; /* dequeue attempts, including the final empty one */
+int nsintr_swtch = 0; /* packets that reached the pcb lookup/switch */
+nsintr()
+{
+ register struct idp *idp;
+ register struct mbuf *m;
+ register struct nspcb *nsp;
+ register int i;
+ int len, s, error;
+ char oddpacketp;
+
+next:
+ /*
+ * Get next datagram off input queue and get IDP header
+ * in first mbuf.
+ */
+ s = splimp();
+ IF_DEQUEUE(&nsintrq, m);
+ splx(s);
+ nsintr_getpck++;
+ if (m == 0)
+ return;
+ /* on m_pullup failure m is 0 and there is nothing left to free */
+ if ((m->m_flags & M_EXT || m->m_len < sizeof (struct idp)) &&
+ (m = m_pullup(m, sizeof (struct idp))) == 0) {
+ idpstat.idps_toosmall++;
+ goto next;
+ }
+
+ /*
+ * Give any raw listeners a crack at the packet
+ * (each gets its own copy; a failed copy is silently skipped).
+ */
+ for (nsp = nsrawpcb.nsp_next; nsp != &nsrawpcb; nsp = nsp->nsp_next) {
+ struct mbuf *m1 = m_copy(m, 0, (int)M_COPYALL);
+ if (m1) idp_input(m1, nsp);
+ }
+
+ idp = mtod(m, struct idp *);
+ len = ntohs(idp->idp_len);
+ if (oddpacketp = len & 1) {
+ len++; /* If this packet is of odd length,
+ preserve garbage byte for checksum */
+ }
+
+ /*
+ * Check that the amount of data in the buffers
+ * is at least as much as the IDP header would have us expect.
+ * Trim mbufs if longer than we expect.
+ * Drop packet if shorter than we expect.
+ */
+ if (m->m_pkthdr.len < len) {
+ idpstat.idps_tooshort++;
+ goto bad;
+ }
+ if (m->m_pkthdr.len > len) {
+ if (m->m_len == m->m_pkthdr.len) {
+ /* single-mbuf packet: just shorten it in place */
+ m->m_len = len;
+ m->m_pkthdr.len = len;
+ } else
+ /* the count passed to m_adj is negative here */
+ m_adj(m, len - m->m_pkthdr.len);
+ }
+ /* a checksum field of 0xffff skips verification */
+ if (idpcksum && ((i = idp->idp_sum)!=0xffff)) {
+ idp->idp_sum = 0;
+ if (i != (idp->idp_sum = ns_cksum(m, len))) {
+ idpstat.idps_badsum++;
+ /* restore the received checksum before reporting */
+ idp->idp_sum = i;
+ if (ns_hosteqnh(ns_thishost, idp->idp_dna.x_host))
+ error = NS_ERR_BADSUM;
+ else
+ error = NS_ERR_BADSUM_T;
+ /* m is not touched again after being handed to ns_error */
+ ns_error(m, error, 0);
+ goto next;
+ }
+ }
+ /*
+ * Is this a directed broadcast?
+ */
+ if (ns_hosteqnh(ns_broadhost,idp->idp_dna.x_host)) {
+ if ((!ns_neteq(idp->idp_dna, idp->idp_sna)) &&
+ (!ns_neteqnn(idp->idp_dna.x_net, ns_broadnet)) &&
+ (!ns_neteqnn(idp->idp_sna.x_net, ns_zeronet)) &&
+ (!ns_neteqnn(idp->idp_dna.x_net, ns_zeronet)) ) {
+ /*
+ * Look to see if I need to eat this packet.
+ * Algorithm is to forward all young packets
+ * and prematurely age any packets which will
+ * be physically broadcast.
+ * Any very old packets eaten without forwarding
+ * would die anyway.
+ *
+ * Suggestion of Bill Nesheim, Cornell U.
+ */
+ if (idp->idp_tc < NS_MAXHOPS) {
+ idp_forward(m);
+ goto next;
+ }
+ }
+ /*
+ * Is this our packet? If not, forward.
+ */
+ } else if (!ns_hosteqnh(ns_thishost,idp->idp_dna.x_host)) {
+ idp_forward(m);
+ goto next;
+ }
+ /*
+ * Locate pcb for datagram.
+ */
+ nsp = ns_pcblookup(&idp->idp_sna, idp->idp_dna.x_port, NS_WILDCARD);
+ /*
+ * Switch out to protocol's input routine.
+ */
+ nsintr_swtch++;
+ if (nsp) {
+ if (oddpacketp) {
+ /* drop the pad byte that was kept for the checksum */
+ m_adj(m, -1);
+ }
+ /* pcbs flagged NSP_ALL_PACKETS bypass the per-type dispatch */
+ if ((nsp->nsp_flags & NSP_ALL_PACKETS)==0)
+ switch (idp->idp_pt) {
+
+ case NSPROTO_SPP:
+ spp_input(m, nsp);
+ goto next;
+
+ case NSPROTO_ERROR:
+ ns_err_input(m);
+ goto next;
+ }
+ idp_input(m, nsp);
+ } else {
+ ns_error(m, NS_ERR_NOSOCK, 0);
+ }
+ goto next;
+
+bad:
+ m_freem(m);
+ goto next;
+}
+
+/*
+ * Errno reported for each PRC_* command, indexed by command number.
+ * A zero entry makes idp_ctlinput() ignore that command.
+ */
+u_char nsctlerrmap[PRC_NCMDS] = {
+ ECONNABORTED, ECONNABORTED, 0, 0,
+ 0, 0, EHOSTDOWN, EHOSTUNREACH,
+ ENETUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
+ EMSGSIZE, 0, 0, 0,
+ 0, 0, 0, 0
+};
+
+/* nonzero: idp_ctlinput() drops connected pcbs on NS_ERR_NOSOCK */
+int idp_donosocks = 1;
+
+/*
+ * Control input for IDP.
+ *
+ * cmd is a PRC_* command; arg is a struct sockaddr_ns for PRC_IFDOWN,
+ * PRC_HOSTDEAD and PRC_HOSTUNREACH, and a struct ns_errp for every
+ * other command.  Commands outside [0, PRC_NCMDS) or with a zero
+ * entry in nsctlerrmap are ignored.
+ */
+idp_ctlinput(cmd, arg)
+	int cmd;
+	caddr_t arg;
+{
+	struct ns_addr *ns;
+	struct nspcb *nsp;
+	struct ns_errp *errp;
+	struct sockaddr_ns *sns;
+	int idp_abort();
+	extern struct nspcb *idp_drop();
+	int type;
+
+	/*
+	 * nsctlerrmap has PRC_NCMDS entries, so PRC_NCMDS itself is out
+	 * of range; the unsigned compare rejects negative values too.
+	 */
+	if ((unsigned)cmd >= PRC_NCMDS)
+		return;
+	if (nsctlerrmap[cmd] == 0)
+		return;		/* XXX */
+	type = NS_ERR_UNREACH_HOST;
+	switch (cmd) {
+
+	case PRC_IFDOWN:
+	case PRC_HOSTDEAD:
+	case PRC_HOSTUNREACH:
+		sns = (struct sockaddr_ns *)arg;
+		if (sns->sns_family != AF_NS)
+			return;
+		ns = &sns->sns_addr;
+		break;
+
+	default:
+		/* errp is only valid on this path; so is NS_ERR_NOSOCK below */
+		errp = (struct ns_errp *)arg;
+		ns = &errp->ns_err_idp.idp_dna;
+		type = errp->ns_err_num;
+		type = ntohs((u_short)type);
+	}
+	switch (type) {
+
+	case NS_ERR_UNREACH_HOST:
+		ns_pcbnotify(ns, (int)nsctlerrmap[cmd], idp_abort, (long)0);
+		break;
+
+	case NS_ERR_NOSOCK:
+		nsp = ns_pcblookup(ns, errp->ns_err_idp.idp_sna.x_port,
+			NS_WILDCARD);
+		if(nsp && idp_donosocks && ! ns_nullhost(nsp->nsp_faddr))
+			(void) idp_drop(nsp, (int)nsctlerrmap[cmd]);
+	}
+}
+
+int idpprintfs = 0; /* nonzero: idp_forward() prints each packet it sees */
+int idpforwarding = 1; /* zero: idp_forward() refuses to forward */
+/*
+ * Forward a packet. If some error occurs return the sender
+ * an error packet. Note we can't always generate a meaningful
+ * error message because the NS errors don't have a large enough repertoire
+ * of codes and types.
+ */
+/* routes toward the destination and back toward the source, set up per packet */
+struct route idp_droute;
+struct route idp_sroute;
+
+/*
+ * Forward the IDP packet in m toward idp_dna.
+ *
+ * m is always disposed of: it is passed to ns_output(), passed to
+ * ns_error(), or freed.  Up to 42 bytes of the packet are copied
+ * beforehand so that an NS error can still be sent to the source
+ * after ns_output() has consumed the original.
+ */
+idp_forward(m)
+struct mbuf *m;
+{
+	register struct idp *idp = mtod(m, struct idp *);
+	register int error, type, code;
+	struct mbuf *mcopy = NULL;
+	int agedelta = 1;
+	int flags = NS_FORWARDING;
+	int ok_there = 0;
+	int ok_back = 0;
+
+	if (idpprintfs) {
+		printf("forward: src ");
+		ns_printhost(&idp->idp_sna);
+		printf(", dst ");
+		ns_printhost(&idp->idp_dna);
+		printf("hop count %d\n", idp->idp_tc);
+	}
+	if (idpforwarding == 0) {
+		/* can't tell difference between net and host */
+		type = NS_ERR_UNREACH_HOST, code = 0;
+		goto senderror;
+	}
+	idp->idp_tc++;
+	if (idp->idp_tc > NS_MAXHOPS) {
+		type = NS_ERR_TOO_OLD, code = 0;
+		goto senderror;
+	}
+	/*
+	 * Save at most 42 bytes of the packet in case
+	 * we need to generate an NS error message to the src.
+	 */
+	mcopy = m_copy(m, 0, imin((int)ntohs(idp->idp_len), 42));
+
+	if ((ok_there = idp_do_route(&idp->idp_dna,&idp_droute))==0) {
+		type = NS_ERR_UNREACH_HOST, code = 0;
+		goto senderror;
+	}
+	/*
+	 * Here we think about forwarding broadcast packets,
+	 * so we try to insure that it doesn't go back out
+	 * on the interface it came in on.  Also, if we
+	 * are going to physically broadcast this, let us
+	 * age the packet so we can eat it safely the second time around.
+	 */
+	if (idp->idp_dna.x_host.c_host[0] & 0x1) {
+		struct ns_ifaddr *ia = ns_iaonnetof(&idp->idp_dna);
+		struct ifnet *ifp;
+		if (ia) {
+			/* I'm gonna hafta eat this packet */
+			agedelta += NS_MAXHOPS - idp->idp_tc;
+			idp->idp_tc = NS_MAXHOPS;
+		}
+		if ((ok_back = idp_do_route(&idp->idp_sna,&idp_sroute))==0) {
+			/* error = ENETUNREACH; He'll never get it! */
+			m_freem(m);
+			goto cleanup;
+		}
+		if (idp_droute.ro_rt &&
+		    (ifp=idp_droute.ro_rt->rt_ifp) &&
+		    idp_sroute.ro_rt &&
+		    (ifp!=idp_sroute.ro_rt->rt_ifp)) {
+			flags |= NS_ALLOWBROADCAST;
+		} else {
+			type = NS_ERR_UNREACH_HOST, code = 0;
+			goto senderror;
+		}
+	}
+	/* need to adjust checksum (the hop count was changed by agedelta) */
+	if (idp->idp_sum!=0xffff) {
+		union bytes {
+			u_char c[4];
+			u_short s[2];
+			long l;
+		} x;
+		register int shift;
+		x.l = 0; x.c[0] = agedelta;
+		shift = (((((int)ntohs(idp->idp_len))+1)>>1)-2) & 0xf;
+		x.l = idp->idp_sum + (x.s[0] << shift);
+		x.l = x.s[0] + x.s[1];
+		x.l = x.s[0] + x.s[1];
+		if (x.l==0xffff) idp->idp_sum = 0; else idp->idp_sum = x.l;
+	}
+	if ((error = ns_output(m, &idp_droute, flags)) &&
+	    (mcopy!=NULL)) {
+		type = NS_ERR_UNSPEC_T, code = 0;
+		switch (error) {
+
+		case ENETUNREACH:
+		case EHOSTDOWN:
+		case EHOSTUNREACH:
+		case ENETDOWN:
+		case EPERM:
+			type = NS_ERR_UNREACH_HOST;
+			break;
+
+		case EMSGSIZE:
+			type = NS_ERR_TOO_BIG;
+			code = 576; /* too hard to figure out mtu here */
+			break;
+
+		case ENOBUFS:
+			type = NS_ERR_UNSPEC_T;
+			break;
+		}
+		/*
+		 * ns_output() has already disposed of m, so the error
+		 * must be built from the saved copy.  Ownership of the
+		 * copy passes to ns_error(); clear mcopy so that the
+		 * cleanup code does not free it a second time.
+		 */
+		m = mcopy;
+		mcopy = NULL;
+	senderror:
+		ns_error(m, type, code);
+	}
+cleanup:
+	if (ok_there)
+		idp_undo_route(&idp_droute);
+	if (ok_back)
+		idp_undo_route(&idp_sroute);
+	if (mcopy != NULL)
+		m_freem(mcopy);
+}
+
+/*
+ * Build a fresh route in *ro to the NS address *src (port ignored).
+ * Returns 1 and bumps the route's use count when a route with an
+ * interface was found, 0 otherwise.
+ */
+idp_do_route(src, ro)
+struct ns_addr *src;
+struct route *ro;
+{
+	struct sockaddr_ns *target;
+
+	/* Start from an empty route and fill in its destination. */
+	bzero((caddr_t)ro, sizeof (*ro));
+	target = (struct sockaddr_ns *)&ro->ro_dst;
+	target->sns_len = sizeof(*target);
+	target->sns_family = AF_NS;
+	target->sns_addr = *src;
+	target->sns_addr.x_port = 0;
+
+	rtalloc(ro);
+	if (ro->ro_rt != 0 && ro->ro_rt->rt_ifp != 0) {
+		ro->ro_rt->rt_use++;
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * Release the route held in *ro, if any.  ro->ro_rt itself is left
+ * unchanged.
+ */
+idp_undo_route(ro)
+register struct route *ro;
+{
+	if (ro->ro_rt == 0)
+		return;
+	RTFREE(ro->ro_rt);
+}
+
+/*
+ * Hand a copy of the outgoing packet m to every raw listener.
+ *
+ * Each copy gets an extra struct idp prepended.  Only the source
+ * address and length of that header are filled in: the source is
+ * this host on network zero, or the first AF_NS address of ifp when
+ * ifp is a point-to-point interface.  m itself is not modified, and
+ * ifp may be null.
+ */
+ns_watch_output(m, ifp)
+struct mbuf *m;
+struct ifnet *ifp;
+{
+	register struct nspcb *nsp;
+	register struct ifaddr *ifa;
+	/*
+	 * Give any raw listeners a crack at the packet
+	 */
+	for (nsp = nsrawpcb.nsp_next; nsp != &nsrawpcb; nsp = nsp->nsp_next) {
+		struct mbuf *m0 = m_copy(m, 0, (int)M_COPYALL);
+		if (m0) {
+			register struct idp *idp;
+
+			/* on failure M_PREPEND leaves m0 NULL */
+			M_PREPEND(m0, sizeof (*idp), M_DONTWAIT);
+			if (m0 == NULL)
+				continue;
+			idp = mtod(m0, struct idp *);
+			idp->idp_sna.x_net = ns_zeronet;
+			idp->idp_sna.x_host = ns_thishost;
+			if (ifp && (ifp->if_flags & IFF_POINTOPOINT))
+			    for(ifa = ifp->if_addrlist; ifa;
+						ifa = ifa->ifa_next) {
+				if (ifa->ifa_addr->sa_family==AF_NS) {
+				    idp->idp_sna = IA_SNS(ifa)->sns_addr;
+				    break;
+				}
+			    }
+			/*
+			 * idp_len is a 16-bit field in network byte
+			 * order.  A 32-bit swap would leave zero in
+			 * its low half on little-endian machines.
+			 */
+			idp->idp_len = htons((u_short)m0->m_pkthdr.len);
+			idp_input(m0, nsp);
+		}
+	}
+}
diff --git a/sys/netns/ns_ip.c b/sys/netns/ns_ip.c
new file mode 100644
index 000000000000..09deb8fe7c43
--- /dev/null
+++ b/sys/netns/ns_ip.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ns_ip.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Software interface driver for encapsulating ns in ip.
+ */
+
+#ifdef NSIP
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/protosw.h>
+
+#include <net/if.h>
+#include <net/netisr.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+
+#include <machine/mtpr.h>
+
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#include <netns/idp.h>
+
+/*
+ * Per-tunnel state for one "nsip" pseudo-interface.  The ifnet is the
+ * first member, and this file casts between struct ifnet * and
+ * struct ifnet_en * on that basis.
+ */
+struct ifnet_en {
+ struct ifnet ifen_ifnet; /* must stay first */
+ struct route ifen_route; /* IP route handed to ip_output() */
+ struct in_addr ifen_src; /* IP source placed in the outer header */
+ struct in_addr ifen_dst; /* IP destination placed in the outer header */
+ struct ifnet_en *ifen_next; /* next entry on nsip_list */
+};
+
+int nsipoutput(), nsipioctl(), nsipstart();
+/*
+ * MTU given to every nsip interface.  The expansion must not end in
+ * a semicolon, or the macro cannot be used inside an expression.
+ */
+#define LOMTU (1024+512)
+
+/* aggregate interface: carries the totals and the next unit number */
+struct ifnet nsipif;
+struct ifnet_en *nsip_list; /* list of all hosts and gateways or
+ broadcast addrs */
+
+/*
+ * Allocate a new nsip pseudo-interface, link it onto nsip_list and
+ * attach it.  The first call also initialises the aggregate nsipif.
+ * Returns the new entry, or NULL when no memory is available.
+ */
+struct ifnet_en *
+nsipattach()
+{
+	register struct ifnet_en *m;
+	register struct ifnet *ifp;
+
+	/* a zero mtu means nsipif has not been set up yet */
+	if (nsipif.if_mtu == 0) {
+		ifp = &nsipif;
+		ifp->if_name = "nsip";
+		ifp->if_mtu = LOMTU;
+		ifp->if_ioctl = nsipioctl;
+		ifp->if_output = nsipoutput;
+		ifp->if_start = nsipstart;
+		ifp->if_flags = IFF_POINTOPOINT;
+	}
+
+	MALLOC((m), struct ifnet_en *, sizeof(*m), M_PCB, M_NOWAIT);
+	if (m == NULL) return (NULL);
+	/*
+	 * Only some members are assigned below.  Zero the rest so that
+	 * ifen_route.ro_rt, which nsip_free() and nsip_rtchange() test,
+	 * and the remaining ifnet fields do not hold garbage.
+	 */
+	bzero((caddr_t)m, sizeof(*m));
+	m->ifen_next = nsip_list;
+	nsip_list = m;
+	ifp = &m->ifen_ifnet;
+
+	ifp->if_name = "nsip";
+	ifp->if_mtu = LOMTU;
+	ifp->if_ioctl = nsipioctl;
+	ifp->if_output = nsipoutput;
+	ifp->if_start = nsipstart;
+	ifp->if_flags = IFF_POINTOPOINT;
+	/* unit numbers are handed out from the counter in nsipif */
+	ifp->if_unit = nsipif.if_unit++;
+	if_attach(ifp);
+
+	return (m);
+}
+
+
+/*
+ * Process an ioctl request.
+ *
+ * SIOCSIFADDR marks the interface up; SIOCSIFDSTADDR needs no work
+ * here.  SIOCSIFFLAGS releases the tunnel when IFF_UP is being
+ * cleared.  Any other command yields EINVAL.
+ */
+/* ARGSUSED */
+nsipioctl(ifp, cmd, data)
+	register struct ifnet *ifp;
+	int cmd;
+	caddr_t data;
+{
+	int error = 0;
+	struct ifreq *ifr;
+
+	switch (cmd) {
+
+	case SIOCSIFADDR:
+		ifp->if_flags |= IFF_UP;
+		/* fall into: */
+
+	case SIOCSIFDSTADDR:
+		/*
+		 * Everything else is done at a higher level.
+		 */
+		break;
+
+	case SIOCSIFFLAGS:
+		ifr = (struct ifreq *)data;
+		if ((ifr->ifr_flags & IFF_UP) == 0)
+			error = nsip_free(ifp);
+		/* do not fall into default and overwrite error */
+		break;
+
+	default:
+		error = EINVAL;
+	}
+	return (error);
+}
+
+/* debugging aids used by idpip_input() */
+struct mbuf *nsip_badlen; /* last packet rejected for a length mismatch */
+struct mbuf *nsip_lastin; /* copy of the last packet, kept when nsip_hold_input is set */
+int nsip_hold_input; /* nonzero: keep a copy of each input packet */
+
+/*
+ * Input routine for IDP packets that arrived encapsulated in IP.
+ * Strips the IP header (and any IP options), checks the IDP length
+ * against the IP length, and queues the packet on nsintrq for
+ * nsintr().
+ */
+idpip_input(m, ifp)
+ register struct mbuf *m;
+ struct ifnet *ifp;
+{
+ register struct ip *ip;
+ register struct idp *idp;
+ register struct ifqueue *ifq = &nsintrq;
+ int len, s;
+
+ if (nsip_hold_input) {
+ if (nsip_lastin) {
+ m_freem(nsip_lastin);
+ }
+ nsip_lastin = m_copym(m, 0, (int)M_COPYALL, M_DONTWAIT);
+ }
+ /*
+ * Get IP and IDP header together in first mbuf.
+ */
+ nsipif.if_ipackets++;
+ s = sizeof (struct ip) + sizeof (struct idp);
+ if (((m->m_flags & M_EXT) || m->m_len < s) &&
+ (m = m_pullup(m, s)) == 0) {
+ nsipif.if_ierrors++;
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ /* a header longer than struct ip means IP options are present */
+ if (ip->ip_hl > (sizeof (struct ip) >> 2)) {
+ ip_stripoptions(m, (struct mbuf *)0);
+ if (m->m_len < s) {
+ if ((m = m_pullup(m, s)) == 0) {
+ nsipif.if_ierrors++;
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ }
+ }
+
+ /*
+ * Make mbuf data length reflect IDP length.
+ * If not enough data to reflect IDP length, drop.
+ */
+ /* step over the IP header; ip still points at it for the checks below */
+ m->m_data += sizeof (struct ip);
+ m->m_len -= sizeof (struct ip);
+ m->m_pkthdr.len -= sizeof (struct ip);
+ idp = mtod(m, struct idp *);
+ len = ntohs(idp->idp_len);
+ if (len & 1) len++; /* Preserve Garbage Byte */
+ /* NOTE(review): assumes ip_len is host order and excludes the IP header here -- confirm against the caller */
+ if (ip->ip_len != len) {
+ if (len > ip->ip_len) {
+ nsipif.if_ierrors++;
+ /* the bad packet is parked in nsip_badlen rather than freed */
+ if (nsip_badlen) m_freem(nsip_badlen);
+ nsip_badlen = m;
+ return;
+ }
+ /* Any extra will be trimmed off by the NS routines */
+ }
+
+ /*
+ * Place interface pointer before the data
+ * for the receiving protocol.
+ */
+ m->m_pkthdr.rcvif = ifp;
+ /*
+ * Deliver to NS
+ */
+ s = splimp();
+ if (IF_QFULL(ifq)) {
+ IF_DROP(ifq);
+/* no goto in this function targets this label */
+bad:
+ m_freem(m);
+ splx(s);
+ return;
+ }
+ IF_ENQUEUE(ifq, m);
+ schednetisr(NETISR_NS);
+ splx(s);
+ return;
+}
+
+/*
+ * Output routine of an nsip pseudo-interface: wrap the IDP packet m
+ * in an IP header addressed from ifen_src to ifen_dst and pass it to
+ * ip_output().  dst is unused.  Returns 0 or an errno; m is consumed
+ * on every path.
+ */
+/* ARGSUSED */
+nsipoutput(ifn, m, dst)
+	struct ifnet_en *ifn;
+	register struct mbuf *m;
+	struct sockaddr *dst;
+{
+
+	register struct ip *ip;
+	register struct route *ro = &(ifn->ifen_route);
+	register int len = 0;
+	register struct idp *idp = mtod(m, struct idp *);
+	int error;
+
+	ifn->ifen_ifnet.if_opackets++;
+	nsipif.if_opackets++;
+
+
+	/*
+	 * Calculate data length and make space
+	 * for IP header.
+	 */
+	len =  ntohs(idp->idp_len);
+	if (len & 1) len++;		/* Preserve Garbage Byte */
+	/* following clause not necessary on vax */
+	if (3 & (int)m->m_data) {
+		/* force longword alignment of ip hdr */
+		struct mbuf *m0 = m_gethdr(MT_HEADER, M_DONTWAIT);
+		if (m0 == 0) {
+			m_freem(m);
+			return (ENOBUFS);
+		}
+		MH_ALIGN(m0, sizeof (struct ip));
+		m0->m_flags = m->m_flags & M_COPYFLAGS;
+		m0->m_next = m;
+		m0->m_len = sizeof (struct ip);
+		/* NOTE(review): counts only the first data mbuf -- confirm m is never a longer chain here */
+		m0->m_pkthdr.len = m0->m_len + m->m_len;
+		m->m_flags &= ~M_PKTHDR;
+		/*
+		 * The new header mbuf is now the head of the chain.
+		 * Without this the IP header below would be written
+		 * over the IDP header and m0 would never be sent.
+		 */
+		m = m0;
+	} else {
+		M_PREPEND(m, sizeof (struct ip), M_DONTWAIT);
+		if (m == 0)
+			return (ENOBUFS);
+	}
+	/*
+	 * Fill in IP header.
+	 */
+	ip = mtod(m, struct ip *);
+	/* clear the leading header fields before setting the rest */
+	*(long *)ip = 0;
+	ip->ip_p = IPPROTO_IDP;
+	ip->ip_src = ifn->ifen_src;
+	ip->ip_dst = ifn->ifen_dst;
+	ip->ip_len = (u_short)len + sizeof (struct ip);
+	ip->ip_ttl = MAXTTL;
+
+	/*
+	 * Output final datagram.
+	 */
+	error =  (ip_output(m, (struct mbuf *)0, ro, SO_BROADCAST, NULL));
+	if (error) {
+		ifn->ifen_ifnet.if_oerrors++;
+		/* NOTE(review): stores the errno in the input-error counter; kept as found -- confirm intent */
+		ifn->ifen_ifnet.if_ierrors = error;
+	}
+	return (error);
+}
+
+/*
+ * if_start hook of the nsip interfaces.  It panics when called.
+ */
+nsipstart(ifp)
+struct ifnet *ifp;
+{
+ panic("nsip_start called\n");
+}
+
+/* scratch ifreq reused by nsip_route(); byte 4 of the name is rewritten on each call */
+struct ifreq ifr = {"nsip0"};
+
+/*
+ * Set up an NS-in-IP tunnel described by the struct nsip_req held in
+ * the mbuf m: find an IP route to the gateway, pick a local IP
+ * source address, claim a down (or newly attached) nsip interface
+ * and configure it through ns_control().
+ * Returns 0 or an errno.
+ */
+nsip_route(m)
+ register struct mbuf *m;
+{
+ register struct nsip_req *rq = mtod(m, struct nsip_req *);
+ struct sockaddr_ns *ns_dst = (struct sockaddr_ns *)&rq->rq_ns;
+ struct sockaddr_in *ip_dst = (struct sockaddr_in *)&rq->rq_ip;
+ struct route ro;
+ struct ifnet_en *ifn;
+ struct sockaddr_in *src;
+
+ /*
+ * First, make sure we already have an ns address:
+ */
+ if (ns_hosteqnh(ns_thishost, ns_zerohost))
+ return (EADDRNOTAVAIL);
+ /*
+ * Now, determine if we can get to the destination
+ */
+ bzero((caddr_t)&ro, sizeof (ro));
+ ro.ro_dst = *(struct sockaddr *)ip_dst;
+ rtalloc(&ro);
+ if (ro.ro_rt == 0 || ro.ro_rt->rt_ifp == 0) {
+ return (ENETUNREACH);
+ }
+
+ /*
+ * And see how he's going to get back to us:
+ * i.e., what return ip address do we use?
+ */
+ {
+ register struct in_ifaddr *ia;
+ struct ifnet *ifp = ro.ro_rt->rt_ifp;
+
+ /* prefer an address on the outgoing interface, else the first one known */
+ for (ia = in_ifaddr; ia; ia = ia->ia_next)
+ if (ia->ia_ifp == ifp)
+ break;
+ if (ia == 0)
+ ia = in_ifaddr;
+ if (ia == 0) {
+ RTFREE(ro.ro_rt);
+ return (EADDRNOTAVAIL);
+ }
+ src = (struct sockaddr_in *)&ia->ia_addr;
+ }
+
+ /*
+ * Is there a free (pseudo-)interface or space?
+ */
+ for (ifn = nsip_list; ifn; ifn = ifn->ifen_next) {
+ if ((ifn->ifen_ifnet.if_flags & IFF_UP) == 0)
+ break;
+ }
+ if (ifn == NULL)
+ ifn = nsipattach();
+ if (ifn == NULL) {
+ RTFREE(ro.ro_rt);
+ return (ENOBUFS);
+ }
+ /* the route reference obtained above is now held by the tunnel */
+ ifn->ifen_route = ro;
+ ifn->ifen_dst = ip_dst->sin_addr;
+ ifn->ifen_src = src->sin_addr;
+
+ /*
+ * now configure this as a point to point link
+ */
+ /*
+ * NOTE(review): the unit digit comes from the global counter, not
+ * from ifn itself; this looks wrong when a down interface is reused
+ * or when more than ten units exist -- confirm.
+ */
+ ifr.ifr_name[4] = '0' + nsipif.if_unit - 1;
+ ifr.ifr_dstaddr = * (struct sockaddr *) ns_dst;
+ (void)ns_control((struct socket *)0, (int)SIOCSIFDSTADDR, (caddr_t)&ifr,
+ (struct ifnet *)ifn);
+ satons_addr(ifr.ifr_addr).x_host = ns_thishost;
+ return (ns_control((struct socket *)0, (int)SIOCSIFADDR, (caddr_t)&ifr,
+ (struct ifnet *)ifn));
+}
+
+/*
+ * Take an nsip interface out of service: release the IP route it
+ * holds, if any, and clear IFF_UP.  Always returns 0.
+ */
+nsip_free(ifp)
+struct ifnet *ifp;
+{
+	register struct ifnet_en *tunnel = (struct ifnet_en *)ifp;
+
+	if (tunnel->ifen_route.ro_rt != 0) {
+		RTFREE(tunnel->ifen_route.ro_rt);
+		tunnel->ifen_route.ro_rt = 0;
+	}
+	ifp->if_flags &= ~IFF_UP;
+	return (0);
+}
+
+/*
+ * Control input for the tunnels.  A dead-route or redirect command
+ * for an AF_INET or AF_IMPLINK address other than INADDR_ANY makes
+ * every tunnel to that address drop its cached route; everything
+ * else is ignored.
+ */
+nsip_ctlinput(cmd, sa)
+	int cmd;
+	struct sockaddr *sa;
+{
+	extern u_char inetctlerrmap[];
+	struct sockaddr_in *inaddr;
+	int in_rtchange();
+	int family_ok;
+
+	if ((unsigned)cmd >= PRC_NCMDS)
+		return;
+	family_ok = (sa->sa_family == AF_INET || sa->sa_family == AF_IMPLINK);
+	if (!family_ok)
+		return;
+	inaddr = (struct sockaddr_in *)sa;
+	if (inaddr->sin_addr.s_addr == INADDR_ANY)
+		return;
+
+	if (cmd == PRC_ROUTEDEAD ||
+	    cmd == PRC_REDIRECT_NET ||
+	    cmd == PRC_REDIRECT_HOST ||
+	    cmd == PRC_REDIRECT_TOSNET ||
+	    cmd == PRC_REDIRECT_TOSHOST)
+		nsip_rtchange(&inaddr->sin_addr);
+}
+
+/*
+ * Release the cached IP route of every tunnel whose IP destination
+ * equals *dst.
+ */
+nsip_rtchange(dst)
+	register struct in_addr *dst;
+{
+	register struct ifnet_en *tunnel = nsip_list;
+
+	while (tunnel) {
+		if (tunnel->ifen_dst.s_addr == dst->s_addr &&
+		    tunnel->ifen_route.ro_rt) {
+			RTFREE(tunnel->ifen_route.ro_rt);
+			tunnel->ifen_route.ro_rt = 0;
+		}
+		tunnel = tunnel->ifen_next;
+	}
+}
+#endif
diff --git a/sys/netns/ns_output.c b/sys/netns/ns_output.c
new file mode 100644
index 000000000000..4c9f364f1eac
--- /dev/null
+++ b/sys/netns/ns_output.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ns_output.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#include <netns/idp.h>
+#include <netns/idp_var.h>
+
+#ifdef vax
+#include <machine/mtpr.h>
+#endif
+/* debugging knobs and counters for ns_output() */
+int ns_hold_output = 0; /* nonzero: keep a copy of each packet in ns_lastout */
+int ns_copy_output = 0; /* nonzero: pass each packet to ns_watch_output() */
+int ns_output_cnt = 0; /* packets handed to an interface output routine */
+struct mbuf *ns_lastout; /* copy made when ns_hold_output is set */
+
+/*
+ * Send the IDP packet m0.
+ *
+ * ro is an optional cached route; when it is null a temporary route
+ * is used and released before returning.  flags may contain
+ * NS_ROUTETOIF (skip the routing lookup and use the interface on the
+ * destination network) and NS_ALLOWBROADCAST (permit a multicast or
+ * broadcast destination).  Returns 0 or an errno; m0 is consumed on
+ * every path.
+ */
+ns_output(m0, ro, flags)
+	struct mbuf *m0;
+	struct route *ro;
+	int flags;
+{
+	register struct idp *idp = mtod(m0, struct idp *);
+	register struct ifnet *ifp = 0;
+	int error = 0;
+	struct route idproute;
+	struct sockaddr_ns *dst;
+	extern int idpcksum;
+
+	if (ns_hold_output) {
+		if (ns_lastout) {
+			/*
+			 * The saved copy is a whole chain; m_free()
+			 * would release only its first mbuf.
+			 */
+			m_freem(ns_lastout);
+		}
+		ns_lastout = m_copy(m0, 0, (int)M_COPYALL);
+	}
+	/*
+	 * Route packet.
+	 */
+	if (ro == 0) {
+		ro = &idproute;
+		bzero((caddr_t)ro, sizeof (*ro));
+	}
+	dst = (struct sockaddr_ns *)&ro->ro_dst;
+	if (ro->ro_rt == 0) {
+		dst->sns_family = AF_NS;
+		dst->sns_len = sizeof (*dst);
+		dst->sns_addr = idp->idp_dna;
+		dst->sns_addr.x_port = 0;
+		/*
+		 * If routing to interface only,
+		 * short circuit routing lookup.
+		 */
+		if (flags & NS_ROUTETOIF) {
+			struct ns_ifaddr *ia = ns_iaonnetof(&idp->idp_dna);
+
+			if (ia == 0) {
+				error = ENETUNREACH;
+				goto bad;
+			}
+			ifp = ia->ia_ifp;
+			goto gotif;
+		}
+		rtalloc(ro);
+	} else if ((ro->ro_rt->rt_flags & RTF_UP) == 0) {
+		/*
+		 * The old route has gone away; try for a new one.
+		 */
+		rtfree(ro->ro_rt);
+		ro->ro_rt = NULL;
+		rtalloc(ro);
+	}
+	if (ro->ro_rt == 0 || (ifp = ro->ro_rt->rt_ifp) == 0) {
+		error = ENETUNREACH;
+		goto bad;
+	}
+	ro->ro_rt->rt_use++;
+	if (ro->ro_rt->rt_flags & (RTF_GATEWAY|RTF_HOST))
+		dst = (struct sockaddr_ns *)ro->ro_rt->rt_gateway;
+gotif:
+
+	/*
+	 * Look for multicast addresses
+	 * and verify user is allowed to send
+	 * such a packet.
+	 */
+	if (dst->sns_addr.x_host.c_host[0]&1) {
+		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		if ((flags & NS_ALLOWBROADCAST) == 0) {
+			error = EACCES;
+			goto bad;
+		}
+	}
+
+	/* idp_len is in network byte order; convert it for the compare */
+	if (ntohs(idp->idp_len) <= ifp->if_mtu) {
+		ns_output_cnt++;
+		if (ns_copy_output) {
+			ns_watch_output(m0, ifp);
+		}
+		error = (*ifp->if_output)(ifp, m0,
+			(struct sockaddr *)dst, ro->ro_rt);
+		goto done;
+	} else error = EMSGSIZE;
+
+
+bad:
+	/* ifp may still be null here; ns_watch_output() checks for that */
+	if (ns_copy_output) {
+		ns_watch_output(m0, ifp);
+	}
+	m_freem(m0);
+done:
+	/* release the route only when it was the temporary one */
+	if (ro == &idproute && (flags & NS_ROUTETOIF) == 0 && ro->ro_rt) {
+		RTFREE(ro->ro_rt);
+		ro->ro_rt = 0;
+	}
+	return (error);
+}
diff --git a/sys/netns/ns_pcb.c b/sys/netns/ns_pcb.c
new file mode 100644
index 000000000000..ca88472d594f
--- /dev/null
+++ b/sys/netns/ns_pcb.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ns_pcb.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#include <netns/ns_pcb.h>
+
+/*
+ * All-zero NS address (file-scope object with no initializer).
+ * In this file it is passed to ns_pcblookup() as the foreign address
+ * when only the local port is of interest, and it is copied into
+ * nsp_faddr by ns_pcbdisconnect().
+ */
+struct ns_addr zerons_addr;
+
+/*
+ * Create a protocol control block for socket so and link it onto the
+ * list headed by head.  The pcb lives in a zeroed MT_PCB mbuf obtained
+ * without waiting.
+ *
+ * Returns 0 on success, ENOBUFS if no mbuf could be obtained.
+ */
+ns_pcballoc(so, head)
+	struct socket *so;
+	struct nspcb *head;
+{
+	register struct nspcb *pcb;
+	struct mbuf *store = m_getclr(M_DONTWAIT, MT_PCB);
+
+	if (store == NULL)
+		return (ENOBUFS);
+
+	/* The pcb occupies the data area of the mbuf just obtained. */
+	pcb = mtod(store, struct nspcb *);
+	pcb->nsp_socket = so;
+	insque(pcb, head);
+	so->so_pcb = (caddr_t)pcb;
+	return (0);
+}
+
+/*
+ * Give a pcb its local address and port.
+ *
+ * nam, when non-null, is an mbuf holding a struct sockaddr_ns.  A zero
+ * port in it (or a null nam) asks for a port to be chosen here; the
+ * search advances the counter kept in the list head, nspcb.nsp_lport,
+ * never going below NSPORT_RESERVED, until ns_pcblookup() reports the
+ * port unused.
+ *
+ * Returns 0 on success, or
+ *	EINVAL		the pcb already has a local port or host, or nam
+ *			has the wrong length;
+ *	EADDRNOTAVAIL	a non-null host was given and ifa_ifwithaddr()
+ *			found no interface with that address;
+ *	EACCES		a port below NSPORT_RESERVED was requested by a
+ *			socket without SS_PRIV;
+ *	EADDRINUSE	ns_pcblookup() found a pcb on the requested port.
+ */
+ns_pcbbind(nsp, nam)
+	register struct nspcb *nsp;
+	struct mbuf *nam;
+{
+	register struct sockaddr_ns *sns;
+	u_short lport = 0;
+
+	/* A pcb may be bound only once. */
+	if (nsp->nsp_lport || !ns_nullhost(nsp->nsp_laddr))
+		return (EINVAL);
+	if (nam == 0)
+		goto noname;
+	sns = mtod(nam, struct sockaddr_ns *);
+	if (nam->m_len != sizeof (*sns))
+		return (EINVAL);
+	if (!ns_nullhost(sns->sns_addr)) {
+		int tport = sns->sns_port;
+		int found;
+
+		/*
+		 * The port is cleared for the duration of the interface
+		 * lookup (yech...) and put back before the result is
+		 * examined, so the caller's sockaddr is left unmodified
+		 * on the failure path as well as on success.
+		 */
+		sns->sns_port = 0;
+		found = (ifa_ifwithaddr((struct sockaddr *)sns) != 0);
+		sns->sns_port = tport;
+		if (!found)
+			return (EADDRNOTAVAIL);
+	}
+	lport = sns->sns_port;
+	if (lport) {
+		u_short aport = ntohs(lport);
+
+		if (aport < NSPORT_RESERVED &&
+		    (nsp->nsp_socket->so_state & SS_PRIV) == 0)
+			return (EACCES);
+		if (ns_pcblookup(&zerons_addr, lport, 0))
+			return (EADDRINUSE);
+	}
+	nsp->nsp_laddr = sns->sns_addr;
+noname:
+	/* No port requested: step the shared counter until one is free. */
+	if (lport == 0)
+		do {
+			if (nspcb.nsp_lport++ < NSPORT_RESERVED)
+				nspcb.nsp_lport = NSPORT_RESERVED;
+			lport = htons(nspcb.nsp_lport);
+		} while (ns_pcblookup(&zerons_addr, lport, 0));
+	nsp->nsp_lport = lport;
+	return (0);
+}
+
+/*
+ * Connect from a socket to a specified address.
+ * Both address and port must be specified in the sockaddr_ns carried by nam.
+ * If we don't have a local address for this socket yet,
+ * then pick one.
+ *
+ * Returns 0 on success; EINVAL if nam has the wrong length, EAFNOSUPPORT
+ * if the family is not AF_NS, EADDRNOTAVAIL if the port or host is zero or
+ * no interface address can be found for the source network, and EADDRINUSE
+ * if ns_pcblookup() finds a pcb for this foreign address and our local port.
+ */
+ns_pcbconnect(nsp, nam)
+ struct nspcb *nsp;
+ struct mbuf *nam;
+{
+ struct ns_ifaddr *ia;
+ register struct sockaddr_ns *sns = mtod(nam, struct sockaddr_ns *);
+ register struct ns_addr *dst;
+ register struct route *ro;
+ struct ifnet *ifp;
+
+ if (nam->m_len != sizeof (*sns))
+ return (EINVAL);
+ if (sns->sns_family != AF_NS)
+ return (EAFNOSUPPORT);
+ if (sns->sns_port==0 || ns_nullhost(sns->sns_addr))
+ return (EADDRNOTAVAIL);
+ /*
+ * If we haven't bound which network number to use as ours,
+ * we will use the number of the outgoing interface.
+ * This depends on having done a routing lookup, which
+ * we will probably have to do anyway, so we might
+ * as well do it now. On the other hand if we are
+ * sending to multiple destinations we may have already
+ * done the lookup, so see if we can use the route
+ * from before. In any case, we only
+ * choose a port number once, even if sending to multiple
+ * destinations.
+ */
+ ro = &nsp->nsp_route;
+ dst = &satons_addr(ro->ro_dst);
+ if (nsp->nsp_socket->so_options & SO_DONTROUTE)
+ goto flush;
+ if (!ns_neteq(nsp->nsp_lastdst, sns->sns_addr))
+ goto flush;
+ if (!ns_hosteq(nsp->nsp_lastdst, sns->sns_addr)) {
+ if (ro->ro_rt && ! (ro->ro_rt->rt_flags & RTF_HOST)) {
+ /* can patch route to avoid rtalloc */
+ *dst = sns->sns_addr;
+ } else {
+ flush:
+ /*
+ * Also the target of the two gotos above: drop the
+ * cached route and zero our source network so that
+ * both are chosen afresh below.
+ */
+ if (ro->ro_rt)
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = (struct rtentry *)0;
+ nsp->nsp_laddr.x_net = ns_zeronet;
+ }
+ }/* else cached route is ok; do nothing */
+ nsp->nsp_lastdst = sns->sns_addr;
+ if ((nsp->nsp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
+ (ro->ro_rt == (struct rtentry *)0 ||
+ ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
+ /* No route yet, so try to acquire one */
+ ro->ro_dst.sa_family = AF_NS;
+ ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+ *dst = sns->sns_addr;
+ dst->x_port = 0;
+ rtalloc(ro);
+ }
+ if (ns_neteqnn(nsp->nsp_laddr.x_net, ns_zeronet)) {
+ /*
+ * If route is known or can be allocated now,
+ * our src addr is taken from the i/f, else punt.
+ */
+
+ ia = (struct ns_ifaddr *)0;
+ /*
+ * If we found a route, use the address
+ * corresponding to the outgoing interface
+ */
+ if (ro->ro_rt && (ifp = ro->ro_rt->rt_ifp))
+ for (ia = ns_ifaddr; ia; ia = ia->ia_next)
+ if (ia->ia_ifp == ifp)
+ break;
+ if (ia == 0) {
+ /*
+ * No interface from the route.  Fall back, in order, to
+ * ifa_ifwithdstaddr() (called with the port temporarily
+ * zeroed), ns_iaonnetof(), and finally the first entry
+ * of the ns_ifaddr list.
+ */
+ u_short fport = sns->sns_addr.x_port;
+ sns->sns_addr.x_port = 0;
+ ia = (struct ns_ifaddr *)
+ ifa_ifwithdstaddr((struct sockaddr *)sns);
+ sns->sns_addr.x_port = fport;
+ if (ia == 0)
+ ia = ns_iaonnetof(&sns->sns_addr);
+ if (ia == 0)
+ ia = ns_ifaddr;
+ if (ia == 0)
+ return (EADDRNOTAVAIL);
+ }
+ nsp->nsp_laddr.x_net = satons_addr(ia->ia_addr).x_net;
+ }
+ /*
+ * Refuse if a pcb already exists for this foreign address and our
+ * local port (wildp == 0, so only non-wildcard matches count).
+ */
+ if (ns_pcblookup(&sns->sns_addr, nsp->nsp_lport, 0))
+ return (EADDRINUSE);
+ if (ns_nullhost(nsp->nsp_laddr)) {
+ /* No local port yet: let ns_pcbbind() pick one (result ignored). */
+ if (nsp->nsp_lport == 0)
+ (void) ns_pcbbind(nsp, (struct mbuf *)0);
+ nsp->nsp_laddr.x_host = ns_thishost;
+ }
+ nsp->nsp_faddr = sns->sns_addr;
+ /* Includes nsp->nsp_fport = sns->sns_port; */
+ return (0);
+}
+
+/*
+ * Forget the foreign address of a pcb.  If its socket is flagged
+ * SS_NOFDREF the pcb is detached as well.
+ */
+ns_pcbdisconnect(nsp)
+	struct nspcb *nsp;
+{
+	int orphaned;
+
+	nsp->nsp_faddr = zerons_addr;
+	orphaned = nsp->nsp_socket->so_state & SS_NOFDREF;
+	if (orphaned != 0)
+		ns_pcbdetach(nsp);
+}
+
+/*
+ * Tear down a pcb: clear the socket's pcb pointer and hand the socket
+ * to sofree(), release any cached route, unlink the pcb from its list
+ * and free the mbuf that holds it.
+ */
+ns_pcbdetach(nsp)
+	struct nspcb *nsp;
+{
+	struct socket *sock = nsp->nsp_socket;
+	struct rtentry *rt;
+
+	sock->so_pcb = 0;
+	sofree(sock);
+
+	rt = nsp->nsp_route.ro_rt;
+	if (rt != 0)
+		rtfree(rt);
+
+	remque(nsp);
+	(void) m_free(dtom(nsp));
+}
+
+/*
+ * Fill nam with a sockaddr_ns describing the local address of nsp.
+ * The mbuf length is set to the size of the sockaddr and the sockaddr
+ * is zeroed before its length, family and address are stored.
+ */
+ns_setsockaddr(nsp, nam)
+	register struct nspcb *nsp;
+	struct mbuf *nam;
+{
+	register struct sockaddr_ns *out;
+
+	nam->m_len = sizeof (struct sockaddr_ns);
+	out = mtod(nam, struct sockaddr_ns *);
+	bzero((caddr_t)out, sizeof (*out));
+	out->sns_len = sizeof (*out);
+	out->sns_family = AF_NS;
+	out->sns_addr = nsp->nsp_laddr;
+}
+
+/*
+ * Fill nam with a sockaddr_ns describing the foreign address of nsp.
+ * The mbuf length is set to the size of the sockaddr and the sockaddr
+ * is zeroed before its length, family and address are stored.
+ */
+ns_setpeeraddr(nsp, nam)
+	register struct nspcb *nsp;
+	struct mbuf *nam;
+{
+	register struct sockaddr_ns *out;
+
+	nam->m_len = sizeof (struct sockaddr_ns);
+	out = mtod(nam, struct sockaddr_ns *);
+	bzero((caddr_t)out, sizeof (*out));
+	out->sns_len = sizeof (*out);
+	out->sns_family = AF_NS;
+	out->sns_addr = nsp->nsp_faddr;
+}
+
+/*
+ * Pass some notification to all connections of a protocol
+ * associated with address dst. Call the
+ * protocol specific routine to handle each connection.
+ * Also pass an extra parameter via the nspcb. (which may in fact
+ * be a parameter list!)
+ *
+ * dst	 foreign host to match (compared with ns_hosteq against each
+ *	 pcb's nsp_faddr)
+ * errno	 if non-zero, stored in so_error of each matching socket
+ * notify protocol routine called with each matching pcb
+ * param	 value stored in nsp_notify_param before notify is called
+ *
+ * The walk over the global nspcb list is bracketed by splimp()/splx().
+ */
+ns_pcbnotify(dst, errno, notify, param)
+ register struct ns_addr *dst;
+ long param;
+ int errno, (*notify)();
+{
+ register struct nspcb *nsp, *oinp;
+ int s = splimp();
+
+ /* The loop has no increment clause; every path advances nsp itself. */
+ for (nsp = (&nspcb)->nsp_next; nsp != (&nspcb);) {
+ if (!ns_hosteq(*dst,nsp->nsp_faddr)) {
+ /* "next" is also jumped to from below to skip a pcb. */
+ next:
+ nsp = nsp->nsp_next;
+ continue;
+ }
+ if (nsp->nsp_socket == 0)
+ goto next;
+ if (errno)
+ nsp->nsp_socket->so_error = errno;
+ /*
+ * Step to the successor before calling notify -- presumably so
+ * the walk survives notify removing oinp from the list.
+ */
+ oinp = nsp;
+ nsp = nsp->nsp_next;
+ oinp->nsp_notify_param = param;
+ (*notify)(oinp);
+ }
+ splx(s);
+}
+
+#ifdef notdef
+/*
+ * A routing change has occurred: discard the route cached in this
+ * pcb, if it has one, in the hope that a better one will be found.
+ */
+ns_rtchange(nsp)
+	struct nspcb *nsp;
+{
+	struct route *ro = &nsp->nsp_route;
+
+	if (ro->ro_rt != 0) {
+		rtfree(ro->ro_rt);
+		ro->ro_rt = 0;
+		/*
+		 * Nothing is allocated here; a new route can be
+		 * obtained the next time output is attempted.
+		 */
+	}
+	/* SHOULD NOTIFY HIGHER-LEVEL PROTOCOLS */
+}
+#endif
+
+/*
+ * Find the pcb on the global nspcb list that best matches a foreign
+ * address and a local port.
+ *
+ * faddr	foreign address (host and port) to look for
+ * lport	local port; compared directly with each pcb's nsp_lport
+ * wildp	if zero, only matches that needed no wildcarding are
+ *		accepted (no type is declared for it, so it is an int)
+ *
+ * For each pcb on the right local port a wildcard count is computed:
+ * it is bumped when exactly one of the two foreign hosts is null, or
+ * when both hosts are equal but the ports differ and the pcb's
+ * foreign port is 0.  A pcb with a different non-null host, or a
+ * different non-zero port, is skipped.  The pcb with the smallest
+ * count wins; a count of 0 ends the search at once.
+ *
+ * Returns the chosen pcb, or 0 if none qualifies.
+ */
+struct nspcb *
+ns_pcblookup(faddr, lport, wildp)
+ struct ns_addr *faddr;
+ u_short lport;
+{
+ register struct nspcb *nsp, *match = 0;
+ /* matchwild starts above any count the loop below can produce. */
+ int matchwild = 3, wildcard;
+ u_short fport;
+
+ fport = faddr->x_port;
+ for (nsp = (&nspcb)->nsp_next; nsp != (&nspcb); nsp = nsp->nsp_next) {
+ if (nsp->nsp_lport != lport)
+ continue;
+ wildcard = 0;
+ if (ns_nullhost(nsp->nsp_faddr)) {
+ /* pcb has no foreign host; the caller may have named one */
+ if (!ns_nullhost(*faddr))
+ wildcard++;
+ } else {
+ if (ns_nullhost(*faddr))
+ wildcard++;
+ else {
+ /* both hosts given: they must agree */
+ if (!ns_hosteq(nsp->nsp_faddr, *faddr))
+ continue;
+ if (nsp->nsp_fport != fport) {
+ /* a zero foreign port in the pcb matches any port */
+ if (nsp->nsp_fport != 0)
+ continue;
+ else
+ wildcard++;
+ }
+ }
+ }
+ if (wildcard && wildp==0)
+ continue;
+ if (wildcard < matchwild) {
+ match = nsp;
+ matchwild = wildcard;
+ if (wildcard == 0)
+ break;
+ }
+ }
+ return (match);
+}
diff --git a/sys/netns/ns_pcb.h b/sys/netns/ns_pcb.h
new file mode 100644
index 000000000000..68cf744f738b
--- /dev/null
+++ b/sys/netns/ns_pcb.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ns_pcb.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Ns protocol interface control block.
+ *
+ * The first two members are the forward and backward links used by
+ * insque()/remque() in ns_pcb.c; the list head is itself a struct
+ * nspcb.  The local and foreign ports are stored inside nsp_laddr and
+ * nsp_faddr (see the nsp_lport/nsp_fport macros).
+ */
+struct nspcb {
+ struct nspcb *nsp_next; /* doubly linked list */
+ struct nspcb *nsp_prev; /* backward link of the same list */
+ struct nspcb *nsp_head; /* NOTE(review): not set by ns_pcballoc() */
+ struct socket *nsp_socket; /* back pointer to socket */
+ struct ns_addr nsp_faddr; /* destination address */
+ struct ns_addr nsp_laddr; /* socket's address */
+ caddr_t nsp_pcb; /* protocol specific stuff */
+ struct route nsp_route; /* routing information */
+ struct ns_addr nsp_lastdst; /* validate cached route for dg socks*/
+ long nsp_notify_param; /* extra info passed via ns_pcbnotify*/
+ short nsp_flags; /* presumably the NSP_* bits defined below */
+ u_char nsp_dpt; /* default packet type for idp_output*/
+ u_char nsp_rpt; /* last received packet type by
+ idp_input() */
+};
+
+/* possible flags (distinct bits; presumably stored in nsp_flags) */
+
+#define NSP_IN_ABORT 0x1 /* calling abort through socket */
+#define NSP_RAWIN 0x2 /* show headers on input */
+#define NSP_RAWOUT 0x4 /* show header on output */
+#define NSP_ALL_PACKETS 0x8 /* Turn off higher proto processing */
+
+/*
+ * NOTE(review): not used in ns_pcb.c; presumably meant as the wildp
+ * argument of ns_pcblookup() -- confirm against the callers.
+ */
+#define NS_WILDCARD 1
+
+/*
+ * The ports are members of the ns_addr structures, so assigning a
+ * whole address also assigns its port.
+ */
+#define nsp_lport nsp_laddr.x_port
+#define nsp_fport nsp_faddr.x_port
+
+/* Convert a socket pointer to the nspcb stored in its so_pcb field. */
+#define sotonspcb(so) ((struct nspcb *)((so)->so_pcb))
+
+/*
+ * Nominal space allocated to a ns socket.
+ * NSSNDQ and NSRCVQ are presumably the send and receive queue sizes,
+ * in bytes -- confirm against the code that reserves socket buffers.
+ */
+#define NSSNDQ 2048
+#define NSRCVQ 2048
+
+
+#ifdef KERNEL
+/*
+ * NOTE(review): nspcb is defined here (no "extern"), so every kernel
+ * file including this header carries a tentative definition of it.
+ * ns_pcb.c walks the list from this head and also uses its nsp_lport
+ * as the counter for choosing local ports.
+ */
+struct nspcb nspcb; /* head of list */
+struct nspcb *ns_pcblookup();
+#endif
diff --git a/sys/netns/ns_proto.c b/sys/netns/ns_proto.c
new file mode 100644
index 000000000000..fc9f8238c551
--- /dev/null
+++ b/sys/netns/ns_proto.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ns_proto.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <net/radix.h>
+
+#include <netns/ns.h>
+
+/*
+ * NS protocol family: IDP, ERR, PE, SPP, ROUTE.
+ *
+ * The routines named below are not defined in this file; they are
+ * declared here (old style, without prototypes) so that they can be
+ * placed in the protocol switch.
+ */
+int ns_init();
+int idp_input(), idp_output(), idp_ctlinput(), idp_usrreq();
+int idp_raw_usrreq(), idp_ctloutput();
+int spp_input(), spp_ctlinput();
+int spp_usrreq(), spp_usrreq_sp(), spp_ctloutput();
+int spp_init(), spp_fasttimo(), spp_slowtimo();
+extern int raw_usrreq();
+
+/* Declared ahead of its definition because nssw[] points back at it. */
+extern struct domain nsdomain;
+
+/*
+ * Protocol switch for the NS domain.  The initializers are positional,
+ * so the meaning of each slot is given by the member order of struct
+ * protosw in <sys/protosw.h> (not visible here).
+ */
+struct protosw nssw[] = {
+/* No socket type or protocol number: carries idp_output and ns_init. */
+{ 0, &nsdomain, 0, 0,
+ 0, idp_output, 0, 0,
+ 0,
+ ns_init, 0, 0, 0,
+},
+/* Datagram sockets, handled by idp_usrreq. */
+{ SOCK_DGRAM, &nsdomain, 0, PR_ATOMIC|PR_ADDR,
+ 0, 0, idp_ctlinput, idp_ctloutput,
+ idp_usrreq,
+ 0, 0, 0, 0,
+},
+/* SPP stream sockets; the only SPP entry with init and timer routines. */
+{ SOCK_STREAM, &nsdomain, NSPROTO_SPP, PR_CONNREQUIRED|PR_WANTRCVD,
+ spp_input, 0, spp_ctlinput, spp_ctloutput,
+ spp_usrreq,
+ spp_init, spp_fasttimo, spp_slowtimo, 0,
+},
+/* SPP sequenced-packet sockets: adds PR_ATOMIC and uses spp_usrreq_sp. */
+{ SOCK_SEQPACKET,&nsdomain, NSPROTO_SPP, PR_CONNREQUIRED|PR_WANTRCVD|PR_ATOMIC,
+ spp_input, 0, spp_ctlinput, spp_ctloutput,
+ spp_usrreq_sp,
+ 0, 0, 0, 0,
+},
+/* Raw sockets for NSPROTO_RAW. */
+{ SOCK_RAW, &nsdomain, NSPROTO_RAW, PR_ATOMIC|PR_ADDR,
+ idp_input, idp_output, 0, idp_ctloutput,
+ idp_raw_usrreq,
+ 0, 0, 0, 0,
+},
+/*
+ * Raw sockets for NSPROTO_ERROR.
+ * NOTE(review): the first routine slot holds idp_ctlinput where the
+ * other entries hold an *_input routine -- confirm this is intended.
+ */
+{ SOCK_RAW, &nsdomain, NSPROTO_ERROR, PR_ATOMIC|PR_ADDR,
+ idp_ctlinput, idp_output, 0, idp_ctloutput,
+ idp_raw_usrreq,
+ 0, 0, 0, 0,
+},
+};
+
+/*
+ * Domain descriptor for AF_NS.  It refers to nssw[] by its first
+ * element and by the address one past its last element.  The trailing
+ * values (rn_inithead, 16, sizeof(struct sockaddr_ns)) are positional;
+ * presumably the routing-table attach routine and its key offset and
+ * maximum key size -- confirm against struct domain in <sys/domain.h>.
+ */
+struct domain nsdomain =
+ { AF_NS, "network systems", 0, 0, 0,
+ nssw, &nssw[sizeof(nssw)/sizeof(nssw[0])], 0,
+ rn_inithead, 16, sizeof(struct sockaddr_ns)};
+
diff --git a/sys/netns/sp.h b/sys/netns/sp.h
new file mode 100644
index 000000000000..b55dac26039b
--- /dev/null
+++ b/sys/netns/sp.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)sp.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for Xerox NS style sequenced packet protocol
+ */
+
+/*
+ * Sequenced packet protocol header.  spidp.h places it directly after
+ * the IDP header in struct spidp.
+ */
+struct sphdr {
+ u_char sp_cc; /* connection control */
+ u_char sp_dt; /* datastream type */
+/*
+ * The SP_* bits below are connection-control flags: spp_trace() tests
+ * them against sp_cc (via si_cc), although they are listed after sp_dt.
+ */
+#define SP_SP 0x80 /* system packet */
+#define SP_SA 0x40 /* send acknowledgement */
+#define SP_OB 0x20 /* attention (out of band data) */
+#define SP_EM 0x10 /* end of message */
+ u_short sp_sid; /* source connection identifier */
+ u_short sp_did; /* destination connection identifier */
+ u_short sp_seq; /* sequence number */
+ u_short sp_ack; /* acknowledge number */
+ u_short sp_alo; /* allocation number */
+};
diff --git a/sys/netns/spidp.h b/sys/netns/spidp.h
new file mode 100644
index 000000000000..332df5be2353
--- /dev/null
+++ b/sys/netns/spidp.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)spidp.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for NS(tm) Internet Datagram Protocol
+ * containing a Sequenced Packet Protocol packet.
+ */
+struct spidp {
+ struct idp si_i; /* IDP header */
+ struct sphdr si_s; /* SPP header that follows it */
+};
+/*
+ * Bare pair of forward/backward links.
+ * NOTE(review): presumably overlaid on queued packets to chain them --
+ * no user of it is visible here; confirm against the SPP input code.
+ */
+struct spidp_q {
+ struct spidp_q *si_next;
+ struct spidp_q *si_prev;
+};
+/*
+ * SI() views a pointer as a struct spidp pointer.  Its argument is
+ * parenthesized so that an expression such as SI(p + 1) casts the
+ * whole expression rather than just p.
+ *
+ * The si_* names reach through the two embedded headers, so that a
+ * struct spidp can be used as if the IDP and SPP fields were its own
+ * members (si->si_seq, si->si_dna, ...).
+ */
+#define SI(x) ((struct spidp *)(x))
+#define si_sum si_i.idp_sum
+#define si_len si_i.idp_len
+#define si_tc si_i.idp_tc
+#define si_pt si_i.idp_pt
+#define si_dna si_i.idp_dna
+#define si_sna si_i.idp_sna
+#define si_sport si_i.idp_sna.x_port
+#define si_cc si_s.sp_cc
+#define si_dt si_s.sp_dt
+#define si_sid si_s.sp_sid
+#define si_did si_s.sp_did
+#define si_seq si_s.sp_seq
+#define si_ack si_s.sp_ack
+#define si_alo si_s.sp_alo
diff --git a/sys/netns/spp_debug.c b/sys/netns/spp_debug.c
new file mode 100644
index 000000000000..eaa1d023f879
--- /dev/null
+++ b/sys/netns/spp_debug.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)spp_debug.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+#include <netinet/tcp_fsm.h>
+
+#include <netns/ns.h>
+#include <netns/ns_pcb.h>
+#include <netns/idp.h>
+#include <netns/idp_var.h>
+#include <netns/sp.h>
+#include <netns/spidp.h>
+#define SPPTIMERS
+#include <netns/spp_timer.h>
+#include <netns/spp_var.h>
+#define SANAMES
+#include <netns/spp_debug.h>
+
+int sppconsdebug = 0;
+/*
+ * spp debug routines
+ */
+spp_trace(act, ostate, sp, si, req)
+ short act;
+ u_char ostate;
+ struct sppcb *sp;
+ struct spidp *si;
+ int req;
+{
+#ifdef INET
+#ifdef TCPDEBUG
+ u_short seq, ack, len, alo;
+ unsigned long iptime();
+ int flags;
+ struct spp_debug *sd = &spp_debug[spp_debx++];
+ extern char *prurequests[];
+ extern char *sanames[];
+ extern char *tcpstates[];
+ extern char *spptimers[];
+
+ if (spp_debx == SPP_NDEBUG)
+ spp_debx = 0;
+ sd->sd_time = iptime();
+ sd->sd_act = act;
+ sd->sd_ostate = ostate;
+ sd->sd_cb = (caddr_t)sp;
+ if (sp)
+ sd->sd_sp = *sp;
+ else
+ bzero((caddr_t)&sd->sd_sp, sizeof (*sp));
+ if (si)
+ sd->sd_si = *si;
+ else
+ bzero((caddr_t)&sd->sd_si, sizeof (*si));
+ sd->sd_req = req;
+ if (sppconsdebug == 0)
+ return;
+ if (ostate >= TCP_NSTATES) ostate = 0;
+ if (act >= SA_DROP) act = SA_DROP;
+ if (sp)
+ printf("%x %s:", sp, tcpstates[ostate]);
+ else
+ printf("???????? ");
+ printf("%s ", sanames[act]);
+ switch (act) {
+
+ case SA_RESPOND:
+ case SA_INPUT:
+ case SA_OUTPUT:
+ case SA_DROP:
+ if (si == 0)
+ break;
+ seq = si->si_seq;
+ ack = si->si_ack;
+ alo = si->si_alo;
+ len = si->si_len;
+ if (act == SA_OUTPUT) {
+ seq = ntohs(seq);
+ ack = ntohs(ack);
+ alo = ntohs(alo);
+ len = ntohs(len);
+ }
+#ifndef lint
+#define p1(f) { printf("%s = %x, ", "f", f); }
+ p1(seq); p1(ack); p1(alo); p1(len);
+#endif
+ flags = si->si_cc;
+ if (flags) {
+ char *cp = "<";
+#ifndef lint
+#define pf(f) { if (flags&SP_/**/f) { printf("%s%s", cp, "f"); cp = ","; } }
+ pf(SP); pf(SA); pf(OB); pf(EM);
+#else
+ cp = cp;
+#endif
+ printf(">");
+ }
+#ifndef lint
+#define p2(f) { printf("%s = %x, ", "f", si->si_/**/f); }
+ p2(sid);p2(did);p2(dt);p2(pt);
+#endif
+ ns_printhost(&si->si_sna);
+ ns_printhost(&si->si_dna);
+
+ if (act==SA_RESPOND) {
+ printf("idp_len = %x, ",
+ ((struct idp *)si)->idp_len);
+ }
+ break;
+
+ case SA_USER:
+ printf("%s", prurequests[req&0xff]);
+ if ((req & 0xff) == PRU_SLOWTIMO)
+ printf("<%s>", spptimers[req>>8]);
+ break;
+ }
+ if (sp)
+ printf(" -> %s", tcpstates[sp->s_state]);
+ /* print out internal state of sp !?! */
+ printf("\n");
+ if (sp == 0)
+ return;
+#ifndef lint
+#define p3(f) { printf("%s = %x, ", "f", sp->s_/**/f); }
+ printf("\t"); p3(rack);p3(ralo);p3(smax);p3(flags); printf("\n");
+#endif
+#endif
+#endif
+}
diff --git a/sys/netns/spp_debug.h b/sys/netns/spp_debug.h
new file mode 100644
index 000000000000..8dfe24220690
--- /dev/null
+++ b/sys/netns/spp_debug.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)spp_debug.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * One recorded SPP trace event, as filled in by spp_trace().
+ */
+struct spp_debug {
+ u_long sd_time; /* value of iptime() when recorded */
+ short sd_act; /* SA_* action code */
+ short sd_ostate; /* connection state before the event */
+ caddr_t sd_cb; /* address of the sppcb traced */
+ short sd_req; /* user request; spp_trace() stores an int here */
+ struct spidp sd_si; /* copy of the packet header, or zeros */
+ struct sppcb sd_sp; /* copy of the control block, or zeros */
+};
+
+/*
+ * Action codes passed to spp_trace().  SA_DROP is the largest;
+ * spp_trace() prints anything above it as SA_DROP.
+ */
+#define SA_INPUT 0
+#define SA_OUTPUT 1
+#define SA_USER 2
+#define SA_RESPOND 3
+#define SA_DROP 4
+
+/*
+ * Printable names for the codes above, indexed by code.  The table is
+ * defined only in a file that defines SANAMES before including this
+ * header.
+ */
+#ifdef SANAMES
+char *sanames[] =
+ { "input", "output", "user", "respond", "drop" };
+#endif
+
+/*
+ * Ring of trace records and the index of the next slot to fill;
+ * spp_trace() wraps the index back to 0 at SPP_NDEBUG.
+ * NOTE(review): both objects are defined here (no "extern"), so every
+ * file including this header carries a tentative definition of them.
+ */
+#define SPP_NDEBUG 100
+struct spp_debug spp_debug[SPP_NDEBUG];
+int spp_debx;
diff --git a/sys/netns/spp_timer.h b/sys/netns/spp_timer.h
new file mode 100644
index 000000000000..f84e3282a234
--- /dev/null
+++ b/sys/netns/spp_timer.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)spp_timer.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions of the SPP timers. These timers are counted
+ * down PR_SLOWHZ times a second.
+ *
+ * The SPPT_* values below are used as indices into the per-connection
+ * timer array (cb->s_timer[]).
+ */
+#define SPPT_NTIMERS 4
+
+#define SPPT_REXMT 0 /* retransmit */
+#define SPPT_PERSIST 1 /* retransmit persistence */
+#define SPPT_KEEP 2 /* keep alive */
+#define SPPT_2MSL 3 /* 2*msl quiet time timer */
+
+/*
+ * The SPPT_REXMT timer is used to force retransmissions.
+ * The SPP has the SPPT_REXMT timer set whenever segments
+ * have been sent for which ACKs are expected but not yet
+ * received. If an ACK is received which advances cb->s_rack,
+ * then the retransmit timer is cleared (if there are no more
+ * outstanding segments) or reset to the base value (if there
+ * are more ACKs expected). Whenever the retransmit timer goes off,
+ * we retransmit one unacknowledged segment, and do a backoff
+ * on the retransmit timer.
+ *
+ * The SPPT_PERSIST timer is used to keep window size information
+ * flowing even if the window goes shut. If all previous transmissions
+ * have been acknowledged (so that there are no retransmissions in progress),
+ * and the window is too small to bother sending anything, then we start
+ * the SPPT_PERSIST timer. When it expires, if the window is nonzero,
+ * we go to transmit state. Otherwise, at intervals send a single byte
+ * into the peer's window to force him to update our window information.
+ * We do this at most as often as SPPTV_PERSMIN time intervals,
+ * but no more frequently than the current estimate of round-trip
+ * packet time. The SPPT_PERSIST timer is cleared whenever we receive
+ * a window update from the peer.
+ *
+ * The SPPT_KEEP timer is used to keep connections alive. If a
+ * connection is idle (no segments received) for SPPTV_KEEP amount of time,
+ * but not yet established, then we drop the connection. If the connection
+ * is established, then we force the peer to send us a segment by sending:
+ * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
+ * This segment is (deliberately) outside the window, and should elicit
+ * an ack segment in response from the peer. If, despite the SPPT_KEEP
+ * initiated segments we cannot elicit a response from a peer in SPPTV_MAXIDLE
+ * amount of time, then we drop the connection.
+ */
+
+#define SPP_TTL 30 /* default time to live for SPP segs */
+/*
+ * Time constants.
+ * The SPPTV_* values are multiples of PR_SLOWHZ, i.e. they are expressed
+ * in the same units the SPP timers are counted down in.
+ */
+#define SPPTV_MSL ( 15*PR_SLOWHZ) /* max seg lifetime */
+#define SPPTV_SRTTBASE 0 /* base roundtrip time;
+ if 0, no idea yet */
+#define SPPTV_SRTTDFLT ( 3*PR_SLOWHZ) /* assumed RTT if no info */
+
+#define SPPTV_PERSMIN ( 5*PR_SLOWHZ) /* minimum persist interval */
+#define SPPTV_PERSMAX ( 60*PR_SLOWHZ) /* maximum persist interval */
+
+#define SPPTV_KEEP ( 75*PR_SLOWHZ) /* keep alive - 75 secs */
+#define SPPTV_MAXIDLE ( 8*SPPTV_KEEP) /* maximum allowable idle
+ time before drop conn */
+
+#define SPPTV_MIN ( 1*PR_SLOWHZ) /* minimum allowable value */
+#define SPPTV_REXMTMAX ( 64*PR_SLOWHZ) /* max allowable REXMT value */
+
+#define SPP_LINGERTIME 120 /* linger at most 2 minutes */
+
+#define SPP_MAXRXTSHIFT 12 /* maximum retransmits */
+
+#ifdef SPPTIMERS
+char *spptimers[] = /* printable timer names, listed in SPPT_* index order */
+ { "REXMT", "PERSIST", "KEEP", "2MSL" };
+#endif
+
+/*
+ * Force a time value to be in a certain range.
+ */
+#define SPPT_RANGESET(tv, value, tvmin, tvmax) { \
+ (tv) = (value); \
+ if ((tv) < (tvmin)) \
+ (tv) = (tvmin); \
+ else if ((tv) > (tvmax)) \
+ (tv) = (tvmax); \
+}
+
+#ifdef KERNEL
+extern int spp_backoff[]; /* timer backoff multipliers, indexed by cb->s_rxtshift */
+#endif
diff --git a/sys/netns/spp_usrreq.c b/sys/netns/spp_usrreq.c
new file mode 100644
index 000000000000..062bbec5fab9
--- /dev/null
+++ b/sys/netns/spp_usrreq.c
@@ -0,0 +1,1804 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)spp_usrreq.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/tcp_fsm.h>
+
+#include <netns/ns.h>
+#include <netns/ns_pcb.h>
+#include <netns/idp.h>
+#include <netns/idp_var.h>
+#include <netns/ns_error.h>
+#include <netns/sp.h>
+#include <netns/spidp.h>
+#include <netns/spp_timer.h>
+#include <netns/spp_var.h>
+#include <netns/spp_debug.h>
+
+/*
+ * SP protocol implementation.
+ *
+ * spp_init() performs the protocol's one-time setup; all it does is
+ * give the global spp_iss a fixed starting value of 1 (presumably the
+ * initial send sequence number -- the original author already flags
+ * the constant as wrong).
+ */
+spp_init()
+{
+
+ spp_iss = 1; /* WRONG !! should fish it out of TODR */
+}
+struct spidp spp_savesi; /* copy of the input header, saved by spp_input() for spp_trace() */
+int traceallspps = 0; /* nonzero: trace every connection, not only SO_DEBUG sockets */
+extern int sppconsdebug; /* not referenced in this part of the file -- presumably a console-debug switch */
+int spp_hardnosed; /* nonzero: a listener rejects packets whose si_did or si_seq is nonzero */
+int spp_use_delack = 0; /* nonzero: answer SP_SA with SF_DELACK instead of SF_ACKNOW */
+u_short spp_newchecks[50]; /* ad hoc event counters bumped on the SF_NEWCALL code paths */
+
+/*
+ * SPP input routine: process one packet `m' that was delivered to the
+ * NS protocol control block `nsp'.
+ *
+ * The sequence, acknowledge and allocation fields of the header are
+ * converted to host byte order in place.  If the receiving socket is
+ * accepting connections, a new socket is created with sonewconn() and
+ * the remainder of the processing is applied to it.  The connection
+ * state machine is then advanced (the TCPS_* state names are borrowed
+ * from TCP) and the packet is passed to spp_reass().  Finally
+ * spp_output() is called if something has to be sent in response.
+ *
+ * The mbuf chain is always disposed of here: it is queued or consumed
+ * by spp_reass(), handed to ns_error(), or freed.
+ *
+ * The socket created for a listener stays "temporary" (dropsocket != 0)
+ * until the TCPS_LISTEN case commits to it.  Both error exits abort a
+ * temporary socket, and they do so only after the last use of `cb',
+ * because `cb' belongs to that socket and may no longer be valid once
+ * soabort() has run.
+ */
+/*ARGSUSED*/
+spp_input(m, nsp)
+	register struct mbuf *m;
+	register struct nspcb *nsp;
+{
+	register struct sppcb *cb;
+	register struct spidp *si = mtod(m, struct spidp *);
+	register struct socket *so;
+	short ostate = 0;	/* only meaningful when tracing was on at entry */
+	int dropsocket = 0;
+
+
+	sppstat.spps_rcvtotal++;
+	if (nsp == 0) {
+		panic("No nspcb in spp_input\n");
+		return;
+	}
+
+	cb = nstosppcb(nsp);
+	if (cb == 0) goto bad;
+
+	if (m->m_len < sizeof(*si)) {
+		if ((m = m_pullup(m, sizeof(*si))) == 0) {
+			sppstat.spps_rcvshort++;
+			return;
+		}
+		si = mtod(m, struct spidp *);
+	}
+	si->si_seq = ntohs(si->si_seq);
+	si->si_ack = ntohs(si->si_ack);
+	si->si_alo = ntohs(si->si_alo);
+
+	so = nsp->nsp_socket;
+	if (so->so_options & SO_DEBUG || traceallspps) {
+		ostate = cb->s_state;
+		spp_savesi = *si;
+	}
+	if (so->so_options & SO_ACCEPTCONN) {
+		struct sppcb *ocb = cb;
+
+		so = sonewconn(so, 0);
+		if (so == 0) {
+			goto drop;
+		}
+		/*
+		 * This is ugly, but ....
+		 *
+		 * Mark socket as temporary until we're
+		 * committed to keeping it.  The code at
+		 * ``drop'' and ``dropwithreset'' check the
+		 * flag dropsocket to see if the temporary
+		 * socket created here should be discarded.
+		 * We mark the socket as discardable until
+		 * we're committed to it below in TCPS_LISTEN.
+		 */
+		dropsocket++;
+		nsp = (struct nspcb *)so->so_pcb;
+		nsp->nsp_laddr = si->si_dna;
+		cb = nstosppcb(nsp);
+		cb->s_mtu = ocb->s_mtu;		/* preserve sockopts */
+		cb->s_flags = ocb->s_flags;	/* preserve sockopts */
+		cb->s_flags2 = ocb->s_flags2;	/* preserve sockopts */
+		cb->s_state = TCPS_LISTEN;
+	}
+
+	/*
+	 * Packet received on connection.
+	 * reset idle time and keep-alive timer;
+	 */
+	cb->s_idle = 0;
+	cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
+
+	switch (cb->s_state) {
+
+	case TCPS_LISTEN:{
+		struct mbuf *am;
+		register struct sockaddr_ns *sns;
+		struct ns_addr laddr;
+
+		/*
+		 * If somebody here was carrying on a conversation
+		 * and went away, and his pen pal thinks he can
+		 * still talk, we get the misdirected packet.
+		 */
+		if (spp_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
+			spp_istat.gonawy++;
+			goto dropwithreset;
+		}
+		am = m_get(M_DONTWAIT, MT_SONAME);
+		if (am == NULL)
+			goto drop;
+		am->m_len = sizeof (struct sockaddr_ns);
+		sns = mtod(am, struct sockaddr_ns *);
+		sns->sns_len = sizeof(*sns);
+		sns->sns_family = AF_NS;
+		sns->sns_addr = si->si_sna;
+		laddr = nsp->nsp_laddr;
+		if (ns_nullhost(laddr))
+			nsp->nsp_laddr = si->si_dna;
+		if (ns_pcbconnect(nsp, am)) {
+			nsp->nsp_laddr = laddr;
+			(void) m_free(am);
+			spp_istat.noconn++;
+			goto drop;
+		}
+		(void) m_free(am);
+		spp_template(cb);
+		dropsocket = 0;		/* committed to socket */
+		cb->s_did = si->si_sid;
+		cb->s_rack = si->si_ack;
+		cb->s_ralo = si->si_alo;
+#define THREEWAYSHAKE
+#ifdef THREEWAYSHAKE
+		cb->s_state = TCPS_SYN_RECEIVED;
+		cb->s_force = 1 + SPPT_KEEP;
+		sppstat.spps_accepts++;
+		cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
+		}
+		break;
+	/*
+	 * This state means that we have heard a response
+	 * to our acceptance of their connection
+	 * It is probably logically unnecessary in this
+	 * implementation.
+	 */
+	 case TCPS_SYN_RECEIVED: {
+		if (si->si_did!=cb->s_sid) {
+			spp_istat.wrncon++;
+			goto drop;
+		}
+#endif
+		nsp->nsp_fport = si->si_sport;
+		cb->s_timer[SPPT_REXMT] = 0;
+		cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
+		soisconnected(so);
+		cb->s_state = TCPS_ESTABLISHED;
+		sppstat.spps_accepts++;
+		}
+		break;
+
+	/*
+	 * This state means that we have gotten a response
+	 * to our attempt to establish a connection.
+	 * We fill in the data from the other side,
+	 * telling us which port to respond to, instead of the well-
+	 * known one we might have sent to in the first place.
+	 * We also require that this is a response to our
+	 * connection id.
+	 */
+	case TCPS_SYN_SENT:
+		if (si->si_did!=cb->s_sid) {
+			spp_istat.notme++;
+			goto drop;
+		}
+		sppstat.spps_connects++;
+		cb->s_did = si->si_sid;
+		cb->s_rack = si->si_ack;
+		cb->s_ralo = si->si_alo;
+		cb->s_dport = nsp->nsp_fport = si->si_sport;
+		cb->s_timer[SPPT_REXMT] = 0;
+		cb->s_flags |= SF_ACKNOW;
+		soisconnected(so);
+		cb->s_state = TCPS_ESTABLISHED;
+		/* Use roundtrip time of connection request for initial rtt */
+		if (cb->s_rtt) {
+			cb->s_srtt = cb->s_rtt << 3;
+			cb->s_rttvar = cb->s_rtt << 1;
+			SPPT_RANGESET(cb->s_rxtcur,
+			    ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
+			    SPPTV_MIN, SPPTV_REXMTMAX);
+			cb->s_rtt = 0;
+		}
+	}
+	if (so->so_options & SO_DEBUG || traceallspps)
+		spp_trace(SA_INPUT, (u_char)ostate, cb, &spp_savesi, 0);
+
+	/* Step over the IDP header; si still points at the start of it. */
+	m->m_len -= sizeof (struct idp);
+	m->m_pkthdr.len -= sizeof (struct idp);
+	m->m_data += sizeof (struct idp);
+
+	/* A nonzero return means the packet was not kept and is ours to free. */
+	if (spp_reass(cb, si)) {
+		(void) m_freem(m);
+	}
+	if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
+		(void) spp_output(cb, (struct mbuf *)0);
+	cb->s_flags &= ~(SF_WIN|SF_RXT);
+	return;
+
+dropwithreset:
+	/* Put the header back into network byte order for the error reply. */
+	si->si_seq = htons(si->si_seq);
+	si->si_ack = htons(si->si_ack);
+	si->si_alo = htons(si->si_alo);
+	/* The packet is handed to ns_error() and not freed here. */
+	ns_error(dtom(si), NS_ERR_NOSOCK, 0);
+	if (cb->s_nspcb->nsp_socket->so_options & SO_DEBUG || traceallspps)
+		spp_trace(SA_DROP, (u_char)ostate, cb, &spp_savesi, 0);
+	/* Destroy the temporarily created socket; cb is not used after this. */
+	if (dropsocket)
+		(void) soabort(so);
+	return;
+
+drop:
+bad:
+	if (cb == 0 || cb->s_nspcb->nsp_socket->so_options & SO_DEBUG ||
+	    traceallspps)
+		spp_trace(SA_DROP, (u_char)ostate, cb, &spp_savesi, 0);
+	m_freem(m);
+	/* Destroy the temporarily created socket; cb is not used after this. */
+	if (dropsocket)
+		(void) soabort(so);
+}
+
+int spprexmtthresh = 3; /* number of identical duplicate acks that triggers a retransmit */
+
+/*
+ * This is structurally similar to the tcp reassembly routine
+ * but its function is somewhat different: It merely queues
+ * packets up, and suppresses duplicates.
+ *
+ * cb is the connection's control block and si the header of the
+ * packet just received (already in host byte order).  When si is
+ * SI(0) no packet is examined; only the "present" step runs, which
+ * hands queued in-sequence packets to the socket's receive buffer.
+ *
+ * The routine first digests the acknowledgement and window
+ * information carried by the packet (duplicate-ack detection, round
+ * trip timing, congestion window, trimming of the send queue), then
+ * decides whether the packet itself is to be queued.
+ *
+ * Returns nonzero when the packet was not kept, in which case the
+ * caller frees it (spp_input() does).  Returns 0 when the packet was
+ * queued, or when it was passed on to ns_error().
+ */
+spp_reass(cb, si)
+register struct sppcb *cb;
+register struct spidp *si;
+{
+ register struct spidp_q *q;
+ register struct mbuf *m;
+ register struct socket *so = cb->s_nspcb->nsp_socket;
+ char packetp = cb->s_flags & SF_HI; /* nonzero: deliver packets with their headers */
+ int incr;
+ char wakeup = 0; /* set once something was appended to the receive buffer */
+
+ if (si == SI(0))
+ goto present;
+ /*
+ * Update our news from them.
+ */
+ if (si->si_cc & SP_SA)
+ cb->s_flags |= (spp_use_delack ? SF_DELACK : SF_ACKNOW);
+ if (SSEQ_GT(si->si_alo, cb->s_ralo))
+ cb->s_flags |= SF_WIN;
+ if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
+ if ((si->si_cc & SP_SP) && cb->s_rack != (cb->s_smax + 1)) {
+ sppstat.spps_rcvdupack++;
+ /*
+ * If this is a completely duplicate ack
+ * and other conditions hold, we assume
+ * a packet has been dropped and retransmit
+ * it exactly as in tcp_input().
+ */
+ if (si->si_ack != cb->s_rack ||
+ si->si_alo != cb->s_ralo)
+ cb->s_dupacks = 0;
+ else if (++cb->s_dupacks == spprexmtthresh) {
+ u_short onxt = cb->s_snxt; /* restored below if still ahead */
+ int cwnd = cb->s_cwnd;
+
+ cb->s_snxt = si->si_ack;
+ cb->s_cwnd = CUNIT;
+ cb->s_force = 1 + SPPT_REXMT;
+ (void) spp_output(cb, (struct mbuf *)0);
+ cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
+ cb->s_rtt = 0;
+ if (cwnd >= 4 * CUNIT)
+ cb->s_cwnd = cwnd / 2;
+ if (SSEQ_GT(onxt, cb->s_snxt))
+ cb->s_snxt = onxt;
+ return (1);
+ }
+ } else
+ cb->s_dupacks = 0;
+ goto update_window;
+ }
+ cb->s_dupacks = 0;
+ /*
+ * If our correspondent acknowledges data we haven't sent
+ * TCP would drop the packet after acking. We'll be a little
+ * more permissive
+ */
+ if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
+ sppstat.spps_rcvacktoomuch++;
+ si->si_ack = cb->s_smax + 1;
+ }
+ sppstat.spps_rcvackpack++;
+ /*
+ * If transmit timer is running and timed sequence
+ * number was acked, update smoothed round trip time.
+ * See discussion of algorithm in tcp_input.c
+ */
+ if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
+ sppstat.spps_rttupdated++;
+ if (cb->s_srtt != 0) {
+ register short delta;
+ delta = cb->s_rtt - (cb->s_srtt >> 3);
+ if ((cb->s_srtt += delta) <= 0)
+ cb->s_srtt = 1;
+ if (delta < 0)
+ delta = -delta;
+ delta -= (cb->s_rttvar >> 2);
+ if ((cb->s_rttvar += delta) <= 0)
+ cb->s_rttvar = 1;
+ } else {
+ /*
+ * No rtt measurement yet
+ */
+ cb->s_srtt = cb->s_rtt << 3;
+ cb->s_rttvar = cb->s_rtt << 1;
+ }
+ cb->s_rtt = 0;
+ cb->s_rxtshift = 0;
+ SPPT_RANGESET(cb->s_rxtcur,
+ ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
+ SPPTV_MIN, SPPTV_REXMTMAX);
+ }
+ /*
+ * If all outstanding data is acked, stop retransmit
+ * timer and remember to restart (more output or persist).
+ * If there is more data to be acked, restart retransmit
+ * timer, using current (possibly backed-off) value;
+ */
+ if (si->si_ack == cb->s_smax + 1) {
+ cb->s_timer[SPPT_REXMT] = 0;
+ cb->s_flags |= SF_RXT;
+ } else if (cb->s_timer[SPPT_PERSIST] == 0)
+ cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
+ /*
+ * When new data is acked, open the congestion window.
+ * If the window gives us less than ssthresh packets
+ * in flight, open exponentially (maxseg at a time).
+ * Otherwise open linearly (maxseg^2 / cwnd at a time).
+ */
+ incr = CUNIT;
+ if (cb->s_cwnd > cb->s_ssthresh)
+ incr = max(incr * incr / cb->s_cwnd, 1);
+ cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
+ /*
+ * Trim Acked data from output queue.
+ */
+ while ((m = so->so_snd.sb_mb) != NULL) {
+ if (SSEQ_LT((mtod(m, struct spidp *))->si_seq, si->si_ack))
+ sbdroprecord(&so->so_snd);
+ else
+ break;
+ }
+ sowwakeup(so);
+ cb->s_rack = si->si_ack;
+update_window:
+ if (SSEQ_LT(cb->s_snxt, cb->s_rack))
+ cb->s_snxt = cb->s_rack;
+ if (SSEQ_LT(cb->s_swl1, si->si_seq) || cb->s_swl1 == si->si_seq &&
+ (SSEQ_LT(cb->s_swl2, si->si_ack) ||
+ cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo))) {
+ /* keep track of pure window updates */
+ if ((si->si_cc & SP_SP) && cb->s_swl2 == si->si_ack
+ && SSEQ_LT(cb->s_ralo, si->si_alo)) {
+ sppstat.spps_rcvwinupd++;
+ sppstat.spps_rcvdupack--;
+ }
+ cb->s_ralo = si->si_alo;
+ cb->s_swl1 = si->si_seq;
+ cb->s_swl2 = si->si_ack;
+ cb->s_swnd = (1 + si->si_alo - si->si_ack);
+ if (cb->s_swnd > cb->s_smxw)
+ cb->s_smxw = cb->s_swnd;
+ cb->s_flags |= SF_WIN;
+ }
+ /*
+ * If this packet number is higher than that which
+ * we have allocated refuse it, unless urgent
+ */
+ if (SSEQ_GT(si->si_seq, cb->s_alo)) {
+ if (si->si_cc & SP_SP) {
+ sppstat.spps_rcvwinprobe++;
+ return (1);
+ } else
+ sppstat.spps_rcvpackafterwin++;
+ if (si->si_cc & SP_OB) {
+ if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
+ ns_error(dtom(si), NS_ERR_FULLUP, 0);
+ return (0); /* packet went to ns_error(); caller must not free it */
+ } /* else queue this packet; */
+ } else {
+ /*register struct socket *so = cb->s_nspcb->nsp_socket;
+ if (so->so_state && SS_NOFDREF) {
+ ns_error(dtom(si), NS_ERR_NOSOCK, 0);
+ (void)spp_close(cb);
+ } else
+ would crash system*/
+ spp_istat.notyet++;
+ ns_error(dtom(si), NS_ERR_FULLUP, 0);
+ return (0); /* packet went to ns_error(); caller must not free it */
+ }
+ }
+ /*
+ * If this is a system packet, we don't need to
+ * queue it up, and won't update acknowledge #
+ */
+ if (si->si_cc & SP_SP) {
+ return (1);
+ }
+ /*
+ * We have already seen this packet, so drop.
+ */
+ if (SSEQ_LT(si->si_seq, cb->s_ack)) {
+ spp_istat.bdreas++;
+ sppstat.spps_rcvduppack++;
+ if (si->si_seq == cb->s_ack - 1)
+ spp_istat.lstdup++;
+ return (1);
+ }
+ /*
+ * Loop through all packets queued up to insert in
+ * appropriate sequence.
+ */
+ for (q = cb->s_q.si_next; q!=&cb->s_q; q = q->si_next) {
+ if (si->si_seq == SI(q)->si_seq) {
+ sppstat.spps_rcvduppack++;
+ return (1);
+ }
+ if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
+ sppstat.spps_rcvoopack++;
+ break;
+ }
+ }
+ insque(si, q->si_prev);
+ /*
+ * If this packet is urgent, inform process
+ */
+ if (si->si_cc & SP_OB) {
+ cb->s_iobc = ((char *)si)[1 + sizeof(*si)]; /* NOTE(review): offset is one past the first byte after the header -- confirm */
+ sohasoutofband(so);
+ cb->s_oobflags |= SF_IOOB;
+ }
+present:
+#define SPINC sizeof(struct sphdr)
+ /*
+ * Loop through all packets queued up to update acknowledge
+ * number, and present all acknowledged data to user;
+ * If in packet interface mode, show packet headers.
+ */
+ for (q = cb->s_q.si_next; q!=&cb->s_q; q = q->si_next) {
+ if (SI(q)->si_seq == cb->s_ack) {
+ cb->s_ack++;
+ m = dtom(q);
+ if (SI(q)->si_cc & SP_OB) {
+ cb->s_oobflags &= ~SF_IOOB;
+ if (so->so_rcv.sb_cc)
+ so->so_oobmark = so->so_rcv.sb_cc;
+ else
+ so->so_state |= SS_RCVATMARK;
+ }
+ q = q->si_prev; /* step back first so the loop's q->si_next still works after the unlink */
+ remque(q->si_next);
+ wakeup = 1;
+ sppstat.spps_rcvpack++;
+#ifdef SF_NEWCALL
+ if (cb->s_flags2 & SF_NEWCALL) {
+ struct sphdr *sp = mtod(m, struct sphdr *);
+ u_char dt = sp->sp_dt;
+ spp_newchecks[4]++;
+ if (dt != cb->s_rhdr.sp_dt) { /* datastream type changed: queue a control mbuf carrying it */
+ struct mbuf *mm =
+ m_getclr(M_DONTWAIT, MT_CONTROL);
+ spp_newchecks[0]++;
+ if (mm != NULL) {
+ u_short *s =
+ mtod(mm, u_short *);
+ cb->s_rhdr.sp_dt = dt;
+ mm->m_len = 5; /*XXX*/
+ s[0] = 5;
+ s[1] = 1;
+ *(u_char *)(&s[2]) = dt;
+ sbappend(&so->so_rcv, mm);
+ }
+ }
+ if (sp->sp_cc & SP_OB) {
+ MCHTYPE(m, MT_OOBDATA);
+ spp_newchecks[1]++;
+ so->so_oobmark = 0;
+ so->so_state &= ~SS_RCVATMARK;
+ }
+ if (packetp == 0) {
+ m->m_data += SPINC;
+ m->m_len -= SPINC;
+ m->m_pkthdr.len -= SPINC;
+ }
+ if ((sp->sp_cc & SP_EM) || packetp) {
+ sbappendrecord(&so->so_rcv, m);
+ spp_newchecks[9]++;
+ } else
+ sbappend(&so->so_rcv, m);
+ } else
+#endif
+ if (packetp) {
+ sbappendrecord(&so->so_rcv, m);
+ } else {
+ cb->s_rhdr = *mtod(m, struct sphdr *); /* remember the header, then strip it */
+ m->m_data += SPINC;
+ m->m_len -= SPINC;
+ m->m_pkthdr.len -= SPINC;
+ sbappend(&so->so_rcv, m);
+ }
+ } else
+ break;
+ }
+ if (wakeup) sorwakeup(so);
+ return (0);
+}
+
+/*
+ * Control input: react to the protocol control request `cmd'.
+ *
+ * For PRC_IFDOWN, PRC_HOSTDEAD and PRC_HOSTUNREACH, `arg' is a
+ * struct sockaddr_ns naming the affected address; anything that is
+ * not AF_NS is ignored.  PRC_ROUTEDEAD is ignored.  For every other
+ * command `arg' is a struct ns_errp (an NS error packet) and the
+ * error number it carries selects the action:
+ *
+ *	NS_ERR_UNREACH_HOST		abort matching connections
+ *	NS_ERR_TOO_BIG, NS_ERR_NOSOCK	drop the one connection found
+ *	NS_ERR_FULLUP			shrink the congestion window
+ *
+ * `cmd' is used as an index into nsctlerrmap[], so it has to be
+ * strictly below PRC_NCMDS (the table is presumably sized PRC_NCMDS).
+ */
+spp_ctlinput(cmd, arg)
+	int cmd;
+	caddr_t arg;
+{
+	struct ns_addr *na;
+	extern u_char nsctlerrmap[];
+	extern spp_abort(), spp_quench();
+	extern struct nspcb *idp_drop();
+	struct ns_errp *errp = 0;	/* only set when arg is an error packet */
+	struct nspcb *nsp;
+	struct sockaddr_ns *sns;
+	int type;
+
+	if (cmd < 0 || cmd >= PRC_NCMDS)
+		return;
+	type = NS_ERR_UNREACH_HOST;
+
+	switch (cmd) {
+
+	case PRC_ROUTEDEAD:
+		return;
+
+	case PRC_IFDOWN:
+	case PRC_HOSTDEAD:
+	case PRC_HOSTUNREACH:
+		sns = (struct sockaddr_ns *)arg;
+		if (sns->sns_family != AF_NS)
+			return;
+		na = &sns->sns_addr;
+		break;
+
+	default:
+		errp = (struct ns_errp *)arg;
+		na = &errp->ns_err_idp.idp_dna;
+		type = errp->ns_err_num;
+		type = ntohs((u_short)type);
+	}
+	switch (type) {
+
+	case NS_ERR_UNREACH_HOST:
+		ns_pcbnotify(na, (int)nsctlerrmap[cmd], spp_abort, (long) 0);
+		break;
+
+	case NS_ERR_TOO_BIG:
+	case NS_ERR_NOSOCK:
+		/* Only reachable through the default case above, so errp is set. */
+		nsp = ns_pcblookup(na, errp->ns_err_idp.idp_sna.x_port,
+		    NS_WILDCARD);
+		if (nsp) {
+			if(nsp->nsp_pcb)
+				(void) spp_drop((struct sppcb *)nsp->nsp_pcb,
+				    (int)nsctlerrmap[cmd]);
+			else
+				(void) idp_drop(nsp, (int)nsctlerrmap[cmd]);
+		}
+		break;
+
+	case NS_ERR_FULLUP:
+		ns_pcbnotify(na, 0, spp_quench, (long) 0);
+	}
+}
+/*
+ * Source quench handler: shrink the congestion window of the
+ * connection attached to `nsp' down to a single packet (CUNIT).
+ * The window is reopened step by step as acknowledgements come in.
+ * A control block without an SPP connection is left alone.
+ */
+spp_quench(nsp)
+	struct nspcb *nsp;
+{
+	struct sppcb *pcb;
+
+	pcb = nstosppcb(nsp);
+	if (pcb == 0)
+		return;
+	pcb->s_cwnd = CUNIT;
+}
+
+#ifdef notdef /* unfinished routine; compiled only if notdef is defined */
+int
+spp_fixmtu(nsp)
+register struct nspcb *nsp;
+{
+ register struct sppcb *cb = (struct sppcb *)(nsp->nsp_pcb);
+ register struct mbuf *m;
+ register struct spidp *si;
+ struct ns_errp *ep;
+ struct sockbuf *sb;
+ int badseq, len;
+ struct mbuf *firstbad, *m0;
+
+ if (cb) {
+ /*
+ * The notification that we have sent
+ * too much is bad news -- we will
+ * have to go through queued up so far
+ * splitting ones which are too big and
+ * reassigning sequence numbers and checksums.
+ * we should then retransmit all packets from
+ * one above the offending packet to the last one
+ * we had sent (or our allocation)
+ * then the offending one so that the any queued
+ * data at our destination will be discarded.
+ */
+ ep = (struct ns_errp *)nsp->nsp_notify_param;
+ sb = &nsp->nsp_socket->so_snd;
+ cb->s_mtu = ep->ns_err_param; /* adopt the size reported in the error packet */
+ badseq = SI(&ep->ns_err_idp)->si_seq;
+ for (m = sb->sb_mb; m; m = m->m_act) { /* look for the offending packet in the send queue */
+ si = mtod(m, struct spidp *);
+ if (si->si_seq == badseq)
+ break;
+ }
+ if (m == 0) return;
+ firstbad = m;
+ /*for (;;) {*/
+ /* calculate length */
+ for (m0 = m, len = 0; m ; m = m->m_next)
+ len += m->m_len;
+ if (len > cb->s_mtu) { /* splitting of oversized packets was never written */
+ }
+ /* FINISH THIS
+ } */
+ }
+}
+#endif
+
+/*
+ * SPP output routine.
+ *
+ * When m0 is non-null it is user data to be sent on connection cb.
+ * Data larger than the connection's mtu is cut into mtu-sized pieces
+ * that are sent through recursive calls; on a packet-interface socket
+ * (SF_PI) oversized data is rejected with EMSGSIZE instead.  The data
+ * is padded to an even length, given an SP/IDP header and appended to
+ * the socket's send buffer as one record.
+ *
+ * With or without new data, the routine then decides whether anything
+ * should be transmitted now: an out-of-band packet, queued data that
+ * fits the send/congestion window, an acknowledgement, a window update
+ * or a probe.  A copy of the chosen queued packet (or a freshly built
+ * header-only system packet) is passed to ns_output().
+ *
+ * Returns 0 on success or when there was nothing to send, otherwise an
+ * errno value (EMSGSIZE, ENOBUFS, EINVAL, or the error returned by
+ * ns_output()).  m0 is always consumed: it is either queued or freed.
+ *
+ * cb->s_outx records which exit path was taken.
+ */
+spp_output(cb, m0)
+	register struct sppcb *cb;
+	struct mbuf *m0;
+{
+	struct socket *so = cb->s_nspcb->nsp_socket;
+	register struct mbuf *m;
+	register struct spidp *si = (struct spidp *) 0;
+	register struct sockbuf *sb = &so->so_snd;
+	int len = 0, win, rcv_win;
+	short span, off, recordp = 0;
+	u_short alo;
+	int error = 0, sendalot;
+#ifdef notdef
+	int idle;
+#endif
+	struct mbuf *mprev;
+	extern int idpcksum;
+
+	if (m0) {
+		int mtu = cb->s_mtu;
+		int datalen;
+		/*
+		 * Make sure that packet isn't too big.
+		 */
+		for (m = m0; m ; m = m->m_next) {
+			mprev = m;
+			len += m->m_len;
+			if (m->m_flags & M_EOR)
+				recordp = 1;
+		}
+		datalen = (cb->s_flags & SF_HO) ?
+				len - sizeof (struct sphdr) : len;
+		if (datalen > mtu) {
+			if (cb->s_flags & SF_PI) {
+				m_freem(m0);
+				return (EMSGSIZE);
+			} else {
+				int oldEM = cb->s_cc & SP_EM;
+
+				/*
+				 * Only the final piece may carry the
+				 * end-of-message bit.
+				 */
+				cb->s_cc &= ~SP_EM;
+				while (len > mtu) {
+					/*
+					 * Here we are only being called
+					 * from usrreq(), so it is OK to
+					 * block.
+					 */
+					m = m_copym(m0, 0, mtu, M_WAIT);
+					/*
+					 * SF_NEWCALL is kept in s_flags2,
+					 * as everywhere else in this file.
+					 * Clear M_EOR on the piece so that
+					 * it is not taken for the end of
+					 * the record.
+					 */
+					if (cb->s_flags2 & SF_NEWCALL) {
+					    struct mbuf *mm = m;
+					    spp_newchecks[7]++;
+					    while (mm) {
+						mm->m_flags &= ~M_EOR;
+						mm = mm->m_next;
+					    }
+					}
+					error = spp_output(cb, m);
+					if (error) {
+						cb->s_cc |= oldEM;
+						m_freem(m0);
+						return(error);
+					}
+					m_adj(m0, mtu);
+					len -= mtu;
+				}
+				cb->s_cc |= oldEM;
+			}
+		}
+		/*
+		 * Force length even, by adding a "garbage byte" if
+		 * necessary.
+		 */
+		if (len & 1) {
+			m = mprev;
+			if (M_TRAILINGSPACE(m) >= 1)
+				m->m_len++;
+			else {
+				struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);
+
+				if (m1 == 0) {
+					m_freem(m0);
+					return (ENOBUFS);
+				}
+				m1->m_len = 1;
+				*(mtod(m1, u_char *)) = 0;
+				m->m_next = m1;
+			}
+		}
+		m = m_gethdr(M_DONTWAIT, MT_HEADER);
+		if (m == 0) {
+			m_freem(m0);
+			return (ENOBUFS);
+		}
+		/*
+		 * Fill in mbuf with extended SP header
+		 * and addresses and length put into network format.
+		 */
+		MH_ALIGN(m, sizeof (struct spidp));
+		m->m_len = sizeof (struct spidp);
+		m->m_next = m0;
+		si = mtod(m, struct spidp *);
+		si->si_i = *cb->s_idp;
+		si->si_s = cb->s_shdr;
+		if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
+			/*
+			 * The user supplied an SP header in front of the
+			 * data: take the datastream type and end-of-message
+			 * bit from it, then strip it.
+			 */
+			register struct sphdr *sh;
+			if (m0->m_len < sizeof (*sh)) {
+				if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
+					(void) m_free(m);
+					m_freem(m0);
+					return (EINVAL);
+				}
+				m->m_next = m0;
+			}
+			sh = mtod(m0, struct sphdr *);
+			si->si_dt = sh->sp_dt;
+			si->si_cc |= sh->sp_cc & SP_EM;
+			m0->m_len -= sizeof (*sh);
+			m0->m_data += sizeof (*sh);
+			len -= sizeof (*sh);
+		}
+		len += sizeof(*si);
+		if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
+			si->si_cc |= SP_EM;
+			spp_newchecks[8]++;
+		}
+		if (cb->s_oobflags & SF_SOOB) {
+			/*
+			 * Per jqj@cornell:
+			 * make sure OB packets convey exactly 1 byte.
+			 * If the packet is 1 byte or larger, we
+			 * have already guaranteed there to be at least
+			 * one garbage byte for the checksum, and
+			 * extra bytes shouldn't hurt!
+			 */
+			if (len > sizeof(*si)) {
+				si->si_cc |= SP_OB;
+				len = (1 + sizeof(*si));
+			}
+		}
+		si->si_len = htons((u_short)len);
+		/* Packet header length is len rounded up to an even count. */
+		m->m_pkthdr.len = ((len - 1) | 1) + 1;
+		/*
+		 * queue stuff up for output
+		 */
+		sbappendrecord(sb, m);
+		cb->s_seq++;
+	}
+#ifdef notdef
+	idle = (cb->s_smax == (cb->s_rack - 1));
+#endif
+again:
+	sendalot = 0;
+	off = cb->s_snxt - cb->s_rack;
+	win = min(cb->s_swnd, (cb->s_cwnd/CUNIT));
+
+	/*
+	 * If in persist timeout with window of 0, send a probe.
+	 * Otherwise, if window is small but nonzero
+	 * and timer expired, send what we can and go into
+	 * transmit state.
+	 */
+	if (cb->s_force == 1 + SPPT_PERSIST) {
+		if (win != 0) {
+			cb->s_timer[SPPT_PERSIST] = 0;
+			cb->s_rxtshift = 0;
+		}
+	}
+	span = cb->s_seq - cb->s_rack;
+	len = min(span, win) - off;
+
+	if (len < 0) {
+		/*
+		 * Window shrank after we went into it.
+		 * If window shrank to 0, cancel pending
+		 * retransmission and pull s_snxt back
+		 * to (closed) window.  We will enter persist
+		 * state below.  If the window didn't close completely,
+		 * just wait for an ACK.
+		 */
+		len = 0;
+		if (win == 0) {
+			cb->s_timer[SPPT_REXMT] = 0;
+			cb->s_snxt = cb->s_rack;
+		}
+	}
+	if (len > 1)
+		sendalot = 1;
+	rcv_win = sbspace(&so->so_rcv);
+
+	/*
+	 * Send if we owe peer an ACK.
+	 */
+	if (cb->s_oobflags & SF_SOOB) {
+		/*
+		 * must transmit this out of band packet
+		 */
+		cb->s_oobflags &= ~ SF_SOOB;
+		sendalot = 1;
+		sppstat.spps_sndurg++;
+		goto found;
+	}
+	if (cb->s_flags & SF_ACKNOW)
+		goto send;
+	if (cb->s_state < TCPS_ESTABLISHED)
+		goto send;
+	/*
+	 * Silly window can't happen in spp.
+	 * Code from tcp deleted.
+	 */
+	if (len)
+		goto send;
+	/*
+	 * Compare available window to amount of window
+	 * known to peer (as advertised window less
+	 * next expected input.)  If the difference is at least two
+	 * packets or at least 35% of the maximum possible window,
+	 * then want to send a window update to peer.
+	 */
+	if (rcv_win > 0) {
+		u_short delta =  1 + cb->s_alo - cb->s_ack;
+		int adv = rcv_win - (delta * cb->s_mtu);
+
+		if ((so->so_rcv.sb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
+		    (100 * adv / so->so_rcv.sb_hiwat >= 35)) {
+			sppstat.spps_sndwinup++;
+			cb->s_flags |= SF_ACKNOW;
+			goto send;
+		}
+
+	}
+	/*
+	 * Many comments from tcp_output.c are appropriate here
+	 * including . . .
+	 * If send window is too small, there is data to transmit, and no
+	 * retransmit or persist is pending, then go to persist state.
+	 * If nothing happens soon, send when timer expires:
+	 * if window is nonzero, transmit what we can,
+	 * otherwise send a probe.
+	 */
+	if (so->so_snd.sb_cc && cb->s_timer[SPPT_REXMT] == 0 &&
+		cb->s_timer[SPPT_PERSIST] == 0) {
+			cb->s_rxtshift = 0;
+			spp_setpersist(cb);
+	}
+	/*
+	 * No reason to send a packet, just return.
+	 */
+	cb->s_outx = 1;
+	return (0);
+
+send:
+	/*
+	 * Find requested packet.
+	 */
+	si = 0;
+	if (len > 0) {
+		cb->s_want = cb->s_snxt;
+		for (m = sb->sb_mb; m; m = m->m_act) {
+			si = mtod(m, struct spidp *);
+			if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
+				break;
+		}
+	found:
+		/*
+		 * The out-of-band path jumps here directly, with si
+		 * still holding whatever was set up above.
+		 */
+		if (si) {
+			if (si->si_seq == cb->s_snxt)
+					cb->s_snxt++;
+				else
+					sppstat.spps_sndvoid++, si = 0;
+		}
+	}
+	/*
+	 * update window
+	 */
+	if (rcv_win < 0)
+		rcv_win = 0;
+	alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
+	/* Never take back allocation already granted to the peer. */
+	if (SSEQ_LT(alo, cb->s_alo))
+		alo = cb->s_alo;
+
+	if (si) {
+		/*
+		 * must make a copy of this packet for
+		 * idp_output to monkey with
+		 */
+		m = m_copy(dtom(si), 0, (int)M_COPYALL);
+		if (m == NULL) {
+			return (ENOBUFS);
+		}
+		si = mtod(m, struct spidp *);
+		if (SSEQ_LT(si->si_seq, cb->s_smax))
+			sppstat.spps_sndrexmitpack++;
+		else
+			sppstat.spps_sndpack++;
+	} else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
+		/*
+		 * Must send an acknowledgement or a probe
+		 */
+		if (cb->s_force)
+			sppstat.spps_sndprobe++;
+		if (cb->s_flags & SF_ACKNOW)
+			sppstat.spps_sndacks++;
+		m = m_gethdr(M_DONTWAIT, MT_HEADER);
+		if (m == 0)
+			return (ENOBUFS);
+		/*
+		 * Fill in mbuf with extended SP header
+		 * and addresses and length put into network format.
+		 */
+		MH_ALIGN(m, sizeof (struct spidp));
+		m->m_len = sizeof (*si);
+		m->m_pkthdr.len = sizeof (*si);
+		si = mtod(m, struct spidp *);
+		si->si_i = *cb->s_idp;
+		si->si_s = cb->s_shdr;
+		si->si_seq = cb->s_smax + 1;
+		si->si_len = htons(sizeof (*si));
+		si->si_cc |= SP_SP;
+	} else {
+		cb->s_outx = 3;
+		if (so->so_options & SO_DEBUG || traceallspps)
+			spp_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
+		return (0);
+	}
+	/*
+	 * Stuff checksum and output datagram.
+	 */
+	if ((si->si_cc & SP_SP) == 0) {
+		if (cb->s_force != (1 + SPPT_PERSIST) ||
+		    cb->s_timer[SPPT_PERSIST] == 0) {
+			/*
+			 * If this is a new packet and we are not currently
+			 * timing anything, time this one.
+			 */
+			if (SSEQ_LT(cb->s_smax, si->si_seq)) {
+				cb->s_smax = si->si_seq;
+				if (cb->s_rtt == 0) {
+					sppstat.spps_segstimed++;
+					cb->s_rtseq = si->si_seq;
+					cb->s_rtt = 1;
+				}
+			}
+			/*
+			 * Set rexmt timer if not currently set,
+			 * Initial value for retransmit timer is smoothed
+			 * round-trip time + 2 * round-trip time variance.
+			 * Initialize shift counter which is used for backoff
+			 * of retransmit time.
+			 */
+			if (cb->s_timer[SPPT_REXMT] == 0 &&
+			    cb->s_snxt != cb->s_rack) {
+				cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
+				if (cb->s_timer[SPPT_PERSIST]) {
+					cb->s_timer[SPPT_PERSIST] = 0;
+					cb->s_rxtshift = 0;
+				}
+			}
+		} else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
+			cb->s_smax = si->si_seq;
+		}
+	} else if (cb->s_state < TCPS_ESTABLISHED) {
+		if (cb->s_rtt == 0)
+			cb->s_rtt = 1; /* Time initial handshake */
+		if (cb->s_timer[SPPT_REXMT] == 0)
+			cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
+	}
+	{
+		/*
+		 * Do not request acks when we ack their data packets or
+		 * when we do a gratuitous window update.
+		 */
+		if (((si->si_cc & SP_SP) == 0) || cb->s_force)
+				si->si_cc |= SP_SA;
+		si->si_seq = htons(si->si_seq);
+		si->si_alo = htons(alo);
+		si->si_ack = htons(cb->s_ack);
+
+		if (idpcksum) {
+			si->si_sum = 0;
+			len = ntohs(si->si_len);
+			if (len & 1)
+				len++;
+			si->si_sum = ns_cksum(m, len);
+		} else
+			si->si_sum = 0xffff;
+
+		cb->s_outx = 4;
+		if (so->so_options & SO_DEBUG || traceallspps)
+			spp_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
+
+		if (so->so_options & SO_DONTROUTE)
+			error = ns_output(m, (struct route *)0, NS_ROUTETOIF);
+		else
+			error = ns_output(m, &cb->s_nspcb->nsp_route, 0);
+	}
+	if (error) {
+		return (error);
+	}
+	sppstat.spps_sndtotal++;
+	/*
+	 * Data sent (as far as we can tell).
+	 * If this advertises a larger window than any other segment,
+	 * then remember the size of the advertised window.
+	 * Any pending ACK has now been sent.
+	 */
+	cb->s_force = 0;
+	cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
+	if (SSEQ_GT(alo, cb->s_alo))
+		cb->s_alo = alo;
+	if (sendalot)
+		goto again;
+	cb->s_outx = 5;
+	return (0);
+}
+
+/* When nonzero, spp_setpersist() panics if the retransmit timer is still set. */
+int spp_do_persist_panics = 0;
+
+/*
+ * Start or restart the persist timer of a connection, then advance the
+ * backoff shift (never beyond SPP_MAXRXTSHIFT).
+ */
+spp_setpersist(cb)
+	register struct sppcb *cb;
+{
+	extern int spp_backoff[];
+	register int base;
+
+	/* Interval base built from the smoothed rtt and its variance. */
+	base = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
+
+	/* Optional consistency check: REXMT should not be running here. */
+	if (spp_do_persist_panics && cb->s_timer[SPPT_REXMT])
+		panic("spp_output REXMT");
+
+	/* Arm the persist timer, scaled by the current backoff step. */
+	SPPT_RANGESET(cb->s_timer[SPPT_PERSIST],
+	    base*spp_backoff[cb->s_rxtshift],
+	    SPPTV_PERSMIN, SPPTV_PERSMAX);
+
+	if (cb->s_rxtshift < SPP_MAXRXTSHIFT)
+		cb->s_rxtshift += 1;
+}
+/*
+ * Get or set an SPP-level socket option.
+ *
+ * req is PRCO_GETOPT or PRCO_SETOPT.  Requests whose level is not
+ * NSPROTO_SPP are handed to idp_ctloutput() untouched.
+ *
+ * PRCO_GETOPT: a newly allocated mbuf holding the option value is stored
+ * through value.
+ * PRCO_SETOPT: the mbuf at *value carries the new setting; once the level
+ * check has passed it is released here on every path, including the
+ * error paths.
+ *
+ * Returns 0 or an errno value (EINVAL, ENOBUFS).
+ */
+/*ARGSUSED*/
+spp_ctloutput(req, so, level, name, value)
+	int req;
+	struct socket *so;
+	int level, name;
+	struct mbuf **value;
+{
+	register struct mbuf *m;
+	struct nspcb *nsp = sotonspcb(so);
+	register struct sppcb *cb;
+	int mask, error = 0;
+	int *ok;
+
+	if (level != NSPROTO_SPP) {
+		/* This will have to be changed when we do more general
+		   stacking of protocols */
+		return (idp_ctloutput(req, so, level, name, value));
+	}
+	if (nsp == NULL) {
+		/*
+		 * No pcb to operate on.  The option mbuf of a SETOPT still
+		 * belongs to us, so release it rather than leaking it.
+		 */
+		if (req == PRCO_SETOPT && value != NULL && *value != NULL)
+			m_freem(*value);
+		error = EINVAL;
+		goto release;
+	}
+	cb = nstosppcb(nsp);
+
+	switch (req) {
+
+	case PRCO_GETOPT:
+		if (value == NULL)
+			return (EINVAL);
+		m = m_get(M_DONTWAIT, MT_DATA);
+		if (m == NULL)
+			return (ENOBUFS);
+		switch (name) {
+
+		case SO_HEADERS_ON_INPUT:
+			mask = SF_HI;
+			goto get_flags;
+
+		case SO_HEADERS_ON_OUTPUT:
+			mask = SF_HO;
+		get_flags:
+			m->m_len = sizeof(short);
+			*mtod(m, short *) = cb->s_flags & mask;
+			break;
+
+		case SO_MTU:
+			m->m_len = sizeof(u_short);
+			*mtod(m, u_short *) = cb->s_mtu;
+			break;
+
+		case SO_LAST_HEADER:
+			m->m_len = sizeof(struct sphdr);
+			*mtod(m, struct sphdr *) = cb->s_rhdr;
+			break;
+
+		case SO_DEFAULT_HEADERS:
+			/*
+			 * Only a struct sphdr is stored, so only that much
+			 * may be reported as valid; claiming a full
+			 * struct spidp would hand back uninitialized mbuf
+			 * bytes beyond the header.
+			 */
+			m->m_len = sizeof(struct sphdr);
+			*mtod(m, struct sphdr *) = cb->s_shdr;
+			break;
+
+		default:
+			error = EINVAL;
+		}
+		*value = m;
+		break;
+
+	case PRCO_SETOPT:
+		if (value == 0 || *value == 0) {
+			error = EINVAL;
+			break;
+		}
+		switch (name) {
+
+		case SO_HEADERS_ON_INPUT:
+			mask = SF_HI;
+			goto set_head;
+
+		case SO_HEADERS_ON_OUTPUT:
+			mask = SF_HO;
+		set_head:
+			/* header visibility is only settable with SF_PI set */
+			if (cb->s_flags & SF_PI) {
+				ok = mtod(*value, int *);
+				if (*ok)
+					cb->s_flags |= mask;
+				else
+					cb->s_flags &= ~mask;
+			} else
+				error = EINVAL;
+			break;
+
+		case SO_MTU:
+		    {
+			u_short mtu = *(mtod(*value, u_short *));
+
+			/*
+			 * s_mtu is used as a divisor when the congestion
+			 * window is computed (see spp_template()), so a
+			 * zero mtu must never be installed.
+			 */
+			if (mtu == 0)
+				error = EINVAL;
+			else
+				cb->s_mtu = mtu;
+		    }
+			break;
+
+#ifdef SF_NEWCALL
+		case SO_NEWCALL:
+			ok = mtod(*value, int *);
+			if (*ok) {
+				cb->s_flags2 |= SF_NEWCALL;
+				spp_newchecks[5]++;
+			} else {
+				cb->s_flags2 &= ~SF_NEWCALL;
+				spp_newchecks[6]++;
+			}
+			break;
+#endif
+
+		case SO_DEFAULT_HEADERS:
+		    {
+			register struct sphdr *sp
+				= mtod(*value, struct sphdr *);
+
+			/* only the datastream type and the EM bit are taken */
+			cb->s_dt = sp->sp_dt;
+			cb->s_cc = sp->sp_cc & SP_EM;
+		    }
+			break;
+
+		default:
+			error = EINVAL;
+		}
+		m_freem(*value);
+		break;
+	}
+release:
+	return (error);
+}
+
+/*
+ * User-request entry point for SPP sockets.
+ *
+ * so is the socket, req the PRU_* request code; the meaning of m, nam and
+ * controlp depends on the request (for PRU_CONTROL and PRU_SLOWTIMO they
+ * are not mbufs at all and are cast back to their real types below).
+ * Except for PRU_CONTROL, the body runs at splnet and any mbuf still held
+ * in m or controlp on exit is freed here.
+ *
+ * Returns 0 or an errno value.
+ */
+/*ARGSUSED*/
+spp_usrreq(so, req, m, nam, controlp)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *controlp;
+{
+	struct nspcb *nsp = sotonspcb(so);
+	/* cb must start out NULL: PRU_ATTACH arrives with no pcb yet */
+	register struct sppcb *cb = NULL;
+	int s;
+	int error = 0, ostate;
+	struct mbuf *mm;
+	register struct sockbuf *sb;
+
+	/*
+	 * Dispatch PRU_CONTROL before raising the priority level, so this
+	 * early return does not leave the system at splnet.
+	 */
+	if (req == PRU_CONTROL)
+		return (ns_control(so, (int)m, (caddr_t)nam,
+			(struct ifnet *)controlp));
+	s = splnet();
+	if (nsp == NULL) {
+		if (req != PRU_ATTACH) {
+			error = EINVAL;
+			goto release;
+		}
+	} else
+		cb = nstosppcb(nsp);
+
+	/* state on entry, reported to spp_trace() on the way out */
+	ostate = cb ? cb->s_state : 0;
+
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (nsp != NULL) {
+			error = EISCONN;
+			break;
+		}
+		error = ns_pcballoc(so, &nspcb);
+		if (error)
+			break;
+		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+			error = soreserve(so, (u_long) 3072, (u_long) 3072);
+			if (error)
+				break;
+		}
+		nsp = sotonspcb(so);
+
+		/* the control block lives in an mbuf of its own */
+		mm = m_getclr(M_DONTWAIT, MT_PCB);
+		sb = &so->so_snd;
+
+		if (mm == NULL) {
+			error = ENOBUFS;
+			break;
+		}
+		cb = mtod(mm, struct sppcb *);
+		/* a second mbuf holds the idp header template */
+		mm = m_getclr(M_DONTWAIT, MT_HEADER);
+		if (mm == NULL) {
+			/*
+			 * Give back the control block mbuf obtained above
+			 * (not the request mbuf m), and forget the pointer
+			 * so that it is not handed to spp_trace() below.
+			 */
+			(void) m_free(dtom(cb));
+			cb = NULL;
+			error = ENOBUFS;
+			break;
+		}
+		cb->s_idp = mtod(mm, struct idp *);
+		cb->s_state = TCPS_LISTEN;
+		cb->s_smax = -1;
+		cb->s_swl1 = -1;
+		cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
+		cb->s_nspcb = nsp;
+		cb->s_mtu = 576 - sizeof (struct spidp);
+		cb->s_cwnd = sbspace(sb) * CUNIT / cb->s_mtu;
+		cb->s_ssthresh = cb->s_cwnd;
+		cb->s_cwmx = sbspace(sb) * CUNIT /
+				(2 * sizeof (struct spidp));
+		/* Above is recomputed when connecting to account
+		   for changed buffering or mtu's */
+		/*
+		 * NOTE(review): this stores the SRTT base into s_rtt (the
+		 * round-trip timer) rather than s_srtt -- confirm intent.
+		 */
+		cb->s_rtt = SPPTV_SRTTBASE;
+		cb->s_rttvar = SPPTV_SRTTDFLT << 2;
+		SPPT_RANGESET(cb->s_rxtcur,
+		    ((SPPTV_SRTTBASE >> 2) + (SPPTV_SRTTDFLT << 2)) >> 1,
+		    SPPTV_MIN, SPPTV_REXMTMAX);
+		nsp->nsp_pcb = (caddr_t) cb;
+		break;
+
+	case PRU_DETACH:
+		if (nsp == NULL) {
+			error = ENOTCONN;
+			break;
+		}
+		if (cb->s_state > TCPS_LISTEN)
+			cb = spp_disconnect(cb);
+		else
+			cb = spp_close(cb);
+		break;
+
+	case PRU_BIND:
+		error = ns_pcbbind(nsp, nam);
+		break;
+
+	case PRU_LISTEN:
+		if (nsp->nsp_lport == 0)
+			error = ns_pcbbind(nsp, (struct mbuf *)0);
+		if (error == 0)
+			cb->s_state = TCPS_LISTEN;
+		break;
+
+	/*
+	 * Initiate connection to peer.
+	 * Enter SYN_SENT state, and mark socket as connecting.
+	 * Start keep-alive timer, setup prototype header,
+	 * Send initial system packet requesting connection.
+	 */
+	case PRU_CONNECT:
+		if (nsp->nsp_lport == 0) {
+			error = ns_pcbbind(nsp, (struct mbuf *)0);
+			if (error)
+				break;
+		}
+		error = ns_pcbconnect(nsp, nam);
+		if (error)
+			break;
+		soisconnecting(so);
+		sppstat.spps_connattempt++;
+		cb->s_state = TCPS_SYN_SENT;
+		cb->s_did = 0;
+		spp_template(cb);
+		cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
+		/*
+		 * NOTE(review): spp_timers() sets s_force to 1 + the timer
+		 * index; here the timer *value* SPPTV_KEEP is used --
+		 * confirm whether 1 + SPPT_KEEP was intended.
+		 */
+		cb->s_force = 1 + SPPTV_KEEP;
+		/*
+		 * Other party is required to respond to
+		 * the port I send from, but he is not
+		 * required to answer from where I am sending to,
+		 * so allow wildcarding.
+		 * original port I am sending to is still saved in
+		 * cb->s_dport.
+		 */
+		nsp->nsp_fport = 0;
+		error = spp_output(cb, (struct mbuf *) 0);
+		break;
+
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP;
+		break;
+
+	/*
+	 * We may decide later to implement connection closing
+	 * handshaking at the spp level optionally.
+	 * here is the hook to do it:
+	 */
+	case PRU_DISCONNECT:
+		cb = spp_disconnect(cb);
+		break;
+
+	/*
+	 * Accept a connection.  Essentially all the work is
+	 * done at higher levels; just return the address
+	 * of the peer, storing through addr.
+	 */
+	case PRU_ACCEPT: {
+		struct sockaddr_ns *sns = mtod(nam, struct sockaddr_ns *);
+
+		nam->m_len = sizeof (struct sockaddr_ns);
+		sns->sns_family = AF_NS;
+		sns->sns_addr = nsp->nsp_faddr;
+		break;
+		}
+
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		cb = spp_usrclosed(cb);
+		if (cb)
+			error = spp_output(cb, (struct mbuf *) 0);
+		break;
+
+	/*
+	 * After a receive, possibly send acknowledgment
+	 * updating allocation.
+	 */
+	case PRU_RCVD:
+		cb->s_flags |= SF_RVD;
+		(void) spp_output(cb, (struct mbuf *) 0);
+		cb->s_flags &= ~SF_RVD;
+		break;
+
+	case PRU_ABORT:
+		/*
+		 * spp_drop() closes the connection and frees the control
+		 * block; keep its return value so the stale pointer is not
+		 * passed to spp_trace() below.
+		 */
+		cb = spp_drop(cb, ECONNABORTED);
+		break;
+
+	case PRU_SENSE:
+	case PRU_CONTROL:
+		/* m is not an mbuf to be freed for these requests */
+		m = NULL;
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_RCVOOB:
+		if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
+		    (so->so_state & SS_RCVATMARK)) {
+			m->m_len = 1;
+			*mtod(m, caddr_t) = cb->s_iobc;
+			break;
+		}
+		error = EINVAL;
+		break;
+
+	case PRU_SENDOOB:
+		if (sbspace(&so->so_snd) < -512) {
+			error = ENOBUFS;
+			break;
+		}
+		cb->s_oobflags |= SF_SOOB;
+		/* fall into */
+	case PRU_SEND:
+		if (controlp) {
+			u_short *p = mtod(controlp, u_short *);
+			spp_newchecks[2]++;
+			if ((p[0] == 5) && p[1] == 1) { /* XXXX, for testing */
+				cb->s_shdr.sp_dt = *(u_char *)(&p[2]);
+				spp_newchecks[3]++;
+			}
+			m_freem(controlp);
+		}
+		controlp = NULL;
+		/* m is cleared so that the common exit does not free it again */
+		error = spp_output(cb, m);
+		m = NULL;
+		break;
+
+	case PRU_SOCKADDR:
+		ns_setsockaddr(nsp, nam);
+		break;
+
+	case PRU_PEERADDR:
+		ns_setpeeraddr(nsp, nam);
+		break;
+
+	case PRU_SLOWTIMO:
+		/* nam carries the timer index; fold it into req for the trace */
+		cb = spp_timers(cb, (int)nam);
+		req |= ((int)nam) << 8;
+		break;
+
+	case PRU_FASTTIMO:
+	case PRU_PROTORCV:
+	case PRU_PROTOSEND:
+		error =  EOPNOTSUPP;
+		break;
+
+	default:
+		panic("sp_usrreq");
+	}
+	if (cb && (so->so_options & SO_DEBUG || traceallspps))
+		spp_trace(SA_USER, (u_char)ostate, cb, (struct spidp *)0, req);
+release:
+	if (controlp != NULL)
+		m_freem(controlp);
+	if (m != NULL)
+		m_freem(m);
+	splx(s);
+	return (error);
+}
+
+/*
+ * Variant of spp_usrreq(): behaves identically, except that a successful
+ * PRU_ATTACH additionally sets SF_HI, SF_HO and SF_PI on the new
+ * control block.
+ */
+spp_usrreq_sp(so, req, m, nam, controlp)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *controlp;
+{
+	struct sppcb *cb;
+	int error;
+
+	error = spp_usrreq(so, req, m, nam, controlp);
+	if (error != 0 || req != PRU_ATTACH)
+		return (error);
+
+	/* attach succeeded: mark the freshly created control block */
+	cb = (struct sppcb *)sotonspcb(so)->nsp_pcb;
+	cb->s_flags |= (SF_HI | SF_HO | SF_PI);
+	return (error);
+}
+
+/*
+ * Build the per-connection header template used for outgoing spp packets.
+ * Fills in the idp header (packet type, local and foreign addresses),
+ * picks the connection id from spp_iss, and recomputes the congestion
+ * window values from the current send buffer space and mtu.
+ */
+spp_template(cb)
+	register struct sppcb *cb;
+{
+	register struct nspcb *pcb = cb->s_nspcb;
+	register struct idp *hdr = cb->s_idp;
+	register struct sockbuf *sndbuf = &(pcb->nsp_socket->so_snd);
+
+	/* skeletal idp header */
+	hdr->idp_pt = NSPROTO_SPP;
+	hdr->idp_sna = pcb->nsp_laddr;
+	hdr->idp_dna = pcb->nsp_faddr;
+
+	/* connection identifier; bump the generator for the next caller */
+	cb->s_sid = htons(spp_iss);
+	spp_iss += SPP_ISSINCR/2;
+	cb->s_alo = 1;
+
+	/* Try to expand fast to full complement of large packets */
+	cb->s_cwnd = (sbspace(sndbuf) * CUNIT) / cb->s_mtu;
+	cb->s_ssthresh = cb->s_cwnd;
+
+	/* But allow for lots of little packets as well */
+	cb->s_cwmx = (sbspace(sndbuf) * CUNIT) / (2 * sizeof(struct spidp));
+	cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
+}
+
+/*
+ * Close a SPIP control block:
+ *	free every packet still on the reassembly queue
+ *	free the idp template and the spp control block itself
+ *	mark the socket disconnected and detach the ns pcb
+ * Always returns a null control block pointer.
+ */
+struct sppcb *
+spp_close(cb)
+	register struct sppcb *cb;
+{
+	register struct spidp_q *next, *cur;
+	struct nspcb *nsp = cb->s_nspcb;
+	struct socket *so = nsp->nsp_socket;
+	register struct mbuf *m;
+
+	/* step past each entry before unlinking and freeing it */
+	for (next = cb->s_q.si_next; next != &(cb->s_q); ) {
+		cur = next;
+		next = cur->si_next;
+		m = dtom(cur);
+		remque(cur);
+		m_freem(m);
+	}
+	(void) m_free(dtom(cb->s_idp));
+	(void) m_free(dtom(cb));
+	nsp->nsp_pcb = 0;
+	soisdisconnected(so);
+	ns_pcbdetach(nsp);
+	sppstat.spps_closed++;
+	return ((struct sppcb *)0);
+}
+/*
+ * Someday we may do level 3 handshaking
+ * to close a connection or send a xerox style error.
+ * For now, just close: this simply forwards to spp_close() and
+ * returns its result (a null pointer).
+ */
+struct sppcb *
+spp_usrclosed(cb)
+ register struct sppcb *cb;
+{
+ return (spp_close(cb));
+}
+/*
+ * Disconnect a connection.  Currently identical to spp_close(); the
+ * returned pointer is whatever spp_close() returns.
+ */
+struct sppcb *
+spp_disconnect(cb)
+ register struct sppcb *cb;
+{
+ return (spp_close(cb));
+}
+/*
+ * Drop a connection, recording the given error on its socket.
+ * Updates the drop statistics, then closes the control block and
+ * returns the result of spp_close().
+ */
+struct sppcb *
+spp_drop(cb, errno)
+	register struct sppcb *cb;
+	int errno;
+{
+	struct socket *so = cb->s_nspcb->nsp_socket;
+
+	/*
+	 * someday, in the xerox world
+	 * we will generate error protocol packets
+	 * announcing that the socket has gone away.
+	 */
+	if (!TCPS_HAVERCVDSYN(cb->s_state)) {
+		/* connection never got past the embryonic stage */
+		sppstat.spps_conndrops++;
+	} else {
+		sppstat.spps_drops++;
+		cb->s_state = TCPS_CLOSED;
+	}
+	so->so_error = errno;
+	return (spp_close(cb));
+}
+
+/*
+ * Abort the connection attached to an ns pcb by closing its spp
+ * control block; the value returned by spp_close() is discarded.
+ */
+spp_abort(nsp)
+	struct nspcb *nsp;
+{
+	struct sppcb *cb;
+
+	cb = (struct sppcb *)nsp->nsp_pcb;
+	(void) spp_close(cb);
+}
+
+/*
+ * Multipliers applied to the rtt-derived interval, indexed by
+ * cb->s_rxtshift (used by spp_setpersist() and by the SPPT_REXMT case
+ * of spp_timers()).
+ */
+int spp_backoff[SPP_MAXRXTSHIFT+1] =
+ { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
+/*
+ * Fast timeout routine for processing delayed acks.
+ * Every control block on the nspcb list that has SF_DELACK set gets the
+ * flag converted to SF_ACKNOW and is passed to spp_output().
+ */
+spp_fasttimo()
+{
+	register struct nspcb *nsp;
+	register struct sppcb *cb;
+	int s = splnet();
+
+	nsp = nspcb.nsp_next;
+	if (nsp == 0) {
+		/* list head not linked yet */
+		splx(s);
+		return;
+	}
+	while (nsp != &nspcb) {
+		cb = (struct sppcb *)nsp->nsp_pcb;
+		if (cb != 0 && (cb->s_flags & SF_DELACK) != 0) {
+			cb->s_flags &= ~SF_DELACK;
+			cb->s_flags |= SF_ACKNOW;
+			sppstat.spps_delack++;
+			(void) spp_output(cb, (struct mbuf *) 0);
+		}
+		nsp = nsp->nsp_next;
+	}
+	splx(s);
+}
+
+/*
+ * spp protocol timeout routine called every 500 ms.
+ * Walks every pcb on the nspcb list, counts down each running timer and
+ * delivers a PRU_SLOWTIMO request (timer index passed in the nam slot)
+ * for any timer that reaches zero.  Also advances the idle and
+ * round-trip counters and the connection id generator spp_iss.
+ */
+spp_slowtimo()
+{
+	register struct nspcb *ip, *ipnxt;
+	register struct sppcb *cb;
+	int s = splnet();
+	register int i;
+
+	/*
+	 * Search through pcb's and update active timers.
+	 */
+	ip = nspcb.nsp_next;
+	if (ip == 0) {
+		splx(s);
+		return;
+	}
+	while (ip != &nspcb) {
+		cb = nstosppcb(ip);
+		/* remember the successor: ip may be unlinked below */
+		ipnxt = ip->nsp_next;
+		if (cb == 0)
+			goto tpgone;
+		for (i = 0; i < SPPT_NTIMERS; i++) {
+			if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
+				/*
+				 * spp_usrreq() is defined with exactly five
+				 * parameters; pass exactly five arguments.
+				 */
+				(void) spp_usrreq(cb->s_nspcb->nsp_socket,
+				    PRU_SLOWTIMO, (struct mbuf *)0,
+				    (struct mbuf *)i, (struct mbuf *)0);
+				/* pcb went away while handling the timer */
+				if (ipnxt->nsp_prev != ip)
+					goto tpgone;
+			}
+		}
+		cb->s_idle++;
+		if (cb->s_rtt)
+			cb->s_rtt++;
+tpgone:
+		ip = ipnxt;
+	}
+	spp_iss += SPP_ISSINCR/PR_SLOWHZ;		/* increment iss */
+	splx(s);
+}
+/*
+ * SPP timer processing.
+ *
+ * Handles the expiry of one per-connection timer.  The timer that fired
+ * is recorded in cb->s_force (as 1 + timer) before it is acted upon.
+ * Returns cb, or the value returned by spp_drop() when the connection is
+ * dropped; callers must continue with the returned pointer only.
+ * Timer values without a case below only have s_force updated.
+ */
+struct sppcb *
+spp_timers(cb, timer)
+ register struct sppcb *cb;
+ int timer;
+{
+ long rexmt;
+ int win;
+
+ cb->s_force = 1 + timer;
+ switch (timer) {
+
+ /*
+ * 2 MSL timeout in shutdown went off. TCP deletes connection
+ * control block.  Here the timer is only cleared and a message
+ * is printed.
+ */
+ case SPPT_2MSL:
+ printf("spp: SPPT_2MSL went off for no reason\n");
+ cb->s_timer[timer] = 0;
+ break;
+
+ /*
+ * Retransmission timer went off. Message has not
+ * been acked within retransmit interval. Back off
+ * to a longer retransmit interval and retransmit one packet.
+ * Once the shift would exceed SPP_MAXRXTSHIFT the connection
+ * is dropped with ETIMEDOUT instead.
+ */
+ case SPPT_REXMT:
+ if (++cb->s_rxtshift > SPP_MAXRXTSHIFT) {
+ cb->s_rxtshift = SPP_MAXRXTSHIFT;
+ sppstat.spps_timeoutdrop++;
+ cb = spp_drop(cb, ETIMEDOUT);
+ break;
+ }
+ sppstat.spps_rexmttimeo++;
+ rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
+ rexmt *= spp_backoff[cb->s_rxtshift];
+ SPPT_RANGESET(cb->s_rxtcur, rexmt, SPPTV_MIN, SPPTV_REXMTMAX);
+ cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
+ /*
+ * If we have backed off fairly far, our srtt
+ * estimate is probably bogus. Clobber it
+ * so we'll take the next rtt measurement as our srtt;
+ * move the current srtt into rttvar to keep the current
+ * retransmit times until then.
+ */
+ if (cb->s_rxtshift > SPP_MAXRXTSHIFT / 4 ) {
+ cb->s_rttvar += (cb->s_srtt >> 2);
+ cb->s_srtt = 0;
+ }
+ /* restart sending from the peer's last acknowledged packet */
+ cb->s_snxt = cb->s_rack;
+ /*
+ * If timing a packet, stop the timer.
+ */
+ cb->s_rtt = 0;
+ /*
+ * See very long discussion in tcp_timer.c about congestion
+ * window and ssthresh
+ */
+ win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
+ if (win < 2)
+ win = 2;
+ cb->s_cwnd = CUNIT;
+ cb->s_ssthresh = win * CUNIT;
+ (void) spp_output(cb, (struct mbuf *) 0);
+ break;
+
+ /*
+ * Persistence timer into zero window.
+ * Force a probe to be sent.
+ */
+ case SPPT_PERSIST:
+ sppstat.spps_persisttimeo++;
+ spp_setpersist(cb);
+ (void) spp_output(cb, (struct mbuf *) 0);
+ break;
+
+ /*
+ * Keep-alive timer went off; send something
+ * or drop connection if idle for too long.
+ * A connection that has not reached TCPS_ESTABLISHED is dropped
+ * outright.
+ */
+ case SPPT_KEEP:
+ sppstat.spps_keeptimeo++;
+ if (cb->s_state < TCPS_ESTABLISHED)
+ goto dropit;
+ if (cb->s_nspcb->nsp_socket->so_options & SO_KEEPALIVE) {
+ if (cb->s_idle >= SPPTV_MAXIDLE)
+ goto dropit;
+ sppstat.spps_keepprobe++;
+ (void) spp_output(cb, (struct mbuf *) 0);
+ } else
+ cb->s_idle = 0;
+ cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
+ break;
+ dropit:
+ sppstat.spps_keepdrops++;
+ cb = spp_drop(cb, ETIMEDOUT);
+ break;
+ }
+ return (cb);
+}
+/*
+ * Sizes of the two control block structures, exported as variables.
+ * NOTE(review): nothing in this file reads them; presumably kept for
+ * inspection with a debugger -- confirm before removing.
+ */
+#ifndef lint
+int SppcbSize = sizeof (struct sppcb);
+int NspcbSize = sizeof (struct nspcb);
+#endif /* lint */
diff --git a/sys/netns/spp_var.h b/sys/netns/spp_var.h
new file mode 100644
index 000000000000..0d44f63904f9
--- /dev/null
+++ b/sys/netns/spp_var.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)spp_var.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Sp control block, one per connection.
+ * The s_* macros defined inside the structure are shorthands for fields
+ * of the embedded prototype header s_shdr (and, for s_dport, of the idp
+ * header that s_idp points to).
+ */
+struct sppcb {
+ struct spidp_q s_q; /* queue for out-of-order receipt */
+ struct nspcb *s_nspcb; /* backpointer to ns pcb */
+ u_char s_state; /* connection state (TCPS_* values) */
+ u_char s_flags; /* SF_* bits below */
+#define SF_ACKNOW 0x01 /* Ack peer immediately */
+#define SF_DELACK 0x02 /* Ack, but try to delay it */
+#define SF_HI 0x04 /* Show headers on input */
+#define SF_HO 0x08 /* Show headers on output */
+#define SF_PI 0x10 /* Packet (datagram) interface */
+#define SF_WIN 0x20 /* Window info changed */
+#define SF_RXT 0x40 /* Rxt info changed */
+#define SF_RVD 0x80 /* Calling from read usrreq routine */
+ u_short s_mtu; /* Max packet size for this stream */
+/* use sequence fields in headers to store sequence numbers for this
+ connection */
+ struct idp *s_idp; /* idp header template */
+ struct sphdr s_shdr; /* prototype header to transmit */
+#define s_cc s_shdr.sp_cc /* connection control (for EM bit) */
+#define s_dt s_shdr.sp_dt /* datastream type */
+#define s_sid s_shdr.sp_sid /* source connection identifier */
+#define s_did s_shdr.sp_did /* destination connection identifier */
+#define s_seq s_shdr.sp_seq /* sequence number */
+#define s_ack s_shdr.sp_ack /* acknowledge number */
+#define s_alo s_shdr.sp_alo /* allocation number */
+#define s_dport s_idp->idp_dna.x_port /* where we are sending */
+ struct sphdr s_rhdr; /* last received header (in effect!)*/
+ u_short s_rack; /* their acknowledge number */
+ u_short s_ralo; /* their allocation number */
+ u_short s_smax; /* highest packet # we have sent */
+ u_short s_snxt; /* which packet to send next */
+
+/* congestion control */
+#define CUNIT 1024 /* scaling for ... */
+ int s_cwnd; /* Congestion-controlled window */
+ /* in packets * CUNIT */
+ short s_swnd; /* == tcp snd_wnd, in packets */
+ short s_smxw; /* == tcp max_sndwnd */
+ /* difference of two spp_seq's can be
+ no bigger than a short */
+ u_short s_swl1; /* == tcp snd_wl1 */
+ u_short s_swl2; /* == tcp snd_wl2 */
+ int s_cwmx; /* max allowable cwnd */
+ int s_ssthresh; /* s_cwnd size threshold for
+ * slow start exponential-to-
+ * linear switch */
+/* transmit timing stuff
+ * srtt and rttvar are stored as fixed point, for convenience in smoothing.
+ * srtt has 3 bits to the right of the binary point, rttvar has 2.
+ */
+ short s_idle; /* time idle */
+ short s_timer[SPPT_NTIMERS]; /* timers */
+ short s_rxtshift; /* log(2) of rexmt exp. backoff */
+ short s_rxtcur; /* current retransmit value */
+ u_short s_rtseq; /* packet being timed */
+ short s_rtt; /* timer for round trips */
+ short s_srtt; /* averaged timer */
+ short s_rttvar; /* variance in round trip time */
+ char s_force; /* which timer expired */
+ char s_dupacks; /* counter to intuit xmt loss */
+
+/* out of band data */
+ char s_oobflags;
+#define SF_SOOB 0x08 /* sending out of band data */
+#define SF_IOOB 0x10 /* receiving out of band data */
+ char s_iobc; /* input characters */
+/* debug stuff */
+ u_short s_want; /* Last candidate for sending */
+ char s_outx; /* exit taken from spp_output */
+ char s_inx; /* exit taken from spp_input */
+ u_short s_flags2; /* more flags for testing */
+#define SF_NEWCALL 0x100 /* for new_recvmsg */
+#define SO_NEWCALL 10 /* for new_recvmsg */
+};
+
+/* Map an ns pcb, or a socket, to the spp control block hanging off it. */
+#define nstosppcb(np) ((struct sppcb *)(np)->nsp_pcb)
+#define sotosppcb(so) (nstosppcb(sotonspcb(so)))
+
+/*
+ * SPP statistics counters.  Under KERNEL the name sppstat is a macro for
+ * the newstats member of the global spp_istat, so this structure is not
+ * instantiated on its own there.
+ */
+struct sppstat {
+ long spps_connattempt; /* connections initiated */
+ long spps_accepts; /* connections accepted */
+ long spps_connects; /* connections established */
+ long spps_drops; /* connections dropped */
+ long spps_conndrops; /* embryonic connections dropped */
+ long spps_closed; /* conn. closed (includes drops) */
+ long spps_segstimed; /* segs where we tried to get rtt */
+ long spps_rttupdated; /* times we succeeded */
+ long spps_delack; /* delayed acks sent */
+ long spps_timeoutdrop; /* conn. dropped in rxmt timeout */
+ long spps_rexmttimeo; /* retransmit timeouts */
+ long spps_persisttimeo; /* persist timeouts */
+ long spps_keeptimeo; /* keepalive timeouts */
+ long spps_keepprobe; /* keepalive probes sent */
+ long spps_keepdrops; /* connections dropped in keepalive */
+
+ long spps_sndtotal; /* total packets sent */
+ long spps_sndpack; /* data packets sent */
+ long spps_sndbyte; /* data bytes sent */
+ long spps_sndrexmitpack; /* data packets retransmitted */
+ long spps_sndrexmitbyte; /* data bytes retransmitted */
+ long spps_sndacks; /* ack-only packets sent */
+ long spps_sndprobe; /* window probes sent */
+ long spps_sndurg; /* packets sent with URG only */
+ long spps_sndwinup; /* window update-only packets sent */
+ long spps_sndctrl; /* control (SYN|FIN|RST) packets sent */
+ long spps_sndvoid; /* couldn't find requested packet*/
+
+ long spps_rcvtotal; /* total packets received */
+ long spps_rcvpack; /* packets received in sequence */
+ long spps_rcvbyte; /* bytes received in sequence */
+ long spps_rcvbadsum; /* packets received with cksum errs */
+ long spps_rcvbadoff; /* packets received with bad offset */
+ long spps_rcvshort; /* packets received too short */
+ long spps_rcvduppack; /* duplicate-only packets received */
+ long spps_rcvdupbyte; /* duplicate-only bytes received */
+ long spps_rcvpartduppack; /* packets with some duplicate data */
+ long spps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */
+ long spps_rcvoopack; /* out-of-order packets received */
+ long spps_rcvoobyte; /* out-of-order bytes received */
+ long spps_rcvpackafterwin; /* packets with data after window */
+ long spps_rcvbyteafterwin; /* bytes rcvd after window */
+ long spps_rcvafterclose; /* packets rcvd after "close" */
+ long spps_rcvwinprobe; /* rcvd window probe packets */
+ long spps_rcvdupack; /* rcvd duplicate acks */
+ long spps_rcvacktoomuch; /* rcvd acks for unsent data */
+ long spps_rcvackpack; /* rcvd ack packets */
+ long spps_rcvackbyte; /* bytes acked by rcvd acks */
+ long spps_rcvwinupd; /* rcvd window update packets */
+};
+/*
+ * Short event counters followed by the full sppstat block (newstats).
+ * NOTE(review): the meaning of the individual short counters is not
+ * established anywhere in this header; the names suggest input-path
+ * drop reasons -- confirm against spp_input().
+ */
+struct spp_istat {
+ short hdrops;
+ short badsum;
+ short badlen;
+ short slotim;
+ short fastim;
+ short nonucn;
+ short noconn;
+ short notme;
+ short wrncon;
+ short bdreas;
+ short gonawy;
+ short notyet;
+ short lstdup;
+ struct sppstat newstats;
+};
+
+/*
+ * Kernel-only globals and function declarations.
+ * NOTE(review): spp_istat and spp_iss are declared here without extern,
+ * so every kernel file including this header emits a tentative
+ * definition; this relies on the linker merging common symbols.
+ */
+#ifdef KERNEL
+struct spp_istat spp_istat;
+
+/* Following was struct sppstat sppstat; */
+#ifndef sppstat
+#define sppstat spp_istat.newstats
+#endif
+
+u_short spp_iss; /* source of connection ids, see spp_template() */
+extern struct sppcb *spp_close(), *spp_disconnect(),
+ *spp_usrclosed(), *spp_timers(), *spp_drop();
+#endif
+
+#define SPP_ISSINCR 128 /* basis of the increments applied to spp_iss */
+/*
+ * SPP sequence numbers are 16 bit integers operated
+ * on with modular arithmetic. These macros can be
+ * used to compare such integers: the difference is cast to a short and
+ * its sign decides the ordering.
+ * The sun variant first stores the difference in the global xnsCbug;
+ * NOTE(review): presumably a compiler workaround, going by the name --
+ * confirm before simplifying.
+ */
+#ifdef sun
+short xnsCbug;
+#define SSEQ_LT(a,b) ((xnsCbug = (short)((a)-(b))) < 0)
+#define SSEQ_LEQ(a,b) ((xnsCbug = (short)((a)-(b))) <= 0)
+#define SSEQ_GT(a,b) ((xnsCbug = (short)((a)-(b))) > 0)
+#define SSEQ_GEQ(a,b) ((xnsCbug = (short)((a)-(b))) >= 0)
+#else
+#define SSEQ_LT(a,b) (((short)((a)-(b))) < 0)
+#define SSEQ_LEQ(a,b) (((short)((a)-(b))) <= 0)
+#define SSEQ_GT(a,b) (((short)((a)-(b))) > 0)
+#define SSEQ_GEQ(a,b) (((short)((a)-(b))) >= 0)
+#endif
diff --git a/sys/nfs/nfs.h b/sys/nfs/nfs.h
new file mode 100644
index 000000000000..261fd42657a7
--- /dev/null
+++ b/sys/nfs/nfs.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs.
+ * Timeouts expressed with NFS_HZ are in NFS ticks, not seconds.
+ */
+
+#define NFS_MAXIOVEC 34
+#define NFS_HZ 25 /* Ticks per second for NFS timeouts */
+#define NFS_TIMEO (1*NFS_HZ) /* Default timeout = 1 second */
+#define NFS_MINTIMEO (1*NFS_HZ) /* Min timeout to use */
+#define NFS_MAXTIMEO (60*NFS_HZ) /* Max timeout to backoff to */
+#define NFS_MINIDEMTIMEO (5*NFS_HZ) /* Min timeout for non-idempotent ops*/
+#define NFS_MAXREXMIT 100 /* Stop counting after this many */
+#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
+#define NFS_RETRANS 10 /* Num of retrans for soft mounts */
+#define NFS_MAXGRPS 16 /* Max. size of groups list */
+#define NFS_MINATTRTIMO 5 /* Min attribute cache timeout in sec */
+#define NFS_MAXATTRTIMO 60 /* Max attribute cache timeout in sec */
+#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
+#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
+#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
+#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
+#define NFS_MAXREADDIR NFS_MAXDATA /* Max. size of directory read */
+#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
+#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runnable */
+#define NFS_DIRBLKSIZ 1024 /* Size of an NFS directory block */
+#define NMOD(a) ((a) % nfs_asyncdaemons) /* nfs_asyncdaemons is declared elsewhere */
+
+/*
+ * Set the attribute timeout based on how recently the file has been modified.
+ * The result is one tenth of the time since n_mtime, clamped to the range
+ * NFS_MINATTRTIMO..NFS_MAXATTRTIMO; a node with NMODIFIED set always gets
+ * NFS_MINATTRTIMO.  The argument is evaluated more than once.
+ */
+#define NFS_ATTRTIMEO(np) \
+ ((((np)->n_flag & NMODIFIED) || \
+ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+ (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try to use it.
+ */
+/* Describes a socket to be served. */
+struct nfsd_args {
+ int sock; /* Socket to serve */
+ caddr_t name; /* Client address for connection based sockets */
+ int namelen; /* Length of name */
+};
+
+/*
+ * nfssvc(2) argument block carrying a uid-to-credential mapping and an
+ * authentication string; fields marked (ret) are filled in on return.
+ */
+struct nfsd_srvargs {
+ struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */
+ uid_t nsd_uid; /* Effective uid mapped to cred */
+ u_long nsd_haddr; /* Ip address of client */
+ struct ucred nsd_cr; /* Cred. uid maps to */
+ int nsd_authlen; /* Length of auth string (ret) */
+ char *nsd_authstr; /* Auth string (ret) */
+};
+
+/* nfssvc(2) argument block naming a mount point and an authenticator. */
+struct nfsd_cargs {
+ char *ncd_dirp; /* Mount dir path */
+ uid_t ncd_authuid; /* Effective uid */
+ int ncd_authtype; /* Type of authenticator */
+ int ncd_authlen; /* Length of authenticator string */
+ char *ncd_authstr; /* Authenticator string */
+};
+
+/*
+ * Stats structure.
+ * Plain int counters; rpccnt[] and srvrpccnt[] hold one slot per NFS
+ * procedure number (NFS_NPROCS entries each).
+ */
+struct nfsstats {
+ int attrcache_hits;
+ int attrcache_misses;
+ int lookupcache_hits;
+ int lookupcache_misses;
+ int direofcache_hits;
+ int direofcache_misses;
+ int biocache_reads;
+ int read_bios;
+ int read_physios;
+ int biocache_writes;
+ int write_bios;
+ int write_physios;
+ int biocache_readlinks;
+ int readlink_bios;
+ int biocache_readdirs;
+ int readdir_bios;
+ int rpccnt[NFS_NPROCS];
+ int rpcretries;
+ int srvrpccnt[NFS_NPROCS];
+ int srvrpc_errs;
+ int srv_errs;
+ int rpcrequests;
+ int rpctimeouts;
+ int rpcunexpected;
+ int rpcinvalid;
+ int srvcache_inproghits;
+ int srvcache_idemdonehits;
+ int srvcache_nonidemdonehits;
+ int srvcache_misses;
+ int srvnqnfs_leases;
+ int srvnqnfs_maxleases;
+ int srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ * Each value is a distinct bit; 0x001 and 0x020 are not assigned here.
+ */
+#define NFSSVC_BIOD 0x002
+#define NFSSVC_NFSD 0x004
+#define NFSSVC_ADDSOCK 0x008
+#define NFSSVC_AUTHIN 0x010
+#define NFSSVC_GOTAUTH 0x040
+#define NFSSVC_AUTHINFAIL 0x080
+#define NFSSVC_MNTD 0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+ sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all, apart from EINTR, ERESTART and EWOULDBLOCK.
+ * s is tested for the PR_CONNREQUIRED bit, e is the error code; the macro
+ * is true when e may be ignored.
+ */
+#define NFSIGNORE_SOERROR(s, e) \
+ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+ ((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element.
+ * r_next/r_prev link the elements into a doubly linked list.
+ */
+struct nfsreq {
+ struct nfsreq *r_next;
+ struct nfsreq *r_prev;
+ struct mbuf *r_mreq; /* presumably the request mbuf chain -- confirm */
+ struct mbuf *r_mrep; /* presumably the reply mbuf chain -- confirm */
+ struct mbuf *r_md;
+ caddr_t r_dpos;
+ struct nfsmount *r_nmp;
+ struct vnode *r_vp;
+ u_long r_xid;
+ int r_flags; /* flags on request, see below */
+ int r_retry; /* max retransmission count */
+ int r_rexmit; /* current retrans count */
+ int r_timer; /* tick counter on reply */
+ int r_procnum; /* NFS procedure number */
+ int r_rtt; /* RTT for rpc */
+ struct proc *r_procp; /* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags (struct nfsreq) */
+#define R_TIMING 0x01 /* timing request (in mntp) */
+#define R_SENT 0x02 /* request has been sent */
+#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
+#define R_INTR 0x08 /* intr mnt, signal pending */
+#define R_SOCKERR 0x10 /* Fatal error on socket */
+#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
+#define R_MUSTRESEND 0x40 /* Must resend request */
+#define R_GETONEREP 0x80 /* Probe for one reply only */
+
+/*
+ * NOTE(review): declared without extern, so each kernel file including
+ * this header emits a tentative definition of nfsstats.
+ */
+struct nfsstats nfsstats;
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ * NUIDHASH masks a uid down to a bucket index, so NUIDHASHSIZ must stay
+ * a power of two.
+ */
+#define NUIDHASHSIZ 32
+#define NUIDHASH(uid) ((uid) & (NUIDHASHSIZ - 1))
+
+/*
+ * Network address hash list element.
+ * Holds either an inet address value or a pointer to an mbuf; in
+ * struct nfsuid the NU_INETADDR flag presumably tells which member is
+ * valid -- confirm against the users.
+ */
+union nethostaddr {
+ u_long had_inetaddr;
+ struct mbuf *had_nam;
+};
+
+/*
+ * One uid-to-credential mapping, linked on an lru list and on a hash
+ * chain (see the ns_uidh[] array in struct nfssvc_sock).
+ */
+struct nfsuid {
+ struct nfsuid *nu_lrunext; /* MUST be first */
+ struct nfsuid *nu_lruprev;
+ struct nfsuid *nu_hnext;
+ struct nfsuid *nu_hprev;
+ int nu_flag; /* Flags */
+ uid_t nu_uid; /* Uid mapped by this entry */
+ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */
+ struct ucred nu_cr; /* Cred uid mapped to */
+};
+
+/* Shorthands for the two members of nu_haddr */
+#define nu_inetaddr nu_haddr.had_inetaddr
+#define nu_nam nu_haddr.had_nam
+/* Bits for nu_flag */
+#define NU_INETADDR 0x1
+
+/*
+ * Per-socket state for a socket serviced by the nfsd.
+ * The first two members mirror the lru links at the start of
+ * struct nfsuid, which is why both are marked as having to come first.
+ */
+struct nfssvc_sock {
+ struct nfsuid *ns_lrunext; /* MUST be first */
+ struct nfsuid *ns_lruprev;
+ struct nfssvc_sock *ns_next;
+ struct nfssvc_sock *ns_prev;
+ int ns_flag; /* SLP_* bits */
+ u_long ns_sref;
+ struct file *ns_fp;
+ struct socket *ns_so;
+ int ns_solock;
+ struct mbuf *ns_nam;
+ int ns_cc;
+ struct mbuf *ns_raw;
+ struct mbuf *ns_rawend;
+ int ns_reclen;
+ struct mbuf *ns_rec;
+ struct mbuf *ns_recend;
+ int ns_numuids;
+ struct nfsuid *ns_uidh[NUIDHASHSIZ]; /* uid hash chains, indexed by NUIDHASH() */
+};
+
+/* Bits for "ns_flag" */
+#define SLP_VALID 0x01
+#define SLP_DOREC 0x02
+#define SLP_NEEDQ 0x04
+#define SLP_DISCONN 0x08
+#define SLP_GETSTREAM 0x10
+#define SLP_INIT 0x20
+#define SLP_WANTINIT 0x40
+
+/* Mask wide enough to cover every SLP_ bit above */
+#define SLP_ALLFLAGS 0xff
+
+/*
+ * One of these structures is allocated for each nfsd.
+ * nd_next/nd_prev link the structures into a doubly linked list.
+ */
+struct nfsd {
+ struct nfsd *nd_next; /* Must be first */
+ struct nfsd *nd_prev;
+ int nd_flag; /* NFSD_ flags */
+ struct nfssvc_sock *nd_slp; /* Current socket */
+ struct mbuf *nd_nam; /* Client addr for datagram req. */
+ struct mbuf *nd_mrep; /* Req. mbuf list */
+ struct mbuf *nd_md;
+ caddr_t nd_dpos; /* Position in list */
+ int nd_procnum; /* RPC procedure number */
+ u_long nd_retxid; /* RPC xid */
+ int nd_repstat; /* Reply status value */
+ struct ucred nd_cr; /* Credentials for req. */
+ int nd_nqlflag; /* Leasing flag */
+ int nd_duration; /* Lease duration */
+ int nd_authlen; /* Authenticator len */
+ u_char nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+ struct proc *nd_procp; /* Proc ptr */
+};
+
+/* Bits for nd_flag (struct nfsd) */
+#define NFSD_WAITING 0x01
+#define NFSD_CHECKSLP 0x02
+#define NFSD_REQINPROG 0x04
+#define NFSD_NEEDAUTH 0x08
+#define NFSD_AUTHFAIL 0x10
+#endif /* KERNEL */
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c
new file mode 100644
index 000000000000..177a278b6310
--- /dev/null
+++ b/sys/nfs/nfs_bio.c
@@ -0,0 +1,799 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_bio.c 8.5 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/resourcevar.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/trace.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+
+#include <vm/vm.h>
+
+#include <nfs/nfsnode.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+
+struct buf *incore(), *nfs_getcacheblk(); /* K&R forward declarations: both return struct buf * */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; /* scanned by nfs_asyncio(): the first non-null slot is cleared and woken */
+extern int nfs_numasync; /* when 0, nfs_asyncio() returns EIO and nfs_bioread() starts no read-ahead */
+
+/*
+ * Vnode op for read using bio (any similarity to readip() is purely coincidental).
+ * Copies VREG, VLNK or VDIR data to uio through the buffer cache; returns 0 or an errno.
+ */
+nfs_bioread(vp, uio, ioflag, cred)
+ register struct vnode *vp;
+ register struct uio *uio;
+ int ioflag;
+ struct ucred *cred;
+{
+ register struct nfsnode *np = VTONFS(vp);
+ register int biosize, diff;
+ struct buf *bp, *rabp;
+ struct vattr vattr;
+ struct proc *p;
+ struct nfsmount *nmp;
+ daddr_t lbn, bn, rabn;
+ caddr_t baddr;
+ int got_buf, nra, error = 0, n, on, not_readin;
+
+#ifdef lint
+ ioflag = ioflag;
+#endif /* lint */
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_READ)
+ panic("nfs_read mode");
+#endif
+ if (uio->uio_resid == 0) /* nothing was asked for */
+ return (0);
+ if (uio->uio_offset < 0 && vp->v_type != VDIR)
+ return (EINVAL);
+ nmp = VFSTONFS(vp->v_mount);
+ biosize = nmp->nm_rsize; /* regular-file cache blocks are sized by the mount's nm_rsize */
+ p = uio->uio_procp;
+ /*
+ * For nfs, cache consistency can only be maintained approximately.
+ * Although RFC1094 does not specify the criteria, the following is
+ * believed to be compatible with the reference port.
+ * For nqnfs, full cache consistency is maintained within the loop.
+ * For nfs:
+ * If the file's modify time on the server has changed since the
+ * last read rpc or you have written to the file,
+ * you may have lost data cache consistency with the
+ * server, so flush all of the file's data out of the cache.
+ * Then force a getattr rpc to ensure that you have up to date
+ * attributes.
+ * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
+ * the ones changing the modify time."
+ * NB: This implies that cache data can be read when up to
+ * NFS_ATTRTIMEO seconds out of date. If you find that you need current
+ * attributes this could be forced by setting n_attrstamp to 0 before
+ * the VOP_GETATTR() call.
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
+ if (np->n_flag & NMODIFIED) {
+ if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
+ vp->v_type != VREG) {
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ }
+ np->n_attrstamp = 0;
+ np->n_direofoffset = 0;
+ if (error = VOP_GETATTR(vp, &vattr, cred, p))
+ return (error);
+ np->n_mtime = vattr.va_mtime.ts_sec;
+ } else {
+ if (error = VOP_GETATTR(vp, &vattr, cred, p))
+ return (error);
+ if (np->n_mtime != vattr.va_mtime.ts_sec) { /* modify time moved since we last recorded it */
+ np->n_direofoffset = 0;
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ np->n_mtime = vattr.va_mtime.ts_sec;
+ }
+ }
+ }
+ do {
+
+ /*
+ * Get a valid lease. If cached data is stale, flush it.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
+ do {
+ error = nqnfs_getlease(vp, NQL_READ, cred, p);
+ } while (error == NQNFS_EXPIRED);
+ if (error)
+ return (error);
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE) ||
+ ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
+ if (vp->v_type == VDIR) {
+ np->n_direofoffset = 0;
+ cache_purge(vp);
+ }
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ np->n_brev = np->n_lrev;
+ }
+ } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
+ np->n_direofoffset = 0;
+ cache_purge(vp);
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ }
+ }
+ if (np->n_flag & NQNFSNONCACHE) { /* non-cachable: go straight to the rpc and return */
+ switch (vp->v_type) {
+ case VREG:
+ error = nfs_readrpc(vp, uio, cred);
+ break;
+ case VLNK:
+ error = nfs_readlinkrpc(vp, uio, cred);
+ break;
+ case VDIR:
+ error = nfs_readdirrpc(vp, uio, cred);
+ break;
+ };
+ return (error);
+ }
+ baddr = (caddr_t)0;
+ switch (vp->v_type) {
+ case VREG:
+ nfsstats.biocache_reads++;
+ lbn = uio->uio_offset / biosize;
+ on = uio->uio_offset & (biosize-1); /* offset in block; equals offset % biosize only if biosize is a power of two */
+ bn = lbn * (biosize / DEV_BSIZE); /* block number in DEV_BSIZE units */
+ not_readin = 1; /* cleared below if this pass has to call nfs_doio() */
+
+ /*
+ * Start the read ahead(s), as required.
+ */
+ if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
+ lbn == vp->v_lastr + 1) { /* v_lastr is the lbn recorded by the previous pass */
+ for (nra = 0; nra < nmp->nm_readahead &&
+ (lbn + 1 + nra) * biosize < np->n_size; nra++) {
+ rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
+ if (!incore(vp, rabn)) {
+ rabp = nfs_getcacheblk(vp, rabn, biosize, p);
+ if (!rabp)
+ return (EINTR);
+ if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
+ rabp->b_flags |= (B_READ | B_ASYNC);
+ if (nfs_asyncio(rabp, cred)) { /* could not be queued: discard the buffer */
+ rabp->b_flags |= B_INVAL;
+ brelse(rabp);
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * If the block is in the cache and has the required data
+ * in a valid region, just copy it out.
+ * Otherwise, get the block and write back/read in,
+ * as required.
+ */
+ if ((bp = incore(vp, bn)) &&
+ (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
+ (B_BUSY | B_WRITEINPROG))
+ got_buf = 0; /* bp is used without being acquired, so it is not brelse()d at the bottom */
+ else {
+again:
+ bp = nfs_getcacheblk(vp, bn, biosize, p);
+ if (!bp)
+ return (EINTR);
+ got_buf = 1;
+ if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
+ bp->b_flags |= B_READ;
+ not_readin = 0;
+ if (error = nfs_doio(bp, cred, p)) {
+ brelse(bp);
+ return (error);
+ }
+ }
+ }
+ n = min((unsigned)(biosize - on), uio->uio_resid);
+ diff = np->n_size - uio->uio_offset;
+ if (diff < n) /* do not copy past n_size */
+ n = diff;
+ if (not_readin && n > 0) {
+ if (on < bp->b_validoff || (on + n) > bp->b_validend) { /* wanted range not wholly inside the valid region */
+ if (!got_buf) {
+ bp = nfs_getcacheblk(vp, bn, biosize, p);
+ if (!bp)
+ return (EINTR);
+ got_buf = 1;
+ }
+ bp->b_flags |= B_INVAL;
+ if (bp->b_dirtyend > 0) {
+ if ((bp->b_flags & B_DELWRI) == 0)
+ panic("nfsbioread");
+ if (VOP_BWRITE(bp) == EINTR)
+ return (EINTR);
+ } else
+ brelse(bp);
+ goto again; /* buffer was marked B_INVAL; fetch it again and read it in */
+ }
+ }
+ vp->v_lastr = lbn;
+ diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
+ if (diff < n)
+ n = diff;
+ break;
+ case VLNK:
+ nfsstats.biocache_readlinks++;
+ bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
+ if (!bp)
+ return (EINTR);
+ if ((bp->b_flags & B_DONE) == 0) {
+ bp->b_flags |= B_READ;
+ if (error = nfs_doio(bp, cred, p)) {
+ brelse(bp);
+ return (error);
+ }
+ }
+ n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
+ got_buf = 1;
+ on = 0;
+ break;
+ case VDIR:
+ nfsstats.biocache_readdirs++;
+ bn = (daddr_t)uio->uio_offset; /* directory blocks are keyed by the uio offset itself */
+ bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
+ if (!bp)
+ return (EINTR);
+ if ((bp->b_flags & B_DONE) == 0) {
+ bp->b_flags |= B_READ;
+ if (error = nfs_doio(bp, cred, p)) {
+ brelse(bp);
+ return (error);
+ }
+ }
+
+ /*
+ * If not eof and read aheads are enabled, start one.
+ * (You need the current block first, so that you have the
+ * directory offset cookie of the next block.)
+ */
+ rabn = bp->b_blkno;
+ if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
+ rabn != 0 && rabn != np->n_direofoffset &&
+ !incore(vp, rabn)) {
+ rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
+ if (rabp) {
+ if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
+ rabp->b_flags |= (B_READ | B_ASYNC);
+ if (nfs_asyncio(rabp, cred)) {
+ rabp->b_flags |= B_INVAL;
+ brelse(rabp);
+ }
+ }
+ }
+ }
+ on = 0;
+ n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
+ got_buf = 1;
+ break;
+ };
+
+ if (n > 0) {
+ if (!baddr)
+ baddr = bp->b_data;
+ error = uiomove(baddr + on, (int)n, uio);
+ }
+ switch (vp->v_type) {
+ case VREG:
+ if (n + on == biosize || uio->uio_offset == np->n_size)
+ bp->b_flags |= B_AGE;
+ break;
+ case VLNK:
+ n = 0; /* makes the loop condition below false: a link is copied in one pass */
+ break;
+ case VDIR:
+ uio->uio_offset = bp->b_blkno; /* nfs_doio() saves the directory offset cookie in b_blkno */
+ break;
+ };
+ if (got_buf)
+ brelse(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n > 0);
+ return (error);
+}
+
+/*
+ * Vnode op for write using bio: copies uio data into cache blocks (VREG only); returns 0 or an errno.
+ */
+nfs_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register int biosize;
+ register struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ register struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ register struct ucred *cred = ap->a_cred;
+ int ioflag = ap->a_ioflag;
+ struct buf *bp;
+ struct vattr vattr;
+ struct nfsmount *nmp;
+ daddr_t lbn, bn;
+ int n, on, error = 0;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_WRITE)
+ panic("nfs_write mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("nfs_write proc");
+#endif
+ if (vp->v_type != VREG) /* anything but a regular file is refused */
+ return (EIO);
+ if (np->n_flag & NWRITEERR) { /* nfs_doio() recorded a failed write: report n_error once and clear the flag */
+ np->n_flag &= ~NWRITEERR;
+ return (np->n_error);
+ }
+ if (ioflag & (IO_APPEND | IO_SYNC)) {
+ if (np->n_flag & NMODIFIED) {
+ np->n_attrstamp = 0;
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ }
+ if (ioflag & IO_APPEND) {
+ np->n_attrstamp = 0;
+ if (error = VOP_GETATTR(vp, &vattr, cred, p))
+ return (error);
+ uio->uio_offset = np->n_size; /* append at n_size; presumably refreshed by the VOP_GETATTR() above -- TODO confirm */
+ }
+ }
+ nmp = VFSTONFS(vp->v_mount);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ if (uio->uio_resid == 0)
+ return (0);
+ /*
+ * Maybe this should be above the vnode op call, but so long as
+ * file servers have no limits, I don't think it matters
+ */
+ if (p && uio->uio_offset + uio->uio_resid >
+ p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
+ psignal(p, SIGXFSZ);
+ return (EFBIG);
+ }
+ /*
+ * I use nm_rsize, not nm_wsize so that all buffer cache blocks
+ * will be the same size within a filesystem. nfs_writerpc will
+ * still use nm_wsize when sizing the rpc's.
+ */
+ biosize = nmp->nm_rsize;
+ do {
+
+ /*
+ * Check for a valid write lease.
+ * If non-cachable, just do the rpc
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
+ do {
+ error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
+ } while (error == NQNFS_EXPIRED);
+ if (error)
+ return (error);
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE)) {
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ np->n_brev = np->n_lrev;
+ }
+ }
+ if (np->n_flag & NQNFSNONCACHE)
+ return (nfs_writerpc(vp, uio, cred, ioflag));
+ nfsstats.biocache_writes++;
+ lbn = uio->uio_offset / biosize;
+ on = uio->uio_offset & (biosize-1); /* offset in block; equals offset % biosize only if biosize is a power of two */
+ n = min((unsigned)(biosize - on), uio->uio_resid);
+ bn = lbn * (biosize / DEV_BSIZE);
+again:
+ bp = nfs_getcacheblk(vp, bn, biosize, p);
+ if (!bp)
+ return (EINTR);
+ if (bp->b_wcred == NOCRED) { /* first writer's credential sticks to the buffer */
+ crhold(cred);
+ bp->b_wcred = cred;
+ }
+ np->n_flag |= NMODIFIED;
+ if (uio->uio_offset + n > np->n_size) { /* this write extends the file */
+ np->n_size = uio->uio_offset + n;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ }
+
+ /*
+ * If the new write will leave a contiguous dirty
+ * area, just update the b_dirtyoff and b_dirtyend,
+ * otherwise force a write rpc of the old dirty area.
+ */
+ if (bp->b_dirtyend > 0 &&
+ (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { /* a gap would separate old and new dirty ranges */
+ bp->b_proc = p;
+ if (VOP_BWRITE(bp) == EINTR)
+ return (EINTR);
+ goto again;
+ }
+
+ /*
+ * Check for valid write lease and get one as required.
+ * In case getblk() and/or bwrite() delayed us.
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
+ do {
+ error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
+ } while (error == NQNFS_EXPIRED);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE)) {
+ brelse(bp);
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ np->n_brev = np->n_lrev;
+ goto again;
+ }
+ }
+ if (error = uiomove((char *)bp->b_data + on, n, uio)) {
+ bp->b_flags |= B_ERROR;
+ brelse(bp);
+ return (error);
+ }
+ if (bp->b_dirtyend > 0) { /* grow the existing dirty range to include [on, on + n) */
+ bp->b_dirtyoff = min(on, bp->b_dirtyoff);
+ bp->b_dirtyend = max((on + n), bp->b_dirtyend);
+ } else {
+ bp->b_dirtyoff = on;
+ bp->b_dirtyend = on + n;
+ }
+#ifndef notdef
+ if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
+ bp->b_validoff > bp->b_dirtyend) { /* no valid data, or valid and dirty ranges are disjoint */
+ bp->b_validoff = bp->b_dirtyoff;
+ bp->b_validend = bp->b_dirtyend;
+ } else {
+ bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
+ bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
+ }
+#else
+ bp->b_validoff = bp->b_dirtyoff;
+ bp->b_validend = bp->b_dirtyend;
+#endif
+ if (ioflag & IO_APPEND)
+ bp->b_flags |= B_APPENDWRITE; /* nfs_doio() turns this into IO_APPEND for nfs_writerpc() */
+
+ /*
+ * If the lease is non-cachable or IO_SYNC do bwrite().
+ */
+ if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
+ bp->b_proc = p;
+ if (error = VOP_BWRITE(bp))
+ return (error);
+ } else if ((n + on) == biosize &&
+ (nmp->nm_flag & NFSMNT_NQNFS) == 0) { /* write reached the end of the block on a non-NQNFS mount */
+ bp->b_proc = (struct proc *)0;
+ bawrite(bp);
+ } else
+ bdwrite(bp);
+ } while (uio->uio_resid > 0 && n > 0);
+ return (0);
+}
+
+/*
+ * Get an nfs cache block.
+ * getblk() supplies the buffer for (vp, bn), allocating one when the block
+ * is not yet cached, and hands it back marked busy. On a mount flagged
+ * NFSMNT_INT the wait can be broken by a signal, in which case NULL is
+ * returned instead of a buffer.
+ */
+struct buf *
+nfs_getcacheblk(vp, bn, size, p)
+ struct vnode *vp;
+ daddr_t bn;
+ int size;
+ struct proc *p;
+{
+ struct nfsmount *mnt = VFSTONFS(vp->v_mount);
+ register struct buf *blk;
+
+ /* Mount is not interruptible: one plain getblk() is all it takes. */
+ if ((mnt->nm_flag & NFSMNT_INT) == 0)
+ return (getblk(vp, bn, size, 0, 0));
+ /* Interruptible: try with PCATCH, then retry with a 2 * hz timeout. */
+ for (blk = getblk(vp, bn, size, PCATCH, 0); blk == (struct buf *)0;
+ blk = getblk(vp, bn, size, 0, 2 * hz)) {
+ if (nfs_sigintr(mnt, (struct nfsreq *)0, p))
+ return ((struct buf *)0);
+ }
+ return (blk);
+}
+
+/*
+ * Write back and invalidate the buffers cached for vp.
+ * Flushers are serialised through the NFLUSHINPROG and NFLUSHWANT bits of
+ * n_flag. Returns 0 on success, or EINTR when nfs_sigintr() reports a signal.
+ */
+nfs_vinvalbuf(vp, flags, cred, p, intrflg)
+ struct vnode *vp;
+ int flags;
+ struct ucred *cred;
+ struct proc *p;
+ int intrflg;
+{
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ register struct nfsnode *np = VTONFS(vp);
+ int error = 0, slpflag, slptimeo, rval = 0;
+
+ /* Interruption is only honoured on mounts flagged NFSMNT_INT. */
+ if (!(nmp->nm_flag & NFSMNT_INT))
+ intrflg = 0;
+ slpflag = intrflg ? PCATCH : 0;
+ slptimeo = intrflg ? 2 * hz : 0;
+
+ /*
+ * Another process may own the flush; ask to be woken and sleep
+ * until its NFLUSHINPROG bit is gone.
+ */
+ while (np->n_flag & NFLUSHINPROG) {
+ np->n_flag |= NFLUSHWANT;
+ error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
+ slptimeo);
+ if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
+ return (EINTR);
+ }
+
+ /*
+ * Our turn: claim the flush and call vinvalbuf() until it succeeds.
+ * The first try passes slpflag; every retry passes slptimeo instead.
+ */
+ np->n_flag |= NFLUSHINPROG;
+ for (error = vinvalbuf(vp, flags, cred, p, slpflag, 0); error;
+ error = vinvalbuf(vp, flags, cred, p, 0, slptimeo)) {
+ if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
+ /* Interrupted: drop the claim but leave NMODIFIED set. */
+ np->n_flag &= ~NFLUSHINPROG;
+ rval = EINTR;
+ goto wake;
+ }
+ }
+
+ np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
+wake:
+ /* Release anyone who queued up behind this flush. */
+ if (np->n_flag & NFLUSHWANT) {
+ np->n_flag &= ~NFLUSHWANT;
+ wakeup((caddr_t)&np->n_flag);
+ }
+ return (rval);
+}
+
+/*
+ * Hand a buffer to an idle nfsiod for asynchronous I/O. Fails with EIO when none is
+ * available, so that async requests are not queued behind nfsiods all hung on a dead server.
+ */
+nfs_asyncio(bp, cred)
+ register struct buf *bp;
+ struct ucred *cred;
+{
+ register int slot;
+
+ if (nfs_numasync == 0)
+ return (EIO);
+ /* Locate the first non-null entry of nfs_iodwant[]; give up if there is none. */
+ slot = 0;
+ while (slot < NFS_MAXASYNCDAEMON && !nfs_iodwant[slot])
+ slot++;
+ if (slot == NFS_MAXASYNCDAEMON)
+ return (EIO);
+ /* Attach the caller's credential if the buffer has none for this direction. */
+ if (bp->b_flags & B_READ) {
+ if (cred != NOCRED && bp->b_rcred == NOCRED) {
+ crhold(cred);
+ bp->b_rcred = cred;
+ }
+ } else if (cred != NOCRED && bp->b_wcred == NOCRED) {
+ crhold(cred);
+ bp->b_wcred = cred;
+ }
+ /* Queue the buffer, clear the slot and wake whoever sleeps on it. */
+ TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
+ nfs_iodwant[slot] = (struct proc *)0;
+ wakeup((caddr_t)&nfs_iodwant[slot]);
+ return (0);
+}
+
+/*
+ * Do an I/O operation to/from a cache block. This may be called
+ * synchronously or from an nfsiod.
+ */
+int
+nfs_doio(bp, cr, p)
+ register struct buf *bp;
+ struct cred *cr;
+ struct proc *p;
+{
+ register struct uio *uiop;
+ register struct vnode *vp;
+ struct nfsnode *np;
+ struct nfsmount *nmp;
+ int error, diff, len;
+ struct uio uio;
+ struct iovec io;
+
+ vp = bp->b_vp;
+ np = VTONFS(vp);
+ nmp = VFSTONFS(vp->v_mount);
+ uiop = &uio;
+ uiop->uio_iov = &io;
+ uiop->uio_iovcnt = 1;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ uiop->uio_procp = p;
+
+ /*
+ * Historically, paging was done with physio, but no more.
+ */
+ if (bp->b_flags & B_PHYS)
+ panic("doio phys");
+ if (bp->b_flags & B_READ) {
+ io.iov_len = uiop->uio_resid = bp->b_bcount;
+ io.iov_base = bp->b_data;
+ uiop->uio_rw = UIO_READ;
+ switch (vp->v_type) {
+ case VREG:
+ uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
+ nfsstats.read_bios++;
+ error = nfs_readrpc(vp, uiop, cr);
+ if (!error) {
+ bp->b_validoff = 0;
+ if (uiop->uio_resid) {
+ /*
+ * If len > 0, there is a hole in the file and
+ * no writes after the hole have been pushed to
+ * the server yet.
+ * Just zero fill the rest of the valid area.
+ */
+ diff = bp->b_bcount - uiop->uio_resid;
+ len = np->n_size - (bp->b_blkno * DEV_BSIZE
+ + diff);
+ if (len > 0) {
+ len = min(len, uiop->uio_resid);
+ bzero((char *)bp->b_data + diff, len);
+ bp->b_validend = diff + len;
+ } else
+ bp->b_validend = diff;
+ } else
+ bp->b_validend = bp->b_bcount;
+ }
+ if (p && (vp->v_flag & VTEXT) &&
+ (((nmp->nm_flag & NFSMNT_NQNFS) &&
+ np->n_lrev != np->n_brev) ||
+ (!(nmp->nm_flag & NFSMNT_NQNFS) &&
+ np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
+ uprintf("Process killed due to text file modification\n");
+ psignal(p, SIGKILL);
+ p->p_flag |= P_NOSWAP;
+ }
+ break;
+ case VLNK:
+ uiop->uio_offset = 0;
+ nfsstats.readlink_bios++;
+ error = nfs_readlinkrpc(vp, uiop, cr);
+ break;
+ case VDIR:
+ uiop->uio_offset = bp->b_lblkno;
+ nfsstats.readdir_bios++;
+ if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
+ error = nfs_readdirlookrpc(vp, uiop, cr);
+ else
+ error = nfs_readdirrpc(vp, uiop, cr);
+ /*
+ * Save offset cookie in b_blkno.
+ */
+ bp->b_blkno = uiop->uio_offset;
+ break;
+ };
+ if (error) {
+ bp->b_flags |= B_ERROR;
+ bp->b_error = error;
+ }
+ } else {
+ io.iov_len = uiop->uio_resid = bp->b_dirtyend
+ - bp->b_dirtyoff;
+ uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
+ + bp->b_dirtyoff;
+ io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
+ uiop->uio_rw = UIO_WRITE;
+ nfsstats.write_bios++;
+ if (bp->b_flags & B_APPENDWRITE)
+ error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
+ else
+ error = nfs_writerpc(vp, uiop, cr, 0);
+ bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);
+
+ /*
+ * For an interrupted write, the buffer is still valid and the
+ * write hasn't been pushed to the server yet, so we can't set
+ * B_ERROR and report the interruption by setting B_EINTR. For
+ * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
+ * is essentially a noop.
+ */
+ if (error == EINTR) {
+ bp->b_flags &= ~B_INVAL;
+ bp->b_flags |= B_DELWRI;
+
+ /*
+ * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
+ * buffer to the clean list, we have to reassign it back to the
+ * dirty one. Ugh.
+ */
+ if (bp->b_flags & B_ASYNC)
+ reassignbuf(bp, vp);
+ else
+ bp->b_flags |= B_EINTR;
+ } else {
+ if (error) {
+ bp->b_flags |= B_ERROR;
+ bp->b_error = np->n_error = error;
+ np->n_flag |= NWRITEERR;
+ }
+ bp->b_dirtyoff = bp->b_dirtyend = 0;
+ }
+ }
+ bp->b_resid = uiop->uio_resid;
+ biodone(bp);
+ return (error);
+}
diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c
new file mode 100644
index 000000000000..5778f7d7f01a
--- /dev/null
+++ b/sys/nfs/nfs_common.c
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Data items converted to xdr at startup, since they are constant.
+ * This is kinda hokey, but may save a little time doing byte swaps.
+ */
+u_long nfs_procids[NFS_NPROCS];
+u_long nfs_xdrneg1;
+u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, /* rpc_call and rpc_vers are stored as-is by nfsm_rpchead() */
+ rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_rejectedcred,
+ rpc_auth_kerb;
+u_long nfs_vers, nfs_prog, nfs_true, nfs_false;
+
+/* And other global data */
+static u_long nfs_xid = 0; /* last xid issued; nfsm_rpchead() pre-increments it and skips the value 0 */
+enum vtype ntov_type[7] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON }; /* presumably indexed by the nfs file type -- TODO confirm */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern struct nfsreq nfsreqh;
+extern int nqnfs_piggy[NFS_NPROCS];
+extern struct nfsrtt nfsrtt;
+extern time_t nqnfsstarttime;
+extern u_long nqnfs_prog, nqnfs_vers;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+
+/*
+ * Create the header for an rpc request packet: allocates the first mbuf and, for NQNFS mounts, adds the lease request.
+ * The hsiz is the size of the rest of the nfs request header.
+ * (just used to decide if a cluster is a good idea). Returns the mbuf; *bposp gets the next free byte in it.
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+ struct vnode *vp;
+ u_long procid;
+ int hsiz;
+ caddr_t *bposp;
+{
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ struct mbuf *mb2; /* never named below; presumably used inside the nfsm_build() macro -- confirm in nfsm_subs.h */
+ struct nfsmount *nmp;
+ int nqflag;
+
+ MGET(mb, M_WAIT, MT_DATA);
+ if (hsiz >= MINCLSIZE)
+ MCLGET(mb, M_WAIT);
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * For NQNFS, add lease request.
+ */
+ if (vp) { /* a null vp skips the lease words entirely */
+ nmp = VFSTONFS(vp->v_mount);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nqflag = NQNFS_NEEDLEASE(vp, procid);
+ if (nqflag) { /* two words: the lease flag and the mount's lease term */
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nqflag);
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ } else { /* one zero word: no lease wanted */
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ }
+ /* Finally, return values */
+ *bposp = bpos;
+ return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info.
+ * The authorization string argument is only used when the credentials
+ * come from outside of the kernel (the RPCAUTH_NQNFS case below copies auth_str).
+ * Returns the head of the mbuf list; *mbp gets the last header mbuf and *xidp the xdr form of the xid.
+ */
+struct mbuf *
+nfsm_rpchead(cr, nqnfs, procid, auth_type, auth_len, auth_str, mrest,
+ mrest_len, mbp, xidp)
+ register struct ucred *cr;
+ int nqnfs;
+ int procid;
+ int auth_type;
+ int auth_len;
+ char *auth_str;
+ struct mbuf *mrest;
+ int mrest_len;
+ struct mbuf **mbp;
+ u_long *xidp;
+{
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ register int i;
+ struct mbuf *mreq, *mb2;
+ int siz, grpsiz, authsiz;
+
+ authsiz = nfsm_rndup(auth_len);
+ if (auth_type == RPCAUTH_NQNFS)
+ authsiz += 2 * NFSX_UNSIGNED; /* room for the uid and length words built below */
+ MGETHDR(mb, M_WAIT, MT_DATA);
+ if ((authsiz + 10*NFSX_UNSIGNED) >= MINCLSIZE) {
+ MCLGET(mb, M_WAIT);
+ } else if ((authsiz + 10*NFSX_UNSIGNED) < MHLEN) {
+ MH_ALIGN(mb, authsiz + 10*NFSX_UNSIGNED);
+ } else {
+ MH_ALIGN(mb, 8*NFSX_UNSIGNED);
+ }
+ mb->m_len = 0;
+ mreq = mb; /* remember the head; mb may move on to later mbufs */
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * First the RPC header.
+ */
+ nfsm_build(tl, u_long *, 8*NFSX_UNSIGNED);
+ if (++nfs_xid == 0) /* skip 0 when the counter wraps */
+ nfs_xid++;
+ *tl++ = *xidp = txdr_unsigned(nfs_xid);
+ *tl++ = rpc_call;
+ *tl++ = rpc_vers;
+ if (nqnfs) {
+ *tl++ = txdr_unsigned(NQNFS_PROG);
+ *tl++ = txdr_unsigned(NQNFS_VER1);
+ } else {
+ *tl++ = txdr_unsigned(NFS_PROG);
+ *tl++ = txdr_unsigned(NFS_VER2);
+ }
+ *tl++ = txdr_unsigned(procid);
+
+ /*
+ * And then the authorization cred.
+ */
+ *tl++ = txdr_unsigned(auth_type);
+ *tl = txdr_unsigned(authsiz);
+ switch (auth_type) { /* other auth types add no credential body */
+ case RPCAUTH_UNIX:
+ nfsm_build(tl, u_long *, auth_len);
+ *tl++ = 0; /* stamp ?? */
+ *tl++ = 0; /* NULL hostname */
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl++ = txdr_unsigned(cr->cr_groups[0]);
+ grpsiz = (auth_len >> 2) - 5; /* auth_len in 4-byte words, less the 5 fixed words of this body */
+ *tl++ = txdr_unsigned(grpsiz);
+ for (i = 1; i <= grpsiz; i++) /* cr_groups[0] was already sent above */
+ *tl++ = txdr_unsigned(cr->cr_groups[i]);
+ break;
+ case RPCAUTH_NQNFS:
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl = txdr_unsigned(auth_len);
+ siz = auth_len;
+ while (siz > 0) { /* copy auth_str, chaining on new mbufs as each one fills */
+ if (M_TRAILINGSPACE(mb) == 0) {
+ MGET(mb2, M_WAIT, MT_DATA);
+ if (siz >= MINCLSIZE)
+ MCLGET(mb2, M_WAIT);
+ mb->m_next = mb2;
+ mb = mb2;
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+ }
+ i = min(siz, M_TRAILINGSPACE(mb));
+ bcopy(auth_str, bpos, i);
+ mb->m_len += i;
+ auth_str += i;
+ bpos += i;
+ siz -= i;
+ }
+ if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { /* zero pad up to nfsm_rndup(auth_len) */
+ for (i = 0; i < siz; i++)
+ *bpos++ = '\0';
+ mb->m_len += siz;
+ }
+ break;
+ };
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(RPCAUTH_NULL);
+ *tl = 0;
+ mb->m_next = mrest; /* append the caller's request body */
+ mreq->m_pkthdr.len = authsiz + 10*NFSX_UNSIGNED + mrest_len;
+ mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+ *mbp = mb;
+ return (mreq);
+}
+
+/*
+ * copies siz bytes from the mbuf chain to the uio scatter/gather list, then skips the padding up to nfsm_rndup(siz)
+ */
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+ struct mbuf **mrep;
+ register struct uio *uiop;
+ int siz;
+ caddr_t *dpos;
+{
+ register char *mbufcp, *uiocp;
+ register int xfer, left, len;
+ register struct mbuf *mp;
+ long uiosiz, rem;
+ int error = 0;
+
+ mp = *mrep;
+ mbufcp = *dpos;
+ len = mtod(mp, caddr_t)+mp->m_len-mbufcp; /* bytes left in the current mbuf after *dpos */
+ rem = nfsm_rndup(siz)-siz; /* pad bytes that follow the data */
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) /* ran out of iovecs before siz was consumed */
+ return (EFBIG);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left; /* amount this iovec will receive */
+ while (left > 0) {
+ while (len == 0) { /* current mbuf is used up: move to the next */
+ mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ mbufcp = mtod(mp, caddr_t);
+ len = mp->m_len;
+ }
+ xfer = (left > len) ? len : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (mbufcp, uiocp, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(mbufcp, uiocp, xfer);
+ else
+ copyout(mbufcp, uiocp, xfer); /* NOTE(review): the return value of copyout() is not checked */
+ left -= xfer;
+ len -= xfer;
+ mbufcp += xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ if (uiop->uio_iov->iov_len <= siz) { /* iovec filled completely: advance to the next one */
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ *dpos = mbufcp;
+ *mrep = mp;
+ if (rem > 0) {
+ if (len < rem) /* padding runs into the next mbuf */
+ error = nfs_adv(mrep, dpos, rem, len);
+ else
+ *dpos += rem;
+ }
+ return (error);
+}
+
+/*
+ * copies siz bytes from a uio scatter/gather list to the end of an mbuf chain, zero padded to nfsm_rndup(siz)...
+ */
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+ register struct uio *uiop;
+ struct mbuf **mq;
+ int siz;
+ caddr_t *bpos;
+{
+ register char *uiocp;
+ register struct mbuf *mp, *mp2;
+ register int xfer, left, mlen;
+ int uiosiz, clflg, rem;
+ char *cp;
+
+ if (siz > MLEN) /* or should it >= MCLBYTES ?? */
+ clflg = 1; /* new mbufs get a cluster attached */
+ else
+ clflg = 0;
+ rem = nfsm_rndup(siz)-siz; /* pad bytes to add after the data */
+ mp = mp2 = *mq;
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) /* ran out of iovecs before siz was consumed */
+ return (EINVAL);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left; /* amount taken from this iovec */
+ while (left > 0) {
+ mlen = M_TRAILINGSPACE(mp);
+ if (mlen == 0) { /* current mbuf is full: chain a new one after mp2 */
+ MGET(mp, M_WAIT, MT_DATA);
+ if (clflg)
+ MCLGET(mp, M_WAIT);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ mp2 = mp;
+ mlen = M_TRAILINGSPACE(mp);
+ }
+ xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+ copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); /* NOTE(review): the return value of copyin() is not checked */
+ mp->m_len += xfer;
+ left -= xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ if (uiop->uio_iov->iov_len <= siz) { /* iovec drained completely: advance to the next one */
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ if (rem > 0) {
+ if (rem > M_TRAILINGSPACE(mp)) { /* padding does not fit: put it in a fresh mbuf */
+ MGET(mp, M_WAIT, MT_DATA);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ }
+ cp = mtod(mp, caddr_t)+mp->m_len;
+ for (left = 0; left < rem; left++)
+ *cp++ = '\0';
+ mp->m_len += rem;
+ *bpos = cp;
+ } else
+ *bpos = mtod(mp, caddr_t)+mp->m_len;
+ *mq = mp; /* caller continues building from the last mbuf */
+ return (0);
+}
+
+/*
+ * Help break down an mbuf chain by setting the first siz bytes contiguous
+ * pointed to by returned val.
+ * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
+ * cases. (The macros use the vars. dpos and dpos2)
+ *
+ * mdp/dposp - current mbuf and data position; both are updated
+ * siz       - number of contiguous bytes wanted
+ * left      - bytes remaining in *mdp from *dposp
+ * cp2       - set to the start of the contiguous siz bytes
+ *
+ * If the current mbuf already holds siz bytes they are returned in place.
+ * Otherwise a new mbuf is linked in after the current one and the bytes
+ * are gathered into it from the current and following mbufs, which are
+ * trimmed accordingly.
+ * Returns 0 on success, EBADRPC if the chain ends before siz bytes are
+ * found; panics if a gather of more than MHLEN bytes would be needed.
+ */
+nfsm_disct(mdp, dposp, siz, left, cp2)
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ int siz;
+ int left;
+ caddr_t *cp2;
+{
+ register struct mbuf *mp, *mp2;
+ register int siz2, xfer;
+ register caddr_t p;
+
+ mp = *mdp;
+ while (left == 0) { /* current mbuf exhausted: step forward */
+ *mdp = mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ left = mp->m_len;
+ *dposp = mtod(mp, caddr_t);
+ }
+ if (left >= siz) { /* already contiguous: hand back in place */
+ *cp2 = *dposp;
+ *dposp += siz;
+ } else if (mp->m_next == NULL) {
+ return (EBADRPC);
+ } else if (siz > MHLEN) {
+ panic("nfs S too big");
+ } else {
+ MGET(mp2, M_WAIT, MT_DATA);
+ mp2->m_next = mp->m_next; /* splice new mbuf in after mp */
+ mp->m_next = mp2;
+ mp->m_len -= left; /* the tail of mp moves to the new mbuf */
+ mp = mp2;
+ *cp2 = p = mtod(mp, caddr_t);
+ bcopy(*dposp, p, left); /* Copy what was left */
+ siz2 = siz-left;
+ p += left;
+ mp2 = mp->m_next;
+ /* Loop around copying up the siz2 bytes */
+ while (siz2 > 0) {
+ if (mp2 == NULL)
+ return (EBADRPC);
+ xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+ if (xfer > 0) {
+ bcopy(mtod(mp2, caddr_t), p, xfer);
+ NFSMADV(mp2, xfer); /* drop the copied bytes from the donor mbuf */
+ mp2->m_len -= xfer;
+ p += xfer;
+ siz2 -= xfer;
+ }
+ if (siz2 > 0)
+ mp2 = mp2->m_next;
+ }
+ mp->m_len = siz;
+ *mdp = mp2; /* parsing resumes in the mbuf last copied from */
+ *dposp = mtod(mp2, caddr_t);
+ }
+ return (0);
+}
+
+/*
+ * Skip forward offs bytes in an mbuf chain.
+ * left is the number of bytes still available in *mdp; whole mbufs are
+ * stepped over until the one containing the target offset is reached.
+ * On success *mdp and *dposp describe the new position and 0 is
+ * returned; EBADRPC is returned if the chain is too short.
+ */
+nfs_adv(mdp, dposp, offs, left)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int offs;
+	int left;
+{
+	register struct mbuf *cur = *mdp;
+	register int avail = left;
+
+	for (; offs > avail; avail = cur->m_len) {
+		offs -= avail;
+		if ((cur = cur->m_next) == NULL)
+			return (EBADRPC);
+	}
+	*mdp = cur;
+	*dposp = mtod(cur, caddr_t) + offs;
+	return (0);
+}
+
+/*
+ * Copy a string into mbufs for the hard cases...
+ * Emits the length word (txdr_unsigned(siz)) followed by the siz bytes
+ * at cp, first using any trailing space in *mb and then appending new
+ * mbufs (with a cluster when more than MLEN bytes remain).  The final
+ * partial word is zeroed before the copy so the data ends up null padded
+ * to an nfsm_rndup() boundary.
+ * Always returns 0; *mb is set to the last mbuf and *bpos to the byte
+ * just past the data.
+ * NOTE(review): m1 is only assigned inside the loop, so this relies on
+ * the loop running at least once -- confirm against the callers.
+ */
+nfsm_strtmbuf(mb, bpos, cp, siz)
+ struct mbuf **mb;
+ char **bpos;
+ char *cp;
+ long siz;
+{
+ register struct mbuf *m1, *m2;
+ long left, xfer, len, tlen;
+ u_long *tl;
+ int putsize;
+
+ putsize = 1; /* length word still to be written */
+ m2 = *mb;
+ left = M_TRAILINGSPACE(m2);
+ if (left > 0) {
+ tl = ((u_long *)(*bpos));
+ *tl++ = txdr_unsigned(siz);
+ putsize = 0;
+ left -= NFSX_UNSIGNED;
+ m2->m_len += NFSX_UNSIGNED;
+ if (left > 0) { /* fill the rest of this mbuf with string data */
+ bcopy(cp, (caddr_t) tl, left);
+ siz -= left;
+ cp += left;
+ m2->m_len += left;
+ left = 0;
+ }
+ }
+ /* Loop around adding mbufs */
+ while (siz > 0) {
+ MGET(m1, M_WAIT, MT_DATA);
+ if (siz > MLEN)
+ MCLGET(m1, M_WAIT);
+ m1->m_len = NFSMSIZ(m1); /* temporarily: capacity of the new mbuf */
+ m2->m_next = m1;
+ m2 = m1;
+ tl = mtod(m1, u_long *);
+ tlen = 0;
+ if (putsize) {
+ *tl++ = txdr_unsigned(siz);
+ m1->m_len -= NFSX_UNSIGNED;
+ tlen = NFSX_UNSIGNED;
+ putsize = 0;
+ }
+ if (siz < m1->m_len) { /* last piece: round up and pre-zero the pad word */
+ len = nfsm_rndup(siz);
+ xfer = siz;
+ if (xfer < len)
+ *(tl+(xfer>>2)) = 0;
+ } else {
+ xfer = len = m1->m_len;
+ }
+ bcopy(cp, (caddr_t) tl, xfer);
+ m1->m_len = len+tlen; /* real length: data (+pad) plus any length word */
+ siz -= xfer;
+ cp += xfer;
+ }
+ *mb = m1;
+ *bpos = mtod(m1, caddr_t)+m1->m_len;
+ return (0);
+}
+
+/*
+ * Called once to initialize data structures...
+ * Pre-converts the RPC/NFS constants to XDR form with txdr_unsigned(),
+ * clears the async daemon slots, initializes the nfsnode hash table and
+ * the server data structures and request cache, sets up the nqnfs
+ * server state on the first call (nqnfsstarttime == 0), and finally
+ * initializes the request list head and calls nfs_timer().
+ */
+nfs_init()
+{
+ register int i;
+
+ nfsrtt.pos = 0;
+ rpc_vers = txdr_unsigned(RPC_VER2);
+ rpc_call = txdr_unsigned(RPC_CALL);
+ rpc_reply = txdr_unsigned(RPC_REPLY);
+ rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+ rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+ rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+ rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+ rpc_rejectedcred = txdr_unsigned(AUTH_REJECTCRED);
+ rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+ rpc_auth_kerb = txdr_unsigned(RPCAUTH_NQNFS);
+ nfs_vers = txdr_unsigned(NFS_VER2);
+ nfs_prog = txdr_unsigned(NFS_PROG);
+ nfs_true = txdr_unsigned(TRUE);
+ nfs_false = txdr_unsigned(FALSE);
+ /* Loop thru nfs procids */
+ for (i = 0; i < NFS_NPROCS; i++)
+ nfs_procids[i] = txdr_unsigned(i);
+ /* Ensure async daemons disabled */
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ nfs_iodwant[i] = (struct proc *)0;
+ TAILQ_INIT(&nfs_bufq);
+ nfs_xdrneg1 = txdr_unsigned(-1);
+ nfs_nhinit(); /* Init the nfsnode table */
+ nfsrv_init(0); /* Init server data structures */
+ nfsrv_initcache(); /* Init the server request cache */
+
+ /*
+ * Initialize the nqnfs server stuff.
+ * Done only while nqnfsstarttime is still 0; the start time is the
+ * boot time plus the maximum lease, clock skew and write slack.
+ */
+ if (nqnfsstarttime == 0) {
+ nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+ + nqsrv_clockskew + nqsrv_writeslack;
+ NQLOADNOVRAM(nqnfsstarttime);
+ nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+ nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+ nqthead.th_head[0] = &nqthead; /* empty list: both links point at the head */
+ nqthead.th_head[1] = &nqthead;
+ nqfhead = hashinit(NQLCHSZ, M_NQLEASE, &nqfheadhash);
+ }
+
+ /*
+ * Initialize reply list and start timer
+ */
+ nfsreqh.r_prev = nfsreqh.r_next = &nfsreqh;
+ nfs_timer();
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ * that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ * error otherwise
+ */
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with
+ * the values on the mbuf list and
+ * Iff vaper not NULL
+ * copy the attributes to *vaper
+ *
+ * vpp         - vnode whose cache is loaded; *vpp is replaced when a new
+ *               device node turns out to alias an existing vnode
+ * mdp, dposp  - current mbuf / data position; advanced past the
+ *               attribute structure on success
+ * Returns 0 on success, the error from nfsm_disct() if the attributes
+ * cannot be made contiguous, or EOPNOTSUPP for a new FIFO node when the
+ * kernel is built without FIFO.
+ */
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+ struct vnode **vpp;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct vattr *vaper;
+{
+ register struct vnode *vp = *vpp;
+ register struct vattr *vap;
+ register struct nfsv2_fattr *fp;
+ extern int (**spec_nfsv2nodeop_p)();
+ register struct nfsnode *np, *nq, **nhpp;
+ register long t1;
+ caddr_t dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *md;
+ enum vtype vtyp;
+ u_short vmode;
+ long rdev;
+ struct timespec mtime;
+ struct vnode *nvp;
+
+ md = *mdp;
+ dpos = *dposp;
+ t1 = (mtod(md, caddr_t) + md->m_len) - dpos; /* bytes left in this mbuf */
+ isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ if (error = nfsm_disct(&md, &dpos, NFSX_FATTR(isnq), t1, &cp2))
+ return (error);
+ fp = (struct nfsv2_fattr *)cp2;
+ vtyp = nfstov_type(fp->fa_type);
+ vmode = fxdr_unsigned(u_short, fp->fa_mode);
+ if (vtyp == VNON || vtyp == VREG) /* derive the type from the mode bits instead */
+ vtyp = IFTOVT(vmode);
+ if (isnq) {
+ rdev = fxdr_unsigned(long, fp->fa_nqrdev);
+ fxdr_nqtime(&fp->fa_nqmtime, &mtime);
+ } else {
+ rdev = fxdr_unsigned(long, fp->fa_nfsrdev);
+ fxdr_nfstime(&fp->fa_nfsmtime, &mtime);
+ }
+ /*
+ * If v_type == VNON it is a new node, so fill in the v_type,
+ * n_mtime fields. Check to see if it represents a special
+ * device, and if so, check for a possible alias. Once the
+ * correct vnode has been obtained, fill in the rest of the
+ * information.
+ */
+ np = VTONFS(vp);
+ if (vp->v_type == VNON) {
+ if (vtyp == VCHR && rdev == 0xffffffff) /* VCHR with rdev of all ones denotes a FIFO */
+ vp->v_type = vtyp = VFIFO;
+ else
+ vp->v_type = vtyp;
+ if (vp->v_type == VFIFO) {
+#ifdef FIFO
+ extern int (**fifo_nfsv2nodeop_p)();
+ vp->v_op = fifo_nfsv2nodeop_p;
+#else
+ return (EOPNOTSUPP);
+#endif /* FIFO */
+ }
+ if (vp->v_type == VCHR || vp->v_type == VBLK) {
+ vp->v_op = spec_nfsv2nodeop_p;
+ if (nvp = checkalias(vp, (dev_t)rdev, vp->v_mount)) {
+ /*
+ * Discard unneeded vnode, but save its nfsnode.
+ * The nfsnode is unlinked from its hash chain and
+ * handed over to the alias vnode.
+ */
+ if (nq = np->n_forw)
+ nq->n_back = np->n_back;
+ *np->n_back = nq;
+ nvp->v_data = vp->v_data;
+ vp->v_data = NULL;
+ vp->v_op = spec_vnodeop_p;
+ vrele(vp);
+ vgone(vp);
+ /*
+ * Reinitialize aliased node.
+ * The nfsnode is put back at the head of the hash
+ * chain for its file handle.
+ */
+ np->n_vnode = nvp;
+ nhpp = (struct nfsnode **)nfs_hash(&np->n_fh);
+ if (nq = *nhpp)
+ nq->n_back = &np->n_forw;
+ np->n_forw = nq;
+ np->n_back = nhpp;
+ *nhpp = np;
+ *vpp = vp = nvp;
+ }
+ }
+ np->n_mtime = mtime.ts_sec;
+ }
+ vap = &np->n_vattr;
+ vap->va_type = vtyp;
+ vap->va_mode = (vmode & 07777);
+ vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+ vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+ vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+ vap->va_rdev = (dev_t)rdev;
+ vap->va_mtime = mtime;
+ vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+ if (isnq) {
+ fxdr_hyper(&fp->fa_nqsize, &vap->va_size);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nqblocksize);
+ fxdr_hyper(&fp->fa_nqbytes, &vap->va_bytes);
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nqfileid);
+ fxdr_nqtime(&fp->fa_nqatime, &vap->va_atime);
+ vap->va_flags = fxdr_unsigned(u_long, fp->fa_nqflags);
+ fxdr_nqtime(&fp->fa_nqctime, &vap->va_ctime);
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nqgen);
+ fxdr_hyper(&fp->fa_nqfilerev, &vap->va_filerev);
+ } else {
+ vap->va_size = fxdr_unsigned(u_long, fp->fa_nfssize);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nfsblocksize);
+ vap->va_bytes = fxdr_unsigned(long, fp->fa_nfsblocks) * NFS_FABLKSIZE;
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nfsfileid);
+ fxdr_nfstime(&fp->fa_nfsatime, &vap->va_atime);
+ vap->va_flags = 0;
+ vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa_nfsctime.nfs_sec);
+ vap->va_ctime.ts_nsec = 0;
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nfsctime.nfs_usec); /* the ctime usec slot carries va_gen */
+ vap->va_filerev = 0;
+ }
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) { /* locally modified: keep the larger size */
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else
+ np->n_size = vap->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else
+ np->n_size = vap->va_size;
+ }
+ np->n_attrstamp = time.tv_sec; /* time the cache was loaded; see nfs_getattrcache() */
+ *dposp = dpos;
+ *mdp = md;
+ if (vaper != NULL) {
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+#ifdef notdef
+ if ((np->n_flag & NMODIFIED) && np->n_size > vap->va_size)
+ if (np->n_size > vap->va_size)
+ vaper->va_size = np->n_size;
+#endif
+ if (np->n_flag & NCHG) { /* report locally recorded access/modify times */
+ if (np->n_flag & NACC) {
+ vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+ vaper->va_atime.ts_nsec =
+ np->n_atim.tv_usec * 1000;
+ }
+ if (np->n_flag & NUPD) {
+ vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+ vaper->va_mtime.ts_nsec =
+ np->n_mtim.tv_usec * 1000;
+ }
+ }
+ }
+ return (0);
+}
+
+/*
+ * Check the time stamp
+ * If the cache is valid, copy contents to *vaper and return 0
+ * otherwise return an error
+ *
+ * With NFSMNT_NQLOOKLEASE set, validity is decided by
+ * NQNFS_CKCACHABLE(vp, NQL_READ) and a non-zero n_attrstamp; otherwise
+ * by the age of n_attrstamp against NFS_ATTRTIMEO(np).  A miss returns
+ * ENOENT.  Hits and misses are counted in nfsstats.
+ */
+nfs_getattrcache(vp, vaper)
+ register struct vnode *vp;
+ struct vattr *vaper;
+{
+ register struct nfsnode *np = VTONFS(vp);
+ register struct vattr *vap;
+
+ if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQLOOKLEASE) {
+ if (!NQNFS_CKCACHABLE(vp, NQL_READ) || np->n_attrstamp == 0) {
+ nfsstats.attrcache_misses++;
+ return (ENOENT);
+ }
+ } else if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) {
+ nfsstats.attrcache_misses++;
+ return (ENOENT);
+ }
+ nfsstats.attrcache_hits++;
+ vap = &np->n_vattr;
+ if (vap->va_size != np->n_size) { /* reconcile cached size with n_size */
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) { /* locally modified: keep the larger size */
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else
+ np->n_size = vap->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else
+ np->n_size = vap->va_size;
+ }
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
+#ifdef notdef
+ if ((np->n_flag & NMODIFIED) == 0) {
+ np->n_size = vaper->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else if (np->n_size > vaper->va_size)
+ if (np->n_size > vaper->va_size)
+ vaper->va_size = np->n_size;
+#endif
+ if (np->n_flag & NCHG) { /* report locally recorded access/modify times */
+ if (np->n_flag & NACC) {
+ vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+ vaper->va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+ }
+ if (np->n_flag & NUPD) {
+ vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+ vaper->va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+ }
+ }
+ return (0);
+}
+
+/*
+ * Set up nameidata for a lookup() call and do it
+ *
+ * The len-byte name is copied out of the request mbufs (advancing
+ * *mdp / *dposp past the name and its XDR padding), the starting
+ * directory is obtained from the file handle with nfsrv_fhtovp(), and
+ * lookup() is called.
+ * Returns 0 on success.  The name buffer is kept (HASBUF set) only when
+ * SAVENAME or SAVESTART was requested; on every other path, including
+ * all errors, it is freed before returning.
+ * Errors: EBADRPC if the mbuf chain ends early, EINVAL for a name
+ * containing '\0' or '/' or resolving to a symbolic link, ENOTDIR if the
+ * handle is not a directory, or whatever nfs_adv(), nfsrv_fhtovp() or
+ * lookup() return.
+ */
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p)
+ register struct nameidata *ndp;
+ fhandle_t *fhp;
+ int len;
+ struct nfssvc_sock *slp;
+ struct mbuf *nam;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct proc *p;
+{
+ register int i, rem;
+ register struct mbuf *md;
+ register char *fromcp, *tocp;
+ struct vnode *dp;
+ int error, rdonly;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK);
+ /*
+ * Copy the name from the mbuf list to cnp->cn_pnbuf
+ * and set the various ndp fields appropriately.
+ */
+ fromcp = *dposp;
+ tocp = cnp->cn_pnbuf;
+ md = *mdp;
+ rem = mtod(md, caddr_t) + md->m_len - fromcp; /* bytes left in this mbuf */
+ cnp->cn_hash = 0;
+ for (i = 0; i < len; i++) {
+ while (rem == 0) { /* step to the next mbuf holding data */
+ md = md->m_next;
+ if (md == NULL) {
+ error = EBADRPC;
+ goto out;
+ }
+ fromcp = mtod(md, caddr_t);
+ rem = md->m_len;
+ }
+ if (*fromcp == '\0' || *fromcp == '/') { /* only a single component is accepted */
+ error = EINVAL;
+ goto out;
+ }
+ cnp->cn_hash += (unsigned char)*fromcp;
+ *tocp++ = *fromcp++;
+ rem--;
+ }
+ *tocp = '\0';
+ *mdp = md;
+ *dposp = fromcp;
+ len = nfsm_rndup(len)-len; /* len is now the count of XDR pad bytes */
+ if (len > 0) {
+ if (rem >= len)
+ *dposp += len;
+ else if (error = nfs_adv(mdp, dposp, len, rem))
+ goto out;
+ }
+ ndp->ni_pathlen = tocp - cnp->cn_pnbuf;
+ cnp->cn_nameptr = cnp->cn_pnbuf;
+ /*
+ * Extract and set starting directory.
+ */
+ if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+ nam, &rdonly))
+ goto out;
+ if (dp->v_type != VDIR) {
+ vrele(dp);
+ error = ENOTDIR;
+ goto out;
+ }
+ ndp->ni_startdir = dp;
+ if (rdonly)
+ cnp->cn_flags |= (NOCROSSMOUNT | RDONLY);
+ else
+ cnp->cn_flags |= NOCROSSMOUNT;
+ /*
+ * And call lookup() to do the real work
+ */
+ cnp->cn_proc = p;
+ if (error = lookup(ndp))
+ goto out;
+ /*
+ * Check for encountering a symbolic link
+ */
+ if (cnp->cn_flags & ISSYMLINK) {
+ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+ vput(ndp->ni_dvp);
+ else
+ vrele(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ ndp->ni_vp = NULL;
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Check for saved name request
+ */
+ if (cnp->cn_flags & (SAVENAME | SAVESTART)) {
+ cnp->cn_flags |= HASBUF;
+ return (0);
+ }
+out:
+ FREE(cnp->cn_pnbuf, M_NAMEI); /* also reached on success when the name is not being saved */
+ return (error);
+}
+
+/*
+ * Tail-only variant of m_adj(): remove len bytes from the end of the
+ * mbuf chain mp and, when nul > 0, overwrite the last nul bytes of the
+ * data that remains with '\0'.
+ */
+void
+nfsm_adj(mp, len, nul)
+	struct mbuf *mp;
+	register int len;
+	int nul;
+{
+	register struct mbuf *m;
+	register int count, i;
+	register char *cp;
+
+	/*
+	 * First pass: total up the chain and stop on its last mbuf.
+	 */
+	m = mp;
+	count = m->m_len;
+	while (m->m_next != (struct mbuf *)0) {
+		m = m->m_next;
+		count += m->m_len;
+	}
+
+	/*
+	 * Easy case: the trim fits strictly inside the last mbuf.
+	 */
+	if (m->m_len > len) {
+		m->m_len -= len;
+		if (nul > 0) {
+			cp = mtod(m, caddr_t) + m->m_len - nul;
+			for (i = nul; i > 0; i--)
+				*cp++ = '\0';
+		}
+		return;
+	}
+
+	/*
+	 * Hard case: "count" becomes the length the chain should end up
+	 * with.  Walk from the front to the mbuf that holds the last byte
+	 * to keep, shorten it, and empty every mbuf after it.
+	 */
+	count -= len;
+	if (count < 0)
+		count = 0;
+	m = mp;
+	while (m != (struct mbuf *)0 && m->m_len < count) {
+		count -= m->m_len;
+		m = m->m_next;
+	}
+	if (m != (struct mbuf *)0) {
+		m->m_len = count;
+		if (nul > 0) {
+			cp = mtod(m, caddr_t) + m->m_len - nul;
+			for (i = nul; i > 0; i--)
+				*cp++ = '\0';
+		}
+	}
+	while ((m = m->m_next) != (struct mbuf *)0)
+		m->m_len = 0;
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ * - look up fsid in mount list (if not found ret error)
+ * - get vp and export rights by calling VFS_FHTOVP()
+ * - if MNT_EXKERB is set, replace the credentials with the ones
+ *   registered for cred->cr_uid on this socket (NQNFS_AUTHERR if none)
+ * - else if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
+ * - if not lockflag unlock it with VOP_UNLOCK()
+ *
+ * Whenever the credentials are replaced, the group count is replaced
+ * along with the uid and group list, so that none of the caller's
+ * original groups remain in effect.
+ *
+ * Returns 0 with *vpp set and *rdonlyp reflecting MNT_EXRDONLY, ESTALE
+ * if the file system is not mounted, NQNFS_AUTHERR as above, or the
+ * error from VFS_FHTOVP().  *vpp is cleared on entry.
+ */
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp)
+	fhandle_t *fhp;
+	int lockflag;
+	struct vnode **vpp;
+	struct ucred *cred;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	int *rdonlyp;
+{
+	register struct mount *mp;
+	register struct nfsuid *uidp;
+	register int i;
+	struct ucred *credanon;
+	int error, exflags;
+
+	*vpp = (struct vnode *)0;
+	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if (error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon))
+		return (error);
+	/*
+	 * Check/setup credentials.
+	 */
+	if (exflags & MNT_EXKERB) {
+		uidp = slp->ns_uidh[NUIDHASH(cred->cr_uid)];
+		while (uidp) {
+			if (uidp->nu_uid == cred->cr_uid)
+				break;
+			uidp = uidp->nu_hnext;
+		}
+		if (uidp) {
+			cred->cr_uid = uidp->nu_cr.cr_uid;
+			for (i = 0; i < uidp->nu_cr.cr_ngroups && i < NGROUPS; i++)
+				cred->cr_groups[i] = uidp->nu_cr.cr_groups[i];
+			/* Drop any groups beyond those just copied. */
+			cred->cr_ngroups = i;
+		} else {
+			vput(*vpp);
+			return (NQNFS_AUTHERR);
+		}
+	} else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+		cred->cr_uid = credanon->cr_uid;
+		for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+			cred->cr_groups[i] = credanon->cr_groups[i];
+		/* Drop any groups beyond those just copied. */
+		cred->cr_ngroups = i;
+	}
+	if (exflags & MNT_EXRDONLY)
+		*rdonlyp = 1;
+	else
+		*rdonlyp = 0;
+	if (!lockflag)
+		VOP_UNLOCK(*vpp);
+	return (0);
+}
+
+/*
+ * Decide whether the address in the mbuf nam names the same host as
+ * haddr, for the given address family.  Returns 1 on a match and 0
+ * otherwise, including for any family that is not handled here.
+ * For AF_INET the host address is kept directly in haddr (it is only a
+ * 4 byte "struct in_addr"), so no address mbuf is needed on that side;
+ * for AF_ISO (when configured) haddr refers to a saved address mbuf.
+ */
+netaddr_match(family, haddr, nam)
+	int family;
+	union nethostaddr *haddr;
+	struct mbuf *nam;
+{
+	register struct sockaddr_in *sin;
+
+	if (family == AF_INET) {
+		sin = mtod(nam, struct sockaddr_in *);
+		return (sin->sin_family == AF_INET &&
+		    sin->sin_addr.s_addr == haddr->had_inetaddr);
+	}
+#ifdef ISO
+	if (family == AF_ISO) {
+		register struct sockaddr_iso *theirs, *ours;
+
+		theirs = mtod(nam, struct sockaddr_iso *);
+		ours = mtod(haddr->had_nam, struct sockaddr_iso *);
+		return (theirs->siso_family == AF_ISO &&
+		    theirs->siso_nlen > 0 &&
+		    theirs->siso_nlen == ours->siso_nlen &&
+		    SAME_ISOADDR(theirs, ours));
+	}
+#endif /* ISO */
+	return (0);
+}
diff --git a/sys/nfs/nfs_common.h b/sys/nfs/nfs_common.h
new file mode 100644
index 000000000000..879db3600577
--- /dev/null
+++ b/sys/nfs/nfs_common.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsm_subs.h 8.1 (Berkeley) 6/16/93
+ */
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. To attempt to use them for any
+ * other purpose will be dangerous. (they make weird assumptions)
+ */
+
+/*
+ * First define what the actual subs. return
+ * (nfsm_reqh() is the function behind the nfsm_reqhead() macro.)
+ */
+extern struct mbuf *nfsm_reqh();
+/*
+ * Small mbuf helpers:
+ * M_HASCL(m)    - non-zero when the mbuf has external storage (M_EXT).
+ * NFSMINOFF(m)  - point m_data back at the start of the mbuf's storage:
+ *                 the external buffer, the packet header data area or
+ *                 the plain data area, as appropriate.
+ * NFSMADV(m, s) - advance m_data by s bytes (m_len is not touched).
+ * NFSMSIZ(m)    - data capacity of the mbuf: MCLBYTES, MHLEN or MLEN.
+ */
+#define M_HASCL(m) ((m)->m_flags & M_EXT)
+#define NFSMINOFF(m) \
+ if (M_HASCL(m)) \
+ (m)->m_data = (m)->m_ext.ext_buf; \
+ else if ((m)->m_flags & M_PKTHDR) \
+ (m)->m_data = (m)->m_pktdat; \
+ else \
+ (m)->m_data = (m)->m_dat
+#define NFSMADV(m, s) (m)->m_data += (s)
+#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \
+ (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several vars. declared in nfsm_reqhead and these
+ * vars. must not be used elsewhere unless you are careful not to corrupt
+ * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries
+ * that may be used so long as the value is not expected to be retained
+ * after a macro.
+ * I know, this is kind of dorkey, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
+ */
+/*
+ * nfsm_build(a,c,s): reserve s bytes at the build position and set a to
+ * them, cast with c.  If fewer than s bytes trail in mb, a new mbuf is
+ * allocated, linked after mb and made current (panics if s > MLEN).
+ * Uses and updates mb, mb2 and bpos from the enclosing function.
+ */
+#define nfsm_build(a,c,s) \
+ { if ((s) > M_TRAILINGSPACE(mb)) { \
+ MGET(mb2, M_WAIT, MT_DATA); \
+ if ((s) > MLEN) \
+ panic("build > MLEN"); \
+ mb->m_next = mb2; \
+ mb = mb2; \
+ mb->m_len = 0; \
+ bpos = mtod(mb, caddr_t); \
+ } \
+ (a) = (c)(bpos); \
+ mb->m_len += (s); \
+ bpos += (s); }
+/*
+ * nfsm_dissect(a,c,s): set a (cast with c) to the next s contiguous
+ * bytes of the reply and step dpos past them.  When the current mbuf md
+ * holds fewer than s bytes nfsm_disct() is called to gather them; if
+ * that fails mrep is freed and control goes to nfsmout with error set.
+ * Uses t1, md, dpos, cp2, mrep and error from the enclosing function.
+ */
+#define nfsm_dissect(a,c,s) \
+ { t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ (a) = (c)(dpos); \
+ dpos += (s); \
+ } else if (error = nfsm_disct(&md, &dpos, (s), t1, &cp2)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } else { \
+ (a) = (c)cp2; \
+ } }
+/*
+ * nfsm_fhtom(v): append the file handle of vnode v (NFSX_FH bytes) to
+ * the request.  Expands to two statements and uses cp.
+ */
+#define nfsm_fhtom(v) \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)&(VTONFS(v)->n_fh), cp, NFSX_FH)
+/*
+ * nfsm_srvfhtom(f): as nfsm_fhtom(), but the NFSX_FH bytes are copied
+ * from the address f.
+ */
+#define nfsm_srvfhtom(f) \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)(f), cp, NFSX_FH)
+/*
+ * nfsm_mtofh(d,v): dissect a file handle from the reply, obtain its
+ * nfsnode with nfs_nget() on the mount of vnode d, set v to the
+ * corresponding vnode and load the attributes that follow.  On failure
+ * mrep is freed and control goes to nfsmout.
+ */
+#define nfsm_mtofh(d,v) \
+ { struct nfsnode *np; nfsv2fh_t *fhp; \
+ nfsm_dissect(fhp,nfsv2fh_t *,NFSX_FH); \
+ if (error = nfs_nget((d)->v_mount, fhp, &np)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = NFSTOV(np); \
+ nfsm_loadattr(v, (struct vattr *)0); \
+ }
+/*
+ * nfsm_loadattr(v,a): load the attribute cache of vnode v from the
+ * reply via nfs_loadattrcache(), copying to *a when a is not NULL.
+ * v is reassigned afterwards because nfs_loadattrcache() may substitute
+ * a different vnode.  On failure mrep is freed and control goes to
+ * nfsmout.
+ */
+#define nfsm_loadattr(v,a) \
+ { struct vnode *tvp = (v); \
+ if (error = nfs_loadattrcache(&tvp, &md, &dpos, (a))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = tvp; }
+/*
+ * nfsm_strsiz(s,m): dissect a string length into s; if it exceeds m,
+ * free mrep, set error to EBADRPC and go to nfsmout.
+ */
+#define nfsm_strsiz(s,m) \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m)) { \
+ m_freem(mrep); \
+ error = EBADRPC; \
+ goto nfsmout; \
+ } }
+/*
+ * nfsm_srvstrsiz(s,m): server side version of nfsm_strsiz(); a length
+ * that is <= 0 is rejected as well, and the failure is reported through
+ * nfsm_reply(0) with error set to EBADRPC.
+ */
+#define nfsm_srvstrsiz(s,m) \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m) || (s) <= 0) { \
+ error = EBADRPC; \
+ nfsm_reply(0); \
+ } }
+/*
+ * nfsm_mtouio(p,s): when s > 0, copy s bytes from the reply into the
+ * uio p with nfsm_mbuftouio(); on error free mrep and go to nfsmout.
+ */
+#define nfsm_mtouio(p,s) \
+ if ((s) > 0 && \
+ (error = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+/*
+ * nfsm_uiotom(p,s): append s bytes from the uio p to the request with
+ * nfsm_uiotombuf(); on error free mreq and go to nfsmout.
+ */
+#define nfsm_uiotom(p,s) \
+ if (error = nfsm_uiotombuf((p),&mb,(s),&bpos)) { \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+/*
+ * nfsm_reqhead(v,a,s): start a request by calling nfsm_reqh(); mreq and
+ * mb are both set to the mbuf it returns and bpos is filled in by it.
+ */
+#define nfsm_reqhead(v,a,s) \
+ mb = mreq = nfsm_reqh((v),(a),(s),&bpos)
+/*
+ * nfsm_reqdone: free the reply and define the nfsmout label that the
+ * other macros jump to on error.
+ */
+#define nfsm_reqdone m_freem(mrep); \
+ nfsmout:
+/*
+ * nfsm_rndup(a): round a up to the next multiple of 4.
+ */
+#define nfsm_rndup(a) (((a)+3)&(~0x3))
+/*
+ * nfsm_request(v, t, p, c): issue the request in mreq with
+ * nfs_request(), which fills in mrep, md and dpos; on error go to
+ * nfsmout.
+ */
+#define nfsm_request(v, t, p, c) \
+ if (error = nfs_request((v), mreq, (t), (p), \
+ (c), &mrep, &md, &dpos)) \
+ goto nfsmout
+/*
+ * nfsm_strtom(a,s,m): append the s byte string at a to the request as a
+ * length word followed by the data padded to a multiple of 4.  A string
+ * longer than m frees mreq and fails with ENAMETOOLONG.  If the whole
+ * item fits in the current mbuf it is built in place (the last word is
+ * zeroed first so the padding is null); otherwise nfsm_strtmbuf() does
+ * the work.  Uses t2 and tl.
+ */
+#define nfsm_strtom(a,s,m) \
+ if ((s) > (m)) { \
+ m_freem(mreq); \
+ error = ENAMETOOLONG; \
+ goto nfsmout; \
+ } \
+ t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \
+ if (t2 <= M_TRAILINGSPACE(mb)) { \
+ nfsm_build(tl,u_long *,t2); \
+ *tl++ = txdr_unsigned(s); \
+ *(tl+((t2>>2)-2)) = 0; \
+ bcopy((caddr_t)(a), (caddr_t)tl, (s)); \
+ } else if (error = nfsm_strtmbuf(&mb, &bpos, (a), (s))) { \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+/*
+ * nfsm_srvdone: define the nfsmout label and return error from the
+ * enclosing function.
+ */
+#define nfsm_srvdone \
+ nfsmout: \
+ return(error)
+/*
+ * nfsm_reply(s): record error in nfsd->nd_repstat and build the reply
+ * header with nfs_rephead(), passing 0 instead of s as the first
+ * argument when error is set.  The request mrep is then freed and mreq
+ * is set to *mrq.  When error is set the enclosing function returns 0
+ * at this point.
+ */
+#define nfsm_reply(s) \
+ { \
+ nfsd->nd_repstat = error; \
+ if (error) \
+ (void) nfs_rephead(0, nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ else \
+ (void) nfs_rephead((s), nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ m_freem(mrep); \
+ mreq = *mrq; \
+ if (error) \
+ return(0); \
+ }
+/*
+ * nfsm_adv(s): skip s bytes of the reply, calling nfs_adv() when they
+ * do not all lie in the current mbuf; on error free mrep and go to
+ * nfsmout.
+ */
+#define nfsm_adv(s) \
+ t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ dpos += (s); \
+ } else if (error = nfs_adv(&md, &dpos, (s), t1)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+/*
+ * nfsm_srvmtofh(f): dissect NFSX_FH bytes from the request and copy
+ * them to the file handle at f.  Expands to two statements.
+ */
+#define nfsm_srvmtofh(f) \
+ nfsm_dissect(tl, u_long *, NFSX_FH); \
+ bcopy((caddr_t)tl, (caddr_t)f, NFSX_FH)
+/*
+ * nfsm_clget: make sure there is room at bp.  When bp has reached be a
+ * new cluster mbuf is allocated and linked after mp2, its m_len is set
+ * to its full size and bp/be are reset to its start and end; if the
+ * mbuf being left is mb, its m_len is first increased by bp-bpos.
+ * In all cases tl is then set to bp.
+ */
+#define nfsm_clget \
+ if (bp >= be) { \
+ if (mp == mb) \
+ mp->m_len += bp-bpos; \
+ MGET(mp, M_WAIT, MT_DATA); \
+ MCLGET(mp, M_WAIT); \
+ mp->m_len = NFSMSIZ(mp); \
+ mp2->m_next = mp; \
+ mp2 = mp; \
+ bp = mtod(mp, caddr_t); \
+ be = bp+mp->m_len; \
+ } \
+ tl = (u_long *)bp
+/*
+ * nfsm_srvfillattr: fill the wire attribute structure fp from the
+ * vattr vap.  The NFS (fa_nfs*) layout is used when nfsd->nd_nqlflag
+ * is NQL_NOVAL and the nqnfs (fa_nq*) layout otherwise.  A VFIFO is
+ * sent with an rdev of 0xffffffff.  In the NFS layout the usec part of
+ * the ctime carries va_gen.
+ */
+#define nfsm_srvfillattr \
+ fp->fa_type = vtonfs_type(vap->va_type); \
+ fp->fa_mode = vtonfs_mode(vap->va_type, vap->va_mode); \
+ fp->fa_nlink = txdr_unsigned(vap->va_nlink); \
+ fp->fa_uid = txdr_unsigned(vap->va_uid); \
+ fp->fa_gid = txdr_unsigned(vap->va_gid); \
+ if (nfsd->nd_nqlflag == NQL_NOVAL) { \
+ fp->fa_nfsblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nfsrdev = 0xffffffff; \
+ else \
+ fp->fa_nfsrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nfsfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nfsfileid = txdr_unsigned(vap->va_fileid); \
+ fp->fa_nfssize = txdr_unsigned(vap->va_size); \
+ fp->fa_nfsblocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); \
+ txdr_nfstime(&vap->va_atime, &fp->fa_nfsatime); \
+ txdr_nfstime(&vap->va_mtime, &fp->fa_nfsmtime); \
+ fp->fa_nfsctime.nfs_sec = txdr_unsigned(vap->va_ctime.ts_sec); \
+ fp->fa_nfsctime.nfs_usec = txdr_unsigned(vap->va_gen); \
+ } else { \
+ fp->fa_nqblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nqrdev = 0xffffffff; \
+ else \
+ fp->fa_nqrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nqfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nqfileid = txdr_unsigned(vap->va_fileid); \
+ txdr_hyper(&vap->va_size, &fp->fa_nqsize); \
+ txdr_hyper(&vap->va_bytes, &fp->fa_nqbytes); \
+ txdr_nqtime(&vap->va_atime, &fp->fa_nqatime); \
+ txdr_nqtime(&vap->va_mtime, &fp->fa_nqmtime); \
+ txdr_nqtime(&vap->va_ctime, &fp->fa_nqctime); \
+ fp->fa_nqflags = txdr_unsigned(vap->va_flags); \
+ fp->fa_nqgen = txdr_unsigned(vap->va_gen); \
+ txdr_hyper(&vap->va_filerev, &fp->fa_nqfilerev); \
+ }
+
diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c
new file mode 100644
index 000000000000..032bdef0d5ab
--- /dev/null
+++ b/sys/nfs/nfs_node.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_node.c 8.2 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+
+struct nfsnode **nheadhashtbl; /* nfsnode hash table, allocated by hashinit() in nfs_nhinit() */
+u_long nheadhash; /* hash mask filled in by that hashinit() call */
+#define NFSNOHASH(fhsum) ((fhsum)&nheadhash) /* file handle byte sum -> table index */
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Set up the nfsnode hash table.
+ * Unless built for lint, complain first if sizeof(struct nfsnode) is
+ * not a power of two.
+ */
+nfs_nhinit()
+{
+
+#ifndef lint
+	if (sizeof(struct nfsnode) & (sizeof(struct nfsnode) - 1))
+		printf("nfs_nhinit: bad size %d\n", sizeof(struct nfsnode));
+#endif /* not lint */
+	nheadhashtbl = hashinit(desiredvnodes, M_NFSNODE, &nheadhash);
+}
+
+/*
+ * Map a file handle to its slot in the nfsnode hash table.
+ * The hash is the sum of the NFSX_FH bytes of the handle, reduced with
+ * NFSNOHASH(); the address of the chosen chain head is returned.
+ */
+struct nfsnode **
+nfs_hash(fhp)
+	register nfsv2fh_t *fhp;
+{
+	register u_char *bp, *ep;
+	register u_long sum = 0;
+
+	bp = &fhp->fh_bytes[0];
+	for (ep = bp + NFSX_FH; bp < ep; bp++)
+		sum += *bp;
+	return (&nheadhashtbl[NFSNOHASH(sum)]);
+}
+
+/*
+ * Look up a vnode/nfsnode by file handle.
+ * Callers must check for mount points!!
+ * On success 0 is returned and *npp points to the nfsnode, either one
+ * found on the hash chain (after a successful vget()) or a newly
+ * allocated one inserted at the head of the chain.
+ * If getnewvnode() fails, *npp is set to 0 and its error is returned.
+ */
+nfs_nget(mntp, fhp, npp)
+ struct mount *mntp;
+ register nfsv2fh_t *fhp;
+ struct nfsnode **npp;
+{
+ register struct nfsnode *np, *nq, **nhpp;
+ register struct vnode *vp;
+ extern int (**nfsv2_vnodeop_p)();
+ struct vnode *nvp;
+ int error;
+
+ nhpp = nfs_hash(fhp);
+loop:
+ for (np = *nhpp; np; np = np->n_forw) {
+ if (mntp != NFSTOV(np)->v_mount ||
+ bcmp((caddr_t)fhp, (caddr_t)&np->n_fh, NFSX_FH))
+ continue;
+ vp = NFSTOV(np);
+ if (vget(vp, 1)) /* vget failed: rescan the chain from the top */
+ goto loop;
+ *npp = np;
+ return(0);
+ }
+ if (error = getnewvnode(VT_NFS, mntp, nfsv2_vnodeop_p, &nvp)) {
+ *npp = 0;
+ return (error);
+ }
+ vp = nvp;
+ MALLOC(np, struct nfsnode *, sizeof *np, M_NFSNODE, M_WAITOK);
+ vp->v_data = np;
+ np->n_vnode = vp;
+ /*
+ * Insert the nfsnode in the hash queue for its new file handle
+ */
+ np->n_flag = 0;
+ if (nq = *nhpp)
+ nq->n_back = &np->n_forw;
+ np->n_forw = nq;
+ np->n_back = nhpp;
+ *nhpp = np;
+ bcopy((caddr_t)fhp, (caddr_t)&np->n_fh, NFSX_FH);
+ np->n_attrstamp = 0; /* nfs_getattrcache() tests this for 0 on NQLOOKLEASE mounts */
+ np->n_direofoffset = 0;
+ np->n_sillyrename = (struct sillyrename *)0;
+ np->n_size = 0;
+ np->n_mtime = 0;
+ if (VFSTONFS(mntp)->nm_flag & NFSMNT_NQNFS) { /* nqnfs-only fields */
+ np->n_brev = 0;
+ np->n_lrev = 0;
+ np->n_expiry = (time_t)0;
+ np->n_tnext = (struct nfsnode *)0;
+ }
+ *npp = np;
+ return (0);
+}
+
+/*
+ * Inactive operation for an nfsnode.
+ * If a silly rename is pending on the node, invalidate the vnode's
+ * buffers, remove the renamed file and release the credential and
+ * directory vnode held for it.  Afterwards only the flags listed at the
+ * bottom are left set in n_flag.  Always returns 0.
+ */
+nfs_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct nfsnode *np = VTONFS(ap->a_vp);
+	register struct sillyrename *sp;
+	struct proc *p = curproc;	/* XXX */
+	extern int prtactive;
+
+	if (prtactive && ap->a_vp->v_usecount != 0)
+		vprint("nfs_inactive: pushing active", ap->a_vp);
+
+	/* Detach the silly rename record from the node before acting on it. */
+	sp = np->n_sillyrename;
+	np->n_sillyrename = (struct sillyrename *)0;
+	if (sp != (struct sillyrename *)0) {
+		/*
+		 * Get rid of the file that was renamed out of the way earlier.
+		 */
+		(void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1);
+		nfs_removeit(sp);
+		crfree(sp->s_cred);
+		vrele(sp->s_dvp);
+#ifdef SILLYSEPARATE
+		free((caddr_t)sp, M_NFSREQ);
+#endif
+	}
+	np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NQNFSEVICTED |
+	    NQNFSNONCACHE | NQNFSWRITE);
+	return (0);
+}
+
+/*
+ * Reclaim an nfsnode so that it can be used for other purposes.
+ * The nfsnode is unlinked from its hash chain and, on an nqnfs mount,
+ * from the mount's timer queue if it is on it; the name cache is purged
+ * for the vnode and the nfsnode memory is freed.  Always returns 0.
+ */
+nfs_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ register struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ register struct nfsnode *nq;
+ extern int prtactive;
+
+ if (prtactive && vp->v_usecount != 0)
+ vprint("nfs_reclaim: pushing active", vp);
+ /*
+ * Remove the nfsnode from its hash chain.
+ */
+ if (nq = np->n_forw)
+ nq->n_back = np->n_back;
+ *np->n_back = nq;
+
+ /*
+ * For nqnfs, take it off the timer queue as required.
+ * A neighbour pointer equal to nmp (cast to a nfsnode pointer) marks
+ * the end of the queue, in which case the mount's own link is updated.
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) && np->n_tnext) {
+ if (np->n_tnext == (struct nfsnode *)nmp)
+ nmp->nm_tprev = np->n_tprev;
+ else
+ np->n_tnext->n_tprev = np->n_tprev;
+ if (np->n_tprev == (struct nfsnode *)nmp)
+ nmp->nm_tnext = np->n_tnext;
+ else
+ np->n_tprev->n_tnext = np->n_tnext;
+ }
+ cache_purge(vp);
+ FREE(vp->v_data, M_NFSNODE);
+ vp->v_data = (void *)0;
+ return (0);
+}
+
+/*
+ * Lock an nfsnode.
+ * No lock is actually taken; the call only waits for VXLOCK to clear
+ * and then reports ENOENT if the vnode's tag has become VT_NON,
+ * 0 otherwise.
+ */
+nfs_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+
+	/*
+	 * Ugh, another place where interruptible mounts will get hung.
+	 * If you make this sleep interruptible, then you have to fix all
+	 * the VOP_LOCK() calls to expect interruptibility.
+	 */
+	for (;;) {
+		if ((vp->v_flag & VXLOCK) == 0)
+			break;
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+	}
+	return (vp->v_tag == VT_NON ? ENOENT : 0);
+}
+
+/*
+ * Unlock an nfsnode.
+ * Nothing to undo, since nfs_lock() takes no lock; always returns 0.
+ */
+/* ARGSUSED */
+nfs_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Check for a locked nfsnode.
+ * Always answers 0 (not locked).
+ */
+/* ARGSUSED */
+nfs_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Nfs abort op, called after namei() when a CREATE/DELETE isn't actually
+ * done.  The only work is to release the pathname buffer, and only when
+ * HASBUF is set without SAVESTART.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+nfs_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) != HASBUF)
+		return (0);
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (0);
+}
diff --git a/sys/nfs/nfs_nqlease.c b/sys/nfs/nfs_nqlease.c
new file mode 100644
index 000000000000..965f46132a6c
--- /dev/null
+++ b/sys/nfs/nfs_nqlease.c
@@ -0,0 +1,1228 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_nqlease.c 8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * References:
+ * Cary G. Gray and David R. Cheriton, "Leases: An Efficient Fault-Tolerant
+ * Mechanism for Distributed File Cache Consistency",
+ * In Proc. of the Twelfth ACM Symposium on Operating Systems
+ * Principles, pg. 202-210, Litchfield Park, AZ, Dec. 1989.
+ * Michael N. Nelson, Brent B. Welch and John K. Ousterhout, "Caching
+ * in the Sprite Network File System", ACM TOCS 6(1),
+ * pages 134-154, February 1988.
+ * V. Srinivasan and Jeffrey C. Mogul, "Spritely NFS: Implementation and
+ * Performance of Cache-Consistency Protocols", Digital
+ * Equipment Corporation WRL Research Report 89/5, May 1989.
+ */
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/stat.h>
+#include <sys/protosw.h>
+
+#include <netinet/in.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+
+/*
+ * List head for the lease queue and other global data.
+ * At any time a lease is linked into a list ordered by increasing expiry time.
+ * NQFHHASH() selects a bucket of nqfhead[] by masking the first u_long of
+ * the data at "f" with nqfheadhash.
+ */
+#define NQFHHASH(f) ((*((u_long *)(f)))&nqfheadhash)
+
+union nqsrvthead nqthead;
+struct nqlease **nqfhead;
+u_long nqfheadhash;
+time_t nqnfsstarttime = (time_t)0;
+u_long nqnfs_prog, nqnfs_vers;
+int nqsrv_clockskew = NQ_CLOCKSKEW; /* added to every expiry by nqsrv_instimeq() */
+int nqsrv_writeslack = NQ_WRITESLACK; /* extension given to write leases by nqnfs_serverd() */
+int nqsrv_maxlease = NQ_MAXLEASE; /* upper bound applied to requested durations */
+int nqsrv_maxnumlease = NQ_MAXNUMLEASE; /* lease count above which nqsrv_getlease() sleeps */
+void nqsrv_instimeq(), nqsrv_send_eviction(), nfs_sndunlock();
+void nqsrv_unlocklease(), nqsrv_waitfor_expiry(), nfsrv_slpderef();
+void nqsrv_addhost(), nqsrv_locklease(), nqnfs_serverd();
+void nqnfs_clientlease();
+struct mbuf *nfsm_rpchead();
+
+/*
+ * Signifies which rpcs can have piggybacked lease requests
+ * The table has NFS_NPROCS entries; each is 0, NQL_READ or NQL_WRITE.
+ * NOTE(review): presumably indexed by rpc procedure number -- the table
+ * is not referenced in this part of the file, confirm against its users.
+ */
+int nqnfs_piggy[NFS_NPROCS] = {
+ 0,
+ NQL_READ,
+ NQL_WRITE,
+ 0,
+ NQL_READ,
+ NQL_READ,
+ NQL_READ,
+ 0,
+ NQL_WRITE,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ NQL_READ,
+ 0,
+ NQL_READ,
+ 0,
+ 0,
+ 0,
+ 0,
+};
+
+int nnnnnn = sizeof (struct nqlease); /* NOTE(review): looks like a debugging aid exposing the structure size */
+int oooooo = sizeof (struct nfsnode); /* NOTE(review): as above, for struct nfsnode */
+extern nfstype nfs_type[9];
+extern struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
+extern struct nfsd nfsd_head;
+extern int nfsd_waiting;
+extern struct nfsreq nfsreqh;
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Get or check for a lease for "vp", based on NQL_CHECK flag.
+ * The rules are as follows:
+ * - if a current non-caching lease, reply non-caching
+ * - if a current lease for same host only, extend lease
+ * - if a read cachable lease and a read lease request
+ * add host to list and reply cachable
+ * - else { set non-cachable for read-write sharing }
+ * send eviction notice messages to all other hosts that have lease
+ * wait for lease termination { either by receiving vacated messages
+ * from all the other hosts or expiry
+ * via. timeout }
+ * modify lease to non-cachable
+ * - else if no current lease, issue new one
+ * - reply
+ * - return boolean TRUE iff nam should be m_freem()'d
+ * NB: Since nqnfs_serverd() is called from a timer, any potential tsleep()
+ * in here must be framed by nqsrv_locklease() and nqsrv_unlocklease().
+ * nqsrv_locklease() is coded such that at least one of LC_LOCKED and
+ * LC_WANTED is set whenever a process is tsleeping in it. The exception
+ * is when a new lease is being allocated, since it is not in the timer
+ * queue yet. (Ditto for the splsoftclock() and splx(s) calls)
+ *
+ * Arguments:
+ * vp - vnode the lease is for; for anything other than VREG, VDIR
+ * or VLNK the routine returns 0 at once, touching no outputs
+ * duration - in/out lease term, clamped here to nqsrv_maxlease
+ * flags - NQL_READ or NQL_WRITE, optionally with NQL_CHECK
+ * nd - supplies nd_slp (request socket) and nd_procp
+ * nam - mbuf holding the requester's address
+ * cachablep - out: 0 if the lease is non-cachable, otherwise 1; not
+ * written when NQL_CHECK is given and no lease exists
+ * frev - out: va_filerev from VOP_GETATTR()
+ * cred - credentials for VOP_GETATTR() and the eviction messages
+ *
+ * Returns 0, or the error from VOP_GETATTR() or VFS_VPTOFH().
+ * NOTE(review): the "return boolean TRUE iff nam should be m_freem()'d"
+ * rule above does not match the return statements in this body.
+ */
+nqsrv_getlease(vp, duration, flags, nd, nam, cachablep, frev, cred)
+ struct vnode *vp;
+ u_long *duration;
+ int flags;
+ struct nfsd *nd;
+ struct mbuf *nam;
+ int *cachablep;
+ u_quad_t *frev;
+ struct ucred *cred;
+{
+ register struct nqlease *lp, *lq, **lpp;
+ register struct nqhost *lph;
+ struct nqlease *tlp;
+ struct nqm **lphp;
+ struct vattr vattr;
+ fhandle_t fh;
+ int i, ok, error, s;
+
+ if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
+ return (0);
+ if (*duration > nqsrv_maxlease)
+ *duration = nqsrv_maxlease;
+ if (error = VOP_GETATTR(vp, &vattr, cred, nd->nd_procp))
+ return (error);
+ *frev = vattr.va_filerev;
+ s = splsoftclock();
+ tlp = vp->v_lease;
+ if ((flags & NQL_CHECK) == 0)
+ nfsstats.srvnqnfs_getleases++;
+ if (tlp == (struct nqlease *)0) {
+
+ /*
+ * Find the lease by searching the hash list.
+ * fh and lpp set up here are used again further down when a
+ * new lease has to be allocated and linked into the bucket.
+ */
+ fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ if (error = VFS_VPTOFH(vp, &fh.fh_fid)) {
+ splx(s);
+ return (error);
+ }
+ lpp = &nqfhead[NQFHHASH(fh.fh_fid.fid_data)];
+ for (lp = *lpp; lp; lp = lp->lc_fhnext)
+ if (fh.fh_fsid.val[0] == lp->lc_fsid.val[0] &&
+ fh.fh_fsid.val[1] == lp->lc_fsid.val[1] &&
+ !bcmp(fh.fh_fid.fid_data, lp->lc_fiddata,
+ fh.fh_fid.fid_len - sizeof (long))) {
+ /* Found it */
+ lp->lc_vp = vp;
+ vp->v_lease = lp;
+ tlp = lp;
+ break;
+ }
+ }
+ lp = tlp;
+ if (lp) {
+ if ((lp->lc_flag & LC_NONCACHABLE) ||
+ (lp->lc_morehosts == (struct nqm *)0 &&
+ nqsrv_cmpnam(nd->nd_slp, nam, &lp->lc_host)))
+ goto doreply;
+ if ((flags & NQL_READ) && (lp->lc_flag & LC_WRITE)==0) {
+ if (flags & NQL_CHECK)
+ goto doreply;
+ if (nqsrv_cmpnam(nd->nd_slp, nam, &lp->lc_host))
+ goto doreply;
+ i = 0;
+ if (lp->lc_morehosts) {
+ lph = lp->lc_morehosts->lpm_hosts;
+ lphp = &lp->lc_morehosts->lpm_next;
+ ok = 1;
+ } else {
+ lphp = &lp->lc_morehosts;
+ ok = 0;
+ }
+ while (ok && (lph->lph_flag & LC_VALID)) {
+ if (nqsrv_cmpnam(nd->nd_slp, nam, lph))
+ goto doreply;
+ if (++i == LC_MOREHOSTSIZ) {
+ i = 0;
+ if (*lphp) {
+ lph = (*lphp)->lpm_hosts;
+ lphp = &((*lphp)->lpm_next);
+ } else
+ ok = 0;
+ } else
+ lph++;
+ }
+ nqsrv_locklease(lp);
+ if (!ok) { /* every existing slot is in use: chain on a fresh nqm */
+ *lphp = (struct nqm *)
+ malloc(sizeof (struct nqm),
+ M_NQMHOST, M_WAITOK);
+ bzero((caddr_t)*lphp, sizeof (struct nqm));
+ lph = (*lphp)->lpm_hosts;
+ }
+ nqsrv_addhost(lph, nd->nd_slp, nam);
+ nqsrv_unlocklease(lp);
+ } else {
+ lp->lc_flag |= LC_NONCACHABLE;
+ nqsrv_locklease(lp);
+ nqsrv_send_eviction(vp, lp, nd->nd_slp, nam, cred);
+ nqsrv_waitfor_expiry(lp);
+ nqsrv_unlocklease(lp);
+ }
+doreply:
+ /*
+ * Update the lease and return
+ */
+ if ((flags & NQL_CHECK) == 0)
+ nqsrv_instimeq(lp, *duration);
+ if (lp->lc_flag & LC_NONCACHABLE)
+ *cachablep = 0;
+ else {
+ *cachablep = 1;
+ if (flags & NQL_WRITE)
+ lp->lc_flag |= LC_WRITTEN;
+ }
+ splx(s);
+ return (0);
+ }
+ splx(s);
+ if (flags & NQL_CHECK)
+ return (0);
+
+ /*
+ * Allocate new lease
+ * The value of nqsrv_maxnumlease should be set generously, so that
+ * the following "printf" happens infrequently.
+ */
+ if (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease) {
+ printf("Nqnfs server, too many leases\n");
+ do {
+ (void) tsleep((caddr_t)&lbolt, PSOCK,
+ "nqsrvnuml", 0);
+ } while (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease);
+ }
+ MALLOC(lp, struct nqlease *, sizeof (struct nqlease), M_NQLEASE, M_WAITOK);
+ bzero((caddr_t)lp, sizeof (struct nqlease));
+ if (flags & NQL_WRITE)
+ lp->lc_flag |= (LC_WRITE | LC_WRITTEN);
+ nqsrv_addhost(&lp->lc_host, nd->nd_slp, nam);
+ lp->lc_vp = vp;
+ lp->lc_fsid = fh.fh_fsid;
+ bcopy(fh.fh_fid.fid_data, lp->lc_fiddata, fh.fh_fid.fid_len - sizeof (long));
+ if (lq = *lpp) /* push the new lease on the front of its hash bucket */
+ lq->lc_fhprev = &lp->lc_fhnext;
+ lp->lc_fhnext = lq;
+ lp->lc_fhprev = lpp;
+ *lpp = lp;
+ vp->v_lease = lp;
+ s = splsoftclock();
+ nqsrv_instimeq(lp, *duration);
+ splx(s);
+ *cachablep = 1;
+ if (++nfsstats.srvnqnfs_leases > nfsstats.srvnqnfs_maxleases)
+ nfsstats.srvnqnfs_maxleases = nfsstats.srvnqnfs_leases;
+ return (0);
+}
+
+/*
+ * Local lease check for server syscalls.
+ * Just set up args and let nqsrv_getlease() do the rest.
+ *
+ * vp   - vnode being accessed locally
+ * p    - calling process, handed on as nd_procp
+ * cred - caller's credentials
+ * flag - lease type; NQL_CHECK is or'd in before the call
+ *
+ * The request is marked as local by using NQLOCALSLP for nd_slp, with no
+ * address mbuf.  The result of nqsrv_getlease() and its outputs are
+ * deliberately discarded.
+ */
+void
+lease_check(vp, p, cred, flag)
+	struct vnode *vp;
+	struct proc *p;
+	struct ucred *cred;
+	int flag;
+{
+	/*
+	 * nqsrv_getlease() takes a "u_long *" and both reads and writes
+	 * through it, so the object must really be a u_long.
+	 */
+	u_long duration = 0;
+	int cache;
+	struct nfsd nfsd;
+	u_quad_t frev;
+
+	nfsd.nd_slp = NQLOCALSLP;
+	nfsd.nd_procp = p;
+	(void) nqsrv_getlease(vp, &duration, NQL_CHECK | flag, &nfsd,
+		(struct mbuf *)0, &cache, &frev, cred);
+}
+
+/*
+ * Record the requesting host in the nqhost slot "lph" of a lease.
+ * What is stored depends on the socket the request arrived on:
+ *  - NQLOCALSLP:    nothing but the LC_LOCAL flag
+ *  - nfs_udpsock:   internet address and port taken from "nam"
+ *  - nfs_cltpsock:  a copy of "nam", kept in lph_nam
+ *  - anything else: the nfssvc_sock pointer itself, whose ns_sref
+ *                   count is incremented
+ * LC_VALID is set in every case.
+ */
+void
+nqsrv_addhost(lph, slp, nam)
+	register struct nqhost *lph;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+{
+	register struct sockaddr_in *sin;
+
+	if (slp == NQLOCALSLP) {
+		lph->lph_flag |= (LC_VALID | LC_LOCAL);
+		return;
+	}
+	if (slp == nfs_udpsock) {
+		sin = mtod(nam, struct sockaddr_in *);
+		lph->lph_flag |= (LC_VALID | LC_UDP);
+		lph->lph_inetaddr = sin->sin_addr.s_addr;
+		lph->lph_port = sin->sin_port;
+		return;
+	}
+	if (slp == nfs_cltpsock) {
+		lph->lph_nam = m_copym(nam, 0, M_COPYALL, M_WAIT);
+		lph->lph_flag |= (LC_VALID | LC_CLTP);
+		return;
+	}
+
+	/* Connection based socket: hold a soft reference on it. */
+	lph->lph_flag |= (LC_VALID | LC_SREF);
+	lph->lph_slp = slp;
+	slp->ns_sref++;
+}
+
+/*
+ * Update the lease expiry time and position it in the timer queue correctly.
+ *
+ * lp       - lease to (re)queue
+ * duration - lease term; the new expiry is the current time in seconds
+ *            plus duration plus nqsrv_clockskew
+ *
+ * Nothing is done when the expiry time would not change.  Otherwise the
+ * lease is taken off the queue if it is on it (lc_chain1[0] non-null) and
+ * inserted after the first entry, scanning from nqthead.th_chain[1] along
+ * lc_chain1[1], whose expiry is not later than the new one.  When that
+ * entry is the starting point of the scan, NQSTORENOVRAM() is invoked
+ * with the new expiry.
+ */
+void
+nqsrv_instimeq(lp, duration)
+	register struct nqlease *lp;
+	u_long duration;
+{
+	register struct nqlease *tlp;
+	time_t newexpiry;
+
+	newexpiry = time.tv_sec + duration + nqsrv_clockskew;
+	if (lp->lc_expiry == newexpiry)
+		return;
+	if (lp->lc_chain1[0])
+		remque(lp);
+	lp->lc_expiry = newexpiry;
+
+	/*
+	 * Find where in the queue it should be.
+	 * The list head has to be tested for before lc_expiry is looked at:
+	 * nqthead is a union nqsrvthead that is only cast to a lease
+	 * pointer, so its lc_expiry must never be read.
+	 */
+	tlp = nqthead.th_chain[1];
+	while (tlp != (struct nqlease *)&nqthead && tlp->lc_expiry > newexpiry)
+		tlp = tlp->lc_chain1[1];
+	if (tlp == nqthead.th_chain[1])
+		NQSTORENOVRAM(newexpiry);
+	insque(lp, tlp);
+}
+
+/*
+ * Decide whether the host making the current request is the one
+ * recorded in the lease host entry "lph".  Returns non-zero for a
+ * match and 0 otherwise.
+ * A local request (slp == NQLOCALSLP) matches only an LC_LOCAL entry.
+ * For the udp and cltp sockets the requester's address is "nam"; for
+ * any other socket it is the socket's own ns_nam.  The address kept in
+ * the entry is picked out of the nqhost union according to the entry's
+ * flags, which is what makes this somewhat messy.
+ */
+nqsrv_cmpnam(slp, nam, lph)
+	register struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	register struct nqhost *lph;
+{
+	register struct sockaddr_in *sin;
+	struct nfssvc_sock *peer;
+	struct mbuf *reqaddr;
+	union nethostaddr lhaddr;
+
+	if (slp == NQLOCALSLP)
+		return ((lph->lph_flag & LC_LOCAL) ? 1 : 0);
+
+	reqaddr = (slp == nfs_udpsock || slp == nfs_cltpsock) ?
+	    nam : slp->ns_nam;
+
+	if (lph->lph_flag & LC_UDP)
+		return (netaddr_match(AF_INET, &lph->lph_haddr, reqaddr));
+	if (lph->lph_flag & LC_CLTP)
+		return (netaddr_match(AF_ISO, &lph->lph_claddr, reqaddr));
+
+	/* Entry refers to a socket; it can only match while still valid. */
+	peer = lph->lph_slp;
+	if ((peer->ns_flag & SLP_VALID) == 0)
+		return (0);
+	sin = mtod(peer->ns_nam, struct sockaddr_in *);
+	if (sin->sin_family == AF_INET)
+		lhaddr.had_inetaddr = sin->sin_addr.s_addr;
+	else
+		lhaddr.had_nam = peer->ns_nam;
+	return (netaddr_match(sin->sin_family, &lhaddr, reqaddr));
+}
+
+/*
+ * Send out eviction notice messages to all other hosts for the lease.
+ *
+ * vp - vnode the lease is for; its file handle is put in each message
+ * lp - the lease whose host list is walked
+ * slp, nam - identify the requesting host, as for nqsrv_cmpnam()
+ * cred - credentials handed to nfsm_rpchead()
+ *
+ * The walk starts at the host embedded in the lease and continues through
+ * the lc_morehosts chain, stopping at the first entry without LC_VALID.
+ * The requesting host's own entry is simply marked LC_VACATED. Every
+ * other entry that is neither local nor already vacated is sent an
+ * NQNFSPROC_EVICTED rpc; entries whose socket is no longer SLP_VALID are
+ * skipped. The result of nfs_send() is ignored.
+ */
+void
+nqsrv_send_eviction(vp, lp, slp, nam, cred)
+ struct vnode *vp;
+ register struct nqlease *lp;
+ struct nfssvc_sock *slp;
+ struct mbuf *nam;
+ struct ucred *cred;
+{
+ register struct nqhost *lph = &lp->lc_host;
+ register struct mbuf *m;
+ register int siz;
+ struct nqm *lphnext = lp->lc_morehosts;
+ struct mbuf *mreq, *mb, *mb2, *nam2, *mheadend;
+ struct socket *so;
+ struct sockaddr_in *saddr;
+ fhandle_t *fhp;
+ caddr_t bpos, cp;
+ u_long xid;
+ int len = 1, ok = 1, i = 0;
+ int sotype, *solockp;
+
+ while (ok && (lph->lph_flag & LC_VALID)) {
+ if (nqsrv_cmpnam(slp, nam, lph))
+ lph->lph_flag |= LC_VACATED;
+ else if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) {
+ if (lph->lph_flag & LC_UDP) { /* build a temporary sockaddr_in mbuf; freed below */
+ MGET(nam2, M_WAIT, MT_SONAME);
+ saddr = mtod(nam2, struct sockaddr_in *);
+ nam2->m_len = saddr->sin_len =
+ sizeof (struct sockaddr_in);
+ saddr->sin_family = AF_INET;
+ saddr->sin_addr.s_addr = lph->lph_inetaddr;
+ saddr->sin_port = lph->lph_port;
+ so = nfs_udpsock->ns_so;
+ } else if (lph->lph_flag & LC_CLTP) {
+ nam2 = lph->lph_nam;
+ so = nfs_cltpsock->ns_so;
+ } else if (lph->lph_slp->ns_flag & SLP_VALID) {
+ nam2 = (struct mbuf *)0;
+ so = lph->lph_slp->ns_so;
+ } else
+ goto nextone;
+ sotype = so->so_type;
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED)
+ solockp = &lph->lph_slp->ns_solock;
+ else
+ solockp = (int *)0;
+ nfsm_reqhead((struct vnode *)0, NQNFSPROC_EVICTED,
+ NFSX_FH);
+ nfsm_build(cp, caddr_t, NFSX_FH);
+ bzero(cp, NFSX_FH);
+ fhp = (fhandle_t *)cp;
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ VFS_VPTOFH(vp, &fhp->fh_fid);
+ m = mreq;
+ siz = 0;
+ while (m) { /* total length of the request mbuf chain */
+ siz += m->m_len;
+ m = m->m_next;
+ }
+ if (siz <= 0 || siz > NFS_MAXPACKET) {
+ printf("mbuf siz=%d\n",siz);
+ panic("Bad nfs svc reply");
+ }
+ m = nfsm_rpchead(cred, TRUE, NQNFSPROC_EVICTED,
+ RPCAUTH_UNIX, 5*NFSX_UNSIGNED, (char *)0,
+ mreq, siz, &mheadend, &xid);
+ /*
+ * For stream protocols, prepend a Sun RPC
+ * Record Mark.
+ */
+ if (sotype == SOCK_STREAM) {
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ *mtod(m, u_long *) = htonl(0x80000000 |
+ (m->m_pkthdr.len - NFSX_UNSIGNED));
+ }
+ if (((lph->lph_flag & (LC_UDP | LC_CLTP)) == 0 &&
+ (lph->lph_slp->ns_flag & SLP_VALID) == 0) ||
+ (solockp && (*solockp & NFSMNT_SNDLOCK)))
+ m_freem(m); /* socket gone or send lock already held: drop the notice */
+ else {
+ if (solockp)
+ *solockp |= NFSMNT_SNDLOCK;
+ (void) nfs_send(so, nam2, m,
+ (struct nfsreq *)0);
+ if (solockp)
+ nfs_sndunlock(solockp);
+ }
+ if (lph->lph_flag & LC_UDP)
+ MFREE(nam2, m);
+ }
+nextone:
+ if (++i == len) { /* end of this group of hosts: move on to the next nqm, if any */
+ if (lphnext) {
+ i = 0;
+ len = LC_MOREHOSTSIZ;
+ lph = lphnext->lpm_hosts;
+ lphnext = lphnext->lpm_next;
+ } else
+ ok = 0;
+ } else
+ lph++;
+ }
+}
+
+/*
+ * Block until the lease "lp" may be treated as expired.
+ * That is the case once the clock has passed lc_expiry, or once every
+ * valid host on the lease is either local or has vacated.  While some
+ * host is still outstanding, LC_EXPIREDWANTED is set and the caller
+ * sleeps on &lp->lc_flag; the whole check is redone after each wakeup.
+ */
+void
+nqsrv_waitfor_expiry(lp)
+	register struct nqlease *lp;
+{
+	register struct nqhost *host;
+	register int idx;
+	struct nqm *more;
+	int limit, scanning, outstanding;
+
+	for (;;) {
+		if (time.tv_sec > lp->lc_expiry)
+			return;
+
+		/* Begin with the single host embedded in the lease. */
+		host = &lp->lc_host;
+		more = lp->lc_morehosts;
+		limit = 1;
+		idx = 0;
+		scanning = 1;
+		outstanding = 0;
+		while (scanning && (host->lph_flag & LC_VALID)) {
+			if ((host->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) {
+				outstanding = 1;
+				break;
+			}
+			if (++idx != limit) {
+				host++;
+				continue;
+			}
+			/* Group exhausted; step to the next nqm, if any. */
+			if (more == (struct nqm *)0) {
+				scanning = 0;
+			} else {
+				idx = 0;
+				limit = LC_MOREHOSTSIZ;
+				host = more->lpm_hosts;
+				more = more->lpm_next;
+			}
+		}
+		if (!outstanding)
+			return;
+
+		lp->lc_flag |= LC_EXPIREDWANTED;
+		(void) tsleep((caddr_t)&lp->lc_flag, PSOCK,
+		    "nqexp", 0);
+	}
+}
+
+/*
+ * Nqnfs server timer that maintains the server lease queue.
+ * Scan the lease queue for expired entries:
+ * - when one is found, wakeup anyone waiting for it
+ * else dequeue and free
+ *
+ * The scan runs from nqthead.th_chain[0] and stops at the first lease
+ * whose expiry is not yet in the past. For each expired lease:
+ * - if LC_EXPIREDWANTED is set, the flag is cleared and the sleeper on
+ * &lp->lc_flag is woken;
+ * - else if the lease is locked or wanted, it is left alone;
+ * - else a write lease may be requeued for nqsrv_writeslack seconds
+ * (see the comment in the body), and any other lease is unlinked from
+ * the timer queue and hash chain and freed together with its host
+ * records.
+ */
+void
+nqnfs_serverd()
+{
+ register struct nqlease *lp, *lq;
+ register struct nqhost *lph;
+ struct nqlease *nextlp;
+ struct nqm *lphnext, *olphnext;
+ struct mbuf *n;
+ int i, len, ok;
+
+ lp = nqthead.th_chain[0];
+ while (lp != (struct nqlease *)&nqthead) {
+ if (lp->lc_expiry >= time.tv_sec)
+ break;
+ nextlp = lp->lc_chain1[0]; /* saved now because lp may be freed or requeued below */
+ if (lp->lc_flag & LC_EXPIREDWANTED) {
+ lp->lc_flag &= ~LC_EXPIREDWANTED;
+ wakeup((caddr_t)&lp->lc_flag);
+ } else if ((lp->lc_flag & (LC_LOCKED | LC_WANTED)) == 0) {
+ /*
+ * Make a best effort at keeping a write caching lease long
+ * enough by not deleting it until it has been explicitly
+ * vacated or there have been no writes in the previous
+ * write_slack seconds since expiry and the nfsds are not
+ * all busy. The assumption is that if the nfsds are not
+ * all busy now (no queue of nfs requests), then the client
+ * would have been able to do at least one write to the
+ * file during the last write_slack seconds if it was still
+ * trying to push writes to the server.
+ */
+ if ((lp->lc_flag & (LC_WRITE | LC_VACATED)) == LC_WRITE &&
+ ((lp->lc_flag & LC_WRITTEN) || nfsd_waiting == 0)) {
+ lp->lc_flag &= ~LC_WRITTEN;
+ nqsrv_instimeq(lp, nqsrv_writeslack);
+ } else {
+ remque(lp);
+ if (lq = lp->lc_fhnext)
+ lq->lc_fhprev = lp->lc_fhprev;
+ *lp->lc_fhprev = lq;
+ /*
+ * This soft reference may no longer be valid, but
+ * no harm done. The worst case is if the vnode was
+ * recycled and has another valid lease reference,
+ * which is dereferenced prematurely.
+ */
+ lp->lc_vp->v_lease = (struct nqlease *)0;
+ lph = &lp->lc_host;
+ lphnext = lp->lc_morehosts;
+ olphnext = (struct nqm *)0;
+ len = 1;
+ i = 0;
+ ok = 1;
+ while (ok && (lph->lph_flag & LC_VALID)) {
+ if (lph->lph_flag & LC_CLTP)
+ MFREE(lph->lph_nam, n);
+ if (lph->lph_flag & LC_SREF)
+ nfsrv_slpderef(lph->lph_slp);
+ if (++i == len) {
+ if (olphnext) { /* the nqm just walked is finished with */
+ free((caddr_t)olphnext, M_NQMHOST);
+ olphnext = (struct nqm *)0;
+ }
+ if (lphnext) {
+ olphnext = lphnext;
+ i = 0;
+ len = LC_MOREHOSTSIZ;
+ lph = lphnext->lpm_hosts;
+ lphnext = lphnext->lpm_next;
+ } else
+ ok = 0;
+ } else
+ lph++;
+ }
+ FREE((caddr_t)lp, M_NQLEASE);
+ if (olphnext)
+ free((caddr_t)olphnext, M_NQMHOST);
+ nfsstats.srvnqnfs_leases--;
+ }
+ }
+ lp = nextlp;
+ }
+}
+
+/*
+ * Called from nfssvc_nfsd() for a getlease rpc request.
+ * Do the from/to xdr translation and call nqsrv_getlease() to
+ * do the real work.
+ *
+ * The request carries a file handle followed by two words: the lease
+ * flags and the requested duration (stored in nfsd->nd_duration). A
+ * request with flags equal to NQL_WRITE on a read-only export is answered
+ * with EROFS. The reply carries the cachable flag, the duration, the file
+ * revision and the file attributes.
+ * The value returned by nqsrv_getlease() is ignored here.
+ * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
+ * use the locals declared below (mb, mb2, bpos, t1, cp2, error, ...) and
+ * to supply this function's return paths -- confirm in nfsm_subs.h.
+ */
+nqnfsrv_getlease(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct nfsv2_fattr *fp;
+ struct vattr va;
+ register struct vattr *vap = &va;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register u_long *tl;
+ register long t1;
+ u_quad_t frev;
+ caddr_t bpos;
+ int error = 0;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ int flags, rdonly, cache;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+ flags = fxdr_unsigned(int, *tl++);
+ nfsd->nd_duration = fxdr_unsigned(int, *tl);
+ if (error = nfsrv_fhtovp(fhp,
+ TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ if (rdonly && flags == NQL_WRITE) {
+ error = EROFS;
+ nfsm_reply(0);
+ }
+ (void) nqsrv_getlease(vp, &nfsd->nd_duration, flags, nfsd,
+ nam, &cache, &frev, cred);
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_NQFATTR + 4*NFSX_UNSIGNED);
+ nfsm_build(tl, u_long *, 4*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(cache);
+ *tl++ = txdr_unsigned(nfsd->nd_duration);
+ txdr_hyper(&frev, tl);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_NQFATTR);
+ nfsm_srvfillattr;
+ nfsm_srvdone;
+}
+
+/*
+ * Called from nfssvc_nfsd() when a "vacated" message is received from a
+ * client. Find the entry and expire it.
+ *
+ * The lease is looked up in the file handle hash list. If it is found,
+ * the host entry matching the sender (per nqsrv_cmpnam()) is marked
+ * LC_VACATED, and a process waiting in nqsrv_waitfor_expiry() is woken
+ * when LC_EXPIREDWANTED is set.
+ * Every return statement visible in this body returns EPERM, whether or
+ * not a lease was found.
+ * NOTE(review): presumably this is how the caller is told not to send a
+ * reply to the message -- confirm in nfssvc_nfsd().
+ */
+nqnfsrv_vacated(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct nqlease *lp;
+ register struct nqhost *lph;
+ struct nqlease *tlp = (struct nqlease *)0;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register u_long *tl;
+ register long t1;
+ struct nqm *lphnext;
+ int error = 0, i, len, ok, gotit = 0;
+ char *cp2;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ m_freem(mrep);
+ /*
+ * Find the lease by searching the hash list.
+ * Note that the whole MAXFIDSZ bytes of fid data are compared here.
+ */
+ for (lp = nqfhead[NQFHHASH(fhp->fh_fid.fid_data)]; lp;
+ lp = lp->lc_fhnext)
+ if (fhp->fh_fsid.val[0] == lp->lc_fsid.val[0] &&
+ fhp->fh_fsid.val[1] == lp->lc_fsid.val[1] &&
+ !bcmp(fhp->fh_fid.fid_data, lp->lc_fiddata,
+ MAXFIDSZ)) {
+ /* Found it */
+ tlp = lp;
+ break;
+ }
+ if (tlp) {
+ lp = tlp;
+ len = 1;
+ i = 0;
+ lph = &lp->lc_host;
+ lphnext = lp->lc_morehosts;
+ ok = 1;
+ while (ok && (lph->lph_flag & LC_VALID)) {
+ if (nqsrv_cmpnam(nfsd->nd_slp, nam, lph)) {
+ lph->lph_flag |= LC_VACATED;
+ gotit++;
+ break;
+ }
+ if (++i == len) {
+ if (lphnext) {
+ len = LC_MOREHOSTSIZ;
+ i = 0;
+ lph = lphnext->lpm_hosts;
+ lphnext = lphnext->lpm_next;
+ } else
+ ok = 0;
+ } else
+ lph++;
+ }
+ if ((lp->lc_flag & LC_EXPIREDWANTED) && gotit) {
+ lp->lc_flag &= ~LC_EXPIREDWANTED;
+ wakeup((caddr_t)&lp->lc_flag);
+ }
+nfsmout:
+ return (EPERM);
+ }
+ return (EPERM);
+}
+
+/*
+ * Client get lease rpc function.
+ *
+ * vp - vnode a lease is wanted for
+ * rwflag - lease type, sent to the server as is
+ * cred, p - credentials and process for the rpc
+ *
+ * The request holds the file handle, rwflag and the mount's nm_leaseterm.
+ * The reply's first two words are the cachable flag and the granted
+ * duration; the expiry is computed as the time the request was issued
+ * plus that duration. If it is still in the future the lease is recorded
+ * with nqnfs_clientlease() and the returned attributes are loaded,
+ * otherwise error is set to NQNFS_EXPIRED.
+ * Returns the value of "error".
+ * NOTE(review): the nfsm_* macros are defined elsewhere and may set
+ * "error" and leave early -- confirm in nfsm_subs.h.
+ */
+nqnfs_getlease(vp, rwflag, cred, p)
+ register struct vnode *vp;
+ int rwflag;
+ struct ucred *cred;
+ struct proc *p;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ register struct nfsnode *np;
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ caddr_t bpos, dpos, cp2;
+ time_t reqtime;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ int cachable;
+ u_quad_t frev;
+
+ nfsstats.rpccnt[NQNFSPROC_GETLEASE]++;
+ mb = mreq = nfsm_reqh(vp, NQNFSPROC_GETLEASE, NFSX_FH+2*NFSX_UNSIGNED,
+ &bpos);
+ nfsm_fhtom(vp);
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(rwflag);
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ reqtime = time.tv_sec; /* taken before the rpc is issued */
+ nfsm_request(vp, NQNFSPROC_GETLEASE, p, cred);
+ np = VTONFS(vp);
+ nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+ cachable = fxdr_unsigned(int, *tl++);
+ reqtime += fxdr_unsigned(int, *tl++); /* reqtime now holds the expiry time */
+ if (reqtime > time.tv_sec) {
+ fxdr_hyper(tl, &frev);
+ nqnfs_clientlease(nmp, np, rwflag, cachable, reqtime, frev);
+ nfsm_loadattr(vp, (struct vattr *)0);
+ } else
+ error = NQNFS_EXPIRED;
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * Client vacated message function.
+ *
+ * Builds an NQNFSPROC_VACATED rpc holding the file handle of "vp" and
+ * sends it on the mount's socket with nfs_send(). No reply is read in
+ * this function, and the results of nfs_sndlock() and nfs_send() are
+ * ignored.
+ * Returns "error", which this body only ever sets to 0.
+ * NOTE(review): the nfsm_* macros are defined elsewhere and could modify
+ * "error" -- confirm in nfsm_subs.h.
+ */
+nqnfs_vacated(vp, cred)
+ register struct vnode *vp;
+ struct ucred *cred;
+{
+ register caddr_t cp;
+ register struct mbuf *m;
+ register int i;
+ caddr_t bpos;
+ u_long xid;
+ int error = 0;
+ struct mbuf *mreq, *mb, *mb2, *mheadend;
+ struct nfsmount *nmp;
+ struct nfsreq myrep;
+
+ nmp = VFSTONFS(vp->v_mount);
+ nfsstats.rpccnt[NQNFSPROC_VACATED]++;
+ nfsm_reqhead(vp, NQNFSPROC_VACATED, NFSX_FH);
+ nfsm_fhtom(vp);
+ m = mreq;
+ i = 0;
+ while (m) { /* total length of the request mbuf chain */
+ i += m->m_len;
+ m = m->m_next;
+ }
+ m = nfsm_rpchead(cred, TRUE, NQNFSPROC_VACATED,
+ RPCAUTH_UNIX, 5*NFSX_UNSIGNED, (char *)0,
+ mreq, i, &mheadend, &xid);
+ if (nmp->nm_sotype == SOCK_STREAM) { /* stream sockets need a record mark in front */
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ *mtod(m, u_long *) = htonl(0x80000000 | (m->m_pkthdr.len -
+ NFSX_UNSIGNED));
+ }
+ myrep.r_flags = 0;
+ myrep.r_nmp = nmp;
+ if (nmp->nm_soflags & PR_CONNREQUIRED)
+ (void) nfs_sndlock(&nmp->nm_flag, (struct nfsreq *)0);
+ (void) nfs_send(nmp->nm_so, nmp->nm_nam, m, &myrep);
+ if (nmp->nm_soflags & PR_CONNREQUIRED)
+ nfs_sndunlock(&nmp->nm_flag);
+ return (error);
+}
+
+/*
+ * Called for client side callbacks
+ *
+ * nmp - mount the callback arrived on
+ * mrep, md, dpos - the received message and the current parse position
+ *
+ * The message is parsed with nfs_getreq(). Anything other than an
+ * NQNFSPROC_EVICTED request is freed and rejected with EPERM. For an
+ * eviction the nfsnode for the file handle is obtained with nfs_nget();
+ * if the node is on the mount's lease timer queue (n_tnext non-null) its
+ * expiry is set to 0, NQNFSEVICTED is set, and it is moved to the front
+ * of the queue unless it is already there.
+ * NOTE(review): the return path after vrele() comes from nfsm_srvdone,
+ * which is defined elsewhere -- confirm in nfsm_subs.h.
+ */
+nqnfs_callback(nmp, mrep, md, dpos)
+ struct nfsmount *nmp;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+{
+ register struct vnode *vp;
+ register u_long *tl;
+ register long t1;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct nfsnode *np;
+ struct nfsd nd;
+ int error;
+ char *cp2;
+
+ nd.nd_mrep = mrep;
+ nd.nd_md = md;
+ nd.nd_dpos = dpos;
+ if (error = nfs_getreq(&nd, FALSE))
+ return (error);
+ md = nd.nd_md;
+ dpos = nd.nd_dpos;
+ if (nd.nd_procnum != NQNFSPROC_EVICTED) {
+ m_freem(mrep);
+ return (EPERM);
+ }
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ m_freem(mrep);
+ if (error = nfs_nget(nmp->nm_mountp, fhp, &np))
+ return (error);
+ vp = NFSTOV(np);
+ if (np->n_tnext) {
+ np->n_expiry = 0;
+ np->n_flag |= NQNFSEVICTED;
+ if (np->n_tprev != (struct nfsnode *)nmp) { /* not already first: unlink, then relink at the head */
+ if (np->n_tnext == (struct nfsnode *)nmp)
+ nmp->nm_tprev = np->n_tprev;
+ else
+ np->n_tnext->n_tprev = np->n_tprev;
+ np->n_tprev->n_tnext = np->n_tnext;
+ np->n_tnext = nmp->nm_tnext;
+ nmp->nm_tnext = np;
+ np->n_tprev = (struct nfsnode *)nmp;
+ if (np->n_tnext == (struct nfsnode *)nmp)
+ nmp->nm_tprev = np;
+ else
+ np->n_tnext->n_tprev = np;
+ }
+ }
+ vrele(vp);
+ nfsm_srvdone;
+}
+
+/*
+ * Nqnfs client helper daemon. Runs once a second to expire leases.
+ * It also gets authorization strings for "kerb" mounts.
+ * It must start at the beginning of the list again after any potential
+ * "sleep" since nfs_reclaim() called from vclean() can pull a node off
+ * the list asynchronously.
+ *
+ * nmp  - the mount being served
+ * cred - credentials used for the flush and lease rpcs
+ * ncd  - daemon argument structure; carries the authorization string in
+ *        and the uid needing authorization out
+ * flag - NFSSVC_GOTAUTH / NFSSVC_AUTHINFAIL as passed by the daemon
+ * argp - user address that *ncd is copied out to
+ * p    - the daemon process
+ *
+ * Returns ENEEDAUTH when an authorization string has to be fetched by
+ * the daemon.  Otherwise it loops until NFSMNT_DISMNT is set, then frees
+ * the nfsmount structure and returns the status of the last tsleep(),
+ * with EWOULDBLOCK mapped to 0, or 0 if it never slept.
+ */
+nqnfs_clientd(nmp, cred, ncd, flag, argp, p)
+	register struct nfsmount *nmp;
+	struct ucred *cred;
+	struct nfsd_cargs *ncd;
+	int flag;
+	caddr_t argp;
+	struct proc *p;
+{
+	register struct nfsnode *np;
+	struct vnode *vp;
+	struct nfsreq myrep;
+	/*
+	 * error must start out as 0: it is examined and returned after the
+	 * main loop even when that loop never reaches the tsleep(), e.g.
+	 * when NFSMNT_DISMNT is already set on entry.
+	 */
+	int error = 0, vpid;
+
+	/*
+	 * First initialize some variables
+	 */
+	nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+	nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+
+	/*
+	 * If an authorization string is being passed in, get it.
+	 */
+	if ((flag & NFSSVC_GOTAUTH) &&
+	    (nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_DISMNT)) == 0) {
+		if (nmp->nm_flag & NFSMNT_HASAUTH)
+			panic("cld kerb");
+		if ((flag & NFSSVC_AUTHINFAIL) == 0) {
+			if (ncd->ncd_authlen <= RPCAUTH_MAXSIZ &&
+			    copyin(ncd->ncd_authstr, nmp->nm_authstr,
+			    ncd->ncd_authlen) == 0) {
+				nmp->nm_authtype = ncd->ncd_authtype;
+				nmp->nm_authlen = ncd->ncd_authlen;
+			} else
+				nmp->nm_flag |= NFSMNT_AUTHERR;
+		} else
+			nmp->nm_flag |= NFSMNT_AUTHERR;
+		nmp->nm_flag |= NFSMNT_HASAUTH;
+		wakeup((caddr_t)&nmp->nm_authlen);
+	} else
+		nmp->nm_flag |= NFSMNT_WAITAUTH;
+
+	/*
+	 * Loop every second updating queue until there is a termination sig.
+	 */
+	while ((nmp->nm_flag & NFSMNT_DISMNT) == 0) {
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			/*
+			 * If there are no outstanding requests (and therefore no
+			 * processes in nfs_reply) and there is data in the receive
+			 * queue, poke for callbacks.
+			 */
+			if (nfsreqh.r_next == &nfsreqh && nmp->nm_so &&
+			    nmp->nm_so->so_rcv.sb_cc > 0) {
+				myrep.r_flags = R_GETONEREP;
+				myrep.r_nmp = nmp;
+				myrep.r_mrep = (struct mbuf *)0;
+				myrep.r_procp = (struct proc *)0;
+				(void) nfs_reply(&myrep);
+			}
+
+			/*
+			 * Loop through the leases, updating as required.
+			 */
+			np = nmp->nm_tnext;
+			while (np != (struct nfsnode *)nmp &&
+			    (nmp->nm_flag & NFSMNT_DISMINPROG) == 0) {
+				vp = NFSTOV(np);
+if (vp->v_mount->mnt_stat.f_fsid.val[1] != MOUNT_NFS) panic("trash2");
+				vpid = vp->v_id;
+				if (np->n_expiry < time.tv_sec) {
+					/* Lease has expired: take it off the queue and flush. */
+					if (vget(vp, 1) == 0) {
+						nmp->nm_inprog = vp;
+						if (vpid == vp->v_id) {
+if (vp->v_mount->mnt_stat.f_fsid.val[1] != MOUNT_NFS) panic("trash3");
+							if (np->n_tnext == (struct nfsnode *)nmp)
+								nmp->nm_tprev = np->n_tprev;
+							else
+								np->n_tnext->n_tprev = np->n_tprev;
+							if (np->n_tprev == (struct nfsnode *)nmp)
+								nmp->nm_tnext = np->n_tnext;
+							else
+								np->n_tprev->n_tnext = np->n_tnext;
+							np->n_tnext = (struct nfsnode *)0;
+							if ((np->n_flag & (NMODIFIED | NQNFSEVICTED))
+							    && vp->v_type == VREG) {
+								if (np->n_flag & NQNFSEVICTED) {
+									(void) nfs_vinvalbuf(vp,
+									    V_SAVE, cred, p, 0);
+									np->n_flag &= ~NQNFSEVICTED;
+									(void) nqnfs_vacated(vp, cred);
+								} else {
+									(void) VOP_FSYNC(vp, cred,
+									    MNT_WAIT, p);
+									np->n_flag &= ~NMODIFIED;
+								}
+							}
+						}
+						vrele(vp);
+						nmp->nm_inprog = NULLVP;
+					}
+					/* Restart from the head; stop if nothing changed. */
+					if (np != nmp->nm_tnext)
+						np = nmp->nm_tnext;
+					else
+						break;
+				} else if ((np->n_expiry - NQ_RENEWAL) < time.tv_sec) {
+					/* Close to expiry: renew write leases with dirty data. */
+					if ((np->n_flag & (NQNFSWRITE | NQNFSNONCACHE))
+					    == NQNFSWRITE && vp->v_dirtyblkhd.lh_first &&
+					    vget(vp, 1) == 0) {
+						nmp->nm_inprog = vp;
+if (vp->v_mount->mnt_stat.f_fsid.val[1] != MOUNT_NFS) panic("trash4");
+						if (vpid == vp->v_id &&
+						    nqnfs_getlease(vp, NQL_WRITE, cred, p)==0)
+							np->n_brev = np->n_lrev;
+						vrele(vp);
+						nmp->nm_inprog = NULLVP;
+					}
+					if (np != nmp->nm_tnext)
+						np = nmp->nm_tnext;
+					else
+						break;
+				} else
+					break;
+			}
+		}
+
+		/*
+		 * Get an authorization string, if required.
+		 */
+		if ((nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_DISMNT | NFSMNT_HASAUTH)) == 0) {
+			ncd->ncd_authuid = nmp->nm_authuid;
+			if (copyout((caddr_t)ncd, argp, sizeof (struct nfsd_cargs)))
+				nmp->nm_flag |= NFSMNT_WAITAUTH;
+			else
+				return (ENEEDAUTH);
+		}
+
+		/*
+		 * Wait a bit (no pun) and do it again.
+		 */
+		if ((nmp->nm_flag & NFSMNT_DISMNT) == 0 &&
+		    (nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_HASAUTH))) {
+			error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH,
+			    "nqnfstimr", hz / 3);
+			if (error == EINTR || error == ERESTART)
+				(void) dounmount(nmp->nm_mountp, 0, p);
+		}
+	}
+	free((caddr_t)nmp, M_NFSMNT);
+	if (error == EWOULDBLOCK)
+		error = 0;
+	return (error);
+}
+
+/*
+ * Shift every lease expiry time by "deltat" seconds after the time of
+ * day clock has been stepped.  Called from the settimeofday() syscall.
+ * Three things are adjusted: nqnfsstarttime (when it has been set), each
+ * lease on the server timer queue, and each nfsnode on the timer queue
+ * of every nqnfs mount.
+ */
+void
+lease_updatetime(deltat)
+	register int deltat;
+{
+	register struct nqlease *lease;
+	register struct nfsnode *node;
+	struct mount *mnt;
+	struct nfsmount *nmp;
+	int s;
+
+	if (nqnfsstarttime != 0)
+		nqnfsstarttime += deltat;
+
+	/* Server side leases, with the lease timer held off. */
+	s = splsoftclock();
+	for (lease = nqthead.th_chain[0];
+	    lease != (struct nqlease *)&nqthead;
+	    lease = lease->lc_chain1[0])
+		lease->lc_expiry += deltat;
+	splx(s);
+
+	/*
+	 * Client side: walk the mount list and fix up the timer queue of
+	 * each nqnfs mount found on it.
+	 */
+	for (mnt = mountlist.tqh_first; mnt != NULL;
+	    mnt = mnt->mnt_list.tqe_next) {
+		if (mnt->mnt_stat.f_fsid.val[1] != MOUNT_NFS)
+			continue;
+		nmp = VFSTONFS(mnt);
+		if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+			continue;
+		for (node = nmp->nm_tnext; node != (struct nfsnode *)nmp;
+		    node = node->n_tnext)
+			node->n_expiry += deltat;
+	}
+}
+
+/*
+ * Take the lock on a server lease.
+ * While LC_LOCKED is already set the caller flags LC_WANTED and sleeps
+ * on the lease; once it is clear, LC_LOCKED is set and LC_WANTED cleared.
+ */
+void
+nqsrv_locklease(lp)
+	struct nqlease *lp;
+{
+
+	for (;;) {
+		if ((lp->lc_flag & LC_LOCKED) == 0)
+			break;
+		lp->lc_flag |= LC_WANTED;
+		(void) tsleep((caddr_t)lp, PSOCK, "nqlc", 0);
+	}
+	lp->lc_flag |= LC_LOCKED;
+	lp->lc_flag &= ~LC_WANTED;
+}
+
+/*
+ * Drop the lock on a server lease.
+ * LC_LOCKED is cleared and, if LC_WANTED is set, whoever is sleeping on
+ * the lease is woken up.
+ */
+void
+nqsrv_unlocklease(lp)
+	struct nqlease *lp;
+{
+
+	lp->lc_flag &= ~LC_LOCKED;
+	if ((lp->lc_flag & LC_WANTED) == 0)
+		return;
+	wakeup((caddr_t)lp);
+}
+
+/*
+ * Record a lease that the server has granted to this client.
+ *
+ * nmp      - mount whose timer queue the node lives on
+ * np       - nfsnode the lease is for
+ * rwflag   - NQL_READ or NQL_WRITE
+ * cachable - non-zero when the file may be cached under the lease
+ * expiry   - time at which the lease runs out
+ * frev     - file revision returned with the lease
+ *
+ * A node already on the queue is unlinked first and can only gain
+ * NQNFSWRITE; a node that was not queued has NQNFSWRITE set or cleared
+ * to match rwflag.  The node is then put back on the queue, which is
+ * searched from its tail for the last entry whose expiry is not later
+ * than the new one.
+ */
+void
+nqnfs_clientlease(nmp, np, rwflag, cachable, expiry, frev)
+	register struct nfsmount *nmp;
+	register struct nfsnode *np;
+	int rwflag, cachable;
+	time_t expiry;
+	u_quad_t frev;
+{
+	register struct nfsnode *after;
+	struct nfsnode *head = (struct nfsnode *)nmp;
+
+	if (np->n_tnext != (struct nfsnode *)0) {
+		/* Currently queued: take it off the timer queue. */
+		if (np->n_tnext == head)
+			nmp->nm_tprev = np->n_tprev;
+		else
+			np->n_tnext->n_tprev = np->n_tprev;
+		if (np->n_tprev == head)
+			nmp->nm_tnext = np->n_tnext;
+		else
+			np->n_tprev->n_tnext = np->n_tnext;
+		if (rwflag == NQL_WRITE)
+			np->n_flag |= NQNFSWRITE;
+	} else {
+		if (rwflag == NQL_READ)
+			np->n_flag &= ~NQNFSWRITE;
+		else
+			np->n_flag |= NQNFSWRITE;
+	}
+
+	if (cachable)
+		np->n_flag &= ~NQNFSNONCACHE;
+	else
+		np->n_flag |= NQNFSNONCACHE;
+	np->n_expiry = expiry;
+	np->n_lrev = frev;
+
+	/* Find the entry to insert after, looking backwards from the tail. */
+	for (after = nmp->nm_tprev;
+	    after != head && after->n_expiry > np->n_expiry;
+	    after = after->n_tprev)
+		;
+
+	if (after == head) {
+		np->n_tnext = nmp->nm_tnext;
+		nmp->nm_tnext = np;
+	} else {
+		np->n_tnext = after->n_tnext;
+		after->n_tnext = np;
+	}
+	np->n_tprev = after;
+	if (np->n_tnext == head)
+		nmp->nm_tprev = np;
+	else
+		np->n_tnext->n_tprev = np;
+}
diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c
new file mode 100644
index 000000000000..f31b96e02edc
--- /dev/null
+++ b/sys/nfs/nfs_serv.c
@@ -0,0 +1,1908 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_serv.c 8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * nfs version 2 server calls to vnode ops
+ * - these routines generally have 3 phases
+ * 1 - break down and validate rpc request in mbuf list
+ * 2 - do the vnode ops for the request
+ * (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
+ * 3 - build the rpc reply in an mbuf list
+ * nb:
+ * - do not mix the phases, since the nfsm_?? macros can return failures
+ * on a bad rpc or similar and do not do any vrele() or vput()'s
+ *
+ * - the nfsm_reply() macro generates an nfs rpc reply with the nfs
+ * error number iff error != 0 whereas
+ * returning an error from the server function implies a fatal error
+ * such as a badly constructed rpc request that should be dropped without
+ * a reply.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/mbuf.h>
+#include <sys/dirent.h>
+#include <sys/stat.h>
+
+#include <vm/vm.h>
+
+#include <nfs/nfsv2.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nqnfs.h>
+
+/* Defs */
+#define TRUE 1
+#define FALSE 0
+
+/* Global vars */
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_xdrneg1;
+extern u_long nfs_false, nfs_true;
+nfstype nfs_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
+ NFCHR, NFNON };
+
+/*
+ * nqnfs access service
+ *
+ * Reads a file handle and three XDR words from the request, maps the
+ * handle to a vnode with nfsrv_fhtovp() and asks nfsrv_access() whether
+ * the requested mix of VREAD/VWRITE/VEXEC is permitted.  Nothing is
+ * built after nfsm_reply(0), so the reply carries no data beyond what
+ * nfsm_reply() itself generates from "error".
+ */
+nqnfsrv_access(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register u_long *tl;
+ /*
+ * NOTE(review): t1, bpos, cache, cp2, mb, mreq and frev are never
+ * named in the statements below; presumably the nfsm_* macros
+ * reference them -- confirm against nfsm_subs.h.
+ */
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, mode = 0;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ u_quad_t frev;
+
+ /* Phase 1: pull the arguments out of the request. */
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
+ /* Phase 2: get the vnode and evaluate the access request. */
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ /* The three words are, in order, the read, write and execute flags. */
+ if (*tl++ == nfs_true)
+ mode |= VREAD;
+ if (*tl++ == nfs_true)
+ mode |= VWRITE;
+ if (*tl == nfs_true)
+ mode |= VEXEC;
+ error = nfsrv_access(vp, mode, cred, rdonly, nfsd->nd_procp);
+ vput(vp);
+ /* Phase 3: reply with the result of the check. */
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs getattr service
+ *
+ * Maps the file handle in the request to a vnode, fetches its
+ * attributes with VOP_GETATTR() and returns them in the reply.  The
+ * size of the attribute record is NFSX_FATTR() of "this is an NQNFS
+ * request" (nd_nqlflag != NQL_NOVAL).
+ */
+nfsrv_getattr(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct nfsv2_fattr *fp;
+ struct vattr va;
+ register struct vattr *vap = &va;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ /* Read-type lease handling before the attributes are sampled. */
+ nqsrv_getl(vp, NQL_READ);
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ /* The vnode is released before any reply building starts. */
+ vput(vp);
+ nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ /* NOTE(review): presumably encodes *vap into *fp -- confirm macro. */
+ nfsm_srvfillattr;
+ nfsm_srvdone;
+}
+
+/*
+ * nfs setattr service
+ *
+ * Decodes a file handle and an sattr record, applies the requested
+ * attribute changes with VOP_SETATTR() and replies with the resulting
+ * attributes.  For NQNFS requests (nd_nqlflag != NQL_NOVAL) the sattr
+ * record uses the nq* fields and the reply is followed by the 64-bit
+ * file revision taken from the attributes just fetched.
+ */
+nfsrv_setattr(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct vattr va;
+ register struct vattr *vap = &va;
+ register struct nfsv2_sattr *sp;
+ register struct nfsv2_fattr *fp;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ nqsrv_getl(vp, NQL_WRITE);
+ /* Start from "no change" and fill in only what the client sent. */
+ VATTR_NULL(vap);
+ /*
+ * Nah nah nah nah na nah
+ * There is a bug in the Sun client that puts 0xffff in the mode
+ * field of sattr when it should put in 0xffffffff. The u_short
+ * doesn't sign extend.
+ * --> check the low order 2 bytes for 0xffff
+ */
+ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
+ vap->va_mode = nfstov_mode(sp->sa_mode);
+ if (sp->sa_uid != nfs_xdrneg1)
+ vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
+ if (sp->sa_gid != nfs_xdrneg1)
+ vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
+ if (nfsd->nd_nqlflag == NQL_NOVAL) {
+ /* Plain NFS: a field of all ones means "leave unchanged". */
+ if (sp->sa_nfssize != nfs_xdrneg1)
+ vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_nfssize);
+ if (sp->sa_nfsatime.nfs_sec != nfs_xdrneg1) {
+#ifdef notyet
+ fxdr_nfstime(&sp->sa_nfsatime, &vap->va_atime);
+#else
+ vap->va_atime.ts_sec =
+ fxdr_unsigned(long, sp->sa_nfsatime.nfs_sec);
+ vap->va_atime.ts_nsec = 0;
+#endif
+ }
+ if (sp->sa_nfsmtime.nfs_sec != nfs_xdrneg1)
+ fxdr_nfstime(&sp->sa_nfsmtime, &vap->va_mtime);
+ } else {
+ /* NQNFS: every field is decoded unconditionally. */
+ fxdr_hyper(&sp->sa_nqsize, &vap->va_size);
+ fxdr_nqtime(&sp->sa_nqatime, &vap->va_atime);
+ fxdr_nqtime(&sp->sa_nqmtime, &vap->va_mtime);
+ vap->va_flags = fxdr_unsigned(u_long, sp->sa_nqflags);
+ }
+
+ /*
+ * If the size is being changed write access is required, otherwise
+ * just check for a read only file system.
+ */
+ if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
+ if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+ error = EROFS;
+ goto out;
+ }
+ } else {
+ if (vp->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ } else if (error = nfsrv_access(vp, VWRITE, cred, rdonly,
+ nfsd->nd_procp))
+ goto out;
+ }
+ if (error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ /* Re-read the attributes so the reply reflects the new state. */
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+out:
+ vput(vp);
+ nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 2*NFSX_UNSIGNED);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ if (nfsd->nd_nqlflag != NQL_NOVAL) {
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ /*
+ * Send the file revision from the attributes fetched above,
+ * as nfsrv_write() does.  The previous code encoded a local
+ * that was never assigned.
+ */
+ txdr_hyper(&vap->va_filerev, tl);
+ }
+ nfsm_srvdone;
+}
+
+/*
+ * nfs lookup rpc
+ *
+ * Looks up a name relative to the directory file handle in the request
+ * and replies with the file handle and attributes of what was found.
+ * NQNFS requests carry a leading lease-duration word; when it is
+ * non-zero a read lease is requested on the result and its details are
+ * placed ahead of the file handle in the reply.
+ */
+nfsrv_lookup(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct nfsv2_fattr *fp;
+ struct nameidata nd;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register caddr_t cp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, duration2, cache2, len;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vattr va, *vap = &va;
+ u_quad_t frev, frev2;
+
+ fhp = &nfh.fh_generic;
+ /* duration2 stays 0 (no lease wanted) for plain NFS requests. */
+ duration2 = 0;
+ if (nfsd->nd_nqlflag != NQL_NOVAL) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ duration2 = fxdr_unsigned(int, *tl);
+ }
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = LOOKUP;
+ nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ /* SAVESTART left a start directory and path buffer to dispose of. */
+ nqsrv_getl(nd.ni_startdir, NQL_READ);
+ vrele(nd.ni_startdir);
+ FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ vp = nd.ni_vp;
+ /* Reuse the handle buffer to build the handle of the result. */
+ bzero((caddr_t)fhp, sizeof(nfh));
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ /* The return value of the lease request is deliberately ignored. */
+ if (duration2)
+ (void) nqsrv_getlease(vp, &duration2, NQL_READ, nfsd,
+ nam, &cache2, &frev2, cred);
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_FH + NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 5*NFSX_UNSIGNED);
+ if (nfsd->nd_nqlflag != NQL_NOVAL) {
+ if (duration2) {
+ /* Lease info: type, cachable flag, duration, 64-bit revision. */
+ nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(NQL_READ);
+ *tl++ = txdr_unsigned(cache2);
+ *tl++ = txdr_unsigned(duration2);
+ txdr_hyper(&frev2, tl);
+ } else {
+ /* A single zero word stands in for the lease information. */
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ nfsm_srvfhtom(fhp);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ nfsm_srvdone;
+}
+
+/*
+ * nfs readlink service
+ *
+ * Pre-allocates a chain of cluster mbufs totalling NFS_MAXPATHLEN bytes,
+ * has VOP_READLINK() fill it through a uio, trims the chain to the
+ * length actually read and appends it to the reply after a length word.
+ */
+nfsrv_readlink(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
+ register struct iovec *ivp = iv;
+ register struct mbuf *mp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, i, tlen, len;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mp2, *mp3, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io, *uiop = &io;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ /*
+ * Build the receive chain: mp3 is its head, mp2 its current tail,
+ * and iv[] gets one entry per mbuf.  The last mbuf is shortened so
+ * the total is exactly NFS_MAXPATHLEN.
+ */
+ len = 0;
+ i = 0;
+ while (len < NFS_MAXPATHLEN) {
+ MGET(mp, M_WAIT, MT_DATA);
+ MCLGET(mp, M_WAIT);
+ mp->m_len = NFSMSIZ(mp);
+ if (len == 0)
+ mp3 = mp2 = mp;
+ else {
+ mp2->m_next = mp;
+ mp2 = mp;
+ }
+ if ((len+mp->m_len) > NFS_MAXPATHLEN) {
+ mp->m_len = NFS_MAXPATHLEN-len;
+ len = NFS_MAXPATHLEN;
+ } else
+ len += mp->m_len;
+ ivp->iov_base = mtod(mp, caddr_t);
+ ivp->iov_len = mp->m_len;
+ i++;
+ ivp++;
+ }
+ uiop->uio_iov = iv;
+ uiop->uio_iovcnt = i;
+ uiop->uio_offset = 0;
+ uiop->uio_resid = len;
+ uiop->uio_rw = UIO_READ;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ uiop->uio_procp = (struct proc *)0;
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) {
+ /* The chain is not part of the reply yet, so free it here. */
+ m_freem(mp3);
+ nfsm_reply(0);
+ }
+ if (vp->v_type != VLNK) {
+ error = EINVAL;
+ goto out;
+ }
+ nqsrv_getl(vp, NQL_READ);
+ error = VOP_READLINK(vp, uiop, cred);
+out:
+ vput(vp);
+ if (error)
+ m_freem(mp3);
+ nfsm_reply(NFSX_UNSIGNED);
+ /* Unused space left: cut the chain back to the rounded-up length. */
+ if (uiop->uio_resid > 0) {
+ len -= uiop->uio_resid;
+ tlen = nfsm_rndup(len);
+ nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
+ }
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = txdr_unsigned(len);
+ /* Hang the link text off the end of the reply. */
+ mb->m_next = mp3;
+ nfsm_srvdone;
+}
+
+/*
+ * nfs read service
+ *
+ * Decodes file handle, offset (one word for plain NFS, two for NQNFS)
+ * and byte count, clamps the count against the file size, then reads
+ * directly into mbufs appended to the reply.  The reply holds the file
+ * attributes, a length word and the data.
+ */
+nfsrv_read(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct iovec *iv;
+ struct iovec *iv2;
+ register struct mbuf *m;
+ register struct nfsv2_fattr *fp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, i, cnt, len, left, siz, tlen;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct mbuf *m2;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io, *uiop = &io;
+ struct vattr va, *vap = &va;
+ off_t off;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ if (nfsd->nd_nqlflag == NQL_NOVAL) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ off = (off_t)fxdr_unsigned(u_long, *tl);
+ } else {
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ fxdr_hyper(tl, &off);
+ }
+ nfsm_srvstrsiz(cnt, NFS_MAXDATA);
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ if (vp->v_type != VREG) {
+ error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+ vput(vp);
+ nfsm_reply(0);
+ }
+ nqsrv_getl(vp, NQL_READ);
+ /* Either read or execute permission is enough to read the file. */
+ if ((error = nfsrv_access(vp, VREAD, cred, rdonly, nfsd->nd_procp)) &&
+ (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp))) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ if (error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ /* Do not read past end of file. */
+ if (off >= vap->va_size)
+ cnt = 0;
+ else if ((off + cnt) > vap->va_size)
+ cnt = nfsm_rndup(vap->va_size - off);
+ nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)+NFSX_UNSIGNED+nfsm_rndup(cnt));
+ /* fp and tl are reserved now and filled in after the read. */
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ len = left = cnt;
+ if (cnt > 0) {
+ /*
+ * Generate the mbuf list with the uio_iov ref. to it.
+ */
+ i = 0;
+ m = m2 = mb;
+ MALLOC(iv, struct iovec *,
+ ((NFS_MAXDATA+MLEN-1)/MLEN) * sizeof (struct iovec),
+ M_TEMP, M_WAITOK);
+ iv2 = iv;
+ while (left > 0) {
+ /* Use whatever room is left in the current mbuf first. */
+ siz = min(M_TRAILINGSPACE(m), left);
+ if (siz > 0) {
+ m->m_len += siz;
+ iv->iov_base = bpos;
+ iv->iov_len = siz;
+ iv++;
+ i++;
+ left -= siz;
+ }
+ /* More to go: append a fresh cluster mbuf to the reply. */
+ if (left > 0) {
+ MGET(m, M_WAIT, MT_DATA);
+ MCLGET(m, M_WAIT);
+ m->m_len = 0;
+ m2->m_next = m;
+ m2 = m;
+ bpos = mtod(m, caddr_t);
+ }
+ }
+ uiop->uio_iov = iv2;
+ uiop->uio_iovcnt = i;
+ uiop->uio_offset = off;
+ uiop->uio_resid = cnt;
+ uiop->uio_rw = UIO_READ;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ error = VOP_READ(vp, uiop, IO_NODELOCKED, cred);
+ off = uiop->uio_offset;
+ FREE((caddr_t)iv2, M_TEMP);
+ /* On failure discard the partly built reply before replying. */
+ if (error || (error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp))) {
+ m_freem(mreq);
+ vput(vp);
+ nfsm_reply(0);
+ }
+ } else
+ uiop->uio_resid = 0;
+ vput(vp);
+ nfsm_srvfillattr;
+ /* Trim the reply if fewer bytes than reserved were read. */
+ len -= uiop->uio_resid;
+ tlen = nfsm_rndup(len);
+ if (cnt != tlen || tlen != len)
+ nfsm_adj(mb, cnt-tlen, tlen-len);
+ *tl = txdr_unsigned(len);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs write service
+ *
+ * Decodes file handle, offset and length, then writes the data straight
+ * out of the request mbufs with VOP_WRITE() (IO_SYNC | IO_NODELOCKED),
+ * at most NFS_MAXIOVEC mbufs per call.  The reply holds the file
+ * attributes and, for NQNFS, the 64-bit file revision.
+ */
+nfsrv_write(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct iovec *ivp;
+ register struct mbuf *mp;
+ register struct nfsv2_fattr *fp;
+ struct iovec iv[NFS_MAXIOVEC];
+ struct vattr va;
+ register struct vattr *vap = &va;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, siz, len, xfer;
+ int ioflags = IO_SYNC | IO_NODELOCKED;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io, *uiop = &io;
+ off_t off;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED);
+ if (nfsd->nd_nqlflag == NQL_NOVAL) {
+ /* Plain NFS: the offset is the second word; length is the fourth. */
+ off = (off_t)fxdr_unsigned(u_long, *++tl);
+ tl += 2;
+ } else {
+ /* NQNFS: 64-bit offset, then an append flag, then the length. */
+ fxdr_hyper(tl, &off);
+ tl += 2;
+ if (fxdr_unsigned(u_long, *tl++))
+ ioflags |= IO_APPEND;
+ }
+ len = fxdr_unsigned(long, *tl);
+ if (len > NFS_MAXDATA || len <= 0) {
+ error = EBADRPC;
+ nfsm_reply(0);
+ }
+ /*
+ * Locate the first mbuf holding data.  If the arguments ended
+ * exactly at the end of md the data starts in the next mbuf;
+ * otherwise md itself is adjusted so that it starts at dpos.
+ */
+ if (dpos == (mtod(md, caddr_t)+md->m_len)) {
+ mp = md->m_next;
+ if (mp == NULL) {
+ error = EBADRPC;
+ nfsm_reply(0);
+ }
+ } else {
+ mp = md;
+ siz = dpos-mtod(mp, caddr_t);
+ mp->m_len -= siz;
+ NFSMADV(mp, siz);
+ }
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ if (vp->v_type != VREG) {
+ error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+ vput(vp);
+ nfsm_reply(0);
+ }
+ nqsrv_getl(vp, NQL_WRITE);
+ if (error = nfsrv_access(vp, VWRITE, cred, rdonly, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ uiop->uio_resid = 0;
+ uiop->uio_rw = UIO_WRITE;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ uiop->uio_procp = (struct proc *)0;
+ /*
+ * Do up to NFS_MAXIOVEC mbufs of write each iteration of the
+ * loop until done.
+ */
+ while (len > 0 && uiop->uio_resid == 0) {
+ ivp = iv;
+ siz = 0;
+ uiop->uio_iov = ivp;
+ uiop->uio_iovcnt = 0;
+ uiop->uio_offset = off;
+ /* One iovec per mbuf; the last one is cut to the bytes remaining. */
+ while (len > 0 && uiop->uio_iovcnt < NFS_MAXIOVEC && mp != NULL) {
+ ivp->iov_base = mtod(mp, caddr_t);
+ if (len < mp->m_len)
+ ivp->iov_len = xfer = len;
+ else
+ ivp->iov_len = xfer = mp->m_len;
+#ifdef notdef
+ /* Not Yet .. */
+ if (M_HASCL(mp) && (((u_long)ivp->iov_base) & CLOFSET) == 0)
+ ivp->iov_op = NULL; /* what should it be ?? */
+ else
+ ivp->iov_op = NULL;
+#endif
+ uiop->uio_iovcnt++;
+ ivp++;
+ len -= xfer;
+ siz += xfer;
+ mp = mp->m_next;
+ }
+ /* The request ran out of mbufs before supplying len bytes. */
+ if (len > 0 && mp == NULL) {
+ error = EBADRPC;
+ vput(vp);
+ nfsm_reply(0);
+ }
+ uiop->uio_resid = siz;
+ if (error = VOP_WRITE(vp, uiop, ioflags, cred)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ /* Continue the next batch where this one stopped. */
+ off = uiop->uio_offset;
+ }
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ if (nfsd->nd_nqlflag != NQL_NOVAL) {
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ txdr_hyper(&vap->va_filerev, tl);
+ }
+ nfsm_srvdone;
+}
+
+/*
+ * nfs create service
+ * If the name already exists the file is truncated via setattr (when a
+ * size was supplied) instead of being created.
+ *
+ * New objects are made with VOP_CREATE() for regular files and sockets
+ * and with VOP_MKNOD() for character, block and fifo nodes; any other
+ * type fails with ENXIO.  The reply holds the file handle and the
+ * attributes of the resulting vnode.
+ */
+nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct nfsv2_fattr *fp;
+ struct vattr va;
+ register struct vattr *vap = &va;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ struct nameidata nd;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdev, cache, len, tsize;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ /* Zeroed so the nfsmout code can test whether nd was set up. */
+ nd.ni_cnd.cn_nameiop = 0;
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ VATTR_NULL(vap);
+ nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ /*
+ * Iff doesn't exist, create it
+ * otherwise just truncate to 0 length
+ * should I set the mode too ??
+ */
+ if (nd.ni_vp == NULL) {
+ /* The file type is taken from the mode; none means regular. */
+ vap->va_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode));
+ if (vap->va_type == VNON)
+ vap->va_type = VREG;
+ vap->va_mode = nfstov_mode(sp->sa_mode);
+ /* Plain NFS carries the device number in the size field. */
+ if (nfsd->nd_nqlflag == NQL_NOVAL)
+ rdev = fxdr_unsigned(long, sp->sa_nfssize);
+ else
+ rdev = fxdr_unsigned(long, sp->sa_nqrdev);
+ if (vap->va_type == VREG || vap->va_type == VSOCK) {
+ vrele(nd.ni_startdir);
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))
+ nfsm_reply(0);
+ FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ } else if (vap->va_type == VCHR || vap->va_type == VBLK ||
+ vap->va_type == VFIFO) {
+ /* A character device with rdev of all ones means a fifo. */
+ if (vap->va_type == VCHR && rdev == 0xffffffff)
+ vap->va_type = VFIFO;
+ if (vap->va_type == VFIFO) {
+#ifndef FIFO
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ vput(nd.ni_dvp);
+ error = ENXIO;
+ goto out;
+#endif /* FIFO */
+ } else if (error = suser(cred, (u_short *)0)) {
+ /* Device nodes require suser() to succeed. */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ vput(nd.ni_dvp);
+ goto out;
+ } else
+ vap->va_rdev = (dev_t)rdev;
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) {
+ vrele(nd.ni_startdir);
+ nfsm_reply(0);
+ }
+ /* Look the new node up again to obtain its vnode. */
+ nd.ni_cnd.cn_nameiop = LOOKUP;
+ nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART);
+ nd.ni_cnd.cn_proc = nfsd->nd_procp;
+ nd.ni_cnd.cn_cred = nfsd->nd_procp->p_ucred;
+ if (error = lookup(&nd)) {
+ free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ nfsm_reply(0);
+ }
+ FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ if (nd.ni_cnd.cn_flags & ISSYMLINK) {
+ /*
+ * NOTE(review): nd.ni_dvp is passed to VOP_ABORTOP()
+ * after it has been vrele()'d -- confirm this is safe.
+ */
+ vrele(nd.ni_dvp);
+ vput(nd.ni_vp);
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ error = EINVAL;
+ nfsm_reply(0);
+ }
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ vput(nd.ni_dvp);
+ error = ENXIO;
+ goto out;
+ }
+ vp = nd.ni_vp;
+ } else {
+ /* The name exists: drop the parent and maybe truncate. */
+ vrele(nd.ni_startdir);
+ free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ vp = nd.ni_vp;
+ /*
+ * NOTE(review): as above, VOP_ABORTOP() is called on
+ * nd.ni_dvp after the reference was released -- confirm.
+ */
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nfsd->nd_nqlflag == NQL_NOVAL) {
+ tsize = fxdr_unsigned(long, sp->sa_nfssize);
+ if (tsize != -1)
+ vap->va_size = (u_quad_t)tsize;
+ else
+ vap->va_size = -1;
+ } else
+ fxdr_hyper(&sp->sa_nqsize, &vap->va_size);
+ /* A size of -1 means the client did not ask for a size change. */
+ if (vap->va_size != -1) {
+ if (error = nfsrv_access(vp, VWRITE, cred,
+ (nd.ni_cnd.cn_flags & RDONLY), nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ nqsrv_getl(vp, NQL_WRITE);
+ if (error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ }
+ }
+ /* Build the file handle of the created or existing vnode. */
+ bzero((caddr_t)fhp, sizeof(nfh));
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfhtom(fhp);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ return (error);
+ /*
+ * NOTE(review): nothing in this function jumps to nfsmout by name;
+ * presumably the nfsm_* macros do so on a malformed request --
+ * confirm against nfsm_subs.h.
+ */
+nfsmout:
+ if (nd.ni_cnd.cn_nameiop || nd.ni_cnd.cn_flags)
+ vrele(nd.ni_startdir);
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vput(nd.ni_vp);
+ return (error);
+
+ /* Common failure exit once the parent has already been disposed of. */
+out:
+ vrele(nd.ni_startdir);
+ free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ nfsm_reply(0);
+}
+
+/*
+ * nfs remove service
+ *
+ * Looks up the name for deletion with parent and leaf locked and calls
+ * VOP_REMOVE().  Removing a directory requires suser() to succeed, and
+ * the root of a mounted filesystem is refused with EBUSY.  The reply
+ * carries no data.
+ */
+nfsrv_remove(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct nameidata nd;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, len;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = DELETE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ vp = nd.ni_vp;
+ if (vp->v_type == VDIR &&
+ (error = suser(cred, (u_short *)0)))
+ goto out;
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT) {
+ error = EBUSY;
+ goto out;
+ }
+ if (vp->v_flag & VTEXT)
+ (void) vnode_pager_uncache(vp);
+out:
+ if (!error) {
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ nqsrv_getl(vp, NQL_WRITE);
+ error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ /* A check failed: abort the operation and release both vnodes. */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs rename service
+ *
+ * Decodes a (directory handle, name) pair for the source and another
+ * for the destination, resolves both with nfs_namei(), applies the same
+ * sanity checks as the rename system call and calls VOP_RENAME().
+ * Renaming an entry onto itself is treated as a successful no-op.  The
+ * reply carries no data.
+ */
+nfsrv_rename(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, len, len2;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct nameidata fromnd, tond;
+ struct vnode *fvp, *tvp, *tdvp;
+ nfsv2fh_t fnfh, tnfh;
+ fhandle_t *ffhp, *tfhp;
+ u_quad_t frev;
+ uid_t saved_uid;
+
+ ffhp = &fnfh.fh_generic;
+ tfhp = &tnfh.fh_generic;
+ /* Zeroed so the nfsmout code can tell which lookups were started. */
+ fromnd.ni_cnd.cn_nameiop = 0;
+ tond.ni_cnd.cn_nameiop = 0;
+ nfsm_srvmtofh(ffhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ /*
+ * Remember our original uid so that we can reset cr_uid before
+ * the second nfs_namei() call, in case it is remapped.
+ */
+ saved_uid = cred->cr_uid;
+ fromnd.ni_cnd.cn_cred = cred;
+ fromnd.ni_cnd.cn_nameiop = DELETE;
+ fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART;
+ if (error = nfs_namei(&fromnd, ffhp, len, nfsd->nd_slp, nam, &md,
+ &dpos, nfsd->nd_procp))
+ nfsm_reply(0);
+ fvp = fromnd.ni_vp;
+ nfsm_srvmtofh(tfhp);
+ nfsm_strsiz(len2, NFS_MAXNAMLEN);
+ cred->cr_uid = saved_uid;
+ tond.ni_cnd.cn_cred = cred;
+ tond.ni_cnd.cn_nameiop = RENAME;
+ tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
+ if (error = nfs_namei(&tond, tfhp, len2, nfsd->nd_slp, nam, &md,
+ &dpos, nfsd->nd_procp)) {
+ /* Only the source side needs undoing at this point. */
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ goto out1;
+ }
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp;
+ if (tvp != NULL) {
+ /* Source and existing target must both be, or not be, directories. */
+ if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+ error = EISDIR;
+ goto out;
+ } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+ if (tvp->v_type == VDIR && tvp->v_mountedhere) {
+ error = EXDEV;
+ goto out;
+ }
+ }
+ if (fvp->v_type == VDIR && fvp->v_mountedhere) {
+ error = EBUSY;
+ goto out;
+ }
+ if (fvp->v_mount != tdvp->v_mount) {
+ error = EXDEV;
+ goto out;
+ }
+ if (fvp == tdvp)
+ error = EINVAL;
+ /*
+ * If source is the same as the destination (that is the
+ * same vnode with the same name in the same directory),
+ * then there is nothing to do.
+ */
+ if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+ fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+ !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+ fromnd.ni_cnd.cn_namelen))
+ error = -1;
+out:
+ if (!error) {
+ nqsrv_getl(fromnd.ni_dvp, NQL_WRITE);
+ nqsrv_getl(tdvp, NQL_WRITE);
+ if (tvp)
+ nqsrv_getl(tvp, NQL_WRITE);
+ error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+ tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+ } else {
+ VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
+ if (tdvp == tvp)
+ vrele(tdvp);
+ else
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ /*
+ * -1 is only the internal "nothing to do" marker set above.
+ * It must not reach nfsm_reply(), which would hand it to the
+ * client as an NFS error; report success instead.
+ */
+ if (error == -1)
+ error = 0;
+ }
+ vrele(tond.ni_startdir);
+ FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+out1:
+ vrele(fromnd.ni_startdir);
+ FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+ nfsm_reply(0);
+ return (error);
+
+ /*
+ * NOTE(review): nothing in this function jumps to nfsmout by name;
+ * presumably the nfsm_* macros do so on a malformed request --
+ * confirm against nfsm_subs.h.
+ */
+nfsmout:
+ if (tond.ni_cnd.cn_nameiop || tond.ni_cnd.cn_flags) {
+ vrele(tond.ni_startdir);
+ FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+ }
+ if (fromnd.ni_cnd.cn_nameiop || fromnd.ni_cnd.cn_flags) {
+ vrele(fromnd.ni_startdir);
+ FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ }
+ return (error);
+}
+
+/*
+ * nfs link service
+ *
+ * Decodes the handle of an existing file, the handle of a directory and
+ * a new name, and creates a hard link with VOP_LINK().  Fails with
+ * EEXIST if the name is already present and EXDEV if file and directory
+ * are on different mounts; linking a directory requires suser() to
+ * succeed.  The reply carries no data.
+ */
+nfsrv_link(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct nameidata nd;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, len;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct vnode *vp, *xp;
+ nfsv2fh_t nfh, dnfh;
+ fhandle_t *fhp, *dfhp;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ dfhp = &dnfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvmtofh(dfhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ /*
+ * NOTE(review): FALSE is passed here where the other services pass
+ * TRUE, and vp is later dropped with vrele() rather than vput();
+ * presumably the flag selects an unlocked vnode -- confirm.
+ */
+ if (error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ if (vp->v_type == VDIR && (error = suser(cred, (u_short *)0)))
+ goto out1;
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ if (error = nfs_namei(&nd, dfhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ goto out1;
+ /* xp is first the existing target (if any), then the directory. */
+ xp = nd.ni_vp;
+ if (xp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+ xp = nd.ni_dvp;
+ if (vp->v_mount != xp->v_mount)
+ error = EXDEV;
+out:
+ if (!error) {
+ nqsrv_getl(vp, NQL_WRITE);
+ nqsrv_getl(xp, NQL_WRITE);
+ error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ }
+out1:
+ vrele(vp);
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs symbolic link service
+ *
+ * Decodes a directory handle, the new name, the link target path and an
+ * sattr record, then creates the link with VOP_SYMLINK().  The target
+ * path is copied out of the request into a temporary, NUL-terminated
+ * buffer.  Fails with EEXIST if the name is already present.  The reply
+ * carries no data.
+ */
+nfsrv_symlink(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct vattr va;
+ struct nameidata nd;
+ register struct vattr *vap = &va;
+ register u_long *tl;
+ register long t1;
+ struct nfsv2_sattr *sp;
+ caddr_t bpos;
+ struct uio io;
+ struct iovec iv;
+ int error = 0, cache, len, len2;
+ char *pathcp, *cp2;
+ struct mbuf *mb, *mreq;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ /* Null until allocated so the exit paths know whether to free it. */
+ pathcp = (char *)0;
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ goto out;
+ nfsm_strsiz(len2, NFS_MAXPATHLEN);
+ /* One extra byte for the terminating NUL stored below. */
+ MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
+ iv.iov_base = pathcp;
+ iv.iov_len = len2;
+ io.uio_resid = len2;
+ io.uio_offset = 0;
+ io.uio_iov = &iv;
+ io.uio_iovcnt = 1;
+ io.uio_segflg = UIO_SYSSPACE;
+ io.uio_rw = UIO_READ;
+ io.uio_procp = (struct proc *)0;
+ nfsm_mtouio(&io, len2);
+ nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ *(pathcp + len2) = '\0';
+ if (nd.ni_vp) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(nd.ni_vp);
+ error = EEXIST;
+ goto out;
+ }
+ /* Only the mode is taken from the sattr record. */
+ VATTR_NULL(vap);
+ vap->va_mode = fxdr_unsigned(u_short, sp->sa_mode);
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
+out:
+ if (pathcp)
+ FREE(pathcp, M_TEMP);
+ nfsm_reply(0);
+ return (error);
+ /*
+ * NOTE(review): nothing in this function jumps to nfsmout by name;
+ * presumably the nfsm_* macros do so on a malformed request --
+ * confirm against nfsm_subs.h.
+ */
+nfsmout:
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ if (pathcp)
+ FREE(pathcp, M_TEMP);
+ return (error);
+}
+
+/*
+ * nfs mkdir service
+ *
+ * Decodes a directory handle, the new name and the mode word, creates
+ * the directory with VOP_MKDIR() and replies with the file handle and
+ * attributes of the new directory.  Fails with EEXIST if the name is
+ * already present.
+ */
+nfsrv_mkdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct vattr va;
+ register struct vattr *vap = &va;
+ register struct nfsv2_fattr *fp;
+ struct nameidata nd;
+ register caddr_t cp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, len;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ /* Only one word, used as the mode, is read from the attributes. */
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ VATTR_NULL(vap);
+ vap->va_type = VDIR;
+ vap->va_mode = nfstov_mode(*tl++);
+ vp = nd.ni_vp;
+ if (vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(vp);
+ error = EEXIST;
+ nfsm_reply(0);
+ }
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ if (error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))
+ nfsm_reply(0);
+ vp = nd.ni_vp;
+ /* Build the file handle of the new directory. */
+ bzero((caddr_t)fhp, sizeof(nfh));
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfhtom(fhp);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ return (error);
+ /*
+ * NOTE(review): nothing in this function jumps to nfsmout by name;
+ * presumably the nfsm_* macros do so on a malformed request --
+ * confirm against nfsm_subs.h.
+ */
+nfsmout:
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ return (error);
+}
+
+/*
+ * nfs rmdir service
+ *
+ * Resolves the name with nfs_namei() (DELETE, LOCKPARENT | LOCKLEAF) and
+ * sets error to ENOTDIR if the target is not a directory, EINVAL if the
+ * target is the parent directory itself ("."), or EBUSY if the target
+ * has VROOT set. With no error it calls VOP_RMDIR(); otherwise it aborts
+ * the operation and releases both vnodes. nfsm_reply(0) is issued in
+ * both cases.
+ * The nfsm_* macros are defined outside this file; nfsm_srvdone
+ * presumably supplies the function's return and the nfsmout label --
+ * NOTE(review): confirm against the macro definitions.
+ */
+nfsrv_rmdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, len;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct nameidata nd;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = DELETE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ vp = nd.ni_vp;
+ if (vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+ /*
+ * No rmdir "." please.
+ * (the lookup returned the parent directory as the target)
+ */
+ if (nd.ni_dvp == vp) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+out:
+ if (!error) {
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ nqsrv_getl(vp, NQL_WRITE);
+ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else { /* one of the checks above failed: undo the lookup */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs readdir service
+ * - mallocs what it thinks is enough to read
+ * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
+ * - calls VOP_READDIR()
+ * - loops around building the reply
+ * if the output generated exceeds count break out of loop
+ * The nfsm_clget macro is used here so that the reply will be packed
+ * tightly in mbuf clusters.
+ * - it only knows that it has encountered eof when the VOP_READDIR()
+ * reads nothing
+ * - as such one readdir rpc will return eof false although you are there
+ * and then the next will return eof
+ * - it trims out records with d_fileno == 0
+ * this doesn't matter for Unix clients, but such records might confuse
+ * clients for other operating systems.
+ * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
+ * than requested, but this may not apply to all filesystems. For
+ * example, client NFS does not { although it is never remote mounted
+ * anyhow }
+ * The alternate call nqnfsrv_readdirlook() does lookups as well.
+ * PS: The NFS protocol spec. does not clarify what the "count" byte
+ * argument is a count of.. just name strings and file id's or the
+ * entire reply rpc or ...
+ * I tried just file name and id sizes and it confused the Sun client,
+ * so I am using the full rpc size now. The "paranoia.." comment refers
+ * to including the status longwords that are not a part of the dir.
+ * "entry" structures, but are in the rpc.
+ *
+ * Layout produced by nfsrv_readdir(): for each directory record kept,
+ * a "true" marker word, the file number, the name length, the name
+ * null padded to a long boundary, and the offset just past the record
+ * (toff after adding d_reclen). The list is closed by a "false" marker
+ * word followed by the eof flag.
+ * The request offset is split into off, the NFS_DIRBLKSIZ-aligned
+ * offset handed to VOP_READDIR(), and on, the position of the first
+ * wanted record inside the buffer that was read.
+ * struct flrep below is not used by nfsrv_readdir(); it is the
+ * per-entry prefix that nqnfsrv_readdirlook() copies into its reply.
+ * NOTE(review): cnt is decoded from the request as an int and siz is
+ * derived from it with no lower-bound check before MALLOC(); a zero or
+ * negative count is not rejected here.
+ * NOTE(review): both record walks advance by dp->d_reclen without
+ * checking that it is non-zero.
+ */
+struct flrep {
+ u_long fl_cachable; /* lease cachable flag (cache2), xdr form */
+ u_long fl_duration; /* lease duration (duration2), xdr form; 0 if no lease was asked for */
+ u_long fl_frev[2]; /* frev2 converted with txdr_hyper() */
+ nfsv2fh_t fl_nfh; /* file handle of the entry */
+ u_long fl_fattr[NFSX_NQFATTR / sizeof (u_long)]; /* attributes, filled through nfsm_srvfillattr */
+};
+
+nfsrv_readdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register char *bp, *be;
+ register struct mbuf *mp;
+ register struct dirent *dp;
+ register caddr_t cp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ struct mbuf *mb, *mb2, *mreq, *mp2;
+ char *cpos, *cend, *cp2, *rbuf;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io;
+ struct iovec iv;
+ int len, nlen, rem, xfer, tsiz, i, error = 0;
+ int siz, cnt, fullsiz, eofflag, rdonly, cache;
+ u_quad_t frev;
+ u_long on, off, toff;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+ toff = fxdr_unsigned(u_long, *tl++);
+ off = (toff & ~(NFS_DIRBLKSIZ-1)); /* block-aligned offset to read from */
+ on = (toff & (NFS_DIRBLKSIZ-1)); /* where the first wanted record sits in that block */
+ cnt = fxdr_unsigned(int, *tl);
+ siz = ((cnt+NFS_DIRBLKSIZ-1) & ~(NFS_DIRBLKSIZ-1));
+ if (cnt > NFS_MAXREADDIR)
+ siz = NFS_MAXREADDIR;
+ fullsiz = siz;
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ nqsrv_getl(vp, NQL_READ);
+ if (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ VOP_UNLOCK(vp);
+ MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
+again:
+ iv.iov_base = rbuf;
+ iv.iov_len = fullsiz;
+ io.uio_iov = &iv;
+ io.uio_iovcnt = 1;
+ io.uio_offset = (off_t)off;
+ io.uio_resid = fullsiz;
+ io.uio_segflg = UIO_SYSSPACE;
+ io.uio_rw = UIO_READ;
+ io.uio_procp = (struct proc *)0;
+ error = VOP_READDIR(vp, &io, cred);
+ off = (off_t)io.uio_offset;
+ if (error) {
+ vrele(vp);
+ free((caddr_t)rbuf, M_TEMP);
+ nfsm_reply(0);
+ }
+ if (io.uio_resid < fullsiz) /* something was read, so not at eof yet */
+ eofflag = 0;
+ else
+ eofflag = 1;
+ if (io.uio_resid) {
+ siz -= io.uio_resid;
+
+ /*
+ * If nothing read, return eof
+ * rpc reply
+ * (a "false" entry marker followed by a "true" eof flag)
+ */
+ if (siz == 0) {
+ vrele(vp);
+ nfsm_reply(2*NFSX_UNSIGNED);
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = nfs_false;
+ *tl = nfs_true;
+ FREE((caddr_t)rbuf, M_TEMP);
+ return (0);
+ }
+ }
+
+ /*
+ * Check for degenerate cases of nothing useful read.
+ * If so go try again
+ * (every record from "on" to the end of the data has d_fileno == 0;
+ * the next read starts at the offset VOP_READDIR() stopped at)
+ */
+ cpos = rbuf + on;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ while (cpos < cend && dp->d_fileno == 0) {
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ }
+ if (cpos >= cend) {
+ toff = off;
+ siz = fullsiz;
+ on = 0;
+ goto again;
+ }
+
+ cpos = rbuf + on;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ len = 3*NFSX_UNSIGNED; /* paranoia, probably can be 0 */
+ nfsm_reply(siz);
+ mp = mp2 = mb;
+ bp = bpos;
+ be = bp + M_TRAILINGSPACE(mp);
+
+ /* Loop through the records and build reply */
+ while (cpos < cend) {
+ if (dp->d_fileno != 0) {
+ nlen = dp->d_namlen;
+ rem = nfsm_rndup(nlen)-nlen;
+ len += (4*NFSX_UNSIGNED + nlen + rem);
+ if (len > cnt) { /* this entry would push the reply past the client's count */
+ eofflag = 0;
+ break;
+ }
+ /*
+ * Build the directory record xdr from
+ * the dirent entry.
+ */
+ nfsm_clget;
+ *tl = nfs_true;
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = txdr_unsigned(dp->d_fileno);
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = txdr_unsigned(nlen);
+ bp += NFSX_UNSIGNED;
+
+ /* And loop around copying the name */
+ xfer = nlen;
+ cp = dp->d_name;
+ while (xfer > 0) {
+ nfsm_clget;
+ if ((bp+xfer) > be) /* copy only what fits before be */
+ tsiz = be-bp;
+ else
+ tsiz = xfer;
+ bcopy(cp, bp, tsiz);
+ bp += tsiz;
+ xfer -= tsiz;
+ if (xfer > 0)
+ cp += tsiz;
+ }
+ /* And null pad to a long boundary */
+ for (i = 0; i < rem; i++)
+ *bp++ = '\0';
+ nfsm_clget;
+
+ /* Finish off the record */
+ toff += dp->d_reclen;
+ *tl = txdr_unsigned(toff);
+ bp += NFSX_UNSIGNED;
+ } else
+ toff += dp->d_reclen;
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ }
+ vrele(vp);
+ nfsm_clget;
+ *tl = nfs_false; /* no more entries follow */
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ if (eofflag)
+ *tl = nfs_true;
+ else
+ *tl = nfs_false;
+ bp += NFSX_UNSIGNED;
+ if (mp != mb) {
+ if (bp < be)
+ mp->m_len = bp - mtod(mp, caddr_t);
+ } else
+ mp->m_len += bp - bpos;
+ FREE(rbuf, M_TEMP);
+ nfsm_srvdone;
+}
+
+/*
+ * nqnfs readdir and lookup service
+ *
+ * Same walk as nfsrv_readdir(), but every entry that is returned is
+ * preceded by a struct flrep holding the entry's lease words, file
+ * handle and attributes. The request carries three words: the
+ * directory offset, the byte count and a lease duration (duration2).
+ * For each record with a non-zero d_fileno the vnode is fetched with
+ * VFS_VGET(), its handle built with VFS_VPTOFH(), a read lease is
+ * requested with nqsrv_getlease() when duration2 is non-zero, and the
+ * attributes are read with VOP_GETATTR(). A record for which
+ * VFS_VGET(), VFS_VPTOFH() or VOP_GETATTR() fails is skipped (the
+ * "invalid" label), only advancing the offset.
+ * The reply is closed by a "false" marker word and the eof flag.
+ *
+ * When no lease is requested the lease words of struct flrep are
+ * cleared, because the whole structure is copied into the reply and
+ * would otherwise carry uninitialized stack contents to the client.
+ *
+ * The nfsm_* macros are defined outside this file; nfsm_reply()
+ * presumably returns from the function when error is set, and
+ * nfsm_srvdone presumably supplies the return and the nfsmout label.
+ * NOTE(review): cnt is decoded from the request as an int and siz is
+ * derived from it with no lower-bound check before MALLOC().
+ * NOTE(review): the result of nqsrv_getlease() is ignored, so cache2
+ * and frev2 are used whether or not it filled them in.
+ */
+nqnfsrv_readdirlook(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register char *bp, *be;
+ register struct mbuf *mp;
+ register struct dirent *dp;
+ register caddr_t cp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ struct mbuf *mb, *mb2, *mreq, *mp2;
+ char *cpos, *cend, *cp2, *rbuf;
+ struct vnode *vp, *nvp;
+ struct flrep fl;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io;
+ struct iovec iv;
+ struct vattr va, *vap = &va;
+ struct nfsv2_fattr *fp;
+ int len, nlen, rem, xfer, tsiz, i, error = 0, duration2, cache2;
+ int siz, cnt, fullsiz, eofflag, rdonly, cache;
+ u_quad_t frev, frev2;
+ u_long on, off, toff;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+ toff = fxdr_unsigned(u_long, *tl++);
+ off = (toff & ~(NFS_DIRBLKSIZ-1)); /* block-aligned offset to read from */
+ on = (toff & (NFS_DIRBLKSIZ-1)); /* where the first wanted record sits in that block */
+ cnt = fxdr_unsigned(int, *tl++);
+ duration2 = fxdr_unsigned(int, *tl);
+ siz = ((cnt+NFS_DIRBLKSIZ-1) & ~(NFS_DIRBLKSIZ-1));
+ if (cnt > NFS_MAXREADDIR)
+ siz = NFS_MAXREADDIR;
+ fullsiz = siz;
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ nqsrv_getl(vp, NQL_READ);
+ if (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ VOP_UNLOCK(vp);
+ MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
+again:
+ iv.iov_base = rbuf;
+ iv.iov_len = fullsiz;
+ io.uio_iov = &iv;
+ io.uio_iovcnt = 1;
+ io.uio_offset = (off_t)off;
+ io.uio_resid = fullsiz;
+ io.uio_segflg = UIO_SYSSPACE;
+ io.uio_rw = UIO_READ;
+ io.uio_procp = (struct proc *)0;
+ error = VOP_READDIR(vp, &io, cred);
+ off = (u_long)io.uio_offset;
+ if (error) {
+ vrele(vp);
+ free((caddr_t)rbuf, M_TEMP);
+ nfsm_reply(0);
+ }
+ if (io.uio_resid < fullsiz) /* something was read, so not at eof yet */
+ eofflag = 0;
+ else
+ eofflag = 1;
+ if (io.uio_resid) {
+ siz -= io.uio_resid;
+
+ /*
+ * If nothing read, return eof
+ * rpc reply
+ * (a "false" entry marker followed by a "true" eof flag)
+ */
+ if (siz == 0) {
+ vrele(vp);
+ nfsm_reply(2 * NFSX_UNSIGNED);
+ nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
+ *tl++ = nfs_false;
+ *tl = nfs_true;
+ FREE((caddr_t)rbuf, M_TEMP);
+ return (0);
+ }
+ }
+
+ /*
+ * Check for degenerate cases of nothing useful read.
+ * If so go try again
+ * (every record from "on" to the end of the data has d_fileno == 0;
+ * the next read starts at the offset VOP_READDIR() stopped at)
+ */
+ cpos = rbuf + on;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ while (cpos < cend && dp->d_fileno == 0) {
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ }
+ if (cpos >= cend) {
+ toff = off;
+ siz = fullsiz;
+ on = 0;
+ goto again;
+ }
+
+ cpos = rbuf + on;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */
+ nfsm_reply(siz);
+ mp = mp2 = mb;
+ bp = bpos;
+ be = bp + M_TRAILINGSPACE(mp);
+
+ /* Loop through the records and build reply */
+ while (cpos < cend) {
+ if (dp->d_fileno != 0) {
+ nlen = dp->d_namlen;
+ rem = nfsm_rndup(nlen)-nlen;
+
+ /*
+ * For readdir_and_lookup get the vnode using
+ * the file number.
+ */
+ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp))
+ goto invalid;
+ bzero((caddr_t)&fl.fl_nfh, sizeof (nfsv2fh_t));
+ fl.fl_nfh.fh_generic.fh_fsid =
+ nvp->v_mount->mnt_stat.f_fsid;
+ if (VFS_VPTOFH(nvp, &fl.fl_nfh.fh_generic.fh_fid)) {
+ vput(nvp);
+ goto invalid;
+ }
+ if (duration2) {
+ (void) nqsrv_getlease(nvp, &duration2, NQL_READ,
+ nfsd, nam, &cache2, &frev2, cred);
+ fl.fl_duration = txdr_unsigned(duration2);
+ fl.fl_cachable = txdr_unsigned(cache2);
+ txdr_hyper(&frev2, fl.fl_frev);
+ } else {
+ /*
+ * No lease requested: clear all the lease
+ * words, since the whole of fl is copied
+ * into the reply below and these would
+ * otherwise hold stale stack contents.
+ */
+ fl.fl_duration = 0;
+ fl.fl_cachable = 0;
+ fl.fl_frev[0] = fl.fl_frev[1] = 0;
+ }
+ if (VOP_GETATTR(nvp, vap, cred, nfsd->nd_procp)) {
+ vput(nvp);
+ goto invalid;
+ }
+ vput(nvp);
+ fp = (struct nfsv2_fattr *)&fl.fl_fattr;
+ nfsm_srvfillattr;
+ len += (4*NFSX_UNSIGNED + nlen + rem + NFSX_FH
+ + NFSX_NQFATTR);
+ /* Stop once this entry would exceed the client's count */
+ if (len > cnt) {
+ eofflag = 0;
+ break;
+ }
+ /*
+ * Build the directory record xdr from
+ * the dirent entry.
+ */
+ nfsm_clget;
+ *tl = nfs_true;
+ bp += NFSX_UNSIGNED;
+
+ /*
+ * For readdir_and_lookup copy the stuff out.
+ * The copy is done in pieces because the
+ * structure may straddle the end of the
+ * current mbuf (be).
+ */
+ xfer = sizeof (struct flrep);
+ cp = (caddr_t)&fl;
+ while (xfer > 0) {
+ nfsm_clget;
+ if ((bp+xfer) > be)
+ tsiz = be-bp;
+ else
+ tsiz = xfer;
+ bcopy(cp, bp, tsiz);
+ bp += tsiz;
+ xfer -= tsiz;
+ if (xfer > 0)
+ cp += tsiz;
+ }
+ nfsm_clget;
+ *tl = txdr_unsigned(dp->d_fileno);
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = txdr_unsigned(nlen);
+ bp += NFSX_UNSIGNED;
+
+ /* And loop around copying the name */
+ xfer = nlen;
+ cp = dp->d_name;
+ while (xfer > 0) {
+ nfsm_clget;
+ if ((bp+xfer) > be)
+ tsiz = be-bp;
+ else
+ tsiz = xfer;
+ bcopy(cp, bp, tsiz);
+ bp += tsiz;
+ xfer -= tsiz;
+ if (xfer > 0)
+ cp += tsiz;
+ }
+ /* And null pad to a long boundary */
+ for (i = 0; i < rem; i++)
+ *bp++ = '\0';
+ nfsm_clget;
+
+ /* Finish off the record */
+ toff += dp->d_reclen;
+ *tl = txdr_unsigned(toff);
+ bp += NFSX_UNSIGNED;
+ } else
+invalid:
+ toff += dp->d_reclen;
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ }
+ vrele(vp);
+ nfsm_clget;
+ *tl = nfs_false; /* no more entries follow */
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ if (eofflag)
+ *tl = nfs_true;
+ else
+ *tl = nfs_false;
+ bp += NFSX_UNSIGNED;
+ if (mp != mb) {
+ if (bp < be)
+ mp->m_len = bp - mtod(mp, caddr_t);
+ } else
+ mp->m_len += bp - bpos;
+ FREE(rbuf, M_TEMP);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs statfs service
+ *
+ * Maps the request's file handle to a vnode with nfsrv_fhtovp(), calls
+ * VFS_STATFS() on that vnode's mount and releases the vnode. The reply
+ * holds sf_tsize (always NFS_MAXDGRAMDATA), sf_bsize, sf_blocks,
+ * sf_bfree and sf_bavail; sf_files and sf_ffree are added only when
+ * nfsd->nd_nqlflag is not NQL_NOVAL, and the reply is sized
+ * accordingly through NFSX_STATFS(isnq).
+ * The error from VFS_STATFS() is handed to nfsm_reply(); that macro is
+ * defined outside this file and presumably returns early when error is
+ * set -- NOTE(review): confirm.
+ */
+nfsrv_statfs(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct statfs *sf;
+ register struct nfsv2_statfs *sfp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, isnq;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct statfs statfs;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ isnq = (nfsd->nd_nqlflag != NQL_NOVAL);
+ nfsm_srvmtofh(fhp);
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ sf = &statfs;
+ error = VFS_STATFS(vp->v_mount, sf, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_STATFS(isnq));
+ nfsm_build(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq));
+ sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
+ sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
+ sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
+ sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
+ sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
+ if (isnq) { /* file counts exist only in the larger reply form */
+ sfp->sf_files = txdr_unsigned(sf->f_files);
+ sfp->sf_ffree = txdr_unsigned(sf->f_ffree);
+ }
+ nfsm_srvdone;
+}
+
+/*
+ * Null procedure: clients call it just to ping the server.
+ * Nothing is decoded from the request; nfsm_reply(0) is issued with
+ * error preset to VNOVAL. What nfsm_reply() does with that value is
+ * decided by the macro, which is defined outside this file.
+ */
+/* ARGSUSED */
+nfsrv_null(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct mbuf *mb, *mreq;
+ caddr_t bpos;
+ u_quad_t frev;
+ int cache;
+ int error;
+
+ error = VNOVAL;
+ nfsm_reply(0);
+ return (error);
+}
+
+/*
+ * Service routine for obsolete procedures.
+ * Replies with nfsd->nd_repstat when that is already non-zero and with
+ * EPROCUNAVAIL otherwise; nfsm_reply(0) asks for no reply body.
+ */
+/* ARGSUSED */
+nfsrv_noop(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct mbuf *mb, *mreq;
+ caddr_t bpos;
+ u_quad_t frev;
+ int cache;
+ int error;
+
+ error = nfsd->nd_repstat ? nfsd->nd_repstat : EPROCUNAVAIL;
+ nfsm_reply(0);
+ return (error);
+}
+
+/*
+ * Access check for a vnode that was obtained from a client file handle,
+ * i.e. one that may refer to a file a Unix client already has open.
+ * Plain vn_writechk() plus VOP_ACCESS() is not enough because:
+ * 1 - a write must be refused when the export is read-only (rdonly) as
+ * well as when the mount itself has MNT_RDONLY set
+ * 2 - the owner of the file is let through whatever the mode bits say,
+ * so that a process which chmods a file after opening it keeps
+ * working. This is a known security hole, accepted on the grounds
+ * that the nfs server is wide open anyhow.
+ * Returns 0 when access is granted and an errno value otherwise.
+ */
+nfsrv_access(vp, flags, cred, rdonly, p)
+ register struct vnode *vp;
+ int flags;
+ register struct ucred *cred;
+ int rdonly;
+ struct proc *p;
+{
+ struct vattr vattr;
+ int error;
+
+ if ((flags & VWRITE) != 0) {
+ /*
+ * vn_writechk() with the export's rdonly flag added:
+ * regular files, directories and symbolic links cannot
+ * be written on a read-only file system; sockets and
+ * devices living there still can.
+ */
+ if ((rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) &&
+ (vp->v_type == VREG || vp->v_type == VDIR ||
+ vp->v_type == VLNK))
+ return (EROFS);
+ /*
+ * A vnode with shared text gets one attempt at being
+ * uncached; if that fails writing is not allowed.
+ */
+ if ((vp->v_flag & VTEXT) && !vnode_pager_uncache(vp))
+ return (ETXTBSY);
+ }
+ error = VOP_GETATTR(vp, &vattr, cred, p);
+ if (error)
+ return (error);
+ error = VOP_ACCESS(vp, flags, cred, p);
+ /* The owner passes even when VOP_ACCESS() said no. */
+ if (error == 0 || cred->cr_uid == vattr.va_uid)
+ return (0);
+ return (error);
+}
diff --git a/sys/nfs/nfs_socket.c b/sys/nfs/nfs_socket.c
new file mode 100644
index 000000000000..cf88ed33d92d
--- /dev/null
+++ b/sys/nfs/nfs_socket.c
@@ -0,0 +1,1990 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_socket.c 8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * Socket operations for use by nfs
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/vnode.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+#include <sys/tprintf.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsrtt.h>
+#include <nfs/nqnfs.h>
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Estimate rto for an nfs rpc sent via. an unreliable datagram.
+ * Use the mean and mean deviation of rtt for the appropriate type of rpc
+ * for the frequent rpcs and a default for the others.
+ * The justification for doing "other" this way is that these rpcs
+ * happen so infrequently that timer est. would probably be stale.
+ * Also, since many of these rpcs are
+ * non-idempotent, a conservative timeout is desired.
+ * getattr, lookup - A+2D
+ * read, write - A+4D
+ * other - nm_timeo
+ *
+ * NFS_RTO(n, t): n points at the mount, t is the timer class from
+ * proct[] (0 selects nm_timeo, 1-2 the first formula, 3 and up the
+ * second). The timer class is parenthesized in the subscripts so that
+ * an expression argument, e.g. a conditional, selects the intended
+ * slot. Both arguments are still evaluated more than once, so they
+ * must not have side effects.
+ * NFS_SRTT(r) and NFS_SDRTT(r) name the srtt/sdrtt slot for request r
+ * through proct[]; they subtract 1 from the timer class and so are only
+ * meaningful for procedures whose proct[] entry is non-zero.
+ */
+#define NFS_RTO(n, t) \
+ ((t) == 0 ? (n)->nm_timeo : \
+ ((t) < 3 ? \
+ (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
+ ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))
+#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
+#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
+/*
+ * External data, mostly RPC constants in XDR form
+ */
+extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
+ rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred,
+ rpc_auth_kerb;
+extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers;
+extern time_t nqnfsstarttime;
+extern int nonidempotent[NFS_NPROCS];
+
+/*
+ * Maps errno values to nfs error numbers.
+ * Use NFSERR_IO as the catch all for ones not specifically defined in
+ * RFC 1094.
+ * The first entry is NFSERR_PERM and the thirteenth is NFSERR_ACCES,
+ * which suggests that entry i (counting from 1) stands for errno i,
+ * i.e. array index errno - 1. The code that indexes the table is
+ * outside this part of the file -- NOTE(review): confirm there.
+ * The numbers at the end of each row are 1-based entry positions.
+ */
+static int nfsrv_errmap[ELAST] = {
+ NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* 1-5 */
+ NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* 6-10 */
+ NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, /* 11-15 */
+ NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, /* 16-20 */
+ NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* 21-25 */
+ NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, /* 26-30 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* 31-35 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* 36-40 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* 41-45 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* 46-50 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* 51-55 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* 56-60 */
+ NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, /* 61-65 */
+ NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, /* 66-70 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* 71-75 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* 76-80 */
+ NFSERR_IO, /* 81 */
+};
+
+/*
+ * Defines which timer to use for the procnum.
+ * 0 - default
+ * 1 - getattr
+ * 2 - lookup
+ * 3 - read
+ * 4 - write
+ * The table is indexed by procedure number, in the same order as
+ * nfsrv_procs[]: entry 1 (getattr) uses timer 1, entry 4 (lookup)
+ * timer 2, entries 5, 6, 16 and 18 (readlink, read, readdir,
+ * readdirlook) timer 3, entry 8 (write) timer 4, and every other
+ * entry the default.
+ * NFS_SRTT() and NFS_SDRTT() subtract 1 from the value found here to
+ * pick an nm_srtt[]/nm_sdrtt[] slot.
+ */
+static int proct[NFS_NPROCS] = {
+ 0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
+};
+
+/*
+ * There is a congestion window for outstanding rpcs maintained per mount
+ * point. The cwnd size is adjusted in roughly the way that:
+ * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
+ * SIGCOMM '88". ACM, August 1988.
+ * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
+ * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
+ * of rpcs is in progress.
+ * (The sent count and cwnd are scaled for integer arith.)
+ * Variants of "slow start" were tried and were found to be too much of a
+ * performance hit (ave. rtt 3 times larger),
+ * I suspect due to the large rtt that nfs rpcs have.
+ * nfs_connect() starts every mount with nm_cwnd = NFS_MAXCWND / 2 and
+ * nm_sent = 0.
+ */
+#define NFS_CWNDSCALE 256
+#define NFS_MAXCWND (NFS_CWNDSCALE * 32)
+static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; /* successive powers of two; users are outside this part of the file */
+int nfs_sbwait();
+void nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();
+void nfs_rcvunlock(), nqnfs_serverd(), nqnfs_clientlease();
+struct mbuf *nfsm_rpchead();
+int nfsrtton = 0;
+struct nfsrtt nfsrtt;
+struct nfsd nfsd_head;
+
+int nfsrv_null(),
+ nfsrv_getattr(),
+ nfsrv_setattr(),
+ nfsrv_lookup(),
+ nfsrv_readlink(),
+ nfsrv_read(),
+ nfsrv_write(),
+ nfsrv_create(),
+ nfsrv_remove(),
+ nfsrv_rename(),
+ nfsrv_link(),
+ nfsrv_symlink(),
+ nfsrv_mkdir(),
+ nfsrv_rmdir(),
+ nfsrv_readdir(),
+ nfsrv_statfs(),
+ nfsrv_noop(),
+ nqnfsrv_readdirlook(),
+ nqnfsrv_getlease(),
+ nqnfsrv_vacated(),
+ nqnfsrv_access();
+
+int (*nfsrv_procs[NFS_NPROCS])() = { /* rpc service routines; the trailing numbers are array indices, presumably the procedure numbers */
+ nfsrv_null, /* 0 */
+ nfsrv_getattr, /* 1 */
+ nfsrv_setattr, /* 2 */
+ nfsrv_noop, /* 3 */
+ nfsrv_lookup, /* 4 */
+ nfsrv_readlink, /* 5 */
+ nfsrv_read, /* 6 */
+ nfsrv_noop, /* 7 */
+ nfsrv_write, /* 8 */
+ nfsrv_create, /* 9 */
+ nfsrv_remove, /* 10 */
+ nfsrv_rename, /* 11 */
+ nfsrv_link, /* 12 */
+ nfsrv_symlink, /* 13 */
+ nfsrv_mkdir, /* 14 */
+ nfsrv_rmdir, /* 15 */
+ nfsrv_readdir, /* 16 */
+ nfsrv_statfs, /* 17 */
+ nqnfsrv_readdirlook, /* 18 */
+ nqnfsrv_getlease, /* 19 */
+ nqnfsrv_vacated, /* 20 */
+ nfsrv_noop, /* 21 */
+ nqnfsrv_access, /* 22 */
+};
+
+struct nfsreq nfsreqh;
+
+/*
+ * Initialize sockets and congestion for a new NFS connection.
+ * We do not free the sockaddr if error.
+ *
+ * Steps, in order: create a socket for the address family of nm_nam;
+ * for AF_INET mounts with NFSMNT_RESVPORT, bind to a port starting at
+ * IPPORT_RESERVED - 1 and counting down while sobind() reports
+ * EADDRINUSE; connect unless NFSMNT_NOCONN is set; set the send and
+ * receive timeouts (5 * hz for soft or interruptible mounts, none
+ * otherwise); reserve buffer space sized from nm_wsize and nm_rsize;
+ * reset the per-mount rtt estimates and the congestion window.
+ * rep may be a null pointer; it is only used to poll nfs_sigintr()
+ * while waiting for the connection to complete.
+ * Returns 0 on success. On failure the socket is torn down through
+ * nfs_disconnect() and the error is returned.
+ */
+nfs_connect(nmp, rep)
+ register struct nfsmount *nmp;
+ struct nfsreq *rep;
+{
+ register struct socket *so;
+ int s, error, rcvreserve, sndreserve;
+ struct sockaddr *saddr;
+ struct sockaddr_in *sin;
+ struct mbuf *m;
+ u_short tport;
+
+ nmp->nm_so = (struct socket *)0;
+ saddr = mtod(nmp->nm_nam, struct sockaddr *);
+ if (error = socreate(saddr->sa_family,
+ &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
+ goto bad;
+ so = nmp->nm_so;
+ nmp->nm_soflags = so->so_proto->pr_flags;
+
+ /*
+ * Some servers require that the client port be a reserved port number.
+ * Ports are tried downwards from IPPORT_RESERVED - 1 and the search
+ * gives up once the port number reaches IPPORT_RESERVED / 2.
+ */
+ if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
+ MGET(m, M_WAIT, MT_SONAME);
+ sin = mtod(m, struct sockaddr_in *);
+ sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ tport = IPPORT_RESERVED - 1;
+ sin->sin_port = htons(tport);
+ while ((error = sobind(so, m)) == EADDRINUSE &&
+ --tport > IPPORT_RESERVED / 2)
+ sin->sin_port = htons(tport);
+ m_freem(m);
+ if (error)
+ goto bad;
+ }
+
+ /*
+ * Protocols that do not require connections may be optionally left
+ * unconnected for servers that reply from a port other than NFS_PORT.
+ */
+ if (nmp->nm_flag & NFSMNT_NOCONN) {
+ if (nmp->nm_soflags & PR_CONNREQUIRED) { /* NOCONN makes no sense for this protocol */
+ error = ENOTCONN;
+ goto bad;
+ }
+ } else {
+ if (error = soconnect(so, nmp->nm_nam))
+ goto bad;
+
+ /*
+ * Wait for the connection to complete. Cribbed from the
+ * connect system call but with the wait timing out so
+ * that interruptible mounts don't hang here for a long time.
+ * Each tsleep() lasts at most 2 * hz ticks; after it the
+ * request, if there is one, is checked with nfs_sigintr().
+ */
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
+ "nfscon", 2 * hz);
+ if ((so->so_state & SS_ISCONNECTING) &&
+ so->so_error == 0 && rep &&
+ (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
+ so->so_state &= ~SS_ISCONNECTING;
+ splx(s);
+ goto bad;
+ }
+ }
+ if (so->so_error) {
+ error = so->so_error;
+ so->so_error = 0;
+ splx(s);
+ goto bad;
+ }
+ splx(s);
+ }
+ if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
+ so->so_rcv.sb_timeo = (5 * hz);
+ so->so_snd.sb_timeo = (5 * hz);
+ } else {
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_timeo = 0;
+ }
+ if (nmp->nm_sotype == SOCK_DGRAM) {
+ sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
+ rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
+ } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
+ rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
+ } else {
+ if (nmp->nm_sotype != SOCK_STREAM)
+ panic("nfscon sotype");
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+ }
+ if (so->so_proto->pr_protocol == IPPROTO_TCP) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+ }
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
+ * 2;
+ rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
+ * 2;
+ }
+ if (error = soreserve(so, sndreserve, rcvreserve))
+ goto bad;
+ so->so_rcv.sb_flags |= SB_NOINTR;
+ so->so_snd.sb_flags |= SB_NOINTR;
+
+ /* Initialize other non-zero congestion variables */
+ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
+ nmp->nm_srtt[4] = (NFS_TIMEO << 3);
+ nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
+ nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
+ nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
+ nmp->nm_sent = 0;
+ nmp->nm_timeouts = 0;
+ return (0);
+
+bad:
+ nfs_disconnect(nmp);
+ return (error);
+}
+
+/*
+ * Re-establish the connection of a mount after it broke on a reliable
+ * protocol:
+ * - tear down the old socket
+ * - call nfs_connect() until it succeeds, sleeping on lbolt between
+ * attempts
+ * - flag every outstanding request of this mount with R_MUSTRESEND
+ * Returns 0 once reconnected, or EINTR when nfs_connect() reports
+ * EINTR or ERESTART; in that case the mount point is DEAD!
+ * nb: The caller must hold the nfs_sndlock() on the mount point.
+ */
+nfs_reconnect(rep)
+ register struct nfsreq *rep;
+{
+ register struct nfsreq *req;
+ register struct nfsmount *nmp = rep->r_nmp;
+ int error;
+
+ nfs_disconnect(nmp);
+ for (;;) {
+ error = nfs_connect(nmp, rep);
+ if (error == 0)
+ break;
+ if (error == EINTR || error == ERESTART)
+ return (EINTR);
+ (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
+ }
+
+ /*
+ * Everything still queued for this mount was sent on the old
+ * socket and has to go out again.
+ */
+ for (req = nfsreqh.r_next; req != &nfsreqh; req = req->r_next)
+ if (req->r_nmp == nmp)
+ req->r_flags |= R_MUSTRESEND;
+ return (0);
+}
+
+/*
+ * Drop the socket of an nfs mount, if it has one: nm_so is cleared
+ * first, then the socket is shut down in both directions and closed.
+ */
+void
+nfs_disconnect(nmp)
+ register struct nfsmount *nmp;
+{
+ register struct socket *oldso = nmp->nm_so;
+
+ if (!oldso)
+ return;
+ nmp->nm_so = (struct socket *)0;
+ soshutdown(oldso, 2);
+ soclose(oldso);
+}
+
+/*
+ * Send one rpc message (the mbuf chain top) on an nfs socket. For
+ * connection based socket types the caller must hold the
+ * nfs_sndlock() on the socket.
+ * rep is the client request being sent; a null rep means the call comes
+ * from a server, which passes the socket in so.
+ * Client side (rep set):
+ * - the socket is taken from the mount, not from so
+ * - top is freed and EINTR returned if the request has R_SOFTTERM set
+ * - top is freed, R_MUSTRESEND set and 0 returned if the mount has no
+ * socket
+ * - a failed send sets R_MUSTRESEND, or turns into EINTR when the
+ * request was soft terminated meanwhile
+ * Both sides:
+ * - a failed send is logged
+ * - only EINTR, ERESTART, EWOULDBLOCK and EPIPE are passed back to the
+ * caller; any other send error is reported as 0
+ */
+nfs_send(so, nam, top, rep)
+ register struct socket *so;
+ struct mbuf *nam;
+ register struct mbuf *top;
+ struct nfsreq *rep;
+{
+ struct mbuf *sendnam;
+ int error, soflags, flags;
+
+ if (!rep) {
+ soflags = so->so_proto->pr_flags;
+ } else {
+ if (rep->r_flags & R_SOFTTERM) {
+ m_freem(top);
+ return (EINTR);
+ }
+ so = rep->r_nmp->nm_so;
+ if (so == NULL) {
+ rep->r_flags |= R_MUSTRESEND;
+ m_freem(top);
+ return (0);
+ }
+ rep->r_flags &= ~R_MUSTRESEND;
+ soflags = rep->r_nmp->nm_soflags;
+ }
+
+ /* A destination address is only passed on an unconnected socket. */
+ sendnam = ((soflags & PR_CONNREQUIRED) ||
+ (so->so_state & SS_ISCONNECTED)) ? (struct mbuf *)0 : nam;
+ flags = (so->so_type == SOCK_SEQPACKET) ? MSG_EOR : 0;
+
+ error = sosend(so, sendnam, (struct uio *)0, top,
+ (struct mbuf *)0, flags);
+ if (!error)
+ return (error);
+
+ if (rep) {
+ log(LOG_INFO, "nfs send error %d for server %s\n",error,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ /*
+ * Client side: a soft terminated request reports EINTR,
+ * anything else is queued for another send.
+ */
+ if (rep->r_flags & R_SOFTTERM)
+ error = EINTR;
+ else
+ rep->r_flags |= R_MUSTRESEND;
+ } else
+ log(LOG_INFO, "nfsd send error %d\n", error);
+
+ /*
+ * Every error outside this short list is treated as
+ * recoverable (soft) and hidden from the caller. (???)
+ */
+ if (!(error == EINTR || error == ERESTART ||
+ error == EWOULDBLOCK || error == EPIPE))
+ error = 0;
+ return (error);
+}
+
+/*
+ * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
+ * done by soreceive(), but for SOCK_STREAM we must deal with the Record
+ * Mark and consolidate the data into a new mbuf list.
+ * nb: Sometimes TCP passes the data up to soreceive() in long lists of
+ * small mbufs.
+ * For SOCK_STREAM we must be very careful to read an entire record once
+ * we have read any of it, even if the system call has been interrupted.
+ *
+ * rep   - the outstanding request; supplies the mount (r_nmp) and is
+ *         consulted for R_SOFTTERM and R_MUSTRESEND.
+ * aname - out: cleared on entry; handed to soreceive() as the name
+ *         argument only on the SOCK_DGRAM path when the socket is not
+ *         marked SS_ISCONNECTED.
+ * mp    - out: cleared on entry, then receives the mbuf chain read from
+ *         the socket; freed and reset to 0 when an error is returned.
+ *
+ * Returns 0 or an errno value. EINTR is returned when the request has
+ * R_SOFTTERM set or (non-datagram mounts) already has r_mrep set;
+ * EACCES is returned when a datagram mount has no socket.
+ */
+nfs_receive(rep, aname, mp)
+ register struct nfsreq *rep;
+ struct mbuf **aname;
+ struct mbuf **mp;
+{
+ register struct socket *so;
+ struct uio auio;
+ struct iovec aio;
+ register struct mbuf *m;
+ struct mbuf *control;
+ u_long len;
+ struct mbuf **getnam;
+ int error, sotype, rcvflg;
+ struct proc *p = curproc; /* XXX */
+
+ /*
+ * Set up arguments for soreceive()
+ */
+ *mp = (struct mbuf *)0;
+ *aname = (struct mbuf *)0;
+ sotype = rep->r_nmp->nm_sotype;
+
+ /*
+ * For reliable protocols, lock against other senders/receivers
+ * in case a reconnect is necessary.
+ * For SOCK_STREAM, first get the Record Mark to find out how much
+ * more there is to get.
+ * We must lock the socket against other receivers
+ * until we have an entire rpc request/reply.
+ */
+ if (sotype != SOCK_DGRAM) {
+ if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep))
+ return (error);
+tryagain:
+ /*
+ * Check for fatal errors and resending request.
+ */
+ /*
+ * Ugh: If a reconnect attempt just happened, nm_so
+ * would have changed. NULL indicates a failed
+ * attempt that has essentially shut down this
+ * mount point.
+ */
+ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
+ nfs_sndunlock(&rep->r_nmp->nm_flag);
+ return (EINTR);
+ }
+ if ((so = rep->r_nmp->nm_so) == NULL) {
+ if (error = nfs_reconnect(rep)) {
+ nfs_sndunlock(&rep->r_nmp->nm_flag);
+ return (error);
+ }
+ goto tryagain;
+ }
+ while (rep->r_flags & R_MUSTRESEND) {
+ m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+ nfsstats.rpcretries++;
+ if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) {
+ if (error == EINTR || error == ERESTART ||
+ (error = nfs_reconnect(rep))) {
+ nfs_sndunlock(&rep->r_nmp->nm_flag);
+ return (error);
+ }
+ goto tryagain;
+ }
+ }
+ nfs_sndunlock(&rep->r_nmp->nm_flag); /* send lock is not held while receiving */
+ if (sotype == SOCK_STREAM) {
+ aio.iov_base = (caddr_t) &len;
+ aio.iov_len = sizeof(u_long);
+ auio.uio_iov = &aio;
+ auio.uio_iovcnt = 1;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_offset = 0;
+ auio.uio_resid = sizeof(u_long);
+ auio.uio_procp = p;
+ do {
+ rcvflg = MSG_WAITALL;
+ error = soreceive(so, (struct mbuf **)0, &auio,
+ (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
+ if (error == EWOULDBLOCK && rep) {
+ if (rep->r_flags & R_SOFTTERM)
+ return (EINTR);
+ }
+ } while (error == EWOULDBLOCK);
+ if (!error && auio.uio_resid > 0) {
+ log(LOG_INFO,
+ "short receive (%d/%d) from nfs server %s\n",
+ sizeof(u_long) - auio.uio_resid,
+ sizeof(u_long),
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = EPIPE;
+ }
+ if (error)
+ goto errout;
+ len = ntohl(len) & ~0x80000000; /* drop the top bit of the record mark, keep the length */
+ /*
+ * This is SERIOUS! We are out of sync with the sender
+ * and forcing a disconnect/reconnect is all I can do.
+ */
+ if (len > NFS_MAXPACKET) {
+ log(LOG_ERR, "%s (%d) from nfs server %s\n",
+ "impossible packet length",
+ len,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = EFBIG;
+ goto errout;
+ }
+ auio.uio_resid = len;
+ do {
+ rcvflg = MSG_WAITALL;
+ error = soreceive(so, (struct mbuf **)0,
+ &auio, mp, (struct mbuf **)0, &rcvflg);
+ } while (error == EWOULDBLOCK || error == EINTR ||
+ error == ERESTART); /* record body: keep reading even if interrupted */
+ if (!error && auio.uio_resid > 0) {
+ log(LOG_INFO,
+ "short receive (%d/%d) from nfs server %s\n",
+ len - auio.uio_resid, len,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = EPIPE;
+ }
+ } else {
+ /*
+ * NB: Since uio_resid is big, MSG_WAITALL is ignored
+ * and soreceive() will return when it has either a
+ * control msg or a data msg.
+ * We have no use for control msg., but must grab them
+ * and then throw them away so we know what is going
+ * on.
+ */
+ auio.uio_resid = len = 100000000; /* Anything Big */
+ auio.uio_procp = p;
+ do {
+ rcvflg = 0;
+ error = soreceive(so, (struct mbuf **)0,
+ &auio, mp, &control, &rcvflg);
+ if (control)
+ m_freem(control);
+ if (error == EWOULDBLOCK && rep) {
+ if (rep->r_flags & R_SOFTTERM)
+ return (EINTR);
+ }
+ } while (error == EWOULDBLOCK ||
+ (!error && *mp == NULL && control));
+ if ((rcvflg & MSG_EOR) == 0)
+ printf("Egad!!\n");
+ if (!error && *mp == NULL)
+ error = EPIPE;
+ len -= auio.uio_resid;
+ }
+errout:
+ if (error && error != EINTR && error != ERESTART) {
+ m_freem(*mp);
+ *mp = (struct mbuf *)0;
+ if (error != EPIPE)
+ log(LOG_INFO,
+ "receive error %d from nfs server %s\n",
+ error,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
+ if (!error)
+ error = nfs_reconnect(rep);
+ if (!error)
+ goto tryagain; /* re-enters with the send lock held */
+ }
+ } else {
+ if ((so = rep->r_nmp->nm_so) == NULL)
+ return (EACCES);
+ if (so->so_state & SS_ISCONNECTED)
+ getnam = (struct mbuf **)0;
+ else
+ getnam = aname;
+ auio.uio_resid = len = 1000000;
+ auio.uio_procp = p;
+ do {
+ rcvflg = 0;
+ error = soreceive(so, getnam, &auio, mp,
+ (struct mbuf **)0, &rcvflg);
+ if (error == EWOULDBLOCK &&
+ (rep->r_flags & R_SOFTTERM))
+ return (EINTR);
+ } while (error == EWOULDBLOCK);
+ len -= auio.uio_resid;
+ }
+ if (error) {
+ m_freem(*mp);
+ *mp = (struct mbuf *)0;
+ }
+ /*
+ * Search for any mbufs that are not a multiple of 4 bytes long
+ * or with m_data not longword aligned.
+ * These could cause pointer alignment problems, so copy them to
+ * well aligned mbufs.
+ */
+ nfs_realign(*mp, 5 * NFSX_UNSIGNED);
+ return (error);
+}
+
+/*
+ * Implement receipt of reply on a socket.
+ * We must search through the list of received datagrams matching them
+ * with outstanding requests using the xid, until ours is found.
+ *
+ * myrep - the request whose reply the caller is waiting for.
+ *
+ * Returns 0 once myrep->r_mrep is set, or, when R_GETONEREP is set in
+ * myrep->r_flags, after a single incoming message or ignorable socket
+ * error has been handled; otherwise returns the error from
+ * nfs_rcvlock() or nfs_receive().
+ */
+/* ARGSUSED */
+nfs_reply(myrep)
+ struct nfsreq *myrep;
+{
+ register struct nfsreq *rep;
+ register struct nfsmount *nmp = myrep->r_nmp;
+ register long t1;
+ struct mbuf *mrep, *nam, *md;
+ u_long rxid, *tl;
+ caddr_t dpos, cp2;
+ int error;
+
+ /*
+ * Loop around until we get our own reply
+ */
+ for (;;) {
+ /*
+ * Lock against other receivers so that I don't get stuck in
+ * sbwait() after someone else has received my reply for me.
+ * Also necessary for connection based protocols to avoid
+ * race conditions during a reconnect.
+ */
+ if (error = nfs_rcvlock(myrep))
+ return (error);
+ /* Already received, bye bye */
+ if (myrep->r_mrep != NULL) {
+ nfs_rcvunlock(&nmp->nm_flag);
+ return (0);
+ }
+ /*
+ * Get the next Rpc reply off the socket
+ */
+ error = nfs_receive(myrep, &nam, &mrep);
+ nfs_rcvunlock(&nmp->nm_flag);
+ if (error) {
+
+ /*
+ * Ignore routing errors on connectionless protocols??
+ */
+ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
+ nmp->nm_so->so_error = 0;
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ continue;
+ }
+ return (error);
+ }
+ if (nam)
+ m_freem(nam);
+
+ /*
+ * Get the xid and check that it is an rpc reply
+ */
+ md = mrep;
+ dpos = mtod(md, caddr_t);
+ nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+ rxid = *tl++;
+ if (*tl != rpc_reply) {
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if (nqnfs_callback(nmp, mrep, md, dpos))
+ nfsstats.rpcinvalid++;
+ } else {
+ nfsstats.rpcinvalid++;
+ m_freem(mrep);
+ }
+nfsmout:
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ continue;
+ }
+
+ /*
+ * Loop through the request list to match up the reply
+ * Iff no match, just drop the datagram
+ */
+ rep = nfsreqh.r_next;
+ while (rep != &nfsreqh) {
+ if (rep->r_mrep == NULL && rxid == rep->r_xid) {
+ /* Found it.. */
+ rep->r_mrep = mrep;
+ rep->r_md = md;
+ rep->r_dpos = dpos;
+ if (nfsrtton) {
+ struct rttl *rt;
+
+ rt = &nfsrtt.rttl[nfsrtt.pos];
+ rt->proc = rep->r_procnum;
+ rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
+ rt->sent = nmp->nm_sent;
+ rt->cwnd = nmp->nm_cwnd;
+ rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
+ rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
+ rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
+ rt->tstamp = time;
+ if (rep->r_flags & R_TIMING)
+ rt->rtt = rep->r_rtt;
+ else
+ rt->rtt = 1000000;
+ nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
+ }
+ /*
+ * Update congestion window.
+ * Do the additive increase of
+ * one rpc/rtt.
+ */
+ if (nmp->nm_cwnd <= nmp->nm_sent) {
+ nmp->nm_cwnd +=
+ (NFS_CWNDSCALE * NFS_CWNDSCALE +
+ (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
+ if (nmp->nm_cwnd > NFS_MAXCWND)
+ nmp->nm_cwnd = NFS_MAXCWND;
+ }
+ rep->r_flags &= ~R_SENT;
+ nmp->nm_sent -= NFS_CWNDSCALE;
+ /*
+ * Update rtt using a gain of 0.125 on the mean
+ * and a gain of 0.25 on the deviation.
+ */
+ if (rep->r_flags & R_TIMING) {
+ /*
+ * Since the timer resolution of
+ * NFS_HZ is so coarse, it can often
+ * result in r_rtt == 0. Since
+ * r_rtt == N means that the actual
+ * rtt is between N+dt and N+2-dt ticks,
+ * add 1.
+ */
+ t1 = rep->r_rtt + 1;
+ t1 -= (NFS_SRTT(rep) >> 3);
+ NFS_SRTT(rep) += t1;
+ if (t1 < 0)
+ t1 = -t1;
+ t1 -= (NFS_SDRTT(rep) >> 2);
+ NFS_SDRTT(rep) += t1;
+ }
+ nmp->nm_timeouts = 0;
+ break;
+ }
+ rep = rep->r_next;
+ }
+ /*
+ * If not matched to a request, drop it.
+ * If it's mine, get out.
+ */
+ if (rep == &nfsreqh) {
+ nfsstats.rpcunexpected++;
+ m_freem(mrep);
+ } else if (rep == myrep) {
+ if (rep->r_mrep == NULL)
+ panic("nfsreply nil");
+ return (0);
+ }
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ }
+}
+
+/*
+ * nfs_request - goes something like this
+ * - fill in request struct
+ * - links it into list
+ * - calls nfs_send() for first transmit
+ * - calls nfs_receive() to get reply
+ * - break down rpc header and return with nfs reply pointed to
+ * by mrep or error
+ * nb: always frees up mreq mbuf list
+ *
+ * vp      - vnode the request is for; its mount supplies the nfsmount.
+ * mrest   - mbuf chain holding the procedure arguments.
+ * procnum - NFS procedure number.
+ * procp, cred - process and credentials recorded in / used to build the
+ *           request.
+ * mrp, mdp, dposp - out: on success receive the reply chain, the current
+ *           mbuf and the current position within it.
+ *
+ * Returns 0 on success, otherwise an errno value (EOPNOTSUPP, EAUTH,
+ * EACCES and EPROTONOSUPPORT are generated here; other values come from
+ * the send/receive path or from the status word of the reply).
+ * Note: the wait for the reply is made through nfs_reply().
+ */
+nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
+ struct vnode *vp;
+ struct mbuf *mrest;
+ int procnum;
+ struct proc *procp;
+ struct ucred *cred;
+ struct mbuf **mrp;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+{
+ register struct mbuf *m, *mrep;
+ register struct nfsreq *rep;
+ register u_long *tl;
+ register int i;
+ struct nfsmount *nmp;
+ struct mbuf *md, *mheadend;
+ struct nfsreq *reph;
+ struct nfsnode *np;
+ time_t reqtime, waituntil;
+ caddr_t dpos, cp2;
+ int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
+ int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
+ u_long xid;
+ u_quad_t frev;
+ char *auth_str;
+
+ nmp = VFSTONFS(vp->v_mount);
+ MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
+ rep->r_nmp = nmp;
+ rep->r_vp = vp;
+ rep->r_procp = procp;
+ rep->r_procnum = procnum;
+ i = 0;
+ m = mrest;
+ while (m) {
+ i += m->m_len;
+ m = m->m_next;
+ }
+ mrest_len = i; /* total byte count of the argument chain */
+
+ /*
+ * Get the RPC header with authorization.
+ */
+kerbauth:
+ auth_str = (char *)0;
+ if (nmp->nm_flag & NFSMNT_KERB) {
+ if (failed_auth) {
+ error = nfs_getauth(nmp, rep, cred, &auth_type,
+ &auth_str, &auth_len);
+ if (error) {
+ free((caddr_t)rep, M_NFSREQ);
+ m_freem(mrest);
+ return (error);
+ }
+ } else {
+ auth_type = RPCAUTH_UNIX;
+ auth_len = 5 * NFSX_UNSIGNED;
+ }
+ } else {
+ auth_type = RPCAUTH_UNIX;
+ if (cred->cr_ngroups < 1)
+ panic("nfsreq nogrps");
+ auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
+ nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
+ 5 * NFSX_UNSIGNED;
+ }
+ m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum,
+ auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid);
+ if (auth_str)
+ free(auth_str, M_TEMP);
+
+ /*
+ * For stream protocols, insert a Sun RPC Record Mark.
+ */
+ if (nmp->nm_sotype == SOCK_STREAM) {
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ *mtod(m, u_long *) = htonl(0x80000000 |
+ (m->m_pkthdr.len - NFSX_UNSIGNED));
+ }
+ rep->r_mreq = m;
+ rep->r_xid = xid;
+tryagain:
+ if (nmp->nm_flag & NFSMNT_SOFT)
+ rep->r_retry = nmp->nm_retry;
+ else
+ rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
+ rep->r_rtt = rep->r_rexmit = 0;
+ if (proct[procnum] > 0)
+ rep->r_flags = R_TIMING;
+ else
+ rep->r_flags = 0;
+ rep->r_mrep = NULL;
+
+ /*
+ * Do the client side RPC.
+ */
+ nfsstats.rpcrequests++;
+ /*
+ * Chain request into list of outstanding requests. Be sure
+ * to put it LAST so timer finds oldest requests first.
+ */
+ s = splsoftclock();
+ reph = &nfsreqh;
+ reph->r_prev->r_next = rep;
+ rep->r_prev = reph->r_prev;
+ reph->r_prev = rep;
+ rep->r_next = reph;
+
+ /* Get send time for nqnfs */
+ reqtime = time.tv_sec;
+
+ /*
+ * If backing off another request or avoiding congestion, don't
+ * send this one now but let timer do it. If not timing a request,
+ * do it now.
+ */
+ if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
+ (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+ nmp->nm_sent < nmp->nm_cwnd)) {
+ splx(s);
+ if (nmp->nm_soflags & PR_CONNREQUIRED)
+ error = nfs_sndlock(&nmp->nm_flag, rep);
+ if (!error) {
+ m = m_copym(m, 0, M_COPYALL, M_WAIT);
+ error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
+ if (nmp->nm_soflags & PR_CONNREQUIRED)
+ nfs_sndunlock(&nmp->nm_flag);
+ }
+ if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
+ nmp->nm_sent += NFS_CWNDSCALE;
+ rep->r_flags |= R_SENT;
+ }
+ } else {
+ splx(s);
+ rep->r_rtt = -1; /* not sent yet; nfs_timer() skips the rtt check when r_rtt < 0 */
+ }
+
+ /*
+ * Wait for the reply from our send or the timer's.
+ */
+ if (!error || error == EPIPE)
+ error = nfs_reply(rep);
+
+ /*
+ * RPC done, unlink the request.
+ */
+ s = splsoftclock();
+ rep->r_prev->r_next = rep->r_next;
+ rep->r_next->r_prev = rep->r_prev;
+ splx(s);
+
+ /*
+ * Decrement the outstanding request count.
+ */
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_SENT; /* paranoia */
+ nmp->nm_sent -= NFS_CWNDSCALE;
+ }
+
+ /*
+ * If there was a successful reply and a tprintf msg.
+ * tprintf a response.
+ */
+ if (!error && (rep->r_flags & R_TPRINTFMSG))
+ nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
+ "is alive again");
+ mrep = rep->r_mrep;
+ md = rep->r_md;
+ dpos = rep->r_dpos;
+ if (error) {
+ m_freem(rep->r_mreq);
+ free((caddr_t)rep, M_NFSREQ);
+ return (error);
+ }
+
+ /*
+ * break down the rpc header and check if ok
+ */
+ nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+ if (*tl++ == rpc_msgdenied) {
+ if (*tl == rpc_mismatch)
+ error = EOPNOTSUPP;
+ else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
+ if (*tl == rpc_rejectedcred && failed_auth == 0) {
+ failed_auth++; /* retry once, this time via nfs_getauth() */
+ mheadend->m_next = (struct mbuf *)0;
+ m_freem(mrep);
+ m_freem(rep->r_mreq);
+ goto kerbauth;
+ } else
+ error = EAUTH;
+ } else
+ error = EACCES;
+ m_freem(mrep);
+ m_freem(rep->r_mreq);
+ free((caddr_t)rep, M_NFSREQ);
+ return (error);
+ }
+
+ /*
+ * skip over the auth_verf, someday we may want to cache auth_short's
+ * for nfs_reqhead(), but for now just dump it
+ */
+ if (*++tl != 0) {
+ i = nfsm_rndup(fxdr_unsigned(long, *tl));
+ nfsm_adv(i);
+ }
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ /* 0 == ok */
+ if (*tl == 0) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ if (*tl != 0) {
+ error = fxdr_unsigned(int, *tl);
+ m_freem(mrep);
+ if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ error == NQNFS_TRYLATER) {
+ error = 0;
+ waituntil = time.tv_sec + trylater_delay;
+ while (time.tv_sec < waituntil)
+ (void) tsleep((caddr_t)&lbolt,
+ PSOCK, "nqnfstry", 0);
+ trylater_delay *= nfs_backoff[trylater_cnt];
+ if (trylater_cnt < 7)
+ trylater_cnt++;
+ goto tryagain;
+ }
+
+ /*
+ * If the File Handle was stale, invalidate the
+ * lookup cache, just in case.
+ */
+ if (error == ESTALE)
+ cache_purge(vp);
+ m_freem(rep->r_mreq);
+ free((caddr_t)rep, M_NFSREQ);
+ return (error);
+ }
+
+ /*
+ * For nqnfs, get any lease in reply
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ if (*tl) {
+ np = VTONFS(vp);
+ nqlflag = fxdr_unsigned(int, *tl);
+ nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+ cachable = fxdr_unsigned(int, *tl++);
+ reqtime += fxdr_unsigned(int, *tl++);
+ if (reqtime > time.tv_sec) {
+ fxdr_hyper(tl, &frev);
+ nqnfs_clientlease(nmp, np, nqlflag,
+ cachable, reqtime, frev);
+ }
+ }
+ }
+ *mrp = mrep;
+ *mdp = md;
+ *dposp = dpos;
+ m_freem(rep->r_mreq);
+ FREE((caddr_t)rep, M_NFSREQ);
+ return (0);
+ }
+ m_freem(mrep);
+ m_freem(rep->r_mreq);
+ free((caddr_t)rep, M_NFSREQ);
+ error = EPROTONOSUPPORT;
+nfsmout:
+ return (error);
+}
+
+/*
+ * Generate the rpc reply header
+ * siz arg. is used to decide if adding a cluster is worthwhile
+ *
+ * nd supplies the xid (nd_retxid) and the lease fields (nd_nqlflag,
+ * nd_duration); err selects the reply status; cache and frev are only
+ * used when a lease is piggybacked on the reply.
+ * On return *mrq is the head of the reply chain, *mbp is mb and *bposp
+ * is bpos, ready for further building. Always returns 0.
+ */
+nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp)
+ int siz;
+ struct nfsd *nd;
+ int err;
+ int cache;
+ u_quad_t *frev;
+ struct mbuf **mrq;
+ struct mbuf **mbp;
+ caddr_t *bposp;
+{
+ register u_long *tl;
+ register struct mbuf *mreq;
+ caddr_t bpos;
+ struct mbuf *mb, *mb2;
+
+ MGETHDR(mreq, M_WAIT, MT_DATA);
+ mb = mreq;
+ /*
+ * If this is a big reply, use a cluster else
+ * try and leave leading space for the lower level headers.
+ */
+ siz += RPC_REPLYSIZ;
+ if (siz >= MINCLSIZE) {
+ MCLGET(mreq, M_WAIT);
+ } else
+ mreq->m_data += max_hdr;
+ tl = mtod(mreq, u_long *);
+ mreq->m_len = 6*NFSX_UNSIGNED;
+ bpos = ((caddr_t)tl)+mreq->m_len;
+ *tl++ = nd->nd_retxid;
+ *tl++ = rpc_reply;
+ if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) {
+ *tl++ = rpc_msgdenied;
+ if (err == NQNFS_AUTHERR) {
+ *tl++ = rpc_autherr;
+ *tl = rpc_rejectedcred;
+ mreq->m_len -= NFSX_UNSIGNED; /* this form uses only five of the six words */
+ bpos -= NFSX_UNSIGNED;
+ } else {
+ *tl++ = rpc_mismatch;
+ *tl++ = txdr_unsigned(2);
+ *tl = txdr_unsigned(2);
+ }
+ } else {
+ *tl++ = rpc_msgaccepted;
+ *tl++ = 0;
+ *tl++ = 0;
+ switch (err) {
+ case EPROGUNAVAIL:
+ *tl = txdr_unsigned(RPC_PROGUNAVAIL);
+ break;
+ case EPROGMISMATCH:
+ *tl = txdr_unsigned(RPC_PROGMISMATCH);
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(2);
+ *tl = txdr_unsigned(2); /* someday 3 */
+ break;
+ case EPROCUNAVAIL:
+ *tl = txdr_unsigned(RPC_PROCUNAVAIL);
+ break;
+ default:
+ *tl = 0;
+ if (err != VNOVAL) {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ if (err)
+ *tl = txdr_unsigned(nfsrv_errmap[err - 1]);
+ else
+ *tl = 0;
+ }
+ break;
+ };
+ }
+
+ /*
+ * For nqnfs, piggyback lease as requested.
+ */
+ if (nd->nd_nqlflag != NQL_NOVAL && err == 0) {
+ if (nd->nd_nqlflag) {
+ nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nd->nd_nqlflag);
+ *tl++ = txdr_unsigned(cache);
+ *tl++ = txdr_unsigned(nd->nd_duration);
+ txdr_hyper(frev, tl);
+ } else {
+ if (nd->nd_nqlflag != 0) /* cannot be true: this is the nd_nqlflag == 0 branch */
+ panic("nqreph");
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ *mrq = mreq;
+ *mbp = mb;
+ *bposp = bpos;
+ if (err != 0 && err != VNOVAL)
+ nfsstats.srvrpc_errs++;
+ return (0);
+}
+
+/*
+ * Nfs timer routine
+ * Scan the nfsreq list and retransmit any requests that have timed out
+ * To avoid retransmission attempts on STREAM sockets (in the future) make
+ * sure to set the r_retry field to 0 (implies nm_retry == 0).
+ *
+ * The scan runs at splnet(). Before returning, the routine re-arms
+ * itself with timeout() for hz / NFS_HZ ticks later; arg is not used.
+ */
+void
+nfs_timer(arg)
+ void *arg;
+{
+ register struct nfsreq *rep;
+ register struct mbuf *m;
+ register struct socket *so;
+ register struct nfsmount *nmp;
+ register int timeo;
+ static long lasttime = 0;
+ int s, error;
+
+ s = splnet();
+ for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
+ nmp = rep->r_nmp;
+ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
+ continue;
+ if (nfs_sigintr(nmp, rep, rep->r_procp)) {
+ rep->r_flags |= R_SOFTTERM;
+ continue;
+ }
+ if (rep->r_rtt >= 0) {
+ rep->r_rtt++;
+ if (nmp->nm_flag & NFSMNT_DUMBTIMR)
+ timeo = nmp->nm_timeo;
+ else
+ timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
+ if (nmp->nm_timeouts > 0)
+ timeo *= nfs_backoff[nmp->nm_timeouts - 1];
+ if (rep->r_rtt <= timeo)
+ continue;
+ if (nmp->nm_timeouts < 8)
+ nmp->nm_timeouts++;
+ }
+ /*
+ * Check for server not responding
+ */
+ if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
+ rep->r_rexmit > nmp->nm_deadthresh) {
+ nfs_msg(rep->r_procp,
+ nmp->nm_mountp->mnt_stat.f_mntfromname,
+ "not responding");
+ rep->r_flags |= R_TPRINTFMSG;
+ }
+ if (rep->r_rexmit >= rep->r_retry) { /* too many */
+ nfsstats.rpctimeouts++;
+ rep->r_flags |= R_SOFTTERM;
+ continue;
+ }
+ if (nmp->nm_sotype != SOCK_DGRAM) {
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ continue; /* non-datagram sockets are only counted here, never resent */
+ }
+ if ((so = nmp->nm_so) == NULL)
+ continue;
+
+ /*
+ * If there is enough space and the window allows..
+ * Resend it
+ * Set r_rtt to -1 in case we fail to send it now.
+ */
+ rep->r_rtt = -1;
+ if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
+ ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+ (rep->r_flags & R_SENT) ||
+ nmp->nm_sent < nmp->nm_cwnd) &&
+ (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
+ if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
+ error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+ (struct mbuf *)0, (struct mbuf *)0);
+ else
+ error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+ nmp->nm_nam, (struct mbuf *)0);
+ if (error) {
+ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
+ so->so_error = 0;
+ } else {
+ /*
+ * Iff first send, start timing
+ * else turn timing off, backoff timer
+ * and divide congestion window by 2.
+ */
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_TIMING;
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ nmp->nm_cwnd >>= 1;
+ if (nmp->nm_cwnd < NFS_CWNDSCALE)
+ nmp->nm_cwnd = NFS_CWNDSCALE;
+ nfsstats.rpcretries++;
+ } else {
+ rep->r_flags |= R_SENT;
+ nmp->nm_sent += NFS_CWNDSCALE;
+ }
+ rep->r_rtt = 0;
+ }
+ }
+ }
+
+ /*
+ * Call the nqnfs server timer once a second to handle leases.
+ */
+ if (lasttime != time.tv_sec) {
+ lasttime = time.tv_sec;
+ nqnfs_serverd();
+ }
+ splx(s);
+ timeout(nfs_timer, (void *)0, hz / NFS_HZ);
+}
+
+/*
+ * Decide whether an NFS operation should be abandoned.
+ *
+ * A request that already carries R_SOFTTERM is always reported as
+ * interrupted. Otherwise signals are only considered on mounts with
+ * NFSMNT_INT set, and only those signals in NFSINT_SIGMASK that are
+ * pending for p and neither masked nor ignored count.
+ *
+ * rep and p may each be null. Returns EINTR to abandon, 0 to carry on.
+ */
+nfs_sigintr(nmp, rep, p)
+    struct nfsmount *nmp;
+    struct nfsreq *rep;
+    register struct proc *p;
+{
+
+    /* Soft-terminated requests are finished regardless of mount flags. */
+    if (rep != NULL && (rep->r_flags & R_SOFTTERM) != 0)
+        return (EINTR);
+
+    /* Non-interruptible mount: signals never matter. */
+    if ((nmp->nm_flag & NFSMNT_INT) == 0)
+        return (0);
+
+    /* No process, or nothing pending at all. */
+    if (p == NULL || p->p_siglist == 0)
+        return (0);
+
+    return ((p->p_siglist & ~p->p_sigmask & ~p->p_sigignore &
+        NFSINT_SIGMASK) ? EINTR : 0);
+}
+
+/*
+ * Lock a socket against others.
+ * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
+ * and also to avoid race conditions between the processes with nfs requests
+ * in progress when a reconnect is necessary.
+ *
+ * flagp - points at the flag word holding NFSMNT_SNDLOCK / NFSMNT_WANTSND.
+ * rep   - request on whose behalf the lock is taken, or NULL. With a
+ *         NULL rep the sleep is never interruptible and no termination
+ *         check is made while waiting.
+ *
+ * Returns 0 with NFSMNT_SNDLOCK set, or EINTR if nfs_sigintr() reports
+ * that the request should be abandoned while waiting for the lock.
+ */
+nfs_sndlock(flagp, rep)
+    register int *flagp;
+    struct nfsreq *rep;
+{
+    struct proc *p;
+    int slpflag = 0, slptimeo = 0;
+
+    if (rep) {
+        p = rep->r_procp;
+        if (rep->r_nmp->nm_flag & NFSMNT_INT)
+            slpflag = PCATCH;
+    } else
+        p = (struct proc *)0;
+    while (*flagp & NFSMNT_SNDLOCK) {
+        /*
+         * rep is allowed to be NULL (see above), so it must not be
+         * dereferenced unconditionally when the lock is contended.
+         */
+        if (rep && nfs_sigintr(rep->r_nmp, rep, p))
+            return (EINTR);
+        *flagp |= NFSMNT_WANTSND;
+        (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
+            slptimeo);
+        /*
+         * After the first PCATCH sleep, fall back to uninterruptible
+         * sleeps with a 2 * hz timeout, so the check above is
+         * re-run periodically.
+         */
+        if (slpflag == PCATCH) {
+            slpflag = 0;
+            slptimeo = 2 * hz;
+        }
+    }
+    *flagp |= NFSMNT_SNDLOCK;
+    return (0);
+}
+
+/*
+ * Release the send lock taken by nfs_sndlock() and wake any waiter.
+ * Panics if the lock is not currently held.
+ *
+ * flagp - points at the flag word holding NFSMNT_SNDLOCK / NFSMNT_WANTSND.
+ */
+void
+nfs_sndunlock(flagp)
+    register int *flagp;
+{
+    int wanted;
+
+    if (!(*flagp & NFSMNT_SNDLOCK))
+        panic("nfs sndunlock");
+
+    /* Remember whether anyone is waiting, then drop both bits at once. */
+    wanted = *flagp & NFSMNT_WANTSND;
+    *flagp &= ~(NFSMNT_SNDLOCK | NFSMNT_WANTSND);
+    if (wanted)
+        wakeup((caddr_t)flagp);
+}
+
+/*
+ * Take the receive lock (NFSMNT_RCVLOCK) on the mount that rep belongs to.
+ *
+ * While the lock is held by someone else, NFSMNT_WANTRCV is set and the
+ * caller sleeps on the flag word. The first sleep uses PCATCH when the
+ * mount has NFSMNT_INT set; later sleeps use a 2 * hz timeout instead.
+ *
+ * Returns 0 with the lock held, or EINTR if nfs_sigintr() says the
+ * request should be abandoned.
+ */
+nfs_rcvlock(rep)
+    register struct nfsreq *rep;
+{
+    register int *flagp = &rep->r_nmp->nm_flag;
+    int slpflag, slptimeo = 0;
+
+    slpflag = (*flagp & NFSMNT_INT) ? PCATCH : 0;
+    for (;;) {
+        if ((*flagp & NFSMNT_RCVLOCK) == 0)
+            break;
+        if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp) != 0)
+            return (EINTR);
+        *flagp |= NFSMNT_WANTRCV;
+        (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
+            slptimeo);
+        if (slpflag != PCATCH)
+            continue;
+        /* Only the first sleep is signal-catching; poll afterwards. */
+        slpflag = 0;
+        slptimeo = 2 * hz;
+    }
+    *flagp |= NFSMNT_RCVLOCK;
+    return (0);
+}
+
+/*
+ * Release the receive lock taken by nfs_rcvlock() and wake any waiter.
+ * Panics if the lock is not currently held.
+ *
+ * flagp - points at the flag word holding NFSMNT_RCVLOCK / NFSMNT_WANTRCV.
+ */
+void
+nfs_rcvunlock(flagp)
+    register int *flagp;
+{
+    int wanted;
+
+    if (!(*flagp & NFSMNT_RCVLOCK))
+        panic("nfs rcvunlock");
+
+    /* Remember whether anyone is waiting, then drop both bits at once. */
+    wanted = *flagp & NFSMNT_WANTRCV;
+    *flagp &= ~(NFSMNT_RCVLOCK | NFSMNT_WANTRCV);
+    if (wanted)
+        wakeup((caddr_t)flagp);
+}
+
+/*
+ * Check for badly aligned mbuf data areas and
+ * realign data in an mbuf list by copying the data areas up, as required.
+ *
+ * m    - head of the mbuf list to check (may be null).
+ * hsiz - if the first offending mbuf held no more than hsiz bytes and
+ *        has room for more, at most hsiz bytes are packed back into it.
+ *
+ * The list is walked until the first mbuf whose length or data address
+ * is not a multiple of 4; from there on all data is consolidated in
+ * place and the function returns.
+ */
+void
+nfs_realign(m, hsiz)
+ register struct mbuf *m;
+ int hsiz;
+{
+ register struct mbuf *m2;
+ register int siz, mlen, olen;
+ register caddr_t tcp, fcp;
+ struct mbuf *mnew;
+
+ while (m) {
+ /*
+ * This never happens for UDP, rarely happens for TCP
+ * but frequently happens for iso transport.
+ */
+ if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
+ olen = m->m_len;
+ fcp = mtod(m, caddr_t);
+ if ((int)fcp & 0x3) {
+ m->m_flags &= ~M_PKTHDR;
+ if (m->m_flags & M_EXT)
+ m->m_data = m->m_ext.ext_buf +
+ ((m->m_ext.ext_size - olen) & ~0x3);
+ else
+ m->m_data = m->m_dat;
+ }
+ m->m_len = 0;
+ tcp = mtod(m, caddr_t); /* tcp: copy destination, fcp: copy source */
+ mnew = m;
+ m2 = m->m_next;
+
+ /*
+ * If possible, only put the first invariant part
+ * of the RPC header in the first mbuf.
+ */
+ mlen = M_TRAILINGSPACE(m);
+ if (olen <= hsiz && mlen > hsiz)
+ mlen = hsiz;
+
+ /*
+ * Loop through the mbuf list consolidating data.
+ */
+ while (m) {
+ while (olen > 0) {
+ if (mlen == 0) { /* destination full: move on to the next mbuf */
+ m2->m_flags &= ~M_PKTHDR;
+ if (m2->m_flags & M_EXT)
+ m2->m_data = m2->m_ext.ext_buf;
+ else
+ m2->m_data = m2->m_dat;
+ m2->m_len = 0;
+ mlen = M_TRAILINGSPACE(m2);
+ tcp = mtod(m2, caddr_t);
+ mnew = m2;
+ m2 = m2->m_next;
+ }
+ siz = min(mlen, olen);
+ if (tcp != fcp)
+ bcopy(fcp, tcp, siz);
+ mnew->m_len += siz;
+ mlen -= siz;
+ olen -= siz;
+ tcp += siz;
+ fcp += siz;
+ }
+ m = m->m_next;
+ if (m) {
+ olen = m->m_len;
+ fcp = mtod(m, caddr_t);
+ }
+ }
+
+ /*
+ * Finally, set m_len == 0 for any trailing mbufs that have
+ * been copied out of.
+ */
+ while (m2) {
+ m2->m_len = 0;
+ m2 = m2->m_next;
+ }
+ return;
+ }
+ m = m->m_next;
+ }
+}
+
+/*
+ * Socket upcall routine for the nfsd sockets.
+ * The caddr_t arg is a pointer to the "struct nfssvc_sock".
+ * Essentially do as much as possible non-blocking, else punt and it will
+ * be called with M_WAIT from an nfsd.
+ *
+ * so       - the socket to read from.
+ * waitflag - M_DONTWAIT or M_WAIT. With M_DONTWAIT, a stream socket that
+ *            already has queued records is left for an nfsd to read, and
+ *            on exit an nfsd is woken (nfsrv_wakenfsd) if records are
+ *            queued or SLP_NEEDQ / SLP_DISCONN is set.
+ *
+ * Does nothing if the nfssvc_sock is not SLP_VALID.
+ */
+void
+nfsrv_rcv(so, arg, waitflag)
+ struct socket *so;
+ caddr_t arg;
+ int waitflag;
+{
+ register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
+ register struct mbuf *m;
+ struct mbuf *mp, *nam;
+ struct uio auio;
+ int flags, error;
+
+ if ((slp->ns_flag & SLP_VALID) == 0)
+ return;
+#ifdef notdef
+ /*
+ * Define this to test for nfsds handling this under heavy load.
+ */
+ if (waitflag == M_DONTWAIT) {
+ slp->ns_flag |= SLP_NEEDQ; goto dorecs;
+ }
+#endif
+ auio.uio_procp = NULL;
+ if (so->so_type == SOCK_STREAM) {
+ /*
+ * If there are already records on the queue, defer soreceive()
+ * to an nfsd so that there is feedback to the TCP layer that
+ * the nfs servers are heavily loaded.
+ */
+ if (slp->ns_rec && waitflag == M_DONTWAIT) {
+ slp->ns_flag |= SLP_NEEDQ;
+ goto dorecs;
+ }
+
+ /*
+ * Do soreceive().
+ */
+ auio.uio_resid = 1000000000;
+ flags = MSG_DONTWAIT;
+ error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
+ if (error || mp == (struct mbuf *)0) {
+ if (error == EWOULDBLOCK)
+ slp->ns_flag |= SLP_NEEDQ;
+ else
+ slp->ns_flag |= SLP_DISCONN;
+ goto dorecs;
+ }
+ m = mp;
+ if (slp->ns_rawend) { /* append to the raw stream data already held */
+ slp->ns_rawend->m_next = m;
+ slp->ns_cc += 1000000000 - auio.uio_resid;
+ } else {
+ slp->ns_raw = m;
+ slp->ns_cc = 1000000000 - auio.uio_resid;
+ }
+ while (m->m_next)
+ m = m->m_next;
+ slp->ns_rawend = m;
+
+ /*
+ * Now try and parse record(s) out of the raw stream data.
+ */
+ if (error = nfsrv_getstream(slp, waitflag)) {
+ if (error == EPERM)
+ slp->ns_flag |= SLP_DISCONN;
+ else
+ slp->ns_flag |= SLP_NEEDQ;
+ }
+ } else {
+ do {
+ auio.uio_resid = 1000000000;
+ flags = MSG_DONTWAIT;
+ error = soreceive(so, &nam, &auio, &mp,
+ (struct mbuf **)0, &flags);
+ if (mp) {
+ nfs_realign(mp, 10 * NFSX_UNSIGNED);
+ if (nam) { /* sender address, if any, is chained in front of the data */
+ m = nam;
+ m->m_next = mp;
+ } else
+ m = mp;
+ if (slp->ns_recend)
+ slp->ns_recend->m_nextpkt = m;
+ else
+ slp->ns_rec = m;
+ slp->ns_recend = m;
+ m->m_nextpkt = (struct mbuf *)0;
+ }
+ if (error) {
+ if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
+ && error != EWOULDBLOCK) {
+ slp->ns_flag |= SLP_DISCONN;
+ goto dorecs;
+ }
+ }
+ } while (mp);
+ }
+
+ /*
+ * Now try and process the request records, non-blocking.
+ */
+dorecs:
+ if (waitflag == M_DONTWAIT &&
+ (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
+ nfsrv_wakenfsd(slp);
+}
+
+/*
+ * Try and extract an RPC request from the mbuf data list received on a
+ * stream socket. The "waitflag" argument indicates whether or not it
+ * can sleep.
+ *
+ * Works on slp->ns_raw / ns_cc (raw bytes held) and ns_reclen (length of
+ * the record being assembled, 0 when the next record mark has not been
+ * read yet); completed records are appended to the ns_rec queue.
+ * SLP_GETSTREAM is set for the duration of the call; it is a panic to
+ * enter with it already set.
+ *
+ * Returns 0 when no further complete record is available, EPERM when a
+ * record mark gives a length outside NFS_MINPACKET..NFS_MAXPACKET, and
+ * EWOULDBLOCK when m_copym() fails to split an mbuf.
+ */
+nfsrv_getstream(slp, waitflag)
+ register struct nfssvc_sock *slp;
+ int waitflag;
+{
+ register struct mbuf *m;
+ register char *cp1, *cp2;
+ register int len;
+ struct mbuf *om, *m2, *recm;
+ u_long recmark;
+
+ if (slp->ns_flag & SLP_GETSTREAM)
+ panic("nfs getstream");
+ slp->ns_flag |= SLP_GETSTREAM;
+ for (;;) {
+ if (slp->ns_reclen == 0) {
+ if (slp->ns_cc < NFSX_UNSIGNED) {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (0);
+ }
+ m = slp->ns_raw;
+ if (m->m_len >= NFSX_UNSIGNED) {
+ bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
+ m->m_data += NFSX_UNSIGNED;
+ m->m_len -= NFSX_UNSIGNED;
+ } else {
+ /* record mark straddles mbufs: gather it a byte at a time */
+ cp1 = (caddr_t)&recmark;
+ cp2 = mtod(m, caddr_t);
+ while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
+ while (m->m_len == 0) {
+ m = m->m_next;
+ cp2 = mtod(m, caddr_t);
+ }
+ *cp1++ = *cp2++;
+ m->m_data++;
+ m->m_len--;
+ }
+ }
+ slp->ns_cc -= NFSX_UNSIGNED;
+ slp->ns_reclen = ntohl(recmark) & ~0x80000000;
+ if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (EPERM);
+ }
+ }
+
+ /*
+ * Now get the record part.
+ */
+ if (slp->ns_cc == slp->ns_reclen) {
+ recm = slp->ns_raw;
+ slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
+ slp->ns_cc = slp->ns_reclen = 0;
+ } else if (slp->ns_cc > slp->ns_reclen) {
+ len = 0;
+ m = slp->ns_raw;
+ om = (struct mbuf *)0;
+ while (len < slp->ns_reclen) {
+ if ((len + m->m_len) > slp->ns_reclen) {
+ /* record ends inside this mbuf: copy its head, keep the tail as raw data */
+ m2 = m_copym(m, 0, slp->ns_reclen - len,
+ waitflag);
+ if (m2) {
+ if (om) {
+ om->m_next = m2;
+ recm = slp->ns_raw;
+ } else
+ recm = m2;
+ m->m_data += slp->ns_reclen - len;
+ m->m_len -= slp->ns_reclen - len;
+ len = slp->ns_reclen;
+ } else {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (EWOULDBLOCK);
+ }
+ } else if ((len + m->m_len) == slp->ns_reclen) {
+ om = m;
+ len += m->m_len;
+ m = m->m_next;
+ recm = slp->ns_raw;
+ om->m_next = (struct mbuf *)0;
+ } else {
+ om = m;
+ len += m->m_len;
+ m = m->m_next;
+ }
+ }
+ slp->ns_raw = m;
+ slp->ns_cc -= len;
+ slp->ns_reclen = 0;
+ } else {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (0);
+ }
+ nfs_realign(recm, 10 * NFSX_UNSIGNED);
+ if (slp->ns_recend)
+ slp->ns_recend->m_nextpkt = recm;
+ else
+ slp->ns_rec = recm;
+ slp->ns_recend = recm;
+ }
+}
+
+/*
+ * Take the next queued request record off slp and parse its RPC header.
+ *
+ * The record at the head of slp->ns_rec is unlinked. If its first mbuf
+ * is of type MT_SONAME it is detached and stored in nd->nd_nam, with the
+ * remainder becoming the request; otherwise nd->nd_nam is cleared. The
+ * request is then handed to nfs_getreq().
+ *
+ * Returns ENOBUFS if slp is not SLP_VALID or has no record queued,
+ * otherwise the result of nfs_getreq() (nd->nd_nam is freed on failure).
+ */
+nfsrv_dorec(slp, nd)
+    register struct nfssvc_sock *slp;
+    register struct nfsd *nd;
+{
+    register struct mbuf *rec;
+    int error;
+
+    if ((slp->ns_flag & SLP_VALID) == 0)
+        return (ENOBUFS);
+    rec = slp->ns_rec;
+    if (rec == (struct mbuf *)0)
+        return (ENOBUFS);
+
+    /* Unlink the head record from the queue. */
+    slp->ns_rec = rec->m_nextpkt;
+    if (slp->ns_rec != (struct mbuf *)0)
+        rec->m_nextpkt = (struct mbuf *)0;
+    else
+        slp->ns_recend = (struct mbuf *)0;
+
+    /* Split off a leading address mbuf, if there is one. */
+    nd->nd_nam = (struct mbuf *)0;
+    nd->nd_mrep = rec;
+    if (rec->m_type == MT_SONAME) {
+        nd->nd_nam = rec;
+        nd->nd_mrep = rec->m_next;
+        rec->m_next = (struct mbuf *)0;
+    }
+    nd->nd_md = nd->nd_mrep;
+    nd->nd_dpos = mtod(nd->nd_md, caddr_t);
+
+    error = nfs_getreq(nd, TRUE);
+    if (error != 0)
+        m_freem(nd->nd_nam);
+    return (error);
+}
+
+/*
+ * Parse an RPC request
+ * - verify it
+ * - fill in the cred struct.
+ *
+ * nd         - in: nd_mrep / nd_md / nd_dpos locate the request; out:
+ *              nd_retxid, nd_procnum, nd_repstat, nd_cr, the lease
+ *              fields and the advanced nd_md / nd_dpos.
+ * has_header - non-zero when the xid and message-direction words are
+ *              still in front of the RPC version word (10 words are
+ *              dissected instead of 8).
+ *
+ * Returns EBADRPC (after freeing the request) for a malformed message,
+ * or the error left by an nfsm_* macro that jumped to nfsmout.
+ * Version/program/procedure/auth-flavour rejections return 0 with
+ * nd_repstat set and nd_procnum forced to NFSPROC_NOOP.
+ */
+nfs_getreq(nd, has_header)
+ register struct nfsd *nd;
+ int has_header;
+{
+ register int len, i;
+ register u_long *tl;
+ register long t1;
+ struct uio uio;
+ struct iovec iov;
+ caddr_t dpos, cp2;
+ u_long nfsvers, auth_type;
+ int error = 0, nqnfs = 0;
+ struct mbuf *mrep, *md;
+
+ mrep = nd->nd_mrep;
+ md = nd->nd_md;
+ dpos = nd->nd_dpos;
+ if (has_header) {
+ nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED);
+ nd->nd_retxid = *tl++;
+ if (*tl++ != rpc_call) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ } else {
+ nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED);
+ }
+ nd->nd_repstat = 0;
+ if (*tl++ != rpc_vers) {
+ nd->nd_repstat = ERPCMISMATCH;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nfsvers = nfs_vers;
+ if (*tl != nfs_prog) {
+ if (*tl == nqnfs_prog) {
+ nqnfs++;
+ nfsvers = nqnfs_vers;
+ } else {
+ nd->nd_repstat = EPROGUNAVAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ }
+ tl++;
+ if (*tl++ != nfsvers) {
+ nd->nd_repstat = EPROGMISMATCH;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
+ if (nd->nd_procnum == NFSPROC_NULL)
+ return (0);
+ if (nd->nd_procnum >= NFS_NPROCS ||
+ (!nqnfs && nd->nd_procnum > NFSPROC_STATFS) ||
+ (*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) {
+ nd->nd_repstat = EPROCUNAVAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ auth_type = *tl++;
+ len = fxdr_unsigned(int, *tl++);
+ if (len < 0 || len > RPCAUTH_MAXSIZ) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+
+ /*
+ * Handle auth_unix or auth_kerb.
+ */
+ if (auth_type == rpc_auth_unix) {
+ len = fxdr_unsigned(int, *++tl);
+ if (len < 0 || len > NFS_MAXNAMLEN) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ nfsm_adv(nfsm_rndup(len));
+ nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+ nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+ nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
+ len = fxdr_unsigned(int, *tl);
+ if (len < 0 || len > RPCAUTH_UNIXGIDS) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
+ for (i = 1; i <= len; i++) /* groups beyond NGROUPS - 1 are skipped, not stored */
+ if (i < NGROUPS)
+ nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
+ else
+ tl++;
+ nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
+ } else if (auth_type == rpc_auth_kerb) {
+ nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+ nd->nd_authlen = fxdr_unsigned(int, *tl); /* NOTE(review): wire value; only an upper bound is checked below - confirm a negative length cannot get through */
+ uio.uio_resid = nfsm_rndup(nd->nd_authlen);
+ if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ uio.uio_offset = 0;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_segflg = UIO_SYSSPACE;
+ iov.iov_base = (caddr_t)nd->nd_authstr;
+ iov.iov_len = RPCAUTH_MAXSIZ;
+ nfsm_mtouio(&uio, uio.uio_resid);
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ nd->nd_flag |= NFSD_NEEDAUTH;
+ }
+
+ /*
+ * Do we have any use for the verifier.
+ * According to the "Remote Procedure Call Protocol Spec." it
+ * should be AUTH_NULL, but some clients make it AUTH_UNIX?
+ * For now, just skip over it
+ */
+ len = fxdr_unsigned(int, *++tl);
+ if (len < 0 || len > RPCAUTH_MAXSIZ) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ if (len > 0) {
+ nfsm_adv(nfsm_rndup(len));
+ }
+
+ /*
+ * For nqnfs, get piggybacked lease request.
+ */
+ if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ nd->nd_nqlflag = fxdr_unsigned(int, *tl);
+ if (nd->nd_nqlflag) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ nd->nd_duration = fxdr_unsigned(int, *tl);
+ } else
+ nd->nd_duration = NQ_MINLEASE;
+ } else {
+ nd->nd_nqlflag = NQL_NOVAL;
+ nd->nd_duration = NQ_MINLEASE;
+ }
+ nd->nd_md = md;
+ nd->nd_dpos = dpos;
+ return (0);
+nfsmout:
+ return (error);
+}
+
+/*
+ * Hand a socket with pending work to an idle nfsd.
+ *
+ * The nfsd list is scanned for the first daemon marked NFSD_WAITING;
+ * that daemon is given slp (taking a reference via ns_sref) and woken.
+ * If no daemon is waiting, SLP_DOREC is set on the socket and
+ * NFSD_CHECKSLP on the list head so that a running nfsd will go and
+ * look for the work itself.
+ *
+ * Does nothing if slp is not SLP_VALID.
+ */
+void
+nfsrv_wakenfsd(slp)
+    struct nfssvc_sock *slp;
+{
+    register struct nfsd *nd;
+
+    if (!(slp->ns_flag & SLP_VALID))
+        return;
+
+    for (nd = nfsd_head.nd_next; nd != (struct nfsd *)&nfsd_head;
+        nd = nd->nd_next) {
+        if ((nd->nd_flag & NFSD_WAITING) == 0)
+            continue;
+        nd->nd_flag &= ~NFSD_WAITING;
+        /* A waiting nfsd must not already own a socket. */
+        if (nd->nd_slp)
+            panic("nfsd wakeup");
+        slp->ns_sref++;
+        nd->nd_slp = slp;
+        wakeup((caddr_t)nd);
+        return;
+    }
+
+    /* Nobody idle: leave a note for the running daemons. */
+    slp->ns_flag |= SLP_DOREC;
+    nfsd_head.nd_flag |= NFSD_CHECKSLP;
+}
+
+/*
+ * Print "nfs server <server>: <msg>" through tprintf().
+ *
+ * p      - process used to open the tprintf handle; when null, a NULL
+ *          handle is passed to tprintf() instead.
+ * server - server name to report.
+ * msg    - message text.
+ */
+nfs_msg(p, server, msg)
+    struct proc *p;
+    char *server, *msg;
+{
+    tpr_t tpr = p ? tprintf_open(p) : NULL;
+
+    tprintf(tpr, "nfs server %s: %s\n", server, msg);
+    tprintf_close(tpr);
+}
diff --git a/sys/nfs/nfs_srvcache.c b/sys/nfs/nfs_srvcache.c
new file mode 100644
index 000000000000..63d8bb72d82f
--- /dev/null
+++ b/sys/nfs/nfs_srvcache.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_srvcache.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Reference: Chet Juszczak, "Improving the Performance and Correctness
+ * of an NFS Server", in Proc. Winter 1989 USENIX Conference,
+ * pages 53-63. San Diego, February 1989.
+ */
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/nfsm_subs.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nqnfs.h>
+
+long numnfsrvcache, desirednfsrvcache = NFSRVCACHESIZ; /* entries allocated so far, and the limit above which entries are recycled */
+
+#define NFSRCHASH(xid) (((xid) + ((xid) >> 24)) & rheadhash) /* xid -> index into rheadhtbl */
+static struct nfsrvcache *nfsrvlruhead, **nfsrvlrutail = &nfsrvlruhead; /* LRU list: first entry, and address of the last rc_next link */
+static struct nfsrvcache **rheadhtbl; /* hash chain heads, obtained from hashinit() */
+static u_long rheadhash; /* filled in by hashinit(); applied as a mask by NFSRCHASH */
+
+#define TRUE 1
+#define FALSE 0
+
+#define NETFAMILY(rp) \
+ (((rp)->rc_flag & RC_INETADDR) ? AF_INET : AF_ISO) /* address family passed to netaddr_match() for a cache entry */
+
+/*
+ * Static array that defines which nfs rpc's are nonidempotent
+ * Indexed by rpc procedure number (nd_procnum); nfsrv_updatecache()
+ * only remembers a reply for procedures marked TRUE here.
+ * NOTE(review): the per-slot procedure names come from the NFS_NPROCS
+ * numbering defined elsewhere -- confirm against that header before
+ * changing any entry.
+ */
+int nonidempotent[NFS_NPROCS] = {
+ FALSE,
+ FALSE,
+ TRUE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+};
+
+/* True iff the rpc reply is an nfs status ONLY!
+ * Indexed by rpc procedure number (nd_procnum). For a TRUE entry
+ * nfsrv_updatecache() saves just nd_repstat in the cache entry; for a
+ * FALSE entry it saves a copy of the whole reply mbuf chain.
+ */
+static int repliesstatus[NFS_NPROCS] = {
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ TRUE,
+ TRUE,
+ TRUE,
+ TRUE,
+ FALSE,
+ TRUE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ FALSE,
+ TRUE,
+};
+
+/*
+ * Initialize the server request cache list
+ * Obtains the hash chain head table from hashinit(), sized from
+ * desirednfsrvcache; hashinit() also fills in rheadhash, which
+ * NFSRCHASH applies as a mask.
+ */
+nfsrv_initcache()
+{
+
+ rheadhtbl = hashinit(desirednfsrvcache, M_NFSD, &rheadhash);
+}
+
+/*
+ * Look for the request in the cache
+ * If found then
+ *    return action and optionally reply
+ * else
+ *    insert it in the cache
+ *
+ * Requests carrying an nqnfs lease (nd_nqlflag != NQL_NOVAL) bypass the
+ * cache and always get RC_DOIT.
+ *
+ * For a request matching a cached entry (same xid, same procedure and
+ * same client address):
+ * - if still in progress, return RC_DROPIT
+ * - if done and only a status was saved (RC_REPSTATUS), build a reply
+ *   header from that status in *repp and return RC_REPLY
+ * - if done and the whole reply was saved (RC_REPMBUF), put a copy of
+ *   it in *repp and return RC_REPLY
+ * - otherwise mark the entry in progress again and return RC_DOIT
+ * A request with no match is entered as in progress and RC_DOIT is
+ * returned; once desirednfsrvcache entries exist the least recently
+ * used entry is recycled instead of allocating a new one.
+ * Update/add new request at end of lru list
+ *
+ * nam  - mbuf holding the client's socket address
+ * nd   - nfsd state of the request (xid, procedure number, lease flag)
+ * repp - where the cached reply is handed back when RC_REPLY is returned
+ */
+nfsrv_getcache(nam, nd, repp)
+	struct mbuf *nam;
+	register struct nfsd *nd;
+	struct mbuf **repp;
+{
+	register struct nfsrvcache *rp, *rq, **rpp;
+	struct mbuf *mb;
+	struct sockaddr_in *saddr;
+	caddr_t bpos;
+	int ret;
+
+	if (nd->nd_nqlflag != NQL_NOVAL)
+		return (RC_DOIT);
+	rpp = &rheadhtbl[NFSRCHASH(nd->nd_retxid)];
+loop:
+	for (rp = *rpp; rp; rp = rp->rc_forw) {
+	    if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
+		netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nam)) {
+			/*
+			 * Somebody else holds the entry: wait, then rescan
+			 * the chain since it may have changed while asleep.
+			 */
+			if ((rp->rc_flag & RC_LOCKED) != 0) {
+				rp->rc_flag |= RC_WANTED;
+				(void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+				goto loop;
+			}
+			rp->rc_flag |= RC_LOCKED;
+			/* If not at end of LRU chain, move it there */
+			if (rp->rc_next) {
+				/* remove from LRU chain */
+				*rp->rc_prev = rp->rc_next;
+				rp->rc_next->rc_prev = rp->rc_prev;
+				/* and replace at end of it */
+				rp->rc_next = NULL;
+				rp->rc_prev = nfsrvlrutail;
+				*nfsrvlrutail = rp;
+				nfsrvlrutail = &rp->rc_next;
+			}
+			if (rp->rc_state == RC_UNUSED)
+				panic("nfsrv cache");
+			if (rp->rc_state == RC_INPROG) {
+				nfsstats.srvcache_inproghits++;
+				ret = RC_DROPIT;
+			} else if (rp->rc_flag & RC_REPSTATUS) {
+				nfsstats.srvcache_nonidemdonehits++;
+				nfs_rephead(0, nd, rp->rc_status,
+				   0, (u_quad_t *)0, repp, &mb, &bpos);
+				ret = RC_REPLY;
+			} else if (rp->rc_flag & RC_REPMBUF) {
+				nfsstats.srvcache_nonidemdonehits++;
+				*repp = m_copym(rp->rc_reply, 0, M_COPYALL,
+						M_WAIT);
+				ret = RC_REPLY;
+			} else {
+				nfsstats.srvcache_idemdonehits++;
+				rp->rc_state = RC_INPROG;
+				ret = RC_DOIT;
+			}
+			rp->rc_flag &= ~RC_LOCKED;
+			if (rp->rc_flag & RC_WANTED) {
+				rp->rc_flag &= ~RC_WANTED;
+				wakeup((caddr_t)rp);
+			}
+			return (ret);
+		}
+	}
+	nfsstats.srvcache_misses++;
+	if (numnfsrvcache < desirednfsrvcache) {
+		rp = (struct nfsrvcache *)malloc((u_long)sizeof *rp,
+		    M_NFSD, M_WAITOK);
+		bzero((char *)rp, sizeof *rp);
+		numnfsrvcache++;
+		rp->rc_flag = RC_LOCKED;
+	} else {
+		/*
+		 * Recycle the least recently used entry. The head is
+		 * re-read after every sleep because the list may have been
+		 * reordered in the meantime.
+		 */
+		rp = nfsrvlruhead;
+		while ((rp->rc_flag & RC_LOCKED) != 0) {
+			rp->rc_flag |= RC_WANTED;
+			(void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+			rp = nfsrvlruhead;
+		}
+		rp->rc_flag |= RC_LOCKED;
+		/* remove from hash chain */
+		if (rq = rp->rc_forw)
+			rq->rc_back = rp->rc_back;
+		*rp->rc_back = rq;
+		/*
+		 * remove from LRU chain; if the head is also the last
+		 * entry there is no successor to fix up, and the tail
+		 * pointer has to fall back to the link that pointed at it.
+		 */
+		*rp->rc_prev = rp->rc_next;
+		if (rp->rc_next)
+			rp->rc_next->rc_prev = rp->rc_prev;
+		else
+			nfsrvlrutail = rp->rc_prev;
+		/* release whatever the old request left behind */
+		if (rp->rc_flag & RC_REPMBUF)
+			m_freem(rp->rc_reply);
+		if (rp->rc_flag & RC_NAM)
+			MFREE(rp->rc_nam, mb);
+		rp->rc_flag &= (RC_LOCKED | RC_WANTED);
+	}
+	/* place at end of LRU list */
+	rp->rc_next = NULL;
+	rp->rc_prev = nfsrvlrutail;
+	*nfsrvlrutail = rp;
+	nfsrvlrutail = &rp->rc_next;
+	rp->rc_state = RC_INPROG;
+	rp->rc_xid = nd->nd_retxid;
+	/*
+	 * Remember who sent it: an internet address is stored inline,
+	 * anything else is kept as a copy of the address mbuf.
+	 */
+	saddr = mtod(nam, struct sockaddr_in *);
+	switch (saddr->sin_family) {
+	case AF_INET:
+		rp->rc_flag |= RC_INETADDR;
+		rp->rc_inetaddr = saddr->sin_addr.s_addr;
+		break;
+	case AF_ISO:
+	default:
+		rp->rc_flag |= RC_NAM;
+		rp->rc_nam = m_copym(nam, 0, M_COPYALL, M_WAIT);
+		break;
+	};
+	rp->rc_proc = nd->nd_procnum;
+	/* insert into hash chain */
+	if (rq = *rpp)
+		rq->rc_back = &rp->rc_forw;
+	rp->rc_forw = rq;
+	rp->rc_back = rpp;
+	*rpp = rp;
+	rp->rc_flag &= ~RC_LOCKED;
+	if (rp->rc_flag & RC_WANTED) {
+		rp->rc_flag &= ~RC_WANTED;
+		wakeup((caddr_t)rp);
+	}
+	return (RC_DOIT);
+}
+
+/*
+ * Update a request cache entry after the rpc has been done
+ * The entry is found by xid, procedure number and client address and is
+ * marked RC_DONE. Nothing happens for requests carrying an nqnfs lease
+ * (nd_nqlflag != NQL_NOVAL) or when no matching entry exists.
+ *
+ * nam - client address, used to match the entry
+ * nd - nfsd state of the request (xid, procedure number, reply status)
+ * repvalid - non-zero if the reply may be remembered
+ * repmbuf - the reply mbuf chain; it is copied, not kept
+ */
+void
+nfsrv_updatecache(nam, nd, repvalid, repmbuf)
+ struct mbuf *nam;
+ register struct nfsd *nd;
+ int repvalid;
+ struct mbuf *repmbuf;
+{
+ register struct nfsrvcache *rp;
+
+ if (nd->nd_nqlflag != NQL_NOVAL)
+ return;
+loop:
+ for (rp = rheadhtbl[NFSRCHASH(nd->nd_retxid)]; rp; rp = rp->rc_forw) {
+ if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
+ netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nam)) {
+ if ((rp->rc_flag & RC_LOCKED) != 0) { /* entry busy: sleep, then rescan the chain from the start */
+ rp->rc_flag |= RC_WANTED;
+ (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+ goto loop;
+ }
+ rp->rc_flag |= RC_LOCKED;
+ rp->rc_state = RC_DONE;
+ /*
+ * If we have a valid reply update status and save
+ * the reply for non-idempotent rpc's.
+ */
+ if (repvalid && nonidempotent[nd->nd_procnum]) {
+ if (repliesstatus[nd->nd_procnum]) { /* a status word is the whole reply: keep only that */
+ rp->rc_status = nd->nd_repstat;
+ rp->rc_flag |= RC_REPSTATUS;
+ } else { /* otherwise keep a private copy of the reply chain */
+ rp->rc_reply = m_copym(repmbuf,
+ 0, M_COPYALL, M_WAIT);
+ rp->rc_flag |= RC_REPMBUF;
+ }
+ }
+ rp->rc_flag &= ~RC_LOCKED;
+ if (rp->rc_flag & RC_WANTED) { /* let anybody who slept on this entry run again */
+ rp->rc_flag &= ~RC_WANTED;
+ wakeup((caddr_t)rp);
+ }
+ return;
+ }
+ }
+}
+
+/*
+ * Clean out the cache. Called when the last nfsd terminates.
+ * Every entry on the LRU list is released together with what it still
+ * owns: the saved reply mbuf chain (RC_REPMBUF) and the copied client
+ * address mbuf (RC_NAM). The hash table is then zeroed and the LRU
+ * list and the entry count are reset to empty.
+ */
+void
+nfsrv_cleancache()
+{
+	register struct nfsrvcache *rp, *nextrp;
+
+	for (rp = nfsrvlruhead; rp; rp = nextrp) {
+		/* pick up the successor before rp goes away */
+		nextrp = rp->rc_next;
+		if (rp->rc_flag & RC_REPMBUF)
+			m_freem(rp->rc_reply);
+		if (rp->rc_flag & RC_NAM)
+			m_freem(rp->rc_nam);
+		free(rp, M_NFSD);
+	}
+	bzero((char *)rheadhtbl, (rheadhash + 1) * sizeof(void *));
+	nfsrvlruhead = NULL;
+	nfsrvlrutail = &nfsrvlruhead;
+	numnfsrvcache = 0;
+}
diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c
new file mode 100644
index 000000000000..5778f7d7f01a
--- /dev/null
+++ b/sys/nfs/nfs_subs.c
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Data items converted to xdr at startup, since they are constant
+ * This is kinda hokey, but may save a little time doing byte swaps
+ * (all of them are filled in by nfs_init())
+ */
+u_long nfs_procids[NFS_NPROCS];
+u_long nfs_xdrneg1;
+u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
+ rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_rejectedcred,
+ rpc_auth_kerb;
+u_long nfs_vers, nfs_prog, nfs_true, nfs_false;
+
+/* And other global data */
+static u_long nfs_xid = 0; /* last transaction id handed out by nfsm_rpchead() */
+enum vtype ntov_type[7] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON }; /* NOTE(review): presumably the nfs file type -> vnode type table behind nfstov_type(); confirm */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern struct nfsreq nfsreqh;
+extern int nqnfs_piggy[NFS_NPROCS];
+extern struct nfsrtt nfsrtt;
+extern time_t nqnfsstarttime;
+extern u_long nqnfs_prog, nqnfs_vers;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+
+/*
+ * Create the header for an rpc request packet
+ * The hsiz is the size of the rest of the nfs request header.
+ * (just used to decide if a cluster is a good idea)
+ *
+ * What is actually built here is the first mbuf of the request and, for
+ * an NQNFS mount, the piggybacked lease request: either the lease flag
+ * followed by the mount's lease term, or a single zero word when no
+ * lease is needed.
+ *
+ * vp - vnode the request is for; may be NULL, in which case no lease
+ * request is added
+ * procid - procedure number, only used to decide whether a lease is needed
+ * bposp - receives the position at which the caller continues building
+ * Returns the mbuf to continue building in.
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+ struct vnode *vp;
+ u_long procid;
+ int hsiz;
+ caddr_t *bposp;
+{
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ struct mbuf *mb2; /* not referenced directly; NOTE(review): presumably scratch for the nfsm_build macro -- confirm */
+ struct nfsmount *nmp;
+ int nqflag;
+
+ MGET(mb, M_WAIT, MT_DATA);
+ if (hsiz >= MINCLSIZE)
+ MCLGET(mb, M_WAIT);
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * For NQNFS, add lease request.
+ */
+ if (vp) {
+ nmp = VFSTONFS(vp->v_mount);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nqflag = NQNFS_NEEDLEASE(vp, procid);
+ if (nqflag) {
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nqflag);
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ } else {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ }
+ /* Finally, return values */
+ *bposp = bpos;
+ return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info.
+ * The authorization string argument is only used when the credentials
+ * come from outside of the kernel.
+ * Returns the head of the mbuf list.
+ *
+ * cr - credentials supplying the uid and the group list
+ * nqnfs - non-zero selects NQNFS_PROG/NQNFS_VER1 instead of
+ * NFS_PROG/NFS_VER2
+ * procid - procedure number placed in the header
+ * auth_type - RPCAUTH_UNIX or RPCAUTH_NQNFS; any other value gets no
+ * credential body
+ * auth_len - length in bytes of the credential body; for RPCAUTH_UNIX the
+ * number of extra groups is derived from it as (auth_len >> 2) - 5
+ * auth_str - bytes copied into the credential for RPCAUTH_NQNFS
+ * mrest - mbuf chain linked behind the header
+ * mrest_len - its length, counted into the packet header length
+ * mbp - receives the last mbuf of the header (the one mrest hangs off)
+ * xidp - receives the transaction id, in xdr form
+ */
+struct mbuf *
+nfsm_rpchead(cr, nqnfs, procid, auth_type, auth_len, auth_str, mrest,
+ mrest_len, mbp, xidp)
+ register struct ucred *cr;
+ int nqnfs;
+ int procid;
+ int auth_type;
+ int auth_len;
+ char *auth_str;
+ struct mbuf *mrest;
+ int mrest_len;
+ struct mbuf **mbp;
+ u_long *xidp;
+{
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ register int i;
+ struct mbuf *mreq, *mb2;
+ int siz, grpsiz, authsiz;
+
+ authsiz = nfsm_rndup(auth_len);
+ if (auth_type == RPCAUTH_NQNFS)
+ authsiz += 2 * NFSX_UNSIGNED; /* room for the uid and length words written below */
+ MGETHDR(mb, M_WAIT, MT_DATA);
+ if ((authsiz + 10*NFSX_UNSIGNED) >= MINCLSIZE) {
+ MCLGET(mb, M_WAIT);
+ } else if ((authsiz + 10*NFSX_UNSIGNED) < MHLEN) {
+ MH_ALIGN(mb, authsiz + 10*NFSX_UNSIGNED);
+ } else {
+ MH_ALIGN(mb, 8*NFSX_UNSIGNED);
+ }
+ mb->m_len = 0;
+ mreq = mb;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * First the RPC header.
+ */
+ nfsm_build(tl, u_long *, 8*NFSX_UNSIGNED);
+ if (++nfs_xid == 0) /* zero is never used as a transaction id */
+ nfs_xid++;
+ *tl++ = *xidp = txdr_unsigned(nfs_xid);
+ *tl++ = rpc_call;
+ *tl++ = rpc_vers;
+ if (nqnfs) {
+ *tl++ = txdr_unsigned(NQNFS_PROG);
+ *tl++ = txdr_unsigned(NQNFS_VER1);
+ } else {
+ *tl++ = txdr_unsigned(NFS_PROG);
+ *tl++ = txdr_unsigned(NFS_VER2);
+ }
+ *tl++ = txdr_unsigned(procid);
+
+ /*
+ * And then the authorization cred.
+ */
+ *tl++ = txdr_unsigned(auth_type);
+ *tl = txdr_unsigned(authsiz);
+ switch (auth_type) {
+ case RPCAUTH_UNIX:
+ nfsm_build(tl, u_long *, auth_len);
+ *tl++ = 0; /* stamp ?? */
+ *tl++ = 0; /* NULL hostname */
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl++ = txdr_unsigned(cr->cr_groups[0]);
+ grpsiz = (auth_len >> 2) - 5; /* words left over after the five fixed ones above */
+ *tl++ = txdr_unsigned(grpsiz);
+ for (i = 1; i <= grpsiz; i++)
+ *tl++ = txdr_unsigned(cr->cr_groups[i]);
+ break;
+ case RPCAUTH_NQNFS:
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl = txdr_unsigned(auth_len);
+ siz = auth_len;
+ while (siz > 0) { /* copy auth_str, adding mbufs as the current one fills up */
+ if (M_TRAILINGSPACE(mb) == 0) {
+ MGET(mb2, M_WAIT, MT_DATA);
+ if (siz >= MINCLSIZE)
+ MCLGET(mb2, M_WAIT);
+ mb->m_next = mb2;
+ mb = mb2;
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+ }
+ i = min(siz, M_TRAILINGSPACE(mb));
+ bcopy(auth_str, bpos, i);
+ mb->m_len += i;
+ auth_str += i;
+ bpos += i;
+ siz -= i;
+ }
+ if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { /* zero fill up to the rounded length */
+ for (i = 0; i < siz; i++)
+ *bpos++ = '\0';
+ mb->m_len += siz;
+ }
+ break;
+ };
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); /* the verifier: RPCAUTH_NULL with zero length */
+ *tl++ = txdr_unsigned(RPCAUTH_NULL);
+ *tl = 0;
+ mb->m_next = mrest;
+ mreq->m_pkthdr.len = authsiz + 10*NFSX_UNSIGNED + mrest_len;
+ mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+ *mbp = mb;
+ return (mreq);
+}
+
+/*
+ * copies mbuf chain to the uio scatter/gather list
+ *
+ * mrep, dpos - current mbuf and position within it; both are advanced
+ * past the data and past the fill bytes that round siz up
+ * (nfsm_rndup)
+ * uiop - destination; its iovecs, offset and resid are updated
+ * siz - number of data bytes to copy
+ * Returns 0, EFBIG if the uio runs out of iovecs, EBADRPC if the mbuf
+ * chain ends early, or the result of nfs_adv() while skipping the fill.
+ * The return value of copyout() is not checked.
+ */
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+ struct mbuf **mrep;
+ register struct uio *uiop;
+ int siz;
+ caddr_t *dpos;
+{
+ register char *mbufcp, *uiocp;
+ register int xfer, left, len;
+ register struct mbuf *mp;
+ long uiosiz, rem;
+ int error = 0;
+
+ mp = *mrep;
+ mbufcp = *dpos;
+ len = mtod(mp, caddr_t)+mp->m_len-mbufcp; /* bytes left in the current mbuf */
+ rem = nfsm_rndup(siz)-siz; /* fill bytes following the data */
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EFBIG);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left; /* amount that goes into this iovec */
+ while (left > 0) {
+ while (len == 0) { /* current mbuf used up: step to the next one */
+ mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ mbufcp = mtod(mp, caddr_t);
+ len = mp->m_len;
+ }
+ xfer = (left > len) ? len : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (mbufcp, uiocp, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(mbufcp, uiocp, xfer);
+ else
+ copyout(mbufcp, uiocp, xfer);
+ left -= xfer;
+ len -= xfer;
+ mbufcp += xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ if (uiop->uio_iov->iov_len <= siz) { /* iovec filled: move on to the next one */
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else { /* iovec only partly used: remember how far we got */
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ *dpos = mbufcp;
+ *mrep = mp;
+ if (rem > 0) {
+ if (len < rem)
+ error = nfs_adv(mrep, dpos, rem, len);
+ else
+ *dpos += rem;
+ }
+ return (error);
+}
+
+/*
+ * copies a uio scatter/gather list to an mbuf chain...
+ *
+ * uiop - source; its iovecs, offset and resid are updated
+ * mq - in: mbuf to start appending to; out: last mbuf written
+ * siz - number of data bytes to copy; zero bytes are appended to round
+ * it up (nfsm_rndup)
+ * bpos - receives the position just past what was written
+ * Returns 0, or EINVAL if the uio runs out of iovecs.
+ * The return value of copyin() is not checked.
+ */
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+ register struct uio *uiop;
+ struct mbuf **mq;
+ int siz;
+ caddr_t *bpos;
+{
+ register char *uiocp;
+ register struct mbuf *mp, *mp2;
+ register int xfer, left, mlen;
+ int uiosiz, clflg, rem;
+ char *cp;
+
+ if (siz > MLEN) /* or should it >= MCLBYTES ?? */
+ clflg = 1;
+ else
+ clflg = 0;
+ rem = nfsm_rndup(siz)-siz; /* fill bytes to append after the data */
+ mp = mp2 = *mq;
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EINVAL);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left; /* amount taken from this iovec */
+ while (left > 0) {
+ mlen = M_TRAILINGSPACE(mp);
+ if (mlen == 0) { /* current mbuf is full: chain a fresh one, with a cluster for big transfers */
+ MGET(mp, M_WAIT, MT_DATA);
+ if (clflg)
+ MCLGET(mp, M_WAIT);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ mp2 = mp;
+ mlen = M_TRAILINGSPACE(mp);
+ }
+ xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+ copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ mp->m_len += xfer;
+ left -= xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ if (uiop->uio_iov->iov_len <= siz) { /* iovec drained: move on to the next one */
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else { /* iovec only partly used: remember how far we got */
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ if (rem > 0) {
+ if (rem > M_TRAILINGSPACE(mp)) { /* no room for the fill: it goes into one more mbuf */
+ MGET(mp, M_WAIT, MT_DATA);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ }
+ cp = mtod(mp, caddr_t)+mp->m_len;
+ for (left = 0; left < rem; left++)
+ *cp++ = '\0';
+ mp->m_len += rem;
+ *bpos = cp;
+ } else
+ *bpos = mtod(mp, caddr_t)+mp->m_len;
+ *mq = mp;
+ return (0);
+}
+
+/*
+ * Help break down an mbuf chain by setting the first siz bytes contiguous
+ * pointed to by returned val.
+ * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
+ * cases. (The macros use the vars. dpos and dpos2)
+ *
+ * mdp, dposp - current mbuf and position within it; updated to just past
+ * the siz bytes
+ * siz - number of bytes that must be contiguous
+ * left - bytes remaining in the current mbuf at *dposp
+ * cp2 - receives a pointer to the siz contiguous bytes
+ * Returns 0, or EBADRPC if the chain does not hold siz more bytes.
+ * Panics if the bytes straddle mbufs and siz is larger than MHLEN.
+ */
+nfsm_disct(mdp, dposp, siz, left, cp2)
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ int siz;
+ int left;
+ caddr_t *cp2;
+{
+ register struct mbuf *mp, *mp2;
+ register int siz2, xfer;
+ register caddr_t p;
+
+ mp = *mdp;
+ while (left == 0) { /* nothing left here: step to the next mbuf that has data */
+ *mdp = mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ left = mp->m_len;
+ *dposp = mtod(mp, caddr_t);
+ }
+ if (left >= siz) { /* easy case: already contiguous in this mbuf */
+ *cp2 = *dposp;
+ *dposp += siz;
+ } else if (mp->m_next == NULL) {
+ return (EBADRPC);
+ } else if (siz > MHLEN) {
+ panic("nfs S too big");
+ } else {
+ MGET(mp2, M_WAIT, MT_DATA); /* splice in a new mbuf that will hold all siz bytes */
+ mp2->m_next = mp->m_next;
+ mp->m_next = mp2;
+ mp->m_len -= left; /* the tail of the old mbuf moves into the new one */
+ mp = mp2;
+ *cp2 = p = mtod(mp, caddr_t);
+ bcopy(*dposp, p, left); /* Copy what was left */
+ siz2 = siz-left;
+ p += left;
+ mp2 = mp->m_next;
+ /* Loop around copying up the siz2 bytes */
+ while (siz2 > 0) {
+ if (mp2 == NULL)
+ return (EBADRPC);
+ xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+ if (xfer > 0) {
+ bcopy(mtod(mp2, caddr_t), p, xfer);
+ NFSMADV(mp2, xfer);
+ mp2->m_len -= xfer;
+ p += xfer;
+ siz2 -= xfer;
+ }
+ if (siz2 > 0)
+ mp2 = mp2->m_next;
+ }
+ mp->m_len = siz;
+ *mdp = mp2; /* parsing continues in the mbuf the last bytes were taken from */
+ *dposp = mtod(mp2, caddr_t);
+ }
+ return (0);
+}
+
+/*
+ * Move the parse position offs bytes further down an mbuf chain.
+ *
+ * mdp, dposp - current mbuf and position; replaced by the new ones
+ * offs - number of bytes to skip
+ * left - bytes remaining in the current mbuf
+ * Whole mbufs are stepped over until the one holding the target is
+ * reached; the new position is then taken from the start of that mbuf's
+ * data plus what is still to be skipped.
+ * Returns 0, or EBADRPC if the chain ends first.
+ */
+nfs_adv(mdp, dposp, offs, left)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int offs;
+	int left;
+{
+	register struct mbuf *cur = *mdp;
+	register int avail;
+
+	for (avail = left; avail < offs; avail = cur->m_len) {
+		offs -= avail;
+		if ((cur = cur->m_next) == NULL)
+			return (EBADRPC);
+	}
+	*mdp = cur;
+	*dposp = mtod(cur, caddr_t) + offs;
+	return (0);
+}
+
+/*
+ * Copy a string into mbufs for the hard cases...
+ * The string goes out as a length word followed by the bytes, zero
+ * filled to a multiple of four.
+ *
+ * mb, bpos - in: current mbuf and build position; out: last mbuf written
+ * and the position just past the string
+ * cp - the string
+ * siz - its length in bytes
+ * Always returns 0.
+ * NOTE(review): m1 is only assigned inside the loop, so this relies on
+ * the string not fitting in the space left in *mb -- confirm that callers
+ * guarantee it.
+ */
+nfsm_strtmbuf(mb, bpos, cp, siz)
+ struct mbuf **mb;
+ char **bpos;
+ char *cp;
+ long siz;
+{
+ register struct mbuf *m1, *m2;
+ long left, xfer, len, tlen;
+ u_long *tl;
+ int putsize;
+
+ putsize = 1; /* set while the length word is still to be written */
+ m2 = *mb;
+ left = M_TRAILINGSPACE(m2);
+ if (left > 0) { /* use up what remains of the current mbuf first */
+ tl = ((u_long *)(*bpos));
+ *tl++ = txdr_unsigned(siz);
+ putsize = 0;
+ left -= NFSX_UNSIGNED;
+ m2->m_len += NFSX_UNSIGNED;
+ if (left > 0) {
+ bcopy(cp, (caddr_t) tl, left);
+ siz -= left;
+ cp += left;
+ m2->m_len += left;
+ left = 0;
+ }
+ }
+ /* Loop around adding mbufs */
+ while (siz > 0) {
+ MGET(m1, M_WAIT, MT_DATA);
+ if (siz > MLEN)
+ MCLGET(m1, M_WAIT);
+ m1->m_len = NFSMSIZ(m1); /* for now m_len holds the capacity; the real length is set below */
+ m2->m_next = m1;
+ m2 = m1;
+ tl = mtod(m1, u_long *);
+ tlen = 0;
+ if (putsize) {
+ *tl++ = txdr_unsigned(siz);
+ m1->m_len -= NFSX_UNSIGNED;
+ tlen = NFSX_UNSIGNED;
+ putsize = 0;
+ }
+ if (siz < m1->m_len) { /* the rest fits: this is the last mbuf */
+ len = nfsm_rndup(siz);
+ xfer = siz;
+ if (xfer < len)
+ *(tl+(xfer>>2)) = 0; /* clear the final word before the copy so the fill bytes are zero */
+ } else {
+ xfer = len = m1->m_len;
+ }
+ bcopy(cp, (caddr_t) tl, xfer);
+ m1->m_len = len+tlen;
+ siz -= xfer;
+ cp += xfer;
+ }
+ *mb = m1;
+ *bpos = mtod(m1, caddr_t)+m1->m_len;
+ return (0);
+}
+
+/*
+ * Called once to initialize data structures...
+ * Converts the constant rpc/nfs values to xdr form, clears the async
+ * daemon slots, initializes the nfsnode table, the server structures
+ * and the server request cache, sets up the nqnfs server state the
+ * first time through, and finally empties the request list and calls
+ * nfs_timer().
+ */
+nfs_init()
+{
+ register int i;
+
+ nfsrtt.pos = 0;
+ rpc_vers = txdr_unsigned(RPC_VER2);
+ rpc_call = txdr_unsigned(RPC_CALL);
+ rpc_reply = txdr_unsigned(RPC_REPLY);
+ rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+ rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+ rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+ rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+ rpc_rejectedcred = txdr_unsigned(AUTH_REJECTCRED);
+ rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+ rpc_auth_kerb = txdr_unsigned(RPCAUTH_NQNFS);
+ nfs_vers = txdr_unsigned(NFS_VER2);
+ nfs_prog = txdr_unsigned(NFS_PROG);
+ nfs_true = txdr_unsigned(TRUE);
+ nfs_false = txdr_unsigned(FALSE);
+ /* Loop thru nfs procids */
+ for (i = 0; i < NFS_NPROCS; i++)
+ nfs_procids[i] = txdr_unsigned(i);
+ /* Ensure async daemons disabled */
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ nfs_iodwant[i] = (struct proc *)0;
+ TAILQ_INIT(&nfs_bufq);
+ nfs_xdrneg1 = txdr_unsigned(-1);
+ nfs_nhinit(); /* Init the nfsnode table */
+ nfsrv_init(0); /* Init server data structures */
+ nfsrv_initcache(); /* Init the server request cache */
+
+ /*
+ * Initialize the nqnfs server stuff.
+ * (only done while nqnfsstarttime is still zero)
+ */
+ if (nqnfsstarttime == 0) {
+ nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+ + nqsrv_clockskew + nqsrv_writeslack;
+ NQLOADNOVRAM(nqnfsstarttime);
+ nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+ nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+ nqthead.th_head[0] = &nqthead; /* both list links point back at the head itself */
+ nqthead.th_head[1] = &nqthead;
+ nqfhead = hashinit(NQLCHSZ, M_NQLEASE, &nqfheadhash);
+ }
+
+ /*
+ * Initialize reply list and start timer
+ */
+ nfsreqh.r_prev = nfsreqh.r_next = &nfsreqh;
+ nfs_timer();
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ * that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ * error otherwise
+ */
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with
+ * the values on the mbuf list and
+ * Iff vaper not NULL
+ * copy the attributes to *vaper
+ *
+ * vpp - the vnode; may be replaced when a new device node turns out to
+ * alias an existing vnode
+ * mdp, dposp - current mbuf and position; advanced past the attributes
+ * vaper - optional destination for a copy of the attributes
+ * Returns 0, the error from nfsm_disct(), or EOPNOTSUPP for a fifo when
+ * the kernel is built without FIFO.
+ */
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+ struct vnode **vpp;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct vattr *vaper;
+{
+ register struct vnode *vp = *vpp;
+ register struct vattr *vap;
+ register struct nfsv2_fattr *fp;
+ extern int (**spec_nfsv2nodeop_p)();
+ register struct nfsnode *np, *nq, **nhpp;
+ register long t1;
+ caddr_t dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *md;
+ enum vtype vtyp;
+ u_short vmode;
+ long rdev;
+ struct timespec mtime;
+ struct vnode *nvp;
+
+ md = *mdp;
+ dpos = *dposp;
+ t1 = (mtod(md, caddr_t) + md->m_len) - dpos; /* bytes left in the current mbuf */
+ isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ if (error = nfsm_disct(&md, &dpos, NFSX_FATTR(isnq), t1, &cp2))
+ return (error);
+ fp = (struct nfsv2_fattr *)cp2;
+ vtyp = nfstov_type(fp->fa_type);
+ vmode = fxdr_unsigned(u_short, fp->fa_mode);
+ if (vtyp == VNON || vtyp == VREG)
+ vtyp = IFTOVT(vmode); /* for these two the type is taken from the mode bits instead */
+ if (isnq) {
+ rdev = fxdr_unsigned(long, fp->fa_nqrdev);
+ fxdr_nqtime(&fp->fa_nqmtime, &mtime);
+ } else {
+ rdev = fxdr_unsigned(long, fp->fa_nfsrdev);
+ fxdr_nfstime(&fp->fa_nfsmtime, &mtime);
+ }
+ /*
+ * If v_type == VNON it is a new node, so fill in the v_type,
+ * n_mtime fields. Check to see if it represents a special
+ * device, and if so, check for a possible alias. Once the
+ * correct vnode has been obtained, fill in the rest of the
+ * information.
+ */
+ np = VTONFS(vp);
+ if (vp->v_type == VNON) {
+ if (vtyp == VCHR && rdev == 0xffffffff) /* a character device with this rdev is treated as a fifo */
+ vp->v_type = vtyp = VFIFO;
+ else
+ vp->v_type = vtyp;
+ if (vp->v_type == VFIFO) {
+#ifdef FIFO
+ extern int (**fifo_nfsv2nodeop_p)();
+ vp->v_op = fifo_nfsv2nodeop_p;
+#else
+ return (EOPNOTSUPP);
+#endif /* FIFO */
+ }
+ if (vp->v_type == VCHR || vp->v_type == VBLK) {
+ vp->v_op = spec_nfsv2nodeop_p;
+ if (nvp = checkalias(vp, (dev_t)rdev, vp->v_mount)) {
+ /*
+ * Discard unneeded vnode, but save its nfsnode.
+ * (the nfsnode is first taken off its hash chain)
+ */
+ if (nq = np->n_forw)
+ nq->n_back = np->n_back;
+ *np->n_back = nq;
+ nvp->v_data = vp->v_data;
+ vp->v_data = NULL;
+ vp->v_op = spec_vnodeop_p;
+ vrele(vp);
+ vgone(vp);
+ /*
+ * Reinitialize aliased node.
+ * (the nfsnode goes back on the hash chain for its
+ * file handle, now pointing at nvp)
+ */
+ np->n_vnode = nvp;
+ nhpp = (struct nfsnode **)nfs_hash(&np->n_fh);
+ if (nq = *nhpp)
+ nq->n_back = &np->n_forw;
+ np->n_forw = nq;
+ np->n_back = nhpp;
+ *nhpp = np;
+ *vpp = vp = nvp;
+ }
+ }
+ np->n_mtime = mtime.ts_sec;
+ }
+ vap = &np->n_vattr;
+ vap->va_type = vtyp;
+ vap->va_mode = (vmode & 07777);
+ vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+ vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+ vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+ vap->va_rdev = (dev_t)rdev;
+ vap->va_mtime = mtime;
+ vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+ if (isnq) {
+ fxdr_hyper(&fp->fa_nqsize, &vap->va_size);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nqblocksize);
+ fxdr_hyper(&fp->fa_nqbytes, &vap->va_bytes);
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nqfileid);
+ fxdr_nqtime(&fp->fa_nqatime, &vap->va_atime);
+ vap->va_flags = fxdr_unsigned(u_long, fp->fa_nqflags);
+ fxdr_nqtime(&fp->fa_nqctime, &vap->va_ctime);
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nqgen);
+ fxdr_hyper(&fp->fa_nqfilerev, &vap->va_filerev);
+ } else {
+ vap->va_size = fxdr_unsigned(u_long, fp->fa_nfssize);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nfsblocksize);
+ vap->va_bytes = fxdr_unsigned(long, fp->fa_nfsblocks) * NFS_FABLKSIZE;
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nfsfileid);
+ fxdr_nfstime(&fp->fa_nfsatime, &vap->va_atime);
+ vap->va_flags = 0;
+ vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa_nfsctime.nfs_sec);
+ vap->va_ctime.ts_nsec = 0;
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nfsctime.nfs_usec); /* the usec half of the ctime field is taken as the generation number */
+ vap->va_filerev = 0;
+ }
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) { /* locally modified file: the larger of the two sizes wins */
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else
+ np->n_size = vap->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else
+ np->n_size = vap->va_size;
+ }
+ np->n_attrstamp = time.tv_sec; /* time of load; nfs_getattrcache() ages the cache against it */
+ *dposp = dpos;
+ *mdp = md;
+ if (vaper != NULL) {
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+#ifdef notdef
+ if ((np->n_flag & NMODIFIED) && np->n_size > vap->va_size)
+ if (np->n_size > vap->va_size)
+ vaper->va_size = np->n_size;
+#endif
+ if (np->n_flag & NCHG) { /* times recorded in the nfsnode override the loaded ones in the caller's copy */
+ if (np->n_flag & NACC) {
+ vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+ vaper->va_atime.ts_nsec =
+ np->n_atim.tv_usec * 1000;
+ }
+ if (np->n_flag & NUPD) {
+ vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+ vaper->va_mtime.ts_nsec =
+ np->n_mtim.tv_usec * 1000;
+ }
+ }
+ }
+ return (0);
+}
+
+/*
+ * Check the time stamp
+ * If the cache is valid, copy contents to *vaper and return 0
+ * otherwise return an error
+ *
+ * On an NFSMNT_NQLOOKLEASE mount the cache is valid while
+ * NQNFS_CKCACHABLE(vp, NQL_READ) holds and attributes have been loaded
+ * (n_attrstamp != 0); on any other mount it is valid until
+ * NFS_ATTRTIMEO(np) seconds have passed since n_attrstamp.
+ * A miss returns ENOENT.
+ */
+nfs_getattrcache(vp, vaper)
+ register struct vnode *vp;
+ struct vattr *vaper;
+{
+ register struct nfsnode *np = VTONFS(vp);
+ register struct vattr *vap;
+
+ if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQLOOKLEASE) {
+ if (!NQNFS_CKCACHABLE(vp, NQL_READ) || np->n_attrstamp == 0) {
+ nfsstats.attrcache_misses++;
+ return (ENOENT);
+ }
+ } else if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) {
+ nfsstats.attrcache_misses++;
+ return (ENOENT);
+ }
+ nfsstats.attrcache_hits++;
+ vap = &np->n_vattr;
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) { /* locally modified file: the larger of the two sizes wins */
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else
+ np->n_size = vap->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else
+ np->n_size = vap->va_size;
+ }
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
+#ifdef notdef
+ if ((np->n_flag & NMODIFIED) == 0) {
+ np->n_size = vaper->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else if (np->n_size > vaper->va_size)
+ if (np->n_size > vaper->va_size)
+ vaper->va_size = np->n_size;
+#endif
+ if (np->n_flag & NCHG) { /* times recorded in the nfsnode override the cached ones in the caller's copy */
+ if (np->n_flag & NACC) {
+ vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+ vaper->va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+ }
+ if (np->n_flag & NUPD) {
+ vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+ vaper->va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+ }
+ }
+ return (0);
+}
+
+/*
+ * Set up nameidata for a lookup() call and do it
+ *
+ * ndp - nameidata to fill in; the caller has set ni_cnd.cn_cred and the
+ * lookup flags
+ * fhp - file handle of the directory the lookup starts in
+ * len - length of the name on the mbuf list
+ * slp, nam - passed on to nfsrv_fhtovp()
+ * mdp, dposp - current mbuf and position; advanced past the name and the
+ * fill bytes that round it up
+ * p - process recorded in the componentname for lookup()
+ * Returns 0 or an error. The name may not contain '/' or a nul byte
+ * (EINVAL), the start must be a directory (ENOTDIR), and a symbolic link
+ * as the result is refused (EINVAL). The pathname buffer is kept, with
+ * HASBUF set, only on success when SAVENAME or SAVESTART was asked for;
+ * otherwise it is freed before returning.
+ */
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p)
+ register struct nameidata *ndp;
+ fhandle_t *fhp;
+ int len;
+ struct nfssvc_sock *slp;
+ struct mbuf *nam;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct proc *p;
+{
+ register int i, rem;
+ register struct mbuf *md;
+ register char *fromcp, *tocp;
+ struct vnode *dp;
+ int error, rdonly;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK);
+ /*
+ * Copy the name from the mbuf list to cnp->cn_pnbuf
+ * and set the various ndp fields appropriately.
+ */
+ fromcp = *dposp;
+ tocp = cnp->cn_pnbuf;
+ md = *mdp;
+ rem = mtod(md, caddr_t) + md->m_len - fromcp; /* bytes left in the current mbuf */
+ cnp->cn_hash = 0;
+ for (i = 0; i < len; i++) {
+ while (rem == 0) {
+ md = md->m_next;
+ if (md == NULL) {
+ error = EBADRPC;
+ goto out;
+ }
+ fromcp = mtod(md, caddr_t);
+ rem = md->m_len;
+ }
+ if (*fromcp == '\0' || *fromcp == '/') {
+ error = EINVAL;
+ goto out;
+ }
+ cnp->cn_hash += (unsigned char)*fromcp; /* the hash is the sum of the name's bytes */
+ *tocp++ = *fromcp++;
+ rem--;
+ }
+ *tocp = '\0';
+ *mdp = md;
+ *dposp = fromcp;
+ len = nfsm_rndup(len)-len; /* from here on len is the number of fill bytes to skip */
+ if (len > 0) {
+ if (rem >= len)
+ *dposp += len;
+ else if (error = nfs_adv(mdp, dposp, len, rem))
+ goto out;
+ }
+ ndp->ni_pathlen = tocp - cnp->cn_pnbuf;
+ cnp->cn_nameptr = cnp->cn_pnbuf;
+ /*
+ * Extract and set starting directory.
+ */
+ if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+ nam, &rdonly))
+ goto out;
+ if (dp->v_type != VDIR) {
+ vrele(dp);
+ error = ENOTDIR;
+ goto out;
+ }
+ ndp->ni_startdir = dp;
+ if (rdonly)
+ cnp->cn_flags |= (NOCROSSMOUNT | RDONLY);
+ else
+ cnp->cn_flags |= NOCROSSMOUNT;
+ /*
+ * And call lookup() to do the real work
+ */
+ cnp->cn_proc = p;
+ if (error = lookup(ndp))
+ goto out;
+ /*
+ * Check for encountering a symbolic link
+ */
+ if (cnp->cn_flags & ISSYMLINK) {
+ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+ vput(ndp->ni_dvp);
+ else
+ vrele(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ ndp->ni_vp = NULL;
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Check for saved name request
+ */
+ if (cnp->cn_flags & (SAVENAME | SAVESTART)) {
+ cnp->cn_flags |= HASBUF;
+ return (0);
+ }
+out:
+ FREE(cnp->cn_pnbuf, M_NAMEI); /* reached on every error, and on success when the name was not asked to be kept */
+ return (error);
+}
+
+/*
+ * Variant of m_adj() that only ever trims "len" bytes off the tail of
+ * the chain "mp" and, when "nul" is positive, zeroes the last "nul"
+ * bytes that remain so the data ends in null fill to a long boundary.
+ */
+void
+nfsm_adj(mp, len, nul)
+	struct mbuf *mp;
+	register int len;
+	int nul;
+{
+	register struct mbuf *m;
+	register int count, i;
+	register char *cp;
+
+	/*
+	 * Walk to the last mbuf, adding up the length of the whole
+	 * chain on the way.
+	 */
+	m = mp;
+	count = m->m_len;
+	while (m->m_next != (struct mbuf *)0) {
+		m = m->m_next;
+		count += m->m_len;
+	}
+
+	/*
+	 * Easy case: the trim fits inside the last mbuf.
+	 */
+	if (m->m_len > len) {
+		m->m_len -= len;
+		if (nul > 0) {
+			cp = mtod(m, caddr_t) + m->m_len - nul;
+			for (i = nul; i > 0; i--)
+				*cp++ = '\0';
+		}
+		return;
+	}
+
+	/*
+	 * Otherwise "count" becomes the length the chain has to end up
+	 * with (never negative).  Skip the mbufs that stay whole, cut
+	 * the one holding the last byte, and empty everything after it.
+	 */
+	count = (count > len) ? count - len : 0;
+	m = mp;
+	while (m != (struct mbuf *)0 && m->m_len < count) {
+		count -= m->m_len;
+		m = m->m_next;
+	}
+	if (m != (struct mbuf *)0) {
+		m->m_len = count;
+		if (nul > 0) {
+			cp = mtod(m, caddr_t) + m->m_len - nul;
+			for (i = nul; i > 0; i--)
+				*cp++ = '\0';
+		}
+	}
+	while ((m = m->m_next) != (struct mbuf *)0)
+		m->m_len = 0;
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ *	- look up fsid in mount list (if not found ret error)
+ *	- get vp and export rights by calling VFS_FHTOVP()
+ *	- if MNT_EXKERB, replace cred with the mapping found in slp's
+ *	  uid hash, or fail with NQNFS_AUTHERR when there is none
+ *	- else if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
+ *	- if not lockflag unlock it with VOP_UNLOCK()
+ *
+ * On success 0 is returned, *vpp holds the vnode and *rdonlyp is 1 for
+ * a MNT_EXRDONLY export and 0 otherwise.  ESTALE is returned when the
+ * fsid is unknown; errors from VFS_FHTOVP() are passed through.
+ * Whenever groups are copied into cred its cr_ngroups is set to the
+ * number copied, so that no group left over from the request stays
+ * in force.
+ */
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp)
+	fhandle_t *fhp;
+	int lockflag;
+	struct vnode **vpp;
+	struct ucred *cred;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	int *rdonlyp;
+{
+	register struct mount *mp;
+	register struct nfsuid *uidp;
+	register int i;
+	struct ucred *credanon;
+	int error, exflags;
+
+	*vpp = (struct vnode *)0;
+	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if (error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon))
+		return (error);
+	/*
+	 * Check/setup credentials.
+	 */
+	if (exflags & MNT_EXKERB) {
+		uidp = slp->ns_uidh[NUIDHASH(cred->cr_uid)];
+		while (uidp) {
+			if (uidp->nu_uid == cred->cr_uid)
+				break;
+			uidp = uidp->nu_hnext;
+		}
+		if (uidp == (struct nfsuid *)0) {
+			vput(*vpp);
+			return (NQNFS_AUTHERR);
+		}
+		cred->cr_uid = uidp->nu_cr.cr_uid;
+		for (i = 0; i < uidp->nu_cr.cr_ngroups && i < NGROUPS; i++)
+			cred->cr_groups[i] = uidp->nu_cr.cr_groups[i];
+		cred->cr_ngroups = i;
+	} else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+		cred->cr_uid = credanon->cr_uid;
+		for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+			cred->cr_groups[i] = credanon->cr_groups[i];
+		cred->cr_ngroups = i;
+	}
+	if (exflags & MNT_EXRDONLY)
+		*rdonlyp = 1;
+	else
+		*rdonlyp = 0;
+	if (!lockflag)
+		VOP_UNLOCK(*vpp);
+	return (0);
+}
+
+/*
+ * Decide whether the address in "nam" names the same host as "haddr",
+ * judged by address family.  Returns TRUE (1) on a match; an unknown
+ * family, or any doubt at all, gives FALSE (0).
+ * AF_INET is compared straight against the 4 byte address kept in
+ * haddr->had_inetaddr, so no address mbuf has to be saved for it.
+ */
+netaddr_match(family, haddr, nam)
+	int family;
+	union nethostaddr *haddr;
+	struct mbuf *nam;
+{
+	if (family == AF_INET) {
+		register struct sockaddr_in *sin;
+
+		sin = mtod(nam, struct sockaddr_in *);
+		return (sin->sin_family == AF_INET &&
+		    sin->sin_addr.s_addr == haddr->had_inetaddr);
+	}
+#ifdef ISO
+	if (family == AF_ISO) {
+		register struct sockaddr_iso *theirs, *ours;
+
+		theirs = mtod(nam, struct sockaddr_iso *);
+		ours = mtod(haddr->had_nam, struct sockaddr_iso *);
+		return (theirs->siso_family == AF_ISO &&
+		    theirs->siso_nlen > 0 &&
+		    theirs->siso_nlen == ours->siso_nlen &&
+		    SAME_ISOADDR(theirs, ours));
+	}
+#endif /* ISO */
+	return (0);
+}
diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c
new file mode 100644
index 000000000000..5d86b42ee20a
--- /dev/null
+++ b/sys/nfs/nfs_syscalls.c
@@ -0,0 +1,874 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_syscalls.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/namei.h>
+#include <sys/syslog.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+/* Global defs. */
+extern u_long nfs_prog, nfs_vers;
+extern int (*nfsrv_procs[NFS_NPROCS])(); /* server procedures, indexed by nd_procnum */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; /* set to the iod's proc before it sleeps */
+extern int nfs_numasync; /* number of nfsiods inside nfssvc_iod() */
+extern time_t nqnfsstarttime; /* compared with time.tv_sec while "notstarted" */
+extern struct nfsrv_req nsrvq_head;
+extern struct nfsd nfsd_head; /* head of the nfsd queue; nd_flag holds NFSD_CHECKSLP */
+extern int nqsrv_writeslack;
+extern int nfsrtton; /* non-zero: nfssvc_nfsd() logs through nfsd_rt() */
+struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock; /* allocated by nfsrv_init() */
+int nuidhash_max = NFS_MAXUIDHASH; /* limit on ns_numuids per socket */
+static int nfs_numnfsd = 0; /* nfsd structures allocated by nfssvc_nfsd() */
+int nfsd_waiting = 0; /* nfsds currently asleep waiting for work */
+static int notstarted = 1; /* cleared by the NQNFS start up check */
+static int modify_flag = 0; /* set when a write is seen while notstarted */
+static struct nfsdrt nfsdrt; /* log filled in by nfsd_rt() */
+void nfsrv_cleancache(), nfsrv_rcv(), nfsrv_wakenfsd(), nfs_sndunlock();
+static void nfsd_rt();
+void nfsrv_slpderef(), nfsrv_init();
+
+#define TRUE 1
+#define FALSE 0
+
+static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; /* non-zero: slot taken by an nfsiod */
+/*
+ * NFS server system calls
+ * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
+ */
+
+/*
+ * Get file handle system call
+ *
+ * Looks up uap->fname, builds the file handle from the file system id
+ * of its mount and the fid reported by VFS_VPTOFH(), and copies the
+ * handle out to uap->fhp.  Restricted to the super user.
+ */
+struct getfh_args {
+	char	*fname;
+	fhandle_t *fhp;
+};
+getfh(p, uap, retval)
+	struct proc *p;
+	register struct getfh_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	register struct vnode *vp;
+	fhandle_t fh;
+	int error;
+
+	/*
+	 * Must be super user
+	 */
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error)
+		return (error);
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	vp = nd.ni_vp;
+
+	/*
+	 * Start from an all zero handle, fill it in, and let go of the
+	 * vnode before looking at the result.
+	 */
+	bzero((caddr_t)&fh, sizeof(fh));
+	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+	error = VFS_VPTOFH(vp, &fh.fh_fid);
+	vput(vp);
+	if (error)
+		return (error);
+	return (copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh)));
+}
+
+static struct nfssvc_sock nfssvc_sockhead; /* list head for the server sockets; ns_flag holds SLP_INIT/SLP_WANTINIT */
+
+/*
+ * Nfs server pseudo system call for the nfsd's
+ * Based on the flag value it either:
+ * - remains in the kernel as an nfsiod (NFSSVC_BIOD)
+ * - hands an nfs mount point to nqnfs_clientd() (NFSSVC_MNTD)
+ * - adds a socket to the selection list (NFSSVC_ADDSOCK)
+ * - remains in the kernel as an nfsd (anything else); before doing so
+ *   a credential handed back by the daemon is installed in the
+ *   socket's uid hash (NFSSVC_AUTHIN) or the request is flagged as
+ *   failed (NFSSVC_AUTHINFAIL)
+ * uap->argp is the user address of the argument structure that goes
+ * with the chosen operation.  The caller must be the super user.
+ * The call waits while nfsrv_init() has SLP_INIT set.  EINTR and
+ * ERESTART are turned into a 0 return.
+ */
+struct nfssvc_args {
+	int flag;
+	caddr_t argp;
+};
+nfssvc(p, uap, retval)
+	struct proc *p;
+	register struct nfssvc_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct file *fp;
+	struct mbuf *nam;
+	struct nfsd_args nfsdarg;
+	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
+	struct nfsd_cargs ncd;
+	struct nfsd *nfsd;
+	struct nfssvc_sock *slp;
+	struct nfsuid *nuidp, **nuh;
+	struct nfsmount *nmp;
+	int error;
+
+	/*
+	 * Must be super user
+	 */
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	while (nfssvc_sockhead.ns_flag & SLP_INIT) {
+		nfssvc_sockhead.ns_flag |= SLP_WANTINIT;
+		(void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
+	}
+	if (uap->flag & NFSSVC_BIOD)
+		error = nfssvc_iod(p);
+	else if (uap->flag & NFSSVC_MNTD) {
+		if (error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)))
+			return (error);
+		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
+			ncd.ncd_dirp, p);
+		if (error = namei(&nd))
+			return (error);
+		/* The path has to be the root of a mounted file system. */
+		if ((nd.ni_vp->v_flag & VROOT) == 0)
+			error = EINVAL;
+		nmp = VFSTONFS(nd.ni_vp->v_mount);
+		vput(nd.ni_vp);
+		if (error)
+			return (error);
+		if ((nmp->nm_flag & NFSMNT_MNTD) &&
+			(uap->flag & NFSSVC_GOTAUTH) == 0)
+			return (0);
+		nmp->nm_flag |= NFSMNT_MNTD;
+		error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
+			uap->argp, p);
+	} else if (uap->flag & NFSSVC_ADDSOCK) {
+		if (error = copyin(uap->argp, (caddr_t)&nfsdarg,
+			sizeof(nfsdarg)))
+			return (error);
+		if (error = getsock(p->p_fd, nfsdarg.sock, &fp))
+			return (error);
+		/*
+		 * Get the client address for connected sockets.
+		 */
+		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
+			nam = (struct mbuf *)0;
+		else if (error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
+			MT_SONAME))
+			return (error);
+		error = nfssvc_addsock(fp, nam);
+	} else {
+		if (error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)))
+			return (error);
+		if ((uap->flag & NFSSVC_AUTHIN) && (nfsd = nsd->nsd_nfsd) &&
+			(nfsd->nd_slp->ns_flag & SLP_VALID)) {
+			slp = nfsd->nd_slp;
+
+			/*
+			 * First check to see if another nfsd has already
+			 * added this credential.
+			 */
+			nuidp = slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+			while (nuidp) {
+				if (nuidp->nu_uid == nsd->nsd_uid)
+					break;
+				nuidp = nuidp->nu_hnext;
+			}
+			if (!nuidp) {
+				/*
+				 * Nope, so we will.
+				 */
+				if (slp->ns_numuids < nuidhash_max) {
+					slp->ns_numuids++;
+					nuidp = (struct nfsuid *)
+					   malloc(sizeof (struct nfsuid), M_NFSUID,
+						M_WAITOK);
+				} else
+					nuidp = (struct nfsuid *)0;
+				/*
+				 * Look at SLP_VALID again now that the
+				 * M_WAITOK allocation is behind us.
+				 */
+				if ((slp->ns_flag & SLP_VALID) == 0) {
+					if (nuidp)
+						free((caddr_t)nuidp, M_NFSUID);
+				} else {
+					if (nuidp == (struct nfsuid *)0) {
+						/*
+						 * Table is full, recycle the entry
+						 * at the far end of the lru list.
+						 * An entry without nu_hprev is the
+						 * first one of its hash chain, so
+						 * the bucket itself has to be moved
+						 * on or it would keep pointing at
+						 * the recycled entry.
+						 */
+						nuidp = slp->ns_lruprev;
+						remque(nuidp);
+						if (nuidp->nu_hprev)
+							nuidp->nu_hprev->nu_hnext =
+							    nuidp->nu_hnext;
+						else
+							slp->ns_uidh[NUIDHASH(nuidp->nu_uid)] =
+							    nuidp->nu_hnext;
+						if (nuidp->nu_hnext)
+							nuidp->nu_hnext->nu_hprev =
+							    nuidp->nu_hprev;
+					}
+					nuidp->nu_cr = nsd->nsd_cr;
+					if (nuidp->nu_cr.cr_ngroups > NGROUPS)
+						nuidp->nu_cr.cr_ngroups = NGROUPS;
+					nuidp->nu_cr.cr_ref = 1;
+					nuidp->nu_uid = nsd->nsd_uid;
+					/* Front of the lru list and of its hash chain. */
+					insque(nuidp, (struct nfsuid *)slp);
+					nuh = &slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+					if (nuidp->nu_hnext = *nuh)
+						nuidp->nu_hnext->nu_hprev = nuidp;
+					nuidp->nu_hprev = (struct nfsuid *)0;
+					*nuh = nuidp;
+				}
+			}
+		}
+		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
+			nfsd->nd_flag |= NFSD_AUTHFAIL;
+		error = nfssvc_nfsd(nsd, uap->argp, p);
+	}
+	if (error == EINTR || error == ERESTART)
+		error = 0;
+	return (error);
+}
+
+/*
+ * Adds a socket to the list for servicing by nfsds.
+ *
+ * fp is the file of the socket and mynam an mbuf with the client
+ * address (may be null).  UDP sockets, and CLTP ones under ISO, use
+ * the preallocated nfs_udpsock / nfs_cltpsock entry and are refused
+ * with EPERM when that entry is already SLP_VALID; every other socket
+ * gets a new nfssvc_sock linked in at the tail of the list.
+ * mynam is freed on the error returns and kept in ns_nam otherwise.
+ * Returns 0, EPERM or the error from soreserve().
+ */
+nfssvc_addsock(fp, mynam)
+ struct file *fp;
+ struct mbuf *mynam;
+{
+ register struct mbuf *m;
+ register int siz;
+ register struct nfssvc_sock *slp;
+ register struct socket *so;
+ struct nfssvc_sock *tslp;
+ int error, s;
+
+ so = (struct socket *)fp->f_data;
+ tslp = (struct nfssvc_sock *)0;
+ /*
+ * Add it to the list, as required.
+ */
+ if (so->so_proto->pr_protocol == IPPROTO_UDP) {
+ tslp = nfs_udpsock;
+ if (tslp->ns_flag & SLP_VALID) {
+ m_freem(mynam);
+ return (EPERM);
+ }
+#ifdef ISO
+ } else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
+ tslp = nfs_cltpsock;
+ if (tslp->ns_flag & SLP_VALID) {
+ m_freem(mynam);
+ return (EPERM);
+ }
+#endif /* ISO */
+ }
+ if (so->so_type == SOCK_STREAM)
+ siz = NFS_MAXPACKET + sizeof (u_long); /* extra u_long on stream sockets */
+ else
+ siz = NFS_MAXPACKET;
+ if (error = soreserve(so, siz, siz)) {
+ m_freem(mynam);
+ return (error);
+ }
+
+ /*
+ * Set protocol specific options { for now TCP only } and
+ * reserve some space. For datagram sockets, this can get called
+ * repeatedly for the same socket, but that isn't harmful.
+ * Stream sockets get SO_KEEPALIVE, AF_INET TCP sockets also get
+ * TCP_NODELAY; the results of sosetopt() are not checked.
+ */
+ if (so->so_type == SOCK_STREAM) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+ }
+ if (so->so_proto->pr_domain->dom_family == AF_INET &&
+ so->so_proto->pr_protocol == IPPROTO_TCP) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+ }
+ so->so_rcv.sb_flags &= ~SB_NOINTR;
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_flags &= ~SB_NOINTR;
+ so->so_snd.sb_timeo = 0;
+ if (tslp)
+ slp = tslp;
+ else {
+ slp = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
+ slp->ns_prev = nfssvc_sockhead.ns_prev; /* link in at the tail of the list */
+ slp->ns_prev->ns_next = slp;
+ slp->ns_next = &nfssvc_sockhead;
+ nfssvc_sockhead.ns_prev = slp;
+ slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp; /* empty uid lru list */
+ }
+ slp->ns_so = so;
+ slp->ns_nam = mynam;
+ fp->f_count++; /* reference given up by closef() in nfsrv_zapsock() */
+ slp->ns_fp = fp;
+ s = splnet();
+ so->so_upcallarg = (caddr_t)slp;
+ so->so_upcall = nfsrv_rcv;
+ slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
+ nfsrv_wakenfsd(slp);
+ splx(s);
+ return (0);
+}
+
+/*
+ * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
+ * until it is killed by a signal.
+ *
+ * nsd is the kernel copy of the daemon's arguments; nsd->nsd_nfsd is
+ * the nfsd structure of this daemon and is allocated here when null.
+ * argp is the user address nsd is copied back to when authorization
+ * is needed, p the calling process.
+ *
+ * Returns ENEEDAUTH after copying the authentication string and nsd
+ * out to the daemon (the nfsd structure is kept, with the request
+ * still marked NFSD_REQINPROG, for the next call).  Any other return
+ * goes through "done", which dequeues and frees the nfsd structure
+ * and calls nfsrv_init(TRUE) when the last nfsd is gone.
+ */
+nfssvc_nfsd(nsd, argp, p)
+ struct nfsd_srvargs *nsd;
+ caddr_t argp;
+ struct proc *p;
+{
+ register struct mbuf *m, *nam2;
+ register int siz;
+ register struct nfssvc_sock *slp;
+ register struct socket *so;
+ register int *solockp;
+ struct nfsd *nd = nsd->nsd_nfsd;
+ struct mbuf *mreq, *nam;
+ struct timeval starttime;
+ struct nfsuid *uidp;
+ int error, cacherep, s;
+ int sotype;
+
+ s = splnet();
+ if (nd == (struct nfsd *)0) {
+ nsd->nsd_nfsd = nd = (struct nfsd *)
+ malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
+ bzero((caddr_t)nd, sizeof (struct nfsd));
+ nd->nd_procp = p;
+ nd->nd_cr.cr_ref = 1;
+ insque(nd, &nfsd_head);
+ nd->nd_nqlflag = NQL_NOVAL;
+ nfs_numnfsd++;
+ }
+ /*
+ * Loop getting rpc requests until SIGKILL.
+ * The top of the loop runs at splnet; the spl is dropped once a
+ * request has been picked up and raised again after the reply.
+ */
+ for (;;) {
+ if ((nd->nd_flag & NFSD_REQINPROG) == 0) {
+ while (nd->nd_slp == (struct nfssvc_sock *)0 &&
+ (nfsd_head.nd_flag & NFSD_CHECKSLP) == 0) {
+ nd->nd_flag |= NFSD_WAITING;
+ nfsd_waiting++;
+ error = tsleep((caddr_t)nd, PSOCK | PCATCH, "nfsd", 0);
+ nfsd_waiting--;
+ if (error)
+ goto done;
+ }
+ if (nd->nd_slp == (struct nfssvc_sock *)0 &&
+ (nfsd_head.nd_flag & NFSD_CHECKSLP)) {
+ slp = nfssvc_sockhead.ns_next; /* look for a valid socket flagged SLP_DOREC */
+ while (slp != &nfssvc_sockhead) {
+ if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
+ == (SLP_VALID | SLP_DOREC)) {
+ slp->ns_flag &= ~SLP_DOREC;
+ slp->ns_sref++; /* dropped again by nfsrv_slpderef() */
+ nd->nd_slp = slp;
+ break;
+ }
+ slp = slp->ns_next;
+ }
+ if (slp == &nfssvc_sockhead)
+ nfsd_head.nd_flag &= ~NFSD_CHECKSLP;
+ }
+ if ((slp = nd->nd_slp) == (struct nfssvc_sock *)0)
+ continue;
+ if (slp->ns_flag & SLP_VALID) {
+ if (slp->ns_flag & SLP_DISCONN)
+ nfsrv_zapsock(slp);
+ else if (slp->ns_flag & SLP_NEEDQ) {
+ slp->ns_flag &= ~SLP_NEEDQ;
+ (void) nfs_sndlock(&slp->ns_solock,
+ (struct nfsreq *)0);
+ nfsrv_rcv(slp->ns_so, (caddr_t)slp,
+ M_WAIT);
+ nfs_sndunlock(&slp->ns_solock);
+ }
+ error = nfsrv_dorec(slp, nd);
+ nd->nd_flag |= NFSD_REQINPROG;
+ }
+ } else {
+ error = 0;
+ slp = nd->nd_slp;
+ }
+ if (error || (slp->ns_flag & SLP_VALID) == 0) { /* NOTE(review): error is not assigned above when slp was already invalid */
+ nd->nd_slp = (struct nfssvc_sock *)0;
+ nd->nd_flag &= ~NFSD_REQINPROG;
+ nfsrv_slpderef(slp);
+ continue;
+ }
+ splx(s);
+ so = slp->ns_so;
+ sotype = so->so_type;
+ starttime = time;
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED)
+ solockp = &slp->ns_solock;
+ else
+ solockp = (int *)0;
+ /*
+ * nam == nam2 for connectionless protocols such as UDP
+ * nam2 == NULL for connection based protocols to disable
+ * recent request caching.
+ * NOTE(review): in the second case nam is slp->ns_nam, which
+ * nfssvc() leaves null when the daemon supplied no name; nam
+ * is dereferenced below and in nfsd_rt() - confirm it cannot
+ * be null there.
+ */
+ if (nam2 = nd->nd_nam) {
+ nam = nam2;
+ cacherep = RC_CHECKIT;
+ } else {
+ nam = slp->ns_nam;
+ cacherep = RC_DOIT;
+ }
+
+ /*
+ * Check to see if authorization is needed.
+ */
+ if (nd->nd_flag & NFSD_NEEDAUTH) {
+ static int logauth = 0; /* makes the warning below print only once */
+
+ nd->nd_flag &= ~NFSD_NEEDAUTH;
+ /*
+ * Check for a mapping already installed.
+ */
+ uidp = slp->ns_uidh[NUIDHASH(nd->nd_cr.cr_uid)];
+ while (uidp) {
+ if (uidp->nu_uid == nd->nd_cr.cr_uid)
+ break;
+ uidp = uidp->nu_hnext;
+ }
+ if (!uidp) {
+ nsd->nsd_uid = nd->nd_cr.cr_uid;
+ if (nam2 && logauth++ == 0)
+ log(LOG_WARNING, "Kerberized NFS using UDP\n");
+ nsd->nsd_haddr =
+ mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+ nsd->nsd_authlen = nd->nd_authlen;
+ if (copyout(nd->nd_authstr, nsd->nsd_authstr,
+ nd->nd_authlen) == 0 &&
+ copyout((caddr_t)nsd, argp, sizeof (*nsd)) == 0)
+ return (ENEEDAUTH);
+ cacherep = RC_DROPIT; /* could not hand it to the daemon, drop the request */
+ }
+ }
+ if (cacherep == RC_CHECKIT)
+ cacherep = nfsrv_getcache(nam2, nd, &mreq);
+
+ /*
+ * Check for just starting up for NQNFS and send
+ * fake "try again later" replies to the NQNFS clients.
+ */
+ if (notstarted && nqnfsstarttime <= time.tv_sec) {
+ if (modify_flag) {
+ nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
+ modify_flag = 0;
+ } else
+ notstarted = 0;
+ }
+ if (notstarted) {
+ if (nd->nd_nqlflag == NQL_NOVAL)
+ cacherep = RC_DROPIT;
+ else if (nd->nd_procnum != NFSPROC_WRITE) {
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = NQNFS_TRYLATER;
+ cacherep = RC_DOIT;
+ } else
+ modify_flag = 1;
+ } else if (nd->nd_flag & NFSD_AUTHFAIL) {
+ nd->nd_flag &= ~NFSD_AUTHFAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = NQNFS_AUTHERR;
+ cacherep = RC_DOIT;
+ }
+
+ switch (cacherep) {
+ case RC_DOIT:
+ error = (*(nfsrv_procs[nd->nd_procnum]))(nd,
+ nd->nd_mrep, nd->nd_md, nd->nd_dpos, &nd->nd_cr,
+ nam, &mreq);
+ if (nd->nd_cr.cr_ref != 1) {
+ printf("nfssvc cref=%d\n", nd->nd_cr.cr_ref);
+ panic("nfssvc cref");
+ }
+ if (error) {
+ if (nd->nd_procnum != NQNFSPROC_VACATED)
+ nfsstats.srv_errs++;
+ if (nam2) {
+ nfsrv_updatecache(nam2, nd, FALSE, mreq);
+ m_freem(nam2);
+ }
+ break;
+ }
+ nfsstats.srvrpccnt[nd->nd_procnum]++;
+ if (nam2)
+ nfsrv_updatecache(nam2, nd, TRUE, mreq);
+ nd->nd_mrep = (struct mbuf *)0; /* FALLTHROUGH into RC_REPLY to send mreq */
+ case RC_REPLY:
+ m = mreq;
+ siz = 0;
+ while (m) {
+ siz += m->m_len;
+ m = m->m_next;
+ }
+ if (siz <= 0 || siz > NFS_MAXPACKET) {
+ printf("mbuf siz=%d\n",siz);
+ panic("Bad nfs svc reply");
+ }
+ m = mreq;
+ m->m_pkthdr.len = siz;
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+ /*
+ * For stream protocols, prepend a Sun RPC
+ * Record Mark.
+ */
+ if (sotype == SOCK_STREAM) {
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ *mtod(m, u_long *) = htonl(0x80000000 | siz);
+ }
+ if (solockp)
+ (void) nfs_sndlock(solockp, (struct nfsreq *)0);
+ if (slp->ns_flag & SLP_VALID)
+ error = nfs_send(so, nam2, m, (struct nfsreq *)0);
+ else {
+ error = EPIPE;
+ m_freem(m);
+ }
+ if (nfsrtton)
+ nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+ if (nam2)
+ MFREE(nam2, m);
+ if (nd->nd_mrep)
+ m_freem(nd->nd_mrep);
+ if (error == EPIPE)
+ nfsrv_zapsock(slp);
+ if (solockp)
+ nfs_sndunlock(solockp);
+ if (error == EINTR || error == ERESTART) {
+ nfsrv_slpderef(slp);
+ s = splnet(); /* "done" expects to run at splnet */
+ goto done;
+ }
+ break;
+ case RC_DROPIT:
+ if (nfsrtton)
+ nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+ m_freem(nd->nd_mrep);
+ m_freem(nam2);
+ break;
+ };
+ s = splnet();
+ if (nfsrv_dorec(slp, nd)) { /* non-zero: no further request taken from this socket */
+ nd->nd_flag &= ~NFSD_REQINPROG;
+ nd->nd_slp = (struct nfssvc_sock *)0;
+ nfsrv_slpderef(slp);
+ }
+ }
+done:
+ remque(nd);
+ splx(s);
+ free((caddr_t)nd, M_NFSD);
+ nsd->nsd_nfsd = (struct nfsd *)0;
+ if (--nfs_numnfsd == 0)
+ nfsrv_init(TRUE); /* Reinitialize everything */
+ return (error);
+}
+
+/*
+ * Asynchronous I/O daemon for client nfs.
+ * Each daemon claims one slot of nfs_asyncdaemon[] (EBUSY when all of
+ * them are taken) and then services the buffers queued on nfs_bufq,
+ * i.e. the read-ahead and write-behind on the block I/O cache.
+ * It only returns, with the error from tsleep(), once its sleep has
+ * been interrupted; the queue is drained one more time first.
+ */
+nfssvc_iod(p)
+	struct proc *p;
+{
+	register struct buf *bp;
+	register int slot;
+	int error = 0;
+
+	/*
+	 * Find a free slot and make it mine.
+	 */
+	for (slot = 0; slot < NFS_MAXASYNCDAEMON; slot++)
+		if (nfs_asyncdaemon[slot] == 0)
+			break;
+	if (slot == NFS_MAXASYNCDAEMON)
+		return (EBUSY);
+	nfs_asyncdaemon[slot]++;
+	nfs_numasync++;
+
+	/*
+	 * Sleep until there is work, do all of it, and go around again
+	 * for as long as the sleep was not interrupted.
+	 */
+	do {
+		while (nfs_bufq.tqh_first == NULL && error == 0) {
+			nfs_iodwant[slot] = p;
+			error = tsleep((caddr_t)&nfs_iodwant[slot],
+				PWAIT | PCATCH, "nfsidl", 0);
+		}
+		while ((bp = nfs_bufq.tqh_first) != NULL) {
+			/* Always work from the head of the queue. */
+			TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
+			(void) nfs_doio(bp,
+				(bp->b_flags & B_READ) ? bp->b_rcred : bp->b_wcred,
+				(struct proc *)0);
+		}
+	} while (error == 0);
+
+	nfs_asyncdaemon[slot] = 0;
+	nfs_numasync--;
+	return (error);
+}
+
+/*
+ * Shut down the socket that belongs to an nfssvc_sock structure.
+ * Should be called with the send lock set, if required.
+ * All SLP_ALLFLAGS bits are cleared first.  If the structure still
+ * has a file, the upcall is removed, the socket is shut down and the
+ * file closed, the name and the pending raw/record mbufs are freed
+ * and every cached uid entry is thrown away.  The structure itself
+ * is not freed here.
+ */
+nfsrv_zapsock(slp)
+	register struct nfssvc_sock *slp;
+{
+	register struct nfsuid *cur, *next;
+	register int bucket;
+	struct socket *so;
+	struct file *fp;
+	struct mbuf *m;
+
+	slp->ns_flag &= ~SLP_ALLFLAGS;
+	fp = slp->ns_fp;
+	if (fp != (struct file *)0) {
+		slp->ns_fp = (struct file *)0;
+		so = slp->ns_so;
+		so->so_upcall = NULL;
+		soshutdown(so, 2);
+		closef(fp, (struct proc *)0);
+		if (slp->ns_nam)
+			MFREE(slp->ns_nam, m);
+		m_freem(slp->ns_raw);
+		m_freem(slp->ns_rec);
+
+		/*
+		 * Free the uid entries by walking the lru list, then
+		 * leave both the list and the hash table empty.
+		 */
+		for (cur = slp->ns_lrunext; cur != (struct nfsuid *)slp;
+		    cur = next) {
+			next = cur->nu_lrunext;
+			free((caddr_t)cur, M_NFSUID);
+		}
+		slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp;
+		for (bucket = 0; bucket < NUIDHASHSIZ; bucket++)
+			slp->ns_uidh[bucket] = (struct nfsuid *)0;
+	}
+}
+
+/*
+ * Get an authorization string for the uid by having the mount_nfs sitting
+ * on this mount point porpoise out of the kernel and do it.
+ *
+ * Waits for NFSMNT_WAITAUTH, then allocates an RPCAUTH_MAXSIZ buffer
+ * (handed back through *auth_str and also kept in nm_authstr), records
+ * cred->cr_uid in nm_authuid and wakes up whoever sleeps on
+ * nm_authstr.  It then waits for NFSMNT_HASAUTH.
+ * Returns 0 with *auth_type and *auth_len filled in from the mount
+ * structure, EAUTH when NFSMNT_AUTHERR was set, or the error from
+ * nfs_sigintr(); the buffer is freed here on every error return that
+ * comes after its allocation.
+ * NOTE(review): rep is dereferenced (rep->r_procp) without a check -
+ * confirm callers never pass a null rep.
+ */
+nfs_getauth(nmp, rep, cred, auth_type, auth_str, auth_len)
+ register struct nfsmount *nmp;
+ struct nfsreq *rep;
+ struct ucred *cred;
+ int *auth_type;
+ char **auth_str;
+ int *auth_len;
+{
+ int error = 0;
+
+ while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) { /* poll every 2 seconds until WAITAUTH is set */
+ nmp->nm_flag |= NFSMNT_WANTAUTH;
+ (void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
+ "nfsauth1", 2 * hz);
+ if (error = nfs_sigintr(nmp, rep, rep->r_procp)) {
+ nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+ return (error);
+ }
+ }
+ nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
+ nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
+ nmp->nm_authuid = cred->cr_uid;
+ wakeup((caddr_t)&nmp->nm_authstr);
+
+ /*
+ * And wait for mount_nfs to do its stuff.
+ */
+ while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) {
+ (void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
+ "nfsauth2", 2 * hz);
+ error = nfs_sigintr(nmp, rep, rep->r_procp);
+ }
+ if (nmp->nm_flag & NFSMNT_AUTHERR) {
+ nmp->nm_flag &= ~NFSMNT_AUTHERR;
+ error = EAUTH;
+ }
+ if (error)
+ free((caddr_t)*auth_str, M_TEMP); /* NOTE(review): nm_authstr still holds this freed pointer - confirm nothing uses it later */
+ else {
+ *auth_type = nmp->nm_authtype;
+ *auth_len = nmp->nm_authlen;
+ }
+ nmp->nm_flag &= ~NFSMNT_HASAUTH;
+ nmp->nm_flag |= NFSMNT_WAITAUTH;
+ if (nmp->nm_flag & NFSMNT_WANTAUTH) { /* wake a caller still waiting in the first loop */
+ nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+ wakeup((caddr_t)&nmp->nm_authtype);
+ }
+ return (error);
+}
+
+/*
+ * Dereference a server socket structure: drop one reference and,
+ * once nothing refers to it any more and it is no longer valid,
+ * take it off the socket list and free it.
+ */
+void
+nfsrv_slpderef(slp)
+	register struct nfssvc_sock *slp;
+{
+	slp->ns_sref--;
+	if (slp->ns_sref != 0 || (slp->ns_flag & SLP_VALID) != 0)
+		return;
+	slp->ns_prev->ns_next = slp->ns_next;
+	slp->ns_next->ns_prev = slp->ns_prev;
+	free((caddr_t)slp, M_NFSSVC);
+}
+
+/*
+ * (Re)build the data structures of the server.
+ * SLP_INIT is held in the list head for the duration, so that nfsds
+ * starting up in nfssvc() wait until this is finished; they are woken
+ * at the end when SLP_WANTINIT shows that somebody is waiting.
+ * With "terminating" set every socket structure on the list is shut
+ * down and freed and the server cache is cleaned out first.
+ */
+void
+nfsrv_init(terminating)
+	int terminating;
+{
+	register struct nfssvc_sock *slp;
+	struct nfssvc_sock *doomed;
+
+	if (nfssvc_sockhead.ns_flag & SLP_INIT)
+		panic("nfsd init");
+	nfssvc_sockhead.ns_flag |= SLP_INIT;
+
+	if (terminating) {
+		for (slp = nfssvc_sockhead.ns_next; slp != &nfssvc_sockhead; ) {
+			if (slp->ns_flag & SLP_VALID)
+				nfsrv_zapsock(slp);
+			/* Unlink, step to the next one, then free. */
+			doomed = slp;
+			slp = doomed->ns_next;
+			slp->ns_prev = doomed->ns_prev;
+			doomed->ns_prev->ns_next = slp;
+			free((caddr_t)doomed, M_NFSSVC);
+		}
+		nfsrv_cleancache();	/* And clear out server cache */
+	}
+
+	/*
+	 * Fresh, zeroed entries for the UDP and the CLTP socket.
+	 */
+	nfs_udpsock = (struct nfssvc_sock *)
+	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+	bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
+	nfs_cltpsock = (struct nfssvc_sock *)
+	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+	bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
+
+	/*
+	 * The socket list becomes head <-> udp <-> cltp <-> head.
+	 */
+	nfssvc_sockhead.ns_next = nfs_udpsock;
+	nfssvc_sockhead.ns_prev = nfs_cltpsock;
+	nfs_udpsock->ns_prev = &nfssvc_sockhead;
+	nfs_udpsock->ns_next = nfs_cltpsock;
+	nfs_cltpsock->ns_prev = nfs_udpsock;
+	nfs_cltpsock->ns_next = &nfssvc_sockhead;
+
+	/*
+	 * Empty uid lru lists and an empty nfsd queue.
+	 */
+	nfs_udpsock->ns_lrunext = nfs_udpsock->ns_lruprev =
+	    (struct nfsuid *)nfs_udpsock;
+	nfs_cltpsock->ns_lrunext = nfs_cltpsock->ns_lruprev =
+	    (struct nfsuid *)nfs_cltpsock;
+	nfsd_head.nd_next = nfsd_head.nd_prev = &nfsd_head;
+	nfsd_head.nd_flag = 0;
+
+	nfssvc_sockhead.ns_flag &= ~SLP_INIT;
+	if (nfssvc_sockhead.ns_flag & SLP_WANTINIT) {
+		nfssvc_sockhead.ns_flag &= ~SLP_WANTINIT;
+		wakeup((caddr_t)&nfssvc_sockhead);
+	}
+}
+
+/*
+ * Record one serviced request in the server monitor log.
+ * The entry at nfsdrt.pos is overwritten with the kind of reply
+ * (cacherep), the transport, the NQNFS indication, the procedure
+ * number, the client's internet address (INADDR_ANY for any other
+ * family), the time since *startp in microseconds and a time stamp;
+ * pos then moves on, wrapping around at NFSRTTLOGSIZ.
+ */
+static void
+nfsd_rt(startp, sotype, nd, nam, cacherep)
+	struct timeval *startp;
+	int sotype;
+	register struct nfsd *nd;
+	struct mbuf *nam;
+	int cacherep;
+{
+	register struct drt *rt = &nfsdrt.drt[nfsdrt.pos];
+
+	switch (cacherep) {
+	case RC_DOIT:
+		rt->flag = 0;
+		break;
+	case RC_REPLY:
+		rt->flag = DRT_CACHEREPLY;
+		break;
+	default:
+		rt->flag = DRT_CACHEDROP;
+		break;
+	}
+	if (sotype == SOCK_STREAM)
+		rt->flag |= DRT_TCP;
+	if (nd->nd_nqlflag != NQL_NOVAL)
+		rt->flag |= DRT_NQNFS;
+	rt->proc = nd->nd_procnum;
+	rt->ipadr = (mtod(nam, struct sockaddr *)->sa_family == AF_INET) ?
+	    mtod(nam, struct sockaddr_in *)->sin_addr.s_addr : INADDR_ANY;
+	rt->resptime = ((time.tv_sec - startp->tv_sec) * 1000000) +
+	    (time.tv_usec - startp->tv_usec);
+	rt->tstamp = time;
+	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
+}
diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c
new file mode 100644
index 000000000000..1f1867606892
--- /dev/null
+++ b/sys/nfs/nfs_vfsops.c
@@ -0,0 +1,740 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_vfsops.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/ioctl.h>
+#include <sys/signal.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsdiskless.h>
+#include <nfs/nqnfs.h>
+
+/*
+ * nfs vfs operations.
+ * The initializer is positional, so the order of the entries has to
+ * match the member order of struct vfsops.
+ */
+struct vfsops nfs_vfsops = {
+ nfs_mount, /* mount system call */
+ nfs_start, /* start routine, a no-op */
+ nfs_unmount, /* unmount system call */
+ nfs_root, /* return the root vnode of the mount */
+ nfs_quotactl, /* quotas, not supported */
+ nfs_statfs, /* file system statistics via rpc */
+ nfs_sync, /* flush dirty buffers */
+ nfs_vget, /* flat namespace lookup, not supported */
+ nfs_fhtovp, /* file handle to vnode, always EINVAL */
+ nfs_vptofh, /* vnode to file handle, always EINVAL */
+ nfs_init,
+};
+
+/*
+ * This structure must be filled in by a primary bootstrap or bootstrap
+ * server for a diskless/dataless machine. It is initialized below just
+ * to ensure that it is allocated to initialized data (.data not .bss).
+ * nfs_mountroot() reads its interface, gateway, swap and root mount
+ * parameters from here.
+ */
+struct nfs_diskless nfs_diskless = { 0 };
+
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_prog, nfs_vers;
+void nfs_disconnect __P((struct nfsmount *));
+void nfsargs_ntoh __P((struct nfs_args *));
+static struct mount *nfs_mountdiskless __P((char *, char *, int,
+ struct sockaddr_in *, struct nfs_args *, register struct vnode **));
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * nfs statfs call
+ * Does an NFSPROC_STATFS rpc on the root file handle of the mount and
+ * converts the reply into *sbp. The file counts (f_files, f_ffree) are
+ * only taken from the reply for nqnfs mounts; otherwise they are zeroed.
+ * When sbp is not the mount's own mnt_stat, the mount point and server
+ * names are copied into it as well.
+ * Returns 0 on success or an error number.
+ *
+ * NOTE(review): the nfsm_* macros are not defined in this file. They
+ * presumably use the otherwise unreferenced locals (mreq, mrep, md, mb,
+ * mb2, bpos, dpos, cp, cp2, t1) and leave through nfsm_reqdone with
+ * error set when the rpc fails -- confirm against nfsm_subs.h before
+ * renaming or removing any of them.
+ */
+int
+nfs_statfs(mp, sbp, p)
+ struct mount *mp;
+ register struct statfs *sbp;
+ struct proc *p;
+{
+ register struct vnode *vp;
+ register struct nfsv2_statfs *sfp;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct nfsmount *nmp;
+ struct ucred *cred;
+ struct nfsnode *np;
+
+ nmp = VFSTONFS(mp);
+ isnq = (nmp->nm_flag & NFSMNT_NQNFS); /* nonzero for nqnfs mounts */
+ if (error = nfs_nget(mp, &nmp->nm_fh, &np)) /* node for the root fh */
+ return (error);
+ vp = NFSTOV(np);
+ nfsstats.rpccnt[NFSPROC_STATFS]++;
+ cred = crget(); /* private credential for this rpc, freed below */
+ cred->cr_ngroups = 1;
+ nfsm_reqhead(vp, NFSPROC_STATFS, NFSX_FH);
+ nfsm_fhtom(vp);
+ nfsm_request(vp, NFSPROC_STATFS, p, cred);
+ nfsm_dissect(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq));
+ sbp->f_type = MOUNT_NFS;
+ sbp->f_flags = nmp->nm_flag;
+ sbp->f_iosize = NFS_MAXDGRAMDATA;
+ sbp->f_bsize = fxdr_unsigned(long, sfp->sf_bsize);
+ sbp->f_blocks = fxdr_unsigned(long, sfp->sf_blocks);
+ sbp->f_bfree = fxdr_unsigned(long, sfp->sf_bfree);
+ sbp->f_bavail = fxdr_unsigned(long, sfp->sf_bavail);
+ if (isnq) {
+ sbp->f_files = fxdr_unsigned(long, sfp->sf_files);
+ sbp->f_ffree = fxdr_unsigned(long, sfp->sf_ffree);
+ } else {
+ sbp->f_files = 0;
+ sbp->f_ffree = 0;
+ }
+ if (sbp != &mp->mnt_stat) { /* caller's own buffer: copy the names too */
+ bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+ bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+ }
+ nfsm_reqdone;
+ vrele(vp); /* done with the root vnode */
+ crfree(cred);
+ return (error);
+}
+
+/*
+ * Mount a remote root fs via nfs. This depends on the info in the
+ * nfs_diskless structure that has been filled in properly by some primary
+ * bootstrap.
+ * It goes something like this:
+ * - do enough of "ifconfig" by calling ifioctl() so that the system
+ * can talk to the server
+ * - If nfs_diskless.mygateway is filled in, use that address as
+ * a default gateway.
+ * - hand craft the swap nfs vnode hanging off a fake mount point
+ * if swdevt[0].sw_dev == NODEV
+ * - build the rootfs mount point and call mountnfs() to do the rest.
+ * - set hostname/hostnamelen and the time of day from the structure.
+ * Always returns 0; every failure on the way panics.
+ */
+int
+nfs_mountroot()
+{
+ register struct mount *mp;
+ register struct nfs_diskless *nd = &nfs_diskless;
+ struct socket *so;
+ struct vnode *vp;
+ struct proc *p = curproc; /* XXX */
+ int error, i;
+
+ /*
+ * XXX time must be non-zero when we init the interface or else
+ * the arp code will wedge...
+ */
+ if (time.tv_sec == 0)
+ time.tv_sec = 1;
+
+#ifdef notyet
+ /* Set up swap credentials. */
+ proc0.p_ucred->cr_uid = ntohl(nd->swap_ucred.cr_uid);
+ proc0.p_ucred->cr_gid = ntohl(nd->swap_ucred.cr_gid);
+ if ((proc0.p_ucred->cr_ngroups = ntohs(nd->swap_ucred.cr_ngroups)) >
+ NGROUPS)
+ proc0.p_ucred->cr_ngroups = NGROUPS;
+ for (i = 0; i < proc0.p_ucred->cr_ngroups; i++)
+ proc0.p_ucred->cr_groups[i] = ntohl(nd->swap_ucred.cr_groups[i]);
+#endif
+
+ /*
+ * Do enough of ifconfig(8) so that the critical net interface can
+ * talk to the server.
+ */
+ if (error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0))
+ panic("nfs_mountroot: socreate: %d", error);
+ if (error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, p))
+ panic("nfs_mountroot: SIOCAIFADDR: %d", error);
+ soclose(so); /* the socket was only needed for the ioctl */
+
+ /*
+ * If the gateway field is filled in, set it as the default route.
+ */
+ if (nd->mygateway.sin_len != 0) {
+ struct sockaddr_in mask, sin;
+
+ bzero((caddr_t)&mask, sizeof(mask));
+ sin = mask; /* destination starts out all zeros as well */
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(sin);
+ if (error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
+ (struct sockaddr *)&nd->mygateway,
+ (struct sockaddr *)&mask,
+ RTF_UP | RTF_GATEWAY, (struct rtentry **)0))
+ panic("nfs_mountroot: RTM_ADD: %d", error);
+ }
+
+ /*
+ * If swapping to an nfs node (indicated by swdevt[0].sw_dev == NODEV):
+ * Create a fake mount point just for the swap vnode so that the
+ * swap file can be on a different server from the rootfs.
+ */
+ if (swdevt[0].sw_dev == NODEV) {
+ nd->swap_args.fh = (nfsv2fh_t *)nd->swap_fh;
+ (void) nfs_mountdiskless(nd->swap_hostnam, "/swap", 0,
+ &nd->swap_saddr, &nd->swap_args, &vp);
+
+ /*
+ * Since the swap file is not the root dir of a file system,
+ * hack it to a regular file.
+ */
+ vp->v_type = VREG;
+ vp->v_flag = 0;
+ swapdev_vp = vp;
+ VREF(vp); /* vp is stored twice: swapdev_vp and swdevt[0].sw_vp */
+ swdevt[0].sw_vp = vp;
+ swdevt[0].sw_nblks = ntohl(nd->swap_nblks);
+ } else if (bdevvp(swapdev, &swapdev_vp))
+ panic("nfs_mountroot: can't setup swapdev_vp");
+
+ /*
+ * Create the rootfs mount point.
+ */
+ nd->root_args.fh = (nfsv2fh_t *)nd->root_fh;
+ mp = nfs_mountdiskless(nd->root_hostnam, "/", MNT_RDONLY,
+ &nd->root_saddr, &nd->root_args, &vp);
+
+ if (vfs_lock(mp))
+ panic("nfs_mountroot: vfs_lock");
+ TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+ mp->mnt_flag |= MNT_ROOTFS;
+ mp->mnt_vnodecovered = NULLVP;
+ vfs_unlock(mp);
+ rootvp = vp;
+
+ /*
+ * This is not really an nfs issue, but it is much easier to
+ * set hostname here and then let the "/etc/rc.xxx" files
+ * mount the right /var based upon its preset value.
+ */
+ bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
+ hostname[MAXHOSTNAMELEN - 1] = '\0'; /* force termination */
+ for (i = 0; i < MAXHOSTNAMELEN; i++)
+ if (hostname[i] == '\0')
+ break;
+ hostnamelen = i; /* length up to the first NUL */
+ inittodr(ntohl(nd->root_time));
+ return (0);
+}
+
+/*
+ * Internal version of mount system call for diskless setup.
+ *
+ * path      - server host name; becomes the "mounted from" name
+ * which     - local mount point name ("/" or "/swap"); becomes the
+ *             "mounted on" name and is quoted in panic messages
+ * mountflag - initial value of mp->mnt_flag
+ * sin       - server address, copied into a freshly allocated mbuf
+ * args      - nfs mount arguments in network byte order; converted to
+ *             host byte order in place by nfsargs_ntoh()
+ * vpp       - handed to mountnfs(), which stores the root vnode there
+ *
+ * Returns the newly allocated mount structure.  There is no error
+ * return: an allocation or mount failure panics.
+ */
+static struct mount *
+nfs_mountdiskless(path, which, mountflag, sin, args, vpp)
+	char *path;
+	char *which;
+	int mountflag;
+	struct sockaddr_in *sin;
+	struct nfs_args *args;
+	register struct vnode **vpp;
+{
+	register struct mount *mp;
+	register struct mbuf *m;
+	register int error;
+	register int i;
+	char mnton[MNAMELEN];
+
+	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
+	    M_MOUNT, M_NOWAIT);
+	if (mp == NULL)
+		panic("nfs_mountroot: %s mount malloc", which);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = &nfs_vfsops;
+	mp->mnt_flag = mountflag;
+
+	MGET(m, MT_SONAME, M_DONTWAIT);
+	if (m == NULL)
+		panic("nfs_mountroot: %s mount mbuf", which);
+	bcopy((caddr_t)sin, mtod(m, caddr_t), sin->sin_len);
+	m->m_len = sin->sin_len;
+	nfsargs_ntoh(args);
+
+	/*
+	 * mountnfs() bcopy()s a full MNAMELEN bytes from the mount point
+	 * name it is given, but "which" is a short string constant.  Pass
+	 * a zero padded, MNAMELEN sized copy instead, so that nothing is
+	 * read beyond the end of the caller's string and f_mntonname ends
+	 * up NUL padded.
+	 */
+	bzero(mnton, sizeof(mnton));
+	for (i = 0; i < MNAMELEN - 1 && which[i] != '\0'; i++)
+		mnton[i] = which[i];
+
+	if (error = mountnfs(args, mp, m, mnton, path, vpp))
+		panic("nfs_mountroot: mount %s on %s: %d", path, which, error);
+
+	return (mp);
+}
+
+/*
+ * Byte swap, in place, every integer member of an nfs_args structure
+ * from network to host order.  The pointer members are left alone.
+ * Used by nfs_mountdiskless() on the arguments supplied by the
+ * bootstrap.
+ */
+void
+nfsargs_ntoh(nfsp)
+	register struct nfs_args *nfsp;
+{
+	/* transport selection and option flags */
+	NTOHL(nfsp->flags);
+	NTOHL(nfsp->sotype);
+	NTOHL(nfsp->proto);
+
+	/* transfer sizes and retransmit tuning */
+	NTOHL(nfsp->rsize);
+	NTOHL(nfsp->wsize);
+	NTOHL(nfsp->timeo);
+	NTOHL(nfsp->retrans);
+
+	/* remaining tunables */
+	NTOHL(nfsp->maxgrouplist);
+	NTOHL(nfsp->readahead);
+	NTOHL(nfsp->leaseterm);
+	NTOHL(nfsp->deadthresh);
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ * Everything the caller supplied is copied into the kernel first: the
+ * nfs_args structure, the file handle it points to, the mount point
+ * name and the server host name.  Both names are NUL padded out to
+ * MNAMELEN.  Only when all of that has succeeded is sockargs() called,
+ * because it allocates an mbuf that would otherwise have to be
+ * released on every later error.  mountnfs() then does the real work
+ * and disposes of the mbuf itself if it fails.
+ *
+ * Returns 0 or the error from the first step that failed.
+ */
+/* ARGSUSED */
+int
+nfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct nfs_args args;
+	nfsv2fh_t fhandle;
+	char mnton[MNAMELEN], mntfrom[MNAMELEN];
+	struct mbuf *srvaddr;
+	struct vnode *rvp;
+	u_int copied;
+	int error;
+
+	/* mount arguments and the file handle they refer to */
+	error = copyin(data, (caddr_t)&args, sizeof (args));
+	if (error)
+		return (error);
+	error = copyin((caddr_t)args.fh, (caddr_t)&fhandle, sizeof (fhandle));
+	if (error)
+		return (error);
+
+	/* mount point name */
+	error = copyinstr(path, mnton, MNAMELEN-1, &copied);
+	if (error)
+		return (error);
+	bzero(&mnton[copied], MNAMELEN - copied);
+
+	/* server host name */
+	error = copyinstr(args.hostname, mntfrom, MNAMELEN-1, &copied);
+	if (error)
+		return (error);
+	bzero(&mntfrom[copied], MNAMELEN - copied);
+
+	/* sockargs() call must be after above copyin() calls */
+	error = sockargs(&srvaddr, (caddr_t)args.addr, args.addrlen,
+	    MT_SONAME);
+	if (error)
+		return (error);
+
+	/* from here on the file handle is the kernel copy */
+	args.fh = &fhandle;
+	return (mountnfs(&args, mp, srvaddr, mnton, mntfrom, &rvp));
+}
+
+/*
+ * Common code for mount and mountroot
+ *
+ * argp - mount arguments, with argp->fh pointing at a kernel copy of
+ * the file handle
+ * mp - mount structure being set up
+ * nam - mbuf holding the server address; this routine takes it over:
+ * it is kept in nm_nam on success and freed on failure or for
+ * an MNT_UPDATE request
+ * pth - mount point name, copied to f_mntonname
+ * hst - server name, copied to f_mntfromname
+ * (MNAMELEN bytes are copied from both pth and hst, so each
+ * must be at least that long)
+ * vpp - receives the root vnode on success
+ *
+ * Returns 0 on success. An MNT_UPDATE request currently changes nothing
+ * and returns 0 without setting *vpp. On error the nfsmount structure
+ * and the mbuf are released and the error number is returned.
+ */
+int
+mountnfs(argp, mp, nam, pth, hst, vpp)
+ register struct nfs_args *argp;
+ register struct mount *mp;
+ struct mbuf *nam;
+ char *pth, *hst;
+ struct vnode **vpp;
+{
+ register struct nfsmount *nmp;
+ struct nfsnode *np;
+ int error;
+
+ if (mp->mnt_flag & MNT_UPDATE) {
+ nmp = VFSTONFS(mp);
+ /* update paths, file handles, etc, here XXX */
+ m_freem(nam);
+ return (0);
+ } else {
+ MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount),
+ M_NFSMNT, M_WAITOK);
+ bzero((caddr_t)nmp, sizeof (struct nfsmount));
+ mp->mnt_data = (qaddr_t)nmp;
+ }
+ getnewfsid(mp, MOUNT_NFS);
+ nmp->nm_mountp = mp;
+ nmp->nm_flag = argp->flags;
+ if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_MYWRITE)) ==
+ (NFSMNT_NQNFS | NFSMNT_MYWRITE)) { /* the two cannot be combined */
+ error = EPERM;
+ goto bad;
+ }
+ if (nmp->nm_flag & NFSMNT_NQNFS)
+ /*
+ * We have to set mnt_maxsymlinklen to a non-zero value so
+ * that COMPAT_43 routines will know that we are setting
+ * the d_type field in directories (and can zero it for
+ * unsuspecting binaries).
+ */
+ mp->mnt_maxsymlinklen = 1;
+ nmp->nm_timeo = NFS_TIMEO; /* defaults; overridden from argp below */
+ nmp->nm_retry = NFS_RETRANS;
+ nmp->nm_wsize = NFS_WSIZE;
+ nmp->nm_rsize = NFS_RSIZE;
+ nmp->nm_numgrps = NFS_MAXGRPS;
+ nmp->nm_readahead = NFS_DEFRAHEAD;
+ nmp->nm_leaseterm = NQ_DEFLEASE;
+ nmp->nm_deadthresh = NQ_DEADTHRESH;
+ nmp->nm_tnext = (struct nfsnode *)nmp; /* empty list: points at itself */
+ nmp->nm_tprev = (struct nfsnode *)nmp;
+ nmp->nm_inprog = NULLVP;
+ bcopy((caddr_t)argp->fh, (caddr_t)&nmp->nm_fh, sizeof(nfsv2fh_t));
+ mp->mnt_stat.f_type = MOUNT_NFS;
+ bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
+ bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN);
+ nmp->nm_nam = nam;
+
+ if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
+ nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; /* rounded; presumably tenths of a second to NFS_HZ ticks */
+ if (nmp->nm_timeo < NFS_MINTIMEO)
+ nmp->nm_timeo = NFS_MINTIMEO;
+ else if (nmp->nm_timeo > NFS_MAXTIMEO)
+ nmp->nm_timeo = NFS_MAXTIMEO;
+ }
+
+ if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
+ nmp->nm_retry = argp->retrans;
+ if (nmp->nm_retry > NFS_MAXREXMIT)
+ nmp->nm_retry = NFS_MAXREXMIT;
+ }
+
+ if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
+ nmp->nm_wsize = argp->wsize;
+ /* Round down to multiple of blocksize */
+ nmp->nm_wsize &= ~0x1ff;
+ if (nmp->nm_wsize <= 0)
+ nmp->nm_wsize = 512;
+ else if (nmp->nm_wsize > NFS_MAXDATA)
+ nmp->nm_wsize = NFS_MAXDATA;
+ }
+ if (nmp->nm_wsize > MAXBSIZE) /* applies to the default as well */
+ nmp->nm_wsize = MAXBSIZE;
+
+ if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
+ nmp->nm_rsize = argp->rsize;
+ /* Round down to multiple of blocksize */
+ nmp->nm_rsize &= ~0x1ff;
+ if (nmp->nm_rsize <= 0)
+ nmp->nm_rsize = 512;
+ else if (nmp->nm_rsize > NFS_MAXDATA)
+ nmp->nm_rsize = NFS_MAXDATA;
+ }
+ if (nmp->nm_rsize > MAXBSIZE) /* applies to the default as well */
+ nmp->nm_rsize = MAXBSIZE;
+ /* The remaining options are ignored when out of range. */
+ if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
+ argp->maxgrouplist <= NFS_MAXGRPS)
+ nmp->nm_numgrps = argp->maxgrouplist;
+ if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
+ argp->readahead <= NFS_MAXRAHEAD)
+ nmp->nm_readahead = argp->readahead;
+ if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
+ argp->leaseterm <= NQ_MAXLEASE)
+ nmp->nm_leaseterm = argp->leaseterm;
+ if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
+ argp->deadthresh <= NQ_NEVERDEAD)
+ nmp->nm_deadthresh = argp->deadthresh;
+ /* Set up the sockets and per-host congestion */
+ nmp->nm_sotype = argp->sotype;
+ nmp->nm_soproto = argp->proto;
+
+ /*
+ * For Connection based sockets (TCP,...) defer the connect until
+ * the first request, in case the server is not responding.
+ */
+ if (nmp->nm_sotype == SOCK_DGRAM &&
+ (error = nfs_connect(nmp, (struct nfsreq *)0)))
+ goto bad;
+
+ /*
+ * This is silly, but it has to be set so that vinifod() works.
+ * We do not want to do an nfs_statfs() here since we can get
+ * stuck on a dead server and we are holding a lock on the mount
+ * point.
+ */
+ mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA;
+ /*
+ * A reference count is needed on the nfsnode representing the
+ * remote root. If this object is not persistent, then backward
+ * traversals of the mount point (i.e. "..") will not work if
+ * the nfsnode gets flushed out of the cache. Ufs does not have
+ * this problem, because one can identify root inodes by their
+ * number == ROOTINO (2).
+ */
+ if (error = nfs_nget(mp, &nmp->nm_fh, &np))
+ goto bad;
+ *vpp = NFSTOV(np);
+
+ return (0);
+bad: /* undo: drop the connection, the nfsmount and the address mbuf */
+ nfs_disconnect(nmp);
+ free((caddr_t)nmp, M_NFSMNT);
+ m_freem(nam);
+ return (error);
+}
+
+/*
+ * unmount system call
+ * Returns EINVAL for a forced unmount that is not allowed (doforce is
+ * clear or this is the root file system), EBUSY when the root vnode is
+ * still in use, the error from nfs_nget() or vflush(), or 0 once the
+ * mount has been torn down.
+ */
+int
+nfs_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ register struct nfsmount *nmp;
+ struct nfsnode *np;
+ struct vnode *vp;
+ int error, flags = 0;
+ extern int doforce;
+
+ if (mntflags & MNT_FORCE) {
+ if (!doforce || (mp->mnt_flag & MNT_ROOTFS))
+ return (EINVAL);
+ flags |= FORCECLOSE;
+ }
+ nmp = VFSTONFS(mp);
+ /*
+ * Goes something like this..
+ * - Check for activity on the root vnode (other than ourselves).
+ * - Call vflush() to clear out vnodes for this file system,
+ * except for the root vnode.
+ * - Decrement reference on the vnode representing remote root.
+ * - Close the socket
+ * - Free up the data structures
+ */
+ /*
+ * We need to decrement the ref. count on the nfsnode representing
+ * the remote root. See comment in mountnfs(). The VFS unmount()
+ * has done vput on this vnode, otherwise we would get deadlock!
+ */
+ if (error = nfs_nget(mp, &nmp->nm_fh, &np))
+ return(error);
+ vp = NFSTOV(np);
+ if (vp->v_usecount > 2) { /* presumably: more than mountnfs()'s reference plus ours */
+ vput(vp);
+ return (EBUSY);
+ }
+
+ /*
+ * Must handshake with nqnfs_clientd() if it is active.
+ */
+ nmp->nm_flag |= NFSMNT_DISMINPROG;
+ while (nmp->nm_inprog != NULLVP)
+ (void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0);
+ if (error = vflush(mp, vp, flags)) { /* vp itself is skipped by vflush() */
+ vput(vp);
+ nmp->nm_flag &= ~NFSMNT_DISMINPROG;
+ return (error);
+ }
+
+ /*
+ * We are now committed to the unmount.
+ * For NQNFS, let the server daemon free the nfsmount structure.
+ */
+ if (nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB))
+ nmp->nm_flag |= NFSMNT_DISMNT;
+
+ /*
+ * There are two reference counts to get rid of here.
+ */
+ vrele(vp);
+ vrele(vp);
+ vgone(vp);
+ nfs_disconnect(nmp);
+ m_freem(nmp->nm_nam);
+
+ if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) == 0) /* otherwise left for the daemon, see above */
+ free((caddr_t)nmp, M_NFSMNT);
+ return (0);
+}
+
+/*
+ * Hand back the root vnode of an nfs mount.
+ * The vnode is found through the file handle saved in the nfsmount
+ * structure and is (re)marked as a directory with VROOT as its only
+ * flag before being stored in *vpp.
+ * Returns 0, or the error from nfs_nget() with *vpp untouched.
+ */
+int
+nfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct nfsmount *nmp = VFSTONFS(mp);
+	struct nfsnode *np;
+	register struct vnode *rvp;
+	int error;
+
+	error = nfs_nget(mp, &nmp->nm_fh, &np);
+	if (error)
+		return (error);
+
+	rvp = NFSTOV(np);
+	rvp->v_type = VDIR;
+	rvp->v_flag = VROOT;
+	*vpp = rvp;
+	return (0);
+}
+
+extern int syncprt;
+
+/*
+ * Flush out the buffer cache
+ * Every vnode on the mount point that is not locked and has dirty
+ * buffers is pushed with VOP_FSYNC().  The scan starts over whenever a
+ * vnode turns out to have left the mount point or cannot be obtained
+ * with vget().  The last VOP_FSYNC() error seen, if any, is returned.
+ */
+/* ARGSUSED */
+int
+nfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	register struct vnode *vp;
+	int error, allerror = 0;
+
+restart:
+	vp = mp->mnt_vnodelist.lh_first;
+	while (vp != NULL) {
+		/*
+		 * A vnode that no longer belongs to this mount point
+		 * means the list changed under us: start over.
+		 */
+		if (vp->v_mount != mp)
+			goto restart;
+		if (!VOP_ISLOCKED(vp) && vp->v_dirtyblkhd.lh_first != NULL) {
+			if (vget(vp, 1))
+				goto restart;
+			error = VOP_FSYNC(vp, cred, waitfor, p);
+			if (error)
+				allerror = error;
+			vput(vp);
+		}
+		vp = vp->v_mntvnodes.le_next;
+	}
+	return (allerror);
+}
+
+/*
+ * Flat namespace lookup (file number to vnode).
+ * NFS does not support this at present: every call fails with
+ * EOPNOTSUPP and none of the arguments is looked at.
+ */
+/* ARGSUSED */
+int
+nfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * File handle to vnode translation.
+ * This is not expected to be called for an nfs mount; if it is, the
+ * request is refused with EINVAL and no argument is touched.
+ */
+/* ARGSUSED */
+int
+nfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	return (EINVAL);
+}
+
+/*
+ * Vnode to file handle translation.
+ * Like nfs_fhtovp() this should never be reached for nfs, so it
+ * simply fails with EINVAL.
+ */
+/* ARGSUSED */
+int
+nfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	return (EINVAL);
+}
+
+/*
+ * Vfs start routine.
+ * There is nothing to do for nfs, so this just reports success.
+ */
+/* ARGSUSED */
+int
+nfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);
+}
+
+/*
+ * Quota control.
+ * Quotas are not supported on nfs mounts: every command is answered
+ * with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+nfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	return (EOPNOTSUPP);
+}
diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c
new file mode 100644
index 000000000000..a909b48dc67d
--- /dev/null
+++ b/sys/nfs/nfs_vnops.c
@@ -0,0 +1,2539 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_vnops.c 8.5 (Berkeley) 2/13/94
+ */
+
+/*
+ * vnode op calls for sun nfs version 2
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/conf.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/map.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nqnfs.h>
+
+/* Defs */
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Global vfs data structures for nfs
+ * Vnode operations for ordinary nfs files. Each entry pairs an
+ * operation descriptor with the routine implementing it; the table is
+ * terminated by a NULL pair. nfsv2_vnodeop_opv_desc ties the table to
+ * the nfsv2_vnodeop_p vector pointer.
+ */
+int (**nfsv2_vnodeop_p)();
+struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error }, /* default */
+ { &vop_lookup_desc, nfs_lookup }, /* lookup */
+ { &vop_create_desc, nfs_create }, /* create */
+ { &vop_mknod_desc, nfs_mknod }, /* mknod */
+ { &vop_open_desc, nfs_open }, /* open */
+ { &vop_close_desc, nfs_close }, /* close */
+ { &vop_access_desc, nfs_access }, /* access */
+ { &vop_getattr_desc, nfs_getattr }, /* getattr */
+ { &vop_setattr_desc, nfs_setattr }, /* setattr */
+ { &vop_read_desc, nfs_read }, /* read */
+ { &vop_write_desc, nfs_write }, /* write */
+ { &vop_ioctl_desc, nfs_ioctl }, /* ioctl */
+ { &vop_select_desc, nfs_select }, /* select */
+ { &vop_mmap_desc, nfs_mmap }, /* mmap */
+ { &vop_fsync_desc, nfs_fsync }, /* fsync */
+ { &vop_seek_desc, nfs_seek }, /* seek */
+ { &vop_remove_desc, nfs_remove }, /* remove */
+ { &vop_link_desc, nfs_link }, /* link */
+ { &vop_rename_desc, nfs_rename }, /* rename */
+ { &vop_mkdir_desc, nfs_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, nfs_rmdir }, /* rmdir */
+ { &vop_symlink_desc, nfs_symlink }, /* symlink */
+ { &vop_readdir_desc, nfs_readdir }, /* readdir */
+ { &vop_readlink_desc, nfs_readlink }, /* readlink */
+ { &vop_abortop_desc, nfs_abortop }, /* abortop */
+ { &vop_inactive_desc, nfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, nfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, nfs_lock }, /* lock */
+ { &vop_unlock_desc, nfs_unlock }, /* unlock */
+ { &vop_bmap_desc, nfs_bmap }, /* bmap */
+ { &vop_strategy_desc, nfs_strategy }, /* strategy */
+ { &vop_print_desc, nfs_print }, /* print */
+ { &vop_islocked_desc, nfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, nfs_pathconf }, /* pathconf */
+ { &vop_advlock_desc, nfs_advlock }, /* advlock */
+ { &vop_blkatoff_desc, nfs_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, nfs_valloc }, /* valloc */
+ { &vop_reallocblks_desc, nfs_reallocblks }, /* reallocblks */
+ { &vop_vfree_desc, nfs_vfree }, /* vfree */
+ { &vop_truncate_desc, nfs_truncate }, /* truncate */
+ { &vop_update_desc, nfs_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* end of table */
+};
+struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
+ { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
+
+/*
+ * Special device vnode ops
+ * Most operations go to the spec_* routines. The nfs side supplies
+ * close, access, read and write wrappers (nfsspec_*) and keeps its own
+ * getattr, setattr, fsync, inactive, reclaim, lock, unlock, print,
+ * islocked and update routines.
+ */
+int (**spec_nfsv2nodeop_p)();
+struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
+ { &vop_default_desc, vn_default_error }, /* default */
+ { &vop_lookup_desc, spec_lookup }, /* lookup */
+ { &vop_create_desc, spec_create }, /* create */
+ { &vop_mknod_desc, spec_mknod }, /* mknod */
+ { &vop_open_desc, spec_open }, /* open */
+ { &vop_close_desc, nfsspec_close }, /* close */
+ { &vop_access_desc, nfsspec_access }, /* access */
+ { &vop_getattr_desc, nfs_getattr }, /* getattr */
+ { &vop_setattr_desc, nfs_setattr }, /* setattr */
+ { &vop_read_desc, nfsspec_read }, /* read */
+ { &vop_write_desc, nfsspec_write }, /* write */
+ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
+ { &vop_select_desc, spec_select }, /* select */
+ { &vop_mmap_desc, spec_mmap }, /* mmap */
+ { &vop_fsync_desc, nfs_fsync }, /* fsync */
+ { &vop_seek_desc, spec_seek }, /* seek */
+ { &vop_remove_desc, spec_remove }, /* remove */
+ { &vop_link_desc, spec_link }, /* link */
+ { &vop_rename_desc, spec_rename }, /* rename */
+ { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
+ { &vop_symlink_desc, spec_symlink }, /* symlink */
+ { &vop_readdir_desc, spec_readdir }, /* readdir */
+ { &vop_readlink_desc, spec_readlink }, /* readlink */
+ { &vop_abortop_desc, spec_abortop }, /* abortop */
+ { &vop_inactive_desc, nfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, nfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, nfs_lock }, /* lock */
+ { &vop_unlock_desc, nfs_unlock }, /* unlock */
+ { &vop_bmap_desc, spec_bmap }, /* bmap */
+ { &vop_strategy_desc, spec_strategy }, /* strategy */
+ { &vop_print_desc, nfs_print }, /* print */
+ { &vop_islocked_desc, nfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
+ { &vop_advlock_desc, spec_advlock }, /* advlock */
+ { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, spec_valloc }, /* valloc */
+ { &vop_reallocblks_desc, spec_reallocblks }, /* reallocblks */
+ { &vop_vfree_desc, spec_vfree }, /* vfree */
+ { &vop_truncate_desc, spec_truncate }, /* truncate */
+ { &vop_update_desc, nfs_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* end of table */
+};
+struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
+ { &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries };
+
+#ifdef FIFO /* fifo vnode ops for nfs, only compiled when FIFO is defined */
+int (**fifo_nfsv2nodeop_p)();
+struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
+ { &vop_default_desc, vn_default_error }, /* default */
+ { &vop_lookup_desc, fifo_lookup }, /* lookup */
+ { &vop_create_desc, fifo_create }, /* create */
+ { &vop_mknod_desc, fifo_mknod }, /* mknod */
+ { &vop_open_desc, fifo_open }, /* open */
+ { &vop_close_desc, nfsfifo_close }, /* close */
+ { &vop_access_desc, nfsspec_access }, /* access */
+ { &vop_getattr_desc, nfs_getattr }, /* getattr */
+ { &vop_setattr_desc, nfs_setattr }, /* setattr */
+ { &vop_read_desc, nfsfifo_read }, /* read */
+ { &vop_write_desc, nfsfifo_write }, /* write */
+ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */
+ { &vop_select_desc, fifo_select }, /* select */
+ { &vop_mmap_desc, fifo_mmap }, /* mmap */
+ { &vop_fsync_desc, nfs_fsync }, /* fsync */
+ { &vop_seek_desc, fifo_seek }, /* seek */
+ { &vop_remove_desc, fifo_remove }, /* remove */
+ { &vop_link_desc, fifo_link }, /* link */
+ { &vop_rename_desc, fifo_rename }, /* rename */
+ { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */
+ { &vop_symlink_desc, fifo_symlink }, /* symlink */
+ { &vop_readdir_desc, fifo_readdir }, /* readdir */
+ { &vop_readlink_desc, fifo_readlink }, /* readlink */
+ { &vop_abortop_desc, fifo_abortop }, /* abortop */
+ { &vop_inactive_desc, nfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, nfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, nfs_lock }, /* lock */
+ { &vop_unlock_desc, nfs_unlock }, /* unlock */
+ { &vop_bmap_desc, fifo_bmap }, /* bmap */
+ { &vop_strategy_desc, fifo_badop }, /* strategy */
+ { &vop_print_desc, nfs_print }, /* print */
+ { &vop_islocked_desc, nfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fifo_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fifo_valloc }, /* valloc */
+ { &vop_reallocblks_desc, fifo_reallocblks }, /* reallocblks */
+ { &vop_vfree_desc, fifo_vfree }, /* vfree */
+ { &vop_truncate_desc, fifo_truncate }, /* truncate */
+ { &vop_update_desc, nfs_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* end of table */
+};
+struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
+ { &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries };
+#endif /* FIFO */
+
+void nqnfs_clientlease();
+
+/*
+ * Global variables
+ * The extern ones are defined elsewhere; nfs_iodwant and nfs_numasync
+ * are defined here.
+ */
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_prog, nfs_vers, nfs_true, nfs_false; /* nfs_true/nfs_false: booleans as put into rpc requests */
+extern char nfsiobuf[MAXPHYS+NBPG];
+struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; /* one slot per possible async daemon -- NOTE(review): usage not visible here */
+int nfs_numasync = 0;
+#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) /* struct dirent less MAXNAMLEN + 1 bytes, presumably its name buffer */
+
+/*
+ * nfs null call from vfs.
+ * Sends an NFSPROC_NULL request, which carries no arguments, for vp
+ * using the given credentials and process, and returns the resulting
+ * error (0 on success).
+ * NOTE(review): the nfsm_* macros are defined elsewhere and presumably
+ * rely on the local names bpos, dpos, mreq, mrep, md, mb and error --
+ * confirm before renaming any of them.
+ */
+int
+nfs_null(vp, cred, procp)
+ struct vnode *vp;
+ struct ucred *cred;
+ struct proc *procp;
+{
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb;
+
+ nfsm_reqhead(vp, NFSPROC_NULL, 0);
+ nfsm_request(vp, NFSPROC_NULL, procp, cred);
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * nfs access vnode op.
+ * For nfs, the check is handed to nfsspec_access(). File accesses may
+ * still fail later.
+ * For nqnfs, use the access rpc to check accessibility. If file modes are
+ * changed on the server, accesses might still fail later.
+ * Returns 0 when access is granted, otherwise an error number.
+ */
+int
+nfs_access(ap)
+ struct vop_access_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register u_long *tl;
+ register caddr_t cp;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ /*
+ * For nqnfs, do an access rpc, otherwise you are stuck emulating
+ * ufs_access() locally using the vattr. This may not be correct,
+ * since the server may apply other access criteria such as
+ * client uid-->server uid mapping that we do not know about, but
+ * this is better than just returning anything that is lying about
+ * in the cache.
+ */
+ if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
+ nfsstats.rpccnt[NQNFSPROC_ACCESS]++;
+ nfsm_reqhead(vp, NQNFSPROC_ACCESS, NFSX_FH + 3 * NFSX_UNSIGNED);
+ nfsm_fhtom(vp);
+ nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); /* three booleans: read, write, exec */
+ if (ap->a_mode & VREAD)
+ *tl++ = nfs_true;
+ else
+ *tl++ = nfs_false;
+ if (ap->a_mode & VWRITE)
+ *tl++ = nfs_true;
+ else
+ *tl++ = nfs_false;
+ if (ap->a_mode & VEXEC)
+ *tl = nfs_true;
+ else
+ *tl = nfs_false;
+ nfsm_request(vp, NQNFSPROC_ACCESS, ap->a_p, ap->a_cred);
+ nfsm_reqdone;
+ return (error);
+ } else
+ return (nfsspec_access(ap));
+}
+
+/*
+ * nfs open vnode op
+ *
+ * Only regular files, directories and symbolic links may be opened;
+ * any other vnode type yields EACCES.
+ *
+ * For a vnode flagged VTEXT the cached data is revalidated:
+ *  - nqnfs: if the read lease is invalid, get a new one (retrying while it
+ *    comes back NQNFS_EXPIRED) and flush buffers/pages when the revision
+ *    changed or the node is marked non-cachable.
+ *  - plain nfs: flush when the node was modified locally or when the
+ *    modify time from VOP_GETATTR() differs from the remembered one.
+ * For any other vnode on a plain nfs mount the attribute cache timestamp
+ * is cleared so the next attribute fetch is not served from the cache.
+ *
+ * Returns 0, EACCES, EINTR from an interrupted flush, or the error from
+ * nqnfs_getlease()/VOP_GETATTR().
+ */
+/* ARGSUSED */
+int
+nfs_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	struct nfsnode *np = VTONFS(vp);
+	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	struct vattr vattr;
+	int error;
+
+	switch (vp->v_type) {
+	case VREG:
+	case VDIR:
+	case VLNK:
+		break;
+	default:
+		return (EACCES);
+	}
+
+	/* Not a text vnode: nothing to revalidate. */
+	if ((vp->v_flag & VTEXT) == 0) {
+		if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+			np->n_attrstamp = 0; /* For Open/Close consistency */
+		return (0);
+	}
+
+	if (nmp->nm_flag & NFSMNT_NQNFS) {
+		/*
+		 * Get a valid lease. If cached data is stale, flush it.
+		 */
+		if (!NQNFS_CKINVALID(vp, np, NQL_READ))
+			return (0);
+		do {
+			error = nqnfs_getlease(vp, NQL_READ, ap->a_cred, ap->a_p);
+		} while (error == NQNFS_EXPIRED);
+		if (error)
+			return (error);
+		if (np->n_lrev == np->n_brev &&
+		    (np->n_flag & NQNFSNONCACHE) == 0)
+			return (0);
+		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		if (error == EINTR)
+			return (error);
+		(void) vnode_pager_uncache(vp);
+		np->n_brev = np->n_lrev;
+		return (0);
+	}
+
+	if (np->n_flag & NMODIFIED) {
+		/* Locally modified: flush first, then refetch attributes. */
+		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		if (error == EINTR)
+			return (error);
+		(void) vnode_pager_uncache(vp);
+		np->n_attrstamp = 0;
+		np->n_direofoffset = 0;
+		error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+		np->n_mtime = vattr.va_mtime.ts_sec;
+		return (0);
+	}
+
+	/* Not modified locally: flush only if the modify time moved. */
+	error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
+	if (error)
+		return (error);
+	if (np->n_mtime != vattr.va_mtime.ts_sec) {
+		np->n_direofoffset = 0;
+		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		if (error == EINTR)
+			return (error);
+		(void) vnode_pager_uncache(vp);
+		np->n_mtime = vattr.va_mtime.ts_sec;
+	}
+	return (0);
+}
+
+/*
+ * nfs close vnode op
+ *
+ * Only regular files need work.  On a plain (non-nqnfs) mount a locally
+ * modified file has its buffers written back and invalidated and its
+ * attribute cache timestamp cleared.  A write error recorded on the node
+ * (NWRITEERR) is reported here exactly once: the flag is cleared and
+ * n_error becomes the return value, replacing any flush result.
+ */
+/* ARGSUSED */
+int
+nfs_close(ap)
+	struct vop_close_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	int error = 0;
+
+	if (vp->v_type != VREG)
+		return (0);
+
+	if (!(VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) &&
+	    (np->n_flag & NMODIFIED) != 0) {
+		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		np->n_attrstamp = 0;
+	}
+	if ((np->n_flag & NWRITEERR) != 0) {
+		np->n_flag &= ~NWRITEERR;
+		error = np->n_error;
+	}
+	return (error);
+}
+
+/*
+ * nfs getattr call from vfs.
+ *
+ * Fills ap->a_vap with the attributes of ap->a_vp.  The attribute cache is
+ * tried first (nfs_getattrcache() returning 0 is a hit); otherwise a
+ * GETATTR rpc is sent and the reply is loaded via nfsm_loadattr.
+ * Returns 0 on a cache hit, otherwise the value of error after the rpc.
+ *
+ * NOTE(review): the nfsm_* macros are defined outside this section; they
+ * use the locals below by name and presumably jump to an nfsmout label
+ * inside nfsm_reqdone on failure -- confirm.
+ */
+int
+nfs_getattr(ap)
+ struct vop_getattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ register caddr_t cp;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ /*
+ * Update local times for special files.
+ * (A pending NACC or NUPD flag causes NCHG to be set as well.)
+ */
+ if (np->n_flag & (NACC | NUPD))
+ np->n_flag |= NCHG;
+ /*
+ * First look in the cache.
+ */
+ if (nfs_getattrcache(vp, ap->a_vap) == 0)
+ return (0);
+ /* Cache miss: ask the server. */
+ nfsstats.rpccnt[NFSPROC_GETATTR]++;
+ nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH);
+ nfsm_fhtom(vp);
+ nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
+ nfsm_loadattr(vp, ap->a_vap);
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * nfs setattr call.
+ *
+ * Sends the attributes in ap->a_vap to the server with a SETATTR rpc and
+ * reloads the attribute cache from the reply.
+ *
+ * Before the rpc, cached data is dealt with when size or times change:
+ *  - size change on a locally modified file: buffers are invalidated
+ *    (discarded when truncating to 0, written back first otherwise) and
+ *    any error from that aborts the call;
+ *  - time-only change on a locally modified file: buffers are written
+ *    back; only EINTR aborts the call.
+ * For a size change the new size is installed in the nfsnode and the
+ * pager up front; the previous size is kept in tsize so it can be put
+ * back if the rpc fails.
+ *
+ * Returns 0 or an errno value.
+ *
+ * The nfsm_* macros are defined outside this section; they use the locals
+ * below by name and may set error and jump to the nfsmout label (supplied
+ * by nfsm_reqdone) on failure.
+ */
+int
+nfs_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct nfsv2_sattr *sp;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	u_long *tl;
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct vattr *vap = ap->a_vap;
+	u_quad_t frev, tsize = 0;
+
+	if (vap->va_size != VNOVAL || vap->va_mtime.ts_sec != VNOVAL ||
+	    vap->va_atime.ts_sec != VNOVAL) {
+		if (vap->va_size != VNOVAL) {
+			if (np->n_flag & NMODIFIED) {
+				if (vap->va_size == 0)
+					error = nfs_vinvalbuf(vp, 0, ap->a_cred,
+					    ap->a_p, 1);
+				else
+					error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+					    ap->a_p, 1);
+				if (error)
+					return (error);
+			}
+			/* Remember the old size so a failed rpc can be undone. */
+			tsize = np->n_size;
+			np->n_size = np->n_vattr.va_size = vap->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		} else if ((np->n_flag & NMODIFIED) &&
+		    (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+		    ap->a_p, 1)) == EINTR)
+			return (error);
+	}
+	nfsstats.rpccnt[NFSPROC_SETATTR]++;
+	isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH+NFSX_SATTR(isnq));
+	nfsm_fhtom(vp);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	/* An all-ones mode/uid/gid means "leave unchanged" and is sent as VNOVAL. */
+	if (vap->va_mode == (u_short)-1)
+		sp->sa_mode = VNOVAL;
+	else
+		sp->sa_mode = vtonfs_mode(vp->v_type, vap->va_mode);
+	if (vap->va_uid == (uid_t)-1)
+		sp->sa_uid = VNOVAL;
+	else
+		sp->sa_uid = txdr_unsigned(vap->va_uid);
+	if (vap->va_gid == (gid_t)-1)
+		sp->sa_gid = VNOVAL;
+	else
+		sp->sa_gid = txdr_unsigned(vap->va_gid);
+	if (isnq) {
+		txdr_hyper(&vap->va_size, &sp->sa_nqsize);
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+		sp->sa_nqflags = txdr_unsigned(vap->va_flags);
+		sp->sa_nqrdev = VNOVAL;
+	} else {
+		sp->sa_nfssize = txdr_unsigned(vap->va_size);
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(vp, NFSPROC_SETATTR, ap->a_p, ap->a_cred);
+	nfsm_loadattr(vp, (struct vattr *)0);
+	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) &&
+	    NQNFS_CKCACHABLE(vp, NQL_WRITE)) {
+		/* The reply carries a 64-bit revision; keep the newest one seen. */
+		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+		fxdr_hyper(tl, &frev);
+		if (frev > np->n_brev)
+			np->n_brev = frev;
+	}
+	nfsm_reqdone;
+	/*
+	 * Undo the optimistic size update only if one was made.  tsize is
+	 * meaningful solely when a size change was requested; restoring it
+	 * after a failed mode/owner/time change would clobber the file size.
+	 */
+	if (error && vap->va_size != VNOVAL) {
+		np->n_size = np->n_vattr.va_size = tsize;
+		vnode_pager_setsize(vp, (u_long)np->n_size);
+	}
+	return (error);
+}
+
+/*
+ * nfs lookup call, one step at a time...
+ * First look in cache
+ * If not found, unlock the directory nfsnode and do the rpc
+ *
+ * On success *ap->a_vpp holds a referenced vnode for the component named
+ * by ap->a_cnp and 0 is returned.  Other return values visible here:
+ *   ENOTDIR      a_dvp is not a directory
+ *   EJUSTRETURN  the rpc said ENOENT for the last component of a CREATE
+ *                or RENAME
+ *   EISDIR       RENAME of the last component resolved to the directory
+ *                itself (file handles compare equal)
+ *   otherwise    the error from the rpc, nfs_nget() or
+ *                nfs_loadattrcache()
+ * SAVENAME is set in cn_flags for the last component of any operation
+ * other than LOOKUP.
+ *
+ * NOTE(review): the nfsm_* macros are defined outside this section.  The
+ * nfsmout label is placed explicitly right after the request, so macro
+ * failures further down (nfsm_dissect) presumably jump back to it with
+ * error set -- confirm against the macro definitions.
+ */
+int
+nfs_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct componentname *cnp = ap->a_cnp;
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vnode **vpp = ap->a_vpp;
+ register int flags = cnp->cn_flags;
+ register struct vnode *vdp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ struct nfsmount *nmp;
+ caddr_t bpos, dpos, cp2;
+ time_t reqtime;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct vnode *newvp;
+ long len;
+ nfsv2fh_t *fhp;
+ struct nfsnode *np;
+ int lockparent, wantparent, error = 0;
+ int nqlflag, cachable;
+ u_quad_t frev;
+
+ *vpp = NULL;
+ if (dvp->v_type != VDIR)
+ return (ENOTDIR);
+ lockparent = flags & LOCKPARENT;
+ wantparent = flags & (LOCKPARENT|WANTPARENT);
+ nmp = VFSTONFS(dvp->v_mount);
+ np = VTONFS(dvp);
+ /*
+ * A cache_lookup() result that is neither 0 nor ENOENT is handled as a
+ * name cache hit whose vnode still has to be validated below.
+ */
+ if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
+ struct vattr vattr;
+ int vpid;
+
+ vdp = *vpp;
+ vpid = vdp->v_id;
+ /*
+ * See the comment starting `Step through' in ufs/ufs_lookup.c
+ * for an explanation of the locking protocol
+ */
+ if (dvp == vdp) {
+ VREF(vdp);
+ error = 0;
+ } else
+ error = vget(vdp, 1);
+ if (!error) {
+ /* v_id unchanged: the vnode is still the one the cache named. */
+ if (vpid == vdp->v_id) {
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) == 0) {
+ nfsstats.lookupcache_hits++;
+ if (cnp->cn_nameiop != LOOKUP &&
+ (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ } else if (NQNFS_CKCACHABLE(dvp, NQL_READ)) {
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NMODIFIED)) {
+ /* Directory changed: drop its cached names and buffers. */
+ np->n_direofoffset = 0;
+ cache_purge(dvp);
+ error = nfs_vinvalbuf(dvp, 0,
+ cnp->cn_cred, cnp->cn_proc,
+ 1);
+ if (error == EINTR)
+ return (error);
+ np->n_brev = np->n_lrev;
+ } else {
+ nfsstats.lookupcache_hits++;
+ if (cnp->cn_nameiop != LOOKUP &&
+ (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ }
+ }
+ } else if (!VOP_GETATTR(vdp, &vattr, cnp->cn_cred, cnp->cn_proc) &&
+ vattr.va_ctime.ts_sec == VTONFS(vdp)->n_ctime) {
+ /* Plain nfs: the hit is good while the change time matches. */
+ nfsstats.lookupcache_hits++;
+ if (cnp->cn_nameiop != LOOKUP &&
+ (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ }
+ cache_purge(vdp);
+ }
+ vrele(vdp);
+ }
+ *vpp = NULLVP;
+ }
+ /* Cache miss or rejected hit: do the lookup rpc. */
+ error = 0;
+ nfsstats.lookupcache_misses++;
+ nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+ len = cnp->cn_namelen;
+ nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+
+ /*
+ * For nqnfs optionally piggyback a getlease request for the name
+ * being looked up.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) &&
+ ((cnp->cn_flags & MAKEENTRY) &&
+ (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))))
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ else
+ *tl = 0;
+ }
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+ reqtime = time.tv_sec;
+ nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
+nfsmout:
+ if (error) {
+ if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
+ (flags & ISLASTCN) && error == ENOENT)
+ error = EJUSTRETURN;
+ if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (error);
+ }
+ /*
+ * nqnfs reply: a leading word that, when non-zero, is the lease flag
+ * and is followed by four more words (cachable, a duration added to
+ * reqtime, and a 64-bit revision).
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ if (*tl) {
+ nqlflag = fxdr_unsigned(int, *tl);
+ nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+ cachable = fxdr_unsigned(int, *tl++);
+ reqtime += fxdr_unsigned(int, *tl++);
+ fxdr_hyper(tl, &frev);
+ } else
+ nqlflag = 0;
+ }
+ nfsm_dissect(fhp, nfsv2fh_t *, NFSX_FH);
+
+ /*
+ * Handle RENAME case...
+ */
+ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
+ /* Same file handle as the directory itself: refuse. */
+ if (!bcmp(np->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+ m_freem(mrep);
+ return (EISDIR);
+ }
+ if (error = nfs_nget(dvp->v_mount, fhp, &np)) {
+ m_freem(mrep);
+ return (error);
+ }
+ newvp = NFSTOV(np);
+ if (error =
+ nfs_loadattrcache(&newvp, &md, &dpos, (struct vattr *)0)) {
+ vrele(newvp);
+ m_freem(mrep);
+ return (error);
+ }
+ *vpp = newvp;
+ m_freem(mrep);
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ }
+
+ /* Looking up the directory itself: take another reference on dvp. */
+ if (!bcmp(np->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+ VREF(dvp);
+ newvp = dvp;
+ } else {
+ if (error = nfs_nget(dvp->v_mount, fhp, &np)) {
+ m_freem(mrep);
+ return (error);
+ }
+ newvp = NFSTOV(np);
+ }
+ if (error = nfs_loadattrcache(&newvp, &md, &dpos, (struct vattr *)0)) {
+ vrele(newvp);
+ m_freem(mrep);
+ return (error);
+ }
+ m_freem(mrep);
+ *vpp = newvp;
+ if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ /*
+ * Enter the name in the cache unless this is the last component of a
+ * DELETE.  Plain nfs records the change time used to validate later
+ * hits; nqnfs records the lease when one was granted and has not
+ * already run out.
+ */
+ if ((cnp->cn_flags & MAKEENTRY) &&
+ (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
+ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+ np->n_ctime = np->n_vattr.va_ctime.ts_sec;
+ else if (nqlflag && reqtime > time.tv_sec)
+ nqnfs_clientlease(nmp, np, nqlflag, cachable, reqtime,
+ frev);
+ cache_enter(dvp, *vpp, cnp);
+ }
+ return (0);
+}
+
+/*
+ * nfs read call.
+ * Reads are only permitted on regular files; any other vnode type gets
+ * EPERM.  The transfer itself is handed to nfs_bioread(), whose result
+ * is returned unchanged.
+ */
+int
+nfs_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int status;
+
+	if (ap->a_vp->v_type == VREG)
+		status = nfs_bioread(ap->a_vp, ap->a_uio, ap->a_ioflag,
+		    ap->a_cred);
+	else
+		status = EPERM;
+	return (status);
+}
+
+/*
+ * nfs readlink call
+ * Valid only for symbolic links (EPERM otherwise).  The link contents
+ * are fetched through nfs_bioread() with no I/O flags set.
+ */
+int
+nfs_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int status;
+
+	if (ap->a_vp->v_type == VLNK)
+		status = nfs_bioread(ap->a_vp, ap->a_uio, 0, ap->a_cred);
+	else
+		status = EPERM;
+	return (status);
+}
+
+/*
+ * Do a readlink rpc.
+ * Called by nfs_doio() from below the buffer cache.
+ *
+ * Sends READLINK for vp on behalf of uiop->uio_procp/cred, takes the
+ * string length from the reply (bounded by NFS_MAXPATHLEN in nfsm_strsiz)
+ * and moves that many bytes into uiop.  Returns the value of error.
+ *
+ * NOTE(review): nfsm_* macros are defined outside this section; they use
+ * the locals below by name and presumably jump to an nfsmout label inside
+ * nfsm_reqdone on failure -- confirm.
+ */
+int
+nfs_readlinkrpc(vp, uiop, cred)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ long len;
+
+ nfsstats.rpccnt[NFSPROC_READLINK]++;
+ nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH);
+ nfsm_fhtom(vp);
+ nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
+ nfsm_strsiz(len, NFS_MAXPATHLEN);
+ nfsm_mtouio(uiop, len);
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * nfs read rpc call
+ * Ditto above
+ *
+ * Reads uiop->uio_resid bytes starting at uiop->uio_offset, issuing one
+ * READ rpc per chunk of at most nm_rsize bytes.  The loop ends early when
+ * the server returns fewer bytes than were asked for.
+ * On a plain nfs mount a transfer that would reach past 0xffffffff is
+ * rejected with EFBIG before any rpc is sent.
+ * Returns the value of error.
+ *
+ * NOTE(review): nfsm_* macros are defined outside this section; on
+ * failure they presumably set error and jump to the nfsmout label at the
+ * bottom -- confirm.
+ */
+int
+nfs_readrpc(vp, uiop, cred)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct nfsmount *nmp;
+ long len, retlen, tsiz;
+
+ nmp = VFSTONFS(vp->v_mount);
+ tsiz = uiop->uio_resid;
+ if (uiop->uio_offset + tsiz > 0xffffffff &&
+ (nmp->nm_flag & NFSMNT_NQNFS) == 0)
+ return (EFBIG);
+ while (tsiz > 0) {
+ nfsstats.rpccnt[NFSPROC_READ]++;
+ len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
+ nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH+NFSX_UNSIGNED*3);
+ nfsm_fhtom(vp);
+ /*
+ * Three argument words: nqnfs uses a 64-bit offset followed by the
+ * length; plain nfs uses a 32-bit offset, the length, and a zero.
+ */
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED*3);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ txdr_hyper(&uiop->uio_offset, tl);
+ *(tl + 2) = txdr_unsigned(len);
+ } else {
+ *tl++ = txdr_unsigned(uiop->uio_offset);
+ *tl++ = txdr_unsigned(len);
+ *tl = 0;
+ }
+ nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
+ nfsm_loadattr(vp, (struct vattr *)0);
+ nfsm_strsiz(retlen, nmp->nm_rsize);
+ nfsm_mtouio(uiop, retlen);
+ m_freem(mrep);
+ /* A short read ends the transfer. */
+ if (retlen < len)
+ tsiz = 0;
+ else
+ tsiz -= len;
+ }
+nfsmout:
+ return (error);
+}
+
+/*
+ * nfs write call
+ *
+ * Writes uiop->uio_resid bytes starting at uiop->uio_offset, issuing one
+ * WRITE rpc per chunk of at most nm_wsize bytes.  On a plain nfs mount a
+ * transfer that would reach past 0xffffffff is rejected with EFBIG.
+ * If an error occurs, uiop->uio_resid is set to the byte count that had
+ * not been completed (tsiz) before the error is returned.
+ *
+ * NOTE(review): nfsm_* macros are defined outside this section; on
+ * failure they presumably set error and jump to the nfsmout label at the
+ * bottom -- confirm.
+ */
+int
+nfs_writerpc(vp, uiop, cred, ioflags)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+ int ioflags;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct nfsmount *nmp;
+ struct nfsnode *np = VTONFS(vp);
+ u_quad_t frev;
+ long len, tsiz;
+
+ nmp = VFSTONFS(vp->v_mount);
+ tsiz = uiop->uio_resid;
+ if (uiop->uio_offset + tsiz > 0xffffffff &&
+ (nmp->nm_flag & NFSMNT_NQNFS) == 0)
+ return (EFBIG);
+ while (tsiz > 0) {
+ nfsstats.rpccnt[NFSPROC_WRITE]++;
+ len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
+ nfsm_reqhead(vp, NFSPROC_WRITE,
+ NFSX_FH+NFSX_UNSIGNED*4+nfsm_rndup(len));
+ nfsm_fhtom(vp);
+ /*
+ * Four argument words.  nqnfs: 64-bit offset, append flag, length.
+ * Plain nfs: only the second word (offset) and fourth word (length)
+ * are assigned here.
+ * NOTE(review): the first and third words are left as nfsm_build
+ * returned them in the plain nfs case -- confirm the server ignores
+ * them and that sending unset bytes is acceptable.
+ */
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED * 4);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ txdr_hyper(&uiop->uio_offset, tl);
+ tl += 2;
+ if (ioflags & IO_APPEND)
+ *tl++ = txdr_unsigned(1);
+ else
+ *tl++ = 0;
+ } else {
+ *++tl = txdr_unsigned(uiop->uio_offset);
+ tl += 2;
+ }
+ *tl = txdr_unsigned(len);
+ nfsm_uiotom(uiop, len);
+ nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
+ nfsm_loadattr(vp, (struct vattr *)0);
+ /*
+ * NFSMNT_MYWRITE: adopt the modify time just loaded from the reply.
+ * Otherwise, for a cachable nqnfs write lease, pick up the 64-bit
+ * revision from the reply and keep the newest one seen.
+ */
+ if (nmp->nm_flag & NFSMNT_MYWRITE)
+ VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.ts_sec;
+ else if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ NQNFS_CKCACHABLE(vp, NQL_WRITE)) {
+ nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+ fxdr_hyper(tl, &frev);
+ if (frev > np->n_brev)
+ np->n_brev = frev;
+ }
+ m_freem(mrep);
+ tsiz -= len;
+ }
+nfsmout:
+ if (error)
+ uiop->uio_resid = tsiz;
+ return (error);
+}
+
+/*
+ * nfs mknod call
+ * This is a kludge. Use a create rpc but with the IFMT bits of the mode
+ * set to specify the file type and the size field for rdev.
+ *
+ * Supported types are VCHR and VBLK (rdev taken from va_rdev) and, when
+ * FIFO is defined, VFIFO (rdev 0xffffffff); anything else aborts the
+ * operation and returns EOPNOTSUPP.  The new node is created with the
+ * caller's uid and the gid of the parent directory.
+ * The pathname buffer is freed and the reference on dvp dropped on every
+ * path through this function.
+ *
+ * NOTE(review): newvp obtained through nfsm_mtofh is entered in the name
+ * cache but is neither stored through ap->a_vpp nor released here --
+ * confirm who owns that reference.
+ * NOTE(review): nfsm_* macros are defined outside this section and
+ * presumably jump to an nfsmout label inside nfsm_reqdone on failure.
+ */
+/* ARGSUSED */
+int
+nfs_mknod(ap)
+ struct vop_mknod_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ struct vnode *newvp;
+ struct vattr vattr;
+ char *cp2;
+ caddr_t bpos, dpos;
+ int error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ u_long rdev;
+
+ isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ if (vap->va_type == VCHR || vap->va_type == VBLK)
+ rdev = txdr_unsigned(vap->va_rdev);
+#ifdef FIFO
+ else if (vap->va_type == VFIFO)
+ rdev = 0xffffffff;
+#endif /* FIFO */
+ else {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (EOPNOTSUPP);
+ }
+ /* The parent's attributes supply the group id for the new node. */
+ if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (error);
+ }
+ nfsstats.rpccnt[NFSPROC_CREATE]++;
+ nfsm_reqhead(dvp, NFSPROC_CREATE,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)+NFSX_SATTR(isnq));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+ sp->sa_mode = vtonfs_mode(vap->va_type, vap->va_mode);
+ sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+ sp->sa_gid = txdr_unsigned(vattr.va_gid);
+ /* nqnfs has a dedicated rdev field; plain nfs reuses the size field. */
+ if (isnq) {
+ sp->sa_nqrdev = rdev;
+ sp->sa_nqflags = 0;
+ txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+ txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+ } else {
+ sp->sa_nfssize = rdev;
+ txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+ txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+ }
+ nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+ nfsm_mtofh(dvp, newvp);
+ nfsm_reqdone;
+ if (!error && (cnp->cn_flags & MAKEENTRY))
+ cache_enter(dvp, newvp, cnp);
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ /* The directory changed: mark it modified and drop cached attributes. */
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ vrele(dvp);
+ return (error);
+}
+
+/*
+ * nfs file create call
+ *
+ * Creates the name in ap->a_cnp inside ap->a_dvp with a CREATE rpc and
+ * stores the resulting vnode through ap->a_vpp (via nfsm_mtofh).  The new
+ * file gets the caller's uid, the parent directory's gid and size 0.
+ * The pathname buffer is freed and the reference on dvp dropped on every
+ * path; if the initial VOP_GETATTR() fails the operation is aborted.
+ * Returns 0 or an errno value.
+ *
+ * NOTE(review): nfsm_* macros are defined outside this section and
+ * presumably jump to an nfsmout label inside nfsm_reqdone on failure.
+ */
+int
+nfs_create(ap)
+ struct vop_create_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct vattr vattr;
+
+ /* The parent's attributes supply the group id for the new file. */
+ if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (error);
+ }
+ nfsstats.rpccnt[NFSPROC_CREATE]++;
+ isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ nfsm_reqhead(dvp, NFSPROC_CREATE,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)+NFSX_SATTR(isnq));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+ sp->sa_mode = vtonfs_mode(vap->va_type, vap->va_mode);
+ sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+ sp->sa_gid = txdr_unsigned(vattr.va_gid);
+ if (isnq) {
+ u_quad_t qval = 0;
+
+ txdr_hyper(&qval, &sp->sa_nqsize);
+ sp->sa_nqflags = 0;
+ sp->sa_nqrdev = -1;
+ txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+ txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+ } else {
+ sp->sa_nfssize = 0;
+ txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+ txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+ }
+ nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+ nfsm_mtofh(dvp, *ap->a_vpp);
+ nfsm_reqdone;
+ if (!error && (cnp->cn_flags & MAKEENTRY))
+ cache_enter(dvp, *ap->a_vpp, cnp);
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ /* The directory changed: mark it modified and drop cached attributes. */
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ vrele(dvp);
+ return (error);
+}
+
+/*
+ * nfs file remove call
+ * To try and make nfs semantics closer to ufs semantics, a file that has
+ * other processes using the vnode is renamed instead of removed and then
+ * removed later on the last close.
+ * - If v_usecount > 1
+ *	  If a rename is not already in the works
+ *	     call nfs_sillyrename() to set it up
+ *     else
+ *	  do the remove rpc
+ *
+ * The references on a_dvp and a_vp are dropped on every return path.
+ * In the remove-rpc branch the pathname buffer is freed on every exit,
+ * including an interrupted buffer flush (EINTR).
+ * Returns 0 or an errno value; ENOENT from the rpc is mapped to 0.
+ *
+ * The nfsm_* macros are defined outside this section; on failure they may
+ * set error and jump to the nfsmout label supplied by nfsm_reqdone.
+ */
+int
+nfs_remove(ap)
+	struct vop_remove_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode * a_dvp;
+		struct vnode * a_vp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vnode *dvp = ap->a_dvp;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct nfsnode *np = VTONFS(vp);
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	if (vp->v_usecount > 1) {
+		if (!np->n_sillyrename)
+			error = nfs_sillyrename(dvp, vp, cnp);
+	} else {
+		/*
+		 * Purge the name cache so that the chance of a lookup for
+		 * the name succeeding while the remove is in progress is
+		 * minimized. Without node locking it can still happen, such
+		 * that an I/O op returns ESTALE, but since you get this if
+		 * another host removes the file..
+		 */
+		cache_purge(vp);
+		/*
+		 * Throw away biocache buffers. Mainly to avoid
+		 * unnecessary delayed writes.
+		 */
+		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
+		if (error == EINTR) {
+			/*
+			 * Interrupted before the rpc was sent.  Release the
+			 * same resources every other exit from this branch
+			 * releases; returning directly would leak the name
+			 * buffer and both vnode references.
+			 */
+			FREE(cnp->cn_pnbuf, M_NAMEI);
+			goto out;
+		}
+		/* Do the rpc */
+		nfsstats.rpccnt[NFSPROC_REMOVE]++;
+		nfsm_reqhead(dvp, NFSPROC_REMOVE,
+		    NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+		nfsm_fhtom(dvp);
+		nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+		nfsm_request(dvp, NFSPROC_REMOVE, cnp->cn_proc, cnp->cn_cred);
+		nfsm_reqdone;
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+		VTONFS(dvp)->n_flag |= NMODIFIED;
+		VTONFS(dvp)->n_attrstamp = 0;
+		/*
+		 * Kludge City: If the first reply to the remove rpc is lost..
+		 *   the reply to the retransmitted request will be ENOENT
+		 *   since the file was in fact removed
+		 *   Therefore, we cheat and return success.
+		 */
+		if (error == ENOENT)
+			error = 0;
+	}
+	np->n_attrstamp = 0;
+out:
+	vrele(dvp);
+	vrele(vp);
+	return (error);
+}
+
+/*
+ * nfs file remove rpc called from nfs_inactive
+ *
+ * Removes the name recorded in the sillyrename record sp (s_name of
+ * length s_namlen, inside directory s_dvp) using the credentials s_cred.
+ * No process is passed to the request (NULL).  The directory is marked
+ * modified and its cached attributes dropped.  Returns the value of
+ * error.
+ *
+ * NOTE(review): nfsm_* macros are defined outside this section and
+ * presumably jump to an nfsmout label inside nfsm_reqdone on failure.
+ */
+int
+nfs_removeit(sp)
+ register struct sillyrename *sp;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ nfsstats.rpccnt[NFSPROC_REMOVE]++;
+ nfsm_reqhead(sp->s_dvp, NFSPROC_REMOVE,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(sp->s_namlen));
+ nfsm_fhtom(sp->s_dvp);
+ nfsm_strtom(sp->s_name, sp->s_namlen, NFS_MAXNAMLEN);
+ nfsm_request(sp->s_dvp, NFSPROC_REMOVE, NULL, sp->s_cred);
+ nfsm_reqdone;
+ VTONFS(sp->s_dvp)->n_flag |= NMODIFIED;
+ VTONFS(sp->s_dvp)->n_attrstamp = 0;
+ return (error);
+}
+
+/*
+ * nfs file rename call
+ *
+ * Renames a_fcnp in directory a_fdvp to a_tcnp in directory a_tdvp with
+ * a single RENAME rpc issued with the target component's process and
+ * credentials.  A rename whose source, target directory or existing
+ * target live on different mounts fails with EXDEV before any rpc is
+ * sent.
+ *
+ * All four vnodes are released on every path: tdvp with vput() (vrele()
+ * when it is the same vnode as tvp), tvp with vput() when present, fdvp
+ * and fvp with vrele().  Both directories are marked modified and have
+ * their cached attributes dropped after the rpc.
+ * Returns 0 or an errno value; ENOENT is mapped to 0.
+ *
+ * The nfsm_* macros are defined outside this section; on failure they may
+ * set error and jump to the nfsmout label supplied by nfsm_reqdone.
+ */
+int
+nfs_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	register struct vnode *fvp = ap->a_fvp;
+	register struct vnode *tvp = ap->a_tvp;
+	register struct vnode *fdvp = ap->a_fdvp;
+	register struct vnode *tdvp = ap->a_tdvp;
+	register struct componentname *tcnp = ap->a_tcnp;
+	register struct componentname *fcnp = ap->a_fcnp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	/* Check for cross-device rename */
+	if ((fvp->v_mount != tdvp->v_mount) ||
+	    (tvp && (fvp->v_mount != tvp->v_mount))) {
+		error = EXDEV;
+		goto out;
+	}
+
+	nfsstats.rpccnt[NFSPROC_RENAME]++;
+	/*
+	 * The request carries two (file handle, name) pairs, so the size
+	 * estimate must account for the source name AND the target name.
+	 */
+	nfsm_reqhead(fdvp, NFSPROC_RENAME,
+	    (NFSX_FH+NFSX_UNSIGNED)*2+nfsm_rndup(fcnp->cn_namelen)+
+	    nfsm_rndup(tcnp->cn_namelen));
+	nfsm_fhtom(fdvp);
+	nfsm_strtom(fcnp->cn_nameptr, fcnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_fhtom(tdvp);
+	nfsm_strtom(tcnp->cn_nameptr, tcnp->cn_namelen, NFS_MAXNAMLEN);
+	/* Issued with the target component's proc/cred (or fcnp->cn_cred?). */
+	nfsm_request(fdvp, NFSPROC_RENAME, tcnp->cn_proc, tcnp->cn_cred);
+	nfsm_reqdone;
+	VTONFS(fdvp)->n_flag |= NMODIFIED;
+	VTONFS(fdvp)->n_attrstamp = 0;
+	VTONFS(tdvp)->n_flag |= NMODIFIED;
+	VTONFS(tdvp)->n_attrstamp = 0;
+	/* Renaming a directory invalidates cached names in the parents. */
+	if (fvp->v_type == VDIR) {
+		if (tvp != NULL && tvp->v_type == VDIR)
+			cache_purge(tdvp);
+		cache_purge(fdvp);
+	}
+out:
+	if (tdvp == tvp)
+		vrele(tdvp);
+	else
+		vput(tdvp);
+	if (tvp)
+		vput(tvp);
+	vrele(fdvp);
+	vrele(fvp);
+	/*
+	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
+	 */
+	if (error == ENOENT)
+		error = 0;
+	return (error);
+}
+
+/*
+ * nfs file rename rpc called from nfs_remove() above
+ *
+ * Renames the component described by scnp to the name stored in the
+ * sillyrename record sp (s_name of length s_namlen); source and target
+ * directory are both sdvp.  The pathname buffer of scnp is freed and the
+ * directory is marked modified with its cached attributes dropped.
+ * Returns the value of error.
+ *
+ * NOTE(review): nfsm_* macros are defined outside this section and
+ * presumably jump to an nfsmout label inside nfsm_reqdone on failure.
+ */
+int
+nfs_renameit(sdvp, scnp, sp)
+ struct vnode *sdvp;
+ struct componentname *scnp;
+ register struct sillyrename *sp;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ nfsstats.rpccnt[NFSPROC_RENAME]++;
+ nfsm_reqhead(sdvp, NFSPROC_RENAME,
+ (NFSX_FH+NFSX_UNSIGNED)*2+nfsm_rndup(scnp->cn_namelen)+
+ nfsm_rndup(sp->s_namlen));
+ /* From: (sdvp, original name). */
+ nfsm_fhtom(sdvp);
+ nfsm_strtom(scnp->cn_nameptr, scnp->cn_namelen, NFS_MAXNAMLEN);
+ /* To: (sdvp, name from the sillyrename record). */
+ nfsm_fhtom(sdvp);
+ nfsm_strtom(sp->s_name, sp->s_namlen, NFS_MAXNAMLEN);
+ nfsm_request(sdvp, NFSPROC_RENAME, scnp->cn_proc, scnp->cn_cred);
+ nfsm_reqdone;
+ FREE(scnp->cn_pnbuf, M_NAMEI);
+ VTONFS(sdvp)->n_flag |= NMODIFIED;
+ VTONFS(sdvp)->n_attrstamp = 0;
+ return (error);
+}
+
+/*
+ * nfs hard link create call
+ *
+ * Sends a LINK rpc whose arguments are, in order, the file handle of
+ * a_tdvp, the file handle of a_vp and the name in a_cnp; the request is
+ * issued against a_tdvp.  Vnodes on different mounts fail with EXDEV.
+ * After the rpc the pathname buffer is freed, cached attributes of both
+ * vnodes are dropped, a_tdvp is marked modified and the reference on
+ * a_vp is released.
+ * Returns 0 or an errno value; EEXIST is mapped to 0.
+ *
+ * NOTE(review): the EXDEV path releases only a_vp and does not free
+ * cn_pnbuf (VOP_ABORTOP is commented out) -- confirm that is intended.
+ * NOTE(review): nfsm_* macros are defined outside this section and
+ * presumably jump to an nfsmout label inside nfsm_reqdone on failure.
+ */
+int
+nfs_link(ap)
+ struct vop_link_args /* {
+ struct vnode *a_vp;
+ struct vnode *a_tdvp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct vnode *tdvp = ap->a_tdvp;
+ register struct componentname *cnp = ap->a_cnp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ if (vp->v_mount != tdvp->v_mount) {
+ /*VOP_ABORTOP(vp, cnp);*/
+ if (tdvp == vp)
+ vrele(vp);
+ else
+ vput(vp);
+ return (EXDEV);
+ }
+
+ nfsstats.rpccnt[NFSPROC_LINK]++;
+ nfsm_reqhead(tdvp, NFSPROC_LINK,
+ NFSX_FH*2+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+ nfsm_fhtom(tdvp);
+ nfsm_fhtom(vp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_request(tdvp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
+ nfsm_reqdone;
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(tdvp)->n_attrstamp = 0;
+ VTONFS(tdvp)->n_flag |= NMODIFIED;
+ VTONFS(vp)->n_attrstamp = 0;
+ vrele(vp);
+ /*
+ * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+ */
+ if (error == EEXIST)
+ error = 0;
+ return (error);
+}
+
+/*
+ * nfs symbolic link create call
+ *
+ * Creates the name in a_cnp inside a_dvp as a symbolic link to the
+ * string a_target, using a SYMLINK rpc.  The link is requested with the
+ * caller's uid and gid and a size of -1.  Nothing is stored through
+ * a_vpp.  The pathname buffer is freed, the directory is marked modified
+ * with its cached attributes dropped, and the reference on a_dvp is
+ * released.
+ * Returns 0 or an errno value; EEXIST is mapped to 0.
+ *
+ * NOTE(review): in the nqnfs branch sa_nqrdev is not assigned, unlike in
+ * nfs_create() and nfs_setattr() -- confirm the server ignores it.
+ * NOTE(review): nfsm_* macros are defined outside this section and
+ * presumably jump to an nfsmout label inside nfsm_reqdone on failure.
+ */
+/* start here */
+int
+nfs_symlink(ap)
+ struct vop_symlink_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ char *a_target;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int slen, error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ nfsstats.rpccnt[NFSPROC_SYMLINK]++;
+ slen = strlen(ap->a_target);
+ isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH+2*NFSX_UNSIGNED+
+ nfsm_rndup(cnp->cn_namelen)+nfsm_rndup(slen)+NFSX_SATTR(isnq));
+ nfsm_fhtom(dvp);
+ /* Link name first, then the target path it points to. */
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+ sp->sa_mode = vtonfs_mode(VLNK, vap->va_mode);
+ sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+ sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid);
+ if (isnq) {
+ quad_t qval = -1;
+
+ txdr_hyper(&qval, &sp->sa_nqsize);
+ sp->sa_nqflags = 0;
+ txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+ txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+ } else {
+ sp->sa_nfssize = -1;
+ txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+ txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+ }
+ nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
+ nfsm_reqdone;
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ vrele(dvp);
+ /*
+ * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+ */
+ if (error == EEXIST)
+ error = 0;
+ return (error);
+}
+
+/*
+ * nfs make dir call
+ *
+ * Creates the directory named by a_cnp inside a_dvp with a MKDIR rpc and
+ * stores the new vnode through a_vpp (via nfsm_mtofh).  The directory is
+ * requested with the caller's uid, the parent's gid and a size of -1.
+ * If the rpc reports EEXIST, one LOOKUP rpc is tried: when it yields a
+ * directory the call succeeds, otherwise EEXIST is returned.
+ * The pathname buffer is freed and the reference on a_dvp released after
+ * the rpc; if the initial VOP_GETATTR() fails the operation is aborted.
+ *
+ * NOTE(review): in the nqnfs branch sa_nqrdev is not assigned, unlike in
+ * nfs_create() and nfs_setattr() -- confirm the server ignores it.
+ * NOTE(review): when the looked-up node is not a directory it is vput()
+ * but *vpp keeps pointing at it -- confirm callers ignore *vpp on error.
+ */
+int
+nfs_mkdir(ap)
+ struct vop_mkdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct vnode **vpp = ap->a_vpp;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ register int len;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, firsttry = 1, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct vattr vattr;
+
+ /* The parent's attributes supply the group id for the new directory. */
+ if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (error);
+ }
+ len = cnp->cn_namelen;
+ isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ nfsstats.rpccnt[NFSPROC_MKDIR]++;
+ nfsm_reqhead(dvp, NFSPROC_MKDIR,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len)+NFSX_SATTR(isnq));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+ sp->sa_mode = vtonfs_mode(VDIR, vap->va_mode);
+ sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+ sp->sa_gid = txdr_unsigned(vattr.va_gid);
+ if (isnq) {
+ quad_t qval = -1;
+
+ txdr_hyper(&qval, &sp->sa_nqsize);
+ sp->sa_nqflags = 0;
+ txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+ txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+ } else {
+ sp->sa_nfssize = -1;
+ txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+ txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+ }
+ nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
+ nfsm_mtofh(dvp, *vpp);
+ nfsm_reqdone;
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ /*
+ * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
+ * if we can succeed in looking up the directory.
+ * "firsttry" is necessary since the macros may "goto nfsmout" which
+ * is above the if on errors. (Ugh)
+ */
+ if (error == EEXIST && firsttry) {
+ firsttry = 0;
+ error = 0;
+ nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+ *vpp = NULL;
+ nfsm_reqhead(dvp, NFSPROC_LOOKUP,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+ nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
+ nfsm_mtofh(dvp, *vpp);
+ /* An existing non-directory of that name is a genuine EEXIST. */
+ if ((*vpp)->v_type != VDIR) {
+ vput(*vpp);
+ error = EEXIST;
+ }
+ m_freem(mrep);
+ }
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ vrele(dvp);
+ return (error);
+}
+
+/*
+ * nfs remove directory call
+ *
+ * Removes the directory a_vp, named by a_cnp inside a_dvp, with an RMDIR
+ * rpc.  Asking to remove a directory from itself (a_dvp == a_vp) fails
+ * with EINVAL.  On every path the pathname buffer is freed and both
+ * vnode references are released.  After the rpc the parent is marked
+ * modified, its cached attributes are dropped and the name cache is
+ * purged for both vnodes.
+ * Returns 0 or an errno value; ENOENT is mapped to 0.
+ *
+ * NOTE(review): nfsm_* macros are defined outside this section and
+ * presumably jump to an nfsmout label inside nfsm_reqdone on failure.
+ */
+int
+nfs_rmdir(ap)
+ struct vop_rmdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct vnode *dvp = ap->a_dvp;
+ register struct componentname *cnp = ap->a_cnp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ /* Same vnode passed twice: drop both references to it. */
+ if (dvp == vp) {
+ vrele(dvp);
+ vrele(dvp);
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ return (EINVAL);
+ }
+ nfsstats.rpccnt[NFSPROC_RMDIR]++;
+ nfsm_reqhead(dvp, NFSPROC_RMDIR,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
+ nfsm_reqdone;
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ cache_purge(dvp);
+ cache_purge(vp);
+ vrele(vp);
+ vrele(dvp);
+ /*
+ * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
+ */
+ if (error == ENOENT)
+ error = 0;
+ return (error);
+}
+
+/*
+ * nfs readdir call
+ * Although cookie is defined as opaque, I translate it to/from net byte
+ * order so that it looks more sensible. This appears consistent with the
+ * Ultrix implementation of NFS.
+ *
+ * Only directories are accepted (EPERM otherwise).  A read that starts
+ * exactly at the remembered end-of-directory offset of an unmodified
+ * directory returns 0 without transferring anything, provided the cached
+ * state is still good: for nqnfs the read lease must be cachable, for
+ * plain nfs the modify time from VOP_GETATTR() must match the remembered
+ * one.  Everything else goes through nfs_bioread().
+ */
+int
+nfs_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct uio *uio = ap->a_uio;
+	int tresid, error;
+	int eofhit = 0;
+	struct vattr vattr;
+
+	if (vp->v_type != VDIR)
+		return (EPERM);
+
+	/*
+	 * First, check for hit on the EOF offset cache
+	 */
+	if (uio->uio_offset != 0 &&
+	    uio->uio_offset == np->n_direofoffset &&
+	    !(np->n_flag & NMODIFIED)) {
+		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
+			if (NQNFS_CKCACHABLE(vp, NQL_READ))
+				eofhit = 1;
+		} else {
+			if (VOP_GETATTR(vp, &vattr, ap->a_cred,
+			    uio->uio_procp) == 0 &&
+			    np->n_mtime == vattr.va_mtime.ts_sec)
+				eofhit = 1;
+		}
+	}
+	if (eofhit) {
+		nfsstats.direofcache_hits++;
+		return (0);
+	}
+
+	/*
+	 * Call nfs_bioread() to do the real work.
+	 */
+	tresid = uio->uio_resid;
+	error = nfs_bioread(vp, uio, 0, ap->a_cred);
+
+	/* Nothing transferred and no error: count an EOF cache miss. */
+	if (error == 0 && uio->uio_resid == tresid)
+		nfsstats.direofcache_misses++;
+	return (error);
+}
+
+/*
+ * Readdir rpc call.
+ * Called from below the buffer cache by nfs_doio().
+ */
+int
+nfs_readdirrpc(vp, uiop, cred)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+{
+ register long len;
+ register struct dirent *dp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ long tlen, lastlen;
+ caddr_t bpos, dpos, cp2;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct mbuf *md2;
+ caddr_t dpos2;
+ int siz;
+ int more_dirs = 1;
+ u_long off, savoff;
+ struct dirent *savdp;
+ struct nfsmount *nmp;
+ struct nfsnode *np = VTONFS(vp);
+ long tresid;
+
+ /*
+ * tresid remembers the residual count on entry so that the code at
+ * the bottom can tell whether anything was transferred at all.
+ */
+ nmp = VFSTONFS(vp->v_mount);
+ tresid = uiop->uio_resid;
+ /*
+ * Loop around doing readdir rpc's of size uio_resid or nm_rsize,
+ * whichever is smaller, truncated to a multiple of NFS_DIRBLKSIZ.
+ * The stopping criteria is EOF or buffer full.
+ */
+ while (more_dirs && uiop->uio_resid >= NFS_DIRBLKSIZ) {
+ nfsstats.rpccnt[NFSPROC_READDIR]++;
+ nfsm_reqhead(vp, NFSPROC_READDIR,
+ NFSX_FH + 2 * NFSX_UNSIGNED);
+ nfsm_fhtom(vp);
+ /* Request arguments: directory offset (cookie) and byte count. */
+ nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
+ off = (u_long)uiop->uio_offset;
+ *tl++ = txdr_unsigned(off);
+ *tl = txdr_unsigned(((uiop->uio_resid > nmp->nm_rsize) ?
+ nmp->nm_rsize : uiop->uio_resid) & ~(NFS_DIRBLKSIZ-1));
+ nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
+ siz = 0;
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ more_dirs = fxdr_unsigned(int, *tl);
+
+ /* Save the position so that we can do nfsm_mtouio() later */
+ dpos2 = dpos;
+ md2 = md;
+
+ /* loop thru the dir entries, doctoring them to 4bsd form */
+#ifdef lint
+ dp = (struct dirent *)0;
+#endif /* lint */
+ while (more_dirs && siz < uiop->uio_resid) {
+ savoff = off; /* Hold onto offset and dp */
+ savdp = dp;
+ /*
+ * The dirent header is overlaid on the dissected fileid/length
+ * words of the reply entry; the name bytes follow it directly.
+ */
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ dp = (struct dirent *)tl;
+ dp->d_fileno = fxdr_unsigned(u_long, *tl++);
+ len = fxdr_unsigned(int, *tl);
+ if (len <= 0 || len > NFS_MAXNAMLEN) {
+ error = EBADRPC;
+ m_freem(mrep);
+ goto nfsmout;
+ }
+ dp->d_namlen = (u_char)len;
+ dp->d_type = DT_UNKNOWN;
+ nfsm_adv(len); /* Point past name */
+ tlen = nfsm_rndup(len);
+ /*
+ * This should not be necessary, but some servers have
+ * broken XDR such that these bytes are not null filled.
+ */
+ if (tlen != len) {
+ *dpos = '\0'; /* Null-terminate */
+ nfsm_adv(tlen - len);
+ len = tlen;
+ }
+ /*
+ * Next come the entry's cookie (the offset of the following
+ * entry) and the "another entry follows" flag. The cookie word
+ * is zeroed after being read so it terminates the name.
+ */
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ off = fxdr_unsigned(u_long, *tl);
+ *tl++ = 0; /* Ensures null termination of name */
+ more_dirs = fxdr_unsigned(int, *tl);
+ dp->d_reclen = len + 4 * NFSX_UNSIGNED;
+ siz += dp->d_reclen;
+ }
+ /*
+ * If at end of rpc data, get the eof boolean
+ */
+ if (!more_dirs) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ more_dirs = (fxdr_unsigned(int, *tl) == 0);
+
+ /*
+ * If at EOF, cache directory offset
+ */
+ if (!more_dirs)
+ np->n_direofoffset = off;
+ }
+ /*
+ * If there is too much to fit in the data buffer, use savoff and
+ * savdp to trim off the last record.
+ * --> we are not at eof
+ */
+ if (siz > uiop->uio_resid) {
+ off = savoff;
+ siz -= dp->d_reclen;
+ dp = savdp;
+ more_dirs = 0; /* Paranoia */
+ }
+ /*
+ * Copy the doctored records out starting from the saved reply
+ * position. lastlen remembers the size of the final record so it
+ * can be located again for padding after the loop.
+ */
+ if (siz > 0) {
+ lastlen = dp->d_reclen;
+ md = md2;
+ dpos = dpos2;
+ nfsm_mtouio(uiop, siz);
+ uiop->uio_offset = (off_t)off;
+ } else
+ more_dirs = 0; /* Ugh, never happens, but in case.. */
+ m_freem(mrep);
+ }
+ /*
+ * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ
+ * by increasing d_reclen for the last record.
+ */
+ if (uiop->uio_resid < tresid) {
+ len = uiop->uio_resid & (NFS_DIRBLKSIZ - 1);
+ if (len > 0) {
+ /* The last record ends exactly at the current iov_base. */
+ dp = (struct dirent *)
+ (uiop->uio_iov->iov_base - lastlen);
+ dp->d_reclen += len;
+ uiop->uio_iov->iov_base += len;
+ uiop->uio_iov->iov_len -= len;
+ uiop->uio_resid -= len;
+ }
+ }
+nfsmout:
+ return (error);
+}
+
+/*
+ * Nqnfs readdir_and_lookup RPC. Used in place of nfs_readdirrpc().
+ */
+int
+nfs_readdirlookrpc(vp, uiop, cred)
+	struct vnode *vp;
+	register struct uio *uiop;
+	struct ucred *cred;
+{
+	register int len;
+	register struct dirent *dp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct nameidata nami, *ndp = &nami;
+	struct componentname *cnp = &ndp->ni_cnd;
+	u_long off, endoff, fileno;
+	time_t reqtime, ltime;
+	struct nfsmount *nmp;
+	struct nfsnode *np;
+	struct vnode *newvp;
+	nfsv2fh_t *fhp;
+	u_quad_t frev;
+	int error = 0, tlen, more_dirs = 1, tresid, doit, bigenough, i;
+	int cachable;
+
+	/*
+	 * Entries are written straight into the first iovec, so the uio
+	 * must consist of exactly one.
+	 */
+	if (uiop->uio_iovcnt != 1)
+		panic("nfs rdirlook");
+	nmp = VFSTONFS(vp->v_mount);
+	tresid = uiop->uio_resid;
+	ndp->ni_dvp = vp;
+	newvp = NULLVP;
+	/*
+	 * Loop around doing readdir rpc's of size uio_resid or nm_rsize,
+	 * whichever is smaller, truncated to a multiple of NFS_DIRBLKSIZ.
+	 * The stopping criteria is EOF or buffer full.
+	 */
+	while (more_dirs && uiop->uio_resid >= NFS_DIRBLKSIZ) {
+		nfsstats.rpccnt[NQNFSPROC_READDIRLOOK]++;
+		nfsm_reqhead(vp, NQNFSPROC_READDIRLOOK,
+			NFSX_FH + 3 * NFSX_UNSIGNED);
+		nfsm_fhtom(vp);
+		/* Arguments: offset (cookie), byte count, requested lease term. */
+		nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
+		off = (u_long)uiop->uio_offset;
+		/*
+		 * Start endoff at the request offset so that a reply carrying
+		 * no entries but flagging EOF caches a defined offset instead
+		 * of an uninitialized value.
+		 */
+		endoff = off;
+		*tl++ = txdr_unsigned(off);
+		*tl++ = txdr_unsigned(((uiop->uio_resid > nmp->nm_rsize) ?
+			nmp->nm_rsize : uiop->uio_resid) & ~(NFS_DIRBLKSIZ-1));
+		if (nmp->nm_flag & NFSMNT_NQLOOKLEASE)
+			*tl = txdr_unsigned(nmp->nm_leaseterm);
+		else
+			*tl = 0;
+		reqtime = time.tv_sec;
+		nfsm_request(vp, NQNFSPROC_READDIRLOOK, uiop->uio_procp, cred);
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		more_dirs = fxdr_unsigned(int, *tl);
+
+		/* loop thru the dir entries, doctoring them to 4bsd form */
+		bigenough = 1;
+		while (more_dirs && bigenough) {
+			doit = 1;
+			/* Lease information: cachable flag, duration, file revision. */
+			nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED);
+			if (nmp->nm_flag & NFSMNT_NQLOOKLEASE) {
+				cachable = fxdr_unsigned(int, *tl++);
+				ltime = reqtime + fxdr_unsigned(int, *tl++);
+				fxdr_hyper(tl, &frev);
+			}
+			nfsm_dissect(fhp, nfsv2fh_t *, NFSX_FH);
+			if (!bcmp(VTONFS(vp)->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+				/* The entry is the directory itself. */
+				VREF(vp);
+				newvp = vp;
+				np = VTONFS(vp);
+			} else {
+				/*
+				 * np is only valid when nfs_nget() succeeds.
+				 * Without a vnode the entry's attributes cannot
+				 * be consumed from the reply, so abandon the RPC
+				 * rather than carry on with a garbage vnode
+				 * pointer. newvp is still NULLVP here, so the
+				 * exit path releases nothing.
+				 */
+				if (error = nfs_nget(vp->v_mount, fhp, &np)) {
+					m_freem(mrep);
+					goto nfsmout;
+				}
+				newvp = NFSTOV(np);
+			}
+			if (error = nfs_loadattrcache(&newvp, &md, &dpos,
+				(struct vattr *)0))
+				doit = 0;
+			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+			fileno = fxdr_unsigned(u_long, *tl++);
+			len = fxdr_unsigned(int, *tl);
+			if (len <= 0 || len > NFS_MAXNAMLEN) {
+				error = EBADRPC;
+				m_freem(mrep);
+				goto nfsmout;
+			}
+			/* Name length rounded up with room for at least one NUL. */
+			tlen = (len + 4) & ~0x3;
+			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
+				bigenough = 0;
+			if (bigenough && doit) {
+				/* Build the dirent header directly in the caller's buffer. */
+				dp = (struct dirent *)uiop->uio_iov->iov_base;
+				dp->d_fileno = fileno;
+				dp->d_namlen = len;
+				dp->d_reclen = tlen + DIRHDSIZ;
+				dp->d_type =
+				    IFTODT(VTTOIF(np->n_vattr.va_type));
+				uiop->uio_resid -= DIRHDSIZ;
+				uiop->uio_iov->iov_base += DIRHDSIZ;
+				uiop->uio_iov->iov_len -= DIRHDSIZ;
+				cnp->cn_nameptr = uiop->uio_iov->iov_base;
+				cnp->cn_namelen = len;
+				ndp->ni_vp = newvp;
+				/* Copy the name, then NUL-fill the padding after it. */
+				nfsm_mtouio(uiop, len);
+				cp = uiop->uio_iov->iov_base;
+				tlen -= len;
+				for (i = 0; i < tlen; i++)
+					*cp++ = '\0';
+				uiop->uio_iov->iov_base += tlen;
+				uiop->uio_iov->iov_len -= tlen;
+				uiop->uio_resid -= tlen;
+				/* Name hash for the cache_enter() call below. */
+				cnp->cn_hash = 0;
+				for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++)
+					cnp->cn_hash += (unsigned char)*cp * i;
+				if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) &&
+					ltime > time.tv_sec)
+					nqnfs_clientlease(nmp, np, NQL_READ,
+						cachable, ltime, frev);
+				if (cnp->cn_namelen <= NCHNAMLEN)
+					cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
+			} else {
+				/* Entry not used: step over its name in the reply. */
+				nfsm_adv(nfsm_rndup(len));
+			}
+			if (newvp != NULLVP) {
+				vrele(newvp);
+				newvp = NULLVP;
+			}
+			/*
+			 * Cookie of the next entry and the "more entries" flag.
+			 * off only advances for entries that fit in the buffer.
+			 */
+			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+			if (bigenough)
+				endoff = off = fxdr_unsigned(u_long, *tl++);
+			else
+				endoff = fxdr_unsigned(u_long, *tl++);
+			more_dirs = fxdr_unsigned(int, *tl);
+		}
+		/*
+		 * If at end of rpc data, get the eof boolean
+		 */
+		if (!more_dirs) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			more_dirs = (fxdr_unsigned(int, *tl) == 0);
+
+			/*
+			 * If at EOF, cache directory offset
+			 */
+			if (!more_dirs)
+				VTONFS(vp)->n_direofoffset = endoff;
+		}
+		if (uiop->uio_resid < tresid)
+			uiop->uio_offset = (off_t)off;
+		else
+			more_dirs = 0;
+		m_freem(mrep);
+	}
+	/*
+	 * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ
+	 * by increasing d_reclen for the last record.
+	 */
+	if (uiop->uio_resid < tresid) {
+		len = uiop->uio_resid & (NFS_DIRBLKSIZ - 1);
+		if (len > 0) {
+			dp->d_reclen += len;
+			uiop->uio_iov->iov_base += len;
+			uiop->uio_iov->iov_len -= len;
+			uiop->uio_resid -= len;
+		}
+	}
+nfsmout:
+	if (newvp != NULLVP)
+		vrele(newvp);
+	return (error);
+}
+/* Hex digit lookup table, indexed by a 4-bit value. */
+static char hextoasc[] = "0123456789abcdef";
+
+/*
+ * Silly rename. To make the NFS filesystem that is stateless look a little
+ * more like the "ufs" a remove of an active vnode is translated to a rename
+ * to a funny looking filename that is removed by nfs_inactive on the
+ * nfsnode. There is the potential for another process on a different client
+ * to create the same funny name between the time nfs_lookitup() fails and
+ * the time nfs_rename() completes, but...
+ */
+int
+nfs_sillyrename(dvp, vp, cnp)
+ struct vnode *dvp, *vp;
+ struct componentname *cnp;
+{
+ register struct nfsnode *np;
+ register struct sillyrename *sp;
+ int error;
+ short pid;
+
+ cache_purge(dvp);
+ np = VTONFS(vp);
+ /*
+ * The sillyrename record is either allocated separately or embedded
+ * in the nfsnode, depending on SILLYSEPARATE.
+ */
+#ifdef SILLYSEPARATE
+ MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
+ M_NFSREQ, M_WAITOK);
+#else
+ sp = &np->n_silly;
+#endif
+ /* The record keeps its own credential copy and a reference on dvp. */
+ sp->s_cred = crdup(cnp->cn_cred);
+ sp->s_dvp = dvp;
+ VREF(dvp);
+
+ /*
+ * Fudge together a funny name: the four 'x' characters (indices 5-8)
+ * are replaced by the low 16 bits of the pid in hex.
+ */
+ pid = cnp->cn_proc->p_pid;
+ bcopy(".nfsAxxxx4.4", sp->s_name, 13);
+ sp->s_namlen = 12;
+ sp->s_name[8] = hextoasc[pid & 0xf];
+ sp->s_name[7] = hextoasc[(pid >> 4) & 0xf];
+ sp->s_name[6] = hextoasc[(pid >> 8) & 0xf];
+ sp->s_name[5] = hextoasc[(pid >> 12) & 0xf];
+
+ /*
+ * Try lookitups until we get one that isn't there: a zero return
+ * means the name exists, so bump the character at index 4 (initially
+ * 'A') and retry, giving up with EINVAL once it passes 'z'.
+ */
+ while (nfs_lookitup(sp, (nfsv2fh_t *)0, cnp->cn_proc) == 0) {
+ sp->s_name[4]++;
+ if (sp->s_name[4] > 'z') {
+ error = EINVAL;
+ goto bad;
+ }
+ }
+ if (error = nfs_renameit(dvp, cnp, sp))
+ goto bad;
+ /*
+ * Look the new name up again, this time storing the returned file
+ * handle into the nfsnode; the result of this lookup is not checked.
+ */
+ nfs_lookitup(sp, &np->n_fh, cnp->cn_proc);
+ np->n_sillyrename = sp;
+ return (0);
+bad:
+ /* Undo the reference and credential taken above. */
+ vrele(sp->s_dvp);
+ crfree(sp->s_cred);
+#ifdef SILLYSEPARATE
+ free((caddr_t)sp, M_NFSREQ);
+#endif
+ return (error);
+}
+
+/*
+ * Look up a file name for silly rename stuff.
+ * Just like nfs_lookup() except that it doesn't load returned values
+ * into the nfsnode table.
+ * If fhp != NULL it copies the returned file handle out
+ */
+int
+nfs_lookitup(sp, fhp, procp)
+ register struct sillyrename *sp;
+ nfsv2fh_t *fhp;
+ struct proc *procp;
+{
+ register struct vnode *vp = sp->s_dvp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ long len;
+
+ /*
+ * Issue a LOOKUP for sp->s_name in directory sp->s_dvp using the
+ * credentials saved in sp. NQNFS mounts carry one extra word ahead
+ * of the file handle in the request (sent as 0) and one extra word
+ * ahead of the file handle in the reply.
+ */
+ isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+ len = sp->s_namlen;
+ nfsm_reqhead(vp, NFSPROC_LOOKUP, NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+ if (isnq) {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ nfsm_fhtom(vp);
+ nfsm_strtom(sp->s_name, len, NFS_MAXNAMLEN);
+ nfsm_request(vp, NFSPROC_LOOKUP, procp, sp->s_cred);
+ /*
+ * The reply is only parsed when the caller wants the file handle;
+ * otherwise the return value alone says whether the name exists.
+ */
+ if (fhp != NULL) {
+ if (isnq)
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ nfsm_dissect(cp, caddr_t, NFSX_FH);
+ bcopy(cp, (caddr_t)fhp, NFSX_FH);
+ }
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * Kludge City..
+ * - make nfs_bmap() essentially a no-op that does no translation
+ * - do nfs_strategy() by faking physical I/O with nfs_readrpc/nfs_writerpc
+ * after mapping the physical addresses into Kernel Virtual space in the
+ * nfsiobuf area.
+ * (Maybe I could use the process's page mapping, but I was concerned that
+ * Kernel Write might not be enabled and also figured copyout() would do
+ * a lot more work than bcopy() and also it currently happens in the
+ * context of the swapper process (2).
+ */
+int
+nfs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+
+	/*
+	 * No real translation is done: the vnode maps to itself and the
+	 * logical block number is merely rescaled from filesystem I/O-size
+	 * blocks to device blocks.
+	 */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = vp;
+	if (ap->a_bnp != NULL)
+		*ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize);
+	/*
+	 * Always give the run-length out-parameter a defined value so a
+	 * caller that supplies it never reads an uninitialized int; no
+	 * contiguous run is reported.
+	 */
+	if (ap->a_runp != NULL)
+		*ap->a_runp = 0;
+	return (0);
+}
+
+/*
+ * Strategy routine.
+ * For async requests when nfsiod(s) are running, queue the request by
+ * calling nfs_asyncio(), otherwise just call nfs_doio() to do the
+ * request.
+ */
+int
+nfs_strategy(ap)
+	struct vop_strategy_args *ap;
+{
+	register struct buf *bp = ap->a_bp;
+	struct ucred *cred;
+	struct proc *procp;
+
+	if (bp->b_flags & B_PHYS)
+		panic("nfs physio");
+
+	/* Asynchronous requests run without a process context. */
+	procp = (bp->b_flags & B_ASYNC) ? (struct proc *)0 : curproc; /* XXX */
+	/* Pick the credential that matches the direction of the transfer. */
+	cred = (bp->b_flags & B_READ) ? bp->b_rcred : bp->b_wcred;
+
+	/*
+	 * An asynchronous request is first offered to the i/o daemons; if
+	 * one accepts it (nfs_asyncio() returns 0) there is nothing left to
+	 * do here. Synchronous requests, and asynchronous ones that could
+	 * not be queued, are carried out directly.
+	 */
+	if ((bp->b_flags & B_ASYNC) && nfs_asyncio(bp, NOCRED) == 0)
+		return (0);
+	return (nfs_doio(bp, cred, procp));
+}
+
+/*
+ * Mmap a file
+ *
+ * NB Currently unsupported.
+ */
+/* ARGSUSED */
+int
+nfs_mmap(ap)
+ struct vop_mmap_args /* {
+ struct vnode *a_vp;
+ int a_fflags;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ /* No argument is examined; the request is always rejected. */
+ return (EINVAL);
+}
+
+/*
+ * Flush all the blocks associated with a vnode.
+ * Walk through the buffer pool and push any dirty pages
+ * associated with the vnode.
+ */
+/* ARGSUSED */
+int
+nfs_fsync(ap)
+ struct vop_fsync_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode * a_vp;
+ struct ucred * a_cred;
+ int a_waitfor;
+ struct proc * a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ register struct buf *bp;
+ struct buf *nbp;
+ struct nfsmount *nmp;
+ int s, error = 0, slptimeo = 0, slpflag = 0;
+
+ /* On interruptible mounts the sleeps below may be broken by a signal. */
+ nmp = VFSTONFS(vp->v_mount);
+ if (nmp->nm_flag & NFSMNT_INT)
+ slpflag = PCATCH;
+loop:
+ /*
+ * Walk the vnode's dirty buffer list at splbio. Whenever the code
+ * sleeps or starts a write it restarts the walk from the top, since
+ * the list may have changed in the meantime.
+ */
+ s = splbio();
+ for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+ nbp = bp->b_vnbufs.le_next;
+ if (bp->b_flags & B_BUSY) {
+ /* Busy buffers are skipped unless the caller wants to wait. */
+ if (ap->a_waitfor != MNT_WAIT)
+ continue;
+ bp->b_flags |= B_WANTED;
+ error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
+ "nfsfsync", slptimeo);
+ splx(s);
+ if (error) {
+ if (nfs_sigintr(nmp, (struct nfsreq *)0, ap->a_p))
+ return (EINTR);
+ /*
+ * Sleep returned an error but nfs_sigintr() did not
+ * ask for an abort: stop catching signals and use a
+ * two second timeout for later sleeps instead.
+ */
+ if (slpflag == PCATCH) {
+ slpflag = 0;
+ slptimeo = 2 * hz;
+ }
+ }
+ goto loop;
+ }
+ if ((bp->b_flags & B_DELWRI) == 0)
+ panic("nfs_fsync: not dirty");
+ /* Claim the buffer and push it out as an asynchronous write. */
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ splx(s);
+ bp->b_flags |= B_ASYNC;
+ VOP_BWRITE(bp);
+ goto loop;
+ }
+ splx(s);
+ if (ap->a_waitfor == MNT_WAIT) {
+ /* Wait for every write in progress on this vnode to finish. */
+ while (vp->v_numoutput) {
+ vp->v_flag |= VBWAIT;
+ error = tsleep((caddr_t)&vp->v_numoutput,
+ slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
+ if (error) {
+ if (nfs_sigintr(nmp, (struct nfsreq *)0, ap->a_p))
+ return (EINTR);
+ if (slpflag == PCATCH) {
+ slpflag = 0;
+ slptimeo = 2 * hz;
+ }
+ }
+ }
+ /* Buffers dirtied while we slept: go around again. */
+ if (vp->v_dirtyblkhd.lh_first) {
+#ifdef DIAGNOSTIC
+ vprint("nfs_fsync: dirty", vp);
+#endif
+ goto loop;
+ }
+ }
+ /*
+ * A write error recorded on the nfsnode is reported once: it
+ * replaces the return value and the flag is cleared.
+ */
+ if (np->n_flag & NWRITEERR) {
+ error = np->n_error;
+ np->n_flag &= ~NWRITEERR;
+ }
+ return (error);
+}
+
+/*
+ * Return POSIX pathconf information applicable to nfs.
+ *
+ * Currently the NFS protocol does not support getting such
+ * information from the remote server.
+ */
+/* ARGSUSED */
+int
+nfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	/* No pathconf variable is supported; *a_retval is never written. */
+	return (EINVAL);
+}
+
+/*
+ * NFS advisory byte-level locks.
+ * Currently unsupported.
+ */
+int
+nfs_advlock(ap)
+ struct vop_advlock_args /* {
+ struct vnode *a_vp;
+ caddr_t a_id;
+ int a_op;
+ struct flock *a_fl;
+ int a_flags;
+ } */ *ap;
+{
+
+ /* No argument is examined; every lock request is refused. */
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Print out the contents of an nfsnode.
+ */
+int
+nfs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+
+	/* Identify the node by the file id and fs id in its cached attributes. */
+	printf("tag VT_NFS, fileid %d fsid 0x%x",
+		np->n_vattr.va_fileid, np->n_vattr.va_fsid);
+#ifdef FIFO
+	if (vp->v_type == VFIFO)
+		fifo_printinfo(vp);
+#endif /* FIFO */
+	printf("\n");
+	/* The function is declared int, so return a defined value. */
+	return (0);
+}
+
+/*
+ * NFS directory offset lookup.
+ * Currently unsupported.
+ */
+int
+nfs_blkatoff(ap)
+ struct vop_blkatoff_args /* {
+ struct vnode *a_vp;
+ off_t a_offset;
+ char **a_res;
+ struct buf **a_bpp;
+ } */ *ap;
+{
+
+ /* Stub: neither *a_res nor *a_bpp is written. */
+ return (EOPNOTSUPP);
+}
+
+/*
+ * NFS flat namespace allocation.
+ * Currently unsupported.
+ */
+int
+nfs_valloc(ap)
+ struct vop_valloc_args /* {
+ struct vnode *a_pvp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct vnode **a_vpp;
+ } */ *ap;
+{
+
+ /* Stub: *a_vpp is never written. */
+ return (EOPNOTSUPP);
+}
+
+/*
+ * NFS flat namespace free.
+ * Currently unsupported.
+ */
+int
+nfs_vfree(ap)
+ struct vop_vfree_args /* {
+ struct vnode *a_pvp;
+ ino_t a_ino;
+ int a_mode;
+ } */ *ap;
+{
+
+ /* Stub: no argument is examined. */
+ return (EOPNOTSUPP);
+}
+
+/*
+ * NFS file truncation.
+ */
+int
+nfs_truncate(ap)
+ struct vop_truncate_args /* {
+ struct vnode *a_vp;
+ off_t a_length;
+ int a_flags;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ /*
+ * Not implemented: every call prints a reminder and fails. The note
+ * below records the intended approach.
+ */
+ /* Use nfs_setattr */
+ printf("nfs_truncate: need to implement!!");
+ return (EOPNOTSUPP);
+}
+
+/*
+ * NFS update.
+ */
+int
+nfs_update(ap)
+ struct vop_update_args /* {
+ struct vnode *a_vp;
+ struct timeval *a_ta;
+ struct timeval *a_tm;
+ int a_waitfor;
+ } */ *ap;
+{
+
+ /*
+ * Not implemented: every call prints a reminder and fails. The note
+ * below records the intended approach.
+ */
+ /* Use nfs_setattr */
+ printf("nfs_update: need to implement!!");
+ return (EOPNOTSUPP);
+}
+
+/*
+ * nfs special file access vnode op.
+ * Essentially just get vattr and then imitate iaccess() since the device is
+ * local to the client.
+ */
+int
+nfsspec_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct ucred *cred = ap->a_cred;
+	mode_t want = ap->a_mode;
+	struct vattr va;
+	register int i;
+	int error, ingroup;
+
+	/* The super-user is never refused. */
+	if (cred->cr_uid == 0)
+		return (0);
+
+	error = VOP_GETATTR(ap->a_vp, &va, cred, ap->a_p);
+	if (error)
+		return (error);
+
+	/*
+	 * Exactly one permission class applies. The requested bits arrive
+	 * in the owner position; for a non-owner they are shifted down to
+	 * the group bits (by 3) when the file's group is one of the
+	 * caller's groups, or to the "other" bits (by 6) when it is not.
+	 */
+	if (cred->cr_uid != va.va_uid) {
+		ingroup = 0;
+		for (i = 0; i < cred->cr_ngroups; i++) {
+			if (cred->cr_groups[i] == va.va_gid) {
+				ingroup = 1;
+				break;
+			}
+		}
+		want >>= ingroup ? 3 : 6;
+	}
+
+	/* Every requested bit must be present in the file mode. */
+	return ((va.va_mode & want) == want ? 0 : EACCES);
+}
+
+/*
+ * Read wrapper for special devices.
+ */
+int
+nfsspec_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct nfsnode *node;
+
+	/* Stamp the access time and mark the node as accessed ... */
+	node = VTONFS(ap->a_vp);
+	node->n_atim = time;
+	node->n_flag |= NACC;
+
+	/* ... then hand the read itself to the special-device operations. */
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for special devices.
+ */
+int
+nfsspec_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct nfsnode *node;
+
+	/* Stamp the modification time and mark the node as updated ... */
+	node = VTONFS(ap->a_vp);
+	node->n_mtim = time;
+	node->n_flag |= NUPD;
+
+	/* ... then hand the write itself to the special-device operations. */
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for special devices.
+ *
+ * Update the times on the nfsnode then do device close.
+ */
+int
+nfsspec_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	struct vattr va;
+	int push = 0;
+
+	/*
+	 * Pending access/update marks turn into a "changed" mark. The
+	 * recorded times are only pushed out when this is the last use of
+	 * the vnode and the mount is writable.
+	 */
+	if (np->n_flag & (NACC | NUPD)) {
+		np->n_flag |= NCHG;
+		push = (vp->v_usecount == 1 &&
+		    !(vp->v_mount->mnt_flag & MNT_RDONLY));
+	}
+	if (push) {
+		VATTR_NULL(&va);
+		if (np->n_flag & NACC) {
+			va.va_atime.ts_sec = np->n_atim.tv_sec;
+			va.va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+		}
+		if (np->n_flag & NUPD) {
+			va.va_mtime.ts_sec = np->n_mtim.tv_sec;
+			va.va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+		}
+		/* Best effort: the setattr result is deliberately ignored. */
+		(void)VOP_SETATTR(vp, &va, ap->a_cred, ap->a_p);
+	}
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
+}
+
+#ifdef FIFO
+/*
+ * Read wrapper for fifos.
+ */
+int
+nfsfifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct nfsnode *node;
+
+	/* Stamp the access time and mark the node as accessed ... */
+	node = VTONFS(ap->a_vp);
+	node->n_atim = time;
+	node->n_flag |= NACC;
+
+	/* ... then hand the read itself to the fifo operations. */
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for fifos.
+ */
+int
+nfsfifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct nfsnode *node;
+
+	/* Stamp the modification time and mark the node as updated ... */
+	node = VTONFS(ap->a_vp);
+	node->n_mtim = time;
+	node->n_flag |= NUPD;
+
+	/* ... then hand the write itself to the fifo operations. */
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for fifos.
+ *
+ * Update the times on the nfsnode then do fifo close.
+ */
+int
+nfsfifo_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	struct vattr va;
+	int push = 0;
+
+	/*
+	 * Pending access/update marks are re-stamped with the current time
+	 * and turned into a "changed" mark. The times are only pushed out
+	 * when this is the last use of the vnode and the mount is writable.
+	 */
+	if (np->n_flag & (NACC | NUPD)) {
+		if (np->n_flag & NACC)
+			np->n_atim = time;
+		if (np->n_flag & NUPD)
+			np->n_mtim = time;
+		np->n_flag |= NCHG;
+		push = (vp->v_usecount == 1 &&
+		    !(vp->v_mount->mnt_flag & MNT_RDONLY));
+	}
+	if (push) {
+		VATTR_NULL(&va);
+		if (np->n_flag & NACC) {
+			va.va_atime.ts_sec = np->n_atim.tv_sec;
+			va.va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+		}
+		if (np->n_flag & NUPD) {
+			va.va_mtime.ts_sec = np->n_mtim.tv_sec;
+			va.va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+		}
+		/* Best effort: the setattr result is deliberately ignored. */
+		(void)VOP_SETATTR(vp, &va, ap->a_cred, ap->a_p);
+	}
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
+}
+#endif /* FIFO */
diff --git a/sys/nfs/nfsdiskless.h b/sys/nfs/nfsdiskless.h
new file mode 100644
index 000000000000..74e6b7bca438
--- /dev/null
+++ b/sys/nfs/nfsdiskless.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsdiskless.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Structure that must be initialized for a diskless nfs client.
+ * This structure is used by nfs_mountroot() to set up the root and swap
+ * vnodes plus do a partial ifconfig(8) and route(8) so that the critical net
+ * interface can communicate with the server.
+ * The primary bootstrap is expected to fill in the appropriate fields before
+ * starting vmunix. Whether or not the swap area is nfs mounted is determined
+ * by the value in swdevt[0]. (equal to NODEV --> swap over nfs)
+ * Currently only works for AF_INET protocols.
+ * NB: All fields are stored in net byte order to avoid hassles with
+ * client/server byte ordering differences.
+ */
+struct nfs_diskless {
+ /* Local network configuration */
+ struct ifaliasreq myif; /* Default interface */
+ struct sockaddr_in mygateway; /* Default gateway */
+ /* Swap file parameters */
+ struct nfs_args swap_args; /* Mount args for swap file */
+ u_char swap_fh[NFS_FHSIZE]; /* Swap file's file handle */
+ struct sockaddr_in swap_saddr; /* Address of swap server */
+ char swap_hostnam[MNAMELEN]; /* Host name for mount pt */
+ int swap_nblks; /* Size of server swap file */
+ struct ucred swap_ucred; /* Swap credentials */
+ /* Root file system parameters */
+ struct nfs_args root_args; /* Mount args for root fs */
+ u_char root_fh[NFS_FHSIZE]; /* File handle of root dir */
+ struct sockaddr_in root_saddr; /* Address of root server */
+ char root_hostnam[MNAMELEN]; /* Host name for mount pt */
+ long root_time; /* Timestamp of root fs */
+ /* Client identity */
+ char my_hostnam[MAXHOSTNAMELEN]; /* Client host name */
+};
diff --git a/sys/nfs/nfsm_subs.h b/sys/nfs/nfsm_subs.h
new file mode 100644
index 000000000000..879db3600577
--- /dev/null
+++ b/sys/nfs/nfsm_subs.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsm_subs.h 8.1 (Berkeley) 6/16/93
+ */
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. To attempt to use them for any
+ * other purpose will be dangerous. (they make weird assumptions)
+ */
+
+/*
+ * First define what the actual subs. return
+ */
+extern struct mbuf *nfsm_reqh();
+
+/* Non-zero when mbuf m has external storage attached (M_EXT set). */
+#define M_HASCL(m) ((m)->m_flags & M_EXT)
+/*
+ * Reset m's data pointer to the start of whichever storage area it uses:
+ * external buffer, packet-header data area, or plain data area.
+ * NOTE(review): expands to a bare if/else chain, so it is unsafe as the
+ * unbraced body of an enclosing if that has its own else.
+ */
+#define NFSMINOFF(m) \
+ if (M_HASCL(m)) \
+ (m)->m_data = (m)->m_ext.ext_buf; \
+ else if ((m)->m_flags & M_PKTHDR) \
+ (m)->m_data = (m)->m_pktdat; \
+ else \
+ (m)->m_data = (m)->m_dat
+/* Advance m's data pointer by s bytes. */
+#define NFSMADV(m, s) (m)->m_data += (s)
+/* Size of m's storage area: MCLBYTES, MHLEN or MLEN by mbuf kind. */
+#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \
+ (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several vars. declared in nfsm_reqhead and these
+ * vars. must not be used elsewhere unless you are careful not to corrupt
+ * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries
+ * that may be used so long as the value is not expected to be retained
+ * after a macro.
+ * I know, this is kind of dorkey, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
+ */
+
+/*
+ * Reserve s bytes at the end of the request being built and point a at
+ * them, cast to type c. When the current mbuf (mb) lacks s bytes of
+ * trailing space, a fresh mbuf is obtained with MGET, linked after mb and
+ * made current; panics if s exceeds MLEN. Uses mb, mb2 and bpos from the
+ * enclosing function.
+ */
+#define nfsm_build(a,c,s) \
+ { if ((s) > M_TRAILINGSPACE(mb)) { \
+ MGET(mb2, M_WAIT, MT_DATA); \
+ if ((s) > MLEN) \
+ panic("build > MLEN"); \
+ mb->m_next = mb2; \
+ mb = mb2; \
+ mb->m_len = 0; \
+ bpos = mtod(mb, caddr_t); \
+ } \
+ (a) = (c)(bpos); \
+ mb->m_len += (s); \
+ bpos += (s); }
+
+/*
+ * Point a (cast to type c) at the next s bytes of the reply. t1 is the
+ * number of bytes left in the current mbuf (md) past dpos: when that is
+ * enough, a points straight at dpos and dpos advances; otherwise
+ * nfsm_disct() is called and a is set from cp2. On failure the reply is
+ * freed and control jumps to nfsmout. Uses md, dpos, t1, cp2, mrep and
+ * error from the enclosing function.
+ */
+#define nfsm_dissect(a,c,s) \
+ { t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ (a) = (c)(dpos); \
+ dpos += (s); \
+ } else if (error = nfsm_disct(&md, &dpos, (s), t1, &cp2)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } else { \
+ (a) = (c)cp2; \
+ } }
+
+/* Append the NFSX_FH-byte file handle of vnode v to the request (uses cp). */
+#define nfsm_fhtom(v) \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)&(VTONFS(v)->n_fh), cp, NFSX_FH)
+
+/* Append the NFSX_FH-byte file handle pointed to by f (uses cp). */
+#define nfsm_srvfhtom(f) \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)(f), cp, NFSX_FH)
+
+/*
+ * Dissect a file handle from the reply, obtain the nfsnode for it on the
+ * mount of vnode d via nfs_nget(), store the corresponding vnode in v and
+ * load the attributes that follow in the reply. Any failure frees the
+ * reply and jumps to nfsmout. Note that np and fhp are declared locally
+ * inside the macro's block.
+ */
+#define nfsm_mtofh(d,v) \
+ { struct nfsnode *np; nfsv2fh_t *fhp; \
+ nfsm_dissect(fhp,nfsv2fh_t *,NFSX_FH); \
+ if (error = nfs_nget((d)->v_mount, fhp, &np)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = NFSTOV(np); \
+ nfsm_loadattr(v, (struct vattr *)0); \
+ }
+
+/*
+ * Run nfs_loadattrcache() on the reply for vnode v, passing a through as
+ * its last argument. The call is handed a pointer to a temporary copy of
+ * v, and v is assigned from that temporary afterwards. Failure frees the
+ * reply and jumps to nfsmout.
+ */
+#define nfsm_loadattr(v,a) \
+ { struct vnode *tvp = (v); \
+ if (error = nfs_loadattrcache(&tvp, &md, &dpos, (a))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = tvp; }
+
+/*
+ * Read a string length word into s; a value greater than m frees the
+ * reply and jumps to nfsmout with EBADRPC.
+ * NOTE(review): unlike nfsm_srvstrsiz, a length <= 0 is not rejected here.
+ */
+#define nfsm_strsiz(s,m) \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m)) { \
+ m_freem(mrep); \
+ error = EBADRPC; \
+ goto nfsmout; \
+ } }
+
+/*
+ * Server-side variant: a length greater than m, or <= 0, sets EBADRPC
+ * and goes through nfsm_reply(0).
+ */
+#define nfsm_srvstrsiz(s,m) \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m) || (s) <= 0) { \
+ error = EBADRPC; \
+ nfsm_reply(0); \
+ } }
+
+/*
+ * Copy s bytes of reply data to uio p via nfsm_mbuftouio(); does nothing
+ * when s <= 0. Failure frees the reply and jumps to nfsmout.
+ */
+#define nfsm_mtouio(p,s) \
+ if ((s) > 0 && \
+ (error = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+
+/*
+ * Append s bytes from uio p to the request via nfsm_uiotombuf(). Failure
+ * frees the request and jumps to nfsmout.
+ */
+#define nfsm_uiotom(p,s) \
+ if (error = nfsm_uiotombuf((p),&mb,(s),&bpos)) { \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+
+/* Start a request: mreq and mb both get the mbuf returned by nfsm_reqh(). */
+#define nfsm_reqhead(v,a,s) \
+ mb = mreq = nfsm_reqh((v),(a),(s),&bpos)
+
+/*
+ * Finish a request: free the reply and define the nfsmout label that the
+ * other macros jump to on error (which therefore skips the free).
+ */
+#define nfsm_reqdone m_freem(mrep); \
+ nfsmout:
+
+/* Round a up to the next multiple of 4. */
+#define nfsm_rndup(a) (((a)+3)&(~0x3))
+
+/*
+ * Send the request with nfs_request(), which fills in mrep, md and dpos;
+ * a non-zero result is left in error and control jumps to nfsmout.
+ */
+#define nfsm_request(v, t, p, c) \
+ if (error = nfs_request((v), mreq, (t), (p), \
+ (c), &mrep, &md, &dpos)) \
+ goto nfsmout
+
+/*
+ * Append string a of length s to the request as a length word followed by
+ * the bytes padded to a multiple of 4. A length above m fails with
+ * ENAMETOOLONG. When the whole item (t2 bytes) fits in the current mbuf it
+ * is built in place, with the last word zeroed before the copy so that the
+ * padding is null; otherwise nfsm_strtmbuf() does the work. Either failure
+ * frees the request and jumps to nfsmout. Uses t2 and tl.
+ */
+#define nfsm_strtom(a,s,m) \
+ if ((s) > (m)) { \
+ m_freem(mreq); \
+ error = ENAMETOOLONG; \
+ goto nfsmout; \
+ } \
+ t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \
+ if (t2 <= M_TRAILINGSPACE(mb)) { \
+ nfsm_build(tl,u_long *,t2); \
+ *tl++ = txdr_unsigned(s); \
+ *(tl+((t2>>2)-2)) = 0; \
+ bcopy((caddr_t)(a), (caddr_t)tl, (s)); \
+ } else if (error = nfsm_strtmbuf(&mb, &bpos, (a), (s))) { \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+
+/* Server routine epilogue: defines the nfsmout label and returns error. */
+#define nfsm_srvdone \
+ nfsmout: \
+ return(error)
+
+/*
+ * Record error as the reply status and build the reply header with
+ * nfs_rephead(), asking for s bytes only when there is no error. The
+ * incoming mrep is freed and mreq is set to the new reply (*mrq). When
+ * error is set, the enclosing function returns 0 from inside this macro.
+ */
+#define nfsm_reply(s) \
+ { \
+ nfsd->nd_repstat = error; \
+ if (error) \
+ (void) nfs_rephead(0, nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ else \
+ (void) nfs_rephead((s), nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ m_freem(mrep); \
+ mreq = *mrq; \
+ if (error) \
+ return(0); \
+ }
+
+/*
+ * Skip s bytes of the reply: bump dpos when they all lie in the current
+ * mbuf, otherwise call nfs_adv(). Failure frees the reply and jumps to
+ * nfsmout.
+ * NOTE(review): expands to two unbraced statements (an assignment, then
+ * an if), so it must not be the unbraced body of an if or loop.
+ */
+#define nfsm_adv(s) \
+ t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ dpos += (s); \
+ } else if (error = nfs_adv(&md, &dpos, (s), t1)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+
+/*
+ * Dissect an NFSX_FH-byte file handle from the incoming message and copy
+ * it to the buffer f points to (uses tl). The argument is parenthesized
+ * inside the cast so that any pointer expression can be passed safely,
+ * matching nfsm_srvfhtom.
+ */
+#define nfsm_srvmtofh(f) \
+	nfsm_dissect(tl, u_long *, NFSX_FH); \
+	bcopy((caddr_t)tl, (caddr_t)(f), NFSX_FH)
+
+/*
+ * Make tl point at writable space at bp. When bp has reached the end of
+ * the current buffer (be), a new mbuf with an MCLGET-attached cluster is
+ * allocated and linked after mp2, and bp/be are reset to its bounds. If
+ * the mbuf being left is mb, its length is first increased by the bytes
+ * written since bpos. Uses bp, be, mp, mp2, mb, bpos and tl from the
+ * enclosing function.
+ */
+#define nfsm_clget \
+ if (bp >= be) { \
+ if (mp == mb) \
+ mp->m_len += bp-bpos; \
+ MGET(mp, M_WAIT, MT_DATA); \
+ MCLGET(mp, M_WAIT); \
+ mp->m_len = NFSMSIZ(mp); \
+ mp2->m_next = mp; \
+ mp2 = mp; \
+ bp = mtod(mp, caddr_t); \
+ be = bp+mp->m_len; \
+ } \
+ tl = (u_long *)bp
+
+/*
+ * Fill the wire attribute structure fp from the vattr vap. The first five
+ * fields are common; the rest use the fa_nfs* layout when the request
+ * carries no NQNFS lease (nd_nqlflag == NQL_NOVAL) and the fa_nq* layout
+ * otherwise. In both layouts a FIFO's rdev is sent as 0xffffffff. In the
+ * fa_nfs* layout the usec half of ctime carries va_gen. Uses fp, vap and
+ * nfsd from the enclosing function.
+ */
+#define nfsm_srvfillattr \
+ fp->fa_type = vtonfs_type(vap->va_type); \
+ fp->fa_mode = vtonfs_mode(vap->va_type, vap->va_mode); \
+ fp->fa_nlink = txdr_unsigned(vap->va_nlink); \
+ fp->fa_uid = txdr_unsigned(vap->va_uid); \
+ fp->fa_gid = txdr_unsigned(vap->va_gid); \
+ if (nfsd->nd_nqlflag == NQL_NOVAL) { \
+ fp->fa_nfsblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nfsrdev = 0xffffffff; \
+ else \
+ fp->fa_nfsrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nfsfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nfsfileid = txdr_unsigned(vap->va_fileid); \
+ fp->fa_nfssize = txdr_unsigned(vap->va_size); \
+ fp->fa_nfsblocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); \
+ txdr_nfstime(&vap->va_atime, &fp->fa_nfsatime); \
+ txdr_nfstime(&vap->va_mtime, &fp->fa_nfsmtime); \
+ fp->fa_nfsctime.nfs_sec = txdr_unsigned(vap->va_ctime.ts_sec); \
+ fp->fa_nfsctime.nfs_usec = txdr_unsigned(vap->va_gen); \
+ } else { \
+ fp->fa_nqblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nqrdev = 0xffffffff; \
+ else \
+ fp->fa_nqrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nqfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nqfileid = txdr_unsigned(vap->va_fileid); \
+ txdr_hyper(&vap->va_size, &fp->fa_nqsize); \
+ txdr_hyper(&vap->va_bytes, &fp->fa_nqbytes); \
+ txdr_nqtime(&vap->va_atime, &fp->fa_nqatime); \
+ txdr_nqtime(&vap->va_mtime, &fp->fa_nqmtime); \
+ txdr_nqtime(&vap->va_ctime, &fp->fa_nqctime); \
+ fp->fa_nqflags = txdr_unsigned(vap->va_flags); \
+ fp->fa_nqgen = txdr_unsigned(vap->va_gen); \
+ txdr_hyper(&vap->va_filerev, &fp->fa_nqfilerev); \
+ }
+
diff --git a/sys/nfs/nfsmount.h b/sys/nfs/nfsmount.h
new file mode 100644
index 000000000000..4d74acb38a54
--- /dev/null
+++ b/sys/nfs/nfsmount.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsmount.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Mount structure.
+ * One allocated on every NFS mount.
+ * Holds NFS specific information for mount.
+ */
+struct nfsmount {
+ int nm_flag; /* Flags for soft/hard... */
+ struct mount *nm_mountp; /* Vfs structure for this filesystem */
+ int nm_numgrps; /* Max. size of groupslist */
+ nfsv2fh_t nm_fh; /* File handle of root dir */
+ struct socket *nm_so; /* Rpc socket */
+ int nm_sotype; /* Type of socket */
+ int nm_soproto; /* and protocol */
+ int nm_soflags; /* pr_flags for socket protocol */
+ struct mbuf *nm_nam; /* Addr of server */
+ int nm_timeo; /* Init timer for NFSMNT_DUMBTIMR */
+ int nm_retry; /* Max retries */
+ int nm_srtt[4]; /* Timers for rpcs */
+ int nm_sdrtt[4]; /* presumably RTT deviation, one per timer above -- TODO confirm */
+ int nm_sent; /* Request send count */
+ int nm_cwnd; /* Request send window */
+ int nm_timeouts; /* Request timeouts */
+ int nm_deadthresh; /* Threshold of timeouts-->dead server*/
+ int nm_rsize; /* Max size of read rpc */
+ int nm_wsize; /* Max size of write rpc */
+ int nm_readahead; /* Num. of blocks to readahead */
+ int nm_leaseterm; /* Term (sec) for NQNFS lease */
+ struct nfsnode *nm_tnext; /* Head of lease timer queue */
+ struct nfsnode *nm_tprev; /* presumably the other end of that queue -- TODO confirm */
+ struct vnode *nm_inprog; /* Vnode in prog by nqnfs_clientd() */
+ uid_t nm_authuid; /* Uid for authenticator */
+ int nm_authtype; /* Authenticator type */
+ int nm_authlen; /* and length */
+ char *nm_authstr; /* Authenticator string */
+};
+
+#ifdef KERNEL
+/*
+ * Convert mount ptr to nfsmount ptr: casts the mount's mnt_data field.
+ */
+#define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data))
+#endif /* KERNEL */
+
+/*
+ * Prototypes for NFS mount operations (each parameter list is wrapped in __P(()))
+ */
+int nfs_mount __P((
+ struct mount *mp,
+ char *path,
+ caddr_t data,
+ struct nameidata *ndp,
+ struct proc *p));
+int nfs_start __P((
+ struct mount *mp,
+ int flags,
+ struct proc *p));
+int nfs_unmount __P((
+ struct mount *mp,
+ int mntflags,
+ struct proc *p));
+int nfs_root __P((
+ struct mount *mp,
+ struct vnode **vpp));
+int nfs_quotactl __P((
+ struct mount *mp,
+ int cmds,
+ uid_t uid,
+ caddr_t arg,
+ struct proc *p));
+int nfs_statfs __P((
+ struct mount *mp,
+ struct statfs *sbp,
+ struct proc *p));
+int nfs_sync __P((
+ struct mount *mp,
+ int waitfor,
+ struct ucred *cred,
+ struct proc *p));
+int nfs_fhtovp __P((
+ struct mount *mp,
+ struct fid *fhp,
+ struct mbuf *nam,
+ struct vnode **vpp,
+ int *exflagsp,
+ struct ucred **credanonp));
+int nfs_vptofh __P((
+ struct vnode *vp,
+ struct fid *fhp));
+int nfs_init __P(()); /* empty parameter list: arguments are left unspecified */
diff --git a/sys/nfs/nfsnode.h b/sys/nfs/nfsnode.h
new file mode 100644
index 000000000000..f5fee5bf2f3a
--- /dev/null
+++ b/sys/nfs/nfsnode.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsnode.h 8.4 (Berkeley) 2/13/94
+ */
+
+/*
+ * Silly rename structure that hangs off the nfsnode until the name
+ * can be removed by nfs_inactive()
+ */
+struct sillyrename {
+ struct ucred *s_cred; /* presumably creds for the deferred remove -- TODO confirm */
+ struct vnode *s_dvp; /* presumably the directory holding the name -- TODO confirm */
+ long s_namlen; /* presumably the length of s_name -- TODO confirm */
+ char s_name[20]; /* name buffer, fixed at 20 bytes */
+};
+
+/*
+ * The nfsnode is the nfs equivalent to ufs's inode. Any similarity
+ * is purely coincidental.
+ * There is a unique nfsnode allocated for each active file,
+ * each current directory, each mounted-on file, text file, and the root.
+ * An nfsnode is 'named' by its file handle. (nget/nfs_node.c)
+ */
+
+struct nfsnode {
+ struct nfsnode *n_forw; /* hash, forward */
+ struct nfsnode **n_back; /* hash, backward */
+ nfsv2fh_t n_fh; /* NFS File Handle */
+ long n_flag; /* Flag for locking.. */
+ struct vnode *n_vnode; /* vnode associated with this node */
+ struct vattr n_vattr; /* Vnode attribute cache */
+ time_t n_attrstamp; /* Time stamp for cached attributes */
+ struct sillyrename *n_sillyrename; /* Ptr to silly rename struct */
+ u_quad_t n_size; /* Current size of file */
+ int n_error; /* Save write error value */
+ u_long n_direofoffset; /* Dir. EOF offset cache */
+ time_t n_mtime; /* Prev modify time. */
+ time_t n_ctime; /* Prev create time. */
+ u_quad_t n_brev; /* Modify rev when cached */
+ u_quad_t n_lrev; /* Modify rev for lease */
+ time_t n_expiry; /* Lease expiry time */
+ struct nfsnode *n_tnext; /* Nqnfs timer chain */
+ struct nfsnode *n_tprev; /* Nqnfs timer chain, second link */
+ long spare1; /* To 8 byte boundary */
+ struct sillyrename n_silly; /* Silly rename struct */
+ struct timeval n_atim; /* Special file times */
+ struct timeval n_mtim; /* Special file times (second of the pair) */
+};
+
+/*
+ * Flags for n_flag (single-bit masks; 0x0010 is not assigned in this list)
+ */
+#define NFLUSHWANT 0x0001 /* Want wakeup from a flush in prog. */
+#define NFLUSHINPROG 0x0002 /* Avoid multiple calls to vinvalbuf() */
+#define NMODIFIED 0x0004 /* Might have a modified buffer in bio */
+#define NWRITEERR 0x0008 /* Flag write errors so close will know */
+#define NQNFSNONCACHE 0x0020 /* Non-cachable lease */
+#define NQNFSWRITE 0x0040 /* Write lease */
+#define NQNFSEVICTED 0x0080 /* Has been evicted */
+#define NACC 0x0100 /* Special file accessed */
+#define NUPD 0x0200 /* Special file updated */
+#define NCHG 0x0400 /* Special file times changed */
+
+/*
+ * Convert between nfsnode pointers and vnode pointers
+ */
+#define VTONFS(vp) ((struct nfsnode *)(vp)->v_data) /* casts the vnode's v_data */
+#define NFSTOV(np) ((struct vnode *)(np)->n_vnode) /* reads the node's n_vnode */
+
+/*
+ * Queue head for nfsiod's. NOTE(review): defines nfs_bufq here (no extern).
+ */
+TAILQ_HEAD(nfsbufs, buf) nfs_bufq; /* sits outside the #ifdef KERNEL that follows */
+
+#ifdef KERNEL
+/*
+ * Prototypes for NFS vnode operations (a few ops are macros, not functions)
+ */
+int nfs_lookup __P((struct vop_lookup_args *));
+int nfs_create __P((struct vop_create_args *));
+int nfs_mknod __P((struct vop_mknod_args *));
+int nfs_open __P((struct vop_open_args *));
+int nfs_close __P((struct vop_close_args *));
+int nfsspec_close __P((struct vop_close_args *));
+#ifdef FIFO
+int nfsfifo_close __P((struct vop_close_args *));
+#endif
+int nfs_access __P((struct vop_access_args *));
+int nfsspec_access __P((struct vop_access_args *));
+int nfs_getattr __P((struct vop_getattr_args *));
+int nfs_setattr __P((struct vop_setattr_args *));
+int nfs_read __P((struct vop_read_args *));
+int nfs_write __P((struct vop_write_args *));
+int nfsspec_read __P((struct vop_read_args *));
+int nfsspec_write __P((struct vop_write_args *));
+#ifdef FIFO
+int nfsfifo_read __P((struct vop_read_args *));
+int nfsfifo_write __P((struct vop_write_args *));
+#endif
+#define nfs_ioctl ((int (*) __P((struct vop_ioctl_args *)))enoioctl) /* enoioctl, cast */
+#define nfs_select ((int (*) __P((struct vop_select_args *)))seltrue) /* seltrue, cast */
+int nfs_mmap __P((struct vop_mmap_args *));
+int nfs_fsync __P((struct vop_fsync_args *));
+#define nfs_seek ((int (*) __P((struct vop_seek_args *)))nullop) /* nullop, cast */
+int nfs_remove __P((struct vop_remove_args *));
+int nfs_link __P((struct vop_link_args *));
+int nfs_rename __P((struct vop_rename_args *));
+int nfs_mkdir __P((struct vop_mkdir_args *));
+int nfs_rmdir __P((struct vop_rmdir_args *));
+int nfs_symlink __P((struct vop_symlink_args *));
+int nfs_readdir __P((struct vop_readdir_args *));
+int nfs_readlink __P((struct vop_readlink_args *));
+int nfs_abortop __P((struct vop_abortop_args *));
+int nfs_inactive __P((struct vop_inactive_args *));
+int nfs_reclaim __P((struct vop_reclaim_args *));
+int nfs_lock __P((struct vop_lock_args *));
+int nfs_unlock __P((struct vop_unlock_args *));
+int nfs_bmap __P((struct vop_bmap_args *));
+int nfs_strategy __P((struct vop_strategy_args *));
+int nfs_print __P((struct vop_print_args *));
+int nfs_islocked __P((struct vop_islocked_args *));
+int nfs_pathconf __P((struct vop_pathconf_args *));
+int nfs_advlock __P((struct vop_advlock_args *));
+int nfs_blkatoff __P((struct vop_blkatoff_args *));
+int nfs_vget __P((struct mount *, ino_t, struct vnode **)); /* takes a mount, not a vop_*_args */
+int nfs_valloc __P((struct vop_valloc_args *));
+#define nfs_reallocblks \
+ ((int (*) __P((struct vop_reallocblks_args *)))eopnotsupp) /* eopnotsupp, cast */
+int nfs_vfree __P((struct vop_vfree_args *));
+int nfs_truncate __P((struct vop_truncate_args *));
+int nfs_update __P((struct vop_update_args *));
+int nfs_bwrite __P((struct vop_bwrite_args *));
+#endif /* KERNEL */
diff --git a/sys/nfs/nfsrtt.h b/sys/nfs/nfsrtt.h
new file mode 100644
index 000000000000..0d23880019b9
--- /dev/null
+++ b/sys/nfs/nfsrtt.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsrtt.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for performance monitor.
+ * The client and server logging are turned on by setting the global
+ * constant "nfsrtton" to 1.
+ */
+#define NFSRTTLOGSIZ 128 /* slots in each log array below: rttl[] and drt[] */
+
+/*
+ * Circular log of client side rpc activity. Each log entry is for one
+ * rpc filled in upon completion. (ie. in order of completion)
+ * The "pos" is the table index for the "next" entry, therefore the
+ * list goes from nfsrtt.rttl[pos] --> nfsrtt.rttl[pos - 1] in
+ * chronological order of completion.
+ */
+struct nfsrtt {
+ int pos; /* Position in array for next entry */
+ struct rttl { /* one log entry, for one completed rpc */
+ int proc; /* NFS procedure number */
+ int rtt; /* Measured round trip time */
+ int rto; /* Round Trip Timeout */
+ int sent; /* # rpcs in progress */
+ int cwnd; /* Send window */
+ int srtt; /* Ave Round Trip Time */
+ int sdrtt; /* Ave mean deviation of RTT */
+ fsid_t fsid; /* Fsid for mount point */
+ struct timeval tstamp; /* Timestamp of log entry */
+ } rttl[NFSRTTLOGSIZ]; /* the log itself, indexed by pos */
+};
+
+/*
+ * And definitions for server side performance monitor.
+ * The log organization is the same as above except it is filled in at the
+ * time the server sends the rpc reply.
+ */
+
+/*
+ * Bits for the flags field.
+ */
+#define DRT_NQNFS 0x01 /* Rpc used Nqnfs protocol */
+#define DRT_TCP 0x02 /* Client used TCP transport */
+#define DRT_CACHEREPLY 0x04 /* Reply was from recent request cache */
+#define DRT_CACHEDROP 0x08 /* Rpc request dropped, due to recent reply */
+
+/*
+ * Server log structure
+ * NB: ipadr == INADDR_ANY indicates a client using a non IP protocol.
+ * (ISO perhaps?)
+ */
+struct nfsdrt {
+ int pos; /* Position of next log entry */
+ struct drt { /* one log entry, for one rpc reply */
+ int flag; /* Bits as defined above */
+ int proc; /* NFS procedure number */
+ u_long ipadr; /* IP address of client */
+ int resptime; /* Response time (usec) */
+ struct timeval tstamp; /* Timestamp of log entry */
+ } drt[NFSRTTLOGSIZ]; /* the log itself, indexed by pos */
+};
diff --git a/sys/nfs/nfsrvcache.h b/sys/nfs/nfsrvcache.h
new file mode 100644
index 000000000000..26da2c275df6
--- /dev/null
+++ b/sys/nfs/nfsrvcache.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsrvcache.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for the server recent request cache
+ */
+
+#define NFSRVCACHESIZ 256
+
+struct nfsrvcache {
+ struct nfsrvcache *rc_forw; /* Hash chain links */
+ struct nfsrvcache **rc_back; /* Hash chain links */
+ struct nfsrvcache *rc_next; /* Lru list */
+ struct nfsrvcache **rc_prev; /* Lru list */
+ u_long rc_xid; /* rpc id number */
+ union { /* the two members share storage */
+ struct mbuf *ru_repmb; /* Reply mbuf list OR */
+ int ru_repstat; /* Reply status */
+ } rc_un; /* accessed through rc_reply / rc_status */
+ union nethostaddr rc_haddr; /* Host address */
+ short rc_proc; /* rpc proc number */
+ u_char rc_state; /* Current state of request */
+ u_char rc_flag; /* Flag bits */
+};
+
+#define rc_reply rc_un.ru_repmb /* shorthand for the reply mbuf list member */
+#define rc_status rc_un.ru_repstat /* shorthand for the reply status member */
+#define rc_inetaddr rc_haddr.had_inetaddr /* shorthand into union nethostaddr */
+#define rc_nam rc_haddr.had_nam /* shorthand into union nethostaddr */
+
+/* Cache entry states */
+#define RC_UNUSED 0
+#define RC_INPROG 1
+#define RC_DONE 2
+
+/* Return values */
+#define RC_DROPIT 0
+#define RC_REPLY 1
+#define RC_DOIT 2
+#define RC_CHECKIT 3
+
+/* Flag bits */
+#define RC_LOCKED 0x01
+#define RC_WANTED 0x02
+#define RC_REPSTATUS 0x04
+#define RC_REPMBUF 0x08
+#define RC_NQNFS 0x10
+#define RC_INETADDR 0x20
+#define RC_NAM 0x40
diff --git a/sys/nfs/nfsv2.h b/sys/nfs/nfsv2.h
new file mode 100644
index 000000000000..e9d2985efacd
--- /dev/null
+++ b/sys/nfs/nfsv2.h
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsv2.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * nfs definitions as per the version 2 specs
+ */
+
+/*
+ * Constants as defined in the Sun NFS Version 2 spec.
+ * "NFS: Network File System Protocol Specification" RFC1094
+ */
+
+#define NFS_PORT 2049
+#define NFS_PROG 100003
+#define NFS_VER2 2
+#define NFS_MAXDGRAMDATA 8192
+#define NFS_MAXDATA 32768
+#define NFS_MAXPATHLEN 1024
+#define NFS_MAXNAMLEN 255
+#define NFS_FHSIZE 32
+#define NFS_MAXPKTHDR 404
+#define NFS_MAXPACKET (NFS_MAXPKTHDR+NFS_MAXDATA)
+#define NFS_MINPACKET 20
+#define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */
+
+/* Stat numbers for rpc returns */
+#define NFS_OK 0
+#define NFSERR_PERM 1
+#define NFSERR_NOENT 2
+#define NFSERR_IO 5
+#define NFSERR_NXIO 6
+#define NFSERR_ACCES 13
+#define NFSERR_EXIST 17
+#define NFSERR_NODEV 19
+#define NFSERR_NOTDIR 20
+#define NFSERR_ISDIR 21
+#define NFSERR_FBIG 27
+#define NFSERR_NOSPC 28
+#define NFSERR_ROFS 30
+#define NFSERR_NAMETOL 63
+#define NFSERR_NOTEMPTY 66
+#define NFSERR_DQUOT 69
+#define NFSERR_STALE 70
+#define NFSERR_WFLUSH 99
+
+/* Sizes in bytes of various nfs rpc components (word counts assume 4-byte u_long) */
+#define NFSX_FH 32
+#define NFSX_UNSIGNED 4
+#define NFSX_NFSFATTR 68 /* 17 words: 5 common + 6 + three 2-word times */
+#define NFSX_NQFATTR 92 /* 23 words: 5 common + 18 in the fa_nqnfs form */
+#define NFSX_NFSSATTR 32 /* 8 words: 3 common + size + two 2-word times */
+#define NFSX_NQSATTR 44 /* 11 words: 3 common + 8 in the sa_nqnfs form */
+#define NFSX_COOKIE 4
+#define NFSX_NFSSTATFS 20 /* 5 words: struct nfsv2_statfs without sf_files/sf_ffree */
+#define NFSX_NQSTATFS 28 /* 7 words: all of struct nfsv2_statfs */
+#define NFSX_FATTR(isnq) ((isnq) ? NFSX_NQFATTR : NFSX_NFSFATTR)
+#define NFSX_SATTR(isnq) ((isnq) ? NFSX_NQSATTR : NFSX_NFSSATTR)
+#define NFSX_STATFS(isnq) ((isnq) ? NFSX_NQSTATFS : NFSX_NFSSTATFS)
+
+/* nfs rpc procedure numbers (7 is not assigned: WRITECACHE below maps to NOOP) */
+#define NFSPROC_NULL 0
+#define NFSPROC_GETATTR 1
+#define NFSPROC_SETATTR 2
+#define NFSPROC_NOOP 3
+#define NFSPROC_ROOT NFSPROC_NOOP /* Obsolete */
+#define NFSPROC_LOOKUP 4
+#define NFSPROC_READLINK 5
+#define NFSPROC_READ 6
+#define NFSPROC_WRITECACHE NFSPROC_NOOP /* Obsolete */
+#define NFSPROC_WRITE 8
+#define NFSPROC_CREATE 9
+#define NFSPROC_REMOVE 10
+#define NFSPROC_RENAME 11
+#define NFSPROC_LINK 12
+#define NFSPROC_SYMLINK 13
+#define NFSPROC_MKDIR 14
+#define NFSPROC_RMDIR 15
+#define NFSPROC_READDIR 16
+#define NFSPROC_STATFS 17
+
+/* NQ nfs numbers */
+#define NQNFSPROC_READDIRLOOK 18
+#define NQNFSPROC_GETLEASE 19
+#define NQNFSPROC_VACATED 20
+#define NQNFSPROC_EVICTED 21
+#define NQNFSPROC_ACCESS 22
+
+#define NFS_NPROCS 23 /* one more than the highest number above (22) */
+/* Conversion macros (nfs_type[] and ntov_type[] are not declared in this header) */
+extern int vttoif_tab[]; /* presumably consulted by MAKEIMODE -- TODO confirm */
+#define vtonfs_mode(t,m) /* a VFIFO is encoded with VCHR as its type */ \
+ txdr_unsigned(((t) == VFIFO) ? MAKEIMODE(VCHR, (m)) : \
+ MAKEIMODE((t), (m)))
+#define nfstov_mode(a) (fxdr_unsigned(u_short, (a))&07777) /* keeps the low 12 bits */
+#define vtonfs_type(a) txdr_unsigned(nfs_type[((long)(a))]) /* table lookup by vnode type */
+#define nfstov_type(a) ntov_type[fxdr_unsigned(u_long,(a))&0x7] /* index masked to 0..7 */
+
+/* File types */
+typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5 } nfstype;
+
+/* Structs for common parts of the rpc's */
+struct nfsv2_time {
+ u_long nfs_sec;
+ u_long nfs_usec;
+};
+
+struct nqnfs_time {
+ u_long nq_sec;
+ u_long nq_nsec;
+};
+
+/*
+ * File attributes and settable attributes. These structures cover both
+ * NFS version 2 and the NQNFS protocol. Note that the union is only
+ * used so that one pointer can refer to both variants. These structures
+ * go out on the wire and must be densely packed, so no quad data types
+ * are used. (all fields are longs or u_longs or structures of same)
+ * NB: You can't do sizeof(struct nfsv2_fattr), you must use the
+ * NFSX_FATTR(isnq) macro.
+ */
+struct nfsv2_fattr {
+ u_long fa_type; /* the first five fields precede the union in both forms */
+ u_long fa_mode;
+ u_long fa_nlink;
+ u_long fa_uid;
+ u_long fa_gid;
+ union {
+ struct { /* fa_nfsv2: twelve u_long-sized words */
+ u_long nfsfa_size;
+ u_long nfsfa_blocksize;
+ u_long nfsfa_rdev;
+ u_long nfsfa_blocks;
+ u_long nfsfa_fsid;
+ u_long nfsfa_fileid;
+ struct nfsv2_time nfsfa_atime;
+ struct nfsv2_time nfsfa_mtime;
+ struct nfsv2_time nfsfa_ctime;
+ } fa_nfsv2;
+ struct { /* fa_nqnfs: eighteen u_long-sized words */
+ struct { /* size held as two u_longs, not a quad */
+ u_long nqfa_qsize[2];
+ } nqfa_size;
+ u_long nqfa_blocksize;
+ u_long nqfa_rdev;
+ struct { /* byte count held as two u_longs */
+ u_long nqfa_qbytes[2];
+ } nqfa_bytes;
+ u_long nqfa_fsid;
+ u_long nqfa_fileid;
+ struct nqnfs_time nqfa_atime;
+ struct nqnfs_time nqfa_mtime;
+ struct nqnfs_time nqfa_ctime;
+ u_long nqfa_flags;
+ u_long nqfa_gen;
+ struct { /* file revision held as two u_longs */
+ u_long nqfa_qfilerev[2];
+ } nqfa_filerev;
+ } fa_nqnfs;
+ } fa_un; /* sized by its larger member, hence NFSX_FATTR(isnq) */
+};
+
+/* and some ugly defines for accessing union components */
+#define fa_nfssize fa_un.fa_nfsv2.nfsfa_size
+#define fa_nfsblocksize fa_un.fa_nfsv2.nfsfa_blocksize
+#define fa_nfsrdev fa_un.fa_nfsv2.nfsfa_rdev
+#define fa_nfsblocks fa_un.fa_nfsv2.nfsfa_blocks
+#define fa_nfsfsid fa_un.fa_nfsv2.nfsfa_fsid
+#define fa_nfsfileid fa_un.fa_nfsv2.nfsfa_fileid
+#define fa_nfsatime fa_un.fa_nfsv2.nfsfa_atime
+#define fa_nfsmtime fa_un.fa_nfsv2.nfsfa_mtime
+#define fa_nfsctime fa_un.fa_nfsv2.nfsfa_ctime
+#define fa_nqsize fa_un.fa_nqnfs.nqfa_size
+#define fa_nqblocksize fa_un.fa_nqnfs.nqfa_blocksize
+#define fa_nqrdev fa_un.fa_nqnfs.nqfa_rdev
+#define fa_nqbytes fa_un.fa_nqnfs.nqfa_bytes
+#define fa_nqfsid fa_un.fa_nqnfs.nqfa_fsid
+#define fa_nqfileid fa_un.fa_nqnfs.nqfa_fileid
+#define fa_nqatime fa_un.fa_nqnfs.nqfa_atime
+#define fa_nqmtime fa_un.fa_nqnfs.nqfa_mtime
+#define fa_nqctime fa_un.fa_nqnfs.nqfa_ctime
+#define fa_nqflags fa_un.fa_nqnfs.nqfa_flags
+#define fa_nqgen fa_un.fa_nqnfs.nqfa_gen
+#define fa_nqfilerev fa_un.fa_nqnfs.nqfa_filerev
+
+struct nfsv2_sattr {
+ u_long sa_mode; /* the first three fields precede the union in both forms */
+ u_long sa_uid;
+ u_long sa_gid;
+ union {
+ struct { /* sa_nfsv2: five u_long-sized words */
+ u_long nfssa_size;
+ struct nfsv2_time nfssa_atime;
+ struct nfsv2_time nfssa_mtime;
+ } sa_nfsv2;
+ struct { /* sa_nqnfs: eight u_long-sized words */
+ struct { /* size held as two u_longs, not a quad */
+ u_long nqsa_qsize[2];
+ } nqsa_size;
+ struct nqnfs_time nqsa_atime;
+ struct nqnfs_time nqsa_mtime;
+ u_long nqsa_flags;
+ u_long nqsa_rdev;
+ } sa_nqnfs;
+ } sa_un; /* sized by its larger member, hence NFSX_SATTR(isnq) */
+};
+
+/* and some ugly defines for accessing the unions */
+#define sa_nfssize sa_un.sa_nfsv2.nfssa_size
+#define sa_nfsatime sa_un.sa_nfsv2.nfssa_atime
+#define sa_nfsmtime sa_un.sa_nfsv2.nfssa_mtime
+#define sa_nqsize sa_un.sa_nqnfs.nqsa_size
+#define sa_nqatime sa_un.sa_nqnfs.nqsa_atime
+#define sa_nqmtime sa_un.sa_nqnfs.nqsa_mtime
+#define sa_nqflags sa_un.sa_nqnfs.nqsa_flags
+#define sa_nqrdev sa_un.sa_nqnfs.nqsa_rdev
+
+struct nfsv2_statfs {
+ u_long sf_tsize;
+ u_long sf_bsize;
+ u_long sf_blocks;
+ u_long sf_bfree;
+ u_long sf_bavail;
+ u_long sf_files; /* Nqnfs only */
+ u_long sf_ffree; /* ditto */
+};
diff --git a/sys/nfs/nqnfs.h b/sys/nfs/nqnfs.h
new file mode 100644
index 000000000000..730741a4137b
--- /dev/null
+++ b/sys/nfs/nqnfs.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nqnfs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for NQNFS (Not Quite NFS) cache consistency protocol.
+ */
+
+/* Tunable constants */
+#define NQ_CLOCKSKEW 3 /* Clock skew factor (sec) */
+#define NQ_WRITESLACK 5 /* Delay for write cache flushing */
+#define NQ_MAXLEASE 60 /* Max lease duration (sec) */
+#define NQ_MINLEASE 5 /* Min lease duration (sec) */
+#define NQ_DEFLEASE 30 /* Default lease duration (sec) */
+#define NQ_RENEWAL 3 /* Time before expiry (sec) to renew */
+#define NQ_TRYLATERDEL 15 /* Initial try later delay (sec) */
+#define NQ_MAXNUMLEASE 2048 /* Upper bound on number of server leases */
+#define NQ_DEADTHRESH NQ_NEVERDEAD /* Default nm_deadthresh; NQ_NEVERDEAD is defined on the next line */
+#define NQ_NEVERDEAD 9 /* Greater than max. nm_timeouts */
+#define NQLCHSZ 256 /* Server hash table size */
+
+#define NQNFS_PROG 300105 /* As assigned by Sun */
+#define NQNFS_VER1 1
+#define NQNFS_EVICTSIZ 156 /* Size of eviction request in bytes */
+
+/*
+ * Definitions used for saving the "last lease expires" time in Non-volatile
+ * RAM on the server. The default definitions below assume that NOVRAM is not
+ * available.
+ */
+#define NQSTORENOVRAM(t) /* expands to nothing; argument is discarded */
+#define NQLOADNOVRAM(t) /* expands to nothing; argument is discarded */
+
+/*
+ * Definitions and structs used on the server to maintain state for current leases.
+ * The list of host(s) that hold the lease are kept as nqhost structures.
+ * The first one lives in nqlease and any others are held in a linked
+ * list of nqm structures hanging off of nqlease.
+ *
+ * Each nqlease structure is chained into two lists. The first is a list
+ * ordered by increasing expiry time for nqsrv_timer() and the second is a chain
+ * hashed on lc_fh.
+ */
+#define LC_MOREHOSTSIZ 10
+
+struct nqhost {
+ union { /* every arm starts with two u_shorts, the first being a flag word */
+ struct {
+ u_short udp_flag;
+ u_short udp_port;
+ union nethostaddr udp_haddr;
+ } un_udp;
+ struct {
+ u_short connless_flag;
+ u_short connless_spare;
+ union nethostaddr connless_haddr;
+ } un_connless;
+ struct {
+ u_short conn_flag;
+ u_short conn_spare;
+ struct nfssvc_sock *conn_slp;
+ } un_conn;
+ } lph_un;
+};
+#define lph_flag lph_un.un_udp.udp_flag /* flag word, named via the un_udp arm */
+#define lph_port lph_un.un_udp.udp_port
+#define lph_haddr lph_un.un_udp.udp_haddr
+#define lph_inetaddr lph_un.un_udp.udp_haddr.had_inetaddr
+#define lph_claddr lph_un.un_connless.connless_haddr
+#define lph_nam lph_un.un_connless.connless_haddr.had_nam
+#define lph_slp lph_un.un_conn.conn_slp
+
+struct nqlease {
+ struct nqlease *lc_chain1[2]; /* Timer queue list (must be first) */
+ struct nqlease *lc_fhnext; /* Fhandle hash list */
+ struct nqlease **lc_fhprev; /* Fhandle hash list, back link */
+ time_t lc_expiry; /* Expiry time (sec) */
+ struct nqhost lc_host; /* Host that got lease */
+ struct nqm *lc_morehosts; /* Other hosts that share read lease */
+ fsid_t lc_fsid; /* Fhandle */
+ char lc_fiddata[MAXFIDSZ]; /* MAXFIDSZ bytes following lc_fsid */
+ struct vnode *lc_vp; /* Soft reference to associated vnode */
+};
+#define lc_flag lc_host.lph_un.un_udp.udp_flag /* lease flags share lc_host's flag word */
+
+/* lc_flag bits */
+#define LC_VALID 0x0001 /* Host address valid */
+#define LC_WRITE 0x0002 /* Write cache */
+#define LC_NONCACHABLE 0x0004 /* Non-cachable lease */
+#define LC_LOCKED 0x0008 /* Locked */
+#define LC_WANTED 0x0010 /* Lock wanted */
+#define LC_EXPIREDWANTED 0x0020 /* Want lease when expired */
+#define LC_UDP 0x0040 /* Host address for udp socket */
+#define LC_CLTP 0x0080 /* Host address for other connectionless */
+#define LC_LOCAL 0x0100 /* Host is server */
+#define LC_VACATED 0x0200 /* Host has vacated lease */
+#define LC_WRITTEN 0x0400 /* Recently wrote to the leased file */
+#define LC_SREF 0x0800 /* Holds a nfssvc_sock reference */
+
+struct nqm {
+ struct nqm *lpm_next;
+ struct nqhost lpm_hosts[LC_MOREHOSTSIZ];
+};
+
+/*
+ * Flag bits for flags argument to nqsrv_getlease.
+ */
+#define NQL_READ LEASE_READ /* Read Request */
+#define NQL_WRITE LEASE_WRITE /* Write Request */
+#define NQL_CHECK 0x4 /* Check for lease */
+#define NQL_NOVAL 0xffffffff /* Invalid */
+
+/*
+ * Special value for slp for local server calls.
+ */
+#define NQLOCALSLP ((struct nfssvc_sock *) -1)
+
+/*
+ * Server side macros. nqsrv_getl() uses nfsd, nam, cache, frev and cred from the expanding scope.
+ */
+#define nqsrv_getl(v, l) /* calls nqsrv_getlease() and discards its result */ \
+ (void) nqsrv_getlease((v), &nfsd->nd_duration, \
+ ((nfsd->nd_nqlflag != 0 && nfsd->nd_nqlflag != NQL_NOVAL) ? nfsd->nd_nqlflag : /* usable nd_nqlflag wins */ \
+ ((l) | NQL_CHECK)), /* otherwise the caller's l with NQL_CHECK or'ed in */ \
+ nfsd, nam, &cache, &frev, cred)
+
+/*
+ * Client side macros that check for a valid lease. All three read the global time.tv_sec.
+ */
+#define NQNFS_CKINVALID(v, n, f) /* expired with server below dead threshold, or write wanted without write lease */ \
+ ((time.tv_sec > (n)->n_expiry && \
+ VFSTONFS((v)->v_mount)->nm_timeouts < VFSTONFS((v)->v_mount)->nm_deadthresh) \
+ || ((f) == NQL_WRITE && ((n)->n_flag & NQNFSWRITE) == 0))
+
+#define NQNFS_CKCACHABLE(v, f) /* unexpired (or at/over dead threshold), not NQNFSNONCACHE, and f covered */ \
+ ((time.tv_sec <= VTONFS(v)->n_expiry || \
+ VFSTONFS((v)->v_mount)->nm_timeouts >= VFSTONFS((v)->v_mount)->nm_deadthresh) \
+ && (VTONFS(v)->n_flag & NQNFSNONCACHE) == 0 && \
+ ((f) == NQL_READ || (VTONFS(v)->n_flag & NQNFSWRITE)))
+
+#define NQNFS_NEEDLEASE(v, p) /* yields 0, nqnfs_piggy[p] or NQL_WRITE */ \
+ (time.tv_sec > VTONFS(v)->n_expiry ? /* already expired */ \
+ ((VTONFS(v)->n_flag & NQNFSEVICTED) ? 0 : nqnfs_piggy[p]) : \
+ (((time.tv_sec + NQ_RENEWAL) > VTONFS(v)->n_expiry && /* within NQ_RENEWAL sec of expiry */ \
+ nqnfs_piggy[p]) ? \
+ ((VTONFS(v)->n_flag & NQNFSWRITE) ? \
+ NQL_WRITE : nqnfs_piggy[p]) : 0))
+
+/*
+ * List head for timer queue.
+ */
+extern union nqsrvthead {
+ union nqsrvthead *th_head[2];
+ struct nqlease *th_chain[2];
+} nqthead;
+extern struct nqlease **nqfhead;
+extern u_long nqfheadhash;
+
+/*
+ * Nqnfs return status numbers.
+ */
+#define NQNFS_EXPIRED 500
+#define NQNFS_TRYLATER 501
+#define NQNFS_AUTHERR 502
diff --git a/sys/nfs/rpcv2.h b/sys/nfs/rpcv2.h
new file mode 100644
index 000000000000..9c793a7f8758
--- /dev/null
+++ b/sys/nfs/rpcv2.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)rpcv2.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for Sun RPC Version 2, from
+ * "RPC: Remote Procedure Call Protocol Specification" RFC1057
+ */
+
+/* Version # */
+#define RPC_VER2 2
+
+/* Authentication (flavor numbers, plus size limits) */
+#define RPCAUTH_NULL 0
+#define RPCAUTH_UNIX 1
+#define RPCAUTH_SHORT 2
+#define RPCAUTH_NQNFS 300000 /* NOTE(review): not an RFC1057 flavor; presumably private to nqnfs - confirm */
+#define RPCAUTH_MAXSIZ 400 /* also sizes nd_authstr[] in struct nfsd (nfs.h) */
+#define RPCAUTH_UNIXGIDS 16 /* NOTE(review): presumably max gids in an RPCAUTH_UNIX credential - confirm */
+
+/* Rpc Constants (grouped by the RFC1057 enum they belong to) */
+#define RPC_CALL 0 /* msg_type */
+#define RPC_REPLY 1
+#define RPC_MSGACCEPTED 0 /* reply_stat */
+#define RPC_MSGDENIED 1
+#define RPC_PROGUNAVAIL 1 /* accept_stat; value 0 (success) has no name here */
+#define RPC_PROGMISMATCH 2
+#define RPC_PROCUNAVAIL 3
+#define RPC_GARBAGE 4 /* I like this one */
+#define RPC_MISMATCH 0 /* reject_stat */
+#define RPC_AUTHERR 1
+
+/* Authentication failures (auth_stat) */
+#define AUTH_BADCRED 1
+#define AUTH_REJECTCRED 2
+#define AUTH_BADVERF 3
+#define AUTH_REJECTVERF 4
+#define AUTH_TOOWEAK 5 /* Give em wheaties */
+
+/* Sizes of rpc header parts */
+#define RPC_SIZ 24 /* NOTE(review): presumably bytes (six 4-byte XDR words) - confirm */
+#define RPC_REPLYSIZ 28 /* NOTE(review): presumably bytes - confirm */
+
+/* RPC Prog definitions: program numbers, and for the mount program its version, procedures and limits */
+#define RPCPROG_MNT 100005 /* program number */
+#define RPCMNT_VER1 1 /* version */
+#define RPCMNT_MOUNT 1 /* procedure numbers start here */
+#define RPCMNT_DUMP 2
+#define RPCMNT_UMOUNT 3
+#define RPCMNT_UMNTALL 4
+#define RPCMNT_EXPORT 5
+#define RPCMNT_NAMELEN 255 /* length limits */
+#define RPCMNT_PATHLEN 1024
+#define RPCPROG_NFS 100003 /* program number */
diff --git a/sys/nfs/xdr_subs.h b/sys/nfs/xdr_subs.h
new file mode 100644
index 000000000000..c2aa4f3f3434
--- /dev/null
+++ b/sys/nfs/xdr_subs.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)xdr_subs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Macros used for conversion to/from xdr representation by nfs...
+ * These use the MACHINE DEPENDENT routines ntohl, htonl
+ * As defined by "XDR: External Data Representation Standard" RFC1014
+ *
+ * To simplify the implementation, we use ntohl/htonl even on big-endian
+ * machines, and count on them being `#define'd away. Some of these
+ * might be slightly more efficient as quad_t copies on a big-endian,
+ * but we cannot count on their alignment anyway.
+ */
+
+#define fxdr_unsigned(t, v) ((t)ntohl((long)(v))) /* from xdr: ntohl() the value v, then cast the result to type t */
+#define txdr_unsigned(v) (htonl((long)(v))) /* to xdr: htonl() the value v */
+
+#define fxdr_nfstime(f, t) { /* from xdr: struct nfsv2_time at f -> ts_sec/ts_nsec of *t */ \
+ (t)->ts_sec = ntohl(((struct nfsv2_time *)(f))->nfs_sec); \
+ (t)->ts_nsec = 1000 * ntohl(((struct nfsv2_time *)(f))->nfs_usec); /* byte-swap first, then scale usec to nsec */ \
+}
+#define txdr_nfstime(f, t) { /* to xdr: ts_sec/ts_nsec of *f -> struct nfsv2_time at t */ \
+ ((struct nfsv2_time *)(t))->nfs_sec = htonl((f)->ts_sec); \
+ ((struct nfsv2_time *)(t))->nfs_usec = htonl((f)->ts_nsec / 1000); /* scale nsec to usec in host order, THEN byte-swap; dividing the swapped word is wrong on little-endian hosts */ \
+}
+
+#define fxdr_nqtime(f, t) { /* from xdr: struct nqnfs_time at f -> ts_sec/ts_nsec of *t */ \
+ (t)->ts_sec = ntohl(((struct nqnfs_time *)(f))->nq_sec); \
+ (t)->ts_nsec = ntohl(((struct nqnfs_time *)(f))->nq_nsec); /* already nanoseconds: no scaling */ \
+}
+#define txdr_nqtime(f, t) { /* to xdr: ts_sec/ts_nsec of *f -> struct nqnfs_time at t */ \
+ ((struct nqnfs_time *)(t))->nq_sec = htonl((f)->ts_sec); \
+ ((struct nqnfs_time *)(t))->nq_nsec = htonl((f)->ts_nsec); \
+}
+
+#define fxdr_hyper(f, t) { /* from xdr: two words at f (high word first) -> quad at t; NOTE(review): the long * indexing assumes a 4-byte long - confirm */ \
+ ((long *)(t))[_QUAD_HIGHWORD] = ntohl(((long *)(f))[0]); \
+ ((long *)(t))[_QUAD_LOWWORD] = ntohl(((long *)(f))[1]); \
+}
+#define txdr_hyper(f, t) { /* to xdr: quad at f -> two words at t, high word first; same long * assumption */ \
+ ((long *)(t))[0] = htonl(((long *)(f))[_QUAD_HIGHWORD]); \
+ ((long *)(t))[1] = htonl(((long *)(f))[_QUAD_LOWWORD]); \
+}
diff --git a/sys/nfsclient/nfs.h b/sys/nfsclient/nfs.h
new file mode 100644
index 000000000000..261fd42657a7
--- /dev/null
+++ b/sys/nfsclient/nfs.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define NFS_MAXIOVEC 34 /* NOTE(review): presumably the iovec limit for one request - confirm */
+#define NFS_HZ 25 /* Ticks per second for NFS timeouts */
+#define NFS_TIMEO (1*NFS_HZ) /* Default timeout = 1 second */
+#define NFS_MINTIMEO (1*NFS_HZ) /* Min timeout to use */
+#define NFS_MAXTIMEO (60*NFS_HZ) /* Max timeout to backoff to */
+#define NFS_MINIDEMTIMEO (5*NFS_HZ) /* Min timeout for non-idempotent ops*/
+#define NFS_MAXREXMIT 100 /* Stop counting after this many */
+#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
+#define NFS_RETRANS 10 /* Num of retrans for soft mounts */
+#define NFS_MAXGRPS 16 /* Max. size of groups list */
+#define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec (lower clamp of NFS_ATTRTIMEO) */
+#define NFS_MAXATTRTIMO 60 /* Upper clamp of NFS_ATTRTIMEO */
+#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
+#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
+#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
+#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
+#define NFS_MAXREADDIR NFS_MAXDATA /* Max. size of directory read */
+#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
+#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runnable */
+#define NFS_DIRBLKSIZ 1024 /* Size of an NFS directory block */
+#define NMOD(a) ((a) % nfs_asyncdaemons) /* a modulo nfs_asyncdaemons, which is declared elsewhere */
+
+/*
+ * Attribute timeout in seconds: (now - n_mtime) / 10 clamped to [NFS_MINATTRTIMO, NFS_MAXATTRTIMO]; NMODIFIED forces the minimum.
+ */
+#define NFS_ATTRTIMEO(np) /* np is evaluated several times */ \
+ ((((np)->n_flag & NMODIFIED) || \
+ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+ (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Argument structures for the nfssvc(2) syscall. Only nfsd and mount_nfs
+ * are expected to use it.
+ */
+struct nfsd_args {
+ int sock; /* Socket to serve */
+ caddr_t name; /* Client address for connection based sockets */
+ int namelen; /* Length of name */
+};
+
+struct nfsd_srvargs {
+ struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */
+ uid_t nsd_uid; /* Effective uid mapped to cred */
+ u_long nsd_haddr; /* Ip address of client */
+ struct ucred nsd_cr; /* Cred. uid maps to */
+ int nsd_authlen; /* Length of auth string (ret) */
+ char *nsd_authstr; /* Auth string (ret) */
+};
+
+struct nfsd_cargs {
+ char *ncd_dirp; /* Mount dir path */
+ uid_t ncd_authuid; /* Effective uid */
+ int ncd_authtype; /* Type of authenticator */
+ int ncd_authlen; /* Length of authenticator string */
+ char *ncd_authstr; /* Authenticator string */
+};
+
+/*
+ * Stats structure: plain int event counters (nfs_bio.c increments the biocache_* ones).
+ */
+struct nfsstats {
+ int attrcache_hits;
+ int attrcache_misses;
+ int lookupcache_hits;
+ int lookupcache_misses;
+ int direofcache_hits;
+ int direofcache_misses;
+ int biocache_reads; /* bumped per VREG pass in nfs_bioread() */
+ int read_bios;
+ int read_physios;
+ int biocache_writes; /* bumped per block in nfs_write() */
+ int write_bios;
+ int write_physios;
+ int biocache_readlinks; /* bumped per VLNK pass in nfs_bioread() */
+ int readlink_bios;
+ int biocache_readdirs; /* bumped per VDIR pass in nfs_bioread() */
+ int readdir_bios;
+ int rpccnt[NFS_NPROCS]; /* NFS_NPROCS slots; NOTE(review): presumably indexed by procedure number - confirm */
+ int rpcretries;
+ int srvrpccnt[NFS_NPROCS]; /* NFS_NPROCS slots, as rpccnt[] */
+ int srvrpc_errs;
+ int srv_errs;
+ int rpcrequests;
+ int rpctimeouts;
+ int rpcunexpected;
+ int rpcinvalid;
+ int srvcache_inproghits;
+ int srvcache_idemdonehits;
+ int srvcache_nonidemdonehits;
+ int srvcache_misses;
+ int srvnqnfs_leases;
+ int srvnqnfs_maxleases;
+ int srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call. Each is a distinct bit.
+ */
+#define NFSSVC_BIOD 0x002
+#define NFSSVC_NFSD 0x004
+#define NFSSVC_ADDSOCK 0x008
+#define NFSSVC_AUTHIN 0x010
+#define NFSSVC_GOTAUTH 0x040 /* note: 0x020 is not assigned in this list */
+#define NFSSVC_AUTHINFAIL 0x080
+#define NFSSVC_MNTD 0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+ sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore all but EINTR, ERESTART and EWOULDBLOCK when PR_CONNREQUIRED is clear in s
+ */
+#define NFSIGNORE_SOERROR(s, e) \
+ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+ ((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element
+ */
+struct nfsreq {
+ struct nfsreq *r_next; /* request list linkage, forward */
+ struct nfsreq *r_prev; /* request list linkage, backward */
+ struct mbuf *r_mreq; /* NOTE(review): presumably the request mbuf chain - confirm */
+ struct mbuf *r_mrep; /* NOTE(review): presumably the reply mbuf chain - confirm */
+ struct mbuf *r_md; /* NOTE(review): presumably the parse position mbuf, paired with r_dpos - confirm */
+ caddr_t r_dpos;
+ struct nfsmount *r_nmp; /* nfsmount associated with the request */
+ struct vnode *r_vp; /* vnode associated with the request */
+ u_long r_xid; /* NOTE(review): presumably the RPC xid, cf. nd_retxid in struct nfsd - confirm */
+ int r_flags; /* flags on request, see below */
+ int r_retry; /* max retransmission count */
+ int r_rexmit; /* current retrans count */
+ int r_timer; /* tick counter on reply */
+ int r_procnum; /* NFS procedure number */
+ int r_rtt; /* RTT for rpc */
+ struct proc *r_procp; /* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING 0x01 /* timing request (in mntp) */
+#define R_SENT 0x02 /* request has been sent */
+#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
+#define R_INTR 0x08 /* intr mnt, signal pending */
+#define R_SOCKERR 0x10 /* Fatal error on socket */
+#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
+#define R_MUSTRESEND 0x40 /* Must resend request */
+#define R_GETONEREP 0x80 /* Probe for one reply only */
+
+struct nfsstats nfsstats; /* NOTE(review): defines (not just declares) the object in every file that includes this header under KERNEL */
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+#define NUIDHASHSIZ 32 /* size of ns_uidh[]; must stay a power of 2 for the mask in NUIDHASH */
+#define NUIDHASH(uid) ((uid) & (NUIDHASHSIZ - 1)) /* bucket index: low bits of the uid */
+
+/*
+ * Network address hash list element
+ */
+union nethostaddr {
+ u_long had_inetaddr; /* address held directly as a u_long ... */
+ struct mbuf *had_nam; /* ... or as a pointer to an mbuf; same storage */
+};
+
+struct nfsuid {
+ struct nfsuid *nu_lrunext; /* MUST be first */
+ struct nfsuid *nu_lruprev; /* lru list linkage, backward */
+ struct nfsuid *nu_hnext; /* uid hash list linkage, forward */
+ struct nfsuid *nu_hprev; /* uid hash list linkage, backward */
+ int nu_flag; /* Flags */
+ uid_t nu_uid; /* Uid mapped by this entry */
+ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */
+ struct ucred nu_cr; /* Cred uid mapped to */
+};
+
+#define nu_inetaddr nu_haddr.had_inetaddr /* shorthand for the union members */
+#define nu_nam nu_haddr.had_nam
+/* Bits for nu_flag */
+#define NU_INETADDR 0x1 /* NOTE(review): presumably nu_haddr holds had_inetaddr rather than had_nam - confirm */
+
+struct nfssvc_sock { /* one per socket served by nfsd; begins with the same two lru pointers as struct nfsuid */
+ struct nfsuid *ns_lrunext; /* MUST be first */
+ struct nfsuid *ns_lruprev;
+ struct nfssvc_sock *ns_next; /* socket list linkage, forward */
+ struct nfssvc_sock *ns_prev; /* socket list linkage, backward */
+ int ns_flag; /* SLP_ bits, see below */
+ u_long ns_sref;
+ struct file *ns_fp;
+ struct socket *ns_so;
+ int ns_solock;
+ struct mbuf *ns_nam;
+ int ns_cc;
+ struct mbuf *ns_raw;
+ struct mbuf *ns_rawend;
+ int ns_reclen;
+ struct mbuf *ns_rec;
+ struct mbuf *ns_recend;
+ int ns_numuids;
+ struct nfsuid *ns_uidh[NUIDHASHSIZ]; /* uid hash buckets, indexed with NUIDHASH() */
+};
+
+/* Bits for "ns_flag" */
+#define SLP_VALID 0x01
+#define SLP_DOREC 0x02
+#define SLP_NEEDQ 0x04
+#define SLP_DISCONN 0x08
+#define SLP_GETSTREAM 0x10
+#define SLP_INIT 0x20
+#define SLP_WANTINIT 0x40
+
+#define SLP_ALLFLAGS 0xff /* covers every SLP_ bit above plus the unassigned 0x80 */
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+struct nfsd {
+ struct nfsd *nd_next; /* Must be first */
+ struct nfsd *nd_prev; /* list linkage, backward */
+ int nd_flag; /* NFSD_ flags */
+ struct nfssvc_sock *nd_slp; /* Current socket */
+ struct mbuf *nd_nam; /* Client addr for datagram req. */
+ struct mbuf *nd_mrep; /* Req. mbuf list */
+ struct mbuf *nd_md; /* NOTE(review): presumably the mbuf that nd_dpos points into - confirm */
+ caddr_t nd_dpos; /* Position in list */
+ int nd_procnum; /* RPC procedure number */
+ u_long nd_retxid; /* RPC xid */
+ int nd_repstat; /* Reply status value */
+ struct ucred nd_cr; /* Credentials for req. */
+ int nd_nqlflag; /* Leasing flag */
+ int nd_duration; /* Lease duration */
+ int nd_authlen; /* Authenticator len */
+ u_char nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+ struct proc *nd_procp; /* Proc ptr */
+};
+
+#define NFSD_WAITING 0x01 /* bits for nd_flag */
+#define NFSD_CHECKSLP 0x02
+#define NFSD_REQINPROG 0x04
+#define NFSD_NEEDAUTH 0x08
+#define NFSD_AUTHFAIL 0x10
+#endif /* KERNEL */
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
new file mode 100644
index 000000000000..177a278b6310
--- /dev/null
+++ b/sys/nfsclient/nfs_bio.c
@@ -0,0 +1,799 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_bio.c 8.5 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/resourcevar.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/trace.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+
+#include <vm/vm.h>
+
+#include <nfs/nfsnode.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+
+struct buf *incore(), *nfs_getcacheblk(); /* old-style declarations: return type only, no parameter checking */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; /* defined elsewhere; one slot per possible async daemon */
+extern int nfs_numasync; /* defined elsewhere; read-ahead below is skipped while this is 0 */
+
+/*
+ * Vnode op for read using bio: fills uio through the buffer cache for VREG, VLNK and VDIR vnodes; returns 0 or an error number.
+ * Any similarity to readip() is purely coincidental
+ */
+nfs_bioread(vp, uio, ioflag, cred)
+ register struct vnode *vp;
+ register struct uio *uio;
+ int ioflag;
+ struct ucred *cred;
+{
+ register struct nfsnode *np = VTONFS(vp);
+ register int biosize, diff;
+ struct buf *bp, *rabp;
+ struct vattr vattr;
+ struct proc *p;
+ struct nfsmount *nmp;
+ daddr_t lbn, bn, rabn;
+ caddr_t baddr;
+ int got_buf, nra, error = 0, n, on, not_readin;
+
+#ifdef lint
+ ioflag = ioflag; /* ioflag is not otherwise used in this function */
+#endif /* lint */
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_READ)
+ panic("nfs_read mode");
+#endif
+ if (uio->uio_resid == 0)
+ return (0);
+ if (uio->uio_offset < 0 && vp->v_type != VDIR) /* directories are exempt from the sign check */
+ return (EINVAL);
+ nmp = VFSTONFS(vp->v_mount);
+ biosize = nmp->nm_rsize; /* cache block size = mount's read size */
+ p = uio->uio_procp;
+ /*
+ * For nfs, cache consistency can only be maintained approximately.
+ * Although RFC1094 does not specify the criteria, the following is
+ * believed to be compatible with the reference port.
+ * For nqnfs, full cache consistency is maintained within the loop.
+ * For nfs:
+ * If the file's modify time on the server has changed since the
+ * last read rpc or you have written to the file,
+ * you may have lost data cache consistency with the
+ * server, so flush all of the file's data out of the cache.
+ * Then force a getattr rpc to ensure that you have up to date
+ * attributes.
+ * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
+ * the ones changing the modify time".
+ * NB: This implies that cache data can be read when up to
+ * NFS_ATTRTIMEO seconds out of date. If you find that you need current
+ * attributes this could be forced by setting n_attrstamp to 0 before
+ * the VOP_GETATTR() call.
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
+ if (np->n_flag & NMODIFIED) { /* we wrote to the file ourselves */
+ if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
+ vp->v_type != VREG) {
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ }
+ np->n_attrstamp = 0; /* see the NB above: forces current attributes from VOP_GETATTR() */
+ np->n_direofoffset = 0;
+ if (error = VOP_GETATTR(vp, &vattr, cred, p))
+ return (error);
+ np->n_mtime = vattr.va_mtime.ts_sec;
+ } else {
+ if (error = VOP_GETATTR(vp, &vattr, cred, p))
+ return (error);
+ if (np->n_mtime != vattr.va_mtime.ts_sec) { /* modify time changed behind our back */
+ np->n_direofoffset = 0;
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ np->n_mtime = vattr.va_mtime.ts_sec;
+ }
+ }
+ }
+ do { /* one pass per cache block; ends on error, empty uio or n <= 0 */
+
+ /*
+ * Get a valid lease. If cached data is stale, flush it.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
+ do {
+ error = nqnfs_getlease(vp, NQL_READ, cred, p);
+ } while (error == NQNFS_EXPIRED);
+ if (error)
+ return (error);
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE) ||
+ ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
+ if (vp->v_type == VDIR) {
+ np->n_direofoffset = 0;
+ cache_purge(vp);
+ }
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ np->n_brev = np->n_lrev; /* record the revision the (now empty) cache corresponds to */
+ }
+ } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
+ np->n_direofoffset = 0;
+ cache_purge(vp);
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ }
+ }
+ if (np->n_flag & NQNFSNONCACHE) { /* non-cachable: go straight to the rpc, bypassing the buffer cache */
+ switch (vp->v_type) {
+ case VREG:
+ error = nfs_readrpc(vp, uio, cred);
+ break;
+ case VLNK:
+ error = nfs_readlinkrpc(vp, uio, cred);
+ break;
+ case VDIR:
+ error = nfs_readdirrpc(vp, uio, cred);
+ break;
+ };
+ return (error);
+ }
+ baddr = (caddr_t)0; /* set from bp->b_data just before uiomove() */
+ switch (vp->v_type) { /* no default case: other vnode types leave bp and n unset */
+ case VREG:
+ nfsstats.biocache_reads++;
+ lbn = uio->uio_offset / biosize; /* logical block holding the offset */
+ on = uio->uio_offset & (biosize-1); /* offset within the block; the mask assumes biosize is a power of 2 */
+ bn = lbn * (biosize / DEV_BSIZE); /* same block expressed in DEV_BSIZE units */
+ not_readin = 1; /* cleared below when this pass reads the block in */
+
+ /*
+ * Start the read ahead(s), as required.
+ */
+ if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
+ lbn == vp->v_lastr + 1) { /* only when this read follows the previous one sequentially */
+ for (nra = 0; nra < nmp->nm_readahead &&
+ (lbn + 1 + nra) * biosize < np->n_size; nra++) {
+ rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
+ if (!incore(vp, rabn)) {
+ rabp = nfs_getcacheblk(vp, rabn, biosize, p);
+ if (!rabp)
+ return (EINTR);
+ if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
+ rabp->b_flags |= (B_READ | B_ASYNC);
+ if (nfs_asyncio(rabp, cred)) { /* nonzero: could not be queued, so drop the buffer */
+ rabp->b_flags |= B_INVAL;
+ brelse(rabp);
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * If the block is in the cache and has the required data
+ * in a valid region, just copy it out.
+ * Otherwise, get the block and write back/read in,
+ * as required.
+ */
+ if ((bp = incore(vp, bn)) &&
+ (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
+ (B_BUSY | B_WRITEINPROG))
+ got_buf = 0; /* bp was not obtained via nfs_getcacheblk(), so it is not brelse()d below */
+ else {
+again:
+ bp = nfs_getcacheblk(vp, bn, biosize, p);
+ if (!bp)
+ return (EINTR);
+ got_buf = 1;
+ if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
+ bp->b_flags |= B_READ;
+ not_readin = 0;
+ if (error = nfs_doio(bp, cred, p)) {
+ brelse(bp);
+ return (error);
+ }
+ }
+ }
+ n = min((unsigned)(biosize - on), uio->uio_resid); /* at most to the end of this block */
+ diff = np->n_size - uio->uio_offset; /* bytes left before end of file */
+ if (diff < n)
+ n = diff;
+ if (not_readin && n > 0) {
+ if (on < bp->b_validoff || (on + n) > bp->b_validend) { /* wanted range is not fully inside the valid region */
+ if (!got_buf) {
+ bp = nfs_getcacheblk(vp, bn, biosize, p);
+ if (!bp)
+ return (EINTR);
+ got_buf = 1;
+ }
+ bp->b_flags |= B_INVAL;
+ if (bp->b_dirtyend > 0) {
+ if ((bp->b_flags & B_DELWRI) == 0)
+ panic("nfsbioread");
+ if (VOP_BWRITE(bp) == EINTR)
+ return (EINTR);
+ } else
+ brelse(bp);
+ goto again; /* re-fetch the block; it was marked B_INVAL above */
+ }
+ }
+ vp->v_lastr = lbn; /* remembered for the sequential-read test above */
+ diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
+ if (diff < n)
+ n = diff;
+ break;
+ case VLNK:
+ nfsstats.biocache_readlinks++;
+ bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); /* a link always uses block 0 */
+ if (!bp)
+ return (EINTR);
+ if ((bp->b_flags & B_DONE) == 0) {
+ bp->b_flags |= B_READ;
+ if (error = nfs_doio(bp, cred, p)) {
+ brelse(bp);
+ return (error);
+ }
+ }
+ n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
+ got_buf = 1;
+ on = 0;
+ break;
+ case VDIR:
+ nfsstats.biocache_readdirs++;
+ bn = (daddr_t)uio->uio_offset; /* the directory offset itself serves as the block number */
+ bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
+ if (!bp)
+ return (EINTR);
+ if ((bp->b_flags & B_DONE) == 0) {
+ bp->b_flags |= B_READ;
+ if (error = nfs_doio(bp, cred, p)) {
+ brelse(bp);
+ return (error);
+ }
+ }
+
+ /*
+ * If not eof and read aheads are enabled, start one.
+ * (You need the current block first, so that you have the
+ * directory offset cookie of the next block.)
+ */
+ rabn = bp->b_blkno; /* per the comment above: the next block's offset cookie */
+ if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
+ rabn != 0 && rabn != np->n_direofoffset &&
+ !incore(vp, rabn)) {
+ rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
+ if (rabp) { /* unlike the VREG case, a failed read-ahead getblk is simply skipped */
+ if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
+ rabp->b_flags |= (B_READ | B_ASYNC);
+ if (nfs_asyncio(rabp, cred)) {
+ rabp->b_flags |= B_INVAL;
+ brelse(rabp);
+ }
+ }
+ }
+ }
+ on = 0;
+ n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
+ got_buf = 1;
+ break;
+ };
+
+ if (n > 0) {
+ if (!baddr)
+ baddr = bp->b_data;
+ error = uiomove(baddr + on, (int)n, uio);
+ }
+ switch (vp->v_type) {
+ case VREG:
+ if (n + on == biosize || uio->uio_offset == np->n_size) /* block consumed to its end, or end of file reached */
+ bp->b_flags |= B_AGE;
+ break;
+ case VLNK:
+ n = 0; /* makes the loop condition false: a link is read in one pass */
+ break;
+ case VDIR:
+ uio->uio_offset = bp->b_blkno; /* continue at the offset recorded in the buffer */
+ break;
+ };
+ if (got_buf)
+ brelse(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n > 0);
+ return (error);
+}
+
+/*
+ * Vnode op for write using bio: copies uio into cache blocks of a VREG vnode and schedules the write; returns 0 or an error number
+ */
+nfs_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register int biosize;
+ register struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ register struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ register struct ucred *cred = ap->a_cred;
+ int ioflag = ap->a_ioflag;
+ struct buf *bp;
+ struct vattr vattr;
+ struct nfsmount *nmp;
+ daddr_t lbn, bn;
+ int n, on, error = 0;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_WRITE)
+ panic("nfs_write mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("nfs_write proc");
+#endif
+ if (vp->v_type != VREG) /* only regular files are written through here */
+ return (EIO);
+ if (np->n_flag & NWRITEERR) { /* report a recorded write error once, then clear it */
+ np->n_flag &= ~NWRITEERR;
+ return (np->n_error);
+ }
+ if (ioflag & (IO_APPEND | IO_SYNC)) {
+ if (np->n_flag & NMODIFIED) {
+ np->n_attrstamp = 0;
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ }
+ if (ioflag & IO_APPEND) {
+ np->n_attrstamp = 0;
+ if (error = VOP_GETATTR(vp, &vattr, cred, p))
+ return (error);
+ uio->uio_offset = np->n_size; /* append: start at the size known after the getattr */
+ }
+ }
+ nmp = VFSTONFS(vp->v_mount);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ if (uio->uio_resid == 0)
+ return (0);
+ /*
+ * Maybe this should be above the vnode op call, but so long as
+ * file servers have no limits, i don't think it matters
+ */
+ if (p && uio->uio_offset + uio->uio_resid >
+ p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { /* write would end beyond the process file size limit */
+ psignal(p, SIGXFSZ);
+ return (EFBIG);
+ }
+ /*
+ * I use nm_rsize, not nm_wsize so that all buffer cache blocks
+ * will be the same size within a filesystem. nfs_writerpc will
+ * still use nm_wsize when sizing the rpc's.
+ */
+ biosize = nmp->nm_rsize;
+ do { /* one pass per cache block */
+
+ /*
+ * Check for a valid write lease.
+ * If non-cachable, just do the rpc
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
+ do {
+ error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
+ } while (error == NQNFS_EXPIRED);
+ if (error)
+ return (error);
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE)) {
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ np->n_brev = np->n_lrev;
+ }
+ }
+ if (np->n_flag & NQNFSNONCACHE)
+ return (nfs_writerpc(vp, uio, cred, ioflag));
+ nfsstats.biocache_writes++;
+ lbn = uio->uio_offset / biosize; /* logical block holding the offset */
+ on = uio->uio_offset & (biosize-1); /* offset within the block; the mask assumes biosize is a power of 2 */
+ n = min((unsigned)(biosize - on), uio->uio_resid); /* at most to the end of this block */
+ bn = lbn * (biosize / DEV_BSIZE); /* same block expressed in DEV_BSIZE units */
+again:
+ bp = nfs_getcacheblk(vp, bn, biosize, p);
+ if (!bp)
+ return (EINTR);
+ if (bp->b_wcred == NOCRED) { /* first writer's credential is attached to the buffer */
+ crhold(cred);
+ bp->b_wcred = cred;
+ }
+ np->n_flag |= NMODIFIED;
+ if (uio->uio_offset + n > np->n_size) { /* write extends the file */
+ np->n_size = uio->uio_offset + n;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ }
+
+ /*
+ * If the new write will leave a contiguous dirty
+ * area, just update the b_dirtyoff and b_dirtyend,
+ * otherwise force a write rpc of the old dirty area.
+ */
+ if (bp->b_dirtyend > 0 &&
+ (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { /* new range neither touches nor overlaps the dirty range */
+ bp->b_proc = p;
+ if (VOP_BWRITE(bp) == EINTR)
+ return (EINTR);
+ goto again;
+ }
+
+ /*
+ * Check for valid write lease and get one as required.
+ * In case getblk() and/or bwrite() delayed us.
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
+ do {
+ error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
+ } while (error == NQNFS_EXPIRED);
+ if (error) {
+ brelse(bp); /* unlike the first lease check, a buffer is held here and must be released */
+ return (error);
+ }
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE)) {
+ brelse(bp);
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ np->n_brev = np->n_lrev;
+ goto again;
+ }
+ }
+ if (error = uiomove((char *)bp->b_data + on, n, uio)) {
+ bp->b_flags |= B_ERROR;
+ brelse(bp);
+ return (error);
+ }
+ if (bp->b_dirtyend > 0) { /* grow the existing dirty range to include [on, on + n) */
+ bp->b_dirtyoff = min(on, bp->b_dirtyoff);
+ bp->b_dirtyend = max((on + n), bp->b_dirtyend);
+ } else {
+ bp->b_dirtyoff = on;
+ bp->b_dirtyend = on + n;
+ }
+#ifndef notdef /* NOTE(review): notdef is presumably never defined, making this branch the live one - confirm */
+ if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
+ bp->b_validoff > bp->b_dirtyend) { /* no valid range yet, or it is disjoint from the dirty range */
+ bp->b_validoff = bp->b_dirtyoff;
+ bp->b_validend = bp->b_dirtyend;
+ } else {
+ bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
+ bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
+ }
+#else
+ bp->b_validoff = bp->b_dirtyoff;
+ bp->b_validend = bp->b_dirtyend;
+#endif
+ if (ioflag & IO_APPEND)
+ bp->b_flags |= B_APPENDWRITE;
+
+ /*
+ * If the lease is non-cachable or IO_SYNC do bwrite().
+ */
+ if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
+ bp->b_proc = p;
+ if (error = VOP_BWRITE(bp))
+ return (error);
+ } else if ((n + on) == biosize &&
+ (nmp->nm_flag & NFSMNT_NQNFS) == 0) { /* block filled to its end on a non-nqnfs mount: start an async write */
+ bp->b_proc = (struct proc *)0;
+ bawrite(bp);
+ } else
+ bdwrite(bp); /* otherwise a delayed write */
+ } while (uio->uio_resid > 0 && n > 0);
+ return (0);
+}
+
+/*
+ * Fetch the buffer cache block (bn, size) of an nfs vnode, marked busy;
+ * a block that is not currently cached gets a newly allocated buffer.
+ * On mounts without NFSMNT_INT this is one plain getblk() call.
+ * On NFSMNT_INT mounts a failed getblk() is retried with a 2 * hz
+ * timeout until it succeeds or nfs_sigintr() reports that the caller
+ * was interrupted, in which case a null pointer is returned.
+ */
+struct buf *
+nfs_getcacheblk(vp, bn, size, p)
+ struct vnode *vp;
+ daddr_t bn;
+ int size;
+ struct proc *p;
+{
+ struct nfsmount *mnt = VFSTONFS(vp->v_mount);
+ register struct buf *blk;
+
+ /* Non-interruptible mount: nothing to retry. */
+ if ((mnt->nm_flag & NFSMNT_INT) == 0)
+ return (getblk(vp, bn, size, 0, 0));
+
+ for (blk = getblk(vp, bn, size, PCATCH, 0); blk == (struct buf *)0;
+ blk = getblk(vp, bn, size, 0, 2 * hz))
+ if (nfs_sigintr(mnt, (struct nfsreq *)0, p))
+ return ((struct buf *)0);
+ return (blk);
+}
+
+/*
+ * Flush and invalidate the buffers of vp through vinvalbuf(). Only one
+ * flush runs per nfsnode at a time: NFLUSHINPROG marks the flusher and
+ * NFLUSHWANT marks sleepers waiting for it to finish.
+ * intrflg is honoured only on mounts with NFSMNT_INT set.
+ * Returns 0 once the flush has completed, or EINTR when nfs_sigintr()
+ * reports an interruption.
+ */
+nfs_vinvalbuf(vp, flags, cred, p, intrflg)
+	struct vnode *vp;
+	int flags;
+	struct ucred *cred;
+	struct proc *p;
+	int intrflg;
+{
+	register struct nfsnode *np = VTONFS(vp);
+	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	int error = 0, slpflag = 0, slptimeo = 0;
+	int result = 0;
+
+	if ((nmp->nm_flag & NFSMNT_INT) == 0)
+		intrflg = 0;
+	if (intrflg) {
+		slpflag = PCATCH;
+		slptimeo = 2 * hz;
+	}
+
+	/*
+	 * Somebody else is flushing: sleep until they are done.
+	 */
+	while (np->n_flag & NFLUSHINPROG) {
+		np->n_flag |= NFLUSHWANT;
+		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
+		    slptimeo);
+		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
+			return (EINTR);
+	}
+
+	/*
+	 * Our turn. The first attempt uses slpflag, retries use slptimeo.
+	 */
+	np->n_flag |= NFLUSHINPROG;
+	for (error = vinvalbuf(vp, flags, cred, p, slpflag, 0); error;
+	    error = vinvalbuf(vp, flags, cred, p, 0, slptimeo)) {
+		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
+			result = EINTR;
+			break;
+		}
+	}
+
+	/* NMODIFIED is only cleared when the flush actually completed. */
+	if (result == 0)
+		np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
+	else
+		np->n_flag &= ~NFLUSHINPROG;
+	if (np->n_flag & NFLUSHWANT) {
+		np->n_flag &= ~NFLUSHWANT;
+		wakeup((caddr_t)&np->n_flag);
+	}
+	return (result);
+}
+
+/*
+ * Hand a buffer to an idle nfsiod for asynchronous I/O.
+ * Returns EIO when no nfsiods are running or none is currently waiting
+ * for work, so that requests are not queued behind nfsiods that are all
+ * hung on a dead server. Returns 0 once the buffer has been queued on
+ * nfs_bufq and the chosen nfsiod has been woken.
+ */
+nfs_asyncio(bp, cred)
+	register struct buf *bp;
+	struct ucred *cred;
+{
+	register int slot;
+
+	if (nfs_numasync == 0)
+		return (EIO);
+
+	/* Pick the first nfsiod that has announced it is waiting. */
+	for (slot = 0; slot < NFS_MAXASYNCDAEMON; slot++)
+		if (nfs_iodwant[slot])
+			break;
+	if (slot == NFS_MAXASYNCDAEMON)
+		return (EIO);
+
+	/*
+	 * Attach the caller's credential to the buffer for the direction
+	 * of the transfer, unless it already carries one.
+	 */
+	if (cred != NOCRED) {
+		if (bp->b_flags & B_READ) {
+			if (bp->b_rcred == NOCRED) {
+				crhold(cred);
+				bp->b_rcred = cred;
+			}
+		} else if (bp->b_wcred == NOCRED) {
+			crhold(cred);
+			bp->b_wcred = cred;
+		}
+	}
+
+	TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
+	nfs_iodwant[slot] = (struct proc *)0;
+	wakeup((caddr_t)&nfs_iodwant[slot]);
+	return (0);
+}
+
+/*
+ * Do an I/O operation to/from a cache block. This may be called
+ * synchronously or from an nfsiod.
+ *
+ *	bp - buffer to transfer; B_READ in b_flags selects the direction
+ *	cr - credential handed to the read/readlink/readdir/write rpc
+ *	p  - process recorded in the uio (nfssvc_iod() passes a null pointer)
+ *
+ * Reads dispatch on the vnode type; writes push the dirty region
+ * [b_dirtyoff, b_dirtyend) of the buffer. The buffer is always completed
+ * with biodone() and the rpc error (0 on success) is returned.
+ */
+int
+nfs_doio(bp, cr, p)
+	register struct buf *bp;
+	struct ucred *cr;
+	struct proc *p;
+{
+	register struct uio *uiop;
+	register struct vnode *vp;
+	struct nfsnode *np;
+	struct nfsmount *nmp;
+	/* error starts at 0 so an unexpected vnode type is not reported
+	 * with whatever happened to be on the stack. */
+	int error = 0, diff, len;
+	struct uio uio;
+	struct iovec io;
+
+	vp = bp->b_vp;
+	np = VTONFS(vp);
+	nmp = VFSTONFS(vp->v_mount);
+	uiop = &uio;
+	uiop->uio_iov = &io;
+	uiop->uio_iovcnt = 1;
+	uiop->uio_segflg = UIO_SYSSPACE;
+	uiop->uio_procp = p;
+
+	/*
+	 * Historically, paging was done with physio, but no more.
+	 */
+	if (bp->b_flags & B_PHYS)
+		panic("doio phys");
+	if (bp->b_flags & B_READ) {
+		io.iov_len = uiop->uio_resid = bp->b_bcount;
+		io.iov_base = bp->b_data;
+		uiop->uio_rw = UIO_READ;
+		switch (vp->v_type) {
+		case VREG:
+			uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
+			nfsstats.read_bios++;
+			error = nfs_readrpc(vp, uiop, cr);
+			if (!error) {
+				bp->b_validoff = 0;
+				if (uiop->uio_resid) {
+					/*
+					 * If len > 0, there is a hole in the file and
+					 * no writes after the hole have been pushed to
+					 * the server yet.
+					 * Just zero fill the rest of the valid area.
+					 */
+					diff = bp->b_bcount - uiop->uio_resid;
+					len = np->n_size - (bp->b_blkno * DEV_BSIZE
+					    + diff);
+					if (len > 0) {
+						len = min(len, uiop->uio_resid);
+						bzero((char *)bp->b_data + diff, len);
+						bp->b_validend = diff + len;
+					} else
+						bp->b_validend = diff;
+				} else
+					bp->b_validend = bp->b_bcount;
+			}
+			/*
+			 * A running text file whose lease revision (NQNFS) or
+			 * modify time (plain NFS) no longer matches what we
+			 * cached has changed underneath the process: kill it.
+			 */
+			if (p && (vp->v_flag & VTEXT) &&
+			    (((nmp->nm_flag & NFSMNT_NQNFS) &&
+			      np->n_lrev != np->n_brev) ||
+			     (!(nmp->nm_flag & NFSMNT_NQNFS) &&
+			      np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
+				uprintf("Process killed due to text file modification\n");
+				psignal(p, SIGKILL);
+				p->p_flag |= P_NOSWAP;
+			}
+			break;
+		case VLNK:
+			uiop->uio_offset = 0;
+			nfsstats.readlink_bios++;
+			error = nfs_readlinkrpc(vp, uiop, cr);
+			break;
+		case VDIR:
+			uiop->uio_offset = bp->b_lblkno;
+			nfsstats.readdir_bios++;
+			if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
+				error = nfs_readdirlookrpc(vp, uiop, cr);
+			else
+				error = nfs_readdirrpc(vp, uiop, cr);
+			/*
+			 * Save offset cookie in b_blkno.
+			 */
+			bp->b_blkno = uiop->uio_offset;
+			break;
+		default:
+			/*
+			 * No read rpc is issued for any other vnode type.
+			 * Log it; error stays 0 and the buffer is completed
+			 * below.
+			 */
+			printf("nfs_doio: type %x unexpected\n", vp->v_type);
+			break;
+		}
+		if (error) {
+			bp->b_flags |= B_ERROR;
+			bp->b_error = error;
+		}
+	} else {
+		/* Only the dirty region of the block goes over the wire. */
+		io.iov_len = uiop->uio_resid = bp->b_dirtyend
+		    - bp->b_dirtyoff;
+		uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
+		    + bp->b_dirtyoff;
+		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
+		uiop->uio_rw = UIO_WRITE;
+		nfsstats.write_bios++;
+		if (bp->b_flags & B_APPENDWRITE)
+			error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
+		else
+			error = nfs_writerpc(vp, uiop, cr, 0);
+		bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);
+
+		/*
+		 * For an interrupted write, the buffer is still valid and the
+		 * write hasn't been pushed to the server yet, so we can't set
+		 * B_ERROR and report the interruption by setting B_EINTR. For
+		 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
+		 * is essentially a noop.
+		 */
+		if (error == EINTR) {
+			bp->b_flags &= ~B_INVAL;
+			bp->b_flags |= B_DELWRI;
+
+			/*
+			 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
+			 * buffer to the clean list, we have to reassign it back to the
+			 * dirty one. Ugh.
+			 */
+			if (bp->b_flags & B_ASYNC)
+				reassignbuf(bp, vp);
+			else
+				bp->b_flags |= B_EINTR;
+		} else {
+			if (error) {
+				bp->b_flags |= B_ERROR;
+				bp->b_error = np->n_error = error;
+				np->n_flag |= NWRITEERR;
+			}
+			bp->b_dirtyoff = bp->b_dirtyend = 0;
+		}
+	}
+	bp->b_resid = uiop->uio_resid;
+	biodone(bp);
+	return (error);
+}
diff --git a/sys/nfsclient/nfs_nfsiod.c b/sys/nfsclient/nfs_nfsiod.c
new file mode 100644
index 000000000000..5d86b42ee20a
--- /dev/null
+++ b/sys/nfsclient/nfs_nfsiod.c
@@ -0,0 +1,874 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_syscalls.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/namei.h>
+#include <sys/syslog.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+/* Global defs. */
+extern u_long nfs_prog, nfs_vers;
+extern int (*nfsrv_procs[NFS_NPROCS])(); /* rpc handlers, indexed by nd_procnum in nfssvc_nfsd() */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; /* per slot: the nfsiod sleeping for work, else 0 */
+extern int nfs_numasync; /* number of nfsiods currently inside nfssvc_iod() */
+extern time_t nqnfsstarttime; /* compared against time.tv_sec to end the NQNFS startup phase */
+extern struct nfsrv_req nsrvq_head;
+extern struct nfsd nfsd_head; /* head of the nfsd queue; nd_flag carries NFSD_CHECKSLP */
+extern int nqsrv_writeslack; /* seconds added to nqnfsstarttime when a write arrived during startup */
+extern int nfsrtton; /* non-zero: nfssvc_nfsd() logs each request through nfsd_rt() */
+struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock; /* shared datagram slots, allocated in nfsrv_init() */
+int nuidhash_max = NFS_MAXUIDHASH; /* cap on cached uid mappings per socket (see nfssvc()) */
+static int nfs_numnfsd = 0; /* nfsds currently running in nfssvc_nfsd() */
+int nfsd_waiting = 0; /* nfsds currently asleep waiting for a request */
+static int notstarted = 1; /* still in the NQNFS startup phase */
+static int modify_flag = 0; /* a write was seen during startup; extends the phase once */
+static struct nfsdrt nfsdrt; /* circular log filled in by nfsd_rt() */
+void nfsrv_cleancache(), nfsrv_rcv(), nfsrv_wakenfsd(), nfs_sndunlock();
+static void nfsd_rt();
+void nfsrv_slpderef(), nfsrv_init();
+
+#define TRUE 1
+#define FALSE 0
+
+static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; /* non-zero while the slot is owned by an nfsiod */
+/*
+ * NFS server system calls
+ * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
+ */
+
+/*
+ * getfh system call: translate a path name into an NFS file handle.
+ * Restricted to the super user. The handle is built from the fsid of the
+ * vnode's mount and the fid returned by VFS_VPTOFH(), then copied out to
+ * uap->fhp. Returns 0 or the first error encountered.
+ */
+struct getfh_args {
+	char *fname;
+	fhandle_t *fhp;
+};
+getfh(p, uap, retval)
+	struct proc *p;
+	register struct getfh_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct nameidata nd;
+	fhandle_t handle;
+	int error;
+
+	/*
+	 * Must be super user
+	 */
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error)
+		return (error);
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+
+	/* Build the handle, then drop the vnode obtained from namei(). */
+	vp = nd.ni_vp;
+	bzero((caddr_t)&handle, sizeof(handle));
+	handle.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+	error = VFS_VPTOFH(vp, &handle.fh_fid);
+	vput(vp);
+	if (error)
+		return (error);
+
+	return (copyout((caddr_t)&handle, (caddr_t)uap->fhp, sizeof (handle)));
+}
+
+/* Head of the circular list of server sockets; ns_flag carries SLP_INIT. */
+static struct nfssvc_sock nfssvc_sockhead;
+
+/*
+ * Nfs server pseudo system call for the nfsd's
+ * Based on the flag value it either:
+ * - adds a socket to the selection list
+ * - remains in the kernel as an nfsd
+ * - remains in the kernel as an nfsiod
+ *
+ * Restricted to the super user. NFSSVC_MNTD additionally hands the
+ * calling mount_nfs to nqnfs_clientd() for the named mount point.
+ * EINTR and ERESTART from the daemon loops are reported as success.
+ */
+struct nfssvc_args {
+	int flag;
+	caddr_t argp;
+};
+nfssvc(p, uap, retval)
+	struct proc *p;
+	register struct nfssvc_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct file *fp;
+	struct mbuf *nam;
+	struct nfsd_args nfsdarg;
+	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
+	struct nfsd_cargs ncd;
+	struct nfsd *nfsd;
+	struct nfssvc_sock *slp;
+	struct nfsuid *nuidp, **nuh;
+	struct nfsmount *nmp;
+	int error;
+
+	/*
+	 * Must be super user
+	 */
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	/* Wait out a (re)initialization in progress in nfsrv_init(). */
+	while (nfssvc_sockhead.ns_flag & SLP_INIT) {
+		nfssvc_sockhead.ns_flag |= SLP_WANTINIT;
+		(void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
+	}
+	if (uap->flag & NFSSVC_BIOD)
+		error = nfssvc_iod(p);
+	else if (uap->flag & NFSSVC_MNTD) {
+		if (error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)))
+			return (error);
+		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
+		    ncd.ncd_dirp, p);
+		if (error = namei(&nd))
+			return (error);
+		/* The path must name the root of a mount. */
+		if ((nd.ni_vp->v_flag & VROOT) == 0)
+			error = EINVAL;
+		nmp = VFSTONFS(nd.ni_vp->v_mount);
+		vput(nd.ni_vp);
+		if (error)
+			return (error);
+		if ((nmp->nm_flag & NFSMNT_MNTD) &&
+		    (uap->flag & NFSSVC_GOTAUTH) == 0)
+			return (0);
+		nmp->nm_flag |= NFSMNT_MNTD;
+		error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
+		    uap->argp, p);
+	} else if (uap->flag & NFSSVC_ADDSOCK) {
+		if (error = copyin(uap->argp, (caddr_t)&nfsdarg,
+		    sizeof(nfsdarg)))
+			return (error);
+		if (error = getsock(p->p_fd, nfsdarg.sock, &fp))
+			return (error);
+		/*
+		 * Get the client address for connected sockets.
+		 */
+		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
+			nam = (struct mbuf *)0;
+		else if (error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
+		    MT_SONAME))
+			return (error);
+		error = nfssvc_addsock(fp, nam);
+	} else {
+		if (error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)))
+			return (error);
+		if ((uap->flag & NFSSVC_AUTHIN) && (nfsd = nsd->nsd_nfsd) &&
+		    (nfsd->nd_slp->ns_flag & SLP_VALID)) {
+			slp = nfsd->nd_slp;
+
+			/*
+			 * First check to see if another nfsd has already
+			 * added this credential.
+			 */
+			nuidp = slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+			while (nuidp) {
+				if (nuidp->nu_uid == nsd->nsd_uid)
+					break;
+				nuidp = nuidp->nu_hnext;
+			}
+			if (!nuidp) {
+				/*
+				 * Nope, so we will.
+				 */
+				if (slp->ns_numuids < nuidhash_max) {
+					slp->ns_numuids++;
+					nuidp = (struct nfsuid *)
+					    malloc(sizeof (struct nfsuid), M_NFSUID,
+					    M_WAITOK);
+				} else
+					nuidp = (struct nfsuid *)0;
+				/* Recheck SLP_VALID after the M_WAITOK malloc(). */
+				if ((slp->ns_flag & SLP_VALID) == 0) {
+					if (nuidp)
+						free((caddr_t)nuidp, M_NFSUID);
+				} else {
+					if (nuidp == (struct nfsuid *)0) {
+						/*
+						 * Cache is full: recycle the entry at the
+						 * tail of the lru list. Unlink it from its
+						 * hash chain first. An entry with no
+						 * predecessor is the head of its chain, so
+						 * the bucket itself must be repointed or it
+						 * would keep referring to the recycled entry.
+						 */
+						nuidp = slp->ns_lruprev;
+						remque(nuidp);
+						if (nuidp->nu_hprev)
+							nuidp->nu_hprev->nu_hnext =
+							    nuidp->nu_hnext;
+						else
+							slp->ns_uidh[NUIDHASH(nuidp->nu_uid)] =
+							    nuidp->nu_hnext;
+						if (nuidp->nu_hnext)
+							nuidp->nu_hnext->nu_hprev =
+							    nuidp->nu_hprev;
+					}
+					nuidp->nu_cr = nsd->nsd_cr;
+					if (nuidp->nu_cr.cr_ngroups > NGROUPS)
+						nuidp->nu_cr.cr_ngroups = NGROUPS;
+					nuidp->nu_cr.cr_ref = 1;
+					nuidp->nu_uid = nsd->nsd_uid;
+					/* Front of the lru list, head of its hash chain. */
+					insque(nuidp, (struct nfsuid *)slp);
+					nuh = &slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+					if (nuidp->nu_hnext = *nuh)
+						nuidp->nu_hnext->nu_hprev = nuidp;
+					nuidp->nu_hprev = (struct nfsuid *)0;
+					*nuh = nuidp;
+				}
+			}
+		}
+		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
+			nfsd->nd_flag |= NFSD_AUTHFAIL;
+		error = nfssvc_nfsd(nsd, uap->argp, p);
+	}
+	if (error == EINTR || error == ERESTART)
+		error = 0;
+	return (error);
+}
+
+/*
+ * Adds a socket to the list for servicing by nfsds.
+ * fp is the open file whose f_data is the socket; mynam is the mbuf that
+ * gets recorded as ns_nam (it may be a null pointer).
+ * mynam is freed on every error return.
+ * Returns 0, EPERM when the shared UDP (or CLTP) slot is already valid,
+ * or the error from soreserve().
+ */
+nfssvc_addsock(fp, mynam)
+ struct file *fp;
+ struct mbuf *mynam;
+{
+ register struct mbuf *m;
+ register int siz;
+ register struct nfssvc_sock *slp;
+ register struct socket *so;
+ struct nfssvc_sock *tslp;
+ int error, s;
+
+ so = (struct socket *)fp->f_data;
+ tslp = (struct nfssvc_sock *)0;
+ /*
+ * Add it to the list, as required.
+ */
+ if (so->so_proto->pr_protocol == IPPROTO_UDP) {
+ tslp = nfs_udpsock; /* UDP always uses the preallocated slot */
+ if (tslp->ns_flag & SLP_VALID) {
+ m_freem(mynam);
+ return (EPERM);
+ }
+#ifdef ISO
+ } else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
+ tslp = nfs_cltpsock;
+ if (tslp->ns_flag & SLP_VALID) {
+ m_freem(mynam);
+ return (EPERM);
+ }
+#endif /* ISO */
+ }
+ if (so->so_type == SOCK_STREAM)
+ siz = NFS_MAXPACKET + sizeof (u_long); /* extra u_long for stream sockets */
+ else
+ siz = NFS_MAXPACKET;
+ if (error = soreserve(so, siz, siz)) {
+ m_freem(mynam);
+ return (error);
+ }
+
+ /*
+ * Set protocol specific options { for now TCP only } and
+ * reserve some space. For datagram sockets, this can get called
+ * repeatedly for the same socket, but that isn't harmful.
+ */
+ if (so->so_type == SOCK_STREAM) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+ }
+ if (so->so_proto->pr_domain->dom_family == AF_INET &&
+ so->so_proto->pr_protocol == IPPROTO_TCP) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+ }
+ so->so_rcv.sb_flags &= ~SB_NOINTR;
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_flags &= ~SB_NOINTR;
+ so->so_snd.sb_timeo = 0;
+ if (tslp)
+ slp = tslp;
+ else {
+ slp = (struct nfssvc_sock *) /* other sockets get a fresh entry at the list tail */
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
+ slp->ns_prev = nfssvc_sockhead.ns_prev;
+ slp->ns_prev->ns_next = slp;
+ slp->ns_next = &nfssvc_sockhead;
+ nfssvc_sockhead.ns_prev = slp;
+ slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp; /* empty uid lru list points at itself */
+ }
+ slp->ns_so = so;
+ slp->ns_nam = mynam;
+ fp->f_count++; /* reference held through ns_fp; nfsrv_zapsock() calls closef() on it */
+ slp->ns_fp = fp;
+ s = splnet();
+ so->so_upcallarg = (caddr_t)slp;
+ so->so_upcall = nfsrv_rcv;
+ slp->ns_flag = (SLP_VALID | SLP_NEEDQ); /* SLP_NEEDQ makes an nfsd call nfsrv_rcv() on it */
+ nfsrv_wakenfsd(slp);
+ splx(s);
+ return (0);
+}
+
+/*
+ * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
+ * until it is killed by a signal.
+ *
+ * nsd is the kernel copy of the caller's nfsd_srvargs and argp the user
+ * address it was copied in from; nsd->nsd_nfsd is the nfsd state from a
+ * previous call, or a null pointer on the first call (one is allocated).
+ * Returns ENEEDAUTH, leaving the nfsd state in place, after copying the
+ * request's authentication string and nsd out to user space; any other
+ * return goes through "done", which unlinks and frees the nfsd state and
+ * calls nfsrv_init(TRUE) when the last nfsd exits.
+ */
+nfssvc_nfsd(nsd, argp, p)
+ struct nfsd_srvargs *nsd;
+ caddr_t argp;
+ struct proc *p;
+{
+ register struct mbuf *m, *nam2;
+ register int siz;
+ register struct nfssvc_sock *slp;
+ register struct socket *so;
+ register int *solockp;
+ struct nfsd *nd = nsd->nsd_nfsd;
+ struct mbuf *mreq, *nam;
+ struct timeval starttime;
+ struct nfsuid *uidp;
+ int error, cacherep, s;
+ int sotype;
+
+ s = splnet();
+ if (nd == (struct nfsd *)0) {
+ nsd->nsd_nfsd = nd = (struct nfsd *)
+ malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
+ bzero((caddr_t)nd, sizeof (struct nfsd));
+ nd->nd_procp = p;
+ nd->nd_cr.cr_ref = 1;
+ insque(nd, &nfsd_head);
+ nd->nd_nqlflag = NQL_NOVAL;
+ nfs_numnfsd++;
+ }
+ /*
+ * Loop getting rpc requests until SIGKILL.
+ */
+ for (;;) {
+ if ((nd->nd_flag & NFSD_REQINPROG) == 0) {
+ while (nd->nd_slp == (struct nfssvc_sock *)0 &&
+ (nfsd_head.nd_flag & NFSD_CHECKSLP) == 0) {
+ nd->nd_flag |= NFSD_WAITING;
+ nfsd_waiting++;
+ error = tsleep((caddr_t)nd, PSOCK | PCATCH, "nfsd", 0);
+ nfsd_waiting--;
+ if (error)
+ goto done;
+ }
+ if (nd->nd_slp == (struct nfssvc_sock *)0 &&
+ (nfsd_head.nd_flag & NFSD_CHECKSLP)) {
+ slp = nfssvc_sockhead.ns_next; /* scan for a valid socket flagged SLP_DOREC */
+ while (slp != &nfssvc_sockhead) {
+ if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
+ == (SLP_VALID | SLP_DOREC)) {
+ slp->ns_flag &= ~SLP_DOREC;
+ slp->ns_sref++; /* dropped again by nfsrv_slpderef() */
+ nd->nd_slp = slp;
+ break;
+ }
+ slp = slp->ns_next;
+ }
+ if (slp == &nfssvc_sockhead)
+ nfsd_head.nd_flag &= ~NFSD_CHECKSLP; /* nothing found: stop other nfsds rescanning */
+ }
+ if ((slp = nd->nd_slp) == (struct nfssvc_sock *)0)
+ continue;
+ if (slp->ns_flag & SLP_VALID) {
+ if (slp->ns_flag & SLP_DISCONN)
+ nfsrv_zapsock(slp);
+ else if (slp->ns_flag & SLP_NEEDQ) {
+ slp->ns_flag &= ~SLP_NEEDQ;
+ (void) nfs_sndlock(&slp->ns_solock,
+ (struct nfsreq *)0);
+ nfsrv_rcv(slp->ns_so, (caddr_t)slp,
+ M_WAIT);
+ nfs_sndunlock(&slp->ns_solock);
+ }
+ error = nfsrv_dorec(slp, nd);
+ nd->nd_flag |= NFSD_REQINPROG;
+ }
+ } else {
+ error = 0; /* re-entered with a request already in progress */
+ slp = nd->nd_slp;
+ }
+ if (error || (slp->ns_flag & SLP_VALID) == 0) { /* NOTE(review): error is not assigned on this pass when SLP_VALID was clear above */
+ nd->nd_slp = (struct nfssvc_sock *)0;
+ nd->nd_flag &= ~NFSD_REQINPROG;
+ nfsrv_slpderef(slp);
+ continue;
+ }
+ splx(s);
+ so = slp->ns_so;
+ sotype = so->so_type;
+ starttime = time;
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED)
+ solockp = &slp->ns_solock;
+ else
+ solockp = (int *)0;
+ /*
+ * nam == nam2 for connectionless protocols such as UDP
+ * nam2 == NULL for connection based protocols to disable
+ * recent request caching.
+ */
+ if (nam2 = nd->nd_nam) {
+ nam = nam2;
+ cacherep = RC_CHECKIT;
+ } else {
+ nam = slp->ns_nam;
+ cacherep = RC_DOIT;
+ }
+
+ /*
+ * Check to see if authorization is needed.
+ */
+ if (nd->nd_flag & NFSD_NEEDAUTH) {
+ static int logauth = 0; /* makes the warning below print only once */
+
+ nd->nd_flag &= ~NFSD_NEEDAUTH;
+ /*
+ * Check for a mapping already installed.
+ */
+ uidp = slp->ns_uidh[NUIDHASH(nd->nd_cr.cr_uid)];
+ while (uidp) {
+ if (uidp->nu_uid == nd->nd_cr.cr_uid)
+ break;
+ uidp = uidp->nu_hnext;
+ }
+ if (!uidp) {
+ nsd->nsd_uid = nd->nd_cr.cr_uid;
+ if (nam2 && logauth++ == 0)
+ log(LOG_WARNING, "Kerberized NFS using UDP\n");
+ nsd->nsd_haddr =
+ mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+ nsd->nsd_authlen = nd->nd_authlen;
+ if (copyout(nd->nd_authstr, nsd->nsd_authstr,
+ nd->nd_authlen) == 0 &&
+ copyout((caddr_t)nsd, argp, sizeof (*nsd)) == 0)
+ return (ENEEDAUTH); /* nd is kept (NFSD_REQINPROG still set) for the follow-up call */
+ cacherep = RC_DROPIT; /* a copyout failed: drop the request */
+ }
+ }
+ if (cacherep == RC_CHECKIT)
+ cacherep = nfsrv_getcache(nam2, nd, &mreq);
+
+ /*
+ * Check for just starting up for NQNFS and send
+ * fake "try again later" replies to the NQNFS clients.
+ */
+ if (notstarted && nqnfsstarttime <= time.tv_sec) {
+ if (modify_flag) {
+ nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
+ modify_flag = 0;
+ } else
+ notstarted = 0;
+ }
+ if (notstarted) {
+ if (nd->nd_nqlflag == NQL_NOVAL)
+ cacherep = RC_DROPIT;
+ else if (nd->nd_procnum != NFSPROC_WRITE) {
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = NQNFS_TRYLATER;
+ cacherep = RC_DOIT;
+ } else
+ modify_flag = 1;
+ } else if (nd->nd_flag & NFSD_AUTHFAIL) {
+ nd->nd_flag &= ~NFSD_AUTHFAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = NQNFS_AUTHERR;
+ cacherep = RC_DOIT;
+ }
+
+ switch (cacherep) {
+ case RC_DOIT:
+ error = (*(nfsrv_procs[nd->nd_procnum]))(nd,
+ nd->nd_mrep, nd->nd_md, nd->nd_dpos, &nd->nd_cr,
+ nam, &mreq);
+ if (nd->nd_cr.cr_ref != 1) {
+ printf("nfssvc cref=%d\n", nd->nd_cr.cr_ref);
+ panic("nfssvc cref");
+ }
+ if (error) {
+ if (nd->nd_procnum != NQNFSPROC_VACATED)
+ nfsstats.srv_errs++;
+ if (nam2) {
+ nfsrv_updatecache(nam2, nd, FALSE, mreq);
+ m_freem(nam2);
+ }
+ break;
+ }
+ nfsstats.srvrpccnt[nd->nd_procnum]++;
+ if (nam2)
+ nfsrv_updatecache(nam2, nd, TRUE, mreq);
+ nd->nd_mrep = (struct mbuf *)0; /* falls through to RC_REPLY to send mreq */
+ case RC_REPLY:
+ m = mreq;
+ siz = 0;
+ while (m) {
+ siz += m->m_len;
+ m = m->m_next;
+ }
+ if (siz <= 0 || siz > NFS_MAXPACKET) {
+ printf("mbuf siz=%d\n",siz);
+ panic("Bad nfs svc reply");
+ }
+ m = mreq;
+ m->m_pkthdr.len = siz;
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+ /*
+ * For stream protocols, prepend a Sun RPC
+ * Record Mark.
+ */
+ if (sotype == SOCK_STREAM) {
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ *mtod(m, u_long *) = htonl(0x80000000 | siz);
+ }
+ if (solockp)
+ (void) nfs_sndlock(solockp, (struct nfsreq *)0);
+ if (slp->ns_flag & SLP_VALID)
+ error = nfs_send(so, nam2, m, (struct nfsreq *)0);
+ else {
+ error = EPIPE;
+ m_freem(m);
+ }
+ if (nfsrtton)
+ nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+ if (nam2)
+ MFREE(nam2, m);
+ if (nd->nd_mrep)
+ m_freem(nd->nd_mrep);
+ if (error == EPIPE)
+ nfsrv_zapsock(slp);
+ if (solockp)
+ nfs_sndunlock(solockp);
+ if (error == EINTR || error == ERESTART) {
+ nfsrv_slpderef(slp);
+ s = splnet(); /* "done" expects to run at splnet and does the splx(s) */
+ goto done;
+ }
+ break;
+ case RC_DROPIT:
+ if (nfsrtton)
+ nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+ m_freem(nd->nd_mrep);
+ m_freem(nam2);
+ break;
+ };
+ s = splnet();
+ if (nfsrv_dorec(slp, nd)) { /* non-zero: no further request obtained, so let go of the socket */
+ nd->nd_flag &= ~NFSD_REQINPROG;
+ nd->nd_slp = (struct nfssvc_sock *)0;
+ nfsrv_slpderef(slp);
+ }
+ }
+done:
+ remque(nd);
+ splx(s);
+ free((caddr_t)nd, M_NFSD);
+ nsd->nsd_nfsd = (struct nfsd *)0;
+ if (--nfs_numnfsd == 0)
+ nfsrv_init(TRUE); /* Reinitialize everything */
+ return (error);
+}
+
+/*
+ * Body of an nfsiod: an asynchronous I/O daemon for client nfs.
+ * Claims a free slot in nfs_asyncdaemon[] (EBUSY when all are taken),
+ * then alternates between sleeping on its nfs_iodwant[] entry and
+ * draining nfs_bufq through nfs_doio(). It only returns once the sleep
+ * has failed; the queue is still drained one last time before the slot
+ * is released and the tsleep() error returned.
+ */
+nfssvc_iod(p)
+	struct proc *p;
+{
+	register struct buf *bp;
+	register int slot;
+	int error = 0;
+
+	/*
+	 * Find and claim the first unused daemon slot.
+	 */
+	for (slot = 0; slot < NFS_MAXASYNCDAEMON; slot++)
+		if (nfs_asyncdaemon[slot] == 0)
+			break;
+	if (slot == NFS_MAXASYNCDAEMON)
+		return (EBUSY);
+	nfs_asyncdaemon[slot]++;
+	nfs_numasync++;
+
+	/*
+	 * Service the queue until the sleep reports an error.
+	 */
+	do {
+		while (nfs_bufq.tqh_first == NULL && error == 0) {
+			nfs_iodwant[slot] = p;
+			error = tsleep((caddr_t)&nfs_iodwant[slot],
+			    PWAIT | PCATCH, "nfsidl", 0);
+		}
+		while ((bp = nfs_bufq.tqh_first) != NULL) {
+			/* Dequeue the head and do the I/O with its own cred. */
+			TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
+			(void) nfs_doio(bp,
+			    (bp->b_flags & B_READ) ? bp->b_rcred : bp->b_wcred,
+			    (struct proc *)0);
+		}
+	} while (error == 0);
+
+	nfs_asyncdaemon[slot] = 0;
+	nfs_numasync--;
+	return (error);
+}
+
+/*
+ * Shut down the socket behind an nfssvc_sock structure.
+ * Should be called with the send lock set, if required.
+ * All SLP_ALLFLAGS bits are cleared up front. If a file is still
+ * attached, the socket is shut down and closed, the name and the
+ * ns_raw/ns_rec mbuf chains are released, and every cached nfsuid is
+ * freed, leaving an empty lru list and empty hash buckets.
+ */
+nfsrv_zapsock(slp)
+	register struct nfssvc_sock *slp;
+{
+	register struct nfsuid *cur, *next;
+	register int bucket;
+	struct socket *so;
+	struct file *fp;
+	struct mbuf *m;
+
+	slp->ns_flag &= ~SLP_ALLFLAGS;
+	fp = slp->ns_fp;
+	if (fp == (struct file *)0)
+		return;
+
+	/* Detach the file first, then tear the socket down. */
+	slp->ns_fp = (struct file *)0;
+	so = slp->ns_so;
+	so->so_upcall = NULL;
+	soshutdown(so, 2);
+	closef(fp, (struct proc *)0);
+
+	if (slp->ns_nam)
+		MFREE(slp->ns_nam, m);
+	m_freem(slp->ns_raw);
+	m_freem(slp->ns_rec);
+
+	/* Free every uid mapping on the lru list. */
+	for (cur = slp->ns_lrunext; cur != (struct nfsuid *)slp; cur = next) {
+		next = cur->nu_lrunext;
+		free((caddr_t)cur, M_NFSUID);
+	}
+	slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp;
+	for (bucket = 0; bucket < NUIDHASHSIZ; bucket++)
+		slp->ns_uidh[bucket] = (struct nfsuid *)0;
+}
+
+/*
+ * Get an authorization string for the uid by having the mount_nfs sitting
+ * on this mount point porpoise out of the kernel and do it.
+ * On success *auth_str points at an RPCAUTH_MAXSIZ buffer allocated here
+ * (M_TEMP) and *auth_type / *auth_len are copied from the mount.
+ * On error the buffer is freed and the error returned: the nfs_sigintr()
+ * result, or EAUTH when the mount has NFSMNT_AUTHERR set.
+ */
+nfs_getauth(nmp, rep, cred, auth_type, auth_str, auth_len)
+ register struct nfsmount *nmp;
+ struct nfsreq *rep;
+ struct ucred *cred;
+ int *auth_type;
+ char **auth_str;
+ int *auth_len;
+{
+ int error = 0;
+
+ while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) { /* loop until NFSMNT_WAITAUTH is set on the mount */
+ nmp->nm_flag |= NFSMNT_WANTAUTH;
+ (void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
+ "nfsauth1", 2 * hz);
+ if (error = nfs_sigintr(nmp, rep, rep->r_procp)) { /* NOTE(review): rep is dereferenced unconditionally */
+ nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+ return (error);
+ }
+ }
+ nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
+ nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
+ nmp->nm_authuid = cred->cr_uid;
+ wakeup((caddr_t)&nmp->nm_authstr);
+
+ /*
+ * And wait for mount_nfs to do its stuff.
+ */
+ while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) {
+ (void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
+ "nfsauth2", 2 * hz);
+ error = nfs_sigintr(nmp, rep, rep->r_procp);
+ }
+ if (nmp->nm_flag & NFSMNT_AUTHERR) {
+ nmp->nm_flag &= ~NFSMNT_AUTHERR;
+ error = EAUTH;
+ }
+ if (error)
+ free((caddr_t)*auth_str, M_TEMP); /* NOTE(review): nm_authstr still holds the freed pointer */
+ else {
+ *auth_type = nmp->nm_authtype;
+ *auth_len = nmp->nm_authlen;
+ }
+ nmp->nm_flag &= ~NFSMNT_HASAUTH;
+ nmp->nm_flag |= NFSMNT_WAITAUTH; /* re-arm for the next request */
+ if (nmp->nm_flag & NFSMNT_WANTAUTH) {
+ nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+ wakeup((caddr_t)&nmp->nm_authtype); /* wake callers sleeping in the first loop */
+ }
+ return (error);
+}
+
+/*
+ * Dereference a server socket structure: drop one reference from
+ * ns_sref. When that was the last reference and the structure is no
+ * longer SLP_VALID, unlink it from the socket list and free it.
+ */
+void
+nfsrv_slpderef(slp)
+	register struct nfssvc_sock *slp;
+{
+	/* Still referenced by somebody else. */
+	if (--(slp->ns_sref) != 0)
+		return;
+	/* Unreferenced but still a live socket: keep it on the list. */
+	if (slp->ns_flag & SLP_VALID)
+		return;
+
+	slp->ns_prev->ns_next = slp->ns_next;
+	slp->ns_next->ns_prev = slp->ns_prev;
+	free((caddr_t)slp, M_NFSSVC);
+}
+
+/*
+ * Initialize the data structures for the server.
+ * Handshake with any new nfsds starting up to avoid any chance of
+ * corruption.
+ * With terminating non-zero, every socket structure on the list is first
+ * zapped (if still valid), unlinked and freed, and the server cache is
+ * cleaned. In both cases fresh UDP and CLTP socket structures are then
+ * allocated and linked as the only list members, and the nfsd queue head
+ * is reset. SLP_INIT is set for the duration; callers that slept on it
+ * (SLP_WANTINIT) are woken at the end.
+ */
+void
+nfsrv_init(terminating)
+ int terminating;
+{
+ register struct nfssvc_sock *slp;
+ struct nfssvc_sock *oslp;
+
+ if (nfssvc_sockhead.ns_flag & SLP_INIT)
+ panic("nfsd init");
+ nfssvc_sockhead.ns_flag |= SLP_INIT;
+ if (terminating) {
+ slp = nfssvc_sockhead.ns_next;
+ while (slp != &nfssvc_sockhead) {
+ if (slp->ns_flag & SLP_VALID)
+ nfsrv_zapsock(slp);
+ slp->ns_next->ns_prev = slp->ns_prev;
+ slp->ns_prev->ns_next = slp->ns_next;
+ oslp = slp;
+ slp = slp->ns_next; /* advance before the old entry is freed */
+ free((caddr_t)oslp, M_NFSSVC);
+ }
+ nfsrv_cleancache(); /* And clear out server cache */
+ }
+ nfs_udpsock = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
+ nfs_cltpsock = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
+ nfssvc_sockhead.ns_next = nfs_udpsock; /* list becomes: head <-> udp <-> cltp <-> head */
+ nfs_udpsock->ns_next = nfs_cltpsock;
+ nfs_cltpsock->ns_next = &nfssvc_sockhead;
+ nfssvc_sockhead.ns_prev = nfs_cltpsock;
+ nfs_cltpsock->ns_prev = nfs_udpsock;
+ nfs_udpsock->ns_prev = &nfssvc_sockhead;
+ nfs_udpsock->ns_lrunext = nfs_udpsock->ns_lruprev = /* empty uid lru lists point at themselves */
+ (struct nfsuid *)nfs_udpsock;
+ nfs_cltpsock->ns_lrunext = nfs_cltpsock->ns_lruprev =
+ (struct nfsuid *)nfs_cltpsock;
+ nfsd_head.nd_next = nfsd_head.nd_prev = &nfsd_head;
+ nfsd_head.nd_flag = 0;
+ nfssvc_sockhead.ns_flag &= ~SLP_INIT;
+ if (nfssvc_sockhead.ns_flag & SLP_WANTINIT) {
+ nfssvc_sockhead.ns_flag &= ~SLP_WANTINIT;
+ wakeup((caddr_t)&nfssvc_sockhead); /* releases callers sleeping in nfssvc() */
+ }
+}
+
+/*
+ * Record one request in the server monitor log.
+ * The entry at nfsdrt.pos is overwritten with the cache disposition,
+ * transport and NQNFS flags, the procedure number, the client's IPv4
+ * address (INADDR_ANY for other address families), the elapsed time
+ * since *startp in microseconds and the current time. The position then
+ * advances, wrapping at NFSRTTLOGSIZ.
+ */
+static void
+nfsd_rt(startp, sotype, nd, nam, cacherep)
+	struct timeval *startp;
+	int sotype;
+	register struct nfsd *nd;
+	struct mbuf *nam;
+	int cacherep;
+{
+	register struct drt *entry = &nfsdrt.drt[nfsdrt.pos];
+
+	/* Start from the cache disposition, then OR in the modifiers. */
+	switch (cacherep) {
+	case RC_DOIT:
+		entry->flag = 0;
+		break;
+	case RC_REPLY:
+		entry->flag = DRT_CACHEREPLY;
+		break;
+	default:
+		entry->flag = DRT_CACHEDROP;
+		break;
+	}
+	if (sotype == SOCK_STREAM)
+		entry->flag |= DRT_TCP;
+	if (nd->nd_nqlflag != NQL_NOVAL)
+		entry->flag |= DRT_NQNFS;
+
+	entry->proc = nd->nd_procnum;
+	entry->ipadr = (mtod(nam, struct sockaddr *)->sa_family == AF_INET) ?
+	    mtod(nam, struct sockaddr_in *)->sin_addr.s_addr : INADDR_ANY;
+	entry->resptime = ((time.tv_sec - startp->tv_sec) * 1000000) +
+	    (time.tv_usec - startp->tv_usec);
+	entry->tstamp = time;
+
+	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
+}
diff --git a/sys/nfsclient/nfs_node.c b/sys/nfsclient/nfs_node.c
new file mode 100644
index 000000000000..032bdef0d5ab
--- /dev/null
+++ b/sys/nfsclient/nfs_node.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_node.c 8.2 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+
+struct nfsnode **nheadhashtbl; /* heads of the nfsnode hash chains; assigned from hashinit() in nfs_nhinit() */
+u_long nheadhash; /* passed by address to hashinit(); and-ed with the hash sum in NFSNOHASH() */
+#define NFSNOHASH(fhsum) ((fhsum)&nheadhash) /* file handle byte sum -> index into nheadhashtbl[] */
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Initialize the nfsnode hash table.
+ * Prints a console warning when sizeof(struct nfsnode) is not a power
+ * of two, then lets hashinit() allocate the chain-head table (sized from
+ * desiredvnodes), passing &nheadhash for it to fill in.
+ */
+nfs_nhinit()
+{
+
+#ifndef lint
+	/* x & (x - 1) is non-zero exactly when x is not a power of two. */
+	if ((sizeof(struct nfsnode) - 1) & sizeof(struct nfsnode))
+		printf("nfs_nhinit: bad size %d\n",
+		    (int)sizeof(struct nfsnode));	/* %d wants an int */
+#endif /* not lint */
+	nheadhashtbl = hashinit(desiredvnodes, M_NFSNODE, &nheadhash);
+}
+
+/*
+ * Map a file handle to its hash chain.
+ * The NFSX_FH bytes of the handle are summed and the sum is reduced with
+ * NFSNOHASH(); the address of the matching nheadhashtbl[] slot is returned.
+ */
+struct nfsnode **
+nfs_hash(fhp)
+	register nfsv2fh_t *fhp;
+{
+	register u_char *bp = &fhp->fh_bytes[0];
+	register u_long sum = 0;
+	int left;
+
+	for (left = NFSX_FH; left > 0; left--)
+		sum += *bp++;
+	return (&nheadhashtbl[NFSNOHASH(sum)]);
+}
+
+/*
+ * Look up a vnode/nfsnode by file handle.
+ * Callers must check for mount points!!
+ *
+ * mntp - mount the handle belongs to
+ * fhp  - file handle to look up (NFSX_FH bytes are compared/copied)
+ * npp  - result: set to the nfsnode on success, or to 0 when
+ *        getnewvnode() fails
+ *
+ * The hash chain for the handle is searched first; a match is returned
+ * once vget() succeeds on its vnode.  Otherwise a new vnode and nfsnode
+ * are created, linked at the head of the chain and initialized.
+ * Returns 0 on success or the error from getnewvnode().
+ */
+nfs_nget(mntp, fhp, npp)
+ struct mount *mntp;
+ register nfsv2fh_t *fhp;
+ struct nfsnode **npp;
+{
+ register struct nfsnode *np, *nq, **nhpp;
+ register struct vnode *vp;
+ extern int (**nfsv2_vnodeop_p)();
+ struct vnode *nvp;
+ int error;
+
+ nhpp = nfs_hash(fhp); /* chain head slot for this handle */
+loop:
+ for (np = *nhpp; np; np = np->n_forw) {
+ if (mntp != NFSTOV(np)->v_mount ||
+ bcmp((caddr_t)fhp, (caddr_t)&np->n_fh, NFSX_FH))
+ continue; /* different mount or different handle */
+ vp = NFSTOV(np);
+ if (vget(vp, 1))
+ goto loop; /* vget() failed: rescan the chain from its head */
+ *npp = np;
+ return(0);
+ }
+ if (error = getnewvnode(VT_NFS, mntp, nfsv2_vnodeop_p, &nvp)) {
+ *npp = 0;
+ return (error);
+ }
+ vp = nvp;
+ MALLOC(np, struct nfsnode *, sizeof *np, M_NFSNODE, M_WAITOK); /* NOTE(review): the chain is not re-checked after getnewvnode()/MALLOC(M_WAITOK); if either can sleep, confirm a duplicate nfsnode for the same handle cannot be inserted meanwhile */
+ vp->v_data = np;
+ np->n_vnode = vp;
+ /*
+ * Insert the nfsnode in the hash queue for its new file handle.
+ * The node goes at the head of the chain; n_back holds the address
+ * of the pointer that points at the node (the chain head slot or
+ * the previous node's n_forw).
+ */
+ np->n_flag = 0;
+ if (nq = *nhpp)
+ nq->n_back = &np->n_forw;
+ np->n_forw = nq;
+ np->n_back = nhpp;
+ *nhpp = np;
+ bcopy((caddr_t)fhp, (caddr_t)&np->n_fh, NFSX_FH);
+ np->n_attrstamp = 0;
+ np->n_direofoffset = 0;
+ np->n_sillyrename = (struct sillyrename *)0;
+ np->n_size = 0;
+ np->n_mtime = 0;
+ if (VFSTONFS(mntp)->nm_flag & NFSMNT_NQNFS) { /* extra fields cleared only on NQNFS mounts */
+ np->n_brev = 0;
+ np->n_lrev = 0;
+ np->n_expiry = (time_t)0;
+ np->n_tnext = (struct nfsnode *)0; /* nfs_reclaim() tests n_tnext before touching the timer queue */
+ }
+ *npp = np;
+ return (0);
+}
+
+/*
+ * Inactive vnode operation for NFS.
+ * When the nfsnode carries a sillyrename record, the vnode's buffers are
+ * invalidated with the record's credential, the silly file that was
+ * rename'd earlier is removed and the record's credential and directory
+ * vnode references are released.  n_flag is then masked down to the
+ * bits listed below.  Always returns 0.
+ */
+nfs_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct sillyrename *silly;
+	struct proc *p = curproc;	/* XXX */
+	extern int prtactive;
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("nfs_inactive: pushing active", vp);
+
+	/* Take the record off the node before acting on it. */
+	silly = np->n_sillyrename;
+	np->n_sillyrename = (struct sillyrename *)0;
+	if (silly != (struct sillyrename *)0) {
+		(void) nfs_vinvalbuf(vp, 0, silly->s_cred, p, 1);
+		nfs_removeit(silly);
+		crfree(silly->s_cred);
+		vrele(silly->s_dvp);
+#ifdef SILLYSEPARATE
+		free((caddr_t)silly, M_NFSREQ);
+#endif
+	}
+	np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NQNFSEVICTED |
+	    NQNFSNONCACHE | NQNFSWRITE);
+	return (0);
+}
+
+/*
+ * Reclaim an nfsnode so that it can be used for other purposes.
+ *
+ * Unlinks the nfsnode from its hash chain and, on an NQNFS mount with
+ * n_tnext set, from the mount's timer queue; then calls cache_purge(),
+ * frees the nfsnode and clears vp->v_data.  Always returns 0.
+ */
+nfs_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ register struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ register struct nfsnode *nq;
+ extern int prtactive;
+
+ if (prtactive && vp->v_usecount != 0)
+ vprint("nfs_reclaim: pushing active", vp);
+ /*
+ * Remove the nfsnode from its hash chain.
+ * n_back is the address of the pointer that points at this node, so
+ * storing the successor through it unlinks the node whether it is
+ * first in the chain or not.
+ */
+ if (nq = np->n_forw)
+ nq->n_back = np->n_back;
+ *np->n_back = nq;
+
+ /*
+ * For nqnfs, take it off the timer queue as required.
+ * A neighbour pointer equal to the mount point (cast to
+ * struct nfsnode *) marks the end of the queue; in that case the
+ * mount's nm_tprev/nm_tnext is updated instead of a node's link.
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) && np->n_tnext) {
+ if (np->n_tnext == (struct nfsnode *)nmp)
+ nmp->nm_tprev = np->n_tprev;
+ else
+ np->n_tnext->n_tprev = np->n_tprev;
+ if (np->n_tprev == (struct nfsnode *)nmp)
+ nmp->nm_tnext = np->n_tnext;
+ else
+ np->n_tprev->n_tnext = np->n_tnext;
+ }
+ cache_purge(vp);
+ FREE(vp->v_data, M_NFSNODE); /* v_data is the nfsnode attached in nfs_nget() */
+ vp->v_data = (void *)0;
+ return (0);
+}
+
+/*
+ * Lock an nfsnode.
+ * No lock state is kept here: the routine only waits while the vnode has
+ * VXLOCK set, and afterwards returns ENOENT if the vnode's tag is VT_NON
+ * and 0 otherwise.
+ */
+nfs_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+
+	/*
+	 * Ugh, another place where interruptible mounts will get hung.
+	 * If you make this sleep interruptible, then you have to fix all
+	 * the VOP_LOCK() calls to expect interruptibility.
+	 */
+	for (;;) {
+		if ((vp->v_flag & VXLOCK) == 0)
+			break;
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+	}
+	return ((vp->v_tag == VT_NON) ? ENOENT : 0);
+}
+
+/*
+ * Unlock an nfsnode.
+ * Nothing to undo: the argument is not examined and 0 is always
+ * returned.
+ */
+nfs_unlock(ap)
+ struct vop_unlock_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ return (0);
+}
+
+/*
+ * Check for a locked nfsnode.
+ * Always returns 0 without examining the vnode, i.e. an nfsnode is
+ * never reported as locked.
+ */
+nfs_islocked(ap)
+ struct vop_islocked_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ return (0);
+}
+
+/*
+ * NFS abortop vnode operation, called after namei() when a CREATE/DELETE
+ * isn't actually done.  The only work is releasing the pathname buffer,
+ * and only when the componentname has HASBUF set and SAVESTART clear.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+nfs_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & HASBUF) == HASBUF &&
+	    (cnp->cn_flags & SAVESTART) == 0)
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (0);
+}
diff --git a/sys/nfsclient/nfs_socket.c b/sys/nfsclient/nfs_socket.c
new file mode 100644
index 000000000000..cf88ed33d92d
--- /dev/null
+++ b/sys/nfsclient/nfs_socket.c
@@ -0,0 +1,1990 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_socket.c 8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * Socket operations for use by nfs
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/vnode.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+#include <sys/tprintf.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsrtt.h>
+#include <nfs/nqnfs.h>
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Estimate rto for an nfs rpc sent via an unreliable datagram.
+ * Use the mean and mean deviation of rtt for the appropriate type of rpc
+ * for the frequent rpcs and a default for the others.
+ * The justification for doing "other" this way is that these rpcs
+ * happen so infrequently that timer est. would probably be stale.
+ * Also, since many of these rpcs are
+ * non-idempotent, a conservative timeout is desired.
+ * getattr, lookup - A+2D
+ * read, write - A+4D
+ * other - nm_timeo
+ *
+ * "t" is a timer number as found in proct[]: 0 selects nm_timeo, 1..4
+ * select slot (t) - 1 of nm_srtt[] / nm_sdrtt[].  nm_srtt[] is kept
+ * scaled by 8 and nm_sdrtt[] by 4 (see the gain updates in nfs_reply()),
+ * which is why the shifts below yield A+2D and A+4D.
+ * NFS_SRTT() and NFS_SDRTT() name the slot for a request and are usable
+ * as lvalues; they are only meaningful when proct[(r)->r_procnum] is
+ * non-zero, since a zero entry would give index -1.
+ */
+#define NFS_RTO(n, t) \
+	((t) == 0 ? (n)->nm_timeo : \
+	 ((t) < 3 ? \
+	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
+	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))
+#define NFS_SRTT(r)	((r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1])
+#define NFS_SDRTT(r)	((r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1])
+/*
+ * External data, mostly RPC constants in XDR form
+ */
+extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
+ rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred,
+ rpc_auth_kerb;
+extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers;
+extern time_t nqnfsstarttime;
+extern int nonidempotent[NFS_NPROCS];
+
+/*
+ * Maps errno values to nfs error numbers.
+ * Use NFSERR_IO as the catch all for ones not specifically defined in
+ * RFC 1094.
+ *
+ * The array is declared with ELAST elements and the initializer below
+ * lists 81 values.
+ * NOTE(review): the first entry is NFSERR_PERM, so the table presumably
+ * starts at errno 1 and is indexed by (errno - 1); confirm against the
+ * code that reads it.
+ */
+static int nfsrv_errmap[ELAST] = {
+ NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR,
+ NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO,
+ NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO,
+};
+
+/*
+ * Defines which timer to use for the procnum.
+ * 0 - default
+ * 1 - getattr
+ * 2 - lookup
+ * 3 - read
+ * 4 - write
+ *
+ * Indexed by procedure number (r_procnum).  Going by the order of the
+ * nfsrv_procs[] table in this file, timer 1 sits at getattr (1), timer 2
+ * at lookup (4), timer 3 at readlink, read, readdir and readdirlook
+ * (5, 6, 16, 18) and timer 4 at write (8).
+ * A non-zero entry N selects slot N - 1 of nm_srtt[] / nm_sdrtt[] in
+ * NFS_RTO(), NFS_SRTT() and NFS_SDRTT(); entry 0 has no slot, so any
+ * "proct[...] - 1" subscript is only valid for non-zero entries.
+ */
+static int proct[NFS_NPROCS] = {
+ 0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
+};
+
+/*
+ * There is a congestion window for outstanding rpcs maintained per mount
+ * point. The cwnd size is adjusted in roughly the way that:
+ * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
+ * SIGCOMM '88". ACM, August 1988.
+ * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
+ * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
+ * of rpcs is in progress.
+ * (The sent count and cwnd are scaled for integer arith.)
+ * Variants of "slow start" were tried and were found to be too much of a
+ * performance hit (ave. rtt 3 times larger),
+ * I suspect due to the large rtt that nfs rpcs have.
+ */
+#define NFS_CWNDSCALE 256 /* nm_sent drops by this much per matched reply in nfs_reply() */
+#define NFS_MAXCWND (NFS_CWNDSCALE * 32) /* upper clamp for nm_cwnd; nfs_connect() starts at half of it */
+static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
+int nfs_sbwait();
+void nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();
+void nfs_rcvunlock(), nqnfs_serverd(), nqnfs_clientlease();
+struct mbuf *nfsm_rpchead();
+int nfsrtton = 0;
+struct nfsrtt nfsrtt;
+struct nfsd nfsd_head;
+
+int nfsrv_null(),
+ nfsrv_getattr(),
+ nfsrv_setattr(),
+ nfsrv_lookup(),
+ nfsrv_readlink(),
+ nfsrv_read(),
+ nfsrv_write(),
+ nfsrv_create(),
+ nfsrv_remove(),
+ nfsrv_rename(),
+ nfsrv_link(),
+ nfsrv_symlink(),
+ nfsrv_mkdir(),
+ nfsrv_rmdir(),
+ nfsrv_readdir(),
+ nfsrv_statfs(),
+ nfsrv_noop(),
+ nqnfsrv_readdirlook(),
+ nqnfsrv_getlease(),
+ nqnfsrv_vacated(),
+ nqnfsrv_access();
+
+int (*nfsrv_procs[NFS_NPROCS])() = { /* server handlers; the trailing numbers are the array indexes */
+ nfsrv_null, /* 0 */
+ nfsrv_getattr, /* 1 */
+ nfsrv_setattr, /* 2 */
+ nfsrv_noop, /* 3 */
+ nfsrv_lookup, /* 4 */
+ nfsrv_readlink, /* 5 */
+ nfsrv_read, /* 6 */
+ nfsrv_noop, /* 7 */
+ nfsrv_write, /* 8 */
+ nfsrv_create, /* 9 */
+ nfsrv_remove, /* 10 */
+ nfsrv_rename, /* 11 */
+ nfsrv_link, /* 12 */
+ nfsrv_symlink, /* 13 */
+ nfsrv_mkdir, /* 14 */
+ nfsrv_rmdir, /* 15 */
+ nfsrv_readdir, /* 16 */
+ nfsrv_statfs, /* 17 */
+ nqnfsrv_readdirlook, /* 18 */
+ nqnfsrv_getlease, /* 19 */
+ nqnfsrv_vacated, /* 20 */
+ nfsrv_noop, /* 21 */
+ nqnfsrv_access, /* 22 */
+};
+
+struct nfsreq nfsreqh;
+
+/*
+ * Initialize sockets and congestion for a new NFS connection.
+ * We do not free the sockaddr if error.
+ *
+ * nmp - mount point; a socket is created from nm_nam's address family,
+ *       nm_sotype and nm_soproto and stored in nm_so
+ * rep - request on whose behalf we connect, or NULL; when non-NULL,
+ *       nfs_sigintr() is polled while waiting for the connection
+ *
+ * Returns 0 on success.  On any failure the socket is torn down through
+ * nfs_disconnect() and the error is returned.
+ */
+nfs_connect(nmp, rep)
+ register struct nfsmount *nmp;
+ struct nfsreq *rep;
+{
+ register struct socket *so;
+ int s, error, rcvreserve, sndreserve;
+ struct sockaddr *saddr;
+ struct sockaddr_in *sin;
+ struct mbuf *m;
+ u_short tport;
+
+ nmp->nm_so = (struct socket *)0;
+ saddr = mtod(nmp->nm_nam, struct sockaddr *);
+ if (error = socreate(saddr->sa_family,
+ &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
+ goto bad;
+ so = nmp->nm_so;
+ nmp->nm_soflags = so->so_proto->pr_flags; /* cached protocol flags, consulted below and in nfs_send() */
+
+ /*
+ * Some servers require that the client port be a reserved port number.
+ * Try IPPORT_RESERVED - 1 first and walk downwards while sobind()
+ * reports EADDRINUSE, giving up at IPPORT_RESERVED / 2.
+ */
+ if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
+ MGET(m, M_WAIT, MT_SONAME);
+ sin = mtod(m, struct sockaddr_in *);
+ sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ tport = IPPORT_RESERVED - 1;
+ sin->sin_port = htons(tport);
+ while ((error = sobind(so, m)) == EADDRINUSE &&
+ --tport > IPPORT_RESERVED / 2)
+ sin->sin_port = htons(tport);
+ m_freem(m);
+ if (error)
+ goto bad;
+ }
+
+ /*
+ * Protocols that do not require connections may be optionally left
+ * unconnected for servers that reply from a port other than NFS_PORT.
+ */
+ if (nmp->nm_flag & NFSMNT_NOCONN) {
+ if (nmp->nm_soflags & PR_CONNREQUIRED) { /* "noconn" makes no sense for a connection-based protocol */
+ error = ENOTCONN;
+ goto bad;
+ }
+ } else {
+ if (error = soconnect(so, nmp->nm_nam))
+ goto bad;
+
+ /*
+ * Wait for the connection to complete. Cribbed from the
+ * connect system call but with the wait timing out so
+ * that interruptible mounts don't hang here for a long time.
+ */
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
+ "nfscon", 2 * hz);
+ if ((so->so_state & SS_ISCONNECTING) &&
+ so->so_error == 0 && rep &&
+ (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
+ so->so_state &= ~SS_ISCONNECTING;
+ splx(s);
+ goto bad;
+ }
+ }
+ if (so->so_error) {
+ error = so->so_error;
+ so->so_error = 0;
+ splx(s);
+ goto bad;
+ }
+ splx(s);
+ }
+ if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { /* soft/interruptible mounts: bounded socket buffer waits */
+ so->so_rcv.sb_timeo = (5 * hz);
+ so->so_snd.sb_timeo = (5 * hz);
+ } else {
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_timeo = 0;
+ }
+ if (nmp->nm_sotype == SOCK_DGRAM) {
+ sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
+ rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
+ } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
+ rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
+ } else {
+ if (nmp->nm_sotype != SOCK_STREAM)
+ panic("nfscon sotype");
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); /* return value not checked */
+ }
+ if (so->so_proto->pr_protocol == IPPROTO_TCP) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); /* return value not checked */
+ }
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) /* the extra u_long leaves room for the record mark */
+ * 2;
+ rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
+ * 2;
+ }
+ if (error = soreserve(so, sndreserve, rcvreserve))
+ goto bad;
+ so->so_rcv.sb_flags |= SB_NOINTR;
+ so->so_snd.sb_flags |= SB_NOINTR;
+
+ /*
+ * Initialize other non-zero congestion variables.
+ * NOTE(review): slot 4 of nm_srtt[] / nm_sdrtt[] is written here,
+ * but the timer macros in this file only ever use slots 0-3
+ * (proct[] values 1-4, minus 1); confirm the arrays really have
+ * five elements.
+ */
+ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
+ nmp->nm_srtt[4] = (NFS_TIMEO << 3);
+ nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
+ nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
+ nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
+ nmp->nm_sent = 0;
+ nmp->nm_timeouts = 0;
+ return (0);
+
+bad:
+ nfs_disconnect(nmp);
+ return (error);
+}
+
+/*
+ * Reconnect routine:
+ * Called when a connection is broken on a reliable protocol.
+ * - clean up the old socket
+ * - nfs_connect() again, sleeping on lbolt between failed attempts
+ * - set R_MUSTRESEND for all outstanding requests on mount point
+ * Returns EINTR when nfs_connect() fails with EINTR or ERESTART,
+ * 0 once a connection has been made.
+ * nb: Must be called with the nfs_sndlock() set on the mount point.
+ */
+nfs_reconnect(rep)
+	register struct nfsreq *rep;
+{
+	register struct nfsmount *nmp = rep->r_nmp;
+	register struct nfsreq *req;
+	int error;
+
+	nfs_disconnect(nmp);
+	for (;;) {
+		error = nfs_connect(nmp, rep);
+		if (error == 0)
+			break;
+		if (error == EINTR || error == ERESTART)
+			return (EINTR);
+		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
+	}
+
+	/*
+	 * Every request still queued for this mount went out on the old
+	 * socket, so flag each of them for retransmission.
+	 */
+	for (req = nfsreqh.r_next; req != &nfsreqh; req = req->r_next)
+		if (req->r_nmp == nmp)
+			req->r_flags |= R_MUSTRESEND;
+	return (0);
+}
+
+/*
+ * NFS disconnect. Clean up and unlink.
+ * Does nothing when the mount has no socket; otherwise nm_so is cleared
+ * first and the socket is then shut down (how == 2) and closed.
+ */
+void
+nfs_disconnect(nmp)
+	register struct nfsmount *nmp;
+{
+	register struct socket *so = nmp->nm_so;
+
+	if (so == (struct socket *)0)
+		return;
+	nmp->nm_so = (struct socket *)0;
+	soshutdown(so, 2);
+	soclose(so);
+}
+
+/*
+ * This is the nfs send routine. For connection based socket types, it
+ * must be called with an nfs_sndlock() on the socket.
+ * "rep == NULL" indicates that it has been called from a server.
+ * For the client side:
+ * - return EINTR if the RPC is terminated, 0 otherwise
+ * - set R_MUSTRESEND if the send fails for any reason
+ * - do any cleanup required by recoverable socket errors (???)
+ * For the server side:
+ * - return EINTR or ERESTART if interrupted by a signal
+ * - return EPIPE if a connection is lost for connection based sockets (TCP...)
+ * - do any cleanup required by recoverable socket errors (???)
+ *
+ * so  - socket to send on; on the client side it is replaced by
+ *       rep->r_nmp->nm_so
+ * nam - destination address, handed to sosend() only when the protocol
+ *       is not connection based and the socket is not connected
+ * top - mbuf chain to send; freed here on the two early-return paths
+ *       (NOTE(review): presumably consumed by sosend() otherwise - confirm)
+ * rep - client request, or NULL on the server side
+ *
+ * After a failed sosend() only EINTR, ERESTART, EWOULDBLOCK and EPIPE
+ * are passed back to the caller; every other error is turned into 0.
+ */
+nfs_send(so, nam, top, rep)
+ register struct socket *so;
+ struct mbuf *nam;
+ register struct mbuf *top;
+ struct nfsreq *rep;
+{
+ struct mbuf *sendnam;
+ int error, soflags, flags;
+
+ if (rep) {
+ if (rep->r_flags & R_SOFTTERM) { /* request already terminated: drop the data */
+ m_freem(top);
+ return (EINTR);
+ }
+ if ((so = rep->r_nmp->nm_so) == NULL) { /* no socket right now: ask for a resend later */
+ rep->r_flags |= R_MUSTRESEND;
+ m_freem(top);
+ return (0);
+ }
+ rep->r_flags &= ~R_MUSTRESEND;
+ soflags = rep->r_nmp->nm_soflags;
+ } else
+ soflags = so->so_proto->pr_flags;
+ if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
+ sendnam = (struct mbuf *)0;
+ else
+ sendnam = nam;
+ if (so->so_type == SOCK_SEQPACKET)
+ flags = MSG_EOR;
+ else
+ flags = 0;
+
+ error = sosend(so, sendnam, (struct uio *)0, top,
+ (struct mbuf *)0, flags);
+ if (error) {
+ if (rep) {
+ log(LOG_INFO, "nfs send error %d for server %s\n",error,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ /*
+ * Deal with errors for the client side.
+ */
+ if (rep->r_flags & R_SOFTTERM)
+ error = EINTR;
+ else
+ rep->r_flags |= R_MUSTRESEND;
+ } else
+ log(LOG_INFO, "nfsd send error %d\n", error);
+
+ /*
+ * Handle any recoverable (soft) socket errors here. (???)
+ */
+ if (error != EINTR && error != ERESTART &&
+ error != EWOULDBLOCK && error != EPIPE)
+ error = 0;
+ }
+ return (error);
+}
+
+/*
+ * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
+ * done by soreceive(), but for SOCK_STREAM we must deal with the Record
+ * Mark and consolidate the data into a new mbuf list.
+ * nb: Sometimes TCP passes the data up to soreceive() in long lists of
+ * small mbufs.
+ * For SOCK_STREAM we must be very careful to read an entire record once
+ * we have read any of it, even if the system call has been interrupted.
+ *
+ * rep   - the request we are receiving for; supplies the mount point
+ * aname - result: sender's address mbuf; only requested from
+ *         soreceive() on an unconnected SOCK_DGRAM socket, else left 0
+ * mp    - result: the received mbuf chain, or 0 on error
+ *
+ * For socket types other than SOCK_DGRAM the send lock is taken while
+ * checking for a needed reconnect or resend, and dropped before the
+ * actual receive.  Returns 0 or an errno value; EINTR is returned when
+ * the request already has a reply or has R_SOFTTERM set.
+ */
+nfs_receive(rep, aname, mp)
+ register struct nfsreq *rep;
+ struct mbuf **aname;
+ struct mbuf **mp;
+{
+ register struct socket *so;
+ struct uio auio;
+ struct iovec aio;
+ register struct mbuf *m;
+ struct mbuf *control;
+ u_long len;
+ struct mbuf **getnam;
+ int error, sotype, rcvflg;
+ struct proc *p = curproc; /* XXX */
+
+ /*
+ * Set up arguments for soreceive()
+ */
+ *mp = (struct mbuf *)0;
+ *aname = (struct mbuf *)0;
+ sotype = rep->r_nmp->nm_sotype;
+
+ /*
+ * For reliable protocols, lock against other senders/receivers
+ * in case a reconnect is necessary.
+ * For SOCK_STREAM, first get the Record Mark to find out how much
+ * more there is to get.
+ * We must lock the socket against other receivers
+ * until we have an entire rpc request/reply.
+ */
+ if (sotype != SOCK_DGRAM) {
+ if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep))
+ return (error);
+tryagain: /* always entered with the send lock held */
+ /*
+ * Check for fatal errors and resending request.
+ */
+ /*
+ * Ugh: If a reconnect attempt just happened, nm_so
+ * would have changed. NULL indicates a failed
+ * attempt that has essentially shut down this
+ * mount point.
+ */
+ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
+ nfs_sndunlock(&rep->r_nmp->nm_flag);
+ return (EINTR);
+ }
+ if ((so = rep->r_nmp->nm_so) == NULL) {
+ if (error = nfs_reconnect(rep)) {
+ nfs_sndunlock(&rep->r_nmp->nm_flag);
+ return (error);
+ }
+ goto tryagain;
+ }
+ while (rep->r_flags & R_MUSTRESEND) { /* nfs_send() clears the flag before sending and sets it again on failure */
+ m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+ nfsstats.rpcretries++;
+ if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) {
+ if (error == EINTR || error == ERESTART ||
+ (error = nfs_reconnect(rep))) {
+ nfs_sndunlock(&rep->r_nmp->nm_flag);
+ return (error);
+ }
+ goto tryagain;
+ }
+ }
+ nfs_sndunlock(&rep->r_nmp->nm_flag);
+ if (sotype == SOCK_STREAM) {
+ aio.iov_base = (caddr_t) &len; /* the record mark is read straight into len */
+ aio.iov_len = sizeof(u_long);
+ auio.uio_iov = &aio;
+ auio.uio_iovcnt = 1;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_offset = 0;
+ auio.uio_resid = sizeof(u_long);
+ auio.uio_procp = p;
+ do {
+ rcvflg = MSG_WAITALL;
+ error = soreceive(so, (struct mbuf **)0, &auio,
+ (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
+ if (error == EWOULDBLOCK && rep) {
+ if (rep->r_flags & R_SOFTTERM)
+ return (EINTR);
+ }
+ } while (error == EWOULDBLOCK);
+ if (!error && auio.uio_resid > 0) {
+ log(LOG_INFO,
+ "short receive (%d/%d) from nfs server %s\n",
+ sizeof(u_long) - auio.uio_resid,
+ sizeof(u_long),
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = EPIPE;
+ }
+ if (error)
+ goto errout;
+ len = ntohl(len) & ~0x80000000; /* strip the top bit of the record mark, leaving the length */
+ /*
+ * This is SERIOUS! We are out of sync with the sender
+ * and forcing a disconnect/reconnect is all I can do.
+ */
+ if (len > NFS_MAXPACKET) {
+ log(LOG_ERR, "%s (%d) from nfs server %s\n",
+ "impossible packet length",
+ len,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = EFBIG;
+ goto errout;
+ }
+ auio.uio_resid = len;
+ do { /* once the mark is read, keep going through interrupts until the record is in */
+ rcvflg = MSG_WAITALL;
+ error = soreceive(so, (struct mbuf **)0,
+ &auio, mp, (struct mbuf **)0, &rcvflg);
+ } while (error == EWOULDBLOCK || error == EINTR ||
+ error == ERESTART);
+ if (!error && auio.uio_resid > 0) {
+ log(LOG_INFO,
+ "short receive (%d/%d) from nfs server %s\n",
+ len - auio.uio_resid, len,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = EPIPE;
+ }
+ } else {
+ /*
+ * NB: Since uio_resid is big, MSG_WAITALL is ignored
+ * and soreceive() will return when it has either a
+ * control msg or a data msg.
+ * We have no use for control msg., but must grab them
+ * and then throw them away so we know what is going
+ * on.
+ */
+ auio.uio_resid = len = 100000000; /* Anything Big */
+ auio.uio_procp = p;
+ do {
+ rcvflg = 0;
+ error = soreceive(so, (struct mbuf **)0,
+ &auio, mp, &control, &rcvflg);
+ if (control)
+ m_freem(control);
+ if (error == EWOULDBLOCK && rep) {
+ if (rep->r_flags & R_SOFTTERM)
+ return (EINTR);
+ }
+ } while (error == EWOULDBLOCK ||
+ (!error && *mp == NULL && control));
+ if ((rcvflg & MSG_EOR) == 0)
+ printf("Egad!!\n");
+ if (!error && *mp == NULL)
+ error = EPIPE;
+ len -= auio.uio_resid; /* bytes actually received */
+ }
+errout:
+ if (error && error != EINTR && error != ERESTART) {
+ m_freem(*mp);
+ *mp = (struct mbuf *)0;
+ if (error != EPIPE)
+ log(LOG_INFO,
+ "receive error %d from nfs server %s\n",
+ error,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); /* retake the send lock: nfs_reconnect() and tryagain both need it */
+ if (!error)
+ error = nfs_reconnect(rep);
+ if (!error)
+ goto tryagain;
+ }
+ } else {
+ if ((so = rep->r_nmp->nm_so) == NULL)
+ return (EACCES);
+ if (so->so_state & SS_ISCONNECTED)
+ getnam = (struct mbuf **)0;
+ else
+ getnam = aname;
+ auio.uio_resid = len = 1000000;
+ auio.uio_procp = p;
+ do {
+ rcvflg = 0;
+ error = soreceive(so, getnam, &auio, mp,
+ (struct mbuf **)0, &rcvflg);
+ if (error == EWOULDBLOCK &&
+ (rep->r_flags & R_SOFTTERM))
+ return (EINTR);
+ } while (error == EWOULDBLOCK);
+ len -= auio.uio_resid;
+ }
+ if (error) {
+ m_freem(*mp);
+ *mp = (struct mbuf *)0;
+ }
+ /*
+ * Search for any mbufs that are not a multiple of 4 bytes long
+ * or with m_data not longword aligned.
+ * These could cause pointer alignment problems, so copy them to
+ * well aligned mbufs.
+ * NOTE(review): this is also reached with *mp == 0 after an error;
+ * confirm nfs_realign() accepts a null chain.
+ */
+ nfs_realign(*mp, 5 * NFSX_UNSIGNED);
+ return (error);
+}
+
+/*
+ * Implement receipt of reply on a socket.
+ * We must search through the list of received datagrams matching them
+ * with outstanding requests using the xid, until ours is found.
+ *
+ * myrep - the request whose reply the caller is waiting for
+ *
+ * Returns 0 once myrep->r_mrep has been filled in (by this caller or by
+ * another receiver), or after a single message has been handled when
+ * R_GETONEREP is set in myrep->r_flags.  Otherwise the error from
+ * nfs_rcvlock() or nfs_receive() is returned, except for errors that
+ * NFSIGNORE_SOERROR() says to ignore.
+ */
+/* ARGSUSED */
+nfs_reply(myrep)
+	struct nfsreq *myrep;
+{
+	register struct nfsreq *rep;
+	register struct nfsmount *nmp = myrep->r_nmp;
+	register long t1;
+	struct mbuf *mrep, *nam, *md;
+	u_long rxid, *tl;
+	caddr_t dpos, cp2;
+	int error;
+
+	/*
+	 * Loop around until we get our own reply
+	 */
+	for (;;) {
+		/*
+		 * Lock against other receivers so that I don't get stuck in
+		 * sbwait() after someone else has received my reply for me.
+		 * Also necessary for connection based protocols to avoid
+		 * race conditions during a reconnect.
+		 */
+		if (error = nfs_rcvlock(myrep))
+			return (error);
+		/* Already received, bye bye */
+		if (myrep->r_mrep != NULL) {
+			nfs_rcvunlock(&nmp->nm_flag);
+			return (0);
+		}
+		/*
+		 * Get the next Rpc reply off the socket
+		 */
+		error = nfs_receive(myrep, &nam, &mrep);
+		nfs_rcvunlock(&nmp->nm_flag);
+		if (error) {
+
+			/*
+			 * Ignore routing errors on connectionless protocols??
+			 */
+			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
+				nmp->nm_so->so_error = 0;
+				if (myrep->r_flags & R_GETONEREP)
+					return (0);
+				continue;
+			}
+			return (error);
+		}
+		if (nam)
+			m_freem(nam);
+
+		/*
+		 * Get the xid and check that it is an rpc reply.
+		 * nfsm_dissect() is a macro defined outside this file; it
+		 * presumably relies on the locals t1, cp2, md, dpos, mrep
+		 * and error and jumps to nfsmout on failure, which is why
+		 * that otherwise unreferenced label exists below.
+		 */
+		md = mrep;
+		dpos = mtod(md, caddr_t);
+		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+		rxid = *tl++;
+		if (*tl != rpc_reply) {
+			if (nmp->nm_flag & NFSMNT_NQNFS) {
+				if (nqnfs_callback(nmp, mrep, md, dpos))
+					nfsstats.rpcinvalid++;
+			} else {
+				nfsstats.rpcinvalid++;
+				m_freem(mrep);
+			}
+nfsmout:
+			if (myrep->r_flags & R_GETONEREP)
+				return (0);
+			continue;
+		}
+
+		/*
+		 * Loop through the request list to match up the reply
+		 * Iff no match, just drop the datagram
+		 */
+		rep = nfsreqh.r_next;
+		while (rep != &nfsreqh) {
+			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
+				/* Found it.. */
+				rep->r_mrep = mrep;
+				rep->r_md = md;
+				rep->r_dpos = dpos;
+				if (nfsrtton) {
+					struct rttl *rt;
+					int timer = proct[rep->r_procnum];
+
+					rt = &nfsrtt.rttl[nfsrtt.pos];
+					rt->proc = rep->r_procnum;
+					rt->rto = NFS_RTO(nmp, timer);
+					rt->sent = nmp->nm_sent;
+					rt->cwnd = nmp->nm_cwnd;
+					/*
+					 * Procedures that use the default
+					 * timer (0) have no srtt/sdrtt slot;
+					 * log 0 for them rather than reading
+					 * element -1 of the arrays.
+					 */
+					if (timer > 0) {
+						rt->srtt = nmp->nm_srtt[timer - 1];
+						rt->sdrtt = nmp->nm_sdrtt[timer - 1];
+					} else {
+						rt->srtt = 0;
+						rt->sdrtt = 0;
+					}
+					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
+					rt->tstamp = time;
+					if (rep->r_flags & R_TIMING)
+						rt->rtt = rep->r_rtt;
+					else
+						rt->rtt = 1000000;
+					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
+				}
+				/*
+				 * Update congestion window.
+				 * Do the additive increase of
+				 * one rpc/rtt.
+				 */
+				if (nmp->nm_cwnd <= nmp->nm_sent) {
+					nmp->nm_cwnd +=
+					    (NFS_CWNDSCALE * NFS_CWNDSCALE +
+					    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
+					if (nmp->nm_cwnd > NFS_MAXCWND)
+						nmp->nm_cwnd = NFS_MAXCWND;
+				}
+				rep->r_flags &= ~R_SENT;
+				nmp->nm_sent -= NFS_CWNDSCALE;
+				/*
+				 * Update rtt using a gain of 0.125 on the mean
+				 * and a gain of 0.25 on the deviation.
+				 * R_TIMING is only set for requests whose
+				 * proct[] entry is non-zero, so NFS_SRTT()
+				 * and NFS_SDRTT() index a valid slot here.
+				 */
+				if (rep->r_flags & R_TIMING) {
+					/*
+					 * Since the timer resolution of
+					 * NFS_HZ is so coarse, it can often
+					 * result in r_rtt == 0. Since
+					 * r_rtt == N means that the actual
+					 * rtt is between N+dt and N+2-dt ticks,
+					 * add 1.
+					 */
+					t1 = rep->r_rtt + 1;
+					t1 -= (NFS_SRTT(rep) >> 3);
+					NFS_SRTT(rep) += t1;
+					if (t1 < 0)
+						t1 = -t1;
+					t1 -= (NFS_SDRTT(rep) >> 2);
+					NFS_SDRTT(rep) += t1;
+				}
+				nmp->nm_timeouts = 0;
+				break;
+			}
+			rep = rep->r_next;
+		}
+		/*
+		 * If not matched to a request, drop it.
+		 * If it's mine, get out.
+		 */
+		if (rep == &nfsreqh) {
+			nfsstats.rpcunexpected++;
+			m_freem(mrep);
+		} else if (rep == myrep) {
+			if (rep->r_mrep == NULL)
+				panic("nfsreply nil");
+			return (0);
+		}
+		if (myrep->r_flags & R_GETONEREP)
+			return (0);
+	}
+}
+
+/*
+ * nfs_request - goes something like this
+ *	- fill in request struct
+ *	- links it into list
+ *	- calls nfs_send() for first transmit
+ *	- calls nfs_reply() to wait for the reply
+ *	- break down rpc header and return with nfs reply pointed to
+ *	  by mrep or error
+ * nb: always frees up mreq mbuf list
+ *
+ * Arguments:
+ *	vp	vnode the request is for; its mount supplies the nfsmount
+ *	mrest	mbuf chain holding the procedure arguments
+ *	procnum	RPC procedure number
+ *	procp	process issuing the request (recorded in the nfsreq)
+ *	cred	credentials used to build the RPC authentication
+ *	mrp, mdp, dposp
+ *		on success only: the reply chain, the mbuf currently being
+ *		parsed and the parse position within it
+ *
+ * Returns 0 on success or an errno value.  On every error return the
+ * nfsreq structure and the request mbuf chain have been released.
+ */
+nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
+	struct vnode *vp;
+	struct mbuf *mrest;
+	int procnum;
+	struct proc *procp;
+	struct ucred *cred;
+	struct mbuf **mrp;
+	struct mbuf **mdp;
+	caddr_t *dposp;
+{
+	register struct mbuf *m, *mrep;
+	register struct nfsreq *rep;
+	register u_long *tl;
+	register int i;
+	struct nfsmount *nmp;
+	struct mbuf *md, *mheadend;
+	struct nfsreq *reph;
+	struct nfsnode *np;
+	time_t reqtime, waituntil;
+	caddr_t dpos, cp2;
+	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
+	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
+	u_long xid;
+	u_quad_t frev;
+	char *auth_str;
+
+	nmp = VFSTONFS(vp->v_mount);
+	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
+	rep->r_nmp = nmp;
+	rep->r_vp = vp;
+	rep->r_procp = procp;
+	rep->r_procnum = procnum;
+	/* Total length of the argument chain, needed for the RPC header. */
+	i = 0;
+	m = mrest;
+	while (m) {
+		i += m->m_len;
+		m = m->m_next;
+	}
+	mrest_len = i;
+
+	/*
+	 * Get the RPC header with authorization.
+	 * For Kerberos mounts the first attempt goes out with AUTH_UNIX
+	 * sized credentials; nfs_getauth() is only called after the server
+	 * has rejected the credentials once (failed_auth != 0).
+	 */
+kerbauth:
+	auth_str = (char *)0;
+	if (nmp->nm_flag & NFSMNT_KERB) {
+		if (failed_auth) {
+			error = nfs_getauth(nmp, rep, cred, &auth_type,
+			    &auth_str, &auth_len);
+			if (error) {
+				free((caddr_t)rep, M_NFSREQ);
+				m_freem(mrest);
+				return (error);
+			}
+		} else {
+			auth_type = RPCAUTH_UNIX;
+			auth_len = 5 * NFSX_UNSIGNED;
+		}
+	} else {
+		auth_type = RPCAUTH_UNIX;
+		if (cred->cr_ngroups < 1)
+			panic("nfsreq nogrps");
+		auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
+		    nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
+		    5 * NFSX_UNSIGNED;
+	}
+	m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum,
+	    auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid);
+	if (auth_str)
+		free(auth_str, M_TEMP);
+
+	/*
+	 * For stream protocols, insert a Sun RPC Record Mark.
+	 */
+	if (nmp->nm_sotype == SOCK_STREAM) {
+		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+		*mtod(m, u_long *) = htonl(0x80000000 |
+		    (m->m_pkthdr.len - NFSX_UNSIGNED));
+	}
+	rep->r_mreq = m;
+	rep->r_xid = xid;
+tryagain:
+	if (nmp->nm_flag & NFSMNT_SOFT)
+		rep->r_retry = nmp->nm_retry;
+	else
+		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
+	rep->r_rtt = rep->r_rexmit = 0;
+	if (proct[procnum] > 0)
+		rep->r_flags = R_TIMING;
+	else
+		rep->r_flags = 0;
+	rep->r_mrep = NULL;
+
+	/*
+	 * Do the client side RPC.
+	 */
+	nfsstats.rpcrequests++;
+	/*
+	 * Chain request into list of outstanding requests. Be sure
+	 * to put it LAST so timer finds oldest requests first.
+	 */
+	s = splsoftclock();
+	reph = &nfsreqh;
+	reph->r_prev->r_next = rep;
+	rep->r_prev = reph->r_prev;
+	reph->r_prev = rep;
+	rep->r_next = reph;
+
+	/* Get send time for nqnfs */
+	reqtime = time.tv_sec;
+
+	/*
+	 * If backing off another request or avoiding congestion, don't
+	 * send this one now but let timer do it. If not timing a request,
+	 * do it now.
+	 */
+	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
+	    (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+	    nmp->nm_sent < nmp->nm_cwnd)) {
+		splx(s);
+		if (nmp->nm_soflags & PR_CONNREQUIRED)
+			error = nfs_sndlock(&nmp->nm_flag, rep);
+		if (!error) {
+			/*
+			 * Always copy from the saved request.  "m" still
+			 * refers to the previous copy when we arrive here
+			 * again via tryagain, and that copy has already
+			 * been handed to nfs_send().
+			 */
+			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
+			if (nmp->nm_soflags & PR_CONNREQUIRED)
+				nfs_sndunlock(&nmp->nm_flag);
+		}
+		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
+			nmp->nm_sent += NFS_CWNDSCALE;
+			rep->r_flags |= R_SENT;
+		}
+	} else {
+		splx(s);
+		/* Negative rtt: not sent yet, nfs_timer() will transmit it. */
+		rep->r_rtt = -1;
+	}
+
+	/*
+	 * Wait for the reply from our send or the timer's.
+	 */
+	if (!error || error == EPIPE)
+		error = nfs_reply(rep);
+
+	/*
+	 * RPC done, unlink the request.
+	 */
+	s = splsoftclock();
+	rep->r_prev->r_next = rep->r_next;
+	rep->r_next->r_prev = rep->r_prev;
+	splx(s);
+
+	/*
+	 * Decrement the outstanding request count.
+	 */
+	if (rep->r_flags & R_SENT) {
+		rep->r_flags &= ~R_SENT;	/* paranoia */
+		nmp->nm_sent -= NFS_CWNDSCALE;
+	}
+
+	/*
+	 * If there was a successful reply and a tprintf msg.
+	 * tprintf a response.
+	 */
+	if (!error && (rep->r_flags & R_TPRINTFMSG))
+		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
+		    "is alive again");
+	mrep = rep->r_mrep;
+	md = rep->r_md;
+	dpos = rep->r_dpos;
+	if (error) {
+		m_freem(rep->r_mreq);
+		free((caddr_t)rep, M_NFSREQ);
+		return (error);
+	}
+
+	/*
+	 * break down the rpc header and check if ok
+	 */
+	nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+	if (*tl++ == rpc_msgdenied) {
+		if (*tl == rpc_mismatch)
+			error = EOPNOTSUPP;
+		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
+			if (*tl == rpc_rejectedcred && failed_auth == 0) {
+				/*
+				 * First credential rejection: detach the
+				 * argument chain from the old header so it
+				 * survives the free, then rebuild the
+				 * request with real Kerberos credentials.
+				 */
+				failed_auth++;
+				mheadend->m_next = (struct mbuf *)0;
+				m_freem(mrep);
+				m_freem(rep->r_mreq);
+				goto kerbauth;
+			} else
+				error = EAUTH;
+		} else
+			error = EACCES;
+		m_freem(mrep);
+		m_freem(rep->r_mreq);
+		free((caddr_t)rep, M_NFSREQ);
+		return (error);
+	}
+
+	/*
+	 * skip over the auth_verf, someday we may want to cache auth_short's
+	 * for nfs_reqhead(), but for now just dump it
+	 */
+	if (*++tl != 0) {
+		i = nfsm_rndup(fxdr_unsigned(long, *tl));
+		nfsm_adv(i);
+	}
+	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+	/* 0 == ok */
+	if (*tl == 0) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		if (*tl != 0) {
+			error = fxdr_unsigned(int, *tl);
+			m_freem(mrep);
+			if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+			    error == NQNFS_TRYLATER) {
+				/*
+				 * Server asked us to come back later: sleep
+				 * for the current delay, lengthen the delay
+				 * and resend the same request.
+				 */
+				error = 0;
+				waituntil = time.tv_sec + trylater_delay;
+				while (time.tv_sec < waituntil)
+					(void) tsleep((caddr_t)&lbolt,
+					    PSOCK, "nqnfstry", 0);
+				trylater_delay *= nfs_backoff[trylater_cnt];
+				if (trylater_cnt < 7)
+					trylater_cnt++;
+				goto tryagain;
+			}
+
+			/*
+			 * If the File Handle was stale, invalidate the
+			 * lookup cache, just in case.
+			 */
+			if (error == ESTALE)
+				cache_purge(vp);
+			m_freem(rep->r_mreq);
+			free((caddr_t)rep, M_NFSREQ);
+			return (error);
+		}
+
+		/*
+		 * For nqnfs, get any lease in reply
+		 */
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			if (*tl) {
+				np = VTONFS(vp);
+				nqlflag = fxdr_unsigned(int, *tl);
+				nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+				cachable = fxdr_unsigned(int, *tl++);
+				reqtime += fxdr_unsigned(int, *tl++);
+				if (reqtime > time.tv_sec) {
+					fxdr_hyper(tl, &frev);
+					nqnfs_clientlease(nmp, np, nqlflag,
+					    cachable, reqtime, frev);
+				}
+			}
+		}
+		*mrp = mrep;
+		*mdp = md;
+		*dposp = dpos;
+		m_freem(rep->r_mreq);
+		FREE((caddr_t)rep, M_NFSREQ);
+		return (0);
+	}
+	m_freem(mrep);
+	error = EPROTONOSUPPORT;
+	/*
+	 * nfsm_dissect() and nfsm_adv() jump here when the reply is too
+	 * short to parse.  They are expected to have released mrep already
+	 * (see their definitions in nfsm_subs.h); the request structure and
+	 * the saved request chain still belong to us and must be released
+	 * here, otherwise they are leaked.
+	 */
+nfsmout:
+	m_freem(rep->r_mreq);
+	free((caddr_t)rep, M_NFSREQ);
+	return (error);
+}
+
+/*
+ * Generate the rpc reply header
+ * siz arg. is used to decide if adding a cluster is worthwhile
+ *
+ * Arguments:
+ *	siz	expected size of the reply body (RPC_REPLYSIZ is added)
+ *	nd	request descriptor; supplies the xid to echo (nd_retxid) and
+ *		the nqnfs lease fields (nd_nqlflag, nd_duration)
+ *	err	0, VNOVAL, or the error to report to the client
+ *	cache, frev
+ *		nqnfs lease values piggybacked when a lease was requested
+ *	mrq, mbp, bposp
+ *		out: head of the reply chain, mbuf being built and the
+ *		build position within it
+ *
+ * Always returns 0.
+ */
+nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp)
+	int siz;
+	struct nfsd *nd;
+	int err;
+	int cache;
+	u_quad_t *frev;
+	struct mbuf **mrq;
+	struct mbuf **mbp;
+	caddr_t *bposp;
+{
+	register u_long *tl;
+	register struct mbuf *mreq;
+	caddr_t bpos;
+	struct mbuf *mb, *mb2;
+
+	MGETHDR(mreq, M_WAIT, MT_DATA);
+	mb = mreq;
+	/*
+	 * If this is a big reply, use a cluster else
+	 * try and leave leading space for the lower level headers.
+	 */
+	siz += RPC_REPLYSIZ;
+	if (siz >= MINCLSIZE) {
+		MCLGET(mreq, M_WAIT);
+	} else
+		mreq->m_data += max_hdr;
+	tl = mtod(mreq, u_long *);
+	/* Six words is the largest fixed header; trimmed below if shorter. */
+	mreq->m_len = 6*NFSX_UNSIGNED;
+	bpos = ((caddr_t)tl)+mreq->m_len;
+	*tl++ = nd->nd_retxid;
+	*tl++ = rpc_reply;
+	if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) {
+		*tl++ = rpc_msgdenied;
+		if (err == NQNFS_AUTHERR) {
+			/* Auth error replies are one word shorter. */
+			*tl++ = rpc_autherr;
+			*tl = rpc_rejectedcred;
+			mreq->m_len -= NFSX_UNSIGNED;
+			bpos -= NFSX_UNSIGNED;
+		} else {
+			*tl++ = rpc_mismatch;
+			*tl++ = txdr_unsigned(2);
+			*tl = txdr_unsigned(2);
+		}
+	} else {
+		*tl++ = rpc_msgaccepted;
+		*tl++ = 0;
+		*tl++ = 0;
+		switch (err) {
+		case EPROGUNAVAIL:
+			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
+			break;
+		case EPROGMISMATCH:
+			*tl = txdr_unsigned(RPC_PROGMISMATCH);
+			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+			*tl++ = txdr_unsigned(2);
+			*tl = txdr_unsigned(2);	/* someday 3 */
+			break;
+		case EPROCUNAVAIL:
+			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
+			break;
+		default:
+			*tl = 0;
+			/* VNOVAL: caller wants no NFS status word appended. */
+			if (err != VNOVAL) {
+				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+				/*
+				 * NOTE(review): err indexes nfsrv_errmap[]
+				 * without a range check; confirm the table
+				 * covers every errno that can reach here.
+				 */
+				if (err)
+					*tl = txdr_unsigned(nfsrv_errmap[err - 1]);
+				else
+					*tl = 0;
+			}
+			break;
+		}
+	}
+
+	/*
+	 * For nqnfs, piggyback lease as requested.
+	 */
+	if (nd->nd_nqlflag != NQL_NOVAL && err == 0) {
+		if (nd->nd_nqlflag) {
+			nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
+			*tl++ = txdr_unsigned(nd->nd_nqlflag);
+			*tl++ = txdr_unsigned(cache);
+			*tl++ = txdr_unsigned(nd->nd_duration);
+			txdr_hyper(frev, tl);
+		} else {
+			/* No lease requested: a single zero flag word. */
+			nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+			*tl = 0;
+		}
+	}
+	*mrq = mreq;
+	*mbp = mb;
+	*bposp = bpos;
+	if (err != 0 && err != VNOVAL)
+		nfsstats.srvrpc_errs++;
+	return (0);
+}
+
+/*
+ * Nfs timer routine
+ * Scan the nfsreq list and retransmit any requests that have timed out
+ * To avoid retransmission attempts on STREAM sockets (in the future) make
+ * sure to set the r_retry field to 0 (implies nm_retry == 0).
+ *
+ * Runs with splnet() held and re-arms itself through timeout() every
+ * hz / NFS_HZ ticks. For each outstanding request that has no reply yet
+ * and is not already soft-terminated it:
+ * - sets R_SOFTTERM when nfs_sigintr() reports an interrupt;
+ * - if r_rtt >= 0, advances r_rtt and skips the request until it exceeds
+ * the timeout (nm_timeo on NFSMNT_DUMBTIMR mounts, NFS_RTO() otherwise,
+ * multiplied by nfs_backoff[] once the mount has seen timeouts);
+ * - prints "not responding" once when r_rexmit exceeds nm_deadthresh;
+ * - sets R_SOFTTERM once r_rexmit has reached r_retry;
+ * - on non-datagram sockets only counts the retry; on SOCK_DGRAM sockets
+ * sends a fresh copy of r_mreq if the send buffer has room and the
+ * congestion window allows it.
+ * It also calls nqnfs_serverd() at most once per second.
+ *
+ * The arg parameter is not used.
+ */
+void
+nfs_timer(arg)
+ void *arg;
+{
+ register struct nfsreq *rep;
+ register struct mbuf *m;
+ register struct socket *so;
+ register struct nfsmount *nmp;
+ register int timeo;
+ static long lasttime = 0; /* time.tv_sec of the last nqnfs_serverd() call */
+ int s, error;
+
+ s = splnet();
+ for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
+ nmp = rep->r_nmp;
+ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) /* answered or already given up */
+ continue;
+ if (nfs_sigintr(nmp, rep, rep->r_procp)) {
+ rep->r_flags |= R_SOFTTERM;
+ continue;
+ }
+ if (rep->r_rtt >= 0) { /* negative r_rtt: a send is still pending, skip the timeout test */
+ rep->r_rtt++;
+ if (nmp->nm_flag & NFSMNT_DUMBTIMR)
+ timeo = nmp->nm_timeo;
+ else
+ timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
+ if (nmp->nm_timeouts > 0)
+ timeo *= nfs_backoff[nmp->nm_timeouts - 1];
+ if (rep->r_rtt <= timeo)
+ continue;
+ if (nmp->nm_timeouts < 8) /* keeps the nfs_backoff[] index above at 7 or less */
+ nmp->nm_timeouts++;
+ }
+ /*
+ * Check for server not responding
+ */
+ if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
+ rep->r_rexmit > nmp->nm_deadthresh) {
+ nfs_msg(rep->r_procp,
+ nmp->nm_mountp->mnt_stat.f_mntfromname,
+ "not responding");
+ rep->r_flags |= R_TPRINTFMSG;
+ }
+ if (rep->r_rexmit >= rep->r_retry) { /* too many */
+ nfsstats.rpctimeouts++;
+ rep->r_flags |= R_SOFTTERM;
+ continue;
+ }
+ if (nmp->nm_sotype != SOCK_DGRAM) { /* no resend here, just count the retry */
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ continue;
+ }
+ if ((so = nmp->nm_so) == NULL)
+ continue;
+
+ /*
+ * If there is enough space and the window allows..
+ * Resend it
+ * Set r_rtt to -1 in case we fail to send it now.
+ */
+ rep->r_rtt = -1;
+ if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
+ ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+ (rep->r_flags & R_SENT) ||
+ nmp->nm_sent < nmp->nm_cwnd) &&
+ (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
+ if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
+ error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+ (struct mbuf *)0, (struct mbuf *)0);
+ else
+ error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+ nmp->nm_nam, (struct mbuf *)0);
+ if (error) {
+ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
+ so->so_error = 0;
+ } else {
+ /*
+ * Iff first send, start timing
+ * else turn timing off, backoff timer
+ * and divide congestion window by 2.
+ */
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_TIMING;
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ nmp->nm_cwnd >>= 1;
+ if (nmp->nm_cwnd < NFS_CWNDSCALE)
+ nmp->nm_cwnd = NFS_CWNDSCALE;
+ nfsstats.rpcretries++;
+ } else {
+ rep->r_flags |= R_SENT;
+ nmp->nm_sent += NFS_CWNDSCALE;
+ }
+ rep->r_rtt = 0; /* sent: restart the round trip clock */
+ }
+ }
+ }
+
+ /*
+ * Call the nqnfs server timer once a second to handle leases.
+ */
+ if (lasttime != time.tv_sec) {
+ lasttime = time.tv_sec;
+ nqnfs_serverd();
+ }
+ splx(s);
+ timeout(nfs_timer, (void *)0, hz / NFS_HZ);
+}
+
+/*
+ * Decide whether an nfs operation should be abandoned.
+ * Returns EINTR when the request has already been soft-terminated, or
+ * when the mount is interruptible (NFSMNT_INT) and the process has a
+ * pending signal from NFSINT_SIGMASK that is neither masked nor ignored.
+ * Returns 0 otherwise.  Both rep and p may be nil.
+ */
+nfs_sigintr(nmp, rep, p)
+	struct nfsmount *nmp;
+	struct nfsreq *rep;
+	register struct proc *p;
+{
+
+	/* A soft-terminated request is interrupted whatever the mount type. */
+	if (rep && (rep->r_flags & R_SOFTTERM) != 0)
+		return (EINTR);
+
+	/* Signals only count on interruptible mounts with a known process. */
+	if ((nmp->nm_flag & NFSMNT_INT) == 0 || !p || !p->p_siglist)
+		return (0);
+
+	if (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
+	    NFSINT_SIGMASK)
+		return (EINTR);
+	return (0);
+}
+
+/*
+ * Lock a socket against others.
+ * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
+ * and also to avoid race conditions between the processes with nfs requests
+ * in progress when a reconnect is necessary.
+ *
+ * flagp points at the mount's flag word, which holds the NFSMNT_SNDLOCK
+ * and NFSMNT_WANTSND bits.  rep is the request on whose behalf the lock is
+ * taken and may be nil, in which case the wait cannot be interrupted.
+ * Returns 0 with the lock held, or EINTR if nfs_sigintr() reports that the
+ * request should be abandoned while waiting.
+ */
+nfs_sndlock(flagp, rep)
+	register int *flagp;
+	struct nfsreq *rep;
+{
+	struct proc *p;
+	int slpflag = 0, slptimeo = 0;
+
+	if (rep) {
+		p = rep->r_procp;
+		if (rep->r_nmp->nm_flag & NFSMNT_INT)
+			slpflag = PCATCH;
+	} else
+		p = (struct proc *)0;
+	while (*flagp & NFSMNT_SNDLOCK) {
+		/*
+		 * Without a request there is no mount pointer to hand to
+		 * nfs_sigintr(), so only test for interruption when rep is
+		 * non-nil instead of dereferencing a nil pointer.
+		 */
+		if (rep && nfs_sigintr(rep->r_nmp, rep, p))
+			return (EINTR);
+		*flagp |= NFSMNT_WANTSND;
+		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
+		    slptimeo);
+		/*
+		 * After the first interruptible sleep fall back to timed,
+		 * non-catching sleeps so the loop re-polls nfs_sigintr().
+		 */
+		if (slpflag == PCATCH) {
+			slpflag = 0;
+			slptimeo = 2 * hz;
+		}
+	}
+	*flagp |= NFSMNT_SNDLOCK;
+	return (0);
+}
+
+/*
+ * Release the send lock taken by nfs_sndlock() and wake up anybody who
+ * registered interest in it.  Panics if the lock is not held.
+ */
+void
+nfs_sndunlock(flagp)
+	register int *flagp;
+{
+
+	if (!(*flagp & NFSMNT_SNDLOCK))
+		panic("nfs sndunlock");
+	*flagp &= ~NFSMNT_SNDLOCK;
+
+	/* Nobody waiting: nothing more to do. */
+	if ((*flagp & NFSMNT_WANTSND) == 0)
+		return;
+	*flagp &= ~NFSMNT_WANTSND;
+	wakeup((caddr_t)flagp);
+}
+
+/*
+ * Take the receive lock (NFSMNT_RCVLOCK) of the mount that rep belongs to.
+ * Sleeps while somebody else holds it.  Returns 0 with the lock held, or
+ * EINTR if nfs_sigintr() says the request should be abandoned.
+ */
+nfs_rcvlock(rep)
+	register struct nfsreq *rep;
+{
+	register int *flagp = &rep->r_nmp->nm_flag;
+	int slpflag, slptimeo = 0;
+
+	/* Only interruptible mounts let the first sleep catch signals. */
+	slpflag = (*flagp & NFSMNT_INT) ? PCATCH : 0;
+
+	for (;;) {
+		if ((*flagp & NFSMNT_RCVLOCK) == 0)
+			break;
+		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
+			return (EINTR);
+		*flagp |= NFSMNT_WANTRCV;
+		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
+		    slptimeo);
+		/* Later sleeps are timed and do not catch signals. */
+		if (slpflag == PCATCH) {
+			slpflag = 0;
+			slptimeo = 2 * hz;
+		}
+	}
+	*flagp |= NFSMNT_RCVLOCK;
+	return (0);
+}
+
+/*
+ * Release the receive lock taken by nfs_rcvlock() and wake up anybody who
+ * registered interest in it.  Panics if the lock is not held.
+ */
+void
+nfs_rcvunlock(flagp)
+	register int *flagp;
+{
+
+	if (!(*flagp & NFSMNT_RCVLOCK))
+		panic("nfs rcvunlock");
+	*flagp &= ~NFSMNT_RCVLOCK;
+
+	/* Nobody waiting: nothing more to do. */
+	if ((*flagp & NFSMNT_WANTRCV) == 0)
+		return;
+	*flagp &= ~NFSMNT_WANTRCV;
+	wakeup((caddr_t)flagp);
+}
+
+/*
+ * Check for badly aligned mbuf data areas and
+ * realign data in an mbuf list by copying the data areas up, as required.
+ *
+ * m is the head of the mbuf chain. hsiz is the size of the leading part
+ * of the data that should, when it fits, be kept by itself in the first
+ * rewritten mbuf.
+ *
+ * The chain is walked until an mbuf is found whose length or data address
+ * is not a multiple of 4. From that mbuf onwards all data is copied down
+ * in place, reusing the same mbufs as destinations, and any mbufs left
+ * over at the end are given m_len == 0 (they stay on the chain). The
+ * function returns after that single pass. If every mbuf is aligned
+ * nothing is changed.
+ */
+void
+nfs_realign(m, hsiz)
+ register struct mbuf *m;
+ int hsiz;
+{
+ register struct mbuf *m2;
+ register int siz, mlen, olen;
+ register caddr_t tcp, fcp;
+ struct mbuf *mnew;
+
+ while (m) {
+ /*
+ * This never happens for UDP, rarely happens for TCP
+ * but frequently happens for iso transport.
+ */
+ if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
+ olen = m->m_len; /* bytes still to be copied out of the source mbuf */
+ fcp = mtod(m, caddr_t); /* "from" pointer */
+ if ((int)fcp & 0x3) { /* data address itself is misaligned: move m_data */
+ m->m_flags &= ~M_PKTHDR;
+ if (m->m_flags & M_EXT)
+ m->m_data = m->m_ext.ext_buf +
+ ((m->m_ext.ext_size - olen) & ~0x3);
+ else
+ m->m_data = m->m_dat;
+ }
+ m->m_len = 0;
+ tcp = mtod(m, caddr_t); /* "to" pointer */
+ mnew = m; /* mbuf currently being filled */
+ m2 = m->m_next; /* next mbuf available as a destination */
+
+ /*
+ * If possible, only put the first invariant part
+ * of the RPC header in the first mbuf.
+ */
+ mlen = M_TRAILINGSPACE(m); /* room left in the destination mbuf */
+ if (olen <= hsiz && mlen > hsiz)
+ mlen = hsiz;
+
+ /*
+ * Loop through the mbuf list consolidating data.
+ */
+ while (m) {
+ while (olen > 0) {
+ if (mlen == 0) { /* destination full: start filling the next mbuf */
+ m2->m_flags &= ~M_PKTHDR;
+ if (m2->m_flags & M_EXT)
+ m2->m_data = m2->m_ext.ext_buf;
+ else
+ m2->m_data = m2->m_dat;
+ m2->m_len = 0;
+ mlen = M_TRAILINGSPACE(m2);
+ tcp = mtod(m2, caddr_t);
+ mnew = m2;
+ m2 = m2->m_next;
+ }
+ siz = min(mlen, olen);
+ if (tcp != fcp) /* skip the copy when the data is already in place */
+ bcopy(fcp, tcp, siz);
+ mnew->m_len += siz;
+ mlen -= siz;
+ olen -= siz;
+ tcp += siz;
+ fcp += siz;
+ }
+ m = m->m_next;
+ if (m) {
+ olen = m->m_len;
+ fcp = mtod(m, caddr_t);
+ }
+ }
+
+ /*
+ * Finally, set m_len == 0 for any trailing mbufs that have
+ * been copied out of.
+ */
+ while (m2) {
+ m2->m_len = 0;
+ m2 = m2->m_next;
+ }
+ return;
+ }
+ m = m->m_next;
+ }
+}
+
+/*
+ * Socket upcall routine for the nfsd sockets.
+ * The caddr_t arg is a pointer to the "struct nfssvc_sock".
+ * Essentially do as much as possible non-blocking, else punt and it will
+ * be called with M_WAIT from an nfsd.
+ *
+ * so is the socket with data pending; waitflag is M_DONTWAIT or M_WAIT.
+ * Nothing is done unless the nfssvc_sock is marked SLP_VALID.
+ * Stream sockets: the received data is appended to the raw byte queue
+ * (ns_raw/ns_rawend, byte count in ns_cc) and nfsrv_getstream() is asked
+ * to cut records out of it.
+ * Other sockets: every received message, preceded by the sender address
+ * mbuf when there is one, is appended to the record queue
+ * (ns_rec/ns_recend).
+ * SLP_NEEDQ is set when work has to be deferred to an nfsd and
+ * SLP_DISCONN when the connection looks dead. When called with
+ * M_DONTWAIT an nfsd is woken if there is anything for it to do.
+ */
+void
+nfsrv_rcv(so, arg, waitflag)
+ struct socket *so;
+ caddr_t arg;
+ int waitflag;
+{
+ register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
+ register struct mbuf *m;
+ struct mbuf *mp, *nam;
+ struct uio auio;
+ int flags, error;
+
+ if ((slp->ns_flag & SLP_VALID) == 0)
+ return;
+#ifdef notdef
+ /*
+ * Define this to test for nfsds handling this under heavy load.
+ */
+ if (waitflag == M_DONTWAIT) {
+ slp->ns_flag |= SLP_NEEDQ; goto dorecs;
+ }
+#endif
+ auio.uio_procp = NULL;
+ if (so->so_type == SOCK_STREAM) {
+ /*
+ * If there are already records on the queue, defer soreceive()
+ * to an nfsd so that there is feedback to the TCP layer that
+ * the nfs servers are heavily loaded.
+ */
+ if (slp->ns_rec && waitflag == M_DONTWAIT) {
+ slp->ns_flag |= SLP_NEEDQ;
+ goto dorecs;
+ }
+
+ /*
+ * Do soreceive().
+ */
+ auio.uio_resid = 1000000000; /* effectively "as much as is queued" */
+ flags = MSG_DONTWAIT;
+ error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
+ if (error || mp == (struct mbuf *)0) {
+ if (error == EWOULDBLOCK)
+ slp->ns_flag |= SLP_NEEDQ;
+ else
+ slp->ns_flag |= SLP_DISCONN;
+ goto dorecs;
+ }
+ m = mp;
+ if (slp->ns_rawend) {
+ slp->ns_rawend->m_next = m;
+ slp->ns_cc += 1000000000 - auio.uio_resid; /* bytes actually received */
+ } else {
+ slp->ns_raw = m;
+ slp->ns_cc = 1000000000 - auio.uio_resid;
+ }
+ while (m->m_next)
+ m = m->m_next;
+ slp->ns_rawend = m;
+
+ /*
+ * Now try and parse record(s) out of the raw stream data.
+ */
+ if (error = nfsrv_getstream(slp, waitflag)) {
+ if (error == EPERM) /* bad record length: drop the connection */
+ slp->ns_flag |= SLP_DISCONN;
+ else
+ slp->ns_flag |= SLP_NEEDQ;
+ }
+ } else {
+ do {
+ auio.uio_resid = 1000000000;
+ flags = MSG_DONTWAIT;
+ error = soreceive(so, &nam, &auio, &mp,
+ (struct mbuf **)0, &flags);
+ if (mp) {
+ nfs_realign(mp, 10 * NFSX_UNSIGNED);
+ if (nam) { /* keep the sender address in front of the data */
+ m = nam;
+ m->m_next = mp;
+ } else
+ m = mp;
+ if (slp->ns_recend)
+ slp->ns_recend->m_nextpkt = m;
+ else
+ slp->ns_rec = m;
+ slp->ns_recend = m;
+ m->m_nextpkt = (struct mbuf *)0;
+ }
+ if (error) {
+ if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
+ && error != EWOULDBLOCK) {
+ slp->ns_flag |= SLP_DISCONN;
+ goto dorecs;
+ }
+ }
+ } while (mp);
+ }
+
+ /*
+ * Now try and process the request records, non-blocking.
+ */
+dorecs:
+ if (waitflag == M_DONTWAIT &&
+ (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
+ nfsrv_wakenfsd(slp);
+}
+
+/*
+ * Try and extract an RPC request from the mbuf data list received on a
+ * stream socket. The "waitflag" argument indicates whether or not it
+ * can sleep.
+ *
+ * Works on the raw byte queue of slp (ns_raw, ns_rawend, ns_cc). While
+ * ns_reclen is 0 the next four bytes are consumed as a record mark and
+ * its length part is stored in ns_reclen. Once at least ns_reclen bytes
+ * are queued the record is split off, realigned and appended to the
+ * record queue (ns_rec/ns_recend), and the loop continues with the next
+ * record.
+ *
+ * Returns 0 when no further complete record is available, EPERM when a
+ * record mark gives a length outside NFS_MINPACKET..NFS_MAXPACKET, and
+ * EWOULDBLOCK when m_copym() could not get an mbuf. SLP_GETSTREAM is set
+ * for the duration of the call; entering with it already set panics.
+ */
+nfsrv_getstream(slp, waitflag)
+ register struct nfssvc_sock *slp;
+ int waitflag;
+{
+ register struct mbuf *m;
+ register char *cp1, *cp2;
+ register int len;
+ struct mbuf *om, *m2, *recm;
+ u_long recmark;
+
+ if (slp->ns_flag & SLP_GETSTREAM)
+ panic("nfs getstream");
+ slp->ns_flag |= SLP_GETSTREAM;
+ for (;;) {
+ if (slp->ns_reclen == 0) { /* need a record mark first */
+ if (slp->ns_cc < NFSX_UNSIGNED) {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (0);
+ }
+ m = slp->ns_raw;
+ if (m->m_len >= NFSX_UNSIGNED) {
+ bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
+ m->m_data += NFSX_UNSIGNED;
+ m->m_len -= NFSX_UNSIGNED;
+ } else { /* mark straddles mbufs: gather it byte by byte */
+ cp1 = (caddr_t)&recmark;
+ cp2 = mtod(m, caddr_t);
+ while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
+ while (m->m_len == 0) {
+ m = m->m_next;
+ cp2 = mtod(m, caddr_t);
+ }
+ *cp1++ = *cp2++;
+ m->m_data++;
+ m->m_len--;
+ }
+ }
+ slp->ns_cc -= NFSX_UNSIGNED;
+ slp->ns_reclen = ntohl(recmark) & ~0x80000000; /* strip the top bit of the mark */
+ if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (EPERM);
+ }
+ }
+
+ /*
+ * Now get the record part.
+ */
+ if (slp->ns_cc == slp->ns_reclen) { /* queue holds exactly one record: take it all */
+ recm = slp->ns_raw;
+ slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
+ slp->ns_cc = slp->ns_reclen = 0;
+ } else if (slp->ns_cc > slp->ns_reclen) { /* more than one record queued: split */
+ len = 0;
+ m = slp->ns_raw;
+ om = (struct mbuf *)0;
+ while (len < slp->ns_reclen) {
+ if ((len + m->m_len) > slp->ns_reclen) { /* record ends inside this mbuf */
+ m2 = m_copym(m, 0, slp->ns_reclen - len,
+ waitflag);
+ if (m2) {
+ if (om) {
+ om->m_next = m2;
+ recm = slp->ns_raw;
+ } else
+ recm = m2;
+ m->m_data += slp->ns_reclen - len;
+ m->m_len -= slp->ns_reclen - len;
+ len = slp->ns_reclen;
+ } else {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (EWOULDBLOCK);
+ }
+ } else if ((len + m->m_len) == slp->ns_reclen) { /* record ends on an mbuf boundary */
+ om = m;
+ len += m->m_len;
+ m = m->m_next;
+ recm = slp->ns_raw;
+ om->m_next = (struct mbuf *)0;
+ } else {
+ om = m;
+ len += m->m_len;
+ m = m->m_next;
+ }
+ }
+ slp->ns_raw = m;
+ slp->ns_cc -= len;
+ slp->ns_reclen = 0;
+ } else { /* record not complete yet: wait for more data */
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (0);
+ }
+ nfs_realign(recm, 10 * NFSX_UNSIGNED);
+ if (slp->ns_recend)
+ slp->ns_recend->m_nextpkt = recm;
+ else
+ slp->ns_rec = recm;
+ slp->ns_recend = recm; /* NOTE(review): recm->m_nextpkt is not cleared here; presumably already nil - confirm */
+ }
+}
+
+/*
+ * Take the first queued request record off the socket's record list and
+ * parse its RPC header into nd.
+ * Returns ENOBUFS when the socket is no longer valid or has no record
+ * queued, the error from nfs_getreq() when parsing fails (the sender
+ * address mbuf is released in that case), and 0 otherwise.
+ */
+nfsrv_dorec(slp, nd)
+	register struct nfssvc_sock *slp;
+	register struct nfsd *nd;
+{
+	register struct mbuf *rec;
+	int error;
+
+	if ((slp->ns_flag & SLP_VALID) == 0)
+		return (ENOBUFS);
+	rec = slp->ns_rec;
+	if (rec == (struct mbuf *)0)
+		return (ENOBUFS);
+
+	/* Unlink the record; an empty list has no tail either. */
+	slp->ns_rec = rec->m_nextpkt;
+	if (slp->ns_rec != (struct mbuf *)0)
+		rec->m_nextpkt = (struct mbuf *)0;
+	else
+		slp->ns_recend = (struct mbuf *)0;
+
+	/* A leading MT_SONAME mbuf carries the sender's address. */
+	if (rec->m_type != MT_SONAME) {
+		nd->nd_nam = (struct mbuf *)0;
+		nd->nd_mrep = rec;
+	} else {
+		nd->nd_nam = rec;
+		nd->nd_mrep = rec->m_next;
+		rec->m_next = (struct mbuf *)0;
+	}
+	nd->nd_md = nd->nd_mrep;
+	nd->nd_dpos = mtod(nd->nd_md, caddr_t);
+
+	error = nfs_getreq(nd, TRUE);
+	if (error)
+		m_freem(nd->nd_nam);
+	return (error);
+}
+
+/*
+ * Parse an RPC request
+ * - verify it
+ * - fill in the cred struct.
+ *
+ * nd supplies the request chain (nd_mrep) and the parse position
+ * (nd_md, nd_dpos); on success the position is updated to just past the
+ * RPC header. When has_header is non-zero the xid and the message
+ * direction are parsed first (ten leading words are dissected), otherwise
+ * parsing starts at the RPC version (eight words).
+ *
+ * Returns 0 when the header could be parsed. Requests that must be
+ * refused at the RPC level also return 0, with nd_repstat set to
+ * ERPCMISMATCH, EPROGUNAVAIL, EPROGMISMATCH or EPROCUNAVAIL and
+ * nd_procnum forced to NFSPROC_NOOP. Malformed requests free the request
+ * chain and return EBADRPC. A failure inside one of the nfsm_ macros
+ * returns through nfsmout with the error they set.
+ */
+nfs_getreq(nd, has_header)
+ register struct nfsd *nd;
+ int has_header;
+{
+ register int len, i;
+ register u_long *tl;
+ register long t1;
+ struct uio uio;
+ struct iovec iov;
+ caddr_t dpos, cp2;
+ u_long nfsvers, auth_type;
+ int error = 0, nqnfs = 0;
+ struct mbuf *mrep, *md;
+
+ mrep = nd->nd_mrep;
+ md = nd->nd_md;
+ dpos = nd->nd_dpos;
+ if (has_header) {
+ nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED);
+ nd->nd_retxid = *tl++; /* xid is kept as received for the reply */
+ if (*tl++ != rpc_call) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ } else {
+ nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED);
+ }
+ nd->nd_repstat = 0;
+ if (*tl++ != rpc_vers) {
+ nd->nd_repstat = ERPCMISMATCH;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nfsvers = nfs_vers;
+ if (*tl != nfs_prog) {
+ if (*tl == nqnfs_prog) {
+ nqnfs++;
+ nfsvers = nqnfs_vers;
+ } else {
+ nd->nd_repstat = EPROGUNAVAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ }
+ tl++;
+ if (*tl++ != nfsvers) {
+ nd->nd_repstat = EPROGMISMATCH;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
+ if (nd->nd_procnum == NFSPROC_NULL) /* null procedure: credentials are not examined */
+ return (0);
+ if (nd->nd_procnum >= NFS_NPROCS ||
+ (!nqnfs && nd->nd_procnum > NFSPROC_STATFS) ||
+ (*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) {
+ nd->nd_repstat = EPROCUNAVAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ auth_type = *tl++;
+ len = fxdr_unsigned(int, *tl++); /* length of the credential body */
+ if (len < 0 || len > RPCAUTH_MAXSIZ) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+
+ /*
+ * Handle auth_unix or auth_kerb.
+ */
+ if (auth_type == rpc_auth_unix) {
+ len = fxdr_unsigned(int, *++tl); /* skips one word, then reads a string length */
+ if (len < 0 || len > NFS_MAXNAMLEN) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ nfsm_adv(nfsm_rndup(len));
+ nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+ nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+ nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
+ len = fxdr_unsigned(int, *tl); /* number of supplementary groups */
+ if (len < 0 || len > RPCAUTH_UNIXGIDS) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED); /* groups plus the two verifier words */
+ for (i = 1; i <= len; i++)
+ if (i < NGROUPS)
+ nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
+ else
+ tl++; /* more groups than fit: skip the rest */
+ nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
+ } else if (auth_type == rpc_auth_kerb) {
+ nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+ nd->nd_authlen = fxdr_unsigned(int, *tl);
+ uio.uio_resid = nfsm_rndup(nd->nd_authlen);
+ if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) { /* must fit inside the credential body */
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ uio.uio_offset = 0;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_segflg = UIO_SYSSPACE;
+ iov.iov_base = (caddr_t)nd->nd_authstr;
+ iov.iov_len = RPCAUTH_MAXSIZ;
+ nfsm_mtouio(&uio, uio.uio_resid);
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ nd->nd_flag |= NFSD_NEEDAUTH;
+ }
+
+ /*
+ * Do we have any use for the verifier.
+ * According to the "Remote Procedure Call Protocol Spec." it
+ * should be AUTH_NULL, but some clients make it AUTH_UNIX?
+ * For now, just skip over it
+ */
+ len = fxdr_unsigned(int, *++tl);
+ if (len < 0 || len > RPCAUTH_MAXSIZ) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ if (len > 0) {
+ nfsm_adv(nfsm_rndup(len));
+ }
+
+ /*
+ * For nqnfs, get piggybacked lease request.
+ */
+ if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ nd->nd_nqlflag = fxdr_unsigned(int, *tl);
+ if (nd->nd_nqlflag) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ nd->nd_duration = fxdr_unsigned(int, *tl);
+ } else
+ nd->nd_duration = NQ_MINLEASE;
+ } else {
+ nd->nd_nqlflag = NQL_NOVAL;
+ nd->nd_duration = NQ_MINLEASE;
+ }
+ nd->nd_md = md; /* hand the advanced parse position back to the caller */
+ nd->nd_dpos = dpos;
+ return (0);
+nfsmout:
+ return (error);
+}
+
+/*
+ * Hand a socket with pending work to an idle nfsd.
+ * The nfsd list is searched for the first daemon marked NFSD_WAITING;
+ * it is given a reference to slp and woken up.
+ * SIDE EFFECT: when no daemon is waiting, slp is flagged SLP_DOREC and
+ * NFSD_CHECKSLP is set on the list head so that a running nfsd will go
+ * look through the nfssvc_sock list for the work.
+ * Does nothing if slp is not SLP_VALID.
+ */
+void
+nfsrv_wakenfsd(slp)
+	struct nfssvc_sock *slp;
+{
+	register struct nfsd *nd;
+
+	if ((slp->ns_flag & SLP_VALID) == 0)
+		return;
+	for (nd = nfsd_head.nd_next; nd != (struct nfsd *)&nfsd_head;
+	    nd = nd->nd_next) {
+		if ((nd->nd_flag & NFSD_WAITING) == 0)
+			continue;
+		nd->nd_flag &= ~NFSD_WAITING;
+		/* A waiting daemon must not already own a socket. */
+		if (nd->nd_slp)
+			panic("nfsd wakeup");
+		slp->ns_sref++;
+		nd->nd_slp = slp;
+		wakeup((caddr_t)nd);
+		return;
+	}
+	slp->ns_flag |= SLP_DOREC;
+	nfsd_head.nd_flag |= NFSD_CHECKSLP;
+}
+
+/*
+ * Report a status message about an nfs server through tprintf().
+ * p is the process the message is opened for; it may be nil, in which
+ * case a nil tpr handle is passed to tprintf().
+ */
+nfs_msg(p, server, msg)
+	struct proc *p;
+	char *server, *msg;
+{
+	tpr_t tpr = p ? tprintf_open(p) : NULL;
+
+	tprintf(tpr, "nfs server %s: %s\n", server, msg);
+	tprintf_close(tpr);
+}
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c
new file mode 100644
index 000000000000..5778f7d7f01a
--- /dev/null
+++ b/sys/nfsclient/nfs_subs.c
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Data items converted to XDR form once at startup (by nfs_init()), since
+ * they are constant.
+ * This is kinda hokey, but may save a little time doing byte swaps
+ */
+u_long nfs_procids[NFS_NPROCS];
+u_long nfs_xdrneg1;
+u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
+ rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_rejectedcred,
+ rpc_auth_kerb;
+u_long nfs_vers, nfs_prog, nfs_true, nfs_false;
+
+/* And other global data */
+/* Last transaction id handed out; nfsm_rpchead() pre-increments it and skips 0 */
+static u_long nfs_xid = 0;
+/* vnode type table; presumably indexed by the NFS wire file type - TODO confirm */
+enum vtype ntov_type[7] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON };
+/* The remaining items are defined in other files of the nfs code */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern struct nfsreq nfsreqh;
+extern int nqnfs_piggy[NFS_NPROCS];
+extern struct nfsrtt nfsrtt;
+extern time_t nqnfsstarttime;
+extern u_long nqnfs_prog, nqnfs_vers;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+
+/*
+ * Start the mbuf that will hold the nfs part of an rpc request packet.
+ * Allocates the first mbuf (with a cluster when hsiz >= MINCLSIZE) and,
+ * for a vnode on an NQNFS mount, puts the piggybacked lease request at
+ * the front of it.
+ * The hsiz is the size of the rest of the nfs request header.
+ * (just used to decide if a cluster is a good idea)
+ * vp may be NULL, in which case no lease request is added.
+ * Returns the mbuf; the current build position within it is returned
+ * through *bposp.
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+ struct vnode *vp;
+ u_long procid;
+ int hsiz;
+ caddr_t *bposp;
+{
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ struct mbuf *mb2;
+ struct nfsmount *nmp;
+ int nqflag;
+
+ /* M_WAIT allocations: the result is used without a NULL check */
+ MGET(mb, M_WAIT, MT_DATA);
+ if (hsiz >= MINCLSIZE)
+ MCLGET(mb, M_WAIT);
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * For NQNFS, add lease request.
+ * A needed lease is sent as two words (the lease flag and the mount's
+ * lease term); otherwise a single zero word is sent.
+ * NOTE(review): nfsm_build is a macro defined elsewhere; it presumably
+ * uses the locals mb, mb2 and bpos implicitly - confirm in nfsm_subs.h.
+ */
+ if (vp) {
+ nmp = VFSTONFS(vp->v_mount);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nqflag = NQNFS_NEEDLEASE(vp, procid);
+ if (nqflag) {
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nqflag);
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ } else {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ }
+ /* Finally, return values */
+ *bposp = bpos;
+ return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info.
+ * The authorization string argument is only used when the credentials
+ * come from outside of the kernel.
+ * Returns the head of the mbuf list.
+ *
+ * cr - credentials supplying the uid (and, for RPCAUTH_UNIX, groups)
+ * nqnfs - non-zero selects NQNFS_PROG/NQNFS_VER1, else NFS_PROG/NFS_VER2
+ * procid - procedure number placed in the call header
+ * auth_type - RPCAUTH_UNIX or RPCAUTH_NQNFS (other values add no cred body)
+ * auth_len - length in bytes of the credential body / of auth_str
+ * auth_str - opaque authenticator bytes, used for RPCAUTH_NQNFS only
+ * mrest - mbuf chain holding the rest of the request; linked on the end
+ * mrest_len - length of mrest, used for the packet header length
+ * mbp - out: the last mbuf of the header that was built
+ * xidp - out: the transaction id used, in XDR form
+ */
+struct mbuf *
+nfsm_rpchead(cr, nqnfs, procid, auth_type, auth_len, auth_str, mrest,
+ mrest_len, mbp, xidp)
+ register struct ucred *cr;
+ int nqnfs;
+ int procid;
+ int auth_type;
+ int auth_len;
+ char *auth_str;
+ struct mbuf *mrest;
+ int mrest_len;
+ struct mbuf **mbp;
+ u_long *xidp;
+{
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ register int i;
+ struct mbuf *mreq, *mb2;
+ int siz, grpsiz, authsiz;
+
+ /*
+ * authsiz is the credential length that goes on the wire: auth_len
+ * rounded up, plus two words (uid and string length) for RPCAUTH_NQNFS.
+ */
+ authsiz = nfsm_rndup(auth_len);
+ if (auth_type == RPCAUTH_NQNFS)
+ authsiz += 2 * NFSX_UNSIGNED;
+ /*
+ * The header proper is authsiz plus ten words (eight call header words
+ * and the two verifier words). Use a cluster if that is large,
+ * otherwise align the data within the packet header mbuf.
+ */
+ MGETHDR(mb, M_WAIT, MT_DATA);
+ if ((authsiz + 10*NFSX_UNSIGNED) >= MINCLSIZE) {
+ MCLGET(mb, M_WAIT);
+ } else if ((authsiz + 10*NFSX_UNSIGNED) < MHLEN) {
+ MH_ALIGN(mb, authsiz + 10*NFSX_UNSIGNED);
+ } else {
+ MH_ALIGN(mb, 8*NFSX_UNSIGNED);
+ }
+ mb->m_len = 0;
+ mreq = mb;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * First the RPC header.
+ * Words: xid, call, rpc version, program, version, procedure.
+ * A transaction id of zero is never used.
+ */
+ nfsm_build(tl, u_long *, 8*NFSX_UNSIGNED);
+ if (++nfs_xid == 0)
+ nfs_xid++;
+ *tl++ = *xidp = txdr_unsigned(nfs_xid);
+ *tl++ = rpc_call;
+ *tl++ = rpc_vers;
+ if (nqnfs) {
+ *tl++ = txdr_unsigned(NQNFS_PROG);
+ *tl++ = txdr_unsigned(NQNFS_VER1);
+ } else {
+ *tl++ = txdr_unsigned(NFS_PROG);
+ *tl++ = txdr_unsigned(NFS_VER2);
+ }
+ *tl++ = txdr_unsigned(procid);
+
+ /*
+ * And then the authorization cred.
+ * (flavor and length fill the last two of the eight words built above)
+ */
+ *tl++ = txdr_unsigned(auth_type);
+ *tl = txdr_unsigned(authsiz);
+ switch (auth_type) {
+ case RPCAUTH_UNIX:
+ /*
+ * Five fixed words (stamp, hostname length, uid, gid, group count)
+ * followed by the supplementary groups; the group count is whatever
+ * is left of auth_len after the fixed words. cr_groups[0] is sent
+ * as the gid, so the list proper starts at cr_groups[1].
+ */
+ nfsm_build(tl, u_long *, auth_len);
+ *tl++ = 0; /* stamp ?? */
+ *tl++ = 0; /* NULL hostname */
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl++ = txdr_unsigned(cr->cr_groups[0]);
+ grpsiz = (auth_len >> 2) - 5;
+ *tl++ = txdr_unsigned(grpsiz);
+ for (i = 1; i <= grpsiz; i++)
+ *tl++ = txdr_unsigned(cr->cr_groups[i]);
+ break;
+ case RPCAUTH_NQNFS:
+ /*
+ * uid and string length, then the auth_len bytes of auth_str copied
+ * in by hand, adding mbufs (clusters when much remains) as the
+ * current one fills up.
+ */
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl = txdr_unsigned(auth_len);
+ siz = auth_len;
+ while (siz > 0) {
+ if (M_TRAILINGSPACE(mb) == 0) {
+ MGET(mb2, M_WAIT, MT_DATA);
+ if (siz >= MINCLSIZE)
+ MCLGET(mb2, M_WAIT);
+ mb->m_next = mb2;
+ mb = mb2;
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+ }
+ i = min(siz, M_TRAILINGSPACE(mb));
+ bcopy(auth_str, bpos, i);
+ mb->m_len += i;
+ auth_str += i;
+ bpos += i;
+ siz -= i;
+ }
+ /*
+ * Zero fill up to the rounded length.
+ * NOTE(review): the pad is written without checking the trailing
+ * space of mb; presumably there is always room - confirm.
+ */
+ if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {
+ for (i = 0; i < siz; i++)
+ *bpos++ = '\0';
+ mb->m_len += siz;
+ }
+ break;
+ };
+ /* The verifier is always RPCAUTH_NULL with a zero length */
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(RPCAUTH_NULL);
+ *tl = 0;
+ /* Hang the rest of the request on the end and fix up the packet header */
+ mb->m_next = mrest;
+ mreq->m_pkthdr.len = authsiz + 10*NFSX_UNSIGNED + mrest_len;
+ mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+ *mbp = mb;
+ return (mreq);
+}
+
+/*
+ * copies mbuf chain to the uio scatter/gather list
+ *
+ * Copies siz bytes, starting at *dpos within the mbuf *mrep, into the
+ * iovecs of uiop, advancing uio_offset, uio_resid and the iovec array as
+ * it goes. Afterwards the bytes that round siz up (nfsm_rndup) are
+ * skipped and *mrep / *dpos are left pointing just past them.
+ * Returns 0, EFBIG if the uio runs out of iovecs, EBADRPC if the mbuf
+ * chain runs out of data, or the error from nfs_adv().
+ * On the EFBIG and EBADRPC returns *mrep and *dpos are not updated.
+ */
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+ struct mbuf **mrep;
+ register struct uio *uiop;
+ int siz;
+ caddr_t *dpos;
+{
+ register char *mbufcp, *uiocp;
+ register int xfer, left, len;
+ register struct mbuf *mp;
+ long uiosiz, rem;
+ int error = 0;
+
+ mp = *mrep;
+ mbufcp = *dpos;
+ /* len: bytes left in the current mbuf; rem: pad bytes after the data */
+ len = mtod(mp, caddr_t)+mp->m_len-mbufcp;
+ rem = nfsm_rndup(siz)-siz;
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EFBIG);
+ /* Take as much of the current iovec as is still wanted */
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left;
+ while (left > 0) {
+ /* Step over exhausted (or empty) mbufs */
+ while (len == 0) {
+ mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ mbufcp = mtod(mp, caddr_t);
+ len = mp->m_len;
+ }
+ xfer = (left > len) ? len : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (mbufcp, uiocp, xfer);
+ else
+#endif
+ /* NOTE(review): the return value of copyout() is ignored */
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(mbufcp, uiocp, xfer);
+ else
+ copyout(mbufcp, uiocp, xfer);
+ left -= xfer;
+ len -= xfer;
+ mbufcp += xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ /*
+ * siz has not been reduced yet, so iov_len <= siz means the whole
+ * iovec was just consumed; otherwise only its first uiosiz bytes were.
+ */
+ if (uiop->uio_iov->iov_len <= siz) {
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ *dpos = mbufcp;
+ *mrep = mp;
+ /* Skip the pad, moving on to later mbufs when this one is too short */
+ if (rem > 0) {
+ if (len < rem)
+ error = nfs_adv(mrep, dpos, rem, len);
+ else
+ *dpos += rem;
+ }
+ return (error);
+}
+
+/*
+ * copies a uio scatter/gather list to an mbuf chain...
+ *
+ * Appends siz bytes taken from the iovecs of uiop to the chain whose
+ * current last mbuf is *mq, allocating further mbufs (with clusters when
+ * siz > MLEN) as each one fills, then zero fills up to the rounded
+ * (nfsm_rndup) length. uio_offset, uio_resid and the iovec array are
+ * advanced. On return *mq is the mbuf last written and *bpos the position
+ * just past the data in it.
+ * Returns 0, or EINVAL if the uio runs out of iovecs.
+ */
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+ register struct uio *uiop;
+ struct mbuf **mq;
+ int siz;
+ caddr_t *bpos;
+{
+ register char *uiocp;
+ register struct mbuf *mp, *mp2;
+ register int xfer, left, mlen;
+ int uiosiz, clflg, rem;
+ char *cp;
+
+ if (siz > MLEN) /* or should it >= MCLBYTES ?? */
+ clflg = 1;
+ else
+ clflg = 0;
+ rem = nfsm_rndup(siz)-siz;
+ /* mp is the mbuf being filled, mp2 the current tail of the chain */
+ mp = mp2 = *mq;
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EINVAL);
+ /* Take as much of the current iovec as is still wanted */
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left;
+ while (left > 0) {
+ mlen = M_TRAILINGSPACE(mp);
+ if (mlen == 0) {
+ /* Current mbuf is full: link a fresh one on the end */
+ MGET(mp, M_WAIT, MT_DATA);
+ if (clflg)
+ MCLGET(mp, M_WAIT);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ mp2 = mp;
+ mlen = M_TRAILINGSPACE(mp);
+ }
+ xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+#endif
+ /* NOTE(review): the return value of copyin() is ignored */
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+ copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ mp->m_len += xfer;
+ left -= xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ /*
+ * siz has not been reduced yet, so iov_len <= siz means the whole
+ * iovec was just consumed; otherwise only its first uiosiz bytes were.
+ */
+ if (uiop->uio_iov->iov_len <= siz) {
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ /* Zero fill to the rounded length, in a new mbuf if there is no room */
+ if (rem > 0) {
+ if (rem > M_TRAILINGSPACE(mp)) {
+ MGET(mp, M_WAIT, MT_DATA);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ }
+ cp = mtod(mp, caddr_t)+mp->m_len;
+ for (left = 0; left < rem; left++)
+ *cp++ = '\0';
+ mp->m_len += rem;
+ *bpos = cp;
+ } else
+ *bpos = mtod(mp, caddr_t)+mp->m_len;
+ *mq = mp;
+ return (0);
+}
+
+/*
+ * Help break down an mbuf chain by setting the first siz bytes contiguous
+ * pointed to by returned val.
+ * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
+ * cases. (The macros use the vars. dpos and dpos2)
+ *
+ * mdp, dposp - in/out: current mbuf and position within it
+ * siz - number of contiguous bytes wanted
+ * left - bytes remaining in the current mbuf from *dposp
+ * cp2 - out: pointer to the siz contiguous bytes
+ * Returns 0 or EBADRPC if the chain holds fewer than siz more bytes.
+ * Panics if the data has to be gathered and siz > MHLEN.
+ */
+nfsm_disct(mdp, dposp, siz, left, cp2)
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ int siz;
+ int left;
+ caddr_t *cp2;
+{
+ register struct mbuf *mp, *mp2;
+ register int siz2, xfer;
+ register caddr_t p;
+
+ mp = *mdp;
+ /* Nothing left here: move on to the next mbuf that holds data */
+ while (left == 0) {
+ *mdp = mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ left = mp->m_len;
+ *dposp = mtod(mp, caddr_t);
+ }
+ if (left >= siz) {
+ /* Easy case: the bytes are already contiguous */
+ *cp2 = *dposp;
+ *dposp += siz;
+ } else if (mp->m_next == NULL) {
+ return (EBADRPC);
+ } else if (siz > MHLEN) {
+ panic("nfs S too big");
+ } else {
+ /*
+ * The item straddles mbufs. Insert a new mbuf after the current
+ * one, move the tail of the current mbuf into it and then pull the
+ * remaining siz2 bytes up from the mbufs that follow.
+ */
+ MGET(mp2, M_WAIT, MT_DATA);
+ mp2->m_next = mp->m_next;
+ mp->m_next = mp2;
+ mp->m_len -= left;
+ mp = mp2;
+ *cp2 = p = mtod(mp, caddr_t);
+ bcopy(*dposp, p, left); /* Copy what was left */
+ siz2 = siz-left;
+ p += left;
+ mp2 = mp->m_next;
+ /* Loop around copying up the siz2 bytes */
+ while (siz2 > 0) {
+ if (mp2 == NULL)
+ return (EBADRPC);
+ xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+ if (xfer > 0) {
+ bcopy(mtod(mp2, caddr_t), p, xfer);
+ /* The copied bytes are removed from the front of the source mbuf */
+ NFSMADV(mp2, xfer);
+ mp2->m_len -= xfer;
+ p += xfer;
+ siz2 -= xfer;
+ }
+ if (siz2 > 0)
+ mp2 = mp2->m_next;
+ }
+ /* Dissection continues at the start of the last mbuf copied from */
+ mp->m_len = siz;
+ *mdp = mp2;
+ *dposp = mtod(mp2, caddr_t);
+ }
+ return (0);
+}
+
+/*
+ * Advance the position in the mbuf chain.
+ * Starting from mbuf *mdp, of which "left" bytes remain, step along the
+ * chain until an mbuf is reached that holds at least the offs bytes still
+ * to be skipped.  *mdp is set to that mbuf and *dposp to offs bytes past
+ * the start of its data.
+ * Returns 0, or EBADRPC (leaving *mdp and *dposp alone) if the chain ends
+ * first.
+ */
+nfs_adv(mdp, dposp, offs, left)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int offs;
+	int left;
+{
+	register struct mbuf *cur;
+	register int avail;
+
+	for (cur = *mdp, avail = left; avail < offs; avail = cur->m_len) {
+		/* Use up what this mbuf has and go on to the next one. */
+		offs -= avail;
+		if ((cur = cur->m_next) == NULL)
+			return (EBADRPC);
+	}
+	*mdp = cur;
+	*dposp = mtod(cur, caddr_t) + offs;
+	return (0);
+}
+
+/*
+ * Copy a string into mbufs for the hard cases...
+ *
+ * Appends the length word followed by the siz bytes at cp to the chain
+ * whose last mbuf is *mb and whose build position is *bpos, adding mbufs
+ * (with clusters when more than MLEN bytes remain) as required. The final
+ * piece is rounded up with nfsm_rndup. On return *mb is the last mbuf
+ * added and *bpos the position just past its data. Always returns 0.
+ * NOTE(review): *mb is set from m1, which is only assigned inside the
+ * loop, so this presumably relies on callers passing a string that does
+ * not fit in the current mbuf - confirm against the calling macro.
+ */
+nfsm_strtmbuf(mb, bpos, cp, siz)
+ struct mbuf **mb;
+ char **bpos;
+ char *cp;
+ long siz;
+{
+ register struct mbuf *m1, *m2;
+ long left, xfer, len, tlen;
+ u_long *tl;
+ int putsize;
+
+ /* putsize stays set until the length word has been written */
+ putsize = 1;
+ m2 = *mb;
+ left = M_TRAILINGSPACE(m2);
+ if (left > 0) {
+ /* Room in the current mbuf: length word first, then fill the rest */
+ tl = ((u_long *)(*bpos));
+ *tl++ = txdr_unsigned(siz);
+ putsize = 0;
+ left -= NFSX_UNSIGNED;
+ m2->m_len += NFSX_UNSIGNED;
+ if (left > 0) {
+ bcopy(cp, (caddr_t) tl, left);
+ siz -= left;
+ cp += left;
+ m2->m_len += left;
+ left = 0;
+ }
+ }
+ /* Loop around adding mbufs */
+ while (siz > 0) {
+ MGET(m1, M_WAIT, MT_DATA);
+ if (siz > MLEN)
+ MCLGET(m1, M_WAIT);
+ /* m_len temporarily holds the space available; the real length is set below */
+ m1->m_len = NFSMSIZ(m1);
+ m2->m_next = m1;
+ m2 = m1;
+ tl = mtod(m1, u_long *);
+ tlen = 0;
+ if (putsize) {
+ *tl++ = txdr_unsigned(siz);
+ m1->m_len -= NFSX_UNSIGNED;
+ tlen = NFSX_UNSIGNED;
+ putsize = 0;
+ }
+ if (siz < m1->m_len) {
+ /* Last piece: zero the final word first so that the pad bytes are nul */
+ len = nfsm_rndup(siz);
+ xfer = siz;
+ if (xfer < len)
+ *(tl+(xfer>>2)) = 0;
+ } else {
+ xfer = len = m1->m_len;
+ }
+ bcopy(cp, (caddr_t) tl, xfer);
+ m1->m_len = len+tlen;
+ siz -= xfer;
+ cp += xfer;
+ }
+ *mb = m1;
+ *bpos = mtod(m1, caddr_t)+m1->m_len;
+ return (0);
+}
+
+/*
+ * Called once to initialize data structures...
+ * Converts the constant rpc/nfs values to XDR form, marks every async
+ * daemon slot as unused, initializes the nfs buffer queue, the nfsnode
+ * table, the server data structures and request cache, sets up the nqnfs
+ * server state (only while nqnfsstarttime is still zero) and finally
+ * initializes the request list and starts the nfs timer.
+ */
+nfs_init()
+{
+ register int i;
+
+ nfsrtt.pos = 0;
+ /* Constants kept in XDR form so they need not be converted per request */
+ rpc_vers = txdr_unsigned(RPC_VER2);
+ rpc_call = txdr_unsigned(RPC_CALL);
+ rpc_reply = txdr_unsigned(RPC_REPLY);
+ rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+ rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+ rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+ rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+ rpc_rejectedcred = txdr_unsigned(AUTH_REJECTCRED);
+ rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+ rpc_auth_kerb = txdr_unsigned(RPCAUTH_NQNFS);
+ nfs_vers = txdr_unsigned(NFS_VER2);
+ nfs_prog = txdr_unsigned(NFS_PROG);
+ nfs_true = txdr_unsigned(TRUE);
+ nfs_false = txdr_unsigned(FALSE);
+ /* Loop thru nfs procids */
+ for (i = 0; i < NFS_NPROCS; i++)
+ nfs_procids[i] = txdr_unsigned(i);
+ /* Ensure async daemons disabled */
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ nfs_iodwant[i] = (struct proc *)0;
+ TAILQ_INIT(&nfs_bufq);
+ nfs_xdrneg1 = txdr_unsigned(-1);
+ nfs_nhinit(); /* Init the nfsnode table */
+ nfsrv_init(0); /* Init server data structures */
+ nfsrv_initcache(); /* Init the server request cache */
+
+ /*
+ * Initialize the nqnfs server stuff.
+ * The start time is the boot time plus the maximum lease, clock skew
+ * and write slack; the lease timer queue head is made to point at
+ * itself (empty) and the lease hash table is allocated.
+ */
+ if (nqnfsstarttime == 0) {
+ nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+ + nqsrv_clockskew + nqsrv_writeslack;
+ NQLOADNOVRAM(nqnfsstarttime);
+ nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+ nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+ nqthead.th_head[0] = &nqthead;
+ nqthead.th_head[1] = &nqthead;
+ nqfhead = hashinit(NQLCHSZ, M_NQLEASE, &nqfheadhash);
+ }
+
+ /*
+ * Initialize reply list and start timer
+ */
+ nfsreqh.r_prev = nfsreqh.r_next = &nfsreqh;
+ nfs_timer();
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ * that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ * error otherwise
+ */
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with
+ * the values on the mbuf list and
+ * Iff vaper not NULL
+ * copy the attributes to *vaper
+ *
+ * vpp - in/out: the vnode; replaced when a device node turns out to be an
+ * alias of an existing vnode
+ * mdp, dposp - in/out: position of the attributes in the reply mbuf chain;
+ * advanced past them on success
+ * Returns 0, the error from nfsm_disct(), or EOPNOTSUPP for a fifo when
+ * the kernel is built without FIFO.
+ */
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+ struct vnode **vpp;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct vattr *vaper;
+{
+ register struct vnode *vp = *vpp;
+ register struct vattr *vap;
+ register struct nfsv2_fattr *fp;
+ extern int (**spec_nfsv2nodeop_p)();
+ register struct nfsnode *np, *nq, **nhpp;
+ register long t1;
+ caddr_t dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *md;
+ enum vtype vtyp;
+ u_short vmode;
+ long rdev;
+ struct timespec mtime;
+ struct vnode *nvp;
+
+ md = *mdp;
+ dpos = *dposp;
+ /* t1: bytes left in the current mbuf; get the whole fattr contiguous */
+ t1 = (mtod(md, caddr_t) + md->m_len) - dpos;
+ isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ if (error = nfsm_disct(&md, &dpos, NFSX_FATTR(isnq), t1, &cp2))
+ return (error);
+ fp = (struct nfsv2_fattr *)cp2;
+ vtyp = nfstov_type(fp->fa_type);
+ vmode = fxdr_unsigned(u_short, fp->fa_mode);
+ /* When the type field says VNON or VREG, take the type from the mode bits */
+ if (vtyp == VNON || vtyp == VREG)
+ vtyp = IFTOVT(vmode);
+ if (isnq) {
+ rdev = fxdr_unsigned(long, fp->fa_nqrdev);
+ fxdr_nqtime(&fp->fa_nqmtime, &mtime);
+ } else {
+ rdev = fxdr_unsigned(long, fp->fa_nfsrdev);
+ fxdr_nfstime(&fp->fa_nfsmtime, &mtime);
+ }
+ /*
+ * If v_type == VNON it is a new node, so fill in the v_type,
+ * n_mtime fields. Check to see if it represents a special
+ * device, and if so, check for a possible alias. Once the
+ * correct vnode has been obtained, fill in the rest of the
+ * information.
+ */
+ np = VTONFS(vp);
+ if (vp->v_type == VNON) {
+ /* A character device with rdev 0xffffffff is how a fifo is represented */
+ if (vtyp == VCHR && rdev == 0xffffffff)
+ vp->v_type = vtyp = VFIFO;
+ else
+ vp->v_type = vtyp;
+ if (vp->v_type == VFIFO) {
+#ifdef FIFO
+ extern int (**fifo_nfsv2nodeop_p)();
+ vp->v_op = fifo_nfsv2nodeop_p;
+#else
+ return (EOPNOTSUPP);
+#endif /* FIFO */
+ }
+ if (vp->v_type == VCHR || vp->v_type == VBLK) {
+ vp->v_op = spec_nfsv2nodeop_p;
+ if (nvp = checkalias(vp, (dev_t)rdev, vp->v_mount)) {
+ /*
+ * Discard unneeded vnode, but save its nfsnode.
+ * The nfsnode is unhooked from its hash chain and handed
+ * to the alias vnode before the old vnode is released.
+ */
+ if (nq = np->n_forw)
+ nq->n_back = np->n_back;
+ *np->n_back = nq;
+ nvp->v_data = vp->v_data;
+ vp->v_data = NULL;
+ vp->v_op = spec_vnodeop_p;
+ vrele(vp);
+ vgone(vp);
+ /*
+ * Reinitialize aliased node.
+ * (put the nfsnode back at the head of its hash chain)
+ */
+ np->n_vnode = nvp;
+ nhpp = (struct nfsnode **)nfs_hash(&np->n_fh);
+ if (nq = *nhpp)
+ nq->n_back = &np->n_forw;
+ np->n_forw = nq;
+ np->n_back = nhpp;
+ *nhpp = np;
+ *vpp = vp = nvp;
+ }
+ }
+ np->n_mtime = mtime.ts_sec;
+ }
+ /* Fill in the cached attributes; only the low 12 mode bits are kept */
+ vap = &np->n_vattr;
+ vap->va_type = vtyp;
+ vap->va_mode = (vmode & 07777);
+ vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+ vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+ vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+ vap->va_rdev = (dev_t)rdev;
+ vap->va_mtime = mtime;
+ vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+ if (isnq) {
+ fxdr_hyper(&fp->fa_nqsize, &vap->va_size);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nqblocksize);
+ fxdr_hyper(&fp->fa_nqbytes, &vap->va_bytes);
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nqfileid);
+ fxdr_nqtime(&fp->fa_nqatime, &vap->va_atime);
+ vap->va_flags = fxdr_unsigned(u_long, fp->fa_nqflags);
+ fxdr_nqtime(&fp->fa_nqctime, &vap->va_ctime);
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nqgen);
+ fxdr_hyper(&fp->fa_nqfilerev, &vap->va_filerev);
+ } else {
+ vap->va_size = fxdr_unsigned(u_long, fp->fa_nfssize);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nfsblocksize);
+ vap->va_bytes = fxdr_unsigned(long, fp->fa_nfsblocks) * NFS_FABLKSIZE;
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nfsfileid);
+ fxdr_nfstime(&fp->fa_nfsatime, &vap->va_atime);
+ vap->va_flags = 0;
+ /* Only the seconds of ctime are used as a time; the usec word supplies va_gen */
+ vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa_nfsctime.nfs_sec);
+ vap->va_ctime.ts_nsec = 0;
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nfsctime.nfs_usec);
+ vap->va_filerev = 0;
+ }
+ /*
+ * Reconcile the size with the one held in the nfsnode. For a regular
+ * file with NMODIFIED set the larger of the two wins; in every other
+ * case the new size does. The pager is told the size of regular files.
+ */
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) {
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else
+ np->n_size = vap->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else
+ np->n_size = vap->va_size;
+ }
+ /* Time stamp the cache entry and hand back the new mbuf position */
+ np->n_attrstamp = time.tv_sec;
+ *dposp = dpos;
+ *mdp = md;
+ if (vaper != NULL) {
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+#ifdef notdef
+ if ((np->n_flag & NMODIFIED) && np->n_size > vap->va_size)
+ if (np->n_size > vap->va_size)
+ vaper->va_size = np->n_size;
+#endif
+ /* With NCHG set, times recorded in the nfsnode override the copy returned */
+ if (np->n_flag & NCHG) {
+ if (np->n_flag & NACC) {
+ vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+ vaper->va_atime.ts_nsec =
+ np->n_atim.tv_usec * 1000;
+ }
+ if (np->n_flag & NUPD) {
+ vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+ vaper->va_mtime.ts_nsec =
+ np->n_mtim.tv_usec * 1000;
+ }
+ }
+ }
+ return (0);
+}
+
+/*
+ * Check the time stamp
+ * If the cache is valid, copy contents to *vaper and return 0
+ * otherwise return an error
+ *
+ * On NFSMNT_NQLOOKLEASE mounts the entry is valid when NQNFS_CKCACHABLE
+ * allows a read and the entry has been stamped; on other mounts when it
+ * is younger than NFS_ATTRTIMEO(np) seconds. A miss returns ENOENT.
+ * Hits and misses are counted in nfsstats.
+ */
+nfs_getattrcache(vp, vaper)
+ register struct vnode *vp;
+ struct vattr *vaper;
+{
+ register struct nfsnode *np = VTONFS(vp);
+ register struct vattr *vap;
+
+ if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQLOOKLEASE) {
+ if (!NQNFS_CKCACHABLE(vp, NQL_READ) || np->n_attrstamp == 0) {
+ nfsstats.attrcache_misses++;
+ return (ENOENT);
+ }
+ } else if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) {
+ nfsstats.attrcache_misses++;
+ return (ENOENT);
+ }
+ nfsstats.attrcache_hits++;
+ vap = &np->n_vattr;
+ /*
+ * Reconcile the cached size with the one held in the nfsnode. For a
+ * regular file with NMODIFIED set the larger of the two wins; in every
+ * other case the cached size does. The pager is told the size of
+ * regular files.
+ */
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) {
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else
+ np->n_size = vap->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else
+ np->n_size = vap->va_size;
+ }
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
+#ifdef notdef
+ if ((np->n_flag & NMODIFIED) == 0) {
+ np->n_size = vaper->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else if (np->n_size > vaper->va_size)
+ if (np->n_size > vaper->va_size)
+ vaper->va_size = np->n_size;
+#endif
+ /* With NCHG set, times recorded in the nfsnode override the copy returned */
+ if (np->n_flag & NCHG) {
+ if (np->n_flag & NACC) {
+ vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+ vaper->va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+ }
+ if (np->n_flag & NUPD) {
+ vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+ vaper->va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+ }
+ }
+ return (0);
+}
+
+/*
+ * Set up nameidata for a lookup() call and do it
+ *
+ * ndp - nameidata to fill in; the caller supplies the cn_flags and cn_cred
+ * that are read here
+ * fhp - file handle of the directory the lookup starts in
+ * len - length of the name, a single component taken from the request
+ * slp, nam - passed through to nfsrv_fhtovp()
+ * mdp, dposp - in/out: position of the name in the request mbuf chain;
+ * advanced past the name and its padding
+ * p - process recorded in cn_proc
+ * Returns 0 or an error. The name buffer is freed before returning,
+ * except on success with SAVENAME or SAVESTART set, when it is kept and
+ * HASBUF is set.
+ */
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p)
+ register struct nameidata *ndp;
+ fhandle_t *fhp;
+ int len;
+ struct nfssvc_sock *slp;
+ struct mbuf *nam;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct proc *p;
+{
+ register int i, rem;
+ register struct mbuf *md;
+ register char *fromcp, *tocp;
+ struct vnode *dp;
+ int error, rdonly;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK);
+ /*
+ * Copy the name from the mbuf list to cnp->cn_pnbuf
+ * and set the various ndp fields appropriately.
+ * The name hash is accumulated on the way, and a nul or a '/' in the
+ * name is rejected with EINVAL.
+ */
+ fromcp = *dposp;
+ tocp = cnp->cn_pnbuf;
+ md = *mdp;
+ rem = mtod(md, caddr_t) + md->m_len - fromcp;
+ cnp->cn_hash = 0;
+ for (i = 0; i < len; i++) {
+ while (rem == 0) {
+ md = md->m_next;
+ if (md == NULL) {
+ error = EBADRPC;
+ goto out;
+ }
+ fromcp = mtod(md, caddr_t);
+ rem = md->m_len;
+ }
+ if (*fromcp == '\0' || *fromcp == '/') {
+ error = EINVAL;
+ goto out;
+ }
+ cnp->cn_hash += (unsigned char)*fromcp;
+ *tocp++ = *fromcp++;
+ rem--;
+ }
+ *tocp = '\0';
+ *mdp = md;
+ *dposp = fromcp;
+ /* From here on len is the number of pad bytes after the name; skip them */
+ len = nfsm_rndup(len)-len;
+ if (len > 0) {
+ if (rem >= len)
+ *dposp += len;
+ else if (error = nfs_adv(mdp, dposp, len, rem))
+ goto out;
+ }
+ ndp->ni_pathlen = tocp - cnp->cn_pnbuf;
+ cnp->cn_nameptr = cnp->cn_pnbuf;
+ /*
+ * Extract and set starting directory.
+ */
+ if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+ nam, &rdonly))
+ goto out;
+ if (dp->v_type != VDIR) {
+ vrele(dp);
+ error = ENOTDIR;
+ goto out;
+ }
+ ndp->ni_startdir = dp;
+ /* Mount points are never crossed; read-only exports also get RDONLY */
+ if (rdonly)
+ cnp->cn_flags |= (NOCROSSMOUNT | RDONLY);
+ else
+ cnp->cn_flags |= NOCROSSMOUNT;
+ /*
+ * And call lookup() to do the real work
+ */
+ cnp->cn_proc = p;
+ if (error = lookup(ndp))
+ goto out;
+ /*
+ * Check for encountering a symbolic link
+ * (links are not followed: the vnodes are released and EINVAL returned)
+ */
+ if (cnp->cn_flags & ISSYMLINK) {
+ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+ vput(ndp->ni_dvp);
+ else
+ vrele(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ ndp->ni_vp = NULL;
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Check for saved name request
+ */
+ if (cnp->cn_flags & (SAVENAME | SAVESTART)) {
+ cnp->cn_flags |= HASBUF;
+ return (0);
+ }
+ /* Success without a saved name also comes through here, with error == 0 */
+out:
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ return (error);
+}
+
+/*
+ * A fiddled version of m_adj() that ensures null fill to a long
+ * boundary and only trims off the back end
+ *
+ * mp - head of the mbuf chain
+ * len - number of bytes to trim from the end of the chain
+ * nul - number of bytes at the end of what remains to overwrite with nuls
+ * NOTE(review): the nul fill starts nul bytes before the end of the last
+ * mbuf kept, so this presumably relies on that mbuf holding at least nul
+ * bytes - confirm against the callers.
+ */
+void
+nfsm_adj(mp, len, nul)
+ struct mbuf *mp;
+ register int len;
+ int nul;
+{
+ register struct mbuf *m;
+ register int count, i;
+ register char *cp;
+
+ /*
+ * Trim from tail. Scan the mbuf chain,
+ * calculating its length and finding the last mbuf.
+ * If the adjustment only affects this mbuf, then just
+ * adjust and return. Otherwise, rescan and truncate
+ * after the remaining size.
+ */
+ count = 0;
+ m = mp;
+ for (;;) {
+ count += m->m_len;
+ if (m->m_next == (struct mbuf *)0)
+ break;
+ m = m->m_next;
+ }
+ if (m->m_len > len) {
+ m->m_len -= len;
+ if (nul > 0) {
+ cp = mtod(m, caddr_t)+m->m_len-nul;
+ for (i = 0; i < nul; i++)
+ *cp++ = '\0';
+ }
+ return;
+ }
+ /* The trim takes in the whole last mbuf: work out the length to keep */
+ count -= len;
+ if (count < 0)
+ count = 0;
+ /*
+ * Correct length for chain is "count".
+ * Find the mbuf with last data, adjust its length,
+ * and toss data from remaining mbufs on chain.
+ */
+ for (m = mp; m; m = m->m_next) {
+ if (m->m_len >= count) {
+ m->m_len = count;
+ if (nul > 0) {
+ cp = mtod(m, caddr_t)+m->m_len-nul;
+ for (i = 0; i < nul; i++)
+ *cp++ = '\0';
+ }
+ break;
+ }
+ count -= m->m_len;
+ }
+ /* The mbufs after it stay on the chain but are emptied */
+ while (m = m->m_next)
+ m->m_len = 0;
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ *	- look up fsid in mount list (if not found ret ESTALE)
+ *	- get vp, export flags and the anonymous credential by calling
+ *	  VFS_FHTOVP()
+ *	- for an MNT_EXKERB export, map cred->cr_uid through the uid hash
+ *	  of the socket (slp->ns_uidh) and take the uid and groups of the
+ *	  entry found; with no entry, release the vnode and return
+ *	  NQNFS_AUTHERR
+ *	- otherwise, if cred->cr_uid == 0 or MNT_EXPORTANON is set, take
+ *	  the uid and groups of credanon
+ *	- report MNT_EXRDONLY through *rdonlyp
+ *	- if not lockflag unlock it with VOP_UNLOCK()
+ *
+ * Whenever the group list in *cred is replaced its count, cr_ngroups, is
+ * replaced with it.  Leaving the count as supplied by the client would
+ * keep any client groups beyond the end of the new list in effect (or
+ * hide part of the new list when the client sent fewer groups).
+ *
+ * Returns 0 with the vnode in *vpp, otherwise an error number.
+ */
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp)
+	fhandle_t *fhp;
+	int lockflag;
+	struct vnode **vpp;
+	struct ucred *cred;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	int *rdonlyp;
+{
+	register struct mount *mp;
+	register struct nfsuid *uidp;
+	register int i;
+	struct ucred *credanon;
+	int error, exflags;
+
+	*vpp = (struct vnode *)0;
+	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if (error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon))
+		return (error);
+	/*
+	 * Check/setup credentials.
+	 */
+	if (exflags & MNT_EXKERB) {
+		uidp = slp->ns_uidh[NUIDHASH(cred->cr_uid)];
+		while (uidp) {
+			if (uidp->nu_uid == cred->cr_uid)
+				break;
+			uidp = uidp->nu_hnext;
+		}
+		if (uidp) {
+			cred->cr_uid = uidp->nu_cr.cr_uid;
+			/* Bounded by NGROUPS, like the anonymous case below. */
+			for (i = 0; i < uidp->nu_cr.cr_ngroups && i < NGROUPS;
+			    i++)
+				cred->cr_groups[i] = uidp->nu_cr.cr_groups[i];
+			cred->cr_ngroups = i;
+		} else {
+			vput(*vpp);
+			return (NQNFS_AUTHERR);
+		}
+	} else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+		cred->cr_uid = credanon->cr_uid;
+		for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+			cred->cr_groups[i] = credanon->cr_groups[i];
+		cred->cr_ngroups = i;
+	}
+	if (exflags & MNT_EXRDONLY)
+		*rdonlyp = 1;
+	else
+		*rdonlyp = 0;
+	if (!lockflag)
+		VOP_UNLOCK(*vpp);
+	return (0);
+}
+
+/*
+ * Compare a stored host address against the address in the mbuf nam and
+ * return TRUE (1) if they are the same host, for the given address
+ * family.
+ * If there is any doubt, return FALSE (0); that includes any family
+ * other than AF_INET and (when built with ISO) AF_ISO.
+ * The AF_INET family is handled as a special case so that address mbufs
+ * don't need to be saved to store "struct in_addr", which is only 4 bytes:
+ * the stored address is haddr->had_inetaddr itself, whereas for AF_ISO it
+ * is the sockaddr_iso in the mbuf haddr->had_nam.
+ */
+netaddr_match(family, haddr, nam)
+	int family;
+	union nethostaddr *haddr;
+	struct mbuf *nam;
+{
+	register struct sockaddr_in *sin;
+
+	if (family == AF_INET) {
+		sin = mtod(nam, struct sockaddr_in *);
+		if (sin->sin_family != AF_INET)
+			return (0);
+		if (sin->sin_addr.s_addr != haddr->had_inetaddr)
+			return (0);
+		return (1);
+	}
+#ifdef ISO
+	if (family == AF_ISO) {
+		register struct sockaddr_iso *peer, *stored;
+
+		peer = mtod(nam, struct sockaddr_iso *);
+		stored = mtod(haddr->had_nam, struct sockaddr_iso *);
+		/* Same family, a non-empty address of equal length, same bytes. */
+		if (peer->siso_family == AF_ISO &&
+		    peer->siso_nlen > 0 &&
+		    peer->siso_nlen == stored->siso_nlen &&
+		    SAME_ISOADDR(peer, stored))
+			return (1);
+		return (0);
+	}
+#endif /* ISO */
+	return (0);
+}
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
new file mode 100644
index 000000000000..1f1867606892
--- /dev/null
+++ b/sys/nfsclient/nfs_vfsops.c
@@ -0,0 +1,740 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_vfsops.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/ioctl.h>
+#include <sys/signal.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsdiskless.h>
+#include <nfs/nqnfs.h>
+
+/*
+ * nfs vfs operations.
+ * The initializer below is positional, so the entries must stay in the
+ * member order of struct vfsops (declared outside this file).
+ */
+struct vfsops nfs_vfsops = {
+ nfs_mount,
+ nfs_start,
+ nfs_unmount,
+ nfs_root,
+ nfs_quotactl,
+ nfs_statfs,
+ nfs_sync,
+ nfs_vget,
+ nfs_fhtovp,
+ nfs_vptofh,
+ nfs_init,
+};
+
+/*
+ * This structure must be filled in by a primary bootstrap or bootstrap
+ * server for a diskless/dataless machine. It is initialized below just
+ * to ensure that it is allocated to initialized data (.data not .bss).
+ * nfs_mountroot() is the consumer: it reads the interface, gateway, swap
+ * server, root server, hostname and time settings from here.
+ */
+struct nfs_diskless nfs_diskless = { 0 };
+
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_prog, nfs_vers;
+void nfs_disconnect __P((struct nfsmount *)); /* called from mountnfs() and nfs_unmount() */
+void nfsargs_ntoh __P((struct nfs_args *)); /* defined later in this file */
+static struct mount *nfs_mountdiskless __P((char *, char *, int,
+ struct sockaddr_in *, struct nfs_args *, register struct vnode **)); /* defined later in this file */
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * nfs statfs call
+ * Looks up the nfsnode for the mount's root file handle (nm_fh), sends an
+ * NFSPROC_STATFS request for it using a freshly allocated credential, and
+ * decodes the reply into *sbp.
+ * f_files and f_ffree are only taken from the reply when the mount has
+ * NFSMNT_NQNFS set; otherwise they are reported as 0.
+ * Returns 0 or the error left in "error" by nfs_nget() or the rpc macros.
+ * NOTE(review): the locals that are never named in the body (cp, t1, bpos,
+ * dpos, cp2, mreq, mrep, md, mb, mb2) are presumably used by the nfsm_*
+ * macros, which presumably also jump to a label supplied by nfsm_reqdone
+ * on failure - confirm against nfs/nfsm_subs.h.
+ */
+int
+nfs_statfs(mp, sbp, p)
+ struct mount *mp;
+ register struct statfs *sbp;
+ struct proc *p;
+{
+ register struct vnode *vp;
+ register struct nfsv2_statfs *sfp;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct nfsmount *nmp;
+ struct ucred *cred;
+ struct nfsnode *np;
+
+ nmp = VFSTONFS(mp);
+ isnq = (nmp->nm_flag & NFSMNT_NQNFS); /* non-zero for an nqnfs mount */
+ if (error = nfs_nget(mp, &nmp->nm_fh, &np))
+ return (error);
+ vp = NFSTOV(np);
+ nfsstats.rpccnt[NFSPROC_STATFS]++;
+ cred = crget(); /* released by crfree() below */
+ cred->cr_ngroups = 1;
+ nfsm_reqhead(vp, NFSPROC_STATFS, NFSX_FH);
+ nfsm_fhtom(vp);
+ nfsm_request(vp, NFSPROC_STATFS, p, cred);
+ nfsm_dissect(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq));
+ sbp->f_type = MOUNT_NFS;
+ sbp->f_flags = nmp->nm_flag;
+ sbp->f_iosize = NFS_MAXDGRAMDATA;
+ sbp->f_bsize = fxdr_unsigned(long, sfp->sf_bsize);
+ sbp->f_blocks = fxdr_unsigned(long, sfp->sf_blocks);
+ sbp->f_bfree = fxdr_unsigned(long, sfp->sf_bfree);
+ sbp->f_bavail = fxdr_unsigned(long, sfp->sf_bavail);
+ if (isnq) {
+ sbp->f_files = fxdr_unsigned(long, sfp->sf_files);
+ sbp->f_ffree = fxdr_unsigned(long, sfp->sf_ffree);
+ } else {
+ sbp->f_files = 0;
+ sbp->f_ffree = 0;
+ }
+ if (sbp != &mp->mnt_stat) { /* caller supplied its own buffer: copy the names over too */
+ bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+ bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+ }
+ nfsm_reqdone;
+ vrele(vp);
+ crfree(cred);
+ return (error);
+}
+
+/*
+ * Mount a remote root fs via nfs. This depends on the info in the
+ * nfs_diskless structure that has been filled in properly by some primary
+ * bootstrap.
+ * It goes something like this:
+ * - do enough of "ifconfig" by calling ifioctl() so that the system
+ * can talk to the server
+ * - If nfs_diskless.mygateway is filled in, use that address as
+ * a default gateway.
+ * - hand craft the swap nfs vnode hanging off a fake mount point
+ * if swdevt[0].sw_dev == NODEV
+ * - build the rootfs mount point and call mountnfs() to do the rest.
+ * Every failure detected in here ends in panic(); the function itself
+ * only ever returns 0.
+ */
+int
+nfs_mountroot()
+{
+ register struct mount *mp;
+ register struct nfs_diskless *nd = &nfs_diskless;
+ struct socket *so;
+ struct vnode *vp;
+ struct proc *p = curproc; /* XXX */
+ int error, i;
+
+ /*
+ * XXX time must be non-zero when we init the interface or else
+ * the arp code will wedge...
+ */
+ if (time.tv_sec == 0)
+ time.tv_sec = 1;
+
+#ifdef notyet
+ /* Set up swap credentials. */
+ proc0.p_ucred->cr_uid = ntohl(nd->swap_ucred.cr_uid);
+ proc0.p_ucred->cr_gid = ntohl(nd->swap_ucred.cr_gid);
+ if ((proc0.p_ucred->cr_ngroups = ntohs(nd->swap_ucred.cr_ngroups)) >
+ NGROUPS)
+ proc0.p_ucred->cr_ngroups = NGROUPS;
+ for (i = 0; i < proc0.p_ucred->cr_ngroups; i++)
+ proc0.p_ucred->cr_groups[i] = ntohl(nd->swap_ucred.cr_groups[i]);
+#endif
+
+ /*
+ * Do enough of ifconfig(8) so that the critical net interface can
+ * talk to the server.
+ * The socket only exists to carry the SIOCAIFADDR request and is
+ * closed again right after it.
+ */
+ if (error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0))
+ panic("nfs_mountroot: socreate: %d", error);
+ if (error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, p))
+ panic("nfs_mountroot: SIOCAIFADDR: %d", error);
+ soclose(so);
+
+ /*
+ * If the gateway field is filled in, set it as the default route.
+ * "Filled in" is judged by a non-zero sin_len; destination and mask
+ * are both all-zero sockaddrs apart from sin's family and length.
+ */
+ if (nd->mygateway.sin_len != 0) {
+ struct sockaddr_in mask, sin;
+
+ bzero((caddr_t)&mask, sizeof(mask));
+ sin = mask;
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(sin);
+ if (error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
+ (struct sockaddr *)&nd->mygateway,
+ (struct sockaddr *)&mask,
+ RTF_UP | RTF_GATEWAY, (struct rtentry **)0))
+ panic("nfs_mountroot: RTM_ADD: %d", error);
+ }
+
+ /*
+ * If swapping to an nfs node (indicated by swdevt[0].sw_dev == NODEV):
+ * Create a fake mount point just for the swap vnode so that the
+ * swap file can be on a different server from the rootfs.
+ * The mount pointer returned for swap is deliberately discarded;
+ * only the vnode handed back through vp is kept.
+ */
+ if (swdevt[0].sw_dev == NODEV) {
+ nd->swap_args.fh = (nfsv2fh_t *)nd->swap_fh;
+ (void) nfs_mountdiskless(nd->swap_hostnam, "/swap", 0,
+ &nd->swap_saddr, &nd->swap_args, &vp);
+
+ /*
+ * Since the swap file is not the root dir of a file system,
+ * hack it to a regular file.
+ */
+ vp->v_type = VREG;
+ vp->v_flag = 0;
+ swapdev_vp = vp;
+ VREF(vp); /* the same vnode is also stored in swdevt[0].sw_vp below */
+ swdevt[0].sw_vp = vp;
+ swdevt[0].sw_nblks = ntohl(nd->swap_nblks);
+ } else if (bdevvp(swapdev, &swapdev_vp))
+ panic("nfs_mountroot: can't setup swapdev_vp");
+
+ /*
+ * Create the rootfs mount point.
+ * It is created with MNT_RDONLY set.
+ */
+ nd->root_args.fh = (nfsv2fh_t *)nd->root_fh;
+ mp = nfs_mountdiskless(nd->root_hostnam, "/", MNT_RDONLY,
+ &nd->root_saddr, &nd->root_args, &vp);
+
+ if (vfs_lock(mp))
+ panic("nfs_mountroot: vfs_lock");
+ TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+ mp->mnt_flag |= MNT_ROOTFS;
+ mp->mnt_vnodecovered = NULLVP;
+ vfs_unlock(mp);
+ rootvp = vp;
+
+ /*
+ * This is not really an nfs issue, but it is much easier to
+ * set hostname here and then let the "/etc/rc.xxx" files
+ * mount the right /var based upon its preset value.
+ * The copy is forcibly NUL terminated and hostnamelen is set to the
+ * index of the first NUL.
+ */
+ bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
+ hostname[MAXHOSTNAMELEN - 1] = '\0';
+ for (i = 0; i < MAXHOSTNAMELEN; i++)
+ if (hostname[i] == '\0')
+ break;
+ hostnamelen = i;
+ inittodr(ntohl(nd->root_time)); /* root_time arrives in network byte order */
+ return (0);
+}
+
+/*
+ * Internal version of mount system call for diskless setup.
+ *
+ * Allocates and zeroes a struct mount, copies the server address "sin"
+ * into a fresh MT_SONAME mbuf, converts "args" from network to host byte
+ * order in place and hands everything to mountnfs().
+ *
+ * Beware of the parameter names: "which" is passed to mountnfs() as the
+ * mounted-on path (e.g. "/" or "/swap") and "path" as the host/from name.
+ *
+ * The vnode produced by mountnfs() is stored through vpp and the new
+ * mount structure is returned.  Any failure ends in panic(); this
+ * function does not return an error.
+ */
+static struct mount *
+nfs_mountdiskless(path, which, mountflag, sin, args, vpp)
+	char *path;
+	char *which;
+	int mountflag;
+	struct sockaddr_in *sin;
+	struct nfs_args *args;
+	register struct vnode **vpp;
+{
+	register struct mount *mp;
+	register struct mbuf *m;
+	register int error;
+
+	/*
+	 * sin_len is supplied by whatever filled in nfs_diskless.  It is
+	 * used below as the length of a copy into the data area of a plain
+	 * mbuf, so refuse anything that would run past that area instead
+	 * of silently corrupting memory.
+	 */
+	if (sin->sin_len > MLEN)
+		panic("nfs_mountroot: %s server address too long: %d",
+		    which, sin->sin_len);
+
+	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
+	    M_MOUNT, M_NOWAIT);
+	if (mp == NULL)
+		panic("nfs_mountroot: %s mount malloc", which);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = &nfs_vfsops;
+	mp->mnt_flag = mountflag;
+
+	MGET(m, MT_SONAME, M_DONTWAIT);
+	if (m == NULL)
+		panic("nfs_mountroot: %s mount mbuf", which);
+	bcopy((caddr_t)sin, mtod(m, caddr_t), sin->sin_len);
+	m->m_len = sin->sin_len;
+	nfsargs_ntoh(args);
+	if (error = mountnfs(args, mp, m, which, path, vpp))
+		panic("nfs_mountroot: mount %s on %s: %d", path, which, error);
+
+	return (mp);
+}
+
+/*
+ * Byte swap, in place, every integer member of an nfs_args structure
+ * from network order to host order.  Used by the diskless mount path,
+ * where the structure comes from the bootstrap rather than from a local
+ * mount(2) caller.
+ */
+void
+nfsargs_ntoh(nfsp)
+	register struct nfs_args *nfsp;
+{
+
+	nfsp->sotype = ntohl((u_long)nfsp->sotype);
+	nfsp->proto = ntohl((u_long)nfsp->proto);
+	nfsp->flags = ntohl((u_long)nfsp->flags);
+	nfsp->wsize = ntohl((u_long)nfsp->wsize);
+	nfsp->rsize = ntohl((u_long)nfsp->rsize);
+	nfsp->timeo = ntohl((u_long)nfsp->timeo);
+	nfsp->retrans = ntohl((u_long)nfsp->retrans);
+	nfsp->maxgrouplist = ntohl((u_long)nfsp->maxgrouplist);
+	nfsp->readahead = ntohl((u_long)nfsp->readahead);
+	nfsp->leaseterm = ntohl((u_long)nfsp->leaseterm);
+	nfsp->deadthresh = ntohl((u_long)nfsp->deadthresh);
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ * Everything that lives in user space (the argument block, the file
+ * handle, the mount point path and the host name) is copied in first.
+ * Only then is sockargs() called: it allocates an mbuf, so any error
+ * detected after it would require releasing that mbuf again.  The price
+ * is that the two strings get copied once more inside mountnfs().
+ * Both name buffers are zero filled behind the copied string.
+ */
+/* ARGSUSED */
+int
+nfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct nfs_args args;
+	nfsv2fh_t nfh;
+	char pth[MNAMELEN], hst[MNAMELEN];
+	struct mbuf *nam;
+	struct vnode *vp;
+	u_int len;
+	int error;
+
+	error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args));
+	if (error)
+		return (error);
+
+	error = copyin((caddr_t)args.fh, (caddr_t)&nfh, sizeof (nfsv2fh_t));
+	if (error)
+		return (error);
+
+	error = copyinstr(path, pth, MNAMELEN-1, &len);
+	if (error)
+		return (error);
+	bzero(&pth[len], MNAMELEN - len);
+
+	error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
+	if (error)
+		return (error);
+	bzero(&hst[len], MNAMELEN - len);
+
+	/* sockargs() call must be after above copyin() calls */
+	error = sockargs(&nam, (caddr_t)args.addr, args.addrlen, MT_SONAME);
+	if (error)
+		return (error);
+
+	/* From here on the file handle is the kernel copy. */
+	args.fh = &nfh;
+	return (mountnfs(&args, mp, nam, pth, hst, &vp));
+}
+
+/*
+ * Common code for mount and mountroot
+ * argp - mount arguments; argp->fh must already point at kernel memory,
+ * since it is bcopy()'d here rather than copied in
+ * mp - mount structure being set up; mnt_data is pointed at the new nfsmount
+ * nam - mbuf holding the server address; it is kept in nm_nam on success
+ * and m_freem()'d on the MNT_UPDATE path and on failure
+ * pth - copied to f_mntonname, hst - copied to f_mntfromname
+ * (MNAMELEN bytes each)
+ * vpp - receives the vnode of the remote root on success
+ * Returns 0 on success, also for MNT_UPDATE, which currently changes
+ * nothing; otherwise EPERM or the error from nfs_connect()/nfs_nget().
+ */
+int
+mountnfs(argp, mp, nam, pth, hst, vpp)
+ register struct nfs_args *argp;
+ register struct mount *mp;
+ struct mbuf *nam;
+ char *pth, *hst;
+ struct vnode **vpp;
+{
+ register struct nfsmount *nmp;
+ struct nfsnode *np;
+ int error;
+
+ if (mp->mnt_flag & MNT_UPDATE) {
+ nmp = VFSTONFS(mp);
+ /* update paths, file handles, etc, here XXX */
+ m_freem(nam);
+ return (0);
+ } else {
+ MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount),
+ M_NFSMNT, M_WAITOK);
+ bzero((caddr_t)nmp, sizeof (struct nfsmount));
+ mp->mnt_data = (qaddr_t)nmp;
+ }
+ getnewfsid(mp, MOUNT_NFS);
+ nmp->nm_mountp = mp;
+ nmp->nm_flag = argp->flags;
+ if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_MYWRITE)) ==
+ (NFSMNT_NQNFS | NFSMNT_MYWRITE)) { /* the two options together are refused */
+ error = EPERM;
+ goto bad;
+ }
+ if (nmp->nm_flag & NFSMNT_NQNFS)
+ /*
+ * We have to set mnt_maxsymlinklen to a non-zero value so
+ * that COMPAT_43 routines will know that we are setting
+ * the d_type field in directories (and can zero it for
+ * unsuspecting binaries).
+ */
+ mp->mnt_maxsymlinklen = 1;
+ /* Start from the compiled-in defaults; argp may override them below. */
+ nmp->nm_timeo = NFS_TIMEO;
+ nmp->nm_retry = NFS_RETRANS;
+ nmp->nm_wsize = NFS_WSIZE;
+ nmp->nm_rsize = NFS_RSIZE;
+ nmp->nm_numgrps = NFS_MAXGRPS;
+ nmp->nm_readahead = NFS_DEFRAHEAD;
+ nmp->nm_leaseterm = NQ_DEFLEASE;
+ nmp->nm_deadthresh = NQ_DEADTHRESH;
+ nmp->nm_tnext = (struct nfsnode *)nmp;
+ nmp->nm_tprev = (struct nfsnode *)nmp;
+ nmp->nm_inprog = NULLVP;
+ bcopy((caddr_t)argp->fh, (caddr_t)&nmp->nm_fh, sizeof(nfsv2fh_t));
+ mp->mnt_stat.f_type = MOUNT_NFS;
+ bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
+ bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN);
+ nmp->nm_nam = nam;
+
+ /* NOTE(review): the /10 suggests argp->timeo is in tenths of a second - confirm */
+ if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
+ nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
+ if (nmp->nm_timeo < NFS_MINTIMEO)
+ nmp->nm_timeo = NFS_MINTIMEO;
+ else if (nmp->nm_timeo > NFS_MAXTIMEO)
+ nmp->nm_timeo = NFS_MAXTIMEO;
+ }
+
+ if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
+ nmp->nm_retry = argp->retrans;
+ if (nmp->nm_retry > NFS_MAXREXMIT)
+ nmp->nm_retry = NFS_MAXREXMIT;
+ }
+
+ if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
+ nmp->nm_wsize = argp->wsize;
+ /* Round down to multiple of blocksize */
+ nmp->nm_wsize &= ~0x1ff; /* i.e. a multiple of 512 */
+ if (nmp->nm_wsize <= 0)
+ nmp->nm_wsize = 512;
+ else if (nmp->nm_wsize > NFS_MAXDATA)
+ nmp->nm_wsize = NFS_MAXDATA;
+ }
+ if (nmp->nm_wsize > MAXBSIZE) /* applies to the default as well */
+ nmp->nm_wsize = MAXBSIZE;
+
+ if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
+ nmp->nm_rsize = argp->rsize;
+ /* Round down to multiple of blocksize */
+ nmp->nm_rsize &= ~0x1ff; /* i.e. a multiple of 512 */
+ if (nmp->nm_rsize <= 0)
+ nmp->nm_rsize = 512;
+ else if (nmp->nm_rsize > NFS_MAXDATA)
+ nmp->nm_rsize = NFS_MAXDATA;
+ }
+ if (nmp->nm_rsize > MAXBSIZE) /* applies to the default as well */
+ nmp->nm_rsize = MAXBSIZE;
+ /* The remaining options are silently ignored when out of range. */
+ if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
+ argp->maxgrouplist <= NFS_MAXGRPS)
+ nmp->nm_numgrps = argp->maxgrouplist;
+ if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
+ argp->readahead <= NFS_MAXRAHEAD)
+ nmp->nm_readahead = argp->readahead;
+ if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
+ argp->leaseterm <= NQ_MAXLEASE)
+ nmp->nm_leaseterm = argp->leaseterm;
+ if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
+ argp->deadthresh <= NQ_NEVERDEAD)
+ nmp->nm_deadthresh = argp->deadthresh;
+ /* Set up the sockets and per-host congestion */
+ nmp->nm_sotype = argp->sotype;
+ nmp->nm_soproto = argp->proto;
+
+ /*
+ * For Connection based sockets (TCP,...) defer the connect until
+ * the first request, in case the server is not responding.
+ */
+ if (nmp->nm_sotype == SOCK_DGRAM &&
+ (error = nfs_connect(nmp, (struct nfsreq *)0)))
+ goto bad;
+
+ /*
+ * This is silly, but it has to be set so that vinifod() works.
+ * We do not want to do an nfs_statfs() here since we can get
+ * stuck on a dead server and we are holding a lock on the mount
+ * point.
+ */
+ mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA;
+ /*
+ * A reference count is needed on the nfsnode representing the
+ * remote root. If this object is not persistent, then backward
+ * traversals of the mount point (i.e. "..") will not work if
+ * the nfsnode gets flushed out of the cache. Ufs does not have
+ * this problem, because one can identify root inodes by their
+ * number == ROOTINO (2).
+ */
+ if (error = nfs_nget(mp, &nmp->nm_fh, &np))
+ goto bad;
+ *vpp = NFSTOV(np);
+
+ return (0);
+bad:
+ /* NOTE(review): mp->mnt_data still points at nmp after it is freed here */
+ nfs_disconnect(nmp);
+ free((caddr_t)nmp, M_NFSMNT);
+ m_freem(nam);
+ return (error);
+}
+
+/*
+ * unmount system call
+ * Returns EINVAL for a forced unmount when doforce is clear or the mount
+ * is the root fs, EBUSY when the root vnode has more than two references,
+ * the error from nfs_nget() or vflush() if either fails, and 0 once the
+ * mount has been torn down.
+ */
+int
+nfs_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ register struct nfsmount *nmp;
+ struct nfsnode *np;
+ struct vnode *vp;
+ int error, flags = 0;
+ extern int doforce;
+
+ if (mntflags & MNT_FORCE) {
+ if (!doforce || (mp->mnt_flag & MNT_ROOTFS))
+ return (EINVAL);
+ flags |= FORCECLOSE;
+ }
+ nmp = VFSTONFS(mp);
+ /*
+ * Goes something like this..
+ * - Check for activity on the root vnode (other than ourselves).
+ * - Call vflush() to clear out vnodes for this file system,
+ * except for the root vnode.
+ * - Decrement reference on the vnode representing remote root.
+ * - Close the socket
+ * - Free up the data structures
+ */
+ /*
+ * We need to decrement the ref. count on the nfsnode representing
+ * the remote root. See comment in mountnfs(). The VFS unmount()
+ * has done vput on this vnode, otherwise we would get deadlock!
+ */
+ if (error = nfs_nget(mp, &nmp->nm_fh, &np))
+ return(error);
+ vp = NFSTOV(np);
+ if (vp->v_usecount > 2) { /* more than the two references released further down */
+ vput(vp);
+ return (EBUSY);
+ }
+
+ /*
+ * Must handshake with nqnfs_clientd() if it is active.
+ * The flag is set first, then we sleep until nm_inprog is NULLVP.
+ */
+ nmp->nm_flag |= NFSMNT_DISMINPROG;
+ while (nmp->nm_inprog != NULLVP)
+ (void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0);
+ if (error = vflush(mp, vp, flags)) {
+ vput(vp);
+ nmp->nm_flag &= ~NFSMNT_DISMINPROG; /* unmount abandoned: undo the handshake flag */
+ return (error);
+ }
+
+ /*
+ * We are now committed to the unmount.
+ * For NQNFS, let the server daemon free the nfsmount structure.
+ */
+ if (nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB))
+ nmp->nm_flag |= NFSMNT_DISMNT;
+
+ /*
+ * There are two reference counts to get rid of here.
+ */
+ vrele(vp);
+ vrele(vp);
+ vgone(vp);
+ nfs_disconnect(nmp);
+ m_freem(nmp->nm_nam);
+
+ /* Freed here only when neither NFSMNT_NQNFS nor NFSMNT_KERB is set. */
+ if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) == 0)
+ free((caddr_t)nmp, M_NFSMNT);
+ return (0);
+}
+
+/*
+ * Return root of a filesystem
+ * The vnode is found through the root file handle kept in the nfsmount
+ * (nm_fh).  It is forced to be a directory flagged VROOT before being
+ * handed back through vpp.  Returns 0, or the error from nfs_nget().
+ */
+int
+nfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct nfsmount *nmp = VFSTONFS(mp);
+	struct nfsnode *rootnp;
+	register struct vnode *rvp;
+	int error;
+
+	error = nfs_nget(mp, &nmp->nm_fh, &rootnp);
+	if (error)
+		return (error);
+
+	rvp = NFSTOV(rootnp);
+	rvp->v_type = VDIR;
+	rvp->v_flag = VROOT;
+	*vpp = rvp;
+	return (0);
+}
+
+extern int syncprt;
+
+/*
+ * Flush out the buffer cache
+ * Walks the mount's vnode list and calls VOP_FSYNC() on every vnode that
+ * is not locked and has dirty buffers. The walk restarts from the head
+ * whenever a vnode turns out to belong to another mount or vget() fails.
+ * Returns 0, or the error of the last VOP_FSYNC() call that failed.
+ */
+/* ARGSUSED */
+int
+nfs_sync(mp, waitfor, cred, p)
+ struct mount *mp;
+ int waitfor;
+ struct ucred *cred;
+ struct proc *p;
+{
+ register struct vnode *vp;
+ int error, allerror = 0;
+
+ /*
+ * Force stale buffer cache information to be flushed.
+ */
+loop:
+ for (vp = mp->mnt_vnodelist.lh_first;
+ vp != NULL;
+ vp = vp->v_mntvnodes.le_next) {
+ /*
+ * If the vnode that we are about to sync is no longer
+ * associated with this mount point, start over.
+ */
+ if (vp->v_mount != mp)
+ goto loop;
+ if (VOP_ISLOCKED(vp) || vp->v_dirtyblkhd.lh_first == NULL)
+ continue;
+ if (vget(vp, 1))
+ goto loop;
+ if (error = VOP_FSYNC(vp, cred, waitfor, p))
+ allerror = error; /* later failures overwrite earlier ones */
+ vput(vp);
+ }
+ return (allerror);
+}
+
+/*
+ * NFS flat namespace lookup.
+ * Not implemented: every call is answered with EOPNOTSUPP and *vpp is
+ * left untouched.
+ */
+/* ARGSUSED */
+int
+nfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * File handle to vnode pointer.
+ * At this point, this should never happen; all arguments are ignored
+ * and EINVAL is returned.
+ */
+/* ARGSUSED */
+int
+nfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	return (EINVAL);
+}
+
+/*
+ * Vnode pointer to File handle.
+ * Should never happen either; both arguments are ignored and EINVAL is
+ * returned.
+ */
+/* ARGSUSED */
+int
+nfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	return (EINVAL);
+}
+
+/*
+ * Vfs start routine.
+ * There is nothing to do for nfs: a no-op that always returns 0.
+ */
+/* ARGSUSED */
+int
+nfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);
+}
+
+/*
+ * Do operations associated with quotas.
+ * Not supported: every request is answered with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+nfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	return (EOPNOTSUPP);
+}
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
new file mode 100644
index 000000000000..a909b48dc67d
--- /dev/null
+++ b/sys/nfsclient/nfs_vnops.c
@@ -0,0 +1,2539 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_vnops.c 8.5 (Berkeley) 2/13/94
+ */
+
+/*
+ * vnode op calls for sun nfs version 2
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/conf.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/map.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nqnfs.h>
+
+/* Defs */
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Global vfs data structures for nfs
+ * Each entry pairs a vnode operation descriptor with the routine that
+ * implements it for nfs vnodes; the list is terminated by an entry whose
+ * two members are both NULL. Operations without an nfs_* routine of
+ * their own use vn_default_error (default) and vn_bwrite (bwrite).
+ * nfsv2_vnodeop_opv_desc ties the table to the nfsv2_vnodeop_p vector.
+ */
+int (**nfsv2_vnodeop_p)();
+struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, nfs_lookup }, /* lookup */
+ { &vop_create_desc, nfs_create }, /* create */
+ { &vop_mknod_desc, nfs_mknod }, /* mknod */
+ { &vop_open_desc, nfs_open }, /* open */
+ { &vop_close_desc, nfs_close }, /* close */
+ { &vop_access_desc, nfs_access }, /* access */
+ { &vop_getattr_desc, nfs_getattr }, /* getattr */
+ { &vop_setattr_desc, nfs_setattr }, /* setattr */
+ { &vop_read_desc, nfs_read }, /* read */
+ { &vop_write_desc, nfs_write }, /* write */
+ { &vop_ioctl_desc, nfs_ioctl }, /* ioctl */
+ { &vop_select_desc, nfs_select }, /* select */
+ { &vop_mmap_desc, nfs_mmap }, /* mmap */
+ { &vop_fsync_desc, nfs_fsync }, /* fsync */
+ { &vop_seek_desc, nfs_seek }, /* seek */
+ { &vop_remove_desc, nfs_remove }, /* remove */
+ { &vop_link_desc, nfs_link }, /* link */
+ { &vop_rename_desc, nfs_rename }, /* rename */
+ { &vop_mkdir_desc, nfs_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, nfs_rmdir }, /* rmdir */
+ { &vop_symlink_desc, nfs_symlink }, /* symlink */
+ { &vop_readdir_desc, nfs_readdir }, /* readdir */
+ { &vop_readlink_desc, nfs_readlink }, /* readlink */
+ { &vop_abortop_desc, nfs_abortop }, /* abortop */
+ { &vop_inactive_desc, nfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, nfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, nfs_lock }, /* lock */
+ { &vop_unlock_desc, nfs_unlock }, /* unlock */
+ { &vop_bmap_desc, nfs_bmap }, /* bmap */
+ { &vop_strategy_desc, nfs_strategy }, /* strategy */
+ { &vop_print_desc, nfs_print }, /* print */
+ { &vop_islocked_desc, nfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, nfs_pathconf }, /* pathconf */
+ { &vop_advlock_desc, nfs_advlock }, /* advlock */
+ { &vop_blkatoff_desc, nfs_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, nfs_valloc }, /* valloc */
+ { &vop_reallocblks_desc, nfs_reallocblks }, /* reallocblks */
+ { &vop_vfree_desc, nfs_vfree }, /* vfree */
+ { &vop_truncate_desc, nfs_truncate }, /* truncate */
+ { &vop_update_desc, nfs_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
+ { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
+
+/*
+ * Special device vnode ops
+ * Same layout as the nfs table above, but most operations are handed to
+ * the spec_* routines. close, access, read and write go to nfsspec_*
+ * routines, while getattr, setattr, fsync, inactive, reclaim, lock,
+ * unlock, print, islocked and update use the regular nfs_* routines.
+ */
+int (**spec_nfsv2nodeop_p)();
+struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, spec_lookup }, /* lookup */
+ { &vop_create_desc, spec_create }, /* create */
+ { &vop_mknod_desc, spec_mknod }, /* mknod */
+ { &vop_open_desc, spec_open }, /* open */
+ { &vop_close_desc, nfsspec_close }, /* close */
+ { &vop_access_desc, nfsspec_access }, /* access */
+ { &vop_getattr_desc, nfs_getattr }, /* getattr */
+ { &vop_setattr_desc, nfs_setattr }, /* setattr */
+ { &vop_read_desc, nfsspec_read }, /* read */
+ { &vop_write_desc, nfsspec_write }, /* write */
+ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
+ { &vop_select_desc, spec_select }, /* select */
+ { &vop_mmap_desc, spec_mmap }, /* mmap */
+ { &vop_fsync_desc, nfs_fsync }, /* fsync */
+ { &vop_seek_desc, spec_seek }, /* seek */
+ { &vop_remove_desc, spec_remove }, /* remove */
+ { &vop_link_desc, spec_link }, /* link */
+ { &vop_rename_desc, spec_rename }, /* rename */
+ { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
+ { &vop_symlink_desc, spec_symlink }, /* symlink */
+ { &vop_readdir_desc, spec_readdir }, /* readdir */
+ { &vop_readlink_desc, spec_readlink }, /* readlink */
+ { &vop_abortop_desc, spec_abortop }, /* abortop */
+ { &vop_inactive_desc, nfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, nfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, nfs_lock }, /* lock */
+ { &vop_unlock_desc, nfs_unlock }, /* unlock */
+ { &vop_bmap_desc, spec_bmap }, /* bmap */
+ { &vop_strategy_desc, spec_strategy }, /* strategy */
+ { &vop_print_desc, nfs_print }, /* print */
+ { &vop_islocked_desc, nfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
+ { &vop_advlock_desc, spec_advlock }, /* advlock */
+ { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, spec_valloc }, /* valloc */
+ { &vop_reallocblks_desc, spec_reallocblks }, /* reallocblks */
+ { &vop_vfree_desc, spec_vfree }, /* vfree */
+ { &vop_truncate_desc, spec_truncate }, /* truncate */
+ { &vop_update_desc, nfs_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
+ { &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries };
+
+#ifdef FIFO /* fifo vnode ops table, only compiled when FIFO is defined */
+int (**fifo_nfsv2nodeop_p)();
+struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, fifo_lookup }, /* lookup */
+ { &vop_create_desc, fifo_create }, /* create */
+ { &vop_mknod_desc, fifo_mknod }, /* mknod */
+ { &vop_open_desc, fifo_open }, /* open */
+ { &vop_close_desc, nfsfifo_close }, /* close */
+ { &vop_access_desc, nfsspec_access }, /* access - shared with the special device table */
+ { &vop_getattr_desc, nfs_getattr }, /* getattr */
+ { &vop_setattr_desc, nfs_setattr }, /* setattr */
+ { &vop_read_desc, nfsfifo_read }, /* read */
+ { &vop_write_desc, nfsfifo_write }, /* write */
+ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */
+ { &vop_select_desc, fifo_select }, /* select */
+ { &vop_mmap_desc, fifo_mmap }, /* mmap */
+ { &vop_fsync_desc, nfs_fsync }, /* fsync */
+ { &vop_seek_desc, fifo_seek }, /* seek */
+ { &vop_remove_desc, fifo_remove }, /* remove */
+ { &vop_link_desc, fifo_link }, /* link */
+ { &vop_rename_desc, fifo_rename }, /* rename */
+ { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */
+ { &vop_symlink_desc, fifo_symlink }, /* symlink */
+ { &vop_readdir_desc, fifo_readdir }, /* readdir */
+ { &vop_readlink_desc, fifo_readlink }, /* readlink */
+ { &vop_abortop_desc, fifo_abortop }, /* abortop */
+ { &vop_inactive_desc, nfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, nfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, nfs_lock }, /* lock */
+ { &vop_unlock_desc, nfs_unlock }, /* unlock */
+ { &vop_bmap_desc, fifo_bmap }, /* bmap */
+ { &vop_strategy_desc, fifo_badop }, /* strategy */
+ { &vop_print_desc, nfs_print }, /* print */
+ { &vop_islocked_desc, nfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fifo_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fifo_valloc }, /* valloc */
+ { &vop_reallocblks_desc, fifo_reallocblks }, /* reallocblks */
+ { &vop_vfree_desc, fifo_vfree }, /* vfree */
+ { &vop_truncate_desc, fifo_truncate }, /* truncate */
+ { &vop_update_desc, nfs_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
+ { &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries };
+#endif /* FIFO */
+
+void nqnfs_clientlease();
+
+/*
+ * Global variables
+ * nfs_true and nfs_false are the words nfs_access() stores into its
+ * request for each access bit. nfs_iodwant and nfs_numasync are defined
+ * (not just declared) in this file.
+ * DIRHDSIZ is sizeof (struct dirent) less the MAXNAMLEN + 1 bytes.
+ */
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_prog, nfs_vers, nfs_true, nfs_false;
+extern char nfsiobuf[MAXPHYS+NBPG];
+struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+int nfs_numasync = 0;
+#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1))
+
+/*
+ * nfs null call from vfs.
+ * Builds an NFSPROC_NULL request with no argument bytes (size 0), sends
+ * it on behalf of procp with the given credential and returns whatever
+ * ends up in "error" (initially 0).
+ * NOTE(review): bpos, dpos, mreq, mrep, md and mb are never named in the
+ * body; presumably the nfsm_* macros use them - confirm against
+ * nfs/nfsm_subs.h.
+ */
+int
+nfs_null(vp, cred, procp)
+ struct vnode *vp;
+ struct ucred *cred;
+ struct proc *procp;
+{
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb;
+
+ nfsm_reqhead(vp, NFSPROC_NULL, 0);
+ nfsm_request(vp, NFSPROC_NULL, procp, cred);
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * nfs access vnode op.
+ * Mounts without NFSMNT_NQNFS are handed to nfsspec_access().
+ * For nqnfs the server is asked through the NQNFSPROC_ACCESS rpc: the
+ * request carries the file handle followed by three words, one each for
+ * read, write and execute, set to nfs_true or nfs_false according to
+ * a_mode.  If file modes are changed on the server, accesses might
+ * still fail later.
+ */
+int
+nfs_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/* The local names below are left as they were for the nfsm_* macros. */
+	register struct vnode *vp = ap->a_vp;
+	register u_long *tl;
+	register caddr_t cp;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	/*
+	 * Only nqnfs has an access rpc.  Everything else is stuck emulating
+	 * ufs_access() locally using the vattr.  This may not be correct,
+	 * since the server may apply other access criteria such as
+	 * client uid-->server uid mapping that we do not know about, but
+	 * this is better than just returning anything that is lying about
+	 * in the cache.
+	 */
+	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0)
+		return (nfsspec_access(ap));
+
+	nfsstats.rpccnt[NQNFSPROC_ACCESS]++;
+	nfsm_reqhead(vp, NQNFSPROC_ACCESS, NFSX_FH + 3 * NFSX_UNSIGNED);
+	nfsm_fhtom(vp);
+	nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
+	*tl++ = (ap->a_mode & VREAD) ? nfs_true : nfs_false;
+	*tl++ = (ap->a_mode & VWRITE) ? nfs_true : nfs_false;
+	*tl = (ap->a_mode & VEXEC) ? nfs_true : nfs_false;
+	nfsm_request(vp, NQNFSPROC_ACCESS, ap->a_p, ap->a_cred);
+	nfsm_reqdone;
+	return (error);
+}
+
+/*
+ * nfs open vnode op
+ * Check to see if the type is ok
+ * and that deletion is not in progress.
+ * For paged in text files, you will need to flush the page cache
+ * if consistency is lost.
+ *
+ * Only VREG, VDIR and VLNK vnodes are accepted; any other type gets
+ * EACCES.  When VTEXT is set on the vnode:
+ *  - nqnfs mounts: if NQNFS_CKINVALID() is true for a read lease,
+ *    nqnfs_getlease() is retried while it returns NQNFS_EXPIRED; then
+ *    buffers and cached pages are flushed if n_lrev != n_brev or
+ *    NQNFSNONCACHE is set.
+ *  - other mounts: buffers and cached pages are flushed if NMODIFIED is
+ *    set, or if the mtime reported by VOP_GETATTR() differs from n_mtime.
+ * When VTEXT is clear on a non-nqnfs mount only n_attrstamp is zeroed.
+ * Returns 0, EACCES, an error from nqnfs_getlease() or VOP_GETATTR(), or
+ * EINTR from nfs_vinvalbuf(); other nfs_vinvalbuf() errors are ignored.
+ */
+/* ARGSUSED */
+int
+nfs_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ struct vattr vattr;
+ int error;
+
+ if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
+ return (EACCES);
+ if (vp->v_flag & VTEXT) {
+ /*
+ * Get a valid lease. If cached data is stale, flush it.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
+ do {
+ error = nqnfs_getlease(vp, NQL_READ, ap->a_cred, ap->a_p);
+ } while (error == NQNFS_EXPIRED);
+ if (error)
+ return (error);
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE)) {
+ if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+ ap->a_p, 1)) == EINTR)
+ return (error);
+ (void) vnode_pager_uncache(vp);
+ np->n_brev = np->n_lrev; /* buffers now match the lease revision */
+ }
+ }
+ } else {
+ if (np->n_flag & NMODIFIED) {
+ if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+ ap->a_p, 1)) == EINTR)
+ return (error);
+ (void) vnode_pager_uncache(vp);
+ np->n_attrstamp = 0;
+ np->n_direofoffset = 0;
+ if (error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p))
+ return (error);
+ np->n_mtime = vattr.va_mtime.ts_sec;
+ } else {
+ if (error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p))
+ return (error);
+ if (np->n_mtime != vattr.va_mtime.ts_sec) { /* mtime differs from the one recorded in the node */
+ np->n_direofoffset = 0;
+ if ((error = nfs_vinvalbuf(vp, V_SAVE,
+ ap->a_cred, ap->a_p, 1)) == EINTR)
+ return (error);
+ (void) vnode_pager_uncache(vp);
+ np->n_mtime = vattr.va_mtime.ts_sec;
+ }
+ }
+ }
+ } else if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+ np->n_attrstamp = 0; /* For Open/Close consistency */
+ return (0);
+}
+
+/*
+ * nfs close vnode op
+ * Only regular files need any work.  On a mount without NFSMNT_NQNFS a
+ * node marked NMODIFIED has its buffers pushed with
+ * nfs_vinvalbuf(V_SAVE) and its attribute timestamp cleared.  A pending
+ * NWRITEERR is cleared and the saved n_error becomes the return value.
+ */
+/* ARGSUSED */
+int
+nfs_close(ap)
+	struct vop_close_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *filevp = ap->a_vp;
+	struct nfsnode *node = VTONFS(filevp);
+	int status = 0;
+
+	/* Nothing to do for anything but a regular file. */
+	if (filevp->v_type != VREG)
+		return (status);
+
+	if (!(VFSTONFS(filevp->v_mount)->nm_flag & NFSMNT_NQNFS) &&
+	    (node->n_flag & NMODIFIED) != 0) {
+		status = nfs_vinvalbuf(filevp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		node->n_attrstamp = 0;
+	}
+
+	/* A recorded write error overrides the flush result. */
+	if ((node->n_flag & NWRITEERR) != 0) {
+		status = node->n_error;
+		node->n_flag &= ~NWRITEERR;
+	}
+	return (status);
+}
+
+/*
+ * nfs getattr call from vfs.
+ * Sets NCHG on the nfsnode when NACC or NUPD is set, then tries the
+ * attribute cache: if nfs_getattrcache() returns 0 the call is done.
+ * Otherwise an NFSPROC_GETATTR rpc is issued for vp and the reply is
+ * loaded into *ap->a_vap by nfsm_loadattr().
+ * Returns 0 on a cache hit, else the value of "error" after the rpc.
+ */
+int
+nfs_getattr(ap)
+ struct vop_getattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ register caddr_t cp;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ /*
+ * Update local times for special files.
+ */
+ if (np->n_flag & (NACC | NUPD))
+ np->n_flag |= NCHG;
+ /*
+ * First look in the cache.
+ */
+ if (nfs_getattrcache(vp, ap->a_vap) == 0)
+ return (0);
+ nfsstats.rpccnt[NFSPROC_GETATTR]++; /* cache miss: count and send the rpc */
+ nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH);
+ nfsm_fhtom(vp);
+ nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
+ nfsm_loadattr(vp, ap->a_vap);
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * nfs setattr call.
+ * Before the rpc, cached data is reconciled with the requested change:
+ *  - if the size is being set and the node is NMODIFIED, buffers are
+ *    discarded (new size 0) or written back (V_SAVE); any error aborts.
+ *    The old size is remembered in tsize and the node, its cached vattr
+ *    and the pager are switched to the new size.
+ *  - if only a time is being set and the node is NMODIFIED, buffers are
+ *    written back; only EINTR aborts.
+ * The request carries the file handle and an nfsv2_sattr whose layout
+ * depends on whether the mount has NFSMNT_NQNFS set.  The reply
+ * attributes are loaded into the node, and on nqnfs mounts with a
+ * cachable write lease n_brev is advanced to the returned revision.
+ * If the rpc fails after the size was changed locally, the old size is
+ * put back.  The restore is guarded by va_size != VNOVAL because tsize
+ * is assigned only on that path.
+ * Returns 0 or an error from nfs_vinvalbuf() or the rpc.
+ * NOTE(review): the nfsm_* macros are defined elsewhere and appear to use
+ * the locals declared below by name -- do not rename them.
+ */
+int
+nfs_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct nfsv2_sattr *sp;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	u_long *tl;
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct vattr *vap = ap->a_vap;
+	u_quad_t frev, tsize;
+
+	if (vap->va_size != VNOVAL || vap->va_mtime.ts_sec != VNOVAL ||
+	    vap->va_atime.ts_sec != VNOVAL) {
+		if (vap->va_size != VNOVAL) {
+			if (np->n_flag & NMODIFIED) {
+				if (vap->va_size == 0)
+					error = nfs_vinvalbuf(vp, 0, ap->a_cred,
+					    ap->a_p, 1);
+				else
+					error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+					    ap->a_p, 1);
+				if (error)
+					return (error);
+			}
+			/* Remember the old size so a failed rpc can be undone. */
+			tsize = np->n_size;
+			np->n_size = np->n_vattr.va_size = vap->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		} else if ((np->n_flag & NMODIFIED) &&
+		    (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+		    ap->a_p, 1)) == EINTR)
+			return (error);
+	}
+	nfsstats.rpccnt[NFSPROC_SETATTR]++;
+	isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH+NFSX_SATTR(isnq));
+	nfsm_fhtom(vp);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	/* Fields the caller left unset are sent as VNOVAL. */
+	if (vap->va_mode == (u_short)-1)
+		sp->sa_mode = VNOVAL;
+	else
+		sp->sa_mode = vtonfs_mode(vp->v_type, vap->va_mode);
+	if (vap->va_uid == (uid_t)-1)
+		sp->sa_uid = VNOVAL;
+	else
+		sp->sa_uid = txdr_unsigned(vap->va_uid);
+	if (vap->va_gid == (gid_t)-1)
+		sp->sa_gid = VNOVAL;
+	else
+		sp->sa_gid = txdr_unsigned(vap->va_gid);
+	if (isnq) {
+		txdr_hyper(&vap->va_size, &sp->sa_nqsize);
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+		sp->sa_nqflags = txdr_unsigned(vap->va_flags);
+		sp->sa_nqrdev = VNOVAL;
+	} else {
+		sp->sa_nfssize = txdr_unsigned(vap->va_size);
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(vp, NFSPROC_SETATTR, ap->a_p, ap->a_cred);
+	nfsm_loadattr(vp, (struct vattr *)0);
+	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) &&
+	    NQNFS_CKCACHABLE(vp, NQL_WRITE)) {
+		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+		fxdr_hyper(tl, &frev);
+		if (frev > np->n_brev)
+			np->n_brev = frev;
+	}
+	nfsm_reqdone;
+	/*
+	 * Undo the local size change only if one was made above; tsize is
+	 * not assigned when va_size is VNOVAL.
+	 */
+	if (error && vap->va_size != VNOVAL) {
+		np->n_size = np->n_vattr.va_size = tsize;
+		vnode_pager_setsize(vp, (u_long)np->n_size);
+	}
+	return (error);
+}
+
+/*
+ * nfs lookup call, one step at a time...
+ * First look in cache
+ * If not found, unlock the directory nfsnode and do the rpc
+ *
+ * Returns ENOTDIR if dvp is not a directory.
+ * A name cache hit (cache_lookup() returning non-zero other than ENOENT)
+ * is used only after revalidation: on nqnfs mounts via the lease state
+ * (or unconditionally when NFSMNT_NQLOOKLEASE is off), on other mounts
+ * when the ctime from VOP_GETATTR() matches n_ctime.  A validated hit
+ * returns 0 with *vpp as set by cache_lookup(); otherwise the vnode is
+ * released, *vpp is cleared and the lookup rpc is done.
+ * On rpc failure ENOENT becomes EJUSTRETURN for the last component of a
+ * CREATE or RENAME.  SAVENAME is set for the last component of any
+ * operation other than LOOKUP.
+ * On success *vpp is the vnode for the returned file handle (dvp itself,
+ * with an extra reference, when the handle equals the directory's).
+ * NOTE(review): the nfsm_* macros are defined elsewhere and appear to use
+ * the locals declared below, and the nfsmout label, by name.
+ */
+int
+nfs_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct componentname *cnp = ap->a_cnp;
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vnode **vpp = ap->a_vpp;
+ register int flags = cnp->cn_flags;
+ register struct vnode *vdp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ struct nfsmount *nmp;
+ caddr_t bpos, dpos, cp2;
+ time_t reqtime;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct vnode *newvp;
+ long len;
+ nfsv2fh_t *fhp;
+ struct nfsnode *np;
+ int lockparent, wantparent, error = 0;
+ int nqlflag, cachable;
+ u_quad_t frev;
+
+ *vpp = NULL;
+ if (dvp->v_type != VDIR)
+ return (ENOTDIR);
+ lockparent = flags & LOCKPARENT;
+ wantparent = flags & (LOCKPARENT|WANTPARENT);
+ nmp = VFSTONFS(dvp->v_mount);
+ np = VTONFS(dvp);
+ if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
+ struct vattr vattr;
+ int vpid;
+
+ vdp = *vpp;
+ vpid = vdp->v_id; /* sampled before vget() so a changed v_id can be detected */
+ /*
+ * See the comment starting `Step through' in ufs/ufs_lookup.c
+ * for an explanation of the locking protocol
+ */
+ if (dvp == vdp) {
+ VREF(vdp);
+ error = 0;
+ } else
+ error = vget(vdp, 1);
+ if (!error) {
+ if (vpid == vdp->v_id) {
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) == 0) {
+ nfsstats.lookupcache_hits++;
+ if (cnp->cn_nameiop != LOOKUP &&
+ (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ } else if (NQNFS_CKCACHABLE(dvp, NQL_READ)) {
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NMODIFIED)) {
+ np->n_direofoffset = 0;
+ cache_purge(dvp);
+ error = nfs_vinvalbuf(dvp, 0,
+ cnp->cn_cred, cnp->cn_proc,
+ 1);
+ if (error == EINTR)
+ return (error);
+ np->n_brev = np->n_lrev;
+ } else {
+ nfsstats.lookupcache_hits++;
+ if (cnp->cn_nameiop != LOOKUP &&
+ (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ }
+ }
+ } else if (!VOP_GETATTR(vdp, &vattr, cnp->cn_cred, cnp->cn_proc) &&
+ vattr.va_ctime.ts_sec == VTONFS(vdp)->n_ctime) {
+ nfsstats.lookupcache_hits++;
+ if (cnp->cn_nameiop != LOOKUP &&
+ (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ }
+ cache_purge(vdp); /* cached entry could not be validated */
+ }
+ vrele(vdp);
+ }
+ *vpp = NULLVP;
+ }
+ error = 0;
+ nfsstats.lookupcache_misses++;
+ nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+ len = cnp->cn_namelen;
+ nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+
+ /*
+ * For nqnfs optionally piggyback a getlease request for the name
+ * being looked up.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) &&
+ ((cnp->cn_flags & MAKEENTRY) &&
+ (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))))
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ else
+ *tl = 0; /* zero: no lease requested */
+ }
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+ reqtime = time.tv_sec;
+ nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
+nfsmout: /* presumably the target of failing nfsm_* macros -- confirm in their definitions */
+ if (error) {
+ if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
+ (flags & ISLASTCN) && error == ENOENT)
+ error = EJUSTRETURN;
+ if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (error);
+ }
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ if (*tl) {
+ nqlflag = fxdr_unsigned(int, *tl);
+ nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+ cachable = fxdr_unsigned(int, *tl++);
+ reqtime += fxdr_unsigned(int, *tl++); /* reqtime now holds request time plus the returned term */
+ fxdr_hyper(tl, &frev);
+ } else
+ nqlflag = 0;
+ }
+ nfsm_dissect(fhp, nfsv2fh_t *, NFSX_FH);
+
+ /*
+ * Handle RENAME case...
+ * A returned handle equal to the directory's own is refused with
+ * EISDIR; otherwise the new vnode is returned with SAVENAME set and
+ * no name cache entry is made.
+ */
+ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
+ if (!bcmp(np->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+ m_freem(mrep);
+ return (EISDIR);
+ }
+ if (error = nfs_nget(dvp->v_mount, fhp, &np)) {
+ m_freem(mrep);
+ return (error);
+ }
+ newvp = NFSTOV(np);
+ if (error =
+ nfs_loadattrcache(&newvp, &md, &dpos, (struct vattr *)0)) {
+ vrele(newvp);
+ m_freem(mrep);
+ return (error);
+ }
+ *vpp = newvp;
+ m_freem(mrep);
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ }
+
+ if (!bcmp(np->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) { /* handle is the directory's own */
+ VREF(dvp);
+ newvp = dvp;
+ } else {
+ if (error = nfs_nget(dvp->v_mount, fhp, &np)) {
+ m_freem(mrep);
+ return (error);
+ }
+ newvp = NFSTOV(np);
+ }
+ if (error = nfs_loadattrcache(&newvp, &md, &dpos, (struct vattr *)0)) {
+ vrele(newvp);
+ m_freem(mrep);
+ return (error);
+ }
+ m_freem(mrep);
+ *vpp = newvp;
+ if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ if ((cnp->cn_flags & MAKEENTRY) &&
+ (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
+ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+ np->n_ctime = np->n_vattr.va_ctime.ts_sec;
+ else if (nqlflag && reqtime > time.tv_sec)
+ nqnfs_clientlease(nmp, np, nqlflag, cachable, reqtime,
+ frev);
+ cache_enter(dvp, *vpp, cnp);
+ }
+ return (0);
+}
+
+/*
+ * nfs read call.
+ * Regular files are handed straight to nfs_bioread(); any other vnode
+ * type is refused with EPERM.
+ */
+int
+nfs_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *target = ap->a_vp;
+
+	if (target->v_type == VREG)
+		return (nfs_bioread(target, ap->a_uio, ap->a_ioflag,
+		    ap->a_cred));
+	return (EPERM);
+}
+
+/*
+ * nfs readlink call
+ * Symbolic links are read through nfs_bioread() with no I/O flags; any
+ * other vnode type is refused with EPERM.
+ */
+int
+nfs_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *linkvp = ap->a_vp;
+
+	if (linkvp->v_type == VLNK)
+		return (nfs_bioread(linkvp, ap->a_uio, 0, ap->a_cred));
+	return (EPERM);
+}
+
+/*
+ * Do a readlink rpc.
+ * Called by nfs_doio() from below the buffer cache.
+ * Sends the file handle of vp in an NFSPROC_READLINK request on behalf of
+ * uiop->uio_procp, reads the returned string length into len (checked
+ * against NFS_MAXPATHLEN by nfsm_strsiz()) and moves that many bytes of
+ * the reply into uiop.
+ * Returns the value of "error" after the rpc.
+ */
+int
+nfs_readlinkrpc(vp, uiop, cred)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ long len;
+
+ nfsstats.rpccnt[NFSPROC_READLINK]++;
+ nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH);
+ nfsm_fhtom(vp);
+ nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
+ nfsm_strsiz(len, NFS_MAXPATHLEN);
+ nfsm_mtouio(uiop, len);
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * nfs read rpc call
+ * Ditto above
+ * Reads uiop->uio_resid bytes in chunks of at most nm_rsize, one
+ * NFSPROC_READ rpc per chunk.  On mounts without NFSMNT_NQNFS a request
+ * that would run past offset 0xffffffff is refused with EFBIG.
+ * nqnfs requests carry a 64-bit offset followed by the length; other
+ * requests carry a 32-bit offset, the length and a zero word.
+ * The loop ends early when the server returns fewer bytes than asked for.
+ * Returns 0, EFBIG, or the "error" value set by a failing nfsm_* macro.
+ */
+int
+nfs_readrpc(vp, uiop, cred)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct nfsmount *nmp;
+ long len, retlen, tsiz;
+
+ nmp = VFSTONFS(vp->v_mount);
+ tsiz = uiop->uio_resid; /* bytes still to be requested */
+ if (uiop->uio_offset + tsiz > 0xffffffff &&
+ (nmp->nm_flag & NFSMNT_NQNFS) == 0)
+ return (EFBIG);
+ while (tsiz > 0) {
+ nfsstats.rpccnt[NFSPROC_READ]++;
+ len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
+ nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH+NFSX_UNSIGNED*3);
+ nfsm_fhtom(vp);
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED*3);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ txdr_hyper(&uiop->uio_offset, tl); /* words 0-1: 64-bit offset */
+ *(tl + 2) = txdr_unsigned(len);
+ } else {
+ *tl++ = txdr_unsigned(uiop->uio_offset);
+ *tl++ = txdr_unsigned(len);
+ *tl = 0;
+ }
+ nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
+ nfsm_loadattr(vp, (struct vattr *)0);
+ nfsm_strsiz(retlen, nmp->nm_rsize);
+ nfsm_mtouio(uiop, retlen);
+ m_freem(mrep);
+ if (retlen < len) /* short read: stop asking for more */
+ tsiz = 0;
+ else
+ tsiz -= len;
+ }
+nfsmout:
+ return (error);
+}
+
+/*
+ * nfs write call
+ * Writes uiop->uio_resid bytes in chunks of at most nm_wsize, one
+ * NFSPROC_WRITE rpc per chunk.  On mounts without NFSMNT_NQNFS a request
+ * that would run past offset 0xffffffff is refused with EFBIG.
+ * Four words follow the file handle.  nqnfs: 64-bit offset, an append
+ * flag (1 when ioflags has IO_APPEND) and the length.  Otherwise: the
+ * second word is the 32-bit offset and the fourth the length; the first
+ * and third words are not written by this function.
+ * After each reply the attributes are reloaded; with NFSMNT_MYWRITE
+ * n_mtime is updated from them, else on nqnfs with a cachable write lease
+ * n_brev is advanced to the returned revision.
+ * On error uio_resid is set to the count not yet written (tsiz).
+ * Returns 0, EFBIG, or the "error" value set by a failing nfsm_* macro.
+ */
+int
+nfs_writerpc(vp, uiop, cred, ioflags)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+ int ioflags;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct nfsmount *nmp;
+ struct nfsnode *np = VTONFS(vp);
+ u_quad_t frev;
+ long len, tsiz;
+
+ nmp = VFSTONFS(vp->v_mount);
+ tsiz = uiop->uio_resid; /* bytes still to be sent */
+ if (uiop->uio_offset + tsiz > 0xffffffff &&
+ (nmp->nm_flag & NFSMNT_NQNFS) == 0)
+ return (EFBIG);
+ while (tsiz > 0) {
+ nfsstats.rpccnt[NFSPROC_WRITE]++;
+ len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
+ nfsm_reqhead(vp, NFSPROC_WRITE,
+ NFSX_FH+NFSX_UNSIGNED*4+nfsm_rndup(len));
+ nfsm_fhtom(vp);
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED * 4);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ txdr_hyper(&uiop->uio_offset, tl);
+ tl += 2;
+ if (ioflags & IO_APPEND)
+ *tl++ = txdr_unsigned(1);
+ else
+ *tl++ = 0;
+ } else {
+ *++tl = txdr_unsigned(uiop->uio_offset); /* skip word 0, offset goes in word 1 */
+ tl += 2; /* skip word 2, leave tl at word 3 */
+ }
+ *tl = txdr_unsigned(len);
+ nfsm_uiotom(uiop, len);
+ nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
+ nfsm_loadattr(vp, (struct vattr *)0);
+ if (nmp->nm_flag & NFSMNT_MYWRITE)
+ VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.ts_sec;
+ else if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ NQNFS_CKCACHABLE(vp, NQL_WRITE)) {
+ nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+ fxdr_hyper(tl, &frev);
+ if (frev > np->n_brev)
+ np->n_brev = frev;
+ }
+ m_freem(mrep);
+ tsiz -= len;
+ }
+nfsmout:
+ if (error)
+ uiop->uio_resid = tsiz;
+ return (error);
+}
+
+/*
+ * nfs mknod call
+ * This is a kludge. Use a create rpc but with the IFMT bits of the mode
+ * set to specify the file type and the size field for rdev.
+ *
+ * VCHR and VBLK pass va_rdev; VFIFO (only when FIFO is defined) passes
+ * 0xffffffff.  Any other type aborts the operation, releases dvp with
+ * vput() and returns EOPNOTSUPP.  The gid sent is the directory's, as
+ * reported by VOP_GETATTR(dvp).
+ * On the rpc path the pathname buffer is freed, the directory is marked
+ * NMODIFIED with its attribute timestamp cleared, and dvp is released
+ * with vrele().
+ * NOTE(review): newvp is entered in the name cache but is neither stored
+ * through ap->a_vpp nor released in this function -- confirm that no
+ * reference is leaked.
+ */
+/* ARGSUSED */
+int
+nfs_mknod(ap)
+ struct vop_mknod_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ struct vnode *newvp;
+ struct vattr vattr;
+ char *cp2;
+ caddr_t bpos, dpos;
+ int error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ u_long rdev;
+
+ isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ if (vap->va_type == VCHR || vap->va_type == VBLK)
+ rdev = txdr_unsigned(vap->va_rdev);
+#ifdef FIFO
+ else if (vap->va_type == VFIFO)
+ rdev = 0xffffffff;
+#endif /* FIFO */
+ else {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (EOPNOTSUPP);
+ }
+ if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (error);
+ }
+ nfsstats.rpccnt[NFSPROC_CREATE]++;
+ nfsm_reqhead(dvp, NFSPROC_CREATE,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)+NFSX_SATTR(isnq));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+ sp->sa_mode = vtonfs_mode(vap->va_type, vap->va_mode);
+ sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+ sp->sa_gid = txdr_unsigned(vattr.va_gid);
+ if (isnq) {
+ sp->sa_nqrdev = rdev;
+ sp->sa_nqflags = 0;
+ txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+ txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+ } else {
+ sp->sa_nfssize = rdev; /* the size field carries rdev, per the kludge above */
+ txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+ txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+ }
+ nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+ nfsm_mtofh(dvp, newvp);
+ nfsm_reqdone;
+ if (!error && (cnp->cn_flags & MAKEENTRY))
+ cache_enter(dvp, newvp, cnp);
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ vrele(dvp);
+ return (error);
+}
+
+/*
+ * nfs file create call
+ * Sends an NFSPROC_CREATE request for the name in cnp under dvp.  The
+ * uid is the caller's (cn_cred->cr_uid), the gid is the directory's as
+ * reported by VOP_GETATTR(dvp), and the size is sent as 0.
+ * If VOP_GETATTR() fails the operation is aborted, dvp is released with
+ * vput() and that error is returned.
+ * The new vnode is built from the reply into *ap->a_vpp by nfsm_mtofh()
+ * and, when there was no error and MAKEENTRY is set, entered in the name
+ * cache.  On the rpc path the pathname buffer is freed, the directory is
+ * marked NMODIFIED with its attribute timestamp cleared, and dvp is
+ * released with vrele().
+ */
+int
+nfs_create(ap)
+ struct vop_create_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct vattr vattr;
+
+ if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (error);
+ }
+ nfsstats.rpccnt[NFSPROC_CREATE]++;
+ isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ nfsm_reqhead(dvp, NFSPROC_CREATE,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)+NFSX_SATTR(isnq));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+ sp->sa_mode = vtonfs_mode(vap->va_type, vap->va_mode);
+ sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+ sp->sa_gid = txdr_unsigned(vattr.va_gid);
+ if (isnq) {
+ u_quad_t qval = 0;
+
+ txdr_hyper(&qval, &sp->sa_nqsize);
+ sp->sa_nqflags = 0;
+ sp->sa_nqrdev = -1;
+ txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+ txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+ } else {
+ sp->sa_nfssize = 0;
+ txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+ txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+ }
+ nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+ nfsm_mtofh(dvp, *ap->a_vpp);
+ nfsm_reqdone;
+ if (!error && (cnp->cn_flags & MAKEENTRY))
+ cache_enter(dvp, *ap->a_vpp, cnp);
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ vrele(dvp);
+ return (error);
+}
+
+/*
+ * nfs file remove call
+ * To try and make nfs semantics closer to ufs semantics, a file that has
+ * other processes using the vnode is renamed instead of removed and then
+ * removed later on the last close.
+ * - If v_usecount > 1
+ * If a rename is not already in the works
+ * call nfs_sillyrename() to set it up
+ * else
+ * do the remove rpc
+ *
+ * Both paths clear the file's attribute timestamp and release dvp and vp
+ * with vrele() before returning.  ENOENT from the remove rpc is reported
+ * as success (see the comment in the body).
+ * NOTE(review): the EINTR return after nfs_vinvalbuf() leaves without the
+ * vrele() calls and without freeing cnp->cn_pnbuf -- confirm whether the
+ * caller cleans up in that case.
+ */
+int
+nfs_remove(ap)
+ struct vop_remove_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode * a_dvp;
+ struct vnode * a_vp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct vnode *dvp = ap->a_dvp;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsnode *np = VTONFS(vp);
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ if (vp->v_usecount > 1) {
+ if (!np->n_sillyrename)
+ error = nfs_sillyrename(dvp, vp, cnp);
+ } else {
+ /*
+ * Purge the name cache so that the chance of a lookup for
+ * the name succeeding while the remove is in progress is
+ * minimized. Without node locking it can still happen, such
+ * that an I/O op returns ESTALE, but since you get this if
+ * another host removes the file..
+ */
+ cache_purge(vp);
+ /*
+ * Throw away biocache buffers. Mainly to avoid
+ * unnecessary delayed writes.
+ */
+ error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
+ if (error == EINTR)
+ return (error);
+ /* Do the rpc */
+ nfsstats.rpccnt[NFSPROC_REMOVE]++;
+ nfsm_reqhead(dvp, NFSPROC_REMOVE,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_request(dvp, NFSPROC_REMOVE, cnp->cn_proc, cnp->cn_cred);
+ nfsm_reqdone;
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ /*
+ * Kludge City: If the first reply to the remove rpc is lost..
+ * the reply to the retransmitted request will be ENOENT
+ * since the file was in fact removed
+ * Therefore, we cheat and return success.
+ */
+ if (error == ENOENT)
+ error = 0;
+ }
+ np->n_attrstamp = 0;
+ vrele(dvp);
+ vrele(vp);
+ return (error);
+}
+
+/*
+ * nfs file remove rpc called from nfs_inactive
+ * Sends an NFSPROC_REMOVE request for the name sp->s_name (length
+ * sp->s_namlen) in directory sp->s_dvp, using credential sp->s_cred and a
+ * NULL process pointer.  Afterwards the directory is marked NMODIFIED and
+ * its attribute timestamp is cleared.
+ * Returns the value of "error" after the rpc.
+ */
+int
+nfs_removeit(sp)
+ register struct sillyrename *sp;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ nfsstats.rpccnt[NFSPROC_REMOVE]++;
+ nfsm_reqhead(sp->s_dvp, NFSPROC_REMOVE,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(sp->s_namlen));
+ nfsm_fhtom(sp->s_dvp);
+ nfsm_strtom(sp->s_name, sp->s_namlen, NFS_MAXNAMLEN);
+ nfsm_request(sp->s_dvp, NFSPROC_REMOVE, NULL, sp->s_cred);
+ nfsm_reqdone;
+ VTONFS(sp->s_dvp)->n_flag |= NMODIFIED;
+ VTONFS(sp->s_dvp)->n_attrstamp = 0;
+ return (error);
+}
+
+/*
+ * nfs file rename call
+ * Refuses with EXDEV when the source file, target directory and (if
+ * present) target file are not all on the same mount.  Otherwise sends an
+ * NFSPROC_RENAME request carrying the source directory handle and name
+ * followed by the target directory handle and name.
+ * After the rpc both directories are marked NMODIFIED with their
+ * attribute timestamps cleared; when the source is a directory the name
+ * cache is purged for fdvp, and for tdvp too if the target is an existing
+ * directory.
+ * All four vnodes are released on every path: tdvp with vrele() when it
+ * is the same vnode as tvp, else vput(); tvp with vput(); fdvp and fvp
+ * with vrele().
+ * ENOENT is reported as success on the assumption that it answers a
+ * retransmitted request.
+ * NOTE(review): the nfsm_* macros are defined elsewhere and appear to use
+ * the locals declared below by name -- do not rename them.
+ */
+int
+nfs_rename(ap)
+	struct vop_rename_args /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	register struct vnode *fvp = ap->a_fvp;
+	register struct vnode *tvp = ap->a_tvp;
+	register struct vnode *fdvp = ap->a_fdvp;
+	register struct vnode *tdvp = ap->a_tdvp;
+	register struct componentname *tcnp = ap->a_tcnp;
+	register struct componentname *fcnp = ap->a_fcnp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	/* Check for cross-device rename */
+	if ((fvp->v_mount != tdvp->v_mount) ||
+	    (tvp && (fvp->v_mount != tvp->v_mount))) {
+		error = EXDEV;
+		goto out;
+	}
+
+
+	nfsstats.rpccnt[NFSPROC_RENAME]++;
+	/*
+	 * The size estimate covers two handle+length pairs plus both names:
+	 * the source name and the target name.
+	 */
+	nfsm_reqhead(fdvp, NFSPROC_RENAME,
+	    (NFSX_FH+NFSX_UNSIGNED)*2+nfsm_rndup(fcnp->cn_namelen)+
+	    nfsm_rndup(tcnp->cn_namelen));
+	nfsm_fhtom(fdvp);
+	nfsm_strtom(fcnp->cn_nameptr, fcnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_fhtom(tdvp);
+	nfsm_strtom(tcnp->cn_nameptr, tcnp->cn_namelen, NFS_MAXNAMLEN);
+	/* or fcnp->cn_cred? */
+	nfsm_request(fdvp, NFSPROC_RENAME, tcnp->cn_proc, tcnp->cn_cred);
+	nfsm_reqdone;
+	VTONFS(fdvp)->n_flag |= NMODIFIED;
+	VTONFS(fdvp)->n_attrstamp = 0;
+	VTONFS(tdvp)->n_flag |= NMODIFIED;
+	VTONFS(tdvp)->n_attrstamp = 0;
+	if (fvp->v_type == VDIR) {
+		if (tvp != NULL && tvp->v_type == VDIR)
+			cache_purge(tdvp);
+		cache_purge(fdvp);
+	}
+out:
+	if (tdvp == tvp)
+		vrele(tdvp);
+	else
+		vput(tdvp);
+	if (tvp)
+		vput(tvp);
+	vrele(fdvp);
+	vrele(fvp);
+	/*
+	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
+	 */
+	if (error == ENOENT)
+		error = 0;
+	return (error);
+}
+
+/*
+ * nfs file rename rpc called from nfs_remove() above
+ * Sends an NFSPROC_RENAME request that renames scnp's name to sp->s_name
+ * within the single directory sdvp (its file handle is sent as both the
+ * source and the target directory).  Afterwards the pathname buffer of
+ * scnp is freed and the directory is marked NMODIFIED with its attribute
+ * timestamp cleared.
+ * Returns the value of "error" after the rpc.
+ * NOTE(review): the header says the caller is nfs_remove(), but the call
+ * visible there goes to nfs_sillyrename() -- presumably that routine
+ * calls this one; confirm.
+ */
+int
+nfs_renameit(sdvp, scnp, sp)
+ struct vnode *sdvp;
+ struct componentname *scnp;
+ register struct sillyrename *sp;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ nfsstats.rpccnt[NFSPROC_RENAME]++;
+ nfsm_reqhead(sdvp, NFSPROC_RENAME,
+ (NFSX_FH+NFSX_UNSIGNED)*2+nfsm_rndup(scnp->cn_namelen)+
+ nfsm_rndup(sp->s_namlen));
+ nfsm_fhtom(sdvp);
+ nfsm_strtom(scnp->cn_nameptr, scnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_fhtom(sdvp);
+ nfsm_strtom(sp->s_name, sp->s_namlen, NFS_MAXNAMLEN);
+ nfsm_request(sdvp, NFSPROC_RENAME, scnp->cn_proc, scnp->cn_cred);
+ nfsm_reqdone;
+ FREE(scnp->cn_pnbuf, M_NAMEI);
+ VTONFS(sdvp)->n_flag |= NMODIFIED;
+ VTONFS(sdvp)->n_attrstamp = 0;
+ return (error);
+}
+
+/*
+ * nfs hard link create call
+ * Refuses with EXDEV when vp and tdvp are on different mounts; on that
+ * path vp is released with vrele() if it is the same vnode as tdvp, else
+ * with vput().
+ * Otherwise sends an NFSPROC_LINK request carrying the file handle of
+ * tdvp, then the file handle of vp, then the name from cnp.  Afterwards
+ * the pathname buffer is freed, the attribute timestamps of both vnodes
+ * are cleared, tdvp is marked NMODIFIED and vp is released with vrele().
+ * EEXIST is reported as success on the assumption that it answers a
+ * retransmitted request.
+ * NOTE(review): tdvp is not released anywhere in this function -- confirm
+ * the caller owns that reference.
+ */
+int
+nfs_link(ap)
+ struct vop_link_args /* {
+ struct vnode *a_vp;
+ struct vnode *a_tdvp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct vnode *tdvp = ap->a_tdvp;
+ register struct componentname *cnp = ap->a_cnp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ if (vp->v_mount != tdvp->v_mount) {
+ /*VOP_ABORTOP(vp, cnp);*/
+ if (tdvp == vp)
+ vrele(vp);
+ else
+ vput(vp);
+ return (EXDEV);
+ }
+
+ nfsstats.rpccnt[NFSPROC_LINK]++;
+ nfsm_reqhead(tdvp, NFSPROC_LINK,
+ NFSX_FH*2+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+ nfsm_fhtom(tdvp);
+ nfsm_fhtom(vp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_request(tdvp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
+ nfsm_reqdone;
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(tdvp)->n_attrstamp = 0;
+ VTONFS(tdvp)->n_flag |= NMODIFIED;
+ VTONFS(vp)->n_attrstamp = 0;
+ vrele(vp);
+ /*
+ * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+ */
+ if (error == EEXIST)
+ error = 0;
+ return (error);
+}
+
+/*
+ * nfs symbolic link create call
+ * Sends an NFSPROC_SYMLINK request carrying the directory handle, the new
+ * name from cnp, the target path ap->a_target and an nfsv2_sattr.  Both
+ * uid and gid are taken from the caller's credential, and the size is
+ * sent as -1.
+ * Nothing is parsed from the reply and *ap->a_vpp is not set here.
+ * Afterwards the pathname buffer is freed, the directory is marked
+ * NMODIFIED with its attribute timestamp cleared, and dvp is released
+ * with vrele().  EEXIST is reported as success on the assumption that it
+ * answers a retransmitted request.
+ */
+/* start here */
+int
+nfs_symlink(ap)
+ struct vop_symlink_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ char *a_target;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int slen, error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ nfsstats.rpccnt[NFSPROC_SYMLINK]++;
+ slen = strlen(ap->a_target);
+ isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH+2*NFSX_UNSIGNED+
+ nfsm_rndup(cnp->cn_namelen)+nfsm_rndup(slen)+NFSX_SATTR(isnq));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+ sp->sa_mode = vtonfs_mode(VLNK, vap->va_mode);
+ sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+ sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid);
+ if (isnq) {
+ quad_t qval = -1;
+
+ txdr_hyper(&qval, &sp->sa_nqsize);
+ sp->sa_nqflags = 0;
+ txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+ txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+ } else {
+ sp->sa_nfssize = -1;
+ txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+ txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+ }
+ nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
+ nfsm_reqdone;
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ vrele(dvp);
+ /*
+ * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+ */
+ if (error == EEXIST)
+ error = 0;
+ return (error);
+}
+
+/*
+ * nfs make dir call
+ * Sends an NFSPROC_MKDIR request for the name in cnp under dvp.  The uid
+ * is the caller's, the gid is the directory's as reported by
+ * VOP_GETATTR(dvp), and the size is sent as -1.  If VOP_GETATTR() fails
+ * the operation is aborted, dvp is released with vput() and that error is
+ * returned.
+ * The new vnode is built from the reply into *vpp by nfsm_mtofh(), and
+ * the directory is marked NMODIFIED with its attribute timestamp cleared.
+ * An EEXIST reply triggers one NFSPROC_LOOKUP of the same name; if the
+ * result is a directory the call succeeds, otherwise that vnode is
+ * released with vput() and EEXIST is returned.
+ * Before returning, the pathname buffer is freed and dvp is released with
+ * vrele().
+ */
+int
+nfs_mkdir(ap)
+ struct vop_mkdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct vnode **vpp = ap->a_vpp;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ register int len;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, firsttry = 1, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct vattr vattr;
+
+ if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (error);
+ }
+ len = cnp->cn_namelen;
+ isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ nfsstats.rpccnt[NFSPROC_MKDIR]++;
+ nfsm_reqhead(dvp, NFSPROC_MKDIR,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len)+NFSX_SATTR(isnq));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+ sp->sa_mode = vtonfs_mode(VDIR, vap->va_mode);
+ sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+ sp->sa_gid = txdr_unsigned(vattr.va_gid);
+ if (isnq) {
+ quad_t qval = -1;
+
+ txdr_hyper(&qval, &sp->sa_nqsize);
+ sp->sa_nqflags = 0;
+ txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+ txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+ } else {
+ sp->sa_nfssize = -1;
+ txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+ txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+ }
+ nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
+ nfsm_mtofh(dvp, *vpp);
+ nfsm_reqdone;
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ /*
+ * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
+ * if we can succeed in looking up the directory.
+ * "firsttry" is necessary since the macros may "goto nfsmout" which
+ * is above the if on errors. (Ugh)
+ * Clearing firsttry before the lookup keeps a failing lookup from
+ * entering this block a second time.
+ */
+ if (error == EEXIST && firsttry) {
+ firsttry = 0;
+ error = 0;
+ nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+ *vpp = NULL;
+ nfsm_reqhead(dvp, NFSPROC_LOOKUP,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+ nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
+ nfsm_mtofh(dvp, *vpp);
+ if ((*vpp)->v_type != VDIR) { /* the existing name is not a directory */
+ vput(*vpp);
+ error = EEXIST;
+ }
+ m_freem(mrep);
+ }
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ vrele(dvp);
+ return (error);
+}
+
+/*
+ * nfs remove directory call
+ * When dvp and vp are the same vnode, both references are dropped, the
+ * pathname buffer is freed and EINVAL is returned.
+ * Otherwise an NFSPROC_RMDIR request for the name in cnp is sent against
+ * dvp.  Afterwards the pathname buffer is freed, the parent is marked
+ * NMODIFIED with its attribute timestamp cleared, the name cache is
+ * purged for both vnodes and both are released with vrele().
+ * ENOENT is reported as success on the assumption that it answers a
+ * retransmitted request.
+ */
+int
+nfs_rmdir(ap)
+ struct vop_rmdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct vnode *dvp = ap->a_dvp;
+ register struct componentname *cnp = ap->a_cnp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ if (dvp == vp) {
+ vrele(dvp); /* dvp and vp are one vnode holding two references */
+ vrele(dvp);
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ return (EINVAL);
+ }
+ nfsstats.rpccnt[NFSPROC_RMDIR]++;
+ nfsm_reqhead(dvp, NFSPROC_RMDIR,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
+ nfsm_reqdone;
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ cache_purge(dvp);
+ cache_purge(vp);
+ vrele(vp);
+ vrele(dvp);
+ /*
+ * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
+ */
+ if (error == ENOENT)
+ error = 0;
+ return (error);
+}
+
+/*
+ * nfs readdir call
+ * Although cookie is defined as opaque, I translate it to/from net byte
+ * order so that it looks more sensible. This appears consistent with the
+ * Ultrix implementation of NFS.
+ *
+ * Non-directories are refused with EPERM.  A read positioned at the
+ * cached end-of-directory offset of an unmodified node returns 0 at once
+ * when the cache is still good: on nqnfs mounts when the read lease is
+ * cachable, elsewhere when the mtime from VOP_GETATTR() equals n_mtime.
+ * Everything else is handled by nfs_bioread().
+ */
+int
+nfs_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct nfsnode *np = VTONFS(vp);
+	struct uio *uio = ap->a_uio;
+	struct vattr va;
+	int resid_before, rv;
+	int eofhit = 0;
+
+	if (vp->v_type != VDIR)
+		return (EPERM);
+
+	/*
+	 * Is this a hit on the EOF offset cache?
+	 */
+	if (uio->uio_offset != 0 && uio->uio_offset == np->n_direofoffset &&
+	    !(np->n_flag & NMODIFIED)) {
+		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
+			if (NQNFS_CKCACHABLE(vp, NQL_READ))
+				eofhit = 1;
+		} else {
+			if (VOP_GETATTR(vp, &va, ap->a_cred,
+			    uio->uio_procp) == 0 &&
+			    np->n_mtime == va.va_mtime.ts_sec)
+				eofhit = 1;
+		}
+	}
+	if (eofhit) {
+		nfsstats.direofcache_hits++;
+		return (0);
+	}
+
+	/*
+	 * Let nfs_bioread() do the real work; a successful call that moved
+	 * no data counts as a miss on the EOF offset cache.
+	 */
+	resid_before = uio->uio_resid;
+	rv = nfs_bioread(vp, uio, 0, ap->a_cred);
+	if (rv == 0 && uio->uio_resid == resid_before)
+		nfsstats.direofcache_misses++;
+	return (rv);
+}
+
+/*
+ * Readdir rpc call.
+ * Called from below the buffer cache by nfs_doio().
+ *
+ * Issues NFSPROC_READDIR requests for vp until the server reports EOF or
+ * less than NFS_DIRBLKSIZ bytes remain in uiop.  The entries of each reply
+ * are rewritten in place, inside the reply mbufs, into struct dirent
+ * records and then copied to uiop with nfsm_mtouio().  On EOF the final
+ * offset is remembered in n_direofoffset.  Returns 0 or an errno value.
+ */
+int
+nfs_readdirrpc(vp, uiop, cred)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+{
+ register long len;
+ register struct dirent *dp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ long tlen, lastlen;
+ caddr_t bpos, dpos, cp2;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct mbuf *md2;
+ caddr_t dpos2;
+ int siz;
+ int more_dirs = 1;
+ u_long off, savoff;
+ struct dirent *savdp;
+ struct nfsmount *nmp;
+ struct nfsnode *np = VTONFS(vp);
+ long tresid;
+
+ nmp = VFSTONFS(vp->v_mount);
+ /* Remember the starting residual so progress can be detected later. */
+ tresid = uiop->uio_resid;
+ /*
+ * Loop around doing readdir rpc's of size uio_resid or nm_rsize,
+ * whichever is smaller, truncated to a multiple of NFS_DIRBLKSIZ.
+ * The stopping criteria is EOF or buffer full.
+ */
+ while (more_dirs && uiop->uio_resid >= NFS_DIRBLKSIZ) {
+ nfsstats.rpccnt[NFSPROC_READDIR]++;
+ nfsm_reqhead(vp, NFSPROC_READDIR,
+ NFSX_FH + 2 * NFSX_UNSIGNED);
+ nfsm_fhtom(vp);
+ /* Request arguments: starting offset, then byte count. */
+ nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
+ off = (u_long)uiop->uio_offset;
+ *tl++ = txdr_unsigned(off);
+ *tl = txdr_unsigned(((uiop->uio_resid > nmp->nm_rsize) ?
+ nmp->nm_rsize : uiop->uio_resid) & ~(NFS_DIRBLKSIZ-1));
+ nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
+ siz = 0;
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ more_dirs = fxdr_unsigned(int, *tl);
+
+ /* Save the position so that we can do nfsm_mtouio() later */
+ dpos2 = dpos;
+ md2 = md;
+
+ /* loop thru the dir entries, doctoring them to 4bsd form */
+#ifdef lint
+ dp = (struct dirent *)0;
+#endif /* lint */
+ while (more_dirs && siz < uiop->uio_resid) {
+ savoff = off; /* Hold onto offset and dp */
+ savdp = dp;
+ /* The dirent header overlays the first two words of the entry. */
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ dp = (struct dirent *)tl;
+ dp->d_fileno = fxdr_unsigned(u_long, *tl++);
+ len = fxdr_unsigned(int, *tl);
+ if (len <= 0 || len > NFS_MAXNAMLEN) {
+ error = EBADRPC;
+ m_freem(mrep);
+ goto nfsmout;
+ }
+ dp->d_namlen = (u_char)len;
+ dp->d_type = DT_UNKNOWN;
+ nfsm_adv(len); /* Point past name */
+ tlen = nfsm_rndup(len);
+ /*
+ * This should not be necessary, but some servers have
+ * broken XDR such that these bytes are not null filled.
+ */
+ if (tlen != len) {
+ *dpos = '\0'; /* Null-terminate */
+ nfsm_adv(tlen - len);
+ len = tlen;
+ }
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ off = fxdr_unsigned(u_long, *tl);
+ *tl++ = 0; /* Ensures null termination of name */
+ more_dirs = fxdr_unsigned(int, *tl);
+ /*
+ * The record spans the two words before the name, the padded
+ * name, and the two words dissected after it.
+ */
+ dp->d_reclen = len + 4 * NFSX_UNSIGNED;
+ siz += dp->d_reclen;
+ }
+ /*
+ * If at end of rpc data, get the eof boolean
+ */
+ if (!more_dirs) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ more_dirs = (fxdr_unsigned(int, *tl) == 0);
+
+ /*
+ * If at EOF, cache directory offset
+ */
+ if (!more_dirs)
+ np->n_direofoffset = off;
+ }
+ /*
+ * If there is too much to fit in the data buffer, use savoff and
+ * savdp to trim off the last record.
+ * --> we are not at eof
+ */
+ if (siz > uiop->uio_resid) {
+ off = savoff;
+ siz -= dp->d_reclen;
+ dp = savdp;
+ more_dirs = 0; /* Paranoia */
+ }
+ if (siz > 0) {
+ /* Rewind to the saved position and copy the doctored records out. */
+ lastlen = dp->d_reclen;
+ md = md2;
+ dpos = dpos2;
+ nfsm_mtouio(uiop, siz);
+ uiop->uio_offset = (off_t)off;
+ } else
+ more_dirs = 0; /* Ugh, never happens, but in case.. */
+ m_freem(mrep);
+ }
+ /*
+ * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ
+ * by increasing d_reclen for the last record.
+ */
+ if (uiop->uio_resid < tresid) {
+ len = uiop->uio_resid & (NFS_DIRBLKSIZ - 1);
+ if (len > 0) {
+ /* The last record copied ends at iov_base; step back lastlen bytes. */
+ dp = (struct dirent *)
+ (uiop->uio_iov->iov_base - lastlen);
+ dp->d_reclen += len;
+ uiop->uio_iov->iov_base += len;
+ uiop->uio_iov->iov_len -= len;
+ uiop->uio_resid -= len;
+ }
+ }
+nfsmout:
+ return (error);
+}
+
+/*
+ * Nqnfs readdir_and_lookup RPC. Used in place of nfs_readdirrpc().
+ *
+ * Issues NQNFSPROC_READDIRLOOK requests for the directory vp until the
+ * server reports EOF or fewer than NFS_DIRBLKSIZ bytes remain in uiop.
+ * Each reply entry carries a file handle and attributes in addition to the
+ * name: the attributes are loaded into the nfsnode for that handle, a
+ * struct dirent is built directly in the (single) iovec of uiop, and the
+ * name is entered into the name cache.  Returns 0 or an errno value.
+ *
+ * If nfs_nget() cannot supply a node for an entry the reply is dropped and
+ * the error is returned; the node pointer is never used in that case.
+ */
+int
+nfs_readdirlookrpc(vp, uiop, cred)
+	struct vnode *vp;
+	register struct uio *uiop;
+	struct ucred *cred;
+{
+	register int len;
+	register struct dirent *dp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct nameidata nami, *ndp = &nami;
+	struct componentname *cnp = &ndp->ni_cnd;
+	u_long off, endoff, fileno;
+	time_t reqtime, ltime;
+	struct nfsmount *nmp;
+	struct nfsnode *np;
+	struct vnode *newvp;
+	nfsv2fh_t *fhp;
+	u_quad_t frev;
+	int error = 0, tlen, more_dirs = 1, tresid, doit, bigenough, i;
+	int cachable;
+
+	/* The dirent records are built in place, so one iovec is required. */
+	if (uiop->uio_iovcnt != 1)
+		panic("nfs rdirlook");
+	nmp = VFSTONFS(vp->v_mount);
+	tresid = uiop->uio_resid;
+	ndp->ni_dvp = vp;
+	newvp = NULLVP;
+	/*
+	 * Loop around doing readdir rpc's of size uio_resid or nm_rsize,
+	 * whichever is smaller, truncated to a multiple of NFS_DIRBLKSIZ.
+	 * The stopping criteria is EOF or buffer full.
+	 */
+	while (more_dirs && uiop->uio_resid >= NFS_DIRBLKSIZ) {
+		nfsstats.rpccnt[NQNFSPROC_READDIRLOOK]++;
+		nfsm_reqhead(vp, NQNFSPROC_READDIRLOOK,
+			NFSX_FH + 3 * NFSX_UNSIGNED);
+		nfsm_fhtom(vp);
+		/* Arguments: offset, byte count, requested lease term (or 0). */
+		nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
+		off = (u_long)uiop->uio_offset;
+		*tl++ = txdr_unsigned(off);
+		*tl++ = txdr_unsigned(((uiop->uio_resid > nmp->nm_rsize) ?
+			nmp->nm_rsize : uiop->uio_resid) & ~(NFS_DIRBLKSIZ-1));
+		if (nmp->nm_flag & NFSMNT_NQLOOKLEASE)
+			*tl = txdr_unsigned(nmp->nm_leaseterm);
+		else
+			*tl = 0;
+		reqtime = time.tv_sec;
+		nfsm_request(vp, NQNFSPROC_READDIRLOOK, uiop->uio_procp, cred);
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		more_dirs = fxdr_unsigned(int, *tl);
+
+		/* loop thru the dir entries, doctoring them to 4bsd form */
+		bigenough = 1;
+		while (more_dirs && bigenough) {
+			doit = 1;
+			nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED);
+			if (nmp->nm_flag & NFSMNT_NQLOOKLEASE) {
+				cachable = fxdr_unsigned(int, *tl++);
+				ltime = reqtime + fxdr_unsigned(int, *tl++);
+				fxdr_hyper(tl, &frev);
+			}
+			nfsm_dissect(fhp, nfsv2fh_t *, NFSX_FH);
+			if (!bcmp(VTONFS(vp)->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+				/* The entry is the directory itself. */
+				VREF(vp);
+				newvp = vp;
+				np = VTONFS(vp);
+			} else {
+				/*
+				 * Without a node there is nothing to load
+				 * the attributes into, and np must not be
+				 * dereferenced.  Give up on this reply the
+				 * same way the bad-length check below does;
+				 * newvp is still NULLVP at this point.
+				 */
+				if ((error = nfs_nget(vp->v_mount, fhp, &np)) != 0) {
+					m_freem(mrep);
+					goto nfsmout;
+				}
+				newvp = NFSTOV(np);
+			}
+			if ((error = nfs_loadattrcache(&newvp, &md, &dpos,
+				(struct vattr *)0)) != 0)
+				doit = 0;
+			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+			fileno = fxdr_unsigned(u_long, *tl++);
+			len = fxdr_unsigned(int, *tl);
+			if (len <= 0 || len > NFS_MAXNAMLEN) {
+				error = EBADRPC;
+				m_freem(mrep);
+				goto nfsmout;
+			}
+			/* Name length rounded up with room for a terminator. */
+			tlen = (len + 4) & ~0x3;
+			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
+				bigenough = 0;
+			if (bigenough && doit) {
+				/* Build the dirent header directly in the iovec. */
+				dp = (struct dirent *)uiop->uio_iov->iov_base;
+				dp->d_fileno = fileno;
+				dp->d_namlen = len;
+				dp->d_reclen = tlen + DIRHDSIZ;
+				dp->d_type =
+					IFTODT(VTTOIF(np->n_vattr.va_type));
+				uiop->uio_resid -= DIRHDSIZ;
+				uiop->uio_iov->iov_base += DIRHDSIZ;
+				uiop->uio_iov->iov_len -= DIRHDSIZ;
+				cnp->cn_nameptr = uiop->uio_iov->iov_base;
+				cnp->cn_namelen = len;
+				ndp->ni_vp = newvp;
+				nfsm_mtouio(uiop, len);
+				/* Zero-fill the padding after the name. */
+				cp = uiop->uio_iov->iov_base;
+				tlen -= len;
+				for (i = 0; i < tlen; i++)
+					*cp++ = '\0';
+				uiop->uio_iov->iov_base += tlen;
+				uiop->uio_iov->iov_len -= tlen;
+				uiop->uio_resid -= tlen;
+				/* Position-weighted byte sum of the name. */
+				cnp->cn_hash = 0;
+				for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++)
+					cnp->cn_hash += (unsigned char)*cp * i;
+				if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) &&
+					ltime > time.tv_sec)
+					nqnfs_clientlease(nmp, np, NQL_READ,
+						cachable, ltime, frev);
+				if (cnp->cn_namelen <= NCHNAMLEN)
+					cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
+			} else {
+				nfsm_adv(nfsm_rndup(len));
+			}
+			if (newvp != NULLVP) {
+				vrele(newvp);
+				newvp = NULLVP;
+			}
+			/*
+			 * "off" only advances for entries that were accepted
+			 * while the buffer still had room; "endoff" always
+			 * tracks the last offset seen in the reply.
+			 */
+			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+			if (bigenough)
+				endoff = off = fxdr_unsigned(u_long, *tl++);
+			else
+				endoff = fxdr_unsigned(u_long, *tl++);
+			more_dirs = fxdr_unsigned(int, *tl);
+		}
+		/*
+		 * If at end of rpc data, get the eof boolean
+		 */
+		if (!more_dirs) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			more_dirs = (fxdr_unsigned(int, *tl) == 0);
+
+			/*
+			 * If at EOF, cache directory offset
+			 */
+			if (!more_dirs)
+				VTONFS(vp)->n_direofoffset = endoff;
+		}
+		if (uiop->uio_resid < tresid)
+			uiop->uio_offset = (off_t)off;
+		else
+			more_dirs = 0;
+		m_freem(mrep);
+	}
+	/*
+	 * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ
+	 * by increasing d_reclen for the last record.
+	 */
+	if (uiop->uio_resid < tresid) {
+		len = uiop->uio_resid & (NFS_DIRBLKSIZ - 1);
+		if (len > 0) {
+			dp->d_reclen += len;
+			uiop->uio_iov->iov_base += len;
+			uiop->uio_iov->iov_len -= len;
+			uiop->uio_resid -= len;
+		}
+	}
+nfsmout:
+	if (newvp != NULLVP)
+		vrele(newvp);
+	return (error);
+}
+/* Hex digit lookup table; nfs_sillyrename() indexes it with 4-bit values. */
+static char hextoasc[] = "0123456789abcdef";
+
+/*
+ * Silly rename. NFS is stateless, so to look a little more like "ufs" the
+ * removal of an active vnode is turned into a rename to a funny looking
+ * filename, which nfs_inactive later removes on the nfsnode. Another
+ * process on a different client could create the same funny name between
+ * the failing nfs_lookitup() and the completion of the rename, but...
+ *
+ * The name is ".nfsAxxxx4.4" where xxxx is the low 16 bits of the pid in
+ * hex; the 'A' is bumped until a name is found that does not exist.
+ * Returns 0 on success, EINVAL when the candidates run out, or the error
+ * from nfs_renameit().
+ */
+int
+nfs_sillyrename(dvp, vp, cnp)
+	struct vnode *dvp, *vp;
+	struct componentname *cnp;
+{
+	register struct nfsnode *np;
+	register struct sillyrename *sp;
+	int error, nibble;
+	short pid;
+
+	cache_purge(dvp);
+	np = VTONFS(vp);
+#ifdef SILLYSEPARATE
+	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
+		M_NFSREQ, M_WAITOK);
+#else
+	sp = &np->n_silly;
+#endif
+	/* The record keeps its own credential and directory reference. */
+	sp->s_cred = crdup(cnp->cn_cred);
+	sp->s_dvp = dvp;
+	VREF(dvp);
+
+	/* Template first, then the pid digits from least significant up. */
+	pid = cnp->cn_proc->p_pid;
+	bcopy(".nfsAxxxx4.4", sp->s_name, 13);
+	sp->s_namlen = 12;
+	for (nibble = 0; nibble < 4; nibble++)
+		sp->s_name[8 - nibble] = hextoasc[(pid >> (4 * nibble)) & 0xf];
+
+	/* Probe until a name is found that the server does not have. */
+	for (;;) {
+		if (nfs_lookitup(sp, (nfsv2fh_t *)0, cnp->cn_proc) != 0)
+			break;
+		if (++sp->s_name[4] > 'z') {
+			error = EINVAL;
+			goto bad;
+		}
+	}
+	error = nfs_renameit(dvp, cnp, sp);
+	if (error)
+		goto bad;
+	/* Pick up the file handle under the new name; result is ignored. */
+	nfs_lookitup(sp, &np->n_fh, cnp->cn_proc);
+	np->n_sillyrename = sp;
+	return (0);
+
+bad:
+	vrele(sp->s_dvp);
+	crfree(sp->s_cred);
+#ifdef SILLYSEPARATE
+	free((caddr_t)sp, M_NFSREQ);
+#endif
+	return (error);
+}
+
+/*
+ * Look up a file name for silly rename stuff.
+ * Just like nfs_lookup() except that it doesn't load returned values
+ * into the nfsnode table.
+ * If fhp != NULL it copies the returned file handle out
+ *
+ * sp supplies the directory (s_dvp), the name (s_name/s_namlen) and the
+ * credential (s_cred) for the NFSPROC_LOOKUP request.  Returns 0 when the
+ * lookup succeeds and an errno value otherwise; nfs_sillyrename() passes a
+ * null fhp to use it purely as an existence probe.
+ */
+int
+nfs_lookitup(sp, fhp, procp)
+ register struct sillyrename *sp;
+ nfsv2fh_t *fhp;
+ struct proc *procp;
+{
+ register struct vnode *vp = sp->s_dvp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ long len;
+
+ isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+ len = sp->s_namlen;
+ nfsm_reqhead(vp, NFSPROC_LOOKUP, NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+ /* On NQNFS mounts one extra zero word precedes the file handle. */
+ if (isnq) {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ nfsm_fhtom(vp);
+ nfsm_strtom(sp->s_name, len, NFS_MAXNAMLEN);
+ nfsm_request(vp, NFSPROC_LOOKUP, procp, sp->s_cred);
+ if (fhp != NULL) {
+ /* On NQNFS mounts one word is dissected ahead of the handle. */
+ if (isnq)
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ nfsm_dissect(cp, caddr_t, NFSX_FH);
+ bcopy(cp, (caddr_t)fhp, NFSX_FH);
+ }
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * Kludge City..
+ * - nfs_bmap() is essentially a no-op that does no translation
+ * - nfs_strategy() fakes physical I/O with nfs_readrpc/nfs_writerpc after
+ *   mapping the physical addresses into Kernel Virtual space in the
+ *   nfsiobuf area.
+ *   (Maybe the process's page mapping could be used instead, but Kernel
+ *   Write might not be enabled, copyout() would presumably do a lot more
+ *   work than bcopy(), and this currently happens in the context of the
+ *   swapper process (2).)
+ *
+ * nfs_bmap() reports the vnode itself as the "device" vnode and converts
+ * the logical block number to units of btodb(f_iosize).  a_runp is left
+ * untouched.  Always returns 0.
+ */
+int
+nfs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+
+	/* Both outputs are optional. */
+	if (ap->a_bnp != NULL)
+		*ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize);
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = vp;
+	return (0);
+}
+
+/*
+ * Strategy routine.
+ * For async requests when nfsiod(s) are running, queue the request by
+ * calling nfs_asyncio(), otherwise just call nfs_doio() to do the
+ * request.
+ *
+ * Panics on a B_PHYS buffer.  Returns the result of nfs_doio() when the
+ * I/O is done here, and 0 when nfs_asyncio() accepted the buffer.
+ */
+int
+nfs_strategy(ap)
+	struct vop_strategy_args *ap;
+{
+	register struct buf *bp = ap->a_bp;
+	struct ucred *cr;
+	struct proc *p;
+
+	if (bp->b_flags & B_PHYS)
+		panic("nfs physio");
+
+	/* Async I/O has no process context; sync I/O uses the caller's. */
+	p = (bp->b_flags & B_ASYNC) ? (struct proc *)0 : curproc; /* XXX */
+	cr = (bp->b_flags & B_READ) ? bp->b_rcred : bp->b_wcred;
+
+	/*
+	 * If the op is asynchronous and an i/o daemon is waiting
+	 * queue the request, wake it up and wait for completion
+	 * otherwise just do it ourselves.
+	 */
+	if (bp->b_flags & B_ASYNC) {
+		if (nfs_asyncio(bp, NOCRED) == 0)
+			return (0);
+	}
+	return (nfs_doio(bp, cr, p));
+}
+
+/*
+ * Mmap a file
+ *
+ * NB Currently unsupported: every call is rejected with EINVAL and the
+ * arguments are not examined.
+ */
+/* ARGSUSED */
+int
+nfs_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	return (EINVAL);
+}
+
+/*
+ * Flush all the blocks associated with a vnode.
+ * Walk through the buffer pool and push any dirty pages
+ * associated with the vnode.
+ *
+ * Every buffer on the vnode's dirty list is written asynchronously.  With
+ * a_waitfor == MNT_WAIT the routine also waits for busy buffers and for
+ * v_numoutput to reach zero, restarting the scan if buffers became dirty
+ * meanwhile.  Returns EINTR when nfs_sigintr() reports an interrupt during
+ * a sleep; otherwise returns a pending write error (n_error, when
+ * NWRITEERR is set, which is then cleared) or the current value of "error".
+ *
+ * NOTE(review): a non-zero tsleep() result that is not treated as an
+ * interrupt stays in "error" and can reach the final return unless a later
+ * sleep overwrites it -- confirm whether that is intended.
+ */
+/* ARGSUSED */
+int
+nfs_fsync(ap)
+ struct vop_fsync_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode * a_vp;
+ struct ucred * a_cred;
+ int a_waitfor;
+ struct proc * a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ register struct buf *bp;
+ struct buf *nbp;
+ struct nfsmount *nmp;
+ int s, error = 0, slptimeo = 0, slpflag = 0;
+
+ nmp = VFSTONFS(vp->v_mount);
+ /* On NFSMNT_INT mounts the sleeps below are made with PCATCH. */
+ if (nmp->nm_flag & NFSMNT_INT)
+ slpflag = PCATCH;
+loop:
+ s = splbio();
+ for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+ nbp = bp->b_vnbufs.le_next;
+ if (bp->b_flags & B_BUSY) {
+ /* Busy buffers are skipped unless the caller asked to wait. */
+ if (ap->a_waitfor != MNT_WAIT)
+ continue;
+ bp->b_flags |= B_WANTED;
+ error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
+ "nfsfsync", slptimeo);
+ splx(s);
+ if (error) {
+ if (nfs_sigintr(nmp, (struct nfsreq *)0, ap->a_p))
+ return (EINTR);
+ /*
+ * After the first failed sleep, stop using PCATCH and
+ * sleep with a timeout of 2 * hz instead.
+ */
+ if (slpflag == PCATCH) {
+ slpflag = 0;
+ slptimeo = 2 * hz;
+ }
+ }
+ /* The list may have changed while asleep: rescan from the top. */
+ goto loop;
+ }
+ if ((bp->b_flags & B_DELWRI) == 0)
+ panic("nfs_fsync: not dirty");
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ splx(s);
+ bp->b_flags |= B_ASYNC;
+ VOP_BWRITE(bp);
+ goto loop;
+ }
+ splx(s);
+ if (ap->a_waitfor == MNT_WAIT) {
+ /* Wait for the outstanding writes counted in v_numoutput. */
+ while (vp->v_numoutput) {
+ vp->v_flag |= VBWAIT;
+ error = tsleep((caddr_t)&vp->v_numoutput,
+ slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
+ if (error) {
+ if (nfs_sigintr(nmp, (struct nfsreq *)0, ap->a_p))
+ return (EINTR);
+ if (slpflag == PCATCH) {
+ slpflag = 0;
+ slptimeo = 2 * hz;
+ }
+ }
+ }
+ /* Buffers were dirtied again in the meantime: start over. */
+ if (vp->v_dirtyblkhd.lh_first) {
+#ifdef DIAGNOSTIC
+ vprint("nfs_fsync: dirty", vp);
+#endif
+ goto loop;
+ }
+ }
+ /* Report, once, a write error recorded on the nfsnode. */
+ if (np->n_flag & NWRITEERR) {
+ error = np->n_error;
+ np->n_flag &= ~NWRITEERR;
+ }
+ return (error);
+}
+
+/*
+ * Return POSIX pathconf information applicable to nfs.
+ *
+ * Currently the NFS protocol does not support getting such
+ * information from the remote server, so every query is rejected with
+ * EINVAL and *a_retval is never written.
+ *
+ * The return type is spelled out (it used to rely on implicit int) so the
+ * definition matches the other vnode operations in this file.
+ */
+/* ARGSUSED */
+int
+nfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * NFS advisory byte-level locks.
+ * Currently unsupported: always answers EOPNOTSUPP without looking at
+ * the lock request.
+ */
+int
+nfs_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t a_id;
+		int a_op;
+		struct flock *a_fl;
+		int a_flags;
+	} */ *ap;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Print out the contents of an nfsnode.
+ *
+ * Prints the file id and file system id from the cached attributes of
+ * ap->a_vp, followed by fifo details when FIFO is configured and the
+ * vnode is a fifo.  Always returns 0; the function is declared int, so
+ * it must not fall off the end without a value.
+ */
+int
+nfs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+
+	printf("tag VT_NFS, fileid %d fsid 0x%x",
+		np->n_vattr.va_fileid, np->n_vattr.va_fsid);
+#ifdef FIFO
+	if (vp->v_type == VFIFO)
+		fifo_printinfo(vp);
+#endif /* FIFO */
+	printf("\n");
+	return (0);
+}
+
+/*
+ * NFS directory offset lookup.
+ * Currently unsupported: always answers EOPNOTSUPP; a_res and a_bpp are
+ * not written.
+ */
+int
+nfs_blkatoff(ap)
+	struct vop_blkatoff_args /* {
+		struct vnode *a_vp;
+		off_t a_offset;
+		char **a_res;
+		struct buf **a_bpp;
+	} */ *ap;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS flat namespace allocation.
+ * Currently unsupported: always answers EOPNOTSUPP; *a_vpp is not
+ * written.
+ */
+int
+nfs_valloc(ap)
+	struct vop_valloc_args /* {
+		struct vnode *a_pvp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct vnode **a_vpp;
+	} */ *ap;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS flat namespace free.
+ * Currently unsupported: always answers EOPNOTSUPP.
+ */
+int
+nfs_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS file truncation.
+ *
+ * Not implemented: logs a reminder on the console and answers
+ * EOPNOTSUPP.  The intended implementation is via nfs_setattr.
+ */
+int
+nfs_truncate(ap)
+	struct vop_truncate_args /* {
+		struct vnode *a_vp;
+		off_t a_length;
+		int a_flags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/* Use nfs_setattr */
+	printf("nfs_truncate: need to implement!!");
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS update.
+ *
+ * Not implemented: logs a reminder on the console and answers
+ * EOPNOTSUPP.  The intended implementation is via nfs_setattr.
+ */
+int
+nfs_update(ap)
+	struct vop_update_args /* {
+		struct vnode *a_vp;
+		struct timeval *a_ta;
+		struct timeval *a_tm;
+		int a_waitfor;
+	} */ *ap;
+{
+	/* Use nfs_setattr */
+	printf("nfs_update: need to implement!!");
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * nfs special file access vnode op.
+ * Essentially just get vattr and then imitate iaccess() since the device is
+ * local to the client.
+ *
+ * Returns 0 when access is granted, EACCES when it is not, or the error
+ * from VOP_GETATTR().  uid 0 is granted access without fetching
+ * attributes.
+ */
+int
+nfsspec_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct ucred *cred = ap->a_cred;
+	mode_t want = ap->a_mode;
+	struct vattr va;
+	int error, idx, ingroup;
+
+	/* The super-user always gets access. */
+	if (cred->cr_uid == 0)
+		return (0);
+
+	error = VOP_GETATTR(ap->a_vp, &va, cred, ap->a_p);
+	if (error)
+		return (error);
+
+	/*
+	 * Exactly one permission class applies: owner, else group, else
+	 * public.  Each step down shifts the requested bits by 3.
+	 */
+	if (cred->cr_uid != va.va_uid) {
+		want >>= 3;
+		ingroup = 0;
+		for (idx = 0; idx < cred->cr_ngroups; idx++) {
+			if (va.va_gid == cred->cr_groups[idx]) {
+				ingroup = 1;
+				break;
+			}
+		}
+		if (!ingroup)
+			want >>= 3;
+	}
+	if ((va.va_mode & want) != want)
+		return (EACCES);
+	return (0);
+}
+
+/*
+ * Read wrapper for special devices.
+ *
+ * Records the access on the nfsnode (NACC flag and current time in
+ * n_atim), then hands the call to the spec read operation.
+ */
+int
+nfsspec_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct nfsnode *node = VTONFS(ap->a_vp);
+
+	/* Stamp the access time and flag it. */
+	node->n_atim = time;
+	node->n_flag |= NACC;
+
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for special devices.
+ *
+ * Records the update on the nfsnode (NUPD flag and current time in
+ * n_mtim), then hands the call to the spec write operation.
+ */
+int
+nfsspec_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct nfsnode *node = VTONFS(ap->a_vp);
+
+	/* Stamp the modification time and flag it. */
+	node->n_mtim = time;
+	node->n_flag |= NUPD;
+
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for special devices.
+ *
+ * Update the times on the nfsnode then do device close.
+ *
+ * When the node was accessed or updated it is marked NCHG; if this is the
+ * only use of the vnode and the mount is not read-only, the recorded
+ * times are pushed with VOP_SETATTR() (its result is ignored).  The
+ * return value is that of the spec close operation.
+ */
+int
+nfsspec_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	struct vattr va;
+
+	if ((np->n_flag & (NACC | NUPD)) != 0) {
+		np->n_flag |= NCHG;
+		if (vp->v_usecount == 1 &&
+		    !(vp->v_mount->mnt_flag & MNT_RDONLY)) {
+			VATTR_NULL(&va);
+			/* timeval microseconds become timespec nanoseconds */
+			if (np->n_flag & NACC) {
+				va.va_atime.ts_sec = np->n_atim.tv_sec;
+				va.va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+			}
+			if (np->n_flag & NUPD) {
+				va.va_mtime.ts_sec = np->n_mtim.tv_sec;
+				va.va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+			}
+			(void)VOP_SETATTR(vp, &va, ap->a_cred, ap->a_p);
+		}
+	}
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
+}
+
+#ifdef FIFO
+/*
+ * Read wrapper for fifos.
+ *
+ * Records the access on the nfsnode (NACC flag and current time in
+ * n_atim), then hands the call to the fifo read operation.
+ */
+int
+nfsfifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct nfsnode *node = VTONFS(ap->a_vp);
+
+	/* Stamp the access time and flag it. */
+	node->n_atim = time;
+	node->n_flag |= NACC;
+
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for fifos.
+ *
+ * Records the update on the nfsnode (NUPD flag and current time in
+ * n_mtim), then hands the call to the fifo write operation.
+ */
+int
+nfsfifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct nfsnode *node = VTONFS(ap->a_vp);
+
+	/* Stamp the modification time and flag it. */
+	node->n_mtim = time;
+	node->n_flag |= NUPD;
+
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for fifos.
+ *
+ * Update the times on the nfsnode then do fifo close.
+ *
+ * When the node was accessed or updated, the matching time stamps are
+ * refreshed to the current time and the node is marked NCHG; if this is
+ * the only use of the vnode and the mount is not read-only, the times are
+ * pushed with VOP_SETATTR() (its result is ignored).  The return value is
+ * that of the fifo close operation.
+ */
+int
+nfsfifo_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	struct vattr va;
+
+	if ((np->n_flag & (NACC | NUPD)) != 0) {
+		if (np->n_flag & NACC)
+			np->n_atim = time;
+		if (np->n_flag & NUPD)
+			np->n_mtim = time;
+		np->n_flag |= NCHG;
+		if (vp->v_usecount == 1 &&
+		    !(vp->v_mount->mnt_flag & MNT_RDONLY)) {
+			VATTR_NULL(&va);
+			/* timeval microseconds become timespec nanoseconds */
+			if (np->n_flag & NACC) {
+				va.va_atime.ts_sec = np->n_atim.tv_sec;
+				va.va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+			}
+			if (np->n_flag & NUPD) {
+				va.va_mtime.ts_sec = np->n_mtim.tv_sec;
+				va.va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+			}
+			(void)VOP_SETATTR(vp, &va, ap->a_cred, ap->a_p);
+		}
+	}
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
+}
+#endif /* FIFO */
diff --git a/sys/nfsclient/nfsargs.h b/sys/nfsclient/nfsargs.h
new file mode 100644
index 000000000000..261fd42657a7
--- /dev/null
+++ b/sys/nfsclient/nfsargs.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ *
+ * The *TIMEO values are expressed in NFS_HZ ticks; the attribute cache
+ * timeouts are in seconds.
+ */
+
+/* NOTE(review): presumably a limit on iovec entries per request -- confirm. */
+#define NFS_MAXIOVEC 34
+#define NFS_HZ 25 /* Ticks per second for NFS timeouts */
+#define NFS_TIMEO (1*NFS_HZ) /* Default timeout = 1 second */
+#define NFS_MINTIMEO (1*NFS_HZ) /* Min timeout to use */
+#define NFS_MAXTIMEO (60*NFS_HZ) /* Max timeout to backoff to */
+#define NFS_MINIDEMTIMEO (5*NFS_HZ) /* Min timeout for non-idempotent ops*/
+#define NFS_MAXREXMIT 100 /* Stop counting after this many */
+#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
+#define NFS_RETRANS 10 /* Num of retrans for soft mounts */
+#define NFS_MAXGRPS 16 /* Max. size of groups list */
+#define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */
+/* Upper clamp applied by NFS_ATTRTIMEO(), same unit as NFS_MINATTRTIMO. */
+#define NFS_MAXATTRTIMO 60
+#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
+#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
+#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
+#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
+#define NFS_MAXREADDIR NFS_MAXDATA /* Max. size of directory read */
+#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
+#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runable */
+#define NFS_DIRBLKSIZ 1024 /* Size of an NFS directory block */
+/* Reduce "a" modulo nfs_asyncdaemons. */
+#define NMOD(a) ((a) % nfs_asyncdaemons)
+
+/*
+ * Set the attribute timeout based on how recently the file has been modified.
+ *
+ * The timeout is one tenth of the time since n_mtime, clamped to the range
+ * [NFS_MINATTRTIMO, NFS_MAXATTRTIMO]; a node flagged NMODIFIED always gets
+ * NFS_MINATTRTIMO.  The argument is evaluated more than once, so it must
+ * not have side effects.
+ */
+#define NFS_ATTRTIMEO(np) \
+ ((((np)->n_flag & NMODIFIED) || \
+ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+ (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try and use it.
+ */
+/* Describes one socket to be served, with the peer address if any. */
+struct nfsd_args {
+ int sock; /* Socket to serve */
+ caddr_t name; /* Client address for connection based sockets */
+ int namelen; /* Length of name */
+};
+
+/*
+ * nfssvc(2) argument block that maps a uid to a credential; the fields
+ * marked (ret) carry an authentication string back.
+ */
+struct nfsd_srvargs {
+ struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */
+ uid_t nsd_uid; /* Effective uid mapped to cred */
+ u_long nsd_haddr; /* Ip address of client */
+ struct ucred nsd_cr; /* Cred. uid maps to */
+ int nsd_authlen; /* Length of auth string (ret) */
+ char *nsd_authstr; /* Auth string (ret) */
+};
+
+/*
+ * nfssvc(2) argument block naming a mount directory together with an
+ * authenticator (type, length and string) for a uid.
+ */
+struct nfsd_cargs {
+ char *ncd_dirp; /* Mount dir path */
+ uid_t ncd_authuid; /* Effective uid */
+ int ncd_authtype; /* Type of authenticator */
+ int ncd_authlen; /* Length of authenticator string */
+ char *ncd_authstr; /* Authenticator string */
+};
+
+/*
+ * Stats structure
+ *
+ * Plain event counters.  The rpccnt[] and srvrpccnt[] arrays hold one
+ * counter per procedure number (NFS_NPROCS entries each).
+ */
+struct nfsstats {
+ /* cache hit/miss counters */
+ int attrcache_hits;
+ int attrcache_misses;
+ int lookupcache_hits;
+ int lookupcache_misses;
+ int direofcache_hits;
+ int direofcache_misses;
+ /* buffer cache / physical I/O counters, by operation */
+ int biocache_reads;
+ int read_bios;
+ int read_physios;
+ int biocache_writes;
+ int write_bios;
+ int write_physios;
+ int biocache_readlinks;
+ int readlink_bios;
+ int biocache_readdirs;
+ int readdir_bios;
+ /* RPC counters; the srv* members presumably count server-side events */
+ int rpccnt[NFS_NPROCS];
+ int rpcretries;
+ int srvrpccnt[NFS_NPROCS];
+ int srvrpc_errs;
+ int srv_errs;
+ int rpcrequests;
+ int rpctimeouts;
+ int rpcunexpected;
+ int rpcinvalid;
+ int srvcache_inproghits;
+ int srvcache_idemdonehits;
+ int srvcache_nonidemdonehits;
+ int srvcache_misses;
+ int srvnqnfs_leases;
+ int srvnqnfs_maxleases;
+ int srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ *
+ * Each flag is a distinct bit.  Bits 0x001 and 0x020 are not assigned
+ * here.
+ */
+#define NFSSVC_BIOD 0x002
+#define NFSSVC_NFSD 0x004
+#define NFSSVC_ADDSOCK 0x008
+#define NFSSVC_AUTHIN 0x010
+#define NFSSVC_GOTAUTH 0x040
+#define NFSSVC_AUTHINFAIL 0x080
+#define NFSSVC_MNTD 0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+ sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all
+ *
+ * True when error "e" is none of EINTR, ERESTART or EWOULDBLOCK and the
+ * flags "s" do not include PR_CONNREQUIRED.
+ */
+#define NFSIGNORE_SOERROR(s, e) \
+ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+ ((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element
+ *
+ * Elements are doubly linked through r_next/r_prev.
+ */
+struct nfsreq {
+ struct nfsreq *r_next;
+ struct nfsreq *r_prev;
+ /* presumably the request mbuf chain, the reply chain and the */
+ /* parse position within the reply -- confirm against the users */
+ struct mbuf *r_mreq;
+ struct mbuf *r_mrep;
+ struct mbuf *r_md;
+ caddr_t r_dpos;
+ struct nfsmount *r_nmp;
+ struct vnode *r_vp;
+ u_long r_xid;
+ int r_flags; /* flags on request, see below */
+ int r_retry; /* max retransmission count */
+ int r_rexmit; /* current retrans count */
+ int r_timer; /* tick counter on reply */
+ int r_procnum; /* NFS procedure number */
+ int r_rtt; /* RTT for rpc */
+ struct proc *r_procp; /* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING 0x01 /* timing request (in mntp) */
+#define R_SENT 0x02 /* request has been sent */
+#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
+#define R_INTR 0x08 /* intr mnt, signal pending */
+#define R_SOCKERR 0x10 /* Fatal error on socket */
+#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
+#define R_MUSTRESEND 0x40 /* Must resend request */
+#define R_GETONEREP 0x80 /* Probe for one reply only */
+
+/*
+ * The statistics instance updated throughout the NFS code.
+ * NOTE(review): this defines (not merely declares) the object in a header,
+ * so every file including it under KERNEL gets a tentative definition --
+ * confirm that the toolchain merges them as intended.
+ */
+struct nfsstats nfsstats;
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ *
+ * NUIDHASH() masks a uid down to a bucket index; the mask only works
+ * because NUIDHASHSIZ is a power of two.
+ */
+#define NUIDHASHSIZ 32
+#define NUIDHASH(uid) ((uid) & (NUIDHASHSIZ - 1))
+
+/*
+ * Network address hash list element
+ *
+ * Holds either an inet address value or a pointer to an mbuf; in struct
+ * nfsuid the NU_INETADDR flag bit presumably tells which member is valid.
+ */
+union nethostaddr {
+ u_long had_inetaddr;
+ struct mbuf *had_nam;
+};
+
+/*
+ * One uid-to-credential mapping.  Each entry sits on two doubly linked
+ * lists at once: an lru list (nu_lrunext/nu_lruprev) and a hash chain
+ * (nu_hnext/nu_hprev).
+ */
+struct nfsuid {
+ struct nfsuid *nu_lrunext; /* MUST be first */
+ struct nfsuid *nu_lruprev;
+ struct nfsuid *nu_hnext;
+ struct nfsuid *nu_hprev;
+ int nu_flag; /* Flags */
+ uid_t nu_uid; /* Uid mapped by this entry */
+ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */
+ struct ucred nu_cr; /* Cred uid mapped to */
+};
+
+/* Shorthands for the two members of nu_haddr. */
+#define nu_inetaddr nu_haddr.had_inetaddr
+#define nu_nam nu_haddr.had_nam
+/* Bits for nu_flag */
+#define NU_INETADDR 0x1
+
+/*
+ * Per-socket state for a socket served by the nfsd.  The first two
+ * members have the same types and position as the lru links of struct
+ * nfsuid, which presumably lets this structure act as the head of that
+ * lru list -- confirm against the list code.
+ */
+struct nfssvc_sock {
+ struct nfsuid *ns_lrunext; /* MUST be first */
+ struct nfsuid *ns_lruprev;
+ struct nfssvc_sock *ns_next;
+ struct nfssvc_sock *ns_prev;
+ int ns_flag; /* SLP_* bits, see below */
+ u_long ns_sref;
+ struct file *ns_fp;
+ struct socket *ns_so;
+ int ns_solock;
+ struct mbuf *ns_nam;
+ int ns_cc;
+ struct mbuf *ns_raw;
+ struct mbuf *ns_rawend;
+ int ns_reclen;
+ struct mbuf *ns_rec;
+ struct mbuf *ns_recend;
+ int ns_numuids;
+ struct nfsuid *ns_uidh[NUIDHASHSIZ]; /* uid hash chains, one per NUIDHASH() value */
+};
+
+/* Bits for "ns_flag" */
+#define SLP_VALID 0x01
+#define SLP_DOREC 0x02
+#define SLP_NEEDQ 0x04
+#define SLP_DISCONN 0x08
+#define SLP_GETSTREAM 0x10
+#define SLP_INIT 0x20
+#define SLP_WANTINIT 0x40
+
+/* Mask wide enough to cover every SLP_* bit above. */
+#define SLP_ALLFLAGS 0xff
+
+/*
+ * One of these structures is allocated for each nfsd.
+ *
+ * Instances are doubly linked through nd_next/nd_prev; nd_flag holds the
+ * NFSD_* bits defined after the structure.
+ */
+struct nfsd {
+ struct nfsd *nd_next; /* Must be first */
+ struct nfsd *nd_prev;
+ int nd_flag; /* NFSD_ flags */
+ struct nfssvc_sock *nd_slp; /* Current socket */
+ struct mbuf *nd_nam; /* Client addr for datagram req. */
+ struct mbuf *nd_mrep; /* Req. mbuf list */
+ struct mbuf *nd_md;
+ caddr_t nd_dpos; /* Position in list */
+ int nd_procnum; /* RPC procedure number */
+ u_long nd_retxid; /* RPC xid */
+ int nd_repstat; /* Reply status value */
+ struct ucred nd_cr; /* Credentials for req. */
+ int nd_nqlflag; /* Leasing flag */
+ int nd_duration; /* Lease duration */
+ int nd_authlen; /* Authenticator len */
+ u_char nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+ struct proc *nd_procp; /* Proc ptr */
+};
+
+/* Bits for nd_flag */
+#define NFSD_WAITING 0x01
+#define NFSD_CHECKSLP 0x02
+#define NFSD_REQINPROG 0x04
+#define NFSD_NEEDAUTH 0x08
+#define NFSD_AUTHFAIL 0x10
+#endif /* KERNEL */
diff --git a/sys/nfsclient/nfsdiskless.h b/sys/nfsclient/nfsdiskless.h
new file mode 100644
index 000000000000..74e6b7bca438
--- /dev/null
+++ b/sys/nfsclient/nfsdiskless.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsdiskless.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Structure that must be initialized for a diskless nfs client.
+ * This structure is used by nfs_mountroot() to set up the root and swap
+ * vnodes plus do a partial ifconfig(8) and route(8) so that the critical net
+ * interface can communicate with the server.
+ * The primary bootstrap is expected to fill in the appropriate fields before
+ * starting vmunix. Whether or not the swap area is nfs mounted is determined
+ * by the value in swdevt[0]. (equal to NODEV --> swap over nfs)
+ * Currently only works for AF_INET protocols.
+ * NB: All fields are stored in net byte order to avoid hassles with
+ * client/server byte ordering differences.
+ */
+struct nfs_diskless {
+ struct ifaliasreq myif; /* Default interface */
+ struct sockaddr_in mygateway; /* Default gateway */
+ struct nfs_args swap_args; /* Mount args for swap file */
+ u_char swap_fh[NFS_FHSIZE]; /* Swap file's file handle */
+ struct sockaddr_in swap_saddr; /* Address of swap server */
+ char swap_hostnam[MNAMELEN]; /* Host name for mount pt */
+ int swap_nblks; /* Size of server swap file */
+ struct ucred swap_ucred; /* Swap credentials */
+ struct nfs_args root_args; /* Mount args for root fs */
+ u_char root_fh[NFS_FHSIZE]; /* File handle of root dir */
+ struct sockaddr_in root_saddr; /* Address of root server */
+ char root_hostnam[MNAMELEN]; /* Host name for mount pt */
+ long root_time; /* Timestamp of root fs */
+ char my_hostnam[MAXHOSTNAMELEN]; /* Client host name */
+};
diff --git a/sys/nfsclient/nfsm_subs.h b/sys/nfsclient/nfsm_subs.h
new file mode 100644
index 000000000000..879db3600577
--- /dev/null
+++ b/sys/nfsclient/nfsm_subs.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsm_subs.h 8.1 (Berkeley) 6/16/93
+ */
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. To attempt to use them for any
+ * other purpose will be dangerous. (they make weird assumptions)
+ */
+
+/*
+ * First define what the actual subs. return
+ */
+extern struct mbuf *nfsm_reqh(); /* Old-style decl: arguments are not checked; called by nfsm_reqhead */
+
+#define M_HASCL(m) ((m)->m_flags & M_EXT) /* Nonzero when M_EXT is set on m */
+#define NFSMINOFF(m) /* Reset m_data to ext_buf, m_pktdat or m_dat, by mbuf kind */ \
+ if (M_HASCL(m)) \
+ (m)->m_data = (m)->m_ext.ext_buf; \
+ else if ((m)->m_flags & M_PKTHDR) \
+ (m)->m_data = (m)->m_pktdat; \
+ else \
+ (m)->m_data = (m)->m_dat /* NB: bare if/else chain; brace it when used under an outer if */
+#define NFSMADV(m, s) (m)->m_data += (s) /* Advance m_data by s bytes */
+#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: /* MCLBYTES, MHLEN or MLEN: same three cases as NFSMINOFF */ \
+ (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several vars. declared in nfsm_reqhead and these
+ * vars. must not be used elsewhere unless you are careful not to corrupt
+ * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries
+ * that may be used so long as the value is not expected to be retained
+ * after a macro.
+ * I know, this is kind of dorkey, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
+ */
+
+#define nfsm_build(a,c,s) /* (a) = (c) ptr to s new bytes at bpos in mb; uses caller's mb, mb2, bpos */ \
+ { if ((s) > M_TRAILINGSPACE(mb)) { /* no room left in mb: chain a new mbuf */ \
+ MGET(mb2, M_WAIT, MT_DATA); \
+ if ((s) > MLEN) /* one item may not exceed MLEN */ \
+ panic("build > MLEN"); \
+ mb->m_next = mb2; \
+ mb = mb2; \
+ mb->m_len = 0; \
+ bpos = mtod(mb, caddr_t); \
+ } \
+ (a) = (c)(bpos); \
+ mb->m_len += (s); \
+ bpos += (s); }
+
+#define nfsm_dissect(a,c,s) /* (a) = (c) ptr to next s bytes at dpos; uses md, dpos, t1, cp2, error, mrep */ \
+ { t1 = mtod(md, caddr_t)+md->m_len-dpos; /* bytes left in md from dpos */ \
+ if (t1 >= (s)) { \
+ (a) = (c)(dpos); \
+ dpos += (s); \
+ } else if (error = nfsm_disct(&md, &dpos, (s), t1, &cp2)) { /* fewer than s bytes left in md */ \
+ m_freem(mrep); /* on failure: drop the reply and leave via nfsmout */ \
+ goto nfsmout; \
+ } else { \
+ (a) = (c)cp2; \
+ } }
+
+#define nfsm_fhtom(v) /* Append NFSX_FH bytes of v's n_fh; uses cp; two statements, brace under if */ \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)&(VTONFS(v)->n_fh), cp, NFSX_FH)
+
+#define nfsm_srvfhtom(f) /* Append the NFSX_FH bytes found at f; uses cp; two statements, brace under if */ \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)(f), cp, NFSX_FH)
+
+#define nfsm_mtofh(d,v) /* Dissect a file handle, nfs_nget() it on d's mount, set (v), then load attrs */ \
+ { struct nfsnode *np; nfsv2fh_t *fhp; \
+ nfsm_dissect(fhp,nfsv2fh_t *,NFSX_FH); \
+ if (error = nfs_nget((d)->v_mount, fhp, &np)) { \
+ m_freem(mrep); /* on failure: drop the reply and leave via nfsmout */ \
+ goto nfsmout; \
+ } \
+ (v) = NFSTOV(np); \
+ nfsm_loadattr(v, (struct vattr *)0); \
+ }
+
+#define nfsm_loadattr(v,a) /* Run nfs_loadattrcache() at md/dpos, passing (a) through; uses error, mrep */ \
+ { struct vnode *tvp = (v); \
+ if (error = nfs_loadattrcache(&tvp, &md, &dpos, (a))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = tvp; /* tvp was passed by address, so (v) may come back changed */ }
+
+#define nfsm_strsiz(s,m) /* (s) = next length word; EBADRPC if > (m). No (s) <= 0 check, unlike nfsm_srvstrsiz */ \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m)) { \
+ m_freem(mrep); \
+ error = EBADRPC; \
+ goto nfsmout; \
+ } }
+
+#define nfsm_srvstrsiz(s,m) /* (s) = next length word; EBADRPC via nfsm_reply(0) if > (m) or <= 0 */ \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m) || (s) <= 0) { \
+ error = EBADRPC; \
+ nfsm_reply(0); /* with error set, nfsm_reply returns from the enclosing function */ \
+ } }
+
+#define nfsm_mtouio(p,s) /* Hand s bytes at md/dpos to nfsm_mbuftouio() for (p); skipped when (s) <= 0 */ \
+ if ((s) > 0 && \
+ (error = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+
+#define nfsm_uiotom(p,s) /* Hand (p),(s) to nfsm_uiotombuf() at mb/bpos; on error frees mreq, goes to nfsmout */ \
+ if (error = nfsm_uiotombuf((p),&mb,(s),&bpos)) { \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+
+#define nfsm_reqhead(v,a,s) /* Start a request: sets mb, mreq and bpos from nfsm_reqh() */ \
+ mb = mreq = nfsm_reqh((v),(a),(s),&bpos)
+
+#define nfsm_reqdone m_freem(mrep); /* Frees mrep, then defines the nfsmout label */ \
+ nfsmout:
+
+#define nfsm_rndup(a) (((a)+3)&(~0x3)) /* Round a up to a multiple of 4 */
+
+#define nfsm_request(v, t, p, c) /* Send mreq via nfs_request(); fills mrep, md, dpos; goto nfsmout on error */ \
+ if (error = nfs_request((v), mreq, (t), (p), \
+ (c), &mrep, &md, &dpos)) \
+ goto nfsmout
+
+#define nfsm_strtom(a,s,m) /* Append string a (s bytes) as length word + data padded to 4; ENAMETOOLONG if s > m */ \
+ if ((s) > (m)) { \
+ m_freem(mreq); \
+ error = ENAMETOOLONG; \
+ goto nfsmout; \
+ } \
+ t2 = nfsm_rndup(s)+NFSX_UNSIGNED; /* rounded data size plus NFSX_UNSIGNED for the length word */ \
+ if (t2 <= M_TRAILINGSPACE(mb)) { \
+ nfsm_build(tl,u_long *,t2); \
+ *tl++ = txdr_unsigned(s); \
+ *(tl+((t2>>2)-2)) = 0; /* clear the final word first so pad bytes end up zero */ \
+ bcopy((caddr_t)(a), (caddr_t)tl, (s)); \
+ } else if (error = nfsm_strtmbuf(&mb, &bpos, (a), (s))) { /* does not fit in mb */ \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+
+#define nfsm_srvdone /* Defines the nfsmout label and returns error */ \
+ nfsmout: \
+ return(error)
+
+#define nfsm_reply(s) /* Build the reply head; uses nfsd, error, cache, frev, mrq, mb, bpos, mrep, mreq */ \
+ { \
+ nfsd->nd_repstat = error; \
+ if (error) /* on error, 0 is passed in place of (s) */ \
+ (void) nfs_rephead(0, nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ else \
+ (void) nfs_rephead((s), nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ m_freem(mrep); \
+ mreq = *mrq; \
+ if (error) /* NB: returns 0 from the enclosing function */ \
+ return(0); \
+ }
+
+#define nfsm_adv(s) /* Skip s bytes at dpos; uses md, dpos, t1, error, mrep; not brace-wrapped */ \
+ t1 = mtod(md, caddr_t)+md->m_len-dpos; /* bytes left in md from dpos */ \
+ if (t1 >= (s)) { \
+ dpos += (s); \
+ } else if (error = nfs_adv(&md, &dpos, (s), t1)) { /* fewer than s bytes left in md */ \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+
+#define nfsm_srvmtofh(f) /* Dissect NFSX_FH bytes and copy them to f; uses tl; two statements, brace under if */ \
+ nfsm_dissect(tl, u_long *, NFSX_FH); \
+ bcopy((caddr_t)tl, (caddr_t)(f), NFSX_FH) /* (f) parenthesized so the cast covers the whole argument */
+
+#define nfsm_clget /* If bp has reached be, chain a new cluster mbuf after mp2 and aim bp/be at it; always sets tl */ \
+ if (bp >= be) { \
+ if (mp == mb) /* still in mb: add the bytes written since bpos to its length */ \
+ mp->m_len += bp-bpos; \
+ MGET(mp, M_WAIT, MT_DATA); \
+ MCLGET(mp, M_WAIT); \
+ mp->m_len = NFSMSIZ(mp); \
+ mp2->m_next = mp; \
+ mp2 = mp; \
+ bp = mtod(mp, caddr_t); \
+ be = bp+mp->m_len; \
+ } \
+ tl = (u_long *)bp
+
+#define nfsm_srvfillattr /* Fill *fp from *vap; uses caller's fp, vap, nfsd; not brace-wrapped */ \
+ fp->fa_type = vtonfs_type(vap->va_type); \
+ fp->fa_mode = vtonfs_mode(vap->va_type, vap->va_mode); \
+ fp->fa_nlink = txdr_unsigned(vap->va_nlink); \
+ fp->fa_uid = txdr_unsigned(vap->va_uid); \
+ fp->fa_gid = txdr_unsigned(vap->va_gid); \
+ if (nfsd->nd_nqlflag == NQL_NOVAL) { /* fill the fa_nfs* fields */ \
+ fp->fa_nfsblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nfsrdev = 0xffffffff; \
+ else \
+ fp->fa_nfsrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nfsfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nfsfileid = txdr_unsigned(vap->va_fileid); \
+ fp->fa_nfssize = txdr_unsigned(vap->va_size); \
+ fp->fa_nfsblocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); \
+ txdr_nfstime(&vap->va_atime, &fp->fa_nfsatime); \
+ txdr_nfstime(&vap->va_mtime, &fp->fa_nfsmtime); \
+ fp->fa_nfsctime.nfs_sec = txdr_unsigned(vap->va_ctime.ts_sec); \
+ fp->fa_nfsctime.nfs_usec = txdr_unsigned(vap->va_gen); /* NB: carries va_gen, not a usec value */ \
+ } else { /* fill the fa_nq* fields; size, bytes and filerev go through txdr_hyper() */ \
+ fp->fa_nqblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nqrdev = 0xffffffff; \
+ else \
+ fp->fa_nqrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nqfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nqfileid = txdr_unsigned(vap->va_fileid); \
+ txdr_hyper(&vap->va_size, &fp->fa_nqsize); \
+ txdr_hyper(&vap->va_bytes, &fp->fa_nqbytes); \
+ txdr_nqtime(&vap->va_atime, &fp->fa_nqatime); \
+ txdr_nqtime(&vap->va_mtime, &fp->fa_nqmtime); \
+ txdr_nqtime(&vap->va_ctime, &fp->fa_nqctime); \
+ fp->fa_nqflags = txdr_unsigned(vap->va_flags); \
+ fp->fa_nqgen = txdr_unsigned(vap->va_gen); \
+ txdr_hyper(&vap->va_filerev, &fp->fa_nqfilerev); \
+ }
+
diff --git a/sys/nfsclient/nfsmount.h b/sys/nfsclient/nfsmount.h
new file mode 100644
index 000000000000..4d74acb38a54
--- /dev/null
+++ b/sys/nfsclient/nfsmount.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsmount.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Mount structure.
+ * One allocated on every NFS mount.
+ * Holds NFS specific information for mount.
+ */
+struct nfsmount {
+ int nm_flag; /* Flags for soft/hard... */
+ struct mount *nm_mountp; /* Vfs structure for this filesystem */
+ int nm_numgrps; /* Max. size of groupslist */
+ nfsv2fh_t nm_fh; /* File handle of root dir */
+ struct socket *nm_so; /* Rpc socket */
+ int nm_sotype; /* Type of socket */
+ int nm_soproto; /* and protocol */
+ int nm_soflags; /* pr_flags for socket protocol */
+ struct mbuf *nm_nam; /* Addr of server */
+ int nm_timeo; /* Init timer for NFSMNT_DUMBTIMR */
+ int nm_retry; /* Max retries */
+ int nm_srtt[4]; /* Timers for rpcs */
+ int nm_sdrtt[4];
+ int nm_sent; /* Request send count */
+ int nm_cwnd; /* Request send window */
+ int nm_timeouts; /* Request timeouts */
+ int nm_deadthresh; /* Threshold of timeouts-->dead server*/
+ int nm_rsize; /* Max size of read rpc */
+ int nm_wsize; /* Max size of write rpc */
+ int nm_readahead; /* Num. of blocks to readahead */
+ int nm_leaseterm; /* Term (sec) for NQNFS lease */
+ struct nfsnode *nm_tnext; /* Head of lease timer queue */
+ struct nfsnode *nm_tprev;
+ struct vnode *nm_inprog; /* Vnode in prog by nqnfs_clientd() */
+ uid_t nm_authuid; /* Uid for authenticator */
+ int nm_authtype; /* Authenticator type */
+ int nm_authlen; /* and length */
+ char *nm_authstr; /* Authenticator string */
+};
+
+#ifdef KERNEL
+/*
+ * Convert mount ptr to nfsmount ptr.
+ */
+#define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data))
+#endif /* KERNEL */
+
+/*
+ * Prototypes for NFS mount operations
+ */
+int nfs_mount __P((
+ struct mount *mp,
+ char *path,
+ caddr_t data,
+ struct nameidata *ndp,
+ struct proc *p));
+int nfs_start __P((
+ struct mount *mp,
+ int flags,
+ struct proc *p));
+int nfs_unmount __P((
+ struct mount *mp,
+ int mntflags,
+ struct proc *p));
+int nfs_root __P((
+ struct mount *mp,
+ struct vnode **vpp));
+int nfs_quotactl __P((
+ struct mount *mp,
+ int cmds,
+ uid_t uid,
+ caddr_t arg,
+ struct proc *p));
+int nfs_statfs __P((
+ struct mount *mp,
+ struct statfs *sbp,
+ struct proc *p));
+int nfs_sync __P((
+ struct mount *mp,
+ int waitfor,
+ struct ucred *cred,
+ struct proc *p));
+int nfs_fhtovp __P((
+ struct mount *mp,
+ struct fid *fhp,
+ struct mbuf *nam,
+ struct vnode **vpp,
+ int *exflagsp,
+ struct ucred **credanonp));
+int nfs_vptofh __P((
+ struct vnode *vp,
+ struct fid *fhp));
+int nfs_init __P((void)); /* (void) makes this a real prototype: takes no arguments */
diff --git a/sys/nfsclient/nfsnode.h b/sys/nfsclient/nfsnode.h
new file mode 100644
index 000000000000..f5fee5bf2f3a
--- /dev/null
+++ b/sys/nfsclient/nfsnode.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsnode.h 8.4 (Berkeley) 2/13/94
+ */
+
+/*
+ * Silly rename structure that hangs off the nfsnode until the name
+ * can be removed by nfs_inactive()
+ */
+struct sillyrename {
+ struct ucred *s_cred;
+ struct vnode *s_dvp;
+ long s_namlen;
+ char s_name[20];
+};
+
+/*
+ * The nfsnode is the nfs equivalent to ufs's inode. Any similarity
+ * is purely coincidental.
+ * There is a unique nfsnode allocated for each active file,
+ * each current directory, each mounted-on file, text file, and the root.
+ * An nfsnode is 'named' by its file handle. (nget/nfs_node.c)
+ */
+
+struct nfsnode {
+ struct nfsnode *n_forw; /* hash, forward */
+ struct nfsnode **n_back; /* hash, backward */
+ nfsv2fh_t n_fh; /* NFS File Handle */
+ long n_flag; /* Flag for locking.. */
+ struct vnode *n_vnode; /* vnode associated with this node */
+ struct vattr n_vattr; /* Vnode attribute cache */
+ time_t n_attrstamp; /* Time stamp for cached attributes */
+ struct sillyrename *n_sillyrename; /* Ptr to silly rename struct */
+ u_quad_t n_size; /* Current size of file */
+ int n_error; /* Save write error value */
+ u_long n_direofoffset; /* Dir. EOF offset cache */
+ time_t n_mtime; /* Prev modify time. */
+ time_t n_ctime; /* Prev create time. */
+ u_quad_t n_brev; /* Modify rev when cached */
+ u_quad_t n_lrev; /* Modify rev for lease */
+ time_t n_expiry; /* Lease expiry time */
+ struct nfsnode *n_tnext; /* Nqnfs timer chain */
+ struct nfsnode *n_tprev;
+ long spare1; /* To 8 byte boundary */
+ struct sillyrename n_silly; /* Silly rename struct */
+ struct timeval n_atim; /* Special file times */
+ struct timeval n_mtim;
+};
+
+/*
+ * Flags for n_flag
+ * (the value 0x0010 is not assigned to any flag below)
+ */
+#define NFLUSHWANT 0x0001 /* Want wakeup from a flush in prog. */
+#define NFLUSHINPROG 0x0002 /* Avoid multiple calls to vinvalbuf() */
+#define NMODIFIED 0x0004 /* Might have a modified buffer in bio */
+#define NWRITEERR 0x0008 /* Flag write errors so close will know */
+#define NQNFSNONCACHE 0x0020 /* Non-cachable lease */
+#define NQNFSWRITE 0x0040 /* Write lease */
+#define NQNFSEVICTED 0x0080 /* Has been evicted */
+#define NACC 0x0100 /* Special file accessed */
+#define NUPD 0x0200 /* Special file updated */
+#define NCHG 0x0400 /* Special file times changed */
+
+/*
+ * Convert between nfsnode pointers and vnode pointers
+ */
+#define VTONFS(vp) ((struct nfsnode *)(vp)->v_data)
+#define NFSTOV(np) ((struct vnode *)(np)->n_vnode)
+
+/*
+ * Queue head for nfsiod's
+ * NOTE(review): this line defines the object nfs_bufq (not only its type),
+ * and it sits outside the KERNEL guard, so every file that includes this
+ * header gets a definition -- confirm that is intended.
+ */
+TAILQ_HEAD(nfsbufs, buf) nfs_bufq;
+
+#ifdef KERNEL
+/*
+ * Prototypes for NFS vnode operations
+ */
+int nfs_lookup __P((struct vop_lookup_args *));
+int nfs_create __P((struct vop_create_args *));
+int nfs_mknod __P((struct vop_mknod_args *));
+int nfs_open __P((struct vop_open_args *));
+int nfs_close __P((struct vop_close_args *));
+int nfsspec_close __P((struct vop_close_args *));
+#ifdef FIFO
+int nfsfifo_close __P((struct vop_close_args *));
+#endif
+int nfs_access __P((struct vop_access_args *));
+int nfsspec_access __P((struct vop_access_args *));
+int nfs_getattr __P((struct vop_getattr_args *));
+int nfs_setattr __P((struct vop_setattr_args *));
+int nfs_read __P((struct vop_read_args *));
+int nfs_write __P((struct vop_write_args *));
+int nfsspec_read __P((struct vop_read_args *));
+int nfsspec_write __P((struct vop_write_args *));
+#ifdef FIFO
+int nfsfifo_read __P((struct vop_read_args *));
+int nfsfifo_write __P((struct vop_write_args *));
+#endif
+#define nfs_ioctl ((int (*) __P((struct vop_ioctl_args *)))enoioctl)
+#define nfs_select ((int (*) __P((struct vop_select_args *)))seltrue)
+int nfs_mmap __P((struct vop_mmap_args *));
+int nfs_fsync __P((struct vop_fsync_args *));
+#define nfs_seek ((int (*) __P((struct vop_seek_args *)))nullop)
+int nfs_remove __P((struct vop_remove_args *));
+int nfs_link __P((struct vop_link_args *));
+int nfs_rename __P((struct vop_rename_args *));
+int nfs_mkdir __P((struct vop_mkdir_args *));
+int nfs_rmdir __P((struct vop_rmdir_args *));
+int nfs_symlink __P((struct vop_symlink_args *));
+int nfs_readdir __P((struct vop_readdir_args *));
+int nfs_readlink __P((struct vop_readlink_args *));
+int nfs_abortop __P((struct vop_abortop_args *));
+int nfs_inactive __P((struct vop_inactive_args *));
+int nfs_reclaim __P((struct vop_reclaim_args *));
+int nfs_lock __P((struct vop_lock_args *));
+int nfs_unlock __P((struct vop_unlock_args *));
+int nfs_bmap __P((struct vop_bmap_args *));
+int nfs_strategy __P((struct vop_strategy_args *));
+int nfs_print __P((struct vop_print_args *));
+int nfs_islocked __P((struct vop_islocked_args *));
+int nfs_pathconf __P((struct vop_pathconf_args *));
+int nfs_advlock __P((struct vop_advlock_args *));
+int nfs_blkatoff __P((struct vop_blkatoff_args *));
+int nfs_vget __P((struct mount *, ino_t, struct vnode **));
+int nfs_valloc __P((struct vop_valloc_args *));
+#define nfs_reallocblks \
+ ((int (*) __P((struct vop_reallocblks_args *)))eopnotsupp)
+int nfs_vfree __P((struct vop_vfree_args *));
+int nfs_truncate __P((struct vop_truncate_args *));
+int nfs_update __P((struct vop_update_args *));
+int nfs_bwrite __P((struct vop_bwrite_args *));
+#endif /* KERNEL */
diff --git a/sys/nfsclient/nfsstats.h b/sys/nfsclient/nfsstats.h
new file mode 100644
index 000000000000..261fd42657a7
--- /dev/null
+++ b/sys/nfsclient/nfsstats.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define NFS_MAXIOVEC 34
+#define NFS_HZ 25 /* Ticks per second for NFS timeouts */
+#define NFS_TIMEO (1*NFS_HZ) /* Default timeout = 1 second */
+#define NFS_MINTIMEO (1*NFS_HZ) /* Min timeout to use */
+#define NFS_MAXTIMEO (60*NFS_HZ) /* Max timeout to backoff to */
+#define NFS_MINIDEMTIMEO (5*NFS_HZ) /* Min timeout for non-idempotent ops*/
+#define NFS_MAXREXMIT 100 /* Stop counting after this many */
+#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
+#define NFS_RETRANS 10 /* Num of retrans for soft mounts */
+#define NFS_MAXGRPS 16 /* Max. size of groups list */
+#define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */
+#define NFS_MAXATTRTIMO 60 /* Upper bound applied by NFS_ATTRTIMEO */
+#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
+#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
+#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
+#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
+#define NFS_MAXREADDIR NFS_MAXDATA /* Max. size of directory read */
+#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
+#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runnable */
+#define NFS_DIRBLKSIZ 1024 /* Size of an NFS directory block */
+#define NMOD(a) ((a) % nfs_asyncdaemons) /* Reduce a modulo nfs_asyncdaemons */
+
+/*
+ * Set the attribute timeout based on how recently the file has been modified.
+ * The result is one tenth of the seconds elapsed since n_mtime, clamped to
+ * the range NFS_MINATTRTIMO..NFS_MAXATTRTIMO; it is always NFS_MINATTRTIMO
+ * when NMODIFIED is set in n_flag. Reads the global time.tv_sec.
+ */
+#define NFS_ATTRTIMEO(np) \
+ ((((np)->n_flag & NMODIFIED) || \
+ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+ (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try and use it.
+ */
+struct nfsd_args {
+ int sock; /* Socket to serve */
+ caddr_t name; /* Client address for connection based sockets */
+ int namelen; /* Length of name */
+};
+
+struct nfsd_srvargs {
+ struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */
+ uid_t nsd_uid; /* Effective uid mapped to cred */
+ u_long nsd_haddr; /* Ip address of client */
+ struct ucred nsd_cr; /* Cred. uid maps to */
+ int nsd_authlen; /* Length of auth string (ret) */
+ char *nsd_authstr; /* Auth string (ret) */
+};
+
+struct nfsd_cargs {
+ char *ncd_dirp; /* Mount dir path */
+ uid_t ncd_authuid; /* Effective uid */
+ int ncd_authtype; /* Type of authenticator */
+ int ncd_authlen; /* Length of authenticator string */
+ char *ncd_authstr; /* Authenticator string */
+};
+
+/*
+ * Stats structure
+ */
+struct nfsstats {
+ int attrcache_hits;
+ int attrcache_misses;
+ int lookupcache_hits;
+ int lookupcache_misses;
+ int direofcache_hits;
+ int direofcache_misses;
+ int biocache_reads;
+ int read_bios;
+ int read_physios;
+ int biocache_writes;
+ int write_bios;
+ int write_physios;
+ int biocache_readlinks;
+ int readlink_bios;
+ int biocache_readdirs;
+ int readdir_bios;
+ int rpccnt[NFS_NPROCS];
+ int rpcretries;
+ int srvrpccnt[NFS_NPROCS];
+ int srvrpc_errs;
+ int srv_errs;
+ int rpcrequests;
+ int rpctimeouts;
+ int rpcunexpected;
+ int rpcinvalid;
+ int srvcache_inproghits;
+ int srvcache_idemdonehits;
+ int srvcache_nonidemdonehits;
+ int srvcache_misses;
+ int srvnqnfs_leases;
+ int srvnqnfs_maxleases;
+ int srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ */
+#define NFSSVC_BIOD 0x002
+#define NFSSVC_NFSD 0x004
+#define NFSSVC_ADDSOCK 0x008
+#define NFSSVC_AUTHIN 0x010
+#define NFSSVC_GOTAUTH 0x040
+#define NFSSVC_AUTHINFAIL 0x080
+#define NFSSVC_MNTD 0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+ sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all, except EINTR, ERESTART and EWOULDBLOCK.
+ * (s) is tested against PR_CONNREQUIRED and (e) is the error value; the
+ * result is true only when PR_CONNREQUIRED is clear in (s).
+ */
+#define NFSIGNORE_SOERROR(s, e) \
+ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+ ((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element
+ */
+struct nfsreq {
+ struct nfsreq *r_next;
+ struct nfsreq *r_prev;
+ struct mbuf *r_mreq;
+ struct mbuf *r_mrep;
+ struct mbuf *r_md;
+ caddr_t r_dpos;
+ struct nfsmount *r_nmp;
+ struct vnode *r_vp;
+ u_long r_xid;
+ int r_flags; /* flags on request, see below */
+ int r_retry; /* max retransmission count */
+ int r_rexmit; /* current retrans count */
+ int r_timer; /* tick counter on reply */
+ int r_procnum; /* NFS procedure number */
+ int r_rtt; /* RTT for rpc */
+ struct proc *r_procp; /* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING 0x01 /* timing request (in mntp) */
+#define R_SENT 0x02 /* request has been sent */
+#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
+#define R_INTR 0x08 /* intr mnt, signal pending */
+#define R_SOCKERR 0x10 /* Fatal error on socket */
+#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
+#define R_MUSTRESEND 0x40 /* Must resend request */
+#define R_GETONEREP 0x80 /* Probe for one reply only */
+
+struct nfsstats nfsstats; /* NOTE(review): an object definition in a header, not an extern declaration -- confirm intended */
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+#define NUIDHASHSIZ 32 /* Must remain a power of 2: NUIDHASH() masks with NUIDHASHSIZ - 1 */
+#define NUIDHASH(uid) ((uid) & (NUIDHASHSIZ - 1)) /* Bucket index in 0..NUIDHASHSIZ-1 */
+
+/*
+ * Network address hash list element
+ */
+union nethostaddr {
+ u_long had_inetaddr;
+ struct mbuf *had_nam;
+};
+
+struct nfsuid {
+ struct nfsuid *nu_lrunext; /* MUST be first */
+ struct nfsuid *nu_lruprev;
+ struct nfsuid *nu_hnext;
+ struct nfsuid *nu_hprev;
+ int nu_flag; /* Flags */
+ uid_t nu_uid; /* Uid mapped by this entry */
+ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */
+ struct ucred nu_cr; /* Cred uid mapped to */
+};
+
+#define nu_inetaddr nu_haddr.had_inetaddr
+#define nu_nam nu_haddr.had_nam
+/* Bits for nu_flag */
+#define NU_INETADDR 0x1
+
+struct nfssvc_sock { /* State kept per socket that nfsd services */
+ struct nfsuid *ns_lrunext; /* MUST be first */
+ struct nfsuid *ns_lruprev; /* With ns_lrunext: ends of the nfsuid lru list */
+ struct nfssvc_sock *ns_next; /* Links to neighbouring nfssvc_sock structs */
+ struct nfssvc_sock *ns_prev;
+ int ns_flag; /* SLP_ flags */
+ u_long ns_sref;
+ struct file *ns_fp; /* presumably the file backing ns_so -- TODO confirm */
+ struct socket *ns_so; /* The socket itself */
+ int ns_solock;
+ struct mbuf *ns_nam; /* presumably peer address (cf. nd_nam) -- TODO confirm */
+ int ns_cc;
+ struct mbuf *ns_raw; /* presumably head of an mbuf chain ... -- TODO confirm */
+ struct mbuf *ns_rawend; /* ... and its tail */
+ int ns_reclen;
+ struct mbuf *ns_rec; /* presumably head of an mbuf chain ... -- TODO confirm */
+ struct mbuf *ns_recend; /* ... and its tail */
+ int ns_numuids; /* presumably count of nfsuid entries held -- TODO confirm */
+ struct nfsuid *ns_uidh[NUIDHASHSIZ]; /* nfsuid hash chain heads, indexed by NUIDHASH(uid) */
+};
+
+/* Bits for "ns_flag" */
+#define SLP_VALID 0x01
+#define SLP_DOREC 0x02
+#define SLP_NEEDQ 0x04
+#define SLP_DISCONN 0x08
+#define SLP_GETSTREAM 0x10
+#define SLP_INIT 0x20
+#define SLP_WANTINIT 0x40
+
+#define SLP_ALLFLAGS 0xff /* Whole low byte; includes 0x80, which has no SLP_ name here */
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+struct nfsd {
+ struct nfsd *nd_next; /* Must be first */
+ struct nfsd *nd_prev; /* Back link paired with nd_next */
+ int nd_flag; /* NFSD_ flags */
+ struct nfssvc_sock *nd_slp; /* Current socket */
+ struct mbuf *nd_nam; /* Client addr for datagram req. */
+ struct mbuf *nd_mrep; /* Req. mbuf list */
+ struct mbuf *nd_md; /* presumably the mbuf within nd_mrep that nd_dpos points into -- TODO confirm */
+ caddr_t nd_dpos; /* Position in list */
+ int nd_procnum; /* RPC procedure number */
+ u_long nd_retxid; /* RPC xid */
+ int nd_repstat; /* Reply status value */
+ struct ucred nd_cr; /* Credentials for req. */
+ int nd_nqlflag; /* Leasing flag */
+ int nd_duration; /* Lease duration */
+ int nd_authlen; /* Authenticator len */
+ u_char nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+ struct proc *nd_procp; /* Proc ptr */
+};
+
+#define NFSD_WAITING 0x01 /* NFSD_* values are bits for nd_flag in struct nfsd */
+#define NFSD_CHECKSLP 0x02
+#define NFSD_REQINPROG 0x04
+#define NFSD_NEEDAUTH 0x08
+#define NFSD_AUTHFAIL 0x10
+#endif /* KERNEL */
diff --git a/sys/nfsserver/nfs.h b/sys/nfsserver/nfs.h
new file mode 100644
index 000000000000..261fd42657a7
--- /dev/null
+++ b/sys/nfsserver/nfs.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define NFS_MAXIOVEC 34 /* Size of the iovec array used by nfsrv_write */
+#define NFS_HZ 25 /* Ticks per second for NFS timeouts */
+#define NFS_TIMEO (1*NFS_HZ) /* Default timeout = 1 second */
+#define NFS_MINTIMEO (1*NFS_HZ) /* Min timeout to use */
+#define NFS_MAXTIMEO (60*NFS_HZ) /* Max timeout to backoff to */
+#define NFS_MINIDEMTIMEO (5*NFS_HZ) /* Min timeout for non-idempotent ops*/
+#define NFS_MAXREXMIT 100 /* Stop counting after this many */
+#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
+#define NFS_RETRANS 10 /* Num of retrans for soft mounts */
+#define NFS_MAXGRPS 16 /* Max. size of groups list */
+#define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */
+#define NFS_MAXATTRTIMO 60 /* Upper bound applied by NFS_ATTRTIMEO */
+#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
+#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
+#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
+#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
+#define NFS_MAXREADDIR NFS_MAXDATA /* Max. size of directory read */
+#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
+#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runnable */
+#define NFS_DIRBLKSIZ 1024 /* Size of an NFS directory block */
+#define NMOD(a) ((a) % nfs_asyncdaemons) /* a modulo nfs_asyncdaemons */
+
+/*
+ * Set the attribute timeout based on how recently the file has been modified.
+ * The value is (time.tv_sec - (np)->n_mtime) / 10, clamped to the range
+ * NFS_MINATTRTIMO .. NFS_MAXATTRTIMO; a node with NMODIFIED set in n_flag
+ * always gets NFS_MINATTRTIMO.
+ * np is expanded several times, so pass an expression without side effects.
+ */
+#define NFS_ATTRTIMEO(np) \
+ ((((np)->n_flag & NMODIFIED) || \
+ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+ (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try and use it.
+ */
+struct nfsd_args {
+ int sock; /* Socket to serve */
+ caddr_t name; /* Client address for connection based sockets */
+ int namelen; /* Length of name */
+};
+
+struct nfsd_srvargs {
+ struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */
+ uid_t nsd_uid; /* Effective uid mapped to cred */
+ u_long nsd_haddr; /* Ip address of client */
+ struct ucred nsd_cr; /* Cred. uid maps to */
+ int nsd_authlen; /* Length of auth string (ret) */
+ char *nsd_authstr; /* Auth string (ret) */
+};
+
+struct nfsd_cargs {
+ char *ncd_dirp; /* Mount dir path */
+ uid_t ncd_authuid; /* Effective uid */
+ int ncd_authtype; /* Type of authenticator */
+ int ncd_authlen; /* Length of authenticator string */
+ char *ncd_authstr; /* Authenticator string */
+};
+
+/*
+ * Stats structure
+ */
+struct nfsstats {
+ int attrcache_hits;
+ int attrcache_misses;
+ int lookupcache_hits;
+ int lookupcache_misses;
+ int direofcache_hits;
+ int direofcache_misses;
+ int biocache_reads;
+ int read_bios;
+ int read_physios;
+ int biocache_writes;
+ int write_bios;
+ int write_physios;
+ int biocache_readlinks;
+ int readlink_bios;
+ int biocache_readdirs;
+ int readdir_bios;
+ int rpccnt[NFS_NPROCS];
+ int rpcretries;
+ int srvrpccnt[NFS_NPROCS];
+ int srvrpc_errs;
+ int srv_errs;
+ int rpcrequests;
+ int rpctimeouts;
+ int rpcunexpected;
+ int rpcinvalid;
+ int srvcache_inproghits;
+ int srvcache_idemdonehits;
+ int srvcache_nonidemdonehits;
+ int srvcache_misses;
+ int srvnqnfs_leases;
+ int srvnqnfs_maxleases;
+ int srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ */
+#define NFSSVC_BIOD 0x002
+#define NFSSVC_NFSD 0x004
+#define NFSSVC_ADDSOCK 0x008
+#define NFSSVC_AUTHIN 0x010
+#define NFSSVC_GOTAUTH 0x040
+#define NFSSVC_AUTHINFAIL 0x080
+#define NFSSVC_MNTD 0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+ sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all
+ * More precisely: the macro is true for any error e other than EINTR,
+ * ERESTART and EWOULDBLOCK, and only when PR_CONNREQUIRED is clear in s.
+ */
+#define NFSIGNORE_SOERROR(s, e) \
+ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+ ((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element
+ */
+struct nfsreq {
+ struct nfsreq *r_next;
+ struct nfsreq *r_prev;
+ struct mbuf *r_mreq;
+ struct mbuf *r_mrep;
+ struct mbuf *r_md;
+ caddr_t r_dpos;
+ struct nfsmount *r_nmp;
+ struct vnode *r_vp;
+ u_long r_xid;
+ int r_flags; /* flags on request, see below */
+ int r_retry; /* max retransmission count */
+ int r_rexmit; /* current retrans count */
+ int r_timer; /* tick counter on reply */
+ int r_procnum; /* NFS procedure number */
+ int r_rtt; /* RTT for rpc */
+ struct proc *r_procp; /* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING 0x01 /* timing request (in mntp) */
+#define R_SENT 0x02 /* request has been sent */
+#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
+#define R_INTR 0x08 /* intr mnt, signal pending */
+#define R_SOCKERR 0x10 /* Fatal error on socket */
+#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
+#define R_MUSTRESEND 0x40 /* Must resend request */
+#define R_GETONEREP 0x80 /* Probe for one reply only */
+
+struct nfsstats nfsstats;
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+#define NUIDHASHSIZ 32
+#define NUIDHASH(uid) ((uid) & (NUIDHASHSIZ - 1))
+
+/*
+ * Network address hash list element
+ */
+union nethostaddr {
+ u_long had_inetaddr;
+ struct mbuf *had_nam;
+};
+
+struct nfsuid {
+ struct nfsuid *nu_lrunext; /* MUST be first */
+ struct nfsuid *nu_lruprev;
+ struct nfsuid *nu_hnext;
+ struct nfsuid *nu_hprev;
+ int nu_flag; /* Flags */
+ uid_t nu_uid; /* Uid mapped by this entry */
+ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */
+ struct ucred nu_cr; /* Cred uid mapped to */
+};
+
+#define nu_inetaddr nu_haddr.had_inetaddr
+#define nu_nam nu_haddr.had_nam
+/* Bits for nu_flag */
+#define NU_INETADDR 0x1
+
+struct nfssvc_sock {
+ struct nfsuid *ns_lrunext; /* MUST be first */
+ struct nfsuid *ns_lruprev; /* other end of the nfsuid lru links */
+ struct nfssvc_sock *ns_next; /* links to other nfssvc_sock structs */
+ struct nfssvc_sock *ns_prev;
+ int ns_flag; /* SLP_ bits, see 'Bits for "ns_flag"' below */
+ u_long ns_sref;
+ struct file *ns_fp;
+ struct socket *ns_so;
+ int ns_solock;
+ struct mbuf *ns_nam;
+ int ns_cc;
+ struct mbuf *ns_raw;
+ struct mbuf *ns_rawend;
+ int ns_reclen;
+ struct mbuf *ns_rec;
+ struct mbuf *ns_recend;
+ int ns_numuids; /* presumably # of nfsuid entries held -- confirm */
+ struct nfsuid *ns_uidh[NUIDHASHSIZ]; /* nfsuid hash heads, see NUIDHASH() */
+};
+
+/* Bits for "ns_flag" */
+#define SLP_VALID 0x01
+#define SLP_DOREC 0x02
+#define SLP_NEEDQ 0x04
+#define SLP_DISCONN 0x08
+#define SLP_GETSTREAM 0x10
+#define SLP_INIT 0x20
+#define SLP_WANTINIT 0x40
+
+#define SLP_ALLFLAGS 0xff
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+struct nfsd {
+ struct nfsd *nd_next; /* Must be first */
+ struct nfsd *nd_prev;
+ int nd_flag; /* NFSD_ flags */
+ struct nfssvc_sock *nd_slp; /* Current socket */
+ struct mbuf *nd_nam; /* Client addr for datagram req. */
+ struct mbuf *nd_mrep; /* Req. mbuf list */
+ struct mbuf *nd_md;
+ caddr_t nd_dpos; /* Position in list */
+ int nd_procnum; /* RPC procedure number */
+ u_long nd_retxid; /* RPC xid */
+ int nd_repstat; /* Reply status value */
+ struct ucred nd_cr; /* Credentials for req. */
+ int nd_nqlflag; /* Leasing flag */
+ int nd_duration; /* Lease duration */
+ int nd_authlen; /* Authenticator len */
+ u_char nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+ struct proc *nd_procp; /* Proc ptr */
+};
+
+#define NFSD_WAITING 0x01
+#define NFSD_CHECKSLP 0x02
+#define NFSD_REQINPROG 0x04
+#define NFSD_NEEDAUTH 0x08
+#define NFSD_AUTHFAIL 0x10
+#endif /* KERNEL */
diff --git a/sys/nfsserver/nfs_serv.c b/sys/nfsserver/nfs_serv.c
new file mode 100644
index 000000000000..f31b96e02edc
--- /dev/null
+++ b/sys/nfsserver/nfs_serv.c
@@ -0,0 +1,1908 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_serv.c 8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * nfs version 2 server calls to vnode ops
+ * - these routines generally have 3 phases
+ * 1 - break down and validate rpc request in mbuf list
+ * 2 - do the vnode ops for the request
+ * (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
+ * 3 - build the rpc reply in an mbuf list
+ * nb:
+ * - do not mix the phases, since the nfsm_?? macros can return failures
+ * on a bad rpc or similar and do not do any vrele() or vput()'s
+ *
+ * - the nfsm_reply() macro generates an nfs rpc reply with the nfs
+ * error number iff error != 0 whereas
+ * returning an error from the server function implies a fatal error
+ * such as a badly constructed rpc request that should be dropped without
+ * a reply.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/mbuf.h>
+#include <sys/dirent.h>
+#include <sys/stat.h>
+
+#include <vm/vm.h>
+
+#include <nfs/nfsv2.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nqnfs.h>
+
+/* Defs */
+#define TRUE 1
+#define FALSE 0
+
+/* Global vars */
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_xdrneg1;
+extern u_long nfs_false, nfs_true;
+nfstype nfs_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
+ NFCHR, NFNON };
+
+/*
+ * nqnfs access service
+ * Dissects a file handle and three XDR words from the request, maps the
+ * handle to a vnode with nfsrv_fhtovp() and asks nfsrv_access() whether
+ * the requested mode is permitted. Each of the three words that equals
+ * nfs_true adds VREAD, VWRITE and VEXEC (in that order) to the mode.
+ * The outcome is left in error for nfsm_reply(0); no reply body is built.
+ * mrep/md/dpos describe the request mbuf list and dissect position, cred
+ * and nam identify the caller, and the reply comes back through mrq
+ * (assumed from the nfsm_* macro usage -- confirm against nfsm_subs.h).
+ */
+nqnfsrv_access(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, mode = 0;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); /* read/write/exec flags */
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0); /* handle did not map to a vnode: reply with error */
+ if (*tl++ == nfs_true)
+ mode |= VREAD;
+ if (*tl++ == nfs_true)
+ mode |= VWRITE;
+ if (*tl == nfs_true)
+ mode |= VEXEC;
+ error = nfsrv_access(vp, mode, cred, rdonly, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(0); /* status only, success or failure */
+ nfsm_srvdone;
+}
+
+/*
+ * nfs getattr service
+ * Dissects a file handle, maps it to a vnode with nfsrv_fhtovp(), calls
+ * nqsrv_getl() for NQL_READ and fetches the attributes with VOP_GETATTR().
+ * The reply is a single fattr structure whose size depends on whether the
+ * request is nqnfs (nd_nqlflag != NQL_NOVAL); it is filled in by
+ * nfsm_srvfillattr from vap.
+ * mrep/md/dpos describe the request mbuf list and dissect position, cred
+ * and nam identify the caller, and the reply comes back through mrq
+ * (assumed from the nfsm_* macro usage -- confirm against nfsm_subs.h).
+ */
+nfsrv_getattr(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct nfsv2_fattr *fp;
+ struct vattr va;
+ register struct vattr *vap = &va;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0); /* handle did not map to a vnode: reply with error */
+ nqsrv_getl(vp, NQL_READ);
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp); /* vap holds a copy; the vnode is no longer needed */
+ nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ nfsm_srvdone;
+}
+
+/*
+ * nfs setattr service
+ * Dissects a file handle and an sattr structure, maps the handle to a
+ * vnode and applies the requested attribute changes with VOP_SETATTR().
+ * Fields carrying the "not set" value (nfs_xdrneg1, or 0xffff in the low
+ * half of the mode) are left at VATTR_NULL. For nqnfs requests
+ * (nd_nqlflag != NQL_NOVAL) size, times and flags are always decoded.
+ * A size change needs write access and is refused on directories; any
+ * other change is refused only on a read-only export or file system.
+ * The reply holds the new attributes and, for nqnfs, the file revision
+ * (va_filerev) returned by the final VOP_GETATTR().
+ * mrep/md/dpos describe the request mbuf list and dissect position, cred
+ * and nam identify the caller, and the reply comes back through mrq
+ * (assumed from the nfsm_* macro usage -- confirm against nfsm_subs.h).
+ */
+nfsrv_setattr(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct vattr va;
+ register struct vattr *vap = &va;
+ register struct nfsv2_sattr *sp;
+ register struct nfsv2_fattr *fp;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ nqsrv_getl(vp, NQL_WRITE);
+ VATTR_NULL(vap);
+ /*
+ * Nah nah nah nah na nah
+ * There is a bug in the Sun client that puts 0xffff in the mode
+ * field of sattr when it should put in 0xffffffff. The u_short
+ * doesn't sign extend.
+ * --> check the low order 2 bytes for 0xffff
+ */
+ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
+ vap->va_mode = nfstov_mode(sp->sa_mode);
+ if (sp->sa_uid != nfs_xdrneg1)
+ vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
+ if (sp->sa_gid != nfs_xdrneg1)
+ vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
+ if (nfsd->nd_nqlflag == NQL_NOVAL) {
+ if (sp->sa_nfssize != nfs_xdrneg1)
+ vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_nfssize);
+ if (sp->sa_nfsatime.nfs_sec != nfs_xdrneg1) {
+#ifdef notyet
+ fxdr_nfstime(&sp->sa_nfsatime, &vap->va_atime);
+#else
+ vap->va_atime.ts_sec =
+ fxdr_unsigned(long, sp->sa_nfsatime.nfs_sec);
+ vap->va_atime.ts_nsec = 0;
+#endif
+ }
+ if (sp->sa_nfsmtime.nfs_sec != nfs_xdrneg1)
+ fxdr_nfstime(&sp->sa_nfsmtime, &vap->va_mtime);
+ } else {
+ fxdr_hyper(&sp->sa_nqsize, &vap->va_size);
+ fxdr_nqtime(&sp->sa_nqatime, &vap->va_atime);
+ fxdr_nqtime(&sp->sa_nqmtime, &vap->va_mtime);
+ vap->va_flags = fxdr_unsigned(u_long, sp->sa_nqflags);
+ }
+
+ /*
+ * If the size is being changed write access is required, otherwise
+ * just check for a read only file system.
+ */
+ if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
+ if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+ error = EROFS;
+ goto out;
+ }
+ } else {
+ if (vp->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ } else if (error = nfsrv_access(vp, VWRITE, cred, rdonly,
+ nfsd->nd_procp))
+ goto out;
+ }
+ if (error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+out:
+ vput(vp);
+ nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 2*NFSX_UNSIGNED);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ if (nfsd->nd_nqlflag != NQL_NOVAL) {
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ /*
+ * Send the file revision just fetched by VOP_GETATTR(), as
+ * nfsrv_write does; this point is only reached with error == 0,
+ * so vap is valid. (The previous code encoded a local that
+ * was never assigned.)
+ */
+ txdr_hyper(&vap->va_filerev, tl);
+ }
+ nfsm_srvdone;
+}
+
+/*
+ * nfs lookup rpc
+ * For nqnfs requests (nd_nqlflag != NQL_NOVAL) a leading word carries the
+ * requested lease duration (duration2). The directory file handle and
+ * the name are then resolved by nfs_namei() with LOCKLEAF | SAVESTART.
+ * A file handle for the result is built from the mount's f_fsid and
+ * VFS_VPTOFH(); when a duration was requested a read lease is obtained
+ * with nqsrv_getlease(). The reply is, for nqnfs, either the lease data
+ * (NQL_READ, cache2, duration2, frev2) or a single zero word, followed
+ * in all cases by the file handle and the attributes.
+ * mrep/md/dpos describe the request mbuf list and dissect position, cred
+ * and nam identify the caller, and the reply comes back through mrq
+ * (assumed from the nfsm_* macro usage -- confirm against nfsm_subs.h).
+ */
+nfsrv_lookup(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct nfsv2_fattr *fp;
+ struct nameidata nd;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register caddr_t cp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, duration2, cache2, len;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vattr va, *vap = &va;
+ u_quad_t frev, frev2;
+
+ fhp = &nfh.fh_generic;
+ duration2 = 0; /* stays 0 unless an nqnfs request asks for a lease */
+ if (nfsd->nd_nqlflag != NQL_NOVAL) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ duration2 = fxdr_unsigned(int, *tl);
+ }
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = LOOKUP;
+ nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ nqsrv_getl(nd.ni_startdir, NQL_READ);
+ vrele(nd.ni_startdir); /* SAVESTART reference no longer needed */
+ FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ vp = nd.ni_vp;
+ bzero((caddr_t)fhp, sizeof(nfh)); /* reuse nfh for the result's handle */
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ if (duration2)
+ (void) nqsrv_getlease(vp, &duration2, NQL_READ, nfsd,
+ nam, &cache2, &frev2, cred);
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_FH + NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 5*NFSX_UNSIGNED);
+ if (nfsd->nd_nqlflag != NQL_NOVAL) {
+ if (duration2) {
+ nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(NQL_READ);
+ *tl++ = txdr_unsigned(cache2);
+ *tl++ = txdr_unsigned(duration2);
+ txdr_hyper(&frev2, tl); /* frev2 was set by nqsrv_getlease above */
+ } else {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0; /* no lease in this reply */
+ }
+ }
+ nfsm_srvfhtom(fhp);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ nfsm_srvdone;
+}
+
+/*
+ * nfs readlink service
+ * Before the vnode is looked up, a chain of mbufs (MGET + MCLGET) with a
+ * total length of NFS_MAXPATHLEN is allocated, with one iovec entry per
+ * mbuf, so that VOP_READLINK() can place the link text directly in the
+ * reply data. A vnode that is not VLNK yields EINVAL. On error the
+ * chain is freed; otherwise it is trimmed with nfsm_adj() by the unread
+ * residual and linked after a length word in the reply.
+ * mrep/md/dpos describe the request mbuf list and dissect position, cred
+ * and nam identify the caller, and the reply comes back through mrq
+ * (assumed from the nfsm_* macro usage -- confirm against nfsm_subs.h).
+ */
+nfsrv_readlink(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
+ register struct iovec *ivp = iv;
+ register struct mbuf *mp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, i, tlen, len;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mp2, *mp3, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io, *uiop = &io;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ len = 0;
+ i = 0;
+ while (len < NFS_MAXPATHLEN) { /* mp3 = chain head, mp2 = chain tail */
+ MGET(mp, M_WAIT, MT_DATA);
+ MCLGET(mp, M_WAIT);
+ mp->m_len = NFSMSIZ(mp);
+ if (len == 0)
+ mp3 = mp2 = mp;
+ else {
+ mp2->m_next = mp;
+ mp2 = mp;
+ }
+ if ((len+mp->m_len) > NFS_MAXPATHLEN) { /* last mbuf: cap the total */
+ mp->m_len = NFS_MAXPATHLEN-len;
+ len = NFS_MAXPATHLEN;
+ } else
+ len += mp->m_len;
+ ivp->iov_base = mtod(mp, caddr_t);
+ ivp->iov_len = mp->m_len;
+ i++;
+ ivp++;
+ }
+ uiop->uio_iov = iv;
+ uiop->uio_iovcnt = i;
+ uiop->uio_offset = 0;
+ uiop->uio_resid = len;
+ uiop->uio_rw = UIO_READ;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ uiop->uio_procp = (struct proc *)0;
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) {
+ m_freem(mp3);
+ nfsm_reply(0);
+ }
+ if (vp->v_type != VLNK) {
+ error = EINVAL;
+ goto out;
+ }
+ nqsrv_getl(vp, NQL_READ);
+ error = VOP_READLINK(vp, uiop, cred);
+out:
+ vput(vp);
+ if (error)
+ m_freem(mp3); /* data chain is not part of the error reply */
+ nfsm_reply(NFSX_UNSIGNED);
+ if (uiop->uio_resid > 0) { /* link shorter than the preallocated space */
+ len -= uiop->uio_resid;
+ tlen = nfsm_rndup(len);
+ nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
+ }
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = txdr_unsigned(len);
+ mb->m_next = mp3; /* append the link text after the length word */
+ nfsm_srvdone;
+}
+
+/*
+ * nfs read service
+ * The request holds a file handle, an offset (one word for plain NFS, a
+ * hyper for nqnfs) and a count dissected with nfsm_srvstrsiz(cnt,
+ * NFS_MAXDATA). Only VREG vnodes are readable (EISDIR for directories,
+ * EACCES otherwise) and the caller needs read or, failing that, execute
+ * access. The count is clipped against va_size, reply space for the
+ * attributes, a length word and the data is built, and VOP_READ() fills
+ * the reply mbufs directly through a temporary iovec array. The
+ * attributes are re-read after the read and the reply is trimmed with
+ * nfsm_adj() to the rounded-up number of bytes actually read.
+ * mrep/md/dpos describe the request mbuf list and dissect position, cred
+ * and nam identify the caller, and the reply comes back through mrq
+ * (assumed from the nfsm_* macro usage -- confirm against nfsm_subs.h).
+ */
+nfsrv_read(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct iovec *iv;
+ struct iovec *iv2;
+ register struct mbuf *m;
+ register struct nfsv2_fattr *fp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, i, cnt, len, left, siz, tlen;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct mbuf *m2;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io, *uiop = &io;
+ struct vattr va, *vap = &va;
+ off_t off;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ if (nfsd->nd_nqlflag == NQL_NOVAL) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ off = (off_t)fxdr_unsigned(u_long, *tl);
+ } else {
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ fxdr_hyper(tl, &off);
+ }
+ nfsm_srvstrsiz(cnt, NFS_MAXDATA);
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ if (vp->v_type != VREG) {
+ error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+ vput(vp);
+ nfsm_reply(0);
+ }
+ nqsrv_getl(vp, NQL_READ);
+ if ((error = nfsrv_access(vp, VREAD, cred, rdonly, nfsd->nd_procp)) &&
+ (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp))) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ if (error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ if (off >= vap->va_size)
+ cnt = 0; /* starting at or past end of file: nothing to read */
+ else if ((off + cnt) > vap->va_size)
+ cnt = nfsm_rndup(vap->va_size - off);
+ nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)+NFSX_UNSIGNED+nfsm_rndup(cnt));
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED); /* tl: length word, set at the end */
+ len = left = cnt;
+ if (cnt > 0) {
+ /*
+ * Generate the mbuf list with the uio_iov ref. to it.
+ */
+ i = 0;
+ m = m2 = mb;
+ MALLOC(iv, struct iovec *,
+ ((NFS_MAXDATA+MLEN-1)/MLEN) * sizeof (struct iovec),
+ M_TEMP, M_WAITOK);
+ iv2 = iv; /* remember the array start for uio_iov and FREE */
+ while (left > 0) {
+ siz = min(M_TRAILINGSPACE(m), left);
+ if (siz > 0) {
+ m->m_len += siz;
+ iv->iov_base = bpos;
+ iv->iov_len = siz;
+ iv++;
+ i++;
+ left -= siz;
+ }
+ if (left > 0) {
+ MGET(m, M_WAIT, MT_DATA);
+ MCLGET(m, M_WAIT);
+ m->m_len = 0;
+ m2->m_next = m;
+ m2 = m;
+ bpos = mtod(m, caddr_t);
+ }
+ }
+ uiop->uio_iov = iv2;
+ uiop->uio_iovcnt = i;
+ uiop->uio_offset = off;
+ uiop->uio_resid = cnt;
+ uiop->uio_rw = UIO_READ;
+ uiop->uio_segflg = UIO_SYSSPACE; /* NOTE(review): uio_procp is not set here, unlike nfsrv_readlink/nfsrv_write -- confirm it is unused */
+ error = VOP_READ(vp, uiop, IO_NODELOCKED, cred);
+ off = uiop->uio_offset;
+ FREE((caddr_t)iv2, M_TEMP);
+ if (error || (error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp))) {
+ m_freem(mreq); /* discard the partly built reply */
+ vput(vp);
+ nfsm_reply(0);
+ }
+ } else
+ uiop->uio_resid = 0;
+ vput(vp);
+ nfsm_srvfillattr;
+ len -= uiop->uio_resid; /* bytes actually read */
+ tlen = nfsm_rndup(len);
+ if (cnt != tlen || tlen != len)
+ nfsm_adj(mb, cnt-tlen, tlen-len);
+ *tl = txdr_unsigned(len);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs write service
+ * Four words follow the file handle. For plain NFS the offset is the
+ * second word and the length the fourth; for nqnfs the first two words
+ * are a hyper offset, a non-zero third word adds IO_APPEND, and the
+ * fourth is the length. A length outside 1..NFS_MAXDATA is EBADRPC.
+ * The data is written straight from the request mbufs with VOP_WRITE()
+ * using IO_SYNC | IO_NODELOCKED, at most NFS_MAXIOVEC mbufs per call;
+ * running out of mbufs before len bytes are consumed is EBADRPC. Only
+ * VREG vnodes are writable (EISDIR for directories, EACCES otherwise).
+ * The reply holds the attributes and, for nqnfs, va_filerev.
+ * mrep/md/dpos describe the request mbuf list and dissect position, cred
+ * and nam identify the caller, and the reply comes back through mrq
+ * (assumed from the nfsm_* macro usage -- confirm against nfsm_subs.h).
+ */
+nfsrv_write(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct iovec *ivp;
+ register struct mbuf *mp;
+ register struct nfsv2_fattr *fp;
+ struct iovec iv[NFS_MAXIOVEC];
+ struct vattr va;
+ register struct vattr *vap = &va;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, siz, len, xfer;
+ int ioflags = IO_SYNC | IO_NODELOCKED;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io, *uiop = &io;
+ off_t off;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED);
+ if (nfsd->nd_nqlflag == NQL_NOVAL) {
+ off = (off_t)fxdr_unsigned(u_long, *++tl); /* first word is skipped */
+ tl += 2;
+ } else {
+ fxdr_hyper(tl, &off);
+ tl += 2;
+ if (fxdr_unsigned(u_long, *tl++))
+ ioflags |= IO_APPEND;
+ }
+ len = fxdr_unsigned(long, *tl); /* tl now points at the fourth word */
+ if (len > NFS_MAXDATA || len <= 0) {
+ error = EBADRPC;
+ nfsm_reply(0);
+ }
+ if (dpos == (mtod(md, caddr_t)+md->m_len)) { /* md fully consumed */
+ mp = md->m_next;
+ if (mp == NULL) {
+ error = EBADRPC;
+ nfsm_reply(0);
+ }
+ } else { /* data starts inside md: drop the bytes already dissected */
+ mp = md;
+ siz = dpos-mtod(mp, caddr_t);
+ mp->m_len -= siz;
+ NFSMADV(mp, siz);
+ }
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ if (vp->v_type != VREG) {
+ error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+ vput(vp);
+ nfsm_reply(0);
+ }
+ nqsrv_getl(vp, NQL_WRITE);
+ if (error = nfsrv_access(vp, VWRITE, cred, rdonly, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ uiop->uio_resid = 0;
+ uiop->uio_rw = UIO_WRITE;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ uiop->uio_procp = (struct proc *)0;
+ /*
+ * Do up to NFS_MAXIOVEC mbufs of write each iteration of the
+ * loop until done.
+ */
+ while (len > 0 && uiop->uio_resid == 0) {
+ ivp = iv;
+ siz = 0;
+ uiop->uio_iov = ivp;
+ uiop->uio_iovcnt = 0;
+ uiop->uio_offset = off;
+ while (len > 0 && uiop->uio_iovcnt < NFS_MAXIOVEC && mp != NULL) {
+ ivp->iov_base = mtod(mp, caddr_t);
+ if (len < mp->m_len)
+ ivp->iov_len = xfer = len;
+ else
+ ivp->iov_len = xfer = mp->m_len;
+#ifdef notdef
+ /* Not Yet .. */
+ if (M_HASCL(mp) && (((u_long)ivp->iov_base) & CLOFSET) == 0)
+ ivp->iov_op = NULL; /* what should it be ?? */
+ else
+ ivp->iov_op = NULL;
+#endif
+ uiop->uio_iovcnt++;
+ ivp++;
+ len -= xfer;
+ siz += xfer;
+ mp = mp->m_next;
+ }
+ if (len > 0 && mp == NULL) { /* request shorter than its stated length */
+ error = EBADRPC;
+ vput(vp);
+ nfsm_reply(0);
+ }
+ uiop->uio_resid = siz;
+ if (error = VOP_WRITE(vp, uiop, ioflags, cred)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ off = uiop->uio_offset; /* continue where this pass stopped */
+ }
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ if (nfsd->nd_nqlflag != NQL_NOVAL) {
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ txdr_hyper(&vap->va_filerev, tl);
+ }
+ nfsm_srvdone;
+}
+
+/*
+ * nfs create service
+ * now does a truncate to 0 length via. setattr if it already exists
+ * The name is resolved with nfs_namei() for CREATE with LOCKPARENT |
+ * LOCKLEAF | SAVESTART. If nothing exists under that name the type is
+ * taken from the mode in the sattr (VNON becomes VREG): VREG and VSOCK
+ * go through VOP_CREATE(); VCHR, VBLK and VFIFO go through VOP_MKNOD()
+ * followed by a lookup() to obtain the new vnode; any other type is
+ * ENXIO. Device nodes need suser(), and a VCHR with rdev 0xffffffff is
+ * treated as a FIFO (ENXIO when FIFO is not configured). If the name
+ * already exists and the sattr carries a size, write access is checked
+ * and the size is applied with VOP_SETATTR(). The reply holds the file
+ * handle and attributes of the resulting vnode.
+ * nfsmout is the target of failures inside the nfsm_* macros; cn_nameiop
+ * is zeroed on entry because that path tests it before vrele'ing
+ * ni_startdir.
+ * NOTE(review): nfsmout also reads cn_flags, ni_dvp and ni_vp, which are
+ * not initialised before nfs_namei() runs -- confirm this is safe when
+ * an early macro fails.
+ * mrep/md/dpos describe the request mbuf list and dissect position, cred
+ * and nam identify the caller, and the reply comes back through mrq
+ * (assumed from the nfsm_* macro usage -- confirm against nfsm_subs.h).
+ */
+nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct nfsv2_fattr *fp;
+ struct vattr va;
+ register struct vattr *vap = &va;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ struct nameidata nd;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdev, cache, len, tsize;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ nd.ni_cnd.cn_nameiop = 0; /* tested at nfsmout */
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ VATTR_NULL(vap);
+ nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ /*
+ * Iff doesn't exist, create it
+ * otherwise just truncate to 0 length
+ * should I set the mode too ??
+ */
+ if (nd.ni_vp == NULL) {
+ vap->va_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode));
+ if (vap->va_type == VNON)
+ vap->va_type = VREG;
+ vap->va_mode = nfstov_mode(sp->sa_mode);
+ if (nfsd->nd_nqlflag == NQL_NOVAL)
+ rdev = fxdr_unsigned(long, sp->sa_nfssize); /* plain NFS: size field carries rdev */
+ else
+ rdev = fxdr_unsigned(long, sp->sa_nqrdev);
+ if (vap->va_type == VREG || vap->va_type == VSOCK) {
+ vrele(nd.ni_startdir);
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))
+ nfsm_reply(0);
+ FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ } else if (vap->va_type == VCHR || vap->va_type == VBLK ||
+ vap->va_type == VFIFO) {
+ if (vap->va_type == VCHR && rdev == 0xffffffff)
+ vap->va_type = VFIFO; /* VCHR with all-ones rdev means FIFO */
+ if (vap->va_type == VFIFO) {
+#ifndef FIFO
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ vput(nd.ni_dvp);
+ error = ENXIO;
+ goto out;
+#endif /* FIFO */
+ } else if (error = suser(cred, (u_short *)0)) { /* device nodes need suser() */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ vput(nd.ni_dvp);
+ goto out;
+ } else
+ vap->va_rdev = (dev_t)rdev;
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) {
+ vrele(nd.ni_startdir);
+ nfsm_reply(0);
+ }
+ nd.ni_cnd.cn_nameiop = LOOKUP; /* look the new node up to get its vnode */
+ nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART);
+ nd.ni_cnd.cn_proc = nfsd->nd_procp;
+ nd.ni_cnd.cn_cred = nfsd->nd_procp->p_ucred;
+ if (error = lookup(&nd)) {
+ free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ nfsm_reply(0);
+ }
+ FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ if (nd.ni_cnd.cn_flags & ISSYMLINK) {
+ vrele(nd.ni_dvp);
+ vput(nd.ni_vp);
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ error = EINVAL;
+ nfsm_reply(0);
+ }
+ } else { /* any other vnode type cannot be created here */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ vput(nd.ni_dvp);
+ error = ENXIO;
+ goto out;
+ }
+ vp = nd.ni_vp;
+ } else { /* name already exists: optionally set its size */
+ vrele(nd.ni_startdir);
+ free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ vp = nd.ni_vp;
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nfsd->nd_nqlflag == NQL_NOVAL) {
+ tsize = fxdr_unsigned(long, sp->sa_nfssize);
+ if (tsize != -1)
+ vap->va_size = (u_quad_t)tsize;
+ else
+ vap->va_size = -1;
+ } else
+ fxdr_hyper(&sp->sa_nqsize, &vap->va_size);
+ if (vap->va_size != -1) { /* -1 means no size was supplied */
+ if (error = nfsrv_access(vp, VWRITE, cred,
+ (nd.ni_cnd.cn_flags & RDONLY), nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ nqsrv_getl(vp, NQL_WRITE);
+ if (error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ }
+ }
+ bzero((caddr_t)fhp, sizeof(nfh)); /* reuse nfh for the result's handle */
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfhtom(fhp);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ return (error);
+nfsmout:
+ if (nd.ni_cnd.cn_nameiop || nd.ni_cnd.cn_flags)
+ vrele(nd.ni_startdir);
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vput(nd.ni_vp);
+ return (error);
+
+out:
+ vrele(nd.ni_startdir);
+ free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ nfsm_reply(0);
+}
+
+/*
+ * nfs remove service
+ *
+ * Looks up the name carried in the request under the directory named by
+ * the request's file handle (nfs_namei() with DELETE, LOCKPARENT and
+ * LOCKLEAF) and removes it with VOP_REMOVE().
+ * Removing a directory requires suser(); a vnode flagged VROOT is refused
+ * with EBUSY. On an error the lookup is aborted with VOP_ABORTOP() and
+ * the parent and target vnodes are released here.
+ * NOTE(review): nfsm_srvmtofh, nfsm_srvstrsiz, nfsm_reply and nfsm_srvdone
+ * are macros defined outside this chunk; they appear to use the locals
+ * declared below and to return from this function on failure -- confirm
+ * in nfsm_subs.h.
+ */
+nfsrv_remove(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct nameidata nd;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, len;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = DELETE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ vp = nd.ni_vp;
+ /* Only the superuser may remove a directory this way. */
+ if (vp->v_type == VDIR &&
+ (error = suser(cred, (u_short *)0)))
+ goto out;
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT) {
+ error = EBUSY;
+ goto out;
+ }
+ if (vp->v_flag & VTEXT)
+ (void) vnode_pager_uncache(vp);
+out:
+ /* Common exit: either do the remove or undo the lookup. */
+ if (!error) {
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ nqsrv_getl(vp, NQL_WRITE);
+ error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ /* parent and target may be the same vnode; release it only once via vput(vp) below */
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs rename service
+ *
+ * The request carries two (file handle, name) pairs.  The source is looked
+ * up with nfs_namei() (DELETE, WANTPARENT | SAVESTART) and the target with
+ * nfs_namei() (RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART); if the
+ * sanity checks pass, VOP_RENAME() does the work.
+ *
+ * Checks made here:
+ *  - directory over non-directory gives EISDIR, the reverse ENOTDIR
+ *  - a target or source directory with something mounted on it gives
+ *    EXDEV / EBUSY respectively
+ *  - source and target directory on different mounts gives EXDEV
+ *  - renaming a directory into itself (fvp == tdvp) gives EINVAL
+ *  - source and target being the very same name in the same directory is
+ *    a no-op; it is flagged internally with error == -1 and reported to
+ *    the client as success.
+ *
+ * NOTE(review): nfsm_srvmtofh, nfsm_srvstrsiz, nfsm_strsiz and nfsm_reply
+ * are macros defined outside this chunk; the nfsmout label below has no
+ * explicit goto in this function, so it is presumably reached from inside
+ * those macros -- confirm in nfsm_subs.h.
+ */
+nfsrv_rename(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, cache, len, len2;
+	char *cp2;
+	struct mbuf *mb, *mreq;
+	struct nameidata fromnd, tond;
+	struct vnode *fvp, *tvp, *tdvp;
+	nfsv2fh_t fnfh, tnfh;
+	fhandle_t *ffhp, *tfhp;
+	u_quad_t frev;
+	uid_t saved_uid;
+
+	ffhp = &fnfh.fh_generic;
+	tfhp = &tnfh.fh_generic;
+	/*
+	 * nfsmout decides what to clean up by testing cn_nameiop and
+	 * cn_flags of both lookups, so both fields must be zero until the
+	 * corresponding lookup has actually been set up.
+	 */
+	fromnd.ni_cnd.cn_nameiop = 0;
+	fromnd.ni_cnd.cn_flags = 0;
+	tond.ni_cnd.cn_nameiop = 0;
+	tond.ni_cnd.cn_flags = 0;
+	nfsm_srvmtofh(ffhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	/*
+	 * Remember our original uid so that we can reset cr_uid before
+	 * the second nfs_namei() call, in case it is remapped.
+	 */
+	saved_uid = cred->cr_uid;
+	fromnd.ni_cnd.cn_cred = cred;
+	fromnd.ni_cnd.cn_nameiop = DELETE;
+	fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART;
+	if (error = nfs_namei(&fromnd, ffhp, len, nfsd->nd_slp, nam, &md,
+	    &dpos, nfsd->nd_procp))
+		nfsm_reply(0);
+	fvp = fromnd.ni_vp;
+	nfsm_srvmtofh(tfhp);
+	nfsm_strsiz(len2, NFS_MAXNAMLEN);
+	cred->cr_uid = saved_uid;
+	tond.ni_cnd.cn_cred = cred;
+	tond.ni_cnd.cn_nameiop = RENAME;
+	tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
+	if (error = nfs_namei(&tond, tfhp, len2, nfsd->nd_slp, nam, &md,
+	    &dpos, nfsd->nd_procp)) {
+		/* target lookup failed: undo the source lookup only */
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		goto out1;
+	}
+	tdvp = tond.ni_dvp;
+	tvp = tond.ni_vp;
+	if (tvp != NULL) {
+		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+			error = EISDIR;
+			goto out;
+		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+			error = ENOTDIR;
+			goto out;
+		}
+		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
+			error = EXDEV;
+			goto out;
+		}
+	}
+	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
+		error = EBUSY;
+		goto out;
+	}
+	if (fvp->v_mount != tdvp->v_mount) {
+		error = EXDEV;
+		goto out;
+	}
+	if (fvp == tdvp)
+		error = EINVAL;
+	/*
+	 * If source is the same as the destination (that is the
+	 * same vnode with the same name in the same directory),
+	 * then there is nothing to do.
+	 */
+	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+	      fromnd.ni_cnd.cn_namelen))
+		error = -1;
+out:
+	if (!error) {
+		nqsrv_getl(fromnd.ni_dvp, NQL_WRITE);
+		nqsrv_getl(tdvp, NQL_WRITE);
+		if (tvp)
+			nqsrv_getl(tvp, NQL_WRITE);
+		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+		    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+	} else {
+		/* abort both lookups and release everything they returned */
+		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
+		if (tdvp == tvp)
+			vrele(tdvp);
+		else
+			vput(tdvp);
+		if (tvp)
+			vput(tvp);
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		/*
+		 * -1 only marks the "nothing to do" case above; it must not
+		 * reach the reply as a status, so report success instead.
+		 */
+		if (error == -1)
+			error = 0;
+	}
+	vrele(tond.ni_startdir);
+	FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+out1:
+	vrele(fromnd.ni_startdir);
+	FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+	nfsm_reply(0);
+	return (error);
+
+nfsmout:
+	/* Release only what a lookup that was actually set up left behind. */
+	if (tond.ni_cnd.cn_nameiop || tond.ni_cnd.cn_flags) {
+		vrele(tond.ni_startdir);
+		FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+	}
+	if (fromnd.ni_cnd.cn_nameiop || fromnd.ni_cnd.cn_flags) {
+		vrele(fromnd.ni_startdir);
+		FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+	}
+	return (error);
+}
+
+/*
+ * nfs link service
+ *
+ * The request carries the file handle of an existing file, the file handle
+ * of a directory and a new name. The existing file is turned into a vnode
+ * with nfsrv_fhtovp(), the new name is looked up with nfs_namei() (CREATE,
+ * LOCKPARENT) and the link is made with VOP_LINK().
+ * Linking a directory requires suser(); an already existing name gives
+ * EEXIST; a directory on a different mount than the file gives EXDEV.
+ * NOTE(review): the nfsm_* macros are defined outside this chunk and
+ * appear to return from this function on failure -- confirm in
+ * nfsm_subs.h.
+ */
+nfsrv_link(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct nameidata nd;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, len;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct vnode *vp, *xp;
+ nfsv2fh_t nfh, dnfh;
+ fhandle_t *fhp, *dfhp;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ dfhp = &dnfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvmtofh(dfhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ /* vp: the existing file that gets the additional name */
+ if (error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ if (vp->v_type == VDIR && (error = suser(cred, (u_short *)0)))
+ goto out1;
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ if (error = nfs_namei(&nd, dfhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ goto out1;
+ /* the new name must not exist yet */
+ xp = nd.ni_vp;
+ if (xp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+ /* from here on xp is the directory that receives the new name */
+ xp = nd.ni_dvp;
+ if (vp->v_mount != xp->v_mount)
+ error = EXDEV;
+out:
+ if (!error) {
+ nqsrv_getl(vp, NQL_WRITE);
+ nqsrv_getl(xp, NQL_WRITE);
+ error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ }
+out1:
+ /* drop the reference nfsrv_fhtovp() returned on the existing file */
+ vrele(vp);
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs symbolic link service
+ *
+ * Looks up the new name under the directory named by the request's file
+ * handle (nfs_namei() with CREATE, LOCKPARENT), copies the link contents
+ * out of the request into a temporary NUL-terminated buffer and creates
+ * the link with VOP_SYMLINK(). Of the attributes sent by the client only
+ * sa_mode is used. An already existing name gives EEXIST.
+ * The temporary buffer is freed on every exit path that allocated it.
+ * NOTE(review): the nfsm_* macros are defined outside this chunk; the
+ * nfsmout label has no explicit goto here, so it is presumably reached
+ * from inside those macros -- confirm in nfsm_subs.h.
+ */
+nfsrv_symlink(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct vattr va;
+ struct nameidata nd;
+ register struct vattr *vap = &va;
+ register u_long *tl;
+ register long t1;
+ struct nfsv2_sattr *sp;
+ caddr_t bpos;
+ struct uio io;
+ struct iovec iv;
+ int error = 0, cache, len, len2;
+ char *pathcp, *cp2;
+ struct mbuf *mb, *mreq;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ /* pathcp stays null until the buffer is allocated; the exit paths test it */
+ pathcp = (char *)0;
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ goto out;
+ nfsm_strsiz(len2, NFS_MAXPATHLEN);
+ /* one extra byte for the terminating NUL stored below */
+ MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
+ /* describe pathcp as the destination of the copy out of the request */
+ iv.iov_base = pathcp;
+ iv.iov_len = len2;
+ io.uio_resid = len2;
+ io.uio_offset = 0;
+ io.uio_iov = &iv;
+ io.uio_iovcnt = 1;
+ io.uio_segflg = UIO_SYSSPACE;
+ io.uio_rw = UIO_READ;
+ io.uio_procp = (struct proc *)0;
+ nfsm_mtouio(&io, len2);
+ nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ *(pathcp + len2) = '\0';
+ /* the name already exists: undo the lookup and fail */
+ if (nd.ni_vp) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(nd.ni_vp);
+ error = EEXIST;
+ goto out;
+ }
+ VATTR_NULL(vap);
+ vap->va_mode = fxdr_unsigned(u_short, sp->sa_mode);
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
+out:
+ if (pathcp)
+ FREE(pathcp, M_TEMP);
+ nfsm_reply(0);
+ return (error);
+nfsmout:
+ /* request decoding failed after the lookup: undo it and free the buffer */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ if (pathcp)
+ FREE(pathcp, M_TEMP);
+ return (error);
+}
+
+/*
+ * nfs mkdir service
+ *
+ * Looks up the new name under the directory named by the request's file
+ * handle (nfs_namei() with CREATE, LOCKPARENT), creates the directory with
+ * VOP_MKDIR() using the mode sent by the client, and replies with the file
+ * handle and attributes of the new directory.
+ * An already existing name gives EEXIST.
+ * NOTE(review): the nfsm_* macros are defined outside this chunk; the
+ * nfsmout label has no explicit goto here, so it is presumably reached
+ * from inside those macros -- confirm in nfsm_subs.h.
+ */
+nfsrv_mkdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct vattr va;
+ register struct vattr *vap = &va;
+ register struct nfsv2_fattr *fp;
+ struct nameidata nd;
+ register caddr_t cp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, len;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ /* the first word of the attributes is the mode; nothing else is used */
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ VATTR_NULL(vap);
+ vap->va_type = VDIR;
+ vap->va_mode = nfstov_mode(*tl++);
+ vp = nd.ni_vp;
+ /* the name already exists: undo the lookup and fail */
+ if (vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(vp);
+ error = EEXIST;
+ nfsm_reply(0);
+ }
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ if (error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))
+ nfsm_reply(0);
+ vp = nd.ni_vp;
+ /* build the file handle of the new directory for the reply */
+ bzero((caddr_t)fhp, sizeof(nfh));
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfhtom(fhp);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ return (error);
+nfsmout:
+ /* request decoding failed after the lookup: undo it */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ return (error);
+}
+
+/*
+ * nfs rmdir service
+ *
+ * Looks up the name carried in the request under the directory named by
+ * the request's file handle (nfs_namei() with DELETE, LOCKPARENT and
+ * LOCKLEAF) and removes it with VOP_RMDIR().
+ * A target that is not a directory gives ENOTDIR, a target that is the
+ * parent itself (".") gives EINVAL, and a vnode flagged VROOT gives EBUSY.
+ * NOTE(review): the nfsm_* macros are defined outside this chunk and
+ * appear to return from this function on failure -- confirm in
+ * nfsm_subs.h.
+ */
+nfsrv_rmdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, len;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct nameidata nd;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = DELETE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ vp = nd.ni_vp;
+ if (vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+ /*
+ * No rmdir "." please.
+ */
+ if (nd.ni_dvp == vp) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+out:
+ /* Common exit: either do the rmdir or undo the lookup. */
+ if (!error) {
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ nqsrv_getl(vp, NQL_WRITE);
+ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs readdir service
+ * - mallocs what it thinks is enough to read
+ * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
+ * - calls VOP_READDIR()
+ * - loops around building the reply
+ * if the output generated exceeds count break out of loop
+ * The nfsm_clget macro is used here so that the reply will be packed
+ * tightly in mbuf clusters.
+ * - it only knows that it has encountered eof when the VOP_READDIR()
+ * reads nothing
+ * - as such, one readdir rpc may return eof false even though the end of
+ * the directory has been reached, and only the next one will return eof
+ * - it trims out records with d_fileno == 0
+ * this doesn't matter for Unix clients, but such records might confuse
+ * clients for other operating systems.
+ * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
+ * than requested, but this may not apply to all filesystems. For
+ * example, client NFS does not { although it is never remote mounted
+ * anyhow }
+ * The alternate call nqnfsrv_readdirlook() does lookups as well.
+ * PS: The NFS protocol spec. does not clarify what the "count" byte
+ * argument is a count of.. just name strings and file id's or the
+ * entire reply rpc or ...
+ * I tried just file name and id sizes and it confused the Sun client,
+ * so I am using the full rpc size now. The "paranoia.." comment refers
+ * to including the status longwords that are not a part of the dir.
+ * "entry" structures, but are in the rpc.
+ */
+/*
+ * Per-entry lookup data that nqnfsrv_readdirlook() copies into the reply
+ * in front of each directory entry. The fields are stored already
+ * converted with the txdr_* macros.
+ */
+struct flrep {
+ u_long fl_cachable; /* cache flag returned by nqsrv_getlease() */
+ u_long fl_duration; /* lease duration, 0 when no lease was requested */
+ u_long fl_frev[2]; /* 64 bit value from nqsrv_getlease(), via txdr_hyper() */
+ nfsv2fh_t fl_nfh; /* file handle of the entry */
+ u_long fl_fattr[NFSX_NQFATTR / sizeof (u_long)]; /* attributes, filled by nfsm_srvfillattr */
+};
+
+/*
+ * nfs readdir service entry point; the algorithm is described in the
+ * comment in front of struct flrep.
+ * NOTE(review): nfsm_clget, nfsm_reply and the other nfsm_* macros are
+ * defined outside this chunk; nfsm_clget appears to use mp, mp2, bp, be
+ * and tl to provide room for the next word of the reply -- confirm in
+ * nfsm_subs.h.
+ */
+nfsrv_readdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register char *bp, *be;
+ register struct mbuf *mp;
+ register struct dirent *dp;
+ register caddr_t cp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ struct mbuf *mb, *mb2, *mreq, *mp2;
+ char *cpos, *cend, *cp2, *rbuf;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io;
+ struct iovec iv;
+ int len, nlen, rem, xfer, tsiz, i, error = 0;
+ int siz, cnt, fullsiz, eofflag, rdonly, cache;
+ u_quad_t frev;
+ u_long on, off, toff;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+ /*
+ * Split the client's offset into the start of the NFS_DIRBLKSIZ
+ * block that contains it (off) and the position inside that
+ * block (on).
+ */
+ toff = fxdr_unsigned(u_long, *tl++);
+ off = (toff & ~(NFS_DIRBLKSIZ-1));
+ on = (toff & (NFS_DIRBLKSIZ-1));
+ /*
+ * Read size: cnt rounded up to a multiple of NFS_DIRBLKSIZ, at
+ * most NFS_MAXREADDIR.
+ * NOTE(review): cnt comes from the request and only its upper
+ * bound is checked here -- confirm zero or negative values cannot
+ * reach the MALLOC below.
+ */
+ cnt = fxdr_unsigned(int, *tl);
+ siz = ((cnt+NFS_DIRBLKSIZ-1) & ~(NFS_DIRBLKSIZ-1));
+ if (cnt > NFS_MAXREADDIR)
+ siz = NFS_MAXREADDIR;
+ fullsiz = siz;
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ nqsrv_getl(vp, NQL_READ);
+ if (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ VOP_UNLOCK(vp);
+ MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
+again:
+ /* read fullsiz bytes of directory data starting at off into rbuf */
+ iv.iov_base = rbuf;
+ iv.iov_len = fullsiz;
+ io.uio_iov = &iv;
+ io.uio_iovcnt = 1;
+ io.uio_offset = (off_t)off;
+ io.uio_resid = fullsiz;
+ io.uio_segflg = UIO_SYSSPACE;
+ io.uio_rw = UIO_READ;
+ io.uio_procp = (struct proc *)0;
+ error = VOP_READDIR(vp, &io, cred);
+ off = (off_t)io.uio_offset;
+ if (error) {
+ vrele(vp);
+ free((caddr_t)rbuf, M_TEMP);
+ nfsm_reply(0);
+ }
+ /* eof is assumed only when the read returned nothing at all */
+ if (io.uio_resid < fullsiz)
+ eofflag = 0;
+ else
+ eofflag = 1;
+ if (io.uio_resid) {
+ siz -= io.uio_resid;
+
+ /*
+ * If nothing read, return eof
+ * rpc reply
+ */
+ if (siz == 0) {
+ vrele(vp);
+ nfsm_reply(2*NFSX_UNSIGNED);
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = nfs_false;
+ *tl = nfs_true;
+ FREE((caddr_t)rbuf, M_TEMP);
+ return (0);
+ }
+ }
+
+ /*
+ * Check for degenerate cases of nothing useful read.
+ * If so go try again
+ */
+ cpos = rbuf + on;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ while (cpos < cend && dp->d_fileno == 0) {
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ }
+ if (cpos >= cend) {
+ toff = off;
+ siz = fullsiz;
+ on = 0;
+ goto again;
+ }
+
+ /* rewind to the first requested record and start the reply */
+ cpos = rbuf + on;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ len = 3*NFSX_UNSIGNED; /* paranoia, probably can be 0 */
+ nfsm_reply(siz);
+ mp = mp2 = mb;
+ bp = bpos;
+ be = bp + M_TRAILINGSPACE(mp);
+
+ /* Loop through the records and build reply */
+ while (cpos < cend) {
+ if (dp->d_fileno != 0) {
+ nlen = dp->d_namlen;
+ rem = nfsm_rndup(nlen)-nlen;
+ /* stop before the reply grows past what the client asked for */
+ len += (4*NFSX_UNSIGNED + nlen + rem);
+ if (len > cnt) {
+ eofflag = 0;
+ break;
+ }
+ /*
+ * Build the directory record xdr from
+ * the dirent entry.
+ */
+ nfsm_clget;
+ *tl = nfs_true;
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = txdr_unsigned(dp->d_fileno);
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = txdr_unsigned(nlen);
+ bp += NFSX_UNSIGNED;
+
+ /* And loop around copying the name */
+ xfer = nlen;
+ cp = dp->d_name;
+ while (xfer > 0) {
+ nfsm_clget;
+ if ((bp+xfer) > be)
+ tsiz = be-bp;
+ else
+ tsiz = xfer;
+ bcopy(cp, bp, tsiz);
+ bp += tsiz;
+ xfer -= tsiz;
+ if (xfer > 0)
+ cp += tsiz;
+ }
+ /* And null pad to a long boundary */
+ for (i = 0; i < rem; i++)
+ *bp++ = '\0';
+ nfsm_clget;
+
+ /* Finish off the record */
+ toff += dp->d_reclen;
+ *tl = txdr_unsigned(toff);
+ bp += NFSX_UNSIGNED;
+ } else
+ toff += dp->d_reclen;
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ }
+ vrele(vp);
+ /* terminate the entry list, then append the eof flag */
+ nfsm_clget;
+ *tl = nfs_false;
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ if (eofflag)
+ *tl = nfs_true;
+ else
+ *tl = nfs_false;
+ bp += NFSX_UNSIGNED;
+ /* set the length of the mbuf that was written last */
+ if (mp != mb) {
+ if (bp < be)
+ mp->m_len = bp - mtod(mp, caddr_t);
+ } else
+ mp->m_len += bp - bpos;
+ FREE(rbuf, M_TEMP);
+ nfsm_srvdone;
+}
+
+/*
+ * Readdir variant that also does a lookup for every entry: in addition to
+ * what nfsrv_readdir() returns, each entry is preceded by a struct flrep
+ * holding the entry's file handle, its attributes and, when the client
+ * sent a non-zero duration, lease information from nqsrv_getlease().
+ * Entries whose vnode, file handle or attributes cannot be obtained are
+ * skipped.
+ * NOTE(review): nfsm_clget, nfsm_srvfillattr, nfsm_reply and the other
+ * nfsm_* macros are defined outside this chunk; nfsm_srvfillattr appears
+ * to convert *vap into *fp -- confirm in nfsm_subs.h.
+ */
+nqnfsrv_readdirlook(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register char *bp, *be;
+ register struct mbuf *mp;
+ register struct dirent *dp;
+ register caddr_t cp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ struct mbuf *mb, *mb2, *mreq, *mp2;
+ char *cpos, *cend, *cp2, *rbuf;
+ struct vnode *vp, *nvp;
+ struct flrep fl;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io;
+ struct iovec iv;
+ struct vattr va, *vap = &va;
+ struct nfsv2_fattr *fp;
+ int len, nlen, rem, xfer, tsiz, i, error = 0, duration2, cache2;
+ int siz, cnt, fullsiz, eofflag, rdonly, cache;
+ u_quad_t frev, frev2;
+ u_long on, off, toff;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+ /*
+ * Split the client's offset into the start of the NFS_DIRBLKSIZ
+ * block that contains it (off) and the position inside that
+ * block (on).
+ */
+ toff = fxdr_unsigned(u_long, *tl++);
+ off = (toff & ~(NFS_DIRBLKSIZ-1));
+ on = (toff & (NFS_DIRBLKSIZ-1));
+ /*
+ * NOTE(review): cnt comes from the request and only its upper
+ * bound is checked here -- confirm zero or negative values cannot
+ * reach the MALLOC below.
+ */
+ cnt = fxdr_unsigned(int, *tl++);
+ duration2 = fxdr_unsigned(int, *tl);
+ siz = ((cnt+NFS_DIRBLKSIZ-1) & ~(NFS_DIRBLKSIZ-1));
+ if (cnt > NFS_MAXREADDIR)
+ siz = NFS_MAXREADDIR;
+ fullsiz = siz;
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ nqsrv_getl(vp, NQL_READ);
+ if (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ VOP_UNLOCK(vp);
+ MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
+again:
+ /* read fullsiz bytes of directory data starting at off into rbuf */
+ iv.iov_base = rbuf;
+ iv.iov_len = fullsiz;
+ io.uio_iov = &iv;
+ io.uio_iovcnt = 1;
+ io.uio_offset = (off_t)off;
+ io.uio_resid = fullsiz;
+ io.uio_segflg = UIO_SYSSPACE;
+ io.uio_rw = UIO_READ;
+ io.uio_procp = (struct proc *)0;
+ error = VOP_READDIR(vp, &io, cred);
+ off = (u_long)io.uio_offset;
+ if (error) {
+ vrele(vp);
+ free((caddr_t)rbuf, M_TEMP);
+ nfsm_reply(0);
+ }
+ /* eof is assumed only when the read returned nothing at all */
+ if (io.uio_resid < fullsiz)
+ eofflag = 0;
+ else
+ eofflag = 1;
+ if (io.uio_resid) {
+ siz -= io.uio_resid;
+
+ /*
+ * If nothing read, return eof
+ * rpc reply
+ */
+ if (siz == 0) {
+ vrele(vp);
+ nfsm_reply(2 * NFSX_UNSIGNED);
+ nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
+ *tl++ = nfs_false;
+ *tl = nfs_true;
+ FREE((caddr_t)rbuf, M_TEMP);
+ return (0);
+ }
+ }
+
+ /*
+ * Check for degenerate cases of nothing useful read.
+ * If so go try again
+ */
+ cpos = rbuf + on;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ while (cpos < cend && dp->d_fileno == 0) {
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ }
+ if (cpos >= cend) {
+ toff = off;
+ siz = fullsiz;
+ on = 0;
+ goto again;
+ }
+
+ /* rewind to the first requested record and start the reply */
+ cpos = rbuf + on;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */
+ nfsm_reply(siz);
+ mp = mp2 = mb;
+ bp = bpos;
+ be = bp + M_TRAILINGSPACE(mp);
+
+ /* Loop through the records and build reply */
+ while (cpos < cend) {
+ if (dp->d_fileno != 0) {
+ nlen = dp->d_namlen;
+ rem = nfsm_rndup(nlen)-nlen;
+
+ /*
+ * For readdir_and_lookup get the vnode using
+ * the file number.
+ */
+ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp))
+ goto invalid;
+ bzero((caddr_t)&fl.fl_nfh, sizeof (nfsv2fh_t));
+ fl.fl_nfh.fh_generic.fh_fsid =
+ nvp->v_mount->mnt_stat.f_fsid;
+ if (VFS_VPTOFH(nvp, &fl.fl_nfh.fh_generic.fh_fid)) {
+ vput(nvp);
+ goto invalid;
+ }
+ /* a lease is obtained only when the client asked for one */
+ if (duration2) {
+ (void) nqsrv_getlease(nvp, &duration2, NQL_READ,
+ nfsd, nam, &cache2, &frev2, cred);
+ fl.fl_duration = txdr_unsigned(duration2);
+ fl.fl_cachable = txdr_unsigned(cache2);
+ txdr_hyper(&frev2, fl.fl_frev);
+ } else
+ fl.fl_duration = 0;
+ if (VOP_GETATTR(nvp, vap, cred, nfsd->nd_procp)) {
+ vput(nvp);
+ goto invalid;
+ }
+ vput(nvp);
+ /* point fp at the attribute area of fl for nfsm_srvfillattr */
+ fp = (struct nfsv2_fattr *)&fl.fl_fattr;
+ nfsm_srvfillattr;
+ /* stop before the reply grows past what the client asked for */
+ len += (4*NFSX_UNSIGNED + nlen + rem + NFSX_FH
+ + NFSX_NQFATTR);
+ if (len > cnt) {
+ eofflag = 0;
+ break;
+ }
+ /*
+ * Build the directory record xdr from
+ * the dirent entry.
+ */
+ nfsm_clget;
+ *tl = nfs_true;
+ bp += NFSX_UNSIGNED;
+
+ /*
+ * For readdir_and_lookup copy the stuff out.
+ */
+ xfer = sizeof (struct flrep);
+ cp = (caddr_t)&fl;
+ while (xfer > 0) {
+ nfsm_clget;
+ if ((bp+xfer) > be)
+ tsiz = be-bp;
+ else
+ tsiz = xfer;
+ bcopy(cp, bp, tsiz);
+ bp += tsiz;
+ xfer -= tsiz;
+ if (xfer > 0)
+ cp += tsiz;
+ }
+ nfsm_clget;
+ *tl = txdr_unsigned(dp->d_fileno);
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = txdr_unsigned(nlen);
+ bp += NFSX_UNSIGNED;
+
+ /* And loop around copying the name */
+ xfer = nlen;
+ cp = dp->d_name;
+ while (xfer > 0) {
+ nfsm_clget;
+ if ((bp+xfer) > be)
+ tsiz = be-bp;
+ else
+ tsiz = xfer;
+ bcopy(cp, bp, tsiz);
+ bp += tsiz;
+ xfer -= tsiz;
+ if (xfer > 0)
+ cp += tsiz;
+ }
+ /* And null pad to a long boundary */
+ for (i = 0; i < rem; i++)
+ *bp++ = '\0';
+ nfsm_clget;
+
+ /* Finish off the record */
+ toff += dp->d_reclen;
+ *tl = txdr_unsigned(toff);
+ bp += NFSX_UNSIGNED;
+ } else
+ /*
+ * The label sits inside the else branch: entries with
+ * d_fileno == 0 and entries abandoned with "goto invalid"
+ * both only advance toff past the record.
+ */
+invalid:
+ toff += dp->d_reclen;
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ }
+ vrele(vp);
+ /* terminate the entry list, then append the eof flag */
+ nfsm_clget;
+ *tl = nfs_false;
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ if (eofflag)
+ *tl = nfs_true;
+ else
+ *tl = nfs_false;
+ bp += NFSX_UNSIGNED;
+ /* set the length of the mbuf that was written last */
+ if (mp != mb) {
+ if (bp < be)
+ mp->m_len = bp - mtod(mp, caddr_t);
+ } else
+ mp->m_len += bp - bpos;
+ FREE(rbuf, M_TEMP);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs statfs service
+ *
+ * Turns the request's file handle into a vnode, calls VFS_STATFS() on the
+ * vnode's mount and copies the result into the reply. The file counts
+ * (sf_files, sf_ffree) are included only when nd_nqlflag differs from
+ * NQL_NOVAL; the reply size is chosen accordingly with NFSX_STATFS(isnq).
+ * NOTE(review): the nfsm_* macros are defined outside this chunk and
+ * appear to return from this function when error is set -- confirm in
+ * nfsm_subs.h.
+ */
+nfsrv_statfs(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct statfs *sf;
+ register struct nfsv2_statfs *sfp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, isnq;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct statfs statfs;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ isnq = (nfsd->nd_nqlflag != NQL_NOVAL);
+ nfsm_srvmtofh(fhp);
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ sf = &statfs;
+ error = VFS_STATFS(vp->v_mount, sf, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_STATFS(isnq));
+ nfsm_build(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq));
+ /* the transfer size reported is the fixed NFS_MAXDGRAMDATA */
+ sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
+ sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
+ sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
+ sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
+ sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
+ if (isnq) {
+ sfp->sf_files = txdr_unsigned(sf->f_files);
+ sfp->sf_ffree = txdr_unsigned(sf->f_ffree);
+ }
+ nfsm_srvdone;
+}
+
+/*
+ * Null operation, used by clients to ping server
+ *
+ * Builds an empty reply with nfsm_reply(0). error is deliberately set to
+ * VNOVAL rather than 0 before the reply is built.
+ * NOTE(review): presumably VNOVAL tells the reply code to omit the nfs
+ * status word -- confirm in nfs_rephead().
+ */
+/* ARGSUSED */
+nfsrv_null(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ caddr_t bpos;
+ int error = VNOVAL, cache;
+ struct mbuf *mb, *mreq;
+ u_quad_t frev;
+
+ nfsm_reply(0);
+ return (error);
+}
+
+/*
+ * No operation, used for obsolete procedures
+ *
+ * Replies with the status already recorded in nfsd->nd_repstat when that
+ * is non-zero, otherwise with EPROCUNAVAIL.
+ */
+/* ARGSUSED */
+nfsrv_noop(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ caddr_t bpos;
+ int error, cache;
+ struct mbuf *mb, *mreq;
+ u_quad_t frev;
+
+ if (nfsd->nd_repstat)
+ error = nfsd->nd_repstat;
+ else
+ error = EPROCUNAVAIL;
+ nfsm_reply(0);
+ return (error);
+}
+
+/*
+ * Access check for a vnode obtained from a file handle, i.e. one that may
+ * refer to a file a Unix client already has open.
+ * vn_writechk() followed by VOP_ACCESS() is not sufficient here because
+ * 1 - a write has to be refused when the export is read-only (rdonly),
+ *     not only when the mount itself is MNT_RDONLY
+ * 2 - the owner is granted access irrespective of the mode bits, so that
+ *     a process that chmods a file after opening it does not break.
+ *     This opens a security hole, but the nfs server already has one the
+ *     size of a barn door anyhow.
+ * Returns 0 when the access is allowed, an errno value otherwise.
+ */
+nfsrv_access(vp, flags, cred, rdonly, p)
+	register struct vnode *vp;
+	int flags;
+	register struct ucred *cred;
+	int rdonly;
+	struct proc *p;
+{
+	struct vattr attrs;
+	int rv;
+
+	if (flags & VWRITE) {
+		/*
+		 * vn_writechk() with rdonly added: on a read-only
+		 * filesystem or export, regular files, directories and
+		 * symbolic links may not be written.  Other vnode types
+		 * (sockets, block and character devices) are let through.
+		 */
+		if ((rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) &&
+		    (vp->v_type == VREG || vp->v_type == VDIR ||
+		     vp->v_type == VLNK))
+			return (EROFS);
+		/*
+		 * Shared text attached to the vnode gets one attempt at
+		 * being freed; if that fails, writing is not allowed.
+		 */
+		if ((vp->v_flag & VTEXT) && !vnode_pager_uncache(vp))
+			return (ETXTBSY);
+	}
+	rv = VOP_GETATTR(vp, &attrs, cred, p);
+	if (rv)
+		return (rv);
+	rv = VOP_ACCESS(vp, flags, cred, p);
+	/* the owner passes even when VOP_ACCESS() said no */
+	if (rv == 0 || cred->cr_uid == attrs.va_uid)
+		return (0);
+	return (rv);
+}
diff --git a/sys/nfsserver/nfs_srvcache.c b/sys/nfsserver/nfs_srvcache.c
new file mode 100644
index 000000000000..63d8bb72d82f
--- /dev/null
+++ b/sys/nfsserver/nfs_srvcache.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_srvcache.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Reference: Chet Juszczak, "Improving the Performance and Correctness
+ * of an NFS Server", in Proc. Winter 1989 USENIX Conference,
+ * pages 53-63. San Diego, February 1989.
+ */
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/nfsm_subs.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nqnfs.h>
+
+/* Number of cache entries currently allocated, and the number the cache may grow to. */
+long numnfsrvcache, desirednfsrvcache = NFSRVCACHESIZ;
+
+/* Hash bucket index for a transaction id; rheadhash is the mask set up by hashinit(). */
+#define NFSRCHASH(xid) (((xid) + ((xid) >> 24)) & rheadhash)
+/* LRU list: head pointer and the address of the last entry's rc_next (or of the head when empty). */
+static struct nfsrvcache *nfsrvlruhead, **nfsrvlrutail = &nfsrvlruhead;
+/* Hash table of cache entries, chained through rc_forw/rc_back. */
+static struct nfsrvcache **rheadhtbl;
+static u_long rheadhash;
+
+#define TRUE 1
+#define FALSE 0
+
+/* Address family of the client address stored in a cache entry. */
+#define NETFAMILY(rp) \
+ (((rp)->rc_flag & RC_INETADDR) ? AF_INET : AF_ISO)
+
+/*
+ * Static array that defines which nfs rpc's are nonidempotent
+ * Indexed by procedure number (nd_procnum); nfsrv_updatecache() saves a
+ * reply only for procedures marked TRUE here.
+ */
+int nonidempotent[NFS_NPROCS] = {
+ FALSE, /* 0 */
+ FALSE, /* 1 */
+ TRUE, /* 2 */
+ FALSE, /* 3 */
+ FALSE, /* 4 */
+ FALSE, /* 5 */
+ FALSE, /* 6 */
+ FALSE, /* 7 */
+ TRUE, /* 8 */
+ TRUE, /* 9 */
+ TRUE, /* 10 */
+ TRUE, /* 11 */
+ TRUE, /* 12 */
+ TRUE, /* 13 */
+ TRUE, /* 14 */
+ TRUE, /* 15 */
+ FALSE, /* 16 */
+ FALSE, /* 17 */
+ FALSE, /* 18 */
+ FALSE, /* 19 */
+ FALSE, /* 20 */
+ FALSE, /* 21 */
+ FALSE, /* 22 */
+};
+
+/*
+ * True iff the rpc reply is an nfs status ONLY!
+ * Indexed by procedure number (nd_procnum). For these procedures
+ * nfsrv_updatecache() stores just nd_repstat (RC_REPSTATUS) instead of a
+ * copy of the reply mbufs.
+ */
+static int repliesstatus[NFS_NPROCS] = {
+ FALSE, /* 0 */
+ FALSE, /* 1 */
+ FALSE, /* 2 */
+ FALSE, /* 3 */
+ FALSE, /* 4 */
+ FALSE, /* 5 */
+ FALSE, /* 6 */
+ FALSE, /* 7 */
+ FALSE, /* 8 */
+ FALSE, /* 9 */
+ TRUE, /* 10 */
+ TRUE, /* 11 */
+ TRUE, /* 12 */
+ TRUE, /* 13 */
+ FALSE, /* 14 */
+ TRUE, /* 15 */
+ FALSE, /* 16 */
+ FALSE, /* 17 */
+ FALSE, /* 18 */
+ FALSE, /* 19 */
+ FALSE, /* 20 */
+ FALSE, /* 21 */
+ TRUE, /* 22 */
+};
+
+/*
+ * Initialize the server request cache list
+ * Allocates the hash table with hashinit(), sized from desirednfsrvcache;
+ * hashinit() also fills in rheadhash, the mask used by NFSRCHASH().
+ */
+nfsrv_initcache()
+{
+
+ rheadhtbl = hashinit(desirednfsrvcache, M_NFSD, &rheadhash);
+}
+
+/*
+ * Look for the request in the cache
+ * If found then
+ * return action and optionally reply
+ * else
+ * insert it in the cache
+ *
+ * The rules are as follows:
+ * - if in progress, return DROP request
+ * - if completed within DELAY of the current time, return DROP it
+ * - if completed a longer time ago return REPLY if the reply was cached or
+ * return DOIT
+ * Update/add new request at end of lru list
+ *
+ * A request matches an entry when transaction id, procedure number and
+ * client address are all equal. Requests with nd_nqlflag != NQL_NOVAL
+ * bypass the cache and always get RC_DOIT.
+ * Entries are protected by the RC_LOCKED / RC_WANTED flags: a locked
+ * entry is waited for with tsleep() and the search is then restarted.
+ * On RC_REPLY the reply to send is returned through *repp.
+ */
+nfsrv_getcache(nam, nd, repp)
+ struct mbuf *nam;
+ register struct nfsd *nd;
+ struct mbuf **repp;
+{
+ register struct nfsrvcache *rp, *rq, **rpp;
+ struct mbuf *mb;
+ struct sockaddr_in *saddr;
+ caddr_t bpos;
+ int ret;
+
+ if (nd->nd_nqlflag != NQL_NOVAL)
+ return (RC_DOIT);
+ rpp = &rheadhtbl[NFSRCHASH(nd->nd_retxid)];
+loop:
+ for (rp = *rpp; rp; rp = rp->rc_forw) {
+ if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
+ netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nam)) {
+ /* someone else holds the entry: wait, then search again */
+ if ((rp->rc_flag & RC_LOCKED) != 0) {
+ rp->rc_flag |= RC_WANTED;
+ (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+ goto loop;
+ }
+ rp->rc_flag |= RC_LOCKED;
+ /* If not at end of LRU chain, move it there */
+ if (rp->rc_next) {
+ /* remove from LRU chain */
+ *rp->rc_prev = rp->rc_next;
+ rp->rc_next->rc_prev = rp->rc_prev;
+ /* and replace at end of it */
+ rp->rc_next = NULL;
+ rp->rc_prev = nfsrvlrutail;
+ *nfsrvlrutail = rp;
+ nfsrvlrutail = &rp->rc_next;
+ }
+ if (rp->rc_state == RC_UNUSED)
+ panic("nfsrv cache");
+ /* decide what to do with the retransmitted request */
+ if (rp->rc_state == RC_INPROG) {
+ nfsstats.srvcache_inproghits++;
+ ret = RC_DROPIT;
+ } else if (rp->rc_flag & RC_REPSTATUS) {
+ /* only a status was saved: rebuild the reply from it */
+ nfsstats.srvcache_nonidemdonehits++;
+ nfs_rephead(0, nd, rp->rc_status,
+ 0, (u_quad_t *)0, repp, &mb, &bpos);
+ ret = RC_REPLY;
+ } else if (rp->rc_flag & RC_REPMBUF) {
+ /* the whole reply was saved: hand out a copy */
+ nfsstats.srvcache_nonidemdonehits++;
+ *repp = m_copym(rp->rc_reply, 0, M_COPYALL,
+ M_WAIT);
+ ret = RC_REPLY;
+ } else {
+ /* nothing saved: the request is simply done again */
+ nfsstats.srvcache_idemdonehits++;
+ rp->rc_state = RC_INPROG;
+ ret = RC_DOIT;
+ }
+ rp->rc_flag &= ~RC_LOCKED;
+ if (rp->rc_flag & RC_WANTED) {
+ rp->rc_flag &= ~RC_WANTED;
+ wakeup((caddr_t)rp);
+ }
+ return (ret);
+ }
+ }
+ /* Not in the cache: get an entry, either a fresh one or the LRU head. */
+ nfsstats.srvcache_misses++;
+ if (numnfsrvcache < desirednfsrvcache) {
+ rp = (struct nfsrvcache *)malloc((u_long)sizeof *rp,
+ M_NFSD, M_WAITOK);
+ bzero((char *)rp, sizeof *rp);
+ numnfsrvcache++;
+ rp->rc_flag = RC_LOCKED;
+ } else {
+ rp = nfsrvlruhead;
+ while ((rp->rc_flag & RC_LOCKED) != 0) {
+ rp->rc_flag |= RC_WANTED;
+ (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+ rp = nfsrvlruhead;
+ }
+ rp->rc_flag |= RC_LOCKED;
+ /* remove from hash chain */
+ if (rq = rp->rc_forw)
+ rq->rc_back = rp->rc_back;
+ *rp->rc_back = rq;
+ /*
+ * NOTE(review): rc_next is dereferenced without a null
+ * check, so this relies on the LRU list holding more than
+ * one entry whenever an entry is recycled -- confirm.
+ */
+ /* remove from LRU chain */
+ *rp->rc_prev = rp->rc_next;
+ rp->rc_next->rc_prev = rp->rc_prev;
+ /* release what the previous use of the entry left behind */
+ if (rp->rc_flag & RC_REPMBUF)
+ m_freem(rp->rc_reply);
+ if (rp->rc_flag & RC_NAM)
+ MFREE(rp->rc_nam, mb);
+ rp->rc_flag &= (RC_LOCKED | RC_WANTED);
+ }
+ /* place at end of LRU list */
+ rp->rc_next = NULL;
+ rp->rc_prev = nfsrvlrutail;
+ *nfsrvlrutail = rp;
+ nfsrvlrutail = &rp->rc_next;
+ rp->rc_state = RC_INPROG;
+ rp->rc_xid = nd->nd_retxid;
+ /* remember the client: the bare address for AF_INET, a copy of nam otherwise */
+ saddr = mtod(nam, struct sockaddr_in *);
+ switch (saddr->sin_family) {
+ case AF_INET:
+ rp->rc_flag |= RC_INETADDR;
+ rp->rc_inetaddr = saddr->sin_addr.s_addr;
+ break;
+ case AF_ISO:
+ default:
+ rp->rc_flag |= RC_NAM;
+ rp->rc_nam = m_copym(nam, 0, M_COPYALL, M_WAIT);
+ break;
+ };
+ rp->rc_proc = nd->nd_procnum;
+ /* insert into hash chain */
+ if (rq = *rpp)
+ rq->rc_back = &rp->rc_forw;
+ rp->rc_forw = rq;
+ rp->rc_back = rpp;
+ *rpp = rp;
+ rp->rc_flag &= ~RC_LOCKED;
+ if (rp->rc_flag & RC_WANTED) {
+ rp->rc_flag &= ~RC_WANTED;
+ wakeup((caddr_t)rp);
+ }
+ return (RC_DOIT);
+}
+
+/*
+ * Record the outcome of a finished rpc in its request cache entry.
+ *
+ * The entry is found by transaction id, procedure number and client
+ * address and is marked RC_DONE.  When repvalid is set and the procedure
+ * is nonidempotent, the result is kept for later retransmissions: only
+ * nd_repstat for procedures listed in repliesstatus[], a copy of repmbuf
+ * for all others.  Nothing happens for requests with
+ * nd_nqlflag != NQL_NOVAL or when no entry matches.
+ */
+void
+nfsrv_updatecache(nam, nd, repvalid, repmbuf)
+	struct mbuf *nam;
+	register struct nfsd *nd;
+	int repvalid;
+	struct mbuf *repmbuf;
+{
+	register struct nfsrvcache *ent;
+
+	if (nd->nd_nqlflag != NQL_NOVAL)
+		return;
+rescan:
+	ent = rheadhtbl[NFSRCHASH(nd->nd_retxid)];
+	while (ent != NULL) {
+		/* skip entries that belong to some other request */
+		if (nd->nd_retxid != ent->rc_xid ||
+		    nd->nd_procnum != ent->rc_proc ||
+		    !netaddr_match(NETFAMILY(ent), &ent->rc_haddr, nam)) {
+			ent = ent->rc_forw;
+			continue;
+		}
+		/* entry is busy: wait for it and start over */
+		if ((ent->rc_flag & RC_LOCKED) != 0) {
+			ent->rc_flag |= RC_WANTED;
+			(void) tsleep((caddr_t)ent, PZERO-1, "nfsrc", 0);
+			goto rescan;
+		}
+		ent->rc_flag |= RC_LOCKED;
+		ent->rc_state = RC_DONE;
+		/*
+		 * A valid reply to a nonidempotent rpc is remembered,
+		 * either as a bare status or as a copy of the reply.
+		 */
+		if (repvalid && nonidempotent[nd->nd_procnum]) {
+			if (!repliesstatus[nd->nd_procnum]) {
+				ent->rc_reply = m_copym(repmbuf,
+				    0, M_COPYALL, M_WAIT);
+				ent->rc_flag |= RC_REPMBUF;
+			} else {
+				ent->rc_status = nd->nd_repstat;
+				ent->rc_flag |= RC_REPSTATUS;
+			}
+		}
+		ent->rc_flag &= ~RC_LOCKED;
+		if (ent->rc_flag & RC_WANTED) {
+			ent->rc_flag &= ~RC_WANTED;
+			wakeup((caddr_t)ent);
+		}
+		return;
+	}
+}
+
+/*
+ * Clean out the cache. Called when the last nfsd terminates.
+ *
+ * Every entry on the LRU list is freed together with what it owns: the
+ * saved reply (RC_REPMBUF) and the saved client address (RC_NAM), the
+ * same two things nfsrv_getcache() releases when it recycles an entry.
+ * Afterwards the hash table is zeroed and the LRU list and entry count
+ * are reset to the empty state.
+ */
+void
+nfsrv_cleancache()
+{
+	register struct nfsrvcache *rp, *nextrp;
+
+	for (rp = nfsrvlruhead; rp; rp = nextrp) {
+		/* fetch the link first, rp is gone after free() */
+		nextrp = rp->rc_next;
+		if (rp->rc_flag & RC_REPMBUF)
+			m_freem(rp->rc_reply);
+		if (rp->rc_flag & RC_NAM)
+			m_freem(rp->rc_nam);
+		free(rp, M_NFSD);
+	}
+	/* rheadhash is the table mask, so the table has rheadhash + 1 buckets */
+	bzero((char *)rheadhtbl, (rheadhash + 1) * sizeof(void *));
+	nfsrvlruhead = NULL;
+	nfsrvlrutail = &nfsrvlruhead;
+	numnfsrvcache = 0;
+}
diff --git a/sys/nfsserver/nfs_srvsock.c b/sys/nfsserver/nfs_srvsock.c
new file mode 100644
index 000000000000..cf88ed33d92d
--- /dev/null
+++ b/sys/nfsserver/nfs_srvsock.c
@@ -0,0 +1,1990 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_socket.c 8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * Socket operations for use by nfs
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/vnode.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+#include <sys/tprintf.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsrtt.h>
+#include <nfs/nqnfs.h>
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Estimate rto for an nfs rpc sent via an unreliable datagram.
+ * Use the mean and mean deviation of rtt for the appropriate type of rpc
+ * for the frequent rpcs and a default for the others.
+ * The justification for doing "other" this way is that these rpcs
+ * happen so infrequently that timer est. would probably be stale.
+ * Also, since many of these rpcs are
+ * non-idempotent, a conservative timeout is desired.
+ * getattr, lookup - A+2D
+ * read, write - A+4D
+ * other - nm_timeo
+ *
+ * "n" is the nfsmount and "t" the timer number taken from proct[]:
+ * 0 selects the nm_timeo default, any other value selects
+ * nm_srtt[t - 1] and nm_sdrtt[t - 1]. "t" is parenthesized everywhere so
+ * that an arbitrary expression may be passed for it.
+ */
+#define NFS_RTO(n, t) \
+ ((t) == 0 ? (n)->nm_timeo : \
+ ((t) < 3 ? \
+ (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
+ ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))
+/*
+ * Smoothed rtt and rtt deviation slots for the timer class of request r.
+ * The index is proct[] - 1, so these must only be used for procedures
+ * whose proct[] entry is non-zero.
+ */
+#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
+#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
+/*
+ * External data, mostly RPC constants in XDR form
+ */
+extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
+ rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred,
+ rpc_auth_kerb;
+extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers;
+extern time_t nqnfsstarttime;
+extern int nonidempotent[NFS_NPROCS];
+
+/*
+ * Maps errno values to nfs error numbers.
+ * Use NFSERR_IO as the catch all for ones not specifically defined in
+ * RFC 1094.
+ * The table holds ELAST entries and nfs_rephead() indexes it with
+ * (errno - 1), so entry 0 corresponds to errno 1.
+ * NOTE(review): the table itself gives no protection against values
+ * outside 1..ELAST; the user must range check before indexing.
+ */
+static int nfsrv_errmap[ELAST] = {
+ NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR,
+ NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO,
+ NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO,
+};
+
+/*
+ * Defines which timer to use for the procnum.
+ * 0 - default
+ * 1 - getattr
+ * 2 - lookup
+ * 3 - read
+ * 4 - write
+ * The table is indexed by the rpc procedure number (r_procnum). A
+ * non-zero entry N selects nm_srtt[N - 1] / nm_sdrtt[N - 1]; a zero
+ * entry means the request is not timed (nfs_request() only sets
+ * R_TIMING when the entry is > 0) and NFS_RTO() falls back to nm_timeo.
+ * Several procedures share timer 3 (entries 5, 6, 16 and 18).
+ */
+static int proct[NFS_NPROCS] = {
+ 0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
+};
+
+/*
+ * There is a congestion window for outstanding rpcs maintained per mount
+ * point. The cwnd size is adjusted in roughly the way that:
+ * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
+ * SIGCOMM '88". ACM, August 1988.
+ * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
+ * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
+ * of rpcs is in progress.
+ * (The sent count and cwnd are scaled for integer arith.)
+ * Variants of "slow start" were tried and were found to be too much of a
+ * performance hit (ave. rtt 3 times larger),
+ * I suspect due to the large rtt that nfs rpcs have.
+ */
+/*
+ * nm_sent and nm_cwnd are kept in units of NFS_CWNDSCALE per rpc;
+ * nm_cwnd is clipped at NFS_MAXCWND.
+ */
+#define NFS_CWNDSCALE 256
+#define NFS_MAXCWND (NFS_CWNDSCALE * 32)
+/*
+ * Multipliers applied to a timeout: nfs_timer() indexes this with
+ * nm_timeouts - 1 and nfs_request() with its try-later retry count.
+ */
+static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
+int nfs_sbwait();
+void nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();
+void nfs_rcvunlock(), nqnfs_serverd(), nqnfs_clientlease();
+struct mbuf *nfsm_rpchead();
+/* When nfsrtton is non-zero nfs_reply() logs one sample per reply in nfsrtt. */
+int nfsrtton = 0;
+struct nfsrtt nfsrtt;
+struct nfsd nfsd_head;
+
+/* Server side service routines, one per rpc procedure. */
+int nfsrv_null(),
+ nfsrv_getattr(),
+ nfsrv_setattr(),
+ nfsrv_lookup(),
+ nfsrv_readlink(),
+ nfsrv_read(),
+ nfsrv_write(),
+ nfsrv_create(),
+ nfsrv_remove(),
+ nfsrv_rename(),
+ nfsrv_link(),
+ nfsrv_symlink(),
+ nfsrv_mkdir(),
+ nfsrv_rmdir(),
+ nfsrv_readdir(),
+ nfsrv_statfs(),
+ nfsrv_noop(),
+ nqnfsrv_readdirlook(),
+ nqnfsrv_getlease(),
+ nqnfsrv_vacated(),
+ nqnfsrv_access();
+
+/*
+ * Table of service routines with NFS_NPROCS slots; slots 3, 7 and 21
+ * are filled with nfsrv_noop.
+ * NOTE(review): presumably indexed by the rpc procedure number
+ * (nd_procnum); the dispatch site is not in this part of the file.
+ */
+int (*nfsrv_procs[NFS_NPROCS])() = {
+ nfsrv_null,
+ nfsrv_getattr,
+ nfsrv_setattr,
+ nfsrv_noop,
+ nfsrv_lookup,
+ nfsrv_readlink,
+ nfsrv_read,
+ nfsrv_noop,
+ nfsrv_write,
+ nfsrv_create,
+ nfsrv_remove,
+ nfsrv_rename,
+ nfsrv_link,
+ nfsrv_symlink,
+ nfsrv_mkdir,
+ nfsrv_rmdir,
+ nfsrv_readdir,
+ nfsrv_statfs,
+ nqnfsrv_readdirlook,
+ nqnfsrv_getlease,
+ nqnfsrv_vacated,
+ nfsrv_noop,
+ nqnfsrv_access,
+};
+
+/*
+ * Head of the circular, doubly linked (r_next/r_prev) list of outstanding
+ * client requests; nfs_request() appends new requests at the tail.
+ */
+struct nfsreq nfsreqh;
+
+/*
+ * Initialize sockets and congestion for a new NFS connection.
+ * We do not free the sockaddr if error.
+ *
+ * nmp - mount point; nm_nam holds the server address, nm_sotype and
+ *       nm_soproto select the socket, nm_so receives the new socket.
+ * rep - request on whose behalf we connect, or NULL; when non-NULL it is
+ *       used to poll nfs_sigintr() while waiting for the connection.
+ * Returns 0 on success. On failure the socket is torn down through
+ * nfs_disconnect() and the error is returned.
+ */
+nfs_connect(nmp, rep)
+ register struct nfsmount *nmp;
+ struct nfsreq *rep;
+{
+ register struct socket *so;
+ int s, error, rcvreserve, sndreserve;
+ struct sockaddr *saddr;
+ struct sockaddr_in *sin;
+ struct mbuf *m;
+ u_short tport;
+
+ /* Clear nm_so first so the "bad" path is safe if socreate() fails. */
+ nmp->nm_so = (struct socket *)0;
+ saddr = mtod(nmp->nm_nam, struct sockaddr *);
+ if (error = socreate(saddr->sa_family,
+ &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
+ goto bad;
+ so = nmp->nm_so;
+ /* Cache the protocol flags; later code tests nm_soflags, not so. */
+ nmp->nm_soflags = so->so_proto->pr_flags;
+
+ /*
+ * Some servers require that the client port be a reserved port number.
+ */
+ if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
+ MGET(m, M_WAIT, MT_SONAME);
+ sin = mtod(m, struct sockaddr_in *);
+ sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ /*
+ * Start at IPPORT_RESERVED - 1 and walk downwards for as long
+ * as sobind() reports EADDRINUSE, giving up once tport reaches
+ * IPPORT_RESERVED / 2.
+ */
+ tport = IPPORT_RESERVED - 1;
+ sin->sin_port = htons(tport);
+ while ((error = sobind(so, m)) == EADDRINUSE &&
+ --tport > IPPORT_RESERVED / 2)
+ sin->sin_port = htons(tport);
+ m_freem(m);
+ if (error)
+ goto bad;
+ }
+
+ /*
+ * Protocols that do not require connections may be optionally left
+ * unconnected for servers that reply from a port other than NFS_PORT.
+ */
+ if (nmp->nm_flag & NFSMNT_NOCONN) {
+ if (nmp->nm_soflags & PR_CONNREQUIRED) {
+ error = ENOTCONN;
+ goto bad;
+ }
+ } else {
+ if (error = soconnect(so, nmp->nm_nam))
+ goto bad;
+
+ /*
+ * Wait for the connection to complete. Cribbed from the
+ * connect system call but with the wait timing out so
+ * that interruptible mounts don't hang here for a long time.
+ */
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ /* Sleep at most 2 * hz, then re-check for an interrupt. */
+ (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
+ "nfscon", 2 * hz);
+ if ((so->so_state & SS_ISCONNECTING) &&
+ so->so_error == 0 && rep &&
+ (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
+ so->so_state &= ~SS_ISCONNECTING;
+ splx(s);
+ goto bad;
+ }
+ }
+ if (so->so_error) {
+ /* Pick up and clear the pending socket error. */
+ error = so->so_error;
+ so->so_error = 0;
+ splx(s);
+ goto bad;
+ }
+ splx(s);
+ }
+ /* Soft and interruptible mounts get 5 * hz socket buffer timeouts. */
+ if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
+ so->so_rcv.sb_timeo = (5 * hz);
+ so->so_snd.sb_timeo = (5 * hz);
+ } else {
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_timeo = 0;
+ }
+ /*
+ * Size the socket buffers from the mount's write/read sizes plus
+ * NFS_MAXPKTHDR; the non-datagram types reserve twice that, and
+ * streams additionally leave room for one u_long per record.
+ */
+ if (nmp->nm_sotype == SOCK_DGRAM) {
+ sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
+ rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
+ } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
+ rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
+ } else {
+ if (nmp->nm_sotype != SOCK_STREAM)
+ panic("nfscon sotype");
+ /* The return values of sosetopt() are not checked. */
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+ }
+ if (so->so_proto->pr_protocol == IPPROTO_TCP) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+ }
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
+ * 2;
+ rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
+ * 2;
+ }
+ if (error = soreserve(so, sndreserve, rcvreserve))
+ goto bad;
+ so->so_rcv.sb_flags |= SB_NOINTR;
+ so->so_snd.sb_flags |= SB_NOINTR;
+
+ /* Initialize other non-zero congestion variables */
+ /*
+ * NOTE(review): proct[] only yields timer numbers 1..4, i.e. indices
+ * 0..3 of nm_srtt/nm_sdrtt. Index 4 is written here but never read in
+ * this file; confirm that both arrays really have five elements.
+ */
+ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
+ nmp->nm_srtt[4] = (NFS_TIMEO << 3);
+ nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
+ nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
+ nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
+ nmp->nm_sent = 0;
+ nmp->nm_timeouts = 0;
+ return (0);
+
+bad:
+ /* Common failure exit: close whatever socket was created. */
+ nfs_disconnect(nmp);
+ return (error);
+}
+
+/*
+ * Re-establish the connection of a mount point after it broke on a
+ * reliable protocol:
+ * - throw away the old socket
+ * - keep calling nfs_connect() until it succeeds, sleeping on lbolt
+ *   between attempts
+ * - flag every outstanding request of this mount point R_MUSTRESEND
+ * Returns 0 once reconnected, or EINTR when nfs_connect() reports
+ * EINTR or ERESTART.
+ * nb: Must be called with the nfs_sndlock() set on the mount point.
+ */
+nfs_reconnect(rep)
+ register struct nfsreq *rep;
+{
+ register struct nfsreq *pending;
+ register struct nfsmount *nmp = rep->r_nmp;
+ int error;
+
+ nfs_disconnect(nmp);
+ for (;;) {
+ error = nfs_connect(nmp, rep);
+ if (error == 0)
+ break;
+ if (error == EINTR || error == ERESTART)
+ return (EINTR);
+ (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
+ }
+
+ /*
+ * Everything that was queued on the old socket has to go out again
+ * on the new one.
+ */
+ for (pending = nfsreqh.r_next; pending != &nfsreqh;
+ pending = pending->r_next) {
+ if (pending->r_nmp == nmp)
+ pending->r_flags |= R_MUSTRESEND;
+ }
+ return (0);
+}
+
+/*
+ * Detach the socket from the mount point, if there is one, then shut it
+ * down in both directions and close it. nm_so is cleared before the
+ * socket is closed.
+ */
+void
+nfs_disconnect(nmp)
+ register struct nfsmount *nmp;
+{
+ register struct socket *so;
+
+ so = nmp->nm_so;
+ if (so == NULL)
+ return;
+ nmp->nm_so = (struct socket *)0;
+ soshutdown(so, 2);
+ soclose(so);
+}
+
+/*
+ * Transmit one rpc message (the mbuf chain "top") on a socket. For
+ * connection based socket types the caller must hold nfs_sndlock() on
+ * the socket.
+ * "rep == NULL" means the call comes from a server; otherwise the socket
+ * is taken from the request's mount point and "so" is ignored.
+ * Client side:
+ * - EINTR is returned if the RPC has been terminated (R_SOFTTERM)
+ * - R_MUSTRESEND is set if there is no socket or the send fails
+ * Both sides:
+ * - only EINTR, ERESTART, EWOULDBLOCK and EPIPE are reported back, any
+ *   other send error is logged and turned into 0
+ * "top" is released on the early returns and otherwise handed to
+ * sosend().
+ */
+nfs_send(so, nam, top, rep)
+ register struct socket *so;
+ struct mbuf *nam;
+ register struct mbuf *top;
+ struct nfsreq *rep;
+{
+ struct mbuf *sendnam;
+ int error, soflags, flags;
+
+ if (rep == NULL) {
+ soflags = so->so_proto->pr_flags;
+ } else {
+ if (rep->r_flags & R_SOFTTERM) {
+ m_freem(top);
+ return (EINTR);
+ }
+ so = rep->r_nmp->nm_so;
+ if (so == NULL) {
+ /* No socket right now, leave it for a later resend. */
+ rep->r_flags |= R_MUSTRESEND;
+ m_freem(top);
+ return (0);
+ }
+ rep->r_flags &= ~R_MUSTRESEND;
+ soflags = rep->r_nmp->nm_soflags;
+ }
+
+ /* A destination is only passed for unconnected sockets. */
+ sendnam = ((soflags & PR_CONNREQUIRED) ||
+ (so->so_state & SS_ISCONNECTED)) ? (struct mbuf *)0 : nam;
+ flags = (so->so_type == SOCK_SEQPACKET) ? MSG_EOR : 0;
+
+ error = sosend(so, sendnam, (struct uio *)0, top,
+ (struct mbuf *)0, flags);
+ if (error == 0)
+ return (0);
+
+ if (rep == NULL) {
+ log(LOG_INFO, "nfsd send error %d\n", error);
+ } else {
+ log(LOG_INFO, "nfs send error %d for server %s\n",error,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ /* Client side: either give up or arrange for a resend. */
+ if (rep->r_flags & R_SOFTTERM)
+ error = EINTR;
+ else
+ rep->r_flags |= R_MUSTRESEND;
+ }
+
+ /* Everything but these four is treated as a soft error. */
+ if (error == EINTR || error == ERESTART ||
+ error == EWOULDBLOCK || error == EPIPE)
+ return (error);
+ return (0);
+}
+
+/*
+ * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
+ * done by soreceive(), but for SOCK_STREAM we must deal with the Record
+ * Mark and consolidate the data into a new mbuf list.
+ * nb: Sometimes TCP passes the data up to soreceive() in long lists of
+ * small mbufs.
+ * For SOCK_STREAM we must be very careful to read an entire record once
+ * we have read any of it, even if the system call has been interrupted.
+ *
+ * rep   - request being waited for; supplies the mount point and socket
+ * aname - set to the sender's address for unconnected datagram sockets,
+ *         otherwise left NULL
+ * mp    - set to the received mbuf chain, or NULL on error
+ * Returns 0 or an error; EINTR is returned when the request already has
+ * a reply or has been soft-terminated.
+ */
+nfs_receive(rep, aname, mp)
+ register struct nfsreq *rep;
+ struct mbuf **aname;
+ struct mbuf **mp;
+{
+ register struct socket *so;
+ struct uio auio;
+ struct iovec aio;
+ register struct mbuf *m;
+ struct mbuf *control;
+ u_long len;
+ struct mbuf **getnam;
+ int error, sotype, rcvflg;
+ struct proc *p = curproc; /* XXX */
+
+ /*
+ * Set up arguments for soreceive()
+ */
+ *mp = (struct mbuf *)0;
+ *aname = (struct mbuf *)0;
+ sotype = rep->r_nmp->nm_sotype;
+
+ /*
+ * For reliable protocols, lock against other senders/receivers
+ * in case a reconnect is necessary.
+ * For SOCK_STREAM, first get the Record Mark to find out how much
+ * more there is to get.
+ * We must lock the socket against other receivers
+ * until we have an entire rpc request/reply.
+ */
+ if (sotype != SOCK_DGRAM) {
+ if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep))
+ return (error);
+tryagain:
+ /*
+ * Check for fatal errors and resending request.
+ */
+ /*
+ * Ugh: If a reconnect attempt just happened, nm_so
+ * would have changed. NULL indicates a failed
+ * attempt that has essentially shut down this
+ * mount point.
+ */
+ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
+ nfs_sndunlock(&rep->r_nmp->nm_flag);
+ return (EINTR);
+ }
+ if ((so = rep->r_nmp->nm_so) == NULL) {
+ if (error = nfs_reconnect(rep)) {
+ nfs_sndunlock(&rep->r_nmp->nm_flag);
+ return (error);
+ }
+ goto tryagain;
+ }
+ /*
+ * Resend the request for as long as it is flagged; nfs_send()
+ * clears R_MUSTRESEND when it starts and sets it again on
+ * failure.
+ */
+ while (rep->r_flags & R_MUSTRESEND) {
+ m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+ nfsstats.rpcretries++;
+ if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) {
+ if (error == EINTR || error == ERESTART ||
+ (error = nfs_reconnect(rep))) {
+ nfs_sndunlock(&rep->r_nmp->nm_flag);
+ return (error);
+ }
+ goto tryagain;
+ }
+ }
+ /* The send lock is dropped before blocking in soreceive(). */
+ nfs_sndunlock(&rep->r_nmp->nm_flag);
+ if (sotype == SOCK_STREAM) {
+ /* First read exactly one u_long: the record mark. */
+ aio.iov_base = (caddr_t) &len;
+ aio.iov_len = sizeof(u_long);
+ auio.uio_iov = &aio;
+ auio.uio_iovcnt = 1;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_offset = 0;
+ auio.uio_resid = sizeof(u_long);
+ auio.uio_procp = p;
+ do {
+ rcvflg = MSG_WAITALL;
+ error = soreceive(so, (struct mbuf **)0, &auio,
+ (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
+ /* rep was already dereferenced above, so it is non-NULL here. */
+ if (error == EWOULDBLOCK && rep) {
+ if (rep->r_flags & R_SOFTTERM)
+ return (EINTR);
+ }
+ } while (error == EWOULDBLOCK);
+ if (!error && auio.uio_resid > 0) {
+ /* NOTE(review): %d is used for sizeof-typed values here. */
+ log(LOG_INFO,
+ "short receive (%d/%d) from nfs server %s\n",
+ sizeof(u_long) - auio.uio_resid,
+ sizeof(u_long),
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = EPIPE;
+ }
+ if (error)
+ goto errout;
+ /* Strip the top bit of the record mark to get the length. */
+ len = ntohl(len) & ~0x80000000;
+ /*
+ * This is SERIOUS! We are out of sync with the sender
+ * and forcing a disconnect/reconnect is all I can do.
+ */
+ if (len > NFS_MAXPACKET) {
+ log(LOG_ERR, "%s (%d) from nfs server %s\n",
+ "impossible packet length",
+ len,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = EFBIG;
+ goto errout;
+ }
+ /*
+ * Now read the record body. Unlike the record mark read,
+ * this loop also retries on EINTR and ERESTART so that a
+ * record that has been started is read to the end.
+ */
+ auio.uio_resid = len;
+ do {
+ rcvflg = MSG_WAITALL;
+ error = soreceive(so, (struct mbuf **)0,
+ &auio, mp, (struct mbuf **)0, &rcvflg);
+ } while (error == EWOULDBLOCK || error == EINTR ||
+ error == ERESTART);
+ if (!error && auio.uio_resid > 0) {
+ log(LOG_INFO,
+ "short receive (%d/%d) from nfs server %s\n",
+ len - auio.uio_resid, len,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = EPIPE;
+ }
+ } else {
+ /*
+ * NB: Since uio_resid is big, MSG_WAITALL is ignored
+ * and soreceive() will return when it has either a
+ * control msg or a data msg.
+ * We have no use for control msg., but must grab them
+ * and then throw them away so we know what is going
+ * on.
+ */
+ auio.uio_resid = len = 100000000; /* Anything Big */
+ auio.uio_procp = p;
+ do {
+ rcvflg = 0;
+ error = soreceive(so, (struct mbuf **)0,
+ &auio, mp, &control, &rcvflg);
+ if (control)
+ m_freem(control);
+ if (error == EWOULDBLOCK && rep) {
+ if (rep->r_flags & R_SOFTTERM)
+ return (EINTR);
+ }
+ } while (error == EWOULDBLOCK ||
+ (!error && *mp == NULL && control));
+ if ((rcvflg & MSG_EOR) == 0)
+ printf("Egad!!\n");
+ if (!error && *mp == NULL)
+ error = EPIPE;
+ len -= auio.uio_resid;
+ }
+errout:
+ /*
+ * Any error other than EINTR/ERESTART drops what was received,
+ * retakes the send lock, reconnects and starts over.
+ */
+ if (error && error != EINTR && error != ERESTART) {
+ m_freem(*mp);
+ *mp = (struct mbuf *)0;
+ if (error != EPIPE)
+ log(LOG_INFO,
+ "receive error %d from nfs server %s\n",
+ error,
+ rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+ error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
+ if (!error)
+ error = nfs_reconnect(rep);
+ if (!error)
+ goto tryagain;
+ }
+ } else {
+ /* Datagram socket: one soreceive() call returns one message. */
+ if ((so = rep->r_nmp->nm_so) == NULL)
+ return (EACCES);
+ /* The sender's address is only requested on unconnected sockets. */
+ if (so->so_state & SS_ISCONNECTED)
+ getnam = (struct mbuf **)0;
+ else
+ getnam = aname;
+ auio.uio_resid = len = 1000000;
+ auio.uio_procp = p;
+ do {
+ rcvflg = 0;
+ error = soreceive(so, getnam, &auio, mp,
+ (struct mbuf **)0, &rcvflg);
+ if (error == EWOULDBLOCK &&
+ (rep->r_flags & R_SOFTTERM))
+ return (EINTR);
+ } while (error == EWOULDBLOCK);
+ len -= auio.uio_resid;
+ }
+ if (error) {
+ m_freem(*mp);
+ *mp = (struct mbuf *)0;
+ }
+ /*
+ * Search for any mbufs that are not a multiple of 4 bytes long
+ * or with m_data not longword aligned.
+ * These could cause pointer alignment problems, so copy them to
+ * well aligned mbufs.
+ */
+ /* NOTE(review): this is also reached with *mp == NULL after an error. */
+ nfs_realign(*mp, 5 * NFSX_UNSIGNED);
+ return (error);
+}
+
+/*
+ * Implement receipt of reply on a socket.
+ * We must search through the list of received datagrams matching them
+ * with outstanding requests using the xid, until ours is found.
+ *
+ * myrep - the request whose reply the caller is waiting for.
+ * Replies read here are attached (r_mrep, r_md, r_dpos) to whichever
+ * outstanding request carries the same xid, and the congestion window
+ * and rtt estimators of the mount point are updated.
+ * Returns 0 once myrep has a reply, or after a single pass when myrep
+ * is flagged R_GETONEREP; otherwise the error from nfs_rcvlock() or
+ * nfs_receive().
+ */
+/* ARGSUSED */
+nfs_reply(myrep)
+ struct nfsreq *myrep;
+{
+ register struct nfsreq *rep;
+ register struct nfsmount *nmp = myrep->r_nmp;
+ register long t1;
+ struct mbuf *mrep, *nam, *md;
+ u_long rxid, *tl;
+ caddr_t dpos, cp2;
+ int error;
+
+ /*
+ * Loop around until we get our own reply
+ */
+ for (;;) {
+ /*
+ * Lock against other receivers so that I don't get stuck in
+ * sbwait() after someone else has received my reply for me.
+ * Also necessary for connection based protocols to avoid
+ * race conditions during a reconnect.
+ */
+ if (error = nfs_rcvlock(myrep))
+ return (error);
+ /* Already received, bye bye */
+ if (myrep->r_mrep != NULL) {
+ nfs_rcvunlock(&nmp->nm_flag);
+ return (0);
+ }
+ /*
+ * Get the next Rpc reply off the socket
+ */
+ error = nfs_receive(myrep, &nam, &mrep);
+ nfs_rcvunlock(&nmp->nm_flag);
+ if (error) {
+
+ /*
+ * Ignore routing errors on connectionless protocols??
+ */
+ /*
+ * NOTE(review): nfs_receive() returns EACCES with
+ * nm_so == NULL for datagram mounts; if
+ * NFSIGNORE_SOERROR() accepts that error the line below
+ * dereferences a NULL socket pointer - confirm.
+ */
+ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
+ nmp->nm_so->so_error = 0;
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ continue;
+ }
+ return (error);
+ }
+ if (nam)
+ m_freem(nam);
+
+ /*
+ * Get the xid and check that it is an rpc reply
+ */
+ md = mrep;
+ dpos = mtod(md, caddr_t);
+ nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+ rxid = *tl++;
+ if (*tl != rpc_reply) {
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if (nqnfs_callback(nmp, mrep, md, dpos))
+ nfsstats.rpcinvalid++;
+ } else {
+ nfsstats.rpcinvalid++;
+ m_freem(mrep);
+ }
+ /*
+ * No goto in this function names nfsmout; presumably it is the
+ * failure target of the nfsm_dissect() macro above.
+ */
+nfsmout:
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ continue;
+ }
+
+ /*
+ * Loop through the request list to match up the reply
+ * Iff no match, just drop the datagram
+ */
+ rep = nfsreqh.r_next;
+ while (rep != &nfsreqh) {
+ if (rep->r_mrep == NULL && rxid == rep->r_xid) {
+ /* Found it.. */
+ rep->r_mrep = mrep;
+ rep->r_md = md;
+ rep->r_dpos = dpos;
+ if (nfsrtton) {
+ struct rttl *rt;
+
+ rt = &nfsrtt.rttl[nfsrtt.pos];
+ rt->proc = rep->r_procnum;
+ rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
+ rt->sent = nmp->nm_sent;
+ rt->cwnd = nmp->nm_cwnd;
+ /*
+ * proct[] is 0 for procedures without an
+ * rtt estimator. Indexing with proct[] - 1
+ * would then read element -1 of the arrays,
+ * so log 0 for those instead.
+ */
+ if (proct[rep->r_procnum] > 0) {
+ rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
+ rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
+ } else {
+ rt->srtt = 0;
+ rt->sdrtt = 0;
+ }
+ rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
+ rt->tstamp = time;
+ if (rep->r_flags & R_TIMING)
+ rt->rtt = rep->r_rtt;
+ else
+ rt->rtt = 1000000;
+ /* The log is a ring of NFSRTTLOGSIZ entries. */
+ nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
+ }
+ /*
+ * Update congestion window.
+ * Do the additive increase of
+ * one rpc/rtt.
+ */
+ if (nmp->nm_cwnd <= nmp->nm_sent) {
+ nmp->nm_cwnd +=
+ (NFS_CWNDSCALE * NFS_CWNDSCALE +
+ (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
+ if (nmp->nm_cwnd > NFS_MAXCWND)
+ nmp->nm_cwnd = NFS_MAXCWND;
+ }
+ rep->r_flags &= ~R_SENT;
+ nmp->nm_sent -= NFS_CWNDSCALE;
+ /*
+ * Update rtt using a gain of 0.125 on the mean
+ * and a gain of 0.25 on the deviation.
+ */
+ if (rep->r_flags & R_TIMING) {
+ /*
+ * Since the timer resolution of
+ * NFS_HZ is so coarse, it can often
+ * result in r_rtt == 0. Since
+ * r_rtt == N means that the actual
+ * rtt is between N+dt and N+2-dt ticks,
+ * add 1.
+ */
+ t1 = rep->r_rtt + 1;
+ t1 -= (NFS_SRTT(rep) >> 3);
+ NFS_SRTT(rep) += t1;
+ if (t1 < 0)
+ t1 = -t1;
+ t1 -= (NFS_SDRTT(rep) >> 2);
+ NFS_SDRTT(rep) += t1;
+ }
+ nmp->nm_timeouts = 0;
+ break;
+ }
+ rep = rep->r_next;
+ }
+ /*
+ * If not matched to a request, drop it.
+ * If it's mine, get out.
+ */
+ if (rep == &nfsreqh) {
+ nfsstats.rpcunexpected++;
+ m_freem(mrep);
+ } else if (rep == myrep) {
+ if (rep->r_mrep == NULL)
+ panic("nfsreply nil");
+ return (0);
+ }
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ }
+}
+
+/*
+ * nfs_request - goes something like this
+ * - fill in request struct
+ * - links it into list
+ * - calls nfs_send() for first transmit
+ * - calls nfs_reply() to wait for the reply
+ * - break down rpc header and return with nfs reply pointed to
+ * by mrep or error
+ * nb: always frees up mreq mbuf list
+ *
+ * vp      - vnode the rpc is about; selects the mount point
+ * mrest   - request arguments, appended after the rpc header
+ * procnum - rpc procedure number
+ * procp   - process on whose behalf the rpc is done
+ * cred    - credentials used to build the authentication part
+ * mrp, mdp, dposp - on success receive the reply chain, the current
+ *           mbuf and the position just past the rpc reply header
+ * Returns 0 on success, otherwise an error; the request structure and
+ * the request mbuf chain are released on every return path.
+ */
+nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
+ struct vnode *vp;
+ struct mbuf *mrest;
+ int procnum;
+ struct proc *procp;
+ struct ucred *cred;
+ struct mbuf **mrp;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+{
+ register struct mbuf *m, *mrep;
+ register struct nfsreq *rep;
+ register u_long *tl;
+ register int i;
+ struct nfsmount *nmp;
+ struct mbuf *md, *mheadend;
+ struct nfsreq *reph;
+ struct nfsnode *np;
+ time_t reqtime, waituntil;
+ caddr_t dpos, cp2;
+ int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
+ int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
+ u_long xid;
+ u_quad_t frev;
+ char *auth_str;
+
+ nmp = VFSTONFS(vp->v_mount);
+ MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
+ rep->r_nmp = nmp;
+ rep->r_vp = vp;
+ rep->r_procp = procp;
+ rep->r_procnum = procnum;
+ /* Total length of the argument chain, needed for the rpc header. */
+ i = 0;
+ m = mrest;
+ while (m) {
+ i += m->m_len;
+ m = m->m_next;
+ }
+ mrest_len = i;
+
+ /*
+ * Get the RPC header with authorization.
+ */
+kerbauth:
+ auth_str = (char *)0;
+ if (nmp->nm_flag & NFSMNT_KERB) {
+ /*
+ * The first attempt goes out with RPCAUTH_UNIX; nfs_getauth()
+ * is only consulted after the server rejected the credential.
+ */
+ if (failed_auth) {
+ error = nfs_getauth(nmp, rep, cred, &auth_type,
+ &auth_str, &auth_len);
+ if (error) {
+ free((caddr_t)rep, M_NFSREQ);
+ m_freem(mrest);
+ return (error);
+ }
+ } else {
+ auth_type = RPCAUTH_UNIX;
+ auth_len = 5 * NFSX_UNSIGNED;
+ }
+ } else {
+ auth_type = RPCAUTH_UNIX;
+ if (cred->cr_ngroups < 1)
+ panic("nfsreq nogrps");
+ auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
+ nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
+ 5 * NFSX_UNSIGNED;
+ }
+ m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum,
+ auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid);
+ if (auth_str)
+ free(auth_str, M_TEMP);
+
+ /*
+ * For stream protocols, insert a Sun RPC Record Mark.
+ */
+ if (nmp->nm_sotype == SOCK_STREAM) {
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ *mtod(m, u_long *) = htonl(0x80000000 |
+ (m->m_pkthdr.len - NFSX_UNSIGNED));
+ }
+ rep->r_mreq = m;
+ rep->r_xid = xid;
+tryagain:
+ if (nmp->nm_flag & NFSMNT_SOFT)
+ rep->r_retry = nmp->nm_retry;
+ else
+ rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
+ rep->r_rtt = rep->r_rexmit = 0;
+ /* Only procedures with an rtt timer class are timed. */
+ if (proct[procnum] > 0)
+ rep->r_flags = R_TIMING;
+ else
+ rep->r_flags = 0;
+ rep->r_mrep = NULL;
+
+ /*
+ * Do the client side RPC.
+ */
+ nfsstats.rpcrequests++;
+ /*
+ * Chain request into list of outstanding requests. Be sure
+ * to put it LAST so timer finds oldest requests first.
+ */
+ s = splsoftclock();
+ reph = &nfsreqh;
+ reph->r_prev->r_next = rep;
+ rep->r_prev = reph->r_prev;
+ reph->r_prev = rep;
+ rep->r_next = reph;
+
+ /* Get send time for nqnfs */
+ reqtime = time.tv_sec;
+
+ /*
+ * If backing off another request or avoiding congestion, don't
+ * send this one now but let timer do it. If not timing a request,
+ * do it now.
+ */
+ if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
+ (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+ nmp->nm_sent < nmp->nm_cwnd)) {
+ splx(s);
+ if (nmp->nm_soflags & PR_CONNREQUIRED)
+ error = nfs_sndlock(&nmp->nm_flag, rep);
+ if (!error) {
+ /*
+ * Always copy from the saved request. "m" holds the
+ * previous copy after the first pass, and that copy
+ * has already been handed to nfs_send(), so it must
+ * not be used as the source when we come back here
+ * through tryagain.
+ */
+ m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+ error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
+ if (nmp->nm_soflags & PR_CONNREQUIRED)
+ nfs_sndunlock(&nmp->nm_flag);
+ }
+ if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
+ nmp->nm_sent += NFS_CWNDSCALE;
+ rep->r_flags |= R_SENT;
+ }
+ } else {
+ splx(s);
+ /* A negative r_rtt makes nfs_timer() skip its timeout test. */
+ rep->r_rtt = -1;
+ }
+
+ /*
+ * Wait for the reply from our send or the timer's.
+ */
+ if (!error || error == EPIPE)
+ error = nfs_reply(rep);
+
+ /*
+ * RPC done, unlink the request.
+ */
+ s = splsoftclock();
+ rep->r_prev->r_next = rep->r_next;
+ rep->r_next->r_prev = rep->r_prev;
+ splx(s);
+
+ /*
+ * Decrement the outstanding request count.
+ */
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_SENT; /* paranoia */
+ nmp->nm_sent -= NFS_CWNDSCALE;
+ }
+
+ /*
+ * If there was a successful reply and a tprintf msg.
+ * tprintf a response.
+ */
+ if (!error && (rep->r_flags & R_TPRINTFMSG))
+ nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
+ "is alive again");
+ mrep = rep->r_mrep;
+ md = rep->r_md;
+ dpos = rep->r_dpos;
+ if (error) {
+ m_freem(rep->r_mreq);
+ free((caddr_t)rep, M_NFSREQ);
+ return (error);
+ }
+
+ /*
+ * break down the rpc header and check if ok
+ */
+ nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+ if (*tl++ == rpc_msgdenied) {
+ if (*tl == rpc_mismatch)
+ error = EOPNOTSUPP;
+ else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
+ if (*tl == rpc_rejectedcred && failed_auth == 0) {
+ /*
+ * Retry once with a fresh authenticator: cut
+ * the arguments loose from the old header so
+ * that freeing the old request keeps mrest.
+ */
+ failed_auth++;
+ mheadend->m_next = (struct mbuf *)0;
+ m_freem(mrep);
+ m_freem(rep->r_mreq);
+ goto kerbauth;
+ } else
+ error = EAUTH;
+ } else
+ error = EACCES;
+ m_freem(mrep);
+ m_freem(rep->r_mreq);
+ free((caddr_t)rep, M_NFSREQ);
+ return (error);
+ }
+
+ /*
+ * skip over the auth_verf, someday we may want to cache auth_short's
+ * for nfs_reqhead(), but for now just dump it
+ */
+ if (*++tl != 0) {
+ i = nfsm_rndup(fxdr_unsigned(long, *tl));
+ nfsm_adv(i);
+ }
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ /* 0 == ok */
+ if (*tl == 0) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ if (*tl != 0) {
+ error = fxdr_unsigned(int, *tl);
+ m_freem(mrep);
+ if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ error == NQNFS_TRYLATER) {
+ /*
+ * The server asked us to come back later:
+ * wait, lengthen the delay for next time
+ * and reissue the same request.
+ */
+ error = 0;
+ waituntil = time.tv_sec + trylater_delay;
+ while (time.tv_sec < waituntil)
+ (void) tsleep((caddr_t)&lbolt,
+ PSOCK, "nqnfstry", 0);
+ trylater_delay *= nfs_backoff[trylater_cnt];
+ if (trylater_cnt < 7)
+ trylater_cnt++;
+ goto tryagain;
+ }
+
+ /*
+ * If the File Handle was stale, invalidate the
+ * lookup cache, just in case.
+ */
+ if (error == ESTALE)
+ cache_purge(vp);
+ m_freem(rep->r_mreq);
+ free((caddr_t)rep, M_NFSREQ);
+ return (error);
+ }
+
+ /*
+ * For nqnfs, get any lease in reply
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ if (*tl) {
+ np = VTONFS(vp);
+ nqlflag = fxdr_unsigned(int, *tl);
+ nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+ cachable = fxdr_unsigned(int, *tl++);
+ reqtime += fxdr_unsigned(int, *tl++);
+ /* Only a lease that has not yet expired is recorded. */
+ if (reqtime > time.tv_sec) {
+ fxdr_hyper(tl, &frev);
+ nqnfs_clientlease(nmp, np, nqlflag,
+ cachable, reqtime, frev);
+ }
+ }
+ }
+ *mrp = mrep;
+ *mdp = md;
+ *dposp = dpos;
+ m_freem(rep->r_mreq);
+ FREE((caddr_t)rep, M_NFSREQ);
+ return (0);
+ }
+ m_freem(mrep);
+ error = EPROTONOSUPPORT;
+ /*
+ * Common failure exit. Besides the fall through above, no goto in
+ * this function names this label; presumably the nfsm_dissect() and
+ * nfsm_adv() macros jump here when the reply is too short. At every
+ * such point the request is already unlinked but still allocated, so
+ * release it and the request mbufs here instead of leaking them.
+ */
+nfsmout:
+ m_freem(rep->r_mreq);
+ free((caddr_t)rep, M_NFSREQ);
+ return (error);
+}
+
+/*
+ * Generate the rpc reply header
+ * siz arg. is used to decide if adding a cluster is worthwhile
+ *
+ * siz   - expected size of the reply body
+ * nd    - request descriptor; supplies the xid and the nqnfs lease info
+ * err   - 0, an errno, VNOVAL (no status word is emitted) or one of the
+ *         rpc level codes handled explicitly below
+ * cache, frev - lease cachability and file revision, only used for nqnfs
+ * mrq, mbp, bposp - receive the head of the reply, the mbuf currently
+ *         being filled and the position at which to continue building
+ * Always returns 0.
+ */
+nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp)
+ int siz;
+ struct nfsd *nd;
+ int err;
+ int cache;
+ u_quad_t *frev;
+ struct mbuf **mrq;
+ struct mbuf **mbp;
+ caddr_t *bposp;
+{
+ register u_long *tl;
+ register struct mbuf *mreq;
+ caddr_t bpos;
+ struct mbuf *mb, *mb2;
+
+ MGETHDR(mreq, M_WAIT, MT_DATA);
+ mb = mreq;
+ /*
+ * If this is a big reply, use a cluster else
+ * try and leave leading space for the lower level headers.
+ */
+ siz += RPC_REPLYSIZ;
+ if (siz >= MINCLSIZE) {
+ MCLGET(mreq, M_WAIT);
+ } else
+ mreq->m_data += max_hdr;
+ tl = mtod(mreq, u_long *);
+ /* Six words are assumed; the auth error case below gives one back. */
+ mreq->m_len = 6*NFSX_UNSIGNED;
+ bpos = ((caddr_t)tl)+mreq->m_len;
+ *tl++ = nd->nd_retxid;
+ *tl++ = rpc_reply;
+ if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) {
+ *tl++ = rpc_msgdenied;
+ if (err == NQNFS_AUTHERR) {
+ *tl++ = rpc_autherr;
+ *tl = rpc_rejectedcred;
+ mreq->m_len -= NFSX_UNSIGNED;
+ bpos -= NFSX_UNSIGNED;
+ } else {
+ *tl++ = rpc_mismatch;
+ *tl++ = txdr_unsigned(2);
+ *tl = txdr_unsigned(2);
+ }
+ } else {
+ *tl++ = rpc_msgaccepted;
+ *tl++ = 0;
+ *tl++ = 0;
+ switch (err) {
+ case EPROGUNAVAIL:
+ *tl = txdr_unsigned(RPC_PROGUNAVAIL);
+ break;
+ case EPROGMISMATCH:
+ *tl = txdr_unsigned(RPC_PROGMISMATCH);
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(2);
+ *tl = txdr_unsigned(2); /* someday 3 */
+ break;
+ case EPROCUNAVAIL:
+ *tl = txdr_unsigned(RPC_PROCUNAVAIL);
+ break;
+ default:
+ *tl = 0;
+ if (err != VNOVAL) {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ /*
+ * nfsrv_errmap[] has ELAST entries indexed by
+ * errno - 1, so only 1..ELAST may be looked up.
+ * NOTE(review): larger values are assumed to be
+ * protocol status codes rather than errnos (the
+ * client side compares the received status with
+ * NQNFS_TRYLATER) and are sent unchanged; any
+ * other out of range value becomes NFSERR_IO.
+ */
+ if (err >= 1 && err <= ELAST)
+ *tl = txdr_unsigned(nfsrv_errmap[err - 1]);
+ else if (err > ELAST)
+ *tl = txdr_unsigned(err);
+ else if (err)
+ *tl = txdr_unsigned(NFSERR_IO);
+ else
+ *tl = 0;
+ }
+ break;
+ };
+ }
+
+ /*
+ * For nqnfs, piggyback lease as requested.
+ */
+ if (nd->nd_nqlflag != NQL_NOVAL && err == 0) {
+ if (nd->nd_nqlflag) {
+ nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nd->nd_nqlflag);
+ *tl++ = txdr_unsigned(cache);
+ *tl++ = txdr_unsigned(nd->nd_duration);
+ txdr_hyper(frev, tl);
+ } else {
+ /* No lease was granted: send a single zero word. */
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ *mrq = mreq;
+ *mbp = mb;
+ *bposp = bpos;
+ if (err != 0 && err != VNOVAL)
+ nfsstats.srvrpc_errs++;
+ return (0);
+}
+
+/*
+ * Nfs timer routine
+ * Scan the nfsreq list and retransmit any requests that have timed out
+ * To avoid retransmission attempts on STREAM sockets (in the future) make
+ * sure to set the r_retry field to 0 (implies nm_retry == 0).
+ * The whole scan runs at splnet(). Before returning the routine re-arms
+ * itself through timeout() with hz / NFS_HZ ticks, and at most once per
+ * second (by time.tv_sec) it also calls nqnfs_serverd().
+ */
+void
+nfs_timer(arg)
+ void *arg; /* not referenced in the body */
+{
+ register struct nfsreq *rep;
+ register struct mbuf *m;
+ register struct socket *so;
+ register struct nfsmount *nmp;
+ register int timeo;
+ static long lasttime = 0; /* time.tv_sec of the last nqnfs_serverd() call */
+ int s, error;
+
+ s = splnet();
+ for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
+ nmp = rep->r_nmp;
+ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) /* already answered or already terminated */
+ continue;
+ if (nfs_sigintr(nmp, rep, rep->r_procp)) {
+ rep->r_flags |= R_SOFTTERM;
+ continue;
+ }
+ if (rep->r_rtt >= 0) { /* a negative r_rtt skips the timeout test and goes straight to the send attempt */
+ rep->r_rtt++;
+ if (nmp->nm_flag & NFSMNT_DUMBTIMR)
+ timeo = nmp->nm_timeo;
+ else
+ timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
+ if (nmp->nm_timeouts > 0)
+ timeo *= nfs_backoff[nmp->nm_timeouts - 1];
+ if (rep->r_rtt <= timeo)
+ continue;
+ if (nmp->nm_timeouts < 8) /* caps nm_timeouts, which indexes nfs_backoff above */
+ nmp->nm_timeouts++;
+ }
+ /*
+ * Check for server not responding
+ * (the message is printed once per request; R_TPRINTFMSG records that)
+ */
+ if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
+ rep->r_rexmit > nmp->nm_deadthresh) {
+ nfs_msg(rep->r_procp,
+ nmp->nm_mountp->mnt_stat.f_mntfromname,
+ "not responding");
+ rep->r_flags |= R_TPRINTFMSG;
+ }
+ if (rep->r_rexmit >= rep->r_retry) { /* too many */
+ nfsstats.rpctimeouts++;
+ rep->r_flags |= R_SOFTTERM;
+ continue;
+ }
+ if (nmp->nm_sotype != SOCK_DGRAM) { /* non-datagram: only count the timeout, never resend here */
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ continue;
+ }
+ if ((so = nmp->nm_so) == NULL)
+ continue;
+
+ /*
+ * If there is enough space and the window allows..
+ * Resend it
+ * Set r_rtt to -1 in case we fail to send it now.
+ */
+ rep->r_rtt = -1;
+ if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
+ ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+ (rep->r_flags & R_SENT) ||
+ nmp->nm_sent < nmp->nm_cwnd) &&
+ (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
+ if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
+ error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+ (struct mbuf *)0, (struct mbuf *)0);
+ else
+ error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+ nmp->nm_nam, (struct mbuf *)0);
+ if (error) {
+ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
+ so->so_error = 0;
+ } else {
+ /*
+ * Iff first send, start timing
+ * else turn timing off, backoff timer
+ * and divide congestion window by 2.
+ */
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_TIMING;
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ nmp->nm_cwnd >>= 1;
+ if (nmp->nm_cwnd < NFS_CWNDSCALE) /* never shrink the window below NFS_CWNDSCALE */
+ nmp->nm_cwnd = NFS_CWNDSCALE;
+ nfsstats.rpcretries++;
+ } else {
+ rep->r_flags |= R_SENT;
+ nmp->nm_sent += NFS_CWNDSCALE;
+ }
+ rep->r_rtt = 0; /* sent: restart the tick count for this request */
+ }
+ }
+ }
+
+ /*
+ * Call the nqnfs server timer once a second to handle leases.
+ */
+ if (lasttime != time.tv_sec) {
+ lasttime = time.tv_sec;
+ nqnfs_serverd();
+ }
+ splx(s);
+ timeout(nfs_timer, (void *)0, hz / NFS_HZ);
+}
+
+/*
+ * Decide whether an nfs operation should be abandoned.
+ * Returns EINTR when the request (if one is given) is already marked
+ * R_SOFTTERM, or when the mount has NFSMNT_INT set and process p has a
+ * pending signal from NFSINT_SIGMASK that is neither masked nor ignored.
+ * Returns 0 otherwise; rep and p may both be null.
+ */
+nfs_sigintr(nmp, rep, p)
+	struct nfsmount *nmp;
+	struct nfsreq *rep;
+	register struct proc *p;
+{
+
+	/* A soft-terminated request is interrupted whatever the mount says. */
+	if (rep != NULL && (rep->r_flags & R_SOFTTERM) != 0)
+		return (EINTR);
+
+	/* Signals only count on interruptible mounts with a known process. */
+	if ((nmp->nm_flag & NFSMNT_INT) == 0)
+		return (0);
+	if (p == NULL || !p->p_siglist)
+		return (0);
+
+	if (p->p_siglist & ~p->p_sigmask & ~p->p_sigignore & NFSINT_SIGMASK)
+		return (EINTR);
+	return (0);
+}
+
+/*
+ * Lock a socket against others.
+ * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
+ * and also to avoid race conditions between the processes with nfs requests
+ * in progress when a reconnect is necessary.
+ *
+ * flagp points at the flag word holding NFSMNT_SNDLOCK / NFSMNT_WANTSND.
+ * rep may be null; in that case there is no process or mount to consult,
+ * so the wait is not interruptible and no signal check is made.
+ * Returns 0 with the lock held, or EINTR if nfs_sigintr() reports that the
+ * request should be abandoned while waiting.
+ */
+nfs_sndlock(flagp, rep)
+	register int *flagp;
+	struct nfsreq *rep;
+{
+	struct proc *p;
+	int slpflag = 0, slptimeo = 0;
+
+	if (rep) {
+		p = rep->r_procp;
+		if (rep->r_nmp->nm_flag & NFSMNT_INT)
+			slpflag = PCATCH;
+	} else
+		p = (struct proc *)0;
+	while (*flagp & NFSMNT_SNDLOCK) {
+		/*
+		 * Only consult nfs_sigintr() when there is a request:
+		 * rep->r_nmp must not be evaluated for a null rep.
+		 */
+		if (rep && nfs_sigintr(rep->r_nmp, rep, p))
+			return (EINTR);
+		*flagp |= NFSMNT_WANTSND;
+		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
+			slptimeo);
+		/*
+		 * After the first interruptible sleep, fall back to polling
+		 * every two seconds so nfs_sigintr() is re-checked.
+		 */
+		if (slpflag == PCATCH) {
+			slpflag = 0;
+			slptimeo = 2 * hz;
+		}
+	}
+	*flagp |= NFSMNT_SNDLOCK;
+	return (0);
+}
+
+/*
+ * Release the send lock taken by nfs_sndlock().
+ * Panics if the lock is not held; wakes sleepers on flagp when
+ * NFSMNT_WANTSND was set.
+ */
+void
+nfs_sndunlock(flagp)
+	register int *flagp;
+{
+
+	if (!(*flagp & NFSMNT_SNDLOCK))
+		panic("nfs sndunlock");
+	*flagp &= ~NFSMNT_SNDLOCK;
+
+	/* Nobody asked to be told: done. */
+	if ((*flagp & NFSMNT_WANTSND) == 0)
+		return;
+	*flagp &= ~NFSMNT_WANTSND;
+	wakeup((caddr_t)flagp);
+}
+
+/*
+ * Take the receive lock (NFSMNT_RCVLOCK) in the mount's nm_flag word on
+ * behalf of request rep.
+ * Returns 0 with the lock held, or EINTR when nfs_sigintr() says the
+ * request should be abandoned while waiting.
+ */
+nfs_rcvlock(rep)
+	register struct nfsreq *rep;
+{
+	register int *flagp = &rep->r_nmp->nm_flag;
+	int slpflag = (*flagp & NFSMNT_INT) ? PCATCH : 0;
+	int slptimeo = 0;
+
+	for (;;) {
+		if ((*flagp & NFSMNT_RCVLOCK) == 0)
+			break;
+		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
+			return (EINTR);
+		*flagp |= NFSMNT_WANTRCV;
+		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
+			slptimeo);
+		/* Only the first sleep catches signals; afterwards poll every 2s. */
+		if (slpflag == PCATCH) {
+			slpflag = 0;
+			slptimeo = 2 * hz;
+		}
+	}
+	*flagp |= NFSMNT_RCVLOCK;
+	return (0);
+}
+
+/*
+ * Release the receive lock taken by nfs_rcvlock().
+ * Panics if the lock is not held; wakes sleepers on flagp when
+ * NFSMNT_WANTRCV was set.
+ */
+void
+nfs_rcvunlock(flagp)
+	register int *flagp;
+{
+
+	if (!(*flagp & NFSMNT_RCVLOCK))
+		panic("nfs rcvunlock");
+	*flagp &= ~NFSMNT_RCVLOCK;
+
+	/* Nobody asked to be told: done. */
+	if ((*flagp & NFSMNT_WANTRCV) == 0)
+		return;
+	*flagp &= ~NFSMNT_WANTRCV;
+	wakeup((caddr_t)flagp);
+}
+
+/*
+ * Check for badly aligned mbuf data areas and
+ * realign data in an mbuf list by copying the data areas up, as required.
+ * m is the head of the chain; hsiz is the size of the leading part that
+ * should, when possible, end up alone in the first rewritten mbuf.
+ * The chain is walked until the first mbuf whose length or data address
+ * is not a multiple of 4; from there on all data is compacted in place
+ * into the same mbufs, leftover mbufs get m_len 0, and the routine returns.
+ * No mbufs are allocated or freed.
+ */
+void
+nfs_realign(m, hsiz)
+ register struct mbuf *m;
+ int hsiz;
+{
+ register struct mbuf *m2;
+ register int siz, mlen, olen;
+ register caddr_t tcp, fcp;
+ struct mbuf *mnew;
+
+ while (m) {
+ /*
+ * This never happens for UDP, rarely happens for TCP
+ * but frequently happens for iso transport.
+ */
+ if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
+ olen = m->m_len; /* bytes still to be moved out of the source mbuf */
+ fcp = mtod(m, caddr_t); /* "from" pointer */
+ if ((int)fcp & 0x3) { /* the start address itself is misaligned: reset m_data */
+ m->m_flags &= ~M_PKTHDR;
+ if (m->m_flags & M_EXT)
+ m->m_data = m->m_ext.ext_buf +
+ ((m->m_ext.ext_size - olen) & ~0x3);
+ else
+ m->m_data = m->m_dat;
+ }
+ m->m_len = 0;
+ tcp = mtod(m, caddr_t); /* "to" pointer */
+ mnew = m; /* mbuf currently being filled */
+ m2 = m->m_next; /* next mbuf to fill once mnew is full */
+
+ /*
+ * If possible, only put the first invariant part
+ * of the RPC header in the first mbuf.
+ */
+ mlen = M_TRAILINGSPACE(m);
+ if (olen <= hsiz && mlen > hsiz)
+ mlen = hsiz;
+
+ /*
+ * Loop through the mbuf list consolidating data.
+ */
+ while (m) {
+ while (olen > 0) {
+ if (mlen == 0) { /* destination full: move on to m2 (NOTE(review): m2 is not checked for null here) */
+ m2->m_flags &= ~M_PKTHDR;
+ if (m2->m_flags & M_EXT)
+ m2->m_data = m2->m_ext.ext_buf;
+ else
+ m2->m_data = m2->m_dat;
+ m2->m_len = 0;
+ mlen = M_TRAILINGSPACE(m2);
+ tcp = mtod(m2, caddr_t);
+ mnew = m2;
+ m2 = m2->m_next;
+ }
+ siz = min(mlen, olen);
+ if (tcp != fcp) /* nothing to copy when source and destination coincide */
+ bcopy(fcp, tcp, siz);
+ mnew->m_len += siz;
+ mlen -= siz;
+ olen -= siz;
+ tcp += siz;
+ fcp += siz;
+ }
+ m = m->m_next;
+ if (m) {
+ olen = m->m_len;
+ fcp = mtod(m, caddr_t);
+ }
+ }
+
+ /*
+ * Finally, set m_len == 0 for any trailing mbufs that have
+ * been copied out of.
+ */
+ while (m2) {
+ m2->m_len = 0;
+ m2 = m2->m_next;
+ }
+ return;
+ }
+ m = m->m_next;
+ }
+}
+
+/*
+ * Socket upcall routine for the nfsd sockets.
+ * The caddr_t arg is a pointer to the "struct nfssvc_sock".
+ * Essentially do as much as possible non-blocking, else punt and it will
+ * be called with M_WAIT from an nfsd.
+ * For SOCK_STREAM sockets the received data is appended to the raw list
+ * (ns_raw/ns_rawend, byte count in ns_cc) and nfsrv_getstream() splits it
+ * into records; for other socket types each soreceive() result is queued
+ * directly on ns_rec/ns_recend, with the sender's address mbuf in front
+ * when one was returned.
+ * Problems are reported through ns_flag: SLP_NEEDQ or SLP_DISCONN.
+ */
+void
+nfsrv_rcv(so, arg, waitflag)
+ struct socket *so;
+ caddr_t arg;
+ int waitflag;
+{
+ register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
+ register struct mbuf *m;
+ struct mbuf *mp, *nam;
+ struct uio auio;
+ int flags, error;
+
+ if ((slp->ns_flag & SLP_VALID) == 0)
+ return;
+#ifdef notdef
+ /*
+ * Define this to test for nfsds handling this under heavy load.
+ */
+ if (waitflag == M_DONTWAIT) {
+ slp->ns_flag |= SLP_NEEDQ; goto dorecs;
+ }
+#endif
+ auio.uio_procp = NULL;
+ if (so->so_type == SOCK_STREAM) {
+ /*
+ * If there are already records on the queue, defer soreceive()
+ * to an nfsd so that there is feedback to the TCP layer that
+ * the nfs servers are heavily loaded.
+ */
+ if (slp->ns_rec && waitflag == M_DONTWAIT) {
+ slp->ns_flag |= SLP_NEEDQ;
+ goto dorecs;
+ }
+
+ /*
+ * Do soreceive().
+ */
+ auio.uio_resid = 1000000000; /* large request; the amount actually read is recovered by subtraction below */
+ flags = MSG_DONTWAIT;
+ error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
+ if (error || mp == (struct mbuf *)0) {
+ if (error == EWOULDBLOCK)
+ slp->ns_flag |= SLP_NEEDQ;
+ else
+ slp->ns_flag |= SLP_DISCONN;
+ goto dorecs;
+ }
+ m = mp;
+ if (slp->ns_rawend) {
+ slp->ns_rawend->m_next = m;
+ slp->ns_cc += 1000000000 - auio.uio_resid;
+ } else {
+ slp->ns_raw = m;
+ slp->ns_cc = 1000000000 - auio.uio_resid;
+ }
+ while (m->m_next)
+ m = m->m_next;
+ slp->ns_rawend = m;
+
+ /*
+ * Now try and parse record(s) out of the raw stream data.
+ * EPERM from nfsrv_getstream() marks the socket for disconnect,
+ * any other error asks for the work to be redone by an nfsd.
+ */
+ if (error = nfsrv_getstream(slp, waitflag)) {
+ if (error == EPERM)
+ slp->ns_flag |= SLP_DISCONN;
+ else
+ slp->ns_flag |= SLP_NEEDQ;
+ }
+ } else {
+ do {
+ auio.uio_resid = 1000000000;
+ flags = MSG_DONTWAIT;
+ error = soreceive(so, &nam, &auio, &mp,
+ (struct mbuf **)0, &flags);
+ if (mp) {
+ nfs_realign(mp, 10 * NFSX_UNSIGNED);
+ if (nam) { /* keep the sender's address as the first mbuf of the record */
+ m = nam;
+ m->m_next = mp;
+ } else
+ m = mp;
+ if (slp->ns_recend)
+ slp->ns_recend->m_nextpkt = m;
+ else
+ slp->ns_rec = m;
+ slp->ns_recend = m;
+ m->m_nextpkt = (struct mbuf *)0;
+ }
+ if (error) {
+ if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
+ && error != EWOULDBLOCK) {
+ slp->ns_flag |= SLP_DISCONN;
+ goto dorecs;
+ }
+ }
+ } while (mp); /* keep reading until soreceive() hands back no data */
+ }
+
+ /*
+ * Now try and process the request records, non-blocking.
+ */
+dorecs:
+ if (waitflag == M_DONTWAIT &&
+ (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
+ nfsrv_wakenfsd(slp);
+}
+
+/*
+ * Try and extract an RPC request from the mbuf data list received on a
+ * stream socket. The "waitflag" argument indicates whether or not it
+ * can sleep.
+ * Works on slp->ns_raw (ns_cc bytes); ns_reclen holds the length of the
+ * record currently being collected, 0 meaning "read a record mark next".
+ * Every complete record is realigned and appended to ns_rec/ns_recend.
+ * SLP_GETSTREAM guards against re-entry: it is set on entry, cleared on
+ * every return, and finding it already set is a panic.
+ * Returns 0 when no further complete record is available, EPERM when a
+ * record length is outside NFS_MINPACKET..NFS_MAXPACKET, and EWOULDBLOCK
+ * when m_copym() could not supply an mbuf.
+ */
+nfsrv_getstream(slp, waitflag)
+ register struct nfssvc_sock *slp;
+ int waitflag;
+{
+ register struct mbuf *m;
+ register char *cp1, *cp2;
+ register int len;
+ struct mbuf *om, *m2, *recm;
+ u_long recmark;
+
+ if (slp->ns_flag & SLP_GETSTREAM)
+ panic("nfs getstream");
+ slp->ns_flag |= SLP_GETSTREAM;
+ for (;;) {
+ if (slp->ns_reclen == 0) {
+ if (slp->ns_cc < NFSX_UNSIGNED) { /* not even a full record mark buffered yet */
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (0);
+ }
+ m = slp->ns_raw;
+ if (m->m_len >= NFSX_UNSIGNED) {
+ bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
+ m->m_data += NFSX_UNSIGNED;
+ m->m_len -= NFSX_UNSIGNED;
+ } else { /* record mark straddles mbufs: gather it a byte at a time */
+ cp1 = (caddr_t)&recmark;
+ cp2 = mtod(m, caddr_t);
+ while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
+ while (m->m_len == 0) {
+ m = m->m_next;
+ cp2 = mtod(m, caddr_t);
+ }
+ *cp1++ = *cp2++;
+ m->m_data++;
+ m->m_len--;
+ }
+ }
+ slp->ns_cc -= NFSX_UNSIGNED;
+ slp->ns_reclen = ntohl(recmark) & ~0x80000000; /* top bit dropped; presumably the "last fragment" flag - confirm */
+ if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (EPERM);
+ }
+ }
+
+ /*
+ * Now get the record part.
+ * Three cases: the buffered data is exactly one record, more than
+ * one record (split the chain), or less than one (wait for more).
+ */
+ if (slp->ns_cc == slp->ns_reclen) {
+ recm = slp->ns_raw;
+ slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
+ slp->ns_cc = slp->ns_reclen = 0;
+ } else if (slp->ns_cc > slp->ns_reclen) {
+ len = 0;
+ m = slp->ns_raw;
+ om = (struct mbuf *)0; /* last mbuf that belongs wholly to this record */
+ while (len < slp->ns_reclen) {
+ if ((len + m->m_len) > slp->ns_reclen) { /* record ends inside m: copy its head, keep the tail in ns_raw */
+ m2 = m_copym(m, 0, slp->ns_reclen - len,
+ waitflag);
+ if (m2) {
+ if (om) {
+ om->m_next = m2;
+ recm = slp->ns_raw;
+ } else
+ recm = m2;
+ m->m_data += slp->ns_reclen - len;
+ m->m_len -= slp->ns_reclen - len;
+ len = slp->ns_reclen;
+ } else {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (EWOULDBLOCK);
+ }
+ } else if ((len + m->m_len) == slp->ns_reclen) { /* record ends exactly at the end of m: cut the chain here */
+ om = m;
+ len += m->m_len;
+ m = m->m_next;
+ recm = slp->ns_raw;
+ om->m_next = (struct mbuf *)0;
+ } else {
+ om = m;
+ len += m->m_len;
+ m = m->m_next;
+ }
+ }
+ slp->ns_raw = m;
+ slp->ns_cc -= len;
+ slp->ns_reclen = 0;
+ } else {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (0);
+ }
+ nfs_realign(recm, 10 * NFSX_UNSIGNED);
+ if (slp->ns_recend)
+ slp->ns_recend->m_nextpkt = recm;
+ else
+ slp->ns_rec = recm;
+ slp->ns_recend = recm;
+ }
+}
+
+/*
+ * Take the first queued record off slp->ns_rec, load it into nd and
+ * hand it to nfs_getreq() for header parsing.
+ * Returns ENOBUFS when the socket is no longer valid or nothing is
+ * queued; otherwise the result of nfs_getreq(), with the address mbuf
+ * (nd_nam) released when that result is an error.
+ */
+nfsrv_dorec(slp, nd)
+	register struct nfssvc_sock *slp;
+	register struct nfsd *nd;
+{
+	register struct mbuf *rec;
+	int error;
+
+	if (!(slp->ns_flag & SLP_VALID))
+		return (ENOBUFS);
+	rec = slp->ns_rec;
+	if (rec == (struct mbuf *)0)
+		return (ENOBUFS);
+
+	/* Unlink the head record; an emptied queue also loses its tail. */
+	slp->ns_rec = rec->m_nextpkt;
+	if (slp->ns_rec != (struct mbuf *)0)
+		rec->m_nextpkt = (struct mbuf *)0;
+	else
+		slp->ns_recend = (struct mbuf *)0;
+
+	/* A leading MT_SONAME mbuf is split off from the request data. */
+	if (rec->m_type != MT_SONAME) {
+		nd->nd_nam = (struct mbuf *)0;
+		nd->nd_mrep = rec;
+		nd->nd_md = rec;
+	} else {
+		nd->nd_nam = rec;
+		nd->nd_mrep = rec->m_next;
+		nd->nd_md = nd->nd_mrep;
+		rec->m_next = (struct mbuf *)0;
+	}
+	nd->nd_dpos = mtod(nd->nd_md, caddr_t);
+
+	error = nfs_getreq(nd, TRUE);
+	if (error)
+		m_freem(nd->nd_nam);
+	return (error);
+}
+
+/*
+ * Parse an RPC request
+ * - verify it
+ * - fill in the cred struct.
+ * nd->nd_mrep / nd_md / nd_dpos give the request chain and the parse
+ * position; has_header says whether the xid and message-direction words
+ * are still in front of the rpc version word.
+ * Returns 0 with nd_procnum and nd_repstat filled in. Protocol level
+ * rejections (rpc version, program, program version, procedure or auth
+ * flavor) are also reported with a 0 return, nd_repstat set to the error
+ * and nd_procnum set to NFSPROC_NOOP.
+ * Returns EBADRPC, after freeing mrep, when the message is not a call or
+ * a length field is out of range. The nfsmout label returns whatever is
+ * in error; presumably the nfsm_* macros jump there on failure - confirm.
+ */
+nfs_getreq(nd, has_header)
+ register struct nfsd *nd;
+ int has_header;
+{
+ register int len, i;
+ register u_long *tl;
+ register long t1; /* not named here; presumably scratch for the nfsm_* macros - confirm */
+ struct uio uio;
+ struct iovec iov;
+ caddr_t dpos, cp2;
+ u_long nfsvers, auth_type;
+ int error = 0, nqnfs = 0;
+ struct mbuf *mrep, *md;
+
+ mrep = nd->nd_mrep;
+ md = nd->nd_md;
+ dpos = nd->nd_dpos;
+ if (has_header) {
+ nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED); /* xid, direction, then the 8 words read in the other arm */
+ nd->nd_retxid = *tl++;
+ if (*tl++ != rpc_call) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ } else {
+ nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED);
+ }
+ nd->nd_repstat = 0;
+ if (*tl++ != rpc_vers) {
+ nd->nd_repstat = ERPCMISMATCH;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nfsvers = nfs_vers;
+ if (*tl != nfs_prog) {
+ if (*tl == nqnfs_prog) {
+ nqnfs++;
+ nfsvers = nqnfs_vers;
+ } else {
+ nd->nd_repstat = EPROGUNAVAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ }
+ tl++;
+ if (*tl++ != nfsvers) {
+ nd->nd_repstat = EPROGMISMATCH;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
+ if (nd->nd_procnum == NFSPROC_NULL) /* the null procedure needs no credentials */
+ return (0);
+ if (nd->nd_procnum >= NFS_NPROCS ||
+ (!nqnfs && nd->nd_procnum > NFSPROC_STATFS) ||
+ (*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) { /* an unknown auth flavor is also reported as EPROCUNAVAIL */
+ nd->nd_repstat = EPROCUNAVAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ auth_type = *tl++;
+ len = fxdr_unsigned(int, *tl++); /* length of the credential body */
+ if (len < 0 || len > RPCAUTH_MAXSIZ) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+
+ /*
+ * Handle auth_unix or auth_kerb.
+ */
+ if (auth_type == rpc_auth_unix) {
+ len = fxdr_unsigned(int, *++tl); /* steps over one word, then reads a length; presumably stamp and hostname length - confirm */
+ if (len < 0 || len > NFS_MAXNAMLEN) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ nfsm_adv(nfsm_rndup(len));
+ nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+ nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+ nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
+ len = fxdr_unsigned(int, *tl); /* number of supplementary gids that follow */
+ if (len < 0 || len > RPCAUTH_UNIXGIDS) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED); /* the gids plus two more words (read as the verifier below) */
+ for (i = 1; i <= len; i++)
+ if (i < NGROUPS)
+ nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
+ else
+ tl++; /* gids beyond NGROUPS - 1 are skipped, not stored */
+ nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
+ } else if (auth_type == rpc_auth_kerb) {
+ nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+ nd->nd_authlen = fxdr_unsigned(int, *tl); /* NOTE(review): a negative value is not rejected here */
+ uio.uio_resid = nfsm_rndup(nd->nd_authlen);
+ if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ uio.uio_offset = 0;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_segflg = UIO_SYSSPACE;
+ iov.iov_base = (caddr_t)nd->nd_authstr;
+ iov.iov_len = RPCAUTH_MAXSIZ;
+ nfsm_mtouio(&uio, uio.uio_resid);
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ nd->nd_flag |= NFSD_NEEDAUTH;
+ }
+
+ /*
+ * Do we have any use for the verifier.
+ * According to the "Remote Procedure Call Protocol Spec." it
+ * should be AUTH_NULL, but some clients make it AUTH_UNIX?
+ * For now, just skip over it
+ */
+ len = fxdr_unsigned(int, *++tl);
+ if (len < 0 || len > RPCAUTH_MAXSIZ) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ if (len > 0) {
+ nfsm_adv(nfsm_rndup(len));
+ }
+
+ /*
+ * For nqnfs, get piggybacked lease request.
+ */
+ if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ nd->nd_nqlflag = fxdr_unsigned(int, *tl);
+ if (nd->nd_nqlflag) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ nd->nd_duration = fxdr_unsigned(int, *tl);
+ } else
+ nd->nd_duration = NQ_MINLEASE;
+ } else {
+ nd->nd_nqlflag = NQL_NOVAL;
+ nd->nd_duration = NQ_MINLEASE;
+ }
+ nd->nd_md = md; /* hand the advanced parse position back to the caller */
+ nd->nd_dpos = dpos;
+ return (0);
+nfsmout:
+ return (error);
+}
+
+/*
+ * Hand socket slp to the first nfsd found waiting on the nfsd_head list
+ * and wake it, taking a reference on the socket (ns_sref).
+ * SIDE EFFECT: when no nfsd is waiting, SLP_DOREC is set on the socket
+ * and NFSD_CHECKSLP on nfsd_head, so that a running nfsd will go and
+ * look for the work in the nfssvc_sock list.
+ * Does nothing for a socket that is no longer SLP_VALID.
+ */
+void
+nfsrv_wakenfsd(slp)
+	struct nfssvc_sock *slp;
+{
+	register struct nfsd *nd;
+
+	if (!(slp->ns_flag & SLP_VALID))
+		return;
+
+	for (nd = nfsd_head.nd_next; nd != (struct nfsd *)&nfsd_head;
+	    nd = nd->nd_next) {
+		if ((nd->nd_flag & NFSD_WAITING) == 0)
+			continue;
+		nd->nd_flag &= ~NFSD_WAITING;
+		/* A waiting nfsd must not already own a socket. */
+		if (nd->nd_slp)
+			panic("nfsd wakeup");
+		slp->ns_sref++;
+		nd->nd_slp = slp;
+		wakeup((caddr_t)nd);
+		return;
+	}
+
+	/* Nobody idle: leave a note for the busy ones. */
+	slp->ns_flag |= SLP_DOREC;
+	nfsd_head.nd_flag |= NFSD_CHECKSLP;
+}
+
+/*
+ * Print "nfs server <server>: <msg>" through tprintf().
+ * The tprintf handle is opened on process p when one is given and is
+ * NULL otherwise.
+ */
+nfs_msg(p, server, msg)
+	struct proc *p;
+	char *server, *msg;
+{
+	tpr_t tpr = NULL;
+
+	if (p != NULL)
+		tpr = tprintf_open(p);
+	tprintf(tpr, "nfs server %s: %s\n", server, msg);
+	tprintf_close(tpr);
+}
diff --git a/sys/nfsserver/nfs_srvsubs.c b/sys/nfsserver/nfs_srvsubs.c
new file mode 100644
index 000000000000..5778f7d7f01a
--- /dev/null
+++ b/sys/nfsserver/nfs_srvsubs.c
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Data items converted to xdr at startup, since they are constant
+ * This is kinda hokey, but may save a little time doing byte swaps
+ * The values are assigned in nfs_init().
+ */
+u_long nfs_procids[NFS_NPROCS]; /* nfs_procids[i] is set to txdr_unsigned(i) */
+u_long nfs_xdrneg1; /* txdr_unsigned(-1) */
+u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
+ rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_rejectedcred,
+ rpc_auth_kerb; /* rpc_auth_kerb is loaded from RPCAUTH_NQNFS */
+u_long nfs_vers, nfs_prog, nfs_true, nfs_false;
+
+/* And other global data */
+static u_long nfs_xid = 0; /* last xid used; nfsm_rpchead() pre-increments it and skips 0 */
+enum vtype ntov_type[7] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON }; /* presumably indexed by the nfs wire file type - confirm */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern struct nfsreq nfsreqh; /* head of the circular request list (r_next / r_prev) */
+extern int nqnfs_piggy[NFS_NPROCS];
+extern struct nfsrtt nfsrtt;
+extern time_t nqnfsstarttime;
+extern u_long nqnfs_prog, nqnfs_vers;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+
+/*
+ * Start the nfs part of a request: allocate the first mbuf and, for a
+ * vnode on an NQNFS mount, put the lease request in front.
+ * hsiz is the size of the rest of the nfs request header and only
+ * decides whether a cluster is attached.
+ * The lease request is either the lease flag followed by the mount's
+ * lease term, or a single zero word when no lease is needed.
+ * Returns the mbuf to build into; *bposp receives the build position.
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+	struct vnode *vp;
+	u_long procid;
+	int hsiz;
+	caddr_t *bposp;
+{
+	register struct mbuf *mb;
+	register u_long *tl;
+	register caddr_t bpos;
+	struct mbuf *mb2;
+	struct nfsmount *nmp;
+	int nqflag;
+
+	MGET(mb, M_WAIT, MT_DATA);
+	if (hsiz >= MINCLSIZE) {
+		MCLGET(mb, M_WAIT);
+	}
+	mb->m_len = 0;
+	bpos = mtod(mb, caddr_t);
+
+	/* Only vnodes on NQNFS mounts carry a lease request. */
+	if (vp == NULL)
+		goto done;
+	nmp = VFSTONFS(vp->v_mount);
+	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+		goto done;
+
+	nqflag = NQNFS_NEEDLEASE(vp, procid);
+	if (nqflag == 0) {
+		nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+		*tl = 0;
+	} else {
+		nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+		*tl++ = txdr_unsigned(nqflag);
+		*tl = txdr_unsigned(nmp->nm_leaseterm);
+	}
+
+done:
+	*bposp = bpos;
+	return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info.
+ * The authorization string argument is only used when the credentials
+ * come from outside of the kernel.
+ * Returns the head of the mbuf list.
+ * cr supplies uid and groups; nqnfs selects the NQNFS program/version
+ * instead of NFS; auth_type is RPCAUTH_UNIX or RPCAUTH_NQNFS (any other
+ * value adds no credential body); auth_len is the credential length and
+ * auth_str the RPCAUTH_NQNFS credential bytes.
+ * mrest (mrest_len bytes) is chained after the header. *mbp is set to
+ * the last header mbuf and *xidp to the xid, already in xdr form.
+ */
+struct mbuf *
+nfsm_rpchead(cr, nqnfs, procid, auth_type, auth_len, auth_str, mrest,
+ mrest_len, mbp, xidp)
+ register struct ucred *cr;
+ int nqnfs;
+ int procid;
+ int auth_type;
+ int auth_len;
+ char *auth_str;
+ struct mbuf *mrest;
+ int mrest_len;
+ struct mbuf **mbp;
+ u_long *xidp;
+{
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ register int i;
+ struct mbuf *mreq, *mb2;
+ int siz, grpsiz, authsiz;
+
+ authsiz = nfsm_rndup(auth_len);
+ if (auth_type == RPCAUTH_NQNFS)
+ authsiz += 2 * NFSX_UNSIGNED; /* room for the uid and length words written below */
+ MGETHDR(mb, M_WAIT, MT_DATA);
+ if ((authsiz + 10*NFSX_UNSIGNED) >= MINCLSIZE) {
+ MCLGET(mb, M_WAIT);
+ } else if ((authsiz + 10*NFSX_UNSIGNED) < MHLEN) {
+ MH_ALIGN(mb, authsiz + 10*NFSX_UNSIGNED);
+ } else {
+ MH_ALIGN(mb, 8*NFSX_UNSIGNED);
+ }
+ mb->m_len = 0;
+ mreq = mb;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * First the RPC header.
+ * Eight words: xid, call, rpc version, program, program version,
+ * procedure, auth flavor, auth length.
+ */
+ nfsm_build(tl, u_long *, 8*NFSX_UNSIGNED);
+ if (++nfs_xid == 0) /* an xid of 0 is never used */
+ nfs_xid++;
+ *tl++ = *xidp = txdr_unsigned(nfs_xid);
+ *tl++ = rpc_call;
+ *tl++ = rpc_vers;
+ if (nqnfs) {
+ *tl++ = txdr_unsigned(NQNFS_PROG);
+ *tl++ = txdr_unsigned(NQNFS_VER1);
+ } else {
+ *tl++ = txdr_unsigned(NFS_PROG);
+ *tl++ = txdr_unsigned(NFS_VER2);
+ }
+ *tl++ = txdr_unsigned(procid);
+
+ /*
+ * And then the authorization cred.
+ */
+ *tl++ = txdr_unsigned(auth_type);
+ *tl = txdr_unsigned(authsiz);
+ switch (auth_type) {
+ case RPCAUTH_UNIX:
+ nfsm_build(tl, u_long *, auth_len);
+ *tl++ = 0; /* stamp ?? */
+ *tl++ = 0; /* NULL hostname */
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl++ = txdr_unsigned(cr->cr_groups[0]);
+ grpsiz = (auth_len >> 2) - 5; /* words left after the five fixed ones, so auth_len fixes the gid count */
+ *tl++ = txdr_unsigned(grpsiz);
+ for (i = 1; i <= grpsiz; i++)
+ *tl++ = txdr_unsigned(cr->cr_groups[i]);
+ break;
+ case RPCAUTH_NQNFS:
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl = txdr_unsigned(auth_len);
+ siz = auth_len;
+ while (siz > 0) { /* copy auth_str, chaining on new mbufs as each one fills */
+ if (M_TRAILINGSPACE(mb) == 0) {
+ MGET(mb2, M_WAIT, MT_DATA);
+ if (siz >= MINCLSIZE)
+ MCLGET(mb2, M_WAIT);
+ mb->m_next = mb2;
+ mb = mb2;
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+ }
+ i = min(siz, M_TRAILINGSPACE(mb));
+ bcopy(auth_str, bpos, i);
+ mb->m_len += i;
+ auth_str += i;
+ bpos += i;
+ siz -= i;
+ }
+ if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { /* zero the pad bytes; NOTE(review): trailing space is not re-checked here */
+ for (i = 0; i < siz; i++)
+ *bpos++ = '\0';
+ mb->m_len += siz;
+ }
+ break;
+ };
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); /* verifier: RPCAUTH_NULL flavor, zero length */
+ *tl++ = txdr_unsigned(RPCAUTH_NULL);
+ *tl = 0;
+ mb->m_next = mrest;
+ mreq->m_pkthdr.len = authsiz + 10*NFSX_UNSIGNED + mrest_len;
+ mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+ *mbp = mb;
+ return (mreq);
+}
+
+/*
+ * copies mbuf chain to the uio scatter/gather list
+ * *mrep / *dpos give the current mbuf and position in the chain; both are
+ * updated to just past the siz bytes and the pad up to nfsm_rndup(siz).
+ * The uio's iovecs, uio_offset and uio_resid are advanced as data moves.
+ * Returns EFBIG when the iovecs run out before siz bytes are placed,
+ * EBADRPC when the mbuf chain runs out, otherwise 0 or the result of
+ * nfs_adv() when the pad bytes cross into the next mbuf.
+ */
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+ struct mbuf **mrep;
+ register struct uio *uiop;
+ int siz;
+ caddr_t *dpos;
+{
+ register char *mbufcp, *uiocp;
+ register int xfer, left, len;
+ register struct mbuf *mp;
+ long uiosiz, rem;
+ int error = 0;
+
+ mp = *mrep;
+ mbufcp = *dpos;
+ len = mtod(mp, caddr_t)+mp->m_len-mbufcp; /* bytes left in the current mbuf */
+ rem = nfsm_rndup(siz)-siz; /* pad bytes to skip once the data has been copied */
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EFBIG);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left; /* amount that goes into this iovec */
+ while (left > 0) {
+ while (len == 0) { /* current mbuf used up: step to the next one */
+ mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ mbufcp = mtod(mp, caddr_t);
+ len = mp->m_len;
+ }
+ xfer = (left > len) ? len : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (mbufcp, uiocp, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(mbufcp, uiocp, xfer);
+ else
+ copyout(mbufcp, uiocp, xfer); /* NOTE(review): the copyout() result is ignored */
+ left -= xfer;
+ len -= xfer;
+ mbufcp += xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ if (uiop->uio_iov->iov_len <= siz) { /* iovec filled completely: move to the next one */
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ *dpos = mbufcp;
+ *mrep = mp;
+ if (rem > 0) {
+ if (len < rem)
+ error = nfs_adv(mrep, dpos, rem, len);
+ else
+ *dpos += rem;
+ }
+ return (error);
+}
+
+/*
+ * copies a uio scatter/gather list to an mbuf chain...
+ * siz bytes are taken from the uio and appended after *mq, allocating
+ * further mbufs (with clusters when siz > MLEN) as each one fills; the
+ * data is then zero padded up to nfsm_rndup(siz).
+ * The uio's iovecs, uio_offset and uio_resid are advanced as data moves.
+ * On return *mq is the last mbuf written and *bpos the position after
+ * the data and pad.
+ * Returns EINVAL when the iovecs run out before siz bytes were taken,
+ * otherwise 0.
+ */
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+ register struct uio *uiop;
+ struct mbuf **mq;
+ int siz;
+ caddr_t *bpos;
+{
+ register char *uiocp;
+ register struct mbuf *mp, *mp2;
+ register int xfer, left, mlen;
+ int uiosiz, clflg, rem;
+ char *cp;
+
+ if (siz > MLEN) /* or should it >= MCLBYTES ?? */
+ clflg = 1;
+ else
+ clflg = 0;
+ rem = nfsm_rndup(siz)-siz; /* pad bytes needed after the data */
+ mp = mp2 = *mq; /* mp: mbuf being filled; mp2: mbuf the next new one is linked after */
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EINVAL);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left; /* amount taken from this iovec */
+ while (left > 0) {
+ mlen = M_TRAILINGSPACE(mp);
+ if (mlen == 0) {
+ MGET(mp, M_WAIT, MT_DATA);
+ if (clflg)
+ MCLGET(mp, M_WAIT);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ mp2 = mp;
+ mlen = M_TRAILINGSPACE(mp);
+ }
+ xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+ copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); /* NOTE(review): the copyin() result is ignored */
+ mp->m_len += xfer;
+ left -= xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ if (uiop->uio_iov->iov_len <= siz) { /* iovec fully consumed: move to the next one */
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ if (rem > 0) {
+ if (rem > M_TRAILINGSPACE(mp)) { /* pad does not fit: it goes into a fresh mbuf */
+ MGET(mp, M_WAIT, MT_DATA);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ }
+ cp = mtod(mp, caddr_t)+mp->m_len;
+ for (left = 0; left < rem; left++)
+ *cp++ = '\0';
+ mp->m_len += rem;
+ *bpos = cp;
+ } else
+ *bpos = mtod(mp, caddr_t)+mp->m_len;
+ *mq = mp;
+ return (0);
+}
+
+/*
+ * Help break down an mbuf chain by setting the first siz bytes contiguous
+ * pointed to by returned val.
+ * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
+ * cases. (The macros use the vars. dpos and dpos2)
+ * *mdp / *dposp are the current mbuf and position, left the number of
+ * bytes remaining in that mbuf; *cp2 receives the address of the
+ * contiguous data and *mdp / *dposp are advanced past it.
+ * When the bytes straddle mbufs, a new mbuf is inserted after the current
+ * one and the siz bytes are gathered into it.
+ * Returns 0, or EBADRPC when the chain ends before siz bytes are found.
+ * Panics when a straddling request is larger than MHLEN.
+ */
+nfsm_disct(mdp, dposp, siz, left, cp2)
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ int siz;
+ int left;
+ caddr_t *cp2;
+{
+ register struct mbuf *mp, *mp2;
+ register int siz2, xfer;
+ register caddr_t p;
+
+ mp = *mdp;
+ while (left == 0) { /* nothing left here: step to the next mbuf that has data */
+ *mdp = mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ left = mp->m_len;
+ *dposp = mtod(mp, caddr_t);
+ }
+ if (left >= siz) { /* easy case: already contiguous */
+ *cp2 = *dposp;
+ *dposp += siz;
+ } else if (mp->m_next == NULL) {
+ return (EBADRPC);
+ } else if (siz > MHLEN) {
+ panic("nfs S too big");
+ } else {
+ MGET(mp2, M_WAIT, MT_DATA);
+ mp2->m_next = mp->m_next;
+ mp->m_next = mp2;
+ mp->m_len -= left; /* the tail of the old mbuf moves into the new one */
+ mp = mp2;
+ *cp2 = p = mtod(mp, caddr_t);
+ bcopy(*dposp, p, left); /* Copy what was left */
+ siz2 = siz-left;
+ p += left;
+ mp2 = mp->m_next;
+ /* Loop around copying up the siz2 bytes */
+ while (siz2 > 0) {
+ if (mp2 == NULL)
+ return (EBADRPC);
+ xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+ if (xfer > 0) {
+ bcopy(mtod(mp2, caddr_t), p, xfer);
+ NFSMADV(mp2, xfer);
+ mp2->m_len -= xfer;
+ p += xfer;
+ siz2 -= xfer;
+ }
+ if (siz2 > 0)
+ mp2 = mp2->m_next;
+ }
+ mp->m_len = siz;
+ *mdp = mp2; /* parsing continues in the mbuf the last bytes came from */
+ *dposp = mtod(mp2, caddr_t);
+ }
+ return (0);
+}
+
+/*
+ * Move the parse position forward by offs bytes.
+ * left is the number of bytes remaining in the current mbuf *mdp.
+ * On success *mdp is the mbuf holding the new position and *dposp is
+ * set to that mbuf's data start plus the offset still outstanding.
+ * Returns 0, or EBADRPC when the chain ends first.
+ */
+nfs_adv(mdp, dposp, offs, left)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int offs;
+	int left;
+{
+	register struct mbuf *cur;
+	register int avail;
+
+	/* Consume whole mbufs until the target lies within the current one. */
+	for (cur = *mdp, avail = left; avail < offs; avail = cur->m_len) {
+		offs -= avail;
+		cur = cur->m_next;
+		if (cur == NULL)
+			return (EBADRPC);
+	}
+	*mdp = cur;
+	*dposp = mtod(cur, caddr_t) + offs;
+	return (0);
+}
+
+/*
+ * Copy a string into mbufs for the hard cases...
+ * Writes the xdr length word followed by the siz bytes at cp, starting in
+ * the trailing space of *mb at *bpos and continuing in newly allocated
+ * mbufs; the last word is zeroed first so the pad bytes are 0.
+ * On return *mb is the last mbuf written and *bpos the position after
+ * the data. Always returns 0.
+ * NOTE(review): m1 is only assigned inside the loop, so this relies on
+ * the string not fitting in the trailing space of *mb; presumably the
+ * calling macro guarantees that - confirm.
+ */
+nfsm_strtmbuf(mb, bpos, cp, siz)
+ struct mbuf **mb;
+ char **bpos;
+ char *cp;
+ long siz;
+{
+ register struct mbuf *m1, *m2;
+ long left, xfer, len, tlen;
+ u_long *tl;
+ int putsize;
+
+ putsize = 1; /* the length word still has to be written */
+ m2 = *mb;
+ left = M_TRAILINGSPACE(m2);
+ if (left > 0) { /* NOTE(review): assumes at least NFSX_UNSIGNED bytes are free whenever any are */
+ tl = ((u_long *)(*bpos));
+ *tl++ = txdr_unsigned(siz);
+ putsize = 0;
+ left -= NFSX_UNSIGNED;
+ m2->m_len += NFSX_UNSIGNED;
+ if (left > 0) { /* fill the rest of this mbuf with the start of the string */
+ bcopy(cp, (caddr_t) tl, left);
+ siz -= left;
+ cp += left;
+ m2->m_len += left;
+ left = 0;
+ }
+ }
+ /* Loop around adding mbufs */
+ while (siz > 0) {
+ MGET(m1, M_WAIT, MT_DATA);
+ if (siz > MLEN)
+ MCLGET(m1, M_WAIT);
+ m1->m_len = NFSMSIZ(m1); /* m_len temporarily holds the capacity; the real length is set below */
+ m2->m_next = m1;
+ m2 = m1;
+ tl = mtod(m1, u_long *);
+ tlen = 0;
+ if (putsize) {
+ *tl++ = txdr_unsigned(siz);
+ m1->m_len -= NFSX_UNSIGNED;
+ tlen = NFSX_UNSIGNED;
+ putsize = 0;
+ }
+ if (siz < m1->m_len) { /* the remainder fits: this is the last mbuf */
+ len = nfsm_rndup(siz);
+ xfer = siz;
+ if (xfer < len)
+ *(tl+(xfer>>2)) = 0; /* pre-zero the final word so the pad bytes are 0 */
+ } else {
+ xfer = len = m1->m_len;
+ }
+ bcopy(cp, (caddr_t) tl, xfer);
+ m1->m_len = len+tlen;
+ siz -= xfer;
+ cp += xfer;
+ }
+ *mb = m1;
+ *bpos = mtod(m1, caddr_t)+m1->m_len;
+ return (0);
+}
+
+/*
+ * Called once to initialize data structures...
+ */
+nfs_init()
+{
+ register int i;
+
+ nfsrtt.pos = 0;
+ rpc_vers = txdr_unsigned(RPC_VER2);
+ rpc_call = txdr_unsigned(RPC_CALL);
+ rpc_reply = txdr_unsigned(RPC_REPLY);
+ rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+ rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+ rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+ rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+ rpc_rejectedcred = txdr_unsigned(AUTH_REJECTCRED);
+ rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+ rpc_auth_kerb = txdr_unsigned(RPCAUTH_NQNFS);
+ nfs_vers = txdr_unsigned(NFS_VER2);
+ nfs_prog = txdr_unsigned(NFS_PROG);
+ nfs_true = txdr_unsigned(TRUE);
+ nfs_false = txdr_unsigned(FALSE);
+ /* Loop thru nfs procids */
+ for (i = 0; i < NFS_NPROCS; i++)
+ nfs_procids[i] = txdr_unsigned(i);
+ /* Ensure async daemons disabled */
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ nfs_iodwant[i] = (struct proc *)0;
+ TAILQ_INIT(&nfs_bufq);
+ nfs_xdrneg1 = txdr_unsigned(-1);
+ nfs_nhinit(); /* Init the nfsnode table */
+ nfsrv_init(0); /* Init server data structures */
+ nfsrv_initcache(); /* Init the server request cache */
+
+ /*
+ * Initialize the nqnfs server stuff.
+ */
+ if (nqnfsstarttime == 0) {
+ nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+ + nqsrv_clockskew + nqsrv_writeslack;
+ NQLOADNOVRAM(nqnfsstarttime);
+ nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+ nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+ nqthead.th_head[0] = &nqthead;
+ nqthead.th_head[1] = &nqthead;
+ nqfhead = hashinit(NQLCHSZ, M_NQLEASE, &nqfheadhash);
+ }
+
+ /*
+ * Initialize reply list and start timer
+ */
+ nfsreqh.r_prev = nfsreqh.r_next = &nfsreqh;
+ nfs_timer();
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ * that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ * error otherwise
+ */
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with
+ * the values on the mbuf list and
+ * Iff vap not NULL
+ * copy the attributes to *vaper
+ */
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+ struct vnode **vpp;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct vattr *vaper;
+{
+ register struct vnode *vp = *vpp;
+ register struct vattr *vap;
+ register struct nfsv2_fattr *fp;
+ extern int (**spec_nfsv2nodeop_p)();
+ register struct nfsnode *np, *nq, **nhpp;
+ register long t1;
+ caddr_t dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *md;
+ enum vtype vtyp;
+ u_short vmode;
+ long rdev;
+ struct timespec mtime;
+ struct vnode *nvp;
+
+ md = *mdp;
+ dpos = *dposp;
+ t1 = (mtod(md, caddr_t) + md->m_len) - dpos;
+ isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ if (error = nfsm_disct(&md, &dpos, NFSX_FATTR(isnq), t1, &cp2))
+ return (error);
+ fp = (struct nfsv2_fattr *)cp2;
+ vtyp = nfstov_type(fp->fa_type);
+ vmode = fxdr_unsigned(u_short, fp->fa_mode);
+ if (vtyp == VNON || vtyp == VREG)
+ vtyp = IFTOVT(vmode);
+ if (isnq) {
+ rdev = fxdr_unsigned(long, fp->fa_nqrdev);
+ fxdr_nqtime(&fp->fa_nqmtime, &mtime);
+ } else {
+ rdev = fxdr_unsigned(long, fp->fa_nfsrdev);
+ fxdr_nfstime(&fp->fa_nfsmtime, &mtime);
+ }
+ /*
+ * If v_type == VNON it is a new node, so fill in the v_type,
+ * n_mtime fields. Check to see if it represents a special
+ * device, and if so, check for a possible alias. Once the
+ * correct vnode has been obtained, fill in the rest of the
+ * information.
+ */
+ np = VTONFS(vp);
+ if (vp->v_type == VNON) {
+ if (vtyp == VCHR && rdev == 0xffffffff)
+ vp->v_type = vtyp = VFIFO;
+ else
+ vp->v_type = vtyp;
+ if (vp->v_type == VFIFO) {
+#ifdef FIFO
+ extern int (**fifo_nfsv2nodeop_p)();
+ vp->v_op = fifo_nfsv2nodeop_p;
+#else
+ return (EOPNOTSUPP);
+#endif /* FIFO */
+ }
+ if (vp->v_type == VCHR || vp->v_type == VBLK) {
+ vp->v_op = spec_nfsv2nodeop_p;
+ if (nvp = checkalias(vp, (dev_t)rdev, vp->v_mount)) {
+ /*
+ * Discard unneeded vnode, but save its nfsnode.
+ */
+ if (nq = np->n_forw)
+ nq->n_back = np->n_back;
+ *np->n_back = nq;
+ nvp->v_data = vp->v_data;
+ vp->v_data = NULL;
+ vp->v_op = spec_vnodeop_p;
+ vrele(vp);
+ vgone(vp);
+ /*
+ * Reinitialize aliased node.
+ */
+ np->n_vnode = nvp;
+ nhpp = (struct nfsnode **)nfs_hash(&np->n_fh);
+ if (nq = *nhpp)
+ nq->n_back = &np->n_forw;
+ np->n_forw = nq;
+ np->n_back = nhpp;
+ *nhpp = np;
+ *vpp = vp = nvp;
+ }
+ }
+ np->n_mtime = mtime.ts_sec;
+ }
+ vap = &np->n_vattr;
+ vap->va_type = vtyp;
+ vap->va_mode = (vmode & 07777);
+ vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+ vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+ vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+ vap->va_rdev = (dev_t)rdev;
+ vap->va_mtime = mtime;
+ vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+ if (isnq) {
+ fxdr_hyper(&fp->fa_nqsize, &vap->va_size);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nqblocksize);
+ fxdr_hyper(&fp->fa_nqbytes, &vap->va_bytes);
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nqfileid);
+ fxdr_nqtime(&fp->fa_nqatime, &vap->va_atime);
+ vap->va_flags = fxdr_unsigned(u_long, fp->fa_nqflags);
+ fxdr_nqtime(&fp->fa_nqctime, &vap->va_ctime);
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nqgen);
+ fxdr_hyper(&fp->fa_nqfilerev, &vap->va_filerev);
+ } else {
+ vap->va_size = fxdr_unsigned(u_long, fp->fa_nfssize);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nfsblocksize);
+ vap->va_bytes = fxdr_unsigned(long, fp->fa_nfsblocks) * NFS_FABLKSIZE;
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nfsfileid);
+ fxdr_nfstime(&fp->fa_nfsatime, &vap->va_atime);
+ vap->va_flags = 0;
+ vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa_nfsctime.nfs_sec);
+ vap->va_ctime.ts_nsec = 0;
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nfsctime.nfs_usec);
+ vap->va_filerev = 0;
+ }
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) {
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else
+ np->n_size = vap->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else
+ np->n_size = vap->va_size;
+ }
+ np->n_attrstamp = time.tv_sec;
+ *dposp = dpos;
+ *mdp = md;
+ if (vaper != NULL) {
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+#ifdef notdef
+ if ((np->n_flag & NMODIFIED) && np->n_size > vap->va_size)
+ if (np->n_size > vap->va_size)
+ vaper->va_size = np->n_size;
+#endif
+ if (np->n_flag & NCHG) {
+ if (np->n_flag & NACC) {
+ vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+ vaper->va_atime.ts_nsec =
+ np->n_atim.tv_usec * 1000;
+ }
+ if (np->n_flag & NUPD) {
+ vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+ vaper->va_mtime.ts_nsec =
+ np->n_mtim.tv_usec * 1000;
+ }
+ }
+ }
+ return (0);
+}
+
+/*
+ * Check the time stamp
+ * If the cache is valid, copy contents to *vap and return 0
+ * otherwise return an error
+ */
+nfs_getattrcache(vp, vaper)
+ register struct vnode *vp;
+ struct vattr *vaper;
+{
+ register struct nfsnode *np = VTONFS(vp);
+ register struct vattr *vap;
+
+ if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQLOOKLEASE) {
+ if (!NQNFS_CKCACHABLE(vp, NQL_READ) || np->n_attrstamp == 0) {
+ nfsstats.attrcache_misses++;
+ return (ENOENT);
+ }
+ } else if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) {
+ nfsstats.attrcache_misses++;
+ return (ENOENT);
+ }
+ nfsstats.attrcache_hits++;
+ vap = &np->n_vattr;
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) {
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else
+ np->n_size = vap->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else
+ np->n_size = vap->va_size;
+ }
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
+#ifdef notdef
+ if ((np->n_flag & NMODIFIED) == 0) {
+ np->n_size = vaper->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else if (np->n_size > vaper->va_size)
+ if (np->n_size > vaper->va_size)
+ vaper->va_size = np->n_size;
+#endif
+ if (np->n_flag & NCHG) {
+ if (np->n_flag & NACC) {
+ vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+ vaper->va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+ }
+ if (np->n_flag & NUPD) {
+ vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+ vaper->va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+ }
+ }
+ return (0);
+}
+
+/*
+ * Set up nameidata for a lookup() call and do it
+ */
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p)
+ register struct nameidata *ndp;
+ fhandle_t *fhp;
+ int len;
+ struct nfssvc_sock *slp;
+ struct mbuf *nam;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct proc *p;
+{
+ register int i, rem;
+ register struct mbuf *md;
+ register char *fromcp, *tocp;
+ struct vnode *dp;
+ int error, rdonly;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK);
+ /*
+ * Copy the name from the mbuf list to ndp->ni_pnbuf
+ * and set the various ndp fields appropriately.
+ */
+ fromcp = *dposp;
+ tocp = cnp->cn_pnbuf;
+ md = *mdp;
+ rem = mtod(md, caddr_t) + md->m_len - fromcp;
+ cnp->cn_hash = 0;
+ for (i = 0; i < len; i++) {
+ while (rem == 0) {
+ md = md->m_next;
+ if (md == NULL) {
+ error = EBADRPC;
+ goto out;
+ }
+ fromcp = mtod(md, caddr_t);
+ rem = md->m_len;
+ }
+ if (*fromcp == '\0' || *fromcp == '/') {
+ error = EINVAL;
+ goto out;
+ }
+ cnp->cn_hash += (unsigned char)*fromcp;
+ *tocp++ = *fromcp++;
+ rem--;
+ }
+ *tocp = '\0';
+ *mdp = md;
+ *dposp = fromcp;
+ len = nfsm_rndup(len)-len;
+ if (len > 0) {
+ if (rem >= len)
+ *dposp += len;
+ else if (error = nfs_adv(mdp, dposp, len, rem))
+ goto out;
+ }
+ ndp->ni_pathlen = tocp - cnp->cn_pnbuf;
+ cnp->cn_nameptr = cnp->cn_pnbuf;
+ /*
+ * Extract and set starting directory.
+ */
+ if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+ nam, &rdonly))
+ goto out;
+ if (dp->v_type != VDIR) {
+ vrele(dp);
+ error = ENOTDIR;
+ goto out;
+ }
+ ndp->ni_startdir = dp;
+ if (rdonly)
+ cnp->cn_flags |= (NOCROSSMOUNT | RDONLY);
+ else
+ cnp->cn_flags |= NOCROSSMOUNT;
+ /*
+ * And call lookup() to do the real work
+ */
+ cnp->cn_proc = p;
+ if (error = lookup(ndp))
+ goto out;
+ /*
+ * Check for encountering a symbolic link
+ */
+ if (cnp->cn_flags & ISSYMLINK) {
+ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+ vput(ndp->ni_dvp);
+ else
+ vrele(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ ndp->ni_vp = NULL;
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Check for saved name request
+ */
+ if (cnp->cn_flags & (SAVENAME | SAVESTART)) {
+ cnp->cn_flags |= HASBUF;
+ return (0);
+ }
+out:
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ return (error);
+}
+
+/*
+ * A fiddled version of m_adj() that ensures null fill to a long
+ * boundary and only trims off the back end
+ */
+void
+nfsm_adj(mp, len, nul)
+ struct mbuf *mp;
+ register int len;
+ int nul;
+{
+ register struct mbuf *m;
+ register int count, i;
+ register char *cp;
+
+ /*
+ * Trim from tail. Scan the mbuf chain,
+ * calculating its length and finding the last mbuf.
+ * If the adjustment only affects this mbuf, then just
+ * adjust and return. Otherwise, rescan and truncate
+ * after the remaining size.
+ */
+ count = 0;
+ m = mp;
+ for (;;) {
+ count += m->m_len;
+ if (m->m_next == (struct mbuf *)0)
+ break;
+ m = m->m_next;
+ }
+ if (m->m_len > len) {
+ m->m_len -= len;
+ if (nul > 0) {
+ cp = mtod(m, caddr_t)+m->m_len-nul;
+ for (i = 0; i < nul; i++)
+ *cp++ = '\0';
+ }
+ return;
+ }
+ count -= len;
+ if (count < 0)
+ count = 0;
+ /*
+ * Correct length for chain is "count".
+ * Find the mbuf with last data, adjust its length,
+ * and toss data from remaining mbufs on chain.
+ */
+ for (m = mp; m; m = m->m_next) {
+ if (m->m_len >= count) {
+ m->m_len = count;
+ if (nul > 0) {
+ cp = mtod(m, caddr_t)+m->m_len-nul;
+ for (i = 0; i < nul; i++)
+ *cp++ = '\0';
+ }
+ break;
+ }
+ count -= m->m_len;
+ }
+ while (m = m->m_next)
+ m->m_len = 0;
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ * - look up fsid in mount list (if not found ret error)
+ * - get vp and export rights by calling VFS_FHTOVP()
+ * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
+ * - if not lockflag unlock it with VOP_UNLOCK()
+ */
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp)
+ fhandle_t *fhp;
+ int lockflag;
+ struct vnode **vpp;
+ struct ucred *cred;
+ struct nfssvc_sock *slp;
+ struct mbuf *nam;
+ int *rdonlyp;
+{
+ register struct mount *mp;
+ register struct nfsuid *uidp;
+ register int i;
+ struct ucred *credanon;
+ int error, exflags;
+
+ *vpp = (struct vnode *)0;
+ if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if (error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon))
+ return (error);
+ /*
+ * Check/setup credentials.
+ */
+ if (exflags & MNT_EXKERB) {
+ uidp = slp->ns_uidh[NUIDHASH(cred->cr_uid)];
+ while (uidp) {
+ if (uidp->nu_uid == cred->cr_uid)
+ break;
+ uidp = uidp->nu_hnext;
+ }
+ if (uidp) {
+ cred->cr_uid = uidp->nu_cr.cr_uid;
+ for (i = 0; i < uidp->nu_cr.cr_ngroups; i++)
+ cred->cr_groups[i] = uidp->nu_cr.cr_groups[i];
+ } else {
+ vput(*vpp);
+ return (NQNFS_AUTHERR);
+ }
+ } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+ cred->cr_uid = credanon->cr_uid;
+ for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+ cred->cr_groups[i] = credanon->cr_groups[i];
+ }
+ if (exflags & MNT_EXRDONLY)
+ *rdonlyp = 1;
+ else
+ *rdonlyp = 0;
+ if (!lockflag)
+ VOP_UNLOCK(*vpp);
+ return (0);
+}
+
+/*
+ * This function compares two net addresses by family and returns TRUE
+ * if they are the same host.
+ * If there is any doubt, return FALSE.
+ * The AF_INET family is handled as a special case so that address mbufs
+ * don't need to be saved to store "struct in_addr", which is only 4 bytes.
+ */
+netaddr_match(family, haddr, nam)
+ int family;
+ union nethostaddr *haddr;
+ struct mbuf *nam;
+{
+ register struct sockaddr_in *inetaddr;
+
+ switch (family) {
+ case AF_INET:
+ inetaddr = mtod(nam, struct sockaddr_in *);
+ if (inetaddr->sin_family == AF_INET &&
+ inetaddr->sin_addr.s_addr == haddr->had_inetaddr)
+ return (1);
+ break;
+#ifdef ISO
+ case AF_ISO:
+ {
+ register struct sockaddr_iso *isoaddr1, *isoaddr2;
+
+ isoaddr1 = mtod(nam, struct sockaddr_iso *);
+ isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *);
+ if (isoaddr1->siso_family == AF_ISO &&
+ isoaddr1->siso_nlen > 0 &&
+ isoaddr1->siso_nlen == isoaddr2->siso_nlen &&
+ SAME_ISOADDR(isoaddr1, isoaddr2))
+ return (1);
+ break;
+ }
+#endif /* ISO */
+ default:
+ break;
+ };
+ return (0);
+}
diff --git a/sys/nfsserver/nfs_syscalls.c b/sys/nfsserver/nfs_syscalls.c
new file mode 100644
index 000000000000..5d86b42ee20a
--- /dev/null
+++ b/sys/nfsserver/nfs_syscalls.c
@@ -0,0 +1,874 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_syscalls.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/namei.h>
+#include <sys/syslog.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+/* Global defs. */
+extern u_long nfs_prog, nfs_vers;
+extern int (*nfsrv_procs[NFS_NPROCS])();
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern int nfs_numasync;
+extern time_t nqnfsstarttime;
+extern struct nfsrv_req nsrvq_head;
+extern struct nfsd nfsd_head;
+extern int nqsrv_writeslack;
+extern int nfsrtton;
+struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
+int nuidhash_max = NFS_MAXUIDHASH;
+static int nfs_numnfsd = 0;
+int nfsd_waiting = 0;
+static int notstarted = 1;
+static int modify_flag = 0;
+static struct nfsdrt nfsdrt;
+void nfsrv_cleancache(), nfsrv_rcv(), nfsrv_wakenfsd(), nfs_sndunlock();
+static void nfsd_rt();
+void nfsrv_slpderef(), nfsrv_init();
+
+#define TRUE 1
+#define FALSE 0
+
+static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
+/*
+ * NFS server system calls
+ * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
+ */
+
+/*
+ * Get file handle system call
+ */
+struct getfh_args {
+ char *fname;
+ fhandle_t *fhp;
+};
+getfh(p, uap, retval)
+ struct proc *p;
+ register struct getfh_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ fhandle_t fh;
+ int error;
+ struct nameidata nd;
+
+ /*
+ * Must be super user
+ */
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ bzero((caddr_t)&fh, sizeof(fh));
+ fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ error = VFS_VPTOFH(vp, &fh.fh_fid);
+ vput(vp);
+ if (error)
+ return (error);
+ error = copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh));
+ return (error);
+}
+
+static struct nfssvc_sock nfssvc_sockhead;
+
+/*
+ * Nfs server psuedo system call for the nfsd's
+ * Based on the flag value it either:
+ * - adds a socket to the selection list
+ * - remains in the kernel as an nfsd
+ * - remains in the kernel as an nfsiod
+ */
+struct nfssvc_args {
+ int flag;
+ caddr_t argp;
+};
+nfssvc(p, uap, retval)
+ struct proc *p;
+ register struct nfssvc_args *uap;
+ int *retval;
+{
+ struct nameidata nd;
+ struct file *fp;
+ struct mbuf *nam;
+ struct nfsd_args nfsdarg;
+ struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
+ struct nfsd_cargs ncd;
+ struct nfsd *nfsd;
+ struct nfssvc_sock *slp;
+ struct nfsuid *nuidp, **nuh;
+ struct nfsmount *nmp;
+ int error;
+
+ /*
+ * Must be super user
+ */
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ while (nfssvc_sockhead.ns_flag & SLP_INIT) {
+ nfssvc_sockhead.ns_flag |= SLP_WANTINIT;
+ (void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
+ }
+ if (uap->flag & NFSSVC_BIOD)
+ error = nfssvc_iod(p);
+ else if (uap->flag & NFSSVC_MNTD) {
+ if (error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)))
+ return (error);
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
+ ncd.ncd_dirp, p);
+ if (error = namei(&nd))
+ return (error);
+ if ((nd.ni_vp->v_flag & VROOT) == 0)
+ error = EINVAL;
+ nmp = VFSTONFS(nd.ni_vp->v_mount);
+ vput(nd.ni_vp);
+ if (error)
+ return (error);
+ if ((nmp->nm_flag & NFSMNT_MNTD) &&
+ (uap->flag & NFSSVC_GOTAUTH) == 0)
+ return (0);
+ nmp->nm_flag |= NFSMNT_MNTD;
+ error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
+ uap->argp, p);
+ } else if (uap->flag & NFSSVC_ADDSOCK) {
+ if (error = copyin(uap->argp, (caddr_t)&nfsdarg,
+ sizeof(nfsdarg)))
+ return (error);
+ if (error = getsock(p->p_fd, nfsdarg.sock, &fp))
+ return (error);
+ /*
+ * Get the client address for connected sockets.
+ */
+ if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
+ nam = (struct mbuf *)0;
+ else if (error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
+ MT_SONAME))
+ return (error);
+ error = nfssvc_addsock(fp, nam);
+ } else {
+ if (error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)))
+ return (error);
+ if ((uap->flag & NFSSVC_AUTHIN) && (nfsd = nsd->nsd_nfsd) &&
+ (nfsd->nd_slp->ns_flag & SLP_VALID)) {
+ slp = nfsd->nd_slp;
+
+ /*
+ * First check to see if another nfsd has already
+ * added this credential.
+ */
+ nuidp = slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+ while (nuidp) {
+ if (nuidp->nu_uid == nsd->nsd_uid)
+ break;
+ nuidp = nuidp->nu_hnext;
+ }
+ if (!nuidp) {
+ /*
+ * Nope, so we will.
+ */
+ if (slp->ns_numuids < nuidhash_max) {
+ slp->ns_numuids++;
+ nuidp = (struct nfsuid *)
+ malloc(sizeof (struct nfsuid), M_NFSUID,
+ M_WAITOK);
+ } else
+ nuidp = (struct nfsuid *)0;
+ if ((slp->ns_flag & SLP_VALID) == 0) {
+ if (nuidp)
+ free((caddr_t)nuidp, M_NFSUID);
+ } else {
+ if (nuidp == (struct nfsuid *)0) {
+ nuidp = slp->ns_lruprev;
+ remque(nuidp);
+ if (nuidp->nu_hprev)
+ nuidp->nu_hprev->nu_hnext =
+ nuidp->nu_hnext;
+ if (nuidp->nu_hnext)
+ nuidp->nu_hnext->nu_hprev =
+ nuidp->nu_hprev;
+ }
+ nuidp->nu_cr = nsd->nsd_cr;
+ if (nuidp->nu_cr.cr_ngroups > NGROUPS)
+ nuidp->nu_cr.cr_ngroups = NGROUPS;
+ nuidp->nu_cr.cr_ref = 1;
+ nuidp->nu_uid = nsd->nsd_uid;
+ insque(nuidp, (struct nfsuid *)slp);
+ nuh = &slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+ if (nuidp->nu_hnext = *nuh)
+ nuidp->nu_hnext->nu_hprev = nuidp;
+ nuidp->nu_hprev = (struct nfsuid *)0;
+ *nuh = nuidp;
+ }
+ }
+ }
+ if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
+ nfsd->nd_flag |= NFSD_AUTHFAIL;
+ error = nfssvc_nfsd(nsd, uap->argp, p);
+ }
+ if (error == EINTR || error == ERESTART)
+ error = 0;
+ return (error);
+}
+
+/*
+ * Adds a socket to the list for servicing by nfsds.
+ */
+nfssvc_addsock(fp, mynam)
+ struct file *fp;
+ struct mbuf *mynam;
+{
+ register struct mbuf *m;
+ register int siz;
+ register struct nfssvc_sock *slp;
+ register struct socket *so;
+ struct nfssvc_sock *tslp;
+ int error, s;
+
+ so = (struct socket *)fp->f_data;
+ tslp = (struct nfssvc_sock *)0;
+ /*
+ * Add it to the list, as required.
+ */
+ if (so->so_proto->pr_protocol == IPPROTO_UDP) {
+ tslp = nfs_udpsock;
+ if (tslp->ns_flag & SLP_VALID) {
+ m_freem(mynam);
+ return (EPERM);
+ }
+#ifdef ISO
+ } else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
+ tslp = nfs_cltpsock;
+ if (tslp->ns_flag & SLP_VALID) {
+ m_freem(mynam);
+ return (EPERM);
+ }
+#endif /* ISO */
+ }
+ if (so->so_type == SOCK_STREAM)
+ siz = NFS_MAXPACKET + sizeof (u_long);
+ else
+ siz = NFS_MAXPACKET;
+ if (error = soreserve(so, siz, siz)) {
+ m_freem(mynam);
+ return (error);
+ }
+
+ /*
+ * Set protocol specific options { for now TCP only } and
+ * reserve some space. For datagram sockets, this can get called
+ * repeatedly for the same socket, but that isn't harmful.
+ */
+ if (so->so_type == SOCK_STREAM) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+ }
+ if (so->so_proto->pr_domain->dom_family == AF_INET &&
+ so->so_proto->pr_protocol == IPPROTO_TCP) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+ }
+ so->so_rcv.sb_flags &= ~SB_NOINTR;
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_flags &= ~SB_NOINTR;
+ so->so_snd.sb_timeo = 0;
+ if (tslp)
+ slp = tslp;
+ else {
+ slp = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
+ slp->ns_prev = nfssvc_sockhead.ns_prev;
+ slp->ns_prev->ns_next = slp;
+ slp->ns_next = &nfssvc_sockhead;
+ nfssvc_sockhead.ns_prev = slp;
+ slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp;
+ }
+ slp->ns_so = so;
+ slp->ns_nam = mynam;
+ fp->f_count++;
+ slp->ns_fp = fp;
+ s = splnet();
+ so->so_upcallarg = (caddr_t)slp;
+ so->so_upcall = nfsrv_rcv;
+ slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
+ nfsrv_wakenfsd(slp);
+ splx(s);
+ return (0);
+}
+
+/*
+ * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
+ * until it is killed by a signal.
+ */
+nfssvc_nfsd(nsd, argp, p)
+ struct nfsd_srvargs *nsd;
+ caddr_t argp;
+ struct proc *p;
+{
+ register struct mbuf *m, *nam2;
+ register int siz;
+ register struct nfssvc_sock *slp;
+ register struct socket *so;
+ register int *solockp;
+ struct nfsd *nd = nsd->nsd_nfsd;
+ struct mbuf *mreq, *nam;
+ struct timeval starttime;
+ struct nfsuid *uidp;
+ int error, cacherep, s;
+ int sotype;
+
+ s = splnet();
+ if (nd == (struct nfsd *)0) {
+ nsd->nsd_nfsd = nd = (struct nfsd *)
+ malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
+ bzero((caddr_t)nd, sizeof (struct nfsd));
+ nd->nd_procp = p;
+ nd->nd_cr.cr_ref = 1;
+ insque(nd, &nfsd_head);
+ nd->nd_nqlflag = NQL_NOVAL;
+ nfs_numnfsd++;
+ }
+ /*
+ * Loop getting rpc requests until SIGKILL.
+ */
+ for (;;) {
+ if ((nd->nd_flag & NFSD_REQINPROG) == 0) {
+ while (nd->nd_slp == (struct nfssvc_sock *)0 &&
+ (nfsd_head.nd_flag & NFSD_CHECKSLP) == 0) {
+ nd->nd_flag |= NFSD_WAITING;
+ nfsd_waiting++;
+ error = tsleep((caddr_t)nd, PSOCK | PCATCH, "nfsd", 0);
+ nfsd_waiting--;
+ if (error)
+ goto done;
+ }
+ if (nd->nd_slp == (struct nfssvc_sock *)0 &&
+ (nfsd_head.nd_flag & NFSD_CHECKSLP)) {
+ slp = nfssvc_sockhead.ns_next;
+ while (slp != &nfssvc_sockhead) {
+ if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
+ == (SLP_VALID | SLP_DOREC)) {
+ slp->ns_flag &= ~SLP_DOREC;
+ slp->ns_sref++;
+ nd->nd_slp = slp;
+ break;
+ }
+ slp = slp->ns_next;
+ }
+ if (slp == &nfssvc_sockhead)
+ nfsd_head.nd_flag &= ~NFSD_CHECKSLP;
+ }
+ if ((slp = nd->nd_slp) == (struct nfssvc_sock *)0)
+ continue;
+ if (slp->ns_flag & SLP_VALID) {
+ if (slp->ns_flag & SLP_DISCONN)
+ nfsrv_zapsock(slp);
+ else if (slp->ns_flag & SLP_NEEDQ) {
+ slp->ns_flag &= ~SLP_NEEDQ;
+ (void) nfs_sndlock(&slp->ns_solock,
+ (struct nfsreq *)0);
+ nfsrv_rcv(slp->ns_so, (caddr_t)slp,
+ M_WAIT);
+ nfs_sndunlock(&slp->ns_solock);
+ }
+ error = nfsrv_dorec(slp, nd);
+ nd->nd_flag |= NFSD_REQINPROG;
+ }
+ } else {
+ error = 0;
+ slp = nd->nd_slp;
+ }
+ if (error || (slp->ns_flag & SLP_VALID) == 0) {
+ nd->nd_slp = (struct nfssvc_sock *)0;
+ nd->nd_flag &= ~NFSD_REQINPROG;
+ nfsrv_slpderef(slp);
+ continue;
+ }
+ splx(s);
+ so = slp->ns_so;
+ sotype = so->so_type;
+ starttime = time;
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED)
+ solockp = &slp->ns_solock;
+ else
+ solockp = (int *)0;
+ /*
+ * nam == nam2 for connectionless protocols such as UDP
+ * nam2 == NULL for connection based protocols to disable
+ * recent request caching.
+ */
+ if (nam2 = nd->nd_nam) {
+ nam = nam2;
+ cacherep = RC_CHECKIT;
+ } else {
+ nam = slp->ns_nam;
+ cacherep = RC_DOIT;
+ }
+
+ /*
+ * Check to see if authorization is needed.
+ */
+ if (nd->nd_flag & NFSD_NEEDAUTH) {
+ static int logauth = 0;
+
+ nd->nd_flag &= ~NFSD_NEEDAUTH;
+ /*
+ * Check for a mapping already installed.
+ */
+ uidp = slp->ns_uidh[NUIDHASH(nd->nd_cr.cr_uid)];
+ while (uidp) {
+ if (uidp->nu_uid == nd->nd_cr.cr_uid)
+ break;
+ uidp = uidp->nu_hnext;
+ }
+ if (!uidp) {
+ nsd->nsd_uid = nd->nd_cr.cr_uid;
+ if (nam2 && logauth++ == 0)
+ log(LOG_WARNING, "Kerberized NFS using UDP\n");
+ nsd->nsd_haddr =
+ mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+ nsd->nsd_authlen = nd->nd_authlen;
+ if (copyout(nd->nd_authstr, nsd->nsd_authstr,
+ nd->nd_authlen) == 0 &&
+ copyout((caddr_t)nsd, argp, sizeof (*nsd)) == 0)
+ return (ENEEDAUTH);
+ cacherep = RC_DROPIT;
+ }
+ }
+ if (cacherep == RC_CHECKIT)
+ cacherep = nfsrv_getcache(nam2, nd, &mreq);
+
+ /*
+ * Check for just starting up for NQNFS and send
+ * fake "try again later" replies to the NQNFS clients.
+ */
+ if (notstarted && nqnfsstarttime <= time.tv_sec) {
+ if (modify_flag) {
+ nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
+ modify_flag = 0;
+ } else
+ notstarted = 0;
+ }
+ if (notstarted) {
+ if (nd->nd_nqlflag == NQL_NOVAL)
+ cacherep = RC_DROPIT;
+ else if (nd->nd_procnum != NFSPROC_WRITE) {
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = NQNFS_TRYLATER;
+ cacherep = RC_DOIT;
+ } else
+ modify_flag = 1;
+ } else if (nd->nd_flag & NFSD_AUTHFAIL) {
+ nd->nd_flag &= ~NFSD_AUTHFAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = NQNFS_AUTHERR;
+ cacherep = RC_DOIT;
+ }
+
+ switch (cacherep) {
+ case RC_DOIT:
+ error = (*(nfsrv_procs[nd->nd_procnum]))(nd,
+ nd->nd_mrep, nd->nd_md, nd->nd_dpos, &nd->nd_cr,
+ nam, &mreq);
+ if (nd->nd_cr.cr_ref != 1) {
+ printf("nfssvc cref=%d\n", nd->nd_cr.cr_ref);
+ panic("nfssvc cref");
+ }
+ if (error) {
+ if (nd->nd_procnum != NQNFSPROC_VACATED)
+ nfsstats.srv_errs++;
+ if (nam2) {
+ nfsrv_updatecache(nam2, nd, FALSE, mreq);
+ m_freem(nam2);
+ }
+ break;
+ }
+ nfsstats.srvrpccnt[nd->nd_procnum]++;
+ if (nam2)
+ nfsrv_updatecache(nam2, nd, TRUE, mreq);
+ nd->nd_mrep = (struct mbuf *)0;
+ case RC_REPLY:
+ m = mreq;
+ siz = 0;
+ while (m) {
+ siz += m->m_len;
+ m = m->m_next;
+ }
+ if (siz <= 0 || siz > NFS_MAXPACKET) {
+ printf("mbuf siz=%d\n",siz);
+ panic("Bad nfs svc reply");
+ }
+ m = mreq;
+ m->m_pkthdr.len = siz;
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+ /*
+ * For stream protocols, prepend a Sun RPC
+ * Record Mark.
+ */
+ if (sotype == SOCK_STREAM) {
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ *mtod(m, u_long *) = htonl(0x80000000 | siz);
+ }
+ if (solockp)
+ (void) nfs_sndlock(solockp, (struct nfsreq *)0);
+ if (slp->ns_flag & SLP_VALID)
+ error = nfs_send(so, nam2, m, (struct nfsreq *)0);
+ else {
+ error = EPIPE;
+ m_freem(m);
+ }
+ if (nfsrtton)
+ nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+ if (nam2)
+ MFREE(nam2, m);
+ if (nd->nd_mrep)
+ m_freem(nd->nd_mrep);
+ if (error == EPIPE)
+ nfsrv_zapsock(slp);
+ if (solockp)
+ nfs_sndunlock(solockp);
+ if (error == EINTR || error == ERESTART) {
+ nfsrv_slpderef(slp);
+ s = splnet();
+ goto done;
+ }
+ break;
+ case RC_DROPIT:
+ if (nfsrtton)
+ nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+ m_freem(nd->nd_mrep);
+ m_freem(nam2);
+ break;
+ };
+ s = splnet();
+ if (nfsrv_dorec(slp, nd)) {
+ nd->nd_flag &= ~NFSD_REQINPROG;
+ nd->nd_slp = (struct nfssvc_sock *)0;
+ nfsrv_slpderef(slp);
+ }
+ }
+done:
+ remque(nd);
+ splx(s);
+ free((caddr_t)nd, M_NFSD);
+ nsd->nsd_nfsd = (struct nfsd *)0;
+ if (--nfs_numnfsd == 0)
+ nfsrv_init(TRUE); /* Reinitialize everything */
+ return (error);
+}
+
+/*
+ * Asynchronous I/O daemons for client nfs.
+ * They do read-ahead and write-behind operations on the block I/O cache.
+ * Never returns unless it fails or gets killed.
+ */
+nfssvc_iod(p)
+ struct proc *p;
+{
+ register struct buf *bp;
+ register int i, myiod;
+ int error = 0;
+
+ /*
+ * Assign my position or return error if too many already running
+ */
+ myiod = -1;
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ if (nfs_asyncdaemon[i] == 0) {
+ nfs_asyncdaemon[i]++;
+ myiod = i;
+ break;
+ }
+ if (myiod == -1)
+ return (EBUSY);
+ nfs_numasync++;
+ /*
+ * Just loop around doin our stuff until SIGKILL
+ */
+ for (;;) {
+ while (nfs_bufq.tqh_first == NULL && error == 0) {
+ nfs_iodwant[myiod] = p;
+ error = tsleep((caddr_t)&nfs_iodwant[myiod],
+ PWAIT | PCATCH, "nfsidl", 0);
+ }
+ while ((bp = nfs_bufq.tqh_first) != NULL) {
+ /* Take one off the front of the list */
+ TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
+ if (bp->b_flags & B_READ)
+ (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0);
+ else
+ (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0);
+ }
+ if (error) {
+ nfs_asyncdaemon[myiod] = 0;
+ nfs_numasync--;
+ return (error);
+ }
+ }
+}
+
+/*
+ * Shut down a socket associated with an nfssvc_sock structure.
+ * Should be called with the send lock set, if required.
+ * The trick here is to increment the sref at the start, so that the nfsds
+ * will stop using it and clear ns_flag at the end so that it will not be
+ * reassigned during cleanup. NOTE(review): the body below clears ns_flag first and never touches ns_sref -- confirm which is intended.
+ */
+nfsrv_zapsock(slp)
+ register struct nfssvc_sock *slp;
+{
+ register struct nfsuid *nuidp, *onuidp;
+ register int i;
+ struct socket *so;
+ struct file *fp;
+ struct mbuf *m; /* scratch output for MFREE(); its value is never used */
+
+ slp->ns_flag &= ~SLP_ALLFLAGS; /* drops every flag bit, SLP_VALID included */
+ if (fp = slp->ns_fp) { /* assignment intended: tear down only if a file is still attached */
+ slp->ns_fp = (struct file *)0;
+ so = slp->ns_so;
+ so->so_upcall = NULL; /* detach the upcall hook before shutting the socket down */
+ soshutdown(so, 2);
+ closef(fp, (struct proc *)0);
+ if (slp->ns_nam)
+ MFREE(slp->ns_nam, m);
+ m_freem(slp->ns_raw);
+ m_freem(slp->ns_rec); /* NOTE(review): ns_nam, ns_raw and ns_rec keep their old values after being freed */
+ nuidp = slp->ns_lrunext;
+ while (nuidp != (struct nfsuid *)slp) { /* circular LRU list; slp itself, cast to struct nfsuid *, is the sentinel */
+ onuidp = nuidp;
+ nuidp = nuidp->nu_lrunext; /* advance before freeing the current entry */
+ free((caddr_t)onuidp, M_NFSUID);
+ }
+ slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp; /* LRU list is empty again */
+ for (i = 0; i < NUIDHASHSIZ; i++)
+ slp->ns_uidh[i] = (struct nfsuid *)0; /* entries were freed via the LRU walk above */
+ }
+}
+
+/*
+ * Get an authorization string for the uid by having the mount_nfs sitting
+ * on this mount point porpoise out of the kernel and do it. Returns 0 with *auth_type, *auth_str (an RPCAUTH_MAXSIZ-byte M_TEMP buffer) and *auth_len set, or an errno with any buffer allocated here already freed. rep must be non-null: rep->r_procp is read.
+ */
+nfs_getauth(nmp, rep, cred, auth_type, auth_str, auth_len)
+ register struct nfsmount *nmp;
+ struct nfsreq *rep;
+ struct ucred *cred;
+ int *auth_type;
+ char **auth_str;
+ int *auth_len;
+{
+ int error = 0;
+
+ while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) { /* proceed only once NFSMNT_WAITAUTH is set on the mount */
+ nmp->nm_flag |= NFSMNT_WANTAUTH; /* request the wakeup issued at the bottom of this function */
+ (void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
+ "nfsauth1", 2 * hz); /* result ignored; the timeout lets the loop re-check for signals */
+ if (error = nfs_sigintr(nmp, rep, rep->r_procp)) {
+ nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+ return (error);
+ }
+ }
+ nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH); /* take over the handshake */
+ nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK); /* same buffer is visible through the mount and through the caller */
+ nmp->nm_authuid = cred->cr_uid;
+ wakeup((caddr_t)&nmp->nm_authstr); /* presumably wakes mount_nfs; the sleeper is not visible in this function */
+
+ /*
+ * And wait for mount_nfs to do its stuff.
+ */
+ while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) {
+ (void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
+ "nfsauth2", 2 * hz);
+ error = nfs_sigintr(nmp, rep, rep->r_procp);
+ }
+ if (nmp->nm_flag & NFSMNT_AUTHERR) { /* an auth-error flag overrides whatever error was set above */
+ nmp->nm_flag &= ~NFSMNT_AUTHERR;
+ error = EAUTH;
+ }
+ if (error)
+ free((caddr_t)*auth_str, M_TEMP); /* NOTE(review): *auth_str and nmp->nm_authstr still hold the freed pointer */
+ else {
+ *auth_type = nmp->nm_authtype;
+ *auth_len = nmp->nm_authlen;
+ }
+ nmp->nm_flag &= ~NFSMNT_HASAUTH;
+ nmp->nm_flag |= NFSMNT_WAITAUTH; /* hand the handshake back for the next requester */
+ if (nmp->nm_flag & NFSMNT_WANTAUTH) {
+ nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+ wakeup((caddr_t)&nmp->nm_authtype); /* releases callers sleeping in the first loop */
+ }
+ return (error);
+}
+
+/*
+ * Dereference a server socket structure. If it has no more references and
+ * is no longer valid, you can throw it away.
+ */
+void
+nfsrv_slpderef(slp)
+ register struct nfssvc_sock *slp;
+{
+ if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) { /* the count is decremented even when the entry is kept */
+ slp->ns_prev->ns_next = slp->ns_next; /* unlink from the doubly linked socket list */
+ slp->ns_next->ns_prev = slp->ns_prev;
+ free((caddr_t)slp, M_NFSSVC); /* slp is invalid for the caller from here on */
+ }
+}
+
+/*
+ * Initialize the data structures for the server.
+ * Handshake with any new nfsds starting up to avoid any chance of
+ * corruption. A nonzero `terminating' first zaps and frees every listed socket entry and cleans the server cache.
+ */
+void
+nfsrv_init(terminating)
+ int terminating;
+{
+ register struct nfssvc_sock *slp;
+ struct nfssvc_sock *oslp;
+
+ if (nfssvc_sockhead.ns_flag & SLP_INIT) /* SLP_INIT on the list head means an init is already running */
+ panic("nfsd init");
+ nfssvc_sockhead.ns_flag |= SLP_INIT;
+ if (terminating) {
+ slp = nfssvc_sockhead.ns_next;
+ while (slp != &nfssvc_sockhead) { /* NOTE(review): entries are freed here without consulting ns_sref */
+ if (slp->ns_flag & SLP_VALID)
+ nfsrv_zapsock(slp);
+ slp->ns_next->ns_prev = slp->ns_prev;
+ slp->ns_prev->ns_next = slp->ns_next;
+ oslp = slp;
+ slp = slp->ns_next; /* step forward before freeing the entry just unlinked */
+ free((caddr_t)oslp, M_NFSSVC);
+ }
+ nfsrv_cleancache(); /* And clear out server cache */
+ }
+ nfs_udpsock = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
+ nfs_cltpsock = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
+ nfssvc_sockhead.ns_next = nfs_udpsock; /* forward ring: head -> udp -> cltp -> head */
+ nfs_udpsock->ns_next = nfs_cltpsock;
+ nfs_cltpsock->ns_next = &nfssvc_sockhead;
+ nfssvc_sockhead.ns_prev = nfs_cltpsock; /* backward ring mirrors the forward one */
+ nfs_cltpsock->ns_prev = nfs_udpsock;
+ nfs_udpsock->ns_prev = &nfssvc_sockhead;
+ nfs_udpsock->ns_lrunext = nfs_udpsock->ns_lruprev =
+ (struct nfsuid *)nfs_udpsock; /* empty uid LRU list: the entry is its own sentinel */
+ nfs_cltpsock->ns_lrunext = nfs_cltpsock->ns_lruprev =
+ (struct nfsuid *)nfs_cltpsock;
+ nfsd_head.nd_next = nfsd_head.nd_prev = &nfsd_head; /* empty nfsd list */
+ nfsd_head.nd_flag = 0;
+ nfssvc_sockhead.ns_flag &= ~SLP_INIT;
+ if (nfssvc_sockhead.ns_flag & SLP_WANTINIT) { /* someone asked to be woken once init finished */
+ nfssvc_sockhead.ns_flag &= ~SLP_WANTINIT;
+ wakeup((caddr_t)&nfssvc_sockhead);
+ }
+}
+
+/*
+ * Add entries to the server monitor log: fills the slot at nfsdrt.pos and advances pos, wrapping at NFSRTTLOGSIZ.
+ */
+static void
+nfsd_rt(startp, sotype, nd, nam, cacherep)
+ struct timeval *startp; /* subtracted from the global `time' to get the response time */
+ int sotype; /* socket type; only SOCK_STREAM is distinguished */
+ register struct nfsd *nd;
+ struct mbuf *nam; /* holds a struct sockaddr; dereferenced unconditionally, so must be non-null */
+ int cacherep; /* RC_DOIT, RC_REPLY, or anything else (logged as a cache drop) */
+{
+ register struct drt *rt;
+
+ rt = &nfsdrt.drt[nfsdrt.pos];
+ if (cacherep == RC_DOIT)
+ rt->flag = 0;
+ else if (cacherep == RC_REPLY)
+ rt->flag = DRT_CACHEREPLY;
+ else
+ rt->flag = DRT_CACHEDROP;
+ if (sotype == SOCK_STREAM)
+ rt->flag |= DRT_TCP;
+ if (nd->nd_nqlflag != NQL_NOVAL)
+ rt->flag |= DRT_NQNFS;
+ rt->proc = nd->nd_procnum;
+ if (mtod(nam, struct sockaddr *)->sa_family == AF_INET)
+ rt->ipadr = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+ else
+ rt->ipadr = INADDR_ANY; /* non-AF_INET peers are logged without an address */
+ rt->resptime = ((time.tv_sec - startp->tv_sec) * 1000000) +
+ (time.tv_usec - startp->tv_usec); /* elapsed time in microseconds */
+ rt->tstamp = time;
+ nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ; /* oldest entries are overwritten once the log wraps */
+}
diff --git a/sys/nfsserver/nfsm_subs.h b/sys/nfsserver/nfsm_subs.h
new file mode 100644
index 000000000000..879db3600577
--- /dev/null
+++ b/sys/nfsserver/nfsm_subs.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsm_subs.h 8.1 (Berkeley) 6/16/93
+ */
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. To attempt to use them for any
+ * other purpose will be dangerous. (they make weird assumptions)
+ */
+
+/*
+ * First define what the actual subs. return
+ */
+extern struct mbuf *nfsm_reqh();
+
+#define M_HASCL(m) ((m)->m_flags & M_EXT) /* nonzero when m has M_EXT set */
+#define NFSMINOFF(m) /* reset m_data to the start of whichever buffer m uses; expands to a bare if/else chain, so use it only as a complete statement */ \
+ if (M_HASCL(m)) \
+ (m)->m_data = (m)->m_ext.ext_buf; \
+ else if ((m)->m_flags & M_PKTHDR) \
+ (m)->m_data = (m)->m_pktdat; \
+ else \
+ (m)->m_data = (m)->m_dat
+#define NFSMADV(m, s) (m)->m_data += (s) /* move m_data forward by s bytes; m_len is not adjusted */
+#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: /* buffer size matching the three cases of NFSMINOFF */ \
+ (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several vars. declared in nfsm_reqhead and these
+ * vars. must not be used elsewhere unless you are careful not to corrupt
+ * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries
+ * that may be used so long as the value is not expected to be retained
+ * after a macro.
+ * I know, this is kind of dorky, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
+ */
+
+#define nfsm_build(a,c,s) /* reserve s bytes at bpos in mbuf mb and point a (cast to c) at them; relies on the caller's mb, mb2 and bpos */ \
+ { if ((s) > M_TRAILINGSPACE(mb)) { /* no room left: chain on a fresh mbuf */ \
+ MGET(mb2, M_WAIT, MT_DATA); \
+ if ((s) > MLEN) /* a single request may not exceed one plain mbuf */ \
+ panic("build > MLEN"); \
+ mb->m_next = mb2; \
+ mb = mb2; \
+ mb->m_len = 0; \
+ bpos = mtod(mb, caddr_t); \
+ } \
+ (a) = (c)(bpos); \
+ mb->m_len += (s); \
+ bpos += (s); }
+
+#define nfsm_dissect(a,c,s) /* point a (cast to c) at the next s bytes at md/dpos; on failure sets error, frees mrep and jumps to nfsmout */ \
+ { t1 = mtod(md, caddr_t)+md->m_len-dpos; /* bytes remaining in the current mbuf */ \
+ if (t1 >= (s)) { \
+ (a) = (c)(dpos); \
+ dpos += (s); \
+ } else if (error = nfsm_disct(&md, &dpos, (s), t1, &cp2)) { /* not enough left here; nfsm_disct() is expected to supply the data through cp2 */ \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } else { \
+ (a) = (c)cp2; \
+ } }
+
+#define nfsm_fhtom(v) /* append the NFSX_FH-byte file handle of vnode v; expands to two statements and uses the caller's cp */ \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)&(VTONFS(v)->n_fh), cp, NFSX_FH)
+
+#define nfsm_srvfhtom(f) /* append NFSX_FH bytes copied from the file handle at f; expands to two statements and uses the caller's cp */ \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)(f), cp, NFSX_FH)
+
+#define nfsm_mtofh(d,v) /* read a file handle, look up its nfsnode on d's mount, store the vnode in v and load its attributes */ \
+ { struct nfsnode *np; nfsv2fh_t *fhp; \
+ nfsm_dissect(fhp,nfsv2fh_t *,NFSX_FH); \
+ if (error = nfs_nget((d)->v_mount, fhp, &np)) { /* lookup failed: free mrep and bail out */ \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = NFSTOV(np); \
+ nfsm_loadattr(v, (struct vattr *)0); /* null vattr pointer passed through to nfs_loadattrcache() */ \
+ }
+
+#define nfsm_loadattr(v,a) /* run nfs_loadattrcache() at md/dpos for vnode v, passing a; v is rewritten because the helper takes the vnode by reference */ \
+ { struct vnode *tvp = (v); \
+ if (error = nfs_loadattrcache(&tvp, &md, &dpos, (a))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = tvp; }
+
+#define nfsm_strsiz(s,m) /* read a length word into s; fail with EBADRPC if it exceeds m */ \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m)) { /* NOTE(review): unlike nfsm_srvstrsiz there is no (s) <= 0 check here */ \
+ m_freem(mrep); \
+ error = EBADRPC; \
+ goto nfsmout; \
+ } }
+
+#define nfsm_srvstrsiz(s,m) /* server side: read a length word into s; reject values above m or not positive */ \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m) || (s) <= 0) { \
+ error = EBADRPC; \
+ nfsm_reply(0); /* with error set, nfsm_reply() returns from the enclosing function */ \
+ } }
+
+#define nfsm_mtouio(p,s) /* hand s bytes at md/dpos to nfsm_mbuftouio() for uio p; nothing is done when s <= 0 */ \
+ if ((s) > 0 && \
+ (error = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+
+#define nfsm_uiotom(p,s) /* hand s bytes of uio p to nfsm_uiotombuf() at mb/bpos; on failure the request chain mreq is freed */ \
+ if (error = nfsm_uiotombuf((p),&mb,(s),&bpos)) { \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+
+#define nfsm_reqhead(v,a,s) /* start a request: nfsm_reqh() returns the mbuf stored in both mb and mreq, and sets bpos */ \
+ mb = mreq = nfsm_reqh((v),(a),(s),&bpos)
+
+#define nfsm_reqdone m_freem(mrep); /* frees the reply, then plants the nfsmout label the other macros jump to */ \
+ nfsmout:
+
+#define nfsm_rndup(a) (((a)+3)&(~0x3)) /* round a up to the next multiple of 4 */
+
+#define nfsm_request(v, t, p, c) /* issue mreq via nfs_request(); reply state comes back in mrep, md and dpos; on error jump to nfsmout without freeing anything here */ \
+ if (error = nfs_request((v), mreq, (t), (p), \
+ (c), &mrep, &md, &dpos)) \
+ goto nfsmout
+
+#define nfsm_strtom(a,s,m) /* append the s bytes at a as a length word plus data padded to 4 bytes; ENAMETOOLONG if s > m */ \
+ if ((s) > (m)) { \
+ m_freem(mreq); \
+ error = ENAMETOOLONG; \
+ goto nfsmout; \
+ } \
+ t2 = nfsm_rndup(s)+NFSX_UNSIGNED; /* padded data plus the length word */ \
+ if (t2 <= M_TRAILINGSPACE(mb)) { /* fits in the current mbuf: build it in place */ \
+ nfsm_build(tl,u_long *,t2); \
+ *tl++ = txdr_unsigned(s); \
+ *(tl+((t2>>2)-2)) = 0; /* zero the last word first so the pad bytes are 0 */ \
+ bcopy((caddr_t)(a), (caddr_t)tl, (s)); \
+ } else if (error = nfsm_strtmbuf(&mb, &bpos, (a), (s))) { /* otherwise let the helper handle it */ \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+
+#define nfsm_srvdone /* server-side epilogue: plants the nfsmout label and returns error */ \
+ nfsmout: \
+ return(error)
+
+#define nfsm_reply(s) /* build the reply header via nfs_rephead(); s is passed through only when error is 0 */ \
+ { \
+ nfsd->nd_repstat = error; /* the status is recorded on the nfsd */ \
+ if (error) \
+ (void) nfs_rephead(0, nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ else \
+ (void) nfs_rephead((s), nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ m_freem(mrep); /* the request chain is freed in both cases */ \
+ mreq = *mrq; \
+ if (error) \
+ return(0); /* the enclosing function returns 0 even though error is set */ \
+ }
+
+#define nfsm_adv(s) /* skip s bytes of the request at md/dpos; expands to an assignment plus a bare if, not one block */ \
+ t1 = mtod(md, caddr_t)+md->m_len-dpos; /* bytes remaining in the current mbuf */ \
+ if (t1 >= (s)) { \
+ dpos += (s); \
+ } else if (error = nfs_adv(&md, &dpos, (s), t1)) { /* not enough left here: defer to nfs_adv() */ \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+
+#define nfsm_srvmtofh(f) /* read NFSX_FH bytes from the request and copy them to f; two statements, and f is not parenthesized in the cast */ \
+ nfsm_dissect(tl, u_long *, NFSX_FH); \
+ bcopy((caddr_t)tl, (caddr_t)f, NFSX_FH)
+
+#define nfsm_clget /* make tl point at writable space at bp, adding a new mbuf (MGET + MCLGET) once bp reaches be */ \
+ if (bp >= be) { \
+ if (mp == mb) /* still on the original mbuf: account for what was written after bpos */ \
+ mp->m_len += bp-bpos; \
+ MGET(mp, M_WAIT, MT_DATA); \
+ MCLGET(mp, M_WAIT); \
+ mp->m_len = NFSMSIZ(mp); /* length is set to the full buffer size up front */ \
+ mp2->m_next = mp; \
+ mp2 = mp; \
+ bp = mtod(mp, caddr_t); \
+ be = bp+mp->m_len; \
+ } \
+ tl = (u_long *)bp
+
+#define nfsm_srvfillattr /* fill the attribute record fp from vap; field layout depends on nfsd->nd_nqlflag */ \
+ fp->fa_type = vtonfs_type(vap->va_type); \
+ fp->fa_mode = vtonfs_mode(vap->va_type, vap->va_mode); \
+ fp->fa_nlink = txdr_unsigned(vap->va_nlink); \
+ fp->fa_uid = txdr_unsigned(vap->va_uid); \
+ fp->fa_gid = txdr_unsigned(vap->va_gid); \
+ if (nfsd->nd_nqlflag == NQL_NOVAL) { /* fa_nfs* fields: 32-bit size and block count */ \
+ fp->fa_nfsblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) /* fifos get an all-ones rdev */ \
+ fp->fa_nfsrdev = 0xffffffff; \
+ else \
+ fp->fa_nfsrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nfsfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nfsfileid = txdr_unsigned(vap->va_fileid); \
+ fp->fa_nfssize = txdr_unsigned(vap->va_size); \
+ fp->fa_nfsblocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); \
+ txdr_nfstime(&vap->va_atime, &fp->fa_nfsatime); \
+ txdr_nfstime(&vap->va_mtime, &fp->fa_nfsmtime); \
+ fp->fa_nfsctime.nfs_sec = txdr_unsigned(vap->va_ctime.ts_sec); \
+ fp->fa_nfsctime.nfs_usec = txdr_unsigned(vap->va_gen); /* the usec slot of ctime carries va_gen, not a time */ \
+ } else { /* fa_nq* fields: size, bytes and filerev go through txdr_hyper() */ \
+ fp->fa_nqblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nqrdev = 0xffffffff; \
+ else \
+ fp->fa_nqrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nqfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nqfileid = txdr_unsigned(vap->va_fileid); \
+ txdr_hyper(&vap->va_size, &fp->fa_nqsize); \
+ txdr_hyper(&vap->va_bytes, &fp->fa_nqbytes); \
+ txdr_nqtime(&vap->va_atime, &fp->fa_nqatime); \
+ txdr_nqtime(&vap->va_mtime, &fp->fa_nqmtime); \
+ txdr_nqtime(&vap->va_ctime, &fp->fa_nqctime); \
+ fp->fa_nqflags = txdr_unsigned(vap->va_flags); \
+ fp->fa_nqgen = txdr_unsigned(vap->va_gen); \
+ txdr_hyper(&vap->va_filerev, &fp->fa_nqfilerev); \
+ }
+
diff --git a/sys/nfsserver/nfsrvcache.h b/sys/nfsserver/nfsrvcache.h
new file mode 100644
index 000000000000..26da2c275df6
--- /dev/null
+++ b/sys/nfsserver/nfsrvcache.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsrvcache.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for the server recent request cache
+ */
+
+#define NFSRVCACHESIZ 256
+
+struct nfsrvcache { /* one entry of the server recent request cache */
+ struct nfsrvcache *rc_forw; /* Hash chain links */
+ struct nfsrvcache **rc_back; /* Hash chain links */
+ struct nfsrvcache *rc_next; /* Lru list */
+ struct nfsrvcache **rc_prev; /* Lru list */
+ u_long rc_xid; /* rpc id number */
+ union { /* presumably RC_REPMBUF / RC_REPSTATUS in rc_flag say which member is live -- confirm */
+ struct mbuf *ru_repmb; /* Reply mbuf list OR */
+ int ru_repstat; /* Reply status */
+ } rc_un;
+ union nethostaddr rc_haddr; /* Host address; presumably RC_INETADDR / RC_NAM select the member -- confirm */
+ short rc_proc; /* rpc proc number */
+ u_char rc_state; /* Current state of request */
+ u_char rc_flag; /* Flag bits */
+};
+
+#define rc_reply rc_un.ru_repmb
+#define rc_status rc_un.ru_repstat
+#define rc_inetaddr rc_haddr.had_inetaddr
+#define rc_nam rc_haddr.had_nam
+
+/* Cache entry states */
+#define RC_UNUSED 0
+#define RC_INPROG 1
+#define RC_DONE 2
+
+/* Return values */
+#define RC_DROPIT 0
+#define RC_REPLY 1
+#define RC_DOIT 2
+#define RC_CHECKIT 3
+
+/* Flag bits */
+#define RC_LOCKED 0x01
+#define RC_WANTED 0x02
+#define RC_REPSTATUS 0x04
+#define RC_REPMBUF 0x08
+#define RC_NQNFS 0x10
+#define RC_INETADDR 0x20
+#define RC_NAM 0x40
diff --git a/sys/nfsserver/nfsrvstats.h b/sys/nfsserver/nfsrvstats.h
new file mode 100644
index 000000000000..261fd42657a7
--- /dev/null
+++ b/sys/nfsserver/nfsrvstats.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define NFS_MAXIOVEC 34
+#define NFS_HZ 25 /* Ticks per second for NFS timeouts */
+#define NFS_TIMEO (1*NFS_HZ) /* Default timeout = 1 second */
+#define NFS_MINTIMEO (1*NFS_HZ) /* Min timeout to use */
+#define NFS_MAXTIMEO (60*NFS_HZ) /* Max timeout to backoff to */
+#define NFS_MINIDEMTIMEO (5*NFS_HZ) /* Min timeout for non-idempotent ops*/
+#define NFS_MAXREXMIT 100 /* Stop counting after this many */
+#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
+#define NFS_RETRANS 10 /* Num of retrans for soft mounts */
+#define NFS_MAXGRPS 16 /* Max. size of groups list */
+#define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */
+#define NFS_MAXATTRTIMO 60
+#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
+#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
+#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
+#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
+#define NFS_MAXREADDIR NFS_MAXDATA /* Max. size of directory read */
+#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
+#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runnable */
+#define NFS_DIRBLKSIZ 1024 /* Size of an NFS directory block */
+#define NMOD(a) ((a) % nfs_asyncdaemons) /* NOTE(review): relies on a symbol named nfs_asyncdaemons being in scope at the point of use -- confirm it exists */
+
+/*
+ * Set the attribute timeout based on how recently the file has been modified: one tenth of the seconds since n_mtime, clamped to [NFS_MINATTRTIMO, NFS_MAXATTRTIMO], or the minimum outright when NMODIFIED is set. np is evaluated several times.
+ */
+#define NFS_ATTRTIMEO(np) \
+ ((((np)->n_flag & NMODIFIED) || \
+ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+ (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try and use it.
+ */
+struct nfsd_args {
+ int sock; /* Socket to serve */
+ caddr_t name; /* Client address for connection based sockets */
+ int namelen; /* Length of name */
+};
+
+struct nfsd_srvargs {
+ struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */
+ uid_t nsd_uid; /* Effective uid mapped to cred */
+ u_long nsd_haddr; /* Ip address of client */
+ struct ucred nsd_cr; /* Cred. uid maps to */
+ int nsd_authlen; /* Length of auth string (ret) */
+ char *nsd_authstr; /* Auth string (ret) */
+};
+
+struct nfsd_cargs {
+ char *ncd_dirp; /* Mount dir path */
+ uid_t ncd_authuid; /* Effective uid */
+ int ncd_authtype; /* Type of authenticator */
+ int ncd_authlen; /* Length of authenticator string */
+ char *ncd_authstr; /* Authenticator string */
+};
+
+/*
+ * Stats structure
+ */
+struct nfsstats {
+ int attrcache_hits;
+ int attrcache_misses;
+ int lookupcache_hits;
+ int lookupcache_misses;
+ int direofcache_hits;
+ int direofcache_misses;
+ int biocache_reads;
+ int read_bios;
+ int read_physios;
+ int biocache_writes;
+ int write_bios;
+ int write_physios;
+ int biocache_readlinks;
+ int readlink_bios;
+ int biocache_readdirs;
+ int readdir_bios;
+ int rpccnt[NFS_NPROCS];
+ int rpcretries;
+ int srvrpccnt[NFS_NPROCS];
+ int srvrpc_errs;
+ int srv_errs;
+ int rpcrequests;
+ int rpctimeouts;
+ int rpcunexpected;
+ int rpcinvalid;
+ int srvcache_inproghits;
+ int srvcache_idemdonehits;
+ int srvcache_nonidemdonehits;
+ int srvcache_misses;
+ int srvnqnfs_leases;
+ int srvnqnfs_maxleases;
+ int srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ */
+#define NFSSVC_BIOD 0x002
+#define NFSSVC_NFSD 0x004
+#define NFSSVC_ADDSOCK 0x008
+#define NFSSVC_AUTHIN 0x010
+#define NFSSVC_GOTAUTH 0x040
+#define NFSSVC_AUTHINFAIL 0x080
+#define NFSSVC_MNTD 0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+ sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all (s is a flag word tested for PR_CONNREQUIRED, e the error; EINTR, ERESTART and EWOULDBLOCK are never ignored)
+ */
+#define NFSIGNORE_SOERROR(s, e) \
+ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+ ((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element
+ */
+struct nfsreq {
+ struct nfsreq *r_next;
+ struct nfsreq *r_prev;
+ struct mbuf *r_mreq;
+ struct mbuf *r_mrep;
+ struct mbuf *r_md;
+ caddr_t r_dpos;
+ struct nfsmount *r_nmp;
+ struct vnode *r_vp;
+ u_long r_xid;
+ int r_flags; /* flags on request, see below */
+ int r_retry; /* max retransmission count */
+ int r_rexmit; /* current retrans count */
+ int r_timer; /* tick counter on reply */
+ int r_procnum; /* NFS procedure number */
+ int r_rtt; /* RTT for rpc */
+ struct proc *r_procp; /* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING 0x01 /* timing request (in mntp) */
+#define R_SENT 0x02 /* request has been sent */
+#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
+#define R_INTR 0x08 /* intr mnt, signal pending */
+#define R_SOCKERR 0x10 /* Fatal error on socket */
+#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
+#define R_MUSTRESEND 0x40 /* Must resend request */
+#define R_GETONEREP 0x80 /* Probe for one reply only */
+
+struct nfsstats nfsstats; /* NOTE(review): object defined in a header (inside #ifdef KERNEL), so every includer emits a tentative definition -- confirm the toolchain merges them */
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+#define NUIDHASHSIZ 32
+#define NUIDHASH(uid) ((uid) & (NUIDHASHSIZ - 1))
+
+/*
+ * Network address hash list element
+ */
+union nethostaddr {
+ u_long had_inetaddr;
+ struct mbuf *had_nam;
+};
+
+struct nfsuid {
+ struct nfsuid *nu_lrunext; /* MUST be first */
+ struct nfsuid *nu_lruprev;
+ struct nfsuid *nu_hnext;
+ struct nfsuid *nu_hprev;
+ int nu_flag; /* Flags */
+ uid_t nu_uid; /* Uid mapped by this entry */
+ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */
+ struct ucred nu_cr; /* Cred uid mapped to */
+};
+
+#define nu_inetaddr nu_haddr.had_inetaddr
+#define nu_nam nu_haddr.had_nam
+/* Bits for nu_flag */
+#define NU_INETADDR 0x1
+
+struct nfssvc_sock {
+ struct nfsuid *ns_lrunext; /* MUST be first: the sock is cast to struct nfsuid * to act as the uid LRU list head */
+ struct nfsuid *ns_lruprev;
+ struct nfssvc_sock *ns_next; /* doubly linked list of socket entries */
+ struct nfssvc_sock *ns_prev;
+ int ns_flag; /* SLP_ bits, see below */
+ u_long ns_sref; /* reference count; the entry is freed when it reaches 0 and SLP_VALID is clear */
+ struct file *ns_fp; /* file for the socket; null once the socket has been shut down */
+ struct socket *ns_so;
+ int ns_solock;
+ struct mbuf *ns_nam; /* single mbuf, freed with MFREE at shutdown; presumably the peer address -- confirm */
+ int ns_cc;
+ struct mbuf *ns_raw; /* mbuf chain, freed at shutdown */
+ struct mbuf *ns_rawend;
+ int ns_reclen;
+ struct mbuf *ns_rec; /* mbuf chain, freed at shutdown */
+ struct mbuf *ns_recend;
+ int ns_numuids;
+ struct nfsuid *ns_uidh[NUIDHASHSIZ]; /* uid hash bucket heads, see NUIDHASH() */
+};
+
+/* Bits for "ns_flag" */
+#define SLP_VALID 0x01
+#define SLP_DOREC 0x02
+#define SLP_NEEDQ 0x04
+#define SLP_DISCONN 0x08
+#define SLP_GETSTREAM 0x10
+#define SLP_INIT 0x20
+#define SLP_WANTINIT 0x40
+
+#define SLP_ALLFLAGS 0xff
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+struct nfsd {
+ struct nfsd *nd_next; /* Must be first */
+ struct nfsd *nd_prev;
+ int nd_flag; /* NFSD_ flags */
+ struct nfssvc_sock *nd_slp; /* Current socket */
+ struct mbuf *nd_nam; /* Client addr for datagram req. */
+ struct mbuf *nd_mrep; /* Req. mbuf list */
+ struct mbuf *nd_md;
+ caddr_t nd_dpos; /* Position in list */
+ int nd_procnum; /* RPC procedure number */
+ u_long nd_retxid; /* RPC xid */
+ int nd_repstat; /* Reply status value */
+ struct ucred nd_cr; /* Credentials for req. */
+ int nd_nqlflag; /* Leasing flag */
+ int nd_duration; /* Lease duration */
+ int nd_authlen; /* Authenticator len */
+ u_char nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+ struct proc *nd_procp; /* Proc ptr */
+};
+
+#define NFSD_WAITING 0x01
+#define NFSD_CHECKSLP 0x02
+#define NFSD_REQINPROG 0x04
+#define NFSD_NEEDAUTH 0x08
+#define NFSD_AUTHFAIL 0x10
+#endif /* KERNEL */
diff --git a/sys/sys/_sigset.h b/sys/sys/_sigset.h
new file mode 100644
index 000000000000..8ccded41c3be
--- /dev/null
+++ b/sys/sys/_sigset.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)signal.h 8.2 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_SIGNAL_H_
+#define _SYS_SIGNAL_H_
+
+#define NSIG 32 /* counting 0; could be 33 (mask is 1-32) */
+
+#ifndef _ANSI_SOURCE
+#include <machine/signal.h> /* sigcontext; codes for SIGILL, SIGFPE */
+#endif
+
+#define SIGHUP 1 /* hangup */
+#define SIGINT 2 /* interrupt */
+#define SIGQUIT 3 /* quit */
+#define SIGILL 4 /* illegal instruction (not reset when caught) */
+#ifndef _POSIX_SOURCE
+#define SIGTRAP 5 /* trace trap (not reset when caught) */
+#endif
+#define SIGABRT 6 /* abort() */
+#ifndef _POSIX_SOURCE
+#define SIGIOT SIGABRT /* compatibility */
+#define SIGEMT 7 /* EMT instruction */
+#endif
+#define SIGFPE 8 /* floating point exception */
+#define SIGKILL 9 /* kill (cannot be caught or ignored) */
+#ifndef _POSIX_SOURCE
+#define SIGBUS 10 /* bus error */
+#endif
+#define SIGSEGV 11 /* segmentation violation */
+#ifndef _POSIX_SOURCE
+#define SIGSYS 12 /* bad argument to system call */
+#endif
+#define SIGPIPE 13 /* write on a pipe with no one to read it */
+#define SIGALRM 14 /* alarm clock */
+#define SIGTERM 15 /* software termination signal from kill */
+#ifndef _POSIX_SOURCE
+#define SIGURG 16 /* urgent condition on IO channel */
+#endif
+#define SIGSTOP 17 /* sendable stop signal not from tty */
+#define SIGTSTP 18 /* stop signal from tty */
+#define SIGCONT 19 /* continue a stopped process */
+#define SIGCHLD 20 /* to parent on child stop or exit */
+#define SIGTTIN 21 /* to readers pgrp upon background tty read */
+#define SIGTTOU 22 /* like TTIN for output if (tp->t_local&LTOSTOP) */
+#ifndef _POSIX_SOURCE
+#define SIGIO 23 /* input/output possible signal */
+#define SIGXCPU 24 /* exceeded CPU time limit */
+#define SIGXFSZ 25 /* exceeded file size limit */
+#define SIGVTALRM 26 /* virtual time alarm */
+#define SIGPROF 27 /* profiling time alarm */
+#define SIGWINCH 28 /* window size changes */
+#define SIGINFO 29 /* information request */
+#endif
+#define SIGUSR1 30 /* user defined signal 1 */
+#define SIGUSR2 31 /* user defined signal 2 */
+
+#if defined(_ANSI_SOURCE) || defined(__cplusplus)
+/*
+ * Language spec sez we must list exactly one parameter, even though we
+ * actually supply three. Ugh!
+ */
+#define SIG_DFL (void (*)(int))0
+#define SIG_IGN (void (*)(int))1
+#define SIG_ERR (void (*)(int))-1
+#else
+#define SIG_DFL (void (*)())0
+#define SIG_IGN (void (*)())1
+#define SIG_ERR (void (*)())-1
+#endif
+
+#ifndef _ANSI_SOURCE
+typedef unsigned int sigset_t;
+
+/*
+ * Signal vector "template" used in sigaction call.
+ */
+struct sigaction {
+ void (*sa_handler)(); /* signal handler */
+ sigset_t sa_mask; /* signal mask to apply */
+ int sa_flags; /* SA_ options defined below */
+};
+#ifndef _POSIX_SOURCE
+#define SA_ONSTACK 0x0001 /* take signal on signal stack */
+#define SA_RESTART 0x0002 /* restart system call on signal return */
+#define SA_DISABLE 0x0004 /* disable taking signals on alternate stack */
+#ifdef COMPAT_SUNOS
+#define SA_USERTRAMP 0x0100 /* do not bounce off kernel's sigtramp */
+#endif /* COMPAT_SUNOS */
+#endif /* !_POSIX_SOURCE */
+#define SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */
+
+/*
+ * Flags for sigprocmask:
+ */
+#define SIG_BLOCK 1 /* block specified signal set */
+#define SIG_UNBLOCK 2 /* unblock specified signal set */
+#define SIG_SETMASK 3 /* set specified signal set */
+
+#ifndef _POSIX_SOURCE
+#ifndef KERNEL
+#include <sys/cdefs.h>
+#endif
+typedef void (*sig_t) __P((int)); /* type of signal function */
+
+/*
+ * Structure used in sigaltstack call.
+ */
+struct sigaltstack {
+ char *ss_base; /* signal stack base */
+ int ss_size; /* signal stack length */
+ int ss_flags; /* SA_DISABLE and/or SA_ONSTACK */
+};
+#define MINSIGSTKSZ 8192 /* minimum allowable stack */
+#define SIGSTKSZ (MINSIGSTKSZ + 32768) /* recommended stack size */
+
+/*
+ * 4.3 compatibility:
+ * Signal vector "template" used in sigvec call.
+ */
+struct sigvec {
+ void (*sv_handler)(); /* signal handler */
+ int sv_mask; /* signal mask to apply */
+ int sv_flags; /* see signal options below */
+};
+
+#define SV_ONSTACK SA_ONSTACK
+#define SV_INTERRUPT SA_RESTART /* same bit, opposite sense */
+#define sv_onstack sv_flags /* isn't compatibility wonderful! */
+
+/*
+ * Structure used in sigstack call.
+ */
+struct sigstack {
+ char *ss_sp; /* signal stack pointer */
+ int ss_onstack; /* current status */
+};
+
+/*
+ * Macro for converting signal number to a mask suitable for
+ * sigblock().
+ */
+#define sigmask(m) (1 << ((m)-1)) /* signals count from 1: signal m is bit m-1 */
+
+#define BADSIG SIG_ERR
+
+#endif /* !_POSIX_SOURCE */
+#endif /* !_ANSI_SOURCE */
+
+/*
+ * For historical reasons; programs expect signal's return value to be
+ * defined by <sys/signal.h>.
+ */
+__BEGIN_DECLS
+void (*signal __P((int, void (*) __P((int))))) __P((int)); /* signal(int, handler) yields a pointer to a void function of int */
+__END_DECLS
+#endif /* !_SYS_SIGNAL_H_ */
diff --git a/sys/sys/acct.h b/sys/sys/acct.h
new file mode 100644
index 000000000000..edc5bdbd5638
--- /dev/null
+++ b/sys/sys/acct.h
@@ -0,0 +1,75 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)acct.h 8.2 (Berkeley) 1/21/94
+ */
+
+/*
+ * Accounting structures; these use a comp_t type which is a 3-bit base-8
+ * exponent, 13-bit fraction ``floating point'' number. Units are 1/AHZ
+ * seconds.
+ */
+typedef u_short comp_t;
+
+struct acct {
+ char ac_comm[10]; /* command name */
+ comp_t ac_utime; /* user time */
+ comp_t ac_stime; /* system time */
+ comp_t ac_etime; /* elapsed time */
+ time_t ac_btime; /* starting time */
+ uid_t ac_uid; /* user id */
+ gid_t ac_gid; /* group id */
+ short ac_mem; /* average memory usage */
+ comp_t ac_io; /* count of IO blocks */
+ dev_t ac_tty; /* controlling tty */
+#define AFORK 0x01 /* forked but not execed */
+#define ASU 0x02 /* used super-user permissions */
+#define ACOMPAT 0x04 /* used compatibility mode */
+#define ACORE 0x08 /* dumped core */
+#define AXSIG 0x10 /* killed by a signal */
+ char ac_flag; /* accounting flags */
+};
+
+/*
+ * 1/AHZ is the granularity of the data encoded in the comp_t fields.
+ * This is not necessarily equal to hz.
+ */
+#define AHZ 64
+
+#ifdef KERNEL
+struct vnode *acctp; /* NOTE(review): presumably the vnode accounting records go to -- confirm */
+#endif /* KERNEL */
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
new file mode 100644
index 000000000000..e6c329f239da
--- /dev/null
+++ b/sys/sys/bio.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)buf.h 8.7 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_BUF_H_
+#define _SYS_BUF_H_
+#include <sys/queue.h>
+
+#define NOLIST ((struct buf *)0x87654321)
+
+/*
+ * The buffer header describes an I/O operation in the kernel.
+ */
+struct buf {
+ LIST_ENTRY(buf) b_hash; /* Hash chain. */
+ LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */
+ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */
+ struct buf *b_actf, **b_actb; /* Device driver queue when active. */
+ struct proc *b_proc; /* Associated proc; NULL if kernel. */
+ volatile long b_flags; /* B_* flags. */
+ int b_error; /* Errno value. */
+ long b_bufsize; /* Allocated buffer size. */
+ long b_bcount; /* Valid bytes in buffer. */
+ long b_resid; /* Remaining I/O. */
+ dev_t b_dev; /* Device associated with buffer. */
+ struct {
+ caddr_t b_addr; /* Memory, superblocks, indirect etc. */
+ } b_un;
+ void *b_saveaddr; /* Original b_addr for physio. */
+ daddr_t b_lblkno; /* Logical block number. */
+ daddr_t b_blkno; /* Underlying physical block number. */
+ /* Function to call upon completion. */
+ void (*b_iodone) __P((struct buf *));
+ struct vnode *b_vp; /* Device vnode. */
+ int b_pfcent; /* Center page when swapping cluster. */
+ int b_dirtyoff; /* Offset in buffer of dirty region. */
+ int b_dirtyend; /* Offset of end of dirty region. */
+ struct ucred *b_rcred; /* Read credentials reference. */
+ struct ucred *b_wcred; /* Write credentials reference. */
+ int b_validoff; /* Offset in buffer of valid region. */
+ int b_validend; /* Offset of end of valid region. */
+};
+
+/* Device driver compatibility definitions. */
+#define b_active b_bcount /* Driver queue head: drive active. */
+#define b_data b_un.b_addr /* b_un.b_addr is not changeable. */
+#define b_errcnt b_resid /* Retry count while I/O in progress. */
+#define iodone biodone /* Old name for biodone. */
+#define iowait biowait /* Old name for biowait. */
+
+/*
+ * These flags are kept in b_flags.
+ */
+#define B_AGE 0x00000001 /* Move to age queue when I/O done. */
+#define B_APPENDWRITE 0x00000002 /* Append-write in progress. */
+#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
+#define B_BAD 0x00000008 /* Bad block revectoring in progress. */
+#define B_BUSY 0x00000010 /* I/O in progress. */
+#define B_CACHE 0x00000020 /* Bread found us in the cache. */
+#define B_CALL 0x00000040 /* Call b_iodone from biodone. */
+#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
+#define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */
+#define B_DONE 0x00000200 /* I/O completed. */
+#define B_EINTR 0x00000400 /* I/O was interrupted */
+#define B_ERROR 0x00000800 /* I/O error occurred. */
+#define B_GATHERED 0x00001000 /* LFS: already in a segment. */
+#define B_INVAL 0x00002000 /* Does not contain valid info. */
+#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */
+#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
+#define B_PAGET 0x00010000 /* Page in/out of page table space. */
+#define B_PGIN 0x00020000 /* Pagein op, so swap() can count it. */
+#define B_PHYS 0x00040000 /* I/O to user memory. */
+#define B_RAW 0x00080000 /* Set by physio for raw transfers. */
+#define B_READ 0x00100000 /* Read buffer. */
+#define B_TAPE 0x00200000 /* Magnetic tape I/O. */
+#define B_UAREA 0x00400000 /* Buffer describes Uarea I/O. */
+#define B_WANTED 0x00800000 /* Process wants this buffer. */
+#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */
+#define B_WRITEINPROG 0x01000000 /* Write in progress. */
+#define B_XXX 0x02000000 /* Debugging flag. */
+
+/*
+ * This structure describes a clustered I/O. It is stored in the b_saveaddr
+ * field of the buffer on which I/O is done. At I/O completion, cluster
+ * callback uses the structure to parcel I/O's to individual buffers, and
+ * then free's this structure.
+ */
+struct cluster_save {
+ long bs_bcount; /* Saved b_bcount. */
+ long bs_bufsize; /* Saved b_bufsize. */
+ void *bs_saveaddr; /* Saved b_addr. */
+ int bs_nchildren; /* Number of associated buffers. */
+ struct buf **bs_children; /* List of associated buffers. */
+};
+
+/*
+ * Zero out the buffer's data area (b_bcount bytes) and reset b_resid to 0.
+ */
+#define clrbuf(bp) { \
+ blkclr((bp)->b_data, (u_int)(bp)->b_bcount); \
+ (bp)->b_resid = 0; \
+} /* bare block, not do/while(0): "clrbuf(bp);" right before an else will not parse */
+
+/* Flags to low-level allocation routines. */
+#define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */
+#define B_SYNC 0x02 /* Do all allocations synchronously. */
+
+#ifdef KERNEL
+int nbuf; /* The number of buffer headers */
+struct buf *buf; /* The buffer headers. */
+char *buffers; /* The buffer contents. */
+int bufpages; /* Number of memory pages in the buffer pool. */
+struct buf *swbuf; /* Swap I/O buffer headers. */
+int nswbuf; /* Number of swap I/O buffer headers. */
+struct buf bswlist; /* Head of swap I/O buffer headers free list. */
+struct buf *bclnlist; /* Head of cleaned page list. */
+
+__BEGIN_DECLS
+int allocbuf __P((struct buf *, int));
+int bawrite __P((struct buf *));
+int bdwrite __P((struct buf *));
+void biodone __P((struct buf *));
+int biowait __P((struct buf *));
+int bread __P((struct vnode *, daddr_t, int,
+ struct ucred *, struct buf **));
+int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int,
+ struct ucred *, struct buf **));
+int brelse __P((struct buf *));
+void bufinit __P((void));
+int bwrite __P((struct buf *));
+void cluster_callback __P((struct buf *));
+int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long,
+ struct ucred *, struct buf **));
+void cluster_write __P((struct buf *, u_quad_t));
+struct buf *getblk __P((struct vnode *, daddr_t, int, int, int));
+struct buf *geteblk __P((int));
+struct buf *getnewbuf __P((int slpflag, int slptimeo));
+struct buf *incore __P((struct vnode *, daddr_t));
+u_int minphys __P((struct buf *bp));
+__END_DECLS
+#endif
+#endif /* !_SYS_BUF_H_ */
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
new file mode 100644
index 000000000000..e6c329f239da
--- /dev/null
+++ b/sys/sys/buf.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)buf.h 8.7 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_BUF_H_
+#define _SYS_BUF_H_
+#include <sys/queue.h>
+
+#define NOLIST ((struct buf *)0x87654321)
+
+/*
+ * The buffer header describes an I/O operation in the kernel.
+ */
+struct buf {
+ LIST_ENTRY(buf) b_hash; /* Hash chain. */
+ LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */
+ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */
+ struct buf *b_actf, **b_actb; /* Device driver queue when active. */
+ struct proc *b_proc; /* Associated proc; NULL if kernel. */
+ volatile long b_flags; /* B_* flags. */
+ int b_error; /* Errno value. */
+ long b_bufsize; /* Allocated buffer size. */
+ long b_bcount; /* Valid bytes in buffer. */
+ long b_resid; /* Remaining I/O. */
+ dev_t b_dev; /* Device associated with buffer. */
+ struct {
+ caddr_t b_addr; /* Memory, superblocks, indirect etc. */
+ } b_un;
+ void *b_saveaddr; /* Original b_addr for physio. */
+ daddr_t b_lblkno; /* Logical block number. */
+ daddr_t b_blkno; /* Underlying physical block number. */
+ /* Function to call upon completion. */
+ void (*b_iodone) __P((struct buf *));
+ struct vnode *b_vp; /* Device vnode. */
+ int b_pfcent; /* Center page when swapping cluster. */
+ int b_dirtyoff; /* Offset in buffer of dirty region. */
+ int b_dirtyend; /* Offset of end of dirty region. */
+ struct ucred *b_rcred; /* Read credentials reference. */
+ struct ucred *b_wcred; /* Write credentials reference. */
+ int b_validoff; /* Offset in buffer of valid region. */
+ int b_validend; /* Offset of end of valid region. */
+};
+
+/* Device driver compatibility definitions. */
+#define b_active b_bcount /* Driver queue head: drive active. */
+#define b_data b_un.b_addr /* b_un.b_addr is not changeable. */
+#define b_errcnt b_resid /* Retry count while I/O in progress. */
+#define iodone biodone /* Old name for biodone. */
+#define iowait biowait /* Old name for biowait. */
+
+/*
+ * These flags are kept in b_flags.
+ */
+#define B_AGE 0x00000001 /* Move to age queue when I/O done. */
+#define B_APPENDWRITE 0x00000002 /* Append-write in progress. */
+#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
+#define B_BAD 0x00000008 /* Bad block revectoring in progress. */
+#define B_BUSY 0x00000010 /* I/O in progress. */
+#define B_CACHE 0x00000020 /* Bread found us in the cache. */
+#define B_CALL 0x00000040 /* Call b_iodone from biodone. */
+#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
+#define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */
+#define B_DONE 0x00000200 /* I/O completed. */
+#define B_EINTR 0x00000400 /* I/O was interrupted */
+#define B_ERROR 0x00000800 /* I/O error occurred. */
+#define B_GATHERED 0x00001000 /* LFS: already in a segment. */
+#define B_INVAL 0x00002000 /* Does not contain valid info. */
+#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */
+#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
+#define B_PAGET 0x00010000 /* Page in/out of page table space. */
+#define B_PGIN 0x00020000 /* Pagein op, so swap() can count it. */
+#define B_PHYS 0x00040000 /* I/O to user memory. */
+#define B_RAW 0x00080000 /* Set by physio for raw transfers. */
+#define B_READ 0x00100000 /* Read buffer. */
+#define B_TAPE 0x00200000 /* Magnetic tape I/O. */
+#define B_UAREA 0x00400000 /* Buffer describes Uarea I/O. */
+#define B_WANTED 0x00800000 /* Process wants this buffer. */
+#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */
+#define B_WRITEINPROG 0x01000000 /* Write in progress. */
+#define B_XXX 0x02000000 /* Debugging flag. */
+
+/*
+ * This structure describes a clustered I/O. It is stored in the b_saveaddr
+ * field of the buffer on which I/O is done. At I/O completion, cluster
+ * callback uses the structure to parcel I/O's to individual buffers, and
+ * then free's this structure.
+ */
+struct cluster_save {
+ long bs_bcount; /* Saved b_bcount. */
+ long bs_bufsize; /* Saved b_bufsize. */
+ void *bs_saveaddr; /* Saved b_addr. */
+ int bs_nchildren; /* Number of associated buffers. */
+ struct buf **bs_children; /* List of associated buffers. */
+};
+
+/*
+ * Zero out the buffer's data area (b_bcount bytes) and reset b_resid to 0.
+ */
+#define clrbuf(bp) { \
+ blkclr((bp)->b_data, (u_int)(bp)->b_bcount); \
+ (bp)->b_resid = 0; \
+} /* bare block, not do/while(0): "clrbuf(bp);" right before an else will not parse */
+
+/* Flags to low-level allocation routines. */
+#define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */
+#define B_SYNC 0x02 /* Do all allocations synchronously. */
+
+#ifdef KERNEL
+int nbuf; /* The number of buffer headers */
+struct buf *buf; /* The buffer headers. */
+char *buffers; /* The buffer contents. */
+int bufpages; /* Number of memory pages in the buffer pool. */
+struct buf *swbuf; /* Swap I/O buffer headers. */
+int nswbuf; /* Number of swap I/O buffer headers. */
+struct buf bswlist; /* Head of swap I/O buffer headers free list. */
+struct buf *bclnlist; /* Head of cleaned page list. */
+
+__BEGIN_DECLS
+int allocbuf __P((struct buf *, int));
+int bawrite __P((struct buf *));
+int bdwrite __P((struct buf *));
+void biodone __P((struct buf *));
+int biowait __P((struct buf *));
+int bread __P((struct vnode *, daddr_t, int,
+ struct ucred *, struct buf **));
+int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int,
+ struct ucred *, struct buf **));
+int brelse __P((struct buf *));
+void bufinit __P((void));
+int bwrite __P((struct buf *));
+void cluster_callback __P((struct buf *));
+int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long,
+ struct ucred *, struct buf **));
+void cluster_write __P((struct buf *, u_quad_t));
+struct buf *getblk __P((struct vnode *, daddr_t, int, int, int));
+struct buf *geteblk __P((int));
+struct buf *getnewbuf __P((int slpflag, int slptimeo));
+struct buf *incore __P((struct vnode *, daddr_t));
+u_int minphys __P((struct buf *bp));
+__END_DECLS
+#endif
+#endif /* !_SYS_BUF_H_ */
diff --git a/sys/sys/callout.h b/sys/sys/callout.h
new file mode 100644
index 000000000000..d685e56d3f08
--- /dev/null
+++ b/sys/sys/callout.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)callout.h 8.2 (Berkeley) 1/21/94
+ */
+
+struct callout {
+ struct callout *c_next; /* next callout in queue */
+ void *c_arg; /* function argument */
+ void (*c_func) __P((void *)); /* function to call */
+ int c_time; /* ticks to the event */
+};
+
+#ifdef KERNEL
+struct callout *callfree, *callout, calltodo; /* two pointers and one struct; NOTE(review): presumably free list, table, pending-queue head -- confirm */
+int ncallout; /* NOTE(review): presumably the number of callout entries -- confirm */
+#endif /* KERNEL */
diff --git a/sys/sys/cdefs.h b/sys/sys/cdefs.h
new file mode 100644
index 000000000000..c104b9e964dd
--- /dev/null
+++ b/sys/sys/cdefs.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Berkeley Software Design, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cdefs.h 8.7 (Berkeley) 1/21/94
+ */
+
+#ifndef _CDEFS_H_
+#define _CDEFS_H_
+
+#if defined(__cplusplus) /* C++: give the bracketed declarations C linkage */
+#define __BEGIN_DECLS extern "C" {
+#define __END_DECLS } /* no trailing semicolon: it would be a stray empty declaration */
+#else /* C: both markers expand to nothing */
+#define __BEGIN_DECLS
+#define __END_DECLS
+#endif
+
+/*
+ * The __CONCAT macro is used to concatenate parts of symbol names, e.g.
+ * with "#define OLD(foo) __CONCAT(old,foo)", OLD(foo) produces oldfoo.
+ * The __CONCAT macro is a bit tricky -- make sure you don't put spaces
+ * in between its arguments. __CONCAT can also concatenate double-quoted
+ * strings produced by the __STRING macro, but this only works with ANSI C.
+ */
+#if defined(__STDC__) || defined(__cplusplus) /* __P keeps prototypes here, drops them in the #else branch */
+#define __P(protos) protos /* full-blown ANSI C */
+#define __CONCAT(x,y) x ## y /* ANSI token pasting */
+#define __STRING(x) #x /* ANSI stringizing */
+
+#define __const const /* define reserved names to standard */
+#define __signed signed
+#define __volatile volatile
+#if defined(__cplusplus)
+#define __inline inline /* convert to C++ keyword */
+#else
+#ifndef __GNUC__ /* gcc is left alone: __inline stays undefined as a macro */
+#define __inline /* delete GCC keyword */
+#endif /* !__GNUC__ */
+#endif /* !__cplusplus */
+
+#else /* !(__STDC__ || __cplusplus) */
+#define __P(protos) () /* traditional C preprocessor */
+#define __CONCAT(x,y) x/**/y
+#define __STRING(x) "x"
+
+#ifndef __GNUC__ /* neither ANSI nor gcc: strip every qualifier keyword */
+#define __const /* delete pseudo-ANSI C keywords */
+#define __inline
+#define __signed
+#define __volatile
+/*
+ * In non-ANSI C environments, new programs will want ANSI-only C keywords
+ * deleted from the program and old programs will want them left alone.
+ * When using a compiler other than gcc, programs using the ANSI C keywords
+ * const, inline etc. as normal identifiers should define -DNO_ANSI_KEYWORDS.
+ * When using "gcc -traditional", we assume that this is the intent; if
+ * __GNUC__ is defined but __STDC__ is not, we leave the new keywords alone.
+ */
+#ifndef NO_ANSI_KEYWORDS /* opt-out switch described in the comment above */
+#define const /* delete ANSI C keywords */
+#define inline
+#define signed
+#define volatile
+#endif
+#endif /* !__GNUC__ */
+#endif /* !(__STDC__ || __cplusplus) */
+
+/*
+ * GCC1 and some versions of GCC2 declare dead (non-returning) and
+ * pure (no side effects) functions using "volatile" and "const";
+ * unfortunately, these then cause warnings under "-ansi -pedantic".
+ * GCC2 uses a new, peculiar __attribute__((attrs)) style. All of
+ * these work for GNU C++ (modulo a slight glitch in the C++ grammar
+ * in the distribution version of 2.5.5).
+ */
+#if !defined(__GNUC__) || __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5)
+#define __attribute__(x) /* delete __attribute__ if non-gcc or gcc older than 2.5 */
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__) /* old gcc only: use the keyword forms */
+#define __dead __volatile
+#define __pure __const
+#endif
+#endif /* the minor number is tested only for gcc 2.x, so 3.0, 4.2 etc. keep __attribute__ */
+
+/* Delete pseudo-keywords wherever they are not available or needed. */
+#ifndef __dead /* NOTE(review): only __dead is tested; __pure is assumed to be defined together with it */
+#define __dead
+#define __pure
+#endif
+
+#endif /* !_CDEFS_H_ */
diff --git a/sys/sys/clist.h b/sys/sys/clist.h
new file mode 100644
index 000000000000..bad26477015d
--- /dev/null
+++ b/sys/sys/clist.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)clist.h 8.1 (Berkeley) 6/4/93
+ */
+
+struct cblock { /* CBQSIZE and CBSIZE are not defined in this header; the includer must supply them */
+ struct cblock *c_next; /* next cblock in queue */
+ char c_quote[CBQSIZE]; /* quoted characters */
+ char c_info[CBSIZE]; /* characters */
+};
+
+#ifdef KERNEL /* declarations only; the definitions are not in this header */
+extern struct cblock *cfree, *cfreelist;
+extern int cfreecount, nclist;
+#endif
diff --git a/sys/sys/conf.h b/sys/sys/conf.h
new file mode 100644
index 000000000000..58cb6fa8339c
--- /dev/null
+++ b/sys/sys/conf.h
@@ -0,0 +1,123 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)conf.h 8.3 (Berkeley) 1/21/94
+ */
+
+/*
+ * Definitions of device driver entry switches
+ */
+
+struct buf; /* forward declarations: the switch structures */
+struct proc; /* below use these types only through pointers, */
+struct tty; /* so their full definitions are not needed here */
+struct uio;
+struct vnode;
+
+struct bdevsw { /* one driver's entry points; presumably the block-device switch -- confirm */
+ int (*d_open) __P((dev_t dev, int oflags, int devtype,
+ struct proc *p));
+ int (*d_close) __P((dev_t dev, int fflag, int devtype,
+ struct proc *p));
+ int (*d_strategy) __P((struct buf *bp));
+ int (*d_ioctl) __P((dev_t dev, int cmd, caddr_t data,
+ int fflag, struct proc *p));
+ int (*d_dump) (); /* parameters vary by architecture */
+ int (*d_psize) __P((dev_t dev));
+ int d_flags; /* flag values are not defined in this header */
+};
+
+#ifdef KERNEL /* dev_t and caddr_t above must already be declared by the includer */
+extern struct bdevsw bdevsw[]; /* the table itself; its size is not given here */
+#endif
+
+struct cdevsw { /* one driver's entry points; presumably the character-device switch -- confirm */
+ int (*d_open) __P((dev_t dev, int oflags, int devtype,
+ struct proc *p));
+ int (*d_close) __P((dev_t dev, int fflag, int devtype,
+ struct proc *));
+ int (*d_read) __P((dev_t dev, struct uio *uio, int ioflag));
+ int (*d_write) __P((dev_t dev, struct uio *uio, int ioflag));
+ int (*d_ioctl) __P((dev_t dev, int cmd, caddr_t data,
+ int fflag, struct proc *p));
+ int (*d_stop) __P((struct tty *tp, int rw));
+ int (*d_reset) __P((int uban)); /* XXX */
+ struct tty *d_ttys; /* data member, not an entry point */
+ int (*d_select) __P((dev_t dev, int which, struct proc *p));
+ int (*d_mmap) __P(()); /* empty list: parameters are left unprototyped */
+ int (*d_strategy) __P((struct buf *bp));
+};
+
+#ifdef KERNEL
+extern struct cdevsw cdevsw[]; /* the table itself; its size is not given here */
+
+/* symbolic sleep message strings */
+extern char devopn[], devio[], devwait[], devin[], devout[];
+extern char devioc[], devcls[];
+#endif
+
+struct linesw { /* entry points taking a struct tty; presumably the line-discipline switch -- confirm */
+ int (*l_open) __P((dev_t dev, struct tty *tp));
+ int (*l_close) __P((struct tty *tp, int flag));
+ int (*l_read) __P((struct tty *tp, struct uio *uio,
+ int flag));
+ int (*l_write) __P((struct tty *tp, struct uio *uio,
+ int flag));
+ int (*l_ioctl) __P((struct tty *tp, int cmd, caddr_t data,
+ int flag, struct proc *p));
+ int (*l_rint) __P((int c, struct tty *tp)); /* takes one character c and the tty */
+ int (*l_start) __P((struct tty *tp));
+ int (*l_modem) __P((struct tty *tp, int flag));
+};
+
+#ifdef KERNEL
+extern struct linesw linesw[]; /* the table itself; its size is not given here */
+#endif
+
+struct swdevt { /* one entry of the swdevt[] table declared below */
+ dev_t sw_dev;
+ int sw_flags; /* SW_* bits defined below */
+ int sw_nblks; /* presumably the size in blocks -- confirm */
+ struct vnode *sw_vp;
+};
+#define SW_FREED 0x01 /* bit in sw_flags */
+#define SW_SEQUENTIAL 0x02 /* bit in sw_flags */
+#define sw_freed sw_flags /* XXX compat: old member name aliases sw_flags */
+
+#ifdef KERNEL
+extern struct swdevt swdevt[]; /* the table itself; its size is not given here */
+#endif
diff --git a/sys/sys/device.h b/sys/sys/device.h
new file mode 100644
index 000000000000..0a233ed3e5c1
--- /dev/null
+++ b/sys/sys/device.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)device.h 8.2 (Berkeley) 2/17/94
+ */
+
+#ifndef _SYS_DEVICE_H_
+#define _SYS_DEVICE_H_
+
+/*
+ * Minimal device structures.
+ * Note that all ``system'' device types are listed here.
+ */
+enum devclass { /* stored in device.dv_class and cfdriver.cd_class */
+ DV_DULL, /* generic, no special info; first enumerator, so value 0 */
+ DV_CPU, /* CPU (carries resource utilization) */
+ DV_DISK, /* disk drive (label, etc) */
+ DV_IFNET, /* network interface */
+ DV_TAPE, /* tape device */
+ DV_TTY /* serial line interface (???) */
+};
+
+struct device { /* generic per-device record, chained through dv_next */
+ enum devclass dv_class; /* this device's classification */
+ struct device *dv_next; /* next in list of all */
+ struct cfdata *dv_cfdata; /* config data that found us */
+ int dv_unit; /* device unit number */
+ char dv_xname[16]; /* external name (name + unit); 16 bytes in total */
+ struct device *dv_parent; /* pointer to parent device */
+};
+
+/* `event' counters (use zero or more per device instance, as needed) */
+struct evcnt {
+ struct evcnt *ev_next; /* linked list */
+ struct device *ev_dev; /* associated device */
+ int ev_count; /* how many have occurred */
+ char ev_name[8]; /* what to call them (systat display); 8 bytes -- TODO confirm NUL handling */
+};
+
+/*
+ * Configuration data (i.e., data placed in ioconf.c).
+ */
+struct cfdata {
+ struct cfdriver *cf_driver; /* config driver */
+ short cf_unit; /* unit number */
+ short cf_fstate; /* finding state (FSTATE_* below) */
+ int *cf_loc; /* locators (machine dependent) */
+ int cf_flags; /* flags from config */
+ short *cf_parents; /* potential parents */
+ void (**cf_ivstubs)(); /* config-generated vectors, if any; unprototyped functions */
+};
+#define FSTATE_NOTFOUND 0 /* has not been found */
+#define FSTATE_FOUND 1 /* has been found */
+#define FSTATE_STAR 2 /* duplicable */
+
+typedef int (*cfmatch_t) __P((struct device *, struct cfdata *, void *)); /* type of cd_match */
+
+/*
+ * `configuration' driver (what the machine-independent autoconf uses).
+ * As devices are found, they are applied against all the potential matches.
+ * The one with the best match is taken, and a device structure (plus any
+ * other data desired) is allocated. Pointers to these are placed into
+ * an array of pointers. The array itself must be dynamic since devices
+ * can be found long after the machine is up and running.
+ */
+struct cfdriver {
+ void **cd_devs; /* devices found */
+ char *cd_name; /* device name */
+ cfmatch_t cd_match; /* returns a match level */
+ void (*cd_attach) __P((struct device *, struct device *, void *));
+ enum devclass cd_class; /* device classification */
+ size_t cd_devsize; /* size of dev data (for malloc); size_t must come from the includer */
+ void *cd_aux; /* additional driver, if any */
+ int cd_ndevs; /* size of cd_devs array */
+};
+
+/*
+ * Configuration printing functions, and their return codes. The second
+ * argument is NULL if the device was configured; otherwise it is the name
+ * of the parent device. The return value is ignored if the device was
+ * configured, so most functions can return UNCONF unconditionally.
+ */
+typedef int (*cfprint_t) __P((void *, char *)); /* returns QUIET, UNCONF or UNSUPP */
+#define QUIET 0 /* print nothing */
+#define UNCONF 1 /* print " not configured\n" */
+#define UNSUPP 2 /* print " not supported\n" */
+
+/*
+ * Pseudo-device attach information (function + number of pseudo-devs).
+ */
+struct pdevinit {
+ void (*pdev_attach) __P((int)); /* attach function; presumably called with pdev_count -- confirm */
+ int pdev_count; /* number of pseudo-devs */
+};
+
+struct device *alldevs; /* head of list of all devices; NOTE(review): not extern, so defined per includer */
+struct evcnt *allevents; /* head of list of all events; NOTE(review): same, and not under KERNEL */
+
+struct cfdata *config_search __P((cfmatch_t, struct device *, void *));
+struct cfdata *config_rootsearch __P((cfmatch_t, char *, void *));
+int config_found __P((struct device *, void *, cfprint_t));
+int config_rootfound __P((char *, void *));
+void config_attach __P((struct device *, struct cfdata *, void *, cfprint_t));
+void evcnt_attach __P((struct device *, const char *, struct evcnt *)); /* prototypes only; bodies are not in this header */
+#endif /* !_SYS_DEVICE_H_ */
diff --git a/sys/sys/dir.h b/sys/sys/dir.h
new file mode 100644
index 000000000000..0c4cd679cee7
--- /dev/null
+++ b/sys/sys/dir.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)dir.h 8.2 (Berkeley) 1/4/94
+ */
+
+/*
+ * The information in this file should be obtained from <dirent.h>
+ * and is provided solely (and temporarily) for backward compatibility.
+ */
+
+#ifndef _SYS_DIR_H_
+#define _SYS_DIR_H_
+
+#include <dirent.h>
+
+/*
+ * Backwards compatibility: every identifier spelled direct becomes dirent.
+ */
+#define direct dirent
+
+/*
+ * DIRSIZ(dp) is the smallest record length able to hold directory entry
+ * dp: the size of struct direct with its d_name array removed, plus the
+ * name and its terminating null byte (dp->d_namlen + 1) rounded up to a
+ * multiple of 4 bytes.
+ */
+#undef DIRSIZ
+#define DIRSIZ(dp) \
+ (sizeof (struct direct) - (MAXNAMLEN + 1) + (((dp)->d_namlen + 4) & ~3))
+
+#endif /* !_SYS_DIR_H_ */
diff --git a/sys/sys/dirent.h b/sys/sys/dirent.h
new file mode 100644
index 000000000000..1c4b96aa29cc
--- /dev/null
+++ b/sys/sys/dirent.h
@@ -0,0 +1,76 @@
+/*-
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)dirent.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * The dirent structure defines the format of directory entries returned by
+ * the getdirentries(2) system call.
+ *
+ * A directory entry has a struct dirent at the front of it, containing its
+ * inode number, the length of the entry, and the length of the name
+ * contained in the entry. These are followed by the name padded to a 4
+ * byte boundary with null bytes. All names are guaranteed null terminated.
+ * The maximum length of a name in a directory is MAXNAMLEN.
+ */
+
+struct dirent { /* NOTE(review): this header has no include guard; a second inclusion redefines the struct */
+ unsigned long d_fileno; /* file number of entry */
+ unsigned short d_reclen; /* length of this record */
+ unsigned char d_type; /* file type, one of the DT_* values below */
+ unsigned char d_namlen; /* length of string in d_name */
+#ifdef _POSIX_SOURCE /* same array size, but MAXNAMLEN is not defined in this branch */
+ char d_name[255 + 1]; /* name must be no longer than this */
+#else
+#define MAXNAMLEN 255
+ char d_name[MAXNAMLEN + 1]; /* name must be no longer than this */
+#endif
+};
+
+/*
+ * File types (values stored in d_type; IFTODT below yields the same range)
+ */
+#define DT_UNKNOWN 0
+#define DT_FIFO 1
+#define DT_CHR 2
+#define DT_DIR 4
+#define DT_BLK 6
+#define DT_REG 8
+#define DT_LNK 10
+#define DT_SOCK 12
+
+/*
+ * Convert between stat structure types and directory types.
+ */
+#define IFTODT(mode) (((mode) & 0170000) >> 12) /* keep the four bits under mask 0170000, move them to bits 0-3 */
+#define DTTOIF(dirtype) ((dirtype) << 12) /* inverse shift; the argument is not masked */
diff --git a/sys/sys/disk.h b/sys/sys/disk.h
new file mode 100644
index 000000000000..352ecf002740
--- /dev/null
+++ b/sys/sys/disk.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)disk.h 8.1 (Berkeley) 6/2/93
+ *
+ * from: $Header: disk.h,v 1.5 92/11/19 04:33:03 torek Exp $ (LBL)
+ */
+
+/*
+ * Disk device structures.
+ *
+ * Note that this is only a preliminary outline. The final disk structures
+ * may be somewhat different.
+ */
+struct buf; /* forward declaration; this header uses only struct buf * */
+
+struct dkdevice { /* struct device, struct disklabel and daddr_t must be defined by the includer */
+ struct device dk_dev; /* base device; embedded by value as the first member */
+ struct dkdevice *dk_next; /* list of disks; not yet used */
+ int dk_bps; /* xfer rate: bytes per second */
+ int dk_bopenmask; /* block devices open */
+ int dk_copenmask; /* character devices open */
+ int dk_openmask; /* composite (bopen|copen) */
+ int dk_state; /* label state ###; presumably one of the DK_* states -- confirm */
+ int dk_blkshift; /* shift to convert DEV_BSIZE to blks */
+ int dk_byteshift; /* shift to convert bytes to blks */
+ struct dkdriver *dk_driver; /* pointer to driver */
+ daddr_t dk_labelsector; /* sector containing label */
+ struct disklabel dk_label; /* label; embedded by value */
+};
+
+struct dkdriver { /* driver entry points; only d_strategy exists unless notyet is defined */
+ void (*d_strategy) __P((struct buf *));
+#ifdef notyet /* the members below are compiled out by default */
+ int (*d_open) __P((dev_t dev, int ifmt, int, struct proc *));
+ int (*d_close) __P((dev_t dev, int, int ifmt, struct proc *));
+ int (*d_ioctl) __P((dev_t dev, int cmd, caddr_t data, int fflag,
+ struct proc *));
+ int (*d_dump) __P((dev_t));
+ void (*d_start) __P((struct buf *, daddr_t));
+ int (*d_mklabel) __P((struct dkdevice *));
+#endif
+};
+
+/* states (presumably the values of dkdevice.dk_state -- confirm) */
+#define DK_CLOSED 0 /* drive is closed */
+#define DK_WANTOPEN 1 /* drive being opened */
+#define DK_WANTOPENRAW 2 /* drive being opened; presumably without a label, cf. DK_OPENRAW */
+#define DK_RDLABEL 3 /* label being read */
+#define DK_OPEN 4 /* label read, drive open */
+#define DK_OPENRAW 5 /* open without label */
+
+#ifdef DISKSORT_STATS /* the structure exists only when this macro is defined */
+/*
+ * Stats from disksort().
+ */
+struct disksort_stats { /* seven long counters */
+ long ds_newhead; /* # new queue heads created */
+ long ds_newtail; /* # new queue tails created */
+ long ds_midfirst; /* # insertions into sort list */
+ long ds_endfirst; /* # insertions at end of sort list */
+ long ds_newsecond; /* # inversions (2nd lists) created */
+ long ds_midsecond; /* # insertions into 2nd list */
+ long ds_endsecond; /* # insertions at end of 2nd list */
+};
+#endif
+
+#ifdef KERNEL /* prototypes only; the function bodies are not in this header */
+void disksort __P((struct buf *, struct buf *));
+char *readdisklabel __P((struct dkdevice *, int));
+int setdisklabel __P((struct dkdevice *, struct disklabel *));
+int writedisklabel __P((struct dkdevice *, int));
+int diskerr __P((struct dkdevice *, struct buf *, char *, int, int));
+#endif
diff --git a/sys/sys/disklabel.h b/sys/sys/disklabel.h
new file mode 100644
index 000000000000..a25ee29e363d
--- /dev/null
+++ b/sys/sys/disklabel.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 1987, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)disklabel.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Disk description table, see disktab(5)
+ */
+#define _PATH_DISKTAB "/etc/disktab"
+#define DISKTAB "/etc/disktab" /* deprecated */
+
+/*
+ * Each disk has a label which includes information about the hardware
+ * disk geometry, filesystem partitions, and drive specific information.
+ * The label is in block 0 or 1, possibly offset from the beginning
+ * to leave room for a bootstrap, etc.
+ */
+
+/* XXX these should be defined per controller (or drive) elsewhere, not here! */
+#ifdef i386
+#define LABELSECTOR 1 /* sector containing label */
+#define LABELOFFSET 0 /* offset of label in sector */
+#endif
+
+#ifndef LABELSECTOR
+#define LABELSECTOR 0 /* sector containing label */
+#endif
+
+#ifndef LABELOFFSET
+#define LABELOFFSET 64 /* offset of label in sector */
+#endif
+
+#define DISKMAGIC ((u_long) 0x82564557) /* The disk magic number */
+#ifndef MAXPARTITIONS
+#define MAXPARTITIONS 8
+#endif
+
+
+#ifndef LOCORE
+struct disklabel {
+ u_long d_magic; /* the magic number */
+ short d_type; /* drive type */
+ short d_subtype; /* controller/d_type specific */
+ char d_typename[16]; /* type name, e.g. "eagle" */
+ /*
+ * d_packname contains the pack identifier and is returned when
+ * the disklabel is read off the disk or in-core copy.
+ * d_boot0 and d_boot1 are the (optional) names of the
+ * primary (block 0) and secondary (block 1-15) bootstraps
+ * as found in /usr/mdec. These are returned when using
+ * getdiskbyname(3) to retrieve the values from /etc/disktab.
+ */
+#if defined(KERNEL) || defined(STANDALONE)
+ char d_packname[16]; /* pack identifier */
+#else
+ union {
+ char un_d_packname[16]; /* pack identifier */
+ struct {
+ char *un_d_boot0; /* primary bootstrap name */
+ char *un_d_boot1; /* secondary bootstrap name */
+ } un_b;
+ } d_un;
+#define d_packname d_un.un_d_packname
+#define d_boot0 d_un.un_b.un_d_boot0
+#define d_boot1 d_un.un_b.un_d_boot1
+#endif /* ! KERNEL or STANDALONE */
+ /* disk geometry: */
+ u_long d_secsize; /* # of bytes per sector */
+ u_long d_nsectors; /* # of data sectors per track */
+ u_long d_ntracks; /* # of tracks per cylinder */
+ u_long d_ncylinders; /* # of data cylinders per unit */
+ u_long d_secpercyl; /* # of data sectors per cylinder */
+ u_long d_secperunit; /* # of data sectors per unit */
+ /*
+ * Spares (bad sector replacements) below
+ * are not counted in d_nsectors or d_secpercyl.
+ * Spare sectors are assumed to be physical sectors
+ * which occupy space at the end of each track and/or cylinder.
+ */
+ u_short d_sparespertrack; /* # of spare sectors per track */
+ u_short d_sparespercyl; /* # of spare sectors per cylinder */
+ /*
+ * Alternate cylinders include maintenance, replacement,
+ * configuration description areas, etc.
+ */
+ u_long d_acylinders; /* # of alt. cylinders per unit */
+
+ /* hardware characteristics: */
+ /*
+ * d_interleave, d_trackskew and d_cylskew describe perturbations
+ * in the media format used to compensate for a slow controller.
+ * Interleave is physical sector interleave, set up by the formatter
+ * or controller when formatting. When interleaving is in use,
+ * logically adjacent sectors are not physically contiguous,
+ * but instead are separated by some number of sectors.
+ * It is specified as the ratio of physical sectors traversed
+ * per logical sector. Thus an interleave of 1:1 implies contiguous
+ * layout, while 2:1 implies that logical sector 0 is separated
+ * by one sector from logical sector 1.
+ * d_trackskew is the offset of sector 0 on track N
+ * relative to sector 0 on track N-1 on the same cylinder.
+ * Finally, d_cylskew is the offset of sector 0 on cylinder N
+ * relative to sector 0 on cylinder N-1.
+ */
+ u_short d_rpm; /* rotational speed */
+ u_short d_interleave; /* hardware sector interleave */
+ u_short d_trackskew; /* sector 0 skew, per track */
+ u_short d_cylskew; /* sector 0 skew, per cylinder */
+ u_long d_headswitch; /* head switch time, usec */
+ u_long d_trkseek; /* track-to-track seek, usec */
+ u_long d_flags; /* generic flags */
+#define NDDATA 5
+ u_long d_drivedata[NDDATA]; /* drive-type specific information */
+#define NSPARE 5
+ u_long d_spare[NSPARE]; /* reserved for future use */
+ u_long d_magic2; /* the magic number (again) */
+ u_short d_checksum; /* xor of data incl. partitions */
+
+ /* filesystem and partition information: */
+ u_short d_npartitions; /* number of partitions in following */
+ u_long d_bbsize; /* size of boot area at sn0, bytes */
+ u_long d_sbsize; /* max size of fs superblock, bytes */
+ struct partition { /* the partition table */
+ u_long p_size; /* number of sectors in partition */
+ u_long p_offset; /* starting sector */
+ u_long p_fsize; /* filesystem basic fragment size */
+ u_char p_fstype; /* filesystem type, see below */
+ u_char p_frag; /* filesystem fragments per block */
+ union {
+ u_short cpg; /* UFS: FS cylinders per group */
+ u_short sgs; /* LFS: FS segment shift */
+ } __partition_u1;
+#define p_cpg __partition_u1.cpg
+#define p_sgs __partition_u1.sgs
+ } d_partitions[MAXPARTITIONS]; /* actually may be more */
+};
+#else /* LOCORE */
+ /*
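+ * offsets for asm boot files; they match struct disklabel above when u_long is 4 bytes, u_short is 2 bytes and MAXPARTITIONS is 8.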
+ */
+ .set d_secsize,40
+ .set d_nsectors,44
+ .set d_ntracks,48
+ .set d_ncylinders,52
+ .set d_secpercyl,56
+ .set d_secperunit,60
+ .set d_end_,276 /* size of disk label */
+#endif /* LOCORE */
+
+/* d_type values: */
+#define DTYPE_SMD 1 /* SMD, XSMD; VAX hp/up */
+#define DTYPE_MSCP 2 /* MSCP */
+#define DTYPE_DEC 3 /* other DEC (rk, rl) */
+#define DTYPE_SCSI 4 /* SCSI */
+#define DTYPE_ESDI 5 /* ESDI interface */
+#define DTYPE_ST506 6 /* ST506 etc. */
+#define DTYPE_HPIB 7 /* CS/80 on HP-IB */
+#define DTYPE_HPFL 8 /* HP Fiber-link */
+#define DTYPE_FLOPPY 10 /* floppy */
+
+#ifdef DKTYPENAMES
+static char *dktypenames[] = { /* names in DTYPE_* order: entry N names d_type value N */
+ "unknown",
+ "SMD",
+ "MSCP",
+ "old DEC",
+ "SCSI",
+ "ESDI",
+ "ST506",
+ "HP-IB",
+ "HP-FL",
+ "type 9", /* placeholder: this header defines no DTYPE_* constant with value 9 */
+ "floppy",
+ 0 /* terminator, not counted by DKMAXTYPES */
+};
+#define DKMAXTYPES (sizeof(dktypenames) / sizeof(dktypenames[0]) - 1) /* number of names, excluding the terminator */
+#endif
+
+/*
+ * Filesystem type and version.
+ * Used to interpret other filesystem-specific
+ * per-partition information.
+ */
+#define FS_UNUSED 0 /* unused */
+#define FS_SWAP 1 /* swap */
+#define FS_V6 2 /* Sixth Edition */
+#define FS_V7 3 /* Seventh Edition */
+#define FS_SYSV 4 /* System V */
+#define FS_V71K 5 /* V7 with 1K blocks (4.1, 2.9) */
+#define FS_V8 6 /* Eighth Edition, 4K blocks */
+#define FS_BSDFFS 7 /* 4.2BSD fast file system */
+#define FS_MSDOS 8 /* MSDOS file system */
+#define FS_BSDLFS 9 /* 4.4BSD log-structured file system */
+#define FS_OTHER 10 /* in use, but unknown/unsupported */
+#define FS_HPFS 11 /* OS/2 high-performance file system */
+#define FS_ISO9660 12 /* ISO 9660, normally CD-ROM */
+#define FS_BOOT 13 /* partition contains bootstrap */
+
+#ifdef DKTYPENAMES /* same guard as dktypenames */
+static char *fstypenames[] = { /* names in FS_* order: entry N names p_fstype value N */
+ "unused",
+ "swap",
+ "Version 6",
+ "Version 7",
+ "System V",
+ "4.1BSD", /* FS_V71K */
+ "Eighth Edition",
+ "4.2BSD",
+ "MSDOS",
+ "4.4LFS",
+ "unknown", /* FS_OTHER */
+ "HPFS",
+ "ISO9660",
+ "boot",
+ 0 /* terminator, not counted by FSMAXTYPES */
+};
+#define FSMAXTYPES (sizeof(fstypenames) / sizeof(fstypenames[0]) - 1) /* number of names, excluding the terminator */
+#endif
+
+/*
+ * flags shared by various drives:
+ */
+#define D_REMOVABLE 0x01 /* removable media */
+#define D_ECC 0x02 /* supports ECC */
+#define D_BADSECT 0x04 /* supports bad sector forw. */
+#define D_RAMDISK 0x08 /* disk emulator */
+#define D_CHAIN 0x10 /* can do back-back transfers */
+
+/*
+ * Drive data for SMD.
+ */
+#define d_smdflags d_drivedata[0]
+#define D_SSE 0x1 /* supports skip sectoring */
+#define d_mindist d_drivedata[1]
+#define d_maxdist d_drivedata[2]
+#define d_sdist d_drivedata[3]
+
+/*
+ * Drive data for ST506.
+ */
+#define d_precompcyl d_drivedata[0]
+#define d_gap3 d_drivedata[1] /* used only when formatting */
+
+/*
+ * Drive data for SCSI.
+ */
+#define d_blind d_drivedata[0]
+
+#ifndef LOCORE
+/*
+ * Structure used to perform a format
+ * or other raw operation, returning data
+ * and/or register values.
+ * Register identification and format
+ * are device- and driver-dependent.
+ */
+struct format_op {
+ char *df_buf;
+ int df_count; /* value-result */
+ daddr_t df_startblk;
+ int df_reg[8]; /* result */
+};
+
+/*
+ * Structure used internally to retrieve
+ * information about a partition on a disk.
+ */
+struct partinfo {
+ struct disklabel *disklab;
+ struct partition *part;
+};
+
+/*
+ * Disk-specific ioctls.
+ */
+ /* get and set disklabel; DIOCGPART used internally */
+#define DIOCGDINFO _IOR('d', 101, struct disklabel)/* get */
+#define DIOCSDINFO _IOW('d', 102, struct disklabel)/* set */
+#define DIOCWDINFO _IOW('d', 103, struct disklabel)/* set, update disk */
+#define DIOCGPART _IOW('d', 104, struct partinfo) /* get partition */
+
+/* do format operation, read or write */
+#define DIOCRFORMAT _IOWR('d', 105, struct format_op)
+#define DIOCWFORMAT _IOWR('d', 106, struct format_op)
+
+#define DIOCSSTEP _IOW('d', 107, int) /* set step rate */
+#define DIOCSRETRIES _IOW('d', 108, int) /* set # of retries */
+#define DIOCWLABEL _IOW('d', 109, int) /* write en/disable label */
+
+#define DIOCSBAD _IOW('d', 110, struct dkbad) /* set kernel dkbad */
+
+#endif /* LOCORE */
+
+#if !defined(KERNEL) && !defined(LOCORE)
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+struct disklabel *getdiskbyname __P((const char *));
+__END_DECLS
+
+#endif
diff --git a/sys/sys/diskmbr.h b/sys/sys/diskmbr.h
new file mode 100644
index 000000000000..a25ee29e363d
--- /dev/null
+++ b/sys/sys/diskmbr.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 1987, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)disklabel.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Disk description table, see disktab(5)
+ */
+#define _PATH_DISKTAB "/etc/disktab"
+#define DISKTAB "/etc/disktab" /* deprecated */
+
+/*
+ * Each disk has a label which includes information about the hardware
+ * disk geometry, filesystem partitions, and drive specific information.
+ * The label is in block 0 or 1, possibly offset from the beginning
+ * to leave room for a bootstrap, etc.
+ */
+
+/* XXX these should be defined per controller (or drive) elsewhere, not here! */
+#ifdef i386
+#define LABELSECTOR 1 /* sector containing label */
+#define LABELOFFSET 0 /* offset of label in sector */
+#endif
+
+#ifndef LABELSECTOR
+#define LABELSECTOR 0 /* sector containing label */
+#endif
+
+#ifndef LABELOFFSET
+#define LABELOFFSET 64 /* offset of label in sector */
+#endif
+
+#define DISKMAGIC ((u_long) 0x82564557) /* The disk magic number */
+#ifndef MAXPARTITIONS
+#define MAXPARTITIONS 8
+#endif
+
+
+#ifndef LOCORE
+struct disklabel {
+ u_long d_magic; /* the magic number */
+ short d_type; /* drive type */
+ short d_subtype; /* controller/d_type specific */
+ char d_typename[16]; /* type name, e.g. "eagle" */
+ /*
+ * d_packname contains the pack identifier and is returned when
+ * the disklabel is read off the disk or in-core copy.
+ * d_boot0 and d_boot1 are the (optional) names of the
+ * primary (block 0) and secondary (block 1-15) bootstraps
+ * as found in /usr/mdec. These are returned when using
+ * getdiskbyname(3) to retrieve the values from /etc/disktab.
+ */
+#if defined(KERNEL) || defined(STANDALONE)
+ char d_packname[16]; /* pack identifier */
+#else
+ union {
+ char un_d_packname[16]; /* pack identifier */
+ struct {
+ char *un_d_boot0; /* primary bootstrap name */
+ char *un_d_boot1; /* secondary bootstrap name */
+ } un_b;
+ } d_un;
+#define d_packname d_un.un_d_packname
+#define d_boot0 d_un.un_b.un_d_boot0
+#define d_boot1 d_un.un_b.un_d_boot1
+#endif /* ! KERNEL or STANDALONE */
+ /* disk geometry: */
+ u_long d_secsize; /* # of bytes per sector */
+ u_long d_nsectors; /* # of data sectors per track */
+ u_long d_ntracks; /* # of tracks per cylinder */
+ u_long d_ncylinders; /* # of data cylinders per unit */
+ u_long d_secpercyl; /* # of data sectors per cylinder */
+ u_long d_secperunit; /* # of data sectors per unit */
+ /*
+ * Spares (bad sector replacements) below
+ * are not counted in d_nsectors or d_secpercyl.
+ * Spare sectors are assumed to be physical sectors
+ * which occupy space at the end of each track and/or cylinder.
+ */
+ u_short d_sparespertrack; /* # of spare sectors per track */
+ u_short d_sparespercyl; /* # of spare sectors per cylinder */
+ /*
+ * Alternate cylinders include maintenance, replacement,
+ * configuration description areas, etc.
+ */
+ u_long d_acylinders; /* # of alt. cylinders per unit */
+
+ /* hardware characteristics: */
+ /*
+ * d_interleave, d_trackskew and d_cylskew describe perturbations
+ * in the media format used to compensate for a slow controller.
+ * Interleave is physical sector interleave, set up by the formatter
+ * or controller when formatting. When interleaving is in use,
+ * logically adjacent sectors are not physically contiguous,
+ * but instead are separated by some number of sectors.
+ * It is specified as the ratio of physical sectors traversed
+ * per logical sector. Thus an interleave of 1:1 implies contiguous
+ * layout, while 2:1 implies that logical sector 0 is separated
+ * by one sector from logical sector 1.
+ * d_trackskew is the offset of sector 0 on track N
+ * relative to sector 0 on track N-1 on the same cylinder.
+ * Finally, d_cylskew is the offset of sector 0 on cylinder N
+ * relative to sector 0 on cylinder N-1.
+ */
+ u_short d_rpm; /* rotational speed */
+ u_short d_interleave; /* hardware sector interleave */
+ u_short d_trackskew; /* sector 0 skew, per track */
+ u_short d_cylskew; /* sector 0 skew, per cylinder */
+ u_long d_headswitch; /* head switch time, usec */
+ u_long d_trkseek; /* track-to-track seek, usec */
+ u_long d_flags; /* generic flags */
+#define NDDATA 5
+ u_long d_drivedata[NDDATA]; /* drive-type specific information */
+#define NSPARE 5
+ u_long d_spare[NSPARE]; /* reserved for future use */
+ u_long d_magic2; /* the magic number (again) */
+ u_short d_checksum; /* xor of data incl. partitions */
+
+ /* filesystem and partition information: */
+ u_short d_npartitions; /* number of partitions in following */
+ u_long d_bbsize; /* size of boot area at sn0, bytes */
+ u_long d_sbsize; /* max size of fs superblock, bytes */
+ struct partition { /* the partition table */
+ u_long p_size; /* number of sectors in partition */
+ u_long p_offset; /* starting sector */
+ u_long p_fsize; /* filesystem basic fragment size */
+ u_char p_fstype; /* filesystem type, see below */
+ u_char p_frag; /* filesystem fragments per block */
+ union {
+ u_short cpg; /* UFS: FS cylinders per group */
+ u_short sgs; /* LFS: FS segment shift */
+ } __partition_u1;
+#define p_cpg __partition_u1.cpg
+#define p_sgs __partition_u1.sgs
+ } d_partitions[MAXPARTITIONS]; /* actually may be more */
+};
+#else /* LOCORE */
+ /*
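+ * offsets for asm boot files; they match struct disklabel above when u_long is 4 bytes, u_short is 2 bytes and MAXPARTITIONS is 8.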
+ */
+ .set d_secsize,40
+ .set d_nsectors,44
+ .set d_ntracks,48
+ .set d_ncylinders,52
+ .set d_secpercyl,56
+ .set d_secperunit,60
+ .set d_end_,276 /* size of disk label */
+#endif /* LOCORE */
+
+/* d_type values: */
+#define DTYPE_SMD 1 /* SMD, XSMD; VAX hp/up */
+#define DTYPE_MSCP 2 /* MSCP */
+#define DTYPE_DEC 3 /* other DEC (rk, rl) */
+#define DTYPE_SCSI 4 /* SCSI */
+#define DTYPE_ESDI 5 /* ESDI interface */
+#define DTYPE_ST506 6 /* ST506 etc. */
+#define DTYPE_HPIB 7 /* CS/80 on HP-IB */
+#define DTYPE_HPFL 8 /* HP Fiber-link */
+#define DTYPE_FLOPPY 10 /* floppy */
+
+#ifdef DKTYPENAMES
+static char *dktypenames[] = { /* names in DTYPE_* order: entry N names d_type value N */
+ "unknown",
+ "SMD",
+ "MSCP",
+ "old DEC",
+ "SCSI",
+ "ESDI",
+ "ST506",
+ "HP-IB",
+ "HP-FL",
+ "type 9", /* placeholder: this header defines no DTYPE_* constant with value 9 */
+ "floppy",
+ 0 /* terminator, not counted by DKMAXTYPES */
+};
+#define DKMAXTYPES (sizeof(dktypenames) / sizeof(dktypenames[0]) - 1) /* number of names, excluding the terminator */
+#endif
+
+/*
+ * Filesystem type and version.
+ * Used to interpret other filesystem-specific
+ * per-partition information.
+ */
+#define FS_UNUSED 0 /* unused */
+#define FS_SWAP 1 /* swap */
+#define FS_V6 2 /* Sixth Edition */
+#define FS_V7 3 /* Seventh Edition */
+#define FS_SYSV 4 /* System V */
+#define FS_V71K 5 /* V7 with 1K blocks (4.1, 2.9) */
+#define FS_V8 6 /* Eighth Edition, 4K blocks */
+#define FS_BSDFFS 7 /* 4.2BSD fast file system */
+#define FS_MSDOS 8 /* MSDOS file system */
+#define FS_BSDLFS 9 /* 4.4BSD log-structured file system */
+#define FS_OTHER 10 /* in use, but unknown/unsupported */
+#define FS_HPFS 11 /* OS/2 high-performance file system */
+#define FS_ISO9660 12 /* ISO 9660, normally CD-ROM */
+#define FS_BOOT 13 /* partition contains bootstrap */
+
+#ifdef DKTYPENAMES /* same guard as dktypenames */
+static char *fstypenames[] = { /* names in FS_* order: entry N names p_fstype value N */
+ "unused",
+ "swap",
+ "Version 6",
+ "Version 7",
+ "System V",
+ "4.1BSD", /* FS_V71K */
+ "Eighth Edition",
+ "4.2BSD",
+ "MSDOS",
+ "4.4LFS",
+ "unknown", /* FS_OTHER */
+ "HPFS",
+ "ISO9660",
+ "boot",
+ 0 /* terminator, not counted by FSMAXTYPES */
+};
+#define FSMAXTYPES (sizeof(fstypenames) / sizeof(fstypenames[0]) - 1) /* number of names, excluding the terminator */
+#endif
+
+/*
+ * flags shared by various drives:
+ */
+#define D_REMOVABLE 0x01 /* removable media */
+#define D_ECC 0x02 /* supports ECC */
+#define D_BADSECT 0x04 /* supports bad sector forw. */
+#define D_RAMDISK 0x08 /* disk emulator */
+#define D_CHAIN 0x10 /* can do back-back transfers */
+
+/*
+ * Drive data for SMD.
+ */
+#define d_smdflags d_drivedata[0]
+#define D_SSE 0x1 /* supports skip sectoring */
+#define d_mindist d_drivedata[1]
+#define d_maxdist d_drivedata[2]
+#define d_sdist d_drivedata[3]
+
+/*
+ * Drive data for ST506.
+ */
+#define d_precompcyl d_drivedata[0]
+#define d_gap3 d_drivedata[1] /* used only when formatting */
+
+/*
+ * Drive data for SCSI.
+ */
+#define d_blind d_drivedata[0]
+
+#ifndef LOCORE
+/*
+ * Structure used to perform a format
+ * or other raw operation, returning data
+ * and/or register values.
+ * Register identification and format
+ * are device- and driver-dependent.
+ */
+struct format_op {
+ char *df_buf;
+ int df_count; /* value-result */
+ daddr_t df_startblk;
+ int df_reg[8]; /* result */
+};
+
+/*
+ * Structure used internally to retrieve
+ * information about a partition on a disk.
+ */
+struct partinfo {
+ struct disklabel *disklab;
+ struct partition *part;
+};
+
+/*
+ * Disk-specific ioctls.
+ */
+ /* get and set disklabel; DIOCGPART used internally */
+#define DIOCGDINFO _IOR('d', 101, struct disklabel)/* get */
+#define DIOCSDINFO _IOW('d', 102, struct disklabel)/* set */
+#define DIOCWDINFO _IOW('d', 103, struct disklabel)/* set, update disk */
+#define DIOCGPART _IOW('d', 104, struct partinfo) /* get partition */
+
+/* do format operation, read or write */
+#define DIOCRFORMAT _IOWR('d', 105, struct format_op)
+#define DIOCWFORMAT _IOWR('d', 106, struct format_op)
+
+#define DIOCSSTEP _IOW('d', 107, int) /* set step rate */
+#define DIOCSRETRIES _IOW('d', 108, int) /* set # of retries */
+#define DIOCWLABEL _IOW('d', 109, int) /* write en/disable label */
+
+#define DIOCSBAD _IOW('d', 110, struct dkbad) /* set kernel dkbad */
+
+#endif /* LOCORE */
+
+#if !defined(KERNEL) && !defined(LOCORE)
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+struct disklabel *getdiskbyname __P((const char *));
+__END_DECLS
+
+#endif
diff --git a/sys/sys/diskpc98.h b/sys/sys/diskpc98.h
new file mode 100644
index 000000000000..a25ee29e363d
--- /dev/null
+++ b/sys/sys/diskpc98.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 1987, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)disklabel.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Disk description table, see disktab(5)
+ */
+#define _PATH_DISKTAB "/etc/disktab"
+#define DISKTAB "/etc/disktab" /* deprecated */
+
+/*
+ * Each disk has a label which includes information about the hardware
+ * disk geometry, filesystem partitions, and drive specific information.
+ * The label is in block 0 or 1, possibly offset from the beginning
+ * to leave room for a bootstrap, etc.
+ */
+
+/* XXX these should be defined per controller (or drive) elsewhere, not here! */
+#ifdef i386
+#define LABELSECTOR 1 /* sector containing label */
+#define LABELOFFSET 0 /* offset of label in sector */
+#endif
+
+#ifndef LABELSECTOR
+#define LABELSECTOR 0 /* sector containing label */
+#endif
+
+#ifndef LABELOFFSET
+#define LABELOFFSET 64 /* offset of label in sector */
+#endif
+
+#define DISKMAGIC ((u_long) 0x82564557) /* The disk magic number */
+#ifndef MAXPARTITIONS
+#define MAXPARTITIONS 8
+#endif
+
+
+#ifndef LOCORE
+struct disklabel {
+ u_long d_magic; /* the magic number */
+ short d_type; /* drive type */
+ short d_subtype; /* controller/d_type specific */
+ char d_typename[16]; /* type name, e.g. "eagle" */
+ /*
+ * d_packname contains the pack identifier and is returned when
+ * the disklabel is read off the disk or in-core copy.
+ * d_boot0 and d_boot1 are the (optional) names of the
+ * primary (block 0) and secondary (block 1-15) bootstraps
+ * as found in /usr/mdec. These are returned when using
+ * getdiskbyname(3) to retrieve the values from /etc/disktab.
+ */
+#if defined(KERNEL) || defined(STANDALONE)
+ char d_packname[16]; /* pack identifier */
+#else
+ union {
+ char un_d_packname[16]; /* pack identifier */
+ struct {
+ char *un_d_boot0; /* primary bootstrap name */
+ char *un_d_boot1; /* secondary bootstrap name */
+ } un_b;
+ } d_un;
+#define d_packname d_un.un_d_packname
+#define d_boot0 d_un.un_b.un_d_boot0
+#define d_boot1 d_un.un_b.un_d_boot1
+#endif /* ! KERNEL or STANDALONE */
+ /* disk geometry: */
+ u_long d_secsize; /* # of bytes per sector */
+ u_long d_nsectors; /* # of data sectors per track */
+ u_long d_ntracks; /* # of tracks per cylinder */
+ u_long d_ncylinders; /* # of data cylinders per unit */
+ u_long d_secpercyl; /* # of data sectors per cylinder */
+ u_long d_secperunit; /* # of data sectors per unit */
+ /*
+ * Spares (bad sector replacements) below
+ * are not counted in d_nsectors or d_secpercyl.
+ * Spare sectors are assumed to be physical sectors
+ * which occupy space at the end of each track and/or cylinder.
+ */
+ u_short d_sparespertrack; /* # of spare sectors per track */
+ u_short d_sparespercyl; /* # of spare sectors per cylinder */
+ /*
+ * Alternate cylinders include maintenance, replacement,
+ * configuration description areas, etc.
+ */
+ u_long d_acylinders; /* # of alt. cylinders per unit */
+
+ /* hardware characteristics: */
+ /*
+ * d_interleave, d_trackskew and d_cylskew describe perturbations
+ * in the media format used to compensate for a slow controller.
+ * Interleave is physical sector interleave, set up by the formatter
+ * or controller when formatting. When interleaving is in use,
+ * logically adjacent sectors are not physically contiguous,
+ * but instead are separated by some number of sectors.
+ * It is specified as the ratio of physical sectors traversed
+ * per logical sector. Thus an interleave of 1:1 implies contiguous
+ * layout, while 2:1 implies that logical sector 0 is separated
+ * by one sector from logical sector 1.
+ * d_trackskew is the offset of sector 0 on track N
+ * relative to sector 0 on track N-1 on the same cylinder.
+ * Finally, d_cylskew is the offset of sector 0 on cylinder N
+ * relative to sector 0 on cylinder N-1.
+ */
+ u_short d_rpm; /* rotational speed */
+ u_short d_interleave; /* hardware sector interleave */
+ u_short d_trackskew; /* sector 0 skew, per track */
+ u_short d_cylskew; /* sector 0 skew, per cylinder */
+ u_long d_headswitch; /* head switch time, usec */
+ u_long d_trkseek; /* track-to-track seek, usec */
+ u_long d_flags; /* generic flags */
+#define NDDATA 5
+ u_long d_drivedata[NDDATA]; /* drive-type specific information */
+#define NSPARE 5
+ u_long d_spare[NSPARE]; /* reserved for future use */
+ u_long d_magic2; /* the magic number (again) */
+ u_short d_checksum; /* xor of data incl. partitions */
+
+ /* filesystem and partition information: */
+ u_short d_npartitions; /* number of partitions in following */
+ u_long d_bbsize; /* size of boot area at sn0, bytes */
+ u_long d_sbsize; /* max size of fs superblock, bytes */
+ struct partition { /* the partition table */
+ u_long p_size; /* number of sectors in partition */
+ u_long p_offset; /* starting sector */
+ u_long p_fsize; /* filesystem basic fragment size */
+ u_char p_fstype; /* filesystem type, see below */
+ u_char p_frag; /* filesystem fragments per block */
+ union {
+ u_short cpg; /* UFS: FS cylinders per group */
+ u_short sgs; /* LFS: FS segment shift */
+ } __partition_u1;
+#define p_cpg __partition_u1.cpg
+#define p_sgs __partition_u1.sgs
+ } d_partitions[MAXPARTITIONS]; /* actually may be more */
+};
+#else /* LOCORE */
+ /*
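+ * offsets for asm boot files; they match struct disklabel above when u_long is 4 bytes, u_short is 2 bytes and MAXPARTITIONS is 8.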
+ */
+ .set d_secsize,40
+ .set d_nsectors,44
+ .set d_ntracks,48
+ .set d_ncylinders,52
+ .set d_secpercyl,56
+ .set d_secperunit,60
+ .set d_end_,276 /* size of disk label */
+#endif /* LOCORE */
+
+/* d_type values: */
+#define DTYPE_SMD 1 /* SMD, XSMD; VAX hp/up */
+#define DTYPE_MSCP 2 /* MSCP */
+#define DTYPE_DEC 3 /* other DEC (rk, rl) */
+#define DTYPE_SCSI 4 /* SCSI */
+#define DTYPE_ESDI 5 /* ESDI interface */
+#define DTYPE_ST506 6 /* ST506 etc. */
+#define DTYPE_HPIB 7 /* CS/80 on HP-IB */
+#define DTYPE_HPFL 8 /* HP Fiber-link */
+#define DTYPE_FLOPPY 10 /* floppy */
+
+#ifdef DKTYPENAMES
+static char *dktypenames[] = { /* names in DTYPE_* order: entry N names d_type value N */
+ "unknown",
+ "SMD",
+ "MSCP",
+ "old DEC",
+ "SCSI",
+ "ESDI",
+ "ST506",
+ "HP-IB",
+ "HP-FL",
+ "type 9", /* placeholder: this header defines no DTYPE_* constant with value 9 */
+ "floppy",
+ 0 /* terminator, not counted by DKMAXTYPES */
+};
+#define DKMAXTYPES (sizeof(dktypenames) / sizeof(dktypenames[0]) - 1) /* number of names, excluding the terminator */
+#endif
+
+/*
+ * Filesystem type and version.
+ * Used to interpret other filesystem-specific
+ * per-partition information.
+ */
+#define FS_UNUSED 0 /* unused */
+#define FS_SWAP 1 /* swap */
+#define FS_V6 2 /* Sixth Edition */
+#define FS_V7 3 /* Seventh Edition */
+#define FS_SYSV 4 /* System V */
+#define FS_V71K 5 /* V7 with 1K blocks (4.1, 2.9) */
+#define FS_V8 6 /* Eighth Edition, 4K blocks */
+#define FS_BSDFFS 7 /* 4.2BSD fast file system */
+#define FS_MSDOS 8 /* MSDOS file system */
+#define FS_BSDLFS 9 /* 4.4BSD log-structured file system */
+#define FS_OTHER 10 /* in use, but unknown/unsupported */
+#define FS_HPFS 11 /* OS/2 high-performance file system */
+#define FS_ISO9660 12 /* ISO 9660, normally CD-ROM */
+#define FS_BOOT 13 /* partition contains bootstrap */
+
+#ifdef DKTYPENAMES /* same guard as dktypenames */
+static char *fstypenames[] = { /* names in FS_* order: entry N names p_fstype value N */
+ "unused",
+ "swap",
+ "Version 6",
+ "Version 7",
+ "System V",
+ "4.1BSD", /* FS_V71K */
+ "Eighth Edition",
+ "4.2BSD",
+ "MSDOS",
+ "4.4LFS",
+ "unknown", /* FS_OTHER */
+ "HPFS",
+ "ISO9660",
+ "boot",
+ 0 /* terminator, not counted by FSMAXTYPES */
+};
+#define FSMAXTYPES (sizeof(fstypenames) / sizeof(fstypenames[0]) - 1) /* number of names, excluding the terminator */
+#endif
+
+/*
+ * flags shared by various drives:
+ */
+#define D_REMOVABLE 0x01 /* removable media */
+#define D_ECC 0x02 /* supports ECC */
+#define D_BADSECT 0x04 /* supports bad sector forw. */
+#define D_RAMDISK 0x08 /* disk emulator */
+#define D_CHAIN 0x10 /* can do back-back transfers */
+
+/*
+ * Drive data for SMD.
+ */
+#define d_smdflags d_drivedata[0]
+#define D_SSE 0x1 /* supports skip sectoring */
+#define d_mindist d_drivedata[1]
+#define d_maxdist d_drivedata[2]
+#define d_sdist d_drivedata[3]
+
+/*
+ * Drive data for ST506.
+ */
+#define d_precompcyl d_drivedata[0]
+#define d_gap3 d_drivedata[1] /* used only when formatting */
+
+/*
+ * Drive data for SCSI.
+ */
+#define d_blind d_drivedata[0]
+
+#ifndef LOCORE
+/*
+ * Structure used to perform a format
+ * or other raw operation, returning data
+ * and/or register values.
+ * Register identification and format
+ * are device- and driver-dependent.
+ */
+struct format_op {
+ char *df_buf;
+ int df_count; /* value-result */
+ daddr_t df_startblk;
+ int df_reg[8]; /* result */
+};
+
+/*
+ * Structure used internally to retrieve
+ * information about a partition on a disk.
+ */
+struct partinfo {
+ struct disklabel *disklab;
+ struct partition *part;
+};
+
+/*
+ * Disk-specific ioctls.
+ */
+ /* get and set disklabel; DIOCGPART used internally */
+#define DIOCGDINFO _IOR('d', 101, struct disklabel)/* get */
+#define DIOCSDINFO _IOW('d', 102, struct disklabel)/* set */
+#define DIOCWDINFO _IOW('d', 103, struct disklabel)/* set, update disk */
+#define DIOCGPART _IOW('d', 104, struct partinfo) /* get partition */
+
+/* do format operation, read or write */
+#define DIOCRFORMAT _IOWR('d', 105, struct format_op)
+#define DIOCWFORMAT _IOWR('d', 106, struct format_op)
+
+#define DIOCSSTEP _IOW('d', 107, int) /* set step rate */
+#define DIOCSRETRIES _IOW('d', 108, int) /* set # of retries */
+#define DIOCWLABEL _IOW('d', 109, int) /* write en/disable label */
+
+#define DIOCSBAD _IOW('d', 110, struct dkbad) /* set kernel dkbad */
+
+#endif /* LOCORE */
+
+#if !defined(KERNEL) && !defined(LOCORE)
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+struct disklabel *getdiskbyname __P((const char *));
+__END_DECLS
+
+#endif
diff --git a/sys/sys/dkbad.h b/sys/sys/dkbad.h
new file mode 100644
index 000000000000..c574000aaf87
--- /dev/null
+++ b/sys/sys/dkbad.h
@@ -0,0 +1,68 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)dkbad.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Definitions needed to perform bad sector revectoring ala DEC STD 144.
+ *
+ * The bad sector information is located in the first 5 even numbered
+ * sectors of the last track of the disk pack. There are five identical
+ * copies of the information, described by the dkbad structure.
+ *
+ * Replacement sectors are allocated starting with the first sector before
+ * the bad sector information and working backwards towards the beginning of
+ * the disk. A maximum of 126 bad sectors are supported. The position of
+ * the bad sector in the bad sector table determines which replacement sector
+ * it corresponds to.
+ *
+ * The bad sector information and replacement sectors are conventionally
+ * only accessible through the 'c' file system partition of the disk. If
+ * that partition is used for a file system, the user is responsible for
+ * making sure that it does not overlap the bad sector information or any
+ * replacement sectors.
+ */
+struct dkbad {
+ long bt_csn; /* cartridge serial number */
+ u_short bt_mbz; /* unused; should be 0 */
+ u_short bt_flag; /* -1 => alignment cartridge */
+ struct bt_bad {
+ u_short bt_cyl; /* cylinder number of bad sector */
+ u_short bt_trksec; /* track and sector number */
+ } bt_bad[126]; /* bad sector table; 126 entries maximum */
+};
+
+#define ECC 0
+#define SSE 1
+#define BSE 2
+#define CONT 3
diff --git a/sys/sys/dkstat.h b/sys/sys/dkstat.h
new file mode 100644
index 000000000000..f7f5f1594a27
--- /dev/null
+++ b/sys/sys/dkstat.h
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)dkstat.h 8.2 (Berkeley) 1/21/94
+ */
+
+#define CP_USER 0
+#define CP_NICE 1
+#define CP_SYS 2
+#define CP_INTR 3
+#define CP_IDLE 4
+#define CPUSTATES 5 /* number of CP_* states; dimension of cp_time[] */
+
+#define DK_NDRIVE 8 /* dimension of the dk_* arrays below */
+#ifdef KERNEL
+long cp_time[CPUSTATES];
+long dk_seek[DK_NDRIVE];
+long dk_time[DK_NDRIVE];
+long dk_wds[DK_NDRIVE];
+long dk_wpms[DK_NDRIVE];
+long dk_xfer[DK_NDRIVE];
+
+int dk_busy;
+int dk_ndrive;
+
+long tk_cancc;
+long tk_nin;
+long tk_nout;
+long tk_rawcc;
+#endif /* KERNEL */
diff --git a/sys/sys/dmap.h b/sys/sys/dmap.h
new file mode 100644
index 000000000000..2a6f538259e2
--- /dev/null
+++ b/sys/sys/dmap.h
@@ -0,0 +1,60 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)dmap.h 8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_DMAP_H_
+#define _SYS_DMAP_H_
+
+/*
+ * Definitions for the mapping of virtual swap space to the physical swap
+ * area - the disk map.
+ */
+#define NDMAP 38 /* size of the swap area map */
+
+struct dmap {
+ swblk_t dm_size; /* current size used by process */
+ swblk_t dm_alloc; /* amount of physical swap space allocated */
+ swblk_t dm_map[NDMAP]; /* first disk block number in each chunk */
+};
+#ifdef KERNEL
+struct dmap zdmap;
+int dmmin, dmmax, dmtext;
+#endif
+
+/* The following structure is that ``returned'' from a call to vstodb(). */
+struct dblock {
+ swblk_t db_base; /* base of physical contig drum block */
+ swblk_t db_size; /* size of block */
+};
+#endif /* !_SYS_DMAP_H_ */
diff --git a/sys/sys/domain.h b/sys/sys/domain.h
new file mode 100644
index 000000000000..b056347d5394
--- /dev/null
+++ b/sys/sys/domain.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)domain.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Structure per communications domain.
+ */
+
+/*
+ * Forward structure declarations for function prototypes [sic].
+ */
+struct mbuf;
+
+struct domain {
+ int dom_family; /* AF_xxx */
+ char *dom_name; /* domain name string */
+ void (*dom_init) /* initialize domain data structures */
+ __P((void));
+ int (*dom_externalize) /* externalize access rights */
+ __P((struct mbuf *));
+ int (*dom_dispose) /* dispose of internalized rights */
+ __P((struct mbuf *));
+ struct protosw *dom_protosw, *dom_protoswNPROTOSW; /* protosw table bounds? confirm */
+ struct domain *dom_next; /* next domain (presumably on the `domains' list; confirm) */
+ int (*dom_rtattach) /* initialize routing table */
+ __P((void **, int));
+ int dom_rtoffset; /* an arg to rtattach, in bits */
+ int dom_maxrtkey; /* for routing layer */
+};
+
+#ifdef KERNEL
+struct domain *domains;
+#endif
diff --git a/sys/sys/errno.h b/sys/sys/errno.h
new file mode 100644
index 000000000000..a4e4ea6eb694
--- /dev/null
+++ b/sys/sys/errno.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)errno.h 8.5 (Berkeley) 1/21/94
+ */
+
+#ifndef KERNEL
+extern int errno; /* global error number */
+#endif
+
+#define EPERM 1 /* Operation not permitted */
+#define ENOENT 2 /* No such file or directory */
+#define ESRCH 3 /* No such process */
+#define EINTR 4 /* Interrupted system call */
+#define EIO 5 /* Input/output error */
+#define ENXIO 6 /* Device not configured */
+#define E2BIG 7 /* Argument list too long */
+#define ENOEXEC 8 /* Exec format error */
+#define EBADF 9 /* Bad file descriptor */
+#define ECHILD 10 /* No child processes */
+#define EDEADLK 11 /* Resource deadlock avoided */
+ /* 11 was EAGAIN */
+#define ENOMEM 12 /* Cannot allocate memory */
+#define EACCES 13 /* Permission denied */
+#define EFAULT 14 /* Bad address */
+#ifndef _POSIX_SOURCE
+#define ENOTBLK 15 /* Block device required */
+#endif
+#define EBUSY 16 /* Device busy */
+#define EEXIST 17 /* File exists */
+#define EXDEV 18 /* Cross-device link */
+#define ENODEV 19 /* Operation not supported by device */
+#define ENOTDIR 20 /* Not a directory */
+#define EISDIR 21 /* Is a directory */
+#define EINVAL 22 /* Invalid argument */
+#define ENFILE 23 /* Too many open files in system */
+#define EMFILE 24 /* Too many open files */
+#define ENOTTY 25 /* Inappropriate ioctl for device */
+#ifndef _POSIX_SOURCE
+#define ETXTBSY 26 /* Text file busy */
+#endif
+#define EFBIG 27 /* File too large */
+#define ENOSPC 28 /* No space left on device */
+#define ESPIPE 29 /* Illegal seek */
+#define EROFS 30 /* Read-only file system */
+#define EMLINK 31 /* Too many links */
+#define EPIPE 32 /* Broken pipe */
+
+/* math software */
+#define EDOM 33 /* Numerical argument out of domain */
+#define ERANGE 34 /* Result too large */
+
+/* non-blocking and interrupt i/o */
+#define EAGAIN 35 /* Resource temporarily unavailable */
+#ifndef _POSIX_SOURCE
+#define EWOULDBLOCK EAGAIN /* Operation would block */
+#define EINPROGRESS 36 /* Operation now in progress */
+#define EALREADY 37 /* Operation already in progress */
+
+/* ipc/network software -- argument errors */
+#define ENOTSOCK 38 /* Socket operation on non-socket */
+#define EDESTADDRREQ 39 /* Destination address required */
+#define EMSGSIZE 40 /* Message too long */
+#define EPROTOTYPE 41 /* Protocol wrong type for socket */
+#define ENOPROTOOPT 42 /* Protocol not available */
+#define EPROTONOSUPPORT 43 /* Protocol not supported */
+#define ESOCKTNOSUPPORT 44 /* Socket type not supported */
+#define EOPNOTSUPP 45 /* Operation not supported */
+#define EPFNOSUPPORT 46 /* Protocol family not supported */
+#define EAFNOSUPPORT 47 /* Address family not supported by protocol family */
+#define EADDRINUSE 48 /* Address already in use */
+#define EADDRNOTAVAIL 49 /* Can't assign requested address */
+
+/* ipc/network software -- operational errors */
+#define ENETDOWN 50 /* Network is down */
+#define ENETUNREACH 51 /* Network is unreachable */
+#define ENETRESET 52 /* Network dropped connection on reset */
+#define ECONNABORTED 53 /* Software caused connection abort */
+#define ECONNRESET 54 /* Connection reset by peer */
+#define ENOBUFS 55 /* No buffer space available */
+#define EISCONN 56 /* Socket is already connected */
+#define ENOTCONN 57 /* Socket is not connected */
+#define ESHUTDOWN 58 /* Can't send after socket shutdown */
+#define ETOOMANYREFS 59 /* Too many references: can't splice */
+#define ETIMEDOUT 60 /* Operation timed out */
+#define ECONNREFUSED 61 /* Connection refused */
+
+#define ELOOP 62 /* Too many levels of symbolic links */
+#endif /* _POSIX_SOURCE */
+#define ENAMETOOLONG 63 /* File name too long */
+
+/* should be rearranged */
+#ifndef _POSIX_SOURCE
+#define EHOSTDOWN 64 /* Host is down */
+#define EHOSTUNREACH 65 /* No route to host */
+#endif /* _POSIX_SOURCE */
+#define ENOTEMPTY 66 /* Directory not empty */
+
+/* quotas & mush */
+#ifndef _POSIX_SOURCE
+#define EPROCLIM 67 /* Too many processes */
+#define EUSERS 68 /* Too many users */
+#define EDQUOT 69 /* Disc quota exceeded */
+
+/* Network File System */
+#define ESTALE 70 /* Stale NFS file handle */
+#define EREMOTE 71 /* Too many levels of remote in path */
+#define EBADRPC 72 /* RPC struct is bad */
+#define ERPCMISMATCH 73 /* RPC version wrong */
+#define EPROGUNAVAIL 74 /* RPC prog. not avail */
+#define EPROGMISMATCH 75 /* Program version wrong */
+#define EPROCUNAVAIL 76 /* Bad procedure for program */
+#endif /* _POSIX_SOURCE */
+
+#define ENOLCK 77 /* No locks available */
+#define ENOSYS 78 /* Function not implemented */
+
+#ifndef _POSIX_SOURCE
+#define EFTYPE 79 /* Inappropriate file type or format */
+#define EAUTH 80 /* Authentication error */
+#define ENEEDAUTH 81 /* Need authenticator */
+#define ELAST 81 /* Must equal the largest errno value */
+#endif /* _POSIX_SOURCE */
+
+#ifdef KERNEL
+/* pseudo-errors returned inside kernel to modify return to process */
+#define ERESTART -1 /* restart syscall */
+#define EJUSTRETURN -2 /* don't modify regs, just return */
+#endif
diff --git a/sys/sys/exec.h b/sys/sys/exec.h
new file mode 100644
index 000000000000..443e14434148
--- /dev/null
+++ b/sys/sys/exec.h
@@ -0,0 +1,71 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)exec.h 8.3 (Berkeley) 1/21/94
+ */
+
+#include <machine/exec.h>
+
+/*
+ * The following structure is found at the top of the user stack of each
+ * user process. The ps program uses it to locate argv and environment
+ * strings. Programs that wish ps to display other information may modify
+ * it; normally ps_argvstr points to the text for argv[0], and ps_nargvstr
+ * is the same as the program's argc. The fields ps_envstr and ps_nenvstr
+ * are the equivalent for the environment.
+ */
+struct ps_strings {
+ char *ps_argvstr; /* first of 0 or more argument strings */
+ int ps_nargvstr; /* the number of argument strings */
+ char *ps_envstr; /* first of 0 or more environment strings */
+ int ps_nenvstr; /* the number of environment strings */
+};
+
+/*
+ * Address of ps_strings structure (in user space).
+ */
+#define PS_STRINGS \
+ ((struct ps_strings *)(USRSTACK - sizeof(struct ps_strings)))
+
+/*
+ * Arguments to the exec system call.
+ */
+struct execve_args {
+ char *fname; /* file to execute (presumably a path name; confirm) */
+ char **argp; /* argument strings (presumably NULL-terminated; confirm) */
+ char **envp; /* environment strings (presumably NULL-terminated; confirm) */
+};
diff --git a/sys/sys/fbio.h b/sys/sys/fbio.h
new file mode 100644
index 000000000000..63371b77ed8d
--- /dev/null
+++ b/sys/sys/fbio.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software developed by the Computer Systems
+ * Engineering group at Lawrence Berkeley Laboratory under DARPA
+ * contract BG 91-66 and contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fbio.h 8.2 (Berkeley) 10/30/93
+ *
+ * from: $Header: fbio.h,v 1.6 93/10/31 06:01:56 torek Exp $ (LBL)
+ */
+
+/*
+ * Frame buffer ioctls (from Sprite, trimmed to essentials for X11).
+ */
+
+/*
+ * Frame buffer type codes.
+ */
+#define FBTYPE_SUN1BW 0 /* multibus mono */
+#define FBTYPE_SUN1COLOR 1 /* multibus color */
+#define FBTYPE_SUN2BW 2 /* memory mono */
+#define FBTYPE_SUN2COLOR 3 /* color w/rasterop chips */
+#define FBTYPE_SUN2GP 4 /* GP1/GP2 */
+#define FBTYPE_SUN5COLOR 5 /* RoadRunner accelerator */
+#define FBTYPE_SUN3COLOR 6 /* memory color */
+#define FBTYPE_MEMCOLOR 7 /* memory 24-bit */
+#define FBTYPE_SUN4COLOR 8 /* memory color w/overlay */
+
+#define FBTYPE_NOTSUN1 9 /* reserved for customer */
+#define FBTYPE_NOTSUN2 10 /* reserved for customer */
+#define FBTYPE_NOTSUN3 11 /* reserved for customer */
+
+#define FBTYPE_SUNFAST_COLOR 12 /* accelerated 8bit */
+#define FBTYPE_SUNROP_COLOR 13 /* MEMCOLOR with rop h/w */
+#define FBTYPE_SUNFB_VIDEO 14 /* Simple video mixing */
+#define FBTYPE_RESERVED5 15 /* reserved, do not use */
+#define FBTYPE_RESERVED4 16 /* reserved, do not use */
+#define FBTYPE_RESERVED3 17 /* reserved, do not use */
+#define FBTYPE_RESERVED2 18 /* reserved, do not use */
+#define FBTYPE_RESERVED1 19 /* reserved, do not use */
+
+#define FBTYPE_LASTPLUSONE 20 /* one past the last type code; update when adding types */
+
+/*
+ * Frame buffer descriptor as returned by FBIOGTYPE.
+ */
+struct fbtype {
+ int fb_type; /* as defined above */
+ int fb_height; /* in pixels */
+ int fb_width; /* in pixels */
+ int fb_depth; /* bits per pixel */
+ int fb_cmsize; /* size of color map (entries) */
+ int fb_size; /* total size in bytes */
+};
+#define FBIOGTYPE _IOR('F', 0, struct fbtype)
+
+#ifdef notdef
+/*
+ * General purpose structure for passing info in and out of frame buffers
+ * (used for gp1) -- unsupported.
+ */
+struct fbinfo {
+ int fb_physaddr; /* physical frame buffer address */
+ int fb_hwwidth; /* fb board width */
+ int fb_hwheight; /* fb board height */
+ int fb_addrdelta; /* phys addr diff between boards */
+ u_char *fb_ropaddr; /* fb virtual addr */
+ int fb_unit; /* minor devnum of fb */
+};
+#define FBIOGINFO _IOR('F', 2, struct fbinfo)
+#endif
+
+/*
+ * Color map I/O.
+ */
+struct fbcmap {
+ int index; /* first element (0 origin) */
+ int count; /* number of elements */
+ u_char *red; /* red color map elements */
+ u_char *green; /* green color map elements */
+ u_char *blue; /* blue color map elements */
+};
+#define FBIOPUTCMAP _IOW('F', 3, struct fbcmap)
+#define FBIOGETCMAP _IOW('F', 4, struct fbcmap)
+
+/*
+ * Set/get attributes.
+ */
+#define FB_ATTR_NDEVSPECIFIC 8 /* no. of device specific values */
+#define FB_ATTR_NEMUTYPES 4 /* no. of emulation types */
+
+struct fbsattr {
+ int flags; /* flags; see below */
+ int emu_type; /* emulation type (-1 if unused) */
+ int dev_specific[FB_ATTR_NDEVSPECIFIC]; /* catchall */
+};
+#define FB_ATTR_AUTOINIT 1 /* emulation auto init flag */
+#define FB_ATTR_DEVSPECIFIC 2 /* dev. specific stuff valid flag */
+
+struct fbgattr {
+ int real_type; /* real device type */
+ int owner; /* PID of owner, 0 if myself */
+ struct fbtype fbtype; /* fbtype info for real device */
+ struct fbsattr sattr; /* see above */
+ int emu_types[FB_ATTR_NEMUTYPES]; /* possible emulations */
+ /* (-1 if unused) */
+};
+/* FBIOSATTR _IOW('F', 5, struct fbsattr) -- unsupported */
+#define FBIOGATTR _IOR('F', 6, struct fbgattr)
+
+/*
+ * Video control.
+ */
+#define FBVIDEO_OFF 0
+#define FBVIDEO_ON 1
+
+#define FBIOSVIDEO _IOW('F', 7, int)
+#define FBIOGVIDEO _IOR('F', 8, int)
+
+/*
+ * Hardware cursor control (for, e.g., CG6). A rather complex and icky
+ * interface that smells like VMS, but there it is....
+ */
+struct fbcurpos {
+ short x;
+ short y;
+};
+
+struct fbcursor {
+ short set; /* flags; see below */
+ short enable; /* nonzero => cursor on, 0 => cursor off */
+ struct fbcurpos pos; /* position on display */
+ struct fbcurpos hot; /* hot-spot within cursor */
+ struct fbcmap cmap; /* cursor color map */
+ struct fbcurpos size; /* number of valid bits in image & mask */
+ caddr_t image; /* cursor image bits */
+ caddr_t mask; /* cursor mask bits */
+};
+#define FB_CUR_SETCUR 0x01 /* set on/off (i.e., obey fbcursor.enable) */
+#define FB_CUR_SETPOS 0x02 /* set position */
+#define FB_CUR_SETHOT 0x04 /* set hot-spot */
+#define FB_CUR_SETCMAP 0x08 /* set cursor color map */
+#define FB_CUR_SETSHAPE 0x10 /* set size & bits */
+#define FB_CUR_SETALL (FB_CUR_SETCUR | FB_CUR_SETPOS | FB_CUR_SETHOT | \
+ FB_CUR_SETCMAP | FB_CUR_SETSHAPE)
+
+/* controls for cursor attributes & shape (including position) */
+#define FBIOSCURSOR _IOW('F', 24, struct fbcursor)
+#define FBIOGCURSOR _IOWR('F', 25, struct fbcursor)
+
+/* controls for cursor position only */
+#define FBIOSCURPOS _IOW('F', 26, struct fbcurpos)
+#define FBIOGCURPOS _IOW('F', 27, struct fbcurpos)
+
+/* get maximum cursor size */
+#define FBIOGCURMAX _IOR('F', 28, struct fbcurpos)
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
new file mode 100644
index 000000000000..62762f3498af
--- /dev/null
+++ b/sys/sys/fcntl.h
@@ -0,0 +1,190 @@
+/*-
+ * Copyright (c) 1983, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fcntl.h 8.3 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_FCNTL_H_
+#define _SYS_FCNTL_H_
+
+/*
+ * This file includes the definitions for open and fcntl
+ * described by POSIX for <fcntl.h>; it also includes
+ * related kernel definitions.
+ */
+
+#ifndef KERNEL
+#include <sys/types.h>
+#endif
+
+/*
+ * File status flags: these are used by open(2), fcntl(2).
+ * They are also used (indirectly) in the kernel file structure f_flags,
+ * which is a superset of the open/fcntl flags. Open flags and f_flags
+ * are inter-convertible using OFLAGS(fflags) and FFLAGS(oflags).
+ * Open/fcntl flags begin with O_; kernel-internal flags begin with F.
+ */
+/* open-only flags */
+#define O_RDONLY 0x0000 /* open for reading only */
+#define O_WRONLY 0x0001 /* open for writing only */
+#define O_RDWR 0x0002 /* open for reading and writing */
+#define O_ACCMODE 0x0003 /* mask for above modes */
+
+/*
+ * Kernel encoding of open mode; separate read and write bits that are
+ * independently testable: 1 greater than the above.
+ *
+ * XXX
+ * FREAD and FWRITE are excluded from the #ifdef KERNEL so that TIOCFLUSH,
+ * which was documented to use FREAD/FWRITE, continues to work.
+ */
+#ifndef _POSIX_SOURCE
+#define FREAD 0x0001
+#define FWRITE 0x0002
+#endif
+#define O_NONBLOCK 0x0004 /* no delay */
+#define O_APPEND 0x0008 /* set append mode */
+#ifndef _POSIX_SOURCE
+#define O_SHLOCK 0x0010 /* open with shared file lock */
+#define O_EXLOCK 0x0020 /* open with exclusive file lock */
+#define O_ASYNC 0x0040 /* signal pgrp when data ready */
+#define O_FSYNC 0x0080 /* synchronous writes */
+#endif
+#define O_CREAT 0x0200 /* create if nonexistent */
+#define O_TRUNC 0x0400 /* truncate to zero length */
+#define O_EXCL 0x0800 /* error if already exists */
+#ifdef KERNEL
+#define FMARK 0x1000 /* mark during gc() */
+#define FDEFER 0x2000 /* defer for next gc pass */
+#define FHASLOCK 0x4000 /* descriptor holds advisory lock */
+#endif
+
+/* defined by POSIX 1003.1; BSD default, so no bit required */
+#define O_NOCTTY 0 /* don't assign controlling terminal */
+
+#ifdef KERNEL
+/* convert between open() flags and fflags: adding 1 maps O_RDONLY/O_WRONLY/O_RDWR (0/1/2) to FREAD/FWRITE/FREAD|FWRITE (1/2/3) */
+#define FFLAGS(oflags) ((oflags) + 1) /* NOTE: an access mode of 3 would carry into bit 0x0004 (O_NONBLOCK) */
+#define OFLAGS(fflags) ((fflags) - 1) /* inverse of FFLAGS */
+
+/* bits to save after open */
+#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK)
+/* bits settable by fcntl(F_SETFL, ...) */
+#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK)
+#endif
+
+/*
+ * The O_* flags used to have only F* names, which were used in the kernel
+ * and by fcntl. We retain the F* names for the kernel f_flags field
+ * and for backward compatibility for fcntl.
+ */
+#ifndef _POSIX_SOURCE
+#define FAPPEND O_APPEND /* kernel/compat */
+#define FASYNC O_ASYNC /* kernel/compat */
+#define FFSYNC O_FSYNC /* kernel */
+#define FNONBLOCK O_NONBLOCK /* kernel */
+#define FNDELAY O_NONBLOCK /* compat */
+#define O_NDELAY O_NONBLOCK /* compat */
+#endif
+
+/*
+ * Constants used for fcntl(2)
+ */
+
+/* command values */
+#define F_DUPFD 0 /* duplicate file descriptor */
+#define F_GETFD 1 /* get file descriptor flags */
+#define F_SETFD 2 /* set file descriptor flags */
+#define F_GETFL 3 /* get file status flags */
+#define F_SETFL 4 /* set file status flags */
+#ifndef _POSIX_SOURCE
+#define F_GETOWN 5 /* get SIGIO/SIGURG proc/pgrp */
+#define F_SETOWN 6 /* set SIGIO/SIGURG proc/pgrp */
+#endif
+#define F_GETLK 7 /* get record locking information */
+#define F_SETLK 8 /* set record locking information */
+#define F_SETLKW 9 /* F_SETLK; wait if blocked */
+
+/* file descriptor flags (F_GETFD, F_SETFD) */
+#define FD_CLOEXEC 1 /* close-on-exec flag */
+
+/* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */
+#define F_RDLCK 1 /* shared or read lock */
+#define F_UNLCK 2 /* unlock */
+#define F_WRLCK 3 /* exclusive or write lock */
+#ifdef KERNEL
+#define F_WAIT 0x010 /* Wait until lock is granted */
+#define F_FLOCK 0x020 /* Use flock(2) semantics for lock */
+#define F_POSIX 0x040 /* Use POSIX semantics for lock */
+#endif
+
+/*
+ * Advisory file segment locking data type -
+ * information passed to system by user
+ */
+struct flock {
+ off_t l_start; /* starting offset */
+ off_t l_len; /* len = 0 means until end of file */
+ pid_t l_pid; /* lock owner */
+ short l_type; /* lock type: read/write, etc. */
+ short l_whence; /* type of l_start */
+};
+
+
+#ifndef _POSIX_SOURCE
+/* lock operations for flock(2) */
+#define LOCK_SH 0x01 /* shared file lock */
+#define LOCK_EX 0x02 /* exclusive file lock */
+#define LOCK_NB 0x04 /* don't block when locking */
+#define LOCK_UN 0x08 /* unlock file */
+#endif
+
+
+#ifndef KERNEL
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int open __P((const char *, int, ...));
+int creat __P((const char *, mode_t));
+int fcntl __P((int, int, ...));
+#ifndef _POSIX_SOURCE
+int flock __P((int, int));
+#endif /* !_POSIX_SOURCE */
+__END_DECLS
+#endif
+
+#endif /* !_SYS_FCNTL_H_ */
diff --git a/sys/sys/file.h b/sys/sys/file.h
new file mode 100644
index 000000000000..3d82190669ad
--- /dev/null
+++ b/sys/sys/file.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)file.h 8.1 (Berkeley) 6/2/93
+ */
+
+#include <sys/fcntl.h>
+#include <sys/unistd.h>
+
+#ifdef KERNEL
+struct proc;
+struct uio;
+
+/*
+ * Kernel descriptor table.
+ * One entry for each open kernel vnode and socket.
+ */
+struct file {
+ struct file *f_filef; /* list of active files (presumably the forward link -- confirm) */
+ struct file **f_fileb; /* list of active files (presumably the back link -- confirm) */
+ short f_flag; /* see fcntl.h */
+#define DTYPE_VNODE 1 /* file */
+#define DTYPE_SOCKET 2 /* communications endpoint */
+ short f_type; /* descriptor type (one of DTYPE_* above) */
+ short f_count; /* reference count */
+ short f_msgcount; /* references from message queue */
+ struct ucred *f_cred; /* credentials associated with descriptor */
+ struct fileops { /* table of operations: read, write, ioctl, select, close */
+ int (*fo_read) __P((struct file *fp, struct uio *uio,
+ struct ucred *cred));
+ int (*fo_write) __P((struct file *fp, struct uio *uio,
+ struct ucred *cred));
+ int (*fo_ioctl) __P((struct file *fp, int com,
+ caddr_t data, struct proc *p));
+ int (*fo_select) __P((struct file *fp, int which,
+ struct proc *p));
+ int (*fo_close) __P((struct file *fp, struct proc *p));
+ } *f_ops;
+ off_t f_offset; /* NOTE(review): presumably the current file offset -- confirm */
+ caddr_t f_data; /* vnode or socket */
+};
+
+extern struct file *filehead; /* head of list of open files */
+extern int maxfiles; /* kernel limit on number of open files */
+extern int nfiles; /* actual number of open files */
+
+#endif /* KERNEL */
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
new file mode 100644
index 000000000000..1071bc105970
--- /dev/null
+++ b/sys/sys/filedesc.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)filedesc.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * This structure is used for the management of descriptors. It may be
+ * shared by multiple processes.
+ *
+ * A process is initially started out with NDFILE descriptors stored within
+ * this structure, selected to be enough for typical applications based on
+ * the historical limit of 20 open files (and the usage of descriptors by
+ * shells). If these descriptors are exhausted, a larger descriptor table
+ * may be allocated, up to a process' resource limit; the internal arrays
+ * are then unused. The initial expansion is set to NDEXTENT; each time
+ * it runs out, it is doubled until the resource limit is reached. NDEXTENT
+ * should be selected to be the biggest multiple of OFILESIZE (see below)
+ * that will fit in a power-of-two sized piece of memory.
+ */
+#define NDFILE 20 /* size of the fd_dfiles/fd_dfileflags arrays in struct filedesc0 */
+#define NDEXTENT 50 /* 50 * OFILESIZE = 250 bytes in 256-byte alloc., assuming 4-byte pointers */
+
+struct filedesc {
+ struct file **fd_ofiles; /* file structures for open files */
+ char *fd_ofileflags; /* per-process open file flags */
+ struct vnode *fd_cdir; /* current directory */
+ struct vnode *fd_rdir; /* root directory */
+ int fd_nfiles; /* number of open files allocated */
+ u_short fd_lastfile; /* high-water mark of fd_ofiles */
+ u_short fd_freefile; /* approx. next free file */
+ u_short fd_cmask; /* mask for file creation */
+ u_short fd_refcnt; /* reference count */
+};
+
+/*
+ * Basic allocation of descriptors:
+ * one of the above, plus arrays for NDFILE descriptors.
+ */
+struct filedesc0 {
+ struct filedesc fd_fd;
+ /*
+ * These arrays are used when the number of open files is
+ * <= NDFILE, and are then pointed to by the pointers above.
+ */
+ struct file *fd_dfiles[NDFILE];
+ char fd_dfileflags[NDFILE];
+};
+
+/*
+ * Per-process open flags.
+ */
+#define UF_EXCLOSE 0x01 /* auto-close on exec */
+#define UF_MAPPED 0x02 /* mapped from device */
+
+/*
+ * Storage required per open file descriptor.
+ */
+#define OFILESIZE (sizeof(struct file *) + sizeof(char))
+
+#ifdef KERNEL
+/*
+ * Kernel global variables and routines.
+ */
+int fdalloc __P((struct proc *p, int want, int *result));
+int fdavail __P((struct proc *p, int n));
+int falloc __P((struct proc *p, struct file **resultfp, int *resultfd));
+struct filedesc *fdcopy __P((struct proc *p));
+void fdfree __P((struct proc *p));
+#endif
diff --git a/sys/sys/filio.h b/sys/sys/filio.h
new file mode 100644
index 000000000000..5c8789b882ba
--- /dev/null
+++ b/sys/sys/filio.h
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)filio.h 8.1 (Berkeley) 3/28/94
+ */
+
+#ifndef _SYS_FILIO_H_
+#define _SYS_FILIO_H_
+
+#include <sys/ioccom.h>
+
+/* Generic file-descriptor ioctl's. */
+#define FIOCLEX _IO('f', 1) /* set close on exec on fd */
+#define FIONCLEX _IO('f', 2) /* remove close on exec */
+#define FIONREAD _IOR('f', 127, int) /* get # bytes to read */
+#define FIONBIO _IOW('f', 126, int) /* set/clear non-blocking i/o */
+#define FIOASYNC _IOW('f', 125, int) /* set/clear async i/o */
+#define FIOSETOWN _IOW('f', 124, int) /* set owner */
+#define FIOGETOWN _IOR('f', 123, int) /* get owner */
+
+#endif /* !_SYS_FILIO_H_ */
diff --git a/sys/sys/gmon.h b/sys/sys/gmon.h
new file mode 100644
index 000000000000..b103df80a8a7
--- /dev/null
+++ b/sys/sys/gmon.h
@@ -0,0 +1,159 @@
+/*-
+ * Copyright (c) 1982, 1986, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)gmon.h 8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_GMON_H_
+#define _SYS_GMON_H_
+
+#include <machine/profile.h>
+
+/*
+ * Structure prepended to gmon.out profiling data file.
+ */
+struct gmonhdr {
+ u_long lpc; /* base pc address of sample buffer */
+ u_long hpc; /* max pc address of sampled buffer */
+ int ncnt; /* size of sample buffer (plus this header) */
+ int version; /* version number */
+ int profrate; /* profiling clock rate */
+ int spare[3]; /* reserved */
+};
+#define GMONVERSION 0x00051879
+
+/*
+ * histogram counters are unsigned shorts (according to the kernel).
+ */
+#define HISTCOUNTER unsigned short
+
+/*
+ * fraction of text space to allocate for histogram counters here, 1/2
+ */
+#define HISTFRACTION 2
+
+/*
+ * Fraction of text space to allocate for from hash buckets.
+ * The value of HASHFRACTION is based on the minimum number of bytes
+ * of separation between two subroutine call points in the object code.
+ * Given MIN_SUBR_SEPARATION bytes of separation the value of
+ * HASHFRACTION is calculated as:
+ *
+ * HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1);
+ *
+ * For example, on the VAX, the shortest two call sequence is:
+ *
+ * calls $0,(r0)
+ * calls $0,(r0)
+ *
+ * which is separated by only three bytes, thus HASHFRACTION is
+ * calculated as:
+ *
+ * HASHFRACTION = 3 / (2 * 2 - 1) = 1
+ *
+ * Note that the division above rounds down, thus if MIN_SUBR_SEPARATION
+ * is less than three, this algorithm will not work!
+ *
+ * In practice, however, call instructions are rarely at a minimal
+ * distance. Hence, we will define HASHFRACTION to be 2 across all
+ * architectures. This saves a reasonable amount of space for
+ * profiling data structures without (in practice) sacrificing
+ * any granularity.
+ */
+#define HASHFRACTION 2
+
+/*
+ * percent of text space to allocate for tostructs with a minimum.
+ */
+#define ARCDENSITY 2
+#define MINARCS 50
+#define MAXARCS ((1 << (8 * sizeof(HISTCOUNTER))) - 2)
+
+struct tostruct {
+ u_long selfpc; /* NOTE(review): presumably pc of the called routine -- confirm */
+ long count; /* NOTE(review): presumably how often this arc was taken -- confirm */
+ u_short link; /* NOTE(review): presumably index of the next tostruct in a chain -- confirm */
+ u_short pad; /* NOTE(review): looks like explicit padding -- confirm */
+};
+
+/*
+ * a raw arc, with pointers to the calling site and
+ * the called site and a count.
+ */
+struct rawarc {
+ u_long raw_frompc; /* the calling site */
+ u_long raw_selfpc; /* the called site */
+ long raw_count; /* the count for this arc */
+};
+
+/*
+ * general rounding functions.
+ */
+#define ROUNDDOWN(x,y) (((x)/(y))*(y)) /* relies on integer division truncating; y is evaluated twice */
+#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y)) /* adds y-1 before the truncating division; y is evaluated three times */
+
+/*
+ * The profiling data structures are housed in this structure.
+ */
+struct gmonparam {
+ int state; /* NOTE(review): presumably one of the GMON_PROF_* states -- confirm */
+ u_short *kcount; /* NOTE(review): presumably the profile tick count buffer (cf. GPROF_COUNT) -- confirm */
+ u_long kcountsize; /* NOTE(review): presumably size of kcount; units not shown here */
+ u_short *froms; /* NOTE(review): presumably the from-location hash buckets (cf. GPROF_FROMS) -- confirm */
+ u_long fromssize; /* NOTE(review): presumably size of froms; units not shown here */
+ struct tostruct *tos; /* NOTE(review): presumably the destination/count structures (cf. GPROF_TOS) -- confirm */
+ u_long tossize; /* NOTE(review): presumably size of tos; units not shown here */
+ long tolimit;
+ u_long lowpc;
+ u_long highpc;
+ u_long textsize;
+ u_long hashfraction;
+};
+extern struct gmonparam _gmonparam;
+
+/*
+ * Possible states of profiling.
+ */
+#define GMON_PROF_ON 0
+#define GMON_PROF_BUSY 1
+#define GMON_PROF_ERROR 2
+#define GMON_PROF_OFF 3
+
+/*
+ * Sysctl definitions for extracting profiling information from the kernel.
+ */
+#define GPROF_STATE 0 /* int: profiling enabling variable */
+#define GPROF_COUNT 1 /* struct: profile tick count buffer */
+#define GPROF_FROMS 2 /* struct: from location hash bucket */
+#define GPROF_TOS 3 /* struct: destination/count structure */
+#define GPROF_GMONPARAM 4 /* struct: profiling parameters (see above) */
+#endif /* !_SYS_GMON_H_ */
diff --git a/sys/sys/ioccom.h b/sys/sys/ioccom.h
new file mode 100644
index 000000000000..5bc11b328bdf
--- /dev/null
+++ b/sys/sys/ioccom.h
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ioccom.h 8.2 (Berkeley) 3/28/94
+ */
+
+#ifndef _SYS_IOCCOM_H_
+#define _SYS_IOCCOM_H_
+
+/*
+ * Ioctl's have the command encoded in the lower word, and the size of
+ * any in or out parameters in the upper word. The high 3 bits of the
+ * upper word are used to encode the in/out status of the parameter.
+ */
+#define IOCPARM_MASK 0x1fff /* parameter length, at most 13 bits */
+#define IOCPARM_LEN(x) (((x) >> 16) & IOCPARM_MASK)
+#define IOCBASECMD(x) ((x) & ~(IOCPARM_MASK << 16))
+#define IOCGROUP(x) (((x) >> 8) & 0xff)
+
+#define IOCPARM_MAX NBPG /* max size of ioctl, mult. of NBPG */
+#define IOC_VOID 0x20000000 /* no parameters */
+#define IOC_OUT 0x40000000 /* copy out parameters */
+#define IOC_IN 0x80000000 /* copy in parameters */
+#define IOC_INOUT (IOC_IN|IOC_OUT)
+#define IOC_DIRMASK 0xe0000000 /* mask for IN/OUT/VOID */
+
+#define _IOC(inout,group,num,len) \
+ ((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num)) /* direction bits | masked length << 16 | group << 8 | number; every argument parenthesized so expression arguments bind correctly */
+#define _IO(g,n) _IOC(IOC_VOID, (g), (n), 0)
+#define _IOR(g,n,t) _IOC(IOC_OUT, (g), (n), sizeof(t))
+#define _IOW(g,n,t) _IOC(IOC_IN, (g), (n), sizeof(t))
+/* this should be _IORW, but stdio got there first */
+#define _IOWR(g,n,t) _IOC(IOC_INOUT, (g), (n), sizeof(t))
+
+#endif /* !_SYS_IOCCOM_H_ */
diff --git a/sys/sys/ioctl.h b/sys/sys/ioctl.h
new file mode 100644
index 000000000000..d04394fd1811
--- /dev/null
+++ b/sys/sys/ioctl.h
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ioctl.h 8.6 (Berkeley) 3/28/94
+ */
+
+#ifndef _SYS_IOCTL_H_
+#define _SYS_IOCTL_H_
+
+#include <sys/ttycom.h>
+
+/*
+ * Pun for SunOS prior to 3.2. SunOS 3.2 and later support TIOCGWINSZ
+ * and TIOCSWINSZ (yes, even 3.2-3.5, the fact that it wasn't documented
+ * notwithstanding).
+ */
+struct ttysize {
+ unsigned short ts_lines; /* NOTE(review): presumably lines (rows) of the terminal -- confirm */
+ unsigned short ts_cols; /* NOTE(review): presumably columns of the terminal -- confirm */
+ unsigned short ts_xxx; /* NOTE(review): purpose not shown here; TIOCGSIZE aliases TIOCGWINSZ, so presumably mirrors struct winsize -- confirm in <sys/ttycom.h> */
+ unsigned short ts_yyy; /* NOTE(review): as ts_xxx -- confirm in <sys/ttycom.h> */
+};
+#define TIOCGSIZE TIOCGWINSZ
+#define TIOCSSIZE TIOCSWINSZ
+
+#include <sys/ioccom.h>
+
+#include <sys/filio.h>
+#include <sys/sockio.h>
+
+#ifndef KERNEL
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int ioctl __P((int, unsigned long, ...));
+__END_DECLS
+#endif /* !KERNEL */
+#endif /* !_SYS_IOCTL_H_ */
+
+/*
+ * Keep outside _SYS_IOCTL_H_
+ * Compatibility with old terminal driver
+ *
+ * Source level -> #define USE_OLD_TTY
+ * Kernel level -> options COMPAT_43 or COMPAT_SUNOS
+ */
+#if defined(USE_OLD_TTY) || defined(COMPAT_43) || defined(COMPAT_SUNOS)
+#include <sys/ioctl_compat.h>
+#endif
diff --git a/sys/sys/ioctl_compat.h b/sys/sys/ioctl_compat.h
new file mode 100644
index 000000000000..fd87b514cf80
--- /dev/null
+++ b/sys/sys/ioctl_compat.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ioctl_compat.h 8.4 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_IOCTL_COMPAT_H_
+#define _SYS_IOCTL_COMPAT_H_
+
+#include <sys/ttychars.h>
+#include <sys/ttydev.h>
+
+struct tchars { /* argument type of the TIOCSETC and TIOCGETC ioctls */
+ char t_intrc; /* interrupt */
+ char t_quitc; /* quit */
+ char t_startc; /* start output */
+ char t_stopc; /* stop output */
+ char t_eofc; /* end-of-file */
+ char t_brkc; /* input delimiter (like nl) */
+};
+
+struct ltchars { /* argument type of the TIOCSLTC and TIOCGLTC ioctls */
+ char t_suspc; /* stop process signal */
+ char t_dsuspc; /* delayed stop process signal */
+ char t_rprntc; /* reprint line */
+ char t_flushc; /* flush output (toggles) */
+ char t_werasc; /* word erase */
+ char t_lnextc; /* literal next character */
+};
+
+/*
+ * Structure for the TIOCGETP, TIOCSETP and TIOCSETN ioctls.
+ */
+#ifndef _SGTTYB_
+#define _SGTTYB_
+struct sgttyb {
+ char sg_ispeed; /* input speed */
+ char sg_ospeed; /* output speed */
+ char sg_erase; /* erase character */
+ char sg_kill; /* kill character */
+ short sg_flags; /* mode flags */
+};
+#endif
+
+#ifdef USE_OLD_TTY
+# undef TIOCGETD
+# define TIOCGETD _IOR('t', 0, int) /* get line discipline */
+# undef TIOCSETD
+# define TIOCSETD _IOW('t', 1, int) /* set line discipline */
+#else
+# define OTIOCGETD _IOR('t', 0, int) /* get line discipline */
+# define OTIOCSETD _IOW('t', 1, int) /* set line discipline */
+#endif
+#define TIOCHPCL _IO('t', 2) /* hang up on last close */
+#define TIOCGETP _IOR('t', 8,struct sgttyb)/* get parameters -- gtty */
+#define TIOCSETP _IOW('t', 9,struct sgttyb)/* set parameters -- stty */
+#define TIOCSETN _IOW('t',10,struct sgttyb)/* as above, but no flushtty*/
+#define TIOCSETC _IOW('t',17,struct tchars)/* set special characters */
+#define TIOCGETC _IOR('t',18,struct tchars)/* get special characters */
+#define TANDEM 0x00000001 /* send stopc on out q full */
+#define CBREAK 0x00000002 /* half-cooked mode */
+#define LCASE 0x00000004 /* simulate lower case */
+#define ECHO 0x00000008 /* echo input */
+#define CRMOD 0x00000010 /* map \r to \r\n on output */
+#define RAW 0x00000020 /* no i/o processing */
+#define ODDP 0x00000040 /* get/send odd parity */
+#define EVENP 0x00000080 /* get/send even parity */
+#define ANYP 0x000000c0 /* get any parity/send none */
+#define NLDELAY 0x00000300 /* \n delay */
+#define NL0 0x00000000
+#define NL1 0x00000100 /* tty 37 */
+#define NL2 0x00000200 /* vt05 */
+#define NL3 0x00000300
+#define TBDELAY 0x00000c00 /* horizontal tab delay */
+#define TAB0 0x00000000
+#define TAB1 0x00000400 /* tty 37 */
+#define TAB2 0x00000800
+#define XTABS 0x00000c00 /* expand tabs on output */
+#define CRDELAY 0x00003000 /* \r delay */
+#define CR0 0x00000000
+#define CR1 0x00001000 /* tn 300 */
+#define CR2 0x00002000 /* tty 37 */
+#define CR3 0x00003000 /* concept 100 */
+#define VTDELAY 0x00004000 /* vertical tab delay */
+#define FF0 0x00000000
+#define FF1 0x00004000 /* tty 37 */
+#define BSDELAY 0x00008000 /* \b delay */
+#define BS0 0x00000000
+#define BS1 0x00008000
+#define ALLDELAY (NLDELAY|TBDELAY|CRDELAY|VTDELAY|BSDELAY)
+#define CRTBS 0x00010000 /* do backspacing for crt */
+#define PRTERA 0x00020000 /* \ ... / erase */
+#define CRTERA 0x00040000 /* " \b " to wipe out char */
+#define TILDE 0x00080000 /* hazeltine tilde kludge */
+#define MDMBUF 0x00100000 /*start/stop output on carrier*/
+#define LITOUT 0x00200000 /* literal output */
+#define TOSTOP 0x00400000 /*SIGSTOP on background output*/
+#define FLUSHO 0x00800000 /* flush output to terminal */
+#define NOHANG 0x01000000 /* (no-op) was no SIGHUP on carrier drop */
+#define L001000 0x02000000
+#define CRTKIL 0x04000000 /* kill line with " \b " */
+#define PASS8 0x08000000
+#define CTLECH 0x10000000 /* echo control chars as ^X */
+#define PENDIN 0x20000000 /* tp->t_rawq needs reread */
+#define DECCTQ 0x40000000 /* only ^Q starts after ^S */
+#define NOFLSH 0x80000000 /* no output flush on signal */
+#define TIOCLBIS _IOW('t', 127, int) /* bis local mode bits */
+#define TIOCLBIC _IOW('t', 126, int) /* bic local mode bits */
+#define TIOCLSET _IOW('t', 125, int) /* set entire local mode word */
+#define TIOCLGET _IOR('t', 124, int) /* get local modes */
+#define LCRTBS (CRTBS>>16) /* each L* name is the same flag shifted down 16 bits */
+#define LPRTERA (PRTERA>>16)
+#define LCRTERA (CRTERA>>16)
+#define LTILDE (TILDE>>16)
+#define LMDMBUF (MDMBUF>>16)
+#define LLITOUT (LITOUT>>16)
+#define LTOSTOP (TOSTOP>>16)
+#define LFLUSHO (FLUSHO>>16)
+#define LNOHANG (NOHANG>>16)
+#define LCRTKIL (CRTKIL>>16)
+#define LPASS8 (PASS8>>16)
+#define LCTLECH (CTLECH>>16)
+#define LPENDIN (PENDIN>>16)
+#define LDECCTQ (DECCTQ>>16)
+#define LNOFLSH (NOFLSH>>16)
+#define TIOCSLTC _IOW('t',117,struct ltchars)/* set local special chars*/
+#define TIOCGLTC _IOR('t',116,struct ltchars)/* get local special chars*/
+#define OTIOCCONS _IO('t', 98) /* for hp300 -- sans int arg */
+#define OTTYDISC 0
+#define NETLDISC 1
+#define NTTYDISC 2
+
+#endif /* !_SYS_IOCTL_COMPAT_H_ */
diff --git a/sys/sys/ipc.h b/sys/sys/ipc.h
new file mode 100644
index 000000000000..cc036a8e83b5
--- /dev/null
+++ b/sys/sys/ipc.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 1988 University of Utah.
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ipc.h 8.3 (Berkeley) 1/21/94
+ */
+
+/*
+ * SVID compatible ipc.h file
+ */
+#ifndef _SYS_IPC_H_
+#define _SYS_IPC_H_
+
+typedef long key_t; /* XXX should be in types.h */
+
+struct ipc_perm {
+ ushort cuid; /* creator user id */
+ ushort cgid; /* creator group id */
+ ushort uid; /* user id */
+ ushort gid; /* group id */
+ ushort mode; /* r/w permission */
+ ushort seq; /* sequence # (to generate unique msg/sem/shm id) */
+ key_t key; /* user specified msg/sem/shm key */
+};
+
+/* common mode bits */
+#define IPC_R 00400 /* read permission */
+#define IPC_W 00200 /* write/alter permission */
+
+/* SVID required constants (same values as system 5) */
+#define IPC_CREAT 01000 /* create entry if key does not exist */
+#define IPC_EXCL 02000 /* fail if key exists */
+#define IPC_NOWAIT 04000 /* error if request must wait */
+
+#define IPC_PRIVATE ((key_t)0) /* private key; parenthesized so it expands as one operand */
+
+#define IPC_RMID 0 /* remove identifier */
+#define IPC_SET 1 /* set options */
+#define IPC_STAT 2 /* get options */
+
+#endif /* !_SYS_IPC_H_ */
diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h
new file mode 100644
index 000000000000..682e6c8c1940
--- /dev/null
+++ b/sys/sys/kernel.h
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kernel.h 8.3 (Berkeley) 1/21/94
+ */
+
+/* Global variables for the kernel. */
+
+/* 1.1 */
+extern long hostid;
+extern char hostname[MAXHOSTNAMELEN];
+extern int hostnamelen;
+
+/* 1.2 */
+extern volatile struct timeval mono_time;
+extern struct timeval boottime;
+extern struct timeval runtime;
+extern volatile struct timeval time;
+extern struct timezone tz; /* XXX */
+
+extern int tick; /* usec per tick (1000000 / hz) */
+extern int hz; /* system clock's frequency */
+extern int stathz; /* statistics clock's frequency */
+extern int profhz; /* profiling clock's frequency */
+extern int lbolt; /* once a second sleep address */
diff --git a/sys/sys/ktrace.h b/sys/sys/ktrace.h
new file mode 100644
index 000000000000..1623c3562fe1
--- /dev/null
+++ b/sys/sys/ktrace.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ktrace.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * operations to ktrace system call (KTROP(op))
+ */
+#define KTROP_SET 0 /* set trace points */
+#define KTROP_CLEAR 1 /* clear trace points */
+#define KTROP_CLEARFILE 2 /* stop all tracing to file */
+#define KTROP(o) ((o)&3) /* macro to extract operation */
+/*
+ * flags (ORed in with operation)
+ */
+#define KTRFLAG_DESCEND 4 /* perform op on all children too */
+
+/*
+ * ktrace record header
+ */
+struct ktr_header {
+ int ktr_len; /* length of buf */
+ short ktr_type; /* trace record type */
+ pid_t ktr_pid; /* process id */
+ char ktr_comm[MAXCOMLEN+1]; /* command name */
+ struct timeval ktr_time; /* timestamp */
+ caddr_t ktr_buf;
+};
+
+/*
+ * Test for kernel trace point: type's bit set in p_traceflag, KTRFAC_ACTIVE clear
+ */
+#define KTRPOINT(p, type) \
+ (((p)->p_traceflag & ((1<<(type))|KTRFAC_ACTIVE)) == (1<<(type)))
+
+/*
+ * ktrace record types
+ */
+
+/*
+ * KTR_SYSCALL - system call record
+ */
+#define KTR_SYSCALL 1
+struct ktr_syscall {
+ short ktr_code; /* syscall number */
+ short ktr_narg; /* number of arguments */
+ /*
+ * followed by ktr_narg ints
+ */
+};
+
+/*
+ * KTR_SYSRET - return from system call record
+ */
+#define KTR_SYSRET 2
+struct ktr_sysret {
+ short ktr_code;
+ short ktr_eosys;
+ int ktr_error;
+ int ktr_retval;
+};
+
+/*
+ * KTR_NAMEI - namei record
+ */
+#define KTR_NAMEI 3
+ /* record contains pathname */
+
+/*
+ * KTR_GENIO - trace generic process i/o
+ */
+#define KTR_GENIO 4
+struct ktr_genio {
+ int ktr_fd;
+ enum uio_rw ktr_rw;
+ /*
+ * followed by data successfully read/written
+ */
+};
+
+/*
+ * KTR_PSIG - trace processed signal
+ */
+#define KTR_PSIG 5
+struct ktr_psig {
+ int signo;
+ sig_t action;
+ int mask;
+ int code;
+};
+
+/*
+ * KTR_CSW - trace context switches
+ */
+#define KTR_CSW 6
+struct ktr_csw {
+ int out; /* 1 if switch out, 0 if switch in */
+ int user; /* 1 if usermode (ivcsw), 0 if kernel (vcsw) */
+};
+
+/*
+ * kernel trace points (in p_traceflag)
+ */
+#define KTRFAC_MASK 0x00ffffff
+#define KTRFAC_SYSCALL (1<<KTR_SYSCALL)
+#define KTRFAC_SYSRET (1<<KTR_SYSRET)
+#define KTRFAC_NAMEI (1<<KTR_NAMEI)
+#define KTRFAC_GENIO (1<<KTR_GENIO)
+#define KTRFAC_PSIG (1<<KTR_PSIG)
+#define KTRFAC_CSW (1<<KTR_CSW)
+/*
+ * trace flags (also in p_traceflag)
+ */
+#define KTRFAC_ROOT 0x80000000 /* root set this trace */
+#define KTRFAC_INHERIT 0x40000000 /* pass trace flags to children */
+#define KTRFAC_ACTIVE 0x20000000 /* ktrace logging in progress, ignore */
+
+#ifndef KERNEL
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int ktrace __P((const char *, int, int, pid_t));
+__END_DECLS
+
+#endif /* !KERNEL */
diff --git a/sys/sys/libkern.h b/sys/sys/libkern.h
new file mode 100644
index 000000000000..0e465e03dfde
--- /dev/null
+++ b/sys/sys/libkern.h
@@ -0,0 +1,98 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)libkern.h 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/types.h>
+
+static inline int
+imax(a, b)
+ int a, b;
+{
+ return (b >= a ? b : a); /* larger of the two ints */
+}
+static inline int
+imin(a, b)
+ int a, b;
+{
+ return (b <= a ? b : a); /* smaller of the two ints */
+}
+static inline long
+lmax(a, b)
+ long a, b;
+{
+ return (b >= a ? b : a); /* larger of the two longs */
+}
+static inline long
+lmin(a, b)
+ long a, b;
+{
+ return (b <= a ? b : a); /* smaller of the two longs */
+}
+static inline u_int
+max(a, b)
+ u_int a, b;
+{
+ return (b >= a ? b : a); /* larger of the two u_ints */
+}
+static inline u_int
+min(a, b)
+ u_int a, b;
+{
+ return (b <= a ? b : a); /* smaller of the two u_ints */
+}
+static inline u_long
+ulmax(a, b)
+ u_long a, b;
+{
+ return (b >= a ? b : a); /* larger of the two u_longs */
+}
+static inline u_long
+ulmin(a, b)
+ u_long a, b;
+{
+ return (b <= a ? b : a); /* smaller of the two u_longs */
+}
+
+/* Prototypes for non-quad routines. */
+int bcmp __P((const void *, const void *, size_t));
+int ffs __P((int));
+int locc __P((int, char *, u_int));
+u_long random __P((void));
+char *rindex __P((const char *, int));
+int scanc __P((u_int, u_char *, u_char *, int));
+int skpc __P((int, int, char *));
+char *strcat __P((char *, const char *));
+char *strcpy __P((char *, const char *));
+size_t strlen __P((const char *));
+char *strncpy __P((char *, const char *, size_t));
diff --git a/sys/sys/linedisc.h b/sys/sys/linedisc.h
new file mode 100644
index 000000000000..58cb6fa8339c
--- /dev/null
+++ b/sys/sys/linedisc.h
@@ -0,0 +1,123 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)conf.h 8.3 (Berkeley) 1/21/94
+ */
+
+/*
+ * Definitions of device driver entry switches
+ */
+
+struct buf;
+struct proc;
+struct tty;
+struct uio;
+struct vnode;
+
+struct bdevsw {
+ int (*d_open) __P((dev_t dev, int oflags, int devtype,
+ struct proc *p));
+ int (*d_close) __P((dev_t dev, int fflag, int devtype,
+ struct proc *p));
+ int (*d_strategy) __P((struct buf *bp));
+ int (*d_ioctl) __P((dev_t dev, int cmd, caddr_t data,
+ int fflag, struct proc *p));
+ int (*d_dump) (); /* parameters vary by architecture */
+ int (*d_psize) __P((dev_t dev));
+ int d_flags;
+};
+
+#ifdef KERNEL
+extern struct bdevsw bdevsw[];
+#endif
+
+struct cdevsw {
+ int (*d_open) __P((dev_t dev, int oflags, int devtype,
+ struct proc *p));
+ int (*d_close) __P((dev_t dev, int fflag, int devtype,
+ struct proc *p));
+ int (*d_read) __P((dev_t dev, struct uio *uio, int ioflag));
+ int (*d_write) __P((dev_t dev, struct uio *uio, int ioflag));
+ int (*d_ioctl) __P((dev_t dev, int cmd, caddr_t data,
+ int fflag, struct proc *p));
+ int (*d_stop) __P((struct tty *tp, int rw));
+ int (*d_reset) __P((int uban)); /* XXX */
+ struct tty *d_ttys;
+ int (*d_select) __P((dev_t dev, int which, struct proc *p));
+ int (*d_mmap) __P(()); /* no parameter list declared */
+ int (*d_strategy) __P((struct buf *bp));
+};
+
+#ifdef KERNEL
+extern struct cdevsw cdevsw[];
+
+/* symbolic sleep message strings */
+extern char devopn[], devio[], devwait[], devin[], devout[];
+extern char devioc[], devcls[];
+#endif
+
+struct linesw {
+ int (*l_open) __P((dev_t dev, struct tty *tp));
+ int (*l_close) __P((struct tty *tp, int flag));
+ int (*l_read) __P((struct tty *tp, struct uio *uio,
+ int flag));
+ int (*l_write) __P((struct tty *tp, struct uio *uio,
+ int flag));
+ int (*l_ioctl) __P((struct tty *tp, int cmd, caddr_t data,
+ int flag, struct proc *p));
+ int (*l_rint) __P((int c, struct tty *tp));
+ int (*l_start) __P((struct tty *tp));
+ int (*l_modem) __P((struct tty *tp, int flag));
+};
+
+#ifdef KERNEL
+extern struct linesw linesw[];
+#endif
+
+struct swdevt {
+ dev_t sw_dev;
+ int sw_flags;
+ int sw_nblks;
+ struct vnode *sw_vp;
+};
+#define SW_FREED 0x01
+#define SW_SEQUENTIAL 0x02
+#define sw_freed sw_flags /* XXX compat */
+
+#ifdef KERNEL
+extern struct swdevt swdevt[];
+#endif
diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h
new file mode 100644
index 000000000000..ba67bda1f5a4
--- /dev/null
+++ b/sys/sys/malloc.h
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)malloc.h 8.3 (Berkeley) 1/12/94
+ */
+
+#ifndef _SYS_MALLOC_H_
+#define _SYS_MALLOC_H_
+
+#define KMEMSTATS
+
+/*
+ * flags to malloc
+ */
+#define M_WAITOK 0x0000
+#define M_NOWAIT 0x0001
+
+/*
+ * Types of memory to be allocated
+ */
+#define M_FREE 0 /* should be on free list */
+#define M_MBUF 1 /* mbuf */
+#define M_DEVBUF 2 /* device driver memory */
+#define M_SOCKET 3 /* socket structure */
+#define M_PCB 4 /* protocol control block */
+#define M_RTABLE 5 /* routing tables */
+#define M_HTABLE 6 /* IMP host tables */
+#define M_FTABLE 7 /* fragment reassembly header */
+#define M_ZOMBIE 8 /* zombie proc status */
+#define M_IFADDR 9 /* interface address */
+#define M_SOOPTS 10 /* socket options */
+#define M_SONAME 11 /* socket name */
+#define M_NAMEI 12 /* namei path name buffer */
+#define M_GPROF 13 /* kernel profiling buffer */
+#define M_IOCTLOPS 14 /* ioctl data buffer */
+#define M_MAPMEM 15 /* mapped memory descriptors */
+#define M_CRED 16 /* credentials */
+#define M_PGRP 17 /* process group header */
+#define M_SESSION 18 /* session header */
+#define M_IOV 19 /* large iov's */
+#define M_MOUNT 20 /* vfs mount struct */
+#define M_FHANDLE 21 /* network file handle */
+#define M_NFSREQ 22 /* NFS request header */
+#define M_NFSMNT 23 /* NFS mount structure */
+#define M_NFSNODE 24 /* NFS vnode private part */
+#define M_VNODE 25 /* Dynamically allocated vnodes */
+#define M_CACHE 26 /* Dynamically allocated cache entries */
+#define M_DQUOT 27 /* UFS quota entries */
+#define M_UFSMNT 28 /* UFS mount structure */
+#define M_SHM 29 /* SVID compatible shared memory segments */
+#define M_VMMAP 30 /* VM map structures */
+#define M_VMMAPENT 31 /* VM map entry structures */
+#define M_VMOBJ 32 /* VM object structure */
+#define M_VMOBJHASH 33 /* VM object hash structure */
+#define M_VMPMAP 34 /* VM pmap */
+#define M_VMPVENT 35 /* VM phys-virt mapping entry */
+#define M_VMPAGER 36 /* XXX: VM pager struct */
+#define M_VMPGDATA 37 /* XXX: VM pager private data */
+#define M_FILE 38 /* Open file structure */
+#define M_FILEDESC 39 /* Open file descriptor table */
+#define M_LOCKF 40 /* Byte-range locking structures */
+#define M_PROC 41 /* Proc structures */
+#define M_SUBPROC 42 /* Proc sub-structures */
+#define M_SEGMENT 43 /* Segment for LFS */
+#define M_LFSNODE 44 /* LFS vnode private part */
+#define M_FFSNODE 45 /* FFS vnode private part */
+#define M_MFSNODE 46 /* MFS vnode private part */
+#define M_NQLEASE 47 /* Nqnfs lease */
+#define M_NQMHOST 48 /* Nqnfs host address table */
+#define M_NETADDR 49 /* Export host address structure */
+#define M_NFSSVC 50 /* Nfs server structure */
+#define M_NFSUID 51 /* Nfs uid mapping structure */
+#define M_NFSD 52 /* Nfs server daemon structure */
+#define M_IPMOPTS 53 /* internet multicast options */
+#define M_IPMADDR 54 /* internet multicast address */
+#define M_IFMADDR 55 /* link-level multicast address */
+#define M_MRTABLE 56 /* multicast routing tables */
+#define M_ISOFSMNT 57 /* ISOFS mount structure */
+#define M_ISOFSNODE 58 /* ISOFS vnode private part */
+#define M_TEMP 74 /* misc temporary data buffers */
+#define M_LAST 75 /* Must be last type + 1 */
+
+#define INITKMEMNAMES { \
+ "free", /* 0 M_FREE */ \
+ "mbuf", /* 1 M_MBUF */ \
+ "devbuf", /* 2 M_DEVBUF */ \
+ "socket", /* 3 M_SOCKET */ \
+ "pcb", /* 4 M_PCB */ \
+ "routetbl", /* 5 M_RTABLE */ \
+ "hosttbl", /* 6 M_HTABLE */ \
+ "fragtbl", /* 7 M_FTABLE */ \
+ "zombie", /* 8 M_ZOMBIE */ \
+ "ifaddr", /* 9 M_IFADDR */ \
+ "soopts", /* 10 M_SOOPTS */ \
+ "soname", /* 11 M_SONAME */ \
+ "namei", /* 12 M_NAMEI */ \
+ "gprof", /* 13 M_GPROF */ \
+ "ioctlops", /* 14 M_IOCTLOPS */ \
+ "mapmem", /* 15 M_MAPMEM */ \
+ "cred", /* 16 M_CRED */ \
+ "pgrp", /* 17 M_PGRP */ \
+ "session", /* 18 M_SESSION */ \
+ "iov", /* 19 M_IOV */ \
+ "mount", /* 20 M_MOUNT */ \
+ "fhandle", /* 21 M_FHANDLE */ \
+ "NFS req", /* 22 M_NFSREQ */ \
+ "NFS mount", /* 23 M_NFSMNT */ \
+ "NFS node", /* 24 M_NFSNODE */ \
+ "vnodes", /* 25 M_VNODE */ \
+ "namecache", /* 26 M_CACHE */ \
+ "UFS quota", /* 27 M_DQUOT */ \
+ "UFS mount", /* 28 M_UFSMNT */ \
+ "shm", /* 29 M_SHM */ \
+ "VM map", /* 30 M_VMMAP */ \
+ "VM mapent", /* 31 M_VMMAPENT */ \
+ "VM object", /* 32 M_VMOBJ */ \
+ "VM objhash", /* 33 M_VMOBJHASH */ \
+ "VM pmap", /* 34 M_VMPMAP */ \
+ "VM pvmap", /* 35 M_VMPVENT */ \
+ "VM pager", /* 36 M_VMPAGER */ \
+ "VM pgdata", /* 37 M_VMPGDATA */ \
+ "file", /* 38 M_FILE */ \
+ "file desc", /* 39 M_FILEDESC */ \
+ "lockf", /* 40 M_LOCKF */ \
+ "proc", /* 41 M_PROC */ \
+ "subproc", /* 42 M_SUBPROC */ \
+ "LFS segment", /* 43 M_SEGMENT */ \
+ "LFS node", /* 44 M_LFSNODE */ \
+ "FFS node", /* 45 M_FFSNODE */ \
+ "MFS node", /* 46 M_MFSNODE */ \
+ "NQNFS Lease", /* 47 M_NQLEASE */ \
+ "NQNFS Host", /* 48 M_NQMHOST */ \
+ "Export Host", /* 49 M_NETADDR */ \
+ "NFS srvsock", /* 50 M_NFSSVC */ \
+ "NFS uid", /* 51 M_NFSUID */ \
+ "NFS daemon", /* 52 M_NFSD */ \
+ "ip_moptions", /* 53 M_IPMOPTS */ \
+ "in_multi", /* 54 M_IPMADDR */ \
+ "ether_multi", /* 55 M_IFMADDR */ \
+ "mrt", /* 56 M_MRTABLE */ \
+ "ISOFS mount", /* 57 M_ISOFSMNT */ \
+ "ISOFS node", /* 58 M_ISOFSNODE */ \
+ NULL, NULL, NULL, NULL, NULL, \
+ NULL, NULL, NULL, NULL, NULL, \
+ NULL, NULL, NULL, NULL, NULL, \
+ "temp", /* 74 M_TEMP */ \
+}
+
+struct kmemstats {
+ long ks_inuse; /* # of packets of this type currently in use */
+ long ks_calls; /* total packets of this type ever allocated */
+ long ks_memuse; /* total memory held in bytes */
+ u_short ks_limblocks; /* number of times blocked for hitting limit */
+ u_short ks_mapblocks; /* number of times blocked for kernel map */
+ long ks_maxused; /* maximum number ever used */
+ long ks_limit; /* most that are allowed to exist */
+ long ks_size; /* sizes of this thing that are allocated */
+ long ks_spare;
+};
+
+/*
+ * Array of descriptors that describe the contents of each page
+ */
+struct kmemusage {
+ short ku_indx; /* bucket index */
+ union {
+ u_short freecnt;/* for small allocations, free pieces in page */
+ u_short pagecnt;/* for large allocations, pages alloced */
+ } ku_un;
+};
+#define ku_freecnt ku_un.freecnt
+#define ku_pagecnt ku_un.pagecnt
+
+/*
+ * Set of buckets for each size of memory block that is retained
+ */
+struct kmembuckets {
+ caddr_t kb_next; /* list of free blocks */
+ caddr_t kb_last; /* last free block */
+ long kb_calls; /* total calls to allocate this size */
+ long kb_total; /* total number of blocks allocated */
+ long kb_totalfree; /* # of free elements in this bucket */
+ long kb_elmpercl; /* # of elements in this sized allocation */
+ long kb_highwat; /* high water mark */
+ long kb_couldfree; /* over high water mark and could free */
+};
+
+#ifdef KERNEL
+#define MINALLOCSIZE (1 << MINBUCKET)
+#define BUCKETINDX(size) /* size -> bucket index; fully parenthesized */ \
+ ((size) <= (MINALLOCSIZE * 128) \
+ ? (size) <= (MINALLOCSIZE * 8) \
+ ? (size) <= (MINALLOCSIZE * 2) \
+ ? (size) <= (MINALLOCSIZE * 1) \
+ ? (MINBUCKET + 0) \
+ : (MINBUCKET + 1) \
+ : (size) <= (MINALLOCSIZE * 4) \
+ ? (MINBUCKET + 2) \
+ : (MINBUCKET + 3) \
+ : (size) <= (MINALLOCSIZE * 32) \
+ ? (size) <= (MINALLOCSIZE * 16) \
+ ? (MINBUCKET + 4) \
+ : (MINBUCKET + 5) \
+ : (size) <= (MINALLOCSIZE * 64) \
+ ? (MINBUCKET + 6) \
+ : (MINBUCKET + 7) \
+ : (size) <= (MINALLOCSIZE * 2048) \
+ ? (size) <= (MINALLOCSIZE * 512) \
+ ? (size) <= (MINALLOCSIZE * 256) \
+ ? (MINBUCKET + 8) \
+ : (MINBUCKET + 9) \
+ : (size) <= (MINALLOCSIZE * 1024) \
+ ? (MINBUCKET + 10) \
+ : (MINBUCKET + 11) \
+ : (size) <= (MINALLOCSIZE * 8192) \
+ ? (size) <= (MINALLOCSIZE * 4096) \
+ ? (MINBUCKET + 12) \
+ : (MINBUCKET + 13) \
+ : (size) <= (MINALLOCSIZE * 16384) \
+ ? (MINBUCKET + 14) \
+ : (MINBUCKET + 15))
+
+/*
+ * Turn virtual addresses into kmem map indices
+ */
+#define kmemxtob(alloc) (kmembase + (alloc) * NBPG)
+#define btokmemx(addr) (((caddr_t)(addr) - kmembase) / NBPG)
+#define btokup(addr) (&kmemusage[((caddr_t)(addr) - kmembase) >> CLSHIFT])
+
+/*
+ * Macro versions for the usual cases of malloc/free
+ */
+#if defined(KMEMSTATS) || defined(DIAGNOSTIC)
+#define MALLOC(space, cast, size, type, flags) \
+ (space) = (cast)malloc((u_long)(size), type, flags)
+#define FREE(addr, type) free((caddr_t)(addr), type)
+
+#else /* do not collect statistics */
+#define MALLOC(space, cast, size, type, flags) { \
+ register struct kmembuckets *kbp = &bucket[BUCKETINDX(size)]; \
+ long s = splimp(); \
+ if (kbp->kb_next == NULL) { \
+ (space) = (cast)malloc((u_long)(size), type, flags); \
+ } else { \
+ (space) = (cast)kbp->kb_next; \
+ kbp->kb_next = *(caddr_t *)(space); \
+ } \
+ splx(s); \
+}
+
+#define FREE(addr, type) { \
+ register struct kmembuckets *kbp; \
+ register struct kmemusage *kup = btokup(addr); \
+ long s = splimp(); \
+ if (1 << kup->ku_indx > MAXALLOCSAVE) { \
+ free((caddr_t)(addr), type); \
+ } else { \
+ kbp = &bucket[kup->ku_indx]; \
+ if (kbp->kb_next == NULL) \
+ kbp->kb_next = (caddr_t)(addr); \
+ else \
+ *(caddr_t *)(kbp->kb_last) = (caddr_t)(addr); \
+ *(caddr_t *)(addr) = NULL; \
+ kbp->kb_last = (caddr_t)(addr); \
+ } \
+ splx(s); \
+}
+#endif /* do not collect statistics */
+
+extern struct kmemstats kmemstats[];
+extern struct kmemusage *kmemusage;
+extern char *kmembase;
+extern struct kmembuckets bucket[];
+extern void *malloc __P((unsigned long size, int type, int flags));
+extern void free __P((void *addr, int type));
+#endif /* KERNEL */
+#endif /* !_SYS_MALLOC_H_ */
diff --git a/sys/sys/map.h b/sys/sys/map.h
new file mode 100644
index 000000000000..6cec4b556533
--- /dev/null
+++ b/sys/sys/map.h
@@ -0,0 +1,82 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)map.h 8.3 (Berkeley) 1/26/94
+ */
+
+/*
+ * Resource allocation maps.
+ *
+ * Associated routines manage sub-allocation of an address space using
+ * an array of segment descriptors. The first element of this array
+ * is a map structure, describing the array's extent and the name
+ * of the controlled object. Each additional structure represents
+ * a free segment of the address space.
+ *
+ * A call to rminit initializes a resource map and may also be used
+ * to free some address space for the map. Subsequent calls to rmalloc
+ * and rmfree allocate and free space in the resource map. If the resource
+ * map becomes too fragmented to be described in the available space,
+ * then some of the resource is discarded. This may lead to critical
+ * shortages, but is better than not checking (as the previous versions
+ * of these routines did) or giving up and calling panic(). The routines
+ * could use linked lists and call a memory allocator when they run
+ * out of space, but that would not solve the out of space problem when
+ * called at interrupt time.
+ *
+ * N.B.: The address 0 in the resource address space is not available
+ * as it is used internally by the resource map routines.
+ */
+struct map {
+ struct mapent *m_limit; /* address of last slot in map */
+ char *m_name; /* name of resource, for messages */
+};
+
+struct mapent {
+ long m_size; /* size of this segment of the map */
+ long m_addr; /* start of segment */
+};
+
+#ifdef KERNEL
+#define ARGMAPSIZE 16
+struct map *kmemmap, *mbmap, *swapmap;
+int nswapmap;
+
+long rmalloc __P((struct map *, long));
+void rmfree __P((struct map *, long, long));
+void rminit __P((struct map *, long, long, char *, int));
+#endif
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
new file mode 100644
index 000000000000..f3ea7edefe6f
--- /dev/null
+++ b/sys/sys/mbuf.h
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)mbuf.h 8.3 (Berkeley) 1/21/94
+ */
+
+#ifndef M_WAITOK
+#include <sys/malloc.h>
+#endif
+
+/*
+ * Mbufs are of a single size, MSIZE (machine/machparam.h), which
+ * includes overhead. An mbuf may add a single "mbuf cluster" of size
+ * MCLBYTES (also in machine/machparam.h), which has no additional overhead
+ * and is used instead of the internal data area; this is done when
+ * at least MINCLSIZE of data must be stored.
+ */
+
+#define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */
+#define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */
+
+#define MINCLSIZE (MHLEN + MLEN) /* smallest amount to put in cluster */
+#define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */
+
+/*
+ * Type conversion macros; pointer casts use long, not int (no truncation)
+ * mtod(m,t) - convert mbuf pointer to data pointer of correct type
+ * dtom(x) - convert data pointer within mbuf to mbuf pointer (XXX)
+ * mtocl(x) - convert pointer within cluster to cluster index #
+ * cltom(x) - convert cluster # to ptr to beginning of cluster
+ */
+#define mtod(m,t) ((t)((m)->m_data))
+#define dtom(x) ((struct mbuf *)((unsigned long)(x) & ~(unsigned long)(MSIZE-1)))
+#define mtocl(x) (((unsigned long)(x) - (unsigned long)mbutl) >> MCLSHIFT)
+#define cltom(x) ((caddr_t)((unsigned long)mbutl + ((unsigned long)(x) << MCLSHIFT)))
+
+/* header at beginning of each mbuf: */
+struct m_hdr {
+ struct mbuf *mh_next; /* next buffer in chain */
+ struct mbuf *mh_nextpkt; /* next chain in queue/record */
+ int mh_len; /* amount of data in this mbuf */
+ caddr_t mh_data; /* location of data */
+ short mh_type; /* type of data in this mbuf */
+ short mh_flags; /* flags; see below */
+};
+
+/* record/packet header in first mbuf of chain; valid if M_PKTHDR set */
+struct pkthdr {
+ int len; /* total packet length */
+ struct ifnet *rcvif; /* rcv interface */
+};
+
+/* description of external storage mapped into mbuf, valid if M_EXT set */
+struct m_ext {
+ caddr_t ext_buf; /* start of buffer */
+ void (*ext_free)(); /* free routine if not the usual */
+ u_int ext_size; /* size of buffer, for ext_free */
+};
+
+struct mbuf {
+ struct m_hdr m_hdr;
+ union {
+ struct {
+ struct pkthdr MH_pkthdr; /* M_PKTHDR set */
+ union {
+ struct m_ext MH_ext; /* M_EXT set */
+ char MH_databuf[MHLEN];
+ } MH_dat;
+ } MH;
+ char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */
+ } M_dat;
+};
+#define m_next m_hdr.mh_next
+#define m_len m_hdr.mh_len
+#define m_data m_hdr.mh_data
+#define m_type m_hdr.mh_type
+#define m_flags m_hdr.mh_flags
+#define m_nextpkt m_hdr.mh_nextpkt
+#define m_act m_nextpkt
+#define m_pkthdr M_dat.MH.MH_pkthdr
+#define m_ext M_dat.MH.MH_dat.MH_ext
+#define m_pktdat M_dat.MH.MH_dat.MH_databuf
+#define m_dat M_dat.M_databuf
+
+/* mbuf flags */
+#define M_EXT 0x0001 /* has associated external storage */
+#define M_PKTHDR 0x0002 /* start of record */
+#define M_EOR 0x0004 /* end of record */
+
+/* mbuf pkthdr flags, also in m_flags */
+#define M_BCAST 0x0100 /* send/received as link-level broadcast */
+#define M_MCAST 0x0200 /* send/received as link-level multicast */
+
+/* flags copied when copying m_pkthdr */
+#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_BCAST|M_MCAST)
+
+/* mbuf types */
+#define MT_FREE 0 /* should be on free list */
+#define MT_DATA 1 /* dynamic (data) allocation */
+#define MT_HEADER 2 /* packet header */
+#define MT_SOCKET 3 /* socket structure */
+#define MT_PCB 4 /* protocol control block */
+#define MT_RTABLE 5 /* routing tables */
+#define MT_HTABLE 6 /* IMP host tables */
+#define MT_ATABLE 7 /* address resolution tables */
+#define MT_SONAME 8 /* socket name */
+#define MT_SOOPTS 10 /* socket options */
+#define MT_FTABLE 11 /* fragment reassembly header */
+#define MT_RIGHTS 12 /* access rights */
+#define MT_IFADDR 13 /* interface address */
+#define MT_CONTROL 14 /* extra-data protocol message */
+#define MT_OOBDATA 15 /* expedited data */
+
+/* flags to m_get/MGET */
+#define M_DONTWAIT M_NOWAIT
+#define M_WAIT M_WAITOK
+
+/*
+ * mbuf utility macros:
+ *
+ * MBUFLOCK(code)
+ * prevents a section of code from being interrupted by network
+ * drivers.
+ */
+#define MBUFLOCK(code) \
+ { int ms = splimp(); \
+ { code } \
+ splx(ms); \
+ }
+
+/*
+ * mbuf allocation/deallocation macros:
+ *
+ * MGET(struct mbuf *m, int how, int type)
+ * allocates an mbuf and initializes it to contain internal data.
+ *
+ * MGETHDR(struct mbuf *m, int how, int type)
+ * allocates an mbuf and initializes it to contain a packet header
+ * and internal data.
+ */
+#define MGET(m, how, type) { \
+ MALLOC((m), struct mbuf *, MSIZE, mbtypes[type], (how)); \
+ if (m) { \
+ (m)->m_type = (type); \
+ MBUFLOCK(mbstat.m_mtypes[type]++;) \
+ (m)->m_next = (struct mbuf *)NULL; \
+ (m)->m_nextpkt = (struct mbuf *)NULL; \
+ (m)->m_data = (m)->m_dat; \
+ (m)->m_flags = 0; \
+ } else \
+ (m) = m_retry((how), (type)); \
+}
+
+#define MGETHDR(m, how, type) { \
+ MALLOC((m), struct mbuf *, MSIZE, mbtypes[type], (how)); \
+ if (m) { \
+ (m)->m_type = (type); \
+ MBUFLOCK(mbstat.m_mtypes[type]++;) \
+ (m)->m_next = (struct mbuf *)NULL; \
+ (m)->m_nextpkt = (struct mbuf *)NULL; \
+ (m)->m_data = (m)->m_pktdat; \
+ (m)->m_flags = M_PKTHDR; \
+ } else \
+ (m) = m_retryhdr((how), (type)); \
+}
+
+/*
+ * Mbuf cluster macros.
+ * MCLALLOC(caddr_t p, int how) allocates an mbuf cluster.
+ * MCLGET adds such clusters to a normal mbuf;
+ * the flag M_EXT is set upon success.
+ * MCLFREE releases a reference to a cluster allocated by MCLALLOC,
+ * freeing the cluster if the reference count has reached 0.
+ *
+ * Normal mbuf clusters are normally treated as character arrays
+ * after allocation, but use the first word of the buffer as a free list
+ * pointer while on the free list.
+ */
+union mcluster {
+ union mcluster *mcl_next;
+ char mcl_buf[MCLBYTES];
+};
+
+#define MCLALLOC(p, how) \
+ MBUFLOCK( \
+ if (mclfree == 0) \
+ (void)m_clalloc(1, (how)); \
+ if ((p) = (caddr_t)mclfree) { \
+ ++mclrefcnt[mtocl(p)]; \
+ mbstat.m_clfree--; \
+ mclfree = ((union mcluster *)(p))->mcl_next; \
+ } \
+ )
+
+#define MCLGET(m, how) \
+ { MCLALLOC((m)->m_ext.ext_buf, (how)); \
+ if ((m)->m_ext.ext_buf != NULL) { \
+ (m)->m_data = (m)->m_ext.ext_buf; \
+ (m)->m_flags |= M_EXT; \
+ (m)->m_ext.ext_size = MCLBYTES; \
+ } \
+ }
+
+#define MCLFREE(p) \
+ MBUFLOCK ( \
+ if (--mclrefcnt[mtocl(p)] == 0) { \
+ ((union mcluster *)(p))->mcl_next = mclfree; \
+ mclfree = (union mcluster *)(p); \
+ mbstat.m_clfree++; \
+ } \
+ )
+
+/*
+ * MFREE(struct mbuf *m, struct mbuf *n)
+ * Free a single mbuf and associated external storage.
+ * Place the successor, if any, in n.
+ */
+#ifdef notyet
+#define MFREE(m, n) \
+ { MBUFLOCK(mbstat.m_mtypes[(m)->m_type]--;) \
+ if ((m)->m_flags & M_EXT) { \
+ if ((m)->m_ext.ext_free) \
+ (*((m)->m_ext.ext_free))((m)->m_ext.ext_buf, \
+ (m)->m_ext.ext_size); \
+ else \
+ MCLFREE((m)->m_ext.ext_buf); \
+ } \
+ (n) = (m)->m_next; \
+ FREE((m), mbtypes[(m)->m_type]); \
+ }
+#else /* notyet */
+#define MFREE(m, nn) \
+ { MBUFLOCK(mbstat.m_mtypes[(m)->m_type]--;) \
+ if ((m)->m_flags & M_EXT) { \
+ MCLFREE((m)->m_ext.ext_buf); \
+ } \
+ (nn) = (m)->m_next; \
+ FREE((m), mbtypes[(m)->m_type]); \
+ }
+#endif
+
+/*
+ * Copy mbuf pkthdr from "from" to "to".
+ * from must have M_PKTHDR set, and to must be empty.
+ */
+#define M_COPY_PKTHDR(to, from) { \
+ (to)->m_pkthdr = (from)->m_pkthdr; \
+ (to)->m_flags = (from)->m_flags & M_COPYFLAGS; \
+ (to)->m_data = (to)->m_pktdat; \
+}
+
+/*
+ * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place
+ * an object of the specified size at the end of the mbuf, longword aligned.
+ */
+#define M_ALIGN(m, len) \
+ { (m)->m_data += (MLEN - (len)) &~ (sizeof(long) - 1); }
+/*
+ * As above, for mbufs allocated with m_gethdr/MGETHDR
+ * or initialized by M_COPY_PKTHDR.
+ */
+#define MH_ALIGN(m, len) \
+ { (m)->m_data += (MHLEN - (len)) &~ (sizeof(long) - 1); }
+
+/*
+ * Compute the amount of space available
+ * before the current start of data in an mbuf.
+ */
+#define M_LEADINGSPACE(m) \
+ ((m)->m_flags & M_EXT ? /* (m)->m_data - (m)->m_ext.ext_buf */ 0 : \
+ (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \
+ (m)->m_data - (m)->m_dat)
+
+/*
+ * Compute the amount of space available
+ * after the end of data in an mbuf.
+ */
+#define M_TRAILINGSPACE(m) \
+ ((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size - \
+ ((m)->m_data + (m)->m_len) : \
+ &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
+
+/*
+ * Arrange to prepend space of size plen to mbuf m.
+ * If a new mbuf must be allocated, how specifies whether to wait.
+ * If how is M_DONTWAIT and allocation fails, the original mbuf chain
+ * is freed and m is set to NULL.
+ */
+#define M_PREPEND(m, plen, how) { \
+ if (M_LEADINGSPACE(m) >= (plen)) { \
+ (m)->m_data -= (plen); \
+ (m)->m_len += (plen); \
+ } else \
+ (m) = m_prepend((m), (plen), (how)); \
+ if ((m) && (m)->m_flags & M_PKTHDR) \
+ (m)->m_pkthdr.len += (plen); \
+}
+
+/* change mbuf to new type */
+#define MCHTYPE(m, t) { \
+ MBUFLOCK(mbstat.m_mtypes[(m)->m_type]--; mbstat.m_mtypes[t]++;) \
+ (m)->m_type = t;\
+}
+
+/* length to m_copy to copy all */
+#define M_COPYALL 1000000000
+
+/* compatibility with 4.3 */
+#define m_copy(m, o, l) m_copym((m), (o), (l), M_DONTWAIT)
+
+/*
+ * Mbuf statistics.
+ */
+struct mbstat {
+ u_long m_mbufs; /* mbufs obtained from page pool */
+ u_long m_clusters; /* clusters obtained from page pool */
+ u_long m_spare; /* spare field */
+ u_long m_clfree; /* free clusters */
+ u_long m_drops; /* times failed to find space */
+ u_long m_wait; /* times waited for space */
+ u_long m_drain; /* times drained protocols for space */
+ u_short m_mtypes[256]; /* type specific mbuf allocations */
+};
+
+#ifdef KERNEL
+extern struct mbuf *mbutl; /* virtual address of mclusters */
+extern char *mclrefcnt; /* cluster reference counts */
+struct mbstat mbstat;
+extern int nmbclusters;
+union mcluster *mclfree;
+int max_linkhdr; /* largest link-level header */
+int max_protohdr; /* largest protocol header */
+int max_hdr; /* largest link+protocol header */
+int max_datalen; /* MHLEN - max_hdr */
+extern int mbtypes[]; /* XXX */
+
+struct mbuf *m_copym __P((struct mbuf *, int, int, int));
+struct mbuf *m_free __P((struct mbuf *));
+struct mbuf *m_get __P((int, int));
+struct mbuf *m_getclr __P((int, int));
+struct mbuf *m_gethdr __P((int, int));
+struct mbuf *m_prepend __P((struct mbuf *, int, int));
+struct mbuf *m_pullup __P((struct mbuf *, int));
+struct mbuf *m_retry __P((int, int));
+struct mbuf *m_retryhdr __P((int, int));
+int m_clalloc __P((int, int));
+void m_copyback __P((struct mbuf *, int, int, caddr_t));
+void m_freem __P((struct mbuf *));
+
+#ifdef MBTYPES
+int mbtypes[] = { /* XXX */
+ M_FREE, /* MT_FREE 0 should be on free list */
+ M_MBUF, /* MT_DATA 1 dynamic (data) allocation */
+ M_MBUF, /* MT_HEADER 2 packet header */
+ M_SOCKET, /* MT_SOCKET 3 socket structure */
+ M_PCB, /* MT_PCB 4 protocol control block */
+ M_RTABLE, /* MT_RTABLE 5 routing tables */
+ M_HTABLE, /* MT_HTABLE 6 IMP host tables */
+ 0, /* MT_ATABLE 7 address resolution tables */
+ M_MBUF, /* MT_SONAME 8 socket name */
+ 0, /* 9 */
+ M_SOOPTS, /* MT_SOOPTS 10 socket options */
+ M_FTABLE, /* MT_FTABLE 11 fragment reassembly header */
+ M_MBUF, /* MT_RIGHTS 12 access rights */
+ M_IFADDR, /* MT_IFADDR 13 interface address */
+ M_MBUF, /* MT_CONTROL 14 extra-data protocol message */
+ M_MBUF, /* MT_OOBDATA 15 expedited data */
+#ifdef DATAKIT
+ 25, 26, 27, 28, 29, 30, 31, 32 /* datakit ugliness */
+#endif
+};
+#endif
+#endif
diff --git a/sys/sys/mman.h b/sys/sys/mman.h
new file mode 100644
index 000000000000..b3951c202cb2
--- /dev/null
+++ b/sys/sys/mman.h
@@ -0,0 +1,89 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)mman.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Protections are chosen from these bits, or-ed together
+ */
+#define PROT_READ 0x01 /* pages can be read */
+#define PROT_WRITE 0x02 /* pages can be written */
+#define PROT_EXEC 0x04 /* pages can be executed */
+
+/*
+ * Flags contain sharing type and options.
+ * Sharing types; choose one.
+ */
+#define MAP_SHARED 0x0001 /* share changes */
+#define MAP_PRIVATE 0x0002 /* changes are private */
+#define MAP_COPY 0x0004 /* "copy" region at mmap time */
+
+/*
+ * Other flags
+ */
+#define MAP_FIXED 0x0010 /* map addr must be exactly as requested */
+#define MAP_RENAME 0x0020 /* Sun: rename private pages to file */
+#define MAP_NORESERVE 0x0040 /* Sun: don't reserve needed swap area */
+#define MAP_INHERIT 0x0080 /* region is retained after exec */
+#define MAP_NOEXTEND 0x0100 /* for MAP_FILE, don't change file size */
+#define MAP_HASSEMAPHORE 0x0200 /* region may contain semaphores */
+
+/*
+ * Mapping type; default is map from file.
+ */
+#define MAP_ANON 0x1000 /* allocated from memory, swap space */
+
+/*
+ * Advice to madvise
+ */
+#define MADV_NORMAL 0 /* no further special treatment */
+#define MADV_RANDOM 1 /* expect random page references */
+#define MADV_SEQUENTIAL 2 /* expect sequential page references */
+#define MADV_WILLNEED 3 /* will need these pages */
+#define MADV_DONTNEED 4 /* don't need these pages */
+
+#ifndef KERNEL
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+/* Some of these int's should probably be size_t's */
+caddr_t mmap __P((caddr_t, size_t, int, int, int, off_t));
+int mprotect __P((caddr_t, size_t, int));
+int munmap __P((caddr_t, size_t));
+int msync __P((caddr_t, size_t));
+int mlock __P((caddr_t, size_t));
+int munlock __P((caddr_t, size_t));
+__END_DECLS
+
+#endif /* !KERNEL */
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
new file mode 100644
index 000000000000..4561675ef0d1
--- /dev/null
+++ b/sys/sys/mount.h
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)mount.h 8.13 (Berkeley) 3/27/94
+ */
+
+#ifndef KERNEL
+#include <sys/ucred.h>
+#endif
+#include <sys/queue.h>
+
+typedef struct { long val[2]; } fsid_t; /* file system id type */
+
+/*
+ * File identifier.
+ * These are unique per filesystem on a single machine.
+ */
+#define MAXFIDSZ 16
+
+struct fid {
+ u_short fid_len; /* length of data in bytes */
+ u_short fid_reserved; /* force longword alignment */
+ char fid_data[MAXFIDSZ]; /* data (variable length) */
+};
+
+/*
+ * file system statistics
+ */
+
+#define MNAMELEN 90 /* length of buffer for returned name */
+
+struct statfs {
+ short f_type; /* type of filesystem (see below) */
+ short f_flags; /* copy of mount flags */
+ long f_bsize; /* fundamental file system block size */
+ long f_iosize; /* optimal transfer block size */
+ long f_blocks; /* total data blocks in file system */
+ long f_bfree; /* free blocks in fs */
+ long f_bavail; /* free blocks avail to non-superuser */
+ long f_files; /* total file nodes in file system */
+ long f_ffree; /* free file nodes in fs */
+ fsid_t f_fsid; /* file system id */
+ long f_spare[9]; /* spare for later */
+ char f_mntonname[MNAMELEN]; /* directory on which mounted */
+ char f_mntfromname[MNAMELEN];/* mounted filesystem */
+};
+
+/*
+ * File system types.
+ */
+#define MOUNT_NONE 0
+#define MOUNT_UFS 1 /* Fast Filesystem */
+#define MOUNT_NFS 2 /* Sun-compatible Network Filesystem */
+#define MOUNT_MFS 3 /* Memory-based Filesystem */
+#define MOUNT_MSDOS 4 /* MS/DOS Filesystem */
+#define MOUNT_LFS 5 /* Log-based Filesystem */
+#define MOUNT_LOFS 6 /* Loopback Filesystem */
+#define MOUNT_FDESC 7 /* File Descriptor Filesystem */
+#define MOUNT_PORTAL 8 /* Portal Filesystem */
+#define MOUNT_NULL 9 /* Minimal Filesystem Layer */
+#define MOUNT_UMAP 10 /* User/Group Identifier Remapping Filesystem */
+#define MOUNT_KERNFS 11 /* Kernel Information Filesystem */
+#define MOUNT_PROCFS 12 /* /proc Filesystem */
+#define MOUNT_AFS 13 /* Andrew Filesystem */
+#define MOUNT_CD9660 14 /* ISO9660 (aka CDROM) Filesystem */
+#define MOUNT_UNION 15 /* Union (translucent) Filesystem */
+#define MOUNT_MAXTYPE 15 /* highest filesystem type number defined above */
+
+#define INITMOUNTNAMES { \
+ "none", /* 0 MOUNT_NONE */ \
+ "ufs", /* 1 MOUNT_UFS */ \
+ "nfs", /* 2 MOUNT_NFS */ \
+ "mfs", /* 3 MOUNT_MFS */ \
+ "msdos", /* 4 MOUNT_MSDOS */ \
+ "lfs", /* 5 MOUNT_LFS */ \
+ "lofs", /* 6 MOUNT_LOFS */ \
+ "fdesc", /* 7 MOUNT_FDESC */ \
+ "portal", /* 8 MOUNT_PORTAL */ \
+ "null", /* 9 MOUNT_NULL */ \
+ "umap", /* 10 MOUNT_UMAP */ \
+ "kernfs", /* 11 MOUNT_KERNFS */ \
+ "procfs", /* 12 MOUNT_PROCFS */ \
+ "afs", /* 13 MOUNT_AFS */ \
+ "iso9660fs", /* 14 MOUNT_CD9660 */ \
+ "union", /* 15 MOUNT_UNION */ \
+ 0, /* 16 MOUNT_SPARE */ \
+}
+
+/*
+ * Structure per mounted file system. Each mounted file system has an
+ * array of operations and an instance record. The file systems are
+ * put on a doubly linked list.
+ */
+LIST_HEAD(vnodelst, vnode);
+
+struct mount {
+ TAILQ_ENTRY(mount) mnt_list; /* mount list */
+ struct vfsops *mnt_op; /* operations on fs */
+ struct vnode *mnt_vnodecovered; /* vnode we mounted on */
+ struct vnodelst mnt_vnodelist; /* list of vnodes this mount */
+ int mnt_flag; /* flags */
+ int mnt_maxsymlinklen; /* max size of short symlink */
+ struct statfs mnt_stat; /* cache of filesystem stats */
+ qaddr_t mnt_data; /* private data */
+};
+
+/*
+ * Mount flags.
+ *
+ * Unmount uses MNT_FORCE flag.
+ */
+#define MNT_RDONLY 0x00000001 /* read only filesystem */
+#define MNT_SYNCHRONOUS 0x00000002 /* file system written synchronously */
+#define MNT_NOEXEC 0x00000004 /* can't exec from filesystem */
+#define MNT_NOSUID 0x00000008 /* don't honor setuid bits on fs */
+#define MNT_NODEV 0x00000010 /* don't interpret special files */
+#define MNT_UNION 0x00000020 /* union with underlying filesystem */
+#define MNT_ASYNC 0x00000040 /* file system written asynchronously */
+
+/*
+ * exported mount flags.
+ */
+#define MNT_EXRDONLY 0x00000080 /* exported read only */
+#define MNT_EXPORTED 0x00000100 /* file system is exported */
+#define MNT_DEFEXPORTED 0x00000200 /* exported to the world */
+#define MNT_EXPORTANON 0x00000400 /* use anon uid mapping for everyone */
+#define MNT_EXKERB 0x00000800 /* exported with Kerberos uid mapping */
+
+/*
+ * Flags set by internal operations.
+ */
+#define MNT_LOCAL 0x00001000 /* filesystem is stored locally */
+#define MNT_QUOTA 0x00002000 /* quotas are enabled on filesystem */
+#define MNT_ROOTFS 0x00004000 /* identifies the root filesystem */
+#define MNT_USER 0x00008000 /* mounted by a user */
+
+/*
+ * Mask of flags that are visible to statfs()
+ */
+#define MNT_VISFLAGMASK 0x0000ffff
+
+/*
+ * filesystem control flags.
+ *
+ * MNT_MLOCK locks the mount entry so that name lookup cannot proceed
+ * past the mount point. This keeps the subtree stable during mounts
+ * and unmounts.
+ */
+#define MNT_UPDATE 0x00010000 /* not a real mount, just an update */
+#define MNT_DELEXPORT 0x00020000 /* delete export host lists */
+#define MNT_RELOAD 0x00040000 /* reload filesystem data */
+#define MNT_FORCE 0x00080000 /* force unmount or readonly change */
+#define MNT_MLOCK 0x00100000 /* lock so that subtree is stable */
+#define MNT_MWAIT 0x00200000 /* someone is waiting for lock */
+#define MNT_MPBUSY 0x00400000 /* scan of mount point in progress */
+#define MNT_MPWANT 0x00800000 /* waiting for mount point */
+#define MNT_UNMOUNT 0x01000000 /* unmount in progress */
+#define MNT_WANTRDWR 0x02000000 /* want upgrade to read/write */
+
+/*
+ * Operations supported on mounted file system.
+ */
+#ifdef KERNEL
+#ifdef __STDC__
+struct nameidata;
+struct mbuf;
+#endif
+
+struct vfsops {
+ int (*vfs_mount) __P((struct mount *mp, char *path, caddr_t data,
+ struct nameidata *ndp, struct proc *p));
+ int (*vfs_start) __P((struct mount *mp, int flags,
+ struct proc *p));
+ int (*vfs_unmount) __P((struct mount *mp, int mntflags,
+ struct proc *p));
+ int (*vfs_root) __P((struct mount *mp, struct vnode **vpp));
+ int (*vfs_quotactl) __P((struct mount *mp, int cmds, uid_t uid,
+ caddr_t arg, struct proc *p));
+ int (*vfs_statfs) __P((struct mount *mp, struct statfs *sbp,
+ struct proc *p));
+ int (*vfs_sync) __P((struct mount *mp, int waitfor,
+ struct ucred *cred, struct proc *p));
+ int (*vfs_vget) __P((struct mount *mp, ino_t ino,
+ struct vnode **vpp));
+ int (*vfs_fhtovp) __P((struct mount *mp, struct fid *fhp,
+ struct mbuf *nam, struct vnode **vpp,
+ int *exflagsp, struct ucred **credanonp));
+ int (*vfs_vptofh) __P((struct vnode *vp, struct fid *fhp));
+ int (*vfs_init) __P((void));
+};
+
+#define VFS_MOUNT(MP, PATH, DATA, NDP, P) \
+ (*(MP)->mnt_op->vfs_mount)(MP, PATH, DATA, NDP, P)
+#define VFS_START(MP, FLAGS, P) (*(MP)->mnt_op->vfs_start)(MP, FLAGS, P)
+#define VFS_UNMOUNT(MP, FORCE, P) (*(MP)->mnt_op->vfs_unmount)(MP, FORCE, P)
+#define VFS_ROOT(MP, VPP) (*(MP)->mnt_op->vfs_root)(MP, VPP)
+#define VFS_QUOTACTL(MP,C,U,A,P) (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A, P)
+#define VFS_STATFS(MP, SBP, P) (*(MP)->mnt_op->vfs_statfs)(MP, SBP, P)
+#define VFS_SYNC(MP, WAIT, C, P) (*(MP)->mnt_op->vfs_sync)(MP, WAIT, C, P)
+#define VFS_VGET(MP, INO, VPP) (*(MP)->mnt_op->vfs_vget)(MP, INO, VPP)
+#define VFS_FHTOVP(MP, FIDP, NAM, VPP, EXFLG, CRED) \
+ (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, NAM, VPP, EXFLG, CRED)
+#define VFS_VPTOFH(VP, FIDP) (*(VP)->v_mount->mnt_op->vfs_vptofh)(VP, FIDP)
+#endif /* KERNEL */
+
+/*
+ * Flags for various system call interfaces.
+ *
+ * waitfor flags to vfs_sync() and getfsstat()
+ */
+#define MNT_WAIT 1
+#define MNT_NOWAIT 2
+
+/*
+ * Generic file handle
+ */
+struct fhandle {
+ fsid_t fh_fsid; /* File system id of mount point */
+ struct fid fh_fid; /* File sys specific id */
+};
+typedef struct fhandle fhandle_t;
+
+#ifdef KERNEL
+#include <net/radix.h>
+#include <sys/socket.h> /* XXX for AF_MAX */
+
+/*
+ * Network address lookup element
+ */
+struct netcred {
+ struct radix_node netc_rnodes[2];
+ int netc_exflags;
+ struct ucred netc_anon;
+};
+
+/*
+ * Network export information
+ */
+struct netexport {
+ struct netcred ne_defexported; /* Default export */
+ struct radix_node_head *ne_rtable[AF_MAX+1]; /* Individual exports */
+};
+#endif /* KERNEL */
+
+/*
+ * Export arguments for local filesystem mount calls.
+ */
+struct export_args {
+ int ex_flags; /* export related flags */
+ uid_t ex_root; /* mapping for root uid */
+ struct ucred ex_anon; /* mapping for anonymous user */
+ struct sockaddr *ex_addr; /* net address to which exported */
+ int ex_addrlen; /* and the net address length */
+ struct sockaddr *ex_mask; /* mask of valid bits in ex_addr */
+ int ex_masklen; /* and the mask length */
+};
+
+/*
+ * Arguments to mount UFS-based filesystems
+ */
+struct ufs_args {
+ char *fspec; /* block special device to mount */
+ struct export_args export; /* network export information */
+};
+
+#ifdef MFS
+/*
+ * Arguments to mount MFS
+ */
+struct mfs_args {
+ char *fspec; /* name to export for statfs */
+ struct export_args export; /* if exported MFSes are supported */
+ caddr_t base; /* base of file system in memory */
+ u_long size; /* size of file system */
+};
+#endif /* MFS */
+
+#ifdef CD9660
+/*
+ * Arguments to mount ISO 9660 filesystems.
+ */
+struct iso_args {
+ char *fspec; /* block special device to mount */
+ struct export_args export; /* network export info */
+ int flags; /* mounting flags, see below */
+
+};
+#define ISOFSMNT_NORRIP 0x00000001 /* disable Rock Ridge Ext.*/
+#define ISOFSMNT_GENS 0x00000002 /* enable generation numbers */
+#define ISOFSMNT_EXTATT 0x00000004 /* enable extended attributes */
+#endif /* CD9660 */
+
+#ifdef NFS
+/*
+ * File Handle (32 bytes for version 2), variable up to 1024 for version 3
+ */
+union nfsv2fh {
+ fhandle_t fh_generic;
+ u_char fh_bytes[32];
+};
+typedef union nfsv2fh nfsv2fh_t;
+
+/*
+ * Arguments to mount NFS
+ */
+struct nfs_args {
+ struct sockaddr *addr; /* file server address */
+ int addrlen; /* length of address */
+ int sotype; /* Socket type */
+ int proto; /* and Protocol */
+ nfsv2fh_t *fh; /* File handle to be mounted */
+ int flags; /* flags */
+ int wsize; /* write size in bytes */
+ int rsize; /* read size in bytes */
+ int timeo; /* initial timeout in .1 secs */
+ int retrans; /* times to retry send */
+ int maxgrouplist; /* Max. size of group list */
+ int readahead; /* # of blocks to readahead */
+ int leaseterm; /* Term (sec) of lease */
+ int deadthresh; /* Retrans threshold */
+ char *hostname; /* server's name */
+};
+
+
+/*
+ * NFS mount option flags
+ */
+#define NFSMNT_SOFT 0x00000001 /* soft mount (hard is default) */
+#define NFSMNT_WSIZE 0x00000002 /* set write size */
+#define NFSMNT_RSIZE 0x00000004 /* set read size */
+#define NFSMNT_TIMEO 0x00000008 /* set initial timeout */
+#define NFSMNT_RETRANS 0x00000010 /* set number of request retries */
+#define NFSMNT_MAXGRPS 0x00000020 /* set maximum grouplist size */
+#define NFSMNT_INT 0x00000040 /* allow interrupts on hard mount */
+#define NFSMNT_NOCONN 0x00000080 /* Don't Connect the socket */
+#define NFSMNT_NQNFS 0x00000100 /* Use Nqnfs protocol */
+#define NFSMNT_MYWRITE 0x00000200 /* Assume writes were mine */
+#define NFSMNT_KERB 0x00000400 /* Use Kerberos authentication */
+#define NFSMNT_DUMBTIMR 0x00000800 /* Don't estimate rtt dynamically */
+#define NFSMNT_RDIRALOOK 0x00001000 /* Do lookup with readdir (nqnfs) */
+#define NFSMNT_LEASETERM 0x00002000 /* set lease term (nqnfs) */
+#define NFSMNT_READAHEAD 0x00004000 /* set read ahead */
+#define NFSMNT_DEADTHRESH 0x00008000 /* set dead server retry thresh */
+#define NFSMNT_NQLOOKLEASE 0x00010000 /* Get lease for lookup */
+#define NFSMNT_RESVPORT 0x00020000 /* Allocate a reserved port */
+#define NFSMNT_INTERNAL 0xffe00000 /* Bits set internally */
+#define NFSMNT_MNTD 0x00200000 /* Mnt server for mnt point */
+#define NFSMNT_DISMINPROG 0x00400000 /* Dismount in progress */
+#define NFSMNT_DISMNT 0x00800000 /* Dismounted */
+#define NFSMNT_SNDLOCK 0x01000000 /* Send socket lock */
+#define NFSMNT_WANTSND 0x02000000 /* Want above */
+#define NFSMNT_RCVLOCK 0x04000000 /* Rcv socket lock */
+#define NFSMNT_WANTRCV 0x08000000 /* Want above */
+#define NFSMNT_WAITAUTH 0x10000000 /* Wait for authentication */
+#define NFSMNT_HASAUTH 0x20000000 /* Has authenticator */
+#define NFSMNT_WANTAUTH 0x40000000 /* Wants an authenticator */
+#define NFSMNT_AUTHERR 0x80000000 /* Authentication error */
+#endif /* NFS */
+
+#ifdef KERNEL
+/*
+ * exported vnode operations
+ */
+struct mount *getvfs __P((fsid_t *)); /* return vfs given fsid */
+int vfs_export /* process mount export info */
+ __P((struct mount *, struct netexport *, struct export_args *));
+struct netcred *vfs_export_lookup /* lookup host in fs export list */
+ __P((struct mount *, struct netexport *, struct mbuf *));
+int vfs_lock __P((struct mount *)); /* lock a vfs */
+int vfs_mountedon __P((struct vnode *));/* is a vfs mounted on vp */
+void vfs_unlock __P((struct mount *)); /* unlock a vfs */
+extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */
+extern struct vfsops *vfssw[]; /* filesystem type table */
+
+#else /* KERNEL */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int fstatfs __P((int, struct statfs *));
+int getfh __P((const char *, fhandle_t *));
+int getfsstat __P((struct statfs *, long, int));
+int getmntinfo __P((struct statfs **, int));
+int mount __P((int, const char *, int, void *));
+int statfs __P((const char *, struct statfs *));
+int unmount __P((const char *, int));
+__END_DECLS
+
+#endif /* KERNEL */
diff --git a/sys/sys/msgbuf.h b/sys/sys/msgbuf.h
new file mode 100644
index 000000000000..57ee0b6f30a0
--- /dev/null
+++ b/sys/sys/msgbuf.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 1981, 1984, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)msgbuf.h 8.1 (Berkeley) 6/2/93
+ */
+
+#define MSG_BSIZE (4096 - 3 * sizeof(long))
+struct msgbuf {
+#define MSG_MAGIC 0x063061
+ long msg_magic;
+ long msg_bufx; /* write pointer */
+ long msg_bufr; /* read pointer */
+ char msg_bufc[MSG_BSIZE]; /* buffer */
+};
+#ifdef KERNEL
+struct msgbuf *msgbufp; /* NOTE(review): defined here without extern, in a header -- confirm intended */
+#endif
diff --git a/sys/sys/mtio.h b/sys/sys/mtio.h
new file mode 100644
index 000000000000..7b4ef0c017cb
--- /dev/null
+++ b/sys/sys/mtio.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)mtio.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Structures and definitions for mag tape io control commands
+ */
+
+/* structure for MTIOCTOP - mag tape op command */
+struct mtop {
+ short mt_op; /* operations defined below */
+ daddr_t mt_count; /* how many of them */
+};
+
+/* operations */
+#define MTWEOF 0 /* write an end-of-file record */
+#define MTFSF 1 /* forward space file */
+#define MTBSF 2 /* backward space file */
+#define MTFSR 3 /* forward space record */
+#define MTBSR 4 /* backward space record */
+#define MTREW 5 /* rewind */
+#define MTOFFL 6 /* rewind and put the drive offline */
+#define MTNOP 7 /* no operation, sets status only */
+#define MTCACHE 8 /* enable controller cache */
+#define MTNOCACHE 9 /* disable controller cache */
+
+/* structure for MTIOCGET - mag tape get status command */
+
+struct mtget {
+ short mt_type; /* type of magtape device */
+/* the following two registers are grossly device dependent */
+ short mt_dsreg; /* ``drive status'' register */
+ short mt_erreg; /* ``error'' register */
+/* end device-dependent registers */
+ short mt_resid; /* residual count */
+/* the following two are not yet implemented */
+ daddr_t mt_fileno; /* file number of current position */
+ daddr_t mt_blkno; /* block number of current position */
+/* end not yet implemented */
+};
+
+/*
+ * Constants for the mt_type field. These are the same
+ * for controllers compatible with the types listed.
+ */
+#define MT_ISTS 0x01 /* TS-11 */
+#define MT_ISHT 0x02 /* TM03 Massbus: TE16, TU45, TU77 */
+#define MT_ISTM 0x03 /* TM11/TE10 Unibus */
+#define MT_ISMT 0x04 /* TM78/TU78 Massbus */
+#define MT_ISUT 0x05 /* SI TU-45 emulation on Unibus */
+#define MT_ISCPC 0x06 /* SUN */
+#define MT_ISAR 0x07 /* SUN */
+#define MT_ISTMSCP 0x08 /* DEC TMSCP protocol (TU81, TK50) */
+#define MT_ISCY 0x09 /* CCI Cipher */
+#define MT_ISCT 0x0a /* HP 1/4 tape */
+#define MT_ISFHP 0x0b /* HP 7980 1/2 tape */
+#define MT_ISEXABYTE 0x0c /* Exabyte */
+#define MT_ISEXA8200 0x0c /* Exabyte EXB-8200 */
+#define MT_ISEXA8500 0x0d /* Exabyte EXB-8500 */
+#define MT_ISVIPER1 0x0e /* Archive Viper-150 */
+#define MT_ISPYTHON 0x0f /* Archive Python (DAT) */
+#define MT_ISHPDAT 0x10 /* HP 35450A DAT drive */
+#define MT_ISMFOUR 0x11 /* M4 Data 1/2 9track drive */
+#define MT_ISTK50 0x12 /* DEC SCSI TK50 */
+#define MT_ISMT02 0x13 /* Emulex MT02 SCSI tape controller */
+
+/* mag tape io control commands */
+#define MTIOCTOP _IOW('m', 1, struct mtop) /* do a mag tape op */
+#define MTIOCGET _IOR('m', 2, struct mtget) /* get tape status */
+#define MTIOCIEOT _IO('m', 3) /* ignore EOT error */
+#define MTIOCEEOT _IO('m', 4) /* enable EOT error */
+
+#ifndef KERNEL
+#define DEFTAPE "/dev/rmt12"
+#endif
+
+#ifdef KERNEL
+/*
+ * minor device number
+ */
+
+#define T_UNIT 003 /* unit selection */
+#define T_NOREWIND 004 /* no rewind on close */
+#define T_DENSEL 030 /* density select */
+#define T_800BPI 000 /* select 800 bpi */
+#define T_1600BPI 010 /* select 1600 bpi */
+#define T_6250BPI 020 /* select 6250 bpi */
+#define T_BADBPI 030 /* undefined selection */
+#endif
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
new file mode 100644
index 000000000000..74ff3602c26b
--- /dev/null
+++ b/sys/sys/namei.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 1985, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)namei.h 8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_NAMEI_H_
+#define _SYS_NAMEI_H_
+
+/*
+ * Encapsulation of namei parameters.
+ */
+struct nameidata {
+ /*
+ * Arguments to namei/lookup.
+ */
+ caddr_t ni_dirp; /* pathname pointer */
+ enum uio_seg ni_segflg; /* location of pathname */
+ /* u_long ni_nameiop; namei operation */
+ /* u_long ni_flags; flags to namei */
+ /* struct proc *ni_proc; process requesting lookup */
+ /*
+ * Arguments to lookup.
+ */
+ /* struct ucred *ni_cred; credentials */
+ struct vnode *ni_startdir; /* starting directory */
+ struct vnode *ni_rootdir; /* logical root directory */
+ /*
+ * Results: returned from/manipulated by lookup
+ */
+ struct vnode *ni_vp; /* vnode of result */
+ struct vnode *ni_dvp; /* vnode of intermediate directory */
+ /*
+ * Shared between namei and lookup/commit routines.
+ */
+ long ni_pathlen; /* remaining chars in path */
+ char *ni_next; /* next location in pathname */
+ u_long ni_loopcnt; /* count of symlinks encountered */
+ /*
+ * Lookup parameters: this structure describes the subset of
+ * information from the nameidata structure that is passed
+ * through the VOP interface.
+ */
+ struct componentname {
+ /*
+ * Arguments to lookup.
+ */
+ u_long cn_nameiop; /* namei operation */
+ u_long cn_flags; /* flags to namei */
+ struct proc *cn_proc; /* process requesting lookup */
+ struct ucred *cn_cred; /* credentials */
+ /*
+ * Shared between lookup and commit routines.
+ */
+ char *cn_pnbuf; /* pathname buffer */
+ char *cn_nameptr; /* pointer to looked up name */
+ long cn_namelen; /* length of looked up component */
+ u_long cn_hash; /* hash value of looked up name */
+ long cn_consume; /* chars to consume in lookup() */
+ } ni_cnd;
+};
+
+#ifdef KERNEL
+/*
+ * namei operations
+ */
+#define LOOKUP 0 /* perform name lookup only */
+#define CREATE 1 /* setup for file creation */
+#define DELETE 2 /* setup for file deletion */
+#define RENAME 3 /* setup for file renaming */
+#define OPMASK 3 /* mask for operation */
+/*
+ * namei operational modifier flags, stored in ni_cnd.cn_flags
+ */
+#define LOCKLEAF 0x0004 /* lock inode on return */
+#define LOCKPARENT 0x0008 /* want parent vnode returned locked */
+#define WANTPARENT 0x0010 /* want parent vnode returned unlocked */
+#define NOCACHE 0x0020 /* name must not be left in cache */
+#define FOLLOW 0x0040 /* follow symbolic links */
+#define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */
+#define MODMASK 0x00fc /* mask of operational modifiers */
+/*
+ * Namei parameter descriptors.
+ *
+ * SAVENAME may be set by either the callers of namei or by VOP_LOOKUP.
+ * If the caller of namei sets the flag (for example execve wants to
+ * know the name of the program that is being executed), then it must
+ * free the buffer. If VOP_LOOKUP sets the flag, then the buffer must
+ * be freed by either the commit routine or the VOP_ABORT routine.
+ * SAVESTART is set only by the callers of namei. It implies SAVENAME
+ * plus the addition of saving the parent directory that contains the
+ * name in ni_startdir. It allows repeated calls to lookup for the
+ * name being sought. The caller is responsible for releasing the
+ * buffer and for vrele'ing ni_startdir.
+ */
+#define NOCROSSMOUNT 0x00100 /* do not cross mount points */
+#define RDONLY 0x00200 /* lookup with read-only semantics */
+#define HASBUF 0x00400 /* has allocated pathname buffer */
+#define SAVENAME 0x00800 /* save pathname buffer */
+#define SAVESTART 0x01000 /* save starting directory */
+#define ISDOTDOT 0x02000 /* current component name is .. */
+#define MAKEENTRY 0x04000 /* entry is to be added to name cache */
+#define ISLASTCN 0x08000 /* this is last component of pathname */
+#define ISSYMLINK 0x10000 /* symlink needs interpretation */
+#define PARAMASK 0xfff00 /* mask of parameter descriptors */
+/*
+ * Initialization of a nameidata structure.
+ */
+#define NDINIT(ndp, op, flags, segflg, namep, p) { \
+ (ndp)->ni_cnd.cn_nameiop = op; \
+ (ndp)->ni_cnd.cn_flags = flags; \
+ (ndp)->ni_segflg = segflg; \
+ (ndp)->ni_dirp = namep; \
+ (ndp)->ni_cnd.cn_proc = p; \
+}
+#endif
+
+/*
+ * This structure describes the elements in the cache of recent
+ * names looked up by namei. NCHNAMLEN is sized to make structure
+ * size a power of two to optimize malloc's. Minimum reasonable
+ * size is 15.
+ */
+
+#define NCHNAMLEN 31 /* maximum name segment length we bother with */
+
+struct namecache {
+ struct namecache *nc_forw; /* hash chain */
+ struct namecache **nc_back; /* hash chain */
+ struct namecache *nc_nxt; /* LRU chain */
+ struct namecache **nc_prev; /* LRU chain */
+ struct vnode *nc_dvp; /* vnode of parent of name */
+ u_long nc_dvpid; /* capability number of nc_dvp */
+ struct vnode *nc_vp; /* vnode the name refers to */
+ u_long nc_vpid; /* capability number of nc_vp */
+ char nc_nlen; /* length of name */
+ char nc_name[NCHNAMLEN]; /* segment name */
+};
+
+#ifdef KERNEL
+u_long nextvnodeid; /* NOTE(review): defined here without extern, in a header -- confirm intended */
+int namei __P((struct nameidata *ndp));
+int lookup __P((struct nameidata *ndp));
+#endif
+
+/*
+ * Stats on usefulness of namei caches.
+ */
+struct nchstats {
+ long ncs_goodhits; /* hits that we can really use */
+ long ncs_neghits; /* negative hits that we can use */
+ long ncs_badhits; /* hits we must drop */
+ long ncs_falsehits; /* hits with id mismatch */
+ long ncs_miss; /* misses */
+ long ncs_long; /* long names that ignore cache */
+ long ncs_pass2; /* names found with passes == 2 */
+ long ncs_2passes; /* number of times we attempt it */
+};
+#endif /* !_SYS_NAMEI_H_ */
diff --git a/sys/sys/param.h b/sys/sys/param.h
new file mode 100644
index 000000000000..91bdfd8facc5
--- /dev/null
+++ b/sys/sys/param.h
@@ -0,0 +1,216 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)param.h 8.2 (Berkeley) 1/21/94
+ */
+
+#define BSD 199306 /* System version (year & month). */
+#define BSD4_3 1
+#define BSD4_4 1
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#ifndef LOCORE
+#include <sys/types.h>
+#endif
+
+/*
+ * Machine-independent constants (some used in following include files).
+ * Redefined constants are from POSIX 1003.1 limits file.
+ *
+ * MAXCOMLEN should be >= sizeof(ac_comm) (see <acct.h>)
+ * MAXLOGNAME should be >= UT_NAMESIZE (see <utmp.h>)
+ */
+#include <sys/syslimits.h>
+
+#define MAXCOMLEN 16 /* max command name remembered */
+#define MAXINTERP 32 /* max interpreter file name length */
+#define MAXLOGNAME 12 /* max login name length */
+#define MAXUPRC CHILD_MAX /* max simultaneous processes */
+#define NCARGS ARG_MAX /* max bytes for an exec function */
+#define NGROUPS NGROUPS_MAX /* max number groups */
+#define NOFILE OPEN_MAX /* max open files per process */
+#define NOGROUP 65535 /* marker for empty group set member */
+#define MAXHOSTNAMELEN 256 /* max hostname size */
+
+/* More types and definitions used throughout the kernel. */
+#ifdef KERNEL
+#include <sys/cdefs.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/ucred.h>
+#include <sys/uio.h>
+#endif
+
+/* Signals. */
+#include <sys/signal.h>
+
+/* Machine type dependent parameters. */
+#include <machine/param.h>
+#include <machine/limits.h>
+
+/*
+ * Priorities. Note that with 32 run queues, differences less than 4 are
+ * insignificant.
+ */
+#define PSWP 0
+#define PVM 4
+#define PINOD 8
+#define PRIBIO 16
+#define PVFS 20
+#define PZERO 22 /* No longer magic, shouldn't be here. XXX */
+#define PSOCK 24
+#define PWAIT 32
+#define PLOCK 36
+#define PPAUSE 40
+#define PUSER 50
+#define MAXPRI 127 /* Priorities range from 0 through MAXPRI. */
+
+#define PRIMASK 0x0ff
+#define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */
+
+#define NZERO 0 /* default "nice" */
+
+#define NBPW sizeof(int) /* number of bytes per word (integer) */
+
+#define CMASK 022 /* default file mask: S_IWGRP|S_IWOTH */
+#define NODEV (dev_t)(-1) /* non-existent device */
+
+/*
+ * Clustering of hardware pages on machines with ridiculously small
+ * page sizes is done here. The paging subsystem deals with units of
+ * CLSIZE pte's describing NBPG (from machine/param.h) pages each.
+ */
+#define CLBYTES (CLSIZE*NBPG)
+#define CLOFSET (CLSIZE*NBPG-1) /* for clusters, like PGOFSET */
+#define claligned(x) ((((int)(x))&CLOFSET)==0)
+#define CLOFF CLOFSET
+#define CLSHIFT (PGSHIFT+CLSIZELOG2)
+
+#if CLSIZE==1
+#define clbase(i) (i)
+#define clrnd(i) (i)
+#else
+/* Give the base virtual address (first of CLSIZE). */
+#define clbase(i) ((i) &~ (CLSIZE-1))
+/* Round a number of clicks up to a whole cluster. */
+#define clrnd(i) (((i) + (CLSIZE-1)) &~ (CLSIZE-1))
+#endif
+
+#define CBLOCK 64 /* Clist block size, must be a power of 2. */
+#define CBQSIZE (CBLOCK/NBBY) /* Quote bytes/cblock - can do better. */
+ /* Data chars/clist. */
+#define CBSIZE (CBLOCK - sizeof(struct cblock *) - CBQSIZE)
+#define CROUND (CBLOCK - 1) /* Clist rounding. */
+
+/*
+ * File system parameters and macros.
+ *
+ * The file system is made out of blocks of at most MAXBSIZE units, with
+ * smaller units (fragments) only in the last direct block. MAXBSIZE
+ * primarily determines the size of buffers in the buffer pool. It may be
+ * made larger without any effect on existing file systems; however making
+ * it smaller may make some file systems unmountable.
+ */
+#define MAXBSIZE MAXPHYS
+#define MAXFRAG 8
+
+/*
+ * MAXPATHLEN defines the longest permissible path length after expanding
+ * symbolic links. It is used to allocate a temporary buffer from the buffer
+ * pool in which to do the name expansion, hence should be a power of two,
+ * and must be less than or equal to MAXBSIZE. MAXSYMLINKS defines the
+ * maximum number of symbolic links that may be expanded in a path name.
+ * It should be set high enough to allow all legitimate uses, but halt
+ * infinite loops reasonably quickly.
+ */
+#define MAXPATHLEN PATH_MAX
+#define MAXSYMLINKS 8
+
+/* Bit map related macros. */
+#define setbit(a,i) ((a)[(i)/NBBY] |= 1<<((i)%NBBY))
+#define clrbit(a,i) ((a)[(i)/NBBY] &= ~(1<<((i)%NBBY)))
+#define isset(a,i) ((a)[(i)/NBBY] & (1<<((i)%NBBY)))
+#define isclr(a,i) (((a)[(i)/NBBY] & (1<<((i)%NBBY))) == 0)
+
+/* Macros for counting and rounding. */
+#ifndef howmany
+#define howmany(x, y) (((x)+((y)-1))/(y))
+#endif
+#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
+#define powerof2(x) ((((x)-1)&(x))==0)
+
+/* Macros for min/max. */
+#ifndef KERNEL
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
+#endif
+
+/*
+ * Constants for setting the parameters of the kernel memory allocator.
+ *
+ * 2 ** MINBUCKET is the smallest unit of memory that will be
+ * allocated. It must be at least large enough to hold a pointer.
+ *
+ * Units of memory less than or equal to MAXALLOCSAVE will permanently
+ * allocate physical memory; requests for these size pieces of
+ * memory are quite fast. Allocations greater than MAXALLOCSAVE must
+ * always allocate and free physical memory; requests for these
+ * size allocations should be done infrequently as they will be slow.
+ *
+ * Constraints: CLBYTES <= MAXALLOCSAVE <= 2 ** (MINBUCKET + 14), and
+ * MAXALLOCSAVE must be a power of two.
+ */
+#define MINBUCKET 4 /* 4 => min allocation of 16 bytes */
+#define MAXALLOCSAVE (2 * CLBYTES)
+
+/*
+ * Scale factor for scaled integers used to count %cpu time and load avgs.
+ *
+ * The number of CPU `tick's that map to a unique `%age' can be expressed
+ * by the formula (1 / (2 ^ (FSHIFT - 11))). The maximum load average that
+ * can be calculated (assuming 32 bits) can be closely approximated using
+ * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15).
+ *
+ * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age',
+ * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024.
+ */
+#define FSHIFT 11 /* bits to right of fixed binary point */
+#define FSCALE (1<<FSHIFT)
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
new file mode 100644
index 000000000000..bbe60cddcac7
--- /dev/null
+++ b/sys/sys/proc.h
@@ -0,0 +1,263 @@
+/*-
+ * Copyright (c) 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)proc.h 8.8 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_PROC_H_
+#define _SYS_PROC_H_
+
+#include <machine/proc.h> /* Machine-dependent proc substruct. */
+#include <sys/select.h> /* For struct selinfo. */
+
+/*
+ * One structure allocated per session.
+ */
+struct session {
+ int s_count; /* Ref cnt; pgrps in session. */
+ struct proc *s_leader; /* Session leader. */
+ struct vnode *s_ttyvp; /* Vnode of controlling terminal. */
+ struct tty *s_ttyp; /* Controlling terminal. */
+ char s_login[MAXLOGNAME]; /* Setlogin() name. */
+};
+
+/*
+ * One structure allocated per process group.
+ */
+struct pgrp {
+ struct pgrp *pg_hforw; /* Forward link in hash bucket. */
+ struct proc *pg_mem; /* Pointer to pgrp members. */
+ struct session *pg_session; /* Pointer to session. */
+ pid_t pg_id; /* Pgrp id. */
+ int pg_jobc; /* # procs qualifying pgrp for job control */
+};
+
+/*
+ * Description of a process.
+ *
+ * This structure contains the information needed to manage a thread of
+ * control, known in UN*X as a process; it has references to substructures
+ * containing descriptions of things that the process uses, but may share
+ * with related processes. The process structure and the substructures
+ * are always addressable except for those marked "(PROC ONLY)" below,
+ * which might be addressable only on a processor on which the process
+ * is running.
+ */
+struct proc {
+ struct proc *p_forw; /* Doubly-linked run/sleep queue. */
+ struct proc *p_back;
+ struct proc *p_next; /* Linked list of active procs */
+ struct proc **p_prev; /* and zombies. */
+
+ /* substructures: */
+ struct pcred *p_cred; /* Process owner's identity. */
+ struct filedesc *p_fd; /* Ptr to open files structure. */
+ struct pstats *p_stats; /* Accounting/statistics (PROC ONLY). */
+ struct plimit *p_limit; /* Process limits. */
+ struct vmspace *p_vmspace; /* Address space. */
+ struct sigacts *p_sigacts; /* Signal actions, state (PROC ONLY). */
+
+#define p_ucred p_cred->pc_ucred
+#define p_rlimit p_limit->pl_rlimit
+
+ int p_flag; /* P_* flags. */
+ char p_stat; /* S* process status. */
+ char p_pad1[3];
+
+ pid_t p_pid; /* Process identifier. */
+ struct proc *p_hash; /* Hashed based on p_pid for kill+exit+... */
+ struct proc *p_pgrpnxt; /* Pointer to next process in process group. */
+ struct proc *p_pptr; /* Pointer to process structure of parent. */
+ struct proc *p_osptr; /* Pointer to older sibling processes. */
+
+/* The following fields are all zeroed upon creation in fork. */
+#define p_startzero p_ysptr
+ struct proc *p_ysptr; /* Pointer to younger siblings. */
+ struct proc *p_cptr; /* Pointer to youngest living child. */
+ pid_t p_oppid; /* Save parent pid during ptrace. XXX */
+ int p_dupfd; /* Sideways return value from fdopen. XXX */
+
+ /* scheduling */
+ u_int p_estcpu; /* Time averaged value of p_cpticks. */
+ int p_cpticks; /* Ticks of cpu time. */
+ fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */
+ void *p_wchan; /* Sleep address. */
+ char *p_wmesg; /* Reason for sleep. */
+ u_int p_swtime; /* Time swapped in or out. */
+ u_int p_slptime; /* Time since last blocked. */
+
+ struct itimerval p_realtimer; /* Alarm timer. */
+ struct timeval p_rtime; /* Real time. */
+ u_quad_t p_uticks; /* Statclock hits in user mode. */
+ u_quad_t p_sticks; /* Statclock hits in system mode. */
+ u_quad_t p_iticks; /* Statclock hits processing intr. */
+
+ int p_traceflag; /* Kernel trace points. */
+ struct vnode *p_tracep; /* Trace to vnode. */
+
+ int p_siglist; /* Signals arrived but not delivered. */
+
+ struct vnode *p_textvp; /* Vnode of executable. */
+
+ long p_spare[5]; /* pad to 256, avoid shifting eproc. */
+
+/* End area that is zeroed on creation. */
+#define p_endzero p_startcopy
+
+/* The following fields are all copied upon creation in fork. */
+#define p_startcopy p_sigmask
+
+ sigset_t p_sigmask; /* Current signal mask. */
+ sigset_t p_sigignore; /* Signals being ignored. */
+ sigset_t p_sigcatch; /* Signals being caught by user. */
+
+ u_char p_priority; /* Process priority. */
+ u_char p_usrpri; /* User-priority based on p_estcpu and p_nice. */
+ char p_nice; /* Process "nice" value. */
+ char p_comm[MAXCOMLEN+1];
+
+ struct pgrp *p_pgrp; /* Pointer to process group. */
+
+/* End area that is copied on creation. */
+#define p_endcopy p_thread
+ int p_thread; /* Id for this "thread"; Mach glue. XXX */
+ struct user *p_addr; /* Kernel virtual addr of u-area (PROC ONLY). */
+ struct mdproc p_md; /* Any machine-dependent fields. */
+
+ u_short p_xstat; /* Exit status for wait; also stop signal. */
+ u_short p_acflag; /* Accounting flags. */
+ struct rusage *p_ru; /* Exit information. XXX */
+
+};
+
+#define p_session p_pgrp->pg_session
+#define p_pgid p_pgrp->pg_id
+
+/* Status values. */
+#define SIDL 1 /* Process being created by fork. */
+#define SRUN 2 /* Currently runnable. */
+#define SSLEEP 3 /* Sleeping on an address. */
+#define SSTOP 4 /* Process debugging or suspension. */
+#define SZOMB 5 /* Awaiting collection by parent. */
+
+/* These flags are kept in p_flag. */
+#define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */
+#define P_CONTROLT 0x00002 /* Has a controlling terminal. */
+#define P_INMEM 0x00004 /* Loaded into memory. */
+#define P_NOCLDSTOP 0x00008 /* No SIGCHLD when children stop. */
+#define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */
+#define P_PROFIL 0x00020 /* Has started profiling. */
+#define P_SELECT 0x00040 /* Selecting; wakeup/waiting danger. */
+#define P_SINTR 0x00080 /* Sleep is interruptible. */
+#define P_SUGID 0x00100 /* Had set id privileges since last exec. */
+#define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */
+#define P_TIMEOUT 0x00400 /* Timing out during sleep. */
+#define P_TRACED 0x00800 /* Debugged process being traced. */
+#define P_WAITED 0x01000 /* Debugging process has waited for child. */
+#define P_WEXIT 0x02000 /* Working on exiting. */
+#define P_EXEC 0x04000 /* Process called exec. */
+
+/* Should probably be changed into a hold count. */
+#define P_NOSWAP 0x08000 /* Another flag to prevent swap out. */
+#define P_PHYSIO 0x10000 /* Doing physical I/O. */
+
+/* Should be moved to machine-dependent areas. */
+#define P_OWEUPC 0x20000 /* Owe process an addupc() call at next ast. */
+
+/*
+ * MOVE TO ucred.h?
+ *
+ * Shareable process credentials (always resident). This includes a reference
+ * to the current user credentials as well as real and saved ids that may be
+ * used to change ids.
+ */
+struct pcred {
+ struct ucred *pc_ucred; /* Current credentials. */
+ uid_t p_ruid; /* Real user id. */
+ uid_t p_svuid; /* Saved effective user id. */
+ gid_t p_rgid; /* Real group id. */
+ gid_t p_svgid; /* Saved effective group id. */
+ int p_refcnt; /* Number of references. */
+};
+
+#ifdef KERNEL
+/*
+ * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t,
+ * as it is used to represent "no process group".
+ */
+#define PID_MAX 30000
+#define NO_PID 30001
+#define PIDHASH(pid) ((pid) & pidhashmask)
+
+#define SESS_LEADER(p) ((p)->p_session->s_leader == (p))
+#define SESSHOLD(s) ((s)->s_count++)
+#define SESSRELE(s) { \
+ if (--(s)->s_count == 0) \
+ FREE(s, M_SESSION); \
+}
+
+extern struct proc *pidhash[]; /* In param.c. */
+extern struct pgrp *pgrphash[]; /* In param.c. */
+extern struct proc *curproc; /* Current running proc. */
+extern struct proc proc0; /* Process slot for swapper. */
+extern int nprocs, maxproc; /* Current and max number of procs. */
+extern int pidhashmask; /* In param.c. */
+
+volatile struct proc *allproc; /* List of active procs. */
+struct proc *zombproc; /* List of zombie procs. */
+struct proc *initproc, *pageproc; /* Process slots for init, pager. */
+
+#define NQS 32 /* 32 run queues. */
+int whichqs; /* Bit mask summary of non-empty Q's. */
+struct prochd {
+ struct proc *ph_link; /* Linked list of running processes. */
+ struct proc *ph_rlink;
+} qs[NQS];
+
+struct proc *pfind __P((pid_t)); /* Find process by id. */
+struct pgrp *pgfind __P((pid_t)); /* Find process group by id. */
+
+void mi_switch __P((void));
+void resetpriority __P((struct proc *));
+void setrunnable __P((struct proc *));
+void setrunqueue __P((struct proc *));
+void sleep __P((void *chan, int pri));
+int tsleep __P((void *chan, int pri, char *wmesg, int timo));
+void unsleep __P((struct proc *));
+void wakeup __P((void *chan));
+#endif /* KERNEL */
+#endif /* !_SYS_PROC_H_ */
diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h
new file mode 100644
index 000000000000..76ac720d85ff
--- /dev/null
+++ b/sys/sys/protosw.h
@@ -0,0 +1,210 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)protosw.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Protocol switch table.
+ *
+ * Each protocol has a handle initializing one of these structures,
+ * which is used for protocol-protocol and system-protocol communication.
+ *
+ * A protocol is called through the pr_init entry before any other.
+ * Thereafter it is called every 200ms through the pr_fasttimo entry and
+ * every 500ms through the pr_slowtimo for timer based actions.
+ * The system will call the pr_drain entry if it is low on space and
+ * this should throw away any non-critical data.
+ *
+ * Protocols pass data between themselves as chains of mbufs using
+ * the pr_input and pr_output hooks. Pr_input passes data up (towards
+ * UNIX) and pr_output passes it down (towards the imps); control
+ * information passes up and down on pr_ctlinput and pr_ctloutput.
+ * The protocol is responsible for the space occupied by any of the
+ * arguments to these entries and must dispose of it.
+ *
+ * The userreq routine interfaces protocols to the system and is
+ * described below.
+ */
+struct protosw {
+ short pr_type; /* socket type used for */
+ struct domain *pr_domain; /* domain protocol a member of */
+ short pr_protocol; /* protocol number */
+ short pr_flags; /* see below */
+/* protocol-protocol hooks */
+ void (*pr_input)(); /* input to protocol (from below) */
+ int (*pr_output)(); /* output to protocol (from above) */
+ void (*pr_ctlinput)(); /* control input (from below) */
+ int (*pr_ctloutput)(); /* control output (from above) */
+/* user-protocol hook */
+ int (*pr_usrreq)(); /* user request: see list below */
+/* utility hooks */
+ void (*pr_init)(); /* initialization hook */
+ void (*pr_fasttimo)(); /* fast timeout (200ms) */
+ void (*pr_slowtimo)(); /* slow timeout (500ms) */
+ void (*pr_drain)(); /* flush any excess space possible */
+ int (*pr_sysctl)(); /* sysctl for protocol */
+};
+
+#define PR_SLOWHZ 2 /* 2 slow timeouts per second */
+#define PR_FASTHZ 5 /* 5 fast timeouts per second */
+
+/*
+ * Values for pr_flags.
+ * PR_ADDR requires PR_ATOMIC;
+ * PR_ADDR and PR_CONNREQUIRED are mutually exclusive.
+ */
+#define PR_ATOMIC 0x01 /* exchange atomic messages only */
+#define PR_ADDR 0x02 /* addresses given with messages */
+#define PR_CONNREQUIRED 0x04 /* connection required by protocol */
+#define PR_WANTRCVD 0x08 /* want PRU_RCVD calls */
+#define PR_RIGHTS 0x10 /* passes capabilities */
+
+/*
+ * The arguments to usrreq are:
+ * (*protosw[].pr_usrreq)(up, req, m, nam, opt);
+ * where up is a (struct socket *), req is one of these requests,
+ * m is an optional mbuf chain containing a message,
+ * nam is an optional mbuf chain containing an address,
+ * and opt is a pointer to a socketopt structure or nil.
+ * The protocol is responsible for disposal of the mbuf chain m,
+ * the caller is responsible for any space held by nam and opt.
+ * A non-zero return from usrreq gives a
+ * UNIX error number which should be passed to higher level software.
+ */
+#define PRU_ATTACH 0 /* attach protocol to up */
+#define PRU_DETACH 1 /* detach protocol from up */
+#define PRU_BIND 2 /* bind socket to address */
+#define PRU_LISTEN 3 /* listen for connection */
+#define PRU_CONNECT 4 /* establish connection to peer */
+#define PRU_ACCEPT 5 /* accept connection from peer */
+#define PRU_DISCONNECT 6 /* disconnect from peer */
+#define PRU_SHUTDOWN 7 /* won't send any more data */
+#define PRU_RCVD 8 /* have taken data; more room now */
+#define PRU_SEND 9 /* send this data */
+#define PRU_ABORT 10 /* abort (fast DISCONNECT, DETACH) */
+#define PRU_CONTROL 11 /* control operations on protocol */
+#define PRU_SENSE 12 /* return status into m */
+#define PRU_RCVOOB 13 /* retrieve out of band data */
+#define PRU_SENDOOB 14 /* send out of band data */
+#define PRU_SOCKADDR 15 /* fetch socket's address */
+#define PRU_PEERADDR 16 /* fetch peer's address */
+#define PRU_CONNECT2 17 /* connect two sockets */
+/* begin for protocols internal use */
+#define PRU_FASTTIMO 18 /* 200ms timeout */
+#define PRU_SLOWTIMO 19 /* 500ms timeout */
+#define PRU_PROTORCV 20 /* receive from below */
+#define PRU_PROTOSEND 21 /* send to below */
+
+#define PRU_NREQ 21
+
+#ifdef PRUREQUESTS
+char *prurequests[] = {
+ "ATTACH", "DETACH", "BIND", "LISTEN",
+ "CONNECT", "ACCEPT", "DISCONNECT", "SHUTDOWN",
+ "RCVD", "SEND", "ABORT", "CONTROL",
+ "SENSE", "RCVOOB", "SENDOOB", "SOCKADDR",
+ "PEERADDR", "CONNECT2", "FASTTIMO", "SLOWTIMO",
+ "PROTORCV", "PROTOSEND",
+};
+#endif
+
+/*
+ * The arguments to the ctlinput routine are
+ * (*protosw[].pr_ctlinput)(cmd, sa, arg);
+ * where cmd is one of the commands below, sa is a pointer to a sockaddr,
+ * and arg is an optional caddr_t argument used within a protocol family.
+ */
+#define PRC_IFDOWN 0 /* interface transition */
+#define PRC_ROUTEDEAD 1 /* select new route if possible ??? */
+#define PRC_QUENCH2 3 /* DEC congestion bit says slow down */
+#define PRC_QUENCH 4 /* some one said to slow down */
+#define PRC_MSGSIZE 5 /* message size forced drop */
+#define PRC_HOSTDEAD 6 /* host appears to be down */
+#define PRC_HOSTUNREACH 7 /* deprecated (use PRC_UNREACH_HOST) */
+#define PRC_UNREACH_NET 8 /* no route to network */
+#define PRC_UNREACH_HOST 9 /* no route to host */
+#define PRC_UNREACH_PROTOCOL 10 /* dst says bad protocol */
+#define PRC_UNREACH_PORT 11 /* bad port # */
+/* was PRC_UNREACH_NEEDFRAG 12 (use PRC_MSGSIZE) */
+#define PRC_UNREACH_SRCFAIL 13 /* source route failed */
+#define PRC_REDIRECT_NET 14 /* net routing redirect */
+#define PRC_REDIRECT_HOST 15 /* host routing redirect */
+#define PRC_REDIRECT_TOSNET 16 /* redirect for type of service & net */
+#define PRC_REDIRECT_TOSHOST 17 /* redirect for tos & host */
+#define PRC_TIMXCEED_INTRANS 18 /* packet lifetime expired in transit */
+#define PRC_TIMXCEED_REASS 19 /* lifetime expired on reass q */
+#define PRC_PARAMPROB 20 /* header incorrect */
+
+#define PRC_NCMDS 21
+
+#define PRC_IS_REDIRECT(cmd) \
+ ((cmd) >= PRC_REDIRECT_NET && (cmd) <= PRC_REDIRECT_TOSHOST)
+
+#ifdef PRCREQUESTS
+char *prcrequests[] = {
+ "IFDOWN", "ROUTEDEAD", "#2", "DEC-BIT-QUENCH2",
+ "QUENCH", "MSGSIZE", "HOSTDEAD", "#7",
+ "NET-UNREACH", "HOST-UNREACH", "PROTO-UNREACH", "PORT-UNREACH",
+ "#12", "SRCFAIL-UNREACH", "NET-REDIRECT", "HOST-REDIRECT",
+ "TOSNET-REDIRECT", "TOSHOST-REDIRECT", "TX-INTRANS", "TX-REASS",
+ "PARAMPROB"
+};
+#endif
+
+/*
+ * The arguments to ctloutput are:
+ * (*protosw[].pr_ctloutput)(req, so, level, optname, optval);
+ * req is one of the actions listed below, so is a (struct socket *),
+ * level is an indication of which protocol layer the option is intended.
+ * optname is a protocol dependent socket option request,
+ * optval is a pointer to a mbuf-chain pointer, for value-return results.
+ * The protocol is responsible for disposal of the mbuf chain *optval
+ * if supplied,
+ * the caller is responsible for any space held by *optval, when returned.
+ * A non-zero return from ctloutput gives a
+ * UNIX error number which should be passed to higher level software.
+ */
+#define PRCO_GETOPT 0
+#define PRCO_SETOPT 1
+
+#define PRCO_NCMDS 2
+
+#ifdef PRCOREQUESTS
+char *prcorequests[] = {
+ "GETOPT", "SETOPT",
+};
+#endif
+
+#ifdef KERNEL
+extern struct protosw *pffindproto(), *pffindtype();
+#endif
diff --git a/sys/sys/ptrace.h b/sys/sys/ptrace.h
new file mode 100644
index 000000000000..f7f99d474a49
--- /dev/null
+++ b/sys/sys/ptrace.h
@@ -0,0 +1,67 @@
+/*-
+ * Copyright (c) 1984, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ptrace.h 8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_PTRACE_H_
+#define _SYS_PTRACE_H_
+
+#define PT_TRACE_ME 0 /* child declares it's being traced */
+#define PT_READ_I 1 /* read word in child's I space */
+#define PT_READ_D 2 /* read word in child's D space */
+#define PT_READ_U 3 /* read word in child's user structure */
+#define PT_WRITE_I 4 /* write word in child's I space */
+#define PT_WRITE_D 5 /* write word in child's D space */
+#define PT_WRITE_U 6 /* write word in child's user structure */
+#define PT_CONTINUE 7 /* continue the child */
+#define PT_KILL 8 /* kill the child process */
+#define PT_STEP 9 /* single step the child */
+#define PT_ATTACH 10 /* trace some running process */
+#define PT_DETACH 11 /* stop tracing a process */
+
+#define PT_FIRSTMACH 32 /* for machine-specific requests */
+#include <machine/ptrace.h> /* machine-specific requests, if any */
+
+#ifdef KERNEL
+void proc_reparent __P((struct proc *child, struct proc *newparent));
+#else /* !KERNEL */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int ptrace __P((int _request, pid_t _pid, caddr_t _addr, int _data));
+__END_DECLS
+
+#endif /* !KERNEL */
+
+#endif /* !_SYS_PTRACE_H_ */
diff --git a/sys/sys/queue.h b/sys/sys/queue.h
new file mode 100644
index 000000000000..c200c9f4ccf9
--- /dev/null
+++ b/sys/sys/queue.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)queue.h 8.4 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_QUEUE_H_
+#define _SYS_QUEUE_H_
+
+/*
+ * This file defines three types of data structures: lists, tail queues,
+ * and circular queues.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list after
+ * an existing element or at the head of the list. A list may only be
+ * traversed in the forward direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list after
+ * an existing element, at the head of the list, or at the end of the
+ * list. A tail queue may only be traversed in the forward direction.
+ *
+ * A circular queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or after
+ * an existing element, at the head of the list, or at the end of the list.
+ * A circular queue may be traversed in either direction, but has a more
+ * complex end of list detection.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
+
+/*
+ * List definitions.
+ */
+#define LIST_HEAD(name, type) \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+
+#define LIST_ENTRY(type) \
+struct { \
+ struct type *le_next; /* next element */ \
+ struct type **le_prev; /* address of previous next element */ \
+}
+
+/*
+ * List functions.
+ */
+#define LIST_INIT(head) { \
+ (head)->lh_first = NULL; \
+}
+
+#define LIST_INSERT_AFTER(listelm, elm, field) { \
+ if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \
+ (listelm)->field.le_next->field.le_prev = \
+ &(elm)->field.le_next; \
+ (listelm)->field.le_next = (elm); \
+ (elm)->field.le_prev = &(listelm)->field.le_next; \
+}
+
+#define LIST_INSERT_HEAD(head, elm, field) { \
+ if (((elm)->field.le_next = (head)->lh_first) != NULL) \
+ (head)->lh_first->field.le_prev = &(elm)->field.le_next;\
+ (head)->lh_first = (elm); \
+ (elm)->field.le_prev = &(head)->lh_first; \
+}
+
+#define LIST_REMOVE(elm, field) { \
+ if ((elm)->field.le_next != NULL) \
+ (elm)->field.le_next->field.le_prev = \
+ (elm)->field.le_prev; \
+ *(elm)->field.le_prev = (elm)->field.le_next; \
+}
+
+/*
+ * Tail queue definitions.
+ */
+#define TAILQ_HEAD(name, type) \
+struct name { \
+ struct type *tqh_first; /* first element */ \
+ struct type **tqh_last; /* addr of last next element */ \
+}
+
+#define TAILQ_ENTRY(type) \
+struct { \
+ struct type *tqe_next; /* next element */ \
+ struct type **tqe_prev; /* address of previous next element */ \
+}
+
+/*
+ * Tail queue functions.
+ */
+#define TAILQ_INIT(head) { \
+ (head)->tqh_first = NULL; \
+ (head)->tqh_last = &(head)->tqh_first; \
+}
+
+#define TAILQ_INSERT_HEAD(head, elm, field) { \
+ if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \
+ (elm)->field.tqe_next->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (head)->tqh_first = (elm); \
+ (elm)->field.tqe_prev = &(head)->tqh_first; \
+}
+
+#define TAILQ_INSERT_TAIL(head, elm, field) { \
+ (elm)->field.tqe_next = NULL; \
+ (elm)->field.tqe_prev = (head)->tqh_last; \
+ *(head)->tqh_last = (elm); \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+}
+
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) { \
+ if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\
+ (elm)->field.tqe_next->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (listelm)->field.tqe_next = (elm); \
+ (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
+}
+
+#define TAILQ_REMOVE(head, elm, field) { \
+ if (((elm)->field.tqe_next) != NULL) \
+ (elm)->field.tqe_next->field.tqe_prev = \
+ (elm)->field.tqe_prev; \
+ else \
+ (head)->tqh_last = (elm)->field.tqe_prev; \
+ *(elm)->field.tqe_prev = (elm)->field.tqe_next; \
+}
+
+/*
+ * Circular queue definitions.
+ */
+#define CIRCLEQ_HEAD(name, type) \
+struct name { \
+ struct type *cqh_first; /* first element */ \
+ struct type *cqh_last; /* last element */ \
+}
+
+#define CIRCLEQ_ENTRY(type) \
+struct { \
+ struct type *cqe_next; /* next element */ \
+ struct type *cqe_prev; /* previous element */ \
+}
+
+/*
+ * Circular queue functions.
+ */
+#define CIRCLEQ_INIT(head) { \
+ (head)->cqh_first = (void *)(head); \
+ (head)->cqh_last = (void *)(head); \
+}
+
+#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) { \
+ (elm)->field.cqe_next = (listelm)->field.cqe_next; \
+ (elm)->field.cqe_prev = (listelm); \
+ if ((listelm)->field.cqe_next == (void *)(head)) \
+ (head)->cqh_last = (elm); \
+ else \
+ (listelm)->field.cqe_next->field.cqe_prev = (elm); \
+ (listelm)->field.cqe_next = (elm); \
+}
+
+#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) { \
+ (elm)->field.cqe_next = (listelm); \
+ (elm)->field.cqe_prev = (listelm)->field.cqe_prev; \
+ if ((listelm)->field.cqe_prev == (void *)(head)) \
+ (head)->cqh_first = (elm); \
+ else \
+ (listelm)->field.cqe_prev->field.cqe_next = (elm); \
+ (listelm)->field.cqe_prev = (elm); \
+}
+
+#define CIRCLEQ_INSERT_HEAD(head, elm, field) { \
+ (elm)->field.cqe_next = (head)->cqh_first; \
+ (elm)->field.cqe_prev = (void *)(head); \
+ if ((head)->cqh_last == (void *)(head)) \
+ (head)->cqh_last = (elm); \
+ else \
+ (head)->cqh_first->field.cqe_prev = (elm); \
+ (head)->cqh_first = (elm); \
+}
+
+#define CIRCLEQ_INSERT_TAIL(head, elm, field) { \
+ (elm)->field.cqe_next = (void *)(head); \
+ (elm)->field.cqe_prev = (head)->cqh_last; \
+ if ((head)->cqh_first == (void *)(head)) \
+ (head)->cqh_first = (elm); \
+ else \
+ (head)->cqh_last->field.cqe_next = (elm); \
+ (head)->cqh_last = (elm); \
+}
+
+#define CIRCLEQ_REMOVE(head, elm, field) { \
+ if ((elm)->field.cqe_next == (void *)(head)) \
+ (head)->cqh_last = (elm)->field.cqe_prev; \
+ else \
+ (elm)->field.cqe_next->field.cqe_prev = \
+ (elm)->field.cqe_prev; \
+ if ((elm)->field.cqe_prev == (void *)(head)) \
+ (head)->cqh_first = (elm)->field.cqe_next; \
+ else \
+ (elm)->field.cqe_prev->field.cqe_next = \
+ (elm)->field.cqe_next; \
+}
+#endif /* !_SYS_QUEUE_H_ */
diff --git a/sys/sys/reboot.h b/sys/sys/reboot.h
new file mode 100644
index 000000000000..c3c957e17eeb
--- /dev/null
+++ b/sys/sys/reboot.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)reboot.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Arguments to reboot system call.
+ * These are passed to boot program in r11,
+ * and on to init.
+ */
+#define RB_AUTOBOOT 0 /* flags for system auto-booting itself */
+
+#define RB_ASKNAME 0x01 /* ask for file name to reboot from */
+#define RB_SINGLE 0x02 /* reboot to single user only */
+#define RB_NOSYNC 0x04 /* don't sync before reboot */
+#define RB_HALT 0x08 /* don't reboot, just halt */
+#define RB_INITNAME 0x10 /* name given for /etc/init (unused) */
+#define RB_DFLTROOT 0x20 /* use compiled-in rootdev */
+#define RB_KDB 0x40 /* give control to kernel debugger */
+#define RB_RDONLY 0x80 /* mount root fs read-only */
+#define RB_DUMP 0x100 /* dump kernel memory before reboot */
+#define RB_MINIROOT 0x200 /* mini-root present in memory at boot time */
+
+/*
+ * Constants for converting boot-style device number to type,
+ * adaptor (uba, mba, etc), unit number and partition number.
+ * Type (== major device number) is in the low byte
+ * for backward compatibility. Except for that of the "magic
+ * number", each mask applies to the shifted value.
+ * Format:
+ * (4) (4) (4) (4) (8) (8)
+ * --------------------------------
+ * |MA | AD| CT| UN| PART | TYPE |
+ * --------------------------------
+ */
+#define B_ADAPTORSHIFT 24
+#define B_ADAPTORMASK 0x0f
+#define B_ADAPTOR(val) (((val) >> B_ADAPTORSHIFT) & B_ADAPTORMASK)
+#define B_CONTROLLERSHIFT 20
+#define B_CONTROLLERMASK 0xf
+#define B_CONTROLLER(val) (((val)>>B_CONTROLLERSHIFT) & B_CONTROLLERMASK)
+#define B_UNITSHIFT 16
+#define B_UNITMASK 0xf
+#define B_UNIT(val) (((val) >> B_UNITSHIFT) & B_UNITMASK)
+#define B_PARTITIONSHIFT 8
+#define B_PARTITIONMASK 0xff
+#define B_PARTITION(val) (((val) >> B_PARTITIONSHIFT) & B_PARTITIONMASK)
+#define B_TYPESHIFT 0
+#define B_TYPEMASK 0xff
+#define B_TYPE(val) (((val) >> B_TYPESHIFT) & B_TYPEMASK)
+
+#define B_MAGICMASK ((u_long)0xf0000000)
+#define B_DEVMAGIC ((u_long)0xa0000000)
+
+#define MAKEBOOTDEV(type, adaptor, controller, unit, partition) \
+ (((type) << B_TYPESHIFT) | ((adaptor) << B_ADAPTORSHIFT) | \
+ ((controller) << B_CONTROLLERSHIFT) | ((unit) << B_UNITSHIFT) | \
+ ((partition) << B_PARTITIONSHIFT) | B_DEVMAGIC)
diff --git a/sys/sys/resource.h b/sys/sys/resource.h
new file mode 100644
index 000000000000..559f1ac6c377
--- /dev/null
+++ b/sys/sys/resource.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)resource.h 8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_RESOURCE_H_
+#define _SYS_RESOURCE_H_
+
+/*
+ * Process priority specifications to get/setpriority.
+ */
+#define PRIO_MIN -20
+#define PRIO_MAX 20
+
+#define PRIO_PROCESS 0
+#define PRIO_PGRP 1
+#define PRIO_USER 2
+
+/*
+ * Resource utilization information.
+ */
+
+#define RUSAGE_SELF 0
+#define RUSAGE_CHILDREN -1
+
+struct rusage {
+ struct timeval ru_utime; /* user time used */
+ struct timeval ru_stime; /* system time used */
+ long ru_maxrss; /* max resident set size */
+#define ru_first ru_ixrss
+ long ru_ixrss; /* integral shared memory size */
+ long ru_idrss; /* integral unshared data " */
+ long ru_isrss; /* integral unshared stack " */
+ long ru_minflt; /* page reclaims */
+ long ru_majflt; /* page faults */
+ long ru_nswap; /* swaps */
+ long ru_inblock; /* block input operations */
+ long ru_oublock; /* block output operations */
+ long ru_msgsnd; /* messages sent */
+ long ru_msgrcv; /* messages received */
+ long ru_nsignals; /* signals received */
+ long ru_nvcsw; /* voluntary context switches */
+ long ru_nivcsw; /* involuntary " */
+#define ru_last ru_nivcsw
+};
+
+/*
+ * Resource limits
+ */
+#define RLIMIT_CPU 0 /* cpu time in seconds */
+#define RLIMIT_FSIZE 1 /* maximum file size */
+#define RLIMIT_DATA 2 /* data size */
+#define RLIMIT_STACK 3 /* stack size */
+#define RLIMIT_CORE 4 /* core file size */
+#define RLIMIT_RSS 5 /* resident set size */
+#define RLIMIT_MEMLOCK 6 /* locked-in-memory address space */
+#define RLIMIT_NPROC 7 /* number of processes */
+#define RLIMIT_NOFILE 8 /* number of open files */
+
+#define RLIM_NLIMITS 9 /* number of resource limits */
+
+#define RLIM_INFINITY (((u_quad_t)1 << 63) - 1)
+
+struct orlimit {
+ long rlim_cur; /* current (soft) limit */
+ long rlim_max; /* maximum value for rlim_cur */
+};
+
+struct rlimit {
+ quad_t rlim_cur; /* current (soft) limit */
+ quad_t rlim_max; /* maximum value for rlim_cur */
+};
+
+/* Load average structure. */
+struct loadavg {
+ fixpt_t ldavg[3];
+ long fscale;
+};
+
+#ifdef KERNEL
+extern struct loadavg averunnable;
+
+#else
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int getpriority __P((int, int));
+int getrlimit __P((int, struct rlimit *));
+int getrusage __P((int, struct rusage *));
+int setpriority __P((int, int, int));
+int setrlimit __P((int, const struct rlimit *));
+__END_DECLS
+
+#endif /* KERNEL */
+#endif /* !_SYS_RESOURCE_H_ */
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
new file mode 100644
index 000000000000..0f8d5e30eed2
--- /dev/null
+++ b/sys/sys/resourcevar.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)resourcevar.h 8.3 (Berkeley) 2/22/94
+ */
+
+#ifndef _SYS_RESOURCEVAR_H_
+#define _SYS_RESOURCEVAR_H_
+
+/*
+ * Kernel per-process accounting / statistics
+ * (not necessarily resident except when running).
+ */
+struct pstats {
+#define pstat_startzero p_ru
+ struct rusage p_ru; /* stats for this proc */
+ struct rusage p_cru; /* sum of stats for reaped children */
+#define pstat_endzero pstat_startcopy
+
+#define pstat_startcopy p_timer
+ struct itimerval p_timer[3]; /* virtual-time timers */
+
+ struct uprof { /* profile arguments */
+ caddr_t pr_base; /* buffer base */
+ u_long pr_size; /* buffer size */
+ u_long pr_off; /* pc offset */
+ u_long pr_scale; /* pc scaling */
+ u_long pr_addr; /* temp storage for addr until AST */
+ u_long pr_ticks; /* temp storage for ticks until AST */
+ } p_prof;
+#define pstat_endcopy p_start
+ struct timeval p_start; /* starting time */
+};
+
+/*
+ * Kernel shareable process resource limits. Because this structure
+ * is moderately large but changes infrequently, it is normally
+ * shared copy-on-write after forks. If a group of processes
+ * ("threads") share modifications, the PL_SHAREMOD flag is set,
+ * and a copy must be made for the child of a new fork that isn't
+ * sharing modifications to the limits.
+ */
+struct plimit {
+ struct rlimit pl_rlimit[RLIM_NLIMITS];
+#define PL_SHAREMOD 0x01 /* modifications are shared */
+ int p_lflags;
+ int p_refcnt; /* number of references */
+};
+
+/* add user profiling from AST */
+#define ADDUPROF(p) \
+ addupc_task(p, \
+ (p)->p_stats->p_prof.pr_addr, (p)->p_stats->p_prof.pr_ticks)
+
+#ifdef KERNEL
+void addupc_intr __P((struct proc *p, u_long pc, u_int ticks));
+void addupc_task __P((struct proc *p, u_long pc, u_int ticks));
+struct plimit
+ *limcopy __P((struct plimit *lim));
+#endif
+#endif /* !_SYS_RESOURCEVAR_H_ */
diff --git a/sys/sys/select.h b/sys/sys/select.h
new file mode 100644
index 000000000000..a279c592fbe7
--- /dev/null
+++ b/sys/sys/select.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)select.h 8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_SELECT_H_
+#define _SYS_SELECT_H_
+
+/*
+ * Used to maintain information about processes that wish to be
+ * notified when I/O becomes possible.
+ */
+struct selinfo {
+ pid_t si_pid; /* process to be notified */
+ short si_flags; /* see below */
+};
+#define SI_COLL 0x0001 /* collision occurred */
+
+#ifdef KERNEL
+struct proc;
+
+void selrecord __P((struct proc *selector, struct selinfo *));
+void selwakeup __P((struct selinfo *));
+#endif
+
+#endif /* !_SYS_SELECT_H_ */
diff --git a/sys/sys/selinfo.h b/sys/sys/selinfo.h
new file mode 100644
index 000000000000..a279c592fbe7
--- /dev/null
+++ b/sys/sys/selinfo.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)select.h 8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_SELECT_H_
+#define _SYS_SELECT_H_
+
+/*
+ * Used to maintain information about processes that wish to be
+ * notified when I/O becomes possible.
+ */
+struct selinfo {
+ pid_t si_pid; /* process to be notified */
+ short si_flags; /* see below */
+};
+#define SI_COLL 0x0001 /* collision occurred */
+
+#ifdef KERNEL
+struct proc;
+
+void selrecord __P((struct proc *selector, struct selinfo *));
+void selwakeup __P((struct selinfo *));
+#endif
+
+#endif /* !_SYS_SELECT_H_ */
diff --git a/sys/sys/signal.h b/sys/sys/signal.h
new file mode 100644
index 000000000000..8ccded41c3be
--- /dev/null
+++ b/sys/sys/signal.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)signal.h 8.2 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_SIGNAL_H_
+#define _SYS_SIGNAL_H_
+
+#define NSIG 32 /* counting 0; could be 33 (mask is 1-32) */
+
+#ifndef _ANSI_SOURCE
+#include <machine/signal.h> /* sigcontext; codes for SIGILL, SIGFPE */
+#endif
+
+#define SIGHUP 1 /* hangup */
+#define SIGINT 2 /* interrupt */
+#define SIGQUIT 3 /* quit */
+#define SIGILL 4 /* illegal instruction (not reset when caught) */
+#ifndef _POSIX_SOURCE
+#define SIGTRAP 5 /* trace trap (not reset when caught) */
+#endif
+#define SIGABRT 6 /* abort() */
+#ifndef _POSIX_SOURCE
+#define SIGIOT SIGABRT /* compatibility */
+#define SIGEMT 7 /* EMT instruction */
+#endif
+#define SIGFPE 8 /* floating point exception */
+#define SIGKILL 9 /* kill (cannot be caught or ignored) */
+#ifndef _POSIX_SOURCE
+#define SIGBUS 10 /* bus error */
+#endif
+#define SIGSEGV 11 /* segmentation violation */
+#ifndef _POSIX_SOURCE
+#define SIGSYS 12 /* bad argument to system call */
+#endif
+#define SIGPIPE 13 /* write on a pipe with no one to read it */
+#define SIGALRM 14 /* alarm clock */
+#define SIGTERM 15 /* software termination signal from kill */
+#ifndef _POSIX_SOURCE
+#define SIGURG 16 /* urgent condition on IO channel */
+#endif
+#define SIGSTOP 17 /* sendable stop signal not from tty */
+#define SIGTSTP 18 /* stop signal from tty */
+#define SIGCONT 19 /* continue a stopped process */
+#define SIGCHLD 20 /* to parent on child stop or exit */
+#define SIGTTIN 21 /* to readers pgrp upon background tty read */
+#define SIGTTOU 22 /* like TTIN for output if (tp->t_local&LTOSTOP) */
+#ifndef _POSIX_SOURCE
+#define SIGIO 23 /* input/output possible signal */
+#define SIGXCPU 24 /* exceeded CPU time limit */
+#define SIGXFSZ 25 /* exceeded file size limit */
+#define SIGVTALRM 26 /* virtual time alarm */
+#define SIGPROF 27 /* profiling time alarm */
+#define SIGWINCH 28 /* window size changes */
+#define SIGINFO 29 /* information request */
+#endif
+#define SIGUSR1 30 /* user defined signal 1 */
+#define SIGUSR2 31 /* user defined signal 2 */
+
+#if defined(_ANSI_SOURCE) || defined(__cplusplus)
+/*
+ * Language spec sez we must list exactly one parameter, even though we
+ * actually supply three. Ugh!
+ */
+#define SIG_DFL (void (*)(int))0
+#define SIG_IGN (void (*)(int))1
+#define SIG_ERR (void (*)(int))-1
+#else
+#define SIG_DFL (void (*)())0
+#define SIG_IGN (void (*)())1
+#define SIG_ERR (void (*)())-1
+#endif
+
+#ifndef _ANSI_SOURCE
+typedef unsigned int sigset_t;
+
+/*
+ * Signal vector "template" used in sigaction call.
+ */
+struct sigaction {
+ void (*sa_handler)(); /* signal handler */
+ sigset_t sa_mask; /* signal mask to apply */
+ int sa_flags; /* see signal options below */
+};
+#ifndef _POSIX_SOURCE
+#define SA_ONSTACK 0x0001 /* take signal on signal stack */
+#define SA_RESTART 0x0002 /* restart system call on signal return */
+#define SA_DISABLE 0x0004 /* disable taking signals on alternate stack */
+#ifdef COMPAT_SUNOS
+#define SA_USERTRAMP 0x0100 /* do not bounce off kernel's sigtramp */
+#endif
+#endif
+#define SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */
+
+/*
+ * Flags for sigprocmask:
+ */
+#define SIG_BLOCK 1 /* block specified signal set */
+#define SIG_UNBLOCK 2 /* unblock specified signal set */
+#define SIG_SETMASK 3 /* set specified signal set */
+
+#ifndef _POSIX_SOURCE
+#ifndef KERNEL
+#include <sys/cdefs.h>
+#endif
+typedef void (*sig_t) __P((int)); /* type of signal function */
+
+/*
+ * Structure used in sigaltstack call.
+ */
+struct sigaltstack {
+ char *ss_base; /* signal stack base */
+ int ss_size; /* signal stack length */
+ int ss_flags; /* SA_DISABLE and/or SA_ONSTACK */
+};
+#define MINSIGSTKSZ 8192 /* minimum allowable stack */
+#define SIGSTKSZ (MINSIGSTKSZ + 32768) /* recommended stack size */
+
+/*
+ * 4.3 compatibility:
+ * Signal vector "template" used in sigvec call.
+ */
+struct sigvec {
+ void (*sv_handler)(); /* signal handler */
+ int sv_mask; /* signal mask to apply */
+ int sv_flags; /* see signal options below */
+};
+
+#define SV_ONSTACK SA_ONSTACK
+#define SV_INTERRUPT SA_RESTART /* same bit, opposite sense */
+#define sv_onstack sv_flags /* isn't compatibility wonderful! */
+
+/*
+ * Structure used in sigstack call.
+ */
+struct sigstack {
+ char *ss_sp; /* signal stack pointer */
+ int ss_onstack; /* current status */
+};
+
+/*
+ * Macro for converting signal number to a mask suitable for
+ * sigblock().
+ */
+#define sigmask(m) (1 << ((m)-1))
+
+#define BADSIG SIG_ERR
+
+#endif /* !_POSIX_SOURCE */
+#endif /* !_ANSI_SOURCE */
+
+/*
+ * For historical reasons; programs expect signal's return value to be
+ * defined by <sys/signal.h>.
+ */
+__BEGIN_DECLS
+void (*signal __P((int, void (*) __P((int))))) __P((int));
+__END_DECLS
+#endif /* !_SYS_SIGNAL_H_ */
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
new file mode 100644
index 000000000000..3d7e68bc5309
--- /dev/null
+++ b/sys/sys/signalvar.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)signalvar.h 8.3 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_SIGNALVAR_H_ /* tmp for user.h */
+#define _SYS_SIGNALVAR_H_
+
+/*
+ * Kernel signal definitions and data structures,
+ * not exported to user programs.
+ */
+
+/*
+ * Process signal actions and state, needed only within the process
+ * (not necessarily resident).
+ */
+struct sigacts {
+ sig_t ps_sigact[NSIG]; /* disposition of signals */
+ sigset_t ps_catchmask[NSIG]; /* signals to be blocked */
+ sigset_t ps_sigonstack; /* signals to take on sigstack */
+ sigset_t ps_sigintr; /* signals that interrupt syscalls */
+ sigset_t ps_oldmask; /* saved mask from before sigpause */
+ int ps_flags; /* signal flags, below */
+ struct sigaltstack ps_sigstk; /* sp & on stack state variable */
+ int ps_sig; /* for core dump/debugger XXX */
+ int ps_code; /* for core dump/debugger XXX */
+ int ps_addr; /* for core dump/debugger XXX */
+ sigset_t ps_usertramp; /* SunOS compat; libc sigtramp XXX */
+};
+
+/* signal flags */
+#define SAS_OLDMASK 0x01 /* need to restore mask before pause */
+#define SAS_ALTSTACK 0x02 /* have alternate signal stack */
+
+/* additional signal action values, used only temporarily/internally */
+#define SIG_CATCH (void (*)())2
+#define SIG_HOLD (void (*)())3
+
+/*
+ * Get the disposition of signal sig in process p; currently only for curproc.
+ */
+#define SIGACTION(p, sig) ((p)->p_sigacts->ps_sigact[(sig)])
+
+/*
+ * Determine signal that should be delivered to process p, the current
+ * process, 0 if none. If there is a pending stop signal with default
+ * action, the process stops in issignal().
+ */
+#define CURSIG(p) \
+ (((p)->p_siglist == 0 || \
+ (((p)->p_flag & P_TRACED) == 0 && \
+ ((p)->p_siglist & ~(p)->p_sigmask) == 0)) ? \
+ 0 : issignal(p))
+
+/*
+ * Clear a pending signal from a process.
+ */
+#define CLRSIG(p, sig) { (p)->p_siglist &= ~sigmask(sig); }
+
+/*
+ * Signal properties and actions.
+ * The array below categorizes the signals and their default actions
+ * according to the following properties:
+ */
+#define SA_KILL 0x01 /* terminates process by default */
+#define SA_CORE 0x02 /* ditto and coredumps */
+#define SA_STOP 0x04 /* suspend process */
+#define SA_TTYSTOP 0x08 /* ditto, from tty */
+#define SA_IGNORE 0x10 /* ignore by default */
+#define SA_CONT 0x20 /* continue if suspended */
+#define SA_CANTMASK 0x40 /* non-maskable, catchable */
+
+#ifdef SIGPROP
+int sigprop[NSIG + 1] = {
+ 0, /* unused */
+ SA_KILL, /* SIGHUP */
+ SA_KILL, /* SIGINT */
+ SA_KILL|SA_CORE, /* SIGQUIT */
+ SA_KILL|SA_CORE, /* SIGILL */
+ SA_KILL|SA_CORE, /* SIGTRAP */
+ SA_KILL|SA_CORE, /* SIGABRT */
+ SA_KILL|SA_CORE, /* SIGEMT */
+ SA_KILL|SA_CORE, /* SIGFPE */
+ SA_KILL, /* SIGKILL */
+ SA_KILL|SA_CORE, /* SIGBUS */
+ SA_KILL|SA_CORE, /* SIGSEGV */
+ SA_KILL|SA_CORE, /* SIGSYS */
+ SA_KILL, /* SIGPIPE */
+ SA_KILL, /* SIGALRM */
+ SA_KILL, /* SIGTERM */
+ SA_IGNORE, /* SIGURG */
+ SA_STOP, /* SIGSTOP */
+ SA_STOP|SA_TTYSTOP, /* SIGTSTP */
+ SA_IGNORE|SA_CONT, /* SIGCONT */
+ SA_IGNORE, /* SIGCHLD */
+ SA_STOP|SA_TTYSTOP, /* SIGTTIN */
+ SA_STOP|SA_TTYSTOP, /* SIGTTOU */
+ SA_IGNORE, /* SIGIO */
+ SA_KILL, /* SIGXCPU */
+ SA_KILL, /* SIGXFSZ */
+ SA_KILL, /* SIGVTALRM */
+ SA_KILL, /* SIGPROF */
+ SA_IGNORE, /* SIGWINCH */
+ SA_IGNORE, /* SIGINFO */
+ SA_KILL, /* SIGUSR1 */
+ SA_KILL, /* SIGUSR2 */
+};
+
+#define contsigmask (sigmask(SIGCONT))
+#define stopsigmask (sigmask(SIGSTOP) | sigmask(SIGTSTP) | \
+ sigmask(SIGTTIN) | sigmask(SIGTTOU))
+
+#endif /* SIGPROP */
+
+#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))
+
+#ifdef KERNEL
+/*
+ * Machine-independent functions:
+ */
+int coredump __P((struct proc *p));
+void execsigs __P((struct proc *p));
+void gsignal __P((int pgid, int sig));
+int issignal __P((struct proc *p)); /* invoked by the CURSIG() macro */
+void pgsignal __P((struct pgrp *pgrp, int sig, int checkctty));
+void postsig __P((int sig));
+void psignal __P((struct proc *p, int sig));
+void siginit __P((struct proc *p));
+void trapsignal __P((struct proc *p, int sig, unsigned code));
+
+/*
+ * Machine-dependent functions:
+ */
+void sendsig __P((sig_t action, int sig, int returnmask, unsigned code));
+#endif /* KERNEL */
+#endif /* !_SYS_SIGNALVAR_H_ */
diff --git a/sys/sys/socket.h b/sys/sys/socket.h
new file mode 100644
index 000000000000..f6728e988540
--- /dev/null
+++ b/sys/sys/socket.h
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 1982, 1985, 1986, 1988, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)socket.h 8.4 (Berkeley) 2/21/94
+ */
+
+#ifndef _SYS_SOCKET_H_
+#define _SYS_SOCKET_H_
+
+/*
+ * Definitions related to sockets: types, address families, options.
+ */
+
+/*
+ * Types
+ */
+#define SOCK_STREAM 1 /* stream socket */
+#define SOCK_DGRAM 2 /* datagram socket */
+#define SOCK_RAW 3 /* raw-protocol interface */
+#define SOCK_RDM 4 /* reliably-delivered message */
+#define SOCK_SEQPACKET 5 /* sequenced packet stream */
+
+/*
+ * Option flags per-socket.
+ */
+#define SO_DEBUG 0x0001 /* turn on debugging info recording */
+#define SO_ACCEPTCONN 0x0002 /* socket has had listen() */
+#define SO_REUSEADDR 0x0004 /* allow local address reuse */
+#define SO_KEEPALIVE 0x0008 /* keep connections alive */
+#define SO_DONTROUTE 0x0010 /* just use interface addresses */
+#define SO_BROADCAST 0x0020 /* permit sending of broadcast msgs */
+#define SO_USELOOPBACK 0x0040 /* bypass hardware when possible */
+#define SO_LINGER 0x0080 /* linger on close if data present */
+#define SO_OOBINLINE 0x0100 /* leave received OOB data in line */
+#define SO_REUSEPORT 0x0200 /* allow local address & port reuse */
+
+/*
+ * Additional options, not kept in so_options.
+ */
+#define SO_SNDBUF 0x1001 /* send buffer size */
+#define SO_RCVBUF 0x1002 /* receive buffer size */
+#define SO_SNDLOWAT 0x1003 /* send low-water mark */
+#define SO_RCVLOWAT 0x1004 /* receive low-water mark */
+#define SO_SNDTIMEO 0x1005 /* send timeout */
+#define SO_RCVTIMEO 0x1006 /* receive timeout */
+#define SO_ERROR 0x1007 /* get error status and clear */
+#define SO_TYPE 0x1008 /* get socket type */
+
+/*
+ * Structure used for manipulating linger option.
+ */
+struct linger {
+ int l_onoff; /* option on/off */
+ int l_linger; /* linger time */
+};
+
+/*
+ * Level number for (get/set)sockopt() to apply to socket itself.
+ */
+#define SOL_SOCKET 0xffff /* options for socket level */
+
+/*
+ * Address families.
+ */
+#define AF_UNSPEC 0 /* unspecified */
+#define AF_LOCAL 1 /* local to host (pipes, portals) */
+#define AF_UNIX AF_LOCAL /* backward compatibility */
+#define AF_INET 2 /* internetwork: UDP, TCP, etc. */
+#define AF_IMPLINK 3 /* arpanet imp addresses */
+#define AF_PUP 4 /* pup protocols: e.g. BSP */
+#define AF_CHAOS 5 /* mit CHAOS protocols */
+#define AF_NS 6 /* XEROX NS protocols */
+#define AF_ISO 7 /* ISO protocols */
+#define AF_OSI AF_ISO
+#define AF_ECMA 8 /* european computer manufacturers */
+#define AF_DATAKIT 9 /* datakit protocols */
+#define AF_CCITT 10 /* CCITT protocols, X.25 etc */
+#define AF_SNA 11 /* IBM SNA */
+#define AF_DECnet 12 /* DECnet */
+#define AF_DLI 13 /* DEC Direct data link interface */
+#define AF_LAT 14 /* LAT */
+#define AF_HYLINK 15 /* NSC Hyperchannel */
+#define AF_APPLETALK 16 /* Apple Talk */
+#define AF_ROUTE 17 /* Internal Routing Protocol */
+#define AF_LINK 18 /* Link layer interface */
+#define pseudo_AF_XTP 19 /* eXpress Transfer Protocol (no AF) */
+#define AF_COIP 20 /* connection-oriented IP, aka ST II */
+#define AF_CNT 21 /* Computer Network Technology */
+#define pseudo_AF_RTIP 22 /* Help Identify RTIP packets */
+#define AF_IPX 23 /* Novell Internet Protocol */
+#define AF_SIP 24 /* Simple Internet Protocol */
+#define pseudo_AF_PIP 25 /* Help Identify PIP packets */
+
+#define AF_MAX 26
+
+/*
+ * Structure used by kernel to store most
+ * addresses.
+ */
+struct sockaddr {
+ u_char sa_len; /* total length */
+ u_char sa_family; /* address family */
+ char sa_data[14]; /* actually longer; address value */
+};
+
+/*
+ * Structure used by kernel to pass protocol
+ * information in raw sockets.
+ */
+struct sockproto {
+ u_short sp_family; /* address family */
+ u_short sp_protocol; /* protocol */
+};
+
+/*
+ * Protocol families, same as address families for now.
+ */
+#define PF_UNSPEC AF_UNSPEC
+#define PF_LOCAL AF_LOCAL
+#define PF_UNIX PF_LOCAL /* backward compatibility */
+#define PF_INET AF_INET
+#define PF_IMPLINK AF_IMPLINK
+#define PF_PUP AF_PUP
+#define PF_CHAOS AF_CHAOS
+#define PF_NS AF_NS
+#define PF_ISO AF_ISO
+#define PF_OSI AF_ISO
+#define PF_ECMA AF_ECMA
+#define PF_DATAKIT AF_DATAKIT
+#define PF_CCITT AF_CCITT
+#define PF_SNA AF_SNA
+#define PF_DECnet AF_DECnet
+#define PF_DLI AF_DLI
+#define PF_LAT AF_LAT
+#define PF_HYLINK AF_HYLINK
+#define PF_APPLETALK AF_APPLETALK
+#define PF_ROUTE AF_ROUTE
+#define PF_LINK AF_LINK
+#define PF_XTP pseudo_AF_XTP /* really just proto family, no AF */
+#define PF_COIP AF_COIP
+#define PF_CNT AF_CNT
+#define PF_SIP AF_SIP
+#define PF_IPX AF_IPX /* same format as AF_NS */
+#define PF_RTIP pseudo_AF_RTIP /* same format as AF_INET */
+#define PF_PIP pseudo_AF_PIP
+
+#define PF_MAX AF_MAX
+
+/*
+ * Definitions for network related sysctl, CTL_NET.
+ *
+ * Second level is protocol family.
+ * Third level is protocol number.
+ *
+ * Further levels are defined by the individual families below.
+ */
+#define NET_MAXID AF_MAX
+
+#define CTL_NET_NAMES { \
+ { 0, 0 }, \
+ { "unix", CTLTYPE_NODE }, \
+ { "inet", CTLTYPE_NODE }, \
+ { "implink", CTLTYPE_NODE }, \
+ { "pup", CTLTYPE_NODE }, \
+ { "chaos", CTLTYPE_NODE }, \
+ { "xerox_ns", CTLTYPE_NODE }, \
+ { "iso", CTLTYPE_NODE }, \
+ { "ecma", CTLTYPE_NODE }, /* slot 8 == AF_ECMA */ \
+ { "datakit", CTLTYPE_NODE }, \
+ { "ccitt", CTLTYPE_NODE }, \
+ { "ibm_sna", CTLTYPE_NODE }, \
+ { "decnet", CTLTYPE_NODE }, \
+ { "dec_dli", CTLTYPE_NODE }, \
+ { "lat", CTLTYPE_NODE }, \
+ { "hylink", CTLTYPE_NODE }, \
+ { "appletalk", CTLTYPE_NODE }, \
+ { "route", CTLTYPE_NODE }, \
+ { "link_layer", CTLTYPE_NODE }, \
+ { "xtp", CTLTYPE_NODE }, \
+ { "coip", CTLTYPE_NODE }, \
+ { "cnt", CTLTYPE_NODE }, \
+ { "rtip", CTLTYPE_NODE }, \
+ { "ipx", CTLTYPE_NODE }, \
+ { "sip", CTLTYPE_NODE }, \
+ { "pip", CTLTYPE_NODE }, \
+}
+
+/*
+ * PF_ROUTE - Routing table
+ *
+ * Three additional levels are defined:
+ * Fourth: address family, 0 is wildcard
+ * Fifth: type of info, defined below
+ * Sixth: flag(s) to mask with for NET_RT_FLAGS
+ */
+#define NET_RT_DUMP 1 /* dump; may limit to a.f. */
+#define NET_RT_FLAGS 2 /* by flags, e.g. RESOLVING */
+#define NET_RT_IFLIST 3 /* survey interface list */
+#define NET_RT_MAXID 4
+
+#define CTL_NET_RT_NAMES { \
+ { 0, 0 }, \
+ { "dump", CTLTYPE_STRUCT }, \
+ { "flags", CTLTYPE_STRUCT }, \
+ { "iflist", CTLTYPE_STRUCT }, \
+}
+
+/*
+ * Maximum queue length specifiable by listen.
+ */
+#define SOMAXCONN 5
+
+/*
+ * Message header for recvmsg and sendmsg calls.
+ * Used value-result for recvmsg, value only for sendmsg.
+ */
+struct msghdr {
+ caddr_t msg_name; /* optional address */
+ u_int msg_namelen; /* size of address */
+ struct iovec *msg_iov; /* scatter/gather array */
+ u_int msg_iovlen; /* # elements in msg_iov */
+ caddr_t msg_control; /* ancillary data, see below */
+ u_int msg_controllen; /* ancillary data buffer len */
+ int msg_flags; /* flags on received message */
+};
+
+#define MSG_OOB 0x1 /* process out-of-band data */
+#define MSG_PEEK 0x2 /* peek at incoming message */
+#define MSG_DONTROUTE 0x4 /* send without using routing tables */
+#define MSG_EOR 0x8 /* data completes record */
+#define MSG_TRUNC 0x10 /* data discarded before delivery */
+#define MSG_CTRUNC 0x20 /* control data lost before delivery */
+#define MSG_WAITALL 0x40 /* wait for full request or error */
+#define MSG_DONTWAIT 0x80 /* this message should be nonblocking */
+
+/*
+ * Header for ancillary data objects in msg_control buffer.
+ * Used for additional information with/about a datagram
+ * not expressible by flags. The format is a sequence
+ * of message elements headed by cmsghdr structures.
+ */
+struct cmsghdr {
+ u_int cmsg_len; /* data byte count, including hdr */
+ int cmsg_level; /* originating protocol */
+ int cmsg_type; /* protocol-specific type */
+/* followed by u_char cmsg_data[]; */
+};
+
+/* given pointer to struct cmsghdr, return pointer to data */
+#define CMSG_DATA(cmsg) ((u_char *)((cmsg) + 1))
+
+/* given cmsghdr pointer, return next cmsghdr, or NULL if it would not fit */
+#define CMSG_NXTHDR(mhdr, cmsg) \
+ (((caddr_t)(cmsg) + ALIGN((cmsg)->cmsg_len) + sizeof(struct cmsghdr) > \
+ (mhdr)->msg_control + (mhdr)->msg_controllen) ? \
+ (struct cmsghdr *)NULL : \
+ (struct cmsghdr *)((caddr_t)(cmsg) + ALIGN((cmsg)->cmsg_len)))
+
+#define CMSG_FIRSTHDR(mhdr) ((struct cmsghdr *)(mhdr)->msg_control)
+
+/* "Socket"-level control message types: */
+#define SCM_RIGHTS 0x01 /* access rights (array of int) */
+
+/*
+ * 4.3 compat sockaddr, move to compat file later
+ */
+struct osockaddr {
+ u_short sa_family; /* address family */
+ char sa_data[14]; /* up to 14 bytes of direct address */
+};
+
+/*
+ * 4.3-compat message header (move to compat file later).
+ */
+struct omsghdr {
+ caddr_t msg_name; /* optional address */
+ int msg_namelen; /* size of address */
+ struct iovec *msg_iov; /* scatter/gather array */
+ int msg_iovlen; /* # elements in msg_iov */
+ caddr_t msg_accrights; /* access rights sent/received */
+ int msg_accrightslen;
+};
+
+#ifndef KERNEL
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int accept __P((int, struct sockaddr *, int *));
+int bind __P((int, const struct sockaddr *, int));
+int connect __P((int, const struct sockaddr *, int));
+int getpeername __P((int, struct sockaddr *, int *));
+int getsockname __P((int, struct sockaddr *, int *));
+int getsockopt __P((int, int, int, void *, int *));
+int listen __P((int, int));
+ssize_t recv __P((int, void *, size_t, int));
+ssize_t recvfrom __P((int, void *, size_t, int, struct sockaddr *, int *));
+ssize_t recvmsg __P((int, struct msghdr *, int));
+ssize_t send __P((int, const void *, size_t, int));
+ssize_t sendto __P((int, const void *,
+ size_t, int, const struct sockaddr *, int));
+ssize_t sendmsg __P((int, const struct msghdr *, int));
+int setsockopt __P((int, int, int, const void *, int));
+int shutdown __P((int, int));
+int socket __P((int, int, int));
+int socketpair __P((int, int, int, int *));
+__END_DECLS
+
+#endif /* !KERNEL */
+#endif /* !_SYS_SOCKET_H_ */
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
new file mode 100644
index 000000000000..ff104046c7c3
--- /dev/null
+++ b/sys/sys/socketvar.h
@@ -0,0 +1,207 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)socketvar.h 8.1 (Berkeley) 6/2/93
+ */
+
+#include <sys/select.h> /* for struct selinfo */
+
+/*
+ * Kernel structure per socket.
+ * Contains send and receive buffer queues,
+ * handle on protocol and pointer to protocol
+ * private data and error information.
+ */
+struct socket {
+ short so_type; /* generic type, see socket.h */
+ short so_options; /* from socket call, see socket.h */
+ short so_linger; /* time to linger while closing */
+ short so_state; /* internal state flags SS_*, below */
+ caddr_t so_pcb; /* protocol control block */
+ struct protosw *so_proto; /* protocol handle */
+/*
+ * Variables for connection queueing.
+ * Socket where accepts occur is so_head in all subsidiary sockets.
+ * If so_head is 0, socket is not related to an accept.
+ * For head socket so_q0 queues partially completed connections,
+ * while so_q is a queue of connections ready to be accepted.
+ * If a connection is aborted and it has so_head set, then
+ * it has to be pulled out of either so_q0 or so_q.
+ * We allow connections to queue up based on current queue lengths
+ * and limit on number of queued connections for this socket.
+ */
+ struct socket *so_head; /* back pointer to accept socket */
+ struct socket *so_q0; /* queue of partial connections */
+ struct socket *so_q; /* queue of incoming connections */
+ short so_q0len; /* partials on so_q0 */
+ short so_qlen; /* number of connections on so_q */
+ short so_qlimit; /* max number queued connections */
+ short so_timeo; /* connection timeout */
+ u_short so_error; /* error affecting connection */
+ pid_t so_pgid; /* pgid for signals */
+ u_long so_oobmark; /* chars to oob mark */
+/*
+ * Variables for socket buffering.
+ */
+ struct sockbuf {
+ u_long sb_cc; /* actual chars in buffer */
+ u_long sb_hiwat; /* max actual char count */
+ u_long sb_mbcnt; /* chars of mbufs used */
+ u_long sb_mbmax; /* max chars of mbufs to use */
+ long sb_lowat; /* low water mark */
+ struct mbuf *sb_mb; /* the mbuf chain */
+ struct selinfo sb_sel; /* process selecting read/write */
+ short sb_flags; /* flags, see below */
+ short sb_timeo; /* timeout for read/write */
+ } so_rcv, so_snd;
+#define SB_MAX (256*1024) /* default for max chars in sockbuf */
+#define SB_LOCK 0x01 /* lock on data queue */
+#define SB_WANT 0x02 /* someone is waiting to lock */
+#define SB_WAIT 0x04 /* someone is waiting for data/space */
+#define SB_SEL 0x08 /* someone is selecting */
+#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */
+#define SB_NOTIFY (SB_WAIT|SB_SEL|SB_ASYNC)
+#define SB_NOINTR 0x40 /* operations not interruptible */
+
+ caddr_t so_tpcb; /* Wisc. protocol control block XXX */
+ void (*so_upcall) __P((struct socket *so, caddr_t arg, int waitf));
+ caddr_t so_upcallarg; /* Arg for above */
+};
+
+/*
+ * Socket state bits.
+ */
+#define SS_NOFDREF 0x001 /* no file table ref any more */
+#define SS_ISCONNECTED 0x002 /* socket connected to a peer */
+#define SS_ISCONNECTING 0x004 /* in process of connecting to peer */
+#define SS_ISDISCONNECTING 0x008 /* in process of disconnecting */
+#define SS_CANTSENDMORE 0x010 /* can't send more data to peer */
+#define SS_CANTRCVMORE 0x020 /* can't receive more data from peer */
+#define SS_RCVATMARK 0x040 /* at mark on input */
+
+#define SS_PRIV 0x080 /* privileged for broadcast, raw... */
+#define SS_NBIO 0x100 /* non-blocking ops */
+#define SS_ASYNC 0x200 /* async i/o notify */
+#define SS_ISCONFIRMING 0x400 /* deciding to accept connection req */
+
+
+/*
+ * Macros for sockets and socket buffering.
+ */
+
+/*
+ * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
+ * This is problematical if the fields are unsigned, as the space might
+ * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
+ * overflow and return 0. Should use "lmin" but it doesn't exist now.
+ */
+#define sbspace(sb) \
+ ((long) imin((int)((sb)->sb_hiwat - (sb)->sb_cc), \
+ (int)((sb)->sb_mbmax - (sb)->sb_mbcnt)))
+
+/* do we have to send all at once on a socket? */
+#define sosendallatonce(so) \
+ ((so)->so_proto->pr_flags & PR_ATOMIC)
+
+/* nonzero if so has data at/above low water, can't-rcv-more, a conn, or error */
+#define soreadable(so) \
+ ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \
+ ((so)->so_state & SS_CANTRCVMORE) != 0 || \
+ (so)->so_qlen != 0 || (so)->so_error != 0)
+
+/* nonzero if so has send space and is connected (if needed), or is shut/errored */
+#define sowriteable(so) \
+ ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \
+ (((so)->so_state & SS_ISCONNECTED) != 0 || \
+ ((so)->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) || \
+ ((so)->so_state & SS_CANTSENDMORE) != 0 || \
+ (so)->so_error != 0)
+
+/* adjust counters in sb reflecting allocation of m */
+#define sballoc(sb, m) { \
+ (sb)->sb_cc += (m)->m_len; \
+ (sb)->sb_mbcnt += MSIZE; \
+ if ((m)->m_flags & M_EXT) \
+ (sb)->sb_mbcnt += (m)->m_ext.ext_size; \
+}
+
+/* adjust counters in sb reflecting freeing of m */
+#define sbfree(sb, m) { \
+ (sb)->sb_cc -= (m)->m_len; \
+ (sb)->sb_mbcnt -= MSIZE; \
+ if ((m)->m_flags & M_EXT) \
+ (sb)->sb_mbcnt -= (m)->m_ext.ext_size; \
+}
+
+/*
+ * Set lock on sockbuf sb; if already held, sleep in sb_lock() when wf is
+ * M_WAITOK (interruptible unless SB_NOINTR is set; error returned without
+ * lock if interrupted), else yield EWOULDBLOCK.  Yields 0 on success.
+ */
+#define sblock(sb, wf) ((sb)->sb_flags & SB_LOCK ? \
+ (((wf) == M_WAITOK) ? sb_lock(sb) : EWOULDBLOCK) : \
+ (((sb)->sb_flags |= SB_LOCK), 0))
+
+/* release lock on sockbuf sb */
+#define sbunlock(sb) { \
+ (sb)->sb_flags &= ~SB_LOCK; \
+ if ((sb)->sb_flags & SB_WANT) { \
+ (sb)->sb_flags &= ~SB_WANT; \
+ wakeup((caddr_t)&(sb)->sb_flags); \
+ } \
+}
+
+#define sorwakeup(so) { sowakeup((so), &(so)->so_rcv); \
+ if ((so)->so_upcall) \
+ (*((so)->so_upcall))((so), (so)->so_upcallarg, M_DONTWAIT); \
+ }
+
+#define sowwakeup(so) sowakeup((so), &(so)->so_snd)
+
+#ifdef KERNEL
+extern u_long sb_max; /* NOTE(review): needs one definition in a .c file */
+/* to catch callers missing new second argument to sonewconn: */
+#define sonewconn(head, connstatus) sonewconn1((head), (connstatus))
+struct socket *sonewconn1 __P((struct socket *head, int connstatus));
+
+/* strings for sleep message: */
+extern char netio[], netcon[], netcls[];
+
+/*
+ * File operations on sockets.
+ */
+int soo_read __P((struct file *fp, struct uio *uio, struct ucred *cred));
+int soo_write __P((struct file *fp, struct uio *uio, struct ucred *cred));
+int soo_ioctl __P((struct file *fp, int com, caddr_t data, struct proc *p));
+int soo_select __P((struct file *fp, int which, struct proc *p));
+int soo_close __P((struct file *fp, struct proc *p));
+#endif
diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h
new file mode 100644
index 000000000000..eb5a44a598d1
--- /dev/null
+++ b/sys/sys/sockio.h
@@ -0,0 +1,77 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)sockio.h 8.1 (Berkeley) 3/28/94
+ */
+
+#ifndef _SYS_SOCKIO_H_
+#define _SYS_SOCKIO_H_
+
+#include <sys/ioccom.h>
+
+/* Socket ioctl's. */
+#define SIOCSHIWAT _IOW('s', 0, int) /* set high watermark */
+#define SIOCGHIWAT _IOR('s', 1, int) /* get high watermark */
+#define SIOCSLOWAT _IOW('s', 2, int) /* set low watermark */
+#define SIOCGLOWAT _IOR('s', 3, int) /* get low watermark */
+#define SIOCATMARK _IOR('s', 7, int) /* at oob mark? */
+#define SIOCSPGRP _IOW('s', 8, int) /* set process group */
+#define SIOCGPGRP _IOR('s', 9, int) /* get process group */
+
+#define SIOCADDRT _IOW('r', 10, struct ortentry) /* add route */
+#define SIOCDELRT _IOW('r', 11, struct ortentry) /* delete route */
+
+#define SIOCSIFADDR _IOW('i', 12, struct ifreq) /* set ifnet address */
+#define OSIOCGIFADDR _IOWR('i', 13, struct ifreq) /* get ifnet address */
+#define SIOCGIFADDR _IOWR('i', 33, struct ifreq) /* get ifnet address */
+#define SIOCSIFDSTADDR _IOW('i', 14, struct ifreq) /* set p-p address */
+#define OSIOCGIFDSTADDR _IOWR('i', 15, struct ifreq) /* get p-p address */
+#define SIOCGIFDSTADDR _IOWR('i', 34, struct ifreq) /* get p-p address */
+#define SIOCSIFFLAGS _IOW('i', 16, struct ifreq) /* set ifnet flags */
+#define SIOCGIFFLAGS _IOWR('i', 17, struct ifreq) /* get ifnet flags */
+#define OSIOCGIFBRDADDR _IOWR('i', 18, struct ifreq) /* get broadcast addr */
+#define SIOCGIFBRDADDR _IOWR('i', 35, struct ifreq) /* get broadcast addr */
+#define SIOCSIFBRDADDR _IOW('i', 19, struct ifreq) /* set broadcast addr */
+#define OSIOCGIFCONF _IOWR('i', 20, struct ifconf) /* get ifnet list */
+#define SIOCGIFCONF _IOWR('i', 36, struct ifconf) /* get ifnet list */
+#define OSIOCGIFNETMASK _IOWR('i', 21, struct ifreq) /* get net addr mask */
+#define SIOCGIFNETMASK _IOWR('i', 37, struct ifreq) /* get net addr mask */
+#define SIOCSIFNETMASK _IOW('i', 22, struct ifreq) /* set net addr mask */
+#define SIOCGIFMETRIC _IOWR('i', 23, struct ifreq) /* get IF metric */
+#define SIOCSIFMETRIC _IOW('i', 24, struct ifreq) /* set IF metric */
+#define SIOCDIFADDR _IOW('i', 25, struct ifreq) /* delete IF addr */
+#define SIOCAIFADDR _IOW('i', 26, struct ifaliasreq)/* add/chg IF alias */
+
+#define SIOCADDMULTI _IOW('i', 49, struct ifreq) /* add m'cast addr */
+#define SIOCDELMULTI _IOW('i', 50, struct ifreq) /* del m'cast addr */
+
+#endif /* !_SYS_SOCKIO_H_ */
diff --git a/sys/sys/stat.h b/sys/sys/stat.h
new file mode 100644
index 000000000000..07020c367703
--- /dev/null
+++ b/sys/sys/stat.h
@@ -0,0 +1,193 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)stat.h 8.6 (Berkeley) 3/8/94
+ */
+
+#ifndef _SYS_STAT_H_
+#define _SYS_STAT_H_
+
+#include <sys/time.h>
+
+#ifndef _POSIX_SOURCE
+struct ostat { /* narrower-typed variant of struct stat; presumably the older layout -- confirm */
+ unsigned short st_dev; /* inode's device (dev_t in struct stat) */
+ ino_t st_ino; /* inode's number */
+ mode_t st_mode; /* inode protection mode */
+ nlink_t st_nlink; /* number of hard links */
+ unsigned short st_uid; /* user ID of the file's owner (uid_t in struct stat) */
+ unsigned short st_gid; /* group ID of the file's group (gid_t in struct stat) */
+ unsigned short st_rdev; /* device type (dev_t in struct stat) */
+ long st_size; /* file size, in bytes (off_t in struct stat) */
+ struct timespec st_atimespec; /* time of last access */
+ struct timespec st_mtimespec; /* time of last data modification */
+ struct timespec st_ctimespec; /* time of last file status change */
+ long st_blksize; /* optimal blocksize for I/O */
+ long st_blocks; /* blocks allocated for file (quad_t in struct stat) */
+ unsigned long st_flags; /* user defined flags for file */
+ unsigned long st_gen; /* file generation number */
+};
+#endif /* !_POSIX_SOURCE */
+
+struct stat { /* file status record; filled in through the stat/fstat/lstat prototypes declared later in this header */
+ dev_t st_dev; /* inode's device */
+ ino_t st_ino; /* inode's number */
+ mode_t st_mode; /* inode protection mode */
+ nlink_t st_nlink; /* number of hard links */
+ uid_t st_uid; /* user ID of the file's owner */
+ gid_t st_gid; /* group ID of the file's group */
+ dev_t st_rdev; /* device type */
+ struct timespec st_atimespec; /* time of last access */
+ struct timespec st_mtimespec; /* time of last data modification */
+ struct timespec st_ctimespec; /* time of last file status change */
+ off_t st_size; /* file size, in bytes */
+ quad_t st_blocks; /* blocks allocated for file */
+ unsigned long st_blksize; /* optimal blocksize for I/O */
+ unsigned long st_flags; /* user defined flags for file */
+ unsigned long st_gen; /* file generation number */
+ long st_lspare; /* spare field; no use is visible in this header */
+ quad_t st_qspare[2]; /* spare fields; no use is visible in this header */
+};
+#define st_atime st_atimespec.ts_sec /* seconds member of the access timespec */
+#define st_mtime st_mtimespec.ts_sec /* seconds member of the modification timespec */
+#define st_ctime st_ctimespec.ts_sec /* seconds member of the status-change timespec */
+
+#define S_ISUID 0004000 /* set user id on execution */
+#define S_ISGID 0002000 /* set group id on execution */
+#ifndef _POSIX_SOURCE
+#define S_ISTXT 0001000 /* sticky bit (same value as S_ISVTX below) */
+#endif
+
+#define S_IRWXU 0000700 /* RWX mask for owner */
+#define S_IRUSR 0000400 /* R for owner */
+#define S_IWUSR 0000200 /* W for owner */
+#define S_IXUSR 0000100 /* X for owner */
+
+#ifndef _POSIX_SOURCE
+#define S_IREAD S_IRUSR /* non-POSIX alias: R for owner */
+#define S_IWRITE S_IWUSR /* non-POSIX alias: W for owner */
+#define S_IEXEC S_IXUSR /* non-POSIX alias: X for owner */
+#endif
+
+#define S_IRWXG 0000070 /* RWX mask for group */
+#define S_IRGRP 0000040 /* R for group */
+#define S_IWGRP 0000020 /* W for group */
+#define S_IXGRP 0000010 /* X for group */
+
+#define S_IRWXO 0000007 /* RWX mask for other */
+#define S_IROTH 0000004 /* R for other */
+#define S_IWOTH 0000002 /* W for other */
+#define S_IXOTH 0000001 /* X for other */
+
+#ifndef _POSIX_SOURCE
+#define S_IFMT 0170000 /* type of file mask */
+#define S_IFIFO 0010000 /* named pipe (fifo) */
+#define S_IFCHR 0020000 /* character special */
+#define S_IFDIR 0040000 /* directory */
+#define S_IFBLK 0060000 /* block special */
+#define S_IFREG 0100000 /* regular */
+#define S_IFLNK 0120000 /* symbolic link */
+#define S_IFSOCK 0140000 /* socket */
+#define S_ISVTX 0001000 /* save swapped text even after use (same bit as S_ISTXT) */
+#endif
+
+#define S_ISDIR(m) (((m) & 0170000) == 0040000) /* directory; 0170000 is the file-type mask */
+#define S_ISCHR(m) (((m) & 0170000) == 0020000) /* char special */
+#define S_ISBLK(m) (((m) & 0170000) == 0060000) /* block special */
+#define S_ISREG(m) (((m) & 0170000) == 0100000) /* regular file */
+#define S_ISFIFO(m) (((m) & 0170000) == 0010000 || \
+ ((m) & 0170000) == 0140000) /* fifo (0010000) or socket (0140000); may evaluate m twice */
+#ifndef _POSIX_SOURCE
+#define S_ISLNK(m) (((m) & 0170000) == 0120000) /* symbolic link */
+#define S_ISSOCK(m) (((m) & 0170000) == 0010000 || \
+ ((m) & 0170000) == 0140000) /* fifo (0010000) or socket (0140000); may evaluate m twice */
+#endif
+
+#ifndef _POSIX_SOURCE
+#define ACCESSPERMS (S_IRWXU|S_IRWXG|S_IRWXO) /* 0777 */
+ /* 07777 */
+#define ALLPERMS (S_ISUID|S_ISGID|S_ISTXT|S_IRWXU|S_IRWXG|S_IRWXO)
+ /* 0666 */
+#define DEFFILEMODE (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
+
+#define S_BLKSIZE 512 /* block size used in the stat struct */
+
+/*
+ * Definitions of flags stored in file flags word.
+ *
+ * Super-user and owner changeable flags (low 16 bits, see UF_SETTABLE).
+ */
+#define UF_SETTABLE 0x0000ffff /* mask of owner changeable flags */
+#define UF_NODUMP 0x00000001 /* do not dump file */
+#define UF_IMMUTABLE 0x00000002 /* file may not be changed */
+#define UF_APPEND 0x00000004 /* writes to file may only append */
+/*
+ * Super-user changeable flags (high 16 bits, see SF_SETTABLE).
+ */
+#define SF_SETTABLE 0xffff0000 /* mask of superuser changeable flags */
+#define SF_ARCHIVED 0x00010000 /* file is archived */
+#define SF_IMMUTABLE 0x00020000 /* file may not be changed */
+#define SF_APPEND 0x00040000 /* writes to file may only append */
+
+#ifdef KERNEL
+/*
+ * Shorthand abbreviations of above: each combines the UF_ and SF_ bit.
+ */
+#define APPEND (UF_APPEND | SF_APPEND)
+#define IMMUTABLE (UF_IMMUTABLE | SF_IMMUTABLE)
+#endif /* KERNEL */
+#endif /* !_POSIX_SOURCE */
+
+#ifndef KERNEL
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int chmod __P((const char *, mode_t));
+int fstat __P((int, struct stat *));
+int mkdir __P((const char *, mode_t));
+int mkfifo __P((const char *, mode_t));
+int stat __P((const char *, struct stat *));
+mode_t umask __P((mode_t));
+#ifndef _POSIX_SOURCE
+int chflags __P((const char *, u_long));
+int fchflags __P((int, u_long));
+int fchmod __P((int, mode_t));
+int lstat __P((const char *, struct stat *));
+#endif /* !_POSIX_SOURCE */
+__END_DECLS
+#endif /* !KERNEL */
+#endif /* !_SYS_STAT_H_ */
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
new file mode 100644
index 000000000000..8df8eb4fc51f
--- /dev/null
+++ b/sys/sys/syscall.h
@@ -0,0 +1,186 @@
+/*
+ * System call numbers.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * created from @(#)syscalls.master 8.2 (Berkeley) 1/13/94
+ */
+
+#define SYS_syscall 0
+#define SYS_exit 1
+#define SYS_fork 2
+#define SYS_read 3
+#define SYS_write 4
+#define SYS_open 5
+#define SYS_close 6
+#define SYS_wait4 7
+ /* 8 is old creat */
+#define SYS_link 9
+#define SYS_unlink 10
+ /* 11 is obsolete execv */
+#define SYS_chdir 12
+#define SYS_fchdir 13
+#define SYS_mknod 14
+#define SYS_chmod 15
+#define SYS_chown 16
+#define SYS_break 17
+#define SYS_getfsstat 18
+ /* 19 is old lseek */
+#define SYS_getpid 20
+#define SYS_mount 21
+#define SYS_unmount 22
+#define SYS_setuid 23
+#define SYS_getuid 24
+#define SYS_geteuid 25
+#define SYS_ptrace 26
+#define SYS_recvmsg 27
+#define SYS_sendmsg 28
+#define SYS_recvfrom 29
+#define SYS_accept 30
+#define SYS_getpeername 31
+#define SYS_getsockname 32
+#define SYS_access 33
+#define SYS_chflags 34
+#define SYS_fchflags 35
+#define SYS_sync 36
+#define SYS_kill 37
+ /* 38 is old stat */
+#define SYS_getppid 39
+ /* 40 is old lstat */
+#define SYS_dup 41
+#define SYS_pipe 42
+#define SYS_getegid 43
+#define SYS_profil 44
+#define SYS_ktrace 45
+#define SYS_sigaction 46
+#define SYS_getgid 47
+#define SYS_sigprocmask 48
+#define SYS_getlogin 49
+#define SYS_setlogin 50
+#define SYS_acct 51
+#define SYS_sigpending 52
+#define SYS_sigaltstack 53
+#define SYS_ioctl 54
+#define SYS_reboot 55
+#define SYS_revoke 56
+#define SYS_symlink 57
+#define SYS_readlink 58
+#define SYS_execve 59
+#define SYS_umask 60
+#define SYS_chroot 61
+ /* 62 is old fstat */
+ /* 63 is old getkerninfo */
+ /* 64 is old getpagesize */
+#define SYS_msync 65
+#define SYS_vfork 66
+ /* 67 is obsolete vread */
+ /* 68 is obsolete vwrite */
+#define SYS_sbrk 69
+#define SYS_sstk 70
+ /* 71 is old mmap */
+#define SYS_vadvise 72
+#define SYS_munmap 73
+#define SYS_mprotect 74
+#define SYS_madvise 75
+ /* 76 is obsolete vhangup */
+ /* 77 is obsolete vlimit */
+#define SYS_mincore 78
+#define SYS_getgroups 79
+#define SYS_setgroups 80
+#define SYS_getpgrp 81
+#define SYS_setpgid 82
+#define SYS_setitimer 83
+ /* 84 is old wait */
+#define SYS_swapon 85
+#define SYS_getitimer 86
+ /* 87 is old gethostname */
+ /* 88 is old sethostname */
+#define SYS_getdtablesize 89
+#define SYS_dup2 90
+#define SYS_fcntl 92
+#define SYS_select 93
+#define SYS_fsync 95
+#define SYS_setpriority 96
+#define SYS_socket 97
+#define SYS_connect 98
+ /* 99 is old accept */
+#define SYS_getpriority 100
+ /* 101 is old send */
+ /* 102 is old recv */
+#define SYS_sigreturn 103
+#define SYS_bind 104
+#define SYS_setsockopt 105
+#define SYS_listen 106
+ /* 107 is obsolete vtimes */
+ /* 108 is old sigvec */
+ /* 109 is old sigblock */
+ /* 110 is old sigsetmask */
+#define SYS_sigsuspend 111
+ /* 112 is old sigstack */
+ /* 113 is old recvmsg */
+ /* 114 is old sendmsg */
+#define SYS_vtrace 115
+ /* 115 is obsolete vtrace (NOTE(review): SYS_vtrace is nevertheless defined above) */
+#define SYS_gettimeofday 116
+#define SYS_getrusage 117
+#define SYS_getsockopt 118
+#define SYS_resuba 119
+#define SYS_readv 120
+#define SYS_writev 121
+#define SYS_settimeofday 122
+#define SYS_fchown 123
+#define SYS_fchmod 124
+ /* 125 is old recvfrom */
+ /* 126 is old setreuid */
+ /* 127 is old setregid */
+#define SYS_rename 128
+ /* 129 is old truncate */
+ /* 130 is old ftruncate */
+#define SYS_flock 131
+#define SYS_mkfifo 132
+#define SYS_sendto 133
+#define SYS_shutdown 134
+#define SYS_socketpair 135
+#define SYS_mkdir 136
+#define SYS_rmdir 137
+#define SYS_utimes 138
+ /* 139 is obsolete 4.2 sigreturn */
+#define SYS_adjtime 140
+ /* 141 is old getpeername */
+ /* 142 is old gethostid */
+ /* 143 is old sethostid */
+ /* 144 is old getrlimit */
+ /* 145 is old setrlimit */
+ /* 146 is old killpg */
+#define SYS_setsid 147
+#define SYS_quotactl 148
+ /* 149 is old quota */
+ /* 150 is old getsockname */
+#define SYS_nfssvc 155
+ /* 156 is old getdirentries */
+#define SYS_statfs 157
+#define SYS_fstatfs 158
+#define SYS_getfh 161
+#define SYS_shmsys 171
+#define SYS_setgid 181
+#define SYS_setegid 182
+#define SYS_seteuid 183
+#define SYS_lfs_bmapv 184
+#define SYS_lfs_markv 185
+#define SYS_lfs_segclean 186
+#define SYS_lfs_segwait 187
+#define SYS_stat 188
+#define SYS_fstat 189
+#define SYS_lstat 190
+#define SYS_pathconf 191
+#define SYS_fpathconf 192
+#define SYS_getrlimit 194
+#define SYS_setrlimit 195
+#define SYS_getdirentries 196
+#define SYS_mmap 197
+#define SYS___syscall 198
+#define SYS_lseek 199
+#define SYS_truncate 200
+#define SYS_ftruncate 201
+#define SYS___sysctl 202
+#define SYS_mlock 203
+#define SYS_munlock 204
diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h
new file mode 100644
index 000000000000..4ad83a74542b
--- /dev/null
+++ b/sys/sys/sysctl.h
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Karels at Berkeley Software Design, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)sysctl.h 8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _SYS_SYSCTL_H_
+#define _SYS_SYSCTL_H_
+
+/*
+ * These are for the eproc structure defined below.
+ */
+#ifndef KERNEL
+#include <sys/time.h>
+#include <sys/ucred.h>
+#include <sys/proc.h>
+#include <vm/vm.h>
+#endif
+
+/*
+ * Definitions for sysctl call. The sysctl call uses a hierarchical name
+ * for objects that can be examined or modified. The name is expressed as
+ * a sequence of integers. Like a file path name, the meaning of each
+ * component depends on its place in the hierarchy. The top-level and kern
+ * identifiers are defined here, and other identifiers are defined in the
+ * respective subsystem header files.
+ */
+
+#define CTL_MAXNAME 12 /* largest number of components supported */
+
+/*
+ * Each subsystem defined by sysctl defines a list of variables
+ * for that subsystem. Each name is either a node with further
+ * levels defined below it, or it is a leaf of some particular
+ * type given below. Each sysctl level defines a set of name/type
+ * pairs to be used by sysctl(1) in manipulating the subsystem.
+ */
+struct ctlname { /* one name/type pair, the element type of the *_NAMES initializers in this header */
+ char *ctl_name; /* subsystem name */
+ int ctl_type; /* type of name: one of the CTLTYPE_* values below */
+};
+#define CTLTYPE_NODE 1 /* name is a node */
+#define CTLTYPE_INT 2 /* name describes an integer */
+#define CTLTYPE_STRING 3 /* name describes a string */
+#define CTLTYPE_QUAD 4 /* name describes a 64-bit number */
+#define CTLTYPE_STRUCT 5 /* name describes a structure */
+
+/*
+ * Top-level identifiers (CTL_NAMES below lists the same entries in id order)
+ */
+#define CTL_UNSPEC 0 /* unused */
+#define CTL_KERN 1 /* "high kernel": proc, limits */
+#define CTL_VM 2 /* virtual memory */
+#define CTL_FS 3 /* file system, mount type is next */
+#define CTL_NET 4 /* network, see socket.h */
+#define CTL_DEBUG 5 /* debugging parameters */
+#define CTL_HW 6 /* generic cpu/io */
+#define CTL_MACHDEP 7 /* machine dependent */
+#define CTL_USER 8 /* user-level */
+#define CTL_MAXID 9 /* number of valid top-level ids */
+
+#define CTL_NAMES { \
+ { 0, 0 }, \
+ { "kern", CTLTYPE_NODE }, \
+ { "vm", CTLTYPE_NODE }, \
+ { "fs", CTLTYPE_NODE }, \
+ { "net", CTLTYPE_NODE }, \
+ { "debug", CTLTYPE_NODE }, \
+ { "hw", CTLTYPE_NODE }, \
+ { "machdep", CTLTYPE_NODE }, \
+ { "user", CTLTYPE_NODE }, \
+} /* CTL_MAXID entries; slot 0 (CTL_UNSPEC) is the empty pair */
+
+/*
+ * CTL_KERN identifiers (CTL_KERN_NAMES below lists the same entries in id order)
+ */
+#define KERN_OSTYPE 1 /* string: system version */
+#define KERN_OSRELEASE 2 /* string: system release */
+#define KERN_OSREV 3 /* int: system revision */
+#define KERN_VERSION 4 /* string: compile time info */
+#define KERN_MAXVNODES 5 /* int: max vnodes */
+#define KERN_MAXPROC 6 /* int: max processes */
+#define KERN_MAXFILES 7 /* int: max open files */
+#define KERN_ARGMAX 8 /* int: max arguments to exec */
+#define KERN_SECURELVL 9 /* int: system security level */
+#define KERN_HOSTNAME 10 /* string: hostname */
+#define KERN_HOSTID 11 /* int: host identifier */
+#define KERN_CLOCKRATE 12 /* struct: struct clockrate */
+#define KERN_VNODE 13 /* struct: vnode structures */
+#define KERN_PROC 14 /* struct: process entries */
+#define KERN_FILE 15 /* struct: file entries */
+#define KERN_PROF 16 /* node: kernel profiling info */
+#define KERN_POSIX1 17 /* int: POSIX.1 version */
+#define KERN_NGROUPS 18 /* int: # of supplemental group ids */
+#define KERN_JOB_CONTROL 19 /* int: is job control available */
+#define KERN_SAVED_IDS 20 /* int: saved set-user/group-ID */
+#define KERN_BOOTTIME 21 /* struct: time kernel was booted */
+#define KERN_MAXID 22 /* number of valid kern ids */
+
+#define CTL_KERN_NAMES { \
+ { 0, 0 }, \
+ { "ostype", CTLTYPE_STRING }, \
+ { "osrelease", CTLTYPE_STRING }, \
+ { "osrevision", CTLTYPE_INT }, \
+ { "version", CTLTYPE_STRING }, \
+ { "maxvnodes", CTLTYPE_INT }, \
+ { "maxproc", CTLTYPE_INT }, \
+ { "maxfiles", CTLTYPE_INT }, \
+ { "argmax", CTLTYPE_INT }, \
+ { "securelevel", CTLTYPE_INT }, \
+ { "hostname", CTLTYPE_STRING }, \
+ { "hostid", CTLTYPE_INT }, \
+ { "clockrate", CTLTYPE_STRUCT }, \
+ { "vnode", CTLTYPE_STRUCT }, \
+ { "proc", CTLTYPE_STRUCT }, \
+ { "file", CTLTYPE_STRUCT }, \
+ { "profiling", CTLTYPE_NODE }, \
+ { "posix1version", CTLTYPE_INT }, \
+ { "ngroups", CTLTYPE_INT }, \
+ { "job_control", CTLTYPE_INT }, \
+ { "saved_ids", CTLTYPE_INT }, \
+ { "boottime", CTLTYPE_STRUCT }, \
+} /* KERN_MAXID entries; slot 0 is the empty pair */
+
+/*
+ * KERN_PROC subtypes (selectors for which process entries are wanted)
+ */
+#define KERN_PROC_ALL 0 /* everything */
+#define KERN_PROC_PID 1 /* by process id */
+#define KERN_PROC_PGRP 2 /* by process group id */
+#define KERN_PROC_SESSION 3 /* by session of pid */
+#define KERN_PROC_TTY 4 /* by controlling tty */
+#define KERN_PROC_UID 5 /* by effective uid */
+#define KERN_PROC_RUID 6 /* by real uid */
+
+/*
+ * KERN_PROC subtype ops return arrays of augmented proc structures:
+ */
+struct kinfo_proc {
+ struct proc kp_proc; /* proc structure */
+ struct eproc {
+ struct proc *e_paddr; /* address of proc */
+ struct session *e_sess; /* session pointer */
+ struct pcred e_pcred; /* process credentials */
+ struct ucred e_ucred; /* current credentials */
+#ifdef sparc
+ struct {
+ segsz_t vm_rssize; /* resident set size */
+ segsz_t vm_tsize; /* text size */
+ segsz_t vm_dsize; /* data size */
+ segsz_t vm_ssize; /* stack size */
+ } e_vm;
+#else
+ struct vmspace e_vm; /* address space */
+#endif
+ pid_t e_ppid; /* parent process id */
+ pid_t e_pgid; /* process group id */
+ short e_jobc; /* job control counter */
+ dev_t e_tdev; /* controlling tty dev */
+ pid_t e_tpgid; /* tty process group id */
+ struct session *e_tsess; /* tty session pointer */
+#define WMESGLEN 7
+ char e_wmesg[WMESGLEN+1]; /* wchan message */
+ segsz_t e_xsize; /* text size */
+ short e_xrssize; /* text rss */
+ short e_xccount; /* text references */
+ short e_xswrss; /* NOTE(review): undocumented; presumably text swap rss -- confirm */
+ long e_flag; /* flag word; EPROC_* bits are defined just below */
+#define EPROC_CTTY 0x01 /* controlling tty vnode active */
+#define EPROC_SLEADER 0x02 /* session leader */
+ char e_login[MAXLOGNAME]; /* setlogin() name */
+ long e_spare[4]; /* spare fields; no use is visible in this header */
+ } kp_eproc;
+};
+
+/*
+ * CTL_HW identifiers (CTL_HW_NAMES below lists the same entries in id order)
+ */
+#define HW_MACHINE 1 /* string: machine class */
+#define HW_MODEL 2 /* string: specific machine model */
+#define HW_NCPU 3 /* int: number of cpus */
+#define HW_BYTEORDER 4 /* int: machine byte order */
+#define HW_PHYSMEM 5 /* int: total memory */
+#define HW_USERMEM 6 /* int: non-kernel memory */
+#define HW_PAGESIZE 7 /* int: software page size */
+#define HW_DISKNAMES 8 /* strings: disk drive names (typed CTLTYPE_STRUCT in the table below) */
+#define HW_DISKSTATS 9 /* struct: diskstats[] */
+#define HW_MAXID 10 /* number of valid hw ids */
+
+#define CTL_HW_NAMES { \
+ { 0, 0 }, \
+ { "machine", CTLTYPE_STRING }, \
+ { "model", CTLTYPE_STRING }, \
+ { "ncpu", CTLTYPE_INT }, \
+ { "byteorder", CTLTYPE_INT }, \
+ { "physmem", CTLTYPE_INT }, \
+ { "usermem", CTLTYPE_INT }, \
+ { "pagesize", CTLTYPE_INT }, \
+ { "disknames", CTLTYPE_STRUCT }, \
+ { "diskstats", CTLTYPE_STRUCT }, \
+} /* HW_MAXID entries; slot 0 is the empty pair */
+
+/*
+ * CTL_USER definitions (CTL_USER_NAMES below lists the same entries in id order)
+ */
+#define USER_CS_PATH 1 /* string: _CS_PATH */
+#define USER_BC_BASE_MAX 2 /* int: BC_BASE_MAX */
+#define USER_BC_DIM_MAX 3 /* int: BC_DIM_MAX */
+#define USER_BC_SCALE_MAX 4 /* int: BC_SCALE_MAX */
+#define USER_BC_STRING_MAX 5 /* int: BC_STRING_MAX */
+#define USER_COLL_WEIGHTS_MAX 6 /* int: COLL_WEIGHTS_MAX */
+#define USER_EXPR_NEST_MAX 7 /* int: EXPR_NEST_MAX */
+#define USER_LINE_MAX 8 /* int: LINE_MAX */
+#define USER_RE_DUP_MAX 9 /* int: RE_DUP_MAX */
+#define USER_POSIX2_VERSION 10 /* int: POSIX2_VERSION */
+#define USER_POSIX2_C_BIND 11 /* int: POSIX2_C_BIND */
+#define USER_POSIX2_C_DEV 12 /* int: POSIX2_C_DEV */
+#define USER_POSIX2_CHAR_TERM 13 /* int: POSIX2_CHAR_TERM */
+#define USER_POSIX2_FORT_DEV 14 /* int: POSIX2_FORT_DEV */
+#define USER_POSIX2_FORT_RUN 15 /* int: POSIX2_FORT_RUN */
+#define USER_POSIX2_LOCALEDEF 16 /* int: POSIX2_LOCALEDEF */
+#define USER_POSIX2_SW_DEV 17 /* int: POSIX2_SW_DEV */
+#define USER_POSIX2_UPE 18 /* int: POSIX2_UPE */
+#define USER_STREAM_MAX 19 /* int: POSIX2_STREAM_MAX (NOTE(review): table name is plain "stream_max") */
+#define USER_TZNAME_MAX 20 /* int: POSIX2_TZNAME_MAX (NOTE(review): table name is plain "tzname_max") */
+#define USER_MAXID 21 /* number of valid user ids */
+
+#define CTL_USER_NAMES { \
+ { 0, 0 }, \
+ { "cs_path", CTLTYPE_STRING }, \
+ { "bc_base_max", CTLTYPE_INT }, \
+ { "bc_dim_max", CTLTYPE_INT }, \
+ { "bc_scale_max", CTLTYPE_INT }, \
+ { "bc_string_max", CTLTYPE_INT }, \
+ { "coll_weights_max", CTLTYPE_INT }, \
+ { "expr_nest_max", CTLTYPE_INT }, \
+ { "line_max", CTLTYPE_INT }, \
+ { "re_dup_max", CTLTYPE_INT }, \
+ { "posix2_version", CTLTYPE_INT }, \
+ { "posix2_c_bind", CTLTYPE_INT }, \
+ { "posix2_c_dev", CTLTYPE_INT }, \
+ { "posix2_char_term", CTLTYPE_INT }, \
+ { "posix2_fort_dev", CTLTYPE_INT }, \
+ { "posix2_fort_run", CTLTYPE_INT }, \
+ { "posix2_localedef", CTLTYPE_INT }, \
+ { "posix2_sw_dev", CTLTYPE_INT }, \
+ { "posix2_upe", CTLTYPE_INT }, \
+ { "stream_max", CTLTYPE_INT }, \
+ { "tzname_max", CTLTYPE_INT }, \
+} /* USER_MAXID entries; slot 0 is the empty pair */
+
+/*
+ * CTL_DEBUG definitions
+ *
+ * Second level identifier specifies which debug variable.
+ * Third level identifier specifies which structure component.
+ */
+#define CTL_DEBUG_NAME 0 /* string: variable name */
+#define CTL_DEBUG_VALUE 1 /* int: variable value */
+#define CTL_DEBUG_MAXID 20 /* NOTE(review): matches the count of debug0..debug19 externs; presumably the slot count */
+
+#ifdef KERNEL
+#ifdef DEBUG
+/*
+ * CTL_DEBUG variables.
+ *
+ * These are declared as separate variables so that they can be
+ * individually initialized at the location of their associated
+ * variable. The loader prevents multiple use by issuing errors
+ * if a variable is initialized in more than one place. They are
+ * aggregated into an array in debug_sysctl(), so that it can
+ * conveniently locate them when queried. If more debugging
+ * variables are added, they must also be declared here and also
+ * entered into the array.
+ */
+struct ctldebug {
+ char *debugname; /* name of debugging variable */
+ int *debugvar; /* pointer to debugging variable */
+};
+extern struct ctldebug debug0, debug1, debug2, debug3, debug4;
+extern struct ctldebug debug5, debug6, debug7, debug8, debug9;
+extern struct ctldebug debug10, debug11, debug12, debug13, debug14;
+extern struct ctldebug debug15, debug16, debug17, debug18, debug19;
+#endif /* DEBUG */
+
+/*
+ * Internal sysctl function calling convention:
+ *
+ * (*sysctlfn)(name, namelen, oldval, oldlenp, newval, newlen, p);
+ *
+ * The name parameter points at the next component of the name to be
+ * interpreted. The namelen parameter is the number of integers in
+ * the name. The final argument, p, is a struct proc pointer.
+ */
+typedef int (sysctlfn)
+ __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *));
+
+int sysctl_int __P((void *, size_t *, void *, size_t, int *));
+int sysctl_rdint __P((void *, size_t *, void *, int));
+int sysctl_string __P((void *, size_t *, void *, size_t, char *, int));
+int sysctl_rdstring __P((void *, size_t *, void *, char *));
+int sysctl_rdstruct __P((void *, size_t *, void *, void *, int));
+void fill_eproc __P((struct proc *, struct eproc *));
+
+#else /* !KERNEL */
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int sysctl __P((int *, u_int, void *, size_t *, void *, size_t)); /* same argument list as sysctlfn minus the struct proc pointer */
+__END_DECLS
+#endif /* KERNEL */
+#endif /* !_SYS_SYSCTL_H_ */
diff --git a/sys/sys/syslimits.h b/sys/sys/syslimits.h
new file mode 100644
index 000000000000..550000c6503b
--- /dev/null
+++ b/sys/sys/syslimits.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)syslimits.h 8.1 (Berkeley) 6/2/93
+ */
+
+#define ARG_MAX 20480 /* max bytes for an exec function */
+#define CHILD_MAX 40 /* max simultaneous processes */
+#define LINK_MAX 32767 /* max file link count */
+#define MAX_CANON 255 /* max bytes in terminal canonical input line */
+#define MAX_INPUT 255 /* max bytes in terminal input */
+#define NAME_MAX 255 /* max bytes in a file name */
+#define NGROUPS_MAX 16 /* max supplemental group IDs */
+#define OPEN_MAX 64 /* max open files per process */
+#define PATH_MAX 1024 /* max bytes in pathname */
+#define PIPE_BUF 512 /* max bytes for atomic pipe writes */
+
+#define BC_BASE_MAX 99 /* max ibase/obase values in bc(1) */
+#define BC_DIM_MAX 2048 /* max array elements in bc(1) */
+#define BC_SCALE_MAX 99 /* max scale value in bc(1) */
+#define BC_STRING_MAX 1000 /* max const string length in bc(1) */
+#define COLL_WEIGHTS_MAX 0 /* max weights for order keyword */
+#define EXPR_NEST_MAX 32 /* max expressions nested in expr(1) */
+#define LINE_MAX 2048 /* max bytes in an input line */
+#define RE_DUP_MAX 255 /* max REs in interval notation */
diff --git a/sys/sys/syslog.h b/sys/sys/syslog.h
new file mode 100644
index 000000000000..935db2d4484e
--- /dev/null
+++ b/sys/sys/syslog.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)syslog.h 8.1 (Berkeley) 6/2/93
+ */
+
+#define _PATH_LOG "/dev/log"
+
+/*
+ * priorities/facilities are encoded into a single 32-bit quantity, where the
+ * bottom 3 bits are the priority (0-7) and the top 28 bits are the facility
+ * (0-big number). Both the priorities and the facilities map roughly
+ * one-to-one to strings in the syslogd(8) source code. This mapping is
+ * included in this file.
+ *
+ * priorities (these are ordered)
+ */
+#define LOG_EMERG 0 /* system is unusable */
+#define LOG_ALERT 1 /* action must be taken immediately */
+#define LOG_CRIT 2 /* critical conditions */
+#define LOG_ERR 3 /* error conditions */
+#define LOG_WARNING 4 /* warning conditions */
+#define LOG_NOTICE 5 /* normal but significant condition */
+#define LOG_INFO 6 /* informational */
+#define LOG_DEBUG 7 /* debug-level messages */
+
+#define LOG_PRIMASK 0x07 /* mask to extract priority part (internal) */
+ /* extract priority */
+#define LOG_PRI(p) ((p) & LOG_PRIMASK)
+#define LOG_MAKEPRI(fac, pri) (((fac) << 3) | (pri))
+
+#ifdef SYSLOG_NAMES
+#define INTERNAL_NOPRI 0x10 /* the "no priority" priority */
+ /* mark "facility" */
+#define INTERNAL_MARK LOG_MAKEPRI(LOG_NFACILITIES, 0)
+typedef struct _code {
+ char *c_name;
+ int c_val;
+} CODE;
+
+CODE prioritynames[] = {
+ "alert", LOG_ALERT,
+ "crit", LOG_CRIT,
+ "debug", LOG_DEBUG,
+ "emerg", LOG_EMERG,
+ "err", LOG_ERR,
+ "error", LOG_ERR, /* DEPRECATED */
+ "info", LOG_INFO,
+ "none", INTERNAL_NOPRI, /* INTERNAL */
+ "notice", LOG_NOTICE,
+ "panic", LOG_EMERG, /* DEPRECATED */
+ "warn", LOG_WARNING, /* DEPRECATED */
+ "warning", LOG_WARNING,
+ NULL, -1,
+};
+#endif
+
+/* facility codes */
+#define LOG_KERN (0<<3) /* kernel messages */
+#define LOG_USER (1<<3) /* random user-level messages */
+#define LOG_MAIL (2<<3) /* mail system */
+#define LOG_DAEMON (3<<3) /* system daemons */
+#define LOG_AUTH (4<<3) /* security/authorization messages */
+#define LOG_SYSLOG (5<<3) /* messages generated internally by syslogd */
+#define LOG_LPR (6<<3) /* line printer subsystem */
+#define LOG_NEWS (7<<3) /* network news subsystem */
+#define LOG_UUCP (8<<3) /* UUCP subsystem */
+#define LOG_CRON (9<<3) /* clock daemon */
+#define LOG_AUTHPRIV (10<<3) /* security/authorization messages (private) */
+#define LOG_FTP (11<<3) /* ftp daemon */
+
+ /* other codes through 15 reserved for system use */
+#define LOG_LOCAL0 (16<<3) /* reserved for local use */
+#define LOG_LOCAL1 (17<<3) /* reserved for local use */
+#define LOG_LOCAL2 (18<<3) /* reserved for local use */
+#define LOG_LOCAL3 (19<<3) /* reserved for local use */
+#define LOG_LOCAL4 (20<<3) /* reserved for local use */
+#define LOG_LOCAL5 (21<<3) /* reserved for local use */
+#define LOG_LOCAL6 (22<<3) /* reserved for local use */
+#define LOG_LOCAL7 (23<<3) /* reserved for local use */
+
+#define LOG_NFACILITIES 24 /* current number of facilities */
+#define LOG_FACMASK 0x03f8 /* mask to extract facility part */
+ /* facility of pri */
+#define LOG_FAC(p) (((p) & LOG_FACMASK) >> 3)
+
+#ifdef SYSLOG_NAMES
+CODE facilitynames[] = {
+ "auth", LOG_AUTH,
+ "authpriv", LOG_AUTHPRIV,
+ "cron", LOG_CRON,
+ "daemon", LOG_DAEMON,
+ "ftp", LOG_FTP,
+ "kern", LOG_KERN,
+ "lpr", LOG_LPR,
+ "mail", LOG_MAIL,
+ "mark", INTERNAL_MARK, /* INTERNAL */
+ "news", LOG_NEWS,
+ "security", LOG_AUTH, /* DEPRECATED */
+ "syslog", LOG_SYSLOG,
+ "user", LOG_USER,
+ "uucp", LOG_UUCP,
+ "local0", LOG_LOCAL0,
+ "local1", LOG_LOCAL1,
+ "local2", LOG_LOCAL2,
+ "local3", LOG_LOCAL3,
+ "local4", LOG_LOCAL4,
+ "local5", LOG_LOCAL5,
+ "local6", LOG_LOCAL6,
+ "local7", LOG_LOCAL7,
+ NULL, -1,
+};
+#endif
+
+#ifdef KERNEL
+#define LOG_PRINTF -1 /* pseudo-priority to indicate use of printf */
+#endif
+
+/*
+ * arguments to setlogmask.
+ */
+#define LOG_MASK(pri) (1 << (pri)) /* mask for one priority */
+#define LOG_UPTO(pri) ((1 << ((pri)+1)) - 1) /* all priorities through pri */
+
+/*
+ * Option flags for openlog.
+ *
+ * LOG_ODELAY no longer does anything.
+ * LOG_NDELAY is the inverse of what it used to be.
+ */
+#define LOG_PID 0x01 /* log the pid with each message */
+#define LOG_CONS 0x02 /* log on the console if errors in sending */
+#define LOG_ODELAY 0x04 /* delay open until first syslog() (default) */
+#define LOG_NDELAY 0x08 /* don't delay open */
+#define LOG_NOWAIT 0x10 /* don't wait for console forks: DEPRECATED */
+#define LOG_PERROR 0x20 /* log to stderr as well */
+
+#ifndef KERNEL
+
+/*
+ * Don't use va_list in the vsyslog() prototype. Va_list is typedef'd in two
+ * places (<machine/varargs.h> and <machine/stdarg.h>), so if we include one
+ * of them here we may collide with the utility's includes. It's unreasonable
+ * for utilities to have to include one of them to include syslog.h, so we get
+ * _BSD_VA_LIST_ from <machine/ansi.h> and use it.
+ */
+#include <machine/ansi.h>
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+void closelog __P((void));
+void openlog __P((const char *, int, int));
+int setlogmask __P((int));
+void syslog __P((int, const char *, ...));
+void vsyslog __P((int, const char *, _BSD_VA_LIST_));
+__END_DECLS
+
+#endif /* !KERNEL */
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
new file mode 100644
index 000000000000..91cb64bd5fa7
--- /dev/null
+++ b/sys/sys/systm.h
@@ -0,0 +1,165 @@
+/*-
+ * Copyright (c) 1982, 1988, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)systm.h 8.4 (Berkeley) 2/23/94
+ */
+
+/*
+ * The `securelevel' variable controls the security level of the system.
+ * It can only be decreased by process 1 (/sbin/init).
+ *
+ * Security levels are as follows:
+ * -1 permanently insecure mode - always run system in level 0 mode.
+ * 0 insecure mode - immutable and append-only flags may be turned off.
+ * All devices may be read or written subject to permission modes.
+ * 1 secure mode - immutable and append-only flags may not be changed;
+ * raw disks of mounted filesystems, /dev/mem, and /dev/kmem are
+ * read-only.
+ * 2 highly secure mode - same as (1) plus raw disks are always
+ * read-only whether mounted or not. This level precludes tampering
+ * with filesystems by unmounting them, but also inhibits running
+ * newfs while the system is secured.
+ *
+ * In normal operation, the system runs in level 0 mode while single user
+ * and in level 1 mode while multiuser. If level 2 mode is desired while
+ * running multiuser, it can be set in the multiuser startup script
+ * (/etc/rc.local) using sysctl(1). If it is desired to run the system
+ * in level 0 mode while multiuser, initialize the variable securelevel
+ * in /sys/kern/kern_sysctl.c to -1. Note that it is NOT initialized to
+ * zero as that would allow the vmunix binary to be patched to -1.
+ * Without initialization, securelevel loads in the BSS area which only
+ * comes into existence when the kernel is loaded and hence cannot be
+ * patched by a stalking hacker.
+ */
+extern int securelevel; /* system security level */
+extern const char *panicstr; /* panic message */
+extern char version[]; /* system version */
+extern char copyright[]; /* system copyright */
+
+extern int nblkdev; /* number of entries in bdevsw */
+extern int nchrdev; /* number of entries in cdevsw */
+extern int nswdev; /* number of swap devices */
+extern int nswap; /* size of swap space */
+
+extern int selwait; /* select timeout address */
+
+extern u_char curpriority; /* priority of current process */
+
+extern int maxmem; /* max memory per process */
+extern int physmem; /* physical memory */
+
+extern dev_t dumpdev; /* dump device */
+extern long dumplo; /* offset into dumpdev */
+
+extern dev_t rootdev; /* root device */
+extern struct vnode *rootvp; /* vnode equivalent to above */
+
+extern dev_t swapdev; /* swapping device */
+extern struct vnode *swapdev_vp;/* vnode equivalent to above */
+
+extern struct sysent { /* system call table */
+ int sy_narg; /* number of arguments */
+ int (*sy_call)(); /* implementing function */
+} sysent[];
+
+extern int boothowto; /* reboot flags, from console subsystem */
+
+/* casts to keep lint happy; each argument is parenthesized so the cast covers the whole argument expression */
+#define insque(q,p) _insque((caddr_t)(q),(caddr_t)(p))
+#define remque(q) _remque((caddr_t)(q))
+
+/*
+ * General function declarations.
+ */
+int nullop __P((void));
+int enodev __P((void));
+int enoioctl __P((void));
+int enxio __P((void));
+int eopnotsupp __P((void));
+int seltrue __P((dev_t dev, int which, struct proc *p));
+void *hashinit __P((int count, int type, u_long *hashmask));
+
+#ifdef __GNUC__
+volatile void panic __P((const char *, ...));
+#else
+void panic __P((const char *, ...));
+#endif
+void tablefull __P((const char *));
+void addlog __P((const char *, ...));
+void log __P((int, const char *, ...));
+void printf __P((const char *, ...));
+int sprintf __P((char *buf, const char *, ...));
+void ttyprintf __P((struct tty *, const char *, ...));
+
+void bcopy __P((const void *from, void *to, u_int len));
+void ovbcopy __P((const void *from, void *to, u_int len));
+void bzero __P((void *buf, u_int len));
+
+int copystr __P((void *kfaddr, void *kdaddr, u_int len, u_int *done));
+int copyinstr __P((void *udaddr, void *kaddr, u_int len, u_int *done));
+int copyoutstr __P((void *kaddr, void *udaddr, u_int len, u_int *done));
+int copyin __P((void *udaddr, void *kaddr, u_int len));
+int copyout __P((void *kaddr, void *udaddr, u_int len));
+
+int fubyte __P((void *base));
+#ifdef notdef
+int fuibyte __P((void *base));
+#endif
+int subyte __P((void *base, int byte));
+int suibyte __P((void *base, int byte));
+int fuword __P((void *base));
+int fuiword __P((void *base));
+int suword __P((void *base, int word));
+int suiword __P((void *base, int word));
+
+int hzto __P((struct timeval *tv));
+void timeout __P((void (*func)(void *), void *arg, int ticks));
+void untimeout __P((void (*func)(void *), void *arg));
+void realitexpire __P((void *));
+
+struct clockframe;
+void hardclock __P((struct clockframe *frame));
+void softclock __P((void));
+void statclock __P((struct clockframe *frame));
+
+void initclocks __P((void));
+
+void startprofclock __P((struct proc *));
+void stopprofclock __P((struct proc *));
+void setstatclockrate __P((int hzrate));
+
+#include <libkern/libkern.h>
diff --git a/sys/sys/tablet.h b/sys/sys/tablet.h
new file mode 100644
index 000000000000..cbb3f23d006c
--- /dev/null
+++ b/sys/sys/tablet.h
@@ -0,0 +1,94 @@
+/*-
+ * Copyright (c) 1985, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tablet.h 8.3 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_TABLET_H_
+#define _SYS_TABLET_H_
+
+/*
+ * Tablet line discipline.
+ */
+#include <sys/ioctl.h>
+
+/*
+ * Reads on the tablet return one of the following structures, depending on
+ * the underlying tablet type. The first two are defined such that a read of
+ * sizeof (gtcopos) on a non-gtco tablet will return meaningful info. The
+ * in-proximity bit is simulated where the tablet does not directly provide
+ * the information.
+ */
+struct tbpos {
+ int xpos, ypos; /* raw x-y coordinates */
+ short status; /* buttons/pen down */
+#define TBINPROX 0100000 /* pen in proximity of tablet */
+ short scount; /* sample count */
+};
+
+struct gtcopos {
+ int xpos, ypos; /* raw x-y coordinates */
+ short status; /* as above */
+ short scount; /* sample count */
+ short xtilt, ytilt; /* raw tilt */
+ short pressure;
+ short pad; /* pad to longword boundary */
+};
+
+struct polpos {
+ short p_x, p_y, p_z; /* raw 3-space coordinates */
+ short p_azi, p_pit, p_rol; /* azimuth, pitch, and roll */
+ short p_stat; /* status, as above */
+ char p_key; /* calculator input keyboard */
+};
+
+#define BIOSMODE _IOW('b', 1, int) /* set mode bit(s) */
+#define BIOGMODE _IOR('b', 2, int) /* get mode bit(s) */
+#define TBMODE 0xfff0 /* mode bits: */
+#define TBPOINT 0x0010 /* single point */
+#define TBRUN 0x0000 /* runs continuously */
+#define TBSTOP 0x0020 /* shut-up */
+#define TBGO 0x0000 /* ~TBSTOP */
+#define TBTYPE 0x000f /* tablet type: */
+#define TBUNUSED 0x0
+#define TBHITACHI 0x1 /* hitachi tablet */
+#define TBTIGER 0x2 /* hitachi tiger */
+#define TBGTCO 0x3 /* gtco */
+#define TBPOL 0x4 /* polhemus 3space */
+#define TBHDG 0x5 /* hdg-1111b, low res */
+#define TBHDGHIRES 0x6 /* hdg-1111b, high res */
+#define TBDIGI 0x7 /* gtco digi-pad, low res */
+#define TBDIGIHIRES 0x8 /* gtco digi-pad, high res */
+#define BIOSTYPE _IOW('b', 3, int) /* set tablet type */
+#define BIOGTYPE _IOR('b', 4, int) /* get tablet type */
+
+#endif /* !_SYS_TABLET_H_ */
diff --git a/sys/sys/termios.h b/sys/sys/termios.h
new file mode 100644
index 000000000000..4ad04a10fb19
--- /dev/null
+++ b/sys/sys/termios.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 1988, 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)termios.h 8.3 (Berkeley) 3/28/94
+ */
+
+#ifndef _SYS_TERMIOS_H_
+#define _SYS_TERMIOS_H_
+
+/*
+ * Special Control Characters
+ *
+ * Index into c_cc[] character array.
+ *
+ * Name Subscript Enabled by
+ */
+#define VEOF 0 /* ICANON */
+#define VEOL 1 /* ICANON */
+#ifndef _POSIX_SOURCE
+#define VEOL2 2 /* ICANON */
+#endif
+#define VERASE 3 /* ICANON */
+#ifndef _POSIX_SOURCE
+#define VWERASE 4 /* ICANON */
+#endif
+#define VKILL 5 /* ICANON */
+#ifndef _POSIX_SOURCE
+#define VREPRINT 6 /* ICANON */
+#endif
+/* 7 spare 1 */
+#define VINTR 8 /* ISIG */
+#define VQUIT 9 /* ISIG */
+#define VSUSP 10 /* ISIG */
+#ifndef _POSIX_SOURCE
+#define VDSUSP 11 /* ISIG */
+#endif
+#define VSTART 12 /* IXON, IXOFF */
+#define VSTOP 13 /* IXON, IXOFF */
+#ifndef _POSIX_SOURCE
+#define VLNEXT 14 /* IEXTEN */
+#define VDISCARD 15 /* IEXTEN */
+#endif
+#define VMIN 16 /* !ICANON */
+#define VTIME 17 /* !ICANON */
+#ifndef _POSIX_SOURCE
+#define VSTATUS 18 /* ICANON */
+/* 19 spare 2 */
+#endif
+#define NCCS 20
+
+#define _POSIX_VDISABLE ((unsigned char)'\377')
+
+#ifndef _POSIX_SOURCE
+#define CCEQ(val, c) ((c) == (val) ? (val) != _POSIX_VDISABLE : 0) /* c matches val and val is not _POSIX_VDISABLE */
+#endif
+
+/*
+ * Input flags - software input processing
+ */
+#define IGNBRK 0x00000001 /* ignore BREAK condition */
+#define BRKINT 0x00000002 /* map BREAK to SIGINT */
+#define IGNPAR 0x00000004 /* ignore (discard) parity errors */
+#define PARMRK 0x00000008 /* mark parity and framing errors */
+#define INPCK 0x00000010 /* enable checking of parity errors */
+#define ISTRIP 0x00000020 /* strip 8th bit off chars */
+#define INLCR 0x00000040 /* map NL into CR */
+#define IGNCR 0x00000080 /* ignore CR */
+#define ICRNL 0x00000100 /* map CR to NL (ala CRMOD) */
+#define IXON 0x00000200 /* enable output flow control */
+#define IXOFF 0x00000400 /* enable input flow control */
+#ifndef _POSIX_SOURCE
+#define IXANY 0x00000800 /* any char will restart after stop */
+#define IMAXBEL 0x00002000 /* ring bell on input queue full */
+#endif /* !_POSIX_SOURCE */
+
+/*
+ * Output flags - software output processing
+ */
+#define OPOST 0x00000001 /* enable following output processing */
+#ifndef _POSIX_SOURCE
+#define ONLCR 0x00000002 /* map NL to CR-NL (ala CRMOD) */
+#define OXTABS 0x00000004 /* expand tabs to spaces */
+#define ONOEOT 0x00000008 /* discard EOT's (^D) on output */
+#endif /* !_POSIX_SOURCE */
+
+/*
+ * Control flags - hardware control of terminal
+ */
+#ifndef _POSIX_SOURCE
+#define CIGNORE 0x00000001 /* ignore control flags */
+#endif
+#define CSIZE 0x00000300 /* character size mask */
+#define CS5 0x00000000 /* 5 bits (pseudo) */
+#define CS6 0x00000100 /* 6 bits */
+#define CS7 0x00000200 /* 7 bits */
+#define CS8 0x00000300 /* 8 bits */
+#define CSTOPB 0x00000400 /* send 2 stop bits */
+#define CREAD 0x00000800 /* enable receiver */
+#define PARENB 0x00001000 /* parity enable */
+#define PARODD 0x00002000 /* odd parity, else even */
+#define HUPCL 0x00004000 /* hang up on last close */
+#define CLOCAL 0x00008000 /* ignore modem status lines */
+#ifndef _POSIX_SOURCE
+#define CCTS_OFLOW 0x00010000 /* CTS flow control of output */
+#define CRTSCTS CCTS_OFLOW /* ??? */
+#define CRTS_IFLOW 0x00020000 /* RTS flow control of input */
+#define MDMBUF 0x00100000 /* flow control output via Carrier */
+#endif
+
+
+/*
+ * "Local" flags - dumping ground for other state
+ *
+ * Warning: some flags in this structure begin with
+ * the letter "I" and look like they belong in the
+ * input flags.
+ */
+
+#ifndef _POSIX_SOURCE
+#define ECHOKE 0x00000001 /* visual erase for line kill */
+#endif /* !_POSIX_SOURCE */
+#define ECHOE 0x00000002 /* visually erase chars */
+#define ECHOK 0x00000004 /* echo NL after line kill */
+#define ECHO 0x00000008 /* enable echoing */
+#define ECHONL 0x00000010 /* echo NL even if ECHO is off */
+#ifndef _POSIX_SOURCE
+#define ECHOPRT 0x00000020 /* visual erase mode for hardcopy */
+#define ECHOCTL 0x00000040 /* echo control chars as ^(Char) */
+#endif /* !_POSIX_SOURCE */
+#define ISIG 0x00000080 /* enable signals INTR, QUIT, [D]SUSP */
+#define ICANON 0x00000100 /* canonicalize input lines */
+#ifndef _POSIX_SOURCE
+#define ALTWERASE 0x00000200 /* use alternate WERASE algorithm */
+#endif /* !_POSIX_SOURCE */
+#define IEXTEN 0x00000400 /* enable DISCARD and LNEXT */
+#define EXTPROC 0x00000800 /* external processing */
+#define TOSTOP 0x00400000 /* stop background jobs from output */
+#ifndef _POSIX_SOURCE
+#define FLUSHO 0x00800000 /* output being flushed (state) */
+#define NOKERNINFO 0x02000000 /* no kernel output from VSTATUS */
+#define PENDIN 0x20000000 /* XXX retype pending input (state) */
+#endif /* !_POSIX_SOURCE */
+#define NOFLSH 0x80000000 /* don't flush after interrupt */
+typedef unsigned long tcflag_t;
+typedef unsigned char cc_t;
+typedef long speed_t;
+
+struct termios {
+ tcflag_t c_iflag; /* input flags */
+ tcflag_t c_oflag; /* output flags */
+ tcflag_t c_cflag; /* control flags */
+ tcflag_t c_lflag; /* local flags */
+ cc_t c_cc[NCCS]; /* control chars */
+ long c_ispeed; /* input speed */
+ long c_ospeed; /* output speed */
+};
+
+/*
+ * Commands passed to tcsetattr() for setting the termios structure.
+ */
+#define TCSANOW 0 /* make change immediate */
+#define TCSADRAIN 1 /* drain output, then change */
+#define TCSAFLUSH 2 /* drain output, flush input */
+#ifndef _POSIX_SOURCE
+#define TCSASOFT 0x10 /* flag - don't alter h.w. state */
+#endif
+
+/*
+ * Standard speeds
+ */
+#define B0 0
+#define B50 50
+#define B75 75
+#define B110 110
+#define B134 134
+#define B150 150
+#define B200 200
+#define B300 300
+#define B600 600
+#define B1200 1200
+#define B1800 1800
+#define B2400 2400
+#define B4800 4800
+#define B9600 9600
+#define B19200 19200
+#define B38400 38400
+#ifndef _POSIX_SOURCE
+#define B7200 7200
+#define B14400 14400
+#define B28800 28800
+#define B57600 57600
+#define B76800 76800
+#define B115200 115200
+#define B230400 230400
+#define EXTA 19200
+#define EXTB 38400
+#endif /* !_POSIX_SOURCE */
+
+#ifndef KERNEL
+
+#define TCIFLUSH 1
+#define TCOFLUSH 2
+#define TCIOFLUSH 3
+#define TCOOFF 1
+#define TCOON 2
+#define TCIOFF 3
+#define TCION 4
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+speed_t cfgetispeed __P((const struct termios *));
+speed_t cfgetospeed __P((const struct termios *));
+int cfsetispeed __P((struct termios *, speed_t));
+int cfsetospeed __P((struct termios *, speed_t));
+int tcgetattr __P((int, struct termios *));
+int tcsetattr __P((int, int, const struct termios *));
+int tcdrain __P((int));
+int tcflow __P((int, int));
+int tcflush __P((int, int));
+int tcsendbreak __P((int, int));
+
+#ifndef _POSIX_SOURCE
+void cfmakeraw __P((struct termios *));
+int cfsetspeed __P((struct termios *, speed_t));
+#endif /* !_POSIX_SOURCE */
+__END_DECLS
+
+#endif /* !KERNEL */
+
+#ifndef _POSIX_SOURCE
+
+/*
+ * Include tty ioctl's that aren't just for backwards compatibility
+ * with the old tty driver. These ioctl definitions were previously
+ * in <sys/ioctl.h>.
+ */
+#include <sys/ttycom.h>
+#endif
+
+/*
+ * END OF PROTECTED INCLUDE.
+ */
+#endif /* !_SYS_TERMIOS_H_ */
+
+#ifndef _POSIX_SOURCE
+#include <sys/ttydefaults.h>
+#endif
diff --git a/sys/sys/time.h b/sys/sys/time.h
new file mode 100644
index 000000000000..53227712a3b7
--- /dev/null
+++ b/sys/sys/time.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)time.h 8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _SYS_TIME_H_
+#define _SYS_TIME_H_
+
+/*
+ * Structure returned by gettimeofday(2) system call,
+ * and used in other calls.
+ */
+struct timeval {
+ long tv_sec; /* seconds */
+ long tv_usec; /* and microseconds */
+};
+
+/*
+ * Structure defined by POSIX.4 to be like a timeval.
+ */
+struct timespec {
+ long ts_sec; /* seconds */
+ long ts_nsec; /* and nanoseconds */
+};
+
+#define TIMEVAL_TO_TIMESPEC(tv, ts) { \
+ (ts)->ts_sec = (tv)->tv_sec; \
+ (ts)->ts_nsec = (tv)->tv_usec * 1000; \
+}
+#define TIMESPEC_TO_TIMEVAL(tv, ts) { \
+ (tv)->tv_sec = (ts)->ts_sec; \
+ (tv)->tv_usec = (ts)->ts_nsec / 1000; \
+}
+
+struct timezone {
+ int tz_minuteswest; /* minutes west of Greenwich */
+ int tz_dsttime; /* type of dst correction */
+};
+#define DST_NONE 0 /* not on dst */
+#define DST_USA 1 /* USA style dst */
+#define DST_AUST 2 /* Australian style dst */
+#define DST_WET 3 /* Western European dst */
+#define DST_MET 4 /* Middle European dst */
+#define DST_EET 5 /* Eastern European dst */
+#define DST_CAN 6 /* Canada */
+
+/* Operations on timevals; every macro below expands to one fully parenthesized expression. */
+#define timerclear(tvp) ((tvp)->tv_sec = (tvp)->tv_usec = 0)
+#define timerisset(tvp) ((tvp)->tv_sec || (tvp)->tv_usec)
+#define timercmp(tvp, uvp, cmp) \
+ (((tvp)->tv_sec == (uvp)->tv_sec) ? \
+ ((tvp)->tv_usec cmp (uvp)->tv_usec) : \
+ ((tvp)->tv_sec cmp (uvp)->tv_sec))
+
+/*
+ * Names of the interval timers, and structure
+ * defining a timer setting.
+ */
+#define ITIMER_REAL 0
+#define ITIMER_VIRTUAL 1
+#define ITIMER_PROF 2
+
+struct itimerval {
+ struct timeval it_interval; /* timer interval */
+ struct timeval it_value; /* current value */
+};
+
+/*
+ * Getkerninfo clock information structure
+ */
+struct clockinfo {
+ int hz; /* clock frequency */
+ int tick; /* micro-seconds per hz tick */
+ int stathz; /* statistics clock frequency */
+ int profhz; /* profiling clock frequency */
+};
+
+#ifndef KERNEL
+#include <time.h>
+
+#ifndef _POSIX_SOURCE
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int adjtime __P((const struct timeval *, struct timeval *));
+int getitimer __P((int, struct itimerval *));
+int gettimeofday __P((struct timeval *, struct timezone *));
+int setitimer __P((int, const struct itimerval *, struct itimerval *));
+int settimeofday __P((const struct timeval *, const struct timezone *));
+int utimes __P((const char *, const struct timeval *));
+__END_DECLS
+#endif /* !_POSIX_SOURCE */
+
+#endif /* !KERNEL */
+
+#endif /* !_SYS_TIME_H_ */
diff --git a/sys/sys/timeb.h b/sys/sys/timeb.h
new file mode 100644
index 000000000000..2ab010514b60
--- /dev/null
+++ b/sys/sys/timeb.h
@@ -0,0 +1,47 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)timeb.h 8.2 (Berkeley) 1/21/94
+ */
+
+/* The ftime(2) system call structure -- deprecated.  This header includes nothing itself, so time_t must already be declared by the includer. */
+struct timeb {
+ time_t time; /* whole seconds since the Epoch */
+ unsigned short millitm; /* milliseconds to add to `time' (the sub-second part) */
+ short timezone; /* minutes west of CUT (Coordinated Universal Time) */
+ short dstflag; /* non-zero when daylight saving time applies */
+};
diff --git a/sys/sys/times.h b/sys/sys/times.h
new file mode 100644
index 000000000000..23a150082910
--- /dev/null
+++ b/sys/sys/times.h
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)times.h 8.4 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_TIMES_H_
+#define _SYS_TIMES_H_
+
+#include <machine/ansi.h>
+
+#ifdef _BSD_CLOCK_T_
+typedef _BSD_CLOCK_T_ clock_t;
+#undef _BSD_CLOCK_T_
+#endif
+
+struct tms {
+ clock_t tms_utime; /* User CPU time */
+ clock_t tms_stime; /* System CPU time */
+ clock_t tms_cutime; /* User CPU time of terminated child procs */
+ clock_t tms_cstime; /* System CPU time of terminated child procs */
+};
+
+#ifndef KERNEL /* the library prototype below is hidden from kernel builds */
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+clock_t times __P((struct tms *));
+__END_DECLS
+#endif /* !KERNEL */
+#endif /* !_SYS_TIMES_H_ */
diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h
new file mode 100644
index 000000000000..53227712a3b7
--- /dev/null
+++ b/sys/sys/timetc.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)time.h 8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _SYS_TIME_H_
+#define _SYS_TIME_H_
+
+/*
+ * Structure returned by gettimeofday(2) system call,
+ * and used in other calls.
+ */
+struct timeval {
+ long tv_sec; /* seconds */
+ long tv_usec; /* and microseconds */
+};
+
+/*
+ * Structure defined by POSIX.4 to be like a timeval.
+ */
+struct timespec {
+ long ts_sec; /* seconds */
+ long ts_nsec; /* and nanoseconds */
+};
+
+#define TIMEVAL_TO_TIMESPEC(tv, ts) { /* copy *tv into *ts, scaling microseconds up to nanoseconds */ \
+ (ts)->ts_sec = (tv)->tv_sec; \
+ (ts)->ts_nsec = (tv)->tv_usec * 1000; \
+} /* expands to a bare { } block, not do { } while (0): "if (c) M(a, b); else ..." will not parse */
+#define TIMESPEC_TO_TIMEVAL(tv, ts) { /* copy *ts into *tv; note the destination is the FIRST argument */ \
+ (tv)->tv_sec = (ts)->ts_sec; \
+ (tv)->tv_usec = (ts)->ts_nsec / 1000; /* integer division discards any sub-microsecond remainder */ \
+} /* same bare-block caveat as TIMEVAL_TO_TIMESPEC */
+
+struct timezone {
+ int tz_minuteswest; /* minutes west of Greenwich */
+ int tz_dsttime; /* type of dst correction */
+};
+#define DST_NONE 0 /* not on dst */
+#define DST_USA 1 /* USA style dst */
+#define DST_AUST 2 /* Australian style dst */
+#define DST_WET 3 /* Western European dst */
+#define DST_MET 4 /* Middle European dst */
+#define DST_EET 5 /* Eastern European dst */
+#define DST_CAN 6 /* Canada */
+
+/* Operations on timevals: tvp and uvp are struct timeval pointers, cmp is a relational operator token. */
+#define timerclear(tvp) ((tvp)->tv_sec = (tvp)->tv_usec = 0) /* parenthesized: stays one operand inside larger expressions */
+#define timerisset(tvp) ((tvp)->tv_sec || (tvp)->tv_usec) /* non-zero when either field is non-zero */
+#define timercmp(tvp, uvp, cmp) \
+ (((tvp)->tv_sec == (uvp)->tv_sec) ? /* seconds tie: decide on microseconds */ \
+ ((tvp)->tv_usec cmp (uvp)->tv_usec) : \
+ ((tvp)->tv_sec cmp (uvp)->tv_sec))
+
+/*
+ * Names of the interval timers, and structure
+ * defining a timer setting.
+ */
+#define ITIMER_REAL 0
+#define ITIMER_VIRTUAL 1
+#define ITIMER_PROF 2
+
+struct itimerval {
+ struct timeval it_interval; /* timer interval */
+ struct timeval it_value; /* current value */
+};
+
+/*
+ * Getkerninfo clock information structure
+ */
+struct clockinfo {
+ int hz; /* clock frequency */
+ int tick; /* micro-seconds per hz tick */
+ int stathz; /* statistics clock frequency */
+ int profhz; /* profiling clock frequency */
+};
+
+#ifndef KERNEL /* everything down to the matching #endif is hidden from kernel builds */
+#include <time.h>
+
+#ifndef _POSIX_SOURCE /* these prototypes are withheld when _POSIX_SOURCE is defined */
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int adjtime __P((const struct timeval *, struct timeval *));
+int getitimer __P((int, struct itimerval *));
+int gettimeofday __P((struct timeval *, struct timezone *));
+int setitimer __P((int, const struct itimerval *, struct itimerval *));
+int settimeofday __P((const struct timeval *, const struct timezone *));
+int utimes __P((const char *, const struct timeval *));
+__END_DECLS
+#endif /* !_POSIX_SOURCE */
+
+#endif /* !KERNEL */
+
+#endif /* !_SYS_TIME_H_ */
diff --git a/sys/sys/tprintf.h b/sys/sys/tprintf.h
new file mode 100644
index 000000000000..5b83aaec0296
--- /dev/null
+++ b/sys/sys/tprintf.h
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tprintf.h 8.1 (Berkeley) 6/2/93
+ */
+
+typedef struct session *tpr_t;
+
+tpr_t tprintf_open __P((struct proc *));
+void tprintf_close __P((tpr_t));
+
+void tprintf __P((tpr_t, const char *fmt, ...));
diff --git a/sys/sys/trace.h b/sys/sys/trace.h
new file mode 100644
index 000000000000..d401f1459d78
--- /dev/null
+++ b/sys/sys/trace.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)trace.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * File system buffer tracing points; all trace <pack(dev, size), bn>
+ */
+#define TR_BREADHIT 0 /* buffer read found in cache */
+#define TR_BREADMISS 1 /* buffer read not in cache */
+#define TR_BWRITE 2 /* buffer written */
+#define TR_BREADHITRA 3 /* buffer read-ahead found in cache */
+#define TR_BREADMISSRA 4 /* buffer read-ahead not in cache */
+#define TR_XFODMISS 5 /* exe fod read */
+#define TR_XFODHIT 6 /* exe fod read */
+#define TR_BRELSE 7 /* brelse */
+#define TR_BREALLOC 8 /* expand/contract a buffer */
+
+/*
+ * Memory allocator trace points; all trace the amount of memory involved
+ */
+#define TR_MALL 10 /* memory allocated */
+
+/*
+ * Paging trace points: all are <vaddr, pid>
+ */
+#define TR_INTRANS 20 /* page intransit block */
+#define TR_EINTRANS 21 /* page intransit wait done */
+#define TR_FRECLAIM 22 /* reclaim from free list */
+#define TR_RECLAIM 23 /* reclaim from loop */
+#define TR_XSFREC 24 /* reclaim from free list instead of drum */
+#define TR_XIFREC 25 /* reclaim from free list instead of fsys */
+#define TR_WAITMEM 26 /* wait for memory in pagein */
+#define TR_EWAITMEM 27 /* end memory wait in pagein */
+#define TR_ZFOD 28 /* zfod page fault */
+#define TR_EXFOD 29 /* exec fod page fault */
+#define TR_VRFOD 30 /* vread fod page fault */
+#define TR_CACHEFOD 31 /* fod in file system cache */
+#define TR_SWAPIN 32 /* drum page fault */
+#define TR_PGINDONE 33 /* page in done */
+#define TR_SWAPIO 34 /* swap i/o request arrives */
+
+/*
+ * System call trace points.
+ */
+#define TR_VADVISE 40 /* vadvise occurred with <arg, pid> */
+
+/*
+ * Miscellaneous
+ */
+#define TR_STAMP 45 /* user said vtrace(VTR_STAMP, value); */
+
+/*
+ * This defines the size of the trace flags array.
+ */
+#define TR_NFLAGS 100 /* generous */
+
+#define TRCSIZ 4096
+
+/*
+ * Specifications of the vtrace() system call, which takes one argument.
+ */
+#define VTRACE (64+51) /* parenthesized so that e.g. VTRACE * 2 means 115 * 2, not 64 + 51 * 2 */
+
+#define VTR_DISABLE 0 /* set a trace flag to 0 */
+#define VTR_ENABLE 1 /* set a trace flag to 1 */
+#define VTR_VALUE 2 /* return value of a trace flag */
+#define VTR_UALARM 3 /* set alarm to go off (sig 16) */
+ /* in specified number of hz */
+#define VTR_STAMP 4 /* user specified stamp */
+
+#ifdef KERNEL /* kernel-only tracing state and hooks */
+#ifdef TRACE /* tracing compiled in */
+struct proc *traceproc;
+int tracewhich, tracebuf[TRCSIZ];
+u_int tracex;
+char traceflags[TR_NFLAGS]; /* indexed by trace point number; trace() fires only when the entry is non-zero */
+#define pack(v,b) ((((v)->v_mount->mnt_stat.f_fsid.val[0])<<16)|(b)) /* outer parens keep the | from capturing neighbouring operators */
+#define trace(a,b,c) { /* bare { } block: do not use as the unbraced body of an if that has an else */ \
+ if (traceflags[a]) \
+ trace1(a,b,c); \
+}
+#else /* !TRACE: trace() expands to nothing */
+#define trace(a,b,c)
+#endif /* TRACE */
+#endif /* KERNEL */
diff --git a/sys/sys/tty.h b/sys/sys/tty.h
new file mode 100644
index 000000000000..4a89b0382ad0
--- /dev/null
+++ b/sys/sys/tty.h
@@ -0,0 +1,217 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tty.h 8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/termios.h>
+#include <sys/select.h> /* For struct selinfo. */
+
+/*
+ * Clists are character lists, which is a variable length linked list
+ * of cblocks, with a count of the number of characters in the list.
+ */
+struct clist {
+ int c_cc; /* Number of characters in the clist. */
+ char *c_cf; /* Pointer to the first cblock. */
+ char *c_cl; /* Pointer to the last cblock. */
+};
+
+/*
+ * Per-tty structure.
+ *
+ * Should be split in two, into device and tty drivers.
+ * Glue could be masks of what to echo and circular buffer
+ * (low, high, timeout).
+ */
+struct tty {
+ struct clist t_rawq; /* Device raw input queue. */
+ long t_rawcc; /* Raw input queue statistics. */
+ struct clist t_canq; /* Device canonical queue. */
+ long t_cancc; /* Canonical queue statistics. */
+ struct clist t_outq; /* Device output queue. */
+ long t_outcc; /* Output queue statistics. */
+ char t_line; /* Interface to device drivers. */
+ dev_t t_dev; /* Device. */
+ int t_state; /* Device and driver (TS*) state. */
+ int t_flags; /* Tty flags. */
+ struct pgrp *t_pgrp; /* Foreground process group. */
+ struct session *t_session; /* Enclosing session. */
+ struct selinfo t_rsel; /* Tty read/oob select. */
+ struct selinfo t_wsel; /* Tty write select. */
+ struct termios t_termios; /* Termios state. */
+ struct winsize t_winsize; /* Window size. */
+ /* Start output. */
+ void (*t_oproc) __P((struct tty *));
+ /* Stop output. */
+ void (*t_stop) __P((struct tty *, int));
+ /* Set hardware state. */
+ int (*t_param) __P((struct tty *, struct termios *));
+ void *t_sc; /* XXX: net/if_sl.c:sl_softc. */
+ short t_column; /* Tty output column. */
+ short t_rocount, t_rocol; /* Tty. */
+ short t_hiwat; /* High water mark. */
+ short t_lowat; /* Low water mark. */
+ short t_gen; /* Generation number. */
+};
+
+#define t_cc t_termios.c_cc
+#define t_cflag t_termios.c_cflag
+#define t_iflag t_termios.c_iflag
+#define t_ispeed t_termios.c_ispeed
+#define t_lflag t_termios.c_lflag
+#define t_min t_termios.c_min
+#define t_oflag t_termios.c_oflag
+#define t_ospeed t_termios.c_ospeed
+#define t_time t_termios.c_time
+
+#define TTIPRI 25 /* Sleep priority for tty reads. */
+#define TTOPRI 26 /* Sleep priority for tty writes. */
+
+#define TTMASK 15
+#define OBUFSIZ 100
+#define TTYHOG 1024
+
+#ifdef KERNEL
+#define TTMAXHIWAT roundup(2048, CBSIZE)
+#define TTMINHIWAT roundup(100, CBSIZE)
+#define TTMAXLOWAT 256
+#define TTMINLOWAT 32
+#endif
+
+/* These flags are kept in t_state. */
+#define TS_ASLEEP 0x00001 /* Process waiting for tty. */
+#define TS_ASYNC 0x00002 /* Tty in async I/O mode. */
+#define TS_BUSY 0x00004 /* Draining output. */
+#define TS_CARR_ON 0x00008 /* Carrier is present. */
+#define TS_FLUSH 0x00010 /* Outq has been flushed during DMA. */
+#define TS_ISOPEN 0x00020 /* Open has completed. */
+#define TS_TBLOCK 0x00040 /* Further input blocked. */
+#define TS_TIMEOUT 0x00080 /* Wait for output char processing. */
+#define TS_TTSTOP 0x00100 /* Output paused. */
+#define TS_WOPEN 0x00200 /* Open in progress. */
+#define TS_XCLUDE 0x00400 /* Tty requires exclusivity. */
+
+/* State for intra-line fancy editing work. */
+#define TS_BKSL 0x00800 /* State for lowercase \ work. */
+#define TS_CNTTB 0x01000 /* Counting tab width, ignore FLUSHO. */
+#define TS_ERASE 0x02000 /* Within a \.../ for PRTRUB. */
+#define TS_LNCH 0x04000 /* Next character is literal. */
+#define TS_TYPEN 0x08000 /* Retyping suspended input (PENDIN). */
+#define TS_LOCAL (TS_BKSL | TS_CNTTB | TS_ERASE | TS_LNCH | TS_TYPEN)
+
+/* Character type information. */
+#define ORDINARY 0
+#define CONTROL 1
+#define BACKSPACE 2
+#define NEWLINE 3
+#define TAB 4
+#define VTAB 5
+#define RETURN 6
+
+struct speedtab {
+ int sp_speed; /* Speed. */
+ int sp_code; /* Code. */
+};
+
+/* Modem control commands (driver). */
+#define DMSET 0
+#define DMBIS 1
+#define DMBIC 2
+#define DMGET 3
+
+/* Flags on a character passed to ttyinput. */
+#define TTY_CHARMASK 0x000000ff /* Character mask */
+#define TTY_QUOTE 0x00000100 /* Character quoted */
+#define TTY_ERRORMASK 0xff000000 /* Error mask */
+#define TTY_FE 0x01000000 /* Framing error or BREAK condition */
+#define TTY_PE 0x02000000 /* Parity error */
+
+/* Non-zero when p's session is tp's session and p has the P_CONTROLT flag set. */
+#define isctty(p, tp) \
+ (((p)->p_session == (tp)->t_session) && ((p)->p_flag & P_CONTROLT))
+
+/* Non-zero when isctty(p, tp) holds but p's process group is not tp's t_pgrp. */
+#define isbackground(p, tp) \
+ ((isctty((p), (tp))) && ((p)->p_pgrp != (tp)->t_pgrp))
+
+#ifdef KERNEL
+extern struct ttychars ttydefaults;
+
+/* Symbolic sleep message strings. */
+extern char ttyin[], ttyout[], ttopen[], ttclos[], ttybg[], ttybuf[];
+
+int b_to_q __P((char *cp, int cc, struct clist *q));
+void catq __P((struct clist *from, struct clist *to));
+void clist_init __P((void));
+int getc __P((struct clist *q));
+void ndflush __P((struct clist *q, int cc));
+int ndqb __P((struct clist *q, int flag));
+char *nextc __P((struct clist *q, char *cp, int *c));
+int putc __P((int c, struct clist *q));
+int q_to_b __P((struct clist *q, char *cp, int cc));
+int unputc __P((struct clist *q));
+
+int nullmodem __P((struct tty *tp, int flag));
+int tputchar __P((int c, struct tty *tp));
+int ttioctl __P((struct tty *tp, int com, void *data, int flag));
+int ttread __P((struct tty *tp, struct uio *uio, int flag));
+void ttrstrt __P((void *tp));
+int ttselect __P((dev_t device, int rw, struct proc *p));
+void ttsetwater __P((struct tty *tp));
+int ttspeedtab __P((int speed, struct speedtab *table));
+int ttstart __P((struct tty *tp));
+void ttwakeup __P((struct tty *tp));
+int ttwrite __P((struct tty *tp, struct uio *uio, int flag));
+void ttychars __P((struct tty *tp));
+int ttycheckoutq __P((struct tty *tp, int wait));
+int ttyclose __P((struct tty *tp));
+void ttyflush __P((struct tty *tp, int rw));
+void ttyinfo __P((struct tty *tp));
+int ttyinput __P((int c, struct tty *tp));
+int ttylclose __P((struct tty *tp, int flag));
+int ttymodem __P((struct tty *tp, int flag));
+int ttyopen __P((dev_t device, struct tty *tp));
+int ttyoutput __P((int c, struct tty *tp));
+void ttypend __P((struct tty *tp));
+void ttyretype __P((struct tty *tp));
+void ttyrub __P((int c, struct tty *tp));
+int ttysleep __P((struct tty *tp,
+ void *chan, int pri, char *wmesg, int timeout));
+int ttywait __P((struct tty *tp));
+int ttywflush __P((struct tty *tp));
+#endif
diff --git a/sys/sys/ttychars.h b/sys/sys/ttychars.h
new file mode 100644
index 000000000000..1a23aa770919
--- /dev/null
+++ b/sys/sys/ttychars.h
@@ -0,0 +1,63 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ttychars.h 8.2 (Berkeley) 1/4/94
+ */
+
+/*
+ * 4.3 COMPATIBILITY FILE
+ *
+ * User visible structures and constants related to terminal handling.
+ */
+#ifndef _SYS_TTYCHARS_H_
+#define _SYS_TTYCHARS_H_
+
+struct ttychars {
+ char tc_erase; /* erase last character */
+ char tc_kill; /* erase entire line */
+ char tc_intrc; /* interrupt */
+ char tc_quitc; /* quit */
+ char tc_startc; /* start output */
+ char tc_stopc; /* stop output */
+ char tc_eofc; /* end-of-file */
+ char tc_brkc; /* input delimiter (like nl) */
+ char tc_suspc; /* stop process signal */
+ char tc_dsuspc; /* delayed stop process signal */
+ char tc_rprntc; /* reprint line */
+ char tc_flushc; /* flush output (toggles) */
+ char tc_werasc; /* word erase */
+ char tc_lnextc; /* literal next character */
+};
+#ifdef USE_OLD_TTY
+#include <sys/ttydefaults.h> /* to pick up character defaults */
+#endif
+#endif /* !_SYS_TTYCHARS_H_ */
diff --git a/sys/sys/ttycom.h b/sys/sys/ttycom.h
new file mode 100644
index 000000000000..a12d8d00354c
--- /dev/null
+++ b/sys/sys/ttycom.h
@@ -0,0 +1,128 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ttycom.h 8.1 (Berkeley) 3/28/94
+ */
+
+#ifndef _SYS_TTYCOM_H_
+#define _SYS_TTYCOM_H_
+
+#include <sys/ioccom.h>
+
+/*
+ * Tty ioctl's except for those supported only for backwards compatibility
+ * with the old tty driver.
+ */
+
+/*
+ * Window/terminal size structure. This information is stored by the kernel
+ * in order to provide a consistent interface, but is not used by the kernel.
+ */
+struct winsize {
+ unsigned short ws_row; /* rows, in characters */
+ unsigned short ws_col; /* columns, in characters */
+ unsigned short ws_xpixel; /* horizontal size, pixels */
+ unsigned short ws_ypixel; /* vertical size, pixels */
+};
+
+#define TIOCMODG _IOR('t', 3, int) /* get modem control state */
+#define TIOCMODS _IOW('t', 4, int) /* set modem control state */
+#define TIOCM_LE 0001 /* line enable */
+#define TIOCM_DTR 0002 /* data terminal ready */
+#define TIOCM_RTS 0004 /* request to send */
+#define TIOCM_ST 0010 /* secondary transmit */
+#define TIOCM_SR 0020 /* secondary receive */
+#define TIOCM_CTS 0040 /* clear to send */
+#define TIOCM_CAR 0100 /* carrier detect */
+#define TIOCM_CD TIOCM_CAR
+#define TIOCM_RNG 0200 /* ring */
+#define TIOCM_RI TIOCM_RNG
+#define TIOCM_DSR 0400 /* data set ready */
+ /* 8-10 compat */
+#define TIOCEXCL _IO('t', 13) /* set exclusive use of tty */
+#define TIOCNXCL _IO('t', 14) /* reset exclusive use of tty */
+ /* 15 unused */
+#define TIOCFLUSH _IOW('t', 16, int) /* flush buffers */
+ /* 17-18 compat */
+#define TIOCGETA _IOR('t', 19, struct termios) /* get termios struct */
+#define TIOCSETA _IOW('t', 20, struct termios) /* set termios struct */
+#define TIOCSETAW _IOW('t', 21, struct termios) /* drain output, set */
+#define TIOCSETAF _IOW('t', 22, struct termios) /* drn out, fls in, set */
+#define TIOCGETD _IOR('t', 26, int) /* get line discipline */
+#define TIOCSETD _IOW('t', 27, int) /* set line discipline */
+ /* 127-124 compat */
+#define TIOCSBRK _IO('t', 123) /* set break bit */
+#define TIOCCBRK _IO('t', 122) /* clear break bit */
+#define TIOCSDTR _IO('t', 121) /* set data terminal ready */
+#define TIOCCDTR _IO('t', 120) /* clear data terminal ready */
+#define TIOCGPGRP _IOR('t', 119, int) /* get pgrp of tty */
+#define TIOCSPGRP _IOW('t', 118, int) /* set pgrp of tty */
+ /* 117-116 compat */
+#define TIOCOUTQ _IOR('t', 115, int) /* output queue size */
+#define TIOCSTI _IOW('t', 114, char) /* simulate terminal input */
+#define TIOCNOTTY _IO('t', 113) /* void tty association */
+#define TIOCPKT _IOW('t', 112, int) /* pty: set/clear packet mode */
+#define TIOCPKT_DATA 0x00 /* data packet */
+#define TIOCPKT_FLUSHREAD 0x01 /* flush packet */
+#define TIOCPKT_FLUSHWRITE 0x02 /* flush packet */
+#define TIOCPKT_STOP 0x04 /* stop output */
+#define TIOCPKT_START 0x08 /* start output */
+#define TIOCPKT_NOSTOP 0x10 /* no more ^S, ^Q */
+#define TIOCPKT_DOSTOP 0x20 /* now do ^S ^Q */
+#define TIOCPKT_IOCTL 0x40 /* state change of pty driver */
+#define TIOCSTOP _IO('t', 111) /* stop output, like ^S */
+#define TIOCSTART _IO('t', 110) /* start output, like ^Q */
+#define TIOCMSET _IOW('t', 109, int) /* set all modem bits */
+#define TIOCMBIS _IOW('t', 108, int) /* bis modem bits */
+#define TIOCMBIC _IOW('t', 107, int) /* bic modem bits */
+#define TIOCMGET _IOR('t', 106, int) /* get all modem bits */
+#define TIOCREMOTE _IOW('t', 105, int) /* remote input editing */
+#define TIOCGWINSZ _IOR('t', 104, struct winsize) /* get window size */
+#define TIOCSWINSZ _IOW('t', 103, struct winsize) /* set window size */
+#define TIOCUCNTL _IOW('t', 102, int) /* pty: set/clr usr cntl mode */
+#define UIOCCMD(n) _IO('u', n) /* usr cntl op "n" */
+#define TIOCCONS _IOW('t', 98, int) /* become virtual console */
+#define TIOCSCTTY _IO('t', 97) /* become controlling tty */
+#define TIOCEXT _IOW('t', 96, int) /* pty: external processing */
+#define TIOCSIG _IO('t', 95) /* pty: generate signal */
+#define TIOCDRAIN _IO('t', 94) /* wait till output drained */
+
+#define TTYDISC 0 /* termios tty line discipline */
+#define TABLDISC 3 /* tablet discipline */
+#define SLIPDISC 4 /* serial IP discipline */
+
+#endif /* !_SYS_TTYCOM_H_ */
diff --git a/sys/sys/ttydefaults.h b/sys/sys/ttydefaults.h
new file mode 100644
index 000000000000..1a8aaa5bd006
--- /dev/null
+++ b/sys/sys/ttydefaults.h
@@ -0,0 +1,96 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ttydefaults.h 8.4 (Berkeley) 1/21/94
+ */
+
+/*
+ * System wide defaults for terminal state.
+ */
+#ifndef _SYS_TTYDEFAULTS_H_
+#define _SYS_TTYDEFAULTS_H_
+
+/*
+ * Defaults on "first" open.
+ */
+#define TTYDEF_IFLAG (BRKINT | ISTRIP | ICRNL | IMAXBEL | IXON | IXANY)
+#define TTYDEF_OFLAG (OPOST | ONLCR | OXTABS)
+#define TTYDEF_LFLAG (ECHO | ICANON | ISIG | IEXTEN | ECHOE|ECHOKE|ECHOCTL)
+#define TTYDEF_CFLAG (CREAD | CS7 | PARENB | HUPCL)
+#define TTYDEF_SPEED (B9600)
+
+/*
+ * Control Character Defaults
+ */
+#define CTRL(x) ((x)&037) /* keep the low 5 bits; (x) is parenthesized so any expression works */
+#define CEOF CTRL('d') /* ^D */
+#define CEOL ((unsigned char)'\377') /* XXX avoid _POSIX_VDISABLE */
+#define CERASE 0177 /* DEL */
+#define CINTR CTRL('c') /* ^C */
+#define CSTATUS ((unsigned char)'\377') /* XXX avoid _POSIX_VDISABLE */
+#define CKILL CTRL('u') /* ^U */
+#define CMIN 1
+#define CQUIT 034 /* FS, ^\ */
+#define CSUSP CTRL('z') /* ^Z */
+#define CTIME 0
+#define CDSUSP CTRL('y') /* ^Y */
+#define CSTART CTRL('q') /* ^Q */
+#define CSTOP CTRL('s') /* ^S */
+#define CLNEXT CTRL('v') /* ^V */
+#define CDISCARD CTRL('o') /* ^O */
+#define CWERASE CTRL('w') /* ^W */
+#define CREPRINT CTRL('r') /* ^R */
+#define CEOT CEOF /* same character as CEOF */
+/* compat: older names for the characters above */
+#define CBRK CEOL
+#define CRPRNT CREPRINT
+#define CFLUSH CDISCARD
+
+/* PROTECTED INCLUSION ENDS HERE */
+#endif /* !_SYS_TTYDEFAULTS_H_ */
+
+/*
+ * #define TTYDEFCHARS to include an array of default control characters.
+ */
+#ifdef TTYDEFCHARS /* tested after the include guard above has been closed */
+cc_t ttydefchars[NCCS] = { /* a definition, not a declaration: every includer that sets TTYDEFCHARS gets its own copy */
+ CEOF, CEOL, CEOL, CERASE, CWERASE, CKILL, CREPRINT, /* NOTE(review): entries are positional; presumably indexed by the termios V* constants -- confirm */
+ _POSIX_VDISABLE, CINTR, CQUIT, CSUSP, CDSUSP, CSTART, CSTOP, CLNEXT,
+ CDISCARD, CMIN, CTIME, CSTATUS, _POSIX_VDISABLE
+};
+#undef TTYDEFCHARS /* so a later inclusion does not emit the array again */
+#endif /* TTYDEFCHARS */
diff --git a/sys/sys/ttydev.h b/sys/sys/ttydev.h
new file mode 100644
index 000000000000..c52a21369260
--- /dev/null
+++ b/sys/sys/ttydev.h
@@ -0,0 +1,60 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ttydev.h 8.2 (Berkeley) 1/4/94
+ */
+
+/* COMPATIBILITY HEADER FILE */
+
+#ifndef _SYS_TTYDEV_H_
+#define _SYS_TTYDEV_H_
+
+#ifdef USE_OLD_TTY /* legacy speed codes: consecutive small integers 0-15, not the rates in their names */
+#define B0 0
+#define B50 1
+#define B75 2
+#define B110 3
+#define B134 4
+#define B150 5
+#define B200 6
+#define B300 7
+#define B600 8
+#define B1200 9
+#define B1800 10
+#define B2400 11
+#define B4800 12
+#define B9600 13
+#define EXTA 14
+#define EXTB 15
+#endif /* USE_OLD_TTY */
+
+#endif /* !_SYS_TTYDEV_H_ */
diff --git a/sys/sys/types.h b/sys/sys/types.h
new file mode 100644
index 000000000000..76d2975d31b5
--- /dev/null
+++ b/sys/sys/types.h
@@ -0,0 +1,162 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)types.h 8.4 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_TYPES_H_
+#define _SYS_TYPES_H_
+
+/* Machine type dependent parameters. */
+#include <machine/endian.h>
+
+#ifndef _POSIX_SOURCE /* short unsigned aliases are hidden when _POSIX_SOURCE is defined */
+typedef unsigned char u_char;
+typedef unsigned short u_short;
+typedef unsigned int u_int;
+typedef unsigned long u_long;
+typedef unsigned short ushort; /* Sys V compatibility */
+typedef unsigned int uint; /* Sys V compatibility */
+#endif /* !_POSIX_SOURCE */
+
+typedef unsigned long long u_quad_t; /* quads */
+typedef long long quad_t; /* signed counterpart of u_quad_t */
+typedef quad_t * qaddr_t; /* pointer to a quad_t */
+
+typedef char * caddr_t; /* core address */
+typedef long daddr_t; /* disk address */
+typedef unsigned long dev_t; /* device number */
+typedef unsigned long fixpt_t; /* fixed point number */
+typedef unsigned long gid_t; /* group id */
+typedef unsigned long ino_t; /* inode number */
+typedef unsigned short mode_t; /* permissions */
+typedef unsigned short nlink_t; /* link count */
+typedef quad_t off_t; /* file offset */
+typedef long pid_t; /* process id */
+typedef long segsz_t; /* segment size */
+typedef long swblk_t; /* swap offset */
+typedef unsigned long uid_t; /* user id */
+
+/*
+ * This belongs in unistd.h, but is placed here to ensure that programs
+ * casting the second parameter of lseek to off_t will get the correct
+ * version of lseek.
+ */
+#ifndef KERNEL
+#include <sys/cdefs.h>
+__BEGIN_DECLS
+off_t lseek __P((int, off_t, int));
+__END_DECLS
+#endif
+
+#ifndef _POSIX_SOURCE
+#define major(x) ((int)(((u_int)(x) >> 8)&0xff)) /* major number: bits 8-15 of x */
+#define minor(x) ((int)((x)&0xff)) /* minor number: low 8 bits of x */
+#define makedev(x,y) ((dev_t)(((x)<<8) | (y))) /* create dev_t: x is the major, y the minor; neither is masked */
+#endif /* !_POSIX_SOURCE */
+
+#include <machine/ansi.h>
+#include <machine/types.h>
+
+#ifdef _BSD_CLOCK_T_ /* the macro expands to the underlying type of clock_t */
+typedef _BSD_CLOCK_T_ clock_t;
+#undef _BSD_CLOCK_T_ /* presumably stops other headers that test this macro from redefining clock_t -- confirm */
+#endif /* _BSD_CLOCK_T_ */
+
+#ifdef _BSD_SIZE_T_ /* same pattern for size_t */
+typedef _BSD_SIZE_T_ size_t;
+#undef _BSD_SIZE_T_
+#endif /* _BSD_SIZE_T_ */
+
+#ifdef _BSD_SSIZE_T_ /* same pattern for ssize_t */
+typedef _BSD_SSIZE_T_ ssize_t;
+#undef _BSD_SSIZE_T_
+#endif /* _BSD_SSIZE_T_ */
+
+#ifdef _BSD_TIME_T_ /* same pattern for time_t */
+typedef _BSD_TIME_T_ time_t;
+#undef _BSD_TIME_T_
+#endif /* _BSD_TIME_T_ */
+
+#ifndef _POSIX_SOURCE
+#define NBBY 8 /* number of bits in a byte */
+
+/*
+ * Select uses bit masks of file descriptors in longs. These macros
+ * manipulate such bit fields (the filesystem macros use chars); each bit
+ * is built as an fd_mask, not an int, so shifts up to NFDBITS-1 are valid.
+ * FD_SETSIZE may be defined by the user, but the default is enough for most.
+ */
+#ifndef FD_SETSIZE
+#define FD_SETSIZE 256
+#endif
+
+typedef long fd_mask;
+#define NFDBITS (sizeof(fd_mask) * NBBY) /* bits per mask */
+
+#ifndef howmany
+#define howmany(x, y) (((x)+((y)-1))/(y))
+#endif
+
+typedef struct fd_set {
+ fd_mask fds_bits[howmany(FD_SETSIZE, NFDBITS)];
+} fd_set;
+
+#define FD_SET(n, p) ((p)->fds_bits[(n)/NFDBITS] |= ((fd_mask)1 << ((n) % NFDBITS)))
+#define FD_CLR(n, p) ((p)->fds_bits[(n)/NFDBITS] &= ~((fd_mask)1 << ((n) % NFDBITS)))
+#define FD_ISSET(n, p) ((p)->fds_bits[(n)/NFDBITS] & ((fd_mask)1 << ((n) % NFDBITS)))
+#define FD_COPY(f, t) bcopy(f, t, sizeof(*(f)))
+#define FD_ZERO(p) bzero(p, sizeof(*(p)))
+
+#if defined(__STDC__) && defined(KERNEL)
+/*
+ * Forward structure declarations for function prototypes. We include the
+ * common structures that cross subsystem boundaries here; others are mostly
+ * used in the same place that the structure is defined.
+ */
+struct proc;
+struct pgrp;
+struct ucred;
+struct rusage;
+struct file;
+struct buf;
+struct tty;
+struct uio;
+#endif
+
+#endif /* !_POSIX_SOURCE */
+#endif /* !_SYS_TYPES_H_ */
diff --git a/sys/sys/ucred.h b/sys/sys/ucred.h
new file mode 100644
index 000000000000..d3ee02dbde3d
--- /dev/null
+++ b/sys/sys/ucred.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ucred.h 8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_UCRED_H_
+#define _SYS_UCRED_H_
+
+/*
+ * Credentials.
+ */
+struct ucred {
+ u_short cr_ref; /* reference count (incremented by crhold) */
+ uid_t cr_uid; /* effective user id */
+ short cr_ngroups; /* number of groups */
+ gid_t cr_groups[NGROUPS]; /* groups; element 0 is also cr_gid */
+};
+#define cr_gid cr_groups[0] /* cr_gid names the first cr_groups entry */
+#define NOCRED ((struct ucred *)-1) /* no credential available (pointer value -1, not an object) */
+#define FSCRED ((struct ucred *)-2) /* filesystem credential (pointer value -2, not an object) */
+
+#ifdef KERNEL
+#define crhold(cr) (cr)->cr_ref++ /* post-increments cr_ref; no overflow check on the u_short */
+struct ucred *crget(); /* old-style declaration: arguments are not checked */
+struct ucred *crcopy(); /* old-style declaration: arguments are not checked */
+struct ucred *crdup(); /* old-style declaration: arguments are not checked */
+#endif /* KERNEL */
+
+#endif /* !_SYS_UCRED_H_ */
diff --git a/sys/sys/uio.h b/sys/sys/uio.h
new file mode 100644
index 000000000000..3356ebfee895
--- /dev/null
+++ b/sys/sys/uio.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1982, 1986, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uio.h 8.5 (Berkeley) 2/22/94
+ */
+
+#ifndef _SYS_UIO_H_
+#define _SYS_UIO_H_
+
+/*
+ * XXX
+ * iov_base should be a void *.
+ */
+struct iovec {
+ char *iov_base; /* Base address. */
+ size_t iov_len; /* Length. */
+};
+
+enum uio_rw { UIO_READ, UIO_WRITE };
+
+/* Segment flag values. */
+enum uio_seg {
+ UIO_USERSPACE, /* from user data space */
+ UIO_SYSSPACE, /* from system space */
+ UIO_USERISPACE /* from user I space */
+};
+
+#ifdef KERNEL
+struct uio { /* NOTE(review): field roles marked "presumably" are inferred from names only */
+ struct iovec *uio_iov; /* iovec(s) describing the buffers */
+ int uio_iovcnt; /* presumably the number of iovecs at uio_iov -- confirm */
+ off_t uio_offset; /* presumably the offset in the object being transferred -- confirm */
+ int uio_resid; /* presumably the byte count still to transfer -- confirm */
+ enum uio_seg uio_segflg; /* which address space the buffers are in (enum uio_seg) */
+ enum uio_rw uio_rw; /* UIO_READ or UIO_WRITE */
+ struct proc *uio_procp; /* associated process -- TODO confirm its role */
+};
+
+/*
+ * Limits
+ */
+#define UIO_MAXIOV 1024 /* max 1K of iov's */
+#define UIO_SMALLIOV 8 /* 8 on stack, else malloc */
+#endif /* KERNEL */
+
+#ifndef KERNEL
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+ssize_t readv __P((int, const struct iovec *, int));
+ssize_t writev __P((int, const struct iovec *, int));
+__END_DECLS
+#endif /* !KERNEL */
+#endif /* !_SYS_UIO_H_ */
diff --git a/sys/sys/un.h b/sys/sys/un.h
new file mode 100644
index 000000000000..3e214a26bb5d
--- /dev/null
+++ b/sys/sys/un.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)un.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Definitions for UNIX IPC domain. NOTE(review): no include guard here.
+ */
+struct sockaddr_un {
+ u_char sun_len; /* sockaddr len including null */
+ u_char sun_family; /* AF_UNIX */
+ char sun_path[104]; /* path name (gag) */
+};
+
+#ifdef KERNEL
+int unp_discard(); /* old-style declaration: arguments are not checked */
+#else
+
+/* actual length of an initialized sockaddr_un: bytes before sun_path plus strlen(sun_path), NUL not counted */
+#define SUN_LEN(su) \
+ (sizeof(*(su)) - sizeof((su)->sun_path) + strlen((su)->sun_path))
+#endif /* KERNEL */
diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h
new file mode 100644
index 000000000000..e086f6f6e394
--- /dev/null
+++ b/sys/sys/unistd.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)unistd.h 8.2 (Berkeley) 1/7/94
+ */
+
+#ifndef _SYS_UNISTD_H_
+#define _SYS_UNISTD_H_
+
+/* compile-time symbolic constants */
+#define _POSIX_JOB_CONTROL /* implementation supports job control */
+
+/*
+ * Although we have saved user/group IDs, we do not use them in setuid
+ * as described in POSIX 1003.1, because the feature does not work for
+ * root. We use the saved IDs in seteuid/setegid, which are not currently
+ * part of the POSIX 1003.1 specification.
+ */
+#ifdef _NOT_AVAILABLE
+#define _POSIX_SAVED_IDS /* saved set-user-ID and set-group-ID */
+#endif
+
+#define _POSIX_VERSION 198808L
+#define _POSIX2_VERSION 199212L
+
+/* execution-time symbolic constants */
+ /* chown requires appropriate privileges */
+#define _POSIX_CHOWN_RESTRICTED 1
+ /* too-long path components generate errors */
+#define _POSIX_NO_TRUNC 1
+ /* may disable terminal special characters */
+#define _POSIX_VDISABLE ((unsigned char)'\377')
+
+/* access function */
+#define F_OK 0 /* test for existence of file */
+#define X_OK 0x01 /* test for execute or search permission */
+#define W_OK 0x02 /* test for write permission */
+#define R_OK 0x04 /* test for read permission */
+
+/* whence values for lseek(2) */
+#define SEEK_SET 0 /* set file offset to offset */
+#define SEEK_CUR 1 /* set file offset to current plus offset */
+#define SEEK_END 2 /* set file offset to EOF plus offset */
+
+#ifndef _POSIX_SOURCE
+/* whence values for lseek(2); renamed by POSIX 1003.1 */
+#define L_SET SEEK_SET
+#define L_INCR SEEK_CUR
+#define L_XTND SEEK_END
+#endif
+
+/* configurable pathname variables */
+#define _PC_LINK_MAX 1
+#define _PC_MAX_CANON 2
+#define _PC_MAX_INPUT 3
+#define _PC_NAME_MAX 4
+#define _PC_PATH_MAX 5
+#define _PC_PIPE_BUF 6
+#define _PC_CHOWN_RESTRICTED 7
+#define _PC_NO_TRUNC 8
+#define _PC_VDISABLE 9
+
+/* configurable system variables */
+#define _SC_ARG_MAX 1
+#define _SC_CHILD_MAX 2
+#define _SC_CLK_TCK 3
+#define _SC_NGROUPS_MAX 4
+#define _SC_OPEN_MAX 5
+#define _SC_JOB_CONTROL 6
+#define _SC_SAVED_IDS 7
+#define _SC_VERSION 8
+#define _SC_BC_BASE_MAX 9
+#define _SC_BC_DIM_MAX 10
+#define _SC_BC_SCALE_MAX 11
+#define _SC_BC_STRING_MAX 12
+#define _SC_COLL_WEIGHTS_MAX 13
+#define _SC_EXPR_NEST_MAX 14
+#define _SC_LINE_MAX 15
+#define _SC_RE_DUP_MAX 16
+#define _SC_2_VERSION 17
+#define _SC_2_C_BIND 18
+#define _SC_2_C_DEV 19
+#define _SC_2_CHAR_TERM 20
+#define _SC_2_FORT_DEV 21
+#define _SC_2_FORT_RUN 22
+#define _SC_2_LOCALEDEF 23
+#define _SC_2_SW_DEV 24
+#define _SC_2_UPE 25
+#define _SC_STREAM_MAX 26
+#define _SC_TZNAME_MAX 27
+
+/* configurable system strings */
+#define _CS_PATH 1
+
+#endif /* !_SYS_UNISTD_H_ */
diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h
new file mode 100644
index 000000000000..efcfd0e23c19
--- /dev/null
+++ b/sys/sys/unpcb.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)unpcb.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Protocol control block for an active
+ * instance of a UNIX internal protocol.
+ *
+ * A socket may be associated with a vnode in the
+ * file system. If so, the unp_vnode pointer holds
+ * a reference count to this vnode, which should be irele'd
+ * when the socket goes away.
+ *
+ * A socket may be connected to another socket, in which
+ * case the control block of the socket to which it is connected
+ * is given by unp_conn.
+ *
+ * A socket may be referenced by a number of sockets (e.g. several
+ * sockets may be connected to a datagram socket.) These sockets
+ * are in a linked list starting with unp_refs, linked through
+ * unp_nextref and null-terminated. Note that a socket may be referenced
+ * by a number of other sockets and may also reference a socket (not
+ * necessarily one which is referencing it). This generates
+ * the need for unp_refs and unp_nextref to be separate fields.
+ *
+ * Stream sockets keep copies of receive sockbuf sb_cc and sb_mbcnt
+ * so that changes in the sockbuf may be computed to modify
+ * back pressure on the sender accordingly.
+ */
+struct unpcb {
+ struct socket *unp_socket; /* pointer back to socket */
+ struct vnode *unp_vnode; /* if associated with file */
+ ino_t unp_ino; /* fake inode number */
+ struct unpcb *unp_conn; /* control block of connected socket */
+ struct unpcb *unp_refs; /* referencing socket linked list */
+ struct unpcb *unp_nextref; /* link in unp_refs list */
+ struct mbuf *unp_addr; /* bound address of socket */
+ int unp_cc; /* copy of rcv.sb_cc */
+ int unp_mbcnt; /* copy of rcv.sb_mbcnt */
+};
+
+#define sotounpcb(so) ((struct unpcb *)((so)->so_pcb)) /* the socket's so_pcb, cast to struct unpcb * */
diff --git a/sys/sys/user.h b/sys/sys/user.h
new file mode 100644
index 000000000000..85fdd130c2df
--- /dev/null
+++ b/sys/sys/user.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)user.h 8.2 (Berkeley) 9/23/93
+ */
+
+#include <machine/pcb.h>
+#ifndef KERNEL
+/* stuff that *used* to be included by user.h, or is now needed */
+#include <errno.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/ucred.h>
+#include <sys/uio.h>
+#endif
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <vm/vm.h> /* XXX */
+#include <sys/sysctl.h>
+
+
+/*
+ * Per process structure containing data that isn't needed in core
+ * when the process isn't running (esp. when swapped out).
+ * This structure may or may not be at the same kernel address
+ * in all processes.
+ */
+
+struct user {
+ struct pcb u_pcb;
+
+ struct sigacts u_sigacts; /* p_sigacts points here (use it!) */
+ struct pstats u_stats; /* p_stats points here (use it!) */
+
+ /*
+ * Remaining fields only for core dump and/or ptrace--
+ * not valid at other times!
+ */
+ struct kinfo_proc u_kproc; /* proc + eproc */
+ struct md_coredump u_md; /* machine dependent glop */
+};
+
+/*
+ * Redefinitions to make the debuggers happy for now... This subterfuge
+ * brought to you by coredump() and trace_req(). These fields are *only*
+ * valid at those times!
+ */
+#define U_ar0 u_kproc.kp_proc.p_md.md_regs /* copy of curproc->p_md.md_regs */
+#define U_tsize u_kproc.kp_eproc.e_vm.vm_tsize
+#define U_dsize u_kproc.kp_eproc.e_vm.vm_dsize
+#define U_ssize u_kproc.kp_eproc.e_vm.vm_ssize
+#define U_sig u_sigacts.ps_sig
+#define U_code u_sigacts.ps_code
+
+#ifndef KERNEL /* lower-case u_* aliases are provided to non-kernel code only */
+#define u_ar0 U_ar0
+#define u_tsize U_tsize
+#define u_dsize U_dsize
+#define u_ssize U_ssize
+#define u_sig U_sig
+#define u_code U_code
+#endif /* !KERNEL */
diff --git a/sys/sys/utsname.h b/sys/sys/utsname.h
new file mode 100644
index 000000000000..aa0f2c75ab62
--- /dev/null
+++ b/sys/sys/utsname.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chuck Karish of Mindcraft, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)utsname.h 8.1 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_UTSNAME_H
+#define _SYS_UTSNAME_H
+
+struct utsname {
+ char sysname[256]; /* Name of this OS. */
+ char nodename[256]; /* Name of this network node. */
+ char release[256]; /* Release level. */
+ char version[256]; /* Version level. */
+ char machine[256]; /* Hardware type. */
+};
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int uname __P((struct utsname *));
+__END_DECLS
+
+#endif /* !_SYS_UTSNAME_H */
diff --git a/sys/sys/vadvise.h b/sys/sys/vadvise.h
new file mode 100644
index 000000000000..be793e8e721a
--- /dev/null
+++ b/sys/sys/vadvise.h
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vadvise.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Parameters to vadvise() to tell system of particular paging
+ * behaviour:
+ * VA_NORM Normal strategy
+ * VA_ANOM Sampling page behaviour is not a win, don't bother
+ * Suitable during GCs in LISP, or sequential or random
+ * page referencing.
+ * VA_SEQL Sequential behaviour expected.
+ * VA_FLUSH Invalidate all page table entries.
+ */
+#define VA_NORM 0
+#define VA_ANOM 1
+#define VA_SEQL 2
+#define VA_FLUSH 3
diff --git a/sys/sys/vcmd.h b/sys/sys/vcmd.h
new file mode 100644
index 000000000000..de27ec1b0af1
--- /dev/null
+++ b/sys/sys/vcmd.h
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vcmd.h 8.1 (Berkeley) 6/2/93
+ */
+
+#include <sys/ioctl.h>
+
+#define VPRINT 0100
+#define VPLOT 0200
+#define VPRINTPLOT 0400
+
+#define VGETSTATE _IOR('v', 0, int)
+#define VSETSTATE _IOW('v', 1, int)
diff --git a/sys/sys/vlimit.h b/sys/sys/vlimit.h
new file mode 100644
index 000000000000..b6457e64ddf7
--- /dev/null
+++ b/sys/sys/vlimit.h
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vlimit.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Limits for u.u_limit[i], per process, inherited.
+ */
+#define LIM_NORAISE 0 /* if <> 0, can't raise limits */
+#define LIM_CPU 1 /* max secs cpu time */
+#define LIM_FSIZE 2 /* max size of file created */
+#define LIM_DATA 3 /* max growth of data space */
+#define LIM_STACK 4 /* max growth of stack */
+#define LIM_CORE 5 /* max size of ``core'' file */
+#define LIM_MAXRSS 6 /* max desired data+stack core usage */
+
+#define NLIMITS 6
+
+#define INFINITY 0x7fffffff
diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
new file mode 100644
index 000000000000..f0b3d57f3363
--- /dev/null
+++ b/sys/sys/vmmeter.h
@@ -0,0 +1,147 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vmmeter.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * System wide statistics counters.
+ */
+struct vmmeter {
+ /*
+ * General system activity.
+ */
+ unsigned v_swtch; /* context switches */
+ unsigned v_trap; /* calls to trap */
+ unsigned v_syscall; /* calls to syscall() */
+ unsigned v_intr; /* device interrupts */
+ unsigned v_soft; /* software interrupts */
+ unsigned v_faults; /* total faults taken */
+ /*
+ * Virtual memory activity.
+ */
+ unsigned v_lookups; /* object cache lookups */
+ unsigned v_hits; /* object cache hits */
+ unsigned v_vm_faults; /* number of address memory faults */
+ unsigned v_cow_faults; /* number of copy-on-writes */
+ unsigned v_swpin; /* swapins */
+ unsigned v_swpout; /* swapouts */
+ unsigned v_pswpin; /* pages swapped in */
+ unsigned v_pswpout; /* pages swapped out */
+ unsigned v_pageins; /* number of pageins */
+ unsigned v_pageouts; /* number of pageouts */
+ unsigned v_pgpgin; /* pages paged in */
+ unsigned v_pgpgout; /* pages paged out */
+ unsigned v_intrans; /* intransit blocking page faults */
+ unsigned v_reactivated; /* number of pages reactivated from free list */
+ unsigned v_rev; /* revolutions of the hand */
+ unsigned v_scan; /* scans in page out daemon */
+ unsigned v_dfree; /* pages freed by daemon */
+ unsigned v_pfree; /* pages freed by exiting processes */
+ unsigned v_zfod; /* pages zero filled on demand */
+ unsigned v_nzfod; /* number of zfod's created */
+ /*
+ * Distribution of page usages.
+ */
+ unsigned v_page_size; /* page size in bytes */
+ unsigned v_kernel_pages;/* number of pages in use by kernel */
+ unsigned v_free_target; /* number of pages desired free */
+ unsigned v_free_min; /* minimum number of pages desired free */
+ unsigned v_free_count; /* number of pages free */
+ unsigned v_wire_count; /* number of pages wired down */
+ unsigned v_active_count;/* number of pages active */
+ unsigned v_inactive_target; /* number of pages desired inactive */
+ unsigned v_inactive_count; /* number of pages inactive */
+};
+#ifdef KERNEL
+struct vmmeter cnt;
+#endif
+
+/* systemwide totals computed every five seconds */
+struct vmtotal
+{
+ short t_rq; /* length of the run queue */
+ short t_dw; /* jobs in ``disk wait'' (neg priority) */
+ short t_pw; /* jobs in page wait */
+ short t_sl; /* jobs sleeping in core */
+ short t_sw; /* swapped out runnable/short block jobs */
+ long t_vm; /* total virtual memory */
+ long t_avm; /* active virtual memory */
+ long t_rm; /* total real memory in use */
+ long t_arm; /* active real memory */
+ long t_vmshr; /* shared virtual memory */
+ long t_avmshr; /* active shared virtual memory */
+ long t_rmshr; /* shared real memory */
+ long t_armshr; /* active shared real memory */
+ long t_free; /* free memory pages */
+};
+#ifdef KERNEL
+struct vmtotal total;
+#endif
+
+/*
+ * Optional instrumentation.
+ */
+#ifdef PGINPROF
+
+#define NDMON 128
+#define NSMON 128
+
+#define DRES 20
+#define SRES 5
+
+#define PMONMIN 20
+#define PRES 50
+#define NPMON 64
+
+#define RMONMIN 130
+#define RRES 5
+#define NRMON 64
+
+/* data and stack size distribution counters */
+unsigned int dmon[NDMON+1];
+unsigned int smon[NSMON+1];
+
+/* page in time distribution counters */
+unsigned int pmon[NPMON+2];
+
+/* reclaim time distribution counters */
+unsigned int rmon[NRMON+2];
+
+int pmonmin;
+int pres;
+int rmonmin;
+int rres;
+
+unsigned rectime; /* accumulator for reclaim times */
+unsigned pgintime; /* accumulator for page in times */
+#endif
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
new file mode 100644
index 000000000000..fa51d994a213
--- /dev/null
+++ b/sys/sys/vnode.h
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vnode.h 8.7 (Berkeley) 2/4/94
+ */
+
+#include <sys/queue.h>
+
+/*
+ * The vnode is the focus of all file activity in UNIX. There is a
+ * unique vnode allocated for each active file, each current directory,
+ * each mounted-on file, text file, and the root.
+ */
+
+/*
+ * Vnode types. VNON means no type.
+ */
+enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD };
+
+/*
+ * Vnode tag types.
+ * These are for the benefit of external programs only (e.g., pstat)
+ * and should NEVER be inspected by the kernel.
+ */
+enum vtagtype {
+ VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_PC, VT_LFS, VT_LOFS, VT_FDESC,
+ VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS,
+ VT_UNION
+};
+
+/*
+ * Each underlying filesystem allocates its own private area and hangs
+ * it from v_data. If non-null, this area is freed in getnewvnode().
+ */
+LIST_HEAD(buflists, buf);
+
+struct vnode {
+ u_long v_flag; /* vnode flags (see below) */
+ short v_usecount; /* reference count of users */
+ short v_writecount; /* reference count of writers */
+ long v_holdcnt; /* page & buffer references */
+ daddr_t v_lastr; /* last read (read-ahead) */
+ u_long v_id; /* capability identifier */
+ struct mount *v_mount; /* ptr to vfs we are in */
+ int (**v_op)(); /* vnode operations vector */
+ TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */
+ LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */
+ struct buflists v_cleanblkhd; /* clean blocklist head */
+ struct buflists v_dirtyblkhd; /* dirty blocklist head */
+ long v_numoutput; /* num of writes in progress */
+ enum vtype v_type; /* vnode type */
+ union {
+ struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */
+ struct socket *vu_socket; /* unix ipc (VSOCK) */
+ caddr_t vu_vmdata; /* private data for vm (VREG) */
+ struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */
+ struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */
+ } v_un;
+ struct nqlease *v_lease; /* Soft reference to lease */
+ daddr_t v_lastw; /* last write (write cluster) */
+ daddr_t v_cstart; /* start block of cluster */
+ daddr_t v_lasta; /* last allocation */
+ int v_clen; /* length of current cluster */
+ int v_ralen; /* Read-ahead length */
+ daddr_t v_maxra; /* last readahead block */
+ long v_spare[7]; /* round to 128 bytes */
+ enum vtagtype v_tag; /* type of underlying data */
+ void *v_data; /* private data for fs */
+};
+#define v_mountedhere v_un.vu_mountedhere
+#define v_socket v_un.vu_socket
+#define v_vmdata v_un.vu_vmdata
+#define v_specinfo v_un.vu_specinfo
+#define v_fifoinfo v_un.vu_fifoinfo
+
+/*
+ * Vnode flags.
+ */
+#define VROOT 0x0001 /* root of its file system */
+#define VTEXT 0x0002 /* vnode is a pure text prototype */
+#define VSYSTEM 0x0004 /* vnode being used by kernel */
+#define VXLOCK 0x0100 /* vnode is locked to change underlying type */
+#define VXWANT 0x0200 /* process is waiting for vnode */
+#define VBWAIT 0x0400 /* waiting for output to complete */
+#define VALIASED 0x0800 /* vnode has an alias */
+#define VDIROP 0x1000 /* LFS: vnode is involved in a directory op */
+
+/*
+ * Vnode attributes. A field value of VNOVAL represents a field whose value
+ * is unavailable (getattr) or which is not to be changed (setattr).
+ */
+struct vattr {
+ enum vtype va_type; /* vnode type (for create) */
+ u_short va_mode; /* file's access mode and type */
+ short va_nlink; /* number of references to file */
+ uid_t va_uid; /* owner user id */
+ gid_t va_gid; /* owner group id */
+ long va_fsid; /* file system id (dev for now) */
+ long va_fileid; /* file id */
+ u_quad_t va_size; /* file size in bytes */
+ long va_blocksize; /* blocksize preferred for i/o */
+ struct timespec va_atime; /* time of last access */
+ struct timespec va_mtime; /* time of last modification */
+ struct timespec va_ctime; /* time file changed */
+ u_long va_gen; /* generation number of file */
+ u_long va_flags; /* flags defined for file */
+ dev_t va_rdev; /* device the special file represents */
+ u_quad_t va_bytes; /* bytes of disk space held by file */
+ u_quad_t va_filerev; /* file modification number */
+ u_int va_vaflags; /* operations flags, see below */
+ long va_spare; /* remain quad aligned */
+};
+
+/*
+ * Flags for va_vaflags.
+ */
+#define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */
+
+/*
+ * Flags for ioflag.
+ */
+#define IO_UNIT 0x01 /* do I/O as atomic unit */
+#define IO_APPEND 0x02 /* append write to end */
+#define IO_SYNC 0x04 /* do I/O synchronously */
+#define IO_NODELOCKED 0x08 /* underlying node already locked */
+#define IO_NDELAY 0x10 /* FNDELAY flag set in file table */
+
+/*
+ * Modes. Some values same as Ixxx entries from inode.h for now.
+ */
+#define VSUID 04000 /* set user id on execution */
+#define VSGID 02000 /* set group id on execution */
+#define VSVTX 01000 /* save swapped text even after use */
+#define VREAD 00400 /* read, write, execute permissions */
+#define VWRITE 00200
+#define VEXEC 00100
+
+/*
+ * Token indicating no attribute value yet assigned.
+ */
+#define VNOVAL (-1)
+
+#ifdef KERNEL
+/*
+ * Convert between vnode types and inode formats (since POSIX.1
+ * defines mode word of stat structure in terms of inode formats).
+ */
+extern enum vtype iftovt_tab[];
+extern int vttoif_tab[];
+#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
+#define VTTOIF(indx) (vttoif_tab[(int)(indx)])
+#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode))
+
+/*
+ * Flags to various vnode functions.
+ */
+#define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */
+#define FORCECLOSE 0x0002 /* vflush: force file closure */
+#define WRITECLOSE 0x0004 /* vflush: only close writeable files */
+#define DOCLOSE 0x0008 /* vclean: close active files */
+#define V_SAVE 0x0001 /* vinvalbuf: sync file first */
+#define V_SAVEMETA 0x0002 /* vinvalbuf: leave indirect blocks */
+
+#ifdef DIAGNOSTIC
+#define HOLDRELE(vp) holdrele(vp)
+#define VATTR_NULL(vap) vattr_null(vap)
+#define VHOLD(vp) vhold(vp)
+#define VREF(vp) vref(vp)
+
+void holdrele __P((struct vnode *));
+void vattr_null __P((struct vattr *));
+void vhold __P((struct vnode *));
+void vref __P((struct vnode *));
+#else
+#define HOLDRELE(vp) (vp)->v_holdcnt-- /* decrease buf or page ref */
+#define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */
+#define VHOLD(vp) (vp)->v_holdcnt++ /* increase buf or page ref */
+#define VREF(vp) (vp)->v_usecount++ /* increase reference */
+#endif
+
+#define NULLVP ((struct vnode *)NULL)
+
+/*
+ * Global vnode data.
+ */
+extern struct vnode *rootvnode; /* root (i.e. "/") vnode */
+extern int desiredvnodes; /* number of vnodes desired */
+extern struct vattr va_null; /* predefined null vattr structure */
+
+/*
+ * Macro/function to check for client cache inconsistency w.r.t. leasing.
+ */
+#define LEASE_READ 0x1 /* Check lease for readers */
+#define LEASE_WRITE 0x2 /* Check lease for modifiers */
+
+#ifdef NFS
+void lease_check __P((struct vnode *vp, struct proc *p,
+ struct ucred *ucred, int flag));
+void lease_updatetime __P((int deltat));
+#define LEASE_CHECK(vp, p, cred, flag) lease_check((vp), (p), (cred), (flag))
+#define LEASE_UPDATETIME(dt) lease_updatetime(dt)
+#else
+#define LEASE_CHECK(vp, p, cred, flag)
+#define LEASE_UPDATETIME(dt)
+#endif /* NFS */
+#endif /* KERNEL */
+
+
+/*
+ * Mods for extensibility.
+ */
+
+/*
+ * Flags for vdesc_flags:
+ */
+#define VDESC_MAX_VPS 16
+/* Low order 16 flag bits are reserved for willrele flags for vp arguments. */
+#define VDESC_VP0_WILLRELE 0x0001
+#define VDESC_VP1_WILLRELE 0x0002
+#define VDESC_VP2_WILLRELE 0x0004
+#define VDESC_VP3_WILLRELE 0x0008
+#define VDESC_NOMAP_VPP 0x0100
+#define VDESC_VPP_WILLRELE 0x0200
+
+/*
+ * VDESC_NO_OFFSET is used to identify the end of the offset list
+ * and in places where no such field exists.
+ */
+#define VDESC_NO_OFFSET -1
+
+/*
+ * This structure describes the vnode operation taking place.
+ */
+struct vnodeop_desc {
+ int vdesc_offset; /* offset in vector--first for speed */
+ char *vdesc_name; /* a readable name for debugging */
+ int vdesc_flags; /* VDESC_* flags */
+
+ /*
+ * These ops are used by bypass routines to map and locate arguments.
+ * Creds and procs are not needed in bypass routines, but sometimes
+ * they are useful to (for example) transport layers.
+ * Nameidata is useful because it has a cred in it.
+ */
+ int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */
+ int vdesc_vpp_offset; /* return vpp location */
+ int vdesc_cred_offset; /* cred location, if any */
+ int vdesc_proc_offset; /* proc location, if any */
+ int vdesc_componentname_offset; /* if any */
+ /*
+ * Finally, we've got a list of private data (about each operation)
+ * for each transport layer. (Support to manage this list is not
+ * yet part of BSD.)
+ */
+ caddr_t *vdesc_transports;
+};
+
+#ifdef KERNEL
+/*
+ * A list of all the operation descs.
+ */
+extern struct vnodeop_desc *vnodeop_descs[];
+
+
+/*
+ * This macro is very helpful in defining those offsets in the vdesc struct.
+ *
+ * This is stolen from X11R4. I ignored all the fancy stuff for
+ * Crays, so if you decide to port this to such a serious machine,
+ * you might want to consult Intrinsic.h's XtOffset{,Of,To}.
+ */
+#define VOPARG_OFFSET(p_type,field) \
+ ((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL)))
+#define VOPARG_OFFSETOF(s_type,field) \
+ VOPARG_OFFSET(s_type*,field)
+#define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \
+ ((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET)))
+
+
+/*
+ * This structure is used to configure the new vnodeops vector.
+ */
+struct vnodeopv_entry_desc {
+ struct vnodeop_desc *opve_op; /* which operation this is */
+ int (*opve_impl)(); /* code implementing this operation */
+};
+struct vnodeopv_desc {
+ /* ptr to the ptr to the vector where op should go */
+ int (***opv_desc_vector_p)();
+ struct vnodeopv_entry_desc *opv_desc_ops; /* null terminated list */
+};
+
+/*
+ * A default routine which just returns an error.
+ */
+int vn_default_error __P((void));
+
+/*
+ * A generic structure.
+ * This can be used by bypass routines to identify generic arguments.
+ */
+struct vop_generic_args {
+ struct vnodeop_desc *a_desc;
+ /* other random data follows, presumably */
+};
+
+/*
+ * VOCALL calls an op given an ops vector. We break it out because BSD's
+ * vclean changes the ops vector and then wants to call ops with the old
+ * vector.
+ */
+#define VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP))
+
+/*
+ * This call works for vnodes in the kernel.
+ */
+#define VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP))
+#define VDESC(OP) (& __CONCAT(OP,_desc))
+#define VOFFSET(OP) (VDESC(OP)->vdesc_offset)
+
+/*
+ * Finally, include the default set of vnode operations.
+ */
+#include <vnode_if.h>
+
+/*
+ * Public vnode manipulation functions.
+ */
+struct file;
+struct mount;
+struct nameidata;
+struct proc;
+struct stat;
+struct ucred;
+struct uio;
+struct vattr;
+struct vnode;
+struct vop_bwrite_args;
+
+int bdevvp __P((dev_t dev, struct vnode **vpp));
+int getnewvnode __P((enum vtagtype tag,
+ struct mount *mp, int (**vops)(), struct vnode **vpp));
+int vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred,
+ struct proc *p, int slpflag, int slptimeo));
+void vattr_null __P((struct vattr *vap));
+int vcount __P((struct vnode *vp));
+int vget __P((struct vnode *vp, int lockflag));
+void vgone __P((struct vnode *vp));
+void vgoneall __P((struct vnode *vp));
+int vn_bwrite __P((struct vop_bwrite_args *ap));
+int vn_close __P((struct vnode *vp,
+ int flags, struct ucred *cred, struct proc *p));
+int vn_closefile __P((struct file *fp, struct proc *p));
+int vn_ioctl __P((struct file *fp, int com, caddr_t data, struct proc *p));
+int vn_open __P((struct nameidata *ndp, int fmode, int cmode));
+int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
+ int len, off_t offset, enum uio_seg segflg, int ioflg,
+ struct ucred *cred, int *aresid, struct proc *p));
+int vn_read __P((struct file *fp, struct uio *uio, struct ucred *cred));
+int vn_select __P((struct file *fp, int which, struct proc *p));
+int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
+int vn_write __P((struct file *fp, struct uio *uio, struct ucred *cred));
+struct vnode *
+ checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp));
+void vput __P((struct vnode *vp));
+void vref __P((struct vnode *vp));
+void vrele __P((struct vnode *vp));
+#endif /* KERNEL */
diff --git a/sys/sys/vsio.h b/sys/sys/vsio.h
new file mode 100644
index 000000000000..d84218cc2389
--- /dev/null
+++ b/sys/sys/vsio.h
@@ -0,0 +1,153 @@
+/*-
+ * Copyright (c) 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vsio.h 8.1 (Berkeley) 6/2/93
+ */
+
+ /****************************************************************************
+ * *
+ * Copyright (c) 1983, 1984 by *
+ * DIGITAL EQUIPMENT CORPORATION, Maynard, Massachusetts. *
+ * All rights reserved. *
+ * *
+ * This software is furnished on an as-is basis and may be used and copied *
+ * only with inclusion of the above copyright notice. This software or any *
+ * other copies thereof may be provided or otherwise made available to *
+ * others only for non-commercial purposes. No title to or ownership of *
+ * the software is hereby transferred. *
+ * *
+ * The information in this software is subject to change without notice *
+ * and should not be construed as a commitment by DIGITAL EQUIPMENT *
+ * CORPORATION. *
+ * *
+ * DIGITAL assumes no responsibility for the use or reliability of its *
+ * software on equipment which is not supplied by DIGITAL. *
+ * *
+ * *
+ ****************************************************************************/
+/*
+ * vsio.h - VS100 I/O command definitions
+ *
+ * Author: Christopher A. Kent
+ * Digital Equipment Corporation
+ * Western Research Lab
+ * Date: Tue Jun 21 1983
+ */
+
+/*
+ * Possible ioctl calls
+ */
+
+#define VSIOINIT _IO('V', 0) /* init the device */
+#define VSIOSTART _IOW('V', 1, int) /* start microcode */
+#define VSIOABORT _IO('V', 2) /* abort a command chain */
+#define VSIOPWRUP _IO('V', 3) /* power-up reset */
+#define VSIOGETVER _IOR('V', 4, int) /* get rom version */
+#define VSIOSYNC _IO('V', 6) /* synch with device */
+#define VSIOBBACTL _IOW('V', 8, int) /* control the BBA */
+#define VSIOFIBCTL _IOW('V', 9, int) /* lamp on/off */
+#define VSIOFIBRETRY _IOW('V',10, int) /* fiber retries */
+#define VSIOGETSTATS _IOR('V',11, vsStats) /* get statistics */
+#define VSIOGETIOA _IOR('V',13, vsIoAddrAddr)/* get ioreg address */
+#define VSIOUSERWAIT _IO('V', 15) /* wait for user I/O completion */
+#define VSIOWAITGO _IOW('V', 16, caddr_t) /* wait then go */
+
+
+#define VSIO_OFF 0 /* option off */
+#define VSIO_ON 1 /* option on */
+
+#define VS_FIB_FINITE 1 /* finite retries */
+#define VS_FIB_INFINITE 2 /* infinite retries */
+
+/*
+ * Event queue entries
+ */
+
+typedef struct _vs_event{
+ u_short vse_x; /* x position */
+ u_short vse_y; /* y position */
+ u_short vse_time; /* 10 millisecond units (button only) */
+ char vse_type; /* button or motion? */
+ u_char vse_key; /* the key (button only) */
+ char vse_direction; /* which direction (button only) */
+ char vse_device; /* which device (button only) */
+}vsEvent;
+
+#define VSE_BUTTON 0 /* button moved */
+#define VSE_MMOTION 1 /* mouse moved */
+#define VSE_TMOTION 2 /* tablet moved */
+
+#define VSE_KBTUP 0 /* up */
+#define VSE_KBTDOWN 1 /* down */
+
+#define VSE_MOUSE 1 /* mouse */
+#define VSE_DKB 2 /* main keyboard */
+#define VSE_TABLET 3 /* graphics tablet */
+#define VSE_AUX 4 /* auxiliary */
+#define VSE_CONSOLE 5 /* console */
+
+typedef struct _vsStats{
+ int errors; /* count errors */
+ int unsolIntr; /* count unsolicited interrupts */
+ int overruns; /* event queue overruns */
+ int flashes; /* flashes on fiber link */
+ int ignites; /* times turned on */
+ int douses; /* times turned off */
+ int linkErrors; /* link errors */
+}vsStats;
+
+typedef struct _vs_cursor{
+ short x;
+ short y;
+}vsCursor;
+
+typedef struct _vs_box {
+ short bottom;
+ short right;
+ short left;
+ short top;
+}vsBox;
+
+typedef struct _vsIoAddr {
+ short *ioreg;
+ short status;
+ caddr_t obuff;
+ int obufflen;
+ int reloc;
+ vsEvent *ibuff;
+ int iqsize; /* may assume power of 2 */
+ int ihead; /* atomic write */
+ int itail; /* atomic read */
+ vsCursor mouse; /* atomic read/write */
+ vsBox mbox; /* atomic read/write */
+} vsIoAddr;
+typedef vsIoAddr *vsIoAddrAddr;
diff --git a/sys/sys/wait.h b/sys/sys/wait.h
new file mode 100644
index 000000000000..33a68d9f33fa
--- /dev/null
+++ b/sys/sys/wait.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)wait.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * This file holds definitions relevant to the wait4 system call
+ * and the alternate interfaces that use it (wait, wait3, waitpid).
+ */
+
+/*
+ * Macros to test the exit status returned by wait
+ * and extract the relevant values.
+ *
+ * The low seven bits of the status word (_WSTATUS) are 0 for a process
+ * that exited, _WSTOPPED (0177) for a stopped process, and any other
+ * value for a process terminated by a signal, in which case they are
+ * what WTERMSIG returns. WSTOPSIG and WEXITSTATUS both return the
+ * status word shifted right by eight bits.
+ *
+ * Under _POSIX_SOURCE, _W_INT passes its argument through unchanged.
+ * Otherwise it takes the address of its argument and reads it as an int,
+ * so the argument must be an lvalue; in that configuration WCOREDUMP
+ * tests bit 0200 (WCOREFLAG), and W_EXITCODE and W_STOPCODE assemble a
+ * status word from its parts.
+ */
+#ifdef _POSIX_SOURCE
+#define _W_INT(i) (i)
+#else
+#define _W_INT(w) (*(int *)&(w)) /* convert union wait to int */
+#define WCOREFLAG 0200
+#endif
+
+#define _WSTATUS(x) (_W_INT(x) & 0177)
+#define _WSTOPPED 0177 /* _WSTATUS if process is stopped */
+#define WIFSTOPPED(x) (_WSTATUS(x) == _WSTOPPED)
+#define WSTOPSIG(x) (_W_INT(x) >> 8)
+#define WIFSIGNALED(x) (_WSTATUS(x) != _WSTOPPED && _WSTATUS(x) != 0)
+#define WTERMSIG(x) (_WSTATUS(x))
+#define WIFEXITED(x) (_WSTATUS(x) == 0)
+#define WEXITSTATUS(x) (_W_INT(x) >> 8)
+#ifndef _POSIX_SOURCE
+#define WCOREDUMP(x) (_W_INT(x) & WCOREFLAG)
+
+#define W_EXITCODE(ret, sig) ((ret) << 8 | (sig))
+#define W_STOPCODE(sig) ((sig) << 8 | _WSTOPPED)
+#endif
+
+/*
+ * Option bits for the third argument of wait4. WNOHANG causes the
+ * wait to not hang if there are no stopped or terminated processes, rather
+ * returning an error indication in this case (pid==0). WUNTRACED
+ * indicates that the caller should receive status about untraced children
+ * which stop due to signals. If children are stopped and a wait without
+ * this option is done, it is as though they were still running... nothing
+ * about them is returned.
+ */
+#define WNOHANG 1 /* dont hang in wait */
+#define WUNTRACED 2 /* tell about stopped, untraced children */
+
+#ifndef _POSIX_SOURCE
+/* POSIX extensions and 4.2/4.3 compatibility: */
+
+/*
+ * Tokens for special values of the "pid" parameter to wait4.
+ */
+#define WAIT_ANY (-1) /* any process */
+#define WAIT_MYPGRP 0 /* any process in my process group */
+
+#include <machine/endian.h>
+
+/*
+ * Deprecated:
+ * Structure of the information in the status word returned by wait4.
+ * If w_stopval==WSTOPPED, then the second structure describes
+ * the information returned, else the first.
+ *
+ * The order of the bit-fields inside each structure is selected by
+ * BYTE_ORDER. The lower-case w_* names defined after the union expand
+ * to the nested w_T and w_S members.
+ */
+union wait {
+ int w_status; /* used in syscall */
+ /*
+ * Terminated process status.
+ */
+ struct {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ unsigned int w_Termsig:7, /* termination signal */
+ w_Coredump:1, /* core dump indicator */
+ w_Retcode:8, /* exit code if w_termsig==0 */
+ w_Filler:16; /* upper bits filler */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ unsigned int w_Filler:16, /* upper bits filler */
+ w_Retcode:8, /* exit code if w_termsig==0 */
+ w_Coredump:1, /* core dump indicator */
+ w_Termsig:7; /* termination signal */
+#endif
+ } w_T;
+ /*
+ * Stopped process status. Returned
+ * only for traced children unless requested
+ * with the WUNTRACED option bit.
+ */
+ struct {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ unsigned int w_Stopval:8, /* == WSTOPPED if stopped */
+ w_Stopsig:8, /* signal that stopped us */
+ w_Filler:16; /* upper bits filler */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ unsigned int w_Filler:16, /* upper bits filler */
+ w_Stopsig:8, /* signal that stopped us */
+ w_Stopval:8; /* == WSTOPPED if stopped */
+#endif
+ } w_S;
+};
+#define w_termsig w_T.w_Termsig
+#define w_coredump w_T.w_Coredump
+#define w_retcode w_T.w_Retcode
+#define w_stopval w_S.w_Stopval
+#define w_stopsig w_S.w_Stopsig
+
+#define WSTOPPED _WSTOPPED
+#endif /* _POSIX_SOURCE */
+
+#ifndef KERNEL
+#include <sys/types.h>
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+struct rusage; /* forward declaration */
+
+pid_t wait __P((int *));
+pid_t waitpid __P((pid_t, int *, int));
+#ifndef _POSIX_SOURCE
+pid_t wait3 __P((int *, int, struct rusage *));
+pid_t wait4 __P((pid_t, int *, int, struct rusage *));
+#endif
+__END_DECLS
+#endif
diff --git a/sys/tools/vnode_if.awk b/sys/tools/vnode_if.awk
new file mode 100644
index 000000000000..e190fa04836d
--- /dev/null
+++ b/sys/tools/vnode_if.awk
@@ -0,0 +1,433 @@
+#!/bin/sh -
+#
+# Copyright (c) 1992, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+#
+
+# Script to produce VFS front-end sugar.
+#
+# usage: vnode_if.sh srcfile
+# (where srcfile is currently /sys/kern/vnode_if.src)
+#
+# These awk scripts are not particularly well written, specifically they
+# don't use arrays well and figure out the same information repeatedly.
+# Please rewrite them if you actually understand how to use awk. Note,
+# they use nawk extensions and gawk's toupper.
+
+if [ $# -ne 1 ] ; then
+ echo 'usage: vnode_if.sh srcfile'
+ exit 1
+fi
+
+# Name of the source file.
+SRC=$1
+
+# Names of the created files.
+CFILE=vnode_if.c
+HEADER=vnode_if.h
+
+# Awk program (must support nawk extensions and gawk's "toupper")
+# Use "awk" at Berkeley, "gawk" elsewhere.
+AWK=awk
+
+# Print out header information for vnode_if.h.
+cat << END_OF_LEADING_COMMENT > $HEADER
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+extern struct vnodeop_desc vop_default_desc;
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.h.
+$AWK '
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Get the function name.
+ name = $1;
+ uname = toupper(name);
+
+ # Get the function arguments.
+ for (c1 = 0;; ++c1) {
+ if (getline <= 0)
+ exit
+ if ($0 ~ "^};")
+ break;
+ a[c1] = $0;
+ }
+
+ # Print out the vop_F_args structure.
+ printf("struct %s_args {\n\tstruct vnodeop_desc *a_desc;\n",
+ name);
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ if (t[2] ~ "WILLRELE")
+ c4 = 3;
+ else
+ c4 = 2;
+ for (; c4 < c3; ++c4)
+ printf("%s ", t[c4]);
+ beg = match(t[c3], "[^*]");
+ printf("%sa_%s\n",
+ substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+ }
+ printf("};\n");
+
+ # Print out extern declaration.
+ printf("extern struct vnodeop_desc %s_desc;\n", name);
+
+ # Print out inline struct.
+ printf("static inline int %s(", uname);
+ sep = ", ";
+ for (c2 = 0; c2 < c1; ++c2) {
+ if (c2 == c1 - 1)
+ sep = ")\n";
+ c3 = split(a[c2], t);
+ beg = match(t[c3], "[^*]");
+ end = match(t[c3], ";");
+ printf("%s%s", substr(t[c3], beg, end - beg), sep);
+ }
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ if (t[2] ~ "WILLRELE")
+ c4 = 3;
+ else
+ c4 = 2;
+ for (; c4 < c3; ++c4)
+ printf("%s ", t[c4]);
+ beg = match(t[c3], "[^*]");
+ printf("%s%s\n",
+ substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+ }
+ printf("{\n\tstruct %s_args a;\n\n", name);
+ printf("\ta.a_desc = VDESC(%s);\n", name);
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ beg = match(t[c3], "[^*]");
+ end = match(t[c3], ";");
+ printf("a.a_%s = %s\n",
+ substr(t[c3], beg, end - beg), substr(t[c3], beg));
+ }
+ c1 = split(a[0], t);
+ beg = match(t[c1], "[^*]");
+ end = match(t[c1], ";");
+ printf("\treturn (VCALL(%s, VOFFSET(%s), &a));\n}\n",
+ substr(t[c1], beg, end - beg), name);
+ }' < $SRC >> $HEADER
+
+# Print out header information for vnode_if.c.
+cat << END_OF_LEADING_COMMENT > $CFILE
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+struct vnodeop_desc vop_default_desc = {
+ 0,
+ "default",
+ 0,
+ NULL,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.c.
+$AWK 'function kill_surrounding_ws (s) {
+ # Return s with leading and trailing blanks and tabs removed.
+ sub (/^[ \t]*/, "", s);
+ sub (/[ \t]*$/, "", s);
+ return s;
+ }
+
+ # Report a malformed source line on standard error and stop with a
+ # failure status. The message goes through a pipe to cat so that no
+ # special output file name is needed.
+ function bail (msg) {
+ print "vnode_if.sh: " msg | "cat 1>&2";
+ exit 1;
+ }
+
+ # Read the argument lines of the current operation, up to the first
+ # line containing "}". For each argument, record its direction, its
+ # WILLRELE/WONTRELE marking, its type and its name in the global
+ # arrays dirs, reles, types and args (indexed from 1), and leave the
+ # count in numargs.
+ function read_args() {
+ numargs = 0;
+ # getline yields -1 on a read error; only a positive result is a line.
+ while ((getline ln) > 0) {
+ if (ln ~ /}/) {
+ break;
+ };
+
+ # Delete comments, if any.
+ gsub (/\/\*.*\*\//, "", ln);
+
+ # Delete leading/trailing space.
+ ln = kill_surrounding_ws(ln);
+
+ # Pick off direction.
+ if (1 == sub(/^INOUT[ \t]+/, "", ln))
+ dir = "INOUT";
+ else if (1 == sub(/^IN[ \t]+/, "", ln))
+ dir = "IN";
+ else if (1 == sub(/^OUT[ \t]+/, "", ln))
+ dir = "OUT";
+ else
+ bail("No IN/OUT direction for \"" ln "\".");
+
+ # check for "WILLRELE"
+ if (1 == sub(/^WILLRELE[ \t]+/, "", ln)) {
+ rele = "WILLRELE";
+ } else {
+ rele = "WONTRELE";
+ };
+
+ # kill trailing ;
+ if (1 != sub (/;$/, "", ln)) {
+ bail("Missing end-of-line ; in \"" ln "\".");
+ };
+
+ # pick off variable name
+ if (!(i = match(ln, /[A-Za-z0-9_]+$/))) {
+ bail("Missing var name \"a_foo\" in \"" ln "\".");
+ };
+ arg = substr (ln, i);
+ # Want to <<substr(ln, i) = "";>>, but nawk cannot.
+ # Hack around this.
+ ln = substr(ln, 1, i-1);
+
+ # what is left must be type
+ # (but clean it up some)
+ type = ln;
+ gsub (/[ \t]+/, " ", type); # condense whitespace
+ type = kill_surrounding_ws(type);
+
+ # (boy this was easier in Perl)
+
+ numargs++;
+ dirs[numargs] = dir;
+ reles[numargs] = rele;
+ types[numargs] = type;
+ args[numargs] = arg;
+ };
+ }
+
+ function generate_operation_vp_offsets() {
+ printf ("int %s_vp_offsets[] = {\n", name);
+ # as a side effect, figure out the releflags (a global, read later by generate_operation_desc)
+ releflags = "";
+ vpnum = 0; # counts only the "struct vnode *" arguments
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode *") {
+ printf ("\tVOPARG_OFFSETOF(struct %s_args,a_%s),\n",
+ name, args[i]);
+ if (reles[i] == "WILLRELE") {
+ releflags = releflags "|VDESC_VP" vpnum "_WILLRELE";
+ };
+ vpnum++;
+ };
+ };
+ sub (/^\|/, "", releflags); # drop the leading "|" separator
+ print "\tVDESC_NO_OFFSET"; # always the last entry of the array
+ print "};";
+ }
+
+ function find_arg_with_type (type) { # offset expression for the first argument whose type string equals type
+ for (i=1; i<=numargs; i++) { # i is a global, shared with the other functions
+ if (types[i] == type) {
+ return "VOPARG_OFFSETOF(struct " name "_args,a_" args[i] ")";
+ };
+ };
+ return "VDESC_NO_OFFSET"; # no argument of that type
+ }
+
+ function generate_operation_desc() {
+ printf ("struct vnodeop_desc %s_desc = {\n", name);
+ # offset (always emitted as 0 here)
+ printf ("\t0,\n");
+ # printable name
+ printf ("\t\"%s\",\n", name);
+ # flags: releflags from generate_operation_vp_offsets, plus VDESC_VPP_WILLRELE if any
+ vppwillrele = "";
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode **" &&
+ (reles[i] == "WILLRELE")) {
+ vppwillrele = "|VDESC_VPP_WILLRELE";
+ };
+ };
+ if (releflags == "") {
+ printf ("\t0%s,\n", vppwillrele);
+ } else {
+ printf ("\t%s%s,\n", releflags, vppwillrele);
+ };
+ # vp offsets
+ printf ("\t%s_vp_offsets,\n", name);
+ # vpp (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct vnode **"));
+ # cred (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct ucred *"));
+ # proc (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct proc *"));
+ # componentname (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct componentname *"));
+ # transport layer information (always emitted as NULL here)
+ printf ("\tNULL,\n};\n");
+ }
+
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # get the function name
+ name = $1;
+
+ # get the function arguments (fills numargs, dirs, reles, types, args)
+ read_args();
+
+ # Print out the vop_F_vp_offsets structure. This all depends
+ # on naming conventions and nothing else.
+ generate_operation_vp_offsets();
+
+ # Print out the vnodeop_desc structure.
+ generate_operation_desc();
+
+ printf "\n";
+
+ }' < $SRC >> $CFILE
+# THINGS THAT DON'T WORK RIGHT YET.
+#
+# Two existing BSD vnodeops (bwrite and strategy) don't take any vnodes as
+# arguments. This means that these operations can't function successfully
+# through a bypass routine.
+#
+# Bwrite and strategy will be replaced when the VM page/buffer cache
+# integration happens.
+#
+# To get around this problem for now we handle these ops as special cases.
+
+cat << END_OF_SPECIAL_CASES >> $HEADER
+#include <sys/buf.h>
+struct vop_strategy_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_strategy_desc;
+static inline int VOP_STRATEGY(bp)
+ struct buf *bp;
+{
+ struct vop_strategy_args a;
+
+ a.a_desc = VDESC(vop_strategy);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_strategy), &a));
+}
+
+struct vop_bwrite_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_bwrite_desc;
+static inline int VOP_BWRITE(bp)
+ struct buf *bp;
+{
+ struct vop_bwrite_args a;
+
+ a.a_desc = VDESC(vop_bwrite);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_bwrite), &a));
+}
+END_OF_SPECIAL_CASES
+
+cat << END_OF_SPECIAL_CASES >> $CFILE
+int vop_strategy_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_strategy_desc = {
+ 0,
+ "vop_strategy",
+ 0,
+ vop_strategy_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+int vop_bwrite_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_bwrite_desc = {
+ 0,
+ "vop_bwrite",
+ 0,
+ vop_bwrite_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+END_OF_SPECIAL_CASES
+
+# Add the vfs_op_descs array to the C file: default, strategy and bwrite first, then one entry per operation in the source, then NULL.
+$AWK '
+ BEGIN {
+ printf("\nstruct vnodeop_desc *vfs_op_descs[] = {\n");
+ printf("\t&vop_default_desc, /* MUST BE FIRST */\n");
+ printf("\t&vop_strategy_desc, /* XXX: SPECIAL CASE */\n");
+ printf("\t&vop_bwrite_desc, /* XXX: SPECIAL CASE */\n");
+ }
+ END {
+ printf("\tNULL\n};\n");
+ }
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Emit a pointer to the descriptor of this operation; $1 is the function name.
+ printf("\t&%s_desc,\n", $1);
+
+ # Skip the function arguments, up to the line beginning with "};".
+ for (;;) {
+ if (getline <= 0)
+ exit
+ if ($0 ~ "^};")
+ break;
+ }
+ }' < $SRC >> $CFILE
+
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
new file mode 100644
index 000000000000..cdd2e4b2b35c
--- /dev/null
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -0,0 +1,1474 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+
+#include <vm/vm.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+extern u_long nextgennumber;
+
+static daddr_t ffs_alloccg __P((struct inode *, int, daddr_t, int));
+static daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, daddr_t));
+static daddr_t ffs_clusteralloc __P((struct inode *, int, daddr_t, int));
+static ino_t ffs_dirpref __P((struct fs *));
+static daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
+static void ffs_fserr __P((struct fs *, u_int, char *));
+static u_long ffs_hashalloc
+ __P((struct inode *, int, long, int, u_long (*)()));
+static ino_t ffs_nodealloccg __P((struct inode *, int, daddr_t, int));
+static daddr_t ffs_mapsearch __P((struct fs *, struct cg *, daddr_t, int));
+
+/*
+ * Allocate a block in the file system.
+ *
+ * The size of the requested block is given, which must be some
+ * multiple of fs_fsize and <= fs_bsize.
+ * A preference may be optionally specified. If a preference is given
+ * the following hierarchy is used to allocate a block:
+ * 1) allocate the requested block.
+ * 2) allocate a rotationally optimal block in the same cylinder.
+ * 3) allocate a block in the same cylinder group.
+ * 4) quadratically rehash into other cylinder groups, until an
+ * available block is located.
+ * If no block preference is given the following hierarchy is used
+ * to allocate a block:
+ * 1) allocate a block in the cylinder group that contains the
+ * inode for the file.
+ * 2) quadratically rehash into other cylinder groups, until an
+ * available block is located.
+ *
+ * Arguments:
+ * ip - inode the block is allocated for; its i_blocks and i_flag
+ * are updated on success.
+ * lbn - logical block number; not referenced in this function.
+ * bpref - preferred block; 0, or any value >= fs_size, means no
+ * preference.
+ * size - number of bytes requested.
+ * cred - credential of the caller; a caller with a non-zero cr_uid
+ * is refused once freespace(fs, fs_minfree) is <= 0.
+ * bnp - receives the allocated block number; set to 0 on entry and
+ * left at 0 on failure.
+ *
+ * Returns 0 on success. Returns ENOSPC when no space is available,
+ * after reporting through ffs_fserr() and uprintf(). When QUOTA is
+ * defined, an error from chkdq() is returned as is; if the allocation
+ * then fails, the quota charge is reversed before returning.
+ */
+ffs_alloc(ip, lbn, bpref, size, cred, bnp)
+ register struct inode *ip;
+ daddr_t lbn, bpref;
+ int size;
+ struct ucred *cred;
+ daddr_t *bnp;
+{
+ register struct fs *fs;
+ daddr_t bno;
+ int cg, error;
+
+ *bnp = 0; /* stays 0 unless the allocation succeeds */
+ fs = ip->i_fs;
+#ifdef DIAGNOSTIC
+ if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
+ printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
+ ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
+ panic("ffs_alloc: bad size");
+ }
+ if (cred == NOCRED)
+ panic("ffs_alloc: missing credential\n");
+#endif /* DIAGNOSTIC */
+ if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) /* full block wanted, none free */
+ goto nospace;
+ if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0) /* uid 0 bypasses this check */
+ goto nospace;
+#ifdef QUOTA
+ if (error = chkdq(ip, (long)btodb(size), cred, 0))
+ return (error);
+#endif
+ if (bpref >= fs->fs_size)
+ bpref = 0;
+ if (bpref == 0)
+ cg = ino_to_cg(fs, ip->i_number);
+ else
+ cg = dtog(fs, bpref);
+ bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
+ (u_long (*)())ffs_alloccg);
+ if (bno > 0) {
+ ip->i_blocks += btodb(size);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ *bnp = bno;
+ return (0);
+ }
+#ifdef QUOTA
+ /*
+ * Restore user's disk quota because allocation failed.
+ */
+ (void) chkdq(ip, (long)-btodb(size), cred, FORCE);
+#endif
+nospace:
+ ffs_fserr(fs, cred->cr_uid, "file system full");
+ uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
+ return (ENOSPC);
+}
+
+/*
+ * Reallocate a fragment to a bigger size
+ *
+ * The number and size of the old block is given, and a preference
+ * and new size is also specified. The allocator attempts to extend
+ * the original block. Failing that, the regular block allocator is
+ * invoked to get an appropriate block.
+ *
+ * Arguments:
+ * ip - inode owning the fragment; its i_blocks and i_flag are
+ * updated on success.
+ * lbprev - index into ip->i_db[] of the block being extended; the
+ * function panics if that entry is 0.
+ * bpref - preferred location for a new block; values >= fs_size are
+ * treated as 0.
+ * osize - current size of the fragment in bytes.
+ * nsize - requested size in bytes.
+ * cred - credential of the caller; a caller with a non-zero cr_uid
+ * is refused once freespace(fs, fs_minfree) is <= 0.
+ * bpp - receives the buffer for the block, resized to nsize with the
+ * bytes past osize zeroed; set to 0 on entry and left at 0 on
+ * failure.
+ *
+ * Returns 0 on success, the error from bread() (or from chkdq() when
+ * QUOTA is defined), or ENOSPC when no space is available. On the
+ * ENOSPC path the condition is reported through ffs_fserr() and
+ * uprintf(). As a side effect fs_optim may be switched between
+ * FS_OPTSPACE and FS_OPTTIME, which is logged.
+ */
+ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
+ register struct inode *ip;
+ daddr_t lbprev;
+ daddr_t bpref;
+ int osize, nsize;
+ struct ucred *cred;
+ struct buf **bpp;
+{
+ register struct fs *fs;
+ struct buf *bp;
+ int cg, request, error;
+ daddr_t bprev, bno;
+
+ *bpp = 0; /* stays 0 unless the reallocation succeeds */
+ fs = ip->i_fs;
+#ifdef DIAGNOSTIC
+ if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
+ (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
+ printf(
+ "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
+ ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
+ panic("ffs_realloccg: bad size");
+ }
+ if (cred == NOCRED)
+ panic("ffs_realloccg: missing credential\n");
+#endif /* DIAGNOSTIC */
+ if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0) /* uid 0 bypasses this check */
+ goto nospace;
+ if ((bprev = ip->i_db[lbprev]) == 0) {
+ printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
+ ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
+ panic("ffs_realloccg: bad bprev");
+ }
+ /*
+ * Allocate the extra space in the buffer.
+ */
+ if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) {
+ brelse(bp);
+ return (error);
+ }
+#ifdef QUOTA
+ if (error = chkdq(ip, (long)btodb(nsize - osize), cred, 0)) {
+ brelse(bp);
+ return (error);
+ }
+#endif
+ /*
+ * Check for extension in the existing location.
+ */
+ cg = dtog(fs, bprev);
+ if (bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) {
+ if (bp->b_blkno != fsbtodb(fs, bno))
+ panic("bad blockno");
+ ip->i_blocks += btodb(nsize - osize);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ allocbuf(bp, nsize);
+ bp->b_flags |= B_DONE;
+ bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
+ *bpp = bp;
+ return (0);
+ }
+ /*
+ * Allocate a new disk location.
+ */
+ if (bpref >= fs->fs_size)
+ bpref = 0;
+ switch ((int)fs->fs_optim) {
+ case FS_OPTSPACE:
+ /*
+ * Allocate an exact sized fragment. Although this makes
+ * best use of space, we will waste time relocating it if
+ * the file continues to grow. If the fragmentation is
+ * less than half of the minimum free reserve, we choose
+ * to begin optimizing for time.
+ */
+ request = nsize;
+ if (fs->fs_minfree < 5 ||
+ fs->fs_cstotal.cs_nffree >
+ fs->fs_dsize * fs->fs_minfree / (2 * 100))
+ break;
+ log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
+ fs->fs_fsmnt);
+ fs->fs_optim = FS_OPTTIME;
+ break;
+ case FS_OPTTIME:
+ /*
+ * At this point we have discovered a file that is trying to
+ * grow a small fragment to a larger fragment. To save time,
+ * we allocate a full sized block, then free the unused portion.
+ * If the file continues to grow, the `ffs_fragextend' call
+ * above will be able to grow it in place without further
+ * copying. If aberrant programs cause disk fragmentation to
+ * grow within 2% of the free reserve, we choose to begin
+ * optimizing for space.
+ */
+ request = fs->fs_bsize;
+ if (fs->fs_cstotal.cs_nffree <
+ fs->fs_dsize * (fs->fs_minfree - 2) / 100)
+ break;
+ log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
+ fs->fs_fsmnt);
+ fs->fs_optim = FS_OPTSPACE;
+ break;
+ default:
+ printf("dev = 0x%x, optim = %d, fs = %s\n",
+ ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
+ panic("ffs_realloccg: bad optim");
+ /* NOTREACHED */
+ }
+ bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
+ (u_long (*)())ffs_alloccg);
+ if (bno > 0) {
+ bp->b_blkno = fsbtodb(fs, bno); /* same buffer, new disk address */
+ (void) vnode_pager_uncache(ITOV(ip));
+ ffs_blkfree(ip, bprev, (long)osize); /* release the old location */
+ if (nsize < request) /* give back the part of the request beyond nsize */
+ ffs_blkfree(ip, bno + numfrags(fs, nsize),
+ (long)(request - nsize));
+ ip->i_blocks += btodb(nsize - osize);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ allocbuf(bp, nsize);
+ bp->b_flags |= B_DONE;
+ bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
+ *bpp = bp;
+ return (0);
+ }
+#ifdef QUOTA
+ /*
+ * Restore user's disk quota because allocation failed.
+ */
+ (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
+#endif
+ brelse(bp);
+nospace:
+ /*
+ * no space available
+ */
+ ffs_fserr(fs, cred->cr_uid, "file system full");
+ uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
+ return (ENOSPC);
+}
+
+/*
+ * Reallocate a sequence of blocks into a contiguous sequence of blocks.
+ *
+ * The vnode and an array of buffer pointers for a range of sequential
+ * logical blocks to be made contiguous is given. The allocator attempts
+ * to find a range of sequential blocks starting as close as possible to
+ * an fs_rotdelay offset from the end of the allocation for the logical
+ * block immediately preceding the current range. If successful, the
+ * physical block numbers in the buffer pointers and in the inode are
+ * changed to reflect the new allocation. If unsuccessful, the allocation
+ * is left unchanged. The success in doing the reallocation is returned.
+ * Note that the error return is not reflected back to the user. Rather
+ * the previous block allocation will be used.
+ *
+ * Arguments (via ap):
+ *	a_vp		vnode of the file whose blocks are being moved
+ *	a_buflist	cluster_save holding the bs_nchildren buffers of
+ *			the logically sequential range
+ *
+ * Returns 0 once the range has been moved and ENOSPC for every failure:
+ * fs_contigsumsize <= 0, first and last buffer lying in different
+ * cylinder groups, a block map that cannot be looked up or read, or no
+ * free cluster of the required length.
+ */
+#include <sys/sysctl.h>
+int doasyncfree = 1;
+struct ctldebug debug14 = { "doasyncfree", &doasyncfree };
+int
+ffs_reallocblks(ap)
+	struct vop_reallocblks_args /* {
+		struct vnode *a_vp;
+		struct cluster_save *a_buflist;
+	} */ *ap;
+{
+	struct fs *fs;
+	struct inode *ip;
+	struct vnode *vp;
+	struct buf *sbp, *ebp;
+	daddr_t *bap, *sbap, *ebap;
+	struct cluster_save *buflist;
+	daddr_t start_lbn, end_lbn, soff, newblk, blkno;
+	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
+	int i, len, start_lvl, end_lvl, pref, ssize;
+
+	vp = ap->a_vp;
+	ip = VTOI(vp);
+	fs = ip->i_fs;
+	if (fs->fs_contigsumsize <= 0)
+		return (ENOSPC);
+	buflist = ap->a_buflist;
+	len = buflist->bs_nchildren;
+	start_lbn = buflist->bs_children[0]->b_lblkno;
+	end_lbn = start_lbn + len - 1;
+#ifdef DIAGNOSTIC
+	for (i = 1; i < len; i++)
+		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
+			panic("ffs_reallocblks: non-cluster");
+#endif
+	/*
+	 * If the latest allocation is in a new cylinder group, assume that
+	 * the filesystem has decided to move and do not force it back to
+	 * the previous cylinder group.
+	 */
+	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
+	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
+		return (ENOSPC);
+	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
+	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
+		return (ENOSPC);
+	/*
+	 * Get the starting offset and block map for the first block.
+	 */
+	if (start_lvl == 0) {
+		sbap = &ip->i_db[0];
+		soff = start_lbn;
+	} else {
+		idp = &start_ap[start_lvl - 1];
+		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
+			brelse(sbp);
+			return (ENOSPC);
+		}
+		sbap = (daddr_t *)sbp->b_data;
+		soff = idp->in_off;
+	}
+	/*
+	 * Find the preferred location for the cluster.
+	 */
+	pref = ffs_blkpref(ip, start_lbn, soff, sbap);
+	/*
+	 * If the block range spans two block maps, get the second map.
+	 */
+	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
+		ssize = len;
+	} else {
+#ifdef DIAGNOSTIC
+		/*
+		 * start_ap[] holds nothing to compare against when the
+		 * range begins in the inode's direct blocks, so only
+		 * index it when start_lvl is non-zero.
+		 */
+		if (start_lvl > 0 &&
+		    start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
+			panic("ffs_reallocblk: start == end");
+#endif
+		/* ssize counts the blocks mapped by the first block map. */
+		ssize = len - (idp->in_off + 1);
+		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
+			goto fail;
+		ebap = (daddr_t *)ebp->b_data;
+	}
+	/*
+	 * Search the block map looking for an allocation of the desired size.
+	 */
+	if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
+	    len, (u_long (*)())ffs_clusteralloc)) == 0)
+		goto fail;
+	/*
+	 * We have found a new contiguous block.
+	 *
+	 * First we have to replace the old block pointers with the new
+	 * block pointers in the inode and indirect blocks associated
+	 * with the file.
+	 */
+	blkno = newblk;
+	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
+		/* Switch to the second block map once the first is used up. */
+		if (i == ssize)
+			bap = ebap;
+#ifdef DIAGNOSTIC
+		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
+			panic("ffs_reallocblks: alloc mismatch");
+#endif
+		*bap++ = blkno;
+	}
+	/*
+	 * Next we must write out the modified inode and indirect blocks.
+	 * For strict correctness, the writes should be synchronous since
+	 * the old block values may have been written to disk. In practice
+	 * they are almost never written, but if we are concerned about
+	 * strict correctness, the `doasyncfree' flag should be set to zero.
+	 *
+	 * The test on `doasyncfree' should be changed to test a flag
+	 * that shows whether the associated buffers and inodes have
+	 * been written. The flag should be set when the cluster is
+	 * started and cleared whenever the buffer or inode is flushed.
+	 * We can then check below to see if it is set, and do the
+	 * synchronous write only when it has been cleared.
+	 */
+	if (sbap != &ip->i_db[0]) {
+		if (doasyncfree)
+			bdwrite(sbp);
+		else
+			bwrite(sbp);
+	} else {
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		if (!doasyncfree)
+			VOP_UPDATE(vp, &time, &time, MNT_WAIT);
+	}
+	if (ssize < len) {
+		if (doasyncfree)
+			bdwrite(ebp);
+		else
+			bwrite(ebp);
+	}
+	/*
+	 * Last, free the old blocks and assign the new blocks to the buffers.
+	 */
+	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
+		ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
+		    fs->fs_bsize);
+		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
+	}
+	return (0);
+
+fail:
+	/* Release whichever block map buffers were obtained above. */
+	if (ssize < len)
+		brelse(ebp);
+	if (sbap != &ip->i_db[0])
+		brelse(sbp);
+	return (ENOSPC);
+}
+
+/*
+ * Allocate an inode in the file system.
+ *
+ * If allocating a directory, use ffs_dirpref to select the inode.
+ * If allocating in a directory, the following hierarchy is followed:
+ *   1) allocate the preferred inode.
+ *   2) allocate an inode in the same cylinder group.
+ *   3) quadratically rehash into other cylinder groups, until an
+ *      available inode is located.
+ * If no inode preference is given the following hierarchy is used
+ * to allocate an inode:
+ *   1) allocate an inode in cylinder group 0.
+ *   2) quadratically rehash into other cylinder groups, until an
+ *      available inode is located.
+ *
+ * Arguments (via ap):
+ *	a_pvp	vnode of the parent; supplies the file system and the
+ *		default inode preference
+ *	a_mode	mode of the new inode (IFDIR selects ffs_dirpref)
+ *	a_cred	credentials; only cr_uid is used, for the error log
+ *	a_vpp	receives the new vnode; set to NULL on entry
+ *
+ * Returns 0 on success, ENOSPC when no inode is free, or the error
+ * from VFS_VGET (after handing the inode back with VOP_VFREE).
+ */
+int
+ffs_valloc(ap)
+	struct vop_valloc_args /* {
+		struct vnode *a_pvp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct vnode **a_vpp;
+	} */ *ap;
+{
+	register struct vnode *pvp = ap->a_pvp;
+	register struct inode *pip;
+	register struct fs *fs;
+	register struct inode *ip;
+	mode_t mode = ap->a_mode;
+	ino_t ino, ipref;
+	int cg, error;
+
+	*ap->a_vpp = NULL;
+	pip = VTOI(pvp);
+	fs = pip->i_fs;
+	if (fs->fs_cstotal.cs_nifree == 0)
+		goto noinodes;
+
+	if ((mode & IFMT) == IFDIR)
+		ipref = ffs_dirpref(fs);
+	else
+		ipref = pip->i_number;
+	/* An out-of-range preference falls back to inode 0. */
+	if (ipref >= fs->fs_ncg * fs->fs_ipg)
+		ipref = 0;
+	cg = ino_to_cg(fs, ipref);
+	ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg);
+	if (ino == 0)
+		goto noinodes;
+	error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
+	if (error) {
+		VOP_VFREE(pvp, ino, mode);
+		return (error);
+	}
+	ip = VTOI(*ap->a_vpp);
+	/* A freshly allocated inode must not already carry a mode. */
+	if (ip->i_mode) {
+		printf("mode = 0%o, inum = %d, fs = %s\n",
+		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
+		panic("ffs_valloc: dup alloc");
+	}
+	if (ip->i_blocks) {				/* XXX */
+		printf("free inode %s/%d had %d blocks\n",
+		    fs->fs_fsmnt, ino, ip->i_blocks);
+		ip->i_blocks = 0;
+	}
+	ip->i_flags = 0;
+	/*
+	 * Set up a new generation number for this inode.
+	 */
+	if (++nextgennumber < (u_long)time.tv_sec)
+		nextgennumber = time.tv_sec;
+	ip->i_gen = nextgennumber;
+	return (0);
+noinodes:
+	ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
+	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
+	return (ENOSPC);
+}
+
+/*
+ * Choose a cylinder group in which to place a new directory.
+ *
+ * Only cylinder groups whose free inode count is at least the file
+ * system average are considered; of those, the group recording the
+ * fewest directories wins, with the lowest-numbered group kept on a
+ * tie. Group 0 is used when no group passes both tests. The result
+ * is fs_ipg multiplied by the chosen group number, as an ino_t.
+ */
+static ino_t
+ffs_dirpref(fs)
+	register struct fs *fs;
+{
+	int cg;
+	int bestcg = 0;
+	int fewest = fs->fs_ipg;
+	int ifreeavg = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
+
+	for (cg = 0; cg < fs->fs_ncg; cg++) {
+		/* Ignore groups with fewer free inodes than average. */
+		if (fs->fs_cs(fs, cg).cs_nifree < ifreeavg)
+			continue;
+		/* Strict comparison: an equal count does not displace. */
+		if (fs->fs_cs(fs, cg).cs_ndir >= fewest)
+			continue;
+		bestcg = cg;
+		fewest = fs->fs_cs(fs, cg).cs_ndir;
+	}
+	return ((ino_t)(fs->fs_ipg * bestcg));
+}
+
+/*
+ * Select the desired position for the next block in a file. The file is
+ * logically divided into sections. The first section is composed of the
+ * direct blocks. Each additional section contains fs_maxbpg blocks.
+ *
+ * If no blocks have been allocated in the first section, the policy is to
+ * request a block in the same cylinder group as the inode that describes
+ * the file. If no blocks have been allocated in any other section, the
+ * policy is to place the section in a cylinder group with a greater than
+ * average number of free blocks. An appropriate cylinder group is found
+ * by using a rotor that sweeps the cylinder groups. When a new group of
+ * blocks is needed, the sweep begins in the cylinder group following the
+ * cylinder group from which the previous allocation was made. The sweep
+ * continues until a cylinder group with greater than the average number
+ * of free blocks is found. If the allocation is for the first block in an
+ * indirect block, the information on the previous allocation is unavailable;
+ * here a best guess is made based upon the logical block number being
+ * allocated.
+ *
+ * If a section is already partially allocated, the policy is to
+ * contiguously allocate fs_maxcontig blocks. The end of one of these
+ * contiguous blocks and the beginning of the next is physically separated
+ * so that the disk head will be in transit between them for at least
+ * fs_rotdelay milliseconds. This is to allow time for the processor to
+ * schedule another I/O transfer.
+ *
+ * ip is the inode being extended, lbn the logical block being placed,
+ * and bap/indx the block map and the slot in it that lbn will occupy;
+ * bap[indx - 1] is consulted as the previous allocation. Returns the
+ * preferred block address, or NULL when the sweep finds no cylinder
+ * group holding at least the average number of free blocks. As a side
+ * effect the sweep stores the chosen group in fs_cgrotor.
+ */
+daddr_t
+ffs_blkpref(ip, lbn, indx, bap)
+ struct inode *ip;
+ daddr_t lbn;
+ int indx;
+ daddr_t *bap;
+{
+ register struct fs *fs;
+ register int cg;
+ int avgbfree, startcg;
+ daddr_t nextblk;
+
+ fs = ip->i_fs;
+ if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { /* indx == 0 short-circuits before bap[-1] is read */
+ if (lbn < NDADDR) {
+ cg = ino_to_cg(fs, ip->i_number);
+ return (fs->fs_fpg * cg + fs->fs_frag);
+ }
+ /*
+ * Find a cylinder with greater than average number of
+ * unused data blocks.
+ */
+ if (indx == 0 || bap[indx - 1] == 0)
+ startcg =
+ ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
+ else
+ startcg = dtog(fs, bap[indx - 1]) + 1;
+ startcg %= fs->fs_ncg;
+ avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
+ for (cg = startcg; cg < fs->fs_ncg; cg++) /* sweep from startcg to the last group */
+ if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
+ fs->fs_cgrotor = cg;
+ return (fs->fs_fpg * cg + fs->fs_frag);
+ }
+ for (cg = 0; cg <= startcg; cg++) /* wrap around; startcg itself is tested again */
+ if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
+ fs->fs_cgrotor = cg;
+ return (fs->fs_fpg * cg + fs->fs_frag);
+ }
+ return (NULL);
+ }
+ /*
+ * One or more previous blocks have been laid out. If less
+ * than fs_maxcontig previous blocks are contiguous, the
+ * next block is requested contiguously, otherwise it is
+ * requested rotationally delayed by fs_rotdelay milliseconds.
+ */
+ nextblk = bap[indx - 1] + fs->fs_frag;
+ if (indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] +
+ blkstofrags(fs, fs->fs_maxcontig) != nextblk)
+ return (nextblk);
+ if (fs->fs_rotdelay != 0)
+ /*
+ * Here we convert ms of delay to frags as:
+ * (frags) = (ms) * (rev/sec) * (sect/rev) /
+ * ((sect/frag) * (ms/sec))
+ * then round up to the next block.
+ */
+ nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
+ (NSPF(fs) * 1000), fs->fs_frag);
+ return (nextblk);
+}
+
+/*
+ * Cylinder group overflow search.
+ *
+ * Calls `allocator' on a series of cylinder groups until one call
+ * returns non-zero, trying them in this order:
+ *   1) the requested group `cg', with the caller's `pref';
+ *   2) groups reached by successively adding 1, 2, 4, ... to the
+ *      group number (wrapping at fs_ncg), with a preference of 0;
+ *   3) a linear sweep beginning two groups past the original one,
+ *      again with a preference of 0.
+ * `size' is handed to the allocator unchanged. Returns the first
+ * non-zero allocator result, or NULL when every attempt returned zero.
+ */
+/*VARARGS5*/
+static u_long
+ffs_hashalloc(ip, cg, pref, size, allocator)
+	struct inode *ip;
+	int cg;
+	long pref;
+	int size;	/* size for data blocks, mode for inodes */
+	u_long (*allocator)();
+{
+	register struct fs *fs = ip->i_fs;
+	long found;
+	int stride, tries;
+	int homecg = cg;
+
+	/* Pass 1: the preferred cylinder group. */
+	if ((found = (*allocator)(ip, cg, pref, size)) != 0)
+		return (found);
+
+	/* Pass 2: quadratic rehash, doubling the stride after each try. */
+	stride = 1;
+	while (stride < fs->fs_ncg) {
+		cg += stride;
+		if (cg >= fs->fs_ncg)
+			cg -= fs->fs_ncg;
+		if ((found = (*allocator)(ip, cg, 0, size)) != 0)
+			return (found);
+		stride *= 2;
+	}
+
+	/*
+	 * Pass 3: brute force. Counting starts at 2 because the home
+	 * group was tried in pass 1 and the one after it in pass 2.
+	 */
+	cg = (homecg + 2) % fs->fs_ncg;
+	for (tries = 2; tries < fs->fs_ncg; tries++) {
+		if ((found = (*allocator)(ip, cg, 0, size)) != 0)
+			return (found);
+		if (++cg == fs->fs_ncg)
+			cg = 0;
+	}
+	return (NULL);
+}
+
+/*
+ * Determine whether a fragment can be extended.
+ *
+ * Check to see if the necessary fragments are available, and
+ * if they are, allocate them.
+ *
+ * ip is the inode owning the fragment, cg the cylinder group whose map
+ * is examined, bprev the address of the existing fragment, and
+ * osize/nsize its current and requested sizes (both are fed to
+ * numfrags(), so presumably byte counts -- confirm against callers).
+ * Returns bprev when the fragment was grown in place, and NULL when it
+ * could not be: too few free frags in the group summary, the extension
+ * would cross a block boundary, the cylinder group could not be read or
+ * fails cg_chkmagic(), or one of the needed frags is not free.
+ */
+static daddr_t
+ffs_fragextend(ip, cg, bprev, osize, nsize)
+ struct inode *ip;
+ int cg;
+ long bprev;
+ int osize, nsize;
+{
+ register struct fs *fs;
+ register struct cg *cgp;
+ struct buf *bp;
+ long bno;
+ int frags, bbase;
+ int i, error;
+
+ fs = ip->i_fs;
+ if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) /* cheap test on the in-core summary */
+ return (NULL);
+ frags = numfrags(fs, nsize);
+ bbase = fragnum(fs, bprev);
+ if (bbase > fragnum(fs, (bprev + frags - 1))) {
+ /* cannot extend across a block boundary */
+ return (NULL);
+ }
+ error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+ (int)fs->fs_cgsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (NULL);
+ }
+ cgp = (struct cg *)bp->b_data;
+ if (!cg_chkmagic(cgp)) {
+ brelse(bp);
+ return (NULL);
+ }
+ cgp->cg_time = time.tv_sec;
+ bno = dtogd(fs, bprev);
+ for (i = numfrags(fs, osize); i < frags; i++) /* every frag to be added must be free */
+ if (isclr(cg_blksfree(cgp), bno + i)) {
+ brelse(bp);
+ return (NULL);
+ }
+ /*
+ * the current fragment can be extended
+ * deduct the count on fragment being extended into
+ * increase the count on the remaining fragment (if any)
+ * allocate the extended piece
+ */
+ for (i = frags; i < fs->fs_frag - bbase; i++) /* find where the free run behind the fragment ends */
+ if (isclr(cg_blksfree(cgp), bno + i))
+ break;
+ cgp->cg_frsum[i - numfrags(fs, osize)]--; /* the free run being consumed */
+ if (i != frags)
+ cgp->cg_frsum[i - frags]++; /* the free run left over */
+ for (i = numfrags(fs, osize); i < frags; i++) {
+ clrbit(cg_blksfree(cgp), bno + i);
+ cgp->cg_cs.cs_nffree--;
+ fs->fs_cstotal.cs_nffree--;
+ fs->fs_cs(fs, cg).cs_nffree--;
+ }
+ fs->fs_fmod = 1;
+ bdwrite(bp);
+ return (bprev);
+}
+
+/*
+ * Determine whether a block can be allocated.
+ *
+ * Check to see if a block of the appropriate size is available,
+ * and if it is, allocate it.
+ *
+ * ip is the inode the space is for, cg the cylinder group to try,
+ * bpref the preferred position (handed to ffs_alloccgblk or
+ * ffs_mapsearch) and size the amount wanted; size == fs_bsize asks for
+ * a whole block, anything else is converted with numfrags(). Returns
+ * the address of the block or fragment allocated, or NULL when this
+ * cylinder group cannot be read, fails cg_chkmagic(), or has nothing
+ * suitable.
+ */
+static daddr_t
+ffs_alloccg(ip, cg, bpref, size)
+ struct inode *ip;
+ int cg;
+ daddr_t bpref;
+ int size;
+{
+ register struct fs *fs;
+ register struct cg *cgp;
+ struct buf *bp;
+ register int i;
+ int error, bno, frags, allocsiz;
+
+ fs = ip->i_fs;
+ if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) /* cheap test on the in-core summary */
+ return (NULL);
+ error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+ (int)fs->fs_cgsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (NULL);
+ }
+ cgp = (struct cg *)bp->b_data;
+ if (!cg_chkmagic(cgp) ||
+ (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
+ brelse(bp);
+ return (NULL);
+ }
+ cgp->cg_time = time.tv_sec;
+ if (size == fs->fs_bsize) { /* whole block requested */
+ bno = ffs_alloccgblk(fs, cgp, bpref);
+ bdwrite(bp);
+ return (bno);
+ }
+ /*
+ * check to see if any fragments are already available
+ * allocsiz is the size which will be allocated, hacking
+ * it down to a smaller size if necessary
+ */
+ frags = numfrags(fs, size);
+ for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
+ if (cgp->cg_frsum[allocsiz] != 0)
+ break;
+ if (allocsiz == fs->fs_frag) {
+ /*
+ * no fragments were available, so a block will be
+ * allocated, and hacked up
+ */
+ if (cgp->cg_cs.cs_nbfree == 0) {
+ brelse(bp);
+ return (NULL);
+ }
+ bno = ffs_alloccgblk(fs, cgp, bpref);
+ bpref = dtogd(fs, bno);
+ for (i = frags; i < fs->fs_frag; i++) /* mark the unused tail of the block free again */
+ setbit(cg_blksfree(cgp), bpref + i);
+ i = fs->fs_frag - frags;
+ cgp->cg_cs.cs_nffree += i;
+ fs->fs_cstotal.cs_nffree += i;
+ fs->fs_cs(fs, cg).cs_nffree += i;
+ fs->fs_fmod = 1;
+ cgp->cg_frsum[i]++;
+ bdwrite(bp);
+ return (bno);
+ }
+ bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
+ if (bno < 0) {
+ brelse(bp);
+ return (NULL);
+ }
+ for (i = 0; i < frags; i++)
+ clrbit(cg_blksfree(cgp), bno + i);
+ cgp->cg_cs.cs_nffree -= frags;
+ fs->fs_cstotal.cs_nffree -= frags;
+ fs->fs_cs(fs, cg).cs_nffree -= frags;
+ fs->fs_fmod = 1;
+ cgp->cg_frsum[allocsiz]--; /* a free run of allocsiz frags has been used */
+ if (frags != allocsiz)
+ cgp->cg_frsum[allocsiz - frags]++; /* its unused remainder is a new, shorter run */
+ bdwrite(bp);
+ return (cg * fs->fs_fpg + bno);
+}
+
+/*
+ * Allocate a block in a cylinder group.
+ *
+ * This algorithm implements the following policy:
+ * 1) allocate the requested block.
+ * 2) allocate a rotationally optimal block in the same cylinder.
+ * 3) allocate the next available block on the block rotor for the
+ * specified cylinder group.
+ * Note that this routine only allocates fs_bsize blocks; these
+ * blocks may be fragmented by the routine that allocates them.
+ *
+ * fs is the file system, cgp the already-read cylinder group to
+ * allocate from, and bpref the preferred block (0, or a block in
+ * another cylinder group, means "no preference": the group's cg_rotor
+ * is used instead). The routine edits cgp and the in-core summaries
+ * but does not write the buffer; that is left to the caller. Returns
+ * cg_cgx * fs_fpg plus the group-relative block found, or NULL when
+ * ffs_mapsearch() returns a negative value.
+ */
+static daddr_t
+ffs_alloccgblk(fs, cgp, bpref)
+ register struct fs *fs;
+ register struct cg *cgp;
+ daddr_t bpref;
+{
+ daddr_t bno, blkno;
+ int cylno, pos, delta;
+ short *cylbp;
+ register int i;
+
+ if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
+ bpref = cgp->cg_rotor;
+ goto norot;
+ }
+ bpref = blknum(fs, bpref);
+ bpref = dtogd(fs, bpref);
+ /*
+ * if the requested block is available, use it
+ */
+ if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
+ bno = bpref;
+ goto gotit;
+ }
+ /*
+ * check for a block available on the same cylinder
+ */
+ cylno = cbtocylno(fs, bpref);
+ if (cg_blktot(cgp)[cylno] == 0)
+ goto norot;
+ if (fs->fs_cpc == 0) {
+ /*
+ * Block layout information is not available.
+ * Leaving bpref unchanged means we take the
+ * next available free block following the one
+ * we just allocated. Hopefully this will at
+ * least hit a track cache on drives of unknown
+ * geometry (e.g. SCSI).
+ */
+ goto norot;
+ }
+ /*
+ * check the summary information to see if a block is
+ * available in the requested cylinder starting at the
+ * requested rotational position and proceeding around.
+ */
+ cylbp = cg_blks(fs, cgp, cylno);
+ pos = cbtorpos(fs, bpref);
+ for (i = pos; i < fs->fs_nrpos; i++)
+ if (cylbp[i] > 0)
+ break;
+ if (i == fs->fs_nrpos)
+ for (i = 0; i < pos; i++)
+ if (cylbp[i] > 0)
+ break;
+ if (cylbp[i] > 0) {
+ /*
+ * found a rotational position, now find the actual
+ * block. A panic if none is actually there.
+ */
+ pos = cylno % fs->fs_cpc;
+ bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
+ if (fs_postbl(fs, pos)[i] == -1) {
+ printf("pos = %d, i = %d, fs = %s\n",
+ pos, i, fs->fs_fsmnt);
+ panic("ffs_alloccgblk: cyl groups corrupted");
+ }
+ for (i = fs_postbl(fs, pos)[i];; ) { /* follow the fs_rotbl chain of candidate blocks */
+ if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
+ bno = blkstofrags(fs, (bno + i));
+ goto gotit;
+ }
+ delta = fs_rotbl(fs)[i];
+ if (delta <= 0 ||
+ delta + i > fragstoblks(fs, fs->fs_fpg))
+ break;
+ i += delta;
+ }
+ printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
+ panic("ffs_alloccgblk: can't find blk in cyl");
+ }
+norot:
+ /*
+ * no blocks in the requested cylinder, so take next
+ * available one in this cylinder group.
+ */
+ bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
+ if (bno < 0)
+ return (NULL);
+ cgp->cg_rotor = bno;
+gotit:
+ blkno = fragstoblks(fs, bno);
+ ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno);
+ ffs_clusteracct(fs, cgp, blkno, -1);
+ cgp->cg_cs.cs_nbfree--;
+ fs->fs_cstotal.cs_nbfree--;
+ fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
+ cylno = cbtocylno(fs, bno);
+ cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
+ cg_blktot(cgp)[cylno]--;
+ fs->fs_fmod = 1;
+ return (cgp->cg_cgx * fs->fs_fpg + bno);
+}
+
+/*
+ * Determine whether a cluster can be allocated.
+ *
+ * We do not currently check for optimal rotational layout if there
+ * are multiple choices in the same cylinder group. Instead we just
+ * take the first one that we find following bpref.
+ *
+ * ip is the inode the cluster is for, cg the cylinder group to search,
+ * bpref the preferred starting block (ignored unless it lies in cg),
+ * and len the number of contiguous blocks wanted. Returns the address
+ * of the first block of the cluster, or zero when the group holds too
+ * few free blocks, cannot be read, fails cg_chkmagic(), or has no free
+ * run of len blocks at or after the preference point.
+ */
+static daddr_t
+ffs_clusteralloc(ip, cg, bpref, len)
+	struct inode *ip;
+	int cg;
+	daddr_t bpref;
+	int len;
+{
+	register struct fs *fs;
+	register struct cg *cgp;
+	struct buf *bp;
+	int i, run, bno, bit, map;
+	u_char *mapp;
+
+	fs = ip->i_fs;
+	if (fs->fs_cs(fs, cg).cs_nbfree < len)
+		return (NULL);
+	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
+	    NOCRED, &bp))
+		goto fail;
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp))
+		goto fail;
+	/*
+	 * Check to see if a cluster of the needed size (or bigger) is
+	 * available in this cylinder group.
+	 */
+	for (i = len; i <= fs->fs_contigsumsize; i++)
+		if (cg_clustersum(cgp)[i] > 0)
+			break;
+	if (i > fs->fs_contigsumsize)
+		goto fail;
+	/*
+	 * Search the cluster map to find a big enough cluster.
+	 * We take the first one that we find, even if it is larger
+	 * than we need as we prefer to get one close to the previous
+	 * block allocation. We do not search before the current
+	 * preference point as we do not want to allocate a block
+	 * that is allocated before the previous one (as we will
+	 * then have to wait for another pass of the elevator
+	 * algorithm before it will be read). We prefer to fail and
+	 * be recalled to try an allocation in the next cylinder group.
+	 */
+	if (dtog(fs, bpref) != cg)
+		bpref = 0;
+	else
+		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
+	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
+	map = *mapp++;
+	bit = 1 << (bpref % NBBY);
+	/* Walk the map one bit per block, counting consecutive free ones. */
+	for (run = 0, i = bpref; i < cgp->cg_nclusterblks; i++) {
+		if ((map & bit) == 0) {
+			run = 0;
+		} else {
+			run++;
+			if (run == len)
+				break;
+		}
+		if ((i & (NBBY - 1)) != (NBBY - 1)) {
+			bit <<= 1;
+		} else {
+			map = *mapp++;
+			bit = 1;
+		}
+	}
+	if (i == cgp->cg_nclusterblks)
+		goto fail;
+	/*
+	 * Allocate the cluster that we have found.
+	 */
+	bno = cg * fs->fs_fpg + blkstofrags(fs, i - run + 1);
+	len = blkstofrags(fs, len);
+	for (i = 0; i < len; i += fs->fs_frag)
+		if (ffs_alloccgblk(fs, cgp, bno + i) != bno + i)
+			panic("ffs_clusteralloc: lost block");
+	/*
+	 * ffs_alloccgblk() has edited the free map and counts held in
+	 * this buffer, so it must be queued for writing rather than
+	 * simply released.
+	 */
+	bdwrite(bp);
+	return (bno);
+
+fail:
+	brelse(bp);
+	return (0);
+}
+
+/*
+ * Determine whether an inode can be allocated.
+ *
+ * Check to see if an inode is available, and if it is,
+ * allocate it using the following policy:
+ * 1) allocate the requested inode.
+ * 2) allocate the next available inode after the requested
+ * inode in the specified cylinder group.
+ *
+ * ip supplies the file system and device, cg is the cylinder group to
+ * try, ipref the preferred inode (0 for none; reduced modulo fs_ipg
+ * before use) and mode the mode of the new inode (IFDIR also bumps the
+ * directory counts). Returns cg * fs_ipg plus the group-relative inode
+ * chosen, or NULL when the group has no free inode, cannot be read, or
+ * fails cg_chkmagic().
+ */
+static ino_t
+ffs_nodealloccg(ip, cg, ipref, mode)
+ struct inode *ip;
+ int cg;
+ daddr_t ipref;
+ int mode;
+{
+ register struct fs *fs;
+ register struct cg *cgp;
+ struct buf *bp;
+ int error, start, len, loc, map, i;
+
+ fs = ip->i_fs;
+ if (fs->fs_cs(fs, cg).cs_nifree == 0) /* cheap test on the in-core summary */
+ return (NULL);
+ error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+ (int)fs->fs_cgsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (NULL);
+ }
+ cgp = (struct cg *)bp->b_data;
+ if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
+ brelse(bp);
+ return (NULL);
+ }
+ cgp->cg_time = time.tv_sec;
+ if (ipref) {
+ ipref %= fs->fs_ipg;
+ if (isclr(cg_inosused(cgp), ipref))
+ goto gotit;
+ }
+ start = cgp->cg_irotor / NBBY; /* scan the in-use map from the rotor onward */
+ len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
+ loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
+ if (loc == 0) {
+ len = start + 1; /* nothing found: rescan from the start of the map */
+ start = 0;
+ loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
+ if (loc == 0) {
+ printf("cg = %d, irotor = %d, fs = %s\n",
+ cg, cgp->cg_irotor, fs->fs_fsmnt);
+ panic("ffs_nodealloccg: map corrupted");
+ /* NOTREACHED */
+ }
+ }
+ i = start + len - loc; /* byte of the map that is not 0xff */
+ map = cg_inosused(cgp)[i];
+ ipref = i * NBBY;
+ for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { /* find the clear bit within that byte */
+ if ((map & i) == 0) {
+ cgp->cg_irotor = ipref;
+ goto gotit;
+ }
+ }
+ printf("fs = %s\n", fs->fs_fsmnt);
+ panic("ffs_nodealloccg: block not in map");
+ /* NOTREACHED */
+gotit:
+ setbit(cg_inosused(cgp), ipref);
+ cgp->cg_cs.cs_nifree--;
+ fs->fs_cstotal.cs_nifree--;
+ fs->fs_cs(fs, cg).cs_nifree--;
+ fs->fs_fmod = 1;
+ if ((mode & IFMT) == IFDIR) {
+ cgp->cg_cs.cs_ndir++;
+ fs->fs_cstotal.cs_ndir++;
+ fs->fs_cs(fs, cg).cs_ndir++;
+ }
+ bdwrite(bp);
+ return (cg * fs->fs_ipg + ipref);
+}
+
+/*
+ * Free a block or fragment.
+ *
+ * The specified block or fragment is placed back in the
+ * free map. If a fragment is deallocated, a possible
+ * block reassembly is checked.
+ *
+ * ip is the inode giving up the space, bno the address being freed and
+ * size the amount freed; size must not exceed fs_bsize and must have a
+ * zero fragoff(), otherwise the routine panics. A bno at or beyond
+ * fs_size is logged through ffs_fserr() and ignored. If the cylinder
+ * group cannot be read or fails cg_chkmagic() the routine returns
+ * without freeing anything.
+ *
+ * NOTE(review): the function is declared without a return type
+ * (implicit int) yet every return is valueless; callers visible here
+ * ignore the result.
+ */
+ffs_blkfree(ip, bno, size)
+ register struct inode *ip;
+ daddr_t bno;
+ long size;
+{
+ register struct fs *fs;
+ register struct cg *cgp;
+ struct buf *bp;
+ daddr_t blkno;
+ int i, error, cg, blk, frags, bbase;
+
+ fs = ip->i_fs;
+ if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
+ printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
+ ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
+ panic("blkfree: bad size");
+ }
+ cg = dtog(fs, bno);
+ if ((u_int)bno >= fs->fs_size) {
+ printf("bad block %d, ino %d\n", bno, ip->i_number);
+ ffs_fserr(fs, ip->i_uid, "bad block");
+ return;
+ }
+ error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+ (int)fs->fs_cgsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return;
+ }
+ cgp = (struct cg *)bp->b_data;
+ if (!cg_chkmagic(cgp)) {
+ brelse(bp);
+ return;
+ }
+ cgp->cg_time = time.tv_sec;
+ bno = dtogd(fs, bno); /* from here on bno is relative to the cylinder group */
+ if (size == fs->fs_bsize) { /* freeing a whole block */
+ blkno = fragstoblks(fs, bno);
+ if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
+ printf("dev = 0x%x, block = %d, fs = %s\n",
+ ip->i_dev, bno, fs->fs_fsmnt);
+ panic("blkfree: freeing free block");
+ }
+ ffs_setblock(fs, cg_blksfree(cgp), blkno);
+ ffs_clusteracct(fs, cgp, blkno, 1);
+ cgp->cg_cs.cs_nbfree++;
+ fs->fs_cstotal.cs_nbfree++;
+ fs->fs_cs(fs, cg).cs_nbfree++;
+ i = cbtocylno(fs, bno);
+ cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
+ cg_blktot(cgp)[i]++;
+ } else {
+ bbase = bno - fragnum(fs, bno); /* first frag of the block containing bno */
+ /*
+ * decrement the counts associated with the old frags
+ */
+ blk = blkmap(fs, cg_blksfree(cgp), bbase);
+ ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
+ /*
+ * deallocate the fragment
+ */
+ frags = numfrags(fs, size);
+ for (i = 0; i < frags; i++) {
+ if (isset(cg_blksfree(cgp), bno + i)) {
+ printf("dev = 0x%x, block = %d, fs = %s\n",
+ ip->i_dev, bno + i, fs->fs_fsmnt);
+ panic("blkfree: freeing free frag");
+ }
+ setbit(cg_blksfree(cgp), bno + i);
+ }
+ cgp->cg_cs.cs_nffree += i; /* i == frags once the loop has finished */
+ fs->fs_cstotal.cs_nffree += i;
+ fs->fs_cs(fs, cg).cs_nffree += i;
+ /*
+ * add back in counts associated with the new frags
+ */
+ blk = blkmap(fs, cg_blksfree(cgp), bbase);
+ ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
+ /*
+ * if a complete block has been reassembled, account for it
+ */
+ blkno = fragstoblks(fs, bbase);
+ if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
+ cgp->cg_cs.cs_nffree -= fs->fs_frag;
+ fs->fs_cstotal.cs_nffree -= fs->fs_frag;
+ fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
+ ffs_clusteracct(fs, cgp, blkno, 1);
+ cgp->cg_cs.cs_nbfree++;
+ fs->fs_cstotal.cs_nbfree++;
+ fs->fs_cs(fs, cg).cs_nbfree++;
+ i = cbtocylno(fs, bbase);
+ cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
+ cg_blktot(cgp)[i]++;
+ }
+ }
+ fs->fs_fmod = 1;
+ bdwrite(bp);
+}
+
+/*
+ * Free an inode.
+ *
+ * The specified inode is placed back in the free map.
+ *
+ * Arguments (via ap): a_pvp is a vnode on the same file system (used
+ * for the fs and device), a_ino the inode number to free and a_mode
+ * its mode (IFDIR also lowers the directory counts). An inode number
+ * outside fs_ipg * fs_ncg panics. Always returns 0, including when the
+ * cylinder group cannot be read or fails cg_chkmagic() and nothing was
+ * freed.
+ */
+int
+ffs_vfree(ap)
+ struct vop_vfree_args /* {
+ struct vnode *a_pvp;
+ ino_t a_ino;
+ int a_mode;
+ } */ *ap;
+{
+ register struct fs *fs;
+ register struct cg *cgp;
+ register struct inode *pip;
+ ino_t ino = ap->a_ino;
+ struct buf *bp;
+ int error, cg;
+
+ pip = VTOI(ap->a_pvp);
+ fs = pip->i_fs;
+ if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
+ panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
+ pip->i_dev, ino, fs->fs_fsmnt);
+ cg = ino_to_cg(fs, ino);
+ error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+ (int)fs->fs_cgsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (0);
+ }
+ cgp = (struct cg *)bp->b_data;
+ if (!cg_chkmagic(cgp)) {
+ brelse(bp);
+ return (0);
+ }
+ cgp->cg_time = time.tv_sec;
+ ino %= fs->fs_ipg; /* from here on ino is relative to the cylinder group */
+ if (isclr(cg_inosused(cgp), ino)) {
+ printf("dev = 0x%x, ino = %d, fs = %s\n",
+ pip->i_dev, ino, fs->fs_fsmnt);
+ if (fs->fs_ronly == 0) /* only fatal when fs_ronly is clear */
+ panic("ifree: freeing free inode");
+ }
+ clrbit(cg_inosused(cgp), ino);
+ if (ino < cgp->cg_irotor) /* pull the rotor back to the lowest freed inode */
+ cgp->cg_irotor = ino;
+ cgp->cg_cs.cs_nifree++;
+ fs->fs_cstotal.cs_nifree++;
+ fs->fs_cs(fs, cg).cs_nifree++;
+ if ((ap->a_mode & IFMT) == IFDIR) {
+ cgp->cg_cs.cs_ndir--;
+ fs->fs_cstotal.cs_ndir--;
+ fs->fs_cs(fs, cg).cs_ndir--;
+ }
+ fs->fs_fmod = 1;
+ bdwrite(bp);
+ return (0);
+}
+
+/*
+ * Find a block of the specified size in the specified cylinder group.
+ *
+ * It is a panic if a request is made to find a block if none are
+ * available.
+ *
+ * fs is the file system, cgp the cylinder group being searched, bpref
+ * the preferred starting point (0 means start from cg_frotor) and
+ * allocsiz the number of frags wanted. Returns the position found
+ * within the cylinder group's free map (callers add cg * fs_fpg
+ * themselves) and stores the byte-aligned position in cg_frotor. The
+ * trailing return (-1) follows a panic.
+ */
+static daddr_t
+ffs_mapsearch(fs, cgp, bpref, allocsiz)
+ register struct fs *fs;
+ register struct cg *cgp;
+ daddr_t bpref;
+ int allocsiz;
+{
+ daddr_t bno;
+ int start, len, loc, i;
+ int blk, field, subfield, pos;
+
+ /*
+ * find the fragment by searching through the free block
+ * map for an appropriate bit pattern
+ */
+ if (bpref)
+ start = dtogd(fs, bpref) / NBBY;
+ else
+ start = cgp->cg_frotor / NBBY;
+ len = howmany(fs->fs_fpg, NBBY) - start;
+ loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
+ (u_char *)fragtbl[fs->fs_frag],
+ (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
+ if (loc == 0) {
+ len = start + 1; /* nothing from start to the end: rescan from the front */
+ start = 0;
+ loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
+ (u_char *)fragtbl[fs->fs_frag],
+ (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
+ if (loc == 0) {
+ printf("start = %d, len = %d, fs = %s\n",
+ start, len, fs->fs_fsmnt);
+ panic("ffs_alloccg: map corrupted");
+ /* NOTREACHED */
+ }
+ }
+ bno = (start + len - loc) * NBBY; /* first bit of the map byte that matched */
+ cgp->cg_frotor = bno;
+ /*
+ * found the byte in the map
+ * sift through the bits to find the selected frag
+ */
+ for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
+ blk = blkmap(fs, cg_blksfree(cgp), bno);
+ blk <<= 1;
+ field = around[allocsiz];
+ subfield = inside[allocsiz];
+ for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { /* slide the pattern across the block */
+ if ((blk & field) == subfield)
+ return (bno + pos);
+ field <<= 1;
+ subfield <<= 1;
+ }
+ }
+ printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
+ panic("ffs_alloccg: block not in map");
+ return (-1);
+}
+
+/*
+ * Update the cluster map because of an allocation or free.
+ *
+ * Cnt == 1 means free; cnt == -1 means allocating.
+ *
+ * fs is the file system, cgp the cylinder group whose cluster map
+ * (cg_clustersfree) and cluster summary (cg_clustersum) are updated,
+ * and blkno the bit in that map being changed. Nothing is done when
+ * fs_contigsumsize <= 0. The runs of set bits on either side of blkno
+ * are measured, each capped at fs_contigsumsize, and the summary
+ * counts for the joined run and for the two side runs are adjusted by
+ * cnt in opposite directions.
+ *
+ * NOTE(review): declared without a return type (implicit int) while
+ * the early return is valueless.
+ */
+ffs_clusteracct(fs, cgp, blkno, cnt)
+ struct fs *fs;
+ struct cg *cgp;
+ daddr_t blkno;
+ int cnt;
+{
+ long *sump;
+ u_char *freemapp, *mapp;
+ int i, start, end, forw, back, map, bit;
+
+ if (fs->fs_contigsumsize <= 0)
+ return;
+ freemapp = cg_clustersfree(cgp);
+ sump = cg_clustersum(cgp);
+ /*
+ * Allocate or clear the actual block.
+ */
+ if (cnt > 0)
+ setbit(freemapp, blkno);
+ else
+ clrbit(freemapp, blkno);
+ /*
+ * Find the size of the cluster going forward.
+ */
+ start = blkno + 1;
+ end = start + fs->fs_contigsumsize;
+ if (end >= cgp->cg_nclusterblks)
+ end = cgp->cg_nclusterblks;
+ mapp = &freemapp[start / NBBY];
+ map = *mapp++;
+ bit = 1 << (start % NBBY);
+ for (i = start; i < end; i++) {
+ if ((map & bit) == 0)
+ break;
+ if ((i & (NBBY - 1)) != (NBBY - 1)) {
+ bit <<= 1;
+ } else {
+ map = *mapp++; /* crossed a byte boundary: load the next map byte */
+ bit = 1;
+ }
+ }
+ forw = i - start;
+ /*
+ * Find the size of the cluster going backward.
+ */
+ start = blkno - 1;
+ end = start - fs->fs_contigsumsize;
+ if (end < 0)
+ end = -1;
+ mapp = &freemapp[start / NBBY];
+ map = *mapp--;
+ bit = 1 << (start % NBBY); /* NOTE(review): start is -1 when blkno == 0; the loop below then runs zero times */
+ for (i = start; i > end; i--) {
+ if ((map & bit) == 0)
+ break;
+ if ((i & (NBBY - 1)) != 0) {
+ bit >>= 1;
+ } else {
+ map = *mapp--; /* crossed a byte boundary: load the previous map byte */
+ bit = 1 << (NBBY - 1);
+ }
+ }
+ back = start - i;
+ /*
+ * Account for old cluster and the possibly new forward and
+ * back clusters.
+ */
+ i = back + forw + 1;
+ if (i > fs->fs_contigsumsize)
+ i = fs->fs_contigsumsize;
+ sump[i] += cnt;
+ if (back > 0)
+ sump[back] -= cnt;
+ if (forw > 0)
+ sump[forw] -= cnt;
+}
+
+/*
+ * Fserr prints the name of a file system with an error diagnostic.
+ *
+ * The form of the error message is:
+ * fs: error message
+ *
+ * fs is the file system the error concerns (its fs_fsmnt is printed),
+ * uid the user id reported in the message and cp the diagnostic text.
+ * The line is sent through log() at LOG_ERR and, as written below,
+ * actually reads "uid <uid> on <fs_fsmnt>: <cp>".
+ */
+static void
+ffs_fserr(fs, uid, cp)
+ struct fs *fs;
+ u_int uid;
+ char *cp;
+{
+
+ log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
+}
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
new file mode 100644
index 000000000000..752feec9947b
--- /dev/null
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+/*
+ * Balloc defines the structure of file system storage
+ * by allocating the physical blocks on a device given
+ * the inode and the logical block number in a file.
+ *
+ * ip     inode of the file
+ * bn     logical block number in the file; a negative value yields EFBIG
+ * size   size in bytes wanted for the block (rounded up to whole
+ *        fragments when a direct block is allocated or reallocated)
+ * cred   credentials handed to the allocation routines
+ * bpp    cleared on entry; on success points at the buffer for block bn
+ * flags  B_CLRBUF asks for a newly allocated data block to be zeroed,
+ *        B_SYNC asks for modified blocks to be written synchronously
+ *
+ * Returns 0 on success, otherwise the error from the failing routine.
+ * The return type is spelled out to match the prototype in ffs_extern.h.
+ */
+int
+ffs_balloc(ip, bn, size, cred, bpp, flags)
+ register struct inode *ip;
+ register daddr_t bn;
+ int size;
+ struct ucred *cred;
+ struct buf **bpp;
+ int flags;
+{
+ register struct fs *fs;
+ register daddr_t nb;
+ struct buf *bp, *nbp;
+ struct vnode *vp = ITOV(ip);
+ struct indir indirs[NIADDR + 2];
+ daddr_t newb, lbn, *bap, pref;
+ int osize, nsize, num, i, error;
+
+ *bpp = NULL;
+ if (bn < 0)
+ return (EFBIG);
+ fs = ip->i_fs;
+ lbn = bn;
+
+ /*
+ * If the next write will extend the file into a new block,
+ * and the file is currently composed of a fragment
+ * this fragment has to be extended to be a full block.
+ */
+ nb = lblkno(fs, ip->i_size);
+ if (nb < NDADDR && nb < bn) {
+ osize = blksize(fs, ip, nb);
+ if (osize < fs->fs_bsize && osize > 0) {
+ error = ffs_realloccg(ip, nb,
+ ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
+ osize, (int)fs->fs_bsize, cred, &bp);
+ if (error)
+ return (error);
+ ip->i_size = (nb + 1) * fs->fs_bsize;
+ vnode_pager_setsize(vp, (u_long)ip->i_size);
+ ip->i_db[nb] = dbtofsb(fs, bp->b_blkno);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if (flags & B_SYNC)
+ bwrite(bp);
+ else
+ bawrite(bp);
+ }
+ }
+ /*
+ * The first NDADDR blocks are direct blocks
+ */
+ if (bn < NDADDR) {
+ nb = ip->i_db[bn];
+ /*
+ * Block exists and the file already covers all of it:
+ * just read it in.
+ */
+ if (nb != 0 && ip->i_size >= (bn + 1) * fs->fs_bsize) {
+ error = bread(vp, bn, fs->fs_bsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ *bpp = bp;
+ return (0);
+ }
+ if (nb != 0) {
+ /*
+ * Consider need to reallocate a fragment.
+ */
+ osize = fragroundup(fs, blkoff(fs, ip->i_size));
+ nsize = fragroundup(fs, size);
+ if (nsize <= osize) {
+ error = bread(vp, bn, osize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ } else {
+ error = ffs_realloccg(ip, bn,
+ ffs_blkpref(ip, bn, (int)bn, &ip->i_db[0]),
+ osize, nsize, cred, &bp);
+ if (error)
+ return (error);
+ }
+ } else {
+ /*
+ * No block yet: allocate only the fragments needed when
+ * the block lies at the end of the file, a full block
+ * otherwise.
+ */
+ if (ip->i_size < (bn + 1) * fs->fs_bsize)
+ nsize = fragroundup(fs, size);
+ else
+ nsize = fs->fs_bsize;
+ error = ffs_alloc(ip, bn,
+ ffs_blkpref(ip, bn, (int)bn, &ip->i_db[0]),
+ nsize, cred, &newb);
+ if (error)
+ return (error);
+ bp = getblk(vp, bn, nsize, 0, 0);
+ bp->b_blkno = fsbtodb(fs, newb);
+ if (flags & B_CLRBUF)
+ clrbuf(bp);
+ }
+ ip->i_db[bn] = dbtofsb(fs, bp->b_blkno);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ *bpp = bp;
+ return (0);
+ }
+ /*
+ * Determine the number of levels of indirection.
+ */
+ pref = 0;
+ if (error = ufs_getlbns(vp, bn, indirs, &num))
+ return(error);
+#ifdef DIAGNOSTIC
+ if (num < 1)
+ panic ("ffs_balloc: ufs_bmaparray returned indirect block\n");
+#endif
+ /*
+ * Fetch the first indirect block allocating if necessary.
+ */
+ --num;
+ nb = ip->i_ib[indirs[0].in_off];
+ if (nb == 0) {
+ pref = ffs_blkpref(ip, lbn, 0, (daddr_t *)0);
+ if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
+ cred, &newb))
+ return (error);
+ nb = newb;
+ bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
+ bp->b_blkno = fsbtodb(fs, newb);
+ clrbuf(bp);
+ /*
+ * Write synchronously so that indirect blocks
+ * never point at garbage.
+ */
+ if (error = bwrite(bp)) {
+ ffs_blkfree(ip, nb, fs->fs_bsize);
+ return (error);
+ }
+ ip->i_ib[indirs[0].in_off] = newb;
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ }
+ /*
+ * Fetch through the indirect blocks, allocating as necessary.
+ * The loop is left with bp holding the last indirect block, bap
+ * pointing at its contents and nb the entry for the data block.
+ */
+ for (i = 1;;) {
+ error = bread(vp,
+ indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ bap = (daddr_t *)bp->b_data;
+ nb = bap[indirs[i].in_off];
+ if (i == num)
+ break;
+ i += 1;
+ if (nb != 0) {
+ brelse(bp);
+ continue;
+ }
+ if (pref == 0)
+ pref = ffs_blkpref(ip, lbn, 0, (daddr_t *)0);
+ if (error =
+ ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
+ brelse(bp);
+ return (error);
+ }
+ nb = newb;
+ nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
+ nbp->b_blkno = fsbtodb(fs, nb);
+ clrbuf(nbp);
+ /*
+ * Write synchronously so that indirect blocks
+ * never point at garbage.
+ */
+ if (error = bwrite(nbp)) {
+ ffs_blkfree(ip, nb, fs->fs_bsize);
+ brelse(bp);
+ return (error);
+ }
+ /* i was already advanced, so i - 1 is the block held in bp. */
+ bap[indirs[i - 1].in_off] = nb;
+ /*
+ * If required, write synchronously, otherwise use
+ * delayed write.
+ */
+ if (flags & B_SYNC) {
+ bwrite(bp);
+ } else {
+ bdwrite(bp);
+ }
+ }
+ /*
+ * Get the data block, allocating if necessary.
+ */
+ if (nb == 0) {
+ pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
+ if (error = ffs_alloc(ip,
+ lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
+ brelse(bp);
+ return (error);
+ }
+ nb = newb;
+ nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
+ nbp->b_blkno = fsbtodb(fs, nb);
+ if (flags & B_CLRBUF)
+ clrbuf(nbp);
+ bap[indirs[i].in_off] = nb;
+ /*
+ * If required, write synchronously, otherwise use
+ * delayed write.
+ */
+ if (flags & B_SYNC) {
+ bwrite(bp);
+ } else {
+ bdwrite(bp);
+ }
+ *bpp = nbp;
+ return (0);
+ }
+ /*
+ * The data block already exists.  With B_CLRBUF its current
+ * contents are read in; otherwise a buffer is obtained without
+ * reading and only its disk address is filled in.
+ */
+ brelse(bp);
+ if (flags & B_CLRBUF) {
+ error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
+ if (error) {
+ brelse(nbp);
+ return (error);
+ }
+ } else {
+ nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
+ nbp->b_blkno = fsbtodb(fs, nb);
+ }
+ *bpp = nbp;
+ return (0);
+}
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
new file mode 100644
index 000000000000..ab467a272a94
--- /dev/null
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -0,0 +1,101 @@
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ffs_extern.h 8.3 (Berkeley) 4/16/94
+ */
+
+/*
+ * Incomplete declarations of the structures that appear only as
+ * pointers in the prototypes below.
+ * NOTE(review): the struct vop_*_args types used below are not declared
+ * here; presumably the includer has already pulled them in -- confirm.
+ */
+struct buf;
+struct fid;
+struct fs;
+struct inode;
+struct mount;
+struct nameidata;
+struct proc;
+struct statfs;
+struct timeval;
+struct ucred;
+struct uio;
+struct vnode;
+struct mbuf;
+
+/* Prototypes for the externally visible FFS routines. */
+__BEGIN_DECLS
+int ffs_alloc __P((struct inode *,
+ daddr_t, daddr_t, int, struct ucred *, daddr_t *));
+int ffs_balloc __P((struct inode *,
+ daddr_t, int, struct ucred *, struct buf **, int));
+int ffs_blkatoff __P((struct vop_blkatoff_args *));
+int ffs_blkfree __P((struct inode *, daddr_t, long));
+daddr_t ffs_blkpref __P((struct inode *, daddr_t, int, daddr_t *));
+int ffs_bmap __P((struct vop_bmap_args *));
+void ffs_clrblock __P((struct fs *, u_char *, daddr_t));
+int ffs_fhtovp __P((struct mount *, struct fid *, struct mbuf *,
+ struct vnode **, int *, struct ucred **));
+void ffs_fragacct __P((struct fs *, int, long [], int));
+int ffs_fsync __P((struct vop_fsync_args *));
+int ffs_init __P((void));
+int ffs_isblock __P((struct fs *, u_char *, daddr_t));
+int ffs_mount __P((struct mount *,
+ char *, caddr_t, struct nameidata *, struct proc *));
+int ffs_mountfs __P((struct vnode *, struct mount *, struct proc *));
+int ffs_mountroot __P((void));
+int ffs_read __P((struct vop_read_args *));
+int ffs_reallocblks __P((struct vop_reallocblks_args *));
+int ffs_realloccg __P((struct inode *,
+ daddr_t, daddr_t, int, int, struct ucred *, struct buf **));
+int ffs_reclaim __P((struct vop_reclaim_args *));
+void ffs_setblock __P((struct fs *, u_char *, daddr_t));
+int ffs_statfs __P((struct mount *, struct statfs *, struct proc *));
+int ffs_sync __P((struct mount *, int, struct ucred *, struct proc *));
+int ffs_truncate __P((struct vop_truncate_args *));
+int ffs_unmount __P((struct mount *, int, struct proc *));
+int ffs_update __P((struct vop_update_args *));
+int ffs_valloc __P((struct vop_valloc_args *));
+int ffs_vfree __P((struct vop_vfree_args *));
+int ffs_vget __P((struct mount *, ino_t, struct vnode **));
+int ffs_vptofh __P((struct vnode *, struct fid *));
+int ffs_write __P((struct vop_write_args *));
+
+/* Declared without a parameter list, so calls to it are not type-checked. */
+int bwrite(); /* FFS needs a bwrite routine. XXX */
+
+/* The overlap checker is only declared for DIAGNOSTIC builds. */
+#ifdef DIAGNOSTIC
+void ffs_checkoverlap __P((struct buf *, struct inode *));
+#endif
+__END_DECLS
+
+/*
+ * Operation vectors defined elsewhere.  FFS_FIFOOPS expands to the
+ * fifo vector when FIFO is defined and to NULL otherwise.
+ */
+extern int (**ffs_vnodeop_p)();
+extern int (**ffs_specop_p)();
+#ifdef FIFO
+extern int (**ffs_fifoop_p)();
+#define FFS_FIFOOPS ffs_fifoop_p
+#else
+#define FFS_FIFOOPS NULL
+#endif
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
new file mode 100644
index 000000000000..b45aee53552f
--- /dev/null
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -0,0 +1,488 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ffs_inode.c 8.5 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/trace.h>
+#include <sys/resourcevar.h>
+
+#include <vm/vm.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+static int ffs_indirtrunc __P((struct inode *, daddr_t, daddr_t, daddr_t, int,
+ long *));
+
+/*
+ * FFS does no initialization of its own here: call ufs_init()
+ * and hand its return value back to the caller.
+ */
+int
+ffs_init()
+{
+ return (ufs_init());
+}
+
+/*
+ * Update the access, modified, and inode change times as specified by the
+ * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED
+ * flag is used to specify that the inode needs to be updated but that the
+ * times have already been set. The access and modified times are taken from
+ * a_access and a_modify; the inode change time is always taken from the
+ * current time. If a_waitfor is set, then wait for the disk write of the
+ * inode to complete.
+ *
+ * On a read-only mount the flags are simply cleared and nothing is
+ * written.  Returns 0, or the error from bread() or bwrite().
+ */
+int
+ffs_update(ap)
+ struct vop_update_args /* {
+ struct vnode *a_vp;
+ struct timeval *a_access;
+ struct timeval *a_modify;
+ int a_waitfor;
+ } */ *ap;
+{
+ register struct fs *fs;
+ struct buf *bp;
+ struct inode *ip;
+ int error;
+
+ ip = VTOI(ap->a_vp);
+ if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) {
+ ip->i_flag &=
+ ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
+ return (0);
+ }
+ /* Nothing pending: no need to touch the disk. */
+ if ((ip->i_flag &
+ (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0)
+ return (0);
+ /* Only the seconds field of each timestamp is set here. */
+ if (ip->i_flag & IN_ACCESS)
+ ip->i_atime.ts_sec = ap->a_access->tv_sec;
+ if (ip->i_flag & IN_UPDATE) {
+ ip->i_mtime.ts_sec = ap->a_modify->tv_sec;
+ ip->i_modrev++;
+ }
+ if (ip->i_flag & IN_CHANGE)
+ ip->i_ctime.ts_sec = time.tv_sec;
+ ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
+ fs = ip->i_fs;
+ /*
+ * Ensure that uid and gid are correct. This is a temporary
+ * fix until fsck has been changed to do the update.
+ */
+ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
+ ip->i_din.di_ouid = ip->i_uid; /* XXX */
+ ip->i_din.di_ogid = ip->i_gid; /* XXX */
+ } /* XXX */
+ /* Read the file system block that holds this inode's on-disk copy. */
+ if (error = bread(ip->i_devvp,
+ fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
+ (int)fs->fs_bsize, NOCRED, &bp)) {
+ brelse(bp);
+ return (error);
+ }
+ /* Copy the in-core dinode over its slot within that block. */
+ *((struct dinode *)bp->b_data +
+ ino_to_fsbo(fs, ip->i_number)) = ip->i_din;
+ if (ap->a_waitfor)
+ return (bwrite(bp));
+ else {
+ bdwrite(bp);
+ return (0);
+ }
+}
+
+#define SINGLE 0 /* index of single indirect block */
+#define DOUBLE 1 /* index of double indirect block */
+#define TRIPLE 2 /* index of triple indirect block */
+/*
+ * Truncate the inode oip to at most length size, freeing the
+ * disk blocks.  If the requested length is larger than the current
+ * size, the file is lengthened instead by allocating its last block.
+ *
+ * a_vp      vnode of the file
+ * a_length  new length of the file
+ * a_flags   IO_SYNC requests synchronous writes
+ * a_cred    credentials used for allocation and buffer invalidation
+ * a_p       process passed to vinvalbuf()
+ *
+ * Returns 0 or an errno value.  Errors met while the inode is being
+ * updated or blocks are being released do not stop the truncation;
+ * the most recent one is returned at the end.
+ */
+int
+ffs_truncate(ap)
+ struct vop_truncate_args /* {
+ struct vnode *a_vp;
+ off_t a_length;
+ int a_flags;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *ovp = ap->a_vp;
+ register daddr_t lastblock;
+ register struct inode *oip;
+ daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
+ daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
+ off_t length = ap->a_length;
+ register struct fs *fs;
+ struct buf *bp;
+ int offset, size, level;
+ long count, nblocks, vflags, blocksreleased = 0;
+ struct timeval tv;
+ register int i;
+ int aflags, error, allerror = 0;
+ off_t osize;
+
+ oip = VTOI(ovp);
+ tv = time;
+ /*
+ * A symbolic link shorter than mnt_maxsymlinklen is kept in the
+ * inode itself (i_shortlink), so there are no blocks to free.
+ */
+ if (ovp->v_type == VLNK &&
+ oip->i_size < ovp->v_mount->mnt_maxsymlinklen) {
+#ifdef DIAGNOSTIC
+ if (length != 0)
+ panic("ffs_truncate: partial truncate of symlink");
+#endif
+ bzero((char *)&oip->i_shortlink, (u_int)oip->i_size);
+ oip->i_size = 0;
+ oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ return (VOP_UPDATE(ovp, &tv, &tv, 1));
+ }
+ if (oip->i_size == length) {
+ oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ return (VOP_UPDATE(ovp, &tv, &tv, 0));
+ }
+#ifdef QUOTA
+ if (error = getinoquota(oip))
+ return (error);
+#endif
+ vnode_pager_setsize(ovp, (u_long)length);
+ fs = oip->i_fs;
+ osize = oip->i_size;
+ /*
+ * Lengthen the size of the file. We must ensure that the
+ * last byte of the file is allocated. Since the smallest
+ * value of osize is 0, length will be at least 1.
+ */
+ if (osize < length) {
+ offset = blkoff(fs, length - 1);
+ lbn = lblkno(fs, length - 1);
+ aflags = B_CLRBUF;
+ if (ap->a_flags & IO_SYNC)
+ aflags |= B_SYNC;
+ if (error = ffs_balloc(oip, lbn, offset + 1, ap->a_cred, &bp,
+ aflags))
+ return (error);
+ oip->i_size = length;
+ (void) vnode_pager_uncache(ovp);
+ /*
+ * aflags holds B_* flags, so the synchronous request must
+ * be tested with B_SYNC (IO_SYNC is never set in aflags).
+ */
+ if (aflags & B_SYNC)
+ bwrite(bp);
+ else
+ bawrite(bp);
+ oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ return (VOP_UPDATE(ovp, &tv, &tv, 1));
+ }
+ /*
+ * Shorten the size of the file. If the file is not being
+ * truncated to a block boundary, the contents of the
+ * partial block following the end of the file must be
+ * zero'ed in case it ever becomes accessible again because
+ * of subsequent file growth.
+ */
+ offset = blkoff(fs, length);
+ if (offset == 0) {
+ oip->i_size = length;
+ } else {
+ lbn = lblkno(fs, length);
+ aflags = B_CLRBUF;
+ if (ap->a_flags & IO_SYNC)
+ aflags |= B_SYNC;
+ if (error = ffs_balloc(oip, lbn, offset, ap->a_cred, &bp,
+ aflags))
+ return (error);
+ oip->i_size = length;
+ size = blksize(fs, oip, lbn);
+ (void) vnode_pager_uncache(ovp);
+ bzero((char *)bp->b_data + offset, (u_int)(size - offset));
+ allocbuf(bp, size);
+ /* As above: aflags carries B_SYNC, not IO_SYNC. */
+ if (aflags & B_SYNC)
+ bwrite(bp);
+ else
+ bawrite(bp);
+ }
+ /*
+ * Calculate index into inode's block list of
+ * last direct and indirect blocks (if any)
+ * which we want to keep. Lastblock is -1 when
+ * the file is truncated to 0.
+ */
+ lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
+ lastiblock[SINGLE] = lastblock - NDADDR;
+ lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
+ lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
+ nblocks = btodb(fs->fs_bsize);
+ /*
+ * Update file and block pointers on disk before we start freeing
+ * blocks. If we crash before free'ing blocks below, the blocks
+ * will be returned to the free list. lastiblock values are also
+ * normalized to -1 for calls to ffs_indirtrunc below.
+ */
+ bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks);
+ for (level = TRIPLE; level >= SINGLE; level--)
+ if (lastiblock[level] < 0) {
+ oip->i_ib[level] = 0;
+ lastiblock[level] = -1;
+ }
+ for (i = NDADDR - 1; i > lastblock; i--)
+ oip->i_db[i] = 0;
+ oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if (error = VOP_UPDATE(ovp, &tv, &tv, MNT_WAIT))
+ allerror = error;
+ /*
+ * Having written the new inode to disk, save its new configuration
+ * and put back the old block pointers long enough to process them.
+ * Note that we save the new block configuration so we can check it
+ * when we are done.
+ */
+ bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks);
+ bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks);
+ oip->i_size = osize;
+ vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
+ /*
+ * Record a vinvalbuf() failure without discarding an error
+ * already noted from VOP_UPDATE() above.
+ */
+ if (error = vinvalbuf(ovp, vflags, ap->a_cred, ap->a_p, 0, 0))
+ allerror = error;
+
+ /*
+ * Indirect blocks first.
+ */
+ indir_lbn[SINGLE] = -NDADDR;
+ indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
+ indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
+ for (level = TRIPLE; level >= SINGLE; level--) {
+ bn = oip->i_ib[level];
+ if (bn != 0) {
+ error = ffs_indirtrunc(oip, indir_lbn[level],
+ fsbtodb(fs, bn), lastiblock[level], level, &count);
+ if (error)
+ allerror = error;
+ blocksreleased += count;
+ if (lastiblock[level] < 0) {
+ oip->i_ib[level] = 0;
+ ffs_blkfree(oip, bn, fs->fs_bsize);
+ blocksreleased += nblocks;
+ }
+ }
+ /* Part of this level is kept, so nothing below it is freed. */
+ if (lastiblock[level] >= 0)
+ goto done;
+ }
+
+ /*
+ * All whole direct blocks or frags.
+ */
+ for (i = NDADDR - 1; i > lastblock; i--) {
+ register long bsize;
+
+ bn = oip->i_db[i];
+ if (bn == 0)
+ continue;
+ oip->i_db[i] = 0;
+ bsize = blksize(fs, oip, i);
+ ffs_blkfree(oip, bn, bsize);
+ blocksreleased += btodb(bsize);
+ }
+ if (lastblock < 0)
+ goto done;
+
+ /*
+ * Finally, look for a change in size of the
+ * last direct block; release any frags.
+ */
+ bn = oip->i_db[lastblock];
+ if (bn != 0) {
+ long oldspace, newspace;
+
+ /*
+ * Calculate amount of space we're giving
+ * back as old block size minus new block size.
+ */
+ oldspace = blksize(fs, oip, lastblock);
+ oip->i_size = length;
+ newspace = blksize(fs, oip, lastblock);
+ if (newspace == 0)
+ panic("itrunc: newspace");
+ if (oldspace - newspace > 0) {
+ /*
+ * Block number of space to be free'd is
+ * the old block # plus the number of frags
+ * required for the storage we're keeping.
+ */
+ bn += numfrags(fs, newspace);
+ ffs_blkfree(oip, bn, oldspace - newspace);
+ blocksreleased += btodb(oldspace - newspace);
+ }
+ }
+done:
+#ifdef DIAGNOSTIC
+ for (level = SINGLE; level <= TRIPLE; level++)
+ if (newblks[NDADDR + level] != oip->i_ib[level])
+ panic("itrunc1");
+ for (i = 0; i < NDADDR; i++)
+ if (newblks[i] != oip->i_db[i])
+ panic("itrunc2");
+ if (length == 0 &&
+ (ovp->v_dirtyblkhd.lh_first || ovp->v_cleanblkhd.lh_first))
+ panic("itrunc3");
+#endif /* DIAGNOSTIC */
+ /*
+ * Put back the real size.
+ */
+ oip->i_size = length;
+ oip->i_blocks -= blocksreleased;
+ if (oip->i_blocks < 0) /* sanity */
+ oip->i_blocks = 0;
+ oip->i_flag |= IN_CHANGE;
+#ifdef QUOTA
+ (void) chkdq(oip, -blocksreleased, NOCRED, 0);
+#endif
+ return (allerror);
+}
+
+/*
+ * Release blocks associated with the inode ip and stored in the indirect
+ * block bn. Blocks are free'd in LIFO order up to (but not including)
+ * lastbn. If level is greater than SINGLE, the block is an indirect block
+ * and recursive calls to indirtrunc must be used to cleanse other indirect
+ * blocks.
+ *
+ * ip      inode that owns the blocks
+ * lbn     logical block number under which the indirect block is buffered
+ * dbn     disk address of the indirect block, used if it must be read
+ * lastbn  last block to keep; -1 means nothing in this block is kept
+ * level   SINGLE, DOUBLE or TRIPLE
+ * countp  set to the number of disk blocks released (btodb units)
+ *
+ * Returns 0 or the most recent error met; *countp is 0 if the indirect
+ * block could not be read.
+ *
+ * NB: triple indirect blocks are untested.
+ */
+static int
+ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
+ register struct inode *ip;
+ daddr_t lbn, lastbn;
+ daddr_t dbn;
+ int level;
+ long *countp;
+{
+ register int i;
+ struct buf *bp;
+ register struct fs *fs = ip->i_fs;
+ register daddr_t *bap;
+ struct vnode *vp;
+ daddr_t *copy, nb, nlbn, last;
+ long blkcount, factor;
+ int nblocks, blocksreleased = 0;
+ int error = 0, allerror = 0;
+
+ /*
+ * Calculate index in current block of last
+ * block to be kept. -1 indicates the entire
+ * block so we need not calculate the index.
+ */
+ factor = 1;
+ for (i = SINGLE; i < level; i++)
+ factor *= NINDIR(fs);
+ last = lastbn;
+ if (lastbn > 0)
+ last /= factor;
+ nblocks = btodb(fs->fs_bsize);
+ /*
+ * Get buffer of block pointers, zero those entries corresponding
+ * to blocks to be free'd, and update on disk copy first. Since
+ * double(triple) indirect before single(double) indirect, calls
+ * to bmap on these blocks will fail. However, we already have
+ * the on disk address, so we have to set the b_blkno field
+ * explicitly instead of letting bread do everything for us.
+ */
+ vp = ITOV(ip);
+ bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0);
+ if (bp->b_flags & (B_DONE | B_DELWRI)) {
+ /* Braces must be here in case trace evaluates to nothing. */
+ trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn);
+ } else {
+ trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn);
+ curproc->p_stats->p_ru.ru_inblock++; /* pay for read */
+ bp->b_flags |= B_READ;
+ if (bp->b_bcount > bp->b_bufsize)
+ panic("ffs_indirtrunc: bad buffer size");
+ bp->b_blkno = dbn;
+ VOP_STRATEGY(bp);
+ error = biowait(bp);
+ }
+ if (error) {
+ brelse(bp);
+ *countp = 0;
+ return (error);
+ }
+
+ /*
+ * Keep a private copy of the pointers: the buffer is zeroed from
+ * entry last + 1 onward and written out before any block is freed,
+ * and the freeing below walks the saved copy.
+ */
+ bap = (daddr_t *)bp->b_data;
+ MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
+ bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
+ bzero((caddr_t)&bap[last + 1],
+ (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
+ /* Nothing in this block is kept, so mark the buffer invalid. */
+ if (last == -1)
+ bp->b_flags |= B_INVAL;
+ error = bwrite(bp);
+ if (error)
+ allerror = error;
+ bap = copy;
+
+ /*
+ * Recursively free totally unused blocks.
+ * The loop runs from the highest entry down to last + 1; on exit
+ * i == last and nlbn has been advanced accordingly, and both are
+ * relied on by the partial-block case that follows.
+ */
+ for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
+ i--, nlbn += factor) {
+ nb = bap[i];
+ if (nb == 0)
+ continue;
+ if (level > SINGLE) {
+ if (error = ffs_indirtrunc(ip, nlbn,
+ fsbtodb(fs, nb), (daddr_t)-1, level - 1, &blkcount))
+ allerror = error;
+ blocksreleased += blkcount;
+ }
+ ffs_blkfree(ip, nb, fs->fs_bsize);
+ blocksreleased += nblocks;
+ }
+
+ /*
+ * Recursively free last partial block.
+ * The block itself is kept; only entries past the remainder
+ * lastbn % factor are released by the recursive call.
+ */
+ if (level > SINGLE && lastbn >= 0) {
+ last = lastbn % factor;
+ nb = bap[i];
+ if (nb != 0) {
+ if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
+ last, level - 1, &blkcount))
+ allerror = error;
+ blocksreleased += blkcount;
+ }
+ }
+ FREE(copy, M_TEMP);
+ *countp = blocksreleased;
+ return (allerror);
+}
diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c
new file mode 100644
index 000000000000..c251b16e6970
--- /dev/null
+++ b/sys/ufs/ffs/ffs_subr.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ffs_subr.c 8.2 (Berkeley) 9/21/93
+ */
+
+#include <sys/param.h>
+#include <ufs/ffs/fs.h>
+
+#ifdef KERNEL
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <ufs/ffs/ffs_extern.h>
+#include <sys/buf.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+
+/*
+ * Read the block of directory a_vp that contains byte offset a_offset
+ * and hand the buffer back through a_bpp.  When a_res is non-null it
+ * is set to point at the byte for a_offset inside that buffer.  On a
+ * read error a_bpp is left NULL and the error is returned.
+ */
+int
+ffs_blkatoff(ap)
+ struct vop_blkatoff_args /* {
+ struct vnode *a_vp;
+ off_t a_offset;
+ char **a_res;
+ struct buf **a_bpp;
+ } */ *ap;
+{
+ struct vnode *dvp = ap->a_vp;
+ struct inode *dp = VTOI(dvp);
+ register struct fs *fsp = dp->i_fs;
+ struct buf *blkbuf;
+ daddr_t blk;
+ int nbytes, rv;
+
+ /* Logical block holding the offset, and that block's size. */
+ blk = lblkno(fsp, ap->a_offset);
+ nbytes = blksize(fsp, dp, blk);
+
+ *ap->a_bpp = NULL;
+ rv = bread(dvp, blk, nbytes, NOCRED, &blkbuf);
+ if (rv != 0) {
+ brelse(blkbuf);
+ return (rv);
+ }
+ if (ap->a_res != NULL)
+ *ap->a_res = (char *)blkbuf->b_data + blkoff(fsp, ap->a_offset);
+ *ap->a_bpp = blkbuf;
+ return (0);
+}
+#endif
+
+/*
+ * Update the frsum fields to reflect addition or deletion
+ * of some frags.
+ *
+ * fs        file system; fs_frag selects the fragtbl row and bounds the scan
+ * fragmap   bit map used to index fragtbl and matched against the patterns
+ *           (presumably the fragment bits of one block -- confirm)
+ * fraglist  counters indexed by run size; fraglist[siz] is adjusted by cnt
+ *           once for every match of size siz
+ * cnt       amount added to each matching counter
+ *
+ * NOTE(review): fragtbl, around and inside are tables defined outside
+ * this chunk; the description of their contents is inferred from use.
+ */
+void
+ffs_fragacct(fs, fragmap, fraglist, cnt)
+ struct fs *fs;
+ int fragmap;
+ long fraglist[];
+ int cnt;
+{
+ int inblk;
+ register int field, subfield;
+ register int siz, pos;
+
+ /* Both the table entry and the map are shifted left by one bit. */
+ inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1;
+ fragmap <<= 1;
+ for (siz = 1; siz < fs->fs_frag; siz++) {
+ /* Skip sizes whose bit is not set in the table entry. */
+ if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0)
+ continue;
+ field = around[siz];
+ subfield = inside[siz];
+ /* Slide the mask/pattern pair across the map one bit at a time. */
+ for (pos = siz; pos <= fs->fs_frag; pos++) {
+ if ((fragmap & field) == subfield) {
+ fraglist[siz] += cnt;
+ /* Step past the run just counted. */
+ pos += siz;
+ field <<= siz;
+ subfield <<= siz;
+ }
+ field <<= 1;
+ subfield <<= 1;
+ }
+ }
+}
+
+#if defined(KERNEL) && defined(DIAGNOSTIC)
+/*
+ * Diagnostic check: walk every buffer header in buf[0 .. nbuf) and
+ * panic if another valid buffer that maps to the same device vnode
+ * as ip (ip->i_devvp) covers any of the disk blocks spanned by bp.
+ *
+ * bp  buffer whose disk block range [b_blkno, b_blkno + btodb(b_bcount))
+ *     is checked
+ * ip  inode supplying the device vnode to compare against
+ */
+void
+ffs_checkoverlap(bp, ip)
+ struct buf *bp;
+ struct inode *ip;
+{
+ register struct buf *ebp, *ep;
+ register daddr_t start, last;
+ struct vnode *vp;
+
+ ebp = &buf[nbuf];
+ start = bp->b_blkno;
+ last = start + btodb(bp->b_bcount) - 1;
+ for (ep = buf; ep < ebp; ep++) {
+ /* Ignore bp itself, invalid buffers and buffers with no vnode. */
+ if (ep == bp || (ep->b_flags & B_INVAL) ||
+ ep->b_vp == NULLVP)
+ continue;
+ /* VOP_BMAP is used only to obtain the underlying vnode in vp. */
+ if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL))
+ continue;
+ if (vp != ip->i_devvp)
+ continue;
+ /* look for overlap */
+ if (ep->b_bcount == 0 || ep->b_blkno > last ||
+ ep->b_blkno + btodb(ep->b_bcount) <= start)
+ continue;
+ vprint("Disk overlap", vp);
+ (void)printf("\tstart %d, end %d overlap start %d, end %d\n",
+ start, last, ep->b_blkno,
+ ep->b_blkno + btodb(ep->b_bcount) - 1);
+ panic("Disk buffer overlap");
+ }
+}
+#endif /* DIAGNOSTIC */
+
+/*
+ * block operations
+ *
+ * Test whether block h is available in map cp: returns non-zero when
+ * every one of the fs_frag bits belonging to the block is set.
+ * Panics if fs_frag is not 8, 4, 2 or 1.
+ */
+int
+ffs_isblock(fs, cp, h)
+ struct fs *fs;
+ unsigned char *cp;
+ daddr_t h;
+{
+ int frag = (int)fs->fs_frag;
+ unsigned char want;
+
+ /* One block per byte: the whole byte must be set. */
+ if (frag == 8)
+ return (cp[h] == 0xff);
+ /* Two blocks per byte. */
+ if (frag == 4) {
+ want = 0x0f << ((h & 0x1) << 2);
+ return ((cp[h >> 1] & want) == want);
+ }
+ /* Four blocks per byte. */
+ if (frag == 2) {
+ want = 0x03 << ((h & 0x3) << 1);
+ return ((cp[h >> 2] & want) == want);
+ }
+ /* Eight blocks per byte. */
+ if (frag == 1) {
+ want = 0x01 << (h & 0x7);
+ return ((cp[h >> 3] & want) == want);
+ }
+ panic("ffs_isblock");
+}
+
+/*
+ * Take block h out of map cp by clearing all fs_frag bits that
+ * belong to it.  Panics if fs_frag is not 8, 4, 2 or 1.
+ */
+void
+ffs_clrblock(fs, cp, h)
+ struct fs *fs;
+ u_char *cp;
+ daddr_t h;
+{
+ int frag = (int)fs->fs_frag;
+
+ if (frag == 8) {
+ /* One block per byte. */
+ cp[h] = 0;
+ } else if (frag == 4) {
+ /* Two blocks per byte. */
+ cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2));
+ } else if (frag == 2) {
+ /* Four blocks per byte. */
+ cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1));
+ } else if (frag == 1) {
+ /* Eight blocks per byte. */
+ cp[h >> 3] &= ~(0x01 << (h & 0x7));
+ } else {
+ panic("ffs_clrblock");
+ }
+}
+
+/*
+ * Put block h into map cp by setting all fs_frag bits that
+ * belong to it.  Panics if fs_frag is not 8, 4, 2 or 1.
+ */
+void
+ffs_setblock(fs, cp, h)
+ struct fs *fs;
+ unsigned char *cp;
+ daddr_t h;
+{
+ int frag = (int)fs->fs_frag;
+
+ if (frag == 8) {
+ /* One block per byte. */
+ cp[h] = 0xff;
+ } else if (frag == 4) {
+ /* Two blocks per byte. */
+ cp[h >> 1] |= (0x0f << ((h & 0x1) << 2));
+ } else if (frag == 2) {
+ /* Four blocks per byte. */
+ cp[h >> 2] |= (0x03 << ((h & 0x3) << 1));
+ } else if (frag == 1) {
+ /* Eight blocks per byte. */
+ cp[h >> 3] |= (0x01 << (h & 0x7));
+ } else {
+ panic("ffs_setblock");
+ }
+}
diff --git a/sys/ufs/ffs/ffs_tables.c b/sys/ufs/ffs/ffs_tables.c
new file mode 100644
index 000000000000..8cf46b0150a5
--- /dev/null
+++ b/sys/ufs/ffs/ffs_tables.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ffs_tables.c 8.1 (Berkeley) 6/11/93
+ */
+
+#include <sys/param.h>
+
+/*
+ * Bit patterns for identifying fragments in the block map
+ * used as ((map & around) == inside)
+ *
+ * around[n] has the low n + 2 bits set and inside[n] is the same
+ * pattern with its lowest and highest bits cleared, so the test
+ * matches a run of exactly n set bits with a clear bit on each side.
+ */
+int around[9] = {
+ 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff
+};
+int inside[9] = {
+ 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe
+};
+
+/*
+ * Given a block map bit pattern, the frag tables tell whether a
+ * particular size fragment is available.
+ *
+ * used as:
+ * if ((1 << (size - 1)) & fragtbl[fs->fs_frag][map]) {
+ * at least one fragment of the indicated size is available
+ * }
+ *
+ * These tables are used by the scanc instruction on the VAX to
+ * quickly find an appropriate fragment.
+ */
+u_char fragtbl124[256] = {
+ 0x00, 0x16, 0x16, 0x2a, 0x16, 0x16, 0x26, 0x4e,
+ 0x16, 0x16, 0x16, 0x3e, 0x2a, 0x3e, 0x4e, 0x8a,
+ 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+ 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+ 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+ 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+ 0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e,
+ 0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa,
+ 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+ 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+ 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+ 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+ 0x26, 0x36, 0x36, 0x2e, 0x36, 0x36, 0x26, 0x6e,
+ 0x36, 0x36, 0x36, 0x3e, 0x2e, 0x3e, 0x6e, 0xae,
+ 0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e,
+ 0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce,
+ 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+ 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+ 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+ 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+ 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+ 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+ 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e,
+ 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe,
+ 0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e,
+ 0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa,
+ 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e,
+ 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe,
+ 0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e,
+ 0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce,
+ 0x8a, 0x9e, 0x9e, 0xaa, 0x9e, 0x9e, 0xae, 0xce,
+ 0x9e, 0x9e, 0x9e, 0xbe, 0xaa, 0xbe, 0xce, 0x8a,
+};
+
+u_char fragtbl8[256] = {
+ 0x00, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x04,
+ 0x01, 0x01, 0x01, 0x03, 0x02, 0x03, 0x04, 0x08,
+ 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+ 0x02, 0x03, 0x03, 0x02, 0x04, 0x05, 0x08, 0x10,
+ 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+ 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
+ 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06,
+ 0x04, 0x05, 0x05, 0x06, 0x08, 0x09, 0x10, 0x20,
+ 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+ 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
+ 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+ 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11,
+ 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06,
+ 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a,
+ 0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04,
+ 0x08, 0x09, 0x09, 0x0a, 0x10, 0x11, 0x20, 0x40,
+ 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+ 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
+ 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+ 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11,
+ 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+ 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07,
+ 0x05, 0x05, 0x05, 0x07, 0x09, 0x09, 0x11, 0x21,
+ 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06,
+ 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a,
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07,
+ 0x02, 0x03, 0x03, 0x02, 0x06, 0x07, 0x0a, 0x12,
+ 0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04,
+ 0x05, 0x05, 0x05, 0x07, 0x06, 0x07, 0x04, 0x0c,
+ 0x08, 0x09, 0x09, 0x0a, 0x09, 0x09, 0x0a, 0x0c,
+ 0x10, 0x11, 0x11, 0x12, 0x20, 0x21, 0x40, 0x80,
+};
+
+/*
+ * The actual fragtbl array.
+ *
+ * Indexed by the number of fragments per block: entries 1, 2 and 4
+ * share fragtbl124, entry 8 uses fragtbl8, and every other entry is a
+ * null pointer.  The initializer lists nine entries (indices 0..8).
+ */
+u_char *fragtbl[MAXFRAG + 1] = {
+ 0, fragtbl124, fragtbl124, 0, fragtbl124, 0, 0, 0, fragtbl8,
+};
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
new file mode 100644
index 000000000000..505dd5db8cbf
--- /dev/null
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -0,0 +1,843 @@
+/*
+ * Copyright (c) 1989, 1991, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/file.h>
+#include <sys/disklabel.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+int ffs_sbupdate __P((struct ufsmount *, int));
+
+/*
+ * Filesystem operations vector for this filesystem.  The initializer
+ * is positional, so the entries must stay in the member order of
+ * struct vfsops (declared elsewhere).  The start, root and quotactl
+ * slots use the ufs_* routines; the remaining slots use the ffs_*
+ * routines.
+ */
+struct vfsops ufs_vfsops = {
+ ffs_mount,
+ ufs_start,
+ ffs_unmount,
+ ufs_root,
+ ufs_quotactl,
+ ffs_statfs,
+ ffs_sync,
+ ffs_vget,
+ ffs_fhtovp,
+ ffs_vptofh,
+ ffs_init,
+};
+
+extern u_long nextgennumber;
+
+/*
+ * Called by main() when ufs is going to be mounted as root.
+ *
+ * Name is updated by mount(8) after booting.
+ *
+ * Builds a read-only mount structure for rootvp, mounts it through
+ * ffs_mountfs(), links it onto mountlist and records "/" as the mount
+ * point and ROOTNAME as the device name.  Returns 0 on success, or the
+ * error from ffs_mountfs() or vfs_lock(); in both failure cases the
+ * mount structure is freed before returning.
+ */
+#define ROOTNAME "root_device"
+
+ffs_mountroot()
+{
+ extern struct vnode *rootvp;
+ register struct fs *fs;
+ register struct mount *mp;
+ struct proc *p = curproc; /* XXX */
+ struct ufsmount *ump;
+ u_int size;
+ int error;
+
+ /*
+ * Get vnodes for swapdev and rootdev.
+ */
+ if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
+ panic("ffs_mountroot: can't setup bdevvp's");
+
+ /* Allocate a zeroed mount structure; root starts out read-only. */
+ mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+ bzero((char *)mp, (u_long)sizeof(struct mount));
+ mp->mnt_op = &ufs_vfsops;
+ mp->mnt_flag = MNT_RDONLY;
+ if (error = ffs_mountfs(rootvp, mp, p)) {
+ free(mp, M_MOUNT);
+ return (error);
+ }
+ if (error = vfs_lock(mp)) {
+ /* Undo the mount done above before giving up. */
+ (void)ffs_unmount(mp, 0, p);
+ free(mp, M_MOUNT);
+ return (error);
+ }
+ TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+ mp->mnt_flag |= MNT_ROOTFS;
+ mp->mnt_vnodecovered = NULLVP;
+ ump = VFSTOUFS(mp);
+ fs = ump->um_fs;
+ /* Record "/" as the mounted-on name, zero padded. */
+ bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
+ fs->fs_fsmnt[0] = '/';
+ bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+ MNAMELEN);
+ /* Record the placeholder device name and zero the rest of the field. */
+ (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ (void)ffs_statfs(mp, &mp->mnt_stat, p);
+ vfs_unlock(mp);
+ /* Hand the superblock's timestamp to inittodr(). */
+ inittodr(fs->fs_time);
+ return (0);
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * Copies the ufs_args structure in from user space ("data").  For an
+ * update (MNT_UPDATE) it flushes files when going from read/write to
+ * read-only, reloads the filesystem on MNT_RELOAD, clears fs_ronly on
+ * MNT_WANTRDWR, and hands export requests to vfs_export() when no
+ * device name was supplied.  Otherwise it looks up args.fspec, checks
+ * that it names a block device with a valid major number, and (for a
+ * new mount) mounts it through ffs_mountfs().  On success the mount
+ * point and device names are recorded and the statistics refreshed.
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+ffs_mount(mp, path, data, ndp, p)
+ register struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ struct vnode *devvp;
+ struct ufs_args args;
+ struct ufsmount *ump;
+ register struct fs *fs;
+ u_int size;
+ int error, flags;
+
+ if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args)))
+ return (error);
+ /*
+ * If updating, check whether changing from read-only to
+ * read/write; if there is no device name, that's all we do.
+ */
+ if (mp->mnt_flag & MNT_UPDATE) {
+ ump = VFSTOUFS(mp);
+ fs = ump->um_fs;
+ error = 0;
+ /* Read/write to read-only: flush files open for writing. */
+ if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
+ flags = WRITECLOSE;
+ if (mp->mnt_flag & MNT_FORCE)
+ flags |= FORCECLOSE;
+ if (vfs_busy(mp))
+ return (EBUSY);
+ error = ffs_flushfiles(mp, flags, p);
+ vfs_unbusy(mp);
+ }
+ if (!error && (mp->mnt_flag & MNT_RELOAD))
+ error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
+ if (error)
+ return (error);
+ if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR))
+ fs->fs_ronly = 0;
+ if (args.fspec == 0) {
+ /*
+ * Process export requests.
+ */
+ return (vfs_export(mp, &ump->um_export, &args.export));
+ }
+ }
+ /*
+ * Not an update, or updating the name: look up the name
+ * and verify that it refers to a sensible block device.
+ */
+ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+ if (error = namei(ndp))
+ return (error);
+ devvp = ndp->ni_vp;
+
+ if (devvp->v_type != VBLK) {
+ vrele(devvp);
+ return (ENOTBLK);
+ }
+ if (major(devvp->v_rdev) >= nblkdev) {
+ vrele(devvp);
+ return (ENXIO);
+ }
+ if ((mp->mnt_flag & MNT_UPDATE) == 0)
+ error = ffs_mountfs(devvp, mp, p);
+ else {
+ /*
+ * Updating: the name must resolve to the device already
+ * mounted.  On a match the reference from namei() is dropped
+ * here; on a mismatch it is dropped by the error path below.
+ */
+ if (devvp != ump->um_devvp)
+ error = EINVAL; /* needs translation */
+ else
+ vrele(devvp);
+ }
+ if (error) {
+ vrele(devvp);
+ return (error);
+ }
+ ump = VFSTOUFS(mp);
+ fs = ump->um_fs;
+ /* Record the mount point name, zero padded, in both places. */
+ (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size);
+ bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size);
+ bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+ MNAMELEN);
+ /* Record the device name, zero padded. */
+ (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ (void)ffs_statfs(mp, &mp->mnt_stat, p);
+ return (0);
+}
+
+/*
+ * Reload all incore data for a filesystem (used after running fsck on
+ * the root filesystem and finding things to fix). The filesystem must
+ * be mounted read-only.
+ *
+ * Things to do to update the mount:
+ *	1) invalidate all cached meta-data.
+ *	2) re-read superblock from disk.
+ *	3) re-read summary information from disk.
+ *	4) invalidate all inactive vnodes.
+ *	5) invalidate all cached file data.
+ *	6) re-read inode data for all active vnodes.
+ *
+ * Returns 0 on success, EINVAL when the filesystem is not mounted
+ * read-only, EIO when the superblock read back from disk fails its
+ * sanity checks, or the error reported by bread().
+ *
+ * A buffer handed back by a failed bread() is still released with
+ * brelse() before returning, as ffs_vget() and ffs_mountfs() do, so
+ * that an I/O error does not leave the buffer held.
+ */
+int
+ffs_reload(mountp, cred, p)
+	register struct mount *mountp;
+	struct ucred *cred;
+	struct proc *p;
+{
+	register struct vnode *vp, *nvp, *devvp;
+	struct inode *ip;
+	struct buf *bp;
+	struct fs *fs;
+	int i, blks, size, error;
+
+	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
+		return (EINVAL);
+	/*
+	 * Step 1: invalidate all cached meta-data.
+	 */
+	devvp = VFSTOUFS(mountp)->um_devvp;
+	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
+		panic("ffs_reload: dirty1");
+	/*
+	 * Step 2: re-read superblock from disk.
+	 */
+	if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) {
+		brelse(bp);
+		return (error);
+	}
+	fs = (struct fs *)bp->b_data;
+	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
+	    fs->fs_bsize < sizeof(struct fs)) {
+		brelse(bp);
+		return (EIO);		/* XXX needs translation */
+	}
+	/*
+	 * Carry the in-core summary pointers over into the fresh copy
+	 * before it overwrites the in-core superblock.
+	 */
+	fs = VFSTOUFS(mountp)->um_fs;
+	bcopy(&fs->fs_csp[0], &((struct fs *)bp->b_data)->fs_csp[0],
+	    sizeof(fs->fs_csp));
+	bcopy(bp->b_data, fs, (u_int)fs->fs_sbsize);
+	if (fs->fs_sbsize < SBSIZE)
+		bp->b_flags |= B_INVAL;
+	brelse(bp);
+	ffs_oldfscompat(fs);
+	/*
+	 * Step 3: re-read summary information from disk.
+	 */
+	blks = howmany(fs->fs_cssize, fs->fs_fsize);
+	for (i = 0; i < blks; i += fs->fs_frag) {
+		size = fs->fs_bsize;
+		if (i + fs->fs_frag > blks)
+			size = (blks - i) * fs->fs_fsize;
+		if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
+		    NOCRED, &bp)) {
+			brelse(bp);
+			return (error);
+		}
+		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
+		brelse(bp);
+	}
+loop:
+	for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
+		/* Fetch the successor first; vp may be disposed of below. */
+		nvp = vp->v_mntvnodes.le_next;
+		/*
+		 * Step 4: invalidate all inactive vnodes.
+		 */
+		if (vp->v_usecount == 0) {
+			vgone(vp);
+			continue;
+		}
+		/*
+		 * Step 5: invalidate all cached file data.
+		 */
+		if (vget(vp, 1))
+			goto loop;
+		if (vinvalbuf(vp, 0, cred, p, 0, 0))
+			panic("ffs_reload: dirty2");
+		/*
+		 * Step 6: re-read inode data for all active vnodes.
+		 */
+		ip = VTOI(vp);
+		if (error =
+		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
+		    (int)fs->fs_bsize, NOCRED, &bp)) {
+			brelse(bp);
+			vput(vp);
+			return (error);
+		}
+		ip->i_din = *((struct dinode *)bp->b_data +
+		    ino_to_fsbo(fs, ip->i_number));
+		brelse(bp);
+		vput(vp);
+		/* Restart the scan if the vnode left this mount meanwhile. */
+		if (vp->v_mount != mountp)
+			goto loop;
+	}
+	return (0);
+}
+
+/*
+ * Common code for mount and mountroot
+ *
+ * Opens the block device devvp, reads and sanity-checks the
+ * superblock, copies it and the cylinder group summary area into
+ * freshly allocated memory, and fills in the ufsmount and mount
+ * structures.  Returns 0 on success.  On failure it returns an errno
+ * value; once the device has been opened, failures go through "out",
+ * which releases any held buffer, closes the device and frees the
+ * ufsmount and its superblock copy.
+ */
+int
+ffs_mountfs(devvp, mp, p)
+ register struct vnode *devvp;
+ struct mount *mp;
+ struct proc *p;
+{
+ register struct ufsmount *ump;
+ struct buf *bp;
+ register struct fs *fs;
+ dev_t dev = devvp->v_rdev;
+ struct partinfo dpart;
+ caddr_t base, space;
+ int havepart = 0, blks;
+ int error, i, size;
+ int ronly;
+ extern struct vnode *rootvp;
+
+ /*
+ * Disallow multiple mounts of the same device.
+ * Disallow mounting of a device that is currently in use
+ * (except for root, which might share swap device for miniroot).
+ * Flush out any old buffers remaining from a previous use.
+ */
+ if (error = vfs_mountedon(devvp))
+ return (error);
+ if (vcount(devvp) > 1 && devvp != rootvp)
+ return (EBUSY);
+ if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))
+ return (error);
+
+ ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+ if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))
+ return (error);
+ /*
+ * NOTE(review): neither havepart nor the size chosen here is read
+ * again in this function; size is overwritten in the summary loop.
+ */
+ if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
+ size = DEV_BSIZE;
+ else {
+ havepart = 1;
+ size = dpart.disklab->d_secsize;
+ }
+
+ /* bp and ump are tested at "out", so start both off as NULL. */
+ bp = NULL;
+ ump = NULL;
+ if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp))
+ goto out;
+ fs = (struct fs *)bp->b_data;
+ if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
+ fs->fs_bsize < sizeof(struct fs)) {
+ error = EINVAL; /* XXX needs translation */
+ goto out;
+ }
+ /* Keep a private copy of the superblock; fs points at it from here on. */
+ ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
+ bzero((caddr_t)ump, sizeof *ump);
+ ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
+ M_WAITOK);
+ bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
+ if (fs->fs_sbsize < SBSIZE)
+ bp->b_flags |= B_INVAL;
+ brelse(bp);
+ bp = NULL;
+ fs = ump->um_fs;
+ fs->fs_ronly = ronly;
+ if (ronly == 0)
+ fs->fs_fmod = 1;
+ /*
+ * Read the summary area one block at a time into a single
+ * allocation; fs_csp[] gets a pointer to each block-sized piece.
+ */
+ blks = howmany(fs->fs_cssize, fs->fs_fsize);
+ base = space = malloc((u_long)fs->fs_cssize, M_UFSMNT,
+ M_WAITOK);
+ for (i = 0; i < blks; i += fs->fs_frag) {
+ size = fs->fs_bsize;
+ /* The final piece may be shorter than a full block. */
+ if (i + fs->fs_frag > blks)
+ size = (blks - i) * fs->fs_fsize;
+ error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
+ NOCRED, &bp);
+ if (error) {
+ free(base, M_UFSMNT);
+ goto out;
+ }
+ bcopy(bp->b_data, space, (u_int)size);
+ fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
+ space += size;
+ brelse(bp);
+ bp = NULL;
+ }
+ /* Everything was read; publish the mount. */
+ mp->mnt_data = (qaddr_t)ump;
+ mp->mnt_stat.f_fsid.val[0] = (long)dev;
+ mp->mnt_stat.f_fsid.val[1] = MOUNT_UFS;
+ mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
+ mp->mnt_flag |= MNT_LOCAL;
+ ump->um_mountp = mp;
+ ump->um_dev = dev;
+ ump->um_devvp = devvp;
+ ump->um_nindir = fs->fs_nindir;
+ ump->um_bptrtodb = fs->fs_fsbtodb;
+ ump->um_seqinc = fs->fs_frag;
+ for (i = 0; i < MAXQUOTAS; i++)
+ ump->um_quotas[i] = NULLVP;
+ devvp->v_specflags |= SI_MOUNTEDON;
+ ffs_oldfscompat(fs);
+ return (0);
+out:
+ /* Failure: release what was acquired after the device was opened. */
+ if (bp)
+ brelse(bp);
+ (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
+ if (ump) {
+ free(ump->um_fs, M_UFSMNT);
+ free(ump, M_UFSMNT);
+ mp->mnt_data = (qaddr_t)0;
+ }
+ return (error);
+}
+
+/*
+ * Sanity checks for old file systems.
+ *
+ * Patches up superblock fields in place: fs_npsect and fs_interleave
+ * are raised to their minimum values, fs_nrpos is forced to 8 for the
+ * FS_42POSTBLFMT layout, and for inode formats older than
+ * FS_44INODEFMT the maximum file size and the qbmask/qfmask fields are
+ * recomputed.  Always returns 0.
+ *
+ * XXX - goes away some day.
+ */
+ffs_oldfscompat(fs)
+	struct fs *fs;
+{
+	quad_t indirsize;
+	int level;
+
+	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);
+	fs->fs_interleave = max(fs->fs_interleave, 1);
+	if (fs->fs_postblformat == FS_42POSTBLFMT)
+		fs->fs_nrpos = 8;
+	/* Nothing more to patch up for the current inode format. */
+	if (fs->fs_inodefmt >= FS_44INODEFMT)
+		return (0);
+	/*
+	 * Largest file: the direct blocks plus whatever each level of
+	 * indirection can address.
+	 */
+	indirsize = fs->fs_bsize;
+	fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;
+	for (level = 0; level < NIADDR; level++) {
+		indirsize *= NINDIR(fs);
+		fs->fs_maxfilesize += indirsize;
+	}
+	fs->fs_qbmask = ~fs->fs_bmask;
+	fs->fs_qfmask = ~fs->fs_fmask;
+	return (0);
+}
+
+/*
+ * unmount system call
+ *
+ * Flushes every file on the filesystem, closes the underlying device,
+ * and frees the in-core superblock, summary area and ufsmount.
+ *
+ * mntflags: MNT_FORCE requests a forced unmount; that is refused with
+ *	EINVAL for the root filesystem.
+ *
+ * Returns the error from ffs_flushfiles() (nothing is torn down in
+ * that case), otherwise the result of closing the device.
+ */
+int
+ffs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	register struct ufsmount *ump;
+	register struct fs *fs;
+	int error, flags, ronly;
+
+	flags = 0;
+	if (mntflags & MNT_FORCE) {
+		if (mp->mnt_flag & MNT_ROOTFS)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+	if (error = ffs_flushfiles(mp, flags, p))
+		return (error);
+	ump = VFSTOUFS(mp);
+	fs = ump->um_fs;
+	/*
+	 * Close the device with the access mode that matches the
+	 * filesystem's read-only state: FREAD alone for a read-only
+	 * filesystem, FREAD|FWRITE otherwise.  This mirrors the mode
+	 * selection ffs_mountfs() uses when it opens the device.
+	 */
+	ronly = fs->fs_ronly != 0;
+	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
+	error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE,
+	    NOCRED, p);
+	vrele(ump->um_devvp);
+	/* fs_csp[0] is the base of the single summary-area allocation. */
+	free(fs->fs_csp[0], M_UFSMNT);
+	free(fs, M_UFSMNT);
+	free(ump, M_UFSMNT);
+	mp->mnt_data = (qaddr_t)0;
+	mp->mnt_flag &= ~MNT_LOCAL;
+	return (error);
+}
+
+/*
+ * Flush out all the files in a filesystem.
+ *
+ * FORCECLOSE is stripped from flags unless the global doforce is set.
+ * With QUOTA configured and quotas enabled on the mount, ordinary
+ * vnodes are flushed first and every active quota type is switched
+ * off before the final flush.  Returns the vflush() result.
+ */
+ffs_flushfiles(mp, flags, p)
+	register struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	extern int doforce;
+	register struct ufsmount *ufsmnt;
+	int qtype, error;
+
+	if (!doforce)
+		flags &= ~FORCECLOSE;
+	ufsmnt = VFSTOUFS(mp);
+#ifdef QUOTA
+	if (mp->mnt_flag & MNT_QUOTA) {
+		if (error = vflush(mp, NULLVP, SKIPSYSTEM|flags))
+			return (error);
+		for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
+			if (ufsmnt->um_quotas[qtype] != NULLVP)
+				quotaoff(p, mp, qtype);
+		}
+		/*
+		 * The vflush below then ensures that the system
+		 * vnodes skipped above are gotten rid of as well.
+		 */
+	}
+#endif
+	return (vflush(mp, NULLVP, flags));
+}
+
+/*
+ * Get file system statistics.
+ *
+ * Fills *sbp from the in-core superblock of mp.  When sbp is not the
+ * mount's own mnt_stat, the mounted-on and mounted-from names are
+ * copied over from mnt_stat as well.  Panics if the superblock magic
+ * number is wrong; otherwise returns 0.
+ */
+int
+ffs_statfs(mp, sbp, p)
+	struct mount *mp;
+	register struct statfs *sbp;
+	struct proc *p;
+{
+	register struct fs *fs;
+
+	fs = VFSTOUFS(mp)->um_fs;
+	if (fs->fs_magic != FS_MAGIC)
+		panic("ffs_statfs");
+
+	/* Sizes: f_bsize is the fragment size, f_iosize the block size. */
+	sbp->f_type = MOUNT_UFS;
+	sbp->f_bsize = fs->fs_fsize;
+	sbp->f_iosize = fs->fs_bsize;
+	sbp->f_blocks = fs->fs_dsize;
+
+	/* Free space, in fragments: whole free blocks plus free fragments. */
+	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
+	    fs->fs_cstotal.cs_nffree;
+	/* Available space excludes the fs_minfree percent reserve. */
+	sbp->f_bavail = (fs->fs_dsize * (100 - fs->fs_minfree) / 100) -
+	    (fs->fs_dsize - sbp->f_bfree);
+
+	/* Inode counts. */
+	sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
+	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
+
+	if (sbp == &mp->mnt_stat)
+		return (0);
+	bcopy((caddr_t)mp->mnt_stat.f_mntonname,
+	    (caddr_t)&sbp->f_mntonname[0], MNAMELEN);
+	bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
+	    (caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
+	return (0);
+}
+
+/*
+ * Go through the disk queues to initiate sandbagged IO;
+ * go through the inodes to write those that have been modified;
+ * initiate the writing of the super block if it has been modified.
+ *
+ * Note: we are always called with the filesystem marked `MPBUSY'.
+ *
+ * waitfor is handed on to ffs_sbupdate() and VOP_FSYNC().  Returns 0
+ * when every step succeeded; otherwise the error of the last step
+ * that failed.
+ */
+int
+ffs_sync(mp, waitfor, cred, p)
+ struct mount *mp;
+ int waitfor;
+ struct ucred *cred;
+ struct proc *p;
+{
+ register struct vnode *vp;
+ register struct inode *ip;
+ register struct ufsmount *ump = VFSTOUFS(mp);
+ register struct fs *fs;
+ int error, allerror = 0;
+
+ fs = ump->um_fs;
+ /*
+ * Write back modified superblock.
+ * Consistency check that the superblock
+ * is still in the buffer cache.
+ */
+ if (fs->fs_fmod != 0) {
+ /* A read-only filesystem must never be marked modified. */
+ if (fs->fs_ronly != 0) { /* XXX */
+ printf("fs = %s\n", fs->fs_fsmnt);
+ panic("update: rofs mod");
+ }
+ fs->fs_fmod = 0;
+ fs->fs_time = time.tv_sec;
+ allerror = ffs_sbupdate(ump, waitfor);
+ }
+ /*
+ * Write back each (modified) inode.
+ */
+loop:
+ for (vp = mp->mnt_vnodelist.lh_first;
+ vp != NULL;
+ vp = vp->v_mntvnodes.le_next) {
+ /*
+ * If the vnode that we are about to sync is no longer
+ * associated with this mount point, start over.
+ */
+ if (vp->v_mount != mp)
+ goto loop;
+ /* Locked vnodes are skipped rather than waited for. */
+ if (VOP_ISLOCKED(vp))
+ continue;
+ ip = VTOI(vp);
+ /* Skip vnodes with no pending inode flags and no dirty buffers. */
+ if ((ip->i_flag &
+ (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
+ vp->v_dirtyblkhd.lh_first == NULL)
+ continue;
+ /* If the vnode cannot be obtained, rescan the list from the top. */
+ if (vget(vp, 1))
+ goto loop;
+ if (error = VOP_FSYNC(vp, cred, waitfor, p))
+ allerror = error;
+ vput(vp);
+ }
+ /*
+ * Force stale file system control information to be flushed.
+ */
+ if (error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p))
+ allerror = error;
+#ifdef QUOTA
+ qsync(mp);
+#endif
+ return (allerror);
+}
+
+/*
+ * Look up a FFS dinode number to find its incore vnode, otherwise read it
+ * in from disk. If it is in core, wait for the lock bit to clear, then
+ * return the inode locked. Detection and handling of mount points must be
+ * done by the calling routine.
+ *
+ * On success *vpp holds the vnode and 0 is returned.  On failure *vpp
+ * is set to NULL and the error from getnewvnode(), bread() or
+ * ufs_vinit() is returned.
+ */
+int
+ffs_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+ register struct fs *fs;
+ register struct inode *ip;
+ struct ufsmount *ump;
+ struct buf *bp;
+ struct vnode *vp;
+ dev_t dev;
+ int i, type, error;
+
+ ump = VFSTOUFS(mp);
+ dev = ump->um_dev;
+ /* Already in the inode hash: nothing more to do. */
+ if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
+ return (0);
+
+ /* Allocate a new vnode/inode. */
+ if (error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) {
+ *vpp = NULL;
+ return (error);
+ }
+ /* The malloc type depends on whether the device vnode is tagged VT_MFS. */
+ type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */
+ MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
+ bzero((caddr_t)ip, sizeof(struct inode));
+ vp->v_data = ip;
+ ip->i_vnode = vp;
+ ip->i_fs = fs = ump->um_fs;
+ ip->i_dev = dev;
+ ip->i_number = ino;
+#ifdef QUOTA
+ for (i = 0; i < MAXQUOTAS; i++)
+ ip->i_dquot[i] = NODQUOT;
+#endif
+ /*
+ * Put it onto its hash chain and lock it so that other requests for
+ * this inode will block if they arrive while we are sleeping waiting
+ * for old data structures to be purged or for the contents of the
+ * disk portion of this inode to be read.
+ */
+ ufs_ihashins(ip);
+
+ /* Read in the disk contents for the inode, copy into the inode. */
+ if (error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
+ (int)fs->fs_bsize, NOCRED, &bp)) {
+ /*
+ * The inode does not contain anything useful, so it would
+ * be misleading to leave it on its hash chain. With mode
+ * still zero, it will be unlinked and returned to the free
+ * list by vput().
+ */
+ vput(vp);
+ brelse(bp);
+ *vpp = NULL;
+ return (error);
+ }
+ /* Pick this inode's dinode out of the block that was just read. */
+ ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
+ brelse(bp);
+
+ /*
+ * Initialize the vnode from the inode, check for aliases.
+ * Note that the underlying vnode may have changed.
+ */
+ if (error = ufs_vinit(mp, ffs_specop_p, FFS_FIFOOPS, &vp)) {
+ vput(vp);
+ *vpp = NULL;
+ return (error);
+ }
+ /*
+ * Finish inode initialization now that aliasing has been resolved.
+ */
+ ip->i_devvp = ump->um_devvp;
+ VREF(ip->i_devvp);
+ /*
+ * Set up a generation number for this inode if it does not
+ * already have one. This should only happen on old filesystems.
+ */
+ if (ip->i_gen == 0) {
+ /* Keep the counter at least as large as the current time. */
+ if (++nextgennumber < (u_long)time.tv_sec)
+ nextgennumber = time.tv_sec;
+ ip->i_gen = nextgennumber;
+ /* Only mark the inode modified when the mount is writable. */
+ if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
+ ip->i_flag |= IN_MODIFIED;
+ }
+ /*
+ * Ensure that uid and gid are correct. This is a temporary
+ * fix until fsck has been changed to do the update.
+ */
+ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
+ ip->i_uid = ip->i_din.di_ouid; /* XXX */
+ ip->i_gid = ip->i_din.di_ogid; /* XXX */
+ } /* XXX */
+
+ *vpp = vp;
+ return (0);
+}
+
+/*
+ * File handle to vnode
+ *
+ * Have to be really careful about stale file handles:
+ * - check that the inode number is valid
+ * - call ffs_vget() to get the locked inode
+ * - check for an unallocated inode (i_mode == 0)
+ * - check that the given client host has export rights and return
+ *   those rights via. exflagsp and credanonp
+ *
+ * Only the inode-number range check is done here; a handle that
+ * passes it is handed on to ufs_check_export(), whose result is
+ * returned.  An out-of-range inode number yields ESTALE.
+ */
+int
+ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	register struct ufid *ufhp = (struct ufid *)fhp;
+	struct fs *fs = VFSTOUFS(mp)->um_fs;
+
+	if (ufhp->ufid_ino >= ROOTINO &&
+	    ufhp->ufid_ino < fs->fs_ncg * fs->fs_ipg)
+		return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp,
+		    credanonp));
+	return (ESTALE);
+}
+
+/*
+ * Vnode pointer to File handle
+ *
+ * Fills the caller's fid, viewed as a struct ufid, with the handle
+ * length and the inode number and generation of vp.  Always returns 0.
+ */
+/* ARGSUSED */
+ffs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	register struct inode *ip = VTOI(vp);
+	register struct ufid *handle = (struct ufid *)fhp;
+
+	handle->ufid_len = sizeof(struct ufid);
+	handle->ufid_ino = ip->i_number;
+	handle->ufid_gen = ip->i_gen;
+	return (0);
+}
+
+/*
+ * Write a superblock and associated information back to disk.
+ *
+ * mp:	the ufsmount whose in-core superblock and summary area are
+ *	written out.
+ * waitfor: MNT_WAIT writes each buffer synchronously with bwrite();
+ *	any other value starts the writes with bawrite().
+ *
+ * Returns 0 when no synchronous write failed.  Otherwise it returns
+ * the error of the last bwrite() that failed, so a failure on the
+ * superblock or an early summary block is not hidden by a later write
+ * that succeeds.  Writes started with bawrite() report nothing here.
+ */
+int
+ffs_sbupdate(mp, waitfor)
+	struct ufsmount *mp;
+	int waitfor;
+{
+	register struct fs *fs = mp->um_fs;
+	register struct buf *bp;
+	int blks;
+	caddr_t space;
+	int i, size, error, allerror = 0;
+
+	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
+	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
+	/* Restore compatibility to old file systems.		   XXX */
+	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
+		((struct fs *)bp->b_data)->fs_nrpos = -1;	/* XXX */
+	if (waitfor != MNT_WAIT)
+		bawrite(bp);
+	else if (error = bwrite(bp))
+		allerror = error;
+	/* Write the summary area one block at a time. */
+	blks = howmany(fs->fs_cssize, fs->fs_fsize);
+	space = (caddr_t)fs->fs_csp[0];
+	for (i = 0; i < blks; i += fs->fs_frag) {
+		size = fs->fs_bsize;
+		/* The final piece may be shorter than a full block. */
+		if (i + fs->fs_frag > blks)
+			size = (blks - i) * fs->fs_fsize;
+		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
+		    size, 0, 0);
+		bcopy(space, bp->b_data, (u_int)size);
+		space += size;
+		if (waitfor != MNT_WAIT)
+			bawrite(bp);
+		else if (error = bwrite(bp))
+			allerror = error;
+	}
+	return (allerror);
+}
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
new file mode 100644
index 000000000000..59814f2f3787
--- /dev/null
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ffs_vnops.c 8.7 (Berkeley) 2/3/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
+#include <ufs/ufs/lockf.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+/*
+ * Global vfs data structures for ufs.
+ *
+ * ffs_vnodeop_entries pairs each vnode operation descriptor with the
+ * routine this table uses for it; the list ends with a NULL/NULL
+ * entry.  read, write, fsync, blkatoff, valloc, reallocblks, vfree,
+ * truncate and update go to ffs_* routines, bwrite goes to vn_bwrite,
+ * and the other named operations go to ufs_* routines.  The
+ * vop_default_desc entry names vn_default_error (presumably the
+ * fallback for operations that are not listed -- confirm against the
+ * code that builds the vector).  ffs_vnodeop_opv_desc ties the table
+ * to the ffs_vnodeop_p vector pointer.
+ */
+int (**ffs_vnodeop_p)();
+struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error }, /* default */
+ { &vop_lookup_desc, ufs_lookup }, /* lookup */
+ { &vop_create_desc, ufs_create }, /* create */
+ { &vop_mknod_desc, ufs_mknod }, /* mknod */
+ { &vop_open_desc, ufs_open }, /* open */
+ { &vop_close_desc, ufs_close }, /* close */
+ { &vop_access_desc, ufs_access }, /* access */
+ { &vop_getattr_desc, ufs_getattr }, /* getattr */
+ { &vop_setattr_desc, ufs_setattr }, /* setattr */
+ { &vop_read_desc, ffs_read }, /* read */
+ { &vop_write_desc, ffs_write }, /* write */
+ { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */
+ { &vop_select_desc, ufs_select }, /* select */
+ { &vop_mmap_desc, ufs_mmap }, /* mmap */
+ { &vop_fsync_desc, ffs_fsync }, /* fsync */
+ { &vop_seek_desc, ufs_seek }, /* seek */
+ { &vop_remove_desc, ufs_remove }, /* remove */
+ { &vop_link_desc, ufs_link }, /* link */
+ { &vop_rename_desc, ufs_rename }, /* rename */
+ { &vop_mkdir_desc, ufs_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, ufs_rmdir }, /* rmdir */
+ { &vop_symlink_desc, ufs_symlink }, /* symlink */
+ { &vop_readdir_desc, ufs_readdir }, /* readdir */
+ { &vop_readlink_desc, ufs_readlink }, /* readlink */
+ { &vop_abortop_desc, ufs_abortop }, /* abortop */
+ { &vop_inactive_desc, ufs_inactive }, /* inactive */
+ { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */
+ { &vop_lock_desc, ufs_lock }, /* lock */
+ { &vop_unlock_desc, ufs_unlock }, /* unlock */
+ { &vop_bmap_desc, ufs_bmap }, /* bmap */
+ { &vop_strategy_desc, ufs_strategy }, /* strategy */
+ { &vop_print_desc, ufs_print }, /* print */
+ { &vop_islocked_desc, ufs_islocked }, /* islocked */
+ { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */
+ { &vop_advlock_desc, ufs_advlock }, /* advlock */
+ { &vop_blkatoff_desc, ffs_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, ffs_valloc }, /* valloc */
+ { &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */
+ { &vop_vfree_desc, ffs_vfree }, /* vfree */
+ { &vop_truncate_desc, ffs_truncate }, /* truncate */
+ { &vop_update_desc, ffs_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* end of table */
+};
+struct vnodeopv_desc ffs_vnodeop_opv_desc =
+ { &ffs_vnodeop_p, ffs_vnodeop_entries };
+
+int (**ffs_specop_p)(); /* vector pointer paired with ffs_specop_entries by ffs_specop_opv_desc */
+struct vnodeopv_entry_desc ffs_specop_entries[] = { /* mostly spec_* routines; see the ufs_, ufsspec_ and ffs_ exceptions below */
+ { &vop_default_desc, vn_default_error }, /* default */
+ { &vop_lookup_desc, spec_lookup }, /* lookup */
+ { &vop_create_desc, spec_create }, /* create */
+ { &vop_mknod_desc, spec_mknod }, /* mknod */
+ { &vop_open_desc, spec_open }, /* open */
+ { &vop_close_desc, ufsspec_close }, /* close */
+ { &vop_access_desc, ufs_access }, /* access */
+ { &vop_getattr_desc, ufs_getattr }, /* getattr */
+ { &vop_setattr_desc, ufs_setattr }, /* setattr */
+ { &vop_read_desc, ufsspec_read }, /* read */
+ { &vop_write_desc, ufsspec_write }, /* write */
+ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
+ { &vop_select_desc, spec_select }, /* select */
+ { &vop_mmap_desc, spec_mmap }, /* mmap */
+ { &vop_fsync_desc, ffs_fsync }, /* fsync */
+ { &vop_seek_desc, spec_seek }, /* seek */
+ { &vop_remove_desc, spec_remove }, /* remove */
+ { &vop_link_desc, spec_link }, /* link */
+ { &vop_rename_desc, spec_rename }, /* rename */
+ { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
+ { &vop_symlink_desc, spec_symlink }, /* symlink */
+ { &vop_readdir_desc, spec_readdir }, /* readdir */
+ { &vop_readlink_desc, spec_readlink }, /* readlink */
+ { &vop_abortop_desc, spec_abortop }, /* abortop */
+ { &vop_inactive_desc, ufs_inactive }, /* inactive */
+ { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */
+ { &vop_lock_desc, ufs_lock }, /* lock */
+ { &vop_unlock_desc, ufs_unlock }, /* unlock */
+ { &vop_bmap_desc, spec_bmap }, /* bmap */
+ { &vop_strategy_desc, spec_strategy }, /* strategy */
+ { &vop_print_desc, ufs_print }, /* print */
+ { &vop_islocked_desc, ufs_islocked }, /* islocked */
+ { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
+ { &vop_advlock_desc, spec_advlock }, /* advlock */
+ { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, spec_valloc }, /* valloc */
+ { &vop_reallocblks_desc, spec_reallocblks }, /* reallocblks */
+ { &vop_vfree_desc, ffs_vfree }, /* vfree */
+ { &vop_truncate_desc, spec_truncate }, /* truncate */
+ { &vop_update_desc, ffs_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* end of table */
+};
+struct vnodeopv_desc ffs_specop_opv_desc =
+ { &ffs_specop_p, ffs_specop_entries };
+
+#ifdef FIFO
+int (**ffs_fifoop_p)(); /* vector pointer paired with ffs_fifoop_entries by ffs_fifoop_opv_desc */
+struct vnodeopv_entry_desc ffs_fifoop_entries[] = { /* mostly fifo_* routines; see the ufs_, ufsfifo_ and ffs_ exceptions below */
+ { &vop_default_desc, vn_default_error }, /* default */
+ { &vop_lookup_desc, fifo_lookup }, /* lookup */
+ { &vop_create_desc, fifo_create }, /* create */
+ { &vop_mknod_desc, fifo_mknod }, /* mknod */
+ { &vop_open_desc, fifo_open }, /* open */
+ { &vop_close_desc, ufsfifo_close }, /* close */
+ { &vop_access_desc, ufs_access }, /* access */
+ { &vop_getattr_desc, ufs_getattr }, /* getattr */
+ { &vop_setattr_desc, ufs_setattr }, /* setattr */
+ { &vop_read_desc, ufsfifo_read }, /* read */
+ { &vop_write_desc, ufsfifo_write }, /* write */
+ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */
+ { &vop_select_desc, fifo_select }, /* select */
+ { &vop_mmap_desc, fifo_mmap }, /* mmap */
+ { &vop_fsync_desc, ffs_fsync }, /* fsync */
+ { &vop_seek_desc, fifo_seek }, /* seek */
+ { &vop_remove_desc, fifo_remove }, /* remove */
+ { &vop_link_desc, fifo_link }, /* link */
+ { &vop_rename_desc, fifo_rename }, /* rename */
+ { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */
+ { &vop_symlink_desc, fifo_symlink }, /* symlink */
+ { &vop_readdir_desc, fifo_readdir }, /* readdir */
+ { &vop_readlink_desc, fifo_readlink }, /* readlink */
+ { &vop_abortop_desc, fifo_abortop }, /* abortop */
+ { &vop_inactive_desc, ufs_inactive }, /* inactive */
+ { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */
+ { &vop_lock_desc, ufs_lock }, /* lock */
+ { &vop_unlock_desc, ufs_unlock }, /* unlock */
+ { &vop_bmap_desc, fifo_bmap }, /* bmap */
+ { &vop_strategy_desc, fifo_strategy }, /* strategy */
+ { &vop_print_desc, ufs_print }, /* print */
+ { &vop_islocked_desc, ufs_islocked }, /* islocked */
+ { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fifo_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fifo_valloc }, /* valloc */
+ { &vop_reallocblks_desc, fifo_reallocblks }, /* reallocblks */
+ { &vop_vfree_desc, ffs_vfree }, /* vfree */
+ { &vop_truncate_desc, fifo_truncate }, /* truncate */
+ { &vop_update_desc, ffs_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* end of table */
+};
+struct vnodeopv_desc ffs_fifoop_opv_desc =
+ { &ffs_fifoop_p, ffs_fifoop_entries };
+#endif /* FIFO */
+
+#ifdef DEBUG
+/*
+ * Enabling cluster read/write operations.
+ *
+ * With DEBUG, doclusterread and doclusterwrite are int variables
+ * initialized to 1 and named in the ctldebug records debug11 and
+ * debug12.  Without DEBUG they are macros fixed at 1.  Either way
+ * they are defined before ufs_readwrite.c is included.
+ */
+#include <sys/sysctl.h>
+int doclusterread = 1;
+struct ctldebug debug11 = { "doclusterread", &doclusterread };
+int doclusterwrite = 1;
+struct ctldebug debug12 = { "doclusterwrite", &doclusterwrite };
+#else
+/* XXX for ufs_readwrite */
+#define doclusterread 1
+#define doclusterwrite 1
+#endif
+
+#include <ufs/ufs/ufs_readwrite.c>
+
+/*
+ * Synch an open file.
+ *
+ * Start a write for every buffer on the vnode's dirty list that is not
+ * already busy.  When a_waitfor is MNT_WAIT, also sleep until the
+ * vnode's count of writes in progress (v_numoutput) reaches zero.
+ * Finally pass the current time to VOP_UPDATE and return its result.
+ *
+ * The dirty list is walked at splbio.  Every path that jumps back to
+ * "loop" first restores the level saved in s, because "loop" saves the
+ * current level again; jumping back while still at splbio would
+ * overwrite s and the final splx(s) would leave the level raised.
+ */
+/* ARGSUSED */
+int
+ffs_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct buf *bp;
+	struct timeval tv;
+	struct buf *nbp;
+	int s;
+
+	/*
+	 * Flush all dirty buffers associated with a vnode.
+	 */
+loop:
+	s = splbio();
+	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+		nbp = bp->b_vnbufs.le_next;
+		if ((bp->b_flags & B_BUSY))
+			continue;
+		if ((bp->b_flags & B_DELWRI) == 0)
+			panic("ffs_fsync: not dirty");
+		bremfree(bp);
+		bp->b_flags |= B_BUSY;
+		splx(s);
+		/*
+		 * Wait for I/O associated with indirect blocks to complete,
+		 * since there is no way to quickly wait for them below.
+		 */
+		if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT)
+			(void) bawrite(bp);
+		else
+			(void) bwrite(bp);
+		/* Rescan from the head; nbp is not used after splx(). */
+		goto loop;
+	}
+	if (ap->a_waitfor == MNT_WAIT) {
+		while (vp->v_numoutput) {
+			vp->v_flag |= VBWAIT;
+			sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
+		}
+#ifdef DIAGNOSTIC
+		if (vp->v_dirtyblkhd.lh_first) {
+			vprint("ffs_fsync: dirty", vp);
+			/* Restore the saved level before "loop" saves it again. */
+			splx(s);
+			goto loop;
+		}
+#endif
+	}
+	splx(s);
+	tv = time;
+	return (VOP_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT));
+}
diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h
new file mode 100644
index 000000000000..bef052feef4a
--- /dev/null
+++ b/sys/ufs/ffs/fs.h
@@ -0,0 +1,489 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fs.h 8.7 (Berkeley) 4/19/94
+ */
+
+/*
+ * Each disk drive contains some number of file systems.
+ * A file system consists of a number of cylinder groups.
+ * Each cylinder group has inodes and data.
+ *
+ * A file system is described by its super-block, which in turn
+ * describes the cylinder groups. The super-block is critical
+ * data and is replicated in each cylinder group to protect against
+ * catastrophic loss. This is done at `newfs' time and the critical
+ * super-block data does not change, so the copies need not be
+ * referenced further unless disaster strikes.
+ *
+ * For file system fs, the offsets of the various blocks of interest
+ * are given in the super block as:
+ * [fs->fs_sblkno] Super-block
+ * [fs->fs_cblkno] Cylinder group block
+ * [fs->fs_iblkno] Inode blocks
+ * [fs->fs_dblkno] Data blocks
+ * The beginning of cylinder group cg in fs, is given by
+ * the ``cgbase(fs, cg)'' macro.
+ *
+ * The first boot and super blocks are given in absolute disk addresses.
+ * The byte-offset forms are preferred, as they don't imply a sector size.
+ */
+#define BBSIZE 8192
+#define SBSIZE 8192
+#define BBOFF ((off_t)(0))
+#define SBOFF ((off_t)(BBOFF + BBSIZE))
+#define BBLOCK ((daddr_t)(0))
+#define SBLOCK ((daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE))
+
+/*
+ * Addresses stored in inodes are capable of addressing fragments
+ * of `blocks'. File system blocks of at most size MAXBSIZE can
+ * be optionally broken into 2, 4, or 8 pieces, each of which is
+ * addressable; these pieces may be DEV_BSIZE, or some multiple of
+ * a DEV_BSIZE unit.
+ *
+ * Large files consist of exclusively large data blocks. To avoid
+ * undue wasted disk space, the last data block of a small file may be
+ * allocated as only as many fragments of a large block as are
+ * necessary. The file system format retains only a single pointer
+ * to such a fragment, which is a piece of a single large block that
+ * has been divided. The size of such a fragment is determinable from
+ * information in the inode, using the ``blksize(fs, ip, lbn)'' macro.
+ *
+ * The file system records space availability at the fragment level;
+ * to determine block availability, aligned fragments are examined.
+ */
+
+/*
+ * MINBSIZE is the smallest allowable block size.
+ * In order to ensure that it is possible to create files of size
+ * 2^32 with only two levels of indirection, MINBSIZE is set to 4096.
+ * MINBSIZE must be big enough to hold a cylinder group block,
+ * thus changes to (struct cg) must keep its size within MINBSIZE.
+ * Note that super blocks are always of size SBSIZE,
+ * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE.
+ */
+#define MINBSIZE 4096
+
+/*
+ * The path name on which the file system is mounted is maintained
+ * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in
+ * the super block for this name.
+ * The limit on the amount of summary information per file system
+ * is defined by MAXCSBUFS. It is currently parameterized for a
+ * maximum of two million cylinders.
+ */
+#define MAXMNTLEN 512
+#define MAXCSBUFS 32
+
+/*
+ * A summary of contiguous blocks of various sizes is maintained
+ * in each cylinder group. Normally this is set by the initial
+ * value of fs_maxcontig. To conserve space, a maximum summary size
+ * is set by FS_MAXCONTIG.
+ */
+#define FS_MAXCONTIG 16
+
+/*
+ * MINFREE gives the minimum acceptable percentage of file system
+ * blocks which may be free. If the freelist drops below this level
+ * only the superuser may continue to allocate blocks. This may
+ * be set to 0 if no reserve of free blocks is deemed necessary,
+ * however throughput drops by fifty percent if the file system
+ * is run at between 95% and 100% full; thus the minimum default
+ * value of fs_minfree is 5%. However, to get good clustering
+ * performance, 10% is a better choice. Hence 10% is described as our
+ * default value, although MINFREE below is defined as 5 (XXX: the
+ * comment and the constant disagree). With 10% free space,
+ * fragmentation is not a problem, so we choose to optimize for time.
+ */
+#define MINFREE 5
+#define DEFAULTOPT FS_OPTTIME
+
+/*
+ * Per cylinder group information; summarized in blocks allocated
+ * from first cylinder group data blocks. These blocks have to be
+ * read in from fs_csaddr (size fs_cssize) in addition to the
+ * super block.
+ *
+ * N.B. sizeof(struct csum) must be a power of two in order for
+ * the ``fs_cs'' macro to work (see below).
+ *
+ * The structure is four long counters. struct fs embeds one as
+ * fs_cstotal and struct cg embeds one as cg_cs.
+ */
+struct csum {
+ long cs_ndir; /* number of directories */
+ long cs_nbfree; /* number of free blocks */
+ long cs_nifree; /* number of free inodes */
+ long cs_nffree; /* number of free frags */
+};
+
+/*
+ * Super block for a file system.
+ *
+ * ffs_sbupdate() copies fs_sbsize bytes starting at this structure
+ * into the buffer it writes at SBLOCK, so the member order and sizes
+ * here are part of what is written to disk.
+ *
+ * fs_space is declared with one element but is ``actually longer'';
+ * fs_rotbl() returns it for FS_42POSTBLFMT file systems. For other
+ * formats the fs_postbl() and fs_rotbl() macros locate their tables
+ * through the byte offsets fs_postbloff and fs_rotbloff.
+ */
+struct fs {
+ struct fs *fs_link; /* linked list of file systems */
+ struct fs *fs_rlink; /* used for incore super blocks */
+ daddr_t fs_sblkno; /* addr of super-block in filesys */
+ daddr_t fs_cblkno; /* offset of cyl-block in filesys */
+ daddr_t fs_iblkno; /* offset of inode-blocks in filesys */
+ daddr_t fs_dblkno; /* offset of first data after cg */
+ long fs_cgoffset; /* cylinder group offset in cylinder */
+ long fs_cgmask; /* used to calc mod fs_ntrak */
+ time_t fs_time; /* last time written */
+ long fs_size; /* number of blocks in fs */
+ long fs_dsize; /* number of data blocks in fs */
+ long fs_ncg; /* number of cylinder groups */
+ long fs_bsize; /* size of basic blocks in fs */
+ long fs_fsize; /* size of frag blocks in fs */
+ long fs_frag; /* number of frags in a block in fs */
+/* these are configuration parameters */
+ long fs_minfree; /* minimum percentage of free blocks */
+ long fs_rotdelay; /* num of ms for optimal next block */
+ long fs_rps; /* disk revolutions per second */
+/* these fields can be computed from the others */
+ long fs_bmask; /* ``blkoff'' calc of blk offsets */
+ long fs_fmask; /* ``fragoff'' calc of frag offsets */
+ long fs_bshift; /* ``lblkno'' calc of logical blkno */
+ long fs_fshift; /* ``numfrags'' calc number of frags */
+/* these are configuration parameters */
+ long fs_maxcontig; /* max number of contiguous blks */
+ long fs_maxbpg; /* max number of blks per cyl group */
+/* these fields can be computed from the others */
+ long fs_fragshift; /* block to frag shift */
+ long fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */
+ long fs_sbsize; /* actual size of super block */
+ long fs_csmask; /* csum block offset */
+ long fs_csshift; /* csum block number */
+ long fs_nindir; /* value of NINDIR */
+ long fs_inopb; /* value of INOPB */
+ long fs_nspf; /* value of NSPF */
+/* yet another configuration parameter */
+ long fs_optim; /* optimization preference, see below */
+/* these fields are derived from the hardware */
+ long fs_npsect; /* # sectors/track including spares */
+ long fs_interleave; /* hardware sector interleave */
+ long fs_trackskew; /* sector 0 skew, per track */
+ long fs_headswitch; /* head switch time, usec */
+ long fs_trkseek; /* track-to-track seek, usec */
+/* sizes determined by number of cylinder groups and their sizes */
+ daddr_t fs_csaddr; /* blk addr of cyl grp summary area */
+ long fs_cssize; /* size of cyl grp summary area */
+ long fs_cgsize; /* cylinder group size */
+/* these fields are derived from the hardware */
+ long fs_ntrak; /* tracks per cylinder */
+ long fs_nsect; /* sectors per track */
+ long fs_spc; /* sectors per cylinder */
+/* this comes from the disk driver partitioning */
+ long fs_ncyl; /* cylinders in file system */
+/* these fields can be computed from the others */
+ long fs_cpg; /* cylinders per group */
+ long fs_ipg; /* inodes per group */
+ long fs_fpg; /* blocks per group * fs_frag */
+/* this data must be re-computed after crashes */
+ struct csum fs_cstotal; /* cylinder summary information */
+/* these fields are cleared at mount time */
+ char fs_fmod; /* super block modified flag */
+ char fs_clean; /* file system is clean flag */
+ char fs_ronly; /* mounted read-only flag */
+ char fs_flags; /* currently unused flag */
+ char fs_fsmnt[MAXMNTLEN]; /* name mounted on */
+/* these fields retain the current block allocation info */
+ long fs_cgrotor; /* last cg searched */
+ struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */
+ long fs_cpc; /* cyl per cycle in postbl */
+ short fs_opostbl[16][8]; /* old rotation block list head */
+ long fs_sparecon[50]; /* reserved for future constants */
+ long fs_contigsumsize; /* size of cluster summary array */
+ long fs_maxsymlinklen; /* max length of an internal symlink */
+ long fs_inodefmt; /* format of on-disk inodes */
+ u_quad_t fs_maxfilesize; /* maximum representable file size */
+ quad_t fs_qbmask; /* ~fs_bmask - for use with quad size */
+ quad_t fs_qfmask; /* ~fs_fmask - for use with quad size */
+ long fs_state; /* validate fs_clean field */
+ long fs_postblformat; /* format of positional layout tables */
+ long fs_nrpos; /* number of rotational positions */
+ long fs_postbloff; /* (short) rotation block list head */
+ long fs_rotbloff; /* (u_char) blocks for each rotation */
+ long fs_magic; /* magic number */
+ u_char fs_space[1]; /* list of blocks for each rotation */
+/* actually longer */
+};
+/*
+ * Filesystem identification
+ */
+#define FS_MAGIC 0x011954 /* the fast filesystem magic number */
+#define FS_OKAY 0x7c269d38 /* superblock checksum */
+#define FS_42INODEFMT -1 /* 4.2BSD inode format */
+#define FS_44INODEFMT 2 /* 4.4BSD inode format */
+/*
+ * Preference for optimization.
+ */
+#define FS_OPTTIME 0 /* minimize allocation time */
+#define FS_OPTSPACE 1 /* minimize disk fragmentation */
+
+/*
+ * Rotational layout table format types
+ */
+#define FS_42POSTBLFMT -1 /* 4.2BSD rotational table format */
+#define FS_DYNAMICPOSTBLFMT 1 /* dynamic rotational table format */
+/*
+ * Macros for access to superblock array structures
+ *
+ * For FS_42POSTBLFMT both tables are fixed members of struct fs
+ * (fs_opostbl[cylno] and fs_space). Otherwise they are found at the
+ * byte offsets fs_postbloff and fs_rotbloff from the start of the
+ * superblock, with fs_nrpos shorts per cylinder in the postbl table.
+ */
+#define fs_postbl(fs, cylno) \
+ (((fs)->fs_postblformat == FS_42POSTBLFMT) \
+ ? ((fs)->fs_opostbl[cylno]) \
+ : ((short *)((char *)(fs) + (fs)->fs_postbloff) + (cylno) * (fs)->fs_nrpos))
+#define fs_rotbl(fs) \
+ (((fs)->fs_postblformat == FS_42POSTBLFMT) \
+ ? ((fs)->fs_space) \
+ : ((u_char *)((char *)(fs) + (fs)->fs_rotbloff)))
+
+/*
+ * The size of a cylinder group is calculated by CGSIZE. The maximum size
+ * is limited by the fact that cylinder groups are at most one block.
+ * Its size is derived from the size of the maps maintained in the
+ * cylinder group and the (struct cg) size.
+ *
+ * The cluster summary and cluster map terms are counted only when
+ * fs_contigsumsize is greater than zero.
+ */
+#define CGSIZE(fs) \
+ /* base cg */ (sizeof(struct cg) + sizeof(long) + \
+ /* blktot size */ (fs)->fs_cpg * sizeof(long) + \
+ /* blks size */ (fs)->fs_cpg * (fs)->fs_nrpos * sizeof(short) + \
+ /* inode map */ howmany((fs)->fs_ipg, NBBY) + \
+ /* block map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY) +\
+ /* if present */ ((fs)->fs_contigsumsize <= 0 ? 0 : \
+ /* cluster sum */ (fs)->fs_contigsumsize * sizeof(long) + \
+ /* cluster map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPB(fs), NBBY)))
+
+/*
+ * Convert cylinder group to base address of its global summary info.
+ *
+ * N.B. This macro assumes that sizeof(struct csum) is a power of two.
+ *
+ * The expansion begins with the member name fs_csp and supplies no
+ * object, so it has to follow a member-access operator, for example
+ * fs->fs_cs(fs, cg). indx >> fs_csshift picks the fs_csp[] buffer and
+ * indx & ~fs_csmask picks the entry within that buffer.
+ */
+#define fs_cs(fs, indx) \
+ fs_csp[(indx) >> (fs)->fs_csshift][(indx) & ~(fs)->fs_csmask]
+
+/*
+ * Cylinder group block for a file system.
+ *
+ * cg_btotoff, cg_boff, cg_iusedoff, cg_freeoff, cg_clustersumoff and
+ * cg_clusteroff are byte offsets from the start of this structure; the
+ * cg_blktot(), cg_blks(), cg_inosused(), cg_blksfree(),
+ * cg_clustersum() and cg_clustersfree() macros add them to the cg
+ * pointer to reach the corresponding arrays. cg_space is declared
+ * with one element but is ``actually longer''.
+ */
+#define CG_MAGIC 0x090255
+struct cg {
+ struct cg *cg_link; /* linked list of cyl groups */
+ long cg_magic; /* magic number */
+ time_t cg_time; /* time last written */
+ long cg_cgx; /* we are the cgx'th cylinder group */
+ short cg_ncyl; /* number of cyl's this cg */
+ short cg_niblk; /* number of inode blocks this cg */
+ long cg_ndblk; /* number of data blocks this cg */
+ struct csum cg_cs; /* cylinder summary information */
+ long cg_rotor; /* position of last used block */
+ long cg_frotor; /* position of last used frag */
+ long cg_irotor; /* position of last used inode */
+ long cg_frsum[MAXFRAG]; /* counts of available frags */
+ long cg_btotoff; /* (long) block totals per cylinder */
+ long cg_boff; /* (short) free block positions */
+ long cg_iusedoff; /* (char) used inode map */
+ long cg_freeoff; /* (u_char) free block map */
+ long cg_nextfreeoff; /* (u_char) next available space */
+ long cg_clustersumoff; /* (long) counts of avail clusters */
+ long cg_clusteroff; /* (char) free cluster map */
+ long cg_nclusterblks; /* number of clusters this cg */
+ long cg_sparecon[13]; /* reserved for future use */
+ u_char cg_space[1]; /* space for cylinder group maps */
+/* actually longer */
+};
+/*
+ * Macros for access to cylinder group array structures
+ *
+ * cg_blktot, cg_blks, cg_inosused and cg_blksfree test cg_magic at its
+ * struct cg position: if it is not CG_MAGIC the block is treated as a
+ * struct ocg and the fixed arrays of that structure are used;
+ * otherwise the array is found at the stored byte offset from cgp.
+ * cg_clustersfree and cg_clustersum have no struct ocg alternative.
+ * cg_chkmagic accepts CG_MAGIC at either structure's cg_magic
+ * position.
+ */
+#define cg_blktot(cgp) \
+ (((cgp)->cg_magic != CG_MAGIC) \
+ ? (((struct ocg *)(cgp))->cg_btot) \
+ : ((long *)((char *)(cgp) + (cgp)->cg_btotoff)))
+#define cg_blks(fs, cgp, cylno) \
+ (((cgp)->cg_magic != CG_MAGIC) \
+ ? (((struct ocg *)(cgp))->cg_b[cylno]) \
+ : ((short *)((char *)(cgp) + (cgp)->cg_boff) + (cylno) * (fs)->fs_nrpos))
+#define cg_inosused(cgp) \
+ (((cgp)->cg_magic != CG_MAGIC) \
+ ? (((struct ocg *)(cgp))->cg_iused) \
+ : ((char *)((char *)(cgp) + (cgp)->cg_iusedoff)))
+#define cg_blksfree(cgp) \
+ (((cgp)->cg_magic != CG_MAGIC) \
+ ? (((struct ocg *)(cgp))->cg_free) \
+ : ((u_char *)((char *)(cgp) + (cgp)->cg_freeoff)))
+#define cg_chkmagic(cgp) \
+ ((cgp)->cg_magic == CG_MAGIC || ((struct ocg *)(cgp))->cg_magic == CG_MAGIC)
+#define cg_clustersfree(cgp) \
+ ((u_char *)((char *)(cgp) + (cgp)->cg_clusteroff))
+#define cg_clustersum(cgp) \
+ ((long *)((char *)(cgp) + (cgp)->cg_clustersumoff))
+
+/*
+ * The following structure is defined
+ * for compatibility with old file systems.
+ *
+ * Here cg_magic follows cg_iused, whereas struct cg has it as its
+ * second member, and the maps are fixed-size arrays (cg_btot, cg_b,
+ * cg_iused) followed by cg_free, which is declared with one element
+ * but is ``actually longer''.
+ */
+struct ocg {
+ struct ocg *cg_link; /* linked list of cyl groups */
+ struct ocg *cg_rlink; /* used for incore cyl groups */
+ time_t cg_time; /* time last written */
+ long cg_cgx; /* we are the cgx'th cylinder group */
+ short cg_ncyl; /* number of cyl's this cg */
+ short cg_niblk; /* number of inode blocks this cg */
+ long cg_ndblk; /* number of data blocks this cg */
+ struct csum cg_cs; /* cylinder summary information */
+ long cg_rotor; /* position of last used block */
+ long cg_frotor; /* position of last used frag */
+ long cg_irotor; /* position of last used inode */
+ long cg_frsum[8]; /* counts of available frags */
+ long cg_btot[32]; /* block totals per cylinder */
+ short cg_b[32][8]; /* positions of free blocks */
+ char cg_iused[256]; /* used inode map */
+ long cg_magic; /* magic number */
+ u_char cg_free[1]; /* free block map */
+/* actually longer */
+};
+
+/*
+ * Turn file system block numbers into disk block addresses.
+ * This maps file system blocks to device size blocks.
+ *
+ * Both directions are plain shifts of b by fs_fsbtodb; no cast is
+ * applied, so the arithmetic is done in the (promoted) type of b.
+ */
+#define fsbtodb(fs, b) ((b) << (fs)->fs_fsbtodb)
+#define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb)
+
+/*
+ * Cylinder group macros to locate things in cylinder groups.
+ * They calc file system addresses of cylinder group data structures.
+ *
+ * cgbase() is c groups of fs_fpg fragments. cgstart() adds
+ * fs_cgoffset * (c & ~fs_cgmask) to that, and the remaining macros add
+ * one of the superblock offsets (fs_dblkno, fs_iblkno, fs_sblkno,
+ * fs_cblkno) to cgstart(). cgstart() is defined last, which is fine
+ * for macros since it is only expanded where the others are used.
+ */
+#define cgbase(fs, c) ((daddr_t)((fs)->fs_fpg * (c)))
+#define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */
+#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */
+#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */
+#define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */
+#define cgstart(fs, c) \
+ (cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask)))
+
+/*
+ * Macros for handling inode numbers:
+ * inode number to file system block offset.
+ * inode number to cylinder group number.
+ * inode number to file system block address.
+ *
+ * ino_to_fsba() is cgimin() of the inode's cylinder group plus the
+ * fragment address of the inode block within that group, computed as
+ * blkstofrags((x % fs_ipg) / INOPB). ino_to_fsbo() is the inode's
+ * index within its block, x % INOPB.
+ */
+#define ino_to_cg(fs, x) ((x) / (fs)->fs_ipg)
+#define ino_to_fsba(fs, x) \
+ ((daddr_t)(cgimin(fs, ino_to_cg(fs, x)) + \
+ (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs))))))
+#define ino_to_fsbo(fs, x) ((x) % INOPB(fs))
+
+/*
+ * Give cylinder group number for a file system block.
+ * Give cylinder group block number for a file system block.
+ *
+ * These are the quotient and the remainder of d divided by fs_fpg.
+ */
+#define dtog(fs, d) ((d) / (fs)->fs_fpg)
+#define dtogd(fs, d) ((d) % (fs)->fs_fpg)
+
+/*
+ * Extract the bits for a block from a map.
+ * Compute the cylinder and rotational position of a cyl block addr.
+ *
+ * blkmap() shifts the map byte holding bit loc right by loc % NBBY and
+ * keeps the low fs_frag bits. cbtorpos() expands bno twice, so its
+ * argument should not have side effects.
+ */
+#define blkmap(fs, map, loc) \
+ (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag)))
+#define cbtocylno(fs, bno) \
+ ((bno) * NSPF(fs) / (fs)->fs_spc)
+#define cbtorpos(fs, bno) \
+ (((bno) * NSPF(fs) % (fs)->fs_spc / (fs)->fs_nsect * (fs)->fs_trackskew + \
+ (bno) * NSPF(fs) % (fs)->fs_spc % (fs)->fs_nsect * (fs)->fs_interleave) % \
+ (fs)->fs_nsect * (fs)->fs_nrpos / (fs)->fs_npsect)
+
+/*
+ * The following macros optimize certain frequently calculated
+ * quantities by using shifts and masks in place of divisions
+ * modulos and multiplications.
+ *
+ * blkoff() and fragoff() mask with the quad fields fs_qbmask and
+ * fs_qfmask. blkroundup() and fragroundup() add the quad mask and
+ * then and with the long fields fs_bmask and fs_fmask. fragnum() and
+ * blknum() use fs_frag - 1 as a bit mask (presumably fs_frag is a
+ * power of two -- confirm).
+ */
+#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \
+ ((loc) & (fs)->fs_qbmask)
+#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \
+ ((loc) & (fs)->fs_qfmask)
+#define lblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \
+ ((blk) << (fs)->fs_bshift)
+#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \
+ ((loc) >> (fs)->fs_bshift)
+#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \
+ ((loc) >> (fs)->fs_fshift)
+#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \
+ (((size) + (fs)->fs_qbmask) & (fs)->fs_bmask)
+#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \
+ (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask)
+#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \
+ ((frags) >> (fs)->fs_fragshift)
+#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \
+ ((blks) << (fs)->fs_fragshift)
+#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \
+ ((fsb) & ((fs)->fs_frag - 1))
+#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \
+ ((fsb) &~ ((fs)->fs_frag - 1))
+
+/*
+ * Determine the number of available frags given a
+ * percentage to hold in reserve
+ *
+ * The value is the free blocks converted to frags, plus the free
+ * frags, minus percentreserved percent of fs_dsize. Because the
+ * reserve is subtracted, the result may be negative.
+ */
+#define freespace(fs, percentreserved) \
+ (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \
+ (fs)->fs_cstotal.cs_nffree - ((fs)->fs_dsize * (percentreserved) / 100))
+
+/*
+ * Determining the size of a file block in the file system.
+ *
+ * The result is fs_bsize when lbn is at least NDADDR or when the file
+ * size reaches the end of block lbn; otherwise it is the part of the
+ * size beyond the last block boundary, rounded up to whole fragments.
+ * blksize() reads (ip)->i_size and dblksize() reads (dip)->di_size.
+ */
+#define blksize(fs, ip, lbn) \
+ (((lbn) >= NDADDR || (ip)->i_size >= ((lbn) + 1) << (fs)->fs_bshift) \
+ ? (fs)->fs_bsize \
+ : (fragroundup(fs, blkoff(fs, (ip)->i_size))))
+#define dblksize(fs, dip, lbn) \
+ (((lbn) >= NDADDR || (dip)->di_size >= ((lbn) + 1) << (fs)->fs_bshift) \
+ ? (fs)->fs_bsize \
+ : (fragroundup(fs, blkoff(fs, (dip)->di_size))))
+
+/*
+ * Number of disk sectors per block; assumes DEV_BSIZE byte sector size.
+ *
+ * NSPF is the stored fs_nspf value; NSPB is fs_nspf shifted left by
+ * fs_fragshift.
+ */
+#define NSPB(fs) ((fs)->fs_nspf << (fs)->fs_fragshift)
+#define NSPF(fs) ((fs)->fs_nspf)
+
+/*
+ * INOPB is the number of inodes in a secondary storage block.
+ * INOPF is fs_inopb shifted right by fs_fragshift.
+ */
+#define INOPB(fs) ((fs)->fs_inopb)
+#define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift)
+
+/*
+ * NINDIR is the number of indirects in a file system block.
+ */
+#define NINDIR(fs) ((fs)->fs_nindir)
+
+extern int inside[], around[];
+extern u_char *fragtbl[];
diff --git a/sys/ufs/lfs/README b/sys/ufs/lfs/README
new file mode 100644
index 000000000000..724b18fb9ea5
--- /dev/null
+++ b/sys/ufs/lfs/README
@@ -0,0 +1,139 @@
+# @(#)README 8.1 (Berkeley) 6/11/93
+
+The file system is reasonably stable, but incomplete. There are
+places where cleaning performance can be improved dramatically (see
+comments in lfs_syscalls.c). For details on the implementation,
+performance and why garbage collection always wins, see Dr. Margo
+Seltzer's thesis available for anonymous ftp from toe.cs.berkeley.edu,
+in the directory pub/personal/margo/thesis.ps.Z, or the January 1993
+USENIX paper.
+
+Missing Functionality:
+ Multiple block sizes and/or fragments are not yet implemented.
+
+----------
+The disk is laid out in segments. The first segment starts 8K into the
+disk (the first 8K is used for boot information). Each segment is composed
+of the following:
+
+ An optional super block
+ One or more groups of:
+ segment summary
+ 0 or more data blocks
+ 0 or more inode blocks
+
+The segment summary and inode/data blocks start after the super block (if
+present), and grow toward the end of the segment.
+
+ _______________________________________________
+ | | | | |
+ | summary | data/inode | summary | data/inode |
+ | block | blocks | block | blocks | ...
+ |_________|____________|_________|____________|
+
+The data/inode blocks following a summary block are described by the
+summary block. In order to permit the segment to be written in any order
+and in a forward direction only, a checksum is calculated across the
+blocks described by the summary. Additionally, the summary is checksummed
+and timestamped. Both of these are intended for recovery; the former is
+to make it easy to determine that it *is* a summary block and the latter
+is to make it easy to determine when recovery is finished for partially
+written segments. These checksums are also used by the cleaner.
+
+ Summary block (detail)
+ ________________
+ | sum cksum |
+ | data cksum |
+ | next segment |
+ | timestamp |
+ | FINFO count |
+ | inode count |
+ | flags |
+ |______________|
+ | FINFO-1 | 0 or more file info structures, identifying the
+ | . | blocks in the segment.
+ | . |
+ | . |
+ | FINFO-N |
+ | inode-N |
+ | . |
+ | . |
+ | . | 0 or more inode daddr_t's, identifying the inode
+ | inode-1 | blocks in the segment.
+ |______________|
+
+Inode blocks are blocks of on-disk inodes in the same format as those in
+the FFS. However, spare[0] contains the inode number of the inode so we
+can find a particular inode on a page. They are packed page_size /
+sizeof(inode) to a block. Data blocks are exactly as in the FFS. Both
+inodes and data blocks move around the file system at will.
+
+The file system is described by a super-block which is replicated and
+occurs as the first block of the first and other segments. (The maximum
+number of super-blocks is LFS_MAXNUMSB). Each super-block maintains a list
+of the disk addresses of all the super-blocks. The super-block maintains
+a small amount of checkpoint information, essentially just enough to find
+the inode for the IFILE (fs->lfs_idaddr).
+
+The IFILE is visible in the file system, as inode number LFS_IFILE_INUM. It
+contains information shared between the kernel and various user processes.
+
+ Ifile (detail)
+ ________________
+ | cleaner info | Cleaner information per file system. (Page
+ | | granularity.)
+ |______________|
+ | segment | Space available and last modified times per
+ | usage table | segment. (Page granularity.)
+ |______________|
+ | IFILE-1 | Per inode status information: current version #,
+ | . | if currently allocated, last access time and
+ | . | current disk address of containing inode block.
+ | . | If current disk address is LFS_UNUSED_DADDR, the
+ | IFILE-N | inode is not in use, and it's on the free list.
+ |______________|
+
+
+First Segment at Creation Time:
+_____________________________________________________________
+| | | | | | | |
+| 8K pad | Super | summary | inode | ifile | root | l + f |
+| | block | | block | | dir | dir |
+|________|_______|_________|_______|_______|_______|_______|
+ ^
+ Segment starts here.
+
+Some differences from the Sprite LFS implementation.
+
+1. The Sprite LFS implementation placed the ifile metadata and the super block
+ at fixed locations. This implementation replicates the super block
+ and puts each at a fixed location. The checkpoint data is divided into
+ two parts -- just enough information to find the IFILE is stored in
+ two of the super blocks, although it is not toggled between them as in
+ the Sprite implementation. (This was deliberate, to avoid a single
+ point of failure.) The remaining checkpoint information is treated as
+ a regular file, which means that the cleaner info, the segment usage
+ table and the ifile meta-data are stored in normal log segments.
+ (Tastes great, less filling...)
+
+2. The segment layout is radically different in Sprite; this implementation
+ uses something a lot like network framing, where data/inode blocks are
+ written asynchronously, and a checksum is used to validate any set of
+ summary and data/inode blocks. Sprite writes summary blocks synchronously
+ after the data/inode blocks have been written and the existence of the
+ summary block validates the data/inode blocks. This permits us to write
+ everything contiguously, even partial segments and their summaries, whereas
+ Sprite is forced to seek (from the end of the data/inode blocks to the summary
+ which lives at the end of the segment). Additionally, writing the summary
+ synchronously should cost about 1/2 a rotation per summary.
+
+3. Sprite LFS distinguishes between different types of blocks in the segment.
+ Other than inode blocks and data blocks, we don't.
+
+4. Sprite LFS traverses the IFILE looking for free inodes. We maintain a
+ free list threaded through the IFILE entries.
+
+5. The cleaner runs in user space, as opposed to kernel space. It shares
+ information with the kernel by reading/writing the IFILE and through
+ cleaner specific system calls.
+
diff --git a/sys/ufs/lfs/TODO b/sys/ufs/lfs/TODO
new file mode 100644
index 000000000000..ace8f5eaef6c
--- /dev/null
+++ b/sys/ufs/lfs/TODO
@@ -0,0 +1,116 @@
+# @(#)TODO 8.1 (Berkeley) 6/11/93
+
+NOTE: Changed the lookup on a page of inodes to search from the back
+in case the same inode gets written twice on the same page.
+
+Make sure that if you are writing a file, but not all the blocks
+make it into a single segment, that you do not write the inode in
+that segment.
+
+Keith:
+ Why not delete the lfs_bmapv call, just mark everything dirty
+ that isn't deleted/truncated? Get some numbers about
+ what percentage of the stuff that the cleaner thinks
+ might be live is live. If it's high, get rid of lfs_bmapv.
+
+ There is a nasty problem in that it may take *more* room to write
+ the data to clean a segment than is returned by the new segment
+ because of indirect blocks in segment 2 being dirtied by the data
+ being copied into the log from segment 1. The suggested solution
+ at this point is to detect it when we have no space left on the
+ filesystem, write the extra data into the last segment (leaving
+ no clean ones), make it a checkpoint and shut down the file system
+ for fixing by a utility reading the raw partition. Argument is
+ that this should never happen and is practically impossible to fix
+ since the cleaner would have to theoretically build a model of the
+ entire filesystem in memory to detect the condition occurring.
+ A file coalescing cleaner will help avoid the problem, and one
+ that reads/writes from the raw disk could fix it.
+
+DONE Currently, inodes are being flushed to disk synchronously upon
+ creation -- see ufs_makeinode. However, only the inode
+ is flushed, the directory "name" is written using VOP_BWRITE,
+ so it's not synchronous. Possible solutions: 1: get some
+ ordering in the writes so that inode/directory entries get
+ stuffed into the same segment. 2: do both synchronously
+ 3: add Mendel's information into the stream so we log
+ creation/deletion of inodes. 4: do some form of partial
+ segment when changing the inode (creation/deletion/rename).
+DONE Fix i_block increment for indirect blocks.
+ If the file system is tar'd, extracted on top of another LFS, the
+ IFILE ain't worth diddly. Is the cleaner writing the IFILE?
+ If not, let's make it read-only.
+DONE Delete unnecessary source from utils in main-line source tree.
+DONE Make sure that we're counting meta blocks in the inode i_block count.
+ Overlap the version and nextfree fields in the IFILE
+DONE Vinvalbuf (Kirk):
+ Why writing blocks that are no longer useful?
+ Are the semantics of close such that blocks have to be flushed?
+ How specify in the buf chain the blocks that don't need
+ to be written? (Different numbering of indirect blocks.)
+
+Margo:
+ Change so that only search one sector of inode block file for the
+ inode by using sector addresses in the ifile instead of
+ logical disk addresses.
+ Fix the use of the ifile version field to use the generation
+ number instead.
+DONE Unmount; not doing a bgetvp (VHOLD) in lfs_newbuf call.
+DONE Document in the README file where the checkpoint information is
+ on disk.
+ Variable block sizes (Margo/Keith).
+ Switch the byte accounting to sector accounting.
+DONE Check lfs.h and make sure that the #defines/structures are all
+ actually needed.
+DONE Add a check in lfs_segment.c so that if the segment is empty,
+ we don't write it.
+ Need to keep vnode v_numoutput up to date for pending writes?
+DONE USENIX paper (Carl/Margo).
+
+
+Evelyn:
+ lfsck: If you delete a file that's being executed, the version number
+ isn't updated, and lfsck has to figure this out; the case is the
+ same as having an inode that no directory references, so the
+ file should be reattached into lost+found.
+ Recovery/fsck.
+
+Carl:
+ Investigate: clustering of reads (if blocks in the segment are ordered,
+ should read them all) and writes (McVoy paper).
+ Investigate: should the access time be part of the IFILE:
+ pro: theoretically, saves disk writes
+ con: caching inodes should obviate this advantage
+ the IFILE is already humongous
+ Cleaner.
+ Port to OSF/1 (Carl/Keith).
+ Currently there's no notion of write error checking.
+ + Failed data/inode writes should be rescheduled (kernel level
+ bad blocking).
+ + Failed superblock writes should cause selection of new
+ superblock for checkpointing.
+
+FUTURE FANTASIES: ============
+
++ unrm, versioning
++ transactions
++ extended cleaner policies (hot/cold data, data placement)
+
+==============================
+Problem with the concept of multiple buffer headers referencing the segment:
+Positives:
+ Don't lock down 1 segment per file system of physical memory.
+ Don't copy from buffers to segment memory.
+ Don't tie down the bus to transfer 1M.
+ Works on controllers supporting less than large transfers.
+ Disk can start writing immediately instead of waiting 1/2 rotation
+ and the full transfer.
+Negatives:
+ Have to do segment write then segment summary write, since the latter
+ is what verifies that the segment is okay. (Is there another way
+ to do this?)
+==============================
+
+The algorithm for selecting the disk addresses of the super-blocks
+has to be available to the user program which checks the file system.
+
+(Currently in newfs, becomes a common subroutine.)
diff --git a/sys/ufs/lfs/lfs.h b/sys/ufs/lfs/lfs.h
new file mode 100644
index 000000000000..87b8c22ccc02
--- /dev/null
+++ b/sys/ufs/lfs/lfs.h
@@ -0,0 +1,353 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs.h 8.3 (Berkeley) 9/23/93
+ */
+
+#define LFS_LABELPAD 8192 /* LFS label size */
+#define LFS_SBPAD 8192 /* LFS superblock size */
+
+/*
+ * XXX
+ * This is a kluge and NEEDS to go away.
+ *
+ * Right now, ufs code handles most of the calls for directory operations
+ * such as create, mkdir, link, etc. As a result VOP_UPDATE is being
+ * called with waitfor set (since ffs does these things synchronously).
+ * Since LFS does not want to do these synchronously, we treat the last
+ * argument to lfs_update as a set of flags. If LFS_SYNC is set, then
+ * the update should be synchronous, if not, do it asynchronously.
+ * Unfortunately, this means that LFS won't work with NFS yet because
+ * NFS goes through paths that will make normal calls to ufs which will
+ * call lfs with a last argument of 1.
+ */
+#define LFS_SYNC 0x02
+
+/*
+ * On-disk and in-memory checkpoint segment usage structure.
+ *
+ * One entry exists per segment.  Entries live in the ifile and are
+ * fetched with LFS_SEGENTRY().  Because this is also the on-disk format,
+ * reordering or resizing fields changes that format.
+ */
+typedef struct segusage SEGUSE;
+struct segusage {
+ u_long su_nbytes; /* number of live bytes */
+ u_long su_lastmod; /* SEGUSE last modified timestamp */
+ u_short su_nsums; /* number of summaries in segment */
+ u_short su_ninos; /* number of inode blocks in seg */
+#define SEGUSE_ACTIVE 0x1 /* segment is currently being written */
+#define SEGUSE_DIRTY 0x2 /* segment has data in it */
+#define SEGUSE_SUPERBLOCK 0x4 /* segment contains a superblock */
+ u_long su_flags; /* SEGUSE_* bits defined above */
+};
+
+/*
+ * SEGUPB is 1 << lfs_sushift, the number of SEGUSE entries covered by one
+ * step of the segusage-table shift.  SEGTABSIZE_SU is lfs_nseg divided by
+ * that count, rounded up.
+ */
+#define SEGUPB(fs) (1 << (fs)->lfs_sushift)
+#define SEGTABSIZE_SU(fs) \
+ (((fs)->lfs_nseg + SEGUPB(fs) - 1) >> (fs)->lfs_sushift)
+
+/*
+ * On-disk file information. One per file with data blocks in the segment.
+ *
+ * fi_blocks is declared with a single element but is described as an array
+ * of logical block numbers counted by fi_nblocks; presumably the remaining
+ * entries follow the structure in the summary block -- confirm against the
+ * segment writer.  Changing the [1] bound would change sizeof(FINFO).
+ */
+typedef struct finfo FINFO;
+struct finfo {
+ u_long fi_nblocks; /* number of blocks */
+ u_long fi_version; /* version number */
+ u_long fi_ino; /* inode number */
+ long fi_blocks[1]; /* array of logical block numbers */
+};
+
+/*
+ * On-disk and in-memory super block.
+ *
+ * The fields are grouped as: identification and geometry, the checkpoint
+ * region, configuration parameters, values that can be computed from the
+ * others, the super-block offsets, fields that only have meaning while
+ * mounted, and finally the checksum.  Because this structure is also the
+ * on-disk format, reordering or resizing fields changes that format.
+ */
+struct lfs {
+#define LFS_MAGIC 0x070162
+ u_long lfs_magic; /* magic number */
+#define LFS_VERSION 1
+ u_long lfs_version; /* version number */
+
+ u_long lfs_size; /* number of blocks in fs */
+ u_long lfs_ssize; /* number of blocks per segment */
+ u_long lfs_dsize; /* number of disk blocks in fs */
+ u_long lfs_bsize; /* file system block size */
+ u_long lfs_fsize; /* size of frag blocks in fs */
+ u_long lfs_frag; /* number of frags in a block in fs */
+
+/* Checkpoint region. */
+ ino_t lfs_free; /* start of the free list */
+ u_long lfs_bfree; /* number of free disk blocks */
+ u_long lfs_nfiles; /* number of allocated inodes */
+ long lfs_avail; /* blocks available for writing */
+ u_long lfs_uinodes; /* inodes in cache not yet on disk */
+ daddr_t lfs_idaddr; /* inode file disk address */
+ ino_t lfs_ifile; /* inode file inode number */
+ daddr_t lfs_lastseg; /* address of last segment written */
+ daddr_t lfs_nextseg; /* address of next segment to write */
+ daddr_t lfs_curseg; /* current segment being written */
+ daddr_t lfs_offset; /* offset in curseg for next partial */
+ daddr_t lfs_lastpseg; /* address of last partial written */
+ u_long lfs_tstamp; /* time stamp */
+
+/* These are configuration parameters. */
+ u_long lfs_minfree; /* minimum percentage of free blocks */
+
+/* These fields can be computed from the others. */
+ u_quad_t lfs_maxfilesize; /* maximum representable file size */
+ u_long lfs_dbpseg; /* disk blocks per segment */
+ u_long lfs_inopb; /* inodes per block */
+ u_long lfs_ifpb; /* IFILE entries per block */
+ u_long lfs_sepb; /* SEGUSE entries per block */
+ u_long lfs_nindir; /* indirect pointers per block */
+ u_long lfs_nseg; /* number of segments */
+ u_long lfs_nspf; /* number of sectors per fragment */
+ u_long lfs_cleansz; /* cleaner info size in blocks */
+ u_long lfs_segtabsz; /* segment table size in blocks */
+
+ u_long lfs_segmask; /* calculate offset within a segment */
+ u_long lfs_segshift; /* fast mult/div for segments */
+ u_long lfs_bmask; /* calc block offset from file offset */
+ u_long lfs_bshift; /* calc block number from file offset */
+ u_long lfs_ffmask; /* calc frag offset from file offset */
+ u_long lfs_ffshift; /* fast mult/div for frag from file */
+ u_long lfs_fbmask; /* calc frag offset from block offset */
+ u_long lfs_fbshift; /* fast mult/div for frag from block */
+ u_long lfs_fsbtodb; /* fsbtodb and dbtofsb shift constant */
+ u_long lfs_sushift; /* fast mult/div for segusage table */
+
+#define LFS_MIN_SBINTERVAL 5 /* minimum superblock segment spacing */
+#define LFS_MAXNUMSB 10 /* superblock disk offsets */
+ daddr_t lfs_sboffs[LFS_MAXNUMSB];
+
+/* These fields are set at mount time and are meaningless on disk. */
+ struct segment *lfs_sp; /* current segment being written */
+ struct vnode *lfs_ivnode; /* vnode for the ifile */
+ u_long lfs_seglock; /* single-thread the segment writer */
+ pid_t lfs_lockpid; /* pid of lock holder */
+ u_long lfs_iocount; /* number of ios pending */
+ u_long lfs_writer; /* don't allow any dirops to start */
+ u_long lfs_dirops; /* count of active directory ops */
+ u_long lfs_doifile; /* Write ifile blocks on next write */
+ u_long lfs_nactive; /* Number of segments since last ckp */
+ u_char lfs_fmod; /* super block modified flag */
+ u_char lfs_clean; /* file system is clean flag */
+ u_char lfs_ronly; /* mounted read-only flag */
+ u_char lfs_flags; /* currently unused flag */
+ u_char lfs_fsmnt[MNAMELEN]; /* name mounted on */
+ u_char pad[3]; /* long-align */
+
+/* Checksum; valid on disk. */
+ u_long lfs_cksum; /* checksum for superblock checking */
+};
+
+/*
+ * Inode 0 is the out-of-band inode number, inode 1 is the inode number for
+ * the IFILE, the root inode is 2 and the lost+found inode is 3.
+ */
+
+/* Fixed inode numbers. */
+#define LFS_UNUSED_INUM 0 /* out of band inode number */
+#define LFS_IFILE_INUM 1 /* IFILE inode number */
+#define LOSTFOUNDINO 3 /* lost+found inode number */
+#define LFS_FIRST_INUM 4 /* first free inode number */
+
+/*
+ * Address calculations for metadata located in the inode.
+ *
+ * S_INDIR is -NDADDR; D_INDIR lies NINDIR + 1 below S_INDIR, and T_INDIR
+ * lies NINDIR * NINDIR + 1 below D_INDIR.  The fs argument of S_INDIR is
+ * unused.  Every replacement list is parenthesized so the negative value
+ * stays a single operand wherever the macro is expanded.
+ */
+#define S_INDIR(fs) (-NDADDR)
+#define D_INDIR(fs) (S_INDIR(fs) - NINDIR(fs) - 1)
+#define T_INDIR(fs) (D_INDIR(fs) - NINDIR(fs) * NINDIR(fs) - 1)
+
+/* Unassigned disk address. */
+#define UNASSIGNED (-1)
+
+/* Unused logical block number */
+#define LFS_UNUSED_LBN (-1)
+
+/*
+ * Per-inode entry in the ifile.
+ *
+ * An entry whose if_daddr is LFS_UNUSED_DADDR is not in use.  Unused
+ * entries are chained through if_nextfree, starting at lfs_free in the
+ * super block; LFS_UNUSED_INUM terminates the chain.
+ */
+typedef struct ifile IFILE;
+struct ifile {
+ u_long if_version; /* inode version number */
+#define LFS_UNUSED_DADDR 0 /* out-of-band daddr */
+ daddr_t if_daddr; /* inode disk address */
+ ino_t if_nextfree; /* next-unallocated inode */
+};
+
+/*
+ * Cleaner information structure. This resides in the ifile and is used
+ * to pass information between the cleaner and the kernel.
+ * LFS_CLEANERINFO() reads it from block 0 of the ifile.
+ * NOTE(review): the "K:" tag on each field presumably marks it as
+ * maintained by the kernel -- confirm.
+ */
+typedef struct _cleanerinfo {
+ u_long clean; /* K: number of clean segments */
+ u_long dirty; /* K: number of dirty segments */
+} CLEANERINFO;
+
+/*
+ * Size of the cleaner info: sizeof(CLEANERINFO) divided by the block size
+ * (via lfs_bshift), rounded up.
+ */
+#define CLEANSIZE_SU(fs) \
+ ((sizeof(CLEANERINFO) + (fs)->lfs_bsize - 1) >> (fs)->lfs_bshift)
+
+/*
+ * All summary blocks are the same size, so we can always read a summary
+ * block easily from a segment.
+ */
+#define LFS_SUMMARY_SIZE 512
+
+/*
+ * On-disk segment summary information.
+ *
+ * This header is followed, within the LFS_SUMMARY_SIZE summary block, by
+ * the FINFO structures and the inode disk addresses.  Because this is an
+ * on-disk format, reordering or resizing fields changes that format.
+ */
+typedef struct segsum SEGSUM;
+struct segsum {
+ u_long ss_sumsum; /* check sum of summary block */
+ u_long ss_datasum; /* check sum of data */
+ daddr_t ss_next; /* next segment */
+ u_long ss_create; /* creation time stamp */
+ u_short ss_nfinfo; /* number of file info structures */
+ u_short ss_ninos; /* number of inodes in summary */
+#define SS_DIROP 0x01 /* segment begins a dirop */
+#define SS_CONT 0x02 /* more partials to finish this write */
+ u_short ss_flags; /* used for directory operations */
+ u_short ss_pad; /* extra space */
+ /* FINFO's and inode daddr's... */
+};
+
+/* NINDIR is the number of indirects in a file system block. */
+#define NINDIR(fs) ((fs)->lfs_nindir)
+
+/* INOPB is the number of inodes in a secondary storage block. */
+#define INOPB(fs) ((fs)->lfs_inopb)
+
+/*
+ * Block and address conversions built on the super block shifts and masks.
+ * blksize() is always lfs_bsize.  fsbtodb()/dbtofsb() shift by lfs_fsbtodb
+ * to convert between file system blocks and disk blocks.
+ */
+#define blksize(fs) ((fs)->lfs_bsize)
+#define blkoff(fs, loc) ((loc) & (fs)->lfs_bmask)
+#define fsbtodb(fs, b) ((b) << (fs)->lfs_fsbtodb)
+#define dbtofsb(fs, b) ((b) >> (fs)->lfs_fsbtodb)
+#define lblkno(fs, loc) ((loc) >> (fs)->lfs_bshift)
+#define lblktosize(fs, blk) ((blk) << (fs)->lfs_bshift)
+#define numfrags(fs, loc) /* shifts by lfs_bshift, same as lblkno() */ \
+ ((loc) >> (fs)->lfs_bshift)
+
+/*
+ * Segment numbers and disk addresses are converted relative to
+ * lfs_sboffs[0], using a segment length of lfs_ssize file system blocks
+ * expressed in disk blocks.
+ */
+#define datosn(fs, daddr) /* disk address to segment number */ \
+ (((daddr) - (fs)->lfs_sboffs[0]) / fsbtodb((fs), (fs)->lfs_ssize))
+#define sntoda(fs, sn) /* segment number to disk address */ \
+ ((daddr_t)((sn) * ((fs)->lfs_ssize << (fs)->lfs_fsbtodb) + \
+ (fs)->lfs_sboffs[0]))
+
+/*
+ * Read in the block with the cleaner info from the ifile.
+ *
+ * Marks the ifile inode IN_ACCESS, reads block 0 of the ifile into BP and
+ * points CP at the start of the buffer data.  Panics if the read fails.
+ * BP is still held when the macro finishes; the caller releases it.
+ * The expansion is a bare { } block, not a single expression.
+ */
+#define LFS_CLEANERINFO(CP, F, BP) { \
+ VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS; \
+ if (bread((F)->lfs_ivnode, \
+ (daddr_t)0, (F)->lfs_bsize, NOCRED, &(BP))) \
+ panic("lfs: ifile read"); \
+ (CP) = (CLEANERINFO *)(BP)->b_data; \
+}
+
+/*
+ * Read in the block with a specific inode from the ifile.
+ *
+ * The IFILE entries follow the cleaner info (lfs_cleansz blocks) and the
+ * segment table (lfs_segtabsz blocks), lfs_ifpb entries per block.  The
+ * macro marks the ifile inode IN_ACCESS, reads the block holding entry IN
+ * into BP and points IP at that entry.  Panics with the bread() error if
+ * the read fails.  BP is still held when the macro finishes; the caller
+ * releases it with brelse() or writes it back.
+ * IN is evaluated twice, and the expansion is a bare { } block.
+ */
+#define LFS_IENTRY(IP, F, IN, BP) { \
+ int _e; \
+ VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS; \
+ if (_e = bread((F)->lfs_ivnode, \
+ (IN) / (F)->lfs_ifpb + (F)->lfs_cleansz + (F)->lfs_segtabsz,\
+ (F)->lfs_bsize, NOCRED, &(BP))) \
+ panic("lfs: ifile read %d", _e); \
+ (IP) = (IFILE *)(BP)->b_data + (IN) % (F)->lfs_ifpb; \
+}
+
+/*
+ * Read in the block with a specific segment usage entry from the ifile.
+ *
+ * The segment usage table follows the cleaner info (lfs_cleansz blocks).
+ * The macro marks the ifile inode IN_ACCESS, reads the block holding the
+ * entry for segment IN into BP and points SP at that entry.  Panics with
+ * the bread() error if the read fails.  BP is still held when the macro
+ * finishes; the caller releases it or writes it back.
+ *
+ * The block is selected with a shift by lfs_sushift and the entry with a
+ * mask of (lfs_sepb - 1), so lfs_sepb must be the power of two matching
+ * lfs_sushift.  The mask is parenthesized explicitly rather than relying
+ * on `-' binding tighter than `&'.
+ * IN is evaluated twice, and the expansion is a bare { } block.
+ */
+#define LFS_SEGENTRY(SP, F, IN, BP) { \
+ int _e; \
+ VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS; \
+ if (_e = bread((F)->lfs_ivnode, \
+ ((IN) >> (F)->lfs_sushift) + (F)->lfs_cleansz, \
+ (F)->lfs_bsize, NOCRED, &(BP))) \
+ panic("lfs: ifile read: %d", _e); \
+ (SP) = (SEGUSE *)(BP)->b_data + ((IN) & ((F)->lfs_sepb - 1)); \
+}
+
+/*
+ * Determine if there is enough room currently available to write db
+ * disk blocks. We need enough blocks for the new blocks, the current
+ * inode blocks, a summary block, plus potentially the ifile inode and
+ * the segment usage table, plus an ifile page.
+ *
+ * The sum is cast to long because lfs_avail is a signed long.  The db
+ * argument is parenthesized so that an expression argument (for example
+ * a conditional) is added as a whole.
+ */
+#define LFS_FITS(fs, db) \
+ ((long)(((db) + ((fs)->lfs_uinodes + INOPB((fs))) / INOPB((fs)) + \
+ fsbtodb((fs), 1) + LFS_SUMMARY_SIZE / DEV_BSIZE + \
+ (fs)->lfs_segtabsz)) < (fs)->lfs_avail)
+
+/*
+ * Determine if a buffer belongs to the ifile: true when the inode behind
+ * the buffer's vnode has inode number LFS_IFILE_INUM.  The bp argument is
+ * parenthesized so that any pointer expression may be passed.
+ */
+#define IS_IFILE(bp) (VTOI((bp)->b_vp)->i_number == LFS_IFILE_INUM)
+
+/*
+ * Structures used by lfs_bmapv and lfs_markv to communicate information
+ * about inodes and data blocks.
+ * NOTE(review): these calls are presumably the cleaner system calls, which
+ * would make this layout an interface shared with user programs -- confirm
+ * before changing any field.
+ */
+typedef struct block_info {
+ ino_t bi_inode; /* inode # */
+ daddr_t bi_lbn; /* logical block w/in file */
+ daddr_t bi_daddr; /* disk address of block */
+ time_t bi_segcreate; /* origin segment create time */
+ int bi_version; /* file version number */
+ void *bi_bp; /* data buffer */
+} BLOCK_INFO;
+
+/*
+ * In-memory description of a segment about to be written.
+ * The super block points at the current one through lfs_sp.
+ */
+struct segment {
+ struct lfs *fs; /* file system pointer */
+ struct buf **bpp; /* pointer to buffer array */
+ struct buf **cbpp; /* pointer to next available bp */
+ struct buf **start_bpp; /* pointer to first bp in this set */
+ struct buf *ibp; /* buffer pointer to inode page */
+ struct finfo *fip; /* current fileinfo pointer */
+ struct vnode *vp; /* vnode being gathered */
+ void *segsum; /* segment summary info */
+ u_long ninodes; /* number of inodes in this segment */
+ u_long seg_bytes_left; /* bytes left in segment */
+ u_long sum_bytes_left; /* bytes left in summary block */
+ u_long seg_number; /* number of this segment */
+ daddr_t *start_lbp; /* beginning lbn for this set */
+#define SEGM_CKP 0x01 /* doing a checkpoint */
+#define SEGM_CLEAN 0x02 /* cleaner call; don't sort */
+#define SEGM_SYNC 0x04 /* wait for segment */
+ u_long seg_flags; /* run-time flags for this segment */
+};
+
+/*
+ * Space checks for a request of BB blocks on file system F.
+ *
+ * ISSPACE: for credentials C with uid 0 the request fits when lfs_bfree is
+ * at least BB; for any other uid IS_FREESPACE must hold.
+ * IS_FREESPACE: lfs_bfree must exceed BB plus the reserve of lfs_minfree
+ * percent of lfs_dsize.
+ * ISSPACE_XXX: the uid-0 test without looking at any credentials.
+ */
+#define ISSPACE(F, BB, C) \
+ (((C)->cr_uid == 0 && (F)->lfs_bfree >= (BB)) || \
+ ((C)->cr_uid != 0 && IS_FREESPACE(F, BB)))
+
+#define IS_FREESPACE(F, BB) \
+ ((F)->lfs_bfree > ((F)->lfs_dsize * (F)->lfs_minfree / 100 + (BB)))
+
+#define ISSPACE_XXX(F, BB) \
+ ((F)->lfs_bfree >= (BB))
+
+/*
+ * DOSTATS is defined unconditionally right here, so the statistics
+ * structure below is always declared by this header.
+ */
+#define DOSTATS
+#ifdef DOSTATS
+/*
+ * Statistics Counters
+ * NOTE(review): the individual counters are not documented in this header;
+ * their meaning has to be taken from the code that increments them.
+ */
+struct lfs_stats {
+ int segsused;
+ int psegwrites;
+ int psyncwrites;
+ int pcleanwrites;
+ int blocktot;
+ int cleanblocks;
+ int ncheckpoints;
+ int nwrites;
+ int nsync_writes;
+ int wait_exceeded;
+ int write_exceeded;
+ int flush_invoked;
+};
+extern struct lfs_stats lfs_stats;
+#endif
diff --git a/sys/ufs/lfs/lfs_alloc.c b/sys/ufs/lfs/lfs_alloc.c
new file mode 100644
index 000000000000..3f06c8139304
--- /dev/null
+++ b/sys/ufs/lfs/lfs_alloc.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs_alloc.c 8.4 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/syslog.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+extern u_long nextgennumber;
+
+/*
+ * Allocate a new inode.
+ *
+ * Takes the inode number at the head of the ifile free list
+ * (fs->lfs_free) and advances the head to that entry's if_nextfree.  If
+ * that empties the list, one more block of unused entries is appended to
+ * the ifile so that the next allocation can succeed.  A vnode/inode pair
+ * is then created for the new number, its dinode is zeroed, and it is
+ * given a generation number, hashed and initialized.
+ *
+ * Returns 0 with the new vnode in *ap->a_vpp, or an errno value.  If
+ * ufs_vinit() fails, *ap->a_vpp is set to NULL; on the other error paths
+ * it is left untouched.
+ */
+/* ARGSUSED */
+int
+lfs_valloc(ap)
+	struct vop_valloc_args /* {
+		struct vnode *a_pvp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct vnode **a_vpp;
+	} */ *ap;
+{
+	struct lfs *fs;
+	struct buf *bp;
+	struct ifile *ifp;
+	struct inode *ip;
+	struct vnode *vp;
+	daddr_t blkno;
+	ino_t new_ino;
+	u_long i, max;
+	int error;
+
+	/* Get the head of the freelist. */
+	fs = VTOI(ap->a_pvp)->i_lfs;
+	new_ino = fs->lfs_free;
+#ifdef ALLOCPRINT
+	printf("lfs_ialloc: allocate inode %d\n", new_ino);
+#endif
+
+	/*
+	 * Remove the inode from the free list and write the new start
+	 * of the free list into the superblock.
+	 */
+	LFS_IENTRY(ifp, fs, new_ino, bp);
+	if (ifp->if_daddr != LFS_UNUSED_DADDR)
+		panic("lfs_ialloc: inuse inode on the free list");
+	fs->lfs_free = ifp->if_nextfree;
+	brelse(bp);
+
+	/* Extend IFILE so that the next lfs_valloc will succeed. */
+	if (fs->lfs_free == LFS_UNUSED_INUM) {
+		vp = fs->lfs_ivnode;
+		ip = VTOI(vp);
+		blkno = lblkno(fs, ip->i_size);
+		if (error = lfs_balloc(vp, fs->lfs_bsize, blkno, &bp)) {
+			/*
+			 * bp cannot be trusted after a failed allocation.
+			 * The ifile entry for new_ino was only read, not
+			 * modified, so putting new_ino back at the head of
+			 * the free list restores the state on entry.
+			 */
+			fs->lfs_free = new_ino;
+			return (error);
+		}
+		ip->i_size += fs->lfs_bsize;
+		vnode_pager_setsize(vp, (u_long)ip->i_size);
+		vnode_pager_uncache(vp);
+
+		/*
+		 * Number the entries of the new block and chain each one
+		 * to its successor; the first becomes the list head.
+		 */
+		i = (blkno - fs->lfs_segtabsz - fs->lfs_cleansz) *
+		    fs->lfs_ifpb;
+		fs->lfs_free = i;
+		max = i + fs->lfs_ifpb;
+		for (ifp = (struct ifile *)bp->b_data; i < max; ++ifp) {
+			ifp->if_version = 1;
+			ifp->if_daddr = LFS_UNUSED_DADDR;
+			ifp->if_nextfree = ++i;
+		}
+		/* The last entry of the block terminates the chain. */
+		ifp--;
+		ifp->if_nextfree = LFS_UNUSED_INUM;
+		if (error = VOP_BWRITE(bp))
+			return (error);
+	}
+
+	/* Create a vnode to associate with the inode. */
+	if (error = lfs_vcreate(ap->a_pvp->v_mount, new_ino, &vp))
+		return (error);
+
+	ip = VTOI(vp);
+	/* Zero out the direct and indirect block addresses. */
+	bzero(&ip->i_din, sizeof(struct dinode));
+	ip->i_din.di_inumber = new_ino;
+
+	/* Set a new generation number for this inode. */
+	if (++nextgennumber < (u_long)time.tv_sec)
+		nextgennumber = time.tv_sec;
+	ip->i_gen = nextgennumber;
+
+	/* Insert into the inode hash table. */
+	ufs_ihashins(ip);
+
+	if (error = ufs_vinit(vp->v_mount, lfs_specop_p, LFS_FIFOOPS, &vp)) {
+		vput(vp);
+		*ap->a_vpp = NULL;
+		return (error);
+	}
+
+	*ap->a_vpp = vp;
+	vp->v_flag |= VDIROP;
+	VREF(ip->i_devvp);
+
+	/* Set superblock modified bit and increment file count. */
+	fs->lfs_fmod = 1;
+	++fs->lfs_nfiles;
+	return (0);
+}
+
+/*
+ * Build a fresh vnode/inode pair for inode number `ino' on mount `mp' and
+ * fill in the fields that are known at this point.  On success the vnode
+ * is handed back through vpp and 0 is returned.  If no vnode can be
+ * obtained, *vpp is set to NULL and the getnewvnode() error is returned.
+ * The new inode is flagged IN_MODIFIED and counted in lfs_uinodes.
+ */
+int
+lfs_vcreate(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	extern int (**lfs_vnodeop_p)();
+	struct ufsmount *mnt;
+	struct inode *node;
+	int rv, q;
+
+	/* Without a vnode there is nothing to attach an inode to. */
+	rv = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, vpp);
+	if (rv != 0) {
+		*vpp = NULL;
+		return (rv);
+	}
+
+	/* The file system's private data hangs off the ufsmount. */
+	mnt = VFSTOUFS(mp);
+
+	/* Allocate the in-core inode and cross-link it with the vnode. */
+	MALLOC(node, struct inode *, sizeof(struct inode), M_LFSNODE, M_WAITOK);
+	node->i_vnode = *vpp;
+	(*vpp)->v_data = node;
+
+	/* Device, state flag and inode number. */
+	node->i_devvp = mnt->um_devvp;
+	node->i_flag = IN_MODIFIED;
+	node->i_dev = mnt->um_dev;
+	node->i_din.di_inumber = ino;
+	node->i_number = node->i_din.di_inumber;
+
+	/* Recognizable pattern in the spare dinode words. */
+	node->i_din.di_spare[0] = 0xdeadbeef;
+	node->i_din.di_spare[1] = 0xdeadbeef;
+
+	node->i_lfs = mnt->um_lfs;
+#ifdef QUOTA
+	q = 0;
+	while (q < MAXQUOTAS) {
+		node->i_dquot[q] = NODQUOT;
+		q++;
+	}
+#endif
+
+	/* Nothing is known about the file itself yet. */
+	node->i_lockf = 0;
+	node->i_diroff = 0;
+	node->i_mode = 0;
+	node->i_size = 0;
+	node->i_blocks = 0;
+
+	/* One more inode that exists in the cache but not on disk. */
+	mnt->um_lfs->lfs_uinodes += 1;
+	return (0);
+}
+
+/*
+ * Free an inode.
+ *
+ * The inode to free is the one behind ap->a_pvp; a_ino and a_mode are not
+ * read.  Its ifile entry is marked unused, given a new version number and
+ * pushed onto the head of the free list.  If the inode had a disk
+ * address, sizeof(struct dinode) is subtracted from the live-byte count
+ * of the segment that held it.  Always returns 0; the results of the two
+ * VOP_BWRITE() calls are discarded.
+ */
+/* ARGSUSED */
+int
+lfs_vfree(ap)
+ struct vop_vfree_args /* {
+ struct vnode *a_pvp;
+ ino_t a_ino;
+ int a_mode;
+ } */ *ap;
+{
+ SEGUSE *sup;
+ struct buf *bp;
+ struct ifile *ifp;
+ struct inode *ip;
+ struct lfs *fs;
+ daddr_t old_iaddr;
+ ino_t ino;
+
+ /* Get the inode number and file system. */
+ ip = VTOI(ap->a_pvp);
+ fs = ip->i_lfs;
+ ino = ip->i_number;
+ /*
+ * A modified inode is counted in lfs_uinodes; drop that count and
+ * clear the pending-update flags.
+ */
+ if (ip->i_flag & IN_MODIFIED) {
+ --fs->lfs_uinodes;
+ ip->i_flag &=
+ ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
+ }
+ /*
+ * Set the ifile's inode entry to unused, increment its version number
+ * and link it into the free chain.
+ */
+ LFS_IENTRY(ifp, fs, ino, bp);
+ old_iaddr = ifp->if_daddr;
+ ifp->if_daddr = LFS_UNUSED_DADDR;
+ ++ifp->if_version;
+ ifp->if_nextfree = fs->lfs_free;
+ fs->lfs_free = ino;
+ (void) VOP_BWRITE(bp);
+
+ /* Only an inode that had a disk address occupies segment space. */
+ if (old_iaddr != LFS_UNUSED_DADDR) {
+ LFS_SEGENTRY(sup, fs, datosn(fs, old_iaddr), bp);
+#ifdef DIAGNOSTIC
+ /* su_nbytes is unsigned; without this check it would wrap. */
+ if (sup->su_nbytes < sizeof(struct dinode))
+ panic("lfs_vfree: negative byte count (segment %d)\n",
+ datosn(fs, old_iaddr));
+#endif
+ sup->su_nbytes -= sizeof(struct dinode);
+ (void) VOP_BWRITE(bp);
+ }
+
+ /* Set superblock modified bit and decrement file count. */
+ fs->lfs_fmod = 1;
+ --fs->lfs_nfiles;
+ return (0);
+}
diff --git a/sys/ufs/lfs/lfs_balloc.c b/sys/ufs/lfs/lfs_balloc.c
new file mode 100644
index 000000000000..b56bc9ec51b2
--- /dev/null
+++ b/sys/ufs/lfs/lfs_balloc.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs_balloc.c 8.1 (Berkeley) 6/11/93
+ */
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/resourcevar.h>
+#include <sys/trace.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+/* Get the buffer for logical block lbn of vp into *bpp, accounting any newly allocated blocks. */
+int
+lfs_balloc(vp, iosize, lbn, bpp)
+ struct vnode *vp;
+ u_long iosize;
+ daddr_t lbn;
+ struct buf **bpp;
+{
+ struct buf *ibp, *bp;
+ struct inode *ip;
+ struct lfs *fs;
+ struct indir indirs[NIADDR+2];
+ daddr_t daddr;
+ int bb, error, i, num;
+
+ ip = VTOI(vp);
+ fs = ip->i_lfs;
+
+ /*
+ * Three cases: it's a block beyond the end of file, it's a block in
+ * the file that may or may not have been assigned a disk address or
+ * we're writing an entire block. Note, if the daddr is unassigned,
+ * the block might still have existed in the cache (if it was read
+ * or written earlier). If it did, make sure we don't count it as a
+ * new block or zero out its contents. If it did not, make sure
+ * we allocate any necessary indirect blocks.
+ */
+
+ *bpp = NULL;
+ if (error = ufs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL ))
+ return (error); /* error is 0 from here on unless set below */
+
+ *bpp = bp = getblk(vp, lbn, fs->lfs_bsize, 0, 0);
+ bb = VFSTOUFS(vp->v_mount)->um_seqinc; /* amount charged per new block */
+ if (daddr == UNASSIGNED)
+ /* May need to allocate indirect blocks */
+ for (i = 1; i < num; ++i) /* NOTE(review): does not stop on error; a later VOP_BWRITE result can overwrite ENOSPC */
+ if (!indirs[i].in_exists) {
+ ibp =
+ getblk(vp, indirs[i].in_lbn, fs->lfs_bsize,
+ 0, 0);
+ if (!(ibp->b_flags & (B_DONE | B_DELWRI))) {
+ if (!ISSPACE(fs, bb, curproc->p_ucred)){
+ ibp->b_flags |= B_INVAL;
+ brelse(ibp);
+ error = ENOSPC;
+ } else {
+ ip->i_blocks += bb; /* charge the inode ... */
+ ip->i_lfs->lfs_bfree -= bb; /* ... and the file system */
+ clrbuf(ibp);
+ error = VOP_BWRITE(ibp);
+ }
+ } else
+ panic ("Indirect block should not exist");
+ }
+ if (error) {
+ if (bp)
+ brelse(bp);
+ return(error); /* NOTE(review): *bpp still points at the buffer just released */
+ }
+
+
+ /* Now, we may need to allocate the data block */
+ if (!(bp->b_flags & (B_CACHE | B_DONE | B_DELWRI))) { /* per the comment above: skip blocks already in the cache */
+ if (daddr == UNASSIGNED)
+ if (!ISSPACE(fs, bb, curproc->p_ucred)) {
+ bp->b_flags |= B_INVAL;
+ brelse(bp);
+ return(ENOSPC); /* NOTE(review): *bpp is left set here as well */
+ } else {
+ ip->i_blocks += bb;
+ ip->i_lfs->lfs_bfree -= bb;
+ if (iosize != fs->lfs_bsize)
+ clrbuf(bp); /* partial write of a new block: zero it first */
+ }
+ else if (iosize == fs->lfs_bsize)
+ bp->b_blkno = daddr; /* Skip the I/O */
+ else {
+ bp->b_blkno = daddr; /* partial write of an existing block: read it in */
+ bp->b_flags |= B_READ;
+ VOP_STRATEGY(bp);
+ return(biowait(bp)); /* bp is not released here if the read fails */
+ }
+ }
+ return (error);
+}
diff --git a/sys/ufs/lfs/lfs_bio.c b/sys/ufs/lfs/lfs_bio.c
new file mode 100644
index 000000000000..0f021f172088
--- /dev/null
+++ b/sys/ufs/lfs/lfs_bio.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs_bio.c 8.4 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/resourcevar.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+/*
+ * LFS block write function (lfs_bwrite, below) and its shared state.
+ *
+ * XXX
+ * No write cost accounting is done.
+ * This is almost certainly wrong for synchronous operations and NFS.
+ */
+int lfs_allclean_wakeup; /* Cleaner wakeup address. */
+int locked_queue_count; /* XXX Count of locked-down buffers. */
+int lfs_writing; /* Set if already kicked off a writer
+ because of buffer space */
+/* Disabled alternative (lower) thresholds:
+#define WRITE_THRESHHOLD ((nbuf >> 2) - 10)
+#define WAIT_THRESHHOLD ((nbuf >> 1) - 10)
+*/
+#define WAIT_THRESHHOLD (nbuf - (nbuf >> 2) - 10) /* lfs_check sleeps while locked_queue_count exceeds this */
+#define WRITE_THRESHHOLD ((nbuf >> 1) - 10) /* lfs_check calls lfs_flush above this */
+#define LFS_BUFWAIT 2 /* multiplied by hz for the lfs_check sleep timeout */
+/* Turn a_bp into a locked delayed write charged against lfs_avail, then release it. */
+int
+lfs_bwrite(ap)
+ struct vop_bwrite_args /* {
+ struct buf *a_bp;
+ } */ *ap;
+{
+ register struct buf *bp = ap->a_bp;
+ struct lfs *fs;
+ struct inode *ip;
+ int error, s;
+
+ /*
+ * Set the delayed write flag and use reassignbuf to move the buffer
+ * from the clean list to the dirty one.
+ *
+ * Set the B_LOCKED flag and unlock the buffer, causing brelse to move
+ * the buffer onto the LOCKED free list. This is necessary, otherwise
+ * getnewbuf() would try to reclaim the buffers using bawrite, which
+ * isn't going to work.
+ *
+ * XXX we don't let meta-data writes run out of space because they can
+ * come from the segment writer. We need to make sure that there is
+ * enough space reserved so that there's room to write meta-data
+ * blocks.
+ */
+ if (!(bp->b_flags & B_LOCKED)) { /* already-locked buffers are only released below */
+ fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs;
+ while (!LFS_FITS(fs, fsbtodb(fs, 1)) && !IS_IFILE(bp) &&
+ bp->b_lblkno > 0) { /* ifile buffers and lblkno <= 0 never wait here */
+ /* Out of space, need cleaner to run */
+ wakeup(&lfs_allclean_wakeup);
+ if (error = tsleep(&fs->lfs_avail, PCATCH | PUSER,
+ "cleaner", NULL)) { /* NOTE(review): NULL used as the timeout; 0 presumably intended */
+ brelse(bp); /* sleep failed: give the buffer back unwritten */
+ return (error);
+ }
+ }
+ ip = VTOI((bp)->b_vp);
+ if (!(ip->i_flag & IN_MODIFIED)) /* inode not yet counted in lfs_uinodes */
+ ++fs->lfs_uinodes;
+ ip->i_flag |= IN_CHANGE | IN_MODIFIED | IN_UPDATE;
+ fs->lfs_avail -= fsbtodb(fs, 1); /* charge one file system block */
+ ++locked_queue_count;
+ bp->b_flags |= B_DELWRI | B_LOCKED;
+ bp->b_flags &= ~(B_READ | B_ERROR);
+ s = splbio(); /* reassignbuf runs between splbio() and splx() */
+ reassignbuf(bp, bp->b_vp);
+ splx(s);
+ }
+ brelse(bp);
+ return (0);
+}
+
+/*
+ * XXX
+ * This routine flushes buffers out of the B_LOCKED queue when LFS has too
+ * many locked down. Eventually the pageout daemon will simply call LFS
+ * when pages need to be reclaimed. Note, we have one static count of locked
+ * buffers, so we can't have more than a single file system. To make this
+ * work for multiple file systems, put the count into the mount structure.
+ */
+void
+lfs_flush()
+{
+ register struct mount *mp;
+
+#ifdef DOSTATS
+ ++lfs_stats.write_exceeded;
+#endif
+ if (lfs_writing) /* a flush is already in progress */
+ return;
+ lfs_writing = 1;
+ for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
+ /* The lock check below is to avoid races with unmount. */
+ if (mp->mnt_stat.f_type == MOUNT_LFS &&
+ (mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_UNMOUNT)) == 0 &&
+ !((((struct ufsmount *)mp->mnt_data))->ufsmount_u.lfs)->lfs_dirops ) {
+ /*
+ * We set the queue to 0 here because we are about to
+ * write all the dirty buffers we have; writes arriving
+ * during the segwrite should be counted afterwards.
+ * NOTE(review): no reset of locked_queue_count is
+ * visible in this function -- confirm where it happens.
+ */
+#ifdef DOSTATS
+ ++lfs_stats.flush_invoked;
+#endif
+ lfs_segwrite(mp, 0); /* return value is not checked */
+ }
+ }
+ lfs_writing = 0;
+}
+/* Unless block blkno of vp is in core, flush and/or sleep while too many buffers are locked down. */
+int
+lfs_check(vp, blkno)
+ struct vnode *vp;
+ daddr_t blkno;
+{
+ extern int lfs_allclean_wakeup; /* not referenced in this function; defined at file scope above */
+ int error;
+
+ error = 0;
+ if (incore(vp, blkno)) /* block already has a buffer: no throttling */
+ return (0);
+ if (locked_queue_count > WRITE_THRESHHOLD)
+ lfs_flush();
+
+ /* If out of buffers, wait on writer */
+ while (locked_queue_count > WAIT_THRESHHOLD) { /* NOTE(review): loops again whatever tsleep returns */
+#ifdef DOSTATS
+ ++lfs_stats.wait_exceeded;
+#endif
+ error = tsleep(&locked_queue_count, PCATCH | PUSER, "buffers",
+ hz * LFS_BUFWAIT);
+ }
+
+ return (error); /* status of the last tsleep, or 0 if no sleep was needed */
+}
diff --git a/sys/ufs/lfs/lfs_cksum.c b/sys/ufs/lfs/lfs_cksum.c
new file mode 100644
index 000000000000..77b011aa2c48
--- /dev/null
+++ b/sys/ufs/lfs/lfs_cksum.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs_cksum.c 8.1 (Berkeley) 6/11/93
+ */
+
+#include <sys/types.h>
+
+/*
+ * Simple, general purpose, fast checksum: XOR of the u_shorts in the buffer.
+ * Data must be short-aligned; a trailing odd byte of len is ignored.
+ * Returns a u_long in case we ever want to do something more rigorous.
+ *
+ * XXX
+ * Use the TCP/IP checksum instead.
+ */
+u_long
+cksum(str, len)
+	register void *str;
+	register size_t len;
+{
+	register u_short *sp;	/* typed cursor: a cast is not an lvalue */
+	register u_long sum;
+
+	sp = str;
+	for (sum = 0, len /= sizeof(u_short); len; --len)
+		sum ^= *sp++;
+	return (sum);
+}
diff --git a/sys/ufs/lfs/lfs_debug.c b/sys/ufs/lfs/lfs_debug.c
new file mode 100644
index 000000000000..cc28d6090239
--- /dev/null
+++ b/sys/ufs/lfs/lfs_debug.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs_debug.c 8.1 (Berkeley) 6/11/93
+ */
+
+#ifdef DEBUG
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+/* Print the fields of the struct lfs at lfsp with printf (compiled only under DEBUG). */
+void
+lfs_dump_super(lfsp)
+ struct lfs *lfsp;
+{
+ int i;
+
+ (void)printf("%s%lx\t%s%lx\t%s%d\t%s%d\n",
+ "magic ", lfsp->lfs_magic,
+ "version ", lfsp->lfs_version,
+ "size ", lfsp->lfs_size,
+ "ssize ", lfsp->lfs_ssize);
+ (void)printf("%s%d\t%s%d\t%s%d\t%s%d\n",
+ "dsize ", lfsp->lfs_dsize,
+ "bsize ", lfsp->lfs_bsize,
+ "fsize ", lfsp->lfs_fsize,
+ "frag ", lfsp->lfs_frag);
+
+ (void)printf("%s%d\t%s%d\t%s%d\t%s%d\n",
+ "minfree ", lfsp->lfs_minfree,
+ "inopb ", lfsp->lfs_inopb,
+ "ifpb ", lfsp->lfs_ifpb,
+ "nindir ", lfsp->lfs_nindir);
+
+ (void)printf("%s%d\t%s%d\t%s%d\t%s%d\n",
+ "nseg ", lfsp->lfs_nseg,
+ "nspf ", lfsp->lfs_nspf,
+ "cleansz ", lfsp->lfs_cleansz,
+ "segtabsz ", lfsp->lfs_segtabsz);
+
+ (void)printf("%s%lx\t%s%d\t%s%lx\t%s%d\n",
+ "segmask ", lfsp->lfs_segmask,
+ "segshift ", lfsp->lfs_segshift,
+ "bmask ", lfsp->lfs_bmask,
+ "bshift ", lfsp->lfs_bshift);
+
+ (void)printf("%s%lx\t%s%d\t%s%lx\t%s%d\n",
+ "ffmask ", lfsp->lfs_ffmask,
+ "ffshift ", lfsp->lfs_ffshift,
+ "fbmask ", lfsp->lfs_fbmask,
+ "fbshift ", lfsp->lfs_fbshift);
+
+ (void)printf("%s%d\t%s%d\t%s%lx\t%s%qx\n",
+ "sushift ", lfsp->lfs_sushift,
+ "fsbtodb ", lfsp->lfs_fsbtodb,
+ "cksum ", lfsp->lfs_cksum,
+ "maxfilesize ", lfsp->lfs_maxfilesize);
+
+ (void)printf("Superblock disk addresses:");
+ for (i = 0; i < LFS_MAXNUMSB; i++) /* all on one output line */
+ (void)printf(" %lx", lfsp->lfs_sboffs[i]);
+ (void)printf("\n");
+
+ (void)printf("Checkpoint Info\n");
+ (void)printf("%s%d\t%s%lx\t%s%d\n",
+ "free ", lfsp->lfs_free,
+ "idaddr ", lfsp->lfs_idaddr,
+ "ifile ", lfsp->lfs_ifile);
+ (void)printf("%s%lx\t%s%d\t%s%lx\t%s%lx\t%s%lx\t%s%lx\n",
+ "bfree ", lfsp->lfs_bfree,
+ "nfiles ", lfsp->lfs_nfiles,
+ "lastseg ", lfsp->lfs_lastseg,
+ "nextseg ", lfsp->lfs_nextseg,
+ "curseg ", lfsp->lfs_curseg,
+ "offset ", lfsp->lfs_offset);
+ (void)printf("tstamp %lx\n", lfsp->lfs_tstamp); /* NOTE(review): conversions assume the field widths declared in lfs.h -- not visible here */
+}
+/* Print the main fields and block addresses of the dinode at dip (compiled only under DEBUG). */
+void
+lfs_dump_dinode(dip)
+ struct dinode *dip;
+{
+ int i;
+
+ (void)printf("%s%u\t%s%d\t%s%u\t%s%u\t%s%lu\n",
+ "mode ", dip->di_mode,
+ "nlink ", dip->di_nlink,
+ "uid ", dip->di_uid,
+ "gid ", dip->di_gid,
+ "size ", dip->di_size);
+ (void)printf("inum %ld\n", dip->di_inumber);
+ (void)printf("Direct Addresses\n");
+ for (i = 0; i < NDADDR; i++) {
+ (void)printf("\t%lx", dip->di_db[i]);
+ if ((i % 6) == 5) /* line break after every sixth address */
+ (void)printf("\n");
+ }
+ for (i = 0; i < NIADDR; i++) /* indirect addresses follow with no heading of their own */
+ (void)printf("\t%lx", dip->di_ib[i]);
+ (void)printf("\n");
+}
+#endif /* DEBUG */
diff --git a/sys/ufs/lfs/lfs_extern.h b/sys/ufs/lfs/lfs_extern.h
new file mode 100644
index 000000000000..c1157ade02af
--- /dev/null
+++ b/sys/ufs/lfs/lfs_extern.h
@@ -0,0 +1,106 @@
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs_extern.h 8.2 (Berkeley) 4/16/94
+ */
+/* Forward declarations of structure tags; several appear as pointer types in the prototypes below. */
+struct fid;
+struct mount;
+struct nameidata;
+struct proc;
+struct statfs;
+struct timeval;
+struct inode;
+struct uio;
+struct mbuf;
+
+__BEGIN_DECLS
+u_long cksum __P((void *, size_t)); /* XXX */
+int lfs_balloc __P((struct vnode *, u_long, daddr_t, struct buf **));
+int lfs_blkatoff __P((struct vop_blkatoff_args *));
+int lfs_bwrite __P((struct vop_bwrite_args *));
+int lfs_check __P((struct vnode *, daddr_t));
+int lfs_close __P((struct vop_close_args *));
+int lfs_create __P((struct vop_create_args *));
+int lfs_fhtovp __P((struct mount *, struct fid *, struct mbuf *,
+ struct vnode **, int *, struct ucred **));
+int lfs_fsync __P((struct vop_fsync_args *));
+int lfs_getattr __P((struct vop_getattr_args *));
+struct dinode *
+ lfs_ifind __P((struct lfs *, ino_t, struct dinode *));
+int lfs_inactive __P((struct vop_inactive_args *));
+int lfs_init __P((void));
+int lfs_initseg __P((struct lfs *));
+int lfs_link __P((struct vop_link_args *));
+int lfs_makeinode __P((int, struct nameidata *, struct inode **));
+int lfs_mkdir __P((struct vop_mkdir_args *));
+int lfs_mknod __P((struct vop_mknod_args *));
+int lfs_mount __P((struct mount *,
+ char *, caddr_t, struct nameidata *, struct proc *));
+int lfs_mountroot __P((void));
+struct buf *
+ lfs_newbuf __P((struct vnode *, daddr_t, size_t));
+int lfs_read __P((struct vop_read_args *));
+int lfs_remove __P((struct vop_remove_args *));
+int lfs_rmdir __P((struct vop_rmdir_args *));
+int lfs_rename __P((struct vop_rename_args *));
+void lfs_seglock __P((struct lfs *, unsigned long flags));
+void lfs_segunlock __P((struct lfs *));
+int lfs_segwrite __P((struct mount *, int));
+int lfs_statfs __P((struct mount *, struct statfs *, struct proc *));
+int lfs_symlink __P((struct vop_symlink_args *));
+int lfs_sync __P((struct mount *, int, struct ucred *, struct proc *));
+int lfs_truncate __P((struct vop_truncate_args *));
+int lfs_unmount __P((struct mount *, int, struct proc *));
+int lfs_update __P((struct vop_update_args *));
+int lfs_valloc __P((struct vop_valloc_args *));
+int lfs_vcreate __P((struct mount *, ino_t, struct vnode **));
+int lfs_vfree __P((struct vop_vfree_args *));
+int lfs_vflush __P((struct vnode *));
+int lfs_vget __P((struct mount *, ino_t, struct vnode **));
+int lfs_vptofh __P((struct vnode *, struct fid *));
+int lfs_vref __P((struct vnode *));
+void lfs_vunref __P((struct vnode *));
+int lfs_write __P((struct vop_write_args *));
+#ifdef DEBUG /* the lfs_dump_* definitions in lfs_debug.c sit under the same guard */
+void lfs_dump_dinode __P((struct dinode *));
+void lfs_dump_super __P((struct lfs *));
+#endif
+__END_DECLS
+extern int (**lfs_vnodeop_p)(); /* pointer to a table of int-returning function pointers */
+extern int (**lfs_specop_p)();
+#ifdef FIFO
+extern int (**lfs_fifoop_p)();
+#define LFS_FIFOOPS lfs_fifoop_p /* the fifo table when FIFO is configured ... */
+#else
+#define LFS_FIFOOPS NULL /* ... and NULL otherwise */
+#endif
diff --git a/sys/ufs/lfs/lfs_inode.c b/sys/ufs/lfs/lfs_inode.c
new file mode 100644
index 000000000000..1a06aa23ed8f
--- /dev/null
+++ b/sys/ufs/lfs/lfs_inode.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (c) 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs_inode.c 8.5 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+/* LFS initialization: delegates to ufs_init() and returns its result. */
+int
+lfs_init()
+{
+ return (ufs_init());
+}
+
+/*
+ * Search a block of dinodes (last slot first) for ino; panic if it is absent.
+ */
+struct dinode *
+lfs_ifind(fs, ino, dip)
+	struct lfs *fs;
+	ino_t ino;
+	register struct dinode *dip;
+{
+	register struct dinode *slot;
+
+	for (slot = dip + INOPB(fs); slot-- > dip;)
+		if (slot->di_inumber == ino)
+			return (slot);
+	panic("lfs_ifind: dinode %u not found", ino);
+	/* NOTREACHED */
+}
+/* Fold pending access/modify/change flags into the inode's timestamps and mark it modified. */
+int
+lfs_update(ap)
+ struct vop_update_args /* {
+ struct vnode *a_vp;
+ struct timeval *a_access;
+ struct timeval *a_modify;
+ int a_waitfor;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip;
+
+ if (vp->v_mount->mnt_flag & MNT_RDONLY) /* nothing is updated on a read-only mount */
+ return (0);
+ ip = VTOI(vp);
+ if ((ip->i_flag &
+ (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0)
+ return (0); /* no flag pending: nothing to do */
+ if (ip->i_flag & IN_ACCESS)
+ ip->i_atime.ts_sec = ap->a_access->tv_sec;
+ if (ip->i_flag & IN_UPDATE) {
+ ip->i_mtime.ts_sec = ap->a_modify->tv_sec;
+ (ip)->i_modrev++;
+ }
+ if (ip->i_flag & IN_CHANGE)
+ ip->i_ctime.ts_sec = time.tv_sec; /* change time comes from the global time, not from ap */
+ ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
+
+ if (!(ip->i_flag & IN_MODIFIED)) /* count the inode in lfs_uinodes once */
+ ++(VFSTOUFS(vp->v_mount)->um_lfs->lfs_uinodes);
+ ip->i_flag |= IN_MODIFIED;
+
+ /* If sync, push back the vnode and any dirty blocks it may have. */
+ return (ap->a_waitfor & LFS_SYNC ? lfs_vflush(vp) : 0);
+}
+
+/* UPDATE_SEGUSE: subtract the num blocks batched for segment lastseg from its su_nbytes (uses lfs_truncate's locals). */
+#define UPDATE_SEGUSE \
+ if (lastseg != -1) { \
+ LFS_SEGENTRY(sup, fs, lastseg, sup_bp); \
+ if ((num << fs->lfs_bshift) > sup->su_nbytes) \
+ panic("lfs_truncate: negative bytes in segment %d\n", \
+ lastseg); \
+ sup->su_nbytes -= num << fs->lfs_bshift; \
+ e1 = VOP_BWRITE(sup_bp); \
+ blocksreleased += num; \
+ }
+/* SEGDEC: count daddr (if non-zero) toward the current segment's batch, flushing the batch when the segment changes. */
+#define SEGDEC { \
+ if (daddr != 0) { \
+ if (lastseg != (seg = datosn(fs, daddr))) { \
+ UPDATE_SEGUSE; \
+ num = 1; \
+ lastseg = seg; \
+ } else \
+ ++num; \
+ } \
+}
+
+/*
+ * Truncate the file behind a_vp to at most a_length bytes, updating the
+ * segment usage table. a_flags is not read; returns 0 or a sub-call's error.
+ */
+/* ARGSUSED */
+int
+lfs_truncate(ap)
+ struct vop_truncate_args /* {
+ struct vnode *a_vp;
+ off_t a_length;
+ int a_flags;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct indir *inp;
+ register int i;
+ register daddr_t *daddrp;
+ register struct vnode *vp = ap->a_vp;
+ off_t length = ap->a_length;
+ struct buf *bp, *sup_bp;
+ struct timeval tv;
+ struct ifile *ifp;
+ struct inode *ip;
+ struct lfs *fs;
+ struct indir a[NIADDR + 2], a_end[NIADDR + 2];
+ SEGUSE *sup;
+ daddr_t daddr, lastblock, lbn, olastblock;
+ long off, a_released, blocksreleased, i_released;
+ int e1, e2, depth, lastseg, num, offset, seg, size;
+
+ ip = VTOI(vp);
+ tv = time; /* one timestamp for every VOP_UPDATE call below */
+ if (vp->v_type == VLNK && vp->v_mount->mnt_maxsymlinklen > 0) {
+#ifdef DIAGNOSTIC
+ if (length != 0)
+ panic("lfs_truncate: partial truncate of symlink");
+#endif
+ bzero((char *)&ip->i_shortlink, (u_int)ip->i_size); /* link text is cleared in ip->i_shortlink */
+ ip->i_size = 0;
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ return (VOP_UPDATE(vp, &tv, &tv, 0));
+ }
+ vnode_pager_setsize(vp, (u_long)length); /* note: off_t length is cast to u_long here */
+
+ fs = ip->i_lfs;
+
+ /* If length is larger than the file, just update the times. */
+ if (ip->i_size <= length) {
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ return (VOP_UPDATE(vp, &tv, &tv, 0));
+ }
+
+ /*
+ * Calculate index into inode's block list of last direct and indirect
+ * blocks (if any) which we want to keep. Lastblock is 0 when the
+ * file is truncated to 0.
+ */
+ lastblock = lblkno(fs, length + fs->lfs_bsize - 1);
+ olastblock = lblkno(fs, ip->i_size + fs->lfs_bsize - 1) - 1;
+
+ /*
+ * Update the size of the file. If the file is not being truncated to
+ * a block boundary, the contents of the partial block following the end
+ * of the file must be zeroed in case it ever becomes accessible again
+ * because of subsequent file growth.
+ */
+ offset = blkoff(fs, length);
+ if (offset == 0)
+ ip->i_size = length;
+ else {
+ lbn = lblkno(fs, length);
+#ifdef QUOTA
+ if (e1 = getinoquota(ip))
+ return (e1);
+#endif
+ if (e1 = bread(vp, lbn, fs->lfs_bsize, NOCRED, &bp))
+ return (e1);
+ ip->i_size = length;
+ size = blksize(fs);
+ (void)vnode_pager_uncache(vp);
+ bzero((char *)bp->b_data + offset, (u_int)(size - offset)); /* zero the tail past the new EOF */
+ allocbuf(bp, size);
+ if (e1 = VOP_BWRITE(bp))
+ return (e1);
+ }
+ /*
+ * Modify sup->su_nbytes counters for each deleted block; keep track
+ * of number of blocks removed for ip->i_blocks.
+ */
+ blocksreleased = 0;
+ num = 0;
+ lastseg = -1; /* -1: no segment batched yet (see UPDATE_SEGUSE) */
+
+ for (lbn = olastblock; lbn >= lastblock;) { /* walk backwards from the old last block */
+ /* XXX use run length from bmap array to make this faster */
+ ufs_bmaparray(vp, lbn, &daddr, a, &depth, NULL); /* NOTE(review): return value ignored */
+ if (lbn == olastblock)
+ for (i = NIADDR + 2; i--;)
+ a_end[i] = a[i];
+ switch (depth) {
+ case 0: /* Direct block. */
+ daddr = ip->i_db[lbn];
+ SEGDEC;
+ ip->i_db[lbn] = 0;
+ --lbn;
+ break;
+#ifdef DIAGNOSTIC
+ case 1: /* An indirect block. */
+ panic("lfs_truncate: ufs_bmaparray returned depth 1");
+ /* NOTREACHED */
+#endif
+ default: /* Chain of indirect blocks. */
+ inp = a + --depth;
+ if (inp->in_off > 0 && lbn != lastblock) {
+ lbn -= inp->in_off < lbn - lastblock ?
+ inp->in_off : lbn - lastblock;
+ break;
+ }
+ for (; depth && (inp->in_off == 0 || lbn == lastblock);
+ --inp, --depth) {
+ if (bread(vp,
+ inp->in_lbn, fs->lfs_bsize, NOCRED, &bp))
+ panic("lfs_truncate: bread bno %d",
+ inp->in_lbn);
+ daddrp = (daddr_t *)bp->b_data + inp->in_off;
+ for (i = inp->in_off;
+ i++ <= a_end[depth].in_off;) {
+ daddr = *daddrp++;
+ SEGDEC;
+ }
+ a_end[depth].in_off = NINDIR(fs) - 1;
+ if (inp->in_off == 0)
+ brelse (bp); /* released without being written back */
+ else {
+ bzero((daddr_t *)bp->b_data +
+ inp->in_off, fs->lfs_bsize -
+ inp->in_off * sizeof(daddr_t));
+ if (e1 = VOP_BWRITE(bp))
+ return (e1);
+ }
+ }
+ if (depth == 0 && a[1].in_off == 0) {
+ off = a[0].in_off;
+ daddr = ip->i_ib[off];
+ SEGDEC;
+ ip->i_ib[off] = 0;
+ }
+ if (lbn == lastblock || lbn <= NDADDR)
+ --lbn;
+ else {
+ lbn -= NINDIR(fs);
+ if (lbn < lastblock)
+ lbn = lastblock;
+ }
+ }
+ }
+ UPDATE_SEGUSE; /* flush the batch for the last segment seen */
+
+ /* If truncating the file to 0, update the version number. */
+ if (length == 0) {
+ LFS_IENTRY(ifp, fs, ip->i_number, bp);
+ ++ifp->if_version;
+ (void) VOP_BWRITE(bp);
+ }
+
+#ifdef DIAGNOSTIC
+ if (ip->i_blocks < fsbtodb(fs, blocksreleased)) {
+ printf("lfs_truncate: block count < 0\n");
+ blocksreleased = ip->i_blocks; /* NOTE(review): stored unconverted, then passed through fsbtodb again below -- confirm units */
+ }
+#endif
+ ip->i_blocks -= fsbtodb(fs, blocksreleased);
+ fs->lfs_bfree += fsbtodb(fs, blocksreleased);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ /*
+ * Traverse dirty block list counting number of dirty buffers
+ * that are being deleted out of the cache, so that the lfs_avail
+ * field can be updated.
+ */
+ a_released = 0;
+ i_released = 0;
+ for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next)
+ if (bp->b_flags & B_LOCKED) {
+ ++a_released;
+ /*
+ * XXX
+ * When buffers are created in the cache, their block
+ * number is set equal to their logical block number.
+ * If that is still true, we are assuming that the
+ * blocks are new (not yet on disk) and weren't
+ * counted above. However, there is a slight chance
+ * that a block's disk address is equal to its logical
+ * block number in which case, we'll get an overcounting
+ * here.
+ */
+ if (bp->b_blkno == bp->b_lblkno)
+ ++i_released;
+ }
+ blocksreleased = fsbtodb(fs, i_released); /* from here on blocksreleased is already in fsbtodb units */
+#ifdef DIAGNOSTIC
+ if (blocksreleased > ip->i_blocks) {
+ printf("lfs_inode: Warning! %s\n",
+ "more blocks released from inode than are in inode");
+ blocksreleased = ip->i_blocks;
+ }
+#endif
+ fs->lfs_bfree += blocksreleased;
+ ip->i_blocks -= blocksreleased;
+#ifdef DIAGNOSTIC
+ if (length == 0 && ip->i_blocks != 0)
+ printf("lfs_inode: Warning! %s%d%s\n",
+ "Truncation to zero, but ", ip->i_blocks,
+ " blocks left on inode");
+#endif
+ fs->lfs_avail += fsbtodb(fs, a_released);
+ e1 = vinvalbuf(vp, (length > 0) ? V_SAVE : 0, ap->a_cred, ap->a_p,
+ 0, 0); /* V_SAVE is passed only when some data remains */
+ e2 = VOP_UPDATE(vp, &tv, &tv, 0);
+ return (e1 ? e1 : e2 ? e2 : 0); /* vinvalbuf's error takes precedence */
+}
diff --git a/sys/ufs/lfs/lfs_segment.c b/sys/ufs/lfs/lfs_segment.c
new file mode 100644
index 000000000000..249d59ddda50
--- /dev/null
+++ b/sys/ufs/lfs/lfs_segment.c
@@ -0,0 +1,1111 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs_segment.c 8.5 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/kernel.h>
+#include <sys/resourcevar.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+extern int count_lock_queue __P((void));
+
+/*
+ * Threshold on fs->lfs_nactive (incremented each time lfs_newseg() starts a
+ * segment): once it is exceeded, lfs_vflush() and lfs_segwrite() force a
+ * checkpoint.
+ */
+#define MAX_ACTIVE 10
+/*
+ * Determine if it's OK to start a partial in this segment, or if we need
+ * to go on to a new segment. True when the disk blocks left in the current
+ * segment (lfs_dbpseg minus what has been consumed between lfs_curseg and
+ * lfs_offset) exceed 1 << lfs_fsbtodb -- presumably one file system block's
+ * worth of disk blocks; confirm against the lfs_fsbtodb definition.
+ */
+#define LFS_PARTIAL_FITS(fs) \
+ ((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
+ 1 << (fs)->lfs_fsbtodb)
+
+void lfs_callback __P((struct buf *));
+void lfs_gather __P((struct lfs *, struct segment *,
+ struct vnode *, int (*) __P((struct lfs *, struct buf *))));
+int lfs_gatherblock __P((struct segment *, struct buf *, int *));
+void lfs_iset __P((struct inode *, daddr_t, time_t));
+int lfs_match_data __P((struct lfs *, struct buf *));
+int lfs_match_dindir __P((struct lfs *, struct buf *));
+int lfs_match_indir __P((struct lfs *, struct buf *));
+int lfs_match_tindir __P((struct lfs *, struct buf *));
+void lfs_newseg __P((struct lfs *));
+void lfs_shellsort __P((struct buf **, daddr_t *, register int));
+void lfs_supercallback __P((struct buf *));
+void lfs_updatemeta __P((struct segment *));
+int lfs_vref __P((struct vnode *));
+void lfs_vunref __P((struct vnode *));
+void lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
+int lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
+int lfs_writeseg __P((struct lfs *, struct segment *));
+void lfs_writesuper __P((struct lfs *));
+void lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
+ struct segment *sp, int dirops));
+
+int lfs_allclean_wakeup; /* Cleaner wakeup address. */
+
+/* Statistics Counters */
+#define DOSTATS
+struct lfs_stats lfs_stats;
+
+/*
+ * op values to lfs_writevnodes:
+ * VN_REG - passed by lfs_segwrite(); no op-specific filtering is applied.
+ * VN_DIROP - only referenced by code that is currently commented out.
+ * VN_EMPTY - passed by lfs_vflush(); vnodes that still have dirty buffers
+ * are skipped.
+ */
+#define VN_REG 0
+#define VN_DIROP 1
+#define VN_EMPTY 2
+
+/*
+ * Ifile and meta data blocks are not marked busy, so segment writes MUST be
+ * single threaded. Currently, there are two paths into lfs_segwrite, sync()
+ * and getnewbuf(). They both mark the file system busy. Lfs_vflush()
+ * explicitly marks the file system busy. So lfs_segwrite is safe. I think.
+ */
+
+/*
+ * Flush one vnode: write its dirty blocks and its inode to the log.
+ *
+ * When more than MAX_ACTIVE segments are active the work is handed to
+ * lfs_segwrite() as a synchronous checkpoint and its result is returned;
+ * otherwise the function returns 0.
+ */
+int
+lfs_vflush(vp)
+	struct vnode *vp;
+{
+	struct lfs *lfsp;
+	struct segment *seg;
+	struct inode *inop;
+
+	lfsp = VFSTOUFS(vp->v_mount)->um_lfs;
+	if (lfsp->lfs_nactive > MAX_ACTIVE)
+		return (lfs_segwrite(vp->v_mount, SEGM_SYNC | SEGM_CKP));
+
+	/* The segment structure is only valid once the lock is held. */
+	lfs_seglock(lfsp, SEGM_SYNC);
+	seg = lfsp->lfs_sp;
+	inop = VTOI(vp);
+
+	/*
+	 * Nothing dirty on this vnode: use the opportunity to write the
+	 * inodes of the other vnodes that have no dirty buffers either.
+	 */
+	if (vp->v_dirtyblkhd.lh_first == NULL)
+		lfs_writevnodes(lfsp, vp->v_mount, seg, VN_EMPTY);
+
+	for (;;) {
+		/* Redo file + inode for as long as lfs_writeinode() asks. */
+		do {
+			if (vp->v_dirtyblkhd.lh_first != NULL)
+				lfs_writefile(lfsp, seg, vp);
+		} while (lfs_writeinode(lfsp, seg, inop));
+
+		/* Only the ifile goes around again after a segment write. */
+		if (!lfs_writeseg(lfsp, seg) ||
+		    inop->i_number != LFS_IFILE_INUM)
+			break;
+	}
+
+#ifdef DOSTATS
+	++lfs_stats.nwrites;
+	if (seg->seg_flags & SEGM_SYNC)
+		++lfs_stats.nsync_writes;
+	if (seg->seg_flags & SEGM_CKP)
+		++lfs_stats.ncheckpoints;
+#endif
+	lfs_segunlock(lfsp);
+	return (0);
+}
+
+/*
+ * Walk the vnode list of mount point mp and write every dirty file and
+ * inode, except the ifile, into the segment sp.
+ *
+ * op is one of the VN_* values; with VN_EMPTY, vnodes that have dirty
+ * buffers are skipped.
+ */
+void
+lfs_writevnodes(fs, mp, sp, op)
+	struct lfs *fs;
+	struct mount *mp;
+	struct segment *sp;
+	int op;
+{
+	struct vnode *vn;
+	struct inode *inop;
+	int has_dirty_bufs;
+
+restart:
+	for (vn = mp->mnt_vnodelist.lh_first; vn != NULL;
+	    vn = vn->v_mntvnodes.le_next) {
+		/*
+		 * The vnode we are about to sync is no longer on this
+		 * mount point: rescan the list from the top.
+		 */
+		if (vn->v_mount != mp)
+			goto restart;
+
+		/* XXX ignore dirops for now
+		if (op == VN_DIROP && !(vn->v_flag & VDIROP) ||
+		    op != VN_DIROP && (vn->v_flag & VDIROP))
+			continue;
+		*/
+
+		/*
+		 * Skip vnodes excluded by VN_EMPTY, vnodes without a type,
+		 * and vnodes on which no reference could be taken.
+		 */
+		if ((op == VN_EMPTY && vn->v_dirtyblkhd.lh_first) ||
+		    vn->v_type == VNON || lfs_vref(vn))
+			continue;
+
+		/* Write the inode/file if dirty and it's not the IFILE. */
+		inop = VTOI(vn);
+		has_dirty_bufs = vn->v_dirtyblkhd.lh_first != NULL;
+		if (inop->i_number != LFS_IFILE_INUM &&
+		    (has_dirty_bufs || (inop->i_flag &
+		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)))) {
+			if (has_dirty_bufs)
+				lfs_writefile(fs, sp, vn);
+			(void) lfs_writeinode(fs, sp, inop);
+		}
+		vn->v_flag &= ~VDIROP;
+		lfs_vunref(vn);
+	}
+}
+
+/*
+ * Write the dirty vnodes of mount point mp to the log, optionally as a
+ * checkpoint. flags holds SEGM_* bits: SEGM_CKP requests a checkpoint, and
+ * one is done regardless once more than MAX_ACTIVE segments are active.
+ *
+ * Returns the tsleep() error if the wait for clean segments fails,
+ * otherwise 0.
+ */
+int
+lfs_segwrite(mp, flags)
+ struct mount *mp;
+ int flags; /* Do a checkpoint. */
+{
+ struct buf *bp;
+ struct inode *ip;
+ struct lfs *fs;
+ struct segment *sp;
+ struct vnode *vp;
+ SEGUSE *segusep;
+ daddr_t ibno;
+ CLEANERINFO *cip;
+ int clean, do_ckp, error, i;
+
+ fs = VFSTOUFS(mp)->um_lfs;
+
+ /*
+ * If we have 2 or fewer clean segments, wake the cleaner and wait
+ * until it writes. The cleaner info buffer is released before
+ * sleeping and the count is re-read on every pass.
+ */
+ do {
+ LFS_CLEANERINFO(cip, fs, bp);
+ clean = cip->clean;
+ brelse(bp);
+ if (clean <= 2) {
+ printf ("segs clean: %d\n", clean);
+ wakeup(&lfs_allclean_wakeup);
+ if (error = tsleep(&fs->lfs_avail, PRIBIO + 1,
+ "lfs writer", 0))
+ return (error);
+ }
+ } while (clean <= 2 );
+
+ /*
+ * Allocate a segment structure and enough space to hold pointers to
+ * the maximum possible number of buffers which can be described in a
+ * single summary block.
+ * (The allocation is not visible here; presumably lfs_seglock() does
+ * it, since fs->lfs_sp is only read after the call.)
+ */
+ do_ckp = flags & SEGM_CKP || fs->lfs_nactive > MAX_ACTIVE;
+ lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0));
+ sp = fs->lfs_sp;
+
+ lfs_writevnodes(fs, mp, sp, VN_REG);
+
+ /* XXX ignore ordering of dirops for now */
+ /* XXX
+ fs->lfs_writer = 1;
+ if (fs->lfs_dirops && (error =
+ tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) {
+ free(sp->bpp, M_SEGMENT);
+ free(sp, M_SEGMENT);
+ fs->lfs_writer = 0;
+ return (error);
+ }
+
+ lfs_writevnodes(fs, mp, sp, VN_DIROP);
+ */
+
+ /*
+ * If we are doing a checkpoint, mark everything since the
+ * last checkpoint as no longer ACTIVE.
+ * The loop walks ifile blocks lfs_cleansz + lfs_segtabsz - 1 down to
+ * lfs_cleansz and clears SEGUSE_ACTIVE in the lfs_sepb entries of
+ * each block.
+ */
+ if (do_ckp)
+ for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
+ --ibno >= fs->lfs_cleansz; ) {
+ if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize,
+ NOCRED, &bp))
+
+ panic("lfs: ifile read");
+ segusep = (SEGUSE *)bp->b_data;
+ for (i = fs->lfs_sepb; i--; segusep++)
+ segusep->su_flags &= ~SEGUSE_ACTIVE;
+
+ /* NOTE(review): the write error is stored but never examined. */
+ error = VOP_BWRITE(bp);
+ }
+
+ /*
+ * On a checkpoint (or when lfs_doifile is set) the ifile itself is
+ * written; otherwise only the pending partial segment is written.
+ */
+ if (do_ckp || fs->lfs_doifile) {
+redo:
+ vp = fs->lfs_ivnode;
+ /* Spin until vget() returns 0. */
+ while (vget(vp, 1));
+ ip = VTOI(vp);
+ if (vp->v_dirtyblkhd.lh_first != NULL)
+ lfs_writefile(fs, sp, vp);
+ (void)lfs_writeinode(fs, sp, ip);
+ vput(vp);
+ /*
+ * A non-zero return from lfs_writeseg() during a checkpoint
+ * means the ifile has to be written once more.
+ */
+ if (lfs_writeseg(fs, sp) && do_ckp)
+ goto redo;
+ } else
+ (void) lfs_writeseg(fs, sp);
+
+ /*
+ * If the I/O count is non-zero, sleep until it reaches zero. At the
+ * moment, the user's process hangs around so we can sleep.
+ */
+ /* XXX ignore dirops for now
+ fs->lfs_writer = 0;
+ fs->lfs_doifile = 0;
+ wakeup(&fs->lfs_dirops);
+ */
+
+#ifdef DOSTATS
+ ++lfs_stats.nwrites;
+ if (sp->seg_flags & SEGM_SYNC)
+ ++lfs_stats.nsync_writes;
+ if (sp->seg_flags & SEGM_CKP)
+ ++lfs_stats.ncheckpoints;
+#endif
+ lfs_segunlock(fs);
+ return (0);
+}
+
+/*
+ * Write the dirty blocks associated with a vnode.
+ *
+ * A FINFO entry for the file is reserved in the segment summary, the
+ * file's dirty buffers are gathered with the lfs_match_* predicates (data,
+ * then indirect, then double indirect), and the reservation is backed out
+ * again if no block ended up in the current FINFO.
+ */
+void
+lfs_writefile(fs, sp, vp)
+ struct lfs *fs;
+ struct segment *sp;
+ struct vnode *vp;
+{
+ struct buf *bp;
+ struct finfo *fip;
+ IFILE *ifp;
+
+ /* Flush the partial segment if it cannot take a block plus a FINFO. */
+ if (sp->seg_bytes_left < fs->lfs_bsize ||
+ sp->sum_bytes_left < sizeof(struct finfo))
+ (void) lfs_writeseg(fs, sp);
+
+ /*
+ * Charge the FINFO without its first block slot; every gathered block
+ * is charged sizeof(daddr_t) separately in lfs_gatherblock().
+ */
+ sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t);
+ ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
+
+ /* Fill in the FINFO; the file version comes from its ifile entry. */
+ fip = sp->fip;
+ fip->fi_nblocks = 0;
+ fip->fi_ino = VTOI(vp)->i_number;
+ LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
+ fip->fi_version = ifp->if_version;
+ brelse(bp);
+
+ /*
+ * It may not be necessary to write the meta-data blocks at this point,
+ * as the roll-forward recovery code should be able to reconstruct the
+ * list.
+ */
+ lfs_gather(fs, sp, vp, lfs_match_data);
+ lfs_gather(fs, sp, vp, lfs_match_indir);
+ lfs_gather(fs, sp, vp, lfs_match_dindir);
+#ifdef TRIPLE
+ lfs_gather(fs, sp, vp, lfs_match_tindir);
+#endif
+
+ /*
+ * Re-read sp->fip: gathering may have written the segment, in which
+ * case lfs_initseg() has pointed it at a fresh FINFO.
+ */
+ fip = sp->fip;
+ if (fip->fi_nblocks != 0) {
+ /* Advance past this FINFO and its fi_nblocks block slots. */
+ sp->fip =
+ (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
+ sizeof(daddr_t) * (fip->fi_nblocks - 1));
+ sp->start_lbp = &sp->fip->fi_blocks[0];
+ } else {
+ /* Nothing gathered: give the FINFO reservation back. */
+ sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t);
+ --((SEGSUM *)(sp->segsum))->ss_nfinfo;
+ }
+}
+
+/*
+ * Copy inode ip into the current inode block of segment sp and record its
+ * new disk address (in the superblock for the ifile, in the ifile entry
+ * for every other inode).
+ *
+ * Returns 0 immediately if none of the IN_ACCESS, IN_CHANGE, IN_MODIFIED
+ * or IN_UPDATE flags is set. Otherwise returns non-zero only when the
+ * ifile's own inode was written and the segment usage buffer updated for
+ * its old address was not already marked B_GATHERED, i.e. the ifile needs
+ * to be written again.
+ */
+int
+lfs_writeinode(fs, sp, ip)
+ struct lfs *fs;
+ struct segment *sp;
+ struct inode *ip;
+{
+ struct buf *bp, *ibp;
+ IFILE *ifp;
+ SEGUSE *sup;
+ daddr_t daddr;
+ ino_t ino;
+ int error, i, ndx;
+ int redo_ifile = 0;
+
+ if (!(ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)))
+ return(0);
+
+ /* Allocate a new inode block if necessary. */
+ if (sp->ibp == NULL) {
+ /* Allocate a new segment if necessary. */
+ if (sp->seg_bytes_left < fs->lfs_bsize ||
+ sp->sum_bytes_left < sizeof(daddr_t))
+ (void) lfs_writeseg(fs, sp);
+
+ /* Get next inode block. */
+ daddr = fs->lfs_offset;
+ fs->lfs_offset += fsbtodb(fs, 1);
+ sp->ibp = *sp->cbpp++ =
+ lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr,
+ fs->lfs_bsize);
+ /* Zero out inode numbers */
+ for (i = 0; i < INOPB(fs); ++i)
+ ((struct dinode *)sp->ibp->b_data)[i].di_inumber = 0;
+ ++sp->start_bpp;
+ fs->lfs_avail -= fsbtodb(fs, 1);
+ /* Set remaining space counters. */
+ sp->seg_bytes_left -= fs->lfs_bsize;
+ sp->sum_bytes_left -= sizeof(daddr_t);
+ /*
+ * Inode block addresses are stored from the end of the summary
+ * block backwards, one daddr_t slot per inode block.
+ */
+ ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
+ sp->ninodes / INOPB(fs) - 1;
+ ((daddr_t *)(sp->segsum))[ndx] = daddr;
+ }
+
+ /* Update the inode times and copy the inode onto the inode page. */
+ if (ip->i_flag & IN_MODIFIED)
+ --fs->lfs_uinodes;
+ ITIMES(ip, &time, &time);
+ ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
+ bp = sp->ibp;
+ ((struct dinode *)bp->b_data)[sp->ninodes % INOPB(fs)] = ip->i_din;
+ /* Increment inode count in segment summary block. */
+ ++((SEGSUM *)(sp->segsum))->ss_ninos;
+
+ /* If this page is full, set flag to allocate a new page. */
+ if (++sp->ninodes % INOPB(fs) == 0)
+ sp->ibp = NULL;
+
+ /*
+ * If updating the ifile, update the super-block. Update the disk
+ * address and access times for this inode in the ifile.
+ * In both branches daddr ends up holding the inode's previous address.
+ */
+ ino = ip->i_number;
+ if (ino == LFS_IFILE_INUM) {
+ daddr = fs->lfs_idaddr;
+ fs->lfs_idaddr = bp->b_blkno;
+ } else {
+ LFS_IENTRY(ifp, fs, ino, ibp);
+ daddr = ifp->if_daddr;
+ ifp->if_daddr = bp->b_blkno;
+ /* NOTE(review): the write error is stored but never examined. */
+ error = VOP_BWRITE(ibp);
+ }
+
+ /*
+ * No need to update segment usage if there was no former inode address
+ * or if the last inode address is in the current partial segment.
+ * The test below still uses bp as the inode block; LFS_SEGENTRY then
+ * presumably repoints bp at the segment usage buffer, which is the
+ * buffer the B_GATHERED test and the VOP_BWRITE() operate on.
+ */
+ if (daddr != LFS_UNUSED_DADDR &&
+ !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) {
+ LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
+#ifdef DIAGNOSTIC
+ if (sup->su_nbytes < sizeof(struct dinode)) {
+ /* XXX -- Change to a panic. */
+ printf("lfs: negative bytes (segment %d)\n",
+ datosn(fs, daddr));
+ panic("negative bytes");
+ }
+#endif
+ /* The old copy of the inode is dead space in its segment. */
+ sup->su_nbytes -= sizeof(struct dinode);
+ redo_ifile =
+ (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
+ error = VOP_BWRITE(bp);
+ }
+ return (redo_ifile);
+}
+
+/*
+ * Add buffer bp to the list of blocks to be written in segment sp.
+ *
+ * sptr, when non-NULL, points at the caller's saved interrupt level from
+ * splbio(); it is restored before a segment write and re-raised afterwards.
+ *
+ * Returns 0 when bp was added. Returns 1 when the partial segment was full:
+ * it has then been written out, bp has NOT been added, and the current
+ * file's FINFO has been re-created in the new summary, so the caller must
+ * restart its scan.
+ */
+int
+lfs_gatherblock(sp, bp, sptr)
+ struct segment *sp;
+ struct buf *bp;
+ int *sptr;
+{
+ struct lfs *fs;
+ int version;
+
+ /*
+ * If full, finish this segment. We may be doing I/O, so
+ * release and reacquire the splbio().
+ */
+#ifdef DIAGNOSTIC
+ if (sp->vp == NULL)
+ panic ("lfs_gatherblock: Null vp in segment");
+#endif
+ fs = sp->fs;
+ if (sp->sum_bytes_left < sizeof(daddr_t) ||
+ sp->seg_bytes_left < fs->lfs_bsize) {
+ if (sptr)
+ splx(*sptr);
+ lfs_updatemeta(sp);
+
+ /* Save the version: the write below starts a fresh summary. */
+ version = sp->fip->fi_version;
+ (void) lfs_writeseg(fs, sp);
+
+ sp->fip->fi_version = version;
+ sp->fip->fi_ino = VTOI(sp->vp)->i_number;
+ /* Add the current file to the segment summary. */
+ ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
+ sp->sum_bytes_left -=
+ sizeof(struct finfo) - sizeof(daddr_t);
+
+ if (sptr)
+ *sptr = splbio();
+ return(1);
+ }
+
+ /* Insert into the buffer list, update the FINFO block. */
+ bp->b_flags |= B_GATHERED;
+ *sp->cbpp++ = bp;
+ sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;
+
+ /* Charge one block slot in the summary and one block in the segment. */
+ sp->sum_bytes_left -= sizeof(daddr_t);
+ sp->seg_bytes_left -= fs->lfs_bsize;
+ return(0);
+}
+
+/*
+ * Collect every dirty buffer of vp that match() accepts into segment sp,
+ * then update the metadata that points at the gathered blocks.
+ *
+ * Buffers that are busy or already gathered are passed over.  Whenever
+ * lfs_gatherblock() reports that it had to write the segment, the dirty
+ * list is scanned again from its head.
+ */
+void
+lfs_gather(fs, sp, vp, match)
+	struct lfs *fs;
+	struct segment *sp;
+	struct vnode *vp;
+	int (*match) __P((struct lfs *, struct buf *));
+{
+	struct buf *cur;
+	int spl;
+
+	sp->vp = vp;
+	spl = splbio();
+rescan:
+	cur = vp->v_dirtyblkhd.lh_first;
+	while (cur != NULL) {
+		if (!(cur->b_flags & B_BUSY) && match(fs, cur) &&
+		    !(cur->b_flags & B_GATHERED)) {
+#ifdef DIAGNOSTIC
+			if (!(cur->b_flags & B_DELWRI))
+				panic("lfs_gather: bp not B_DELWRI");
+			if (!(cur->b_flags & B_LOCKED))
+				panic("lfs_gather: bp not B_LOCKED");
+#endif
+			if (lfs_gatherblock(sp, cur, &spl))
+				goto rescan;
+		}
+		cur = cur->b_vnbufs.le_next;
+	}
+	splx(spl);
+	lfs_updatemeta(sp);
+	sp->vp = NULL;
+}
+
+
+/*
+ * Update the metadata that points to the blocks listed in the FINFO
+ * array.
+ *
+ * For every block gathered since sp->start_lbp a new disk address is
+ * taken from fs->lfs_offset, the pointer to the block (in the inode or in
+ * an indirect block) is rewritten, and the byte count of the segment that
+ * held the old copy is reduced. Does nothing when sp->vp is NULL or no
+ * blocks are pending.
+ */
+void
+lfs_updatemeta(sp)
+ struct segment *sp;
+{
+ SEGUSE *sup;
+ struct buf *bp;
+ struct lfs *fs;
+ struct vnode *vp;
+ struct indir a[NIADDR + 2], *ap;
+ struct inode *ip;
+ daddr_t daddr, lbn, off;
+ int db_per_fsb, error, i, nblocks, num;
+
+ vp = sp->vp;
+ /* Blocks entered in the current FINFO and not yet processed. */
+ nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
+ if (vp == NULL || nblocks == 0)
+ return;
+
+ /* Sort the blocks. */
+ if (!(sp->seg_flags & SEGM_CLEAN))
+ lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks);
+
+ /*
+ * Assign disk addresses, and update references to the logical
+ * block and the segment usage information.
+ */
+ fs = sp->fs;
+ db_per_fsb = fsbtodb(fs, 1);
+ for (i = nblocks; i--; ++sp->start_bpp) {
+ lbn = *sp->start_lbp++;
+ (*sp->start_bpp)->b_blkno = off = fs->lfs_offset;
+ fs->lfs_offset += db_per_fsb;
+
+ /*
+ * daddr receives the block's previous disk address; a[] and num
+ * describe the chain of indirect blocks leading to it.
+ */
+ if (error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL))
+ panic("lfs_updatemeta: ufs_bmaparray %d", error);
+ ip = VTOI(vp);
+ switch (num) {
+ case 0:
+ /* Pointer is one of the inode's direct block slots. */
+ ip->i_db[lbn] = off;
+ break;
+ case 1:
+ /* Pointer is one of the inode's indirect block slots. */
+ ip->i_ib[a[0].in_off] = off;
+ break;
+ default:
+ /* Pointer lives in the last indirect block of the chain. */
+ ap = &a[num - 1];
+ if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
+ panic("lfs_updatemeta: bread bno %d",
+ ap->in_lbn);
+ /*
+ * Bread may create a new indirect block which needs
+ * to get counted for the inode.
+ */
+ if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) {
+printf ("Updatemeta allocating indirect block: shouldn't happen\n");
+ ip->i_blocks += btodb(fs->lfs_bsize);
+ fs->lfs_bfree -= btodb(fs->lfs_bsize);
+ }
+ ((daddr_t *)bp->b_data)[ap->in_off] = off;
+ VOP_BWRITE(bp);
+ }
+
+ /*
+ * Update segment usage information.
+ * Skipped when the block had no previous address or when the old
+ * copy lies within the partial segment currently being built.
+ */
+ if (daddr != UNASSIGNED &&
+ !(daddr >= fs->lfs_lastpseg && daddr <= off)) {
+ LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
+#ifdef DIAGNOSTIC
+ if (sup->su_nbytes < fs->lfs_bsize) {
+ /* XXX -- Change to a panic. */
+ printf("lfs: negative bytes (segment %d)\n",
+ datosn(fs, daddr));
+ panic ("Negative Bytes");
+ }
+#endif
+ sup->su_nbytes -= fs->lfs_bsize;
+ /* NOTE(review): the write error is stored but never examined. */
+ error = VOP_BWRITE(bp);
+ }
+ }
+}
+
+/*
+ * Start a new segment.
+ *
+ * More precisely, set up fs->lfs_sp for the next partial segment: advance
+ * to a fresh segment when the current one has no room left
+ * (LFS_PARTIAL_FITS), allocate and zero a summary buffer, and reset the
+ * buffer list, FINFO pointer and space counters.
+ *
+ * Returns 1 if a new segment had to be started, 0 if the partial segment
+ * continues in the current one.
+ */
+int
+lfs_initseg(fs)
+ struct lfs *fs;
+{
+ struct segment *sp;
+ SEGUSE *sup;
+ SEGSUM *ssp;
+ struct buf *bp;
+ int repeat;
+
+ sp = fs->lfs_sp;
+
+ repeat = 0;
+ /* Advance to the next segment. */
+ if (!LFS_PARTIAL_FITS(fs)) {
+ /* Wake up any cleaning procs waiting on this file system. */
+ wakeup(&lfs_allclean_wakeup);
+
+ lfs_newseg(fs);
+ repeat = 1;
+ fs->lfs_offset = fs->lfs_curseg;
+ sp->seg_number = datosn(fs, fs->lfs_curseg);
+ sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;
+
+ /*
+ * If the segment contains a superblock, update the offset
+ * and summary address to skip over it.
+ */
+ LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
+ if (sup->su_flags & SEGUSE_SUPERBLOCK) {
+ fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
+ sp->seg_bytes_left -= LFS_SBPAD;
+ }
+ brelse(bp);
+ } else {
+ /* Stay in the current segment; only the unused tail is left. */
+ sp->seg_number = datosn(fs, fs->lfs_curseg);
+ sp->seg_bytes_left = (fs->lfs_dbpseg -
+ (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
+ }
+ /* Remember where this partial segment begins. */
+ fs->lfs_lastpseg = fs->lfs_offset;
+
+ sp->fs = fs;
+ sp->ibp = NULL;
+ sp->ninodes = 0;
+
+ /* Get a new buffer for SEGSUM and enter it into the buffer list. */
+ sp->cbpp = sp->bpp;
+ *sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset,
+ LFS_SUMMARY_SIZE);
+ sp->segsum = (*sp->cbpp)->b_data;
+ bzero(sp->segsum, LFS_SUMMARY_SIZE);
+ sp->start_bpp = ++sp->cbpp;
+ fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;
+
+ /* Set point to SEGSUM, initialize it. */
+ ssp = sp->segsum;
+ ssp->ss_next = fs->lfs_nextseg;
+ ssp->ss_nfinfo = ssp->ss_ninos = 0;
+
+ /* Set pointer to first FINFO, initialize it. */
+ sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM));
+ sp->fip->fi_nblocks = 0;
+ sp->start_lbp = &sp->fip->fi_blocks[0];
+
+ /* The summary block itself uses segment space; its header uses summary space. */
+ sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
+ sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
+
+ return(repeat);
+}
+
+/*
+ * Advance to the next segment to write.
+ *
+ * The segment at fs->lfs_nextseg is marked dirty and active with its
+ * counters reset, the cleaner's clean/dirty counts are adjusted, and
+ * lfs_lastseg/lfs_curseg are moved forward. The segment table is then
+ * searched circularly for the next segment not marked SEGUSE_DIRTY, whose
+ * address is left in fs->lfs_nextseg. Nothing is returned; panics if no
+ * such segment exists.
+ */
+void
+lfs_newseg(fs)
+ struct lfs *fs;
+{
+ CLEANERINFO *cip;
+ SEGUSE *sup;
+ struct buf *bp;
+ int curseg, isdirty, sn;
+
+ /* Claim the segment we are about to write into. */
+ LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
+ sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
+ sup->su_nbytes = 0;
+ sup->su_nsums = 0;
+ sup->su_ninos = 0;
+ (void) VOP_BWRITE(bp);
+
+ /* One clean segment fewer, one dirty segment more. */
+ LFS_CLEANERINFO(cip, fs, bp);
+ --cip->clean;
+ ++cip->dirty;
+ (void) VOP_BWRITE(bp);
+
+ fs->lfs_lastseg = fs->lfs_curseg;
+ fs->lfs_curseg = fs->lfs_nextseg;
+ /* Scan forward, wrapping at lfs_nseg, for a segment that is not dirty. */
+ for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
+ sn = (sn + 1) % fs->lfs_nseg;
+ if (sn == curseg)
+ panic("lfs_nextseg: no clean segments");
+ LFS_SEGENTRY(sup, fs, sn, bp);
+ isdirty = sup->su_flags & SEGUSE_DIRTY;
+ brelse(bp);
+ if (!isdirty)
+ break;
+ }
+
+ /* lfs_nactive is what gets compared against MAX_ACTIVE. */
+ ++fs->lfs_nactive;
+ fs->lfs_nextseg = sntoda(fs, sn);
+#ifdef DOSTATS
+ ++lfs_stats.segsused;
+#endif
+}
+
+/*
+ * Write the partial segment described by sp to disk and set up the next
+ * one.
+ *
+ * The segment usage entry is updated, the summary checksums are computed,
+ * and the gathered buffers are copied into larger chunk buffers which are
+ * handed to the device's strategy routine. Gathered buffers are released
+ * (or freed, if they came from lfs_newbuf()) as they are copied.
+ *
+ * Returns 0 if there was nothing but the summary block to write. Otherwise
+ * returns non-zero when lfs_initseg() started a new segment or when the
+ * segment usage buffer was not already marked B_GATHERED.
+ */
+int
+lfs_writeseg(fs, sp)
+ struct lfs *fs;
+ struct segment *sp;
+{
+ extern int locked_queue_count;
+ struct buf **bpp, *bp, *cbp;
+ SEGUSE *sup;
+ SEGSUM *ssp;
+ dev_t i_dev;
+ size_t size;
+ u_long *datap, *dp;
+ int ch_per_blk, do_again, i, nblocks, num, s;
+ int (*strategy)__P((struct vop_strategy_args *));
+ struct vop_strategy_args vop_strategy_a;
+ u_short ninos;
+ char *p;
+
+ /*
+ * If there are no buffers other than the segment summary to write
+ * and it is not a checkpoint, don't do anything. On a checkpoint,
+ * even if there aren't any buffers, you need to write the superblock.
+ * NOTE(review): the test below returns 0 whenever only the summary
+ * buffer is present; no checkpoint condition is visible here.
+ */
+ if ((nblocks = sp->cbpp - sp->bpp) == 1)
+ return (0);
+
+ ssp = (SEGSUM *)sp->segsum;
+
+ /*
+ * Update the segment usage information.
+ * ninos is the number of inode blocks (ss_ninos rounded up to whole
+ * blocks). The shift expression parses as
+ * (nblocks - 1 - ninos) << lfs_bshift.
+ */
+ LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
+ ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
+ sup->su_nbytes += nblocks - 1 - ninos << fs->lfs_bshift;
+ sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
+ sup->su_nbytes += LFS_SUMMARY_SIZE;
+ sup->su_lastmod = time.tv_sec;
+ sup->su_ninos += ninos;
+ ++sup->su_nsums;
+ /* The usage buffer was not part of this write: ask the caller to redo. */
+ do_again = !(bp->b_flags & B_GATHERED);
+ (void)VOP_BWRITE(bp);
+ /*
+ * Compute checksum across data and then across summary; the first
+ * block (the summary block) is skipped. Set the create time here
+ * so that it's guaranteed to be later than the inode mod times.
+ * Only the first u_long of each data buffer goes into the data
+ * checksum.
+ *
+ * XXX
+ * Fix this to do it inline, instead of malloc/copy.
+ */
+ datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
+ for (bpp = sp->bpp, i = nblocks - 1; i--;) {
+ if ((*++bpp)->b_flags & B_INVAL) {
+ if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long)))
+ panic("lfs_writeseg: copyin failed");
+ } else
+ *dp++ = ((u_long *)(*bpp)->b_data)[0];
+ }
+ ssp->ss_create = time.tv_sec;
+ ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
+ /* The summary checksum starts at ss_datasum, so it excludes ss_sumsum. */
+ ssp->ss_sumsum =
+ cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
+ free(datap, M_SEGMENT);
+#ifdef DIAGNOSTIC
+ if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE)
+ panic("lfs_writeseg: No diskspace for summary");
+#endif
+ /* Inode blocks and the summary block consume free space. */
+ fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);
+
+ i_dev = VTOI(fs->lfs_ivnode)->i_dev;
+ strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
+
+ /*
+ * When we simply write the blocks we lose a rotation for every block
+ * written. To avoid this problem, we allocate memory in chunks, copy
+ * the buffers into the chunk and write the chunk. MAXPHYS is the
+ * largest size I/O devices can handle.
+ * When the data is copied to the chunk, turn off the B_LOCKED bit
+ * and brelse the buffer (which will move them to the LRU list). Add
+ * the B_CALL flag to the buffer header so we can count I/O's for the
+ * checkpoints and so we can release the allocated memory.
+ *
+ * XXX
+ * This should be removed if the new virtual memory system allows us to
+ * easily make the buffers contiguous in kernel memory and if that's
+ * fast enough.
+ */
+ ch_per_blk = MAXPHYS / fs->lfs_bsize;
+ for (bpp = sp->bpp, i = nblocks; i;) {
+ /* Take up to ch_per_blk buffers for this chunk. */
+ num = ch_per_blk;
+ if (num > i)
+ num = i;
+ i -= num;
+ size = num * fs->lfs_bsize;
+
+ cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
+ (*bpp)->b_blkno, size);
+ cbp->b_dev = i_dev;
+ cbp->b_flags |= B_ASYNC | B_BUSY;
+
+ s = splbio();
+ ++fs->lfs_iocount;
+ for (p = cbp->b_data; num--;) {
+ bp = *bpp++;
+ /*
+ * Fake buffers from the cleaner are marked as B_INVAL.
+ * We need to copy the data from user space rather than
+ * from the buffer indicated.
+ * XXX == what do I do on an error?
+ */
+ if (bp->b_flags & B_INVAL) {
+ if (copyin(bp->b_saveaddr, p, bp->b_bcount))
+ panic("lfs_writeseg: copyin failed");
+ } else
+ bcopy(bp->b_data, p, bp->b_bcount);
+ p += bp->b_bcount;
+ if (bp->b_flags & B_LOCKED)
+ --locked_queue_count;
+ bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
+ B_LOCKED | B_GATHERED);
+ if (bp->b_flags & B_CALL) {
+ /* if B_CALL, it was created with newbuf */
+ brelvp(bp);
+ if (!(bp->b_flags & B_INVAL))
+ free(bp->b_data, M_SEGMENT);
+ free(bp, M_SEGMENT);
+ } else {
+ /* Cache buffer: mark it done and give it back. */
+ bremfree(bp);
+ bp->b_flags |= B_DONE;
+ reassignbuf(bp, bp->b_vp);
+ brelse(bp);
+ }
+ }
+ ++cbp->b_vp->v_numoutput;
+ splx(s);
+ /* Actual payload length, which may be less than size. */
+ cbp->b_bcount = p - (char *)cbp->b_data;
+ /*
+ * XXXX This is a gross and disgusting hack. Since these
+ * buffers are physically addressed, they hang off the
+ * device vnode (devvp). As a result, they have no way
+ * of getting to the LFS superblock or lfs structure to
+ * keep track of the number of I/O's pending. So, I am
+ * going to stuff the fs into the saveaddr field of
+ * the buffer (yuk).
+ */
+ cbp->b_saveaddr = (caddr_t)fs;
+ vop_strategy_a.a_desc = VDESC(vop_strategy);
+ vop_strategy_a.a_bp = cbp;
+ (strategy)(&vop_strategy_a);
+ }
+ /*
+ * XXX
+ * Vinvalbuf can move locked buffers off the locked queue
+ * and we have no way of knowing about this. So, after
+ * doing a big write, we recalculate how many buffers are
+ * really still left on the locked queue.
+ */
+ locked_queue_count = count_lock_queue();
+ wakeup(&locked_queue_count);
+#ifdef DOSTATS
+ ++lfs_stats.psegwrites;
+ lfs_stats.blocktot += nblocks - 1;
+ if (fs->lfs_sp->seg_flags & SEGM_SYNC)
+ ++lfs_stats.psyncwrites;
+ if (fs->lfs_sp->seg_flags & SEGM_CLEAN) {
+ ++lfs_stats.pcleanwrites;
+ lfs_stats.cleanblocks += nblocks - 1;
+ }
+#endif
+ /* lfs_initseg() always runs; do_again is only consulted if it returns 0. */
+ return (lfs_initseg(fs) || do_again);
+}
+
+/*
+ * Checksum the in-core superblock and start an asynchronous write of a
+ * copy of it to the first superblock location (fs->lfs_sboffs[0]).
+ * lfs_supercallback() releases the temporary buffer on completion.
+ */
+void
+lfs_writesuper(fs)
+	struct lfs *fs;
+{
+	struct vop_strategy_args strat_args;
+	int (*strategy) __P((struct vop_strategy_args *));
+	struct buf *sbuf;
+	dev_t i_dev;
+	int spl;
+
+	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
+	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
+
+	/* The checksum covers everything except the lfs_cksum field. */
+	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
+
+	/* XXX Toggle between first two superblocks; for now just write first */
+	sbuf = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0],
+	    LFS_SBPAD);
+	*(struct lfs *)sbuf->b_data = *fs;
+
+	/* Turn the buffer into an asynchronous write request. */
+	sbuf->b_dev = i_dev;
+	sbuf->b_flags = (sbuf->b_flags | B_BUSY | B_CALL | B_ASYNC) &
+	    ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
+	sbuf->b_iodone = lfs_supercallback;
+
+	strat_args.a_desc = VDESC(vop_strategy);
+	strat_args.a_bp = sbuf;
+
+	/* Account for the pending write on the buffer's vnode. */
+	spl = splbio();
+	++sbuf->b_vp->v_numoutput;
+	splx(spl);
+
+	(strategy)(&strat_args);
+}
+
+/*
+ * Logical block number match routines used when traversing the dirty block
+ * chain.
+ */
+
+/* Accept buffers whose logical block number is not negative. */
+int
+lfs_match_data(fs, bp)
+	struct lfs *fs;
+	struct buf *bp;
+{
+	if (bp->b_lblkno < 0)
+		return (0);
+	return (1);
+}
+
+/*
+ * Accept buffers with a negative logical block number for which
+ * (-lbn - NDADDR) % NINDIR(fs) is 0.
+ */
+int
+lfs_match_indir(fs, bp)
+	struct lfs *fs;
+	struct buf *bp;
+{
+	int blk = bp->b_lblkno;
+
+	if (blk >= 0)
+		return (0);
+	return ((-blk - NDADDR) % NINDIR(fs) == 0);
+}
+
+/*
+ * Accept buffers with a negative logical block number for which
+ * (-lbn - NDADDR) % NINDIR(fs) is 1.
+ */
+int
+lfs_match_dindir(fs, bp)
+	struct lfs *fs;
+	struct buf *bp;
+{
+	int blk = bp->b_lblkno;
+
+	if (blk >= 0)
+		return (0);
+	return ((-blk - NDADDR) % NINDIR(fs) == 1);
+}
+
+/*
+ * Accept buffers with a negative logical block number for which
+ * (-lbn - NDADDR) % NINDIR(fs) is 2.
+ */
+int
+lfs_match_tindir(fs, bp)
+	struct lfs *fs;
+	struct buf *bp;
+{
+	int blk = bp->b_lblkno;
+
+	if (blk >= 0)
+		return (0);
+	return ((-blk - NDADDR) % NINDIR(fs) == 2);
+}
+
+/*
+ * Allocate a new buffer header.
+ *
+ * The header, and a data area of size rounded up to a multiple of
+ * DEV_BSIZE when that is non-zero, come from malloc(M_SEGMENT).  The
+ * buffer is attached to vp, both its logical and physical block numbers
+ * are set to daddr, and lfs_callback() is installed as the completion
+ * routine.
+ */
+struct buf *
+lfs_newbuf(vp, daddr, size)
+	struct vnode *vp;
+	daddr_t daddr;
+	size_t size;
+{
+	struct buf *nbp;
+	size_t alloc_len;
+
+	alloc_len = roundup(size, DEV_BSIZE);
+
+	nbp = malloc(sizeof(struct buf), M_SEGMENT, M_WAITOK);
+	bzero(nbp, sizeof(struct buf));
+	if (alloc_len != 0)
+		nbp->b_data = malloc(alloc_len, M_SEGMENT, M_WAITOK);
+	bgetvp(vp, nbp);
+
+	/* Sizes record what the caller asked for, not the rounded length. */
+	nbp->b_bufsize = size;
+	nbp->b_bcount = size;
+
+	nbp->b_lblkno = daddr;
+	nbp->b_blkno = daddr;
+
+	nbp->b_error = 0;
+	nbp->b_resid = 0;
+
+	nbp->b_iodone = lfs_callback;
+	nbp->b_flags |= B_BUSY | B_CALL | B_NOCACHE;
+	return (nbp);
+}
+
+/*
+ * Completion routine installed by lfs_newbuf().  The owning file system
+ * is taken from b_saveaddr; its pending I/O count is decremented, sleepers
+ * on the count are woken when it reaches zero, and the buffer header and
+ * data are freed.
+ */
+void
+lfs_callback(bp)
+	struct buf *bp;
+{
+	struct lfs *owner;
+
+	owner = (struct lfs *)bp->b_saveaddr;
+#ifdef DIAGNOSTIC
+	if (owner->lfs_iocount == 0)
+		panic("lfs_callback: zero iocount\n");
+#endif
+	--owner->lfs_iocount;
+	if (owner->lfs_iocount == 0)
+		wakeup(&owner->lfs_iocount);
+
+	/* Detach from the vnode before releasing the memory. */
+	brelvp(bp);
+	free(bp->b_data, M_SEGMENT);
+	free(bp, M_SEGMENT);
+}
+
+/*
+ * Completion routine for the superblock write started by lfs_writesuper():
+ * detach the buffer from its vnode and free its data and header.  Unlike
+ * lfs_callback(), no I/O count is touched.
+ */
+void
+lfs_supercallback(bp)
+	struct buf *bp;
+{
+	caddr_t data;
+
+	brelvp(bp);
+	data = bp->b_data;
+	free(data, M_SEGMENT);
+	free(bp, M_SEGMENT);
+}
+
+/*
+ * Shellsort (diminishing increment sort) from Data Structures and
+ * Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290;
+ * see also Knuth Vol. 3, page 84. The increments are selected from
+ * formula (8), page 95. Roughly O(N^3/2).
+ */
+/*
+ * This is our own private copy of shellsort because we want to sort
+ * two parallel arrays (the array of buffer pointers and the array of
+ * logical block numbers) simultaneously. The logical block numbers are
+ * compared as unsigned values so that the negative block numbers (meta
+ * data blocks) sort AFTER the data blocks.
+ *
+ * bp_array and lb_array must each hold nmemb entries; entry i of one
+ * belongs to entry i of the other and the two are permuted together.
+ */
+void
+lfs_shellsort(bp_array, lb_array, nmemb)
+	struct buf **bp_array;
+	daddr_t *lb_array;
+	register int nmemb;
+{
+	static int increments[] = { 4, 1, 0 };	/* 0 terminates the list */
+	register int incr, *incrp, t1, t2;
+	struct buf *bp_temp;
+	daddr_t lb_temp;
+
+	for (incrp = increments; (incr = *incrp++) != 0;)
+		for (t1 = incr; t1 < nmemb; ++t1)
+			for (t2 = t1 - incr; t2 >= 0; t2 -= incr) {
+				/*
+				 * Unsigned comparison: a signed one would
+				 * put the negative (meta data) block numbers
+				 * in front of the data blocks.
+				 */
+				if ((u_long)lb_array[t2] <=
+				    (u_long)lb_array[t2 + incr])
+					break;
+				lb_temp = lb_array[t2];
+				lb_array[t2] = lb_array[t2 + incr];
+				lb_array[t2 + incr] = lb_temp;
+				bp_temp = bp_array[t2];
+				bp_array[t2] = bp_array[t2 + incr];
+				bp_array[t2 + incr] = bp_temp;
+			}
+}
+
+/*
+ * Check VXLOCK. Return 1 if the vnode is locked. Otherwise, vget it
+ * and return vget()'s result, so any non-zero return means no reference
+ * was obtained.
+ */
+int
+lfs_vref(vp)
+	register struct vnode *vp;
+{
+
+	if (vp->v_flag & VXLOCK)
+		return (1);
+	return (vget(vp, 0));
+}
+
+/*
+ * Drop a reference on vp, with the global lfs_no_inactive flag raised
+ * for the duration of the vrele() call.
+ */
+void
+lfs_vunref(vp)
+ register struct vnode *vp;
+{
+ extern int lfs_no_inactive;
+
+ /*
+ * This is vrele except that we do not want to VOP_INACTIVE
+ * this vnode. Rather than inline vrele here, we use a global
+ * flag to tell lfs_inactive not to run. Yes, it's gross.
+ */
+ lfs_no_inactive = 1;
+ vrele(vp);
+ lfs_no_inactive = 0;
+}
diff --git a/sys/ufs/lfs/lfs_subr.c b/sys/ufs/lfs/lfs_subr.c
new file mode 100644
index 000000000000..afcd8c29b3f1
--- /dev/null
+++ b/sys/ufs/lfs/lfs_subr.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs_subr.c 8.2 (Berkeley) 9/21/93
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+/*
+ * Read the block of the file behind "a_vp" that contains byte "a_offset"
+ * and hand the buffer back through "a_bpp".  If "a_res" is non-zero, it is
+ * set to point at the byte inside that buffer which corresponds to
+ * "a_offset".  On a read error the buffer is released, *a_bpp is left NULL
+ * and the error is returned.
+ */
+int
+lfs_blkatoff(ap)
+	struct vop_blkatoff_args /* {
+		struct vnode *a_vp;
+		off_t a_offset;
+		char **a_res;
+		struct buf **a_bpp;
+	} */ *ap;
+{
+	register struct lfs *lfsp;
+	struct buf *blk;
+	daddr_t blkno;
+	int nbytes, rv;
+
+	lfsp = VTOI(ap->a_vp)->i_lfs;
+	nbytes = blksize(lfsp);
+	blkno = lblkno(lfsp, ap->a_offset);
+
+	/* Nothing is handed back unless the read succeeds. */
+	*ap->a_bpp = NULL;
+	rv = bread(ap->a_vp, blkno, nbytes, NOCRED, &blk);
+	if (rv) {
+		brelse(blk);
+		return (rv);
+	}
+
+	*ap->a_bpp = blk;
+	if (ap->a_res)
+		*ap->a_res = (char *)blk->b_data + blkoff(lfsp, ap->a_offset);
+	return (0);
+}
+
+
+/*
+ * lfs_seglock --
+ *	Single thread the segment writer.  The process that already holds
+ *	the lock may take it again: the hold count is bumped and "flags" are
+ *	merged into the segment in progress.  Any other process sleeps until
+ *	the lock is free, then allocates and initializes a fresh segment
+ *	structure and accounts for one pending I/O.
+ */
+void
+lfs_seglock(fs, flags)
+	struct lfs *fs;
+	unsigned long flags;
+{
+	struct segment *seg;
+	int ipl;
+
+	if (fs->lfs_seglock) {
+		if (fs->lfs_lockpid == curproc->p_pid) {
+			/* Nested acquisition by the current holder. */
+			++fs->lfs_seglock;
+			fs->lfs_sp->seg_flags |= flags;
+			return;
+		}
+		/* Somebody else has it; wait for lfs_segunlock's wakeup. */
+		while (fs->lfs_seglock)
+			(void)tsleep(&fs->lfs_seglock, PRIBIO + 1,
+			    "lfs seglock", 0);
+	}
+
+	fs->lfs_seglock = 1;
+	fs->lfs_lockpid = curproc->p_pid;
+
+	seg = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
+	fs->lfs_sp = seg;
+	seg->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
+	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
+	seg->seg_flags = flags;
+	seg->vp = NULL;
+	(void) lfs_initseg(fs);
+
+	/*
+	 * Keep a cumulative count of the outstanding I/O operations.  If the
+	 * disk drive catches up with us it could go to zero before we finish,
+	 * so we artificially increment it by one until we've scheduled all of
+	 * the writes we intend to do.
+	 */
+	ipl = splbio();
+	++fs->lfs_iocount;
+	splx(ipl);
+}
+/*
+ * lfs_segunlock --
+ * Release one hold on the segment lock taken by lfs_seglock.
+ *
+ * When the hold count is 1 this is the final release: the segment
+ * structure is torn down (including the summary buffer if sp->bpp
+ * and sp->cbpp differ), the extra I/O count added by lfs_seglock is
+ * dropped, the superblock is written if SEGM_CKP was requested, and
+ * sleepers on lfs_seglock are woken. A hold count of 0 panics; any
+ * larger count is simply decremented.
+ */
+void
+lfs_segunlock(fs)
+ struct lfs *fs;
+{
+ struct segment *sp;
+ unsigned long sync, ckp;
+ int s;
+
+ if (fs->lfs_seglock == 1) {
+
+ sp = fs->lfs_sp;
+ sync = sp->seg_flags & SEGM_SYNC;
+ ckp = sp->seg_flags & SEGM_CKP;
+ if (sp->bpp != sp->cbpp) {
+ /* Free allocated segment summary */
+ fs->lfs_offset -= LFS_SUMMARY_SIZE / DEV_BSIZE;
+ brelvp(*sp->bpp);
+ free((*sp->bpp)->b_data, M_SEGMENT);
+ free(*sp->bpp, M_SEGMENT);
+ } else
+ printf ("unlock to 0 with no summary");
+ free(sp->bpp, M_SEGMENT);
+ free(sp, M_SEGMENT);
+
+ /*
+ * If the I/O count is non-zero, sleep until it reaches zero.
+ * At the moment, the user's process hangs around so we can
+ * sleep.
+ * NOTE(review): the sleep below happens only when SEGM_SYNC
+ * was set, and it is a single tsleep, not a loop -- confirm
+ * that one wakeup is enough.
+ */
+ s = splbio();
+ --fs->lfs_iocount;
+ /*
+ * We let checkpoints happen asynchronously. That means
+ * that during recovery, we have to roll forward between
+ * the two segments described by the first and second
+ * superblocks to make sure that the checkpoint described
+ * by a superblock completed.
+ */
+ if (sync && fs->lfs_iocount)
+ (void)tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0);
+ splx(s);
+ if (ckp) {
+ fs->lfs_nactive = 0;
+ lfs_writesuper(fs);
+ }
+ --fs->lfs_seglock;
+ fs->lfs_lockpid = 0;
+ wakeup(&fs->lfs_seglock);
+ } else if (fs->lfs_seglock == 0) {
+ panic ("Seglock not held");
+ } else {
+ --fs->lfs_seglock;
+ }
+}
diff --git a/sys/ufs/lfs/lfs_syscalls.c b/sys/ufs/lfs/lfs_syscalls.c
new file mode 100644
index 000000000000..666595e6b595
--- /dev/null
+++ b/sys/ufs/lfs/lfs_syscalls.c
@@ -0,0 +1,562 @@
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs_syscalls.c 8.5 (Berkeley) 4/20/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+#define BUMP_FIP(SP) \
+ (SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks])
+
+#define INC_FINFO(SP) ++((SEGSUM *)((SP)->segsum))->ss_nfinfo
+#define DEC_FINFO(SP) --((SEGSUM *)((SP)->segsum))->ss_nfinfo
+
+/*
+ * Before committing to add something to a segment summary, make sure there
+ * is enough room. S is the bytes added to the summary.
+ */
+#define CHECK_SEG(s) \
+if (sp->sum_bytes_left < (s)) { \
+ (void) lfs_writeseg(fs, sp); \
+}
+struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
+
+/*
+ * lfs_markv:
+ *
+ * This will mark inodes and blocks dirty, so they are written into the log.
+ * It will block until all the blocks have been written. The segment create
+ * time passed in the block_info and inode_info structures is used to decide
+ * if the data is valid for each block (in case some process dirtied a block
+ * or inode that is being cleaned between the determination that a block is
+ * live and the lfs_markv call).
+ *
+ * The caller must pass suser(). The fsid and the BLOCK_INFO array are
+ * copied in from user space, the segment lock is taken with
+ * SEGM_SYNC | SEGM_CLEAN, and consecutive entries with the same bi_inode
+ * are gathered under one FINFO. A block is kept only if VOP_BMAP still
+ * maps bi_lbn to bi_daddr.
+ *
+ * NOTE(review): uap->blkcnt is used to size the malloc and the copyin
+ * without any range check -- confirm callers are trusted.
+ *
+ * 0 on success
+ * -1/errno is returned on error.
+ */
+struct lfs_markv_args {
+ fsid_t *fsidp; /* file system */
+ BLOCK_INFO *blkiov; /* block array */
+ int blkcnt; /* count of block array entries */
+};
+int
+lfs_markv(p, uap, retval)
+ struct proc *p;
+ struct lfs_markv_args *uap;
+ int *retval;
+{
+ struct segment *sp;
+ BLOCK_INFO *blkp;
+ IFILE *ifp;
+ struct buf *bp, **bpp;
+ struct inode *ip;
+ struct lfs *fs;
+ struct mount *mntp;
+ struct vnode *vp;
+ fsid_t fsid;
+ void *start;
+ ino_t lastino;
+ daddr_t b_daddr, v_daddr;
+ u_long bsize;
+ int cnt, error;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+
+ if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
+ return (error);
+ if ((mntp = getvfs(&fsid)) == NULL)
+ return (EINVAL);
+
+ cnt = uap->blkcnt;
+ start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
+ if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
+ goto err1;
+
+ /* Mark blocks/inodes dirty. */
+ fs = VFSTOUFS(mntp)->um_lfs;
+ bsize = fs->lfs_bsize;
+ error = 0;
+
+ lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
+ sp = fs->lfs_sp;
+ for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
+ blkp = start; cnt--; ++blkp) {
+ /*
+ * Get the IFILE entry (only once) and see if the file still
+ * exists.
+ */
+ if (lastino != blkp->bi_inode) {
+ if (lastino != LFS_UNUSED_INUM) {
+ /*
+ * Finish up last file.
+ * NOTE(review): if the previous file was skipped
+ * because its v_daddr was LFS_UNUSED_DADDR, lastino
+ * is still set but ip/vp were not assigned for it,
+ * so the lfs_writeinode/lfs_vunref below appear to
+ * act on an older (or unset) ip/vp -- confirm.
+ */
+ if (sp->fip->fi_nblocks == 0) {
+ DEC_FINFO(sp);
+ sp->sum_bytes_left +=
+ sizeof(FINFO) - sizeof(daddr_t);
+ } else {
+ lfs_updatemeta(sp);
+ BUMP_FIP(sp);
+ }
+
+ lfs_writeinode(fs, sp, ip);
+ lfs_vunref(vp);
+ }
+
+ /* Start a new file */
+ CHECK_SEG(sizeof(FINFO));
+ sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
+ INC_FINFO(sp);
+ sp->start_lbp = &sp->fip->fi_blocks[0];
+ sp->vp = NULL;
+ sp->fip->fi_version = blkp->bi_version;
+ sp->fip->fi_nblocks = 0;
+ sp->fip->fi_ino = blkp->bi_inode;
+ lastino = blkp->bi_inode;
+ if (blkp->bi_inode == LFS_IFILE_INUM)
+ v_daddr = fs->lfs_idaddr;
+ else {
+ LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
+ v_daddr = ifp->if_daddr;
+ brelse(bp);
+ }
+ if (v_daddr == LFS_UNUSED_DADDR)
+ continue;
+
+ /* Get the vnode/inode. */
+ if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
+ blkp->bi_lbn == LFS_UNUSED_LBN ?
+ blkp->bi_bp : NULL)) {
+#ifdef DIAGNOSTIC
+ printf("lfs_markv: VFS_VGET failed (%d)\n",
+ blkp->bi_inode);
+#endif
+ lastino = LFS_UNUSED_INUM;
+ v_daddr = LFS_UNUSED_DADDR;
+ continue;
+ }
+ sp->vp = vp;
+ ip = VTOI(vp);
+ } else if (v_daddr == LFS_UNUSED_DADDR)
+ continue;
+
+ /* If this BLOCK_INFO didn't contain a block, keep going. */
+ if (blkp->bi_lbn == LFS_UNUSED_LBN)
+ continue;
+ if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
+ b_daddr != blkp->bi_daddr)
+ continue;
+ /*
+ * If we got to here, then we are keeping the block. If it
+ * is an indirect block, we want to actually put it in the
+ * buffer cache so that it can be updated in the finish_meta
+ * section. If it's not, we need to allocate a fake buffer
+ * so that writeseg can perform the copyin and write the buffer.
+ */
+ if (blkp->bi_lbn >= 0) /* Data Block */
+ bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
+ blkp->bi_bp);
+ else {
+ bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
+ if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
+ (error = copyin(blkp->bi_bp, bp->b_data,
+ bsize)))
+ goto err2;
+ if (error = VOP_BWRITE(bp))
+ goto err2;
+ }
+ while (lfs_gatherblock(sp, bp, NULL));
+ }
+ if (sp->vp) {
+ if (sp->fip->fi_nblocks == 0) {
+ DEC_FINFO(sp);
+ sp->sum_bytes_left +=
+ sizeof(FINFO) - sizeof(daddr_t);
+ } else
+ lfs_updatemeta(sp);
+
+ lfs_writeinode(fs, sp, ip);
+ lfs_vunref(vp);
+ }
+ (void) lfs_writeseg(fs, sp);
+ lfs_segunlock(fs);
+ free(start, M_SEGMENT);
+ return (error);
+
+/*
+ * XXX
+ * If we come in to error 2, we might have indirect blocks that were
+ * updated and now have bad block pointers. I don't know what to do
+ * about this.
+ */
+
+err2: lfs_vunref(vp);
+ /* Free up fakebuffers */
+ for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
+ if ((*bpp)->b_flags & B_CALL) {
+ brelvp(*bpp);
+ free(*bpp, M_SEGMENT);
+ } else
+ brelse(*bpp);
+ lfs_segunlock(fs);
+err1:
+ free(start, M_SEGMENT);
+ return (error);
+}
+
+/*
+ * lfs_bmapv:
+ *
+ * This will fill in the current disk address for arrays of blocks.
+ *
+ * The caller must pass suser().  The BLOCK_INFO array is copied in, every
+ * entry whose bi_lbn is not LFS_UNUSED_LBN gets bi_daddr set to the address
+ * VOP_BMAP reports (or LFS_UNUSED_DADDR if the vnode or the mapping cannot
+ * be obtained), and the array is copied back out.
+ *
+ * 0 on success
+ * EINVAL if the file system is not found or blkcnt is negative
+ * -1/errno is returned on error, including a failed copyout of the results.
+ */
+struct lfs_bmapv_args {
+	fsid_t *fsidp;		/* file system */
+	BLOCK_INFO *blkiov;	/* block array */
+	int blkcnt;		/* count of block array entries */
+};
+int
+lfs_bmapv(p, uap, retval)
+	struct proc *p;
+	struct lfs_bmapv_args *uap;
+	int *retval;
+{
+	BLOCK_INFO *blkp;
+	struct mount *mntp;
+	struct vnode *vp;
+	fsid_t fsid;
+	void *start;
+	daddr_t daddr;
+	int cnt, error, step;
+
+	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+		return (error);
+
+	if ((error = copyin(uap->fsidp, &fsid, sizeof(fsid_t))) != 0)
+		return (error);
+	if ((mntp = getvfs(&fsid)) == NULL)
+		return (EINVAL);
+
+	/* A negative count would turn into an enormous allocation size. */
+	cnt = uap->blkcnt;
+	if (cnt < 0)
+		return (EINVAL);
+
+	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
+	error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO));
+	if (error) {
+		free(blkp, M_SEGMENT);
+		return (error);
+	}
+
+	for (step = cnt; step--; ++blkp) {
+		if (blkp->bi_lbn == LFS_UNUSED_LBN)
+			continue;
+		/* Could be a deadlock ? */
+		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
+			daddr = LFS_UNUSED_DADDR;
+		else {
+			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
+				daddr = LFS_UNUSED_DADDR;
+			vput(vp);
+		}
+		blkp->bi_daddr = daddr;
+	}
+
+	/* Report a failed copyout instead of claiming success. */
+	error = copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO));
+	free(start, M_SEGMENT);
+	return (error);
+}
+
+/*
+ * lfs_segclean:
+ *
+ * Mark the segment clean.  The caller must pass suser().  The current
+ * segment and segments flagged SEGUSE_ACTIVE are refused with EBUSY.
+ * Otherwise the free-space counters are credited, SEGUSE_DIRTY is cleared,
+ * the cleaner info counts are adjusted and sleepers on lfs_avail are woken.
+ *
+ * 0 on success
+ * -1/errno is returned on error.
+ */
+struct lfs_segclean_args {
+	fsid_t *fsidp;		/* file system */
+	u_long segment;		/* segment number */
+};
+int
+lfs_segclean(p, uap, retval)
+	struct proc *p;
+	struct lfs_segclean_args *uap;
+	int *retval;
+{
+	CLEANERINFO *cinfo;
+	SEGUSE *segp;
+	struct buf *bp;
+	struct mount *mntp;
+	struct lfs *fs;
+	fsid_t fsid;
+	int error;
+
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error)
+		return (error);
+
+	error = copyin(uap->fsidp, &fsid, sizeof(fsid_t));
+	if (error)
+		return (error);
+	mntp = getvfs(&fsid);
+	if (mntp == NULL)
+		return (EINVAL);
+
+	fs = VFSTOUFS(mntp)->um_lfs;
+
+	/* The segment currently being written can never be cleaned. */
+	if (datosn(fs, fs->lfs_curseg) == uap->segment)
+		return (EBUSY);
+
+	LFS_SEGENTRY(segp, fs, uap->segment, bp);
+	if (segp->su_flags & SEGUSE_ACTIVE) {
+		brelse(bp);
+		return (EBUSY);
+	}
+
+	/* Give the segment's space back to the free-space counters. */
+	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
+	fs->lfs_bfree += (segp->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
+	    segp->su_ninos * btodb(fs->lfs_bsize);
+	segp->su_flags &= ~SEGUSE_DIRTY;
+	(void) VOP_BWRITE(bp);
+
+	/* One more clean segment, one fewer dirty one. */
+	LFS_CLEANERINFO(cinfo, fs, bp);
+	++cinfo->clean;
+	--cinfo->dirty;
+	(void) VOP_BWRITE(bp);
+
+	wakeup(&fs->lfs_avail);
+	return (0);
+}
+
+/*
+ * lfs_segwait:
+ *
+ * This will block until a segment in file system fsid is written. A timeout
+ * in milliseconds may be specified which will awake the cleaner automatically.
+ * An fsid of -1 means any file system, and a timeout of 0 means forever.
+ *
+ * The caller must pass suser(). In the code that is compiled (the #else
+ * branch of WHEN_QUADS_WORK) an fsid that getvfs cannot find selects the
+ * global lfs_allclean_wakeup channel; otherwise the sleep is on that file
+ * system's lfs_nextseg. The timeout arrives as a struct timeval and is
+ * converted with hzto(); a NULL tv means no timeout.
+ *
+ * NOTE(review): the final return maps only ERESTART to EINTR and returns
+ * 0 for every other tsleep result, so a timeout returns 0 rather than
+ * the 1 listed below -- confirm which is intended.
+ *
+ * 0 on success
+ * 1 on timeout
+ * -1/errno is returned on error.
+ */
+struct lfs_segwait_args {
+ fsid_t *fsidp; /* file system */
+ struct timeval *tv; /* timeout */
+};
+int
+lfs_segwait(p, uap, retval)
+ struct proc *p;
+ struct lfs_segwait_args *uap;
+ int *retval;
+{
+ extern int lfs_allclean_wakeup;
+ struct mount *mntp;
+ struct timeval atv;
+ fsid_t fsid;
+ void *addr;
+ u_long timeout;
+ int error, s;
+
+ if (error = suser(p->p_ucred, &p->p_acflag)) {
+ return (error);
+}
+#ifdef WHEN_QUADS_WORK
+ if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
+ return (error);
+ if (fsid == (fsid_t)-1)
+ addr = &lfs_allclean_wakeup;
+ else {
+ if ((mntp = getvfs(&fsid)) == NULL)
+ return (EINVAL);
+ addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
+ }
+#else
+ if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
+ return (error);
+ if ((mntp = getvfs(&fsid)) == NULL)
+ addr = &lfs_allclean_wakeup;
+ else
+ addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
+#endif
+
+ if (uap->tv) {
+ if (error = copyin(uap->tv, &atv, sizeof(struct timeval)))
+ return (error);
+ if (itimerfix(&atv))
+ return (EINVAL);
+ s = splclock();
+ timevaladd(&atv, (struct timeval *)&time);
+ timeout = hzto(&atv);
+ splx(s);
+ } else
+ timeout = 0;
+
+ error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
+ return (error == ERESTART ? EINTR : 0);
+}
+
+/*
+ * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
+ * daddr from the ifile, so don't look it up again.  If the cleaner is
+ * processing IINFO structures, it may have the ondisk inode already, so
+ * don't go retrieving it again.
+ *
+ *	mp	mount point of the file system
+ *	ino	inode number wanted
+ *	daddr	disk address of the block holding the inode
+ *	vpp	out: the vnode, or NULL on failure
+ *	dinp	user-space copy of the dinode, or NULL to read it from disk
+ *
+ * Returns 0 on success, otherwise the error from lfs_vcreate, copyin,
+ * bread or ufs_vinit.
+ */
+int
+lfs_fastvget(mp, ino, daddr, vpp, dinp)
+	struct mount *mp;
+	ino_t ino;
+	daddr_t daddr;
+	struct vnode **vpp;
+	struct dinode *dinp;
+{
+	register struct inode *ip;
+	struct vnode *vp;
+	struct ufsmount *ump;
+	struct buf *bp;
+	dev_t dev;
+	int error;
+
+	ump = VFSTOUFS(mp);
+	dev = ump->um_dev;
+	/*
+	 * This is playing fast and loose.  Someone may have the inode
+	 * locked, in which case they are going to be distinctly unhappy
+	 * if we trash something.
+	 */
+	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
+		/*
+		 * NOTE(review): lfs_vref's result is ignored; when the
+		 * vnode is VXLOCKed no reference was taken -- confirm the
+		 * caller's later lfs_vunref is still balanced.
+		 */
+		lfs_vref(*vpp);
+		if ((*vpp)->v_flag & VXLOCK)
+			printf ("Cleaned vnode VXLOCKED\n");
+		ip = VTOI(*vpp);
+		/* IN_LOCKED lives in i_flag, like IN_MODIFIED below. */
+		if (ip->i_flag & IN_LOCKED)
+			printf("cleaned vnode locked\n");
+		if (!(ip->i_flag & IN_MODIFIED)) {
+			++ump->um_lfs->lfs_uinodes;
+			ip->i_flag |= IN_MODIFIED;
+		}
+		return (0);
+	}
+
+	/* Allocate new vnode/inode. */
+	if ((error = lfs_vcreate(mp, ino, &vp)) != 0) {
+		*vpp = NULL;
+		return (error);
+	}
+
+	/*
+	 * Put it onto its hash chain and lock it so that other requests for
+	 * this inode will block if they arrive while we are sleeping waiting
+	 * for old data structures to be purged or for the contents of the
+	 * disk portion of this inode to be read.
+	 */
+	ip = VTOI(vp);
+	ufs_ihashins(ip);
+
+	/*
+	 * XXX
+	 * This may not need to be here, logically it should go down with
+	 * the i_devvp initialization.
+	 * Ask Kirk.
+	 */
+	ip->i_lfs = ump->um_lfs;
+
+	/*
+	 * Read in the disk contents for the inode, copy into the inode.
+	 * The braces matter: without them the "else" pairs with the copyin
+	 * test, so a NULL dinp would read nothing at all.
+	 */
+	if (dinp) {
+		error = copyin(dinp, &ip->i_din, sizeof(struct dinode));
+		if (error) {
+			/* Same cleanup as the failed disk read below. */
+			ufs_ihashrem(ip);
+			lfs_vunref(vp);
+			*vpp = NULL;
+			return (error);
+		}
+	} else {
+		error = bread(ump->um_devvp, daddr,
+		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp);
+		if (error) {
+			/*
+			 * The inode does not contain anything useful, so it
+			 * would be misleading to leave it on its hash chain.
+			 * Iput() will return it to the free list.
+			 */
+			ufs_ihashrem(ip);
+
+			/* Unlock and discard unneeded inode. */
+			lfs_vunref(vp);
+			brelse(bp);
+			*vpp = NULL;
+			return (error);
+		}
+		ip->i_din =
+		    *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
+		brelse(bp);
+	}
+
+	/* Inode was just read from user space or disk, make sure it's locked */
+	ip->i_flag |= IN_LOCKED;
+
+	/*
+	 * Initialize the vnode from the inode, check for aliases.  In all
+	 * cases re-init ip, the underlying vnode/inode may have changed.
+	 */
+	if ((error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) != 0) {
+		lfs_vunref(vp);
+		*vpp = NULL;
+		return (error);
+	}
+	/*
+	 * Finish inode initialization now that aliasing has been resolved.
+	 */
+	ip->i_devvp = ump->um_devvp;
+	ip->i_flag |= IN_MODIFIED;
+	++ump->um_lfs->lfs_uinodes;
+	VREF(ip->i_devvp);
+	*vpp = vp;
+	return (0);
+}
+/*
+ * Build a placeholder buffer for logical block "lbn" of "vp".  The buffer
+ * comes from lfs_newbuf with a size argument of 0; it is marked B_INVAL,
+ * given "size" as both its buffer size and byte count, and records the
+ * user address "uaddr" in b_saveaddr.
+ */
+struct buf *
+lfs_fakebuf(vp, lbn, size, uaddr)
+	struct vnode *vp;
+	int lbn;
+	size_t size;
+	caddr_t uaddr;
+{
+	struct buf *fake;
+
+	fake = lfs_newbuf(vp, lbn, 0);
+	fake->b_flags |= B_INVAL;
+	fake->b_bcount = size;
+	fake->b_bufsize = size;
+	fake->b_saveaddr = uaddr;
+	return (fake);
+}
diff --git a/sys/ufs/lfs/lfs_vfsops.c b/sys/ufs/lfs/lfs_vfsops.c
new file mode 100644
index 000000000000..0c8186e2322a
--- /dev/null
+++ b/sys/ufs/lfs/lfs_vfsops.c
@@ -0,0 +1,573 @@
+/*
+ * Copyright (c) 1989, 1991, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs_vfsops.c 8.7 (Berkeley) 4/16/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/file.h>
+#include <sys/disklabel.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+int lfs_mountfs __P((struct vnode *, struct mount *, struct proc *));
+
+struct vfsops lfs_vfsops = { /* LFS entry table; slot roles below are inferred from the routine names -- TODO confirm against struct vfsops */
+ lfs_mount, /* mount system call */
+ ufs_start, /* ufs_* routine, defined outside this file */
+ lfs_unmount, /* unmount system call */
+ ufs_root, /* ufs_* routine, defined outside this file */
+ ufs_quotactl, /* ufs_* routine, defined outside this file */
+ lfs_statfs, /* get file system statistics */
+ lfs_sync, /* write a checkpoint segment */
+ lfs_vget, /* inode number to in-core vnode */
+ lfs_fhtovp, /* defined outside this section */
+ lfs_vptofh, /* defined outside this section */
+ lfs_init, /* defined outside this section */
+};
+
+int
+lfs_mountroot()
+{
+ panic("lfs_mountroot"); /* XXX -- implement; until then mounting LFS as the root file system always panics */
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * Copies in the ufs_args from "data" and refuses exported mounts with
+ * EINVAL.  For MNT_UPDATE the read-only state may be dropped, and if no
+ * device name was given the request is handed to vfs_export.  Otherwise
+ * the device name is looked up and must be a block device with a valid
+ * major number: a new mount goes through lfs_mountfs, an update must name
+ * the device that is already mounted.  Finally the mount-point and device
+ * names are recorded and the statistics refreshed.
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+lfs_mount(mp, path, data, ndp, p)
+	register struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *devvp;
+	struct ufs_args args;
+	struct ufsmount *ump;
+	register struct lfs *fs;				/* LFS */
+	u_int size;
+	int error;
+
+	error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
+	if (error)
+		return (error);
+
+	/* Until LFS can do NFS right.		XXX */
+	if (args.export.ex_flags & MNT_EXPORTED)
+		return (EINVAL);
+
+	/*
+	 * If updating, check whether changing from read-only to
+	 * read/write; if there is no device name, that's all we do.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		ump = VFSTOUFS(mp);
+#ifdef NOTLFS							/* LFS */
+		fs = ump->um_fs;
+		if (fs->fs_ronly && (mp->mnt_flag & MNT_RDONLY) == 0)
+			fs->fs_ronly = 0;
+#else
+		fs = ump->um_lfs;
+		if (fs->lfs_ronly && (mp->mnt_flag & MNT_RDONLY) == 0)
+			fs->lfs_ronly = 0;
+#endif
+		if (args.fspec == 0) {
+			/*
+			 * Process export requests.
+			 */
+			return (vfs_export(mp, &ump->um_export, &args.export));
+		}
+	}
+	/*
+	 * Not an update, or updating the name: look up the name
+	 * and verify that it refers to a sensible block device.
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+	error = namei(ndp);
+	if (error)
+		return (error);
+	devvp = ndp->ni_vp;
+	if (devvp->v_type != VBLK) {
+		vrele(devvp);
+		return (ENOTBLK);
+	}
+	if (major(devvp->v_rdev) >= nblkdev) {
+		vrele(devvp);
+		return (ENXIO);
+	}
+	if ((mp->mnt_flag & MNT_UPDATE) == 0)
+		error = lfs_mountfs(devvp, mp, p);		/* LFS */
+	else {
+		/* An update may not switch to a different device. */
+		if (devvp != ump->um_devvp)
+			error = EINVAL;	/* needs translation */
+		else
+			vrele(devvp);
+	}
+	if (error) {
+		vrele(devvp);
+		return (error);
+	}
+	ump = VFSTOUFS(mp);
+	fs = ump->um_lfs;					/* LFS */
+#ifdef NOTLFS							/* LFS */
+	(void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size);
+	bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size);
+	bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN);
+	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void) ufs_statfs(mp, &mp->mnt_stat, p);
+#else
+	(void)copyinstr(path, fs->lfs_fsmnt, sizeof(fs->lfs_fsmnt) - 1, &size);
+	bzero(fs->lfs_fsmnt + size, sizeof(fs->lfs_fsmnt) - size);
+	bcopy((caddr_t)fs->lfs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN);
+	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void) lfs_statfs(mp, &mp->mnt_stat, p);
+#endif
+	return (0);
+}
+
+/*
+ * Common code for mount and mountroot
+ * LFS specific
+ *
+ * Opens the device, reads and sanity-checks the superblock, allocates the
+ * ufsmount and in-core lfs structures, fills in the mount structure and
+ * takes a long-lived reference on the ifile vnode.
+ *
+ *	devvp	vnode of the block device to mount
+ *	mp	mount structure being filled in
+ *	p	calling process
+ *
+ * Returns 0 on success or an errno value.  On failure after the device
+ * was opened, the device is closed again, the allocations are released
+ * and the mount structure's data pointer is cleared.
+ */
+int
+lfs_mountfs(devvp, mp, p)
+	register struct vnode *devvp;
+	struct mount *mp;
+	struct proc *p;
+{
+	extern struct vnode *rootvp;
+	register struct lfs *fs;
+	register struct ufsmount *ump;
+	struct vnode *vp;
+	struct buf *bp;
+	struct partinfo dpart;
+	dev_t dev;
+	int error, i, ronly, size;
+
+	/*
+	 * Disallow multiple mounts of the same device.
+	 * Disallow mounting of a device that is currently in use
+	 * (except for root, which might share swap device for miniroot).
+	 * Flush out any old buffers remaining from a previous use.
+	 */
+	if ((error = vfs_mountedon(devvp)) != 0)
+		return (error);
+	if (vcount(devvp) > 1 && devvp != rootvp)
+		return (EBUSY);
+	if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0)
+		return (error);
+
+	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
+	if (error)
+		return (error);
+
+	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
+		size = DEV_BSIZE;
+	else {
+		size = dpart.disklab->d_secsize;
+#ifdef NEVER_USED
+		dpart.part->p_fstype = FS_LFS;
+		dpart.part->p_fsize = fs->lfs_fsize;	/* frag size */
+		dpart.part->p_frag = fs->lfs_frag;	/* frags per block */
+		dpart.part->p_cpg = fs->lfs_segshift;	/* segment shift */
+#endif
+	}
+
+	/* Don't free random space on error. */
+	bp = NULL;
+	ump = NULL;
+
+	/* Read in the superblock. */
+	error = bread(devvp, LFS_LABELPAD / size, LFS_SBPAD, NOCRED, &bp);
+	if (error)
+		goto out;
+	fs = (struct lfs *)bp->b_data;
+
+	/* Check the basics. */
+	if (fs->lfs_magic != LFS_MAGIC || fs->lfs_bsize > MAXBSIZE ||
+	    fs->lfs_bsize < sizeof(struct lfs)) {
+		error = EINVAL;		/* XXX needs translation */
+		goto out;
+	}
+
+	/* Allocate the mount structure, copy the superblock into it. */
+	ump = (struct ufsmount *)malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
+	fs = ump->um_lfs = malloc(sizeof(struct lfs), M_UFSMNT, M_WAITOK);
+	bcopy(bp->b_data, fs, sizeof(struct lfs));
+	if (sizeof(struct lfs) < LFS_SBPAD)			/* XXX why? */
+		bp->b_flags |= B_INVAL;
+	brelse(bp);
+	bp = NULL;
+
+	/* Set up the I/O information */
+	fs->lfs_iocount = 0;
+
+	/* Set up the ifile and lock aflags */
+	fs->lfs_doifile = 0;
+	fs->lfs_writer = 0;
+	fs->lfs_dirops = 0;
+	fs->lfs_seglock = 0;
+
+	/* Set the file system readonly/modify bits. */
+	fs->lfs_ronly = ronly;
+	if (ronly == 0)
+		fs->lfs_fmod = 1;
+
+	/* Initialize the mount structure. */
+	dev = devvp->v_rdev;
+	mp->mnt_data = (qaddr_t)ump;
+	mp->mnt_stat.f_fsid.val[0] = (long)dev;
+	mp->mnt_stat.f_fsid.val[1] = MOUNT_LFS;
+	mp->mnt_flag |= MNT_LOCAL;
+	ump->um_mountp = mp;
+	ump->um_dev = dev;
+	ump->um_devvp = devvp;
+	ump->um_bptrtodb = 0;
+	ump->um_seqinc = 1 << fs->lfs_fsbtodb;
+	ump->um_nindir = fs->lfs_nindir;
+	for (i = 0; i < MAXQUOTAS; i++)
+		ump->um_quotas[i] = NULLVP;
+	devvp->v_specflags |= SI_MOUNTEDON;
+
+	/*
+	 * We use the ifile vnode for almost every operation.  Instead of
+	 * retrieving it from the hash table each time we retrieve it here,
+	 * artificially increment the reference count and keep a pointer
+	 * to it in the incore copy of the superblock.
+	 */
+	error = VFS_VGET(mp, LFS_IFILE_INUM, &vp);
+	if (error) {
+		/*
+		 * The mount is being abandoned: take back the marks made
+		 * just above, the same two that lfs_unmount clears, so the
+		 * device is not left looking mounted.
+		 */
+		devvp->v_specflags &= ~SI_MOUNTEDON;
+		mp->mnt_flag &= ~MNT_LOCAL;
+		goto out;
+	}
+	fs->lfs_ivnode = vp;
+	VREF(vp);
+	vput(vp);
+
+	return (0);
+out:
+	if (bp)
+		brelse(bp);
+	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
+	if (ump) {
+		free(ump->um_lfs, M_UFSMNT);
+		free(ump, M_UFSMNT);
+		mp->mnt_data = (qaddr_t)0;
+	}
+	return (error);
+}
+
+/*
+ * unmount system call
+ *
+ * Flushes every vnode except the ifile, marks the file system clean,
+ * writes a synchronous checkpoint, releases the ifile vnode, closes the
+ * device and frees the in-core mount data.
+ *
+ *	mp		mount point being unmounted
+ *	mntflags	MNT_FORCE requests a forced unmount; it is refused
+ *			with EINVAL unless "doforce" is set, and always
+ *			for the root file system
+ *	p		calling process
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+lfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	register struct ufsmount *ump;
+	register struct lfs *fs;
+	int i, error, flags, ronly;
+
+	flags = 0;
+	if (mntflags & MNT_FORCE) {
+		if (!doforce || (mp->mnt_flag & MNT_ROOTFS))
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+
+	ump = VFSTOUFS(mp);
+	fs = ump->um_lfs;
+#ifdef QUOTA
+	if (mp->mnt_flag & MNT_QUOTA) {
+		error = vflush(mp, fs->lfs_ivnode, SKIPSYSTEM|flags);
+		if (error)
+			return (error);
+		for (i = 0; i < MAXQUOTAS; i++) {
+			if (ump->um_quotas[i] == NULLVP)
+				continue;
+			quotaoff(p, mp, i);
+		}
+		/*
+		 * Here we fall through to vflush again to ensure
+		 * that we have gotten rid of all the system vnodes.
+		 */
+	}
+#endif
+	if ((error = vflush(mp, fs->lfs_ivnode, flags)) != 0)
+		return (error);
+	fs->lfs_clean = 1;
+	if ((error = VFS_SYNC(mp, 1, p->p_ucred, p)) != 0)
+		return (error);
+	if (fs->lfs_ivnode->v_dirtyblkhd.lh_first)
+		panic("lfs_unmount: still dirty blocks on ifile vnode\n");
+	vrele(fs->lfs_ivnode);
+	vgone(fs->lfs_ivnode);
+
+	/*
+	 * Close the device with the mode matching the mount's read-only
+	 * state, the same selection lfs_mountfs makes for VOP_OPEN.
+	 */
+	ronly = fs->lfs_ronly;
+	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
+	error = VOP_CLOSE(ump->um_devvp,
+	    ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
+	vrele(ump->um_devvp);
+	free(fs, M_UFSMNT);
+	free(ump, M_UFSMNT);
+	mp->mnt_data = (qaddr_t)0;
+	mp->mnt_flag &= ~MNT_LOCAL;
+	return (error);
+}
+
+/*
+ * Get file system statistics.
+ *
+ * Fills "sbp" from the in-core superblock of "mp".  Sizes kept in disk
+ * blocks are converted with dbtofsb().  When "sbp" is not the mount's own
+ * mnt_stat, the mounted-on and mounted-from names are copied in as well.
+ * Panics if the superblock magic number is wrong.  Always returns 0.
+ */
+int
+lfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	register struct statfs *sbp;
+	struct proc *p;
+{
+	register struct lfs *fs;
+
+	fs = VFSTOUFS(mp)->um_lfs;
+	if (fs->lfs_magic != LFS_MAGIC)
+		panic("lfs_statfs: magic");
+
+	sbp->f_type = MOUNT_LFS;
+	sbp->f_bsize = fs->lfs_bsize;
+	sbp->f_iosize = fs->lfs_bsize;
+	sbp->f_blocks = dbtofsb(fs,fs->lfs_dsize);
+	sbp->f_bfree = dbtofsb(fs, fs->lfs_bfree);
+
+	/* Space left once the minfree reserve and the used blocks are gone. */
+	sbp->f_bavail = (fs->lfs_dsize * (100 - fs->lfs_minfree) / 100) -
+	    (fs->lfs_dsize - fs->lfs_bfree);
+	sbp->f_bavail = dbtofsb(fs, sbp->f_bavail);
+
+	sbp->f_files = fs->lfs_nfiles;
+	sbp->f_ffree = sbp->f_bfree * INOPB(fs);
+
+	/* The mount's own statfs buffer already carries the names. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy((caddr_t)mp->mnt_stat.f_mntonname,
+	    (caddr_t)&sbp->f_mntonname[0], MNAMELEN);
+	bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
+	    (caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
+	return (0);
+}
+
+/*
+ * Go through the disk queues to initiate sandbagged IO;
+ * go through the inodes to write those that have been modified;
+ * initiate the writing of the super block if it has been modified.
+ *
+ * Note: we are always called with the filesystem marked `MPBUSY'.
+ *
+ * A non-zero "waitfor" adds SEGM_SYNC to the segment write.  Returns the
+ * result of lfs_segwrite.
+ */
+int
+lfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int error;
+
+	/* All syncs must be checkpoints until roll-forward is implemented. */
+	if (waitfor)
+		error = lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC);
+	else
+		error = lfs_segwrite(mp, SEGM_CKP);
+#ifdef QUOTA
+	qsync(mp);
+#endif
+	return (error);
+}
+
+/*
+ * Look up an LFS dinode number to find its incore vnode. If not already
+ * in core, read it in from the specified device. Return the inode locked.
+ * Detection and handling of mount points must be done by the calling routine.
+ *
+ * mp  - mount point; its ufsmount supplies the device and LFS superblock
+ * ino - inode number to look up
+ * vpp - receives the vnode on success and is NULL on every error return
+ *
+ * Returns 0 on success, ENOENT when the ifile entry for ino holds
+ * LFS_UNUSED_DADDR, or the error reported by lfs_vcreate(), bread() or
+ * ufs_vinit().
+ */
+int
+lfs_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+ register struct lfs *fs;
+ register struct inode *ip;
+ struct buf *bp;
+ struct ifile *ifp;
+ struct vnode *vp;
+ struct ufsmount *ump;
+ daddr_t daddr;
+ dev_t dev;
+ int error;
+
+ ump = VFSTOUFS(mp);
+ dev = ump->um_dev;
+ if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
+ return (0); /* found on the inode hash: already in core */
+
+ /*
+ * Translate the inode number to a disk address.  The ifile's own
+ * address is kept in the superblock; every other inode's address is
+ * taken from its ifile entry.
+ */
+ fs = ump->um_lfs;
+ if (ino == LFS_IFILE_INUM)
+ daddr = fs->lfs_idaddr;
+ else {
+ LFS_IENTRY(ifp, fs, ino, bp);
+ daddr = ifp->if_daddr;
+ brelse(bp);
+ if (daddr == LFS_UNUSED_DADDR)
+ return (ENOENT); /* *vpp is still NULL from the hash miss */
+ }
+
+ /* Allocate new vnode/inode. */
+ if (error = lfs_vcreate(mp, ino, &vp)) {
+ *vpp = NULL;
+ return (error);
+ }
+
+ /*
+ * Put it onto its hash chain and lock it so that other requests for
+ * this inode will block if they arrive while we are sleeping waiting
+ * for old data structures to be purged or for the contents of the
+ * disk portion of this inode to be read.
+ */
+ ip = VTOI(vp);
+ ufs_ihashins(ip);
+
+ /*
+ * XXX
+ * This may not need to be here, logically it should go down with
+ * the i_devvp initialization.
+ * Ask Kirk.
+ */
+ ip->i_lfs = ump->um_lfs;
+
+ /*
+ * Read in the disk contents for the inode, copy into the inode.
+ * One file system block is read at daddr from the mounted device.
+ */
+ if (error =
+ bread(ump->um_devvp, daddr, (int)fs->lfs_bsize, NOCRED, &bp)) {
+ /*
+ * The inode does not contain anything useful, so it would
+ * be misleading to leave it on its hash chain. With mode
+ * still zero, it will be unlinked and returned to the free
+ * list by vput().
+ */
+ vput(vp);
+ brelse(bp);
+ *vpp = NULL;
+ return (error);
+ }
+ /* lfs_ifind() picks the dinode for ino out of the block just read. */
+ ip->i_din = *lfs_ifind(fs, ino, (struct dinode *)bp->b_data);
+ brelse(bp);
+
+ /*
+ * Initialize the vnode from the inode, check for aliases. In all
+ * cases re-init ip, the underlying vnode/inode may have changed.
+ *
+ * NOTE(review): vp is passed by address and may be replaced, but ip
+ * is not reloaded from vp afterwards -- confirm that ufs_vinit()
+ * keeps the same in-core inode when it substitutes a vnode.
+ */
+ if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
+ vput(vp);
+ *vpp = NULL;
+ return (error);
+ }
+ /*
+ * Finish inode initialization now that aliasing has been resolved.
+ * The inode takes its own reference on the device vnode.
+ */
+ ip->i_devvp = ump->um_devvp;
+ VREF(ip->i_devvp);
+ *vpp = vp;
+ return (0);
+}
+
+/*
+ * File handle to vnode
+ *
+ * Have to be really careful about stale file handles:
+ * - check that the inode number is valid; anything below ROOTINO is
+ *   rejected here with ESTALE
+ * - everything else is delegated to ufs_check_export(), which is handed
+ *   the handle together with vpp, exflagsp and credanonp and whose
+ *   result is returned unchanged
+ *
+ * XXX
+ * use ifile to see if inode is allocated instead of reading off disk
+ * what is the relationship between my generational number and the NFS
+ * generational number.
+ */
+int
+lfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	register struct ufid *handle = (struct ufid *)fhp;
+
+	if (handle->ufid_ino >= ROOTINO)
+		return (ufs_check_export(mp, handle, nam, vpp, exflagsp,
+		    credanonp));
+	return (ESTALE);
+}
+
+/*
+ * Vnode pointer to File handle
+ *
+ * Builds a struct ufid in the caller's fid buffer from the vnode's inode:
+ * the handle length, the inode number and the inode generation number.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+lfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	register struct inode *node = VTOI(vp);
+	register struct ufid *handle = (struct ufid *)fhp;
+
+	handle->ufid_gen = node->i_gen;
+	handle->ufid_ino = node->i_number;
+	handle->ufid_len = sizeof(struct ufid);
+	return (0);
+}
diff --git a/sys/ufs/lfs/lfs_vnops.c b/sys/ufs/lfs/lfs_vnops.c
new file mode 100644
index 000000000000..fc6bd480d22a
--- /dev/null
+++ b/sys/ufs/lfs/lfs_vnops.c
@@ -0,0 +1,487 @@
+/*
+ * Copyright (c) 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lfs_vnops.c 8.5 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+/*
+ * Global vfs data structures for lfs.
+ *
+ * lfs_vnodeop_entries pairs each vnode operation descriptor with the
+ * routine that implements it for ordinary LFS vnodes.  Most operations
+ * use the shared ufs_* routines; LFS supplies its own close, getattr,
+ * read, write, fsync, inactive, blkatoff, valloc, vfree, truncate,
+ * update and bwrite.  The lfs_symlink/lfs_mknod/lfs_create/lfs_mkdir/
+ * lfs_remove/lfs_rmdir/lfs_link/lfs_rename wrappers defined later in
+ * this file are not installed here; the ufs_* versions are.
+ */
+int (**lfs_vnodeop_p)();
+struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, ufs_lookup }, /* lookup */
+ { &vop_create_desc, ufs_create }, /* create */
+ { &vop_mknod_desc, ufs_mknod }, /* mknod */
+ { &vop_open_desc, ufs_open }, /* open */
+ { &vop_close_desc, lfs_close }, /* close */
+ { &vop_access_desc, ufs_access }, /* access */
+ { &vop_getattr_desc, lfs_getattr }, /* getattr */
+ { &vop_setattr_desc, ufs_setattr }, /* setattr */
+ { &vop_read_desc, lfs_read }, /* read */
+ { &vop_write_desc, lfs_write }, /* write */
+ { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */
+ { &vop_select_desc, ufs_select }, /* select */
+ { &vop_mmap_desc, ufs_mmap }, /* mmap */
+ { &vop_fsync_desc, lfs_fsync }, /* fsync */
+ { &vop_seek_desc, ufs_seek }, /* seek */
+ { &vop_remove_desc, ufs_remove }, /* remove */
+ { &vop_link_desc, ufs_link }, /* link */
+ { &vop_rename_desc, ufs_rename }, /* rename */
+ { &vop_mkdir_desc, ufs_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, ufs_rmdir }, /* rmdir */
+ { &vop_symlink_desc, ufs_symlink }, /* symlink */
+ { &vop_readdir_desc, ufs_readdir }, /* readdir */
+ { &vop_readlink_desc, ufs_readlink }, /* readlink */
+ { &vop_abortop_desc, ufs_abortop }, /* abortop */
+ { &vop_inactive_desc, lfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */
+ { &vop_lock_desc, ufs_lock }, /* lock */
+ { &vop_unlock_desc, ufs_unlock }, /* unlock */
+ { &vop_bmap_desc, ufs_bmap }, /* bmap */
+ { &vop_strategy_desc, ufs_strategy }, /* strategy */
+ { &vop_print_desc, ufs_print }, /* print */
+ { &vop_islocked_desc, ufs_islocked }, /* islocked */
+ { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */
+ { &vop_advlock_desc, ufs_advlock }, /* advlock */
+ { &vop_blkatoff_desc, lfs_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, lfs_valloc }, /* valloc */
+ { &vop_vfree_desc, lfs_vfree }, /* vfree */
+ { &vop_truncate_desc, lfs_truncate }, /* truncate */
+ { &vop_update_desc, lfs_update }, /* update */
+ { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* null entry ends the table */
+};
+struct vnodeopv_desc lfs_vnodeop_opv_desc =
+ { &lfs_vnodeop_p, lfs_vnodeop_entries }; /* ties the vector pointer to its entry table */
+
+int (**lfs_specop_p)(); /* operations vector handed to ufs_vinit() by lfs_vget() */
+struct vnodeopv_entry_desc lfs_specop_entries[] = { /*
+ * Entry table for lfs_specop_p.  Most operations go to the spec_*
+ * routines; close, read and write use the ufsspec_* versions; access,
+ * setattr, reclaim, lock, unlock, print and islocked use ufs_*; and
+ * getattr, inactive, vfree, update and bwrite use the lfs_* routines.
+ */
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, spec_lookup }, /* lookup */
+ { &vop_create_desc, spec_create }, /* create */
+ { &vop_mknod_desc, spec_mknod }, /* mknod */
+ { &vop_open_desc, spec_open }, /* open */
+ { &vop_close_desc, ufsspec_close }, /* close */
+ { &vop_access_desc, ufs_access }, /* access */
+ { &vop_getattr_desc, lfs_getattr }, /* getattr */
+ { &vop_setattr_desc, ufs_setattr }, /* setattr */
+ { &vop_read_desc, ufsspec_read }, /* read */
+ { &vop_write_desc, ufsspec_write }, /* write */
+ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
+ { &vop_select_desc, spec_select }, /* select */
+ { &vop_mmap_desc, spec_mmap }, /* mmap */
+ { &vop_fsync_desc, spec_fsync }, /* fsync */
+ { &vop_seek_desc, spec_seek }, /* seek */
+ { &vop_remove_desc, spec_remove }, /* remove */
+ { &vop_link_desc, spec_link }, /* link */
+ { &vop_rename_desc, spec_rename }, /* rename */
+ { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
+ { &vop_symlink_desc, spec_symlink }, /* symlink */
+ { &vop_readdir_desc, spec_readdir }, /* readdir */
+ { &vop_readlink_desc, spec_readlink }, /* readlink */
+ { &vop_abortop_desc, spec_abortop }, /* abortop */
+ { &vop_inactive_desc, lfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */
+ { &vop_lock_desc, ufs_lock }, /* lock */
+ { &vop_unlock_desc, ufs_unlock }, /* unlock */
+ { &vop_bmap_desc, spec_bmap }, /* bmap */
+ { &vop_strategy_desc, spec_strategy }, /* strategy */
+ { &vop_print_desc, ufs_print }, /* print */
+ { &vop_islocked_desc, ufs_islocked }, /* islocked */
+ { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
+ { &vop_advlock_desc, spec_advlock }, /* advlock */
+ { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, spec_valloc }, /* valloc */
+ { &vop_vfree_desc, lfs_vfree }, /* vfree */
+ { &vop_truncate_desc, spec_truncate }, /* truncate */
+ { &vop_update_desc, lfs_update }, /* update */
+ { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* null entry ends the table */
+};
+struct vnodeopv_desc lfs_specop_opv_desc =
+ { &lfs_specop_p, lfs_specop_entries }; /* ties the vector pointer to its entry table */
+
+#ifdef FIFO
+int (**lfs_fifoop_p)(); /* operations vector; only built when FIFO is defined */
+struct vnodeopv_entry_desc lfs_fifoop_entries[] = { /*
+ * Entry table for lfs_fifoop_p.  Most operations go to the fifo_*
+ * routines; close, read and write use the ufsfifo_* versions; access,
+ * setattr, reclaim, lock, unlock, print and islocked use ufs_*; and
+ * getattr, inactive, vfree, update and bwrite use the lfs_* routines.
+ */
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, fifo_lookup }, /* lookup */
+ { &vop_create_desc, fifo_create }, /* create */
+ { &vop_mknod_desc, fifo_mknod }, /* mknod */
+ { &vop_open_desc, fifo_open }, /* open */
+ { &vop_close_desc, ufsfifo_close }, /* close */
+ { &vop_access_desc, ufs_access }, /* access */
+ { &vop_getattr_desc, lfs_getattr }, /* getattr */
+ { &vop_setattr_desc, ufs_setattr }, /* setattr */
+ { &vop_read_desc, ufsfifo_read }, /* read */
+ { &vop_write_desc, ufsfifo_write }, /* write */
+ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */
+ { &vop_select_desc, fifo_select }, /* select */
+ { &vop_mmap_desc, fifo_mmap }, /* mmap */
+ { &vop_fsync_desc, fifo_fsync }, /* fsync */
+ { &vop_seek_desc, fifo_seek }, /* seek */
+ { &vop_remove_desc, fifo_remove }, /* remove */
+ { &vop_link_desc, fifo_link }, /* link */
+ { &vop_rename_desc, fifo_rename }, /* rename */
+ { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */
+ { &vop_symlink_desc, fifo_symlink }, /* symlink */
+ { &vop_readdir_desc, fifo_readdir }, /* readdir */
+ { &vop_readlink_desc, fifo_readlink }, /* readlink */
+ { &vop_abortop_desc, fifo_abortop }, /* abortop */
+ { &vop_inactive_desc, lfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */
+ { &vop_lock_desc, ufs_lock }, /* lock */
+ { &vop_unlock_desc, ufs_unlock }, /* unlock */
+ { &vop_bmap_desc, fifo_bmap }, /* bmap */
+ { &vop_strategy_desc, fifo_strategy }, /* strategy */
+ { &vop_print_desc, ufs_print }, /* print */
+ { &vop_islocked_desc, ufs_islocked }, /* islocked */
+ { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fifo_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fifo_valloc }, /* valloc */
+ { &vop_vfree_desc, lfs_vfree }, /* vfree */
+ { &vop_truncate_desc, fifo_truncate }, /* truncate */
+ { &vop_update_desc, lfs_update }, /* update */
+ { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* null entry ends the table */
+};
+struct vnodeopv_desc lfs_fifoop_opv_desc =
+ { &lfs_fifoop_p, lfs_fifoop_entries }; /* ties the vector pointer to its entry table */
+#endif /* FIFO */
+
+#define LFS_READWRITE
+#include <ufs/ufs/ufs_readwrite.c> /* shared read/write source, compiled here with LFS_READWRITE defined; presumably supplies lfs_read/lfs_write -- TODO confirm */
+#undef LFS_READWRITE
+
+/*
+ * Synch an open file.
+ *
+ * Issues VOP_UPDATE() on the vnode with the current time as both time
+ * stamps, passing LFS_SYNC when the caller asked for MNT_WAIT and 0
+ * otherwise.  Returns VOP_UPDATE()'s result.
+ */
+/* ARGSUSED */
+lfs_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct timeval now;
+	int updflags;
+
+	now = time;
+	if (ap->a_waitfor == MNT_WAIT)
+		updflags = LFS_SYNC;
+	else
+		updflags = 0;
+	return (VOP_UPDATE(ap->a_vp, &now, &now, updflags));
+}
+
+/*
+ * These macros are used to bracket UFS directory ops, so that we can
+ * identify all the pages touched during directory ops which need to
+ * be ordered and flushed atomically, so that they may be recovered.
+ */
+#define SET_DIROP(fs) { \
+ if ((fs)->lfs_writer) \
+ tsleep(&(fs)->lfs_dirops, PRIBIO + 1, "lfs_dirop", 0); \
+ ++(fs)->lfs_dirops; \
+ (fs)->lfs_doifile = 1; \
+}
+
+#define SET_ENDOP(fs) { \
+ --(fs)->lfs_dirops; \
+ if (!(fs)->lfs_dirops) \
+ wakeup(&(fs)->lfs_writer); \
+}
+
+#define MARK_VNODE(dvp) (dvp)->v_flag |= VDIROP
+
+/*
+ * Create a symbolic link as a bracketed LFS directory operation.
+ *
+ * Registers a directory op with SET_DIROP, tags the parent directory
+ * vnode with VDIROP, calls ufs_symlink() and then ends the op with
+ * SET_ENDOP.  Returns whatever ufs_symlink() returns.
+ *
+ * The lfs pointer is fetched once, before the call, so that ap->a_dvp
+ * is not dereferenced again after ufs_symlink() has returned.
+ * NOTE(review): the UFS routine is expected to release the directory
+ * vnode before returning -- confirm against ufs_symlink().
+ */
+int
+lfs_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	struct lfs *fs;
+	int ret;
+
+	fs = VTOI(ap->a_dvp)->i_lfs;
+	SET_DIROP(fs);
+	MARK_VNODE(ap->a_dvp);
+	ret = ufs_symlink(ap);
+	SET_ENDOP(fs);
+	return (ret);
+}
+
+/*
+ * Make a special file as a bracketed LFS directory operation.
+ *
+ * Registers a directory op with SET_DIROP, tags the parent directory
+ * vnode with VDIROP, calls ufs_mknod() and then ends the op with
+ * SET_ENDOP.  Returns whatever ufs_mknod() returns.
+ *
+ * The lfs pointer is fetched once, before the call, so that ap->a_dvp
+ * is not dereferenced again after ufs_mknod() has returned.
+ * NOTE(review): the UFS routine is expected to release the directory
+ * vnode before returning -- confirm against ufs_mknod().
+ */
+int
+lfs_mknod(ap)
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct lfs *fs;
+	int ret;
+
+	fs = VTOI(ap->a_dvp)->i_lfs;
+	SET_DIROP(fs);
+	MARK_VNODE(ap->a_dvp);
+	ret = ufs_mknod(ap);
+	SET_ENDOP(fs);
+	return (ret);
+}
+
+/*
+ * Create a file as a bracketed LFS directory operation.
+ *
+ * Registers a directory op with SET_DIROP, tags the parent directory
+ * vnode with VDIROP, calls ufs_create() and then ends the op with
+ * SET_ENDOP.  Returns whatever ufs_create() returns.
+ *
+ * The lfs pointer is fetched once, before the call, so that ap->a_dvp
+ * is not dereferenced again after ufs_create() has returned.
+ * NOTE(review): the UFS routine is expected to release the directory
+ * vnode before returning -- confirm against ufs_create().
+ */
+int
+lfs_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct lfs *fs;
+	int ret;
+
+	fs = VTOI(ap->a_dvp)->i_lfs;
+	SET_DIROP(fs);
+	MARK_VNODE(ap->a_dvp);
+	ret = ufs_create(ap);
+	SET_ENDOP(fs);
+	return (ret);
+}
+
+/*
+ * Make a directory as a bracketed LFS directory operation.
+ *
+ * Registers a directory op with SET_DIROP, tags the parent directory
+ * vnode with VDIROP, calls ufs_mkdir() and then ends the op with
+ * SET_ENDOP.  Returns whatever ufs_mkdir() returns.
+ *
+ * The lfs pointer is fetched once, before the call, so that ap->a_dvp
+ * is not dereferenced again after ufs_mkdir() has returned.
+ * NOTE(review): the UFS routine is expected to release the directory
+ * vnode before returning -- confirm against ufs_mkdir().
+ */
+int
+lfs_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct lfs *fs;
+	int ret;
+
+	fs = VTOI(ap->a_dvp)->i_lfs;
+	SET_DIROP(fs);
+	MARK_VNODE(ap->a_dvp);
+	ret = ufs_mkdir(ap);
+	SET_ENDOP(fs);
+	return (ret);
+}
+
+/*
+ * Remove a file as a bracketed LFS directory operation.
+ *
+ * Registers a directory op with SET_DIROP, tags both the parent
+ * directory vnode and the vnode being removed with VDIROP, calls
+ * ufs_remove() and then ends the op with SET_ENDOP.  Returns whatever
+ * ufs_remove() returns.
+ *
+ * The lfs pointer is fetched once, before the call, so that ap->a_dvp
+ * is not dereferenced again after ufs_remove() has returned.
+ * NOTE(review): the UFS routine is expected to release the directory
+ * vnode before returning -- confirm against ufs_remove().
+ */
+int
+lfs_remove(ap)
+	struct vop_remove_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct lfs *fs;
+	int ret;
+
+	fs = VTOI(ap->a_dvp)->i_lfs;
+	SET_DIROP(fs);
+	MARK_VNODE(ap->a_dvp);
+	MARK_VNODE(ap->a_vp);
+	ret = ufs_remove(ap);
+	SET_ENDOP(fs);
+	return (ret);
+}
+
+/*
+ * Remove a directory as a bracketed LFS directory operation.
+ *
+ * Registers a directory op with SET_DIROP, tags both the parent
+ * directory vnode and the vnode being removed with VDIROP, calls
+ * ufs_rmdir() and then ends the op with SET_ENDOP.  Returns whatever
+ * ufs_rmdir() returns.
+ *
+ * The lfs pointer is fetched once, before the call, so that ap->a_dvp
+ * is not dereferenced again after ufs_rmdir() has returned.
+ * NOTE(review): the UFS routine is expected to release the directory
+ * vnode before returning -- confirm against ufs_rmdir().
+ */
+int
+lfs_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct lfs *fs;
+	int ret;
+
+	fs = VTOI(ap->a_dvp)->i_lfs;
+	SET_DIROP(fs);
+	MARK_VNODE(ap->a_dvp);
+	MARK_VNODE(ap->a_vp);
+	ret = ufs_rmdir(ap);
+	SET_ENDOP(fs);
+	return (ret);
+}
+
+/*
+ * Make a hard link as a bracketed LFS directory operation.
+ *
+ * Registers a directory op with SET_DIROP, tags ap->a_vp with VDIROP,
+ * calls ufs_link() and then ends the op with SET_ENDOP.  Returns
+ * whatever ufs_link() returns.
+ *
+ * The lfs pointer is fetched once, before the call, so that ap->a_vp
+ * is not dereferenced again after ufs_link() has returned.
+ * NOTE(review): the UFS routine is expected to release that vnode
+ * before returning -- confirm against ufs_link().
+ */
+int
+lfs_link(ap)
+	struct vop_link_args /* {
+		struct vnode *a_vp;
+		struct vnode *a_tdvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct lfs *fs;
+	int ret;
+
+	fs = VTOI(ap->a_vp)->i_lfs;
+	SET_DIROP(fs);
+	MARK_VNODE(ap->a_vp);
+	ret = ufs_link(ap);
+	SET_ENDOP(fs);
+	return (ret);
+}
+
+/*
+ * Rename a file as a bracketed LFS directory operation.
+ *
+ * Registers a directory op with SET_DIROP, tags both the source and the
+ * target directory vnodes with VDIROP, calls ufs_rename() and then ends
+ * the op with SET_ENDOP.  Returns whatever ufs_rename() returns.
+ *
+ * The lfs pointer is fetched once, before the call, so that ap->a_fdvp
+ * is not dereferenced again after ufs_rename() has returned.
+ * NOTE(review): the UFS routine is expected to release the source
+ * directory vnode before returning -- confirm against ufs_rename().
+ */
+int
+lfs_rename(ap)
+	struct vop_rename_args /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	struct lfs *fs;
+	int ret;
+
+	fs = VTOI(ap->a_fdvp)->i_lfs;
+	SET_DIROP(fs);
+	MARK_VNODE(ap->a_fdvp);
+	MARK_VNODE(ap->a_tdvp);
+	ret = ufs_rename(ap);
+	SET_ENDOP(fs);
+	return (ret);
+}
+/*
+ * Return the attributes of an LFS vnode.
+ *
+ * XXX hack to avoid calling ITIMES in getattr: every value is copied
+ * straight from the in-core inode (and the vnode) into *ap->a_vap.
+ * Always returns 0.
+ */
+int
+lfs_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vnp = ap->a_vp;
+	register struct vattr *attr = ap->a_vap;
+	register struct inode *inp = VTOI(vnp);
+
+	/* Identity of the file. */
+	attr->va_type = vnp->v_type;
+	attr->va_fsid = inp->i_dev;
+	attr->va_fileid = inp->i_number;
+	attr->va_gen = inp->i_gen;
+	attr->va_filerev = inp->i_modrev;
+	attr->va_rdev = (dev_t)inp->i_rdev;
+
+	/* Ownership, permissions (file type bits masked off) and flags. */
+	attr->va_mode = inp->i_mode & ~IFMT;
+	attr->va_nlink = inp->i_nlink;
+	attr->va_uid = inp->i_uid;
+	attr->va_gid = inp->i_gid;
+	attr->va_flags = inp->i_flags;
+
+	/* Time stamps, taken as they stand in the inode. */
+	attr->va_atime = inp->i_atime;
+	attr->va_mtime = inp->i_mtime;
+	attr->va_ctime = inp->i_ctime;
+
+	/* Sizes. */
+	attr->va_size = inp->i_din.di_size;
+	attr->va_bytes = dbtob(inp->i_blocks);
+
+	/* this doesn't belong here */
+	switch (vnp->v_type) {
+	case VBLK:
+		attr->va_blocksize = BLKDEV_IOSIZE;
+		break;
+	case VCHR:
+		attr->va_blocksize = MAXBSIZE;
+		break;
+	default:
+		attr->va_blocksize = vnp->v_mount->mnt_stat.f_iosize;
+		break;
+	}
+	return (0);
+}
+/*
+ * Close called
+ *
+ * XXX -- we were using ufs_close, but since it updates the
+ * times on the inode, we might need to bump the uinodes
+ * count.
+ *
+ * When the vnode has more than one user and the inode is not locked,
+ * the inode times are updated with ITIMES; if that is what first set
+ * IN_MODIFIED on the inode, the file system's lfs_uinodes count goes
+ * up by one.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+lfs_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vnp = ap->a_vp;
+	register struct inode *inp = VTOI(vnp);
+	int was_modified;
+
+	/* Nothing to do for the last user or for a locked inode. */
+	if (vnp->v_usecount <= 1 || (inp->i_flag & IN_LOCKED))
+		return (0);
+
+	was_modified = inp->i_flag & IN_MODIFIED;
+	ITIMES(inp, &time, &time);
+	if (was_modified == 0 && (inp->i_flag & IN_MODIFIED) != 0)
+		inp->i_lfs->lfs_uinodes++;
+	return (0);
+}
+
+/*
+ * Stub inactive routine that avoids calling ufs_inactive in some cases:
+ * while lfs_no_inactive is non-zero the call is skipped and 0 is
+ * returned; otherwise the result of ufs_inactive() is returned.
+ */
+int lfs_no_inactive = 0;
+
+int
+lfs_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (lfs_no_inactive ? 0 : ufs_inactive(ap));
+}
diff --git a/sys/ufs/mfs/mfs_extern.h b/sys/ufs/mfs/mfs_extern.h
new file mode 100644
index 000000000000..e357faf6fa54
--- /dev/null
+++ b/sys/ufs/mfs/mfs_extern.h
@@ -0,0 +1,60 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)mfs_extern.h 8.1 (Berkeley) 6/11/93
+ */
+
+struct buf; /* forward declarations of structure tags */
+struct mount;
+struct nameidata;
+struct proc;
+struct statfs;
+struct ucred;
+struct vnode;
+
+__BEGIN_DECLS
+int mfs_badop __P((void));
+int mfs_bmap __P((struct vop_bmap_args *));
+int mfs_close __P((struct vop_close_args *));
+void mfs_doio __P((struct buf *bp, caddr_t base)); /* called by mfs_start() for each queued buffer */
+int mfs_inactive __P((struct vop_inactive_args *)); /* XXX */
+int mfs_reclaim __P((struct vop_reclaim_args *)); /* XXX */
+int mfs_init __P((void)); /* listed in mfs_vfsops */
+int mfs_ioctl __P((struct vop_ioctl_args *));
+int mfs_mount __P((struct mount *mp,
+ char *path, caddr_t data, struct nameidata *ndp, struct proc *p)); /* listed in mfs_vfsops */
+int mfs_open __P((struct vop_open_args *));
+int mfs_print __P((struct vop_print_args *)); /* XXX */
+int mfs_start __P((struct mount *mp, int flags, struct proc *p)); /* listed in mfs_vfsops; services queued MFS I/O */
+int mfs_statfs __P((struct mount *mp, struct statfs *sbp, struct proc *p)); /* listed in mfs_vfsops */
+int mfs_strategy __P((struct vop_strategy_args *)); /* XXX */
+__END_DECLS
diff --git a/sys/ufs/mfs/mfs_vfsops.c b/sys/ufs/mfs/mfs_vfsops.c
new file mode 100644
index 000000000000..3fcbdf379284
--- /dev/null
+++ b/sys/ufs/mfs/mfs_vfsops.c
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 1989, 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)mfs_vfsops.c 8.4 (Berkeley) 4/16/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/signalvar.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+#include <ufs/mfs/mfsnode.h>
+#include <ufs/mfs/mfs_extern.h>
+
+caddr_t mfs_rootbase; /* address of mini-root in kernel virtual memory */
+u_long mfs_rootsize; /* size of mini-root in bytes */
+
+static int mfs_minor; /* used for building internal dev_t */
+
+extern int (**mfs_vnodeop_p)(); /* vnode operations vector installed on MFS device vnodes */
+
+/*
+ * mfs vfs operations.
+ *
+ * MFS supplies its own mount, start, statfs and init routines; the
+ * remaining entries reuse the ffs_* and ufs_* implementations.
+ */
+struct vfsops mfs_vfsops = {
+ mfs_mount,
+ mfs_start,
+ ffs_unmount,
+ ufs_root,
+ ufs_quotactl,
+ mfs_statfs,
+ ffs_sync,
+ ffs_vget,
+ ffs_fhtovp,
+ ffs_vptofh,
+ mfs_init,
+};
+
+/*
+ * Called by main() when mfs is going to be mounted as root.
+ *
+ * Name is updated by mount(8) after booting.
+ *
+ * Builds a read-only mount structure, turns rootvp into an MFS vnode
+ * backed by the mini-root recorded in mfs_rootbase/mfs_rootsize, mounts
+ * it with ffs_mountfs(), puts it on the mount list as the root file
+ * system and fills in its statfs information.  Returns 0 on success or
+ * the error from ffs_mountfs() or vfs_lock(); panics when the swap or
+ * root device vnodes cannot be set up.
+ */
+#define ROOTNAME "mfs_root"
+
+mfs_mountroot()
+{
+ extern struct vnode *rootvp;
+ register struct fs *fs;
+ register struct mount *mp;
+ struct proc *p = curproc; /* XXX */
+ struct ufsmount *ump;
+ struct mfsnode *mfsp;
+ u_int size;
+ int error;
+
+ /*
+ * Get vnodes for swapdev and rootdev.
+ */
+ if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
+ panic("mfs_mountroot: can't setup bdevvp's");
+
+ mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+ bzero((char *)mp, (u_long)sizeof(struct mount));
+ mp->mnt_op = &mfs_vfsops;
+ mp->mnt_flag = MNT_RDONLY;
+ mfsp = malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
+ rootvp->v_data = mfsp; /* the root device vnode becomes an MFS vnode */
+ rootvp->v_op = mfs_vnodeop_p;
+ rootvp->v_tag = VT_MFS;
+ mfsp->mfs_baseoff = mfs_rootbase;
+ mfsp->mfs_size = mfs_rootsize;
+ mfsp->mfs_vnode = rootvp;
+ mfsp->mfs_pid = p->p_pid;
+ mfsp->mfs_buflist = (struct buf *)0; /* no I/O requests queued yet */
+ if (error = ffs_mountfs(rootvp, mp, p)) {
+ free(mp, M_MOUNT);
+ free(mfsp, M_MFSNODE); /* NOTE(review): rootvp->v_data still points at mfsp here */
+ return (error);
+ }
+ if (error = vfs_lock(mp)) {
+ (void)ffs_unmount(mp, 0, p);
+ free(mp, M_MOUNT);
+ free(mfsp, M_MFSNODE); /* NOTE(review): rootvp->v_data still points at mfsp here */
+ return (error);
+ }
+ TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+ mp->mnt_flag |= MNT_ROOTFS;
+ mp->mnt_vnodecovered = NULLVP;
+ ump = VFSTOUFS(mp);
+ fs = ump->um_fs;
+ bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
+ fs->fs_fsmnt[0] = '/'; /* mounted on "/" */
+ bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+ MNAMELEN);
+ (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); /* zero the rest of the name buffer */
+ (void)ffs_statfs(mp, &mp->mnt_stat, p);
+ vfs_unlock(mp);
+ inittodr((time_t)0);
+ return (0);
+}
+
+/*
+ * This is called early in boot to set the base address and size
+ * of the mini-root.
+ *
+ * The superblock is looked for SBOFF bytes past base.  When it is
+ * valid, mfs_mountroot is installed as the root mount routine, the
+ * base and size of the mini-root are recorded, rootdev is given the
+ * next internal MFS device number, and the size in bytes is returned.
+ * Otherwise nothing is changed and 0 is returned.
+ */
+mfs_initminiroot(base)
+	caddr_t base;
+{
+	extern int (*mountroot)();
+	struct fs *sb;
+
+	sb = (struct fs *)(base + SBOFF);
+
+	/* check for valid super block */
+	if (sb->fs_magic == FS_MAGIC && sb->fs_bsize <= MAXBSIZE &&
+	    sb->fs_bsize >= sizeof(struct fs)) {
+		mountroot = mfs_mountroot;
+		mfs_rootbase = base;
+		mfs_rootsize = sb->fs_fsize * sb->fs_size;
+		rootdev = makedev(255, mfs_minor++);
+		return (mfs_rootsize);
+	}
+	return (0);
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * Copies the mfs_args structure in from data.  For an update request
+ * (MNT_UPDATE) it only adjusts the read-only state of the existing
+ * mount and, with EXPORTMFS, its export information.  Otherwise it
+ * creates a block-device vnode with an internal device number, attaches
+ * an mfsnode describing the memory region given by args.base and
+ * args.size, mounts it with ffs_mountfs(), and records the mount point
+ * and source names.  Returns 0 on success or the error from copyin(),
+ * ffs_flushfiles(), getnewvnode() or ffs_mountfs(); EBUSY when the
+ * mount cannot be marked busy for the flush.  The ndp argument is not
+ * used.
+ */
+/* ARGSUSED */
+int
+mfs_mount(mp, path, data, ndp, p)
+ register struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ struct vnode *devvp;
+ struct mfs_args args;
+ struct ufsmount *ump;
+ register struct fs *fs;
+ register struct mfsnode *mfsp;
+ u_int size;
+ int flags, error;
+
+ if (error = copyin(data, (caddr_t)&args, sizeof (struct mfs_args)))
+ return (error);
+
+ /*
+ * If updating, check whether changing from read-only to
+ * read/write; if there is no device name, that's all we do.
+ */
+ if (mp->mnt_flag & MNT_UPDATE) {
+ ump = VFSTOUFS(mp);
+ fs = ump->um_fs;
+ if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { /* read/write mount asked to go read-only */
+ flags = WRITECLOSE;
+ if (mp->mnt_flag & MNT_FORCE)
+ flags |= FORCECLOSE;
+ if (vfs_busy(mp))
+ return (EBUSY);
+ error = ffs_flushfiles(mp, flags, p);
+ vfs_unbusy(mp);
+ if (error)
+ return (error);
+ }
+ if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR))
+ fs->fs_ronly = 0;
+#ifdef EXPORTMFS
+ if (args.fspec == 0)
+ return (vfs_export(mp, &ump->um_export, &args.export));
+#endif
+ return (0);
+ }
+ error = getnewvnode(VT_MFS, (struct mount *)0, mfs_vnodeop_p, &devvp);
+ if (error)
+ return (error);
+ devvp->v_type = VBLK;
+ if (checkalias(devvp, makedev(255, mfs_minor++), (struct mount *)0))
+ panic("mfs_mount: dup dev");
+ mfsp = (struct mfsnode *)malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
+ devvp->v_data = mfsp;
+ mfsp->mfs_baseoff = args.base;
+ mfsp->mfs_size = args.size;
+ mfsp->mfs_vnode = devvp;
+ mfsp->mfs_pid = p->p_pid; /* pid of the mounting process */
+ mfsp->mfs_buflist = (struct buf *)0; /* no I/O requests queued yet */
+ if (error = ffs_mountfs(devvp, mp, p)) {
+ mfsp->mfs_buflist = (struct buf *)-1; /* the value mfs_start() treats as its exit condition */
+ vrele(devvp);
+ return (error);
+ }
+ ump = VFSTOUFS(mp);
+ fs = ump->um_fs;
+ (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); /* copyinstr() result deliberately ignored */
+ bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); /* zero the rest of the name buffer */
+ bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+ MNAMELEN);
+ (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ (void) mfs_statfs(mp, &mp->mnt_stat, p);
+ return (0);
+}
+
+int mfs_pri = PWAIT | PCATCH;		/* XXX prob. temp */
+
+/*
+ * Used to grab the process and keep it in the kernel to service
+ * memory filesystem I/O requests.
+ *
+ * Loop servicing I/O requests.
+ * Copy the requested data into or out of the memory filesystem
+ * address space.
+ *
+ * The loop runs until the mfsnode's buffer list holds the value
+ * (struct buf *)-1.  Each pass drains the queued buffers through
+ * mfs_doio(), waking the waiter on each one, and then sleeps on the
+ * device vnode.  Returns the result of the last tsleep().  The flags
+ * argument is not used.
+ */
+/* ARGSUSED */
+int
+mfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	register struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
+	register struct mfsnode *node = VTOMFS(devvp);
+	register struct buf *req;
+	register caddr_t base;
+	int error = 0;
+
+	base = node->mfs_baseoff;
+	for (;;) {
+		if (node->mfs_buflist == (struct buf *)(-1))
+			break;
+
+		/* Service everything that is queued at the moment. */
+		while ((req = node->mfs_buflist) != (struct buf *)0) {
+			node->mfs_buflist = req->b_actf;
+			mfs_doio(req, base);
+			wakeup((caddr_t)req);
+		}
+
+		/*
+		 * If a non-ignored signal is received, try to unmount.
+		 * If that fails, clear the signal (it has been "processed"),
+		 * otherwise we will loop here, as tsleep will always return
+		 * EINTR/ERESTART.
+		 */
+		error = tsleep((caddr_t)devvp, mfs_pri, "mfsidl", 0);
+		if (error != 0 && dounmount(mp, 0, p) != 0) {
+			CLRSIG(p, CURSIG(p));
+		}
+	}
+	return (error);
+}
+
+/*
+ * Get file system statistics.
+ *
+ * Lets ffs_statfs() fill in *sbp, then overrides the reported file
+ * system type with MOUNT_MFS.  Returns ffs_statfs()'s result.
+ */
+mfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	int rv = ffs_statfs(mp, sbp, p);
+
+	sbp->f_type = MOUNT_MFS;
+	return (rv);
+}
diff --git a/sys/ufs/mfs/mfs_vnops.c b/sys/ufs/mfs/mfs_vnops.c
new file mode 100644
index 000000000000..71adf069b1db
--- /dev/null
+++ b/sys/ufs/mfs/mfs_vnops.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)mfs_vnops.c 8.3 (Berkeley) 9/21/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/map.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <machine/vmparam.h>
+
+#include <ufs/mfs/mfsnode.h>
+#include <ufs/mfs/mfsiom.h>
+#include <ufs/mfs/mfs_extern.h>
+
+#if !defined(hp300) && !defined(i386) && !defined(mips) && !defined(sparc) && !defined(luna68k)
+/*
+ * Resources used by the mfs_doio() variant that has to map physical
+ * I/O buffers into kernel virtual space before copying.
+ */
+/* Nonzero while somebody sleeps in mfs_doio() waiting for map entries. */
+static int mfsmap_want; /* 1 => need kernel I/O resources */
+/* Allocation map for the mapping pte's; set up by rminit() in mfs_init(). */
+struct map mfsmap[MFS_MAPSIZE];
+/* Kernel virtual window, indexed in NBPG units; defined elsewhere. */
+extern char mfsiobuf[];
+#endif
+
+/*
+ * mfs vnode operations.
+ *
+ * Only open, close, ioctl, strategy, bmap, inactive, reclaim and print
+ * are real functions in this file. Most of the remaining mfs_* names
+ * are macros from mfsnode.h that alias mfs_badop (which panics), nullop
+ * or vn_bwrite; fsync is handled by spec_fsync. The table is terminated
+ * by an entry of two null pointers.
+ */
+int (**mfs_vnodeop_p)();
+struct vnodeopv_entry_desc mfs_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, mfs_lookup }, /* lookup */
+ { &vop_create_desc, mfs_create }, /* create */
+ { &vop_mknod_desc, mfs_mknod }, /* mknod */
+ { &vop_open_desc, mfs_open }, /* open */
+ { &vop_close_desc, mfs_close }, /* close */
+ { &vop_access_desc, mfs_access }, /* access */
+ { &vop_getattr_desc, mfs_getattr }, /* getattr */
+ { &vop_setattr_desc, mfs_setattr }, /* setattr */
+ { &vop_read_desc, mfs_read }, /* read */
+ { &vop_write_desc, mfs_write }, /* write */
+ { &vop_ioctl_desc, mfs_ioctl }, /* ioctl */
+ { &vop_select_desc, mfs_select }, /* select */
+ { &vop_mmap_desc, mfs_mmap }, /* mmap */
+ { &vop_fsync_desc, spec_fsync }, /* fsync */
+ { &vop_seek_desc, mfs_seek }, /* seek */
+ { &vop_remove_desc, mfs_remove }, /* remove */
+ { &vop_link_desc, mfs_link }, /* link */
+ { &vop_rename_desc, mfs_rename }, /* rename */
+ { &vop_mkdir_desc, mfs_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, mfs_rmdir }, /* rmdir */
+ { &vop_symlink_desc, mfs_symlink }, /* symlink */
+ { &vop_readdir_desc, mfs_readdir }, /* readdir */
+ { &vop_readlink_desc, mfs_readlink }, /* readlink */
+ { &vop_abortop_desc, mfs_abortop }, /* abortop */
+ { &vop_inactive_desc, mfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, mfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, mfs_lock }, /* lock */
+ { &vop_unlock_desc, mfs_unlock }, /* unlock */
+ { &vop_bmap_desc, mfs_bmap }, /* bmap */
+ { &vop_strategy_desc, mfs_strategy }, /* strategy */
+ { &vop_print_desc, mfs_print }, /* print */
+ { &vop_islocked_desc, mfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, mfs_pathconf }, /* pathconf */
+ { &vop_advlock_desc, mfs_advlock }, /* advlock */
+ { &vop_blkatoff_desc, mfs_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, mfs_valloc }, /* valloc */
+ { &vop_vfree_desc, mfs_vfree }, /* vfree */
+ { &vop_truncate_desc, mfs_truncate }, /* truncate */
+ { &vop_update_desc, mfs_update }, /* update */
+ { &vop_bwrite_desc, mfs_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+/* Pairs the operations vector pointer with the entry table above. */
+struct vnodeopv_desc mfs_vnodeop_opv_desc =
+ { &mfs_vnodeop_p, mfs_vnodeop_entries };
+
+/*
+ * Vnode Operations.
+ *
+ * Open is called to let the memory filesystem validate the vnode
+ * before any actual I/O. The only check made is that the vnode is a
+ * block device; anything else is a kernel bug and panics.
+ *
+ * Returns 0 on success (a_mode, a_cred and a_p are not examined).
+ */
+/* ARGSUSED */
+int
+mfs_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	if (ap->a_vp->v_type != VBLK) {
+		/* Name this routine so the panic is traced to the right place. */
+		panic("mfs_open not VBLK");
+		/* NOTREACHED */
+	}
+	return (0);
+}
+
+/*
+ * Ioctl operation.
+ *
+ * No ioctl commands are implemented: every request is rejected with
+ * ENOTTY and none of the arguments are examined.
+ */
+/* ARGSUSED */
+int
+mfs_ioctl(ap)
+ struct vop_ioctl_args /* {
+ struct vnode *a_vp;
+ int a_command;
+ caddr_t a_data;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ return (ENOTTY);
+}
+
+/*
+ * Hand an I/O request to the memory filesystem.
+ *
+ * Three cases, chosen by the pid stored in the mfsnode:
+ *   - pid 0 (mini-root): copy directly with bcopy() and finish the buf;
+ *   - pid of the current process: do the I/O right here via mfs_doio();
+ *   - otherwise: put the buf at the head of the request list and wake
+ *     the process sleeping on the device vnode.
+ * Always returns 0; panics if the device vnode cannot be found or is
+ * unreferenced.
+ */
+int
+mfs_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	register struct buf *req = ap->a_bp;
+	register struct mfsnode *node;
+	struct vnode *devvp;
+	struct proc *self = curproc;	/* XXX */
+	caddr_t fsaddr;
+
+	if (!vfinddev(req->b_dev, VBLK, &devvp) || devvp->v_usecount == 0)
+		panic("mfs_strategy: bad dev");
+	node = VTOMFS(devvp);
+
+	/* check for mini-root access */
+	if (node->mfs_pid == 0) {
+		fsaddr = node->mfs_baseoff + (req->b_blkno << DEV_BSHIFT);
+		if ((req->b_flags & B_READ) != 0)
+			bcopy(fsaddr, req->b_data, req->b_bcount);
+		else
+			bcopy(req->b_data, fsaddr, req->b_bcount);
+		biodone(req);
+		return (0);
+	}
+
+	/* I/O issued by the servicing process itself: do it inline. */
+	if (node->mfs_pid == self->p_pid) {
+		mfs_doio(req, node->mfs_baseoff);
+		return (0);
+	}
+
+	/* Queue the request and wake whoever sleeps on the vnode. */
+	req->b_actf = node->mfs_buflist;
+	node->mfs_buflist = req;
+	wakeup((caddr_t)devvp);
+	return (0);
+}
+
+#if defined(vax) || defined(tahoe)
+/*
+ * Memory file system I/O.
+ *
+ * Essentially play ubasetup() and disk interrupt service routine by
+ * doing the copies to or from the memfs process. If doing physio
+ * (i.e. pagein), we must map the I/O through the kernel virtual
+ * address space.
+ *
+ * bp is the request to perform; base is the start of the filesystem
+ * image, to which the block offset of the request is added. A copy
+ * failure is recorded in bp->b_error and flagged with B_ERROR. The
+ * buf is always completed with biodone().
+ */
+void
+mfs_doio(bp, base)
+ register struct buf *bp;
+ caddr_t base;
+{
+ register struct pte *pte, *ppte;
+ register caddr_t vaddr;
+ int off, npf, npf2, reg;
+ caddr_t kernaddr, offset;
+
+ /*
+ * For phys I/O, map the b_data into kernel virtual space using
+ * the Mfsiomap pte's.
+ */
+ if ((bp->b_flags & B_PHYS) == 0) {
+ /* Not physical I/O: b_data is used as the kernel address as is. */
+ kernaddr = bp->b_data;
+ } else {
+ if (bp->b_flags & (B_PAGET | B_UAREA | B_DIRTY))
+ panic("swap on memfs?");
+ /* Offset of the data within its first page, and pages spanned. */
+ off = (int)bp->b_data & PGOFSET;
+ npf = btoc(bp->b_bcount + off);
+ /*
+ * Get some mapping page table entries
+ */
+ while ((reg = rmalloc(mfsmap, (long)npf)) == 0) {
+ /* None free: note that we are waiting and sleep until rmfree. */
+ mfsmap_want++;
+ sleep((caddr_t)&mfsmap_want, PZERO-1);
+ }
+ /* The map hands out 1-based slots; convert to a 0-based index. */
+ reg--;
+ pte = vtopte(bp->b_proc, btop(bp->b_data));
+ /*
+ * Do vmaccess() but with the Mfsiomap page table.
+ */
+ ppte = &Mfsiomap[reg];
+ vaddr = &mfsiobuf[reg * NBPG];
+ kernaddr = vaddr + off;
+ /* Map each page of the request into the mfsiobuf window. */
+ for (npf2 = npf; npf2; npf2--) {
+ mapin(ppte, (u_int)vaddr, pte->pg_pfnum,
+ (int)(PG_V|PG_KW));
+#if defined(tahoe)
+ if ((bp->b_flags & B_READ) == 0)
+ mtpr(P1DC, vaddr);
+#endif
+ ppte++;
+ pte++;
+ vaddr += NBPG;
+ }
+ }
+ /* Address of the requested block inside the filesystem image. */
+ offset = base + (bp->b_blkno << DEV_BSHIFT);
+ /* A read copies from the image into the buffer; a write goes back. */
+ if (bp->b_flags & B_READ)
+ bp->b_error = copyin(offset, kernaddr, bp->b_bcount);
+ else
+ bp->b_error = copyout(kernaddr, offset, bp->b_bcount);
+ if (bp->b_error)
+ bp->b_flags |= B_ERROR;
+ /*
+ * Release pte's used by physical I/O.
+ */
+ if (bp->b_flags & B_PHYS) {
+ /* ++reg restores the 1-based slot number obtained from rmalloc. */
+ rmfree(mfsmap, (long)npf, (long)++reg);
+ if (mfsmap_want) {
+ mfsmap_want = 0;
+ wakeup((caddr_t)&mfsmap_want);
+ }
+ }
+ biodone(bp);
+}
+#endif /* vax || tahoe */
+
+#if defined(hp300) || defined(i386) || defined(mips) || defined(sparc) || defined(luna68k)
+/*
+ * Memory file system I/O.
+ *
+ * Trivial on the HP since the buffer has already been mapped into KVA
+ * space: locate the block inside the filesystem image, copy in the
+ * direction given by B_READ, record any copy error on the buf and
+ * complete it.
+ */
+void
+mfs_doio(bp, base)
+	register struct buf *bp;
+	caddr_t base;
+{
+	caddr_t fsaddr = base + (bp->b_blkno << DEV_BSHIFT);
+
+	bp->b_error = (bp->b_flags & B_READ) ?
+	    copyin(fsaddr, bp->b_data, bp->b_bcount) :
+	    copyout(bp->b_data, fsaddr, bp->b_bcount);
+	if (bp->b_error != 0)
+		bp->b_flags |= B_ERROR;
+	biodone(bp);
+}
+#endif
+
+/*
+ * This is a noop, simply returning what one has been given: the
+ * vnode maps to itself and logical block numbers are returned
+ * unchanged. Each output pointer is optional and skipped when NULL.
+ *
+ * The run length is reported as zero so that a caller which asked for
+ * it never reads an uninitialized value.
+ *
+ * Always returns 0.
+ */
+int
+mfs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = ap->a_vp;
+	if (ap->a_bnp != NULL)
+		*ap->a_bnp = ap->a_bn;
+	/* No contiguous run is claimed beyond the requested block. */
+	if (ap->a_runp != NULL)
+		*ap->a_runp = 0;
+	return (0);
+}
+
+/*
+ * Memory filesystem close routine
+ *
+ * Services any queued requests, invalidates the vnode's buffers and
+ * then posts the exit marker in mfs_buflist so that the servicing
+ * loop in mfs_start() terminates. Returns the error from vinvalbuf()
+ * if that fails, otherwise 0.
+ */
+/* ARGSUSED */
+int
+mfs_close(ap)
+ struct vop_close_args /* {
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct mfsnode *mfsp = VTOMFS(vp);
+ register struct buf *bp;
+ int error;
+
+ /*
+ * Finish any pending I/O requests.
+ */
+ while (bp = mfsp->mfs_buflist) {
+ mfsp->mfs_buflist = bp->b_actf;
+ mfs_doio(bp, mfsp->mfs_baseoff);
+ wakeup((caddr_t)bp);
+ }
+ /*
+ * On last close of a memory filesystem
+ * we must invalidate any in core blocks, so that
+ * we can free up its vnode.
+ */
+ if (error = vinvalbuf(vp, 1, ap->a_cred, ap->a_p, 0, 0))
+ return (error);
+ /*
+ * There should be no way to have any more uses of this
+ * vnode, so if we find any other uses, it is a panic.
+ */
+ if (vp->v_usecount > 1)
+ printf("mfs_close: ref count %d > 1\n", vp->v_usecount);
+ if (vp->v_usecount > 1 || mfsp->mfs_buflist)
+ panic("mfs_close");
+ /*
+ * Send a request to the filesystem server to exit.
+ */
+ /* (struct buf *)(-1) is the exit marker tested by mfs_start(). */
+ mfsp->mfs_buflist = (struct buf *)(-1);
+ wakeup((caddr_t)vp);
+ return (0);
+}
+
+/*
+ * Memory filesystem inactive routine
+ *
+ * Sanity check only: by the time the vnode goes inactive the request
+ * list must be empty or hold the (struct buf *)(-1) exit marker.
+ * Anything else means I/O is still queued, which is a panic.
+ */
+/* ARGSUSED */
+int
+mfs_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct buf *pending = VTOMFS(ap->a_vp)->mfs_buflist;
+
+	if (pending != NULL && pending != (struct buf *)(-1))
+		panic("mfs_inactive: not inactive (mfs_buflist %x)",
+		    pending);
+	return (0);
+}
+
+/*
+ * Reclaim a memory filesystem devvp so that it can be reused:
+ * release the mfsnode hanging off the vnode and clear the
+ * now-dangling private data pointer. Always returns 0.
+ */
+int
+mfs_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+
+	FREE(vp->v_data, M_MFSNODE);
+	vp->v_data = NULL;
+	return (0);
+}
+
+/*
+ * Print out the contents of an mfsnode: the supporting pid, the base
+ * address and the size. Always returns 0.
+ */
+int
+mfs_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ register struct mfsnode *mfsp = VTOMFS(ap->a_vp);
+
+ /*
+ * NOTE(review): mfs_baseoff is a caddr_t and mfs_size a long, yet
+ * both are printed with %d -- confirm this is what the kernel
+ * printf expects on every supported machine.
+ */
+ printf("tag VT_MFS, pid %d, base %d, size %d\n", mfsp->mfs_pid,
+ mfsp->mfs_baseoff, mfsp->mfs_size);
+ return (0);
+}
+
+/*
+ * Block device bad operation
+ *
+ * Shared target for every vnode operation that must never be invoked
+ * on the mfs device vnode (see the mfs_* aliases in mfsnode.h); being
+ * called at all is treated as fatal.
+ */
+int
+mfs_badop()
+{
+
+ panic("mfs_badop called\n");
+ /* NOTREACHED */
+}
+
+/*
+ * Memory based filesystem initialization.
+ *
+ * On machines that map physical I/O through the mfsiobuf window this
+ * sets up the allocation map for the mapping pte's; elsewhere there is
+ * nothing to do. Always returns 0, so that the (implicitly int) result
+ * is well defined for any caller that looks at it.
+ */
+int
+mfs_init()
+{
+
+#if !defined(hp300) && !defined(i386) && !defined(mips) && !defined(sparc) && !defined(luna68k)
+	rminit(mfsmap, (long)MFS_MAPREG, (long)1, "mfs mapreg", MFS_MAPSIZE);
+#endif
+	return (0);
+}
diff --git a/sys/ufs/mfs/mfsiom.h b/sys/ufs/mfs/mfsiom.h
new file mode 100644
index 000000000000..98aca855f6a6
--- /dev/null
+++ b/sys/ufs/mfs/mfsiom.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)mfsiom.h 8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Sizing of the resource map used by mfs_doio() on machines that must
+ * map physical I/O into kernel virtual space. Both values are passed
+ * to rminit() in mfs_init().
+ */
+#define MFS_MAPREG (MAXPHYS/NBPG + 2) /* Kernel mapping pte's */
+#define MFS_MAPSIZE 10 /* Size of alloc map for pte's */
diff --git a/sys/ufs/mfs/mfsnode.h b/sys/ufs/mfs/mfsnode.h
new file mode 100644
index 000000000000..4480ab024077
--- /dev/null
+++ b/sys/ufs/mfs/mfsnode.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)mfsnode.h 8.2 (Berkeley) 8/11/93
+ */
+
+/*
+ * This structure defines the control data for the memory based file system.
+ *
+ * mfs_pid selects how mfs_strategy() performs I/O: 0 means mini-root
+ * access (direct bcopy), otherwise it is compared with the pid of the
+ * requesting process. mfs_buflist is linked through b_actf; the value
+ * (struct buf *)(-1) is a marker telling the servicing loop to exit.
+ */
+
+struct mfsnode {
+ struct vnode *mfs_vnode; /* vnode associated with this mfsnode */
+ caddr_t mfs_baseoff; /* base of file system in memory */
+ long mfs_size; /* size of memory file system */
+ pid_t mfs_pid; /* supporting process pid */
+ struct buf *mfs_buflist; /* list of I/O requests */
+ long mfs_spare[4];
+};
+
+/*
+ * Convert between mfsnode pointers and vnode pointers
+ *
+ * VTOMFS casts the vnode's v_data; MFSTOV follows the mfs_vnode back
+ * pointer stored in the mfsnode.
+ */
+#define VTOMFS(vp) ((struct mfsnode *)(vp)->v_data)
+#define MFSTOV(mfsp) ((mfsp)->mfs_vnode)
+
+/*
+ * Prototypes for MFS operations on vnodes.
+ *
+ * These are not separate functions: each name is a cast of a shared
+ * routine to the signature of the corresponding vnode operation.
+ * Operations aliased to mfs_badop must never be called; lock, unlock
+ * and islocked are aliased to nullop, and bwrite to vn_bwrite.
+ */
+#define mfs_lookup ((int (*) __P((struct vop_lookup_args *)))mfs_badop)
+#define mfs_create ((int (*) __P((struct vop_create_args *)))mfs_badop)
+#define mfs_mknod ((int (*) __P((struct vop_mknod_args *)))mfs_badop)
+#define mfs_access ((int (*) __P((struct vop_access_args *)))mfs_badop)
+#define mfs_getattr ((int (*) __P((struct vop_getattr_args *)))mfs_badop)
+#define mfs_setattr ((int (*) __P((struct vop_setattr_args *)))mfs_badop)
+#define mfs_read ((int (*) __P((struct vop_read_args *)))mfs_badop)
+#define mfs_write ((int (*) __P((struct vop_write_args *)))mfs_badop)
+#define mfs_select ((int (*) __P((struct vop_select_args *)))mfs_badop)
+#define mfs_mmap ((int (*) __P((struct vop_mmap_args *)))mfs_badop)
+#define mfs_seek ((int (*) __P((struct vop_seek_args *)))mfs_badop)
+#define mfs_remove ((int (*) __P((struct vop_remove_args *)))mfs_badop)
+#define mfs_link ((int (*) __P((struct vop_link_args *)))mfs_badop)
+#define mfs_rename ((int (*) __P((struct vop_rename_args *)))mfs_badop)
+#define mfs_mkdir ((int (*) __P((struct vop_mkdir_args *)))mfs_badop)
+#define mfs_rmdir ((int (*) __P((struct vop_rmdir_args *)))mfs_badop)
+#define mfs_symlink ((int (*) __P((struct vop_symlink_args *)))mfs_badop)
+#define mfs_readdir ((int (*) __P((struct vop_readdir_args *)))mfs_badop)
+#define mfs_readlink ((int (*) __P((struct vop_readlink_args *)))mfs_badop)
+#define mfs_abortop ((int (*) __P((struct vop_abortop_args *)))mfs_badop)
+#define mfs_lock ((int (*) __P((struct vop_lock_args *)))nullop)
+#define mfs_unlock ((int (*) __P((struct vop_unlock_args *)))nullop)
+#define mfs_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+#define mfs_pathconf ((int (*) __P((struct vop_pathconf_args *)))mfs_badop)
+#define mfs_advlock ((int (*) __P((struct vop_advlock_args *)))mfs_badop)
+#define mfs_blkatoff ((int (*) __P((struct vop_blkatoff_args *)))mfs_badop)
+#define mfs_valloc ((int (*) __P((struct vop_valloc_args *)))mfs_badop)
+#define mfs_vfree ((int (*) __P((struct vop_vfree_args *)))mfs_badop)
+#define mfs_truncate ((int (*) __P((struct vop_truncate_args *)))mfs_badop)
+#define mfs_update ((int (*) __P((struct vop_update_args *)))mfs_badop)
+#define mfs_bwrite ((int (*) __P((struct vop_bwrite_args *)))vn_bwrite)
diff --git a/sys/ufs/ufs/dinode.h b/sys/ufs/ufs/dinode.h
new file mode 100644
index 000000000000..5b9915d9cfdb
--- /dev/null
+++ b/sys/ufs/ufs/dinode.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 1982, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)dinode.h 8.3 (Berkeley) 1/21/94
+ */
+
+/*
+ * The root inode is the root of the file system. Inode 0 can't be used for
+ * normal purposes and historically bad blocks were linked to inode 1, thus
+ * the root inode is 2. (Inode 1 is no longer used for this purpose, however
+ * numerous dump tapes make this assumption, so we are stuck with it).
+ */
+#define ROOTINO ((ino_t)2)
+
+/*
+ * A dinode contains all the meta-data associated with a UFS file.
+ * This structure defines the on-disk format of a dinode.
+ *
+ * The number leading each field comment is the byte offset of that
+ * field within the structure. Because this is an on-disk layout,
+ * fields must not be reordered or resized.
+ */
+
+#define NDADDR 12 /* Direct addresses in inode. */
+#define NIADDR 3 /* Indirect addresses in inode. */
+
+struct dinode {
+ u_short di_mode; /* 0: IFMT and permissions. */
+ short di_nlink; /* 2: File link count. */
+ /* The same four bytes hold old-style ids (FFS) or the inode number (LFS). */
+ union {
+ u_short oldids[2]; /* 4: Ffs: old user and group ids. */
+ ino_t inumber; /* 4: Lfs: inode number. */
+ } di_u;
+ u_quad_t di_size; /* 8: File byte count. */
+ struct timespec di_atime; /* 16: Last access time. */
+ struct timespec di_mtime; /* 24: Last modified time. */
+ struct timespec di_ctime; /* 32: Last inode change time. */
+ daddr_t di_db[NDADDR]; /* 40: Direct disk blocks. */
+ daddr_t di_ib[NIADDR]; /* 88: Indirect disk blocks. */
+ u_long di_flags; /* 100: Status flags (chflags). */
+ long di_blocks; /* 104: Blocks actually held. */
+ long di_gen; /* 108: Generation number. */
+ u_long di_uid; /* 112: File owner. */
+ u_long di_gid; /* 116: File group. */
+ long di_spare[2]; /* 120: Reserved; currently unused */
+};
+
+/*
+ * The di_db fields may be overlaid with other information for
+ * file types that do not have associated disk storage. Block
+ * and character devices overlay the first data block with their
+ * dev_t value. Short symbolic links place their path in the
+ * di_db area.
+ *
+ * MAXSYMLINKLEN is sized from both the direct and the indirect block
+ * arrays, so a short link path starting at di_db may run on into di_ib.
+ */
+#define di_inumber di_u.inumber
+#define di_ogid di_u.oldids[1]
+#define di_ouid di_u.oldids[0]
+#define di_rdev di_db[0]
+#define di_shortlink di_db
+#define MAXSYMLINKLEN ((NDADDR + NIADDR) * sizeof(daddr_t))
+
+/* File modes. */
+/* Octal bit values stored in di_mode alongside the file type bits. */
+#define IEXEC 0000100 /* Executable. */
+#define IWRITE 0000200 /* Writeable. */
+#define IREAD 0000400 /* Readable. */
+#define ISVTX 0001000 /* Sticky bit. */
+#define ISGID 0002000 /* Set-gid. */
+#define ISUID 0004000 /* Set-uid. */
+
+/* File types. */
+/* A type is one value of the IFMT field, not an independent flag bit. */
+#define IFMT 0170000 /* Mask of file type. */
+#define IFIFO 0010000 /* Named pipe (fifo). */
+#define IFCHR 0020000 /* Character device. */
+#define IFDIR 0040000 /* Directory file. */
+#define IFBLK 0060000 /* Block device. */
+#define IFREG 0100000 /* Regular file. */
+#define IFLNK 0120000 /* Symbolic link. */
+#define IFSOCK 0140000 /* UNIX domain socket. */
diff --git a/sys/ufs/ufs/dir.h b/sys/ufs/ufs/dir.h
new file mode 100644
index 000000000000..c51bd1cf6e16
--- /dev/null
+++ b/sys/ufs/ufs/dir.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)dir.h 8.2 (Berkeley) 1/21/94
+ */
+
+#ifndef _DIR_H_
+#define _DIR_H_
+
+/*
+ * A directory consists of some number of blocks of DIRBLKSIZ
+ * bytes, where DIRBLKSIZ is chosen such that it can be transferred
+ * to disk in a single atomic operation (e.g. 512 bytes on most machines).
+ *
+ * Each DIRBLKSIZ byte block contains some number of directory entry
+ * structures, which are of variable length. Each directory entry has
+ * a struct direct at the front of it, containing its inode number,
+ * the length of the entry, and the length of the name contained in
+ * the entry. These are followed by the name padded to a 4 byte boundary
+ * with null bytes. All names are guaranteed null terminated.
+ * The maximum length of a name in a directory is MAXNAMLEN.
+ *
+ * The macro DIRSIZ(fmt, dp) gives the amount of space required to represent
+ * a directory entry. Free space in a directory is represented by
+ * entries which have dp->d_reclen > DIRSIZ(fmt, dp). All DIRBLKSIZ bytes
+ * in a directory block are claimed by the directory entries. This
+ * usually results in the last entry in a directory having a large
+ * dp->d_reclen. When entries are deleted from a directory, the
+ * space is returned to the previous entry in the same directory
+ * block by increasing its dp->d_reclen. If the first entry of
+ * a directory block is free, then its dp->d_ino is set to 0.
+ * Entries other than the first in a directory do not normally have
+ * dp->d_ino set to 0.
+ */
+/* Size of one directory block, and the longest permitted entry name. */
+#define DIRBLKSIZ DEV_BSIZE
+#define MAXNAMLEN 255
+
+/*
+ * Header of a directory entry. d_name is declared at its maximum size,
+ * but DIRSIZ() computes the space an entry actually needs from
+ * d_namlen.
+ */
+struct direct {
+ u_long d_ino; /* inode number of entry */
+ u_short d_reclen; /* length of this record */
+ u_char d_type; /* file type, see below */
+ u_char d_namlen; /* length of string in d_name */
+ char d_name[MAXNAMLEN + 1]; /* name with length <= MAXNAMLEN */
+};
+
+/*
+ * File types
+ *
+ * Each value is the file type field of a mode (mask 0170000) shifted
+ * right by 12 bits, which is exactly what IFTODT() computes.
+ */
+#define DT_UNKNOWN 0
+#define DT_FIFO 1
+#define DT_CHR 2
+#define DT_DIR 4
+#define DT_BLK 6
+#define DT_REG 8
+#define DT_LNK 10
+#define DT_SOCK 12
+
+/*
+ * Convert between stat structure types and directory types.
+ */
+#define IFTODT(mode) (((mode) & 0170000) >> 12)
+#define DTTOIF(dirtype) ((dirtype) << 12)
+
+/*
+ * The DIRSIZ macro gives the minimum record length which will hold
+ * the directory entry. This requires the amount of space in struct direct
+ * without the d_name field, plus enough space for the name with a terminating
+ * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary.
+ *
+ * On little-endian machines an old-format entry takes the name length
+ * from d_type instead of d_namlen. Presumably this is because the old
+ * format stored a 16-bit name length in the two bytes now split into
+ * d_type and d_namlen (compare struct odirtemplate), so its low byte
+ * lands in d_type there -- TODO confirm.
+ */
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+#define DIRSIZ(oldfmt, dp) \
+ ((oldfmt) ? \
+ ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_type+1 + 3) &~ 3)) : \
+ ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3)))
+#else
+#define DIRSIZ(oldfmt, dp) \
+ ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3))
+#endif
+/* Values for the oldfmt argument of DIRSIZ(). */
+#define OLDDIRFMT 1
+#define NEWDIRFMT 0
+
+/*
+ * Template for manipulating directories.
+ * Should use struct direct's, but the name field
+ * is MAXNAMLEN - 1, and this just won't do.
+ *
+ * The two groups of fields mirror the header of struct direct, with a
+ * four byte name each; the field names suggest they describe the "."
+ * and ".." entries.
+ */
+struct dirtemplate {
+ u_long dot_ino;
+ short dot_reclen;
+ u_char dot_type;
+ u_char dot_namlen;
+ char dot_name[4]; /* must be multiple of 4 */
+ u_long dotdot_ino;
+ short dotdot_reclen;
+ u_char dotdot_type;
+ u_char dotdot_namlen;
+ char dotdot_name[4]; /* ditto */
+};
+
+/*
+ * This is the old format of directories, sans type element.
+ * Here the name length is a single u_short occupying the space that
+ * struct dirtemplate splits into a type byte and a name length byte.
+ */
+struct odirtemplate {
+ u_long dot_ino;
+ short dot_reclen;
+ u_short dot_namlen;
+ char dot_name[4]; /* must be multiple of 4 */
+ u_long dotdot_ino;
+ short dotdot_reclen;
+ u_short dotdot_namlen;
+ char dotdot_name[4]; /* ditto */
+};
+#endif /* !_DIR_H_ */
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
new file mode 100644
index 000000000000..df155967a7df
--- /dev/null
+++ b/sys/ufs/ufs/inode.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 1982, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)inode.h 8.4 (Berkeley) 1/21/94
+ */
+
+#include <ufs/ufs/dinode.h>
+
+/*
+ * Theoretically, directories can be more than 2Gb in length, however, in
+ * practice this seems unlikely. So, we define the type doff_t as a long
+ * to keep down the cost of doing lookup on a 32-bit machine. If you are
+ * porting to a 64-bit architecture, you should make doff_t the same as off_t.
+ */
+#define doff_t long
+
+/*
+ * The inode is used to describe each active (or recently active)
+ * file in the UFS filesystem. It is composed of two types of
+ * information. The first part is the information that is needed
+ * only while the file is active (such as the identity of the file
+ * and linkage to speed its lookup). The second part is the
+ * permanent meta-data associated with the file which is read
+ * in from the permanent dinode from long term storage when the
+ * file becomes active, and is put back when the file is no longer
+ * being used.
+ */
+struct inode {
+ struct inode *i_next; /* Hash chain forward. */
+ struct inode **i_prev; /* Hash chain back. */
+ struct vnode *i_vnode; /* Vnode associated with this inode. */
+ struct vnode *i_devvp; /* Vnode for block I/O. */
+ u_long i_flag; /* I* flags. */
+ dev_t i_dev; /* Device associated with the inode. */
+ ino_t i_number; /* The identity of the inode. */
+ union { /* Associated filesystem. */
+ struct fs *fs; /* FFS */
+ struct lfs *lfs; /* LFS */
+ } inode_u;
+#define i_fs inode_u.fs
+#define i_lfs inode_u.lfs
+ struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
+ u_quad_t i_modrev; /* Revision level for lease. */
+ struct lockf *i_lockf; /* Head of byte-level lock list. */
+ pid_t i_lockholder; /* DEBUG: holder of inode lock. */
+ pid_t i_lockwaiter; /* DEBUG: latest blocked for inode lock. */
+ /*
+ * Side effects; used during directory lookup.
+ */
+ long i_count; /* Size of free slot in directory. */
+ doff_t i_endoff; /* End of useful stuff in directory. */
+ doff_t i_diroff; /* Offset in dir, where we found last entry. */
+ doff_t i_offset; /* Offset of free space in directory. */
+ ino_t i_ino; /* Inode number of found directory. */
+ u_long i_reclen; /* Size of found directory entry. */
+ long i_spare[11]; /* Spares to round up to 128 bytes. */
+ /*
+ * The on-disk dinode itself.
+ */
+ struct dinode i_din; /* 128 bytes of the on-disk dinode. */
+};
+
+/*
+ * Shorthand for the members of the dinode embedded in struct inode:
+ * ip->i_xxx expands to ip->i_din.di_xxx.
+ */
+#define i_atime i_din.di_atime
+#define i_blocks i_din.di_blocks
+#define i_ctime i_din.di_ctime
+#define i_db i_din.di_db
+#define i_flags i_din.di_flags
+#define i_gen i_din.di_gen
+#define i_gid i_din.di_gid
+#define i_ib i_din.di_ib
+#define i_mode i_din.di_mode
+#define i_mtime i_din.di_mtime
+#define i_nlink i_din.di_nlink
+#define i_rdev i_din.di_rdev
+#define i_shortlink i_din.di_shortlink
+#define i_size i_din.di_size
+#define i_uid i_din.di_uid
+
+/* These flags are kept in i_flag. */
+#define IN_ACCESS 0x0001 /* Access time update request. */
+#define IN_CHANGE 0x0002 /* Inode change time update request. */
+#define IN_EXLOCK 0x0004 /* File has exclusive lock. */
+#define IN_LOCKED 0x0008 /* Inode lock. */
+#define IN_LWAIT 0x0010 /* Process waiting on file lock. */
+#define IN_MODIFIED 0x0020 /* Inode has been modified. */
+#define IN_RENAME 0x0040 /* Inode is being renamed. */
+#define IN_SHLOCK 0x0080 /* File has shared lock. */
+#define IN_UPDATE 0x0100 /* Modification time update request. */
+#define IN_WANTED 0x0200 /* Inode is wanted by a process. */
+
+#ifdef KERNEL
+/*
+ * Structure used to pass around logical block paths generated by
+ * ufs_getlbns and used by truncate and bmap code.
+ */
+struct indir {
+ daddr_t in_lbn; /* Logical block number. */
+ int in_off; /* Offset in buffer. */
+ int in_exists; /* Flag if the block exists. */
+};
+
+/* Convert between inode pointers and vnode pointers. */
+#define VTOI(vp) ((struct inode *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
+
+/*
+ * ITIMES applies pending timestamp updates to the inode ip.
+ *
+ * If any of IN_ACCESS, IN_CHANGE or IN_UPDATE is set in ip->i_flag, the
+ * inode is marked IN_MODIFIED and then:
+ * IN_ACCESS: i_atime.ts_sec is set from t1->tv_sec;
+ * IN_UPDATE: i_mtime.ts_sec is set from t2->tv_sec and i_modrev is
+ * incremented;
+ * IN_CHANGE: i_ctime.ts_sec is set from the variable `time' visible at
+ * the point of use, not from either macro argument.
+ * Finally the three request bits are cleared. Only the seconds field of
+ * each timestamp is written. If none of the bits is set nothing happens.
+ *
+ * NOTE(review): the expansion is a bare brace block rather than
+ * do { } while (0), so "if (x) ITIMES(...); else ..." does not compile,
+ * and ip is evaluated several times, so arguments with side effects are
+ * unsafe.
+ */
+#define ITIMES(ip, t1, t2) { \
+ if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \
+ (ip)->i_flag |= IN_MODIFIED; \
+ if ((ip)->i_flag & IN_ACCESS) \
+ (ip)->i_atime.ts_sec = (t1)->tv_sec; \
+ if ((ip)->i_flag & IN_UPDATE) { \
+ (ip)->i_mtime.ts_sec = (t2)->tv_sec; \
+ (ip)->i_modrev++; \
+ } \
+ if ((ip)->i_flag & IN_CHANGE) \
+ (ip)->i_ctime.ts_sec = time.tv_sec; \
+ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \
+ } \
+}
+
+/* This overlays the fid structure (see mount.h). */
+struct ufid {
+ u_short ufid_len; /* Length of structure. */
+ u_short ufid_pad; /* Force long alignment. */
+ ino_t ufid_ino; /* File number (ino). */
+ long ufid_gen; /* Generation number. */
+};
+#endif /* KERNEL */
diff --git a/sys/ufs/ufs/lockf.h b/sys/ufs/ufs/lockf.h
new file mode 100644
index 000000000000..0ec61dbb0cfe
--- /dev/null
+++ b/sys/ufs/ufs/lockf.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Scooter Morris at Genentech Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lockf.h 8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * The lockf structure is a kernel structure which contains the information
+ * associated with a byte range lock. The lockf structures are linked into
+ * the inode structure. Locks are sorted by the starting byte of the lock for
+ * efficiency.
+ */
+struct lockf {
+ short lf_flags; /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
+ short lf_type; /* Lock type: F_RDLCK, F_WRLCK */
+ off_t lf_start; /* The byte # of the start of the lock */
+ off_t lf_end; /* The byte # of the end of the lock (-1=EOF)*/
+ caddr_t lf_id; /* The id of the resource holding the lock */
+ struct inode *lf_inode; /* Back pointer to the inode */
+ struct lockf *lf_next; /* A pointer to the next lock on this inode */
+ struct lockf *lf_block; /* The list of blocked locks */
+};
+
+/* Maximum length of sleep chains to traverse to try and detect deadlock. */
+#define MAXDEPTH 50
+
+__BEGIN_DECLS
+void lf_addblock __P((struct lockf *, struct lockf *));
+int lf_clearlock __P((struct lockf *));
+int lf_findoverlap __P((struct lockf *,
+ struct lockf *, int, struct lockf ***, struct lockf **));
+struct lockf *
+ lf_getblock __P((struct lockf *));
+int lf_getlock __P((struct lockf *, struct flock *));
+int lf_setlock __P((struct lockf *));
+void lf_split __P((struct lockf *, struct lockf *));
+void lf_wakelock __P((struct lockf *));
+__END_DECLS
+
+#ifdef LOCKF_DEBUG
+extern int lockf_debug;
+
+__BEGIN_DECLS
+void lf_print __P((char *, struct lockf *));
+void lf_printlist __P((char *, struct lockf *));
+__END_DECLS
+#endif
diff --git a/sys/ufs/ufs/quota.h b/sys/ufs/ufs/quota.h
new file mode 100644
index 000000000000..11efb402c91c
--- /dev/null
+++ b/sys/ufs/ufs/quota.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Robert Elz at The University of Melbourne.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)quota.h 8.1 (Berkeley) 6/11/93
+ */
+
+#ifndef _QUOTA_
+#define _QUOTA_
+
+/*
+ * Definitions for disk quotas imposed on the average user
+ * (big brother finally hits UNIX).
+ *
+ * The following constants define the amount of time given a user before the
+ * soft limits are treated as hard limits (usually resulting in an allocation
+ * failure). The timer is started when the user crosses their soft limit, it
+ * is reset when they go below their soft limit.
+ */
+#define MAX_IQ_TIME (7*24*60*60) /* 1 week */
+#define MAX_DQ_TIME (7*24*60*60) /* 1 week */
+
+/*
+ * The following constants define the usage of the quota file array in the
+ * ufsmount structure and dquot array in the inode structure. The semantics
+ * of the elements of these arrays are defined in the routine getinoquota;
+ * the remainder of the quota code treats them generically and need not be
+ * inspected when changing the size of the array.
+ */
+#define MAXQUOTAS 2
+#define USRQUOTA 0 /* element used for user quotas */
+#define GRPQUOTA 1 /* element used for group quotas */
+
+/*
+ * Definitions for the default names of the quotas files.
+ *
+ * INITQFNAMES expands to a brace-enclosed initializer list of strings;
+ * the first two are labelled with the USRQUOTA and GRPQUOTA indices.
+ *
+ * NOTE(review): the list holds three strings although MAXQUOTAS is 2, and
+ * the expansion already ends in a semicolon, so "x[] = INITQFNAMES;"
+ * produces an extra empty ";". Confirm both are intended before changing
+ * either, since users of the macro are not visible here.
+ */
+#define INITQFNAMES { \
+ "user", /* USRQUOTA */ \
+ "group", /* GRPQUOTA */ \
+ "undefined", \
+};
+#define QUOTAFILENAME "quota" /* presumably the quota file base name -- confirm */
+#define QUOTAGROUP "operator" /* presumably a group name -- confirm against users */
+
+/*
+ * Command definitions for the 'quotactl' system call. The commands are
+ * broken into a main command defined below and a subcommand that is used
+ * to convey the type of quota that is being manipulated (see above).
+ *
+ * QCMD(cmd, type) places cmd above bit SUBCMDSHIFT and the quota type,
+ * masked with SUBCMDMASK, in the low 8 bits.
+ *
+ * NOTE(review): the Q_* values below already have a zero low byte;
+ * whether callers pass them through QCMD() (which shifts again) or use
+ * them directly is not visible in this header.
+ */
+#define SUBCMDMASK 0x00ff
+#define SUBCMDSHIFT 8
+#define QCMD(cmd, type) (((cmd) << SUBCMDSHIFT) | ((type) & SUBCMDMASK))
+
+#define Q_QUOTAON 0x0100 /* enable quotas */
+#define Q_QUOTAOFF 0x0200 /* disable quotas */
+#define Q_GETQUOTA 0x0300 /* get limits and usage */
+#define Q_SETQUOTA 0x0400 /* set limits and usage */
+#define Q_SETUSE 0x0500 /* set usage */
+#define Q_SYNC 0x0600 /* sync disk copy of a filesystem's quotas */
+
+/*
+ * The following structure defines the format of the disk quota file
+ * (as it appears on disk) - the file is an array of these structures
+ * indexed by user or group number. The setquota system call establishes
+ * the vnode for each quota file (a pointer is retained in the ufsmount
+ * structure).
+ */
+struct dqblk {
+ u_long dqb_bhardlimit; /* absolute limit on disk blks alloc */
+ u_long dqb_bsoftlimit; /* preferred limit on disk blks */
+ u_long dqb_curblocks; /* current block count */
+ u_long dqb_ihardlimit; /* maximum # allocated inodes + 1 */
+ u_long dqb_isoftlimit; /* preferred inode limit */
+ u_long dqb_curinodes; /* current # allocated inodes */
+ time_t dqb_btime; /* time limit for excessive disk use */
+ time_t dqb_itime; /* time limit for excessive files */
+};
+
+/*
+ * The following structure records disk usage for a user or group on a
+ * filesystem. There is one allocated for each quota that exists on any
+ * filesystem for the current user or group. A cache is kept of recently
+ * used entries.
+ */
+struct dquot {
+ struct dquot *dq_forw, **dq_back; /* hash list */
+ struct dquot *dq_freef, **dq_freeb; /* free list */
+ short dq_flags; /* flags, see below */
+ short dq_cnt; /* count of active references */
+ short dq_spare; /* unused spare padding */
+ short dq_type; /* quota type of this dquot */
+ u_long dq_id; /* identifier this applies to */
+ struct ufsmount *dq_ump; /* filesystem that this is taken from */
+ struct dqblk dq_dqb; /* actual usage & quotas */
+};
+/*
+ * Flag values.
+ */
+#define DQ_LOCK 0x01 /* this quota locked (no MODS) */
+#define DQ_WANT 0x02 /* wakeup on unlock */
+#define DQ_MOD 0x04 /* this quota modified since read */
+#define DQ_FAKE 0x08 /* no limits here, just usage */
+#define DQ_BLKS 0x10 /* has been warned about blk limit */
+#define DQ_INODS 0x20 /* has been warned about inode limit */
+/*
+ * Shorthand notation.
+ */
+#define dq_bhardlimit dq_dqb.dqb_bhardlimit
+#define dq_bsoftlimit dq_dqb.dqb_bsoftlimit
+#define dq_curblocks dq_dqb.dqb_curblocks
+#define dq_ihardlimit dq_dqb.dqb_ihardlimit
+#define dq_isoftlimit dq_dqb.dqb_isoftlimit
+#define dq_curinodes dq_dqb.dqb_curinodes
+#define dq_btime dq_dqb.dqb_btime
+#define dq_itime dq_dqb.dqb_itime
+
+/*
+ * If the system has never checked for a quota for this file, then it is set
+ * to NODQUOT. Once a write attempt is made the inode pointer is set to
+ * reference a dquot structure.
+ */
+#define NODQUOT ((struct dquot *) 0)
+
+/*
+ * Flags to chkdq() and chkiq()
+ */
+#define FORCE 0x01 /* force usage changes independent of limits */
+#define CHOWN 0x02 /* (advisory) change initiated by chown */
+
+/*
+ * Macros to avoid subroutine calls to trivial functions.
+ */
+#ifdef DIAGNOSTIC
+#define DQREF(dq) dqref(dq)
+#else
+#define DQREF(dq) (dq)->dq_cnt++
+#endif
+
+#include <sys/cdefs.h>
+
+struct dquot;
+struct inode;
+struct mount;
+struct proc;
+struct ucred;
+struct ufsmount;
+struct vnode;
+__BEGIN_DECLS
+int chkdq __P((struct inode *, long, struct ucred *, int));
+int chkdqchg __P((struct inode *, long, struct ucred *, int));
+int chkiq __P((struct inode *, long, struct ucred *, int));
+int chkiqchg __P((struct inode *, long, struct ucred *, int));
+void dqflush __P((struct vnode *));
+int dqget __P((struct vnode *,
+ u_long, struct ufsmount *, int, struct dquot **));
+void dqinit __P((void));
+void dqref __P((struct dquot *));
+void dqrele __P((struct vnode *, struct dquot *));
+int dqsync __P((struct vnode *, struct dquot *));
+int getinoquota __P((struct inode *));
+int getquota __P((struct mount *, u_long, int, caddr_t));
+int qsync __P((struct mount *mp));
+int quotaoff __P((struct proc *, struct mount *, int));
+int quotaon __P((struct proc *, struct mount *, int, caddr_t));
+int setquota __P((struct mount *, u_long, int, caddr_t));
+int setuse __P((struct mount *, u_long, int, caddr_t));
+int ufs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *));
+__END_DECLS
+
+#ifdef DIAGNOSTIC
+__BEGIN_DECLS
+void chkdquot __P((struct inode *));
+__END_DECLS
+#endif
+
+#endif /* _QUOTA_ */
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
new file mode 100644
index 000000000000..bcd838d036a1
--- /dev/null
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_bmap.c 8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/resourcevar.h>
+#include <sys/trace.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Bmap converts the logical block number of a file to its physical block
+ * number on the disk. The conversion is done by using the logical block
+ * number to index into the array of block pointers described by the dinode.
+ *
+ * Both outputs are optional: a_vpp, when non-NULL, receives the inode's
+ * vnode for block I/O; a_bnp, when non-NULL, receives the result of
+ * ufs_bmaparray() for a_bn. Returns 0 when no mapping was requested,
+ * otherwise whatever ufs_bmaparray() returns.
+ */
+int
+ufs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	/* Report the underlying vnode first, if the caller wants it. */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = VTOI(ap->a_vp)->i_devvp;
+
+	/* Do the logical to physical mapping only when a result slot exists. */
+	if (ap->a_bnp != NULL)
+		return (ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL,
+		    NULL, ap->a_runp));
+
+	return (0);
+}
+
+/*
+ * Indirect blocks are now on the vnode for the file. They are given negative
+ * logical block numbers. Indirect blocks are addressed by the negative
+ * address of the first data block to which they point. Double indirect blocks
+ * are addressed by one less than the address of the first indirect block to
+ * which they point. Triple indirect blocks are addressed by one less than
+ * the address of the first double indirect block to which they point.
+ *
+ * ufs_bmaparray does the bmap conversion, and if requested returns the
+ * array of logical blocks which must be traversed to get to a block.
+ * Each entry contains the offset into that block that gets you to the
+ * next block and the disk address of the block (if it is assigned).
+ *
+ * vp	vnode of the file being mapped.
+ * bn	logical block number to translate.
+ * bnp	receives the disk address of bn, or -1 if none is assigned.
+ * ap	optional array that receives the path computed by ufs_getlbns();
+ *	it must have room for NIADDR + 1 entries. in_exists is set to 1
+ *	in each entry whose indirect block was fetched here.
+ * nump	optional; receives the number of entries in the path. ap and
+ *	nump must be both NULL or both non-NULL (checked under DIAGNOSTIC).
+ * runp	optional; receives the number of blocks following bn that
+ *	is_sequential() reports as contiguous, limited to
+ *	MAXBSIZE / f_iosize - 1.
+ *
+ * Returns 0 on success, or the error from ufs_getlbns() or biowait().
+ */
+
+int
+ufs_bmaparray(vp, bn, bnp, ap, nump, runp)
+	struct vnode *vp;
+	register daddr_t bn;
+	daddr_t *bnp;
+	struct indir *ap;
+	int *nump;
+	int *runp;
+{
+	register struct inode *ip;
+	struct buf *bp;
+	struct ufsmount *ump;
+	struct mount *mp;
+	/*
+	 * ufs_getlbns() stores one entry for the i_ib slot plus one entry
+	 * per level of indirection, so the scratch array needs NIADDR + 1
+	 * slots; NIADDR alone is overrun for triple indirect blocks.
+	 */
+	struct indir a[NIADDR + 1], *xap;
+	daddr_t daddr;
+	long metalbn;
+	int error, maxrun, num;
+
+	ip = VTOI(vp);
+	mp = vp->v_mount;
+	ump = VFSTOUFS(mp);
+#ifdef DIAGNOSTIC
+	if (ap != NULL && nump == NULL || ap == NULL && nump != NULL)
+		panic("ufs_bmaparray: invalid arguments");
+#endif
+
+	/* Only meaningful when runp is set; initialized to keep it defined. */
+	maxrun = 0;
+	if (runp) {
+		/*
+		 * XXX
+		 * If MAXBSIZE is the largest transfer the disks can handle,
+		 * we probably want maxrun to be 1 block less so that we
+		 * don't create a block larger than the device can handle.
+		 */
+		*runp = 0;
+		maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1;
+	}
+
+	/* Use the caller's array if given, otherwise the local scratch one. */
+	xap = ap == NULL ? a : ap;
+	if (!nump)
+		nump = &num;
+	if ((error = ufs_getlbns(vp, bn, xap, nump)) != 0)
+		return (error);
+
+	num = *nump;
+	if (num == 0) {
+		/* Direct block: the address comes straight from the inode. */
+		*bnp = blkptrtodb(ump, ip->i_db[bn]);
+		if (*bnp == 0)
+			*bnp = -1;
+		else if (runp)
+			for (++bn; bn < NDADDR && *runp < maxrun &&
+			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
+			    ++bn, ++*runp);
+		return (0);
+	}
+
+
+	/* Get disk address out of indirect block array */
+	daddr = ip->i_ib[xap->in_off];
+
+	for (bp = NULL, ++xap; --num; ++xap) {
+		/*
+		 * Exit the loop if there is no disk address assigned yet and
+		 * the indirect block isn't in the cache, or if we were
+		 * looking for an indirect block and we've found it.
+		 */
+
+		metalbn = xap->in_lbn;
+		if (daddr == 0 && !incore(vp, metalbn) || metalbn == bn)
+			break;
+		/*
+		 * If we get here, we've either got the block in the cache
+		 * or we have a disk address for it, go fetch it.
+		 */
+		if (bp)
+			brelse(bp);
+
+		xap->in_exists = 1;
+		bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0);
+		if (bp->b_flags & (B_DONE | B_DELWRI)) {
+			/* Trace with the same size that was passed to getblk(). */
+			trace(TR_BREADHIT,
+			    pack(vp, mp->mnt_stat.f_iosize), metalbn);
+		}
+#ifdef DIAGNOSTIC
+		else if (!daddr)
+			panic("ufs_bmaparray: indirect block not in cache");
+#endif
+		else {
+			trace(TR_BREADMISS,
+			    pack(vp, mp->mnt_stat.f_iosize), metalbn);
+			bp->b_blkno = blkptrtodb(ump, daddr);
+			bp->b_flags |= B_READ;
+			VOP_STRATEGY(bp);
+			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+			if ((error = biowait(bp)) != 0) {
+				brelse(bp);
+				return (error);
+			}
+		}
+
+		daddr = ((daddr_t *)bp->b_data)[xap->in_off];
+		/* At the last level, count the contiguous blocks that follow. */
+		if (num == 1 && daddr && runp)
+			for (bn = xap->in_off + 1;
+			    bn < MNINDIR(ump) && *runp < maxrun &&
+			    is_sequential(ump, ((daddr_t *)bp->b_data)[bn - 1],
+			    ((daddr_t *)bp->b_data)[bn]);
+			    ++bn, ++*runp);
+	}
+	if (bp)
+		brelse(bp);
+
+	daddr = blkptrtodb(ump, daddr);
+	*bnp = daddr == 0 ? -1 : daddr;
+	return (0);
+}
+
+/*
+ * Create an array of logical block number/offset pairs which represent the
+ * path of indirect blocks required to access a data block. The first "pair"
+ * contains the logical block number of the appropriate single, double or
+ * triple indirect block and the offset into the inode indirect block array.
+ * Note, the logical block number of the inode single/double/triple indirect
+ * block appears twice in the array, once with the offset into the i_ib and
+ * once with the offset into the page itself.
+ *
+ * vp	vnode of the file; only its mount point is used, to obtain the
+ *	ufsmount handed to MNINDIR().
+ * bn	logical block number; if negative its magnitude is used for the
+ *	level computation while the original value is kept for matching
+ *	against meta-block numbers.
+ * ap	array filled with one struct indir per step (in_lbn and in_off
+ *	set, in_exists cleared). Up to NIADDR + 1 entries may be written:
+ *	one for the i_ib slot plus one per level of indirection.
+ * nump	if non-NULL, receives the number of entries stored in ap
+ *	(0 when bn is a direct block).
+ *
+ * Returns 0 on success, or EFBIG if bn cannot be reached with NIADDR
+ * levels of indirection.
+ */
+int
+ufs_getlbns(vp, bn, ap, nump)
+ struct vnode *vp;
+ register daddr_t bn;
+ struct indir *ap;
+ int *nump;
+{
+ long metalbn, realbn;
+ struct ufsmount *ump;
+ int blockcnt, i, numlevels, off;
+
+ ump = VFSTOUFS(vp->v_mount);
+ /* Report zero levels unless indirect blocks turn out to be needed. */
+ if (nump)
+ *nump = 0;
+ numlevels = 0;
+ /* Keep the caller's value; bn itself is reduced to a magnitude below. */
+ realbn = bn;
+ if ((long)bn < 0)
+ bn = -(long)bn;
+
+ /* The first NDADDR blocks are direct blocks. */
+ if (bn < NDADDR)
+ return (0);
+
+ /*
+ * Determine the number of levels of indirection. After this loop
+ * is done, blockcnt indicates the number of data blocks possible
+ * at the given level of indirection, and NIADDR - i is the number
+ * of levels of indirection needed to locate the requested block.
+ */
+ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+ /* All NIADDR levels tried without covering bn. */
+ if (i == 0)
+ return (EFBIG);
+ blockcnt *= MNINDIR(ump);
+ if (bn < blockcnt)
+ break;
+ }
+
+ /* Calculate the address of the first meta-block. */
+ if (realbn >= 0)
+ metalbn = -(realbn - bn + NIADDR - i);
+ else
+ metalbn = -(-realbn - bn + NIADDR - i);
+
+ /*
+ * At each iteration, off is the offset into the bap array which is
+ * an array of disk addresses at the current level of indirection.
+ * The logical block number and the offset in that block are stored
+ * into the argument array.
+ */
+ /* First entry: index into the inode's i_ib array. */
+ ap->in_lbn = metalbn;
+ ap->in_off = off = NIADDR - i;
+ ap->in_exists = 0;
+ ap++;
+ for (++numlevels; i <= NIADDR; i++) {
+ /* If searching for a meta-data block, quit when found. */
+ if (metalbn == realbn)
+ break;
+
+ /* blockcnt becomes the number of blocks under one slot at this level. */
+ blockcnt /= MNINDIR(ump);
+ off = (bn / blockcnt) % MNINDIR(ump);
+
+ ++numlevels;
+ ap->in_lbn = metalbn;
+ ap->in_off = off;
+ ap->in_exists = 0;
+ ++ap;
+
+ /* Equivalent to metalbn = metalbn + 1 - off * blockcnt. */
+ metalbn -= -1 + off * blockcnt;
+ }
+ if (nump)
+ *nump = numlevels;
+ return (0);
+}
diff --git a/sys/ufs/ufs/ufs_disksubr.c b/sys/ufs/ufs/ufs_disksubr.c
new file mode 100644
index 000000000000..78dede4da773
--- /dev/null
+++ b/sys/ufs/ufs/ufs_disksubr.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/disklabel.h>
+#include <sys/syslog.h>
+
+/*
+ * Seek sort for disks. We depend on the driver which calls us using b_resid
+ * as the current cylinder number.
+ *
+ * The argument ap structure holds a b_actf activity chain pointer on which we
+ * keep two queues, sorted in ascending cylinder order. The first queue holds
+ * those requests which are positioned after the current cylinder (in the first
+ * request); the second holds requests which came in after their cylinder number
+ * was passed. Thus we implement a one way scan, retracting after reaching the
+ * end of the drive to the first request on the second queue, at which time it
+ * becomes the first queue.
+ *
+ * A one-way scan is natural because of the way UNIX read-ahead blocks are
+ * allocated.
+ *
+ * Only the b_actf link of ap is used; bp is the request being inserted and
+ * is linked into the chain through its own b_actf. Both queues live on the
+ * same chain: the second queue starts where a request has a smaller
+ * cylinder than its predecessor. Requests on the same cylinder are ordered
+ * by b_blkno.
+ */
+
+/*
+ * For portability with historic industry practice, the
+ * cylinder number has to be maintained in the `b_resid'
+ * field.
+ */
+#define b_cylinder b_resid
+
+void
+disksort(ap, bp)
+ register struct buf *ap, *bp;
+{
+ register struct buf *bq;
+
+ /* If the queue is empty, then it's easy. */
+ if (ap->b_actf == NULL) {
+ bp->b_actf = NULL;
+ ap->b_actf = bp;
+ return;
+ }
+
+ /*
+ * If we lie before the first (currently active) request, then we
+ * must locate the second request list and add ourselves to it.
+ */
+ bq = ap->b_actf;
+ if (bp->b_cylinder < bq->b_cylinder) {
+ while (bq->b_actf) {
+ /*
+ * Check for an ``inversion'' in the normally ascending
+ * cylinder numbers, indicating the start of the second
+ * request list.
+ */
+ if (bq->b_actf->b_cylinder < bq->b_cylinder) {
+ /*
+ * Search the second request list for the first
+ * request at a larger cylinder number. We go
+ * before that; if there is no such request, we
+ * go at end.
+ */
+ do {
+ if (bp->b_cylinder <
+ bq->b_actf->b_cylinder)
+ goto insert;
+ if (bp->b_cylinder ==
+ bq->b_actf->b_cylinder &&
+ bp->b_blkno < bq->b_actf->b_blkno)
+ goto insert;
+ bq = bq->b_actf;
+ } while (bq->b_actf);
+ goto insert; /* after last */
+ }
+ bq = bq->b_actf;
+ }
+ /*
+ * No inversions... we will go after the last, and
+ * be the first request in the second request list.
+ */
+ goto insert;
+ }
+ /*
+ * Request is at/after the current request...
+ * sort in the first request list.
+ */
+ while (bq->b_actf) {
+ /*
+ * We want to go after the current request if there is an
+ * inversion after it (i.e. it is the end of the first
+ * request list), or if the next request is a larger cylinder
+ * than our request.
+ */
+ if (bq->b_actf->b_cylinder < bq->b_cylinder ||
+ bp->b_cylinder < bq->b_actf->b_cylinder ||
+ (bp->b_cylinder == bq->b_actf->b_cylinder &&
+ bp->b_blkno < bq->b_actf->b_blkno))
+ goto insert;
+ bq = bq->b_actf;
+ }
+ /*
+ * Neither a second list nor a larger request... we go at the end of
+ * the first list, which is the same as the end of the whole schebang.
+ */
+insert: bp->b_actf = bq->b_actf; /* splice bp in directly after bq */
+ bq->b_actf = bp;
+}
+
+/*
+ * Attempt to read a disk label from a device using the indicated strategy
+ * routine.  The label must be partly set up before this: secpercyl and
+ * anything required in the strategy routine (e.g., sector size) must be
+ * filled in before calling us.  Returns NULL on success and an error
+ * string on failure.
+ *
+ * dev is the device to read, strat its strategy routine, and lp the label
+ * to fill in.  Before the read, a zero d_secperunit or a zero size for
+ * partition 0 is replaced by 0x1fffffff and the label is forced to a
+ * single partition at offset 0.  One sector (d_secsize bytes) at
+ * LABELSECTOR is then read and examined at every sizeof(long) boundary;
+ * the first candidate with both magic numbers, at most MAXPARTITIONS
+ * partitions and a zero checksum is copied into *lp.
+ *
+ * The search is bounded by the d_secsize bytes actually read, matching
+ * writedisklabel, so it cannot run past the buffer when a sector is
+ * smaller than DEV_BSIZE.
+ */
+char *
+readdisklabel(dev, strat, lp)
+	dev_t dev;
+	int (*strat)();
+	register struct disklabel *lp;
+{
+	register struct buf *bp;
+	struct disklabel *dlp, *last;
+	char *msg = NULL;
+
+	/* Fill in minimal defaults: one partition starting at offset 0. */
+	if (lp->d_secperunit == 0)
+		lp->d_secperunit = 0x1fffffff;
+	lp->d_npartitions = 1;
+	if (lp->d_partitions[0].p_size == 0)
+		lp->d_partitions[0].p_size = 0x1fffffff;
+	lp->d_partitions[0].p_offset = 0;
+
+	bp = geteblk((int)lp->d_secsize);
+	bp->b_dev = dev;
+	bp->b_blkno = LABELSECTOR;
+	bp->b_bcount = lp->d_secsize;
+	bp->b_flags = B_BUSY | B_READ;
+	bp->b_cylinder = LABELSECTOR / lp->d_secpercyl;
+	(*strat)(bp);
+	if (biowait(bp)) {
+		msg = "I/O error";
+		goto done;
+	}
+
+	/*
+	 * "last" is the highest address at which a complete label still
+	 * fits inside the sector.  It is computed before the loop because
+	 * a successful match overwrites *lp (and with it d_secsize).
+	 */
+	last = (struct disklabel *)((char *)bp->b_data + lp->d_secsize -
+	    sizeof(*dlp));
+
+	/*
+	 * Until a label is accepted the answer is "no disk label"; a
+	 * candidate with good magic but bad contents upgrades that to
+	 * "disk label corrupted", which later non-matching positions do
+	 * not downgrade again.
+	 */
+	msg = "no disk label";
+	for (dlp = (struct disklabel *)bp->b_data; dlp <= last;
+	    dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
+		if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC)
+			continue;
+		/* Bound d_npartitions before dkcksum() walks the table. */
+		if (dlp->d_npartitions > MAXPARTITIONS ||
+		    dkcksum(dlp) != 0) {
+			msg = "disk label corrupted";
+			continue;
+		}
+		*lp = *dlp;
+		msg = NULL;
+		break;
+	}
+done:
+	bp->b_flags = B_INVAL | B_AGE;
+	brelse(bp);
+	return (msg);
+}
+
+/*
+ * Check new disk label for sensibility before setting it.
+ *
+ * olp is the label currently in effect and nlp the proposed replacement.
+ * Each bit i set in openmask names a partition i (presumably one that is
+ * open) whose placement must be preserved.
+ *
+ * Returns EINVAL if nlp has a bad magic number, claims more than
+ * MAXPARTITIONS partitions, or fails its checksum.  Returns EBUSY if a
+ * partition named in openmask is missing from nlp, starts at a different
+ * offset, or is smaller than before.  Otherwise nlp's checksum is
+ * recomputed, nlp is copied over *olp, and 0 is returned.
+ */
+int
+setdisklabel(olp, nlp, openmask)
+	register struct disklabel *olp, *nlp;
+	u_long openmask;
+{
+	register int i;
+	register struct partition *opp, *npp;
+
+	/*
+	 * d_npartitions is checked before dkcksum() because dkcksum() sums
+	 * everything up to d_partitions[d_npartitions]; readdisklabel
+	 * applies the same bound.
+	 */
+	if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
+	    nlp->d_npartitions > MAXPARTITIONS || dkcksum(nlp) != 0)
+		return (EINVAL);
+	while ((i = ffs((long)openmask)) != 0) {
+		i--;
+		/* Shift in the mask's own width rather than in int. */
+		openmask &= ~(1UL << i);
+		if (nlp->d_npartitions <= i)
+			return (EBUSY);
+		opp = &olp->d_partitions[i];
+		npp = &nlp->d_partitions[i];
+		if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
+			return (EBUSY);
+		/*
+		 * Copy internally-set partition information
+		 * if new label doesn't include it.		XXX
+		 */
+		if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
+			npp->p_fstype = opp->p_fstype;
+			npp->p_fsize = opp->p_fsize;
+			npp->p_frag = opp->p_frag;
+			npp->p_cpg = opp->p_cpg;
+		}
+	}
+	/* The partition entries may have changed above; redo the checksum. */
+	nlp->d_checksum = 0;
+	nlp->d_checksum = dkcksum(nlp);
+	*olp = *nlp;
+	return (0);
+}
+
+/*
+ * encoding of disk minor numbers, should be elsewhere...
+ *
+ * The low three bits of the minor number select the partition and the
+ * remaining upper bits the unit; dkminor() combines a unit and a
+ * partition back into a minor number.
+ */
+#define dkunit(dev) (minor(dev) >> 3)
+#define dkpart(dev) (minor(dev) & 07)
+#define dkminor(unit, part) (((unit) << 3) | (part))
+
+/*
+ * Write disk label back to device after modification.
+ *
+ * dev names the device (and partition) the request came in on, strat is
+ * its strategy routine and lp the label to store.  The label sector is
+ * accessed through dev's own partition when that partition starts at
+ * offset 0, otherwise through partition 0; if partition 0 does not start
+ * at offset 0 either, EXDEV is returned.
+ *
+ * The sector at LABELSECTOR is read and searched at every sizeof(long)
+ * boundary for an existing valid label.  The first one found is
+ * overwritten with *lp and the sector is written back.  Returns 0 on
+ * success, the biowait() error if the read or write fails, or ESRCH if
+ * the sector holds no valid label to replace.
+ */
+int
+writedisklabel(dev, strat, lp)
+	dev_t dev;
+	int (*strat)();
+	register struct disklabel *lp;
+{
+	struct buf *bp;
+	struct disklabel *dlp;
+	int labelpart;
+	int error = 0;
+
+	labelpart = dkpart(dev);
+	if (lp->d_partitions[labelpart].p_offset != 0) {
+		if (lp->d_partitions[0].p_offset != 0)
+			return (EXDEV);			/* not quite right */
+		labelpart = 0;
+	}
+	bp = geteblk((int)lp->d_secsize);
+	bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart));
+	bp->b_blkno = LABELSECTOR;
+	bp->b_bcount = lp->d_secsize;
+	bp->b_flags = B_READ;
+	(*strat)(bp);
+	if ((error = biowait(bp)) != 0)
+		goto done;
+	for (dlp = (struct disklabel *)bp->b_data;
+	    dlp <= (struct disklabel *)
+	      ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
+	    dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
+		/*
+		 * d_npartitions comes straight from the disk; bound it
+		 * before dkcksum() uses it to decide how far to read.
+		 */
+		if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
+		    dlp->d_npartitions <= MAXPARTITIONS &&
+		    dkcksum(dlp) == 0) {
+			*dlp = *lp;
+			bp->b_flags = B_WRITE;
+			(*strat)(bp);
+			error = biowait(bp);
+			goto done;
+		}
+	}
+	error = ESRCH;
+done:
+	brelse(bp);
+	return (error);
+}
+
+/*
+ * Compute checksum for disk label.
+ *
+ * XORs together the 16-bit words from the start of the label up to, but
+ * not including, d_partitions[d_npartitions], and returns the result.
+ * Callers in this file treat a result of 0 as a consistent label and
+ * store the value computed with d_checksum cleared back into d_checksum.
+ *
+ * The caller must make sure d_npartitions is sane (no larger than
+ * MAXPARTITIONS), since it alone determines how far this routine reads.
+ *
+ * The return type is spelled out as u_int to agree with the prototype in
+ * ufs_extern.h.
+ */
+u_int
+dkcksum(lp)
+	register struct disklabel *lp;
+{
+	register u_short *start, *end;
+	register u_short sum = 0;
+
+	start = (u_short *)lp;
+	end = (u_short *)&lp->d_partitions[lp->d_npartitions];
+	while (start < end)
+		sum ^= *start++;
+	return (sum);
+}
+
+/*
+ * Disk error is the preface to plaintive error messages
+ * about failing disk transfers. It prints messages of the form
+
+hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d; cn %d tn %d sn %d)
+
+ * if the offset of the error in the transfer and a disk label
+ * are both available. blkdone should be -1 if the position of the error
+ * is unknown; the disklabel pointer may be null from drivers that have not
+ * been converted to use them. The message is printed with printf
+ * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
+ * The message should be completed (with at least a newline) with printf
+ * or addlog, respectively. There is no trailing space.
+ *
+ * bp is the failed transfer, dname the device name printed before the
+ * unit number and partition letter, and what the text printed before
+ * "reading"/"writing" (e.g. "hard error"). For a transfer of at most
+ * DEV_BSIZE bytes only the starting block is printed after "fsbn";
+ * otherwise the block range is printed, preceded by the failing block
+ * and " of " when blkdone is known. The parenthesized part is added
+ * only when lp is non-null and either blkdone is known or the transfer
+ * is no larger than one sector.
+ */
+void
+diskerr(bp, dname, what, pri, blkdone, lp)
+ register struct buf *bp;
+ char *dname, *what;
+ int pri, blkdone;
+ register struct disklabel *lp;
+{
+ int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev);
+ register void (*pr) __P((const char *, ...));
+ char partname = 'a' + part;
+ int sn;
+
+ if (pri != LOG_PRINTF) {
+ log(pri, ""); /* start a log entry at the requested priority */
+ pr = addlog;
+ } else
+ pr = printf;
+ (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
+ bp->b_flags & B_READ ? "read" : "writ");
+ sn = bp->b_blkno;
+ if (bp->b_bcount <= DEV_BSIZE)
+ (*pr)("%d", sn);
+ else {
+ if (blkdone >= 0) {
+ sn += blkdone; /* block within the transfer that failed */
+ (*pr)("%d of ", sn);
+ }
+ (*pr)("%d-%d", bp->b_blkno,
+ bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
+ }
+ if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
+#ifdef tahoe
+ sn *= DEV_BSIZE / lp->d_secsize; /* XXX */
+#endif
+ sn += lp->d_partitions[part].p_offset; /* make it relative to the whole unit */
+ (*pr)(" (%s%d bn %d; cn %d", dname, unit, sn,
+ sn / lp->d_secpercyl);
+ sn %= lp->d_secpercyl; /* sector offset within the cylinder */
+ (*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors);
+ }
+}
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
new file mode 100644
index 000000000000..e25923e947d5
--- /dev/null
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -0,0 +1,125 @@
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_extern.h 8.3 (Berkeley) 4/16/94
+ */
+
+struct buf; /* forward declarations so the prototypes below can name these types */
+struct direct;
+struct disklabel;
+struct fid;
+struct flock;
+struct inode;
+struct mbuf;
+struct mount;
+struct nameidata;
+struct proc;
+struct ucred;
+struct uio;
+struct vattr;
+struct vnode;
+struct ufs_args;
+
+__BEGIN_DECLS
+void diskerr /* disk label and request-queue helpers, defined in ufs_disksubr.c */
+ __P((struct buf *, char *, char *, int, int, struct disklabel *));
+void disksort __P((struct buf *, struct buf *));
+u_int dkcksum __P((struct disklabel *));
+char *readdisklabel __P((dev_t, int (*)(), struct disklabel *));
+int setdisklabel __P((struct disklabel *, struct disklabel *, u_long));
+int writedisklabel __P((dev_t, int (*)(), struct disklabel *));
+
+int ufs_abortop __P((struct vop_abortop_args *));
+int ufs_access __P((struct vop_access_args *));
+int ufs_advlock __P((struct vop_advlock_args *));
+int ufs_bmap __P((struct vop_bmap_args *));
+int ufs_check_export __P((struct mount *, struct ufid *, struct mbuf *,
+ struct vnode **, int *exflagsp, struct ucred **));
+int ufs_checkpath __P((struct inode *, struct inode *, struct ucred *));
+int ufs_close __P((struct vop_close_args *));
+int ufs_create __P((struct vop_create_args *));
+void ufs_dirbad __P((struct inode *, doff_t, char *));
+int ufs_dirbadentry __P((struct vnode *, struct direct *, int));
+int ufs_dirempty __P((struct inode *, ino_t, struct ucred *));
+int ufs_direnter __P((struct inode *, struct vnode *,struct componentname *));
+int ufs_dirremove __P((struct vnode *, struct componentname*));
+int ufs_dirrewrite
+ __P((struct inode *, struct inode *, struct componentname *));
+int ufs_getattr __P((struct vop_getattr_args *));
+int ufs_getlbns __P((struct vnode *, daddr_t, struct indir *, int *));
+struct vnode *
+ ufs_ihashget __P((dev_t, ino_t)); /* sleeps while the inode is locked */
+void ufs_ihashinit __P((void));
+void ufs_ihashins __P((struct inode *)); /* inserts the inode and marks it locked */
+struct vnode *
+ ufs_ihashlookup __P((dev_t, ino_t)); /* returns the vnode even if locked */
+void ufs_ihashrem __P((struct inode *));
+int ufs_inactive __P((struct vop_inactive_args *));
+int ufs_init __P((void));
+int ufs_ioctl __P((struct vop_ioctl_args *));
+int ufs_islocked __P((struct vop_islocked_args *));
+int ufs_link __P((struct vop_link_args *));
+int ufs_lock __P((struct vop_lock_args *));
+int ufs_lookup __P((struct vop_lookup_args *));
+int ufs_makeinode __P((int mode, struct vnode *, struct vnode **, struct componentname *));
+int ufs_mkdir __P((struct vop_mkdir_args *));
+int ufs_mknod __P((struct vop_mknod_args *));
+int ufs_mmap __P((struct vop_mmap_args *));
+int ufs_open __P((struct vop_open_args *));
+int ufs_pathconf __P((struct vop_pathconf_args *));
+int ufs_print __P((struct vop_print_args *));
+int ufs_readdir __P((struct vop_readdir_args *));
+int ufs_readlink __P((struct vop_readlink_args *));
+int ufs_reclaim __P((struct vop_reclaim_args *));
+int ufs_remove __P((struct vop_remove_args *));
+int ufs_rename __P((struct vop_rename_args *));
+int ufs_rmdir __P((struct vop_rmdir_args *));
+int ufs_root __P((struct mount *, struct vnode **));
+int ufs_seek __P((struct vop_seek_args *));
+int ufs_select __P((struct vop_select_args *));
+int ufs_setattr __P((struct vop_setattr_args *));
+int ufs_start __P((struct mount *, int, struct proc *));
+int ufs_strategy __P((struct vop_strategy_args *));
+int ufs_symlink __P((struct vop_symlink_args *));
+int ufs_unlock __P((struct vop_unlock_args *));
+int ufs_vinit __P((struct mount *,
+ int (**)(), int (**)(), struct vnode **));
+int ufsspec_close __P((struct vop_close_args *));
+int ufsspec_read __P((struct vop_read_args *));
+int ufsspec_write __P((struct vop_write_args *));
+
+#ifdef FIFO /* FIFO entry points are declared only when FIFO is defined */
+int ufsfifo_read __P((struct vop_read_args *));
+int ufsfifo_write __P((struct vop_write_args *));
+int ufsfifo_close __P((struct vop_close_args *));
+#endif
+__END_DECLS
diff --git a/sys/ufs/ufs/ufs_ihash.c b/sys/ufs/ufs/ufs_ihash.c
new file mode 100644
index 000000000000..4a37c907ef63
--- /dev/null
+++ b/sys/ufs/ufs/ufs_ihash.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_ihash.c 8.4 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Structures associated with inode caching.
+ *
+ * ihashtbl is the array of hash-chain heads set up by ufs_ihashinit().
+ * INOHASH() selects a chain by adding the device and inode numbers and
+ * masking the sum with ihash.
+ */
+struct inode **ihashtbl;
+u_long ihash; /* size of hash table - 1 */
+#define INOHASH(device, inum) (((device) + (inum)) & ihash)
+
+/*
+ * Initialize inode hash table.
+ *
+ * The table comes from hashinit(), sized from desiredvnodes and
+ * allocated under the M_UFSMNT malloc type. hashinit() is handed
+ * &ihash, presumably to store the table mask there -- confirm against
+ * hashinit().
+ */
+void
+ufs_ihashinit()
+{
+
+ ihashtbl = hashinit(desiredvnodes, M_UFSMNT, &ihash);
+}
+
+/*
+ * Find the in-core inode for a device/inode-number pair and return its
+ * vnode, or NULL if no such inode is on the hash chain.  The lock state
+ * of the inode is not examined: a locked inode is returned as is.
+ */
+struct vnode *
+ufs_ihashlookup(device, inum)
+	dev_t device;
+	ino_t inum;
+{
+	register struct inode *cur;
+
+	/* Walk the one chain this pair hashes to. */
+	cur = ihashtbl[INOHASH(device, inum)];
+	while (cur != NULL) {
+		if (cur->i_dev == device && cur->i_number == inum)
+			return (ITOV(cur));
+		cur = cur->i_next;
+	}
+	return (NULL);
+}
+
+/*
+ * Use the device/inum pair to find the incore inode, and return a pointer
+ * to it. If it is in core, but locked, wait for it.
+ *
+ * Returns NULL when no matching inode is on the hash chain. When the
+ * matching inode has IN_LOCKED set, IN_WANTED is set on it and the caller
+ * sleeps on the inode, after which the chain is searched again from its
+ * head. Otherwise vget(vp, 1) is attempted: a zero return yields the
+ * vnode, anything else restarts the search.
+ */
+struct vnode *
+ufs_ihashget(device, inum)
+ dev_t device;
+ ino_t inum;
+{
+ register struct inode *ip;
+ struct vnode *vp;
+
+ for (;;)
+ for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) {
+ if (ip == NULL)
+ return (NULL);
+ if (inum == ip->i_number && device == ip->i_dev) {
+ if (ip->i_flag & IN_LOCKED) {
+ ip->i_flag |= IN_WANTED;
+ sleep(ip, PINOD);
+ break; /* leave the inner loop: rescan from the chain head */
+ }
+ vp = ITOV(ip);
+ if (!vget(vp, 1))
+ return (vp);
+ break; /* vget() returned non-zero: rescan from the chain head */
+ }
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Link the inode at the head of its hash chain and hand it back locked.
+ * i_prev always points at whatever pointer refers to this inode (the
+ * bucket head or the previous inode's i_next).  Panics if the inode is
+ * already marked IN_LOCKED.  The lock holder is recorded as the current
+ * process id, or -1 when there is no current process.
+ */
+void
+ufs_ihashins(ip)
+	struct inode *ip;
+{
+	struct inode **head, *first;
+
+	head = &ihashtbl[INOHASH(ip->i_dev, ip->i_number)];
+	first = *head;
+	if (first != NULL)
+		first->i_prev = &ip->i_next;
+	ip->i_next = first;
+	ip->i_prev = head;
+	*head = ip;
+
+	if ((ip->i_flag & IN_LOCKED) != 0)
+		panic("ufs_ihashins: already locked");
+	ip->i_lockholder = (curproc != NULL) ? curproc->p_pid : -1;
+	ip->i_flag |= IN_LOCKED;
+}
+
+/*
+ * Unlink the inode from its hash chain.  The successor, if any, inherits
+ * this inode's back pointer, and the pointer that referred to this inode
+ * is redirected to the successor.  With DIAGNOSTIC the inode's own links
+ * are cleared afterwards.
+ */
+void
+ufs_ihashrem(ip)
+	register struct inode *ip;
+{
+	register struct inode *next;
+
+	next = ip->i_next;
+	if (next != NULL)
+		next->i_prev = ip->i_prev;
+	*ip->i_prev = next;
+#ifdef DIAGNOSTIC
+	ip->i_next = NULL;
+	ip->i_prev = NULL;
+#endif
+}
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
new file mode 100644
index 000000000000..ac876f9d34d4
--- /dev/null
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_inode.c 8.4 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+u_long nextgennumber; /* Next generation number to assign. */
+int prtactive = 0; /* 1 => print out reclaim of active vnodes (tested in ufs_inactive and ufs_reclaim when v_usecount != 0) */
+
+/*
+ * One-time UFS setup: initialize the inode hash table and the quota
+ * code.  Calls after the first do nothing.  Always returns 0.
+ */
+int
+ufs_init()
+{
+	static int done;	/* non-zero once setup has been started */
+
+	if (done)
+		return (0);
+	done = 1;
+
+#ifdef DIAGNOSTIC
+	/* Complain when sizeof(struct inode) is not a power of two. */
+	if (((sizeof(struct inode) - 1) & sizeof(struct inode)) != 0)
+		printf("ufs_init: bad size %d\n", sizeof(struct inode));
+#endif
+	ufs_ihashinit();
+	dqinit();
+	return (0);
+}
+
+/*
+ * Last reference to an inode.  If necessary, write or delete it.
+ *
+ * An inode whose mode is already 0 (a stale file handle) is simply
+ * discarded with vgone(), unless the vnode is marked VXLOCK, and 0 is
+ * returned.  Otherwise the inode is marked IN_LOCKED and:
+ *  - if it has no links left and the filesystem is not mounted read-only,
+ *    it is truncated to length 0, its rdev and mode are cleared, and it
+ *    is released with VOP_VFREE() using the mode it had;
+ *  - if any of the access/change/modified/update flags are set, it is
+ *    written back with VOP_UPDATE() using the current time.
+ * After unlocking, a vnode with no users whose inode mode is 0 is
+ * reclaimed at once with vgone().
+ *
+ * Returns the result of VOP_TRUNCATE() when truncation was attempted,
+ * otherwise 0.
+ */
+int
+ufs_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+	struct timeval tv;
+	int mode, error;
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("ufs_inactive: pushing active", vp);
+
+	/* Get rid of inodes related to stale file handles. */
+	if (ip->i_mode == 0) {
+		if ((vp->v_flag & VXLOCK) == 0)
+			vgone(vp);
+		return (0);
+	}
+
+	error = 0;
+#ifdef DIAGNOSTIC
+	if (VOP_ISLOCKED(vp))
+		panic("ufs_inactive: locked inode");
+	if (curproc)
+		ip->i_lockholder = curproc->p_pid;
+	else
+		ip->i_lockholder = -1;
+#endif
+	ip->i_flag |= IN_LOCKED;
+	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+#ifdef QUOTA
+		if (!getinoquota(ip))
+			(void)chkiq(ip, -1, NOCRED, 0);
+#endif
+		error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, NULL);
+		ip->i_rdev = 0;
+		/* Keep the old mode: VOP_VFREE() needs it after i_mode is 0. */
+		mode = ip->i_mode;
+		ip->i_mode = 0;
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		VOP_VFREE(vp, ip->i_number, mode);
+	}
+	if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) {
+		tv = time;
+		VOP_UPDATE(vp, &tv, &tv, 0);
+	}
+	VOP_UNLOCK(vp);
+	/*
+	 * If we are done with the inode, reclaim it
+	 * so that it can be reused immediately.
+	 */
+	if (vp->v_usecount == 0 && ip->i_mode == 0)
+		vgone(vp);
+	return (error);
+}
+
+/*
+ * Detach an inode from its vnode so the vnode can be reused.
+ *
+ * The inode is taken off its hash chain, name-cache entries for the
+ * vnode are purged, the reference on the device vnode is dropped, quota
+ * references are released (QUOTA kernels only), and the inode memory is
+ * freed under the malloc type that matches the mounted filesystem type.
+ * Panics if the mount is not UFS, MFS or LFS.  Always returns 0.
+ */
+int
+ufs_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip;
+	int i, type;
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("ufs_reclaim: pushing active", vp);
+
+	/* Take the inode off its hash chain. */
+	ip = VTOI(vp);
+	ufs_ihashrem(ip);
+
+	/* Drop everything else that still hangs off the inode. */
+	cache_purge(vp);
+	if (ip->i_devvp != NULL) {
+		vrele(ip->i_devvp);
+		ip->i_devvp = NULL;
+	}
+#ifdef QUOTA
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (ip->i_dquot[i] == NODQUOT)
+			continue;
+		dqrele(vp, ip->i_dquot[i]);
+		ip->i_dquot[i] = NODQUOT;
+	}
+#endif
+
+	/* The malloc type depends on which filesystem owns the inode. */
+	if (vp->v_mount->mnt_stat.f_type == MOUNT_UFS)
+		type = M_FFSNODE;
+	else if (vp->v_mount->mnt_stat.f_type == MOUNT_MFS)
+		type = M_MFSNODE;
+	else if (vp->v_mount->mnt_stat.f_type == MOUNT_LFS)
+		type = M_LFSNODE;
+	else
+		panic("ufs_reclaim: not ufs file");
+	FREE(vp->v_data, type);
+	vp->v_data = NULL;
+	return (0);
+}
diff --git a/sys/ufs/ufs/ufs_lockf.c b/sys/ufs/ufs/ufs_lockf.c
new file mode 100644
index 000000000000..cb9a7375de10
--- /dev/null
+++ b/sys/ufs/ufs/ufs_lockf.c
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Scooter Morris at Genentech Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/fcntl.h>
+
+#include <ufs/ufs/lockf.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * This variable controls the maximum number of processes that will
+ * be checked in doing deadlock detection.
+ */
+int maxlockdepth = MAXDEPTH;
+
+#ifdef LOCKF_DEBUG
+int lockf_debug = 0; /* trace mask: bit 0x1 traces set/clear/get, bit 0x2 traces the list helpers */
+#endif
+
+#define NOLOCKF (struct lockf *)0 /* null lock pointer; ends the lf_next and lf_block chains */
+#define SELF 0x1 /* lf_findoverlap: skip locks whose lf_id differs from the request's */
+#define OTHERS 0x2 /* lf_findoverlap: skip locks whose lf_id equals the request's */
+
+/*
+ * Set a byte-range lock.
+ *
+ * "lock" describes the request and is freed with type M_LOCKF on every
+ * error return, so the caller must not touch it after a failure.  On
+ * success it has either been linked into the inode's i_lockf list or
+ * been freed after an existing lock of the same owner was found to
+ * cover the request already.
+ *
+ * Returns 0 on success, EAGAIN if a conflicting lock exists and F_WAIT
+ * is not set, EDEADLK if sleeping would close a cycle of F_POSIX
+ * waiters that leads back to the requester, or the error returned by
+ * tsleep() when the sleep ends abnormally.
+ */
+int
+lf_setlock(lock)
+	register struct lockf *lock;
+{
+	register struct lockf *block;
+	struct inode *ip = lock->lf_inode;
+	struct lockf **prev, *overlap, *ltmp;
+	static char lockstr[] = "lockf";
+	int ovcase, priority, needtolink, error;
+
+#ifdef LOCKF_DEBUG
+	if (lockf_debug & 1)
+		lf_print("lf_setlock", lock);
+#endif /* LOCKF_DEBUG */
+
+	/*
+	 * Set the priority
+	 */
+	priority = PLOCK;
+	if (lock->lf_type == F_WRLCK)
+		priority += 4;
+	priority |= PCATCH;
+	/*
+	 * Scan lock list for this file looking for locks that would block us.
+	 */
+	while ((block = lf_getblock(lock)) != NOLOCKF) {
+		/*
+		 * Free the structure and return if nonblocking.
+		 */
+		if ((lock->lf_flags & F_WAIT) == 0) {
+			FREE(lock, M_LOCKF);
+			return (EAGAIN);
+		}
+		/*
+		 * We are blocked. Since flock style locks cover
+		 * the whole file, there is no chance for deadlock.
+		 * For byte-range locks we must check for deadlock.
+		 *
+		 * Deadlock detection is done by looking through the
+		 * wait channels to see if there are any cycles that
+		 * involve us. MAXDEPTH is set just to make sure we
+		 * do not go off into neverland.
+		 */
+		if ((lock->lf_flags & F_POSIX) &&
+		    (block->lf_flags & F_POSIX)) {
+			register struct proc *wproc;
+			register struct lockf *waitblock;
+			int i = 0;
+
+			/* The block is waiting on something */
+			wproc = (struct proc *)block->lf_id;
+			while (wproc->p_wchan &&
+			    (wproc->p_wmesg == lockstr) &&
+			    (i++ < maxlockdepth)) {
+				waitblock = (struct lockf *)wproc->p_wchan;
+				/* Get the owner of the blocking lock */
+				waitblock = waitblock->lf_next;
+				if ((waitblock->lf_flags & F_POSIX) == 0)
+					break;
+				wproc = (struct proc *)waitblock->lf_id;
+				if (wproc == (struct proc *)lock->lf_id) {
+					free(lock, M_LOCKF);
+					return (EDEADLK);
+				}
+			}
+		}
+		/*
+		 * For flock type locks, we must first remove
+		 * any shared locks that we hold before we sleep
+		 * waiting for an exclusive lock.
+		 */
+		if ((lock->lf_flags & F_FLOCK) &&
+		    lock->lf_type == F_WRLCK) {
+			lock->lf_type = F_UNLCK;
+			(void) lf_clearlock(lock);
+			lock->lf_type = F_WRLCK;
+		}
+		/*
+		 * Add our lock to the blocked list and sleep until we're free.
+		 * Remember who blocked us (for deadlock detection).
+		 */
+		lock->lf_next = block;
+		lf_addblock(block, lock);
+#ifdef LOCKF_DEBUG
+		if (lockf_debug & 1) {
+			lf_print("lf_setlock: blocking on", block);
+			lf_printlist("lf_setlock", block);
+		}
+#endif /* LOCKF_DEBUG */
+		if ((error = tsleep((caddr_t)lock, priority, lockstr, 0)) != 0) {
+			/*
+			 * Delete ourselves from the waiting to lock list.
+			 * lf_next names the lock we are queued on; it is
+			 * NOLOCKF if lf_wakelock() already dequeued us.
+			 */
+			for (block = lock->lf_next;
+			    block != NOLOCKF;
+			    block = block->lf_block) {
+				if (block->lf_block != lock)
+					continue;
+				block->lf_block = block->lf_block->lf_block;
+				break;
+			}
+			/*
+			 * If we did not find ourselves on the list, but
+			 * are still linked onto a lock list, then something
+			 * is very wrong.
+			 */
+			if (block == NOLOCKF && lock->lf_next != NOLOCKF)
+				panic("lf_setlock: lost lock");
+			free(lock, M_LOCKF);
+			return (error);
+		}
+	}
+	/*
+	 * No blocks!! Add the lock. Note that we will
+	 * downgrade or upgrade any overlapping locks this
+	 * process already owns.
+	 *
+	 * Skip over locks owned by other processes.
+	 * Handle any locks that overlap and are owned by ourselves.
+	 */
+	prev = &ip->i_lockf;
+	block = ip->i_lockf;
+	needtolink = 1;
+	for (;;) {
+		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
+		if (ovcase)
+			block = overlap->lf_next;
+		/*
+		 * Six cases:
+		 *	0) no overlap
+		 *	1) overlap == lock
+		 *	2) overlap contains lock
+		 *	3) lock contains overlap
+		 *	4) overlap starts before lock
+		 *	5) overlap ends after lock
+		 */
+		switch (ovcase) {
+		case 0: /* no overlap */
+			if (needtolink) {
+				*prev = lock;
+				lock->lf_next = overlap;
+			}
+			break;
+
+		case 1: /* overlap == lock */
+			/*
+			 * If downgrading lock, others may be
+			 * able to acquire it.
+			 */
+			if (lock->lf_type == F_RDLCK &&
+			    overlap->lf_type == F_WRLCK)
+				lf_wakelock(overlap);
+			overlap->lf_type = lock->lf_type;
+			FREE(lock, M_LOCKF);
+			lock = overlap; /* for debug output below */
+			break;
+
+		case 2: /* overlap contains lock */
+			/*
+			 * Check for common starting point and different types.
+			 */
+			if (overlap->lf_type == lock->lf_type) {
+				free(lock, M_LOCKF);
+				lock = overlap; /* for debug output below */
+				break;
+			}
+			if (overlap->lf_start == lock->lf_start) {
+				*prev = lock;
+				lock->lf_next = overlap;
+				overlap->lf_start = lock->lf_end + 1;
+			} else
+				lf_split(overlap, lock);
+			lf_wakelock(overlap);
+			break;
+
+		case 3: /* lock contains overlap */
+			/*
+			 * If downgrading lock, others may be able to
+			 * acquire it, otherwise take the list.
+			 */
+			if (lock->lf_type == F_RDLCK &&
+			    overlap->lf_type == F_WRLCK) {
+				lf_wakelock(overlap);
+			} else {
+				/*
+				 * Each waiter inherited from overlap still
+				 * names overlap in lf_next.  overlap is freed
+				 * below, and both the deadlock scan and the
+				 * interrupted-sleep cleanup above follow a
+				 * waiter's lf_next, so re-point the waiters
+				 * at the lock that now blocks them.
+				 */
+				for (ltmp = overlap->lf_block;
+				    ltmp != NOLOCKF;
+				    ltmp = ltmp->lf_block)
+					ltmp->lf_next = lock;
+				ltmp = lock->lf_block;
+				lock->lf_block = overlap->lf_block;
+				lf_addblock(lock, ltmp);
+			}
+			/*
+			 * Add the new lock if necessary and delete the overlap.
+			 */
+			if (needtolink) {
+				*prev = lock;
+				lock->lf_next = overlap->lf_next;
+				prev = &lock->lf_next;
+				needtolink = 0;
+			} else
+				*prev = overlap->lf_next;
+			free(overlap, M_LOCKF);
+			continue;
+
+		case 4: /* overlap starts before lock */
+			/*
+			 * Add lock after overlap on the list.
+			 */
+			lock->lf_next = overlap->lf_next;
+			overlap->lf_next = lock;
+			overlap->lf_end = lock->lf_start - 1;
+			prev = &lock->lf_next;
+			lf_wakelock(overlap);
+			needtolink = 0;
+			continue;
+
+		case 5: /* overlap ends after lock */
+			/*
+			 * Add the new lock before overlap.
+			 */
+			if (needtolink) {
+				*prev = lock;
+				lock->lf_next = overlap;
+			}
+			overlap->lf_start = lock->lf_end + 1;
+			lf_wakelock(overlap);
+			break;
+		}
+		break;
+	}
+#ifdef LOCKF_DEBUG
+	if (lockf_debug & 1) {
+		lf_print("lf_setlock: got the lock", lock);
+		lf_printlist("lf_setlock", lock);
+	}
+#endif /* LOCKF_DEBUG */
+	return (0);
+}
+
+/*
+ * Remove a byte-range lock on an inode.
+ *
+ * Generally, find the lock (or an overlap to that lock)
+ * and remove it (or shrink it), then wakeup anyone we can.
+ *
+ * unlock describes the range to release. Only locks whose lf_id
+ * matches unlock->lf_id are examined (the SELF scan), and unlock
+ * itself is never freed here. Always returns 0, including when
+ * the inode has no locks at all.
+ */
+int
+lf_clearlock(unlock)
+ register struct lockf *unlock;
+{
+ struct inode *ip = unlock->lf_inode;
+ register struct lockf *lf = ip->i_lockf;
+ struct lockf *overlap, **prev;
+ int ovcase;
+
+ if (lf == NOLOCKF)
+ return (0);
+#ifdef LOCKF_DEBUG
+ if (unlock->lf_type != F_UNLCK)
+ panic("lf_clearlock: bad type");
+ if (lockf_debug & 1)
+ lf_print("lf_clearlock", unlock);
+#endif /* LOCKF_DEBUG */
+ prev = &ip->i_lockf; /* the link through which the examined lock is reached */
+ while (ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) {
+ /*
+ * Wake every request queued on this lock so that it can
+ * be retried against the range that remains.
+ */
+ lf_wakelock(overlap);
+
+ switch (ovcase) {
+
+ case 1: /* overlap == lock: unlink and free it */
+ *prev = overlap->lf_next;
+ FREE(overlap, M_LOCKF);
+ break;
+
+ case 2: /* overlap contains lock: split it */
+ if (overlap->lf_start == unlock->lf_start) {
+ overlap->lf_start = unlock->lf_end + 1; /* common start: just trim the front */
+ break;
+ }
+ lf_split(overlap, unlock); /* leaves unlock linked right after overlap */
+ overlap->lf_next = unlock->lf_next; /* take unlock back out, keeping the remaining pieces */
+ break;
+
+ case 3: /* lock contains overlap: drop it and keep scanning */
+ *prev = overlap->lf_next;
+ lf = overlap->lf_next;
+ free(overlap, M_LOCKF);
+ continue;
+
+ case 4: /* overlap starts before lock: trim its tail and keep scanning */
+ overlap->lf_end = unlock->lf_start - 1;
+ prev = &overlap->lf_next;
+ lf = overlap->lf_next;
+ continue;
+
+ case 5: /* overlap ends after lock: trim its front */
+ overlap->lf_start = unlock->lf_end + 1;
+ break;
+ }
+ break; /* cases 1, 2 and 5 reach the end of the range, so the scan is over */
+ }
+#ifdef LOCKF_DEBUG
+ if (lockf_debug & 1)
+ lf_printlist("lf_clearlock", unlock);
+#endif /* LOCKF_DEBUG */
+ return (0);
+}
+
+/*
+ * Report the first lock that would block the request "lock".
+ *
+ * When lf_getblock() finds a blocker, fl is filled in with its type,
+ * its start (relative to SEEK_SET), its length (0 when the blocker's
+ * lf_end is -1) and its owner's p_pid, or -1 if the blocker is not an
+ * F_POSIX lock.  When nothing blocks, only fl->l_type is written, as
+ * F_UNLCK.  Always returns 0.
+ */
+int
+lf_getlock(lock, fl)
+	register struct lockf *lock;
+	register struct flock *fl;
+{
+	register struct lockf *blocker;
+
+#ifdef LOCKF_DEBUG
+	if (lockf_debug & 1)
+		lf_print("lf_getlock", lock);
+#endif /* LOCKF_DEBUG */
+
+	blocker = lf_getblock(lock);
+	if (blocker == NOLOCKF) {
+		fl->l_type = F_UNLCK;
+		return (0);
+	}
+
+	fl->l_type = blocker->lf_type;
+	fl->l_whence = SEEK_SET;
+	fl->l_start = blocker->lf_start;
+	fl->l_len = (blocker->lf_end == -1) ? 0 :
+	    blocker->lf_end - blocker->lf_start + 1;
+	fl->l_pid = (blocker->lf_flags & F_POSIX) ?
+	    ((struct proc *)(blocker->lf_id))->p_pid : -1;
+	return (0);
+}
+
+/*
+ * Scan the inode's lock list for the first lock, held under a
+ * different lf_id, that conflicts with "lock".  Two overlapping locks
+ * conflict when either of them is an F_WRLCK.  Returns the
+ * conflicting lock, or NOLOCKF if there is none.
+ */
+struct lockf *
+lf_getblock(lock)
+	register struct lockf *lock;
+{
+	struct lockf **prev = &lock->lf_inode->i_lockf;
+	struct lockf *scan = lock->lf_inode->i_lockf;
+	struct lockf *overlap;
+
+	for (;;) {
+		if (lf_findoverlap(scan, lock, OTHERS, &prev, &overlap) == 0)
+			return (NOLOCKF);
+		/*
+		 * An overlap only blocks us if one side is exclusive.
+		 */
+		if (lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)
+			return (overlap);
+		/*
+		 * Two shared locks: resume the scan after this one.
+		 */
+		scan = overlap->lf_next;
+	}
+}
+
+/*
+ * Walk the list of locks for an inode to
+ * find an overlapping lock (if any).
+ *
+ * lf is the list position to start from and lock is the range being
+ * tested. type selects which owners are considered: with SELF, locks
+ * whose lf_id differs from lock->lf_id are skipped; with OTHERS, locks
+ * whose lf_id equals it are skipped. An end of -1 (in either lock)
+ * is treated as unbounded.
+ *
+ * Returns 0 when nothing overlaps, otherwise the case number (1-5)
+ * listed in the body. *overlap is left at the lock where the scan
+ * stopped (NOLOCKF if the list ran out). *prev is set to the address
+ * of the lf_next field of each lock stepped over and is left alone
+ * if no lock is stepped over.
+ *
+ * NOTE: this returns only the FIRST overlapping lock. There
+ * may be more than one.
+ */
+int
+lf_findoverlap(lf, lock, type, prev, overlap)
+ register struct lockf *lf;
+ struct lockf *lock;
+ int type;
+ struct lockf ***prev;
+ struct lockf **overlap;
+{
+ off_t start, end;
+
+ *overlap = lf;
+ if (lf == NOLOCKF)
+ return (0);
+#ifdef LOCKF_DEBUG
+ if (lockf_debug & 2)
+ lf_print("lf_findoverlap: looking for overlap in", lock);
+#endif /* LOCKF_DEBUG */
+ start = lock->lf_start;
+ end = lock->lf_end;
+ while (lf != NOLOCKF) {
+ if (((type & SELF) && lf->lf_id != lock->lf_id) ||
+ ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
+ *prev = &lf->lf_next;
+ *overlap = lf = lf->lf_next;
+ continue;
+ }
+#ifdef LOCKF_DEBUG
+ if (lockf_debug & 2)
+ lf_print("\tchecking", lf);
+#endif /* LOCKF_DEBUG */
+ /*
+ * OK, check for overlap
+ *
+ * Six cases:
+ * 0) no overlap
+ * 1) overlap == lock
+ * 2) overlap contains lock
+ * 3) lock contains overlap
+ * 4) overlap starts before lock
+ * 5) overlap ends after lock
+ *
+ * The tests below are tried in this order, so each one may
+ * assume that the earlier ones failed.
+ */
+ if ((lf->lf_end != -1 && start > lf->lf_end) ||
+ (end != -1 && lf->lf_start > end)) {
+ /*
+ * Case 0. A SELF scan gives up at the first lock that
+ * starts beyond the end of the request, leaving *overlap
+ * at that lock. NOTE(review): this presumably relies on
+ * the list being kept in ascending lf_start order; confirm.
+ */
+#ifdef LOCKF_DEBUG
+ if (lockf_debug & 2)
+ printf("no overlap\n");
+#endif /* LOCKF_DEBUG */
+ if ((type & SELF) && end != -1 && lf->lf_start > end)
+ return (0);
+ *prev = &lf->lf_next;
+ *overlap = lf = lf->lf_next;
+ continue;
+ }
+ if ((lf->lf_start == start) && (lf->lf_end == end)) {
+ /* Case 1: identical range */
+#ifdef LOCKF_DEBUG
+ if (lockf_debug & 2)
+ printf("overlap == lock\n");
+#endif /* LOCKF_DEBUG */
+ return (1);
+ }
+ if ((lf->lf_start <= start) &&
+ (end != -1) &&
+ ((lf->lf_end >= end) || (lf->lf_end == -1))) {
+ /* Case 2: lf covers the whole (bounded) request */
+#ifdef LOCKF_DEBUG
+ if (lockf_debug & 2)
+ printf("overlap contains lock\n");
+#endif /* LOCKF_DEBUG */
+ return (2);
+ }
+ if (start <= lf->lf_start &&
+ (end == -1 ||
+ (lf->lf_end != -1 && end >= lf->lf_end))) {
+ /* Case 3: the request covers all of lf */
+#ifdef LOCKF_DEBUG
+ if (lockf_debug & 2)
+ printf("lock contains overlap\n");
+#endif /* LOCKF_DEBUG */
+ return (3);
+ }
+ if ((lf->lf_start < start) &&
+ ((lf->lf_end >= start) || (lf->lf_end == -1))) {
+ /* Case 4: lf begins first and runs into the request */
+#ifdef LOCKF_DEBUG
+ if (lockf_debug & 2)
+ printf("overlap starts before lock\n");
+#endif /* LOCKF_DEBUG */
+ return (4);
+ }
+ if ((lf->lf_start > start) &&
+ (end != -1) &&
+ ((lf->lf_end > end) || (lf->lf_end == -1))) {
+ /* Case 5: lf begins inside the request and runs past its end */
+#ifdef LOCKF_DEBUG
+ if (lockf_debug & 2)
+ printf("overlap ends after lock\n");
+#endif /* LOCKF_DEBUG */
+ return (5);
+ }
+ panic("lf_findoverlap: default"); /* reached only if none of the cases above matched */
+ }
+ return (0);
+}
+
+/*
+ * Append "blocked" to the tail of the lf_block chain that hangs off
+ * "lock".  A NOLOCKF "blocked" is ignored.  Whatever is already chained
+ * behind "blocked" comes along with it.
+ */
+void
+lf_addblock(lock, blocked)
+	struct lockf *lock;
+	struct lockf *blocked;
+{
+	register struct lockf **tailp;
+
+	if (blocked == NOLOCKF)
+		return;
+#ifdef LOCKF_DEBUG
+	if (lockf_debug & 2) {
+		lf_print("addblock: adding", blocked);
+		lf_print("to blocked list of", lock);
+	}
+#endif /* LOCKF_DEBUG */
+	/*
+	 * Find the link that currently ends the chain and hook on there.
+	 */
+	for (tailp = &lock->lf_block; *tailp != NOLOCKF;
+	    tailp = &(*tailp)->lf_block)
+		continue;
+	*tailp = blocked;
+}
+
+/*
+ * Carve the range of lock2 out of lock1, which contains it, and link
+ * lock2 into the list next to lock1.
+ *
+ * If the two share a start, lock1 is trimmed to begin after lock2 and
+ * lock2 is pointed at lock1.  If they share an end, lock1 is trimmed
+ * to stop before lock2 and lock2 is linked in after it.  Otherwise a
+ * third lock is allocated for the part of lock1 that follows lock2,
+ * giving the order lock1, lock2, new lock.
+ */
+void
+lf_split(lock1, lock2)
+	register struct lockf *lock1;
+	register struct lockf *lock2;
+{
+	register struct lockf *tail;
+
+#ifdef LOCKF_DEBUG
+	if (lockf_debug & 2) {
+		lf_print("lf_split", lock1);
+		lf_print("splitting from", lock2);
+	}
+#endif /* LOCKF_DEBUG */
+	if (lock1->lf_start == lock2->lf_start) {
+		/*
+		 * Two pieces, lock2 first.
+		 */
+		lock2->lf_next = lock1;
+		lock1->lf_start = lock2->lf_end + 1;
+	} else if (lock1->lf_end == lock2->lf_end) {
+		/*
+		 * Two pieces, lock2 last.
+		 */
+		lock2->lf_next = lock1->lf_next;
+		lock1->lf_next = lock2;
+		lock1->lf_end = lock2->lf_start - 1;
+	} else {
+		/*
+		 * Three pieces: clone lock1 to stand for the part that
+		 * follows lock2.  The clone starts with no waiters.
+		 */
+		MALLOC(tail, struct lockf *, sizeof *tail, M_LOCKF, M_WAITOK);
+		bcopy((caddr_t)lock1, (caddr_t)tail, sizeof *tail);
+		tail->lf_block = NOLOCKF;
+		tail->lf_start = lock2->lf_end + 1;
+		lock1->lf_end = lock2->lf_start - 1;
+		/*
+		 * Chain them up: lock1 -> lock2 -> tail -> rest of list.
+		 */
+		tail->lf_next = lock1->lf_next;
+		lock2->lf_next = tail;
+		lock1->lf_next = lock2;
+	}
+}
+
+/*
+ * Wake every request queued on listhead's lf_block chain.
+ *
+ * The chain is detached from listhead first.  Each waiter then has its
+ * lf_block and lf_next cleared before wakeup() is called on it.
+ */
+void
+lf_wakelock(listhead)
+	struct lockf *listhead;
+{
+	register struct lockf *waiter, *next;
+
+	waiter = listhead->lf_block;
+	listhead->lf_block = NOLOCKF;
+	for (; waiter != NOLOCKF; waiter = next) {
+		/* Remember the successor before the link is cleared. */
+		next = waiter->lf_block;
+		waiter->lf_block = NOLOCKF;
+		waiter->lf_next = NOLOCKF;
+#ifdef LOCKF_DEBUG
+		if (lockf_debug & 2)
+			lf_print("lf_wakelock: awakening", waiter);
+#endif /* LOCKF_DEBUG */
+		wakeup((caddr_t)waiter);
+	}
+}
+
+#ifdef LOCKF_DEBUG
+/*
+ * Print out a lock.
+ *
+ * Debugging aid, compiled only under LOCKF_DEBUG.  tag is a prefix
+ * chosen by the caller; lock is the entry to describe.
+ *
+ * Every argument is cast to the type its conversion consumes: pointers
+ * are printed with %lx as unsigned long, and the inode number and the
+ * range bounds with %ld as long.  Handing a pointer, or an off_t that
+ * is wider than int, straight to %x or %d can make printf misread
+ * every argument that follows.  Offsets outside the range of long are
+ * printed truncated.
+ */
+void
+lf_print(tag, lock)
+	char *tag;
+	register struct lockf *lock;
+{
+
+	printf("%s: lock 0x%lx for ", tag, (unsigned long)lock);
+	if (lock->lf_flags & F_POSIX)
+		printf("proc %d", (int)((struct proc *)(lock->lf_id))->p_pid);
+	else
+		printf("id 0x%lx", (unsigned long)lock->lf_id);
+	printf(" in ino %ld on dev <%d, %d>, %s, start %ld, end %ld",
+	    (long)lock->lf_inode->i_number,
+	    (int)major(lock->lf_inode->i_dev),
+	    (int)minor(lock->lf_inode->i_dev),
+	    lock->lf_type == F_RDLCK ? "shared" :
+	    lock->lf_type == F_WRLCK ? "exclusive" :
+	    lock->lf_type == F_UNLCK ? "unlock" :
+	    "unknown", (long)lock->lf_start, (long)lock->lf_end);
+	if (lock->lf_block)
+		printf(" block 0x%lx\n", (unsigned long)lock->lf_block);
+	else
+		printf("\n");
+}
+
+/*
+ * Print every lock on the list of the inode that "lock" refers to.
+ *
+ * Debugging aid, compiled only under LOCKF_DEBUG.  tag is a prefix
+ * chosen by the caller.  Arguments are cast to the type each
+ * conversion consumes (pointers to unsigned long for %lx, inode number
+ * and range bounds to long for %ld), so that a pointer or an off_t
+ * wider than int cannot make printf misread the arguments after it.
+ * Offsets outside the range of long are printed truncated.
+ */
+void
+lf_printlist(tag, lock)
+	char *tag;
+	struct lockf *lock;
+{
+	register struct lockf *lf;
+
+	printf("%s: Lock list for ino %ld on dev <%d, %d>:\n",
+	    tag, (long)lock->lf_inode->i_number,
+	    (int)major(lock->lf_inode->i_dev),
+	    (int)minor(lock->lf_inode->i_dev));
+	for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) {
+		printf("\tlock 0x%lx for ", (unsigned long)lf);
+		if (lf->lf_flags & F_POSIX)
+			printf("proc %d",
+			    (int)((struct proc *)(lf->lf_id))->p_pid);
+		else
+			printf("id 0x%lx", (unsigned long)lf->lf_id);
+		printf(", %s, start %ld, end %ld",
+		    lf->lf_type == F_RDLCK ? "shared" :
+		    lf->lf_type == F_WRLCK ? "exclusive" :
+		    lf->lf_type == F_UNLCK ? "unlock" :
+		    "unknown", (long)lf->lf_start, (long)lf->lf_end);
+		if (lf->lf_block)
+			printf(" block 0x%lx\n", (unsigned long)lf->lf_block);
+		else
+			printf("\n");
+	}
+}
+#endif /* LOCKF_DEBUG */
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
new file mode 100644
index 000000000000..87c6802c79f7
--- /dev/null
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -0,0 +1,970 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_lookup.c 8.6 (Berkeley) 4/1/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+struct nchstats nchstats;
+#ifdef DIAGNOSTIC
+int dirchk = 1;
+#else
+int dirchk = 0;
+#endif
+
+#define FSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0)
+
+/*
+ * Convert a component of a pathname into a pointer to a locked inode.
+ * This is a very central and rather complicated routine.
+ * If the file system is not maintained in a strict tree hierarchy,
+ * this can result in a deadlock situation (see comments in code below).
+ *
+ * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
+ * on whether the name is to be looked up, created, renamed, or deleted.
+ * When CREATE, RENAME, or DELETE is specified, information usable in
+ * creating, renaming, or deleting a directory entry may be calculated.
+ * If flag has LOCKPARENT or'ed into it and the target of the pathname
+ * exists, lookup returns both the target and its parent directory locked.
+ * When creating or renaming and LOCKPARENT is specified, the target may
+ * not be ".". When deleting and LOCKPARENT is specified, the target may
+ * be ".", but the caller must check to ensure it does a vrele and a vput
+ * instead of two vputs.
+ *
+ * Overall outline of ufs_lookup:
+ *
+ * check accessibility of directory
+ * look for name in cache, if found, then if at end of path
+ * and deleting or creating, drop it, else return name
+ * search for name in directory, to found or notfound
+ * notfound:
+ * if creating, return locked directory, leaving info on available slots
+ * else return error
+ * found:
+ * if at end of path and deleting, return information to allow delete
+ * if at end of path and rewriting (RENAME and LOCKPARENT), lock target
+ * inode and return info to allow rewrite
+ * if not at end, add name to cache; if at end and neither creating
+ * nor deleting, add name to cache
+ */
+int
+ufs_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct vnode *vdp; /* vnode for directory being searched */
+ register struct inode *dp; /* inode for directory being searched */
+ struct buf *bp; /* a buffer of directory entries */
+ register struct direct *ep; /* the current directory entry */
+ int entryoffsetinblock; /* offset of ep in bp's buffer */
+ enum {NONE, COMPACT, FOUND} slotstatus;
+ doff_t slotoffset; /* offset of area with free space */
+ int slotsize; /* size of area at slotoffset */
+ int slotfreespace; /* amount of space free in slot */
+ int slotneeded; /* size of the entry we're seeking */
+ int numdirpasses; /* strategy for directory search */
+ doff_t endsearch; /* offset to end directory search */
+ doff_t prevoff; /* prev entry dp->i_offset */
+ struct vnode *pdp; /* saved dp during symlink work */
+ struct vnode *tdp; /* returned by VFS_VGET */
+ doff_t enduseful; /* pointer past last used dir slot */
+ u_long bmask; /* block offset mask */
+ int lockparent; /* 1 => lockparent flag is set */
+ int wantparent; /* 1 => wantparent or lockparent flag */
+ int namlen, error;
+ struct vnode **vpp = ap->a_vpp;
+ struct componentname *cnp = ap->a_cnp;
+ struct ucred *cred = cnp->cn_cred;
+ int flags = cnp->cn_flags;
+ int nameiop = cnp->cn_nameiop;
+
+ bp = NULL;
+ slotoffset = -1;
+ *vpp = NULL;
+ vdp = ap->a_dvp;
+ dp = VTOI(vdp);
+ lockparent = flags & LOCKPARENT;
+ wantparent = flags & (LOCKPARENT|WANTPARENT);
+
+ /*
+ * Check accessiblity of directory.
+ */
+ if ((dp->i_mode & IFMT) != IFDIR)
+ return (ENOTDIR);
+ if (error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc))
+ return (error);
+
+ /*
+ * We now have a segment name to search for, and a directory to search.
+ *
+ * Before tediously performing a linear scan of the directory,
+ * check the name cache to see if the directory/name pair
+ * we are looking for is known already.
+ */
+ if (error = cache_lookup(vdp, vpp, cnp)) {
+ int vpid; /* capability number of vnode */
+
+ if (error == ENOENT)
+ return (error);
+ /*
+ * Get the next vnode in the path.
+ * See comment below starting `Step through' for
+ * an explaination of the locking protocol.
+ */
+ pdp = vdp;
+ dp = VTOI(*vpp);
+ vdp = *vpp;
+ vpid = vdp->v_id;
+ if (pdp == vdp) { /* lookup on "." */
+ VREF(vdp);
+ error = 0;
+ } else if (flags & ISDOTDOT) {
+ VOP_UNLOCK(pdp);
+ error = vget(vdp, 1);
+ if (!error && lockparent && (flags & ISLASTCN))
+ error = VOP_LOCK(pdp);
+ } else {
+ error = vget(vdp, 1);
+ if (!lockparent || error || !(flags & ISLASTCN))
+ VOP_UNLOCK(pdp);
+ }
+ /*
+ * Check that the capability number did not change
+ * while we were waiting for the lock.
+ */
+ if (!error) {
+ if (vpid == vdp->v_id)
+ return (0);
+ vput(vdp);
+ if (lockparent && pdp != vdp && (flags & ISLASTCN))
+ VOP_UNLOCK(pdp);
+ }
+ if (error = VOP_LOCK(pdp))
+ return (error);
+ vdp = pdp;
+ dp = VTOI(pdp);
+ *vpp = NULL;
+ }
+
+ /*
+ * Suppress search for slots unless creating
+ * file and at end of pathname, in which case
+ * we watch for a place to put the new file in
+ * case it doesn't already exist.
+ */
+ slotstatus = FOUND;
+ slotfreespace = slotsize = slotneeded = 0;
+ if ((nameiop == CREATE || nameiop == RENAME) &&
+ (flags & ISLASTCN)) {
+ slotstatus = NONE;
+ slotneeded = (sizeof(struct direct) - MAXNAMLEN +
+ cnp->cn_namelen + 3) &~ 3;
+ }
+
+ /*
+ * If there is cached information on a previous search of
+ * this directory, pick up where we last left off.
+ * We cache only lookups as these are the most common
+ * and have the greatest payoff. Caching CREATE has little
+ * benefit as it usually must search the entire directory
+ * to determine that the entry does not exist. Caching the
+ * location of the last DELETE or RENAME has not reduced
+ * profiling time and hence has been removed in the interest
+ * of simplicity.
+ */
+ bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
+ if (nameiop != LOOKUP || dp->i_diroff == 0 ||
+ dp->i_diroff > dp->i_size) {
+ entryoffsetinblock = 0;
+ dp->i_offset = 0;
+ numdirpasses = 1;
+ } else {
+ dp->i_offset = dp->i_diroff;
+ if ((entryoffsetinblock = dp->i_offset & bmask) &&
+ (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)))
+ return (error);
+ numdirpasses = 2;
+ nchstats.ncs_2passes++;
+ }
+ prevoff = dp->i_offset;
+ endsearch = roundup(dp->i_size, DIRBLKSIZ);
+ enduseful = 0;
+
+searchloop:
+ while (dp->i_offset < endsearch) {
+ /*
+ * If necessary, get the next directory block.
+ */
+ if ((dp->i_offset & bmask) == 0) {
+ if (bp != NULL)
+ brelse(bp);
+ if (error =
+ VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))
+ return (error);
+ entryoffsetinblock = 0;
+ }
+ /*
+ * If still looking for a slot, and at a DIRBLKSIZE
+ * boundary, have to start looking for free space again.
+ */
+ if (slotstatus == NONE &&
+ (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
+ slotoffset = -1;
+ slotfreespace = 0;
+ }
+ /*
+ * Get pointer to next entry.
+ * Full validation checks are slow, so we only check
+ * enough to insure forward progress through the
+ * directory. Complete checks can be run by patching
+ * "dirchk" to be true.
+ */
+ ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock);
+ if (ep->d_reclen == 0 ||
+ dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock)) {
+ int i;
+
+ ufs_dirbad(dp, dp->i_offset, "mangled entry");
+ i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
+ dp->i_offset += i;
+ entryoffsetinblock += i;
+ continue;
+ }
+
+ /*
+ * If an appropriate sized slot has not yet been found,
+ * check to see if one is available. Also accumulate space
+ * in the current block so that we can determine if
+ * compaction is viable.
+ */
+ if (slotstatus != FOUND) {
+ int size = ep->d_reclen;
+
+ if (ep->d_ino != 0)
+ size -= DIRSIZ(FSFMT(vdp), ep);
+ if (size > 0) {
+ if (size >= slotneeded) {
+ slotstatus = FOUND;
+ slotoffset = dp->i_offset;
+ slotsize = ep->d_reclen;
+ } else if (slotstatus == NONE) {
+ slotfreespace += size;
+ if (slotoffset == -1)
+ slotoffset = dp->i_offset;
+ if (slotfreespace >= slotneeded) {
+ slotstatus = COMPACT;
+ slotsize = dp->i_offset +
+ ep->d_reclen - slotoffset;
+ }
+ }
+ }
+ }
+
+ /*
+ * Check for a name match.
+ */
+ if (ep->d_ino) {
+# if (BYTE_ORDER == LITTLE_ENDIAN)
+ if (vdp->v_mount->mnt_maxsymlinklen > 0)
+ namlen = ep->d_namlen;
+ else
+ namlen = ep->d_type;
+# else
+ namlen = ep->d_namlen;
+# endif
+ if (namlen == cnp->cn_namelen &&
+ !bcmp(cnp->cn_nameptr, ep->d_name,
+ (unsigned)namlen)) {
+ /*
+ * Save directory entry's inode number and
+ * reclen in ndp->ni_ufs area, and release
+ * directory buffer.
+ */
+ dp->i_ino = ep->d_ino;
+ dp->i_reclen = ep->d_reclen;
+ brelse(bp);
+ goto found;
+ }
+ }
+ prevoff = dp->i_offset;
+ dp->i_offset += ep->d_reclen;
+ entryoffsetinblock += ep->d_reclen;
+ if (ep->d_ino)
+ enduseful = dp->i_offset;
+ }
+/* notfound: */
+ /*
+ * If we started in the middle of the directory and failed
+ * to find our target, we must check the beginning as well.
+ */
+ if (numdirpasses == 2) {
+ numdirpasses--;
+ dp->i_offset = 0;
+ endsearch = dp->i_diroff;
+ goto searchloop;
+ }
+ if (bp != NULL)
+ brelse(bp);
+ /*
+ * If creating, and at end of pathname and current
+ * directory has not been removed, then can consider
+ * allowing file to be created.
+ */
+ if ((nameiop == CREATE || nameiop == RENAME) &&
+ (flags & ISLASTCN) && dp->i_nlink != 0) {
+ /*
+ * Access for write is interpreted as allowing
+ * creation of files in the directory.
+ */
+ if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc))
+ return (error);
+ /*
+ * Return an indication of where the new directory
+ * entry should be put. If we didn't find a slot,
+ * then set dp->i_count to 0 indicating
+ * that the new slot belongs at the end of the
+ * directory. If we found a slot, then the new entry
+ * can be put in the range from dp->i_offset to
+ * dp->i_offset + dp->i_count.
+ */
+ if (slotstatus == NONE) {
+ dp->i_offset = roundup(dp->i_size, DIRBLKSIZ);
+ dp->i_count = 0;
+ enduseful = dp->i_offset;
+ } else {
+ dp->i_offset = slotoffset;
+ dp->i_count = slotsize;
+ if (enduseful < slotoffset + slotsize)
+ enduseful = slotoffset + slotsize;
+ }
+ dp->i_endoff = roundup(enduseful, DIRBLKSIZ);
+ dp->i_flag |= IN_CHANGE | IN_UPDATE;
+ /*
+ * We return with the directory locked, so that
+ * the parameters we set up above will still be
+ * valid if we actually decide to do a direnter().
+ * We return ni_vp == NULL to indicate that the entry
+ * does not currently exist; we leave a pointer to
+ * the (locked) directory inode in ndp->ni_dvp.
+ * The pathname buffer is saved so that the name
+ * can be obtained later.
+ *
+ * NB - if the directory is unlocked, then this
+ * information cannot be used.
+ */
+ cnp->cn_flags |= SAVENAME;
+ if (!lockparent)
+ VOP_UNLOCK(vdp);
+ return (EJUSTRETURN);
+ }
+ /*
+ * Insert name into cache (as non-existent) if appropriate.
+ */
+ if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
+ cache_enter(vdp, *vpp, cnp);
+ return (ENOENT);
+
+found:
+ if (numdirpasses == 2)
+ nchstats.ncs_pass2++;
+ /*
+ * Check that directory length properly reflects presence
+ * of this entry.
+ */
+ if (entryoffsetinblock + DIRSIZ(FSFMT(vdp), ep) > dp->i_size) {
+ ufs_dirbad(dp, dp->i_offset, "i_size too small");
+ dp->i_size = entryoffsetinblock + DIRSIZ(FSFMT(vdp), ep);
+ dp->i_flag |= IN_CHANGE | IN_UPDATE;
+ }
+
+ /*
+ * Found component in pathname.
+ * If the final component of path name, save information
+ * in the cache as to where the entry was found.
+ */
+ if ((flags & ISLASTCN) && nameiop == LOOKUP)
+ dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);
+
+ /*
+ * If deleting, and at end of pathname, return
+ * parameters which can be used to remove file.
+ * If the wantparent flag isn't set, we return only
+ * the directory (in ndp->ni_dvp), otherwise we go
+ * on and lock the inode, being careful with ".".
+ */
+ if (nameiop == DELETE && (flags & ISLASTCN)) {
+ /*
+ * Write access to directory required to delete files.
+ */
+ if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc))
+ return (error);
+ /*
+ * Return pointer to current entry in dp->i_offset,
+ * and distance past previous entry (if there
+ * is a previous entry in this block) in dp->i_count.
+ * Save directory inode pointer in ndp->ni_dvp for dirremove().
+ */
+ if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
+ dp->i_count = 0;
+ else
+ dp->i_count = dp->i_offset - prevoff;
+ if (dp->i_number == dp->i_ino) {
+ VREF(vdp);
+ *vpp = vdp;
+ return (0);
+ }
+ if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp))
+ return (error);
+ /*
+ * If directory is "sticky", then user must own
+ * the directory, or the file in it, else she
+ * may not delete it (unless she's root). This
+ * implements append-only directories.
+ */
+ if ((dp->i_mode & ISVTX) &&
+ cred->cr_uid != 0 &&
+ cred->cr_uid != dp->i_uid &&
+ VTOI(tdp)->i_uid != cred->cr_uid) {
+ vput(tdp);
+ return (EPERM);
+ }
+ *vpp = tdp;
+ if (!lockparent)
+ VOP_UNLOCK(vdp);
+ return (0);
+ }
+
+ /*
+ * If rewriting (RENAME), return the inode and the
+ * information required to rewrite the present directory
+ * Must get inode of directory entry to verify it's a
+ * regular file, or empty directory.
+ */
+ if (nameiop == RENAME && wantparent &&
+ (flags & ISLASTCN)) {
+ if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc))
+ return (error);
+ /*
+ * Careful about locking second inode.
+ * This can only occur if the target is ".".
+ */
+ if (dp->i_number == dp->i_ino)
+ return (EISDIR);
+ if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp))
+ return (error);
+ *vpp = tdp;
+ cnp->cn_flags |= SAVENAME;
+ if (!lockparent)
+ VOP_UNLOCK(vdp);
+ return (0);
+ }
+
+ /*
+ * Step through the translation in the name. We do not `vput' the
+ * directory because we may need it again if a symbolic link
+ * is relative to the current directory. Instead we save it
+ * unlocked as "pdp". We must get the target inode before unlocking
+ * the directory to insure that the inode will not be removed
+ * before we get it. We prevent deadlock by always fetching
+ * inodes from the root, moving down the directory tree. Thus
+ * when following backward pointers ".." we must unlock the
+ * parent directory before getting the requested directory.
+ * There is a potential race condition here if both the current
+ * and parent directories are removed before the VFS_VGET for the
+ * inode associated with ".." returns. We hope that this occurs
+ * infrequently since we cannot avoid this race condition without
+ * implementing a sophisticated deadlock detection algorithm.
+ * Note also that this simple deadlock detection scheme will not
+ * work if the file system has any hard links other than ".."
+ * that point backwards in the directory structure.
+ */
+ pdp = vdp;
+ if (flags & ISDOTDOT) {
+ VOP_UNLOCK(pdp); /* race to get the inode */
+ if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) {
+ VOP_LOCK(pdp);
+ return (error);
+ }
+ if (lockparent && (flags & ISLASTCN) &&
+ (error = VOP_LOCK(pdp))) {
+ vput(tdp);
+ return (error);
+ }
+ *vpp = tdp;
+ } else if (dp->i_number == dp->i_ino) {
+ VREF(vdp); /* we want ourself, ie "." */
+ *vpp = vdp;
+ } else {
+ if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp))
+ return (error);
+ if (!lockparent || !(flags & ISLASTCN))
+ VOP_UNLOCK(pdp);
+ *vpp = tdp;
+ }
+
+ /*
+ * Insert name into cache if appropriate.
+ */
+ if (cnp->cn_flags & MAKEENTRY)
+ cache_enter(vdp, *vpp, cnp);
+ return (0);
+}
+
+/*
+ * Report an inconsistency found in a directory.
+ *
+ * Prints the mount point name, the directory's inode number, the
+ * offending offset and the caller-supplied description `how', then
+ * panics unless the file system is mounted read-only.
+ */
+void
+ufs_dirbad(ip, offset, how)
+	struct inode *ip;
+	doff_t offset;
+	char *how;
+{
+	struct mount *mp;
+
+	mp = ITOV(ip)->v_mount;
+	/*
+	 * Widen explicitly so that the arguments match the conversion
+	 * specifiers whatever the widths of doff_t and of the inode
+	 * number type happen to be.
+	 */
+	(void)printf("%s: bad dir ino %lu at offset %ld: %s\n",
+	    mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset,
+	    how);
+	if ((mp->mnt_stat.f_flags & MNT_RDONLY) == 0)
+		panic("bad dir");
+}
+
+/*
+ * Sanity-check a single directory entry.  An entry is rejected when
+ *	its record length is not a multiple of 4,
+ *	it would run past the end of its DIRBLKSIZ block,
+ *	its record is too small to contain the entry,
+ *	its name is longer than MAXNAMLEN, or
+ *	its name holds an embedded NUL or is not NUL terminated.
+ * Returns 0 for a well-formed entry and 1 otherwise.
+ */
+int
+ufs_dirbadentry(dp, ep, entryoffsetinblock)
+	struct vnode *dp;
+	register struct direct *ep;
+	int entryoffsetinblock;
+{
+	register int pos;
+	int namlen;
+
+	/*
+	 * On little-endian hosts the name length is read from d_type
+	 * rather than d_namlen when the mount's mnt_maxsymlinklen is
+	 * not positive.
+	 */
+# if (BYTE_ORDER == LITTLE_ENDIAN)
+	namlen = (dp->v_mount->mnt_maxsymlinklen > 0) ?
+	    ep->d_namlen : ep->d_type;
+# else
+	namlen = ep->d_namlen;
+# endif
+
+	/* Record-length and name-length checks. */
+	if ((ep->d_reclen & 0x3) != 0 ||
+	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
+	    ep->d_reclen < DIRSIZ(FSFMT(dp), ep) ||
+	    namlen > MAXNAMLEN) {
+		printf("First bad\n");
+		return (1);
+	}
+
+	/* No NUL may appear inside the advertised name. */
+	for (pos = 0; pos < namlen; pos++) {
+		if (ep->d_name[pos] == '\0') {
+			printf("Second bad\n");
+			return (1);
+		}
+	}
+
+	/* The byte just past the name must be the terminator. */
+	return (ep->d_name[pos] != '\0');
+}
+
+/*
+ * Write a directory entry after a call to namei, using the parameters
+ * that it left in nameidata. The argument ip is the inode which the new
+ * directory entry will refer to. Dvp is a pointer to the directory to
+ * be written, which was left locked by namei. Remaining parameters
+ * (dp->i_offset, dp->i_count) indicate how the space for the new
+ * entry is to be obtained.
+ *
+ * The name of the new entry is taken from cnp. Returns 0 on success,
+ * otherwise the error reported by VOP_WRITE, VOP_BLKATOFF, VOP_BWRITE
+ * or VOP_TRUNCATE.
+ */
+int
+ufs_direnter(ip, dvp, cnp)
+ struct inode *ip;
+ struct vnode *dvp;
+ register struct componentname *cnp;
+{
+ register struct direct *ep, *nep;
+ register struct inode *dp;
+ struct buf *bp;
+ struct direct newdir;
+ struct iovec aiov;
+ struct uio auio;
+ u_int dsize;
+ int error, loc, newentrysize, spacefree;
+ char *dirbuf;
+
+#ifdef DIAGNOSTIC
+ if ((cnp->cn_flags & SAVENAME) == 0)
+ panic("direnter: missing name");
+#endif
+ dp = VTOI(dvp);
+ /*
+ * Build the new entry in newdir first; where it is written is
+ * decided afterwards. The name is copied with one extra byte
+ * beyond cn_namelen.
+ */
+ newdir.d_ino = ip->i_number;
+ newdir.d_namlen = cnp->cn_namelen;
+ bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
+ if (dvp->v_mount->mnt_maxsymlinklen > 0)
+ newdir.d_type = IFTODT(ip->i_mode);
+ else {
+ newdir.d_type = 0;
+ /*
+ * On little-endian hosts exchange d_namlen and d_type, so the
+ * name length ends up in the d_type byte and d_namlen is 0.
+ */
+# if (BYTE_ORDER == LITTLE_ENDIAN)
+ { u_char tmp = newdir.d_namlen;
+ newdir.d_namlen = newdir.d_type;
+ newdir.d_type = tmp; }
+# endif
+ }
+ newentrysize = DIRSIZ(FSFMT(dvp), &newdir);
+ if (dp->i_count == 0) {
+ /*
+ * If dp->i_count is 0, then namei could find no
+ * space in the directory. Here, dp->i_offset will
+ * be on a directory block boundary and we will write the
+ * new entry into a fresh block.
+ */
+ if (dp->i_offset & (DIRBLKSIZ - 1))
+ panic("ufs_direnter: newblk");
+ /*
+ * The record claims the whole DIRBLKSIZ block through d_reclen,
+ * but only newentrysize bytes are handed to VOP_WRITE.
+ */
+ auio.uio_offset = dp->i_offset;
+ newdir.d_reclen = DIRBLKSIZ;
+ auio.uio_resid = newentrysize;
+ aiov.iov_len = newentrysize;
+ aiov.iov_base = (caddr_t)&newdir;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_procp = (struct proc *)0;
+ error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
+ if (DIRBLKSIZ >
+ VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
+ /* XXX should grow with balloc() */
+ panic("ufs_direnter: frag size");
+ else if (!error) {
+ /* Account for the full block: round i_size up to DIRBLKSIZ. */
+ dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
+ dp->i_flag |= IN_CHANGE;
+ }
+ return (error);
+ }
+
+ /*
+ * If dp->i_count is non-zero, then namei found space
+ * for the new entry in the range dp->i_offset to
+ * dp->i_offset + dp->i_count in the directory.
+ * To use this space, we may have to compact the entries located
+ * there, by copying them together towards the beginning of the
+ * block, leaving the free space in one usable chunk at the end.
+ */
+
+ /*
+ * Increase size of directory if entry eats into new space.
+ * This should never push the size past a new multiple of
+ * DIRBLKSIZE.
+ *
+ * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
+ */
+ if (dp->i_offset + dp->i_count > dp->i_size)
+ dp->i_size = dp->i_offset + dp->i_count;
+ /*
+ * Get the block containing the space for the new directory entry.
+ */
+ if (error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp))
+ return (error);
+ /*
+ * Find space for the new entry. In the simple case, the entry at
+ * offset base will have the space. If it does not, then namei
+ * arranged that compacting the region dp->i_offset to
+ * dp->i_offset + dp->i_count would yield the
+ * space.
+ *
+ * In the loop below ep is the slot currently being packed into,
+ * nep is the next entry found at offset loc, dsize is the space
+ * the entry at ep actually needs, and spacefree accumulates the
+ * slack recovered so far.
+ */
+ ep = (struct direct *)dirbuf;
+ dsize = DIRSIZ(FSFMT(dvp), ep);
+ spacefree = ep->d_reclen - dsize;
+ for (loc = ep->d_reclen; loc < dp->i_count; ) {
+ nep = (struct direct *)(dirbuf + loc);
+ if (ep->d_ino) {
+ /* trim the existing slot */
+ ep->d_reclen = dsize;
+ ep = (struct direct *)((char *)ep + dsize);
+ } else {
+ /* overwrite; nothing there; header is ours */
+ spacefree += dsize;
+ }
+ dsize = DIRSIZ(FSFMT(dvp), nep);
+ spacefree += nep->d_reclen - dsize;
+ loc += nep->d_reclen;
+ bcopy((caddr_t)nep, (caddr_t)ep, dsize);
+ }
+ /*
+ * Update the pointer fields in the previous entry (if any),
+ * copy in the new entry, and write out the block.
+ */
+ if (ep->d_ino == 0) {
+ /* The last slot is unused: the new entry takes it over whole. */
+ if (spacefree + dsize < newentrysize)
+ panic("ufs_direnter: compact1");
+ newdir.d_reclen = spacefree + dsize;
+ } else {
+ /* The last slot is live: trim it and append the new entry. */
+ if (spacefree < newentrysize)
+ panic("ufs_direnter: compact2");
+ newdir.d_reclen = spacefree;
+ ep->d_reclen = dsize;
+ ep = (struct direct *)((char *)ep + dsize);
+ }
+ bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize);
+ error = VOP_BWRITE(bp);
+ dp->i_flag |= IN_CHANGE | IN_UPDATE;
+ /*
+ * If the write succeeded and dp->i_endoff is set and lies below
+ * i_size, truncate the directory to i_endoff.
+ */
+ if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
+ error = VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC,
+ cnp->cn_cred, cnp->cn_proc);
+ return (error);
+}
+
+/*
+ * Remove the directory entry located by a preceding call to namei.
+ *
+ * dp->i_offset is the offset of the entry to eliminate and dp->i_count
+ * is the distance back to the previous entry.  When i_count is 0 the
+ * entry is freed simply by clearing its inode number; otherwise its
+ * space is absorbed into the previous entry by growing that entry's
+ * d_reclen by dp->i_reclen.
+ *
+ * Returns 0, or the error from VOP_BLKATOFF or VOP_BWRITE.
+ */
+int
+ufs_dirremove(dvp, cnp)
+	struct vnode *dvp;
+	struct componentname *cnp;
+{
+	register struct inode *dp = VTOI(dvp);
+	struct direct *ep;
+	struct buf *bp;
+	int error, isfirst;
+
+	isfirst = (dp->i_count == 0);
+	/*
+	 * Map the entry that has to be edited: the victim itself when
+	 * i_count is 0 (the subtraction is then a no-op), its
+	 * predecessor otherwise.
+	 */
+	error = VOP_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count),
+	    (char **)&ep, &bp);
+	if (error)
+		return (error);
+	if (isfirst)
+		ep->d_ino = 0;			/* mark the slot free */
+	else
+		ep->d_reclen += dp->i_reclen;	/* swallow the dead record */
+	error = VOP_BWRITE(bp);
+	dp->i_flag |= IN_CHANGE | IN_UPDATE;
+	return (error);
+}
+
+/*
+ * Point an existing directory entry at a different inode.
+ *
+ * dp is the directory, whose i_offset (set up by a call to namei)
+ * locates the entry; ip is the inode the entry should now name.  The
+ * entry's d_type is refreshed as well when the mount's
+ * mnt_maxsymlinklen is positive.
+ *
+ * Returns 0, or the error from VOP_BLKATOFF or VOP_BWRITE.
+ */
+int
+ufs_dirrewrite(dp, ip, cnp)
+	struct inode *dp, *ip;
+	struct componentname *cnp;
+{
+	struct vnode *dirvp;
+	struct direct *entry;
+	struct buf *blk;
+	int error;
+
+	dirvp = ITOV(dp);
+	error = VOP_BLKATOFF(dirvp, (off_t)dp->i_offset, (char **)&entry,
+	    &blk);
+	if (error != 0)
+		return (error);
+
+	entry->d_ino = ip->i_number;
+	if (dirvp->v_mount->mnt_maxsymlinklen > 0)
+		entry->d_type = IFTODT(ip->i_mode);
+
+	error = VOP_BWRITE(blk);
+	dp->i_flag |= IN_CHANGE | IN_UPDATE;
+	return (error);
+}
+
+/*
+ * Decide whether a directory holds nothing but "." and "..".
+ * Inode supplied must be locked.
+ *
+ * Only the leading MINDIRSIZ bytes of each entry are read, into a
+ * struct dirtemplate used as scratch space; that is not precisely
+ * what we want, but better than using a struct direct.
+ *
+ * Returns 1 when the directory is empty.  Returns 0 when any other
+ * name is present, when ".." does not refer to parentino, or when a
+ * read fails, comes back short, or yields a zero-length record.
+ *
+ * NB: does not handle corrupted directories.
+ */
+int
+ufs_dirempty(ip, parentino, cred)
+	register struct inode *ip;
+	ino_t parentino;
+	struct ucred *cred;
+{
+	register off_t off;
+	struct dirtemplate dbuf;
+	register struct direct *entry = (struct direct *)&dbuf;
+	int error, resid, namlen;
+#define MINDIRSIZ (sizeof (struct dirtemplate) / 2)
+
+	off = 0;
+	while (off < ip->i_size) {
+		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)entry, MINDIRSIZ,
+		    off, UIO_SYSSPACE, IO_NODELOCKED, cred, &resid,
+		    (struct proc *)0);
+		/* A full MINDIRSIZ read leaves no residual before EOF. */
+		if (error || resid != 0)
+			return (0);
+		/* A zero record length would keep us here forever. */
+		if (entry->d_reclen == 0)
+			return (0);
+		/* Unused slots are ignored; live ones must be "." or "..". */
+		if (entry->d_ino != 0) {
+# if (BYTE_ORDER == LITTLE_ENDIAN)
+			namlen = (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) ?
+			    entry->d_namlen : entry->d_type;
+# else
+			namlen = entry->d_namlen;
+# endif
+			if (namlen > 2 || entry->d_name[0] != '.')
+				return (0);
+			/*
+			 * A length of 1 means "."; anything else must be
+			 * ".." and must refer to the expected parent.
+			 */
+			if (namlen != 1 &&
+			    (entry->d_name[1] != '.' ||
+			    entry->d_ino != parentino))
+				return (0);
+		}
+		off += entry->d_reclen;
+	}
+	return (1);
+}
+
+/*
+ * Check if source directory is in the path of the target directory.
+ * Target is supplied locked, source is unlocked.
+ * The target is always vput before returning.
+ *
+ * Starting at target, the ".." entry of each directory is read and
+ * followed until the root inode is reached.
+ *
+ * Returns 0 when source is not found on the way up, EEXIST when
+ * source and target are the same inode, EINVAL when a ".." entry
+ * refers to source, ENOTDIR when a vnode on the path is not a
+ * directory or lacks a proper ".." entry, or the error from vn_rdwr
+ * or VFS_VGET.
+ */
+int
+ufs_checkpath(source, target, cred)
+	struct inode *source, *target;
+	struct ucred *cred;
+{
+	struct vnode *vp;
+	struct mount *mp;
+	int error, rootino, namlen;
+	struct dirtemplate dirbuf;
+
+	vp = ITOV(target);
+	if (target->i_number == source->i_number) {
+		error = EEXIST;
+		goto out;
+	}
+	rootino = ROOTINO;
+	error = 0;
+	if (target->i_number == rootino)
+		goto out;
+
+	for (;;) {
+		if (vp->v_type != VDIR) {
+			error = ENOTDIR;
+			break;
+		}
+		/* The template at offset 0 covers "." and "..". */
+		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
+		    sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
+		    IO_NODELOCKED, cred, (int *)0, (struct proc *)0);
+		if (error != 0)
+			break;
+# if (BYTE_ORDER == LITTLE_ENDIAN)
+		if (vp->v_mount->mnt_maxsymlinklen > 0)
+			namlen = dirbuf.dotdot_namlen;
+		else
+			namlen = dirbuf.dotdot_type;
+# else
+		namlen = dirbuf.dotdot_namlen;
+# endif
+		if (namlen != 2 ||
+		    dirbuf.dotdot_name[0] != '.' ||
+		    dirbuf.dotdot_name[1] != '.') {
+			error = ENOTDIR;
+			break;
+		}
+		if (dirbuf.dotdot_ino == source->i_number) {
+			error = EINVAL;
+			break;
+		}
+		if (dirbuf.dotdot_ino == rootino)
+			break;
+		/*
+		 * Pick up the mount point while vp is still held.  Once
+		 * vput() has released it the vnode must not be
+		 * dereferenced again.
+		 */
+		mp = vp->v_mount;
+		vput(vp);
+		error = VFS_VGET(mp, dirbuf.dotdot_ino, &vp);
+		if (error) {
+			/* Nothing is held any more; skip the vput below. */
+			vp = NULL;
+			break;
+		}
+	}
+
+out:
+	if (error == ENOTDIR)
+		printf("checkpath: .. not a directory\n");
+	if (vp != NULL)
+		vput(vp);
+	return (error);
+}
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
new file mode 100644
index 000000000000..15cb1cfbb23d
--- /dev/null
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -0,0 +1,938 @@
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Robert Elz at The University of Melbourne.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_quota.c 8.2 (Berkeley) 12/30/93
+ */
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Quota name to error message mapping.
+ * Indexed by quota type; the selected string is substituted into the
+ * uprintf() messages issued by chkdqchg() and chkiqchg().
+ */
+static char *quotatypes[] = INITQFNAMES;
+
+/*
+ * Set up the quotas for an inode.
+ *
+ * This routine completely defines the semantics of quotas.
+ * If other criteria are to be used to establish quotas, MAXQUOTAS
+ * must be increased and the additional dquots set up here.
+ *
+ * A dquot is looked up only for a slot that still holds NODQUOT.
+ * EINVAL from dqget() means that quotas are not enabled for that type
+ * and is not treated as a failure.  Returns 0, or any other dqget()
+ * error.
+ */
+int
+getinoquota(ip)
+	register struct inode *ip;
+{
+	struct vnode *vp = ITOV(ip);
+	struct ufsmount *ump = VFSTOUFS(vp->v_mount);
+	int error;
+
+	/* User quota, keyed by the file's uid. */
+	if (ip->i_dquot[USRQUOTA] == NODQUOT) {
+		error = dqget(vp, ip->i_uid, ump, USRQUOTA,
+		    &ip->i_dquot[USRQUOTA]);
+		if (error != 0 && error != EINVAL)
+			return (error);
+	}
+	/* Group quota, keyed by the file's gid. */
+	if (ip->i_dquot[GRPQUOTA] == NODQUOT) {
+		error = dqget(vp, ip->i_gid, ump, GRPQUOTA,
+		    &ip->i_dquot[GRPQUOTA]);
+		if (error != 0 && error != EINVAL)
+			return (error);
+	}
+	return (0);
+}
+
+/*
+ * Update disk usage, and take corrective action.
+ *
+ * change is the signed adjustment to the block count charged to every
+ * dquot attached to ip.  A negative change is always applied (the
+ * count is clamped at 0) and clears DQ_BLKS.  A positive change is
+ * first vetted by chkdqchg() for each dquot unless FORCE is set in
+ * flags or the credential's uid is 0.
+ *
+ * Returns 0, or the error from chkdqchg().
+ */
+int
+chkdq(ip, change, cred, flags)
+	register struct inode *ip;
+	long change;
+	struct ucred *cred;
+	int flags;
+{
+	register struct dquot *dq;
+	register int type;
+	int ncurblocks, error;
+
+#ifdef DIAGNOSTIC
+	if ((flags & CHOWN) == 0)
+		chkdquot(ip);
+#endif
+	if (change == 0)
+		return (0);
+
+	if (change < 0) {
+		/* Giving space back never fails. */
+		for (type = 0; type < MAXQUOTAS; type++) {
+			dq = ip->i_dquot[type];
+			if (dq == NODQUOT)
+				continue;
+			/* Wait until nobody else has the dquot locked. */
+			while (dq->dq_flags & DQ_LOCK) {
+				dq->dq_flags |= DQ_WANT;
+				sleep((caddr_t)dq, PINOD+1);
+			}
+			ncurblocks = dq->dq_curblocks + change;
+			dq->dq_curblocks = (ncurblocks >= 0) ? ncurblocks : 0;
+			dq->dq_flags &= ~DQ_BLKS;
+			dq->dq_flags |= DQ_MOD;
+		}
+		return (0);
+	}
+
+	/* Growing: check every limit before charging anything. */
+	if ((flags & FORCE) == 0 && cred->cr_uid != 0) {
+		for (type = 0; type < MAXQUOTAS; type++) {
+			dq = ip->i_dquot[type];
+			if (dq == NODQUOT)
+				continue;
+			error = chkdqchg(ip, change, cred, type);
+			if (error)
+				return (error);
+		}
+	}
+	for (type = 0; type < MAXQUOTAS; type++) {
+		dq = ip->i_dquot[type];
+		if (dq == NODQUOT)
+			continue;
+		while (dq->dq_flags & DQ_LOCK) {
+			dq->dq_flags |= DQ_WANT;
+			sleep((caddr_t)dq, PINOD+1);
+		}
+		dq->dq_curblocks += change;
+		dq->dq_flags |= DQ_MOD;
+	}
+	return (0);
+}
+
+/*
+ * Check for a valid change to a user's block allocation.
+ * Issue an error message if appropriate.
+ *
+ * Returns EDQUOT when the change would reach the hard limit, or when
+ * the soft limit is already exceeded and its time limit has run out;
+ * returns 0 otherwise.  Messages go only to the owner of the file,
+ * and the failure messages only once until DQ_BLKS is cleared again.
+ */
+int
+chkdqchg(ip, change, cred, type)
+	struct inode *ip;
+	long change;
+	struct ucred *cred;
+	int type;
+{
+	register struct dquot *dq = ip->i_dquot[type];
+	long ncurblocks = dq->dq_curblocks + change;
+
+	/*
+	 * Hard limit: never allowed to be reached.
+	 */
+	if (dq->dq_bhardlimit && ncurblocks >= dq->dq_bhardlimit) {
+		if ((dq->dq_flags & DQ_BLKS) == 0 &&
+		    ip->i_uid == cred->cr_uid) {
+			uprintf("\n%s: write failed, %s disk limit reached\n",
+			    ITOV(ip)->v_mount->mnt_stat.f_mntonname,
+			    quotatypes[type]);
+			dq->dq_flags |= DQ_BLKS;
+		}
+		return (EDQUOT);
+	}
+
+	/*
+	 * No soft limit, or still below it: nothing more to check.
+	 */
+	if (dq->dq_bsoftlimit == 0 || ncurblocks < dq->dq_bsoftlimit)
+		return (0);
+
+	/*
+	 * Crossing the soft limit right now: start the clock and warn.
+	 */
+	if (dq->dq_curblocks < dq->dq_bsoftlimit) {
+		dq->dq_btime = time.tv_sec +
+		    VFSTOUFS(ITOV(ip)->v_mount)->um_btime[type];
+		if (ip->i_uid == cred->cr_uid)
+			uprintf("\n%s: warning, %s %s\n",
+			    ITOV(ip)->v_mount->mnt_stat.f_mntonname,
+			    quotatypes[type], "disk quota exceeded");
+		return (0);
+	}
+
+	/*
+	 * Already over the soft limit: allowed until the clock runs out.
+	 */
+	if (time.tv_sec <= dq->dq_btime)
+		return (0);
+	if ((dq->dq_flags & DQ_BLKS) == 0 && ip->i_uid == cred->cr_uid) {
+		uprintf("\n%s: write failed, %s %s\n",
+		    ITOV(ip)->v_mount->mnt_stat.f_mntonname,
+		    quotatypes[type],
+		    "disk quota exceeded for too long");
+		dq->dq_flags |= DQ_BLKS;
+	}
+	return (EDQUOT);
+}
+
+/*
+ * Check the inode limit, applying corrective action.
+ *
+ * change is the signed adjustment to the inode count charged to every
+ * dquot attached to ip.  A negative change is always applied (the
+ * count is clamped at 0) and clears DQ_INODS.  A positive change is
+ * first vetted by chkiqchg() for each dquot unless FORCE is set in
+ * flags or the credential's uid is 0.
+ *
+ * Returns 0, or the error from chkiqchg().
+ */
+int
+chkiq(ip, change, cred, flags)
+	register struct inode *ip;
+	long change;
+	struct ucred *cred;
+	int flags;
+{
+	register struct dquot *dq;
+	register int type;
+	int ncurinodes, error;
+
+#ifdef DIAGNOSTIC
+	if ((flags & CHOWN) == 0)
+		chkdquot(ip);
+#endif
+	if (change == 0)
+		return (0);
+
+	if (change < 0) {
+		/* Giving inodes back never fails. */
+		for (type = 0; type < MAXQUOTAS; type++) {
+			dq = ip->i_dquot[type];
+			if (dq == NODQUOT)
+				continue;
+			/* Wait until nobody else has the dquot locked. */
+			while (dq->dq_flags & DQ_LOCK) {
+				dq->dq_flags |= DQ_WANT;
+				sleep((caddr_t)dq, PINOD+1);
+			}
+			ncurinodes = dq->dq_curinodes + change;
+			dq->dq_curinodes = (ncurinodes >= 0) ? ncurinodes : 0;
+			dq->dq_flags &= ~DQ_INODS;
+			dq->dq_flags |= DQ_MOD;
+		}
+		return (0);
+	}
+
+	/* Growing: check every limit before charging anything. */
+	if ((flags & FORCE) == 0 && cred->cr_uid != 0) {
+		for (type = 0; type < MAXQUOTAS; type++) {
+			dq = ip->i_dquot[type];
+			if (dq == NODQUOT)
+				continue;
+			error = chkiqchg(ip, change, cred, type);
+			if (error)
+				return (error);
+		}
+	}
+	for (type = 0; type < MAXQUOTAS; type++) {
+		dq = ip->i_dquot[type];
+		if (dq == NODQUOT)
+			continue;
+		while (dq->dq_flags & DQ_LOCK) {
+			dq->dq_flags |= DQ_WANT;
+			sleep((caddr_t)dq, PINOD+1);
+		}
+		dq->dq_curinodes += change;
+		dq->dq_flags |= DQ_MOD;
+	}
+	return (0);
+}
+
+/*
+ * Check for a valid change to a user's inode allocation.
+ * Issue an error message if appropriate.
+ *
+ * Returns EDQUOT when the change would reach the hard limit, or when
+ * the soft limit is already exceeded and its time limit has run out;
+ * returns 0 otherwise.  Messages go only to the owner of the file,
+ * and the failure messages only once until DQ_INODS is cleared again.
+ */
+int
+chkiqchg(ip, change, cred, type)
+	struct inode *ip;
+	long change;
+	struct ucred *cred;
+	int type;
+{
+	register struct dquot *dq = ip->i_dquot[type];
+	long ncurinodes = dq->dq_curinodes + change;
+
+	/*
+	 * Hard limit: never allowed to be reached.
+	 */
+	if (dq->dq_ihardlimit && ncurinodes >= dq->dq_ihardlimit) {
+		if ((dq->dq_flags & DQ_INODS) == 0 &&
+		    ip->i_uid == cred->cr_uid) {
+			uprintf("\n%s: write failed, %s inode limit reached\n",
+			    ITOV(ip)->v_mount->mnt_stat.f_mntonname,
+			    quotatypes[type]);
+			dq->dq_flags |= DQ_INODS;
+		}
+		return (EDQUOT);
+	}
+
+	/*
+	 * No soft limit, or still below it: nothing more to check.
+	 */
+	if (dq->dq_isoftlimit == 0 || ncurinodes < dq->dq_isoftlimit)
+		return (0);
+
+	/*
+	 * Crossing the soft limit right now: start the clock and warn.
+	 */
+	if (dq->dq_curinodes < dq->dq_isoftlimit) {
+		dq->dq_itime = time.tv_sec +
+		    VFSTOUFS(ITOV(ip)->v_mount)->um_itime[type];
+		if (ip->i_uid == cred->cr_uid)
+			uprintf("\n%s: warning, %s %s\n",
+			    ITOV(ip)->v_mount->mnt_stat.f_mntonname,
+			    quotatypes[type], "inode quota exceeded");
+		return (0);
+	}
+
+	/*
+	 * Already over the soft limit: allowed until the clock runs out.
+	 */
+	if (time.tv_sec <= dq->dq_itime)
+		return (0);
+	if ((dq->dq_flags & DQ_INODS) == 0 && ip->i_uid == cred->cr_uid) {
+		uprintf("\n%s: write failed, %s %s\n",
+		    ITOV(ip)->v_mount->mnt_stat.f_mntonname,
+		    quotatypes[type],
+		    "inode quota exceeded for too long");
+		dq->dq_flags |= DQ_INODS;
+	}
+	return (EDQUOT);
+}
+
+#ifdef DIAGNOSTIC
+/*
+ * Diagnostic check: on a filesystem with quotas enabled, a file must
+ * not change size without a dquot structure attached for every
+ * active quota type.  Panics when one is missing.
+ */
+void
+chkdquot(ip)
+	register struct inode *ip;
+{
+	struct ufsmount *ump = VFSTOUFS(ITOV(ip)->v_mount);
+	register int type;
+
+	for (type = 0; type < MAXQUOTAS; type++) {
+		/* No quota file for this type: nothing to verify. */
+		if (ump->um_quotas[type] == NULLVP)
+			continue;
+		/* Quotas being turned on or off: dquots may be absent. */
+		if (ump->um_qflags[type] & (QTF_OPENING|QTF_CLOSING))
+			continue;
+		if (ip->i_dquot[type] != NODQUOT)
+			continue;
+		vprint("chkdquot: missing dquot", ITOV(ip));
+		panic("missing dquot");
+	}
+}
+#endif
+
+/*
+ * Code to process quotactl commands.
+ */
+
+/*
+ * Q_QUOTAON - set up a quota file for a particular file system.
+ *
+ * p is the requesting process, mp the mount point, type the quota
+ * type, and fname the user-space path name of the quota file.
+ *
+ * Returns 0 on success; otherwise the error from vn_open(), EACCES if
+ * the file is not a regular file, EBUSY if vfs_busy() fails, or the
+ * error from getinoquota() (in which case quotas are turned off again
+ * before returning).
+ */
+int
+quotaon(p, mp, type, fname)
+ struct proc *p;
+ struct mount *mp;
+ register int type;
+ caddr_t fname;
+{
+ register struct ufsmount *ump = VFSTOUFS(mp);
+ register struct vnode *vp, **vpp;
+ struct vnode *nextvp;
+ struct dquot *dq;
+ int error;
+ struct nameidata nd;
+
+ vpp = &ump->um_quotas[type];
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, p);
+ if (error = vn_open(&nd, FREAD|FWRITE, 0))
+ return (error);
+ vp = nd.ni_vp;
+ VOP_UNLOCK(vp);
+ if (vp->v_type != VREG) {
+ (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
+ return (EACCES);
+ }
+ if (vfs_busy(mp)) {
+ (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
+ return (EBUSY);
+ }
+ /*
+ * A different file is taking over this quota type: shut down
+ * whatever was configured before (quotaoff() returns at once
+ * when no quota file is set for the type).
+ */
+ if (*vpp != vp)
+ quotaoff(p, mp, type);
+ ump->um_qflags[type] |= QTF_OPENING;
+ mp->mnt_flag |= MNT_QUOTA;
+ vp->v_flag |= VSYSTEM;
+ *vpp = vp;
+ /*
+ * Save the credential of the process that turned on quotas.
+ * Set up the time limits for this quota.
+ * The defaults are overridden by any positive dq_btime/dq_itime
+ * found in the dquot for id 0.
+ */
+ crhold(p->p_ucred);
+ ump->um_cred[type] = p->p_ucred;
+ ump->um_btime[type] = MAX_DQ_TIME;
+ ump->um_itime[type] = MAX_IQ_TIME;
+ if (dqget(NULLVP, 0, ump, type, &dq) == 0) {
+ if (dq->dq_btime > 0)
+ ump->um_btime[type] = dq->dq_btime;
+ if (dq->dq_itime > 0)
+ ump->um_itime[type] = dq->dq_itime;
+ dqrele(NULLVP, dq);
+ }
+ /*
+ * Search vnodes associated with this mount point,
+ * adding references to quota file being opened.
+ * NB: only need to add dquot's for inodes being modified.
+ *
+ * The scan starts over from the head of the list whenever vget()
+ * fails, or when the vnode's list linkage or mount no longer
+ * match what was sampled before working on it.
+ */
+again:
+ for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
+ nextvp = vp->v_mntvnodes.le_next;
+ /* Vnodes with no writers are skipped. */
+ if (vp->v_writecount == 0)
+ continue;
+ if (vget(vp, 1))
+ goto again;
+ if (error = getinoquota(VTOI(vp))) {
+ vput(vp);
+ break;
+ }
+ vput(vp);
+ if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp)
+ goto again;
+ }
+ ump->um_qflags[type] &= ~QTF_OPENING;
+ /* On failure undo everything set up above. */
+ if (error)
+ quotaoff(p, mp, type);
+ vfs_unbusy(mp);
+ return (error);
+}
+
+/*
+ * Q_QUOTAOFF - turn off disk quotas for a filesystem.
+ *
+ * The mount point must already be marked MNT_MPBUSY, otherwise this
+ * panics. Returns 0 when no quota file is active for the given type,
+ * otherwise the result of closing the quota file with vn_close().
+ */
+int
+quotaoff(p, mp, type)
+ struct proc *p;
+ struct mount *mp;
+ register int type;
+{
+ register struct vnode *vp;
+ struct vnode *qvp, *nextvp;
+ struct ufsmount *ump = VFSTOUFS(mp);
+ register struct dquot *dq;
+ register struct inode *ip;
+ int error;
+
+ if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+ panic("quotaoff: not busy");
+ if ((qvp = ump->um_quotas[type]) == NULLVP)
+ return (0);
+ ump->um_qflags[type] |= QTF_CLOSING;
+ /*
+ * Search vnodes associated with this mount point,
+ * deleting any references to quota file being closed.
+ *
+ * The scan starts over from the head of the list whenever vget()
+ * fails, or when the vnode's list linkage or mount no longer
+ * match what was sampled before working on it.
+ */
+again:
+ for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
+ nextvp = vp->v_mntvnodes.le_next;
+ if (vget(vp, 1))
+ goto again;
+ ip = VTOI(vp);
+ /* Detach the dquot from the inode before releasing it. */
+ dq = ip->i_dquot[type];
+ ip->i_dquot[type] = NODQUOT;
+ dqrele(vp, dq);
+ vput(vp);
+ if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp)
+ goto again;
+ }
+ dqflush(qvp);
+ qvp->v_flag &= ~VSYSTEM;
+ error = vn_close(qvp, FREAD|FWRITE, p->p_ucred, p);
+ ump->um_quotas[type] = NULLVP;
+ crfree(ump->um_cred[type]);
+ ump->um_cred[type] = NOCRED;
+ ump->um_qflags[type] &= ~QTF_CLOSING;
+ /*
+ * From here on `type' is reused as a scan index: MNT_QUOTA is
+ * cleared only when no quota type has a quota file left.
+ */
+ for (type = 0; type < MAXQUOTAS; type++)
+ if (ump->um_quotas[type] != NULLVP)
+ break;
+ if (type == MAXQUOTAS)
+ mp->mnt_flag &= ~MNT_QUOTA;
+ return (error);
+}
+
+/*
+ * Q_GETQUOTA - return current values in a dqblk structure.
+ *
+ * Looks up the dquot for (id, type) on mp and copies its dqblk out to
+ * the user address addr.  Returns the error from dqget() or, once
+ * the dquot has been obtained, the result of copyout().
+ */
+int
+getquota(mp, id, type, addr)
+	struct mount *mp;
+	u_long id;
+	int type;
+	caddr_t addr;
+{
+	struct dquot *dq;
+	int error;
+
+	error = dqget(NULLVP, id, VFSTOUFS(mp), type, &dq);
+	if (error == 0) {
+		error = copyout((caddr_t)&dq->dq_dqb, addr,
+		    sizeof (struct dqblk));
+		dqrele(NULLVP, dq);
+	}
+	return (error);
+}
+
+/*
+ * Q_SETQUOTA - assign an entire dqblk structure.
+ *
+ * The new limits are copied in from the user address addr and
+ * installed in the dquot for (id, type).  The current usage counts
+ * are never taken from the caller, and the time limits are taken from
+ * the caller only for id 0.
+ *
+ * Returns 0, or the error from copyin() or dqget().
+ */
+int
+setquota(mp, id, type, addr)
+	struct mount *mp;
+	u_long id;
+	int type;
+	caddr_t addr;
+{
+	struct ufsmount *ump = VFSTOUFS(mp);
+	struct dquot *dq;
+	struct dqblk newlim;
+	int error;
+
+	error = copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk));
+	if (error)
+		return (error);
+	error = dqget(NULLVP, id, ump, type, &dq);
+	if (error)
+		return (error);
+	/* Wait until nobody else has the dquot locked. */
+	while (dq->dq_flags & DQ_LOCK) {
+		dq->dq_flags |= DQ_WANT;
+		sleep((caddr_t)dq, PINOD+1);
+	}
+
+	/*
+	 * Keep the in-core usage, and for every id but 0 the in-core
+	 * time limits as well.
+	 */
+	newlim.dqb_curblocks = dq->dq_curblocks;
+	newlim.dqb_curinodes = dq->dq_curinodes;
+	if (dq->dq_id != 0) {
+		newlim.dqb_btime = dq->dq_btime;
+		newlim.dqb_itime = dq->dq_itime;
+	}
+
+	/*
+	 * Restart the clock when usage is at or over the new soft limit
+	 * but there was no soft limit before, or usage was under it.
+	 */
+	if (newlim.dqb_bsoftlimit &&
+	    dq->dq_curblocks >= newlim.dqb_bsoftlimit) {
+		if (dq->dq_bsoftlimit == 0 ||
+		    dq->dq_curblocks < dq->dq_bsoftlimit)
+			newlim.dqb_btime = time.tv_sec + ump->um_btime[type];
+	}
+	if (newlim.dqb_isoftlimit &&
+	    dq->dq_curinodes >= newlim.dqb_isoftlimit) {
+		if (dq->dq_isoftlimit == 0 ||
+		    dq->dq_curinodes < dq->dq_isoftlimit)
+			newlim.dqb_itime = time.tv_sec + ump->um_itime[type];
+	}
+
+	dq->dq_dqb = newlim;
+
+	/* Under the (new) soft limit again: re-arm the failure messages. */
+	if (dq->dq_curblocks < dq->dq_bsoftlimit)
+		dq->dq_flags &= ~DQ_BLKS;
+	if (dq->dq_curinodes < dq->dq_isoftlimit)
+		dq->dq_flags &= ~DQ_INODS;
+
+	/* A dquot carrying no limit at all is flagged DQ_FAKE. */
+	if (dq->dq_isoftlimit || dq->dq_bsoftlimit ||
+	    dq->dq_ihardlimit || dq->dq_bhardlimit)
+		dq->dq_flags &= ~DQ_FAKE;
+	else
+		dq->dq_flags |= DQ_FAKE;
+	dq->dq_flags |= DQ_MOD;
+	dqrele(NULLVP, dq);
+	return (0);
+}
+
+/*
+ * Q_SETUSE - set current inode and block usage.
+ *
+ * The usage counts are copied in from the user address addr and
+ * stored in the dquot for (id, type); only dqb_curblocks and
+ * dqb_curinodes of the supplied dqblk are used.
+ *
+ * Returns 0, or the error from copyin() or dqget().
+ */
+int
+setuse(mp, id, type, addr)
+	struct mount *mp;
+	u_long id;
+	int type;
+	caddr_t addr;
+{
+	struct ufsmount *ump = VFSTOUFS(mp);
+	struct dquot *dq;
+	struct dqblk usage;
+	int error;
+
+	error = copyin(addr, (caddr_t)&usage, sizeof (struct dqblk));
+	if (error)
+		return (error);
+	error = dqget(NULLVP, id, ump, type, &dq);
+	if (error)
+		return (error);
+	/* Wait until nobody else has the dquot locked. */
+	while (dq->dq_flags & DQ_LOCK) {
+		dq->dq_flags |= DQ_WANT;
+		sleep((caddr_t)dq, PINOD+1);
+	}
+
+	/*
+	 * Restart the clock for a soft limit that the old usage was
+	 * under and the new usage reaches.  This has to look at the old
+	 * counts, so it comes before they are overwritten.
+	 */
+	if (dq->dq_bsoftlimit) {
+		if (dq->dq_curblocks < dq->dq_bsoftlimit &&
+		    usage.dqb_curblocks >= dq->dq_bsoftlimit)
+			dq->dq_btime = time.tv_sec + ump->um_btime[type];
+	}
+	if (dq->dq_isoftlimit) {
+		if (dq->dq_curinodes < dq->dq_isoftlimit &&
+		    usage.dqb_curinodes >= dq->dq_isoftlimit)
+			dq->dq_itime = time.tv_sec + ump->um_itime[type];
+	}
+
+	dq->dq_curblocks = usage.dqb_curblocks;
+	dq->dq_curinodes = usage.dqb_curinodes;
+
+	/* Under the soft limit again: re-arm the failure messages. */
+	if (dq->dq_curblocks < dq->dq_bsoftlimit)
+		dq->dq_flags &= ~DQ_BLKS;
+	if (dq->dq_curinodes < dq->dq_isoftlimit)
+		dq->dq_flags &= ~DQ_INODS;
+	dq->dq_flags |= DQ_MOD;
+	dqrele(NULLVP, dq);
+	return (0);
+}
+
+/*
+ * Q_SYNC - sync quota files to disk.
+ *
+ * The mount point must already be marked MNT_MPBUSY, otherwise this
+ * panics. Always returns 0.
+ */
+int
+qsync(mp)
+ struct mount *mp;
+{
+ struct ufsmount *ump = VFSTOUFS(mp);
+ register struct vnode *vp, *nextvp;
+ register struct dquot *dq;
+ register int i;
+
+ /*
+ * Check if the mount point has any quotas.
+ * If not, simply return.
+ */
+ if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+ panic("qsync: not busy");
+ for (i = 0; i < MAXQUOTAS; i++)
+ if (ump->um_quotas[i] != NULLVP)
+ break;
+ if (i == MAXQUOTAS)
+ return (0);
+ /*
+ * Search vnodes associated with this mount point,
+ * synchronizing any modified dquot structures.
+ *
+ * The scan starts over from the head of the list whenever vget()
+ * fails, or when the vnode's list linkage or mount no longer
+ * match what was sampled before working on it.
+ */
+again:
+ for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
+ nextvp = vp->v_mntvnodes.le_next;
+ /* Vnodes that are currently locked are skipped, not waited for. */
+ if (VOP_ISLOCKED(vp))
+ continue;
+ if (vget(vp, 1))
+ goto again;
+ /* Write back every attached dquot that is marked DQ_MOD. */
+ for (i = 0; i < MAXQUOTAS; i++) {
+ dq = VTOI(vp)->i_dquot[i];
+ if (dq != NODQUOT && (dq->dq_flags & DQ_MOD))
+ dqsync(vp, dq);
+ }
+ vput(vp);
+ if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp)
+ goto again;
+ }
+ return (0);
+}
+
+/*
+ * Code pertaining to management of the in-core dquot data structures.
+ */
+struct dquot **dqhashtbl; /* hash chain heads; allocated in dqinit() */
+u_long dqhash; /* set by hashinit(); used as the index mask in dqget() */
+
+/*
+ * Dquot free list.
+ * dqfreel is the head; dqback points at the link field through which
+ * the next released dquot is appended (see dqrele()).
+ */
+#define DQUOTINC 5 /* minimum free dquots desired */
+struct dquot *dqfreel, **dqback = &dqfreel;
+long numdquot, desireddquot = DQUOTINC; /* allocated so far / allocation ceiling */
+
+/*
+ * Initialize the quota system.
+ *
+ * Allocates the dquot hash table with hashinit(), sized from
+ * desiredvnodes and tagged M_DQUOT; hashinit() also fills in dqhash.
+ */
+void
+dqinit()
+{
+
+ dqhashtbl = hashinit(desiredvnodes, M_DQUOT, &dqhash);
+}
+
+/*
+ * Obtain a dquot structure for the specified identifier and quota file
+ * reading the information from the file if necessary.
+ *
+ * vp is the vnode on whose behalf the lookup is made; when it is the
+ * quota file itself the quota file is not locked again around the read.
+ * id and type select the record and ump is the mount whose quota file
+ * is consulted.
+ *
+ * On success returns 0 and stores a referenced dquot in *dqp.  On
+ * failure stores NODQUOT in *dqp and returns EINVAL (no quota file for
+ * this type, or it is being closed), EUSERS (no dquot could be
+ * allocated or recycled) or the error from VOP_READ.
+ *
+ * NOTE(review): the file offset is computed as id * sizeof (struct dqblk)
+ * before the cast to off_t, so the product may wrap where u_long is 32
+ * bits wide; dqsync() forms its offset the same way -- confirm and fix
+ * both together.
+ */
+int
+dqget(vp, id, ump, type, dqp)
+ struct vnode *vp;
+ u_long id;
+ register struct ufsmount *ump;
+ register int type;
+ struct dquot **dqp;
+{
+ register struct dquot *dq, *dp, **dpp;
+ register struct vnode *dqvp;
+ struct iovec aiov;
+ struct uio auio;
+ int error;
+
+ dqvp = ump->um_quotas[type];
+ if (dqvp == NULLVP || (ump->um_qflags[type] & QTF_CLOSING)) {
+ *dqp = NODQUOT;
+ return (EINVAL);
+ }
+ /*
+ * Check the cache first.
+ * The chain is selected from the quota vnode address and the id.
+ */
+ dpp = &dqhashtbl[((((int)(dqvp)) >> 8) + id) & dqhash];
+ for (dq = *dpp; dq; dq = dq->dq_forw) {
+ if (dq->dq_id != id ||
+ dq->dq_ump->um_quotas[dq->dq_type] != dqvp)
+ continue;
+ /*
+ * Cache hit with no references. Take
+ * the structure off the free list.
+ */
+ if (dq->dq_cnt == 0) {
+ if ((dp = dq->dq_freef) != NODQUOT)
+ dp->dq_freeb = dq->dq_freeb;
+ else
+ dqback = dq->dq_freeb; /* dq was the tail of the free list */
+ *dq->dq_freeb = dp;
+ }
+ DQREF(dq);
+ *dqp = dq;
+ return (0);
+ }
+ /*
+ * Not in cache, allocate a new one.
+ * The allocation ceiling grows by DQUOTINC while the free list is
+ * empty, up to MAXQUOTAS * desiredvnodes; past the ceiling the head
+ * of the free list is recycled instead.
+ */
+ if (dqfreel == NODQUOT && numdquot < MAXQUOTAS * desiredvnodes)
+ desireddquot += DQUOTINC;
+ if (numdquot < desireddquot) {
+ dq = (struct dquot *)malloc(sizeof *dq, M_DQUOT, M_WAITOK);
+ bzero((char *)dq, sizeof *dq);
+ numdquot++;
+ } else {
+ if ((dq = dqfreel) == NULL) {
+ tablefull("dquot");
+ *dqp = NODQUOT;
+ return (EUSERS);
+ }
+ if (dq->dq_cnt || (dq->dq_flags & DQ_MOD))
+ panic("free dquot isn't");
+ if ((dp = dq->dq_freef) != NODQUOT)
+ dp->dq_freeb = &dqfreel;
+ else
+ dqback = &dqfreel; /* free list is now empty */
+ dqfreel = dp;
+ dq->dq_freef = NULL;
+ dq->dq_freeb = NULL;
+ if (dp = dq->dq_forw) /* unlink from its old hash chain */
+ dp->dq_back = dq->dq_back;
+ *dq->dq_back = dp;
+ }
+ /*
+ * Initialize the contents of the dquot structure.
+ * It is inserted at the head of its hash chain and marked DQ_LOCK
+ * before the record is read from the quota file.
+ */
+ if (vp != dqvp)
+ VOP_LOCK(dqvp);
+ if (dp = *dpp)
+ dp->dq_back = &dq->dq_forw;
+ dq->dq_forw = dp;
+ dq->dq_back = dpp;
+ *dpp = dq;
+ DQREF(dq);
+ dq->dq_flags = DQ_LOCK;
+ dq->dq_id = id;
+ dq->dq_ump = ump;
+ dq->dq_type = type;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ aiov.iov_base = (caddr_t)&dq->dq_dqb;
+ aiov.iov_len = sizeof (struct dqblk);
+ auio.uio_resid = sizeof (struct dqblk);
+ auio.uio_offset = (off_t)(id * sizeof (struct dqblk));
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_procp = (struct proc *)0;
+ error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]);
+ if (auio.uio_resid == sizeof(struct dqblk) && error == 0)
+ bzero((caddr_t)&dq->dq_dqb, sizeof(struct dqblk)); /* nothing read: use an all-zero record */
+ if (vp != dqvp)
+ VOP_UNLOCK(dqvp);
+ if (dq->dq_flags & DQ_WANT)
+ wakeup((caddr_t)dq);
+ dq->dq_flags = 0;
+ /*
+ * I/O error in reading quota file, release
+ * quota structure and reflect problem to caller.
+ * The dquot is taken off its hash chain first so that it cannot
+ * be found by a later lookup.
+ */
+ if (error) {
+ if (dp = dq->dq_forw)
+ dp->dq_back = dq->dq_back;
+ *dq->dq_back = dp;
+ dq->dq_forw = NULL;
+ dq->dq_back = NULL;
+ dqrele(vp, dq);
+ *dqp = NODQUOT;
+ return (error);
+ }
+ /*
+ * Check for no limit to enforce.
+ * Initialize time values if necessary.
+ * Id 0 never has its time values initialized here.
+ */
+ if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 &&
+ dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0)
+ dq->dq_flags |= DQ_FAKE;
+ if (dq->dq_id != 0) {
+ if (dq->dq_btime == 0)
+ dq->dq_btime = time.tv_sec + ump->um_btime[type];
+ if (dq->dq_itime == 0)
+ dq->dq_itime = time.tv_sec + ump->um_itime[type];
+ }
+ *dqp = dq;
+ return (0);
+}
+
+/*
+ * Obtain a reference to a dquot.
+ *
+ * Simply increments dq_cnt; dq is dereferenced unconditionally, so it
+ * must not be NODQUOT.
+ */
+void
+dqref(dq)
+ struct dquot *dq;
+{
+
+ dq->dq_cnt++;
+}
+
+/*
+ * Release a reference to a dquot.
+ *
+ * NODQUOT is ignored.  If other references remain the count is just
+ * decremented.  Otherwise a modified dquot is first written back with
+ * dqsync(vp, dq) (its result is discarded) and the dquot is then
+ * appended to the tail of the free list; it stays on its hash chain.
+ */
+void
+dqrele(vp, dq)
+ struct vnode *vp;
+ register struct dquot *dq;
+{
+
+ if (dq == NODQUOT)
+ return;
+ if (dq->dq_cnt > 1) {
+ dq->dq_cnt--;
+ return;
+ }
+ if (dq->dq_flags & DQ_MOD)
+ (void) dqsync(vp, dq);
+ if (--dq->dq_cnt > 0) /* presumably a reference was gained during dqsync() -- TODO confirm */
+ return;
+ if (dqfreel != NODQUOT) {
+ *dqback = dq;
+ dq->dq_freeb = dqback;
+ } else {
+ dqfreel = dq;
+ dq->dq_freeb = &dqfreel;
+ }
+ dq->dq_freef = NODQUOT;
+ dqback = &dq->dq_freef; /* dq is the new tail */
+}
+
+/*
+ * Update the disk quota in the quota file.
+ *
+ * vp is the vnode on whose behalf the sync is done; when it is the
+ * quota file itself the quota file is not locked again.  Returns 0 if
+ * dq is not marked DQ_MOD (either on entry or after sleeping for
+ * DQ_LOCK), otherwise the error from VOP_WRITE, or EIO if the write was
+ * short without reporting an error.  DQ_MOD is cleared even when the
+ * write fails.  Panics if dq is NODQUOT or its quota file is gone.
+ *
+ * NOTE(review): the file offset is computed as
+ * dq_id * sizeof (struct dqblk) before the cast to off_t, so the product
+ * may wrap where the id type is 32 bits wide; dqget() forms its offset
+ * the same way -- confirm and fix both together.
+ */
+int
+dqsync(vp, dq)
+ struct vnode *vp;
+ register struct dquot *dq;
+{
+ struct vnode *dqvp;
+ struct iovec aiov;
+ struct uio auio;
+ int error;
+
+ if (dq == NODQUOT)
+ panic("dqsync: dquot");
+ if ((dq->dq_flags & DQ_MOD) == 0)
+ return (0);
+ if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP)
+ panic("dqsync: file");
+ if (vp != dqvp)
+ VOP_LOCK(dqvp);
+ while (dq->dq_flags & DQ_LOCK) {
+ dq->dq_flags |= DQ_WANT;
+ sleep((caddr_t)dq, PINOD+2);
+ if ((dq->dq_flags & DQ_MOD) == 0) { /* no longer modified after the sleep */
+ if (vp != dqvp)
+ VOP_UNLOCK(dqvp);
+ return (0);
+ }
+ }
+ dq->dq_flags |= DQ_LOCK;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ aiov.iov_base = (caddr_t)&dq->dq_dqb;
+ aiov.iov_len = sizeof (struct dqblk);
+ auio.uio_resid = sizeof (struct dqblk);
+ auio.uio_offset = (off_t)(dq->dq_id * sizeof (struct dqblk));
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_procp = (struct proc *)0;
+ error = VOP_WRITE(dqvp, &auio, 0, dq->dq_ump->um_cred[dq->dq_type]);
+ if (auio.uio_resid && error == 0)
+ error = EIO; /* short write without an error code */
+ if (dq->dq_flags & DQ_WANT)
+ wakeup((caddr_t)dq);
+ dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT);
+ if (vp != dqvp)
+ VOP_UNLOCK(dqvp);
+ return (error);
+}
+
+/*
+ * Flush all entries from the cache for a particular vnode.
+ *
+ * vp is the quota file vnode.  Every dquot whose mount still names vp
+ * as the quota file for its type is unhashed and has dq_ump cleared.
+ * Panics if such a dquot still holds references.
+ */
+void
+dqflush(vp)
+ register struct vnode *vp;
+{
+ register struct dquot *dq, *dp, **dpp, *nextdq;
+
+ /*
+ * Move all dquot's that used to refer to this quota
+ * file off their hash chains (they will eventually
+ * fall off the head of the free list and be re-used).
+ */
+ for (dpp = &dqhashtbl[dqhash]; dpp >= dqhashtbl; dpp--) {
+ for (dq = *dpp; dq; dq = nextdq) {
+ nextdq = dq->dq_forw; /* saved: dq_forw is cleared below */
+ if (dq->dq_ump->um_quotas[dq->dq_type] != vp)
+ continue;
+ if (dq->dq_cnt)
+ panic("dqflush: stray dquot");
+ if (dp = dq->dq_forw)
+ dp->dq_back = dq->dq_back;
+ *dq->dq_back = dp;
+ dq->dq_forw = NULL;
+ dq->dq_back = NULL;
+ dq->dq_ump = (struct ufsmount *)0;
+ }
+ }
+}
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
new file mode 100644
index 000000000000..5ead2c1a9adf
--- /dev/null
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -0,0 +1,295 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_readwrite.c 8.7 (Berkeley) 1/21/94
+ */
+
+#ifdef LFS_READWRITE /* select the lfs_ names for the shared code below */
+#define BLKSIZE(a, b, c) blksize(a)
+#define FS struct lfs
+#define I_FS i_lfs
+#define READ lfs_read
+#define READ_S "lfs_read" /* function name used in panic messages */
+#define WRITE lfs_write
+#define WRITE_S "lfs_write" /* function name used in panic messages */
+#define fs_bsize lfs_bsize
+#define fs_maxfilesize lfs_maxfilesize
+#else /* !LFS_READWRITE: select the ffs_ names */
+#define BLKSIZE(a, b, c) blksize(a, b, c)
+#define FS struct fs
+#define I_FS i_fs
+#define READ ffs_read
+#define READ_S "ffs_read" /* function name used in panic messages */
+#define WRITE ffs_write
+#define WRITE_S "ffs_write" /* function name used in panic messages */
+#endif /* LFS_READWRITE */
+
+/*
+ * Vnode op for reading.
+ *
+ * Copies data from the file into ap->a_uio one block at a time until
+ * the request is satisfied or end of file is reached, then marks the
+ * inode IN_ACCESS (also when the loop stopped on an error).  Returns
+ * EFBIG, without touching the inode, if the starting offset is beyond
+ * fs_maxfilesize; otherwise 0 or the error from the block read or from
+ * uiomove().
+ */
+/* ARGSUSED */
+READ(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp;
+ register struct inode *ip;
+ register struct uio *uio;
+ register FS *fs;
+ struct buf *bp;
+ daddr_t lbn, nextlbn;
+ off_t bytesinfile;
+ long size, xfersize, blkoffset;
+ int error;
+ u_short mode;
+
+ vp = ap->a_vp;
+ ip = VTOI(vp);
+ mode = ip->i_mode;
+ uio = ap->a_uio;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_READ)
+ panic("%s: mode", READ_S);
+
+ if (vp->v_type == VLNK) {
+ if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
+ panic("%s: short symlink", READ_S);
+ } else if (vp->v_type != VREG && vp->v_type != VDIR)
+ panic("%s: type %d", READ_S, vp->v_type);
+#endif
+ fs = ip->I_FS;
+ if ((u_quad_t)uio->uio_offset > fs->fs_maxfilesize)
+ return (EFBIG);
+
+ for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
+ if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
+ break; /* at or past end of file */
+ lbn = lblkno(fs, uio->uio_offset);
+ nextlbn = lbn + 1;
+ size = BLKSIZE(fs, ip, lbn);
+ blkoffset = blkoff(fs, uio->uio_offset);
+ xfersize = fs->fs_bsize - blkoffset; /* limited below by the request and by end of file */
+ if (uio->uio_resid < xfersize)
+ xfersize = uio->uio_resid;
+ if (bytesinfile < xfersize)
+ xfersize = bytesinfile;
+
+#ifdef LFS_READWRITE
+ (void)lfs_check(vp, lbn);
+ error = cluster_read(vp, ip->i_size, lbn, size, NOCRED, &bp);
+#else
+ if (lblktosize(fs, nextlbn) > ip->i_size)
+ error = bread(vp, lbn, size, NOCRED, &bp); /* next block starts past end of file: plain read */
+ else if (doclusterread)
+ error = cluster_read(vp,
+ ip->i_size, lbn, size, NOCRED, &bp);
+ else if (lbn - 1 == vp->v_lastr) { /* follows the last block read: also read the next one */
+ int nextsize = BLKSIZE(fs, ip, nextlbn);
+ error = breadn(vp, lbn,
+ size, &nextlbn, &nextsize, 1, NOCRED, &bp);
+ } else
+ error = bread(vp, lbn, size, NOCRED, &bp);
+#endif
+ if (error)
+ break; /* bp, if set, is released after the loop */
+ vp->v_lastr = lbn;
+
+ /*
+ * We should only get non-zero b_resid when an I/O error
+ * has occurred, which should cause us to break above.
+ * However, if the short read did not cause an error,
+ * then we want to ensure that we do not uiomove bad
+ * or uninitialized data.
+ */
+ size -= bp->b_resid;
+ if (size < xfersize) {
+ if (size == 0)
+ break;
+ xfersize = size;
+ }
+ if (error =
+ uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio))
+ break;
+
+ if (S_ISREG(mode) && (xfersize + blkoffset == fs->fs_bsize ||
+ uio->uio_offset == ip->i_size))
+ bp->b_flags |= B_AGE; /* regular file: block consumed to its end, or end of file reached */
+ brelse(bp);
+ }
+ if (bp != NULL)
+ brelse(bp); /* buffer still held when the loop was left by a break */
+ ip->i_flag |= IN_ACCESS;
+ return (error);
+}
+
+/*
+ * Vnode op for writing.
+ *
+ * Copies data from ap->a_uio into the file one block at a time,
+ * allocating blocks and growing i_size as needed.
+ *
+ * Returns EPERM for a regular file with the APPEND flag when the offset
+ * is not the current end of file; EFBIG for a negative offset, for a
+ * write extending past fs_maxfilesize, or (after posting SIGXFSZ) past
+ * the process file size limit; otherwise 0 or the error from block
+ * allocation, uiomove() or VOP_UPDATE().  Panics on a directory write
+ * without IO_SYNC and on vnode types other than VREG, VLNK and VDIR.
+ *
+ * On error with IO_UNIT set the file is truncated back to its original
+ * size and the uio is restored so that the write appears not to have
+ * happened.
+ */
+WRITE(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp;
+ register struct uio *uio;
+ register struct inode *ip;
+ register FS *fs;
+ struct buf *bp;
+ struct proc *p;
+ daddr_t lbn;
+ off_t osize;
+ int blkoffset, error, flags, ioflag, resid, size, xfersize;
+
+ ioflag = ap->a_ioflag;
+ uio = ap->a_uio;
+ vp = ap->a_vp;
+ ip = VTOI(vp);
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_WRITE)
+ panic("%s: mode", WRITE_S);
+#endif
+
+ switch (vp->v_type) {
+ case VREG:
+ if (ioflag & IO_APPEND)
+ uio->uio_offset = ip->i_size; /* appending: always start at end of file */
+ if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
+ return (EPERM);
+ /* FALLTHROUGH */
+ case VLNK:
+ break;
+ case VDIR:
+ if ((ioflag & IO_SYNC) == 0)
+ panic("%s: nonsync dir write", WRITE_S);
+ break;
+ default:
+ panic("%s: type", WRITE_S);
+ }
+
+ fs = ip->I_FS;
+ if (uio->uio_offset < 0 ||
+ (u_quad_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
+ return (EFBIG);
+ /*
+ * Maybe this should be above the vnode op call, but so long as
+ * file servers have no limits, I don't think it matters.
+ */
+ p = uio->uio_procp;
+ if (vp->v_type == VREG && p &&
+ uio->uio_offset + uio->uio_resid >
+ p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
+ psignal(p, SIGXFSZ);
+ return (EFBIG);
+ }
+
+ resid = uio->uio_resid; /* original request size, used to detect progress */
+ osize = ip->i_size; /* original file size, used to undo on error */
+ flags = ioflag & IO_SYNC ? B_SYNC : 0;
+
+ for (error = 0; uio->uio_resid > 0;) {
+ lbn = lblkno(fs, uio->uio_offset);
+ blkoffset = blkoff(fs, uio->uio_offset);
+ xfersize = fs->fs_bsize - blkoffset;
+ if (uio->uio_resid < xfersize)
+ xfersize = uio->uio_resid;
+#ifdef LFS_READWRITE
+ (void)lfs_check(vp, lbn);
+ error = lfs_balloc(vp, xfersize, lbn, &bp);
+#else
+ if (fs->fs_bsize > xfersize)
+ flags |= B_CLRBUF; /* partial-block write */
+ else
+ flags &= ~B_CLRBUF; /* whole block will be overwritten */
+
+ error = ffs_balloc(ip,
+ lbn, blkoffset + xfersize, ap->a_cred, &bp, flags);
+#endif
+ if (error)
+ break;
+ if (uio->uio_offset + xfersize > ip->i_size) {
+ ip->i_size = uio->uio_offset + xfersize;
+ vnode_pager_setsize(vp, (u_long)ip->i_size);
+ }
+ (void)vnode_pager_uncache(vp);
+
+ size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
+ if (size < xfersize)
+ xfersize = size;
+
+ error =
+ uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
+#ifdef LFS_READWRITE
+ (void)VOP_BWRITE(bp);
+#else
+ if (ioflag & IO_SYNC)
+ (void)bwrite(bp);
+ else if (xfersize + blkoffset == fs->fs_bsize) /* block filled to its end */
+ if (doclusterwrite)
+ cluster_write(bp, ip->i_size);
+ else {
+ bp->b_flags |= B_AGE;
+ bawrite(bp);
+ }
+ else
+ bdwrite(bp); /* block not yet full: delayed write */
+#endif
+ if (error || xfersize == 0)
+ break; /* the buffer has already been handed off above */
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ }
+ /*
+ * If we successfully wrote any data, and we are not the superuser
+ * we clear the setuid and setgid bits as a precaution against
+ * tampering.
+ */
+ if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
+ ip->i_mode &= ~(ISUID | ISGID);
+ if (error) {
+ if (ioflag & IO_UNIT) {
+ (void)VOP_TRUNCATE(vp, osize,
+ ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
+ uio->uio_offset -= resid - uio->uio_resid;
+ uio->uio_resid = resid;
+ }
+ } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
+ error = VOP_UPDATE(vp, &time, &time, 1);
+ return (error);
+}
diff --git a/sys/ufs/ufs/ufs_vfsops.c b/sys/ufs/ufs/ufs_vfsops.c
new file mode 100644
index 000000000000..f806e0b2a831
--- /dev/null
+++ b/sys/ufs/ufs/ufs_vfsops.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 1991, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_vfsops.c 8.4 (Berkeley) 4/16/94
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Flag to permit forcible unmounting.
+ */
+int doforce = 1;
+
+/*
+ * Make a filesystem operational.
+ * Nothing to do at the moment: none of the arguments is used and
+ * 0 is always returned.
+ */
+/* ARGSUSED */
+int
+ufs_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+
+ return (0);
+}
+
+/*
+ * Return the root of a filesystem.
+ *
+ * Looks up inode ROOTINO on mp with VFS_VGET().  On success the vnode
+ * is stored in *vpp and 0 is returned; on failure the VFS_VGET() error
+ * is returned and *vpp is left untouched.
+ */
+int
+ufs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp;
+	int rv;
+
+	rv = VFS_VGET(mp, (ino_t)ROOTINO, &rootvp);
+	if (rv == 0)
+		*vpp = rootvp;
+	return (rv);
+}
+
+/*
+ * Do operations associated with quotas.
+ *
+ * cmds packs two fields: the command in the bits above SUBCMDSHIFT and
+ * the quota type in the low SUBCMDMASK bits.  uid names the user or
+ * group the command applies to; -1 selects the caller's real uid.
+ * arg is passed through to the individual command handler.
+ *
+ * Q_GETQUOTA and Q_SYNC on the caller's own real uid need no privilege;
+ * everything else requires suser().  Returns EOPNOTSUPP when the kernel
+ * is built without QUOTA, EINVAL for an unknown command or a type of
+ * MAXQUOTAS or more, the suser() error, or the handler's result.
+ * Q_QUOTAOFF and Q_SYNC return 0 without doing anything if vfs_busy()
+ * fails.
+ */
+int
+ufs_quotactl(mp, cmds, uid, arg, p)
+	struct mount *mp;
+	int cmds;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	int cmd, type, error;
+
+#ifndef QUOTA
+	return (EOPNOTSUPP);
+#else
+	if (uid == -1)
+		uid = p->p_cred->p_ruid;
+	cmd = cmds >> SUBCMDSHIFT;
+
+	switch (cmd) {
+	case Q_GETQUOTA:
+	case Q_SYNC:
+		if (uid == p->p_cred->p_ruid)
+			break;
+		/* fall through */
+	default:
+		if (error = suser(p->p_ucred, &p->p_acflag))
+			return (error);
+	}
+
+	/*
+	 * The type must be taken from the packed argument, not from the
+	 * already shifted command: masking cmd would pick up command
+	 * bits instead of the caller's quota type.
+	 */
+	type = cmds & SUBCMDMASK;
+	if ((u_int)type >= MAXQUOTAS)
+		return (EINVAL);
+
+	switch (cmd) {
+
+	case Q_QUOTAON:
+		return (quotaon(p, mp, type, arg));
+
+	case Q_QUOTAOFF:
+		if (vfs_busy(mp))
+			return (0);
+		error = quotaoff(p, mp, type);
+		vfs_unbusy(mp);
+		return (error);
+
+	case Q_SETQUOTA:
+		return (setquota(mp, uid, type, arg));
+
+	case Q_SETUSE:
+		return (setuse(mp, uid, type, arg));
+
+	case Q_GETQUOTA:
+		return (getquota(mp, uid, type, arg));
+
+	case Q_SYNC:
+		if (vfs_busy(mp))
+			return (0);
+		error = qsync(mp);
+		vfs_unbusy(mp);
+		return (error);
+
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+#endif
+}
+
+/*
+ * Generic tail of fhtovp, run once the underlying filesystem has
+ * accepted the file handle.
+ *
+ * Checks that the client address nam is exported to for mp and, if so,
+ * turns the file handle into a vnode.  Returns EACCES when there is no
+ * export entry for the client, the VFS_VGET() error when the inode
+ * cannot be obtained, and ESTALE when the inode is unallocated
+ * (i_mode == 0) or its generation differs from the handle's.  On
+ * success *vpp, *exflagsp and *credanonp are filled in and 0 is
+ * returned; on a VFS_VGET() or ESTALE failure *vpp is set to NULLVP.
+ */
+int
+ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct ufid *ufhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	struct ufsmount *ump;
+	struct netcred *expcred;
+	struct vnode *fhvp;
+	struct inode *fhip;
+	int rv;
+
+	/* Find the export permissions for this <mp, client> pair. */
+	ump = VFSTOUFS(mp);
+	expcred = vfs_export_lookup(mp, &ump->um_export, nam);
+	if (expcred == NULL)
+		return (EACCES);
+
+	rv = VFS_VGET(mp, ufhp->ufid_ino, &fhvp);
+	if (rv == 0) {
+		fhip = VTOI(fhvp);
+		if (fhip->i_mode != 0 && fhip->i_gen == ufhp->ufid_gen) {
+			*vpp = fhvp;
+			*exflagsp = expcred->netc_exflags;
+			*credanonp = &expcred->netc_anon;
+			return (0);
+		}
+		/* Handle refers to a freed or reused inode. */
+		vput(fhvp);
+		rv = ESTALE;
+	}
+	*vpp = NULLVP;
+	return (rv);
+}
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
new file mode 100644
index 000000000000..7b7c88376b95
--- /dev/null
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -0,0 +1,2159 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_vnops.c 8.10 (Berkeley) 4/1/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/lockf.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+static int ufs_chmod __P((struct vnode *, int, struct ucred *, struct proc *));
+static int ufs_chown
+ __P((struct vnode *, uid_t, gid_t, struct ucred *, struct proc *));
+
+union _qcvt { /* overlays a quad_t with two longs; assumes a long is half a quad_t -- TODO confirm */
+ quad_t qcvt;
+ long val[2];
+};
+#define SETHIGH(q, h) { /* replace word _QUAD_HIGHWORD of q with h */ \
+ union _qcvt tmp; \
+ tmp.qcvt = (q); \
+ tmp.val[_QUAD_HIGHWORD] = (h); \
+ (q) = tmp.qcvt; \
+}
+#define SETLOW(q, l) { /* replace word _QUAD_LOWWORD of q with l */ \
+ union _qcvt tmp; \
+ tmp.qcvt = (q); \
+ tmp.val[_QUAD_LOWWORD] = (l); \
+ (q) = tmp.qcvt; \
+}
+
+/*
+ * Create a regular file.
+ *
+ * Hands the work to ufs_makeinode(), building the inode mode from the
+ * type and mode in a_vap, and returns its result unchanged (0 on
+ * success).
+ */
+int
+ufs_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct vattr *vap = ap->a_vap;
+
+	return (ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
+	    ap->a_dvp, ap->a_vpp, ap->a_cnp));
+}
+
+/*
+ * Mknod vnode call
+ *
+ * Creates the inode with ufs_makeinode(), marks it accessed, changed
+ * and updated, and records the device number from a_vap when one was
+ * supplied.  The new vnode is then released and destroyed, so on
+ * success *a_vpp is set to 0 and 0 is returned; on failure the
+ * ufs_makeinode() error is returned.
+ */
+/* ARGSUSED */
+int
+ufs_mknod(ap)
+ struct vop_mknod_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ register struct vattr *vap = ap->a_vap;
+ register struct vnode **vpp = ap->a_vpp;
+ register struct inode *ip;
+ int error;
+
+ if (error =
+ ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
+ ap->a_dvp, vpp, ap->a_cnp))
+ return (error);
+ ip = VTOI(*vpp);
+ ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
+ if (vap->va_rdev != VNOVAL) {
+ /*
+ * Want to be able to use this to make badblock
+ * inodes, so don't truncate the dev number.
+ */
+ ip->i_rdev = vap->va_rdev;
+ }
+ /*
+ * Remove inode so that it will be reloaded by VFS_VGET and
+ * checked to see if it is an alias of an existing entry in
+ * the inode cache.
+ */
+ vput(*vpp);
+ (*vpp)->v_type = VNON;
+ vgone(*vpp);
+ *vpp = 0; /* no vnode is handed back to the caller */
+ return (0);
+}
+
+/*
+ * Open called.
+ *
+ * The only check made here is for append-only files: if the inode has
+ * the APPEND flag and the open asks for FWRITE without O_APPEND, EPERM
+ * is returned.  Otherwise returns 0.
+ */
+/* ARGSUSED */
+int
+ufs_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ /*
+ * Files marked append-only must be opened for appending.
+ */
+ if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
+ (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
+ return (EPERM);
+ return (0);
+}
+
+/*
+ * Close called.
+ *
+ * Update the times on the inode, but only when the vnode has more than
+ * one reference and the inode is not marked IN_LOCKED.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+ufs_close(ap)
+ struct vop_close_args /* {
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct inode *ip = VTOI(vp);
+
+ if (vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED))
+ ITIMES(ip, &time, &time);
+ return (0);
+}
+
+/*
+ * Check whether a_cred may access the file in the way a_mode asks for
+ * (any combination of VREAD, VWRITE and VEXEC).
+ *
+ * Returns EPERM for a write request on an IMMUTABLE file, 0 for user
+ * id 0, and otherwise 0 or EACCES depending on the permission bits of
+ * exactly one class: owner if the uid matches, else group if any of the
+ * credential's groups matches, else other.  With QUOTA, a write request
+ * on a directory, symlink or regular file first attaches the inode's
+ * quotas and returns the getinoquota() error if that fails.
+ */
+int
+ufs_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
+	struct ucred *cred = ap->a_cred;
+	mode_t mode = ap->a_mode;
+	mode_t xbit, rbit, wbit, mask;
+	gid_t *gp;
+	int i, error;
+
+#ifdef DIAGNOSTIC
+	if (!VOP_ISLOCKED(vp)) {
+		vprint("ufs_access: not locked", vp);
+		panic("ufs_access: not locked");
+	}
+#endif
+#ifdef QUOTA
+	if ((mode & VWRITE) && (vp->v_type == VDIR ||
+	    vp->v_type == VLNK || vp->v_type == VREG)) {
+		if (error = getinoquota(ip))
+			return (error);
+	}
+#endif
+
+	/* Nobody may write an immutable file. */
+	if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE))
+		return (EPERM);
+
+	/* User id 0 is not subject to the permission bits. */
+	if (cred->cr_uid == 0)
+		return (0);
+
+	/* Pick the permission class: owner, then group, then other. */
+	xbit = S_IXOTH;
+	rbit = S_IROTH;
+	wbit = S_IWOTH;
+	if (cred->cr_uid == ip->i_uid) {
+		xbit = S_IXUSR;
+		rbit = S_IRUSR;
+		wbit = S_IWUSR;
+	} else {
+		gp = cred->cr_groups;
+		for (i = 0; i < cred->cr_ngroups; i++, gp++) {
+			if (ip->i_gid == *gp) {
+				xbit = S_IXGRP;
+				rbit = S_IRGRP;
+				wbit = S_IWGRP;
+				break;
+			}
+		}
+	}
+
+	/* Every requested kind of access must be granted by that class. */
+	mask = 0;
+	if (mode & VEXEC)
+		mask |= xbit;
+	if (mode & VREAD)
+		mask |= rbit;
+	if (mode & VWRITE)
+		mask |= wbit;
+	return ((ip->i_mode & mask) == mask ? 0 : EACCES);
+}
+
+/*
+ * Get attribute vnode op.
+ *
+ * Brings the inode times up to date with ITIMES() and then copies the
+ * inode's attributes into a_vap.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+ufs_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
+	struct vattr *vap = ap->a_vap;
+
+	ITIMES(ip, &time, &time);
+
+	/* Identity of the file. */
+	vap->va_type = vp->v_type;
+	vap->va_fsid = ip->i_dev;
+	vap->va_fileid = ip->i_number;
+	vap->va_gen = ip->i_gen;
+	vap->va_filerev = ip->i_modrev;
+
+	/* Ownership, permissions and flags; the type bits are masked off. */
+	vap->va_mode = ip->i_mode & ~IFMT;
+	vap->va_nlink = ip->i_nlink;
+	vap->va_uid = ip->i_uid;
+	vap->va_gid = ip->i_gid;
+	vap->va_flags = ip->i_flags;
+	vap->va_rdev = (dev_t)ip->i_rdev;
+
+	/* Sizes and times. */
+	vap->va_size = ip->i_din.di_size;
+	vap->va_bytes = dbtob(ip->i_blocks);
+	vap->va_atime = ip->i_atime;
+	vap->va_mtime = ip->i_mtime;
+	vap->va_ctime = ip->i_ctime;
+
+	/* Preferred I/O size depends on the kind of vnode. */
+	switch (vp->v_type) {
+	case VBLK:
+		vap->va_blocksize = BLKDEV_IOSIZE;
+		break;
+	case VCHR:
+		vap->va_blocksize = MAXBSIZE;
+		break;
+	default:
+		vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
+		break;
+	}
+	return (0);
+}
+
+/*
+ * Set attribute vnode op. called from several syscalls
+ *
+ * Applies every field of a_vap that is not VNOVAL, in this order:
+ * flags, owner/group, size, access/modification times, mode.
+ * Returns EINVAL if an unsettable attribute is supplied, EPERM when
+ * the file flags forbid the change, EISDIR for a size change on a
+ * directory, or the error from suser(), ufs_chown(), VOP_TRUNCATE(),
+ * VOP_ACCESS(), VOP_UPDATE() or ufs_chmod().  Changes made before a
+ * failing step are not undone.
+ */
+int
+ufs_setattr(ap)
+ struct vop_setattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vattr *vap = ap->a_vap;
+ register struct vnode *vp = ap->a_vp;
+ register struct inode *ip = VTOI(vp);
+ register struct ucred *cred = ap->a_cred;
+ register struct proc *p = ap->a_p;
+ struct timeval atimeval, mtimeval;
+ int error;
+
+ /*
+ * Check for unsettable attributes.
+ */
+ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
+ (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
+ (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
+ ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
+ return (EINVAL);
+ }
+ if (vap->va_flags != VNOVAL) {
+ if (cred->cr_uid != ip->i_uid &&
+ (error = suser(cred, &p->p_acflag)))
+ return (error);
+ if (cred->cr_uid == 0) {
+ if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) &&
+ securelevel > 0)
+ return (EPERM);
+ ip->i_flags = vap->va_flags; /* user id 0 may replace all flags */
+ } else {
+ if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND))
+ return (EPERM);
+ ip->i_flags &= SF_SETTABLE; /* keep the SF_SETTABLE bits as they are */
+ ip->i_flags |= (vap->va_flags & UF_SETTABLE); /* take only UF_SETTABLE bits from the request */
+ }
+ ip->i_flag |= IN_CHANGE;
+ if (vap->va_flags & (IMMUTABLE | APPEND))
+ return (0); /* remaining attributes in a_vap are not applied */
+ }
+ if (ip->i_flags & (IMMUTABLE | APPEND))
+ return (EPERM);
+ /*
+ * Go through the fields and update iff not VNOVAL.
+ */
+ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL)
+ if (error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p))
+ return (error);
+ if (vap->va_size != VNOVAL) {
+ if (vp->v_type == VDIR)
+ return (EISDIR);
+ if (error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p))
+ return (error);
+ }
+ ip = VTOI(vp);
+ if (vap->va_atime.ts_sec != VNOVAL || vap->va_mtime.ts_sec != VNOVAL) {
+ if (cred->cr_uid != ip->i_uid &&
+ (error = suser(cred, &p->p_acflag)) &&
+ ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
+ (error = VOP_ACCESS(vp, VWRITE, cred, p)))) /* others need VA_UTIMES_NULL plus write access */
+ return (error);
+ if (vap->va_atime.ts_sec != VNOVAL)
+ ip->i_flag |= IN_ACCESS;
+ if (vap->va_mtime.ts_sec != VNOVAL)
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ atimeval.tv_sec = vap->va_atime.ts_sec;
+ atimeval.tv_usec = vap->va_atime.ts_nsec / 1000;
+ mtimeval.tv_sec = vap->va_mtime.ts_sec;
+ mtimeval.tv_usec = vap->va_mtime.ts_nsec / 1000;
+ if (error = VOP_UPDATE(vp, &atimeval, &mtimeval, 1))
+ return (error);
+ }
+ error = 0;
+ if (vap->va_mode != (mode_t)VNOVAL)
+ error = ufs_chmod(vp, (int)vap->va_mode, cred, p);
+ return (error);
+}
+
+/*
+ * Change the mode on a file.
+ * Inode must be locked before calling.
+ *
+ * Only the ALLPERMS bits of mode are installed.  A caller that does
+ * not own the file must pass suser(), whose error is returned
+ * otherwise.  For a credential with a non-zero uid, EFTYPE is returned
+ * for S_ISTXT on a non-directory and EPERM for ISGID when the
+ * credential is not a member of the file's group.  Returns 0 on
+ * success.
+ */
+static int
+ufs_chmod(vp, mode, cred, p)
+	register struct vnode *vp;
+	register int mode;
+	register struct ucred *cred;
+	struct proc *p;
+{
+	struct inode *ip = VTOI(vp);
+	int error;
+
+	if (cred->cr_uid != ip->i_uid) {
+		error = suser(cred, &p->p_acflag);
+		if (error != 0)
+			return (error);
+	}
+	if (cred->cr_uid != 0) {
+		if (vp->v_type != VDIR && (mode & S_ISTXT) != 0)
+			return (EFTYPE);
+		if (!groupmember(ip->i_gid, cred) && (mode & ISGID) != 0)
+			return (EPERM);
+	}
+
+	/* Swap in the new permission bits, leaving the rest of i_mode. */
+	ip->i_mode = (ip->i_mode & ~ALLPERMS) | (mode & ALLPERMS);
+	ip->i_flag |= IN_CHANGE;
+
+	/* A text vnode that is no longer sticky is dropped from the pager cache. */
+	if ((vp->v_flag & VTEXT) != 0 && (ip->i_mode & S_ISTXT) == 0)
+		(void) vnode_pager_uncache(vp);
+	return (0);
+}
+
+/*
+ * Perform chown operation on inode ip;
+ * inode must be locked prior to call.
+ *
+ * A uid or gid equal to VNOVAL means "leave that id unchanged"; it is
+ * replaced by the inode's current value before any check is made.
+ *
+ * Returns 0 on success or the error from suser(), getinoquota(),
+ * chkdq() or chkiq().  With QUOTA configured, the usage charged to the
+ * old ids is released, the ids are switched, and the usage is charged
+ * to the new ids; if that charge fails the old ids are restored and
+ * re-charged with FORCE before the error is returned.
+ *
+ * On success IN_CHANGE is set when either id actually changed, and for
+ * callers other than uid 0 the ISUID / ISGID bits are cleared when the
+ * owner / group changed.
+ */
+static int
+ufs_chown(vp, uid, gid, cred, p)
+ register struct vnode *vp;
+ uid_t uid;
+ gid_t gid;
+ struct ucred *cred;
+ struct proc *p;
+{
+ register struct inode *ip = VTOI(vp);
+ uid_t ouid;
+ gid_t ogid;
+ int error = 0;
+#ifdef QUOTA
+ register int i;
+ long change;
+#endif
+
+ if (uid == (uid_t)VNOVAL)
+ uid = ip->i_uid;
+ if (gid == (gid_t)VNOVAL)
+ gid = ip->i_gid;
+ /*
+ * If we don't own the file, are trying to change the owner
+ * of the file, or are not a member of the target group,
+ * the caller must be superuser or the call fails.
+ */
+ if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid ||
+ !groupmember((gid_t)gid, cred)) &&
+ (error = suser(cred, &p->p_acflag)))
+ return (error);
+ ogid = ip->i_gid; /* remembered for rollback and change detection */
+ ouid = ip->i_uid;
+#ifdef QUOTA
+ if (error = getinoquota(ip))
+ return (error);
+ if (ouid == uid) { /* owner unchanged: leave the user quota untouched */
+ dqrele(vp, ip->i_dquot[USRQUOTA]);
+ ip->i_dquot[USRQUOTA] = NODQUOT;
+ }
+ if (ogid == gid) { /* group unchanged: leave the group quota untouched */
+ dqrele(vp, ip->i_dquot[GRPQUOTA]);
+ ip->i_dquot[GRPQUOTA] = NODQUOT;
+ }
+ change = ip->i_blocks;
+ (void) chkdq(ip, -change, cred, CHOWN); /* presumably un-charges the blocks from the old ids */
+ (void) chkiq(ip, -1, cred, CHOWN); /* presumably un-charges the inode from the old ids */
+ for (i = 0; i < MAXQUOTAS; i++) {
+ dqrele(vp, ip->i_dquot[i]);
+ ip->i_dquot[i] = NODQUOT;
+ }
+#endif
+ ip->i_gid = gid;
+ ip->i_uid = uid;
+#ifdef QUOTA
+ if ((error = getinoquota(ip)) == 0) {
+ if (ouid == uid) {
+ dqrele(vp, ip->i_dquot[USRQUOTA]);
+ ip->i_dquot[USRQUOTA] = NODQUOT;
+ }
+ if (ogid == gid) {
+ dqrele(vp, ip->i_dquot[GRPQUOTA]);
+ ip->i_dquot[GRPQUOTA] = NODQUOT;
+ }
+ if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
+ if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
+ goto good;
+ else
+ (void) chkdq(ip, -change, cred, CHOWN|FORCE); /* undo the block charge made just above */
+ }
+ for (i = 0; i < MAXQUOTAS; i++) {
+ dqrele(vp, ip->i_dquot[i]);
+ ip->i_dquot[i] = NODQUOT;
+ }
+ }
+ ip->i_gid = ogid; /* failure: put the original ids back */
+ ip->i_uid = ouid;
+ if (getinoquota(ip) == 0) {
+ if (ouid == uid) {
+ dqrele(vp, ip->i_dquot[USRQUOTA]);
+ ip->i_dquot[USRQUOTA] = NODQUOT;
+ }
+ if (ogid == gid) {
+ dqrele(vp, ip->i_dquot[GRPQUOTA]);
+ ip->i_dquot[GRPQUOTA] = NODQUOT;
+ }
+ (void) chkdq(ip, change, cred, FORCE|CHOWN);
+ (void) chkiq(ip, 1, cred, FORCE|CHOWN);
+ (void) getinoquota(ip);
+ }
+ return (error);
+good:
+ if (getinoquota(ip))
+ panic("chown: lost quota");
+#endif /* QUOTA */
+ if (ouid != uid || ogid != gid)
+ ip->i_flag |= IN_CHANGE;
+ if (ouid != uid && cred->cr_uid != 0)
+ ip->i_mode &= ~ISUID;
+ if (ogid != gid && cred->cr_uid != 0)
+ ip->i_mode &= ~ISGID;
+ return (0);
+}
+
+/*
+ * Ioctl vnode operation.
+ *
+ * Nothing is implemented at this level: the arguments are ignored and
+ * every request is answered with ENOTTY.
+ */
+/* ARGSUSED */
+int
+ufs_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (ENOTTY);
+}
+
+/*
+ * Select vnode operation.
+ *
+ * The arguments are ignored and 1 is returned unconditionally.
+ * We should really check to see if I/O is possible.
+ */
+/* ARGSUSED */
+int
+ufs_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int a_which;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (1);
+}
+
+/*
+ * Mmap vnode operation.
+ *
+ * NB: currently unsupported; the arguments are ignored and EINVAL is
+ * returned for every call.
+ */
+/* ARGSUSED */
+int
+ufs_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * Seek vnode operation.
+ *
+ * There is nothing to do for a seek, so the arguments are ignored
+ * and success (0) is returned.
+ */
+/* ARGSUSED */
+int
+ufs_seek(ap)
+	struct vop_seek_args /* {
+		struct vnode *a_vp;
+		off_t a_oldoff;
+		off_t a_newoff;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Remove vnode operation: delete the directory entry named by a_cnp in
+ * a_dvp and drop one link from the inode behind a_vp.
+ *
+ * Fails with EPERM when the file is IMMUTABLE or APPEND, or when the
+ * directory is APPEND; otherwise returns the result of ufs_dirremove().
+ * Both vnodes are released before returning, whatever the outcome.
+ */
+int
+ufs_remove(ap)
+	struct vop_remove_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+	int error;
+
+	if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
+	    (VTOI(dvp)->i_flags & APPEND)) {
+		error = EPERM;
+	} else {
+		error = ufs_dirremove(dvp, ap->a_cnp);
+		if (error == 0) {
+			ip->i_nlink--;
+			ip->i_flag |= IN_CHANGE;
+		}
+	}
+	/* When both arguments are the same vnode, only drop the reference. */
+	if (dvp != vp)
+		vput(vp);
+	else
+		vrele(vp);
+	vput(dvp);
+	return (error);
+}
+
+/*
+ * Link vnode operation.
+ *
+ * NOTE(review): judging by the ufs_direnter(ip, vp, cnp) call, a_vp is
+ * the directory receiving the new entry and a_tdvp is the inode that
+ * gains a link -- confirm against the caller.
+ *
+ * Returns EXDEV when the two vnodes live on different mounts, EMLINK
+ * when the link count is already LINK_MAX, EPERM for IMMUTABLE or
+ * APPEND inodes, or the error from VOP_LOCK(), VOP_UPDATE() or
+ * ufs_direnter().  a_vp is always vput() before returning.
+ */
+int
+ufs_link(ap)
+	struct vop_link_args /* {
+		struct vnode *a_vp;
+		struct vnode *a_tdvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vnode *tdvp = ap->a_tdvp;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct inode *ip;
+	struct timeval tv;
+	int error;
+
+#ifdef DIAGNOSTIC
+	if ((cnp->cn_flags & HASBUF) == 0)
+		panic("ufs_link: no name");
+#endif
+	if (vp->v_mount != tdvp->v_mount) {
+		VOP_ABORTOP(vp, cnp);
+		error = EXDEV;
+		goto release;
+	}
+	if (vp != tdvp) {
+		error = VOP_LOCK(tdvp);
+		if (error) {
+			VOP_ABORTOP(vp, cnp);
+			goto release;
+		}
+	}
+	ip = VTOI(tdvp);
+
+	/* Refuse before touching anything on disk. */
+	error = 0;
+	if ((nlink_t)ip->i_nlink >= LINK_MAX)
+		error = EMLINK;
+	else if (ip->i_flags & (IMMUTABLE | APPEND))
+		error = EPERM;
+	if (error) {
+		VOP_ABORTOP(vp, cnp);
+		goto unlock;
+	}
+
+	/* Bump the link count first, then enter the name; back out on error. */
+	ip->i_nlink++;
+	ip->i_flag |= IN_CHANGE;
+	tv = time;
+	error = VOP_UPDATE(tdvp, &tv, &tv, 1);
+	if (error == 0)
+		error = ufs_direnter(ip, vp, cnp);
+	if (error != 0) {
+		ip->i_nlink--;
+		ip->i_flag |= IN_CHANGE;
+	}
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+unlock:
+	if (vp != tdvp)
+		VOP_UNLOCK(tdvp);
+release:
+	vput(vp);
+	return (error);
+}
+
+
+
+/*
+ * relookup - lookup a path name component
+ * Used by lookup to re-acquire things.
+ *
+ * Looks up the single component described by cnp in directory dvp and
+ * stores the resulting vnode in *vpp.  dvp is locked on entry to the
+ * search.  Returns 0 on success (including the EJUSTRETURN case from
+ * VOP_LOOKUP, where the entry does not exist), otherwise an errno
+ * value with *vpp set to NULL and the searched directory vput().
+ *
+ * Panics if the component is ".." (ISDOTDOT); with NAMEI_DIAGNOSTIC it
+ * also panics unless cn_hash / cn_namelen match the name and the name
+ * is the last component.
+ */
+int
+relookup(dvp, vpp, cnp)
+ struct vnode *dvp, **vpp;
+ struct componentname *cnp;
+{
+ register struct vnode *dp = 0; /* the directory we are searching */
+ int docache; /* == 0 do not cache last component */
+ int wantparent; /* 1 => wantparent or lockparent flag */
+ int rdonly; /* lookup read-only flag bit */
+ int error = 0;
+#ifdef NAMEI_DIAGNOSTIC
+ int newhash; /* DEBUG: check name hash */
+ char *cp; /* DEBUG: check name ptr/len */
+#endif
+
+ /*
+ * Setup: break out flag bits into variables.
+ * (docache is computed here but not consulted again below.)
+ */
+ wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT);
+ docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
+ if (cnp->cn_nameiop == DELETE ||
+ (wantparent && cnp->cn_nameiop != CREATE))
+ docache = 0;
+ rdonly = cnp->cn_flags & RDONLY;
+ cnp->cn_flags &= ~ISSYMLINK;
+ dp = dvp;
+ VOP_LOCK(dp); /* return value is not checked */
+
+/* dirloop: */
+ /*
+ * Search a new directory.
+ *
+ * The cn_hash value is for use by vfs_cache.
+ * The last component of the filename is left accessible via
+ * cnp->cn_nameptr for callers that need the name. Callers needing
+ * the name set the SAVENAME flag. When done, they assume
+ * responsibility for freeing the pathname buffer.
+ */
+#ifdef NAMEI_DIAGNOSTIC
+ for (newhash = 0, cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
+ newhash += (unsigned char)*cp;
+ if (newhash != cnp->cn_hash)
+ panic("relookup: bad hash");
+ if (cnp->cn_namelen != cp - cnp->cn_nameptr)
+ panic ("relookup: bad len");
+ if (*cp != 0)
+ panic("relookup: not last component");
+ printf("{%s}: ", cnp->cn_nameptr);
+#endif
+
+ /*
+ * Check for degenerate name (e.g. / or "")
+ * which is a way of talking about a directory,
+ * e.g. like "/." or ".".
+ * Only a plain LOOKUP without wantparent is allowed here; the
+ * directory itself is returned in *vpp.
+ */
+ if (cnp->cn_nameptr[0] == '\0') {
+ if (cnp->cn_nameiop != LOOKUP || wantparent) {
+ error = EISDIR;
+ goto bad;
+ }
+ if (dp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto bad;
+ }
+ if (!(cnp->cn_flags & LOCKLEAF))
+ VOP_UNLOCK(dp);
+ *vpp = dp;
+ if (cnp->cn_flags & SAVESTART)
+ panic("lookup: SAVESTART");
+ return (0);
+ }
+
+ if (cnp->cn_flags & ISDOTDOT)
+ panic ("relookup: lookup on dot-dot");
+
+ /*
+ * We now have a segment name to search for, and a directory to search.
+ */
+ if (error = VOP_LOOKUP(dp, vpp, cnp)) {
+#ifdef DIAGNOSTIC
+ if (*vpp != NULL)
+ panic("leaf should be empty");
+#endif
+ if (error != EJUSTRETURN)
+ goto bad;
+ /*
+ * If creating and at end of pathname, then can consider
+ * allowing file to be created.
+ */
+ if (rdonly || (dvp->v_mount->mnt_flag & MNT_RDONLY)) {
+ error = EROFS;
+ goto bad;
+ }
+ /* ASSERT(dvp == ndp->ni_startdir) */
+ if (cnp->cn_flags & SAVESTART)
+ VREF(dvp);
+ /*
+ * We return with ni_vp NULL to indicate that the entry
+ * doesn't currently exist, leaving a pointer to the
+ * (possibly locked) directory inode in ndp->ni_dvp.
+ */
+ return (0);
+ }
+ dp = *vpp; /* from here on dp is the vnode that was found */
+
+#ifdef DIAGNOSTIC
+ /*
+ * Check for symbolic link
+ */
+ if (dp->v_type == VLNK && (cnp->cn_flags & FOLLOW))
+ panic ("relookup: symlink found.\n");
+#endif
+
+ /*
+ * Check for read-only file systems.
+ */
+ if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) {
+ /*
+ * Disallow directory write attempts on read-only
+ * file systems.
+ */
+ if (rdonly || (dp->v_mount->mnt_flag & MNT_RDONLY) ||
+ (wantparent &&
+ (dvp->v_mount->mnt_flag & MNT_RDONLY))) {
+ error = EROFS;
+ goto bad2;
+ }
+ }
+ /* ASSERT(dvp == ndp->ni_startdir) */
+ if (cnp->cn_flags & SAVESTART)
+ VREF(dvp);
+
+ if (!wantparent)
+ vrele(dvp);
+ if ((cnp->cn_flags & LOCKLEAF) == 0)
+ VOP_UNLOCK(dp);
+ return (0);
+
+bad2: /* error after a successful lookup: release the parent, then the leaf */
+ if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN))
+ VOP_UNLOCK(dvp);
+ vrele(dvp);
+bad: /* unlock and release whatever dp currently refers to */
+ vput(dp);
+ *vpp = NULL;
+ return (error);
+}
+
+
+/*
+ * Rename system call.
+ *	rename("foo", "bar");
+ * is essentially
+ *	unlink("bar");
+ *	link("foo", "bar");
+ *	unlink("foo");
+ * but ``atomically''.  Can't do full commit without saving state in the
+ * inode on disk which isn't feasible at this time.  Best we can do is
+ * always guarantee the target exists.
+ *
+ * Basic algorithm is:
+ *
+ * 1) Bump link count on source while we're linking it to the
+ *    target.  This also ensures the inode won't be deleted out
+ *    from underneath us while we work (it may be truncated by
+ *    a concurrent `trunc' or `open' for creation).
+ * 2) Link source to destination.  If destination already exists,
+ *    delete it first.
+ * 3) Unlink source reference to inode if still around.  If a
+ *    directory was moved and the parent of the destination
+ *    is different from the source, patch the ".." entry in the
+ *    directory.
+ *
+ * Returns 0 on success or an errno value.  A source directory is
+ * marked IN_RENAME while the operation is in progress; the mark is
+ * removed again on every exit path that can follow it, including the
+ * failure paths, so that a failed rename does not leave the directory
+ * permanently un-renamable (a later attempt would otherwise be
+ * rejected with EINVAL by the IN_RENAME test below).
+ */
+int
+ufs_rename(ap)
+	struct vop_rename_args /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	struct vnode *tvp = ap->a_tvp;
+	register struct vnode *tdvp = ap->a_tdvp;
+	struct vnode *fvp = ap->a_fvp;
+	register struct vnode *fdvp = ap->a_fdvp;
+	register struct componentname *tcnp = ap->a_tcnp;
+	register struct componentname *fcnp = ap->a_fcnp;
+	register struct inode *ip, *xp, *dp;
+	struct dirtemplate dirbuf;
+	struct timeval tv;
+	int doingdirectory = 0, oldparent = 0, newparent = 0;
+	int error = 0;
+	u_char namlen;
+
+#ifdef DIAGNOSTIC
+	if ((tcnp->cn_flags & HASBUF) == 0 ||
+	    (fcnp->cn_flags & HASBUF) == 0)
+		panic("ufs_rename: no name");
+#endif
+	/*
+	 * Check for cross-device rename.
+	 */
+	if ((fvp->v_mount != tdvp->v_mount) ||
+	    (tvp && (fvp->v_mount != tvp->v_mount))) {
+		error = EXDEV;
+abortit:
+		VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
+		if (tdvp == tvp)
+			vrele(tdvp);
+		else
+			vput(tdvp);
+		if (tvp)
+			vput(tvp);
+		VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
+		vrele(fdvp);
+		vrele(fvp);
+		return (error);
+	}
+
+	/*
+	 * Check if just deleting a link name.
+	 */
+	if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
+	    (VTOI(tdvp)->i_flags & APPEND))) {
+		error = EPERM;
+		goto abortit;
+	}
+	if (fvp == tvp) {
+		if (fvp->v_type == VDIR) {
+			error = EINVAL;
+			goto abortit;
+		}
+		VOP_ABORTOP(fdvp, fcnp);
+		vrele(fdvp);
+		vrele(fvp);
+		vput(tdvp);
+		vput(tvp);
+		tcnp->cn_flags &= ~MODMASK;
+		tcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
+		if ((tcnp->cn_flags & SAVESTART) == 0)
+			panic("ufs_rename: lost from startdir");
+		tcnp->cn_nameiop = DELETE;
+		(void) relookup(tdvp, &tvp, tcnp);
+		return (VOP_REMOVE(tdvp, tvp, tcnp));
+	}
+	if (error = VOP_LOCK(fvp))
+		goto abortit;
+	dp = VTOI(fdvp);
+	ip = VTOI(fvp);
+	if ((ip->i_flags & (IMMUTABLE | APPEND)) || (dp->i_flags & APPEND)) {
+		VOP_UNLOCK(fvp);
+		error = EPERM;
+		goto abortit;
+	}
+	if ((ip->i_mode & IFMT) == IFDIR) {
+		/*
+		 * Avoid ".", "..", and aliases of "." for obvious reasons.
+		 */
+		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
+		    dp == ip || (fcnp->cn_flags&ISDOTDOT) ||
+		    (ip->i_flag & IN_RENAME)) {
+			VOP_UNLOCK(fvp);
+			error = EINVAL;
+			goto abortit;
+		}
+		ip->i_flag |= IN_RENAME;
+		oldparent = dp->i_number;
+		doingdirectory++;
+	}
+	vrele(fdvp);
+
+	/*
+	 * When the target exists, both the directory
+	 * and target vnodes are returned locked.
+	 */
+	dp = VTOI(tdvp);
+	xp = NULL;
+	if (tvp)
+		xp = VTOI(tvp);
+
+	/*
+	 * 1) Bump link count while we're moving stuff
+	 *    around.  If we crash somewhere before
+	 *    completing our work, the link count
+	 *    may be wrong, but correctable.
+	 */
+	ip->i_nlink++;
+	ip->i_flag |= IN_CHANGE;
+	tv = time;
+	if (error = VOP_UPDATE(fvp, &tv, &tv, 1)) {
+		VOP_UNLOCK(fvp);
+		goto bad;
+	}
+
+	/*
+	 * If ".." must be changed (ie the directory gets a new
+	 * parent) then the source directory must not be in the
+	 * directory hierarchy above the target, as this would
+	 * orphan everything below the source directory.  Also
+	 * the user must have write permission in the source so
+	 * as to be able to change "..".  We must repeat the call
+	 * to namei, as the parent directory is unlocked by the
+	 * call to checkpath().
+	 */
+	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
+	VOP_UNLOCK(fvp);
+	if (oldparent != dp->i_number)
+		newparent = dp->i_number;
+	if (doingdirectory && newparent) {
+		if (error)	/* write access check above */
+			goto bad;
+		if (xp != NULL)
+			vput(tvp);
+		if (error = ufs_checkpath(ip, dp, tcnp->cn_cred))
+			goto out;
+		if ((tcnp->cn_flags & SAVESTART) == 0)
+			panic("ufs_rename: lost to startdir");
+		if (error = relookup(tdvp, &tvp, tcnp))
+			goto out;
+		dp = VTOI(tdvp);
+		xp = NULL;
+		if (tvp)
+			xp = VTOI(tvp);
+	}
+	/*
+	 * 2) If target doesn't exist, link the target
+	 *    to the source and unlink the source.
+	 *    Otherwise, rewrite the target directory
+	 *    entry to reference the source inode and
+	 *    expunge the original entry's existence.
+	 */
+	if (xp == NULL) {
+		if (dp->i_dev != ip->i_dev)
+			panic("rename: EXDEV");
+		/*
+		 * Account for ".." in new directory.
+		 * When source and destination have the same
+		 * parent we don't fool with the link count.
+		 */
+		if (doingdirectory && newparent) {
+			if ((nlink_t)dp->i_nlink >= LINK_MAX) {
+				error = EMLINK;
+				goto bad;
+			}
+			dp->i_nlink++;
+			dp->i_flag |= IN_CHANGE;
+			if (error = VOP_UPDATE(tdvp, &tv, &tv, 1))
+				goto bad;
+		}
+		if (error = ufs_direnter(ip, tdvp, tcnp)) {
+			if (doingdirectory && newparent) {
+				dp->i_nlink--;
+				dp->i_flag |= IN_CHANGE;
+				(void)VOP_UPDATE(tdvp, &tv, &tv, 1);
+			}
+			goto bad;
+		}
+		vput(tdvp);
+	} else {
+		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
+			panic("rename: EXDEV");
+		/*
+		 * Short circuit rename(foo, foo).
+		 */
+		if (xp->i_number == ip->i_number)
+			panic("rename: same file");
+		/*
+		 * If the parent directory is "sticky", then the user must
+		 * own the parent directory, or the destination of the rename,
+		 * otherwise the destination may not be changed (except by
+		 * root). This implements append-only directories.
+		 */
+		if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
+		    tcnp->cn_cred->cr_uid != dp->i_uid &&
+		    xp->i_uid != tcnp->cn_cred->cr_uid) {
+			error = EPERM;
+			goto bad;
+		}
+		/*
+		 * Target must be empty if a directory and have no links
+		 * to it. Also, ensure source and target are compatible
+		 * (both directories, or both not directories).
+		 */
+		if ((xp->i_mode&IFMT) == IFDIR) {
+			if (!ufs_dirempty(xp, dp->i_number, tcnp->cn_cred) ||
+			    xp->i_nlink > 2) {
+				error = ENOTEMPTY;
+				goto bad;
+			}
+			if (!doingdirectory) {
+				error = ENOTDIR;
+				goto bad;
+			}
+			cache_purge(tdvp);
+		} else if (doingdirectory) {
+			error = EISDIR;
+			goto bad;
+		}
+		if (error = ufs_dirrewrite(dp, ip, tcnp))
+			goto bad;
+		/*
+		 * If the target directory is in the same
+		 * directory as the source directory,
+		 * decrement the link count on the parent
+		 * of the target directory.
+		 */
+		if (doingdirectory && !newparent) {
+			dp->i_nlink--;
+			dp->i_flag |= IN_CHANGE;
+		}
+		vput(tdvp);
+		/*
+		 * Adjust the link count of the target to
+		 * reflect the dirrewrite above.  If this is
+		 * a directory it is empty and there are
+		 * no links to it, so we can squash the inode and
+		 * any space associated with it.  We disallowed
+		 * renaming over top of a directory with links to
+		 * it above, as the remaining link would point to
+		 * a directory without "." or ".." entries.
+		 */
+		xp->i_nlink--;
+		if (doingdirectory) {
+			if (--xp->i_nlink != 0)
+				panic("rename: linked directory");
+			error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC,
+			    tcnp->cn_cred, tcnp->cn_proc);
+		}
+		xp->i_flag |= IN_CHANGE;
+		vput(tvp);
+		xp = NULL;
+	}
+
+	/*
+	 * 3) Unlink the source.
+	 */
+	fcnp->cn_flags &= ~MODMASK;
+	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
+	if ((fcnp->cn_flags & SAVESTART) == 0)
+		panic("ufs_rename: lost from startdir");
+	(void) relookup(fdvp, &fvp, fcnp);
+	if (fvp != NULL) {
+		xp = VTOI(fvp);
+		dp = VTOI(fdvp);
+	} else {
+		/*
+		 * From name has disappeared.
+		 */
+		if (doingdirectory)
+			panic("rename: lost dir entry");
+		vrele(ap->a_fvp);
+		return (0);
+	}
+	/*
+	 * Ensure that the directory entry still exists and has not
+	 * changed while the new name has been entered. If the source is
+	 * a file then the entry may have been unlinked or renamed. In
+	 * either case there is no further work to be done. If the source
+	 * is a directory then it cannot have been rmdir'ed; its link
+	 * count of three would cause a rmdir to fail with ENOTEMPTY.
+	 * The IN_RENAME flag ensures that it cannot be moved by another
+	 * rename.
+	 */
+	if (xp != ip) {
+		if (doingdirectory)
+			panic("rename: lost dir entry");
+	} else {
+		/*
+		 * If the source is a directory with a
+		 * new parent, the link count of the old
+		 * parent directory must be decremented
+		 * and ".." set to point to the new parent.
+		 */
+		if (doingdirectory && newparent) {
+			dp->i_nlink--;
+			dp->i_flag |= IN_CHANGE;
+			error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf,
+			    sizeof (struct dirtemplate), (off_t)0,
+			    UIO_SYSSPACE, IO_NODELOCKED,
+			    tcnp->cn_cred, (int *)0, (struct proc *)0);
+			if (error == 0) {
+#				if (BYTE_ORDER == LITTLE_ENDIAN)
+					if (fvp->v_mount->mnt_maxsymlinklen <= 0)
+						namlen = dirbuf.dotdot_type;
+					else
+						namlen = dirbuf.dotdot_namlen;
+#				else
+					namlen = dirbuf.dotdot_namlen;
+#				endif
+				if (namlen != 2 ||
+				    dirbuf.dotdot_name[0] != '.' ||
+				    dirbuf.dotdot_name[1] != '.') {
+					ufs_dirbad(xp, (doff_t)12,
+					    "rename: mangled dir");
+				} else {
+					dirbuf.dotdot_ino = newparent;
+					(void) vn_rdwr(UIO_WRITE, fvp,
+					    (caddr_t)&dirbuf,
+					    sizeof (struct dirtemplate),
+					    (off_t)0, UIO_SYSSPACE,
+					    IO_NODELOCKED|IO_SYNC,
+					    tcnp->cn_cred, (int *)0,
+					    (struct proc *)0);
+					cache_purge(fdvp);
+				}
+			}
+		}
+		error = ufs_dirremove(fdvp, fcnp);
+		if (!error) {
+			xp->i_nlink--;
+			xp->i_flag |= IN_CHANGE;
+		}
+		xp->i_flag &= ~IN_RENAME;
+	}
+	if (dp)
+		vput(fdvp);
+	if (xp)
+		vput(fvp);
+	vrele(ap->a_fvp);
+	return (error);
+
+bad:
+	if (xp)
+		vput(ITOV(xp));
+	vput(ITOV(dp));
+out:
+	/*
+	 * The rename failed.  Remove the in-progress mark from a source
+	 * directory (otherwise every later rename of it would be refused
+	 * with EINVAL), then back out the link count bumped in step 1.
+	 */
+	if (doingdirectory)
+		ip->i_flag &= ~IN_RENAME;
+	if (VOP_LOCK(fvp) == 0) {
+		ip->i_nlink--;
+		ip->i_flag |= IN_CHANGE;
+		vput(fvp);
+	} else
+		vrele(fvp);
+	return (error);
+}
+
+/*
+ * A virgin directory (no blushing please).
+ *
+ * Prototype contents ("." and "..") for a newly created directory.
+ * ufs_mkdir() copies one of these, fills in the two inode numbers and
+ * writes the result as the directory's first block: mastertemplate
+ * when the mount's mnt_maxsymlinklen is > 0, omastertemplate
+ * otherwise.  The two differ only in that mastertemplate carries a
+ * DT_DIR field for each entry.
+ */
+static struct dirtemplate mastertemplate = {
+ 0, 12, DT_DIR, 1, ".", /* "." entry */
+ 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." /* ".." entry */
+};
+static struct odirtemplate omastertemplate = {
+ 0, 12, 1, ".", /* "." entry */
+ 0, DIRBLKSIZ - 12, 2, ".." /* ".." entry */
+};
+
+/*
+ * Mkdir system call
+ *
+ * Allocates a new inode in the filesystem of a_dvp, initialises it as
+ * a directory holding "." and "..", bumps the parent's link count and
+ * finally enters the new name in the parent.  On success the new,
+ * still locked vnode is returned through a_vpp.
+ *
+ * Returns 0 on success, EMLINK when the parent already has LINK_MAX
+ * links, or the error from VOP_VALLOC(), the quota routines,
+ * VOP_UPDATE(), vn_rdwr() or ufs_direnter().  On every failure after
+ * the parent's link count has been bumped the bump is undone, and the
+ * new inode is released with a link count of 0.  The pathname buffer
+ * is freed and a_dvp is vput() on all paths.
+ */
+int
+ufs_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vattr *vap = ap->a_vap;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct inode *ip, *dp;
+	struct vnode *tvp;
+	struct dirtemplate dirtemplate, *dtp;
+	struct timeval tv;
+	int error, dmode;
+
+#ifdef DIAGNOSTIC
+	if ((cnp->cn_flags & HASBUF) == 0)
+		panic("ufs_mkdir: no name");
+#endif
+	dp = VTOI(dvp);
+	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
+		error = EMLINK;
+		goto out;
+	}
+	dmode = vap->va_mode & 0777;
+	dmode |= IFDIR;
+	/*
+	 * Must simulate part of ufs_makeinode here to acquire the inode,
+	 * but not have it entered in the parent directory. The entry is
+	 * made later after writing "." and ".." entries.
+	 */
+	if (error = VOP_VALLOC(dvp, dmode, cnp->cn_cred, &tvp))
+		goto out;
+	ip = VTOI(tvp);
+	ip->i_uid = cnp->cn_cred->cr_uid;
+	ip->i_gid = dp->i_gid;
+#ifdef QUOTA
+	if ((error = getinoquota(ip)) ||
+	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
+		free(cnp->cn_pnbuf, M_NAMEI);
+		VOP_VFREE(tvp, ip->i_number, dmode);
+		vput(tvp);
+		vput(dvp);
+		return (error);
+	}
+#endif
+	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
+	ip->i_mode = dmode;
+	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
+	ip->i_nlink = 2;
+	tv = time;
+	/*
+	 * Do not carry on if the new inode could not be written; the
+	 * parent has not been touched yet, so only the inode needs to
+	 * be released.
+	 */
+	if (error = VOP_UPDATE(tvp, &tv, &tv, 1))
+		goto bad;
+
+	/*
+	 * Bump link count in parent directory
+	 * to reflect work done below.  Should
+	 * be done before reference is created
+	 * so reparation is possible if we crash.
+	 */
+	dp->i_nlink++;
+	dp->i_flag |= IN_CHANGE;
+	if (error = VOP_UPDATE(dvp, &tv, &tv, 1)) {
+		/* Undo the bump, as the other failure paths below do. */
+		dp->i_nlink--;
+		dp->i_flag |= IN_CHANGE;
+		goto bad;
+	}
+
+	/* Initialize directory with "." and ".." from static template. */
+	if (dvp->v_mount->mnt_maxsymlinklen > 0)
+		dtp = &mastertemplate;
+	else
+		dtp = (struct dirtemplate *)&omastertemplate;
+	dirtemplate = *dtp;
+	dirtemplate.dot_ino = ip->i_number;
+	dirtemplate.dotdot_ino = dp->i_number;
+	error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate,
+	    sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE,
+	    IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0);
+	if (error) {
+		dp->i_nlink--;
+		dp->i_flag |= IN_CHANGE;
+		goto bad;
+	}
+	if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
+		panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */
+	else {
+		ip->i_size = DIRBLKSIZ;
+		ip->i_flag |= IN_CHANGE;
+	}
+
+	/* Directory set up, now install its entry in the parent directory. */
+	if (error = ufs_direnter(ip, dvp, cnp)) {
+		dp->i_nlink--;
+		dp->i_flag |= IN_CHANGE;
+	}
+bad:
+	/*
+	 * No need to do an explicit VOP_TRUNCATE here, vrele will do this
+	 * for us because we set the link count to 0.
+	 */
+	if (error) {
+		ip->i_nlink = 0;
+		ip->i_flag |= IN_CHANGE;
+		vput(tvp);
+	} else
+		*ap->a_vpp = tvp;
+out:
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	vput(dvp);
+	return (error);
+}
+
+/*
+ * Rmdir system call.
+ *
+ * Removes the directory a_vp, named by a_cnp, from its parent a_dvp.
+ * Returns EINVAL for an attempt to remove ".", ENOTEMPTY when the
+ * directory has a link count other than 2 or is not empty, EPERM when
+ * the parent is APPEND or the directory is IMMUTABLE/APPEND, or the
+ * error from ufs_dirremove() / VOP_TRUNCATE().  Both vnodes are
+ * released before returning.
+ */
+int
+ufs_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vnode *dvp = ap->a_dvp;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct inode *ip = VTOI(vp);
+	register struct inode *dp = VTOI(dvp);
+	int error;
+
+	/* No rmdir "." please. */
+	if (dp == ip) {
+		vrele(dvp);
+		vput(vp);
+		return (EINVAL);
+	}
+	/*
+	 * The directory must be empty (and valid).  Rmdir ".." is caught
+	 * here as well: ".." contains a reference to the current
+	 * directory and is therefore never empty.
+	 */
+	if (ip->i_nlink != 2 ||
+	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
+		error = ENOTEMPTY;
+	} else if ((dp->i_flags & APPEND) ||
+	    (ip->i_flags & (IMMUTABLE | APPEND))) {
+		error = EPERM;
+	} else if ((error = ufs_dirremove(dvp, cnp)) == 0) {
+		/*
+		 * The reference in the parent is deleted before the inode
+		 * is purged.  If we crash in between, the directory will
+		 * be reattached to lost+found.
+		 */
+		dp->i_nlink--;
+		dp->i_flag |= IN_CHANGE;
+		cache_purge(dvp);
+		vput(dvp);
+		dvp = NULL;
+		/*
+		 * Truncate the inode.  Only "." and ".." are left in the
+		 * directory.  The "." reference is inconsequential since
+		 * we are quashing it, and the ".." reference was adjusted
+		 * above.  The "." reference and the reference in the
+		 * parent are gone, but there may be other hard links, so
+		 * decrement by 2 and worry about them later.
+		 */
+		ip->i_nlink -= 2;
+		error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
+		    cnp->cn_proc);
+		cache_purge(ITOV(ip));
+	}
+	if (dvp)
+		vput(dvp);
+	vput(vp);
+	return (error);
+}
+
+/*
+ * symlink -- make a symbolic link
+ *
+ * Creates the link inode with ufs_makeinode() and stores the target
+ * string.  A target shorter than the mount's mnt_maxsymlinklen is
+ * copied into the inode's i_shortlink area; anything else is written
+ * to the file with vn_rdwr().  The new vnode is vput() before return.
+ * Returns the ufs_makeinode() or vn_rdwr() error, 0 otherwise.
+ */
+int
+ufs_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	register struct vnode *vp, **vpp = ap->a_vpp;
+	register struct inode *ip;
+	int len, error;
+
+	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
+	    vpp, ap->a_cnp);
+	if (error)
+		return (error);
+	vp = *vpp;
+	len = strlen(ap->a_target);
+	if (len >= vp->v_mount->mnt_maxsymlinklen) {
+		/* Too long for the inode: store the target as file data. */
+		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
+		    UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0,
+		    (struct proc *)0);
+	} else {
+		/* Short target: keep it inside the inode itself. */
+		ip = VTOI(vp);
+		bcopy(ap->a_target, (char *)ip->i_shortlink, len);
+		ip->i_size = len;
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Vnode op for reading directories.
+ *
+ * The routine below assumes that the on-disk format of a directory
+ * is the same as that defined by <sys/dirent.h>. If the on-disk
+ * format changes, then it will be necessary to do a conversion
+ * from the on-disk format that read returns to the format defined
+ * by <sys/dirent.h>.
+ *
+ * The request is rounded down to a multiple of DIRBLKSIZ; EINVAL is
+ * returned when less than one such block is asked for or when the
+ * offset is not DIRBLKSIZ aligned.  The bytes trimmed off by the
+ * rounding ("lost") are added back to uio_resid before returning.
+ *
+ * On little-endian machines, when the mount's mnt_maxsymlinklen is
+ * not > 0, the data is read into a temporary kernel buffer, the
+ * d_namlen and d_type bytes of every entry are swapped, and the
+ * result is copied out with uiomove(); an entry with d_reclen == 0
+ * aborts the conversion with EIO.  In all other cases the directory
+ * is read straight into the caller's uio with VOP_READ().
+ */
+int
+ufs_readdir(ap)
+ struct vop_readdir_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct uio *uio = ap->a_uio;
+ int count, lost, error;
+
+ count = uio->uio_resid;
+ count &= ~(DIRBLKSIZ - 1); /* round down to whole directory blocks */
+ lost = uio->uio_resid - count;
+ if (count < DIRBLKSIZ || (uio->uio_offset & (DIRBLKSIZ -1)))
+ return (EINVAL);
+ uio->uio_resid = count;
+ uio->uio_iov->iov_len = count; /* NOTE(review): only the first iovec is adjusted */
+# if (BYTE_ORDER == LITTLE_ENDIAN)
+ if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
+ error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
+ } else {
+ struct dirent *dp, *edp;
+ struct uio auio;
+ struct iovec aiov;
+ caddr_t dirbuf;
+ int readcnt;
+ u_char tmp;
+
+ auio = *uio; /* private uio that targets the kernel bounce buffer */
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_segflg = UIO_SYSSPACE;
+ aiov.iov_len = count;
+ MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK);
+ aiov.iov_base = dirbuf;
+ error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
+ if (error == 0) {
+ readcnt = count - auio.uio_resid;
+ edp = (struct dirent *)&dirbuf[readcnt];
+ for (dp = (struct dirent *)dirbuf; dp < edp; ) {
+ tmp = dp->d_namlen; /* swap the d_namlen and d_type bytes */
+ dp->d_namlen = dp->d_type;
+ dp->d_type = tmp;
+ if (dp->d_reclen > 0) {
+ dp = (struct dirent *)
+ ((char *)dp + dp->d_reclen);
+ } else {
+ error = EIO;
+ break;
+ }
+ }
+ if (dp >= edp) /* every entry converted: hand the data to the caller */
+ error = uiomove(dirbuf, readcnt, uio);
+ }
+ FREE(dirbuf, M_TEMP);
+ }
+# else
+ error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
+# endif
+ uio->uio_resid += lost;
+ return (error);
+}
+
+/*
+ * Return target name of a symbolic link
+ *
+ * A link whose size is below the mount's mnt_maxsymlinklen has its
+ * target stored in the inode (i_shortlink) and is copied out directly
+ * with uiomove(); any other link is read through VOP_READ().
+ *
+ * Returns 0 on success, otherwise the error reported by uiomove() or
+ * VOP_READ() -- a failed copy of a short link is no longer reported
+ * as success.
+ */
+int
+ufs_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+	int isize;
+
+	isize = ip->i_size;
+	if (isize < vp->v_mount->mnt_maxsymlinklen)
+		return (uiomove((char *)ip->i_shortlink, isize, ap->a_uio));
+	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
+}
+
+/*
+ * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually
+ * done.  The pathname buffer is freed when the component name has
+ * HASBUF set and SAVESTART clear; otherwise nothing happens.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+ufs_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (0);
+}
+
+/*
+ * Lock an inode.  If it is already locked, set the WANT bit and sleep,
+ * then start over from the top once woken.
+ *
+ * Returns 0 with IN_LOCKED set, or ENOENT when the vnode's tag is
+ * VT_NON by the time the VXLOCK wait is over.  With DIAGNOSTIC the pid
+ * of the holder and of the waiter are recorded in the inode, and an
+ * attempt by the holder to lock again panics.
+ */
+int
+ufs_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip;
+	struct proc *p = curproc;	/* XXX */
+
+	for (;;) {
+		/* Wait for VXLOCK to clear on the vnode first. */
+		while (vp->v_flag & VXLOCK) {
+			vp->v_flag |= VXWANT;
+			sleep((caddr_t)vp, PINOD);
+		}
+		if (vp->v_tag == VT_NON)
+			return (ENOENT);
+		ip = VTOI(vp);
+		if ((ip->i_flag & IN_LOCKED) == 0)
+			break;
+		/* Somebody else holds the lock: register interest and wait. */
+		ip->i_flag |= IN_WANTED;
+#ifdef DIAGNOSTIC
+		if (p) {
+			if (p->p_pid == ip->i_lockholder)
+				panic("locking against myself");
+			ip->i_lockwaiter = p->p_pid;
+		} else
+			ip->i_lockwaiter = -1;
+#endif
+		(void) sleep((caddr_t)ip, PINOD);
+	}
+#ifdef DIAGNOSTIC
+	ip->i_lockwaiter = 0;
+	if (ip->i_lockholder != 0)
+		panic("lockholder (%d) != 0", ip->i_lockholder);
+	if (p && p->p_pid == 0)
+		printf("locking by process 0\n");
+	if (p)
+		ip->i_lockholder = p->p_pid;
+	else
+		ip->i_lockholder = -1;
+#endif
+	ip->i_flag |= IN_LOCKED;
+	return (0);
+}
+
+/*
+ * Unlock an inode.  If the WANT bit is on, clear it and wake up the
+ * sleepers.  Always returns 0.
+ *
+ * With DIAGNOSTIC, unlocking an inode that is not locked panics.  A
+ * mismatch between the unlocking process and the recorded holder also
+ * panics, but only while lockcount (which starts at 90 and is bumped
+ * on each mismatch) is below 100.
+ * NOTE(review): the intent of that counter is not evident from here.
+ */
+int lockcount = 90;
+int
+ufs_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct inode *ip = VTOI(ap->a_vp);
+	struct proc *p = curproc;	/* XXX */
+	int wanted;
+
+#ifdef DIAGNOSTIC
+	if ((ip->i_flag & IN_LOCKED) == 0) {
+		vprint("ufs_unlock: unlocked inode", ap->a_vp);
+		panic("ufs_unlock NOT LOCKED");
+	}
+	if (p && p->p_pid != ip->i_lockholder && p->p_pid > -1 &&
+	    ip->i_lockholder > -1 && lockcount++ < 100)
+		panic("unlocker (%d) != lock holder (%d)",
+		    p->p_pid, ip->i_lockholder);
+	ip->i_lockholder = 0;
+#endif
+	wanted = ip->i_flag & IN_WANTED;
+	ip->i_flag &= ~(IN_LOCKED | IN_WANTED);
+	if (wanted)
+		wakeup((caddr_t)ip);
+	return (0);
+}
+
+/*
+ * Check for a locked inode: returns 1 when IN_LOCKED is set on the
+ * inode behind a_vp, 0 otherwise.
+ */
+int
+ufs_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct inode *ip = VTOI(ap->a_vp);
+
+	return ((ip->i_flag & IN_LOCKED) != 0);
+}
+
+/*
+ * Calculate the logical to physical mapping if not done already,
+ * then call the device strategy routine.
+ *
+ * A buffer whose b_blkno still equals b_lblkno is mapped with
+ * VOP_BMAP(); a mapping failure is recorded in the buffer, the buffer
+ * is completed with biodone() and the error is returned.  A block
+ * number of -1 completes the buffer at once (after clearing it when
+ * the -1 came from the mapping just done) and returns 0.  Otherwise
+ * the request is passed to the strategy routine of the inode's
+ * device vnode and 0 is returned.  Panics for VBLK / VCHR vnodes.
+ */
+int
+ufs_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	register struct buf *bp = ap->a_bp;
+	register struct vnode *vp = bp->b_vp;
+	register struct inode *ip = VTOI(vp);
+	int error;
+
+	if (vp->v_type == VBLK || vp->v_type == VCHR)
+		panic("ufs_strategy: spec");
+	if (bp->b_blkno == bp->b_lblkno) {
+		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL);
+		if (error) {
+			bp->b_error = error;
+			bp->b_flags |= B_ERROR;
+			biodone(bp);
+			return (error);
+		}
+		if ((long)bp->b_blkno == -1)
+			clrbuf(bp);
+	}
+	if ((long)bp->b_blkno == -1) {
+		biodone(bp);
+		return (0);
+	}
+	/* Redirect the request to the underlying device vnode. */
+	vp = ip->i_devvp;
+	bp->b_dev = vp->v_rdev;
+	VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
+	return (0);
+}
+
+/*
+ * Print out the contents of an inode: inode number, device major and
+ * minor, fifo details for a VFIFO (when FIFO is configured), the lock
+ * state and, when a lock holder is recorded, the holder and waiter
+ * pids.  Always returns 0.
+ */
+int
+ufs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+
+	printf("tag VT_UFS, ino %d, on dev %d, %d", ip->i_number,
+	    major(ip->i_dev), minor(ip->i_dev));
+#ifdef FIFO
+	if (vp->v_type == VFIFO)
+		fifo_printinfo(vp);
+#endif /* FIFO */
+	printf("%s\n", (ip->i_flag & IN_LOCKED) ? " (LOCKED)" : "");
+	if (ip->i_lockholder != 0) {
+		printf("\towner pid %d", ip->i_lockholder);
+		if (ip->i_lockwaiter)
+			printf(" waiting pid %d", ip->i_lockwaiter);
+		printf("\n");
+	}
+	return (0);
+}
+
+/*
+ * Read wrapper for special devices: flag the inode as accessed
+ * (IN_ACCESS), then hand the request to the spec read operation and
+ * return its result.
+ */
+int
+ufsspec_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct inode *ip = VTOI(ap->a_vp);
+
+	ip->i_flag |= IN_ACCESS;
+	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for special devices: flag the inode as changed and
+ * updated (IN_CHANGE | IN_UPDATE), then hand the request to the spec
+ * write operation and return its result.
+ */
+int
+ufsspec_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct inode *ip = VTOI(ap->a_vp);
+
+	ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for special devices.
+ *
+ * When the vnode has more than one user and the inode is not locked,
+ * the inode times are brought up to date with ITIMES(); the device
+ * close is then performed through the spec close operation and its
+ * result returned.
+ */
+int
+ufsspec_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+
+	if (vp->v_usecount > 1 && (ip->i_flag & IN_LOCKED) == 0)
+		ITIMES(ip, &time, &time);
+	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
+}
+
+#ifdef FIFO
+/*
+ * Read on a FIFO vnode backed by a UFS inode: flag the inode with
+ * IN_ACCESS, then dispatch vop_read through fifo_vnodeop_p and return
+ * whatever that operation returns.
+ */
+int
+ufsfifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	struct inode *ino = VTOI(ap->a_vp);
+
+	/* Mark the inode as accessed before doing the fifo read. */
+	ino->i_flag |= IN_ACCESS;
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write on a FIFO vnode backed by a UFS inode: flag the inode with
+ * IN_CHANGE and IN_UPDATE, then dispatch vop_write through
+ * fifo_vnodeop_p and return whatever that operation returns.
+ */
+int
+ufsfifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	struct inode *ino = VTOI(ap->a_vp);
+
+	/* Mark the inode as changed and updated before the fifo write. */
+	ino->i_flag |= IN_CHANGE | IN_UPDATE;
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for fifo's.
+ *
+ * If the vnode has more than one user and the inode is not IN_LOCKED,
+ * update the times on the inode with ITIMES(); then dispatch vop_close
+ * through fifo_vnodeop_p and return its result.
+ *
+ * The return type is spelled out as int, matching the other wrappers in
+ * this file, instead of relying on the implicit-int rule.
+ */
+int
+ufsfifo_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct inode *ip = VTOI(ap->a_vp);
+
+	if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED))
+		ITIMES(ip, &time, &time);
+	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
+}
+#endif /* FIFO */
+
+/*
+ * Return POSIX pathconf information applicable to ufs filesystems.
+ *
+ * The value for the variable selected by ap->a_name is stored through
+ * ap->a_retval and 0 is returned.  An unrecognized name returns EINVAL
+ * and leaves *ap->a_retval untouched.
+ *
+ * The return type is spelled out as int instead of relying on the
+ * implicit-int rule.
+ */
+int
+ufs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_NAME_MAX:
+		*ap->a_retval = NAME_MAX;
+		return (0);
+	case _PC_PATH_MAX:
+		*ap->a_retval = PATH_MAX;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	case _PC_NO_TRUNC:
+		*ap->a_retval = 1;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Advisory record locking support.
+ *
+ * Converts the flock request in ap->a_fl into an absolute byte range on
+ * the inode and applies ap->a_op (F_SETLK, F_UNLCK or F_GETLK) to it.
+ * A zero l_len means "through end of file" and is represented by an end
+ * offset of -1.  A negative l_len selects the l_len bytes that precede
+ * the starting offset, so it can no longer produce an inverted range or
+ * be mistaken for the "through end of file" marker.
+ *
+ * Returns 0 or an errno value; EINVAL is returned for a bad l_whence,
+ * a range that would begin before offset 0, or an unknown operation.
+ */
+int
+ufs_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+	register struct inode *ip = VTOI(ap->a_vp);
+	register struct flock *fl = ap->a_fl;
+	register struct lockf *lock;
+	off_t start, end;
+	int error;
+
+	/*
+	 * Avoid the common case of unlocking when inode has no locks.
+	 */
+	if (ip->i_lockf == (struct lockf *)0) {
+		if (ap->a_op != F_SETLK) {
+			fl->l_type = F_UNLCK;
+			return (0);
+		}
+	}
+	/*
+	 * Convert the flock structure into a start and end.
+	 */
+	switch (fl->l_whence) {
+
+	case SEEK_SET:
+	case SEEK_CUR:
+		/*
+		 * Caller is responsible for adding any necessary offset
+		 * when SEEK_CUR is used.
+		 */
+		start = fl->l_start;
+		break;
+
+	case SEEK_END:
+		start = ip->i_size + fl->l_start;
+		break;
+
+	default:
+		return (EINVAL);
+	}
+	if (start < 0)
+		return (EINVAL);
+	if (fl->l_len > 0)
+		end = start + fl->l_len - 1;
+	else if (fl->l_len < 0) {
+		/*
+		 * Negative length: the range is the -l_len bytes that
+		 * end just before the starting offset.
+		 */
+		end = start - 1;
+		start += fl->l_len;
+		if (start < 0)
+			return (EINVAL);
+	} else
+		end = -1;
+	/*
+	 * Create the lockf structure
+	 */
+	MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
+	lock->lf_start = start;
+	lock->lf_end = end;
+	lock->lf_id = ap->a_id;
+	lock->lf_inode = ip;
+	lock->lf_type = fl->l_type;
+	lock->lf_next = (struct lockf *)0;
+	lock->lf_block = (struct lockf *)0;
+	lock->lf_flags = ap->a_flags;
+	/*
+	 * Do the requested operation.
+	 */
+	switch(ap->a_op) {
+	case F_SETLK:
+		/* The lock structure is handed over to lf_setlock(). */
+		return (lf_setlock(lock));
+
+	case F_UNLCK:
+		error = lf_clearlock(lock);
+		FREE(lock, M_LOCKF);
+		return (error);
+
+	case F_GETLK:
+		error = lf_getlock(lock, fl);
+		FREE(lock, M_LOCKF);
+		return (error);
+
+	default:
+		/* Release with FREE() like the other branches. */
+		FREE(lock, M_LOCKF);
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Initialize the vnode associated with a new inode, handle aliased
+ * vnodes.
+ *
+ * The vnode type is derived from the inode's mode.  Character and block
+ * device vnodes get `specops' as their operations vector and are passed
+ * to checkalias(); if that returns another vnode, the inode is moved
+ * onto it and the original vnode is discarded.  FIFO vnodes get
+ * `fifoops' when the kernel is built with FIFO; otherwise EOPNOTSUPP is
+ * returned.  The root inode's vnode is flagged VROOT.  On success the
+ * vnode (possibly the alias) is stored through vpp and 0 is returned.
+ */
+int
+ufs_vinit(mntp, specops, fifoops, vpp)
+ struct mount *mntp;
+ int (**specops)();
+ int (**fifoops)();
+ struct vnode **vpp;
+{
+ struct inode *ip;
+ struct vnode *vp, *nvp;
+
+ vp = *vpp;
+ ip = VTOI(vp);
+ switch(vp->v_type = IFTOVT(ip->i_mode)) { /* also stores the type in the vnode */
+ case VCHR:
+ case VBLK:
+ vp->v_op = specops;
+ if (nvp = checkalias(vp, ip->i_rdev, mntp)) {
+ /*
+ * Discard unneeded vnode, but save its inode.
+ */
+ ufs_ihashrem(ip);
+ VOP_UNLOCK(vp);
+ nvp->v_data = vp->v_data; /* alias vnode takes over the inode */
+ vp->v_data = NULL;
+ vp->v_op = spec_vnodeop_p;
+ vrele(vp);
+ vgone(vp);
+ /*
+ * Reinitialize aliased inode.
+ */
+ vp = nvp;
+ ip->i_vnode = vp;
+ ufs_ihashins(ip);
+ }
+ break;
+ case VFIFO:
+#ifdef FIFO
+ vp->v_op = fifoops;
+ break;
+#else
+ return (EOPNOTSUPP);
+#endif
+ }
+ if (ip->i_number == ROOTINO)
+ vp->v_flag |= VROOT;
+ /*
+ * Initialize modrev times
+ * (high part from seconds, low part from microseconds * 4294;
+ * presumably 4294 ~= 2^32 / 10^6 to spread usec over 32 bits --
+ * TODO confirm).
+ */
+ SETHIGH(ip->i_modrev, mono_time.tv_sec);
+ SETLOW(ip->i_modrev, mono_time.tv_usec * 4294);
+ *vpp = vp;
+ return (0);
+}
+
+/*
+ * Allocate a new inode.
+ *
+ * Allocates a vnode/inode of type `mode' (IFREG when no type bits are
+ * given) via VOP_VALLOC on the directory dvp, sets its ownership and
+ * mode, writes it out with VOP_UPDATE and then enters it in dvp with
+ * ufs_direnter().  The group is inherited from the directory; the owner
+ * is the directory's owner for symbolic links and the caller's
+ * credential uid otherwise.
+ *
+ * On success *vpp is the new vnode, dvp has been vput() and the
+ * pathname buffer has been freed unless SAVESTART is set; 0 is
+ * returned.  On every failure path *vpp is NULL, the pathname buffer is
+ * freed, dvp is vput() and the error is returned.
+ */
+int
+ufs_makeinode(mode, dvp, vpp, cnp)
+ int mode;
+ struct vnode *dvp;
+ struct vnode **vpp;
+ struct componentname *cnp;
+{
+ register struct inode *ip, *pdir;
+ struct timeval tv;
+ struct vnode *tvp;
+ int error;
+
+ pdir = VTOI(dvp);
+#ifdef DIAGNOSTIC
+ if ((cnp->cn_flags & HASBUF) == 0)
+ panic("ufs_makeinode: no name");
+#endif
+ *vpp = NULL;
+ if ((mode & IFMT) == 0)
+ mode |= IFREG; /* default to a regular file */
+
+ if (error = VOP_VALLOC(dvp, mode, cnp->cn_cred, &tvp)) {
+ free(cnp->cn_pnbuf, M_NAMEI);
+ vput(dvp);
+ return (error);
+ }
+ ip = VTOI(tvp);
+ ip->i_gid = pdir->i_gid;
+ if ((mode & IFMT) == IFLNK)
+ ip->i_uid = pdir->i_uid;
+ else
+ ip->i_uid = cnp->cn_cred->cr_uid;
+#ifdef QUOTA
+ if ((error = getinoquota(ip)) ||
+ (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
+ free(cnp->cn_pnbuf, M_NAMEI);
+ VOP_VFREE(tvp, ip->i_number, mode); /* give the inode back */
+ vput(tvp);
+ vput(dvp);
+ return (error);
+ }
+#endif
+ ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
+ ip->i_mode = mode;
+ tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */
+ ip->i_nlink = 1;
+ if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
+ suser(cnp->cn_cred, NULL)) /* drop setgid unless group member or suser() returns 0 */
+ ip->i_mode &= ~ISGID;
+
+ /*
+ * Make sure inode goes to disk before directory entry.
+ */
+ tv = time;
+ if (error = VOP_UPDATE(tvp, &tv, &tv, 1))
+ goto bad;
+ if (error = ufs_direnter(ip, dvp, cnp))
+ goto bad;
+ if ((cnp->cn_flags & SAVESTART) == 0)
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ vput(dvp);
+ *vpp = tvp;
+ return (0);
+
+bad:
+ /*
+ * Write error occurred trying to update the inode
+ * or the directory so must deallocate the inode.
+ */
+ free(cnp->cn_pnbuf, M_NAMEI);
+ vput(dvp);
+ ip->i_nlink = 0; /* link count zero before the final vput() */
+ ip->i_flag |= IN_CHANGE;
+ vput(tvp);
+ return (error);
+}
diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h
new file mode 100644
index 000000000000..237871fdaaca
--- /dev/null
+++ b/sys/ufs/ufs/ufsmount.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufsmount.h 8.2 (Berkeley) 1/12/94
+ */
+
+struct buf;
+struct inode;
+struct nameidata;
+struct timeval;
+struct ucred;
+struct uio;
+struct vnode;
+struct netexport;
+
+/*
+ * This structure describes the UFS specific mount structure data.
+ * The quota-related arrays are all sized by MAXQUOTAS.
+ */
+struct ufsmount {
+ struct mount *um_mountp; /* filesystem vfs structure */
+ dev_t um_dev; /* device mounted */
+ struct vnode *um_devvp; /* block device mounted vnode */
+ union { /* pointer to superblock */
+ struct lfs *lfs; /* LFS */
+ struct fs *fs; /* FFS */
+ } ufsmount_u;
+#define um_fs ufsmount_u.fs /* shorthand for the FFS member */
+#define um_lfs ufsmount_u.lfs /* shorthand for the LFS member */
+ struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */
+ struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */
+ u_long um_nindir; /* indirect ptrs per block */
+ u_long um_bptrtodb; /* indir ptr to disk block */
+ u_long um_seqinc; /* inc between seq blocks */
+ time_t um_btime[MAXQUOTAS]; /* block quota time limit */
+ time_t um_itime[MAXQUOTAS]; /* inode quota time limit */
+ char um_qflags[MAXQUOTAS]; /* quota specific flags */
+ struct netexport um_export; /* export information */
+};
+/*
+ * Flags describing the state of quotas.
+ */
+#define	QTF_OPENING	0x01			/* Q_QUOTAON in progress */
+#define	QTF_CLOSING	0x02			/* Q_QUOTAOFF in progress */
+
+/* Convert mount ptr to ufsmount ptr. */
+#define VFSTOUFS(mp)	((struct ufsmount *)((mp)->mnt_data))
+
+/*
+ * Macros to access file system parameters in the ufsmount structure.
+ * Used by ufs_bmap.
+ *
+ * Every use of a macro argument is parenthesized so that an expression
+ * such as `base + 1' may be passed as `ump'.
+ */
+#define	blkptrtodb(ump, b)		((b) << (ump)->um_bptrtodb)
+#define	is_sequential(ump, a, b)	((b) == (a) + (ump)->um_seqinc)
+#define MNINDIR(ump)			((ump)->um_nindir)
+
+
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c
new file mode 100644
index 000000000000..235c917a0c67
--- /dev/null
+++ b/sys/vm/device_pager.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)device_pager.c 8.5 (Berkeley) 1/12/94
+ */
+
+/*
+ * Page to/from special files.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/mman.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/device_pager.h>
+
+struct pagerlst dev_pager_list; /* list of managed devices */
+struct pglist dev_pager_fakelist; /* list of available vm_page_t's */
+
+#ifdef DEBUG
+int dpagerdebug = 0;
+#define DDB_FOLLOW 0x01
+#define DDB_INIT 0x02
+#define DDB_ALLOC 0x04
+#define DDB_FAIL 0x08
+#endif
+
+static vm_pager_t dev_pager_alloc
+ __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+static void dev_pager_dealloc __P((vm_pager_t));
+static int dev_pager_getpage
+ __P((vm_pager_t, vm_page_t *, int, boolean_t));
+static boolean_t dev_pager_haspage __P((vm_pager_t, vm_offset_t));
+static void dev_pager_init __P((void));
+static int dev_pager_putpage
+ __P((vm_pager_t, vm_page_t *, int, boolean_t));
+static vm_page_t dev_pager_getfake __P((vm_offset_t));
+static void dev_pager_putfake __P((vm_page_t));
+/*
+ * Operations vector for the device pager.  The initializers are
+ * positional, so their order has to match the member order of
+ * struct pagerops, which is declared outside this file.
+ */
+struct pagerops devicepagerops = {
+ dev_pager_init,
+ dev_pager_alloc,
+ dev_pager_dealloc,
+ dev_pager_getpage,
+ dev_pager_putpage,
+ dev_pager_haspage,
+ vm_pager_clusternull
+};
+
+/*
+ * Set up the device pager's two global queues: the list of managed
+ * pagers and the list of spare fake pages.  Both start out empty.
+ */
+static void
+dev_pager_init()
+{
+#ifdef DEBUG
+	if (dpagerdebug & DDB_FOLLOW) {
+		printf("dev_pager_init()\n");
+	}
+#endif
+	TAILQ_INIT(&dev_pager_list);
+	TAILQ_INIT(&dev_pager_fakelist);
+}
+/*
+ * Return the pager for the device identified by `handle' (a dev_t cast
+ * to caddr_t), creating the pager and its VM object on first use.
+ *
+ * NULL is returned when the device has no usable d_mmap entry, when
+ * foff is not page aligned, or when the device's map function returns
+ * -1 for any page in [foff, foff + size) with the requested protection.
+ */
+static vm_pager_t
+dev_pager_alloc(handle, size, prot, foff)
+ caddr_t handle;
+ vm_size_t size;
+ vm_prot_t prot;
+ vm_offset_t foff;
+{
+ dev_t dev;
+ vm_pager_t pager;
+ int (*mapfunc)();
+ vm_object_t object;
+ dev_pager_t devp;
+ int npages, off;
+
+#ifdef DEBUG
+ if (dpagerdebug & DDB_FOLLOW)
+ printf("dev_pager_alloc(%x, %x, %x, %x)\n",
+ handle, size, prot, foff);
+#endif
+#ifdef DIAGNOSTIC
+ /*
+ * Pageout to device, should never happen.
+ */
+ if (handle == NULL)
+ panic("dev_pager_alloc called");
+#endif
+
+ /*
+ * Make sure this device can be mapped.
+ */
+ dev = (dev_t)handle;
+ mapfunc = cdevsw[major(dev)].d_mmap;
+ if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop)
+ return(NULL);
+
+ /*
+ * Offset should be page aligned.
+ */
+ if (foff & PAGE_MASK)
+ return(NULL);
+
+ /*
+ * Check that the specified range of the device allows the
+ * desired protection.
+ *
+ * XXX assumes VM_PROT_* == PROT_*
+ */
+ npages = atop(round_page(size));
+ for (off = foff; npages--; off += PAGE_SIZE) /* NOTE(review): off is an int, foff a vm_offset_t */
+ if ((*mapfunc)(dev, off, (int)prot) == -1)
+ return(NULL);
+
+ /*
+ * Look up pager, creating as necessary.
+ */
+top:
+ pager = vm_pager_lookup(&dev_pager_list, handle);
+ if (pager == NULL) {
+ /*
+ * Allocate and initialize pager structs
+ */
+ pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
+ if (pager == NULL) /* NOTE(review): M_WAITOK presumably never yields NULL -- confirm */
+ return(NULL);
+ devp = (dev_pager_t)malloc(sizeof *devp, M_VMPGDATA, M_WAITOK);
+ if (devp == NULL) {
+ free((caddr_t)pager, M_VMPAGER);
+ return(NULL);
+ }
+ pager->pg_handle = handle;
+ pager->pg_ops = &devicepagerops;
+ pager->pg_type = PG_DEVICE;
+ pager->pg_flags = 0;
+ pager->pg_data = devp;
+ TAILQ_INIT(&devp->devp_pglist);
+ /*
+ * Allocate object and associate it with the pager.
+ */
+ object = devp->devp_object = vm_object_allocate(0);
+ vm_object_enter(object, pager);
+ vm_object_setpager(object, pager, (vm_offset_t)0, FALSE);
+ /*
+ * Finally, put it on the managed list so others can find it.
+ * First we re-lookup in case someone else beat us to this
+ * point (due to blocking in the various mallocs). If so,
+ * we free everything and start over.
+ *
+ * NOTE(review): only devp and pager are freed on that path;
+ * the object allocated and entered above is not released in
+ * the code visible here -- confirm it is reclaimed elsewhere.
+ */
+ if (vm_pager_lookup(&dev_pager_list, handle)) {
+ free((caddr_t)devp, M_VMPGDATA);
+ free((caddr_t)pager, M_VMPAGER);
+ goto top;
+ }
+ TAILQ_INSERT_TAIL(&dev_pager_list, pager, pg_list);
+#ifdef DEBUG
+ if (dpagerdebug & DDB_ALLOC) {
+ printf("dev_pager_alloc: pager %x devp %x object %x\n",
+ pager, devp, object);
+ vm_object_print(object, FALSE);
+ }
+#endif
+ } else {
+ /*
+ * vm_object_lookup() gains a reference and also
+ * removes the object from the cache.
+ */
+ object = vm_object_lookup(pager);
+#ifdef DIAGNOSTIC
+ devp = (dev_pager_t)pager->pg_data;
+ if (object != devp->devp_object)
+ panic("dev_pager_setup: bad object");
+#endif
+ }
+ return(pager);
+}
+
+/*
+ * Tear down a device pager: unlink it from the managed list, hand all
+ * of its fake pages back to the spare list and free both the private
+ * data and the pager structure itself.
+ */
+static void
+dev_pager_dealloc(pager)
+	vm_pager_t pager;
+{
+	dev_pager_t dp;
+	vm_object_t obj;
+	vm_page_t fake;
+
+#ifdef DEBUG
+	if (dpagerdebug & DDB_FOLLOW)
+		printf("dev_pager_dealloc(%x)\n", pager);
+#endif
+	TAILQ_REMOVE(&dev_pager_list, pager, pg_list);
+	/*
+	 * The object is taken from our private data; vm_object_lookup
+	 * cannot be used because the object has already been removed
+	 * from the hash chain.
+	 */
+	dp = (dev_pager_t)pager->pg_data;
+	obj = dp->devp_object;
+#ifdef DEBUG
+	if (dpagerdebug & DDB_ALLOC)
+		printf("dev_pager_dealloc: devp %x object %x\n", dp, obj);
+#endif
+	/*
+	 * Drain the per-pager page list, one fake page at a time.
+	 */
+	for (;;) {
+		fake = dp->devp_pglist.tqh_first;
+		if (fake == NULL)
+			break;
+		TAILQ_REMOVE(&dp->devp_pglist, fake, pageq);
+		dev_pager_putfake(fake);
+	}
+	free((caddr_t)dp, M_VMPGDATA);
+	free((caddr_t)pager, M_VMPAGER);
+}
+/*
+ * Page-in for a device mapping.  Only single-page requests are handled.
+ * The device's d_mmap function is asked for the page at the faulting
+ * offset, and the page passed in by the caller is replaced in its
+ * object by a fake page that carries the resulting physical address.
+ * Returns VM_PAGER_OK.
+ */
+static int
+dev_pager_getpage(pager, mlist, npages, sync)
+ vm_pager_t pager;
+ vm_page_t *mlist;
+ int npages;
+ boolean_t sync;
+{
+ register vm_object_t object;
+ vm_offset_t offset, paddr;
+ vm_page_t page;
+ dev_t dev;
+ int (*mapfunc)(), prot;
+ vm_page_t m;
+
+#ifdef DEBUG
+ if (dpagerdebug & DDB_FOLLOW)
+ printf("dev_pager_getpage(%x, %x, %x, %x)\n",
+ pager, mlist, npages, sync);
+#endif
+
+ if (npages != 1)
+ panic("dev_pager_getpage: cannot handle multiple pages");
+ m = *mlist;
+
+ object = m->object;
+ dev = (dev_t)pager->pg_handle;
+ offset = m->offset + object->paging_offset;
+ prot = PROT_READ; /* XXX should pass in? */
+ mapfunc = cdevsw[major(dev)].d_mmap;
+#ifdef DIAGNOSTIC
+ if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop)
+ panic("dev_pager_getpage: no map function");
+#endif
+ paddr = pmap_phys_address((*mapfunc)(dev, (int)offset, prot));
+#ifdef DIAGNOSTIC
+ if (paddr == -1) /* NOTE(review): tested after pmap_phys_address() conversion -- confirm -1 survives it */
+ panic("dev_pager_getpage: map function returns error");
+#endif
+ /*
+ * Replace the passed in page with our own fake page and free
+ * up the original.
+ */
+ page = dev_pager_getfake(paddr);
+ TAILQ_INSERT_TAIL(&((dev_pager_t)pager->pg_data)->devp_pglist, page,
+ pageq);
+ vm_object_lock(object);
+ vm_page_lock_queues();
+ vm_page_free(m);
+ vm_page_insert(page, object, offset);
+ vm_page_unlock_queues();
+ PAGE_WAKEUP(m); /* NOTE(review): m was handed to vm_page_free() above -- confirm this order is intended */
+ if (offset + PAGE_SIZE > object->size)
+ object->size = offset + PAGE_SIZE; /* XXX anal */
+ vm_object_unlock(object);
+
+ return(VM_PAGER_OK);
+}
+
+/*
+ * Page-out entry point for the device pager.  Device pages are never
+ * paged out, so a call with a real pager is a fatal error.  A call with
+ * a NULL pager is tolerated and does nothing.
+ *
+ * The NULL-pager path returns VM_PAGER_OK explicitly; the function is
+ * declared to return int, so a bare `return;' left the result
+ * undefined for any caller that looked at it.
+ */
+static int
+dev_pager_putpage(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+#ifdef DEBUG
+	if (dpagerdebug & DDB_FOLLOW)
+		printf("dev_pager_putpage(%x, %x, %x, %x)\n",
+		       pager, mlist, npages, sync);
+#endif
+	if (pager == NULL)
+		return (VM_PAGER_OK);
+	panic("dev_pager_putpage called");
+	/* NOTREACHED */
+}
+
+/*
+ * Report whether the pager can supply the page at `offset'.  The device
+ * pager answers TRUE for every offset.
+ */
+static boolean_t
+dev_pager_haspage(pager, offset)
+	vm_pager_t pager;
+	vm_offset_t offset;
+{
+#ifdef DEBUG
+	if (dpagerdebug & DDB_FOLLOW) {
+		printf("dev_pager_haspage(%x, %x)\n", pager, offset);
+	}
+#endif
+	return (TRUE);
+}
+
+/*
+ * Take a fake page off the spare list and set it up to describe the
+ * physical address `paddr'.  When the spare list is empty, PAGE_SIZE
+ * bytes are allocated and carved into vm_page structures that are all
+ * queued on the spare list first.
+ */
+static vm_page_t
+dev_pager_getfake(paddr)
+	vm_offset_t paddr;
+{
+	vm_page_t pg;
+	int left;
+
+	if (dev_pager_fakelist.tqh_first == NULL) {
+		/* Refill: one allocation yields a batch of page structs. */
+		pg = (vm_page_t)malloc(PAGE_SIZE, M_VMPGDATA, M_WAITOK);
+		left = PAGE_SIZE / sizeof(*pg);
+		while (left > 0) {
+			TAILQ_INSERT_TAIL(&dev_pager_fakelist, pg, pageq);
+			pg++;
+			left--;
+		}
+	}
+	pg = dev_pager_fakelist.tqh_first;
+	TAILQ_REMOVE(&dev_pager_fakelist, pg, pageq);
+	pg->phys_addr = paddr;
+	pg->wire_count = 1;
+	pg->flags = PG_BUSY | PG_CLEAN | PG_FAKE | PG_FICTITIOUS;
+	return (pg);
+}
+
+/*
+ * Return a fake page to the spare list.  With DIAGNOSTIC, a page that
+ * does not carry PG_FICTITIOUS causes a panic.
+ */
+static void
+dev_pager_putfake(m)
+	vm_page_t m;
+{
+#ifdef DIAGNOSTIC
+	if ((m->flags & PG_FICTITIOUS) == 0) {
+		panic("dev_pager_putfake: bad page");
+	}
+#endif
+	TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq);
+}
diff --git a/sys/vm/device_pager.h b/sys/vm/device_pager.h
new file mode 100644
index 000000000000..8840622919de
--- /dev/null
+++ b/sys/vm/device_pager.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)device_pager.h 8.3 (Berkeley) 12/13/93
+ */
+
+#ifndef _DEVICE_PAGER_
+#define _DEVICE_PAGER_ 1
+
+/*
+ * Device pager private data.
+ *
+ * One of these hangs off each device pager; dev_pager_t is the pointer
+ * type used to refer to it.
+ */
+struct devpager {
+ struct pglist devp_pglist; /* list of pages allocated */
+ vm_object_t devp_object; /* object representing this device */
+};
+typedef struct devpager *dev_pager_t;
+
+#endif /* _DEVICE_PAGER_ */
diff --git a/sys/vm/kern_lock.c b/sys/vm/kern_lock.c
new file mode 100644
index 000000000000..c4fa05230a79
--- /dev/null
+++ b/sys/vm/kern_lock.c
@@ -0,0 +1,534 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_lock.c 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Locking primitives implementation
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+
+/* XXX */
+#include <sys/proc.h>
+typedef int *thread_t;
+#define current_thread() ((thread_t)&curproc->p_thread)
+/* XXX */
+
+#if NCPUS > 1
+
+/*
+ * Module: lock
+ * Function:
+ * Provide reader/writer synchronization.
+ * Implementation:
+ * Simple interlock on a bit. Readers first interlock,
+ * increment the reader count, then let go. Writers hold
+ * the interlock (thus preventing further readers), and
+ * wait for already-accepted readers to go away.
+ */
+
+/*
+ * The simple-lock routines are the primitives out of which
+ * the lock package is built. The implementation is left
+ * to the machine-dependent code.
+ */
+
+#ifdef notdef
+/*
+ * A sample implementation of simple locks.
+ * assumes:
+ * boolean_t test_and_set(boolean_t *)
+ * indivisibly sets the boolean to TRUE
+ * and returns its old value
+ * and that setting a boolean to FALSE is indivisible.
+ */
+/*
+ * simple_lock_init initializes a simple lock. A simple lock
+ * may only be used for exclusive locks.
+ *
+ * In this sample the lock is treated as a single boolean: FALSE means
+ * free, TRUE means held.  simple_lock spins on test_and_set until it
+ * observes FALSE, simple_unlock stores FALSE, and simple_lock_try makes
+ * one attempt and reports whether it got the lock.
+ */
+
+void simple_lock_init(l)
+ simple_lock_t l;
+{
+ *(boolean_t *)l = FALSE;
+}
+
+void simple_lock(l)
+ simple_lock_t l;
+{
+ while (test_and_set((boolean_t *)l)) /* busy-wait while the old value was TRUE */
+ continue;
+}
+
+void simple_unlock(l)
+ simple_lock_t l;
+{
+ *(boolean_t *)l = FALSE;
+}
+
+boolean_t simple_lock_try(l)
+ simple_lock_t l;
+{
+ return (!test_and_set((boolean_t *)l)); /* TRUE when the lock was free */
+}
+#endif /* notdef */
+#endif /* NCPUS > 1 */
+
+#if NCPUS > 1
+int lock_wait_time = 100;
+#else /* NCPUS > 1 */
+
+ /*
+ * It is silly to spin on a uni-processor as if we
+ * thought something magical would happen to the
+ * want_write bit while we are executing.
+ */
+int lock_wait_time = 0;
+#endif /* NCPUS > 1 */
+
+
+/*
+ *	Routine:	lock_init
+ *	Function:
+ *		Put a lock into its initial, unowned state.  This must
+ *		be done before the lock is used.  Callers own the
+ *		storage: they declare the "struct lock" themselves and
+ *		pass it in here; nothing is allocated by this module.
+ */
+void lock_init(l, can_sleep)
+	lock_t		l;
+	boolean_t	can_sleep;
+{
+	/* Start from an all-zero lock, then set up the interlock. */
+	bzero(l, sizeof(lock_data_t));
+	simple_lock_init(&l->interlock);
+
+	/* No recursive owner recorded yet. */
+	l->thread = (char *)-1;		/* XXX */
+	l->recursion_depth = 0;
+
+	/* Nobody holds or wants the lock. */
+	l->read_count = 0;
+	l->want_upgrade = FALSE;
+	l->want_write = FALSE;
+
+	l->can_sleep = can_sleep;
+}
+/*
+ * Change the can_sleep setting of an existing lock.  The field is
+ * updated while holding the lock's interlock.
+ */
+void lock_sleepable(l, can_sleep)
+ lock_t l;
+ boolean_t can_sleep;
+{
+ simple_lock(&l->interlock);
+ l->can_sleep = can_sleep;
+ simple_unlock(&l->interlock);
+}
+
+
+/*
+ * Sleep locks. These use the same data structure and algorithm
+ * as the spin locks, but the process sleeps while it is waiting
+ * for the lock. These work on uniprocessor systems.
+ *
+ * lock_write acquires the lock for writing.  If the calling thread is
+ * the one recorded in l->thread, only recursion_depth is incremented.
+ * Otherwise it waits until want_write is clear and sets it, then waits
+ * until read_count is zero and want_upgrade is clear.  Each wait spins
+ * for up to lock_wait_time iterations with the interlock dropped and,
+ * if the lock is marked can_sleep, sleeps on the lock address.
+ */
+
+void lock_write(l)
+ register lock_t l;
+{
+ register int i;
+
+ simple_lock(&l->interlock);
+
+ if (((thread_t)l->thread) == current_thread()) {
+ /*
+ * Recursive lock.
+ */
+ l->recursion_depth++;
+ simple_unlock(&l->interlock);
+ return;
+ }
+
+ /*
+ * Try to acquire the want_write bit.
+ */
+ while (l->want_write) {
+ if ((i = lock_wait_time) > 0) {
+ simple_unlock(&l->interlock);
+ while (--i > 0 && l->want_write) /* spin without the interlock */
+ continue;
+ simple_lock(&l->interlock);
+ }
+
+ if (l->can_sleep && l->want_write) {
+ l->waiting = TRUE;
+ thread_sleep((int) l, &l->interlock, FALSE); /* presumably drops the interlock; it is re-taken below */
+ simple_lock(&l->interlock);
+ }
+ }
+ l->want_write = TRUE;
+
+ /* Wait for readers (and upgrades) to finish */
+
+ while ((l->read_count != 0) || l->want_upgrade) {
+ if ((i = lock_wait_time) > 0) {
+ simple_unlock(&l->interlock);
+ while (--i > 0 && (l->read_count != 0 ||
+ l->want_upgrade))
+ continue;
+ simple_lock(&l->interlock);
+ }
+
+ if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) {
+ l->waiting = TRUE;
+ thread_sleep((int) l, &l->interlock, FALSE);
+ simple_lock(&l->interlock);
+ }
+ }
+ simple_unlock(&l->interlock);
+}
+
+/*
+ * Release one hold on the lock.  Exactly one of the following is
+ * undone, in this order of preference: a read hold, a recursive hold,
+ * a pending upgrade, or the write hold.  Any sleeper recorded in
+ * l->waiting is then woken.
+ */
+void lock_done(l)
+	register lock_t	l;
+{
+	simple_lock(&l->interlock);
+
+	if (l->read_count != 0) {
+		l->read_count--;
+	} else if (l->recursion_depth != 0) {
+		l->recursion_depth--;
+	} else if (l->want_upgrade) {
+		l->want_upgrade = FALSE;
+	} else {
+		l->want_write = FALSE;
+	}
+
+	if (l->waiting) {
+		l->waiting = FALSE;
+		thread_wakeup((int) l);
+	}
+
+	simple_unlock(&l->interlock);
+}
+/*
+ * Acquire the lock for reading.  If the calling thread is the one
+ * recorded in l->thread, read_count is incremented immediately.
+ * Otherwise the caller waits (spinning, then sleeping if can_sleep is
+ * set) while want_write or want_upgrade is set, and then increments
+ * read_count.
+ */
+void lock_read(l)
+ register lock_t l;
+{
+ register int i;
+
+ simple_lock(&l->interlock);
+
+ if (((thread_t)l->thread) == current_thread()) {
+ /*
+ * Recursive lock.
+ */
+ l->read_count++;
+ simple_unlock(&l->interlock);
+ return;
+ }
+
+ while (l->want_write || l->want_upgrade) {
+ if ((i = lock_wait_time) > 0) {
+ simple_unlock(&l->interlock);
+ while (--i > 0 && (l->want_write || l->want_upgrade)) /* spin without the interlock */
+ continue;
+ simple_lock(&l->interlock);
+ }
+
+ if (l->can_sleep && (l->want_write || l->want_upgrade)) {
+ l->waiting = TRUE;
+ thread_sleep((int) l, &l->interlock, FALSE);
+ simple_lock(&l->interlock);
+ }
+ }
+
+ l->read_count++;
+ simple_unlock(&l->interlock);
+}
+
+/*
+ * Routine: lock_read_to_write
+ * Function:
+ * Improves a read-only lock to one with
+ * write permission. If another reader has
+ * already requested an upgrade to a write lock,
+ * no lock is held upon return.
+ *
+ * Returns TRUE if the upgrade *failed*.
+ *
+ * The caller's read hold is given up first in every case.
+ * For the thread recorded in l->thread the upgrade is
+ * counted as one more level of recursion.  Otherwise
+ * want_upgrade is claimed and the caller waits for the
+ * remaining readers to drain.
+ */
+boolean_t lock_read_to_write(l)
+ register lock_t l;
+{
+ register int i;
+
+ simple_lock(&l->interlock);
+
+ l->read_count--; /* drop our own read hold */
+
+ if (((thread_t)l->thread) == current_thread()) {
+ /*
+ * Recursive lock.
+ */
+ l->recursion_depth++;
+ simple_unlock(&l->interlock);
+ return(FALSE);
+ }
+
+ if (l->want_upgrade) {
+ /*
+ * Someone else has requested upgrade.
+ * Since we've released a read lock, wake
+ * him up.
+ */
+ if (l->waiting) {
+ l->waiting = FALSE;
+ thread_wakeup((int) l);
+ }
+
+ simple_unlock(&l->interlock);
+ return (TRUE);
+ }
+
+ l->want_upgrade = TRUE;
+
+ while (l->read_count != 0) {
+ if ((i = lock_wait_time) > 0) {
+ simple_unlock(&l->interlock);
+ while (--i > 0 && l->read_count != 0) /* spin without the interlock */
+ continue;
+ simple_lock(&l->interlock);
+ }
+
+ if (l->can_sleep && l->read_count != 0) {
+ l->waiting = TRUE;
+ thread_sleep((int) l, &l->interlock, FALSE);
+ simple_lock(&l->interlock);
+ }
+ }
+
+ simple_unlock(&l->interlock);
+ return (FALSE);
+}
+
+/*
+ * Downgrade a write hold to a read hold.  The read count is raised
+ * first; then one level of recursion, a pending upgrade, or the write
+ * hold is released, in that order of preference.  Any sleeper recorded
+ * in l->waiting is woken.
+ */
+void lock_write_to_read(l)
+	register lock_t	l;
+{
+	simple_lock(&l->interlock);
+
+	l->read_count++;
+
+	if (l->recursion_depth != 0) {
+		l->recursion_depth--;
+	} else if (l->want_upgrade) {
+		l->want_upgrade = FALSE;
+	} else {
+		l->want_write = FALSE;
+	}
+
+	if (l->waiting) {
+		l->waiting = FALSE;
+		thread_wakeup((int) l);
+	}
+
+	simple_unlock(&l->interlock);
+}
+
+
+/*
+ *	Routine:	lock_try_write
+ *	Function:
+ *		Make a single, non-blocking attempt to take the lock
+ *		for writing.
+ *
+ *		Returns TRUE when the write lock is held on return
+ *		(including the recursive case) and FALSE when it is
+ *		not.
+ */
+
+boolean_t lock_try_write(l)
+	register lock_t	l;
+{
+	boolean_t got;
+
+	simple_lock(&l->interlock);
+
+	if (((thread_t)l->thread) == current_thread()) {
+		/* Recursive acquisition by the recorded thread. */
+		l->recursion_depth++;
+		got = TRUE;
+	} else if (l->want_write || l->want_upgrade || l->read_count) {
+		/* Somebody else holds or wants it: give up. */
+		got = FALSE;
+	} else {
+		/* The lock is free: claim it. */
+		l->want_write = TRUE;
+		got = TRUE;
+	}
+
+	simple_unlock(&l->interlock);
+	return(got);
+}
+
+/*
+ *	Routine:	lock_try_read
+ *	Function:
+ *		Make a single, non-blocking attempt to take the lock
+ *		for reading.
+ *
+ *		Returns TRUE when a read hold was added and FALSE when
+ *		the lock is not held on return.
+ */
+
+boolean_t lock_try_read(l)
+	register lock_t	l;
+{
+	boolean_t got;
+
+	simple_lock(&l->interlock);
+
+	if (((thread_t)l->thread) == current_thread()) {
+		/* Recursive acquisition by the recorded thread. */
+		l->read_count++;
+		got = TRUE;
+	} else if (l->want_write || l->want_upgrade) {
+		/* A writer or upgrader is ahead of us: give up. */
+		got = FALSE;
+	} else {
+		l->read_count++;
+		got = TRUE;
+	}
+
+	simple_unlock(&l->interlock);
+	return(got);
+}
+
+/*
+ * Routine: lock_try_read_to_write
+ * Function:
+ * Improves a read-only lock to one with
+ * write permission. If another reader has
+ * already requested an upgrade to a write lock,
+ * the read lock is still held upon return.
+ *
+ * Returns FALSE if the upgrade *failed*.
+ *
+ * Note that the sense of the return value is the
+ * opposite of lock_read_to_write().  Once the upgrade
+ * has been claimed, this routine sleeps until the other
+ * readers are gone; unlike the other wait loops in this
+ * file it neither spins nor tests can_sleep.
+ */
+boolean_t lock_try_read_to_write(l)
+ register lock_t l;
+{
+
+ simple_lock(&l->interlock);
+
+ if (((thread_t)l->thread) == current_thread()) {
+ /*
+ * Recursive lock
+ */
+ l->read_count--;
+ l->recursion_depth++;
+ simple_unlock(&l->interlock);
+ return(TRUE);
+ }
+
+ if (l->want_upgrade) {
+ simple_unlock(&l->interlock); /* read hold is kept */
+ return(FALSE);
+ }
+ l->want_upgrade = TRUE;
+ l->read_count--; /* drop our own read hold */
+
+ while (l->read_count != 0) {
+ l->waiting = TRUE;
+ thread_sleep((int) l, &l->interlock, FALSE);
+ simple_lock(&l->interlock);
+ }
+
+ simple_unlock(&l->interlock);
+ return(TRUE);
+}
+
+/*
+ *	Record the calling thread as the one that may take this lock
+ *	recursively (for read, write, or update).  The lock must have
+ *	want_write set when this is called; otherwise the kernel panics.
+ */
+void lock_set_recursive(l)
+	lock_t		l;
+{
+	simple_lock(&l->interlock);
+
+	if (l->want_write == 0)
+		panic("lock_set_recursive: don't have write lock");
+
+	l->thread = (char *) current_thread();
+
+	simple_unlock(&l->interlock);
+}
+
+/*
+ *	Stop the lock from being re-acquired recursively.  Only the
+ *	thread recorded in the lock may call this; anyone else causes a
+ *	panic.  The recorded thread is reset only once no recursive
+ *	holds remain.
+ */
+void lock_clear_recursive(l)
+	lock_t		l;
+{
+	simple_lock(&l->interlock);
+
+	if (current_thread() != ((thread_t) l->thread))
+		panic("lock_clear_recursive: wrong thread");
+
+	if (l->recursion_depth == 0) {
+		l->thread = (char *)-1;		/* XXX */
+	}
+
+	simple_unlock(&l->interlock);
+}
diff --git a/sys/vm/lock.h b/sys/vm/lock.h
new file mode 100644
index 000000000000..26bed1f048a5
--- /dev/null
+++ b/sys/vm/lock.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lock.h 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Locking primitives definitions
+ */
+
+#ifndef _LOCK_H_
+#define _LOCK_H_
+
+#define NCPUS 1 /* XXX */
+
+/*
+ * A simple spin lock.
+ */
+
+struct slock {
+ int lock_data; /* in general 1 bit is sufficient */
+};
+
+typedef struct slock simple_lock_data_t;
+typedef struct slock *simple_lock_t;
+
+/*
+ * The general lock structure. Provides for multiple readers,
+ * upgrading from read to write, and sleeping until the lock
+ * can be gained.
+ */
+
+struct lock {
+#ifdef vax
+ /*
+ * Efficient VAX implementation -- see field description below.
+ */
+ unsigned int read_count:16,
+ want_upgrade:1,
+ want_write:1,
+ waiting:1,
+ can_sleep:1,
+ :0;
+
+ simple_lock_data_t interlock;
+#else /* vax */
+#ifdef ns32000
+ /*
+ * Efficient ns32000 implementation --
+ * see field description below.
+ */
+ simple_lock_data_t interlock;
+ unsigned int read_count:16,
+ want_upgrade:1,
+ want_write:1,
+ waiting:1,
+ can_sleep:1,
+ :0;
+
+#else /* ns32000 */
+ /* Only the "interlock" field is used for hardware exclusion;
+ * other fields are modified with normal instructions after
+ * acquiring the interlock bit.
+ */
+ simple_lock_data_t
+ interlock; /* Interlock for remaining fields */
+ boolean_t want_write; /* Writer is waiting, or locked for write */
+ boolean_t want_upgrade; /* Read-to-write upgrade waiting */
+ boolean_t waiting; /* Someone is sleeping on lock */
+ boolean_t can_sleep; /* Can attempts to lock go to sleep */
+ int read_count; /* Number of accepted readers */
+#endif /* ns32000 */
+#endif /* vax */
+ char *thread; /* Thread that has lock, if recursive locking allowed */
+ /* (should be thread_t, but we then have mutually
+ recursive definitions) */
+ /* lock_clear_recursive() resets this to (char *)-1 */
+ int recursion_depth;/* Depth of recursion */
+};
+
+typedef struct lock lock_data_t;
+typedef struct lock *lock_t;
+
+#if NCPUS > 1
+__BEGIN_DECLS
+void simple_lock __P((simple_lock_t));
+void simple_lock_init __P((simple_lock_t));
+boolean_t simple_lock_try __P((simple_lock_t));
+void simple_unlock __P((simple_lock_t));
+__END_DECLS
+#else /* No multiprocessor locking is necessary. */
+#define simple_lock(l)
+#define simple_lock_init(l)
+#define simple_lock_try(l) (1) /* Always succeeds. */
+#define simple_unlock(l)
+#endif
+
+/* Sleep locks must work even if no multiprocessing. */
+
+#define lock_read_done(l) lock_done(l)
+#define lock_write_done(l) lock_done(l)
+
+void lock_clear_recursive __P((lock_t));
+void lock_done __P((lock_t));
+void lock_init __P((lock_t, boolean_t));
+void lock_read __P((lock_t));
+boolean_t lock_read_to_write __P((lock_t));
+void lock_set_recursive __P((lock_t));
+void lock_sleepable __P((lock_t, boolean_t));
+boolean_t lock_try_read __P((lock_t));
+boolean_t lock_try_read_to_write __P((lock_t));
+boolean_t lock_try_write __P((lock_t));
+void lock_write __P((lock_t));
+void lock_write_to_read __P((lock_t));
+#endif /* !_LOCK_H_ */
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h
new file mode 100644
index 000000000000..63a83c905782
--- /dev/null
+++ b/sys/vm/pmap.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pmap.h 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: Avadis Tevanian, Jr.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Machine address mapping definitions -- machine-independent
+ * section. [For machine-dependent section, see "machine/pmap.h".]
+ */
+
+#ifndef _PMAP_VM_
+#define _PMAP_VM_
+
+/*
+ * Each machine dependent implementation is expected to
+ * keep certain statistics. They may do this any way they
+ * so choose, but are expected to return the statistics
+ * in the following structure.
+ */
+struct pmap_statistics {
+ long resident_count; /* # of pages mapped (total)*/
+ long wired_count; /* # of pages wired */
+};
+typedef struct pmap_statistics *pmap_statistics_t;
+
+#include <machine/pmap.h>
+
+#ifdef KERNEL
+__BEGIN_DECLS
+void *pmap_bootstrap_alloc __P((int));
+void pmap_bootstrap( /* machine dependent */ );
+void pmap_change_wiring __P((pmap_t, vm_offset_t, boolean_t));
+void pmap_clear_modify __P((vm_offset_t pa));
+void pmap_clear_reference __P((vm_offset_t pa));
+void pmap_collect __P((pmap_t));
+void pmap_copy __P((pmap_t,
+ pmap_t, vm_offset_t, vm_size_t, vm_offset_t));
+void pmap_copy_page __P((vm_offset_t, vm_offset_t));
+pmap_t pmap_create __P((vm_size_t));
+void pmap_destroy __P((pmap_t));
+void pmap_enter __P((pmap_t,
+ vm_offset_t, vm_offset_t, vm_prot_t, boolean_t));
+vm_offset_t pmap_extract __P((pmap_t, vm_offset_t));
+void pmap_init __P((vm_offset_t, vm_offset_t));
+boolean_t pmap_is_modified __P((vm_offset_t pa));
+boolean_t pmap_is_referenced __P((vm_offset_t pa));
+vm_offset_t pmap_map __P((vm_offset_t, vm_offset_t, vm_offset_t, int));
+void pmap_page_protect __P((vm_offset_t, vm_prot_t));
+void pmap_pageable __P((pmap_t,
+ vm_offset_t, vm_offset_t, boolean_t));
+vm_offset_t pmap_phys_address __P((int));
+void pmap_pinit __P((pmap_t));
+void pmap_protect __P((pmap_t,
+ vm_offset_t, vm_offset_t, vm_prot_t));
+void pmap_reference __P((pmap_t));
+void pmap_release __P((pmap_t));
+void pmap_remove __P((pmap_t, vm_offset_t, vm_offset_t));
+void pmap_update __P((void));
+void pmap_zero_page __P((vm_offset_t));
+__END_DECLS
+#endif
+
+#endif /* _PMAP_VM_ */
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
new file mode 100644
index 000000000000..899a6cf41a00
--- /dev/null
+++ b/sys/vm/swap_pager.c
@@ -0,0 +1,1009 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
+ *
+ * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94
+ */
+
+/*
+ * Quick hack to page to dedicated partition(s).
+ * TODO:
+ * Add multiprocessor locks
+ * Deal with async writes in a better fashion
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/map.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/swap_pager.h>
+
+#define NSWSIZES 16 /* size of swtab */
+#define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */
+#ifndef NPENDINGIO
+#define NPENDINGIO 64 /* max # of pending cleans */
+#endif
+
+#ifdef DEBUG
+int swpagerdebug = 0x100;
+#define SDB_FOLLOW 0x001
+#define SDB_INIT 0x002
+#define SDB_ALLOC 0x004
+#define SDB_IO 0x008
+#define SDB_WRITE 0x010
+#define SDB_FAIL 0x020
+#define SDB_ALLOCBLK 0x040
+#define SDB_FULL 0x080
+#define SDB_ANOM 0x100
+#define SDB_ANOMPANIC 0x200
+#define SDB_CLUSTER 0x400
+#define SDB_PARANOIA 0x800
+#endif
+
+TAILQ_HEAD(swpclean, swpagerclean);
+
+/*
+ * Bookkeeping record for one asynchronous pageout.  The records live
+ * in the static swcleanlist[] array; swap_pager_init() puts them all
+ * on swap_pager_free, and swap_pager_io() moves one to swap_pager_inuse
+ * when it starts an async write.
+ */
+struct swpagerclean {
+ TAILQ_ENTRY(swpagerclean) spc_list; /* free/inuse queue linkage */
+ int spc_flags; /* SPC_* state, see below */
+ struct buf *spc_bp; /* swap buffer header used for the IO */
+ sw_pager_t spc_swp; /* pager the write belongs to */
+ vm_offset_t spc_kva; /* kernel VA the pages are mapped at */
+ vm_page_t spc_m; /* first page of the write */
+ int spc_npages; /* number of pages in the write */
+} swcleanlist[NPENDINGIO];
+typedef struct swpagerclean *swp_clean_t;
+
+/* spc_flags values */
+#define SPC_FREE 0x00
+#define SPC_BUSY 0x01
+#define SPC_DONE 0x02
+#define SPC_ERROR 0x04
+
+struct swtab {
+ vm_size_t st_osize; /* size of object (bytes) */
+ int st_bsize; /* vs. size of swap block (DEV_BSIZE units) */
+#ifdef DEBUG
+ u_long st_inuse; /* number in this range in use */
+ u_long st_usecnt; /* total used of this size */
+#endif
+} swtab[NSWSIZES+1];
+
+#ifdef DEBUG
+int swap_pager_poip; /* pageouts in progress */
+int swap_pager_piip; /* pageins in progress */
+#endif
+
+int swap_pager_maxcluster; /* maximum cluster size */
+int swap_pager_npendingio; /* number of pager clean structs */
+
+struct swpclean swap_pager_inuse; /* list of pending page cleans */
+struct swpclean swap_pager_free; /* list of free pager clean structs */
+struct pagerlst swap_pager_list; /* list of "named" anon regions */
+
+static void swap_pager_init __P((void));
+static vm_pager_t swap_pager_alloc
+ __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+static void swap_pager_clean __P((int));
+#ifdef DEBUG
+static void swap_pager_clean_check __P((vm_page_t *, int, int));
+#endif
+static void swap_pager_cluster
+ __P((vm_pager_t, vm_offset_t,
+ vm_offset_t *, vm_offset_t *));
+static void swap_pager_dealloc __P((vm_pager_t));
+static int swap_pager_getpage
+ __P((vm_pager_t, vm_page_t *, int, boolean_t));
+static boolean_t swap_pager_haspage __P((vm_pager_t, vm_offset_t));
+static int swap_pager_io __P((sw_pager_t, vm_page_t *, int, int));
+static void swap_pager_iodone __P((struct buf *));
+static int swap_pager_putpage
+ __P((vm_pager_t, vm_page_t *, int, boolean_t));
+
+struct pagerops swappagerops = {
+ swap_pager_init,
+ swap_pager_alloc,
+ swap_pager_dealloc,
+ swap_pager_getpage,
+ swap_pager_putpage,
+ swap_pager_haspage,
+ swap_pager_cluster
+};
+
+/*
+ * One-time swap pager setup: install the swap pager as the default
+ * pager, build the free list of async-clean records, settle the
+ * dmmin/dmmax allocation constants and fill in swtab[], the table
+ * mapping object size to swap block size.
+ */
+static void
+swap_pager_init()
+{
+	register swp_clean_t spc;
+	register int i, bsize;
+	extern int dmmin, dmmax;
+	int bsize_limit;
+	struct swtab *swt;
+
+#ifdef DEBUG
+	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
+		printf("swpg_init()\n");
+#endif
+	dfltpagerops = &swappagerops;
+	TAILQ_INIT(&swap_pager_list);
+
+	/*
+	 * Async IO structures.
+	 *
+	 * XXX ideally these would be sized dynamically from nswbuf
+	 * (which is what ultimately limits us), but neither nswbuf
+	 * nor malloc has been initialized at this point, so the
+	 * statically allocated swcleanlist[] is used instead.
+	 */
+	swap_pager_npendingio = NPENDINGIO;
+
+	/*
+	 * Both clean queues start empty; every record then goes
+	 * onto the free queue, marked free.
+	 */
+	TAILQ_INIT(&swap_pager_inuse);
+	TAILQ_INIT(&swap_pager_free);
+	for (i = 0; i < swap_pager_npendingio; i++) {
+		spc = &swcleanlist[i];
+		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+		spc->spc_flags = SPC_FREE;
+	}
+
+	/*
+	 * Swap allocation constants: supply defaults for any that
+	 * have not been set already.
+	 */
+	if (dmmin == 0) {
+		dmmin = DMMIN;
+		if (dmmin < CLBYTES/DEV_BSIZE)
+			dmmin = CLBYTES/DEV_BSIZE;
+	}
+	if (dmmax == 0)
+		dmmax = DMMAX;
+
+	/*
+	 * Object size vs. allocation size table.  The block size
+	 * starts at one page (but no less than dmmin) and doubles
+	 * per entry until it reaches the limit: what one block's
+	 * page bitmask can describe, capped at dmmax.
+	 */
+	bsize = btodb(PAGE_SIZE);
+	if (bsize < dmmin)
+		bsize = dmmin;
+	bsize_limit = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
+	if (bsize_limit > dmmax)
+		bsize_limit = dmmax;
+	i = 0;
+	while (i < NSWSIZES) {
+		swt = &swtab[i];
+		swt->st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
+		swt->st_bsize = bsize;
+		/* remember the largest block size that fits in MAXPHYS */
+		if (bsize <= btodb(MAXPHYS))
+			swap_pager_maxcluster = dbtob(bsize);
+#ifdef DEBUG
+		if (swpagerdebug & SDB_INIT)
+			printf("swpg_init: ix %d, size %x, bsize %x\n",
+			    i, swtab[i].st_osize, swtab[i].st_bsize);
+#endif
+		if (bsize >= bsize_limit)
+			break;
+		bsize *= 2;
+		i++;
+	}
+	/*
+	 * Terminating entry: a zero st_osize ends table searches,
+	 * and its st_bsize is what any larger object gets.
+	 */
+	swtab[i].st_osize = 0;
+	swtab[i].st_bsize = bsize;
+}
+
+/*
+ * Allocate a pager structure and associated resources.
+ * Note that if we are called from the pageout daemon (handle == NULL)
+ * we should not wait for memory as it could result in deadlock.
+ *
+ *	handle	name of a "named" anonymous region, or NULL
+ *	size	size of the region in bytes (rounded up to a page here)
+ *	prot	only reported in the DEBUG trace
+ *	foff	not used by this routine
+ *
+ * Returns the pager (an existing one if the handle is already known),
+ * or NULL if any of the allocations fails.
+ */
+static vm_pager_t
+swap_pager_alloc(handle, size, prot, foff)
+	caddr_t handle;
+	register vm_size_t size;
+	vm_prot_t prot;
+	vm_offset_t foff;
+{
+	register vm_pager_t pager;
+	register sw_pager_t swp;
+	struct swtab *swt;
+	int waitok;
+
+#ifdef DEBUG
+	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
+		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
+#endif
+	/*
+	 * If this is a "named" anonymous region, look it up and
+	 * return the appropriate pager if it exists.
+	 */
+	if (handle) {
+		pager = vm_pager_lookup(&swap_pager_list, handle);
+		if (pager != NULL) {
+			/*
+			 * Use vm_object_lookup to gain a reference
+			 * to the object and also to remove from the
+			 * object cache.
+			 */
+			if (vm_object_lookup(pager) == NULL)
+				panic("swap_pager_alloc: bad object");
+			return(pager);
+		}
+	}
+	/*
+	 * Pager doesn't exist, allocate swap management resources
+	 * and initialize.  Only named regions may wait for memory.
+	 */
+	waitok = handle ? M_WAITOK : M_NOWAIT;
+	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
+	if (pager == NULL)
+		return(NULL);
+	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
+	if (swp == NULL) {
+#ifdef DEBUG
+		if (swpagerdebug & SDB_FAIL)
+			printf("swpg_alloc: swpager malloc failed\n");
+#endif
+		free((caddr_t)pager, M_VMPAGER);
+		return(NULL);
+	}
+	/*
+	 * Pick the swap block size from the first swtab entry big
+	 * enough for the object; the zero-size terminating entry
+	 * serves everything larger.
+	 */
+	size = round_page(size);
+	for (swt = swtab; swt->st_osize; swt++)
+		if (size <= swt->st_osize)
+			break;
+#ifdef DEBUG
+	swt->st_inuse++;
+	swt->st_usecnt++;
+#endif
+	swp->sw_osize = size;
+	swp->sw_bsize = swt->st_bsize;
+	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
+	/*
+	 * The block table is always requested with M_NOWAIT,
+	 * whatever waitok says, so this allocation can fail even
+	 * for a named region.
+	 */
+	swp->sw_blocks = (sw_blk_t)
+		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
+		       M_VMPGDATA, M_NOWAIT);
+	if (swp->sw_blocks == NULL) {
+		free((caddr_t)swp, M_VMPGDATA);
+		free((caddr_t)pager, M_VMPAGER);
+#ifdef DEBUG
+		if (swpagerdebug & SDB_FAIL)
+			printf("swpg_alloc: sw_blocks malloc failed\n");
+		swt->st_inuse--;
+		swt->st_usecnt--;
+#endif
+		/* a pointer is returned here: NULL, like the other failures */
+		return(NULL);
+	}
+	bzero((caddr_t)swp->sw_blocks,
+	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
+	swp->sw_poip = 0;
+	if (handle) {
+		vm_object_t object;
+
+		swp->sw_flags = SW_NAMED;
+		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
+		/*
+		 * Consistent with other pagers: return with object
+		 * referenced.  Can't do this with handle == NULL
+		 * since it might be the pageout daemon calling.
+		 */
+		object = vm_object_allocate(size);
+		vm_object_enter(object, pager);
+		vm_object_setpager(object, pager, 0, FALSE);
+	} else {
+		/* unnamed pagers are not kept on swap_pager_list */
+		swp->sw_flags = 0;
+		pager->pg_list.tqe_next = NULL;
+		pager->pg_list.tqe_prev = NULL;
+	}
+	pager->pg_handle = handle;
+	pager->pg_ops = &swappagerops;
+	pager->pg_type = PG_SWAP;
+	pager->pg_flags = PG_CLUSTERPUT;
+	pager->pg_data = swp;
+
+#ifdef DEBUG
+	if (swpagerdebug & SDB_ALLOC)
+		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
+		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
+#endif
+	return(pager);
+}
+
+/*
+ * Destroy a swap pager: take it off the named-region list, wait for
+ * its pageouts to drain, release every swap block it still owns and
+ * free its management structures.
+ */
+static void
+swap_pager_dealloc(pager)
+	vm_pager_t pager;
+{
+	register int i;
+	register sw_blk_t blk;
+	register sw_pager_t swp;
+	struct swtab *swt;
+	int s;
+
+#ifdef DEBUG
+	/* save panic time state */
+	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
+		return;
+	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
+		printf("swpg_dealloc(%x)\n", pager);
+#endif
+	/*
+	 * Unlist the pager first, so that lookups fail should we
+	 * block below waiting for pageouts to complete.
+	 */
+	swp = (sw_pager_t) pager->pg_data;
+	if (swp->sw_flags & SW_NAMED) {
+		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
+		swp->sw_flags &= ~SW_NAMED;
+	}
+#ifdef DEBUG
+	/* find the swtab entry this pager was counted against */
+	swt = swtab;
+	while (swt->st_osize && swp->sw_osize > swt->st_osize)
+		swt++;
+	swt->st_inuse--;
+#endif
+
+	/*
+	 * Sleep until no pageouts are in progress on this pager,
+	 * then clear its entries off the cleaning list.
+	 */
+	s = splbio();
+	while (swp->sw_poip != 0) {
+		swp->sw_flags |= SW_WANTED;
+		(void) tsleep(swp, PVM, "swpgdealloc", 0);
+	}
+	splx(s);
+	swap_pager_clean(B_WRITE);
+
+	/*
+	 * Give back whatever swap blocks are still allocated.
+	 */
+	for (i = 0; i < swp->sw_nblocks; i++) {
+		blk = &swp->sw_blocks[i];
+		if (blk->swb_block == 0)
+			continue;
+#ifdef DEBUG
+		if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
+			printf("swpg_dealloc: blk %x\n",
+			       blk->swb_block);
+#endif
+		rmfree(swapmap, swp->sw_bsize, blk->swb_block);
+	}
+	/*
+	 * Finally release the block table, the swap pager data
+	 * and the pager itself.
+	 */
+	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
+	free((caddr_t)swp, M_VMPGDATA);
+	free((caddr_t)pager, M_VMPAGER);
+}
+
+/*
+ * Pagein entry point: hand the request to swap_pager_io() as a
+ * read.  The sync argument only appears in the DEBUG trace; the
+ * read is always issued without B_ASYNC.
+ */
+static int
+swap_pager_getpage(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+	sw_pager_t swp;
+
+#ifdef DEBUG
+	if (swpagerdebug & SDB_FOLLOW)
+		printf("swpg_getpage(%x, %x, %x, %x)\n",
+		       pager, mlist, npages, sync);
+#endif
+	swp = (sw_pager_t)pager->pg_data;
+	return(swap_pager_io(swp, mlist, npages, B_READ));
+}
+
+/*
+ * Pageout entry point.  A NULL pager is a request just to reap
+ * completed pageouts.  Otherwise the pages go to swap_pager_io()
+ * as a write, asynchronous unless sync is set.
+ */
+static int
+swap_pager_putpage(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+	int flags;
+
+#ifdef DEBUG
+	if (swpagerdebug & SDB_FOLLOW)
+		printf("swpg_putpage(%x, %x, %x, %x)\n",
+		       pager, mlist, npages, sync);
+#endif
+	if (pager == NULL) {
+		swap_pager_clean(B_WRITE);
+		return (VM_PAGER_OK);		/* ??? */
+	}
+	flags = sync ? B_WRITE : (B_WRITE | B_ASYNC);
+	return(swap_pager_io((sw_pager_t)pager->pg_data,
+			     mlist, npages, flags));
+}
+
+/*
+ * Report whether the pager holds a copy of the page at the given
+ * offset: TRUE only if the covering swap block is allocated and
+ * the page's bit is set in that block's mask.
+ */
+static boolean_t
+swap_pager_haspage(pager, offset)
+	vm_pager_t pager;
+	vm_offset_t offset;
+{
+	register sw_pager_t swp;
+	register sw_blk_t swb;
+	int ix;
+	int present;
+
+#ifdef DEBUG
+	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
+		printf("swpg_haspage(%x, %x) ", pager, offset);
+#endif
+	swp = (sw_pager_t) pager->pg_data;
+	/* index of the swap block covering this offset */
+	ix = offset / dbtob(swp->sw_bsize);
+	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+#ifdef DEBUG
+		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
+			printf("swpg_haspage: %x bad offset %x, ix %x\n",
+			       swp->sw_blocks, offset, ix);
+#endif
+		return(FALSE);
+	}
+	swb = &swp->sw_blocks[ix];
+	/* from here on ix is the page's index within its block */
+	if (swb->swb_block)
+		ix = atop(offset % dbtob(swp->sw_bsize));
+	present = swb->swb_block && (swb->swb_mask & (1 << ix));
+#ifdef DEBUG
+	if (swpagerdebug & SDB_ALLOCBLK)
+		printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
+	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
+		printf("-> %c\n",
+		       "FT"[present]);
+#endif
+	if (present)
+		return(TRUE);
+	return(FALSE);
+}
+
+/*
+ * Compute the cluster around an offset.  The cluster is one swap
+ * block, capped at swap_pager_maxcluster bytes, aligned down to a
+ * multiple of its own size and clipped at the end of the object.
+ * The bounds are returned through loffset and hoffset.
+ */
+static void
+swap_pager_cluster(pager, offset, loffset, hoffset)
+	vm_pager_t pager;
+	vm_offset_t offset;
+	vm_offset_t *loffset;
+	vm_offset_t *hoffset;
+{
+	sw_pager_t swp;
+	register int csize;
+	vm_offset_t start, end;
+
+#ifdef DEBUG
+	if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
+		printf("swpg_cluster(%x, %x) ", pager, offset);
+#endif
+	swp = (sw_pager_t) pager->pg_data;
+	csize = dbtob(swp->sw_bsize);
+	if (csize > swap_pager_maxcluster)
+		csize = swap_pager_maxcluster;
+
+	/* align down to the start of the cluster */
+	start = offset - (offset % csize);
+	if (start >= swp->sw_osize)
+		panic("swap_pager_cluster: bad offset");
+
+	/* the cluster may not run past the end of the object */
+	end = start + csize;
+	if (end > swp->sw_osize)
+		end = swp->sw_osize;
+
+	*loffset = start;
+	*hoffset = end;
+#ifdef DEBUG
+	if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
+		printf("returns [%x-%x]\n", start, end);
+#endif
+}
+
+/*
+ * Scaled down version of swap().
+ * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
+ * BOGUS: lower level IO routines expect a KVA so we have to map our
+ * provided physical page into the KVA to keep them happy.
+ *
+ *	swp	swap pager the pages belong to
+ *	mlist	pages to transfer; the first one fixes the swap offset
+ *	npages	number of pages in mlist
+ *	flags	B_READ or B_WRITE, optionally with B_ASYNC for writes
+ *
+ * Returns a VM_PAGER_* code: FAIL if the page is not in the pager or
+ * no swap block could be allocated, AGAIN on a shortage of clean
+ * records or KVA, PEND once an async write has been started, and OK
+ * or ERROR after a synchronous transfer.  (DEBUG kernels may also
+ * return BAD for a write outside the block table.)
+ */
+static int
+swap_pager_io(swp, mlist, npages, flags)
+	register sw_pager_t swp;
+	vm_page_t *mlist;
+	int npages;
+	int flags;
+{
+	register struct buf *bp;
+	register sw_blk_t swb;
+	register int s;
+	int ix, mask;
+	int rv;			/* VM_PAGER_* result, not a boolean */
+	vm_offset_t kva, off;
+	swp_clean_t spc;
+	vm_page_t m;
+
+#ifdef DEBUG
+	/* save panic time state */
+	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
+		return (VM_PAGER_FAIL);		/* XXX: correct return? */
+	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
+		printf("swpg_io(%x, %x, %x, %x)\n", swp, mlist, npages, flags);
+	if (flags & B_READ) {
+		if (flags & B_ASYNC)
+			panic("swap_pager_io: cannot do ASYNC reads");
+		if (npages != 1)
+			panic("swap_pager_io: cannot do clustered reads");
+	}
+#endif
+
+	/*
+	 * First determine if the page exists in the pager if this is
+	 * a sync read.  This quickly handles cases where we are
+	 * following shadow chains looking for the top level object
+	 * with the page.
+	 */
+	m = *mlist;
+	off = m->offset + m->object->paging_offset;
+	ix = off / dbtob(swp->sw_bsize);
+	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+#ifdef DEBUG
+		if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) {
+			printf("swap_pager_io: no swap block on write\n");
+			return(VM_PAGER_BAD);
+		}
+#endif
+		return(VM_PAGER_FAIL);
+	}
+	swb = &swp->sw_blocks[ix];
+	/* from here on off is the byte offset within the swap block */
+	off = off % dbtob(swp->sw_bsize);
+	if ((flags & B_READ) &&
+	    (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0))
+		return(VM_PAGER_FAIL);
+
+	/*
+	 * For reads (pageins) and synchronous writes, we clean up
+	 * all completed async pageouts.
+	 */
+	if ((flags & B_ASYNC) == 0) {
+		s = splbio();
+		swap_pager_clean(flags&B_READ);
+#ifdef DEBUG
+		if (swpagerdebug & SDB_PARANOIA)
+			swap_pager_clean_check(mlist, npages, flags&B_READ);
+#endif
+		splx(s);
+	}
+	/*
+	 * For async writes (pageouts), we cleanup completed pageouts so
+	 * that all available resources are freed.  Also tells us if this
+	 * page is already being cleaned.  If it is, or no resources
+	 * are available, we try again later.
+	 */
+	else {
+		swap_pager_clean(B_WRITE);
+#ifdef DEBUG
+		if (swpagerdebug & SDB_PARANOIA)
+			swap_pager_clean_check(mlist, npages, B_WRITE);
+#endif
+		if (swap_pager_free.tqh_first == NULL) {
+#ifdef DEBUG
+			if (swpagerdebug & SDB_FAIL)
+				printf("%s: no available io headers\n",
+				       "swap_pager_io");
+#endif
+			return(VM_PAGER_AGAIN);
+		}
+	}
+
+	/*
+	 * Allocate a swap block if necessary.
+	 */
+	if (swb->swb_block == 0) {
+		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
+		if (swb->swb_block == 0) {
+#ifdef DEBUG
+			if (swpagerdebug & SDB_FAIL)
+				printf("swpg_io: rmalloc of %x failed\n",
+				       swp->sw_bsize);
+#endif
+			/*
+			 * XXX this is technically a resource shortage that
+			 * should return AGAIN, but the situation isn't likely
+			 * to be remedied just by delaying a little while and
+			 * trying again (the pageout daemon's current response
+			 * to AGAIN) so we just return FAIL.
+			 */
+			return(VM_PAGER_FAIL);
+		}
+#ifdef DEBUG
+		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
+			printf("swpg_io: %x alloc blk %x at ix %x\n",
+			       swp->sw_blocks, swb->swb_block, ix);
+#endif
+	}
+
+	/*
+	 * Allocate a kernel virtual address and initialize so that PTE
+	 * is available for lower level IO drivers.
+	 */
+	kva = vm_pager_map_pages(mlist, npages, !(flags & B_ASYNC));
+	if (kva == NULL) {
+#ifdef DEBUG
+		if (swpagerdebug & SDB_FAIL)
+			printf("%s: no KVA space to map pages\n",
+			       "swap_pager_io");
+#endif
+		return(VM_PAGER_AGAIN);
+	}
+
+	/*
+	 * Get a swap buffer header and initialize it.
+	 */
+	s = splbio();
+	while (bswlist.b_actf == NULL) {
+#ifdef DEBUG
+		if (swpagerdebug & SDB_ANOM)
+			printf("swap_pager_io: wait on swbuf for %x (%d)\n",
+			       m, flags);
+#endif
+		bswlist.b_flags |= B_WANTED;
+		tsleep((caddr_t)&bswlist, PSWP+1, "swpgiobuf", 0);
+	}
+	bp = bswlist.b_actf;
+	bswlist.b_actf = bp->b_actf;
+	splx(s);
+	bp->b_flags = B_BUSY | (flags & B_READ);
+	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
+	bp->b_data = (caddr_t)kva;
+	bp->b_blkno = swb->swb_block + btodb(off);
+	VHOLD(swapdev_vp);
+	bp->b_vp = swapdev_vp;
+	if (swapdev_vp->v_type == VBLK)
+		bp->b_dev = swapdev_vp->v_rdev;
+	bp->b_bcount = npages * PAGE_SIZE;
+
+	/*
+	 * For writes we set up additional buffer fields, record a pageout
+	 * in progress and mark that these swap blocks are now allocated.
+	 */
+	if ((bp->b_flags & B_READ) == 0) {
+		bp->b_dirtyoff = 0;
+		bp->b_dirtyend = npages * PAGE_SIZE;
+		swapdev_vp->v_numoutput++;
+		s = splbio();
+		swp->sw_poip++;
+		splx(s);
+		/* npages one-bits, positioned at the first page's slot */
+		mask = (~(~0 << npages)) << atop(off);
+#ifdef DEBUG
+		/* counts every write, synchronous or not */
+		swap_pager_poip++;
+		if (swpagerdebug & SDB_WRITE)
+			printf("swpg_io: write: bp=%x swp=%x poip=%d\n",
+			       bp, swp, swp->sw_poip);
+		if ((swpagerdebug & SDB_ALLOCBLK) &&
+		    (swb->swb_mask & mask) != mask)
+			printf("swpg_io: %x write %d pages at %x+%x\n",
+			       swp->sw_blocks, npages, swb->swb_block,
+			       atop(off));
+		if (swpagerdebug & SDB_CLUSTER)
+			printf("swpg_io: off=%x, npg=%x, mask=%x, bmask=%x\n",
+			       off, npages, mask, swb->swb_mask);
+#endif
+		swb->swb_mask |= mask;
+	}
+	/*
+	 * If this is an async write we set up still more buffer fields
+	 * and place a "cleaning" entry on the inuse queue.
+	 */
+	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
+#ifdef DEBUG
+		if (swap_pager_free.tqh_first == NULL)
+			panic("swpg_io: lost spc");
+#endif
+		spc = swap_pager_free.tqh_first;
+		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
+#ifdef DEBUG
+		if (spc->spc_flags != SPC_FREE)
+			panic("swpg_io: bad free spc");
+#endif
+		spc->spc_flags = SPC_BUSY;
+		spc->spc_bp = bp;
+		spc->spc_swp = swp;
+		spc->spc_kva = kva;
+		/*
+		 * Record the first page.  This allows swap_pager_clean
+		 * to efficiently handle the common case of a single page.
+		 * For clusters, it allows us to locate the object easily
+		 * and we then reconstruct the rest of the mlist from spc_kva.
+		 */
+		spc->spc_m = m;
+		spc->spc_npages = npages;
+		bp->b_flags |= B_CALL;
+		bp->b_iodone = swap_pager_iodone;
+		s = splbio();
+		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
+		splx(s);
+	}
+
+	/*
+	 * Finally, start the IO operation.
+	 * If it is async we are all done, otherwise we must wait for
+	 * completion and cleanup afterwards.
+	 */
+#ifdef DEBUG
+	if (swpagerdebug & SDB_IO)
+		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
+		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
+#endif
+	VOP_STRATEGY(bp);
+	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
+#ifdef DEBUG
+		if (swpagerdebug & SDB_IO)
+			printf("swpg_io: IO started: bp %x\n", bp);
+#endif
+		return(VM_PAGER_PEND);
+	}
+	s = splbio();
+#ifdef DEBUG
+	/*
+	 * Only reads are counted here.  A synchronous write was
+	 * already added to swap_pager_poip in the write setup above,
+	 * so counting it again would leave the counter one too high
+	 * after the single decrement below.
+	 */
+	if (flags & B_READ)
+		swap_pager_piip++;
+#endif
+	while ((bp->b_flags & B_DONE) == 0)
+		(void) tsleep(bp, PVM, "swpgio", 0);
+	if ((flags & B_READ) == 0)
+		--swp->sw_poip;
+#ifdef DEBUG
+	if (flags & B_READ)
+		--swap_pager_piip;
+	else
+		--swap_pager_poip;
+#endif
+	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
+	/* put the buffer header back on the free swap buffer list */
+	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
+	bp->b_actf = bswlist.b_actf;
+	bswlist.b_actf = bp;
+	if (bp->b_vp)
+		brelvp(bp);
+	if (bswlist.b_flags & B_WANTED) {
+		bswlist.b_flags &= ~B_WANTED;
+		wakeup(&bswlist);
+	}
+	/* a successful synchronous write leaves the first page clean */
+	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
+		m->flags |= PG_CLEAN;
+		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+	}
+	splx(s);
+#ifdef DEBUG
+	if (swpagerdebug & SDB_IO)
+		printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv);
+	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
+		printf("swpg_io: IO error\n");
+#endif
+	vm_pager_unmap_pages(kva, npages);
+	return(rv);
+}
+
+static void
+swap_pager_clean(rw)
+ int rw; /* referenced only by the DEBUG trace printf below */
+{
+ register swp_clean_t spc;
+ register int s, i;
+ vm_object_t object;
+ vm_page_t m;
+ /* Finish off every completed cleaning entry on swap_pager_inuse, then return. */
+#ifdef DEBUG
+ /* with SDB_ANOMPANIC set, do nothing once a panic is under way (saves its state) */
+ if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
+ return;
+ if (swpagerdebug & SDB_FOLLOW)
+ printf("swpg_clean(%x)\n", rw);
+#endif
+
+ for (;;) {
+ /*
+ * Lookup and removal from the inuse list must be done
+ * at splbio() to avoid conflicts with swap_pager_iodone.
+ */
+ s = splbio();
+ for (spc = swap_pager_inuse.tqh_first;
+ spc != NULL;
+ spc = spc->spc_list.tqe_next) {
+ /*
+ * If the operation is done, remove it from the
+ * list and process it.
+ *
+ * XXX if we can't get the object lock we also
+ * leave it on the list and try again later.
+ * Is there something better we could do?
+ */
+ if ((spc->spc_flags & SPC_DONE) &&
+ vm_object_lock_try(spc->spc_m->object)) {
+ TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
+ break; /* leaving with the object lock held */
+ }
+ }
+ splx(s);
+
+ /*
+ * No completed, lockable operation found; that's all we can do for now.
+ */
+ if (spc == NULL)
+ break;
+
+ /*
+ * Found a completed operation so finish it off.
+ * Note: no longer at splbio since entry is off the list.
+ */
+ m = spc->spc_m;
+ object = m->object;
+
+ /*
+ * Process each page in the cluster.
+ * The first page is explicitly kept in the cleaning
+ * entry, others must be reconstructed from the KVA.
+ */
+ for (i = 0; i < spc->spc_npages; i++) {
+ if (i)
+ m = vm_pager_atop(spc->spc_kva + ptoa(i));
+ /*
+ * If no error mark as clean and inform the pmap
+ * system. If there was an error, mark as dirty
+ * so we will try again.
+ *
+ * XXX could get stuck doing this, should give up
+ * after a while.
+ */
+ if (spc->spc_flags & SPC_ERROR) {
+ printf("%s: clean of page %x failed\n",
+ "swap_pager_clean",
+ VM_PAGE_TO_PHYS(m));
+ m->flags |= PG_LAUNDRY;
+ } else {
+ m->flags |= PG_CLEAN;
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ }
+ m->flags &= ~PG_BUSY;
+ PAGE_WAKEUP(m);
+ }
+
+ /*
+ * Done with the object: drop the paging count once for the
+ * whole entry (not once per page) and unlock it.
+ */
+ if (--object->paging_in_progress == 0)
+ wakeup(object);
+ vm_object_unlock(object);
+
+ /*
+ * Free up KVM used and put the entry back on the free list.
+ */
+ vm_pager_unmap_pages(spc->spc_kva, spc->spc_npages);
+ spc->spc_flags = SPC_FREE;
+ TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+#ifdef DEBUG
+ if (swpagerdebug & SDB_WRITE)
+ printf("swpg_clean: free spc %x\n", spc);
+#endif
+ }
+}
+
+#ifdef DEBUG
+static void
+swap_pager_clean_check(mlist, npages, rw)
+ vm_page_t *mlist; /* pages to look for on the inuse list */
+ int npages; /* number of entries in mlist */
+ int rw; /* only picks "write" vs. "read" in the message */
+{
+ register swp_clean_t spc;
+ boolean_t bad;
+ int i, j, s;
+ vm_page_t m;
+ /* Panic if any page in mlist still belongs to an entry on swap_pager_inuse. */
+ if (panicstr) /* skip the check once a panic is under way */
+ return;
+
+ bad = FALSE;
+ s = splbio(); /* the other inuse-list walkers in this file also run at splbio */
+ for (spc = swap_pager_inuse.tqh_first;
+ spc != NULL;
+ spc = spc->spc_list.tqe_next) {
+ for (j = 0; j < spc->spc_npages; j++) {
+ m = vm_pager_atop(spc->spc_kva + ptoa(j)); /* j'th page of this entry */
+ for (i = 0; i < npages; i++)
+ if (m == mlist[i]) {
+ if (swpagerdebug & SDB_ANOM)
+ printf(
+ "swpg_clean_check: %s: page %x on list, flags %x\n",
+ rw == B_WRITE ? "write" : "read", mlist[i], spc->spc_flags);
+ bad = TRUE; /* no break: keep scanning so every match is seen */
+ }
+ }
+ }
+ splx(s);
+ if (bad) /* panic only after the whole list has been walked */
+ panic("swpg_clean_check");
+}
+#endif
+
+static void
+swap_pager_iodone(bp)
+ register struct buf *bp; /* buffer that swap_pager_io set b_iodone on (async writes) */
+{
+ register swp_clean_t spc;
+ daddr_t blk; /* NOTE(review): assigned below but never read in this function */
+ int s;
+ /* Mark bp's cleaning entry done and release bp; swap_pager_clean finishes the pages. */
+#ifdef DEBUG
+ /* with SDB_ANOMPANIC set, do nothing once a panic is under way (saves its state) */
+ if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
+ return;
+ if (swpagerdebug & SDB_FOLLOW)
+ printf("swpg_iodone(%x)\n", bp);
+#endif
+ s = splbio();
+ for (spc = swap_pager_inuse.tqh_first;
+ spc != NULL;
+ spc = spc->spc_list.tqe_next)
+ if (spc->spc_bp == bp)
+ break;
+#ifdef DEBUG
+ if (spc == NULL)
+ panic("swap_pager_iodone: bp not found");
+#endif
+ /* NOTE(review): without DEBUG, a bp with no inuse entry leaves spc NULL here. */
+ spc->spc_flags &= ~SPC_BUSY;
+ spc->spc_flags |= SPC_DONE;
+ if (bp->b_flags & B_ERROR)
+ spc->spc_flags |= SPC_ERROR; /* swap_pager_clean checks this per page */
+ spc->spc_bp = NULL;
+ blk = bp->b_blkno;
+
+#ifdef DEBUG
+ --swap_pager_poip;
+ if (swpagerdebug & SDB_WRITE)
+ printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
+ bp, spc->spc_swp, spc->spc_swp->sw_flags,
+ spc, spc->spc_swp->sw_poip);
+#endif
+
+ spc->spc_swp->sw_poip--; /* one fewer pageout in progress for this pager */
+ if (spc->spc_swp->sw_flags & SW_WANTED) {
+ spc->spc_swp->sw_flags &= ~SW_WANTED;
+ wakeup(spc->spc_swp);
+ }
+ /* Clear bp's state flags, push it on the front of the bswlist chain, drop its vnode. */
+ bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
+ bp->b_actf = bswlist.b_actf;
+ bswlist.b_actf = bp;
+ if (bp->b_vp)
+ brelvp(bp);
+ if (bswlist.b_flags & B_WANTED) { /* someone is sleeping on &bswlist */
+ bswlist.b_flags &= ~B_WANTED;
+ wakeup(&bswlist);
+ }
+ wakeup(&vm_pages_needed);
+ splx(s);
+}
diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h
new file mode 100644
index 000000000000..497d92a39386
--- /dev/null
+++ b/sys/vm/swap_pager.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)swap_pager.h 8.1 (Berkeley) 6/11/93
+ */
+
+#ifndef _SWAP_PAGER_
+#define _SWAP_PAGER_ 1
+
+/*
+ * In the swap pager, the backing store for an object is organized as an
+ * array of some number of "swap blocks". A swap block consists of a bitmask
+ * and some number of contiguous DEV_BSIZE disk blocks. The minimum size
+ * of a swap block is:
+ *
+ * max(PAGE_SIZE, dmmin*DEV_BSIZE) [ 32k currently ]
+ *
+ * bytes (since the pager interface is page oriented), the maximum size is:
+ *
+ * min(#bits(swb_mask)*PAGE_SIZE, dmmax*DEV_BSIZE) [ 128k currently ]
+ *
+ * where dmmin and dmmax are left over from the old VM interface. The bitmask
+ * (swb_mask) is used by swap_pager_haspage() to determine if a particular
+ * page has actually been written; i.e. the pager copy of the page is valid.
+ * All swap blocks in the backing store of an object will be the same size.
+ *
+ * The reason for variable sized swap blocks is to reduce fragmentation of
+ * swap resources. Whenever possible we allocate smaller swap blocks to
+ * smaller objects. The swap block size is determined from a table of
+ * object-size vs. swap-block-size computed at boot time.
+ */
+typedef int sw_bm_t; /* pager bitmask */
+
+struct swblock {
+ sw_bm_t swb_mask; /* bitmask of valid pages: bit n set means page n was written */
+ daddr_t swb_block; /* first DEV_BSIZE disk block of this swap block */
+};
+typedef struct swblock *sw_blk_t; /* pointer to one swap block descriptor */
+
+/*
+ * Swap pager private data.
+ */
+struct swpager {
+ vm_size_t sw_osize; /* size of object we are backing (bytes) */
+ int sw_bsize; /* size of each swap block (DEV_BSIZE units) */
+ int sw_nblocks;/* number of entries in sw_blocks (sw_blk_t units) */
+ sw_blk_t sw_blocks; /* pointer to list of swap blocks */
+ short sw_flags; /* flags (see SW_* below) */
+ short sw_poip; /* count of pageouts in progress */
+};
+typedef struct swpager *sw_pager_t; /* pointer to the per-object private data */
+
+#define SW_WANTED 0x01
+#define SW_NAMED 0x02
+
+#endif /* _SWAP_PAGER_ */
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
new file mode 100644
index 000000000000..85f892f29beb
--- /dev/null
+++ b/sys/vm/vm.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm.h 8.2 (Berkeley) 12/13/93
+ */
+
+#ifndef VM_H
+#define VM_H
+
+typedef int vm_inherit_t; /* XXX: inheritance codes */
+
+union vm_map_object;
+typedef union vm_map_object vm_map_object_t;
+
+struct vm_map_entry;
+typedef struct vm_map_entry *vm_map_entry_t;
+
+struct vm_map;
+typedef struct vm_map *vm_map_t;
+
+struct vm_object;
+typedef struct vm_object *vm_object_t;
+
+struct vm_page;
+typedef struct vm_page *vm_page_t;
+
+struct pager_struct;
+typedef struct pager_struct *vm_pager_t;
+
+#include <sys/vmmeter.h>
+#include <sys/queue.h>
+#include <vm/vm_param.h>
+#include <vm/lock.h>
+#include <vm/vm_prot.h>
+#include <vm/vm_inherit.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+
+/*
+ * Shareable process virtual address space.
+ * May eventually be merged with vm_map.
+ * Several fields are temporary (text, data stuff).
+ */
+struct vmspace {
+ struct vm_map vm_map; /* VM address map */
+ struct pmap vm_pmap; /* private physical map */
+ int vm_refcnt; /* number of references */
+ caddr_t vm_shm; /* SYS5 shared memory private data XXX */
+/* on fork we copy from vm_startcopy (i.e. vm_rssize) to the end of the structure */
+#define vm_startcopy vm_rssize
+ segsz_t vm_rssize; /* current resident set size in pages */
+ segsz_t vm_swrss; /* resident set size before last swap */
+ segsz_t vm_tsize; /* text size (pages) XXX */
+ segsz_t vm_dsize; /* data size (pages) XXX */
+ segsz_t vm_ssize; /* stack size (pages) */
+ caddr_t vm_taddr; /* user virtual address of text XXX */
+ caddr_t vm_daddr; /* user virtual address of data XXX */
+ caddr_t vm_maxsaddr; /* user VA at max stack growth */
+};
+#endif /* VM_H */
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
new file mode 100644
index 000000000000..bae5f005273d
--- /dev/null
+++ b/sys/vm/vm_extern.h
@@ -0,0 +1,125 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_extern.h 8.2 (Berkeley) 1/12/94
+ */
+
+struct buf;
+struct loadavg;
+struct proc;
+struct vmspace;
+struct vmtotal;
+struct mount;
+struct vnode;
+
+#ifdef KGDB
+void chgkprot __P((caddr_t, int, int));
+#endif
+
+#ifdef KERNEL
+#ifdef TYPEDEF_FOR_UAP
+int getpagesize __P((struct proc *p, void *, int *));
+int madvise __P((struct proc *, void *, int *));
+int mincore __P((struct proc *, void *, int *));
+int mprotect __P((struct proc *, void *, int *));
+int msync __P((struct proc *, void *, int *));
+int munmap __P((struct proc *, void *, int *));
+int obreak __P((struct proc *, void *, int *));
+int sbrk __P((struct proc *, void *, int *));
+int smmap __P((struct proc *, void *, int *));
+int sstk __P((struct proc *, void *, int *));
+#endif
+
+void assert_wait __P((int, boolean_t));
+int grow __P((struct proc *, u_int));
+void iprintf __P((const char *, ...));
+int kernacc __P((caddr_t, int, int));
+int kinfo_loadavg __P((int, char *, int *, int, int *));
+int kinfo_meter __P((int, caddr_t, int *, int, int *));
+vm_offset_t kmem_alloc __P((vm_map_t, vm_size_t));
+vm_offset_t kmem_alloc_pageable __P((vm_map_t, vm_size_t));
+vm_offset_t kmem_alloc_wait __P((vm_map_t, vm_size_t));
+void kmem_free __P((vm_map_t, vm_offset_t, vm_size_t));
+void kmem_free_wakeup __P((vm_map_t, vm_offset_t, vm_size_t));
+void kmem_init __P((vm_offset_t, vm_offset_t));
+vm_offset_t kmem_malloc __P((vm_map_t, vm_size_t, boolean_t));
+vm_map_t kmem_suballoc __P((vm_map_t, vm_offset_t *, vm_offset_t *,
+ vm_size_t, boolean_t));
+void loadav __P((struct loadavg *));
+void munmapfd __P((int));
+int pager_cache __P((vm_object_t, boolean_t));
+void sched __P((void));
+int svm_allocate __P((struct proc *, void *, int *));
+int svm_deallocate __P((struct proc *, void *, int *));
+int svm_inherit __P((struct proc *, void *, int *));
+int svm_protect __P((struct proc *, void *, int *));
+void swapinit __P((void));
+int swapon __P((struct proc *, void *, int *));
+void swapout __P((struct proc *));
+void swapout_threads __P((void));
+int swfree __P((struct proc *, int));
+void swstrategy __P((struct buf *));
+void thread_block __P((void));
+void thread_sleep __P((int, simple_lock_t, boolean_t));
+void thread_wakeup __P((int));
+int useracc __P((caddr_t, int, int));
+int vm_allocate __P((vm_map_t,
+ vm_offset_t *, vm_size_t, boolean_t));
+int vm_allocate_with_pager __P((vm_map_t, vm_offset_t *,
+ vm_size_t, boolean_t, vm_pager_t, vm_offset_t, boolean_t));
+int vm_deallocate __P((vm_map_t, vm_offset_t, vm_size_t));
+int vm_fault __P((vm_map_t, vm_offset_t, vm_prot_t, boolean_t));
+void vm_fault_copy_entry __P((vm_map_t,
+ vm_map_t, vm_map_entry_t, vm_map_entry_t));
+void vm_fault_unwire __P((vm_map_t, vm_offset_t, vm_offset_t));
+int vm_fault_wire __P((vm_map_t, vm_offset_t, vm_offset_t));
+int vm_fork __P((struct proc *, struct proc *, int));
+int vm_inherit __P((vm_map_t,
+ vm_offset_t, vm_size_t, vm_inherit_t));
+void vm_init_limits __P((struct proc *));
+void vm_mem_init __P((void));
+int vm_mmap __P((vm_map_t, vm_offset_t *, vm_size_t,
+ vm_prot_t, vm_prot_t, int, caddr_t, vm_offset_t));
+int vm_protect __P((vm_map_t,
+ vm_offset_t, vm_size_t, boolean_t, vm_prot_t));
+void vm_set_page_size __P((void));
+void vmmeter __P((void));
+struct vmspace *vmspace_alloc __P((vm_offset_t, vm_offset_t, int));
+struct vmspace *vmspace_fork __P((struct vmspace *));
+void vmspace_free __P((struct vmspace *));
+void vmtotal __P((struct vmtotal *));
+void vnode_pager_setsize __P((struct vnode *, u_long));
+void vnode_pager_umount __P((struct mount *));
+boolean_t vnode_pager_uncache __P((struct vnode *));
+void vslock __P((caddr_t, u_int));
+void vsunlock __P((caddr_t, u_int, int));
+#endif
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
new file mode 100644
index 000000000000..f60abf2b5f3a
--- /dev/null
+++ b/sys/vm/vm_fault.c
@@ -0,0 +1,1035 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_fault.c 8.4 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Page fault handling module.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+/*
+ * vm_fault:
+ *
+ * Handle a page fault occurring at the given address,
+ * requiring the given permissions, in the map specified.
+ * If successful, the page is inserted into the
+ * associated physical map.
+ *
+ * NOTE: the given address should be truncated to the
+ * proper page address.
+ *
+ * KERN_SUCCESS is returned if the page fault is handled; otherwise,
+ * a standard error specifying why the fault is fatal is returned.
+ *
+ *
+ * The map in question must be referenced, and remains so.
+ * Caller may hold no locks.
+ */
+int
+vm_fault(map, vaddr, fault_type, change_wiring)
+ vm_map_t map;
+ vm_offset_t vaddr;
+ vm_prot_t fault_type;
+ boolean_t change_wiring;
+{
+ vm_object_t first_object;
+ vm_offset_t first_offset;
+ vm_map_entry_t entry;
+ register vm_object_t object;
+ register vm_offset_t offset;
+ register vm_page_t m;
+ vm_page_t first_m;
+ vm_prot_t prot;
+ int result;
+ boolean_t wired;
+ boolean_t su;
+ boolean_t lookup_still_valid;
+ boolean_t page_exists;
+ vm_page_t old_m;
+ vm_object_t next_object;
+
+ cnt.v_faults++; /* needs lock XXX */
+/*
+ * Recovery actions
+ */
+#define FREE_PAGE(m) { \
+ PAGE_WAKEUP(m); \
+ vm_page_lock_queues(); \
+ vm_page_free(m); \
+ vm_page_unlock_queues(); \
+}
+
+#define RELEASE_PAGE(m) { \
+ PAGE_WAKEUP(m); \
+ vm_page_lock_queues(); \
+ vm_page_activate(m); \
+ vm_page_unlock_queues(); \
+}
+
+#define UNLOCK_MAP { \
+ if (lookup_still_valid) { \
+ vm_map_lookup_done(map, entry); \
+ lookup_still_valid = FALSE; \
+ } \
+}
+
+#define UNLOCK_THINGS { \
+ object->paging_in_progress--; \
+ vm_object_unlock(object); \
+ if (object != first_object) { \
+ vm_object_lock(first_object); \
+ FREE_PAGE(first_m); \
+ first_object->paging_in_progress--; \
+ vm_object_unlock(first_object); \
+ } \
+ UNLOCK_MAP; \
+}
+
+#define UNLOCK_AND_DEALLOCATE { \
+ UNLOCK_THINGS; \
+ vm_object_deallocate(first_object); \
+}
+
+ RetryFault: ;
+
+ /*
+ * Find the backing store object and offset into
+ * it to begin the search.
+ */
+
+ if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry,
+ &first_object, &first_offset,
+ &prot, &wired, &su)) != KERN_SUCCESS) {
+ return(result);
+ }
+ lookup_still_valid = TRUE;
+
+ if (wired)
+ fault_type = prot;
+
+ first_m = NULL;
+
+ /*
+ * Make a reference to this object to
+ * prevent its disposal while we are messing with
+ * it. Once we have the reference, the map is free
+ * to be diddled. Since objects reference their
+ * shadows (and copies), they will stay around as well.
+ */
+
+ vm_object_lock(first_object);
+
+ first_object->ref_count++;
+ first_object->paging_in_progress++;
+
+ /*
+ * INVARIANTS (through entire routine):
+ *
+ * 1) At all times, we must either have the object
+ * lock or a busy page in some object to prevent
+ * some other thread from trying to bring in
+ * the same page.
+ *
+ * Note that we cannot hold any locks during the
+ * pager access or when waiting for memory, so
+ * we use a busy page then.
+ *
+ * Note also that we aren't as concerned about
+ * more than one thread attempting to pager_data_unlock
+ * the same page at once, so we don't hold the page
+ * as busy then, but do record the highest unlock
+ * value so far. [Unlock requests may also be delivered
+ * out of order.]
+ *
+ * 2) Once we have a busy page, we must remove it from
+ * the pageout queues, so that the pageout daemon
+ * will not grab it away.
+ *
+ * 3) To prevent another thread from racing us down the
+ * shadow chain and entering a new page in the top
+ * object before we do, we must keep a busy page in
+ * the top object while following the shadow chain.
+ *
+ * 4) We must increment paging_in_progress on any object
+ * for which we have a busy page, to prevent
+ * vm_object_collapse from removing the busy page
+ * without our noticing.
+ */
+
+ /*
+ * Search for the page at object/offset.
+ */
+
+ object = first_object;
+ offset = first_offset;
+
+ /*
+ * See whether this page is resident
+ */
+
+ while (TRUE) {
+ m = vm_page_lookup(object, offset);
+ if (m != NULL) {
+ /*
+ * If the page is being brought in,
+ * wait for it and then retry.
+ */
+ if (m->flags & PG_BUSY) {
+#ifdef DOTHREADS
+ int wait_result;
+
+ PAGE_ASSERT_WAIT(m, !change_wiring);
+ UNLOCK_THINGS;
+ thread_block();
+ wait_result = current_thread()->wait_result;
+ vm_object_deallocate(first_object);
+ if (wait_result != THREAD_AWAKENED)
+ return(KERN_SUCCESS);
+ goto RetryFault;
+#else
+ PAGE_ASSERT_WAIT(m, !change_wiring);
+ UNLOCK_THINGS;
+ cnt.v_intrans++;
+ thread_block();
+ vm_object_deallocate(first_object);
+ goto RetryFault;
+#endif
+ }
+
+ /*
+ * Remove the page from the pageout daemon's
+ * reach while we play with it.
+ */
+
+ vm_page_lock_queues();
+ if (m->flags & PG_INACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
+ m->flags &= ~PG_INACTIVE;
+ cnt.v_inactive_count--;
+ cnt.v_reactivated++;
+ }
+
+ if (m->flags & PG_ACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+ m->flags &= ~PG_ACTIVE;
+ cnt.v_active_count--;
+ }
+ vm_page_unlock_queues();
+
+ /*
+ * Mark page busy for other threads.
+ */
+ m->flags |= PG_BUSY;
+ break;
+ }
+
+ if (((object->pager != NULL) &&
+ (!change_wiring || wired))
+ || (object == first_object)) {
+
+ /*
+ * Allocate a new page for this object/offset
+ * pair.
+ */
+
+ m = vm_page_alloc(object, offset);
+
+ if (m == NULL) {
+ UNLOCK_AND_DEALLOCATE;
+ VM_WAIT;
+ goto RetryFault;
+ }
+ }
+
+ if (object->pager != NULL && (!change_wiring || wired)) {
+ int rv;
+
+ /*
+ * Now that we have a busy page, we can
+ * release the object lock.
+ */
+ vm_object_unlock(object);
+
+ /*
+ * Call the pager to retrieve the data, if any,
+ * after releasing the lock on the map.
+ */
+ UNLOCK_MAP;
+ cnt.v_pageins++;
+ rv = vm_pager_get(object->pager, m, TRUE);
+
+ /*
+ * Reacquire the object lock to preserve our
+ * invariant.
+ */
+ vm_object_lock(object);
+
+ /*
+ * Found the page.
+ * Leave it busy while we play with it.
+ */
+ if (rv == VM_PAGER_OK) {
+ /*
+ * Relookup in case pager changed page.
+ * Pager is responsible for disposition
+ * of old page if moved.
+ */
+ m = vm_page_lookup(object, offset);
+
+ cnt.v_pgpgin++;
+ m->flags &= ~PG_FAKE;
+ m->flags |= PG_CLEAN;
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ break;
+ }
+
+ /*
+ * IO error or page outside the range of the pager:
+ * cleanup and return an error.
+ */
+ if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) {
+ FREE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ return(KERN_PROTECTION_FAILURE); /* XXX */
+ }
+ /*
+ * rv == VM_PAGER_FAIL:
+ *
+ * Page does not exist at this object/offset.
+ * Free the bogus page (waking up anyone waiting
+ * for it) and continue on to the next object.
+ *
+ * If this is the top-level object, we must
+ * leave the busy page to prevent another
+ * thread from rushing past us, and inserting
+ * the page in that object at the same time
+ * that we are.
+ */
+ if (object != first_object) {
+ FREE_PAGE(m);
+ /* note that `m' is not used after this */
+ }
+ }
+
+ /*
+ * We get here if the object has no pager (or unwiring)
+ * or the pager doesn't have the page.
+ */
+ if (object == first_object)
+ first_m = m;
+
+ /*
+ * Move on to the next object. Lock the next
+ * object before unlocking the current one.
+ */
+
+ offset += object->shadow_offset;
+ next_object = object->shadow;
+ if (next_object == NULL) {
+ /*
+ * If there's no object left, fill the page
+ * in the top object with zeros.
+ */
+ if (object != first_object) {
+ object->paging_in_progress--;
+ vm_object_unlock(object);
+
+ object = first_object;
+ offset = first_offset;
+ m = first_m;
+ vm_object_lock(object);
+ }
+ first_m = NULL;
+
+ vm_page_zero_fill(m);
+ cnt.v_zfod++;
+ m->flags &= ~PG_FAKE;
+ break;
+ }
+ else {
+ vm_object_lock(next_object);
+ if (object != first_object)
+ object->paging_in_progress--;
+ vm_object_unlock(object);
+ object = next_object;
+ object->paging_in_progress++;
+ }
+ }
+
+ if ((m->flags & (PG_ACTIVE | PG_INACTIVE | PG_BUSY)) != PG_BUSY)
+ panic("vm_fault: active, inactive or !busy after main loop");
+
+ /*
+ * PAGE HAS BEEN FOUND.
+ * [Loop invariant still holds -- the object lock
+ * is held.]
+ */
+
+ old_m = m; /* save page that would be copied */
+
+ /*
+ * If the page is being written, but isn't
+ * already owned by the top-level object,
+ * we have to copy it into a new page owned
+ * by the top-level object.
+ */
+
+ if (object != first_object) {
+ /*
+ * We only really need to copy if we
+ * want to write it.
+ */
+
+ if (fault_type & VM_PROT_WRITE) {
+
+ /*
+ * If we try to collapse first_object at this
+ * point, we may deadlock when we try to get
+ * the lock on an intermediate object (since we
+ * have the bottom object locked). We can't
+ * unlock the bottom object, because the page
+ * we found may move (by collapse) if we do.
+ *
+ * Instead, we first copy the page. Then, when
+ * we have no more use for the bottom object,
+ * we unlock it and try to collapse.
+ *
+ * Note that we copy the page even if we didn't
+ * need to... that's the breaks.
+ */
+
+ /*
+ * We already have an empty page in
+ * first_object - use it.
+ */
+
+ vm_page_copy(m, first_m);
+ first_m->flags &= ~PG_FAKE;
+
+ /*
+ * If another map is truly sharing this
+ * page with us, we have to flush all
+ * uses of the original page, since we
+ * can't distinguish those which want the
+ * original from those which need the
+ * new copy.
+ *
+ * XXX If we know that only one map has
+ * access to this page, then we could
+ * avoid the pmap_page_protect() call.
+ */
+
+ vm_page_lock_queues();
+ vm_page_activate(m);
+ vm_page_deactivate(m);
+ pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+ vm_page_unlock_queues();
+
+ /*
+ * We no longer need the old page or object.
+ */
+ PAGE_WAKEUP(m);
+ object->paging_in_progress--;
+ vm_object_unlock(object);
+
+ /*
+ * Only use the new page below...
+ */
+
+ cnt.v_cow_faults++;
+ m = first_m;
+ object = first_object;
+ offset = first_offset;
+
+ /*
+ * Now that we've gotten the copy out of the
+ * way, let's try to collapse the top object.
+ */
+ vm_object_lock(object);
+ /*
+ * But we have to play ugly games with
+ * paging_in_progress to do that...
+ */
+ object->paging_in_progress--;
+ vm_object_collapse(object);
+ object->paging_in_progress++;
+ }
+ else {
+ prot &= ~VM_PROT_WRITE;
+ m->flags |= PG_COPYONWRITE;
+ }
+ }
+
+ if (m->flags & (PG_ACTIVE|PG_INACTIVE))
+ panic("vm_fault: active or inactive before copy object handling");
+
+ /*
+ * If the page is being written, but hasn't been
+ * copied to the copy-object, we have to copy it there.
+ */
+ RetryCopy:
+ if (first_object->copy != NULL) {
+ vm_object_t copy_object = first_object->copy;
+ vm_offset_t copy_offset;
+ vm_page_t copy_m;
+
+ /*
+ * We only need to copy if we want to write it.
+ */
+ if ((fault_type & VM_PROT_WRITE) == 0) {
+ prot &= ~VM_PROT_WRITE;
+ m->flags |= PG_COPYONWRITE;
+ }
+ else {
+ /*
+ * Try to get the lock on the copy_object.
+ */
+ if (!vm_object_lock_try(copy_object)) {
+ vm_object_unlock(object);
+ /* should spin a bit here... */
+ vm_object_lock(object);
+ goto RetryCopy;
+ }
+
+ /*
+ * Make another reference to the copy-object,
+ * to keep it from disappearing during the
+ * copy.
+ */
+ copy_object->ref_count++;
+
+ /*
+ * Does the page exist in the copy?
+ */
+ copy_offset = first_offset
+ - copy_object->shadow_offset;
+ copy_m = vm_page_lookup(copy_object, copy_offset);
+ if (page_exists = (copy_m != NULL)) {
+ if (copy_m->flags & PG_BUSY) {
+#ifdef DOTHREADS
+ int wait_result;
+
+ /*
+ * If the page is being brought
+ * in, wait for it and then retry.
+ */
+ PAGE_ASSERT_WAIT(copy_m, !change_wiring);
+ RELEASE_PAGE(m);
+ copy_object->ref_count--;
+ vm_object_unlock(copy_object);
+ UNLOCK_THINGS;
+ thread_block();
+ wait_result = current_thread()->wait_result;
+ vm_object_deallocate(first_object);
+ if (wait_result != THREAD_AWAKENED)
+ return(KERN_SUCCESS);
+ goto RetryFault;
+#else
+ /*
+ * If the page is being brought
+ * in, wait for it and then retry.
+ */
+ PAGE_ASSERT_WAIT(copy_m, !change_wiring);
+ RELEASE_PAGE(m);
+ copy_object->ref_count--;
+ vm_object_unlock(copy_object);
+ UNLOCK_THINGS;
+ thread_block();
+ vm_object_deallocate(first_object);
+ goto RetryFault;
+#endif
+ }
+ }
+
+ /*
+ * If the page is not in memory (in the object)
+ * and the object has a pager, we have to check
+ * if the pager has the data in secondary
+ * storage.
+ */
+ if (!page_exists) {
+
+ /*
+ * If we don't allocate a (blank) page
+ * here... another thread could try
+ * to page it in, allocate a page, and
+ * then block on the busy page in its
+ * shadow (first_object). Then we'd
+ * trip over the busy page after we
+ * found that the copy_object's pager
+ * doesn't have the page...
+ */
+ copy_m = vm_page_alloc(copy_object,
+ copy_offset);
+ if (copy_m == NULL) {
+ /*
+ * Wait for a page, then retry.
+ */
+ RELEASE_PAGE(m);
+ copy_object->ref_count--;
+ vm_object_unlock(copy_object);
+ UNLOCK_AND_DEALLOCATE;
+ VM_WAIT;
+ goto RetryFault;
+ }
+
+ if (copy_object->pager != NULL) {
+ vm_object_unlock(object);
+ vm_object_unlock(copy_object);
+ UNLOCK_MAP;
+
+ page_exists = vm_pager_has_page(
+ copy_object->pager,
+ (copy_offset + copy_object->paging_offset));
+
+ vm_object_lock(copy_object);
+
+ /*
+ * Since the map is unlocked, someone
+ * else could have copied this object
+ * and put a different copy_object
+ * between the two. Or, the last
+ * reference to the copy-object (other
+ * than the one we have) may have
+ * disappeared - if that has happened,
+ * we don't need to make the copy.
+ */
+ if (copy_object->shadow != object ||
+ copy_object->ref_count == 1) {
+ /*
+ * Gaah... start over!
+ */
+ FREE_PAGE(copy_m);
+ vm_object_unlock(copy_object);
+ vm_object_deallocate(copy_object);
+ /* may block */
+ vm_object_lock(object);
+ goto RetryCopy;
+ }
+ vm_object_lock(object);
+
+ if (page_exists) {
+ /*
+ * We didn't need the page
+ */
+ FREE_PAGE(copy_m);
+ }
+ }
+ }
+ if (!page_exists) {
+ /*
+ * Must copy page into copy-object.
+ */
+ vm_page_copy(m, copy_m);
+ copy_m->flags &= ~PG_FAKE;
+
+ /*
+ * Things to remember:
+ * 1. The copied page must be marked 'dirty'
+ * so it will be paged out to the copy
+ * object.
+ * 2. If the old page was in use by any users
+ * of the copy-object, it must be removed
+ * from all pmaps. (We can't know which
+ * pmaps use it.)
+ */
+ vm_page_lock_queues();
+ pmap_page_protect(VM_PAGE_TO_PHYS(old_m),
+ VM_PROT_NONE);
+ copy_m->flags &= ~PG_CLEAN;
+ vm_page_activate(copy_m); /* XXX */
+ vm_page_unlock_queues();
+
+ PAGE_WAKEUP(copy_m);
+ }
+ /*
+ * The reference count on copy_object must be
+ * at least 2: one for our extra reference,
+ * and at least one from the outside world
+ * (we checked that when we last locked
+ * copy_object).
+ */
+ copy_object->ref_count--;
+ vm_object_unlock(copy_object);
+ m->flags &= ~PG_COPYONWRITE;
+ }
+ }
+
+ if (m->flags & (PG_ACTIVE | PG_INACTIVE))
+ panic("vm_fault: active or inactive before retrying lookup");
+
+ /*
+ * We must verify that the maps have not changed
+ * since our last lookup.
+ */
+
+ if (!lookup_still_valid) {
+ vm_object_t retry_object;
+ vm_offset_t retry_offset;
+ vm_prot_t retry_prot;
+
+ /*
+ * Since map entries may be pageable, make sure we can
+ * take a page fault on them.
+ */
+ vm_object_unlock(object);
+
+ /*
+ * To avoid trying to write_lock the map while another
+ * thread has it read_locked (in vm_map_pageable), we
+ * do not try for write permission. If the page is
+ * still writable, we will get write permission. If it
+ * is not, or has been marked needs_copy, we enter the
+ * mapping without write permission, and will merely
+ * take another fault.
+ */
+ result = vm_map_lookup(&map, vaddr,
+ fault_type & ~VM_PROT_WRITE, &entry,
+ &retry_object, &retry_offset, &retry_prot,
+ &wired, &su);
+
+ vm_object_lock(object);
+
+ /*
+ * If we don't need the page any longer, put it on the
+ * active list (the easiest thing to do here). If no
+ * one needs it, pageout will grab it eventually.
+ */
+
+ if (result != KERN_SUCCESS) {
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ return(result);
+ }
+
+ lookup_still_valid = TRUE;
+
+ if ((retry_object != first_object) ||
+ (retry_offset != first_offset)) {
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto RetryFault;
+ }
+
+ /*
+ * Check whether the protection has changed or the object
+ * has been copied while we left the map unlocked.
+ * Changing from read to write permission is OK - we leave
+ * the page write-protected, and catch the write fault.
+ * Changing from write to read permission means that we
+ * can't mark the page write-enabled after all.
+ */
+ prot &= retry_prot;
+ if (m->flags & PG_COPYONWRITE)
+ prot &= ~VM_PROT_WRITE;
+ }
+
+ /*
+ * (the various bits we're fiddling with here are locked by
+ * the object's lock)
+ */
+
+ /* XXX This distorts the meaning of the copy_on_write bit */
+
+ if (prot & VM_PROT_WRITE)
+ m->flags &= ~PG_COPYONWRITE;
+
+ /*
+ * It's critically important that a wired-down page be faulted
+ * only once in each map for which it is wired.
+ */
+
+ if (m->flags & (PG_ACTIVE | PG_INACTIVE))
+ panic("vm_fault: active or inactive before pmap_enter");
+
+ vm_object_unlock(object);
+
+ /*
+ * Put this page into the physical map.
+ * We had to do the unlock above because pmap_enter
+ * may cause other faults. We don't put the
+ * page back on the active queue until later so
+ * that the page-out daemon won't find us (yet).
+ */
+
+ pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired);
+
+ /*
+ * If the page is not wired down, then put it where the
+ * pageout daemon can find it.
+ */
+ vm_object_lock(object);
+ vm_page_lock_queues();
+ if (change_wiring) {
+ if (wired)
+ vm_page_wire(m);
+ else
+ vm_page_unwire(m);
+ }
+ else
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+
+ /*
+ * Unlock everything, and return
+ */
+
+ PAGE_WAKEUP(m);
+ UNLOCK_AND_DEALLOCATE;
+
+ return(KERN_SUCCESS);
+
+}
+
+/*
+ * vm_fault_wire:
+ *
+ * Wire down a range of virtual addresses in a map.
+ */
+int
+vm_fault_wire(map, start, end)
+	vm_map_t map;
+	vm_offset_t start, end;
+{
+	register vm_offset_t cur;
+	register pmap_t pm;
+	int status;
+
+	/*
+	 * Before touching any page, tell the physical mapping layer
+	 * that nothing in [start, end) may fault any more, so that it
+	 * can lock down its own page tables and the like.
+	 */
+	pm = vm_map_pmap(map);
+	pmap_pageable(pm, start, end, FALSE);
+
+	/*
+	 * Walk the range a page at a time, simulating a fault on each
+	 * address so the page is found and entered in the physical map.
+	 * On the first failure, unwire whatever was already wired (if
+	 * anything) and pass the fault's error code back.
+	 */
+	for (cur = start; cur < end; cur += PAGE_SIZE) {
+		status = vm_fault(map, cur, VM_PROT_NONE, TRUE);
+		if (status == 0)
+			continue;
+		if (cur != start)
+			vm_fault_unwire(map, start, cur);
+		return(status);
+	}
+
+	return(KERN_SUCCESS);
+}
+
+
+/*
+ * vm_fault_unwire:
+ *
+ * Unwire a range of virtual addresses in a map.
+ */
+void vm_fault_unwire(map, start, end)
+	vm_map_t map;
+	vm_offset_t start, end;
+{
+	register vm_offset_t cur, phys;
+	register pmap_t pm;
+
+	pm = vm_map_pmap(map);
+
+	/*
+	 * The pages are wired, so the physical map must be able to
+	 * translate every address in the range; a missing translation
+	 * is fatal.  The page queues stay locked for the whole walk.
+	 */
+	vm_page_lock_queues();
+
+	cur = start;
+	while (cur < end) {
+		phys = pmap_extract(pm, cur);
+		if (phys == (vm_offset_t) 0) {
+			panic("unwire: page not in pmap");
+		}
+		/* Drop the wiring in the pmap first, then on the page. */
+		pmap_change_wiring(pm, cur, FALSE);
+		vm_page_unwire(PHYS_TO_VM_PAGE(phys));
+		cur += PAGE_SIZE;
+	}
+
+	vm_page_unlock_queues();
+
+	/*
+	 * Finally let the physical mapping layer know that the range
+	 * may fault again, so page tables and such may be unwired too.
+	 */
+	pmap_pageable(pm, start, end, TRUE);
+
+}
+
+/*
+ *	Routine:
+ *		vm_fault_copy_entry
+ *	Function:
+ *		Copy all of the pages from a wired-down map entry to another.
+ *		A new object sized to the destination entry is allocated and
+ *		installed in dst_entry at offset 0; each page is then
+ *		allocated in it, filled from the corresponding source page,
+ *		and entered (unwired) in dst_map's pmap using dst_entry's
+ *		max_protection.
+ *
+ *	In/out conditions:
+ *		The source and destination maps must be locked for write.
+ *		The source map entry must be wired down (or be a sharing map
+ *		entry corresponding to a main map entry that is wired down).
+ *
+ *	Panics if a page is missing from the source object.  src_map is
+ *	not used (it is only touched to keep lint quiet).
+ */
+
+void vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
+	vm_map_t	dst_map;
+	vm_map_t	src_map;
+	vm_map_entry_t	dst_entry;
+	vm_map_entry_t	src_entry;
+{
+
+	vm_object_t	dst_object;
+	vm_object_t	src_object;
+	vm_offset_t	dst_offset;
+	vm_offset_t	src_offset;
+	vm_prot_t	prot;
+	vm_offset_t	vaddr;
+	vm_page_t	dst_m;
+	vm_page_t	src_m;
+
+#ifdef	lint
+	src_map++;
+#endif
+
+	src_object = src_entry->object.vm_object;
+	src_offset = src_entry->offset;
+
+	/*
+	 *	Create the top-level object for the destination entry.
+	 *	(Doesn't actually shadow anything - we copy the pages
+	 *	directly.)
+	 */
+	dst_object = vm_object_allocate(
+			(vm_size_t) (dst_entry->end - dst_entry->start));
+
+	dst_entry->object.vm_object = dst_object;
+	dst_entry->offset = 0;
+
+	prot  = dst_entry->max_protection;
+
+	/*
+	 *	Loop through all of the pages in the entry's range, copying
+	 *	each one from the source object (it should be there) to the
+	 *	destination object.
+	 */
+	for (vaddr = dst_entry->start, dst_offset = 0;
+	     vaddr < dst_entry->end;
+	     vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) {
+
+		/*
+		 *	Allocate a page in the destination object.  If none
+		 *	is available, drop the object lock while waiting and
+		 *	retake it before trying again.
+		 */
+		vm_object_lock(dst_object);
+		do {
+			dst_m = vm_page_alloc(dst_object, dst_offset);
+			if (dst_m == NULL) {
+				vm_object_unlock(dst_object);
+				VM_WAIT;
+				vm_object_lock(dst_object);
+			}
+		} while (dst_m == NULL);
+
+		/*
+		 *	Find the page in the source object, and copy it in.
+		 *	(Because the source is wired down, the page will be
+		 *	in memory.)
+		 */
+		vm_object_lock(src_object);
+		src_m = vm_page_lookup(src_object, dst_offset + src_offset);
+		if (src_m == NULL)
+			panic("vm_fault_copy_entry: page missing");
+
+		vm_page_copy(src_m, dst_m);
+
+		/*
+		 *	Enter it in the pmap.  Both object locks are
+		 *	released before calling into the pmap layer.
+		 */
+		vm_object_unlock(src_object);
+		vm_object_unlock(dst_object);
+
+		pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m),
+				prot, FALSE);
+
+		/*
+		 *	Mark it no longer busy, and put it on the active list.
+		 */
+		vm_object_lock(dst_object);
+		vm_page_lock_queues();
+		vm_page_activate(dst_m);
+		vm_page_unlock_queues();
+		PAGE_WAKEUP(dst_m);
+		vm_object_unlock(dst_object);
+	}
+
+}
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
new file mode 100644
index 000000000000..5676ff3f7cc2
--- /dev/null
+++ b/sys/vm/vm_glue.c
@@ -0,0 +1,605 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_glue.c 8.6 (Berkeley) 1/5/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/buf.h>
+#include <sys/user.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <machine/cpu.h>
+
+/*
+ * File-scope knobs.  readbuffers is tested by kernacc(): while it is zero,
+ * kernacc() rejects any range that overlaps the buffer pool.  avefree and
+ * maxdmap are not referenced anywhere else in this file.
+ * NOTE(review): presumably they are consumed by other files -- confirm.
+ */
+int avefree = 0; /* XXX */
+unsigned maxdmap = MAXDSIZ; /* XXX */
+int readbuffers = 0; /* XXX allow kgdb to read kernel buffer pool */
+
+/*
+ * Report whether the kernel map allows the requested kind of access
+ * (read when rw is B_READ, write otherwise) to the page-rounded range
+ * covering [addr, addr+len).  Returns 1 if so, 0 if not.
+ */
+int
+kernacc(addr, len, rw)
+	caddr_t addr;
+	int len, rw;
+{
+	vm_offset_t first, last;
+	vm_prot_t want;
+	boolean_t ok;
+
+	if (rw == B_READ)
+		want = VM_PROT_READ;
+	else
+		want = VM_PROT_WRITE;
+
+	first = trunc_page(addr);
+	last = round_page(addr+len);
+	ok = vm_map_check_protection(kernel_map, first, last, want);
+
+	/*
+	 * XXX the buffer cache (among other things) is still managed
+	 * behind the VM system's back, so an address the VM system
+	 * believes is accessible may have no physical page under it.
+	 * Touching it could lead to bogus page allocation in the kernel
+	 * address space or, worse, pmap-level inconsistencies.  Only the
+	 * buffer cache is guarded against for now: unless readbuffers is
+	 * set, any range overlapping it is reported as inaccessible.
+	 */
+	if (ok && !readbuffers) {
+		if (last > (vm_offset_t)buffers &&
+		    first < (vm_offset_t)buffers + MAXBSIZE * nbuf)
+			ok = FALSE;
+	}
+
+	return(ok == TRUE);
+}
+
+/*
+ * Report whether the current process's map allows the requested kind of
+ * access (read when rw is B_READ, write otherwise) to the page-rounded
+ * range covering [addr, addr+len).  Returns 1 if so, 0 if not.
+ */
+int
+useracc(addr, len, rw)
+	caddr_t addr;
+	int len, rw;
+{
+	vm_prot_t want;
+	boolean_t ok;
+
+	if (rw == B_READ)
+		want = VM_PROT_READ;
+	else
+		want = VM_PROT_WRITE;
+
+	ok = vm_map_check_protection(&curproc->p_vmspace->vm_map,
+	    trunc_page(addr), round_page(addr+len), want);
+
+	return(ok == TRUE);
+}
+
+#ifdef KGDB
+/*
+ * Change protections on kernel pages from addr to addr+len
+ * (presumably so debugger can plant a breakpoint).
+ *
+ * We force the protection change at the pmap level. If we were
+ * to use vm_map_protect a change to allow writing would be lazily-
+ * applied meaning we would still take a protection fault, something
+ * we really don't want to do. It would also fragment the kernel
+ * map unnecessarily. We cannot use pmap_protect since it also won't
+ * enforce a write-enable request. Using pmap_enter is the only way
+ * we can ensure the change takes place properly.
+ */
+void
+chgkprot(addr, len, rw)
+	register caddr_t addr;
+	int len, rw;
+{
+	vm_offset_t va, limit, phys;
+	vm_prot_t newprot;
+
+	/* B_READ asks for read-only; anything else for read/write. */
+	if (rw == B_READ)
+		newprot = VM_PROT_READ;
+	else
+		newprot = VM_PROT_READ|VM_PROT_WRITE;
+
+	limit = round_page(addr + len);
+	va = trunc_page(addr);
+	while (va < limit) {
+		/*
+		 * Look up the physical address behind this page.  The
+		 * low bit is set in the address handed to pmap_extract
+		 * and stripped again from its answer: a cheap trick for
+		 * telling physical page 0 apart from an invalid mapping
+		 * (not that it really matters).
+		 */
+		phys = pmap_extract(kernel_pmap, va|1);
+		if (phys == 0)
+			panic("chgkprot: invalid page");
+
+		/* Re-enter the same page with the new protection. */
+		pmap_enter(kernel_pmap, va, phys&~1, newprot, TRUE);
+		va += PAGE_SIZE;
+	}
+}
+#endif
+
+/*
+ * Make the page-rounded range covering [addr, addr+len) in the current
+ * process's map non-pageable (vm_map_pageable with new_pageable FALSE).
+ */
+void
+vslock(addr, len)
+	caddr_t	addr;
+	u_int	len;
+{
+	vm_map_t map = &curproc->p_vmspace->vm_map;
+	vm_offset_t first = trunc_page(addr);
+	vm_offset_t last = round_page(addr+len);
+
+	vm_map_pageable(map, first, last, FALSE);
+}
+
+/*
+ * Make the page-rounded range covering [addr, addr+len) in the current
+ * process's map pageable again.  `dirtied' is not used; it is only
+ * touched to keep lint quiet.
+ */
+void
+vsunlock(addr, len, dirtied)
+	caddr_t	addr;
+	u_int	len;
+	int dirtied;
+{
+	vm_map_t map = &curproc->p_vmspace->vm_map;
+	vm_offset_t first = trunc_page(addr);
+	vm_offset_t last = round_page(addr+len);
+
+#ifdef	lint
+	dirtied++;
+#endif
+	vm_map_pageable(map, first, last, TRUE);
+}
+
+/*
+ * Implement fork's actions on an address space.
+ * Here we arrange for the address space to be copied or referenced,
+ * allocate a user struct (pcb and kernel stack), then call the
+ * machine-dependent layer to fill those in and make the new process
+ * ready to run.
+ * NOTE: the kernel stack may be at a different location in the child
+ * process, and thus addresses of automatic variables may be invalid
+ * after cpu_fork returns in the child process. We do nothing here
+ * after cpu_fork returns.
+ */
+int
+vm_fork(p1, p2, isvfork)
+ register struct proc *p1, *p2;
+ int isvfork;
+{
+ /*
+ * p1 is the parent and p2 the child being built.  isvfork is only
+ * passed through to shmfork().  The return value is whatever
+ * cpu_fork() returns.
+ */
+ register struct user *up;
+ vm_offset_t addr;
+
+#ifdef i386
+ /*
+ * avoid copying any of the parent's pagetables or other per-process
+ * objects that reside in the map by marking all of them non-inheritable
+ */
+ (void)vm_map_inherit(&p1->p_vmspace->vm_map,
+ UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
+#endif
+ /* Give the child its own vmspace derived from the parent's. */
+ p2->p_vmspace = vmspace_fork(p1->p_vmspace);
+
+#ifdef SYSVSHM
+ /* Only call shmfork() when the parent has shared-memory state. */
+ if (p1->p_vmspace->vm_shm)
+ shmfork(p1, p2, isvfork);
+#endif
+
+#ifndef i386
+ /*
+ * Allocate a wired-down (for now) pcb and kernel stack for the process
+ */
+ addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES));
+ if (addr == 0)
+ panic("vm_fork: no more kernel virtual memory");
+ vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE);
+#else
+/* XXX somehow, on 386, occasionally pageout removes active, wired down kstack,
+and pagetables, WITHOUT going thru vm_page_unwire! Why this appears to work is
+not yet clear, yet it does... */
+ addr = kmem_alloc(kernel_map, ctob(UPAGES));
+ if (addr == 0)
+ panic("vm_fork: no more kernel virtual memory");
+#endif
+ /* The freshly allocated region becomes the child's user struct. */
+ up = (struct user *)addr;
+ p2->p_addr = up;
+
+ /*
+ * p_stats and p_sigacts currently point at fields
+ * in the user struct but not at &u, instead at p_addr.
+ * Copy p_sigacts and parts of p_stats; zero the rest
+ * of p_stats (statistics).
+ */
+ p2->p_stats = &up->u_stats;
+ p2->p_sigacts = &up->u_sigacts;
+ up->u_sigacts = *p1->p_sigacts;
+ /* Zero the pstat_startzero..pstat_endzero span of the child's stats. */
+ bzero(&up->u_stats.pstat_startzero,
+ (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
+ (caddr_t)&up->u_stats.pstat_startzero));
+ /* Copy the pstat_startcopy..pstat_endcopy span from the parent. */
+ bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
+ ((caddr_t)&up->u_stats.pstat_endcopy -
+ (caddr_t)&up->u_stats.pstat_startcopy));
+
+#ifdef i386
+ /*
+ * In the child's map, throw away and re-create the range from
+ * UPT_MIN_ADDRESS - UPAGES*NBPG up to UPT_MAX_ADDRESS, then mark it
+ * non-inheritable.  Note that this `addr' is a new u_int local that
+ * shadows the function-level one for the length of the block.
+ */
+ { u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp;
+
+ vp = &p2->p_vmspace->vm_map;
+ (void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
+ (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
+ (void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE);
+ }
+#endif
+ /*
+ * cpu_fork will copy and update the kernel stack and pcb,
+ * and make the child ready to run. It marks the child
+ * so that it can return differently than the parent.
+ * It returns twice, once in the parent process and
+ * once in the child.
+ */
+ return (cpu_fork(p1, p2));
+}
+
+/*
+ * Set default limits for VM system.
+ * Called for proc 0, and then inherited by all others.
+ */
+void
+vm_init_limits(p)
+	register struct proc *p;
+{
+
+	/*
+	 * Data and stack segments: the hard limit is the configured
+	 * maximum size and the soft limit the configured default size.
+	 */
+	p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
+	p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
+	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
+	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
+
+	/*
+	 * Resident set size: start the soft limit at all of the memory
+	 * that is free right now.  A single large process therefore
+	 * begins random page replacement once it has filled memory.
+	 */
+	p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(cnt.v_free_count);
+}
+
+#include <vm/vm_pageout.h>
+
+#ifdef DEBUG
+/*
+ * Debug-only swap controls.  While enableswap is zero, scheduler() just
+ * sleeps and swapout_threads() returns without doing anything.
+ * swapdebug is a bit mask of the SDB_* values below; each bit turns on
+ * a group of printf traces in the swap code.
+ */
+int enableswap = 1;
+int swapdebug = 0;
+#define SDB_FOLLOW 1
+#define SDB_SWAPIN 2
+#define SDB_SWAPOUT 4
+#endif
+
+/*
+ * Brutally simple:
+ * 1. Attempt to swapin every swapped-out, runnable process in
+ * order of priority.
+ * 2. If not enough memory, wake the pageout daemon and let it
+ * clear some space.
+ */
+void
+scheduler()
+{
+ register struct proc *p;
+ register int pri;
+ struct proc *pp;
+ int ppri;
+ vm_offset_t addr;
+ vm_size_t size;
+
+ /*
+ * This function never returns: every path below ends in "goto loop".
+ */
+loop:
+#ifdef DEBUG
+ while (!enableswap)
+ sleep((caddr_t)&proc0, PVM);
+#endif
+ /*
+ * Scan all processes for the runnable, not-in-memory one with the
+ * highest score.  The score grows with p_swtime and p_slptime and
+ * shrinks with p_nice (weighted by 8).
+ */
+ pp = NULL;
+ ppri = INT_MIN;
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) {
+ pri = p->p_swtime + p->p_slptime - p->p_nice * 8;
+ if (pri > ppri) {
+ pp = p;
+ ppri = pri;
+ }
+ }
+ }
+#ifdef DEBUG
+ if (swapdebug & SDB_FOLLOW)
+ printf("sched: running, procp %x pri %d\n", pp, ppri);
+#endif
+ /*
+ * Nothing to do, back to sleep
+ */
+ if ((p = pp) == NULL) {
+ sleep((caddr_t)&proc0, PVM);
+ goto loop;
+ }
+
+ /*
+ * We would like to bring someone in.
+ * This part is really bogus cuz we could deadlock on memory
+ * despite our feeble check.
+ */
+ size = round_page(ctob(UPAGES));
+ addr = (vm_offset_t) p->p_addr;
+ /* Only proceed when more free pages exist than the u-area needs. */
+ if (cnt.v_free_count > atop(size)) {
+#ifdef DEBUG
+ if (swapdebug & SDB_SWAPIN)
+ printf("swapin: pid %d(%s)@%x, pri %d free %d\n",
+ p->p_pid, p->p_comm, p->p_addr,
+ ppri, cnt.v_free_count);
+#endif
+ /* Make the u-area non-pageable again. */
+ vm_map_pageable(kernel_map, addr, addr+size, FALSE);
+ /*
+ * Some architectures need to be notified when the
+ * user area has moved to new physical page(s) (e.g.
+ * see pmax/pmax/vm_machdep.c).
+ */
+ cpu_swapin(p);
+ /*
+ * Between splstatclock() and spl0(): put the process on the
+ * run queue if it is still SRUN, and mark it in memory.
+ */
+ (void) splstatclock();
+ if (p->p_stat == SRUN)
+ setrunqueue(p);
+ p->p_flag |= P_INMEM;
+ (void) spl0();
+ p->p_swtime = 0;
+ goto loop;
+ }
+ /*
+ * Not enough memory, jab the pageout daemon and wait til the
+ * coast is clear.
+ */
+#ifdef DEBUG
+ if (swapdebug & SDB_FOLLOW)
+ printf("sched: no room for pid %d(%s), free %d\n",
+ p->p_pid, p->p_comm, cnt.v_free_count);
+#endif
+ (void) splhigh();
+ VM_WAIT;
+ (void) spl0();
+#ifdef DEBUG
+ if (swapdebug & SDB_FOLLOW)
+ printf("sched: room again, free %d\n", cnt.v_free_count);
+#endif
+ goto loop;
+}
+
+/*
+ * True when, of the five flags tested, P_INMEM is set and P_SYSTEM,
+ * P_NOSWAP, P_WEXIT and P_PHYSIO are all clear.
+ */
+#define swappable(p) \
+ (((p)->p_flag & \
+ (P_SYSTEM | P_INMEM | P_NOSWAP | P_WEXIT | P_PHYSIO)) == P_INMEM)
+
+/*
+ * Swapout is driven by the pageout daemon. Very simple, we find eligible
+ * procs and unwire their u-areas. We try to always "swap" at least one
+ * process in case we need the room for a swapin.
+ * If any procs have been sleeping/stopped for at least maxslp seconds,
+ * they are swapped. Else, we swap the longest-sleeping or stopped process,
+ * if any, otherwise the longest-resident process.
+ */
+void
+swapout_threads()
+{
+	register struct proc *p;
+	struct proc *sleeper = NULL, *runner = NULL;
+	int sleeper_time = 0, runner_time = 0;
+	int didswap = 0;
+	int state;
+	extern int maxslp;
+
+#ifdef DEBUG
+	if (!enableswap)
+		return;
+#endif
+	/*
+	 * One pass over every swappable process.  Anything that has been
+	 * sleeping or stopped for maxslp seconds or more goes out right
+	 * away.  Along the way remember the longest-sleeping process that
+	 * was not yet old enough, and the longest-resident runnable one,
+	 * as fallback candidates.
+	 */
+	for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+		if (!swappable(p))
+			continue;
+
+		state = p->p_stat;
+		if (state == SRUN) {
+			if (p->p_swtime > runner_time) {
+				runner = p;
+				runner_time = p->p_swtime;
+			}
+		} else if (state == SSLEEP || state == SSTOP) {
+			if (p->p_slptime >= maxslp) {
+				swapout(p);
+				didswap++;
+			} else if (p->p_slptime > sleeper_time) {
+				sleeper = p;
+				sleeper_time = p->p_slptime;
+			}
+		}
+	}
+
+	/*
+	 * If some real dud was swapped there is nothing more to do.
+	 * Likewise if memory is not really low: tossing one more process
+	 * gains only UPAGES pages, so it is not worth it otherwise.
+	 */
+	if (didswap != 0)
+		return;
+	if (cnt.v_free_count > atop(round_page(ctob(UPAGES))))
+		return;
+
+	/* Prefer the sleeping/stopped candidate over the running one. */
+	p = sleeper;
+	if (p == 0)
+		p = runner;
+#ifdef DEBUG
+	if (swapdebug & SDB_SWAPOUT)
+		printf("swapout_threads: no duds, try procp %x\n", p);
+#endif
+	if (p)
+		swapout(p);
+}
+
+/*
+ * Swap out process p: on non-i386 machines make its u-area pageable and
+ * call pmap_collect() on its pmap; on all machines clear P_INMEM, take
+ * the process off the run queue if it is SRUN, and reset p_swtime.
+ */
+void
+swapout(p)
+ register struct proc *p;
+{
+ vm_offset_t addr;
+ vm_size_t size;
+
+#ifdef DEBUG
+ if (swapdebug & SDB_SWAPOUT)
+ printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n",
+ p->p_pid, p->p_comm, p->p_addr, p->p_stat,
+ p->p_slptime, cnt.v_free_count);
+#endif
+ size = round_page(ctob(UPAGES));
+ addr = (vm_offset_t) p->p_addr;
+#if defined(hp300) || defined(luna68k)
+ /*
+ * Ugh! u-area is double mapped to a fixed address behind the
+ * back of the VM system and accesses are usually through that
+ * address rather than the per-process address. Hence reference
+ * and modify information are recorded at the fixed address and
+ * lost at context switch time. We assume the u-struct and
+ * kernel stack are always accessed/modified and force it to be so.
+ */
+ {
+ register int i;
+ volatile long tmp;
+
+ /* Read and write back one long in each u-area page. */
+ for (i = 0; i < UPAGES; i++) {
+ tmp = *(long *)addr; *(long *)addr = tmp;
+ addr += NBPG;
+ }
+ /* The loop advanced addr; point it back at the u-area start. */
+ addr = (vm_offset_t) p->p_addr;
+ }
+#endif
+#ifdef mips
+ /*
+ * Be sure to save the floating point coprocessor state before
+ * paging out the u-struct.
+ */
+ {
+ extern struct proc *machFPCurProcPtr;
+
+ if (p == machFPCurProcPtr) {
+ MachSaveCurFPState(p);
+ machFPCurProcPtr = (struct proc *)0;
+ }
+ }
+#endif
+#ifndef i386 /* temporary measure till we find spontaneous unwire of kstack */
+ vm_map_pageable(kernel_map, addr, addr+size, TRUE);
+ pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
+#endif
+ /*
+ * Between splhigh() and spl0(): mark the process as no longer in
+ * memory and pull it off the run queue if it was runnable.
+ */
+ (void) splhigh();
+ p->p_flag &= ~P_INMEM;
+ if (p->p_stat == SRUN)
+ remrq(p);
+ (void) spl0();
+ p->p_swtime = 0;
+}
+
+/*
+ * The rest of these routines fake thread handling
+ */
+
+/*
+ * Record `event' in curproc->p_thread, which is the value a following
+ * thread_block() sleeps on.  `ruptible' is not used; it is only touched
+ * to keep lint quiet.
+ */
+void
+assert_wait(event, ruptible)
+ int event;
+ boolean_t ruptible;
+{
+#ifdef lint
+ ruptible++;
+#endif
+ curproc->p_thread = event;
+}
+
+/*
+ * If curproc->p_thread is non-zero, sleep on it at priority PVM;
+ * otherwise return at once.  The test and the sleep happen after
+ * splhigh(), and the value splhigh() returned is handed to splx() on
+ * the way out.
+ */
+void
+thread_block()
+{
+ int s = splhigh();
+
+ if (curproc->p_thread)
+ sleep((caddr_t)curproc->p_thread, PVM);
+ splx(s);
+}
+
+/*
+ * Record `event' in curproc->p_thread, release `lock', and then, if the
+ * recorded event is non-zero, sleep on it at priority PVM.  All of this
+ * runs after splhigh(); the value it returned is handed to splx() before
+ * returning.  `ruptible' is not used; it is only touched to keep lint
+ * quiet.
+ */
+void
+thread_sleep(event, lock, ruptible)
+	int event;
+	simple_lock_t lock;
+	boolean_t ruptible;
+{
+	/*
+	 * The declaration has to come before the lint-only statement
+	 * below: a declaration following a statement is not valid in
+	 * this dialect of C.
+	 */
+	int s = splhigh();
+
+#ifdef lint
+	ruptible++;
+#endif
+	curproc->p_thread = event;
+	simple_unlock(lock);
+	if (curproc->p_thread)
+		sleep((caddr_t)event, PVM);
+	splx(s);
+}
+
+/*
+ * Call wakeup() on `event' after splhigh(), then hand the value
+ * splhigh() returned to splx().
+ */
+void
+thread_wakeup(event)
+ int event;
+{
+ int s = splhigh();
+
+ wakeup((caddr_t)event);
+ splx(s);
+}
+
+/*
+ * DEBUG stuff
+ */
+
+/*
+ * Indentation that iprintf() emits before its message: one tab for each
+ * full 8 and one blank for each unit left over.
+ */
+int indent = 0;
+
+#include <machine/stdarg.h> /* see subr_prf.c */
+
+/*ARGSUSED2*/
+void
+#if __STDC__
+iprintf(const char *fmt, ...)
+#else
+iprintf(fmt /* , va_alist */)
+	char *fmt;
+	/* va_dcl */
+#endif
+{
+	register int left;
+	va_list args;
+
+	/*
+	 * Emit the current indentation first: a tab for every full
+	 * group of 8, then single blanks for what remains.
+	 */
+	left = indent;
+	while (left >= 8) {
+		printf("\t");
+		left -= 8;
+	}
+	for (; left > 0; left--)
+		printf(" ");
+
+	/* Hand the caller's format and arguments on through "%r". */
+	va_start(args, fmt);
+	printf("%r", fmt, args);
+	va_end(args);
+}
diff --git a/sys/vm/vm_inherit.h b/sys/vm/vm_inherit.h
new file mode 100644
index 000000000000..455f91c93908
--- /dev/null
+++ b/sys/vm/vm_inherit.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_inherit.h 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Virtual memory map inheritance definitions.
+ */
+
+/* Include guard: the definitions below are processed once per file. */
+#ifndef _VM_INHERIT_
+#define _VM_INHERIT_
+
+/*
+ * Enumeration of valid values for vm_inherit_t.
+ */
+
+#define VM_INHERIT_SHARE ((vm_inherit_t) 0) /* share with child */
+#define VM_INHERIT_COPY ((vm_inherit_t) 1) /* copy into child */
+#define VM_INHERIT_NONE ((vm_inherit_t) 2) /* absent from child */
+#define VM_INHERIT_DONATE_COPY ((vm_inherit_t) 3) /* copy and delete */
+
+/* The default inheritance is an alias for VM_INHERIT_COPY. */
+#define VM_INHERIT_DEFAULT VM_INHERIT_COPY
+
+#endif /* _VM_INHERIT_ */
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
new file mode 100644
index 000000000000..4874f9e707a3
--- /dev/null
+++ b/sys/vm/vm_init.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_init.c 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Initialize the Virtual Memory subsystem.
+ */
+
+#include <sys/param.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+/*
+ * vm_mem_init initializes the virtual memory system.
+ * This is done only by the first cpu up.
+ *
+ * The physical memory bounds come from the externs avail_start/avail_end.
+ */
+
+void vm_mem_init()
+{
+ extern vm_offset_t avail_start, avail_end;
+ extern vm_offset_t virtual_avail, virtual_end;
+
+ /*
+ * Initializes resident memory structures.
+ * From here on, all physical memory is accounted for,
+ * and we use only virtual addresses.
+ */
+ vm_set_page_size();
+ vm_page_startup(&avail_start, &avail_end);
+
+ /*
+ * Initialize other VM packages
+ */
+ vm_object_init(virtual_end - VM_MIN_KERNEL_ADDRESS);
+ vm_map_startup();
+ kmem_init(virtual_avail, virtual_end);
+ pmap_init(avail_start, avail_end);
+ vm_pager_init();
+}
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
new file mode 100644
index 000000000000..7e4db63abf28
--- /dev/null
+++ b/sys/vm/vm_kern.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_kern.c 8.3 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Kernel memory management.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_kern.h>
+
+/*
+ * kmem_alloc_pageable:
+ *
+ * Allocate pageable memory in the given map; returns 0 on failure.
+ * The "must be kernel_map" check below is compiled out (#if 0).
+ */
+
+vm_offset_t kmem_alloc_pageable(map, size)
+ vm_map_t map;
+ register vm_size_t size;
+{
+ vm_offset_t addr;
+ register int result;
+
+#if 0
+ if (map != kernel_map)
+ panic("kmem_alloc_pageable: not called with kernel_map");
+#endif
+
+ size = round_page(size);
+
+ addr = vm_map_min(map); /* in/out argument to vm_map_find */
+ result = vm_map_find(map, NULL, (vm_offset_t) 0,
+ &addr, size, TRUE);
+ if (result != KERN_SUCCESS) {
+ return(0);
+ }
+
+ return(addr);
+}
+
+/*
+ * Allocate wired-down, zero-filled memory in the kernel's address map
+ * or a submap. Returns 0 if the map has no room; waits for free pages.
+ */
+vm_offset_t kmem_alloc(map, size)
+ register vm_map_t map;
+ register vm_size_t size;
+{
+ vm_offset_t addr;
+ register vm_offset_t offset;
+ extern vm_object_t kernel_object;
+ vm_offset_t i;
+
+ size = round_page(size);
+
+ /*
+ * Use the kernel object for wired-down kernel pages.
+ * Assume that no region of the kernel object is
+ * referenced more than once.
+ */
+
+ /*
+ * Locate sufficient space in the map. This will give us the
+ * final virtual address for the new memory, and thus will tell
+ * us the offset within the kernel object.
+ */
+ vm_map_lock(map);
+ if (vm_map_findspace(map, 0, size, &addr)) {
+ vm_map_unlock(map);
+ return (0);
+ }
+ offset = addr - VM_MIN_KERNEL_ADDRESS;
+ vm_object_reference(kernel_object);
+ vm_map_insert(map, kernel_object, offset, addr, addr + size);
+ vm_map_unlock(map);
+
+ /*
+ * Guarantee that there are pages already in this object
+ * before calling vm_map_pageable. This is to prevent the
+ * following scenario:
+ *
+ * 1) Threads have swapped out, so that there is a
+ * pager for the kernel_object.
+ * 2) The kmsg zone is empty, and so we are kmem_allocing
+ * a new page for it.
+ * 3) vm_map_pageable calls vm_fault; there is no page,
+ * but there is a pager, so we call
+ * pager_data_request. But the kmsg zone is empty,
+ * so we must kmem_alloc.
+ * 4) goto 1
+ * 5) Even if the kmsg zone is not empty: when we get
+ * the data back from the pager, it will be (very
+ * stale) non-zero data. kmem_alloc is defined to
+ * return zero-filled memory.
+ *
+ * We're intentionally not activating the pages we allocate
+ * to prevent a race with page-out. vm_map_pageable will wire
+ * the pages.
+ */
+
+ vm_object_lock(kernel_object);
+ for (i = 0 ; i < size; i+= PAGE_SIZE) {
+ vm_page_t mem;
+
+ while ((mem = vm_page_alloc(kernel_object, offset+i)) == NULL) {
+ vm_object_unlock(kernel_object);
+ VM_WAIT;
+ vm_object_lock(kernel_object);
+ }
+ vm_page_zero_fill(mem);
+ mem->flags &= ~PG_BUSY;
+ }
+ vm_object_unlock(kernel_object);
+
+ /*
+ * And finally, mark the data as non-pageable.
+ */
+
+ (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);
+
+ /*
+ * Try to coalesce the map
+ */
+
+ vm_map_simplify(map, addr);
+
+ return(addr);
+}
+
+/*
+ * kmem_free:
+ *
+ * Release a region of kernel virtual memory allocated
+ * with kmem_alloc, and return the physical pages associated
+ * with that region. The range is widened to page boundaries.
+ */
+void kmem_free(map, addr, size)
+ vm_map_t map;
+ register vm_offset_t addr;
+ vm_size_t size;
+{
+ (void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
+}
+
+/*
+ * kmem_suballoc:
+ *
+ * Allocates a map to manage a subrange of the kernel virtual
+ * address space. Panics if any step fails.
+ *
+ * Arguments are as follows:
+ *
+ * parent Map to take range from
+ * min, max Returned endpoints of map
+ * size Size of range to find
+ * pageable Can the region be paged
+ */
+vm_map_t kmem_suballoc(parent, min, max, size, pageable)
+ register vm_map_t parent;
+ vm_offset_t *min, *max;
+ register vm_size_t size;
+ boolean_t pageable;
+{
+ register int ret;
+ vm_map_t result;
+
+ size = round_page(size);
+
+ *min = (vm_offset_t) vm_map_min(parent);
+ ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
+ min, size, TRUE);
+ if (ret != KERN_SUCCESS) {
+ printf("kmem_suballoc: bad status return of %d.\n", ret);
+ panic("kmem_suballoc");
+ }
+ *max = *min + size;
+ pmap_reference(vm_map_pmap(parent)); /* submap is created on the parent's pmap */
+ result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
+ if (result == NULL)
+ panic("kmem_suballoc: cannot create submap");
+ if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
+ panic("kmem_suballoc: unable to change range to submap");
+ return(result);
+}
+
+/*
+ * Allocate wired-down memory in the kernel's address map for the higher
+ * level kernel memory allocator (kern/kern_malloc.c). We cannot use
+ * kmem_alloc() because we may need to allocate memory at interrupt
+ * level where we cannot block (canwait == FALSE).
+ *
+ * map must be kmem_map or mb_map; pages come from the private kmem_object,
+ * so we never block in map or object waits. Returns the address, or 0 if
+ * !canwait and map space or pages run out (a full map panics if canwait).
+ *
+ * Note that this still only works in a uni-processor environment and
+ * when called at splhigh().
+ *
+ * We don't worry about expanding the map (adding entries) since entries
+ * for wired maps are statically allocated.
+ */
+vm_offset_t
+kmem_malloc(map, size, canwait)
+ register vm_map_t map;
+ register vm_size_t size;
+ boolean_t canwait;
+{
+ register vm_offset_t offset, i;
+ vm_map_entry_t entry;
+ vm_offset_t addr;
+ vm_page_t m;
+ extern vm_object_t kmem_object;
+
+ if (map != kmem_map && map != mb_map)
+ panic("kmem_malloc: map != {kmem,mb}_map");
+
+ size = round_page(size);
+ addr = vm_map_min(map);
+
+ /*
+ * Locate sufficient space in the map. This will give us the
+ * final virtual address for the new memory, and thus will tell
+ * us the offset within kmem_object.
+ */
+ vm_map_lock(map);
+ if (vm_map_findspace(map, 0, size, &addr)) {
+ vm_map_unlock(map);
+ if (canwait) /* XXX should wait */
+ panic("kmem_malloc: %s too small",
+ map == kmem_map ? "kmem_map" : "mb_map");
+ return (0);
+ }
+ offset = addr - vm_map_min(kmem_map);
+ vm_object_reference(kmem_object);
+ vm_map_insert(map, kmem_object, offset, addr, addr + size);
+
+ /*
+ * If we can wait, just mark the range as wired
+ * (will fault pages as necessary).
+ */
+ if (canwait) {
+ vm_map_unlock(map);
+ (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size,
+ FALSE);
+ vm_map_simplify(map, addr);
+ return(addr);
+ }
+
+ /*
+ * If we cannot wait then we must allocate all memory up front,
+ * pulling it off the active queue to prevent pageout.
+ */
+ vm_object_lock(kmem_object);
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ m = vm_page_alloc(kmem_object, offset + i);
+
+ /*
+ * Ran out of space, free everything up and return.
+ * Don't need to lock page queues here as we know
+ * that the pages we got aren't on any queues.
+ */
+ if (m == NULL) {
+ while (i != 0) {
+ i -= PAGE_SIZE;
+ m = vm_page_lookup(kmem_object, offset + i);
+ vm_page_free(m);
+ }
+ vm_object_unlock(kmem_object);
+ vm_map_delete(map, addr, addr + size);
+ vm_map_unlock(map);
+ return(0);
+ }
+#if 0
+ vm_page_zero_fill(m);
+#endif
+ m->flags &= ~PG_BUSY;
+ }
+ vm_object_unlock(kmem_object);
+
+ /*
+ * Mark map entry as non-pageable.
+ * Assert: vm_map_insert() will never be able to extend the previous
+ * entry so there will be a new entry exactly corresponding to this
+ * address range and it will have wired_count == 0.
+ */
+ if (!vm_map_lookup_entry(map, addr, &entry) ||
+ entry->start != addr || entry->end != addr + size ||
+ entry->wired_count)
+ panic("kmem_malloc: entry not found or misaligned");
+ entry->wired_count++;
+
+ /*
+ * Loop thru pages, entering them in the pmap.
+ * (We cannot add them to the wired count without
+ * wrapping the vm_page_queue_lock in splimp...)
+ */
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ vm_object_lock(kmem_object);
+ m = vm_page_lookup(kmem_object, offset + i);
+ vm_object_unlock(kmem_object);
+ pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m),
+ VM_PROT_DEFAULT, TRUE);
+ }
+ vm_map_unlock(map);
+
+ vm_map_simplify(map, addr);
+ return(addr);
+}
+
+/*
+ * kmem_alloc_wait
+ *
+ * Allocates pageable memory from a sub-map of the kernel. If the submap
+ * has no room, the caller sleeps waiting for more memory in the submap.
+ * Returns 0 only when the request is larger than the whole map.
+ */
+vm_offset_t kmem_alloc_wait(map, size)
+ vm_map_t map;
+ vm_size_t size;
+{
+ vm_offset_t addr;
+
+ size = round_page(size);
+
+ for (;;) {
+ /*
+ * To make this work for more than one map,
+ * use the map's lock to lock out sleepers/wakers.
+ */
+ vm_map_lock(map);
+ if (vm_map_findspace(map, 0, size, &addr) == 0)
+ break;
+ /* no space now; see if we can ever get space */
+ if (vm_map_max(map) - vm_map_min(map) < size) {
+ vm_map_unlock(map);
+ return (0);
+ }
+ assert_wait((int)map, TRUE); /* wait channel is the map address */
+ vm_map_unlock(map);
+ thread_block();
+ }
+ vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size);
+ vm_map_unlock(map);
+ return (addr);
+}
+
+/*
+ * kmem_free_wakeup
+ *
+ * Returns memory to a submap of the kernel, and wakes up any threads
+ * waiting for memory in that map.
+ */
+void kmem_free_wakeup(map, addr, size)
+ vm_map_t map;
+ vm_offset_t addr;
+ vm_size_t size;
+{
+ vm_map_lock(map);
+ (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
+ thread_wakeup((int)map); /* same channel kmem_alloc_wait sleeps on */
+ vm_map_unlock(map);
+}
+
+/*
+ * Create the kernel map; insert a mapping covering kernel text, data, bss,
+ * and all space allocated thus far (`bootstrap' data). The new map will thus
+ * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
+ * the range between `start' and `end' as free.
+ */
+void kmem_init(start, end)
+ vm_offset_t start, end;
+{
+ register vm_map_t m;
+
+ m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE);
+ vm_map_lock(m);
+ /* N.B.: cannot use kgdb to debug, starting with this assignment ... */
+ kernel_map = m;
+ (void) vm_map_insert(m, NULL, (vm_offset_t)0,
+ VM_MIN_KERNEL_ADDRESS, start);
+ /* ... and ending with the completion of the above `insert' */
+ vm_map_unlock(m);
+}
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
new file mode 100644
index 000000000000..d0d2c358af06
--- /dev/null
+++ b/sys/vm/vm_kern.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_kern.h 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/* Kernel memory management definitions. */
+
+vm_map_t buffer_map;
+vm_map_t exec_map;
+vm_map_t kernel_map;
+vm_map_t kmem_map;
+vm_map_t mb_map;
+vm_map_t phys_map;
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
new file mode 100644
index 000000000000..425fe0de4326
--- /dev/null
+++ b/sys/vm/vm_map.c
@@ -0,0 +1,2626 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_map.c 8.3 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Virtual memory mapping module.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+
+/*
+ * Virtual memory maps provide for the mapping, protection,
+ * and sharing of virtual memory objects. In addition,
+ * this module provides for an efficient virtual copy of
+ * memory from one map to another.
+ *
+ * Synchronization is required prior to most operations.
+ *
+ * Maps consist of an ordered doubly-linked list of simple
+ * entries; a single hint is used to speed up lookups.
+ *
+ * In order to properly represent the sharing of virtual
+ * memory regions among maps, the map structure is bi-level.
+ * Top-level ("address") maps refer to regions of sharable
+ * virtual memory. These regions are implemented as
+ * ("sharing") maps, which then refer to the actual virtual
+ * memory objects. When two address maps "share" memory,
+ * their top-level maps both have references to the same
+ * sharing map. When memory is virtual-copied from one
+ * address map to another, the references in the sharing
+ * maps are actually copied -- no copying occurs at the
+ * virtual memory object level.
+ *
+ * Since portions of maps are specified by start/end addresses,
+ * which may not align with existing map entries, all
+ * routines merely "clip" entries to these start/end values.
+ * [That is, an entry is split into two, bordering at a
+ * start or end value.] Note that these clippings may not
+ * always be necessary (as the two resulting entries are then
+ * not changed); however, the clipping is done for convenience.
+ * No attempt is currently made to "glue back together" two
+ * abutting entries.
+ *
+ * As mentioned above, virtual copy operations are performed
+ * by copying VM object references from one sharing map to
+ * another, and then marking both regions as copy-on-write.
+ * It is important to note that only one writeable reference
+ * to a VM object region exists in any map -- this means that
+ * shadow object creation can be delayed until a write operation
+ * occurs.
+ */
+
+/*
+ * vm_map_startup:
+ *
+ * Initialize the vm_map module. Must be called before
+ * any other vm_map routines.
+ *
+ * Map and entry structures are allocated from the general
+ * purpose memory pool with some exceptions:
+ *
+ * - The kernel map and kmem submap are allocated statically.
+ * - Kernel map entries are allocated out of a static pool.
+ *
+ * These restrictions are necessary since malloc() uses the
+ * maps and requires map entries.
+ */
+
+vm_offset_t kentry_data;
+vm_size_t kentry_data_size;
+vm_map_entry_t kentry_free;
+vm_map_t kmap_free;
+
+static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
+static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
+
+void vm_map_startup()
+{
+ register int i;
+ register vm_map_entry_t mep;
+ vm_map_t mp;
+
+ /*
+ * Static map structures for allocation before initialization of
+ * kernel map or kmem map. vm_map_create knows how to deal with them.
+ */
+ kmap_free = mp = (vm_map_t) kentry_data; /* kentry_data is not set in this function */
+ i = MAX_KMAP;
+ while (--i > 0) {
+ mp->header.next = (vm_map_entry_t) (mp + 1); /* chain maps through header.next */
+ mp++;
+ }
+ mp++->header.next = NULL; /* end of map list; mp now points past the last map */
+
+ /*
+ * Form a free list of statically allocated kernel map entries
+ * with the rest.
+ */
+ kentry_free = mep = (vm_map_entry_t) mp;
+ i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep;
+ while (--i > 0) {
+ mep->next = mep + 1;
+ mep++;
+ }
+ mep->next = NULL; /* end of entry free list */
+}
+
+/*
+ * Allocate a vmspace structure, including a vm_map and pmap,
+ * and initialize those structures. The refcnt is set to 1.
+ * The remaining fields must be initialized by the caller.
+ */
+struct vmspace *
+vmspace_alloc(min, max, pageable)
+ vm_offset_t min, max;
+ int pageable;
+{
+ register struct vmspace *vm;
+
+ MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK);
+ bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm); /* zero only the fields before vm_startcopy */
+ vm_map_init(&vm->vm_map, min, max, pageable);
+ pmap_pinit(&vm->vm_pmap);
+ vm->vm_map.pmap = &vm->vm_pmap; /* XXX */
+ vm->vm_refcnt = 1;
+ return (vm);
+}
+
+void
+vmspace_free(vm)
+ register struct vmspace *vm;
+{
+ /* Drop one reference; tear the vmspace down when the count reaches zero. */
+ if (--vm->vm_refcnt == 0) {
+ /*
+ * Lock the map, to wait out all other references to it.
+ * Delete all of the mappings and pages they hold,
+ * then call the pmap module to reclaim anything left.
+ */
+ vm_map_lock(&vm->vm_map);
+ (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
+ vm->vm_map.max_offset);
+ pmap_release(&vm->vm_pmap);
+ FREE(vm, M_VMMAP); /* map lock is not released; it is freed with vm */
+ }
+}
+
+/*
+ * vm_map_create:
+ *
+ * Creates and returns a new empty VM map with the given physical map
+ * structure and address bounds. Until kmem_map exists the map comes from
+ * the static kmap_free list (panics when empty); afterwards from MALLOC.
+ */
+vm_map_t vm_map_create(pmap, min, max, pageable)
+ pmap_t pmap;
+ vm_offset_t min, max;
+ boolean_t pageable;
+{
+ register vm_map_t result;
+ extern vm_map_t kmem_map;
+
+ if (kmem_map == NULL) {
+ result = kmap_free;
+ if (result == NULL) /* must precede the dereference below */
+ panic("vm_map_create: out of maps");
+ kmap_free = (vm_map_t) result->header.next;
+ } else
+ MALLOC(result, vm_map_t, sizeof(struct vm_map),
+ M_VMMAP, M_WAITOK);
+
+ vm_map_init(result, min, max, pageable);
+ result->pmap = pmap;
+ return(result);
+}
+
+/*
+ * Initialize an existing vm_map structure
+ * such as that in the vmspace structure.
+ * The pmap is set elsewhere.
+ */
+void
+vm_map_init(map, min, max, pageable)
+ register struct vm_map *map;
+ vm_offset_t min, max;
+ boolean_t pageable;
+{
+ map->header.next = map->header.prev = &map->header; /* empty circular list */
+ map->nentries = 0;
+ map->size = 0;
+ map->ref_count = 1; /* starts with a single reference */
+ map->is_main_map = TRUE;
+ map->min_offset = min;
+ map->max_offset = max;
+ map->entries_pageable = pageable;
+ map->first_free = &map->header; /* no entries yet, so both hints are the header */
+ map->hint = &map->header;
+ map->timestamp = 0;
+ lock_init(&map->lock, TRUE);
+ simple_lock_init(&map->ref_lock);
+ simple_lock_init(&map->hint_lock);
+}
+
+/*
+ * vm_map_entry_create: [ internal use only ]
+ *
+ * Allocates a VM map entry for insertion.
+ * No entry fields are filled in. Panics if no entry can be obtained.
+ */
+vm_map_entry_t vm_map_entry_create(map)
+ vm_map_t map;
+{
+ vm_map_entry_t entry;
+#ifdef DEBUG
+ extern vm_map_t kernel_map, kmem_map, mb_map, pager_map;
+ boolean_t isspecial;
+
+ isspecial = (map == kernel_map || map == kmem_map ||
+ map == mb_map || map == pager_map);
+ if (isspecial && map->entries_pageable ||
+ !isspecial && !map->entries_pageable)
+ panic("vm_map_entry_create: bogus map");
+#endif
+ if (map->entries_pageable) {
+ MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry),
+ M_VMMAPENT, M_WAITOK);
+ } else {
+ if (entry = kentry_free) /* assignment intended: pop the static free list */
+ kentry_free = kentry_free->next;
+ }
+ if (entry == NULL)
+ panic("vm_map_entry_create: out of map entries");
+
+ return(entry);
+}
+
+/*
+ * vm_map_entry_dispose: [ internal use only ]
+ *
+ * Inverse of vm_map_entry_create.
+ */
+void vm_map_entry_dispose(map, entry)
+ vm_map_t map;
+ vm_map_entry_t entry;
+{
+#ifdef DEBUG
+ extern vm_map_t kernel_map, kmem_map, mb_map, pager_map;
+ boolean_t isspecial;
+
+ isspecial = (map == kernel_map || map == kmem_map ||
+ map == mb_map || map == pager_map);
+ if (isspecial && map->entries_pageable ||
+ !isspecial && !map->entries_pageable)
+ panic("vm_map_entry_dispose: bogus map");
+#endif
+ if (map->entries_pageable) {
+ FREE(entry, M_VMMAPENT); /* entry came from MALLOC */
+ } else {
+ entry->next = kentry_free; /* push back on the static free list */
+ kentry_free = entry;
+ }
+}
+
+/*
+ * vm_map_entry_{un,}link:
+ *
+ * Insert/remove entries from maps.
+ */
+#define vm_map_entry_link(map, after_where, entry) \
+ { \
+ (map)->nentries++; \
+ (entry)->prev = (after_where); \
+ (entry)->next = (after_where)->next; \
+ (entry)->prev->next = (entry); \
+ (entry)->next->prev = (entry); \
+ }
+#define vm_map_entry_unlink(map, entry) \
+ { \
+ (map)->nentries--; \
+ (entry)->next->prev = (entry)->prev; \
+ (entry)->prev->next = (entry)->next; \
+ }
+
+/*
+ * vm_map_reference:
+ *
+ * Creates another valid reference to the given map.
+ * A NULL map is ignored.
+ */
+void vm_map_reference(map)
+ register vm_map_t map;
+{
+ if (map == NULL)
+ return;
+
+ simple_lock(&map->ref_lock);
+ map->ref_count++;
+ simple_unlock(&map->ref_lock);
+}
+
+/*
+ *	vm_map_deallocate:
+ *
+ *	Drops one reference on the specified map.  When the count
+ *	falls to zero (or below) the map is torn down: its whole
+ *	address range is deleted, its pmap is destroyed and the map
+ *	structure itself is freed.
+ *	The map should not be locked.  A NULL map is ignored.
+ */
+void vm_map_deallocate(map)
+	register vm_map_t	map;
+{
+	register int	refs_left;
+
+	if (map == NULL)
+		return;
+
+	simple_lock(&map->ref_lock);
+	map->ref_count--;
+	refs_left = map->ref_count;
+	simple_unlock(&map->ref_lock);
+
+	/* Somebody else still holds the map; nothing more to do. */
+	if (refs_left > 0)
+		return;
+
+	/*
+	 *	Last reference gone.  Lock the map, to wait out all
+	 *	other references to it.  The lock is never released
+	 *	because the map is freed below.
+	 */
+	vm_map_lock(map);
+
+	(void) vm_map_delete(map, map->min_offset, map->max_offset);
+
+	pmap_destroy(map->pmap);
+
+	FREE(map, M_VMMAP);
+}
+
+/*
+ *	vm_map_insert:
+ *
+ *	Maps the given whole VM object (possibly NULL) into the
+ *	target map over [start, end).  The object's size should
+ *	match that of the address range.
+ *
+ *	Returns KERN_INVALID_ADDRESS if the range is empty, inverted
+ *	or outside the map's bounds, KERN_NO_SPACE if it overlaps an
+ *	existing entry, and KERN_SUCCESS otherwise.
+ *
+ *	Requires that the map be locked, and leaves it so.
+ */
+int
+vm_map_insert(map, object, offset, start, end)
+	vm_map_t	map;
+	vm_object_t	object;
+	vm_offset_t	offset;
+	vm_offset_t	start;
+	vm_offset_t	end;
+{
+	register vm_map_entry_t		fresh;
+	register vm_map_entry_t		before;
+	vm_map_entry_t			found;
+
+	/*
+	 *	Reject ranges that are empty, inverted or that fall
+	 *	outside the bounds of the map.
+	 */
+	if (start >= end ||
+	    start < map->min_offset || end > map->max_offset)
+		return(KERN_INVALID_ADDRESS);
+
+	/*
+	 *	The start address must not lie inside an existing
+	 *	entry.  On a miss the lookup hands back the entry
+	 *	just before the proposed range.
+	 */
+	if (vm_map_lookup_entry(map, start, &found))
+		return(KERN_NO_SPACE);
+	before = found;
+
+	/*
+	 *	Nor may the range run into the entry that follows.
+	 */
+	if (before->next != &map->header && before->next->start < end)
+		return(KERN_NO_SPACE);
+
+	/*
+	 *	With no object to map, try to grow the preceding entry
+	 *	instead of creating a new one.  That is only attempted
+	 *	when the predecessor is a plain, unwired, default-
+	 *	attribute entry of a main map that ends exactly at
+	 *	"start", and it succeeds only if the object layer agrees
+	 *	to coalesce.
+	 */
+	if (object == NULL &&
+	    before != &map->header &&
+	    before->end == start &&
+	    map->is_main_map &&
+	    before->is_a_map == FALSE &&
+	    before->is_sub_map == FALSE &&
+	    before->inheritance == VM_INHERIT_DEFAULT &&
+	    before->protection == VM_PROT_DEFAULT &&
+	    before->max_protection == VM_PROT_DEFAULT &&
+	    before->wired_count == 0 &&
+	    vm_object_coalesce(before->object.vm_object,
+			NULL,
+			before->offset,
+			(vm_offset_t) 0,
+			(vm_size_t)(before->end - before->start),
+			(vm_size_t)(end - before->end))) {
+		map->size += (end - before->end);
+		before->end = end;
+		return(KERN_SUCCESS);
+	}
+
+	/*
+	 *	No luck; build a brand new entry.
+	 */
+	fresh = vm_map_entry_create(map);
+
+	fresh->start = start;
+	fresh->end = end;
+	fresh->offset = offset;
+	fresh->object.vm_object = object;
+
+	fresh->is_a_map = FALSE;
+	fresh->is_sub_map = FALSE;
+	fresh->copy_on_write = FALSE;
+	fresh->needs_copy = FALSE;
+
+	/* These attributes are only filled in for main maps. */
+	if (map->is_main_map) {
+		fresh->inheritance = VM_INHERIT_DEFAULT;
+		fresh->protection = VM_PROT_DEFAULT;
+		fresh->max_protection = VM_PROT_DEFAULT;
+		fresh->wired_count = 0;
+	}
+
+	/*
+	 *	Hook it into the list right after its predecessor and
+	 *	account for the newly mapped space.
+	 */
+	vm_map_entry_link(map, before, fresh);
+	map->size += fresh->end - fresh->start;
+
+	/*
+	 *	If the free-space hint pointed at the predecessor and
+	 *	the new entry leaves no gap behind it, advance the hint.
+	 */
+	if (map->first_free == before && before->end >= fresh->start)
+		map->first_free = fresh;
+
+	return(KERN_SUCCESS);
+}
+
+/*
+ * SAVE_HINT:
+ *
+ * Saves the specified entry as the hint for
+ * future lookups. Performs necessary interlocks.
+ *
+ * The store to (map)->hint is bracketed by simple_lock /
+ * simple_unlock on (map)->hint_lock.
+ *
+ * NOTE: this expands to three separate statements that are
+ * not wrapped in braces, and "map" is evaluated three times.
+ * Use it only where a statement sequence is legal (never as
+ * the unbraced body of an if/else/loop) and only with
+ * side-effect-free arguments.
+ */
+#define SAVE_HINT(map,value) \
+ simple_lock(&(map)->hint_lock); \
+ (map)->hint = (value); \
+ simple_unlock(&(map)->hint_lock);
+
+/*
+ * vm_map_lookup_entry: [ internal use only ]
+ *
+ * Finds the map entry containing (or
+ * immediately preceding) the specified address
+ * in the given map; the entry is returned
+ * in the "entry" parameter. The boolean
+ * result indicates whether the address is
+ * actually contained in the map.
+ *
+ * When FALSE is returned, *entry is the predecessor of the
+ * position where the address would go; that predecessor can
+ * be &map->header when the address lies before the first
+ * entry.
+ *
+ * The search starts from map->hint (read under hint_lock).
+ * Except on the fast path where the hint itself contains the
+ * address, the entry stored in *entry is saved as the new hint
+ * through SAVE_HINT.
+ *
+ * NOTE(review): only hint_lock is taken here; callers are
+ * presumably expected to hold the map lock -- confirm.
+ */
+boolean_t vm_map_lookup_entry(map, address, entry)
+ register vm_map_t map;
+ register vm_offset_t address;
+ vm_map_entry_t *entry; /* OUT */
+{
+ register vm_map_entry_t cur;
+ register vm_map_entry_t last;
+
+ /*
+ * Start looking either from the head of the
+ * list, or from the hint.
+ */
+
+ simple_lock(&map->hint_lock);
+ cur = map->hint;
+ simple_unlock(&map->hint_lock);
+
+ if (cur == &map->header)
+ cur = cur->next;
+
+ if (address >= cur->start) {
+ /*
+ * Go from hint to end of list.
+ *
+ * But first, make a quick check to see if
+ * we are already looking at the entry we
+ * want (which is usually the case).
+ * Note also that we don't need to save the hint
+ * here... it is the same hint (unless we are
+ * at the header, in which case the hint didn't
+ * buy us anything anyway).
+ */
+ last = &map->header;
+ if ((cur != last) && (cur->end > address)) {
+ *entry = cur;
+ return(TRUE);
+ }
+ }
+ else {
+ /*
+ * Go from start to hint, *inclusively*
+ */
+ last = cur->next;
+ cur = map->header.next;
+ }
+
+ /*
+ * Search linearly
+ *
+ * The walk stops at the first entry whose end lies
+ * beyond the address: either that entry contains the
+ * address (hit), or the address falls in the gap in
+ * front of it (miss).
+ */
+
+ while (cur != last) {
+ if (cur->end > address) {
+ if (address >= cur->start) {
+ /*
+ * Save this lookup for future
+ * hints, and return
+ */
+
+ *entry = cur;
+ SAVE_HINT(map, cur);
+ return(TRUE);
+ }
+ break;
+ }
+ cur = cur->next;
+ }
+ *entry = cur->prev; /* miss: report the predecessor */
+ SAVE_HINT(map, *entry);
+ return(FALSE);
+}
+
+/*
+ * First-fit search for `length' bytes of unmapped address space in the
+ * given map, beginning at `start' (raised to the map's minimum offset if
+ * it lies below it).  On success the address found is stored in *addr and
+ * the entry preceding the gap is saved as the lookup hint.
+ * The map must be locked.  Returns 0 on success, 1 on no space.
+ */
+int
+vm_map_findspace(map, start, length, addr)
+	register vm_map_t map;
+	register vm_offset_t start;
+	vm_size_t length;
+	vm_offset_t *addr;
+{
+	register vm_map_entry_t entry, succ;
+	register vm_offset_t limit;
+
+	if (start < map->min_offset)
+		start = map->min_offset;
+	if (start > map->max_offset)
+		return (1);
+
+	/*
+	 * Pick the entry to start scanning from.  When searching from the
+	 * bottom of the map the first_free hint is used; otherwise the
+	 * entry at or before `start' is looked up.  If something already
+	 * occupies the starting address, begin right after it.
+	 */
+	if (start != map->min_offset) {
+		vm_map_entry_t found;
+
+		if (vm_map_lookup_entry(map, start, &found))
+			start = found->end;
+		entry = found;
+	} else {
+		entry = map->first_free;
+		if (entry != &map->header)
+			start = entry->end;
+	}
+
+	/*
+	 * Walk forward one entry at a time.  For each candidate start,
+	 * fail if the region would pass the end of the map or wrap the
+	 * address space; succeed if `entry' is the last entry or the
+	 * region fits in front of the next one; otherwise retry just
+	 * past the next entry.
+	 */
+	for (;;) {
+		limit = start + length;
+		if (limit < start || limit > map->max_offset)
+			return (1);
+
+		succ = entry->next;
+		if (succ == &map->header || succ->start >= limit)
+			break;
+
+		entry = succ;
+		start = entry->end;
+	}
+
+	SAVE_HINT(map, entry);
+	*addr = start;
+	return (0);
+}
+
+/*
+ *	vm_map_find maps the given object into the target map over a
+ *	range of "length" bytes.  With find_space set, a free range is
+ *	searched for first-fit from *addr and the address chosen is
+ *	returned through the same parameter; otherwise the range
+ *	starting exactly at *addr is used.
+ *
+ *	The map is locked for the duration of the call.  Returns
+ *	KERN_NO_SPACE if no free range is found, otherwise the result
+ *	of vm_map_insert.
+ */
+int
+vm_map_find(map, object, offset, addr, length, find_space)
+	vm_map_t	map;
+	vm_object_t	object;
+	vm_offset_t	offset;
+	vm_offset_t	*addr;		/* IN/OUT */
+	vm_size_t	length;
+	boolean_t	find_space;
+{
+	register vm_offset_t	where;
+	int			rv;
+
+	where = *addr;
+
+	vm_map_lock(map);
+
+	if (find_space) {
+		if (vm_map_findspace(map, where, length, addr) != 0) {
+			vm_map_unlock(map);
+			return (KERN_NO_SPACE);
+		}
+		/* pick up the address vm_map_findspace settled on */
+		where = *addr;
+	}
+
+	rv = vm_map_insert(map, object, offset, where, where + length);
+
+	vm_map_unlock(map);
+	return (rv);
+}
+
+/*
+ * vm_map_simplify_entry: [ internal use only ]
+ *
+ * Simplify the given map entry by:
+ * removing extra sharing maps
+ * [XXX maybe later] merging with a neighbor
+ *
+ * As written, this routine changes nothing: the sharing-map
+ * case is compiled out with #if 0 and the merge case consists
+ * only of a comment. The only executable statement is the
+ * "map++" under lint, which exists to mark the otherwise
+ * unused parameter as used.
+ */
+void vm_map_simplify_entry(map, entry)
+ vm_map_t map;
+ vm_map_entry_t entry;
+{
+#ifdef lint
+ map++;
+#endif
+
+ /*
+ * If this entry corresponds to a sharing map, then
+ * see if we can remove the level of indirection.
+ * If it's not a sharing map, then it points to
+ * a VM object, so see if we can merge with either
+ * of our neighbors.
+ */
+
+ if (entry->is_sub_map)
+ return;
+ if (entry->is_a_map) {
+#if 0
+ vm_map_t my_share_map;
+ int count;
+
+ my_share_map = entry->object.share_map;
+ simple_lock(&my_share_map->ref_lock);
+ count = my_share_map->ref_count;
+ simple_unlock(&my_share_map->ref_lock);
+
+ if (count == 1) {
+ /* Can move the region from
+ * entry->start to entry->end (+ entry->offset)
+ * in my_share_map into place of entry.
+ * Later.
+ */
+ }
+#endif
+ }
+ else {
+ /*
+ * Try to merge with our neighbors.
+ *
+ * Conditions for merge are:
+ *
+ * 1. entries are adjacent.
+ * 2. both entries point to objects
+ * with null pagers.
+ *
+ * If a merge is possible, we replace the two
+ * entries with a single entry, then merge
+ * the two objects into a single object.
+ *
+ * Now, all that is left to do is write the
+ * code!
+ */
+ }
+}
+
+/*
+ * vm_map_clip_start: [ internal use only ]
+ *
+ * Asserts that the given entry begins at or after
+ * the specified address; if necessary,
+ * it splits the entry into two.
+ *
+ * The split is delegated to _vm_map_clip_start and only
+ * happens when startaddr is greater than entry->start.
+ *
+ * NOTE: the macro arguments are used unparenthesized and
+ * more than once; pass simple, side-effect-free expressions
+ * ("entry" must be usable directly in front of "->").
+ */
+#define vm_map_clip_start(map, entry, startaddr) \
+{ \
+ if (startaddr > entry->start) \
+ _vm_map_clip_start(map, entry, startaddr); \
+}
+
+/*
+ *	Worker for vm_map_clip_start; called only when it is known
+ *	that the entry must be split.  A new entry covering the part
+ *	of the range in front of "start" is linked in ahead of
+ *	"entry", and "entry" itself is trimmed to begin at "start".
+ */
+static void _vm_map_clip_start(map, entry, start)
+	register vm_map_t	map;
+	register vm_map_entry_t	entry;
+	register vm_offset_t	start;
+{
+	register vm_map_entry_t	front;
+
+	/*
+	 *	Give the entry a chance to be simplified first.
+	 */
+	vm_map_simplify_entry(map, entry);
+
+	/*
+	 *	The front piece starts out as a copy of the original
+	 *	and ends where the split occurs.
+	 */
+	front = vm_map_entry_create(map);
+	*front = *entry;
+	front->end = start;
+
+	/*
+	 *	The original keeps the back piece, so both its start
+	 *	address and its offset move forward by the same amount.
+	 */
+	entry->offset += (start - entry->start);
+	entry->start = start;
+
+	/*
+	 *	The new entry must go in BEFORE this one so that this
+	 *	entry is the one with the requested starting address.
+	 */
+	vm_map_entry_link(map, entry->prev, front);
+
+	/*
+	 *	Two entries now point at the same backing map or
+	 *	object; take the extra reference.
+	 */
+	if (!entry->is_a_map && !entry->is_sub_map)
+		vm_object_reference(front->object.vm_object);
+	else
+		vm_map_reference(front->object.share_map);
+}
+
+/*
+ * vm_map_clip_end: [ internal use only ]
+ *
+ * Asserts that the given entry ends at or before
+ * the specified address; if necessary,
+ * it splits the entry into two.
+ *
+ * The split is delegated to _vm_map_clip_end and only
+ * happens when endaddr is less than entry->end.
+ *
+ * NOTE: the macro arguments are used unparenthesized and
+ * more than once; pass simple, side-effect-free expressions
+ * ("entry" must be usable directly in front of "->").
+ */
+
+#define vm_map_clip_end(map, entry, endaddr) \
+{ \
+ if (endaddr < entry->end) \
+ _vm_map_clip_end(map, entry, endaddr); \
+}
+
+/*
+ *	Worker for vm_map_clip_end; called only when it is known
+ *	that the entry must be split.  "entry" is trimmed to stop at
+ *	"end" and a new entry covering the remainder is linked in
+ *	right after it.
+ */
+static void _vm_map_clip_end(map, entry, end)
+	register vm_map_t	map;
+	register vm_map_entry_t	entry;
+	register vm_offset_t	end;
+{
+	register vm_map_entry_t	back;
+
+	/*
+	 *	The back piece starts out as a copy of the original.
+	 */
+	back = vm_map_entry_create(map);
+	*back = *entry;
+
+	/*
+	 *	Cut at "end": the original keeps the front, the new
+	 *	entry takes over from "end" onwards with its offset
+	 *	advanced by the size of the front piece.
+	 */
+	entry->end = end;
+	back->start = end;
+	back->offset += (end - entry->start);
+
+	/*
+	 *	Insert the new entry AFTER the specified entry.
+	 */
+	vm_map_entry_link(map, entry, back);
+
+	/*
+	 *	Two entries now point at the same backing map or
+	 *	object; take the extra reference.
+	 */
+	if (!entry->is_a_map && !entry->is_sub_map)
+		vm_object_reference(back->object.vm_object);
+	else
+		vm_map_reference(back->object.share_map);
+}
+
+/*
+ * VM_MAP_RANGE_CHECK: [ internal use only ]
+ *
+ * Asserts that the starting and ending region
+ * addresses fall within the valid range of the map.
+ *
+ * Despite the name nothing is rejected: out-of-range values
+ * are clamped. "start" is raised to vm_map_min(map), "end" is
+ * lowered to vm_map_max(map), and if start is then still
+ * greater than end it is set equal to end, leaving an empty
+ * range.
+ *
+ * NOTE: "start" and "end" are assigned to, so they must be
+ * modifiable lvalues; the arguments are used unparenthesized.
+ */
+#define VM_MAP_RANGE_CHECK(map, start, end) \
+ { \
+ if (start < vm_map_min(map)) \
+ start = vm_map_min(map); \
+ if (end > vm_map_max(map)) \
+ end = vm_map_max(map); \
+ if (start > end) \
+ start = end; \
+ }
+
+/*
+ *	vm_map_submap:		[ kernel use only ]
+ *
+ *	Mark the given range as handled by a subordinate map.
+ *
+ *	This range must have been created with vm_map_find,
+ *	and no other operations may have been performed on this
+ *	range prior to calling vm_map_submap.
+ *
+ *	Only a limited number of operations can be performed
+ *	within this range after calling vm_map_submap:
+ *		vm_fault
+ *	[Don't try vm_map_copy!]
+ *
+ *	To remove a submapping, one must first remove the
+ *	range from the superior map, and then destroy the
+ *	submap (if desired).  [Better yet, don't try it.]
+ *
+ *	Returns KERN_SUCCESS when the range corresponds exactly to
+ *	one entry that has no object, is not a sharing map and is
+ *	not copy-on-write; KERN_INVALID_ARGUMENT otherwise.  A
+ *	reference on the submap is taken on success.
+ */
+int
+vm_map_submap(map, start, end, submap)
+	register vm_map_t	map;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+	vm_map_t		submap;
+{
+	vm_map_entry_t		entry;
+	register int		status;
+
+	status = KERN_INVALID_ARGUMENT;
+
+	vm_map_lock(map);
+
+	VM_MAP_RANGE_CHECK(map, start, end);
+
+	/*
+	 *	Locate the first entry of the range and clip it so
+	 *	that it covers at most [start, end).
+	 */
+	if (!vm_map_lookup_entry(map, start, &entry)) {
+		entry = entry->next;
+	} else {
+		vm_map_clip_start(map, entry, start);
+	}
+
+	vm_map_clip_end(map, entry, end);
+
+	/*
+	 *	Only a single, untouched, object-less entry spanning
+	 *	exactly the requested range may become a submap.
+	 */
+	if (entry->start == start && entry->end == end &&
+	    !entry->is_a_map &&
+	    entry->object.vm_object == NULL &&
+	    !entry->copy_on_write) {
+		entry->is_a_map = FALSE;
+		entry->is_sub_map = TRUE;
+		entry->object.sub_map = submap;
+		vm_map_reference(submap);
+		status = KERN_SUCCESS;
+	}
+
+	vm_map_unlock(map);
+
+	return(status);
+}
+
+/*
+ *	vm_map_protect:
+ *
+ *	Sets the protection of the specified address
+ *	region in the target map.  If "set_max" is
+ *	specified, the maximum protection is to be set;
+ *	otherwise, only the current protection is affected.
+ *
+ *	The map is write-locked for the duration of the call and
+ *	is unlocked again on every return path.
+ *
+ *	Returns KERN_INVALID_ARGUMENT if the range touches a submap
+ *	entry, KERN_PROTECTION_FAILURE if new_prot asks for more
+ *	than some entry's max_protection allows, and KERN_SUCCESS
+ *	otherwise.  On failure no protection has been changed,
+ *	although the first entry may already have been clipped at
+ *	"start".
+ */
+int
+vm_map_protect(map, start, end, new_prot, set_max)
+	register vm_map_t	map;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+	register vm_prot_t	new_prot;
+	register boolean_t	set_max;
+{
+	register vm_map_entry_t		current;
+	vm_map_entry_t			entry;
+
+	vm_map_lock(map);
+
+	VM_MAP_RANGE_CHECK(map, start, end);
+
+	if (vm_map_lookup_entry(map, start, &entry)) {
+		vm_map_clip_start(map, entry, start);
+	}
+	else
+		entry = entry->next;
+
+	/*
+	 *	Make a first pass to check for protection
+	 *	violations.
+	 */
+
+	current = entry;
+	while ((current != &map->header) && (current->start < end)) {
+		if (current->is_sub_map) {
+			/* do not leave the map locked on this error path */
+			vm_map_unlock(map);
+			return(KERN_INVALID_ARGUMENT);
+		}
+		if ((new_prot & current->max_protection) != new_prot) {
+			vm_map_unlock(map);
+			return(KERN_PROTECTION_FAILURE);
+		}
+
+		current = current->next;
+	}
+
+	/*
+	 *	Go back and fix up protections.
+	 *	[Note that clipping is not necessary the second time.]
+	 */
+
+	current = entry;
+
+	while ((current != &map->header) && (current->start < end)) {
+		vm_prot_t	old_prot;
+
+		vm_map_clip_end(map, current, end);
+
+		/*
+		 *	Setting the maximum also narrows the current
+		 *	protection to what the new maximum still allows.
+		 */
+		old_prot = current->protection;
+		if (set_max)
+			current->protection =
+				(current->max_protection = new_prot) &
+					old_prot;
+		else
+			current->protection = new_prot;
+
+		/*
+		 *	Update physical map if necessary.
+		 *	Worry about copy-on-write here -- CHECK THIS XXX
+		 */
+
+		if (current->protection != old_prot) {
+
+			/* copy-on-write entries never get write access */
+#define MASK(entry)	((entry)->copy_on_write ? ~VM_PROT_WRITE : \
+							VM_PROT_ALL)
+#define	max(a,b)	((a) > (b) ? (a) : (b))
+
+			if (current->is_a_map) {
+				vm_map_entry_t	share_entry;
+				vm_offset_t	share_end;
+
+				/*
+				 *	Walk the part of the sharing map
+				 *	that backs this entry and protect
+				 *	each piece, translated back into
+				 *	this map's addresses, using the
+				 *	copy-on-write state of the
+				 *	sharing-map entry.
+				 */
+				vm_map_lock(current->object.share_map);
+				(void) vm_map_lookup_entry(
+						current->object.share_map,
+						current->offset,
+						&share_entry);
+				share_end = current->offset +
+					(current->end - current->start);
+				while ((share_entry !=
+					&current->object.share_map->header) &&
+					(share_entry->start < share_end)) {
+
+					pmap_protect(map->pmap,
+						(max(share_entry->start,
+							current->offset) -
+							current->offset +
+							current->start),
+						min(share_entry->end,
+							share_end) -
+						current->offset +
+						current->start,
+						current->protection &
+							MASK(share_entry));
+
+					share_entry = share_entry->next;
+				}
+				vm_map_unlock(current->object.share_map);
+			}
+			else
+				/*
+				 *	Mask with the copy-on-write state of
+				 *	the entry being changed, not with that
+				 *	of the first entry in the range.
+				 */
+				pmap_protect(map->pmap, current->start,
+					current->end,
+					current->protection & MASK(current));
+#undef	max
+#undef	MASK
+		}
+		current = current->next;
+	}
+
+	vm_map_unlock(map);
+	return(KERN_SUCCESS);
+}
+
+/*
+ *	vm_map_inherit:
+ *
+ *	Sets the inheritance of the specified address
+ *	range in the target map.  Inheritance
+ *	affects how the map will be shared with
+ *	child maps at the time of vm_map_fork.
+ *
+ *	Only VM_INHERIT_NONE, VM_INHERIT_COPY and VM_INHERIT_SHARE
+ *	are accepted; anything else yields KERN_INVALID_ARGUMENT
+ *	without touching the map.  Otherwise the entries are clipped
+ *	to the range, updated, and KERN_SUCCESS is returned.
+ */
+int
+vm_map_inherit(map, start, end, new_inheritance)
+	register vm_map_t	map;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+	register vm_inherit_t	new_inheritance;
+{
+	register vm_map_entry_t	entry;
+	vm_map_entry_t		found;
+
+	if (new_inheritance != VM_INHERIT_NONE &&
+	    new_inheritance != VM_INHERIT_COPY &&
+	    new_inheritance != VM_INHERIT_SHARE)
+		return(KERN_INVALID_ARGUMENT);
+
+	vm_map_lock(map);
+
+	VM_MAP_RANGE_CHECK(map, start, end);
+
+	/*
+	 *	Find the first entry of the range; if "start" falls
+	 *	inside an entry, split that entry there.
+	 */
+	if (!vm_map_lookup_entry(map, start, &found)) {
+		entry = found->next;
+	} else {
+		entry = found;
+		vm_map_clip_start(map, entry, start);
+	}
+
+	/*
+	 *	Stamp every entry that begins inside the range,
+	 *	trimming the last one at "end" if necessary.
+	 */
+	for (; entry != &map->header && entry->start < end;
+	     entry = entry->next) {
+		vm_map_clip_end(map, entry, end);
+		entry->inheritance = new_inheritance;
+	}
+
+	vm_map_unlock(map);
+	return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_pageable:
+ *
+ * Sets the pageability of the specified address
+ * range in the target map. Regions specified
+ * as not pageable require locked-down physical
+ * memory and physical page maps.
+ *
+ * new_pageable TRUE unwires the range (each entry's
+ * wired_count is decremented); FALSE wires it
+ * (wired_count is incremented and newly wired entries
+ * are faulted in with vm_fault_wire).
+ *
+ * Returns:
+ * KERN_INVALID_ADDRESS if "start" is not inside any entry;
+ * KERN_INVALID_ARGUMENT if the range contains a hole or,
+ * when unwiring, an entry that is not wired;
+ * the value returned by vm_fault_wire if wiring fails
+ * (the part already wired is unwired again first);
+ * KERN_SUCCESS otherwise.
+ *
+ * The map must not be locked, but a reference
+ * must remain to the map throughout the call.
+ */
+int
+vm_map_pageable(map, start, end, new_pageable)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ register boolean_t new_pageable;
+{
+ register vm_map_entry_t entry;
+ vm_map_entry_t start_entry;
+ register vm_offset_t failed; /* set only when vm_fault_wire fails */
+ int rv;
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ /*
+ * Only one pageability change may take place at one
+ * time, since vm_fault assumes it will be called
+ * only once for each wiring/unwiring. Therefore, we
+ * have to make sure we're actually changing the pageability
+ * for the entire region. We do so before making any changes.
+ */
+
+ if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
+ vm_map_unlock(map);
+ return(KERN_INVALID_ADDRESS);
+ }
+ entry = start_entry;
+
+ /*
+ * Actions are rather different for wiring and unwiring,
+ * so we have two separate cases.
+ */
+
+ if (new_pageable) {
+
+ vm_map_clip_start(map, entry, start);
+
+ /*
+ * Unwiring. First ensure that the range to be
+ * unwired is really wired down and that there
+ * are no holes.
+ */
+ while ((entry != &map->header) && (entry->start < end)) {
+
+ if (entry->wired_count == 0 ||
+ (entry->end < end &&
+ (entry->next == &map->header ||
+ entry->next->start > entry->end))) {
+ vm_map_unlock(map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+ entry = entry->next;
+ }
+
+ /*
+ * Now decrement the wiring count for each region.
+ * If a region becomes completely unwired,
+ * unwire its physical pages and mappings.
+ */
+ lock_set_recursive(&map->lock);
+
+ entry = start_entry;
+ while ((entry != &map->header) && (entry->start < end)) {
+ vm_map_clip_end(map, entry, end);
+
+ entry->wired_count--;
+ if (entry->wired_count == 0)
+ vm_fault_unwire(map, entry->start, entry->end);
+
+ entry = entry->next;
+ }
+ lock_clear_recursive(&map->lock);
+ }
+
+ else {
+ /*
+ * Wiring. We must do this in two passes:
+ *
+ * 1. Holding the write lock, we create any shadow
+ * or zero-fill objects that need to be created.
+ * Then we clip each map entry to the region to be
+ * wired and increment its wiring count. We
+ * create objects before clipping the map entries
+ * to avoid object proliferation.
+ *
+ * 2. We downgrade to a read lock, and call
+ * vm_fault_wire to fault in the pages for any
+ * newly wired area (wired_count is 1).
+ *
+ * Downgrading to a read lock for vm_fault_wire avoids
+ * a possible deadlock with another thread that may have
+ * faulted on one of the pages to be wired (it would mark
+ * the page busy, blocking us, then in turn block on the
+ * map lock that we hold). Because of problems in the
+ * recursive lock package, we cannot upgrade to a write
+ * lock in vm_map_lookup. Thus, any actions that require
+ * the write lock must be done beforehand. Because we
+ * keep the read lock on the map, the copy-on-write status
+ * of the entries we modify here cannot change.
+ */
+
+ /*
+ * Pass 1.
+ */
+ while ((entry != &map->header) && (entry->start < end)) {
+ if (entry->wired_count == 0) {
+
+ /*
+ * Perform actions of vm_map_lookup that need
+ * the write lock on the map: create a shadow
+ * object for a copy-on-write region, or an
+ * object for a zero-fill region.
+ *
+ * We don't have to do this for entries that
+ * point to sharing maps, because we won't hold
+ * the lock on the sharing map.
+ */
+ if (!entry->is_a_map) {
+ if (entry->needs_copy &&
+ ((entry->protection & VM_PROT_WRITE) != 0)) {
+
+ vm_object_shadow(&entry->object.vm_object,
+ &entry->offset,
+ (vm_size_t)(entry->end
+ - entry->start));
+ entry->needs_copy = FALSE;
+ }
+ else if (entry->object.vm_object == NULL) {
+ entry->object.vm_object =
+ vm_object_allocate((vm_size_t)(entry->end
+ - entry->start));
+ entry->offset = (vm_offset_t)0;
+ }
+ }
+ }
+ vm_map_clip_start(map, entry, start);
+ vm_map_clip_end(map, entry, end);
+ entry->wired_count++;
+
+ /*
+ * Check for holes
+ */
+ if (entry->end < end &&
+ (entry->next == &map->header ||
+ entry->next->start > entry->end)) {
+ /*
+ * Found one. Object creation actions
+ * do not need to be undone, but the
+ * wired counts need to be restored.
+ * Walk backwards from the current entry
+ * over everything incremented so far.
+ */
+ while (entry != &map->header && entry->end > start) {
+ entry->wired_count--;
+ entry = entry->prev;
+ }
+ vm_map_unlock(map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+ entry = entry->next;
+ }
+
+ /*
+ * Pass 2.
+ */
+
+ /*
+ * HACK HACK HACK HACK
+ *
+ * If we are wiring in the kernel map or a submap of it,
+ * unlock the map to avoid deadlocks. We trust that the
+ * kernel threads are well-behaved, and therefore will
+ * not do anything destructive to this region of the map
+ * while we have it unlocked. We cannot trust user threads
+ * to do the same.
+ *
+ * HACK HACK HACK HACK
+ */
+ if (vm_map_pmap(map) == kernel_pmap) {
+ vm_map_unlock(map); /* trust me ... */
+ }
+ else {
+ lock_set_recursive(&map->lock);
+ lock_write_to_read(&map->lock);
+ }
+
+ rv = 0;
+ entry = start_entry;
+ while (entry != &map->header && entry->start < end) {
+ /*
+ * If vm_fault_wire fails for any page we need to
+ * undo what has been done. We decrement the wiring
+ * count for those pages which have not yet been
+ * wired (now) and unwire those that have (later).
+ *
+ * XXX this violates the locking protocol on the map,
+ * needs to be fixed.
+ */
+ if (rv)
+ entry->wired_count--;
+ else if (entry->wired_count == 1) {
+ rv = vm_fault_wire(map, entry->start, entry->end);
+ if (rv) {
+ failed = entry->start; /* wiring stopped here */
+ entry->wired_count--;
+ }
+ }
+ entry = entry->next;
+ }
+
+ if (vm_map_pmap(map) == kernel_pmap) {
+ vm_map_lock(map);
+ }
+ else {
+ lock_clear_recursive(&map->lock);
+ }
+ if (rv) {
+ vm_map_unlock(map);
+ (void) vm_map_pageable(map, start, failed, TRUE); /* unwire [start, failed) again */
+ return(rv);
+ }
+ }
+
+ vm_map_unlock(map);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_clean
+ *
+ * Push any dirty cached pages in the address range to their pager.
+ * If syncio is TRUE, dirty pages are written synchronously.
+ * If invalidate is TRUE, any cached pages are freed as well.
+ *
+ * The map is read-locked for the duration of the call.
+ *
+ * Returns an error if any part of the specified range is not mapped.
+ * Specifically: KERN_INVALID_ADDRESS if "start" is unmapped or the
+ * range contains a hole, KERN_INVALID_ARGUMENT if an entry in the
+ * range is a submap, KERN_FAILURE if vm_object_page_clean returns
+ * false for a writable entry, KERN_SUCCESS otherwise.
+ *
+ * NOTE(review): neither loop below tests for &map->header
+ * explicitly; stopping at the end of the entry list appears to
+ * depend on the hole check and on the header's start field --
+ * confirm.
+ */
+int
+vm_map_clean(map, start, end, syncio, invalidate)
+ vm_map_t map;
+ vm_offset_t start;
+ vm_offset_t end;
+ boolean_t syncio;
+ boolean_t invalidate;
+{
+ register vm_map_entry_t current;
+ vm_map_entry_t entry;
+ vm_size_t size;
+ vm_object_t object;
+ vm_offset_t offset;
+
+ vm_map_lock_read(map);
+ VM_MAP_RANGE_CHECK(map, start, end);
+ if (!vm_map_lookup_entry(map, start, &entry)) {
+ vm_map_unlock_read(map);
+ return(KERN_INVALID_ADDRESS);
+ }
+
+ /*
+ * Make a first pass to check for holes.
+ * Nothing is cleaned unless the whole range passes.
+ */
+ for (current = entry; current->start < end; current = current->next) {
+ if (current->is_sub_map) {
+ vm_map_unlock_read(map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+ if (end > current->end &&
+ (current->next == &map->header ||
+ current->end != current->next->start)) {
+ vm_map_unlock_read(map);
+ return(KERN_INVALID_ADDRESS);
+ }
+ }
+
+ /*
+ * Make a second pass, cleaning/uncaching pages from the indicated
+ * objects as we go.
+ * "start" is advanced by the amount handled on each iteration.
+ */
+ for (current = entry; current->start < end; current = current->next) {
+ offset = current->offset + (start - current->start);
+ size = (end <= current->end ? end : current->end) - start;
+ if (current->is_a_map) {
+ register vm_map_t smap;
+ vm_map_entry_t tentry;
+ vm_size_t tsize;
+
+ smap = current->object.share_map;
+ vm_map_lock_read(smap);
+ (void) vm_map_lookup_entry(smap, offset, &tentry);
+ tsize = tentry->end - offset;
+ if (tsize < size)
+ size = tsize; /* NOTE(review): the rest of this entry looks skipped when this trims size -- confirm */
+ object = tentry->object.vm_object;
+ offset = tentry->offset + (offset - tentry->start);
+ vm_object_lock(object);
+ vm_map_unlock_read(smap);
+ } else {
+ object = current->object.vm_object;
+ vm_object_lock(object);
+ }
+ /*
+ * Flush pages if writing is allowed.
+ * XXX should we continue on an error?
+ */
+ if ((current->protection & VM_PROT_WRITE) &&
+ !vm_object_page_clean(object, offset, offset+size,
+ syncio, FALSE)) {
+ vm_object_unlock(object);
+ vm_map_unlock_read(map);
+ return(KERN_FAILURE);
+ }
+ if (invalidate)
+ vm_object_page_remove(object, offset, offset+size);
+ vm_object_unlock(object);
+ start += size;
+ }
+
+ vm_map_unlock_read(map);
+ return(KERN_SUCCESS);
+}
+
+/*
+ *	vm_map_entry_unwire:	[ internal use only ]
+ *
+ *	Unwires the whole address range covered by the entry through
+ *	vm_fault_unwire and resets the entry's wired_count to zero,
+ *	whatever its previous value.
+ *
+ *	The map in question should be locked.
+ *	[This is the reason for this routine's existence.]
+ */
+void vm_map_entry_unwire(map, entry)
+	vm_map_t		map;
+	register vm_map_entry_t	entry;
+{
+	register vm_offset_t	lo = entry->start;
+	register vm_offset_t	hi = entry->end;
+
+	vm_fault_unwire(map, lo, hi);
+	entry->wired_count = 0;
+}
+
+/*
+ *	vm_map_entry_delete:	[ internal use only ]
+ *
+ *	Removes the given entry from the target map: unwires it if
+ *	needed, unlinks it, subtracts its extent from the map's size,
+ *	drops the reference it held on its backing map or object and
+ *	finally disposes of the entry itself.
+ */
+void vm_map_entry_delete(map, entry)
+	register vm_map_t	map;
+	register vm_map_entry_t	entry;
+{
+	/* A wired entry has to be unwired before it goes away. */
+	if (entry->wired_count != 0)
+		vm_map_entry_unwire(map, entry);
+
+	vm_map_entry_unlink(map, entry);
+	map->size -= entry->end - entry->start;
+
+	/* Release whatever the entry was pointing at. */
+	if (!entry->is_a_map && !entry->is_sub_map)
+		vm_object_deallocate(entry->object.vm_object);
+	else
+		vm_map_deallocate(entry->object.share_map);
+
+	vm_map_entry_dispose(map, entry);
+}
+
+/*
+ * vm_map_delete: [ internal use only ]
+ *
+ * Deallocates the given address range from the target
+ * map.
+ *
+ * When called with a sharing map, removes pages from
+ * that region from all physical maps.
+ *
+ * Entries straddling either end of the range are clipped so
+ * that only the part inside [start, end) is removed.
+ * No locking is done here; the callers visible in this file
+ * lock the map before calling. Always returns KERN_SUCCESS.
+ */
+int
+vm_map_delete(map, start, end)
+ register vm_map_t map;
+ vm_offset_t start;
+ register vm_offset_t end;
+{
+ register vm_map_entry_t entry;
+ vm_map_entry_t first_entry;
+
+ /*
+ * Find the start of the region, and clip it
+ */
+
+ if (!vm_map_lookup_entry(map, start, &first_entry))
+ entry = first_entry->next;
+ else {
+ entry = first_entry;
+ vm_map_clip_start(map, entry, start);
+
+ /*
+ * Fix the lookup hint now, rather than each
+ * time through the loop.
+ */
+
+ SAVE_HINT(map, entry->prev);
+ }
+
+ /*
+ * Save the free space hint
+ */
+
+ if (map->first_free->start >= start)
+ map->first_free = entry->prev;
+
+ /*
+ * Step through all entries in this region
+ */
+
+ while ((entry != &map->header) && (entry->start < end)) {
+ vm_map_entry_t next;
+ register vm_offset_t s, e;
+ register vm_object_t object;
+
+ vm_map_clip_end(map, entry, end);
+
+ next = entry->next; /* remember the successor; entry is deleted below */
+ s = entry->start;
+ e = entry->end;
+
+ /*
+ * Unwire before removing addresses from the pmap;
+ * otherwise, unwiring will put the entries back in
+ * the pmap.
+ */
+
+ object = entry->object.vm_object;
+ if (entry->wired_count != 0)
+ vm_map_entry_unwire(map, entry);
+
+ /*
+ * If this is a sharing map, we must remove
+ * *all* references to this data, since we can't
+ * find all of the physical maps which are sharing
+ * it.
+ *
+ * Three cases: pages of kernel_object and kmem_object
+ * are removed from the object itself; for a map that
+ * is not a main map the object's pages are removed
+ * from all pmaps; otherwise only this map's pmap
+ * range is removed.
+ */
+
+ if (object == kernel_object || object == kmem_object)
+ vm_object_page_remove(object, entry->offset,
+ entry->offset + (e - s));
+ else if (!map->is_main_map)
+ vm_object_pmap_remove(object,
+ entry->offset,
+ entry->offset + (e - s));
+ else
+ pmap_remove(map->pmap, s, e);
+
+ /*
+ * Delete the entry (which may delete the object)
+ * only after removing all pmap entries pointing
+ * to its pages. (Otherwise, its page frames may
+ * be reallocated, and any modify bits will be
+ * set in the wrong object!)
+ */
+
+ vm_map_entry_delete(map, entry);
+ entry = next;
+ }
+ return(KERN_SUCCESS);
+}
+
+/*
+ *	vm_map_remove:
+ *
+ *	Exported wrapper around vm_map_delete: locks the map, clamps
+ *	the range to the map's bounds, deletes it and unlocks the map
+ *	again.  Returns whatever vm_map_delete returns.
+ */
+int
+vm_map_remove(map, start, end)
+	register vm_map_t	map;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+{
+	register int		rv;
+
+	vm_map_lock(map);
+
+	VM_MAP_RANGE_CHECK(map, start, end);
+	rv = vm_map_delete(map, start, end);
+
+	vm_map_unlock(map);
+
+	return(rv);
+}
+
+/*
+ *	vm_map_check_protection:
+ *
+ *	Reports whether the target map grants every privilege in
+ *	"protection" over the whole of [start, end).  The entire
+ *	region must be allocated: FALSE is returned as soon as an
+ *	unmapped address or an entry lacking one of the requested
+ *	privileges is met.
+ */
+boolean_t vm_map_check_protection(map, start, end, protection)
+	register vm_map_t	map;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+	register vm_prot_t	protection;
+{
+	register vm_map_entry_t	entry;
+	vm_map_entry_t		found;
+
+	/* The very first address has to be mapped. */
+	if (!vm_map_lookup_entry(map, start, &found))
+		return(FALSE);
+
+	/*
+	 *	Walk the entries, moving "start" to the end of each
+	 *	entry examined.  Running off the list, finding a gap
+	 *	in front of the next entry (no holes allowed!) or
+	 *	finding insufficient protection all mean failure.
+	 */
+	for (entry = found; start < end;
+	     start = entry->end, entry = entry->next) {
+		if (entry == &map->header ||
+		    start < entry->start ||
+		    (entry->protection & protection) != protection)
+			return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/*
+ * vm_map_copy_entry:
+ *
+ * Copies the contents of the source entry to the destination
+ * entry. The entries *must* be aligned properly.
+ *
+ * Nothing is done if either entry is a submap entry.
+ * The destination is unwired if it was wired, and its old
+ * pages are removed from the physical map(s) first.
+ * An unwired source is copied by setting up copy-on-write
+ * between the two entries; a wired source is copied through
+ * vm_fault_copy_entry instead.
+ *
+ * No value is returned and no locks are taken on the maps
+ * here (only src_map->ref_lock, briefly).
+ */
+void vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
+ vm_map_t src_map, dst_map;
+ register vm_map_entry_t src_entry, dst_entry;
+{
+ vm_object_t temp_object;
+
+ if (src_entry->is_sub_map || dst_entry->is_sub_map)
+ return;
+
+ if (dst_entry->object.vm_object != NULL &&
+ (dst_entry->object.vm_object->flags & OBJ_INTERNAL) == 0)
+ printf("vm_map_copy_entry: copying over permanent data!\n"); /* warning only; the copy still proceeds */
+
+ /*
+ * If our destination map was wired down,
+ * unwire it now.
+ */
+
+ if (dst_entry->wired_count != 0)
+ vm_map_entry_unwire(dst_map, dst_entry);
+
+ /*
+ * If we're dealing with a sharing map, we
+ * must remove the destination pages from
+ * all maps (since we cannot know which maps
+ * this sharing map belongs in).
+ */
+
+ if (dst_map->is_main_map)
+ pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end);
+ else
+ vm_object_pmap_remove(dst_entry->object.vm_object,
+ dst_entry->offset,
+ dst_entry->offset +
+ (dst_entry->end - dst_entry->start));
+
+ if (src_entry->wired_count == 0) {
+
+ boolean_t src_needs_copy;
+
+ /*
+ * If the source entry is marked needs_copy,
+ * it is already write-protected.
+ */
+ if (!src_entry->needs_copy) {
+
+ boolean_t su; /* TRUE when the source has a single mapping */
+
+ /*
+ * If the source entry has only one mapping,
+ * we can just protect the virtual address
+ * range.
+ * That is taken to be the case for a main map,
+ * or for a sharing map with exactly one reference.
+ */
+ if (!(su = src_map->is_main_map)) {
+ simple_lock(&src_map->ref_lock);
+ su = (src_map->ref_count == 1);
+ simple_unlock(&src_map->ref_lock);
+ }
+
+ if (su) {
+ pmap_protect(src_map->pmap,
+ src_entry->start,
+ src_entry->end,
+ src_entry->protection & ~VM_PROT_WRITE);
+ }
+ else {
+ vm_object_pmap_copy(src_entry->object.vm_object,
+ src_entry->offset,
+ src_entry->offset + (src_entry->end
+ -src_entry->start));
+ }
+ }
+
+ /*
+ * Make a copy of the object.
+ * The destination's previous object is remembered in
+ * temp_object so that it can be released afterwards.
+ */
+ temp_object = dst_entry->object.vm_object;
+ vm_object_copy(src_entry->object.vm_object,
+ src_entry->offset,
+ (vm_size_t)(src_entry->end -
+ src_entry->start),
+ &dst_entry->object.vm_object,
+ &dst_entry->offset,
+ &src_needs_copy);
+ /*
+ * If we didn't get a copy-object now, mark the
+ * source map entry so that a shadow will be created
+ * to hold its changed pages.
+ */
+ if (src_needs_copy)
+ src_entry->needs_copy = TRUE;
+
+ /*
+ * The destination always needs to have a shadow
+ * created.
+ */
+ dst_entry->needs_copy = TRUE;
+
+ /*
+ * Mark the entries copy-on-write, so that write-enabling
+ * the entry won't make copy-on-write pages writable.
+ */
+ src_entry->copy_on_write = TRUE;
+ dst_entry->copy_on_write = TRUE;
+ /*
+ * Get rid of the old object.
+ */
+ vm_object_deallocate(temp_object);
+
+ pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
+ dst_entry->end - dst_entry->start, src_entry->start);
+ }
+ else {
+ /*
+ * Of course, wired down pages can't be set copy-on-write.
+ * Cause wired pages to be copied into the new
+ * map by simulating faults (the new pages are
+ * pageable)
+ */
+ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
+ }
+}
+
+/*
+ * vm_map_copy:
+ *
+ * Perform a virtual memory copy from the source
+ * address map/range to the destination map/range.
+ *
+ * If src_destroy or dst_alloc is requested,
+ * the source and destination regions should be
+ * disjoint, not only in the top-level map, but
+ * in the sharing maps as well. [The best way
+ * to guarantee this is to use a new intermediate
+ * map to make copies. This also reduces map
+ * fragmentation.]
+ */
+int
+vm_map_copy(dst_map, src_map,
+ dst_addr, len, src_addr,
+ dst_alloc, src_destroy)
+ vm_map_t dst_map;
+ vm_map_t src_map;
+ vm_offset_t dst_addr;
+ vm_size_t len;
+ vm_offset_t src_addr;
+ boolean_t dst_alloc;
+ boolean_t src_destroy;
+{
+ /*
+ * Parameters:
+ * dst_map, dst_addr - destination map and start address.
+ * src_map, src_addr - source map and start address.
+ * len - size of the region to copy.
+ * dst_alloc - if TRUE (and both maps are main maps), the
+ * destination range is first created with vm_map_insert
+ * instead of being checked for write permission.
+ * src_destroy - if TRUE, the source range is removed with
+ * vm_map_delete before returning (see old_src_destroy).
+ *
+ * Returns KERN_NO_SPACE if either range wraps around,
+ * KERN_PROTECTION_FAILURE if a protection check fails, the
+ * vm_map_insert error if the destination cannot be allocated,
+ * and KERN_SUCCESS otherwise. The results of the recursive
+ * copies through sharing maps are discarded.
+ */
+ register
+ vm_map_entry_t src_entry;
+ register
+ vm_map_entry_t dst_entry;
+ vm_map_entry_t tmp_entry;
+ vm_offset_t src_start;
+ vm_offset_t src_end;
+ vm_offset_t dst_start;
+ vm_offset_t dst_end;
+ vm_offset_t src_clip;
+ vm_offset_t dst_clip;
+ int result;
+ boolean_t old_src_destroy;
+
+ /*
+ * XXX While we figure out why src_destroy screws up,
+ * we'll do it by explicitly vm_map_delete'ing at the end.
+ *
+ * Because src_destroy is forced to FALSE here and never set
+ * again, the src_destroy branches further down (the per-entry
+ * vm_map_entry_delete and the final pmap_remove) do not run;
+ * old_src_destroy drives the deletion at Return instead.
+ */
+
+ old_src_destroy = src_destroy;
+ src_destroy = FALSE;
+
+ /*
+ * Compute start and end of region in both maps
+ */
+
+ src_start = src_addr;
+ src_end = src_start + len;
+ dst_start = dst_addr;
+ dst_end = dst_start + len;
+
+ /*
+ * Check that the region can exist in both source
+ * and destination.
+ *
+ * An end below its start means start + len wrapped around.
+ * No lock is held yet, so this return skips the Return label.
+ */
+
+ if ((dst_end < dst_start) || (src_end < src_start))
+ return(KERN_NO_SPACE);
+
+ /*
+ * Lock the maps in question -- we avoid deadlock
+ * by ordering lock acquisition by map value
+ *
+ * NOTE(review): the ordering compares the pointers after
+ * casting them to int; where pointers are wider than int the
+ * cast discards bits -- confirm this is acceptable on the
+ * target.
+ */
+
+ if (src_map == dst_map) {
+ vm_map_lock(src_map);
+ }
+ else if ((int) src_map < (int) dst_map) {
+ vm_map_lock(src_map);
+ vm_map_lock(dst_map);
+ } else {
+ vm_map_lock(dst_map);
+ vm_map_lock(src_map);
+ }
+
+ result = KERN_SUCCESS;
+
+ /*
+ * Check protections... source must be completely readable and
+ * destination must be completely writable. [Note that if we're
+ * allocating the destination region, we don't have to worry
+ * about protection, but instead about whether the region
+ * exists.]
+ *
+ * These checks (and the dst_alloc insertion) are only made
+ * when both maps are main maps.
+ */
+
+ if (src_map->is_main_map && dst_map->is_main_map) {
+ if (!vm_map_check_protection(src_map, src_start, src_end,
+ VM_PROT_READ)) {
+ result = KERN_PROTECTION_FAILURE;
+ goto Return;
+ }
+
+ if (dst_alloc) {
+ /* XXX Consider making this a vm_map_find instead */
+ if ((result = vm_map_insert(dst_map, NULL,
+ (vm_offset_t) 0, dst_start, dst_end)) != KERN_SUCCESS)
+ goto Return;
+ }
+ else if (!vm_map_check_protection(dst_map, dst_start, dst_end,
+ VM_PROT_WRITE)) {
+ result = KERN_PROTECTION_FAILURE;
+ goto Return;
+ }
+ }
+
+ /*
+ * Find the start entries and clip.
+ *
+ * Note that checking protection asserts that the
+ * lookup cannot fail.
+ *
+ * Also note that we wait to do the second lookup
+ * until we have done the first clip, as the clip
+ * may affect which entry we get!
+ */
+
+ (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
+ src_entry = tmp_entry;
+ vm_map_clip_start(src_map, src_entry, src_start);
+
+ (void) vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry);
+ dst_entry = tmp_entry;
+ vm_map_clip_start(dst_map, dst_entry, dst_start);
+
+ /*
+ * If both source and destination entries are the same,
+ * retry the first lookup, as it may have changed.
+ */
+
+ if (src_entry == dst_entry) {
+ (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
+ src_entry = tmp_entry;
+ }
+
+ /*
+ * If source and destination entries are still the same,
+ * a null copy is being performed.
+ */
+
+ if (src_entry == dst_entry)
+ goto Return;
+
+ /*
+ * Go through entries until we get to the end of the
+ * region.
+ */
+
+ while (src_start < src_end) {
+ /*
+ * Clip the entries to the endpoint of the entire region.
+ */
+
+ vm_map_clip_end(src_map, src_entry, src_end);
+ vm_map_clip_end(dst_map, dst_entry, dst_end);
+
+ /*
+ * Clip each entry to the endpoint of the other entry.
+ */
+
+ src_clip = src_entry->start + (dst_entry->end - dst_entry->start);
+ vm_map_clip_end(src_map, src_entry, src_clip);
+
+ dst_clip = dst_entry->start + (src_entry->end - src_entry->start);
+ vm_map_clip_end(dst_map, dst_entry, dst_clip);
+
+ /*
+ * Both entries now match in size and relative endpoints.
+ *
+ * If both entries refer to a VM object, we can
+ * deal with them now.
+ */
+
+ if (!src_entry->is_a_map && !dst_entry->is_a_map) {
+ vm_map_copy_entry(src_map, dst_map, src_entry,
+ dst_entry);
+ }
+ else {
+ register vm_map_t new_dst_map;
+ vm_offset_t new_dst_start;
+ vm_size_t new_size;
+ vm_map_t new_src_map;
+ vm_offset_t new_src_start;
+
+ /*
+ * We have to follow at least one sharing map.
+ */
+
+ new_size = (dst_entry->end - dst_entry->start);
+
+ if (src_entry->is_a_map) {
+ new_src_map = src_entry->object.share_map;
+ new_src_start = src_entry->offset;
+ }
+ else {
+ /*
+ * The same map is passed to the recursive call
+ * below, which locks it again; mark the lock
+ * recursive until that call returns.
+ */
+ new_src_map = src_map;
+ new_src_start = src_entry->start;
+ lock_set_recursive(&src_map->lock);
+ }
+
+ if (dst_entry->is_a_map) {
+ vm_offset_t new_dst_end;
+
+ new_dst_map = dst_entry->object.share_map;
+ new_dst_start = dst_entry->offset;
+
+ /*
+ * Since the destination sharing entries
+ * will be merely deallocated, we can
+ * do that now, and replace the region
+ * with a null object. [This prevents
+ * splitting the source map to match
+ * the form of the destination map.]
+ * Note that we can only do so if the
+ * source and destination do not overlap.
+ */
+
+ new_dst_end = new_dst_start + new_size;
+
+ if (new_dst_map != new_src_map) {
+ vm_map_lock(new_dst_map);
+ (void) vm_map_delete(new_dst_map,
+ new_dst_start,
+ new_dst_end);
+ (void) vm_map_insert(new_dst_map,
+ NULL,
+ (vm_offset_t) 0,
+ new_dst_start,
+ new_dst_end);
+ vm_map_unlock(new_dst_map);
+ }
+ }
+ else {
+ /* Same reasoning as for src_map above. */
+ new_dst_map = dst_map;
+ new_dst_start = dst_entry->start;
+ lock_set_recursive(&dst_map->lock);
+ }
+
+ /*
+ * Recursively copy the sharing map.
+ *
+ * The return value is deliberately ignored, so a
+ * failure in the nested copy does not change result.
+ */
+
+ (void) vm_map_copy(new_dst_map, new_src_map,
+ new_dst_start, new_size, new_src_start,
+ FALSE, FALSE);
+
+ if (dst_map == new_dst_map)
+ lock_clear_recursive(&dst_map->lock);
+ if (src_map == new_src_map)
+ lock_clear_recursive(&src_map->lock);
+ }
+
+ /*
+ * Update variables for next pass through the loop.
+ */
+
+ src_start = src_entry->end;
+ src_entry = src_entry->next;
+ dst_start = dst_entry->end;
+ dst_entry = dst_entry->next;
+
+ /*
+ * If the source is to be destroyed, here is the
+ * place to do it.
+ *
+ * (Not reached in this version: src_destroy is FALSE.)
+ */
+
+ if (src_destroy && src_map->is_main_map &&
+ dst_map->is_main_map)
+ vm_map_entry_delete(src_map, src_entry->prev);
+ }
+
+ /*
+ * Update the physical maps as appropriate
+ */
+
+ if (src_map->is_main_map && dst_map->is_main_map) {
+ if (src_destroy)
+ pmap_remove(src_map->pmap, src_addr, src_addr + len);
+ }
+
+ /*
+ * Unlock the maps
+ *
+ * NOTE(review): every "goto Return" above lands here, so a
+ * requested source deletion also happens after a failed
+ * protection check or vm_map_insert, and in the null-copy
+ * case -- confirm that this is intended.
+ */
+
+ Return: ;
+
+ if (old_src_destroy)
+ vm_map_delete(src_map, src_addr, src_addr + len);
+
+ vm_map_unlock(src_map);
+ if (src_map != dst_map)
+ vm_map_unlock(dst_map);
+
+ return(result);
+}
+
+/*
+ * vmspace_fork:
+ * Create a new process vmspace structure and vm_map
+ * based on those of an existing process. The new map
+ * is based on the old map, according to the inheritance
+ * values on the regions in that map.
+ *
+ * The source map must not be locked.
+ */
+struct vmspace *
+vmspace_fork(vm1)
+ register struct vmspace *vm1;
+{
+ /*
+ * Parameters:
+ * vm1 - the vmspace being forked; its map is write-locked for
+ * the duration of the walk and unlocked before returning.
+ *
+ * Returns the newly allocated vmspace (vm2), whose map has been
+ * populated entry by entry according to each old entry's
+ * inheritance value.
+ */
+ register struct vmspace *vm2;
+ vm_map_t old_map = &vm1->vm_map;
+ vm_map_t new_map;
+ vm_map_entry_t old_entry;
+ vm_map_entry_t new_entry;
+ pmap_t new_pmap;
+
+ vm_map_lock(old_map);
+
+ vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset,
+ old_map->entries_pageable);
+ /*
+ * Copy everything from the vm_startcopy member through the end
+ * of struct vmspace ((vm1 + 1) is one past the structure).
+ */
+ bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
+ (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
+ /* NOTE(review): new_pmap is assigned but not used below. */
+ new_pmap = &vm2->vm_pmap; /* XXX */
+ new_map = &vm2->vm_map; /* XXX */
+
+ old_entry = old_map->header.next;
+
+ /*
+ * Walk the old map in order; new entries are always linked
+ * after new_map->header.prev, i.e. appended at the end.
+ */
+ while (old_entry != &old_map->header) {
+ if (old_entry->is_sub_map)
+ panic("vm_map_fork: encountered a submap");
+
+ /*
+ * There is no default case: an inheritance value other
+ * than the three below matches nothing and the entry is
+ * skipped.
+ */
+ switch (old_entry->inheritance) {
+ case VM_INHERIT_NONE:
+ break;
+
+ case VM_INHERIT_SHARE:
+ /*
+ * If we don't already have a sharing map:
+ */
+
+ if (!old_entry->is_a_map) {
+ vm_map_t new_share_map;
+ vm_map_entry_t new_share_entry;
+
+ /*
+ * Create a new sharing map
+ */
+
+ new_share_map = vm_map_create(NULL,
+ old_entry->start,
+ old_entry->end,
+ TRUE);
+ new_share_map->is_main_map = FALSE;
+
+ /*
+ * Create the only sharing entry from the
+ * old task map entry.
+ *
+ * The structure assignment copies every field of
+ * the old entry; only wired_count is reset.
+ */
+
+ new_share_entry =
+ vm_map_entry_create(new_share_map);
+ *new_share_entry = *old_entry;
+ new_share_entry->wired_count = 0;
+
+ /*
+ * Insert the entry into the new sharing
+ * map
+ */
+
+ vm_map_entry_link(new_share_map,
+ new_share_map->header.prev,
+ new_share_entry);
+
+ /*
+ * Fix up the task map entry to refer
+ * to the sharing map now.
+ *
+ * The offset becomes the entry's own start
+ * address, matching the range the sharing map
+ * was created with.
+ */
+
+ old_entry->is_a_map = TRUE;
+ old_entry->object.share_map = new_share_map;
+ old_entry->offset = old_entry->start;
+ }
+
+ /*
+ * Clone the entry, referencing the sharing map.
+ */
+
+ new_entry = vm_map_entry_create(new_map);
+ *new_entry = *old_entry;
+ new_entry->wired_count = 0;
+ vm_map_reference(new_entry->object.share_map);
+
+ /*
+ * Insert the entry into the new map -- we
+ * know we're inserting at the end of the new
+ * map.
+ */
+
+ vm_map_entry_link(new_map, new_map->header.prev,
+ new_entry);
+
+ /*
+ * Update the physical map
+ */
+
+ pmap_copy(new_map->pmap, old_map->pmap,
+ new_entry->start,
+ (old_entry->end - old_entry->start),
+ old_entry->start);
+ break;
+
+ case VM_INHERIT_COPY:
+ /*
+ * Clone the entry and link into the map.
+ *
+ * The clone starts with no object and is_a_map
+ * cleared; the copy routines called below supply
+ * its contents.
+ */
+
+ new_entry = vm_map_entry_create(new_map);
+ *new_entry = *old_entry;
+ new_entry->wired_count = 0;
+ new_entry->object.vm_object = NULL;
+ new_entry->is_a_map = FALSE;
+ vm_map_entry_link(new_map, new_map->header.prev,
+ new_entry);
+ if (old_entry->is_a_map) {
+ int check;
+
+ /*
+ * The source is the sharing map itself, addressed
+ * by the old entry's offset. A failure is only
+ * reported, not returned to the caller.
+ */
+ check = vm_map_copy(new_map,
+ old_entry->object.share_map,
+ new_entry->start,
+ (vm_size_t)(new_entry->end -
+ new_entry->start),
+ old_entry->offset,
+ FALSE, FALSE);
+ if (check != KERN_SUCCESS)
+ printf("vm_map_fork: copy in share_map region failed\n");
+ }
+ else {
+ vm_map_copy_entry(old_map, new_map, old_entry,
+ new_entry);
+ }
+ break;
+ }
+ old_entry = old_entry->next;
+ }
+
+ /*
+ * NOTE(review): the size is copied wholesale, even when
+ * VM_INHERIT_NONE entries were skipped above.
+ */
+ new_map->size = old_map->size;
+ vm_map_unlock(old_map);
+
+ return(vm2);
+}
+
+/*
+ * vm_map_lookup:
+ *
+ * Finds the VM object, offset, and
+ * protection for a given virtual address in the
+ * specified map, assuming a page fault of the
+ * type specified.
+ *
+ * Leaves the map in question locked for read; return
+ * values are guaranteed until a vm_map_lookup_done
+ * call is performed. Note that the map argument
+ * is in/out; the returned map must be used in
+ * the call to vm_map_lookup_done.
+ *
+ * A handle (out_entry) is returned for use in
+ * vm_map_lookup_done, to make that fast.
+ *
+ * If a lookup is requested with "write protection"
+ * specified, the map may be changed to perform virtual
+ * copying operations, although the data referenced will
+ * remain the same.
+ */
+int
+vm_map_lookup(var_map, vaddr, fault_type, out_entry,
+ object, offset, out_prot, wired, single_use)
+ vm_map_t *var_map; /* IN/OUT */
+ register vm_offset_t vaddr;
+ register vm_prot_t fault_type;
+
+ vm_map_entry_t *out_entry; /* OUT */
+ vm_object_t *object; /* OUT */
+ vm_offset_t *offset; /* OUT */
+ vm_prot_t *out_prot; /* OUT */
+ boolean_t *wired; /* OUT */
+ boolean_t *single_use; /* OUT */
+{
+ /*
+ * Returns:
+ * KERN_INVALID_ADDRESS - no entry covers vaddr in the map (or,
+ * for a shared entry, in its sharing map).
+ * KERN_PROTECTION_FAILURE - fault_type asks for access that the
+ * entry's protection does not include.
+ * KERN_SUCCESS - the OUT parameters are filled in.
+ *
+ * On KERN_SUCCESS the read locks taken here are still held; on
+ * the error returns they have been released. If a submap was
+ * followed, *var_map has been replaced by that submap.
+ */
+ vm_map_t share_map;
+ vm_offset_t share_offset;
+ register vm_map_entry_t entry;
+ register vm_map_t map = *var_map;
+ register vm_prot_t prot;
+ register boolean_t su;
+
+ RetryLookup: ;
+
+ /*
+ * Lookup the faulting address.
+ */
+
+ vm_map_lock_read(map);
+
+ /*
+ * RETURN drops the read lock on the current map and returns
+ * "why"; it is used on the failure paths below.
+ */
+#define RETURN(why) \
+ { \
+ vm_map_unlock_read(map); \
+ return(why); \
+ }
+
+ /*
+ * If the map has an interesting hint, try it before calling
+ * full blown lookup routine.
+ */
+
+ simple_lock(&map->hint_lock);
+ entry = map->hint;
+ simple_unlock(&map->hint_lock);
+
+ *out_entry = entry;
+
+ if ((entry == &map->header) ||
+ (vaddr < entry->start) || (vaddr >= entry->end)) {
+ vm_map_entry_t tmp_entry;
+
+ /*
+ * Entry was either not a valid hint, or the vaddr
+ * was not contained in the entry, so do a full lookup.
+ */
+ if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
+ RETURN(KERN_INVALID_ADDRESS);
+
+ entry = tmp_entry;
+ *out_entry = entry;
+ }
+
+ /*
+ * Handle submaps.
+ *
+ * The submap becomes the map being searched (and is reported
+ * back through *var_map); the outer map's read lock is dropped
+ * and the lookup restarts with the same vaddr.
+ */
+
+ if (entry->is_sub_map) {
+ vm_map_t old_map = map;
+
+ *var_map = map = entry->object.sub_map;
+ vm_map_unlock_read(old_map);
+ goto RetryLookup;
+ }
+
+ /*
+ * Check whether this task is allowed to have
+ * this page.
+ */
+
+ prot = entry->protection;
+ if ((fault_type & (prot)) != fault_type)
+ RETURN(KERN_PROTECTION_FAILURE);
+
+ /*
+ * If this page is not pageable, we have to get
+ * it for all possible accesses.
+ *
+ * The assignment inside the condition is intentional: *wired
+ * records whether the entry has a nonzero wired_count.
+ */
+
+ if (*wired = (entry->wired_count != 0))
+ prot = fault_type = entry->protection;
+
+ /*
+ * If we don't already have a VM object, track
+ * it down.
+ *
+ * The assignment inside the condition is intentional: su starts
+ * out TRUE when the entry refers to an object directly, in
+ * which case share_map is simply the map itself.
+ */
+
+ if (su = !entry->is_a_map) {
+ share_map = map;
+ share_offset = vaddr;
+ }
+ else {
+ vm_map_entry_t share_entry;
+
+ /*
+ * Compute the sharing map, and offset into it.
+ */
+
+ share_map = entry->object.share_map;
+ share_offset = (vaddr - entry->start) + entry->offset;
+
+ /*
+ * Look for the backing store object and offset
+ */
+
+ vm_map_lock_read(share_map);
+
+ if (!vm_map_lookup_entry(share_map, share_offset,
+ &share_entry)) {
+ vm_map_unlock_read(share_map);
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+ /*
+ * From here on "entry" is the sharing map's entry;
+ * *out_entry still names the entry in the outer map.
+ */
+ entry = share_entry;
+ }
+
+ /*
+ * If the entry was copy-on-write, we either ...
+ */
+
+ if (entry->needs_copy) {
+ /*
+ * If we want to write the page, we may as well
+ * handle that now since we've got the sharing
+ * map locked.
+ *
+ * If we don't need to write the page, we just
+ * demote the permissions allowed.
+ */
+
+ if (fault_type & VM_PROT_WRITE) {
+ /*
+ * Make a new object, and place it in the
+ * object chain. Note that no new references
+ * have appeared -- one just moved from the
+ * share map to the new object.
+ */
+
+ /*
+ * A nonzero return is treated as a failed upgrade:
+ * release the outer map's read lock (when it is a
+ * different lock) and start over.
+ * NOTE(review): this relies on the failed upgrade
+ * having already released share_map's read lock --
+ * confirm against lock_read_to_write.
+ */
+ if (lock_read_to_write(&share_map->lock)) {
+ if (share_map != map)
+ vm_map_unlock_read(map);
+ goto RetryLookup;
+ }
+
+ vm_object_shadow(
+ &entry->object.vm_object,
+ &entry->offset,
+ (vm_size_t) (entry->end - entry->start));
+
+ entry->needs_copy = FALSE;
+
+ lock_write_to_read(&share_map->lock);
+ }
+ else {
+ /*
+ * We're attempting to read a copy-on-write
+ * page -- don't allow writes.
+ */
+
+ prot &= (~VM_PROT_WRITE);
+ }
+ }
+
+ /*
+ * Create an object if necessary.
+ *
+ * Same upgrade-or-retry sequence as above; the new object is
+ * sized to the whole entry and the entry's offset reset to 0.
+ */
+ if (entry->object.vm_object == NULL) {
+
+ if (lock_read_to_write(&share_map->lock)) {
+ if (share_map != map)
+ vm_map_unlock_read(map);
+ goto RetryLookup;
+ }
+
+ entry->object.vm_object = vm_object_allocate(
+ (vm_size_t)(entry->end - entry->start));
+ entry->offset = 0;
+ lock_write_to_read(&share_map->lock);
+ }
+
+ /*
+ * Return the object/offset from this entry. If the entry
+ * was copy-on-write or empty, it has been fixed up.
+ */
+
+ *offset = (share_offset - entry->start) + entry->offset;
+ *object = entry->object.vm_object;
+
+ /*
+ * Return whether this is the only map sharing this data.
+ */
+
+ if (!su) {
+ simple_lock(&share_map->ref_lock);
+ su = (share_map->ref_count == 1);
+ simple_unlock(&share_map->ref_lock);
+ }
+
+ *out_prot = prot;
+ *single_use = su;
+
+ /* Locks stay held; the caller releases them later. */
+ return(KERN_SUCCESS);
+
+#undef RETURN
+}
+
+/*
+ * vm_map_lookup_done:
+ *
+ * Releases locks acquired by a vm_map_lookup
+ * (according to the handle returned by that lookup).
+ */
+
+void vm_map_lookup_done(map, entry)
+    register vm_map_t map;
+    vm_map_entry_t entry;
+{
+    /*
+     * An entry that refers to a sharing map means that map is
+     * released first, ahead of the map handed in by the caller.
+     */
+    if (entry->is_a_map) {
+        vm_map_t shared = entry->object.share_map;
+
+        vm_map_unlock_read(shared);
+    }
+
+    /*
+     * Finally release the read lock on the main-level map.
+     */
+    vm_map_unlock_read(map);
+}
+
+/*
+ * Routine: vm_map_simplify
+ * Purpose:
+ * Attempt to simplify the map representation in
+ * the vicinity of the given starting address.
+ * Note:
+ * This routine is intended primarily to keep the
+ * kernel maps more compact -- they generally don't
+ * benefit from the "expand a map entry" technology
+ * at allocation time because the adjacent entry
+ * is often wired down.
+ */
+void vm_map_simplify(map, start)
+    vm_map_t map;
+    vm_offset_t start;
+{
+    vm_map_entry_t cur;     /* entry found at "start" */
+    vm_map_entry_t before;  /* its predecessor in the list */
+
+    vm_map_lock(map);
+
+    /*
+     * There must be an entry at "start" whose predecessor is a real
+     * entry ending exactly at "start", and only main maps qualify.
+     */
+    if (!vm_map_lookup_entry(map, start, &cur))
+        goto done;
+    before = cur->prev;
+    if (before == &map->header || before->end != start)
+        goto done;
+    if (!map->is_main_map)
+        goto done;
+
+    /*
+     * Neither entry may refer to a sharing map or a submap.
+     */
+    if (before->is_a_map != FALSE || before->is_sub_map != FALSE ||
+        cur->is_a_map != FALSE || cur->is_sub_map != FALSE)
+        goto done;
+
+    /*
+     * All attributes of the two entries have to agree.
+     */
+    if (before->inheritance != cur->inheritance ||
+        before->protection != cur->protection ||
+        before->max_protection != cur->max_protection ||
+        before->wired_count != cur->wired_count)
+        goto done;
+    if (before->copy_on_write != cur->copy_on_write ||
+        before->needs_copy != cur->needs_copy)
+        goto done;
+
+    /*
+     * Same object, and the second entry's offset continues exactly
+     * where the first entry's range stops.
+     */
+    if (before->object.vm_object != cur->object.vm_object)
+        goto done;
+    if (before->offset + (before->end - before->start) != cur->offset)
+        goto done;
+
+    /*
+     * Merge: move the hints off the doomed entry, unlink it, grow
+     * the predecessor over its range, then release the entry and
+     * the object reference it carried.
+     */
+    if (map->first_free == cur)
+        map->first_free = before;
+
+    SAVE_HINT(map, before);
+    vm_map_entry_unlink(map, cur);
+    before->end = cur->end;
+    vm_object_deallocate(cur->object.vm_object);
+    vm_map_entry_dispose(map, cur);
+
+done:
+    vm_map_unlock(map);
+}
+
+/*
+ * vm_map_print: [ debug ]
+ */
+void vm_map_print(map, full)
+    register vm_map_t map;
+    boolean_t full;
+{
+    /* Printable names, indexed by an entry's inheritance value. */
+    static char *inheritance_name[4] =
+        { "share", "copy", "none", "donate_copy"};
+    register vm_map_entry_t cur;
+    extern int indent;
+    char *map_kind;
+
+    /*
+     * One summary line for the map itself.
+     */
+    map_kind = map->is_main_map ? "Task" : "Share";
+    iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n",
+        map_kind, (int) map, (int) (map->pmap), map->ref_count,
+        map->nentries, map->timestamp);
+
+    /*
+     * Nested maps are only expanded when a full dump was requested.
+     */
+    if (!full && indent)
+        return;
+
+    indent += 2;
+    cur = map->header.next;
+    while (cur != &map->header) {
+        vm_map_entry_t before = cur->prev;
+        boolean_t first = (before == &map->header);
+        boolean_t expand;
+
+        iprintf("map entry 0x%x: start=0x%x, end=0x%x, ",
+            (int) cur, (int) cur->start, (int) cur->end);
+
+        /*
+         * Protection, inheritance and wiring are printed for
+         * main maps only.
+         */
+        if (map->is_main_map) {
+            printf("prot=%x/%x/%s, ",
+                cur->protection,
+                cur->max_protection,
+                inheritance_name[cur->inheritance]);
+            if (cur->wired_count != 0)
+                printf("wired, ");
+        }
+
+        if (cur->is_a_map || cur->is_sub_map) {
+            printf("share=0x%x, offset=0x%x\n",
+                (int) cur->object.share_map,
+                (int) cur->offset);
+
+            /*
+             * Descend into the map unless the previous entry
+             * already referred to the very same one.
+             */
+            expand = first || !before->is_a_map ||
+                before->object.share_map != cur->object.share_map;
+            if (expand) {
+                indent += 2;
+                vm_map_print(cur->object.share_map, full);
+                indent -= 2;
+            }
+        }
+        else {
+            printf("object=0x%x, offset=0x%x",
+                (int) cur->object.vm_object,
+                (int) cur->offset);
+            if (cur->copy_on_write)
+                printf(", copy (%s)",
+                    cur->needs_copy ? "needed" : "done");
+            printf("\n");
+
+            /*
+             * Print the object unless the previous entry
+             * already referred to the very same one.
+             */
+            expand = first || before->is_a_map ||
+                before->object.vm_object != cur->object.vm_object;
+            if (expand) {
+                indent += 2;
+                vm_object_print(cur->object.vm_object, full);
+                indent -= 2;
+            }
+        }
+
+        cur = cur->next;
+    }
+    indent -= 2;
+}
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
new file mode 100644
index 000000000000..d25b7a2d1bd3
--- /dev/null
+++ b/sys/vm/vm_map.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_map.h 8.3 (Berkeley) 3/15/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Virtual memory map module definitions.
+ */
+
+#ifndef _VM_MAP_
+#define _VM_MAP_
+
+/*
+ * Types defined:
+ *
+ * vm_map_t the high-level address map data structure.
+ * vm_map_entry_t an entry in an address map.
+ * vm_map_version_t a timestamp of a map, for use with vm_map_lookup
+ */
+
+/*
+ * Objects which live in maps may be either VM objects, or
+ * another map (called a "sharing map") which denotes read-write
+ * sharing with other maps.
+ */
+
+/*
+ * Which member is meaningful is recorded in the owning map entry
+ * (its is_a_map and is_sub_map fields).
+ */
+union vm_map_object {
+ struct vm_object *vm_object; /* VM object (entry is neither kind of map) */
+ struct vm_map *share_map; /* share map (entry has is_a_map set) */
+ struct vm_map *sub_map; /* submap (entry has is_sub_map set) */
+};
+
+/*
+ * Address map entries consist of start and end addresses,
+ * a VM object (or sharing map) and offset into that object,
+ * and user-exported inheritance and protection information.
+ * Also included is control information for virtual copy operations.
+ */
+struct vm_map_entry {
+ struct vm_map_entry *prev; /* previous entry */
+ struct vm_map_entry *next; /* next entry */
+ vm_offset_t start; /* start address */
+ vm_offset_t end; /* end address (first address past the entry) */
+ union vm_map_object object; /* object I point to */
+ vm_offset_t offset; /* offset into object (or into the share map) */
+ boolean_t is_a_map; /* Is "object" a map? */
+ boolean_t is_sub_map; /* Is "object" a submap? */
+ /* Only in sharing maps: */
+ boolean_t copy_on_write; /* is data copy-on-write */
+ boolean_t needs_copy; /* does object need to be copied */
+ /* Only in task maps: */
+ vm_prot_t protection; /* protection code */
+ vm_prot_t max_protection; /* maximum protection */
+ vm_inherit_t inheritance; /* inheritance */
+ int wired_count; /* can be paged if = 0 */
+};
+
+/*
+ * Maps are doubly-linked lists of map entries, kept sorted
+ * by address. A single hint is provided to start
+ * searches again from the last successful search,
+ * insertion, or removal.
+ */
+struct vm_map {
+ struct pmap * pmap; /* Physical map */
+ lock_data_t lock; /* Lock for map data */
+ struct vm_map_entry header; /* List of entries */
+ int nentries; /* Number of entries */
+ vm_size_t size; /* virtual size */
+ boolean_t is_main_map; /* Am I a main map? */
+ int ref_count; /* Reference count */
+ simple_lock_data_t ref_lock; /* Lock for ref_count field */
+ vm_map_entry_t hint; /* hint for quick lookups */
+ simple_lock_data_t hint_lock; /* lock for hint storage */
+ vm_map_entry_t first_free; /* First free space hint */
+ boolean_t entries_pageable; /* map entries pageable?? */
+ unsigned int timestamp; /* Version number; bumped by vm_map_lock */
+/*
+ * The list header's otherwise unused start/end fields hold the
+ * bounds of the map.
+ */
+#define min_offset header.start
+#define max_offset header.end
+};
+
+/*
+ * Map versions are used to validate a previous lookup attempt.
+ *
+ * Since lookup operations may involve both a main map and
+ * a sharing map, it is necessary to have a timestamp from each.
+ * [If the main map timestamp has changed, the share_map and
+ * associated timestamp are no longer valid; the map version
+ * does not include a reference for the imbedded share_map.]
+ */
+/*
+ * NOTE(review): the timestamps here are declared int, while the
+ * timestamp field of struct vm_map is unsigned int -- confirm the
+ * mismatch is harmless wherever the two are compared.
+ */
+typedef struct {
+ int main_timestamp; /* timestamp of the main map */
+ vm_map_t share_map; /* sharing map involved in the lookup */
+ int share_timestamp; /* timestamp of that sharing map */
+} vm_map_version_t;
+
+/*
+ * Macros: vm_map_lock, etc.
+ * Function:
+ * Perform locking on the data portion of a map.
+ */
+
+#define vm_map_lock(map) { \
+ lock_write(&(map)->lock); \
+ (map)->timestamp++; \
+}
+#define vm_map_unlock(map) lock_write_done(&(map)->lock)
+#define vm_map_lock_read(map) lock_read(&(map)->lock)
+#define vm_map_unlock_read(map) lock_read_done(&(map)->lock)
+
+/*
+ * Functions implemented as macros
+ */
+/* Lowest and highest offsets of the map (stored in its list header). */
+#define vm_map_min(map) ((map)->min_offset)
+#define vm_map_max(map) ((map)->max_offset)
+/* The physical map attached to this map. */
+#define vm_map_pmap(map) ((map)->pmap)
+
+/* XXX: number of kernel maps and entries to statically allocate */
+#define MAX_KMAP 10
+#define MAX_KMAPENT 500
+
+/*
+ * Prototypes for the map routines; only declared when building
+ * with KERNEL defined.
+ */
+#ifdef KERNEL
+boolean_t vm_map_check_protection __P((vm_map_t,
+ vm_offset_t, vm_offset_t, vm_prot_t));
+int vm_map_copy __P((vm_map_t, vm_map_t, vm_offset_t,
+ vm_size_t, vm_offset_t, boolean_t, boolean_t));
+void vm_map_copy_entry __P((vm_map_t,
+ vm_map_t, vm_map_entry_t, vm_map_entry_t));
+/* Forward declaration so the next prototype can name struct pmap. */
+struct pmap;
+vm_map_t vm_map_create __P((struct pmap *,
+ vm_offset_t, vm_offset_t, boolean_t));
+void vm_map_deallocate __P((vm_map_t));
+int vm_map_delete __P((vm_map_t, vm_offset_t, vm_offset_t));
+vm_map_entry_t vm_map_entry_create __P((vm_map_t));
+void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
+void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
+void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
+int vm_map_find __P((vm_map_t, vm_object_t,
+ vm_offset_t, vm_offset_t *, vm_size_t, boolean_t));
+int vm_map_findspace __P((vm_map_t,
+ vm_offset_t, vm_size_t, vm_offset_t *));
+int vm_map_inherit __P((vm_map_t,
+ vm_offset_t, vm_offset_t, vm_inherit_t));
+void vm_map_init __P((struct vm_map *,
+ vm_offset_t, vm_offset_t, boolean_t));
+int vm_map_insert __P((vm_map_t,
+ vm_object_t, vm_offset_t, vm_offset_t, vm_offset_t));
+int vm_map_lookup __P((vm_map_t *, vm_offset_t, vm_prot_t,
+ vm_map_entry_t *, vm_object_t *, vm_offset_t *, vm_prot_t *,
+ boolean_t *, boolean_t *));
+void vm_map_lookup_done __P((vm_map_t, vm_map_entry_t));
+boolean_t vm_map_lookup_entry __P((vm_map_t,
+ vm_offset_t, vm_map_entry_t *));
+int vm_map_pageable __P((vm_map_t,
+ vm_offset_t, vm_offset_t, boolean_t));
+int vm_map_clean __P((vm_map_t,
+ vm_offset_t, vm_offset_t, boolean_t, boolean_t));
+void vm_map_print __P((vm_map_t, boolean_t));
+int vm_map_protect __P((vm_map_t,
+ vm_offset_t, vm_offset_t, vm_prot_t, boolean_t));
+void vm_map_reference __P((vm_map_t));
+int vm_map_remove __P((vm_map_t, vm_offset_t, vm_offset_t));
+void vm_map_simplify __P((vm_map_t, vm_offset_t));
+void vm_map_simplify_entry __P((vm_map_t, vm_map_entry_t));
+void vm_map_startup __P((void));
+int vm_map_submap __P((vm_map_t,
+ vm_offset_t, vm_offset_t, vm_map_t));
+#endif /* KERNEL */
+#endif /* _VM_MAP_ */
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
new file mode 100644
index 000000000000..9db6f506c2a0
--- /dev/null
+++ b/sys/vm/vm_meter.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_meter.c 8.4 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+/* Load averages updated by loadav() and exported by vm_sysctl(). */
+struct loadavg averunnable; /* load average, of runnable procs */
+
+/*
+ * Sleep-time threshold: vmmeter() wakes proc0 once its p_slptime exceeds
+ * maxslp/2, and vmtotal() uses maxslp as the cutoff when classifying
+ * sleeping processes.
+ */
+int maxslp = MAXSLP;
+/* NOTE(review): saferss is not referenced in this file -- confirm its users. */
+int saferss = SAFERSS;
+
+/*
+ * Periodic VM bookkeeping: refresh the load average and, if proc0 has
+ * been asleep long enough, wake it up.
+ */
+void
+vmmeter()
+{
+	/* Recompute the load average whenever the clock hits a multiple of 5s. */
+	if ((time.tv_sec % 5) == 0) {
+		loadav(&averunnable);
+	}
+	/* proc0 is woken once it has slept for more than half of maxslp. */
+	if (proc0.p_slptime > maxslp / 2) {
+		wakeup((caddr_t)&proc0);
+	}
+}
+
+/*
+ * Constants for averages over 1, 5, and 15 minutes
+ * when sampling at 5 second intervals.
+ *
+ * With one sample every 5 seconds the three windows span 12, 60 and 180
+ * samples, hence the decay factors exp(-1/12), exp(-1/60) and exp(-1/180).
+ * Each factor is multiplied by FSCALE so it can be stored as a fixpt_t.
+ * loadav() uses cexp[i] together with avg->ldavg[i].
+ */
+fixpt_t cexp[3] = {
+ 0.9200444146293232 * FSCALE, /* exp(-1/12) */
+ 0.9834714538216174 * FSCALE, /* exp(-1/60) */
+ 0.9944598480048967 * FSCALE, /* exp(-1/180) */
+};
+
+/*
+ * Fold the current number of runnable processes into the tenex style
+ * 1, 5 and 15 minute load averages held in *avg.
+ */
+void
+loadav(avg)
+	register struct loadavg *avg;
+{
+	register struct proc *p;
+	register int i, nrun;
+
+	/*
+	 * A process is counted when it is in state SRUN or SIDL, or when it
+	 * is in SSLEEP with priority no greater than PZERO and has not yet
+	 * accumulated any sleep time.
+	 */
+	nrun = 0;
+	for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+		if (p->p_stat == SRUN || p->p_stat == SIDL)
+			nrun++;
+		else if (p->p_stat == SSLEEP &&
+		    p->p_priority <= PZERO && p->p_slptime == 0)
+			nrun++;
+	}
+
+	/* Decay each average by cexp[i] and blend in the new sample. */
+	i = 0;
+	while (i < 3) {
+		avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
+		    nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
+		i++;
+	}
+}
+
+/*
+ * Attributes associated with virtual memory.
+ *
+ * name/namelen select the attribute; every name at this level is
+ * terminal, so namelen must be exactly 1.  VM_LOADAVG and VM_METER are
+ * both served through sysctl_rdstruct(); newlen and p are not used here.
+ *
+ * Returns ENOTDIR for a bad namelen, EOPNOTSUPP for an unknown name,
+ * and otherwise whatever sysctl_rdstruct() returns.
+ */
+int
+vm_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	struct proc *p;
+{
+	struct vmtotal vmtotals;
+
+	/* all sysctl names at this level are terminal */
+	if (namelen != 1)
+		return (ENOTDIR);		/* overloaded */
+
+	switch (name[0]) {
+	case VM_LOADAVG:
+		/* Publish the fixed-point scale alongside the averages. */
+		averunnable.fscale = FSCALE;
+		return (sysctl_rdstruct(oldp, oldlenp, newp, &averunnable,
+		    sizeof(averunnable)));
+	case VM_METER:
+		/* Totals are computed on demand into a local snapshot. */
+		vmtotal(&vmtotals);
+		return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals,
+		    sizeof(vmtotals)));
+	default:
+		return (EOPNOTSUPP);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Calculate the current state of the system and store it in *totalp.
+ * Done on demand; in this file the caller is vm_sysctl() for VM_METER
+ * (the original comment names getkerninfo()).
+ *
+ * Works in three passes: clear OBJ_ACTIVE on every VM object, walk the
+ * process list counting processes by state and re-marking the objects
+ * they map, then total object sizes and resident page counts.
+ */
+void
+vmtotal(totalp)
+ register struct vmtotal *totalp;
+{
+ register struct proc *p;
+ register vm_map_entry_t entry;
+ register vm_object_t object;
+ register vm_map_t map;
+ int paging;
+
+ bzero(totalp, sizeof *totalp);
+ /*
+ * Mark all objects as inactive.
+ */
+ simple_lock(&vm_object_list_lock);
+ for (object = vm_object_list.tqh_first;
+ object != NULL;
+ object = object->object_list.tqe_next)
+ object->flags &= ~OBJ_ACTIVE;
+ simple_unlock(&vm_object_list_lock);
+ /*
+ * Calculate process statistics.
+ */
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ /* Processes flagged P_SYSTEM are left out of every counter. */
+ if (p->p_flag & P_SYSTEM)
+ continue;
+ switch (p->p_stat) {
+ case 0:
+ continue;
+
+ /*
+ * Sleeping or stopped: t_dw counts in-memory processes at
+ * priority <= PZERO, t_sl other in-memory processes below the
+ * maxslp cutoff, t_sw processes not in memory below the cutoff.
+ * Processes at or past maxslp do not mark objects active.
+ */
+ case SSLEEP:
+ case SSTOP:
+ if (p->p_flag & P_INMEM) {
+ if (p->p_priority <= PZERO)
+ totalp->t_dw++;
+ else if (p->p_slptime < maxslp)
+ totalp->t_sl++;
+ } else if (p->p_slptime < maxslp)
+ totalp->t_sw++;
+ if (p->p_slptime >= maxslp)
+ continue;
+ break;
+
+ /*
+ * Runnable or being created: t_rq if in memory, t_sw otherwise.
+ * SIDL processes are counted but their maps are not walked.
+ */
+ case SRUN:
+ case SIDL:
+ if (p->p_flag & P_INMEM)
+ totalp->t_rq++;
+ else
+ totalp->t_sw++;
+ if (p->p_stat == SIDL)
+ continue;
+ break;
+ }
+ /*
+ * Note active objects.
+ */
+ paging = 0;
+ for (map = &p->p_vmspace->vm_map, entry = map->header.next;
+ entry != &map->header; entry = entry->next) {
+ /* Only entries that reference a VM object directly are considered. */
+ if (entry->is_a_map || entry->is_sub_map ||
+ entry->object.vm_object == NULL)
+ continue;
+ entry->object.vm_object->flags |= OBJ_ACTIVE;
+ paging |= entry->object.vm_object->paging_in_progress;
+ }
+ /* t_pw: processes with at least one mapped object that has paging in progress. */
+ if (paging)
+ totalp->t_pw++;
+ }
+ /*
+ * Calculate object memory usage statistics.
+ */
+ simple_lock(&vm_object_list_lock);
+ for (object = vm_object_list.tqh_first;
+ object != NULL;
+ object = object->object_list.tqe_next) {
+ totalp->t_vm += num_pages(object->size);
+ totalp->t_rm += object->resident_page_count;
+ if (object->flags & OBJ_ACTIVE) {
+ totalp->t_avm += num_pages(object->size);
+ totalp->t_arm += object->resident_page_count;
+ }
+ if (object->ref_count > 1) {
+ /* shared object */
+ totalp->t_vmshr += num_pages(object->size);
+ totalp->t_rmshr += object->resident_page_count;
+ if (object->flags & OBJ_ACTIVE) {
+ totalp->t_avmshr += num_pages(object->size);
+ totalp->t_armshr += object->resident_page_count;
+ }
+ }
+ }
+ totalp->t_free = cnt.v_free_count;
+}
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
new file mode 100644
index 000000000000..340cded1ba48
--- /dev/null
+++ b/sys/vm/vm_mmap.c
@@ -0,0 +1,832 @@
+/*
+ * Copyright (c) 1988 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
+ *
+ * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94
+ */
+
+/*
+ * Mapped file (mmap) interface to VM
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/filedesc.h>
+#include <sys/resourcevar.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/mman.h>
+#include <sys/conf.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <vm/vm.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_prot.h>
+
+#ifdef DEBUG
+/* Bit mask of MDB_* flags selecting which debug printf()s in this file fire. */
+int mmapdebug = 0;
+#define MDB_FOLLOW 0x01 /* trace entry to the mmap-family calls */
+#define MDB_SYNC 0x02 /* trace msync() and the range it cleans */
+#define MDB_MAPIT 0x04 /* trace mappings made by vm_mmap() and its result */
+#endif
+
+/* Arguments to sbrk(). */
+struct sbrk_args {
+	int	incr;
+};
+
+/*
+ * sbrk entry point.  Not yet implemented: the arguments are ignored and
+ * the call always fails with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+sbrk(p, uap, retval)
+	struct proc *p;
+	struct sbrk_args *uap;
+	int *retval;
+{
+	return (EOPNOTSUPP);
+}
+
+/* Arguments to sstk(). */
+struct sstk_args {
+	int	incr;
+};
+
+/*
+ * sstk entry point.  Not yet implemented: the arguments are ignored and
+ * the call always fails with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+sstk(p, uap, retval)
+	struct proc *p;
+	struct sstk_args *uap;
+	int *retval;
+{
+	return (EOPNOTSUPP);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/* Arguments to ogetpagesize(); the single member is a placeholder. */
+struct getpagesize_args {
+	int	dummy;
+};
+
+/*
+ * Compatibility getpagesize: stores PAGE_SIZE in *retval and succeeds.
+ */
+/* ARGSUSED */
+int
+ogetpagesize(p, uap, retval)
+	struct proc *p;
+	struct getpagesize_args *uap;
+	int *retval;
+{
+	retval[0] = PAGE_SIZE;
+	return (0);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Arguments to mmap().  ommap() also fills one of these in order to
+ * forward an old-style request.
+ */
+struct mmap_args {
+ caddr_t addr; /* requested address, or hint when MAP_FIXED is clear */
+ size_t len; /* length in bytes; mmap() rounds it up to a page */
+ int prot; /* protection bits, masked with VM_PROT_ALL */
+ int flags; /* MAP_* flags */
+ int fd; /* descriptor to map; must be -1 with MAP_ANON */
+ long pad; /* not read by mmap(); presumably alignment padding -- confirm */
+ off_t pos; /* offset into the file or device */
+};
+
+#ifdef COMPAT_43
+/* Arguments to the old-style mmap call. */
+struct ommap_args {
+	caddr_t	addr;
+	int	len;
+	int	prot;
+	int	flags;
+	int	fd;
+	long	pos;
+};
+
+/*
+ * Old-style mmap: translate the old protection and flag encodings into
+ * a struct mmap_args and forward the request to mmap().
+ */
+int
+ommap(p, uap, retval)
+	struct proc *p;
+	register struct ommap_args *uap;
+	int *retval;
+{
+	struct mmap_args nargs;
+	int oflags;
+	/* Indexed by the low three bits of the old protection value. */
+	static const char cvtbsdprot[8] = {
+		0,
+		PROT_EXEC,
+		PROT_WRITE,
+		PROT_EXEC|PROT_WRITE,
+		PROT_READ,
+		PROT_EXEC|PROT_READ,
+		PROT_WRITE|PROT_READ,
+		PROT_EXEC|PROT_WRITE|PROT_READ,
+	};
+#define	OMAP_ANON	0x0002
+#define	OMAP_COPY	0x0020
+#define	OMAP_SHARED	0x0010
+#define	OMAP_FIXED	0x0100
+#define	OMAP_INHERIT	0x0800
+
+	/* Fields that carry over unchanged. */
+	nargs.addr = uap->addr;
+	nargs.len = uap->len;
+	nargs.fd = uap->fd;
+	nargs.pos = uap->pos;
+	nargs.prot = cvtbsdprot[uap->prot & 0x7];
+
+	/* A request that is not shared is private. */
+	oflags = uap->flags;
+	nargs.flags = (oflags & OMAP_SHARED) ? MAP_SHARED : MAP_PRIVATE;
+	if (oflags & OMAP_ANON)
+		nargs.flags |= MAP_ANON;
+	if (oflags & OMAP_COPY)
+		nargs.flags |= MAP_COPY;
+	if (oflags & OMAP_FIXED)
+		nargs.flags |= MAP_FIXED;
+	if (oflags & OMAP_INHERIT)
+		nargs.flags |= MAP_INHERIT;
+
+	return (mmap(p, &nargs, retval));
+}
+#endif
+
+/*
+ * mmap entry point: validate the request, work out the maximum
+ * protection allowed by the open file (when one is involved) and hand
+ * the mapping off to vm_mmap().
+ *
+ * On success returns 0 and stores the mapped address in *retval.
+ * Fails with EINVAL for a bad address, length, flag combination or file
+ * type, EBADF for a bad descriptor, EACCES when the requested protection
+ * is not permitted by the file's open mode, or with vm_mmap()'s error.
+ */
+int
+mmap(p, uap, retval)
+ struct proc *p;
+ register struct mmap_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ struct vnode *vp;
+ vm_offset_t addr;
+ vm_size_t size;
+ vm_prot_t prot, maxprot;
+ caddr_t handle;
+ int flags, error;
+
+ prot = uap->prot & VM_PROT_ALL;
+ flags = uap->flags;
+#ifdef DEBUG
+ if (mmapdebug & MDB_FOLLOW)
+ printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
+ p->p_pid, uap->addr, uap->len, prot,
+ flags, uap->fd, (vm_offset_t)uap->pos);
+#endif
+ /*
+ * Address (if FIXED) must be page aligned.
+ * Size is implicitly rounded to a page boundary.
+ * A length that is negative when viewed as ssize_t is rejected, as is
+ * MAP_ANON combined with a descriptor other than -1.
+ */
+ addr = (vm_offset_t) uap->addr;
+ if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
+ (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
+ return (EINVAL);
+ size = (vm_size_t) round_page(uap->len);
+ /*
+ * Check for illegal addresses. Watch out for address wrap...
+ * Note that VM_*_ADDRESS are not constants due to casts (argh).
+ */
+ if (flags & MAP_FIXED) {
+ if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+ if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
+ return (EINVAL);
+ if (addr > addr + size)
+ return (EINVAL);
+ }
+ /*
+ * XXX if no hint provided for a non-fixed mapping place it after
+ * the end of the largest possible heap.
+ *
+ * There should really be a pmap call to determine a reasonable
+ * location.
+ */
+ if (addr == 0 && (flags & MAP_FIXED) == 0)
+ addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
+ if (flags & MAP_ANON) {
+ /*
+ * Mapping blank space is trivial.
+ */
+ handle = NULL;
+ maxprot = VM_PROT_ALL;
+ } else {
+ /*
+ * Mapping file, get fp for validation.
+ * Obtain vnode and make sure it is of appropriate type.
+ */
+ if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+ if (fp->f_type != DTYPE_VNODE)
+ return (EINVAL);
+ vp = (struct vnode *)fp->f_data;
+ if (vp->v_type != VREG && vp->v_type != VCHR)
+ return (EINVAL);
+ /*
+ * XXX hack to handle use of /dev/zero to map anon
+ * memory (ala SunOS).
+ */
+ if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
+ handle = NULL;
+ maxprot = VM_PROT_ALL;
+ flags |= MAP_ANON;
+ } else {
+ /*
+ * Ensure that file and memory protections are
+ * compatible. Note that we only worry about
+ * writability if mapping is shared; in this case,
+ * current and max prot are dictated by the open file.
+ * XXX use the vnode instead? Problem is: what
+ * credentials do we use for determination?
+ * What if proc does a setuid?
+ */
+ maxprot = VM_PROT_EXECUTE; /* ??? */
+ if (fp->f_flag & FREAD)
+ maxprot |= VM_PROT_READ;
+ else if (prot & PROT_READ)
+ return (EACCES);
+ if (flags & MAP_SHARED) {
+ if (fp->f_flag & FWRITE)
+ maxprot |= VM_PROT_WRITE;
+ else if (prot & PROT_WRITE)
+ return (EACCES);
+ } else
+ maxprot |= VM_PROT_WRITE;
+ /* vm_mmap() receives the vnode as its handle. */
+ handle = (caddr_t)vp;
+ }
+ }
+ error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
+ flags, handle, (vm_offset_t)uap->pos);
+ /* vm_mmap() may have updated addr; report the final address. */
+ if (error == 0)
+ *retval = (int)addr;
+ return (error);
+}
+
+/* Arguments to msync(). */
+struct msync_args {
+	caddr_t	addr;
+	int	len;
+};
+
+/*
+ * msync entry point: clean and invalidate the pages backing
+ * [addr, addr+len) in the calling process's address map.
+ *
+ * A len of zero selects the whole map entry that contains addr.
+ *
+ * Returns 0 on success; EINVAL when addr is not page aligned, the range
+ * wraps, addr is not mapped (len == 0 case), or vm_map_clean() reports
+ * KERN_INVALID_ADDRESS or an unrecognized status; EIO when
+ * vm_map_clean() reports KERN_FAILURE.
+ */
+int
+msync(p, uap, retval)
+	struct proc *p;
+	struct msync_args *uap;
+	int *retval;
+{
+	vm_offset_t addr;
+	vm_size_t size;
+	vm_map_t map;
+	int rv;
+	boolean_t syncio, invalidate;
+
+#ifdef DEBUG
+	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
+		printf("msync(%d): addr %x len %x\n",
+		    p->p_pid, uap->addr, uap->len);
+#endif
+	if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
+		return (EINVAL);
+	map = &p->p_vmspace->vm_map;
+	addr = (vm_offset_t)uap->addr;
+	size = (vm_size_t)uap->len;
+	/*
+	 * XXX Gak!  If size is zero we are supposed to sync "all modified
+	 * pages with the region containing addr".  Unfortunately, we
+	 * don't really keep track of individual mmaps so we approximate
+	 * by flushing the range of the map entry containing addr.
+	 * This can be incorrect if the region splits or is coalesced
+	 * with a neighbor.
+	 */
+	if (size == 0) {
+		vm_map_entry_t entry;
+
+		vm_map_lock_read(map);
+		rv = vm_map_lookup_entry(map, addr, &entry);
+		vm_map_unlock_read(map);
+		/*
+		 * vm_map_lookup_entry() returns TRUE when addr lies inside
+		 * an entry.  The test used to be inverted, which rejected
+		 * every mapped address and used an unrelated entry for
+		 * unmapped ones.
+		 */
+		if (!rv)
+			return (EINVAL);
+		addr = entry->start;
+		size = entry->end - entry->start;
+	}
+#ifdef DEBUG
+	if (mmapdebug & MDB_SYNC)
+		printf("msync: cleaning/flushing address range [%x-%x)\n",
+		    addr, addr+size);
+#endif
+	/*
+	 * Could pass this in as a third flag argument to implement
+	 * Sun's MS_ASYNC.
+	 */
+	syncio = TRUE;
+	/*
+	 * XXX bummer, gotta flush all cached pages to ensure
+	 * consistency with the file system cache.  Otherwise, we could
+	 * pass this in to implement Sun's MS_INVALIDATE.
+	 */
+	invalidate = TRUE;
+	/*
+	 * Clean the pages and interpret the return value.
+	 */
+	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
+	switch (rv) {
+	case KERN_SUCCESS:
+		break;
+	case KERN_INVALID_ADDRESS:
+		return (EINVAL);	/* Sun returns ENOMEM? */
+	case KERN_FAILURE:
+		return (EIO);
+	default:
+		return (EINVAL);
+	}
+	return (0);
+}
+
+/* Arguments to munmap(). */
+struct munmap_args {
+	caddr_t	addr;
+	int	len;
+};
+
+/*
+ * munmap entry point: remove the page-rounded range [addr, addr+len)
+ * from the calling process's address map.
+ *
+ * Returns 0 on success or when the rounded length is zero; EINVAL when
+ * addr is not page aligned, len is negative, the range falls outside
+ * the user address limits or wraps, or part of it is not allocated.
+ */
+int
+munmap(p, uap, retval)
+	register struct proc *p;
+	register struct munmap_args *uap;
+	int *retval;
+{
+	vm_offset_t addr;
+	vm_size_t size;
+	vm_map_t map;
+
+#ifdef DEBUG
+	if (mmapdebug & MDB_FOLLOW)
+		printf("munmap(%d): addr %x len %x\n",
+		    p->p_pid, uap->addr, uap->len);
+#endif
+
+	addr = (vm_offset_t) uap->addr;
+	if (addr & PAGE_MASK)
+		return (EINVAL);
+	if (uap->len < 0)
+		return (EINVAL);
+
+	/* Nothing to do for an empty (rounded) range. */
+	size = (vm_size_t) round_page(uap->len);
+	if (size == 0)
+		return (0);
+
+	/*
+	 * Reject illegal addresses, including a range that wraps.
+	 * VM_*_ADDRESS are not constants due to casts, hence the > 0 tests.
+	 */
+	if ((VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS) ||
+	    (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) ||
+	    addr > addr + size)
+		return (EINVAL);
+
+	/* The entire range must be allocated before any of it is removed. */
+	map = &p->p_vmspace->vm_map;
+	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
+		return (EINVAL);
+
+	/* vm_map_remove() returns nothing but KERN_SUCCESS anyway. */
+	(void) vm_map_remove(map, addr, addr+size);
+	return (0);
+}
+
+/*
+ * Clear the UF_MAPPED flag on descriptor fd of the current process.
+ * No mappings are torn down here (see the XXX below).
+ */
+void
+munmapfd(fd)
+	int fd;
+{
+	register struct proc *cp = curproc;
+
+#ifdef DEBUG
+	if (mmapdebug & MDB_FOLLOW)
+		printf("munmapfd(%d): fd %d\n", cp->p_pid, fd);
+#endif
+
+	/*
+	 * XXX should vm_deallocate any regions mapped to this file
+	 */
+	cp->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
+}
+
+/* Arguments to mprotect(). */
+struct mprotect_args {
+	caddr_t	addr;
+	int	len;
+	int	prot;
+};
+
+/*
+ * mprotect entry point: set the current protection of
+ * [addr, addr+len) in the calling process's map to prot.
+ *
+ * Returns 0 on success, EACCES when vm_map_protect() reports a
+ * protection failure, and EINVAL for a misaligned address, a negative
+ * length or any other vm_map_protect() status.
+ */
+int
+mprotect(p, uap, retval)
+	struct proc *p;
+	struct mprotect_args *uap;
+	int *retval;
+{
+	vm_offset_t addr;
+	vm_size_t size;
+	register vm_prot_t prot;
+	int rv;
+
+#ifdef DEBUG
+	if (mmapdebug & MDB_FOLLOW)
+		printf("mprotect(%d): addr %x len %x prot %d\n",
+		    p->p_pid, uap->addr, uap->len, uap->prot);
+#endif
+
+	addr = (vm_offset_t)uap->addr;
+	if (addr & PAGE_MASK)
+		return (EINVAL);
+	if (uap->len < 0)
+		return (EINVAL);
+	size = (vm_size_t)uap->len;
+	prot = uap->prot & VM_PROT_ALL;
+
+	/* FALSE: change the current protection, not the maximum. */
+	rv = vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
+	    FALSE);
+	if (rv == KERN_SUCCESS)
+		return (0);
+	if (rv == KERN_PROTECTION_FAILURE)
+		return (EACCES);
+	return (EINVAL);
+}
+
+/* Arguments to madvise(). */
+struct madvise_args {
+	caddr_t	addr;
+	int	len;
+	int	behav;
+};
+
+/*
+ * madvise entry point.  Not yet implemented: the arguments are ignored
+ * and the call always fails with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+madvise(p, uap, retval)
+	struct proc *p;
+	struct madvise_args *uap;
+	int *retval;
+{
+	return (EOPNOTSUPP);
+}
+
+/* Arguments to mincore(). */
+struct mincore_args {
+	caddr_t	addr;
+	int	len;
+	char	*vec;
+};
+
+/*
+ * mincore entry point.  Not yet implemented: the arguments are ignored
+ * and the call always fails with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+mincore(p, uap, retval)
+	struct proc *p;
+	struct mincore_args *uap;
+	int *retval;
+{
+	return (EOPNOTSUPP);
+}
+
+/* Arguments to mlock(). */
+struct mlock_args {
+	caddr_t	addr;
+	size_t	len;
+};
+
+/*
+ * mlock entry point: wire the page-rounded range [addr, addr+len) of
+ * the calling process's map.
+ *
+ * Returns EINVAL for a misaligned or wrapping range, EAGAIN when the
+ * request would exceed vm_page_max_wired (or, where pmap_wired_count
+ * is available, the RLIMIT_MEMLOCK limit), the suser() error where it
+ * is not, and ENOMEM when vm_map_pageable() fails.
+ */
+int
+mlock(p, uap, retval)
+	struct proc *p;
+	struct mlock_args *uap;
+	int *retval;
+{
+	extern int vm_page_max_wired;
+	vm_offset_t addr;
+	vm_size_t size;
+	int error;
+
+#ifdef DEBUG
+	if (mmapdebug & MDB_FOLLOW)
+		printf("mlock(%d): addr %x len %x\n",
+		    p->p_pid, uap->addr, uap->len);
+#endif
+	addr = (vm_offset_t)uap->addr;
+	if (addr & PAGE_MASK)
+		return (EINVAL);
+	if (uap->addr + uap->len < uap->addr)
+		return (EINVAL);
+
+	/* System-wide ceiling on wired pages. */
+	size = round_page((vm_size_t)uap->len);
+	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
+		return (EAGAIN);
+#ifdef pmap_wired_count
+	/* Per-process limit, when the pmap can report its wired count. */
+	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
+	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
+		return (EAGAIN);
+#else
+	/* Otherwise the call is restricted to the superuser. */
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error)
+		return (error);
+#endif
+
+	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
+	if (error != KERN_SUCCESS)
+		return (ENOMEM);
+	return (0);
+}
+
+/* Arguments to munlock(). */
+struct munlock_args {
+	caddr_t	addr;
+	size_t	len;
+};
+
+/*
+ * munlock entry point: unwire the page-rounded range [addr, addr+len)
+ * of the calling process's map.
+ *
+ * Returns EINVAL for a misaligned or wrapping range, the suser() error
+ * where pmap_wired_count is not available, and ENOMEM when
+ * vm_map_pageable() fails.
+ */
+int
+munlock(p, uap, retval)
+	struct proc *p;
+	struct munlock_args *uap;
+	int *retval;
+{
+	vm_offset_t addr;
+	vm_size_t size;
+	int error;
+
+#ifdef DEBUG
+	if (mmapdebug & MDB_FOLLOW)
+		printf("munlock(%d): addr %x len %x\n",
+		    p->p_pid, uap->addr, uap->len);
+#endif
+	addr = (vm_offset_t)uap->addr;
+	if (addr & PAGE_MASK)
+		return (EINVAL);
+	if (uap->addr + uap->len < uap->addr)
+		return (EINVAL);
+#ifndef pmap_wired_count
+	/* Without a per-pmap wired count the call is superuser only. */
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error)
+		return (error);
+#endif
+	size = round_page((vm_size_t)uap->len);
+
+	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
+	if (error != KERN_SUCCESS)
+		return (ENOMEM);
+	return (0);
+}
+
+/*
+ * Internal version of mmap.
+ * Currently used by mmap, exec, and sys5 shared memory.
+ * Handle is either a vnode pointer or NULL for MAP_ANON.
+ *
+ * Maps size bytes at offset foff of the object behind handle into map.
+ * *addr is the requested address on entry (a hint unless MAP_FIXED is
+ * set) and is passed by reference to the allocation routines.  prot is
+ * the initial protection and maxprot the maximum protection.
+ *
+ * Returns 0 on success (and immediately when size is 0), EINVAL or
+ * ENOMEM when no pager can be allocated, and otherwise an errno
+ * translated from the KERN_* status at "out".
+ */
+int
+vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
+ register vm_map_t map;
+ register vm_offset_t *addr;
+ register vm_size_t size;
+ vm_prot_t prot, maxprot;
+ register int flags;
+ caddr_t handle; /* XXX should be vp */
+ vm_offset_t foff;
+{
+ register vm_pager_t pager;
+ boolean_t fitit;
+ vm_object_t object;
+ struct vnode *vp = NULL;
+ int type;
+ int rv = KERN_SUCCESS;
+
+ if (size == 0)
+ return (0);
+
+ /*
+ * Without MAP_FIXED the address is only a hint: round it and let the
+ * allocator find a fit.  With MAP_FIXED, first deallocate the target
+ * range in the map.
+ */
+ if ((flags & MAP_FIXED) == 0) {
+ fitit = TRUE;
+ *addr = round_page(*addr);
+ } else {
+ fitit = FALSE;
+ (void)vm_deallocate(map, *addr, size);
+ }
+
+ /*
+ * Lookup/allocate pager. All except an unnamed anonymous lookup
+ * gain a reference to ensure continued existence of the object.
+ * (XXX the exception is to appease the pageout daemon)
+ */
+ if (flags & MAP_ANON)
+ type = PG_DFLT;
+ else {
+ vp = (struct vnode *)handle;
+ if (vp->v_type == VCHR) {
+ /* Device pagers are keyed by device number, not by vnode. */
+ type = PG_DEVICE;
+ handle = (caddr_t)vp->v_rdev;
+ } else
+ type = PG_VNODE;
+ }
+ pager = vm_pager_allocate(type, handle, size, prot, foff);
+ if (pager == NULL)
+ return (type == PG_DEVICE ? EINVAL : ENOMEM);
+ /*
+ * Find object and release extra reference gained by lookup
+ */
+ object = vm_object_lookup(pager);
+ vm_object_deallocate(object);
+
+ /*
+ * Anonymous memory.
+ */
+ if (flags & MAP_ANON) {
+ rv = vm_allocate_with_pager(map, addr, size, fitit,
+ pager, foff, TRUE);
+ if (rv != KERN_SUCCESS) {
+ if (handle == NULL)
+ vm_pager_deallocate(pager);
+ else
+ vm_object_deallocate(object);
+ goto out;
+ }
+ /*
+ * Don't cache anonymous objects.
+ * Loses the reference gained by vm_pager_allocate.
+ * Note that object will be NULL when handle == NULL,
+ * this is ok since vm_allocate_with_pager has made
+ * sure that these objects are uncached.
+ */
+ (void) pager_cache(object, FALSE);
+#ifdef DEBUG
+ if (mmapdebug & MDB_MAPIT)
+ printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
+ curproc->p_pid, *addr, size, pager);
+#endif
+ }
+ /*
+ * Must be a mapped file.
+ * Distinguish between character special and regular files.
+ */
+ else if (vp->v_type == VCHR) {
+ rv = vm_allocate_with_pager(map, addr, size, fitit,
+ pager, foff, FALSE);
+ /*
+ * Uncache the object and lose the reference gained
+ * by vm_pager_allocate(). If the call to
+ * vm_allocate_with_pager() was successful, then we
+ * gained an additional reference ensuring the object
+ * will continue to exist. If the call failed then
+ * the deallocate call below will terminate the
+ * object which is fine.
+ */
+ (void) pager_cache(object, FALSE);
+ if (rv != KERN_SUCCESS)
+ goto out;
+ }
+ /*
+ * A regular file
+ */
+ else {
+#ifdef DEBUG
+ if (object == NULL)
+ printf("vm_mmap: no object: vp %x, pager %x\n",
+ vp, pager);
+#endif
+ /*
+ * Map it directly.
+ * Allows modifications to go out to the vnode.
+ */
+ if (flags & MAP_SHARED) {
+ rv = vm_allocate_with_pager(map, addr, size,
+ fitit, pager,
+ foff, FALSE);
+ if (rv != KERN_SUCCESS) {
+ vm_object_deallocate(object);
+ goto out;
+ }
+ /*
+ * Don't cache the object. This is the easiest way
+ * of ensuring that data gets back to the filesystem
+ * because vnode_pager_deallocate() will fsync the
+ * vnode. pager_cache() will lose the extra ref.
+ */
+ if (prot & VM_PROT_WRITE)
+ pager_cache(object, FALSE);
+ else
+ vm_object_deallocate(object);
+ }
+ /*
+ * Copy-on-write of file. Two flavors.
+ * MAP_COPY is true COW, you essentially get a snapshot of
+ * the region at the time of mapping. MAP_PRIVATE means only
+ * that your changes are not reflected back to the object.
+ * Changes made by others will be seen.
+ */
+ else {
+ vm_map_t tmap;
+ vm_offset_t off;
+
+ /* locate and allocate the target address space */
+ rv = vm_map_find(map, NULL, (vm_offset_t)0,
+ addr, size, fitit);
+ if (rv != KERN_SUCCESS) {
+ vm_object_deallocate(object);
+ goto out;
+ }
+ /*
+ * Map the file into a temporary map first; the range is
+ * then copied from tmap into the real map below.
+ */
+ tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
+ VM_MIN_ADDRESS+size, TRUE);
+ off = VM_MIN_ADDRESS;
+ rv = vm_allocate_with_pager(tmap, &off, size,
+ TRUE, pager,
+ foff, FALSE);
+ /*
+ * NOTE(review): on this failure path the range reserved in
+ * map by vm_map_find() above does not appear to be released
+ * -- confirm.
+ */
+ if (rv != KERN_SUCCESS) {
+ vm_object_deallocate(object);
+ vm_map_deallocate(tmap);
+ goto out;
+ }
+ /*
+ * (XXX)
+ * MAP_PRIVATE implies that we see changes made by
+ * others. To ensure that we need to guarantee that
+ * no copy object is created (otherwise original
+ * pages would be pushed to the copy object and we
+ * would never see changes made by others). We
+ * totally sleaze it right now by marking the object
+ * internal temporarily.
+ */
+ if ((flags & MAP_COPY) == 0)
+ object->flags |= OBJ_INTERNAL;
+ rv = vm_map_copy(map, tmap, *addr, size, off,
+ FALSE, FALSE);
+ object->flags &= ~OBJ_INTERNAL;
+ /*
+ * (XXX)
+ * My oh my, this only gets worse...
+ * Force creation of a shadow object so that
+ * vm_map_fork will do the right thing.
+ */
+ if ((flags & MAP_COPY) == 0) {
+ /*
+ * This tmap shadows the temporary map declared in the
+ * enclosing block; here it starts out as the real map.
+ */
+ vm_map_t tmap;
+ vm_map_entry_t tentry;
+ vm_object_t tobject;
+ vm_offset_t toffset;
+ vm_prot_t tprot;
+ boolean_t twired, tsu;
+
+ tmap = map;
+ vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
+ &tentry, &tobject, &toffset,
+ &tprot, &twired, &tsu);
+ vm_map_lookup_done(tmap, tentry);
+ }
+ /*
+ * (XXX)
+ * Map copy code cannot detect sharing unless a
+ * sharing map is involved. So we cheat and write
+ * protect everything ourselves.
+ */
+ vm_object_pmap_copy(object, foff, foff + size);
+ vm_object_deallocate(object);
+ /* Outer tmap again: the temporary map is no longer needed. */
+ vm_map_deallocate(tmap);
+ if (rv != KERN_SUCCESS)
+ goto out;
+ }
+#ifdef DEBUG
+ if (mmapdebug & MDB_MAPIT)
+ printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
+ curproc->p_pid, *addr, size, pager);
+#endif
+ }
+ /*
+ * Correct protection (default is VM_PROT_ALL).
+ * If maxprot is different than prot, we must set both explicitly.
+ * On failure the new mapping is torn down again.
+ */
+ rv = KERN_SUCCESS;
+ if (maxprot != VM_PROT_ALL)
+ rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
+ if (rv == KERN_SUCCESS && prot != maxprot)
+ rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
+ if (rv != KERN_SUCCESS) {
+ (void) vm_deallocate(map, *addr, size);
+ goto out;
+ }
+ /*
+ * Shared memory is also shared with children.
+ */
+ if (flags & MAP_SHARED) {
+ rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
+ if (rv != KERN_SUCCESS) {
+ (void) vm_deallocate(map, *addr, size);
+ goto out;
+ }
+ }
+out:
+#ifdef DEBUG
+ if (mmapdebug & MDB_MAPIT)
+ printf("vm_mmap: rv %d\n", rv);
+#endif
+ /* Translate the KERN_* status into an errno for the caller. */
+ switch (rv) {
+ case KERN_SUCCESS:
+ return (0);
+ case KERN_INVALID_ADDRESS:
+ case KERN_NO_SPACE:
+ return (ENOMEM);
+ case KERN_PROTECTION_FAILURE:
+ return (EACCES);
+ default:
+ return (EINVAL);
+ }
+}
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
new file mode 100644
index 000000000000..d11fa8be014f
--- /dev/null
+++ b/sys/vm/vm_object.c
@@ -0,0 +1,1436 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_object.c 8.5 (Berkeley) 3/22/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Virtual memory object module.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+/*
+ * Virtual memory objects maintain the actual data
+ * associated with allocated virtual memory. A given
+ * page of memory exists within exactly one object.
+ *
+ * An object is only deallocated when all "references"
+ * are given up. Only one "reference" to a given
+ * region of an object should be writeable.
+ *
+ * Associated with each object is a list of all resident
+ * memory pages belonging to that object; this list is
+ * maintained by the "vm_page" module, and locked by the object's
+ * lock.
+ *
+ * Each object also records a "pager" routine which is
+ * used to retrieve (and store) pages to the proper backing
+ * storage. In addition, objects may be backed by other
+ * objects from which they were virtual-copied.
+ *
+ * The only items within the object structure which are
+ * modified after time of creation are:
+ * reference count locked by object's lock
+ * pager routine locked by object's lock
+ *
+ */
+
+struct vm_object kernel_object_store;	/* storage that vm_object_init points kernel_object at */
+struct vm_object kmem_object_store;	/* storage that vm_object_init points kmem_object at */
+
+#define VM_OBJECT_HASH_COUNT 157	/* number of buckets in vm_object_hashtable */
+
+int vm_cache_max = 100; /* vm_object_cache_trim evicts while vm_object_cached exceeds this; can patch if necessary */
+struct vm_object_hash_head vm_object_hashtable[VM_OBJECT_HASH_COUNT];	/* pager -> object hash, indexed by vm_object_hash(pager) */
+
+long object_collapses = 0;	/* incremented each time vm_object_collapse merges a backing object */
+long object_bypasses = 0;	/* incremented each time vm_object_collapse bypasses a backing object */
+
+static void _vm_object_allocate __P((vm_size_t, vm_object_t));	/* initializes caller-supplied object storage */
+
+/*
+ *	vm_object_init:
+ *
+ *	One-time setup of the VM object module.  Empties the global
+ *	object lists and every hash bucket, initializes the two module
+ *	locks, and sets up the statically allocated kernel_object (of
+ *	the given size) and kmem_object.
+ */
+void vm_object_init(size)
+	vm_size_t	size;
+{
+	struct vm_object_hash_head	*bucket;
+
+	/* Global lists, the object count, and the locks guarding them. */
+	TAILQ_INIT(&vm_object_cached_list);
+	TAILQ_INIT(&vm_object_list);
+	vm_object_count = 0;
+	simple_lock_init(&vm_cache_lock);
+	simple_lock_init(&vm_object_list_lock);
+
+	/* Every hash chain starts out empty. */
+	for (bucket = &vm_object_hashtable[0];
+	    bucket < &vm_object_hashtable[VM_OBJECT_HASH_COUNT];
+	    bucket++)
+		TAILQ_INIT(bucket);
+
+	/* The two built-in objects live in static storage. */
+	kernel_object = &kernel_object_store;
+	_vm_object_allocate(size, kernel_object);
+
+	kmem_object = &kmem_object_store;
+	_vm_object_allocate(VM_KMEM_SIZE + VM_MBUF_SIZE, kmem_object);
+}
+
+/*
+ *	vm_object_allocate:
+ *
+ *	Obtain storage for a new object from malloc (type M_VMOBJ,
+ *	M_WAITOK), initialize it for the given size through
+ *	_vm_object_allocate, and return it.
+ */
+
+vm_object_t vm_object_allocate(size)
+	vm_size_t	size;
+{
+	register vm_object_t	new_object;
+
+	new_object = (vm_object_t) malloc((u_long)sizeof *new_object,
+	    M_VMOBJ, M_WAITOK);
+	_vm_object_allocate(size, new_object);
+
+	return(new_object);
+}
+
+/*
+ *	_vm_object_allocate:
+ *
+ *	Initialize caller-supplied object storage: one reference, no
+ *	resident pages, no pager, no shadow and no copy.  The object is
+ *	then appended to vm_object_list under vm_object_list_lock.
+ */
+static void
+_vm_object_allocate(size, object)
+	vm_size_t		size;
+	register vm_object_t	object;
+{
+	/* Page list and lock first. */
+	TAILQ_INIT(&object->memq);
+	vm_object_lock_init(object);
+
+	/* Counters and size. */
+	object->size = size;
+	object->ref_count = 1;
+	object->resident_page_count = 0;
+	object->paging_in_progress = 0;
+
+	/* vm_allocate_with_pager will reset */
+	object->flags = OBJ_INTERNAL;
+
+	/*
+	 *	Object starts out read-write, with no pager,
+	 *	no shadow and no copy object.
+	 */
+	object->pager = NULL;
+	object->paging_offset = 0;
+	object->shadow = NULL;
+	object->shadow_offset = (vm_offset_t) 0;
+	object->copy = NULL;
+
+	/* Publish the object on the global list. */
+	simple_lock(&vm_object_list_lock);
+	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
+	vm_object_count++;
+	cnt.v_nzfod += atop(size);
+	simple_unlock(&vm_object_list_lock);
+}
+
+/*
+ *	vm_object_reference:
+ *
+ *	Take one more reference on the given object by incrementing
+ *	its ref_count under the object lock.  A NULL object is ignored.
+ */
+void vm_object_reference(object)
+	register vm_object_t	object;
+{
+	if (object != NULL) {
+		vm_object_lock(object);
+		object->ref_count += 1;
+		vm_object_unlock(object);
+	}
+}
+
+/*
+ * vm_object_deallocate:
+ *
+ * Release a reference to the specified object,
+ * gained either through a vm_object_allocate
+ * or a vm_object_reference call. When all references
+ * are gone, storage associated with this object
+ * may be relinquished.
+ *
+ * No object may be locked.
+ *
+ * Each pass of the loop drops one reference. The routine returns
+ * if other references remain, or if the object is flagged
+ * OBJ_CANPERSIST (it is then appended to vm_object_cached_list and
+ * the cache is trimmed). Otherwise the object is terminated and
+ * the loop repeats on the object it shadowed. A NULL object is
+ * a no-op.
+ */
+void vm_object_deallocate(object)
+ register vm_object_t object;
+{
+ vm_object_t temp;
+
+ while (object != NULL) {
+
+ /*
+ * The cache holds a reference (uncounted) to
+ * the object; we must lock it before removing
+ * the object.
+ */
+
+ vm_object_cache_lock();
+
+ /*
+ * Lose the reference
+ */
+ vm_object_lock(object);
+ if (--(object->ref_count) != 0) {
+
+ /*
+ * If there are still references, then
+ * we are done.
+ */
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+ return;
+ }
+
+ /*
+ * See if this object can persist. If so, enter
+ * it in the cache, then deactivate all of its
+ * pages.
+ */
+
+ if (object->flags & OBJ_CANPERSIST) {
+
+ TAILQ_INSERT_TAIL(&vm_object_cached_list, object,
+ cached_list);
+ vm_object_cached++;
+ vm_object_cache_unlock();
+
+ vm_object_deactivate_pages(object);
+ vm_object_unlock(object);
+
+ vm_object_cache_trim();
+ return;
+ }
+
+ /*
+ * Make sure no one can look us up now
+ * (drop the pager's entry from the hash table).
+ */
+ vm_object_remove(object->pager);
+ vm_object_cache_unlock();
+
+ temp = object->shadow; /* saved first: vm_object_terminate frees the object */
+ vm_object_terminate(object);
+ /* unlocks and deallocates object */
+ object = temp;
+ }
+}
+
+
+/*
+ *	vm_object_terminate actually destroys the specified object, freeing
+ *	up all previously used resources.
+ *
+ *	The object must be locked on entry.  It is unlocked exactly once,
+ *	after its pages have been freed; its pager is then deallocated, it
+ *	is removed from vm_object_list and its storage is freed, so the
+ *	caller must not touch the object afterwards.
+ */
+void vm_object_terminate(object)
+	register vm_object_t	object;
+{
+	register vm_page_t	p;
+	vm_object_t		shadow_object;
+
+	/*
+	 *	Detach the object from its shadow if we are the shadow's
+	 *	copy.
+	 */
+	if ((shadow_object = object->shadow) != NULL) {
+		vm_object_lock(shadow_object);
+		if (shadow_object->copy == object)
+			shadow_object->copy = NULL;
+#if 0
+		else if (shadow_object->copy != NULL)
+			panic("vm_object_terminate: copy/shadow inconsistency");
+#endif
+		vm_object_unlock(shadow_object);
+	}
+
+	/*
+	 *	Wait until the pageout daemon is through with the object.
+	 *	The lock is retaken after every sleep.
+	 */
+	while (object->paging_in_progress) {
+		vm_object_sleep((int)object, object, FALSE);
+		vm_object_lock(object);
+	}
+
+	/*
+	 *	If not an internal object clean all the pages, removing them
+	 *	from paging queues as we go.  vm_object_page_clean requires
+	 *	the object to be locked, and the lock stays held afterwards
+	 *	so that the pages are freed under it.
+	 *
+	 *	XXX need to do something in the event of a cleaning error.
+	 */
+	if ((object->flags & OBJ_INTERNAL) == 0)
+		(void) vm_object_page_clean(object, 0, 0, TRUE, TRUE);
+
+	/*
+	 *	Now free the pages.
+	 *	For internal objects, this also removes them from paging queues.
+	 */
+	while ((p = object->memq.tqh_first) != NULL) {
+		VM_PAGE_CHECK(p);
+		vm_page_lock_queues();
+		vm_page_free(p);
+		cnt.v_pfree++;
+		vm_page_unlock_queues();
+	}
+
+	/*
+	 *	Single unlock, for internal and external objects alike.
+	 *	(Previously an external object was unlocked twice and an
+	 *	internal object was never unlocked.)
+	 */
+	vm_object_unlock(object);
+
+	/*
+	 *	Let the pager know object is dead.
+	 */
+	if (object->pager != NULL)
+		vm_pager_deallocate(object->pager);
+
+	simple_lock(&vm_object_list_lock);
+	TAILQ_REMOVE(&vm_object_list, object, object_list);
+	vm_object_count--;
+	simple_unlock(&vm_object_list_lock);
+
+	/*
+	 *	Free the space for the object.
+	 */
+	free((caddr_t)object, M_VMOBJ);
+}
+
+/*
+ * vm_object_page_clean
+ *
+ * Clean all dirty pages in the specified range of object.
+ * If syncio is TRUE, page cleaning is done synchronously.
+ * If de_queue is TRUE, pages are removed from any paging queue
+ * they were on, otherwise they are left on whatever queue they
+ * were on before the cleaning operation began.
+ *
+ * Odd semantics: if start == end, we clean everything.
+ *
+ * The object must be locked.
+ *
+ * Returns TRUE if all was well, FALSE if there was a pager error
+ * somewhere. We attempt to clean (and dequeue) all pages regardless
+ * of where an error occurs.
+ *
+ * Also returns TRUE for a NULL object, and FALSE without cleaning
+ * anything when the object still has no pager after the allocation
+ * attempt below. The object lock is dropped around the pager
+ * allocation and around each vm_pager_put call, and is held again
+ * on return.
+ */
+boolean_t
+vm_object_page_clean(object, start, end, syncio, de_queue)
+ register vm_object_t object;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ boolean_t syncio;
+ boolean_t de_queue;
+{
+ register vm_page_t p;
+ int onqueue; /* queue the page was taken from: 1 active, -1 inactive, 0 neither */
+ boolean_t noerror = TRUE;
+
+ if (object == NULL)
+ return (TRUE);
+
+ /*
+ * If it is an internal object and there is no pager, attempt to
+ * allocate one. Note that vm_object_collapse may relocate one
+ * from a collapsed object so we must recheck afterward.
+ */
+ if ((object->flags & OBJ_INTERNAL) && object->pager == NULL) {
+ vm_object_collapse(object);
+ if (object->pager == NULL) {
+ vm_pager_t pager;
+
+ vm_object_unlock(object);
+ pager = vm_pager_allocate(PG_DFLT, (caddr_t)0,
+ object->size, VM_PROT_ALL,
+ (vm_offset_t)0);
+ if (pager)
+ vm_object_setpager(object, pager, 0, FALSE);
+ vm_object_lock(object);
+ }
+ }
+ if (object->pager == NULL)
+ return (FALSE);
+
+again:
+ /*
+ * Wait until the pageout daemon is through with the object.
+ */
+ while (object->paging_in_progress) {
+ vm_object_sleep((int)object, object, FALSE);
+ vm_object_lock(object);
+ }
+ /*
+ * Loop through the object page list cleaning as necessary.
+ * Fictitious pages are skipped.
+ */
+ for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
+ if ((start == end || p->offset >= start && p->offset < end) &&
+ !(p->flags & PG_FICTITIOUS)) {
+ if ((p->flags & PG_CLEAN) &&
+ pmap_is_modified(VM_PAGE_TO_PHYS(p)))
+ p->flags &= ~PG_CLEAN;
+ /*
+ * Remove the page from any paging queue.
+ * This needs to be done if either we have been
+ * explicitly asked to do so or it is about to
+ * be cleaned (see comment below).
+ */
+ if (de_queue || !(p->flags & PG_CLEAN)) {
+ vm_page_lock_queues();
+ if (p->flags & PG_ACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_active,
+ p, pageq);
+ p->flags &= ~PG_ACTIVE;
+ cnt.v_active_count--;
+ onqueue = 1;
+ } else if (p->flags & PG_INACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_inactive,
+ p, pageq);
+ p->flags &= ~PG_INACTIVE;
+ cnt.v_inactive_count--;
+ onqueue = -1;
+ } else
+ onqueue = 0;
+ vm_page_unlock_queues();
+ }
+ /*
+ * To ensure the state of the page doesn't change
+ * during the clean operation we do two things.
+ * First we set the busy bit and write-protect all
+ * mappings to ensure that write accesses to the
+ * page block (in vm_fault). Second, we remove
+ * the page from any paging queue to foil the
+ * pageout daemon (vm_pageout_scan).
+ */
+ pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ);
+ if (!(p->flags & PG_CLEAN)) {
+ p->flags |= PG_BUSY;
+ object->paging_in_progress++;
+ vm_object_unlock(object);
+ /*
+ * XXX if put fails we mark the page as
+ * clean to avoid an infinite loop.
+ * Will lose changes to the page.
+ */
+ if (vm_pager_put(object->pager, p, syncio)) {
+ printf("%s: pager_put error\n",
+ "vm_object_page_clean");
+ p->flags |= PG_CLEAN;
+ noerror = FALSE;
+ }
+ vm_object_lock(object);
+ object->paging_in_progress--;
+ if (!de_queue && onqueue) { /* put the page back on the queue it came from */
+ vm_page_lock_queues();
+ if (onqueue > 0)
+ vm_page_activate(p);
+ else
+ vm_page_deactivate(p);
+ vm_page_unlock_queues();
+ }
+ p->flags &= ~PG_BUSY;
+ PAGE_WAKEUP(p);
+ goto again; /* the object was unlocked during the put, so rescan the page list from the top */
+ }
+ }
+ }
+ return (noerror);
+}
+
+/*
+ *	vm_object_deactivate_pages
+ *
+ *	Call vm_page_deactivate, with the page queues locked, on every
+ *	page on the object's page list.  (The pages stay in memory even
+ *	though the object is no longer referenced.)
+ *
+ *	The object must be locked.
+ */
+void
+vm_object_deactivate_pages(object)
+	register vm_object_t	object;
+{
+	register vm_page_t	page, following;
+
+	page = object->memq.tqh_first;
+	while (page != NULL) {
+		/* Pick up the successor before the page is handled. */
+		following = page->listq.tqe_next;
+
+		vm_page_lock_queues();
+		vm_page_deactivate(page);
+		vm_page_unlock_queues();
+
+		page = following;
+	}
+}
+
+/*
+ *	Trim the object cache to size.
+ *
+ *	While more than vm_cache_max objects are cached, take the object
+ *	at the head of vm_object_cached_list, regain a reference to it
+ *	through vm_object_lookup (which also unlinks it from the cached
+ *	list), and pass it to pager_cache(object, FALSE).  The cache lock
+ *	is dropped around the lookup and the pager_cache call.
+ */
+void
+vm_object_cache_trim()
+{
+	register vm_object_t	object;
+
+	vm_object_cache_lock();
+	while (vm_object_cached > vm_cache_max) {
+		object = vm_object_cached_list.tqh_first;
+		vm_object_cache_unlock();
+
+		/*
+		 *	The lookup by pager must hand back this very object.
+		 *	The panic names this routine so that the failure can
+		 *	be located.
+		 */
+		if (object != vm_object_lookup(object->pager))
+			panic("vm_object_cache_trim: I'm sooo confused.");
+
+		pager_cache(object, FALSE);
+
+		vm_object_cache_lock();
+	}
+	vm_object_cache_unlock();
+}
+
+/*
+ *	vm_object_pmap_copy:
+ *
+ *	For every page of the object whose offset lies in [start, end),
+ *	lower its mappings to read-only with pmap_page_protect and flag
+ *	the page PG_COPYONWRITE.  A NULL object is ignored.
+ *
+ *	The object must *not* be locked.
+ */
+void vm_object_pmap_copy(object, start, end)
+	register vm_object_t	object;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+{
+	register vm_page_t	page;
+
+	if (object != NULL) {
+		vm_object_lock(object);
+		page = object->memq.tqh_first;
+		while (page != NULL) {
+			if (page->offset >= start && page->offset < end) {
+				pmap_page_protect(VM_PAGE_TO_PHYS(page),
+				    VM_PROT_READ);
+				page->flags |= PG_COPYONWRITE;
+			}
+			page = page->listq.tqe_next;
+		}
+		vm_object_unlock(object);
+	}
+}
+
+/*
+ *	vm_object_pmap_remove:
+ *
+ *	For every page of the object whose offset lies in [start, end),
+ *	call pmap_page_protect with VM_PROT_NONE, removing the page
+ *	from all physical maps.  A NULL object is ignored.
+ *
+ *	The object must *not* be locked.
+ */
+void vm_object_pmap_remove(object, start, end)
+	register vm_object_t	object;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+{
+	register vm_page_t	page;
+
+	if (object != NULL) {
+		vm_object_lock(object);
+		page = object->memq.tqh_first;
+		while (page != NULL) {
+			if (page->offset >= start && page->offset < end)
+				pmap_page_protect(VM_PAGE_TO_PHYS(page),
+				    VM_PROT_NONE);
+			page = page->listq.tqe_next;
+		}
+		vm_object_unlock(object);
+	}
+}
+
+/*
+ * vm_object_copy:
+ *
+ * Create a new object which is a copy of an existing
+ * object, and mark all of the pages in the existing
+ * object 'copy-on-write'. The new object has one reference.
+ * The result is handed back through the OUT parameters:
+ * *dst_object and *dst_offset name the copy, and *src_needs_copy
+ * tells the caller whether a shadow must still be made before
+ * the source is written.
+ *
+ * The copy is deferred when the source object is internal or
+ * has no pager: the source object itself is returned with an
+ * extra reference and *src_needs_copy is set to TRUE.
+ * A NULL source yields a NULL *dst_object.
+ */
+void vm_object_copy(src_object, src_offset, size,
+ dst_object, dst_offset, src_needs_copy)
+ register vm_object_t src_object;
+ vm_offset_t src_offset;
+ vm_size_t size;
+ vm_object_t *dst_object; /* OUT */
+ vm_offset_t *dst_offset; /* OUT */
+ boolean_t *src_needs_copy; /* OUT */
+{
+ register vm_object_t new_copy;
+ register vm_object_t old_copy;
+ vm_offset_t new_start, new_end;
+
+ register vm_page_t p;
+
+ if (src_object == NULL) {
+ /*
+ * Nothing to copy
+ */
+ *dst_object = NULL;
+ *dst_offset = 0;
+ *src_needs_copy = FALSE;
+ return;
+ }
+
+ /*
+ * If the object's pager is null_pager or the
+ * default pager, we don't have to make a copy
+ * of it. Instead, we set the needs copy flag and
+ * make a shadow later.
+ */
+
+ vm_object_lock(src_object);
+ if (src_object->pager == NULL ||
+ (src_object->flags & OBJ_INTERNAL)) {
+
+ /*
+ * Make another reference to the object
+ */
+ src_object->ref_count++;
+
+ /*
+ * Mark all of the pages copy-on-write.
+ */
+ for (p = src_object->memq.tqh_first; p; p = p->listq.tqe_next)
+ if (src_offset <= p->offset &&
+ p->offset < src_offset + size)
+ p->flags |= PG_COPYONWRITE;
+ vm_object_unlock(src_object);
+
+ *dst_object = src_object;
+ *dst_offset = src_offset;
+
+ /*
+ * Must make a shadow when write is desired
+ */
+ *src_needs_copy = TRUE;
+ return;
+ }
+
+ /*
+ * Try to collapse the object before copying it.
+ */
+ vm_object_collapse(src_object);
+
+ /*
+ * If the object has a pager, the pager wants to
+ * see all of the changes. We need a copy-object
+ * for the changed pages.
+ *
+ * If there is a copy-object, and it is empty,
+ * no changes have been made to the object since the
+ * copy-object was made. We can use the same copy-
+ * object.
+ */
+
+ Retry1:
+ old_copy = src_object->copy;
+ if (old_copy != NULL) {
+ /*
+ * Try to get the locks (out of order).
+ * If the try fails, release the source lock and
+ * start over instead of blocking while holding it.
+ */
+ if (!vm_object_lock_try(old_copy)) {
+ vm_object_unlock(src_object);
+
+ /* should spin a bit here... */
+ vm_object_lock(src_object);
+ goto Retry1;
+ }
+
+ if (old_copy->resident_page_count == 0 &&
+ old_copy->pager == NULL) {
+ /*
+ * Return another reference to
+ * the existing copy-object.
+ */
+ old_copy->ref_count++;
+ vm_object_unlock(old_copy);
+ vm_object_unlock(src_object);
+ *dst_object = old_copy;
+ *dst_offset = src_offset;
+ *src_needs_copy = FALSE;
+ return;
+ }
+ vm_object_unlock(old_copy);
+ }
+ vm_object_unlock(src_object);
+
+ /*
+ * If the object has a pager, the pager wants
+ * to see all of the changes. We must make
+ * a copy-object and put the changed pages there.
+ *
+ * The copy-object is always made large enough to
+ * completely shadow the original object, since
+ * it may have several users who want to shadow
+ * the original object at different points.
+ */
+
+ new_copy = vm_object_allocate(src_object->size);
+
+ Retry2:
+ vm_object_lock(src_object);
+ /*
+ * Copy object may have changed while we were unlocked
+ */
+ old_copy = src_object->copy;
+ if (old_copy != NULL) {
+ /*
+ * Try to get the locks (out of order)
+ */
+ if (!vm_object_lock_try(old_copy)) {
+ vm_object_unlock(src_object);
+ goto Retry2;
+ }
+
+ /*
+ * Consistency check
+ */
+ if (old_copy->shadow != src_object ||
+ old_copy->shadow_offset != (vm_offset_t) 0)
+ panic("vm_object_copy: copy/shadow inconsistency");
+
+ /*
+ * Make the old copy-object shadow the new one.
+ * It will receive no more pages from the original
+ * object.
+ */
+
+ src_object->ref_count--; /* remove ref. from old_copy */
+ old_copy->shadow = new_copy;
+ new_copy->ref_count++; /* locking not needed - we
+ have the only pointer */
+ vm_object_unlock(old_copy); /* done with old_copy */
+ }
+
+ new_start = (vm_offset_t) 0; /* always shadow original at 0 */
+ new_end = (vm_offset_t) new_copy->size; /* for the whole object */
+
+ /*
+ * Point the new copy at the existing object.
+ */
+
+ new_copy->shadow = src_object;
+ new_copy->shadow_offset = new_start;
+ src_object->ref_count++;
+ src_object->copy = new_copy;
+
+ /*
+ * Mark all the affected pages of the existing object
+ * copy-on-write.
+ */
+ for (p = src_object->memq.tqh_first; p != NULL; p = p->listq.tqe_next)
+ if ((new_start <= p->offset) && (p->offset < new_end))
+ p->flags |= PG_COPYONWRITE;
+
+ vm_object_unlock(src_object);
+
+ *dst_object = new_copy;
+ *dst_offset = src_offset - new_start;
+ *src_needs_copy = FALSE;
+}
+
+/*
+ *	vm_object_shadow:
+ *
+ *	Allocate a fresh object of the given length that is backed
+ *	by the caller's object at the caller's offset.
+ *
+ *	Both arguments are IN/OUT: on return *object is the new
+ *	object and *offset is 0.  The reference the caller held on
+ *	the source is taken over by the new object, so no reference
+ *	count changes.
+ */
+
+void vm_object_shadow(object, offset, length)
+	vm_object_t	*object;	/* IN/OUT */
+	vm_offset_t	*offset;	/* IN/OUT */
+	vm_size_t	length;
+{
+	register vm_object_t	shadowing;
+
+	/*
+	 *	Get the new object first; without one we cannot go on.
+	 */
+	shadowing = vm_object_allocate(length);
+	if (shadowing == NULL)
+		panic("vm_object_shadow: no object for shadowing");
+
+	/*
+	 *	Hook the new object on top of the source, remembering
+	 *	where in the source its offset zero lies.
+	 */
+	shadowing->shadow = *object;
+	shadowing->shadow_offset = *offset;
+
+	/*
+	 *	Hand the new object and its offset back to the caller.
+	 */
+	*offset = 0;
+	*object = shadowing;
+}
+
+/*
+ *	vm_object_setpager:
+ *
+ *	Record the given pager and paging offset in the object, under
+ *	the object lock.  The read_only argument is no longer used.
+ */
+
+void vm_object_setpager(object, pager, paging_offset, read_only)
+	vm_object_t	object;
+	vm_pager_t	pager;
+	vm_offset_t	paging_offset;
+	boolean_t	read_only;
+{
+#ifdef	lint
+	read_only++;	/* No longer used */
+#endif
+
+	vm_object_lock(object);			/* XXX ? */
+	object->paging_offset = paging_offset;
+	object->pager = pager;
+	vm_object_unlock(object);		/* XXX ? */
+}
+
+/*
+ *	vm_object_hash maps a pager pointer to a bucket index in
+ *	vm_object_hashtable.  The macro argument is parenthesized so
+ *	that an expression argument is cast as a whole.
+ */
+
+#define vm_object_hash(pager) \
+	(((unsigned)(pager))%VM_OBJECT_HASH_COUNT)
+
+/*
+ *	vm_object_lookup searches the pager hash table for the object
+ *	that uses the specified pager.
+ *
+ *	On a hit the object gains a reference and is returned; if it
+ *	had no references it is first unlinked from the cached list.
+ *	NULL is returned when no object uses the pager.
+ */
+
+vm_object_t vm_object_lookup(pager)
+	vm_pager_t	pager;
+{
+	register vm_object_hash_entry_t	entry;
+	vm_object_t			found;
+
+	vm_object_cache_lock();
+
+	entry = vm_object_hashtable[vm_object_hash(pager)].tqh_first;
+	while (entry != NULL) {
+		found = entry->object;
+		if (found->pager != pager) {
+			entry = entry->hash_links.tqe_next;
+			continue;
+		}
+
+		vm_object_lock(found);
+		if (found->ref_count == 0) {
+			/* Unreferenced, so it sits on the cached list. */
+			TAILQ_REMOVE(&vm_object_cached_list, found,
+			    cached_list);
+			vm_object_cached--;
+		}
+		found->ref_count++;
+		vm_object_unlock(found);
+		vm_object_cache_unlock();
+		return(found);
+	}
+
+	vm_object_cache_unlock();
+	return(NULL);
+}
+
+/*
+ *	vm_object_enter adds a hash table entry for the given
+ *	object under the given pager and flags the object
+ *	OBJ_CANPERSIST.
+ */
+
+void vm_object_enter(object, pager)
+	vm_object_t	object;
+	vm_pager_t	pager;
+{
+	register vm_object_hash_entry_t	new_entry;
+	struct vm_object_hash_head	*chain;
+
+	/*
+	 *	We don't cache null objects, and we can't cache
+	 *	objects with the null pager.
+	 */
+	if (object == NULL || pager == NULL)
+		return;
+
+	chain = &vm_object_hashtable[vm_object_hash(pager)];
+
+	new_entry = (vm_object_hash_entry_t)
+	    malloc((u_long)sizeof *new_entry, M_VMOBJHASH, M_WAITOK);
+	new_entry->object = object;
+	object->flags |= OBJ_CANPERSIST;
+
+	/* Only the chain insertion is done under the cache lock. */
+	vm_object_cache_lock();
+	TAILQ_INSERT_TAIL(chain, new_entry, hash_links);
+	vm_object_cache_unlock();
+}
+
+/*
+ *	vm_object_remove:
+ *
+ *	Unlink and free the first hash table entry whose object
+ *	uses the given pager; does nothing if there is none.
+ *	Note:  This assumes that the object cache
+ *	is locked.  XXX this should be fixed
+ *	by reorganizing vm_object_deallocate.
+ */
+void
+vm_object_remove(pager)
+	register vm_pager_t	pager;
+{
+	struct vm_object_hash_head	*chain;
+	register vm_object_hash_entry_t	entry;
+
+	chain = &vm_object_hashtable[vm_object_hash(pager)];
+
+	entry = chain->tqh_first;
+	while (entry != NULL) {
+		if (entry->object->pager == pager) {
+			TAILQ_REMOVE(chain, entry, hash_links);
+			free((caddr_t)entry, M_VMOBJHASH);
+			return;
+		}
+		entry = entry->hash_links.tqe_next;
+	}
+}
+
+/*
+ *	vm_object_cache_clear empties the object cache: as long as
+ *	the cached list has a head, that object is looked up by its
+ *	pager and passed to pager_cache(object, FALSE).
+ */
+
+void vm_object_cache_clear()
+{
+	register vm_object_t	victim;
+
+	vm_object_cache_lock();
+	for (;;) {
+		victim = vm_object_cached_list.tqh_first;
+		if (victim == NULL)
+			break;
+		vm_object_cache_unlock();
+
+		/*
+		 *	The reference must come from vm_object_lookup and
+		 *	not from vm_object_reference: lookup is where the
+		 *	logic for taking an object off the cached list
+		 *	lives.
+		 */
+		if (victim != vm_object_lookup(victim->pager))
+			panic("vm_object_cache_clear: I'm sooo confused.");
+		pager_cache(victim, FALSE);
+
+		vm_object_cache_lock();
+	}
+	vm_object_cache_unlock();
+}
+
+boolean_t vm_object_collapse_allowed = TRUE; /* when FALSE, vm_object_collapse returns immediately */
+/*
+ * vm_object_collapse:
+ *
+ * Collapse an object with the object backing it.
+ * Pages in the backing object are moved into the
+ * parent, and the backing object is deallocated.
+ *
+ * Requires that the object be locked and the page
+ * queues be unlocked.
+ *
+ * Two cases are handled, repeatedly, until the conditions
+ * checked at the top of the loop no longer hold:
+ * - the backing object has exactly one reference: its pages
+ * and pager are moved into the parent and its storage is
+ * freed (counted in object_collapses);
+ * - otherwise, if the backing object has no pager and the
+ * parent already has a non-fake page for every backing page
+ * that falls within the parent, the parent is re-pointed at
+ * the backing object's own shadow (counted in
+ * object_bypasses).
+ *
+ */
+void vm_object_collapse(object)
+ register vm_object_t object;
+
+{
+ register vm_object_t backing_object;
+ register vm_offset_t backing_offset;
+ register vm_size_t size;
+ register vm_offset_t new_offset;
+ register vm_page_t p, pp;
+
+ if (!vm_object_collapse_allowed)
+ return;
+
+ while (TRUE) {
+ /*
+ * Verify that the conditions are right for collapse:
+ *
+ * The object exists and no pages in it are currently
+ * being paged out (or have ever been paged out).
+ */
+ if (object == NULL ||
+ object->paging_in_progress != 0 ||
+ object->pager != NULL)
+ return;
+
+ /*
+ * There is a backing object, and
+ */
+
+ if ((backing_object = object->shadow) == NULL)
+ return;
+
+ vm_object_lock(backing_object);
+ /*
+ * ...
+ * The backing object is not read_only,
+ * and no pages in the backing object are
+ * currently being paged out.
+ * The backing object is internal.
+ */
+
+ if ((backing_object->flags & OBJ_INTERNAL) == 0 ||
+ backing_object->paging_in_progress != 0) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * The backing object can't be a copy-object:
+ * the shadow_offset for the copy-object must stay
+ * as 0. Furthermore (for the 'we have all the
+ * pages' case), if we bypass backing_object and
+ * just shadow the next object in the chain, old
+ * pages from that object would then have to be copied
+ * BOTH into the (former) backing_object and into the
+ * parent object.
+ */
+ if (backing_object->shadow != NULL &&
+ backing_object->shadow->copy != NULL) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * We know that we can either collapse the backing
+ * object (if the parent is the only reference to
+ * it) or (perhaps) remove the parent's reference
+ * to it.
+ */
+
+ backing_offset = object->shadow_offset;
+ size = object->size;
+
+ /*
+ * If there is exactly one reference to the backing
+ * object, we can collapse it into the parent.
+ */
+
+ if (backing_object->ref_count == 1) {
+
+ /*
+ * We can collapse the backing object.
+ *
+ * Move all in-memory pages from backing_object
+ * to the parent. Pages that have been paged out
+ * will be overwritten by any of the parent's
+ * pages that shadow them.
+ */
+
+ while ((p = backing_object->memq.tqh_first) != NULL) {
+ new_offset = (p->offset - backing_offset);
+
+ /*
+ * If the parent has a page here, or if
+ * this page falls outside the parent,
+ * dispose of it.
+ *
+ * Otherwise, move it as planned.
+ */
+
+ if (p->offset < backing_offset ||
+ new_offset >= size) {
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ } else {
+ pp = vm_page_lookup(object, new_offset);
+ if (pp != NULL && !(pp->flags & PG_FAKE)) {
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ }
+ else {
+ if (pp) {
+ /* may be someone waiting for it */
+ PAGE_WAKEUP(pp);
+ vm_page_lock_queues();
+ vm_page_free(pp);
+ vm_page_unlock_queues();
+ }
+ vm_page_rename(p, object, new_offset);
+ }
+ }
+ }
+
+ /*
+ * Move the pager from backing_object to object.
+ *
+ * XXX We're only using part of the paging space
+ * for keeps now... we ought to discard the
+ * unused portion.
+ */
+
+ if (backing_object->pager) {
+ object->pager = backing_object->pager;
+ object->paging_offset = backing_offset +
+ backing_object->paging_offset;
+ backing_object->pager = NULL;
+ }
+
+ /*
+ * Object now shadows whatever backing_object did.
+ * Note that the reference to backing_object->shadow
+ * moves from within backing_object to within object.
+ */
+
+ object->shadow = backing_object->shadow;
+ object->shadow_offset += backing_object->shadow_offset;
+ if (object->shadow != NULL &&
+ object->shadow->copy != NULL) {
+ panic("vm_object_collapse: we collapsed a copy-object!");
+ }
+ /*
+ * Discard backing_object.
+ *
+ * Since the backing object has no pages, no
+ * pager left, and no object references within it,
+ * all that is necessary is to dispose of it.
+ */
+
+ vm_object_unlock(backing_object);
+
+ simple_lock(&vm_object_list_lock);
+ TAILQ_REMOVE(&vm_object_list, backing_object,
+ object_list);
+ vm_object_count--;
+ simple_unlock(&vm_object_list_lock);
+
+ free((caddr_t)backing_object, M_VMOBJ);
+
+ object_collapses++;
+ }
+ else {
+ /*
+ * If all of the pages in the backing object are
+ * shadowed by the parent object, the parent
+ * object no longer has to shadow the backing
+ * object; it can shadow the next one in the
+ * chain.
+ *
+ * The backing object must not be paged out - we'd
+ * have to check all of the paged-out pages, as
+ * well.
+ */
+
+ if (backing_object->pager != NULL) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * Should have a check for a 'small' number
+ * of pages here.
+ */
+
+ for (p = backing_object->memq.tqh_first;
+ p != NULL;
+ p = p->listq.tqe_next) {
+ new_offset = (p->offset - backing_offset);
+
+ /*
+ * If the parent has a page here, or if
+ * this page falls outside the parent,
+ * keep going.
+ *
+ * Otherwise, the backing_object must be
+ * left in the chain.
+ */
+
+ if (p->offset >= backing_offset &&
+ new_offset < size &&
+ ((pp = vm_page_lookup(object, new_offset))
+ == NULL ||
+ (pp->flags & PG_FAKE))) {
+ /*
+ * Page still needed.
+ * Can't go any further.
+ */
+ vm_object_unlock(backing_object);
+ return;
+ }
+ }
+
+ /*
+ * Make the parent shadow the next object
+ * in the chain. Deallocating backing_object
+ * will not remove it, since its reference
+ * count is at least 2.
+ */
+
+ object->shadow = backing_object->shadow;
+ vm_object_reference(object->shadow);
+ object->shadow_offset += backing_object->shadow_offset;
+
+ /*
+ * Backing object might have had a copy pointer
+ * to us. If it did, clear it.
+ */
+ if (backing_object->copy == object) {
+ backing_object->copy = NULL;
+ }
+
+ /* Drop the reference count on backing_object.
+ * Since its ref_count was at least 2, it
+ * will not vanish; so we don't need to call
+ * vm_object_deallocate.
+ */
+ backing_object->ref_count--;
+ vm_object_unlock(backing_object);
+
+ object_bypasses ++;
+
+ }
+
+ /*
+ * Try again with this object's new backing object.
+ */
+ }
+}
+
+/*
+ *	vm_object_page_remove: [internal]
+ *
+ *	For every page of the object whose offset lies in
+ *	[start, end), remove all of its mappings and free the page.
+ *	A NULL object is ignored.
+ *
+ *	The object must be locked.
+ */
+void vm_object_page_remove(object, start, end)
+	register vm_object_t	object;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+{
+	register vm_page_t	page, following;
+
+	if (object == NULL)
+		return;
+
+	page = object->memq.tqh_first;
+	while (page != NULL) {
+		/* Freeing unlinks the page, so note its successor now. */
+		following = page->listq.tqe_next;
+
+		if (page->offset >= start && page->offset < end) {
+			pmap_page_protect(VM_PAGE_TO_PHYS(page), VM_PROT_NONE);
+			vm_page_lock_queues();
+			vm_page_free(page);
+			vm_page_unlock_queues();
+		}
+		page = following;
+	}
+}
+
+/*
+ *	Routine:	vm_object_coalesce
+ *	Function:	Try to let prev_object also back the region that
+ *			immediately follows the one it backs now, so that
+ *			a single object serves both adjoining regions.
+ *
+ *	Returns TRUE if the regions can share the object, FALSE otherwise.
+ *
+ *	NOTE:	Only works at the moment if the second object is NULL -
+ *		if it's not, which object do we lock first?
+ *
+ *	Parameters:
+ *		prev_object	First object to coalesce
+ *		prev_offset	Offset into prev_object
+ *		next_object	Second object to coalesce (must be NULL)
+ *		next_offset	Offset into next_object (unused)
+ *
+ *		prev_size	Size of reference to prev_object
+ *		next_size	Size of reference to next_object
+ *
+ *	Conditions:
+ *	The object must *not* be locked.
+ */
+boolean_t vm_object_coalesce(prev_object, next_object,
+			prev_offset, next_offset,
+			prev_size, next_size)
+
+	register vm_object_t	prev_object;
+	vm_object_t	next_object;
+	vm_offset_t	prev_offset, next_offset;
+	vm_size_t	prev_size, next_size;
+{
+	vm_size_t	newsize;
+	boolean_t	merged;
+
+#ifdef	lint
+	next_offset++;
+#endif
+
+	/* A real second object is not supported. */
+	if (next_object != NULL)
+		return(FALSE);
+
+	/* Two absent objects coalesce trivially. */
+	if (prev_object == NULL)
+		return(TRUE);
+
+	vm_object_lock(prev_object);
+
+	/*
+	 *	Try to collapse the object first
+	 */
+	vm_object_collapse(prev_object);
+
+	/*
+	 *	Coalescing is only possible when the object has:
+	 *	. at most one reference
+	 *	. no pager (never paged out)
+	 *	. no shadow
+	 *	. no copy elsewhere
+	 *	(otherwise the pages not mapped to
+	 *	prev_entry may be in use anyway)
+	 */
+	if (prev_object->ref_count <= 1 &&
+	    prev_object->pager == NULL &&
+	    prev_object->shadow == NULL &&
+	    prev_object->copy == NULL) {
+		/*
+		 *	Remove any pages that may still be in the
+		 *	object from a previous deallocation.
+		 */
+		vm_object_page_remove(prev_object,
+		    prev_offset + prev_size,
+		    prev_offset + prev_size + next_size);
+
+		/*
+		 *	Extend the object if necessary.
+		 */
+		newsize = prev_offset + prev_size + next_size;
+		if (newsize > prev_object->size)
+			prev_object->size = newsize;
+
+		merged = TRUE;
+	} else {
+		merged = FALSE;
+	}
+
+	vm_object_unlock(prev_object);
+	return(merged);
+}
+
+/*
+ *	vm_object_print:	[ debug ]
+ *
+ *	Print the object's address, size, resident page count, reference
+ *	count, pager, shadow and cache list links.  When "full" is TRUE,
+ *	also list the offset and physical address of every resident page,
+ *	six entries per line.  A NULL object prints nothing.
+ *
+ *	Every value printed with %x is cast to int so that the arguments
+ *	match the format, as the first two print calls already did.
+ */
+void vm_object_print(object, full)
+	vm_object_t	object;
+	boolean_t	full;
+{
+	register vm_page_t	p;
+	extern int		indent;	/* explicit type; was implicit int */
+
+	register int count;
+
+	if (object == NULL)
+		return;
+
+	iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ",
+		(int) object, (int) object->size,
+		object->resident_page_count, object->ref_count);
+	printf("pager=0x%x+0x%x, shadow=(0x%x)+0x%x\n",
+	       (int) object->pager, (int) object->paging_offset,
+	       (int) object->shadow, (int) object->shadow_offset);
+	printf("cache: next=0x%x, prev=0x%x\n",
+	       (int) object->cached_list.tqe_next,
+	       (int) object->cached_list.tqe_prev);
+
+	if (!full)
+		return;
+
+	indent += 2;
+	count = 0;
+	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
+		if (count == 0)
+			iprintf("memory:=");
+		else if (count == 6) {
+			/* Line is full: start a continuation line. */
+			printf("\n");
+			iprintf(" ...");
+			count = 0;
+		} else
+			printf(",");
+		count++;
+
+		printf("(off=0x%x,page=0x%x)",
+		       (int) p->offset, (int) VM_PAGE_TO_PHYS(p));
+	}
+	if (count != 0)
+		printf("\n");
+	indent -= 2;
+}
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
new file mode 100644
index 000000000000..5e220acd47cc
--- /dev/null
+++ b/sys/vm/vm_object.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_object.h 8.3 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Virtual memory object module definitions.
+ */
+
+#ifndef _VM_OBJECT_
+#define _VM_OBJECT_
+
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+/*
+ * Types defined:
+ *
+ * vm_object_t Virtual memory object.
+ */
+
+/*
+ * One of these describes each virtual memory object.  The object's
+ * resident pages are queued on memq; "shadow" and "copy" point at
+ * related objects.
+ */
+struct vm_object {
+ struct pglist memq; /* Resident pages, linked through vm_page.listq */
+ TAILQ_ENTRY(vm_object) object_list; /* list of all objects */
+ u_short flags; /* OBJ_* flags, see below */
+ u_short paging_in_progress; /* Paging (in or out) so
+ don't collapse or destroy */
+ simple_lock_data_t Lock; /* Synchronization; see vm_object_lock() */
+ int ref_count; /* Number of references */
+ vm_size_t size; /* Object size */
+ int resident_page_count;
+ /* number of resident pages */
+ struct vm_object *copy; /* Object that holds copies of
+ my changed pages */
+ vm_pager_t pager; /* Where to get data */
+ vm_offset_t paging_offset; /* Offset into paging space */
+ struct vm_object *shadow; /* My shadow */
+ vm_offset_t shadow_offset; /* Offset in shadow */
+ TAILQ_ENTRY(vm_object) cached_list; /* for persistence */
+};
+/*
+ * Values for the "flags" field of struct vm_object.
+ */
+#define OBJ_CANPERSIST 0x0001 /* allow to persist */
+#define OBJ_INTERNAL 0x0002 /* internally created object */
+#define OBJ_ACTIVE 0x0004 /* used to mark active objects */
+
+/* Head type for one chain of vm_object_hash_entry structures. */
+TAILQ_HEAD(vm_object_hash_head, vm_object_hash_entry);
+
+/* One hash chain element; it refers to a single vm_object. */
+struct vm_object_hash_entry {
+ TAILQ_ENTRY(vm_object_hash_entry) hash_links; /* hash chain links */
+ vm_object_t object; /* object represented */
+};
+
+typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
+
+#ifdef KERNEL
+/*
+ * Kernel-wide object bookkeeping.
+ *
+ * NOTE(review): these are declared without "extern", so every file that
+ * includes this header gets its own tentative definition of each
+ * variable.  That relies on the linker merging them; confirm before
+ * building with a toolchain that does not.
+ */
+TAILQ_HEAD(object_q, vm_object);
+
+struct object_q vm_object_cached_list; /* list of objects persisting */
+int vm_object_cached; /* size of cached list */
+simple_lock_data_t vm_cache_lock; /* lock for object cache */
+
+struct object_q vm_object_list; /* list of allocated objects */
+long vm_object_count; /* count of all objects */
+simple_lock_data_t vm_object_list_lock;
+ /* lock for object list and count */
+
+vm_object_t kernel_object; /* the single kernel object */
+vm_object_t kmem_object;
+
+/* Take and release vm_cache_lock. */
+#define vm_object_cache_lock() simple_lock(&vm_cache_lock)
+#define vm_object_cache_unlock() simple_unlock(&vm_cache_lock)
+#endif /* KERNEL */
+
+/*
+ * Per-object locking.  All of these operate on the object's "Lock"
+ * field; vm_object_sleep() hands that lock to thread_sleep() together
+ * with the event to wait on.
+ */
+#define vm_object_lock_init(object) simple_lock_init(&(object)->Lock)
+#define vm_object_lock(object) simple_lock(&(object)->Lock)
+#define vm_object_unlock(object) simple_unlock(&(object)->Lock)
+#define vm_object_lock_try(object) simple_lock_try(&(object)->Lock)
+#define vm_object_sleep(event, object, interruptible) \
+ thread_sleep((event), &(object)->Lock, (interruptible))
+
+#ifdef KERNEL
+/*
+ * Kernel-only entry points of the object module.  Argument lists are
+ * wrapped in __P(()) (presumably so that pre-ANSI compilers can drop
+ * them -- confirm against the definition of __P).
+ */
+vm_object_t vm_object_allocate __P((vm_size_t));
+void vm_object_cache_clear __P((void));
+void vm_object_cache_trim __P((void));
+boolean_t vm_object_coalesce __P((vm_object_t, vm_object_t,
+ vm_offset_t, vm_offset_t, vm_offset_t, vm_size_t));
+void vm_object_collapse __P((vm_object_t));
+void vm_object_copy __P((vm_object_t, vm_offset_t, vm_size_t,
+ vm_object_t *, vm_offset_t *, boolean_t *));
+void vm_object_deactivate_pages __P((vm_object_t));
+void vm_object_deallocate __P((vm_object_t));
+void vm_object_enter __P((vm_object_t, vm_pager_t));
+void vm_object_init __P((vm_size_t));
+vm_object_t vm_object_lookup __P((vm_pager_t));
+boolean_t vm_object_page_clean __P((vm_object_t,
+ vm_offset_t, vm_offset_t, boolean_t, boolean_t));
+void vm_object_page_remove __P((vm_object_t,
+ vm_offset_t, vm_offset_t));
+void vm_object_pmap_copy __P((vm_object_t,
+ vm_offset_t, vm_offset_t));
+void vm_object_pmap_remove __P((vm_object_t,
+ vm_offset_t, vm_offset_t));
+void vm_object_print __P((vm_object_t, boolean_t));
+void vm_object_reference __P((vm_object_t));
+void vm_object_remove __P((vm_pager_t));
+void vm_object_setpager __P((vm_object_t,
+ vm_pager_t, vm_offset_t, boolean_t));
+void vm_object_shadow __P((vm_object_t *,
+ vm_offset_t *, vm_size_t));
+void vm_object_terminate __P((vm_object_t));
+#endif
+#endif /* _VM_OBJECT_ */
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
new file mode 100644
index 000000000000..0cd9d875b699
--- /dev/null
+++ b/sys/vm/vm_page.c
@@ -0,0 +1,696 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_page.c 8.3 (Berkeley) 3/21/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Resident memory management module.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_pageout.h>
+
+/*
+ * Associated with each page of user-allocatable memory is a
+ * page structure.
+ */
+
+/* Object/offset hash table; sized and allocated in vm_page_startup(). */
+struct pglist *vm_page_buckets; /* Array of buckets */
+int vm_page_bucket_count = 0; /* How big is array? */
+int vm_page_hash_mask; /* Mask for hash function */
+simple_lock_data_t bucket_lock; /* lock for all buckets XXX */
+
+/* The three page queues and their locks. */
+struct pglist vm_page_queue_free;
+struct pglist vm_page_queue_active;
+struct pglist vm_page_queue_inactive;
+simple_lock_data_t vm_page_queue_lock;
+simple_lock_data_t vm_page_queue_free_lock;
+
+/* has physical page allocation been initialized? */
+boolean_t vm_page_startup_initialized;
+
+/* Page array and the physical range it describes; set in vm_page_startup(). */
+vm_page_t vm_page_array;
+long first_page;
+long last_page;
+vm_offset_t first_phys_addr;
+vm_offset_t last_phys_addr;
+/* Derived from cnt.v_page_size by vm_set_page_size(). */
+vm_size_t page_mask;
+int page_shift;
+
+/*
+ *	vm_set_page_size:
+ *
+ *	Establish the system page size.  A zero cnt.v_page_size is
+ *	replaced by DEFAULT_PAGE_SIZE; the size must be a power of
+ *	two or we panic.  On return page_mask is (size - 1) and
+ *	page_shift is the bit position of the size.
+ *
+ *	Must be called before any use of page-size dependent
+ *	functions.
+ */
+void vm_set_page_size()
+{
+	if (cnt.v_page_size == 0)
+		cnt.v_page_size = DEFAULT_PAGE_SIZE;
+
+	page_mask = cnt.v_page_size - 1;
+	if ((page_mask & cnt.v_page_size) != 0)
+		panic("vm_set_page_size: page size not a power of two");
+
+	/* Walk up to the position of the (single) set bit. */
+	page_shift = 0;
+	while ((1 << page_shift) != cnt.v_page_size)
+		page_shift++;
+}
+
+
+/*
+ * vm_page_startup:
+ *
+ * Initializes the resident memory module.
+ *
+ * Allocates memory for the page cells, and
+ * for the object/offset-to-page hash table headers.
+ * Each page cell is initialized and placed on the free list.
+ *
+ * "start" and "end" point at the bounds of the physical memory
+ * handed to us; *end is truncated to a page boundary here.
+ * All memory is obtained with pmap_bootstrap_alloc(), and
+ * vm_page_startup_initialized is set on the way out.
+ */
+void vm_page_startup(start, end)
+ vm_offset_t *start;
+ vm_offset_t *end;
+{
+ register vm_page_t m;
+ register struct pglist *bucket;
+ vm_size_t npages;
+ int i;
+ vm_offset_t pa;
+ extern vm_offset_t kentry_data;
+ extern vm_size_t kentry_data_size;
+
+
+ /*
+ * Initialize the locks
+ */
+
+ simple_lock_init(&vm_page_queue_free_lock);
+ simple_lock_init(&vm_page_queue_lock);
+
+ /*
+ * Initialize the queue headers for the free queue,
+ * the active queue and the inactive queue.
+ */
+
+ TAILQ_INIT(&vm_page_queue_free);
+ TAILQ_INIT(&vm_page_queue_active);
+ TAILQ_INIT(&vm_page_queue_inactive);
+
+ /*
+ * Calculate the number of hash table buckets.
+ *
+ * The number of buckets MUST BE a power of 2, and
+ * the actual value is the smallest power of 2 that is
+ * not less than the number of physical pages in the system.
+ * A non-zero vm_page_bucket_count set beforehand is kept as is.
+ *
+ * Note:
+ * This computation can be tweaked if desired.
+ */
+
+ if (vm_page_bucket_count == 0) {
+ vm_page_bucket_count = 1;
+ while (vm_page_bucket_count < atop(*end - *start))
+ vm_page_bucket_count <<= 1;
+ }
+
+ vm_page_hash_mask = vm_page_bucket_count - 1;
+
+ /*
+ * Allocate (and initialize) the hash table buckets.
+ */
+ vm_page_buckets = (struct pglist *)
+ pmap_bootstrap_alloc(vm_page_bucket_count * sizeof(struct pglist));
+ bucket = vm_page_buckets;
+
+ for (i = vm_page_bucket_count; i--;) {
+ TAILQ_INIT(bucket);
+ bucket++;
+ }
+
+ simple_lock_init(&bucket_lock);
+
+ /*
+ * Truncate the remainder of physical memory to our page size.
+ */
+
+ *end = trunc_page(*end);
+
+ /*
+ * Pre-allocate maps and map entries that cannot be dynamically
+ * allocated via malloc(). The maps include the kernel_map and
+ * kmem_map which must be initialized before malloc() will
+ * work (obviously). Also could include pager maps which would
+ * be allocated before kmeminit.
+ *
+ * Allow some kernel map entries... this should be plenty
+ * since people shouldn't be cluttering up the kernel
+ * map (they should use their own maps).
+ */
+
+ kentry_data_size = round_page(MAX_KMAP*sizeof(struct vm_map) +
+ MAX_KMAPENT*sizeof(struct vm_map_entry));
+ kentry_data = (vm_offset_t) pmap_bootstrap_alloc(kentry_data_size);
+
+ /*
+ * Compute the number of pages of memory that will be
+ * available for use (taking into account the overhead
+ * of a page structure per page): each managed page costs
+ * PAGE_SIZE bytes plus one struct vm_page.
+ */
+
+ cnt.v_free_count = npages = (*end - *start + sizeof(struct vm_page))
+ / (PAGE_SIZE + sizeof(struct vm_page));
+
+ /*
+ * Record the extent of physical memory that the
+ * virtual memory system manages. The managed range
+ * begins at the first page boundary past the space
+ * needed for the page structures themselves.
+ */
+
+ first_page = *start;
+ first_page += npages*sizeof(struct vm_page);
+ first_page = atop(round_page(first_page));
+ last_page = first_page + npages - 1;
+
+ first_phys_addr = ptoa(first_page);
+ last_phys_addr = ptoa(last_page) + PAGE_MASK;
+
+
+ /*
+ * Allocate the mem entry structures.
+ * NOTE(review): nothing here clears them; presumably
+ * pmap_bootstrap_alloc() returns zeroed memory -- confirm.
+ */
+
+ m = vm_page_array = (vm_page_t)
+ pmap_bootstrap_alloc(npages * sizeof(struct vm_page));
+
+ /*
+ * Initialize the mem entry structures now, and
+ * put them in the free queue.
+ */
+
+ pa = first_phys_addr;
+ while (npages--) {
+ m->flags = 0;
+ m->object = NULL;
+ m->phys_addr = pa;
+#ifdef i386
+ if (pmap_isvalidphys(m->phys_addr)) {
+ TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
+ } else {
+ /* perhaps iomem needs its own type, or dev pager? */
+ m->flags |= PG_FICTITIOUS | PG_BUSY;
+ cnt.v_free_count--; /* not on the free queue after all */
+ }
+#else /* i386 */
+ TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
+#endif /* i386 */
+ m++;
+ pa += PAGE_SIZE;
+ }
+
+ /*
+ * Initialize vm_pages_needed lock here - don't wait for pageout
+ * daemon XXX
+ */
+ simple_lock_init(&vm_pages_needed_lock);
+
+ /* from now on, pmap_bootstrap_alloc can't be used */
+ vm_page_startup_initialized = TRUE;
+}
+
+/*
+ *	vm_page_hash:
+ *
+ *	Distributes the object/offset key pair among hash buckets:
+ *	the object address and the page index of the offset are
+ *	summed and masked with vm_page_hash_mask.
+ *
+ *	NOTE: This macro depends on vm_page_bucket_count being a power of 2.
+ *
+ *	The "object" argument is parenthesised so that the cast applies
+ *	to the whole expression passed in, not just its first operand.
+ */
+#define	vm_page_hash(object, offset) \
+	(((unsigned)(object)+(unsigned)atop(offset))&vm_page_hash_mask)
+
+/*
+ * vm_page_insert: [ internal use only ]
+ *
+ * Inserts the given mem entry into the object/offset-to-page
+ * hash table and the object's page list, and bumps the
+ * object's resident page count.
+ *
+ * Panics if the page is already marked PG_TABLED.
+ *
+ * The object and page must be locked.
+ */
+
+void vm_page_insert(mem, object, offset)
+ register vm_page_t mem;
+ register vm_object_t object;
+ register vm_offset_t offset;
+{
+ register struct pglist *bucket;
+ int spl;
+
+ VM_PAGE_CHECK(mem);
+
+ if (mem->flags & PG_TABLED)
+ panic("vm_page_insert: already inserted");
+
+ /*
+ * Record the object/offset pair in this page
+ */
+
+ mem->object = object;
+ mem->offset = offset;
+
+ /*
+ * Insert it into the object/offset hash table.
+ * The bucket chain is changed at splimp with bucket_lock held.
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(object, offset)];
+ spl = splimp();
+ simple_lock(&bucket_lock);
+ TAILQ_INSERT_TAIL(bucket, mem, hashq);
+ simple_unlock(&bucket_lock);
+ (void) splx(spl);
+
+ /*
+ * Now link into the object's list of backed pages.
+ */
+
+ TAILQ_INSERT_TAIL(&object->memq, mem, listq);
+ mem->flags |= PG_TABLED;
+
+ /*
+ * And show that the object has one more resident
+ * page.
+ */
+
+ object->resident_page_count++;
+}
+
+/*
+ * vm_page_remove: [ internal use only ]
+ * NOTE: used by device pager as well -wfj
+ *
+ * Removes the given mem entry from the object/offset-page
+ * table and the object page list, and drops the object's
+ * resident page count. A page that is not PG_TABLED is
+ * left untouched.
+ *
+ * mem->object and mem->offset are not cleared.
+ *
+ * The object and page must be locked.
+ */
+
+void vm_page_remove(mem)
+ register vm_page_t mem;
+{
+ register struct pglist *bucket;
+ int spl;
+
+ VM_PAGE_CHECK(mem);
+
+ if (!(mem->flags & PG_TABLED))
+ return;
+
+ /*
+ * Remove from the object/offset hash table.
+ * The bucket chain is changed at splimp with bucket_lock held.
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
+ spl = splimp();
+ simple_lock(&bucket_lock);
+ TAILQ_REMOVE(bucket, mem, hashq);
+ simple_unlock(&bucket_lock);
+ (void) splx(spl);
+
+ /*
+ * Now remove from the object's list of backed pages.
+ */
+
+ TAILQ_REMOVE(&mem->object->memq, mem, listq);
+
+ /*
+ * And show that the object has one fewer resident
+ * page.
+ */
+
+ mem->object->resident_page_count--;
+
+ mem->flags &= ~PG_TABLED;
+}
+
+/*
+ *	vm_page_lookup:
+ *
+ *	Find the page entered for the given object/offset pair.
+ *	Returns the page, or NULL when the pair has no page in the
+ *	hash table.
+ *
+ *	The object must be locked.  No side effects.
+ */
+
+vm_page_t vm_page_lookup(object, offset)
+	register vm_object_t	object;
+	register vm_offset_t	offset;
+{
+	register vm_page_t	pg;
+	register struct pglist	*chain;
+	int			s;
+
+	chain = &vm_page_buckets[vm_page_hash(object, offset)];
+
+	/*
+	 *	Walk the chain at splimp with the bucket lock held.
+	 *	The loop leaves pg at the match, or at NULL if the
+	 *	chain is exhausted.
+	 */
+	s = splimp();
+	simple_lock(&bucket_lock);
+	for (pg = chain->tqh_first; pg != NULL; pg = pg->hashq.tqe_next) {
+		VM_PAGE_CHECK(pg);
+		if (pg->object == object && pg->offset == offset)
+			break;
+	}
+	simple_unlock(&bucket_lock);
+	splx(s);
+
+	return(pg);
+}
+
+/*
+ *	vm_page_rename:
+ *
+ *	Re-home a page: take it out of its current object and enter
+ *	it in new_object at new_offset.  Nothing is done when the
+ *	page already belongs to new_object, even if new_offset
+ *	differs from its current offset.
+ *
+ *	The object must be locked.
+ */
+void vm_page_rename(mem, new_object, new_offset)
+	register vm_page_t	mem;
+	register vm_object_t	new_object;
+	vm_offset_t		new_offset;
+{
+	if (mem->object != new_object) {
+		/*
+		 *	Hold the queue lock across the move to keep the
+		 *	page from moving out from under the pageout daemon.
+		 */
+		vm_page_lock_queues();
+		vm_page_remove(mem);
+		vm_page_insert(mem, new_object, new_offset);
+		vm_page_unlock_queues();
+	}
+}
+
+/*
+ * vm_page_alloc:
+ *
+ * Allocate and return a memory cell associated
+ * with this VM object/offset pair.
+ *
+ * Takes the first page off the free queue and enters it
+ * in the object through VM_PAGE_INIT(). Returns NULL,
+ * without sleeping, when the free queue is empty.
+ *
+ * Object must be locked.
+ */
+vm_page_t vm_page_alloc(object, offset)
+ vm_object_t object;
+ vm_offset_t offset;
+{
+ register vm_page_t mem;
+ int spl;
+
+ spl = splimp(); /* XXX */
+ simple_lock(&vm_page_queue_free_lock);
+ if (vm_page_queue_free.tqh_first == NULL) {
+ simple_unlock(&vm_page_queue_free_lock);
+ splx(spl);
+ return(NULL);
+ }
+
+ mem = vm_page_queue_free.tqh_first;
+ TAILQ_REMOVE(&vm_page_queue_free, mem, pageq);
+
+ cnt.v_free_count--;
+ simple_unlock(&vm_page_queue_free_lock);
+ splx(spl);
+
+ /* Free-queue lock is dropped before the page is entered in the object. */
+ VM_PAGE_INIT(mem, object, offset);
+
+ /*
+ * Decide if we should poke the pageout daemon.
+ * We do this if the free count is less than the low
+ * water mark, or if the free count is less than the high
+ * water mark (but above the low water mark) and the inactive
+ * count is less than its target.
+ *
+ * We don't have the counts locked ... if they change a little,
+ * it doesn't really matter.
+ */
+
+ if (cnt.v_free_count < cnt.v_free_min ||
+ (cnt.v_free_count < cnt.v_free_target &&
+ cnt.v_inactive_count < cnt.v_inactive_target))
+ thread_wakeup((int)&vm_pages_needed);
+ return (mem);
+}
+
+/*
+ * vm_page_free:
+ *
+ * Returns the given page to the free list,
+ * disassociating it from any VM object.
+ *
+ * The page is also taken off the active or inactive queue
+ * if it is on one. PG_FICTITIOUS pages are removed from
+ * their object and queues but never put on the free list.
+ *
+ * Object and page must be locked prior to entry.
+ */
+void vm_page_free(mem)
+ register vm_page_t mem;
+{
+ vm_page_remove(mem);
+ if (mem->flags & PG_ACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
+ mem->flags &= ~PG_ACTIVE;
+ cnt.v_active_count--;
+ }
+
+ if (mem->flags & PG_INACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq);
+ mem->flags &= ~PG_INACTIVE;
+ cnt.v_inactive_count--;
+ }
+
+ if (!(mem->flags & PG_FICTITIOUS)) {
+ int spl;
+
+ /* The free queue is changed at splimp with its own lock held. */
+ spl = splimp();
+ simple_lock(&vm_page_queue_free_lock);
+ TAILQ_INSERT_TAIL(&vm_page_queue_free, mem, pageq);
+
+ cnt.v_free_count++;
+ simple_unlock(&vm_page_queue_free_lock);
+ splx(spl);
+ }
+}
+
+/*
+ *	vm_page_wire:
+ *
+ *	Add one wiring to the page.  On the first wiring the page
+ *	is pulled off whichever paging queue it is on and the
+ *	global wired-page count goes up.
+ *
+ *	The page queues must be locked.
+ */
+void vm_page_wire(mem)
+	register vm_page_t	mem;
+{
+	VM_PAGE_CHECK(mem);
+
+	/* Only the transition from unwired to wired needs queue work. */
+	if (mem->wire_count++ != 0)
+		return;
+
+	if (mem->flags & PG_ACTIVE) {
+		TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
+		cnt.v_active_count--;
+		mem->flags &= ~PG_ACTIVE;
+	}
+	if (mem->flags & PG_INACTIVE) {
+		TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq);
+		cnt.v_inactive_count--;
+		mem->flags &= ~PG_INACTIVE;
+	}
+	cnt.v_wire_count++;
+}
+
+/*
+ *	vm_page_unwire:
+ *
+ *	Drop one wiring of the page.  When the last wiring goes
+ *	away the page is put at the tail of the active queue, so
+ *	that it can be paged again, and the global wired-page count
+ *	goes down.
+ *
+ *	The wire count is not checked for being zero on entry.
+ *
+ *	The page queues must be locked.
+ */
+void vm_page_unwire(mem)
+	register vm_page_t	mem;
+{
+	VM_PAGE_CHECK(mem);
+
+	/* Still wired by someone else: nothing more to do. */
+	if (--mem->wire_count != 0)
+		return;
+
+	TAILQ_INSERT_TAIL(&vm_page_queue_active, mem, pageq);
+	cnt.v_active_count++;
+	mem->flags |= PG_ACTIVE;
+	cnt.v_wire_count--;
+}
+
+/*
+ *	vm_page_deactivate:
+ *
+ *	Move an active page to the tail of the inactive queue.
+ *	[Used by the physical mapping system.]  Pages that are not
+ *	on the active queue are left alone.
+ *
+ *	The reference bit is cleared first.  After the move, a page
+ *	the pmap reports as modified loses PG_CLEAN, and PG_LAUNDRY
+ *	is set exactly when the page is not PG_CLEAN.
+ *
+ *	The page queues must be locked.
+ */
+void vm_page_deactivate(m)
+	register vm_page_t	m;
+{
+	VM_PAGE_CHECK(m);
+
+	/*
+	 *	Only move active pages -- ignore locked or already
+	 *	inactive ones.
+	 */
+	if (!(m->flags & PG_ACTIVE))
+		return;
+
+	pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+
+	TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+	TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
+	m->flags &= ~PG_ACTIVE;
+	m->flags |= PG_INACTIVE;
+	cnt.v_active_count--;
+	cnt.v_inactive_count++;
+
+	if (pmap_is_modified(VM_PAGE_TO_PHYS(m)))
+		m->flags &= ~PG_CLEAN;
+
+	if (m->flags & PG_CLEAN)
+		m->flags &= ~PG_LAUNDRY;
+	else
+		m->flags |= PG_LAUNDRY;
+}
+
+/*
+ *	vm_page_activate:
+ *
+ *	Put the specified page on the active list (if appropriate).
+ *	The page is first taken off the inactive queue if it is
+ *	there.  Wired pages are not queued.  Panics if an unwired
+ *	page is already marked active.
+ *
+ *	The page queues must be locked.
+ */
+
+void vm_page_activate(m)
+	register vm_page_t	m;
+{
+	VM_PAGE_CHECK(m);
+
+	if (m->flags & PG_INACTIVE) {
+		TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
+		cnt.v_inactive_count--;
+		m->flags &= ~PG_INACTIVE;
+	}
+
+	/* Wired pages stay off the paging queues. */
+	if (m->wire_count != 0)
+		return;
+
+	if (m->flags & PG_ACTIVE)
+		panic("vm_page_activate: already active");
+
+	TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
+	m->flags |= PG_ACTIVE;
+	cnt.v_active_count++;
+}
+
+/*
+ * vm_page_zero_fill:
+ *
+ * Zero-fill the specified page.
+ * Written as a standard pagein routine, to
+ * be used by the zero-fill object.
+ *
+ * Clears PG_CLEAN on the page and always returns TRUE.
+ */
+
+boolean_t vm_page_zero_fill(m)
+ vm_page_t m;
+{
+ VM_PAGE_CHECK(m);
+
+ m->flags &= ~PG_CLEAN;
+ pmap_zero_page(VM_PAGE_TO_PHYS(m));
+ return(TRUE);
+}
+
+/*
+ * vm_page_copy:
+ *
+ * Copy one page to another: the contents of src_m are
+ * copied to dest_m by physical address, and PG_CLEAN is
+ * cleared on the destination page.
+ */
+
+void vm_page_copy(src_m, dest_m)
+ vm_page_t src_m;
+ vm_page_t dest_m;
+{
+ VM_PAGE_CHECK(src_m);
+ VM_PAGE_CHECK(dest_m);
+
+ dest_m->flags &= ~PG_CLEAN;
+ pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
+}
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
new file mode 100644
index 000000000000..8bf51469a1f6
--- /dev/null
+++ b/sys/vm/vm_page.h
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_page.h 8.2 (Berkeley) 12/13/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Resident memory system definitions.
+ */
+
+#ifndef _VM_PAGE_
+#define _VM_PAGE_
+
+/*
+ * Management of resident (logical) pages.
+ *
+ * A small structure is kept for each resident
+ * page, indexed by page number. Each structure
+ * is an element of several lists:
+ *
+ * A hash table bucket used to quickly
+ * perform object/offset lookups
+ *
+ * A list of all pages for a given object,
+ * so they can be quickly deactivated at
+ * time of deallocation.
+ *
+ * An ordered list of pages due for pageout.
+ *
+ * In addition, the structure contains the object
+ * and offset to which this page belongs (for pageout),
+ * and sundry status bits.
+ *
+ * Fields in this structure are locked either by the lock on the
+ * object that the page belongs to (O) or by the lock on the page
+ * queues (P).
+ */
+
+/* Queue head type used for every list of pages. */
+TAILQ_HEAD(pglist, vm_page);
+
+/*
+ * Per-page structure. (O) and (P) name the lock that covers a field:
+ * the owning object's lock or the page queue lock.
+ */
+struct vm_page {
+ TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO
+ * queue or free list (P) */
+ TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
+ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/
+
+ vm_object_t object; /* which object am I in (O,P)*/
+ vm_offset_t offset; /* offset into object (O,P) */
+
+ u_short wire_count; /* wired down maps refs (P) */
+ u_short flags; /* PG_* flags, see below */
+
+ vm_offset_t phys_addr; /* physical address of page */
+};
+
+/*
+ * These are the flags defined for vm_page.
+ *
+ * Note: PG_FILLED and PG_DIRTY are added for the filesystems.
+ *
+ * NOTE(review): vm_page_deactivate() sets PG_LAUNDRY on every
+ * deactivated page that is not PG_CLEAN, so the flag appears to mean
+ * "needs cleaning" rather than "being cleaned now" -- confirm.
+ */
+#define PG_INACTIVE 0x0001 /* page is in inactive list (P) */
+#define PG_ACTIVE 0x0002 /* page is in active list (P) */
+#define PG_LAUNDRY 0x0004 /* page is being cleaned now (P)*/
+#define PG_CLEAN 0x0008 /* page has not been modified */
+#define PG_BUSY 0x0010 /* page is in transit (O) */
+#define PG_WANTED 0x0020 /* someone is waiting for page (O) */
+#define PG_TABLED 0x0040 /* page is in VP table (O) */
+#define PG_COPYONWRITE 0x0080 /* must copy page before changing (O) */
+#define PG_FICTITIOUS 0x0100 /* physical page doesn't exist (O) */
+#define PG_FAKE 0x0200 /* page is placeholder for pagein (O) */
+#define PG_FILLED 0x0400 /* client flag to set when filled */
+#define PG_DIRTY 0x0800 /* client flag to set when dirty */
+#define PG_PAGEROWNED 0x4000 /* DEBUG: async paging op in progress */
+#define PG_PTPAGE 0x8000 /* DEBUG: is a user page table page */
+
+/*
+ * VM_PAGE_CHECK(mem):
+ *
+ *	Sanity check on a page pointer.  With VM_PAGE_DEBUG set this
+ *	panics when "mem" lies outside vm_page_array, or when the page
+ *	is marked both PG_ACTIVE and PG_INACTIVE; otherwise it expands
+ *	to nothing.
+ *
+ *	The argument is parenthesised at every use so that an
+ *	expression (not just a plain identifier) can be passed.
+ */
+#if VM_PAGE_DEBUG
+#define	VM_PAGE_CHECK(mem) { \
+	if ((((unsigned int) (mem)) < ((unsigned int) &vm_page_array[0])) || \
+	    (((unsigned int) (mem)) > \
+		((unsigned int) &vm_page_array[last_page-first_page])) || \
+	    (((mem)->flags & (PG_ACTIVE | PG_INACTIVE)) == \
+		(PG_ACTIVE | PG_INACTIVE))) \
+		panic("vm_page_check: not valid!"); \
+}
+#else /* VM_PAGE_DEBUG */
+#define	VM_PAGE_CHECK(mem)
+#endif /* VM_PAGE_DEBUG */
+
+#ifdef KERNEL
+/*
+ * Each pageable resident page falls into one of three lists:
+ *
+ * free
+ * Available for allocation now.
+ * inactive
+ * Not referenced in any map, but still has an
+ * object/offset-page mapping, and may be dirty.
+ * This is the list of pages that should be
+ * paged out next.
+ * active
+ * A list of pages which have been placed in
+ * at least one physical map. This list is
+ * ordered, in LRU-like fashion.
+ */
+
+extern
+struct pglist vm_page_queue_free; /* memory free queue */
+extern
+struct pglist vm_page_queue_active; /* active memory queue */
+extern
+struct pglist vm_page_queue_inactive; /* inactive memory queue */
+
+extern
+vm_page_t vm_page_array; /* First resident page in table */
+extern
+long first_page; /* first physical page number */
+ /* ... represented in vm_page_array */
+extern
+long last_page; /* last physical page number */
+ /* ... represented in vm_page_array */
+ /* [INCLUSIVE] */
+extern
+vm_offset_t first_phys_addr; /* physical address for first_page */
+extern
+vm_offset_t last_phys_addr; /* physical address for last_page */
+
+/* Physical address recorded in a page structure. */
+#define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr)
+
+/* True if pa lies in the managed range; note that pa is evaluated twice. */
+#define IS_VM_PHYSADDR(pa) \
+ ((pa) >= first_phys_addr && (pa) <= last_phys_addr)
+
+/* Page structure for pa; no range check is made, see IS_VM_PHYSADDR(). */
+#define PHYS_TO_VM_PAGE(pa) \
+ (&vm_page_array[atop(pa) - first_page ])
+
+extern
+simple_lock_data_t vm_page_queue_lock; /* lock on active and inactive
+ page queues */
+extern /* lock on free page queue */
+simple_lock_data_t vm_page_queue_free_lock;
+
+/*
+ * Functions implemented as macros
+ *
+ * NOTE(review): the multi-statement macros below expand to a bare
+ * { ... } block, so "if (x) MACRO(m); else ..." will not compile.
+ */
+
+/* Mark the page wanted and register to wait on its address. */
+#define PAGE_ASSERT_WAIT(m, interruptible) { \
+ (m)->flags |= PG_WANTED; \
+ assert_wait((int) (m), (interruptible)); \
+ }
+
+/* Clear PG_BUSY and wake anyone who marked the page PG_WANTED. */
+#define PAGE_WAKEUP(m) { \
+ (m)->flags &= ~PG_BUSY; \
+ if ((m)->flags & PG_WANTED) { \
+ (m)->flags &= ~PG_WANTED; \
+ thread_wakeup((int) (m)); \
+ } \
+ }
+
+/* Take and release the lock on the active and inactive queues. */
+#define vm_page_lock_queues() simple_lock(&vm_page_queue_lock)
+#define vm_page_unlock_queues() simple_unlock(&vm_page_queue_lock)
+
+/* Record that the page has been modified. */
+#define vm_page_set_modified(m) { (m)->flags &= ~PG_CLEAN; }
+
+/*
+ * Set up a freshly allocated page: busy, clean and fake, entered in
+ * the object at the given offset, with no wirings.
+ */
+#define VM_PAGE_INIT(mem, object, offset) { \
+ (mem)->flags = PG_BUSY | PG_CLEAN | PG_FAKE; \
+ vm_page_insert((mem), (object), (offset)); \
+ (mem)->wire_count = 0; \
+}
+
+/* Entry points of the resident page module (kernel only). */
+void vm_page_activate __P((vm_page_t));
+vm_page_t vm_page_alloc __P((vm_object_t, vm_offset_t));
+void vm_page_copy __P((vm_page_t, vm_page_t));
+void vm_page_deactivate __P((vm_page_t));
+void vm_page_free __P((vm_page_t));
+void vm_page_insert __P((vm_page_t, vm_object_t, vm_offset_t));
+vm_page_t vm_page_lookup __P((vm_object_t, vm_offset_t));
+void vm_page_remove __P((vm_page_t));
+void vm_page_rename __P((vm_page_t, vm_object_t, vm_offset_t));
+void vm_page_startup __P((vm_offset_t *, vm_offset_t *));
+void vm_page_unwire __P((vm_page_t));
+void vm_page_wire __P((vm_page_t));
+boolean_t vm_page_zero_fill __P((vm_page_t));
+
+#endif /* KERNEL */
+#endif /* !_VM_PAGE_ */
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
new file mode 100644
index 000000000000..679540591e7f
--- /dev/null
+++ b/sys/vm/vm_pageout.c
@@ -0,0 +1,567 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * The proverbial page-out daemon.
+ */
+
+#include <sys/param.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+#ifndef VM_PAGE_FREE_MIN
+#define VM_PAGE_FREE_MIN (cnt.v_free_count / 20) /* default cnt.v_free_min: 1/20 of the free page count when vm_pageout() starts */
+#endif
+
+#ifndef VM_PAGE_FREE_TARGET
+#define VM_PAGE_FREE_TARGET ((cnt.v_free_min * 4) / 3) /* default cnt.v_free_target: 4/3 of cnt.v_free_min */
+#endif
+
+int vm_page_free_min_min = 16 * 1024; /* lower clamp for cnt.v_free_min; vm_pageout() divides it by cnt.v_page_size before use */
+int vm_page_free_min_max = 256 * 1024; /* upper clamp for cnt.v_free_min; same conversion as above */
+
+int vm_pages_needed; /* Event on which pageout daemon sleeps */
+
+int vm_page_max_wired = 0; /* XXX max # of wired pages system-wide; if still 0, vm_pageout() sets it to cnt.v_free_count / 3 */
+
+#ifdef CLUSTERED_PAGEOUT
+#define MAXPOCLUSTER (MAXPHYS/NBPG) /* XXX dimension of plist[] in vm_pageout_cluster() */
+int doclustered_pageout = 1; /* NOTE(review): not consulted anywhere in vm_pageout.c */
+#endif
+
+/*
+ * vm_pageout_scan does the dirty work for the pageout daemon.
+ *
+ * One pass consists of:
+ *  1. If the free page count is below cnt.v_free_target, call
+ *     swapout_threads() and pmap_update().
+ *  2. Walk the inactive queue until the free count reaches the target or
+ *     the queue ends: referenced pages go back to the active queue, clean
+ *     pages are freed, dirty pages still in the laundry are handed to the
+ *     pager.
+ *  3. Deactivate pages from the head of the active queue to make up the
+ *     shortfall of the inactive queue against cnt.v_inactive_target.
+ *
+ * Takes no arguments and returns nothing.  Acquires and releases the
+ * page queue lock itself.
+ */
+void
+vm_pageout_scan()
+{
+ register vm_page_t m, next;
+ register int page_shortage;
+ register int s; /* value returned by splimp(), handed back to splx() */
+ register int pages_freed; /* clean pages freed during this pass */
+ int free; /* snapshot of cnt.v_free_count */
+ vm_object_t object;
+
+ /*
+ * Count this scan, then sample the free page count while holding
+ * the free queue lock at splimp.  Note that the function does not
+ * return early when memory is plentiful; the sample only decides
+ * whether swapout_threads() and pmap_update() are called.
+ */
+
+ cnt.v_rev++;
+
+ s = splimp();
+ simple_lock(&vm_page_queue_free_lock);
+ free = cnt.v_free_count;
+ simple_unlock(&vm_page_queue_free_lock);
+ splx(s);
+
+ if (free < cnt.v_free_target) {
+ swapout_threads();
+
+ /*
+ * Be sure the pmap system is updated so
+ * we can scan the inactive queue.
+ */
+
+ pmap_update();
+ }
+
+ /*
+ * Acquire the resident page system lock,
+ * as we may be changing what's resident quite a bit.
+ */
+ vm_page_lock_queues();
+
+ /*
+ * Start scanning the inactive queue for pages we can free.
+ * We keep scanning until we have enough free pages or
+ * we have scanned through the entire queue. If we
+ * encounter dirty pages, we start cleaning them.
+ */
+
+ pages_freed = 0;
+ for (m = vm_page_queue_inactive.tqh_first; m != NULL; m = next) {
+ s = splimp();
+ simple_lock(&vm_page_queue_free_lock);
+ free = cnt.v_free_count;
+ simple_unlock(&vm_page_queue_free_lock);
+ splx(s);
+ if (free >= cnt.v_free_target)
+ break;
+
+ cnt.v_scan++;
+ next = m->pageq.tqe_next; /* fetched now, before m can be moved off this queue below */
+
+ /*
+ * If the page has been referenced, move it back to the
+ * active queue.
+ */
+ if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
+ vm_page_activate(m);
+ cnt.v_reactivated++;
+ continue;
+ }
+
+ /*
+ * If the page is clean, free it up.  If the object lock
+ * cannot be taken without waiting, the page is simply
+ * skipped for this pass.
+ */
+ if (m->flags & PG_CLEAN) {
+ object = m->object;
+ if (vm_object_lock_try(object)) {
+ pmap_page_protect(VM_PAGE_TO_PHYS(m),
+ VM_PROT_NONE);
+ vm_page_free(m);
+ pages_freed++;
+ cnt.v_dfree++;
+ vm_object_unlock(object);
+ }
+ continue;
+ }
+
+ /*
+ * If the page is dirty but already being washed, skip it.
+ * (PG_LAUNDRY clear means a pageout was already started.)
+ */
+ if ((m->flags & PG_LAUNDRY) == 0)
+ continue;
+
+ /*
+ * Otherwise the page is dirty and still in the laundry,
+ * so we start the cleaning operation and remove it from
+ * the laundry.  As above, a busy object lock means the page
+ * is skipped.
+ */
+ object = m->object;
+ if (!vm_object_lock_try(object))
+ continue;
+ cnt.v_pageouts++;
+#ifdef CLUSTERED_PAGEOUT
+ if (object->pager &&
+ vm_pager_cancluster(object->pager, PG_CLUSTERPUT))
+ vm_pageout_cluster(m, object);
+ else
+#endif
+ vm_pageout_page(m, object);
+ thread_wakeup((int) object); /* the event is the object pointer cast to int */
+ vm_object_unlock(object);
+ /*
+ * Former next page may no longer even be on the inactive
+ * queue (due to potential blocking in the pager with the
+ * queues unlocked). If it isn't, we just start over.
+ */
+ if (next && (next->flags & PG_INACTIVE) == 0)
+ next = vm_page_queue_inactive.tqh_first;
+ }
+
+ /*
+ * Compute the page shortage. If we are still very low on memory
+ * be sure that we will move a minimal amount of pages from active
+ * to inactive.  (When nothing was freed above and the inactive
+ * queue is already at or over target, one page is still moved.)
+ */
+
+ page_shortage = cnt.v_inactive_target - cnt.v_inactive_count;
+ if (page_shortage <= 0 && pages_freed == 0)
+ page_shortage = 1;
+
+ while (page_shortage > 0) {
+ /*
+ * Move some more pages from active to inactive.
+ * Stops early if the active queue runs out.
+ */
+
+ if ((m = vm_page_queue_active.tqh_first) == NULL)
+ break;
+ vm_page_deactivate(m);
+ page_shortage--;
+ }
+
+ vm_page_unlock_queues();
+}
+
+/*
+ * Write one dirty page to its object's pager, creating a default pager
+ * for the object first if it has none.
+ *
+ * Called with object and page queues locked.  Both locks are dropped
+ * around the pager call and re-taken before the result is processed,
+ * so they are held again on return.
+ *
+ * On VM_PAGER_FAIL or VM_PAGER_ERROR the page is put back on the active
+ * queue; on VM_PAGER_AGAIN it is left on the inactive queue with
+ * PG_LAUNDRY still set.  (An older version of this comment described a
+ * "reactivate" argument; the function has no such parameter.)
+ */
+void
+vm_pageout_page(m, object)
+ vm_page_t m;
+ vm_object_t object;
+{
+ vm_pager_t pager;
+ int pageout_status;
+
+ /*
+ * We set the busy bit to cause potential page faults on
+ * this page to block.
+ *
+ * We also set pageout-in-progress to keep the object from
+ * disappearing during pageout. This guarantees that the
+ * page won't move from the inactive queue. (However, any
+ * other page on the inactive queue may move!)
+ */
+ pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+ m->flags |= PG_BUSY;
+
+ /*
+ * Try to collapse the object before making a pager for it.
+ * We must unlock the page queues first.
+ */
+ vm_page_unlock_queues();
+ if (object->pager == NULL)
+ vm_object_collapse(object);
+
+ object->paging_in_progress++;
+ vm_object_unlock(object);
+
+ /*
+ * Do a wakeup here in case the following operations block.
+ */
+ thread_wakeup((int) &cnt.v_free_count);
+
+ /*
+ * If there is no pager for the page, use the default pager.
+ * If there is no place to put the page at the moment,
+ * leave it in the laundry and hope that there will be
+ * paging space later.
+ */
+ if ((pager = object->pager) == NULL) {
+ pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, object->size,
+ VM_PROT_ALL, (vm_offset_t)0);
+ if (pager != NULL)
+ vm_object_setpager(object, pager, 0, FALSE);
+ }
+ pageout_status = pager ? vm_pager_put(pager, m, FALSE) : VM_PAGER_FAIL; /* no pager could be allocated: handled like a pager failure */
+ vm_object_lock(object);
+ vm_page_lock_queues();
+
+ switch (pageout_status) {
+ case VM_PAGER_OK:
+ case VM_PAGER_PEND:
+ cnt.v_pgpgout++;
+ m->flags &= ~PG_LAUNDRY;
+ break;
+ case VM_PAGER_BAD:
+ /*
+ * Page outside of range of object. Right now we
+ * essentially lose the changes by pretending it
+ * worked.
+ *
+ * XXX dubious, what should we do?
+ */
+ m->flags &= ~PG_LAUNDRY;
+ m->flags |= PG_CLEAN;
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ break;
+ case VM_PAGER_AGAIN:
+ {
+ extern int lbolt;
+
+ /*
+ * VM_PAGER_AGAIN on a write is interpreted to mean a
+ * resource shortage, so we pause for a while.  The page
+ * keeps PG_LAUNDRY, so a later scan can try it again.
+ * XXX could get stuck here.
+ * NOTE(review): the object and page queue locks were
+ * re-taken just above and are held across this sleep.
+ */
+ (void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
+ break;
+ }
+ case VM_PAGER_FAIL:
+ case VM_PAGER_ERROR:
+ /*
+ * If page couldn't be paged out, then reactivate
+ * the page so it doesn't clog the inactive list.
+ * (We will try paging it out again later).
+ */
+ vm_page_activate(m);
+ cnt.v_reactivated++;
+ break;
+ }
+
+ pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+
+ /*
+ * If the operation is still going, leave the page busy
+ * to block all other accesses. Also, leave the paging
+ * in progress indicator set so that we don't attempt an
+ * object collapse.
+ */
+ if (pageout_status != VM_PAGER_PEND) {
+ m->flags &= ~PG_BUSY; /* redundant: PAGE_WAKEUP() clears PG_BUSY as well */
+ PAGE_WAKEUP(m);
+ object->paging_in_progress--;
+ }
+}
+
+#ifdef CLUSTERED_PAGEOUT
+#define PAGEOUTABLE(p) \
+ ((((p)->flags & (PG_INACTIVE|PG_CLEAN|PG_LAUNDRY)) == \
+ (PG_INACTIVE|PG_LAUNDRY)) && !pmap_is_referenced(VM_PAGE_TO_PHYS(p))) /* inactive, not clean, still in the laundry, and not referenced */
+
+/*
+ * Attempt to pageout as many contiguous (to ``m'') dirty pages as possible
+ * from ``object''. Using information returned from the pager, we assemble
+ * a sorted list of contiguous dirty pages and feed them to the pager in one
+ * chunk. Called with paging queues and object locked. Also, object must
+ * already have a pager.
+ *
+ * Both locks are dropped around the pager call and held again on return.
+ *
+ * NOTE(review): plist[] has MAXPOCLUSTER slots and is indexed by a page's
+ * distance from the low bound reported by vm_pager_cluster(), so this
+ * presumably relies on the pager never reporting a range wider than
+ * MAXPOCLUSTER pages -- confirm against the pagers.
+ */
+void
+vm_pageout_cluster(m, object)
+ vm_page_t m;
+ vm_object_t object;
+{
+ vm_offset_t offset, loff, hoff;
+ vm_page_t plist[MAXPOCLUSTER], *plistp, p;
+ int postatus, ix, count;
+
+ /*
+ * Determine the range of pages that can be part of a cluster
+ * for this object/offset. If it is only our single page, just
+ * do it normally.
+ */
+ vm_pager_cluster(object->pager, m->offset, &loff, &hoff);
+ if (hoff - loff == PAGE_SIZE) {
+ vm_pageout_page(m, object);
+ return;
+ }
+
+ plistp = plist;
+
+ /*
+ * Target page is always part of the cluster.
+ */
+ pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+ m->flags |= PG_BUSY;
+ plistp[atop(m->offset - loff)] = m;
+ count = 1;
+
+ /*
+ * Backup from the given page til we find one not fulfilling
+ * the pageout criteria or we hit the lower bound for the
+ * cluster. For each page determined to be part of the
+ * cluster, unmap it and busy it out so it won't change.
+ */
+ ix = atop(m->offset - loff);
+ offset = m->offset;
+ while (offset > loff && count < MAXPOCLUSTER-1) {
+ p = vm_page_lookup(object, offset - PAGE_SIZE);
+ if (p == NULL || !PAGEOUTABLE(p))
+ break;
+ pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
+ p->flags |= PG_BUSY;
+ plistp[--ix] = p;
+ offset -= PAGE_SIZE;
+ count++;
+ }
+ plistp += atop(offset - loff); /* plistp now addresses the lowest page collected */
+ loff = offset; /* rebase the low bound to that page */
+
+ /*
+ * Now do the same moving forward from the target.
+ * ix is now relative to the rebased plistp.
+ */
+ ix = atop(m->offset - loff) + 1;
+ offset = m->offset + PAGE_SIZE;
+ while (offset < hoff && count < MAXPOCLUSTER) {
+ p = vm_page_lookup(object, offset);
+ if (p == NULL || !PAGEOUTABLE(p))
+ break;
+ pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
+ p->flags |= PG_BUSY;
+ plistp[ix++] = p;
+ offset += PAGE_SIZE;
+ count++;
+ }
+ hoff = offset;
+
+ /*
+ * Pageout the page.
+ * Unlock everything and do a wakeup prior to the pager call
+ * in case it blocks.
+ */
+ vm_page_unlock_queues();
+ object->paging_in_progress++;
+ vm_object_unlock(object);
+again:
+ thread_wakeup((int) &cnt.v_free_count);
+ postatus = vm_pager_put_pages(object->pager, plistp, count, FALSE);
+ /*
+ * XXX rethink this
+ * VM_PAGER_AGAIN is retried without limit, sleeping on lbolt
+ * between attempts; VM_PAGER_BAD is fatal.
+ */
+ if (postatus == VM_PAGER_AGAIN) {
+ extern int lbolt;
+
+ (void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
+ goto again;
+ } else if (postatus == VM_PAGER_BAD)
+ panic("vm_pageout_cluster: VM_PAGER_BAD");
+ vm_object_lock(object);
+ vm_page_lock_queues();
+
+ /*
+ * Loop through the affected pages, reflecting the outcome of
+ * the operation.  The single status applies to every page.
+ */
+ for (ix = 0; ix < count; ix++) {
+ p = *plistp++;
+ switch (postatus) {
+ case VM_PAGER_OK:
+ case VM_PAGER_PEND:
+ cnt.v_pgpgout++;
+ p->flags &= ~PG_LAUNDRY;
+ break;
+ case VM_PAGER_FAIL:
+ case VM_PAGER_ERROR:
+ /*
+ * Pageout failed, reactivate the target page so it
+ * doesn't clog the inactive list. Other pages are
+ * left as they are.
+ */
+ if (p == m) {
+ vm_page_activate(p);
+ cnt.v_reactivated++;
+ }
+ break;
+ }
+ pmap_clear_reference(VM_PAGE_TO_PHYS(p));
+ /*
+ * If the operation is still going, leave the page busy
+ * to block all other accesses.
+ */
+ if (postatus != VM_PAGER_PEND) {
+ p->flags &= ~PG_BUSY; /* redundant: PAGE_WAKEUP() clears PG_BUSY as well */
+ PAGE_WAKEUP(p);
+
+ }
+ }
+ /*
+ * If the operation is still going, leave the paging in progress
+ * indicator set so that we don't attempt an object collapse.
+ */
+ if (postatus != VM_PAGER_PEND)
+ object->paging_in_progress--;
+
+}
+#endif
+
+/*
+ * vm_pageout is the high level pageout daemon.
+ *
+ * After dropping to spl0 it gives a default to each of cnt.v_free_min,
+ * cnt.v_free_target and vm_page_max_wired that is still zero, then
+ * sleeps on vm_pages_needed and services every wakeup.  It never
+ * returns.
+ */
+
+void
+vm_pageout()
+{
+	(void) spl0();
+
+	/*
+	 * Paging parameters.  The two clamp values start out as byte
+	 * counts and are turned into page counts here, so this branch
+	 * must run at most once (it is guarded by v_free_min being 0).
+	 */
+	if (cnt.v_free_min == 0) {
+		cnt.v_free_min = VM_PAGE_FREE_MIN;
+		vm_page_free_min_min /= cnt.v_page_size;
+		vm_page_free_min_max /= cnt.v_page_size;
+		if (cnt.v_free_min < vm_page_free_min_min) {
+			cnt.v_free_min = vm_page_free_min_min;
+		}
+		if (cnt.v_free_min > vm_page_free_min_max) {
+			cnt.v_free_min = vm_page_free_min_max;
+		}
+	}
+
+	if (cnt.v_free_target == 0) {
+		cnt.v_free_target = VM_PAGE_FREE_TARGET;
+	}
+
+	/* The target must stay strictly above the minimum. */
+	if (cnt.v_free_target <= cnt.v_free_min) {
+		cnt.v_free_target = cnt.v_free_min + 1;
+	}
+
+	/* XXX does not really belong here */
+	if (vm_page_max_wired == 0) {
+		vm_page_max_wired = cnt.v_free_count / 3;
+	}
+
+	/*
+	 * Main loop.  The lock is taken before each sleep; thread_sleep()
+	 * is handed the lock along with the event.
+	 */
+	simple_lock(&vm_pages_needed_lock);
+	for (;;) {
+		thread_sleep((int) &vm_pages_needed, &vm_pages_needed_lock,
+		    FALSE);
+
+		/*
+		 * Inactive target for this round: a third of the
+		 * active + inactive pages, but always more than the
+		 * free target, so the inactive list is long enough to
+		 * approximate LRU.
+		 */
+		cnt.v_inactive_target =
+		    (cnt.v_active_count + cnt.v_inactive_count) / 3;
+		if (cnt.v_inactive_target <= cnt.v_free_target) {
+			cnt.v_inactive_target = cnt.v_free_target + 1;
+		}
+
+		/*
+		 * Scan only when it can accomplish something; a pager
+		 * may have woken us merely to reap async pageouts.
+		 */
+		if (cnt.v_free_count < cnt.v_free_target ||
+		    cnt.v_inactive_count < cnt.v_inactive_target) {
+			vm_pageout_scan();
+		}
+		vm_pager_sync();
+		simple_lock(&vm_pages_needed_lock);
+		thread_wakeup((int) &cnt.v_free_count);
+	}
+}
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
new file mode 100644
index 000000000000..a82a0ea40aca
--- /dev/null
+++ b/sys/vm/vm_pageout.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_pageout.h 8.2 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: Avadis Tevanian, Jr.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Header file for pageout daemon.
+ */
+
+/*
+ * Exported data structures.
+ */
+
+extern int vm_pages_needed; /* should be some "event" structure */
+simple_lock_data_t vm_pages_needed_lock; /* NOTE(review): no "extern", so every file including this header gets its own tentative definition; presumably merged as a common symbol at link time -- confirm */
+
+
+/*
+ * Exported routines.
+ */
+
+/*
+ * Signal pageout-daemon and wait for it.
+ *
+ * Takes vm_pages_needed_lock, wakes the daemon through the
+ * vm_pages_needed event, then sleeps on &cnt.v_free_count, passing the
+ * lock to thread_sleep().  vm_pageout() issues the matching wakeup on
+ * &cnt.v_free_count after each round, and the pageout routines issue
+ * one before calling into a pager.
+ *
+ * Expands to a bare { ... } block, so it cannot be used as the
+ * unbraced body of an "if" that is followed by "else".
+ */
+
+#define VM_WAIT { \
+ simple_lock(&vm_pages_needed_lock); \
+ thread_wakeup((int)&vm_pages_needed); \
+ thread_sleep((int)&cnt.v_free_count, \
+ &vm_pages_needed_lock, FALSE); \
+ }
+#ifdef KERNEL
+void vm_pageout __P((void));
+void vm_pageout_scan __P((void));
+void vm_pageout_page __P((vm_page_t, vm_object_t));
+void vm_pageout_cluster __P((vm_page_t, vm_object_t)); /* defined only when CLUSTERED_PAGEOUT is set */
+#endif
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
new file mode 100644
index 000000000000..7123abb16ef0
--- /dev/null
+++ b/sys/vm/vm_pager.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_pager.c 8.6 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Paging space routine stubs. Emulates a matchmaker-like interface
+ * for builtin pagers.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#ifdef SWAPPAGER
+extern struct pagerops swappagerops;
+#endif
+
+#ifdef VNODEPAGER
+extern struct pagerops vnodepagerops;
+#endif
+
+#ifdef DEVPAGER
+extern struct pagerops devicepagerops;
+#endif
+
+struct pagerops *pagertab[] = { /* indexed by pager type; a slot is NULL when that pager is not configured */
+#ifdef SWAPPAGER
+ &swappagerops, /* PG_SWAP */
+#else
+ NULL,
+#endif
+#ifdef VNODEPAGER
+ &vnodepagerops, /* PG_VNODE */
+#else
+ NULL,
+#endif
+#ifdef DEVPAGER
+ &devicepagerops, /* PG_DEV */
+#else
+ NULL,
+#endif
+};
+int npagers = sizeof (pagertab) / sizeof (pagertab[0]); /* number of slots in pagertab, configured or not */
+
+struct pagerops *dfltpagerops = NULL; /* default pager; vm_pager_init() panics if this is still NULL after the pgo_init calls */
+
+/*
+ * Kernel address space for mapping pages.
+ * Used by pagers where KVAs are needed for IO.
+ *
+ * XXX needs to be large enough to support the number of pending async
+ * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size
+ * (MAXPHYS == 64k) if you want to get the most efficiency.
+ */
+#define PAGER_MAP_SIZE (4 * 1024 * 1024) /* 4MB, i.e. the 64 * 64k product named above */
+
+vm_map_t pager_map; /* submap created by vm_pager_init() */
+boolean_t pager_map_wanted; /* set by vm_pager_map_pages() before it sleeps for space; never cleared in this file */
+vm_offset_t pager_sva, pager_eva; /* passed by address to kmem_suballoc() in vm_pager_init() */
+
+/*
+ * Initialise the pager layer: create the pager_map submap of kernel_map
+ * and call the pgo_init routine of every configured pager.  Panics if no
+ * default pager (dfltpagerops) has been set by the time the init
+ * routines have run.
+ */
+void
+vm_pager_init()
+{
+	struct pagerops **pgops;
+
+	/*
+	 * Allocate a kernel submap for tracking get/put page mappings
+	 */
+	pager_map = kmem_suballoc(kernel_map, &pager_sva, &pager_eva,
+	    PAGER_MAP_SIZE, FALSE);
+	/*
+	 * Initialize known pagers.  pagertab holds NULL for every pager
+	 * that was not configured, so the slot contents (*pgops) must be
+	 * tested; the iterator itself is never NULL.
+	 */
+	for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
+		if (*pgops)
+			(*(*pgops)->pgo_init)();
+	if (dfltpagerops == NULL)
+		panic("no default pager");
+}
+
+/*
+ * Allocate an instance of a pager of the given type.
+ * Size, protection and offset parameters are passed in for pagers that
+ * need to perform page-level validation (e.g. the device pager).
+ *
+ * type is PG_DFLT for the default pager, otherwise an index into
+ * pagertab.  Returns whatever the pager's pgo_alloc routine returns, or
+ * NULL when the type is out of range or names a pager that is not
+ * configured.
+ */
+vm_pager_t
+vm_pager_allocate(type, handle, size, prot, off)
+	int type;
+	caddr_t handle;
+	vm_size_t size;
+	vm_prot_t prot;
+	vm_offset_t off;
+{
+	struct pagerops *ops;
+
+	if (type == PG_DFLT)
+		ops = dfltpagerops;
+	else if (type >= 0 && type < npagers)
+		ops = pagertab[type];
+	else
+		ops = NULL;	/* do not index outside pagertab */
+	if (ops)
+		return ((*ops->pgo_alloc)(handle, size, prot, off));
+	return (NULL);
+}
+
+/*
+ * Release a pager through its own pgo_dealloc routine.
+ * A NULL pager is a fatal error.
+ */
+void
+vm_pager_deallocate(pager)
+	vm_pager_t pager;
+{
+	if (!pager) {
+		panic("vm_pager_deallocate: null pager");
+	}
+
+	(*pager->pg_ops->pgo_dealloc)(pager);
+}
+
+/*
+ * Bring in npages pages listed in mlist.
+ *
+ * With a pager, the request is passed to its pgo_getpages routine and
+ * that routine's status is returned.  Without one, each page is zero
+ * filled in list order; the first failure stops the walk and yields
+ * VM_PAGER_FAIL, otherwise the result is VM_PAGER_OK.
+ */
+int
+vm_pager_get_pages(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+	if (pager != NULL) {
+		return ((*pager->pg_ops->pgo_getpages)(pager, mlist, npages,
+		    sync));
+	}
+
+	/* No backing pager: satisfy every page with zero fill. */
+	while (npages--) {
+		if (!vm_page_zero_fill(*mlist)) {
+			return (VM_PAGER_FAIL);
+		}
+		mlist++;
+	}
+	return (VM_PAGER_OK);
+}
+
+/*
+ * Pass npages pages listed in mlist to the pager's pgo_putpages routine
+ * and return its status.  A NULL pager is a fatal error.
+ */
+int
+vm_pager_put_pages(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+	if (!pager) {
+		panic("vm_pager_put_pages: null pager");
+	}
+
+	return ((*pager->pg_ops->pgo_putpages)(pager, mlist, npages, sync));
+}
+
+/*
+ * Ask the pager's pgo_haspage routine about the given offset and return
+ * its answer.  A NULL pager is a fatal error.
+ */
+boolean_t
+vm_pager_has_page(pager, offset)
+	vm_pager_t pager;
+	vm_offset_t offset;
+{
+	if (!pager) {
+		panic("vm_pager_has_page: null pager");
+	}
+
+	return ((*pager->pg_ops->pgo_haspage)(pager, offset));
+}
+
+/*
+ * Called by pageout daemon before going back to sleep.
+ * Gives pagers a chance to clean up any completed async paging operations.
+ *
+ * Each configured pager's pgo_putpages routine is called with a NULL
+ * pager, a NULL page list and a count of 0.
+ */
+void
+vm_pager_sync()
+{
+	struct pagerops **pgops;
+
+	/*
+	 * Skip slots for pagers that are not configured: test the slot
+	 * contents (*pgops), not the iterator, which is never NULL.
+	 */
+	for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
+		if (*pgops)
+			(*(*pgops)->pgo_putpages)(NULL, NULL, 0, FALSE);
+}
+
+/*
+ * Ask the pager, through its pgo_cluster routine, for the cluster range
+ * around offset; the routine is handed loff and hoff to fill in.
+ * A NULL pager is a fatal error.
+ */
+void
+vm_pager_cluster(pager, offset, loff, hoff)
+	vm_pager_t pager;
+	vm_offset_t offset;
+	vm_offset_t *loff;
+	vm_offset_t *hoff;
+{
+	if (pager == NULL)
+		panic("vm_pager_cluster: null pager");
+	/* Plain call: a void function may not "return" an expression. */
+	(*pager->pg_ops->pgo_cluster)(pager, offset, loff, hoff);
+}
+
+/*
+ * Cluster routine that must never be reached: it panics unconditionally
+ * and ignores all of its arguments.  Presumably meant as the pgo_cluster
+ * entry of pagers that do not support clustering -- confirm at the
+ * pagerops definitions.
+ */
+void
+vm_pager_clusternull(pager, offset, loff, hoff)
+	vm_pager_t pager;
+	vm_offset_t offset;
+	vm_offset_t *loff;
+	vm_offset_t *hoff;
+{
+	/* The message now carries this function's actual name. */
+	panic("vm_pager_clusternull called");
+}
+
+/*
+ * Enter the npages pages of mlist at consecutive addresses in pager_map
+ * and return the first address.  Returns 0 when the map has no room and
+ * canwait is false; when canwait is true the caller sleeps on pager_map
+ * until room appears.
+ */
+vm_offset_t
+vm_pager_map_pages(mlist, npages, canwait)
+	vm_page_t *mlist;
+	int npages;
+	boolean_t canwait;
+{
+	vm_offset_t kva, va;
+	vm_size_t size;
+	vm_page_t m;
+
+	/*
+	 * Find room in the pager map.  This is kmem_alloc_wait written
+	 * out by hand so that blocking can be made optional.
+	 */
+	size = npages * PAGE_SIZE;
+	vm_map_lock(pager_map);
+	for (;;) {
+		if (!vm_map_findspace(pager_map, 0, size, &kva)) {
+			break;
+		}
+		if (!canwait) {
+			vm_map_unlock(pager_map);
+			return (0);
+		}
+		pager_map_wanted = TRUE;
+		vm_map_unlock(pager_map);
+		(void) tsleep(pager_map, PVM, "pager_map", 0);
+		vm_map_lock(pager_map);
+	}
+	vm_map_insert(pager_map, NULL, 0, kva, kva + size);
+	vm_map_unlock(pager_map);
+
+	/* Enter the pages one after another, starting at kva. */
+	va = kva;
+	while (npages--) {
+		m = *mlist++;
+#ifdef DEBUG
+		if ((m->flags & PG_BUSY) == 0)
+			panic("vm_pager_map_pages: page not busy");
+		if (m->flags & PG_PAGEROWNED)
+			panic("vm_pager_map_pages: page already in pager");
+		m->flags |= PG_PAGEROWNED;
+#endif
+		pmap_enter(vm_map_pmap(pager_map), va, VM_PAGE_TO_PHYS(m),
+		    VM_PROT_DEFAULT, TRUE);
+		va += PAGE_SIZE;
+	}
+	return (kva);
+}
+
+/*
+ * Undo vm_pager_map_pages: remove the npages mappings starting at kva
+ * from pager_map, delete the map entry, and wake sleepers on pager_map
+ * if pager_map_wanted is set.  Under DEBUG, PG_PAGEROWNED is cleared on
+ * each page first and a page without it is reported.
+ */
+void
+vm_pager_unmap_pages(kva, npages)
+	vm_offset_t kva;
+	int npages;
+{
+	vm_size_t size;
+#ifdef DEBUG
+	vm_offset_t va;
+	vm_page_t m;
+	int np;
+#endif
+
+	size = npages * PAGE_SIZE;
+
+#ifdef DEBUG
+	va = kva;
+	np = npages;
+	while (np--) {
+		m = vm_pager_atop(va);
+		if ((m->flags & PG_PAGEROWNED) == 0)
+			printf("vm_pager_unmap_pages: %x(%x/%x) not owned\n",
+			    m, va, VM_PAGE_TO_PHYS(m));
+		else
+			m->flags &= ~PG_PAGEROWNED;
+		va += PAGE_SIZE;
+	}
+#endif
+	pmap_remove(vm_map_pmap(pager_map), kva, kva + size);
+	vm_map_lock(pager_map);
+	(void) vm_map_delete(pager_map, kva, kva + size);
+	if (pager_map_wanted) {
+		wakeup(pager_map);
+	}
+	vm_map_unlock(pager_map);
+}
+
+/*
+ * Translate a pager_map virtual address into its vm_page: extract the
+ * physical address from pager_map's pmap and convert it with
+ * PHYS_TO_VM_PAGE.  Panics when the extracted address is 0.
+ */
+vm_page_t
+vm_pager_atop(kva)
+	vm_offset_t kva;
+{
+	vm_offset_t pa = pmap_extract(vm_map_pmap(pager_map), kva);
+
+	if (!pa) {
+		panic("vm_pager_atop");
+	}
+	return (PHYS_TO_VM_PAGE(pa));
+}
+
+/*
+ * Search a pager list for the entry whose pg_handle equals handle.
+ * Returns the first match, or NULL when the list holds none.
+ */
+vm_pager_t
+vm_pager_lookup(pglist, handle)
+	register struct pagerlst *pglist;
+	caddr_t handle;
+{
+	register vm_pager_t pager;
+
+	pager = pglist->tqh_first;
+	while (pager != NULL && pager->pg_handle != handle) {
+		pager = pager->pg_list.tqe_next;
+	}
+	return (pager);
+}
+
+/*
+ * Set or clear OBJ_CANPERSIST on an object, according to should_cache,
+ * while holding the object cache lock and the object lock.
+ *
+ * Afterwards one reference to the object is dropped with
+ * vm_object_deallocate(); presumably this is the reference the caller
+ * gained when it looked the object up -- confirm at the call sites.
+ *
+ * Returns KERN_INVALID_ARGUMENT for a NULL object, else KERN_SUCCESS.
+ */
+int
+pager_cache(object, should_cache)
+	vm_object_t object;
+	boolean_t should_cache;
+{
+	if (object == NULL) {
+		return (KERN_INVALID_ARGUMENT);
+	}
+
+	vm_object_cache_lock();
+	vm_object_lock(object);
+	if (!should_cache) {
+		object->flags &= ~OBJ_CANPERSIST;
+	} else {
+		object->flags |= OBJ_CANPERSIST;
+	}
+	vm_object_unlock(object);
+	vm_object_cache_unlock();
+
+	vm_object_deallocate(object);
+
+	return (KERN_SUCCESS);
+}
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
new file mode 100644
index 000000000000..e4659c268c1d
--- /dev/null
+++ b/sys/vm/vm_pager.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_pager.h 8.4 (Berkeley) 1/12/94
+ */
+
+/*
+ * Pager routine interface definition.
+ * For BSD we use a cleaner version of the internal pager interface.
+ */
+
+#ifndef _VM_PAGER_
+#define _VM_PAGER_
+
+/* Head type for a tail queue of pagers, linked through pg_list. */
+TAILQ_HEAD(pagerlst, pager_struct);
+
+/*
+ * Generic pager descriptor.  pg_ops points at the operations vector
+ * (struct pagerops) and pg_data carries data private to the pager;
+ * pg_handle is the external handle the pager was created for.
+ */
+struct pager_struct {
+ TAILQ_ENTRY(pager_struct) pg_list; /* links for list management */
+ caddr_t pg_handle; /* ext. handle (vp, dev, fp) */
+ int pg_type; /* type of pager */
+ int pg_flags; /* flags */
+ struct pagerops *pg_ops; /* pager operations */
+ void *pg_data; /* private pager data */
+};
+
+/* pager types (presumably the values stored in pg_type -- confirm) */
+#define PG_DFLT -1
+#define PG_SWAP 0
+#define PG_VNODE 1
+#define PG_DEVICE 2
+
+/* flags (presumably bits of pg_flags, as tested by vm_pager_cancluster) */
+#define PG_CLUSTERGET 1
+#define PG_CLUSTERPUT 2
+
+/*
+ * Operations vector implemented by each pager type; a pager reaches
+ * its vector through pager_struct.pg_ops.  The get/put entries take an
+ * array of pages plus a count, followed by a boolean_t.
+ */
+struct pagerops {
+ void (*pgo_init) /* Initialize pager. */
+ __P((void));
+ vm_pager_t (*pgo_alloc) /* Allocate pager. */
+ __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+ void (*pgo_dealloc) /* Disassociate. */
+ __P((vm_pager_t));
+ int (*pgo_getpages) /* Get (read) page(s). */
+ __P((vm_pager_t, vm_page_t *, int, boolean_t));
+ int (*pgo_putpages) /* Put (write) page(s). */
+ __P((vm_pager_t, vm_page_t *, int, boolean_t));
+ boolean_t (*pgo_haspage) /* Does pager have page? */
+ __P((vm_pager_t, vm_offset_t));
+ void (*pgo_cluster) /* Return range of cluster. */
+ __P((vm_pager_t, vm_offset_t,
+ vm_offset_t *, vm_offset_t *));
+};
+
+/*
+ * get/put return values
+ * OK operation was successful
+ * BAD specified data was out of the accepted range
+ * FAIL specified data was in range, but doesn't exist
+ * PEND operation was initiated but not completed
+ * ERROR error while accessing data that is in range and exists
+ * AGAIN temporary resource shortage prevented operation from happening
+ */
+#define VM_PAGER_OK 0
+#define VM_PAGER_BAD 1
+#define VM_PAGER_FAIL 2
+#define VM_PAGER_PEND 3
+#define VM_PAGER_ERROR 4
+#define VM_PAGER_AGAIN 5
+
+#ifdef KERNEL
+extern struct pagerops *dfltpagerops;
+
+/* Kernel-only prototypes for the machine-independent pager layer. */
+vm_pager_t vm_pager_allocate
+ __P((int, caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+vm_page_t vm_pager_atop __P((vm_offset_t));
+void vm_pager_cluster
+ __P((vm_pager_t, vm_offset_t,
+ vm_offset_t *, vm_offset_t *));
+void vm_pager_clusternull
+ __P((vm_pager_t, vm_offset_t,
+ vm_offset_t *, vm_offset_t *));
+void vm_pager_deallocate __P((vm_pager_t));
+int vm_pager_get_pages
+ __P((vm_pager_t, vm_page_t *, int, boolean_t));
+boolean_t vm_pager_has_page __P((vm_pager_t, vm_offset_t));
+void vm_pager_init __P((void));
+vm_pager_t vm_pager_lookup __P((struct pagerlst *, caddr_t));
+vm_offset_t vm_pager_map_pages __P((vm_page_t *, int, boolean_t));
+int vm_pager_put_pages
+ __P((vm_pager_t, vm_page_t *, int, boolean_t));
+void vm_pager_sync __P((void));
+void vm_pager_unmap_pages __P((vm_offset_t, int));
+
+/* Nonzero when any flag bit in b is set in pager p's pg_flags. */
+#define vm_pager_cancluster(p, b) ((p)->pg_flags & (b))
+
+/*
+ * XXX compat with old interface
+ *
+ * Single-page wrappers: each places m in a one-element array and calls
+ * the corresponding multi-page routine with a count of 1.  They are
+ * written as ({ ... }) statement expressions (a GNU C extension), so
+ * the value of the macro is the value returned by that call.
+ * The temporary array is named ml; an argument expression that itself
+ * refers to an identifier called ml would be captured by it.
+ */
+#define vm_pager_get(p, m, s) \
+({ \
+ vm_page_t ml[1]; \
+ ml[0] = (m); \
+ vm_pager_get_pages(p, ml, 1, s); \
+})
+#define vm_pager_put(p, m, s) \
+({ \
+ vm_page_t ml[1]; \
+ ml[0] = (m); \
+ vm_pager_put_pages(p, ml, 1, s); \
+})
+#endif
+
+#endif /* _VM_PAGER_ */
diff --git a/sys/vm/vm_param.h b/sys/vm/vm_param.h
new file mode 100644
index 000000000000..2d2c71594edf
--- /dev/null
+++ b/sys/vm/vm_param.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_param.h 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Machine independent virtual memory parameters.
+ */
+
+#ifndef _VM_PARAM_
+#define _VM_PARAM_
+
+#include <machine/vmparam.h>
+
+/*
+ * Boolean type and its two truth values.
+ * This belongs in types.h, but moving it there breaks too many
+ * existing programs.
+ */
+typedef int boolean_t;
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * The machine independent pages are referred to as PAGES. A page
+ * is some number of hardware pages, depending on the target machine.
+ */
+#define DEFAULT_PAGE_SIZE 4096
+
+/*
+ * All references to the size of a page should be done with PAGE_SIZE
+ * or PAGE_SHIFT. The fact they are variables is hidden here so that
+ * we can easily make them constant if we so desire.
+ *
+ * As defined here PAGE_SIZE expands to cnt.v_page_size, while PAGE_MASK
+ * and PAGE_SHIFT expand to the variables page_mask and page_shift,
+ * which are declared only under KERNEL.
+ */
+#define PAGE_SIZE cnt.v_page_size /* size of page */
+#define PAGE_MASK page_mask /* size of page - 1 */
+#define PAGE_SHIFT page_shift /* bits to shift for pages */
+#ifdef KERNEL
+extern vm_size_t page_mask;
+extern int page_shift;
+#endif
+
+/*
+ * CTL_VM identifiers
+ *
+ * The entries of CTL_VM_NAMES are listed in identifier order: slot 0 is
+ * an empty placeholder, slot VM_METER is "vmmeter" and slot VM_LOADAVG
+ * is "loadavg" (presumably the table is indexed by identifier).
+ */
+#define VM_METER 1 /* struct vmmeter */
+#define VM_LOADAVG 2 /* struct loadavg */
+#define VM_MAXID 3 /* number of valid vm ids */
+
+#define CTL_VM_NAMES { \
+ { 0, 0 }, \
+ { "vmmeter", CTLTYPE_STRUCT }, \
+ { "loadavg", CTLTYPE_STRUCT }, \
+}
+
+/*
+ * Return values from the VM routines.
+ * KERN_SUCCESS is zero; every other code is a distinct positive value.
+ */
+#define KERN_SUCCESS 0
+#define KERN_INVALID_ADDRESS 1
+#define KERN_PROTECTION_FAILURE 2
+#define KERN_NO_SPACE 3
+#define KERN_INVALID_ARGUMENT 4
+#define KERN_FAILURE 5
+#define KERN_RESOURCE_SHORTAGE 6
+#define KERN_NOT_RECEIVER 7
+#define KERN_NO_ACCESS 8
+
+#ifndef ASSEMBLER
+/*
+ * Convert addresses to pages and vice versa.
+ * No rounding is used.
+ *
+ * atop() casts its argument to unsigned before shifting right.
+ * NOTE(review): ptoa() shifts (x) left in x's own type and only then
+ * casts to vm_offset_t, so a narrow x could overflow before widening.
+ */
+#ifdef KERNEL
+#define atop(x) (((unsigned)(x)) >> PAGE_SHIFT)
+#define ptoa(x) ((vm_offset_t)((x) << PAGE_SHIFT))
+
+/*
+ * Round off or truncate to the nearest page. These will work
+ * for either addresses or counts (i.e., 1 byte rounds to 1 page).
+ * The kernel versions mask with PAGE_MASK; num_pages() yields a page
+ * count rather than a byte value.
+ */
+#define round_page(x) \
+ ((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) & ~PAGE_MASK))
+#define trunc_page(x) \
+ ((vm_offset_t)(((vm_offset_t)(x)) & ~PAGE_MASK))
+#define num_pages(x) \
+ ((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT))
+
+extern vm_size_t mem_size; /* size of physical memory (bytes) */
+extern vm_offset_t first_addr; /* first physical page */
+extern vm_offset_t last_addr; /* last physical page */
+
+#else
+/*
+ * out-of-kernel versions of round_page and trunc_page
+ * (computed by dividing and multiplying by vm_page_size; atop, ptoa
+ * and num_pages are not provided outside the kernel)
+ */
+#define round_page(x) \
+ ((((vm_offset_t)(x) + (vm_page_size - 1)) / vm_page_size) * vm_page_size)
+#define trunc_page(x) \
+ ((((vm_offset_t)(x)) / vm_page_size) * vm_page_size)
+
+#endif /* KERNEL */
+#endif /* ASSEMBLER */
+#endif /* _VM_PARAM_ */
diff --git a/sys/vm/vm_prot.h b/sys/vm/vm_prot.h
new file mode 100644
index 000000000000..b3bae4386315
--- /dev/null
+++ b/sys/vm/vm_prot.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_prot.h 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Virtual memory protection definitions.
+ */
+
+#ifndef _VM_PROT_
+#define _VM_PROT_
+
+/*
+ * Types defined:
+ *
+ * vm_prot_t VM protection values.
+ */
+
+typedef int vm_prot_t;
+
+/*
+ * Protection values, defined as bits within the vm_prot_t type
+ */
+
+#define VM_PROT_NONE ((vm_prot_t) 0x00)
+
+#define VM_PROT_READ ((vm_prot_t) 0x01) /* read permission */
+#define VM_PROT_WRITE ((vm_prot_t) 0x02) /* write permission */
+#define VM_PROT_EXECUTE ((vm_prot_t) 0x04) /* execute permission */
+
+/*
+ * The default protection for newly-created virtual memory
+ * (as defined here: read, write and execute all granted)
+ */
+
+#define VM_PROT_DEFAULT (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
+
+/*
+ * The maximum privileges possible, for parameter checking.
+ * Currently the same bit set as VM_PROT_DEFAULT.
+ */
+
+#define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
+
+#endif /* _VM_PROT_ */
diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c
new file mode 100644
index 000000000000..10b7523ae232
--- /dev/null
+++ b/sys/vm/vm_swap.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/dmap.h> /* XXX */
+#include <sys/vnode.h>
+#include <sys/map.h>
+#include <sys/file.h>
+
+#include <miscfs/specfs/specdev.h>
+
+/*
+ * Indirect driver for multi-controller paging.
+ */
+
+int nswap, nswdev; /* total swap size in blocks; number of swap devices */
+#ifdef SEQSWAP
+int niswdev; /* number of interleaved swap devices */
+int niswap; /* size of interleaved swap area */
+#endif
+
+/*
+ * Set up swap devices.
+ * Initialize linked list of free swap
+ * headers. These do not actually point
+ * to buffers, but rather to pages that
+ * are being swapped in and out.
+ *
+ * In order: count the configured swdevt[] entries and size nswap,
+ * make sure swdevt[0] has a vnode (via bdevvp), call swfree() on
+ * device 0 unless nswap is 0, then chain the nswbuf swap buffer
+ * headers onto bswlist.  Panics if no swap device is configured,
+ * if bdevvp() fails, or if swfree(p, 0) returns an error.
+ */
+void
+swapinit()
+{
+ register int i;
+ register struct buf *sp = swbuf;
+ register struct proc *p = &proc0; /* XXX */
+ struct swdevt *swp;
+ int error;
+
+ /*
+ * Count swap devices, and adjust total swap space available.
+ * Some of the space will not be countable until later (dynamically
+ * configurable devices) and some of the counted space will not be
+ * available until a swapon() system call is issued, both usually
+ * happen when the system goes multi-user.
+ *
+ * If using NFS for swap, swdevt[0] will already be bdevvp'd. XXX
+ */
+#ifdef SEQSWAP
+ nswdev = niswdev = 0;
+ nswap = niswap = 0;
+ /*
+ * All interleaved devices must come first
+ */
+ for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) {
+ if (swp->sw_flags & SW_SEQUENTIAL)
+ break;
+ niswdev++;
+ if (swp->sw_nblks > niswap)
+ niswap = swp->sw_nblks;
+ }
+ /*
+ * Interleaved area: the largest device, rounded up to a multiple
+ * of dmmax, times the number of interleaved devices.
+ */
+ niswap = roundup(niswap, dmmax);
+ niswap *= niswdev;
+ if (swdevt[0].sw_vp == NULL &&
+ bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp))
+ panic("swapvp");
+ /*
+ * The remainder must be sequential
+ */
+ for ( ; swp->sw_dev != NODEV; swp++) {
+ /* NOTE(review): message says "binit" although this is swapinit(). */
+ if ((swp->sw_flags & SW_SEQUENTIAL) == 0)
+ panic("binit: mis-ordered swap devices");
+ nswdev++;
+ if (swp->sw_nblks > 0) {
+ /* trim each sequential device to a multiple of dmmax */
+ if (swp->sw_nblks % dmmax)
+ swp->sw_nblks -= (swp->sw_nblks % dmmax);
+ nswap += swp->sw_nblks;
+ }
+ }
+ nswdev += niswdev;
+ if (nswdev == 0)
+ panic("swapinit");
+ nswap += niswap;
+#else
+ nswdev = 0;
+ nswap = 0;
+ /* find the largest configured device while counting them */
+ for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) {
+ nswdev++;
+ if (swp->sw_nblks > nswap)
+ nswap = swp->sw_nblks;
+ }
+ if (nswdev == 0)
+ panic("swapinit");
+ /* with several devices, round the per-device size up to dmmax */
+ if (nswdev > 1)
+ nswap = ((nswap + dmmax - 1) / dmmax) * dmmax;
+ nswap *= nswdev;
+ if (swdevt[0].sw_vp == NULL &&
+ bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp))
+ panic("swapvp");
+#endif
+ if (nswap == 0)
+ printf("WARNING: no swap space found\n");
+ else if (error = swfree(p, 0)) {
+ printf("swfree errno %d\n", error); /* XXX */
+ panic("swapinit swfree 0");
+ }
+
+ /*
+ * Now set up swap buffer headers.
+ * The first nswbuf - 1 headers each link to their successor; the
+ * last one is initialized after the loop and terminates the list.
+ */
+ bswlist.b_actf = sp;
+ for (i = 0; i < nswbuf - 1; i++, sp++) {
+ sp->b_actf = sp + 1;
+ sp->b_rcred = sp->b_wcred = p->p_ucred;
+ sp->b_vnbufs.le_next = NOLIST;
+ }
+ sp->b_rcred = sp->b_wcred = p->p_ucred;
+ sp->b_vnbufs.le_next = NOLIST;
+ sp->b_actf = NULL;
+}
+
+/*
+ * Strategy routine for the swap pseudo-device: translate the swap
+ * block number in bp into a (swap device, device block) pair, point
+ * the buffer at that device's vnode and pass it on with VOP_STRATEGY().
+ * A bad request is completed through biodone() with B_ERROR set and
+ * b_error set to EINVAL or ENODEV.
+ */
+void
+swstrategy(bp)
+ register struct buf *bp;
+{
+ int sz, off, seg, index;
+ register struct swdevt *sp;
+ struct vnode *vp;
+
+#ifdef GENERIC
+ /*
+ * A mini-root gets copied into the front of the swap
+ * and we run over top of the swap area just long
+ * enough for us to do a mkfs and restor of the real
+ * root (sure beats rewriting standalone restor).
+ */
+#define MINIROOTSIZE 4096
+ if (rootdev == dumpdev)
+ bp->b_blkno += MINIROOTSIZE;
+#endif
+ /* request length in DEV_BSIZE units */
+ sz = howmany(bp->b_bcount, DEV_BSIZE);
+ /* reject requests that run past the end of the swap area */
+ if (bp->b_blkno + sz > nswap) {
+ bp->b_error = EINVAL;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return;
+ }
+ if (nswdev > 1) {
+#ifdef SEQSWAP
+ if (bp->b_blkno < niswap) {
+ /* block lies in the interleaved area */
+ if (niswdev > 1) {
+ off = bp->b_blkno % dmmax;
+ /* a request may not cross a dmmax boundary */
+ if (off+sz > dmmax) {
+ bp->b_error = EINVAL;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return;
+ }
+ seg = bp->b_blkno / dmmax;
+ index = seg % niswdev;
+ seg /= niswdev;
+ bp->b_blkno = seg*dmmax + off;
+ } else
+ index = 0;
+ } else {
+ register struct swdevt *swp;
+
+ /*
+ * Sequential area: walk the devices that follow the
+ * interleaved ones, subtracting each device's size
+ * until the block falls inside one of them.
+ */
+ bp->b_blkno -= niswap;
+ for (index = niswdev, swp = &swdevt[niswdev];
+ swp->sw_dev != NODEV;
+ swp++, index++) {
+ if (bp->b_blkno < swp->sw_nblks)
+ break;
+ bp->b_blkno -= swp->sw_nblks;
+ }
+ if (swp->sw_dev == NODEV ||
+ bp->b_blkno+sz > swp->sw_nblks) {
+ bp->b_error = swp->sw_dev == NODEV ?
+ ENODEV : EINVAL;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return;
+ }
+ }
+#else
+ /*
+ * Interleaved layout: consecutive dmmax-sized segments rotate
+ * across the nswdev devices; a request may not cross a segment
+ * boundary.
+ */
+ off = bp->b_blkno % dmmax;
+ if (off+sz > dmmax) {
+ bp->b_error = EINVAL;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return;
+ }
+ seg = bp->b_blkno / dmmax;
+ index = seg % nswdev;
+ seg /= nswdev;
+ bp->b_blkno = seg*dmmax + off;
+#endif
+ } else
+ index = 0;
+ sp = &swdevt[index];
+ if ((bp->b_dev = sp->sw_dev) == NODEV)
+ panic("swstrategy");
+ /* device is configured but has no vnode attached yet */
+ if (sp->sw_vp == NULL) {
+ bp->b_error = ENODEV;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return;
+ }
+ VHOLD(sp->sw_vp);
+ /*
+ * For a write, move the pending-output count from the vnode the
+ * buffer is currently attached to (waking a VBWAIT sleeper once it
+ * drops to zero) over to the swap device vnode.
+ */
+ if ((bp->b_flags & B_READ) == 0) {
+ if (vp = bp->b_vp) {
+ vp->v_numoutput--;
+ if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
+ vp->v_flag &= ~VBWAIT;
+ wakeup((caddr_t)&vp->v_numoutput);
+ }
+ }
+ sp->sw_vp->v_numoutput++;
+ }
+ /* detach from the old vnode before retargeting the buffer */
+ if (bp->b_vp != NULL)
+ brelvp(bp);
+ bp->b_vp = sp->sw_vp;
+ VOP_STRATEGY(bp);
+}
+
+/*
+ * System call swapon(name) enables swapping on device name,
+ * which must be one of the devices configured in swdevt[].
+ *
+ * Returns 0 on success, or:
+ *	the error from suser() if the caller is not the super-user,
+ *	the error from namei() if the name cannot be looked up,
+ *	ENOTBLK if the name is not a block device,
+ *	ENXIO   if the device's major number is out of range,
+ *	EBUSY   if already swapping on this device,
+ *	the error from swfree() if the device cannot be brought up,
+ *	EINVAL  if the device is not in swdevt[].
+ * On every failure path the vnode reference obtained by namei() is
+ * released again.
+ */
+struct swapon_args {
+	char	*name;
+};
+/* ARGSUSED */
+int
+swapon(p, uap, retval)
+	struct proc *p;
+	struct swapon_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	register struct swdevt *sp;
+	struct vnode *ovp;
+	dev_t dev;
+	int error;
+	struct nameidata nd;
+
+	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+		return (error);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->name, p);
+	if ((error = namei(&nd)) != 0)
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VBLK) {
+		vrele(vp);
+		return (ENOTBLK);
+	}
+	dev = (dev_t)vp->v_rdev;
+	if (major(dev) >= nblkdev) {
+		vrele(vp);
+		return (ENXIO);
+	}
+	for (sp = &swdevt[0]; sp->sw_dev != NODEV; sp++) {
+		if (sp->sw_dev == dev) {
+			if (sp->sw_flags & SW_FREED) {
+				vrele(vp);
+				return (EBUSY);
+			}
+			/*
+			 * swfree() picks the vnode up from sp->sw_vp, so it
+			 * has to be installed first.  Remember the previous
+			 * value: if swfree() fails we drop our reference, and
+			 * the table must not be left pointing at a vnode we
+			 * no longer hold.
+			 */
+			ovp = sp->sw_vp;
+			sp->sw_vp = vp;
+			if ((error = swfree(p, sp - swdevt)) != 0) {
+				sp->sw_vp = ovp;
+				vrele(vp);
+				return (error);
+			}
+			return (0);
+		}
+#ifdef SEQSWAP
+		/*
+		 * If we have reached a non-freed sequential device without
+		 * finding what we are looking for, it is an error.
+		 * That is because all interleaved devices must come first
+		 * and sequential devices must be freed in order.
+		 */
+		if ((sp->sw_flags & (SW_SEQUENTIAL|SW_FREED)) == SW_SEQUENTIAL)
+			break;
+#endif
+	}
+	vrele(vp);
+	return (EINVAL);
+}
+
+/*
+ * Swfree(index) frees the index'th portion of the swap map.
+ * Each of the nswdev devices provides 1/nswdev'th of the swap
+ * space, which is laid out with blocks of dmmax pages circularly
+ * among the devices.
+ *
+ * The device is opened through swdevt[index].sw_vp, which the caller
+ * must have set.  Returns 0 on success (and also when the device turns
+ * out to have no blocks), the error from VOP_OPEN(), or ENXIO when the
+ * size of a not-yet-sized device cannot be obtained from its driver.
+ */
+int
+swfree(p, index)
+ struct proc *p;
+ int index;
+{
+ register struct swdevt *sp;
+ register swblk_t vsbase;
+ register long blk;
+ struct vnode *vp;
+ register swblk_t dvbase;
+ register int nblks;
+ int error;
+
+ sp = &swdevt[index];
+ vp = sp->sw_vp;
+ if (error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p))
+ return (error);
+ /* mark the device in use; cleared again on the failure paths below */
+ sp->sw_flags |= SW_FREED;
+ nblks = sp->sw_nblks;
+ /*
+ * Some devices may not exist til after boot time.
+ * If so, their nblk count will be 0.
+ */
+ if (nblks <= 0) {
+ int perdev;
+ dev_t dev = sp->sw_dev;
+
+ /* ask the driver for the size; no d_psize entry or -1 is fatal */
+ if (bdevsw[major(dev)].d_psize == 0 ||
+ (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
+ (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
+ sp->sw_flags &= ~SW_FREED;
+ return (ENXIO);
+ }
+#ifdef SEQSWAP
+ if (index < niswdev) {
+ /* interleaved device: clamp to its share of niswap */
+ perdev = niswap / niswdev;
+ if (nblks > perdev)
+ nblks = perdev;
+ } else {
+ /* sequential device: trim to dmmax and grow nswap */
+ if (nblks % dmmax)
+ nblks -= (nblks % dmmax);
+ nswap += nblks;
+ }
+#else
+ /* clamp to this device's share of the total swap size */
+ perdev = nswap / nswdev;
+ if (nblks > perdev)
+ nblks = perdev;
+#endif
+ sp->sw_nblks = nblks;
+ }
+ if (nblks == 0) {
+ (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
+ sp->sw_flags &= ~SW_FREED;
+ return (0); /* XXX error? */
+ }
+#ifdef SEQSWAP
+ if (sp->sw_flags & SW_SEQUENTIAL) {
+ register struct swdevt *swp;
+
+ /*
+ * A sequential device's blocks start after the interleaved
+ * area and after every sequential device that precedes it.
+ */
+ blk = niswap;
+ for (swp = &swdevt[niswdev]; swp != sp; swp++)
+ blk += swp->sw_nblks;
+ rmfree(swapmap, nblks, blk);
+ return (0);
+ }
+#endif
+ /*
+ * Hand the device's blocks to swapmap one dmmax-sized chunk at a
+ * time; vsbase is the chunk's position in the interleaved swap area.
+ */
+ for (dvbase = 0; dvbase < nblks; dvbase += dmmax) {
+ blk = nblks - dvbase;
+#ifdef SEQSWAP
+ if ((vsbase = index*dmmax + dvbase*niswdev) >= niswap)
+ panic("swfree");
+#else
+ if ((vsbase = index*dmmax + dvbase*nswdev) >= nswap)
+ panic("swfree");
+#endif
+ /* the final chunk of a device may be shorter than dmmax */
+ if (blk > dmmax)
+ blk = dmmax;
+ if (vsbase == 0) {
+ /*
+ * First of all chunks... initialize the swapmap.
+ * Don't use the first cluster of the device
+ * in case it starts with a label or boot block.
+ */
+ rminit(swapmap, blk - ctod(CLSIZE),
+ vsbase + ctod(CLSIZE), "swap", nswapmap);
+ } else if (dvbase == 0) {
+ /*
+ * Don't use the first cluster of the device
+ * in case it starts with a label or boot block.
+ */
+ rmfree(swapmap, blk - ctod(CLSIZE),
+ vsbase + ctod(CLSIZE));
+ } else
+ rmfree(swapmap, blk, vsbase);
+ }
+ return (0);
+}
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
new file mode 100644
index 000000000000..3d49ea717184
--- /dev/null
+++ b/sys/vm/vm_unix.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 1988 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$
+ *
+ * @(#)vm_unix.c 8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Traditional sbrk/grow interface to VM
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <vm/vm.h>
+
+/* Argument structure for obreak(): the requested new break address. */
+struct obreak_args {
+	char	*nsiz;
+};
+
+/*
+ * Traditional break system call: move the end of the data segment to
+ * uap->nsiz (rounded up to a page boundary), allocating or freeing the
+ * pages in between and keeping vm_dsize in step.
+ *
+ * Returns 0 on success, EINVAL if the requested break lies below the
+ * start of the data segment, and ENOMEM if the resulting size would
+ * exceed RLIMIT_DATA or the map operation fails.
+ */
+/* ARGSUSED */
+int
+obreak(p, uap, retval)
+	struct proc *p;
+	struct obreak_args *uap;
+	int *retval;
+{
+	register struct vmspace *vm = p->p_vmspace;
+	vm_offset_t base, new, old;
+	vm_size_t diff;
+	int rv;
+
+	base = (vm_offset_t)vm->vm_daddr;
+	new = round_page(uap->nsiz);
+	/*
+	 * Compare as unsigned addresses.  Casting the difference to int
+	 * let a break below the data base, or one more than INT_MAX above
+	 * it, look negative and slip past the limit check, after which the
+	 * shrink path would deallocate outside the data segment and drive
+	 * vm_dsize out of range.
+	 */
+	if (new < base)
+		return (EINVAL);
+	if (new - base > p->p_rlimit[RLIMIT_DATA].rlim_cur)
+		return (ENOMEM);
+
+	/* current (page rounded) end of the data segment */
+	old = round_page(base + ctob(vm->vm_dsize));
+	if (new > old) {
+		diff = new - old;
+		rv = vm_allocate(&vm->vm_map, &old, diff, FALSE);
+		if (rv != KERN_SUCCESS) {
+			uprintf("sbrk: grow failed, return = %d\n", rv);
+			return (ENOMEM);
+		}
+		vm->vm_dsize += btoc(diff);
+	} else if (new < old) {
+		diff = old - new;
+		rv = vm_deallocate(&vm->vm_map, new, diff);
+		if (rv != KERN_SUCCESS) {
+			uprintf("sbrk: shrink failed, return = %d\n", rv);
+			return (ENOMEM);
+		}
+		vm->vm_dsize -= btoc(diff);
+	}
+	return (0);
+}
+
+/*
+ * Enlarge the "stack segment" to include the specified
+ * stack pointer for the process.
+ *
+ * Returns 1 when sp is covered by the stack segment (already, or after
+ * vm_ssize has been increased), and 0 when sp is below vm_maxsaddr or
+ * the increase would exceed the RLIMIT_STACK soft limit.
+ */
+int
+grow(p, sp)
+	struct proc *p;
+	unsigned sp;
+{
+	register struct vmspace *vm = p->p_vmspace;
+	register int nclicks;
+
+	/* Below the stack region: a user defined stack (from sendsig). */
+	if (sp < (unsigned)vm->vm_maxsaddr)
+		return (0);
+
+	/*
+	 * Work is only needed when sp is not yet inside the allocated
+	 * stack; the already-allocated case is the common one (from trap).
+	 */
+	if (sp < USRSTACK - ctob(vm->vm_ssize)) {
+		/*
+		 * Really need to check vs limit and increment stack
+		 * size if ok.
+		 */
+		nclicks = clrnd(btoc(USRSTACK-sp) - vm->vm_ssize);
+		if (vm->vm_ssize + nclicks >
+		    btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur))
+			return (0);
+		vm->vm_ssize += nclicks;
+	}
+	return (1);
+}
+
+/* Argument structure for ovadvise(). */
+struct ovadvise_args {
+ int anom;
+};
+/*
+ * ovadvise system call entry point.  None of the arguments is
+ * examined; the call always fails with EINVAL.
+ */
+/* ARGSUSED */
+int
+ovadvise(p, uap, retval)
+ struct proc *p;
+ struct ovadvise_args *uap;
+ int *retval;
+{
+
+ return (EINVAL);
+}
diff --git a/sys/vm/vm_user.c b/sys/vm/vm_user.c
new file mode 100644
index 000000000000..20172c6c6519
--- /dev/null
+++ b/sys/vm/vm_user.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_user.c 8.2 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * User-exported virtual memory functions.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+/*
+ * NOTE(review): defined here but not referenced anywhere else in this
+ * file; presumably used by other parts of the VM system -- confirm.
+ */
+simple_lock_data_t vm_alloc_lock; /* XXX */
+
+#ifdef MACHVMCOMPAT
+/*
+ * BSD style syscall interfaces to MACH calls
+ * All return MACH return values.
+ */
+/*
+ * Argument block for svm_allocate().  The map member is overwritten
+ * with the caller's own map before use.
+ */
+struct svm_allocate_args {
+	vm_map_t	map;
+	vm_offset_t	*addr;
+	vm_size_t	size;
+	boolean_t	anywhere;
+};
+
+/*
+ * System call wrapper around vm_allocate().
+ *
+ * The address is copied in from user space, handed to vm_allocate()
+ * and, on success, the resulting address is copied back out.  A failed
+ * copyin or copyout yields KERN_INVALID_ARGUMENT; otherwise the
+ * vm_allocate() status is returned.
+ */
+/* ARGSUSED */
+int
+svm_allocate(p, uap, retval)
+	struct proc *p;
+	struct svm_allocate_args *uap;
+	int *retval;
+{
+	vm_offset_t where;
+	int status;
+
+	uap->map = p->p_map;		/* XXX */
+
+	if (copyin((caddr_t)uap->addr, (caddr_t)&where, sizeof (where)))
+		return(KERN_INVALID_ARGUMENT);
+
+	status = vm_allocate(uap->map, &where, uap->size, uap->anywhere);
+	if (status == KERN_SUCCESS &&
+	    copyout((caddr_t)&where, (caddr_t)uap->addr, sizeof(where)))
+		status = KERN_INVALID_ARGUMENT;
+	return(status);
+}
+
+/*
+ * Argument block for svm_deallocate().  The map member is overwritten
+ * with the caller's own map before use.
+ */
+struct svm_deallocate_args {
+	vm_map_t	map;
+	vm_offset_t	addr;
+	vm_size_t	size;
+};
+
+/*
+ * System call wrapper around vm_deallocate(); returns its status.
+ */
+/* ARGSUSED */
+int
+svm_deallocate(p, uap, retval)
+	struct proc *p;
+	struct svm_deallocate_args *uap;
+	int *retval;
+{
+	uap->map = p->p_map;		/* XXX */
+	return(vm_deallocate(uap->map, uap->addr, uap->size));
+}
+
+/*
+ * Argument block for svm_inherit().  The map member is overwritten
+ * with the caller's own map before use.
+ */
+struct svm_inherit_args {
+	vm_map_t	map;
+	vm_offset_t	addr;
+	vm_size_t	size;
+	vm_inherit_t	inherit;
+};
+
+/*
+ * System call wrapper around vm_inherit(); returns its status.
+ */
+/* ARGSUSED */
+int
+svm_inherit(p, uap, retval)
+	struct proc *p;
+	struct svm_inherit_args *uap;
+	int *retval;
+{
+	uap->map = p->p_map;		/* XXX */
+	return(vm_inherit(uap->map, uap->addr, uap->size, uap->inherit));
+}
+
+/*
+ * Argument block for svm_protect().  The map member is overwritten
+ * with the caller's own map before use.
+ */
+struct svm_protect_args {
+	vm_map_t	map;
+	vm_offset_t	addr;
+	vm_size_t	size;
+	boolean_t	setmax;
+	vm_prot_t	prot;
+};
+
+/*
+ * System call wrapper around vm_protect(); returns its status.
+ */
+/* ARGSUSED */
+int
+svm_protect(p, uap, retval)
+	struct proc *p;
+	struct svm_protect_args *uap;
+	int *retval;
+{
+	uap->map = p->p_map;		/* XXX */
+	return(vm_protect(uap->map, uap->addr, uap->size, uap->setmax,
+	    uap->prot));
+}
+
+/*
+ * vm_inherit sets the inheritance of the specified range in the
+ * specified map.  The range is widened to whole pages before it is
+ * passed to vm_map_inherit(), whose status is returned.  A NULL map
+ * yields KERN_INVALID_ARGUMENT.
+ */
+int
+vm_inherit(map, start, size, new_inheritance)
+	register vm_map_t map;
+	vm_offset_t start;
+	vm_size_t size;
+	vm_inherit_t new_inheritance;
+{
+	vm_offset_t first, last;
+
+	if (map == NULL)
+		return(KERN_INVALID_ARGUMENT);
+
+	first = trunc_page(start);
+	last = round_page(start+size);
+	return(vm_map_inherit(map, first, last, new_inheritance));
+}
+
+/*
+ * vm_protect sets the protection of the specified range in the
+ * specified map.  The range is widened to whole pages before it is
+ * passed to vm_map_protect(), whose status is returned.  A NULL map
+ * yields KERN_INVALID_ARGUMENT.
+ */
+int
+vm_protect(map, start, size, set_maximum, new_protection)
+	register vm_map_t map;
+	vm_offset_t start;
+	vm_size_t size;
+	boolean_t set_maximum;
+	vm_prot_t new_protection;
+{
+	vm_offset_t first, last;
+
+	if (map == NULL)
+		return(KERN_INVALID_ARGUMENT);
+
+	first = trunc_page(start);
+	last = round_page(start+size);
+	return(vm_map_protect(map, first, last, new_protection, set_maximum));
+}
+#endif
+
+/*
+ * vm_allocate allocates "zero fill" memory in the specified
+ * map.
+ *
+ * With "anywhere" set the search starts at the map's minimum address;
+ * otherwise the caller's address is truncated to a page boundary.  The
+ * size is rounded up to whole pages.  A zero size succeeds and stores
+ * 0 in *addr.  Returns KERN_INVALID_ARGUMENT for a NULL map, otherwise
+ * the status of vm_map_find().
+ */
+int
+vm_allocate(map, addr, size, anywhere)
+	register vm_map_t map;
+	register vm_offset_t *addr;
+	register vm_size_t size;
+	boolean_t anywhere;
+{
+	if (map == NULL)
+		return(KERN_INVALID_ARGUMENT);
+	if (size == 0) {
+		*addr = 0;
+		return(KERN_SUCCESS);
+	}
+
+	*addr = anywhere ? vm_map_min(map) : trunc_page(*addr);
+	size = round_page(size);
+
+	return(vm_map_find(map, NULL, (vm_offset_t) 0, addr, size, anywhere));
+}
+
+/*
+ * vm_deallocate deallocates the specified range of addresses in the
+ * specified address map.  The range is widened to whole pages.  A zero
+ * size is a successful no-op; a NULL map yields KERN_INVALID_ARGUMENT;
+ * otherwise the status of vm_map_remove() is returned.
+ */
+int
+vm_deallocate(map, start, size)
+	register vm_map_t map;
+	vm_offset_t start;
+	vm_size_t size;
+{
+	if (map == NULL)
+		return(KERN_INVALID_ARGUMENT);
+
+	if (size != 0)
+		return(vm_map_remove(map, trunc_page(start),
+		    round_page(start+size)));
+
+	return(KERN_SUCCESS);
+}
+
+/*
+ * Similar to vm_allocate but assigns an explicit pager.
+ *
+ * The object for the pager is looked up in the object cache; when it
+ * is not there a new object of the (page rounded) size is allocated.
+ * On a successful vm_map_find() a non-NULL pager is attached to the
+ * object; on failure the object reference is dropped.  Returns
+ * KERN_INVALID_ARGUMENT for a NULL map, otherwise the vm_map_find()
+ * status.
+ */
+int
+vm_allocate_with_pager(map, addr, size, anywhere, pager, poffset, internal)
+	register vm_map_t map;
+	register vm_offset_t *addr;
+	register vm_size_t size;
+	boolean_t anywhere;
+	vm_pager_t pager;
+	vm_offset_t poffset;
+	boolean_t internal;
+{
+	register vm_object_t obj;
+	register int status;
+
+	if (map == NULL)
+		return(KERN_INVALID_ARGUMENT);
+
+	*addr = trunc_page(*addr);
+	size = round_page(size);
+
+	/* Find the object belonging to this pager, or make a new one. */
+	obj = vm_object_lookup(pager);
+	cnt.v_lookups++;
+	if (obj != NULL) {
+		cnt.v_hits++;
+	} else {
+		obj = vm_object_allocate(size);
+		/*
+		 * From Mike Hibler: "unnamed anonymous objects should never
+		 * be on the hash list ... For now you can just change
+		 * vm_allocate_with_pager to not do vm_object_enter if this
+		 * is an internal object ..."
+		 */
+		if (!internal)
+			vm_object_enter(obj, pager);
+	}
+
+	if (!internal) {
+		obj->flags &= ~OBJ_INTERNAL;
+		cnt.v_nzfod -= atop(size);
+	} else
+		obj->flags |= OBJ_INTERNAL;
+
+	status = vm_map_find(map, obj, poffset, addr, size, anywhere);
+	if (status == KERN_SUCCESS) {
+		if (pager != NULL)
+			vm_object_setpager(obj, pager, (vm_offset_t) 0, TRUE);
+	} else
+		vm_object_deallocate(obj);
+	return(status);
+}
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
new file mode 100644
index 000000000000..9c2f8260cfb3
--- /dev/null
+++ b/sys/vm/vnode_pager.c
@@ -0,0 +1,580 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vnode_pager.c 8.8 (Berkeley) 2/13/94
+ */
+
+/*
+ * Page to/from files (vnodes).
+ *
+ * TODO:
+ * pageouts
+ * fix credential use (uses current process credentials now)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/uio.h>
+#include <sys/mount.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vnode_pager.h>
+
+/*
+ * All vnode pagers currently allocated.  Initialised in
+ * vnode_pager_init(), appended to in vnode_pager_alloc(), unlinked in
+ * vnode_pager_dealloc() and walked by vnode_pager_umount().
+ */
+struct pagerlst vnode_pager_list; /* list of managed vnodes */
+
+#ifdef DEBUG
+/*
+ * Debug trace control: a bit mask tested before the printf()s in this
+ * file.  All tracing is off by default.
+ */
+int vpagerdebug = 0x00;
+#define VDB_FOLLOW 0x01 /* trace entry to the pager routines */
+#define VDB_INIT 0x02 /* not tested anywhere in this file */
+#define VDB_IO 0x04 /* trace transfers in vnode_pager_io() */
+#define VDB_FAIL 0x08 /* report vnode_pager_haspage() misses and BMAP errors */
+#define VDB_ALLOC 0x10 /* trace vnode_pager_alloc() */
+#define VDB_SIZE 0x20 /* report size checks and size changes */
+#endif
+
+/*
+ * Forward declarations of the file-local pager routines; all of them
+ * except vnode_pager_io() are installed in vnodepagerops below.
+ */
+static vm_pager_t vnode_pager_alloc
+ __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+static void vnode_pager_cluster
+ __P((vm_pager_t, vm_offset_t,
+ vm_offset_t *, vm_offset_t *));
+static void vnode_pager_dealloc __P((vm_pager_t));
+static int vnode_pager_getpage
+ __P((vm_pager_t, vm_page_t *, int, boolean_t));
+static boolean_t vnode_pager_haspage __P((vm_pager_t, vm_offset_t));
+static void vnode_pager_init __P((void));
+static int vnode_pager_io
+ __P((vn_pager_t, vm_page_t *, int,
+ boolean_t, enum uio_rw));
+/* NOTE(review): declared boolean_t, but the definition returns VM_PAGER_* codes. */
+static boolean_t vnode_pager_putpage
+ __P((vm_pager_t, vm_page_t *, int, boolean_t));
+
+/*
+ * Operations vector for the vnode pager; vnode_pager_alloc() stores a
+ * pointer to it in each pager's pg_ops.  The initializer is positional,
+ * so the order must match the member order of struct pagerops, which
+ * is declared outside this file.
+ */
+struct pagerops vnodepagerops = {
+ vnode_pager_init,
+ vnode_pager_alloc,
+ vnode_pager_dealloc,
+ vnode_pager_getpage,
+ vnode_pager_putpage,
+ vnode_pager_haspage,
+ vnode_pager_cluster
+};
+
+/*
+ * Pager initialization: start out with an empty list of vnode pagers.
+ */
+static void
+vnode_pager_init()
+{
+#ifdef DEBUG
+	if ((vpagerdebug & VDB_FOLLOW) != 0)
+		printf("vnode_pager_init()\n");
+#endif
+	TAILQ_INIT(&vnode_pager_list);
+}
+
+/*
+ * Allocate (or lookup) pager for a vnode.
+ * Handle is a vnode pointer.
+ *
+ * When the vnode has no pager yet (v_vmdata is NULL) a pager and its
+ * private vnpager data are allocated, an object sized to the file
+ * (va_size from VOP_GETATTR, rounded up to a page) is created and
+ * bound to the pager, a vnode reference is taken and the pager is
+ * appended to vnode_pager_list.  Otherwise the existing pager is
+ * reused and vm_object_lookup() is called on it.
+ *
+ * size and prot are only used by the DEBUG trace; foff is not
+ * referenced at all.
+ *
+ * Returns the pager, or NULL when handle is NULL, an allocation fails
+ * or VOP_GETATTR() reports an error.
+ */
+static vm_pager_t
+vnode_pager_alloc(handle, size, prot, foff)
+ caddr_t handle;
+ vm_size_t size;
+ vm_prot_t prot;
+ vm_offset_t foff;
+{
+ register vm_pager_t pager;
+ register vn_pager_t vnp;
+ vm_object_t object;
+ struct vattr vattr;
+ struct vnode *vp;
+ struct proc *p = curproc; /* XXX */
+
+#ifdef DEBUG
+ if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
+ printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot);
+#endif
+ /*
+ * Pageout to vnode, no can do yet.
+ */
+ if (handle == NULL)
+ return(NULL);
+
+ /*
+ * Vnodes keep a pointer to any associated pager so no need to
+ * lookup with vm_pager_lookup.
+ */
+ vp = (struct vnode *)handle;
+ pager = (vm_pager_t)vp->v_vmdata;
+ if (pager == NULL) {
+ /*
+ * Allocate pager structures
+ */
+ pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
+ if (pager == NULL)
+ return(NULL);
+ vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
+ if (vnp == NULL) {
+ free((caddr_t)pager, M_VMPAGER);
+ return(NULL);
+ }
+ /*
+ * And an object of the appropriate size
+ *
+ * Note that the object is entered and bound to the pager
+ * before the pager's own fields are filled in below.
+ */
+ if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) {
+ object = vm_object_allocate(round_page(vattr.va_size));
+ vm_object_enter(object, pager);
+ vm_object_setpager(object, pager, 0, TRUE);
+ } else {
+ /* Could not get the file size: undo both allocations. */
+ free((caddr_t)vnp, M_VMPGDATA);
+ free((caddr_t)pager, M_VMPAGER);
+ return(NULL);
+ }
+ /*
+ * Hold a reference to the vnode and initialize pager data.
+ */
+ VREF(vp);
+ vnp->vnp_flags = 0;
+ vnp->vnp_vp = vp;
+ vnp->vnp_size = vattr.va_size;
+ TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
+ pager->pg_handle = handle;
+ pager->pg_type = PG_VNODE;
+ pager->pg_flags = 0;
+ pager->pg_ops = &vnodepagerops;
+ pager->pg_data = vnp;
+ vp->v_vmdata = (caddr_t)pager;
+ } else {
+ /*
+ * vm_object_lookup() will remove the object from the
+ * cache if found and also gain a reference to the object.
+ */
+ object = vm_object_lookup(pager);
+#ifdef DEBUG
+ /* vnp is only needed for the trace below on this path */
+ vnp = (vn_pager_t)pager->pg_data;
+#endif
+ }
+#ifdef DEBUG
+ /* NOTE(review): the message says "vnode_pager_setup" but this is vnode_pager_alloc. */
+ if (vpagerdebug & VDB_ALLOC)
+ printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n",
+ vp, vnp->vnp_size, pager, object);
+#endif
+ return(pager);
+}
+
+/*
+ * Tear down a vnode pager.
+ *
+ * If the pager still refers to a vnode, the vnode's pager pointer and
+ * VTEXT flag are cleared and the vnode reference is released.  The
+ * pager is then unlinked from vnode_pager_list and both the private
+ * data and the pager structure are freed.
+ */
+static void
+vnode_pager_dealloc(pager)
+	vm_pager_t pager;
+{
+	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
+	register struct vnode *vp;
+#ifdef NOTDEF
+	struct proc *p = curproc;		/* XXX */
+#endif
+
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("vnode_pager_dealloc(%x)\n", pager);
+#endif
+	if ((vp = vnp->vnp_vp) != NULL) {
+		vp->v_vmdata = NULL;
+		vp->v_flag &= ~VTEXT;
+		/*
+		 * Guarded with #ifdef, exactly like the declaration of "p"
+		 * above, so that the variable and its only use are always
+		 * compiled in or out together.
+		 */
+#ifdef NOTDEF
+		/* can hang if done at reboot on NFS FS */
+		(void) VOP_FSYNC(vp, p->p_ucred, p);
+#endif
+		vrele(vp);
+	}
+	TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
+	free((caddr_t)vnp, M_VMPGDATA);
+	free((caddr_t)pager, M_VMPAGER);
+}
+
+/*
+ * Page-in entry point: read the given pages through vnode_pager_io()
+ * and return its status.
+ */
+static int
+vnode_pager_getpage(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+	vn_pager_t vnp;
+
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("vnode_pager_getpage(%x, %x, %x, %x)\n",
+		       pager, mlist, npages, sync);
+#endif
+	vnp = (vn_pager_t)pager->pg_data;
+	return(vnode_pager_io(vnp, mlist, npages, sync, UIO_READ));
+}
+
+/*
+ * Page-out entry point: write the given pages through
+ * vnode_pager_io().  When the write succeeds every page is flagged
+ * PG_CLEAN and its modify bit is cleared.  Returns the
+ * vnode_pager_io() status, or FALSE when called with a NULL pager.
+ */
+static boolean_t
+vnode_pager_putpage(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+	vm_page_t *pp;
+	int status;
+
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("vnode_pager_putpage(%x, %x, %x, %x)\n",
+		       pager, mlist, npages, sync);
+#endif
+	if (pager == NULL)
+		return (FALSE);		/* ??? */
+
+	status = vnode_pager_io((vn_pager_t)pager->pg_data,
+	    mlist, npages, sync, UIO_WRITE);
+	if (status != VM_PAGER_OK)
+		return(status);
+
+	/* The write went through: the pages now match the file. */
+	for (pp = mlist; npages != 0; npages--, pp++) {
+		(*pp)->flags |= PG_CLEAN;
+		pmap_clear_modify(VM_PAGE_TO_PHYS(*pp));
+	}
+	return(status);
+}
+
+/*
+ * Report whether the file behind the pager has data for the given
+ * offset.
+ *
+ * Returns FALSE when the offset is at or beyond the recorded file size
+ * or when VOP_BMAP() yields a negative block number; returns TRUE
+ * otherwise, including when VOP_BMAP() itself fails.
+ */
+static boolean_t
+vnode_pager_haspage(pager, offset)
+	vm_pager_t pager;
+	vm_offset_t offset;
+{
+	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
+	daddr_t blkno;
+	int error;
+
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("vnode_pager_haspage(%x, %x)\n", pager, offset);
+#endif
+
+	/*
+	 * The vnode is locked first so that the size we compare against
+	 * is the most recent one.  Past end of file there is no page.
+	 */
+	VOP_LOCK(vnp->vnp_vp);
+	if (offset >= vnp->vnp_size) {
+		VOP_UNLOCK(vnp->vnp_vp);
+#ifdef DEBUG
+		if (vpagerdebug & (VDB_FAIL|VDB_SIZE))
+			printf("vnode_pager_haspage: pg %x, off %x, size %x\n",
+			       pager, offset, vnp->vnp_size);
+#endif
+		return(FALSE);
+	}
+
+	/*
+	 * Ask the filesystem which disk block backs this offset; no block
+	 * means no data.
+	 *
+	 * Assumes that the vnode has whole page or nothing.
+	 */
+	error = VOP_BMAP(vnp->vnp_vp,
+	    offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize,
+	    (struct vnode **)0, &blkno, NULL);
+	VOP_UNLOCK(vnp->vnp_vp);
+	if (error == 0) {
+		if ((long)blkno < 0)
+			return(FALSE);
+		return(TRUE);
+	}
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FAIL)
+		printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n",
+		       error, pager, offset);
+#endif
+	return(TRUE);
+}
+
+/*
+ * Compute the cluster [*loffset, *hoffset) around the given offset:
+ * it starts at the offset itself and extends MAXBSIZE bytes, clipped
+ * to the page rounded file size.  Panics when the offset is at or
+ * beyond the recorded file size.
+ */
+static void
+vnode_pager_cluster(pager, offset, loffset, hoffset)
+	vm_pager_t	pager;
+	vm_offset_t	offset;
+	vm_offset_t	*loffset;
+	vm_offset_t	*hoffset;
+{
+	vn_pager_t vnp = (vn_pager_t)pager->pg_data;
+	vm_offset_t lo, hi, limit;
+
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("vnode_pager_cluster(%x, %x) ", pager, offset);
+#endif
+	lo = offset;
+	if (lo >= vnp->vnp_size)
+		panic("vnode_pager_cluster: bad offset");
+	/*
+	 * XXX could use VOP_BMAP to get maxcontig value
+	 */
+	limit = round_page(vnp->vnp_size);
+	hi = lo + MAXBSIZE;
+	if (hi > limit)
+		hi = limit;
+
+	*loffset = lo;
+	*hoffset = hi;
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("returns [%x-%x]\n", lo, hi);
+#endif
+}
+
+/*
+ * (XXX)
+ * Tell the VM system that a file has changed size.
+ * When the vnode has a pager (i.e. it is mapped into some address
+ * space) the size recorded in the pager is updated, and if the file
+ * shrank, cached pages of the associated object beyond the new end of
+ * file are discarded.
+ *
+ * Note: this routine may be invoked as a result of a pager put
+ * operation (possibly at object termination time), so we must be careful.
+ */
+void
+vnode_pager_setsize(vp, nsize)
+	struct vnode *vp;
+	u_long nsize;
+{
+	register vn_pager_t vnp;
+	register vm_object_t obj;
+	vm_pager_t pager;
+
+	/* Nothing to do unless this is a regular file with a pager. */
+	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
+		return;
+
+	/* Nothing to do if the size is unchanged. */
+	pager = (vm_pager_t)vp->v_vmdata;
+	vnp = (vn_pager_t)pager->pg_data;
+	if (vnp->vnp_size == nsize)
+		return;
+
+	/*
+	 * There may be no object.
+	 * This can happen during object termination since
+	 * vm_object_page_clean is called after the object
+	 * has been removed from the hash table, and clean
+	 * may cause vnode write operations which can wind
+	 * up back here.
+	 */
+	obj = vm_object_lookup(pager);
+	if (obj == NULL)
+		return;
+
+#ifdef DEBUG
+	if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE))
+		printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n",
+		       vp, obj, vnp->vnp_size, nsize);
+#endif
+	/*
+	 * The file shrank: throw away cached pages past the new EOF.
+	 */
+	if (vnp->vnp_size > nsize) {
+		vm_object_lock(obj);
+		vm_object_page_remove(obj,
+		    (vm_offset_t)nsize, vnp->vnp_size);
+		vm_object_unlock(obj);
+	}
+	vnp->vnp_size = (vm_offset_t)nsize;
+	/* Drop the object reference obtained from vm_object_lookup() above. */
+	vm_object_deallocate(obj);
+}
+
+/*
+ * Uncache the objects of all vnode pagers whose vnode belongs to the
+ * given mount point, or of every vnode pager when mp is NULL.
+ */
+void
+vnode_pager_umount(mp)
+	register struct mount *mp;
+{
+	register vm_pager_t cur, next;
+	struct vnode *vp;
+
+	cur = vnode_pager_list.tqh_first;
+	while (cur != NULL) {
+		/*
+		 * Pick up the successor first: uncaching may terminate
+		 * the object and render the current pager invalid.
+		 */
+		next = cur->pg_list.tqe_next;
+		vp = ((vn_pager_t)cur->pg_data)->vnp_vp;
+		if (mp == NULL || vp->v_mount == mp) {
+			VOP_LOCK(vp);
+			(void) vnode_pager_uncache(vp);
+			VOP_UNLOCK(vp);
+		}
+		cur = next;
+	}
+}
+
+/*
+ * Remove the object associated with a vnode from the object cache.
+ *
+ * XXX the vnode is unlocked around the pager_cache() call and locked
+ * again afterwards.  This is needed because uncaching the object may
+ * result in its destruction, which may initiate paging activity, which
+ * may in turn need to lock the vnode.
+ *
+ * Returns TRUE when the vnode has no pager or no object, otherwise
+ * whether the object's reference count was at most one.
+ */
+boolean_t
+vnode_pager_uncache(vp)
+	register struct vnode *vp;
+{
+	register vm_object_t obj;
+	boolean_t uncached;
+	vm_pager_t pager;
+
+	/* Not a mapped vnode: nothing cached. */
+	pager = (vm_pager_t)vp->v_vmdata;
+	if (pager == NULL)
+		return (TRUE);
+#ifdef DEBUG
+	if (!VOP_ISLOCKED(vp)) {
+		extern int (**nfsv2_vnodeop_p)();
+
+		if (vp->v_op != nfsv2_vnodeop_p)
+			panic("vnode_pager_uncache: vnode not locked!");
+	}
+#endif
+	/*
+	 * vm_object_lookup() must be used here since it actually
+	 * removes the object from the cache list.
+	 */
+	obj = vm_object_lookup(pager);
+	if (obj == NULL)
+		return(TRUE);
+
+	uncached = (obj->ref_count <= 1);
+	VOP_UNLOCK(vp);
+	pager_cache(obj, FALSE);
+	VOP_LOCK(vp);
+	return(uncached);
+}
+
+/*
+ * Transfer one page between memory and the file behind a vnode pager.
+ *
+ * vnp is the pager's private data, mlist/npages the page list (exactly
+ * one page is supported, anything else panics), sync is passed on to
+ * vm_pager_map_pages() and rw selects VOP_READ or VOP_WRITE.  The I/O
+ * uses the credentials of the current process.
+ *
+ * Returns VM_PAGER_AGAIN when no kernel mapping could be obtained,
+ * VM_PAGER_BAD when the page lies at or beyond the recorded file size,
+ * VM_PAGER_ERROR when the vnode operation fails or transfers nothing,
+ * and VM_PAGER_OK otherwise.
+ */
+static int
+vnode_pager_io(vnp, mlist, npages, sync, rw)
+ register vn_pager_t vnp;
+ vm_page_t *mlist;
+ int npages;
+ boolean_t sync;
+ enum uio_rw rw;
+{
+ struct uio auio;
+ struct iovec aiov;
+ vm_offset_t kva, foff;
+ int error, size;
+ struct proc *p = curproc; /* XXX */
+
+ /* XXX */
+ vm_page_t m;
+ if (npages != 1)
+ panic("vnode_pager_io: cannot handle multiple pages");
+ m = *mlist;
+ /* XXX */
+
+#ifdef DEBUG
+ if (vpagerdebug & VDB_FOLLOW)
+ printf("vnode_pager_io(%x, %x, %c): vnode %x\n",
+ vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp);
+#endif
+ /* File offset of the page: offset in the object plus the object's paging offset. */
+ foff = m->offset + m->object->paging_offset;
+ /*
+ * Allocate a kernel virtual address and initialize so that
+ * we can use VOP_READ/WRITE routines.
+ */
+ kva = vm_pager_map_pages(mlist, npages, sync);
+ if (kva == NULL)
+ return(VM_PAGER_AGAIN);
+ /*
+ * After all of the potentially blocking operations have been
+ * performed, we can do the size checks:
+ * read beyond EOF (returns error)
+ * short read
+ */
+ VOP_LOCK(vnp->vnp_vp);
+ if (foff >= vnp->vnp_size) {
+ VOP_UNLOCK(vnp->vnp_vp);
+ vm_pager_unmap_pages(kva, npages);
+#ifdef DEBUG
+ if (vpagerdebug & VDB_SIZE)
+ printf("vnode_pager_io: vp %x, off %d size %d\n",
+ vnp->vnp_vp, foff, vnp->vnp_size);
+#endif
+ return(VM_PAGER_BAD);
+ }
+ /* Clip the transfer to the end of the file for the last, partial page. */
+ if (foff + PAGE_SIZE > vnp->vnp_size)
+ size = vnp->vnp_size - foff;
+ else
+ size = PAGE_SIZE;
+ /* Describe the kernel mapping as a single-segment, kernel-space uio. */
+ aiov.iov_base = (caddr_t)kva;
+ aiov.iov_len = size;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = foff;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = rw;
+ auio.uio_resid = size;
+ auio.uio_procp = (struct proc *)0;
+#ifdef DEBUG
+ if (vpagerdebug & VDB_IO)
+ printf("vnode_pager_io: vp %x kva %x foff %x size %x",
+ vnp->vnp_vp, kva, foff, size);
+#endif
+ if (rw == UIO_READ)
+ error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred);
+ else
+ error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred);
+ VOP_UNLOCK(vnp->vnp_vp);
+#ifdef DEBUG
+ if (vpagerdebug & VDB_IO) {
+ if (error || auio.uio_resid)
+ printf(" returns error %x, resid %x",
+ error, auio.uio_resid);
+ printf("\n");
+ }
+#endif
+ if (!error) {
+ /* Number of bytes actually transferred. */
+ register int count = size - auio.uio_resid;
+
+ /*
+ * Nothing transferred counts as an error; a short read has
+ * the remainder of the page zero filled.
+ */
+ if (count == 0)
+ error = EINVAL;
+ else if (count != PAGE_SIZE && rw == UIO_READ)
+ bzero((void *)(kva + count), PAGE_SIZE - count);
+ }
+ vm_pager_unmap_pages(kva, npages);
+ return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
+}
diff --git a/sys/vm/vnode_pager.h b/sys/vm/vnode_pager.h
new file mode 100644
index 000000000000..95c9545452ae
--- /dev/null
+++ b/sys/vm/vnode_pager.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vnode_pager.h 8.1 (Berkeley) 6/11/93
+ */
+
+#ifndef _VNODE_PAGER_
+#define _VNODE_PAGER_ 1
+
+/*
+ * VNODE pager private data.
+ * vnode_pager.c keeps one of these per pager, reachable through the
+ * pager's pg_data pointer.
+ */
+struct vnpager {
+ int vnp_flags; /* flags (VNP_* below) */
+ struct vnode *vnp_vp; /* vnode backing this pager */
+ vm_size_t vnp_size; /* vnode current size */
+};
+typedef struct vnpager *vn_pager_t;
+
+#define VN_PAGER_NULL ((vn_pager_t)0)
+
+/*
+ * Values for vnp_flags.
+ * NOTE(review): vnode_pager.c only ever sets vnp_flags to 0 and never
+ * tests these bits -- confirm whether they are used elsewhere.
+ */
+#define VNP_PAGING 0x01 /* vnode used for pageout */
+#define VNP_CACHED 0x02 /* vnode is cached */
+
+#endif /* _VNODE_PAGER_ */