#!/bin/sh # # Copyright (c) 2009 Peter Holm # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # There is a well-known problem in FreeBSD, caused by allowing page faults # while doing filesystem data move to or from userspace during read(2) and # write(2). The issue is that if the userspace address being read or write # from/to is backed by the mapping of the same file we are doing i/o to, # we deadlock. # Test scenario by ups [ `id -u ` -ne 0 ] && echo "Must be root!" && exit 1 . ../default.cfg here=`pwd` cd /tmp sed '1,/^EOF/d' < $here/$0 > datamove.c mycc -o datamove -Wall datamove.c rm -f datamove.c n=5 old=`sysctl vm.old_msync | awk '{print $NF}'` sysctl vm.old_msync=1 for i in `jot $n`; do mkdir -p /tmp/datamove.dir.$i cd /tmp/datamove.dir.$i /tmp/datamove & done cd /tmp for i in `jot $n`; do wait done for i in `jot $n`; do rm -rf /tmp/datamove.dir.$i done sysctl vm.old_msync=$old rm -rf /tmp/datamove exit 0 EOF /*- * Copyright (c) 2006, Stephan Uphoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include int prepareFile(char* filename,int* fdp); int mapBuffer(char** bufferp,int fd1,int fd2); int startIO(int fd,char *buffer); int pagesize; #define FILESIZE (32*1024) char wbuffer[FILESIZE]; /* Create a FILESIZE sized file - then remove file data from the cache*/ int prepareFile(char* filename,int* fdp) { int fd; int len; int status; void *addr; fd = open(filename,O_CREAT | O_TRUNC | O_RDWR,S_IRWXU); if (fd == -1) { perror("Creating file"); return fd; } len = write(fd,wbuffer,FILESIZE); if (len < 0) { perror("Write failed"); return 1; } status = fsync(fd); if (status != 0) { perror("fsync failed"); return 1; } addr = mmap(NULL,FILESIZE, PROT_READ | PROT_WRITE , MAP_SHARED, fd, 0); if (addr == MAP_FAILED) { perror("Mmap failed"); return 1; } status = msync(addr,FILESIZE,MS_INVALIDATE | MS_SYNC); if (status != 0) { perror("Msync failed"); return 1; } munmap(addr,FILESIZE); *fdp = fd; return 0; } /* mmap a 2 page buffer - first page is from fd1, second page from fd2 */ int mapBuffer(char** bufferp,int fd1,int fd2) { void* addr; char *buffer; addr = mmap(NULL,pagesize*2, PROT_READ | PROT_WRITE , MAP_SHARED, fd1, 0); if (addr == MAP_FAILED) { perror("Mmap failed"); return 1; } buffer = addr; addr = mmap(buffer + pagesize,pagesize, PROT_READ | PROT_WRITE , MAP_FIXED | MAP_SHARED, fd2, 0); if (addr == MAP_FAILED) { perror("Mmap2 failed"); return 1; } *bufferp = buffer; return 0; } int startIO(int fd,char *buffer) { ssize_t len; len = write(fd,buffer,2*pagesize); if (len == -1) { perror("write failed"); return 1; } return 0; } int main(int argc,char *argv[],char *envp[]) { int fdA,fdB,fdDelayA,fdDelayB; int status; char *bufferA,*bufferB; pid_t pid; pagesize = getpagesize(); if ((prepareFile("A",&fdA)) || (prepareFile("B",&fdB)) || (prepareFile("DelayA",&fdDelayA)) || (prepareFile("DelayB",&fdDelayB)) || (mapBuffer(&bufferA,fdDelayA,fdB)) || (mapBuffer(&bufferB,fdDelayB,fdA))) exit(1); pid = fork(); if (pid == 0) { status = startIO(fdA,bufferA); exit(status); } if (pid == -1) { exit(1); } status = startIO(fdB,bufferB); exit(status); }