aboutsummaryrefslogtreecommitdiff
path: root/biology/vcflib
diff options
context:
space:
mode:
authorJason W. Bacon <jwb@FreeBSD.org>2018-04-17 03:11:39 +0000
committerJason W. Bacon <jwb@FreeBSD.org>2018-04-17 03:11:39 +0000
commit3d701c86bf27d15961f152cacf0196e7cfc50ab0 (patch)
treeff9bf0299d3b0bbbd98698a3caa84889111a3ea2 /biology/vcflib
parent0964627d7b26072cf7887f7bb9c96b5fad12fa55 (diff)
downloadports-3d701c86bf27d15961f152cacf0196e7cfc50ab0.tar.gz
ports-3d701c86bf27d15961f152cacf0196e7cfc50ab0.zip
biology/vcflib: C++ library and CLI tools for parsing and manipulating VCF files
Approved by: jrm (mentor) Differential Revision: https://reviews.freebsd.org/D15109
Notes
Notes: svn path=/head/; revision=467553
Diffstat (limited to 'biology/vcflib')
-rw-r--r--biology/vcflib/Makefile47
-rw-r--r--biology/vcflib/distinfo3
-rw-r--r--biology/vcflib/files/Makefile.external-libs207
-rw-r--r--biology/vcflib/files/Makefile.submod117
-rw-r--r--biology/vcflib/files/patch-src_cdflib.cpp11
-rw-r--r--biology/vcflib/pkg-descr21
-rw-r--r--biology/vcflib/pkg-plist104
7 files changed, 510 insertions, 0 deletions
diff --git a/biology/vcflib/Makefile b/biology/vcflib/Makefile
new file mode 100644
index 000000000000..ffec9fe35272
--- /dev/null
+++ b/biology/vcflib/Makefile
@@ -0,0 +1,47 @@
+# $FreeBSD$
+
+PORTNAME= vcflib
+DISTVERSIONPREFIX= v
+DISTVERSION= 1.0.0-rc1-130
+DISTVERSIONSUFFIX= -g7e3d806
+CATEGORIES= biology
+
+MAINTAINER= jwb@FreeBSD.org
+COMMENT= C++ library and CLI tools for parsing and manipulating VCF files
+
+LICENSE= MIT
+
+# Shared libraries vcflib is linked against.
+LIB_DEPENDS= libhts.so:biology/htslib \
+ libtabix.so:biology/tabixpp \
+ libsw.so:biology/smithwaterman
+# Ports needed both to build and to run; the list is reused verbatim for
+# BUILD_DEPENDS and RUN_DEPENDS below.
+COMMON_DEPENDS= fastahack:biology/fastahack \
+ filevercmp:sysutils/filevercmp \
+ fsom:science/fsom \
+ multichoose>=1.0.3:math/multichoose \
+ interval_tree_test:math/intervaltree
+BUILD_DEPENDS= ${COMMON_DEPENDS}
+RUN_DEPENDS= ${COMMON_DEPENDS}
+
+USES= gmake shebangfix
+USE_LDCONFIG= yes
+USE_GITHUB= yes
+GH_ACCOUNT= ekg
+
+# Header directories of the dependency ports, plus a VERSION macro that
+# expands to the port version as a string literal.
+CXXFLAGS+= -I${LOCALBASE}/include/smithwaterman \
+ -I${LOCALBASE}/include/multichoose \
+ -I${LOCALBASE}/include/filevercmp \
+ -I${LOCALBASE}/include/fastahack \
+ -I${LOCALBASE}/include/intervaltree \
+ -DVERSION='\"${PORTVERSION}\"'
+
+# Clang and GCC disable sse2 by default on i386, but it's required for vcflib
+CFLAGS_i386= -msse2
+
+# Build with the makefile shipped in this port's files/ directory and stage
+# through its install-strip target.
+MAKEFILE= ${FILESDIR}/Makefile.external-libs
+INSTALL_TARGET= install-strip
+
+# Add the unversioned libvcflib.so link next to libvcflib.so.1.
+post-install:
+ ${RLN} ${STAGEDIR}${PREFIX}/lib/libvcflib.so.1 \
+ ${STAGEDIR}${PREFIX}/lib/libvcflib.so
+
+.include <bsd.port.mk>
diff --git a/biology/vcflib/distinfo b/biology/vcflib/distinfo
new file mode 100644
index 000000000000..985916a8d85b
--- /dev/null
+++ b/biology/vcflib/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1523243322
+SHA256 (ekg-vcflib-v1.0.0-rc1-130-g7e3d806_GH0.tar.gz) = 8ec24354dfc7a87c011c865ab75f3eb72646768ab9cb6eff4263e69763338478
+SIZE (ekg-vcflib-v1.0.0-rc1-130-g7e3d806_GH0.tar.gz) = 20138644
diff --git a/biology/vcflib/files/Makefile.external-libs b/biology/vcflib/files/Makefile.external-libs
new file mode 100644
index 000000000000..ae030a517b44
--- /dev/null
+++ b/biology/vcflib/files/Makefile.external-libs
@@ -0,0 +1,207 @@
+#OBJ_DIR = ./
+HEADERS = src/Variant.h \
+ src/split.h \
+ src/pdflib.hpp \
+ src/var.hpp \
+ src/cdflib.hpp \
+ src/rnglib.hpp \
+ src/join.h
+SOURCES = src/Variant.cpp \
+ src/rnglib.cpp \
+ src/var.cpp \
+ src/pdflib.cpp \
+ src/cdflib.cpp \
+ src/split.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+
+# Build-tree layout.  CURDIR is make's own record of the working directory,
+# so no shell has to be spawned at parse time just to run pwd.
+VCF_LIB_LOCAL:=$(CURDIR)
+BIN_DIR:=bin
+LIB_DIR:=lib
+SRC_DIR=src
+INC_DIR:=include
+OBJ_DIR:=obj
+
+LIB = libvcflib.a
+SOVERSION = 1
+SLIB = libvcflib.so.$(SOVERSION)
+
+# NOTE: vcfstats.cpp used to be listed here as a TODO, but it is already
+# part of BIN_SOURCES below.
+
+BIN_SOURCES = src/vcfecho.cpp \
+ src/vcfnormalizesvs.cpp \
+ src/dumpContigsFromHeader.cpp \
+ src/bFst.cpp \
+ src/pVst.cpp \
+ src/hapLrt.cpp \
+ src/popStats.cpp \
+ src/wcFst.cpp \
+ src/iHS.cpp \
+ src/segmentFst.cpp \
+ src/segmentIhs.cpp \
+ src/genotypeSummary.cpp \
+ src/sequenceDiversity.cpp \
+ src/pFst.cpp \
+ src/smoother.cpp \
+ src/LD.cpp \
+ src/plotHaps.cpp \
+ src/abba-baba.cpp \
+ src/permuteGPAT++.cpp \
+ src/permuteSmooth.cpp \
+ src/normalize-iHS.cpp \
+ src/meltEHH.cpp \
+ src/vcfaltcount.cpp \
+ src/vcfhetcount.cpp \
+ src/vcfhethomratio.cpp \
+ src/vcffilter.cpp \
+ src/vcf2tsv.cpp \
+ src/vcfgenotypes.cpp \
+ src/vcfannotategenotypes.cpp \
+ src/vcfcommonsamples.cpp \
+ src/vcfremovesamples.cpp \
+ src/vcfkeepsamples.cpp \
+ src/vcfsamplenames.cpp \
+ src/vcfgenotypecompare.cpp \
+ src/vcffixup.cpp \
+ src/vcfclassify.cpp \
+ src/vcfsamplediff.cpp \
+ src/vcfremoveaberrantgenotypes.cpp \
+ src/vcfrandom.cpp \
+ src/vcfparsealts.cpp \
+ src/vcfstats.cpp \
+ src/vcfflatten.cpp \
+ src/vcfprimers.cpp \
+ src/vcfnumalt.cpp \
+ src/vcfcleancomplex.cpp \
+ src/vcfintersect.cpp \
+ src/vcfannotate.cpp \
+ src/vcfallelicprimitives.cpp \
+ src/vcfoverlay.cpp \
+ src/vcfaddinfo.cpp \
+ src/vcfkeepinfo.cpp \
+ src/vcfkeepgeno.cpp \
+ src/vcfafpath.cpp \
+ src/vcfcountalleles.cpp \
+ src/vcflength.cpp \
+ src/vcfdistance.cpp \
+ src/vcfrandomsample.cpp \
+ src/vcfentropy.cpp \
+ src/vcfglxgt.cpp \
+ src/vcfroc.cpp \
+ src/vcfcheck.cpp \
+ src/vcfstreamsort.cpp \
+ src/vcfuniq.cpp \
+ src/vcfuniqalleles.cpp \
+ src/vcfremap.cpp \
+ src/vcf2fasta.cpp \
+ src/vcfsitesummarize.cpp \
+ src/vcfbreakmulti.cpp \
+ src/vcfcreatemulti.cpp \
+ src/vcfevenregions.cpp \
+ src/vcfcat.cpp \
+ src/vcfgenosummarize.cpp \
+ src/vcfgenosamplenames.cpp \
+ src/vcfgeno2haplo.cpp \
+ src/vcfleftalign.cpp \
+ src/vcfcombine.cpp \
+ src/vcfgeno2alleles.cpp \
+ src/vcfindex.cpp \
+ src/vcf2dag.cpp \
+ src/vcfsample2info.cpp \
+ src/vcfqual2info.cpp \
+ src/vcfinfo2qual.cpp \
+ src/vcfglbound.cpp \
+ src/vcfunphase.cpp \
+ src/vcfnull2ref.cpp \
+ src/vcfinfosummarize.cpp
+
+# when we can figure out how to build on mac
+# src/vcfsom.cpp
+
+# SHORTBINS: bare tool names, one per BIN_SOURCES entry (usable as goals).
+# BINS: the same tools as paths under $(BIN_DIR), which is where the "pre"
+# rule creates the output directory.
+SHORTBINS = $(notdir $(BIN_SOURCES:.cpp=))
+BINS = $(addprefix $(BIN_DIR)/,$(SHORTBINS))
+# Use ?= to allow overriding from the env or command-line.
+
+MAKE ?= make
+LOCALBASE ?= /usr/local
+LIB_PATH ?= ${LOCALBASE}/lib
+
+CC ?= cc
+CXX ?= c++
+CXXFLAGS ?= -O3
+CFLAGS += -D_FILE_OFFSET_BITS=64 -fPIC
+CXXFLAGS += $(CFLAGS) --std=c++11
+#CXXFLAGS += -pedantic -Wall -Wshadow -Wpointer-arith -Wcast-qual
+
+DESTDIR ?= stage
+PREFIX ?= /usr/local
+STRIP ?= strip
+INSTALL ?= install -c
+MKDIR ?= mkdir -p
+AR ?= ar
+
+SSW = src/ssw.o src/ssw_cpp.o
+
+# Header search path for the externally installed helper libraries.
+# filevercmp is linked below (-lfilevercmp), so its header directory is
+# listed here as well instead of relying on the caller's CXXFLAGS.
+INCLUDES = -I${LOCALBASE}/include \
+	-I${LOCALBASE}/include/smithwaterman \
+	-I${LOCALBASE}/include/multichoose \
+	-I${LOCALBASE}/include/filevercmp \
+	-I${LOCALBASE}/include/fastahack \
+	-I${LOCALBASE}/include/intervaltree
+# Link against the libvcflib built in the current directory first, then the
+# external libraries found under $(LIB_PATH).
+LDFLAGS += -L. -lvcflib \
+	-L$(LIB_PATH) -lsw -ltabix -lhts -lfastahack -lfilevercmp \
+	-lpthread -lz -lm
+
+# Default goal: library objects, every command-line tool, and both the
+# static and the shared library.
+all: $(OBJECTS) $(BINS) $(LIB) $(SLIB)
+
+# Header dependencies of the objects named in $(SSW).  The targets carry the
+# src/ prefix so they match the objects that are actually built.
+# NOTE(review): header names assumed to be the .hpp files shipped in src/ - confirm.
+src/ssw.o: src/ssw.hpp
+src/ssw_cpp.o: src/ssw_cpp.hpp
+
+# Convenience goals that re-run the build with extra compiler flags.  The
+# sub-make is pointed at this makefile explicitly: its name is not one that
+# make looks for by default, so without -f a different makefile in the
+# working directory would be used.
+openmp:
+	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) CXXFLAGS="$(CXXFLAGS) -fopenmp -D HAS_OPENMP"
+
+profiling:
+	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) CXXFLAGS="$(CXXFLAGS) -g" all
+
+gprof:
+	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) CXXFLAGS="$(CXXFLAGS) -pg" all
+
+# Compile each library object from its own source file only; a change to any
+# of the library headers still rebuilds all of them.
+$(OBJECTS): %.o: %.cpp $(HEADERS)
+	$(CXX) -c -o $@ $< $(INCLUDES) $(CXXFLAGS)
+
+# Shorthand goals: "make vcfecho" builds $(BIN_DIR)/vcfecho.  Expressed as a
+# plain prerequisite so that no sub-make (which would not be told which
+# makefile to read) is needed.
+.PHONY: $(SHORTBINS)
+$(SHORTBINS): %: $(BIN_DIR)/%
+
+# Link each tool from its own source file against the static library.
+# "pre" is order-only: it creates $(BIN_DIR) before linking but, being
+# phony, must not force every tool to be relinked on every run.
+$(BINS): $(BIN_DIR)/%: src/%.cpp $(LIB) $(OBJECTS) $(SSW) | pre
+	$(CXX) $< -o $@ $(INCLUDES) $(CXXFLAGS) $(LDFLAGS)
+
+# Static archive of the library objects plus the objects in $(SSW); uses the
+# overridable $(AR) rather than a hard-coded archiver.
+$(LIB): $(OBJECTS) $(SSW)
+	$(AR) rs $@ $(OBJECTS) $(SSW)
+
+# Shared library built from the same objects; its soname is the versioned
+# file name $(SLIB).
+$(SLIB): $(OBJECTS) $(SSW)
+	$(CXX) -shared -Wl,-soname,$(SLIB) -o $@ $(OBJECTS) $(SSW)
+
+# Stage the tools, headers and both libraries under $(DESTDIR)$(PREFIX).
+# Headers and the static archive are plain data files, so they are installed
+# without execute permission; tools and the shared library keep the default
+# mode of $(INSTALL).
+install: all
+	$(MKDIR) $(DESTDIR)$(PREFIX)/bin
+	$(MKDIR) $(DESTDIR)$(PREFIX)/include/vcflib
+	$(MKDIR) $(DESTDIR)$(PREFIX)/lib
+	$(INSTALL) $(BIN_DIR)/* $(DESTDIR)$(PREFIX)/bin
+	$(INSTALL) -m 0644 src/*.h src/*.hpp $(DESTDIR)$(PREFIX)/include/vcflib
+	$(INSTALL) -m 0644 $(LIB) $(DESTDIR)$(PREFIX)/lib
+	$(INSTALL) $(SLIB) $(DESTDIR)$(PREFIX)/lib
+
+# Same as install, then strip the staged tools and the shared library.
+install-strip: install
+	$(STRIP) $(DESTDIR)$(PREFIX)/bin/* $(DESTDIR)$(PREFIX)/lib/$(SLIB)
+
+# Run the tests/*.t files through prove once the tools are built.
+test: $(BINS)
+	@prove -Itests/lib -w tests/*.t
+
+# Remove everything this makefile builds, including the objects named in
+# $(SSW) (which live under src/) and the shared library.
+clean:
+	rm -f $(BINS) $(OBJECTS)
+	rm -f $(SSW)
+	rm -f $(LIB) $(SLIB)
+	rm -rf $(BIN_DIR)
+
+# Create the output directory for the tools.
+pre:
+	$(MKDIR) $(BIN_DIR)
+
+# Every goal that names a command rather than a file.
+.PHONY: all clean test pre install install-strip openmp profiling gprof
diff --git a/biology/vcflib/files/Makefile.submod b/biology/vcflib/files/Makefile.submod
new file mode 100644
index 000000000000..aafad2809a9e
--- /dev/null
+++ b/biology/vcflib/files/Makefile.submod
@@ -0,0 +1,117 @@
+#OBJ_DIR = ./
+HEADERS = src/Variant.h \
+ src/split.h \
+ src/join.h
+SOURCES = src/Variant.cpp \
+ src/split.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+
+VCF_LIB_LOCAL:=$(shell pwd)
+BIN_DIR:=bin
+LIB_DIR:=lib
+SRC_DIR=src
+INC_DIR:=include
+OBJ_DIR:=obj
+
+include Makefile.common
+
+TABIX = tabixpp/tabix.o
+FASTAHACK = fastahack/Fasta.o
+SMITHWATERMAN = smithwaterman/SmithWatermanGotoh.o
+REPEATS = smithwaterman/Repeats.o
+INDELALLELE = smithwaterman/IndelAllele.o
+DISORDER = smithwaterman/disorder.o
+LEFTALIGN = smithwaterman/LeftAlign.o
+FSOM = fsom/fsom.o
+FILEVERCMP = filevercmp/filevercmp.o
+
+INCLUDES = -Itabixpp/htslib -I$(INC_DIR) -L. -Ltabixpp/htslib
+LDFLAGS = -L$(LIB_DIR) -lvcflib -lhts -lpthread -lz -lm
+
+
+all: $(OBJECTS) $(BINS)
+
+CXX ?= c++
+CXXFLAGS ?= -O3 -D_FILE_OFFSET_BITS=64
+#CXXFLAGS = -O2
+#CXXFLAGS = -pedantic -Wall -Wshadow -Wpointer-arith -Wcast-qual
+
+SSW = src/ssw.o src/ssw_cpp.o
+
+# Header dependencies of the objects named in $(SSW).  The targets carry the
+# src/ prefix so they match the objects that are actually built.
+# NOTE(review): header names assumed to be the .hpp files shipped in src/ - confirm.
+src/ssw.o: src/ssw.hpp
+src/ssw_cpp.o: src/ssw_cpp.hpp
+
+openmp:
+ $(MAKE) CXXFLAGS="$(CXXFLAGS) -fopenmp -D HAS_OPENMP"
+
+profiling:
+ $(MAKE) CXXFLAGS="$(CXXFLAGS) -g" all
+
+gprof:
+ $(MAKE) CXXFLAGS="$(CXXFLAGS) -pg" all
+
+$(OBJECTS): $(SOURCES) $(HEADERS) $(TABIX) multichoose pre $(SMITHWATERMAN) $(FILEVERCMP)
+ $(CXX) -c -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) && cp src/*.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/
+
+multichoose: pre
+ cd multichoose && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/
+
+intervaltree: pre
+ cd intervaltree && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/
+
+$(TABIX): pre
+ cd tabixpp && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/
+
+$(SMITHWATERMAN): pre
+ cd smithwaterman && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && cp *.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/
+
+$(DISORDER): $(SMITHWATERMAN)
+
+$(REPEATS): $(SMITHWATERMAN)
+
+$(LEFTALIGN): $(SMITHWATERMAN)
+
+$(INDELALLELE): $(SMITHWATERMAN)
+
+$(FASTAHACK): pre
+ cd fastahack && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && cp Fasta.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/
+
+#$(FSOM):
+# cd fsom && $(CXX) $(CXXFLAGS) -c fsom.c -lm
+
+# Build filevercmp, then copy its headers to $(INC_DIR) and its objects to
+# $(OBJ_DIR), as the smithwaterman and fastahack rules do.  $(MAKE) is used
+# instead of a literal make so the sub-build inherits -j and -n.
+$(FILEVERCMP): pre
+	cd filevercmp && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && cp *.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/
+
+$(SHORTBINS): pre
+ $(MAKE) bin/$@
+
+$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(SMITHWATERMAN) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) pre intervaltree
+ $(CXX) src/$(notdir $@).cpp -o $@ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS)
+
+libvcflib.a: $(OBJECTS) $(SMITHWATERMAN) $(REPEATS) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) $(TABIX) pre
+ ar rs libvcflib.a $(OBJECTS) smithwaterman/sw.o $(FASTAHACK) $(SSW) $(FILEVERCMP) $(TABIX)
+ cp libvcflib.a $(LIB_DIR)
+
+
+test: $(BINS)
+ @prove -Itests/lib -w tests/*.t
+
+pre:
+ if [ ! -d $(BIN_DIR) ]; then mkdir -p $(BIN_DIR); fi
+ if [ ! -d $(LIB_DIR) ]; then mkdir -p $(LIB_DIR); fi
+ if [ ! -d $(INC_DIR) ]; then mkdir -p $(INC_DIR); fi
+ if [ ! -d $(OBJ_DIR) ]; then mkdir -p $(OBJ_DIR); fi
+
+# Remove the build products and output directories, then clean the bundled
+# sub-projects.  The objects named in $(SSW) live under src/; $(MAKE) keeps
+# the sub-directory cleans on the same make program and flags.
+clean:
+	rm -f $(BINS) $(OBJECTS)
+	rm -f $(SSW)
+	rm -f libvcflib.a
+	rm -rf $(BIN_DIR)
+	rm -rf $(LIB_DIR)
+	rm -rf $(INC_DIR)
+	rm -rf $(OBJ_DIR)
+	cd tabixpp && $(MAKE) clean
+	cd smithwaterman && $(MAKE) clean
+	cd fastahack && $(MAKE) clean
+
+.PHONY: clean all test pre
diff --git a/biology/vcflib/files/patch-src_cdflib.cpp b/biology/vcflib/files/patch-src_cdflib.cpp
new file mode 100644
index 000000000000..84bff432c1ed
--- /dev/null
+++ b/biology/vcflib/files/patch-src_cdflib.cpp
@@ -0,0 +1,11 @@
+--- src/cdflib.cpp.orig 2018-03-09 20:31:19 UTC
++++ src/cdflib.cpp
+@@ -10040,7 +10040,7 @@ void negative_binomial_cdf_values ( int
+ 1, 2, 3,
+ 0, 1, 2 };
+
+- if ( n_data < 0 )
++ if ( *n_data < 0 )
+ {
+ *n_data = 0;
+ }
diff --git a/biology/vcflib/pkg-descr b/biology/vcflib/pkg-descr
new file mode 100644
index 000000000000..977e86a03dae
--- /dev/null
+++ b/biology/vcflib/pkg-descr
@@ -0,0 +1,21 @@
+The Variant Call Format (VCF) is a flat-file, tab-delimited textual format
+intended to concisely describe reference-indexed variations between
+individuals. VCF provides a common interchange format for the description of
+variation in individuals and populations of samples, and has become the de facto
+standard reporting format for a wide array of genomic variant detectors.
+
+vcflib provides methods to manipulate and interpret sequence variation as it
+can be described by VCF. It is both:
+
+ an API for parsing and operating on records of genomic variation as it can
+ be described by the VCF format
+
+ and a collection of command-line utilities for executing complex
+ manipulations on VCF files.
+
+The API itself provides a quick and extremely permissive method to read and
+write VCF files. Extensions and applications of the library provided in the
+included utilities (*.cpp) comprise the vast bulk of the library's utility for
+most users.
+
+WWW: https://github.com/vcflib/vcflib
diff --git a/biology/vcflib/pkg-plist b/biology/vcflib/pkg-plist
new file mode 100644
index 000000000000..944045d78581
--- /dev/null
+++ b/biology/vcflib/pkg-plist
@@ -0,0 +1,104 @@
+bin/LD
+bin/abba-baba
+bin/bFst
+bin/dumpContigsFromHeader
+bin/genotypeSummary
+bin/hapLrt
+bin/iHS
+bin/meltEHH
+bin/normalize-iHS
+bin/pFst
+bin/pVst
+bin/permuteGPAT++
+bin/permuteSmooth
+bin/plotHaps
+bin/popStats
+bin/segmentFst
+bin/segmentIhs
+bin/sequenceDiversity
+bin/smoother
+bin/vcf2dag
+bin/vcf2fasta
+bin/vcf2tsv
+bin/vcfaddinfo
+bin/vcfafpath
+bin/vcfallelicprimitives
+bin/vcfaltcount
+bin/vcfannotate
+bin/vcfannotategenotypes
+bin/vcfbreakmulti
+bin/vcfcat
+bin/vcfcheck
+bin/vcfclassify
+bin/vcfcleancomplex
+bin/vcfcombine
+bin/vcfcommonsamples
+bin/vcfcountalleles
+bin/vcfcreatemulti
+bin/vcfdistance
+bin/vcfecho
+bin/vcfentropy
+bin/vcfevenregions
+bin/vcffilter
+bin/vcffixup
+bin/vcfflatten
+bin/vcfgeno2alleles
+bin/vcfgeno2haplo
+bin/vcfgenosamplenames
+bin/vcfgenosummarize
+bin/vcfgenotypecompare
+bin/vcfgenotypes
+bin/vcfglbound
+bin/vcfglxgt
+bin/vcfhetcount
+bin/vcfhethomratio
+bin/vcfindex
+bin/vcfinfo2qual
+bin/vcfinfosummarize
+bin/vcfintersect
+bin/vcfkeepgeno
+bin/vcfkeepinfo
+bin/vcfkeepsamples
+bin/vcfleftalign
+bin/vcflength
+bin/vcfnormalizesvs
+bin/vcfnull2ref
+bin/vcfnumalt
+bin/vcfoverlay
+bin/vcfparsealts
+bin/vcfprimers
+bin/vcfqual2info
+bin/vcfrandom
+bin/vcfrandomsample
+bin/vcfremap
+bin/vcfremoveaberrantgenotypes
+bin/vcfremovesamples
+bin/vcfroc
+bin/vcfsample2info
+bin/vcfsamplediff
+bin/vcfsamplenames
+bin/vcfsitesummarize
+bin/vcfstats
+bin/vcfstreamsort
+bin/vcfuniq
+bin/vcfuniqalleles
+bin/vcfunphase
+bin/wcFst
+include/vcflib/BedReader.h
+include/vcflib/Variant.h
+include/vcflib/cdflib.hpp
+include/vcflib/convert.h
+include/vcflib/gpatInfo.hpp
+include/vcflib/join.h
+include/vcflib/mt19937ar.h
+include/vcflib/pdflib.hpp
+include/vcflib/rnglib.hpp
+include/vcflib/split.h
+include/vcflib/ssw.hpp
+include/vcflib/ssw_cpp.hpp
+include/vcflib/var.hpp
+include/vcflib/vec128int.h
+include/vcflib/veclib_types.h
+lib/libvcflib.a
+lib/libvcflib.so
+lib/libvcflib.so.1