biology/bio-mocha: bcftools plugin for mosaic chromosomal alterations

MoChA is a bcftools plugin released under the MIT license for mosaic chromosomal alteration detection and analysis from DNA microarray or whole genome sequence data. It can be used both with Illumina and Affymetrix data. It can also be used for detection of germline copy number variants. Data can be prepared in usable file formats using the gtc2vcf plugin.
author: Jason W. Bacon <jwb@FreeBSD.org> 2021-12-03 13:41:26 +0000
committer: Jason W. Bacon <jwb@FreeBSD.org> 2021-12-03 13:41:26 +0000
commit: 9152cfc775fca98435610da5faad6d56c5cdeacb (patch)
tree: 2f2e4d5fbead0ec9b02489d64912a617015e75cf
parent: 2f5db2a99dd4fe980c58c1e9f2436f0714195558 (diff)
download: ports-9152cfc775fca98435610da5faad6d56c5cdeacb.tar.gz
ports-9152cfc775fca98435610da5faad6d56c5cdeacb.zip
9 files changed, 613 insertions, 0 deletions
diff --git a/biology/Makefile b/biology/Makefile
index 1c79cff38867..02ba7b46b286 100644
--- a/biology/Makefile
+++ b/biology/Makefile
@@ -11,6 +11,7 @@
     SUBDIR += bcftools
     SUBDIR += bedtools
     SUBDIR += bfc
+    SUBDIR += bio-mocha
     SUBDIR += bioawk
     SUBDIR += biococoa
     SUBDIR += biolibc
diff --git a/biology/bio-mocha/Makefile b/biology/bio-mocha/Makefile
new file mode 100644
index 000000000000..a046d5fd95c5
--- /dev/null
+++ b/biology/bio-mocha/Makefile
@@ -0,0 +1,48 @@
+PORTNAME=	bio-mocha
+DISTVERSION=	1.13
+CATEGORIES=	biology
+MASTER_SITES=	https://software.broadinstitute.org/software/mocha/
+DISTFILES+=	${PORTNAME}_${DISTVERSION}-20211015.tar.gz
+
+MAINTAINER=	jwb@FreeBSD.org
+COMMENT=	bcftools plugin for mosaic chromosomal alteration analysis
+
+LICENSE=	MIT
+
+LIB_DEPENDS=	libhts.so:biology/htslib
+BUILD_DEPENDS=	bash:shells/bash
+RUN_DEPENDS=	bcftools==${PORTVERSION}:biology/bcftools
+
+USES=		autoreconf gmake localbase perl5 python:env shebangfix
+USE_GITHUB=	yes
+USE_PERL5=	test
+
+GH_ACCOUNT=	samtools
+GH_PROJECT=	bcftools
+GNU_CONFIGURE=	yes
+SHEBANG_FILES=	misc/* test/test.pl
+
+post-extract:
+	@${MV} ${WRKDIR}/*.c ${WRKDIR}/*.h ${WRKSRC}/plugins
+	@${MKDIR} ${WRKSRC}/MoCha
+	@${MV} ${WRKDIR}/*.R ${WRKSRC}/MoCha
+
+pre-configure:
+	@${REINPLACE_CMD} -e 's|@PORTVERSION@|${PORTVERSION}|g' \
+		${WRKSRC}/configure.ac
+
+do-install:
+	${MKDIR} ${STAGEDIR}${PREFIX}/libexec/bcftools
+	${INSTALL_PROGRAM} ${WRKSRC}/plugins/extendFMT.so \
+		${STAGEDIR}${PREFIX}/libexec/bcftools
+	${INSTALL_PROGRAM} ${WRKSRC}/plugins/mocha.so \
+		${STAGEDIR}${PREFIX}/libexec/bcftools
+	${INSTALL_PROGRAM} ${WRKSRC}/plugins/mochatools.so \
+		${STAGEDIR}${PREFIX}/libexec/bcftools
+	${INSTALL_PROGRAM} ${WRKSRC}/plugins/score.so \
+		${STAGEDIR}${PREFIX}/libexec/bcftools
+	${INSTALL_PROGRAM} ${WRKSRC}/plugins/trio-phase.so \
+		${STAGEDIR}${PREFIX}/libexec/bcftools
+	(cd ${WRKSRC}/MoCha && ${COPYTREE_SHARE} . ${STAGEDIR}${DATADIR})
+
+.include <bsd.port.mk>
diff --git a/biology/bio-mocha/distinfo b/biology/bio-mocha/distinfo
new file mode 100644
index 000000000000..e67e570dfc93
--- /dev/null
+++ b/biology/bio-mocha/distinfo
@@ -0,0 +1,5 @@
+TIMESTAMP = 1638495618
+SHA256 (bio-mocha_1.13-20211015.tar.gz) = e145bb48fea347202e16395ab7d78e7e7314749b8d472a2071f8074004cd63a5
+SIZE (bio-mocha_1.13-20211015.tar.gz) = 73627
+SHA256 (samtools-bcftools-1.13_GH0.tar.gz) = 55fbc674ec69e243052e9fb6560eb43d06f45f210e1842ba4dbe33acb394e562
+SIZE (samtools-bcftools-1.13_GH0.tar.gz) = 3133637
diff --git a/biology/bio-mocha/files/patch-Makefile b/biology/bio-mocha/files/patch-Makefile
new file mode 100644
index 000000000000..3299954ae45b
--- /dev/null
+++ b/biology/bio-mocha/files/patch-Makefile
@@ -0,0 +1,33 @@
+--- Makefile.orig	2021-03-17 09:16:18 UTC
++++ Makefile
+@@ -58,13 +58,14 @@ pluginpath  = $(plugindir)
+ # Installation location for $(MISC_PROGRAMS) and $(MISC_SCRIPTS)
+ misc_bindir = $(bindir)
+ 
+-MKDIR_P = mkdir -p
+-INSTALL = install -p
+-INSTALL_DATA    = $(INSTALL) -m 644
+-INSTALL_DIR     = $(MKDIR_P) -m 755
+-INSTALL_MAN     = $(INSTALL_DATA)
+-INSTALL_PROGRAM = $(INSTALL)
+-INSTALL_SCRIPT  = $(INSTALL_PROGRAM)
++# Use BSD_INSTALL_PROGRAM to strip when WITH_DEBUG not set
++MKDIR_P 	= mkdir -p
++INSTALL 	= install -p
++INSTALL_DATA    = ${BSD_INSTALL_DATA}
++INSTALL_DIR     = $(MKDIR_P)
++INSTALL_MAN     = ${BSD_INSTALL_MAN}
++INSTALL_PROGRAM = ${BSD_INSTALL_PROGRAM}
++INSTALL_SCRIPT  = ${BSD_INSTALL_SCRIPT}
+ 
+ PROGRAMS = bcftools
+ MISC_SCRIPTS = \
+@@ -142,7 +143,7 @@ print-version:
+ ifdef USE_GPL
+     main.o : EXTRA_CPPFLAGS += -DUSE_GPL
+     OBJS += polysomy.o peakfit.o
+-    GSL_LIBS ?= -lgsl -lcblas
++    GSL_LIBS ?= -lgslcblas
+ endif
+ 
+ print-%:
diff --git a/biology/bio-mocha/files/patch-configure.ac b/biology/bio-mocha/files/patch-configure.ac
new file mode 100644
index 000000000000..ca845d2ad85f
--- /dev/null
+++ b/biology/bio-mocha/files/patch-configure.ac
@@ -0,0 +1,11 @@
+--- configure.ac.orig	2018-07-18 08:34:29 UTC
++++ configure.ac
+@@ -23,7 +23,7 @@
+ # DEALINGS IN THE SOFTWARE.
+ 
+ dnl Process this file with autoconf to produce a configure script
+-AC_INIT([BCFtools], m4_esyscmd_s([./version.sh 2>/dev/null]),
++AC_INIT([BCFtools], [@PORTVERSION@],
+         [samtools-help@lists.sourceforge.net], [], [http://www.htslib.org/])
+ AC_PREREQ([2.63])  dnl This version introduced 4-argument AC_CHECK_HEADER
+ AC_CONFIG_SRCDIR([main.c])
diff --git a/biology/bio-mocha/files/patch-plugins_beta__binom.h b/biology/bio-mocha/files/patch-plugins_beta__binom.h
new file mode 100644
index 000000000000..a6804a6dc9f4
--- /dev/null
+++ b/biology/bio-mocha/files/patch-plugins_beta__binom.h
@@ -0,0 +1,19 @@
+--- plugins/beta_binom.h.orig	2021-11-30 13:41:36 UTC
++++ plugins/beta_binom.h
+@@ -137,14 +137,14 @@ void beta_binom_update(beta_binom_t *self, double p, d
+  *  Returns the equivalent of dbeta_binom(a, a+b, p, (1 - rho) / rho, log=TRUE) from R package
+  * rmutil
+  */
+-inline double beta_binom_log_unsafe(const beta_binom_t *self, int a, int b) {
++static inline double beta_binom_log_unsafe(const beta_binom_t *self, int a, int b) {
+     return self->log_gamma_alpha[a] + self->log_gamma_beta[b] - self->log_gamma_alpha_beta[a + b];
+ }
+ 
+ /**
+  *  Same as before but it performs boundary checking before computing the log likelihood
+  */
+-inline double beta_binom_log(beta_binom_t *self, int a, int b) {
++static inline double beta_binom_log(beta_binom_t *self, int a, int b) {
+     if (a < 0 || b < 0) return NAN;
+     if (a > self->n1 || b > self->n1 || a + b > self->n2)
+         beta_binom_update(self, self->p, self->rho, a > b ? a : b, a + b);
diff --git a/biology/bio-mocha/files/patch-plugins_mocha.c b/biology/bio-mocha/files/patch-plugins_mocha.c
new file mode 100644
index 000000000000..665d785c00f3
--- /dev/null
+++ b/biology/bio-mocha/files/patch-plugins_mocha.c
@@ -0,0 +1,479 @@
+--- plugins/mocha.c.orig	2021-10-15 02:37:57 UTC
++++ plugins/mocha.c
+@@ -705,6 +705,44 @@ static double baf_phase_lod(const float *baf_arr, cons
+     return (double)ret * M_LOG10E;
+ }
+ 
++typedef struct
++{
++    const float *baf;
++    const int8_t *gt_phase;
++    int n;
++    const int *imap;
++    int8_t *path;
++    float err_log_prb;
++    float baf_sd;
++}   f1_data_t;
++
++double f1(double x, void *data)
++{
++    f1_data_t	*d = data;
++
++    return -baf_phase_lod(d->baf, d->gt_phase, d->n, d->imap, d->path,
++                          d->err_log_prb, d->baf_sd, x);
++}
++
++static inline f1_data_t *f1_pack(const float *baf, const int8_t *gt_phase, int n,
++		   const int *imap, int8_t *path, float err_log_prb,
++		   float baf_sd)
++{
++    // Switch to malloc() and free() if more than one object must exist at
++    // any given time
++    f1_data_t	*d = malloc(sizeof(f1_data_t));
++
++    d->baf = baf;
++    d->gt_phase = gt_phase;
++    d->n = n;
++    d->imap = imap;
++    d->path = path;
++    d->err_log_prb = err_log_prb;
++    d->baf_sd = baf_sd;
++
++    return d;
++}
++
+ // TODO find a better title for this function
+ static float compare_models(const float *baf, const int8_t *gt_phase, int n, const int *imap, float xy_log_prb,
+                             float err_log_prb, float flip_log_prb, float tel_log_prb, float baf_sd, const float *bdev,
+@@ -716,8 +754,11 @@ static float compare_models(const float *baf, const in
+     int n_flips = 0;
+     for (int i = 1; i < n; i++)
+         if (path[i - 1] && path[i] && path[i - 1] != path[i]) n_flips++;
+-    double f(double x, void *data) { return -baf_phase_lod(baf, gt_phase, n, imap, path, err_log_prb, baf_sd, x); }
+-    double x, fx = kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &x);
++    double x;
++    f1_data_t *f1_data = f1_pack(baf, gt_phase, n, imap, path, err_log_prb,
++				 baf_sd);
++    double fx = kmin_brent(f1, 0.1, 0.2, f1_data, KMIN_EPS, &x);
++    free(f1_data);
+     free(path);
+     return -(float)fx + (float)n_flips * flip_log_prb * (float)M_LOG10E;
+ }
+@@ -750,6 +791,34 @@ static float get_sample_mean(const float *v, int n, co
+     return mean /= (float)j;
+ }
+ 
++typedef struct
++{
++    const float *baf_arr;
++    int n;
++    const int *imap;
++    float baf_sd;
++}   f7_data_t;
++
++double f7(double x, void *data)
++{
++    f7_data_t *d = data;
++
++    return -baf_log_lkl(d->baf_arr, d->n, d->imap, d->baf_sd, x);
++}
++
++static inline f7_data_t *f7_pack(const float *baf_arr, int n, const int *imap,
++    float baf_sd)
++{
++    f7_data_t *d = malloc(sizeof(f7_data_t));
++
++    d->baf_arr = baf_arr;
++    d->n = n;
++    d->imap = imap;
++    d->baf_sd = baf_sd;
++
++    return d;
++}
++
+ static float get_baf_bdev(const float *baf_arr, int n, const int *imap, float baf_sd) {
+     double bdev = 0.0;
+     int j = 0;
+@@ -763,8 +832,9 @@ static float get_baf_bdev(const float *baf_arr, int n,
+     bdev /= j;
+     // simple method to compute bdev should work well for germline duplications
+     if ((float)bdev > 2.0f * baf_sd) return (float)bdev;
+-    double f(double x, void *data) { return -baf_log_lkl(baf_arr, n, imap, baf_sd, x); }
+-    kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &bdev);
++    f7_data_t *f7_data = f7_pack(baf_arr, n, imap, baf_sd);
++    kmin_brent(f7, 0.1, 0.2, f7_data, KMIN_EPS, &bdev);
++    free(f7_data);
+     return (float)bdev < 1e-4 ? (float)NAN : (float)bdev;
+ }
+ 
+@@ -801,10 +871,41 @@ static double ad_log_lkl(const int16_t *ad0_arr, const
+     return (double)ret * M_LOG10E;
+ }
+ 
++typedef struct
++{
++    const int16_t *ad0_arr;
++    const int16_t *ad1_arr;
++    int n;
++    const int *imap;
++    float ad_rho;
++}   f5_data_t;
++
++double f5(double x, void *data)
++{
++    f5_data_t *d = data;
++
++    return -ad_log_lkl(d->ad0_arr, d->ad1_arr, d->n, d->imap, d->ad_rho, x);
++}
++
++static inline f5_data_t *f5_pack(const int16_t *ad0_arr,
++    const int16_t *ad1_arr, int n, const int *imap, float ad_rho)
++{
++    f5_data_t *d = malloc(sizeof(f5_data_t));
++
++    d->ad0_arr = ad0_arr;
++    d->ad1_arr = ad1_arr;
++    d->n = n;
++    d->imap = imap;
++    d->ad_rho = ad_rho;
++
++    return d;
++}
++
+ static float get_ad_bdev(const int16_t *ad0_arr, const int16_t *ad1_arr, int n, const int *imap, float ad_rho) {
+     double bdev = 0.0;
+-    double f(double x, void *data) { return -ad_log_lkl(ad0_arr, ad1_arr, n, imap, ad_rho, x); }
+-    kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &bdev);
++    f5_data_t *f5_data = f5_pack(ad0_arr, ad1_arr, n, imap, ad_rho);
++    kmin_brent(f5, 0.1, 0.2, f5_data, KMIN_EPS, &bdev);
++    free(f5_data);
+     return (float)bdev < 1e-4 ? (float)NAN : (float)bdev;
+ }
+ 
+@@ -986,6 +1087,44 @@ static double ad_phase_lod(const int16_t *ad0_arr, con
+     return (double)ret * M_LOG10E;
+ }
+ 
++typedef struct
++{
++    const int16_t *ad0;
++    const int16_t *ad1;
++    const int8_t *gt_phase;
++    int n;
++    const int *imap;
++    int8_t *path;
++    float err_log_prb;
++    float ad_rho;
++}   f4_data_t;
++
++double f4(double x, void *data)
++{
++    f4_data_t *d = data;
++
++    return -ad_phase_lod(d->ad0, d->ad1, d->gt_phase, d->n, d->imap, d->path,
++        d->err_log_prb, d->ad_rho, x);
++}
++
++static inline f4_data_t *f4_pack(const int16_t *ad0, const int16_t *ad1,
++    const int8_t *gt_phase, int n, const int *imap, int8_t *path,
++    float err_log_prb, float ad_rho)
++{
++    f4_data_t *d = malloc(sizeof(f4_data_t));
++
++    d->ad0 = ad0;
++    d->ad1 = ad1;
++    d->gt_phase = gt_phase;
++    d->n = n;
++    d->imap = imap;
++    d->path = path;
++    d->err_log_prb = err_log_prb;
++    d->ad_rho = ad_rho;
++
++    return d;
++}
++
+ // TODO find a better title for this function
+ static float compare_wgs_models(const int16_t *ad0, const int16_t *ad1, const int8_t *gt_phase, int n, const int *imap,
+                                 float xy_log_prb, float err_log_prb, float flip_log_prb, float tel_log_prb,
+@@ -998,8 +1137,9 @@ static float compare_wgs_models(const int16_t *ad0, co
+     int n_flips = 0;
+     for (int i = 1; i < n; i++)
+         if (path[i - 1] && path[i] && path[i - 1] != path[i]) n_flips++;
+-    double f(double x, void *data) { return -ad_phase_lod(ad0, ad1, gt_phase, n, imap, path, err_log_prb, ad_rho, x); }
+-    double x, fx = kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &x);
++    f4_data_t *f4_data = f4_pack(ad0, ad1, gt_phase, n, imap, path, err_log_prb, ad_rho);
++    double x, fx = kmin_brent(f4, 0.1, 0.2, f4_data, KMIN_EPS, &x);
++    free(f4_data);
+     free(path);
+     return -(float)fx + (float)n_flips * flip_log_prb * (float)M_LOG10E;
+ }
+@@ -1485,6 +1625,129 @@ static float get_path_segs(const int8_t *path, const f
+     return 0;
+ }
+ 
++typedef struct
++{
++    float *lrr;
++    int a;
++    int16_t *ad0;
++    int16_t *ad1;
++    mocha_t *mocha;
++    const model_t *model;
++    sample_t *self;
++    float *baf;
++}   f3_data_t;
++    
++double f3(double x, void *data)
++{
++    f3_data_t *d = data;
++
++    if (d->model->flags & WGS_DATA)
++        return -lrr_ad_lod(d->lrr + d->a, d->ad0 + d->a, d->ad1 + d->a,
++            d->mocha->n_sites, NULL, NAN, d->model->lrr_bias,
++            d->model->lrr_hap2dip, d->self->adjlrr_sd,
++            d->self->stats.dispersion, x);
++    else
++        return -lrr_baf_lod(d->lrr + d->a, d->baf + d->a, d->mocha->n_sites,
++            NULL, NAN, d->model->lrr_bias, d->model->lrr_hap2dip,
++            d->self->adjlrr_sd, d->self->stats.dispersion, x);
++}
++
++static inline f3_data_t *f3_pack(float *lrr, int a, int16_t *ad0, int16_t *ad1,
++    mocha_t *mocha, const model_t *model, sample_t *self, float *baf)
++
++{
++    f3_data_t *d = malloc(sizeof(f3_data_t));
++
++    d->lrr = lrr;
++    d->a = a;
++    d->ad0 = ad0;
++    d->ad1 = ad1;
++    d->mocha = mocha;
++    d->model = model;
++    d->self = self;
++    d->baf = baf;
++
++    return d;
++}
++
++typedef struct
++{
++    int16_t *ad0;
++    int16_t *ad1;
++    int8_t *gt_phase;
++    mocha_t *mocha;
++    int *imap_arr;
++    int *beg;
++    int i;
++    int8_t *path;
++    sample_t *self;
++}   f6_data_t;
++
++double f6(double x, void *data)
++{
++    f6_data_t *d = data;
++
++    return -ad_phase_lod(d->ad0, d->ad1, d->gt_phase, d->mocha->n_hets,
++        d->imap_arr + d->beg[d->i], d->path + d->beg[d->i], NAN, d->self->stats.dispersion, x);
++}
++
++static inline f6_data_t *f6_pack(int16_t *ad0, int16_t *ad1, int8_t *gt_phase,
++    mocha_t *mocha, int *imap_arr, int *beg, int i, int8_t *path, sample_t *self)
++
++{
++    f6_data_t *d = malloc(sizeof(f6_data_t));
++
++    d->ad0 = ad0;
++    d->ad1 = ad1;
++    d->gt_phase = gt_phase;
++    d->mocha = mocha;
++    d->imap_arr = imap_arr;
++    d->beg = beg;
++    d->i = i;
++    d->path = path;
++    d->self = self;
++
++    return d;
++}
++
++typedef struct
++{
++    float *baf;
++    int8_t *gt_phase;
++    mocha_t *mocha;
++    int *imap_arr;
++    int *beg;
++    int i;
++    int8_t *path;
++    sample_t *self;
++}   f8_data_t;
++
++double f8(double x, void *data)
++{
++    f8_data_t *d = data;
++
++    return -baf_phase_lod(d->baf, d->gt_phase, d->mocha->n_hets,
++	d->imap_arr + d->beg[d->i], d->path + d->beg[d->i], NAN,
++	d->self->stats.dispersion, x);
++}
++
++static inline f8_data_t *f8_pack(float *baf, int8_t *gt_phase, mocha_t *mocha,
++    int *imap_arr, int *beg, int i, int8_t *path, sample_t *self)
++{
++    f8_data_t *d = malloc(sizeof(f8_data_t));
++
++    d->baf = baf;
++    d->gt_phase = gt_phase;
++    d->mocha = mocha;
++    d->imap_arr = imap_arr;
++    d->beg = beg;
++    d->i = i;
++    d->path = path;
++    d->self = self;
++
++    return d;
++}
++
+ // process one contig for one sample
+ static void sample_run(sample_t *self, mocha_table_t *mocha_table, const model_t *model) {
+     // do nothing if chromosome Y or MT are being tested
+@@ -1735,16 +1998,11 @@ static void sample_run(sample_t *self, mocha_table_t *
+             mocha.ldev = get_median(lrr + a, b + 1 - a, NULL);
+             get_mocha_stats(pos, lrr, baf, gt_phase, n, a, b, cen_beg, cen_end, length, self->stats.baf_conc, &mocha);
+ 
+-            double f(double x, void *data) {
+-                if (model->flags & WGS_DATA)
+-                    return -lrr_ad_lod(lrr + a, ad0 + a, ad1 + a, mocha.n_sites, NULL, NAN, model->lrr_bias,
+-                                       model->lrr_hap2dip, self->adjlrr_sd, self->stats.dispersion, x);
+-                else
+-                    return -lrr_baf_lod(lrr + a, baf + a, mocha.n_sites, NULL, NAN, model->lrr_bias, model->lrr_hap2dip,
+-                                        self->adjlrr_sd, self->stats.dispersion, x);
+-            }
+             double bdev_lrr_baf;
+-            kmin_brent(f, -0.15, 0.15, NULL, KMIN_EPS, &bdev_lrr_baf);
++            f3_data_t *f3_data = f3_pack(lrr, a, ad0, ad1, &mocha, model,
++                 self, baf);
++            kmin_brent(f3, -0.15, 0.15, f3_data, KMIN_EPS, &bdev_lrr_baf);
++	    free(f3_data);
+             if (model->flags & WGS_DATA)
+                 mocha.lod_lrr_baf =
+                     lrr_ad_lod(lrr + a, ad0 + a, ad1 + a, mocha.n_sites, NULL, model->err_log_prb, model->lrr_bias,
+@@ -1796,23 +2054,21 @@ static void sample_run(sample_t *self, mocha_table_t *
+                     if (path[j] != path[j + 1]) mocha.n_flips++;
+ 
+                 if (model->flags & WGS_DATA) {
+-                    double f(double x, void *data) {
+-                        return -ad_phase_lod(ad0, ad1, gt_phase, mocha.n_hets, imap_arr + beg[i], path + beg[i], NAN,
+-                                             self->stats.dispersion, x);
+-                    }
+                     double bdev;
+-                    kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &bdev);
++		    f6_data_t *f6_data = f6_pack(ad0, ad1, gt_phase, &mocha,
++                        imap_arr, beg, i, path, self);
++                    kmin_brent(f6, 0.1, 0.2, f6_data, KMIN_EPS, &bdev);
++		    free(f6_data);
+                     mocha.bdev = fabsf((float)bdev);
+                     mocha.lod_baf_phase =
+                         ad_phase_lod(ad0, ad1, gt_phase, mocha.n_hets, imap_arr + beg[i], path + beg[i],
+                                      model->err_log_prb, self->stats.dispersion, mocha.bdev);
+                 } else {
+-                    double f(double x, void *data) {
+-                        return -baf_phase_lod(baf, gt_phase, mocha.n_hets, imap_arr + beg[i], path + beg[i], NAN,
+-                                              self->stats.dispersion, x);
+-                    }
+                     double bdev;
+-                    kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &bdev);
++		    f8_data_t *f8_data = f8_pack(baf, gt_phase, &mocha,
++			imap_arr, beg, i, path, self);
++                    kmin_brent(f8, 0.1, 0.2, f8_data, KMIN_EPS, &bdev);
++		    free(f8_data);
+                     mocha.bdev = fabsf((float)bdev);
+                     mocha.lod_baf_phase = baf_phase_lod(baf, gt_phase, mocha.n_hets, imap_arr + beg[i], path + beg[i],
+                                                         model->err_log_prb, self->stats.dispersion, mocha.bdev);
+@@ -1923,6 +2179,60 @@ static float get_lrr_cutoff(const float *v, int n) {
+     return cutoff;
+ }
+ 
++typedef struct
++{
++    int16_t *ad0;
++    int16_t *ad1;
++    int n;
++}   f2_data_t;
++
++double f2(double x, void *data)
++{
++    f2_data_t *d = data;
++
++    return -lod_lkl_beta_binomial(d->ad0, d->ad1, d->n, NULL, x);
++}
++
++static inline f2_data_t *f2_pack(int16_t *ad0, int16_t *ad1, int n)
++
++{
++    f2_data_t *d = malloc(sizeof(f2_data_t));
++
++    d->ad0 = ad0;
++    d->ad1 = ad1;
++    d->n = n;
++
++    return d;
++}
++
++typedef struct
++{
++    int16_t *ad0;
++    int16_t *ad1;
++    int n_imap;
++    int *imap_arr;
++}   f9_data_t;
++
++double f9(double x, void *data)
++{
++    f9_data_t *d = data;
++
++    return -lod_lkl_beta_binomial(d->ad0, d->ad1, d->n_imap, d->imap_arr, x);
++}
++
++static inline f9_data_t *f9_pack(int16_t *ad0, int16_t *ad1, int n_imap,
++    int *imap_arr)
++{
++    f9_data_t *d = malloc(sizeof(f9_data_t));
++
++    d->ad0 = ad0;
++    d->ad1 = ad1;
++    d->n_imap = n_imap;
++    d->imap_arr = imap_arr;
++
++    return d;
++}
++
+ // this function computes several contig stats and then clears the contig data from the sample
+ static void sample_stats(sample_t *self, const model_t *model) {
+     int n = self->n;
+@@ -1968,9 +2278,10 @@ static void sample_stats(sample_t *self, const model_t
+         self->x_nonpar_lrr_median = get_median(lrr, n_imap, imap_arr);
+ 
+         if (model->flags & WGS_DATA) {
+-            double f(double x, void *data) { return -lod_lkl_beta_binomial(ad0, ad1, n_imap, imap_arr, x); }
++            f9_data_t *f9_data = f9_pack(ad0, ad1, n_imap, imap_arr);
+             double x;
+-            kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &x); // dispersions above 0.5 are not allowed
++            kmin_brent(f9, 0.1, 0.2, f9_data, KMIN_EPS, &x); // dispersions above 0.5 are not allowed
++	    free(f9_data);
+             self->x_nonpar_dispersion = (float)x;
+         } else {
+             self->x_nonpar_dispersion = get_sample_sd(baf, n_imap, imap_arr);
+@@ -1995,9 +2306,10 @@ static void sample_stats(sample_t *self, const model_t
+         hts_expand(stats_t, self->n_stats, self->m_stats, self->stats_arr);
+ 
+         if (model->flags & WGS_DATA) {
+-            double f(double x, void *data) { return -lod_lkl_beta_binomial(ad0, ad1, n, NULL, x); }
+             double x;
+-            kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &x); // dispersions above 0.5 are not allowed
++	    f2_data_t *f2_data = f2_pack(ad0, ad1, n);
++            kmin_brent(f2, 0.1, 0.2, f2_data, KMIN_EPS, &x); // dispersions above 0.5 are not allowed
++	    free(f2_data);
+             self->stats_arr[self->n_stats - 1].dispersion = (float)x;
+         } else {
+             self->stats_arr[self->n_stats - 1].dispersion = get_sample_sd(baf, n, NULL);
diff --git a/biology/bio-mocha/pkg-descr b/biology/bio-mocha/pkg-descr
new file mode 100644
index 000000000000..cfecbffefbde
--- /dev/null
+++ b/biology/bio-mocha/pkg-descr
@@ -0,0 +1,8 @@
+MoChA is a bcftools plugin released under the MIT license for mosaic
+chromosomal alteration detection and analysis from DNA microarray or
+whole genome sequence data. It can be used both with Illumina and
+Affymetrix data. It can also be used for detection of germline copy
+number variants. Data can be prepared in usable file formats using the
+gtc2vcf plugin.
+
+WWW: https://software.broadinstitute.org/software/mocha/
diff --git a/biology/bio-mocha/pkg-plist b/biology/bio-mocha/pkg-plist
new file mode 100644
index 000000000000..b7be082bf4e3
--- /dev/null
+++ b/biology/bio-mocha/pkg-plist
@@ -0,0 +1,9 @@
+libexec/bcftools/extendFMT.so
+libexec/bcftools/mocha.so
+libexec/bcftools/mochatools.so
+libexec/bcftools/score.so
+libexec/bcftools/trio-phase.so
+%%DATADIR%%/mocha_plot.R
+%%DATADIR%%/pileup_plot.R
+%%DATADIR%%/shift_plot.R
+%%DATADIR%%/summary_plot.R
author	Jason W. Bacon <jwb@FreeBSD.org>	2021-12-03 13:41:26 +0000
committer	Jason W. Bacon <jwb@FreeBSD.org>	2021-12-03 13:41:26 +0000
commit	9152cfc775fca98435610da5faad6d56c5cdeacb (patch)
tree	2f2e4d5fbead0ec9b02489d64912a617015e75cf
parent	2f5db2a99dd4fe980c58c1e9f2436f0714195558 (diff)
download	ports-9152cfc775fca98435610da5faad6d56c5cdeacb.tar.gz ports-9152cfc775fca98435610da5faad6d56c5cdeacb.zip