diff options
-rw-r--r-- | biology/Makefile | 1 | ||||
-rw-r--r-- | biology/bio-mocha/Makefile | 48 | ||||
-rw-r--r-- | biology/bio-mocha/distinfo | 5 | ||||
-rw-r--r-- | biology/bio-mocha/files/patch-Makefile | 33 | ||||
-rw-r--r-- | biology/bio-mocha/files/patch-configure.ac | 11 | ||||
-rw-r--r-- | biology/bio-mocha/files/patch-plugins_beta__binom.h | 19 | ||||
-rw-r--r-- | biology/bio-mocha/files/patch-plugins_mocha.c | 479 | ||||
-rw-r--r-- | biology/bio-mocha/pkg-descr | 8 | ||||
-rw-r--r-- | biology/bio-mocha/pkg-plist | 9 |
9 files changed, 613 insertions, 0 deletions
diff --git a/biology/Makefile b/biology/Makefile index 1c79cff38867..02ba7b46b286 100644 --- a/biology/Makefile +++ b/biology/Makefile @@ -11,6 +11,7 @@ SUBDIR += bcftools SUBDIR += bedtools SUBDIR += bfc + SUBDIR += bio-mocha SUBDIR += bioawk SUBDIR += biococoa SUBDIR += biolibc diff --git a/biology/bio-mocha/Makefile b/biology/bio-mocha/Makefile new file mode 100644 index 000000000000..a046d5fd95c5 --- /dev/null +++ b/biology/bio-mocha/Makefile @@ -0,0 +1,48 @@ +PORTNAME= bio-mocha +DISTVERSION= 1.13 +CATEGORIES= biology +MASTER_SITES= https://software.broadinstitute.org/software/mocha/ +DISTFILES+= ${PORTNAME}_${DISTVERSION}-20211015.tar.gz + +MAINTAINER= jwb@FreeBSD.org +COMMENT= bcftools plugin for mosaic chromosomal alteration analysis + +LICENSE= MIT + +LIB_DEPENDS= libhts.so:biology/htslib +BUILD_DEPENDS= bash:shells/bash +RUN_DEPENDS= bcftools==${PORTVERSION}:biology/bcftools + +USES= autoreconf gmake localbase perl5 python:env shebangfix +USE_GITHUB= yes +USE_PERL5= test + +GH_ACCOUNT= samtools +GH_PROJECT= bcftools +GNU_CONFIGURE= yes +SHEBANG_FILES= misc/* test/test.pl + +post-extract: + @${MV} ${WRKDIR}/*.c ${WRKDIR}/*.h ${WRKSRC}/plugins + @${MKDIR} ${WRKSRC}/MoCha + @${MV} ${WRKDIR}/*.R ${WRKSRC}/MoCha + +pre-configure: + @${REINPLACE_CMD} -e 's|@PORTVERSION@|${PORTVERSION}|g' \ + ${WRKSRC}/configure.ac + +do-install: + ${MKDIR} ${STAGEDIR}${PREFIX}/libexec/bcftools + ${INSTALL_PROGRAM} ${WRKSRC}/plugins/extendFMT.so \ + ${STAGEDIR}${PREFIX}/libexec/bcftools + ${INSTALL_PROGRAM} ${WRKSRC}/plugins/mocha.so \ + ${STAGEDIR}${PREFIX}/libexec/bcftools + ${INSTALL_PROGRAM} ${WRKSRC}/plugins/mochatools.so \ + ${STAGEDIR}${PREFIX}/libexec/bcftools + ${INSTALL_PROGRAM} ${WRKSRC}/plugins/score.so \ + ${STAGEDIR}${PREFIX}/libexec/bcftools + ${INSTALL_PROGRAM} ${WRKSRC}/plugins/trio-phase.so \ + ${STAGEDIR}${PREFIX}/libexec/bcftools + (cd ${WRKSRC}/MoCha && ${COPYTREE_SHARE} . ${STAGEDIR}${DATADIR}) + +.include <bsd.port.mk> diff --git a/biology/bio-mocha/distinfo b/biology/bio-mocha/distinfo new file mode 100644 index 000000000000..e67e570dfc93 --- /dev/null +++ b/biology/bio-mocha/distinfo @@ -0,0 +1,5 @@ +TIMESTAMP = 1638495618 +SHA256 (bio-mocha_1.13-20211015.tar.gz) = e145bb48fea347202e16395ab7d78e7e7314749b8d472a2071f8074004cd63a5 +SIZE (bio-mocha_1.13-20211015.tar.gz) = 73627 +SHA256 (samtools-bcftools-1.13_GH0.tar.gz) = 55fbc674ec69e243052e9fb6560eb43d06f45f210e1842ba4dbe33acb394e562 +SIZE (samtools-bcftools-1.13_GH0.tar.gz) = 3133637 diff --git a/biology/bio-mocha/files/patch-Makefile b/biology/bio-mocha/files/patch-Makefile new file mode 100644 index 000000000000..3299954ae45b --- /dev/null +++ b/biology/bio-mocha/files/patch-Makefile @@ -0,0 +1,33 @@ +--- Makefile.orig 2021-03-17 09:16:18 UTC ++++ Makefile +@@ -58,13 +58,14 @@ pluginpath = $(plugindir) + # Installation location for $(MISC_PROGRAMS) and $(MISC_SCRIPTS) + misc_bindir = $(bindir) + +-MKDIR_P = mkdir -p +-INSTALL = install -p +-INSTALL_DATA = $(INSTALL) -m 644 +-INSTALL_DIR = $(MKDIR_P) -m 755 +-INSTALL_MAN = $(INSTALL_DATA) +-INSTALL_PROGRAM = $(INSTALL) +-INSTALL_SCRIPT = $(INSTALL_PROGRAM) ++# Use BSD_INSTALL_PROGRAM to strip when WITH_DEBUG not set ++MKDIR_P = mkdir -p ++INSTALL = install -p ++INSTALL_DATA = ${BSD_INSTALL_DATA} ++INSTALL_DIR = $(MKDIR_P) ++INSTALL_MAN = ${BSD_INSTALL_MAN} ++INSTALL_PROGRAM = ${BSD_INSTALL_PROGRAM} ++INSTALL_SCRIPT = ${BSD_INSTALL_SCRIPT} + + PROGRAMS = bcftools + MISC_SCRIPTS = \ +@@ -142,7 +143,7 @@ print-version: + ifdef USE_GPL + main.o : EXTRA_CPPFLAGS += -DUSE_GPL + OBJS += polysomy.o peakfit.o +- GSL_LIBS ?= -lgsl -lcblas ++ GSL_LIBS ?= -lgslcblas + endif + + print-%: diff --git a/biology/bio-mocha/files/patch-configure.ac b/biology/bio-mocha/files/patch-configure.ac new file mode 100644 index 000000000000..ca845d2ad85f --- /dev/null +++ b/biology/bio-mocha/files/patch-configure.ac @@ -0,0 +1,11 @@ +--- configure.ac.orig 2018-07-18 08:34:29 UTC ++++ configure.ac +@@ -23,7 +23,7 @@ + # DEALINGS IN THE SOFTWARE. + + dnl Process this file with autoconf to produce a configure script +-AC_INIT([BCFtools], m4_esyscmd_s([./version.sh 2>/dev/null]), ++AC_INIT([BCFtools], [@PORTVERSION@], + [samtools-help@lists.sourceforge.net], [], [http://www.htslib.org/]) + AC_PREREQ([2.63]) dnl This version introduced 4-argument AC_CHECK_HEADER + AC_CONFIG_SRCDIR([main.c]) diff --git a/biology/bio-mocha/files/patch-plugins_beta__binom.h b/biology/bio-mocha/files/patch-plugins_beta__binom.h new file mode 100644 index 000000000000..a6804a6dc9f4 --- /dev/null +++ b/biology/bio-mocha/files/patch-plugins_beta__binom.h @@ -0,0 +1,19 @@ +--- plugins/beta_binom.h.orig 2021-11-30 13:41:36 UTC ++++ plugins/beta_binom.h +@@ -137,14 +137,14 @@ void beta_binom_update(beta_binom_t *self, double p, d + * Returns the equivalent of dbeta_binom(a, a+b, p, (1 - rho) / rho, log=TRUE) from R package + * rmutil + */ +-inline double beta_binom_log_unsafe(const beta_binom_t *self, int a, int b) { ++static inline double beta_binom_log_unsafe(const beta_binom_t *self, int a, int b) { + return self->log_gamma_alpha[a] + self->log_gamma_beta[b] - self->log_gamma_alpha_beta[a + b]; + } + + /** + * Same as before but it performs boundary checking before computing the log likelihood + */ +-inline double beta_binom_log(beta_binom_t *self, int a, int b) { ++static inline double beta_binom_log(beta_binom_t *self, int a, int b) { + if (a < 0 || b < 0) return NAN; + if (a > self->n1 || b > self->n1 || a + b > self->n2) + beta_binom_update(self, self->p, self->rho, a > b ? a : b, a + b); diff --git a/biology/bio-mocha/files/patch-plugins_mocha.c b/biology/bio-mocha/files/patch-plugins_mocha.c new file mode 100644 index 000000000000..665d785c00f3 --- /dev/null +++ b/biology/bio-mocha/files/patch-plugins_mocha.c @@ -0,0 +1,479 @@ +--- plugins/mocha.c.orig 2021-10-15 02:37:57 UTC ++++ plugins/mocha.c +@@ -705,6 +705,44 @@ static double baf_phase_lod(const float *baf_arr, cons + return (double)ret * M_LOG10E; + } + ++typedef struct ++{ ++ const float *baf; ++ const int8_t *gt_phase; ++ int n; ++ const int *imap; ++ int8_t *path; ++ float err_log_prb; ++ float baf_sd; ++} f1_data_t; ++ ++double f1(double x, void *data) ++{ ++ f1_data_t *d = data; ++ ++ return -baf_phase_lod(d->baf, d->gt_phase, d->n, d->imap, d->path, ++ d->err_log_prb, d->baf_sd, x); ++} ++ ++static inline f1_data_t *f1_pack(const float *baf, const int8_t *gt_phase, int n, ++ const int *imap, int8_t *path, float err_log_prb, ++ float baf_sd) ++{ ++ // Switch to malloc() and free() if more than one object must exist at ++ // any given time ++ f1_data_t *d = malloc(sizeof(f1_data_t)); ++ ++ d->baf = baf; ++ d->gt_phase = gt_phase; ++ d->n = n; ++ d->imap = imap; ++ d->path = path; ++ d->err_log_prb = err_log_prb; ++ d->baf_sd = baf_sd; ++ ++ return d; ++} ++ + // TODO find a better title for this function + static float compare_models(const float *baf, const int8_t *gt_phase, int n, const int *imap, float xy_log_prb, + float err_log_prb, float flip_log_prb, float tel_log_prb, float baf_sd, const float *bdev, +@@ -716,8 +754,11 @@ static float compare_models(const float *baf, const in + int n_flips = 0; + for (int i = 1; i < n; i++) + if (path[i - 1] && path[i] && path[i - 1] != path[i]) n_flips++; +- double f(double x, void *data) { return -baf_phase_lod(baf, gt_phase, n, imap, path, err_log_prb, baf_sd, x); } +- double x, fx = kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &x); ++ double x; ++ f1_data_t *f1_data = f1_pack(baf, gt_phase, n, imap, path, err_log_prb, ++ baf_sd); ++ double fx = kmin_brent(f1, 0.1, 0.2, f1_data, KMIN_EPS, &x); ++ free(f1_data); + free(path); + return -(float)fx + (float)n_flips * flip_log_prb * (float)M_LOG10E; + } +@@ -750,6 +791,34 @@ static float get_sample_mean(const float *v, int n, co + return mean /= (float)j; + } + ++typedef struct ++{ ++ const float *baf_arr; ++ int n; ++ const int *imap; ++ float baf_sd; ++} f7_data_t; ++ ++double f7(double x, void *data) ++{ ++ f7_data_t *d = data; ++ ++ return -baf_log_lkl(d->baf_arr, d->n, d->imap, d->baf_sd, x); ++} ++ ++static inline f7_data_t *f7_pack(const float *baf_arr, int n, const int *imap, ++ float baf_sd) ++{ ++ f7_data_t *d = malloc(sizeof(f7_data_t)); ++ ++ d->baf_arr = baf_arr; ++ d->n = n; ++ d->imap = imap; ++ d->baf_sd = baf_sd; ++ ++ return d; ++} ++ + static float get_baf_bdev(const float *baf_arr, int n, const int *imap, float baf_sd) { + double bdev = 0.0; + int j = 0; +@@ -763,8 +832,9 @@ static float get_baf_bdev(const float *baf_arr, int n, + bdev /= j; + // simple method to compute bdev should work well for germline duplications + if ((float)bdev > 2.0f * baf_sd) return (float)bdev; +- double f(double x, void *data) { return -baf_log_lkl(baf_arr, n, imap, baf_sd, x); } +- kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &bdev); ++ f7_data_t *f7_data = f7_pack(baf_arr, n, imap, baf_sd); ++ kmin_brent(f7, 0.1, 0.2, f7_data, KMIN_EPS, &bdev); ++ free(f7_data); + return (float)bdev < 1e-4 ? (float)NAN : (float)bdev; + } + +@@ -801,10 +871,41 @@ static double ad_log_lkl(const int16_t *ad0_arr, const + return (double)ret * M_LOG10E; + } + ++typedef struct ++{ ++ const int16_t *ad0_arr; ++ const int16_t *ad1_arr; ++ int n; ++ const int *imap; ++ float ad_rho; ++} f5_data_t; ++ ++double f5(double x, void *data) ++{ ++ f5_data_t *d = data; ++ ++ return -ad_log_lkl(d->ad0_arr, d->ad1_arr, d->n, d->imap, d->ad_rho, x); ++} ++ ++static inline f5_data_t *f5_pack(const int16_t *ad0_arr, ++ const int16_t *ad1_arr, int n, const int *imap, float ad_rho) ++{ ++ f5_data_t *d = malloc(sizeof(f5_data_t)); ++ ++ d->ad0_arr = ad0_arr; ++ d->ad1_arr = ad1_arr; ++ d->n = n; ++ d->imap = imap; ++ d->ad_rho = ad_rho; ++ ++ return d; ++} ++ + static float get_ad_bdev(const int16_t *ad0_arr, const int16_t *ad1_arr, int n, const int *imap, float ad_rho) { + double bdev = 0.0; +- double f(double x, void *data) { return -ad_log_lkl(ad0_arr, ad1_arr, n, imap, ad_rho, x); } +- kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &bdev); ++ f5_data_t *f5_data = f5_pack(ad0_arr, ad1_arr, n, imap, ad_rho); ++ kmin_brent(f5, 0.1, 0.2, f5_data, KMIN_EPS, &bdev); ++ free(f5_data); + return (float)bdev < 1e-4 ? (float)NAN : (float)bdev; + } + +@@ -986,6 +1087,44 @@ static double ad_phase_lod(const int16_t *ad0_arr, con + return (double)ret * M_LOG10E; + } + ++typedef struct ++{ ++ const int16_t *ad0; ++ const int16_t *ad1; ++ const int8_t *gt_phase; ++ int n; ++ const int *imap; ++ int8_t *path; ++ float err_log_prb; ++ float ad_rho; ++} f4_data_t; ++ ++double f4(double x, void *data) ++{ ++ f4_data_t *d = data; ++ ++ return -ad_phase_lod(d->ad0, d->ad1, d->gt_phase, d->n, d->imap, d->path, ++ d->err_log_prb, d->ad_rho, x); ++} ++ ++static inline f4_data_t *f4_pack(const int16_t *ad0, const int16_t *ad1, ++ const int8_t *gt_phase, int n, const int *imap, int8_t *path, ++ float err_log_prb, float ad_rho) ++{ ++ f4_data_t *d = malloc(sizeof(f4_data_t)); ++ ++ d->ad0 = ad0; ++ d->ad1 = ad1; ++ d->gt_phase = gt_phase; ++ d->n = n; ++ d->imap = imap; ++ d->path = path; ++ d->err_log_prb = err_log_prb; ++ d->ad_rho = ad_rho; ++ ++ return d; ++} ++ + // TODO find a better title for this function + static float compare_wgs_models(const int16_t *ad0, const int16_t *ad1, const int8_t *gt_phase, int n, const int *imap, + float xy_log_prb, float err_log_prb, float flip_log_prb, float tel_log_prb, +@@ -998,8 +1137,9 @@ static float compare_wgs_models(const int16_t *ad0, co + int n_flips = 0; + for (int i = 1; i < n; i++) + if (path[i - 1] && path[i] && path[i - 1] != path[i]) n_flips++; +- double f(double x, void *data) { return -ad_phase_lod(ad0, ad1, gt_phase, n, imap, path, err_log_prb, ad_rho, x); } +- double x, fx = kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &x); ++ f4_data_t *f4_data = f4_pack(ad0, ad1, gt_phase, n, imap, path, err_log_prb, ad_rho); ++ double x, fx = kmin_brent(f4, 0.1, 0.2, f4_data, KMIN_EPS, &x); ++ free(f4_data); + free(path); + return -(float)fx + (float)n_flips * flip_log_prb * (float)M_LOG10E; + } +@@ -1485,6 +1625,129 @@ static float get_path_segs(const int8_t *path, const f + return 0; + } + ++typedef struct ++{ ++ float *lrr; ++ int a; ++ int16_t *ad0; ++ int16_t *ad1; ++ mocha_t *mocha; ++ const model_t *model; ++ sample_t *self; ++ float *baf; ++} f3_data_t; ++ ++double f3(double x, void *data) ++{ ++ f3_data_t *d = data; ++ ++ if (d->model->flags & WGS_DATA) ++ return -lrr_ad_lod(d->lrr + d->a, d->ad0 + d->a, d->ad1 + d->a, ++ d->mocha->n_sites, NULL, NAN, d->model->lrr_bias, ++ d->model->lrr_hap2dip, d->self->adjlrr_sd, ++ d->self->stats.dispersion, x); ++ else ++ return -lrr_baf_lod(d->lrr + d->a, d->baf + d->a, d->mocha->n_sites, ++ NULL, NAN, d->model->lrr_bias, d->model->lrr_hap2dip, ++ d->self->adjlrr_sd, d->self->stats.dispersion, x); ++} ++ ++static inline f3_data_t *f3_pack(float *lrr, int a, int16_t *ad0, int16_t *ad1, ++ mocha_t *mocha, const model_t *model, sample_t *self, float *baf) ++ ++{ ++ f3_data_t *d = malloc(sizeof(f3_data_t)); ++ ++ d->lrr = lrr; ++ d->a = a; ++ d->ad0 = ad0; ++ d->ad1 = ad1; ++ d->mocha = mocha; ++ d->model = model; ++ d->self = self; ++ d->baf = baf; ++ ++ return d; ++} ++ ++typedef struct ++{ ++ int16_t *ad0; ++ int16_t *ad1; ++ int8_t *gt_phase; ++ mocha_t *mocha; ++ int *imap_arr; ++ int *beg; ++ int i; ++ int8_t *path; ++ sample_t *self; ++} f6_data_t; ++ ++double f6(double x, void *data) ++{ ++ f6_data_t *d = data; ++ ++ return -ad_phase_lod(d->ad0, d->ad1, d->gt_phase, d->mocha->n_hets, ++ d->imap_arr + d->beg[d->i], d->path + d->beg[d->i], NAN, d->self->stats.dispersion, x); ++} ++ ++static inline f6_data_t *f6_pack(int16_t *ad0, int16_t *ad1, int8_t *gt_phase, ++ mocha_t *mocha, int *imap_arr, int *beg, int i, int8_t *path, sample_t *self) ++ ++{ ++ f6_data_t *d = malloc(sizeof(f6_data_t)); ++ ++ d->ad0 = ad0; ++ d->ad1 = ad1; ++ d->gt_phase = gt_phase; ++ d->mocha = mocha; ++ d->imap_arr = imap_arr; ++ d->beg = beg; ++ d->i = i; ++ d->path = path; ++ d->self = self; ++ ++ return d; ++} ++ ++typedef struct ++{ ++ float *baf; ++ int8_t *gt_phase; ++ mocha_t *mocha; ++ int *imap_arr; ++ int *beg; ++ int i; ++ int8_t *path; ++ sample_t *self; ++} f8_data_t; ++ ++double f8(double x, void *data) ++{ ++ f8_data_t *d = data; ++ ++ return -baf_phase_lod(d->baf, d->gt_phase, d->mocha->n_hets, ++ d->imap_arr + d->beg[d->i], d->path + d->beg[d->i], NAN, ++ d->self->stats.dispersion, x); ++} ++ ++static inline f8_data_t *f8_pack(float *baf, int8_t *gt_phase, mocha_t *mocha, ++ int *imap_arr, int *beg, int i, int8_t *path, sample_t *self) ++{ ++ f8_data_t *d = malloc(sizeof(f8_data_t)); ++ ++ d->baf = baf; ++ d->gt_phase = gt_phase; ++ d->mocha = mocha; ++ d->imap_arr = imap_arr; ++ d->beg = beg; ++ d->i = i; ++ d->path = path; ++ d->self = self; ++ ++ return d; ++} ++ + // process one contig for one sample + static void sample_run(sample_t *self, mocha_table_t *mocha_table, const model_t *model) { + // do nothing if chromosome Y or MT are being tested +@@ -1735,16 +1998,11 @@ static void sample_run(sample_t *self, mocha_table_t * + mocha.ldev = get_median(lrr + a, b + 1 - a, NULL); + get_mocha_stats(pos, lrr, baf, gt_phase, n, a, b, cen_beg, cen_end, length, self->stats.baf_conc, &mocha); + +- double f(double x, void *data) { +- if (model->flags & WGS_DATA) +- return -lrr_ad_lod(lrr + a, ad0 + a, ad1 + a, mocha.n_sites, NULL, NAN, model->lrr_bias, +- model->lrr_hap2dip, self->adjlrr_sd, self->stats.dispersion, x); +- else +- return -lrr_baf_lod(lrr + a, baf + a, mocha.n_sites, NULL, NAN, model->lrr_bias, model->lrr_hap2dip, +- self->adjlrr_sd, self->stats.dispersion, x); +- } + double bdev_lrr_baf; +- kmin_brent(f, -0.15, 0.15, NULL, KMIN_EPS, &bdev_lrr_baf); ++ f3_data_t *f3_data = f3_pack(lrr, a, ad0, ad1, &mocha, model, ++ self, baf); ++ kmin_brent(f3, -0.15, 0.15, f3_data, KMIN_EPS, &bdev_lrr_baf); ++ free(f3_data); + if (model->flags & WGS_DATA) + mocha.lod_lrr_baf = + lrr_ad_lod(lrr + a, ad0 + a, ad1 + a, mocha.n_sites, NULL, model->err_log_prb, model->lrr_bias, +@@ -1796,23 +2054,21 @@ static void sample_run(sample_t *self, mocha_table_t * + if (path[j] != path[j + 1]) mocha.n_flips++; + + if (model->flags & WGS_DATA) { +- double f(double x, void *data) { +- return -ad_phase_lod(ad0, ad1, gt_phase, mocha.n_hets, imap_arr + beg[i], path + beg[i], NAN, +- self->stats.dispersion, x); +- } + double bdev; +- kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &bdev); ++ f6_data_t *f6_data = f6_pack(ad0, ad1, gt_phase, &mocha, ++ imap_arr, beg, i, path, self); ++ kmin_brent(f6, 0.1, 0.2, f6_data, KMIN_EPS, &bdev); ++ free(f6_data); + mocha.bdev = fabsf((float)bdev); + mocha.lod_baf_phase = + ad_phase_lod(ad0, ad1, gt_phase, mocha.n_hets, imap_arr + beg[i], path + beg[i], + model->err_log_prb, self->stats.dispersion, mocha.bdev); + } else { +- double f(double x, void *data) { +- return -baf_phase_lod(baf, gt_phase, mocha.n_hets, imap_arr + beg[i], path + beg[i], NAN, +- self->stats.dispersion, x); +- } + double bdev; +- kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &bdev); ++ f8_data_t *f8_data = f8_pack(baf, gt_phase, &mocha, ++ imap_arr, beg, i, path, self); ++ kmin_brent(f8, 0.1, 0.2, f8_data, KMIN_EPS, &bdev); ++ free(f8_data); + mocha.bdev = fabsf((float)bdev); + mocha.lod_baf_phase = baf_phase_lod(baf, gt_phase, mocha.n_hets, imap_arr + beg[i], path + beg[i], + model->err_log_prb, self->stats.dispersion, mocha.bdev); +@@ -1923,6 +2179,60 @@ static float get_lrr_cutoff(const float *v, int n) { + return cutoff; + } + ++typedef struct ++{ ++ int16_t *ad0; ++ int16_t *ad1; ++ int n; ++} f2_data_t; ++ ++double f2(double x, void *data) ++{ ++ f2_data_t *d = data; ++ ++ return -lod_lkl_beta_binomial(d->ad0, d->ad1, d->n, NULL, x); ++} ++ ++static inline f2_data_t *f2_pack(int16_t *ad0, int16_t *ad1, int n) ++ ++{ ++ f2_data_t *d = malloc(sizeof(f2_data_t)); ++ ++ d->ad0 = ad0; ++ d->ad1 = ad1; ++ d->n = n; ++ ++ return d; ++} ++ ++typedef struct ++{ ++ int16_t *ad0; ++ int16_t *ad1; ++ int n_imap; ++ int *imap_arr; ++} f9_data_t; ++ ++double f9(double x, void *data) ++{ ++ f9_data_t *d = data; ++ ++ return -lod_lkl_beta_binomial(d->ad0, d->ad1, d->n_imap, d->imap_arr, x); ++} ++ ++static inline f9_data_t *f9_pack(int16_t *ad0, int16_t *ad1, int n_imap, ++ int *imap_arr) ++{ ++ f9_data_t *d = malloc(sizeof(f9_data_t)); ++ ++ d->ad0 = ad0; ++ d->ad1 = ad1; ++ d->n_imap = n_imap; ++ d->imap_arr = imap_arr; ++ ++ return d; ++} ++ + // this function computes several contig stats and then clears the contig data from the sample + static void sample_stats(sample_t *self, const model_t *model) { + int n = self->n; +@@ -1968,9 +2278,10 @@ static void sample_stats(sample_t *self, const model_t + self->x_nonpar_lrr_median = get_median(lrr, n_imap, imap_arr); + + if (model->flags & WGS_DATA) { +- double f(double x, void *data) { return -lod_lkl_beta_binomial(ad0, ad1, n_imap, imap_arr, x); } ++ f9_data_t *f9_data = f9_pack(ad0, ad1, n_imap, imap_arr); + double x; +- kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &x); // dispersions above 0.5 are not allowed ++ kmin_brent(f9, 0.1, 0.2, f9_data, KMIN_EPS, &x); // dispersions above 0.5 are not allowed ++ free(f9_data); + self->x_nonpar_dispersion = (float)x; + } else { + self->x_nonpar_dispersion = get_sample_sd(baf, n_imap, imap_arr); +@@ -1995,9 +2306,10 @@ static void sample_stats(sample_t *self, const model_t + hts_expand(stats_t, self->n_stats, self->m_stats, self->stats_arr); + + if (model->flags & WGS_DATA) { +- double f(double x, void *data) { return -lod_lkl_beta_binomial(ad0, ad1, n, NULL, x); } + double x; +- kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &x); // dispersions above 0.5 are not allowed ++ f2_data_t *f2_data = f2_pack(ad0, ad1, n); ++ kmin_brent(f2, 0.1, 0.2, f2_data, KMIN_EPS, &x); // dispersions above 0.5 are not allowed ++ free(f2_data); + self->stats_arr[self->n_stats - 1].dispersion = (float)x; + } else { + self->stats_arr[self->n_stats - 1].dispersion = get_sample_sd(baf, n, NULL); diff --git a/biology/bio-mocha/pkg-descr b/biology/bio-mocha/pkg-descr new file mode 100644 index 000000000000..cfecbffefbde --- /dev/null +++ b/biology/bio-mocha/pkg-descr @@ -0,0 +1,8 @@ +MoChA is a bcftools plugin released under the MIT license for mosaic +chromosomal alteration detection and analysis from DNA microarray or +whole genome sequence data. It can be used both with Illumina and +Affymetrix data. It can also be used for detection of germline copy +number variants. Data can be prepared in usable file formats using the +gtc2vcf plugin. + +WWW: https://software.broadinstitute.org/software/mocha/ diff --git a/biology/bio-mocha/pkg-plist b/biology/bio-mocha/pkg-plist new file mode 100644 index 000000000000..b7be082bf4e3 --- /dev/null +++ b/biology/bio-mocha/pkg-plist @@ -0,0 +1,9 @@ +libexec/bcftools/extendFMT.so +libexec/bcftools/mocha.so +libexec/bcftools/mochatools.so +libexec/bcftools/score.so +libexec/bcftools/trio-phase.so +%%DATADIR%%/mocha_plot.R +%%DATADIR%%/pileup_plot.R +%%DATADIR%%/shift_plot.R +%%DATADIR%%/summary_plot.R |