summary | refs | log | tree | commit | diff
path: root/biology
diff options
context:
space:
mode:
author: bacon <bacon@pkgsrc.org> 2021-03-24 15:22:29 +0000
committer: bacon <bacon@pkgsrc.org> 2021-03-24 15:22:29 +0000
commit: dfe56e923f7ad16e6a1a908656cba9b35c871dcc (patch)
tree: 7fac110667f6b880cf6e4c2a4e634c564c558bd9 /biology
parent: 1181a4b0eb00eab64cac8aa0c9447cafa1308140 (diff)
download: pkgsrc-dfe56e923f7ad16e6a1a908656cba9b35c871dcc.tar.gz
biology/vcf-split: import vcf-split-0.1.1
Vcf-split splits a multi-sample VCF into single-sample VCFs, writing thousands of output files simultaneously. Parsing the TOPMed human chromosome 1 BCF with bcftools takes two days, so extracting the 137,977 samples one at a time or using thousands of parallel readers of the same file is impractical. Vcf-split solves this by generating thousands of single-sample outputs during a single sweep through the multi-sample input.
Diffstat (limited to 'biology')
-rw-r--r-- biology/vcf-split/DESCR 6
-rw-r--r-- biology/vcf-split/Makefile 16
-rw-r--r-- biology/vcf-split/PLIST 3
-rw-r--r-- biology/vcf-split/distinfo 6
4 files changed, 31 insertions, 0 deletions
diff --git a/biology/vcf-split/DESCR b/biology/vcf-split/DESCR
new file mode 100644
index 00000000000..37f7d39c7c9
--- /dev/null
+++ b/biology/vcf-split/DESCR
@@ -0,0 +1,6 @@
+Vcf-split splits a multi-sample VCF into single-sample VCFs, writing thousands
+of output files simultaneously. Parsing the TOPMed human chromosome 1 BCF
+with bcftools takes two days, so extracting the 137,977 samples one at a time
+or using thousands of parallel readers of the same file is impractical.
+Vcf-split solves this by generating thousands of single-sample outputs during
+a single sweep through the multi-sample input.
diff --git a/biology/vcf-split/Makefile b/biology/vcf-split/Makefile
new file mode 100644
index 00000000000..3b3285dfba0
--- /dev/null
+++ b/biology/vcf-split/Makefile
@@ -0,0 +1,16 @@
+# $NetBSD: Makefile,v 1.1 2021/03/24 15:22:29 bacon Exp $
+
+DISTNAME= vcf-split-0.1.1
+CATEGORIES= biology
+MASTER_SITES= ${MASTER_SITE_GITHUB:=auerlab/}
+
+MAINTAINER= bacon@NetBSD.org
+HOMEPAGE= https://github.com/auerlab/vcf-split
+COMMENT= Split a multi-sample VCF into single-sample VCFs
+LICENSE= 2-clause-bsd
+
+pre-build:
+ cd ${WRKSRC} && ${MAKE} PREFIX=${PREFIX} depend
+
+.include "../../biology/biolibc/buildlink3.mk"
+.include "../../mk/bsd.pkg.mk"
diff --git a/biology/vcf-split/PLIST b/biology/vcf-split/PLIST
new file mode 100644
index 00000000000..0efca2992ad
--- /dev/null
+++ b/biology/vcf-split/PLIST
@@ -0,0 +1,3 @@
+@comment $NetBSD: PLIST,v 1.1 2021/03/24 15:22:29 bacon Exp $
+bin/vcf-split
+man/man1/vcf-split.1
diff --git a/biology/vcf-split/distinfo b/biology/vcf-split/distinfo
new file mode 100644
index 00000000000..d11adf9a848
--- /dev/null
+++ b/biology/vcf-split/distinfo
@@ -0,0 +1,6 @@
+$NetBSD: distinfo,v 1.1 2021/03/24 15:22:29 bacon Exp $
+
+SHA1 (vcf-split-0.1.1.tar.gz) = 550fefb4c07d4632405e94127a19e98031ac0067
+RMD160 (vcf-split-0.1.1.tar.gz) = 76a1b0b5a8934949d39e69ebe719c8f0ba247a13
+SHA512 (vcf-split-0.1.1.tar.gz) = fcc67d287adb9b1f12fb316981ca0b0c06099d8de07113760d198fbcbafb2557ae0d1c202976fae98d6d712a7b802728979d8224354fbaaf65a7872a27d1aa00
+Size (vcf-split-0.1.1.tar.gz) = 14226 bytes