Simple steps to import a dataset and query it.


Prerequisites


Annotations (optional)

/path/vep \
  --format vcf \
  -i /path/vcf.gz \
  --vcf \
  -o /path/annotated.vcf \
  --verbose \
  --cache \
  --dir /path/vep/cache \
  --offline \
  --fork XX \
  --force_overwrite \
  --assembly GRCh38 \
  --terms SO \
  --variant_class \
  --no_stats \
  --gencode_basic \
  --biotype \
  --custom /path/clinvar/GRCh38/clinvar.vcf.gz,clinvar,vcf,exact,0,CLNSIG \
  --custom /path/gnomad/r3/gnomad.genomes.r3.0.snv.tsv.gz,gnomAD,vcf,exact,0,AF

ETL

$ spark-submit \
    --master local[*] \
    --conf spark.local.dir=/tmppath \
    --conf spark.driver.maxResultSize=XX \
    --driver-memory=XX \
    --packages=io.projectglow:glow-spark3_2.12:1.2.1 \
    --class org.dnaerys.etl \
    /path/dnaerys-ctl-1.15.3.jar \
    --path /path/to/input_dir/with/vcfs \
    --path2save /path/to/dnaerys_dataset \
    --sinfo  /path/samples.csv \
    --notes "some dataset VEP annotations" \
    --cohort <some name> \
    --rings 8 \
    --grch38 \
    --vep \
    --clinsig clinvar_CLNSIG \
    --gnomad gnomAD_AF \
    --skipnotannotated

License

$ scala -cp /path/dnaerys-ctl-1.15.3.jar org.dnaerys.license --show
$ scala -cp /path/dnaerys-ctl-1.15.3.jar org.dnaerys.license --accept --path /path/dnaerys_dataset

Cluster Installation

minikube start
minikube addons enable csi-hostpath-driver
minikube addons enable ingress

eval $(minikube docker-env)
docker pull dnaerys/dnaerys-k8s:latest
minikube mount /path/dnaerys_dataset:/dnaerys/1kg/

git clone https://github.com/dnaerys/dnaerys-deployment.git
cd dnaerys-deployment/kubernetes/helm/
helm install 1kgp 1kgp

Querying

pathogenic variants in TP53

grpcurl \
  -plaintext \
  -proto dnaerys_1.15.3.proto \
  -d '{"chr":"17", "start":"7661779", "end":"7687546", "hom":"true", "het":"true", "ann": {"clnsgn":"PATHOGENIC"}, "assembly":"GRCh38"}' \
  ingress.local:80 \
  org.dnaerys.cluster.grpc.DnaerysService/SelectVariantsInRegion

high impact heterozygous variants in transcripts in TP53

grpcurl \
  -plaintext \
  -proto dnaerys_1.15.3.proto \
  -d '{"chr":"17", "start":"7661779", "end":"7687546", "het":"true", "ann": {"ftypes":["TRANSCRIPT"], "impact":["HIGH"]}, "assembly":"GRCh38"}' \
  ingress.local:80 \
  org.dnaerys.cluster.grpc.DnaerysService/SelectVariantsInRegion

pathogenic heterozygous variants in sample (NA10842) in TP53

grpcurl \
  -plaintext \
  -proto dnaerys_1.15.3.proto \
  -d '{"chr":"17", "start":"7661779", "end":"7687546", "het":"true", "samples":"NA10842", "ann": {"clnsgn":["PATHOGENIC"]}, "assembly":"GRCh38"}' \
  ingress.local:80 \
  org.dnaerys.cluster.grpc.DnaerysService/SelectVariantsInRegionInVirtualCohort

samples with pathogenic heterozygous variants in transcripts in TP53 with gnomAD AF < 0.0001

grpcurl \
  -plaintext \
  -proto dnaerys_1.15.3.proto \
  -d '{"chr":"17", "start":"7661779", "end":"7687546", "het":"true", "ann": {"ftypes":"TRANSCRIPT", "clnsgn":"PATHOGENIC", "gnomad_af_lt":"0.0001"}, "assembly":"GRCh38"}' \
  ingress.local:80 \
  org.dnaerys.cluster.grpc.DnaerysService/SelectSamplesInRegion

De Novo variants in chromosome 1 in a trio classified as likely pathogenic by AlphaMissense

grpcurl \
  -plaintext \
  -proto dnaerys_1.15.3.proto \
  -d '{"parent1":"HG00418", "parent2":"HG00419", "proband":"HG00420", "chr":"1", "start":"1", "end":"248956422", "ann": {"am_class":"AM_LIKELY_PATHOGENIC"}}' \
  ingress.local:80 \
  org.dnaerys.cluster.grpc.DnaerysService/SelectDeNovo

Homozygous Recessive variants in chromosome 1 in a trio classified as likely pathogenic by AlphaMissense

grpcurl \
  -plaintext \
  -proto dnaerys_1.15.3.proto \
  -d '{"unaffected_parent1":"HG00418", "unaffected_parent2":"HG00419", "affected_child":"HG00420", "chr":"1", "start":"1", "end":"248956422", "ann": {"am_class":"AM_LIKELY_PATHOGENIC"}}' \
  ingress.local:80 \
  org.dnaerys.cluster.grpc.DnaerysService/SelectHomRecessive

Heterozygous Dominant variants in chromosome 1 in a trio classified as likely pathogenic by AlphaMissense

grpcurl \
  -plaintext \
  -proto dnaerys_1.15.3.proto \
  -d '{"affected_parent":"HG00418", "unaffected_parent":"HG00419", "affected_child":"HG00420", "chr":"1", "start":"1", "end":"248956422", "ann": {"am_class":"AM_LIKELY_PATHOGENIC"}}' \
  ingress.local:80 \
  org.dnaerys.cluster.grpc.DnaerysService/SelectHetDominant

Querying: QC

Reported vs observed sex mismatch check for all samples in cohort

grpcurl \
  -plaintext \
  -proto dnaerys_1.15.3.proto \
  -d '{"cohort_name":"<cohort_name>"}' \
  ingress.local:80 \
  org.dnaerys.cluster.grpc.DnaerysService/SexMismatchCheck

Identical twins and duplicated samples among all possible pairs in the cohort

grpcurl \
  -plaintext \
  -proto dnaerys_1.15.3.proto \
  -d '{"cohort_name":"<cohort_name>", "degree":"TWINS_MONOZYGOTIC"}' \
  ingress.local:80 \
  org.dnaerys.cluster.grpc.DnaerysService/Kinship

All related pairs up to 3rd degree

grpcurl \
  -plaintext \
  -proto dnaerys_1.15.3.proto \
  -d '{"cohort_name":"<cohort_name>", "degree":"THIRD_DEGREE"}' \
  ingress.local:80 \
  org.dnaerys.cluster.grpc.DnaerysService/Kinship