Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nallo upload #4205

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions cg/cli/upload/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
upload_rna_junctions_to_scout,
upload_rna_omics_to_scout,
upload_rna_to_scout,
upload_tomte_to_scout,
upload_to_scout,
upload_tomte_to_scout,
)
from cg.cli.upload.utils import suggest_cases_to_upload
from cg.cli.upload.validate import validate
Expand All @@ -40,9 +40,10 @@
from cg.meta.upload.mip.mip_dna import MipDNAUploadAPI
from cg.meta.upload.mip.mip_rna import MipRNAUploadAPI
from cg.meta.upload.mutant.mutant import MutantUploadAPI
from cg.meta.upload.nallo.nallo import NalloUploadAPI
from cg.meta.upload.nf_analysis import NfAnalysisUploadAPI
from cg.meta.upload.tomte.tomte import TomteUploadAPI
from cg.meta.upload.raredisease.raredisease import RarediseaseUploadAPI
from cg.meta.upload.tomte.tomte import TomteUploadAPI
from cg.meta.upload.upload_api import UploadAPI
from cg.models.cg_config import CGConfig
from cg.store.models import Case
Expand Down Expand Up @@ -86,6 +87,8 @@ def upload(context: click.Context, case_id: str | None, restart: bool):
upload_api = MipRNAUploadAPI(config_object)
elif case.data_analysis == Workflow.MICROSALT:
upload_api = MicrosaltUploadAPI(config_object)
elif case.data_analysis == Workflow.NALLO:
upload_api = NalloUploadAPI(config_object)
elif case.data_analysis == Workflow.RAREDISEASE:
upload_api = RarediseaseUploadAPI(config_object)
elif case.data_analysis == Workflow.TOMTE:
Expand Down
2 changes: 2 additions & 0 deletions cg/cli/upload/scout.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from cg.meta.workflow.balsamic_umi import BalsamicUmiAnalysisAPI
from cg.meta.workflow.mip_dna import MipDNAAnalysisAPI
from cg.meta.workflow.mip_rna import MipRNAAnalysisAPI
from cg.meta.workflow.nallo import NalloAnalysisAPI
from cg.meta.workflow.raredisease import RarediseaseAnalysisAPI
from cg.meta.workflow.rnafusion import RnafusionAnalysisAPI
from cg.meta.workflow.tomte import TomteAnalysisAPI
Expand Down Expand Up @@ -316,6 +317,7 @@ def get_upload_api(case: Case, cg_config: CGConfig) -> UploadAPI:
Workflow.BALSAMIC_UMI: BalsamicUmiAnalysisAPI,
Workflow.MIP_RNA: MipRNAAnalysisAPI,
Workflow.MIP_DNA: MipDNAAnalysisAPI,
Workflow.NALLO: NalloAnalysisAPI,
Workflow.RAREDISEASE: RarediseaseAnalysisAPI,
Workflow.RNAFUSION: RnafusionAnalysisAPI,
Workflow.TOMTE: TomteAnalysisAPI,
Expand Down
5 changes: 5 additions & 0 deletions cg/constants/housekeeper_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ class BalsamicAnalysisTag:
QC_METRICS: list[str] = ["qc-metrics", "deliverable"]


class NalloAnalysisTag:
    """Housekeeper tag names that identify NALLO-specific analysis files."""

    HAPLOTAGS: str = "haplotags"
    PARAPHASE: str = "paraphase"


class HkAnalysisMetricsTag:
QC_METRICS: set[str] = {"qc-metrics", "deliverable"}

Expand Down
24 changes: 23 additions & 1 deletion cg/constants/scout.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from enum import StrEnum, auto

from cg.constants import FileExtensions
from cg.constants.housekeeper_tags import AlignmentFileTag
from cg.constants.housekeeper_tags import AlignmentFileTag, NalloAnalysisTag

HGNC_ID = "hgnc_id"

Expand Down Expand Up @@ -79,6 +79,18 @@ class ScoutUploadKey(StrEnum):
vcf_str={"vcf-str"},
)

# Housekeeper tag sets for NALLO case-level files, keyed by the Scout load config field
# they populate. Key order is kept as-is because the keys are iterated when the case
# files are added to the load config.
NALLO_CASE_TAGS: dict[str, set[str]] = {
    "delivery_report": {"delivery-report"},
    "multiqc": {"multiqc-html"},
    "somalier_pairs": {"relate-pairs", "somalier"},
    "somalier_samples": {"relate-samples", "somalier"},
    "vcf_snv_research": {"vcf-snv-research"},
    "vcf_snv": {"vcf-snv-clinical"},
    "vcf_sv_research": {"vcf-sv-research"},
    "vcf_sv": {"vcf-sv-clinical"},
    "vcf_str": {"vcf-str"},
}

BALSAMIC_CASE_TAGS = dict(
sv_vcf={"vcf-sv-clinical"},
snv_vcf={"vcf-snv-clinical"},
Expand Down Expand Up @@ -138,6 +150,12 @@ class ScoutUploadKey(StrEnum):
mitodel_file={"mitodel"},
)

# Housekeeper tag sets for NALLO sample-level files, keyed by the Scout individual
# field they populate.
NALLO_SAMPLE_TAGS: dict[str, set[str]] = {
    "alignment_file": {AlignmentFileTag.BAM, NalloAnalysisTag.HAPLOTAGS},
    "d4_file": {"d4"},
    "paraphase_alignment_path": {AlignmentFileTag.BAM, NalloAnalysisTag.PARAPHASE},
}

BALSAMIC_SAMPLE_TAGS = dict(
bam_file={"bam"},
alignment_file={"cram"},
Expand All @@ -155,3 +173,7 @@ class ScoutUploadKey(StrEnum):
)

RANK_MODEL_THRESHOLD = 5

# NALLO-specific values written to the Scout load config: the rank score threshold and
# the rank model versions for SNVs and SVs.
NALLO_RANK_MODEL_THRESHOLD = 8
NALLO_RANK_MODEL_VERSION_SNV = "1.0"
NALLO_RANK_MODEL_VERSION_SV = "1.0"
43 changes: 43 additions & 0 deletions cg/meta/upload/nallo/nallo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""NALLO upload API."""

import datetime as dt
import logging

import rich_click as click

from cg.cli.generate.delivery_report.base import generate_delivery_report
from cg.cli.upload.scout import upload_to_scout
from cg.constants import REPORT_SUPPORTED_DATA_DELIVERY, DataDelivery
from cg.meta.upload.upload_api import UploadAPI
from cg.meta.workflow.nallo import NalloAnalysisAPI
from cg.models.cg_config import CGConfig
from cg.store.models import Analysis, Case

LOG = logging.getLogger(__name__)


class NalloUploadAPI(UploadAPI):
    """Upload API for cases analysed with the NALLO workflow."""

    def __init__(self, config: CGConfig):
        """Create the NALLO analysis API from the config and pass both to the base upload API."""
        self.analysis_api = NalloAnalysisAPI(config)
        super().__init__(config=config, analysis_api=self.analysis_api)

    def upload(self, ctx: click.Context, case: Case, restart: bool) -> None:
        """Upload NALLO analysis data and files for a case.

        Args:
            ctx: Click context used to invoke the delivery report and Scout upload commands.
            case: Case to upload. The first entry of ``case.analyses`` is the analysis
                whose upload timestamps are updated.
            restart: Forwarded to the Scout upload command as ``re_upload``.
        """
        analysis: Analysis = case.analyses[0]
        self.update_upload_started_at(analysis=analysis)

        # Delivery report generation
        if case.data_delivery in REPORT_SUPPORTED_DATA_DELIVERY:
            ctx.invoke(generate_delivery_report, case_id=case.internal_id)

        # Scout specific upload
        if DataDelivery.SCOUT in case.data_delivery:
            ctx.invoke(upload_to_scout, case_id=case.internal_id, re_upload=restart)

        # Clinical delivery upload
        self.upload_files_to_customer_inbox(case)

        # Report and record success only after every upload step above has finished, so a
        # failure in the customer inbox delivery does not leave the analysis marked as uploaded.
        LOG.info(
            f"Upload of case {case.internal_id} was successful. Uploaded at {dt.datetime.now()} in StatusDB"
        )
        self.update_uploaded_at(analysis=analysis)
4 changes: 4 additions & 0 deletions cg/meta/upload/scout/hk_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class CaseTags(BaseModel):
peddy_ped: set[str] = Field(None, description="Ped info from peddy, only for rare disease")
peddy_sex: set[str] | None = Field(None, description="Peddy sex check, only for rare disease")
peddy_check: set[str] = Field(None, description="Peddy pedigree check, only for rare disease")
somalier_samples: set[str] = Field(None, description="Somalier samples info")
somalier_pairs: set[str] = Field(None, description="Somalier pairs info")
sex_check: set[str] = Field(None, description="Somalier sex check info, only for rare disease")
multiqc_report: set[str] | None = Field(None, description="MultiQC report")
multiqc: set[str] | None = Field(None, description="MultiQC report")
delivery_report: set[str] | None = Field(None, description="Delivery report")
Expand Down Expand Up @@ -84,3 +87,4 @@ class SampleTags(BaseModel):
reviewer_catalog: set[str] | None = None
reviewer_vcf: set[str] | None = None
mitodel_file: set[str] | None = None
paraphase_alignment_path: set[str] | None = None
88 changes: 88 additions & 0 deletions cg/meta/upload/scout/nallo_config_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import logging

from housekeeper.store.models import Version

from cg.apps.lims import LimsAPI
from cg.apps.madeline.api import MadelineAPI
from cg.constants.housekeeper_tags import HK_DELIVERY_REPORT_TAG
from cg.constants.scout import (
NALLO_CASE_TAGS,
NALLO_RANK_MODEL_THRESHOLD,
NALLO_RANK_MODEL_VERSION_SNV,
NALLO_RANK_MODEL_VERSION_SV,
NALLO_SAMPLE_TAGS,
GenomeBuild,
UploadTrack,
)
from cg.meta.upload.scout.hk_tags import CaseTags, SampleTags
from cg.meta.upload.scout.scout_config_builder import ScoutConfigBuilder
from cg.meta.workflow.nallo import NalloAnalysisAPI
from cg.models.scout.scout_load_config import NalloLoadConfig, ScoutNalloIndividual
from cg.store.models import Analysis

LOG = logging.getLogger(__name__)


class NalloConfigBuilder(ScoutConfigBuilder):
    """Build the Scout load config for a NALLO case.

    NOTE(review): an instance is bound to a single Housekeeper version and a single
    analysis, and the ``include_*`` helpers fill in the object they are given and return
    ``None``. Reviewers flagged both points as candidates for a future redesign; they are
    out of scope here and left unchanged.
    """

    def __init__(
        self,
        hk_version_obj: Version,
        analysis_obj: Analysis,
        nallo_analysis_api: NalloAnalysisAPI,
        lims_api: LimsAPI,
        madeline_api: MadelineAPI,
    ):
        """Initialise the base builder and store the NALLO tag sets and the APIs used for the build."""
        super().__init__(
            hk_version_obj=hk_version_obj,
            analysis_obj=analysis_obj,
            lims_api=lims_api,
        )
        self.case_tags = CaseTags(**NALLO_CASE_TAGS)
        self.sample_tags = SampleTags(**NALLO_SAMPLE_TAGS)
        self.nallo_analysis_api: NalloAnalysisAPI = nallo_analysis_api
        self.lims_api: LimsAPI = lims_api
        self.madeline_api: MadelineAPI = madeline_api

    def build_load_config(self) -> NalloLoadConfig:
        """Create a NALLO specific load config for uploading analysis to Scout."""
        LOG.info("Build load config for NALLO case")
        load_config = NalloLoadConfig(
            track=UploadTrack.RARE_DISEASE.value,
            delivery_report=self.get_file_from_hk({HK_DELIVERY_REPORT_TAG}),
        )
        self.add_common_info_to_load_config(load_config)
        load_config.gene_panels = self.nallo_analysis_api.get_aggregated_panels(
            customer_id=self.analysis_obj.case.customer.internal_id,
            default_panels=set(self.analysis_obj.case.panels),
        )
        self.include_case_files(load_config=load_config)
        self.get_sample_information(load_config)
        self.include_pedigree_picture(load_config)
        # NALLO-specific settings: genome build, rank score threshold and rank model versions.
        load_config.human_genome_build = GenomeBuild.hg38
        load_config.rank_score_threshold = NALLO_RANK_MODEL_THRESHOLD
        load_config.rank_model_version = NALLO_RANK_MODEL_VERSION_SNV
        load_config.sv_rank_model_version = NALLO_RANK_MODEL_VERSION_SV
        return load_config

    def include_case_files(self, load_config: NalloLoadConfig) -> None:
        """Include case level files for NALLO case."""
        LOG.info("Including NALLO specific case level files")
        for scout_key in NALLO_CASE_TAGS:
            self._include_case_file(load_config=load_config, scout_key=scout_key)

    def _include_case_file(self, load_config: NalloLoadConfig, scout_key: str) -> None:
        """Set the load config attribute named by scout_key from Housekeeper.

        The Housekeeper tags registered under the same name in the case tags are looked
        up, and whatever ``get_file_from_hk`` returns for them is assigned to the
        attribute (presumably None when no file matches; confirm against the base class).
        """
        file_path = self.get_file_from_hk(getattr(self.case_tags, scout_key))
        setattr(load_config, scout_key, file_path)

    def include_sample_files(self, config_sample: ScoutNalloIndividual) -> None:
        """Include sample level files that are optional."""
        LOG.info("Including NALLO specific sample level files")
        sample_id: str = config_sample.sample_id
        config_sample.d4_file = self.get_sample_file(
            hk_tags=self.sample_tags.d4_file, sample_id=sample_id
        )
        config_sample.paraphase_alignment_path = self.get_sample_file(
            hk_tags=self.sample_tags.paraphase_alignment_path, sample_id=sample_id
        )
2 changes: 1 addition & 1 deletion cg/meta/upload/scout/raredisease_config_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def load_custom_image_sample(self, load_config: RarediseaseLoadConfig) -> None:
load_config.custom_images = config_custom_images

def include_case_files(self, load_config: RarediseaseLoadConfig) -> None:
    """Include case level files for RAREDISEASE case."""
    LOG.info("Including RAREDISEASE specific case level files")
    for scout_key in RAREDISEASE_CASE_TAGS:
        self._include_case_file(load_config, scout_key)
Expand Down
5 changes: 4 additions & 1 deletion cg/meta/upload/scout/scout_config_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
ScoutIndividual,
ScoutLoadConfig,
ScoutMipIndividual,
ScoutNalloIndividual,
ScoutRarediseaseIndividual,
)
from cg.store.models import Analysis, Case, CaseSample, Sample
Expand Down Expand Up @@ -107,6 +108,8 @@ def build_config_sample(self, case_sample: CaseSample) -> ScoutIndividual:
config_sample = ScoutRarediseaseIndividual()
elif self.analysis_obj.workflow == Workflow.MIP_DNA:
config_sample = ScoutMipIndividual()
elif self.analysis_obj.workflow == Workflow.NALLO:
config_sample = ScoutNalloIndividual()
elif self.analysis_obj.workflow == Workflow.RNAFUSION:
config_sample = ScoutIndividual()
self.add_common_sample_info(config_sample=config_sample, case_sample=case_sample)
Expand Down Expand Up @@ -158,7 +161,7 @@ def include_sample_files(self, config_sample: ScoutIndividual) -> None:
"""Include all files that are used on sample level in Scout."""
raise NotImplementedError

def include_case_files(self, load_config: ScoutLoadConfig) -> None:
    """Include all files that are used on case level in Scout.

    The base implementation only raises; the workflow-specific config builders provide
    their own version that takes the load config to fill in.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError

Expand Down
8 changes: 8 additions & 0 deletions cg/meta/upload/scout/uploadscoutapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from cg.meta.upload.scout.balsamic_config_builder import BalsamicConfigBuilder
from cg.meta.upload.scout.balsamic_umi_config_builder import BalsamicUmiConfigBuilder
from cg.meta.upload.scout.mip_config_builder import MipConfigBuilder
from cg.meta.upload.scout.nallo_config_builder import NalloConfigBuilder
from cg.meta.upload.scout.raredisease_config_builder import RarediseaseConfigBuilder
from cg.meta.upload.scout.rnafusion_config_builder import RnafusionConfigBuilder
from cg.meta.upload.scout.scout_config_builder import ScoutConfigBuilder
Expand Down Expand Up @@ -632,6 +633,13 @@ def get_config_builder(self, analysis, hk_version) -> ScoutConfigBuilder:
lims_api=self.lims,
madeline_api=self.madeline_api,
),
Workflow.NALLO: NalloConfigBuilder(
hk_version_obj=hk_version,
analysis_obj=analysis,
nallo_analysis_api=self.analysis_api,
lims_api=self.lims,
madeline_api=self.madeline_api,
),
Workflow.RAREDISEASE: RarediseaseConfigBuilder(
hk_version_obj=hk_version,
analysis_obj=analysis,
Expand Down
17 changes: 17 additions & 0 deletions cg/models/scout/scout_load_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ class ScoutMipIndividual(ScoutIndividual):
mitodel_file: str | None = None


class ScoutNalloIndividual(ScoutIndividual):
    """Scout individual extended with the NALLO-specific sample file fields."""

    d4_file: str | None = None
    paraphase_alignment_path: str | None = None


class ScoutRarediseaseIndividual(ScoutIndividual):
mt_bam: str | None = None
chromograph_images: ChromographImages = ChromographImages()
Expand Down Expand Up @@ -163,6 +168,18 @@ class MipLoadConfig(ScoutLoadConfig):
vcf_sv_research: Annotated[str | None, BeforeValidator(field_not_none)] = None


class NalloLoadConfig(ScoutLoadConfig):
    """Scout load config fields for a NALLO case."""

    madeline: str | None = None
    samples: list[ScoutNalloIndividual] = []
    somalier_samples: str | None = None
    somalier_pairs: str | None = None
    # NOTE(review): annotated as plain `str` yet defaulting to None, unlike the
    # `str | None` VCF fields that follow - confirm the intent against field_not_none.
    vcf_snv: Annotated[str, BeforeValidator(field_not_none)] = None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am very confused by us having a BeforeValidator seemingly avoiding "field not None" behaviour, but also us having a default value of None. Seems a bit contradictory to me.

You can consider it out of scope though, the pattern does not seem introduced by you.

vcf_snv_research: Annotated[str | None, BeforeValidator(field_not_none)] = None
vcf_sv: Annotated[str | None, BeforeValidator(field_not_none)] = None
vcf_sv_research: Annotated[str | None, BeforeValidator(field_not_none)] = None
vcf_str: str | None = None


class RarediseaseLoadConfig(ScoutLoadConfig):
madeline: str | None = None
peddy_check: str | None = None
Expand Down
3 changes: 3 additions & 0 deletions tests/cli/upload/test_cli_scout.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from cg.meta.workflow.balsamic import BalsamicAnalysisAPI
from cg.meta.workflow.mip_dna import MipDNAAnalysisAPI
from cg.meta.workflow.mip_rna import MipRNAAnalysisAPI
from cg.meta.workflow.nallo import NalloAnalysisAPI
from cg.meta.workflow.rnafusion import RnafusionAnalysisAPI
from cg.meta.workflow.tomte import TomteAnalysisAPI
from cg.models.cg_config import CGConfig
Expand All @@ -24,13 +25,15 @@
Workflow.BALSAMIC,
Workflow.MIP_DNA,
Workflow.MIP_RNA,
Workflow.NALLO,
Workflow.RNAFUSION,
]

ANALYSIS_API: list = [
(Workflow.BALSAMIC, BalsamicAnalysisAPI),
(Workflow.MIP_DNA, MipDNAAnalysisAPI),
(Workflow.MIP_RNA, MipRNAAnalysisAPI),
(Workflow.NALLO, NalloAnalysisAPI),
(Workflow.RNAFUSION, RnafusionAnalysisAPI),
(Workflow.TOMTE, TomteAnalysisAPI),
]
Expand Down
3 changes: 2 additions & 1 deletion tests/cli/upload/test_cli_upload_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import datetime
import logging
import pytest

import pytest
from click.testing import CliRunner

from cg.cli.upload.base import upload_all_completed_analyses
Expand All @@ -16,6 +16,7 @@
Workflow.MICROSALT,
Workflow.MIP_DNA,
Workflow.MIP_RNA,
Workflow.NALLO,
Workflow.RAREDISEASE,
Workflow.RNAFUSION,
Workflow.TAXPROFILER,
Expand Down
Loading