Skip to content

Commit a78b608

Browse files
diitaz93 and islean authored
Raredisease configurator - Create services for file creation (#4260) (patch)
* add code for the params file * add creation of file in create_cpnfig * fix 2 out of 3 failing tests * add also sample sheet code to configurator * fix fixture * add skeleton of new classes * resolve conflicts * stash commit * enhance sample sheet and config * unify classes * stash commit * remove out of scope change * created a nextflow level of inheritance in configurator * add constants for gene files * removed duplicated utils method * renamed write function * rolled back small fix for scope reasons * inject content creators * add gene panel creator * some renaming * add sample sheet creator fixture * add params file creator fixture * add other file creator fixtures * add fixtures in plugins to conftest * fix constructors and fixtures * Add managed_variants support (#4265) (patch) ### Added - ManagedVariantsFileContentCreator * add first tests for content creation * Rework raredisease extension (#4266) * Add managed_variants support * Inject creators * Inject creators * Inject creators * Refactor Raredisease configurator into extension * Fix type hint * Fix fixtures * Fix test * remove inheritance of nextflow configurator (#4267) * remove inheritance of nextflow configurator * add sample sheet path as parameter to params * fix sample sheet path in configurator * address comments * fi sample sheet fixture (#4270) * add todo * fix samplesheet path fixture * Implement Raredisease Extension (#4271) * move raredisease models to the analysis starter directory * rename content creator files * add tests for managed variants * add tests for gene panel * fix sample sheet creator * fix creator * fix sample sheet test * Address comments * Remove TODO * Fix typo * Make interface * Revert interface --------- Co-authored-by: Isak Ohlsson Ångnell <[email protected]> Co-authored-by: islean <[email protected]>
1 parent 8744c09 commit a78b608

36 files changed

+1147
-282
lines changed

cg/meta/workflow/raredisease.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -16,27 +16,27 @@
1616
from cg.constants import DEFAULT_CAPTURE_KIT, Workflow
1717
from cg.constants.constants import GenomeVersion
1818
from cg.constants.nf_analysis import (
19+
RAREDISEASE_ADAPTER_BASES_PERCENTAGE_THRESHOLD,
1920
RAREDISEASE_COVERAGE_FILE_TAGS,
2021
RAREDISEASE_COVERAGE_INTERVAL_TYPE,
2122
RAREDISEASE_COVERAGE_THRESHOLD,
22-
RAREDISEASE_PARENT_PEDDY_METRIC_CONDITION,
23-
RAREDISEASE_METRIC_CONDITIONS_WGS,
2423
RAREDISEASE_METRIC_CONDITIONS_WES,
25-
RAREDISEASE_ADAPTER_BASES_PERCENTAGE_THRESHOLD,
24+
RAREDISEASE_METRIC_CONDITIONS_WGS,
25+
RAREDISEASE_PARENT_PEDDY_METRIC_CONDITION,
2626
)
2727
from cg.constants.scout import RAREDISEASE_CASE_TAGS, ScoutExportFileName
28-
from cg.constants.sequencing import SeqLibraryPrepCategory, NOVASEQ_SEQUENCING_READ_LENGTH
28+
from cg.constants.sequencing import NOVASEQ_SEQUENCING_READ_LENGTH, SeqLibraryPrepCategory
2929
from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex
3030
from cg.constants.tb import AnalysisType
3131
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
3232
from cg.models.cg_config import CGConfig
3333
from cg.models.deliverables.metric_deliverables import MetricsBase, MultiqcDataJson
34-
from cg.models.raredisease.raredisease import (
34+
from cg.resources import RAREDISEASE_BUNDLE_FILENAMES_PATH
35+
from cg.services.analysis_starter.configurator.file_creators.sample_sheet.models import (
3536
RarediseaseParameters,
3637
RarediseaseSampleSheetEntry,
3738
RarediseaseSampleSheetHeaders,
3839
)
39-
from cg.resources import RAREDISEASE_BUNDLE_FILENAMES_PATH
4040
from cg.store.models import CaseSample, Sample
4141

4242
LOG = logging.getLogger(__name__)

cg/models/raredisease/raredisease.py

-61
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
from enum import StrEnum
2-
31
from cg.constants.constants import SexOptions
4-
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
52
from cg.models.qc_metrics import QCMetrics
63

74

@@ -12,61 +9,3 @@ class RarediseaseQCMetrics(QCMetrics):
129
percent_duplicates: float
1310
predicted_sex_sex_check: SexOptions
1411
total_reads: int
15-
16-
17-
class RarediseaseSampleSheetEntry(NextflowSampleSheetEntry):
18-
"""Raredisease sample model is used when building the sample sheet."""
19-
20-
sex: str
21-
phenotype: int
22-
sex: int
23-
paternal_id: str
24-
maternal_id: str
25-
case_id: str
26-
27-
@property
28-
def reformat_sample_content(self) -> list[list[str]]:
29-
"""Reformat sample sheet content as a list of lists, where each list represents a line in the final file."""
30-
return [
31-
[
32-
self.name,
33-
lane + 1,
34-
self.fastq_forward_read_paths,
35-
self.fastq_reverse_read_paths,
36-
self.sex,
37-
self.phenotype,
38-
self.paternal_id,
39-
self.maternal_id,
40-
self.case_id,
41-
]
42-
for lane, (self.fastq_forward_read_paths, self.fastq_reverse_read_paths) in enumerate(
43-
zip(self.fastq_forward_read_paths, self.fastq_reverse_read_paths)
44-
)
45-
]
46-
47-
48-
class RarediseaseSampleSheetHeaders(StrEnum):
49-
sample: str = "sample"
50-
lane: str = "lane"
51-
fastq_1: str = "fastq_1"
52-
fastq_2: str = "fastq_2"
53-
sex: str = "sex"
54-
phenotype: str = "phenotype"
55-
paternal_id: str = "paternal_id"
56-
maternal_id: str = "maternal_id"
57-
case_id: str = "case_id"
58-
59-
@classmethod
60-
def list(cls) -> list[str]:
61-
return list(map(lambda header: header.value, cls))
62-
63-
64-
class RarediseaseParameters(WorkflowParameters):
65-
"""Model for Raredisease parameters."""
66-
67-
target_bed_file: str
68-
analysis_type: str
69-
save_mapped_as_cram: bool
70-
skip_germlinecnvcaller: bool
71-
vcfanno_extra_resources: str
72-
vep_filters_scout_fmt: str
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from abc import ABC
22

3+
from cg.services.analysis_starter.configurator.abstract_model import CaseConfig
4+
35

46
class Configurator(ABC):
    """Base class for the services that create the config of a case."""

    # NOTE(review): the docstring below calls this method abstract, but it is not
    # decorated with @abstractmethod, so subclasses are not forced to override it
    # and the default implementation returns None - confirm this is intended.
    def create_config(self, case_id: str) -> CaseConfig:
        """Abstract method to create a case config for a case."""
        ...
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from pathlib import Path
2+
3+
4+
class PipelineExtension:
    """Default extension point for pipeline specific configuration that performs no action."""

    def configure(self, case_id: str, case_path: Path) -> None:
        """Intended for pipeline specific configurations. If none is needed, this bare class
        can be used.

        Both arguments are accepted only so that overriding classes share one signature;
        this default implementation ignores them and returns None.
        """
        return None
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from pathlib import Path
2+
3+
from cg.services.analysis_starter.configurator.extensions.abstract import PipelineExtension
4+
from cg.services.analysis_starter.configurator.file_creators.gene_panel import GenePanelFileCreator
5+
from cg.services.analysis_starter.configurator.file_creators.managed_variants import (
6+
ManagedVariantsFileCreator,
7+
)
8+
9+
10+
class RarediseaseExtension(PipelineExtension):
    """Contains Raredisease specific file creations which differ from the default Nextflow flow."""

    def __init__(
        self,
        gene_panel_file_creator: GenePanelFileCreator,
        managed_variants_file_creator: ManagedVariantsFileCreator,
    ):
        # The file creators are injected, so this class only decides when they run.
        self.gene_panel_file_creator = gene_panel_file_creator
        self.managed_variants_file_creator = managed_variants_file_creator

    def configure(self, case_id: str, case_path: Path) -> None:
        """Create the gene panel file and then the managed variants file for the case."""
        creation_arguments = {"case_id": case_id, "case_path": case_path}
        self.gene_panel_file_creator.create(**creation_arguments)
        self.managed_variants_file_creator.create(**creation_arguments)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
from pathlib import Path
2+
3+
from cg.constants import FileExtensions
4+
from cg.io.json import write_json
5+
from cg.io.txt import concat_txt
6+
from cg.store.models import Case
7+
from cg.store.store import Store
8+
9+
10+
class NextflowConfigFileCreator:
    """Creates the Nextflow config file for a case."""

    def __init__(
        self, store: Store, platform: str, workflow_config_path: str, resources: str, account: str
    ):
        self.store = store
        self.platform = platform
        self.workflow_config_path = workflow_config_path
        self.resources = resources
        self.account = account

    @staticmethod
    def get_file_path(case_id: str, case_path: Path) -> Path:
        """Return the path to the Nextflow config file."""
        file_name: str = f"{case_id}_nextflow_config"
        return Path(case_path, file_name).with_suffix(FileExtensions.JSON)

    def create(self, case_id: str, case_path: Path) -> None:
        """Create the Nextflow config file for a case."""
        # NOTE(review): the content is a plain string, yet it is written with write_json
        # to a path carrying FileExtensions.JSON - confirm this is the intended writer.
        write_json(
            file_path=self.get_file_path(case_id=case_id, case_path=case_path),
            content=self._get_content(case_id),
        )

    def _get_content(self, case_id: str) -> str:
        """Get the content of the Nextflow config file."""
        # The configured files are passed as paths, the cluster options as literal text.
        return concat_txt(
            file_paths=[self.platform, self.workflow_config_path, self.resources],
            str_content=[self._get_cluster_options(case_id)],
        )

    def _get_cluster_options(self, case_id: str) -> str:
        """Return the clusterOptions line holding the account and the slurm priority of the case."""
        case: Case = self.store.get_case_by_internal_id(case_id)
        account_and_qos: str = f"-A {self.account} --qos={case.slurm_priority}"
        return f'process.clusterOptions = "{account_and_qos}"\n'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from pathlib import Path
2+
3+
from cg.apps.scout.scoutapi import ScoutAPI
4+
from cg.constants import FileExtensions, GenePanelMasterList, Workflow
5+
from cg.constants.gene_panel import GenePanelCombo, GenePanelGenomeBuild
6+
from cg.io.txt import write_txt
7+
from cg.services.analysis_starter.configurator.file_creators.utils import get_genome_build
8+
from cg.store.models import Case
9+
from cg.store.store import Store
10+
11+
12+
class GenePanelFileCreator:
    """Creates the gene panel file of a case from the panels exported through the Scout API."""

    def __init__(self, store: Store, scout_api: ScoutAPI):
        self.store = store
        self.scout_api = scout_api

    @staticmethod
    def get_file_path(case_path: Path) -> Path:
        """Return the path to the gene panel file under the given case path."""
        return Path(case_path, "gene_panels").with_suffix(FileExtensions.BED)

    def create(self, case_id: str, case_path: Path) -> None:
        """Write the gene panel content of the case to the gene panel file."""
        file_path: Path = self.get_file_path(case_path)
        content: list[str] = self._get_content(case_id)
        write_txt(file_path=file_path, content=content)

    def _get_content(self, case_id: str) -> list[str]:
        """Return the export of the aggregated panels of the case for the workflow's genome build."""
        case: Case = self.store.get_case_by_internal_id(internal_id=case_id)
        genome_build: GenePanelGenomeBuild = get_genome_build(workflow=Workflow(case.data_analysis))
        all_panels: list[str] = self._get_aggregated_panels(
            customer_id=case.customer.internal_id, default_panels=set(case.panels)
        )
        return self.scout_api.export_panels(build=genome_build, panels=all_panels)

    def _get_aggregated_panels(self, customer_id: str, default_panels: set[str]) -> list[str]:
        """Check if customer is collaborator for gene panel master list
        and if all default panels are included in the gene panel master list.
        If not, add gene panel combo and broad non-specific gene panels.
        Return an aggregated gene panel."""
        if GenePanelMasterList.is_customer_collaborator_and_panels_in_gene_panels_master_list(
            customer_id=customer_id, gene_panels=default_panels
        ):
            return GenePanelMasterList.get_panel_names()
        # _add_gene_panel_combo returns a new set, so the in-place union below
        # cannot leak into the default_panels set owned by the caller.
        all_panels: set[str] = self._add_gene_panel_combo(gene_panels=default_panels)
        all_panels |= GenePanelMasterList.get_non_specific_gene_panels()
        return list(all_panels)

    @staticmethod
    def _add_gene_panel_combo(gene_panels: set[str]) -> set[str]:
        """
        Add gene panels combinations for gene panels being part of gene panel combination and
        return updated gene panels.

        The given set is left untouched; a new set is returned.
        """
        additional_panels: set[str] = set()
        for panel in gene_panels:
            if panel in GenePanelCombo.COMBO_1:
                additional_panels |= GenePanelCombo.COMBO_1[panel]
        # A plain union builds a new set, whereas |= would update the argument in place.
        return gene_panels | additional_panels
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from pathlib import Path
2+
3+
from cg.apps.scout.scoutapi import ScoutAPI
4+
from cg.constants import FileExtensions, Workflow
5+
from cg.constants.gene_panel import GenePanelGenomeBuild
6+
from cg.io.txt import write_txt
7+
from cg.services.analysis_starter.configurator.file_creators.utils import get_genome_build
8+
from cg.store.store import Store
9+
10+
11+
class ManagedVariantsFileCreator:
    """Creates the managed variants file of a case from the export of the Scout API."""

    def __init__(self, scout_api: ScoutAPI, store: Store):
        self.scout_api = scout_api
        self.store = store

    def create(self, case_id: str, case_path: Path) -> None:
        """Write the managed variants content of the case to a file under the case path."""
        write_txt(
            file_path=Path(case_path, "managed_variants").with_suffix(FileExtensions.VCF),
            content=self._get_content(case_id),
        )

    def _get_content(self, case_id: str) -> list[str]:
        """Return the managed variants export for the genome build of the case's workflow."""
        case = self.store.get_case_by_internal_id(case_id)
        genome_build: GenePanelGenomeBuild = get_genome_build(Workflow(case.data_analysis))
        return self.scout_api.export_managed_variants(genome_build)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any
3+
4+
5+
class ParamsFileCreator(ABC):
    """Interface for the creators of params files."""

    @staticmethod
    @abstractmethod
    def get_file_path(case_id: str, case_path: Path) -> Path:
        """Return the path of the params file for the given case id and case path."""
        pass

    @abstractmethod
    def create(self, case_id: str, case_path: Path, sample_sheet_path: Path) -> Any:
        """Create the params file for a case; implemented by the subclasses."""
        # The return annotation is typing.Any: the lowercase builtin `any` is a
        # function, not a type, and is rejected by static type checkers.
        pass

0 commit comments

Comments
 (0)