From 4c75f398dba479e120dba1b115a585b461e87f27 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 28 Feb 2025 13:54:27 +0100 Subject: [PATCH 01/43] add code for the params file --- cg/meta/workflow/nf_analysis.py | 2 +- .../implementations/raredisease.py | 114 ++++++++++++++++-- .../analysis_starter/configurator/utils.py | 32 +++++ 3 files changed, 140 insertions(+), 8 deletions(-) diff --git a/cg/meta/workflow/nf_analysis.py b/cg/meta/workflow/nf_analysis.py index 3424a1216b..1dd9a3b34b 100644 --- a/cg/meta/workflow/nf_analysis.py +++ b/cg/meta/workflow/nf_analysis.py @@ -362,8 +362,8 @@ def create_params_file(self, case_id: str, dry_run: bool) -> None: ) def replace_values_in_params_file(self, workflow_parameters: dict) -> dict: - replaced_workflow_parameters = copy.deepcopy(workflow_parameters) """Iterate through the dictionary until all placeholders are replaced with the corresponding value from the dictionary""" + replaced_workflow_parameters = copy.deepcopy(workflow_parameters) while True: resolved: bool = True for key, value in replaced_workflow_parameters.items(): diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index 770a4ed468..53b0bdd629 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -1,16 +1,23 @@ import logging from pathlib import Path -from cg.constants import FileExtensions, Priority, Workflow +from cg.apps.lims import LimsAPI +from cg.constants import DEFAULT_CAPTURE_KIT, FileExtensions, Priority, Workflow +from cg.constants.scout import ScoutExportFileName +from cg.constants.tb import AnalysisType +from cg.exc import CgDataError from cg.io.txt import concat_txt +from cg.io.yaml import read_yaml, write_yaml_nextflow_style from cg.models.cg_config import RarediseaseConfig +from cg.models.raredisease.raredisease import RarediseaseParameters from cg.services.analysis_starter.configurator.abstract_service import Configurator from cg.services.analysis_starter.configurator.models.nextflow import NextflowCaseConfig from cg.services.analysis_starter.configurator.utils import ( get_slurm_qos_for_case, + replace_values_in_params_file, write_content_to_file_or_stdout, ) -from cg.store.models import Case +from cg.store.models import BedVersion, Case, Sample from cg.store.store import Store LOG = logging.getLogger(__name__) @@ -19,8 +26,9 @@ class RarediseaseConfigurator(Configurator): """Configurator for Raredisease analysis.""" - def __init__(self, store: Store, config: RarediseaseConfig): + def __init__(self, store: Store, config: RarediseaseConfig, lims: LimsAPI): self.account: str = config.slurm.account + self.lims: LimsAPI = lims self.platform: str = config.platform self.resources: str = config.resources self.root_dir: str = config.root @@ -55,6 +63,48 @@ def _create_nextflow_config(self, case_id: str, dry_run: bool = False) -> None: write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) LOG.debug(f"Created nextflow config file {file_path.as_posix()} successfully") + def _create_params_file(self, case_id: str, dry_run: bool = False) -> None: + """Create parameters file for a case.""" + LOG.debug("Getting parameters information built on-the-fly") + built_workflow_parameters: dict | None = self._get_built_workflow_parameters( + case_id=case_id + ).model_dump() + LOG.debug("Adding parameters from the pipeline config file if it exist") + + workflow_parameters: dict = built_workflow_parameters | ( + read_yaml(self.params) if hasattr(self, "params") and self.params else {} + ) + replaced_workflow_parameters: dict = replace_values_in_params_file( + workflow_parameters=workflow_parameters + ) + if not dry_run: + self._write_params_file( + case_id=case_id, replaced_workflow_parameters=replaced_workflow_parameters + ) + + def _get_data_analysis_type(self, case_id: str) -> str: + """Return data analysis type carried out.""" + sample: Sample = self.store.get_samples_by_case_id(case_id=case_id)[0] + return sample.application_version.application.analysis_type + + def _get_built_workflow_parameters(self, case_id: str) -> RarediseaseParameters: + """Return parameters.""" + analysis_type: str = self._get_data_analysis_type(case_id=case_id) + target_bed_file: str = self._get_target_bed(case_id=case_id, analysis_type=analysis_type) + skip_germlinecnvcaller = self._get_germlinecnvcaller_flag(analysis_type=analysis_type) + outdir = self._get_case_path(case_id=case_id) + + return RarediseaseParameters( + input=self._get_sample_sheet_path(case_id=case_id), + outdir=outdir, + analysis_type=analysis_type, + target_bed_file=target_bed_file, + save_mapped_as_cram=True, + skip_germlinecnvcaller=skip_germlinecnvcaller, + vcfanno_extra_resources=f"{outdir}/{ScoutExportFileName.MANAGED_VARIANTS}", + vep_filters_scout_fmt=f"{outdir}/{ScoutExportFileName.PANELS}", + ) + def _get_case_path(self, case_id: str) -> Path: """Path to case working directory.""" return Path(self.root_dir, case_id) @@ -66,6 +116,17 @@ def _get_case_workflow(self, case_id: str) -> Workflow: case: Case = self.store.get_case_by_internal_id(case_id) return Workflow(case.data_analysis) + def _get_cluster_options(self, case_id: str) -> str: + case: Case = self.store.get_case_by_internal_id(case_id) + qos: str = get_slurm_qos_for_case(case) + return f'process.clusterOptions = "-A {self.account} --qos={qos}"\n' + + @staticmethod + def _get_germlinecnvcaller_flag(analysis_type: str) -> bool: + if analysis_type == AnalysisType.WGS: + return True + return False + def _get_nextflow_config_content(self, case_id: str) -> str: config_files_list: list[str] = [ self.platform, @@ -90,10 +151,49 @@ def _get_params_file_path(self, case_id: str) -> Path: FileExtensions.YAML ) + def _get_sample_sheet_path(self, case_id: str) -> Path: + """Path to sample sheet.""" + return Path(self._get_case_path(case_id), f"{case_id}_samplesheet").with_suffix( + FileExtensions.CSV + ) + + def _get_target_bed(self, case_id: str, analysis_type: str) -> str: + """ + Return the target bed file from LIMS or use default capture kit for WHOLE_GENOME_SEQUENCING. + """ + target_bed_file: str = self._get_target_bed_from_lims(case_id=case_id) + if not target_bed_file: + if analysis_type == AnalysisType.WGS: + return DEFAULT_CAPTURE_KIT + raise ValueError("No capture kit was found in LIMS") + return target_bed_file + + def _get_target_bed_from_lims(self, case_id: str) -> str | None: + """Get target bed filename from LIMS.""" + case: Case = self.store.get_case_by_internal_id(internal_id=case_id) + sample: Sample = case.links[0].sample + if sample.from_sample: + sample: Sample = self.store.get_sample_by_internal_id(internal_id=sample.from_sample) + target_bed_shortname: str | None = self.lims.capture_kit(lims_id=sample.internal_id) + if not target_bed_shortname: + return None + bed_version: BedVersion | None = self.store.get_bed_version_by_short_name( + bed_version_short_name=target_bed_shortname + ) + if not bed_version: + raise CgDataError(f"Bed-version {target_bed_shortname} does not exist") + return bed_version.filename + def _get_work_dir(self, case_id: str) -> Path: return Path(self.root_dir, case_id, "work") - def _get_cluster_options(self, case_id: str) -> str: - case: Case = self.store.get_case_by_internal_id(case_id) - qos: str = get_slurm_qos_for_case(case) - return f'process.clusterOptions = "-A {self.account} --qos={qos}"\n' + def _write_params_file(self, case_id: str, replaced_workflow_parameters: dict = None) -> None: + """Write params-file for analysis.""" + LOG.debug("Writing parameters file") + if replaced_workflow_parameters: + write_yaml_nextflow_style( + content=replaced_workflow_parameters, + file_path=self._get_params_file_path(case_id=case_id), + ) + else: + self._get_params_file_path(case_id=case_id).touch() diff --git a/cg/services/analysis_starter/configurator/utils.py b/cg/services/analysis_starter/configurator/utils.py index 00a96de160..ba4844cdf6 100644 --- a/cg/services/analysis_starter/configurator/utils.py +++ b/cg/services/analysis_starter/configurator/utils.py @@ -1,3 +1,5 @@ +import copy +import re from pathlib import Path import rich_click as click @@ -21,3 +23,33 @@ def get_slurm_qos_for_case(case: Case) -> str: if are_all_samples_control(case=case): return SlurmQos.EXPRESS return Priority.priority_to_slurm_qos().get(case.priority) + + +def replace_values_in_params_file(workflow_parameters: dict) -> dict: + """ + Iterate through the dictionary until all placeholders are replaced with the corresponding value + from the dictionary + """ + replaced_workflow_parameters = copy.deepcopy(workflow_parameters) + while True: + resolved: bool = True + for key, value in replaced_workflow_parameters.items(): + new_value: str | int = replace_params_placeholders(value, workflow_parameters) + if new_value != value: + resolved = False + replaced_workflow_parameters[key] = new_value + if resolved: + break + return replaced_workflow_parameters + + +def replace_params_placeholders(value: str | int, workflow_parameters: dict) -> str: + """Replace values marked as placeholders with values from the given dictionary""" + if isinstance(value, str): + placeholders: list[str] = re.findall(r"{{\s*([^{}\s]+)\s*}}", value) + for placeholder in placeholders: + if placeholder in workflow_parameters: + value = value.replace( + f"{{{{{placeholder}}}}}", str(workflow_parameters[placeholder]) + ) + return value From 683ea3cab6f52424210f049984f1448d0efb3779 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 28 Feb 2025 13:55:42 +0100 Subject: [PATCH 02/43] add creation of file in create_cpnfig --- .../analysis_starter/configurator/implementations/raredisease.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index 53b0bdd629..031091b4c9 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -38,6 +38,7 @@ def __init__(self, store: Store, config: RarediseaseConfig, lims: LimsAPI): def create_config(self, case_id: str, dry_run: bool = False) -> NextflowCaseConfig: self._create_case_directory(case_id=case_id, dry_run=False) self._create_nextflow_config(case_id=case_id, dry_run=False) + self._create_params_file(case_id=case_id, dry_run=False) return NextflowCaseConfig( case_id=case_id, case_priority=self._get_case_priority(case_id), From 4b6665d8b66fcc7e5216fb522ca4e4f68e17b7b1 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 28 Feb 2025 15:37:58 +0100 Subject: [PATCH 03/43] fix 2 out of 3 failing tests --- tests/conftest.py | 1 + tests/fixture_plugins/analysis_starter/configurator_fixtures.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 56898adeb3..96deae22ab 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3008,6 +3008,7 @@ def raredisease_context( """context to use in cli""" cg_context.housekeeper_api_ = nf_analysis_housekeeper cg_context.trailblazer_api_ = trailblazer_api + cg_context.lims_api_ = MockLimsAPI() cg_context.meta_apis["analysis_api"] = RarediseaseAnalysisAPI(config=cg_context) status_db: Store = cg_context.status_db diff --git a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py index de11ef9fc0..e6ca055896 100644 --- a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py @@ -11,4 +11,5 @@ def raredisease_configurator(raredisease_context: CGConfig) -> RarediseaseConfig return RarediseaseConfigurator( store=raredisease_context.status_db, config=raredisease_context.raredisease, + lims=raredisease_context.lims_api, ) From 6c1c8aa896865fd3b3a1e6800ee927ae76079592 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 28 Feb 2025 16:45:17 +0100 Subject: [PATCH 04/43] add also sample sheet code to configurator --- .../implementations/raredisease.py | 116 ++++++++++++++++-- .../analysis_starter/configurator/utils.py | 71 +++++++++++ .../nf_analysis/test_cli_config_case.py | 8 +- tests/conftest.py | 38 +----- .../analysis_starter/case_config_fixtures.py | 14 ++- .../analysis_starter/configurator_fixtures.py | 1 + .../analysis_starter/path_fixtures.py | 33 +++-- .../analysis_starter/test_configurator.py | 11 +- 8 files changed, 227 insertions(+), 65 deletions(-) diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index 031091b4c9..4d18815512 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -1,23 +1,35 @@ import logging from pathlib import Path +from cg.apps.housekeeper.hk import HousekeeperAPI from cg.apps.lims import LimsAPI -from cg.constants import DEFAULT_CAPTURE_KIT, FileExtensions, Priority, Workflow +from cg.constants import DEFAULT_CAPTURE_KIT, FileExtensions, Priority, SequencingFileTag, Workflow +from cg.constants.constants import FileFormat from cg.constants.scout import ScoutExportFileName from cg.constants.tb import AnalysisType from cg.exc import CgDataError +from cg.io.controller import WriteFile from cg.io.txt import concat_txt from cg.io.yaml import read_yaml, write_yaml_nextflow_style from cg.models.cg_config import RarediseaseConfig -from cg.models.raredisease.raredisease import RarediseaseParameters +from cg.models.fastq import FastqFileMeta +from cg.models.raredisease.raredisease import ( + RarediseaseParameters, + RarediseaseSampleSheetEntry, + RarediseaseSampleSheetHeaders, +) from cg.services.analysis_starter.configurator.abstract_service import Configurator from cg.services.analysis_starter.configurator.models.nextflow import NextflowCaseConfig from cg.services.analysis_starter.configurator.utils import ( + extract_read_files, + get_phenotype_code, + get_sex_code, get_slurm_qos_for_case, + parse_fastq_data, replace_values_in_params_file, write_content_to_file_or_stdout, ) -from cg.store.models import BedVersion, Case, Sample +from cg.store.models import BedVersion, Case, CaseSample, Sample from cg.store.store import Store LOG = logging.getLogger(__name__) @@ -26,19 +38,32 @@ class RarediseaseConfigurator(Configurator): """Configurator for Raredisease analysis.""" - def __init__(self, store: Store, config: RarediseaseConfig, lims: LimsAPI): + def __init__( + self, + store: Store, + config: RarediseaseConfig, + housekeeper_api: HousekeeperAPI, + lims: LimsAPI, + ): self.account: str = config.slurm.account self.lims: LimsAPI = lims + self.housekeeper_api: HousekeeperAPI = housekeeper_api self.platform: str = config.platform self.resources: str = config.resources self.root_dir: str = config.root self.store: Store = store self.workflow_config_path: str = config.config + @property + def sample_sheet_headers(self) -> list[str]: + """Headers for sample sheet.""" + return RarediseaseSampleSheetHeaders.list() + def create_config(self, case_id: str, dry_run: bool = False) -> NextflowCaseConfig: self._create_case_directory(case_id=case_id, dry_run=False) - self._create_nextflow_config(case_id=case_id, dry_run=False) + self._create_sample_sheet(case_id=case_id, dry_run=False) self._create_params_file(case_id=case_id, dry_run=False) + self._create_nextflow_config(case_id=case_id, dry_run=False) return NextflowCaseConfig( case_id=case_id, case_priority=self._get_case_priority(case_id), @@ -83,10 +108,15 @@ def _create_params_file(self, case_id: str, dry_run: bool = False) -> None: case_id=case_id, replaced_workflow_parameters=replaced_workflow_parameters ) - def _get_data_analysis_type(self, case_id: str) -> str: - """Return data analysis type carried out.""" - sample: Sample = self.store.get_samples_by_case_id(case_id=case_id)[0] - return sample.application_version.application.analysis_type + def _create_sample_sheet(self, case_id: str, dry_run: bool) -> None: + """Create sample sheet for a case.""" + sample_sheet_content: list[list[any]] = self._get_sample_sheet_content(case_id=case_id) + if not dry_run: + self._write_sample_sheet( + content=sample_sheet_content, + file_path=self._get_sample_sheet_path(case_id=case_id), + header=self.sample_sheet_headers, + ) def _get_built_workflow_parameters(self, case_id: str) -> RarediseaseParameters: """Return parameters.""" @@ -122,6 +152,19 @@ def _get_cluster_options(self, case_id: str) -> str: qos: str = get_slurm_qos_for_case(case) return f'process.clusterOptions = "-A {self.account} --qos={qos}"\n' + def _get_data_analysis_type(self, case_id: str) -> str: + """Return data analysis type carried out.""" + sample: Sample = self.store.get_samples_by_case_id(case_id=case_id)[0] + return sample.application_version.application.analysis_type + + def _get_file_metadata_for_sample(self, sample: Sample) -> list[FastqFileMeta]: + return [ + parse_fastq_data(hk_file.full_path) + for hk_file in self.housekeeper_api.files( + bundle=sample.internal_id, tags={SequencingFileTag.FASTQ} + ) + ] + @staticmethod def _get_germlinecnvcaller_flag(analysis_type: str) -> bool: if analysis_type == AnalysisType.WGS: @@ -152,6 +195,45 @@ def _get_params_file_path(self, case_id: str) -> Path: FileExtensions.YAML ) + def _get_paired_read_paths(self, sample: Sample) -> tuple[list[str], list[str]]: + """Returns a tuple of paired fastq file paths for the forward and reverse read.""" + sample_metadata: list[FastqFileMeta] = self._get_file_metadata_for_sample(sample=sample) + fastq_forward_read_paths: list[str] = extract_read_files( + metadata=sample_metadata, forward_read=True + ) + fastq_reverse_read_paths: list[str] = extract_read_files( + metadata=sample_metadata, reverse_read=True + ) + return fastq_forward_read_paths, fastq_reverse_read_paths + + def _get_sample_sheet_content(self, case_id: str) -> list[list[any]]: + """Return formatted information required to build a sample sheet for a case. + This contains information for all samples linked to the case.""" + sample_sheet_content: list = [] + case: Case = self.store.get_case_by_internal_id(internal_id=case_id) + LOG.info(f"Samples linked to case {case_id}: {len(case.links)}") + LOG.debug("Getting sample sheet information") + for link in case.links: + sample_sheet_content.extend(self._get_sample_sheet_content_per_sample(case_sample=link)) + return sample_sheet_content + + def _get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]: + """Collect and format information required to build a sample sheet for a single sample.""" + fastq_forward_read_paths, fastq_reverse_read_paths = self._get_paired_read_paths( + sample=case_sample.sample + ) + sample_sheet_entry = RarediseaseSampleSheetEntry( + name=case_sample.sample.internal_id, + fastq_forward_read_paths=fastq_forward_read_paths, + fastq_reverse_read_paths=fastq_reverse_read_paths, + sex=get_sex_code(case_sample.sample.sex), + phenotype=get_phenotype_code(case_sample.status), + paternal_id=case_sample.get_paternal_sample_id, + maternal_id=case_sample.get_maternal_sample_id, + case_id=case_sample.case.internal_id, + ) + return sample_sheet_entry.reformat_sample_content + def _get_sample_sheet_path(self, case_id: str) -> Path: """Path to sample sheet.""" return Path(self._get_case_path(case_id), f"{case_id}_samplesheet").with_suffix( @@ -198,3 +280,19 @@ def _write_params_file(self, case_id: str, replaced_workflow_parameters: dict = ) else: self._get_params_file_path(case_id=case_id).touch() + + @staticmethod + def _write_sample_sheet( + content: list[list[any]], + file_path: Path, + header: list[str], + ) -> None: + """Write sample sheet CSV file.""" + LOG.debug("Writing sample sheet") + if header: + content.insert(0, header) + WriteFile.write_file_from_content( + content=content, + file_format=FileFormat.CSV, + file_path=file_path, + ) diff --git a/cg/services/analysis_starter/configurator/utils.py b/cg/services/analysis_starter/configurator/utils.py index ba4844cdf6..62a852bc67 100644 --- a/cg/services/analysis_starter/configurator/utils.py +++ b/cg/services/analysis_starter/configurator/utils.py @@ -1,14 +1,21 @@ import copy +import logging import re from pathlib import Path import rich_click as click from cg.constants.priority import Priority, SlurmQos +from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex +from cg.io.gzip import read_gzip_first_line from cg.io.txt import write_txt +from cg.meta.workflow.fastq import _is_undetermined_in_path from cg.meta.workflow.utils.utils import are_all_samples_control +from cg.models.fastq import FastqFileMeta, GetFastqFileMeta from cg.store.models import Case +LOG = logging.getLogger(__name__) + def write_content_to_file_or_stdout(content: str, file_path: Path, dry_run: bool = False) -> None: """Write content to a file if dry-run is False, otherwise print to stdout.""" @@ -53,3 +60,67 @@ def replace_params_placeholders(value: str | int, workflow_parameters: dict) -> f"{{{{{placeholder}}}}}", str(workflow_parameters[placeholder]) ) return value + + +def get_sex_code(sex: str) -> int: + """Return Raredisease sex code.""" + LOG.debug("Translate sex to integer code") + try: + code = PlinkSex[sex.upper()] + except KeyError: + raise ValueError(f"{sex} is not a valid sex") + return code + + +def get_phenotype_code(phenotype: str) -> int: + """Return Raredisease phenotype code.""" + LOG.debug("Translate phenotype to integer code") + try: + code = PlinkPhenotypeStatus[phenotype.upper()] + except KeyError: + raise ValueError(f"{phenotype} is not a valid phenotype") + return code + + +def extract_read_files( + metadata: list[FastqFileMeta], forward_read: bool = False, reverse_read: bool = False +) -> list[str]: + """Extract a list of fastq file paths for either forward or reverse reads.""" + if forward_read and not reverse_read: + read_direction = 1 + elif reverse_read and not forward_read: + read_direction = 2 + else: + raise ValueError("Either forward or reverse needs to be specified") + sorted_metadata: list = sorted(metadata, key=lambda k: k.path) + return [ + fastq_file.path + for fastq_file in sorted_metadata + if fastq_file.read_direction == read_direction + ] + + +def parse_fastq_data(fastq_path: Path) -> FastqFileMeta: + header_line: str = read_gzip_first_line(file_path=fastq_path) + fastq_file_meta: FastqFileMeta = parse_fastq_header(header_line) + fastq_file_meta.path = fastq_path + fastq_file_meta.undetermined = _is_undetermined_in_path(fastq_path) + matches = re.findall(r"-l[1-9]t([1-9]{2})_", str(fastq_path)) + if len(matches) > 0: + fastq_file_meta.flow_cell_id = f"{fastq_file_meta.flow_cell_id}-{matches[0]}" + return fastq_file_meta + + +def parse_fastq_header(line: str) -> FastqFileMeta | None: + """Parse and return fastq header metadata. + Handle Illumina's two different header formats + @see https://en.wikipedia.org/wiki/FASTQ_format + Raise: + TypeError if unable to split line into expected parts. + """ + parts = line.split(":") + try: + return GetFastqFileMeta.header_format.get(len(parts))(parts=parts) + except TypeError as exception: + LOG.error(f"Could not parse header format for header: {line}") + raise exception diff --git a/tests/cli/workflow/nf_analysis/test_cli_config_case.py b/tests/cli/workflow/nf_analysis/test_cli_config_case.py index ff73087a51..b338a35bea 100644 --- a/tests/cli/workflow/nf_analysis/test_cli_config_case.py +++ b/tests/cli/workflow/nf_analysis/test_cli_config_case.py @@ -120,7 +120,9 @@ def test_config_case_default_parameters( case_id: str = request.getfixturevalue(f"{workflow}_case_id") sample_sheet_path: Path = request.getfixturevalue(f"{workflow}_sample_sheet_path") params_file_path: Path = request.getfixturevalue(f"{workflow}_params_file_path") - nexflow_config_file_path: Path = request.getfixturevalue(f"{workflow}_nexflow_config_file_path") + nexflow_config_file_path: Path = request.getfixturevalue( + f"{workflow}_nextflow_config_file_path" + ) sample_sheet_content_expected: str = request.getfixturevalue(f"{workflow}_sample_sheet_content") # Mocking external Scout call @@ -201,7 +203,9 @@ def test_config_case_dry_run( case_id: str = request.getfixturevalue(f"{workflow}_case_id") sample_sheet_path: Path = request.getfixturevalue(f"{workflow}_sample_sheet_path") params_file_path: Path = request.getfixturevalue(f"{workflow}_params_file_path") - nexflow_config_file_path: Path = request.getfixturevalue(f"{workflow}_nexflow_config_file_path") + nexflow_config_file_path: Path = request.getfixturevalue( + f"{workflow}_nextflow_config_file_path" + ) # GIVEN a valid case diff --git a/tests/conftest.py b/tests/conftest.py index 96deae22ab..242507bbe0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2820,7 +2820,7 @@ def nallo_multiqc_json_metrics(nallo_analysis_dir) -> dict: @pytest.fixture(scope="function") -def nallo_nexflow_config_file_path(nallo_dir, nallo_case_id) -> Path: +def nallo_nextflow_config_file_path(nallo_dir, nallo_case_id) -> Path: """Path to config file.""" return Path(nallo_dir, nallo_case_id, f"{nallo_case_id}_nextflow_config").with_suffix( FileExtensions.JSON @@ -2905,36 +2905,6 @@ def raredisease_sample_sheet_content( return "\n".join([headers, row]) -@pytest.fixture(scope="function") -def raredisease_sample_sheet_path(raredisease_dir, raredisease_case_id) -> Path: - """Path to sample sheet.""" - return Path( - raredisease_dir, raredisease_case_id, f"{raredisease_case_id}_samplesheet" - ).with_suffix(FileExtensions.CSV) - - -@pytest.fixture(scope="function") -def raredisease_params_file_path(raredisease_dir, raredisease_case_id) -> Path: - """Path to parameters file.""" - return Path( - raredisease_dir, raredisease_case_id, f"{raredisease_case_id}_params_file" - ).with_suffix(FileExtensions.YAML) - - -@pytest.fixture(scope="function") -def raredisease_gene_panel_path(raredisease_dir, raredisease_case_id) -> Path: - """Path to gene panel file.""" - return Path(raredisease_dir, raredisease_case_id, "gene_panels").with_suffix(FileExtensions.BED) - - -@pytest.fixture(scope="function") -def raredisease_nexflow_config_file_path(raredisease_dir, raredisease_case_id) -> Path: - """Path to config file.""" - return Path( - raredisease_dir, raredisease_case_id, f"{raredisease_case_id}_nextflow_config" - ).with_suffix(FileExtensions.JSON) - - @pytest.fixture(scope="function") def raredisease_deliverable_data( raredisease_dir: Path, raredisease_case_id: str, sample_id: str @@ -3378,7 +3348,7 @@ def rnafusion_params_file_path(rnafusion_dir, rnafusion_case_id) -> Path: @pytest.fixture(scope="function") -def rnafusion_nexflow_config_file_path(rnafusion_dir, rnafusion_case_id) -> Path: +def rnafusion_nextflow_config_file_path(rnafusion_dir, rnafusion_case_id) -> Path: """Path to config file.""" return Path( rnafusion_dir, rnafusion_case_id, f"{rnafusion_case_id}_nextflow_config" @@ -3690,7 +3660,7 @@ def tomte_params_file_path(tomte_dir, tomte_case_id) -> Path: @pytest.fixture(scope="function") -def tomte_nexflow_config_file_path(tomte_dir, tomte_case_id) -> Path: +def tomte_nextflow_config_file_path(tomte_dir, tomte_case_id) -> Path: """Path to config file.""" return Path(tomte_dir, tomte_case_id, f"{tomte_case_id}_nextflow_config").with_suffix( FileExtensions.JSON @@ -4025,7 +3995,7 @@ def taxprofiler_sample_sheet_path(taxprofiler_dir, taxprofiler_case_id) -> Path: @pytest.fixture(scope="function") -def taxprofiler_nexflow_config_file_path(taxprofiler_dir, taxprofiler_case_id) -> Path: +def taxprofiler_nextflow_config_file_path(taxprofiler_dir, taxprofiler_case_id) -> Path: """Path to config file.""" return Path( taxprofiler_dir, taxprofiler_case_id, f"{taxprofiler_case_id}_nextflow_config" diff --git a/tests/fixture_plugins/analysis_starter/case_config_fixtures.py b/tests/fixture_plugins/analysis_starter/case_config_fixtures.py index cb651a8a77..c35744be06 100644 --- a/tests/fixture_plugins/analysis_starter/case_config_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/case_config_fixtures.py @@ -1,3 +1,5 @@ +from pathlib import Path + import pytest from cg.constants import Priority, Workflow @@ -7,15 +9,15 @@ @pytest.fixture def raredisease_case_config( raredisease_case_id: str, - dummy_work_dir_path: str, - raredisease_nextflow_config_path: str, - dummy_params_file_path: str, + raredisease_work_dir_path: Path, + raredisease_nextflow_config_file_path: Path, + raredisease_params_file_path: Path, ) -> NextflowCaseConfig: return NextflowCaseConfig( case_id=raredisease_case_id, workflow=Workflow.RAREDISEASE, case_priority=Priority.standard, - netxflow_config_file=raredisease_nextflow_config_path, - params_file=dummy_params_file_path, - work_dir=dummy_work_dir_path, + netxflow_config_file=raredisease_nextflow_config_file_path.as_posix(), + params_file=raredisease_params_file_path.as_posix(), + work_dir=raredisease_work_dir_path.as_posix(), ) diff --git a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py index e6ca055896..515de92e0f 100644 --- a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py @@ -11,5 +11,6 @@ def raredisease_configurator(raredisease_context: CGConfig) -> RarediseaseConfig return RarediseaseConfigurator( store=raredisease_context.status_db, config=raredisease_context.raredisease, + housekeeper_api=raredisease_context.housekeeper_api, lims=raredisease_context.lims_api, ) diff --git a/tests/fixture_plugins/analysis_starter/path_fixtures.py b/tests/fixture_plugins/analysis_starter/path_fixtures.py index cb35b242b8..0373e29af3 100644 --- a/tests/fixture_plugins/analysis_starter/path_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/path_fixtures.py @@ -2,18 +2,37 @@ import pytest +from cg.constants import FileExtensions + @pytest.fixture -def dummy_work_dir_path() -> str: - return "path/to/work/dir" +def raredisease_work_dir_path(raredisease_dir: Path) -> Path: + return Path(raredisease_dir, "work") + + +@pytest.fixture(scope="function") +def raredisease_gene_panel_path(raredisease_dir: Path, raredisease_case_id) -> Path: + """Path to gene panel file.""" + return Path(raredisease_dir, raredisease_case_id, "gene_panels").with_suffix(FileExtensions.BED) @pytest.fixture -def dummy_params_file_path() -> str: - return "path/to/params/file" +def raredisease_params_file_path(raredisease_dir: Path, raredisease_case_id: str) -> Path: + return Path( + raredisease_dir, raredisease_case_id, f"{raredisease_case_id}_params_file" + ).with_suffix(FileExtensions.YAML) @pytest.fixture -def raredisease_nextflow_config_path(raredisease_dir: Path, raredisease_case_id: str) -> str: - path = Path(raredisease_dir, raredisease_case_id, f"{raredisease_case_id}_nextflow_config.json") - return path.as_posix() +def raredisease_nextflow_config_file_path(raredisease_dir: Path, raredisease_case_id: str) -> Path: + return Path( + raredisease_dir, raredisease_case_id, f"{raredisease_case_id}_nextflow_config" + ).with_suffix(FileExtensions.JSON) + + +@pytest.fixture(scope="function") +def raredisease_sample_sheet_path(raredisease_dir, raredisease_case_id) -> Path: + """Path to sample sheet.""" + return Path( + raredisease_dir, raredisease_case_id, f"{raredisease_case_id}_samplesheet" + ).with_suffix(FileExtensions.CSV) diff --git a/tests/services/analysis_starter/test_configurator.py b/tests/services/analysis_starter/test_configurator.py index 38178618cf..bd49ad5afc 100644 --- a/tests/services/analysis_starter/test_configurator.py +++ b/tests/services/analysis_starter/test_configurator.py @@ -19,8 +19,8 @@ def test_create_config( configurator_fixture: str, case_config_fixture: str, case_id_fixture: str, - dummy_params_file_path: str, - dummy_work_dir_path: str, + raredisease_params_file_path: str, + raredisease_work_dir_path: str, request: pytest.FixtureRequest, ): """Test creating the case config for all pipelines.""" @@ -29,11 +29,8 @@ def test_create_config( case_id: str = request.getfixturevalue(case_id_fixture) # WHEN creating a case config - with ( - mock.patch.object( - configurator, "_get_params_file_path", return_value=Path(dummy_params_file_path) - ), - mock.patch.object(configurator, "_get_work_dir", return_value=Path(dummy_work_dir_path)), + with mock.patch.object( + configurator, "_get_work_dir", return_value=Path(raredisease_work_dir_path) ): case_config: CaseConfig = configurator.create_config(case_id=case_id) From 97060568e5d2fae9031daedb0b2aaabf6e9bcab1 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 28 Feb 2025 16:48:43 +0100 Subject: [PATCH 05/43] fix fixture --- tests/fixture_plugins/analysis_starter/path_fixtures.py | 4 ++-- tests/services/analysis_starter/test_configurator.py | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/fixture_plugins/analysis_starter/path_fixtures.py b/tests/fixture_plugins/analysis_starter/path_fixtures.py index 0373e29af3..36eac986c8 100644 --- a/tests/fixture_plugins/analysis_starter/path_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/path_fixtures.py @@ -6,8 +6,8 @@ @pytest.fixture -def raredisease_work_dir_path(raredisease_dir: Path) -> Path: - return Path(raredisease_dir, "work") +def raredisease_work_dir_path(raredisease_dir: Path, raredisease_case_id: Path) -> Path: + return Path(raredisease_dir, raredisease_case_id, "work") @pytest.fixture(scope="function") diff --git a/tests/services/analysis_starter/test_configurator.py b/tests/services/analysis_starter/test_configurator.py index bd49ad5afc..623a380dc0 100644 --- a/tests/services/analysis_starter/test_configurator.py +++ b/tests/services/analysis_starter/test_configurator.py @@ -29,10 +29,7 @@ def test_create_config( case_id: str = request.getfixturevalue(case_id_fixture) # WHEN creating a case config - with mock.patch.object( - configurator, "_get_work_dir", return_value=Path(raredisease_work_dir_path) - ): - case_config: CaseConfig = configurator.create_config(case_id=case_id) + case_config: CaseConfig = configurator.create_config(case_id=case_id) # THEN the expected case config is returned expected_case_config: CaseConfig = request.getfixturevalue(case_config_fixture) From c189d12f713df1be0cab215ab5d46660b75e22d0 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Mon, 3 Mar 2025 11:15:33 +0100 Subject: [PATCH 06/43] add skeleton of new classes --- .../config_file_creator/raredisease.py | 53 +++++++++++++++++++ .../implementations/raredisease.py | 18 ++++++- .../params_file_creator/raredisease.py | 8 +++ .../sample_sheet_creator/raredisease.py | 23 ++++++++ 4 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 cg/services/analysis_starter/configurator/config_file_creator/raredisease.py create mode 100644 cg/services/analysis_starter/configurator/params_file_creator/raredisease.py create mode 100644 cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py diff --git a/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py b/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py new file mode 100644 index 0000000000..070384db02 --- /dev/null +++ b/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py @@ -0,0 +1,53 @@ +import logging +from pathlib import Path + +from cg.constants import FileExtensions +from cg.io.txt import concat_txt +from cg.services.analysis_starter.configurator.utils import write_content_to_file_or_stdout +from cg.store.models import Case + +LOG = logging.getLogger(__name__) + + +class RarediseaseNextflowConfigCreator: + """Create a config file for the raredisease pipeline.""" + + def __init__( + self, store, platform: str, workflow_config_path: str, resources: str, account: str + ): + self.store = store + self.platform = platform + self.workflow_config_path = workflow_config_path + self.resources = resources + self.account = account + + @staticmethod + def get_file_path(case_id: str, case_path: Path) -> Path: + """Get the path to the nextflow config file.""" + return Path(case_path, f"{case_id}_nextflow_config").with_suffix(FileExtensions.JSON) + + def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: + """Create a config file for the raredisease pipeline.""" + content: str = self._get_file_content(case_id=case_id) + file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) + write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) + LOG.debug(f"Created nextflow config file {file_path.as_posix()} successfully") + + def _get_cluster_options(self, case_id: str) -> str: + case: Case = self.store.get_case_by_internal_id(case_id) + return f'process.clusterOptions = "-A {self.account} --qos={case.slurm_priority}"\n' + + def _get_file_content(self, case_id: str) -> str: + """Get the content of the nextflow config file.""" + config_files_list: list[str] = [ + self.platform, + self.workflow_config_path, + self.resources, + ] + case_specific_params: list[str] = [ + self._get_cluster_options(case_id=case_id), + ] + return concat_txt( + file_paths=config_files_list, + str_content=case_specific_params, + ) diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index 4d18815512..3c44a9e8af 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -19,7 +19,16 @@ RarediseaseSampleSheetHeaders, ) from cg.services.analysis_starter.configurator.abstract_service import Configurator +from cg.services.analysis_starter.configurator.config_file_creator.raredisease import ( + RarediseaseNextflowConfigCreator, +) from cg.services.analysis_starter.configurator.models.nextflow import NextflowCaseConfig +from cg.services.analysis_starter.configurator.params_file_creator.raredisease import ( + RarediseaseParamsCreator, +) +from cg.services.analysis_starter.configurator.sample_sheet_creator.raredisease import ( + RarediseaseSampleSheetCreator, +) from cg.services.analysis_starter.configurator.utils import ( extract_read_files, get_phenotype_code, @@ -44,7 +53,13 @@ def __init__( config: RarediseaseConfig, housekeeper_api: HousekeeperAPI, lims: LimsAPI, + sample_sheet_creator: RarediseaseSampleSheetCreator, + config_file_creator: RarediseaseNextflowConfigCreator, + parameters_file_creator: RarediseaseParamsCreator, ): + self.sample_sheet_creator = sample_sheet_creator + self.config_file_creator = config_file_creator + self.parameters_file_creator = parameters_file_creator self.account: str = config.slurm.account self.lims: LimsAPI = lims self.housekeeper_api: HousekeeperAPI = housekeeper_api @@ -60,10 +75,11 @@ def sample_sheet_headers(self) -> list[str]: return RarediseaseSampleSheetHeaders.list() def create_config(self, case_id: str, dry_run: bool = False) -> NextflowCaseConfig: + case_path: Path = self._get_case_path(case_id=case_id) self._create_case_directory(case_id=case_id, dry_run=False) self._create_sample_sheet(case_id=case_id, dry_run=False) self._create_params_file(case_id=case_id, dry_run=False) - self._create_nextflow_config(case_id=case_id, dry_run=False) + self.config_file_creator.create(case_id=case_id, case_path=case_path, dry_run=dry_run) return NextflowCaseConfig( case_id=case_id, case_priority=self._get_case_priority(case_id), diff --git a/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py b/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py new file mode 100644 index 0000000000..706bdc2dc2 --- /dev/null +++ b/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py @@ -0,0 +1,8 @@ +from pathlib import Path + + +class RarediseaseParamsCreator: + + def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: + """Create a params file for the raredisease pipeline.""" + pass diff --git a/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py b/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py new file mode 100644 index 0000000000..11ec07180a --- /dev/null +++ b/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py @@ -0,0 +1,23 @@ +from pathlib import Path + +from cg.constants import FileExtensions + + +class RarediseaseSampleSheetCreator: + + def __init__(self): + pass + + def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: + sample_sheet_content: list[list[any]] = self._get_sample_sheet_content(case_id=case_id) + if not dry_run: + self._write_sample_sheet( + content=sample_sheet_content, + file_path=self.get_sample_sheet_path(case_id=case_id, case_path=case_path), + header=self.sample_sheet_headers, + ) + + @staticmethod + def get_sample_sheet_path(case_id: str, case_path: Path) -> Path: + """Path to sample sheet.""" + return Path(case_path, f"{case_id}_samplesheet").with_suffix(FileExtensions.CSV) From 5854181d004ae9c092780e49d64ef5d7217e63ca Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Mon, 3 Mar 2025 11:22:36 +0100 Subject: [PATCH 07/43] resolve conflicts --- .../configurator/implementations/raredisease.py | 2 +- cg/services/analysis_starter/configurator/utils.py | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index 27b79c003c..6f7e7eb8b9 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -33,7 +33,6 @@ extract_read_files, get_phenotype_code, get_sex_code, - get_slurm_qos_for_case, parse_fastq_data, replace_values_in_params_file, write_content_to_file_or_stdout, @@ -307,6 +306,7 @@ def _write_sample_sheet( file_format=FileFormat.CSV, file_path=file_path, ) + def _get_cluster_options(self, case_id: str) -> str: case: Case = self.store.get_case_by_internal_id(case_id) return f'process.clusterOptions = "-A {self.account} --qos={case.slurm_priority}"\n' diff --git a/cg/services/analysis_starter/configurator/utils.py b/cg/services/analysis_starter/configurator/utils.py index 62a852bc67..815b7cb2b4 100644 --- a/cg/services/analysis_starter/configurator/utils.py +++ b/cg/services/analysis_starter/configurator/utils.py @@ -5,14 +5,11 @@ import rich_click as click -from cg.constants.priority import Priority, SlurmQos from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex from cg.io.gzip import read_gzip_first_line from cg.io.txt import write_txt from cg.meta.workflow.fastq import _is_undetermined_in_path -from cg.meta.workflow.utils.utils import are_all_samples_control from cg.models.fastq import FastqFileMeta, GetFastqFileMeta -from cg.store.models import Case LOG = logging.getLogger(__name__) @@ -25,13 +22,6 @@ def write_content_to_file_or_stdout(content: str, file_path: Path, dry_run: bool write_txt(content=content, file_path=file_path) -def get_slurm_qos_for_case(case: Case) -> str: - """Get Quality of service (SLURM QOS) for the case.""" - if are_all_samples_control(case=case): - return SlurmQos.EXPRESS - return Priority.priority_to_slurm_qos().get(case.priority) - - def replace_values_in_params_file(workflow_parameters: dict) -> dict: """ Iterate through the dictionary until all placeholders are replaced with the corresponding value From baf2220b1781579a847a7257733e93ee0796996e Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Mon, 3 Mar 2025 15:24:44 +0100 Subject: [PATCH 08/43] stash commit --- cg/io/yaml.py | 1 + cg/models/raredisease/raredisease.py | 7 +- .../config_file_creator/raredisease.py | 2 +- .../configurator/file_creators/abstract.py | 24 +++ .../implementations/raredisease.py | 4 +- .../params_file_creator/raredisease.py | 118 ++++++++++++- .../configurator/params_file_creator/utils.py | 32 ++++ .../sample_sheet_creator/raredisease.py | 167 ++++++++++++++++-- .../analysis_starter/configurator/utils.py | 31 ---- 9 files changed, 335 insertions(+), 51 deletions(-) create mode 100644 cg/services/analysis_starter/configurator/file_creators/abstract.py create mode 100644 cg/services/analysis_starter/configurator/params_file_creator/utils.py diff --git a/cg/io/yaml.py b/cg/io/yaml.py index 17d2f721a2..12f8092953 100644 --- a/cg/io/yaml.py +++ b/cg/io/yaml.py @@ -28,6 +28,7 @@ def write_yaml_stream(content: Any) -> str: return yaml.dump(content) +# TODO: Refactor this into a content convertor (without writting) def write_yaml_nextflow_style(content: dict[str, Any], file_path: Path) -> None: """Write content to yaml file accepted by Nextflow with non-quoted booleans and quoted strings.""" with open(file_path, "w") as outfile: diff --git a/cg/models/raredisease/raredisease.py b/cg/models/raredisease/raredisease.py index 1797d6c78c..5bbc0a4733 100644 --- a/cg/models/raredisease/raredisease.py +++ b/cg/models/raredisease/raredisease.py @@ -14,6 +14,7 @@ class RarediseaseQCMetrics(QCMetrics): total_reads: int +# TODO: MOve this to models folder in appropriate service class RarediseaseSampleSheetEntry(NextflowSampleSheetEntry): """Raredisease sample model is used when building the sample sheet.""" @@ -31,15 +32,15 @@ def reformat_sample_content(self) -> list[list[str]]: [ self.name, lane + 1, - self.fastq_forward_read_paths, - self.fastq_reverse_read_paths, + forward_path, + reverse_path, self.sex, self.phenotype, self.paternal_id, self.maternal_id, self.case_id, ] - for lane, (self.fastq_forward_read_paths, self.fastq_reverse_read_paths) in enumerate( + for lane, (forward_path, reverse_path) in enumerate( zip(self.fastq_forward_read_paths, self.fastq_reverse_read_paths) ) ] diff --git a/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py b/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py index 070384db02..61265d3d0e 100644 --- a/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py +++ b/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py @@ -28,8 +28,8 @@ def get_file_path(case_id: str, case_path: Path) -> Path: def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: """Create a config file for the raredisease pipeline.""" - content: str = self._get_file_content(case_id=case_id) file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) + content: str = self._get_file_content(case_id=case_id) write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) LOG.debug(f"Created nextflow config file {file_path.as_posix()} successfully") diff --git a/cg/services/analysis_starter/configurator/file_creators/abstract.py b/cg/services/analysis_starter/configurator/file_creators/abstract.py new file mode 100644 index 0000000000..6c810a9f9a --- /dev/null +++ b/cg/services/analysis_starter/configurator/file_creators/abstract.py @@ -0,0 +1,24 @@ +from abc import ABC, abstractmethod +from pathlib import Path + + +class NextflowFileCreator(ABC): + + def create(self, case_id: str, case_path: Path, dry_run: bool = False, **kwargs) -> None: + file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) + content: any = self._get_content(case_id=case_id) + self._write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) + + @staticmethod + @abstractmethod + def get_file_path(case_id: str, case_path: Path) -> Path: + pass + + @abstractmethod + def _get_content(self, case_id: str) -> any: + pass + + @staticmethod + @abstractmethod + def _write_content_to_file_or_stdout(content: any, file_path: Path, dry_run: bool) -> None: + pass diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index 6f7e7eb8b9..ed8693d726 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -26,6 +26,9 @@ from cg.services.analysis_starter.configurator.params_file_creator.raredisease import ( RarediseaseParamsCreator, ) +from cg.services.analysis_starter.configurator.params_file_creator.utils import ( + replace_values_in_params_file, +) from cg.services.analysis_starter.configurator.sample_sheet_creator.raredisease import ( RarediseaseSampleSheetCreator, ) @@ -34,7 +37,6 @@ get_phenotype_code, get_sex_code, parse_fastq_data, - replace_values_in_params_file, write_content_to_file_or_stdout, ) from cg.store.models import BedVersion, Case, CaseSample, Sample diff --git a/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py b/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py index 706bdc2dc2..8556d8ae67 100644 --- a/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py +++ b/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py @@ -1,8 +1,120 @@ +import logging from pathlib import Path +from cg.apps.lims import LimsAPI +from cg.constants import DEFAULT_CAPTURE_KIT, FileExtensions +from cg.constants.scout import ScoutExportFileName +from cg.constants.tb import AnalysisType +from cg.exc import CgDataError +from cg.io.yaml import read_yaml, write_yaml_nextflow_style +from cg.models.raredisease.raredisease import RarediseaseParameters +from cg.services.analysis_starter.configurator.params_file_creator.utils import ( + replace_values_in_params_file, +) +from cg.store.models import BedVersion, Case, Sample + +LOG = logging.getLogger(__name__) + class RarediseaseParamsCreator: - def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: - """Create a params file for the raredisease pipeline.""" - pass + def __init__(self, store, lims: LimsAPI, params: str): + self.store = store + self.params = params + self.lims = lims + + @staticmethod + def get_file_path(case_id: str, case_path: Path) -> Path: + return Path(case_path, f"{case_id}_params_file").with_suffix(FileExtensions.YAML) + + def create( + self, case_id: str, case_path: Path, sample_sheet_path: Path, dry_run: bool = False + ) -> None: + """Create parameters file for a case.""" + LOG.debug("Getting parameters information built on-the-fly") + built_workflow_parameters: dict | None = self._get_built_workflow_parameters( + case_id=case_id, case_path=case_path, sample_sheet_path=sample_sheet_path + ).model_dump() + LOG.debug("Adding parameters from the pipeline config file if it exist") + + workflow_parameters: dict = built_workflow_parameters | ( + read_yaml(self.params) if hasattr(self, "params") and self.params else {} + ) + replaced_workflow_parameters: dict = replace_values_in_params_file( + workflow_parameters=workflow_parameters + ) + if not dry_run: + self._write_file( + case_id=case_id, content=replaced_workflow_parameters, case_path=case_path + ) + + def _get_built_workflow_parameters( + self, case_id: str, case_path: Path, sample_sheet_path: Path + ) -> RarediseaseParameters: + """Return parameters.""" + analysis_type: str = self._get_data_analysis_type(case_id=case_id) + target_bed_file: str = self._get_target_bed(case_id=case_id, analysis_type=analysis_type) + skip_germlinecnvcaller = self._get_germlinecnvcaller_flag(analysis_type=analysis_type) + outdir = case_path + + return RarediseaseParameters( + input=sample_sheet_path, + outdir=outdir, + analysis_type=analysis_type, + target_bed_file=target_bed_file, + save_mapped_as_cram=True, + skip_germlinecnvcaller=skip_germlinecnvcaller, + vcfanno_extra_resources=f"{outdir}/{ScoutExportFileName.MANAGED_VARIANTS}", + vep_filters_scout_fmt=f"{outdir}/{ScoutExportFileName.PANELS}", + ) + + def _get_data_analysis_type(self, case_id: str) -> str: + """ + Return case analysis type (WEG, WGS, WTS or TGS). Assumes all case samples have the same + analysis type. + """ + sample: Sample = self.store.get_samples_by_case_id(case_id=case_id)[0] + return sample.application_version.application.analysis_type + + def _get_target_bed(self, case_id: str, analysis_type: str) -> str: + """ + Return the target bed file from LIMS or use default capture kit for WHOLE_GENOME_SEQUENCING. + """ + target_bed_file: str = self._get_target_bed_from_lims(case_id=case_id) + if not target_bed_file: + if analysis_type == AnalysisType.WGS: + return DEFAULT_CAPTURE_KIT + raise ValueError("No capture kit was found in LIMS") + return target_bed_file + + def _get_target_bed_from_lims(self, case_id: str) -> str | None: + """ + Get target bed filename from LIMS. + Raises: + CgDataError: if the bed target capture version is not found in StatusDB. + """ + case: Case = self.store.get_case_by_internal_id(internal_id=case_id) + sample: Sample = case.links[0].sample + if sample.from_sample: + sample: Sample = self.store.get_sample_by_internal_id(internal_id=sample.from_sample) + target_bed_shortname: str | None = self.lims.capture_kit(lims_id=sample.internal_id) + if not target_bed_shortname: + return None + bed_version: BedVersion | None = self.store.get_bed_version_by_short_name( + bed_version_short_name=target_bed_shortname + ) + if not bed_version: + raise CgDataError(f"Bed-version {target_bed_shortname} does not exist") + return bed_version.filename + + def _write_file(self, case_id: str, case_path: Path, content: dict = None) -> None: + """Write params-file for analysis.""" + file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) + LOG.debug("Writing parameters file") + if content: + write_yaml_nextflow_style( + content=content, + file_path=file_path, + ) + else: + file_path.touch() diff --git a/cg/services/analysis_starter/configurator/params_file_creator/utils.py b/cg/services/analysis_starter/configurator/params_file_creator/utils.py new file mode 100644 index 0000000000..c5c40506f7 --- /dev/null +++ b/cg/services/analysis_starter/configurator/params_file_creator/utils.py @@ -0,0 +1,32 @@ +import copy +import re + + +def replace_values_in_params_file(workflow_parameters: dict) -> dict: + """ + Iterate through the dictionary until all placeholders are replaced with the corresponding value + from the dictionary + """ + replaced_workflow_parameters = copy.deepcopy(workflow_parameters) + while True: + resolved: bool = True + for key, value in replaced_workflow_parameters.items(): + new_value: str | int = _replace_params_placeholders(value, workflow_parameters) + if new_value != value: + resolved = False + replaced_workflow_parameters[key] = new_value + if resolved: + break + return replaced_workflow_parameters + + +def _replace_params_placeholders(value: str | int, workflow_parameters: dict) -> str: + """Replace values marked as placeholders with values from the given dictionary""" + if isinstance(value, str): + placeholders: list[str] = re.findall(r"{{\s*([^{}\s]+)\s*}}", value) + for placeholder in placeholders: + if placeholder in workflow_parameters: + value = value.replace( + f"{{{{{placeholder}}}}}", str(workflow_parameters[placeholder]) + ) + return value diff --git a/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py b/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py index 11ec07180a..f7bf5037ac 100644 --- a/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py +++ b/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py @@ -1,23 +1,166 @@ +import logging +import re from pathlib import Path -from cg.constants import FileExtensions +import rich_click as click +from cg.apps.housekeeper.hk import HousekeeperAPI +from cg.constants import FileExtensions, SequencingFileTag +from cg.constants.constants import FileFormat +from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex +from cg.io.controller import WriteFile +from cg.io.gzip import read_gzip_first_line +from cg.meta.workflow.fastq import _is_undetermined_in_path +from cg.models.fastq import FastqFileMeta, GetFastqFileMeta +from cg.models.raredisease.raredisease import ( + RarediseaseSampleSheetEntry, + RarediseaseSampleSheetHeaders, +) +from cg.store.models import Case, CaseSample, Sample +from cg.store.store import Store -class RarediseaseSampleSheetCreator: +HEADER: list[str] = RarediseaseSampleSheetHeaders.list() +LOG = logging.getLogger(__name__) - def __init__(self): - pass - def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: - sample_sheet_content: list[list[any]] = self._get_sample_sheet_content(case_id=case_id) - if not dry_run: - self._write_sample_sheet( - content=sample_sheet_content, - file_path=self.get_sample_sheet_path(case_id=case_id, case_path=case_path), - header=self.sample_sheet_headers, - ) +class RarediseaseSampleSheetCreator: + + def __init__(self, store: Store, housekeeper_api: HousekeeperAPI): + self.housekeeper_api = housekeeper_api + self.store = store @staticmethod def get_sample_sheet_path(case_id: str, case_path: Path) -> Path: """Path to sample sheet.""" return Path(case_path, f"{case_id}_samplesheet").with_suffix(FileExtensions.CSV) + + def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: + file_path: Path = self.get_sample_sheet_path(case_id=case_id, case_path=case_path) + content: list[list[any]] = self._get_content(case_id=case_id) + self._write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) + + def _get_content(self, case_id: str) -> list[list[str]]: + """Return formatted information required to build a sample sheet for a case. + This contains information for all samples linked to the case.""" + sample_sheet_content: list = [] + case: Case = self.store.get_case_by_internal_id(internal_id=case_id) + LOG.info(f"Samples linked to case {case_id}: {len(case.links)}") + for link in case.links: + sample_sheet_content.extend(self._get_sample_sheet_content_per_sample(case_sample=link)) + return sample_sheet_content + + def _get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]: + """Collect and format information required to build a sample sheet for a single sample.""" + fastq_forward_read_paths, fastq_reverse_read_paths = self._get_paired_read_paths( + sample=case_sample.sample + ) + sample_sheet_entry = RarediseaseSampleSheetEntry( + name=case_sample.sample.internal_id, + fastq_forward_read_paths=fastq_forward_read_paths, + fastq_reverse_read_paths=fastq_reverse_read_paths, + sex=self._get_sex_code(case_sample.sample.sex), + phenotype=self._get_phenotype_code(case_sample.status), + paternal_id=case_sample.get_paternal_sample_id, + maternal_id=case_sample.get_maternal_sample_id, + case_id=case_sample.case.internal_id, + ) + return sample_sheet_entry.reformat_sample_content + + def _get_paired_read_paths(self, sample: Sample) -> tuple[list[str], list[str]]: + """Returns a tuple of paired fastq file paths for the forward and reverse read.""" + sample_metadata: list[FastqFileMeta] = self._get_fastq_metadata_for_sample(sample) + fastq_forward_read_paths: list[str] = self._extract_read_files( + metadata=sample_metadata, forward_read=True + ) + fastq_reverse_read_paths: list[str] = self._extract_read_files( + metadata=sample_metadata, reverse_read=True + ) + return fastq_forward_read_paths, fastq_reverse_read_paths + + def _get_fastq_metadata_for_sample(self, sample: Sample) -> list[FastqFileMeta]: + """Return FASTQ metadata objects for all fastq files linked to a sample.""" + return [ + self._parse_fastq_data(hk_file.full_path) + for hk_file in self.housekeeper_api.files( + bundle=sample.internal_id, tags={SequencingFileTag.FASTQ} + ) + ] + + def _parse_fastq_data(self, fastq_path: Path) -> FastqFileMeta: + header_line: str = read_gzip_first_line(file_path=fastq_path) + fastq_file_meta: FastqFileMeta = self._parse_fastq_header(header_line) + fastq_file_meta.path = fastq_path + fastq_file_meta.undetermined = _is_undetermined_in_path(fastq_path) + matches = re.findall(r"-l[1-9]t([1-9]{2})_", str(fastq_path)) + if len(matches) > 0: + fastq_file_meta.flow_cell_id = f"{fastq_file_meta.flow_cell_id}-{matches[0]}" + return fastq_file_meta + + @staticmethod + def _parse_fastq_header(line: str) -> FastqFileMeta | None: + """Parse and return fastq header metadata. + Handle Illumina's two different header formats + @see https://en.wikipedia.org/wiki/FASTQ_format + Raise: + TypeError if unable to split line into expected parts. + """ + parts = line.split(":") + try: + return GetFastqFileMeta.header_format.get(len(parts))(parts=parts) + except TypeError as exception: + LOG.error(f"Could not parse header format for header: {line}") + raise exception + + @staticmethod + def _extract_read_files( + metadata: list[FastqFileMeta], forward_read: bool = False, reverse_read: bool = False + ) -> list[str]: + """Extract a list of fastq file paths for either forward or reverse reads.""" + if forward_read and not reverse_read: + read_direction = 1 + elif reverse_read and not forward_read: + read_direction = 2 + else: + raise ValueError("Either forward or reverse needs to be specified") + sorted_metadata: list = sorted(metadata, key=lambda k: k.path) + return [ + fastq_file.path + for fastq_file in sorted_metadata + if fastq_file.read_direction == read_direction + ] + + @staticmethod + def _get_phenotype_code(phenotype: str) -> int: + """Return Raredisease phenotype code.""" + LOG.debug("Translate phenotype to integer code") + try: + code = PlinkPhenotypeStatus[phenotype.upper()] + except KeyError: + raise ValueError(f"{phenotype} is not a valid phenotype") + return code + + @staticmethod + def _get_sex_code(sex: str) -> int: + """Return Raredisease sex code.""" + LOG.debug("Translate sex to integer code") + try: + code = PlinkSex[sex.upper()] + except KeyError: + raise ValueError(f"{sex} is not a valid sex") + return code + + @staticmethod + def _write_content_to_file_or_stdout( + content: list[list[any]], file_path: Path, dry_run: bool = False + ) -> None: + """Write sample sheet to file.""" + content.insert(0, HEADER) + if dry_run: + click.echo(content) + return + LOG.debug(f"Writing sample sheet to {file_path}") + WriteFile.write_file_from_content( + content=content, + file_format=FileFormat.CSV, + file_path=file_path, + ) diff --git a/cg/services/analysis_starter/configurator/utils.py b/cg/services/analysis_starter/configurator/utils.py index 815b7cb2b4..bab67315ea 100644 --- a/cg/services/analysis_starter/configurator/utils.py +++ b/cg/services/analysis_starter/configurator/utils.py @@ -1,4 +1,3 @@ -import copy import logging import re from pathlib import Path @@ -22,36 +21,6 @@ def write_content_to_file_or_stdout(content: str, file_path: Path, dry_run: bool write_txt(content=content, file_path=file_path) -def replace_values_in_params_file(workflow_parameters: dict) -> dict: - """ - Iterate through the dictionary until all placeholders are replaced with the corresponding value - from the dictionary - """ - replaced_workflow_parameters = copy.deepcopy(workflow_parameters) - while True: - resolved: bool = True - for key, value in replaced_workflow_parameters.items(): - new_value: str | int = replace_params_placeholders(value, workflow_parameters) - if new_value != value: - resolved = False - replaced_workflow_parameters[key] = new_value - if resolved: - break - return replaced_workflow_parameters - - -def replace_params_placeholders(value: str | int, workflow_parameters: dict) -> str: - """Replace values marked as placeholders with values from the given dictionary""" - if isinstance(value, str): - placeholders: list[str] = re.findall(r"{{\s*([^{}\s]+)\s*}}", value) - for placeholder in placeholders: - if placeholder in workflow_parameters: - value = value.replace( - f"{{{{{placeholder}}}}}", str(workflow_parameters[placeholder]) - ) - return value - - def get_sex_code(sex: str) -> int: """Return Raredisease sex code.""" LOG.debug("Translate sex to integer code") From e8308fa691aa7914d67a41961ab3dcc24e104dad Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Mon, 3 Mar 2025 16:34:38 +0100 Subject: [PATCH 09/43] enhance sample sheet and config --- .../config_file_creator/raredisease.py | 25 +++++++++++++------ .../sample_sheet_creator/raredisease.py | 10 +++----- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py b/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py index 61265d3d0e..d33864003e 100644 --- a/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py +++ b/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py @@ -1,9 +1,10 @@ import logging from pathlib import Path +import rich_click as click + from cg.constants import FileExtensions -from cg.io.txt import concat_txt -from cg.services.analysis_starter.configurator.utils import write_content_to_file_or_stdout +from cg.io.txt import concat_txt, write_txt from cg.store.models import Case LOG = logging.getLogger(__name__) @@ -30,13 +31,9 @@ def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: """Create a config file for the raredisease pipeline.""" file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) content: str = self._get_file_content(case_id=case_id) - write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) + self._write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) LOG.debug(f"Created nextflow config file {file_path.as_posix()} successfully") - def _get_cluster_options(self, case_id: str) -> str: - case: Case = self.store.get_case_by_internal_id(case_id) - return f'process.clusterOptions = "-A {self.account} --qos={case.slurm_priority}"\n' - def _get_file_content(self, case_id: str) -> str: """Get the content of the nextflow config file.""" config_files_list: list[str] = [ @@ -51,3 +48,17 @@ def _get_file_content(self, case_id: str) -> str: file_paths=config_files_list, str_content=case_specific_params, ) + + def _get_cluster_options(self, case_id: str) -> str: + case: Case = self.store.get_case_by_internal_id(case_id) + return f'process.clusterOptions = "-A {self.account} --qos={case.slurm_priority}"\n' + + @staticmethod + def _write_content_to_file_or_stdout(content: any, file_path: Path, dry_run: bool) -> None: + """Write content to file or stdout.""" + if dry_run: + LOG.info(f"Dry-run: printing content to stdout. Would have written to {file_path}") + click.echo(content) + return + LOG.debug(f"Writing config file to {file_path}") + write_txt(content=content, file_path=file_path) diff --git a/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py b/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py index f7bf5037ac..b4edb94b84 100644 --- a/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py +++ b/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py @@ -6,9 +6,8 @@ from cg.apps.housekeeper.hk import HousekeeperAPI from cg.constants import FileExtensions, SequencingFileTag -from cg.constants.constants import FileFormat from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex -from cg.io.controller import WriteFile +from cg.io.csv import write_csv from cg.io.gzip import read_gzip_first_line from cg.meta.workflow.fastq import _is_undetermined_in_path from cg.models.fastq import FastqFileMeta, GetFastqFileMeta @@ -156,11 +155,8 @@ def _write_content_to_file_or_stdout( """Write sample sheet to file.""" content.insert(0, HEADER) if dry_run: + LOG.info(f"Dry-run: printing content to stdout. Would have written to {file_path}") click.echo(content) return LOG.debug(f"Writing sample sheet to {file_path}") - WriteFile.write_file_from_content( - content=content, - file_format=FileFormat.CSV, - file_path=file_path, - ) + write_csv(content=content, file_path=file_path) From 8bc008d34bcbf039bd1e9a9e6b946f8216fa513a Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Tue, 4 Mar 2025 09:15:44 +0100 Subject: [PATCH 10/43] unify classes --- .../config_file_creator/raredisease.py | 5 +- .../configurator/file_creators/abstract.py | 13 +++-- .../implementations/raredisease.py | 1 - .../params_file_creator/raredisease.py | 49 +++++++++++++------ .../sample_sheet_creator/raredisease.py | 13 ++--- 5 files changed, 54 insertions(+), 27 deletions(-) diff --git a/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py b/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py index d33864003e..1e657fadbe 100644 --- a/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py +++ b/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py @@ -5,12 +5,13 @@ from cg.constants import FileExtensions from cg.io.txt import concat_txt, write_txt +from cg.services.analysis_starter.configurator.file_creators.abstract import NextflowFileCreator from cg.store.models import Case LOG = logging.getLogger(__name__) -class RarediseaseNextflowConfigCreator: +class RarediseaseNextflowConfigCreator(NextflowFileCreator): """Create a config file for the raredisease pipeline.""" def __init__( @@ -54,7 +55,7 @@ def _get_cluster_options(self, case_id: str) -> str: return f'process.clusterOptions = "-A {self.account} --qos={case.slurm_priority}"\n' @staticmethod - def _write_content_to_file_or_stdout(content: any, file_path: Path, dry_run: bool) -> None: + def _write_content_to_file_or_stdout(content: str, file_path: Path, dry_run: bool) -> None: """Write content to file or stdout.""" if dry_run: LOG.info(f"Dry-run: printing content to stdout. Would have written to {file_path}") diff --git a/cg/services/analysis_starter/configurator/file_creators/abstract.py b/cg/services/analysis_starter/configurator/file_creators/abstract.py index 6c810a9f9a..e850a2b775 100644 --- a/cg/services/analysis_starter/configurator/file_creators/abstract.py +++ b/cg/services/analysis_starter/configurator/file_creators/abstract.py @@ -1,12 +1,14 @@ from abc import ABC, abstractmethod from pathlib import Path +from cg.constants import FileExtensions + class NextflowFileCreator(ABC): - def create(self, case_id: str, case_path: Path, dry_run: bool = False, **kwargs) -> None: + def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) - content: any = self._get_content(case_id=case_id) + content: any = self._get_file_content(case_id=case_id) self._write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) @staticmethod @@ -15,10 +17,15 @@ def get_file_path(case_id: str, case_path: Path) -> Path: pass @abstractmethod - def _get_content(self, case_id: str) -> any: + def _get_file_content(self, case_id: str) -> any: pass @staticmethod @abstractmethod def _write_content_to_file_or_stdout(content: any, file_path: Path, dry_run: bool) -> None: pass + + @staticmethod + def _get_sample_sheet_path(case_id: str, case_path: Path) -> Path: + """Path to sample sheet.""" + return Path(case_path, f"{case_id}_samplesheet").with_suffix(FileExtensions.CSV) diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index ed8693d726..f8892bc6af 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -113,7 +113,6 @@ def _create_params_file(self, case_id: str, dry_run: bool = False) -> None: case_id=case_id ).model_dump() LOG.debug("Adding parameters from the pipeline config file if it exist") - workflow_parameters: dict = built_workflow_parameters | ( read_yaml(self.params) if hasattr(self, "params") and self.params else {} ) diff --git a/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py b/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py index 8556d8ae67..615230df2e 100644 --- a/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py +++ b/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py @@ -1,6 +1,8 @@ import logging from pathlib import Path +import rich_click as click + from cg.apps.lims import LimsAPI from cg.constants import DEFAULT_CAPTURE_KIT, FileExtensions from cg.constants.scout import ScoutExportFileName @@ -8,6 +10,7 @@ from cg.exc import CgDataError from cg.io.yaml import read_yaml, write_yaml_nextflow_style from cg.models.raredisease.raredisease import RarediseaseParameters +from cg.services.analysis_starter.configurator.file_creators.abstract import NextflowFileCreator from cg.services.analysis_starter.configurator.params_file_creator.utils import ( replace_values_in_params_file, ) @@ -16,7 +19,7 @@ LOG = logging.getLogger(__name__) -class RarediseaseParamsCreator: +class RarediseaseParamsCreator(NextflowFileCreator): def __init__(self, store, lims: LimsAPI, params: str): self.store = store @@ -27,26 +30,28 @@ def __init__(self, store, lims: LimsAPI, params: str): def get_file_path(case_id: str, case_path: Path) -> Path: return Path(case_path, f"{case_id}_params_file").with_suffix(FileExtensions.YAML) - def create( - self, case_id: str, case_path: Path, sample_sheet_path: Path, dry_run: bool = False - ) -> None: + def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: """Create parameters file for a case.""" + file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) + content: any = self._get_file_content( + case_id=case_id, + case_path=case_path, + ) + self._write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) + + def _get_file_content(self, case_id: str, case_path: Path, sample_sheet_path: Path) -> dict: + """Return parameters.""" LOG.debug("Getting parameters information built on-the-fly") built_workflow_parameters: dict | None = self._get_built_workflow_parameters( - case_id=case_id, case_path=case_path, sample_sheet_path=sample_sheet_path + case_id=case_id, + case_path=case_path, + sample_sheet_path=sample_sheet_path, ).model_dump() LOG.debug("Adding parameters from the pipeline config file if it exist") - workflow_parameters: dict = built_workflow_parameters | ( read_yaml(self.params) if hasattr(self, "params") and self.params else {} ) - replaced_workflow_parameters: dict = replace_values_in_params_file( - workflow_parameters=workflow_parameters - ) - if not dry_run: - self._write_file( - case_id=case_id, content=replaced_workflow_parameters, case_path=case_path - ) + return replace_values_in_params_file(workflow_parameters) def _get_built_workflow_parameters( self, case_id: str, case_path: Path, sample_sheet_path: Path @@ -54,8 +59,8 @@ def _get_built_workflow_parameters( """Return parameters.""" analysis_type: str = self._get_data_analysis_type(case_id=case_id) target_bed_file: str = self._get_target_bed(case_id=case_id, analysis_type=analysis_type) - skip_germlinecnvcaller = self._get_germlinecnvcaller_flag(analysis_type=analysis_type) - outdir = case_path + skip_germlinecnvcaller: bool = self._get_germlinecnvcaller_flag(analysis_type=analysis_type) + outdir: Path = case_path return RarediseaseParameters( input=sample_sheet_path, @@ -107,6 +112,11 @@ def _get_target_bed_from_lims(self, case_id: str) -> str | None: raise CgDataError(f"Bed-version {target_bed_shortname} does not exist") return bed_version.filename + @staticmethod + def _get_germlinecnvcaller_flag(analysis_type: str) -> bool: + """Return True if the germlinecnvcaller should be skipped.""" + return True if analysis_type == AnalysisType.WGS else False + def _write_file(self, case_id: str, case_path: Path, content: dict = None) -> None: """Write params-file for analysis.""" file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) @@ -118,3 +128,12 @@ def _write_file(self, case_id: str, case_path: Path, content: dict = None) -> No ) else: file_path.touch() + + @staticmethod + def _write_content_to_file_or_stdout(content: any, file_path: Path, dry_run: bool) -> None: + if dry_run: + LOG.info(f"Dry-run: printing content to stdout. Would have written to {file_path}") + click.echo(content) + return + LOG.debug(f"Writing params file to {file_path}") + write_yaml_nextflow_style(content=content, file_path=file_path) diff --git a/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py b/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py index b4edb94b84..ac791960ae 100644 --- a/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py +++ b/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py @@ -15,6 +15,7 @@ RarediseaseSampleSheetEntry, RarediseaseSampleSheetHeaders, ) +from cg.services.analysis_starter.configurator.file_creators.abstract import NextflowFileCreator from cg.store.models import Case, CaseSample, Sample from cg.store.store import Store @@ -22,23 +23,23 @@ LOG = logging.getLogger(__name__) -class RarediseaseSampleSheetCreator: +class RarediseaseSampleSheetCreator(NextflowFileCreator): def __init__(self, store: Store, housekeeper_api: HousekeeperAPI): self.housekeeper_api = housekeeper_api self.store = store @staticmethod - def get_sample_sheet_path(case_id: str, case_path: Path) -> Path: + def get_file_path(case_id: str, case_path: Path) -> Path: """Path to sample sheet.""" - return Path(case_path, f"{case_id}_samplesheet").with_suffix(FileExtensions.CSV) + return NextflowFileCreator._get_sample_sheet_path(case_id=case_id, case_path=case_path) def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: - file_path: Path = self.get_sample_sheet_path(case_id=case_id, case_path=case_path) - content: list[list[any]] = self._get_content(case_id=case_id) + file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) + content: list[list[any]] = self._get_file_content(case_id=case_id) self._write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) - def _get_content(self, case_id: str) -> list[list[str]]: + def _get_file_content(self, case_id: str) -> list[list[str]]: """Return formatted information required to build a sample sheet for a case. This contains information for all samples linked to the case.""" sample_sheet_content: list = [] From cc83172740762b71bb6190c3814c0467ed89eee1 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Tue, 4 Mar 2025 17:34:40 +0100 Subject: [PATCH 11/43] stash commit --- cg/constants/nf_analysis.py | 6 ++ .../config_file_creator/raredisease.py | 65 ------------- .../configurator/file_creators/abstract.py | 26 +---- .../file_creators/config_creator.py | 35 +++++++ .../params}/raredisease.py | 95 ++++++------------- .../params}/utils.py | 0 .../sample_sheet}/raredisease.py | 50 +++------- .../configurator/file_creators/utils.py | 59 ++++++++++++ .../implementations/raredisease.py | 34 ++++--- 9 files changed, 158 insertions(+), 212 deletions(-) delete mode 100644 cg/services/analysis_starter/configurator/config_file_creator/raredisease.py create mode 100644 cg/services/analysis_starter/configurator/file_creators/config_creator.py rename cg/services/analysis_starter/configurator/{params_file_creator => file_creators/params}/raredisease.py (51%) rename cg/services/analysis_starter/configurator/{params_file_creator => file_creators/params}/utils.py (100%) rename cg/services/analysis_starter/configurator/{sample_sheet_creator => file_creators/sample_sheet}/raredisease.py (76%) create mode 100644 cg/services/analysis_starter/configurator/file_creators/utils.py diff --git a/cg/constants/nf_analysis.py b/cg/constants/nf_analysis.py index 51edc6af46..0cd1221aaf 100644 --- a/cg/constants/nf_analysis.py +++ b/cg/constants/nf_analysis.py @@ -15,6 +15,12 @@ class NfTowerStatus(StrEnum): UNKNOWN: str = "UNKNOWN" +class NextflowFileType(StrEnum): + PARAMS = "params_file" + SAMPLE_SHEET = "sample_sheet" + CONFIG = "nextflow_config" + + NALLO_METRIC_CONDITIONS: dict[str, dict[str, Any]] = { "median_coverage": {"norm": "gt", "threshold": 25}, } diff --git a/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py b/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py deleted file mode 100644 index 1e657fadbe..0000000000 --- a/cg/services/analysis_starter/configurator/config_file_creator/raredisease.py +++ /dev/null @@ -1,65 +0,0 @@ -import logging -from pathlib import Path - -import rich_click as click - -from cg.constants import FileExtensions -from cg.io.txt import concat_txt, write_txt -from cg.services.analysis_starter.configurator.file_creators.abstract import NextflowFileCreator -from cg.store.models import Case - -LOG = logging.getLogger(__name__) - - -class RarediseaseNextflowConfigCreator(NextflowFileCreator): - """Create a config file for the raredisease pipeline.""" - - def __init__( - self, store, platform: str, workflow_config_path: str, resources: str, account: str - ): - self.store = store - self.platform = platform - self.workflow_config_path = workflow_config_path - self.resources = resources - self.account = account - - @staticmethod - def get_file_path(case_id: str, case_path: Path) -> Path: - """Get the path to the nextflow config file.""" - return Path(case_path, f"{case_id}_nextflow_config").with_suffix(FileExtensions.JSON) - - def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: - """Create a config file for the raredisease pipeline.""" - file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) - content: str = self._get_file_content(case_id=case_id) - self._write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) - LOG.debug(f"Created nextflow config file {file_path.as_posix()} successfully") - - def _get_file_content(self, case_id: str) -> str: - """Get the content of the nextflow config file.""" - config_files_list: list[str] = [ - self.platform, - self.workflow_config_path, - self.resources, - ] - case_specific_params: list[str] = [ - self._get_cluster_options(case_id=case_id), - ] - return concat_txt( - file_paths=config_files_list, - str_content=case_specific_params, - ) - - def _get_cluster_options(self, case_id: str) -> str: - case: Case = self.store.get_case_by_internal_id(case_id) - return f'process.clusterOptions = "-A {self.account} --qos={case.slurm_priority}"\n' - - @staticmethod - def _write_content_to_file_or_stdout(content: str, file_path: Path, dry_run: bool) -> None: - """Write content to file or stdout.""" - if dry_run: - LOG.info(f"Dry-run: printing content to stdout. Would have written to {file_path}") - click.echo(content) - return - LOG.debug(f"Writing config file to {file_path}") - write_txt(content=content, file_path=file_path) diff --git a/cg/services/analysis_starter/configurator/file_creators/abstract.py b/cg/services/analysis_starter/configurator/file_creators/abstract.py index e850a2b775..7fb09d4d42 100644 --- a/cg/services/analysis_starter/configurator/file_creators/abstract.py +++ b/cg/services/analysis_starter/configurator/file_creators/abstract.py @@ -1,31 +1,9 @@ from abc import ABC, abstractmethod from pathlib import Path -from cg.constants import FileExtensions - -class NextflowFileCreator(ABC): - - def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: - file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) - content: any = self._get_file_content(case_id=case_id) - self._write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) - - @staticmethod - @abstractmethod - def get_file_path(case_id: str, case_path: Path) -> Path: - pass +class FileContentCreator(ABC): @abstractmethod - def _get_file_content(self, case_id: str) -> any: + def create(self, case_path: Path) -> any: pass - - @staticmethod - @abstractmethod - def _write_content_to_file_or_stdout(content: any, file_path: Path, dry_run: bool) -> None: - pass - - @staticmethod - def _get_sample_sheet_path(case_id: str, case_path: Path) -> Path: - """Path to sample sheet.""" - return Path(case_path, f"{case_id}_samplesheet").with_suffix(FileExtensions.CSV) diff --git a/cg/services/analysis_starter/configurator/file_creators/config_creator.py b/cg/services/analysis_starter/configurator/file_creators/config_creator.py new file mode 100644 index 0000000000..fb7c87b853 --- /dev/null +++ b/cg/services/analysis_starter/configurator/file_creators/config_creator.py @@ -0,0 +1,35 @@ +from cg.io.txt import concat_txt +from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator +from cg.store.models import Case +from cg.store.store import Store + + +class ConfigFileContentCreator(FileContentCreator): + + def __init__( + self, store: Store, platform: str, workflow_config_path: str, resources: str, account: str + ): + self.store = store + self.platform = platform + self.workflow_config_path = workflow_config_path + self.resources = resources + self.account = account + + def create(self, case_id: str) -> str: + """Get the content of the nextflow config file.""" + config_files_list: list[str] = [ + self.platform, + self.workflow_config_path, + self.resources, + ] + case_specific_params: list[str] = [ + self._get_cluster_options(case_id=case_id), + ] + return concat_txt( + file_paths=config_files_list, + str_content=case_specific_params, + ) + + def _get_cluster_options(self, case_id: str) -> str: + case: Case = self.store.get_case_by_internal_id(case_id) + return f'process.clusterOptions = "-A {self.account} --qos={case.slurm_priority}"\n' diff --git a/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py b/cg/services/analysis_starter/configurator/file_creators/params/raredisease.py similarity index 51% rename from cg/services/analysis_starter/configurator/params_file_creator/raredisease.py rename to cg/services/analysis_starter/configurator/file_creators/params/raredisease.py index 615230df2e..62e0925571 100644 --- a/cg/services/analysis_starter/configurator/params_file_creator/raredisease.py +++ b/cg/services/analysis_starter/configurator/file_creators/params/raredisease.py @@ -1,76 +1,58 @@ -import logging from pathlib import Path -import rich_click as click - from cg.apps.lims import LimsAPI -from cg.constants import DEFAULT_CAPTURE_KIT, FileExtensions +from cg.constants import DEFAULT_CAPTURE_KIT +from cg.constants.nf_analysis import NextflowFileType from cg.constants.scout import ScoutExportFileName from cg.constants.tb import AnalysisType from cg.exc import CgDataError -from cg.io.yaml import read_yaml, write_yaml_nextflow_style +from cg.io.yaml import read_yaml from cg.models.raredisease.raredisease import RarediseaseParameters -from cg.services.analysis_starter.configurator.file_creators.abstract import NextflowFileCreator -from cg.services.analysis_starter.configurator.params_file_creator.utils import ( +from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator +from cg.services.analysis_starter.configurator.file_creators.params.utils import ( replace_values_in_params_file, ) +from cg.services.analysis_starter.configurator.file_creators.utils import ( + get_case_id_from_path, + get_file_path, +) from cg.store.models import BedVersion, Case, Sample +from cg.store.store import Store -LOG = logging.getLogger(__name__) +class RarediseaseParamsFileContentCreator(FileContentCreator): -class RarediseaseParamsCreator(NextflowFileCreator): - - def __init__(self, store, lims: LimsAPI, params: str): + def __init__(self, store: Store, lims: LimsAPI, params: str): self.store = store - self.params = params self.lims = lims + self.params = params - @staticmethod - def get_file_path(case_id: str, case_path: Path) -> Path: - return Path(case_path, f"{case_id}_params_file").with_suffix(FileExtensions.YAML) - - def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: + def create(self, case_path: Path) -> dict: """Create parameters file for a case.""" - file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) - content: any = self._get_file_content( - case_id=case_id, - case_path=case_path, + case_workflow_parameters: dict = self._get_case_parameters(case_path).model_dump() + workflow_parameters: any = read_yaml(self.params) + parameters: dict = case_workflow_parameters | workflow_parameters + curated_parameters: dict = replace_values_in_params_file(parameters) + return curated_parameters + + def _get_case_parameters(self, case_path: Path) -> RarediseaseParameters: + """Return case-specific parameters for the analysis.""" + case_id: str = get_case_id_from_path(case_path=case_path) + sample_sheet_path: Path = get_file_path( + case_path=case_path, file_type=NextflowFileType.SAMPLE_SHEET ) - self._write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) - - def _get_file_content(self, case_id: str, case_path: Path, sample_sheet_path: Path) -> dict: - """Return parameters.""" - LOG.debug("Getting parameters information built on-the-fly") - built_workflow_parameters: dict | None = self._get_built_workflow_parameters( - case_id=case_id, - case_path=case_path, - sample_sheet_path=sample_sheet_path, - ).model_dump() - LOG.debug("Adding parameters from the pipeline config file if it exist") - workflow_parameters: dict = built_workflow_parameters | ( - read_yaml(self.params) if hasattr(self, "params") and self.params else {} - ) - return replace_values_in_params_file(workflow_parameters) - - def _get_built_workflow_parameters( - self, case_id: str, case_path: Path, sample_sheet_path: Path - ) -> RarediseaseParameters: - """Return parameters.""" analysis_type: str = self._get_data_analysis_type(case_id=case_id) target_bed_file: str = self._get_target_bed(case_id=case_id, analysis_type=analysis_type) skip_germlinecnvcaller: bool = self._get_germlinecnvcaller_flag(analysis_type=analysis_type) - outdir: Path = case_path - return RarediseaseParameters( input=sample_sheet_path, - outdir=outdir, + outdir=case_path, analysis_type=analysis_type, target_bed_file=target_bed_file, save_mapped_as_cram=True, skip_germlinecnvcaller=skip_germlinecnvcaller, - vcfanno_extra_resources=f"{outdir}/{ScoutExportFileName.MANAGED_VARIANTS}", - vep_filters_scout_fmt=f"{outdir}/{ScoutExportFileName.PANELS}", + vcfanno_extra_resources=f"{case_path}/{ScoutExportFileName.MANAGED_VARIANTS}", + vep_filters_scout_fmt=f"{case_path}/{ScoutExportFileName.PANELS}", ) def _get_data_analysis_type(self, case_id: str) -> str: @@ -116,24 +98,3 @@ def _get_target_bed_from_lims(self, case_id: str) -> str | None: def _get_germlinecnvcaller_flag(analysis_type: str) -> bool: """Return True if the germlinecnvcaller should be skipped.""" return True if analysis_type == AnalysisType.WGS else False - - def _write_file(self, case_id: str, case_path: Path, content: dict = None) -> None: - """Write params-file for analysis.""" - file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) - LOG.debug("Writing parameters file") - if content: - write_yaml_nextflow_style( - content=content, - file_path=file_path, - ) - else: - file_path.touch() - - @staticmethod - def _write_content_to_file_or_stdout(content: any, file_path: Path, dry_run: bool) -> None: - if dry_run: - LOG.info(f"Dry-run: printing content to stdout. Would have written to {file_path}") - click.echo(content) - return - LOG.debug(f"Writing params file to {file_path}") - write_yaml_nextflow_style(content=content, file_path=file_path) diff --git a/cg/services/analysis_starter/configurator/params_file_creator/utils.py b/cg/services/analysis_starter/configurator/file_creators/params/utils.py similarity index 100% rename from cg/services/analysis_starter/configurator/params_file_creator/utils.py rename to cg/services/analysis_starter/configurator/file_creators/params/utils.py diff --git a/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py similarity index 76% rename from cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py rename to cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py index ac791960ae..472751b2c0 100644 --- a/cg/services/analysis_starter/configurator/sample_sheet_creator/raredisease.py +++ b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py @@ -2,12 +2,10 @@ import re from pathlib import Path -import rich_click as click - from cg.apps.housekeeper.hk import HousekeeperAPI -from cg.constants import FileExtensions, SequencingFileTag +from cg.apps.lims import LimsAPI +from cg.constants import SequencingFileTag from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex -from cg.io.csv import write_csv from cg.io.gzip import read_gzip_first_line from cg.meta.workflow.fastq import _is_undetermined_in_path from cg.models.fastq import FastqFileMeta, GetFastqFileMeta @@ -15,36 +13,27 @@ RarediseaseSampleSheetEntry, RarediseaseSampleSheetHeaders, ) -from cg.services.analysis_starter.configurator.file_creators.abstract import NextflowFileCreator +from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator +from cg.services.analysis_starter.configurator.file_creators.utils import get_case_id_from_path from cg.store.models import Case, CaseSample, Sample from cg.store.store import Store -HEADER: list[str] = RarediseaseSampleSheetHeaders.list() LOG = logging.getLogger(__name__) -class RarediseaseSampleSheetCreator(NextflowFileCreator): +class RarediseaseSampleSheetContentCreator(FileContentCreator): - def __init__(self, store: Store, housekeeper_api: HousekeeperAPI): - self.housekeeper_api = housekeeper_api + def __init__(self, store: Store, housekeeper_api: HousekeeperAPI, lims: LimsAPI): self.store = store + self.housekeeper_api = housekeeper_api + self.lims = lims - @staticmethod - def get_file_path(case_id: str, case_path: Path) -> Path: - """Path to sample sheet.""" - return NextflowFileCreator._get_sample_sheet_path(case_id=case_id, case_path=case_path) - - def create(self, case_id: str, case_path: Path, dry_run: bool = False) -> None: - file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) - content: list[list[any]] = self._get_file_content(case_id=case_id) - self._write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) - - def _get_file_content(self, case_id: str) -> list[list[str]]: + def create(self, case_path: Path) -> any: """Return formatted information required to build a sample sheet for a case. This contains information for all samples linked to the case.""" - sample_sheet_content: list = [] + case_id: str = get_case_id_from_path(case_path=case_path) case: Case = self.store.get_case_by_internal_id(internal_id=case_id) - LOG.info(f"Samples linked to case {case_id}: {len(case.links)}") + sample_sheet_content: list[list[str]] = [RarediseaseSampleSheetHeaders.list()] for link in case.links: sample_sheet_content.extend(self._get_sample_sheet_content_per_sample(case_sample=link)) return sample_sheet_content @@ -132,7 +121,6 @@ def _extract_read_files( @staticmethod def _get_phenotype_code(phenotype: str) -> int: """Return Raredisease phenotype code.""" - LOG.debug("Translate phenotype to integer code") try: code = PlinkPhenotypeStatus[phenotype.upper()] except KeyError: @@ -140,24 +128,10 @@ def _get_phenotype_code(phenotype: str) -> int: return code @staticmethod - def _get_sex_code(sex: str) -> int: + def _get_sex_code(sex: str) -> PlinkSex: """Return Raredisease sex code.""" - LOG.debug("Translate sex to integer code") try: code = PlinkSex[sex.upper()] except KeyError: raise ValueError(f"{sex} is not a valid sex") return code - - @staticmethod - def _write_content_to_file_or_stdout( - content: list[list[any]], file_path: Path, dry_run: bool = False - ) -> None: - """Write sample sheet to file.""" - content.insert(0, HEADER) - if dry_run: - LOG.info(f"Dry-run: printing content to stdout. Would have written to {file_path}") - click.echo(content) - return - LOG.debug(f"Writing sample sheet to {file_path}") - write_csv(content=content, file_path=file_path) diff --git a/cg/services/analysis_starter/configurator/file_creators/utils.py b/cg/services/analysis_starter/configurator/file_creators/utils.py new file mode 100644 index 0000000000..5847dddff6 --- /dev/null +++ b/cg/services/analysis_starter/configurator/file_creators/utils.py @@ -0,0 +1,59 @@ +import logging +from pathlib import Path + +import rich_click as click + +from cg.constants import FileExtensions +from cg.constants.nf_analysis import NextflowFileType +from cg.io.csv import write_csv +from cg.io.json import write_json +from cg.io.yaml import write_yaml_nextflow_style +from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator + +LOG = logging.getLogger(__name__) + +FILE_TYPE_TO_EXTENSION: dict[NextflowFileType, FileExtensions] = { + NextflowFileType.PARAMS: FileExtensions.YAML, + NextflowFileType.SAMPLE_SHEET: FileExtensions.CSV, + NextflowFileType.CONFIG: FileExtensions.JSON, +} + +FILE_TYPE_TO_WRITER: dict[NextflowFileType, callable] = { + NextflowFileType.PARAMS: write_yaml_nextflow_style, + NextflowFileType.SAMPLE_SHEET: write_csv, + NextflowFileType.CONFIG: write_json, +} + + +def get_file_path(case_path: Path, file_type: NextflowFileType) -> Path: + case_id: str = case_path.name + extension: FileExtensions = FILE_TYPE_TO_EXTENSION[file_type] + return Path(case_path, f"{case_id}_{file_type}").with_suffix(extension) + + +def write_content_to_file_or_stdout( + content: any, file_path: Path, file_type: NextflowFileType, dry_run: bool +) -> None: + if dry_run: + LOG.info(f"Dry-run: printing content to stdout. Would have written to {file_path}") + click.echo(content) + return + LOG.debug(f"Writing sample sheet to {file_path}") + FILE_TYPE_TO_WRITER[file_type](content=content, file_path=file_path) + + +def create_file( + content_creator: FileContentCreator, + case_path: Path, + file_type: NextflowFileType, + dry_run: bool = False, +) -> None: + file_path: Path = get_file_path(case_path=case_path, file_type=file_type) + content: any = content_creator.create(case_path) + write_content_to_file_or_stdout( + content=content, file_path=file_path, file_type=file_type, dry_run=dry_run + ) + + +def get_case_id_from_path(case_path: Path) -> str: + return case_path.name diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index f8892bc6af..73c9d76461 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -19,19 +19,16 @@ RarediseaseSampleSheetHeaders, ) from cg.services.analysis_starter.configurator.abstract_service import Configurator -from cg.services.analysis_starter.configurator.config_file_creator.raredisease import ( - RarediseaseNextflowConfigCreator, +from cg.services.analysis_starter.configurator.file_creators.config_creator import ( + ConfigFileContentCreator, ) -from cg.services.analysis_starter.configurator.models.nextflow import NextflowCaseConfig -from cg.services.analysis_starter.configurator.params_file_creator.raredisease import ( - RarediseaseParamsCreator, -) -from cg.services.analysis_starter.configurator.params_file_creator.utils import ( - replace_values_in_params_file, +from cg.services.analysis_starter.configurator.file_creators.params.raredisease import ( + RarediseaseParamsFileContentCreator, ) -from cg.services.analysis_starter.configurator.sample_sheet_creator.raredisease import ( - RarediseaseSampleSheetCreator, +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.raredisease import ( + RarediseaseSampleSheetContentCreator, ) +from cg.services.analysis_starter.configurator.models.nextflow import NextflowCaseConfig from cg.services.analysis_starter.configurator.utils import ( extract_read_files, get_phenotype_code, @@ -54,13 +51,13 @@ def __init__( config: RarediseaseConfig, housekeeper_api: HousekeeperAPI, lims: LimsAPI, - sample_sheet_creator: RarediseaseSampleSheetCreator, - config_file_creator: RarediseaseNextflowConfigCreator, - parameters_file_creator: RarediseaseParamsCreator, + sample_sheet_content_creator: RarediseaseSampleSheetContentCreator, + config_file_content_creator: ConfigFileContentCreator, + parameters_content_file_creator: RarediseaseParamsFileContentCreator, ): - self.sample_sheet_creator = sample_sheet_creator - self.config_file_creator = config_file_creator - self.parameters_file_creator = parameters_file_creator + self.sample_sheet_content_creator = sample_sheet_content_creator + self.config_file_content_creator = config_file_content_creator + self.parameters_content_file_creator = parameters_content_file_creator self.account: str = config.slurm.account self.lims: LimsAPI = lims self.housekeeper_api: HousekeeperAPI = housekeeper_api @@ -76,11 +73,12 @@ def sample_sheet_headers(self) -> list[str]: return RarediseaseSampleSheetHeaders.list() def create_config(self, case_id: str, dry_run: bool = False) -> NextflowCaseConfig: - case_path: Path = self._get_case_path(case_id=case_id) self._create_case_directory(case_id=case_id, dry_run=False) self._create_sample_sheet(case_id=case_id, dry_run=False) self._create_params_file(case_id=case_id, dry_run=False) - self.config_file_creator.create(case_id=case_id, case_path=case_path, dry_run=dry_run) + self._create_nextflow_config(case_id=case_id, dry_run=dry_run) + self._create_gene_panel(case_id=case_id, dry_run=dry_run) + self._write_managed_variants(case_id=case_id, dry_run=dry_run) return NextflowCaseConfig( case_id=case_id, case_priority=self._get_case_priority(case_id), From 1cc90cf1937569eb7e2c0d7e69f78170dc69449c Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Tue, 4 Mar 2025 17:35:53 +0100 Subject: [PATCH 12/43] remove out of scope change --- cg/io/yaml.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cg/io/yaml.py b/cg/io/yaml.py index 12f8092953..17d2f721a2 100644 --- a/cg/io/yaml.py +++ b/cg/io/yaml.py @@ -28,7 +28,6 @@ def write_yaml_stream(content: Any) -> str: return yaml.dump(content) -# TODO: Refactor this into a content convertor (without writting) def write_yaml_nextflow_style(content: dict[str, Any], file_path: Path) -> None: """Write content to yaml file accepted by Nextflow with non-quoted booleans and quoted strings.""" with open(file_path, "w") as outfile: From 1e4830ae9884d2d8832094f9d1331e9c847cbf38 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Wed, 5 Mar 2025 10:30:30 +0100 Subject: [PATCH 13/43] created a nextflow level of inheritance in configurator --- cg/models/raredisease/raredisease.py | 2 +- .../configurator/abstract_service.py | 4 +- .../file_creators/config_creator.py | 2 +- .../configurator/file_creators/utils.py | 13 +- .../configurator/implementations/nextflow.py | 106 ++++++ .../implementations/raredisease.py | 304 ++---------------- 6 files changed, 137 insertions(+), 294 deletions(-) create mode 100644 cg/services/analysis_starter/configurator/implementations/nextflow.py diff --git a/cg/models/raredisease/raredisease.py b/cg/models/raredisease/raredisease.py index 5bbc0a4733..bbe2b37446 100644 --- a/cg/models/raredisease/raredisease.py +++ b/cg/models/raredisease/raredisease.py @@ -14,7 +14,7 @@ class RarediseaseQCMetrics(QCMetrics): total_reads: int -# TODO: MOve this to models folder in appropriate service +# TODO: Move these to models folder in appropriate service class RarediseaseSampleSheetEntry(NextflowSampleSheetEntry): """Raredisease sample model is used when building the sample sheet.""" diff --git a/cg/services/analysis_starter/configurator/abstract_service.py b/cg/services/analysis_starter/configurator/abstract_service.py index 0886cadc64..9352eba802 100644 --- a/cg/services/analysis_starter/configurator/abstract_service.py +++ b/cg/services/analysis_starter/configurator/abstract_service.py @@ -1,8 +1,10 @@ from abc import ABC +from cg.services.analysis_starter.configurator.abstract_model import CaseConfig + class Configurator(ABC): - def create_config(self, case_id: str, dry_run: bool = False): + def create_config(self, case_id: str) -> CaseConfig: """Abstract method to create a case config for a case.""" pass diff --git a/cg/services/analysis_starter/configurator/file_creators/config_creator.py b/cg/services/analysis_starter/configurator/file_creators/config_creator.py index fb7c87b853..86288c2337 100644 --- a/cg/services/analysis_starter/configurator/file_creators/config_creator.py +++ b/cg/services/analysis_starter/configurator/file_creators/config_creator.py @@ -4,7 +4,7 @@ from cg.store.store import Store -class ConfigFileContentCreator(FileContentCreator): +class NextflowConfigFileContentCreator(FileContentCreator): def __init__( self, store: Store, platform: str, workflow_config_path: str, resources: str, account: str diff --git a/cg/services/analysis_starter/configurator/file_creators/utils.py b/cg/services/analysis_starter/configurator/file_creators/utils.py index 5847dddff6..d698d7d68a 100644 --- a/cg/services/analysis_starter/configurator/file_creators/utils.py +++ b/cg/services/analysis_starter/configurator/file_creators/utils.py @@ -1,8 +1,6 @@ import logging from pathlib import Path -import rich_click as click - from cg.constants import FileExtensions from cg.constants.nf_analysis import NextflowFileType from cg.io.csv import write_csv @@ -32,12 +30,8 @@ def get_file_path(case_path: Path, file_type: NextflowFileType) -> Path: def write_content_to_file_or_stdout( - content: any, file_path: Path, file_type: NextflowFileType, dry_run: bool + content: any, file_path: Path, file_type: NextflowFileType ) -> None: - if dry_run: - LOG.info(f"Dry-run: printing content to stdout. Would have written to {file_path}") - click.echo(content) - return LOG.debug(f"Writing sample sheet to {file_path}") FILE_TYPE_TO_WRITER[file_type](content=content, file_path=file_path) @@ -46,13 +40,10 @@ def create_file( content_creator: FileContentCreator, case_path: Path, file_type: NextflowFileType, - dry_run: bool = False, ) -> None: file_path: Path = get_file_path(case_path=case_path, file_type=file_type) content: any = content_creator.create(case_path) - write_content_to_file_or_stdout( - content=content, file_path=file_path, file_type=file_type, dry_run=dry_run - ) + write_content_to_file_or_stdout(content=content, file_path=file_path, file_type=file_type) def get_case_id_from_path(case_path: Path) -> str: diff --git a/cg/services/analysis_starter/configurator/implementations/nextflow.py b/cg/services/analysis_starter/configurator/implementations/nextflow.py new file mode 100644 index 0000000000..6081aae23e --- /dev/null +++ b/cg/services/analysis_starter/configurator/implementations/nextflow.py @@ -0,0 +1,106 @@ +from abc import abstractmethod +from pathlib import Path + +from cg.apps.housekeeper.hk import HousekeeperAPI +from cg.apps.lims import LimsAPI +from cg.constants import Workflow +from cg.constants.nf_analysis import NextflowFileType +from cg.services.analysis_starter.configurator.abstract_service import Configurator +from cg.services.analysis_starter.configurator.file_creators.config_creator import ( + NextflowConfigFileContentCreator, +) +from cg.services.analysis_starter.configurator.file_creators.utils import create_file, get_file_path +from cg.services.analysis_starter.configurator.models.nextflow import NextflowCaseConfig +from cg.store.models import Case +from cg.store.store import Store + + +class NextflowConfigurator(Configurator): + + def __init__(self, config: any, store: Store, housekeeper_api: HousekeeperAPI, lims: LimsAPI): + self.root_dir: str = config.root_dir + self.store: Store = store + self.housekeeper_api: HousekeeperAPI = housekeeper_api + self.lims: LimsAPI = lims + self.config_content_creator = NextflowConfigFileContentCreator( + store=self.store, + platform=config.platform, + workflow_config_path=config.workflow_config_path, + resources=config.resources, + account=config.slurm.account, + ) + + def create_config(self, case_id: str) -> NextflowCaseConfig: + """Create a Nextflow case config.""" + self._create_case_directory(case_id=case_id) + self._create_sample_sheet(case_id=case_id) + self._create_params_file(case_id=case_id) + self._create_nextflow_config(case_id=case_id) + self._do_pipeline_specific_actions(case_id=case_id) + return NextflowCaseConfig( + case_id=case_id, + case_priority=self._get_case_priority(case_id), + workflow=self._get_case_workflow(case_id), + netxflow_config_file=self._get_nextflow_config_path(case_id=case_id).as_posix(), + params_file=self._get_params_file_path(case_id=case_id).as_posix(), + work_dir=self._get_work_dir(case_id=case_id).as_posix(), + ) + + def _create_case_directory(self, case_id: str) -> None: + """Create case working directory.""" + case_path: Path = self._get_case_path(case_id=case_id) + case_path.mkdir(parents=True, exist_ok=True) + + def _get_case_path(self, case_id: str) -> Path: + """Path to case working directory.""" + return Path(self.root_dir, case_id) + + def _create_sample_sheet(self, case_id: str) -> None: + """Create sample sheet for case.""" + create_file( + content_creator=self.sample_sheet_content_creator, + case_path=self._get_case_path(case_id=case_id), + file_type=NextflowFileType.SAMPLE_SHEET, + ) + + def _create_params_file(self, case_id: str) -> None: + """Create parameters file for case.""" + create_file( + content_creator=self.params_file_content_creator, + case_path=self._get_case_path(case_id=case_id), + file_type=NextflowFileType.PARAMS, + ) + + def _create_nextflow_config(self, case_id: str) -> None: + """Create nextflow config file for case.""" + create_file( + content_creator=self.config_content_creator, + case_path=self._get_case_path(case_id=case_id), + file_type=NextflowFileType.CONFIG, + ) + + @abstractmethod + def _do_pipeline_specific_actions(self, case_id: str) -> None: + """Perform pipeline specific actions.""" + pass + + def _get_case_priority(self, case_id: str) -> str: + """Get case priority.""" + case: Case = self.store.get_case_by_internal_id(case_id) + return case.slurm_priority + + def _get_case_workflow(self, case_id: str) -> Workflow: + """Get case workflow.""" + case: Case = self.store.get_case_by_internal_id(case_id) + return Workflow(case.data_analysis) + + def _get_nextflow_config_path(self, case_id: str) -> Path: + case_path: Path = self._get_case_path(case_id) + return get_file_path(case_path=case_path, file_type=NextflowFileType.CONFIG) + + def _get_params_file_path(self, case_id: str) -> Path: + case_path: Path = self._get_case_path(case_id) + return get_file_path(case_path=case_path, file_type=NextflowFileType.PARAMS) + + def _get_work_dir(self, case_id: str) -> Path: + return Path(self.root_dir, case_id, "work") diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index 73c9d76461..f079d17de6 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -1,311 +1,55 @@ import logging -from pathlib import Path from cg.apps.housekeeper.hk import HousekeeperAPI from cg.apps.lims import LimsAPI -from cg.constants import DEFAULT_CAPTURE_KIT, FileExtensions, Priority, SequencingFileTag, Workflow -from cg.constants.constants import FileFormat -from cg.constants.scout import ScoutExportFileName -from cg.constants.tb import AnalysisType -from cg.exc import CgDataError -from cg.io.controller import WriteFile -from cg.io.txt import concat_txt -from cg.io.yaml import read_yaml, write_yaml_nextflow_style +from cg.constants.nf_analysis import NextflowFileType from cg.models.cg_config import RarediseaseConfig -from cg.models.fastq import FastqFileMeta -from cg.models.raredisease.raredisease import ( - RarediseaseParameters, - RarediseaseSampleSheetEntry, - RarediseaseSampleSheetHeaders, -) -from cg.services.analysis_starter.configurator.abstract_service import Configurator -from cg.services.analysis_starter.configurator.file_creators.config_creator import ( - ConfigFileContentCreator, -) from cg.services.analysis_starter.configurator.file_creators.params.raredisease import ( RarediseaseParamsFileContentCreator, ) from cg.services.analysis_starter.configurator.file_creators.sample_sheet.raredisease import ( RarediseaseSampleSheetContentCreator, ) -from cg.services.analysis_starter.configurator.models.nextflow import NextflowCaseConfig -from cg.services.analysis_starter.configurator.utils import ( - extract_read_files, - get_phenotype_code, - get_sex_code, - parse_fastq_data, - write_content_to_file_or_stdout, -) -from cg.store.models import BedVersion, Case, CaseSample, Sample +from cg.services.analysis_starter.configurator.file_creators.utils import create_file +from cg.services.analysis_starter.configurator.implementations.nextflow import NextflowConfigurator from cg.store.store import Store LOG = logging.getLogger(__name__) -class RarediseaseConfigurator(Configurator): +class RarediseaseConfigurator(NextflowConfigurator): """Configurator for Raredisease analysis.""" def __init__( self, - store: Store, config: RarediseaseConfig, + store: Store, housekeeper_api: HousekeeperAPI, lims: LimsAPI, - sample_sheet_content_creator: RarediseaseSampleSheetContentCreator, - config_file_content_creator: ConfigFileContentCreator, - parameters_content_file_creator: RarediseaseParamsFileContentCreator, ): - self.sample_sheet_content_creator = sample_sheet_content_creator - self.config_file_content_creator = config_file_content_creator - self.parameters_content_file_creator = parameters_content_file_creator - self.account: str = config.slurm.account - self.lims: LimsAPI = lims - self.housekeeper_api: HousekeeperAPI = housekeeper_api - self.platform: str = config.platform - self.resources: str = config.resources - self.root_dir: str = config.root - self.store: Store = store - self.workflow_config_path: str = config.config - - @property - def sample_sheet_headers(self) -> list[str]: - """Headers for sample sheet.""" - return RarediseaseSampleSheetHeaders.list() - - def create_config(self, case_id: str, dry_run: bool = False) -> NextflowCaseConfig: - self._create_case_directory(case_id=case_id, dry_run=False) - self._create_sample_sheet(case_id=case_id, dry_run=False) - self._create_params_file(case_id=case_id, dry_run=False) - self._create_nextflow_config(case_id=case_id, dry_run=dry_run) - self._create_gene_panel(case_id=case_id, dry_run=dry_run) - self._write_managed_variants(case_id=case_id, dry_run=dry_run) - return NextflowCaseConfig( - case_id=case_id, - case_priority=self._get_case_priority(case_id), - workflow=self._get_case_workflow(case_id), - netxflow_config_file=self._get_nextflow_config_path(case_id=case_id).as_posix(), - params_file=self._get_params_file_path(case_id=case_id).as_posix(), - work_dir=self._get_work_dir(case_id=case_id).as_posix(), - ) - - def _create_case_directory(self, case_id: str, dry_run: bool = False) -> None: - """Create case working directory.""" - case_path: Path = self._get_case_path(case_id=case_id) - if dry_run: - LOG.info(f"Would have created case directory {case_path.as_posix()}") - return - case_path.mkdir(parents=True, exist_ok=True) - LOG.debug(f"Created case directory {case_path.as_posix()} successfully") - - def _create_nextflow_config(self, case_id: str, dry_run: bool = False) -> None: - """Create nextflow config file.""" - content: str = self._get_nextflow_config_content(case_id=case_id) - file_path: Path = self._get_nextflow_config_path(case_id=case_id) - write_content_to_file_or_stdout(content=content, file_path=file_path, dry_run=dry_run) - LOG.debug(f"Created nextflow config file {file_path.as_posix()} successfully") - - def _create_params_file(self, case_id: str, dry_run: bool = False) -> None: - """Create parameters file for a case.""" - LOG.debug("Getting parameters information built on-the-fly") - built_workflow_parameters: dict | None = self._get_built_workflow_parameters( - case_id=case_id - ).model_dump() - LOG.debug("Adding parameters from the pipeline config file if it exist") - workflow_parameters: dict = built_workflow_parameters | ( - read_yaml(self.params) if hasattr(self, "params") and self.params else {} - ) - replaced_workflow_parameters: dict = replace_values_in_params_file( - workflow_parameters=workflow_parameters + super().__init__(config=config, store=store, housekeeper_api=housekeeper_api, lims=lims) + self.sample_sheet_content_creator = RarediseaseSampleSheetContentCreator( + store=self.store, housekeeper_api=self.housekeeper_api, lims=self.lims ) - if not dry_run: - self._write_params_file( - case_id=case_id, replaced_workflow_parameters=replaced_workflow_parameters - ) - - def _create_sample_sheet(self, case_id: str, dry_run: bool) -> None: - """Create sample sheet for a case.""" - sample_sheet_content: list[list[any]] = self._get_sample_sheet_content(case_id=case_id) - if not dry_run: - self._write_sample_sheet( - content=sample_sheet_content, - file_path=self._get_sample_sheet_path(case_id=case_id), - header=self.sample_sheet_headers, - ) - - def _get_built_workflow_parameters(self, case_id: str) -> RarediseaseParameters: - """Return parameters.""" - analysis_type: str = self._get_data_analysis_type(case_id=case_id) - target_bed_file: str = self._get_target_bed(case_id=case_id, analysis_type=analysis_type) - skip_germlinecnvcaller = self._get_germlinecnvcaller_flag(analysis_type=analysis_type) - outdir = self._get_case_path(case_id=case_id) - - return RarediseaseParameters( - input=self._get_sample_sheet_path(case_id=case_id), - outdir=outdir, - analysis_type=analysis_type, - target_bed_file=target_bed_file, - save_mapped_as_cram=True, - skip_germlinecnvcaller=skip_germlinecnvcaller, - vcfanno_extra_resources=f"{outdir}/{ScoutExportFileName.MANAGED_VARIANTS}", - vep_filters_scout_fmt=f"{outdir}/{ScoutExportFileName.PANELS}", + self.params_content_creator = RarediseaseParamsFileContentCreator( + store=self.store, lims=self.lims, params=config.params ) - def _get_case_path(self, case_id: str) -> Path: - """Path to case working directory.""" - return Path(self.root_dir, case_id) - - def _get_case_priority(self, case_id: str) -> Priority: - return self.store.get_case_by_internal_id(case_id).priority - - def _get_case_workflow(self, case_id: str) -> Workflow: - case: Case = self.store.get_case_by_internal_id(case_id) - return Workflow(case.data_analysis) - - def _get_data_analysis_type(self, case_id: str) -> str: - """Return data analysis type carried out.""" - sample: Sample = self.store.get_samples_by_case_id(case_id=case_id)[0] - return sample.application_version.application.analysis_type - - def _get_file_metadata_for_sample(self, sample: Sample) -> list[FastqFileMeta]: - return [ - parse_fastq_data(hk_file.full_path) - for hk_file in self.housekeeper_api.files( - bundle=sample.internal_id, tags={SequencingFileTag.FASTQ} - ) - ] - - @staticmethod - def _get_germlinecnvcaller_flag(analysis_type: str) -> bool: - if analysis_type == AnalysisType.WGS: - return True - return False + def _do_pipeline_specific_actions(self, case_id: str) -> None: + """Perform pipeline specific actions.""" + self._create_gene_panel(case_id) + self._create_managed_variants(case_id) - def _get_nextflow_config_content(self, case_id: str) -> str: - config_files_list: list[str] = [ - self.platform, - self.workflow_config_path, - self.resources, - ] - case_specific_params: list[str] = [ - self._get_cluster_options(case_id=case_id), - ] - return concat_txt( - file_paths=config_files_list, - str_content=case_specific_params, + def _create_gene_panel(self, case_id: str) -> None: + create_file( + content_creator=self.gene_panel_content_creator, + case_path=self._get_case_path(case_id=case_id), + file_type=NextflowFileType.GENE_PANEL, ) - def _get_nextflow_config_path(self, case_id: str) -> Path: - return Path((self._get_case_path(case_id)), f"{case_id}_nextflow_config").with_suffix( - FileExtensions.JSON + def _create_managed_variants(self, case_id: str, dry_run: bool = False) -> None: + create_file( + content_creator=self.managed_variants_content_creator, + case_path=self._get_case_path(case_id=case_id), + file_type=NextflowFileType.MANAGED_VARIANTS, ) - - def _get_params_file_path(self, case_id: str) -> Path: - return Path((self._get_case_path(case_id)), f"{case_id}_params_file").with_suffix( - FileExtensions.YAML - ) - - def _get_paired_read_paths(self, sample: Sample) -> tuple[list[str], list[str]]: - """Returns a tuple of paired fastq file paths for the forward and reverse read.""" - sample_metadata: list[FastqFileMeta] = self._get_file_metadata_for_sample(sample=sample) - fastq_forward_read_paths: list[str] = extract_read_files( - metadata=sample_metadata, forward_read=True - ) - fastq_reverse_read_paths: list[str] = extract_read_files( - metadata=sample_metadata, reverse_read=True - ) - return fastq_forward_read_paths, fastq_reverse_read_paths - - def _get_sample_sheet_content(self, case_id: str) -> list[list[any]]: - """Return formatted information required to build a sample sheet for a case. - This contains information for all samples linked to the case.""" - sample_sheet_content: list = [] - case: Case = self.store.get_case_by_internal_id(internal_id=case_id) - LOG.info(f"Samples linked to case {case_id}: {len(case.links)}") - LOG.debug("Getting sample sheet information") - for link in case.links: - sample_sheet_content.extend(self._get_sample_sheet_content_per_sample(case_sample=link)) - return sample_sheet_content - - def _get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]: - """Collect and format information required to build a sample sheet for a single sample.""" - fastq_forward_read_paths, fastq_reverse_read_paths = self._get_paired_read_paths( - sample=case_sample.sample - ) - sample_sheet_entry = RarediseaseSampleSheetEntry( - name=case_sample.sample.internal_id, - fastq_forward_read_paths=fastq_forward_read_paths, - fastq_reverse_read_paths=fastq_reverse_read_paths, - sex=get_sex_code(case_sample.sample.sex), - phenotype=get_phenotype_code(case_sample.status), - paternal_id=case_sample.get_paternal_sample_id, - maternal_id=case_sample.get_maternal_sample_id, - case_id=case_sample.case.internal_id, - ) - return sample_sheet_entry.reformat_sample_content - - def _get_sample_sheet_path(self, case_id: str) -> Path: - """Path to sample sheet.""" - return Path(self._get_case_path(case_id), f"{case_id}_samplesheet").with_suffix( - FileExtensions.CSV - ) - - def _get_target_bed(self, case_id: str, analysis_type: str) -> str: - """ - Return the target bed file from LIMS or use default capture kit for WHOLE_GENOME_SEQUENCING. - """ - target_bed_file: str = self._get_target_bed_from_lims(case_id=case_id) - if not target_bed_file: - if analysis_type == AnalysisType.WGS: - return DEFAULT_CAPTURE_KIT - raise ValueError("No capture kit was found in LIMS") - return target_bed_file - - def _get_target_bed_from_lims(self, case_id: str) -> str | None: - """Get target bed filename from LIMS.""" - case: Case = self.store.get_case_by_internal_id(internal_id=case_id) - sample: Sample = case.links[0].sample - if sample.from_sample: - sample: Sample = self.store.get_sample_by_internal_id(internal_id=sample.from_sample) - target_bed_shortname: str | None = self.lims.capture_kit(lims_id=sample.internal_id) - if not target_bed_shortname: - return None - bed_version: BedVersion | None = self.store.get_bed_version_by_short_name( - bed_version_short_name=target_bed_shortname - ) - if not bed_version: - raise CgDataError(f"Bed-version {target_bed_shortname} does not exist") - return bed_version.filename - - def _get_work_dir(self, case_id: str) -> Path: - return Path(self.root_dir, case_id, "work") - - def _write_params_file(self, case_id: str, replaced_workflow_parameters: dict = None) -> None: - """Write params-file for analysis.""" - LOG.debug("Writing parameters file") - if replaced_workflow_parameters: - write_yaml_nextflow_style( - content=replaced_workflow_parameters, - file_path=self._get_params_file_path(case_id=case_id), - ) - else: - self._get_params_file_path(case_id=case_id).touch() - - @staticmethod - def _write_sample_sheet( - content: list[list[any]], - file_path: Path, - header: list[str], - ) -> None: - """Write sample sheet CSV file.""" - LOG.debug("Writing sample sheet") - if header: - content.insert(0, header) - WriteFile.write_file_from_content( - content=content, - file_format=FileFormat.CSV, - file_path=file_path, - ) - - def _get_cluster_options(self, case_id: str) -> str: - case: Case = self.store.get_case_by_internal_id(case_id) - return f'process.clusterOptions = "-A {self.account} --qos={case.slurm_priority}"\n' From c5ec339a842be2fe0fc054253fd6243a2b3f0693 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Wed, 5 Mar 2025 10:39:32 +0100 Subject: [PATCH 14/43] add constants for gene files --- cg/constants/nf_analysis.py | 2 ++ .../analysis_starter/configurator/file_creators/utils.py | 1 + 2 files changed, 3 insertions(+) diff --git a/cg/constants/nf_analysis.py b/cg/constants/nf_analysis.py index 0cd1221aaf..86d13b690c 100644 --- a/cg/constants/nf_analysis.py +++ b/cg/constants/nf_analysis.py @@ -19,6 +19,8 @@ class NextflowFileType(StrEnum): PARAMS = "params_file" SAMPLE_SHEET = "sample_sheet" CONFIG = "nextflow_config" + GENE_PANEL = "gene_panel" + MANAGED_VARIANTS = "managed_variants" NALLO_METRIC_CONDITIONS: dict[str, dict[str, Any]] = { diff --git a/cg/services/analysis_starter/configurator/file_creators/utils.py b/cg/services/analysis_starter/configurator/file_creators/utils.py index d698d7d68a..93d8194480 100644 --- a/cg/services/analysis_starter/configurator/file_creators/utils.py +++ b/cg/services/analysis_starter/configurator/file_creators/utils.py @@ -10,6 +10,7 @@ LOG = logging.getLogger(__name__) +# TODO: Adapt to gene panel and variant files FILE_TYPE_TO_EXTENSION: dict[NextflowFileType, FileExtensions] = { NextflowFileType.PARAMS: FileExtensions.YAML, NextflowFileType.SAMPLE_SHEET: FileExtensions.CSV, From f447e20c273a2bc0f56fb033689ba640aeaa0429 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Wed, 5 Mar 2025 10:49:53 +0100 Subject: [PATCH 15/43] removed duplicated utils method --- .../analysis_starter/configurator/utils.py | 85 ------------------- 1 file changed, 85 deletions(-) delete mode 100644 cg/services/analysis_starter/configurator/utils.py diff --git a/cg/services/analysis_starter/configurator/utils.py b/cg/services/analysis_starter/configurator/utils.py deleted file mode 100644 index bab67315ea..0000000000 --- a/cg/services/analysis_starter/configurator/utils.py +++ /dev/null @@ -1,85 +0,0 @@ -import logging -import re -from pathlib import Path - -import rich_click as click - -from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex -from cg.io.gzip import read_gzip_first_line -from cg.io.txt import write_txt -from cg.meta.workflow.fastq import _is_undetermined_in_path -from cg.models.fastq import FastqFileMeta, GetFastqFileMeta - -LOG = logging.getLogger(__name__) - - -def write_content_to_file_or_stdout(content: str, file_path: Path, dry_run: bool = False) -> None: - """Write content to a file if dry-run is False, otherwise print to stdout.""" - if dry_run: - click.echo(content) - return - write_txt(content=content, file_path=file_path) - - -def get_sex_code(sex: str) -> int: - """Return Raredisease sex code.""" - LOG.debug("Translate sex to integer code") - try: - code = PlinkSex[sex.upper()] - except KeyError: - raise ValueError(f"{sex} is not a valid sex") - return code - - -def get_phenotype_code(phenotype: str) -> int: - """Return Raredisease phenotype code.""" - LOG.debug("Translate phenotype to integer code") - try: - code = PlinkPhenotypeStatus[phenotype.upper()] - except KeyError: - raise ValueError(f"{phenotype} is not a valid phenotype") - return code - - -def extract_read_files( - metadata: list[FastqFileMeta], forward_read: bool = False, reverse_read: bool = False -) -> list[str]: - """Extract a list of fastq file paths for either forward or reverse reads.""" - if forward_read and not reverse_read: - read_direction = 1 - elif reverse_read and not forward_read: - read_direction = 2 - else: - raise ValueError("Either forward or reverse needs to be specified") - sorted_metadata: list = sorted(metadata, key=lambda k: k.path) - return [ - fastq_file.path - for fastq_file in sorted_metadata - if fastq_file.read_direction == read_direction - ] - - -def parse_fastq_data(fastq_path: Path) -> FastqFileMeta: - header_line: str = read_gzip_first_line(file_path=fastq_path) - fastq_file_meta: FastqFileMeta = parse_fastq_header(header_line) - fastq_file_meta.path = fastq_path - fastq_file_meta.undetermined = _is_undetermined_in_path(fastq_path) - matches = re.findall(r"-l[1-9]t([1-9]{2})_", str(fastq_path)) - if len(matches) > 0: - fastq_file_meta.flow_cell_id = f"{fastq_file_meta.flow_cell_id}-{matches[0]}" - return fastq_file_meta - - -def parse_fastq_header(line: str) -> FastqFileMeta | None: - """Parse and return fastq header metadata. - Handle Illumina's two different header formats - @see https://en.wikipedia.org/wiki/FASTQ_format - Raise: - TypeError if unable to split line into expected parts. - """ - parts = line.split(":") - try: - return GetFastqFileMeta.header_format.get(len(parts))(parts=parts) - except TypeError as exception: - LOG.error(f"Could not parse header format for header: {line}") - raise exception From 215ced4b6a70843b0181f06cbc17e1a209f65a77 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Wed, 5 Mar 2025 11:02:30 +0100 Subject: [PATCH 16/43] renamed write function --- .../analysis_starter/configurator/file_creators/utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cg/services/analysis_starter/configurator/file_creators/utils.py b/cg/services/analysis_starter/configurator/file_creators/utils.py index 93d8194480..3d5a61e851 100644 --- a/cg/services/analysis_starter/configurator/file_creators/utils.py +++ b/cg/services/analysis_starter/configurator/file_creators/utils.py @@ -30,9 +30,7 @@ def get_file_path(case_path: Path, file_type: NextflowFileType) -> Path: return Path(case_path, f"{case_id}_{file_type}").with_suffix(extension) -def write_content_to_file_or_stdout( - content: any, file_path: Path, file_type: NextflowFileType -) -> None: +def write_content_to_file(content: any, file_path: Path, file_type: NextflowFileType) -> None: LOG.debug(f"Writing sample sheet to {file_path}") FILE_TYPE_TO_WRITER[file_type](content=content, file_path=file_path) @@ -44,7 +42,7 @@ def create_file( ) -> None: file_path: Path = get_file_path(case_path=case_path, file_type=file_type) content: any = content_creator.create(case_path) - write_content_to_file_or_stdout(content=content, file_path=file_path, file_type=file_type) + write_content_to_file(content=content, file_path=file_path, file_type=file_type) def get_case_id_from_path(case_path: Path) -> str: From e6fd37cc17b271232bad91bb9c2ba1832628356d Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Wed, 5 Mar 2025 13:17:18 +0100 Subject: [PATCH 17/43] rolled back small fix for scope reasons --- cg/meta/workflow/nf_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cg/meta/workflow/nf_analysis.py b/cg/meta/workflow/nf_analysis.py index 814e8839cc..b5b06d224a 100644 --- a/cg/meta/workflow/nf_analysis.py +++ b/cg/meta/workflow/nf_analysis.py @@ -362,8 +362,8 @@ def create_params_file(self, case_id: str, dry_run: bool) -> None: ) def replace_values_in_params_file(self, workflow_parameters: dict) -> dict: - """Iterate through the dictionary until all placeholders are replaced with the corresponding value from the dictionary""" replaced_workflow_parameters = copy.deepcopy(workflow_parameters) + """Iterate through the dictionary until all placeholders are replaced with the corresponding value from the dictionary""" while True: resolved: bool = True for key, value in replaced_workflow_parameters.items(): From 4f82eacd40074a146587a60301f45115f2d84403 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Thu, 6 Mar 2025 08:25:49 +0100 Subject: [PATCH 18/43] inject content creators --- .../configurator/file_creators/utils.py | 21 +++++++++++++++---- .../configurator/implementations/nextflow.py | 17 ++++++++------- .../implementations/raredisease.py | 12 +++++------ 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/cg/services/analysis_starter/configurator/file_creators/utils.py b/cg/services/analysis_starter/configurator/file_creators/utils.py index 3d5a61e851..31aed9d7b8 100644 --- a/cg/services/analysis_starter/configurator/file_creators/utils.py +++ b/cg/services/analysis_starter/configurator/file_creators/utils.py @@ -1,7 +1,8 @@ import logging from pathlib import Path -from cg.constants import FileExtensions +from cg.constants import FileExtensions, Workflow +from cg.constants.gene_panel import GenePanelGenomeBuild from cg.constants.nf_analysis import NextflowFileType from cg.io.csv import write_csv from cg.io.json import write_json @@ -10,13 +11,15 @@ LOG = logging.getLogger(__name__) -# TODO: Adapt to gene panel and variant files -FILE_TYPE_TO_EXTENSION: dict[NextflowFileType, FileExtensions] = { +FILE_TYPE_TO_EXTENSION: dict[NextflowFileType, str] = { NextflowFileType.PARAMS: FileExtensions.YAML, NextflowFileType.SAMPLE_SHEET: FileExtensions.CSV, NextflowFileType.CONFIG: FileExtensions.JSON, + NextflowFileType.GENE_PANEL: FileExtensions.BED, + NextflowFileType.MANAGED_VARIANTS: FileExtensions.VCF, } +# TODO: Adapt to gene panel and variant files FILE_TYPE_TO_WRITER: dict[NextflowFileType, callable] = { NextflowFileType.PARAMS: write_yaml_nextflow_style, NextflowFileType.SAMPLE_SHEET: write_csv, @@ -26,7 +29,7 @@ def get_file_path(case_path: Path, file_type: NextflowFileType) -> Path: case_id: str = case_path.name - extension: FileExtensions = FILE_TYPE_TO_EXTENSION[file_type] + extension: str = FILE_TYPE_TO_EXTENSION[file_type] return Path(case_path, f"{case_id}_{file_type}").with_suffix(extension) @@ -47,3 +50,13 @@ def create_file( def get_case_id_from_path(case_path: Path) -> str: return case_path.name + + +def get_genome_build(workflow: Workflow) -> GenePanelGenomeBuild: + """Return genome build for the given Workflow.""" + workflow_to_genome_build: dict[Workflow, GenePanelGenomeBuild] = { + Workflow.NALLO: GenePanelGenomeBuild.hg38, + Workflow.RAREDISEASE: GenePanelGenomeBuild.hg19, + Workflow.TOMTE: GenePanelGenomeBuild.hg38, + } + return workflow_to_genome_build.get(workflow) diff --git a/cg/services/analysis_starter/configurator/implementations/nextflow.py b/cg/services/analysis_starter/configurator/implementations/nextflow.py index 6081aae23e..644152917e 100644 --- a/cg/services/analysis_starter/configurator/implementations/nextflow.py +++ b/cg/services/analysis_starter/configurator/implementations/nextflow.py @@ -17,18 +17,19 @@ class NextflowConfigurator(Configurator): - def __init__(self, config: any, store: Store, housekeeper_api: HousekeeperAPI, lims: LimsAPI): + def __init__( + self, + config: any, + store: Store, + housekeeper_api: HousekeeperAPI, + lims: LimsAPI, + config_content_creator: NextflowConfigFileContentCreator, + ): self.root_dir: str = config.root_dir self.store: Store = store self.housekeeper_api: HousekeeperAPI = housekeeper_api self.lims: LimsAPI = lims - self.config_content_creator = NextflowConfigFileContentCreator( - store=self.store, - platform=config.platform, - workflow_config_path=config.workflow_config_path, - resources=config.resources, - account=config.slurm.account, - ) + self.config_content_creator = config_content_creator def create_config(self, case_id: str) -> NextflowCaseConfig: """Create a Nextflow case config.""" diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index f079d17de6..2cb5883ef9 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -26,14 +26,12 @@ def __init__( store: Store, housekeeper_api: HousekeeperAPI, lims: LimsAPI, + sample_sheet_content_creator: RarediseaseSampleSheetContentCreator, + params_content_creator: RarediseaseParamsFileContentCreator, ): super().__init__(config=config, store=store, housekeeper_api=housekeeper_api, lims=lims) - self.sample_sheet_content_creator = RarediseaseSampleSheetContentCreator( - store=self.store, housekeeper_api=self.housekeeper_api, lims=self.lims - ) - self.params_content_creator = RarediseaseParamsFileContentCreator( - store=self.store, lims=self.lims, params=config.params - ) + self.sample_sheet_content_creator = sample_sheet_content_creator + self.params_content_creator = params_content_creator def _do_pipeline_specific_actions(self, case_id: str) -> None: """Perform pipeline specific actions.""" @@ -47,7 +45,7 @@ def _create_gene_panel(self, case_id: str) -> None: file_type=NextflowFileType.GENE_PANEL, ) - def _create_managed_variants(self, case_id: str, dry_run: bool = False) -> None: + def _create_managed_variants(self, case_id: str) -> None: create_file( content_creator=self.managed_variants_content_creator, case_path=self._get_case_path(case_id=case_id), From edb4adbc8e83e2b45c7ee79a0f7c3c90ed4c508c Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Thu, 6 Mar 2025 08:27:32 +0100 Subject: [PATCH 19/43] add gene panel creator --- .../configurator/file_creators/gene_panel.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 cg/services/analysis_starter/configurator/file_creators/gene_panel.py diff --git a/cg/services/analysis_starter/configurator/file_creators/gene_panel.py b/cg/services/analysis_starter/configurator/file_creators/gene_panel.py new file mode 100644 index 0000000000..36868b154e --- /dev/null +++ b/cg/services/analysis_starter/configurator/file_creators/gene_panel.py @@ -0,0 +1,53 @@ +from pathlib import Path + +from cg.apps.scout.scoutapi import ScoutAPI +from cg.constants import GenePanelMasterList +from cg.constants.gene_panel import GenePanelCombo, GenePanelGenomeBuild +from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator +from cg.services.analysis_starter.configurator.file_creators.utils import ( + get_case_id_from_path, + get_genome_build, +) +from cg.store.models import Case +from cg.store.store import Store + + +class GenePanelFileContentCreator(FileContentCreator): + def __init__(self, store: Store, scout_api: ScoutAPI): + self.store = store + self.scout_api = scout_api + + def create(self, case_path: Path) -> list[str]: + case_id: str = get_case_id_from_path(case_path=case_path) + case: Case = self.store.get_case_by_internal_id(internal_id=case_id) + genome_build: GenePanelGenomeBuild = get_genome_build(workflow=case.data_analysis) + all_panels: list[str] = self._get_aggregated_panels( + customer_id=case.customer.internal_id, default_panels=set(case.panels) + ) + return self.scout_api.export_panels(build=genome_build, panels=all_panels) + + def _get_aggregated_panels(self, customer_id: str, default_panels: set[str]) -> list[str]: + """Check if customer is collaborator for gene panel master list + and if all default panels are included in the gene panel master list. + If not, add gene panel combo and broad non-specific gene panels. + Return an aggregated gene panel.""" + if GenePanelMasterList.is_customer_collaborator_and_panels_in_gene_panels_master_list( + customer_id=customer_id, gene_panels=default_panels + ): + return GenePanelMasterList.get_panel_names() + all_panels: set[str] = self._add_gene_panel_combo(gene_panels=default_panels) + all_panels |= GenePanelMasterList.get_non_specific_gene_panels() + return list(all_panels) + + @staticmethod + def _add_gene_panel_combo(gene_panels: set[str]) -> set[str]: + """ + Add gene panels combinations for gene panels being part of gene panel combination and + return updated gene panels. + """ + additional_panels = set() + for panel in gene_panels: + if panel in GenePanelCombo.COMBO_1: + additional_panels |= GenePanelCombo.COMBO_1.get(panel) + gene_panels |= additional_panels + return gene_panels From 77db7d15f2540f578e24b94657aadf7a5770c1d7 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Thu, 6 Mar 2025 08:54:33 +0100 Subject: [PATCH 20/43] some renaming --- .../file_creators/{config_creator.py => config_file.py} | 0 .../file_creators/{params => params_file}/raredisease.py | 2 +- .../configurator/file_creators/{params => params_file}/utils.py | 0 .../analysis_starter/configurator/implementations/nextflow.py | 2 +- .../configurator/implementations/raredisease.py | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) rename cg/services/analysis_starter/configurator/file_creators/{config_creator.py => config_file.py} (100%) rename cg/services/analysis_starter/configurator/file_creators/{params => params_file}/raredisease.py (99%) rename cg/services/analysis_starter/configurator/file_creators/{params => params_file}/utils.py (100%) diff --git a/cg/services/analysis_starter/configurator/file_creators/config_creator.py b/cg/services/analysis_starter/configurator/file_creators/config_file.py similarity index 100% rename from cg/services/analysis_starter/configurator/file_creators/config_creator.py rename to cg/services/analysis_starter/configurator/file_creators/config_file.py diff --git a/cg/services/analysis_starter/configurator/file_creators/params/raredisease.py b/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py similarity index 99% rename from cg/services/analysis_starter/configurator/file_creators/params/raredisease.py rename to cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py index 62e0925571..a31f32f24e 100644 --- a/cg/services/analysis_starter/configurator/file_creators/params/raredisease.py +++ b/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py @@ -9,7 +9,7 @@ from cg.io.yaml import read_yaml from cg.models.raredisease.raredisease import RarediseaseParameters from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator -from cg.services.analysis_starter.configurator.file_creators.params.utils import ( +from cg.services.analysis_starter.configurator.file_creators.params_file.utils import ( replace_values_in_params_file, ) from cg.services.analysis_starter.configurator.file_creators.utils import ( diff --git a/cg/services/analysis_starter/configurator/file_creators/params/utils.py b/cg/services/analysis_starter/configurator/file_creators/params_file/utils.py similarity index 100% rename from cg/services/analysis_starter/configurator/file_creators/params/utils.py rename to cg/services/analysis_starter/configurator/file_creators/params_file/utils.py diff --git a/cg/services/analysis_starter/configurator/implementations/nextflow.py b/cg/services/analysis_starter/configurator/implementations/nextflow.py index 644152917e..87eb9c7bdb 100644 --- a/cg/services/analysis_starter/configurator/implementations/nextflow.py +++ b/cg/services/analysis_starter/configurator/implementations/nextflow.py @@ -6,7 +6,7 @@ from cg.constants import Workflow from cg.constants.nf_analysis import NextflowFileType from cg.services.analysis_starter.configurator.abstract_service import Configurator -from cg.services.analysis_starter.configurator.file_creators.config_creator import ( +from cg.services.analysis_starter.configurator.file_creators.config_file import ( NextflowConfigFileContentCreator, ) from cg.services.analysis_starter.configurator.file_creators.utils import create_file, get_file_path diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index 2cb5883ef9..fb719b9efa 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -4,7 +4,7 @@ from cg.apps.lims import LimsAPI from cg.constants.nf_analysis import NextflowFileType from cg.models.cg_config import RarediseaseConfig -from cg.services.analysis_starter.configurator.file_creators.params.raredisease import ( +from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( RarediseaseParamsFileContentCreator, ) from cg.services.analysis_starter.configurator.file_creators.sample_sheet.raredisease import ( From 945733a57f2ef5d1f4d572548bd9b408151dda19 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Thu, 6 Mar 2025 10:15:43 +0100 Subject: [PATCH 21/43] add sample sheet creator fixture --- .../sample_sheet_content_creators.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/fixture_plugins/analysis_starter/sample_sheet_content_creators.py diff --git a/tests/fixture_plugins/analysis_starter/sample_sheet_content_creators.py b/tests/fixture_plugins/analysis_starter/sample_sheet_content_creators.py new file mode 100644 index 0000000000..70b2bf076a --- /dev/null +++ b/tests/fixture_plugins/analysis_starter/sample_sheet_content_creators.py @@ -0,0 +1,17 @@ +import pytest + +from cg.models.cg_config import CGConfig +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.raredisease import ( + RarediseaseSampleSheetContentCreator, +) + + +@pytest.fixture +def raredisease_sample_sheet_content_creator( + raredisease_context: CGConfig, +) -> RarediseaseSampleSheetContentCreator: + return RarediseaseSampleSheetContentCreator( + store=raredisease_context.status_db, + housekeeper_api=raredisease_context.housekeeper_api, + lims=raredisease_context.lims_api, + ) From b06cd5a4e9972b05f9c649412141095114afd2cc Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Thu, 6 Mar 2025 10:16:04 +0100 Subject: [PATCH 22/43] add params file creator fixture --- .../params_file_content_creators.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/fixture_plugins/analysis_starter/params_file_content_creators.py diff --git a/tests/fixture_plugins/analysis_starter/params_file_content_creators.py b/tests/fixture_plugins/analysis_starter/params_file_content_creators.py new file mode 100644 index 0000000000..cd216ec69e --- /dev/null +++ b/tests/fixture_plugins/analysis_starter/params_file_content_creators.py @@ -0,0 +1,17 @@ +import pytest + +from cg.models.cg_config import CGConfig +from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( + RarediseaseParamsFileContentCreator, +) + + +@pytest.fixture +def raredisease_params_content_creator( + raredisease_context: CGConfig, +) -> RarediseaseParamsFileContentCreator: + return RarediseaseParamsFileContentCreator( + store=raredisease_context.status_db, + lims=raredisease_context.lims_api, + params=raredisease_context.raredisease.params, + ) From 5caddaf7b0bc9ad4721dcb141cb00282836a6944 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Thu, 6 Mar 2025 10:33:42 +0100 Subject: [PATCH 23/43] add other file creator fixtures --- .../analysis_starter/config_file_creators.py | 19 +++++++++++++++++++ .../analysis_starter/configurator_fixtures.py | 14 +++++++++++++- .../specific_file_creators.py | 16 ++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tests/fixture_plugins/analysis_starter/config_file_creators.py create mode 100644 tests/fixture_plugins/analysis_starter/specific_file_creators.py diff --git a/tests/fixture_plugins/analysis_starter/config_file_creators.py b/tests/fixture_plugins/analysis_starter/config_file_creators.py new file mode 100644 index 0000000000..d86fd7b519 --- /dev/null +++ b/tests/fixture_plugins/analysis_starter/config_file_creators.py @@ -0,0 +1,19 @@ +import pytest + +from cg.models.cg_config import CGConfig +from cg.services.analysis_starter.configurator.file_creators.config_file import ( + NextflowConfigFileContentCreator, +) + + +@pytest.fixture +def raredisease_config_file_content_creator( + raredisease_context: CGConfig, +) -> NextflowConfigFileContentCreator: + return NextflowConfigFileContentCreator( + store=raredisease_context.status_db, + platform=raredisease_context.raredisease.platform, + workflow_config_path=raredisease_context.raredisease.workflow_config_path, + resources=raredisease_context.raredisease.resources, + account=raredisease_context.raredisease.slurm.account, + ) diff --git a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py index 515de92e0f..934c123103 100644 --- a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py @@ -1,16 +1,28 @@ import pytest from cg.models.cg_config import CGConfig +from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( + RarediseaseParamsFileContentCreator, +) +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.raredisease import ( + RarediseaseSampleSheetContentCreator, +) from cg.services.analysis_starter.configurator.implementations.raredisease import ( RarediseaseConfigurator, ) @pytest.fixture -def raredisease_configurator(raredisease_context: CGConfig) -> RarediseaseConfigurator: +def raredisease_configurator( + raredisease_context: CGConfig, + raredisease_sample_sheet_content_creator: RarediseaseSampleSheetContentCreator, + raredisease_params_content_creator: RarediseaseParamsFileContentCreator, +) -> RarediseaseConfigurator: return RarediseaseConfigurator( store=raredisease_context.status_db, config=raredisease_context.raredisease, housekeeper_api=raredisease_context.housekeeper_api, lims=raredisease_context.lims_api, + sample_sheet_content_creator=raredisease_sample_sheet_content_creator, + params_content_creator=raredisease_params_content_creator, ) diff --git a/tests/fixture_plugins/analysis_starter/specific_file_creators.py b/tests/fixture_plugins/analysis_starter/specific_file_creators.py new file mode 100644 index 0000000000..376b72da73 --- /dev/null +++ b/tests/fixture_plugins/analysis_starter/specific_file_creators.py @@ -0,0 +1,16 @@ +import pytest + +from cg.models.cg_config import CGConfig +from cg.services.analysis_starter.configurator.file_creators.gene_panel import ( + GenePanelFileContentCreator, +) + + +@pytest.fixture +def raredisease_gene_panel_content_creator( + raredisease_context: CGConfig, +) -> GenePanelFileContentCreator: + return GenePanelFileContentCreator( + store=raredisease_context.status_db, + scout_api=raredisease_context.scout_api, + ) From 1dde474ef98d7b17287e16ccf50406dffe2c3c26 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Thu, 6 Mar 2025 10:41:00 +0100 Subject: [PATCH 24/43] add fixtures in plugins to conftest --- tests/conftest.py | 4 ++++ ...onfig_file_creators.py => config_file_content_creators.py} | 0 2 files changed, 4 insertions(+) rename tests/fixture_plugins/analysis_starter/{config_file_creators.py => config_file_content_creators.py} (100%) diff --git a/tests/conftest.py b/tests/conftest.py index 242507bbe0..adc8eaa787 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -93,10 +93,14 @@ deliverables_yaml = "_deliverables.yaml" pytest_plugins = [ "tests.fixture_plugins.analysis_starter.case_config_fixtures", + "tests.fixture_plugins.analysis_starter.config_file_content_creators", "tests.fixture_plugins.analysis_starter.configurator_fixtures", "tests.fixture_plugins.analysis_starter.file_content_fixtures", "tests.fixture_plugins.analysis_starter.path_fixtures", + "tests.fixture_plugins.analysis_starter.sample_sheet_content_creators", "tests.fixture_plugins.analysis_starter.seqera_client_fixtures", + "tests.fixture_plugins.analysis_starter.specific_file_creators", + "tests.fixture_plugins.analysis_starter.params_file_content_creators", "tests.fixture_plugins.backup_fixtures.backup_fixtures", "tests.fixture_plugins.chanjo2_fixtures.api_fixtures", "tests.fixture_plugins.chanjo2_fixtures.models_fixtures", diff --git a/tests/fixture_plugins/analysis_starter/config_file_creators.py b/tests/fixture_plugins/analysis_starter/config_file_content_creators.py similarity index 100% rename from tests/fixture_plugins/analysis_starter/config_file_creators.py rename to tests/fixture_plugins/analysis_starter/config_file_content_creators.py From 9ccef27adbeaa0832c0cfbdc84a4c9736518fa2e Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Thu, 6 Mar 2025 11:07:47 +0100 Subject: [PATCH 25/43] fix constructors and fixtures --- .../configurator/implementations/nextflow.py | 2 +- .../configurator/implementations/raredisease.py | 12 +++++++++++- .../analysis_starter/config_file_content_creators.py | 2 +- .../analysis_starter/configurator_fixtures.py | 5 +++++ tests/services/analysis_starter/test_configurator.py | 7 ++----- 5 files changed, 20 insertions(+), 8 deletions(-) diff --git a/cg/services/analysis_starter/configurator/implementations/nextflow.py b/cg/services/analysis_starter/configurator/implementations/nextflow.py index 87eb9c7bdb..5fc1ee4da2 100644 --- a/cg/services/analysis_starter/configurator/implementations/nextflow.py +++ b/cg/services/analysis_starter/configurator/implementations/nextflow.py @@ -25,7 +25,7 @@ def __init__( lims: LimsAPI, config_content_creator: NextflowConfigFileContentCreator, ): - self.root_dir: str = config.root_dir + self.root_dir: str = config.root self.store: Store = store self.housekeeper_api: HousekeeperAPI = housekeeper_api self.lims: LimsAPI = lims diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index fb719b9efa..e0591b7fc3 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -4,6 +4,9 @@ from cg.apps.lims import LimsAPI from cg.constants.nf_analysis import NextflowFileType from cg.models.cg_config import RarediseaseConfig +from cg.services.analysis_starter.configurator.file_creators.config_file import ( + NextflowConfigFileContentCreator, +) from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( RarediseaseParamsFileContentCreator, ) @@ -26,10 +29,17 @@ def __init__( store: Store, housekeeper_api: HousekeeperAPI, lims: LimsAPI, + config_content_creator: NextflowConfigFileContentCreator, sample_sheet_content_creator: RarediseaseSampleSheetContentCreator, params_content_creator: RarediseaseParamsFileContentCreator, ): - super().__init__(config=config, store=store, housekeeper_api=housekeeper_api, lims=lims) + super().__init__( + config=config, + store=store, + housekeeper_api=housekeeper_api, + lims=lims, + config_content_creator=config_content_creator, + ) self.sample_sheet_content_creator = sample_sheet_content_creator self.params_content_creator = params_content_creator diff --git a/tests/fixture_plugins/analysis_starter/config_file_content_creators.py b/tests/fixture_plugins/analysis_starter/config_file_content_creators.py index d86fd7b519..3f8ed79636 100644 --- a/tests/fixture_plugins/analysis_starter/config_file_content_creators.py +++ b/tests/fixture_plugins/analysis_starter/config_file_content_creators.py @@ -13,7 +13,7 @@ def raredisease_config_file_content_creator( return NextflowConfigFileContentCreator( store=raredisease_context.status_db, platform=raredisease_context.raredisease.platform, - workflow_config_path=raredisease_context.raredisease.workflow_config_path, + workflow_config_path=raredisease_context.raredisease.config, resources=raredisease_context.raredisease.resources, account=raredisease_context.raredisease.slurm.account, ) diff --git a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py index 934c123103..e96151eb4e 100644 --- a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py @@ -1,6 +1,9 @@ import pytest from cg.models.cg_config import CGConfig +from cg.services.analysis_starter.configurator.file_creators.config_file import ( + NextflowConfigFileContentCreator, +) from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( RarediseaseParamsFileContentCreator, ) @@ -15,6 +18,7 @@ @pytest.fixture def raredisease_configurator( raredisease_context: CGConfig, + raredisease_config_file_content_creator: NextflowConfigFileContentCreator, raredisease_sample_sheet_content_creator: RarediseaseSampleSheetContentCreator, raredisease_params_content_creator: RarediseaseParamsFileContentCreator, ) -> RarediseaseConfigurator: @@ -23,6 +27,7 @@ def raredisease_configurator( config=raredisease_context.raredisease, housekeeper_api=raredisease_context.housekeeper_api, lims=raredisease_context.lims_api, + config_content_creator=raredisease_config_file_content_creator, sample_sheet_content_creator=raredisease_sample_sheet_content_creator, params_content_creator=raredisease_params_content_creator, ) diff --git a/tests/services/analysis_starter/test_configurator.py b/tests/services/analysis_starter/test_configurator.py index 623a380dc0..abee83d94c 100644 --- a/tests/services/analysis_starter/test_configurator.py +++ b/tests/services/analysis_starter/test_configurator.py @@ -1,6 +1,3 @@ -from pathlib import Path -from unittest import mock - import pytest from cg.services.analysis_starter.configurator.abstract_model import CaseConfig @@ -52,10 +49,10 @@ def test_create_nextflow_config_file_exists( case_id: str = request.getfixturevalue(case_id_fixture) # GIVEN that a case directory exists - configurator._create_case_directory(case_id=case_id, dry_run=False) + configurator._create_case_directory(case_id=case_id) # WHEN creating nextflow config - configurator._create_nextflow_config(case_id=case_id, dry_run=False) + configurator._create_nextflow_config(case_id=case_id) # THEN the nextflow config is created assert configurator._get_nextflow_config_path(case_id).exists() From e92ab448027d8d9df32fceda2b84e1510bb1393f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20Ohlsson=20=C3=85ngnell?= <40887124+islean@users.noreply.github.com> Date: Thu, 6 Mar 2025 11:37:12 +0100 Subject: [PATCH 26/43] Add managed_variants support (#4265) (patch) ### Added - ManagedVariantsFileContentCreator --- .../file_creators/managed_variants.py | 24 +++++++++++++++++++ .../configurator/file_creators/utils.py | 16 ++++++++++++- .../implementations/raredisease.py | 10 ++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 cg/services/analysis_starter/configurator/file_creators/managed_variants.py diff --git a/cg/services/analysis_starter/configurator/file_creators/managed_variants.py b/cg/services/analysis_starter/configurator/file_creators/managed_variants.py new file mode 100644 index 0000000000..4c8ed9eab4 --- /dev/null +++ b/cg/services/analysis_starter/configurator/file_creators/managed_variants.py @@ -0,0 +1,24 @@ +from pathlib import Path + +from cg.apps.scout.scoutapi import ScoutAPI +from cg.constants import Workflow +from cg.constants.gene_panel import GenePanelGenomeBuild +from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator +from cg.services.analysis_starter.configurator.file_creators.utils import ( + get_case_id_from_path, + get_genome_build, +) +from cg.store.store import Store + + +class ManagedVariantsFileContentCreator(FileContentCreator): + + def __init__(self, scout_api: ScoutAPI, store: Store): + self.scout_api = scout_api + self.store = store + + def create(self, case_path: Path) -> list[str]: + case_id: str = get_case_id_from_path(case_path) + workflow = Workflow(self.store.get_case_by_internal_id(case_id).data_analysis) + genome_build: GenePanelGenomeBuild = get_genome_build(workflow) + return self.scout_api.export_managed_variants(genome_build) diff --git a/cg/services/analysis_starter/configurator/file_creators/utils.py b/cg/services/analysis_starter/configurator/file_creators/utils.py index 31aed9d7b8..3839c07715 100644 --- a/cg/services/analysis_starter/configurator/file_creators/utils.py +++ b/cg/services/analysis_starter/configurator/file_creators/utils.py @@ -6,6 +6,7 @@ from cg.constants.nf_analysis import NextflowFileType from cg.io.csv import write_csv from cg.io.json import write_json +from cg.io.txt import write_txt from cg.io.yaml import write_yaml_nextflow_style from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator @@ -24,13 +25,26 @@ NextflowFileType.PARAMS: write_yaml_nextflow_style, NextflowFileType.SAMPLE_SHEET: write_csv, NextflowFileType.CONFIG: write_json, + NextflowFileType.MANAGED_VARIANTS: write_txt, } +def get_file_name(file_type: NextflowFileType) -> str: + if file_type in [ + NextflowFileType.CONFIG, + NextflowFileType.PARAMS, + NextflowFileType.SAMPLE_SHEET, + ]: + return "{case_id}_" + file_type + else: + return file_type + + def get_file_path(case_path: Path, file_type: NextflowFileType) -> Path: case_id: str = case_path.name extension: str = FILE_TYPE_TO_EXTENSION[file_type] - return Path(case_path, f"{case_id}_{file_type}").with_suffix(extension) + file_name: str = get_file_name(file_type).format(case_id=case_id) + return Path(case_path, file_name).with_suffix(extension) def write_content_to_file(content: any, file_path: Path, file_type: NextflowFileType) -> None: diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py index e0591b7fc3..6fb4af527c 100644 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ b/cg/services/analysis_starter/configurator/implementations/raredisease.py @@ -7,6 +7,12 @@ from cg.services.analysis_starter.configurator.file_creators.config_file import ( NextflowConfigFileContentCreator, ) +from cg.services.analysis_starter.configurator.file_creators.gene_panel import ( + GenePanelFileContentCreator, +) +from cg.services.analysis_starter.configurator.file_creators.managed_variants import ( + ManagedVariantsFileContentCreator, +) from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( RarediseaseParamsFileContentCreator, ) @@ -32,6 +38,8 @@ def __init__( config_content_creator: NextflowConfigFileContentCreator, sample_sheet_content_creator: RarediseaseSampleSheetContentCreator, params_content_creator: RarediseaseParamsFileContentCreator, + gene_panel_content_creator: GenePanelFileContentCreator, + managed_variants_content_creator: ManagedVariantsFileContentCreator, ): super().__init__( config=config, @@ -40,8 +48,10 @@ def __init__( lims=lims, config_content_creator=config_content_creator, ) + self.gene_panel_content_creator = gene_panel_content_creator self.sample_sheet_content_creator = sample_sheet_content_creator self.params_content_creator = params_content_creator + self.managed_variants_content_creator = managed_variants_content_creator def _do_pipeline_specific_actions(self, case_id: str) -> None: """Perform pipeline specific actions.""" From 52878f36a018c4a1f61bdb1651cfea7705b1a000 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Thu, 6 Mar 2025 13:34:40 +0100 Subject: [PATCH 27/43] add first tests for content creation --- .../configurator/file_creators/config_file.py | 6 +- .../analysis_starter/file_content_fixtures.py | 22 ++++++- .../analysis_starter/path_fixtures.py | 46 ++++++++----- .../analysis_starter/test_configurator.py | 24 ------- .../analysis_starter/test_file_creation.py | 65 +++++++++++++++++++ 5 files changed, 121 insertions(+), 42 deletions(-) create mode 100644 tests/services/analysis_starter/test_file_creation.py diff --git a/cg/services/analysis_starter/configurator/file_creators/config_file.py b/cg/services/analysis_starter/configurator/file_creators/config_file.py index 86288c2337..2929cd2c27 100644 --- a/cg/services/analysis_starter/configurator/file_creators/config_file.py +++ b/cg/services/analysis_starter/configurator/file_creators/config_file.py @@ -1,5 +1,8 @@ +from pathlib import Path + from cg.io.txt import concat_txt from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator +from cg.services.analysis_starter.configurator.file_creators.utils import get_case_id_from_path from cg.store.models import Case from cg.store.store import Store @@ -15,8 +18,9 @@ def __init__( self.resources = resources self.account = account - def create(self, case_id: str) -> str: + def create(self, case_path: Path) -> str: """Get the content of the nextflow config file.""" + case_id: str = get_case_id_from_path(case_path) config_files_list: list[str] = [ self.platform, self.workflow_config_path, diff --git a/tests/fixture_plugins/analysis_starter/file_content_fixtures.py b/tests/fixture_plugins/analysis_starter/file_content_fixtures.py index 976bc6b1c5..eac759b31f 100644 --- a/tests/fixture_plugins/analysis_starter/file_content_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/file_content_fixtures.py @@ -30,7 +30,27 @@ def nextflow_cluster_options() -> str: @pytest.fixture def expected_raredisease_config_content( - nextflow_config_base_content: str, nextflow_cluster_options: str, + nextflow_config_base_content: str, ) -> str: return nextflow_cluster_options + nextflow_config_base_content + + +@pytest.fixture +def expected_raredisease_params_file_content( + raredisease_case_path: Path, + raredisease_sample_sheet_path: Path, + raredisease_gene_panel_path: Path, + raredisease_managed_variants_path: Path, +) -> dict: + return { + "input": raredisease_sample_sheet_path, + "outdir": raredisease_case_path, + "target_bed_file": "twistexomecomprehensive_10.2_hg19_design.bed", + "analysis_type": "wgs", + "save_mapped_as_cram": True, + "skip_germlinecnvcaller": True, + "vcfanno_extra_resources": raredisease_managed_variants_path.as_posix(), + "vep_filters_scout_fmt": raredisease_gene_panel_path.as_posix(), + "someparam": "something", + } diff --git a/tests/fixture_plugins/analysis_starter/path_fixtures.py b/tests/fixture_plugins/analysis_starter/path_fixtures.py index 36eac986c8..9394a187d5 100644 --- a/tests/fixture_plugins/analysis_starter/path_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/path_fixtures.py @@ -6,33 +6,47 @@ @pytest.fixture -def raredisease_work_dir_path(raredisease_dir: Path, raredisease_case_id: Path) -> Path: - return Path(raredisease_dir, raredisease_case_id, "work") +def raredisease_case_path(raredisease_dir: Path, raredisease_case_id: str) -> Path: + return Path(raredisease_dir, raredisease_case_id) + + +@pytest.fixture +def raredisease_work_dir_path(raredisease_case_path: Path, raredisease_case_id: Path) -> Path: + return Path(raredisease_case_path, "work") @pytest.fixture(scope="function") -def raredisease_gene_panel_path(raredisease_dir: Path, raredisease_case_id) -> Path: +def raredisease_gene_panel_path(raredisease_case_path: Path) -> Path: """Path to gene panel file.""" - return Path(raredisease_dir, raredisease_case_id, "gene_panels").with_suffix(FileExtensions.BED) + return Path(raredisease_case_path, "gene_panels").with_suffix(FileExtensions.BED) + + +@pytest.fixture +def raredisease_managed_variants_path( + raredisease_case_path: Path, raredisease_case_id: str +) -> Path: + return Path(raredisease_case_path, "managed_variants").with_suffix(FileExtensions.VCF) @pytest.fixture -def raredisease_params_file_path(raredisease_dir: Path, raredisease_case_id: str) -> Path: - return Path( - raredisease_dir, raredisease_case_id, f"{raredisease_case_id}_params_file" - ).with_suffix(FileExtensions.YAML) +def raredisease_params_file_path(raredisease_case_path: Path, raredisease_case_id: str) -> Path: + return Path(raredisease_case_path, f"{raredisease_case_id}_params_file").with_suffix( + FileExtensions.YAML + ) @pytest.fixture -def raredisease_nextflow_config_file_path(raredisease_dir: Path, raredisease_case_id: str) -> Path: - return Path( - raredisease_dir, raredisease_case_id, f"{raredisease_case_id}_nextflow_config" - ).with_suffix(FileExtensions.JSON) +def raredisease_nextflow_config_file_path( + raredisease_case_path: Path, raredisease_case_id: str +) -> Path: + return Path(raredisease_case_path, f"{raredisease_case_id}_nextflow_config").with_suffix( + FileExtensions.JSON + ) @pytest.fixture(scope="function") -def raredisease_sample_sheet_path(raredisease_dir, raredisease_case_id) -> Path: +def raredisease_sample_sheet_path(raredisease_case_path, raredisease_case_id) -> Path: """Path to sample sheet.""" - return Path( - raredisease_dir, raredisease_case_id, f"{raredisease_case_id}_samplesheet" - ).with_suffix(FileExtensions.CSV) + return Path(raredisease_case_path, f"{raredisease_case_id}_sample_sheet").with_suffix( + FileExtensions.CSV + ) diff --git a/tests/services/analysis_starter/test_configurator.py b/tests/services/analysis_starter/test_configurator.py index abee83d94c..8c101a851f 100644 --- a/tests/services/analysis_starter/test_configurator.py +++ b/tests/services/analysis_starter/test_configurator.py @@ -56,27 +56,3 @@ def test_create_nextflow_config_file_exists( # THEN the nextflow config is created assert configurator._get_nextflow_config_path(case_id).exists() - - -@pytest.mark.parametrize( - "configurator_fixture, case_id_fixture, expected_content_fixture", - [("raredisease_configurator", "raredisease_case_id", "expected_raredisease_config_content")], - ids=["raredisease"], -) -def test_get_nextflow_config_content( - configurator_fixture: str, - case_id_fixture: str, - expected_content_fixture: str, - request: pytest.FixtureRequest, -): - """Test that the content of the nextflow config file is accurate for all Nextflow pipelines.""" - # GIVEN a configurator and a case id - configurator: RarediseaseConfigurator = request.getfixturevalue(configurator_fixture) - case_id: str = request.getfixturevalue(case_id_fixture) - - # WHEN getting nextflow config content - nextflow_config_content: str = configurator._get_nextflow_config_content(case_id=case_id) - - # THEN the expected content is returned - expected_content: str = request.getfixturevalue(expected_content_fixture) - assert nextflow_config_content.rstrip() == expected_content.rstrip() diff --git a/tests/services/analysis_starter/test_file_creation.py b/tests/services/analysis_starter/test_file_creation.py new file mode 100644 index 0000000000..009d106c53 --- /dev/null +++ b/tests/services/analysis_starter/test_file_creation.py @@ -0,0 +1,65 @@ +from pathlib import Path + +import pytest + +from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator + + +@pytest.mark.parametrize( + "content_creator_fixture, case_path_fixture, expected_content_fixture", + [ + ( + "raredisease_config_file_content_creator", + "raredisease_case_path", + "expected_raredisease_config_content", + ) + ], + ids=["raredisease"], +) +def test_create_nextflow_config_file_content( + content_creator_fixture: str, + case_path_fixture: str, + expected_content_fixture: str, + request: pytest.FixtureRequest, +): + """Test that a Nextflow config file content is created correctly for all pipelines.""" + # GIVEN a Nextflow config content creator and a case id + content_creator: FileContentCreator = request.getfixturevalue(content_creator_fixture) + case_path: Path = request.getfixturevalue(case_path_fixture) + + # WHEN creating a Nextflow config file + content: str = content_creator.create(case_path) + + # THEN the content of the file is the expected + expected_content: str = request.getfixturevalue(expected_content_fixture) + assert content.rstrip() == expected_content.rstrip() + + +@pytest.mark.parametrize( + "content_creator_fixture, case_path_fixture, expected_content_fixture", + [ + ( + "raredisease_params_content_creator", + "raredisease_case_path", + "expected_raredisease_params_file_content", + ) + ], + ids=["raredisease"], +) +def test_create_params_file_content( + content_creator_fixture: str, + case_path_fixture: str, + expected_content_fixture: str, + request: pytest.FixtureRequest, +): + """Test that the params file content is created correctly for all pipelines.""" + # GIVEN a params file content creator and a case id + content_creator: FileContentCreator = request.getfixturevalue(content_creator_fixture) + case_path: Path = request.getfixturevalue(case_path_fixture) + + # WHEN creating a params file + content: str = content_creator.create(case_path) + + # THEN the content of the file is the expected + expected_content: str = request.getfixturevalue(expected_content_fixture) + assert content == expected_content From cedfd598941bda0931341a8947e6dc838bc4d8fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20Ohlsson=20=C3=85ngnell?= <40887124+islean@users.noreply.github.com> Date: Thu, 6 Mar 2025 16:02:06 +0100 Subject: [PATCH 28/43] Rework raredisease extension (#4266) * Add managed_variants support * Inject creators * Inject creators * Inject creators * Refactor Raredisease configurator into extension * Fix type hint * Fix fixtures * Fix test --- .../configurator/extensions/abstract.py | 6 ++ .../configurator/extensions/raredisease.py | 42 +++++++++++ .../configurator/implementations/nextflow.py | 23 ++++-- .../implementations/raredisease.py | 73 ------------------- tests/conftest.py | 1 + .../analysis_starter/configurator_fixtures.py | 11 +-- .../analysis_starter/extension_fixtures.py | 8 ++ .../analysis_starter/test_configurator.py | 6 +- 8 files changed, 80 insertions(+), 90 deletions(-) create mode 100644 cg/services/analysis_starter/configurator/extensions/abstract.py create mode 100644 cg/services/analysis_starter/configurator/extensions/raredisease.py delete mode 100644 cg/services/analysis_starter/configurator/implementations/raredisease.py create mode 100644 tests/fixture_plugins/analysis_starter/extension_fixtures.py diff --git a/cg/services/analysis_starter/configurator/extensions/abstract.py b/cg/services/analysis_starter/configurator/extensions/abstract.py new file mode 100644 index 0000000000..2a2dba5ef8 --- /dev/null +++ b/cg/services/analysis_starter/configurator/extensions/abstract.py @@ -0,0 +1,6 @@ +from pathlib import Path + + +class PipelineExtension: + def configure(self, case_path: Path): + pass diff --git a/cg/services/analysis_starter/configurator/extensions/raredisease.py b/cg/services/analysis_starter/configurator/extensions/raredisease.py new file mode 100644 index 0000000000..cb5a8ea525 --- /dev/null +++ b/cg/services/analysis_starter/configurator/extensions/raredisease.py @@ -0,0 +1,42 @@ +from pathlib import Path + +from cg.constants.nf_analysis import NextflowFileType +from cg.services.analysis_starter.configurator.extensions.abstract import PipelineExtension +from cg.services.analysis_starter.configurator.file_creators.gene_panel import ( + GenePanelFileContentCreator, +) +from cg.services.analysis_starter.configurator.file_creators.managed_variants import ( + ManagedVariantsFileContentCreator, +) +from cg.services.analysis_starter.configurator.file_creators.utils import create_file + + +class RarediseaseExtension(PipelineExtension): + """Configurator for Raredisease analysis.""" + + def __init__( + self, + gene_panel_content_creator: GenePanelFileContentCreator, + managed_variants_content_creator: ManagedVariantsFileContentCreator, + ): + self.gene_panel_content_creator = gene_panel_content_creator + self.managed_variants_content_creator = managed_variants_content_creator + + def configure(self, case_path: Path) -> None: + """Perform pipeline specific actions.""" + self._create_gene_panel(case_path) + self._create_managed_variants(case_path) + + def _create_gene_panel(self, case_path: Path) -> None: + create_file( + content_creator=self.gene_panel_content_creator, + case_path=case_path, + file_type=NextflowFileType.GENE_PANEL, + ) + + def _create_managed_variants(self, case_path: Path) -> None: + create_file( + content_creator=self.managed_variants_content_creator, + case_path=case_path, + file_type=NextflowFileType.MANAGED_VARIANTS, + ) diff --git a/cg/services/analysis_starter/configurator/implementations/nextflow.py b/cg/services/analysis_starter/configurator/implementations/nextflow.py index 5fc1ee4da2..8671635b08 100644 --- a/cg/services/analysis_starter/configurator/implementations/nextflow.py +++ b/cg/services/analysis_starter/configurator/implementations/nextflow.py @@ -1,4 +1,3 @@ -from abc import abstractmethod from pathlib import Path from cg.apps.housekeeper.hk import HousekeeperAPI @@ -6,9 +5,16 @@ from cg.constants import Workflow from cg.constants.nf_analysis import NextflowFileType from cg.services.analysis_starter.configurator.abstract_service import Configurator +from cg.services.analysis_starter.configurator.extensions.abstract import PipelineExtension from cg.services.analysis_starter.configurator.file_creators.config_file import ( NextflowConfigFileContentCreator, ) +from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( + RarediseaseParamsFileContentCreator, +) +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.raredisease import ( + RarediseaseSampleSheetContentCreator, +) from cg.services.analysis_starter.configurator.file_creators.utils import create_file, get_file_path from cg.services.analysis_starter.configurator.models.nextflow import NextflowCaseConfig from cg.store.models import Case @@ -24,12 +30,18 @@ def __init__( housekeeper_api: HousekeeperAPI, lims: LimsAPI, config_content_creator: NextflowConfigFileContentCreator, + sample_sheet_content_creator: RarediseaseSampleSheetContentCreator, + params_content_creator: RarediseaseParamsFileContentCreator, + pipeline_extension: PipelineExtension = PipelineExtension(), ): self.root_dir: str = config.root self.store: Store = store self.housekeeper_api: HousekeeperAPI = housekeeper_api self.lims: LimsAPI = lims self.config_content_creator = config_content_creator + self.pipeline_extension = pipeline_extension + self.sample_sheet_content_creator = sample_sheet_content_creator + self.params_content_creator = params_content_creator def create_config(self, case_id: str) -> NextflowCaseConfig: """Create a Nextflow case config.""" @@ -37,7 +49,7 @@ def create_config(self, case_id: str) -> NextflowCaseConfig: self._create_sample_sheet(case_id=case_id) self._create_params_file(case_id=case_id) self._create_nextflow_config(case_id=case_id) - self._do_pipeline_specific_actions(case_id=case_id) + self.pipeline_extension.configure(self._get_case_path(case_id)) return NextflowCaseConfig( case_id=case_id, case_priority=self._get_case_priority(case_id), @@ -67,7 +79,7 @@ def _create_sample_sheet(self, case_id: str) -> None: def _create_params_file(self, case_id: str) -> None: """Create parameters file for case.""" create_file( - content_creator=self.params_file_content_creator, + content_creator=self.params_content_creator, case_path=self._get_case_path(case_id=case_id), file_type=NextflowFileType.PARAMS, ) @@ -80,11 +92,6 @@ def _create_nextflow_config(self, case_id: str) -> None: file_type=NextflowFileType.CONFIG, ) - @abstractmethod - def _do_pipeline_specific_actions(self, case_id: str) -> None: - """Perform pipeline specific actions.""" - pass - def _get_case_priority(self, case_id: str) -> str: """Get case priority.""" case: Case = self.store.get_case_by_internal_id(case_id) diff --git a/cg/services/analysis_starter/configurator/implementations/raredisease.py b/cg/services/analysis_starter/configurator/implementations/raredisease.py deleted file mode 100644 index 6fb4af527c..0000000000 --- a/cg/services/analysis_starter/configurator/implementations/raredisease.py +++ /dev/null @@ -1,73 +0,0 @@ -import logging - -from cg.apps.housekeeper.hk import HousekeeperAPI -from cg.apps.lims import LimsAPI -from cg.constants.nf_analysis import NextflowFileType -from cg.models.cg_config import RarediseaseConfig -from cg.services.analysis_starter.configurator.file_creators.config_file import ( - NextflowConfigFileContentCreator, -) -from cg.services.analysis_starter.configurator.file_creators.gene_panel import ( - GenePanelFileContentCreator, -) -from cg.services.analysis_starter.configurator.file_creators.managed_variants import ( - ManagedVariantsFileContentCreator, -) -from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( - RarediseaseParamsFileContentCreator, -) -from cg.services.analysis_starter.configurator.file_creators.sample_sheet.raredisease import ( - RarediseaseSampleSheetContentCreator, -) -from cg.services.analysis_starter.configurator.file_creators.utils import create_file -from cg.services.analysis_starter.configurator.implementations.nextflow import NextflowConfigurator -from cg.store.store import Store - -LOG = logging.getLogger(__name__) - - -class RarediseaseConfigurator(NextflowConfigurator): - """Configurator for Raredisease analysis.""" - - def __init__( - self, - config: RarediseaseConfig, - store: Store, - housekeeper_api: HousekeeperAPI, - lims: LimsAPI, - config_content_creator: NextflowConfigFileContentCreator, - sample_sheet_content_creator: RarediseaseSampleSheetContentCreator, - params_content_creator: RarediseaseParamsFileContentCreator, - gene_panel_content_creator: GenePanelFileContentCreator, - managed_variants_content_creator: ManagedVariantsFileContentCreator, - ): - super().__init__( - config=config, - store=store, - housekeeper_api=housekeeper_api, - lims=lims, - config_content_creator=config_content_creator, - ) - self.gene_panel_content_creator = gene_panel_content_creator - self.sample_sheet_content_creator = sample_sheet_content_creator - self.params_content_creator = params_content_creator - self.managed_variants_content_creator = managed_variants_content_creator - - def _do_pipeline_specific_actions(self, case_id: str) -> None: - """Perform pipeline specific actions.""" - self._create_gene_panel(case_id) - self._create_managed_variants(case_id) - - def _create_gene_panel(self, case_id: str) -> None: - create_file( - content_creator=self.gene_panel_content_creator, - case_path=self._get_case_path(case_id=case_id), - file_type=NextflowFileType.GENE_PANEL, - ) - - def _create_managed_variants(self, case_id: str) -> None: - create_file( - content_creator=self.managed_variants_content_creator, - case_path=self._get_case_path(case_id=case_id), - file_type=NextflowFileType.MANAGED_VARIANTS, - ) diff --git a/tests/conftest.py b/tests/conftest.py index adc8eaa787..23965c2708 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,6 +95,7 @@ "tests.fixture_plugins.analysis_starter.case_config_fixtures", "tests.fixture_plugins.analysis_starter.config_file_content_creators", "tests.fixture_plugins.analysis_starter.configurator_fixtures", + "tests.fixture_plugins.analysis_starter.extension_fixtures" "tests.fixture_plugins.analysis_starter.file_content_fixtures", "tests.fixture_plugins.analysis_starter.path_fixtures", "tests.fixture_plugins.analysis_starter.sample_sheet_content_creators", diff --git a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py index e96151eb4e..9141561b0b 100644 --- a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py @@ -1,6 +1,7 @@ import pytest from cg.models.cg_config import CGConfig +from cg.services.analysis_starter.configurator.extensions.abstract import PipelineExtension from cg.services.analysis_starter.configurator.file_creators.config_file import ( NextflowConfigFileContentCreator, ) @@ -10,9 +11,7 @@ from cg.services.analysis_starter.configurator.file_creators.sample_sheet.raredisease import ( RarediseaseSampleSheetContentCreator, ) -from cg.services.analysis_starter.configurator.implementations.raredisease import ( - RarediseaseConfigurator, -) +from cg.services.analysis_starter.configurator.implementations.nextflow import NextflowConfigurator @pytest.fixture @@ -21,8 +20,9 @@ def raredisease_configurator( raredisease_config_file_content_creator: NextflowConfigFileContentCreator, raredisease_sample_sheet_content_creator: RarediseaseSampleSheetContentCreator, raredisease_params_content_creator: RarediseaseParamsFileContentCreator, -) -> RarediseaseConfigurator: - return RarediseaseConfigurator( + raredisease_extension: PipelineExtension, +) -> NextflowConfigurator: + return NextflowConfigurator( store=raredisease_context.status_db, config=raredisease_context.raredisease, housekeeper_api=raredisease_context.housekeeper_api, @@ -30,4 +30,5 @@ def raredisease_configurator( config_content_creator=raredisease_config_file_content_creator, sample_sheet_content_creator=raredisease_sample_sheet_content_creator, params_content_creator=raredisease_params_content_creator, + pipeline_extension=raredisease_extension, ) diff --git a/tests/fixture_plugins/analysis_starter/extension_fixtures.py b/tests/fixture_plugins/analysis_starter/extension_fixtures.py new file mode 100644 index 0000000000..5326731c4f --- /dev/null +++ b/tests/fixture_plugins/analysis_starter/extension_fixtures.py @@ -0,0 +1,8 @@ +import pytest + +from cg.services.analysis_starter.configurator.extensions.abstract import PipelineExtension + + +@pytest.fixture +def raredisease_extension() -> PipelineExtension: + return PipelineExtension() diff --git a/tests/services/analysis_starter/test_configurator.py b/tests/services/analysis_starter/test_configurator.py index 8c101a851f..2b026e63cf 100644 --- a/tests/services/analysis_starter/test_configurator.py +++ b/tests/services/analysis_starter/test_configurator.py @@ -2,9 +2,7 @@ from cg.services.analysis_starter.configurator.abstract_model import CaseConfig from cg.services.analysis_starter.configurator.abstract_service import Configurator -from cg.services.analysis_starter.configurator.implementations.raredisease import ( - RarediseaseConfigurator, -) +from cg.services.analysis_starter.configurator.implementations.nextflow import NextflowConfigurator @pytest.mark.parametrize( @@ -45,7 +43,7 @@ def test_create_nextflow_config_file_exists( ): """Test that a nextflow config file is created fro all Nextflow pipelines.""" # GIVEN a configurator and a case id - configurator: RarediseaseConfigurator = request.getfixturevalue(configurator_fixture) + configurator: NextflowConfigurator = request.getfixturevalue(configurator_fixture) case_id: str = request.getfixturevalue(case_id_fixture) # GIVEN that a case directory exists From 2932e07b4b8cafd346961a460dfa306eaa0a8181 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 7 Mar 2025 09:31:40 +0100 Subject: [PATCH 29/43] remove inheritance of nextflow configurator (#4267) * remove inheritance of nextflow configurator * add sample sheet path as parameter to params * fix sample sheet path in configurator * address comments --- .../configurator/extensions/raredisease.py | 24 ++--- .../configurator/file_creators/abstract.py | 9 -- .../configurator/file_creators/config_file.py | 18 +++- .../configurator/file_creators/gene_panel.py | 3 +- .../file_creators/managed_variants.py | 3 +- .../file_creators/params_file/abstract.py | 14 +++ .../file_creators/params_file/raredisease.py | 42 +++++---- .../file_creators/sample_sheet/abstract.py | 14 +++ .../file_creators/sample_sheet/raredisease.py | 22 ++++- .../configurator/file_creators/utils.py | 60 +------------ .../configurator/implementations/nextflow.py | 88 +++++++------------ tests/conftest.py | 2 +- .../config_file_content_creators.py | 8 +- .../analysis_starter/configurator_fixtures.py | 18 ++-- .../params_file_content_creators.py | 8 +- .../sample_sheet_content_creators.py | 8 +- .../specific_file_creators.py | 8 +- .../analysis_starter/test_configurator.py | 17 ++-- .../analysis_starter/test_file_creation.py | 33 ++++--- 19 files changed, 188 insertions(+), 211 deletions(-) delete mode 100644 cg/services/analysis_starter/configurator/file_creators/abstract.py create mode 100644 cg/services/analysis_starter/configurator/file_creators/params_file/abstract.py create mode 100644 cg/services/analysis_starter/configurator/file_creators/sample_sheet/abstract.py diff --git a/cg/services/analysis_starter/configurator/extensions/raredisease.py b/cg/services/analysis_starter/configurator/extensions/raredisease.py index cb5a8ea525..5076edbe55 100644 --- a/cg/services/analysis_starter/configurator/extensions/raredisease.py +++ b/cg/services/analysis_starter/configurator/extensions/raredisease.py @@ -1,14 +1,10 @@ from pathlib import Path -from cg.constants.nf_analysis import NextflowFileType from cg.services.analysis_starter.configurator.extensions.abstract import PipelineExtension -from cg.services.analysis_starter.configurator.file_creators.gene_panel import ( - GenePanelFileContentCreator, -) +from cg.services.analysis_starter.configurator.file_creators.gene_panel import GenePanelFileCreator from cg.services.analysis_starter.configurator.file_creators.managed_variants import ( - ManagedVariantsFileContentCreator, + ManagedVariantsFileCreator, ) -from cg.services.analysis_starter.configurator.file_creators.utils import create_file class RarediseaseExtension(PipelineExtension): @@ -16,8 +12,8 @@ class RarediseaseExtension(PipelineExtension): def __init__( self, - gene_panel_content_creator: GenePanelFileContentCreator, - managed_variants_content_creator: ManagedVariantsFileContentCreator, + gene_panel_content_creator: GenePanelFileCreator, + managed_variants_content_creator: ManagedVariantsFileCreator, ): self.gene_panel_content_creator = gene_panel_content_creator self.managed_variants_content_creator = managed_variants_content_creator @@ -28,15 +24,7 @@ def configure(self, case_path: Path) -> None: self._create_managed_variants(case_path) def _create_gene_panel(self, case_path: Path) -> None: - create_file( - content_creator=self.gene_panel_content_creator, - case_path=case_path, - file_type=NextflowFileType.GENE_PANEL, - ) + raise NotImplementedError def _create_managed_variants(self, case_path: Path) -> None: - create_file( - content_creator=self.managed_variants_content_creator, - case_path=case_path, - file_type=NextflowFileType.MANAGED_VARIANTS, - ) + raise NotImplementedError diff --git a/cg/services/analysis_starter/configurator/file_creators/abstract.py b/cg/services/analysis_starter/configurator/file_creators/abstract.py deleted file mode 100644 index 7fb09d4d42..0000000000 --- a/cg/services/analysis_starter/configurator/file_creators/abstract.py +++ /dev/null @@ -1,9 +0,0 @@ -from abc import ABC, abstractmethod -from pathlib import Path - - -class FileContentCreator(ABC): - - @abstractmethod - def create(self, case_path: Path) -> any: - pass diff --git a/cg/services/analysis_starter/configurator/file_creators/config_file.py b/cg/services/analysis_starter/configurator/file_creators/config_file.py index 2929cd2c27..20162b6069 100644 --- a/cg/services/analysis_starter/configurator/file_creators/config_file.py +++ b/cg/services/analysis_starter/configurator/file_creators/config_file.py @@ -1,13 +1,14 @@ from pathlib import Path +from cg.constants import FileExtensions +from cg.io.json import write_json from cg.io.txt import concat_txt -from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator from cg.services.analysis_starter.configurator.file_creators.utils import get_case_id_from_path from cg.store.models import Case from cg.store.store import Store -class NextflowConfigFileContentCreator(FileContentCreator): +class NextflowConfigFileCreator: def __init__( self, store: Store, platform: str, workflow_config_path: str, resources: str, account: str @@ -18,7 +19,18 @@ def __init__( self.resources = resources self.account = account - def create(self, case_path: Path) -> str: + @staticmethod + def get_file_path(case_id: str, case_path: Path) -> Path: + """Return the path to the nextflow config file.""" + return Path(case_path, f"{case_id}_nextflow_config").with_suffix(FileExtensions.JSON) + + def create(self, case_id: str, case_path: Path) -> None: + """Create the nextflow config file for a case.""" + file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) + content: str = self._get_content(case_path=case_path) + write_json(file_path=file_path, content=content) + + def _get_content(self, case_path: Path) -> str: """Get the content of the nextflow config file.""" case_id: str = get_case_id_from_path(case_path) config_files_list: list[str] = [ diff --git a/cg/services/analysis_starter/configurator/file_creators/gene_panel.py b/cg/services/analysis_starter/configurator/file_creators/gene_panel.py index 36868b154e..34841de31c 100644 --- a/cg/services/analysis_starter/configurator/file_creators/gene_panel.py +++ b/cg/services/analysis_starter/configurator/file_creators/gene_panel.py @@ -3,7 +3,6 @@ from cg.apps.scout.scoutapi import ScoutAPI from cg.constants import GenePanelMasterList from cg.constants.gene_panel import GenePanelCombo, GenePanelGenomeBuild -from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator from cg.services.analysis_starter.configurator.file_creators.utils import ( get_case_id_from_path, get_genome_build, @@ -12,7 +11,7 @@ from cg.store.store import Store -class GenePanelFileContentCreator(FileContentCreator): +class GenePanelFileCreator: def __init__(self, store: Store, scout_api: ScoutAPI): self.store = store self.scout_api = scout_api diff --git a/cg/services/analysis_starter/configurator/file_creators/managed_variants.py b/cg/services/analysis_starter/configurator/file_creators/managed_variants.py index 4c8ed9eab4..375ebc24c6 100644 --- a/cg/services/analysis_starter/configurator/file_creators/managed_variants.py +++ b/cg/services/analysis_starter/configurator/file_creators/managed_variants.py @@ -3,7 +3,6 @@ from cg.apps.scout.scoutapi import ScoutAPI from cg.constants import Workflow from cg.constants.gene_panel import GenePanelGenomeBuild -from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator from cg.services.analysis_starter.configurator.file_creators.utils import ( get_case_id_from_path, get_genome_build, @@ -11,7 +10,7 @@ from cg.store.store import Store -class ManagedVariantsFileContentCreator(FileContentCreator): +class ManagedVariantsFileCreator: def __init__(self, scout_api: ScoutAPI, store: Store): self.scout_api = scout_api diff --git a/cg/services/analysis_starter/configurator/file_creators/params_file/abstract.py b/cg/services/analysis_starter/configurator/file_creators/params_file/abstract.py new file mode 100644 index 0000000000..3252dc9bb7 --- /dev/null +++ b/cg/services/analysis_starter/configurator/file_creators/params_file/abstract.py @@ -0,0 +1,14 @@ +from abc import ABC, abstractmethod +from pathlib import Path + + +class ParamsFileCreator(ABC): + + @staticmethod + @abstractmethod + def get_file_path(case_id: str, case_path: Path) -> Path: + pass + + @abstractmethod + def create(self, case_id: str, case_path: Path, sample_sheet_path: Path) -> any: + pass diff --git a/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py b/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py index a31f32f24e..4c796ae720 100644 --- a/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py +++ b/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py @@ -1,46 +1,56 @@ from pathlib import Path from cg.apps.lims import LimsAPI -from cg.constants import DEFAULT_CAPTURE_KIT -from cg.constants.nf_analysis import NextflowFileType +from cg.constants import DEFAULT_CAPTURE_KIT, FileExtensions from cg.constants.scout import ScoutExportFileName from cg.constants.tb import AnalysisType from cg.exc import CgDataError -from cg.io.yaml import read_yaml +from cg.io.yaml import read_yaml, write_yaml_nextflow_style from cg.models.raredisease.raredisease import RarediseaseParameters -from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator +from cg.services.analysis_starter.configurator.file_creators.params_file.abstract import ( + ParamsFileCreator, +) from cg.services.analysis_starter.configurator.file_creators.params_file.utils import ( replace_values_in_params_file, ) -from cg.services.analysis_starter.configurator.file_creators.utils import ( - get_case_id_from_path, - get_file_path, -) from cg.store.models import BedVersion, Case, Sample from cg.store.store import Store -class RarediseaseParamsFileContentCreator(FileContentCreator): +class RarediseaseParamsFileCreator(ParamsFileCreator): def __init__(self, store: Store, lims: LimsAPI, params: str): self.store = store self.lims = lims self.params = params - def create(self, case_path: Path) -> dict: + @staticmethod + def get_file_path(case_id: str, case_path: Path) -> Path: + """Return the path to the params file.""" + return Path(case_path, f"{case_id}_params_file").with_suffix(FileExtensions.YAML) + + def create(self, case_id: str, case_path: Path, sample_sheet_path: Path) -> None: + """Create the params file for a case.""" + file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) + content: dict = self._get_content( + case_id=case_id, case_path=case_path, sample_sheet_path=sample_sheet_path + ) + write_yaml_nextflow_style(file_path=file_path, content=content) + + def _get_content(self, case_id: str, case_path: Path, sample_sheet_path: Path) -> dict: """Create parameters file for a case.""" - case_workflow_parameters: dict = self._get_case_parameters(case_path).model_dump() + case_workflow_parameters: dict = self._get_case_parameters( + case_id=case_id, case_path=case_path, sample_sheet_path=sample_sheet_path + ).model_dump() workflow_parameters: any = read_yaml(self.params) parameters: dict = case_workflow_parameters | workflow_parameters curated_parameters: dict = replace_values_in_params_file(parameters) return curated_parameters - def _get_case_parameters(self, case_path: Path) -> RarediseaseParameters: + def _get_case_parameters( + self, case_id: str, case_path: Path, sample_sheet_path: Path + ) -> RarediseaseParameters: """Return case-specific parameters for the analysis.""" - case_id: str = get_case_id_from_path(case_path=case_path) - sample_sheet_path: Path = get_file_path( - case_path=case_path, file_type=NextflowFileType.SAMPLE_SHEET - ) analysis_type: str = self._get_data_analysis_type(case_id=case_id) target_bed_file: str = self._get_target_bed(case_id=case_id, analysis_type=analysis_type) skip_germlinecnvcaller: bool = self._get_germlinecnvcaller_flag(analysis_type=analysis_type) diff --git a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/abstract.py b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/abstract.py new file mode 100644 index 0000000000..a7df467b36 --- /dev/null +++ b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/abstract.py @@ -0,0 +1,14 @@ +from abc import ABC, abstractmethod +from pathlib import Path + + +class NextflowSampleSheetCreator(ABC): + + @staticmethod + @abstractmethod + def get_file_path(case_id: str, case_path: Path) -> Path: + pass + + @abstractmethod + def create(self, case_id: str, case_path: Path) -> any: + pass diff --git a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py index 472751b2c0..5337f746f9 100644 --- a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py +++ b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py @@ -4,8 +4,9 @@ from cg.apps.housekeeper.hk import HousekeeperAPI from cg.apps.lims import LimsAPI -from cg.constants import SequencingFileTag +from cg.constants import FileExtensions, SequencingFileTag from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex +from cg.io.csv import write_csv from cg.io.gzip import read_gzip_first_line from cg.meta.workflow.fastq import _is_undetermined_in_path from cg.models.fastq import FastqFileMeta, GetFastqFileMeta @@ -13,7 +14,9 @@ RarediseaseSampleSheetEntry, RarediseaseSampleSheetHeaders, ) -from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.abstract import ( + NextflowSampleSheetCreator, +) from cg.services.analysis_starter.configurator.file_creators.utils import get_case_id_from_path from cg.store.models import Case, CaseSample, Sample from cg.store.store import Store @@ -21,14 +24,25 @@ LOG = logging.getLogger(__name__) -class RarediseaseSampleSheetContentCreator(FileContentCreator): +class RarediseaseSampleSheetCreator(NextflowSampleSheetCreator): def __init__(self, store: Store, housekeeper_api: HousekeeperAPI, lims: LimsAPI): self.store = store self.housekeeper_api = housekeeper_api self.lims = lims - def create(self, case_path: Path) -> any: + @staticmethod + def get_file_path(case_id: str, case_path: Path) -> Path: + """Return the path to the params file.""" + return Path(case_path, f"{case_id}_sample_sheet").with_suffix(FileExtensions.CSV) + + def create(self, case_id: str, case_path: Path) -> None: + """Create the sample sheet for a case.""" + file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) + content: any = self._get_content(case_path=case_path) + write_csv(file_path=file_path, content=content) + + def _get_content(self, case_path: Path) -> any: """Return formatted information required to build a sample sheet for a case. This contains information for all samples linked to the case.""" case_id: str = get_case_id_from_path(case_path=case_path) diff --git a/cg/services/analysis_starter/configurator/file_creators/utils.py b/cg/services/analysis_starter/configurator/file_creators/utils.py index 3839c07715..e382b3efe6 100644 --- a/cg/services/analysis_starter/configurator/file_creators/utils.py +++ b/cg/services/analysis_starter/configurator/file_creators/utils.py @@ -1,65 +1,7 @@ -import logging from pathlib import Path -from cg.constants import FileExtensions, Workflow +from cg.constants import Workflow from cg.constants.gene_panel import GenePanelGenomeBuild -from cg.constants.nf_analysis import NextflowFileType -from cg.io.csv import write_csv -from cg.io.json import write_json -from cg.io.txt import write_txt -from cg.io.yaml import write_yaml_nextflow_style -from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator - -LOG = logging.getLogger(__name__) - -FILE_TYPE_TO_EXTENSION: dict[NextflowFileType, str] = { - NextflowFileType.PARAMS: FileExtensions.YAML, - NextflowFileType.SAMPLE_SHEET: FileExtensions.CSV, - NextflowFileType.CONFIG: FileExtensions.JSON, - NextflowFileType.GENE_PANEL: FileExtensions.BED, - NextflowFileType.MANAGED_VARIANTS: FileExtensions.VCF, -} - -# TODO: Adapt to gene panel and variant files -FILE_TYPE_TO_WRITER: dict[NextflowFileType, callable] = { - NextflowFileType.PARAMS: write_yaml_nextflow_style, - NextflowFileType.SAMPLE_SHEET: write_csv, - NextflowFileType.CONFIG: write_json, - NextflowFileType.MANAGED_VARIANTS: write_txt, -} - - -def get_file_name(file_type: NextflowFileType) -> str: - if file_type in [ - NextflowFileType.CONFIG, - NextflowFileType.PARAMS, - NextflowFileType.SAMPLE_SHEET, - ]: - return "{case_id}_" + file_type - else: - return file_type - - -def get_file_path(case_path: Path, file_type: NextflowFileType) -> Path: - case_id: str = case_path.name - extension: str = FILE_TYPE_TO_EXTENSION[file_type] - file_name: str = get_file_name(file_type).format(case_id=case_id) - return Path(case_path, file_name).with_suffix(extension) - - -def write_content_to_file(content: any, file_path: Path, file_type: NextflowFileType) -> None: - LOG.debug(f"Writing sample sheet to {file_path}") - FILE_TYPE_TO_WRITER[file_type](content=content, file_path=file_path) - - -def create_file( - content_creator: FileContentCreator, - case_path: Path, - file_type: NextflowFileType, -) -> None: - file_path: Path = get_file_path(case_path=case_path, file_type=file_type) - content: any = content_creator.create(case_path) - write_content_to_file(content=content, file_path=file_path, file_type=file_type) def get_case_id_from_path(case_path: Path) -> str: diff --git a/cg/services/analysis_starter/configurator/implementations/nextflow.py b/cg/services/analysis_starter/configurator/implementations/nextflow.py index 8671635b08..8dd89b4fe9 100644 --- a/cg/services/analysis_starter/configurator/implementations/nextflow.py +++ b/cg/services/analysis_starter/configurator/implementations/nextflow.py @@ -3,19 +3,17 @@ from cg.apps.housekeeper.hk import HousekeeperAPI from cg.apps.lims import LimsAPI from cg.constants import Workflow -from cg.constants.nf_analysis import NextflowFileType from cg.services.analysis_starter.configurator.abstract_service import Configurator from cg.services.analysis_starter.configurator.extensions.abstract import PipelineExtension from cg.services.analysis_starter.configurator.file_creators.config_file import ( - NextflowConfigFileContentCreator, + NextflowConfigFileCreator, ) -from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( - RarediseaseParamsFileContentCreator, +from cg.services.analysis_starter.configurator.file_creators.params_file.abstract import ( + ParamsFileCreator, ) -from cg.services.analysis_starter.configurator.file_creators.sample_sheet.raredisease import ( - RarediseaseSampleSheetContentCreator, +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.abstract import ( + NextflowSampleSheetCreator, ) -from cg.services.analysis_starter.configurator.file_creators.utils import create_file, get_file_path from cg.services.analysis_starter.configurator.models.nextflow import NextflowCaseConfig from cg.store.models import Case from cg.store.store import Store @@ -29,68 +27,56 @@ def __init__( store: Store, housekeeper_api: HousekeeperAPI, lims: LimsAPI, - config_content_creator: NextflowConfigFileContentCreator, - sample_sheet_content_creator: RarediseaseSampleSheetContentCreator, - params_content_creator: RarediseaseParamsFileContentCreator, + config_file_creator: NextflowConfigFileCreator, + sample_sheet_creator: NextflowSampleSheetCreator, + params_file_creator: ParamsFileCreator, pipeline_extension: PipelineExtension = PipelineExtension(), ): self.root_dir: str = config.root self.store: Store = store self.housekeeper_api: HousekeeperAPI = housekeeper_api self.lims: LimsAPI = lims - self.config_content_creator = config_content_creator + self.config_file_creator = config_file_creator self.pipeline_extension = pipeline_extension - self.sample_sheet_content_creator = sample_sheet_content_creator - self.params_content_creator = params_content_creator + self.sample_sheet_creator = sample_sheet_creator + self.params_file_creator = params_file_creator def create_config(self, case_id: str) -> NextflowCaseConfig: """Create a Nextflow case config.""" + case_path: Path = self._get_case_path(case_id=case_id) self._create_case_directory(case_id=case_id) - self._create_sample_sheet(case_id=case_id) - self._create_params_file(case_id=case_id) - self._create_nextflow_config(case_id=case_id) - self.pipeline_extension.configure(self._get_case_path(case_id)) + sample_sheet_path: Path = self.sample_sheet_creator.get_file_path( + case_id=case_id, case_path=case_path + ) + self.sample_sheet_creator.create(case_id=case_id, case_path=case_path) + self.params_file_creator.create( + case_id=case_id, case_path=case_path, sample_sheet_path=sample_sheet_path + ) + self.config_file_creator.create(case_id=case_id, case_path=case_path) + config_file_path: Path = self.config_file_creator.get_file_path( + case_id=case_id, case_path=case_path + ) + params_file_path: Path = self.params_file_creator.get_file_path( + case_id=case_id, case_path=case_path + ) + self.pipeline_extension.configure(case_path) return NextflowCaseConfig( case_id=case_id, case_priority=self._get_case_priority(case_id), workflow=self._get_case_workflow(case_id), - netxflow_config_file=self._get_nextflow_config_path(case_id=case_id).as_posix(), - params_file=self._get_params_file_path(case_id=case_id).as_posix(), + netxflow_config_file=config_file_path.as_posix(), + params_file=params_file_path.as_posix(), work_dir=self._get_work_dir(case_id=case_id).as_posix(), ) - def _create_case_directory(self, case_id: str) -> None: - """Create case working directory.""" - case_path: Path = self._get_case_path(case_id=case_id) - case_path.mkdir(parents=True, exist_ok=True) - def _get_case_path(self, case_id: str) -> Path: """Path to case working directory.""" return Path(self.root_dir, case_id) - def _create_sample_sheet(self, case_id: str) -> None: - """Create sample sheet for case.""" - create_file( - content_creator=self.sample_sheet_content_creator, - case_path=self._get_case_path(case_id=case_id), - file_type=NextflowFileType.SAMPLE_SHEET, - ) - - def _create_params_file(self, case_id: str) -> None: - """Create parameters file for case.""" - create_file( - content_creator=self.params_content_creator, - case_path=self._get_case_path(case_id=case_id), - file_type=NextflowFileType.PARAMS, - ) - - def _create_nextflow_config(self, case_id: str) -> None: - """Create nextflow config file for case.""" - create_file( - content_creator=self.config_content_creator, - case_path=self._get_case_path(case_id=case_id), - file_type=NextflowFileType.CONFIG, - ) + def _create_case_directory(self, case_id: str) -> None: + """Create case working directory.""" + case_path: Path = self._get_case_path(case_id=case_id) + case_path.mkdir(parents=True, exist_ok=True) def _get_case_priority(self, case_id: str) -> str: """Get case priority.""" @@ -102,13 +88,5 @@ def _get_case_workflow(self, case_id: str) -> Workflow: case: Case = self.store.get_case_by_internal_id(case_id) return Workflow(case.data_analysis) - def _get_nextflow_config_path(self, case_id: str) -> Path: - case_path: Path = self._get_case_path(case_id) - return get_file_path(case_path=case_path, file_type=NextflowFileType.CONFIG) - - def _get_params_file_path(self, case_id: str) -> Path: - case_path: Path = self._get_case_path(case_id) - return get_file_path(case_path=case_path, file_type=NextflowFileType.PARAMS) - def _get_work_dir(self, case_id: str) -> Path: return Path(self.root_dir, case_id, "work") diff --git a/tests/conftest.py b/tests/conftest.py index 23965c2708..9fbd0ba3c0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,7 +95,7 @@ "tests.fixture_plugins.analysis_starter.case_config_fixtures", "tests.fixture_plugins.analysis_starter.config_file_content_creators", "tests.fixture_plugins.analysis_starter.configurator_fixtures", - "tests.fixture_plugins.analysis_starter.extension_fixtures" + "tests.fixture_plugins.analysis_starter.extension_fixtures", "tests.fixture_plugins.analysis_starter.file_content_fixtures", "tests.fixture_plugins.analysis_starter.path_fixtures", "tests.fixture_plugins.analysis_starter.sample_sheet_content_creators", diff --git a/tests/fixture_plugins/analysis_starter/config_file_content_creators.py b/tests/fixture_plugins/analysis_starter/config_file_content_creators.py index 3f8ed79636..9cbb57305b 100644 --- a/tests/fixture_plugins/analysis_starter/config_file_content_creators.py +++ b/tests/fixture_plugins/analysis_starter/config_file_content_creators.py @@ -2,15 +2,15 @@ from cg.models.cg_config import CGConfig from cg.services.analysis_starter.configurator.file_creators.config_file import ( - NextflowConfigFileContentCreator, + NextflowConfigFileCreator, ) @pytest.fixture -def raredisease_config_file_content_creator( +def raredisease_config_file_creator( raredisease_context: CGConfig, -) -> NextflowConfigFileContentCreator: - return NextflowConfigFileContentCreator( +) -> NextflowConfigFileCreator: + return NextflowConfigFileCreator( store=raredisease_context.status_db, platform=raredisease_context.raredisease.platform, workflow_config_path=raredisease_context.raredisease.config, diff --git a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py index 9141561b0b..c580069b6d 100644 --- a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py @@ -3,13 +3,13 @@ from cg.models.cg_config import CGConfig from cg.services.analysis_starter.configurator.extensions.abstract import PipelineExtension from cg.services.analysis_starter.configurator.file_creators.config_file import ( - NextflowConfigFileContentCreator, + NextflowConfigFileCreator, ) from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( - RarediseaseParamsFileContentCreator, + RarediseaseParamsFileCreator, ) from cg.services.analysis_starter.configurator.file_creators.sample_sheet.raredisease import ( - RarediseaseSampleSheetContentCreator, + RarediseaseSampleSheetCreator, ) from cg.services.analysis_starter.configurator.implementations.nextflow import NextflowConfigurator @@ -17,9 +17,9 @@ @pytest.fixture def raredisease_configurator( raredisease_context: CGConfig, - raredisease_config_file_content_creator: NextflowConfigFileContentCreator, - raredisease_sample_sheet_content_creator: RarediseaseSampleSheetContentCreator, - raredisease_params_content_creator: RarediseaseParamsFileContentCreator, + raredisease_config_file_creator: NextflowConfigFileCreator, + raredisease_sample_sheet_creator: RarediseaseSampleSheetCreator, + raredisease_params_file_creator: RarediseaseParamsFileCreator, raredisease_extension: PipelineExtension, ) -> NextflowConfigurator: return NextflowConfigurator( @@ -27,8 +27,8 @@ def raredisease_configurator( config=raredisease_context.raredisease, housekeeper_api=raredisease_context.housekeeper_api, lims=raredisease_context.lims_api, - config_content_creator=raredisease_config_file_content_creator, - sample_sheet_content_creator=raredisease_sample_sheet_content_creator, - params_content_creator=raredisease_params_content_creator, + config_file_creator=raredisease_config_file_creator, + sample_sheet_creator=raredisease_sample_sheet_creator, + params_file_creator=raredisease_params_file_creator, pipeline_extension=raredisease_extension, ) diff --git a/tests/fixture_plugins/analysis_starter/params_file_content_creators.py b/tests/fixture_plugins/analysis_starter/params_file_content_creators.py index cd216ec69e..7161a62ad7 100644 --- a/tests/fixture_plugins/analysis_starter/params_file_content_creators.py +++ b/tests/fixture_plugins/analysis_starter/params_file_content_creators.py @@ -2,15 +2,15 @@ from cg.models.cg_config import CGConfig from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( - RarediseaseParamsFileContentCreator, + RarediseaseParamsFileCreator, ) @pytest.fixture -def raredisease_params_content_creator( +def raredisease_params_file_creator( raredisease_context: CGConfig, -) -> RarediseaseParamsFileContentCreator: - return RarediseaseParamsFileContentCreator( +) -> RarediseaseParamsFileCreator: + return RarediseaseParamsFileCreator( store=raredisease_context.status_db, lims=raredisease_context.lims_api, params=raredisease_context.raredisease.params, diff --git a/tests/fixture_plugins/analysis_starter/sample_sheet_content_creators.py b/tests/fixture_plugins/analysis_starter/sample_sheet_content_creators.py index 70b2bf076a..094c9baf81 100644 --- a/tests/fixture_plugins/analysis_starter/sample_sheet_content_creators.py +++ b/tests/fixture_plugins/analysis_starter/sample_sheet_content_creators.py @@ -2,15 +2,15 @@ from cg.models.cg_config import CGConfig from cg.services.analysis_starter.configurator.file_creators.sample_sheet.raredisease import ( - RarediseaseSampleSheetContentCreator, + RarediseaseSampleSheetCreator, ) @pytest.fixture -def raredisease_sample_sheet_content_creator( +def raredisease_sample_sheet_creator( raredisease_context: CGConfig, -) -> RarediseaseSampleSheetContentCreator: - return RarediseaseSampleSheetContentCreator( +) -> RarediseaseSampleSheetCreator: + return RarediseaseSampleSheetCreator( store=raredisease_context.status_db, housekeeper_api=raredisease_context.housekeeper_api, lims=raredisease_context.lims_api, diff --git a/tests/fixture_plugins/analysis_starter/specific_file_creators.py b/tests/fixture_plugins/analysis_starter/specific_file_creators.py index 376b72da73..f3bdbfb216 100644 --- a/tests/fixture_plugins/analysis_starter/specific_file_creators.py +++ b/tests/fixture_plugins/analysis_starter/specific_file_creators.py @@ -1,16 +1,14 @@ import pytest from cg.models.cg_config import CGConfig -from cg.services.analysis_starter.configurator.file_creators.gene_panel import ( - GenePanelFileContentCreator, -) +from cg.services.analysis_starter.configurator.file_creators.gene_panel import GenePanelFileCreator @pytest.fixture def raredisease_gene_panel_content_creator( raredisease_context: CGConfig, -) -> GenePanelFileContentCreator: - return GenePanelFileContentCreator( +) -> GenePanelFileCreator: + return GenePanelFileCreator( store=raredisease_context.status_db, scout_api=raredisease_context.scout_api, ) diff --git a/tests/services/analysis_starter/test_configurator.py b/tests/services/analysis_starter/test_configurator.py index 2b026e63cf..a8f7961884 100644 --- a/tests/services/analysis_starter/test_configurator.py +++ b/tests/services/analysis_starter/test_configurator.py @@ -1,3 +1,5 @@ +from pathlib import Path + import pytest from cg.services.analysis_starter.configurator.abstract_model import CaseConfig @@ -32,25 +34,30 @@ def test_create_config( @pytest.mark.parametrize( - "configurator_fixture, case_id_fixture", - [("raredisease_configurator", "raredisease_case_id")], + "configurator_fixture, case_id_fixture, case_path_fixture", + [("raredisease_configurator", "raredisease_case_id", "raredisease_case_path")], ids=["raredisease"], ) def test_create_nextflow_config_file_exists( configurator_fixture: str, case_id_fixture: str, + case_path_fixture: str, request: pytest.FixtureRequest, ): """Test that a nextflow config file is created fro all Nextflow pipelines.""" - # GIVEN a configurator and a case id + # GIVEN a configurator, a case id and a case path configurator: NextflowConfigurator = request.getfixturevalue(configurator_fixture) case_id: str = request.getfixturevalue(case_id_fixture) + case_path: Path = request.getfixturevalue(case_path_fixture) # GIVEN that a case directory exists configurator._create_case_directory(case_id=case_id) # WHEN creating nextflow config - configurator._create_nextflow_config(case_id=case_id) + configurator.config_file_creator.create(case_id=case_id, case_path=case_path) # THEN the nextflow config is created - assert configurator._get_nextflow_config_path(case_id).exists() + case_path: Path = configurator._get_case_path(case_id=case_id) + assert configurator.config_file_creator.get_file_path( + case_id=case_id, case_path=case_path + ).exists() diff --git a/tests/services/analysis_starter/test_file_creation.py b/tests/services/analysis_starter/test_file_creation.py index 009d106c53..c7ae0ed88c 100644 --- a/tests/services/analysis_starter/test_file_creation.py +++ b/tests/services/analysis_starter/test_file_creation.py @@ -2,14 +2,19 @@ import pytest -from cg.services.analysis_starter.configurator.file_creators.abstract import FileContentCreator +from cg.services.analysis_starter.configurator.file_creators.config_file import ( + NextflowConfigFileCreator, +) +from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( + RarediseaseParamsFileCreator, +) @pytest.mark.parametrize( - "content_creator_fixture, case_path_fixture, expected_content_fixture", + "file_creator_fixture, case_path_fixture, expected_content_fixture", [ ( - "raredisease_config_file_content_creator", + "raredisease_config_file_creator", "raredisease_case_path", "expected_raredisease_config_content", ) @@ -17,18 +22,18 @@ ids=["raredisease"], ) def test_create_nextflow_config_file_content( - content_creator_fixture: str, + file_creator_fixture: str, case_path_fixture: str, expected_content_fixture: str, request: pytest.FixtureRequest, ): """Test that a Nextflow config file content is created correctly for all pipelines.""" # GIVEN a Nextflow config content creator and a case id - content_creator: FileContentCreator = request.getfixturevalue(content_creator_fixture) + file_creator: NextflowConfigFileCreator = request.getfixturevalue(file_creator_fixture) case_path: Path = request.getfixturevalue(case_path_fixture) # WHEN creating a Nextflow config file - content: str = content_creator.create(case_path) + content: str = file_creator._get_content(case_path) # THEN the content of the file is the expected expected_content: str = request.getfixturevalue(expected_content_fixture) @@ -36,10 +41,11 @@ def test_create_nextflow_config_file_content( @pytest.mark.parametrize( - "content_creator_fixture, case_path_fixture, expected_content_fixture", + "file_creator_fixture, case_id_fixture, case_path_fixture, expected_content_fixture", [ ( - "raredisease_params_content_creator", + "raredisease_params_file_creator", + "raredisease_case_id", "raredisease_case_path", "expected_raredisease_params_file_content", ) @@ -47,18 +53,23 @@ def test_create_nextflow_config_file_content( ids=["raredisease"], ) def test_create_params_file_content( - content_creator_fixture: str, + file_creator_fixture: str, + case_id_fixture: str, case_path_fixture: str, expected_content_fixture: str, + raredisease_sample_sheet_path: Path, request: pytest.FixtureRequest, ): """Test that the params file content is created correctly for all pipelines.""" # GIVEN a params file content creator and a case id - content_creator: FileContentCreator = request.getfixturevalue(content_creator_fixture) + content_creator: RarediseaseParamsFileCreator = request.getfixturevalue(file_creator_fixture) + case_id: str = request.getfixturevalue(case_id_fixture) case_path: Path = request.getfixturevalue(case_path_fixture) # WHEN creating a params file - content: str = content_creator.create(case_path) + content: dict = content_creator._get_content( + case_id=case_id, case_path=case_path, sample_sheet_path=raredisease_sample_sheet_path + ) # THEN the content of the file is the expected expected_content: str = request.getfixturevalue(expected_content_fixture) From 890e9ce76904713ed06a7999b0f0f50ff5ec3a1a Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 7 Mar 2025 09:53:16 +0100 Subject: [PATCH 30/43] fi sample sheet fixture (#4270) * add todo * fix samplesheet path fixture --- tests/conftest.py | 1 + tests/fixture_plugins/analysis_starter/path_fixtures.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 9fbd0ba3c0..353275cc2a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2885,6 +2885,7 @@ def raredisease_case_id() -> str: return "raredisease_case_enough_reads" +# TODO: Move this to pluggins @pytest.fixture(scope="function") def raredisease_sample_sheet_content( sample_id: str, diff --git a/tests/fixture_plugins/analysis_starter/path_fixtures.py b/tests/fixture_plugins/analysis_starter/path_fixtures.py index 9394a187d5..1d37cd73aa 100644 --- a/tests/fixture_plugins/analysis_starter/path_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/path_fixtures.py @@ -47,6 +47,6 @@ def raredisease_nextflow_config_file_path( @pytest.fixture(scope="function") def raredisease_sample_sheet_path(raredisease_case_path, raredisease_case_id) -> Path: """Path to sample sheet.""" - return Path(raredisease_case_path, f"{raredisease_case_id}_sample_sheet").with_suffix( + return Path(raredisease_case_path, f"{raredisease_case_id}_samplesheet").with_suffix( FileExtensions.CSV ) From 229d4dfd8d5a719f182b824b75f36a2d42caf834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20Ohlsson=20=C3=85ngnell?= <40887124+islean@users.noreply.github.com> Date: Fri, 7 Mar 2025 10:00:10 +0100 Subject: [PATCH 31/43] Implement Raredisease Extension (#4271) --- .../configurator/extensions/raredisease.py | 18 ++++++------------ .../configurator/models/nextflow.py | 4 ++-- .../analysis_starter/case_config_fixtures.py | 5 +++-- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/cg/services/analysis_starter/configurator/extensions/raredisease.py b/cg/services/analysis_starter/configurator/extensions/raredisease.py index 5076edbe55..bec2ec0ec6 100644 --- a/cg/services/analysis_starter/configurator/extensions/raredisease.py +++ b/cg/services/analysis_starter/configurator/extensions/raredisease.py @@ -12,19 +12,13 @@ class RarediseaseExtension(PipelineExtension): def __init__( self, - gene_panel_content_creator: GenePanelFileCreator, - managed_variants_content_creator: ManagedVariantsFileCreator, + gene_panel_file_creator: GenePanelFileCreator, + managed_variants_file_creator: ManagedVariantsFileCreator, ): - self.gene_panel_content_creator = gene_panel_content_creator - self.managed_variants_content_creator = managed_variants_content_creator + self.gene_panel_file_creator = gene_panel_file_creator + self.managed_variants_file_creator = managed_variants_file_creator def configure(self, case_path: Path) -> None: """Perform pipeline specific actions.""" - self._create_gene_panel(case_path) - self._create_managed_variants(case_path) - - def _create_gene_panel(self, case_path: Path) -> None: - raise NotImplementedError - - def _create_managed_variants(self, case_path: Path) -> None: - raise NotImplementedError + self.gene_panel_file_creator.create(case_path) + self.managed_variants_file_creator.create(case_path) diff --git a/cg/services/analysis_starter/configurator/models/nextflow.py b/cg/services/analysis_starter/configurator/models/nextflow.py index 5dd1169ede..62c7c64d74 100644 --- a/cg/services/analysis_starter/configurator/models/nextflow.py +++ b/cg/services/analysis_starter/configurator/models/nextflow.py @@ -1,9 +1,9 @@ -from cg.constants import Priority +from cg.constants.priority import SlurmQos from cg.services.analysis_starter.configurator.abstract_model import CaseConfig class NextflowCaseConfig(CaseConfig): - case_priority: Priority + case_priority: SlurmQos netxflow_config_file: str params_file: str work_dir: str diff --git a/tests/fixture_plugins/analysis_starter/case_config_fixtures.py b/tests/fixture_plugins/analysis_starter/case_config_fixtures.py index c35744be06..4bdf01c474 100644 --- a/tests/fixture_plugins/analysis_starter/case_config_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/case_config_fixtures.py @@ -2,7 +2,8 @@ import pytest -from cg.constants import Priority, Workflow +from cg.constants import Workflow +from cg.constants.priority import SlurmQos from cg.services.analysis_starter.configurator.models.nextflow import NextflowCaseConfig @@ -16,7 +17,7 @@ def raredisease_case_config( return NextflowCaseConfig( case_id=raredisease_case_id, workflow=Workflow.RAREDISEASE, - case_priority=Priority.standard, + case_priority=SlurmQos.NORMAL, netxflow_config_file=raredisease_nextflow_config_file_path.as_posix(), params_file=raredisease_params_file_path.as_posix(), work_dir=raredisease_work_dir_path.as_posix(), From e4d88c4121441d199fdf41841e3863b9d9510166 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 7 Mar 2025 10:32:13 +0100 Subject: [PATCH 32/43] move raredisease models to the analysis starter directory --- cg/meta/workflow/raredisease.py | 12 ++-- cg/models/raredisease/raredisease.py | 62 ------------------ .../file_creators/params_file/raredisease.py | 4 +- .../file_creators/sample_sheet/models.py | 64 +++++++++++++++++++ .../file_creators/sample_sheet/raredisease.py | 8 +-- .../configurator/implementations/nextflow.py | 8 ++- tests/conftest.py | 5 +- 7 files changed, 86 insertions(+), 77 deletions(-) create mode 100644 cg/services/analysis_starter/configurator/file_creators/sample_sheet/models.py diff --git a/cg/meta/workflow/raredisease.py b/cg/meta/workflow/raredisease.py index 1c317396d7..78d43a07e3 100644 --- a/cg/meta/workflow/raredisease.py +++ b/cg/meta/workflow/raredisease.py @@ -16,27 +16,27 @@ from cg.constants import DEFAULT_CAPTURE_KIT, Workflow from cg.constants.constants import GenomeVersion from cg.constants.nf_analysis import ( + RAREDISEASE_ADAPTER_BASES_PERCENTAGE_THRESHOLD, RAREDISEASE_COVERAGE_FILE_TAGS, RAREDISEASE_COVERAGE_INTERVAL_TYPE, RAREDISEASE_COVERAGE_THRESHOLD, - RAREDISEASE_PARENT_PEDDY_METRIC_CONDITION, - RAREDISEASE_METRIC_CONDITIONS_WGS, RAREDISEASE_METRIC_CONDITIONS_WES, - RAREDISEASE_ADAPTER_BASES_PERCENTAGE_THRESHOLD, + RAREDISEASE_METRIC_CONDITIONS_WGS, + RAREDISEASE_PARENT_PEDDY_METRIC_CONDITION, ) from cg.constants.scout import RAREDISEASE_CASE_TAGS, ScoutExportFileName -from cg.constants.sequencing import SeqLibraryPrepCategory, NOVASEQ_SEQUENCING_READ_LENGTH +from cg.constants.sequencing import NOVASEQ_SEQUENCING_READ_LENGTH, SeqLibraryPrepCategory from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex from cg.constants.tb import AnalysisType from cg.meta.workflow.nf_analysis import NfAnalysisAPI from cg.models.cg_config import CGConfig from cg.models.deliverables.metric_deliverables import MetricsBase, MultiqcDataJson -from cg.models.raredisease.raredisease import ( +from cg.resources import RAREDISEASE_BUNDLE_FILENAMES_PATH +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.models import ( RarediseaseParameters, RarediseaseSampleSheetEntry, RarediseaseSampleSheetHeaders, ) -from cg.resources import RAREDISEASE_BUNDLE_FILENAMES_PATH from cg.store.models import CaseSample, Sample LOG = logging.getLogger(__name__) diff --git a/cg/models/raredisease/raredisease.py b/cg/models/raredisease/raredisease.py index bbe2b37446..b3c383f007 100644 --- a/cg/models/raredisease/raredisease.py +++ b/cg/models/raredisease/raredisease.py @@ -1,7 +1,4 @@ -from enum import StrEnum - from cg.constants.constants import SexOptions -from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters from cg.models.qc_metrics import QCMetrics @@ -12,62 +9,3 @@ class RarediseaseQCMetrics(QCMetrics): percent_duplicates: float predicted_sex_sex_check: SexOptions total_reads: int - - -# TODO: Move these to models folder in appropriate service -class RarediseaseSampleSheetEntry(NextflowSampleSheetEntry): - """Raredisease sample model is used when building the sample sheet.""" - - sex: str - phenotype: int - sex: int - paternal_id: str - maternal_id: str - case_id: str - - @property - def reformat_sample_content(self) -> list[list[str]]: - """Reformat sample sheet content as a list of lists, where each list represents a line in the final file.""" - return [ - [ - self.name, - lane + 1, - forward_path, - reverse_path, - self.sex, - self.phenotype, - self.paternal_id, - self.maternal_id, - self.case_id, - ] - for lane, (forward_path, reverse_path) in enumerate( - zip(self.fastq_forward_read_paths, self.fastq_reverse_read_paths) - ) - ] - - -class RarediseaseSampleSheetHeaders(StrEnum): - sample: str = "sample" - lane: str = "lane" - fastq_1: str = "fastq_1" - fastq_2: str = "fastq_2" - sex: str = "sex" - phenotype: str = "phenotype" - paternal_id: str = "paternal_id" - maternal_id: str = "maternal_id" - case_id: str = "case_id" - - @classmethod - def list(cls) -> list[str]: - return list(map(lambda header: header.value, cls)) - - -class RarediseaseParameters(WorkflowParameters): - """Model for Raredisease parameters.""" - - target_bed_file: str - analysis_type: str - save_mapped_as_cram: bool - skip_germlinecnvcaller: bool - vcfanno_extra_resources: str - vep_filters_scout_fmt: str diff --git a/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py b/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py index 4c796ae720..a4f2c3e9ac 100644 --- a/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py +++ b/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py @@ -6,13 +6,15 @@ from cg.constants.tb import AnalysisType from cg.exc import CgDataError from cg.io.yaml import read_yaml, write_yaml_nextflow_style -from cg.models.raredisease.raredisease import RarediseaseParameters from cg.services.analysis_starter.configurator.file_creators.params_file.abstract import ( ParamsFileCreator, ) from cg.services.analysis_starter.configurator.file_creators.params_file.utils import ( replace_values_in_params_file, ) +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.models import ( + RarediseaseParameters, +) from cg.store.models import BedVersion, Case, Sample from cg.store.store import Store diff --git a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/models.py b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/models.py new file mode 100644 index 0000000000..1c131df1bc --- /dev/null +++ b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/models.py @@ -0,0 +1,64 @@ +from enum import StrEnum + +from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters + + +class RarediseaseSampleSheetEntry(NextflowSampleSheetEntry): + """Raredisease sample model is used when building the sample sheet.""" + + sex: str + phenotype: int + sex: int + paternal_id: str + maternal_id: str + case_id: str + + @property + def reformat_sample_content(self) -> list[list[str]]: + """ + Reformat sample sheet content as a list of lists, where each list represents a + line in the final file. + """ + return [ + [ + self.name, + lane + 1, + forward_path, + reverse_path, + self.sex, + self.phenotype, + self.paternal_id, + self.maternal_id, + self.case_id, + ] + for lane, (forward_path, reverse_path) in enumerate( + zip(self.fastq_forward_read_paths, self.fastq_reverse_read_paths) + ) + ] + + +class RarediseaseSampleSheetHeaders(StrEnum): + sample: str = "sample" + lane: str = "lane" + fastq_1: str = "fastq_1" + fastq_2: str = "fastq_2" + sex: str = "sex" + phenotype: str = "phenotype" + paternal_id: str = "paternal_id" + maternal_id: str = "maternal_id" + case_id: str = "case_id" + + @classmethod + def list(cls) -> list[str]: + return list(map(lambda header: header.value, cls)) + + +class RarediseaseParameters(WorkflowParameters): + """Model for Raredisease parameters.""" + + target_bed_file: str + analysis_type: str + save_mapped_as_cram: bool + skip_germlinecnvcaller: bool + vcfanno_extra_resources: str + vep_filters_scout_fmt: str diff --git a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py index 5337f746f9..3361e53ca8 100644 --- a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py +++ b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py @@ -10,13 +10,13 @@ from cg.io.gzip import read_gzip_first_line from cg.meta.workflow.fastq import _is_undetermined_in_path from cg.models.fastq import FastqFileMeta, GetFastqFileMeta -from cg.models.raredisease.raredisease import ( - RarediseaseSampleSheetEntry, - RarediseaseSampleSheetHeaders, -) from cg.services.analysis_starter.configurator.file_creators.sample_sheet.abstract import ( NextflowSampleSheetCreator, ) +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.models import ( + RarediseaseSampleSheetEntry, + RarediseaseSampleSheetHeaders, +) from cg.services.analysis_starter.configurator.file_creators.utils import get_case_id_from_path from cg.store.models import Case, CaseSample, Sample from cg.store.store import Store diff --git a/cg/services/analysis_starter/configurator/implementations/nextflow.py b/cg/services/analysis_starter/configurator/implementations/nextflow.py index 8dd89b4fe9..7a39235ae7 100644 --- a/cg/services/analysis_starter/configurator/implementations/nextflow.py +++ b/cg/services/analysis_starter/configurator/implementations/nextflow.py @@ -3,6 +3,8 @@ from cg.apps.housekeeper.hk import HousekeeperAPI from cg.apps.lims import LimsAPI from cg.constants import Workflow +from cg.constants.priority import SlurmQos +from cg.models.cg_config import CommonAppConfig from cg.services.analysis_starter.configurator.abstract_service import Configurator from cg.services.analysis_starter.configurator.extensions.abstract import PipelineExtension from cg.services.analysis_starter.configurator.file_creators.config_file import ( @@ -23,7 +25,7 @@ class NextflowConfigurator(Configurator): def __init__( self, - config: any, + config: CommonAppConfig, store: Store, housekeeper_api: HousekeeperAPI, lims: LimsAPI, @@ -78,10 +80,10 @@ def _create_case_directory(self, case_id: str) -> None: case_path: Path = self._get_case_path(case_id=case_id) case_path.mkdir(parents=True, exist_ok=True) - def _get_case_priority(self, case_id: str) -> str: + def _get_case_priority(self, case_id: str) -> SlurmQos: """Get case priority.""" case: Case = self.store.get_case_by_internal_id(case_id) - return case.slurm_priority + return SlurmQos(case.slurm_priority) def _get_case_workflow(self, case_id: str) -> Workflow: """Get case workflow.""" diff --git a/tests/conftest.py b/tests/conftest.py index 353275cc2a..11bbcdbddb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -53,7 +53,10 @@ from cg.models.cg_config import CGConfig, PDCArchivingDirectory from cg.models.downsample.downsample_data import DownsampleData from cg.models.nallo.nallo import NalloSampleSheetHeaders -from cg.models.raredisease.raredisease import RarediseaseParameters, RarediseaseSampleSheetHeaders +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.models import ( + RarediseaseSampleSheetHeaders, + RarediseaseParameters, +) from cg.models.rnafusion.rnafusion import RnafusionParameters, RnafusionSampleSheetEntry from cg.models.run_devices.illumina_run_directory_data import IlluminaRunDirectoryData from cg.models.taxprofiler.taxprofiler import TaxprofilerParameters, TaxprofilerSampleSheetEntry From 5d72e1c6c4c89fc37d48b9c58c41946297ccacef Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 7 Mar 2025 10:45:13 +0100 Subject: [PATCH 33/43] rename content creator files --- tests/conftest.py | 42 ++++--------------- ...res.py => config_file_content_fixtures.py} | 0 ...nt_creators.py => config_file_creators.py} | 0 ...nt_creators.py => params_file_creators.py} | 0 .../sample_sheet_content_fixtures.py | 32 ++++++++++++++ ...t_creators.py => sample_sheet_creators.py} | 0 6 files changed, 40 insertions(+), 34 deletions(-) rename tests/fixture_plugins/analysis_starter/{file_content_fixtures.py => config_file_content_fixtures.py} (100%) rename tests/fixture_plugins/analysis_starter/{config_file_content_creators.py => config_file_creators.py} (100%) rename tests/fixture_plugins/analysis_starter/{params_file_content_creators.py => params_file_creators.py} (100%) create mode 100644 tests/fixture_plugins/analysis_starter/sample_sheet_content_fixtures.py rename tests/fixture_plugins/analysis_starter/{sample_sheet_content_creators.py => sample_sheet_creators.py} (100%) diff --git a/tests/conftest.py b/tests/conftest.py index 11bbcdbddb..27a7e84786 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -53,14 +53,13 @@ from cg.models.cg_config import CGConfig, PDCArchivingDirectory from cg.models.downsample.downsample_data import DownsampleData from cg.models.nallo.nallo import NalloSampleSheetHeaders -from cg.services.analysis_starter.configurator.file_creators.sample_sheet.models import ( - RarediseaseSampleSheetHeaders, - RarediseaseParameters, -) from cg.models.rnafusion.rnafusion import RnafusionParameters, RnafusionSampleSheetEntry from cg.models.run_devices.illumina_run_directory_data import IlluminaRunDirectoryData from cg.models.taxprofiler.taxprofiler import TaxprofilerParameters, TaxprofilerSampleSheetEntry from cg.models.tomte.tomte import TomteParameters, TomteSampleSheetHeaders +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.models import ( + RarediseaseParameters, +) from cg.services.deliver_files.rsync.service import DeliveryRsyncService from cg.services.illumina.backup.encrypt_service import IlluminaRunEncryptionService from cg.services.illumina.data_transfer.data_transfer_service import IlluminaDataTransferService @@ -96,15 +95,16 @@ deliverables_yaml = "_deliverables.yaml" pytest_plugins = [ "tests.fixture_plugins.analysis_starter.case_config_fixtures", - "tests.fixture_plugins.analysis_starter.config_file_content_creators", + "tests.fixture_plugins.analysis_starter.config_file_creators", "tests.fixture_plugins.analysis_starter.configurator_fixtures", "tests.fixture_plugins.analysis_starter.extension_fixtures", - "tests.fixture_plugins.analysis_starter.file_content_fixtures", + "tests.fixture_plugins.analysis_starter.config_file_content_fixtures", "tests.fixture_plugins.analysis_starter.path_fixtures", - "tests.fixture_plugins.analysis_starter.sample_sheet_content_creators", + "tests.fixture_plugins.analysis_starter.sample_sheet_creators", + "tests.fixture_plugins.analysis_starter.sample_sheet_content_fixtures", "tests.fixture_plugins.analysis_starter.seqera_client_fixtures", "tests.fixture_plugins.analysis_starter.specific_file_creators", - "tests.fixture_plugins.analysis_starter.params_file_content_creators", + "tests.fixture_plugins.analysis_starter.params_file_creators", "tests.fixture_plugins.backup_fixtures.backup_fixtures", "tests.fixture_plugins.chanjo2_fixtures.api_fixtures", "tests.fixture_plugins.chanjo2_fixtures.models_fixtures", @@ -2888,32 +2888,6 @@ def raredisease_case_id() -> str: return "raredisease_case_enough_reads" -# TODO: Move this to pluggins -@pytest.fixture(scope="function") -def raredisease_sample_sheet_content( - sample_id: str, - raredisease_case_id: str, - fastq_forward_read_path: Path, - fastq_reverse_read_path: Path, -) -> str: - """Return the expected sample sheet content for raredisease.""" - headers: str = ",".join(RarediseaseSampleSheetHeaders.list()) - row: str = ",".join( - [ - sample_id, - "1", - fastq_forward_read_path.as_posix(), - fastq_reverse_read_path.as_posix(), - "2", - "0", - "", - "", - raredisease_case_id, - ] - ) - return "\n".join([headers, row]) - - @pytest.fixture(scope="function") def raredisease_deliverable_data( raredisease_dir: Path, raredisease_case_id: str, sample_id: str diff --git a/tests/fixture_plugins/analysis_starter/file_content_fixtures.py b/tests/fixture_plugins/analysis_starter/config_file_content_fixtures.py similarity index 100% rename from tests/fixture_plugins/analysis_starter/file_content_fixtures.py rename to tests/fixture_plugins/analysis_starter/config_file_content_fixtures.py diff --git a/tests/fixture_plugins/analysis_starter/config_file_content_creators.py b/tests/fixture_plugins/analysis_starter/config_file_creators.py similarity index 100% rename from tests/fixture_plugins/analysis_starter/config_file_content_creators.py rename to tests/fixture_plugins/analysis_starter/config_file_creators.py diff --git a/tests/fixture_plugins/analysis_starter/params_file_content_creators.py b/tests/fixture_plugins/analysis_starter/params_file_creators.py similarity index 100% rename from tests/fixture_plugins/analysis_starter/params_file_content_creators.py rename to tests/fixture_plugins/analysis_starter/params_file_creators.py diff --git a/tests/fixture_plugins/analysis_starter/sample_sheet_content_fixtures.py b/tests/fixture_plugins/analysis_starter/sample_sheet_content_fixtures.py new file mode 100644 index 0000000000..6384998b58 --- /dev/null +++ b/tests/fixture_plugins/analysis_starter/sample_sheet_content_fixtures.py @@ -0,0 +1,32 @@ +from pathlib import Path + +import pytest + +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.models import ( + RarediseaseSampleSheetHeaders, +) + + +@pytest.fixture(scope="function") +def raredisease_sample_sheet_content( + sample_id: str, + raredisease_case_id: str, + fastq_forward_read_path: Path, + fastq_reverse_read_path: Path, +) -> str: + """Return the expected sample sheet content for raredisease.""" + headers: str = ",".join(RarediseaseSampleSheetHeaders.list()) + row: str = ",".join( + [ + sample_id, + "1", + fastq_forward_read_path.as_posix(), + fastq_reverse_read_path.as_posix(), + "2", + "0", + "", + "", + raredisease_case_id, + ] + ) + return "\n".join([headers, row]) diff --git a/tests/fixture_plugins/analysis_starter/sample_sheet_content_creators.py b/tests/fixture_plugins/analysis_starter/sample_sheet_creators.py similarity index 100% rename from tests/fixture_plugins/analysis_starter/sample_sheet_content_creators.py rename to tests/fixture_plugins/analysis_starter/sample_sheet_creators.py From aab06db47259640a2010c307df98dad43f354d64 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 7 Mar 2025 12:07:06 +0100 Subject: [PATCH 34/43] add tests for managed variants --- cg/constants/nf_analysis.py | 8 --- .../configurator/extensions/abstract.py | 2 +- .../configurator/extensions/raredisease.py | 2 +- .../file_creators/managed_variants.py | 16 +++--- .../configurator/implementations/nextflow.py | 2 +- tests/conftest.py | 1 + .../specific_file_content_fixtures.py | 11 ++++ .../specific_file_creators.py | 15 +++++- .../analysis_starter/test_file_creation.py | 54 +++++++++++++++++++ 9 files changed, 92 insertions(+), 19 deletions(-) create mode 100644 tests/fixture_plugins/analysis_starter/specific_file_content_fixtures.py diff --git a/cg/constants/nf_analysis.py b/cg/constants/nf_analysis.py index 86d13b690c..51edc6af46 100644 --- a/cg/constants/nf_analysis.py +++ b/cg/constants/nf_analysis.py @@ -15,14 +15,6 @@ class NfTowerStatus(StrEnum): UNKNOWN: str = "UNKNOWN" -class NextflowFileType(StrEnum): - PARAMS = "params_file" - SAMPLE_SHEET = "sample_sheet" - CONFIG = "nextflow_config" - GENE_PANEL = "gene_panel" - MANAGED_VARIANTS = "managed_variants" - - NALLO_METRIC_CONDITIONS: dict[str, dict[str, Any]] = { "median_coverage": {"norm": "gt", "threshold": 25}, } diff --git a/cg/services/analysis_starter/configurator/extensions/abstract.py b/cg/services/analysis_starter/configurator/extensions/abstract.py index 2a2dba5ef8..b38cb7a7e3 100644 --- a/cg/services/analysis_starter/configurator/extensions/abstract.py +++ b/cg/services/analysis_starter/configurator/extensions/abstract.py @@ -2,5 +2,5 @@ class PipelineExtension: - def configure(self, case_path: Path): + def configure(self, case_id: str, case_path: Path): pass diff --git a/cg/services/analysis_starter/configurator/extensions/raredisease.py b/cg/services/analysis_starter/configurator/extensions/raredisease.py index bec2ec0ec6..25a2026398 100644 --- a/cg/services/analysis_starter/configurator/extensions/raredisease.py +++ b/cg/services/analysis_starter/configurator/extensions/raredisease.py @@ -18,7 +18,7 @@ def __init__( self.gene_panel_file_creator = gene_panel_file_creator self.managed_variants_file_creator = managed_variants_file_creator - def configure(self, case_path: Path) -> None: + def configure(self, case_id: str, case_path: Path) -> None: """Perform pipeline specific actions.""" self.gene_panel_file_creator.create(case_path) self.managed_variants_file_creator.create(case_path) diff --git a/cg/services/analysis_starter/configurator/file_creators/managed_variants.py b/cg/services/analysis_starter/configurator/file_creators/managed_variants.py index 375ebc24c6..68a5d17ead 100644 --- a/cg/services/analysis_starter/configurator/file_creators/managed_variants.py +++ b/cg/services/analysis_starter/configurator/file_creators/managed_variants.py @@ -1,12 +1,10 @@ from pathlib import Path from cg.apps.scout.scoutapi import ScoutAPI -from cg.constants import Workflow +from cg.constants import FileExtensions, Workflow from cg.constants.gene_panel import GenePanelGenomeBuild -from cg.services.analysis_starter.configurator.file_creators.utils import ( - get_case_id_from_path, - get_genome_build, -) +from cg.io.txt import write_txt +from cg.services.analysis_starter.configurator.file_creators.utils import get_genome_build from cg.store.store import Store @@ -16,8 +14,12 @@ def __init__(self, scout_api: ScoutAPI, store: Store): self.scout_api = scout_api self.store = store - def create(self, case_path: Path) -> list[str]: - case_id: str = get_case_id_from_path(case_path) + def create(self, case_id: str, case_path: Path) -> None: + file_path = Path(case_path, f"managed_variants").with_suffix(FileExtensions.VCF) + content: list[str] = self._get_content(case_id=case_id) + write_txt(file_path=file_path, content=content) + + def _get_content(self, case_id: str) -> list[str]: workflow = Workflow(self.store.get_case_by_internal_id(case_id).data_analysis) genome_build: GenePanelGenomeBuild = get_genome_build(workflow) return self.scout_api.export_managed_variants(genome_build) diff --git a/cg/services/analysis_starter/configurator/implementations/nextflow.py b/cg/services/analysis_starter/configurator/implementations/nextflow.py index 7a39235ae7..74f8611eab 100644 --- a/cg/services/analysis_starter/configurator/implementations/nextflow.py +++ b/cg/services/analysis_starter/configurator/implementations/nextflow.py @@ -61,7 +61,7 @@ def create_config(self, case_id: str) -> NextflowCaseConfig: params_file_path: Path = self.params_file_creator.get_file_path( case_id=case_id, case_path=case_path ) - self.pipeline_extension.configure(case_path) + self.pipeline_extension.configure(case_id=case_id, case_path=case_path) return NextflowCaseConfig( case_id=case_id, case_priority=self._get_case_priority(case_id), diff --git a/tests/conftest.py b/tests/conftest.py index 27a7e84786..89a52d3a71 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -104,6 +104,7 @@ "tests.fixture_plugins.analysis_starter.sample_sheet_content_fixtures", "tests.fixture_plugins.analysis_starter.seqera_client_fixtures", "tests.fixture_plugins.analysis_starter.specific_file_creators", + "tests.fixture_plugins.analysis_starter.specific_file_content_fixtures", "tests.fixture_plugins.analysis_starter.params_file_creators", "tests.fixture_plugins.backup_fixtures.backup_fixtures", "tests.fixture_plugins.chanjo2_fixtures.api_fixtures", diff --git a/tests/fixture_plugins/analysis_starter/specific_file_content_fixtures.py b/tests/fixture_plugins/analysis_starter/specific_file_content_fixtures.py new file mode 100644 index 0000000000..aae77531d2 --- /dev/null +++ b/tests/fixture_plugins/analysis_starter/specific_file_content_fixtures.py @@ -0,0 +1,11 @@ +import pytest + + +@pytest.fixture +def raredisease_gene_panel_file_content() -> str: + return "content" + + +@pytest.fixture +def raredisease_managed_variants_file_content() -> list[str]: + return ["variant_from_scout1", "variant_from_scout2"] diff --git a/tests/fixture_plugins/analysis_starter/specific_file_creators.py b/tests/fixture_plugins/analysis_starter/specific_file_creators.py index f3bdbfb216..3f2d1b4e82 100644 --- a/tests/fixture_plugins/analysis_starter/specific_file_creators.py +++ b/tests/fixture_plugins/analysis_starter/specific_file_creators.py @@ -2,13 +2,26 @@ from cg.models.cg_config import CGConfig from cg.services.analysis_starter.configurator.file_creators.gene_panel import GenePanelFileCreator +from cg.services.analysis_starter.configurator.file_creators.managed_variants import ( + ManagedVariantsFileCreator, +) @pytest.fixture -def raredisease_gene_panel_content_creator( +def raredisease_gene_panel_creator( raredisease_context: CGConfig, ) -> GenePanelFileCreator: return GenePanelFileCreator( store=raredisease_context.status_db, scout_api=raredisease_context.scout_api, ) + + +@pytest.fixture +def raredisease_managed_variants_creator( + raredisease_context: CGConfig, +) -> ManagedVariantsFileCreator: + return ManagedVariantsFileCreator( + store=raredisease_context.status_db, + scout_api=raredisease_context.scout_api, + ) diff --git a/tests/services/analysis_starter/test_file_creation.py b/tests/services/analysis_starter/test_file_creation.py index c7ae0ed88c..08501731e7 100644 --- a/tests/services/analysis_starter/test_file_creation.py +++ b/tests/services/analysis_starter/test_file_creation.py @@ -1,10 +1,16 @@ from pathlib import Path +from unittest import mock import pytest +from cg.apps.scout.scoutapi import ScoutAPI from cg.services.analysis_starter.configurator.file_creators.config_file import ( NextflowConfigFileCreator, ) +from cg.services.analysis_starter.configurator.file_creators.gene_panel import GenePanelFileCreator +from cg.services.analysis_starter.configurator.file_creators.managed_variants import ( + ManagedVariantsFileCreator, +) from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( RarediseaseParamsFileCreator, ) @@ -74,3 +80,51 @@ def test_create_params_file_content( # THEN the content of the file is the expected expected_content: str = request.getfixturevalue(expected_content_fixture) assert content == expected_content + + +# TODO: test creation of sample sheet content + + +def test_create_gene_panel_file_content( + raredisease_gene_panel_creator: GenePanelFileCreator, raredisease_case_path: Path +): + """Test that the gene panel file content is created correctly.""" + # GIVEN a gene panel file content creator and a case path + # WHEN creating a gene panel file + # THEN the content of the file is the expected + pass + + +@pytest.mark.parametrize( + "file_creator_fixture, case_id_fixture, expected_content_fixture", + [ + ( + "raredisease_managed_variants_creator", + "raredisease_case_id", + "raredisease_managed_variants_file_content", + ) + ], + ids=["raredisease"], +) +def test_create_managed_variants_content( + file_creator_fixture: str, + case_id_fixture: str, + expected_content_fixture: str, + request: pytest.FixtureRequest, +): + """Test that the managed variants file content is created correctly.""" + # GIVEN a managed variants file content creator and a case path + file_creator: ManagedVariantsFileCreator = request.getfixturevalue(file_creator_fixture) + case_id: str = request.getfixturevalue(case_id_fixture) + + # GIVEN a mock of Scout variants + expected_content_fixture: str = request.getfixturevalue(expected_content_fixture) + + # WHEN creating a managed variants file + with mock.patch.object( + ScoutAPI, "export_managed_variants", return_value=expected_content_fixture + ): + content: list[str] = file_creator._get_content(case_id) + + # THEN the content of the file is the expected + assert content == expected_content_fixture From be00ee0aab0ab4c20be1ee3c2a93e46469adcd5e Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 7 Mar 2025 13:34:07 +0100 Subject: [PATCH 35/43] add tests for gene panel --- .../configurator/file_creators/gene_panel.py | 14 ++++- .../file_creators/managed_variants.py | 2 +- tests/conftest.py | 1 + .../specific_file_content_fixtures.py | 7 ++- .../analysis_starter/test_file_creation.py | 52 +++++++++++-------- 5 files changed, 50 insertions(+), 26 deletions(-) diff --git a/cg/services/analysis_starter/configurator/file_creators/gene_panel.py b/cg/services/analysis_starter/configurator/file_creators/gene_panel.py index 34841de31c..07d4a0a2c0 100644 --- a/cg/services/analysis_starter/configurator/file_creators/gene_panel.py +++ b/cg/services/analysis_starter/configurator/file_creators/gene_panel.py @@ -1,8 +1,9 @@ from pathlib import Path from cg.apps.scout.scoutapi import ScoutAPI -from cg.constants import GenePanelMasterList +from cg.constants import FileExtensions, GenePanelMasterList from cg.constants.gene_panel import GenePanelCombo, GenePanelGenomeBuild +from cg.io.txt import write_txt from cg.services.analysis_starter.configurator.file_creators.utils import ( get_case_id_from_path, get_genome_build, @@ -16,7 +17,16 @@ def __init__(self, store: Store, scout_api: ScoutAPI): self.store = store self.scout_api = scout_api - def create(self, case_path: Path) -> list[str]: + @staticmethod + def get_file_path(case_path: Path) -> Path: + return Path(case_path, "gene_panels").with_suffix(FileExtensions.BED) + + def create(self, case_path: Path) -> None: + file_path: Path = self.get_file_path(case_path=case_path) + content: list[str] = self._get_content(case_path=case_path) + write_txt(file_path=file_path, content=content) + + def _get_content(self, case_path: Path) -> list[str]: case_id: str = get_case_id_from_path(case_path=case_path) case: Case = self.store.get_case_by_internal_id(internal_id=case_id) genome_build: GenePanelGenomeBuild = get_genome_build(workflow=case.data_analysis) diff --git a/cg/services/analysis_starter/configurator/file_creators/managed_variants.py b/cg/services/analysis_starter/configurator/file_creators/managed_variants.py index 68a5d17ead..5032d46f2b 100644 --- a/cg/services/analysis_starter/configurator/file_creators/managed_variants.py +++ b/cg/services/analysis_starter/configurator/file_creators/managed_variants.py @@ -15,7 +15,7 @@ def __init__(self, scout_api: ScoutAPI, store: Store): self.store = store def create(self, case_id: str, case_path: Path) -> None: - file_path = Path(case_path, f"managed_variants").with_suffix(FileExtensions.VCF) + file_path = Path(case_path, "managed_variants").with_suffix(FileExtensions.VCF) content: list[str] = self._get_content(case_id=case_id) write_txt(file_path=file_path, content=content) diff --git a/tests/conftest.py b/tests/conftest.py index 89a52d3a71..b8689340aa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2916,6 +2916,7 @@ def raredisease_deliverables_file_path(raredisease_dir, raredisease_case_id) -> ).with_suffix(FileExtensions.YAML) +# TODO: Take a look at this for the tests @pytest.fixture(scope="function") def raredisease_parameters_default( raredisease_dir: Path, diff --git a/tests/fixture_plugins/analysis_starter/specific_file_content_fixtures.py b/tests/fixture_plugins/analysis_starter/specific_file_content_fixtures.py index aae77531d2..fe54987e7a 100644 --- a/tests/fixture_plugins/analysis_starter/specific_file_content_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/specific_file_content_fixtures.py @@ -2,8 +2,11 @@ @pytest.fixture -def raredisease_gene_panel_file_content() -> str: - return "content" +def raredisease_gene_panel_file_content() -> list[str]: + return [ + "##genome_build=37", + "##gene_panel=OMIM-AUTO,version=32.0,updated_at=2024-11-18,display_name=OMIM-AUTO", + ] @pytest.fixture diff --git a/tests/services/analysis_starter/test_file_creation.py b/tests/services/analysis_starter/test_file_creation.py index 08501731e7..291c137740 100644 --- a/tests/services/analysis_starter/test_file_creation.py +++ b/tests/services/analysis_starter/test_file_creation.py @@ -85,46 +85,56 @@ def test_create_params_file_content( # TODO: test creation of sample sheet content -def test_create_gene_panel_file_content( - raredisease_gene_panel_creator: GenePanelFileCreator, raredisease_case_path: Path -): - """Test that the gene panel file content is created correctly.""" - # GIVEN a gene panel file content creator and a case path - # WHEN creating a gene panel file - # THEN the content of the file is the expected - pass - - @pytest.mark.parametrize( - "file_creator_fixture, case_id_fixture, expected_content_fixture", + "file_creator_fixture, case_path_fixture, expected_content_fixture", [ ( - "raredisease_managed_variants_creator", - "raredisease_case_id", - "raredisease_managed_variants_file_content", + "raredisease_gene_panel_creator", + "raredisease_case_path", + "raredisease_gene_panel_file_content", ) ], ids=["raredisease"], ) -def test_create_managed_variants_content( +def test_create_gene_panel_file_content( file_creator_fixture: str, - case_id_fixture: str, + case_path_fixture: str, expected_content_fixture: str, request: pytest.FixtureRequest, +): + """Test that the gene panel file content is created correctly.""" + # GIVEN a gene panel file content creator and a case path + file_creator: GenePanelFileCreator = request.getfixturevalue(file_creator_fixture) + case_path: Path = request.getfixturevalue(case_path_fixture) + + # GIVEN a mock of Scout gene panels + expected_content: list[str] = request.getfixturevalue(expected_content_fixture) + + # WHEN creating a gene panel file + with mock.patch.object(ScoutAPI, "export_panels", return_value=expected_content): + content: list[str] = file_creator._get_content(case_path) + + # THEN the content of the file is the expected + + assert content == expected_content + + +def test_create_managed_variants_content( + raredisease_managed_variants_creator: ManagedVariantsFileCreator, + raredisease_case_id: str, + raredisease_managed_variants_file_content: list[str], ): """Test that the managed variants file content is created correctly.""" - # GIVEN a managed variants file content creator and a case path - file_creator: ManagedVariantsFileCreator = request.getfixturevalue(file_creator_fixture) - case_id: str = request.getfixturevalue(case_id_fixture) + # GIVEN a Raredisease managed variants file content creator and a case path # GIVEN a mock of Scout variants - expected_content_fixture: str = request.getfixturevalue(expected_content_fixture) # WHEN creating a managed variants file + expected_content_fixture: list[str] = raredisease_managed_variants_file_content with mock.patch.object( ScoutAPI, "export_managed_variants", return_value=expected_content_fixture ): - content: list[str] = file_creator._get_content(case_id) + content: list[str] = raredisease_managed_variants_creator._get_content(raredisease_case_id) # THEN the content of the file is the expected assert content == expected_content_fixture From b7a49328bbcdc2f7d7143d8da82167c123e72c86 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 7 Mar 2025 13:55:58 +0100 Subject: [PATCH 36/43] fix sample sheet creator --- .../file_creators/sample_sheet/abstract.py | 6 ++- .../file_creators/sample_sheet/models.py | 1 - .../file_creators/sample_sheet/raredisease.py | 3 +- .../sample_sheet_content_fixtures.py | 41 +++++++++++------- .../analysis_starter/test_file_creation.py | 43 ++++++++++++++++--- 5 files changed, 69 insertions(+), 25 deletions(-) diff --git a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/abstract.py b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/abstract.py index a7df467b36..d0437b3b80 100644 --- a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/abstract.py +++ b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/abstract.py @@ -10,5 +10,9 @@ def get_file_path(case_id: str, case_path: Path) -> Path: pass @abstractmethod - def create(self, case_id: str, case_path: Path) -> any: + def create(self, case_id: str, case_path: Path) -> None: + pass + + @abstractmethod + def _get_content(self, case_id: str, case_path: Path) -> list[list[str]]: pass diff --git a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/models.py b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/models.py index 1c131df1bc..1b5f0b5bb2 100644 --- a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/models.py +++ b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/models.py @@ -6,7 +6,6 @@ class RarediseaseSampleSheetEntry(NextflowSampleSheetEntry): """Raredisease sample model is used when building the sample sheet.""" - sex: str phenotype: int sex: int paternal_id: str diff --git a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py index 3361e53ca8..3bcbd66971 100644 --- a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py +++ b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py @@ -42,10 +42,9 @@ def create(self, case_id: str, case_path: Path) -> None: content: any = self._get_content(case_path=case_path) write_csv(file_path=file_path, content=content) - def _get_content(self, case_path: Path) -> any: + def _get_content(self, case_id: str, case_path: Path) -> list[list[str]]: """Return formatted information required to build a sample sheet for a case. This contains information for all samples linked to the case.""" - case_id: str = get_case_id_from_path(case_path=case_path) case: Case = self.store.get_case_by_internal_id(internal_id=case_id) sample_sheet_content: list[list[str]] = [RarediseaseSampleSheetHeaders.list()] for link in case.links: diff --git a/tests/fixture_plugins/analysis_starter/sample_sheet_content_fixtures.py b/tests/fixture_plugins/analysis_starter/sample_sheet_content_fixtures.py index 6384998b58..b9d13b908d 100644 --- a/tests/fixture_plugins/analysis_starter/sample_sheet_content_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/sample_sheet_content_fixtures.py @@ -13,20 +13,29 @@ def raredisease_sample_sheet_content( raredisease_case_id: str, fastq_forward_read_path: Path, fastq_reverse_read_path: Path, -) -> str: +) -> list[list[str]]: """Return the expected sample sheet content for raredisease.""" - headers: str = ",".join(RarediseaseSampleSheetHeaders.list()) - row: str = ",".join( - [ - sample_id, - "1", - fastq_forward_read_path.as_posix(), - fastq_reverse_read_path.as_posix(), - "2", - "0", - "", - "", - raredisease_case_id, - ] - ) - return "\n".join([headers, row]) + headers: list[str] = RarediseaseSampleSheetHeaders.list() + row_link_1: list[str] = [ + sample_id, + "1", + fastq_forward_read_path.as_posix(), + fastq_reverse_read_path.as_posix(), + "2", + "0", + "", + "", + raredisease_case_id, + ] + row_link_2: list[str] = [ + sample_id, + "1", + fastq_forward_read_path.as_posix(), + fastq_reverse_read_path.as_posix(), + "2", + "0", + "", + "", + raredisease_case_id, + ] + return [headers, row_link_1, row_link_2] diff --git a/tests/services/analysis_starter/test_file_creation.py b/tests/services/analysis_starter/test_file_creation.py index 291c137740..cbdb8dd3b3 100644 --- a/tests/services/analysis_starter/test_file_creation.py +++ b/tests/services/analysis_starter/test_file_creation.py @@ -14,6 +14,9 @@ from cg.services.analysis_starter.configurator.file_creators.params_file.raredisease import ( RarediseaseParamsFileCreator, ) +from cg.services.analysis_starter.configurator.file_creators.sample_sheet.abstract import ( + NextflowSampleSheetCreator, +) @pytest.mark.parametrize( @@ -27,7 +30,7 @@ ], ids=["raredisease"], ) -def test_create_nextflow_config_file_content( +def test_nextflow_config_file_content( file_creator_fixture: str, case_path_fixture: str, expected_content_fixture: str, @@ -58,7 +61,7 @@ def test_create_nextflow_config_file_content( ], ids=["raredisease"], ) -def test_create_params_file_content( +def test_params_file_content( file_creator_fixture: str, case_id_fixture: str, case_path_fixture: str, @@ -82,7 +85,37 @@ def test_create_params_file_content( assert content == expected_content -# TODO: test creation of sample sheet content +@pytest.mark.parametrize( + "file_creator_fixture, case_id_fixture, case_path_fixture, expected_content_fixture", + [ + ( + "raredisease_sample_sheet_creator", + "raredisease_case_id", + "raredisease_case_path", + "raredisease_sample_sheet_content", + ) + ], + ids=["raredisease"], +) +def test_nextflow_sample_sheet_content( + file_creator_fixture: str, + case_id_fixture: str, + case_path_fixture: str, + expected_content_fixture: str, + request: pytest.FixtureRequest, +): + """Test that the sample sheet content is created correctly.""" + # GIVEN a sample sheet content creator, a case id and a case path + file_creator: NextflowSampleSheetCreator = request.getfixturevalue(file_creator_fixture) + case_id: str = request.getfixturevalue(case_id_fixture) + case_path: Path = request.getfixturevalue(case_path_fixture) + + # WHEN creating a sample sheet + content: list[list[str]] = file_creator._get_content(case_id=case_id, case_path=case_path) + + # THEN the content of the file is the expected + expected_content: str = request.getfixturevalue(expected_content_fixture) + assert content == expected_content @pytest.mark.parametrize( @@ -96,7 +129,7 @@ def test_create_params_file_content( ], ids=["raredisease"], ) -def test_create_gene_panel_file_content( +def test_gene_panel_file_content( file_creator_fixture: str, case_path_fixture: str, expected_content_fixture: str, @@ -119,7 +152,7 @@ def test_create_gene_panel_file_content( assert content == expected_content -def test_create_managed_variants_content( +def test_managed_variants_content( raredisease_managed_variants_creator: ManagedVariantsFileCreator, raredisease_case_id: str, raredisease_managed_variants_file_content: list[str], From 5a057e636208166b060460528f97e444cf2bdb4a Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 7 Mar 2025 13:59:04 +0100 Subject: [PATCH 37/43] fix creator --- .../configurator/file_creators/sample_sheet/raredisease.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py index 3bcbd66971..82323a717e 100644 --- a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py +++ b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py @@ -39,7 +39,7 @@ def get_file_path(case_id: str, case_path: Path) -> Path: def create(self, case_id: str, case_path: Path) -> None: """Create the sample sheet for a case.""" file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) - content: any = self._get_content(case_path=case_path) + content: any = self._get_content(case_id=case_id, case_path=case_path) write_csv(file_path=file_path, content=content) def _get_content(self, case_id: str, case_path: Path) -> list[list[str]]: From 2ceaa73d75d51b0bd5bb999eb9f6e5552a1b942b Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 7 Mar 2025 14:24:56 +0100 Subject: [PATCH 38/43] fix sample sheet test --- .../file_creators/sample_sheet/raredisease.py | 1 - tests/conftest.py | 26 +++++++ .../sample_sheet_content_fixtures.py | 71 ++++++++++++------- .../analysis_starter/test_file_creation.py | 2 +- 4 files changed, 74 insertions(+), 26 deletions(-) diff --git a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py index 82323a717e..40ce5bde1b 100644 --- a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py +++ b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py @@ -17,7 +17,6 @@ RarediseaseSampleSheetEntry, RarediseaseSampleSheetHeaders, ) -from cg.services.analysis_starter.configurator.file_creators.utils import get_case_id_from_path from cg.store.models import Case, CaseSample, Sample from cg.store.store import Store diff --git a/tests/conftest.py b/tests/conftest.py index b8689340aa..61fa89a1dc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -59,6 +59,7 @@ from cg.models.tomte.tomte import TomteParameters, TomteSampleSheetHeaders from cg.services.analysis_starter.configurator.file_creators.sample_sheet.models import ( RarediseaseParameters, + RarediseaseSampleSheetHeaders, ) from cg.services.deliver_files.rsync.service import DeliveryRsyncService from cg.services.illumina.backup.encrypt_service import IlluminaRunEncryptionService @@ -2889,6 +2890,31 @@ def raredisease_case_id() -> str: return "raredisease_case_enough_reads" +@pytest.fixture(scope="function") +def raredisease_sample_sheet_content( + sample_id: str, + raredisease_case_id: str, + fastq_forward_read_path: Path, + fastq_reverse_read_path: Path, +) -> str: + """Return the expected sample sheet content for raredisease.""" + headers: str = ",".join(RarediseaseSampleSheetHeaders.list()) + row: str = ",".join( + [ + sample_id, + "1", + fastq_forward_read_path.as_posix(), + fastq_reverse_read_path.as_posix(), + "2", + "0", + "", + "", + raredisease_case_id, + ] + ) + return "\n".join([headers, row]) + + @pytest.fixture(scope="function") def raredisease_deliverable_data( raredisease_dir: Path, raredisease_case_id: str, sample_id: str diff --git a/tests/fixture_plugins/analysis_starter/sample_sheet_content_fixtures.py b/tests/fixture_plugins/analysis_starter/sample_sheet_content_fixtures.py index b9d13b908d..83fc5a7f60 100644 --- a/tests/fixture_plugins/analysis_starter/sample_sheet_content_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/sample_sheet_content_fixtures.py @@ -7,35 +7,58 @@ ) +def get_raredisease_sample_sheet_entry( + sample_id: str, lane: int, fastq1: Path, fastq2: Path, case_id: str +) -> list[str]: + return [ + sample_id, + lane, + fastq1, + fastq2, + 2, + 0, + "", + "", + case_id, + ] + + @pytest.fixture(scope="function") -def raredisease_sample_sheet_content( +def raredisease_sample_sheet_expected_content( sample_id: str, + father_sample_id: str, raredisease_case_id: str, fastq_forward_read_path: Path, fastq_reverse_read_path: Path, ) -> list[list[str]]: """Return the expected sample sheet content for raredisease.""" headers: list[str] = RarediseaseSampleSheetHeaders.list() - row_link_1: list[str] = [ - sample_id, - "1", - fastq_forward_read_path.as_posix(), - fastq_reverse_read_path.as_posix(), - "2", - "0", - "", - "", - raredisease_case_id, - ] - row_link_2: list[str] = [ - sample_id, - "1", - fastq_forward_read_path.as_posix(), - fastq_reverse_read_path.as_posix(), - "2", - "0", - "", - "", - raredisease_case_id, - ] - return [headers, row_link_1, row_link_2] + entry_1: list[str] = get_raredisease_sample_sheet_entry( + sample_id=sample_id, + lane=1, + fastq1=fastq_forward_read_path, + fastq2=fastq_reverse_read_path, + case_id=raredisease_case_id, + ) + entry_2: list[str] = get_raredisease_sample_sheet_entry( + sample_id=sample_id, + lane=2, + fastq1=fastq_forward_read_path, + fastq2=fastq_reverse_read_path, + case_id=raredisease_case_id, + ) + entry_3: list[str] = get_raredisease_sample_sheet_entry( + sample_id=father_sample_id, + lane=1, + fastq1=fastq_forward_read_path, + fastq2=fastq_reverse_read_path, + case_id=raredisease_case_id, + ) + entry_4: list[str] = get_raredisease_sample_sheet_entry( + sample_id=father_sample_id, + lane=2, + fastq1=fastq_forward_read_path, + fastq2=fastq_reverse_read_path, + case_id=raredisease_case_id, + ) + return [headers, entry_1, entry_2, entry_3, entry_4] diff --git a/tests/services/analysis_starter/test_file_creation.py b/tests/services/analysis_starter/test_file_creation.py index cbdb8dd3b3..1d544a92e5 100644 --- a/tests/services/analysis_starter/test_file_creation.py +++ b/tests/services/analysis_starter/test_file_creation.py @@ -92,7 +92,7 @@ def test_params_file_content( "raredisease_sample_sheet_creator", "raredisease_case_id", "raredisease_case_path", - "raredisease_sample_sheet_content", + "raredisease_sample_sheet_expected_content", ) ], ids=["raredisease"], From 771721380f7adfec90fd3bdf4b91bd03ee4c76f7 Mon Sep 17 00:00:00 2001 From: islean Date: Mon, 10 Mar 2025 10:35:25 +0100 Subject: [PATCH 39/43] Address comments --- .../configurator/extensions/raredisease.py | 6 ++-- .../configurator/file_creators/config_file.py | 14 ++++---- .../configurator/file_creators/gene_panel.py | 18 ++++------ .../file_creators/managed_variants.py | 2 +- .../file_creators/params_file/raredisease.py | 10 +++--- .../file_creators/sample_sheet/abstract.py | 4 --- .../file_creators/sample_sheet/raredisease.py | 4 +-- .../configurator/file_creators/utils.py | 6 ---- .../configurator/implementations/nextflow.py | 36 ++++++++----------- cg/store/crud/read.py | 11 ++++++ tests/conftest.py | 1 + .../config_file_content_fixtures.py | 20 ----------- .../analysis_starter/configurator_fixtures.py | 2 +- .../params_file_content_fixtures.py | 23 ++++++++++++ .../analysis_starter/test_file_creation.py | 27 +++++++------- 15 files changed, 87 insertions(+), 97 deletions(-) create mode 100644 tests/fixture_plugins/analysis_starter/params_file_content_fixtures.py diff --git a/cg/services/analysis_starter/configurator/extensions/raredisease.py b/cg/services/analysis_starter/configurator/extensions/raredisease.py index 25a2026398..28bd64f9de 100644 --- a/cg/services/analysis_starter/configurator/extensions/raredisease.py +++ b/cg/services/analysis_starter/configurator/extensions/raredisease.py @@ -8,7 +8,7 @@ class RarediseaseExtension(PipelineExtension): - """Configurator for Raredisease analysis.""" + """Contains Raredisease specific file creations which differ from the default Nextflow flow.""" def __init__( self, @@ -20,5 +20,5 @@ def __init__( def configure(self, case_id: str, case_path: Path) -> None: """Perform pipeline specific actions.""" - self.gene_panel_file_creator.create(case_path) - self.managed_variants_file_creator.create(case_path) + self.gene_panel_file_creator.create(case_id=case_id, case_path=case_path) + self.managed_variants_file_creator.create(case_id=case_id, case_path=case_path) diff --git a/cg/services/analysis_starter/configurator/file_creators/config_file.py b/cg/services/analysis_starter/configurator/file_creators/config_file.py index 20162b6069..41d142f823 100644 --- a/cg/services/analysis_starter/configurator/file_creators/config_file.py +++ b/cg/services/analysis_starter/configurator/file_creators/config_file.py @@ -3,7 +3,6 @@ from cg.constants import FileExtensions from cg.io.json import write_json from cg.io.txt import concat_txt -from cg.services.analysis_starter.configurator.file_creators.utils import get_case_id_from_path from cg.store.models import Case from cg.store.store import Store @@ -21,25 +20,24 @@ def __init__( @staticmethod def get_file_path(case_id: str, case_path: Path) -> Path: - """Return the path to the nextflow config file.""" + """Return the path to the Nextflow config file.""" return Path(case_path, f"{case_id}_nextflow_config").with_suffix(FileExtensions.JSON) def create(self, case_id: str, case_path: Path) -> None: - """Create the nextflow config file for a case.""" + """Create the Nextflow config file for a case.""" file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) - content: str = self._get_content(case_path=case_path) + content: str = self._get_content(case_id) write_json(file_path=file_path, content=content) - def _get_content(self, case_path: Path) -> str: - """Get the content of the nextflow config file.""" - case_id: str = get_case_id_from_path(case_path) + def _get_content(self, case_id: str) -> str: + """Get the content of the Nextflow config file.""" config_files_list: list[str] = [ self.platform, self.workflow_config_path, self.resources, ] case_specific_params: list[str] = [ - self._get_cluster_options(case_id=case_id), + self._get_cluster_options(case_id), ] return concat_txt( file_paths=config_files_list, diff --git a/cg/services/analysis_starter/configurator/file_creators/gene_panel.py b/cg/services/analysis_starter/configurator/file_creators/gene_panel.py index 07d4a0a2c0..01f9666c9d 100644 --- a/cg/services/analysis_starter/configurator/file_creators/gene_panel.py +++ b/cg/services/analysis_starter/configurator/file_creators/gene_panel.py @@ -1,13 +1,10 @@ from pathlib import Path from cg.apps.scout.scoutapi import ScoutAPI -from cg.constants import FileExtensions, GenePanelMasterList +from cg.constants import FileExtensions, GenePanelMasterList, Workflow from cg.constants.gene_panel import GenePanelCombo, GenePanelGenomeBuild from cg.io.txt import write_txt -from cg.services.analysis_starter.configurator.file_creators.utils import ( - get_case_id_from_path, - get_genome_build, -) +from cg.services.analysis_starter.configurator.file_creators.utils import get_genome_build from cg.store.models import Case from cg.store.store import Store @@ -21,15 +18,14 @@ def __init__(self, store: Store, scout_api: ScoutAPI): def get_file_path(case_path: Path) -> Path: return Path(case_path, "gene_panels").with_suffix(FileExtensions.BED) - def create(self, case_path: Path) -> None: - file_path: Path = self.get_file_path(case_path=case_path) - content: list[str] = self._get_content(case_path=case_path) + def create(self, case_id: str, case_path: Path) -> None: + file_path: Path = self.get_file_path(case_path) + content: list[str] = self._get_content(case_id) write_txt(file_path=file_path, content=content) - def _get_content(self, case_path: Path) -> list[str]: - case_id: str = get_case_id_from_path(case_path=case_path) + def _get_content(self, case_id: str) -> list[str]: case: Case = self.store.get_case_by_internal_id(internal_id=case_id) - genome_build: GenePanelGenomeBuild = get_genome_build(workflow=case.data_analysis) + genome_build: GenePanelGenomeBuild = get_genome_build(workflow=Workflow(case.data_analysis)) all_panels: list[str] = self._get_aggregated_panels( customer_id=case.customer.internal_id, default_panels=set(case.panels) ) diff --git a/cg/services/analysis_starter/configurator/file_creators/managed_variants.py b/cg/services/analysis_starter/configurator/file_creators/managed_variants.py index 5032d46f2b..1b6338a84d 100644 --- a/cg/services/analysis_starter/configurator/file_creators/managed_variants.py +++ b/cg/services/analysis_starter/configurator/file_creators/managed_variants.py @@ -16,7 +16,7 @@ def __init__(self, scout_api: ScoutAPI, store: Store): def create(self, case_id: str, case_path: Path) -> None: file_path = Path(case_path, "managed_variants").with_suffix(FileExtensions.VCF) - content: list[str] = self._get_content(case_id=case_id) + content: list[str] = self._get_content(case_id) write_txt(file_path=file_path, content=content) def _get_content(self, case_id: str) -> list[str]: diff --git a/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py b/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py index a4f2c3e9ac..610adc6c2e 100644 --- a/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py +++ b/cg/services/analysis_starter/configurator/file_creators/params_file/raredisease.py @@ -40,11 +40,11 @@ def create(self, case_id: str, case_path: Path, sample_sheet_path: Path) -> None write_yaml_nextflow_style(file_path=file_path, content=content) def _get_content(self, case_id: str, case_path: Path, sample_sheet_path: Path) -> dict: - """Create parameters file for a case.""" + """Return the content of a params file for a case.""" case_workflow_parameters: dict = self._get_case_parameters( case_id=case_id, case_path=case_path, sample_sheet_path=sample_sheet_path ).model_dump() - workflow_parameters: any = read_yaml(self.params) + workflow_parameters: any = read_yaml(Path(self.params)) parameters: dict = case_workflow_parameters | workflow_parameters curated_parameters: dict = replace_values_in_params_file(parameters) return curated_parameters @@ -53,7 +53,7 @@ def _get_case_parameters( self, case_id: str, case_path: Path, sample_sheet_path: Path ) -> RarediseaseParameters: """Return case-specific parameters for the analysis.""" - analysis_type: str = self._get_data_analysis_type(case_id=case_id) + analysis_type: str = self._get_data_analysis_type(case_id) target_bed_file: str = self._get_target_bed(case_id=case_id, analysis_type=analysis_type) skip_germlinecnvcaller: bool = self._get_germlinecnvcaller_flag(analysis_type=analysis_type) return RarediseaseParameters( @@ -78,6 +78,8 @@ def _get_data_analysis_type(self, case_id: str) -> str: def _get_target_bed(self, case_id: str, analysis_type: str) -> str: """ Return the target bed file from LIMS or use default capture kit for WHOLE_GENOME_SEQUENCING. + Raises: + ValueError if not capture kit can be assigned to the case. """ target_bed_file: str = self._get_target_bed_from_lims(case_id=case_id) if not target_bed_file: @@ -109,4 +111,4 @@ def _get_target_bed_from_lims(self, case_id: str) -> str | None: @staticmethod def _get_germlinecnvcaller_flag(analysis_type: str) -> bool: """Return True if the germlinecnvcaller should be skipped.""" - return True if analysis_type == AnalysisType.WGS else False + return analysis_type == AnalysisType.WGS diff --git a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/abstract.py b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/abstract.py index d0437b3b80..3e7d08fb5d 100644 --- a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/abstract.py +++ b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/abstract.py @@ -12,7 +12,3 @@ def get_file_path(case_id: str, case_path: Path) -> Path: @abstractmethod def create(self, case_id: str, case_path: Path) -> None: pass - - @abstractmethod - def _get_content(self, case_id: str, case_path: Path) -> list[list[str]]: - pass diff --git a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py index 40ce5bde1b..513675c7c4 100644 --- a/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py +++ b/cg/services/analysis_starter/configurator/file_creators/sample_sheet/raredisease.py @@ -38,10 +38,10 @@ def get_file_path(case_id: str, case_path: Path) -> Path: def create(self, case_id: str, case_path: Path) -> None: """Create the sample sheet for a case.""" file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path) - content: any = self._get_content(case_id=case_id, case_path=case_path) + content: list[list[str]] = self._get_content(case_id) write_csv(file_path=file_path, content=content) - def _get_content(self, case_id: str, case_path: Path) -> list[list[str]]: + def _get_content(self, case_id: str) -> list[list[str]]: """Return formatted information required to build a sample sheet for a case. This contains information for all samples linked to the case.""" case: Case = self.store.get_case_by_internal_id(internal_id=case_id) diff --git a/cg/services/analysis_starter/configurator/file_creators/utils.py b/cg/services/analysis_starter/configurator/file_creators/utils.py index e382b3efe6..10d9c5a37a 100644 --- a/cg/services/analysis_starter/configurator/file_creators/utils.py +++ b/cg/services/analysis_starter/configurator/file_creators/utils.py @@ -1,13 +1,7 @@ -from pathlib import Path - from cg.constants import Workflow from cg.constants.gene_panel import GenePanelGenomeBuild -def get_case_id_from_path(case_path: Path) -> str: - return case_path.name - - def get_genome_build(workflow: Workflow) -> GenePanelGenomeBuild: """Return genome build for the given Workflow.""" workflow_to_genome_build: dict[Workflow, GenePanelGenomeBuild] = { diff --git a/cg/services/analysis_starter/configurator/implementations/nextflow.py b/cg/services/analysis_starter/configurator/implementations/nextflow.py index 74f8611eab..91fbdfc099 100644 --- a/cg/services/analysis_starter/configurator/implementations/nextflow.py +++ b/cg/services/analysis_starter/configurator/implementations/nextflow.py @@ -2,8 +2,6 @@ from cg.apps.housekeeper.hk import HousekeeperAPI from cg.apps.lims import LimsAPI -from cg.constants import Workflow -from cg.constants.priority import SlurmQos from cg.models.cg_config import CommonAppConfig from cg.services.analysis_starter.configurator.abstract_service import Configurator from cg.services.analysis_starter.configurator.extensions.abstract import PipelineExtension @@ -17,7 +15,6 @@ NextflowSampleSheetCreator, ) from cg.services.analysis_starter.configurator.models.nextflow import NextflowCaseConfig -from cg.store.models import Case from cg.store.store import Store @@ -25,7 +22,7 @@ class NextflowConfigurator(Configurator): def __init__( self, - config: CommonAppConfig, + pipeline_config: CommonAppConfig, store: Store, housekeeper_api: HousekeeperAPI, lims: LimsAPI, @@ -34,7 +31,7 @@ def __init__( params_file_creator: ParamsFileCreator, pipeline_extension: PipelineExtension = PipelineExtension(), ): - self.root_dir: str = config.root + self.root_dir: str = pipeline_config.root self.store: Store = store self.housekeeper_api: HousekeeperAPI = housekeeper_api self.lims: LimsAPI = lims @@ -44,28 +41,33 @@ def __init__( self.params_file_creator = params_file_creator def create_config(self, case_id: str) -> NextflowCaseConfig: - """Create a Nextflow case config.""" + """Configure a Nextflow case so that it is ready for analysis. This entails + 1. Creating a case directory. + 2. Creating a sample sheet. + 3. Creating a parameters file. + 4. Creating a configuration file. + 5. Creating any pipeline specific files.""" case_path: Path = self._get_case_path(case_id=case_id) self._create_case_directory(case_id=case_id) + self.sample_sheet_creator.create(case_id=case_id, case_path=case_path) sample_sheet_path: Path = self.sample_sheet_creator.get_file_path( case_id=case_id, case_path=case_path ) - self.sample_sheet_creator.create(case_id=case_id, case_path=case_path) self.params_file_creator.create( case_id=case_id, case_path=case_path, sample_sheet_path=sample_sheet_path ) self.config_file_creator.create(case_id=case_id, case_path=case_path) - config_file_path: Path = self.config_file_creator.get_file_path( + self.pipeline_extension.configure(case_id=case_id, case_path=case_path) + params_file_path: Path = self.params_file_creator.get_file_path( case_id=case_id, case_path=case_path ) - params_file_path: Path = self.params_file_creator.get_file_path( + config_file_path: Path = self.config_file_creator.get_file_path( case_id=case_id, case_path=case_path ) - self.pipeline_extension.configure(case_id=case_id, case_path=case_path) return NextflowCaseConfig( case_id=case_id, - case_priority=self._get_case_priority(case_id), - workflow=self._get_case_workflow(case_id), + case_priority=self.store.get_case_priority(case_id), + workflow=self.store.get_case_workflow(case_id), netxflow_config_file=config_file_path.as_posix(), params_file=params_file_path.as_posix(), work_dir=self._get_work_dir(case_id=case_id).as_posix(), @@ -80,15 +82,5 @@ def _create_case_directory(self, case_id: str) -> None: case_path: Path = self._get_case_path(case_id=case_id) case_path.mkdir(parents=True, exist_ok=True) - def _get_case_priority(self, case_id: str) -> SlurmQos: - """Get case priority.""" - case: Case = self.store.get_case_by_internal_id(case_id) - return SlurmQos(case.slurm_priority) - - def _get_case_workflow(self, case_id: str) -> Workflow: - """Get case workflow.""" - case: Case = self.store.get_case_by_internal_id(case_id) - return Workflow(case.data_analysis) - def _get_work_dir(self, case_id: str) -> Path: return Path(self.root_dir, case_id, "work") diff --git a/cg/store/crud/read.py b/cg/store/crud/read.py index 02b3f5ad92..078ed191f3 100644 --- a/cg/store/crud/read.py +++ b/cg/store/crud/read.py @@ -14,6 +14,7 @@ CustomerId, SampleType, ) +from cg.constants.priority import SlurmQos from cg.constants.sequencing import DNA_PREP_CATEGORIES, SeqLibraryPrepCategory from cg.exc import CaseNotFoundError, CgDataError, CgError, OrderNotFoundError, SampleNotFoundError from cg.models.orders.constants import OrderType @@ -1828,3 +1829,13 @@ def get_pacbio_sequencing_runs_by_run_name(self, run_name: str) -> list[PacbioSe if runs.count() == 0: raise EntryNotFoundError(f"Could not find any sequencing runs for {run_name}") return runs.all() + + def get_case_priority(self, case_id: str) -> SlurmQos: + """Get case priority.""" + case: Case = self.get_case_by_internal_id(case_id) + return SlurmQos(case.slurm_priority) + + def get_case_workflow(self, case_id: str) -> Workflow: + """Get case workflow.""" + case: Case = self.get_case_by_internal_id(case_id) + return Workflow(case.data_analysis) diff --git a/tests/conftest.py b/tests/conftest.py index 61fa89a1dc..a35b9c3d3f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -107,6 +107,7 @@ "tests.fixture_plugins.analysis_starter.specific_file_creators", "tests.fixture_plugins.analysis_starter.specific_file_content_fixtures", "tests.fixture_plugins.analysis_starter.params_file_creators", + "tests.fixture_plugins.analysis_starter.params_file_content_fixtures", "tests.fixture_plugins.backup_fixtures.backup_fixtures", "tests.fixture_plugins.chanjo2_fixtures.api_fixtures", "tests.fixture_plugins.chanjo2_fixtures.models_fixtures", diff --git a/tests/fixture_plugins/analysis_starter/config_file_content_fixtures.py b/tests/fixture_plugins/analysis_starter/config_file_content_fixtures.py index eac759b31f..aab65c6242 100644 --- a/tests/fixture_plugins/analysis_starter/config_file_content_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/config_file_content_fixtures.py @@ -34,23 +34,3 @@ def expected_raredisease_config_content( nextflow_config_base_content: str, ) -> str: return nextflow_cluster_options + nextflow_config_base_content - - -@pytest.fixture -def expected_raredisease_params_file_content( - raredisease_case_path: Path, - raredisease_sample_sheet_path: Path, - raredisease_gene_panel_path: Path, - raredisease_managed_variants_path: Path, -) -> dict: - return { - "input": raredisease_sample_sheet_path, - "outdir": raredisease_case_path, - "target_bed_file": "twistexomecomprehensive_10.2_hg19_design.bed", - "analysis_type": "wgs", - "save_mapped_as_cram": True, - "skip_germlinecnvcaller": True, - "vcfanno_extra_resources": raredisease_managed_variants_path.as_posix(), - "vep_filters_scout_fmt": raredisease_gene_panel_path.as_posix(), - "someparam": "something", - } diff --git a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py index c580069b6d..91dd531ac3 100644 --- a/tests/fixture_plugins/analysis_starter/configurator_fixtures.py +++ b/tests/fixture_plugins/analysis_starter/configurator_fixtures.py @@ -24,7 +24,7 @@ def raredisease_configurator( ) -> NextflowConfigurator: return NextflowConfigurator( store=raredisease_context.status_db, - config=raredisease_context.raredisease, + pipeline_config=raredisease_context.raredisease, housekeeper_api=raredisease_context.housekeeper_api, lims=raredisease_context.lims_api, config_file_creator=raredisease_config_file_creator, diff --git a/tests/fixture_plugins/analysis_starter/params_file_content_fixtures.py b/tests/fixture_plugins/analysis_starter/params_file_content_fixtures.py new file mode 100644 index 0000000000..d27a4b37b4 --- /dev/null +++ b/tests/fixture_plugins/analysis_starter/params_file_content_fixtures.py @@ -0,0 +1,23 @@ +from pathlib import Path + +import pytest + + +@pytest.fixture +def expected_raredisease_params_file_content( + raredisease_case_path: Path, + raredisease_sample_sheet_path: Path, + raredisease_gene_panel_path: Path, + raredisease_managed_variants_path: Path, +) -> dict: + return { + "input": raredisease_sample_sheet_path, + "outdir": raredisease_case_path, + "target_bed_file": "twistexomecomprehensive_10.2_hg19_design.bed", + "analysis_type": "wgs", + "save_mapped_as_cram": True, + "skip_germlinecnvcaller": True, + "vcfanno_extra_resources": raredisease_managed_variants_path.as_posix(), + "vep_filters_scout_fmt": raredisease_gene_panel_path.as_posix(), + "someparam": "something", + } diff --git a/tests/services/analysis_starter/test_file_creation.py b/tests/services/analysis_starter/test_file_creation.py index 1d544a92e5..703e3c58c7 100644 --- a/tests/services/analysis_starter/test_file_creation.py +++ b/tests/services/analysis_starter/test_file_creation.py @@ -20,11 +20,11 @@ @pytest.mark.parametrize( - "file_creator_fixture, case_path_fixture, expected_content_fixture", + "file_creator_fixture, case_id_fixture, expected_content_fixture", [ ( "raredisease_config_file_creator", - "raredisease_case_path", + "raredisease_case_id", "expected_raredisease_config_content", ) ], @@ -32,17 +32,17 @@ ) def test_nextflow_config_file_content( file_creator_fixture: str, - case_path_fixture: str, + case_id_fixture: str, expected_content_fixture: str, request: pytest.FixtureRequest, ): """Test that a Nextflow config file content is created correctly for all pipelines.""" # GIVEN a Nextflow config content creator and a case id file_creator: NextflowConfigFileCreator = request.getfixturevalue(file_creator_fixture) - case_path: Path = request.getfixturevalue(case_path_fixture) + case_id: str = request.getfixturevalue(case_id_fixture) # WHEN creating a Nextflow config file - content: str = file_creator._get_content(case_path) + content: str = file_creator._get_content(case_id) # THEN the content of the file is the expected expected_content: str = request.getfixturevalue(expected_content_fixture) @@ -86,12 +86,11 @@ def test_params_file_content( @pytest.mark.parametrize( - "file_creator_fixture, case_id_fixture, case_path_fixture, expected_content_fixture", + "file_creator_fixture, case_id_fixture, expected_content_fixture", [ ( "raredisease_sample_sheet_creator", "raredisease_case_id", - "raredisease_case_path", "raredisease_sample_sheet_expected_content", ) ], @@ -100,7 +99,6 @@ def test_params_file_content( def test_nextflow_sample_sheet_content( file_creator_fixture: str, case_id_fixture: str, - case_path_fixture: str, expected_content_fixture: str, request: pytest.FixtureRequest, ): @@ -108,10 +106,9 @@ def test_nextflow_sample_sheet_content( # GIVEN a sample sheet content creator, a case id and a case path file_creator: NextflowSampleSheetCreator = request.getfixturevalue(file_creator_fixture) case_id: str = request.getfixturevalue(case_id_fixture) - case_path: Path = request.getfixturevalue(case_path_fixture) # WHEN creating a sample sheet - content: list[list[str]] = file_creator._get_content(case_id=case_id, case_path=case_path) + content: list[list[str]] = file_creator._get_content(case_id=case_id) # THEN the content of the file is the expected expected_content: str = request.getfixturevalue(expected_content_fixture) @@ -119,11 +116,11 @@ def test_nextflow_sample_sheet_content( @pytest.mark.parametrize( - "file_creator_fixture, case_path_fixture, expected_content_fixture", + "file_creator_fixture, case_id_fixture, expected_content_fixture", [ ( "raredisease_gene_panel_creator", - "raredisease_case_path", + "raredisease_case_id", "raredisease_gene_panel_file_content", ) ], @@ -131,21 +128,21 @@ def test_nextflow_sample_sheet_content( ) def test_gene_panel_file_content( file_creator_fixture: str, - case_path_fixture: str, + case_id_fixture: str, expected_content_fixture: str, request: pytest.FixtureRequest, ): """Test that the gene panel file content is created correctly.""" # GIVEN a gene panel file content creator and a case path file_creator: GenePanelFileCreator = request.getfixturevalue(file_creator_fixture) - case_path: Path = request.getfixturevalue(case_path_fixture) + case_id: str = request.getfixturevalue(case_id_fixture) # GIVEN a mock of Scout gene panels expected_content: list[str] = request.getfixturevalue(expected_content_fixture) # WHEN creating a gene panel file with mock.patch.object(ScoutAPI, "export_panels", return_value=expected_content): - content: list[str] = file_creator._get_content(case_path) + content: list[str] = file_creator._get_content(case_id) # THEN the content of the file is the expected From e8971a56fc893e4ecf6b76f92bf890915bdcd6ae Mon Sep 17 00:00:00 2001 From: islean Date: Mon, 10 Mar 2025 10:44:58 +0100 Subject: [PATCH 40/43] Remove TODO --- tests/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index a35b9c3d3f..d8e392e1e0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2943,7 +2943,6 @@ def raredisease_deliverables_file_path(raredisease_dir, raredisease_case_id) -> ).with_suffix(FileExtensions.YAML) -# TODO: Take a look at this for the tests @pytest.fixture(scope="function") def raredisease_parameters_default( raredisease_dir: Path, From 4eedb27b5fe821e8f418989400fc8f99ec175fd8 Mon Sep 17 00:00:00 2001 From: islean Date: Mon, 10 Mar 2025 10:48:50 +0100 Subject: [PATCH 41/43] Fix typo --- tests/services/analysis_starter/test_configurator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/services/analysis_starter/test_configurator.py b/tests/services/analysis_starter/test_configurator.py index a8f7961884..8bca709dff 100644 --- a/tests/services/analysis_starter/test_configurator.py +++ b/tests/services/analysis_starter/test_configurator.py @@ -44,7 +44,7 @@ def test_create_nextflow_config_file_exists( case_path_fixture: str, request: pytest.FixtureRequest, ): - """Test that a nextflow config file is created fro all Nextflow pipelines.""" + """Test that a Nextflow config file is created for all Nextflow pipelines.""" # GIVEN a configurator, a case id and a case path configurator: NextflowConfigurator = request.getfixturevalue(configurator_fixture) case_id: str = request.getfixturevalue(case_id_fixture) From a926e8271cc3c14615a33746477f2b55e34be4f2 Mon Sep 17 00:00:00 2001 From: islean Date: Mon, 10 Mar 2025 10:50:18 +0100 Subject: [PATCH 42/43] Make interface --- .../analysis_starter/configurator/extensions/abstract.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cg/services/analysis_starter/configurator/extensions/abstract.py b/cg/services/analysis_starter/configurator/extensions/abstract.py index b38cb7a7e3..57798c8b8f 100644 --- a/cg/services/analysis_starter/configurator/extensions/abstract.py +++ b/cg/services/analysis_starter/configurator/extensions/abstract.py @@ -1,6 +1,9 @@ +from abc import ABC, abstractmethod from pathlib import Path -class PipelineExtension: +class PipelineExtension(ABC): + + @abstractmethod def configure(self, case_id: str, case_path: Path): pass From cdab38c7b071bf217ed5014b809fa00fad8638ae Mon Sep 17 00:00:00 2001 From: islean Date: Mon, 10 Mar 2025 12:56:13 +0100 Subject: [PATCH 43/43] Revert interface --- .../analysis_starter/configurator/extensions/abstract.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cg/services/analysis_starter/configurator/extensions/abstract.py b/cg/services/analysis_starter/configurator/extensions/abstract.py index 57798c8b8f..a4ff284795 100644 --- a/cg/services/analysis_starter/configurator/extensions/abstract.py +++ b/cg/services/analysis_starter/configurator/extensions/abstract.py @@ -1,9 +1,8 @@ -from abc import ABC, abstractmethod from pathlib import Path -class PipelineExtension(ABC): - - @abstractmethod +class PipelineExtension: def configure(self, case_id: str, case_path: Path): + """Intended for pipeline specific configurations. If none is needed, this bare class + can be used.""" pass