Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raredisease configurator - Create services for file creation #4260

Merged
merged 44 commits into from
Mar 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
4c75f39
add code for the params file
diitaz93 Feb 28, 2025
683ea3c
add creation of file in create_config
diitaz93 Feb 28, 2025
4b6665d
fix 2 out of 3 failing tests
diitaz93 Feb 28, 2025
6c1c8aa
add also sample sheet code to configurator
diitaz93 Feb 28, 2025
9706056
fix fixture
diitaz93 Feb 28, 2025
c189d12
add skeleton of new classes
diitaz93 Mar 3, 2025
82e2898
Merge branch 'dev-start-pipelines' into config-separate-services
diitaz93 Mar 3, 2025
5854181
resolve conflicts
diitaz93 Mar 3, 2025
baf2220
stash commit
diitaz93 Mar 3, 2025
e8308fa
enhance sample sheet and config
diitaz93 Mar 3, 2025
8bc008d
unify classes
diitaz93 Mar 4, 2025
cc83172
stash commit
diitaz93 Mar 4, 2025
1cc90cf
remove out of scope change
diitaz93 Mar 4, 2025
1e4830a
created a nextflow level of inheritance in configurator
diitaz93 Mar 5, 2025
c5ec339
add constants for gene files
diitaz93 Mar 5, 2025
f447e20
removed duplicated utils method
diitaz93 Mar 5, 2025
215ced4
renamed write function
diitaz93 Mar 5, 2025
e6fd37c
rolled back small fix for scope reasons
diitaz93 Mar 5, 2025
4f82eac
inject content creators
diitaz93 Mar 6, 2025
edb4adb
add gene panel creator
diitaz93 Mar 6, 2025
77db7d1
some renaming
diitaz93 Mar 6, 2025
945733a
add sample sheet creator fixture
diitaz93 Mar 6, 2025
b06cd5a
add params file creator fixture
diitaz93 Mar 6, 2025
5caddaf
add other file creator fixtures
diitaz93 Mar 6, 2025
1dde474
add fixtures in plugins to conftest
diitaz93 Mar 6, 2025
9ccef27
fix constructors and fixtures
diitaz93 Mar 6, 2025
e92ab44
Add managed_variants support (#4265) (patch)
islean Mar 6, 2025
52878f3
add first tests for content creation
diitaz93 Mar 6, 2025
cedfd59
Rework raredisease extension (#4266)
islean Mar 6, 2025
2932e07
remove inheritance of nextflow configurator (#4267)
diitaz93 Mar 7, 2025
890e9ce
fix sample sheet fixture (#4270)
diitaz93 Mar 7, 2025
229d4df
Implement Raredisease Extension (#4271)
islean Mar 7, 2025
e4d88c4
move raredisease models to the analysis starter directory
diitaz93 Mar 7, 2025
5d72e1c
rename content creator files
diitaz93 Mar 7, 2025
aab06db
add tests for managed variants
diitaz93 Mar 7, 2025
be00ee0
add tests for gene panel
diitaz93 Mar 7, 2025
b7a4932
fix sample sheet creator
diitaz93 Mar 7, 2025
5a057e6
fix creator
diitaz93 Mar 7, 2025
2ceaa73
fix sample sheet test
diitaz93 Mar 7, 2025
7717213
Address comments
islean Mar 10, 2025
e8971a5
Remove TODO
islean Mar 10, 2025
4eedb27
Fix typo
islean Mar 10, 2025
a926e82
Make interface
islean Mar 10, 2025
cdab38c
Revert interface
islean Mar 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions cg/meta/workflow/raredisease.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,27 @@
from cg.constants import DEFAULT_CAPTURE_KIT, Workflow
from cg.constants.constants import GenomeVersion
from cg.constants.nf_analysis import (
RAREDISEASE_ADAPTER_BASES_PERCENTAGE_THRESHOLD,
RAREDISEASE_COVERAGE_FILE_TAGS,
RAREDISEASE_COVERAGE_INTERVAL_TYPE,
RAREDISEASE_COVERAGE_THRESHOLD,
RAREDISEASE_PARENT_PEDDY_METRIC_CONDITION,
RAREDISEASE_METRIC_CONDITIONS_WGS,
RAREDISEASE_METRIC_CONDITIONS_WES,
RAREDISEASE_ADAPTER_BASES_PERCENTAGE_THRESHOLD,
RAREDISEASE_METRIC_CONDITIONS_WGS,
RAREDISEASE_PARENT_PEDDY_METRIC_CONDITION,
)
from cg.constants.scout import RAREDISEASE_CASE_TAGS, ScoutExportFileName
from cg.constants.sequencing import SeqLibraryPrepCategory, NOVASEQ_SEQUENCING_READ_LENGTH
from cg.constants.sequencing import NOVASEQ_SEQUENCING_READ_LENGTH, SeqLibraryPrepCategory
from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex
from cg.constants.tb import AnalysisType
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
from cg.models.cg_config import CGConfig
from cg.models.deliverables.metric_deliverables import MetricsBase, MultiqcDataJson
from cg.models.raredisease.raredisease import (
from cg.resources import RAREDISEASE_BUNDLE_FILENAMES_PATH
from cg.services.analysis_starter.configurator.file_creators.sample_sheet.models import (
RarediseaseParameters,
RarediseaseSampleSheetEntry,
RarediseaseSampleSheetHeaders,
)
from cg.resources import RAREDISEASE_BUNDLE_FILENAMES_PATH
from cg.store.models import CaseSample, Sample

LOG = logging.getLogger(__name__)
Expand Down
61 changes: 0 additions & 61 deletions cg/models/raredisease/raredisease.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
from enum import StrEnum

from cg.constants.constants import SexOptions
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
from cg.models.qc_metrics import QCMetrics


Expand All @@ -12,61 +9,3 @@ class RarediseaseQCMetrics(QCMetrics):
percent_duplicates: float
predicted_sex_sex_check: SexOptions
total_reads: int


class RarediseaseSampleSheetEntry(NextflowSampleSheetEntry):
    """Raredisease sample model used when building the sample sheet.

    One entry describes one sample; ``reformat_sample_content`` expands it into one
    sample sheet row per forward/reverse fastq pair.
    """

    # ``sex`` used to be annotated twice (``str`` first, ``int`` later). The later ``int``
    # annotation was the effective one, so only that one is kept, in the original position.
    sex: int
    phenotype: int
    paternal_id: str
    maternal_id: str
    case_id: str

    @property
    def reformat_sample_content(self) -> list[list[str]]:
        """Return the sample sheet content as a list of rows, one row per fastq pair.

        Each row holds, in order: name, lane number (1-based position of the fastq pair),
        forward read path, reverse read path, sex, phenotype, paternal id, maternal id
        and case id. Values are emitted as stored, without conversion to ``str``.
        """
        # Local loop variables are used on purpose: using the instance attributes as loop
        # targets would overwrite the fastq path lists with the last pair while iterating,
        # corrupting the entry for any later access.
        return [
            [
                self.name,
                lane,
                forward_path,
                reverse_path,
                self.sex,
                self.phenotype,
                self.paternal_id,
                self.maternal_id,
                self.case_id,
            ]
            for lane, (forward_path, reverse_path) in enumerate(
                zip(self.fastq_forward_read_paths, self.fastq_reverse_read_paths), start=1
            )
        ]


class RarediseaseSampleSheetHeaders(StrEnum):
sample: str = "sample"
lane: str = "lane"
fastq_1: str = "fastq_1"
fastq_2: str = "fastq_2"
sex: str = "sex"
phenotype: str = "phenotype"
paternal_id: str = "paternal_id"
maternal_id: str = "maternal_id"
case_id: str = "case_id"

@classmethod
def list(cls) -> list[str]:
return list(map(lambda header: header.value, cls))


class RarediseaseParameters(WorkflowParameters):
    """Parameters model for the Raredisease workflow, extending ``WorkflowParameters``."""

    target_bed_file: str
    analysis_type: str
    save_mapped_as_cram: bool
    skip_germlinecnvcaller: bool
    vcfanno_extra_resources: str
    vep_filters_scout_fmt: str
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from abc import ABC

from cg.services.analysis_starter.configurator.abstract_model import CaseConfig


# Base class for configurators: a configurator creates the case config for a case.
# NOTE(review): the two consecutive `def create_config` lines below appear to be the removed
# and the added side of the same diff hunk; the second one (returning CaseConfig, without
# `dry_run`) looks like the resulting signature -- confirm against the merged file.
# NOTE(review): the docstring calls the method abstract, but no @abstractmethod decorator is
# visible here, so subclasses are not forced to override it.
class Configurator(ABC):

def create_config(self, case_id: str, dry_run: bool = False):
def create_config(self, case_id: str) -> CaseConfig:
"""Abstract method to create a case config for a case."""
pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from pathlib import Path


class PipelineExtension:
    """Default pipeline extension that performs no pipeline specific configuration."""

    def configure(self, case_id: str, case_path: Path) -> None:
        """Perform pipeline specific configurations for a case.

        Intended for pipeline specific configurations. If none is needed, this bare class
        can be used: the default implementation does nothing and returns None.
        """
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from pathlib import Path

from cg.services.analysis_starter.configurator.extensions.abstract import PipelineExtension
from cg.services.analysis_starter.configurator.file_creators.gene_panel import GenePanelFileCreator
from cg.services.analysis_starter.configurator.file_creators.managed_variants import (
ManagedVariantsFileCreator,
)


class RarediseaseExtension(PipelineExtension):
    """Raredisease specific file creations that are not part of the default Nextflow flow."""

    def __init__(
        self,
        gene_panel_file_creator: GenePanelFileCreator,
        managed_variants_file_creator: ManagedVariantsFileCreator,
    ):
        self.gene_panel_file_creator = gene_panel_file_creator
        self.managed_variants_file_creator = managed_variants_file_creator

    def configure(self, case_id: str, case_path: Path) -> None:
        """Create the gene panel file and then the managed variants file for the case."""
        # Order is kept: gene panel first, managed variants second.
        file_creators = (self.gene_panel_file_creator, self.managed_variants_file_creator)
        for file_creator in file_creators:
            file_creator.create(case_id=case_id, case_path=case_path)
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from pathlib import Path

from cg.constants import FileExtensions
from cg.io.json import write_json
from cg.io.txt import concat_txt
from cg.store.models import Case
from cg.store.store import Store


class NextflowConfigFileCreator:
    """Create the Nextflow config file of a case.

    The content is built from the platform, workflow config and resources files plus a
    case specific ``process.clusterOptions`` line.
    """

    def __init__(
        self, store: Store, platform: str, workflow_config_path: str, resources: str, account: str
    ):
        self.store = store
        self.platform = platform
        self.workflow_config_path = workflow_config_path
        self.resources = resources
        self.account = account

    @staticmethod
    def get_file_path(case_id: str, case_path: Path) -> Path:
        """Return the path to the Nextflow config file."""
        return Path(case_path, f"{case_id}_nextflow_config").with_suffix(FileExtensions.JSON)

    def create(self, case_id: str, case_path: Path) -> None:
        """Create the Nextflow config file for a case."""
        # Imported locally because the module only imports concat_txt from cg.io.txt.
        from cg.io.txt import write_txt

        file_path: Path = self.get_file_path(case_id=case_id, case_path=case_path)
        content: str = self._get_content(case_id)
        # The content is plain Nextflow config text, not a JSON document, so it is written
        # as text (like the other file creators do) instead of being passed to a JSON
        # writer, which would presumably serialise it as one quoted JSON string.
        write_txt(file_path=file_path, content=content)

    def _get_content(self, case_id: str) -> str:
        """Get the content of the Nextflow config file.

        Passes the platform, workflow config and resources paths as files, and the case
        specific cluster options as string content, to ``concat_txt``.
        """
        config_files_list: list[str] = [
            self.platform,
            self.workflow_config_path,
            self.resources,
        ]
        case_specific_params: list[str] = [
            self._get_cluster_options(case_id),
        ]
        return concat_txt(
            file_paths=config_files_list,
            str_content=case_specific_params,
        )

    def _get_cluster_options(self, case_id: str) -> str:
        """Return the clusterOptions line with the account and the case's SLURM priority."""
        case: Case = self.store.get_case_by_internal_id(case_id)
        return f'process.clusterOptions = "-A {self.account} --qos={case.slurm_priority}"\n'
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from pathlib import Path

from cg.apps.scout.scoutapi import ScoutAPI
from cg.constants import FileExtensions, GenePanelMasterList, Workflow
from cg.constants.gene_panel import GenePanelCombo, GenePanelGenomeBuild
from cg.io.txt import write_txt
from cg.services.analysis_starter.configurator.file_creators.utils import get_genome_build
from cg.store.models import Case
from cg.store.store import Store


class GenePanelFileCreator:
    """Create the gene panel file of a case from panels exported by Scout."""

    def __init__(self, store: Store, scout_api: ScoutAPI):
        self.store = store
        self.scout_api = scout_api

    @staticmethod
    def get_file_path(case_path: Path) -> Path:
        """Return the path to the gene panel file inside the case directory."""
        return Path(case_path, "gene_panels").with_suffix(FileExtensions.BED)

    def create(self, case_id: str, case_path: Path) -> None:
        """Create the gene panel file for a case."""
        file_path: Path = self.get_file_path(case_path)
        content: list[str] = self._get_content(case_id)
        write_txt(file_path=file_path, content=content)

    def _get_content(self, case_id: str) -> list[str]:
        """Return the Scout export of the aggregated panels for the case's genome build."""
        case: Case = self.store.get_case_by_internal_id(internal_id=case_id)
        genome_build: GenePanelGenomeBuild = get_genome_build(workflow=Workflow(case.data_analysis))
        all_panels: list[str] = self._get_aggregated_panels(
            customer_id=case.customer.internal_id, default_panels=set(case.panels)
        )
        return self.scout_api.export_panels(build=genome_build, panels=all_panels)

    def _get_aggregated_panels(self, customer_id: str, default_panels: set[str]) -> list[str]:
        """Check if customer is collaborator for gene panel master list
        and if all default panels are included in the gene panel master list.
        If not, add gene panel combo and broad non-specific gene panels.
        Return an aggregated gene panel."""
        if GenePanelMasterList.is_customer_collaborator_and_panels_in_gene_panels_master_list(
            customer_id=customer_id, gene_panels=default_panels
        ):
            return GenePanelMasterList.get_panel_names()
        # _add_gene_panel_combo returns a new set, so extending it in place does not touch
        # the caller's default_panels.
        all_panels: set[str] = self._add_gene_panel_combo(gene_panels=default_panels)
        all_panels |= GenePanelMasterList.get_non_specific_gene_panels()
        return list(all_panels)

    @staticmethod
    def _add_gene_panel_combo(gene_panels: set[str]) -> set[str]:
        """
        Add gene panels combinations for gene panels being part of gene panel combination and
        return updated gene panels.

        The given set is left untouched; a new set is returned.
        """
        additional_panels: set[str] = set()
        for panel in gene_panels:
            if panel in GenePanelCombo.COMBO_1:
                additional_panels |= GenePanelCombo.COMBO_1.get(panel)
        # Union into a new set instead of ``|=`` so the argument is not mutated.
        return gene_panels | additional_panels
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from pathlib import Path

from cg.apps.scout.scoutapi import ScoutAPI
from cg.constants import FileExtensions, Workflow
from cg.constants.gene_panel import GenePanelGenomeBuild
from cg.io.txt import write_txt
from cg.services.analysis_starter.configurator.file_creators.utils import get_genome_build
from cg.store.store import Store


class ManagedVariantsFileCreator:
    """Create the managed variants file of a case from variants exported by Scout."""

    def __init__(self, scout_api: ScoutAPI, store: Store):
        self.scout_api = scout_api
        self.store = store

    @staticmethod
    def get_file_path(case_path: Path) -> Path:
        """Return the path to the managed variants file inside the case directory."""
        return Path(case_path, "managed_variants").with_suffix(FileExtensions.VCF)

    def create(self, case_id: str, case_path: Path) -> None:
        """Create the managed variants file for a case."""
        file_path: Path = self.get_file_path(case_path)
        content: list[str] = self._get_content(case_id)
        write_txt(file_path=file_path, content=content)

    def _get_content(self, case_id: str) -> list[str]:
        """Return the Scout export of managed variants for the case's genome build."""
        workflow = Workflow(self.store.get_case_by_internal_id(case_id).data_analysis)
        genome_build: GenePanelGenomeBuild = get_genome_build(workflow)
        return self.scout_api.export_managed_variants(genome_build)
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any


class ParamsFileCreator(ABC):
    """Interface for creators of a workflow parameters file for a case."""

    @staticmethod
    @abstractmethod
    def get_file_path(case_id: str, case_path: Path) -> Path:
        """Return the path of the parameters file for the given case."""

    @abstractmethod
    def create(self, case_id: str, case_path: Path, sample_sheet_path: Path) -> Any:
        """Create the parameters file for the given case.

        The return annotation is ``typing.Any`` rather than the builtin function ``any``,
        which is not a type; implementations decide what, if anything, they return.
        """
Loading
Loading