Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add(rnafusion support) #24

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions janus/constants/FileTag.py
Original file line number Diff line number Diff line change
@@ -12,3 +12,6 @@ class FileTag(StrEnum):
FASTP: str = "fastp"
SOMALIER: str = "somalier"
SAMTOOLS_STATS: str = "samtools-stats"
GENERAL_STATS: str = "general-stats"
STAR: str = "star"
RNA_SEQ_METRICS: str = "picard-rnaseq"
6 changes: 3 additions & 3 deletions janus/dto/collect_qc_response.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from pydantic import BaseModel

from janus.models.workflow.balsamic import Balsamic
from janus.models.workflow.rnafusion import RNAFusion
from pydantic import BaseModel


class CollectQCResponse(BaseModel):
"""Collect QC response model."""

case_id: str
case_info: Balsamic
case_info: Balsamic | RNAFusion
27 changes: 14 additions & 13 deletions janus/mappers/tag_to_models.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,26 @@
from enum import Enum

from janus.constants.FileTag import FileTag
from janus.models.multiqc.models import (
PicardHsMetrics,
PicardWGSMetrics,
Fastp,
PicardAlignmentSummary,
PicardDuplicates,
PicardHsMetrics,
PicardInsertSize,
PicardAlignmentSummary,
Fastp,
Somalier,
PicardRNASeqMetrics,
PicardWGSMetrics,
RNAFusionGeneralStats,
SamtoolsStats,
Somalier,
)


tag_to_model = {
FileTag.HS_METRICS: PicardHsMetrics,
FileTag.WGS_METRICS: PicardWGSMetrics,
FileTag.ALIGNMENT_SUMMARY_METRICS: PicardAlignmentSummary,
FileTag.DUPLICATES: PicardDuplicates,
FileTag.INSERT_SIZE: PicardInsertSize,
FileTag.FASTP: Fastp,
FileTag.ALIGNMENT_SUMMARY_METRICS: PicardAlignmentSummary,
FileTag.SOMALIER: Somalier,
FileTag.GENERAL_STATS: RNAFusionGeneralStats,
FileTag.HS_METRICS: PicardHsMetrics,
FileTag.INSERT_SIZE: PicardInsertSize,
FileTag.RNA_SEQ_METRICS: PicardRNASeqMetrics,
FileTag.SAMTOOLS_STATS: SamtoolsStats,
FileTag.SOMALIER: Somalier,
FileTag.WGS_METRICS: PicardWGSMetrics,
}
21 changes: 14 additions & 7 deletions janus/mappers/tag_to_parse_function.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
from janus.constants.FileTag import FileTag
from janus.services.parser import parse_sample_metrics, parse_fastp, parse_somalier

from janus.services.parser import (
parse_fastp,
parse_general_stats,
parse_sample_metrics,
parse_somalier,
)

tag_to_parse_function: dict = {
FileTag.HS_METRICS: parse_sample_metrics,
FileTag.WGS_METRICS: parse_sample_metrics,
FileTag.DUPLICATES: parse_sample_metrics,
FileTag.INSERT_SIZE: parse_sample_metrics,
FileTag.ALIGNMENT_SUMMARY_METRICS: parse_sample_metrics,
FileTag.DUPLICATES: parse_sample_metrics,
FileTag.FASTP: parse_fastp,
FileTag.SOMALIER: parse_somalier,
FileTag.GENERAL_STATS: parse_general_stats,
FileTag.HS_METRICS: parse_sample_metrics,
FileTag.INSERT_SIZE: parse_sample_metrics,
FileTag.RNA_SEQ_METRICS: parse_sample_metrics,
FileTag.SAMTOOLS_STATS: parse_sample_metrics,
FileTag.SOMALIER: parse_somalier,
FileTag.STAR: parse_sample_metrics,
FileTag.WGS_METRICS: parse_sample_metrics,
}
119 changes: 106 additions & 13 deletions janus/models/multiqc/models.py
Original file line number Diff line number Diff line change
@@ -13,11 +13,15 @@ def validate_float(value):
class PicardDuplicates(BaseModel):
unpaired_reads_examined: float = Field(..., alias="UNPAIRED_READS_EXAMINED")
read_pairs_examined: float = Field(..., alias="READ_PAIRS_EXAMINED")
secondary_or_supplementary_reads: float = Field(..., alias="SECONDARY_OR_SUPPLEMENTARY_RDS")
secondary_or_supplementary_reads: float = Field(
..., alias="SECONDARY_OR_SUPPLEMENTARY_RDS"
)
unmapped_reads: float = Field(..., alias="UNMAPPED_READS")
unpaired_read_duplicates: float = Field(..., alias="UNPAIRED_READ_DUPLICATES")
read_pair_duplicates: float = Field(..., alias="READ_PAIR_DUPLICATES")
read_pair_optical_duplicates: float = Field(..., alias="READ_PAIR_OPTICAL_DUPLICATES")
read_pair_optical_duplicates: float = Field(
..., alias="READ_PAIR_OPTICAL_DUPLICATES"
)
percent_duplication: float = Field(..., alias="PERCENT_DUPLICATION")
estimated_library_size: float = Field(..., alias="ESTIMATED_LIBRARY_SIZE")

@@ -193,7 +197,9 @@ class PicardHsMetrics(BaseModel):
het_snp_sensitivity: float = Field(..., alias="HET_SNP_SENSITIVITY")
het_snp_q: float = Field(..., alias="HET_SNP_Q")

validate_float = validator("fold_80_base_penalty", pre=True, always=True)(validate_float)
validate_float = validator("fold_80_base_penalty", pre=True, always=True)(
validate_float
)


class PicardAlignmentSummary(BaseModel):
@@ -228,7 +234,9 @@ class PicardAlignmentSummary(BaseModel):
pct_adapter: float = Field(..., alias="PCT_ADAPTER")
pct_softclip: float = Field(..., alias="PCT_SOFTCLIP")
pct_hardclip: float = Field(..., alias="PCT_HARDCLIP")
avg_pos_3prime_softclip_length: float = Field(..., alias="AVG_POS_3PRIME_SOFTCLIP_LENGTH")
avg_pos_3prime_softclip_length: float = Field(
..., alias="AVG_POS_3PRIME_SOFTCLIP_LENGTH"
)


class SomalierIndividual(BaseModel):
@@ -368,11 +376,19 @@ class PicardRNASeqMetrics(BaseModel):
ignored_reads: float = Field(..., alias="IGNORED_READS")
correct_strand_reads: float = Field(..., alias="CORRECT_STRAND_READS")
incorrect_strand_reads: float = Field(..., alias="INCORRECT_STRAND_READS")
num_r1_transcript_strand_reads: float = Field(..., alias="NUM_R1_TRANSCRIPT_STRAND_READS")
num_r2_transcript_strand_reads: float = Field(..., alias="NUM_R2_TRANSCRIPT_STRAND_READS")
num_r1_transcript_strand_reads: float = Field(
..., alias="NUM_R1_TRANSCRIPT_STRAND_READS"
)
num_r2_transcript_strand_reads: float = Field(
..., alias="NUM_R2_TRANSCRIPT_STRAND_READS"
)
num_unexplained_reads: float = Field(..., alias="NUM_UNEXPLAINED_READS")
pct_r1_transcript_strand_reads: float = Field(..., alias="PCT_R1_TRANSCRIPT_STRAND_READS")
pct_r2_transcript_strand_reads: float = Field(..., alias="PCT_R2_TRANSCRIPT_STRAND_READS")
pct_r1_transcript_strand_reads: float = Field(
..., alias="PCT_R1_TRANSCRIPT_STRAND_READS"
)
pct_r2_transcript_strand_reads: float = Field(
..., alias="PCT_R2_TRANSCRIPT_STRAND_READS"
)
pct_ribosomal_bases: float = Field(..., alias="PCT_RIBOSOMAL_BASES")
pct_coding_bases: float = Field(..., alias="PCT_CODING_BASES")
pct_utr_bases: float = Field(..., alias="PCT_UTR_BASES")
@@ -384,7 +400,9 @@ class PicardRNASeqMetrics(BaseModel):
median_cv_coverage: float = Field(..., alias="MEDIAN_CV_COVERAGE")
median_5prime_bias: float = Field(..., alias="MEDIAN_5PRIME_BIAS")
median_3prime_bias: float = Field(..., alias="MEDIAN_3PRIME_BIAS")
median_5prime_to_3prime_bias: float = Field(..., alias="MEDIAN_5PRIME_TO_3PRIME_BIAS")
median_5prime_to_3prime_bias: float = Field(
..., alias="MEDIAN_5PRIME_TO_3PRIME_BIAS"
)
library: str = Field(..., alias="LIBRARY")
read_group: str = Field(..., alias="READ_GROUP")
pf_not_aligned_bases: float = Field(..., alias="PF_NOT_ALIGNED_BASES")
@@ -419,7 +437,7 @@ class STARAlignment(BaseModel):
unmapped_other: float


class RNAfusionGeneralStats(BaseModel):
class RNAFusionGeneralStatsSummary(BaseModel):
insert_size_sum_median: float = Field(
...,
alias="Picard_InsertSizeMetrics_mqc_generalstats_picard_insertsizemetrics_summed_median",
@@ -428,7 +446,7 @@ class RNAfusionGeneralStats(BaseModel):
...,
alias="Picard_InsertSizeMetrics_mqc_generalstats_picard_insertsizemetrics_summed_mean",
)
percent_duplication: float = Field(
percent_duplication_picard: float = Field(
...,
alias="Picard_MarkDuplicates_mqc_generalstats_picard_mark_duplicates_PERCENT_DUPLICATION",
)
@@ -443,7 +461,12 @@ class RNAfusionGeneralStats(BaseModel):
percent_uniquely_mapped: float = Field(
..., alias="STAR_mqc_generalstats_star_uniquely_mapped_percent"
)
uniquely_mapped: float = Field(..., alias="STAR_mqc_generalstats_star_uniquely_mapped")
uniquely_mapped: float = Field(
..., alias="STAR_mqc_generalstats_star_uniquely_mapped"
)
percent_duplication_fastq: float = Field(
..., alias="fastp_mqc-generalstats-fastp-pct_duplication"
)
after_filtering_q30_rate: float = Field(
..., alias="fastp_mqc_generalstats_fastp_after_filtering_q30_rate"
)
@@ -456,5 +479,75 @@ class RNAfusionGeneralStats(BaseModel):
after_filtering_gc_content: float = Field(
..., alias="fastp_mqc_generalstats_fastp_after_filtering_gc_content"
)
pct_surviving: float = Field(..., alias="fastp_mqc_generalstats_fastp_pct_surviving")
pct_surviving: float = Field(
..., alias="fastp_mqc_generalstats_fastp_pct_surviving"
)
pct_adapter: float = Field(..., alias="fastp_mqc_generalstats_fastp_pct_adapter")


class RNAFusionGeneralStatsRead(BaseModel):
    """FastQC (raw) general-stats values for one read entry.

    Every field is required and is populated from the MultiQC general-stats
    column named in its alias (all aliases share the ``FastQC (raw)`` prefix).
    """

    percent_duplicates: float = Field(
        ...,
        alias="FastQC (raw)_mqc-generalstats-fastqc_raw-percent_duplicates",
    )
    percent_gc: float = Field(
        ...,
        alias="FastQC (raw)_mqc-generalstats-fastqc_raw-percent_gc",
    )
    avg_sequence_length: float = Field(
        ...,
        alias="FastQC (raw)_mqc-generalstats-fastqc_raw-avg_sequence_length",
    )
    # Declared as int, unlike the other (float) FastQC fields.
    median_sequence_length: int = Field(
        ...,
        alias="FastQC (raw)_mqc-generalstats-fastqc_raw-median_sequence_length",
    )
    percent_fails: float = Field(
        ...,
        alias="FastQC (raw)_mqc-generalstats-fastqc_raw-percent_fails",
    )
    total_sequences: float = Field(
        ...,
        alias="FastQC (raw)_mqc-generalstats-fastqc_raw-total_sequences",
    )


class RNAFusionGeneralStatsTrimmedRead(BaseModel):
    """FastQC general-stats values for one trimmed read entry.

    Holds two parallel groups of required fields: ``raw_*`` fields read from
    the ``FastQC (raw)`` columns and ``trimmed_*`` fields read from the
    ``FastQC (trimmed)`` columns, as named in each alias.
    """

    # --- FastQC (raw) columns ---
    raw_percent_duplicates: float = Field(
        ...,
        alias="FastQC (raw)_mqc-generalstats-fastqc_raw-percent_duplicates",
    )
    raw_percent_gc: float = Field(
        ...,
        alias="FastQC (raw)_mqc-generalstats-fastqc_raw-percent_gc",
    )
    raw_avg_sequence_length: float = Field(
        ...,
        alias="FastQC (raw)_mqc-generalstats-fastqc_raw-avg_sequence_length",
    )
    raw_median_sequence_length: int = Field(
        ...,
        alias="FastQC (raw)_mqc-generalstats-fastqc_raw-median_sequence_length",
    )
    raw_percent_fails: float = Field(
        ...,
        alias="FastQC (raw)_mqc-generalstats-fastqc_raw-percent_fails",
    )
    raw_total_sequences: float = Field(
        ...,
        alias="FastQC (raw)_mqc-generalstats-fastqc_raw-total_sequences",
    )

    # --- FastQC (trimmed) columns ---
    trimmed_percent_duplicates: float = Field(
        ...,
        alias="FastQC (trimmed)_mqc-generalstats-fastqc_trimmed-percent_duplicates",
    )
    trimmed_percent_gc: float = Field(
        ...,
        alias="FastQC (trimmed)_mqc-generalstats-fastqc_trimmed-percent_gc",
    )
    trimmed_avg_sequence_length: float = Field(
        ...,
        alias="FastQC (trimmed)_mqc-generalstats-fastqc_trimmed-avg_sequence_length",
    )
    trimmed_median_sequence_length: int = Field(
        ...,
        alias="FastQC (trimmed)_mqc-generalstats-fastqc_trimmed-median_sequence_length",
    )
    trimmed_percent_fails: float = Field(
        ...,
        alias="FastQC (trimmed)_mqc-generalstats-fastqc_trimmed-percent_fails",
    )
    trimmed_total_sequences: float = Field(
        ...,
        alias="FastQC (trimmed)_mqc-generalstats-fastqc_trimmed-total_sequences",
    )


class RNAFusionGeneralStats(BaseModel):
    """Container grouping the RNAFusion general-stats sub-models.

    All five members are required; none of them declares an alias, so they
    are populated by field name.
    """

    # Summary metrics.
    summary: RNAFusionGeneralStatsSummary

    # First read: raw entry and trimmed entry.
    read_1: RNAFusionGeneralStatsRead
    read_1_trimmed: RNAFusionGeneralStatsTrimmedRead

    # Second read: raw entry and trimmed entry.
    read_2: RNAFusionGeneralStatsRead
    read_2_trimmed: RNAFusionGeneralStatsTrimmedRead
28 changes: 28 additions & 0 deletions janus/models/workflow/rnafusion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Module for the RNAfusion workflow model."""

from pydantic import BaseModel, Field

from janus.constants.FileTag import FileTag
from janus.dto.collect_qc_request import WorkflowInfo
from janus.models.multiqc.models import (
PicardDuplicates,
PicardInsertSize,
Fastp,
STARAlignment,
PicardRNASeqMetrics,
RNAFusionGeneralStats,
)


class RNAFusionSample(BaseModel):
    """QC metric models collected for a single RNAFusion sample.

    ``duplicates``, ``insert_size`` and ``fastp`` are populated through their
    ``FileTag`` aliases; the remaining fields are populated by field name.
    """

    # NOTE(review): rna_seq_metrics, star_alignment and general_stats carry no
    # FileTag alias, unlike the other fields - confirm this matches the keys
    # the collecting service produces for these metrics.
    duplicates: PicardDuplicates = Field(..., alias=FileTag.DUPLICATES)
    rna_seq_metrics: PicardRNASeqMetrics
    insert_size: PicardInsertSize = Field(..., alias=FileTag.INSERT_SIZE)
    star_alignment: STARAlignment
    fastp: Fastp = Field(..., alias=FileTag.FASTP)
    general_stats: RNAFusionGeneralStats


class RNAFusion(BaseModel):
    """RNAFusion workflow model: per-sample QC metrics plus workflow info."""

    # One entry per sample.
    samples: list[RNAFusionSample]
    # Workflow information, typed by the request DTO's WorkflowInfo.
    workflow: WorkflowInfo
15 changes: 12 additions & 3 deletions janus/server/api.py
Original file line number Diff line number Diff line change
@@ -19,11 +19,20 @@
response_model_by_alias=False,
response_model_exclude_none=True,
)
def collect_qc(collect_request: CollectQCRequest = Body(...)) -> CollectQCResponse | JSONResponse:
def collect_qc(
collect_request: CollectQCRequest = Body(...),
) -> CollectQCResponse | JSONResponse:
"""Collect qc metrics for the external request."""
service = CollectQCService(collect_request)
try:
collected_qc_metrics: CollectQCResponse = service.collect_qc_metrics_for_request()
collected_qc_metrics: CollectQCResponse = (
service.collect_qc_metrics_for_request()
)
return collected_qc_metrics
except (ValueError, FileNotFoundError, ValidationError, WorkflowNotSupportedError) as error:
except (
ValueError,
FileNotFoundError,
ValidationError,
WorkflowNotSupportedError,
) as error:
return JSONResponse(content=repr(error), status_code=HTTPStatus.BAD_REQUEST)
4 changes: 3 additions & 1 deletion janus/services/collect_qc_service.py
Original file line number Diff line number Diff line change
@@ -104,5 +104,7 @@ def collect_qc_metrics_for_request(self) -> CollectQCResponse:
f"Janus does not support parsing of qc metrics for {self.request.workflow_info.workflow})"
)
case_info: callable = self.get_case_info_for_workflow()
qc_metrics = CollectQCResponse(case_id=self.request.case_id, case_info=case_info)
qc_metrics = CollectQCResponse(
case_id=self.request.case_id, case_info=case_info
)
return qc_metrics
Loading
Loading