Partially fix #5310 - allow loading of mixed cases in which some individuals have only WTS samples

dnil · dnil · commit 72525977d4fa · 2025-03-14T12:13:39.000+01:00
diff --git a/scout/adapter/mongo/variant_loader.py b/scout/adapter/mongo/variant_loader.py
@@ -12,7 +12,7 @@
 from pymongo.errors import BulkWriteError, DuplicateKeyError
 
 from scout.build import build_variant
-from scout.constants import CHROMOSOMES, ORDERED_FILE_TYPE_MAP
+from scout.constants import CHROMOSOMES, INVALID_SAMPLE_TYPES, ORDERED_FILE_TYPE_MAP
 from scout.exceptions import IntegrityError
 from scout.parse.variant import parse_variant
 from scout.parse.variant.clnsig import is_pathogenic
@@ -621,16 +621,16 @@ def _has_variants_in_file(self, variant_file: str) -> bool:
 
     def load_variants(
         self,
-        case_obj,
-        variant_type="clinical",
-        category="snv",
-        rank_threshold=None,
-        chrom=None,
-        start=None,
-        end=None,
-        gene_obj=None,
-        custom_images=None,
-        build="37",
+        case_obj: dict,
+        variant_type: str = "clinical",
+        category: str = "snv",
+        rank_threshold: float = None,
+        chrom: str = None,
+        start: int = None,
+        end: int = None,
+        gene_obj: dict = None,
+        custom_images: list = None,
+        build: str = "37",
     ):
         """Load variants for a case into scout.
 
@@ -675,7 +675,7 @@ def load_variants(
             )
 
         gene_to_panels = self.gene_to_panels(case_obj)
-        genes = [gene_obj for gene_obj in self.all_genes(build=build)]
+        genes = list(self.all_genes(build=build))
         hgncid_to_gene = self.hgncid_to_gene(genes=genes, build=build)
         genomic_intervals = self.get_coding_intervals(genes=genes, build=build)
 
@@ -695,7 +695,11 @@ def load_variants(
                 LOG.debug("Found VEP header %s", "|".join(vep_header))
 
             # This is a dictionary to tell where ind are in vcf
-            individual_positions = {ind: i for i, ind in enumerate(vcf_obj.samples)}
+            individual_positions = {
+                ind: i
+                for i, ind in enumerate(vcf_obj.samples)
+                if vcf_obj.samples[i].analysis_type not in INVALID_SAMPLE_TYPES[category]
+            }
 
             # Dictionary for cancer analysis
             sample_info = {}
diff --git a/scout/constants/file_types.py b/scout/constants/file_types.py
@@ -23,6 +23,18 @@
     ]
 )
 
+INVALID_SAMPLE_TYPES = {
+    "snv": ["wts"],
+    "sv": ["wts"],
+    "mei": ["wts"],
+    "str": ["wts"],
+    "vcf_snv_mt": ["wts"],
+    "vcf_snv_research_mt": ["wts"],
+    "vcf_snv_research": ["wts"],
+    "vcf_sv_research_mt": ["wts"],
+    "vcf_sv_research": ["wts"],
+    "vcf_mei_research": ["wts"],
+}
 
 ORDERED_OMICS_FILE_TYPE_MAP = OrderedDict(
     [

Original file line number	Diff line number	Diff line change
`@@ -23,6 +23,18 @@`
`23`	`23`	`]`
`24`	`24`	`)`
`25`	`25`
	`26`	`+INVALID_SAMPLE_TYPES = {`
	`27`	`+ "snv": ["wts"],`
	`28`	`+ "sv": ["wts"],`
	`29`	`+ "mei": ["wts"],`
	`30`	`+ "str": ["wts"],`
	`31`	`+ "vcf_snv_mt": ["wts"],`
	`32`	`+ "vcf_snv_research_mt": ["wts"],`
	`33`	`+ "vcf_snv_research": ["wts"],`
	`34`	`+ "vcf_sv_research_mt": ["wts"],`
	`35`	`+ "vcf_sv_research": ["wts"],`
	`36`	`+ "vcf_mei_research": ["wts"],`
	`37`	`+}`
`26`	`38`
`27`	`39`	`ORDERED_OMICS_FILE_TYPE_MAP = OrderedDict(`
`28`	`40`	`[`