Skip to content

Commit 846681d

Browse files
Don't pass encodings through Celery
1 parent 514c785 commit 846681d

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

src/hope_dedup_engine/apps/api/models/deduplication.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from django.conf import settings
55
from django.core.validators import MaxValueValidator, MinValueValidator
6-
from django.db import models
6+
from django.db import models, transaction
77

88
from hope_dedup_engine.apps.security.models import ExternalSystem
99
from hope_dedup_engine.types import EncodingType, FindingType, IgnoredPairType
@@ -70,8 +70,10 @@ def get_ignored_pairs(self) -> IgnoredPairType:
7070
)
7171

7272
def update_encodings(self, encodings: EncodingType) -> None:
73-
self.encodings.update(encodings)
74-
self.save()
73+
with transaction.atomic():
74+
fresh_self: DeduplicationSet = DeduplicationSet.objects.select_for_update().get(pk=self.pk)
75+
fresh_self.encodings.update(encodings)
76+
fresh_self.save()
7577

7678
def update_findings(self, findings: FindingType) -> None:
7779
images = Image.objects.filter(deduplication_set=self).values("filename", "reference_pk")

src/hope_dedup_engine/apps/faces/celery_tasks.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import traceback
2-
from collections import ChainMap
32
from functools import partial
43
from typing import Any, Final
54

@@ -17,7 +16,7 @@
1716
from hope_dedup_engine.apps.faces.services.facial import dedupe_images, encode_faces
1817
from hope_dedup_engine.apps.faces.utils import report_long_execution
1918
from hope_dedup_engine.config.celery import DedupeTask, app
20-
from hope_dedup_engine.types import EncodingType, FindingType
19+
from hope_dedup_engine.types import FindingType
2120

2221
CHUNK_SIZE: Final[int] = 25
2322

@@ -71,7 +70,7 @@ def encode_chunk(
7170
self: DedupeTask,
7271
files: list[str],
7372
config: dict[str, Any],
74-
) -> tuple[EncodingType, int, int]:
73+
) -> None:
7574
"""Encode faces in a chunk of files."""
7675
with report_long_execution('DeduplicationSet.objects.get(pk=config.get("deduplication_set_id"))'):
7776
ds = DeduplicationSet.objects.get(pk=config.get("deduplication_set_id"))
@@ -80,7 +79,9 @@ def encode_chunk(
8079
with report_long_execution("ds.get_encodings()"):
8180
pre_encodings = ds.get_encodings()
8281
with report_long_execution('encode_faces(files, config.get("encoding"), pre_encodings, progress=callback)'):
83-
return encode_faces(files, config.get("encoding"), pre_encodings, progress=callback)
82+
results = encode_faces(files, config.get("encoding"), pre_encodings, progress=callback)
83+
with report_long_execution('ds.update_encodings(results[0])'):
84+
ds.update_encodings(results[0])
8485
except Exception as e:
8586
sentry_sdk.capture_exception(e)
8687
handle_error(ds)
@@ -149,17 +150,15 @@ def callback_findings(
149150
@app.task(bind=True, base=DedupeTask)
150151
def callback_encodings(
151152
self: Task,
152-
results: tuple[EncodingType, int, int],
153+
results: list[None],
153154
config: dict[str, Any],
154155
) -> dict[str, Any]:
155156
"""Aggregate and save encodings."""
156157
ds = DeduplicationSet.objects.get(pk=config.get("deduplication_set_id"))
157158
try:
158-
encodings = dict(ChainMap(*[result[0] for result in results]))
159-
ds.update_encodings(encodings)
160159
deduplicate_dataset.delay(config)
161160
return {
162-
"Encoded": len(encodings),
161+
"Encoded": True,
163162
}
164163
except Exception as e:
165164
sentry_sdk.capture_exception(e)

0 commit comments

Comments
 (0)