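Update: stub out retry-unit-tests, add original_pr_url to the PR-context mapping, and remove run-memory storage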

rohitvinnakota-codecov · rohitvinnakota-codecov · commit e672c0f026f2 · 2025-03-03T14:10:55.000-05:00
diff --git a/src/migrations/versions/b2a0463d9b91_migration.py b/src/migrations/versions/b2a0463d9b91_migration.py
@@ -1,16 +1,16 @@
 """Migration
 
-Revision ID: 52d271c30ee5
+Revision ID: b2a0463d9b91
 Revises: e0fcdc14251c
-Create Date: 2025-02-28 23:53:40.954667
+Create Date: 2025-03-03 17:34:37.242326
 
 """
 from alembic import op
 import sqlalchemy as sa
 
 
 # revision identifiers, used by Alembic.
-revision = '52d271c30ee5'
+revision = 'b2a0463d9b91'
 down_revision = 'e0fcdc14251c'
 branch_labels = None
 depends_on = None
@@ -26,12 +26,13 @@ def upgrade():
     sa.Column('repo', sa.String(), nullable=False),
     sa.Column('run_id', sa.Integer(), nullable=False),
     sa.Column('iterations', sa.Integer(), nullable=False),
+    sa.Column('original_pr_url', sa.String(), nullable=False),
     sa.ForeignKeyConstraint(['run_id'], ['run_state.id'], ondelete='CASCADE'),
     sa.PrimaryKeyConstraint('id'),
-    sa.UniqueConstraint('provider', 'pr_id', 'repo', 'owner')
+    sa.UniqueConstraint('provider', 'pr_id', 'repo', 'owner', 'original_pr_url')
     )
     with op.batch_alter_table('codegen_unit_test_generation_pr_context_to_run_id', schema=None) as batch_op:
-        batch_op.create_index('ix_autofix_repo_owner_pr_id', ['owner', 'repo', 'pr_id'], unique=False)
+        batch_op.create_index('ix_autofix_repo_owner_pr_id', ['owner', 'repo', 'pr_id', 'original_pr_url'], unique=False)
 
     # ### end Alembic commands ###
 
diff --git a/src/seer/app.py b/src/seer/app.py
@@ -290,7 +290,9 @@ def codegen_pr_review_state_endpoint(
 
 @json_api(blueprint, "/v1/automation/codegen/retry-unit-tests")
 def codegen_retry_unit_tests_endpoint(data: CodegenBaseRequest) -> CodegenUnitTestsResponse:
-    return codegen_retry_unittest(data)
+    raise NotImplementedError("Retry unit tests is not implemented yet.")
+    # TODO: Finish implementation of retry-unit-tests
+    # return codegen_retry_unittest(data)
 
 
 @json_api(blueprint, "/v1/automation/codecov-request")
@@ -308,8 +310,9 @@ def codecov_request_endpoint(
         return codegen_pr_review_endpoint(data.data)
     elif data.request_type == "unit-tests":
         return codegen_unit_tests_endpoint(data.data)
-    elif data.request_type == "retry-unit-tests":
-        return codegen_retry_unit_tests_endpoint(data.data)
+    # TODO: Finish implementation of retry-unit-tests
+    # elif data.request_type == "retry-unit-tests":
+    #     return codegen_retry_unit_tests_endpoint(data.data)
 
     raise ValueError(f"Unsupported request_type: {data.request_type}")
 
diff --git a/src/seer/automation/codebase/repo_client.py b/src/seer/automation/codebase/repo_client.py
@@ -622,11 +622,17 @@ def get_pr_head_sha(self, pr_url: str) -> str:
         data.raise_for_status()  # Raise an exception for HTTP errors
         return data.json()["head"]["sha"]
 
-    def post_unit_test_reference_to_original_pr(self, original_pr_url: str, unit_test_pr_url: str):
+    def post_unit_test_reference_to_original_pr(
+        self,
+        original_pr_url: str,
+        unit_test_pr_url: str,
+        type: str = RepoClientType.CODECOV_UNIT_TEST,
+    ):
         original_pr_id = int(original_pr_url.split("/")[-1])
         repo_name = original_pr_url.split("github.com/")[1].split("/pull")[0]
         url = f"https://api.github.com/repos/{repo_name}/issues/{original_pr_id}/comments"
-        comment = f"Sentry has generated a new [PR]({unit_test_pr_url}) with unit tests for this PR. View the new PR({unit_test_pr_url}) to review the changes."
+        gh_app = "Sentry" if type == RepoClientType.CODECOV_UNIT_TEST else "Codecov"
+        comment = f"{gh_app} has generated a new [PR]({unit_test_pr_url}) with unit tests for this PR. View the new PR({unit_test_pr_url}) to review the changes."
         params = {"body": comment}
         headers = self._get_auth_headers()
         response = requests.post(url, headers=headers, json=params)
@@ -637,7 +643,8 @@ def post_unit_test_not_generated_message_to_original_pr(self, original_pr_url: s
         original_pr_id = int(original_pr_url.split("/")[-1])
         repo_name = original_pr_url.split("github.com/")[1].split("/pull")[0]
         url = f"https://api.github.com/repos/{repo_name}/issues/{original_pr_id}/comments"
-        comment = "Sentry has determined that unit tests already exist on this PR or that they are not necessary."
+        gh_app = "Sentry" if type == RepoClientType.CODECOV_UNIT_TEST else "Codecov"
+        comment = f"{gh_app} has determined that unit tests are not necessary for this PR."
         params = {"body": comment}
         headers = self._get_auth_headers()
         response = requests.post(url, headers=headers, json=params)
diff --git a/src/seer/automation/codegen/codegen_context.py b/src/seer/automation/codegen/codegen_context.py
@@ -3,12 +3,12 @@
 from seer.automation.agent.models import Message
 from seer.automation.codebase.repo_client import RepoClient, RepoClientType
 from seer.automation.codegen.codegen_event_manager import CodegenEventManager
-from seer.automation.codegen.models import CodegenContinuation, UnitTestRunMemory
+from seer.automation.codegen.models import CodegenContinuation
 from seer.automation.codegen.state import CodegenContinuationState
 from seer.automation.models import RepoDefinition
 from seer.automation.pipeline import PipelineContext
 from seer.automation.state import DbStateRunTypes
-from seer.db import DbRunMemory, Session
+from seer.db import DbPrContextToUnitTestGenerationRunIdMapping
 
 logger = logging.getLogger(__name__)
 
@@ -80,30 +80,17 @@ def get_file_contents(
 
         return file_contents
 
-    def store_memory(self, key: str, memory: list[Message]):
-        with Session() as session:
-            memory_record = (
-                session.query(DbRunMemory).where(DbRunMemory.run_id == self.run_id).one_or_none()
-            )
-
-            if not memory_record:
-                memory_model = UnitTestRunMemory(run_id=self.run_id)
-            else:
-                memory_model = UnitTestRunMemory.from_db_model(memory_record)
-
-            memory_model.memory[key] = memory
-            memory_record = memory_model.to_db_model()
-
-            session.merge(memory_record)
-            session.commit()
-
-    def get_memory(self, key: str) -> list[Message]:
-        with Session() as session:
-            memory_record = (
-                session.query(DbRunMemory).where(DbRunMemory.run_id == self.run_id).one_or_none()
-            )
-
-            if not memory_record:
-                return []
-
-            return UnitTestRunMemory.from_db_model(memory_record).memory.get(key, [])
+    def get_unit_test_memory(
+        self, owner: str, repo: str, pr_id: int
+    ) -> DbPrContextToUnitTestGenerationRunIdMapping | None:
+        """Return the stored unit-test generation mapping row for a PR, or None."""
+        # Local import: only the model is imported from seer.db at module level.
+        from seer.db import Session
+
+        # Filter on the arguments the caller passed, and query through a Session:
+        # SQLAlchemy models expose no Django-style ``.objects`` manager.
+        with Session() as session:
+            return (
+                session.query(DbPrContextToUnitTestGenerationRunIdMapping)
+                .filter_by(owner=owner, repo=repo, pr_id=pr_id)
+                .first()
+            )
diff --git a/src/seer/automation/codegen/models.py b/src/seer/automation/codegen/models.py
@@ -1,6 +1,6 @@
 import datetime
 from enum import Enum
-from typing import Literal
+from typing import Literal, Optional
 
 from pydantic import BaseModel, Field
 
@@ -47,6 +47,7 @@ class CodeUnitTestOutput(BaseComponentOutput):
 class CodegenBaseRequest(BaseModel):
     repo: RepoDefinition
     pr_id: int  # The PR number
+    codecov_status: Optional[dict] = None
 
 
 class CodegenUnitTestsRequest(CodegenBaseRequest):
@@ -211,15 +212,3 @@ class CodecovTaskRequest(BaseModel):
     data: CodegenUnitTestsRequest | CodegenPrReviewRequest | CodegenRelevantWarningsRequest
     external_owner_id: str
     request_type: Literal["unit-tests", "pr-review", "relevant-warnings", "retry-unit-tests"]
-
-
-class UnitTestRunMemory(BaseModel):
-    run_id: int
-    memory: dict[str, list[Message]] = Field(default_factory=dict)
-
-    def to_db_model(self) -> DbRunMemory:
-        return DbRunMemory(run_id=self.run_id, value=self.model_dump(mode="json"))
-
-    @classmethod
-    def from_db_model(cls, model: DbRunMemory) -> "UnitTestRunMemory":
-        return cls.model_validate(model.value)
diff --git a/src/seer/automation/codegen/pr_review_step.py b/src/seer/automation/codegen/pr_review_step.py
@@ -86,4 +86,5 @@ def _invoke(self, **kwargs):
         except ValueError as e:
             self.logger.error(f"Error publishing pr review for {pr.url}: {e}")
             return
+
         self.context.event_manager.mark_completed()
diff --git a/src/seer/automation/codegen/retry_unittest_coding_component.py b/src/seer/automation/codegen/retry_unittest_coding_component.py
@@ -34,91 +34,4 @@ class RetryUnitTestCodingComponent(BaseComponent[CodeUnitTestRequest, CodeUnitTe
     def invoke(
         self, request: CodeUnitTestRequest, generated_run_id: int, llm_client: LlmClient = injected
     ) -> CodeUnitTestOutput | None:
-        with BaseTools(self.context, repo_client_type=RepoClientType.CODECOV_UNIT_TEST) as tools:
-            agent = LlmAgent(
-                tools=tools.get_tools(),
-                config=AgentConfig(interactive=False),
-            )
-
-            past_memory = self.context.get_memory("unit-tests", generated_run_id)
-            print(past_memory)
-
-            # codecov_client_params = request.codecov_client_params
-
-            # code_coverage_data = CodecovClient.fetch_coverage(
-            #     repo_name=codecov_client_params["repo_name"],
-            #     pullid=codecov_client_params["pullid"],
-            #     owner_username=codecov_client_params["owner_username"],
-            # )
-
-        #     test_result_data = CodecovClient.fetch_test_results_for_commit(
-        #         repo_name=codecov_client_params["repo_name"],
-        #         owner_username=codecov_client_params["owner_username"],
-        #         latest_commit_sha=codecov_client_params["head_sha"],
-        #     )
-
-        #     # GIVEN UNIT TEST INFORMATION, REGENERATE UNIT TESTS
-
-        #     existing_test_design_response = llm_client.generate_text(
-        #         model=AnthropicProvider.model("claude-3-5-sonnet@20240620"),
-        #         prompt=CodingUnitTestPrompts.format_find_unit_test_pattern_step_msg(
-        #             diff_str=request.diff
-        #         ),
-        #     )
-
-        #     formatted_plan_response = llm_client.generate_text(
-        #         model=AnthropicProvider.model("claude-3-5-sonnet@20240620"),
-        #         prompt=CodingUnitTestPrompts.format_plan_step_msg(
-        #             diff_str=request.diff,
-        #             has_coverage_info=code_coverage_data,
-        #             has_test_result_info=test_result_data,
-        #         ),
-        #     )
-
-        #     final_response = agent.run(
-        #         run_config=RunConfig(
-        #             prompt=CodingUnitTestPrompts.format_unit_test_msg(
-        #                 diff_str=request.diff,
-        #                 test_design_hint=f"{existing_test_design_response}\n\n{formatted_plan_response}",
-        #             ),
-        #             system_prompt=CodingUnitTestPrompts.format_system_msg(),
-        #             model=AnthropicProvider.model("claude-3-5-sonnet@20240620"),
-        #             run_name="Retry Generate Unit Tests",
-        #         ),
-        #     )
-
-        #     if not final_response:
-        #         return None
-        #     plan_steps_content = extract_text_inside_tags(final_response, "plan_steps")
-
-        #     if len(plan_steps_content) == 0:
-        #         raise ValueError("Failed to extract plan_steps from the planning step of LLM")
-
-        #     coding_output = PlanStepsPromptXml.from_xml(
-        #         f"<plan_steps>{escape_multi_xml(plan_steps_content, ['diff', 'description', 'commit_message'])}</plan_steps>"
-        #     ).to_model()
-
-        # if not coding_output.tasks:
-        #     raise ValueError("No tasks found in coding output")
-        # file_changes: list[FileChange] = []
-        # for task in coding_output.tasks:
-        #     repo_client = self.context.get_repo_client(
-        #         task.repo_name, type=RepoClientType.CODECOV_UNIT_TEST
-        #     )
-        #     if task.type == "file_change":
-        #         file_content, _ = repo_client.get_file_content(task.file_path)
-        #         if not file_content:
-        #             logger.warning(f"Failed to get content for {task.file_path}")
-        #             continue
-
-        #         changes, _ = task_to_file_change(task, file_content)
-        #         file_changes += changes
-        #     elif task.type == "file_delete":
-        #         change = task_to_file_delete(task)
-        #         file_changes.append(change)
-        #     elif task.type == "file_create":
-        #         change = task_to_file_create(task)
-        #         file_changes.append(change)
-        #     else:
-        #         logger.warning(f"Unsupported task type: {task.type}")
-        return CodeUnitTestOutput(diffs=[])  #
+        pass
diff --git a/src/seer/automation/codegen/retry_unittest_step.py b/src/seer/automation/codegen/retry_unittest_step.py
@@ -9,7 +9,6 @@
     AUTOFIX_EXECUTION_SOFT_TIME_LIMIT_SECS,
 )
 
-# from seer.automation.codegen.retry_unit_test_coding_component import RetryUnitTestCodingComponent
 from seer.automation.codebase.repo_client import RepoClientType
 from seer.automation.codegen.models import CodeUnitTestRequest
 from seer.automation.codegen.retry_unittest_coding_component import RetryUnitTestCodingComponent
@@ -19,12 +18,13 @@
 from seer.automation.pipeline import PipelineStepTaskRequest
 from seer.automation.state import DbStateRunTypes
 from seer.automation.utils import determine_mapped_unit_test_run_id
+from seer.db import DbPrContextToUnitTestGenerationRunIdMapping
 
 
 class RetryUnittestStepRequest(PipelineStepTaskRequest):
     pr_id: int
     repo_definition: RepoDefinition
-    # codecov_status: dict
+    codecov_status: dict
 
 
 @celery_app.task(
@@ -50,50 +50,63 @@ def _instantiate_request(request: dict[str, Any]) -> RetryUnittestStepRequest:
 
     @staticmethod
     def get_task():
-        x = retry_unittest_task
-        return x
+        return retry_unittest_task
 
     @observe(name="Codegen - Retry Unittest Step")
     @ai_track(description="Codegen - Retry Unittest Step")
     def _invoke(self, **kwargs):
+        """React to the Codecov check result for a generated unit-test PR.
+
+        A successful check posts a link to the generated PR on the original PR.
+        Any other conclusion stops once three iterations are recorded; the retry
+        itself is not implemented yet.
+        """
         self.logger.info("Executing Codegen - Retry Unittest Step")
         self.context.event_manager.mark_running()
-        # TODO: IF STATUS CHECK HAS PASSED OR WE HAVE MORE THAN 3 COMMITS, SKIP UNIT TEST GENERATION:
-
-        repo_client = self.context.get_repo_client(type=RepoClientType.CODECOV_UNIT_TEST)
+        repo_client = self.context.get_repo_client(
+            type=RepoClientType.CODECOV_PR_REVIEW
+        )  # Codecov-ai GH app
         pr = repo_client.repo.get_pull(self.request.pr_id)
-        diff_content = repo_client.get_pr_diff_content(pr.url)
+        codecov_status = self.request.codecov_status["check_run"]["conclusion"]
 
-        latest_commit_sha = repo_client.get_pr_head_sha(pr.url)
-
-        codecov_client_params = {
-            "repo_name": self.request.repo_definition.name,
-            "pullid": self.request.pr_id,
-            "owner_username": self.request.repo_definition.owner,
-            "head_sha": latest_commit_sha,
-        }
-        try:
-            unittest_output = RetryUnitTestCodingComponent(self.context).invoke(
-                CodeUnitTestRequest(
-                    diff=diff_content,
-                    codecov_client_params=codecov_client_params,
-                ),
-                generated_run_id=determine_mapped_unit_test_run_id(
-                    owner=self.request.repo_definition.owner,
-                    repo_name=self.request.repo_definition.name,
-                    pr_id=self.request.pr_id,
-                ),
-            )
-
-            if unittest_output:
-                for file_change in unittest_output.diffs:
-                    self.context.event_manager.append_file_change(file_change)
-                generator = GeneratedTestsPullRequestCreator(unittest_output.diffs, pr, repo_client)
-                generator.create_github_pull_request()
-            else:
-                repo_client.post_unit_test_not_generated_message_to_original_pr(pr.html_url)
-                return
-
-        except ValueError:
-            repo_client.post_unit_test_not_generated_message_to_original_pr(pr.html_url)
-        self.context.event_manager.mark_completed()
+        # The request has no top-level ``owner`` field; it lives on repo_definition.
+        mapping = self.get_mapping(
+            owner=self.request.repo_definition.owner,
+            repo=self.request.repo_definition.name,
+            pr_id=self.request.pr_id,
+        )
+        if mapping is None:
+            # No generation run is recorded for this PR: nothing to report or retry.
+            self.logger.warning(
+                "No unit test generation mapping for PR %s", self.request.pr_id
+            )
+            return
+
+        if codecov_status == "success":
+            repo_client.post_unit_test_reference_to_original_pr(
+                mapping.original_pr_url, pr.html_url
+            )
+            self.context.event_manager.mark_completed()
+            return
+
+        # >= rather than ==, so a counter that overshoots 3 can never retry again.
+        if mapping.iterations >= 3:
+            # TODO: Fetch the "best" run and update the PR
+            return
+
+        # TODO: Retry test generation
+        self.context.event_manager.mark_completed()
+
+    @staticmethod
+    def get_mapping(owner, repo, pr_id):
+        """Return the stored mapping row for this PR, or None when there is none."""
+        # SQLAlchemy models expose no Django-style ``.objects`` manager; use a Session.
+        # Local import: this module only imports the model from seer.db.
+        from seer.db import Session
+
+        with Session() as session:
+            return (
+                session.query(DbPrContextToUnitTestGenerationRunIdMapping)
+                .filter_by(owner=owner, repo=repo, pr_id=pr_id)
+                .first()
+            )
diff --git a/src/seer/automation/codegen/tasks.py b/src/seer/automation/codegen/tasks.py
@@ -158,6 +158,7 @@ def codegen_retry_unittest(request: CodegenBaseRequest, app_config: AppConfig =
         run_id=cur_state.run_id,
         pr_id=request.pr_id,
         repo_definition=request.repo,
+        codecov_status=request.codecov_status,
     )
     RetryUnittestStep.get_signature(
         retry_unittest_request, queue=app_config.CELERY_WORKER_QUEUE
diff --git a/src/seer/automation/codegen/unit_test_github_pr_creator.py b/src/seer/automation/codegen/unit_test_github_pr_creator.py
diff --git a/src/seer/db.py b/src/seer/db.py

Original file line number	Diff line number	Diff line change
`@@ -158,6 +158,7 @@ def codegen_retry_unittest(request: CodegenBaseRequest, app_config: AppConfig =`
`158`	`158`	`run_id=cur_state.run_id,`
`159`	`159`	`pr_id=request.pr_id,`
`160`	`160`	`repo_definition=request.repo,`
	`161`	`+ codecov_status=request.codecov_status,`
`161`	`162`	`)`
`162`	`163`	`RetryUnittestStep.get_signature(`
`163`	`164`	`retry_unittest_request, queue=app_config.CELERY_WORKER_QUEUE`