This repository was archived by the owner on Mar 18, 2025. It is now read-only.

Commit 135cd4a

Committed Sep 21, 2023
Fix attention aggregation.
- Fix attention aggregation (for visualization).
- Removed some unused code.
1 parent: 9cac7d3 · commit: 135cd4a

File tree

1 file changed (+0, -53 lines)

 

train.py (-53 lines)
```diff
@@ -1391,7 +1391,6 @@ def aggregate_attention(
     ):
         out = []
         attention_maps = self.get_average_attention()
-        attention_maps = self.controller.attention_store
         num_pixels = res**2
         for location in from_where:
             for item in attention_maps[f"{location}_{'cross' if is_cross else 'self'}"]:
```
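The deleted line overwrote the step-averaged maps with the raw accumulator, so aggregation (and the visualizations built on it) operated on summed rather than averaged attention. As a minimal sketch of the averaging the fix restores, assuming the AttentionStore here mirrors the upstream prompt-to-prompt one, where `attention_store` holds per-layer maps summed across denoising steps and `cur_step` counts those steps:

```python
# Sketch only: assumes an AttentionStore shaped like the upstream
# prompt-to-prompt implementation, where `attention_store` accumulates
# per-layer attention maps summed over denoising steps.
class AttentionStore:
    def __init__(self):
        self.cur_step = 0          # number of denoising steps seen so far
        self.attention_store = {}  # e.g. {"up_cross": [map, ...], ...}

    def get_average_attention(self):
        # Divide each accumulated map by the step count, so callers such as
        # aggregate_attention see a per-step average, not a running sum.
        return {
            key: [item / self.cur_step for item in maps]
            for key, maps in self.attention_store.items()
        }
```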
```diff
@@ -1474,58 +1473,6 @@ def save_cross_attention_vis(self, prompt, attention_maps, path):
         vis = ptp_utils.view_images(np.stack(images, axis=0))
         vis.save(path)
 
-    def show_cross_attention(
-        self,
-        prompts,
-        attention_store: AttentionStore,
-        res: int,
-        from_where: List[str],
-        select: int = 0,
-    ):
-        tokens = self.tokenizer.encode(prompts[select])
-        decoder = self.tokenizer.decode
-        attention_maps = self.aggregate_attention(
-            prompts, attention_store, res, from_where, True, select
-        )
-        images = []
-        for i in range(len(tokens)):
-            image = attention_maps[:, :, i]
-            image = 255 * image / image.max()
-            image = image.unsqueeze(-1).expand(*image.shape, 3)
-            image = image.numpy().astype(np.uint8)
-            image = np.array(Image.fromarray(image).resize((256, 256)))
-            image = ptp_utils.text_under_image(image, decoder(int(tokens[i])))
-            images.append(image)
-        return ptp_utils.view_images(np.stack(images, axis=0))
-
-    def show_self_attention_comp(
-        self,
-        attention_store: AttentionStore,
-        res: int,
-        from_where: List[str],
-        max_com=10,
-        select: int = 0,
-    ):
-        attention_maps = (
-            self.aggregate_attention(attention_store, res, from_where, False, select)
-            .numpy()
-            .reshape((res**2, res**2))
-        )
-        u, s, vh = np.linalg.svd(
-            attention_maps - np.mean(attention_maps, axis=1, keepdims=True)
-        )
-        images = []
-        for i in range(max_com):
-            image = vh[i].reshape(res, res)
-            image = image - image.min()
-            image = 255 * image / image.max()
-            image = np.repeat(np.expand_dims(image, axis=2), 3, axis=2).astype(np.uint8)
-            image = Image.fromarray(image).resize((256, 256))
-            image = np.array(image)
-            images.append(image)
-        ptp_utils.view_images(np.concatenate(images, axis=1))
-
-
 class P2PCrossAttnProcessor:
     def __init__(self, controller, place_in_unet):
         super().__init__()
```
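Two notes on the deleted helpers. `show_cross_attention` duplicated the rendering already done by `save_cross_attention_vis` above it, and `show_self_attention_comp` called `aggregate_attention` without the `prompts` argument the other call site passes, so neither appears to have been live code. For reference, the SVD trick the latter used can be reproduced standalone; a hedged sketch with made-up data (`res` and `attn` are placeholders, not values from this repo):

```python
import numpy as np

# Stand-in for an aggregated (res**2, res**2) self-attention matrix;
# real maps would come from aggregate_attention(..., is_cross=False, ...).
res = 16
rng = np.random.default_rng(0)
attn = rng.random((res**2, res**2))
attn /= attn.sum(axis=1, keepdims=True)  # rows act like attention distributions

# Mean-center each row and take the top right-singular vectors: each one
# reshapes into a res x res spatial component of the self-attention.
u, s, vh = np.linalg.svd(attn - attn.mean(axis=1, keepdims=True))
components = vh[:10].reshape(10, res, res)  # top-10 maps, as in max_com=10
```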
