Commit 334d64d

[ci] add vllm_test_utils (vllm-project#10659)
Signed-off-by: youkaichao <[email protected]>
1 parent 9406353 commit 334d64d

14 files changed: +113 −61 lines

Dockerfile (+4)

@@ -191,6 +191,10 @@ ADD . /vllm-workspace/
 RUN --mount=type=cache,target=/root/.cache/pip \
     python3 -m pip install -r requirements-dev.txt

+# install development dependencies (for testing)
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install -e tests/vllm_test_utils
+
 # enable fast downloads from hf (for testing)
 RUN --mount=type=cache,target=/root/.cache/pip \
     python3 -m pip install hf_transfer

Dockerfile.cpu (+4)

@@ -62,4 +62,8 @@ WORKDIR /workspace/

 RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks

+# install development dependencies (for testing)
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -e tests/vllm_test_utils
+
 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]

Dockerfile.hpu (+3)

@@ -11,6 +11,9 @@ ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true

 RUN VLLM_TARGET_DEVICE=hpu python3 setup.py install

+# install development dependencies (for testing)
+RUN python3 -m pip install -e tests/vllm_test_utils
+
 WORKDIR /workspace/

 RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks

Dockerfile.neuron (+3)

@@ -38,4 +38,7 @@ ENV VLLM_TARGET_DEVICE neuron
 RUN --mount=type=bind,source=.git,target=.git \
     pip install --no-build-isolation -v -e .

+# install development dependencies (for testing)
+RUN python3 -m pip install -e tests/vllm_test_utils
+
 CMD ["/bin/bash"]

Dockerfile.openvino (+3)

@@ -22,4 +22,7 @@ RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVIC
 COPY examples/ /workspace/examples
 COPY benchmarks/ /workspace/benchmarks

+# install development dependencies (for testing)
+RUN python3 -m pip install -e tests/vllm_test_utils
+
 CMD ["/bin/bash"]

Dockerfile.ppc64le (+3)

@@ -29,6 +29,9 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 RUN --mount=type=bind,source=.git,target=.git \
     VLLM_TARGET_DEVICE=cpu python3 setup.py install

+# install development dependencies (for testing)
+RUN python3 -m pip install -e tests/vllm_test_utils
+
 WORKDIR /workspace/

 RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks

Dockerfile.rocm (+3)

@@ -168,4 +168,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     if ls libs/*.whl; then \
     python3 -m pip install libs/*.whl; fi

+# install development dependencies (for testing)
+RUN python3 -m pip install -e tests/vllm_test_utils
+
 CMD ["/bin/bash"]

Dockerfile.tpu (+3)

@@ -22,4 +22,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     -r requirements-tpu.txt
 RUN python3 setup.py develop

+# install development dependencies (for testing)
+RUN python3 -m pip install -e tests/vllm_test_utils
+
 CMD ["/bin/bash"]

Dockerfile.xpu (+2 −1)

@@ -64,5 +64,6 @@ RUN --mount=type=cache,target=/root/.cache/pip \

 ENV VLLM_USAGE_SOURCE production-docker-image \
     TRITON_XPU_PROFILE 1
-
+# install development dependencies (for testing)
+RUN python3 -m pip install -e tests/vllm_test_utils
 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
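All of the images above install the helper package in editable mode from tests/vllm_test_utils, so the utilities are importable wherever the test suite runs inside the container. A minimal sanity check, assuming an image built from this revision (the snippet is illustrative, not part of the commit):

import vllm_test_utils

# The package re-exports its public helpers from __init__.py (see below);
# a successful editable install should print ['blame', 'BlameResult'].
print(vllm_test_utils.__all__)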

tests/entrypoints/llm/test_lazy_outlines.py (+16 −7)

@@ -1,12 +1,12 @@
 import sys

+from vllm_test_utils import blame
+
 from vllm import LLM, SamplingParams
 from vllm.distributed import cleanup_dist_env_and_memory


-def test_lazy_outlines(sample_regex):
-    """If users don't use guided decoding, outlines should not be imported.
-    """
+def run_normal():
     prompts = [
         "Hello, my name is",
         "The president of the United States is",
@@ -25,13 +25,12 @@ def test_lazy_outlines(sample_regex):
         generated_text = output.outputs[0].text
         print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")

-    # make sure outlines is not imported
-    assert 'outlines' not in sys.modules
-
     # Destroy the LLM object and free up the GPU memory.
     del llm
     cleanup_dist_env_and_memory()

+
+def run_lmfe(sample_regex):
     # Create an LLM with guided decoding enabled.
     llm = LLM(model="facebook/opt-125m",
               enforce_eager=True,
@@ -51,5 +50,15 @@ def test_lazy_outlines(sample_regex):
         generated_text = output.outputs[0].text
         print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")

+
+def test_lazy_outlines(sample_regex):
+    """If users don't use guided decoding, outlines should not be imported.
+    """
     # make sure outlines is not imported
-    assert 'outlines' not in sys.modules
+    module_name = "outlines"
+    with blame(lambda: module_name in sys.modules) as result:
+        run_normal()
+        run_lmfe(sample_regex)
+    assert not result.found, (
+        f"Module {module_name} is already imported, the"
+        f" first import location is:\n{result.trace_stack}")

tests/test_lazy_torch_compile.py (+1 −53)

@@ -1,61 +1,9 @@
 # Description: Test the lazy import module
 # The utility function cannot be placed in `vllm.utils`
 # this needs to be a standalone script
-
-import contextlib
-import dataclasses
 import sys
-import traceback
-from typing import Callable, Generator
-
-
-@dataclasses.dataclass
-class BlameResult:
-    found: bool = False
-    trace_stack: str = ""
-
-
-@contextlib.contextmanager
-def blame(func: Callable) -> Generator[BlameResult, None, None]:
-    """
-    Trace the function calls to find the first function that satisfies the
-    condition. The trace stack will be stored in the result.
-
-    Usage:
-
-    ```python
-    with blame(lambda: some_condition()) as result:
-        # do something
-
-    if result.found:
-        print(result.trace_stack)
-    """
-    result = BlameResult()
-
-    def _trace_calls(frame, event, arg=None):
-        nonlocal result
-        if event in ['call', 'return']:
-            # for every function call or return
-            try:
-                # Temporarily disable the trace function
-                sys.settrace(None)
-                # check condition here
-                if not result.found and func():
-                    result.found = True
-                    result.trace_stack = "".join(traceback.format_stack())
-                # Re-enable the trace function
-                sys.settrace(_trace_calls)
-            except NameError:
-                # modules are deleted during shutdown
-                pass
-        return _trace_calls
-
-    sys.settrace(_trace_calls)
-
-    yield result
-
-    sys.settrace(None)

+from vllm_test_utils import blame

 module_name = "torch._inductor.async_compile"

tests/vllm_test_utils/setup.py (+7)

@@ -0,0 +1,7 @@
+from setuptools import setup
+
+setup(
+    name='vllm_test_utils',
+    version='0.1',
+    packages=['vllm_test_utils'],
+)
tests/vllm_test_utils/vllm_test_utils/__init__.py (+8)

@@ -0,0 +1,8 @@
+"""
+vllm_utils is a package for vLLM testing utilities.
+It does not import any vLLM modules.
+"""
+
+from .blame import BlameResult, blame
+
+__all__ = ["blame", "BlameResult"]
tests/vllm_test_utils/vllm_test_utils/blame.py (+53)

@@ -0,0 +1,53 @@
+import contextlib
+import dataclasses
+import sys
+import traceback
+from typing import Callable, Generator
+
+
+@dataclasses.dataclass
+class BlameResult:
+    found: bool = False
+    trace_stack: str = ""
+
+
+@contextlib.contextmanager
+def blame(func: Callable) -> Generator[BlameResult, None, None]:
+    """
+    Trace the function calls to find the first function that satisfies the
+    condition. The trace stack will be stored in the result.
+
+    Usage:
+
+    ```python
+    with blame(lambda: some_condition()) as result:
+        # do something
+
+    if result.found:
+        print(result.trace_stack)
+    """
+    result = BlameResult()
+
+    def _trace_calls(frame, event, arg=None):
+        nonlocal result
+        if event in ['call', 'return']:
+            # for every function call or return
+            try:
+                # Temporarily disable the trace function
+                sys.settrace(None)
+                # check condition here
+                if not result.found and func():
+                    result.found = True
+                    result.trace_stack = "".join(traceback.format_stack())
+                # Re-enable the trace function
+                sys.settrace(_trace_calls)
+            except NameError:
+                # modules are deleted during shutdown
+                pass
+        return _trace_calls
+
+    sys.settrace(_trace_calls)
+
+    yield result
+
+    sys.settrace(None)
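For reference, a minimal sketch of how this helper is meant to be driven, in the same spirit as the updated tests above. The module names here are purely illustrative; the only assumption is that vllm_test_utils has been installed (e.g. via the editable installs added to the Dockerfiles in this commit):

import sys

from vllm_test_utils import blame

# Detect whether "sqlite3" is (or becomes) present in sys.modules while the
# block below runs, capturing the stack at the first point the condition holds.
module_name = "sqlite3"

with blame(lambda: module_name in sys.modules) as result:
    import csv  # stand-in for the code whose imports we want to audit

if result.found:
    print(f"{module_name} was imported; first import location:\n"
          f"{result.trace_stack}")
else:
    print(f"{module_name} was never imported")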
