
TGIS gRPC adapter for lm-eval #90

Open · wants to merge 2 commits into base: main
16 changes: 16 additions & 0 deletions lm-eval/Dockerfile
@@ -0,0 +1,16 @@
ARG BASE_UBI_IMAGE_TAG=9.3-1552
FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG}

RUN dnf update -y && dnf install -y python3-pip python3-wheel git && dnf clean all

WORKDIR /app
COPY pyproject.toml pyproject.toml
COPY tgis_eval tgis_eval

RUN pip install .

RUN useradd app
VOLUME /cache
ENV XDG_CACHE_HOME=/cache

USER app
15 changes: 15 additions & 0 deletions lm-eval/Makefile
@@ -0,0 +1,15 @@
gen-client:
	# Compile protos
	pip install grpcio-tools==1.60.0 mypy-protobuf==3.5.0 'types-protobuf>=3.20.4' --no-cache-dir
	mkdir tgis_eval/pb || true
	python -m grpc_tools.protoc -I../proto --python_out=tgis_eval/pb \
		--grpc_python_out=tgis_eval/pb --mypy_out=tgis_eval/pb ../proto/generation.proto
	find tgis_eval/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
	touch tgis_eval/pb/__init__.py

install: gen-client
	pip install pip --upgrade
	pip install -e . --no-cache-dir

image: gen-client
	podman build -t quay.io/wxpe/lm-eval-tgis:0.0.2 .
51 changes: 51 additions & 0 deletions lm-eval/README.md
@@ -0,0 +1,51 @@
# TGIS eval framework

This directory contains an adapter to run the [lm-eval](https://github.com/EleutherAI/lm-evaluation-harness)
framework on a TGIS server. We subclass the Model class to collect the benchmark requests and send
them to the TGIS server over gRPC.
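The adapter pattern described above can be sketched in a few lines. The sketch below is illustrative only (the class and method names are hypothetical, not the PR's actual code): where the real adapter issues gRPC calls to the TGIS server, the stub here returns dummy log-likelihoods, but the request-collection and batching shape is the same.

```python
from dataclasses import dataclass
from typing import List

@dataclass
class Request:
    """One lm-eval request: a context and a candidate continuation."""
    context: str
    continuation: str

class TGISLikeModel:
    """Stand-in for the Model subclass: collects requests, sends them in batches."""

    def __init__(self, batch_size: int = 16):
        self.batch_size = batch_size

    def _send_batch(self, batch: List[Request]) -> List[float]:
        # The real adapter would make a gRPC call to TGIS here;
        # this stub just returns a dummy log-likelihood per request.
        return [-1.0 for _ in batch]

    def loglikelihood(self, requests: List[Request]) -> List[float]:
        # Split the benchmark's requests into fixed-size batches
        # and concatenate the per-batch results in order.
        results: List[float] = []
        for i in range(0, len(requests), self.batch_size):
            results.extend(self._send_batch(requests[i:i + self.batch_size]))
        return results
```

The `--batch_size` command-line flag shown below maps onto the batching loop in this sketch.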

## Installing

To install lm-eval with TGIS support in your environment, run `make install`.


## Running

To run the benchmark, invoke the package as a Python module on the command line:
```
python3 -m tgis_eval \
--model_args server=<host, defaults to localhost>,port=<defaults to 8033> \
--model=tgis_eval \
--batch_size=16 \ # <-- change the batch size to fit your gpu
--tasks <task_id>
```

For example, to run the six benchmarks that make up the Hugging Face Open LLM Leaderboard
on a TGIS instance running on hostname `flan-t5-inference-server`:

```
python3 -m tgis_eval \
--model_args server=flan-t5-inference-server,port=8033 \
--model=tgis_eval \
--batch_size=16 \
--tasks ai2_arc,hellaswag,mmlu,truthfulqa,winogrande,gsm8k
```

## Building the container

To build the container, run `make image`.


## Running as a job on Kubernetes

You can run tgis-eval as a Kubernetes Job. Edit the `job.yaml` file in this directory to
suit your needs: make sure the hostname is correct and that the listed benchmarks are the
ones you want. Then submit the job with

```
kubectl apply -f job.yaml
```

If you're going to run several rounds of tests, it is recommended to allocate a persistent
volume and mount it in the job pod. This avoids downloading the same datasets over and
over.
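For instance, a PersistentVolumeClaim could be declared and mounted in place of the `emptyDir` volume in `job.yaml` (the claim name, size, and storage class below are illustrative; adjust them to your cluster):

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: lm-eval-cache
spec:
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
---
# In job.yaml, replace the emptyDir volume with:
# volumes:
# - name: cache-volume
#   persistentVolumeClaim:
#     claimName: lm-eval-cache
```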
30 changes: 30 additions & 0 deletions lm-eval/job.yaml
@@ -0,0 +1,30 @@
apiVersion: batch/v1
kind: Job
metadata:
  name: lm-eval-job
spec:
  template:
    spec:
      containers:
      - name: eval
        image: quay.io/wxpe/lm-eval-tgis:0.0.7
        command:
        - python3
        - -m
        - tgis_eval
        - --model_args
        - server=flan-t5-inference-server,port=8033
        - --model=tgis_eval
        - --batch_size=16
        - --tasks
        - ai2_arc,hellaswag,mmlu,truthfulqa,winogrande,gsm8k
        #- --limit=10
        volumeMounts:
        - name: cache-volume
          mountPath: /cache
      restartPolicy: Never
      volumes:
      - name: cache-volume
        emptyDir: {}
  backoffLimit: 0

14 changes: 14 additions & 0 deletions lm-eval/pyproject.toml
@@ -0,0 +1,14 @@
[tool.poetry]
name = "tgis-eval"
version = "0.1.0"
description = "lm-eval backend for tgis"
authors = ["Max de Bayser"]

[tool.poetry.dependencies]
python = ">=3.9"
grpcio-tools = "^1.62.1"
lm-eval = "^0.4.2"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
Empty file added lm-eval/tgis_eval/__init__.py
Empty file.
10 changes: 10 additions & 0 deletions lm-eval/tgis_eval/__main__.py
@@ -0,0 +1,10 @@
try:
    from lm_eval.__main__ import cli_evaluate
except ImportError:
    raise ImportError("Could not import lm_eval: please run `make install` in this directory.")  # noqa: B904

from .model import initialize_model

initialize_model()

cli_evaluate()