3
3
import pytest
4
4
from llama_index .core .llms import ChatMessage , MessageRole
5
5
from llama_index .llms .huggingface_api import HuggingFaceInferenceAPI
6
+ from huggingface_hub .inference ._generated .types import ChatCompletionOutput
6
7
7
- STUB_MODEL_NAME = "placeholder_model "
8
+ STUB_MODEL_NAME = "microsoft/Phi-4-multimodal-instruct "
8
9
9
10
10
11
@pytest .fixture (name = "hf_inference_api" )
@@ -45,15 +46,17 @@ def test_chat(self, hf_inference_api: HuggingFaceInferenceAPI) -> None:
45
46
generated_response = (
46
47
" It's based on the book of the same name by James Fenimore Cooper."
47
48
)
48
- conversational_return = {
49
- "choices" : [
50
- {
51
- "message" : {
52
- "content" : generated_response ,
49
+ conversational_return = ChatCompletionOutput .parse_obj (
50
+ {
51
+ "choices" : [
52
+ {
53
+ "message" : {
54
+ "content" : generated_response ,
55
+ }
53
56
}
54
- }
55
- ],
56
- }
57
+ ],
58
+ }
59
+ )
57
60
58
61
with patch .object (
59
62
hf_inference_api ._sync_client ,
@@ -67,6 +70,8 @@ def test_chat(self, hf_inference_api: HuggingFaceInferenceAPI) -> None:
67
70
mock_conversational .assert_called_once_with (
68
71
messages = [{"role" : m .role .value , "content" : m .content } for m in messages ],
69
72
model = STUB_MODEL_NAME ,
73
+ temperature = 0.1 ,
74
+ max_tokens = 256 ,
70
75
)
71
76
72
77
def test_chat_text_generation (
@@ -97,6 +102,8 @@ def test_chat_text_generation(
97
102
assert response .message .content == conversational_return
98
103
mock_complete .assert_called_once_with (
99
104
"System: You are an expert movie reviewer\n User: Which movie is the best?\n Assistant:" ,
105
+ model = STUB_MODEL_NAME ,
106
+ temperature = 0.1 ,
100
107
max_new_tokens = 256 ,
101
108
)
102
109
@@ -109,5 +116,7 @@ def test_complete(self, hf_inference_api: HuggingFaceInferenceAPI) -> None:
109
116
return_value = generated_text ,
110
117
) as mock_text_generation :
111
118
response = hf_inference_api .complete (prompt )
112
- mock_text_generation .assert_called_once_with (prompt , max_new_tokens = 256 )
119
+ mock_text_generation .assert_called_once_with (
120
+ prompt , model = STUB_MODEL_NAME , temperature = 0.1 , max_new_tokens = 256
121
+ )
113
122
assert response .text == generated_text
0 commit comments