
Commit 5815613

v0.12.22 (#17969)
1 parent 7529a74 commit 5815613

File tree

9 files changed, +65 -25 lines changed


CHANGELOG.md

+14
@@ -1,5 +1,19 @@
 # ChangeLog
 
+## [2025-02-28]
+
+### `llama-index-core` [0.12.22]
+
+- fix agentworkflow tool call tracking on final response (#17968)
+
+### `llama-index-readers-github` [0.6.0]
+
+- Ensure that Github reader uses timeout and retries params (#17959)
+
+### `llama-index-readers-web` [0.3.7]
+
+- chore: update FireCrawlWebReader integration to support extract (#17957)
+
 ## [2025-02-27]
 
 ### `llama-index-core` [0.12.21]
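For the readers-github entry above, a minimal usage sketch: the timeout and retries keyword names come from the changelog entry (#17959), while the client setup, repository, and values are illustrative assumptions rather than code from this commit.

from llama_index.readers.github import GithubClient, GithubRepositoryReader

# Illustrative sketch only; token, owner/repo, and values are placeholders.
client = GithubClient(github_token="<github-token>")
reader = GithubRepositoryReader(
    github_client=client,
    owner="run-llama",
    repo="llama_index",
    timeout=10,  # per #17959, these settings are now actually applied to requests
    retries=3,
)
documents = reader.load_data(branch="main")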

docs/docs/CHANGELOG.md

+15 -1

@@ -1,5 +1,19 @@
 # ChangeLog
 
+## [2025-02-28]
+
+### `llama-index-core` [0.12.22]
+
+- fix agentworkflow tool call tracking on final response (#17968)
+
+### `llama-index-readers-github` [0.6.0]
+
+- Ensure that Github reader uses timeout and retries params (#17959)
+
+### `llama-index-readers-web` [0.3.7]
+
+- chore: update FireCrawlWebReader integration to support extract (#17957)
+
 ## [2025-02-27]
 
 ### `llama-index-core` [0.12.21]
@@ -10,7 +24,7 @@
 - Feature/remove retriever tool template override (#17909)
 - only modify delta if 'Answer:' was actually detected (#17901)
 - Fix CitationQueryEngine init function for response_synthesizer (#17897)
-- fix ChatSummaryMemoryBuffer._summarize_oldest_chat_history (#17845)
+- fix ChatSummaryMemoryBuffer.\_summarize_oldest_chat_history (#17845)
 - fix: make base64 detection more robust across the board (#17930)
 - fix: stepwise execution breaks when steps do async work (#17914)
 - safer workflow cancel + fix restored context bug (#17938)
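For the readers-web entry above, a hypothetical sketch of extract-style usage: mode="extract" and the prompt parameter are inferred from the changelog entry (#17957) and Firecrawl's extract endpoint, not read from this diff, so the exact parameter names may differ.

from llama_index.readers.web import FireCrawlWebReader

# Assumed parameter names; consult the reader's docstring for the exact API.
reader = FireCrawlWebReader(
    api_key="<firecrawl-api-key>",
    mode="extract",
    params={"prompt": "Extract the pricing tiers listed on the page"},
)
documents = reader.load_data(urls=["https://example.com/pricing"])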

llama-index-core/llama_index/core/__init__.py

+1 -1

@@ -1,6 +1,6 @@
 """Init file of LlamaIndex."""
 
-__version__ = "0.12.21"
+__version__ = "0.12.22"
 
 import logging
 from logging import NullHandler

llama-index-core/pyproject.toml

+1 -1

@@ -46,7 +46,7 @@ name = "llama-index-core"
 packages = [{include = "llama_index"}]
 readme = "README.md"
 repository = "https://github.com/run-llama/llama_index"
-version = "0.12.21"
+version = "0.12.22"
 
 [tool.poetry.dependencies]
 SQLAlchemy = {extras = ["asyncio"], version = ">=1.4.49"}

llama-index-integrations/llms/llama-index-llms-huggingface-api/llama_index/llms/huggingface_api/base.py

+8 -5

@@ -173,10 +173,13 @@ def __init__(self, **kwargs: Any) -> None:
         self._sync_client = InferenceClient(**self._get_inference_client_kwargs())
         self._async_client = AsyncInferenceClient(**self._get_inference_client_kwargs())
 
-        # set context window if not provided
-        info = self._sync_client.get_endpoint_info()
-        if "max_input_tokens" in info and kwargs.get("context_window") is None:
-            self.context_window = info["max_input_tokens"]
+        # set context window if not provided, if we can get the endpoint info
+        try:
+            info = self._sync_client.get_endpoint_info()
+            if "max_input_tokens" in info and kwargs.get("context_window") is None:
+                self.context_window = info["max_input_tokens"]
+        except Exception:
+            pass
 
     def _get_inference_client_kwargs(self) -> Dict[str, Any]:
         """Extract the Hugging Face InferenceClient construction parameters."""
@@ -224,7 +227,7 @@ def _to_huggingface_messages(
 
     def _parse_streaming_tool_calls(
         self, tool_call_strs: List[str]
-    ) -> List[ToolSelection | str]:
+    ) -> List[Union[ToolSelection, str]]:
         tool_calls = []
        # Try to parse into complete objects, otherwise keep as strings
         for tool_call_str in tool_call_strs:
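The try/except above keeps construction from failing when the endpoint's metadata cannot be fetched, and an explicitly passed context_window still wins because of the kwargs.get("context_window") check. A minimal usage sketch follows; the model name and token are placeholders, not values from this commit.

from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# Placeholder model/token values; with the change above, an endpoint whose
# get_endpoint_info() call fails no longer raises during construction.
llm = HuggingFaceInferenceAPI(
    model_name="microsoft/Phi-4-multimodal-instruct",
    token="<hf-token>",
    context_window=8192,  # explicit value is kept; endpoint metadata is only a fallback
)
print(llm.complete("Say hello in one sentence."))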

llama-index-integrations/llms/llama-index-llms-huggingface-api/pyproject.toml

+1 -1

@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-huggingface-api"
 readme = "README.md"
-version = "0.4.0"
+version = "0.4.1"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"

llama-index-integrations/llms/llama-index-llms-huggingface-api/tests/test_huggingface_api.py

+19 -10

@@ -3,8 +3,9 @@
 import pytest
 from llama_index.core.llms import ChatMessage, MessageRole
 from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
+from huggingface_hub.inference._generated.types import ChatCompletionOutput
 
-STUB_MODEL_NAME = "placeholder_model"
+STUB_MODEL_NAME = "microsoft/Phi-4-multimodal-instruct"
 
 
 @pytest.fixture(name="hf_inference_api")
@@ -45,15 +46,17 @@ def test_chat(self, hf_inference_api: HuggingFaceInferenceAPI) -> None:
         generated_response = (
             " It's based on the book of the same name by James Fenimore Cooper."
         )
-        conversational_return = {
-            "choices": [
-                {
-                    "message": {
-                        "content": generated_response,
+        conversational_return = ChatCompletionOutput.parse_obj(
+            {
+                "choices": [
+                    {
+                        "message": {
+                            "content": generated_response,
+                        }
                     }
-                }
-            ],
-        }
+                ],
+            }
+        )
 
         with patch.object(
             hf_inference_api._sync_client,
@@ -67,6 +70,8 @@ def test_chat(self, hf_inference_api: HuggingFaceInferenceAPI) -> None:
         mock_conversational.assert_called_once_with(
             messages=[{"role": m.role.value, "content": m.content} for m in messages],
             model=STUB_MODEL_NAME,
+            temperature=0.1,
+            max_tokens=256,
         )
 
     def test_chat_text_generation(
@@ -97,6 +102,8 @@ def test_chat_text_generation(
         assert response.message.content == conversational_return
         mock_complete.assert_called_once_with(
             "System: You are an expert movie reviewer\nUser: Which movie is the best?\nAssistant:",
+            model=STUB_MODEL_NAME,
+            temperature=0.1,
             max_new_tokens=256,
         )
 
@@ -109,5 +116,7 @@ def test_complete(self, hf_inference_api: HuggingFaceInferenceAPI) -> None:
             return_value=generated_text,
         ) as mock_text_generation:
             response = hf_inference_api.complete(prompt)
-            mock_text_generation.assert_called_once_with(prompt, max_new_tokens=256)
+            mock_text_generation.assert_called_once_with(
+                prompt, model=STUB_MODEL_NAME, temperature=0.1, max_new_tokens=256
+            )
         assert response.text == generated_text

poetry.lock

+4 -4
Some generated files are not rendered by default.

pyproject.toml

+2 -2

@@ -45,7 +45,7 @@ name = "llama-index"
 packages = [{from = "_llama-index", include = "llama_index"}]
 readme = "README.md"
 repository = "https://github.com/run-llama/llama_index"
-version = "0.12.21"
+version = "0.12.22"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
@@ -57,7 +57,7 @@ llama-index-agent-openai = "^0.4.0"
 llama-index-readers-file = "^0.4.0"
 llama-index-readers-llama-parse = ">=0.4.0"
 llama-index-indices-managed-llama-cloud = ">=0.4.0"
-llama-index-core = "^0.12.21"
+llama-index-core = "^0.12.22"
 llama-index-multi-modal-llms-openai = "^0.4.0"
 llama-index-cli = "^0.4.1"
 nltk = ">3.8.1" # avoids a CVE, temp until next release, should be in llama-index-core
