app_5.py (forked from michelderu/build-your-local-ragstack-chatbot)
import streamlit as st
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models.ollama import ChatOllama
from cassandra.cluster import Cluster
from langchain_community.vectorstores import Cassandra
from langchain.schema.runnable import RunnableMap
from langchain.prompts import ChatPromptTemplate
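
# Dependency note (an assumption, not part of the original file): the imports
# above suggest this app needs streamlit, langchain, langchain-community,
# cassandra-driver and sentence-transformers installed, e.g.:
#   pip install streamlit langchain langchain-community cassandra-driver sentence-transformers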

# Cache the prompt for future runs
@st.cache_data()
def load_prompt():
    template = """You're a helpful AI assistant tasked to answer the user's question.
You're friendly and you answer extensively with multiple sentences. You prefer to use bullet points to summarize.

CONTEXT:
{context}

USER'S QUESTION:
{question}

YOUR ANSWER:"""
    return ChatPromptTemplate.from_messages([("system", template)])
prompt = load_prompt()
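
# How the placeholders resolve (illustrative sketch, not in the original file):
# the chain defined further down fills {context} with the retrieved documents
# and {question} with the user's input. You can render the template by hand:
#   messages = prompt.format_messages(context="<retrieved docs>", question="What is DSE?")
# which returns a single SystemMessage containing the fully rendered template.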

# Cache the Mistral chat model for future runs
@st.cache_resource()
def load_chat_model():
    # For the available ChatOllama parameters, see:
    # https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.ollama.ChatOllama.html
    # num_ctx is the context window size
    return ChatOllama(
        model="mistral:latest",
        num_ctx=18192,
        base_url=st.secrets['OLLAMA_ENDPOINT']
    )
chat_model = load_chat_model()
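
# Sanity check (illustrative only, not in the original file): the cached model
# can be called directly; ChatOllama.invoke() returns an AIMessage whose
# .content attribute holds the reply text.
#   print(chat_model.invoke("Say hi in one word").content)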

# Cache the DataStax Enterprise Vector Store for future runs
@st.cache_resource(show_spinner='Connecting to DataStax Enterprise v7 with Vector Support')
def load_vector_store():
    # Connect to DSE
    cluster = Cluster(
        [st.secrets['DSE_ENDPOINT']]
    )
    session = cluster.connect()
    # Connect to the Vector Store
    vector_store = Cassandra(
        session=session,
        embedding=HuggingFaceEmbeddings(),
        keyspace=st.secrets['DSE_KEYSPACE'],
        table_name=st.secrets['DSE_TABLE']
    )
    return vector_store
vector_store = load_vector_store()
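
# Embedding note (an assumption, not stated in the original file): with no
# model name given, HuggingFaceEmbeddings defaults to
# sentence-transformers/all-mpnet-base-v2, which produces 768-dimensional
# vectors, so the DSE vector table must have been created with a matching
# dimension. A quick way to probe the store directly:
#   docs = vector_store.similarity_search("test query", k=2)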

# Cache the retriever for future runs
@st.cache_resource(show_spinner='Getting retriever')
def load_retriever():
    # Get the retriever for the chat model
    retriever = vector_store.as_retriever(
        search_kwargs={"k": 5}
    )
    return retriever
retriever = load_retriever()
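
# Retrieval note (explanatory comment, not in the original file): k=5 means
# the five most similar chunks are returned per query and passed to the
# prompt as {context}. Raising k gives the model more evidence to work with
# but consumes more of the num_ctx context window set above.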

# Start with empty messages, stored in session state
if 'messages' not in st.session_state:
    st.session_state.messages = []
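
# Rerun note (explanatory comment, not in the original file): Streamlit
# re-executes this whole script on every user interaction, so anything that
# must survive between runs (like the chat history) lives in
# st.session_state, while the resources above are rebuilt only once thanks
# to the cache decorators.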

# Draw a title and some markdown
st.markdown("""# Your Enterprise Co-Pilot 🚀
Generative AI is considered to bring about the next Industrial Revolution.
Why? Studies show a **37% efficiency boost** in day-to-day work activities!

### Security and safety
This chatbot is safe to use with sensitive data. Why?
- First of all, it makes use of [Ollama, a local inference engine](https://ollama.com);
- On top of the inference engine, we're running [Mistral, a local and open Large Language Model (LLM)](https://mistral.ai/);
- Also, the LLM does not contain any sensitive or enterprise data, as there is no way to secure it inside an LLM;
- Instead, your sensitive data is stored securely within the firewall inside the [DataStax Enterprise v7 Vector Database](https://www.datastax.com/blog/get-started-with-the-datastax-enterprise-7-0-developer-vector-search-preview);
- And lastly, the chains are built on [RAGStack](https://www.datastax.com/products/ragstack), an enterprise version of LangChain and LlamaIndex, supported by [DataStax](https://www.datastax.com/).""")
st.divider()

# Draw all messages, both user and bot so far (every time the app reruns)
for message in st.session_state.messages:
    st.chat_message(message['role']).markdown(message['content'])

# Draw the chat input box
if question := st.chat_input("What's up?"):
    # Store the user's question in a session object for redrawing next time
    st.session_state.messages.append({"role": "human", "content": question})

    # Draw the user's question
    with st.chat_message('human'):
        st.markdown(question)

    # Generate the answer by calling Mistral's chat model
    inputs = RunnableMap({
        'context': lambda x: retriever.get_relevant_documents(x['question']),
        'question': lambda x: x['question']
    })
    chain = inputs | prompt | chat_model
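
    # Chain note (explanatory comment, not in the original file): the pipe
    # syntax is LangChain Expression Language. The RunnableMap builds the dict
    # {'context': <retrieved docs>, 'question': <user text>}, the prompt
    # renders that dict into the system template, and the chat model turns
    # the rendered prompt into an AIMessage.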
    response = chain.invoke({'question': question})
    answer = response.content

    # Store the bot's answer in a session object for redrawing next time
    st.session_state.messages.append({"role": "ai", "content": answer})

    # Draw the bot's answer
    with st.chat_message('assistant'):
        st.markdown(answer)
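
# Usage sketch (an assumption, not part of the original file): the app expects
# a .streamlit/secrets.toml defining the four keys referenced above, e.g.:
#   OLLAMA_ENDPOINT = "http://localhost:11434"
#   DSE_ENDPOINT = "localhost"
#   DSE_KEYSPACE = "default_keyspace"
#   DSE_TABLE = "my_vector_table"
# (all values are placeholders), and is started with:
#   streamlit run app_5.py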