Please refer to the references below to get up to speed on this project:
- Building an AI-Driven Document Query Assistant: A Step-by-Step Python Tutorial
- Building an AI-Driven Document Query Assistant: Using Non-OpenAI’s Embed Model
This is a continuation update for the “Building an AI-Driven Document Query Assistant” series. In the last update, I implemented a non-OpenAI embedding model, specifically nomic-embed-text:latest served through Ollama. In this update, I have added an incremental re-indexing feature and enabled users to choose between multiple GPT language models; the script now supports gpt-3.5-turbo and gpt-4o.
You can find the public GitHub repo for this project at: https://github.com/aarriitt666/ragai_llamaindex
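Before the full listing, here is the gist of the incremental re-indexing feature as a standalone sketch. The helper name diff_against_metadata is illustrative only; the real implementation (document_changes_detected) appears in the complete code below. The idea: keep a metadata.json of already-indexed filenames, diff it against the documents folder, and parse and insert only the new files.

import json
import os

def diff_against_metadata(documents_path, metadata_path):
    """Return (new_files, removed_files) by comparing disk contents to metadata.json."""
    indexed = set()
    if os.path.exists(metadata_path):
        with open(metadata_path, 'r') as f:
            indexed = set(json.load(f))
    current = {f for f in os.listdir(documents_path)
               if os.path.isfile(os.path.join(documents_path, f))}
    return current - indexed, indexed - current

The new files are then loaded with SimpleDirectoryReader(input_files=...) and added to the existing index via index.insert_nodes(...), so nothing already indexed gets re-embedded.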
Complete code:
import streamlit as st
import os
from dotenv import load_dotenv
from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, load_index_from_storage, StorageContext
from llama_index.core.settings import Settings
import warnings
from llama_index.core.indices.postprocessor import SentenceTransformerRerank
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import TextNode
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json
import traceback
from llama_index.embeddings.ollama import OllamaEmbedding
# Load environment variables
load_dotenv('.env')
# Suppress specific FutureWarnings from huggingface_hub
warnings.filterwarnings("ignore", category=FutureWarning, module='huggingface_hub')
class QueryBundle:
    def __init__(self, query_str):
        self.query_str = query_str
# Define paths
storage_path = './vectorstore'
documents_path = './documents'
# Model selection
model_option = st.selectbox(
    "Select LLM model",
    ("gpt-3.5-turbo", "gpt-4o")
)
# Set the model configuration
Settings.llm = OpenAI(model=model_option, temperature=0.1)
Settings.chunk_size = 2048
Settings.chunk_overlap = 500
ollama_embedding = OllamaEmbedding(
    model_name="nomic-embed-text:latest",
    base_url="http://localhost:11434",  # default local Ollama endpoint
    ollama_additional_kwargs={"mirostat": 0},  # generation-time option; effectively a no-op for embeddings
)
Settings.embed_model = ollama_embedding
# Ensure directories exist
os.makedirs(storage_path, exist_ok=True)
os.makedirs(documents_path, exist_ok=True)
# Initialize the reranker
reranker = SentenceTransformerRerank(model="cross-encoder/ms-marco-MiniLM-L-2-v2", top_n=5)
# Initialize the parser
parser = SentenceSplitter()
def document_changes_detected(documents_path, metadata_path):
    # Load existing metadata if available
    if os.path.exists(metadata_path):
        with open(metadata_path, 'r') as f:
            indexed_files = set(json.load(f))
    else:
        indexed_files = set()

    # Get the current set of documents
    current_files = {file for file in os.listdir(documents_path) if os.path.isfile(os.path.join(documents_path, file))}

    # Detect changes
    new_files = current_files - indexed_files
    removed_files = indexed_files - current_files

    # Log changes
    # st.write(f"Current files: {current_files}")
    # st.write(f"Indexed files: {indexed_files}")
    # st.write(f"New files: {new_files}")
    # st.write(f"Removed files: {removed_files}")

    # Update metadata file if changes are detected
    if new_files or removed_files:
        with open(metadata_path, 'w') as f:
            json.dump(list(current_files), f)

    return list(new_files), list(removed_files)
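# Caveat: change detection is by filename only. A file edited in place keeps
# its name, so it will not show up as "new"; after editing existing documents,
# use the Re-index Documents button to force a full rebuild.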
def pprint_response(response, show_source=False):
    # Local helper: handles plain-string responses as well as response objects
    if isinstance(response, str):
        print(response)  # Handle the string directly
    else:
        if response.response is None:
            print("No response.")
        else:
            print(response.response)
        if show_source:
            print("Source:", response.source)
class EnhancedTextNode:
    def __init__(self, text_node):
        self.node = text_node  # Wrap the original TextNode

    def get_content(self, metadata_mode):
        return self.node.text  # Implement a method that the reranker might call
# Wrap TextNodes so the reranker can call get_content() on them
def enhance_and_rerank_responses(responses, query):
    """Combine reranking and enhancing to select the most comprehensive and relevant response."""
    if not responses:
        return "No responses available."

    # Rerank using the semantic (cross-encoder) reranker
    query_bundle = QueryBundle(query)
    nodes = [EnhancedTextNode(TextNode(text=res)) for res in responses]  # Wrap TextNodes for compatibility
    reranked_nodes = reranker.postprocess_nodes(nodes=nodes, query_bundle=query_bundle)
    reranked_responses = [node.node.text for node in reranked_nodes]

    # Enhance the response quality by selecting the most representative answer:
    # the one with the highest average TF-IDF cosine similarity to the others
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(reranked_responses)
    cosine_matrix = cosine_similarity(tfidf_matrix)
    avg_similarity = cosine_matrix.mean(axis=0)
    best_response_idx = avg_similarity.argmax()
    return reranked_responses[best_response_idx]
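# Illustrative usage (hypothetical inputs): the cross-encoder first orders the
# candidates by relevance to the query, then the TF-IDF step picks the answer
# most representative of the reranked set:
#   best = enhance_and_rerank_responses(
#       ["Paris is the capital.", "The capital of France is Paris."],
#       "What is the capital of France?",
#   )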
@st.cache_resource(show_spinner=False)
def initialize(force_reindex=False, incremental_index=False):
    metadata_path = os.path.join(storage_path, 'metadata.json')
    new_files, removed_files = document_changes_detected(documents_path, metadata_path)

    if force_reindex:
        # Rebuild the index from scratch
        documents = SimpleDirectoryReader(input_dir=documents_path).load_data()
        nodes = parser.get_nodes_from_documents(documents)
        index = VectorStoreIndex(nodes, embed_model=Settings.embed_model)
        index.storage_context.persist(persist_dir=storage_path)
        return index, "Re-indexing completed."
    else:
        # Load the existing index from storage
        storage_context = StorageContext.from_defaults(persist_dir=storage_path)
        index = load_index_from_storage(storage_context)

        # Handle incremental indexing
        if new_files:
            new_file_paths = [os.path.join(documents_path, file) for file in new_files]
            new_documents = SimpleDirectoryReader(input_files=new_file_paths).load_data()
            new_nodes = parser.get_nodes_from_documents(new_documents)
            index.insert_nodes(new_nodes)
            incremental_message = "Incremental re-indexing completed."
        else:
            incremental_message = "No new documents detected. Incremental re-indexing not needed."

        # Handle removed files
        if removed_files:
            for file in removed_files:
                # Note: delete_document expects a document ID; this assumes the
                # filename was used as the ID. If not, removed files linger
                # until a full re-index.
                index.docstore.delete_document(file)

        index.storage_context.persist(persist_dir=storage_path)
        return index, incremental_message
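# Note on caching: st.cache_resource memoizes initialize() per argument
# combination, so the first click of either button computes fresh results for
# that combination, while later clicks and plain reruns reuse cached entries
# until the Clear Cache button below wipes them.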
def main():
    st.title('Ask the Document')

    # Button to force re-indexing
    force_reindex = st.button("Re-index Documents")

    # Button to trigger incremental indexing
    incremental_index = st.button("Incremental Indexing")

    if force_reindex:
        st.info("Re-indexing triggered...")
    if incremental_index:
        st.info("Incremental indexing triggered...")

    if st.button('Clear Cache'):
        st.cache_data.clear()
        st.cache_resource.clear()  # initialize() is cached with cache_resource, so clear that too
        st.info('Cache cleared!')

    try:
        # Initialize or reinitialize the index if needed
        index, message = initialize(force_reindex=force_reindex, incremental_index=incremental_index)
        st.info(f"Index initialized or loaded successfully. {message}")

        # Check for documents and handle uploads
        if not os.listdir(documents_path):
            st.error("No documents found. Please upload your documents.")
            uploaded_files = st.file_uploader("Upload documents", accept_multiple_files=True, type=['pdf', 'txt', 'docx'])
            if uploaded_files:
                for uploaded_file in uploaded_files:
                    with open(os.path.join(documents_path, uploaded_file.name), "wb") as f:
                        f.write(uploaded_file.getvalue())
                st.experimental_rerun()  # Rerun the script after files are uploaded
        else:
            if 'messages' not in st.session_state:
                st.session_state.messages = [{'role': 'assistant', 'content': 'Ask me a question!'}]

            # Document interaction section
            chat_engine = index.as_chat_engine(chat_mode='condense_question', verbose=True)
            if prompt := st.text_input('Your question'):
                st.session_state.messages.append({'role': 'user', 'content': prompt})

            for message in st.session_state.messages:
                with st.expander(f"{message['role'].title()} says:"):
                    st.write(message['content'])

            if st.session_state.messages[-1]['role'] != 'assistant':
                with st.spinner('Thinking...'):
                    response = chat_engine.chat(prompt)
                    response_texts = response.response if isinstance(response.response, list) else [response.response]
                    st.write(response_texts)
                    best_response = enhance_and_rerank_responses(response_texts, prompt)
                    pprint_response(best_response, show_source=True)
                    st.session_state.messages.append({'role': 'assistant', 'content': best_response})
    except Exception as e:
        st.error("An error occurred during document processing or initialization.")
        st.text(f"Error: {e}")
        st.text(traceback.format_exc())  # Show the full traceback in the interface

if __name__ == "__main__":
    main()
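To try it out, save the script (e.g. as app.py; the filename is your choice), put your OPENAI_API_KEY in the .env file, make sure Ollama is serving locally with the embedding model pulled (ollama pull nomic-embed-text), and then launch it with streamlit run app.py. The paths and the Ollama URL match the defaults hard-coded above.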

