# agc-chatbot/embedding/rag_service.py

import requests

from embedding.embedding_service_http import generate_embedding, search_similar_documents
from db.db_utils import get_all_embeddings, log_search
from config import OPENAI_API_KEY, OPENAI_CHAT_MODEL, MAX_SEARCH_RESULTS, SIMILARITY_THRESHOLD

def rag_search(query):
    """
    Perform RAG (Retrieval Augmented Generation) search:
    1. Generate embedding for the query
    2. Find similar documents
    3. Generate an answer based on retrieved documents
    """
    # Generate embedding for the query
    query_embedding = generate_embedding(query)

    # Get all document embeddings from the database
    document_embeddings = get_all_embeddings()

    # Find similar documents
    similar_docs = search_similar_documents(
        query_embedding,
        document_embeddings,
        top_k=MAX_SEARCH_RESULTS,
        threshold=SIMILARITY_THRESHOLD
    )

    # Extract document contents for context
    contexts = []
    result_docs = []
    print("Debug - Similar docs:", [(doc['document_id'], doc['title']) for doc, _ in similar_docs])
    for doc, similarity in similar_docs:
        # Add document to contexts for the LLM
        contexts.append(
            f"Document Title: {doc['title']}\n"
            f"Content: {doc['content']}\n"
            f"Source: {doc['source'] or 'Unknown'}\n"
            f"Relevance: {similarity:.4f}"
        )
        # Add document to results for display - ensure we use the right ID
        result_docs.append({
            "id": doc['document_id'],  # This should be the ID from the documents table
            "title": doc['title'],
            "content_preview": doc['content'][:200] + "..." if len(doc['content']) > 200 else doc['content'],
            "source": doc['source'],
            "similarity": float(similarity)
        })
    # Generate answer using LLM with retrieved contexts
    if contexts:
        context_text = "\n\n---\n\n".join(contexts)

        # Prepare the system message
        system_message = """
        You are a helpful assistant that answers questions based on the provided document contexts.
        If the context doesn't contain relevant information, say so clearly.
        If the information is incomplete, explain what's missing.
        Always cite your sources when providing information.
        Format your answers to be clear and concise.
        Always answer in formal Bahasa Malaysia.
        """

        # Use direct HTTP request to OpenAI API instead of client library
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {OPENAI_API_KEY}"
        }
        payload = {
            "model": OPENAI_CHAT_MODEL,
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": f"Context documents:\n\n{context_text}\n\nBased on these documents, please answer the following query: {query}"}
            ],
            "temperature": 0.5
        }
print(f"Requesting chat completion using model: {OPENAI_CHAT_MODEL}")
response = requests.post(
"https://api.openai.com/v1/chat/completions",
headers=headers,
json=payload
)
if response.status_code != 200:
raise Exception(f"Error from OpenAI API: {response.text}")
result = response.json()
answer = result["choices"][0]["message"]["content"]
else:
answer = "I couldn't find any relevant documents to answer your question."
    # Log the search
    search_results = {
        "query": query,
        "documents": result_docs,
        "answer": answer
    }
    log_search(query, search_results)

    return search_results
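
# Minimal smoke-test sketch for running this module directly. The query string
# below is purely illustrative, and this assumes the embedding service, the
# database, and the OpenAI credentials in config are all reachable.
if __name__ == "__main__":
    results = rag_search("Apakah polisi cuti tahunan syarikat?")  # hypothetical query
    print("Answer:", results["answer"])
    for doc in results["documents"]:
        print(f"- {doc['title']} (similarity: {doc['similarity']:.4f})")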