import requests

from embedding.embedding_service_http import generate_embedding, search_similar_documents
from db.db_utils import get_all_embeddings, log_search
from config import OPENAI_API_KEY, OPENAI_CHAT_MODEL, MAX_SEARCH_RESULTS, SIMILARITY_THRESHOLD


def rag_search(query):
    """
    Perform RAG (Retrieval-Augmented Generation) search:
    1. Generate an embedding for the query
    2. Find similar documents
    3. Generate an answer based on the retrieved documents
    """
    # Generate an embedding for the query
    query_embedding = generate_embedding(query)

    # Get all document embeddings from the database
    document_embeddings = get_all_embeddings()

    # Find similar documents
    similar_docs = search_similar_documents(
        query_embedding,
        document_embeddings,
        top_k=MAX_SEARCH_RESULTS,
        threshold=SIMILARITY_THRESHOLD
    )

    # Extract document contents for context
    contexts = []
    result_docs = []

    print("Debug - Similar docs:", [(doc['document_id'], doc['title']) for doc, _ in similar_docs])

    for doc, similarity in similar_docs:
        # Add the document to the contexts passed to the LLM
        contexts.append(
            f"Document Title: {doc['title']}\n"
            f"Content: {doc['content']}\n"
            f"Source: {doc['source'] or 'Unknown'}\n"
            f"Relevance: {similarity:.4f}"
        )

        # Add the document to the results for display - ensure we use the right ID
        result_docs.append({
            "id": doc['document_id'],  # This should be the ID from the documents table
            "title": doc['title'],
            "content_preview": doc['content'][:200] + "..." if len(doc['content']) > 200 else doc['content'],
            "source": doc['source'],
            "similarity": float(similarity)
        })

    # Generate an answer using the LLM with the retrieved contexts
    if contexts:
        context_text = "\n\n---\n\n".join(contexts)

        # Prepare the system message
        system_message = """
        You are a helpful assistant that answers questions based on the provided document contexts.
        If the context doesn't contain relevant information, say so clearly.
        If the information is incomplete, explain what's missing.
        Always cite your sources when providing information.
        Format your answers to be clear and concise.
        Always answer in formal Bahasa Malaysia.
        """

        # Use a direct HTTP request to the OpenAI API instead of the client library
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {OPENAI_API_KEY}"
        }

        payload = {
            "model": OPENAI_CHAT_MODEL,
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": f"Context documents:\n\n{context_text}\n\nBased on these documents, please answer the following query: {query}"}
            ],
            "temperature": 0.5
        }

        print(f"Requesting chat completion using model: {OPENAI_CHAT_MODEL}")

        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60  # avoid hanging indefinitely if the API is unresponsive
        )

        if response.status_code != 200:
            raise Exception(f"Error from OpenAI API: {response.text}")

        result = response.json()
        answer = result["choices"][0]["message"]["content"]
    else:
        answer = "I couldn't find any relevant documents to answer your question."

    # Log the search
    search_results = {
        "query": query,
        "documents": result_docs,
        "answer": answer
    }
    log_search(query, search_results)

    return search_results
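

# --- Usage sketch (illustrative; not part of the original module) ---
# A minimal example of invoking rag_search directly, assuming config.py is
# populated with a valid OPENAI_API_KEY and the database already contains
# embedded documents. The query string below is hypothetical.
if __name__ == "__main__":
    results = rag_search("Apakah polisi cuti tahunan syarikat?")  # hypothetical query
    print("Answer:", results["answer"])
    for doc in results["documents"]:
        print(f"- {doc['title']} (similarity: {doc['similarity']:.4f})")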