agc-chatbot/embedding/rag_service.py

import json
import requests
from embedding.embedding_service_http import generate_embedding, search_similar_documents
from db.db_utils import get_all_embeddings, log_search
from config import OPENAI_API_KEY, OPENAI_CHAT_MODEL, MAX_SEARCH_RESULTS, SIMILARITY_THRESHOLD

def rag_search(query):
    """
    Perform RAG (Retrieval Augmented Generation) search:
    1. Generate embedding for the query
    2. Find similar documents
    3. Generate an answer based on retrieved documents
    """
    # Generate embedding for the query
    query_embedding = generate_embedding(query)

    # Get all document embeddings from the database
    document_embeddings = get_all_embeddings()

    # Find similar documents
    similar_docs = search_similar_documents(
        query_embedding,
        document_embeddings,
        top_k=MAX_SEARCH_RESULTS,
        threshold=SIMILARITY_THRESHOLD
    )

    # Extract document contents for context
    contexts = []
    result_docs = []

    print("Debug - Similar docs:", [(doc['document_id'], doc['title']) for doc, _ in similar_docs])

    for doc, similarity in similar_docs:
        # Add document to contexts for the LLM
        contexts.append(f"Document Title: {doc['title']}\nContent: {doc['content']}\nSource: {doc['source'] or 'Unknown'}\nRelevance: {similarity:.4f}")

        # Add document to results for display - ensure we use the right ID
        result_docs.append({
            "id": doc['document_id'],  # This should be the ID from documents table
            "title": doc['title'],
            "content_preview": doc['content'][:200] + "..." if len(doc['content']) > 200 else doc['content'],
            "source": doc['source'],
            "similarity": float(similarity)
        })

    # Generate answer using LLM with retrieved contexts
    if contexts:
        context_text = "\n\n---\n\n".join(contexts)

        # Prepare the system message
        system_message = """
        You are a helpful assistant that answers questions based on the provided document contexts.
        If the context doesn't contain relevant information, say so clearly.
        If the information is incomplete, explain what's missing.
        Always cite your sources when providing information.
        Format your answers to be clear and concise.
        Always answer in formal Bahasa Malaysia.
        """

        # Use direct HTTP request to OpenAI API instead of client library
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {OPENAI_API_KEY}"
        }

        payload = {
            "model": OPENAI_CHAT_MODEL,
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": f"Context documents:\n\n{context_text}\n\nBased on these documents, please answer the following query: {query}"}
            ],
            "temperature": 0.5
        }

        print(f"Requesting chat completion using model: {OPENAI_CHAT_MODEL}")

        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload
        )

        if response.status_code != 200:
            raise Exception(f"Error from OpenAI API: {response.text}")

        result = response.json()
        answer = result["choices"][0]["message"]["content"]
    else:
        answer = "I couldn't find any relevant documents to answer your question."

    # Log the search
    search_results = {
        "query": query,
        "documents": result_docs,
        "answer": answer
    }
    log_search(query, search_results)

    return search_results