import os

import httpx
import numpy as np
from openai import OpenAI

from config import OPENAI_API_KEY, OPENAI_EMBEDDING_MODEL

# Print API key for debugging (only first few characters)
api_key = OPENAI_API_KEY
if api_key:
    # Only reveal the prefix/suffix when the key is long enough that the two
    # slices cannot overlap; otherwise the "masked" value would contain the
    # entire secret.
    if len(api_key) > 12:
        masked_key = api_key[:8] + "..." + api_key[-4:]
    else:
        masked_key = "***"
    print(f"OpenAI API Key: {masked_key}")
else:
    print("OpenAI API Key not found!")

# Initialize OpenAI client with explicitly configured httpx client to avoid
# proxy issues
http_client = httpx.Client()
client = OpenAI(api_key=OPENAI_API_KEY, http_client=http_client)


def generate_embedding(text):
    """Generate an embedding vector for text using OpenAI.

    Args:
        text: Input passed as ``input`` to the embeddings endpoint.

    Returns:
        numpy.ndarray built from the first embedding in the response.
    """
    response = client.embeddings.create(
        input=text,
        model=OPENAI_EMBEDDING_MODEL,
    )
    # Return embedding as numpy array
    return np.array(response.data[0].embedding)


def cosine_similarity(vec_a, vec_b):
    """Calculate cosine similarity between two vectors.

    Args:
        vec_a: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        vec_b: Second vector, same length as ``vec_a``.

    Returns:
        The dot product divided by the product of the Euclidean norms, or
        ``0.0`` when either vector has zero norm (the similarity is undefined
        there; returning 0.0 avoids a division by zero that yields ``nan``).
    """
    denominator = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if denominator == 0:
        return 0.0
    return np.dot(vec_a, vec_b) / denominator


def search_similar_documents(query_embedding, document_embeddings, top_k=5, threshold=0.7):
    """Find documents most similar to query based on embedding similarity.

    Args:
        query_embedding: Embedding vector of the query.
        document_embeddings: Iterable of mappings, each with an
            ``'embedding'`` entry holding that document's vector.
        top_k: Maximum number of results to return.
        threshold: Minimum cosine similarity a document must reach to be
            included.

    Returns:
        List of ``(doc, similarity)`` tuples sorted by similarity, highest
        first, truncated to ``top_k`` entries.
    """
    similarities = []

    for doc in document_embeddings:
        doc_embedding = np.array(doc['embedding'])
        similarity = cosine_similarity(query_embedding, doc_embedding)

        # Only include documents above similarity threshold
        if similarity >= threshold:
            similarities.append((doc, similarity))

    # Sort by similarity (highest first)
    similarities.sort(key=lambda x: x[1], reverse=True)

    # Return top_k results
    return similarities[:top_k]