import json
import re
from typing import List, Dict, Any

from db.db_utils import get_all_documents

# Expansions applied to single-word queries so that Malay legal terms also
# match their English counterparts in the documents.
_LEGAL_EXPANSIONS = {
    'seksyen': 'seksyen section akta',
    'jenayah': 'jenayah criminal crime',
    'hukuman': 'hukuman punishment penalty',
    'kesalahan': 'kesalahan offense offence',
    'mahkamah': 'mahkamah court tribunal'
}

# Number of matching sentences quoted in a generated answer.
_MAX_ANSWER_SENTENCES = 2

# Maximum number of characters shown in a result's content preview.
_PREVIEW_LENGTH = 300


def simple_keyword_search(query: str, documents: List[Dict[str, Any]], top_k: int = 5) -> List[Dict[str, Any]]:
    """Rank documents by keyword occurrence counts, without AI.

    The query is lower-cased and split on whitespace. Every occurrence of a
    query word in a document's content is worth 1 point and every occurrence
    in its title is worth 2 points (substring matching, case-insensitive).
    Documents scoring 0 are dropped.

    Args:
        query: The search query.
        documents: Document dicts. 'id' is required; 'title', 'content' and
            'doc_type' are optional. A 'title' or 'content' of None is
            treated as missing.
        top_k: Maximum number of results to return.

    Returns:
        Up to ``top_k`` result dicts sorted by descending 'similarity', each
        with the keys 'id', 'title', 'content', 'content_preview',
        'doc_type' and 'similarity' (the raw score divided by 100.0, so it
        is not capped at 1.0).

    Raises:
        KeyError: If a matching document has no 'id' key.
    """
    query_words = query.lower().split()
    results = []

    for doc in documents:
        # `or ''` also covers keys that are present but hold None, which
        # would otherwise crash on .lower() / len().
        content = doc.get('content') or ''
        title = doc.get('title') or ''

        # Lower-case once per document instead of once per query word.
        content_lower = content.lower()
        title_lower = title.lower()

        score = sum(
            content_lower.count(word) + 2 * title_lower.count(word)
            for word in query_words
        )
        if score <= 0:
            continue

        if len(content) > _PREVIEW_LENGTH:
            content_preview = content[:_PREVIEW_LENGTH] + "..."
        else:
            content_preview = content

        display_title = doc.get('title', 'Untitled')
        if display_title is None:
            display_title = 'Untitled'

        results.append({
            'id': doc['id'],
            'title': display_title,
            'content': content,
            'content_preview': content_preview,
            'doc_type': doc.get('doc_type', 'Unknown'),
            'similarity': score / 100.0  # Scaled score, not a true probability
        })

    # Stable sort: documents with equal scores keep their input order.
    results.sort(key=lambda x: x['similarity'], reverse=True)
    return results[:top_k]


def generate_simple_answer(query: str, relevant_docs: List[Dict[str, Any]]) -> str:
    """Build a short answer from the best-ranked document.

    Only the first document in ``relevant_docs`` is inspected. Its content is
    split into sentences on '.', '!' and '?', and the first sentences that
    contain any query word (case-insensitive substring match) are quoted.

    Args:
        query: The user's query.
        relevant_docs: Ranked result dicts; the first entry is treated as
            the best match. 'title' and 'content' are read from it.

    Returns:
        A human-readable answer string. A fallback message is returned when
        there are no documents, or when no sentence of the best document
        contains a query word.
    """
    if not relevant_docs:
        return "I couldn't find any relevant documents to answer your question. Please try rephrasing your query."

    best_doc = relevant_docs[0]
    title = best_doc.get('title', 'Untitled')
    query_words = query.lower().split()
    # Treat a None content like an empty one so re.split does not raise.
    content = best_doc.get('content') or ''

    relevant_sentences = []
    for sentence in re.split(r'[.!?]+', content):
        sentence_clean = sentence.strip()
        sentence_lower = sentence_clean.lower()
        if any(word in sentence_lower for word in query_words):
            relevant_sentences.append(sentence_clean)
            if len(relevant_sentences) >= _MAX_ANSWER_SENTENCES:
                break

    if not relevant_sentences:
        return f"I found relevant documents but couldn't extract specific information about '{query}'. You may want to review the document '{title}' for more details."

    answer = f"Based on the document '{title}', here's what I found:\n\n"
    # Splitting removed the terminal punctuation, so re-join with ". ".
    answer += ". ".join(relevant_sentences) + "."

    if len(relevant_docs) > 1:
        answer += f"\n\nI found {len(relevant_docs)} relevant documents in total."

    return answer


def simple_search(query: str, profile_search: bool = False) -> Dict[str, Any]:
    """Perform a simple keyword-based search without AI.

    Loads all documents, optionally expands a single-word query with related
    legal terms, ranks the documents and generates a short answer. Progress
    and errors are reported with ``print``.

    Args:
        query: The search query.
        profile_search: Whether to search in user profiles (not used
            currently).

    Returns:
        Dict with the keys 'query', 'enhanced_query', 'documents' and
        'answer'. If anything fails, 'documents' is empty and 'answer'
        carries an apology that includes the error text; no exception is
        propagated to the caller.
    """
    try:
        print(f"Processing simple search query: {query}")

        # Get all documents
        documents = get_all_documents(include_embeddings=False)
        print(f"Found {len(documents)} documents")

        # Simple expansion: only single-word queries are expanded. The lookup
        # uses the stripped token so surrounding whitespace does not prevent
        # a match.
        enhanced_query = query
        query_tokens = query.split()
        if len(query_tokens) == 1:
            enhanced_query = _LEGAL_EXPANSIONS.get(query_tokens[0].lower(), query)

        # Rank with the expanded query ...
        relevant_docs = simple_keyword_search(enhanced_query, documents)
        print(f"Found {len(relevant_docs)} relevant documents")

        # ... but extract answer sentences using the user's original wording.
        answer = generate_simple_answer(query, relevant_docs)
        print(f"Generated answer: {answer[:100]}...")

        return {
            "query": query,
            "enhanced_query": enhanced_query,
            "documents": relevant_docs,
            "answer": answer
        }

    except Exception as e:
        # Boundary handler: callers always get a well-formed response dict.
        print(f"Error in simple search: {e}")
        return {
            "query": query,
            "enhanced_query": query,
            "documents": [],
            "answer": f"I apologize, but I encountered an error while processing your query: {str(e)}. Please try again."
        }