121 lines
4.7 KiB
Python
121 lines
4.7 KiB
Python
import json
|
|
import re
|
|
from typing import List, Dict, Any
|
|
from db.db_utils import get_all_documents
|
|
|
|
def simple_keyword_search(query: str, documents: List[Dict[str, Any]], top_k: int = 5) -> List[Dict[str, Any]]:
    """Rank documents by naive keyword frequency, without any AI model.

    The query is lower-cased and split on whitespace. For every query word,
    each substring occurrence in a document's content adds 1 point and each
    occurrence in its title adds 2 points. Documents scoring 0 are dropped.

    Args:
        query: Free-text search query.
        documents: Document dicts. ``id`` is required; ``title``, ``content``
            and ``doc_type`` are optional, and ``title``/``content`` may be
            ``None`` (treated as empty text).
        top_k: Maximum number of results to return. Values <= 0 yield ``[]``.

    Returns:
        Up to ``top_k`` result dicts sorted by descending ``similarity``
        (ties keep their input order). Each dict has the keys ``id``,
        ``title``, ``content``, ``content_preview``, ``doc_type`` and
        ``similarity``.

    Raises:
        KeyError: If a matching document has no ``id`` key.
    """
    query_words = query.lower().split()
    # Nothing can match an empty query, and a non-positive limit means "no
    # results" (a negative slice bound would silently drop trailing hits).
    if not query_words or top_k <= 0:
        return []

    results = []
    for doc in documents:
        # Stored values may be None even when the key exists, so fall back to
        # an empty string instead of calling .lower() on None.
        raw_content = doc.get('content') or ''
        raw_title = doc.get('title')

        content = raw_content.lower()
        title = (raw_title or '').lower()

        # Content match worth 1 point, title match worth 2 points.
        score = sum(content.count(word) + 2 * title.count(word) for word in query_words)
        if score <= 0:
            continue

        if len(raw_content) > 300:
            content_preview = raw_content[:300] + "..."
        else:
            content_preview = raw_content

        results.append({
            'id': doc['id'],
            'title': 'Untitled' if raw_title is None else raw_title,
            'content': raw_content,
            'content_preview': content_preview,
            'doc_type': doc.get('doc_type', 'Unknown'),
            # Fixed divisor only rescales the raw score; it is not capped at 1.0.
            'similarity': score / 100.0,
        })

    # list.sort is stable, so equally scored documents keep their input order.
    results.sort(key=lambda item: item['similarity'], reverse=True)
    return results[:top_k]
|
|
|
|
def generate_simple_answer(query: str, relevant_docs: List[Dict[str, Any]]) -> str:
    """Build a short textual answer from the top-ranked document.

    The content of ``relevant_docs[0]`` is split into sentences on runs of
    ``.``, ``!`` or ``?``. The first two sentences containing any query word
    (case-insensitive substring match) are quoted in the answer.

    Args:
        query: The user's original search query.
        relevant_docs: Ranked result dicts; only the first one is quoted.
            Its ``title`` and ``content`` may be missing or ``None``.

    Returns:
        A "not found" message when ``relevant_docs`` is empty, a quoted
        excerpt when matching sentences exist, or a pointer to the best
        document's title when no sentence matches.
    """
    if not relevant_docs:
        return "I couldn't find any relevant documents to answer your question. Please try rephrasing your query."

    # The caller passes documents sorted by relevance, so the first is the best.
    best_doc = relevant_docs[0]

    # Guard against keys that exist but hold None: re.split would raise on
    # None content, and a None title would be rendered as the text 'None'.
    title = best_doc.get('title')
    if title is None:
        title = 'Untitled'
    content = best_doc.get('content') or ''

    query_words = query.lower().split()
    max_sentences = 2  # only this many sentences are ever shown

    relevant_sentences = []
    for sentence in re.split(r'[.!?]+', content):
        sentence_clean = sentence.strip()
        lowered = sentence_clean.lower()
        if any(word in lowered for word in query_words):
            relevant_sentences.append(sentence_clean)
            if len(relevant_sentences) >= max_sentences:
                break

    if not relevant_sentences:
        return f"I found relevant documents but couldn't extract specific information about '{query}'. You may want to review the document '{title}' for more details."

    answer = f"Based on the document '{title}', here's what I found:\n\n"
    answer += ". ".join(relevant_sentences) + "."

    if len(relevant_docs) > 1:
        answer += f"\n\nI found {len(relevant_docs)} relevant documents in total."

    return answer
|
|
|
|
# Expansion table for single-word queries: each key is replaced by itself plus
# related terms (keys appear to be Malay legal words with English synonyms).
_LEGAL_EXPANSIONS = {
    'seksyen': 'seksyen section akta',
    'jenayah': 'jenayah criminal crime',
    'hukuman': 'hukuman punishment penalty',
    'kesalahan': 'kesalahan offense offence',
    'mahkamah': 'mahkamah court tribunal',
}


def simple_search(query: str, profile_search: bool = False) -> Dict[str, Any]:
    """Perform a simple keyword-based search without AI.

    Loads all documents via ``get_all_documents(include_embeddings=False)``,
    optionally expands a single-word query with related legal terms, ranks
    the documents with ``simple_keyword_search`` and builds a textual answer
    with ``generate_simple_answer``. Progress is reported with ``print``.

    Args:
        query: The search query.
        profile_search: Whether to search in user profiles (not used currently).

    Returns:
        Dict with the keys ``query``, ``enhanced_query``, ``documents`` and
        ``answer``. If any step raises, the error is printed and a dict with
        an empty ``documents`` list and an apology message (including the
        error text) is returned instead of propagating the exception.
    """
    try:
        print(f"Processing simple search query: {query}")

        # Get all documents
        documents = get_all_documents(include_embeddings=False)
        print(f"Found {len(documents)} documents")

        # Enhanced query (simple expansion): only single-word queries are
        # expanded. Look up the split word rather than the raw query so that
        # surrounding whitespace (e.g. " seksyen ") does not defeat the match.
        enhanced_query = query
        words = query.split()
        if len(words) == 1:
            enhanced_query = _LEGAL_EXPANSIONS.get(words[0].lower(), query)

        # Get relevant documents
        relevant_docs = simple_keyword_search(enhanced_query, documents)
        print(f"Found {len(relevant_docs)} relevant documents")

        # The answer is generated from the original query, not the expanded
        # one, so only sentences containing the user's own words are quoted.
        answer = generate_simple_answer(query, relevant_docs)
        print(f"Generated answer: {answer[:100]}...")

        return {
            "query": query,
            "enhanced_query": enhanced_query,
            "documents": relevant_docs,
            "answer": answer,
        }
    except Exception as e:
        # Top-level boundary: report the failure and return a well-formed
        # response so callers always receive the same dict shape.
        print(f"Error in simple search: {e}")
        return {
            "query": query,
            "enhanced_query": query,
            "documents": [],
            "answer": f"I apologize, but I encountered an error while processing your query: {str(e)}. Please try again.",
        }