From cb2a56f70d4a3d4b623aa06d29687a947c030530 Mon Sep 17 00:00:00 2001 From: ammarhamzi2019278344 Date: Wed, 4 Jun 2025 15:04:53 +0800 Subject: [PATCH] env file --- cf-plan.md | 178 ++++++++++++++++++++++++++++++++++++++++++++ demoEnv.txt | 20 +++++ frontend/index.html | 2 +- 3 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 cf-plan.md create mode 100644 demoEnv.txt diff --git a/cf-plan.md b/cf-plan.md new file mode 100644 index 0000000..30db54b --- /dev/null +++ b/cf-plan.md @@ -0,0 +1,178 @@ +# Cross-Reference Tab Implementation Plan + +## Overview + +Create a standalone tab that allows users to upload a document, process it, and find related documents in the existing database. + +## Current System Analysis + +### Backend (FastAPI) + +- ✅ `/search` endpoint exists - can find related documents +- ✅ `/documents` endpoint exists - can retrieve documents +- ❌ No document upload endpoint +- ❌ No document processing for uploaded files + +### Frontend + +- ✅ Tab system exists +- ✅ Basic cross-reference function exists (hardcoded) +- ❌ No file upload functionality +- ❌ No dedicated cross-reference tab + +## Implementation Plan + +### Phase 1: Backend API Extensions + +#### New Endpoints Needed + +1. **`POST /upload-document`** + + - Accept file upload (PDF, DOC, DOCX, TXT) + - Extract text content from uploaded file + - Return processed text and document metadata + - **No database storage** - temporary processing only + +2. **`POST /find-cross-references`** + - Accept processed document text + - Use existing search functionality internally + - Return related documents with similarity scores + - Include cross-reference analysis + +#### Leverage Existing APIs + +- Use existing `/search` endpoint logic for finding related documents +- Use existing `/documents` endpoint to fetch full related documents +- Use existing database connection and document retrieval functions + +### Phase 2: Frontend Implementation + +#### New Tab Structure + +1. 
**Upload Section** + + - File drop zone + - File type validation (PDF, DOC, DOCX, TXT) + - Upload progress indicator + - File preview/summary + +2. **Processing Section** + + - Processing status indicator + - Document analysis summary + - Key terms extraction display + +3. **Results Section** + - Related documents list + - Similarity scores + - Cross-reference details + - Document preview capability + +#### UI Components Needed + +- File upload widget +- Progress bars +- Results grid/list +- Document preview modal +- Cross-reference visualization + +### Phase 3: Processing Logic + +#### Document Processing Pipeline + +1. **File Upload & Validation** + + - Validate file type and size + - Extract text content using appropriate libraries + - Clean and normalize text + +2. **Content Analysis** + + - Extract key terms and phrases + - Identify legal concepts + - Generate search queries from content + +3. **Cross-Reference Matching** + + - Use existing search service (enhanced_rag_service or simple_search_service) + - Multiple search strategies: + - Full text similarity + - Key terms matching + - Legal concept matching + - Rank results by relevance + +4. 
**Results Processing** + - Format cross-reference results + - Include similarity metrics + - Group by document type or relevance + +## Technical Approach + +### Backend Dependencies + +```text +# New libraries needed +- python-multipart # For file uploads +- PyPDF2 or pdfplumber # PDF text extraction +- python-docx # Word (.docx) text extraction; legacy .doc files need a separate converter +``` + +### API Strategy + +**Recommendation: Create new endpoints** because: + +- Current `/search` expects a text query, not document content +- Need specialized document processing logic +- Need different response format for cross-references +- Upload functionality is entirely new + +### Frontend Strategy + +- Add new tab to existing tab system +- Use existing styling and components where possible +- Implement file upload using HTML5 File API +- Use existing API calling patterns + +## File Structure + +### New Backend Files + +``` +embedding/ +├── document_processor.py # Handle file uploads and text extraction +├── cross_reference_service.py # Cross-reference logic +``` + +### New Frontend Components + +``` +frontend/ +├── js/ +│ ├── cross-reference.js # Cross-reference tab logic +│ └── file-upload.js # File upload utilities +├── css/ +│ └── cross-reference.css # Specific styling +``` + +### API Endpoints Summary + +1. **`POST /upload-document`** - New endpoint needed +2. **`POST /find-cross-references`** - New endpoint needed +3. **`GET /documents`** - Use existing +4. **`GET /documents/{id}`** - Use existing + +## Development Priority + +1. Backend document upload and processing +2. Cross-reference matching logic +3. Frontend tab and upload interface +4. Results display and formatting +5. 
Error handling and validation + +## Benefits of This Approach + +- Leverages existing search infrastructure +- Maintains separation of concerns +- Scalable and maintainable +- Consistent with current API patterns +- No database changes needed diff --git a/demoEnv.txt b/demoEnv.txt new file mode 100644 index 0000000..a2cf2de --- /dev/null +++ b/demoEnv.txt @@ -0,0 +1,20 @@ +# MySQL Database Configuration +MYSQL_HOST=47.130.80.140 +MYSQL_PORT=3333 +MYSQL_USER=root +MYSQL_PASSWORD=1ibL5A5cGevvM7Ax0ZDqyKXQTHMlEW5D5hwG6OcR7KPF77kMkEfxFEbLDtwzr6Ci +MYSQL_DATABASE=agc + + +# App Configuration +DEBUG=True +STREAMLIT_SERVER_PORT=8501 + + +# OpenAI API Configuration +OPENAI_API_KEY=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 +OPENAI_CHAT_MODEL=gpt-4o + +# Application Settings +MAX_SEARCH_RESULTS=5 +SIMILARITY_THRESHOLD=0.7 \ No newline at end of file diff --git a/frontend/index.html b/frontend/index.html index 62ce128..dceb06a 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -2267,7 +2267,7 @@