first commit

Harsimran Singh 2025-02-28 01:25:10 +05:30
commit acda3bf334
7 changed files with 289 additions and 0 deletions

.gitignore (vendored, new file, 31 lines added)

@@ -0,0 +1,31 @@
# Python bytecode files
*.pyc
*.pyo
__pycache__/
# Virtual environment directories
env/
venv/
ENV/
.venv/
# Configuration files
*.env
*.env.*
# IDE or editor settings (e.g., VSCode, PyCharm)
.vscode/
.idea/
# Python packaging
*.egg
*.egg-info/
dist/
build/
*.tar.gz
faiss_index/
.env

Book1.pdf (new binary file, not shown)

Book2.pdf (new binary file, not shown)

README.md (new file, 66 lines added)

@@ -0,0 +1,66 @@
# Ayurveda Chatbot using LLaMA and RAG
This project is an interactive Ayurveda chatbot built on a **Retrieval-Augmented Generation (RAG)** pipeline that pairs **FAISS vector retrieval** with a **LLaMA model served through the Groq API**. The chatbot answers user queries using Ayurvedic knowledge retrieved from indexed PDF texts.
---
## Features
- **PDF Knowledge Base**: Indexes Ayurvedic texts so answers stay domain-specific.
- **RAG Pipeline**: Combines FAISS vector retrieval with a LLaMA model for context-aware responses.
- **Streamlit Interface**: Easy-to-use frontend for interacting with the chatbot.
---
## Requirements
- Python 3.8+
- GPU support (optional; only the sentence-transformers embedding model runs locally, so a GPU mainly speeds up indexing)
- A [Groq](https://groq.com) API key; the chatbot calls the hosted `llama-3.3-70b-versatile` model
---
## Installation
### 1. Clone the Repository
```bash
git clone https://github.com/your-username/ayurveda-chatbot.git
cd ayurveda-chatbot
```
### 2. Create and Activate a Virtual Environment
On Linux/macOS:
```bash
python3 -m venv env
source env/bin/activate
```
On Windows:
```
python -m venv env
env\Scripts\activate
```
### 3. Install Dependencies
```bash
pip install -r requirements.txt
```
### 4. Configure the `.env` File
Create a `.env` file in the project root with your Groq API key; `frontend.py` loads it via `python-dotenv`.
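A minimal `.env` (the variable name matches what `frontend.py` reads; replace the placeholder value with your own key):
```
GROQ_API_KEY=your_groq_api_key_here
```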
---
## Usage
### 1. Preprocess PDFs and Create the FAISS Index
Ensure the PDF files (e.g., `Book1.pdf`, `Book2.pdf`) are placed in the project directory.
Run the backend script to preprocess the data and create a FAISS index:
```bash
python3 backend.py Book1.pdf Book2.pdf --index-path faiss_index
```
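The script prints a progress line for each book and finishes with `FAISS index updated and saved at: faiss_index` (these messages come from the `print` calls in `backend.py`).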
### 2. Start the Chatbot
Launch the Streamlit interface:
```bash
streamlit run frontend.py
```
Access the chatbot in your browser at http://localhost:8501.

backend.py (new file, 66 lines added)

@@ -0,0 +1,66 @@
import os

import faiss
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from langchain.docstore.in_memory import InMemoryDocstore
from sentence_transformers import SentenceTransformer


# Custom embedding wrapper so LangChain can call a sentence-transformers model
class SentenceTransformerEmbeddings(Embeddings):
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        # Convert to plain lists, as the Embeddings interface expects
        return self.model.encode(texts, show_progress_bar=True).tolist()

    def embed_query(self, text):
        return self.model.encode([text], show_progress_bar=False)[0].tolist()


# Create an empty FAISS vector store sized to the embedding dimension
def create_empty_faiss_index(embedding_model):
    embedding_dimension = embedding_model.model.get_sentence_embedding_dimension()
    index = faiss.IndexFlatL2(embedding_dimension)  # Flat L2-distance index
    docstore = InMemoryDocstore({})
    index_to_docstore_id = {}
    return FAISS(
        index=index,
        docstore=docstore,
        index_to_docstore_id=index_to_docstore_id,
        embedding_function=embedding_model,
    )


# Add one or more PDF books to the FAISS index, creating it if needed
def update_faiss_index(book_paths, faiss_index_path="faiss_index"):
    embedding_model = SentenceTransformerEmbeddings()
    if os.path.exists(faiss_index_path):
        print("Loading existing FAISS index...")
        # allow_dangerous_deserialization is required by this LangChain
        # version to unpickle a locally saved index (frontend.py already
        # passes it when loading the same index)
        db = FAISS.load_local(
            faiss_index_path, embedding_model, allow_dangerous_deserialization=True
        )
    else:
        print("Creating a new FAISS index...")
        db = create_empty_faiss_index(embedding_model)

    # Process each book
    for book_path in book_paths:
        print(f"Processing book: {book_path}")
        loader = PyPDFLoader(book_path)
        documents = loader.load()

        # Split the text into overlapping chunks for retrieval
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunks = text_splitter.split_documents(documents)
        texts = [chunk.page_content for chunk in chunks]

        # Embed the chunks and add them to the index
        db.add_texts(texts)

    # Persist the updated index to disk
    db.save_local(faiss_index_path)
    print(f"FAISS index updated and saved at: {faiss_index_path}")


# Command-line interface
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Update FAISS index with new books")
    parser.add_argument("books", nargs="+", help="Path(s) to the PDF book(s)")
    parser.add_argument("--index-path", default="faiss_index", help="Path to FAISS index directory")
    args = parser.parse_args()
    update_faiss_index(args.books, args.index_path)
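
As a quick sanity check after indexing, the saved index can be reloaded and queried directly. A minimal sketch (a hypothetical helper, not part of this commit) that reuses the `SentenceTransformerEmbeddings` wrapper from `backend.py`:

```python
# verify_index.py -- hypothetical helper script, not part of this commit
from langchain_community.vectorstores import FAISS

from backend import SentenceTransformerEmbeddings

# Reload the saved index the same way frontend.py does
db = FAISS.load_local(
    "faiss_index",
    SentenceTransformerEmbeddings(),
    allow_dangerous_deserialization=True,
)

# Print the three chunks most similar to a test query
for doc in db.similarity_search("What are the three doshas?", k=3):
    print(doc.page_content[:200])
    print("---")
```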

frontend.py (new file, 113 lines added)

@@ -0,0 +1,113 @@
import os

import requests  # HTTP client used to call the Groq chat-completions API
import streamlit as st
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer

# Load environment variables from the .env file
load_dotenv()

# Set page configuration
st.set_page_config(page_title="Ayurveda Chatbot", layout="wide")

# Check for the GROQ_API_KEY environment variable
groq_key = os.getenv("GROQ_API_KEY")
if not groq_key:
    st.error("The 'GROQ_API_KEY' environment variable is not set. Please set it in the .env file or the environment.")
else:
    st.write("GROQ_API_KEY loaded successfully")


# Custom embedding wrapper so LangChain can call a sentence-transformers model
class SentenceTransformerEmbeddings(Embeddings):
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        # Convert to plain lists, as the Embeddings interface expects
        return self.model.encode(texts, show_progress_bar=True).tolist()

    def embed_query(self, text):
        return self.model.encode([text], show_progress_bar=False)[0].tolist()


# Path to the FAISS index produced by backend.py
faiss_index_path = "faiss_index"

# Load the FAISS index; allow_dangerous_deserialization is required to
# unpickle a locally saved index in this LangChain version
embedding_model = SentenceTransformerEmbeddings()
try:
    db = FAISS.load_local(faiss_index_path, embedding_model, allow_dangerous_deserialization=True)
except Exception as e:
    st.error(f"Failed to load FAISS index: {str(e)}")
    db = None


# Thin client for the Groq chat-completions endpoint
class GroqAPI:
    def __init__(self, api_key):
        self.api_key = api_key
        self.endpoint = "https://api.groq.com/openai/v1/chat/completions"

    def generate_answer(self, query, context, model="llama-3.3-70b-versatile"):
        # The system message carries the persona plus the retrieved context
        system_message = (
            "You are an Ayurvedic expert with deep knowledge of Ayurvedic practices, remedies, and diagnostics. "
            "Use the provided Ayurvedic context to answer the question thoughtfully and accurately.\n\n"
            f"Context:\n{context}\n\n"
            f"Question:\n{query}\n\n"
            "Answer as an Ayurvedic expert:"
        )
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": query},
            ],
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        try:
            # Timeout keeps the UI from hanging if the API is unreachable
            response = requests.post(self.endpoint, json=payload, headers=headers, timeout=60)
            if response.status_code == 200:
                result = response.json()
                return result["choices"][0]["message"]["content"]
            else:
                return f"Error: {response.status_code} - {response.text}"
        except Exception as e:
            return f"Error: {str(e)}"


# Initialize the Groq client
groq_api = GroqAPI(api_key=groq_key)


# Custom QA chain: retrieve context from FAISS, then generate with Groq
def custom_qa_chain(query):
    if not db:
        return "FAISS index is not loaded."
    try:
        # Retrieve the three most relevant chunks from the FAISS index
        context = db.similarity_search(query, k=3)
        context_text = "\n".join([doc.page_content for doc in context])
        # Get the response from the Groq API
        response = groq_api.generate_answer(query, context_text)
    except Exception as e:
        response = f"Error during QA chain: {str(e)}"
    return response


# Streamlit UI
st.title("Ayurveda Chatbot")
st.subheader("Ask your Ayurvedic Question")
query = st.text_input("Enter your query:")
if query:
    with st.spinner("Retrieving answer..."):
        st.write(f"Processing query: {query}")
        # Get the response from the custom QA chain
        response = custom_qa_chain(query)
        st.markdown(f"### Answer:\n{response}")
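
For reference, the HTTP request that `GroqAPI.generate_answer` assembles can be reproduced standalone; a minimal sketch using `requests`, assuming `GROQ_API_KEY` is set in the environment (the query string is just an illustrative example):

```python
# standalone_groq_check.py -- hypothetical sketch mirroring GroqAPI.generate_answer
import os

import requests
from dotenv import load_dotenv

load_dotenv()

# Same endpoint, headers, and payload shape as the GroqAPI class above
response = requests.post(
    "https://api.groq.com/openai/v1/chat/completions",
    headers={
        "Authorization": f"Bearer {os.environ['GROQ_API_KEY']}",
        "Content-Type": "application/json",
    },
    json={
        "model": "llama-3.3-70b-versatile",
        "messages": [
            {"role": "system", "content": "You are an Ayurvedic expert."},
            {"role": "user", "content": "What is Triphala used for?"},
        ],
    },
    timeout=60,
)
print(response.json()["choices"][0]["message"]["content"])
```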

requirements.txt (new file, 13 lines added)

@@ -0,0 +1,13 @@
streamlit==1.41.1
langchain==0.3.13
langchain-community==0.3.13
sentence-transformers==3.3.1
faiss-cpu==1.9.0.post1
PyPDF2==3.0.1
torch==2.5.1
transformers==4.47.1
pandas==2.2.3
numpy==1.26.4
pypdf==5.1.0
groq==0.15.0
python-dotenv