first commit
commit acda3bf334
.gitignore (vendored, new file, 31 lines)
@@ -0,0 +1,31 @@
# Python bytecode files
*.pyc
*.pyo
__pycache__/

# Virtual environment directories
env/
venv/
ENV/
.venv/

# Configuration files
*.env
*.env.*

# IDE or editor settings (e.g., VSCode, PyCharm)
.vscode/
.idea/

# Python packaging
*.egg
*.egg-info/
dist/
build/
*.tar.gz

faiss_index/

.env

README.md (new file, 66 lines)
@@ -0,0 +1,66 @@
# Ayurveda Chatbot using LLaMA and RAG

This project is an interactive Ayurveda chatbot built on a **Retrieval-Augmented Generation (RAG)** pipeline powered by a **LLaMA language model served via the Groq API**. The chatbot answers user queries with Ayurvedic knowledge retrieved from a FAISS index built over PDF texts.

---

## Features

- **PDF Knowledge Base**: Pretrained on Ayurvedic texts for domain-specific answers.
- **RAG Pipeline**: Combines FAISS vector retrieval and LLaMA for context-aware responses (see the sketch below).
- **Streamlit Interface**: Easy-to-use frontend for interacting with the chatbot.
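
A condensed sketch of that retrieval-then-generation flow, mirroring `custom_qa_chain` in `frontend.py` (the names below come from this repository's code):

```python
# Retrieve the top-k most relevant chunks from the FAISS index,
# then hand them to the LLaMA model as context for the answer.
docs = db.similarity_search(query, k=3)                # FAISS vector retrieval
context = "\n".join(doc.page_content for doc in docs)  # stitch retrieved chunks together
answer = groq_api.generate_answer(query, context)      # LLaMA (via Groq) generates the reply
```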
---

## Requirements

- Python 3.8+
- GPU support (optional but recommended for faster LLM inference)
- A [Groq](https://groq.com) API key for LLaMA model access

---

## Installation

### 1. Clone the Repository

```bash
git clone https://github.com/your-username/ayurveda-chatbot.git
cd ayurveda-chatbot
```

### 2. Create and Activate a Virtual Environment

On Linux/macOS:

```bash
python3 -m venv env
source env/bin/activate
```

On Windows:

```
python -m venv env
env\Scripts\activate
```

### 3. Install Dependencies

```bash
pip install -r requirements.txt
```

### 4. Set Up the `.env` File

Create a `.env` file in the project root; the frontend reads your Groq API key from it (see the example below).
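
A minimal `.env` sketch (the variable name matches what `frontend.py` reads via python-dotenv; the value is a placeholder, not a real key):

```bash
# .env (loaded by load_dotenv() at startup)
GROQ_API_KEY=your_groq_api_key_here
```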

---

## Usage

### 1. Preprocess PDFs and Create the FAISS Index

Ensure the PDF file(s) (e.g., `ayurveda_text.pdf`) are placed in the project directory.

Run the backend script to preprocess the data and create a FAISS index:

```bash
python3 backend.py Book1.pdf Book2.pdf --index-path faiss_index
```

### 2. Start the Chatbot

Launch the Streamlit interface:

```bash
streamlit run frontend.py
```

Access the chatbot in your browser at http://localhost:8501.

backend.py (new file, 66 lines)
@@ -0,0 +1,66 @@
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from langchain.embeddings.base import Embeddings
from langchain.docstore.in_memory import InMemoryDocstore
import faiss


# Define a custom embedding wrapper for LangChain
class SentenceTransformerEmbeddings(Embeddings):
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        return self.model.encode(texts, show_progress_bar=True)

    def embed_query(self, text):
        return self.model.encode([text], show_progress_bar=False)[0]


# Function to create an empty FAISS index
def create_empty_faiss_index(embedding_model):
    embedding_dimension = embedding_model.model.get_sentence_embedding_dimension()
    index = faiss.IndexFlatL2(embedding_dimension)  # Initialize FAISS index
    docstore = InMemoryDocstore({})
    index_to_docstore_id = {}
    return FAISS(index=index, docstore=docstore, index_to_docstore_id=index_to_docstore_id, embedding_function=embedding_model)


# Function to update the FAISS index with new books
def update_faiss_index(book_paths, faiss_index_path="faiss_index"):
    # Load or initialize FAISS index
    embedding_model = SentenceTransformerEmbeddings()
    if os.path.exists(faiss_index_path):
        print("Loading existing FAISS index...")
        # allow_dangerous_deserialization is required by current langchain-community
        # releases to unpickle the index; safe here because we created it ourselves.
        db = FAISS.load_local(faiss_index_path, embedding_model, allow_dangerous_deserialization=True)
    else:
        print("Creating a new FAISS index...")
        db = create_empty_faiss_index(embedding_model)

    # Process each book
    for book_path in book_paths:
        print(f"Processing book: {book_path}")
        loader = PyPDFLoader(book_path)
        documents = loader.load()

        # Split text into chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunks = text_splitter.split_documents(documents)
        texts = [chunk.page_content for chunk in chunks]

        # Add embeddings to FAISS index
        db.add_texts(texts)

    # Save the updated FAISS index
    db.save_local(faiss_index_path)
    print(f"FAISS index updated and saved at: {faiss_index_path}")


# Command-line interface
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Update FAISS index with new books")
    parser.add_argument("books", nargs="+", help="Path(s) to the PDF book(s)")
    parser.add_argument("--index-path", default="faiss_index", help="Path to FAISS index directory")
    args = parser.parse_args()

    update_faiss_index(args.books, args.index_path)

frontend.py (new file, 113 lines)
@@ -0,0 +1,113 @@
import os
import streamlit as st
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from langchain.embeddings.base import Embeddings
import requests  # To handle HTTP requests for the Groq API

# Load environment variables from .env file
load_dotenv()

# Set page configuration
st.set_page_config(page_title="Ayurveda Chatbot", layout="wide")

# Check for the GROQ_API_KEY environment variable
groq_key = os.getenv("GROQ_API_KEY")
if not groq_key:
    st.error("The 'GROQ_API_KEY' environment variable is not set. Please set it in the .env file or the environment.")
else:
    st.write("GROQ_API_KEY loaded successfully")


# Define a custom embedding wrapper for LangChain
class SentenceTransformerEmbeddings(Embeddings):
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        return self.model.encode(texts, show_progress_bar=True)

    def embed_query(self, text):
        return self.model.encode([text], show_progress_bar=False)[0]


# Path to FAISS index
faiss_index_path = "faiss_index"

# Load the FAISS index; allow_dangerous_deserialization is required to unpickle
# the locally created index (safe here because backend.py wrote these files).
embedding_model = SentenceTransformerEmbeddings()
try:
    db = FAISS.load_local(faiss_index_path, embedding_model, allow_dangerous_deserialization=True)
except Exception as e:
    st.error(f"Failed to load FAISS index: {str(e)}")
    db = None


# Define the class to handle API calls to Groq
class GroqAPI:
    def __init__(self, api_key):
        self.api_key = api_key
        self.endpoint = "https://api.groq.com/openai/v1/chat/completions"

    def generate_answer(self, query, context, model="llama-3.3-70b-versatile"):
        # Prepare the system message, embedding the retrieved context
        system_message = (
            "You are an Ayurvedic expert with deep knowledge of Ayurvedic practices, remedies, and diagnostics. "
            "Use the provided Ayurvedic context to answer the question thoughtfully and accurately.\n\n"
            f"Context:\n{context}\n\n"
            f"Question:\n{query}\n\n"
            "Answer as an Ayurvedic expert:"
        )

        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": query}
            ]
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        try:
            # timeout added as a safeguard against a hung request; adjust as needed
            response = requests.post(self.endpoint, json=payload, headers=headers, timeout=60)
            if response.status_code == 200:
                result = response.json()
                return result["choices"][0]["message"]["content"]
            else:
                return f"Error: {response.status_code} - {response.text}"
        except Exception as e:
            return f"Error: {str(e)}"


# Initialize the GroqAPI
groq_api = GroqAPI(api_key=groq_key)


# Custom QA chain function that integrates FAISS and the Groq API
def custom_qa_chain(query):
    if not db:
        return "FAISS index is not loaded."
    try:
        # Retrieve relevant context from the FAISS index
        context = db.similarity_search(query, k=3)
        context_text = "\n".join([doc.page_content for doc in context])

        # Get the response from the Groq API
        response = groq_api.generate_answer(query, context_text)
    except Exception as e:
        response = f"Error during QA chain: {str(e)}"

    return response


# Streamlit UI
st.title("Ayurveda Chatbot")

st.subheader("Ask your Ayurvedic Question")
query = st.text_input("Enter your query:")
if query:
    with st.spinner("Retrieving answer..."):
        st.write(f"Processing query: {query}")

        # Get the response from the custom QA chain
        response = custom_qa_chain(query)

        st.markdown(f"### Answer:\n{response}")

requirements.txt (new file, 13 lines)
@@ -0,0 +1,13 @@
streamlit==1.41.1
langchain==0.3.13
langchain-community==0.3.13
sentence-transformers==3.3.1
faiss-cpu==1.9.0.post1
PyPDF2==3.0.1
torch==2.5.1
transformers==4.47.1
pandas==2.2.3
numpy==1.26.4
pypdf==5.1.0
groq==0.15.0
python-dotenv