first commit

Harsimran Singh 2025-02-28 01:25:10 +05:30
commit acda3bf334
7 changed files with 289 additions and 0 deletions

.gitignore (vendored, new file, 31 lines added)

@@ -0,0 +1,31 @@
# Python bytecode files
*.pyc
*.pyo
__pycache__/
# Virtual environment directories
env/
venv/
ENV/
.venv/
# Configuration files
*.env
*.env.*
# IDE or editor settings (e.g., VSCode, PyCharm)
.vscode/
.idea/
# Python packaging
*.egg
*.egg-info/
dist/
build/
*.tar.gz
faiss_index/
.env

Book1.pdf (new binary file, not shown)

Book2.pdf (new binary file, not shown)

README.md (new file, 66 lines added)

@@ -0,0 +1,66 @@
# Ayurveda Chatbot using LLaMA and RAG
This project is an interactive Ayurveda chatbot built on a **Retrieval-Augmented Generation (RAG)** pipeline that pairs **FAISS vector retrieval** with a **LLaMA model served through the Groq API**. The chatbot answers user queries using Ayurvedic knowledge retrieved from indexed PDF texts.
---
## Features
- **PDF Knowledge Base**: Indexes Ayurvedic texts so answers stay domain-specific.
- **RAG Pipeline**: Combines FAISS vector retrieval with a LLaMA model for context-aware responses.
- **Streamlit Interface**: Easy-to-use frontend for interacting with the chatbot.
---
## Requirements
- Python 3.8+
- GPU support (optional; only the sentence-transformers embedding model runs locally, so a GPU mainly speeds up indexing)
- A [Groq](https://groq.com) API key; the chatbot calls the hosted `llama-3.3-70b-versatile` model
---
## Installation
### 1. Clone the Repository
```bash
git clone https://github.com/your-username/ayurveda-chatbot.git
cd ayurveda-chatbot
```
### 2. Create and Activate a Virtual Environment
On Linux/macOS:
```bash
python3 -m venv env
source env/bin/activate
```
On Windows:
```
python -m venv env
env\Scripts\activate
```
### 3. Install Dependencies
```bash
pip install -r requirements.txt
```
### 4. Configure the `.env` File
Create a `.env` file in the project root with your Groq API key; `frontend.py` loads it via `python-dotenv`.
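A minimal `.env` (the variable name matches what `frontend.py` reads; replace the placeholder value with your own key):
```
GROQ_API_KEY=your_groq_api_key_here
```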
---
## Usage
### 1. Preprocess PDFs and Create the FAISS Index
Ensure the PDF files (e.g., `Book1.pdf`, `Book2.pdf`) are placed in the project directory.
Run the backend script to preprocess the data and create a FAISS index:
```bash
python3 backend.py Book1.pdf Book2.pdf --index-path faiss_index
```
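The script prints a progress line for each book and finishes with `FAISS index updated and saved at: faiss_index` (these messages come from the `print` calls in `backend.py`).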
### 2. Start the Chatbot
Launch the Streamlit interface:
```bash
streamlit run frontend.py
```
Access the chatbot in your browser at http://localhost:8501.

backend.py (new file, 66 lines added)

@@ -0,0 +1,66 @@
import os

import faiss
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from langchain.docstore.in_memory import InMemoryDocstore
from sentence_transformers import SentenceTransformer


# Custom embedding wrapper so LangChain can call a sentence-transformers model
class SentenceTransformerEmbeddings(Embeddings):
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        # Convert to plain lists, as the Embeddings interface expects
        return self.model.encode(texts, show_progress_bar=True).tolist()

    def embed_query(self, text):
        return self.model.encode([text], show_progress_bar=False)[0].tolist()


# Create an empty FAISS vector store sized to the embedding dimension
def create_empty_faiss_index(embedding_model):
    embedding_dimension = embedding_model.model.get_sentence_embedding_dimension()
    index = faiss.IndexFlatL2(embedding_dimension)  # Flat L2-distance index
    docstore = InMemoryDocstore({})
    index_to_docstore_id = {}
    return FAISS(
        index=index,
        docstore=docstore,
        index_to_docstore_id=index_to_docstore_id,
        embedding_function=embedding_model,
    )


# Add one or more PDF books to the FAISS index, creating it if needed
def update_faiss_index(book_paths, faiss_index_path="faiss_index"):
    embedding_model = SentenceTransformerEmbeddings()
    if os.path.exists(faiss_index_path):
        print("Loading existing FAISS index...")
        # allow_dangerous_deserialization is required by this LangChain
        # version to unpickle a locally saved index (frontend.py already
        # passes it when loading the same index)
        db = FAISS.load_local(
            faiss_index_path, embedding_model, allow_dangerous_deserialization=True
        )
    else:
        print("Creating a new FAISS index...")
        db = create_empty_faiss_index(embedding_model)

    # Process each book
    for book_path in book_paths:
        print(f"Processing book: {book_path}")
        loader = PyPDFLoader(book_path)
        documents = loader.load()

        # Split the text into overlapping chunks for retrieval
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunks = text_splitter.split_documents(documents)
        texts = [chunk.page_content for chunk in chunks]

        # Embed the chunks and add them to the index
        db.add_texts(texts)

    # Persist the updated index to disk
    db.save_local(faiss_index_path)
    print(f"FAISS index updated and saved at: {faiss_index_path}")


# Command-line interface
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Update FAISS index with new books")
    parser.add_argument("books", nargs="+", help="Path(s) to the PDF book(s)")
    parser.add_argument("--index-path", default="faiss_index", help="Path to FAISS index directory")
    args = parser.parse_args()
    update_faiss_index(args.books, args.index_path)
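
As a quick sanity check after indexing, the saved index can be reloaded and queried directly. A minimal sketch (a hypothetical helper, not part of this commit) that reuses the `SentenceTransformerEmbeddings` wrapper from `backend.py`:

```python
# verify_index.py -- hypothetical helper script, not part of this commit
from langchain_community.vectorstores import FAISS

from backend import SentenceTransformerEmbeddings

# Reload the saved index the same way frontend.py does
db = FAISS.load_local(
    "faiss_index",
    SentenceTransformerEmbeddings(),
    allow_dangerous_deserialization=True,
)

# Print the three chunks most similar to a test query
for doc in db.similarity_search("What are the three doshas?", k=3):
    print(doc.page_content[:200])
    print("---")
```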

frontend.py (new file, 113 lines added)

@@ -0,0 +1,113 @@
import os

import requests  # HTTP client used to call the Groq chat-completions API
import streamlit as st
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer

# Load environment variables from the .env file
load_dotenv()

# Set page configuration
st.set_page_config(page_title="Ayurveda Chatbot", layout="wide")

# Check for the GROQ_API_KEY environment variable
groq_key = os.getenv("GROQ_API_KEY")
if not groq_key:
    st.error("The 'GROQ_API_KEY' environment variable is not set. Please set it in the .env file or the environment.")
else:
    st.write("GROQ_API_KEY loaded successfully")


# Custom embedding wrapper so LangChain can call a sentence-transformers model
class SentenceTransformerEmbeddings(Embeddings):
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        # Convert to plain lists, as the Embeddings interface expects
        return self.model.encode(texts, show_progress_bar=True).tolist()

    def embed_query(self, text):
        return self.model.encode([text], show_progress_bar=False)[0].tolist()


# Path to the FAISS index produced by backend.py
faiss_index_path = "faiss_index"

# Load the FAISS index; allow_dangerous_deserialization is required to
# unpickle a locally saved index in this LangChain version
embedding_model = SentenceTransformerEmbeddings()
try:
    db = FAISS.load_local(faiss_index_path, embedding_model, allow_dangerous_deserialization=True)
except Exception as e:
    st.error(f"Failed to load FAISS index: {str(e)}")
    db = None


# Thin client for the Groq chat-completions endpoint
class GroqAPI:
    def __init__(self, api_key):
        self.api_key = api_key
        self.endpoint = "https://api.groq.com/openai/v1/chat/completions"

    def generate_answer(self, query, context, model="llama-3.3-70b-versatile"):
        # The system message carries the persona plus the retrieved context
        system_message = (
            "You are an Ayurvedic expert with deep knowledge of Ayurvedic practices, remedies, and diagnostics. "
            "Use the provided Ayurvedic context to answer the question thoughtfully and accurately.\n\n"
            f"Context:\n{context}\n\n"
            f"Question:\n{query}\n\n"
            "Answer as an Ayurvedic expert:"
        )
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": query},
            ],
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        try:
            # Timeout keeps the UI from hanging if the API is unreachable
            response = requests.post(self.endpoint, json=payload, headers=headers, timeout=60)
            if response.status_code == 200:
                result = response.json()
                return result["choices"][0]["message"]["content"]
            else:
                return f"Error: {response.status_code} - {response.text}"
        except Exception as e:
            return f"Error: {str(e)}"


# Initialize the Groq client
groq_api = GroqAPI(api_key=groq_key)


# Custom QA chain: retrieve context from FAISS, then generate with Groq
def custom_qa_chain(query):
    if not db:
        return "FAISS index is not loaded."
    try:
        # Retrieve the three most relevant chunks from the FAISS index
        context = db.similarity_search(query, k=3)
        context_text = "\n".join([doc.page_content for doc in context])
        # Get the response from the Groq API
        response = groq_api.generate_answer(query, context_text)
    except Exception as e:
        response = f"Error during QA chain: {str(e)}"
    return response


# Streamlit UI
st.title("Ayurveda Chatbot")
st.subheader("Ask your Ayurvedic Question")
query = st.text_input("Enter your query:")
if query:
    with st.spinner("Retrieving answer..."):
        st.write(f"Processing query: {query}")
        # Get the response from the custom QA chain
        response = custom_qa_chain(query)
        st.markdown(f"### Answer:\n{response}")
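
For reference, the HTTP request that `GroqAPI.generate_answer` assembles can be reproduced standalone; a minimal sketch using `requests`, assuming `GROQ_API_KEY` is set in the environment (the query string is just an illustrative example):

```python
# standalone_groq_check.py -- hypothetical sketch mirroring GroqAPI.generate_answer
import os

import requests
from dotenv import load_dotenv

load_dotenv()

# Same endpoint, headers, and payload shape as the GroqAPI class above
response = requests.post(
    "https://api.groq.com/openai/v1/chat/completions",
    headers={
        "Authorization": f"Bearer {os.environ['GROQ_API_KEY']}",
        "Content-Type": "application/json",
    },
    json={
        "model": "llama-3.3-70b-versatile",
        "messages": [
            {"role": "system", "content": "You are an Ayurvedic expert."},
            {"role": "user", "content": "What is Triphala used for?"},
        ],
    },
    timeout=60,
)
print(response.json()["choices"][0]["message"]["content"])
```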

requirements.txt (new file, 13 lines added)

@@ -0,0 +1,13 @@
streamlit==1.41.1
langchain==0.3.13
langchain-community==0.3.13
sentence-transformers==3.3.1
faiss-cpu==1.9.0.post1
PyPDF2==3.0.1
torch==2.5.1
transformers==4.47.1
pandas==2.2.3
numpy==1.26.4
pypdf==5.1.0
groq==0.15.0
python-dotenv