first commit
commit acda3bf334
.gitignore (vendored, new file, 31 lines)
@@ -0,0 +1,31 @@
# Python bytecode files
*.pyc
*.pyo
__pycache__/

# Virtual environment directories
env/
venv/
ENV/
.venv/

# Configuration files
*.env
*.env.*

# IDE or editor settings (e.g., VSCode, PyCharm)
.vscode/
.idea/

# Python packaging
*.egg
*.egg-info/
dist/
build/
*.tar.gz

faiss_index/

.env

README.md (new file, 66 lines)
@@ -0,0 +1,66 @@
# Ayurveda Chatbot using LLaMA and RAG

This project is an interactive Ayurveda chatbot built on a **Retrieval-Augmented Generation (RAG)** pipeline powered by a **LLaMA language model served via the Groq API**. The chatbot answers user queries with Ayurvedic knowledge retrieved from a FAISS index built over PDF texts.

---

## Features

- **PDF Knowledge Base**: Pretrained on Ayurvedic texts for domain-specific answers.
- **RAG Pipeline**: Combines FAISS vector retrieval and LLaMA for context-aware responses (see the sketch below).
- **Streamlit Interface**: Easy-to-use frontend for interacting with the chatbot.
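
A condensed sketch of that retrieval-then-generation flow, mirroring `custom_qa_chain` in `frontend.py` (the names below come from this repository's code):

```python
# Retrieve the top-k most relevant chunks from the FAISS index,
# then hand them to the LLaMA model as context for the answer.
docs = db.similarity_search(query, k=3)                # FAISS vector retrieval
context = "\n".join(doc.page_content for doc in docs)  # stitch retrieved chunks together
answer = groq_api.generate_answer(query, context)      # LLaMA (via Groq) generates the reply
```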
---

## Requirements

- Python 3.8+
- GPU support (optional but recommended for faster LLM inference)
- A [Groq](https://groq.com) API key for LLaMA model access

---

## Installation

### 1. Clone the Repository

```bash
git clone https://github.com/your-username/ayurveda-chatbot.git
cd ayurveda-chatbot
```

### 2. Create and Activate a Virtual Environment

On Linux/macOS:

```bash
python3 -m venv env
source env/bin/activate
```

On Windows:

```
python -m venv env
env\Scripts\activate
```

### 3. Install Dependencies

```bash
pip install -r requirements.txt
```

### 4. Set Up the `.env` File

Create a `.env` file in the project root; the frontend reads your Groq API key from it (see the example below).
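
A minimal `.env` sketch (the variable name matches what `frontend.py` reads via python-dotenv; the value is a placeholder, not a real key):

```bash
# .env (loaded by load_dotenv() at startup)
GROQ_API_KEY=your_groq_api_key_here
```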

---

## Usage

### 1. Preprocess PDFs and Create the FAISS Index

Ensure the PDF file(s) (e.g., `ayurveda_text.pdf`) are placed in the project directory.

Run the backend script to preprocess the data and create a FAISS index:

```bash
python3 backend.py Book1.pdf Book2.pdf --index-path faiss_index
```

### 2. Start the Chatbot

Launch the Streamlit interface:

```bash
streamlit run frontend.py
```

Access the chatbot in your browser at http://localhost:8501.

backend.py (new file, 66 lines)
@@ -0,0 +1,66 @@
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from langchain.embeddings.base import Embeddings
from langchain.docstore.in_memory import InMemoryDocstore
import faiss


# Define a custom embedding wrapper for LangChain
class SentenceTransformerEmbeddings(Embeddings):
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        return self.model.encode(texts, show_progress_bar=True)

    def embed_query(self, text):
        return self.model.encode([text], show_progress_bar=False)[0]


# Function to create an empty FAISS index
def create_empty_faiss_index(embedding_model):
    embedding_dimension = embedding_model.model.get_sentence_embedding_dimension()
    index = faiss.IndexFlatL2(embedding_dimension)  # Initialize FAISS index
    docstore = InMemoryDocstore({})
    index_to_docstore_id = {}
    return FAISS(index=index, docstore=docstore, index_to_docstore_id=index_to_docstore_id, embedding_function=embedding_model)


# Function to update the FAISS index with new books
def update_faiss_index(book_paths, faiss_index_path="faiss_index"):
    # Load or initialize FAISS index
    embedding_model = SentenceTransformerEmbeddings()
    if os.path.exists(faiss_index_path):
        print("Loading existing FAISS index...")
        # allow_dangerous_deserialization is required by current langchain-community
        # releases to unpickle the index; safe here because we created it ourselves.
        db = FAISS.load_local(faiss_index_path, embedding_model, allow_dangerous_deserialization=True)
    else:
        print("Creating a new FAISS index...")
        db = create_empty_faiss_index(embedding_model)

    # Process each book
    for book_path in book_paths:
        print(f"Processing book: {book_path}")
        loader = PyPDFLoader(book_path)
        documents = loader.load()

        # Split text into chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunks = text_splitter.split_documents(documents)
        texts = [chunk.page_content for chunk in chunks]

        # Add embeddings to FAISS index
        db.add_texts(texts)

    # Save the updated FAISS index
    db.save_local(faiss_index_path)
    print(f"FAISS index updated and saved at: {faiss_index_path}")


# Command-line interface
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Update FAISS index with new books")
    parser.add_argument("books", nargs="+", help="Path(s) to the PDF book(s)")
    parser.add_argument("--index-path", default="faiss_index", help="Path to FAISS index directory")
    args = parser.parse_args()

    update_faiss_index(args.books, args.index_path)

frontend.py (new file, 113 lines)
@@ -0,0 +1,113 @@
import os
import streamlit as st
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from langchain.embeddings.base import Embeddings
import requests  # To handle HTTP requests for the Groq API

# Load environment variables from .env file
load_dotenv()

# Set page configuration
st.set_page_config(page_title="Ayurveda Chatbot", layout="wide")

# Check for the GROQ_API_KEY environment variable
groq_key = os.getenv("GROQ_API_KEY")
if not groq_key:
    st.error("The 'GROQ_API_KEY' environment variable is not set. Please set it in the .env file or the environment.")
else:
    st.write("GROQ_API_KEY loaded successfully")


# Define a custom embedding wrapper for LangChain
class SentenceTransformerEmbeddings(Embeddings):
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        return self.model.encode(texts, show_progress_bar=True)

    def embed_query(self, text):
        return self.model.encode([text], show_progress_bar=False)[0]


# Path to FAISS index
faiss_index_path = "faiss_index"

# Load the FAISS index; allow_dangerous_deserialization is required to unpickle
# the locally created index (safe here because backend.py wrote these files).
embedding_model = SentenceTransformerEmbeddings()
try:
    db = FAISS.load_local(faiss_index_path, embedding_model, allow_dangerous_deserialization=True)
except Exception as e:
    st.error(f"Failed to load FAISS index: {str(e)}")
    db = None


# Define the class to handle API calls to Groq
class GroqAPI:
    def __init__(self, api_key):
        self.api_key = api_key
        self.endpoint = "https://api.groq.com/openai/v1/chat/completions"

    def generate_answer(self, query, context, model="llama-3.3-70b-versatile"):
        # Prepare the system message, embedding the retrieved context
        system_message = (
            "You are an Ayurvedic expert with deep knowledge of Ayurvedic practices, remedies, and diagnostics. "
            "Use the provided Ayurvedic context to answer the question thoughtfully and accurately.\n\n"
            f"Context:\n{context}\n\n"
            f"Question:\n{query}\n\n"
            "Answer as an Ayurvedic expert:"
        )

        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": query}
            ]
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        try:
            # timeout added as a safeguard against a hung request; adjust as needed
            response = requests.post(self.endpoint, json=payload, headers=headers, timeout=60)
            if response.status_code == 200:
                result = response.json()
                return result["choices"][0]["message"]["content"]
            else:
                return f"Error: {response.status_code} - {response.text}"
        except Exception as e:
            return f"Error: {str(e)}"


# Initialize the GroqAPI
groq_api = GroqAPI(api_key=groq_key)


# Custom QA chain function that integrates FAISS and the Groq API
def custom_qa_chain(query):
    if not db:
        return "FAISS index is not loaded."
    try:
        # Retrieve relevant context from the FAISS index
        context = db.similarity_search(query, k=3)
        context_text = "\n".join([doc.page_content for doc in context])

        # Get the response from the Groq API
        response = groq_api.generate_answer(query, context_text)
    except Exception as e:
        response = f"Error during QA chain: {str(e)}"

    return response


# Streamlit UI
st.title("Ayurveda Chatbot")

st.subheader("Ask your Ayurvedic Question")
query = st.text_input("Enter your query:")
if query:
    with st.spinner("Retrieving answer..."):
        st.write(f"Processing query: {query}")

        # Get the response from the custom QA chain
        response = custom_qa_chain(query)

        st.markdown(f"### Answer:\n{response}")

requirements.txt (new file, 13 lines)
@@ -0,0 +1,13 @@
streamlit==1.41.1
langchain==0.3.13
langchain-community==0.3.13
sentence-transformers==3.3.1
faiss-cpu==1.9.0.post1
PyPDF2==3.0.1
torch==2.5.1
transformers==4.47.1
pandas==2.2.3
numpy==1.26.4
pypdf==5.1.0
groq==0.15.0
python-dotenv