first commit
commit acda3bf334
.gitignore (vendored, new file, 31 lines)
@@ -0,0 +1,31 @@
# Python bytecode files
*.pyc
*.pyo
__pycache__/

# Virtual environment directories
env/
venv/
ENV/
.venv/

# Configuration files
*.env
*.env.*

# IDE or editor settings (e.g., VSCode, PyCharm)
.vscode/
.idea/

# Python packaging
*.egg
*.egg-info/
dist/
build/
*.tar.gz

faiss_index/

.env
README.md (new file, 66 lines)
@@ -0,0 +1,66 @@
# Ayurveda Chatbot using LLaMA and RAG

This project is an interactive Ayurveda chatbot built on a **Retrieval-Augmented Generation (RAG)** pipeline powered by a **LLaMA language model served through the Groq API**. The chatbot answers user queries with Ayurvedic knowledge retrieved from pre-indexed PDF content.

---

## Features

- **PDF Knowledge Base**: Built from indexed Ayurvedic texts for domain-specific answers.
- **RAG Pipeline**: Combines FAISS vector retrieval with a LLaMA model for context-aware responses (a minimal sketch follows this list).
- **Streamlit Interface**: Easy-to-use frontend for interacting with the chatbot.
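A minimal sketch of the retrieve-then-generate flow. It mirrors `custom_qa_chain` in `frontend.py` but, for brevity, uses the `groq` client already pinned in `requirements.txt` instead of the raw HTTP call; the index path, model name, and `k` value shown here are illustrative:

```python
import os
from dotenv import load_dotenv
from groq import Groq
from langchain_community.vectorstores import FAISS
from backend import SentenceTransformerEmbeddings  # embedding wrapper defined in backend.py

load_dotenv()  # read GROQ_API_KEY from .env
db = FAISS.load_local("faiss_index", SentenceTransformerEmbeddings(),
                      allow_dangerous_deserialization=True)
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

def answer(query: str, k: int = 3) -> str:
    # 1. Retrieve the k most similar chunks from the FAISS vector store.
    context = "\n".join(d.page_content for d in db.similarity_search(query, k=k))
    # 2. Ask the LLaMA model to answer using only the retrieved context.
    chat = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "system", "content": f"Answer as an Ayurvedic expert using this context:\n{context}"},
            {"role": "user", "content": query},
        ],
    )
    return chat.choices[0].message.content
```

In the app itself, `backend.py` builds the index and `frontend.py` wraps this flow in a Streamlit UI.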
---

## Requirements

- Python 3.8+
- GPU support (optional but recommended for faster local embedding computation)
- A Groq API key (`frontend.py` calls a hosted LLaMA model through the Groq chat completions API)

---

## Installation

### 1. Clone the Repository
```bash
git clone https://github.com/your-username/ayurveda-chatbot.git
cd ayurveda-chatbot
```

### 2. Create and Activate a Virtual Environment
On Linux/macOS:
```bash
python3 -m venv env
source env/bin/activate
```
On Windows:
```
python -m venv env
env\Scripts\activate
```

### 3. Install Dependencies
```bash
pip install -r requirements.txt
```

### 4. Configure the `.env` File
Create a `.env` file in the project root containing your Groq API key; `frontend.py` loads it via `python-dotenv` (example below).
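A minimal `.env` (the key value is a placeholder):

```bash
# .env (excluded from version control by .gitignore)
GROQ_API_KEY=your_groq_api_key_here
```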
---

## Usage

### 1. Preprocess PDFs and Create the FAISS Index
Ensure the PDF file(s) you want to index (e.g., ayurveda_text.pdf) are placed in the project directory.

Run the backend script to preprocess the data and create (or update) the FAISS index:

```bash
python3 backend.py Book1.pdf Book2.pdf --index-path faiss_index
```

This creates or updates the `faiss_index/` directory, which is listed in `.gitignore`.

### 2. Start the Chatbot
Launch the Streamlit interface:

```bash
streamlit run frontend.py
```

Access the chatbot in your browser at http://localhost:8501.
backend.py (new file, 66 lines)
@@ -0,0 +1,66 @@
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from langchain.embeddings.base import Embeddings
from langchain.docstore.in_memory import InMemoryDocstore
import faiss

# Define a custom embedding wrapper for LangChain
class SentenceTransformerEmbeddings(Embeddings):
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        return self.model.encode(texts, show_progress_bar=True)

    def embed_query(self, text):
        return self.model.encode([text], show_progress_bar=False)[0]

# Function to create an empty FAISS index
def create_empty_faiss_index(embedding_model):
    embedding_dimension = embedding_model.model.get_sentence_embedding_dimension()
    index = faiss.IndexFlatL2(embedding_dimension)  # Initialize FAISS index
    docstore = InMemoryDocstore({})
    index_to_docstore_id = {}
    return FAISS(index=index, docstore=docstore, index_to_docstore_id=index_to_docstore_id, embedding_function=embedding_model)

# Function to update the FAISS index with new books
def update_faiss_index(book_paths, faiss_index_path="faiss_index"):
    # Load or initialize FAISS index
    embedding_model = SentenceTransformerEmbeddings()
    if os.path.exists(faiss_index_path):
        print("Loading existing FAISS index...")
        # Recent langchain-community versions require explicitly allowing pickle
        # deserialization here; only load indexes you created yourself.
        db = FAISS.load_local(faiss_index_path, embedding_model, allow_dangerous_deserialization=True)
    else:
        print("Creating a new FAISS index...")
        db = create_empty_faiss_index(embedding_model)

    # Process each book
    for book_path in book_paths:
        print(f"Processing book: {book_path}")
        loader = PyPDFLoader(book_path)
        documents = loader.load()

        # Split text into chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunks = text_splitter.split_documents(documents)
        texts = [chunk.page_content for chunk in chunks]

        # Add embeddings to FAISS index
        db.add_texts(texts)

    # Save the updated FAISS index
    db.save_local(faiss_index_path)
    print(f"FAISS index updated and saved at: {faiss_index_path}")

# Command-line interface
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Update FAISS index with new books")
    parser.add_argument("books", nargs="+", help="Path(s) to the PDF book(s)")
    parser.add_argument("--index-path", default="faiss_index", help="Path to FAISS index directory")
    args = parser.parse_args()

    update_faiss_index(args.books, args.index_path)
frontend.py (new file, 113 lines)
@@ -0,0 +1,113 @@
import os
import streamlit as st
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from langchain.embeddings.base import Embeddings
import requests  # To handle HTTP requests for the Groq API

# Load environment variables from .env file
load_dotenv()

# Set page configuration
st.set_page_config(page_title="Ayurveda Chatbot", layout="wide")

# Check for the GROQ_API_KEY environment variable
groq_key = os.getenv("GROQ_API_KEY")
if not groq_key:
    st.error("The 'GROQ_API_KEY' environment variable is not set. Please set it in the .env file or the environment.")
else:
    st.write("GROQ_API_KEY loaded successfully")

# Define a custom embedding wrapper for LangChain
class SentenceTransformerEmbeddings(Embeddings):
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        return self.model.encode(texts, show_progress_bar=True)

    def embed_query(self, text):
        return self.model.encode([text], show_progress_bar=False)[0]

# Path to FAISS index
faiss_index_path = "faiss_index"

# Load FAISS index with dangerous deserialization enabled (needed for the pickled docstore)
embedding_model = SentenceTransformerEmbeddings()
try:
    db = FAISS.load_local(faiss_index_path, embedding_model, allow_dangerous_deserialization=True)
except Exception as e:
    st.error(f"Failed to load FAISS index: {str(e)}")
    db = None

# Define the class to handle API calls to Groq
class GroqAPI:
    def __init__(self, api_key):
        self.api_key = api_key
        self.endpoint = "https://api.groq.com/openai/v1/chat/completions"

    def generate_answer(self, query, context, model="llama-3.3-70b-versatile"):
        # Prepare the system message
        system_message = (
            "You are an Ayurvedic expert with deep knowledge of Ayurvedic practices, remedies, and diagnostics. "
            "Use the provided Ayurvedic context to answer the question thoughtfully and accurately.\n\n"
            f"Context:\n{context}\n\n"
            f"Question:\n{query}\n\n"
            "Answer as an Ayurvedic expert:"
        )

        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": query}
            ]
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        try:
            response = requests.post(self.endpoint, json=payload, headers=headers)
            if response.status_code == 200:
                result = response.json()
                return result["choices"][0]["message"]["content"]
            else:
                return f"Error: {response.status_code} - {response.text}"
        except Exception as e:
            return f"Error: {str(e)}"

# Initialize the GroqAPI
groq_api = GroqAPI(api_key=groq_key)

# Custom QA chain function that integrates FAISS and the Groq API
def custom_qa_chain(query):
    if not db:
        return "FAISS index is not loaded."
    try:
        # Retrieve relevant context from the FAISS index
        context = db.similarity_search(query, k=3)
        context_text = "\n".join([doc.page_content for doc in context])

        # Get the response from the Groq API
        response = groq_api.generate_answer(query, context_text)
    except Exception as e:
        response = f"Error during QA chain: {str(e)}"

    return response

# Streamlit UI
st.title("Ayurveda Chatbot")

st.subheader("Ask your Ayurvedic Question")
query = st.text_input("Enter your query:")
if query:
    with st.spinner("Retrieving answer..."):
        st.write(f"Processing query: {query}")

        # Get the response from the custom QA chain
        response = custom_qa_chain(query)

        st.markdown(f"### Answer:\n{response}")
requirements.txt (new file, 13 lines)
@@ -0,0 +1,13 @@
streamlit==1.41.1
langchain==0.3.13
langchain-community==0.3.13
sentence-transformers==3.3.1
faiss-cpu==1.9.0.post1
PyPDF2==3.0.1
torch==2.5.1
transformers==4.47.1
pandas==2.2.3
numpy==1.26.4
pypdf==5.1.0
groq==0.15.0
python-dotenv