105 lines
4.3 KiB
Python
105 lines
4.3 KiB
Python
import streamlit as st
|
|
import faiss
|
|
import numpy as np
|
|
import os
|
|
import pandas as pd
|
|
from dotenv import load_dotenv
|
|
from sentence_transformers import SentenceTransformer
|
|
from langchain_community.vectorstores import FAISS
|
|
from langchain_huggingface import HuggingFaceEmbeddings
|
|
from groq import Groq
|
|
|
|
|
|
|
|
class JobRecommender:
    """Recommend jobs via FAISS similarity search, explained by a Groq-hosted LLM.

    Job rows are read from a CSV file that must contain ``Job Title`` and
    ``Job Description`` columns. Each row is embedded as
    ``"<title> - <description>"`` and stored in a flat L2 FAISS index whose
    vector ids are the row positions in the loaded DataFrame.
    """

    # Sentence-embedding model used for both job rows and user queries.
    EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
    # Chat model used to phrase the final recommendation.
    # NOTE(review): confirm this model id is still served by Groq.
    LLM_MODEL_NAME = "mixtral-8x7b-32768"
    # Only the first MAX_JOBS rows of the CSV are indexed.
    MAX_JOBS = 5000

    def __init__(self, data_path: str, faiss_index_path: str, embeddings_path: str):
        """Initialize the job recommender system.

        Args:
            data_path: CSV file holding the job postings.
            faiss_index_path: Where the FAISS index is cached on disk.
            embeddings_path: Where the raw job embeddings are cached on disk.
        """
        # Pull GROQ_API_KEY (and any other settings) from a local .env file.
        load_dotenv()
        self.client = Groq(api_key=os.getenv("GROQ_API_KEY"))
        self.embedding_model = HuggingFaceEmbeddings(model_name=self.EMBEDDING_MODEL_NAME)
        self.data_path = data_path
        self.faiss_index_path = faiss_index_path
        self.embeddings_path = embeddings_path
        self.df = self._load_data()
        self.faiss_index, self.job_metadata = self._load_or_build_faiss_index()

    def _load_data(self) -> pd.DataFrame:
        """Load at most ``MAX_JOBS`` job rows from the CSV at ``data_path``."""
        # nrows stops parsing early instead of reading the whole file and
        # discarding everything past the cap.
        return pd.read_csv(self.data_path, nrows=self.MAX_JOBS)

    def _load_or_build_faiss_index(self):
        """Load the cached FAISS index, or build and cache it when unusable.

        Returns:
            A ``(faiss_index, job_metadata)`` pair, where ``job_metadata`` maps
            each row position (the FAISS vector id) to that row as a dict.
        """
        job_metadata = dict(enumerate(self.df.to_dict(orient="records")))

        if os.path.exists(self.faiss_index_path) and os.path.exists(self.embeddings_path):
            print("Loading precomputed FAISS index and embeddings.")
            faiss_index = faiss.read_index(self.faiss_index_path)
            # A cache built from a different CSV would map vector ids to the
            # wrong (or missing) rows, so only trust it when the sizes agree.
            if faiss_index.ntotal == len(self.df):
                return faiss_index, job_metadata
            print("Cached FAISS index does not match the job data; rebuilding.")

        print("Building FAISS index and embeddings.")
        return self._build_faiss_index(), job_metadata

    def _build_faiss_index(self):
        """Embed every job row, index the vectors, and cache both on disk.

        Raises:
            ValueError: If the loaded dataset contains no rows.
        """
        job_texts = [
            f"{title} - {description}"
            for title, description in zip(self.df["Job Title"], self.df["Job Description"])
        ]
        if not job_texts:
            raise ValueError(
                f"No job rows found in {self.data_path!r}; cannot build a FAISS index."
            )

        # FAISS operates on float32 matrices; embed_documents returns Python floats.
        job_embeddings = np.asarray(
            self.embedding_model.embed_documents(job_texts), dtype=np.float32
        )
        faiss_index = faiss.IndexFlatL2(job_embeddings.shape[1])
        faiss_index.add(job_embeddings)

        # Save the FAISS index and embeddings.
        faiss.write_index(faiss_index, self.faiss_index_path)
        # Writing through a file handle keeps the exact path; np.save(path, ...)
        # would append ".npy" and defeat the existence check on the next run.
        with open(self.embeddings_path, "wb") as embeddings_file:
            np.save(embeddings_file, job_embeddings)
        return faiss_index

    def find_similar_jobs(self, query: str, top_k=3):
        """Retrieve similar jobs using FAISS.

        Args:
            query: Free-text description of the job the user is looking for.
            top_k: Maximum number of jobs to return.

        Returns:
            A list of job-row dicts, closest match first. It holds fewer than
            ``top_k`` entries when the index has fewer vectors, and is empty
            when ``top_k`` is not positive.
        """
        if top_k <= 0:
            return []

        query_embedding = np.asarray(
            [self.embedding_model.embed_query(query)], dtype=np.float32
        )
        _, indices = self.faiss_index.search(query_embedding, top_k)
        # FAISS pads missing neighbours with -1 when it cannot find top_k results.
        return [self.job_metadata[int(idx)] for idx in indices[0] if idx >= 0]

    def generate_response(self, user_query: str) -> str:
        """Generate AI-powered job recommendations using Groq API.

        Args:
            user_query: Free-text description of the job the user is looking for.

        Returns:
            The model's full recommendation text, or a fixed fallback message
            when no similar jobs were found.
        """
        jobs = self.find_similar_jobs(user_query)
        if not jobs:
            return "No matching jobs found."

        job_details = "\n".join([f"{job['Job Title']}: {job['Job Description']}" for job in jobs])
        prompt = f"""
        You are a job recommendation assistant. A user is looking for a job related to: {user_query}.
        Here are some recommended jobs:
        {job_details}
        Provide a detailed recommendation with insights on why these jobs are relevant.
        """

        completion = self.client.chat.completions.create(
            model=self.LLM_MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            temperature=1,
            max_tokens=1024,
            top_p=1,
            stream=True,
        )

        # The reply is streamed; stitch the deltas together. Chunks without
        # choices or with a None delta contribute nothing.
        return "".join(
            chunk.choices[0].delta.content or ""
            for chunk in completion
            if chunk.choices
        )
|
|
|
|
# Streamlit UI
|
|
def main():
    """Render the Streamlit page: take a job query and display AI recommendations."""
    st.title("💼 Job Recommendation Chatbot")
    st.write("Enter your job preferences below, and the AI will suggest relevant jobs!")

    # Streamlit re-runs this script on every widget interaction. Caching the
    # recommender avoids reloading the embedding model, the CSV and the FAISS
    # index on each rerun.
    @st.cache_resource
    def _load_recommender():
        return JobRecommender(
            data_path="job_desc.csv",
            faiss_index_path="faiss_index.index",
            embeddings_path="job_embeddings.npy",
        )

    recommender = _load_recommender()
    user_input = st.text_input("Enter job title, skills, or interests:", "")

    if st.button("Find Jobs"):
        # Treat whitespace-only input the same as no input.
        query = user_input.strip()
        if query:
            response = recommender.generate_response(query)
            st.subheader("📌 Job Recommendations")
            st.write(response)
        else:
            st.warning("Please enter a job-related query.")
|
|
|
|
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()
|