first change
This commit is contained in:
commit
d4bf290086
5001
job_desc.csv
Normal file
5001
job_desc.csv
Normal file
File diff suppressed because it is too large
Load diff
104
main.py
Normal file
104
main.py
Normal file
|
@ -0,0 +1,104 @@
|
|||
import streamlit as st
|
||||
import faiss
|
||||
import numpy as np
|
||||
import os
|
||||
import pandas as pd
|
||||
from dotenv import load_dotenv
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
from groq import Groq
|
||||
|
||||
|
||||
|
||||
class JobRecommender:
    """Recommend jobs by semantic search over a CSV of job postings.

    Each posting's title and description are embedded and stored in a flat
    L2 FAISS index.  The postings nearest to a user query are handed to a
    Groq-hosted chat model, which writes the recommendation text.
    """

    EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
    CHAT_MODEL = "mixtral-8x7b-32768"
    # Only the first MAX_JOBS rows of the CSV are indexed.
    MAX_JOBS = 5000

    def __init__(self, data_path: str, faiss_index_path: str, embeddings_path: str):
        """Initialize the job recommender system.

        Args:
            data_path: CSV file with "Job Title" and "Job Description" columns.
            faiss_index_path: Where the FAISS index is cached on disk.
            embeddings_path: Where the raw embedding matrix is cached on disk.
        """
        load_dotenv()
        self.client = Groq(api_key=os.getenv("GROQ_API_KEY"))
        self.embedding_model = HuggingFaceEmbeddings(model_name=self.EMBEDDING_MODEL_NAME)
        self.data_path = data_path
        self.faiss_index_path = faiss_index_path
        self.embeddings_path = embeddings_path
        self.df = self._load_data()
        self.faiss_index, self.job_metadata = self._load_or_build_faiss_index()

    def _load_data(self) -> pd.DataFrame:
        """Load at most MAX_JOBS rows of job data from the CSV."""
        # nrows stops parsing early instead of reading the whole file and
        # discarding everything past the limit.
        return pd.read_csv(self.data_path, nrows=self.MAX_JOBS)

    def _build_job_metadata(self) -> dict:
        """Map each row position to that row's column -> value dict."""
        return dict(enumerate(self.df.to_dict("records")))

    def _build_faiss_index(self):
        """Embed every posting, build the FAISS index and cache both on disk."""
        print("Building FAISS index and embeddings.")
        if self.df.empty:
            raise ValueError(
                f"No job rows found in {self.data_path!r}; cannot build a FAISS index."
            )

        job_texts = [
            f"{title} - {description}"
            for title, description in zip(self.df["Job Title"], self.df["Job Description"])
        ]
        # FAISS stores float32 vectors; convert once, explicitly.
        job_embeddings = np.asarray(
            self.embedding_model.embed_documents(job_texts), dtype="float32"
        )

        faiss_index = faiss.IndexFlatL2(job_embeddings.shape[1])
        faiss_index.add(job_embeddings)

        # Save the FAISS index and embeddings
        faiss.write_index(faiss_index, self.faiss_index_path)
        # Write through a file handle: np.save(<str path>) appends ".npy" when
        # the suffix is missing, and the cache check would then never find
        # the file under the configured name.
        with open(self.embeddings_path, "wb") as handle:
            np.save(handle, job_embeddings)
        return faiss_index

    def _load_or_build_faiss_index(self):
        """Load precomputed FAISS index or build it if it doesn't exist.

        A cached index is reused only when it holds exactly one vector per
        loaded job row; otherwise it is stale and gets rebuilt.

        Returns:
            A ``(faiss_index, job_metadata)`` tuple, where ``job_metadata``
            maps a row position (the FAISS label) to that row's dict.
        """
        job_metadata = self._build_job_metadata()

        cache_present = os.path.exists(self.faiss_index_path) and os.path.exists(
            self.embeddings_path
        )
        if cache_present:
            print("Loading precomputed FAISS index and embeddings.")
            faiss_index = faiss.read_index(self.faiss_index_path)
            if faiss_index.ntotal == len(self.df):
                return faiss_index, job_metadata
            print("Cached FAISS index does not match the job data; rebuilding.")

        return self._build_faiss_index(), job_metadata

    def find_similar_jobs(self, query: str, top_k=3):
        """Retrieve similar jobs using FAISS.

        Args:
            query: Free-text description of the job the user wants.
            top_k: Maximum number of postings to return.

        Returns:
            Up to ``top_k`` job dicts, nearest first.  The list is shorter
            (possibly empty) when the index holds fewer than ``top_k`` vectors.
        """
        if top_k < 1:
            return []
        query_embedding = np.asarray(
            [self.embedding_model.embed_query(query)], dtype="float32"
        )
        _distances, indices = self.faiss_index.search(query_embedding, top_k)
        # FAISS pads the result with label -1 when it finds fewer than top_k
        # neighbours; those are not real rows and must be skipped.
        return [self.job_metadata[int(idx)] for idx in indices[0] if idx >= 0]

    def generate_response(self, user_query: str, model: str = CHAT_MODEL):
        """Generate AI-powered job recommendations using Groq API.

        Args:
            user_query: Free-text description of the job the user wants.
            model: Groq chat model identifier used for the completion.

        Returns:
            The model's recommendation text, or "No matching jobs found."
            when the similarity search returns nothing.
        """
        jobs = self.find_similar_jobs(user_query)
        if not jobs:
            return "No matching jobs found."

        job_details = "\n".join(
            f"{job['Job Title']}: {job['Job Description']}" for job in jobs
        )
        prompt = f"""
        You are a job recommendation assistant. A user is looking for a job related to: {user_query}.
        Here are some recommended jobs:
        {job_details}
        Provide a detailed recommendation with insights on why these jobs are relevant.
        """

        completion = self.client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=1,
            max_tokens=1024,
            top_p=1,
            stream=True,
        )

        # The reply is streamed; a chunk's delta content can be None, so
        # substitute "" before joining.
        return "".join(chunk.choices[0].delta.content or "" for chunk in completion)
|
||||
|
||||
# Streamlit UI
|
||||
def _build_recommender():
    """Create the JobRecommender wired to the app's data and cache files."""
    return JobRecommender(
        data_path="job_desc.csv",
        faiss_index_path="faiss_index.index",
        embeddings_path="job_embeddings.npy",
    )


def main():
    """Render the Streamlit page: a query box and the generated recommendations."""
    st.title("💼 Job Recommendation Chatbot")
    st.write("Enter your job preferences below, and the AI will suggest relevant jobs!")

    # Streamlit re-runs this script on every widget interaction.  Without
    # caching, each click would reload the embedding model, the CSV and the
    # FAISS index; cache_resource keeps a single shared instance instead.
    recommender = st.cache_resource(_build_recommender)()
    user_input = st.text_input("Enter job title, skills, or interests:", "")

    if st.button("Find Jobs"):
        # Treat whitespace-only input as empty rather than sending it to the LLM.
        query = user_input.strip()
        if query:
            with st.spinner("Finding matching jobs..."):
                response = recommender.generate_response(query)
            st.subheader("📌 Job Recommendations")
            st.write(response)
        else:
            st.warning("Please enter a job-related query.")


if __name__ == "__main__":
    main()
|
116
requirements.txt
Normal file
116
requirements.txt
Normal file
|
@ -0,0 +1,116 @@
|
|||
aiohappyeyeballs==2.4.6
|
||||
aiohttp==3.11.13
|
||||
aiosignal==1.3.2
|
||||
altair==5.5.0
|
||||
annotated-types==0.7.0
|
||||
anyio==4.8.0
|
||||
async-timeout==4.0.3
|
||||
attrs==25.1.0
|
||||
blinker==1.9.0
|
||||
cachetools==5.5.2
|
||||
certifi==2025.1.31
|
||||
charset-normalizer==3.4.1
|
||||
click==8.1.8
|
||||
dataclasses-json==0.6.7
|
||||
distro==1.9.0
|
||||
exceptiongroup==1.2.2
|
||||
faiss-cpu==1.10.0
|
||||
filelock==3.17.0
|
||||
frozenlist==1.5.0
|
||||
fsspec==2025.2.0
|
||||
gitdb==4.0.12
|
||||
GitPython==3.1.44
|
||||
greenlet==3.1.1
|
||||
groq==0.18.0
|
||||
h11==0.14.0
|
||||
httpcore==1.0.7
|
||||
httpx==0.28.1
|
||||
httpx-sse==0.4.0
|
||||
huggingface-hub==0.29.1
|
||||
idna==3.10
|
||||
Jinja2==3.1.5
|
||||
joblib==1.4.2
|
||||
jsonpatch==1.33
|
||||
jsonpointer==3.0.0
|
||||
jsonschema==4.23.0
|
||||
jsonschema-specifications==2024.10.1
|
||||
langchain==0.3.19
|
||||
langchain-community==0.3.18
|
||||
langchain-core==0.3.40
|
||||
langchain-huggingface==0.1.2
|
||||
langchain-text-splitters==0.3.6
|
||||
langsmith==0.3.11
|
||||
markdown-it-py==3.0.0
|
||||
MarkupSafe==3.0.2
|
||||
marshmallow==3.26.1
|
||||
mdurl==0.1.2
|
||||
mpmath==1.3.0
|
||||
multidict==6.1.0
|
||||
mypy-extensions==1.0.0
|
||||
narwhals==1.28.0
|
||||
nest-asyncio==1.6.0
|
||||
networkx==3.4.2
|
||||
numpy==1.26.4
|
||||
nvidia-cublas-cu12==12.4.5.8
|
||||
nvidia-cuda-cupti-cu12==12.4.127
|
||||
nvidia-cuda-nvrtc-cu12==12.4.127
|
||||
nvidia-cuda-runtime-cu12==12.4.127
|
||||
nvidia-cudnn-cu12==9.1.0.70
|
||||
nvidia-cufft-cu12==11.2.1.3
|
||||
nvidia-curand-cu12==10.3.5.147
|
||||
nvidia-cusolver-cu12==11.6.1.9
|
||||
nvidia-cusparse-cu12==12.3.1.170
|
||||
nvidia-cusparselt-cu12==0.6.2
|
||||
nvidia-nccl-cu12==2.21.5
|
||||
nvidia-nvjitlink-cu12==12.4.127
|
||||
nvidia-nvtx-cu12==12.4.127
|
||||
orjson==3.10.15
|
||||
packaging==24.2
|
||||
pandas==2.2.3
|
||||
pillow==11.1.0
|
||||
propcache==0.3.0
|
||||
protobuf==5.29.3
|
||||
pyarrow==19.0.1
|
||||
pydantic==2.10.6
|
||||
pydantic-settings==2.8.0
|
||||
pydantic_core==2.27.2
|
||||
pydeck==0.9.1
|
||||
Pygments==2.19.1
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.0.1
|
||||
pytz==2025.1
|
||||
PyYAML==6.0.2
|
||||
referencing==0.36.2
|
||||
regex==2024.11.6
|
||||
requests==2.32.3
|
||||
requests-toolbelt==1.0.0
|
||||
rich==13.9.4
|
||||
rpds-py==0.23.1
|
||||
safetensors==0.5.3
|
||||
scikit-learn==1.6.1
|
||||
scipy==1.15.2
|
||||
sentence-transformers==3.4.1
|
||||
six==1.17.0
|
||||
smmap==5.0.2
|
||||
sniffio==1.3.1
|
||||
SQLAlchemy==2.0.38
|
||||
streamlit==1.42.2
|
||||
sympy==1.13.1
|
||||
tenacity==9.0.0
|
||||
threadpoolctl==3.5.0
|
||||
tokenizers==0.21.0
|
||||
toml==0.10.2
|
||||
torch==2.6.0
|
||||
torchaudio==2.6.0
|
||||
torchvision==0.21.0
|
||||
tornado==6.4.2
|
||||
tqdm==4.67.1
|
||||
transformers==4.49.0
|
||||
triton==3.2.0
|
||||
typing-inspect==0.9.0
|
||||
typing_extensions==4.12.2
|
||||
tzdata==2025.1
|
||||
urllib3==2.3.0
|
||||
watchdog==6.0.0
|
||||
yarl==1.18.3
|
||||
zstandard==0.23.0
|
Loading…
Reference in a new issue