"""Build a FAISS similarity index over the questions of the MedQuAD dataset.

Run as a script to read ``medquad.csv`` from the current directory and write
``medquad_index.faiss`` next to it.
"""

import pandas as pd


def preprocess_medquad(
    csv_path: str = "medquad.csv",
    index_path: str = "medquad_index.faiss",
    model_name: str = "all-MiniLM-L6-v2",
) -> None:
    """Embed every MedQuAD question and persist a flat L2 FAISS index.

    Args:
        csv_path: CSV file to read; it must contain a ``question`` column.
        index_path: Destination file for the serialized FAISS index.
        model_name: Name of the pre-trained sentence-transformers model used
            to embed the questions.

    Raises:
        KeyError: If the CSV has no ``question`` column.
        ValueError: If the CSV contains no rows, so there is nothing to index.
    """
    # Load MedQuAD dataset
    data = pd.read_csv(csv_path)

    # Missing cells come back from pandas as float NaN, which the encoder
    # cannot tokenize. They are replaced in place (not dropped) so that row
    # positions stay aligned with the sequential ids FAISS assigns on add().
    questions = data["question"].fillna("").astype(str).tolist()
    if not questions:
        # Fail before loading the model: an empty batch has no embedding
        # dimension to size the index with.
        raise ValueError(f"No questions found in '{csv_path}'; nothing to index")

    # Heavy ML dependencies are imported only after the input is validated,
    # so bad input fails fast and importing this module stays cheap.
    import faiss
    from sentence_transformers import SentenceTransformer

    # Generate embeddings
    print("Generating embeddings...")
    model = SentenceTransformer(model_name)  # Pre-trained sentence transformer
    embeddings = model.encode(questions, convert_to_numpy=True)

    # Build FAISS index; its dimensionality is the embedding width.
    print("Building FAISS index...")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # Save the index
    faiss.write_index(index, index_path)
    print(f"Index saved as '{index_path}'")


if __name__ == "__main__":
    preprocess_medquad()