Shabdobhedi committed on
Commit
eb1a03e
·
verified ·
1 Parent(s): aaa0052

Upload 4 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/Medical_book.pdf filter=lfs diff=lfs merge=lfs -text
37
+ data/vector/index.faiss filter=lfs diff=lfs merge=lfs -text
data/Medical_book.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:753cd53b7a3020bbd91f05629b0e3ddcfb6a114d7bbedb22c2298b66f5dd00cc
3
+ size 16127037
data/vector/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eefa9ec7a37afdb420c2071d7ad6152d242a556e8a8dcfa4a88716714afc4ca3
3
+ size 9001005
data/vector/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:746288fa8a336fe54359fd5ed7d43cc71fe78e463a389fc28f20e298dfc85082
3
+ size 3283602
store_index.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.embeddings import HuggingFaceEmbeddings
2
+ from langchain_community.vectorstores import FAISS
3
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ import os
6
+ from src.helper import load_pdf, text_split, download_hugging_face_embeddings
7
# Location handed to load_pdf() when the vector store is built.
DATA_PATH = r"G:\Chatbot\data"
# Folder that create_vector_db() passes to FAISS save_local().
DB_FAISS_PATH = r"G:\Chatbot\data\vector"
9
+
10
+
11
+ '''extracted_data = load_pdf(r"G:\Chatbot\data")
12
+ text_chunks = text_split(extracted_data)
13
+ embeddings = download_hugging_face_embeddings()
14
+ # Initializing the Faiss
15
+ db = FAISS.from_documents(text_chunks, embeddings)
16
+ db.save_local(DB_FAISS_PATH)
17
+ # I changed DB_FAISS_PATH above
18
+ # db.save_local(r"G:\Chatbot\DB_FAISS_PATH")'''
19
+
20
+
21
def create_vector_db(data_path=None, db_faiss_path=None):
    """Build a FAISS vector store from the source documents and save it.

    The pipeline is: ``load_pdf`` -> ``text_split`` ->
    ``download_hugging_face_embeddings`` (all imported from ``src.helper``)
    -> ``FAISS.from_documents`` -> ``save_local``.

    Args:
        data_path: Location passed to ``load_pdf``. When ``None`` (the
            default) the module-level ``DATA_PATH`` is used, which matches
            the previous zero-argument behaviour.
        db_faiss_path: Folder passed to ``save_local``. When ``None`` (the
            default) the module-level ``DB_FAISS_PATH`` is used.

    Raises:
        ValueError: If splitting produced no text chunks, so there is
            nothing to index.
    """
    # Resolve defaults at call time so the module constants stay the single
    # source of truth for callers that pass nothing.
    if data_path is None:
        data_path = DATA_PATH
    if db_faiss_path is None:
        db_faiss_path = DB_FAISS_PATH

    extracted_data = load_pdf(data_path)
    text_chunks = text_split(extracted_data)
    if not text_chunks:
        # NOTE(review): FAISS.from_documents is expected to fail with an
        # unhelpful low-level error on an empty list; report the real cause.
        raise ValueError(
            f"No text chunks were produced from {data_path!r}; "
            "check that the location contains readable PDF files."
        )

    embeddings = download_hugging_face_embeddings()
    db = FAISS.from_documents(text_chunks, embeddings)
    db.save_local(db_faiss_path)
    print("### db is created")
29
+
30
+
31
+ '''# Create vector database
32
+ def create_vector_db():
33
+ loader = DirectoryLoader(DATA_PATH,
34
+ glob='*.pdf',
35
+ loader_cls=PyPDFLoader)
36
+
37
+ documents = loader.load()
38
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,
39
+ chunk_overlap=50)
40
+ texts = text_splitter.split_documents(documents)
41
+
42
+ embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
43
+ model_kwargs={'device': 'cuda'})
44
+
45
+ db = FAISS.from_documents(texts, embeddings)
46
+ db.save_local(DB_FAISS_PATH)
47
+
48
+ create_vector_db() # Call the function directly in the cell'''