eremeev-d committed on
Commit
974b378
·
1 Parent(s): 312683f

Moved data directly to the Space

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ *.index filter=lfs diff=lfs merge=lfs -text
Data/{embeddings.npy → articles.hf/dataset.arrow} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b03978d1bf25675f47526cc5480bbe019a20f8c85bed35e35092a53d906fbeeb
3
- size 1713805184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9908602ac126bc96718c70b7d7d126a951526275c98ac98910d8852ca314431
3
+ size 2294981896
Data/articles.hf/dataset_info.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": null,
3
+ "citation": "",
4
+ "config_name": null,
5
+ "dataset_size": null,
6
+ "description": "",
7
+ "download_checksums": null,
8
+ "download_size": null,
9
+ "features": {
10
+ "id": {
11
+ "dtype": "string",
12
+ "id": null,
13
+ "_type": "Value"
14
+ },
15
+ "title": {
16
+ "dtype": "string",
17
+ "id": null,
18
+ "_type": "Value"
19
+ },
20
+ "abstract": {
21
+ "dtype": "string",
22
+ "id": null,
23
+ "_type": "Value"
24
+ }
25
+ },
26
+ "homepage": "",
27
+ "license": "",
28
+ "post_processed": null,
29
+ "post_processing_size": null,
30
+ "size_in_bytes": null,
31
+ "splits": null,
32
+ "supervised_keys": null,
33
+ "task_templates": null,
34
+ "version": null
35
+ }
Data/articles.hf/state.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "dataset.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "5b2841faf59fb931",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_indexes": {},
12
+ "_output_all_columns": false,
13
+ "_split": null
14
+ }
Data/articles.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5039ab8b65b9726fb81b3aabbb89774022c458e0c12a7c120a6fffe8c6775885
3
+ size 3438919725
core.py CHANGED
@@ -40,18 +40,10 @@ def load_model():
40
  @st.cache_resource
41
  def load_index():
42
  logging.info("Trying to load index")
43
- index = datasets.load_dataset(
44
- "eremeev-d/arxiv-abstracts",
45
- use_auth_token=True,
46
- split="train"
47
- )
48
- logging.info("Index succesfully loaded")
49
- logging.info("Loading embeddings")
50
- embeddings = np.load("Data/embeddings.npy")
51
- logging.info("Loaded embeddings")
52
- logging.info("Building index")
53
- index.add_faiss_index_from_external_arrays(embeddings, 'embedding')
54
- logging.info("Index built successfully")
55
  return index
56
 
57
 
 
40
  @st.cache_resource
41
  def load_index():
42
  logging.info("Trying to load index")
43
+ index = datasets.Dataset.load_from_disk("Data/articles.hf")
44
+ logging.info("Articles dataset loaded")
45
+ index.load_faiss_index("embedding", "Data/articles.index")
46
+ logging.info("FAISS index loaded")
 
 
 
 
 
 
 
 
47
  return index
48
 
49