ramanhyd99 committed
Commit 63fc89e · verified · 1 Parent(s): 583cc92

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ harry_potter_transformer.keras filter=lfs diff=lfs merge=lfs -text
.ipynb_checkpoints/transformer-checkpoint.ipynb ADDED
data.txt ADDED
The diff for this file is too large to render. See raw diff
 
harry_potter_transformer.keras ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:653c6ee2695012b436f255ea782a2feb81483cef09ad86bbf616dcfbd3d9ae2f
+ size 41198299
transformer.ipynb ADDED
@@ -0,0 +1,489 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "7c710f0a-59f2-445c-9464-d702fe44fe7a",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Num GPUs Available: 1\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "import tensorflow as tf\n",
19
+ "print(\"Num GPUs Available:\", len(tf.config.list_physical_devices('GPU')))"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 3,
25
+ "id": "33d41ac0-0a70-4b7f-9c00-5b1bcbcd1c9d",
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "import numpy as np\n",
30
+ "import tensorflow as tf\n",
31
+ "from tensorflow.keras.preprocessing.text import Tokenizer"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 4,
37
+ "id": "2e794897-5d68-44e5-bc1a-111a6232ce26",
38
+ "metadata": {},
39
+ "outputs": [
40
+ {
41
+ "name": "stdout",
42
+ "output_type": "stream",
43
+ "text": [
44
+ "/opt/miniconda3/envs/tf-metal2/bin/python\n"
45
+ ]
46
+ }
47
+ ],
48
+ "source": [
49
+ "import sys\n",
50
+ "print(sys.executable)"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": 5,
56
+ "id": "8c8b6b39-3b6a-4e85-b446-2c5acacbd3e0",
57
+ "metadata": {},
58
+ "outputs": [
59
+ {
60
+ "ename": "FileNotFoundError",
61
+ "evalue": "[Errno 2] No such file or directory: '1.txt'",
62
+ "output_type": "error",
63
+ "traceback": [
64
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
65
+ "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
66
+ "Cell \u001b[0;32mIn[5], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m data \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m data\n\u001b[0;32m----> 6\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mload_data\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m1.txt\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mlower()\n",
67
+ "Cell \u001b[0;32mIn[5], line 2\u001b[0m, in \u001b[0;36mload_data\u001b[0;34m(file_path)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mload_data\u001b[39m(file_path):\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 3\u001b[0m data \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m data\n",
68
+ "File \u001b[0;32m/opt/miniconda3/envs/tf-metal2/lib/python3.9/site-packages/IPython/core/interactiveshell.py:310\u001b[0m, in \u001b[0;36m_modified_open\u001b[0;34m(file, *args, **kwargs)\u001b[0m\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m {\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m}:\n\u001b[1;32m 304\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 305\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIPython won\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt let you open fd=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m by default \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 306\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mas it is likely to crash IPython. If you know what you are doing, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 307\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124myou can use builtins\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m open.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 308\u001b[0m )\n\u001b[0;32m--> 310\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mio_open\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
69
+ "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '1.txt'"
70
+ ]
71
+ }
72
+ ],
73
+ "source": [
74
+ "def load_data(file_path):\n",
75
+ " with open(file_path, 'r') as f:\n",
76
+ " data = f.read()\n",
77
+ " return data\n",
78
+ "\n",
79
+ "data = load_data('data.txt').lower()"
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "code",
84
+ "execution_count": null,
85
+ "id": "573b0963-aa70-44de-86ab-33ba19d5148a",
86
+ "metadata": {},
87
+ "outputs": [],
88
+ "source": [
89
+ "tokenizer = Tokenizer(oov_token='<OOV>')\n",
90
+ "tokenizer.fit_on_texts([data])\n",
91
+ "total_words_in_dict = len(tokenizer.word_index) + 1\n",
92
+ "total_words_in_dict"
93
+ ]
94
+ },
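For readers new to the Keras Tokenizer, here is a minimal sketch of what the cell above produces, on a made-up toy sentence rather than the full data.txt:

from tensorflow.keras.preprocessing.text import Tokenizer

toy_text = "harry looked at ron and ron looked back"   # hypothetical toy corpus
toy_tok = Tokenizer(oov_token='<OOV>')
toy_tok.fit_on_texts([toy_text])
print(toy_tok.word_index)
# {'<OOV>': 1, 'looked': 2, 'ron': 3, ...}  -- index 1 is reserved for out-of-vocabulary words
print(toy_tok.texts_to_sequences(["harry looked at hermione"]))
# 'hermione' was never seen, so it maps to the <OOV> index 1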
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": null,
98
+ "id": "cda10ef1-d1c2-4025-b66f-7d2325526df9",
99
+ "metadata": {},
100
+ "outputs": [],
101
+ "source": [
102
+ "tokenizer.word_index['<OOV>'], tokenizer.word_index['harry']"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": null,
108
+ "id": "8d52769c-58d8-4ea2-a4e0-9664d5a2da9d",
109
+ "metadata": {},
110
+ "outputs": [],
111
+ "source": [
112
+ "# tokens basically is the entire text from first to last converted into their\n",
113
+ "# index representation\n",
114
+ "tokens = tokenizer.texts_to_sequences([data])[0]"
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": null,
120
+ "id": "03976234-376f-4b24-bab0-a7040c6760a3",
121
+ "metadata": {},
122
+ "outputs": [],
123
+ "source": [
124
+ "# this creates lists of length 51 (seq_len + 1)\n",
125
+ "# 1-51, 2-52, 3-53, etc.\n",
126
+ "# 51 so that the last value is used as y\n",
127
+ "seq_length = 50\n",
128
+ "input_sequences = []\n",
129
+ "for i in range(seq_length, len(tokens)):\n",
130
+ " input_sequences.append(tokens[i - seq_length: i + 1])"
131
+ ]
132
+ },
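The windowing loop above is easier to see on a tiny example; the token values and the short sequence length below are made up purely for illustration:

toy_tokens = [10, 11, 12, 13, 14, 15]
toy_seq_length = 3
toy_windows = [toy_tokens[i - toy_seq_length: i + 1]
               for i in range(toy_seq_length, len(toy_tokens))]
print(toy_windows)
# [[10, 11, 12, 13], [11, 12, 13, 14], [12, 13, 14, 15]]
# each window has seq_length + 1 entries: the first seq_length tokens are the input,
# the final token is the next-word target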
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": null,
136
+ "id": "e49c6da4-64c0-4bc7-9526-6f3df699002a",
137
+ "metadata": {},
138
+ "outputs": [],
139
+ "source": [
140
+ "# this ensures all the lists are of same length\n",
141
+ "# here as well we need seq_len + 1 as the previous block\n",
142
+ "from tensorflow.keras.utils import pad_sequences\n",
143
+ "\n",
144
+ "final_input = np.array(pad_sequences(input_sequences, maxlen=seq_length + 1, padding='pre'))\n",
145
+ "final_input[0]"
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "code",
150
+ "execution_count": null,
151
+ "id": "83639aac-6ad1-4494-ac0c-b54a59e39025",
152
+ "metadata": {},
153
+ "outputs": [],
154
+ "source": [
155
+ "# create x and y, last value of each list is the prediction\n",
156
+ "# imagine sliding window\n",
157
+ "X, y = final_input[:, :-1], final_input[:, -1]\n",
158
+ "print('X : ', X[0], 'Y: ', y[0])"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": null,
164
+ "id": "70d67b34-401d-425a-bc37-3b58863ccc4c",
165
+ "metadata": {},
166
+ "outputs": [],
167
+ "source": [
168
+ "# if you print y, it will be integer values like 46, 274, etc.\n",
169
+ "# we need categorical, also it can belong to any word from the entire\n",
170
+ "# dict , we will generate probs and find crossentropy\n",
171
+ "y = tf.keras.utils.to_categorical(y, num_classes=total_words_in_dict)\n",
172
+ "y[0], y.shape"
173
+ ]
174
+ },
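As a sketch of what to_categorical is doing to each target (the vocabulary size of 6 below is made up):

import tensorflow as tf

print(tf.keras.utils.to_categorical([3], num_classes=6))
# [[0. 0. 0. 1. 0. 0.]]  -- a single 1 at index 3, zeros everywhere else
# with the real vocabulary, y becomes a (num_sequences, total_words_in_dict) one-hot matrix,
# which is what the categorical_crossentropy loss used later expects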
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": null,
178
+ "id": "a8678c88-b1fa-4d0b-9be6-e3fb27413d17",
179
+ "metadata": {},
180
+ "outputs": [],
181
+ "source": [
182
+ "# the shape will be number of lists x seq_len\n",
183
+ "X.shape, y.shape"
184
+ ]
185
+ },
186
+ {
187
+ "cell_type": "code",
188
+ "execution_count": null,
189
+ "id": "3b03bba6-6819-4282-b2e2-5d5219905eda",
190
+ "metadata": {},
191
+ "outputs": [],
192
+ "source": [
193
+ "from tensorflow.keras.layers import Layer, Dense, LayerNormalization, Dropout, Embedding\n",
194
+ "\n",
195
+ "class MultiHeadAttention(Layer):\n",
196
+ " def __init__(self, seq_length, num_heads, embed_dim):\n",
197
+ " super(MultiHeadAttention, self).__init__()\n",
198
+ "\n",
199
+ " self.seq_length = seq_length\n",
200
+ " self.num_heads = num_heads\n",
201
+ " self.embed_dim = embed_dim\n",
202
+ "\n",
203
+ " self.projection_dim = embed_dim // num_heads\n",
204
+ "\n",
205
+ " self.query = Dense(embed_dim)\n",
206
+ " self.key = Dense(embed_dim)\n",
207
+ " self.value = Dense(embed_dim)\n",
208
+ "\n",
209
+ " # need this to learn the interaction between the features learnt by all\n",
210
+ " # the different heads\n",
211
+ " self.combine_heads_layer = Dense(embed_dim)\n",
212
+ "\n",
213
+ " def split_heads(self, input):\n",
214
+ " batch_size = tf.shape(input)[0]\n",
215
+ " x = tf.reshape(input, (batch_size, -1, self.num_heads, self.projection_dim))\n",
216
+ " return tf.transpose(x, perm=[0, 2, 1, 3])\n",
217
+ "\n",
218
+ " def self_attention(self, query, key, value):\n",
219
+ " score = tf.matmul(query, key, transpose_b=True)\n",
220
+ " scaled_score = score / tf.math.sqrt(tf.cast(self.projection_dim, tf.float32))\n",
221
+ " weights = tf.nn.softmax(scaled_score, axis=-1) # row wise in QKt\n",
222
+ "\n",
223
+ " return tf.matmul(weights, value), weights\n",
224
+ "\n",
225
+ "\n",
226
+ " def call(self, x):\n",
227
+ " batch_size = tf.shape(x)[0]\n",
228
+ "\n",
229
+ " # finds the weights matrix then split across heads\n",
230
+ " # it is more efficient computationally if we find the weight matrix\n",
231
+ " # across all the heads first then split to find individual attention scores\n",
232
+ " query = self.split_heads(self.query(x))\n",
233
+ " key = self.split_heads(self.key(x))\n",
234
+ " value = self.split_heads(self.value(x))\n",
235
+ "\n",
236
+ " attention, _ = self.self_attention(query, key, value)\n",
237
+ " # attention is of size [batch_size, num_heads, seq_length, proj_dim]\n",
238
+ "\n",
239
+ " attention = tf.transpose(attention, perm=[0, 2, 1, 3])\n",
240
+ " # attention is of size [batch_size, seq_length, num_heads, proj_dim]\n",
241
+ "\n",
242
+ " concat_attention = tf.reshape(attention, (batch_size, -1, embed_dim))\n",
243
+ "\n",
244
+ " return self.combine_heads_layer(concat_attention)\n",
245
+ "\n",
246
+ "\n",
247
+ "\n",
248
+ "class TransformerBlock(Layer):\n",
249
+ " def __init__(self, seq_length, embed_dim, ffn_dim):\n",
250
+ " super(TransformerBlock, self).__init__()\n",
251
+ "\n",
252
+ " self.seq_length = seq_length\n",
253
+ " self.embed_dim = embed_dim\n",
254
+ " self.ffn = tf.keras.Sequential([\n",
255
+ " Dense(ffn_dim, activation='relu'),\n",
256
+ " Dense(embed_dim)\n",
257
+ " ])\n",
258
+ "\n",
259
+ " self.attn = MultiHeadAttention(seq_length, 8, embed_dim)\n",
260
+ "\n",
261
+ " self.LayerNorm1 = LayerNormalization(epsilon=1e-6) # prevent divide by 0\n",
262
+ " self.LayerNorm2 = LayerNormalization(epsilon=1e-6)\n",
263
+ "\n",
264
+ " self.Drop1 = Dropout(0.1)\n",
265
+ " self.Drop2 = Dropout(0.1)\n",
266
+ "\n",
267
+ "\n",
268
+ " def call(self, x, isTraining):\n",
269
+ " attention_output = self.attn(x)\n",
270
+ " print(attention_output.shape)\n",
271
+ " x = self.LayerNorm1(x + self.Drop1(attention_output, training=isTraining))\n",
272
+ " ffn_output = self.ffn(x)\n",
273
+ " x = self.LayerNorm2(x + self.Drop2(ffn_output, training=isTraining))\n",
274
+ " return x\n",
275
+ "\n",
276
+ "class TokenAndPositionEmbedding(Layer):\n",
277
+ " def __init__(self, seq_length, total_words_in_dict, embed_dim):\n",
278
+ " super(TokenAndPositionEmbedding, self).__init__()\n",
279
+ "\n",
280
+ " self.seq_length = seq_length\n",
281
+ " self.emb = Embedding(input_dim=total_words_in_dict, output_dim=embed_dim)\n",
282
+ " self.pos_emb = Embedding(input_dim=seq_length, output_dim=embed_dim)\n",
283
+ "\n",
284
+ " def call(self, x):\n",
285
+ " positions = tf.range(start=0, limit=self.seq_length, delta=1)\n",
286
+ " positions = self.pos_emb(positions)\n",
287
+ " x = self.emb(x)\n",
288
+ " return x + positions"
289
+ ]
290
+ },
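A quick shape sanity-check of the attention layer defined above (a sketch; it assumes the cell has been run, and the batch size of 2 is arbitrary):

import tensorflow as tf

mha = MultiHeadAttention(seq_length=50, num_heads=8, embed_dim=256)
dummy = tf.random.uniform((2, 50, 256))   # (batch, seq_length, embed_dim)
out = mha(dummy)
print(out.shape)
# (2, 50, 256): internally each of the 8 heads works on a projection_dim of 256 // 8 = 32,
# and the heads are concatenated back to embed_dim before the final Dense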
291
+ {
292
+ "cell_type": "code",
293
+ "execution_count": null,
294
+ "id": "19e33d70-6984-44c2-bfe2-f525444bdf01",
295
+ "metadata": {},
296
+ "outputs": [],
297
+ "source": [
298
+ "ff_dim = 512\n",
299
+ "embed_dim = 256\n",
300
+ "\n",
301
+ " # This is a placeholder in functional api style\n",
302
+ " # batch_size is taken during .fit() phase\n",
303
+ "input_placeholder = tf.keras.Input(shape=(seq_length,))\n",
304
+ "input_placeholder.shape\n",
305
+ "tokenPosLayer = TokenAndPositionEmbedding(seq_length, total_words_in_dict, embed_dim)\n",
306
+ "x = tokenPosLayer(input_placeholder) # call isn't run yet, just a link created\n",
307
+ "\n",
308
+ "transformerBlock = TransformerBlock(seq_length, embed_dim, ff_dim)\n",
309
+ "print(x.shape)\n",
310
+ "\n",
311
+ "# x contains contextualized data, now the last row of the seq_len holds\n",
312
+ "# the latest context hence it is extract out\n",
313
+ "x = x[:, -1, :]\n",
314
+ "print(x.shape) # batch_size, last_row, embed_dim\n",
315
+ "\n",
316
+ "# we pass this context to a dense layer to learn how to make predictions\n",
317
+ "x = Dense(total_words_in_dict, activation='softmax')(x)\n",
318
+ "# batch_size, total_words (prediction)\n",
319
+ "# prediction happens batch wise in parallel and is compared to y\n",
320
+ "# batch wise in parallel\n",
321
+ "\n",
322
+ "print(x.shape)\n",
323
+ "\n",
324
+ "model = tf.keras.Model(inputs=input_placeholder, outputs=x)\n",
325
+ "model.summary()"
326
+ ]
327
+ },
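A small end-to-end shape check of the assembled model (a sketch; the all-zeros batch is just a stand-in for real token ids):

import numpy as np

dummy_batch = np.zeros((4, seq_length), dtype=np.int32)   # 4 fake sequences of token ids
probs = model.predict(dummy_batch, verbose=0)
print(probs.shape)         # (4, total_words_in_dict): one distribution over the vocabulary per sequence
print(probs.sum(axis=1))   # each row sums to ~1.0 thanks to the softmax output layer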
328
+ {
329
+ "cell_type": "code",
330
+ "execution_count": null,
331
+ "id": "1f60878b-6e12-4dcd-ab89-03a64e7a3367",
332
+ "metadata": {},
333
+ "outputs": [],
334
+ "source": [
335
+ "model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])"
336
+ ]
337
+ },
338
+ {
339
+ "cell_type": "code",
340
+ "execution_count": null,
341
+ "id": "5e688417-13f5-41c7-b98e-c4b4330ef363",
342
+ "metadata": {},
343
+ "outputs": [],
344
+ "source": [
345
+ "\n",
346
+ "import time\n",
347
+ "\n",
348
+ "\n",
349
+ "# CPU Benchmark\n",
350
+ "with tf.device('/CPU:0'):\n",
351
+ " start = time.time()\n",
352
+ " model.fit(X, y, batch_size=32, epochs=10)\n",
353
+ " print(\"CPU Time:\", time.time() - start)\n",
354
+ "\n",
355
+ "\n"
356
+ ]
357
+ },
358
+ {
359
+ "cell_type": "code",
360
+ "execution_count": null,
361
+ "id": "7fb4552b-86cc-461c-8a9a-572f5bfd869b",
362
+ "metadata": {},
363
+ "outputs": [],
364
+ "source": [
365
+ "# # GPU Benchmark\n",
366
+ "# with tf.device('/GPU:0'):\n",
367
+ "# start = time.time()\n",
368
+ "# rnn.fit(X, y, batch_size=1024, epochs=10)\n",
369
+ "# print(\"GPU Time:\", time.time() - start)"
370
+ ]
371
+ },
372
+ {
373
+ "cell_type": "code",
374
+ "execution_count": null,
375
+ "id": "7659d823-faf4-4908-9a0c-bd18b076c240",
376
+ "metadata": {},
377
+ "outputs": [],
378
+ "source": [
379
+ "def predict_next_word(seed_text, num_words_to_predict, max_len):\n",
380
+ " for _ in range(num_words_to_predict):\n",
381
+ " seed_list = tokenizer.texts_to_sequences([seed_text])[0]\n",
382
+ " seed_list = pad_sequences([seed_list], maxlen=max_len - 1, padding='pre')\n",
383
+ " prediction = model.predict(seed_list, verbose=0)\n",
384
+ " # prediction is an embed_dim array of probabilities\n",
385
+ " max_pred_index = np.argmax(prediction)\n",
386
+ " seed_text+= \" \" + tokenizer.index_word[max_pred_index]\n",
387
+ "\n",
388
+ " return seed_text"
389
+ ]
390
+ },
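One greedy step of the loop above, spelled out (a sketch; the seed sentence is arbitrary):

seed = "harry looked at the"
ids = tokenizer.texts_to_sequences([seed])[0]                   # words -> token ids
ids = pad_sequences([ids], maxlen=seq_length, padding='pre')    # left-pad to the model input length
probs = model.predict(ids, verbose=0)[0]                        # probabilities over the whole vocabulary
next_word = tokenizer.index_word[np.argmax(probs)]              # greedy choice: the single most likely word
print(seed + " " + next_word)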
391
+ {
392
+ "cell_type": "code",
393
+ "execution_count": null,
394
+ "id": "2dbbd786-5318-4172-9542-e56658ef79ba",
395
+ "metadata": {},
396
+ "outputs": [],
397
+ "source": [
398
+ "predict_next_word(\"who is harry is a \", 25, seq_length + 1)"
399
+ ]
400
+ },
401
+ {
402
+ "cell_type": "code",
403
+ "execution_count": null,
404
+ "id": "6751aa4b-2d22-47a2-9f17-f557f78c6f45",
405
+ "metadata": {},
406
+ "outputs": [],
407
+ "source": [
408
+ "!pip install huggingface_hub"
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "code",
413
+ "execution_count": null,
414
+ "id": "99e980a6-8686-4dd8-b26c-25a5542451b5",
415
+ "metadata": {},
416
+ "outputs": [],
417
+ "source": [
418
+ "model.save(\"harry_potter_transformer.keras\")"
419
+ ]
420
+ },
421
+ {
422
+ "cell_type": "code",
423
+ "execution_count": null,
424
+ "id": "d766fad6-e4be-4b97-9617-53a03661cb41",
425
+ "metadata": {
426
+ "scrolled": true
427
+ },
428
+ "outputs": [],
429
+ "source": [
430
+ "from huggingface_hub import notebook_login\n",
431
+ "\n",
432
+ "notebook_login()"
433
+ ]
434
+ },
435
+ {
436
+ "cell_type": "code",
437
+ "execution_count": null,
438
+ "id": "9171eefc-9952-42c6-8b00-9e7f9f6f6f58",
439
+ "metadata": {},
440
+ "outputs": [],
441
+ "source": [
442
+ "from huggingface_hub import HfApi\n",
443
+ "\n",
444
+ "repo_id = \"ramanhyd99/harry-potter-transformer\"\n",
445
+ "api = HfApi()\n",
446
+ "api.create_repo(repo_id=repo_id, exist_ok=True)\n"
447
+ ]
448
+ },
449
+ {
450
+ "cell_type": "code",
451
+ "execution_count": null,
452
+ "id": "bab81223-1667-463c-9075-9ab00958b22c",
453
+ "metadata": {},
454
+ "outputs": [],
455
+ "source": [
456
+ "# Push the model to HF中国镜像站 Hub\n",
457
+ "from huggingface_hub import upload_folder\n",
458
+ "\n",
459
+ "upload_folder(\n",
460
+ " folder_path=\"\",\n",
461
+ " path_in_repo=\".\",\n",
462
+ " repo_id=repo_id,\n",
463
+ " repo_type=\"model\"\n",
464
+ ")"
465
+ ]
466
+ }
467
+ ],
468
+ "metadata": {
469
+ "kernelspec": {
470
+ "display_name": "Python (tf-metal2)",
471
+ "language": "python",
472
+ "name": "tf-metal2"
473
+ },
474
+ "language_info": {
475
+ "codemirror_mode": {
476
+ "name": "ipython",
477
+ "version": 3
478
+ },
479
+ "file_extension": ".py",
480
+ "mimetype": "text/x-python",
481
+ "name": "python",
482
+ "nbconvert_exporter": "python",
483
+ "pygments_lexer": "ipython3",
484
+ "version": "3.9.21"
485
+ }
486
+ },
487
+ "nbformat": 4,
488
+ "nbformat_minor": 5
489
+ }
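Once the folder is pushed, the saved .keras file can be pulled back and reloaded along these lines (a sketch, not part of the notebook; because the model uses custom layer classes, they have to be supplied via custom_objects, and fully round-tripping them may also require get_config overrides on each class):

from huggingface_hub import hf_hub_download
import tensorflow as tf

path = hf_hub_download(repo_id="ramanhyd99/harry-potter-transformer",
                       filename="harry_potter_transformer.keras")
model = tf.keras.models.load_model(
    path,
    custom_objects={
        "MultiHeadAttention": MultiHeadAttention,
        "TransformerBlock": TransformerBlock,
        "TokenAndPositionEmbedding": TokenAndPositionEmbedding,
    },
)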