import tensorflow as tf
# Environment check: report how many GPU devices TensorFlow can see.
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))

import numpy as np
import tensorflow as tf  # NOTE(review): repeats the import at the top of the file
from tensorflow.keras.preprocessing.text import Tokenizer

import sys
# Environment check: show which Python interpreter is executing this code.
print(sys.executable)

def load_data(file_path):
    """Read the whole text file at *file_path* and return it as one string.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The complete contents of the file as a ``str``.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: without ``encoding`` the result of open()
    # depends on the platform locale, so the same corpus could decode
    # differently (or fail) from one machine to another.
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()

# Reconstructed from the notebook's code cells. Cell outputs (including a
# stale FileNotFoundError traceback) and the notebook JSON wrapper are not
# code and are not reproduced here.
import time

from tensorflow.keras.layers import Dense, Dropout, Embedding, Layer, LayerNormalization
from tensorflow.keras.utils import pad_sequences

# ---------------------------------------------------------------------------
# Data preparation
# ---------------------------------------------------------------------------
data = load_data('data.txt').lower()

# NOTE(review): the OOV token is an empty string in the notebook source. It
# may originally have been a tag-like token such as '<OOV>' that was lost
# during extraction -- confirm before relying on it.
tokenizer = Tokenizer(oov_token='')
tokenizer.fit_on_texts([data])
# +1 because the Keras Tokenizer never assigns index 0 to a word.
total_words_in_dict = len(tokenizer.word_index) + 1
print(total_words_in_dict)

# Spot-check two vocabulary entries. .get() is used so that a corpus without
# the probe word prints None instead of raising KeyError.
print(tokenizer.word_index.get(''), tokenizer.word_index.get('harry'))

# The entire text, first word to last, converted to its index representation.
tokens = tokenizer.texts_to_sequences([data])[0]

# Sliding windows of seq_length + 1 tokens (positions 1-51, 2-52, 3-53, ...):
# the first seq_length tokens are the model input, the last one is the target.
seq_length = 50
input_sequences = [
    tokens[i - seq_length: i + 1] for i in range(seq_length, len(tokens))
]
if not input_sequences:
    raise ValueError(
        f"corpus has {len(tokens)} tokens; more than seq_length={seq_length} "
        "are required to build at least one training window"
    )

# Every window already has seq_length + 1 items, so padding changes nothing;
# pad_sequences is kept to obtain one rectangular integer array.
final_input = np.array(pad_sequences(input_sequences, maxlen=seq_length + 1, padding='pre'))
print(final_input[0])

# Split each window: everything but the last token is X, the last token is y.
X, y = final_input[:, :-1], final_input[:, -1]
print('X : ', X[0], 'Y: ', y[0])

# y holds integer word indices; one-hot encode them over the whole vocabulary
# so they can be compared with the softmax output via categorical crossentropy.
y = tf.keras.utils.to_categorical(y, num_classes=total_words_in_dict)
print(y[0], y.shape)

# X is (number of windows, seq_length); y is (number of windows, vocabulary).
print(X.shape, y.shape)


# ---------------------------------------------------------------------------
# Model components
# ---------------------------------------------------------------------------
class MultiHeadAttention(Layer):
    """Multi-head scaled dot-product self-attention.

    Args:
        seq_length: Stored on the instance; not used by the computation.
        num_heads: Number of attention heads.
        embed_dim: Width of the input and output features. Must be divisible
            by ``num_heads``.

    Raises:
        ValueError: If ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, seq_length, num_heads, embed_dim):
        super().__init__()

        if embed_dim % num_heads != 0:
            # Otherwise split_heads() would fail later with an opaque
            # reshape error.
            raise ValueError(
                f"embed_dim ({embed_dim}) must be divisible by "
                f"num_heads ({num_heads})"
            )

        self.seq_length = seq_length
        self.num_heads = num_heads
        self.embed_dim = embed_dim

        self.projection_dim = embed_dim // num_heads

        self.query = Dense(embed_dim)
        self.key = Dense(embed_dim)
        self.value = Dense(embed_dim)

        # Learns the interaction between the features produced by the
        # different heads.
        self.combine_heads_layer = Dense(embed_dim)

    def split_heads(self, input):
        """Reshape ``input`` to (batch, num_heads, positions, projection_dim)."""
        batch_size = tf.shape(input)[0]
        x = tf.reshape(input, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def self_attention(self, query, key, value):
        """Return ``(attention_output, attention_weights)`` for Q, K and V."""
        score = tf.matmul(query, key, transpose_b=True)
        scaled_score = score / tf.math.sqrt(tf.cast(self.projection_dim, tf.float32))
        weights = tf.nn.softmax(scaled_score, axis=-1)  # row-wise over Q.K^T

        return tf.matmul(weights, value), weights

    def call(self, x):
        """Apply self-attention to ``x`` and recombine the heads."""
        batch_size = tf.shape(x)[0]

        # Project once at full width, then split across heads: cheaper than
        # running a separate projection per head.
        query = self.split_heads(self.query(x))
        key = self.split_heads(self.key(x))
        value = self.split_heads(self.value(x))

        attention, _ = self.self_attention(query, key, value)
        # attention: [batch_size, num_heads, seq_length, proj_dim]

        attention = tf.transpose(attention, perm=[0, 2, 1, 3])
        # attention: [batch_size, seq_length, num_heads, proj_dim]

        # Use the layer's own width, not a module-level `embed_dim` global.
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))

        return self.combine_heads_layer(concat_attention)


class TransformerBlock(Layer):
    """Self-attention and feed-forward sub-layers with residuals and LayerNorm.

    Args:
        seq_length: Stored on the instance and forwarded to the attention layer.
        embed_dim: Feature width of the block's input and output.
        ffn_dim: Hidden width of the feed-forward network.
        num_heads: Number of attention heads (default 8).
        dropout_rate: Rate of both dropout layers (default 0.1).
    """

    def __init__(self, seq_length, embed_dim, ffn_dim, num_heads=8, dropout_rate=0.1):
        super().__init__()

        self.seq_length = seq_length
        self.embed_dim = embed_dim
        self.ffn = tf.keras.Sequential([
            Dense(ffn_dim, activation='relu'),
            Dense(embed_dim),
        ])

        self.attn = MultiHeadAttention(seq_length, num_heads, embed_dim)

        self.LayerNorm1 = LayerNormalization(epsilon=1e-6)  # prevent divide by 0
        self.LayerNorm2 = LayerNormalization(epsilon=1e-6)

        self.Drop1 = Dropout(dropout_rate)
        self.Drop2 = Dropout(dropout_rate)

    def call(self, x, isTraining=None, training=None):
        """Run the block on ``x``.

        Args:
            x: Input features.
            isTraining: Explicit training flag for the dropout layers. Kept
                for backward compatibility; takes precedence when given.
            training: Standard Keras training flag, used when ``isTraining``
                is not given. If both are ``None`` the dropout layers fall
                back to Keras' own training-mode resolution.
        """
        if isTraining is None:
            isTraining = training

        attention_output = self.attn(x)
        x = self.LayerNorm1(x + self.Drop1(attention_output, training=isTraining))
        ffn_output = self.ffn(x)
        x = self.LayerNorm2(x + self.Drop2(ffn_output, training=isTraining))
        return x


class TokenAndPositionEmbedding(Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        seq_length: Number of positions in the position-embedding table.
        total_words_in_dict: Number of rows in the token-embedding table.
        embed_dim: Width of both embeddings.
    """

    def __init__(self, seq_length, total_words_in_dict, embed_dim):
        super().__init__()

        self.seq_length = seq_length
        self.emb = Embedding(input_dim=total_words_in_dict, output_dim=embed_dim)
        self.pos_emb = Embedding(input_dim=seq_length, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids ``x`` and add the position embeddings."""
        positions = tf.range(start=0, limit=self.seq_length, delta=1)
        positions = self.pos_emb(positions)
        x = self.emb(x)
        return x + positions


# ---------------------------------------------------------------------------
# Model assembly (functional API)
# ---------------------------------------------------------------------------
ff_dim = 512
embed_dim = 256

# Symbolic input in functional-API style; the batch size is supplied at
# .fit() time.
input_placeholder = tf.keras.Input(shape=(seq_length,))
print(input_placeholder.shape)
tokenPosLayer = TokenAndPositionEmbedding(seq_length, total_words_in_dict, embed_dim)
x = tokenPosLayer(input_placeholder)  # wires the graph; no real data flows yet

transformerBlock = TransformerBlock(seq_length, embed_dim, ff_dim)
# The block has to be applied to x; constructing it alone leaves it out of
# the model.
x = transformerBlock(x)
print(x.shape)

# x now holds contextualized features; the last position of the sequence
# carries the latest context, so only that row is extracted.
x = x[:, -1, :]
print(x.shape)  # batch_size, embed_dim

# A dense softmax layer turns that context into a distribution over the
# vocabulary; predictions are compared with y batch-wise.
x = Dense(total_words_in_dict, activation='softmax')(x)
print(x.shape)  # batch_size, total_words (prediction)

model = tf.keras.Model(inputs=input_placeholder, outputs=x)
model.summary()

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# ---------------------------------------------------------------------------
# CPU benchmark
# ---------------------------------------------------------------------------
with tf.device('/CPU:0'):
    start = time.perf_counter()  # monotonic clock, suited to measuring durations
    model.fit(X, y, batch_size=32, epochs=10)
    print("CPU Time:", time.perf_counter() - start)

# The notebook's final cell held only commented-out code, which is truncated
# in this source and therefore not reproduced.
# Notebook metadata: kernel "Python (tf-metal2)", Python 3.9.21, nbformat 4.5.