Blog‎ > ‎

Tutorial for Deep Learning Using Keras In Python by Edureka

posted Jun 26, 2019, 1:40 AM by MUHAMMAD MUN`IM AHMAD ZABIDI   [ updated Jun 30, 2019, 8:39 PM ]
The code below was derived from the YouTube video

Keras Tutorial For Beginners



https://www.youtube.com/watch?v=XNKeayZW4dY

This is definitely not a tutorial for REAL beginners!

# -*- coding: utf-8 -*-

"""
https: //www.youtube.com/watch?v=XNKeayZW4dY
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import pandas as pd
import tensorflow as tf

from datetime import datetime
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras

# Record the start time so total runtime can be reported at the end.
startTime = datetime.now()

# Shorthand for the Keras layers namespace used throughout the script.
layers = keras.layers
print("You have TensorFlow ", tf.__version__)

# Download the wine-reviews CSV (cached locally by get_file) and load it.
URL = "https://storage.googleapis.com/sara-cloud-ml/wine_data.csv"
path = tf.keras.utils.get_file(URL.split('/')[-1], URL)

data = pd.read_csv(path)
# Shuffle the whole frame so the later positional train/test split is random.
data = data.sample(frac=1)
# Quick sanity check of the loaded columns.
print(data.head())

# Drop rows missing the country or price, and drop the unnamed CSV index
# column that read_csv produced as the first column.
data = data[pd.notnull(data['country'])]
data = data[pd.notnull(data['price'])]
data = data.drop(data.columns[0], axis=1)

# Remove rare grape varieties (<= threshold occurrences) so the one-hot
# variety feature stays small.
variety_threshold = 500
value_counts = data['variety'].value_counts()
to_remove = value_counts[value_counts <= variety_threshold].index
# FIX: the original called data.replace(...) on the WHOLE frame, which
# replaces matching values in every column (e.g. a description cell equal
# to a variety name would be nulled too). Restrict the replacement to the
# 'variety' column, which is the only one being filtered.
data['variety'] = data['variety'].replace(to_remove, np.nan)
data = data[pd.notnull(data['variety'])]

# Positional 80/20 split of the already-shuffled frame.
train_size = int(len(data) * 0.8)
test_size = len(data) - train_size
print("Train size: %d" % train_size)
print("Test size: %d" % test_size)

# Training features and labels.
description_train = data['description'][:train_size]
variety_train = data['variety'][:train_size]
labels_train = data['price'][:train_size]

# Held-out features and labels.
description_test = data['description'][train_size:]
variety_test = data['variety'][train_size:]
labels_test = data['price'][train_size:]

# Fit a word-level tokenizer on the TRAINING descriptions only, so no
# vocabulary information leaks in from the test split.
vocab_size = 12000  # vocabulary cap; a tunable hyperparameter
tokenize = keras.preprocessing.text.Tokenizer(
    num_words=vocab_size, char_level=False)
tokenize.fit_on_texts(description_train)

# Wide feature 1: sparse bag-of-words matrix, one vocab_size-wide row per
# description.
description_bow_train = tokenize.texts_to_matrix(description_train)
description_bow_test = tokenize.texts_to_matrix(description_test)

# Wide feature 2: integer-encode the grape variety with sklearn, then
# expand to a one-hot vector with Keras.
encoder = LabelEncoder()
variety_train = encoder.fit_transform(variety_train)
# NOTE(review): transform raises for varieties absent from the training
# split — assumed not to occur after the rarity filtering above; confirm.
variety_test = encoder.transform(variety_test)
num_classes = np.max(variety_train) + 1

# One-hot encode both splits with the same class count.
variety_train = keras.utils.to_categorical(variety_train, num_classes)
variety_test = keras.utils.to_categorical(variety_test, num_classes)

# Wide model: concatenate the sparse BOW vector and the one-hot variety,
# pass through one dense layer, and regress the price.
bow_inputs = layers.Input(shape=(vocab_size,))
variety_inputs = layers.Input(shape=(num_classes,))
merged_layer = layers.concatenate([bow_inputs, variety_inputs])
merged_layer = layers.Dense(256, activation='relu')(merged_layer)
predictions = layers.Dense(1)(merged_layer)
wide_model = keras.Model(inputs=[bow_inputs, variety_inputs],
                         outputs=predictions)

# FIX: 'accuracy' is a classification metric and is meaningless for this
# regression (price) target; report mean absolute error instead.
wide_model.compile(loss='mse', optimizer='adam', metrics=['mae'])
print(wide_model.summary())

# Deep model feature: descriptions as padded token-id sequences for the
# embedding layer.
train_embed = tokenize.texts_to_sequences(description_train)
test_embed = tokenize.texts_to_sequences(description_test)

# Pad/truncate every sequence to a fixed length (padding appended at the end).
max_seq_length = 170
pad_sequences = keras.preprocessing.sequence.pad_sequences
train_embed = pad_sequences(train_embed, maxlen=max_seq_length, padding="post")
test_embed = pad_sequences(test_embed, maxlen=max_seq_length, padding="post")

# Deep model: learn an 8-dimensional embedding per token, flatten the
# sequence of embeddings, and regress the price.
deep_inputs = layers.Input(shape=(max_seq_length,))
embedding = layers.Embedding(vocab_size, 8,
                             input_length=max_seq_length)(deep_inputs)
embedding = layers.Flatten()(embedding)
embed_out = layers.Dense(1)(embedding)
deep_model = keras.Model(inputs=deep_inputs, outputs=embed_out)
print(deep_model.summary())

# FIX: 'accuracy' is a classification metric and is meaningless for this
# regression (price) target; report mean absolute error instead.
deep_model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# Wide & deep: merge both models' scalar outputs into one final price head.
merged_out = layers.concatenate([wide_model.output, deep_model.output])
merged_out = layers.Dense(1)(merged_out)
# wide_model.input is already a list of two tensors, so the combined model
# takes three inputs: [bow, one-hot variety, padded sequence].
combined_model = keras.Model(wide_model.input + [deep_model.input],
                             merged_out)
print(combined_model.summary())

# FIX: 'accuracy' is a classification metric and is meaningless for this
# regression (price) target; report mean absolute error instead.
combined_model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# Train the combined model on all three feature sets, then score it on the
# held-out split.
# FIX: flattened the needless [a, b] + [c] list concatenations and removed
# a leftover debug print ("LINE 120") that referenced a line number which
# no longer exists in this file.
combined_model.fit([description_bow_train, variety_train, train_embed],
                   labels_train, epochs=10, batch_size=128)

combined_model.evaluate([description_bow_test, variety_test, test_embed],
                        labels_test, batch_size=128)

# Spot-check the model: print the first few test descriptions with their
# predicted vs. actual prices and accumulate the absolute error.
predictions = combined_model.predict(
    [description_bow_test, variety_test] + [test_embed])

num_predictions = 40
diff = 0

for i in range(num_predictions):
    predicted_price = predictions[i][0]
    actual_price = labels_test.iloc[i]
    print(description_test.iloc[i])
    print('Predicted: ', predicted_price, 'Actual: ', actual_price, '\n')
    diff += abs(predicted_price - actual_price)

# Mean absolute difference over the sampled predictions, plus total runtime.
print('Average prediction difference: ', diff / num_predictions)
print("Time taken:", datetime.now() - startTime)
print("\n" * 10)
Comments