SW/Python

python : 텐서플로우 : feedforward, node, optimizer 변경 결과 비교

얇은생각 2019. 6. 24. 12:30
반응형

MNIST 숫자 인식 

- hidden layer 수: 3 

- 각 hidden layer의 node 수: 64, 64, 64

- activation: ReLU

- optimizer: SGD (learning rate=0.001)

- weight initialization: Xavier

- batch size: 200

(가) (10점) Tensorflow를 사용하여 위에 주어진 feed-forward 네트워크 구조를 구현하여 50 epoch 동안 학습한 후, training data와 validation data의 training curve를 그리시오. (Hint: 수업 자료 마지막 실험)

(나) (10점) 모든 hidden layer의 node 수를 512로 변환한 후, (가)와 동일한 실험을 진행하고 그 결과를 비교하시오.

(다) (5점) optimizer를 Adam으로 변환한 후, (가) (나) 의 결과와 비교하시오.



# coding: utf-8
# 4-가
# In[1]:


# Lab 10 MNIST and Xavier
import tensorflow as tf
import random
import matplotlib.pyplot as plt

from tensorflow.examples.tutorials.mnist import input_data

# Experiment 4-(a): feed-forward net with three 64-unit ReLU hidden layers,
# Xavier-initialized weights, trained with plain SGD.
tf.set_random_seed(777)  # reproducibility

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# parameters
learning_rate = 0.001
training_epochs = 50
batch_size = 200

# input placeholders: 784-wide input rows and 10-wide one-hot label rows
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# weights & bias for nn layers
W1 = tf.get_variable("W1", shape=[784, 64],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([64]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)

W2 = tf.get_variable("W2", shape=[64, 64],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([64]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)

W3 = tf.get_variable("W3", shape=[64, 64],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([64]))
# The third hidden layer must consume L2 through W3/b3; wiring it to
# L1/W2/b2 again would silently drop a layer and leave W3/b3 untrained.
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)

W4 = tf.get_variable("W4", shape=[64, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L3, W4) + b4  # raw logits; softmax is applied inside the loss

# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# per-epoch loss history used for the training curve
validationLossList = []
trainLossList = []

# number of mini-batches per epoch (constant, so computed once)
total_batch = mnist.train.num_examples // batch_size

# train my model
for epoch in range(training_epochs):
    avg_cost = 0

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch

    # Evaluate on the validation split. Only `cost` is fetched: fetching
    # `optimizer` here would take a gradient step on held-out data and
    # invalidate both this curve and the final test accuracy.
    validationVal = sess.run(
        cost,
        feed_dict={X: mnist.validation.images, Y: mnist.validation.labels})
    print('Epoch:', '%04d' % (epoch + 1),
          'train_loss =', '{:.9f}'.format(avg_cost),
          'validation_loss = ', validationVal)

    validationLossList.append(validationVal)
    trainLossList.append(avg_cost)

print('Learning Finished!')

# final accuracy on the untouched test split
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
accuracyVal = sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels})
print("Accuracy is ", accuracyVal)


# 4-나
# In[2]:




# Experiment 4-(b): same setup as 4-(a) but every hidden layer has 512 units.
# Variables get distinct names ("W12", ...) because they share the default
# graph with the previous experiment.
tf.set_random_seed(777)  # reproducibility

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# parameters
learning_rate = 0.001
training_epochs = 50
batch_size = 200

# input placeholders: 784-wide input rows and 10-wide one-hot label rows
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# weights & bias for nn layers
W1 = tf.get_variable("W12", shape=[784, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([512]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)

W2 = tf.get_variable("W22", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([512]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)

W3 = tf.get_variable("W32", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([512]))
# The third hidden layer must consume L2 through W3/b3; wiring it to
# L1/W2/b2 again would silently drop a layer and leave W3/b3 untrained.
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)

W4 = tf.get_variable("W42", shape=[512, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L3, W4) + b4  # raw logits; softmax is applied inside the loss

# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# per-epoch loss history used for the training curve
validationLossList2 = []
trainLossList2 = []

# number of mini-batches per epoch (constant, so computed once)
total_batch = mnist.train.num_examples // batch_size

# train my model
for epoch in range(training_epochs):
    avg_cost = 0

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch

    # Evaluate on the validation split. Only `cost` is fetched: fetching
    # `optimizer` here would take a gradient step on held-out data and
    # invalidate both this curve and the final test accuracy.
    validationVal = sess.run(
        cost,
        feed_dict={X: mnist.validation.images, Y: mnist.validation.labels})
    print('Epoch:', '%04d' % (epoch + 1),
          'train_loss =', '{:.9f}'.format(avg_cost),
          'validation_loss = ', validationVal)

    validationLossList2.append(validationVal)
    trainLossList2.append(avg_cost)

print('Learning Finished!')

# final accuracy on the untouched test split
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
accuracyVal = sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels})
print("Accuracy is ", accuracyVal)





# 4-다
# In[3]:




# Experiment 4-(c): 512-unit hidden layers as in 4-(b), but optimized with Adam.
# Variables get distinct names ("W13", ...) because they share the default
# graph with the previous experiments.
tf.set_random_seed(777)  # reproducibility

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# parameters
learning_rate = 0.001
training_epochs = 50
batch_size = 200

# input placeholders: 784-wide input rows and 10-wide one-hot label rows
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# weights & bias for nn layers
W1 = tf.get_variable("W13", shape=[784, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([512]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)

W2 = tf.get_variable("W23", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([512]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)

W3 = tf.get_variable("W33", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([512]))
# The third hidden layer must consume L2 through W3/b3; wiring it to
# L1/W2/b2 again would silently drop a layer and leave W3/b3 untrained.
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)

W4 = tf.get_variable("W43", shape=[512, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L3, W4) + b4  # raw logits; softmax is applied inside the loss

# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# per-epoch loss history used for the training curve
validationLossList3 = []
trainLossList3 = []

# number of mini-batches per epoch (constant, so computed once)
total_batch = mnist.train.num_examples // batch_size

# train my model
for epoch in range(training_epochs):
    avg_cost = 0

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch

    # Evaluate on the validation split. Only `cost` is fetched: fetching
    # `optimizer` here would take a gradient step on held-out data and
    # invalidate both this curve and the final test accuracy.
    validationVal = sess.run(
        cost,
        feed_dict={X: mnist.validation.images, Y: mnist.validation.labels})
    print('Epoch:', '%04d' % (epoch + 1),
          'train_loss =', '{:.9f}'.format(avg_cost),
          'validation_loss = ', validationVal)

    validationLossList3.append(validationVal)
    trainLossList3.append(avg_cost)

print('Learning Finished!')

# final accuracy on the untouched test split
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
accuracyVal = sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels})
print("Accuracy is ", accuracyVal)


# Overlay the train/validation loss curves of the three experiments on one figure.
loss_curves = [
    (trainLossList, "4-a-train-loss"),
    (validationLossList, "4-a-valid-loss"),
    (trainLossList2, "4-b-train-loss"),
    (validationLossList2, "4-b-valid-loss"),
    (trainLossList3, "4-c-train-loss"),
    (validationLossList3, "4-c-valid-loss"),
]

for losses, label in loss_curves:
    # Derive the x axis from the number of recorded epochs rather than a
    # hard-coded 1..50, so the plot keeps working if the epoch count changes.
    plt.plot(range(1, len(losses) + 1), losses, label=label)

plt.xlabel('epoch')
plt.ylabel('loss')
plt.title("result")

plt.legend()
plt.show()



( 가 )

정확도 : 0.8981

가장 낮은 정확도를 보여주고 있습니다.

아무래도 node 수가 적은 만큼 낮은 성능을 보여주었습니다.


( 나 )

정확도 : 0.9029

(가) 보다 높은 정확도를 보여주고 있습니다.

아무래도 node 수가 더 많은 만큼 높은 성능을 보여주고 있습니다.

하지만 여전히 부족한 성능이 나타나고 있습니다.

그래프를 비교해보면 거의 근사한 값으로 나타나고 있다는 것을 확인할 수 있었습니다.


( 다 )

정확도 : 약 0.999 (거의 1)

제일 높은 정확도를 보여주고 있습니다.

adam이라는 옵티마이저의 중요성을 여실히 보여주고 있습니다.

또한, (나)보다 빠른 속도로 학습을 한다는 것을 체감할 수 있었습니다.

학습 결과 해당 그래프를 첨부하였습니다. 

이번 시험을 통해, 모델을 어떻게 설계해야 빠르고 좋은 성능의 인공지능을 개발할 수 있는지를 몸소 체험할 수 있었습니다.

반응형