

Deep Learning / Classification: Using an LSTM for Classification

By jedfsjfjsdf · 2019-09-16 21:35 · Source: FX168财经网人物频道
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
import numpy as np
/Users/jiaohaibin/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
data = 'abcdefghijklmnopqrstuvwxyz'
#data_set = set(data)
data_set = list(data)  # use a list so the character order stays stable
word_len = len(data_set)  # 26

# build the dictionary: character -> integer index
word_2_int = {b: a for a, b in enumerate(data_set)}

# and the inverse mapping: integer index -> character
int_2_word = {a: b for a, b in enumerate(data_set)}

print(word_2_int)
print(int_2_word)
word_len
{'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5, 'g': 6, 'h': 7, 'i': 8, 'j': 9, 'k': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'q': 16, 'r': 17, 's': 18, 't': 19, 'u': 20, 'v': 21, 'w': 22, 'x': 23, 'y': 24, 'z': 25}
{0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e', 5: 'f', 6: 'g', 7: 'h', 8: 'i', 9: 'j', 10: 'k', 11: 'l', 12: 'm', 13: 'n', 14: 'o', 15: 'p', 16: 'q', 17: 'r', 18: 's', 19: 't', 20: 'u', 21: 'v', 22: 'w', 23: 'x', 24: 'y', 25: 'z'}
26
def words_2_ints(words):
    ints = []
    for itmp in words:
        ints.append(word_2_int[itmp])
    return ints
 
print(words_2_ints('ab'))
 
def words_2_one_hot(words, num_classes=word_len):
    return keras.utils.to_categorical(words_2_ints(words), num_classes=num_classes)
print(words_2_one_hot('a'))
[0, 1]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]]
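Note that keras.utils.to_categorical always returns a 2-D float array with one row per input index, even for a single character; this is why a [0] index shows up later in the data generator. A quick check:

print(words_2_one_hot('a').shape)    # (1, 26)
print(words_2_one_hot('abc').shape)  # (3, 26)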
def get_one_hot_max_idx(one_hot):
    idx_ = 0
    max_ = 0
    for i in range(len(one_hot)):
        if max_ < one_hot[i]:
            max_ = one_hot[i]
            idx_ = i
    return idx_
 
def one_hot_2_words(one_hot):
    tmp = []
    for itmp in one_hot:
        tmp.append(int_2_word[get_one_hot_max_idx(itmp)])
    return "".join(tmp)
 
words_2_one_hot('abcd')[0]
array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)
print( one_hot_2_words(words_2_one_hot('abcd')) )
abcd
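The hand-rolled argmax above works, but np.argmax does the same thing in one call. A minimal equivalent decoder, assuming the same int_2_word mapping defined earlier:

def one_hot_2_words_np(one_hot):
    # argmax along the last axis gives the index of the 1 in each one-hot row
    return "".join(int_2_word[int(i)] for i in np.argmax(one_hot, axis=-1))

print(one_hot_2_words_np(words_2_one_hot('abcd')))  # abcd, same as the loop version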
time_step = 3  # sequence length: each "sentence" is 3 characters
 
def generate_data(batch_size=5, generate_num=100):
    # generate_num = -1 means loop forever; generate_num = 1 yields a single batch, and so on.
    # The dataset size is open-ended here, so we simply keep cycling through it.
    # batch_size controls how many samples go into each yielded batch.
    '''
    For example, with batch_size=5, one batch contains the five samples:
    abc->d
    bcd->e
    cde->f
    def->g
    efg->h

    All of this is converted to one-hot form. The final input x looks like:

    [batch 1]
    [batch 2]
    ...
    [batch generate_num]

    Each batch is a list of sentences:

    [sentence 1 (e.g. abc)]
    [sentence 2 (e.g. bcd)]
    ...

    And each sentence is a list of one-hot word vectors:

    [one-hot vector of word 1]
    [one-hot vector of word 2]
    ...
    '''
    cnt = 0
    batch_x = []
    batch_y = []
    sample_num = 0
    while True:
        for i in range(len(data) - time_step):
            batch_x.append(words_2_one_hot(data[i : i+time_step]))
            batch_y.append(words_2_one_hot(data[i+time_step])[0])
            # The [0] strips the leading dimension so that y matches the 2-D target
            # shape Keras expects; without it, y would be 3-D. Try removing it and
            # inspect the test printout below to see the difference.
            sample_num += 1
            #print('sample num is :', sample_num)
            if len(batch_x) == batch_size:
                yield (np.array(batch_x), np.array(batch_y))
                batch_x = []
                batch_y = []
                if generate_num != -1:
                    cnt += 1

                if cnt == generate_num:
                    return
            
for test in generate_data(batch_size=3, generate_num=1):
    print('--------x:')
    print(test[0])
    print('--------y:')
    print(test[1])
--------x:
[[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0.]
  [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0.]]

 [[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0.]
  [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0.]]

 [[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0.]
  [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
   0. 0. 0.]]]
--------y:
[[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]]
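Since the alphabet only yields len(data) - time_step = 23 sequences, a generator is not strictly necessary here. As a sketch using the helpers above, the whole dataset could instead be materialized as two NumPy arrays and passed to plain model.fit:

# x: (23, 3, 26) one-hot sentences; y: (23, 26) one-hot next characters
all_x = np.array([words_2_one_hot(data[i:i+time_step])
                  for i in range(len(data) - time_step)])
all_y = np.array([words_2_one_hot(data[i+time_step])[0]
                  for i in range(len(data) - time_step)])
# model.fit(all_x, all_y, batch_size=5, epochs=50)  # alternative to fit_generator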
 
model = Sequential()
 
# LSTM output dimension: 128
# input_shape controls the shape of the input data:

# time_step: words per sentence (sequence length, 3 characters here)
# word_len: dimensionality of each word vector (26 here)

model.add(LSTM(128, input_shape=(time_step, word_len)))  # 3 x 26
model.add(Dense(word_len, activation='softmax'))
# A softmax output layer classifies over all 26 letters: the predicted next character.
 
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'] )
model.summary()
#print(model.summary())
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm_5 (LSTM)                (None, 128)               79360     
_________________________________________________________________
dense_5 (Dense)              (None, 26)                3354      
=================================================================
Total params: 82,714
Trainable params: 82,714
Non-trainable params: 0
_________________________________________________________________
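The parameter counts in the summary can be verified by hand: each of the LSTM's four gates has a kernel over the input, a recurrent kernel over the hidden state, and a bias, while the Dense layer is a plain affine map. A quick sanity check:

units = 128
lstm_params = 4 * ((word_len + units) * units + units)  # 4 * ((26+128)*128 + 128) = 79360
dense_params = units * word_len + word_len              # 128*26 + 26 = 3354
print(lstm_params, dense_params)  # matches model.summary()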
history = model.fit_generator(generator=generate_data(batch_size=5, generate_num=-1),
                              epochs=50, steps_per_epoch=10)

# steps_per_epoch: how many batches are drawn from the generator per epoch.
# batch_size: how many samples are in each batch.
# So batch_size * steps_per_epoch (5 * 10 = 50 here) is the number of samples
# processed per epoch; since only 23 distinct sequences exist, some repeat within
# each epoch. You can set epochs to 1 or 2 and print the sample index inside
# generate_data to verify this; the sketch after the training log below counts it directly.
Epoch 1/50
10/10 [==============================] - 1s 139ms/step - loss: 3.2392 - acc: 0.1400
Epoch 2/50
10/10 [==============================] - 0s 9ms/step - loss: 3.1828 - acc: 0.4400
Epoch 3/50
10/10 [==============================] - 0s 8ms/step - loss: 3.1374 - acc: 0.7400
Epoch 4/50
10/10 [==============================] - 0s 9ms/step - loss: 3.0891 - acc: 0.8400
Epoch 5/50
10/10 [==============================] - 0s 8ms/step - loss: 3.0287 - acc: 0.9200
Epoch 6/50
10/10 [==============================] - 0s 7ms/step - loss: 2.9627 - acc: 0.9600
Epoch 7/50
10/10 [==============================] - 0s 7ms/step - loss: 2.8829 - acc: 0.9800
Epoch 8/50
10/10 [==============================] - 0s 7ms/step - loss: 2.7913 - acc: 1.0000
Epoch 9/50
10/10 [==============================] - 0s 7ms/step - loss: 2.6982 - acc: 1.0000
Epoch 10/50
10/10 [==============================] - 0s 7ms/step - loss: 2.5757 - acc: 1.0000
Epoch 11/50
10/10 [==============================] - 0s 7ms/step - loss: 2.4265 - acc: 1.0000
Epoch 12/50
10/10 [==============================] - 0s 7ms/step - loss: 2.2556 - acc: 1.0000
Epoch 13/50
10/10 [==============================] - 0s 7ms/step - loss: 2.0592 - acc: 1.0000
Epoch 14/50
10/10 [==============================] - 0s 6ms/step - loss: 1.8639 - acc: 1.0000
Epoch 15/50
10/10 [==============================] - 0s 6ms/step - loss: 1.6798 - acc: 1.0000
Epoch 16/50
10/10 [==============================] - 0s 6ms/step - loss: 1.4371 - acc: 1.0000
Epoch 17/50
10/10 [==============================] - 0s 7ms/step - loss: 1.2097 - acc: 1.0000
Epoch 18/50
10/10 [==============================] - 0s 6ms/step - loss: 0.9973 - acc: 1.0000
Epoch 19/50
10/10 [==============================] - 0s 6ms/step - loss: 0.8093 - acc: 1.0000
Epoch 20/50
10/10 [==============================] - 0s 6ms/step - loss: 0.6408 - acc: 1.0000
Epoch 21/50
10/10 [==============================] - 0s 6ms/step - loss: 0.5214 - acc: 1.0000
Epoch 22/50
10/10 [==============================] - 0s 6ms/step - loss: 0.3904 - acc: 1.0000
Epoch 23/50
10/10 [==============================] - 0s 6ms/step - loss: 0.2961 - acc: 1.0000
Epoch 24/50
10/10 [==============================] - 0s 5ms/step - loss: 0.2180 - acc: 1.0000
Epoch 25/50
10/10 [==============================] - 0s 6ms/step - loss: 0.1584 - acc: 1.0000
Epoch 26/50
10/10 [==============================] - 0s 6ms/step - loss: 0.1140 - acc: 1.0000
Epoch 27/50
10/10 [==============================] - 0s 7ms/step - loss: 0.0888 - acc: 1.0000
Epoch 28/50
10/10 [==============================] - 0s 7ms/step - loss: 0.0648 - acc: 1.0000
Epoch 29/50
10/10 [==============================] - 0s 7ms/step - loss: 0.0490 - acc: 1.0000
Epoch 30/50
10/10 [==============================] - 0s 7ms/step - loss: 0.0371 - acc: 1.0000
Epoch 31/50
10/10 [==============================] - 0s 7ms/step - loss: 0.0268 - acc: 1.0000
Epoch 32/50
10/10 [==============================] - 0s 7ms/step - loss: 0.0197 - acc: 1.0000
Epoch 33/50
10/10 [==============================] - 0s 6ms/step - loss: 0.0151 - acc: 1.0000
Epoch 34/50
10/10 [==============================] - 0s 10ms/step - loss: 0.0111 - acc: 1.0000
Epoch 35/50
10/10 [==============================] - 0s 10ms/step - loss: 0.0081 - acc: 1.0000
Epoch 36/50
10/10 [==============================] - 0s 8ms/step - loss: 0.0060 - acc: 1.0000
Epoch 37/50
10/10 [==============================] - 0s 7ms/step - loss: 0.0042 - acc: 1.0000
Epoch 38/50
10/10 [==============================] - 0s 6ms/step - loss: 0.0031 - acc: 1.0000
Epoch 39/50
10/10 [==============================] - 0s 6ms/step - loss: 0.0023 - acc: 1.0000
Epoch 40/50
10/10 [==============================] - 0s 7ms/step - loss: 0.0016 - acc: 1.0000
Epoch 41/50
10/10 [==============================] - 0s 7ms/step - loss: 0.0012 - acc: 1.0000
Epoch 42/50
10/10 [==============================] - 0s 10ms/step - loss: 8.4960e-04 - acc: 1.0000
Epoch 43/50
10/10 [==============================] - 0s 11ms/step - loss: 5.8763e-04 - acc: 1.0000
Epoch 44/50
10/10 [==============================] - 0s 9ms/step - loss: 4.2971e-04 - acc: 1.0000
Epoch 45/50
10/10 [==============================] - 0s 8ms/step - loss: 3.1050e-04 - acc: 1.0000
Epoch 46/50
10/10 [==============================] - 0s 8ms/step - loss: 2.1903e-04 - acc: 1.0000
Epoch 47/50
10/10 [==============================] - 0s 12ms/step - loss: 1.5835e-04 - acc: 1.0000
Epoch 48/50
10/10 [==============================] - 0s 11ms/step - loss: 1.1565e-04 - acc: 1.0000
Epoch 49/50
10/10 [==============================] - 0s 10ms/step - loss: 8.0106e-05 - acc: 1.0000
Epoch 50/50
10/10 [==============================] - 0s 11ms/step - loss: 6.0480e-05 - acc: 1.0000
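To confirm how many samples one epoch consumes, we can drain exactly steps_per_epoch batches from the generator and count them (a small sketch using the generator defined above):

seen = 0
for bx, by in generate_data(batch_size=5, generate_num=10):  # one epoch's worth of batches
    seen += len(bx)
print(seen)  # 50 = batch_size * steps_per_epoch; the 23 distinct sequences repeat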
history.history['acc']
[0.14000000208616256,
 0.44000001102685926,
 0.7400000095367432,
 0.8400000095367431,
 0.9200000047683716,
 0.9600000023841858,
 0.9800000011920929,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0]
import matplotlib.pyplot as plt

acc = history.history['acc']
#val_acc = history.history['val_acc']
loss = history.history['loss']
#val_loss = history.history['val_loss']
epochs = range(len(acc))  # x-axis: one point per epoch

plt.figure()
# line style: blue dots
plt.plot(epochs, acc, 'bo', label='Training acc')
#plt.plot(epochs, val_acc, 'b', label='Validation acc')

plt.title('Training and validation accuracy')  # title
plt.legend()  # legend

plt.show()
plt.figure()
# line style: blue dots
plt.plot(epochs, loss, 'bo', label='Training loss')
#plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')  # title
plt.legend()  # legend

plt.show()
result = model.predict(np.array([words_2_one_hot('bcd')]))
print(one_hot_2_words(result))
e
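As a follow-up, the trained model can generate a longer run of the alphabet by feeding each prediction back in as input. A minimal sketch, assuming the model above has converged:

seed = 'abc'
for _ in range(5):
    pred = model.predict(np.array([words_2_one_hot(seed[-time_step:])]))
    seed += one_hot_2_words(pred)
print(seed)  # ideally 'abcdefgh' if the model has learned the sequence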
 