.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   from mxnet import gluon, init, np, npx
   from mxnet.gluon import nn
   from d2l import mxnet as d2l

   npx.set_np()

   batch_size = 64
   train_iter, test_iter, vocab = d2l.load_data_imdb(batch_size)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   [07:22:57] ../src/storage/storage.cc:196: Using Pooled (Naive) StorageManager for CPU

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   import torch
   from torch import nn
   from d2l import torch as d2l

   batch_size = 64
   train_iter, test_iter, vocab = d2l.load_data_imdb(batch_size)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   Downloading ../data/aclImdb_v1.tar.gz from http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz...

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   import warnings
   from d2l import paddle as d2l

   warnings.filterwarnings("ignore")
   import paddle
   from paddle import nn

   batch_size = 64
   train_iter, test_iter, vocab = d2l.load_data_imdb(batch_size)

.. raw:: html

.. raw:: html

mxnet pytorch paddle

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   def corr1d(X, K):
       """One-dimensional cross-correlation of X with the kernel K.

       The kernel is slid over X one position at a time; each output
       entry is the sum of the elementwise product of K with the window
       of X it currently covers. The result has
       X.shape[0] - K.shape[0] + 1 entries.
       """
       width = K.shape[0]
       Y = np.zeros((X.shape[0] - width + 1))
       for start in range(Y.shape[0]):
           window = X[start: start + width]
           Y[start] = (window * K).sum()
       return Y

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   def corr1d(X, K):
       """One-dimensional cross-correlation of X with the kernel K.

       The kernel is slid over X one position at a time; each output
       entry is the sum of the elementwise product of K with the window
       of X it currently covers. The result is allocated with
       torch.zeros and has X.shape[0] - K.shape[0] + 1 entries.
       """
       width = K.shape[0]
       Y = torch.zeros((X.shape[0] - width + 1))
       for start in range(Y.shape[0]):
           window = X[start: start + width]
           Y[start] = (window * K).sum()
       return Y

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   def corr1d(X, K):
       """One-dimensional cross-correlation of X with the kernel K.

       The kernel is slid over X one position at a time; each output
       entry is the sum of the elementwise product of K with the window
       of X it currently covers. The result shares X's dtype and has
       X.shape[0] - K.shape[0] + 1 entries.
       """
       width = K.shape[0]
       Y = paddle.zeros([X.shape[0] - width + 1], dtype=X.dtype)
       for start in range(Y.shape[0]):
           window = X[start: start + width]
           Y[start] = (window * K).sum()
       return Y

.. raw:: html

.. raw:: html

mxnet pytorch paddle

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   X, K = np.array([0, 1, 2, 3, 4, 5, 6]), np.array([1, 2])
   corr1d(X, K)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   array([ 2., 5., 8., 11., 14., 17.])

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   X, K = torch.tensor([0, 1, 2, 3, 4, 5, 6]), torch.tensor([1, 2])
   corr1d(X, K)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   tensor([ 2., 5., 8., 11., 14., 17.])

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   X, K = paddle.to_tensor([0, 1, 2, 3, 4, 5, 6]), paddle.to_tensor([1, 2])
   corr1d(X, K)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   W0818 09:28:13.297365 95147 gpu_resources.cc:61] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.8, Runtime API Version: 11.8
   W0818 09:28:13.329474 95147 gpu_resources.cc:91] device: 0, cuDNN Version: 8.7.

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   Tensor(shape=[6], dtype=int64, place=Place(gpu:0), stop_gradient=True,
          [2 , 5 , 8 , 11, 14, 17])

.. raw:: html

.. raw:: html

mxnet pytorch paddle

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   def corr1d_multi_in(X, K):
       """One-dimensional cross-correlation with multiple input channels.

       X and K are walked in lockstep along dimension 0 (the channel
       dimension); corr1d is applied to every (channel, kernel) pair and
       the per-channel results are added together.
       """
       total = 0
       for x, k in zip(X, K):
           total = total + corr1d(x, k)
       return total

   X = np.array([[0, 1, 2, 3, 4, 5, 6],
                 [1, 2, 3, 4, 5, 6, 7],
                 [2, 3, 4, 5, 6, 7, 8]])
   K = np.array([[1, 2], [3, 4], [-1, -3]])
   corr1d_multi_in(X, K)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   array([ 2., 8., 14., 20., 26., 32.])

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   def corr1d_multi_in(X, K):
       """One-dimensional cross-correlation with multiple input channels.

       X and K are walked in lockstep along dimension 0 (the channel
       dimension); corr1d is applied to every (channel, kernel) pair and
       the per-channel results are added together.
       """
       total = 0
       for x, k in zip(X, K):
           total = total + corr1d(x, k)
       return total

   X = torch.tensor([[0, 1, 2, 3, 4, 5, 6],
                     [1, 2, 3, 4, 5, 6, 7],
                     [2, 3, 4, 5, 6, 7, 8]])
   K = torch.tensor([[1, 2], [3, 4], [-1, -3]])
   corr1d_multi_in(X, K)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   tensor([ 2., 8., 14., 20., 26., 32.])

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   def corr1d_multi_in(X, K):
       """One-dimensional cross-correlation with multiple input channels.

       X and K are walked in lockstep along dimension 0 (the channel
       dimension); corr1d is applied to every (channel, kernel) pair and
       the per-channel results are added together.
       """
       total = 0
       for x, k in zip(X, K):
           total = total + corr1d(x, k)
       return total

   X = paddle.to_tensor([[0, 1, 2, 3, 4, 5, 6],
                         [1, 2, 3, 4, 5, 6, 7],
                         [2, 3, 4, 5, 6, 7, 8]])
   K = paddle.to_tensor([[1, 2], [3, 4], [-1, -3]])
   corr1d_multi_in(X, K)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   Tensor(shape=[6], dtype=int64, place=Place(gpu:0), stop_gradient=True,
          [2 , 8 , 14, 20, 26, 32])

.. raw:: html

.. raw:: html

mxnet pytorch paddle

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   class TextCNN(nn.Block):
       """textCNN classifier: two embeddings, parallel 1-D convolutions,
       global max pooling over time, dropout and a 2-way dense output."""

       def __init__(self, vocab_size, embed_size, kernel_sizes, num_channels,
                    **kwargs):
           super().__init__(**kwargs)
           self.embedding = nn.Embedding(vocab_size, embed_size)
           # This second embedding layer is not meant to be trained
           self.constant_embedding = nn.Embedding(vocab_size, embed_size)
           self.dropout = nn.Dropout(0.5)
           self.decoder = nn.Dense(2)
           # The max-over-time pooling layer has no parameters, so a single
           # instance can be shared by all convolution branches
           self.pool = nn.GlobalMaxPool1D()
           # One 1-D convolution per (channels, kernel width) pair
           self.convs = nn.Sequential()
           for c, k in zip(num_channels, kernel_sizes):
               self.convs.add(nn.Conv1D(c, k, activation='relu'))

       def forward(self, inputs):
           # Each embedding output has shape (batch size, number of tokens,
           # token vector dimension); join the two along the vector dimension
           trainable = self.embedding(inputs)
           frozen = self.constant_embedding(inputs)
           embeddings = np.concatenate((trainable, frozen), axis=2)
           # Rearrange for the 1-D convolution input format so that the
           # channel axis comes second
           embeddings = embeddings.transpose(0, 2, 1)
           # After max-over-time pooling every branch yields a tensor of
           # shape (batch size, number of channels, 1); drop the last axis
           # and join the branches along the channel axis
           pooled = []
           for conv in self.convs:
               pooled.append(np.squeeze(self.pool(conv(embeddings)), axis=-1))
           encoding = np.concatenate(pooled, axis=1)
           return self.decoder(self.dropout(encoding))

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   class TextCNN(nn.Module):
       """textCNN classifier: two embeddings, parallel 1-D convolutions,
       max-over-time pooling, dropout and a 2-way linear output.

       Args:
           vocab_size: number of rows in each embedding table.
           embed_size: width of each embedding vector.
           kernel_sizes: kernel width of every convolution branch.
           num_channels: output channels of every convolution branch
               (paired positionally with kernel_sizes).
       """

       def __init__(self, vocab_size, embed_size, kernel_sizes, num_channels,
                    **kwargs):
           super(TextCNN, self).__init__(**kwargs)
           self.embedding = nn.Embedding(vocab_size, embed_size)
           # This second embedding layer is not meant to be trained
           self.constant_embedding = nn.Embedding(vocab_size, embed_size)
           self.dropout = nn.Dropout(0.5)
           self.decoder = nn.Linear(sum(num_channels), 2)
           # Max-over-time pooling (previously an *average* pool, which
           # contradicted the comments). The layer has no parameters, so a
           # single instance can be shared by all convolution branches
           self.pool = nn.AdaptiveMaxPool1d(1)
           self.relu = nn.ReLU()
           # One 1-D convolution per (channels, kernel width) pair; the
           # input channels are the two concatenated embeddings
           self.convs = nn.ModuleList()
           for c, k in zip(num_channels, kernel_sizes):
               self.convs.append(nn.Conv1d(2 * embed_size, c, k))

       def forward(self, inputs):
           # Each embedding output has shape (batch size, number of tokens,
           # token vector dimension); join the two along the vector dimension
           embeddings = torch.cat((
               self.embedding(inputs), self.constant_embedding(inputs)), dim=2)
           # Rearrange for the 1-D convolution input format so that the
           # channel axis comes second
           embeddings = embeddings.permute(0, 2, 1)
           # After max-over-time pooling every branch yields a tensor of
           # shape (batch size, number of channels, 1); drop the last axis
           # and join the branches along the channel axis. ReLU is monotonic,
           # so applying it after the max equals applying it before.
           encoding = torch.cat([
               torch.squeeze(self.relu(self.pool(conv(embeddings))), dim=-1)
               for conv in self.convs], dim=1)
           outputs = self.decoder(self.dropout(encoding))
           return outputs

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   class TextCNN(nn.Layer):
       """textCNN classifier: two embeddings, parallel 1-D convolutions,
       max-over-time pooling, dropout and a 2-way linear output.

       Args:
           vocab_size: number of rows in each embedding table.
           embed_size: width of each embedding vector.
           kernel_sizes: kernel width of every convolution branch.
           num_channels: output channels of every convolution branch
               (paired positionally with kernel_sizes).
       """

       def __init__(self, vocab_size, embed_size, kernel_sizes, num_channels,
                    **kwargs):
           super(TextCNN, self).__init__(**kwargs)
           self.embedding = nn.Embedding(vocab_size, embed_size)
           # This second embedding layer is not meant to be trained
           self.constant_embedding = nn.Embedding(vocab_size, embed_size)
           self.dropout = nn.Dropout(0.5)
           self.decoder = nn.Linear(sum(num_channels), 2)
           # Max-over-time pooling (previously an *average* pool, which
           # contradicted the comments). The layer has no parameters, so a
           # single instance can be shared by all convolution branches
           self.pool = nn.AdaptiveMaxPool1D(1)
           self.relu = nn.ReLU()
           # One 1-D convolution per (channels, kernel width) pair; the
           # input channels are the two concatenated embeddings
           self.convs = nn.LayerList()
           for c, k in zip(num_channels, kernel_sizes):
               self.convs.append(nn.Conv1D(2 * embed_size, c, k))

       def forward(self, inputs):
           # Each embedding output has shape (batch size, number of tokens,
           # token vector dimension); join the two along the vector dimension
           embeddings = paddle.concat((
               self.embedding(inputs), self.constant_embedding(inputs)), axis=2)
           # Rearrange for the 1-D convolution input format so that the
           # channel axis comes second
           embeddings = embeddings.transpose([0, 2, 1])
           # After max-over-time pooling every branch yields a tensor of
           # shape (batch size, number of channels, 1); drop the last axis
           # and join the branches along the channel axis. ReLU is monotonic,
           # so applying it after the max equals applying it before.
           encoding = paddle.concat([
               paddle.squeeze(self.relu(self.pool(conv(embeddings))), axis=-1)
               for conv in self.convs], axis=1)
           outputs = self.decoder(self.dropout(encoding))
           return outputs

.. raw:: html

.. raw:: html

mxnet pytorch paddle

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   embed_size, kernel_sizes, nums_channels = 100, [3, 4, 5], [100, 100, 100]
   devices = d2l.try_all_gpus()
   net = TextCNN(len(vocab), embed_size, kernel_sizes, nums_channels)
   net.initialize(init.Xavier(), ctx=devices)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   [07:23:02] ../src/storage/storage.cc:196: Using Pooled (Naive) StorageManager for GPU
   [07:23:03] ../src/storage/storage.cc:196: Using Pooled (Naive) StorageManager for GPU

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   embed_size, kernel_sizes, nums_channels = 100, [3, 4, 5], [100, 100, 100]
   devices = d2l.try_all_gpus()
   net = TextCNN(len(vocab), embed_size, kernel_sizes, nums_channels)

   def init_weights(m):
       """Xavier-uniform initialise the weight of a linear or 1-D conv layer.

       Intended to be passed to ``net.apply``; modules of any other kind
       are left untouched.
       """
       # isinstance (rather than an exact type match) also covers subclasses
       if isinstance(m, (nn.Linear, nn.Conv1d)):
           nn.init.xavier_uniform_(m.weight)

   net.apply(init_weights);

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   embed_size, kernel_sizes, nums_channels = 100, [3, 4, 5], [100, 100, 100]
   devices = d2l.try_all_gpus()
   net = TextCNN(len(vocab), embed_size, kernel_sizes, nums_channels)

   def init_weights(net):
       """Apply Xavier-uniform initialisation to the weight of every
       Linear and Conv1D sublayer of ``net``; other sublayers are skipped."""
       # Named after what it is: a Xavier-uniform (not normal) initializer
       xavier_uniform = nn.initializer.XavierUniform()
       for layer in net.sublayers():
           # isinstance (rather than an exact type match) also covers subclasses
           if isinstance(layer, (nn.Linear, nn.Conv1D)):
               xavier_uniform(layer.weight)

   init_weights(net)

.. raw:: html

.. raw:: html

mxnet pytorch paddle

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   glove_embedding = d2l.TokenEmbedding('glove.6b.100d')
   embeds = glove_embedding[vocab.idx_to_token]
   net.embedding.weight.data.copy_(embeds)
   net.constant_embedding.weight.data.copy_(embeds)
   net.constant_embedding.weight.requires_grad = False

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   Downloading ../data/glove.6B.100d.zip from http://d2l-data.s3-accelerate.amazonaws.com/glove.6B.100d.zip...

.. raw:: html

.. raw:: html

mxnet pytorch paddle

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   lr, num_epochs = 0.001, 5
   trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr})
   loss = gluon.loss.SoftmaxCrossEntropyLoss()
   d2l.train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs, devices)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   loss 0.093, train acc 0.967, test acc 0.869
   2278.6 examples/sec on [gpu(0), gpu(1)]

.. figure:: output_sentiment-analysis-cnn_900d1d_87_1.svg

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   lr, num_epochs = 0.001, 5
   trainer = torch.optim.Adam(net.parameters(), lr=lr)
   loss = nn.CrossEntropyLoss(reduction="none")
   d2l.train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs, devices)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   loss 0.066, train acc 0.978, test acc 0.861
   4609.1 examples/sec on [device(type='cuda', index=0), device(type='cuda', index=1)]

.. figure:: output_sentiment-analysis-cnn_900d1d_90_1.svg

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   lr, num_epochs = 0.001, 5
   trainer = paddle.optimizer.Adam(learning_rate=lr, parameters=net.parameters())
   loss = nn.CrossEntropyLoss(reduction="none")
   d2l.train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs, devices)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   loss 0.067, train acc 0.978, test acc 0.873
   5186.2 examples/sec on [Place(gpu:0), Place(gpu:1)]

.. figure:: output_sentiment-analysis-cnn_900d1d_93_1.svg

.. raw:: html

.. raw:: html

mxnet pytorch paddle

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   d2l.predict_sentiment(net, vocab, 'this movie is so great')

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   'positive'

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

   d2l.predict_sentiment(net, vocab, 'this movie is so bad')

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
   :class: output

   'negative'

.. raw:: html

.. raw:: html