CrossEntropyLoss in PyTorch


CrossEntropyLoss() can get the 0D or more D tensor of the zero or more values(float) computed by Cross Entropy Loss from the 1D or more D input tensor and the 0D or more D target tensor of zero or more elements as shown below:

*Memos:

  • The 1st argument for initialization is weight(Optional-Default:None-Type:tensor of float). *If None, every class weight is treated as 1.
  • There is ignore_index argument for initialization(Optional-Default:-100-Type:int): *Memos:
    • It only works for class indices, so keep it negative(e.g. the default -100) for class probabilities, otherwise an error occurs.
  • There is reduction argument for initialization(Optional-Default:'mean'-Type:str). *'none', 'mean' or 'sum' can be selected.
  • There is label_smoothing argument for initialization(Optional-Default:0.0-Type:float). *It must be in [0, 1].
  • There are size_average and reduce arguments for initialization but they are deprecated.
  • The 1st argument is input(Required-Type:tensor of float). *A 1D or more D tensor can be set. *You don't need to apply softmax() or Softmax() to it because softmax() is applied to it internally.
  • The 2nd argument is target(Required-Type:tensor of int for class indices or tensor of float for class probabilities): *Memos:
    • A target tensor whose size differs from the input tensor's is treated as class indices(the indices of the input tensor's elements). *softmax() or Softmax() is not needed for it because it just holds indices.
    • A target tensor whose size is the same as the input tensor's is treated as class probabilities(each one should be in [0, 1] and each row should sum to 100%). *softmax() or Softmax() should be applied to it because softmax() is not applied to the target internally.
    • A 0D or 1D tensor can be set for class indices.
  • The empty 1D or more D input and target tensors with reduction='mean' return nan.
  • The empty 1D or more D input and target tensors with reduction='sum' or reduction='none' return -0. (negative zero).
  • For class indices and for class probabilities, the computation works as shown in the sketch just below this list.
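A minimal sketch of what CrossEntropyLoss() computes with the default arguments(weight=None, ignore_index=-100, label_smoothing=0.0). *The probs values below are made-up examples; each row sums to 1:

import torch
import torch.nn.functional as F

logits = torch.tensor([[7.4, 2.8, -0.6, 6.3],
                       [-1.9, 4.2, 3.9, 5.1],
                       [9.3, -5.3, 7.2, -8.4]])
log_p = F.log_softmax(logits, dim=1)

# For class indices: take -log_softmax at each target index, then average.
indices = torch.tensor([3, 0, 2])
loss_indices = (-log_p[torch.arange(3), indices]).mean()
# tensor(3.7154), same as nn.CrossEntropyLoss()(logits, indices)

# For class probabilities: weight -log_softmax by each target probability,
# sum over the classes, then average over the samples.
probs = torch.tensor([[0.20, 0.30, 0.10, 0.40],
                      [0.70, 0.10, 0.10, 0.10],
                      [0.25, 0.25, 0.25, 0.25]])
loss_probs = -(probs * log_p).sum(dim=1).mean()
# same as nn.CrossEntropyLoss()(logits, probs)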
import torch
from torch import nn

""" `target` tensor with class indices. """

tensor1 = torch.tensor([[7.4, 2.8, -0.6, 6.3],
                        [-1.9, 4.2, 3.9, 5.1],
                        [9.3, -5.3, 7.2, -8.4]])
tensor2 = torch.tensor([3, 0, 2])
# [softmax([7.4, 2.8, -0.6, 6.3]),
#  softmax([-1.9, 4.2, 3.9, 5.1]),
#  softmax([9.3, -5.3, 7.2, -8.4])]
# ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ 
# [[0.74446, 0.0074832, 0.00024974, <0.24781>],       # 3
#  [<0.00053368>, 0.23794, 0.17627, 0.58525],         # 0
#  [0.8909, 0.00000040657, <0.1091>, 0.000000018315]] # 2
# ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓
# [-ln(0.24781), -ln(0.00053368), -ln(0.1091)]
# ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓
# [1.3951, 7.5357, 2.2155]
# ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓
# 1.3951 + 7.5357 + 2.2155 = 11.1463
# 11.1463 / 3 = 3.7154

cel = nn.CrossEntropyLoss()
cel(input=tensor1, target=tensor2)
# tensor(3.7154)

cel
# CrossEntropyLoss()

print(cel.weight)
# None

cel.ignore_index
# -100

cel.reduction
# 'mean'

cel.label_smoothing
# 0.0

cel = nn.CrossEntropyLoss(weight=None,
                          ignore_index=-100,
                          reduction='mean',
                          label_smoothing=0.0)
cel(input=tensor1, target=tensor2)
# tensor(3.7154)

cel = nn.CrossEntropyLoss(reduction='sum')
cel(input=tensor1, target=tensor2)
# tensor(11.1463)

cel = nn.CrossEntropyLoss(reduction='none')
cel(input=tensor1, target=tensor2)
# tensor([1.3951, 7.5357, 2.2155])

cel = nn.CrossEntropyLoss(weight=torch.tensor([0., 1., 2., 3.]))
cel(input=tensor1, target=tensor2)
# tensor(1.7233)
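With class indices and reduction='mean', the weighted per-sample losses are divided by the sum of the weights that were actually used, not by the number of samples. A quick check with the per-sample losses from above:

# (3.*1.3951 + 0.*7.5357 + 2.*2.2155) / (3. + 0. + 2.)
# = 8.6163 / 5. = 1.7233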

cel = nn.CrossEntropyLoss(ignore_index=2)
cel(input=tensor1, target=tensor2)
# tensor(4.4654)
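With ignore_index=2, the sample whose target is 2 is skipped, and the mean is taken over the remaining samples:

# (1.3951 + 7.5357) / 2 = 4.4654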

cel = nn.CrossEntropyLoss(label_smoothing=0.8)
cel(input=tensor1, target=tensor2)
# tensor(4.8088)
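label_smoothing=0.8 mixes the one-hot targets with a uniform distribution over the 4 classes: smoothed = (1 - 0.8) * one_hot + 0.8 / 4. A minimal sketch reproducing the value through class probabilities:

import torch.nn.functional as F

one_hot = F.one_hot(tensor2, num_classes=4).float()
smoothed = one_hot * (1 - 0.8) + 0.8 / 4
nn.CrossEntropyLoss()(input=tensor1, target=smoothed)
# tensor(4.8088)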

""" `target` tensor with class probabilities. """

tensor1 = torch.tensor([[7.4, 2.8, -0.6],
                        [6.3, -1.9, 4.2]])
tensor2 = torch.tensor([[3.9, 5.1, 9.3],
                        [-5.3, 7.2, -8.4]])
# [softmax([7.4, 2.8, -0.6]),
#  softmax([6.3, -1.9, 4.2])]
# [softmax([3.9, 5.1, 9.3]),
#  softmax([-5.3, 7.2, -8.4])]
# ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓
# [[0.98972(A1), 0.0099485(B1), 0.00033201(C1)],
#  [0.89069(D1), 0.00024463(E1), 0.10907(F1)]]
# [[0.0044301(A2), 0.014709(B2), 0.98086(C2)],
#  [0.0000037266(D2), 1.0(E2), 0.00000016788(F2)]]
# ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓
# [[ln(A1)*A2*1(w), ln(B1)*B2*1(w), ln(C1)*C2*1(w)],
#  [ln(D1)*D2*1(w), ln(E1)*E2*1(w), ln(F1)*F2*1(w)]]
# ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓
# [[-0.00004578, -0.0678, -7.857],
#  [-0.00000043139, -8.3157, -0.00000037198]]
# ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓
# -((-0.00004578) + (-0.0678) + (-7.857)) = 7.9249
# -((-0.00000043139) + (-8.3157) + (-0.00000037198)) = 8.3157
# 7.9249 + 8.3157 = 16.2406
# 16.2406 / 2 = 8.1203

cel = nn.CrossEntropyLoss()
cel(input=tensor1, target=tensor2.softmax(dim=1))
# tensor(8.1203)

cel
# CrossEntropyLoss()

print(cel.weight)
# None

cel.ignore_index
# -100

cel.reduction
# 'mean'

cel.label_smoothing
# 0.0

cel = nn.CrossEntropyLoss(weight=None,
                          ignore_index=-100,
                          reduction='mean',
                          label_smoothing=0.0)
cel(input=tensor1, target=tensor2.softmax(dim=1))
# tensor(8.1203)

cel = nn.CrossEntropyLoss(reduction='sum')
cel(input=tensor1, target=tensor2.softmax(dim=1))
# tensor(16.2406)

cel = nn.CrossEntropyLoss(reduction='none')
cel(input=tensor1, target=tensor2.softmax(dim=1))
# tensor([7.9249, 8.3157])

cel = nn.CrossEntropyLoss(weight=torch.tensor([0., 1., 2.]))
cel(input=tensor1, target=tensor2.softmax(dim=1))
# tensor(12.0488)
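Note that for class probabilities with reduction='mean', the weighted per-sample losses are divided by the number of samples, not by the weight sum as with class indices:

# sample 1: -(0.*ln(A1)*A2 + 1.*ln(B1)*B2 + 2.*ln(C1)*C2) ≈ 0.0678 + 2*7.857 = 15.7818
# sample 2: -(0.*ln(D1)*D2 + 1.*ln(E1)*E2 + 2.*ln(F1)*F2) ≈ 8.3157
# (15.7818 + 8.3157) / 2 = 12.0488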

cel = nn.CrossEntropyLoss(label_smoothing=0.8)
cel(input=tensor1, target=tensor2.softmax(dim=1))
# tensor(4.7278)
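The same mixture applies to soft targets, here over the 3 classes: smoothed = (1 - 0.8) * probabilities + 0.8 / 3. A minimal sketch reproducing the value:

smoothed = tensor2.softmax(dim=1) * (1 - 0.8) + 0.8 / 3
nn.CrossEntropyLoss()(input=tensor1, target=smoothed)
# tensor(4.7278)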

tensor1 = torch.tensor([])
tensor2 = torch.tensor([])

cel = nn.CrossEntropyLoss(reduction='mean')
cel(input=tensor1, target=tensor2.softmax(dim=0))
# tensor(nan)

cel = nn.CrossEntropyLoss(reduction='sum')
cel(input=tensor1, target=tensor2.softmax(dim=0))
# tensor(-0.)

cel = nn.CrossEntropyLoss(reduction='none')
cel(input=tensor1, target=tensor2.softmax(dim=0))
# tensor(-0.)
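The -0. is presumably just negative zero in floating point: the sum over an empty tensor is 0., and the leading minus of the loss negates it.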