
Commit

Trying to solve #485 to no avail, at least drop parallelization of sigmoid_cross_entropy
mratsim committed Jan 3, 2021
1 parent 69efb08 commit 979f5d5
Showing 2 changed files with 63 additions and 55 deletions.
96 changes: 49 additions & 47 deletions benchmarks/ex01_xor.nim
@@ -1,50 +1,52 @@
import ../src/arraymancer

# Learning XOR function with a neural network.

# Autograd context / neuralnet graph
let ctx = newContext Tensor[float32]
let bsz = 32 # batch size

let x_train_bool = randomTensor([bsz * 100, 2], 1).astype(bool)
let y_bool = x_train_bool[_,0] xor x_train_bool[_,1]
let x_train = ctx.variable(x_train_bool.astype(float32))
let y = y_bool.astype(float32)

# We will build the following network:
# Input --> Linear(out_features = 3) --> relu --> Linear(out_features = 1) --> Sigmoid --> Cross-Entropy Loss

let layer_3neurons = ctx.variable(
    randomTensor(3, 2, 2.0f) -. 1.0f,
    requires_grad = true
  )

let classifier_layer = ctx.variable(
    randomTensor(1, 3, 2.0f) -. 1.0f,
    requires_grad = true
  )

# Stochastic Gradient Descent
let optim = newSGD[float32](
  layer_3neurons, classifier_layer, 0.01f
)

# Learning loop
for epoch in 0..10000:
  for batch_id in 0..<100:

    # minibatch offset in the Tensor
    let offset = batch_id * 32
    let x = x_train[offset ..< offset + 32, _]
    let target = y[offset ..< offset + 32, _]

    # Building the network
    let n1 = relu linear(x, layer_3neurons)
    let n2 = linear(n1, classifier_layer)
    let loss = n2.sigmoid_cross_entropy(target)

    # Compute the gradient (i.e. contribution of each parameter to the loss)
    loss.backprop()

    # Correct the weights now that we have the gradient information
    optim.update()
proc main() =
  # Autograd context / neuralnet graph
  let ctx = newContext Tensor[float32]
  let bsz = 32 # batch size

  let x_train_bool = randomTensor([bsz * 100, 2], 1).astype(bool)
  let y_bool = x_train_bool[_,0] xor x_train_bool[_,1]
  let x_train = ctx.variable(x_train_bool.astype(float32))
  let y = y_bool.astype(float32)

  # We will build the following network:
  # Input --> Linear(out_features = 3) --> relu --> Linear(out_features = 1) --> Sigmoid --> Cross-Entropy Loss

  let layer_3neurons = ctx.variable(
      randomTensor(3, 2, 2.0f) -. 1.0f,
      requires_grad = true
    )

  let classifier_layer = ctx.variable(
      randomTensor(1, 3, 2.0f) -. 1.0f,
      requires_grad = true
    )

  # Stochastic Gradient Descent
  let optim = newSGD[float32](
    layer_3neurons, classifier_layer, 0.01f
  )

  # Learning loop
  for epoch in 0..10000:
    for batch_id in 0..<100:

      # minibatch offset in the Tensor
      let offset = batch_id * 32
      let x = x_train[offset ..< offset + 32, _]
      let target = y[offset ..< offset + 32, _]

      # Building the network
      let n1 = relu linear(x, layer_3neurons)
      let n2 = linear(n1, classifier_layer)
      let loss = n2.sigmoid_cross_entropy(target)

      # Compute the gradient (i.e. contribution of each parameter to the loss)
      loss.backprop()

      # Correct the weights now that we have the gradient information
      optim.update()

main()
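A side note on the proc main() wrapper introduced above: in Nim, top-level let bindings are module-level globals, while the same bindings inside a proc are locals that the compiler can optimize more aggressively, so wrapping benchmark code in a main() proc is a common Nim idiom. A minimal sketch of the pattern (hypothetical illustration, not part of the commit):

# Hypothetical sketch of the wrapping pattern: statements that would
# otherwise create module-level globals become locals inside a proc.
proc demo() =
  let lr = 0.01'f32   # local instead of a module-level global
  echo "learning rate: ", lr

demo()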
22 changes: 14 additions & 8 deletions src/arraymancer/nn_primitives/nnp_sigmoid_cross_entropy.nim
@@ -41,14 +41,20 @@ proc sigmoid_cross_entropy*[T](input, target: Tensor[T]): T =

  # ln1p(x) does ln(1 + x) but avoids catastrophic cancellation if x << 1.

  # result = 0.T
  # for xi, ti in zip(input, target):
  #   result += (-ti * xi + max(xi,0) + ln1p(exp(-abs(xi))) ) / T(input.shape[1])

  # We need parallel fused map2 -> reduce for all loss functions
  result = sum:
    map2_inline(input, target):
      -y * x + max(x,0) + ln1p(exp(-abs(x))) # This leverages the logsumexp trick to improve numerical stability
  result = 0.T
  for xi, ti in zip(input, target):
    result += (-ti * xi + max(xi,0) + ln1p(exp(-abs(xi))) ) / T(input.shape[1])

  # TODO - Parallel fused map-reduce, openmp issue - https://github.com/mratsim/Arraymancer/issues/485
  # forEachStaged ii in input, ti in target:
  #   before_loop:
  #     var local_sum{.exportc.} = 0.T
  #   in_loop:
  #     # This leverages the logsumexp trick to improve numerical stability
  #     local_sum += -ti * ii + max(ii,0) + ln1p(exp(-abs(ii)))
  #   after_loop:
  #     {.emit: "#pragma omp atomic".}
  #     {.emit: "`result` += `local_sum`;".}

  # Normalize by batch_size
  result /= T(batch_size)
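For reference, the per-element expression used in both the removed parallel version and the new serial loop follows from the standard sigmoid cross-entropy. A worked derivation (standard identities, not part of the commit), with x the logit and t in {0, 1} the target:

\sigma(x) = \frac{1}{1 + e^{-x}}

L(x, t) = -t \ln \sigma(x) - (1 - t) \ln(1 - \sigma(x))
        = (1 - t)\, x + \ln(1 + e^{-x})
        = -t\, x + \ln(1 + e^{x})
        = -t\, x + \max(x, 0) + \ln(1 + e^{-|x|})

The last step uses \ln(1 + e^{x}) = \max(x, 0) + \ln(1 + e^{-|x|}), which keeps the exponent non-positive, and ln1p evaluates \ln(1 + y) accurately for small y. This is exactly the -ti * xi + max(xi,0) + ln1p(exp(-abs(xi))) term in the code; the accumulated sum is then normalized by T(batch_size).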

0 comments on commit 979f5d5
