
Commit

Trying to solve #485 to no avail, at least drop parallelization of sigmoid_cross_entropy
mratsim committed Jan 3, 2021
1 parent 69efb08 commit 979f5d5
Showing 2 changed files with 63 additions and 55 deletions.
96 changes: 49 additions & 47 deletions benchmarks/ex01_xor.nim
@@ -1,50 +1,52 @@
import ../src/arraymancer

# Learning XOR function with a neural network.

# Autograd context / neuralnet graph
let ctx = newContext Tensor[float32]
let bsz = 32 # batch size

let x_train_bool = randomTensor([bsz * 100, 2], 1).astype(bool)
let y_bool = x_train_bool[_,0] xor x_train_bool[_,1]
let x_train = ctx.variable(x_train_bool.astype(float32))
let y = y_bool.astype(float32)

# We will build the following network:
# Input --> Linear(out_features = 3) --> relu --> Linear(out_features = 1) --> Sigmoid --> Cross-Entropy Loss

let layer_3neurons = ctx.variable(
    randomTensor(3, 2, 2.0f) -. 1.0f,
    requires_grad = true
  )

let classifier_layer = ctx.variable(
    randomTensor(1, 3, 2.0f) -. 1.0f,
    requires_grad = true
  )

# Stochastic Gradient Descent
let optim = newSGD[float32](
  layer_3neurons, classifier_layer, 0.01f
)

# Learning loop
for epoch in 0..10000:
  for batch_id in 0..<100:

    # minibatch offset in the Tensor
    let offset = batch_id * 32
    let x = x_train[offset ..< offset + 32, _]
    let target = y[offset ..< offset + 32, _]

    # Building the network
    let n1 = relu linear(x, layer_3neurons)
    let n2 = linear(n1, classifier_layer)
    let loss = n2.sigmoid_cross_entropy(target)

    # Compute the gradient (i.e. contribution of each parameter to the loss)
    loss.backprop()

    # Correct the weights now that we have the gradient information
    optim.update()
proc main() =
  # Autograd context / neuralnet graph
  let ctx = newContext Tensor[float32]
  let bsz = 32 # batch size

  let x_train_bool = randomTensor([bsz * 100, 2], 1).astype(bool)
  let y_bool = x_train_bool[_,0] xor x_train_bool[_,1]
  let x_train = ctx.variable(x_train_bool.astype(float32))
  let y = y_bool.astype(float32)

  # We will build the following network:
  # Input --> Linear(out_features = 3) --> relu --> Linear(out_features = 1) --> Sigmoid --> Cross-Entropy Loss

  let layer_3neurons = ctx.variable(
      randomTensor(3, 2, 2.0f) -. 1.0f,
      requires_grad = true
    )

  let classifier_layer = ctx.variable(
      randomTensor(1, 3, 2.0f) -. 1.0f,
      requires_grad = true
    )

  # Stochastic Gradient Descent
  let optim = newSGD[float32](
    layer_3neurons, classifier_layer, 0.01f
  )

  # Learning loop
  for epoch in 0..10000:
    for batch_id in 0..<100:

      # minibatch offset in the Tensor
      let offset = batch_id * 32
      let x = x_train[offset ..< offset + 32, _]
      let target = y[offset ..< offset + 32, _]

      # Building the network
      let n1 = relu linear(x, layer_3neurons)
      let n2 = linear(n1, classifier_layer)
      let loss = n2.sigmoid_cross_entropy(target)

      # Compute the gradient (i.e. contribution of each parameter to the loss)
      loss.backprop()

      # Correct the weights now that we have the gradient information
      optim.update()

main()
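A side note on the proc main() wrapper introduced above: in Nim, top-level let bindings are module-level globals, while the same bindings inside a proc are locals that the compiler can optimize more aggressively, so wrapping benchmark code in a main() proc is a common Nim idiom. A minimal sketch of the pattern (hypothetical illustration, not part of the commit):

# Hypothetical sketch of the wrapping pattern: statements that would
# otherwise create module-level globals become locals inside a proc.
proc demo() =
  let lr = 0.01'f32   # local instead of a module-level global
  echo "learning rate: ", lr

demo()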
22 changes: 14 additions & 8 deletions src/arraymancer/nn_primitives/nnp_sigmoid_cross_entropy.nim
@@ -41,14 +41,20 @@ proc sigmoid_cross_entropy*[T](input, target: Tensor[T]): T =

  # ln1p(x) does ln(1 + x) but avoids catastrophic cancellation if x << 1.

  # result = 0.T
  # for xi, ti in zip(input, target):
  #   result += (-ti * xi + max(xi,0) + ln1p(exp(-abs(xi))) ) / T(input.shape[1])

  # We need parallel fused map2 -> reduce for all loss functions
  result = sum:
    map2_inline(input, target):
      -y * x + max(x,0) + ln1p(exp(-abs(x))) # This leverages the logsumexp trick to improve numerical stability
  result = 0.T
  for xi, ti in zip(input, target):
    result += (-ti * xi + max(xi,0) + ln1p(exp(-abs(xi))) ) / T(input.shape[1])

  # TODO - Parallel fused map-reduce, openmp issue - https://github.com/mratsim/Arraymancer/issues/485
  # forEachStaged ii in input, ti in target:
  #   before_loop:
  #     var local_sum{.exportc.} = 0.T
  #   in_loop:
  #     # This leverages the logsumexp trick to improve numerical stability
  #     local_sum += -ti * ii + max(ii,0) + ln1p(exp(-abs(ii)))
  #   after_loop:
  #     {.emit: "#pragma omp atomic".}
  #     {.emit: "`result` += `local_sum`;".}

  # Normalize by batch_size
  result /= T(batch_size)
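For reference, the per-element expression used in both the removed parallel version and the new serial loop follows from the standard sigmoid cross-entropy. A worked derivation (standard identities, not part of the commit), with x the logit and t in {0, 1} the target:

\sigma(x) = \frac{1}{1 + e^{-x}}

L(x, t) = -t \ln \sigma(x) - (1 - t) \ln(1 - \sigma(x))
        = (1 - t)\, x + \ln(1 + e^{-x})
        = -t\, x + \ln(1 + e^{x})
        = -t\, x + \max(x, 0) + \ln(1 + e^{-|x|})

The last step uses \ln(1 + e^{x}) = \max(x, 0) + \ln(1 + e^{-|x|}), which keeps the exponent non-positive, and ln1p evaluates \ln(1 + y) accurately for small y. This is exactly the -ti * xi + max(xi,0) + ln1p(exp(-abs(xi))) term in the code; the accumulated sum is then normalized by T(batch_size).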

0 comments on commit 979f5d5
