Implemented atrous convolution for depthwiseConv2D

oveddan · oveddan · commit 8ace2ee8d7da · 2018-03-04T16:48:26.000-05:00
* Modified the CPU depthwiseConv2D logic to mirror the GPU implementation
(looping over filter taps instead of input coordinates), so that dilation
can be applied easily.

TODO: add more tests for depthwise atrous convolution.
diff --git a/src/kernels/backend_cpu.ts b/src/kernels/backend_cpu.ts
@@ -869,19 +869,17 @@ export class MathBackendCPU implements KernelBackend {
             for (let wR = 0; wR < filterHeight; wR++) {
               const xR = xRCorner + wR * dilationHeight;
 
-              if (xR < 0 || xR >= convInfo.inHeight)
-                continue;
+              if (xR < 0 || xR >= convInfo.inHeight) continue;
 
-              for(let wC = 0; wC < filterWidth; wC++) {
+              for (let wC = 0; wC < filterWidth; wC++) {
                 const xC = xCCorner + wC * dilationWidth;
 
                 if (xC < 0 || xC >= convInfo.inWidth) {
                   continue;
                 }
 
                 for (let d1 = 0; d1 < convInfo.inChannels; ++d1) {
-                  const pixel = x.get(
-                    b, xR, xC, d1);
+                  const pixel = x.get(b, xR, xC, d1);
                   const weight = filter.get(wR, wC, d1, d2);
                   dotProd += pixel * weight;
                 }
@@ -987,6 +985,8 @@ export class MathBackendCPU implements KernelBackend {
       Tensor4D {
     const filterHeight = convInfo.filterHeight;
     const filterWidth = convInfo.filterWidth;
+    const dilationHeight = convInfo.dilationHeight;
+    const dilationWidth = convInfo.dilationWidth;
     const padLeft = convInfo.padInfo.left;
     const padTop = convInfo.padInfo.top;
     const chMul = convInfo.outChannels / convInfo.inChannels;
@@ -996,18 +996,20 @@ export class MathBackendCPU implements KernelBackend {
       for (let d1 = 0; d1 < convInfo.inChannels; ++d1) {
         for (let yR = 0; yR < convInfo.outHeight; ++yR) {
           const xRCorner = yR * convInfo.strideHeight - padLeft;
-          const xRMin = Math.max(0, xRCorner);
-          const xRMax = Math.min(convInfo.inHeight, filterHeight + xRCorner);
           for (let yC = 0; yC < convInfo.outWidth; ++yC) {
             const xCCorner = yC * convInfo.strideWidth - padTop;
-            const xCMin = Math.max(0, xCCorner);
-            const xCMax = Math.min(convInfo.inWidth, filterWidth + xCCorner);
             for (let q = 0; q < chMul; ++q) {
               let dotProd = 0;
-              for (let xR = xRMin; xR < xRMax; ++xR) {
-                const wR = xR - xRCorner;
-                for (let xC = xCMin; xC < xCMax; ++xC) {
-                  const wC = xC - xCCorner;
+              for (let wR = 0; wR < filterHeight; ++wR) {
+                const xR = xRCorner + wR * dilationHeight;
+
+                if (xR < 0 || xR >= convInfo.inHeight) continue;
+
+                for (let wC = 0; wC < filterWidth; ++wC) {
+                  const xC = xCCorner + wC * dilationWidth;
+
+                  if (xC < 0 || xC >= convInfo.inWidth) continue;
+
                   const pixel = x.get(b, xR, xC, d1);
                   const weight = filter.get(wR, wC, d1, q);
                   dotProd += pixel * weight;
@@ -1019,6 +1021,7 @@ export class MathBackendCPU implements KernelBackend {
         }
       }
     }
+
     return y.toTensor();
   }
 
diff --git a/src/kernels/webgl/conv_gpu_depthwise.ts b/src/kernels/webgl/conv_gpu_depthwise.ts
@@ -32,6 +32,8 @@ export class DepthwiseConv2DProgram implements GPGPUProgram {
     const padLeft = convInfo.padInfo.left;
     const strideHeight = convInfo.strideHeight;
     const strideWidth = convInfo.strideWidth;
+    const dilationHeight = convInfo.dilationHeight;
+    const dilationWidth = convInfo.dilationWidth;
     const filterHeight = convInfo.filterHeight;
     const filterWidth = convInfo.filterWidth;
     const channelMul = convInfo.outChannels / convInfo.inChannels;
@@ -56,14 +58,14 @@ export class DepthwiseConv2DProgram implements GPGPUProgram {
         float dotProd = 0.0;
         // TODO(dsmilkov): Flatten the two for loops and vec4 the operations.
         for (int wR = 0; wR < ${filterHeight}; wR++) {
-          int xR = xRCorner + wR;
+          int xR = xRCorner + wR * ${dilationHeight};
 
           if (xR < 0 || xR >= ${xNumRows}) {
             continue;
           }
 
           for (int wC = 0; wC < ${filterWidth}; wC++) {
-            int xC = xCCorner + wC;
+            int xC = xCCorner + wC * ${dilationWidth};
 
             if (xC < 0 || xC >= ${xNumCols}) {
               continue;
diff --git a/src/ops/conv.ts b/src/ops/conv.ts
@@ -423,11 +423,11 @@ export class ConvOps {
     if (dilations == null) {
       dilations = [1, 1];
     }
-    const [dilationHeight, dilationWidth] = parseTupleParam(dilations);
     util.assert(
-        dilationHeight === 1 && dilationWidth === 1,
-        'Error in depthwiseConv2D: dilation rates greater than 1 are not yet ' +
-            `supported. Got dilations '${dilations}'`);
+        eitherStridesOrDilationsAreOne(strides, dilations),
+        'Error in depthwiseConv2d: Either strides or dilations must be 1.' +
+            `Got strides ${strides} and dilations '${dilations}'`);
+
     if (dimRoundingMode != null) {
       util.assert(
           util.isInt(pad as number),
diff --git a/src/ops/conv2d_depthwise_test.ts b/src/ops/conv2d_depthwise_test.ts
@@ -20,7 +20,7 @@ import {ALL_ENVS, describeWithFlags, expectArraysClose} from '../test_util';
 import {Rank} from '../types';
 
 describeWithFlags('depthwiseConv2D', ALL_ENVS, () => {
-  it('input=1x3x3x1,f=2,s=1,p=valid,chMul=1', () => {
+  it('input=1x3x3x1,f=2,s=1,d=1,p=valid,chMul=1', () => {
     const fSize = 2;
     const pad = 'valid';
     const stride = 1;
@@ -44,6 +44,40 @@ describeWithFlags('depthwiseConv2D', ALL_ENVS, () => {
     expectArraysClose(result, expected);
   });
 
+  it('input=1x3x3x1,f=2,s=1,d=2,p=valid,chMul=1', () => {
+    const fSize = 2;
+    const pad = 'valid';
+    const stride = 1;
+    const dilation = 2;
+    const chMul = 1;
+    const inDepth = 1;
+
+    const x = dl.tensor4d(
+        [
+          0.230664, 0.987388, 0.0685208, 0.419224, 0.887861, 0.731641,
+          0.0741907, 0.409265, 0.351377
+        ],
+        [1, 3, 3, inDepth]);
+    const w = dl.tensor4d(
+        [0.303873, 0.229223, 0.144333, 0.803373],
+        [fSize, fSize, inDepth, chMul],
+    );
+    // adding a dilation rate is equivalent to using a filter
+    // with 0s for the dilation rate
+    const fSizeDilated = fSize + (fSize - 1) * (dilation - 1);
+    const wDilated = dl.tensor4d(
+        [0.303873, 0, 0.229223, 0, 0, 0, 0.144333, 0, 0.803373],
+        [fSizeDilated, fSizeDilated, inDepth, chMul],
+    );
+
+    const result = dl.depthwiseConv2d(x, w, stride, pad, dilation);
+
+    const expectedResult = dl.depthwiseConv2d(x, wDilated, stride, pad);
+
+    expect(result.shape).toEqual(expectedResult.shape);
+    expectArraysClose(result, expectedResult);
+  });
+
   it('input=1x3x3x2,f=2,s=1,p=same,chMul=1', () => {
     const fSize = 2;
     const pad = 'same';