Examples/Image/Classification/AlexNet/BrainScript/AlexNet_ImageNet.cntk

# Note: reader configuration comes from AlexNet.cntk or AlexNetComposite.cntk, depending on the test
# Directory layout. Every path below is derived from RootDir via $...$ substitution.
RootDir = "."

ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"

# Numeric precision and compute device selection.
precision = "float"
deviceId = "Auto"

# Commands to execute; each name refers to a top-level section of this file.
command = Train:Test

# Enables the ParallelTrain settings found in Train.SGD.
parallelTrain = "true"
traceLevel = 1
# File-level progress-report interval (in minibatches); Train.SGD also sets its own value (100).
numMBsToShowResult = 500

# Model file and log output, both placed under OutputDir.
modelPath = "$ModelDir$/AlexNet"
stderr = "$OutputDir$/AlexNet"

################################
# Training command: network definition (BrainScript), SGD settings, and the training-data reader.
Train = {
    action = "train"

    BrainScriptNetworkBuilder = {
        # Input tensor shape and number of output classes.
        imageShape = 227:227:3
        labelDim = 1000
                
        # Local Response Normalization.
        # Computes r = x / (k + b)^beta, where b is the sum of x^2 over a window of
        # 2*n+1 neighbours along the 3rd axis, scaled by alpha/(2*n+1).
        #   k     : bias
        #   n     : half radius of the window
        #   alpha : scale factor
        #   beta  : exponent
        LRN {k, n, alpha, beta} = {
            apply (x) = {
                # element-wise square of the input
                x2 = x .* x
                # reshape to insert a fake singleton reduction dimension after the 3rd axis
                x2s = SplitDimension(x2, 3, 1) 
                # 3D convolution with a filter that has a non 1-size only in the 3rd axis, and does not reduce since the reduction dimension is fake and 1
                # The filter is constant (learningRateMultiplier = 0); every weight equals alpha/(2*n+1).
                W = ParameterTensor{(1:1:2*n+1:1), learningRateMultiplier = 0, initValue = alpha/(2*n+1)}
                y = Convolution (W, x2s, (1:1:2*n+1), mapDims = 1, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, maxTempMemSizeInSamples = 0)
                # reshape back to remove the fake singleton reduction dimension
                b = FlattenDimensions(y, 3, 2)
                # (k + b)^beta, expressed through Exp/Log
                den = Exp (beta .* Log(k + b)) 
                # normalized output: x divided by the denominator
                r = x .* Reciprocal(den)
            }.r
        }.apply

        # Layer stack: five convolutional layers with ReLU (LRN + max pooling after the
        # first two, max pooling after the fifth), two 4096-unit dense layers each
        # followed by Dropout, and a final linear layer with labelDim outputs.
        model = Sequential (
            ConvolutionalLayer {96, (11:11), stride=(4:4), pad=false, init='normal', initValueScale=0.01} : ReLU : 
            LRN {1.0, 2, 0.0001, 0.75} : 
            MaxPoolingLayer    {(3:3), stride=(2:2)} :
            ConvolutionalLayer {192, (5:5), pad = true, init='normal', initValueScale=0.01, initBias=0.1} : ReLU : 
            LRN {1.0, 2, 0.0001, 0.75} : 
            MaxPoolingLayer    {(3:3), stride=(2:2)} :
            ConvolutionalLayer {384, (3:3), pad = true, init='normal', initValueScale=0.01} : ReLU : 
            ConvolutionalLayer {384, (3:3), pad = true, init='normal', initValueScale=0.01, initBias=0.1} : ReLU : 
            ConvolutionalLayer {256, (3:3), pad = true, init='normal', initValueScale=0.01, initBias=0.1} : ReLU : 
            MaxPoolingLayer    {(3:3), stride=(2:2)} : 
            DenseLayer         {4096, activation=ReLU, init='normal', initValueScale=0.005, initBias=0.1} : Dropout : 
            DenseLayer         {4096, activation=ReLU, init='normal', initValueScale=0.005, initBias=0.1} : Dropout :
            LinearLayer        {labelDim, init='normal', initValueScale=0.01}
        )

        # inputs
        features = Input {imageShape}
        # shift every input element by a constant 114
        # NOTE(review): presumably an approximate mean pixel value - confirm
        featNorm = features - Constant(114)
        labels = Input {labelDim}

        # apply model to features
        z = model (featNorm)

        # loss and error computation
        ce       = CrossEntropyWithSoftmax  (labels, z)
        errs     = ClassificationError      (labels, z)
        top5Errs = ClassificationError      (labels, z, topN=5)  # not in evaluationNodes; requested by the Test command via evalNodeNames

        # declare special nodes
        featureNodes    = (features)
        labelNodes      = (labels)
        criterionNodes  = (ce)
        evaluationNodes = (errs)
        outputNodes     = (z)
    }
    
    # Optimizer settings.
    SGD = {
        # per the CNTK SGD documentation, 0 means the epoch is one full pass over the data
        epochSize = 0
        minibatchSize = 256
        # CNTK weights new gradient by (1-momentum) for unit gain, thus we divide Caffe's learning rate by (1-momentum)
        # Schedule syntax is value*epochCount, colon-separated: 0.1 for 25 epochs, then 0.01,
        # 0.001 and 0.0001 for 25 epochs each; the last entry (0.00001) has no count.
        learningRatesPerMB = 0.1*25:0.01*25:0.001*25:0.0001*25:0.00001
        momentumPerMB = 0.9
        maxEpochs = 112
        gradUpdateType = None
        L2RegWeight = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe
        # rate for the Dropout steps in the model above
        dropoutRate = 0.5
        
        # Data-parallel training settings.
        # TODO: try fewer gradient bits?
        ParallelTrain = {
            parallelizationMethod = "DataParallelSGD"
            distributedMBReading = "true"
            parallelizationStartEpoch = 1
            DataParallelSGD = {
                gradientBits = 32
            }
        }
        
        # progress-report interval in minibatches; the file-level setting is 500
        numMBsToShowResult = 100
    }
    
    # Reader: training images listed in train_map.txt, randomly cropped, scaled to 227x227x3, then transposed.
    reader = {
        verbosity = 0
        randomize = true
        # NOTE(review): a window of 1 looks very small for randomize = true - confirm units and intent
        randomizationWindow = 1

        deserializers = (
        {   
            type = "ImageDeserializer" ; module = "ImageReader"
            file = "$DataDir$/train_map.txt"
            input = {
                features = {
                    transforms = (
                        {
                            type = "Crop"
                            cropType = "RandomSide"
                            # 0.88671875 = 227/256
                            sideRatio = 0.88671875
                            jitterType = "UniRatio"
                        }:{
                            # output size matches imageShape (227:227:3) in the network definition
                            type = "Scale"
                            width = 227
                            height = 227
                            channels = 3
                            interpolations = "linear"
                        }:{
                            type = "Transpose"
                        }
                    )
                }
                labels = {
                    # must agree with labelDim in the network definition
                    labelDim = 1000
                }
            }
        })
    }    
}

################################
# Test command: reads the images listed in val_map.txt and reports the errs and top5Errs nodes.
Test = {
    action = test
    minibatchSize = 128
    # also test top-5 error rate
    evalNodeNames = errs:top5Errs

    # Reader: no randomization; centre crop, scale to 227x227x3, then transpose.
    reader = {
        verbosity = 0
        randomize = false

        deserializers = ({
            type = "ImageDeserializer"
            module = "ImageReader"
            file = "$DataDir$/val_map.txt"
            input = {
                features = {
                    # sideRatio 0.88671875 = 227/256
                    transforms = (
                        { type = "Crop" ; cropType = "center" ; sideRatio = 0.88671875 } :
                        { type = "Scale" ; width = 227 ; height = 227 ; channels = 3 } :
                        { type = "Transpose" }
                    )
                }
                labels = { labelDim = 1000 }
            }
        })
    }
}