Commit

Merge branch 'master' into master
Jessica Lin authored May 20, 2020
2 parents 39ff9d8 + b9f3b2e commit a5fdab9
Showing 13 changed files with 752 additions and 12 deletions.
21 changes: 21 additions & 0 deletions cpp/autograd/CMakeLists.txt
@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 2.8)

project(autograd)
set(CMAKE_CXX_STANDARD 14)

find_package(Torch REQUIRED)

add_executable(${PROJECT_NAME} "autograd.cpp")
target_link_libraries(${PROJECT_NAME} "${TORCH_LIBRARIES}")

# The following code block is suggested for use on Windows.
# According to https://github.com/pytorch/pytorch/issues/25457,
# the DLLs need to be copied to avoid memory errors.
if (MSVC)
  file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll")
  add_custom_command(TARGET ${PROJECT_NAME}
                     POST_BUILD
                     COMMAND ${CMAKE_COMMAND} -E copy_if_different
                     ${TORCH_DLLS}
                     $<TARGET_FILE_DIR:${PROJECT_NAME}>)
endif (MSVC)
78 changes: 78 additions & 0 deletions cpp/autograd/README.md
@@ -0,0 +1,78 @@
# C++ autograd example

`autograd.cpp` contains several examples of doing autograd in the PyTorch C++ frontend: basic autograd operations, computing higher-order gradients, and defining custom autograd functions.

To build the code, run the following commands from your terminal:

```shell
$ cd autograd
$ mkdir build
$ cd build
$ cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch ..
$ make
```

where `/path/to/libtorch` should be the path to the unzipped *LibTorch*
distribution, which you can get from the [PyTorch
homepage](https://pytorch.org/get-started/locally/).
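
On Windows, where `make` is typically unavailable, a similar flow using CMake's generic build driver should work (a sketch, with a placeholder path and the default generator):

```shell
> cd autograd
> mkdir build
> cd build
> cmake -DCMAKE_PREFIX_PATH=C:\path\to\libtorch ..
> cmake --build . --config Release
```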

Execute the compiled binary to run the examples:

```shell
$ ./autograd
====== Running: "Basic autograd operations" ======
1 1
1 1
[ CPUFloatType{2,2} ]
3 3
3 3
[ CPUFloatType{2,2} ]
AddBackward1
27 27
27 27
[ CPUFloatType{2,2} ]
MulBackward1
27
[ CPUFloatType{} ]
MeanBackward0
false
true
SumBackward0
4.5000 4.5000
4.5000 4.5000
[ CPUFloatType{2,2} ]
813.6625
1015.0142
-664.8849
[ CPUFloatType{3} ]
MulBackward1
204.8000
2048.0000
0.2048
[ CPUFloatType{3} ]
true
true
false
true
false
true

====== Running "Computing higher-order gradients in C++" ======
0.0025 0.0946 0.1474 0.1387
0.0238 -0.0018 0.0259 0.0094
0.0513 -0.0549 -0.0604 0.0210
[ CPUFloatType{3,4} ]

====== Running "Using custom autograd function in C++" ======
-3.5513 3.7160 3.6477
-3.5513 3.7160 3.6477
[ CPUFloatType{2,3} ]
0.3095 1.4035 -0.0349
0.3095 1.4035 -0.0349
0.3095 1.4035 -0.0349
0.3095 1.4035 -0.0349
[ CPUFloatType{4,3} ]
5.5000
5.5000
[ CPUFloatType{2} ]
```
191 changes: 191 additions & 0 deletions cpp/autograd/autograd.cpp
@@ -0,0 +1,191 @@
#include <torch/torch.h>
#include <iostream>

using namespace torch::autograd;

void basic_autograd_operations_example() {
  std::cout << "====== Running: \"Basic autograd operations\" ======" << std::endl;

  // Create a tensor and set ``torch::requires_grad()`` to track computation with it
  auto x = torch::ones({2, 2}, torch::requires_grad());
  std::cout << x << std::endl;

  // Do a tensor operation:
  auto y = x + 2;
  std::cout << y << std::endl;

  // ``y`` was created as a result of an operation, so it has a ``grad_fn``.
  std::cout << y.grad_fn()->name() << std::endl;

  // Do more operations on ``y``
  auto z = y * y * 3;
  auto out = z.mean();

  std::cout << z << std::endl;
  std::cout << z.grad_fn()->name() << std::endl;
  std::cout << out << std::endl;
  std::cout << out.grad_fn()->name() << std::endl;

  // ``.requires_grad_( ... )`` changes an existing tensor's ``requires_grad`` flag in-place.
  auto a = torch::randn({2, 2});
  a = ((a * 3) / (a - 1));
  std::cout << a.requires_grad() << std::endl;

  a.requires_grad_(true);
  std::cout << a.requires_grad() << std::endl;

  auto b = (a * a).sum();
  std::cout << b.grad_fn()->name() << std::endl;

  // Let's backprop now. Because ``out`` contains a single scalar, ``out.backward()``
  // is equivalent to ``out.backward(torch::tensor(1.))``.
  out.backward();

  // Print gradients d(out)/dx
  std::cout << x.grad() << std::endl;
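  // Each entry is 4.5: with out = (1/4) * sum(3 * (x + 2)^2), the derivative is
  // d(out)/dx_i = (3/2) * (x_i + 2) = 4.5 at x_i = 1.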

  // Now let's take a look at an example of vector-Jacobian product:
  x = torch::randn(3, torch::requires_grad());

  y = x * 2;
  while (y.norm().item<double>() < 1000) {
    y = y * 2;
  }

  std::cout << y << std::endl;
  std::cout << y.grad_fn()->name() << std::endl;

  // If we want the vector-Jacobian product, pass the vector to ``backward`` as argument:
  auto v = torch::tensor({0.1, 1.0, 0.0001}, torch::kFloat);
  y.backward(v);

  std::cout << x.grad() << std::endl;
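  // Since ``y`` was doubled until its norm reached 1000, y = 2^k * x for some k,
  // so the Jacobian is diagonal with entries 2^k and x.grad() is simply 2^k * v.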

  // You can also stop autograd from tracking history on tensors that require gradients
  // either by putting ``torch::NoGradGuard`` in a code block
  std::cout << x.requires_grad() << std::endl;
  std::cout << x.pow(2).requires_grad() << std::endl;

  {
    torch::NoGradGuard no_grad;
    std::cout << x.pow(2).requires_grad() << std::endl;
  }

  // Or by using ``.detach()`` to get a new tensor with the same content but that does
  // not require gradients:
  std::cout << x.requires_grad() << std::endl;
  y = x.detach();
  std::cout << y.requires_grad() << std::endl;
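  // Note: ``detach()`` returns a tensor that shares storage with ``x``; it is cut
  // from the graph, but in-place modifications to one are visible in the other.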
  std::cout << x.eq(y).all().item<bool>() << std::endl;
}

void compute_higher_order_gradients_example() {
  std::cout << "====== Running \"Computing higher-order gradients in C++\" ======" << std::endl;

  // One of the applications of higher-order gradients is calculating gradient penalty.
  // Let's see an example of it using ``torch::autograd::grad``:

  auto model = torch::nn::Linear(4, 3);

  auto input = torch::randn({3, 4}).requires_grad_(true);
  auto output = model(input);

  // Calculate loss
  auto target = torch::randn({3, 3});
  auto loss = torch::nn::MSELoss()(output, target);

  // Use norm of gradients as penalty
  auto grad_output = torch::ones_like(output);
  auto gradient = torch::autograd::grad({output}, {input}, /*grad_outputs=*/{grad_output}, /*create_graph=*/true)[0];
  auto gradient_penalty = torch::pow((gradient.norm(2, /*dim=*/1) - 1), 2).mean();
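  // ``create_graph=true`` makes the returned gradient itself differentiable, which
  // is what lets backward() below propagate through the penalty term (the same
  // trick used for WGAN-GP style penalties, which push gradient norms toward 1).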

  // Add gradient penalty to loss
  auto combined_loss = loss + gradient_penalty;
  combined_loss.backward();

  std::cout << input.grad() << std::endl;
}

// Inherit from Function
class LinearFunction : public Function<LinearFunction> {
 public:
  // Note that both forward and backward are static functions

  // bias is an optional argument
  static torch::Tensor forward(
      AutogradContext *ctx, torch::Tensor input, torch::Tensor weight, torch::Tensor bias = torch::Tensor()) {
    ctx->save_for_backward({input, weight, bias});
    auto output = input.mm(weight.t());
    if (bias.defined()) {
      output += bias.unsqueeze(0).expand_as(output);
    }
    return output;
  }

  static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) {
    auto saved = ctx->get_saved_variables();
    auto input = saved[0];
    auto weight = saved[1];
    auto bias = saved[2];

    auto grad_output = grad_outputs[0];
    auto grad_input = grad_output.mm(weight);
    auto grad_weight = grad_output.t().mm(input);
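    // For output = input * weight^T: d(loss)/d(input) = grad_output * weight and
    // d(loss)/d(weight) = grad_output^T * input, matching the shapes of the inputs.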
    auto grad_bias = torch::Tensor();
    if (bias.defined()) {
      grad_bias = grad_output.sum(0);
    }

    return {grad_input, grad_weight, grad_bias};
  }
};

class MulConstant : public Function<MulConstant> {
 public:
  static torch::Tensor forward(AutogradContext *ctx, torch::Tensor tensor, double constant) {
    // ctx is a context object that can be used to stash information
    // for backward computation
    ctx->saved_data["constant"] = constant;
    return tensor * constant;
  }

  static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) {
    // We return as many input gradients as there were arguments.
    // Gradients of non-tensor arguments to forward must be `torch::Tensor()`.
    return {grad_outputs[0] * ctx->saved_data["constant"].toDouble(), torch::Tensor()};
  }
};

void custom_autograd_function_example() {
  std::cout << "====== Running \"Using custom autograd function in C++\" ======" << std::endl;
  {
    auto x = torch::randn({2, 3}).requires_grad_();
    auto weight = torch::randn({4, 3}).requires_grad_();
    auto y = LinearFunction::apply(x, weight);
    y.sum().backward();
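    // With y.sum() as the loss, grad_output is all ones, so every row of x.grad()
    // equals the column sums of ``weight`` (hence the identical rows printed below).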

    std::cout << x.grad() << std::endl;
    std::cout << weight.grad() << std::endl;
  }
  {
    auto x = torch::randn({2}).requires_grad_();
    auto y = MulConstant::apply(x, 5.5);
    y.sum().backward();
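    // d(sum(5.5 * x))/dx is 5.5 for every element, which is what gets printed.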

    std::cout << x.grad() << std::endl;
  }
}

int main() {
  std::cout << std::boolalpha;
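  // ``boolalpha`` prints booleans as "true"/"false" (as seen in the README output)
  // rather than 1/0.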

  basic_autograd_operations_example();

  std::cout << "\n";

  compute_higher_order_gradients_example();

  std::cout << "\n";

  custom_autograd_function_example();
}
4 changes: 2 additions & 2 deletions cpp/dcgan/dcgan.cpp
@@ -121,9 +121,9 @@ int main(int argc, const char* argv[]) {
      torch::data::DataLoaderOptions().batch_size(kBatchSize).workers(2));

  torch::optim::Adam generator_optimizer(
      generator->parameters(), torch::optim::AdamOptions(2e-4).beta1(0.5));
      generator->parameters(), torch::optim::AdamOptions(2e-4).betas(std::make_tuple(0.5, 0.5)));
  torch::optim::Adam discriminator_optimizer(
      discriminator->parameters(), torch::optim::AdamOptions(2e-4).beta1(0.5));
      discriminator->parameters(), torch::optim::AdamOptions(2e-4).betas(std::make_tuple(0.5, 0.5)));

  if (kRestoreFromCheckpoint) {
    torch::load(generator, "generator-checkpoint.pt");
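(For context: newer libtorch releases removed `AdamOptions::beta1`/`beta2` in favor of a single `betas` option taking a `std::tuple<double, double>`, which is what this change adapts to.)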
2 changes: 1 addition & 1 deletion distributed/ddp/README.md
@@ -189,4 +189,4 @@ that in turn produces the following output
```
# Conclusions
As the author of a distributed data parallel application, your code needs to be aware of two types of resources: compute nodes and the GPUs within each node. The process of setting up bookkeeping to track how the set of GPUs is mapped to the processes of your application can be tedious and error-prone. We hope that by structuring your application as shown in this example and using the launcher, the mechanics of setting up distributed training can be significantly simplified.
As the author of a distributed data parallel application, your code needs to be aware of two types of resources: compute nodes and the GPUs within each node. The process of setting up bookkeeping to track how the set of GPUs is mapped to the processes of your application can be tedious and error-prone. We hope that by structuring your application as shown in this example and using the launcher, the mechanics of setting up distributed training can be significantly simplified.
