From 7b85716f5e8b6f5e3cc0d095e32a473bf3329079 Mon Sep 17 00:00:00 2001
From: Joseph Viviano
Date: Fri, 20 Sep 2024 15:32:20 -0400
Subject: [PATCH 1/7] added temporary test file

---
 test.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 test.py

diff --git a/test.py b/test.py
new file mode 100644
index 00000000..8c1593e6
--- /dev/null
+++ b/test.py
@@ -0,0 +1,70 @@
+import torch
+from tqdm import tqdm
+
+from gfn.gflownet import TBGFlowNet, SubTBGFlowNet
+from gfn.gym import HyperGrid # We use the hyper grid environment
+from gfn.modules import DiscretePolicyEstimator
+from gfn.samplers import Sampler
+from gfn.utils import NeuralNet # NeuralNet is a simple multi-layer perceptron (MLP)
+
+if __name__ == "__main__":
+
+    # 1 - We define the environment.
+    env = HyperGrid(ndim=4, height=8, R0=0.01) # Grid of size 8x8x8x8
+
+    # 2 - We define the needed modules (neural networks).
+    # The environment has a preprocessor attribute, which is used to preprocess the state before feeding it to the policy estimator
+    module_PF = NeuralNet(
+        input_dim=env.preprocessor.output_dim,
+        output_dim=env.n_actions
+    ) # Neural network for the forward policy, with as many outputs as there are actions
+
+    module_PB = NeuralNet(
+        input_dim=env.preprocessor.output_dim,
+        output_dim=env.n_actions - 1,
+        torso=module_PF.torso # We share all the parameters of P_F and P_B, except for the last layer
+    )
+
+    # 3 - We define the estimators.
+    pf_estimator = DiscretePolicyEstimator(module_PF, env.n_actions, is_backward=False, preprocessor=env.preprocessor)
+    pb_estimator = DiscretePolicyEstimator(module_PB, env.n_actions, is_backward=True, preprocessor=env.preprocessor)
+
+    # 4 - We define the GFlowNet.
+    use_tb = False
+    if use_tb:
+        gfn = TBGFlowNet(init_logZ=0., pf=pf_estimator, pb=pb_estimator) # We initialize logZ to 0
+    else:
+        # import IPython; IPython.embed()
+        logF = DiscretePolicyEstimator(module=module_PF, n_actions=env.n_actions, preprocessor=env.preprocessor)
+        gfn = SubTBGFlowNet(pf=pf_estimator, pb=pb_estimator, logF=logF, lamda=0.9)
+
+
+    # 5 - We define the sampler and the optimizer.
+    sampler = Sampler(estimator=pf_estimator) # We use an on-policy sampler, based on the forward policy
+
+    # Policy parameters have their own LR.
+    if use_tb:
+        non_logz_params = [v for k, v in dict(gfn.named_parameters()).items() if k != "logZ"]
+        optimizer = torch.optim.Adam(non_logz_params, lr=1e-3)
+
+        # Log Z gets dedicated learning rate (typically higher).
+        logz_params = [dict(gfn.named_parameters())["logZ"]]
+        optimizer.add_param_group({"params": logz_params, "lr": 1e-1})
+    else:
+        import IPython; IPython.embed()
+        non_logz_params = [v for k, v in dict(gfn.named_parameters()).items() if k != "logF"]
+        optimizer = torch.optim.Adam(non_logz_params, lr=1e-3)
+
+        # Log Z gets dedicated learning rate (typically higher).
+        logz_params = [dict(gfn.named_parameters())["logF"]]
+        optimizer.add_param_group({"params": logz_params, "lr": 1e-1})
+
+    # 6 - We train the GFlowNet for 1000 iterations, with 16 trajectories per iteration
+    for i in (pbar := tqdm(range(1000))):
+        trajectories = sampler.sample_trajectories(env=env, n_trajectories=16)
+        optimizer.zero_grad()
+        loss = gfn.loss(env, trajectories)
+        loss.backward()
+        optimizer.step()
+        if i % 25 == 0:
+            pbar.set_postfix({"loss": loss.item()})

From dbac2cfa8e9fe0f194f1d1808da2c51aa66f5217 Mon Sep 17 00:00:00 2001
From: Joseph Viviano
Date: Fri, 20 Sep 2024 18:25:41 -0400
Subject: [PATCH 2/7] updated docs with extra example

---
 README.md | 148 +++++++++++++++++++++++++++++++++++++-----------------
 test.py   |  70 --------------------------
 2 files changed, 102 insertions(+), 116 deletions(-)
 delete mode 100644 test.py

diff --git a/README.md b/README.md
index 395064a9..38acaf4d 100644
--- a/README.md
+++ b/README.md
@@ -57,62 +57,118 @@ Example scripts and notebooks for the three environments are provided [here](htt

 ### Standalone example

-This example, which shows how to use the library for a simple discrete environment, requires [`tqdm`](https://github.com/tqdm/tqdm) package to run. Use `pip install tqdm` or install all extra requirements with `pip install .[scripts]` or `pip install torchgfn[scripts]`.
+This example, which shows how to use the library for a simple discrete environment, requires the [`tqdm`](https://github.com/tqdm/tqdm) package to run. Use `pip install tqdm` or install all extra requirements with `pip install .[scripts]` or `pip install torchgfn[scripts]`. In the first example, we will train a Trajectory Balance GFlowNet:

 ```python
 import torch
 from tqdm import tqdm

-from gfn.gflownet import TBGFlowNet # We use a GFlowNet with the Trajectory Balance (TB) loss
+from gfn.gflownet import TBGFlowNet
 from gfn.gym import HyperGrid # We use the hyper grid environment
 from gfn.modules import DiscretePolicyEstimator
 from gfn.samplers import Sampler
 from gfn.utils import NeuralNet # NeuralNet is a simple multi-layer perceptron (MLP)

-if __name__ == "__main__":
-
-    # 1 - We define the environment.
-    env = HyperGrid(ndim=4, height=8, R0=0.01) # Grid of size 8x8x8x8
-
-    # 2 - We define the needed modules (neural networks).
-    # The environment has a preprocessor attribute, which is used to preprocess the state before feeding it to the policy estimator
-    module_PF = NeuralNet(
-        input_dim=env.preprocessor.output_dim,
-        output_dim=env.n_actions
-    ) # Neural network for the forward policy, with as many outputs as there are actions
-    module_PB = NeuralNet(
-        input_dim=env.preprocessor.output_dim,
-        output_dim=env.n_actions - 1,
-        torso=module_PF.torso # We share all the parameters of P_F and P_B, except for the last layer
-    )
-
-    # 3 - We define the estimators.
-    pf_estimator = DiscretePolicyEstimator(module_PF, env.n_actions, is_backward=False, preprocessor=env.preprocessor)
-    pb_estimator = DiscretePolicyEstimator(module_PB, env.n_actions, is_backward=True, preprocessor=env.preprocessor)
-
-    # 4 - We define the GFlowNet.
-    gfn = TBGFlowNet(init_logZ=0., pf=pf_estimator, pb=pb_estimator) # We initialize logZ to 0
-
-    # 5 - We define the sampler and the optimizer.
-    sampler = Sampler(estimator=pf_estimator) # We use an on-policy sampler, based on the forward policy
-
-    # Policy parameters have their own LR.
-    non_logz_params = [v for k, v in dict(gfn.named_parameters()).items() if k != "logZ"]
-    optimizer = torch.optim.Adam(non_logz_params, lr=1e-3)
-
-    # Log Z gets dedicated learning rate (typically higher).
-    logz_params = [dict(gfn.named_parameters())["logZ"]]
-    optimizer.add_param_group({"params": logz_params, "lr": 1e-1})
-
-    # 6 - We train the GFlowNet for 1000 iterations, with 16 trajectories per iteration
-    for i in (pbar := tqdm(range(1000))):
-        trajectories = sampler.sample_trajectories(env=env, n_trajectories=16)
-        optimizer.zero_grad()
-        loss = gfn.loss(env, trajectories)
-        loss.backward()
-        optimizer.step()
-        if i % 25 == 0:
-            pbar.set_postfix({"loss": loss.item()})
+# 1 - We define the environment.
+env = HyperGrid(ndim=4, height=8, R0=0.01) # Grid of size 8x8x8x8
+
+# 2 - We define the needed modules (neural networks).
+# The environment has a preprocessor attribute, which is used to preprocess the state before feeding it to the policy estimator
+module_PF = NeuralNet(
+    input_dim=env.preprocessor.output_dim,
+    output_dim=env.n_actions
+) # Neural network for the forward policy, with as many outputs as there are actions
+
+module_PB = NeuralNet(
+    input_dim=env.preprocessor.output_dim,
+    output_dim=env.n_actions - 1,
+    torso=module_PF.torso # We share all the parameters of P_F and P_B, except for the last layer
+)
+
+# 3 - We define the estimators.
+pf_estimator = DiscretePolicyEstimator(module_PF, env.n_actions, is_backward=False, preprocessor=env.preprocessor)
+pb_estimator = DiscretePolicyEstimator(module_PB, env.n_actions, is_backward=True, preprocessor=env.preprocessor)
+
+# 4 - We define the GFlowNet.
+gfn = TBGFlowNet(logZ=0., pf=pf_estimator, pb=pb_estimator) # We initialize logZ to 0
+
+# 5 - We define the sampler and the optimizer.
+sampler = Sampler(estimator=pf_estimator) # We use an on-policy sampler, based on the forward policy
+
+# Different policy parameters can have their own LR.
+# Log Z gets dedicated learning rate (typically higher).
+optimizer = torch.optim.Adam(gfn.pf_pb_parameters(), lr=1e-3)
+optimizer.add_param_group({"params": gfn.logz_parameters(), "lr": 1e-1})
+
+# 6 - We train the GFlowNet for 1000 iterations, with 16 trajectories per iteration
+for i in (pbar := tqdm(range(1000))):
+    trajectories = sampler.sample_trajectories(env=env, n_trajectories=16)
+    optimizer.zero_grad()
+    loss = gfn.loss(env, trajectories)
+    loss.backward()
+    optimizer.step()
+    if i % 25 == 0:
+        pbar.set_postfix({"loss": loss.item()})
+```
+
+and in this example, we instead train using Sub Trajectory Balance. You can see we simply assemble our GFlowNet from slightly different building blocks
+
+```python
+import torch
+from tqdm import tqdm
+
+from gfn.gflownet import SubTBGFlowNet
+from gfn.gym import HyperGrid # We use the hyper grid environment
+from gfn.modules import DiscretePolicyEstimator, ScalarEstimator
+from gfn.samplers import Sampler
+from gfn.utils import NeuralNet # NeuralNet is a simple multi-layer perceptron (MLP)
+
+# 1 - We define the environment.
+env = HyperGrid(ndim=4, height=8, R0=0.01) # Grid of size 8x8x8x8
+
+# 2 - We define the needed modules (neural networks).
+# The environment has a preprocessor attribute, which is used to preprocess the state before feeding it to the policy estimator
+module_PF = NeuralNet(
+    input_dim=env.preprocessor.output_dim,
+    output_dim=env.n_actions
+) # Neural network for the forward policy, with as many outputs as there are actions
+
+module_PB = NeuralNet(
+    input_dim=env.preprocessor.output_dim,
+    output_dim=env.n_actions - 1,
+    torso=module_PF.torso # We share all the parameters of P_F and P_B, except for the last layer
+)
+module_logF = NeuralNet(
+    input_dim=env.preprocessor.output_dim,
+    output_dim=1, # Important for ScalarEstimators!
+)
+
+# 3 - We define the estimators.
+pf_estimator = DiscretePolicyEstimator(module_PF, env.n_actions, is_backward=False, preprocessor=env.preprocessor)
+pb_estimator = DiscretePolicyEstimator(module_PB, env.n_actions, is_backward=True, preprocessor=env.preprocessor)
+logF_estimator = ScalarEstimator(module=module_logF, preprocessor=env.preprocessor)
+
+# 4 - We define the GFlowNet.
+gfn = SubTBGFlowNet(pf=pf_estimator, pb=pb_estimator, logF=logF_estimator, lamda=0.9)
+
+# 5 - We define the sampler and the optimizer.
+sampler = Sampler(estimator=pf_estimator) # We use an on-policy sampler, based on the forward policy
+
+# Different policy parameters can have their own LR.
+# Log F gets dedicated learning rate (typically higher).
+optimizer = torch.optim.Adam(gfn.pf_pb_parameters(), lr=1e-3)
+optimizer.add_param_group({"params": gfn.logF_parameters(), "lr": 1e-2})
+
+# 6 - We train the GFlowNet for 1000 iterations, with 16 trajectories per iteration
+for i in (pbar := tqdm(range(1000))):
+    trajectories = sampler.sample_trajectories(env=env, n_trajectories=16)
+    optimizer.zero_grad()
+    loss = gfn.loss(env, trajectories)
+    loss.backward()
+    optimizer.step()
+    if i % 25 == 0:
+        pbar.set_postfix({"loss": loss.item()})
+
 ```

 ## Contributing
diff --git a/test.py b/test.py
deleted file mode 100644
index 8c1593e6..00000000
--- a/test.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import torch
-from tqdm import tqdm
-
-from gfn.gflownet import TBGFlowNet, SubTBGFlowNet
-from gfn.gym import HyperGrid # We use the hyper grid environment
-from gfn.modules import DiscretePolicyEstimator
-from gfn.samplers import Sampler
-from gfn.utils import NeuralNet # NeuralNet is a simple multi-layer perceptron (MLP)
-
-if __name__ == "__main__":
-
-    # 1 - We define the environment.
-    env = HyperGrid(ndim=4, height=8, R0=0.01) # Grid of size 8x8x8x8
-
-    # 2 - We define the needed modules (neural networks).
-    # The environment has a preprocessor attribute, which is used to preprocess the state before feeding it to the policy estimator
-    module_PF = NeuralNet(
-        input_dim=env.preprocessor.output_dim,
-        output_dim=env.n_actions
-    ) # Neural network for the forward policy, with as many outputs as there are actions
-
-    module_PB = NeuralNet(
-        input_dim=env.preprocessor.output_dim,
-        output_dim=env.n_actions - 1,
-        torso=module_PF.torso # We share all the parameters of P_F and P_B, except for the last layer
-    )
-
-    # 3 - We define the estimators.
-    pf_estimator = DiscretePolicyEstimator(module_PF, env.n_actions, is_backward=False, preprocessor=env.preprocessor)
-    pb_estimator = DiscretePolicyEstimator(module_PB, env.n_actions, is_backward=True, preprocessor=env.preprocessor)
-
-    # 4 - We define the GFlowNet.
-    use_tb = False
-    if use_tb:
-        gfn = TBGFlowNet(init_logZ=0., pf=pf_estimator, pb=pb_estimator) # We initialize logZ to 0
-    else:
-        # import IPython; IPython.embed()
-        logF = DiscretePolicyEstimator(module=module_PF, n_actions=env.n_actions, preprocessor=env.preprocessor)
-        gfn = SubTBGFlowNet(pf=pf_estimator, pb=pb_estimator, logF=logF, lamda=0.9)
-
-
-    # 5 - We define the sampler and the optimizer.
-    sampler = Sampler(estimator=pf_estimator) # We use an on-policy sampler, based on the forward policy
-
-    # Policy parameters have their own LR.
-    if use_tb:
-        non_logz_params = [v for k, v in dict(gfn.named_parameters()).items() if k != "logZ"]
-        optimizer = torch.optim.Adam(non_logz_params, lr=1e-3)
-
-        # Log Z gets dedicated learning rate (typically higher).
-        logz_params = [dict(gfn.named_parameters())["logZ"]]
-        optimizer.add_param_group({"params": logz_params, "lr": 1e-1})
-    else:
-        import IPython; IPython.embed()
-        non_logz_params = [v for k, v in dict(gfn.named_parameters()).items() if k != "logF"]
-        optimizer = torch.optim.Adam(non_logz_params, lr=1e-3)
-
-        # Log Z gets dedicated learning rate (typically higher).
-        logz_params = [dict(gfn.named_parameters())["logF"]]
-        optimizer.add_param_group({"params": logz_params, "lr": 1e-1})
-
-    # 6 - We train the GFlowNet for 1000 iterations, with 16 trajectories per iteration
-    for i in (pbar := tqdm(range(1000))):
-        trajectories = sampler.sample_trajectories(env=env, n_trajectories=16)
-        optimizer.zero_grad()
-        loss = gfn.loss(env, trajectories)
-        loss.backward()
-        optimizer.step()
-        if i % 25 == 0:
-            pbar.set_postfix({"loss": loss.item()})

From 831815b96825a16f6add0171cfe5df7295ec49bb Mon Sep 17 00:00:00 2001
From: Joseph Viviano
Date: Fri, 20 Sep 2024 18:25:57 -0400
Subject: [PATCH 3/7] updated docs with extra example

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 38acaf4d..cb1b336b 100644
--- a/README.md
+++ b/README.md
@@ -111,7 +111,7 @@ for i in (pbar := tqdm(range(1000))):
         pbar.set_postfix({"loss": loss.item()})
 ```

-and in this example, we instead train using Sub Trajectory Balance. You can see we simply assemble our GFlowNet from slightly different building blocks
+and in this example, we instead train using Sub Trajectory Balance. You can see we simply assemble our GFlowNet from slightly different building blocks:

 ```python
 import torch

From 8984c94f42153fbb96fdcf7629df7167e9d5256f Mon Sep 17 00:00:00 2001
From: Joseph Viviano
Date: Fri, 20 Sep 2024 18:26:34 -0400
Subject: [PATCH 4/7] added helper methods and type checking for logF

---
 src/gfn/gflownet/detailed_balance.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/gfn/gflownet/detailed_balance.py b/src/gfn/gflownet/detailed_balance.py
index 3d97b1ad..c08966f2 100644
--- a/src/gfn/gflownet/detailed_balance.py
+++ b/src/gfn/gflownet/detailed_balance.py
@@ -37,10 +37,23 @@ def __init__(
         log_reward_clip_min: float = -float("inf"),
     ):
         super().__init__(pf, pb)
+        assert isinstance(logF, ScalarEstimator), "logF must be a ScalarEstimator"
         self.logF = logF
         self.forward_looking = forward_looking
         self.log_reward_clip_min = log_reward_clip_min

+    def logF_named_parameters(self):
+        try:
+            return {k: v for k, v in self.named_parameters() if "logF" in k}
+        except KeyError as e:
+            print("logF not found in self.named_parameters. Are the weights tied with PF? {}".format(e))
+
+    def logF_parameters(self):
+        try:
+            return [v for k, v in self.named_parameters() if "logF" in k]
+        except KeyError as e:
+            print("logF not found in self.named_parameters. Are the weights tied with PF? {}".format(e))
+
     def get_scores(
         self, env: Env, transitions: Transitions, recalculate_all_logprobs: bool = False
     ) -> Tuple[

From 0b62a2a573784b726e7c4bbce6de4f1719856117 Mon Sep 17 00:00:00 2001
From: Joseph Viviano
Date: Fri, 20 Sep 2024 18:26:52 -0400
Subject: [PATCH 5/7] added helper methods and type checking for logF

---
 src/gfn/gflownet/flow_matching.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gfn/gflownet/flow_matching.py b/src/gfn/gflownet/flow_matching.py
index 5764cb8e..193f7c07 100644
--- a/src/gfn/gflownet/flow_matching.py
+++ b/src/gfn/gflownet/flow_matching.py
@@ -23,13 +23,14 @@ class FMGFlowNet(GFlowNet[Tuple[DiscreteStates, DiscreteStates]]):
     3.2 of [GFlowNet Foundations](https://arxiv.org/abs/2111.09266).

     Attributes:
-        logF: LogEdgeFlowEstimator
+        logF: an estimator of log edge flows.
         alpha: weight for the reward matching loss.
     """

     def __init__(self, logF: DiscretePolicyEstimator, alpha: float = 1.0):
         super().__init__()
+        assert isinstance(logF, DiscretePolicyEstimator), "logF must be a Discrete Policy Estimator"
         self.logF = logF
         self.alpha = alpha

From 83f276a1d869979457c7d45d3ca9706d816c7940 Mon Sep 17 00:00:00 2001
From: Joseph Viviano
Date: Fri, 20 Sep 2024 18:27:08 -0400
Subject: [PATCH 6/7] added helper methods and type checking for logF

---
 src/gfn/gflownet/sub_trajectory_balance.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/gfn/gflownet/sub_trajectory_balance.py b/src/gfn/gflownet/sub_trajectory_balance.py
index 6e8b1324..0c14c497 100644
--- a/src/gfn/gflownet/sub_trajectory_balance.py
+++ b/src/gfn/gflownet/sub_trajectory_balance.py
@@ -70,12 +70,25 @@ def __init__(
         forward_looking: bool = False,
     ):
         super().__init__(pf, pb)
+        assert isinstance(logF, ScalarEstimator), "logF must be a ScalarEstimator"
         self.logF = logF
         self.weighting = weighting
         self.lamda = lamda
         self.log_reward_clip_min = log_reward_clip_min
         self.forward_looking = forward_looking

+    def logF_named_parameters(self):
+        try:
+            return {k: v for k, v in self.named_parameters() if "logF" in k}
+        except KeyError as e:
+            print("logF not found in self.named_parameters. Are the weights tied with PF? {}".format(e))
+
+    def logF_parameters(self):
+        try:
+            return [v for k, v in self.named_parameters() if "logF" in k]
+        except KeyError as e:
+            print("logF not found in self.named_parameters. Are the weights tied with PF? {}".format(e))
+
     def cumulative_logprobs(
         self,
         trajectories: Trajectories,

From 54af465314e92dcd230b022cc3fe79654ec57e75 Mon Sep 17 00:00:00 2001
From: Joseph Viviano
Date: Fri, 20 Sep 2024 18:27:32 -0400
Subject: [PATCH 7/7] added helper methods and type checking for logZ,
 including allowing the user to have a conditional logZ

---
 src/gfn/gflownet/trajectory_balance.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/gfn/gflownet/trajectory_balance.py b/src/gfn/gflownet/trajectory_balance.py
index 1f8799d9..3e9e88c7 100644
--- a/src/gfn/gflownet/trajectory_balance.py
+++ b/src/gfn/gflownet/trajectory_balance.py
@@ -10,7 +10,7 @@
 from gfn.containers import Trajectories
 from gfn.env import Env
 from gfn.gflownet.base import TrajectoryBasedGFlowNet
-from gfn.modules import GFNModule
+from gfn.modules import GFNModule, ScalarEstimator


 class TBGFlowNet(TrajectoryBasedGFlowNet):
@@ -23,7 +23,7 @@ class TBGFlowNet(TrajectoryBasedGFlowNet):
     the DAG, or a singleton thereof, if self.logit_PB is a fixed DiscretePBEstimator.

     Attributes:
-        logZ: a LogZEstimator instance.
+        logZ: a ScalarEstimator (for conditional GFNs) instance, or float.
         log_reward_clip_min: If finite, clips log rewards to this value.
     """

@@ -31,14 +31,17 @@ def __init__(
         self,
         pf: GFNModule,
         pb: GFNModule,
-        init_logZ: float = 0.0,
+        logZ: float | ScalarEstimator = 0.0,
         log_reward_clip_min: float = -float("inf"),
     ):
         super().__init__(pf, pb)
-        self.logZ = nn.Parameter(
-            torch.tensor(init_logZ)
-        ) # TODO: Optionally, this should be a nn.Module to support conditional GFNs.
+        if isinstance(logZ, float):
+            self.logZ = nn.Parameter(torch.tensor(logZ))
+        else:
+            assert isinstance(logZ, ScalarEstimator), "logZ must be either float or a ScalarEstimator"
+            self.logZ = logZ
+
         self.log_reward_clip_min = log_reward_clip_min

     def loss(