adds comments to code

d872c448 · Maria Guaranda-Cabezas · 4b64fcec · d872c448 · d872c448 · d872c448
Commit d872c448 authored 1 year ago by Maria Guaranda-Cabezas
--- a/src/data/lagrangian_datatools.py
+++ b/src/data/lagrangian_datatools.py
@@ -75,6 +75,7 @@ class CollatorForDiffwave:
        pass
    def collate(self, minibatch):
+        # shape is (batch_size, trajectory_length)
        trajectories = np.stack([record ['audio']for record in minibatch])
        return {
            'audio': torch.from_numpy(trajectories),

--- a/src/diffwave/learner.py
+++ b/src/diffwave/learner.py
@@ -28,6 +28,17 @@ from params import AttrDict
 def _nested_map(struct, map_fn):
+  '''
+  This function recursively descends into a nested structure until it reaches
+  a leaf (e.g. a tensor), and then applies map_fn to it (e.g. sends it to a device).
+  Example:
+  if struct is a dict like:
+  x = {"audio": Tensor(64,22000),
+   "spectrogram": Tensor(64,1024,128)}
+  and map_fn is a function that sends a tensor to a device, then the result is
+  x = {"audio": Tensor(64,22000).to(device),
+    "spectrogram": Tensor(64,1024,128).to(device)}
+  '''
  if isinstance(struct, tuple):
    return tuple(_nested_map(x, map_fn) for x in struct)
  if isinstance(struct, list):
@@ -101,6 +112,8 @@ class DiffWaveLearner:
  def train(self, max_steps=None):
    device = next(self.model.parameters()).device
    while True:
+      # number of epochs = max_steps / num_batches
+      # e.g. for max_steps = 100000 and num_batches = 1000, we have 100 epochs
      for features in tqdm(self.dataset, desc=f'Epoch {self.step // len(self.dataset)}') if self.is_master else self.dataset:
        if max_steps is not None and self.step >= max_steps:
          # Save final checkpoint.

--- a/src/diffwave/model.py
+++ b/src/diffwave/model.py
@@ -37,6 +37,9 @@ def silu(x):
 class DiffusionEmbedding(nn.Module):
+  '''
+  Sinusoidal embedding for diffusion step.
+  '''
  def __init__(self, max_steps):
    super().__init__()
    self.register_buffer('embedding', self._build_embedding(max_steps), persistent=False)
@@ -147,7 +150,8 @@ class DiffWave(nn.Module):
  def forward(self, audio, diffusion_step, spectrogram=None):
    assert (spectrogram is None and self.spectrogram_upsampler is None) or \
           (spectrogram is not None and self.spectrogram_upsampler is not None)
-    x = audio.unsqueeze(1) # watch out for this, we can leave this to the dataloader actually
+    # NOTE: the channel dimension is added here; this could be left to the dataloader instead
+    x = audio.unsqueeze(1) # shape is (batch_size, 1, trajectory_length)
    x = self.input_projection(x)
    x = F.relu(x)