Commit 899dab07 authored by Maria Guaranda-Cabezas

adapts code to new experiment with new noise schedule

parent 228bd01c
@@ -51,7 +51,8 @@ if __name__ == '__main__':
       help='directory in which to store model checkpoints and training logs')
   parser.add_argument('data_dirs', nargs='+',
       help='space separated list of directories from which to read .wav files for training')
-  parser.add_argument('--max_steps', default=None, type=int,
+  # 1406250 is the number of steps in 900 epochs with a batch size of 64 for 100K samples.
+  parser.add_argument('--max_steps', default=1406250, type=int,
       help='maximum number of training steps')
   parser.add_argument('--fp16', action='store_true', default=False,
       help='use 16-bit floating point operations for training')
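The new default can be verified with quick arithmetic (a sanity check, not part of the commit; the 100K sample count comes from the comment above):

# 900 epochs * (100_000 samples / batch size of 64) = 1_406_250 steps
epochs, n_samples, batch_size = 900, 100_000, 64
print(epochs * n_samples / batch_size)  # 1406250.0
# Note: the DataLoader below uses drop_last=True, so an epoch actually yields
# floor(100_000 / 64) = 1562 full batches, i.e. 900 epochs = 1_405_800 steps.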
@@ -149,11 +149,7 @@ def from_path(args, params, is_distributed=False):
   if args.data_type == 'trajectories':
     dataset = ParticleDataset(path=data_dirs[0], for_diffwave=True)
   elif args.data_type == 'trajectories_x':
-    # the mean and standard deviation were previously computed
-    # in an exploration notebook
-    dataset = ParticleDatasetVx(path=data_dirs[0],
-                                transform=transforms.Compose([StandardScaler(mean=-0.0003, std=1.7358),
-                                                              ToDiffwaveTensor()]))
+    dataset = ParticleDatasetVx(path=data_dirs[0], transform=transforms.Compose([ToDiffwaveTensor()]))
   else:  # with condition
     dataset = ConditionalDataset(data_dirs)
   return torch.utils.data.DataLoader(
@@ -161,7 +157,7 @@ def from_path(args, params, is_distributed=False):
       batch_size=params.batch_size,
       collate_fn=LagrangianCollator().collate if 'trajectories' in args.data_type else Collator(params).collate,
       shuffle=not is_distributed,
-      num_workers=os.cpu_count(),  # 2 for cpu
+      num_workers=2,  # os.cpu_count()
       sampler=DistributedSampler(dataset) if is_distributed else None,
       pin_memory=True,
       drop_last=True)
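For reference, a minimal sketch of the StandardScaler transform removed above, assuming it standardizes each sample elementwise with precomputed dataset statistics (a reconstruction, not code from this repository):

class StandardScaler:
    """Hypothetical reconstruction of the removed transform: x -> (x - mean) / std."""
    def __init__(self, mean, std):
        # mean=-0.0003 and std=1.7358 were precomputed over the velocity dataset
        self.mean = mean
        self.std = std

    def __call__(self, x):
        return (x - self.mean) / self.std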
@@ -124,7 +124,7 @@ class DiffWaveLearner:
         if torch.isnan(loss).any():
           raise RuntimeError(f'Detected NaN loss at step {self.step}.')
         if self.is_master:
-          if self.step % 50 == 0:
+          if self.step % 500 == 0:
             self._write_summary(self.step, features, loss)
           if self.step % self.params.checkpoints_hop == 0:
             self.save_to_checkpoint()
@@ -14,7 +14,7 @@
 # ==============================================================================
 import numpy as np
+from utils import tanh61_beta_schedule

 class AttrDict(dict):
   def __init__(self, *args, **kwargs):
@@ -47,14 +47,15 @@ params = AttrDict(
     crop_mel_frames=62,  # Probably an error in paper.
     # Model params
-    residual_layers=8,
+    residual_layers=7,
     residual_channels=32,
-    dilation_cycle_length=8,  # with this config and residual layers = 8, we get r=511*T
+    dilation_cycle_length=21,  # with cycle=1 and residual layers = 8, we get r=511*T
     unconditional=True,
-    noise_schedule=np.linspace(1e-4, 0.01, 200).tolist(),  # last param is num_timesteps
-    inference_noise_schedule=[1e-4, 0.001, 0.01, 0.05, 0.2, 0.5],  # for fast sampling
+    # noise_schedule=np.linspace(1e-4, 0.01, 200).tolist(),  # last param is num_timesteps
+    noise_schedule=tanh61_beta_schedule(200).tolist(),
+    inference_noise_schedule=[1e-4, 0.001, 0.01, 0.05, 0.2, 0.5, 0.6, 0.7, 0.8, 0.9],  # for fast sampling
     audio_len=2000,  # length of generated samples
-    checkpoints_hop=50000  # how often to save checkpoints
+    checkpoints_hop=100000  # how often to save checkpoints
 )
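The receptive-field comments above can be reproduced with a short calculation, assuming DiffWave's kernel size of 3 and per-layer dilations of 2**(i % dilation_cycle_length) (a sketch, not part of the commit):

def receptive_field(residual_layers, dilation_cycle_length, kernel_size=3):
    # each dilated conv layer widens the receptive field by (kernel_size - 1) * dilation
    dilations = [2 ** (i % dilation_cycle_length) for i in range(residual_layers)]
    return (kernel_size - 1) * sum(dilations) + 1

print(receptive_field(8, 8))   # 511 -> old config: dilations 1, 2, ..., 128
print(receptive_field(7, 21))  # 255 -> new config: dilations 1, 2, ..., 64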
utils.py (new file added by this commit, imported in params above):
import math


def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999):
    """
    Create a beta schedule that discretizes the given alpha_t_bar function,
    which defines the cumulative product of (1-beta) over time from t = [0,1].

    :param num_diffusion_timesteps: the number of betas to produce.
    :param alpha_bar: a lambda that takes an argument t from 0 to 1 and
                      produces the cumulative product of (1-beta) up to that
                      part of the diffusion process.
    :param max_beta: the maximum beta to use; use values lower than 1 to
                     prevent singularities.
    :return: a list of betas.

    Taken from https://github.com/SmartTURB/diffusion-lagr/blob/master/guided_diffusion/gaussian_diffusion.py
    """
    betas = []
    for i in range(num_diffusion_timesteps):
        t1 = i / num_diffusion_timesteps
        t2 = (i + 1) / num_diffusion_timesteps
        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
    return betas


def tanh61_beta_schedule(timesteps, t0=6, t1=1):
    """
    tanh6-1 schedule
    """
    return betas_for_alpha_bar(
        timesteps,
        lambda t: -math.tanh((t0 + t1) * t - t0) + math.tanh(t1),
    )
\ No newline at end of file
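A quick sanity check of the new schedule (not part of the commit): unlike the old linear schedule, the tanh6-1 betas stay tiny for most of the 200 steps and ramp up sharply to max_beta near the end, so the cumulative signal level alpha_bar decays to zero.

import numpy as np

betas = np.array(tanh61_beta_schedule(200))
assert betas.shape == (200,) and (betas > 0).all() and (betas <= 0.999).all()
alpha_bar = np.cumprod(1.0 - betas)  # remaining signal level after each step
print(betas[0], betas[-1], alpha_bar[-1])  # ~5.8e-7, 0.999, ~0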