Class: Toy::Trainer

Inherits:

Object

Object
Toy::Trainer

show all

Defined in: lib/toy/train/toy_trainer.rb

Instance Attribute Summary collapse

#beta1 ⇒ Object

Returns the value of attribute beta1.
#beta2 ⇒ Object

Returns the value of attribute beta2.
#eps ⇒ Object

Returns the value of attribute eps.
#grads ⇒ Object

Returns the value of attribute grads.
#lr_max ⇒ Object

Returns the value of attribute lr_max.
#lr_min ⇒ Object

Returns the value of attribute lr_min.
#model ⇒ Object

Returns the value of attribute model.
#optimizer ⇒ Object

Returns the value of attribute optimizer.
#schedule ⇒ Object

Returns the value of attribute schedule.
#step_idx ⇒ Object

Returns the value of attribute step_idx.
#total_steps ⇒ Object

Returns the value of attribute total_steps.
#warmup ⇒ Object

Returns the value of attribute warmup.

Instance Method Summary collapse

#initialize(model) ⇒ Trainer constructor

Defaults match the train_tinystories.rb constants — sensible starting points for a small transformer LM.
#lr ⇒ Object

Convenience: current learning rate.
#reset_optimizer! ⇒ Object

Reset optimizer state (e.g. after a warm-up step that you don’t want to count).
#step!(seq) ⇒ Object

One optimizer step on a single sequence.

Constructor Details

#initialize(model) ⇒ `Trainer`

Defaults match the train_tinystories.rb constants — sensible starting points for a small transformer LM.

# File 'lib/toy/train/toy_trainer.rb', line 45

# Builds a trainer around +model+ using fixed default hyperparameters.
#
# Stores the defaults in instance variables, then constructs the gradient
# buffer, the optimizer and the learning-rate schedule from them.
#
# @param model [Object] must respond to vocab_size, d_model, d_ff, n_heads,
#   d_head, n_layers and context_length (all are read here)
def initialize(model)
  @model = model
  @step_idx = 0

  # Values handed to Adam.new below.
  @beta1 = 0.9
  @beta2 = 0.999
  @eps = 0.00000001

  # Values handed to LRSchedule.new below.
  @warmup = 200
  @total_steps = 1000
  @lr_max = 0.001
  @lr_min = 0.00001

  # Model dimensions, in the positional order Gradients.new is called with.
  dims = [
    model.vocab_size, model.d_model, model.d_ff,
    model.n_heads, model.d_head, model.n_layers,
    model.context_length
  ]
  @grads = Gradients.new(*dims)
  @optimizer = Adam.new(model, @beta1, @beta2, @eps)
  @schedule = LRSchedule.new(@warmup, @total_steps, @lr_max, @lr_min)
end

Instance Attribute Details

#beta1 ⇒ `Object`

Returns the value of attribute beta1.



39
40
41

# File 'lib/toy/train/toy_trainer.rb', line 39

# @return [Float] current @beta1; the constructor sets it to 0.9 and passes
#   it to Adam.new
def beta1 = @beta1

#beta2 ⇒ `Object`

Returns the value of attribute beta2.



39
40
41

# File 'lib/toy/train/toy_trainer.rb', line 39

# @return [Float] current @beta2; the constructor sets it to 0.999 and passes
#   it to Adam.new
def beta2 = @beta2

#eps ⇒ `Object`

Returns the value of attribute eps.



39
40
41

# File 'lib/toy/train/toy_trainer.rb', line 39

# @return [Float] current @eps; the constructor sets it to 0.00000001 and
#   passes it to Adam.new
def eps = @eps

#grads ⇒ `Object`

Returns the value of attribute grads.



39
40
41

# File 'lib/toy/train/toy_trainer.rb', line 39

# @return [Gradients] the gradient object built in the constructor; #step!
#   zeroes it, passes it to the backward pass and reads the loss from it
def grads = @grads

#lr_max ⇒ `Object`

Returns the value of attribute lr_max.



39
40
41

# File 'lib/toy/train/toy_trainer.rb', line 39

# @return [Float] current @lr_max; the constructor sets it to 0.001 and
#   passes it to LRSchedule.new
def lr_max = @lr_max

#lr_min ⇒ `Object`

Returns the value of attribute lr_min.



39
40
41

# File 'lib/toy/train/toy_trainer.rb', line 39

# @return [Float] current @lr_min; the constructor sets it to 0.00001 and
#   passes it to LRSchedule.new
def lr_min = @lr_min

#model ⇒ `Object`

Returns the value of attribute model.



39
40
41

# File 'lib/toy/train/toy_trainer.rb', line 39

# @return [Object] the model that was handed to the constructor
def model = @model

#optimizer ⇒ `Object`

Returns the value of attribute optimizer.



39
40
41

# File 'lib/toy/train/toy_trainer.rb', line 39

# @return [Adam] the optimizer built in the constructor from the model,
#   @beta1, @beta2 and @eps
def optimizer = @optimizer

#schedule ⇒ `Object`

Returns the value of attribute schedule.



39
40
41

# File 'lib/toy/train/toy_trainer.rb', line 39

# @return [LRSchedule] the schedule built in the constructor from @warmup,
#   @total_steps, @lr_max and @lr_min
def schedule = @schedule

#step_idx ⇒ `Object`

Returns the value of attribute step_idx.



39
40
41

# File 'lib/toy/train/toy_trainer.rb', line 39

# @return [Integer] current @step_idx; starts at 0 in the constructor and is
#   incremented by one on every #step! call
def step_idx = @step_idx

#total_steps ⇒ `Object`

Returns the value of attribute total_steps.



39
40
41

# File 'lib/toy/train/toy_trainer.rb', line 39

# @return [Integer] current @total_steps; the constructor sets it to 1000 and
#   passes it to LRSchedule.new
def total_steps = @total_steps

#warmup ⇒ `Object`

Returns the value of attribute warmup.



39
40
41

# File 'lib/toy/train/toy_trainer.rb', line 39

# @return [Integer] current @warmup; the constructor sets it to 200 and
#   passes it to LRSchedule.new
def warmup = @warmup

Instance Method Details

#lr ⇒ `Object`

Convenience: current learning rate.



82
83
84

# File 'lib/toy/train/toy_trainer.rb', line 82

# Learning rate for the current step: asks the schedule for its value at
# the current step index.
#
# @return [Object] whatever @schedule.at returns for @step_idx
def lr
  current_step = @step_idx
  @schedule.at(current_step)
end

#reset_optimizer! ⇒ `Object`

Reset optimizer state (e.g. after a warm-up step that you don’t want to count). Step counter stays where it is — change @step_idx by hand if you want that too.



77
78
79

# File 'lib/toy/train/toy_trainer.rb', line 77

# Calls +reset+ on the optimizer. @step_idx is not touched here.
#
# @return [Object] whatever the optimizer's +reset+ returns
def reset_optimizer!
  opt = @optimizer
  opt.reset
end

#step!(seq) ⇒ `Object`

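One optimizer step on a single sequence. Returns the loss. The short body is the whole point: zero the gradients, run the forward and backward passes, apply the optimizer update, and advance the step counter — this is what training is.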

# File 'lib/toy/train/toy_trainer.rb', line 65

# Runs one training step on +seq+ and reports the loss.
#
# @param seq [Object] passed unchanged to @model.forward and @model.backward
# @return [Object] the value of @grads.loss after the update
def step!(seq)
  # Clear the gradient object before the backward pass writes into it.
  @grads.fill_zero

  @model.forward(seq)
  @model.backward(seq, @grads)

  # Look up the rate for the current index before the index advances.
  rate = @schedule.at(@step_idx)
  @optimizer.step(@grads, rate)
  @step_idx += 1

  @grads.loss
end

Class: Toy::Trainer

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(model) ⇒ Trainer

Instance Attribute Details

#beta1 ⇒ Object

#beta2 ⇒ Object

#eps ⇒ Object

#grads ⇒ Object

#lr_max ⇒ Object

#lr_min ⇒ Object

#model ⇒ Object

#optimizer ⇒ Object

#schedule ⇒ Object

#step_idx ⇒ Object

#total_steps ⇒ Object

#warmup ⇒ Object