Class: Trainers::Trainer

Inherits: Object

Inheritance chain: Object → Trainers::Trainer

Defined in: lib/trainers/trainer.rb

Instance Attribute Summary collapse

#args ⇒ Object readonly

Returns the value of attribute args.
#control ⇒ Object readonly

Returns the value of attribute control.
#data_collator ⇒ Object readonly

Returns the value of attribute data_collator.
#eval_dataset ⇒ Object readonly

Returns the value of attribute eval_dataset.
#lr_scheduler ⇒ Object readonly

Returns the value of attribute lr_scheduler.
#model ⇒ Object readonly

Returns the value of attribute model.
#optimizer ⇒ Object readonly

Returns the value of attribute optimizer.
#state ⇒ Object readonly

Returns the value of attribute state.
#tokenizer ⇒ Object readonly

Returns the value of attribute tokenizer.
#train_dataset ⇒ Object readonly

Returns the value of attribute train_dataset.

Instance Method Summary collapse

#evaluate(eval_dataset: nil) ⇒ Object
#initialize(model:, args: nil, train_dataset: nil, eval_dataset: nil, tokenizer: nil, data_collator: nil, compute_metrics: nil, callbacks: []) ⇒ Trainer constructor

A new instance of Trainer.
#predict(test_dataset) ⇒ Object
#save_model(output_dir = nil) ⇒ Object
#train ⇒ Object

Constructor Details

#initialize(model:, args: nil, train_dataset: nil, eval_dataset: nil, tokenizer: nil, data_collator: nil, compute_metrics: nil, callbacks: []) ⇒ `Trainer`

Returns a new instance of Trainer.

# File 'lib/trainers/trainer.rb', line 8

# Builds a trainer around +model+.
#
# @param model [Object] the model to train; stored as given.
# @param args [Object, nil] training configuration; a new TrainingArguments
#   is created when nil.
# @param train_dataset [Object, nil] dataset iterated by #train.
# @param eval_dataset [Object, nil] default dataset for #evaluate.
# @param tokenizer [Object, nil] stored and handed to SaveUtils by #save_model.
# @param data_collator [Object, nil] a new DefaultDataCollator is created
#   when nil.
# @param compute_metrics [#call, nil] called by #evaluate with an
#   EvalPrediction.
# @param callbacks [Array, nil] extra callbacks, registered after the
#   built-in PrinterCallback; nil is treated as an empty list.
def initialize(
  model:,
  args: nil,
  train_dataset: nil,
  eval_dataset: nil,
  tokenizer: nil,
  data_collator: nil,
  compute_metrics: nil,
  callbacks: []
)
  @model           = model
  @args            = args || TrainingArguments.new
  @train_dataset   = train_dataset
  @eval_dataset    = eval_dataset
  @tokenizer       = tokenizer
  @data_collator   = data_collator || DefaultDataCollator.new
  @compute_metrics = compute_metrics
  @state           = TrainerState.new
  @control         = TrainerControl.new

  # Both are created by #train; set here so their readers have an explicit
  # value before training starts.
  @optimizer    = nil
  @lr_scheduler = nil

  # `callbacks || []` tolerates an explicit nil, like the other optional
  # keywords; Array#+ builds a new array, so the caller's list is untouched.
  all_callbacks = [PrinterCallback.new] + (callbacks || [])
  @callback_handler = CallbackHandler.new(all_callbacks)
end

Instance Attribute Details

#args ⇒ `Object` (readonly)

Returns the value of attribute args.



5
6
7

# File 'lib/trainers/trainer.rb', line 5

# Training configuration: the `args:` value given to the constructor, or the
# TrainingArguments instance the constructor created when none was supplied.
def args
  @args
end

#control ⇒ `Object` (readonly)

Returns the value of attribute control.



5
6
7

# File 'lib/trainers/trainer.rb', line 5

# The TrainerControl created by the constructor. It is passed to every
# callback, and #train reads its should_training_stop / should_epoch_stop
# flags to leave the loop early.
def control
  @control
end

#data_collator ⇒ `Object` (readonly)

Returns the value of attribute data_collator.



5
6
7

# File 'lib/trainers/trainer.rb', line 5

# The `data_collator:` value given to the constructor, or the
# DefaultDataCollator instance the constructor created when none was supplied.
def data_collator
  @data_collator
end

#eval_dataset ⇒ `Object` (readonly)

Returns the value of attribute eval_dataset.



5
6
7

# File 'lib/trainers/trainer.rb', line 5

# The `eval_dataset:` value given to the constructor (may be nil). #evaluate
# falls back to it when called without an explicit dataset.
def eval_dataset
  @eval_dataset
end

#lr_scheduler ⇒ `Object` (readonly)

Returns the value of attribute lr_scheduler.



5
6
7

# File 'lib/trainers/trainer.rb', line 5

# The scheduler assigned by #train (from create_scheduler). The constructor
# does not create one, so this is nil before #train has run.
def lr_scheduler
  @lr_scheduler
end

#model ⇒ `Object` (readonly)

Returns the value of attribute model.



5
6
7

# File 'lib/trainers/trainer.rb', line 5

# The model passed to the constructor via the required `model:` keyword.
def model
  @model
end

#optimizer ⇒ `Object` (readonly)

Returns the value of attribute optimizer.



5
6
7

# File 'lib/trainers/trainer.rb', line 5

# The optimizer assigned by #train (from create_optimizer). The constructor
# does not create one, so this is nil before #train has run.
def optimizer
  @optimizer
end

#state ⇒ `Object` (readonly)

Returns the value of attribute state.



5
6
7

# File 'lib/trainers/trainer.rb', line 5

# The TrainerState created by the constructor. #train updates its step, epoch
# and log_history fields and returns this same object.
def state
  @state
end

#tokenizer ⇒ `Object` (readonly)

Returns the value of attribute tokenizer.



5
6
7

# File 'lib/trainers/trainer.rb', line 5

# The `tokenizer:` value given to the constructor (may be nil). #save_model
# passes it to SaveUtils.save_pretrained alongside the model.
def tokenizer
  @tokenizer
end

#train_dataset ⇒ `Object` (readonly)

Returns the value of attribute train_dataset.



5
6
7

# File 'lib/trainers/trainer.rb', line 5

# The `train_dataset:` value given to the constructor (may be nil). #train
# reads its size and iterates it in batches.
def train_dataset
  @train_dataset
end

Instance Method Details

#evaluate(eval_dataset: nil) ⇒ `Object`

Raises:

ArgumentError — when no eval_dataset is passed and the trainer was constructed without one.

# File 'lib/trainers/trainer.rb', line 139

# Runs the model over an evaluation dataset without gradients and collects
# metrics.
#
# The model is switched to eval mode for the pass and always switched back to
# train mode afterwards, even when a batch raises.
#
# @param eval_dataset [Object, nil] dataset to evaluate; falls back to the
#   dataset given to the constructor when nil.
# @return [Hash] :eval_loss is the mean cross-entropy over the batches that
#   carried labels (omitted when none did), merged with whatever the
#   compute_metrics callable returns.
# @raise [ArgumentError] when neither dataset is available.
def evaluate(eval_dataset: nil)
  dataset = eval_dataset || @eval_dataset
  raise ArgumentError, "No eval_dataset provided" unless dataset

  device = @args.resolved_device

  all_preds    = []
  all_labels   = []
  total_loss   = 0.0
  loss_batches = 0 # only batches that contributed a loss value

  @model.eval
  begin
    Torch.no_grad do
      each_batch(dataset, @args.per_device_eval_batch_size) do |batch|
        batch  = move_to_device(batch, device)
        # Labels are removed from the batch before the forward pass; both
        # symbol and string keys are accepted.
        labels = batch.delete(:labels) || batch.delete("labels")

        output = forward(batch)
        logits = output.respond_to?(:logits) ? output.logits : output

        if labels
          total_loss += Torch::NN::F.cross_entropy(logits, labels).item
          loss_batches += 1
          all_labels << labels.detach.cpu
        end

        all_preds << logits.detach.cpu
      end
    end
  ensure
    # Restore training mode no matter how the pass ended.
    @model.train
  end

  metrics = {}
  # Average over labelled batches only; unlabelled batches add no loss and
  # must not dilute the mean (or produce a fake 0.0 when nothing had labels).
  metrics[:eval_loss] = total_loss / loss_batches if loss_batches.positive?

  if @compute_metrics && all_preds.any? && all_labels.any?
    eval_pred = EvalPrediction.new(
      predictions: Torch.cat(all_preds),
      label_ids:   Torch.cat(all_labels)
    )
    metrics.merge!(@compute_metrics.call(eval_pred))
  end

  metrics
end

#predict(test_dataset) ⇒ `Object`

# File 'lib/trainers/trainer.rb', line 187

# Produces model outputs for every batch of +test_dataset+.
#
# The model is put into eval mode (and left there) and gradients are
# disabled for the pass. For each batch the output's `logits` are used when
# the output responds to that message; otherwise the output itself is used.
#
# @param test_dataset [Object] dataset handed to each_batch.
# @return [Object] the per-batch results, detached, moved to CPU and
#   concatenated with Torch.cat.
def predict(test_dataset)
  target_device = @args.resolved_device
  @model.eval

  collected = []
  Torch.no_grad do
    each_batch(test_dataset, @args.per_device_eval_batch_size) do |raw_batch|
      result = forward(move_to_device(raw_batch, target_device))
      scores = result.respond_to?(:logits) ? result.logits : result
      collected.push(scores.detach.cpu)
    end
  end

  Torch.cat(collected)
end

#save_model(output_dir = nil) ⇒ `Object`

# File 'lib/trainers/trainer.rb', line 204

# Saves the model and tokenizer through SaveUtils.save_pretrained, passing
# the training arguments along as `training_args:`.
#
# @param output_dir [Object, nil] destination; the output_dir from the
#   training arguments is used when nil.
# @return [Object] whatever SaveUtils.save_pretrained returns.
def save_model(output_dir = nil)
  destination = output_dir || @args.output_dir
  SaveUtils.save_pretrained(@model, @tokenizer, destination, training_args: @args)
end

#train ⇒ `Object`

# File 'lib/trainers/trainer.rb', line 32

# Runs the training loop over the constructor's train_dataset for
# @args.num_train_epochs epochs.
#
# For every batch it computes the loss, backpropagates it (divided by
# gradient_accumulation_steps when that is greater than 1), and steps the
# optimizer and scheduler once every gradient_accumulation_steps batches.
# Logging, step/epoch-based evaluation and checkpointing follow the
# strategies in @args. Callbacks are fired at train, epoch and step
# boundaries and may stop the epoch or the whole run through @control.
#
# @return [Object] the trainer state (the same object as #state).
# @raise [ArgumentError] when the trainer has no train_dataset.
def train
  # Fail with the same error style as #evaluate instead of a NoMethodError
  # on nil further down.
  raise ArgumentError, "No train_dataset provided" unless @train_dataset

  device = @args.resolved_device
  @model.to(device)
  @model.train

  num_examples    = @train_dataset.size
  batch_size      = @args.per_device_train_batch_size
  steps_per_epoch = (num_examples.to_f / batch_size).ceil
  total_steps     = steps_per_epoch * @args.num_train_epochs

  @state.max_steps        = total_steps
  @state.num_train_epochs = @args.num_train_epochs

  @optimizer = create_optimizer
  # NOTE(review): total_steps counts batches, while the scheduler below is
  # stepped only once per gradient_accumulation_steps batches — confirm that
  # create_scheduler expects the batch count rather than the update count.
  @lr_scheduler = create_scheduler(total_steps)

  @callback_handler.fire(:on_train_begin, @args, @state, @control)

  @args.num_train_epochs.times do |epoch|
    @state.epoch = epoch + 1
    @callback_handler.fire(:on_epoch_begin, @args, @state, @control)
    @model.train

    epoch_loss  = 0.0
    epoch_steps = 0

    each_batch(@train_dataset, batch_size, shuffle: true) do |batch|
      @callback_handler.fire(:on_step_begin, @args, @state, @control)

      batch = move_to_device(batch, device)
      loss  = compute_loss(batch)

      # Scale so that accumulated gradients average rather than sum.
      scaled_loss = if @args.gradient_accumulation_steps > 1
                      loss / @args.gradient_accumulation_steps
                    else
                      loss
                    end

      scaled_loss.backward

      # The unscaled loss is what gets reported.
      epoch_loss  += loss.item
      epoch_steps += 1
      @state.global_step += 1

      # Apply the accumulated gradients every gradient_accumulation_steps
      # batches; global_step is not reset per epoch, so accumulation windows
      # may span an epoch boundary.
      if @state.global_step % @args.gradient_accumulation_steps == 0
        clip_grad_norm!(@model.parameters, @args.max_grad_norm)
        @optimizer.step
        @lr_scheduler.step
        @optimizer.zero_grad
      end

      # Logging: running mean of the loss over the current epoch.
      if should_log?
        logs = {
          loss:          epoch_loss / epoch_steps,
          learning_rate: current_lr,
          epoch:         @state.epoch
        }
        @state.log_history << logs.merge(step: @state.global_step)
        @callback_handler.fire(:on_log, @args, @state, @control, logs: logs)
      end

      # Step-based evaluation. Skipped when there is no eval dataset, as in
      # the epoch-based branch below; otherwise #evaluate would raise and
      # abort training mid-run.
      if @args.eval_strategy == :steps && @args.eval_steps && @eval_dataset &&
         @state.global_step % @args.eval_steps == 0
        metrics = evaluate
        @callback_handler.fire(:on_evaluate, @args, @state, @control, metrics: metrics)
      end

      # Step-based saving
      if @args.save_strategy == :steps && @args.save_steps &&
         @state.global_step % @args.save_steps == 0
        save_checkpoint
        @callback_handler.fire(:on_save, @args, @state, @control)
      end

      @callback_handler.fire(:on_step_end, @args, @state, @control)
      break if @control.should_training_stop || @control.should_epoch_stop
    end

    # Epoch-level logging
    epoch_avg_loss = epoch_steps > 0 ? epoch_loss / epoch_steps : 0.0
    logs = { loss: epoch_avg_loss, learning_rate: current_lr, epoch: @state.epoch }
    @state.log_history << logs.merge(step: @state.global_step)
    @callback_handler.fire(:on_log, @args, @state, @control, logs: logs)

    # Epoch-based evaluation
    if @args.eval_strategy == :epoch && @eval_dataset
      metrics = evaluate
      @callback_handler.fire(:on_evaluate, @args, @state, @control, metrics: metrics)
    end

    # Epoch-based saving
    if @args.save_strategy == :epoch
      save_checkpoint
      @callback_handler.fire(:on_save, @args, @state, @control)
    end

    @callback_handler.fire(:on_epoch_end, @args, @state, @control)
    # An epoch-stop request only applies to the epoch that just ended.
    @control.should_epoch_stop = false
    break if @control.should_training_stop
  end

  @callback_handler.fire(:on_train_end, @args, @state, @control)
  @state
end

Class: Trainers::Trainer

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(model:, args: nil, train_dataset: nil, eval_dataset: nil, tokenizer: nil, data_collator: nil, compute_metrics: nil, callbacks: []) ⇒ Trainer

Instance Attribute Details

#args ⇒ Object (readonly)

#control ⇒ Object (readonly)

#data_collator ⇒ Object (readonly)

#eval_dataset ⇒ Object (readonly)

#lr_scheduler ⇒ Object (readonly)

#model ⇒ Object (readonly)

#optimizer ⇒ Object (readonly)

#state ⇒ Object (readonly)

#tokenizer ⇒ Object (readonly)

#train_dataset ⇒ Object (readonly)