Class: RemoteStep

Inherits:
Step
  • Object
show all
Defined in:
lib/rbbt/workflow/remote_workflow/remote_step.rb,
lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb,
lib/rbbt/workflow/remote_workflow/remote_step/rest.rb

Defined Under Namespace

Modules: REST, SSH

Constant Summary

Constants inherited from Step

Step::INFO_SERIALIZER, Step::MAIN_RSYNC_ARGS, Step::STREAM_CACHE, Step::STREAM_CACHE_MUTEX

Instance Attribute Summary collapse

Attributes inherited from Step

#bindings, #dependencies, #mutex, #original_task_name, #original_workflow, #overriden, #pid, #relocated, #result, #seen, #workflow

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Step

#aborted?, #access, #archive, archive, #archive_deps, #archived_info, #archived_inputs, #canfail_paths, clean, #clean, #config, #dirty?, #dirty_files, dup_stream, #error?, #exception, #execute_and_dup, #execute_dependency, files_dir, #files_dir, #get_exception, info_file, #info_file, #info_lock, #input_dependencies, job_files_for_archive, job_name_for_info_file, #knowledge_base, link_job, #load_dependencies_from_info, #load_file, #load_inputs_from_info, load_serialized_info, log, #log, log_block, #log_dependency_exec, log_progress, #log_progress, log_string, md5_file, #md5_file, #merge_info, #message, #messages, migrate, migrate_source_paths, #missing?, #monitor_stream, #noinfo?, #old_exec, #overriden?, #overriden_deps, pid_file, #pid_file, prepare_for_execution, produce_dependencies, #progress_bar, prov_report, prov_report_msg, prov_status_msg, #provenance, #provenance_paths, purge, purge_stream_cache, #python, #python_block, #python_file, raise_dependency_error, #rec_access, #rec_dependencies, #recoverable_error?, #relocated?, #reset_info, #resumable?, #run_compute_dependencies, #run_dependencies, #save_file, save_inputs, serialize_info, #set_info, #short_path_real, #soft_grace, #stalled?, #status=, status_color, #status_lock, step_info, #stop_dependencies, #streaming?, #task_signature, tmp_path, #tmp_path, wait_for_jobs, #waiting?

Constructor Details

#initialize(base_url, task = nil, base_name = nil, inputs = nil, input_types = nil, result_type = nil, result_description = nil, is_exec = false, is_stream = false, stream_input = nil, refresh_time = nil) ⇒ RemoteStep

Returns a new instance of RemoteStep.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 7

# Builds a handle for a job that runs on a remote workflow server.
#
# Every argument is stored in the instance variable of the same name. A
# base_url without a "scheme://" prefix gets "http://" prepended. When the
# base_url passed by the caller contains "ssh:" the step uses the
# RemoteWorkflow::SSH adaptor and is extended with RemoteStep::SSH; otherwise
# it uses RemoteWorkflow::REST and is extended with RemoteStep::REST.
#
# When refresh_time is not given it is looked up through Rbbt::Config with the
# keys of the selected transport, falling back to that transport's
# DEFAULT_REFRESH_TIME.
def initialize(base_url, task = nil, base_name = nil, inputs = nil, input_types = nil, result_type = nil, result_description = nil, is_exec = false, is_stream = false, stream_input = nil, refresh_time = nil)
  @base_url = base_url
  @task = task
  @base_name = base_name
  @inputs = inputs
  @input_types = input_types
  @result_type = result_type
  @result_description = result_description
  @is_exec = is_exec
  @is_stream = is_stream
  @stream_input = stream_input
  @refresh_time = refresh_time

  # Interpolation builds a fresh string instead of appending to a literal.
  @base_url = "http://#{@base_url}" unless @base_url =~ /^[a-z]+:\/\//
  @mutex = Mutex.new
  rest = !base_url.include?('ssh:')

  if rest
    @adaptor = RemoteWorkflow::REST
    self.extend RemoteStep::REST
    # REST jobs read the REST-specific configuration keys.
    @refresh_time ||= Rbbt::Config.get(:remote_refresh_time, :refresh_time, :rest_refresh_time, :rest, :REST, :default => RemoteStep::REST::DEFAULT_REFRESH_TIME)
  else
    @adaptor = RemoteWorkflow::SSH
    self.extend RemoteStep::SSH
    # SSH jobs read the SSH-specific configuration keys.
    @refresh_time ||= Rbbt::Config.get(:remote_refresh_time, :refresh_time, :ssh_refresh_time, :ssh, :SSH, :default => RemoteStep::SSH::DEFAULT_REFRESH_TIME)
  end
end

Instance Attribute Details

#base_name ⇒ Object

Returns the value of attribute base_name.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @base_name, which the constructor sets from its base_name
# argument. clean_name returns the same value.
def base_name
  @base_name
end

#base_url ⇒ Object

Returns the value of attribute base_url.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @base_url. The constructor stores its base_url argument here and
# prepends "http://" when the value has no "scheme://" prefix.
def base_url
  @base_url
end

#input_types ⇒ Object

Returns the value of attribute input_types.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @input_types, which the constructor sets from its input_types
# argument.
def input_types
  @input_types
end

#inputs ⇒ Object

Returns the value of attribute inputs.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @inputs. The constructor sets it from its inputs argument; the
# instance methods get_streams, dup_inputs and _restart each replace it with a
# newly built Hash.
def inputs
  @inputs
end

#is_exec ⇒ Object

Returns the value of attribute is_exec.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @is_exec (constructor argument, false by default). When it is
# truthy, run delegates to exec and name returns nil.
def is_exec
  @is_exec
end

#is_stream ⇒ Object

Returns the value of attribute is_stream.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @is_stream (constructor argument, false by default). run uses it
# to turn a truthy no_load argument into the :stream mode.
def is_stream
  @is_stream
end

#refresh_time ⇒ Object

Returns the value of attribute refresh_time.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @refresh_time. The constructor takes it as an argument or, when
# that is nil, reads it from Rbbt::Config. info compares it against the time
# elapsed since @last_info_time to decide whether to refresh.
def refresh_time
  @refresh_time
end

#result_description ⇒ Object

Returns the value of attribute result_description.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @result_description, which the constructor sets from its
# result_description argument.
def result_description
  @result_description
end

#result_type ⇒ Object

Returns the value of attribute result_type.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @result_type, which the constructor sets from its result_type
# argument.
def result_type
  @result_type
end

#started ⇒ Object

Returns the value of attribute started.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @started. run and exec set it to true in their ensure clauses
# (so it is set even when they raise) and _restart resets it to nil.
def started
  @started
end

#stream_input ⇒ Object

Returns the value of attribute stream_input.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @stream_input, the constructor argument that the instance method
# get_streams forwards to RemoteStep.get_streams as the name of the input to
# pass as a stream.
def stream_input
  @stream_input
end

#task ⇒ Object

Returns the value of attribute task.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @task, which the constructor sets from its task argument
# (nil by default).
def task
  @task
end

#url ⇒ Object

Returns the value of attribute url.



5
6
7
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 5

# Reader for @url. name= assigns it from base_url, task and the given name.
# NOTE(review): several methods call init_job when @url is unset, so init_job
# (not shown here) presumably assigns it as well — confirm in the REST/SSH mixins.
def url
  @url
end

Class Method Details

.get_streams(inputs, stream_input = nil) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 72

# Resolves job inputs into concrete values.
#
# inputs       - collection of input name/value pairs; nil is treated as empty.
# stream_input - name of the input to hand over as a stream (compared via to_s).
#
# A value that is a Step is first started with run(true) followed by grace
# when it is neither done nor streaming. It is then replaced by
# TSV.get_stream(step) if its name matches stream_input, or by step.load
# otherwise. Any other value is copied unchanged. Errors raised while loading
# a dependency propagate to the caller.
#
# Returns a new Hash; the inputs argument itself is not modified.
def self.get_streams(inputs, stream_input = nil)
  # The constructor defaults inputs to nil, so there may be nothing to resolve.
  return {} if inputs.nil?

  stream_name = stream_input.to_s
  inputs.each_with_object({}) do |(key, value), resolved|
    unless Step === value
      resolved[key] = value
      next
    end

    unless value.done? || value.streaming? # or RestClient::Step === value
      # grace is only called when run returns a truthy value
      value.grace if value.run(true)
    end

    resolved[key] = stream_name == key.to_s ? TSV.get_stream(value) : value.load
  end
end

Instance Method Details

#_restart ⇒ Object



348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 348

# Clears the cached job state (@done, @name, @started, @aborted, @info) and
# rebuilds @inputs. Inputs that are File objects are replaced by a new handle
# opened on the same path; all other inputs are kept as they are.
def _restart
  @done = @name = @started = @aborted = nil

  @inputs = inputs.each_with_object({}) do |(key, input), fresh|
    fresh[key] = File === input ? File.open(input.path) : input
  end

  @info = nil
end

#cache_file ⇒ Object



29
30
31
32
33
34
35
36
37
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 29

# Location of the local cache entry for this job.
#
# The entry lives under Rbbt.var.cache.REST, in a directory named after the
# task, and is called "<clean_name>.<digest>", where the digest is computed by
# Misc.digest over base_url, task, base_name and inputs (a nil clean_name is
# left out of the entry name). Errors are logged with Log.exception and raised
# again.
def cache_file
  signature = Misc.digest([base_url, task.to_s, base_name, inputs])
  entry = [clean_name, signature].compact.join(".")
  Rbbt.var.cache.REST[task.to_s][entry].find
rescue
  Log.exception $!
  raise $!
end

#cache_files ⇒ Object



39
40
41
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 39

# Lists the existing files whose names are cache_file followed by a dot and
# any suffix. done? and join treat a non-empty list as "job finished".
def cache_files
  pattern = cache_file + '.*'
  Dir[pattern]
end

#clean_name ⇒ Object



25
26
27
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 25

# Returns @base_name unchanged. cache_file uses this value as the leading part
# of the cache entry name.
def clean_name
  @base_name
end

#done? ⇒ Boolean

Returns:

  • (Boolean)


182
183
184
185
186
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 182

# Tells whether the remote job has finished.
#
# Returns true right away when local cache files exist. Otherwise the job is
# initialised (if it has no @url yet) and the result is @done when that is
# set, or else whether the current status is 'done' or 'noinfo'.
def done?
  return true if cache_files.any?
  self.init_job unless @url
  # Ask for the status once: each status call may trigger a remote info query.
  @done || %w(done noinfo).include?(status.to_s)
end

#dup_inputs ⇒ Object



104
105
106
107
108
109
110
111
112
113
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 104

# Replaces every input with the result of Step.dup_stream for it.
#
# Does nothing (and returns nil) when the inputs were already dupped or when
# the RBBT_NO_STREAM environment variable is 'true'. Otherwise @inputs is
# swapped for the new Hash, @dupped is set, and true is returned.
def dup_inputs
  return if @dupped || ENV["RBBT_NO_STREAM"] == 'true'

  Log.low "Dupping inputs for #{path}"
  @inputs = @inputs.each_with_object({}) do |(key, stream), copies|
    copies[key] = Step.dup_stream stream
  end
  @dupped = true
end

#exec(noload = false) ⇒ Object



239
240
241
242
243
244
245
246
247
248
249
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 239

# Runs the job in exec mode and memoizes the outcome in @result.
#
# With noload == :stream the job goes through _run_job(:exec); otherwise
# through exec_job. A previously stored truthy @result is returned without
# running anything. @started is set to true whenever a run was attempted,
# including when it raised.
def exec(noload = false)
  return @result if @result

  begin
    @result = noload == :stream ? _run_job(:exec) : exec_job
  ensure
    @started = true
  end
end

#file(file) ⇒ Object



192
193
194
195
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 192

# Fetches one of the job's files in raw form.
#
# Calls init_job first, then asks the adaptor's get_raw for
# "<url>/file/<file>" (file is converted with to_s).
def file(file)
  init_job
  location = File.join(url, 'file', file.to_s)
  @adaptor.get_raw(location)
end

#files ⇒ Object



188
189
190
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 188

# Lists the job's files by asking the adaptor's get_json for "<url>/files".
#
# The job is initialised first when it has no @url yet; without that,
# File.join would be handed a nil url and raise TypeError.
def files
  init_job unless @url
  @adaptor.get_json(File.join(url, 'files'))
end

#fork(noload = false, semaphore = nil) ⇒ Object



231
232
233
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 231

# Submits the job by calling init_job in :asynchronous mode and returns
# whatever init_job returns. The noload and semaphore parameters are accepted
# but not used by this body.
def fork(noload=false, semaphore=nil)
  init_job(:asynchronous)
end

#get_stream ⇒ Object



197
198
199
200
201
202
203
204
205
206
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 197

# Exposes @result as a readable stream: an IO result is returned as is, a
# String result is wrapped in a StringIO, and anything else yields nil.
def get_stream
  if IO === @result
    @result
  elsif String === @result
    StringIO.new @result
  end
end

#get_streams ⇒ Object



97
98
99
100
101
102
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 97

# Resolves @inputs once through RemoteStep.get_streams (passing
# @stream_input), stores the outcome back in @inputs and returns it.
# Later calls return nil without doing anything.
def get_streams
  return nil if @inputs_done

  resolved = RemoteStep.get_streams(@inputs, @stream_input)
  @inputs_done = true
  @inputs = resolved
end

#grace ⇒ Object



208
209
210
211
212
213
214
215
216
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 208

# Gives the job time to start.
#
# Calls produce when @started is not set, then waits in growing steps
# (0.1s, 0.5s, 1s), skipping each wait once started? is true. After that it
# keeps sleeping one second at a time until the job is done or started.
def grace
  produce unless @started

  [0.1, 0.5, 1].each do |pause|
    sleep pause unless started?
  end

  sleep 1 until done? || started?
end

#info(check_lock = false) ⇒ Object



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 135

# Returns the job's info hash.
#
# Before the job has started the result is {:status => :waiting}. Otherwise
# @done is recomputed from the status of the previously stored @info (done or
# error), and the info is obtained through Persist.memory keyed by @url. An
# update is requested only when the job is not done and either no info was
# fetched yet or more than @refresh_time has passed since @last_info_time.
#
# The fetch itself initialises the job if needed and reads "<url>/info" with
# the adaptor's get_json; a failed fetch yields {:status => :noinfo}. The hash
# goes through RemoteWorkflow.fix_hash and a String status is turned into a
# Symbol. The check_lock parameter is not used by this body.
def info(check_lock=false)
  return {:status => :waiting } unless started?

  known_status = @info && @info[:status]
  @done = known_status && [:done, :error].include?(known_status.to_sym)

  update = !@done && (@last_info_time.nil? || (Time.now - @last_info_time) > @refresh_time)

  @info = Persist.memory("RemoteSteps Info", :url => @url, :persist => true, :update => update) do
    @last_info_time = Time.now
    init_job unless @url

    fetched = begin
                @adaptor.get_json(File.join(@url, 'info'))
              rescue
                {:status => :noinfo}
              end

    fetched = RemoteWorkflow.fix_hash(fetched)
    fetched[:status] = fetched[:status].to_sym if String === fetched[:status]
    fetched
  end
end

#init_info(*args) ⇒ Object



365
366
367
368
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 365

# Builds the initial info hash for this step and returns it.
#
# The hash holds :status (:waiting), :pid (the current process id) and :path.
# When dependencies is not nil it also holds :dependencies, a list of
# [task_name, name, path] triples. Arguments are accepted and ignored.
#
# Nothing is written anywhere; the hash is only returned, instead of being
# built and then dropped.
def init_info(*args)
  i = {:status => :waiting, :pid => Process.pid, :path => path}
  deps = dependencies
  i[:dependencies] = deps.collect{|dep| [dep.task_name, dep.name, dep.path]} if deps
  i
end

#input_checks ⇒ Object



344
345
346
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 344

# Always returns a new empty Array.
def input_checks
  []
end

#inspect ⇒ Object



370
371
372
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 370

def inspect
  "RemoteWorkflow #{self.path}"
end

#join ⇒ Object



251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 251

# Waits until the remote job is done, aborted or in error.
#
# Returns true immediately when local cache files exist. Otherwise the job is
# initialised and produced if needed, an IO held in @result is consumed through
# Misc.consume_stream, and the job is polled: after one info call and two
# short waits it is checked every 3 seconds.
#
# max_tries comes from Scout::Config (keys :max_tries, :remote_step and the
# transport name). It defaults to 10 for SSH and to nil for REST; nil means
# there is no limit on the number of polls.
#
# Returns self once the job finished.
# Raises RuntimeError when max_tries is exceeded or the job ended in error.
def join
  return true if cache_files.any?
  init_job unless @url
  produce unless @started
  Log.debug{ "Joining RemoteStep: #{path}" }

  if IO === @result
    res = @result
    @result = nil
    Misc.consume_stream(res, true)
  end

  # @adaptor holds the adaptor module itself (see the constructor), so it is
  # compared by identity; `case/when` would test `Module === @adaptor`, which
  # does not match a module against itself.
  max_tries =
    if @adaptor == RemoteWorkflow::REST || RemoteWorkflow::REST === @adaptor
      Scout::Config.get :max_tries, :remote_step, :rest, :default => nil
    elsif @adaptor == RemoteWorkflow::SSH || RemoteWorkflow::SSH === @adaptor
      Scout::Config.get :max_tries, :remote_step, :ssh, :default => 10
    end

  finished = lambda { self.done? || self.aborted? || self.error? }

  unless finished.call
    self.info
    return self if finished.call
    sleep 0.2 unless finished.call
    sleep 1 unless finished.call

    times = 0
    until finished.call
      sleep 3
      # A nil max_tries means "no limit"; comparing an Integer with nil would raise.
      if max_tries && times > max_tries
        raise "Max tries reached while waiting for remote job: #{Log.fingerprint self}"
      end
      times += 1
    end
  end

  raise "Error in remote step" if self.error?

  self
end

#load_res(res, result_type = nil) ⇒ Object



288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 288

# Converts a raw job result into a Ruby value according to its type.
#
# res         - the raw result: a String, or any object responding to read.
# result_type - how to interpret it; defaults to self.type.
#
# When res is not a stream, join is called first. Then:
#   :string      - the text itself
#   :boolean     - true when the text is exactly 'true'
#   :tsv         - TSV.open on the stream (with :monitor) or on the text
#   :annotations - Annotated.load_tsv on the opened TSV
#   :array       - the text split on newlines
#   otherwise    - the text parsed as JSON; if parsing fails, text that looks
#                  like an integer or a decimal is converted with to_i / to_f,
#                  and any other text re-raises the parse error.
def load_res(res, result_type = nil)
  stream = res.respond_to?(:read)
  join unless stream
  result_type ||= self.type

  # Reads lazily: TSV-based types must receive the stream unread.
  text = lambda { stream ? res.read : res }

  case result_type.to_sym
  when :string
    text.call
  when :boolean
    text.call == 'true'
  when :tsv
    if stream
      TSV.open(res, :monitor => true)
    else
      TSV.open(StringIO.new(res))
    end
  when :annotations
    Annotated.load_tsv(TSV.open(stream ? res : StringIO.new(res)))
  when :array
    text.call.split("\n")
  else
    # Use the same stream test as the other branches so readers that are not
    # IO instances (e.g. StringIO) are read before being parsed.
    json_text = text.call
    begin
      JSON.parse json_text
    rescue
      case json_text
      when /^\d+$/
        json_text.to_i
      when /^\d+\.\d/
        json_text.to_f
      else
        raise
      end
    end
  end
end

#name ⇒ Object



115
116
117
118
119
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 115

# Name of the remote job.
#
# nil for exec jobs; @path while the job has no @url; otherwise the last
# "/"-separated segment of @url (of its first element when @url is an Array).
def name
  if @is_exec
    nil
  elsif @url.nil?
    @path
  else
    location = Array === @url ? @url.first : @url
    location.split("/").last
  end
end

#name=(name) ⇒ Object



121
122
123
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 121

# Points the step at a named remote job by setting @url to
# "<base_url>/<task>/<name>".
def name=(name)
  @url = [base_url, task, name].join("/")
end

#nopid? ⇒ Boolean

Returns:

  • (Boolean)


131
132
133
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 131

# Always returns false for a remote step.
def nopid?
  false
end

#path ⇒ Object



221
222
223
224
225
226
227
228
229
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 221

# Identifier of the job's result.
#
# With a @url: that url with the GET parameter _format=raw added through
# Misc.add_GET_param. Without one but with a @base_name:
# "<base_url>/<task>/<base_name>-<fingerprint of inputs>". Otherwise nil.
def path
  return Misc.add_GET_param(@url, "_format", "raw") if @url
  return nil unless @base_name

  job_name = @base_name + '-' + Misc.fingerprint(inputs)
  [base_url, task, job_name].join("/")
end

#run(no_load = false) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 43

# Runs the remote job.
#
# A truthy no_load becomes :stream for stream jobs (@is_stream) and true
# otherwise. Unless @result already holds a truthy value, the work happens
# under @mutex:
#   * exec jobs delegate to exec with the resolved mode;
#   * :stream mode calls _run_job(:stream);
#   * plain no_load only initialises the job and leaves @result nil;
#   * otherwise the job is initialised, produced and joined (unless already
#     done) and its result is loaded.
# @started is set to true afterwards, even if the run raised.
#
# Returns @result in :stream mode or when no_load is falsy; for a plain
# truthy no_load it returns path with the _format=raw GET parameter added.
def run(no_load = false)
  mode = no_load && (@is_stream ? :stream : true)

  @result ||= @mutex.synchronize do
    begin
      if @is_exec
        exec(mode)
      elsif mode == :stream
        _run_job(:stream)
      elsif mode
        init_job
        nil
      else
        unless done?
          init_job
          produce
          join
        end
        self.load
      end
    ensure
      @started = true
    end
  end

  if mode && mode != :stream
    Misc.add_GET_param(path, "_format", "raw")
  else
    @result
  end
end

#running? ⇒ Boolean

Returns:

  • (Boolean)


235
236
237
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 235

# True unless the current status is one of done, error, aborted or noinfo.
def running?
  finished_states = %w(done error aborted noinfo)
  !finished_states.include?(status.to_s)
end

#short_path ⇒ Object



339
340
341
342
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 339

# "<task>/<name>" built from the @task and @name instance variables, after
# initialising the job when it has no @url yet.
def short_path
  init_job unless @url
  [@task, @name].join("/")
end

#started? ⇒ Boolean

Returns:

  • (Boolean)


178
179
180
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 178

# Truthy once the job has a result, has been marked as started, or is
# streaming. Returns true for a non-nil @result, otherwise the value of
# @started or @streaming.
def started?
  return true unless @result.nil?
  @started || @streaming
end

#status ⇒ Object



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 161

# Current status of the remote job.
#
# Returns :done once @done is set, and nil while the job has neither a url nor
# been started. Otherwise the status is read from info: a done status sets
# @done, a change of status is logged with Log.low, and the value is stored in
# @status and returned. Errors are logged with Log.exception and yield nil.
# @info is cleared after every lookup, whether it succeeded or not.
def status
  return :done if @done
  return nil unless url || started?
  #return :streaming if @streaming
  begin
    current = info[:status]
    @done = true if current && current.to_sym == :done
    Log.low "RemoteStep status '#{current}' #{self.url}" unless @status == current
    @status = current
  rescue
    Log.exception $!
    nil
  ensure
    # the early returns above are outside this block and leave @info alone
    @info = nil
  end
end

#task_name ⇒ Object



125
126
127
128
129
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 125

# Name of the task this job belongs to.
#
# Returns task when it is set. Otherwise the job is initialised and the name
# is taken from the second-to-last "/"-separated segment of @url (of its first
# element when @url is an Array).
def task_name
  known = task
  return known if known

  init_job
  location = @url
  location = location.first if Array === location
  location.split("/")[-2]
end

#workflow_short_path ⇒ Object



334
335
336
337
# File 'lib/rbbt/workflow/remote_workflow/remote_step.rb', line 334

# "<base_url>/<task>/<name>" built from the @base_url, @task and @name
# instance variables, after initialising the job when it has no @url yet.
def workflow_short_path
  init_job unless @url
  [@base_url, @task, @name].join("/")
end