Class: GoogleDrive

Inherits:
Object
  • Object
show all
Defined in:
lib/googledrive-easy.rb

Instance Method Summary collapse

Constructor Details

#initialize(raise_error: false, loglevel: Logger::ERROR) ⇒ GoogleDrive

TODO: Test the upload, download, list methods. Write tests for them. TODO: Test using the gem in another program.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/googledrive-easy.rb', line 26

# Sets up default configuration, the instance logger, and empty credential
# state. No authorization is attempted here; see create_service.
#
# @param raise_error [Boolean] stored in @raise_error, the default that the
#   other methods use when deciding whether to raise or return false
# @param loglevel [Integer] severity threshold applied to the instance logger
def initialize(raise_error: false, loglevel: Logger::ERROR)
  # General behaviour
  @raise_error = raise_error
  @file_fields = "id,kind,mime_type,name,md5Checksum,size,parents"
  @md5_command = 'md5 -q' # Must print the bare checksum only (no filename, no header).

  # Logger emitting "[timestamp] SEVERITY (ClassName): message" lines on STDOUT.
  # TODO: Make this smarter/respect global loggers.
  @logger = Logger.new(STDOUT)
  @logger.level = loglevel
  @logger.formatter = proc do |severity, datetime, _progname, msg|
    timestamp = datetime.strftime("%Y-%m-%d %H:%M:%S")
    format("[%s] %-5s (%s): %s\n", timestamp, severity, self.class.name, msg)
  end

  # Locations of key/secret/token files
  @api_key_file = DRIVE_ENV_FILE
  @loaded_api_key_file = @api_key_file
  @drive_secret_path = "#{Dir.home}/.googledrive-secret.json" # OAuth2 data exactly as provided by GCloud Console
  @drive_token_path = "#{Dir.home}/.googledrive-token.yaml"    # Token store written by this class

  # OAuth settings
  @drive_scope = 'https://www.googleapis.com/auth/drive'
  @auth_url_path = '/'
  @oauth_address = 'localhost' # Should be `localhost`, `127.0.0.1`, or `[::1]`
  @oauth_port = 8181 # Anything outside the privileged range [1-1023], so SSH need not run as root.
  @oauth_loopback_base_url = "http://#{@oauth_address}:#{@oauth_port}"

  # Credential values; filled in by the key loaders / set_api_keys
  @token_file_user = 'cliuser' # User id under which credentials are stored in the token file
  @client_id = nil
  @client_secret = nil
  @refresh_token = nil
  @access_token = nil
  @expiration = 1_665_718_429_000 # Written as expiration_time_millis by generate_token_yaml

  # Runtime objects created by create_service
  @drive_service = nil
  @authorizer = nil
  @credentials = nil
end

Instance Method Details

#create_service(mode: 'environment') ⇒ Object



289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
# File 'lib/googledrive-easy.rb', line 289

# Authorizes against Google and builds the Drive service used by the other
# methods.
#
# Modes:
#   'environment' - keys must load from ENV or the key file (refresh token required)
#   'manual'      - no loading; uses the keys already held in memory
#   'loopback'    - interactive OAuth through get_oauth_credentials_via_loopback
#   'input'       - interactive OAuth through get_oauth_credentials_via_input
#
# Side effects: sets @authorizer, @credentials, @drive_service and refreshes
# @client_id / @client_secret / @refresh_token from the obtained credentials.
#
# @param mode [String] one of the modes listed above
# @return [Google::Apis::DriveV3::DriveService, false] the service, or false
#   when a step fails and errors are not being raised
# @raise [RuntimeError] from log_error_and_raise when @raise_error is set
def create_service(mode:'environment')
  @logger.debug("Passed authorization mode: #{mode}")

  unless %w(environment loopback input manual).include?(mode)
    return log_error_and_raise("Unknown authorization mode")
  end

  # TODO: Figure out balance between always requiring env config and just reading from the token store if it already exists

  interactive_mode = %w(loopback input).include?(mode)

  # Try ENV first, then the key file. Manual mode skips loading entirely.
  # Interactive modes do not need a refresh token up front.
  key_load_result =
    if mode == "manual"
      false
    else
      load_api_keys_from_env(require_refresh_token: !interactive_mode, raise_error: false) ||
        load_api_keys_from_file(require_refresh_token: !interactive_mode, raise_error: false)
    end

  print_config_hints if interactive_mode && !key_load_result

  if interactive_mode
    # Interactive flows only need the client ID and secret.
    unless @client_secret && @client_id
      return log_error_and_raise("Client Secret or ID missing.")
    end
    token_store = Google::Auth::Stores::FileTokenStore.new(file: @drive_token_path)
  else
    # Check the key load before generating the temp token file, so a failed
    # load is reported as such and @drive_token_path is left untouched.
    unless key_load_result || mode == 'manual'
      return log_error_and_raise("Unable to load api keys from environment")
    end
    tmp_token_path = generate_temp_token_file(raise_error: false)
    unless tmp_token_path
      return log_error_and_raise("Failed to generate temporary token file")
    end
    # NOTE(review): this writes the token file (including the refresh token)
    # to the log at debug level.
    @logger.debug(File.read(tmp_token_path))
    token_store = Google::Auth::Stores::FileTokenStore.new(file: tmp_token_path)
  end

  @authorizer = Google::Auth::UserAuthorizer.new(
    Google::Auth::ClientId::from_hash(generate_api_secret_hash),
    @drive_scope,
    token_store,
    @auth_url_path
  )
  @logger.debug("google_authorizer: " + @authorizer.inspect)

  # Use stored credentials if the token store has them; otherwise run the
  # interactive flow for the interactive modes.
  @credentials = @authorizer.get_credentials(@token_file_user)
  if @credentials.nil?
    case mode
    when 'input'
      @credentials = get_oauth_credentials_via_input(@authorizer)
    when 'loopback'
      @credentials = get_oauth_credentials_via_loopback(@authorizer)
    end
  end

  # Pass the message itself; log_error_and_raise does the logging.
  if @credentials.nil?
    return log_error_and_raise('Unable to retrieve credentials')
  end

  # Update internal credentials based on credentials loaded
  @client_id = @credentials.client_id
  @client_secret = @credentials.client_secret
  @refresh_token = @credentials.refresh_token

  @drive_service = Google::Apis::DriveV3::DriveService.new
  @drive_service.authorization = @credentials
  @drive_service
end

#download_file(file_name_or_id, parentfolderid: nil, file_path: nil) ⇒ Object

returns full path of downloaded file



519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
# File 'lib/googledrive-easy.rb', line 519

# Downloads one Drive file into a local directory.
#
# The argument is first treated as a file name (via find_files); if no file
# matches, it is treated as a file ID (via get_file_info).
#
# @param file_name_or_id [String] Drive file name or ID
# @param parentfolderid [String, nil] passed to find_files for the name lookup
# @param file_path [String, nil] existing target directory; defaults to the
#   current working directory
# @return [String, false] full path of the downloaded file, or false on
#   failure when errors are not being raised
def download_file(file_name_or_id, parentfolderid: nil, file_path: nil)
  return log_error_and_raise("Drive service not initialized.") unless @drive_service

  # A directory that was passed must exist; with none passed, use the cwd.
  if file_path && !Dir.exist?(file_path)
    return log_error_and_raise("File path '#{file_path}' does not exist.")
  end
  file_path ||= Dir.getwd

  # 1) assume file_name_or_id is a filename
  files = find_files(file_name_or_id, parentfolderid: parentfolderid)
  if files && (files.count > 1)
    return log_error_and_raise("Multiple files with name '#{file_name_or_id}' exist. download_file() can only handle a single filename.")
  end

  # 2) no match by name (or the lookup failed): assume it is an ID
  file_info = (files && files.count == 1) ? files.first : get_file_info(file_name_or_id)
  unless file_info
    return log_error_and_raise("No file with ID  '#{file_name_or_id}' exist.")
  end

  # The name comes from Drive, so reduce it to a basename to keep the output
  # inside file_path even if the remote name contains path separators.
  output_file = File.join(file_path, File.basename(file_info[:name]))

  # Remove a stale local copy without going through a shell: the name may
  # contain spaces or shell metacharacters. Failure to delete is not fatal.
  begin
    File.delete(output_file) if File.file?(output_file) || File.symlink?(output_file)
  rescue SystemCallError => e
    @logger.debug("Could not remove existing '#{output_file}': #{e}")
  end

  @drive_service.get_file(file_info[:id], acknowledge_abuse: true, download_dest: output_file) do |_resfile, err|
    return log_error_and_raise("Error: #{err}.") if err
  end
  output_file
end

#dump_keys ⇒ Object



181
182
183
184
185
186
187
188
# File 'lib/googledrive-easy.rb', line 181

# Snapshot of the OAuth values currently held in memory.
#
# @return [Hash{String => Object}] "client_id", "client_secret",
#   "access_token" and "refresh_token" mapped to the matching instance
#   variables (nil when unset)
def dump_keys
  %w[client_id client_secret access_token refresh_token].each_with_object({}) do |key, dump|
    dump[key] = instance_variable_get("@#{key}")
  end
end

#find_directory_id(directory_name, parentfolderid: nil, raise_error: @raise_error) ⇒ Object



470
471
472
473
474
475
476
477
478
# File 'lib/googledrive-easy.rb', line 470

# Resolves a folder name to the ID of the first matching folder returned by
# get_all_files.
#
# @param directory_name [String] folder name to search for
# @param parentfolderid [String, nil] restrict the search to this parent
# @param raise_error [Boolean] forwarded to get_all_files and log_error_and_raise
# @return [String, false] the folder ID, or false when nothing matched (or
#   the lookup failed) and errors are not being raised
def find_directory_id(directory_name, parentfolderid: nil, raise_error: @raise_error)
  matches = get_all_files(
    justfolders: true,
    name: directory_name,
    parentfolderid: parentfolderid,
    raise_error: raise_error
  )
  return matches.first[:id] if matches && matches.count != 0

  log_error_and_raise("Directory not found.", raise_error)
end

#find_files(name = "*", parentfolderid: nil, raise_error: @raise_error) ⇒ Object

returns all files by default in all folders



455
456
457
# File 'lib/googledrive-easy.rb', line 455

# Looks up non-folder entries by name; a thin wrapper that calls
# get_all_files with justfiles: true.
#
# @param name [String] file name; "*" is handled as a wildcard by get_all_files
# @param parentfolderid [String, nil] restrict the search to this parent
# @param raise_error [Boolean] forwarded to get_all_files
# @return [Object] whatever get_all_files returns
def find_files(name = "*", parentfolderid: nil, raise_error: @raise_error)
  criteria = { justfiles: true, name: name, parentfolderid: parentfolderid, raise_error: raise_error }
  get_all_files(**criteria)
end

#generate_api_secret_hash ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/googledrive-easy.rb', line 167

# Builds the client-secret structure that create_service passes to
# Google::Auth::ClientId.from_hash.
#
# Only the client ID and secret are included. The fields project_id,
# auth_uri, token_uri, auth_provider_x509_cert_url and redirect_uris are
# deliberately left out.
#
# @return [Hash] { "installed" => { "client_id" => ..., "client_secret" => ... } }
def generate_api_secret_hash
  installed_app = {
    "client_id"     => @client_id,
    "client_secret" => @client_secret
  }
  { "installed" => installed_app }
end

#generate_env_file(path = @api_key_file) ⇒ Object



218
219
220
221
222
223
224
225
226
# File 'lib/googledrive-easy.rb', line 218

# Writes the current client ID, client secret and refresh token to a JSON
# key file, in the format load_api_keys_from_file reads.
#
# The file holds secrets, so a newly created file gets mode 0600. The mode of
# an already existing file is left as it is.
#
# @param path [String] destination file; defaults to @api_key_file
# @return [Integer] number of bytes written
def generate_env_file(path=@api_key_file)
  payload = {
    "CLIENT_ID" => @client_id,
    "CLIENT_SECRET" => @client_secret,
    "REFRESH_TOKEN" => @refresh_token,
  }.to_json

  File.open(path, "w", 0o600) { |f| f.write(payload) }
end

#generate_temp_token_file(raise_error: @raise_error) ⇒ Object

Generates temp token file and return path to it



209
210
211
212
213
214
215
216
# File 'lib/googledrive-easy.rb', line 209

# Writes the token YAML from generate_token_yaml to a temporary file and
# points @drive_token_path at it.
#
# The Tempfile object is kept in @drive_token_tempfile. Ruby removes a
# temporary file once its Tempfile object is garbage-collected, so holding
# only a local reference would let the returned path disappear while it is
# still in use.
#
# @param raise_error [Boolean] forwarded to generate_token_yaml
# @return [String, false] absolute path of the temp file, or false when the
#   token YAML could not be generated
def generate_temp_token_file(raise_error: @raise_error)
  # Generate the content first so no temp file is created on failure.
  token_yaml = generate_token_yaml(raise_error: raise_error)
  return false unless token_yaml

  tmpfile = Tempfile.new('drive_token')
  tmpfile.write(token_yaml)
  tmpfile.flush # make the contents visible to readers of the path
  @drive_token_tempfile = tmpfile
  @drive_token_path = File.expand_path(tmpfile.path)
end

#generate_token_yaml(raise_error: @raise_error) ⇒ Object

Generates token yaml



191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/googledrive-easy.rb', line 191

# Renders the in-memory credentials as token-store YAML: one entry keyed by
# @token_file_user whose value is a JSON string of the token fields.
#
# @param raise_error [Boolean] forwarded to log_error_and_raise
# @return [String, false] the YAML document, or false when @client_id or
#   @refresh_token is missing and errors are not being raised
def generate_token_yaml(raise_error: @raise_error)
  if !@client_id || !@refresh_token
    incomplete = { "client_id" => @client_id, "refresh_token" => @refresh_token }
    @logger.debug("API Config: #{incomplete}")
    return log_error_and_raise("Some required API config for token hasn't been configured yet", raise_error)
  end

  token_fields = {
    "client_id"               => @client_id,
    "access_token"            => @access_token,
    "refresh_token"           => @refresh_token,
    "scope"                   => [@drive_scope],
    "expiration_time_millis"  => @expiration
  }
  { @token_file_user => token_fields.to_json }.to_yaml
end

#get_all_files(justfiles: false, justfolders: false, parentfolderid: nil, name: nil, raise_error: @raise_error) ⇒ Object

parentfolderid: “root” gets the root directory. Not all folders are under the root. Has to do with permissions and how Google Drive works. developers.google.com/drive/api/v3/reference/query-ref



382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
# File 'lib/googledrive-easy.rb', line 382

# Lists Drive entries matching the given filters, following pagination until
# no next page token is returned.
#
# A name without "*" is matched exactly. With "*", the text before the first
# "*" is sent as a `name contains` clause, later pieces as `fullText contains`
# clauses, and the server results are then filtered locally with
# GoogleDriveWildcard.
#
# @param justfiles [Boolean] exclude folders (ignored if justfolders is also set)
# @param justfolders [Boolean] only folders (ignored if justfiles is also set)
# @param parentfolderid [String, nil] only entries that have this parent
# @param name [String, nil] exact name, or a pattern containing "*"
# @param raise_error [Boolean] raise instead of returning false on failure
# @return [Array<Hash>, false] process_file hashes, or false on failure when
#   errors are not being raised
def get_all_files(justfiles: false, justfolders: false, parentfolderid: nil, name: nil, raise_error: @raise_error)
  # Honour the caller's raise_error here as well, like the other failure paths.
  return log_error_and_raise("Drive service not initialized.", raise_error) unless @drive_service

  # Number of files/directories to be returned each call to /files.
  # 100 is default from google.
  page_size = 100

  # Fields param gives us extra info like MD5 sums and file sizes
  fields = "kind,incomplete_search,next_page_token,files(#{@file_fields})"

  # Drive query strings are single-quoted, with backslash and single quote
  # escaped by a backslash. Escaping keeps names such as "Valentine's Day"
  # from breaking (or altering) the query.
  quote = ->(value) { "'" + value.to_s.gsub(/[\\']/) { |ch| "\\#{ch}" } + "'" }
  wildcard = name ? name.include?("*") : false

  clauses = ["(trashed = false)"]
  clauses << "(mimeType != 'application/vnd.google-apps.folder')" if justfiles && !justfolders
  clauses << "(mimeType = 'application/vnd.google-apps.folder')" if justfolders && !justfiles
  clauses << "(#{quote.call(parentfolderid)} in parents)" if parentfolderid

  if name
    name.split("*").each_with_index do |part, idx|
      next if part.empty?

      clauses <<
        if idx.zero? && wildcard
          # First piece of a wildcard pattern: it was followed by "*".
          "(name contains #{quote.call(part)})"
        elsif idx.zero?
          # No wildcard at all: literal equality.
          "(name = #{quote.call(part)})"
        else
          # A later piece of a wildcard pattern.
          "(fullText contains #{quote.call(part)})"
        end
    end
  end
  query = clauses.join(" and ")
  @logger.debug("Searching with query: #{query}")

  files = []
  next_page_token = nil
  begin
    loop do
      # TODO: Should this be converted to block form and then use that to gracefully handle failure and errors?
      files_page = @drive_service.list_files(page_size: page_size, q: query, page_token: next_page_token, fields: fields)
      files.concat(files_page.files)
      next_page_token = files_page.next_page_token
      break unless next_page_token
    end
  rescue => e
    return log_error_and_raise("Error retrieving files: #{e}", raise_error)
  end

  # Todo: Do we really need to convert these now that the API returns real objects?
  processed_files = files.map { |file| process_file(file) }
  return processed_files unless wildcard

  # "contains" over-matches, so apply the full pattern locally. The matcher
  # is built once rather than once per file.
  matcher = GoogleDriveWildcard.new(name)
  processed_files.select { |file| matcher =~ file[:name] }
end

#get_file_info(file_id, raise_error: @raise_error) ⇒ Object



459
460
461
462
463
464
465
466
467
468
# File 'lib/googledrive-easy.rb', line 459

# Fetches the metadata of a single Drive file by ID.
#
# @param file_id [String] Drive file ID
# @param raise_error [Boolean] raise instead of returning false on failure
# @return [Hash, false] the process_file hash, or false on failure when
#   errors are not being raised
def get_file_info(file_id, raise_error: @raise_error)
  # Honour the caller's raise_error here as well, like the rescue path below.
  return log_error_and_raise("Drive service not initialized.", raise_error) unless @drive_service

  begin
    # TODO: Maybe convert this to block format and handle errors like in other places
    process_file(@drive_service.get_file(file_id, fields: @file_fields.to_s))
  rescue => e
    log_error_and_raise("Error getting file info: #{e}.", raise_error)
  end
end

#get_oauth_credentials_via_input(authorizer) ⇒ Object



274
275
276
277
278
279
280
281
282
283
284
285
286
287
# File 'lib/googledrive-easy.rb', line 274

# Interactive OAuth flow without a listener: prints the authorization URL,
# asks the user to paste the `code` parameter from the redirect, and
# exchanges it for credentials.
#
# @param authorizer [#get_authorization_url, #get_and_store_credentials_from_code]
# @return [Object] whatever the authorizer returns for the code exchange
def get_oauth_credentials_via_input(authorizer)
  # Redirect target used for both the URL and the code exchange.
  redirect_base = "http://localhost:1"

  puts 'Follow this url and complete the sign-in process. The login will result in an error, do not close it.'
  puts 'Instead, copy and paste the value of the `code` parameter (begins with 4/)'
  puts authorizer.get_authorization_url(base_url: redirect_base)
  puts ''
  code = HighLine::ask "Please enter code:"
  puts "Got code: #{code}"

  authorizer.get_and_store_credentials_from_code(
    user_id: @token_file_user,
    code: code,
    base_url: redirect_base
  )
end

#get_oauth_credentials_via_loopback(authorizer) ⇒ Object

Sets up a webserver to receive a 'localhost' query for 3-legged OAUTH. Requires that @oauth_port is forwarded from the machine running the authenticating browser to the machine running this script, likely via `ssh user@host -L @oauth_port:@oauth_address:@oauth_port`



231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# File 'lib/googledrive-easy.rb', line 231

# Interactive OAuth flow with a local listener: prints the authorization URL,
# accepts connections on @oauth_address:@oauth_port until a GET request
# carrying a `code` query parameter arrives, then exchanges that code for
# credentials.
#
# Empty connections, non-GET requests and malformed request lines are
# ignored. The listening socket is always closed, including on exceptions.
#
# @param authorizer [#get_authorization_url, #get_and_store_credentials_from_code]
# @return [Object] whatever the authorizer returns for the code exchange
def get_oauth_credentials_via_loopback(authorizer)
  # Local require: only this method needs URI.decode_www_form.
  require 'uri'

  # Print instructions and URL for user to click
  # TODO: Should this be puts or @logger? it's a user interaction...
  puts "Listening on #{@oauth_address}:#{@oauth_port}."
  puts "If this a remote system, you need to forward this port via SSH Tunneling, if you haven't already."
  puts("eg: ssh user@host -L#{@oauth_port}:#{@oauth_address}:#{@oauth_port}")
  puts "After you have done so, follow this link:", authorizer.get_authorization_url(base_url: @oauth_loopback_base_url)

  # Start webserver to listen for response:
  socket = TCPServer.new(@oauth_address, @oauth_port)
  begin
    loop do
      client = socket.accept
      begin
        # gets is nil when the peer connects and closes without sending.
        verb, path, = client.gets.to_s.split
        next unless verb == 'GET' && path

        # Parse the query string properly: parameters are percent-encoded
        # and their order is not guaranteed.
        params =
          begin
            path.start_with?('/?') ? URI.decode_www_form(path[2..-1]).to_h : {}
          rescue ArgumentError
            {} # invalid percent-encoding; treat as a non-auth request
          end
        code = params['code']
        scope = params['scope']

        if code.nil? || code.empty?
          # Default response for testing/sanity checks
          client.puts("HTTP/1.1 200\r\n\r\nI respond to auth requests.")
          next
        end

        client.puts("HTTP/1.1 200\r\n\r\nAuthorized for scope `#{scope}`! Code is #{code}")
      ensure
        client.close
      end

      # Got a code: stop listening before the token exchange.
      socket.close
      puts "Authorized for scope `#{scope}`! Code is #{code}"
      puts 'Oauth flow complete. You can close the local port forward if desired.'
      return authorizer.get_and_store_credentials_from_code(
        user_id: @token_file_user,
        code: code,
        base_url: @oauth_loopback_base_url
      )
    end
  ensure
    socket.close unless socket.closed?
  end
end

#load_api_keys_from_env(require_refresh_token: true, raise_error: @raise_error) ⇒ Object



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/googledrive-easy.rb', line 143

# Loads the client ID, client secret and (optionally) refresh token from the
# environment variables named in ENV_KEYS and stores them via set_api_keys.
#
# @param require_refresh_token [Boolean] whether the refresh token variable
#   must be set
# @param raise_error [Boolean] raise instead of returning false on failure
# @return [Boolean] true when the keys were loaded, false otherwise (when
#   errors are not being raised)
def load_api_keys_from_env(require_refresh_token:true, raise_error: @raise_error)
  # Google Drive Credentials from ENV if not derived from JSON file in home.
  required = [ENV_KEYS[:id], ENV_KEYS[:secret]]
  required << ENV_KEYS[:refresh] if require_refresh_token

  missing = required.find { |var| ENV[var].nil? }
  return log_error_and_raise("#{missing} export variable not set.", raise_error) if missing

  loaded = set_api_keys(
    client_id: ENV[ENV_KEYS[:id]],
    client_secret: ENV[ENV_KEYS[:secret]],
    refresh_token: ENV[ENV_KEYS[:refresh]],
    require_refresh_token: require_refresh_token,
    raise_error: raise_error
  )
  return log_error_and_raise("Not all API keys were in environment.", raise_error) unless loaded

  @logger.info("Using Google Drive API information from environment.")
  # Explicit result, so success does not depend on what the logger returns.
  true
end

#load_api_keys_from_file(path: nil, require_refresh_token: true, raise_error: @raise_error) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/googledrive-easy.rb', line 110

# Loads the client ID, client secret and (optionally) refresh token from a
# JSON key file with CLIENT_ID / CLIENT_SECRET / REFRESH_TOKEN keys and
# stores them via set_api_keys.
#
# The in-memory keys are only changed by set_api_keys, i.e. after the file
# contents have been validated; an incomplete file leaves them untouched.
#
# @param path [String, nil] key file to read; defaults to @api_key_file
# @param require_refresh_token [Boolean] whether REFRESH_TOKEN must be present
# @param raise_error [Boolean] raise instead of returning false on failure
# @return [Boolean] true when the keys were loaded, false otherwise (when
#   errors are not being raised)
def load_api_keys_from_file(path:nil, require_refresh_token:true, raise_error: @raise_error)
  @loaded_api_key_file = path || @api_key_file
  unless File.exist?(@loaded_api_key_file)
    return log_error_and_raise("Cannot find #{@loaded_api_key_file}", raise_error)
  end
  @logger.info("API key file #{@loaded_api_key_file} exists")

  begin
    api_hash = JSON.parse(File.read(@loaded_api_key_file))
  rescue => error
    return log_error_and_raise("Error opening api key file: " + error.inspect, raise_error)
  end

  # Valid JSON that is not an object (e.g. an array) cannot hold the keys.
  unless api_hash.is_a?(Hash)
    return log_error_and_raise("Not all API keys were in file #{@loaded_api_key_file}.", raise_error)
  end

  # NOTE(review): this writes the secrets from the key file to the log at
  # debug level.
  @logger.debug("api_hash: " + api_hash.inspect)

  loaded = set_api_keys(
    client_id: api_hash["CLIENT_ID"],
    client_secret: api_hash["CLIENT_SECRET"],
    refresh_token: api_hash["REFRESH_TOKEN"],
    require_refresh_token: require_refresh_token,
    raise_error: false
  )
  unless loaded
    return log_error_and_raise("Not all API keys were in file #{@loaded_api_key_file}.", raise_error)
  end

  @logger.info("Using Google Drive API information from #{@loaded_api_key_file}")
  true
end

#log_error_and_raise(msg, raise_error = @raise_error) ⇒ Object



92
93
94
95
96
# File 'lib/googledrive-easy.rb', line 92

# Logs a message at error level, then either raises it or returns false.
#
# @param msg [Object] message to log; interpolated into the exception text
# @param raise_error [Boolean] raise when truthy; defaults to @raise_error
# @return [false] when not raising
# @raise [RuntimeError] with the message text when raise_error is truthy
def log_error_and_raise(msg, raise_error=@raise_error)
  @logger.error(msg)
  return false unless raise_error

  raise "#{msg}"
end


68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/googledrive-easy.rb', line 68

# Prints setup instructions for the key file and environment variables to
# standard output. The JSON sample is valid JSON (no trailing comma), so it
# can be copied into the key file and parsed as-is.
#
# @return [nil]
def print_config_hints
  puts <<~TEXT.chomp
    To run in headless modes (environment or file configuration), you must provide either:
    * A json file at #{@api_key_file} containing the following structure:
    {
      "CLIENT_ID": "your Google Cloud Oauth client ID",
      "CLIENT_SECRET": "your Google Cloud Oauth client secret",
      "REFRESH_TOKEN": "A valid refresh token from a prior Oauth completion with the ID and SECRET"
    }

    * The following environment variables:
      #{ENV_KEYS[:id]}='your Google Cloud Oauth client ID'
      #{ENV_KEYS[:secret]}='your Google Cloud Oauth client secret'
      #{ENV_KEYS[:refresh]}='A valid refresh token from a prior Oauth completion with the ID and SECRET'

    For the interactive auth modes (loopback or input) you can omit the refresh token from either of the above.
    If you later switch to a headless mode, you will need to add the refresh token.
  TEXT
end

#process_file(file) ⇒ Object



368
369
370
371
372
373
374
375
376
377
# File 'lib/googledrive-easy.rb', line 368

# Converts a Drive file object into a plain hash.
#
# :name, :id and :isfolder are always present; :size, :md5 and :parents are
# only added when the file object returns a truthy value for them.
#
# @param file [#name, #id, #mime_type, #size, #md5_checksum, #parents]
# @return [Hash{Symbol => Object}]
def process_file(file)
  summary = {
    name: file.name,
    id: file.id,
    isfolder: file.mime_type == 'application/vnd.google-apps.folder'
  }
  optional = { size: file.size, md5: file.md5_checksum, parents: file.parents }
  optional.each { |key, value| summary[key] = value if value }
  summary
end

#set_api_keys(client_id: nil, client_secret: nil, refresh_token: nil, access_token: nil, require_refresh_token: true, raise_error: @raise_error) ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
# File 'lib/googledrive-easy.rb', line 98

# Stores API keys in memory after checking that the required ones were given.
# Recommended create_service mode if called manually: manual.
#
# The client ID and secret are always required; the refresh token is required
# unless require_refresh_token is false. Nothing is stored when the check
# fails. Refresh and access tokens are only overwritten when a value is given.
#
# @param client_id [String, nil]
# @param client_secret [String, nil]
# @param refresh_token [String, nil]
# @param access_token [String, nil]
# @param require_refresh_token [Boolean]
# @param raise_error [Boolean] raise instead of returning false on failure
# @return [Boolean] true when stored, false when keys were missing and errors
#   are not being raised
def set_api_keys(client_id:nil, client_secret:nil, refresh_token:nil, access_token:nil, require_refresh_token:true, raise_error: @raise_error )
  refresh_satisfied = refresh_token || !require_refresh_token
  if !client_id || !client_secret || !refresh_satisfied
    return log_error_and_raise("Not all tokens provided.", raise_error)
  end

  @client_id     = client_id
  @client_secret = client_secret
  @refresh_token = refresh_token if refresh_token
  @access_token  = access_token if access_token
  true
end

#set_raise_error(raise_error = true) ⇒ Object



88
89
90
# File 'lib/googledrive-easy.rb', line 88

# Changes the instance-wide default that decides whether failures raise or
# return false.
#
# @param raise_error [Boolean] new value for @raise_error
# @return [Boolean] the value that was stored
def set_raise_error(raise_error = true)
  @raise_error = raise_error
end

#upload_file(file, directory_id: nil) ⇒ Object



480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
# File 'lib/googledrive-easy.rb', line 480

# Uploads a local file to Drive under its basename, refusing to upload when
# get_all_files already reports a file with that name.
#
# @param file [String] path of the local file to upload
# @param directory_id [String, nil] ID of the Drive folder to upload into
# @return [Boolean] true on success, false on failure when errors are not
#   being raised
def upload_file(file, directory_id: nil)
  return log_error_and_raise("Drive service not initialized.") unless @drive_service

  # Fail early, before any API call, when there is nothing to upload.
  unless File.exist?(file)
    return log_error_and_raise("File '#{file}' does not exist.")
  end

  file_basename = File.basename(file)
  # TODO: If no parent directory is passed, it will deny upload if a file by that name exists in any visible folder on any visible drive. How to fix?
  # see if file exists on Drive
  existing = get_all_files(justfiles: true, parentfolderid: directory_id, name: file_basename)
  # get_all_files returns false when the lookup itself failed (already logged).
  return false unless existing

  if existing.count > 0
    return log_error_and_raise("ERROR: File '#{file_basename}' already exists.")
  end

  file_obj = Google::Apis::DriveV3::File.new(name: file_basename)
  file_obj.parents = [directory_id] if directory_id
  @drive_service.create_file(file_obj, upload_source: file, fields: @file_fields) do |resfile, err|
    return log_error_and_raise("Error uploading file: #{err}.") if err

    # Sanity-check the response: it must describe a file with our name.
    problem =
      if !resfile.name
        "no name key specified in response."
      elsif !resfile.kind
        "no kind key specified in response."
      elsif resfile.kind != "drive#file"
        "kind is of non-file type."
      elsif resfile.name != file_basename
        "file name mismatch."
      end
    return log_error_and_raise(problem) if problem
    # TODO: Add MD5 check, since we're now capable.
  end
  true
end