Class: Bioroebe::Ncbi

Inherits:

Base

Object
Base
Bioroebe::Ncbi

show all

Defined in: lib/bioroebe/ncbi/ncbi.rb,
lib/bioroebe/ncbi/efetch.rb

Overview

Bioroebe::Ncbi

Constant Summary collapse

NCBI_URL = # NCBI_URL #

'https://www.ncbi.nlm.nih.gov/gene/?term='

NAMESPACE = # NAMESPACE #

inspect

Constants included from ColoursForBase

ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Class Method Summary collapse

.cd(i) ⇒ Object

# === Ncbi.cd ========================================================================= #.
.e(i = '') ⇒ Object

# === Bioroebe::Ncbi.e ========================================================================= #.
.efetch(accession_number = 'JN556047', email_to_use = 'me@foobar.com') ⇒ Object

# === Bioroebe::Ncbi.efetch.
.efetch_by_url(target_id = :default) ⇒ Object

# === Bioroebe::Ncbi.efetch_by_url.
.opnn ⇒ Object

# === Bioroebe::Ncbi.opnn ========================================================================= #.

Instance Method Summary collapse

#initialize(i = nil, run_already = true) ⇒ Ncbi constructor

# === initialize ========================================================================= #.
#input? ⇒ Boolean

# === input? ========================================================================= #.
#main_url? ⇒ Boolean

# === main_url? ========================================================================= #.
#reset ⇒ Object

# === reset (reset tag) ========================================================================= #.
#run ⇒ Object

# === run (run tag) ========================================================================= #.
#set_input(i = 'STAT3') ⇒ Object

# === set_input ========================================================================= #.

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(i = nil, run_already = true) ⇒ `Ncbi`

#

initialize

#

# File 'lib/bioroebe/ncbi/ncbi.rb', line 34

# ========================================================================= #
# === initialize
#
# Resets the instance, stores the given input via set_input() and then,
# unless told otherwise, invokes run() right away.
#
# @param i           the search term(s) handed over to set_input(); nil
#                    by default
# @param run_already when truthy (the default), run() is called at the
#                    end of construction
# ========================================================================= #
def initialize(i = nil, run_already = true)
  reset
  set_input(i)
  # Callers may pass a falsy second argument to defer the run() step.
  return unless run_already

  run
end

Class Method Details

.cd(i) ⇒ `Object`

#

Ncbi.cd

#



243
244
245

# File 'lib/bioroebe/ncbi/efetch.rb', line 243

# ========================================================================= #
# === Ncbi.cd
#
# Thin delegator: hands `i` unchanged to ::Bioroebe.cd and returns
# whatever that call returns.
#
# NOTE(review): presumably this changes the working directory to `i` -
# confirm against the implementation of Bioroebe.cd.
# ========================================================================= #
def self.cd(i)
  ::Bioroebe.cd(i)
end

.e(i = '') ⇒ `Object`

#

Bioroebe::Ncbi.e

#



91
92
93

# File 'lib/bioroebe/ncbi/efetch.rb', line 91

# ========================================================================= #
# === Bioroebe::Ncbi.e
#
# Prints `i` via puts. With the default argument (an empty String) this
# emits just a newline.
# ========================================================================= #
def self.e(i = '')
  puts i
end

.efetch(accession_number = 'JN556047', email_to_use = 'me@foobar.com') ⇒ `Object`

#

Bioroebe::Ncbi.efetch

Allows you to obtain a query. Currently this makes use of the “bio” gem.

Usage examples:

result = Bioroebe::Ncbi.efetch 'NM_007315'

#

# File 'lib/bioroebe/ncbi/efetch.rb', line 66

# ========================================================================= #
# === Bioroebe::Ncbi.efetch
#
# Obtains a nucleotide record through the "bio" gem, by calling
# Bio::NCBI::REST::EFetch.nucleotide with the given accession number.
#
# If the "bio" gem cannot be loaded (and Bio::NCBI has not been defined
# by other means), a message is printed via e() and nil is returned,
# rather than failing later with an uninitialized-constant error.
#
# @param accession_number the accession number to look up
# @param email_to_use     assigned to Bio::NCBI.default_email before the
#                         query is made
# @return whatever Bio::NCBI::REST::EFetch.nucleotide returns, or nil
#         when the "bio" gem is unavailable
#
# Usage example:
#
#   result = Bioroebe::Ncbi.efetch 'NM_007315'
#
# ========================================================================= #
def self.efetch(
    accession_number = 'JN556047',
    email_to_use     = 'me@foobar.com'
  )
  begin
    require 'bio' # For now, until we get rid of the Bio component.
  rescue LoadError
    # ===================================================================== #
    # Only bail out if the Bio::NCBI namespace is truly unavailable.
    # ===================================================================== #
    unless defined?(Bio::NCBI)
      e 'The "bio" gem could not be loaded, but Bioroebe::Ncbi.efetch '\
        'depends on it. You can install it via: gem install bio'
      return nil
    end
  end
  # ======================================================================= #
  # Set any email next.
  # ======================================================================= #
  Bio::NCBI.default_email = email_to_use
  Bio::NCBI::REST::EFetch.nucleotide(accession_number)
end

.efetch_by_url(target_id = :default) ⇒ `Object`

#

Bioroebe::Ncbi.efetch_by_url

This method is much simpler than Bioroebe::Ncbi.efetch; it just uses a URL such as:

http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=189458859&rettype=fasta&retmode=text

https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=NP_000092.2&rettype=fasta&retmode=text

That way we don’t even have to parse anything! Just provide it the target ID.

If you want to obtain the CDS, or a subset, you can do this:

https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=189458859&rettype=fasta&retmode=text&from=389&to=2641
/nuccore/NM_007315.3?&;feature=CDS
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NM_007315.3&rettype=fasta&retmode=text&feature=CDS

#

# File 'lib/bioroebe/ncbi/efetch.rb', line 117

# ========================================================================= #
# === Bioroebe::Ncbi.efetch_by_url
#
# Downloads a FASTA record from NCBI by URL and stores it locally.
#
# The target id may be:
#
#   - :default (the id '189458859' is used then)
#   - a plain id or accession, given as String, Symbol or Integer
#   - a partial URL that starts with 'efetch.fcgi'
#   - a full remote URL that starts with 'http'
#   - an Array; only its first element is used
#
# For a plain id the eutils efetch URL is assembled here. The database
# is 'protein' when the id begins with a RefSeq protein prefix (AP_,
# NP_, WP_, XP_, YP_) and 'nuccore' otherwise. Note that NM_ denotes a
# RefSeq mRNA record and is thus queried against 'nuccore'.
#
# The download is written into the log directory, then renamed based on
# the FASTA identifier and finally moved into the fasta directory.
#
# An empty target id leads to a message followed by exit.
#
# @param target_id the id, accession, partial URL or full URL to fetch
# @return the final local path of the downloaded file; if an
#         OpenURI::HTTPError occurs, the error is reported and the
#         method's value is whatever `pp error` returns
# ========================================================================= #
def self.efetch_by_url(
    target_id = :default
  )
  target_id = target_id.first if target_id.is_a? Array
  # ======================================================================= #
  # === :default
  # ======================================================================= #
  target_id = '189458859' if target_id == :default
  # ======================================================================= #
  # Normalize to a mutable String early on, so that Integer, Symbol, nil
  # or frozen input can be used with the String-methods below.
  # ======================================================================= #
  target_id = target_id.to_s.dup
  # ======================================================================= #
  # Next we will build up our desired URL. Since the user can also provide
  # the full remote URL, we will only add what is necessary here.
  # ======================================================================= #
  if target_id.start_with? 'http'
    use_this_url = target_id.dup
  elsif target_id.empty?
    opne 'No target id was passed. Please provide an ID.'
    exit
  else
    use_this_database_type = 'nuccore' # This is for a nucleotide sequence.
    # ===================================================================== #
    # RefSeq protein accessions carry one of these prefixes. Example:
    #
    #   https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=NP_000092.2&rettype=fasta&retmode=text
    #
    # ===================================================================== #
    protein_prefixes = %w[AP_ NP_ WP_ XP_ YP_]
    if target_id.start_with?(*protein_prefixes)
      use_this_database_type = 'protein'
    end
    if target_id.start_with? 'efetch.fcgi'
      target_id.prepend('https://eutils.ncbi.nlm.nih.gov/entrez/eutils/')
    end
    if target_id.include?('entrez/eutils/efetch.fcgi?db=') ||
       target_id.include?('&rettype=fasta&retmode=text')
      use_this_url = target_id.dup
    else
      use_this_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db="\
                     "#{use_this_database_type}&id="\
                     "#{target_id}"\
                     "&rettype=fasta&retmode=text"
    end
  end
  opnn; ::Bioroebe.erev 'We will use the following url: '
  opne "  #{::Colours.simp(use_this_url)}"
  # ======================================================================= #
  # Go to our log-directory.
  # ======================================================================= #
  cd ::Bioroebe.log_dir?
  # ======================================================================= #
  # Next, determine the name of the local file. If the input carries an
  # id= query parameter then that id becomes the filename; otherwise the
  # basename of the input is used. The characters '?', '&' and '=' are
  # replaced via '_' as they are unwanted in a filename.
  # ======================================================================= #
  local_name = File.basename(target_id.sub(/report=fasta\z/, ''))
  id_from_query = local_name[/[?&]id=([^&]+)/, 1]
  local_name = id_from_query if id_from_query
  local_name = "#{local_name.tr('?&=', '_')}.fasta"
  save_into_this_file = ::Bioroebe.rds(::Bioroebe.log_dir?+local_name).dup
  opnn; ::Bioroebe.erev 'We will save into the file `'+
        ::Colours.sfile(save_into_this_file)+'`.'
  begin # We have to rescue OpenURI::HTTPError errors.
    # ===================================================================== #
    # The block form ensures that the remote handle is closed again.
    # ===================================================================== #
    URI.open(use_this_url) do |remote|
      File.write(save_into_this_file, remote.read)
    end
    # ===================================================================== #
    # Next, we will rename this file to a "better" name.
    #
    # A Fasta identifier has several entries separated by '|'.
    #
    # First, we grab the identifier:
    # ===================================================================== #
    new_filename = ::Bioroebe.return_new_filename_based_on_fasta_identifier(
      save_into_this_file
    )
    opnn; ::Bioroebe.erev 'Next renaming `'+::Colours.sfile(save_into_this_file)+
                          ::Bioroebe.rev+'` to'
    opnn; ::Bioroebe.erev "  `#{::Colours.sfile(new_filename)}`."
    ::Bioroebe.mv(save_into_this_file, new_filename)
    fasta_directory = ::Bioroebe.fasta_directory?
    unless File.directory? fasta_directory
      ::Bioroebe.mkdir_p(fasta_directory)
    end
    # ===================================================================== #
    # Move it into the fasta/ subdirectory, to keep things cleaner.
    # ===================================================================== #
    if File.exist? fasta_directory
      even_newer_filename = fasta_directory+File.basename(new_filename)
      ::Bioroebe.mv(new_filename, even_newer_filename)
      opnn; ::Bioroebe.erev "Moving into `#{::Colours.sfile(even_newer_filename)}`."
      new_filename = even_newer_filename
    end
    return new_filename # Return the new location here.
  rescue OpenURI::HTTPError => error
    ::Bioroebe.erev 'We could not download from this url: '+
                    ::Colours.simp(use_this_url)
    ::Bioroebe.erev 'The error was:'
    pp error
  end
end

.opnn ⇒ `Object`

#

Bioroebe::Ncbi.opnn

#



84
85
86

# File 'lib/bioroebe/ncbi/efetch.rb', line 84

# ========================================================================= #
# === Bioroebe::Ncbi.opnn
#
# Calls ::Opn.opn, passing the NAMESPACE constant as the namespace:
# argument, and returns the result of that call.
#
# NOTE(review): presumably this prints a namespace-prefix before a
# message is shown - confirm against the Opn project.
# ========================================================================= #
def self.opnn
  ::Opn.opn(namespace: NAMESPACE)
end

Instance Method Details

#input? ⇒ `Boolean`

#

input?

#

Returns:

(Boolean)



78
79
80

# File 'lib/bioroebe/ncbi/ncbi.rb', line 78

# ========================================================================= #
# === input?
#
# Reader for @input. Despite the trailing '?' this does not return a
# boolean; it returns the value that set_input() stored (nil if nothing
# has been stored yet).
# ========================================================================= #
def input?
  @input
end

#main_url? ⇒ `Boolean`

#

main_url?

#

Returns:

(Boolean)



53
54
55

# File 'lib/bioroebe/ncbi/ncbi.rb', line 53

# ========================================================================= #
# === main_url?
#
# Returns the NCBI_URL constant. Despite the trailing '?' this is not a
# boolean query; the constant holds the base URL for a gene search.
# ========================================================================= #
def main_url?
  NCBI_URL
end

#reset ⇒ `Object`

#

reset (reset tag)

#



46
47
48

# File 'lib/bioroebe/ncbi/ncbi.rb', line 46

# ========================================================================= #
# === reset                                                     (reset tag)
#
# Invokes the reset() method of the parent class, explicitly without
# arguments. No further state is initialized here.
# ========================================================================= #
def reset
  super()
end

#run ⇒ `Object`

#

run (run tag)

#



85
86
87

# File 'lib/bioroebe/ncbi/ncbi.rb', line 85

# ========================================================================= #
# === run                                                         (run tag)
#
# Passes the stored input (see input?) to open_in_browser(), which is
# defined elsewhere.
#
# NOTE(review): presumably this opens the assembled NCBI URL in the
# browser - confirm against open_in_browser().
# ========================================================================= #
def run
  open_in_browser(input?)
end

#set_input(i = 'STAT3') ⇒ `Object`

#

set_input

#

# File 'lib/bioroebe/ncbi/ncbi.rb', line 60

# ========================================================================= #
# === set_input
#
# Builds the value stored in @input: the result of main_url? followed by
# the given search term.
#
# An Array is joined via ' ' (and stripped) first; anything else is
# converted via to_s. If the assembled String contains a ' ' character,
# then the whole String is stripped and wrapped in double quotes.
#
# @param i the search term, or an Array of search terms
# @return the String that was assigned to @input
# ========================================================================= #
def set_input(i = 'STAT3')
  search_term = i.is_a?(Array) ? i.join(' ').strip : i
  assembled = "#{main_url?}#{search_term}"
  # ======================================================================= #
  # Quote the result if it contains a ' ' token.
  # ======================================================================= #
  assembled = %("#{assembled.strip}") if assembled.include?(' ')
  @input = assembled
end

Class: Bioroebe::Ncbi

Overview

Bioroebe::Ncbi

Constant Summary collapse

#

NCBI_URL

#

#

NAMESPACE

#

Constants included from ColoursForBase

Class Method Summary collapse

# === Ncbi.cd ========================================================================= #.

# === Bioroebe::Ncbi.e ========================================================================= #.

# === Bioroebe::Ncbi.efetch.

# === Bioroebe::Ncbi.efetch_by_url.

# === Bioroebe::Ncbi.opnn ========================================================================= #.

Instance Method Summary collapse

# === initialize ========================================================================= #.

# === input? ========================================================================= #.

# === main_url? ========================================================================= #.

# === reset (reset tag) ========================================================================= #.

# === run (run tag) ========================================================================= #.

# === set_input ========================================================================= #.