Class: HTS::Bam::Record

Inherits:
Object
  • Object
show all
Defined in:
lib/hts/bam/record.rb

Overview

A class for working with alignment records.

Constant Summary collapse

SEQ_NT16_STR =
"=ACMGRSVTWYHKDBN"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(header, bam1 = nil, qname: nil, flag: nil, tid: nil, pos: nil, mapq: nil, cigar: nil, mtid: nil, mpos: nil, isize: nil, seq: nil, qual: nil, l_aux: nil) ⇒ Record

Initialization API is experimental.



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/hts/bam/record.rb', line 17

def initialize(
  header,
  bam1 = nil,
  qname: nil,
  flag: nil,
  tid: nil,
  pos: nil,
  mapq: nil,
  cigar: nil,
  mtid: nil,
  mpos: nil,
  isize: nil,
  seq: nil,
  qual: nil,
  l_aux: nil
)
  @bam1 = bam1 || LibHTS.bam_init1
  @header = header

  params = [qname, flag, tid, pos, mapq, cigar, mtid, mpos, isize, seq, qual, l_aux]
  return if params.all? { |x| x.nil? }

  if params.all?
    cigar_array = Cigar.parse(cigar).array
    cigar_pointer = if cigar_array.empty?
                      FFI::Pointer::NULL
                    else
                      FFI::MemoryPointer.new(:uint32, cigar_array.length).tap do |pointer|
                        pointer.write_array_of_uint32(cigar_array)
                      end
                    end
    if qual.is_a?(Array)
      qual = qual.pack("C*")
    elsif qual.is_a?(String)
      raise "Which implementation is better? +33 or not? Please tell me."
    end
    r = LibHTS.bam_set1(
      @bam1,
      qname.length,
      qname,
      flag,
      tid,
      pos,
      mapq,
      cigar_array.length,
      cigar_pointer,
      mtid,
      mpos,
      isize,
      seq.length,
      seq,
      qual,
      l_aux
    )
    raise "bam_set1 failed: #{r}" if r < 0
  else
    warn "Ignore bam_set1 because some arguments are missing."
  end
end

Instance Attribute Details

#headerObject (readonly)

Returns the value of attribute header.



13
14
15
# File 'lib/hts/bam/record.rb', line 13

def header
  @header
end

Instance Method Details

#aux(key = nil) ⇒ String

Get the auxiliary data.

Parameters:

  • key (String) (defaults to: nil)

    tag name

Returns:

  • (String)

    value



324
325
326
327
328
329
330
331
# File 'lib/hts/bam/record.rb', line 324

def aux(key = nil)
  aux = Aux.new(self)
  if key
    aux.get(key)
  else
    aux
  end
end

#base(n) ⇒ String

Get the base of the requested index “i” of the query sequence.

Parameters:

  • i (Integer)

    index

Returns:

  • (String)

    base



268
269
270
271
272
273
274
# File 'lib/hts/bam/record.rb', line 268

def base(n)
  n += len if n < 0
  return "." if (n >= len) || (n < 0) # eg. base(-1000)

  r = LibHTS.bam_get_seq(@bam1)
  SEQ_NT16_STR[LibHTS.bam_seqi(r, n)]
end

#base_mod(auto_parse: true) ⇒ BaseMod

Get base modification information from MM/ML tags

Parameters:

  • auto_parse (Boolean) (defaults to: true)

    If true (default), parse lazily on first access

Returns:

  • (BaseMod)

    Base modification object



336
337
338
# File 'lib/hts/bam/record.rb', line 336

def base_mod(auto_parse: true)
  BaseMod.new(self, auto_parse: auto_parse)
end

#base_qual(n) ⇒ Integer

Get the base quality of the requested index “i” of the query sequence.

Parameters:

  • i (Integer)

    index

Returns:

  • (Integer)

    base quality



296
297
298
299
300
301
302
# File 'lib/hts/bam/record.rb', line 296

def base_qual(n)
  n += len if n < 0
  return 0 if (n >= len) || (n < 0) # eg. base_qual(-1000)

  q_ptr = LibHTS.bam_get_qual(@bam1)
  q_ptr.get_uint8(n)
end

#binInteger

Get the bin calculated by bam_reg2bin().

Returns:

  • (Integer)

    bin



141
142
143
# File 'lib/hts/bam/record.rb', line 141

def bin
  @bam1[:core][:bin]
end

#bin=(bin) ⇒ Object



145
146
147
# File 'lib/hts/bam/record.rb', line 145

def bin=(bin)
  @bam1[:core][:bin] = bin
end

#chromString Also known as: contig

Get the reference sequence name of the alignment. (a.k.a RNAME) ” if not mapped.

Returns:

  • (String)

    reference sequence name



158
159
160
161
162
# File 'lib/hts/bam/record.rb', line 158

def chrom
  return "" if tid == -1

  LibHTS.sam_hdr_tid2name(@header, tid)
end

#cigarBam::Cigar

Get the Bam::Cigar object.

Returns:



214
215
216
# File 'lib/hts/bam/record.rb', line 214

def cigar
  Cigar.new(self)
end

#cigar=(str) ⇒ Object



218
219
220
221
222
223
224
225
226
# File 'lib/hts/bam/record.rb', line 218

def cigar=(str)
  case str
  when Cigar, String
    r = LibHTS.bam_parse_cigar(str.to_s, FFI::Pointer::NULL, @bam1)
    raise "bam_parse_cigar failed: #{r}" if r.negative?
  else
    raise ArgumentError, "cigar must be a String or Bam::Cigar"
  end
end

#endposInteger

Get the rightmost base position of the alignment on the reference genome.

Returns:

  • (Integer)

    0-based rightmost coordinate



151
152
153
# File 'lib/hts/bam/record.rb', line 151

def endpos
  LibHTS.bam_endpos @bam1
end

#flagBam::Flag

Get Bam::Flag object of the alignment.

Returns:



306
307
308
# File 'lib/hts/bam/record.rb', line 306

def flag
  Flag.new(@bam1[:core][:flag])
end

#flag=(flag) ⇒ Object



310
311
312
313
314
315
316
317
318
319
# File 'lib/hts/bam/record.rb', line 310

def flag=(flag)
  case flag
  when Integer
    @bam1[:core][:flag] = flag
  when Flag
    @bam1[:core][:flag] = flag.value
  else
    raise "Invalid flag type: #{flag.class}"
  end
end

#insert_sizeInteger Also known as: isize

Get the observed template length. (a.k.a TLEN)

Returns:

  • (Integer)

    isize



191
192
193
# File 'lib/hts/bam/record.rb', line 191

def insert_size
  @bam1[:core][:isize]
end

#insert_size=(isize) ⇒ Object Also known as: isize=



195
196
197
# File 'lib/hts/bam/record.rb', line 195

def insert_size=(isize)
  @bam1[:core][:isize] = isize
end

#lenInteger

Get the length of the query sequence.

Returns:

  • (Integer)

    query length



261
262
263
# File 'lib/hts/bam/record.rb', line 261

def len
  @bam1[:core][:l_qseq]
end

#mapqInteger

Get the mapping quality of the alignment. (a.k.a MAPQ)

Returns:

  • (Integer)

    mapping quality



204
205
206
# File 'lib/hts/bam/record.rb', line 204

def mapq
  @bam1[:core][:qual]
end

#mapq=(mapq) ⇒ Object



208
209
210
# File 'lib/hts/bam/record.rb', line 208

def mapq=(mapq)
  @bam1[:core][:qual] = mapq
end

#mate_chromString Also known as: mate_contig

Get the reference sequence name of the mate. ” if not mapped.

Returns:

  • (String)

    reference sequence name



169
170
171
172
173
# File 'lib/hts/bam/record.rb', line 169

def mate_chrom
  return "" if mtid == -1

  LibHTS.sam_hdr_tid2name(@header, mtid)
end

#mate_posInteger Also known as: mpos

Get the 0-based leftmost coordinate of the mate.

Returns:

  • (Integer)

    0-based leftmost coordinate



128
129
130
# File 'lib/hts/bam/record.rb', line 128

def mate_pos
  @bam1[:core][:mpos]
end

#mate_pos=(mpos) ⇒ Object Also known as: mpos=



132
133
134
# File 'lib/hts/bam/record.rb', line 132

def mate_pos=(mpos)
  @bam1[:core][:mpos] = mpos
end

#mate_strandString

Get whether the query’s mate is on the reverse strand.

Returns:

  • (String)

    strand “+” or “-”



185
186
187
# File 'lib/hts/bam/record.rb', line 185

def mate_strand
  LibHTS.bam_is_mrev(@bam1) ? "-" : "+"
end

#mtidInteger

Get the chromosome ID of the mate. -1 if not mapped.

Returns:

  • (Integer)

    chromosome ID



108
109
110
# File 'lib/hts/bam/record.rb', line 108

def mtid
  @bam1[:core][:mtid]
end

#mtid=(mtid) ⇒ Object



112
113
114
# File 'lib/hts/bam/record.rb', line 112

def mtid=(mtid)
  @bam1[:core][:mtid] = mtid
end

#posInteger

Get the 0-based leftmost coordinate of the alignment.

Returns:

  • (Integer)

    0-based leftmost coordinate



118
119
120
# File 'lib/hts/bam/record.rb', line 118

def pos
  @bam1[:core][:pos]
end

#pos=(pos) ⇒ Object



122
123
124
# File 'lib/hts/bam/record.rb', line 122

def pos=(pos)
  @bam1[:core][:pos] = pos
end

#qlenInteger

Calculate query length from CIGAR.

Returns:

  • (Integer)

    query length



230
231
232
233
234
235
236
# File 'lib/hts/bam/record.rb', line 230

def qlen
  # cigar.qlen will be slower because it converts to a Ruby array.
  LibHTS.bam_cigar2qlen(
    @bam1[:core][:n_cigar],
    LibHTS.bam_get_cigar(@bam1)
  )
end

#qnameString

Get the read name. (a.k.a QNAME)

Returns:

  • (String)

    query template name



88
89
90
# File 'lib/hts/bam/record.rb', line 88

def qname
  LibHTS.bam_get_qname(@bam1).read_string
end

#qname=(name) ⇒ Object



92
93
94
# File 'lib/hts/bam/record.rb', line 92

def qname=(name)
  LibHTS.bam_set_qname(@bam1, name)
end

#qualArray<Integer>

Get the base qualities as raw PHRED bytes. Ruby has no UInt8 type, so this returns an Array<Integer> with values in 0..255, corresponding to Crystal’s Array(UInt8). Use qual_string for the SAM-style ASCII representation.

Returns:

  • (Array<Integer>)

    base qualities as unsigned bytes



281
282
283
284
# File 'lib/hts/bam/record.rb', line 281

def qual
  q_ptr = LibHTS.bam_get_qual(@bam1)
  q_ptr.read_array_of_uint8(len)
end

#qual_stringString

Get the base qualities as a string. (a.k.a QUAL) ASCII of base quality + 33.

Returns:

  • (String)

    base qualities



289
290
291
# File 'lib/hts/bam/record.rb', line 289

def qual_string
  qual.map { |q| (q + 33).chr }.join
end

#rlenInteger

Calculate reference length from CIGAR.

Returns:

  • (Integer)

    reference length



240
241
242
243
244
245
# File 'lib/hts/bam/record.rb', line 240

def rlen
  LibHTS.bam_cigar2rlen(
    @bam1[:core][:n_cigar],
    LibHTS.bam_get_cigar(@bam1)
  )
end

#seqString Also known as: sequence

Get the sequence. (a.k.a SEQ)

Returns:

  • (String)

    sequence



249
250
251
252
253
254
255
256
# File 'lib/hts/bam/record.rb', line 249

def seq
  r = LibHTS.bam_get_seq(@bam1)
  seq = String.new
  len.times do |i|
    seq << SEQ_NT16_STR[LibHTS.bam_seqi(r, i)]
  end
  seq
end

#strandString

Get whether the query is on the reverse strand.

Returns:

  • (String)

    strand “+” or “-”



179
180
181
# File 'lib/hts/bam/record.rb', line 179

def strand
  LibHTS.bam_is_rev(@bam1) ? "-" : "+"
end

#structObject

Return the FFI::Struct object.



78
79
80
# File 'lib/hts/bam/record.rb', line 78

def struct
  @bam1
end

#tidInteger

Get the chromosome ID of the alignment. -1 if not mapped.

Returns:

  • (Integer)

    chromosome ID



98
99
100
# File 'lib/hts/bam/record.rb', line 98

def tid
  @bam1[:core][:tid]
end

#tid=(tid) ⇒ Object



102
103
104
# File 'lib/hts/bam/record.rb', line 102

def tid=(tid)
  @bam1[:core][:tid] = tid
end

#to_ptrObject



82
83
84
# File 'lib/hts/bam/record.rb', line 82

def to_ptr
  @bam1.to_ptr
end

#to_sString

Returns a string representation of the alignment.

Returns:

  • (String)

    a string representation of the alignment.



356
357
358
359
360
361
362
363
364
365
# File 'lib/hts/bam/record.rb', line 356

def to_s
  kstr = LibHTS::KString.new
  begin
    raise "Failed to format bam record" if LibHTS.sam_format1(@header.struct, @bam1, kstr) == -1

    kstr.read_string_copy
  ensure
    kstr.free_buffer
  end
end