Class: XGBoost::DMatrix
- Inherits:
-
Object
- Object
- XGBoost::DMatrix
- Includes:
- Utils
- Defined in:
- lib/xgboost/dmatrix.rb
Instance Attribute Summary
-
#handle ⇒ Object
readonly
Returns the value of attribute handle.
Instance Method Summary
- #data_split_mode ⇒ Object
- #feature_names ⇒ Object
- #feature_names=(feature_names) ⇒ Object
- #feature_types ⇒ Object
- #feature_types=(feature_types) ⇒ Object
- #group ⇒ Object
- #group=(group) ⇒ Object
-
#initialize(data, label: nil, weight: nil, missing: Float::NAN) ⇒ DMatrix
constructor
A new instance of DMatrix.
- #label ⇒ Object
- #label=(label) ⇒ Object
- #num_col ⇒ Object
- #num_nonmissing ⇒ Object
- #num_row ⇒ Object
- #save_binary(fname, silent: true) ⇒ Object
- #slice(rindex) ⇒ Object
- #weight ⇒ Object
- #weight=(weight) ⇒ Object
Constructor Details
#initialize(data, label: nil, weight: nil, missing: Float::NAN) ⇒ DMatrix
Returns a new instance of DMatrix.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/xgboost/dmatrix.rb', line 7

# Builds a DMatrix from +data+, or wraps an existing native handle.
#
# When +data+ is an ::FFI::AutoPointer it is stored as the handle and the
# method returns immediately (label and weight are not applied on that path).
# Otherwise the input is classified with the matrix?/numo?/rover? helpers,
# falling back to treating it as a collection of equally sized rows. The
# values are copied into a native float buffer and handed to
# XGDMatrixCreateFromMat.
#
# @param data [Object] an ::FFI::AutoPointer, something accepted by
#   matrix?/numo?/rover?, or a collection of rows responding to count/size
# @param label [Object, nil] assigned through #label= when truthy
# @param weight [Object, nil] assigned through #weight= when truthy
# @param missing [Float] forwarded to handle_missing and to the native call
# @raise [Error] when a rover column type does not start with int/uint/float
# @raise [ArgumentError] when the rows of a plain collection differ in size
def initialize(data, label: nil, weight: nil, missing: Float::NAN)
  # Wrap an already-created native handle as-is.
  if data.is_a?(::FFI::AutoPointer)
    @handle = data
    return
  end

  if matrix?(data)
    nrow = data.row_count
    ncol = data.column_count
    values = data.to_a.flatten
  elsif numo?(data)
    nrow, ncol = data.shape
  elsif rover?(data)
    nrow, ncol = data.shape
    names = data.keys
    # Collapse each column type to the coarse "int"/"float" labels.
    types =
      data.types.map do |_, type|
        type_name = type.to_s
        if type_name.start_with?("int", "uint")
          "int"
        elsif type_name.start_with?("float")
          "float"
        else
          raise Error, "Unknown feature type: #{type_name}"
        end
      end
    # From here on the data frame is handled through the numo path.
    data = data.to_numo
  else
    nrow = data.count
    ncol = data.first.count
    raise ArgumentError, "Rows have different sizes" unless data.all? { |row| row.size == ncol }
    values = data.flatten
  end

  buffer = ::FFI::MemoryPointer.new(:float, nrow * ncol)
  if numo?(data)
    buffer.write_bytes(data.cast_to(Numo::SFloat).to_string)
  else
    handle_missing(values, missing)
    buffer.write_array_of_float(values)
  end

  out_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGDMatrixCreateFromMat(buffer, nrow, ncol, missing, out_ptr)
  # AutoPointer invokes XGDMatrixFree when the handle is garbage collected.
  @handle = ::FFI::AutoPointer.new(out_ptr.read_pointer, FFI.method(:XGDMatrixFree))

  # names/types are only set on the rover path; nil otherwise.
  self.feature_names = names if names
  self.feature_types = types if types
  self.label = label if label
  self.weight = weight if weight
end
Instance Attribute Details
#handle ⇒ Object (readonly)
Returns the value of attribute handle.
5 6 7 |
# File 'lib/xgboost/dmatrix.rb', line 5

# Reader for the @handle instance variable.
#
# @return [Object] whatever is currently stored in @handle
def handle
  @handle
end
Instance Method Details
#data_split_mode ⇒ Object
116 117 118 119 120 |
# File 'lib/xgboost/dmatrix.rb', line 116

# Queries XGDMatrixDataSplitMode for this handle.
#
# @return [Symbol] :row when the native call reports 0, :col for any other value
def data_split_mode
  mode_ptr = ::FFI::MemoryPointer.new(:uint64)
  check_call FFI.XGDMatrixDataSplitMode(handle, mode_ptr)
  mode_ptr.read_uint64.zero? ? :row : :col
end
#feature_names ⇒ Object
132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/xgboost/dmatrix.rb', line 132

# Fetches the "feature_name" string info from the native matrix.
#
# @return [Object, nil] the result of from_cstr_to_rbstr, or nil when that
#   result is empty
def feature_names
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  strings_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call(
    FFI.XGDMatrixGetStrFeatureInfo(handle, "feature_name", count_ptr, strings_ptr)
  )

  names = from_cstr_to_rbstr(strings_ptr, count_ptr)
  names unless names.empty?
end
#feature_names=(feature_names) ⇒ Object
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
# File 'lib/xgboost/dmatrix.rb', line 147

# Sets (or clears) the "feature_name" string info on the native matrix.
#
# Passing nil sends a nil pointer with length 0 to the native call.
# Otherwise the names go through validate_feature_info, must be unique, and
# must all be Strings that contain none of "[", "]" or "<".
#
# @param feature_names [Object, nil] names to store, or nil to clear
# @raise [ArgumentError] when names are duplicated, not Strings, or contain
#   one of the forbidden characters
def feature_names=(feature_names)
  if feature_names.nil?
    check_call(FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_name", nil, 0))
    return
  end

  # validate feature name
  names = validate_feature_info(
    feature_names,
    num_col,
    data_split_mode == :col,
    "feature names"
  )
  unless names.length == names.uniq.length
    raise ArgumentError, "feature_names must be unique"
  end

  # prohibit the use of symbols that may affect parsing. e.g. []<
  forbidden = ["[", "]", "<"]
  invalid = names.any? do |name|
    !name.is_a?(String) || forbidden.any? { |ch| name.include?(ch) }
  end
  if invalid
    raise ArgumentError, "feature_names must be string, and may not contain [, ] or <"
  end

  c_names = array_of_pointers(names.map { |name| string_pointer(name) })
  check_call(
    FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_name", c_names, names.length)
  )
end
#feature_types ⇒ Object
185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
# File 'lib/xgboost/dmatrix.rb', line 185

# Fetches the "feature_type" string info from the native matrix.
#
# @return [Object, nil] the result of from_cstr_to_rbstr, or nil when that
#   result is empty
def feature_types
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  strings_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call(
    FFI.XGDMatrixGetStrFeatureInfo(handle, "feature_type", count_ptr, strings_ptr)
  )

  types = from_cstr_to_rbstr(strings_ptr, count_ptr)
  types unless types.empty?
end
#feature_types=(feature_types) ⇒ Object
200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# File 'lib/xgboost/dmatrix.rb', line 200

# Sets (or clears) the "feature_type" string info on the native matrix.
#
# Passing nil sends a nil pointer with length 0 to the native call.
# Otherwise the types go through validate_feature_info before being
# converted to C strings and stored.
#
# @param feature_types [Object, nil] types to store, or nil to clear
def feature_types=(feature_types)
  if feature_types.nil?
    check_call(FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_type", nil, 0))
    return
  end

  types = validate_feature_info(
    feature_types,
    num_col,
    data_split_mode == :col,
    "feature types"
  )

  c_types = array_of_pointers(types.map { |type| string_pointer(type) })
  check_call(
    FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_type", c_types, types.length)
  )
end
#group ⇒ Object
94 95 96 |
# File 'lib/xgboost/dmatrix.rb', line 94

# Reads the "group_ptr" field through the uint_info helper.
#
# @return [Object] whatever uint_info returns for "group_ptr"
def group
  uint_info("group_ptr")
end
#group=(group) ⇒ Object
74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/xgboost/dmatrix.rb', line 74

# Stores +group+ as the "group" info of the native matrix.
#
# The values are copied into a native uint32 buffer, and a JSON description
# of that buffer (shape, typestr, address, version) is passed to
# XGDMatrixSetInfoFromInterface.
#
# @param group [Array<Integer>] values written with write_array_of_uint32
def group=(group)
  # Keep the buffer in a local so it stays referenced during the native call.
  buffer = ::FFI::MemoryPointer.new(:uint32, group.size)
  buffer.write_array_of_uint32(group)

  payload = JSON.generate(
    {
      shape: [group.length],
      typestr: "|u4",
      data: [buffer.address, false],
      version: 3
    }
  )
  check_call FFI.XGDMatrixSetInfoFromInterface(handle, "group", payload)
end
#label ⇒ Object
86 87 88 |
# File 'lib/xgboost/dmatrix.rb', line 86

# Reads the "label" field through the float_info helper.
#
# @return [Object] whatever float_info returns for "label"
def label
  float_info("label")
end
#label=(label) ⇒ Object
66 67 68 |
# File 'lib/xgboost/dmatrix.rb', line 66

# Writes the "label" field through the set_float_info helper.
#
# @param label [Object] value forwarded unchanged to set_float_info
def label=(label)
  set_float_info("label", label)
end
#num_col ⇒ Object
104 105 106 107 108 |
# File 'lib/xgboost/dmatrix.rb', line 104

# Returns the value XGDMatrixNumCol reports for this handle (the column
# count, going by the native function's name).
#
# @return [Integer] the uint64 written by the native call
def num_col
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  check_call FFI.XGDMatrixNumCol(handle, count_ptr)
  count_ptr.read_uint64
end
#num_nonmissing ⇒ Object
110 111 112 113 114 |
# File 'lib/xgboost/dmatrix.rb', line 110

# Returns the value XGDMatrixNumNonMissing reports for this handle (the
# number of non-missing entries, going by the native function's name).
#
# @return [Integer] the uint64 written by the native call
def num_nonmissing
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  check_call FFI.XGDMatrixNumNonMissing(handle, count_ptr)
  count_ptr.read_uint64
end
#num_row ⇒ Object
98 99 100 101 102 |
# File 'lib/xgboost/dmatrix.rb', line 98

# Returns the value XGDMatrixNumRow reports for this handle (the row count,
# going by the native function's name).
#
# @return [Integer] the uint64 written by the native call
def num_row
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  check_call FFI.XGDMatrixNumRow(handle, count_ptr)
  count_ptr.read_uint64
end
#save_binary(fname, silent: true) ⇒ Object
62 63 64 |
# File 'lib/xgboost/dmatrix.rb', line 62

# Passes this matrix to XGDMatrixSaveBinary with the given file name.
#
# @param fname [String] file name forwarded to the native call
# @param silent [Boolean] sent to the native call as 1 when truthy, 0 otherwise
def save_binary(fname, silent: true)
  silent_flag = silent ? 1 : 0
  check_call FFI.XGDMatrixSaveBinary(handle, fname, silent_flag)
end
#slice(rindex) ⇒ Object
122 123 124 125 126 127 128 129 130 |
# File 'lib/xgboost/dmatrix.rb', line 122

# Creates a new DMatrix via XGDMatrixSliceDMatrix using the given indices.
#
# @param rindex [Array<Integer>] indices written to a native int buffer
#   (row indices, going by the parameter name)
# @return [DMatrix] a DMatrix wrapping the handle produced by the native call
def slice(rindex)
  index_buffer = ::FFI::MemoryPointer.new(:int, rindex.count)
  index_buffer.write_array_of_int(rindex)

  out_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGDMatrixSliceDMatrix(handle, index_buffer, rindex.size, out_ptr)

  # AutoPointer invokes XGDMatrixFree when the new handle is garbage collected.
  sliced_handle = ::FFI::AutoPointer.new(out_ptr.read_pointer, FFI.method(:XGDMatrixFree))
  DMatrix.new(sliced_handle)
end
#weight ⇒ Object
90 91 92 |
# File 'lib/xgboost/dmatrix.rb', line 90

# Reads the "weight" field through the float_info helper.
#
# @return [Object] whatever float_info returns for "weight"
def weight
  float_info("weight")
end
#weight=(weight) ⇒ Object
70 71 72 |
# File 'lib/xgboost/dmatrix.rb', line 70

# Writes the "weight" field through the set_float_info helper.
#
# @param weight [Object] value forwarded unchanged to set_float_info
def weight=(weight)
  set_float_info("weight", weight)
end