Class: Scrapetor::Schema
- Inherits:
-
Object
- Object
- Scrapetor::Schema
- Defined in:
- lib/scrapetor/schema.rb
Defined Under Namespace
Instance Attribute Summary collapse
-
#fields ⇒ Object
readonly
Returns the value of attribute fields.
-
#groups ⇒ Object
readonly
Returns the value of attribute groups.
Class Method Summary collapse
- .build(&block) ⇒ Object
- .dump_to_file(schema, path) ⇒ Object
-
.dumpable(schema) ⇒ Object
Convert a schema to a portable Hash (no procs).
- .field_from_h(h) ⇒ Object
- .field_to_h(f) ⇒ Object
- .group_from_h(h) ⇒ Object
- .group_to_h(g) ⇒ Object
- .load(blob) ⇒ Object
- .load_file(path) ⇒ Object
- .new_from_h(h) ⇒ Object
Instance Method Summary collapse
-
#dump ⇒ Object
Serialize the schema to a binary blob (Marshal) for the cross-process plan cache.
-
#field(name, from:, attr: nil, type: :text, clean: false, multi: false, normalize_url: false, default: nil, required: false, transform: nil, delimiter: /\s*,\s*/) ⇒ Object
field :name, from: SELECTOR, attr: SYM, type: SYM, clean: BOOL, multi: BOOL, normalize_url: BOOL, default: VALUE, required: BOOL, transform: PROC, delimiter: STRING_OR_REGEX.
-
#initialize ⇒ Schema
constructor
A new instance of Schema.
- #repeated(selector, as:, &block) ⇒ Object
- #to_h ⇒ Object
Constructor Details
#initialize ⇒ Schema
Returns a new instance of Schema.
13 14 15 16 |
# File 'lib/scrapetor/schema.rb', line 13
# Set up an empty schema: no repeated groups and no top-level fields yet.
# Both collections are filled in later by the DSL methods.
def initialize
  @groups = []
  @fields = []
end
Instance Attribute Details
#fields ⇒ Object (readonly)
Returns the value of attribute fields.
11 12 13 |
# File 'lib/scrapetor/schema.rb', line 11
# Read-only accessor for the schema's top-level field list.
#
# @return [Array] the array held in @fields (the same object, not a copy)
def fields
  @fields
end
#groups ⇒ Object (readonly)
Returns the value of attribute groups.
11 12 13 |
# File 'lib/scrapetor/schema.rb', line 11
# Read-only accessor for the schema's repeated-group list.
#
# @return [Array] the array held in @groups (the same object, not a copy)
def groups
  @groups
end
Class Method Details
.build(&block) ⇒ Object
18 19 20 21 22 |
# File 'lib/scrapetor/schema.rb', line 18
# Create a schema and, when a block is supplied, evaluate that block with
# the new schema as `self` (instance_eval) so DSL methods such as `field`
# and `repeated` can be called without a receiver.
#
# @yield optional DSL block
# @return [Schema] the freshly built schema
def self.build(&block)
  new.tap { |schema| schema.instance_eval(&block) if block }
end
.dump_to_file(schema, path) ⇒ Object
73 74 75 76 |
# File 'lib/scrapetor/schema.rb', line 73
# Write the schema's dumped form to +path+ in binary mode.
#
# @param schema [#dump] object whose #dump result is written to disk
# @param path [String] destination file
# @return [String] +path+, unchanged, so calls can be chained
def self.dump_to_file(schema, path)
  blob = schema.dump
  File.binwrite(path, blob)
  path
end
.dumpable(schema) ⇒ Object
Convert a schema to a portable Hash (no procs).
83 84 85 86 87 88 |
# File 'lib/scrapetor/schema.rb', line 83
# Convert a schema to a portable Hash (no procs).
#
# @param schema [#fields, #groups] schema to convert
# @return [Hash] `{ fields: [...], groups: [...] }`, each entry produced by
#   field_to_h / group_to_h respectively
def self.dumpable(schema)
  field_hashes = schema.fields.map { |f| field_to_h(f) }
  group_hashes = schema.groups.map { |g| group_to_h(g) }
  { fields: field_hashes, groups: group_hashes }
end
.field_from_h(h) ⇒ Object
123 124 125 126 127 128 129 |
# File 'lib/scrapetor/schema.rb', line 123
# Rebuild a Field from the Hash form produced by field_to_h.
#
# @param h [Hash] symbol-keyed field description
# @return [Field] field with its transform slot set to nil
def self.field_from_h(h)
  # Positional order expected by Field.new, up to (not including) transform.
  leading = %i[name selector attr attr_str type clean multi normalize_url default required]
            .map { |key| h[key] }
  # The transform slot is always nil here: the hash form carries no procs.
  Field.new(*leading, nil, h[:delimiter])
end
.field_to_h(f) ⇒ Object
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/scrapetor/schema.rb', line 90
# Convert a single field to its portable Hash form.
#
# @param f [Field] field to convert
# @return [Hash] symbol-keyed description of the field, without :transform
# @raise [SchemaError] when the field carries a transform (not serializable)
def self.field_to_h(f)
  raise SchemaError, "transform: blocks can't be serialized" if f.transform

  # Keys are emitted in this exact order; each value is read from the
  # field's reader of the same name.
  %i[name selector attr attr_str type clean multi normalize_url default required delimiter]
    .each_with_object({}) { |key, out| out[key] = f.public_send(key) }
end
.group_from_h(h) ⇒ Object
131 132 133 134 135 136 137 138 |
# File 'lib/scrapetor/schema.rb', line 131
# Rebuild a Group (including any nested groups) from its Hash form.
#
# @param h [Hash] symbol-keyed group description with :name, :selector,
#   :fields and :groups entries
# @return [Group]
def self.group_from_h(h)
  member_fields = h[:fields].map { |fh| field_from_h(fh) }
  subgroups = h[:groups].map { |gh| group_from_h(gh) } # recurses into nested groups
  Group.new(h[:name], h[:selector], member_fields, subgroups)
end
.group_to_h(g) ⇒ Object
107 108 109 110 111 112 113 114 |
# File 'lib/scrapetor/schema.rb', line 107
# Convert a group, its fields and its nested groups to a portable Hash.
#
# @param g [Group] group to convert
# @return [Hash] `{ name:, selector:, fields: [...], groups: [...] }`
def self.group_to_h(g)
  group_name = g.name
  group_selector = g.selector
  field_hashes = g.fields.map { |f| field_to_h(f) }
  nested_hashes = g.groups.map { |sub| group_to_h(sub) } # recurses into nested groups
  { name: group_name, selector: group_selector, fields: field_hashes, groups: nested_hashes }
end
.load(blob) ⇒ Object
69 70 71 |
# File 'lib/scrapetor/schema.rb', line 69
# Restore a schema from a blob of the kind produced by #dump.
#
# NOTE(review): this goes through Marshal.load, which can instantiate
# arbitrary objects. Only feed it blobs from a trusted source.
#
# @param blob [String] Marshal-encoded Hash form of a schema
# @return [Schema]
def self.load(blob)
  descriptor = Marshal.load(blob) # rubocop:disable Security/MarshalLoad
  new_from_h(descriptor)
end
.load_file(path) ⇒ Object
78 79 80 |
# File 'lib/scrapetor/schema.rb', line 78
# Read +path+ in binary mode and restore the schema stored in it.
#
# @param path [String] file to read
# @return [Object] whatever .load returns for the file's contents
def self.load_file(path)
  blob = File.binread(path)
  load(blob)
end
.new_from_h(h) ⇒ Object
116 117 118 119 120 121 |
# File 'lib/scrapetor/schema.rb', line 116
# Build a schema from its Hash form.
#
# @param h [Hash] hash with :fields and :groups arrays
# @return [Schema] new schema with every field and group appended in order
def self.new_from_h(h)
  new.tap do |schema|
    h[:fields].each { |fh| schema.fields << field_from_h(fh) }
    h[:groups].each { |gh| schema.groups << group_from_h(gh) }
  end
end
Instance Method Details
#dump ⇒ Object
Cross-process plan cache
Serialize a schema to a binary blob (Marshal) so a worker can restore the compiled descriptor without re-parsing the Ruby DSL. Schemas using `transform:` (procs) can't be dumped — those plans must be rebuilt from source.
65 66 67 |
# File 'lib/scrapetor/schema.rb', line 65
# Serialize this schema to a binary blob: the portable Hash form from
# .dumpable, encoded with Marshal.
#
# @return [String] Marshal-encoded blob
def dump
  portable = self.class.dumpable(self)
  Marshal.dump(portable)
end
#field(name, from:, attr: nil, type: :text, clean: false, multi: false, normalize_url: false, default: nil, required: false, transform: nil, delimiter: /\s*,\s*/) ⇒ Object
field :name, from: SELECTOR, attr: SYM, type: SYM,
clean: BOOL, multi: BOOL, normalize_url: BOOL,
default: VALUE, required: BOOL,
transform: PROC, delimiter: STRING_OR_REGEX
from: may be a String selector or an Array of selectors (tried in order until one matches).
Types: :text :integer :float :money :url :date :json :html :list
:boolean :array (alias for multi:true)
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/scrapetor/schema.rb', line 34
# Declare a field and append it to this schema's field list.
#
# `type: :array` is shorthand: the field is stored as `type: :text` with
# `multi: true`.
#
# @param name [Object] field name
# @param from [Object] selector(s), passed through to Field.new as-is
# @param attr [Object, nil] attribute to read; its string form is stored too
# @return [Array] the schema's field list after the append
def field(name, from:, attr: nil, type: :text, clean: false, multi: false,
          normalize_url: false, default: nil, required: false,
          transform: nil, delimiter: /\s*,\s*/)
  if type == :array
    multi = true
    type = :text
  end

  # Keeps nil/false as-is; anything else is stored alongside its to_s form.
  attr_str = attr && attr.to_s

  @fields << Field.new(
    name, from, attr, attr_str, type, clean, multi,
    normalize_url, default, required, transform, delimiter
  )
end
#repeated(selector, as:, &block) ⇒ Object
53 54 55 56 |
# File 'lib/scrapetor/schema.rb', line 53
# Declare a repeated group: the block is evaluated as its own nested schema,
# whose fields and groups become the contents of the new Group.
#
# @param selector [Object] selector for the repeated element
# @param as [Object] name given to the group
# @yield DSL block describing the group's contents
# @return [Array] the schema's group list after the append
def repeated(selector, as:, &block)
  nested = self.class.build(&block)
  @groups << Group.new(as, selector, nested.fields, nested.groups)
end
#to_h ⇒ Object
140 141 142 |
# File 'lib/scrapetor/schema.rb', line 140
# Portable Hash form of this schema, as produced by .dumpable.
#
# @return [Hash]
def to_h
  schema_class = self.class
  schema_class.dumpable(self)
end