Class: Coradoc::Input::Html::HtmlConverter
- Inherits:
-
Object
- Object
- Coradoc::Input::Html::HtmlConverter
- Defined in:
- lib/coradoc/html/input/html_converter.rb
Overview
HTML to CoreModel converter
This class handles the conversion of HTML documents to CoreModel. It does NOT handle serialization to any specific output format. For serialization, use Coradoc.serialize(coremodel, to: :format).
Class Method Summary collapse
-
.cleanup_result(result, options = {}) ⇒ String
Clean up the serialized result.
-
.convert(input, options = {}) ⇒ String
deprecated
Deprecated.
Use .to_core_model + Coradoc.serialize instead
- .prepare_plugin_instances(options) ⇒ Object
-
.serialize_core_model(coremodel, format, options = {}) ⇒ String
Serialize CoreModel to target format using the appropriate gem.
-
.to_coradoc(input, options = {}) ⇒ Object
deprecated
Deprecated.
Use .to_core_model instead
-
.to_core_model(input, options = {}) ⇒ Coradoc::CoreModel::Base
Convert HTML to CoreModel.
- .track_time(task) ⇒ Object
Class Method Details
.cleanup_result(result, options = {}) ⇒ String
Clean up the serialized result
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/coradoc/html/input/html_converter.rb', line 125
#
# Clean up the serialized result.
#
# Runs the configured cleaner over the serialized string, then gives every
# plugin that implements +postprocess_output_string+ a chance to rewrite it.
#
# @param result [String] serialized document text
# @param options [Hash] conversion options; +:plugin_instances+, when
#   present, is used instead of instantiating the configured plugins
# @return [String] the cleaned-up (and plugin-postprocessed) text
def self.cleanup_result(result, options = {})
  Input::Html.config.with(options) do
    plugin_instances = prepare_plugin_instances(options)

    result = track_time 'Cleaning up the result' do
      Input::Html.cleaner.tidy(result)
    end

    plugin_instances.each do |plugin|
      # The postprocessing hook is optional; skip plugins that lack it.
      next unless plugin.respond_to?(:postprocess_output_string)

      # Plugins exchange data through accessors: hand the text over,
      # run the hook, then read back whatever the plugin left there.
      plugin.output_string = result
      track_time "Postprocessing output string with #{plugin.name} plugin" do
        plugin.postprocess_output_string
      end
      result = plugin.output_string
    end

    result
  end
end
.convert(input, options = {}) ⇒ String
Deprecated.
Use .to_core_model + Coradoc.serialize instead
Legacy method for backward compatibility. Converts HTML to CoreModel, then serializes it to the target format.
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/coradoc/html/input/html_converter.rb', line 93
#
# Legacy method for backward compatibility.
# Converts HTML to CoreModel, then serializes it to the target format.
#
# @deprecated Use .to_core_model + Coradoc.serialize instead
# @param input [Object] HTML input, passed through to .to_core_model
# @param options [Hash] conversion options; +:output_format+ selects the
#   serialization target and defaults to +:asciidoc+
# @return [String, Hash] the serialized text, or, when .to_core_model
#   returns a Hash, a Hash with the same keys mapped to serialized values
def self.convert(input, options = {})
  # Work on a private copy: removing :output_format from the caller's hash
  # would change the format of a later call reusing that hash, and would
  # raise FrozenError for a frozen hash. The same copy is handed to both
  # stages so that the :plugin_instances entry stored by .to_core_model is
  # still visible to .serialize_core_model.
  options = options.dup
  output_format = options.delete(:output_format) || :asciidoc

  coremodel = to_core_model(input, options)

  if coremodel.is_a?(Hash)
    coremodel.to_h do |file, tree|
      track_time "Serializing file #{file || 'main'}" do
        [file, serialize_core_model(tree, output_format, options)]
      end
    end
  else
    serialize_core_model(coremodel, output_format, options)
  end
end
.prepare_plugin_instances(options) ⇒ Object
147 148 149 |
# File 'lib/coradoc/html/input/html_converter.rb', line 147
#
# Returns the plugin instances to use for a conversion run.
#
# @param options [Hash] conversion options
# @return [Object] +options[:plugin_instances]+ when set; otherwise a new
#   instance of every class listed in +Html.config.plugins+
def self.prepare_plugin_instances(options)
  options[:plugin_instances] || Html.config.plugins.map(&:new)
end
.serialize_core_model(coremodel, format, options = {}) ⇒ String
Serialize CoreModel to target format using the appropriate gem
115 116 117 118 |
# File 'lib/coradoc/html/input/html_converter.rb', line 115
#
# Serialize CoreModel to target format using the appropriate gem.
#
# Delegates serialization to +Coradoc.serialize+ and passes the output
# through .cleanup_result.
#
# @param coremodel [Object] the CoreModel tree to serialize
# @param format [Symbol] target format, forwarded as +to:+
# @param options [Hash] conversion options forwarded to .cleanup_result
# @return [String] the cleaned-up serialized text
def self.serialize_core_model(coremodel, format, options = {})
  result = Coradoc.serialize(coremodel, to: format)
  cleanup_result(result, options)
end
.to_coradoc(input, options = {}) ⇒ Object
Deprecated.
Use .to_core_model instead
Legacy method - returns CoreModel
81 82 83 |
# File 'lib/coradoc/html/input/html_converter.rb', line 81
#
# Legacy method - returns CoreModel.
#
# @deprecated Use .to_core_model instead
# @param input [Object] HTML input, passed through to .to_core_model
# @param options [Hash] conversion options, passed through unchanged
# @return [Object] whatever .to_core_model returns
def self.to_coradoc(input, options = {})
  to_core_model(input, options)
end
.to_core_model(input, options = {}) ⇒ Coradoc::CoreModel::Base
Convert HTML to CoreModel
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/coradoc/html/input/html_converter.rb', line 25
#
# Convert HTML to CoreModel.
#
# Pipeline: load the input into a Nokogiri node, let plugins preprocess
# the HTML tree, convert the tree to CoreModel, run the postprocessor, then
# let plugins postprocess the CoreModel tree.
#
# @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node]
#   HTML source text, a parsed document, or a node; any other input
#   yields +nil+
# @param options [Hash] conversion options; unless frozen, the hash
#   receives the plugin instances under +:plugin_instances+
# @return [Coradoc::CoreModel::Base, nil] the converted model, or +nil+
#   when no root node could be obtained from +input+
def self.to_core_model(input, options = {})
  Input::Html.config.with(options) do
    plugin_instances = prepare_plugin_instances(options)

    root = track_time 'Loading input HTML document' do
      case input
      when String
        Nokogiri::HTML(input).root
      when Nokogiri::XML::Document
        input.root
      when Nokogiri::XML::Node
        input
      end
    end

    # Unsupported input types (and documents without a root) end here.
    return nil unless root

    plugin_instances.each do |plugin|
      # Every plugin is handed the tree; only those implementing the
      # optional hook get to transform it.
      plugin.html_tree = root
      if plugin.respond_to?(:preprocess_html_tree)
        track_time "Preprocessing document with #{plugin.name} plugin" do
          plugin.preprocess_html_tree
        end
      end
      root = plugin.html_tree
    end

    coremodel = track_time 'Converting input document tree to CoreModel' do
      Converters.process_coradoc(
        root,
        plugin_instances: plugin_instances
      )
    end

    coremodel = track_time 'Post-process CoreModel tree' do
      Postprocessor.process(coremodel)
    end

    plugin_instances.each do |plugin|
      next unless plugin.respond_to?(:postprocess_coremodel_tree)

      plugin.coremodel_tree = coremodel
      track_time "Postprocessing CoreModel tree with #{plugin.name} plugin" do
        plugin.postprocess_coremodel_tree
      end
      coremodel = plugin.coremodel_tree
    end

    # Store the instances in the options hash so that code receiving the
    # same hash afterwards picks them up via prepare_plugin_instances
    # instead of instantiating the plugins again.
    options[:plugin_instances] = plugin_instances unless options.frozen?

    coremodel
  end
end
.track_time(task) ⇒ Object
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
# File 'lib/coradoc/html/input/html_converter.rb', line 152
#
# Runs the given block and returns its value. When
# +Input::Html.config.track_time+ is set, start and finish messages are
# written with +warn+; nested calls are indented one space per level.
#
# @param task [String] human-readable task description used in the messages
# @yield the work to be timed
# @return [Object] the value of the block
def self.track_time(task)
  return yield unless Input::Html.config.track_time

  # NOTE(review): the initialization of this counter is not visible in this
  # listing; defaulting to 0 here keeps the first call from failing on nil.
  @track_time_indentation ||= 0

  warn "#{' ' * @track_time_indentation}* #{task} is starting..."
  @track_time_indentation += 1
  # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  begin
    ret = yield
  ensure
    # Restore the nesting level even when the block raises; otherwise every
    # later message would stay shifted to the right.
    @track_time_indentation -= 1
  end
  time_elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  warn "#{' ' * @track_time_indentation}* #{task} took #{time_elapsed.round(3)} seconds"
  ret
end