Class: Coradoc::Input::Html::Converters::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/coradoc/html/input/converters/base.rb

Direct Known Subclasses

A, Aside, Audio, Blockquote, Br, Bypass, Div, Dl, Drop, Figure, H, Head, Hr, Ignore, Img, Li, Markup, Math, Ol, P, PassThrough, Pre, Q, Sub, Sup, Table, Td, Text, Tr, Video

Instance Method Summary collapse

Instance Method Details

#convert(node, state = {}) ⇒ Object

Default implementation to convert a given Nokogiri node to a CoreModel type. Can be overridden by subclasses.



11
12
13
# File 'lib/coradoc/html/input/converters/base.rb', line 11

# Entry point used to convert +node+; forwards to +to_coradoc+ with the same
# arguments and returns whatever it returns.
#
# +to_coradoc+ is not defined in this snippet — presumably each concrete
# converter supplies it (TODO confirm).
#
# @param node the node to convert
# @param state [Hash] conversion state; a fresh empty Hash when omitted
def convert(node, state = {})
  to_coradoc(node, state)
end

#escape_text(text) ⇒ Object

Helper to escape text content



109
110
111
# File 'lib/coradoc/html/input/converters/base.rb', line 109

# Escapes the three characters <, > and & as HTML entities.
#
# The argument is coerced with +to_s+ first, so +nil+ yields an empty string.
# An ampersand that already starts an entity is escaped again.
#
# @param text [#to_s] content to escape
# @return [String] a new string with the replacements applied
def escape_text(text)
  entities = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' }
  text.to_s.gsub(/[<>&]/) { |char| entities[char] }
end

#extract_leading_trailing_whitespace(node) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/coradoc/html/input/converters/base.rb', line 75

# Detects whitespace at the very start and very end of +node+'s text and
# strips it from the node's first / last direct text child.
#
# The checks are anchored with \A and \z (whole string) rather than ^ and $
# (per line), so whitespace next to an interior newline is not mistaken for
# leading or trailing whitespace.
#
# NOTE(review): './text()[1]' and './text()[last()]' select direct text
# children only; when the node begins or ends with an element, the text child
# that gets stripped may not be the one holding the detected whitespace —
# confirm this is acceptable for callers.
#
# @param node a node responding to +text+ and +at_xpath+; its text children
#   are modified in place
# @return [Array] a two-element array +[leading, trailing]+ where each entry
#   is a single space when whitespace was found on that side, otherwise +nil+
def extract_leading_trailing_whitespace(node)
  leading_whitespace = nil
  if node.text.match?(/\A\s/)
    first_text = node.at_xpath('./text()[1]')
    first_text&.replace(first_text.text.lstrip)
    leading_whitespace = ' '
  end

  # The text is read again on purpose: the leading strip above may have
  # changed it (e.g. a whitespace-only node is now empty).
  trailing_whitespace = nil
  if node.text.match?(/\s\z/)
    last_text = node.at_xpath('./text()[last()]')
    last_text&.replace(last_text.text.rstrip)
    trailing_whitespace = ' '
  end

  [leading_whitespace, trailing_whitespace]
end

#extract_title(node) ⇒ Object



40
41
42
# File 'lib/coradoc/html/input/converters/base.rb', line 40

# Reads the 'title' entry of +node+ and returns it as a String.
#
# Because of the +to_s+ call, a missing (+nil+) title yields an empty string.
#
# @param node an object supporting +node['title']+
# @return [String]
def extract_title(node)
  node['title'].to_s
end

#node_has_ancestor?(node, name) ⇒ Boolean

Returns:

  • (Boolean)


44
45
46
47
48
49
50
51
# File 'lib/coradoc/html/input/converters/base.rb', line 44

# Tells whether any ancestor of +node+ carries one of the given names.
#
# @param node an object whose +ancestors+ respond to +name+
# @param name [String, Array<String>] a single ancestor name, or a list of
#   names of which at least one must be present
# @return [Boolean, nil] +nil+ when +name+ is neither a String nor an Array
def node_has_ancestor?(node, name)
  if name.is_a?(String)
    node.ancestors.any? { |ancestor| ancestor.name == name }
  elsif name.is_a?(Array)
    ancestor_names = node.ancestors.map(&:name)
    (ancestor_names & name).any?
  end
end

#textnode_after_start_with?(node, str) ⇒ Boolean

Returns:

  • (Boolean)


64
65
66
67
68
69
70
71
72
73
# File 'lib/coradoc/html/input/converters/base.rb', line 64

# Checks whether the node immediately following +node+ among its siblings is
# a text node whose content begins with +str+.
#
# @param node an object responding to +at_xpath+
# @param str [String, Regexp] a literal prefix (escaped before matching) or a
#   pattern; either is anchored to the start of the sibling's text
# @return [Boolean, nil] +nil+ when +str+ is an empty String or is not
#   exactly a String or Regexp instance; +false+ when there is no following
#   sibling or it is not a Nokogiri::XML::Text
def textnode_after_start_with?(node, str)
  literal = str.instance_of?(String)
  return nil unless literal || str.instance_of?(Regexp)
  return nil if literal && str.empty?

  inner = literal ? Regexp.new(Regexp.escape(str)) : str
  prefix = /\A(?:#{inner})/

  sibling = node.at_xpath('following-sibling::node()[1]')
  sibling.is_a?(Nokogiri::XML::Text) && sibling.text.match?(prefix)
end

#textnode_before_end_with?(node, str) ⇒ Boolean

Returns:

  • (Boolean)


53
54
55
56
57
58
59
60
61
62
# File 'lib/coradoc/html/input/converters/base.rb', line 53

# Checks whether the node immediately preceding +node+ among its siblings is
# a text node whose content ends with +str+.
#
# @param node an object responding to +at_xpath+
# @param str [String, Regexp] a literal suffix (escaped before matching) or a
#   pattern; either is anchored to the end of the sibling's text
# @return [Boolean, nil] +nil+ when +str+ is an empty String or is not
#   exactly a String or Regexp instance; +false+ when there is no preceding
#   sibling or it is not a Nokogiri::XML::Text
def textnode_before_end_with?(node, str)
  literal = str.instance_of?(String)
  return nil unless literal || str.instance_of?(Regexp)
  return nil if literal && str.empty?

  inner = literal ? Regexp.new(Regexp.escape(str)) : str
  suffix = /(?:#{inner})\z/

  sibling = node.at_xpath('preceding-sibling::node()[1]')
  sibling.is_a?(Nokogiri::XML::Text) && sibling.text.match?(suffix)
end

#treat(node, state) ⇒ Object



22
23
24
# File 'lib/coradoc/html/input/converters/base.rb', line 22

# Hands +node+ and +state+ to +Converters.process+ and returns its result.
#
# @param node the node to process
# @param state the conversion state passed through unchanged
def treat(node, state)
  Converters.process(node, state)
end

#treat_children(node, state) ⇒ Object

NOTE: treat_children won’t run plugin hooks



16
17
18
19
20
# File 'lib/coradoc/html/input/converters/base.rb', line 16

# Runs +treat+ on every child of +node+, in order, with the same +state+.
#
# @param node an object whose +children+ can be mapped over
# @param state the conversion state handed to each +treat+ call
# @return [Array] one result per child
def treat_children(node, state)
  node.children.map { |child| treat(child, state) }
end

#treat_children_coradoc(node, state) ⇒ Object



26
27
28
29
30
31
32
33
34
# File 'lib/coradoc/html/input/converters/base.rb', line 26

# Runs +treat_coradoc+ on every child of +node+ and returns the results as
# one flat list with empty entries removed.
#
# Nested arrays are flattened at every depth; +nil+ entries and Strings that
# contain only whitespace are dropped.
#
# @param node an object whose +children+ can be mapped over
# @param state the conversion state handed to each +treat_coradoc+ call
# @return [Array]
def treat_children_coradoc(node, state)
  converted = node.children.map { |child| treat_coradoc(child, state) }

  converted.flatten.reject do |item|
    next true if item.nil?

    item.is_a?(String) && item.strip.empty?
  end
end

#treat_coradoc(node, state) ⇒ Object



36
37
38
# File 'lib/coradoc/html/input/converters/base.rb', line 36

# Hands +node+ and +state+ to +Converters.process_coradoc+ and returns its
# result.
#
# @param node the node to process
# @param state the conversion state passed through unchanged
def treat_coradoc(node, state)
  Converters.process_coradoc(node, state)
end

#unconstrained_after?(node) ⇒ Boolean

Returns:

  • (Boolean)


101
102
103
104
105
106
# File 'lib/coradoc/html/input/converters/base.rb', line 101

# Tells whether the node that follows +node+ in document order starts with a
# word character or one of the punctuation marks , ; " . ? !
#
# The pattern is tested against a single character, so the punctuation is
# written as a character class. The earlier form used the alternative
# `\.\?!`, which requires the three-character sequence ".?!" and therefore
# could never match, leaving '.', '?' and '!' unrecognised.
#
# @param node an object responding to +at_xpath+
# @return [Boolean, nil] +nil+ when there is no following node; +false+ when
#   its text is blank or starts with any other character
def unconstrained_after?(node)
  after = node.at_xpath('following::node()[1]')

  after && !after.text.strip.empty? &&
    after.text[0]&.match?(/[\w,;".?!]/)
end

#unconstrained_before?(node) ⇒ Boolean

Returns:

  • (Boolean)


93
94
95
96
97
98
99
# File 'lib/coradoc/html/input/converters/base.rb', line 93

# Tells whether the node that precedes +node+ in document order ends with a
# word character.
#
# @param node an object responding to +at_xpath+
# @return [Boolean, nil] +nil+ when there is no preceding node; +false+ when
#   its text is blank or ends with a non-word character
def unconstrained_before?(node)
  previous = node.at_xpath('preceding::node()[1]')
  # Hand back the falsy lookup result itself when nothing precedes the node.
  return previous unless previous
  return false if previous.text.strip.empty?

  previous.text[-1]&.match?(/\w/)
end