Class: Coradoc::Input::Html::Converters::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/coradoc/html/input/converters/base.rb

Direct Known Subclasses

A, Aside, Blockquote, Br, Bypass, Div, Dl, Figure, H, Head, Hr, Img, Li, Markup, Math, MediaBase, Ol, P, PassThrough, Pre, Q, Skip, Sub, Sup, Table, Td, Text, Tr

Instance Method Summary collapse

Instance Method Details

#extract_leading_trailing_whitespace(node) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/coradoc/html/input/converters/base.rb', line 57

# Strips whitespace from the outer edges of +node+'s text and reports which
# edges had any.
#
# When the node's text starts with whitespace, the first direct text child
# (if there is one) is replaced by its left-stripped text; when it ends with
# whitespace, the last direct text child is replaced by its right-stripped
# text. Each affected edge is reported as a single space.
#
# @param node [#text, #at_xpath] node whose direct text children may be rewritten
# @return [Array] two elements, leading then trailing; each is ' ' when
#   whitespace was found on that edge and nil otherwise
def extract_leading_trailing_whitespace(node)
  leading_whitespace = nil
  # \A rather than ^: ^ also matches after an interior newline, which would
  # report whitespace in the middle of the text as leading whitespace.
  if node.text.match?(/\A\s/)
    first_text = node.at_xpath('./text()[1]')
    first_text&.replace(first_text.text.lstrip)
    leading_whitespace = ' '
  end

  trailing_whitespace = nil
  # node.text is read again on purpose: the replacement above may have
  # changed it. \z rather than $ for the same reason as \A above.
  if node.text.match?(/\s\z/)
    last_text = node.at_xpath('./text()[last()]')
    last_text&.replace(last_text.text.rstrip)
    trailing_whitespace = ' '
  end

  [leading_whitespace, trailing_whitespace]
end

#extract_text_from_content(content) ⇒ Object

Extract plain text from a mixed content array. Handles String, InlineElement (via .content), and other CoreModel::Base (via .content or .title).



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/coradoc/html/input/converters/base.rb', line 93

# Extracts plain text from a mixed content array.
#
# Strings are used as-is. InlineElement items contribute their +content+.
# Other CoreModel::Base items contribute their +content+ or, when that is
# missing and the item responds to +title+, their +title+. Anything else
# contributes its +to_s+.
#
# @param content [String, Array, nil] a string (returned unchanged), nil
#   (yields ''), or a collection of items responding to +map+
# @return [String] the concatenated text
def extract_text_from_content(content)
  return content if content.is_a?(String)
  return '' if content.nil?

  # Array values are joined recursively; Array#to_s would leak inspect
  # output such as '["a", "b"]' into the extracted text.
  text_of = ->(value) { value.is_a?(Array) ? extract_text_from_content(value) : value.to_s }

  content.map do |item|
    case item
    when String
      item
    when Array
      extract_text_from_content(item)
    when Coradoc::CoreModel::InlineElement
      text_of.call(item.content)
    when Coradoc::CoreModel::Base
      source = item.content
      # Fall back to the title only when there is no content to use.
      source ||= item.title if item.respond_to?(:title)
      source ? text_of.call(source) : ''
    else
      item.to_s
    end
  end.join
end

#extract_title(node) ⇒ Object



22
23
24
# File 'lib/coradoc/html/input/converters/base.rb', line 22

# Reads the 'title' entry of +node+ as a string.
#
# @param node [#[]] object indexable by the string key 'title'
# @return [String] the value's +to_s+; '' when the entry is nil
def extract_title(node)
  title_value = node['title']
  title_value.to_s
end

#node_has_ancestor?(node, name) ⇒ Boolean

Returns:

  • (Boolean)


26
27
28
29
30
31
32
33
# File 'lib/coradoc/html/input/converters/base.rb', line 26

# Checks whether +node+ has an ancestor matching +name+.
#
# @param node [#ancestors] node queried via +ancestors(name)+
# @param name [String, Array] one name, or a list of names of which any
#   single match suffices
# @return [Boolean, nil] true/false for String and Array arguments; nil for
#   any other kind of +name+
def node_has_ancestor?(node, name)
  if name.is_a?(String)
    node.ancestors(name).any?
  elsif name.is_a?(Array)
    name.any? { |candidate| node.ancestors(candidate).any? }
  end
end

#textnode_after_start_with?(node, str) ⇒ Boolean

Returns:

  • (Boolean)


46
47
48
49
50
51
52
53
54
55
# File 'lib/coradoc/html/input/converters/base.rb', line 46

# Tests whether the node directly following +node+ among its siblings is a
# text node whose text begins with +str+.
#
# @param node [#at_xpath] node whose next sibling is inspected
# @param str [String, Regexp] literal prefix (escaped before matching) or a
#   pattern; anything else, or an empty string, yields false
# @return [Boolean]
def textnode_after_start_with?(node, str)
  literal = str.instance_of?(String)
  return false unless literal || str.instance_of?(Regexp)
  return false if literal && str.empty?

  inner = literal ? Regexp.new(Regexp.escape(str)) : str
  # Anchor at the very start of the sibling's text.
  anchored = /\A(?:#{inner})/

  sibling = node.at_xpath('following-sibling::node()[1]')
  return false unless sibling.is_a?(Nokogiri::XML::Text)

  sibling.text.match?(anchored)
end

#textnode_before_end_with?(node, str) ⇒ Boolean

Returns:

  • (Boolean)


35
36
37
38
39
40
41
42
43
44
# File 'lib/coradoc/html/input/converters/base.rb', line 35

# Tests whether the node directly preceding +node+ among its siblings is a
# text node whose text ends with +str+.
#
# @param node [#at_xpath] node whose previous sibling is inspected
# @param str [String, Regexp] literal suffix (escaped before matching) or a
#   pattern; anything else, or an empty string, yields false
# @return [Boolean]
def textnode_before_end_with?(node, str)
  literal = str.instance_of?(String)
  return false unless literal || str.instance_of?(Regexp)
  return false if literal && str.empty?

  inner = literal ? Regexp.new(Regexp.escape(str)) : str
  # Anchor at the very end of the sibling's text.
  anchored = /(?:#{inner})\z/

  sibling = node.at_xpath('preceding-sibling::node()[1]')
  return false unless sibling.is_a?(Nokogiri::XML::Text)

  sibling.text.match?(anchored)
end

#treat_children_coradoc(node, state) ⇒ Object



8
9
10
11
12
13
14
15
16
# File 'lib/coradoc/html/input/converters/base.rb', line 8

# Converts every child of +node+ with +treat_coradoc+ and returns the
# flattened results, dropping nils and whitespace-only strings.
#
# @param node [#children] node whose children are converted
# @param state [Object] passed through unchanged to +treat_coradoc+
# @return [Array] flattened conversion results
def treat_children_coradoc(node, state)
  converted = node.children.map { |child| treat_coradoc(child, state) }

  converted.flatten.select do |result|
    !result.nil? && !(result.is_a?(String) && result.strip.empty?)
  end
end

#treat_coradoc(node, state) ⇒ Object



18
19
20
# File 'lib/coradoc/html/input/converters/base.rb', line 18

# Delegates conversion of a single node to Converters.process_coradoc.
#
# @param node [Object] the node to convert; passed through unchanged
# @param state [Object] conversion state; passed through unchanged
# @return [Object] whatever Converters.process_coradoc returns
def treat_coradoc(node, state)
  Converters.process_coradoc(node, state)
end

#unconstrained_after?(node) ⇒ Boolean

Returns:

  • (Boolean)


83
84
85
86
87
88
# File 'lib/coradoc/html/input/converters/base.rb', line 83

# Reports whether the node that follows +node+ in document order has
# non-blank text starting with a word character or one of , ; " . ? !
#
# NOTE(review): presumably used to choose AsciiDoc unconstrained formatting
# marks; confirm against the callers.
#
# @param node [#at_xpath] node whose following node is inspected
# @return [Boolean, nil] nil when there is no following node
def unconstrained_after?(node)
  after = node.at_xpath('following::node()[1]')

  # A character class is required here: the check runs against a single
  # character, so a multi-character alternative such as \.\?! can never match.
  after && !after.text.strip.empty? &&
    after.text[0]&.match?(/[\w,;".?!]/)
end

#unconstrained_before?(node) ⇒ Boolean

Returns:

  • (Boolean)


75
76
77
78
79
80
81
# File 'lib/coradoc/html/input/converters/base.rb', line 75

# Reports whether the node that precedes +node+ in document order has
# non-blank text ending in a word character.
#
# @param node [#at_xpath] node whose preceding node is inspected
# @return [Boolean, nil] nil when there is no preceding node
def unconstrained_before?(node)
  preceding = node.at_xpath('preceding::node()[1]')
  return preceding unless preceding

  content = preceding.text
  return false if content.strip.empty?

  content[-1].match?(/\w/)
end