Class: Coradoc::Html::Converters::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/coradoc/html/converters/base.rb

Direct Known Subclasses

A, Aside, Blockquote, Br, Bypass, Div, Dl, Figure, H, Head, Hr, Img, Li, Markup, Math, MediaBase, Ol, P, PassThrough, Pre, Q, Skip, Sub, Sup, Table, Td, Text, Tr

Instance Method Summary collapse

Instance Method Details

#extract_leading_trailing_whitespace(node) ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/coradoc/html/converters/base.rb', line 56

# Trims whitespace from the outer edges of +node+'s direct text children and
# reports which edges carried any.
#
# The edges are anchored with \A and \z. The per-line anchors ^ and $ also
# match next to every embedded newline, which made multi-line text such as
# "foo\n  bar" report leading whitespace although it starts with "f".
#
# @param node [#text, #at_xpath] element whose first/last direct text child
#   is stripped in place (via +replace+) when the matching edge has whitespace
# @return [Array] +[leading, trailing]+; each entry is a single space (' ')
#   when that edge had whitespace, otherwise nil
def extract_leading_trailing_whitespace(node)
  leading_whitespace = nil
  if node.text.match?(/\A\s/)
    first_text = node.at_xpath('./text()[1]')
    first_text&.replace(first_text.text.lstrip)
    leading_whitespace = ' '
  end

  # node.text is deliberately read again after the leading trim, keeping the
  # original evaluation order: the trailing check sees the already-trimmed
  # content.
  trailing_whitespace = nil
  if node.text.match?(/\s\z/)
    last_text = node.at_xpath('./text()[last()]')
    last_text&.replace(last_text.text.rstrip)
    trailing_whitespace = ' '
  end

  [leading_whitespace, trailing_whitespace]
end

#extract_text_from_content(content) ⇒ Object

Extract plain text from a mixed content array. Delegates to CoreModel::InlineContent.text_of — single source of truth for nil/Array/InlineElement/StructuralElement handling.



92
93
94
# File 'lib/coradoc/html/converters/base.rb', line 92

# Extracts plain text from +content+ by delegating to
# Coradoc::CoreModel::InlineContent.text_of; no processing happens here.
#
# @param content [Object] passed through unchanged to +text_of+
# @return [Object] whatever +text_of+ returns (presumably a String — confirm
#   against CoreModel::InlineContent)
def extract_text_from_content(content)
  Coradoc::CoreModel::InlineContent.text_of(content)
end

#extract_title(node) ⇒ Object



21
22
23
# File 'lib/coradoc/html/converters/base.rb', line 21

# Reads the +title+ attribute of +node+ as a String.
#
# @param node [#[]] object answering +['title']+
# @return [String] the attribute converted with +to_s+; an absent (nil)
#   attribute therefore yields an empty string
def extract_title(node)
  title_attribute = node['title']
  title_attribute.to_s
end

#node_has_ancestor?(node, name) ⇒ Boolean

Returns:

  • (Boolean)


25
26
27
28
29
30
31
32
# File 'lib/coradoc/html/converters/base.rb', line 25

# Tells whether +node+ has an ancestor matching +name+.
#
# @param node [#ancestors] object whose +ancestors(selector)+ is queried
# @param name [String, Array] one selector, or a list of selectors of which
#   any may match
# @return [Boolean, nil] result of the ancestor lookup; nil when +name+ is
#   neither a String nor an Array
def node_has_ancestor?(node, name)
  return node.ancestors(name).any? if name.is_a?(String)
  return unless name.is_a?(Array)

  name.any? { |selector| node.ancestors(selector).any? }
end

#textnode_after_start_with?(node, str) ⇒ Boolean

Returns:

  • (Boolean)


45
46
47
48
49
50
51
52
53
54
# File 'lib/coradoc/html/converters/base.rb', line 45

# Checks whether the node immediately following +node+ among its siblings is
# a text node whose content begins with +str+.
#
# @param node [#at_xpath] node whose next sibling is inspected
# @param str [String, Regexp] literal prefix (escaped before matching) or a
#   pattern; any other class, or an empty String, yields false
# @return [Boolean]
def textnode_after_start_with?(node, str)
  literal = str.instance_of?(String)
  return false unless literal || str.instance_of?(Regexp)
  return false if literal && str.empty?

  # A literal is escaped so it is matched verbatim, then anchored to the start.
  pattern = literal ? Regexp.new(Regexp.escape(str)) : str
  prefix = /\A(?:#{pattern})/

  sibling = node.at_xpath('following-sibling::node()[1]')
  sibling.is_a?(Nokogiri::XML::Text) && sibling.text.match?(prefix)
end

#textnode_before_end_with?(node, str) ⇒ Boolean

Returns:

  • (Boolean)


34
35
36
37
38
39
40
41
42
43
# File 'lib/coradoc/html/converters/base.rb', line 34

# Checks whether the node immediately preceding +node+ among its siblings is
# a text node whose content ends with +str+.
#
# @param node [#at_xpath] node whose previous sibling is inspected
# @param str [String, Regexp] literal suffix (escaped before matching) or a
#   pattern; any other class, or an empty String, yields false
# @return [Boolean]
def textnode_before_end_with?(node, str)
  literal = str.instance_of?(String)
  return false unless literal || str.instance_of?(Regexp)
  return false if literal && str.empty?

  # A literal is escaped so it is matched verbatim, then anchored to the end.
  pattern = literal ? Regexp.new(Regexp.escape(str)) : str
  suffix = /(?:#{pattern})\z/

  sibling = node.at_xpath('preceding-sibling::node()[1]')
  sibling.is_a?(Nokogiri::XML::Text) && sibling.text.match?(suffix)
end

#treat_children_coradoc(node, state) ⇒ Object



7
8
9
10
11
12
13
14
15
# File 'lib/coradoc/html/converters/base.rb', line 7

# Converts every child of +node+ through +treat_coradoc+ and returns the
# results as one flat list with empty entries removed.
#
# @param node [#children] node whose children are converted in order
# @param state [Object] passed unchanged to +treat_coradoc+ for each child
# @return [Array] fully flattened results without nil entries and without
#   Strings that are empty or whitespace-only
def treat_children_coradoc(node, state)
  converted = node.children.map { |child| treat_coradoc(child, state) }

  # flatten (not flat_map) because nested arrays of any depth are unwrapped.
  converted.flatten.compact.reject do |item|
    item.is_a?(String) && item.strip.empty?
  end
end

#treat_coradoc(node, state) ⇒ Object



17
18
19
# File 'lib/coradoc/html/converters/base.rb', line 17

# Converts a single node by delegating to Converters.process_coradoc.
#
# @param node [Object] node to convert; forwarded unchanged
# @param state [Object] conversion state; forwarded unchanged
# @return [Object] whatever Converters.process_coradoc returns (its shape is
#   not visible from here — confirm against Converters)
def treat_coradoc(node, state)
  Converters.process_coradoc(node, state)
end

#unconstrained_after?(node) ⇒ Boolean

Returns:

  • (Boolean)


82
83
84
85
86
87
# File 'lib/coradoc/html/converters/base.rb', line 82

# Tells whether the node that follows +node+ in document order starts with a
# character from the set: word character, `,`, `;`, `"`, `.`, `?`, `!`.
#
# The previous pattern ended in the alternative `\.\?!`, i.e. the literal
# three-character sequence ".?!", which can never match the single character
# being tested. A character class expresses the intended set.
#
# @param node [#at_xpath] node whose following node is inspected
# @return [Boolean] false when there is no following node, when its text is
#   blank, or when its first character is not in the set above
def unconstrained_after?(node)
  after = node.at_xpath('following::node()[1]')
  return false unless after

  text = after.text
  # Only the very first character is tested, so leading whitespace yields false.
  !text.strip.empty? && text[0].match?(/[\w,;".?!]/)
end

#unconstrained_before?(node) ⇒ Boolean

Returns:

  • (Boolean)


74
75
76
77
78
79
80
# File 'lib/coradoc/html/converters/base.rb', line 74

# Tells whether the node that precedes +node+ in document order ends with a
# word character.
#
# @param node [#at_xpath] node whose preceding node is inspected
# @return [Boolean, nil] true when the preceding node's text is non-blank and
#   its last character matches \w; false when it is blank or ends otherwise;
#   the falsy lookup result (nil) when there is no preceding node
def unconstrained_before?(node)
  preceding = node.at_xpath('preceding::node()[1]')
  # Hand back the falsy lookup result itself when nothing precedes the node.
  return preceding unless preceding

  content = preceding.text
  return false if content.strip.empty?

  content[-1]&.match?(/\w/)
end