Class: Body

Inherits:
Object
  • Object
show all
Extended by:
Logging
Defined in:
lib/body.rb

Overview

An object of this class represents the body of a news article.

Constant Summary collapse

@@config =

a class-level configuration instance.

Configuration.instance
@@log =

initialize the class-level logger as configured

init_logger(@@config.log_target, @@config.log_level)

Instance Method Summary collapse

Methods included from Logging

init_logger, log_level=, log_target=

Constructor Details

#initialize(article_text) ⇒ Body

reads the body text of the article



36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/body.rb', line 36

# Reads the body text of the article.
#
# The article text is split into lines at $LN. All lines which follow the
# first empty line are kept in @lines; presumably that empty line is the
# separator between the headers and the body of the article (TODO confirm).
#
# article_text - the complete text of the article, a String.
#
# When the text does not contain any empty line, @lines is set to an empty
# array instead of failing on the missing separator.
def initialize(article_text)
  # for simplicity.
  @log = @@log
  # transform the article into an array.
  line_array = article_text.split($LN)
  # position of the first empty line, nil if there is none.
  start_index = line_array.index('')

  # keep all that follows '' ... or nothing, if '' was never found.
  @lines = start_index ? line_array.drop(start_index + 1) : []
  @log.debug("initialize(): body lines are #{@lines.inspect}")
end

Instance Method Details

#handle_references ⇒ Object

Extract URLs or other text enclosed in the configured references delimiter and turn them into numbered footnotes, if so configured.



176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/body.rb', line 176

# Turns delimited passages of the body into numbered footnotes.
#
# A passage starts with the configured REFERENCES_DELIMITER and ends with the
# same delimiter in reversed character order. Each passage is replaced in the
# running text by REFERENCE_FORMAT, formatted with the number of the footnote.
# The text of the passages is appended to the body, below the configured
# REFERENCES_SEPARATOR, one numbered entry per footnote. Passages with
# identical text share one footnote.
#
# Nothing is changed when no delimiter is configured. An error is logged, and
# no footnotes are created, when the delimiter reads the same in both
# directions, as the end of a passage could not be told from its beginning.
#
# Returns the body lines (@lines), split again at $LN.
def handle_references()
  # a symbol or string to mark the beginning and ending of a future footnote.
  ref_delim = @@config.REFERENCES_DELIMITER
  @log.debug("references delimiter is #{ref_delim}")
  body = @lines.join($LN)
  # work on a stripped copy; the configured value itself stays untouched.
  opening = ref_delim.to_s.strip

  unless opening.empty?
    closing = opening.reverse
    references = []

    if opening == closing
      msg = "The References Delimiter is the same in its reversed form.#{$LN}Cannot handle references or footnotes!"
      @log.error(msg)
    else
      # The delimiters are literal text, not patterns: escape them.
      # The group captures the footnote text without the delimiters.
      ref_rx = Regexp.new("#{Regexp.escape(opening)}(.*?)#{Regexp.escape(closing)}", Regexp::MULTILINE)
      @log.debug("ref_rx is #{ref_rx}")
      # maps the matched text to the number of its footnote.
      numbers = {}
      # I cannot work with an array, here, and apply the pattern
      # to the whole body, over multiple lines, if need be.
      # One single pass; the inserted markers are never scanned again.
      body = body.gsub(ref_rx) do |found|
        inner = Regexp.last_match(1)
        @log.debug("found reference #{found} (length: #{found.size})")
        unless numbers.key?(found)
          # presentation: squeeze blanks, indent continuation lines.
          text = inner.gsub(/[ \t]+/, ' ').strip
          references << text.gsub("\n", "\n   ")
          numbers[found] = references.size
        end
        format(@@config.REFERENCE_FORMAT, numbers[found].to_s)
      end
      @log.debug("all references found:\n#{references.join("\n")}")
    end

    unless references.empty?
      # a line, separating the footnotes from the body of the article
      body << $LN << @@config.REFERENCES_SEPARATOR << $LN
      references.each_with_index do |r, i|
        body << (i + 1).to_s << ') ' << r << $LN
      end
    end
  end
  @lines = body.split($LN)
end

#handle_urls ⇒ Object

verify and possibly correct links in the post.



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/body.rb', line 135

# Verifies and possibly corrects links in the post.
#
# Cited lines, starting with '>', are left as they are. The other lines are
# handled in one of two ways:
#
# * A line which contains '@': each token in angle brackets gets 'news:'
#   prepended, unless it already begins with 'news:', 'mailto:', 'http://'
#   or 'https://'. Note from the original author: an email-address in the
#   body of a post is expected to be preceded by "Mail:"; nothing else is
#   done about email-addresses here.
# * Any other line which contains 'http': each http(s)-URL is enclosed in
#   angle brackets, unless it already is.
#
# Every occurrence is replaced where it stands, so that repeated links are
# handled once each and the text around a link is kept.
#
# Returns the body lines (@lines).
def handle_urls()
  @lines.each_with_index do |l, i|
    # leave cited lines as they are.
    next if l.start_with?('>')

    if l.include?('@')
      # news
      @lines[i] = l.gsub(/<([^<>\s]+)>/) do
        target = Regexp.last_match(1)
        if target.start_with?('news:', 'mailto:', 'http://', 'https://')
          "<#{target}>"
        else
          "<news:#{target}>"
        end
      end
    elsif l.include?('http')
      # http(s); the optional brackets are consumed and written anew.
      @lines[i] = l.gsub(%r{<?(https?://[^<>\s]+)>?}) { "<#{Regexp.last_match(1)}>" }
    end
  end
end

#join ⇒ Object



130
131
132
# File 'lib/body.rb', line 130

# Returns the body as one String: all body lines (@lines), connected by $LN.
def join
  separator = $LN
  @lines.join(separator)
end

#set_intro(intro) ⇒ Object

If so configured, replace an existing follow-up intro line, if any, with the one configured for a group. This may depend on other conditions and must be triggered explicitly.



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/body.rb', line 53

# Replaces the intro-line of a followup by the given intro.
#
# The body lines are searched for the first non-empty line which matches the
# configured expression FUP_NAME. That line is taken for the intro-line, if
# the next non-empty line is a citation (starts with '>'). The intro-line is
# then replaced by +intro+, where '%fup_name%' and '%fup_group%' are
# substituted by the name and the group found with the expressions FUP_NAME
# and FUP_GROUP.
#
# intro - the new intro-line, a String which may contain the variables
#         '%fup_name%' and '%fup_group%'. The String itself is not altered,
#         so that it can serve as a template for further articles.
#
# Returns nil if there is no intro or if intros are disabled (no_intro),
# else the body lines (@lines).
def set_intro(intro)
  return if !intro || intro.empty? || @@config.no_intro

  # The expressions which allow the identification of the name of the
  # previous poster and of the current newsgroup in the current article.
  fn = @@config.FUP_NAME.to_s
  fg = @@config.FUP_GROUP.to_s
  @log.debug("FUP_NAME is #{fn}")
  @log.debug("FUP_GROUP is #{fg}")
  # All that follows depends on the presence of a name.
  return @lines if fn.strip.empty?

  # compile once, not anew for each line.
  name_rx = Regexp.new(fn)
  group_rx = fg.strip.empty? ? nil : Regexp.new(fg)

  # name of the previous poster
  fup_name = nil
  # the current newsgroup
  fup_group = nil
  # position of the line where the name has been found
  intro_index = nil

  @lines.each_with_index do |line, i|
    next if line.strip.empty?

    # match a name: the group, if the expression has exactly one, else all.
    md = line.match(name_rx)
    fup_name = (md.length == 2 ? md[1] : md[0]) if md
    @log.debug("\tfup_name: #{fup_name}")

    # match a group, as long as none is known. This is tried on each line
    # up to, and including, the line with the name.
    if group_rx && !fup_group
      gmd = line.match(group_rx)
      if gmd
        @log.debug("\tmatch: #{gmd}")
        fup_group = gmd[1] if gmd.length == 2
      end
    end
    @log.debug("group is #{fup_group}")

    # only the first line with a name is a candidate for the intro.
    if fup_name
      intro_index = i
      break
    end
  end
  return @lines if intro_index.nil? || fup_name.strip.empty?

  # check if there is a citation, at all: the first non-empty line after
  # the intro. There may be none; then there is nothing to do.
  follower = @lines[(intro_index + 1)..-1].find { |l| !l.strip.empty? }
  if follower && follower.start_with?('>')
    @log.debug("\tfound intro #{@lines[intro_index]}")
    # variables are part of the intro. Do substitutions on a copy; the
    # block-form inserts the values literally, whatever they contain.
    new_intro = intro.sub('%fup_name%') { fup_name }
    new_intro = new_intro.sub('%fup_group%') { fup_group } if fup_group
    @log.debug("\tsetting intro #{new_intro}")

    # exchange original intro-line against the new one
    @lines[intro_index] = new_intro.strip
  end
  @lines
end

#set_signature(signature) ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/body.rb', line 117

# Replaces the signature of the article by the given one.
#
# All lines from the first signature separator ('-- ') on are removed from
# the body lines, then the separator and the new signature are appended.
# If the article does not contain a separator, the signature is appended
# to the unchanged body.
#
# signature - the new signature, a String. Nothing is changed, if it is
#             nil or empty.
#
# Returns nil if nothing was changed, else the body lines (@lines).
def set_signature(signature)
  # unless no changes requested.
  return if !signature || signature.empty?

  # remove any signature(s) from
  # the current article
  sigpos = @lines.index('-- ')
  if sigpos
    # interpolate: String#<< would take the number for a codepoint.
    @log.debug("found signature at position #{sigpos}")
    @lines = @lines.slice(0, sigpos)
  end
  @log.debug("setting signature #{signature}")
  @lines << '-- ' << signature
end