Module: Sisimai::RFC5322

Defined in:
lib/sisimai/rfc5322.rb

Overview

Sisimai::RFC5322 provides methods for checking email addresses.

Constant Summary collapse

# Lowercase header field names grouped by the kind of information they carry.
# Keys are group names (Symbol); values are lists of header names (Array of String).
HeaderTable = {
  messageid: ['message-id'],
  subject:   ['subject'],
  listid:    ['list-id'],
  date:      ['date', 'posted-date', 'posted', 'resent-date'],
  addresser: [
    'from', 'return-path', 'reply-to', 'errors-to', 'reverse-path', 'x-postfix-sender',
    'envelope-from', 'x-envelope-from'
  ],
  recipient: [
    'to', 'delivered-to', 'forward-path', 'envelope-to', 'x-envelope-to', 'resent-to',
    'apparently-to'
  ]
}.freeze

Class Method Summary collapse

Class Method Details

.FIELDINDEXObject



17
18
19
20
21
22
23
24
25
# File 'lib/sisimai/rfc5322.rb', line 17

# List the email header field names that Sisimai refers to.
# @return   [Array<String>]   Header field names; a new array is built on every call
def FIELDINDEX
  # Fields left out on purpose because Sisimai does not refer to them:
  #   Resent-From Resent-Sender Resent-Cc Cc Bcc Resent-Bcc In-Reply-To References
  #   Comments Keywords
  [
    'Resent-Date', 'From', 'Sender', 'Reply-To', 'To', 'Message-ID', 'Subject', 'Return-Path',
    'Received', 'Date', 'X-Mailer',
    'Content-Type', 'Content-Transfer-Encoding', 'Content-Description', 'Content-Disposition'
  ]
end

.HEADERFIELDS(group = '') ⇒ Array

Grouped RFC822 headers

Parameters:

  • group (Symbol) (defaults to: '')

    RFC822 Header group name

Returns:

  • (Array)

    RFC822 Header list



30
31
32
33
# File 'lib/sisimai/rfc5322.rb', line 30

# Look up the header names that belong to one group of HeaderTable.
# @param    [Symbol] group  Header group name, a key of HeaderTable such as :date
# @return   [Array]         Header names of the group, or an empty array when HeaderTable has
#                           no such group (including the default argument '')
def HEADERFIELDS(group = '')
  HeaderTable[group] || []
end

.HEADERTABLEObject



15
# File 'lib/sisimai/rfc5322.rb', line 15

# Accessor for the grouped header table.
# @return   [Hash]  The HeaderTable constant itself, not a copy
def HEADERTABLE
  HeaderTable
end

.part(email = '', cutby = [], keeps = false) ⇒ Array

Split the given entire message body into the error message lines and the original message part, which includes only the email headers unless "keeps" is true

Parameters:

  • email (String) (defaults to: '')

    Entire message body

  • cutby (Array) (defaults to: [])

    List of strings which is a boundary of the original message part

  • keeps (Bool) (defaults to: false)

    Flag for keeping strings after "\n\n" (the first blank line) in the original message part

Returns:

  • (Array)
    Error message lines, The original message

Since:

  • v5.0.0



165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# File 'lib/sisimai/rfc5322.rb', line 165

# Split the entire message body into the error message part and the original message part.
# @param    [String] email  Entire message body
# @param    [Array]  cutby  List of strings, each one a candidate boundary between the error
#                           message part and the original message part; the first entry that
#                           is found in "email" is used
# @param    [Bool]   keeps  false: drop everything after the first blank line ("\n\n") of the
#                           original message part, anything else: keep it
# @return   [Array]         [The error message part, The original message part]; the original
#                           message part is "" when no boundary string is found
# @return   [nil]           when "email" or "cutby" is empty
# @since v5.0.0
def part(email = '', cutby = [], keeps = false)
  return nil if email.empty?
  return nil if cutby.empty?

  # Find the first boundary string (2nd argument) included in the 1st argument, remembering
  # the position where it was found
  positionor = nil
  boundaryor = cutby.find { |e| positionor = email.index(e) }

  # The entire message is the error message part when no boundary string is included
  return [email, ''] if boundaryor.nil?

  formerpart = email[0, positionor]

  # Skip the boundary string and the one character following it. String#[] returns nil when the
  # start offset is beyond the end of the string, that is, when the boundary string is the very
  # last thing in "email", so fall back to an empty original message part instead of nil
  latterpart = email[positionor + boundaryor.size + 1, email.size] || ''
  return [formerpart, latterpart] if latterpart.empty?

  # 1. Remove leading blank lines
  latterpart.sub!(/\A\s+/, '')

  if keeps == false
    # 2. Remove text after the first blank line: \n\n
    blankline  = latterpart.index("\n\n")
    latterpart = latterpart[0, blankline] if blankline
  end

  # 3. Append "\n" at the end of the text block when the last character is not "\n"
  latterpart << "\n" unless latterpart.end_with?("\n")

  return [formerpart, latterpart]
end

.received(argv1) ⇒ Array

Convert a Received header into structured data

Parameters:

  • argv1 (String)

    Received header

Returns:

  • (Array)

    The Received header as structured data



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/sisimai/rfc5322.rb', line 38

# Convert a Received header into a list of the values following its labels.
# @param    [String] argv1  Received header
# @return   [Array]         Six strings in this order: from, by, via, with, id, for; a value
#                           that was not found is an empty string. An empty array is returned
#                           when argv1 is not a String or includes " invoked by uid" or
#                           " invoked from network"
def received(argv1)
  return [] unless argv1.is_a?(::String)
  return [] if [' invoked by uid', ' invoked from network'].any? { |e| argv1.include?(e) }

  # - https://datatracker.ietf.org/doc/html/rfc5322
  #   received        =   "Received:" *received-token ";" date-time CRLF
  #   received-token  =   word / angle-addr / addr-spec / domain
  #
  # - Appendix A.4. Message with Trace Fields
  #   Received:
  #       from x.y.test
  #       by example.net
  #       via TCP
  #       with ESMTP
  #       id ABC12345
  #       for <mary@example.net>;  21 Nov 1997 10:05:43 -0600
  labels = %w[from by via with id for]
  words  = argv1.split(' ')
  total  = words.size
  token  = {}   # label => the word following the label
  hints  = []   # words picked up from the comment following the "from" value

  words.each_with_index do |word, at|
    # Pick the word next to each label, lowercased and without "(", ")", and ";"
    name = word.downcase
    next unless labels.include?(name)

    value = words[at + 1]
    next if value.nil?
    token[name] = value.downcase.delete('();')

    next  unless name == 'from'
    break if at + 2 >= total
    next  unless words[at + 2].start_with?('(')

    # Keep the words in the comment following the "from" value, for example:
    #   from mx1.example.com (c213502.kyoto.example.ne.jp [192.0.2.135]) by mx.example.jp (V8/cf)
    #   words[at + 0] = "from"
    #   words[at + 1] = "mx1.example.com"
    #   words[at + 2] = "(c213502.kyoto.example.ne.jp"
    #   words[at + 3] = "[192.0.2.135])"
    # The word at "at + 3" is kept whether or not the comment was already closed at "at + 2";
    # uninformative words are dropped by the filter below
    hints << words[at + 2].delete('();')
    break if at + 3 >= total
    hints << words[at + 3].delete('();')
  end

  # Alternatives for the "from" value: drop short or uninformative words, words without ".",
  # and words including "="
  useless = ['unknown', 'localhost', '[127.0.0.1]', '[IPv6:::1]']
  alter   = hints.select do |e|
    !e.nil? && e.size >= 4 && !useless.include?(e) && e.include?('.') && !e.include?('=')
  end

  %w[from by].each do |name|
    # A value such as "[192.0.2.25]" is replaced with the first element returned from
    # Sisimai::RFC791.find, or with an empty string when there is none
    next unless token[name].to_s.start_with?('[')
    token[name] = Sisimai::RFC791.find(token[name]).shift || ''
  end
  token['from'] ||= ''

  # The "from" value is kept as it is ("right") when it is neither "localhost" nor
  # "localhost.localdomain", includes ".", and Sisimai::RFC791.find returns nothing for it
  sender = token['from']
  right  = !['localhost', 'localhost.localdomain'].include?(sender) &&
           sender.include?('.') && Sisimai::RFC791.find(sender).empty?

  if !right && !alter.empty? && !alter[0].include?(sender)
    # Try to rewrite an uninformative "from" value with the first alternative
    if sender.start_with?('localhost') || !sender.include?('.')
      # A value beginning with "localhost", or a value without "." such as "mail" or "mx"
      token['from'] = alter[0]
    elsif alter[0].include?('.')
      # Any other value including ".": rewrite only when the alternative includes "." too
      token['from'] = alter[0]
    end
  end

  %w[from by].each { |name| token.delete(name) if token[name].nil? }
  token['for'] = Sisimai::Address.s3s4(token['for']) if token.key?('for')

  token.each_key do |name|
    # Replace a value including a space with "", and remove "[" and "]" from the others
    value = token[name]
    token[name] = value.include?(' ') ? '' : value.delete('[]')
  end

  return labels.map { |name| token[name] || '' }
end