Class: TextMetrics::Processors::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/text_metrics/processors/base.rb

Direct Known Subclasses

AmericanEnglish, French

Constant Summary collapse

# Directory two levels above this source file, frozen so it cannot be mutated.
# NOTE(review): for lib/text_metrics/processors/base.rb this resolves to
# lib/text_metrics rather than the gem root — confirm that is the intended base.
GEM_PATH = File.dirname(File.dirname(__FILE__)).freeze
METRICS =

The public metric surface. #to_h and the individual readers are both derived from this list, so they can never drift apart.

# #to_h walks this list in order, so the order below is also the key order of the hash it returns.
%i[
  words_count
  characters_count
  sentences_count
  syllables_count
  punctuation_count
  syllables_per_word_average
  letters_per_word_average
  words_per_sentence_average
  characters_per_sentence_average
  words_per_punctuation_average
  punctuation_per_sentence_average
  flesch_reading_ease
  flesch_kincaid_grade
  lix
  smog_index
  gunning_fog_index
  coleman_liau_index
].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text, language: nil) ⇒ Base

Returns a new instance of Base.



34
35
36
37
# File 'lib/text_metrics/processors/base.rb', line 34

# Stores the text to analyse and an optional language tag.
#
# @param text [String, nil] raw input; a nil (or false) value is treated as ""
# @param language [Object, nil] kept as given and exposed through #language
def initialize(text, language: nil)
  raw = text || ""
  # Runs of consecutive space characters are collapsed to a single space.
  @text = raw.squeeze(" ")
  @language = language
end

Instance Attribute Details

#language ⇒ Object (readonly)

Returns the value of attribute language.



32
33
34
# File 'lib/text_metrics/processors/base.rb', line 32

# Reader for the @language instance variable.
#
# @return [Object, nil] whatever is currently stored in @language
def language = @language

#text ⇒ Object (readonly)

Returns the value of attribute text.



32
33
34
# File 'lib/text_metrics/processors/base.rb', line 32

# Reader for the @text instance variable.
#
# @return [Object, nil] whatever is currently stored in @text
def text = @text

Instance Method Details

#characters_count(ignore_spaces: true) ⇒ Object



50
51
52
# File 'lib/text_metrics/processors/base.rb', line 50

# Number of characters in #text.
#
# @param ignore_spaces [Boolean] when truthy (the default) space characters
#   (" ") are left out of the count; tabs, newlines and other whitespace are
#   still counted
# @return [Integer]
def characters_count(ignore_spaces: true)
  return text.length unless ignore_spaces

  text.length - text.count(" ")
end

#characters_per_sentence_average ⇒ Object



82
83
84
85
86
# File 'lib/text_metrics/processors/base.rb', line 82

# Mean number of characters per sentence, rounded to two decimals.
#
# @return [Float] 0.0 when there are no sentences
def characters_per_sentence_average
  sentence_total = sentences_count
  if sentence_total.zero?
    0.0
  else
    (characters_count.to_f / sentence_total).round(2)
  end
end

#coleman_liau_index ⇒ Object



130
131
132
133
134
135
136
137
# File 'lib/text_metrics/processors/base.rb', line 130

# Coleman-Liau index: 0.0588 * L - 0.296 * S - 15.8, where L is letters per
# 100 words and S is sentences per 100 words. Rounded to two decimals.
#
# @return [Float] 0.0 when the text has no words
def coleman_liau_index
  word_total = words_count
  return 0.0 if word_total.zero?

  letter_term = 0.0588 * (average_letters_per_word * 100)
  sentence_term = 0.296 * (sentences_count.to_f / word_total * 100)

  (letter_term - sentence_term - 15.8).round(2)
end

#flesch_kincaid_grade ⇒ Object

Flesch-Kincaid Grade Level (US school grade). The same formula is used for every language — there is no validated non-English adaptation.



110
111
112
113
114
# File 'lib/text_metrics/processors/base.rb', line 110

# Flesch-Kincaid grade: 0.39 * (words per sentence) + 11.8 * (syllables per
# word) - 15.59, rounded to one decimal. Uses the unrounded averages.
#
# @return [Float] 0.0 when the text has no words
def flesch_kincaid_grade
  if words_count.zero?
    0.0
  else
    sentence_term = 0.39 * average_words_per_sentence
    syllable_term = 11.8 * average_syllables_per_word
    (sentence_term + syllable_term - 15.59).round(1)
  end
end

#flesch_reading_ease ⇒ Object

Language-specific; subclasses supply the constants.

Raises:

  • (NotImplementedError)


104
105
106
# File 'lib/text_metrics/processors/base.rb', line 104

# Flesch Reading Ease score. This implementation only raises; a concrete
# processor is expected to override it.
#
# The message names the receiving class so a missing override can be traced
# from the exception alone.
#
# @raise [NotImplementedError] always
def flesch_reading_ease
  raise NotImplementedError, "#{self.class} must implement #flesch_reading_ease"
end

#gunning_fog_index ⇒ Object



124
125
126
127
128
# File 'lib/text_metrics/processors/base.rb', line 124

# Gunning fog index: 0.4 * (words per sentence + percentage of polysyllabic
# words), rounded to one decimal.
#
# @return [Float] 0.0 when the text has no words
def gunning_fog_index
  return 0.0 if words_count.zero?

  sentence_length = average_words_per_sentence
  polysyllabic_percentage = 100.0 * count_polysyllabic_words / words_count

  (0.4 * (sentence_length + polysyllabic_percentage)).round(1)
end

#letters_per_word_average ⇒ Object



74
75
76
# File 'lib/text_metrics/processors/base.rb', line 74

# Display value of #average_letters_per_word, rounded to two decimals.
#
# @return [Numeric]
def letters_per_word_average = average_letters_per_word.round(2)

#lix ⇒ Object



139
140
141
142
143
144
145
# File 'lib/text_metrics/processors/base.rb', line 139

# LIX score: words per sentence plus the percentage of words longer than six
# characters, rounded to two decimals.
#
# @return [Float] 0.0 when the text has no words
def lix
  return 0.0 if words_count.zero?

  long_word_total = words.count { |token| token.length > 6 }
  sentence_length = average_words_per_sentence
  long_word_percentage = 100.0 * long_word_total / words_count

  (sentence_length + long_word_percentage).round(2)
end

#punctuation_count ⇒ Object



64
65
66
# File 'lib/text_metrics/processors/base.rb', line 64

# Number of entries returned by #punctuation_marks.
#
# @return [Integer]
def punctuation_count = punctuation_marks.size

#punctuation_per_sentence_average ⇒ Object



94
95
96
97
98
# File 'lib/text_metrics/processors/base.rb', line 94

# Mean number of punctuation marks per sentence, rounded to two decimals.
#
# @return [Float] 0.0 when there is no punctuation or no sentence
def punctuation_per_sentence_average
  mark_total = punctuation_count
  return 0.0 if mark_total.zero?

  sentence_total = sentences_count
  return 0.0 if sentence_total.zero?

  (mark_total.to_f / sentence_total).round(2)
end

#sentences_count ⇒ Object



54
55
56
57
58
# File 'lib/text_metrics/processors/base.rb', line 54

# Number of sentences in the text.
#
# Text that contains at least one word always counts as at least one
# sentence, even when #sentences finds none.
#
# @return [Integer] 0 when the text has no words
def sentences_count
  return 0 if words_count.zero?

  detected = sentences.size
  detected < 1 ? 1 : detected
end

#smog_index ⇒ Object



116
117
118
119
120
121
122
# File 'lib/text_metrics/processors/base.rb', line 116

# SMOG index: 1.043 * sqrt(30 * polysyllabic words / sentences) + 3.1291,
# rounded to one decimal.
#
# Texts with fewer than three sentences score 0.0. That guard also keeps the
# divisor non-zero, and the division is done in floating point, so no
# ZeroDivisionError can arise from the arithmetic here and none is rescued.
#
# @return [Float]
def smog_index
  sentence_total = sentences_count
  return 0.0 if sentence_total < 3

  polysyllables_per_30_sentences = 30.0 * count_polysyllabic_words / sentence_total

  (1.043 * Math.sqrt(polysyllables_per_30_sentences) + 3.1291).round(1)
end

#syllables_count ⇒ Object



60
61
62
# File 'lib/text_metrics/processors/base.rb', line 60

# Total syllables in the text: the sum of #count_syllables_in_word over every
# entry of #words.
#
# @return [Numeric] 0 when there are no words
def syllables_count
  per_word = words.map { |token| count_syllables_in_word(token) }
  per_word.sum
end

#syllables_per_word_average ⇒ Object

averages — rounded for display only. The readability scores below are computed from the full-precision ratios (#average_*), not from these rounded values.



70
71
72
# File 'lib/text_metrics/processors/base.rb', line 70

# Display value of #average_syllables_per_word, rounded to one decimal.
#
# @return [Numeric]
def syllables_per_word_average = average_syllables_per_word.round(1)

#to_h ⇒ Object

Every metric in one hash. Single source of truth for the public surface. Memoized — the analyzer is immutable once built.



41
42
43
# File 'lib/text_metrics/processors/base.rb', line 41

# Builds a hash with one entry per symbol in METRICS, each value obtained by
# calling the public method of that name. The hash is cached in @to_h, so
# later calls return the same object.
#
# @return [Hash{Symbol => Object}]
def to_h
  @to_h ||= METRICS.each_with_object({}) do |metric, report|
    report[metric] = public_send(metric)
  end
end

#words_count ⇒ Object

counts



46
47
48
# File 'lib/text_metrics/processors/base.rb', line 46

# Number of entries returned by #words.
#
# @return [Integer]
def words_count = words.size

#words_per_punctuation_average ⇒ Object



88
89
90
91
92
# File 'lib/text_metrics/processors/base.rb', line 88

# Mean number of words per punctuation mark, rounded to two decimals.
#
# @return [Float] 0.0 when there are no words or no punctuation
def words_per_punctuation_average
  word_total = words_count
  return 0.0 if word_total.zero?

  mark_total = punctuation_count
  return 0.0 if mark_total.zero?

  (word_total.to_f / mark_total).round(2)
end

#words_per_sentence_average ⇒ Object



78
79
80
# File 'lib/text_metrics/processors/base.rb', line 78

# Display value of #average_words_per_sentence, rounded to two decimals.
#
# @return [Numeric]
def words_per_sentence_average = average_words_per_sentence.round(2)