Class: Vivlio::Starter::CLI::Metrics::Analyzer

Inherits:
Object
  • Object
show all
Defined in:
lib/vivlio/starter/cli/metrics/analyzer.rb

Overview

Markdown コンテンツを解析してメトリクスを算出する

Constant Summary collapse

SENTENCE_DELIMITER =
/[。!?!?]+/
CLAUSE_DELIMITER =
'、'
KANJI_PATTERN =
/\p{Han}/

Instance Method Summary collapse

Constructor Details

#initialize(content, config = {}) ⇒ Analyzer

Returns a new instance of Analyzer.



74
75
76
77
78
# File 'lib/vivlio/starter/cli/metrics/analyzer.rb', line 74

# Builds an analyzer for the given content.
#
# Stores both arguments as instance state and records the result of
# +check_mecab_available+ (defined elsewhere in this class) once, at
# construction time.
#
# @param content the text to analyze; the metric methods call String
#   methods on it
# @param config [Hash] optional settings; defaults to an empty Hash
def initialize(content, config = {})
  @content, @config = content, config
  @mecab_available = check_mecab_available
end

Instance Method Details

#basic_stats ⇒ Object

基本統計を算出する



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/vivlio/starter/cli/metrics/analyzer.rb', line 81

# Computes the basic statistics of the content.
#
# Sentence and clause segments are obtained from +sentence_segments+ and
# +clause_segments+; both averages are delegated to +safe_average+ (all
# three are defined elsewhere in this class).
#
# @return [BasicStats] character count, character count with CR and LF
#   removed, line count, sentence/clause counts with their average
#   lengths, and the number of CLAUSE_DELIMITER characters in the content
def basic_stats
  sentence_list = sentence_segments
  clause_list = clause_segments

  char_count = content.length
  char_count_without_breaks = content.delete("\r\n").length
  # Empty content is reported as zero lines.
  line_count =
    if content.empty?
      0
    else
      content.each_line.count
    end

  sentence_count = sentence_list.size
  sentence_average = safe_average(sentence_list.sum(&:length), sentence_count)
  clause_count = clause_list.size
  clause_average = safe_average(clause_list.sum(&:length), clause_count)

  BasicStats.new(
    chars: char_count,
    chars_no_newline: char_count_without_breaks,
    lines: line_count,
    sentences: sentence_count,
    avg_sentence_len: sentence_average,
    clauses: clause_count,
    avg_clause_len: clause_average,
    commas: content.count(CLAUSE_DELIMITER)
  )
end

#readability ⇒ Object

読解難度スコアを算出する



122
123
124
125
126
127
128
129
130
# File 'lib/vivlio/starter/cli/metrics/analyzer.rb', line 122

# Computes the reading-difficulty score.
#
# The score is the equally weighted sum (0.5 each) of the average sentence
# length from +basic_stats+ and the kanji ratio from +vocabulary_stats+.
# The label is whatever +readability_label+ (defined elsewhere in this
# class) returns for that score.
#
# @return [ReadabilityScore] the numeric score together with its label
def readability
  kanji_ratio = vocabulary_stats.kanji_ratio
  avg_sentence_len = basic_stats.avg_sentence_len

  score = (avg_sentence_len * 0.5) + (kanji_ratio * 0.5)

  ReadabilityScore.new(score: score, label: readability_label(score))
end

#vocabulary_stats ⇒ Object

語彙分析を実行する



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/vivlio/starter/cli/metrics/analyzer.rb', line 98

# Runs the vocabulary analysis over the content.
#
# The content is first passed through +strip_markdown+ and then
# +tokenize+. Character-level figures are taken from the stripped text
# with all whitespace removed; token-level figures are taken from the
# token list. +strip_markdown+, +tokenize+, +build_token_frequencies+,
# +calculate_kanji_ratio+ and +safe_average+ are defined elsewhere in
# this class.
#
# @return [VocabularyStats] kanji ratio, average word length, type-token
#   ratio (unique tokens over total tokens), the raw counts behind those
#   figures, and the token frequency map
def vocabulary_stats
  plain_text = strip_markdown(content)
  words = tokenize(plain_text)
  distinct_words = words.uniq
  frequencies = build_token_frequencies(words)

  # Character counts ignore every whitespace character.
  compact_text = plain_text.gsub(/\s/, '')
  char_total = compact_text.length
  kanji_total = compact_text.scan(KANJI_PATTERN).size

  word_total = words.size
  total_word_length = words.sum(&:length)

  VocabularyStats.new(
    kanji_ratio: calculate_kanji_ratio(kanji_total, char_total),
    avg_word_length: safe_average(total_word_length, word_total),
    ttr: safe_average(distinct_words.size.to_f, word_total),
    total_tokens: word_total,
    unique_tokens: distinct_words.size,
    kanji_char_count: kanji_total,
    total_char_count: char_total,
    total_word_length: total_word_length,
    tokens_map: frequencies
  )
end