Class: Vivlio::Starter::CLI::UnifiedIndexManager

Inherits:
Object
  • Object
show all
Defined in:
lib/vivlio/starter/cli/index/unified_index_manager.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ UnifiedIndexManager

Returns a new instance of UnifiedIndexManager.



38
39
40
41
42
43
44
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 38

# Creates the manager, instantiating the three collaborators it delegates to
# and loading the index and glossary configuration.
def initialize
  # Collaborators, exposed through the read-only accessors of the same names.
  @terms_manager = UnifiedTermsManager.new
  @queue_manager = ReviewQueueManager.new
  @markdown_generator = ReviewMarkdownGenerator.new
  # Index settings; auto_process! reads its thresholds and ratios from here.
  @config = load_index_config
  # Glossary settings; read by glossary_enabled?, validate_glossary_definitions!
  # and passed on to UnifiedPageBuilder.
  @glossary_config = load_glossary_config
end

Instance Attribute Details

#markdown_generator ⇒ Object (readonly)

Returns the value of attribute markdown_generator.



36
37
38
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 36

# Read-only accessor for the review Markdown generator held by this manager.
#
# @return [Object] the current value of @markdown_generator
def markdown_generator = @markdown_generator

#queue_manager ⇒ Object (readonly)

Returns the value of attribute queue_manager.



36
37
38
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 36

# Read-only accessor for the review queue manager held by this manager.
#
# @return [Object] the current value of @queue_manager
def queue_manager = @queue_manager

#terms_manager ⇒ Object (readonly)

Returns the value of attribute terms_manager.



36
37
38
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 36

# Read-only accessor for the unified terms manager held by this manager.
#
# @return [Object] the current value of @terms_manager
def terms_manager = @terms_manager

Instance Method Details

#apply_markdown_review! ⇒ Object

Markdownから承認・リジェクトを適用する。仕様: vs index:apply は内部で vs index:build を実行しない。



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 126

# Applies the approve / reject decisions recorded in _index_glossary_review.md
# to the unified terms dictionary and the rejected-terms queue.
#
# Spec: `vs index:apply` never runs `vs index:build` itself; the summary log
# tells the user that pages are generated by `vs build`.
#
# Processing order:
#   1. parse every section of the review file
#   2. validate the approved glossary definitions (before anything is mutated)
#   3. merge approvals, apply the three kinds of rejection, un-reject entries
#   4. apply reading (yomi) changes
#   5. strip flags from registered terms that the review no longer lists
#   6. re-sync with the rejected list (Section 4) of the review file
#
# @return [Boolean, nil] true when at least one change was applied, false when
#   nothing changed, nil when the review file does not exist
# @raise [RuntimeError] whatever validate_glossary_definitions! raises for the
#   approved glossary terms; in that case no term has been modified yet
def apply_markdown_review!
  unless @markdown_generator.exists?
    Common.log_warn('_index_glossary_review.md が見つかりません')
    Common.log_info('先に vs index:auto を実行してください')
    return
  end

  # --- Phase: parse the review file (index, glossary, shared sections) ---
  index_approved = @markdown_generator.parse_index_approved
  index_rejected = @markdown_generator.parse_index_rejected
  glossary_approved = @markdown_generator.parse_glossary_approved
  glossary_rejected = @markdown_generator.parse_glossary_rejected
  both_rejected = @markdown_generator.parse_rejected
  unreject = @markdown_generator.parse_unreject
  yomi_changes = @markdown_generator.parse_yomi_changes

  index_approved_names = index_approved.map { |entry| entry['term'] }
  glossary_approved_names = glossary_approved.map { |entry| entry['term'] }

  # Validate up front: if a required definition is missing this raises, and it
  # must do so before any approval has been merged so the review is never
  # left half-applied.
  validate_glossary_definitions!(glossary_approved) if glossary_approved.any?

  changes_made = false
  index_count = 0
  glossary_count = 0

  # --- Phase: index approvals ---
  if index_approved.any?
    @terms_manager.merge_terms!(index_approved, flags: 'i', source: 'auto_extracted')
    index_count = index_approved.size
    changes_made = true
  end

  # --- Phase: glossary approvals ---
  if glossary_approved.any?
    @terms_manager.merge_terms!(glossary_approved, flags: 'g', source: 'review')
    glossary_count = glossary_approved.size
    changes_made = true
  end

  # A term downgraded from [ig] to [i]: drop its glossary flag.
  index_only = index_approved.reject { |entry| glossary_approved_names.include?(entry['term']) }
  index_only.each do |term|
    next unless @terms_manager.glossary_term_names.include?(term['term'])

    @terms_manager.remove_flag!(term['term'], 'g')
    Common.log_info("用語集フラグを除去しました(索引のみ): #{term['term']}")
    changes_made = true
  end

  # --- Phase: index-only rejection ([-i]) ---
  if index_rejected.any?
    index_rejected.each { |entry| @terms_manager.remove_flag!(entry['term'], 'i') }
    @queue_manager.save_rejected_terms(index_rejected)
    changes_made = true
  end

  # --- Phase: glossary-only rejection ([-g]) ---
  if glossary_rejected.any?
    glossary_rejected.each { |entry| @terms_manager.remove_flag!(entry['term'], 'g') }
    @queue_manager.save_rejected_terms(glossary_rejected)
    changes_made = true
  end

  # --- Phase: full rejection ([r]) ---
  if both_rejected.any?
    both_rejected.each { |entry| @terms_manager.remove_term!(entry['term']) }
    @queue_manager.save_rejected_terms(both_rejected)
    changes_made = true
  end

  # --- Phase: un-reject + direct registration ---
  # The names are collected from the *resolved* flags so that the orphan sweep
  # below protects exactly what was registered here, including entries whose
  # flag was missing or unknown and therefore fell back to 'i'.
  unreject_index_names = []
  unreject_glossary_names = []
  unreject.each do |entry|
    @queue_manager.unreject_term_by_name!(entry['term'])
    flag = entry['flag'] || 'i'
    resolved = case flag
               when 'g' then 'g'
               when 'ig', 'gi' then 'ig'
               else 'i' # 'i', 'x' and anything unrecognised register as index-only
               end
    term_data = { 'term' => entry['term'], 'yomi' => entry['yomi'] }
    @terms_manager.merge_terms!([term_data], flags: resolved, source: 'unreject')
    if resolved.include?('i')
      index_count += 1
      unreject_index_names << entry['term']
    end
    if resolved.include?('g')
      glossary_count += 1
      unreject_glossary_names << entry['term']
    end
    Common.log_info("リジェクト解除 → [#{flag}] 登録: #{entry['term']}")
  end
  changes_made = true if unreject.any?

  # --- Phase: reading (yomi) changes ---
  if yomi_changes.any?
    @terms_manager.update_yomi!(yomi_changes)
    changes_made = true
  end

  # --- Phase: orphan removal ---
  # Explicitly rejected terms are exempt from the sweep, so that the 'g' left
  # behind after a [-i] rejection (or vice versa) is not stripped by mistake.
  explicitly_rejected = (index_rejected + glossary_rejected + both_rejected).map { |entry| entry['term'] }.uniq

  stale_index = @terms_manager.index_term_names - index_approved_names - unreject_index_names - explicitly_rejected
  stale_index.each do |term_name|
    @terms_manager.remove_flag!(term_name, 'i')
    Common.log_info("索引フラグを除去: #{term_name}")
    changes_made = true
  end

  stale_glossary = @terms_manager.glossary_term_names - glossary_approved_names -
                   unreject_glossary_names - explicitly_rejected
  stale_glossary.each do |term_name|
    @terms_manager.remove_flag!(term_name, 'g')
    Common.log_info("用語集フラグを除去: #{term_name}")
    changes_made = true
  end

  # --- Phase: Section 4 sync ---
  # Entries of the rejected section whose flag is blank stay rejected, unless
  # the same term is being un-rejected in this run.
  rejected_section_all = @markdown_generator.parse_rejected_section_all
  unreject_names = unreject.map { |entry| entry['term'] }
  confirmed_rejected = rejected_section_all
                       .select { |entry| ['', ' '].include?(entry['flag']) }
                       .reject { |entry| unreject_names.include?(entry['term']) }

  if confirmed_rejected.any?
    confirmed_rejected.each do |entry|
      term_name = entry['term']
      next unless @terms_manager.term_names.include?(term_name)

      @terms_manager.remove_term!(term_name)
      Common.log_info("除外済みリストに基づき登録を解除: #{term_name}")
    end

    @queue_manager.save_rejected_terms(confirmed_rejected)
    changes_made = true
  end

  if changes_made
    rejected_total = both_rejected.size + confirmed_rejected.size
    Common.log_success("索引: #{index_count}件、用語集: #{glossary_count}件、リジェクト: #{rejected_total}")
    Common.log_info("読み変更: #{yomi_changes.size}") if yomi_changes.any?
    Common.log_success('index_glossary_terms.yml を更新しました')
    Common.log_info('ページ生成は vs build 実行時に行われます')
  else
    Common.log_warn('変更がありませんでした')
    Common.log_info('_index_glossary_review.md でフラグを編集してください')
  end

  # _index_glossary_review.md is kept (it may be edited again); it is removed
  # by the clean step of `vs build`.
  changes_made
end

#auto_process!(chapters) ⇒ Object

全自動索引候補抽出 → _index_glossary_review.md 生成

Parameters:

  • chapters (Array<String>)

    対象章のリスト



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 48

# Runs the automatic index pass: registers manually marked-up terms, extracts
# candidates, auto-approves the high scorers and hands the mid scorers to the
# review Markdown generator.
#
# @param chapters [Array<String>] chapters to process
# @return [Object, nil] nil when auto discovery is disabled; otherwise the
#   return value of report_auto_results
def auto_process!(chapters)
  approve_at = @config[:auto_approve_threshold] || 300
  review_at = @config[:review_threshold] || 150
  top_share = @config[:high_candidates_ratio] || 0.25
  discovery_on = @config.fetch(:auto_discovery, true)

  Common.log_action('索引の自動処理を開始します...')

  # Step 1: pick up manual markup and register it in the unified dictionary.
  marked_up = extract_manual_markup_terms(chapters)
  if marked_up.any?
    @terms_manager.merge_terms!(marked_up, flags: 'i', source: 'manual_markup')
    Common.log_info("手動マークアップから #{marked_up.size} 件の用語を登録しました")
  end

  # With auto discovery switched off only the manual markup is used.
  unless discovery_on
    Common.log_info('auto_discovery: false のため、自動候補抽出をスキップします')
    Common.log_info('手動マークアップ [用語|読み] のみが索引に反映されます')
    return
  end

  # Step 2: candidate extraction.
  candidates = extract_candidates(chapters)
  Common.log_info("候補抽出: #{candidates.size}")

  # Step 3: drop candidates that are already registered (index + glossary) or
  # were rejected before; only the latter are counted for the report.
  known_names = @terms_manager.term_names
  rejected_names = @queue_manager.load_rejected_terms
  skipped_as_rejected = 0

  fresh = candidates.reject do |candidate|
    name = candidate['term']
    next true if known_names.include?(name)
    next false unless rejected_names.include?(name)

    skipped_as_rejected += 1
    true
  end

  # Step 4: candidates at or above the approval threshold are approved as-is.
  top_scorers, remainder = fresh.partition { |candidate| candidate['score'] >= approve_at }
  auto_approved = top_scorers.map { |candidate| normalize_candidate(candidate) }
  @terms_manager.merge_terms!(auto_approved, flags: 'i', source: 'auto_extracted') if auto_approved.any?

  # Step 5: candidates between the two thresholds go to review, split High/Low.
  review_pool = remainder
                .select { |candidate| candidate['score'] >= review_at }
                .map { |candidate| normalize_candidate(candidate).merge('is_new' => true) }
  high_candidates, low_candidates = split_candidates_by_ratio(review_pool, top_share)

  # Step 6: attach context to every registered term (index + glossary).
  registered = enrich_terms_with_context(@terms_manager.load_terms, chapters)

  # Step 7: attach context and score to rejected terms; the unfiltered
  # candidate list is passed so that scores can be restored from it.
  previously_rejected = enrich_rejected_with_context(candidates)

  # Step 8: write the review Markdown.
  @markdown_generator.generate!(
    terms: registered,
    high_candidates: high_candidates,
    low_candidates: low_candidates,
    rejected: previously_rejected
  )

  # Step 9: result report.
  report_auto_results(auto_approved, high_candidates, low_candidates, approve_at, review_at,
                      skipped_as_rejected)
end

#build_glossary! ⇒ Object

用語集ページを生成(後方互換 - 単独呼び出し用)



361
362
363
364
365
366
367
368
369
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 361

# Generates the glossary page on its own (kept for backward compatibility with
# stand-alone invocation). Does nothing when the glossary is disabled.
#
# @return [Object, nil] the result of Common.log_success when the builder
#   reports success, otherwise nil
def build_glossary!
  return unless glossary_enabled?

  # Drop cached terms so the glossary entries are re-read.
  @terms_manager.clear_cache!
  entries = @terms_manager.glossary_terms
  page_builder = UnifiedPageBuilder.new(glossary_config: @glossary_config)
  return unless page_builder.build_glossary!(entries)

  Common.log_success('用語集ページを生成しました')
end

#build_index!(chapters) ⇒ Object

索引・用語集ページを生成(内部用 - vs build から呼ばれる)

Parameters:

  • chapters (Array<String>)

    対象章のリスト



333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 333

# Generates the index page and, when enabled, the glossary page. Internal
# entry point invoked from `vs build`.
#
# @param chapters [Array<String>] chapters to scan
# @return [Object, nil] the result of IndexCommands.add_post_build_message when
#   the scanner reports a missing config or no matches, otherwise nil
def build_index!(chapters)
  Common.log_action('索引・用語集ページを生成しています...')

  # Scan the manuscript (index tagging + glossary link generation); the scan
  # is allowed to write (read_only: false) and warnings are deferred.
  match_scanner = IndexMatchScanner.new(defer_warnings: true)
  match_scanner.scan_all_chapters!(chapters, read_only: false)

  # One builder produces both pages.
  page_builder = UnifiedPageBuilder.new(glossary_config: @glossary_config)
  page_builder.build_index!

  if glossary_enabled?
    # Reload: the scan updates backlink_sources.
    @terms_manager.clear_cache!
    page_builder.build_glossary!(@terms_manager.glossary_terms)
  end

  Common.log_success('索引・用語集ページの生成が完了しました')

  # Queue a post-build hint when the scan found no config or no matches.
  needs_hint = match_scanner.config_missing || match_scanner.no_matches
  IndexCommands.add_post_build_message(IndexCommands::INDEX_TERMS_MISSING_MESSAGE) if needs_hint
end

#glossary_enabled? ⇒ Boolean

用語集機能が有効か

Returns:

  • (Boolean)


372
373
374
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 372

# Tells whether the glossary feature is switched on.
#
# Only a literal `true` under :enabled counts; any other value (including
# truthy ones such as strings) is treated as disabled.
#
# @return [Boolean]
def glossary_enabled?
  enabled = @glossary_config[:enabled]
  enabled == true
end

#list_rejected_terms ⇒ Object

リジェクト済み候補の一覧表示



377
378
379
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 377

# Lists the rejected candidates by delegating to the review queue manager.
#
# @return [Object] whatever @queue_manager.list_rejected_terms returns
def list_rejected_terms = @queue_manager.list_rejected_terms

#reset_rejected! ⇒ Object

リジェクト履歴をクリア



388
389
390
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 388

# Clears the rejection history by delegating to the review queue manager.
#
# @return [Object] whatever @queue_manager.reset_rejected! returns
def reset_rejected! = @queue_manager.reset_rejected!

#strip_markdown(text) ⇒ Object

Markdown装飾を除去してプレーンテキストを取得



319
320
321
322
323
324
325
326
327
328
329
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 319

# Removes Markdown decoration and returns the plain text, collapsed onto a
# single line. Used to measure the visible length of a definition.
#
# Handles bold, italic, inline code, links, heading markers and list bullets.
#
# @param text [String, nil] Markdown source; nil is treated as an empty string
# @return [String] the text without decoration, newlines replaced by spaces,
#   leading and trailing whitespace removed
def strip_markdown(text)
  text
    .to_s
    .gsub(/\*\*(.+?)\*\*/, '\1')      # **bold**
    # An opening `*` followed by whitespace is a list bullet (or a bare
    # asterisk), not emphasis; leave it for the list-item rule below.
    .gsub(/\*(?!\s)(.+?)\*/, '\1')    # *italic*
    .gsub(/`(.+?)`/, '\1')            # `code`
    .gsub(/\[(.+?)\]\(.+?\)/, '\1')   # [link](url)
    .gsub(/^#+\s*/, '')               # # heading
    .gsub(/^\s*[-*]\s+/, '')          # - list item
    .gsub(/\n+/, ' ')                 # newlines to space
    .strip
end

#unreject_term!(term_or_number) ⇒ Object

リジェクト解除

Parameters:

  • term_or_number (String)

    用語名または番号



383
384
385
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 383

# Lifts a rejection by delegating to the review queue manager.
#
# @param term_or_number [String] term name or list number
# @return [Object] whatever @queue_manager.unreject_term! returns
def unreject_term!(term_or_number) = @queue_manager.unreject_term!(term_or_number)

#validate_glossary_definitions!(terms) ⇒ Object

用語集の説明文バリデーション。require_definition: true の場合、説明文が空ならエラー。max_definition_length を超過している場合は警告。



290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
# File 'lib/vivlio/starter/cli/index/unified_index_manager.rb', line 290

# Validates the definitions of glossary terms.
#
# * A definition whose plain-text length (Markdown decoration removed) exceeds
#   max_definition_length (default 200) only produces a warning.
# * When require_definition is set, every term with a blank definition is
#   logged as an error and the method raises.
#
# @param terms [Array<Hash>] term hashes read through 'term' and 'definition'
# @return [nil]
# @raise [RuntimeError] when require_definition is set and at least one term
#   has a blank definition
def validate_glossary_definitions!(terms)
  limit = @glossary_config[:max_definition_length] || 200

  # Length check, measured on the text with Markdown decoration stripped.
  terms.each do |entry|
    body = entry['definition'].to_s
    next if body.strip.empty?

    visible_length = strip_markdown(body).length
    if visible_length > limit
      headline = "用語「#{entry['term']}」の説明文が #{limit} 文字を超過しています "
      Common.log_warn("#{headline}(#{visible_length} 文字)")
    end
  end

  return unless @glossary_config[:require_definition]

  blank, = terms.partition { |entry| entry['definition'].to_s.strip.empty? }
  return if blank.empty?

  blank.each { |entry| Common.log_error("用語「#{entry['term']}」に説明文がありません") }
  raise "用語集の説明文が必須ですが、#{blank.size}件の用語に説明文がありません"
end