Class: Kotoshu::Languages::Japanese::GrammarRules::ScriptMixingRule

Inherits:
Rule
  • Object
show all
Defined in:
lib/kotoshu/languages/ja/language.rb

Overview

Rule: Script mixing

Instance Attribute Summary

Attributes inherited from Rule

#description, #id, #name

Instance Method Summary collapse

Constructor Details

#initialize ⇒ ScriptMixingRule

Returns a new instance of ScriptMixingRule.



360
361
362
# File 'lib/kotoshu/languages/ja/language.rb', line 360

# Builds the rule with its fixed metadata by delegating to the parent
# constructor. The three literals are presumably the rule id, display name
# and description, in that order (verify against Rule#initialize).
def initialize
  super(
    'JA_SCRIPT_MIXING',
    'Script Mixing',
    'Japanese text uses multiple scripts (Hiragana, Katakana, Kanji).'
  )
end

Instance Method Details

#check(tokens) ⇒ Object



364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
# File 'lib/kotoshu/languages/ja/language.rb', line 364

# Flags tokens whose text contains Hiragana, Katakana and Kanji all at once.
#
# Only script-specific code points are counted. Marks that are shared
# between the kana scripts are ignored so that they cannot make a word look
# like it uses a script it does not actually contain:
# * U+30FC (prolonged sound mark), U+30FB (middle dot) and U+30A0 sit in the
#   Katakana block but are routinely written inside hiragana/kanji words.
# * U+3099-U+309C (combining/spacing voiced marks) sit in the Hiragana block
#   but also follow katakana letters in decomposed (NFD) text.
#
# @param tokens [Enumerable<Hash>] token hashes; +:token+ is read as the word
#   text and +:position+ is copied verbatim into the reported error
# @return [Array<Hash>] one error hash per offending token, empty when none;
#   tokens whose +:token+ is nil or empty are skipped
def check(tokens)
  tokens.each_with_object([]) do |token, errors|
    word = token[:token]
    next if word.nil? || word.empty?

    # Hiragana letters (U+3041-3096) plus hiragana-only iteration marks/digraph.
    has_hiragana = word.match?(/[\u3041-\u3096\u309D-\u309F]/)
    # Katakana letters (U+30A1-30FA) plus katakana-only iteration marks/digraph.
    has_katakana = word.match?(/[\u30A1-\u30FA\u30FD-\u30FF]/)
    has_kanji = word.match?(/[\u4E00-\u9FFF]/)

    # Words typically shouldn't mix all three scripts.
    next unless has_hiragana && has_katakana && has_kanji

    errors << {
      rule_id: @id,
      position: token[:position],
      message: "Unusual script mixing in word '#{word}'",
      suggestion: 'Review script usage',
      context: word,
      suggestions: ['Use consistent script']
    }
  end
end