Class: RosettAi::Adopter::RuleAdopter

Inherits:
Object
  • Object
show all
Defined in:
lib/rosett_ai/adopter/rule_adopter.rb

Overview

Analyzes compiled markdown rule files for inconsistencies, conflicts, harmful content, duplicates, and other issues.

Supports four layers of data privacy protection:

  1. Opt-in per file — sensitive: true in YAML excludes from API analysis
  2. Redaction — regex patterns replace matches before sending to API
  3. Configurable endpoint — ANTHROPIC_API_BASE_URL for proxy/Bedrock/Vertex
  4. Local-only mode — structural checks without API calls

Constant Summary collapse

GENERATED_MARKER =
'<!-- rosett-ai-'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(rules_dir:, cache_path:, redactions_path:, engine: 'claude') ⇒ RuleAdopter

Returns a new instance of RuleAdopter.



26
27
28
29
30
31
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 26

# Stores the locations this adopter works with.
#
# Each path argument is wrapped in a Pathname; the engine is stored as a String.
#
# @param rules_dir [String, Pathname] directory scanned for rule files
# @param cache_path [String, Pathname] file used for the analysis cache
# @param redactions_path [String, Pathname] stored as given; its reader is not visible here
# @param engine [#to_s] engine identifier, 'claude' unless overridden
def initialize(rules_dir:, cache_path:, redactions_path:, engine: 'claude')
  @rules_dir, @cache_path, @redactions_path =
    [rules_dir, cache_path, redactions_path].map { |location| Pathname.new(location) }
  @engine = engine.to_s
end

Instance Attribute Details

#cache_path ⇒ Object (readonly)

Returns the value of attribute cache_path.



24
25
26
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 24

# Reader for the cache location.
#
# @return [Pathname] the value #initialize built from its cache_path: argument
def cache_path
  @cache_path
end

#engine ⇒ Object (readonly)

Returns the value of attribute engine.



24
25
26
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 24

# Reader for the engine identifier.
#
# @return [String] the engine: argument of #initialize converted with to_s
def engine
  @engine
end

#redactions_path ⇒ Object (readonly)

Returns the value of attribute redactions_path.



24
25
26
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 24

# Reader for the redactions location.
#
# @return [Pathname] the value #initialize built from its redactions_path: argument
def redactions_path
  @redactions_path
end

#rules_dir ⇒ Object (readonly)

Returns the value of attribute rules_dir.



24
25
26
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 24

# Reader for the rules directory.
#
# @return [Pathname] the value #initialize built from its rules_dir: argument
def rules_dir
  @rules_dir
end

Instance Method Details

#analyze(files) ⇒ Object



104
105
106
107
108
109
110
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 104

# Builds a prompt for +files+ and passes it to the executor's #analyze.
#
# @param files [Array] rule files to analyze
# @return [Object] empty_result when +files+ is empty, otherwise whatever
#   the executor returned by resolve_executor yields for the prompt
def analyze(files)
  if files.empty?
    empty_result
  else
    # The prompt is built before the executor is resolved, in that order.
    prompt = build_prompt(files)
    resolve_executor.analyze(prompt)
  end
end

#analyze_local(files) ⇒ Object



112
113
114
115
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 112

# Delegates to a freshly created LocalAnalysisCollector.
#
# @param files [Array] rule files handed to the collector unchanged
# @return [Object] the collector's #analyze result
def analyze_local(files)
  LocalAnalysisCollector.new.analyze(files)
end

#build_prompt(files) ⇒ Object



86
87
88
89
90
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 86

# Assembles the prompt text for +files+.
#
# Starts from prompt_header, lets append_file_contents add to that list
# (its return value is ignored, so it is expected to append in place),
# and joins every part with a newline.
#
# @param files [Array] rule files passed through to append_file_contents
# @return [String] the newline-joined prompt
def build_prompt(files)
  [prompt_header]
    .tap { |sections| append_file_contents(sections, files) }
    .join("\n")
end

#cached_result(checksum) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 63

# Looks up a stored analysis result for +checksum+.
#
# A miss (nil) is reported when the cache file does not exist, when its
# content is not a Hash carrying the same 'checksum', when cache_expired?
# says so, or when the YAML layer rejects the file.
#
# @param checksum [String] value compared against the cached 'checksum' entry
# @return [Object, nil] the cached 'result' entry, or nil on a miss
def cached_result(checksum)
  return unless cache_path.exist?

  data = RosettAi::YamlLoader.load_file(cache_path.to_s, permitted_classes: [Time, Date])
  return unless data.is_a?(Hash) && data['checksum'] == checksum
  return if cache_expired?(data)

  data['result']
rescue Psych::Exception => e
  # Psych::SyntaxError, Psych::DisallowedClass and Psych::BadAlias all descend
  # from Psych::Exception. Rescuing the base class means a cache that contains
  # YAML aliases (which #to_yaml emits for repeated object references) is
  # treated as unusable instead of aborting the run.
  # NOTE(review): assumes YamlLoader.load_file is a safe-load wrapper that
  # raises Psych errors — confirm against its implementation.
  RosettAi.logger.warn("Corrupt adopt cache (#{e.message}), will re-analyze")
  nil
end

#content_checksum(files) ⇒ Object



58
59
60
61
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 58

# Computes a SHA-256 hex digest over the contents of +files+.
#
# Files are read in sorted path order, so the argument order does not
# matter. Only the file contents are hashed; paths and file boundaries
# are not part of the digest.
#
# @param files [Array<String>] paths to read
# @return [String] hexadecimal SHA-256 digest
def content_checksum(files)
  digest = Digest::SHA256.new
  files.sort.each { |path| digest << File.read(path) }
  digest.hexdigest
end

#discover_managed_files ⇒ Object



47
48
49
50
51
52
53
54
55
56
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 47

# Lists the rule files in rules_dir whose first line starts with
# GENERATED_MARKER.
#
# Empty files and non-regular entries (for example a directory named
# "x.md") are skipped rather than raising.
#
# @return [Array<String>] sorted paths of matching "*.md" files; empty when
#   rules_dir does not exist
def discover_managed_files
  return [] unless rules_dir.exist?

  # Globbing relative to `base:` keeps metacharacters in the directory path
  # ("[", "{", "*", "?") from being interpreted as part of the pattern.
  candidates = Dir.glob('*.md', base: rules_dir.to_s).map { |name| rules_dir.join(name).to_s }
  candidates.select do |file|
    next false unless File.file?(file)

    File.open(file, &:readline).start_with?(GENERATED_MARKER)
  rescue EOFError
    # readline raises EOFError on an empty file: no first line, so not managed.
    false
  end.sort
end

#evaluate(local_only: false) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 33

# Entry point: discovers the managed rule files and evaluates them.
#
# @param local_only [Boolean] when truthy, dispatch to evaluate_local;
#   otherwise dispatch to evaluate_remote
# @return [Object] the result of the evaluate_local / evaluate_remote call
# @raise [RosettAi::AdoptError] when discover_managed_files finds nothing
def evaluate(local_only: false)
  managed = discover_managed_files
  raise RosettAi::AdoptError, 'No managed rule files found in rules directory' if managed.empty?

  sensitive_sources = sensitive_files
  # Computed in both modes, although only the remote path receives it.
  shareable = filter_sensitive(managed, sensitive_sources)
  return evaluate_local(managed, sensitive_sources) if local_only

  evaluate_remote(managed, shareable, sensitive_sources)
end

#filter_sensitive(files, sensitive_sources) ⇒ Object



129
130
131
132
133
134
135
136
137
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 129

# Removes the rule files that correspond to sensitive source files.
#
# A rule file corresponds to a source when its basename (without ".md" and
# with everything up to and including the first "-" removed) equals the
# source's basename without ".yml". Both sides are passed through
# RosettAi::TextSanitizer.normalize_nfc before comparing, so a file name
# stored in a different Unicode form cannot slip past the filter.
#
# @param files [Array<String>] rule file paths
# @param sensitive_sources [Array<String>] paths of sources marked sensitive
# @return [Array<String>] +files+ itself when there are no sensitive sources,
#   otherwise a new array without the matching rule files
def filter_sensitive(files, sensitive_sources)
  return files if sensitive_sources.empty?

  sensitive_names = sensitive_sources.map do |source|
    RosettAi::TextSanitizer.normalize_nfc(File.basename(source, '.yml'))
  end
  files.reject do |file|
    # Strip the leading "<prefix>-" segment before comparing names.
    rule_name = File.basename(file, '.md').sub(/\A[^-]+-/, '')
    sensitive_names.include?(RosettAi::TextSanitizer.normalize_nfc(rule_name))
  end
end

#redact(content) ⇒ Object



92
93
94
95
96
97
98
99
100
101
102
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 92

# Applies every redaction pattern to a copy of +content+.
#
# Each entry from load_redaction_patterns supplies a 'pattern' (compiled
# with Regexp.new) and a 'replacement'. A pattern that does not compile is
# logged as a warning and skipped; the remaining patterns are still applied.
#
# @param content [String] text to redact; the argument itself is not modified
# @return [String] the redacted copy
def redact(content)
  rules = load_redaction_patterns
  redacted = content.dup
  rules.each do |rule|
    begin
      matcher = Regexp.new(rule['pattern'])
      redacted.gsub!(matcher, rule['replacement'])
    rescue RegexpError => e
      RosettAi.logger.warn("Invalid redaction pattern '#{rule['pattern']}': #{e.message}")
    end
  end
  redacted
end

#sensitive_files ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 117

# Lists the behaviour YAML files that are flagged as sensitive.
#
# Looks at "*.yml" directly under <RosettAi.root>/conf/behaviour and keeps
# the files whose top-level Hash has 'sensitive' set to exactly true.
# A file that cannot be loaded is reported through the logger and treated
# as not sensitive.
#
# @return [Array<String>] paths of the sensitive behaviour files; empty when
#   the behaviour directory does not exist
def sensitive_files
  behaviour_dir = RosettAi.root.join('conf', 'behaviour')
  return [] unless behaviour_dir.exist?

  # Globbing relative to `base:` keeps metacharacters in the project path
  # ("[", "{", "*", "?") from being interpreted as part of the pattern.
  sources = Dir.glob('*.yml', base: behaviour_dir.to_s).map { |name| behaviour_dir.join(name).to_s }
  sources.select do |file|
    data = RosettAi::YamlLoader.load_file(file)
    # Only a literal boolean true counts; truthy values such as 'true' do not.
    data.is_a?(Hash) && data['sensitive'] == true
  rescue StandardError => e
    # Surface the failure: an unreadable file may have been meant to be
    # excluded from API analysis, so dropping it silently hides a privacy risk.
    RosettAi.logger.warn("Cannot read behaviour file '#{file}' (#{e.message}), treating it as not sensitive")
    false
  end
end

#write_cache(checksum, result) ⇒ Object



76
77
78
79
80
81
82
83
84
# File 'lib/rosett_ai/adopter/rule_adopter.rb', line 76

# Persists an analysis result to cache_path as YAML.
#
# Creates the parent directory when needed and overwrites any existing
# cache file. The document holds 'checksum', 'analyzed_at' (current UTC
# time formatted as YYYY-MM-DDTHH:MM:SSZ) and 'result', in that order.
#
# @param checksum [String] stored under 'checksum'
# @param result [Object] stored under 'result'
# @return [Integer] number of bytes written
def write_cache(checksum, result)
  FileUtils.mkdir_p(cache_path.dirname)

  timestamp = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
  payload = { 'checksum' => checksum, 'analyzed_at' => timestamp, 'result' => result }

  # 0o644 is the creation mode passed to File.open for a new file.
  File.open(cache_path, 'w', 0o644) do |io|
    io.write(Psych.dump(payload))
  end
end