Class: SharedTools::Tools::Doc::DocxReaderTool

Inherits:
RubyLLM::Tool
  • Object
show all
Defined in:
lib/shared_tools/tools/doc/docx_reader_tool.rb

Overview

Read text content from Microsoft Word (.docx) documents.

Examples:

tool = SharedTools::Tools::Doc::DocxReaderTool.new
tool.execute(doc_path: "./report.docx")
tool.execute(doc_path: "./report.docx", paragraph_range: "1-10")

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(logger: nil) ⇒ DocxReaderTool

Returns a new instance of DocxReaderTool.

Parameters:

  • logger (Logger) (defaults to: nil)

    optional logger



38
39
40
# File 'lib/shared_tools/tools/doc/docx_reader_tool.rb', line 38

# Creates the tool and stores the logger used by its other methods.
#
# @param logger [Logger, nil] optional logger; when none is supplied,
#   RubyLLM.logger is used instead
def initialize(logger: nil)
  @logger = logger
  # Fall back to the library-wide logger only when the caller gave none.
  @logger ||= RubyLLM.logger
end

Class Method Details

.name ⇒ Object



19
# File 'lib/shared_tools/tools/doc/docx_reader_tool.rb', line 19

# Identifier string for this tool.
#
# @return [String] always 'doc_docx_read'
def self.name
  'doc_docx_read'
end

Instance Method Details

#execute(doc_path:, paragraph_range: nil) ⇒ Hash

Returns extraction result.

Parameters:

  • doc_path (String)

    path to .docx file

  • paragraph_range (String, nil) (defaults to: nil)

    optional paragraph range, e.g. "1-10"

Returns:

  • (Hash)

    extraction result



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/shared_tools/tools/doc/docx_reader_tool.rb', line 45

# Reads the non-empty paragraphs of a .docx file and returns them, optionally
# restricted to a requested set of paragraph numbers.
#
# Paragraph numbers are 1-based and refer to positions in the list of
# non-empty paragraphs (blank paragraphs are dropped before numbering).
#
# @param doc_path [String] path to the .docx file
# @param paragraph_range [String, nil] optional range specification; it is
#   handed to parse_range (defined outside this method) together with the
#   paragraph count. When nil, every paragraph is returned.
# @return [Hash] on success: :doc_path, :total_paragraphs, :requested_range,
#   :invalid_paragraphs, :paragraphs and :full_text; on failure a Hash with a
#   single :error message
# @raise [LoadError] when the Docx constant is not defined (this is not a
#   StandardError, so the trailing rescue does not swallow it)
def execute(doc_path:, paragraph_range: nil)
  unless defined?(Docx)
    raise LoadError, "DocxReaderTool requires the 'docx' gem. Install it with: gem install docx"
  end

  @logger.info("DocxReaderTool#execute doc_path=#{doc_path} paragraph_range=#{paragraph_range}")

  return { error: "File not found: #{doc_path}" } unless File.exist?(doc_path)

  extension = File.extname(doc_path)
  return { error: "Expected a .docx file, got: #{extension}" } if extension.downcase != '.docx'

  document = Docx::Document.open(doc_path)
  texts    = document.paragraphs.map(&:to_s).reject { |text| text.strip.empty? }
  count    = texts.length

  @logger.debug("Loaded #{count} non-empty paragraphs from #{doc_path}")

  requested = paragraph_range ? parse_range(paragraph_range, count) : (1..count).to_a

  # Split the requested numbers into those outside 1..count and those inside.
  out_of_bounds, in_bounds = requested.partition { |number| number < 1 || number > count }

  # Numbers are 1-based, the array is 0-based.
  entries = in_bounds.map { |number| { paragraph: number, text: texts[number - 1] } }

  @logger.info("Extracted #{entries.size} paragraphs from #{doc_path}")

  {
    doc_path:           doc_path,
    total_paragraphs:   count,
    requested_range:    paragraph_range || "all",
    invalid_paragraphs: out_of_bounds,
    paragraphs:         entries,
    full_text:          entries.map { |entry| entry[:text] }.join("\n\n")
  }
rescue StandardError => e
  # Any failure while opening or processing the file is reported as data.
  @logger.error("Failed to read DOCX '#{doc_path}': #{e.message}")
  { error: e.message }
end