Class: Kreuzberg::Result
- Inherits:
-
Object
- Object
- Kreuzberg::Result
- Defined in:
- lib/kreuzberg/result.rb,
lib/kreuzberg/djot_content.rb,
lib/kreuzberg/document_structure.rb
Overview
rubocop:disable Metrics/ClassLength
Defined Under Namespace
Classes: Chunk, DjotContent, DocumentAnnotation, DocumentBoundingBox, DocumentNode, DocumentStructure, ElementBoundingBox, ElementMetadataStruct, ElementStruct, HierarchicalBlock, Image, LayoutRegion, OcrBoundingGeometry, OcrConfidence, OcrElement, OcrRotation, PageContent, PageHierarchy, Table
Instance Attribute Summary collapse
-
#annotations ⇒ Object
readonly
Returns the value of attribute annotations.
-
#byte_end ⇒ Integer
readonly
Ending byte offset (UTF-8).
-
#byte_start ⇒ Integer
readonly
Starting byte offset (UTF-8).
-
#children ⇒ Object
readonly
Returns the value of attribute children.
-
#chunks ⇒ Object
readonly
Returns the value of attribute chunks.
-
#content ⇒ String
readonly
Text content for this page.
-
#detected_languages ⇒ Object
readonly
Returns the value of attribute detected_languages.
-
#djot_content ⇒ Object
readonly
Returns the value of attribute djot_content.
-
#document ⇒ Object
readonly
Returns the value of attribute document.
-
#elements ⇒ Object
readonly
Returns the value of attribute elements.
-
#extracted_keywords ⇒ Object
readonly
Returns the value of attribute extracted_keywords.
-
#first_page ⇒ Integer?
readonly
First page number (1-indexed).
-
#hierarchy ⇒ PageHierarchy?
readonly
Hierarchy information for the page.
-
#images ⇒ Array<Image>
readonly
Images on this page.
-
#last_page ⇒ Integer?
readonly
Last page number (1-indexed).
-
#metadata ⇒ Object
readonly
Returns the value of attribute metadata.
-
#metadata_json ⇒ Object
readonly
Returns the value of attribute metadata_json.
-
#mime_type ⇒ Object
readonly
Returns the value of attribute mime_type.
-
#ocr_elements ⇒ Object
readonly
Returns the value of attribute ocr_elements.
-
#page_number ⇒ Integer
readonly
Page number (1-indexed).
-
#pages ⇒ Object
readonly
Returns the value of attribute pages.
-
#processing_warnings ⇒ Object
readonly
Returns the value of attribute processing_warnings.
-
#quality_score ⇒ Object
readonly
Returns the value of attribute quality_score.
-
#structured_output ⇒ Object
readonly
Returns the value of attribute structured_output.
-
#tables ⇒ Array<Table>
readonly
Tables on this page.
-
#token_count ⇒ Integer?
readonly
Approximate token count (may be nil).
-
#uris ⇒ Object
readonly
Returns the value of attribute uris.
Instance Method Summary collapse
-
#chunk_count ⇒ Integer
Get the total number of text chunks.
-
#detected_language ⇒ String?
Get the primary detected language.
-
#initialize(hash) ⇒ Result
constructor
Initialize from native hash result.
-
#metadata_field(name) ⇒ Object?
Get a metadata field by name.
-
#page_count ⇒ Integer
Get the total number of pages in the document.
-
#to_h ⇒ Hash
Convert to hash.
-
#to_json ⇒ String
Convert to JSON.
Constructor Details
#initialize(hash) ⇒ Result
Initialize from native hash result
rubocop:disable Metrics/AbcSize, Metrics/MethodLength
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 |
# File 'lib/kreuzberg/result.rb', line 345 def initialize(hash) @content = get_value(hash, 'content', '') @mime_type = get_value(hash, 'mime_type', '') @metadata_json = get_value(hash, 'metadata_json', '{}') @metadata = parse_metadata(@metadata_json) @tables = parse_tables(get_value(hash, 'tables')) @detected_languages = parse_detected_languages(get_value(hash, 'detected_languages')) @chunks = parse_chunks(get_value(hash, 'chunks')) @images = parse_images(get_value(hash, 'images')) @pages = parse_pages(get_value(hash, 'pages')) @elements = parse_elements(get_value(hash, 'elements')) @ocr_elements = parse_ocr_elements(get_value(hash, 'ocr_elements')) @djot_content = parse_djot_content(get_value(hash, 'djot_content')) @document = parse_document_structure(get_value(hash, 'document')) @extracted_keywords = parse_extracted_keywords(get_value(hash, 'extracted_keywords')) @quality_score = get_value(hash, 'quality_score') @processing_warnings = parse_processing_warnings(get_value(hash, 'processing_warnings')) @annotations = parse_annotations(get_value(hash, 'annotations')) @uris = parse_uris(get_value(hash, 'uris')) @children = parse_children(get_value(hash, 'children')) @structured_output = get_value(hash, 'structured_output') end |
Instance Attribute Details
#annotations ⇒ Object (readonly)
Returns the value of attribute annotations.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def annotations @annotations end |
#byte_end ⇒ Integer (readonly)
Returns Ending byte offset (UTF-8).
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/kreuzberg/result.rb', line 46 Chunk = Struct.new( :content, :byte_start, :byte_end, :token_count, :chunk_index, :total_chunks, :first_page, :last_page, :chunk_type, :embedding ) do def to_h { content: content, byte_start: byte_start, byte_end: byte_end, token_count: token_count, chunk_index: chunk_index, total_chunks: total_chunks, first_page: first_page, last_page: last_page, chunk_type: chunk_type, embedding: } end end |
#byte_start ⇒ Integer (readonly)
Returns Starting byte offset (UTF-8).
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/kreuzberg/result.rb', line 46 Chunk = Struct.new( :content, :byte_start, :byte_end, :token_count, :chunk_index, :total_chunks, :first_page, :last_page, :chunk_type, :embedding ) do def to_h { content: content, byte_start: byte_start, byte_end: byte_end, token_count: token_count, chunk_index: chunk_index, total_chunks: total_chunks, first_page: first_page, last_page: last_page, chunk_type: chunk_type, embedding: } end end |
#children ⇒ Object (readonly)
Returns the value of attribute children.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def children @children end |
#chunks ⇒ Object (readonly)
Returns the value of attribute chunks.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def chunks @chunks end |
#content ⇒ String (readonly)
Returns Text content for this page.
46 47 48 |
# File 'lib/kreuzberg/result.rb', line 46 def content @content end |
#detected_languages ⇒ Object (readonly)
Returns the value of attribute detected_languages.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def detected_languages @detected_languages end |
#djot_content ⇒ Object (readonly)
Returns the value of attribute djot_content.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def djot_content @djot_content end |
#document ⇒ Object (readonly)
Returns the value of attribute document.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def document @document end |
#elements ⇒ Object (readonly)
Returns the value of attribute elements.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def elements @elements end |
#extracted_keywords ⇒ Object (readonly)
Returns the value of attribute extracted_keywords.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def extracted_keywords @extracted_keywords end |
#first_page ⇒ Integer? (readonly)
Returns First page number (1-indexed).
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/kreuzberg/result.rb', line 46 Chunk = Struct.new( :content, :byte_start, :byte_end, :token_count, :chunk_index, :total_chunks, :first_page, :last_page, :chunk_type, :embedding ) do def to_h { content: content, byte_start: byte_start, byte_end: byte_end, token_count: token_count, chunk_index: chunk_index, total_chunks: total_chunks, first_page: first_page, last_page: last_page, chunk_type: chunk_type, embedding: } end end |
#hierarchy ⇒ PageHierarchy? (readonly)
Returns Hierarchy information for the page.
148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/kreuzberg/result.rb', line 148 PageContent = Struct.new(:page_number, :content, :tables, :images, :hierarchy, :is_blank, :layout_regions) do def to_h { page_number: page_number, content: content, tables: tables.map(&:to_h), images: images.map(&:to_h), hierarchy: hierarchy&.to_h, is_blank: is_blank, layout_regions: layout_regions&.map(&:to_h) } end end |
#images ⇒ Array<Image> (readonly)
Returns Images on this page.
148 149 150 |
# File 'lib/kreuzberg/result.rb', line 148 def images @images end |
#last_page ⇒ Integer? (readonly)
Returns Last page number (1-indexed).
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/kreuzberg/result.rb', line 46 Chunk = Struct.new( :content, :byte_start, :byte_end, :token_count, :chunk_index, :total_chunks, :first_page, :last_page, :chunk_type, :embedding ) do def to_h { content: content, byte_start: byte_start, byte_end: byte_end, token_count: token_count, chunk_index: chunk_index, total_chunks: total_chunks, first_page: first_page, last_page: last_page, chunk_type: chunk_type, embedding: } end end |
#metadata ⇒ Object (readonly)
Returns the value of attribute metadata.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def metadata @metadata end |
#metadata_json ⇒ Object (readonly)
Returns the value of attribute metadata_json.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def metadata_json @metadata_json end |
#mime_type ⇒ Object (readonly)
Returns the value of attribute mime_type.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def mime_type @mime_type end |
#ocr_elements ⇒ Object (readonly)
Returns the value of attribute ocr_elements.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def ocr_elements @ocr_elements end |
#page_number ⇒ Integer (readonly)
Returns Page number (1-indexed).
148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/kreuzberg/result.rb', line 148 PageContent = Struct.new(:page_number, :content, :tables, :images, :hierarchy, :is_blank, :layout_regions) do def to_h { page_number: page_number, content: content, tables: tables.map(&:to_h), images: images.map(&:to_h), hierarchy: hierarchy&.to_h, is_blank: is_blank, layout_regions: layout_regions&.map(&:to_h) } end end |
#pages ⇒ Object (readonly)
Returns the value of attribute pages.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def pages @pages end |
#processing_warnings ⇒ Object (readonly)
Returns the value of attribute processing_warnings.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def processing_warnings @processing_warnings end |
#quality_score ⇒ Object (readonly)
Returns the value of attribute quality_score.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def quality_score @quality_score end |
#structured_output ⇒ Object (readonly)
Returns the value of attribute structured_output.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def structured_output @structured_output end |
#tables ⇒ Array<Table> (readonly)
Returns Tables on this page.
148 149 150 |
# File 'lib/kreuzberg/result.rb', line 148 def tables @tables end |
#token_count ⇒ Integer? (readonly)
Returns Approximate token count (may be nil).
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/kreuzberg/result.rb', line 46 Chunk = Struct.new( :content, :byte_start, :byte_end, :token_count, :chunk_index, :total_chunks, :first_page, :last_page, :chunk_type, :embedding ) do def to_h { content: content, byte_start: byte_start, byte_end: byte_end, token_count: token_count, chunk_index: chunk_index, total_chunks: total_chunks, first_page: first_page, last_page: last_page, chunk_type: chunk_type, embedding: } end end |
#uris ⇒ Object (readonly)
Returns the value of attribute uris.
15 16 17 |
# File 'lib/kreuzberg/result.rb', line 15 def uris @uris end |
Instance Method Details
#chunk_count ⇒ Integer
Get the total number of text chunks
Returns 0 if chunking was not performed.
433 434 435 |
# File 'lib/kreuzberg/result.rb', line 433 def chunk_count @chunks&.length || 0 end |
#detected_language ⇒ String?
Get the primary detected language
446 447 448 449 450 451 |
# File 'lib/kreuzberg/result.rb', line 446 def detected_language return @metadata['language'] if @metadata.is_a?(Hash) && @metadata['language'] return @detected_languages&.first if @detected_languages&.any? nil end |
#metadata_field(name) ⇒ Object?
Get a metadata field by name
Supports dot notation for nested fields (e.g., “format.pages”).
468 469 470 471 472 473 474 475 476 477 478 479 480 481 |
# File 'lib/kreuzberg/result.rb', line 468 def metadata_field(name) return nil unless @metadata.is_a?(Hash) parts = name.to_s.split('.') value = @metadata parts.each do |part| return nil unless value.is_a?(Hash) value = value[part] end value end |
#page_count ⇒ Integer
Get the total number of pages in the document
415 416 417 418 419 420 421 |
# File 'lib/kreuzberg/result.rb', line 415 def page_count if @metadata.is_a?(Hash) && @metadata['pages'].is_a?(Hash) @metadata['pages']['total_count'] || 0 else 0 end end |
#to_h ⇒ Hash
Convert to hash
rubocop:disable Metrics/CyclomaticComplexity, Metrics/MethodLength
374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 |
# File 'lib/kreuzberg/result.rb', line 374 def to_h { content: @content, mime_type: @mime_type, metadata: @metadata, tables: serialize_tables, detected_languages: @detected_languages, chunks: serialize_chunks, images: serialize_images, pages: serialize_pages, elements: serialize_elements, ocr_elements: serialize_ocr_elements, djot_content: @djot_content&.to_h, document: @document&.to_h, extracted_keywords: @extracted_keywords&.map(&:to_h), quality_score: @quality_score, processing_warnings: @processing_warnings.map(&:to_h), annotations: @annotations&.map(&:to_h), uris: @uris&.map(&:to_h), children: @children&.map(&:to_h), structured_output: @structured_output } end |
#to_json ⇒ String
Convert to JSON
403 404 405 |
# File 'lib/kreuzberg/result.rb', line 403 def to_json(*) to_h.to_json(*) end |