Module: Coradoc
- Extended by:
- Configurable
- Defined in:
- lib/coradoc/coradoc.rb,
lib/coradoc.rb,
lib/coradoc/cli.rb,
lib/coradoc/hooks.rb,
lib/coradoc/input.rb,
lib/coradoc/query.rb,
lib/coradoc/errors.rb,
lib/coradoc/logger.rb,
lib/coradoc/output.rb,
lib/coradoc/version.rb,
lib/coradoc/visitor.rb,
lib/coradoc/registry.rb,
lib/coradoc/transform.rb,
lib/coradoc/core_model.rb,
lib/coradoc/validation.rb,
lib/coradoc/configurable.rb,
lib/coradoc/format_module.rb,
lib/coradoc/core_model/toc.rb,
lib/coradoc/transform/base.rb,
lib/coradoc/core_model/base.rb,
lib/coradoc/core_model/term.rb,
lib/coradoc/core_model/block.rb,
lib/coradoc/core_model/image.rb,
lib/coradoc/core_model/table.rb,
lib/coradoc/document_builder.rb,
lib/coradoc/include_resolver.rb,
lib/coradoc/resolve_includes.rb,
lib/coradoc/include_selectors.rb,
lib/coradoc/core_model/callout.rb,
lib/coradoc/core_model/include.rb,
lib/coradoc/processor_registry.rb,
lib/coradoc/core_model/footnote.rb,
lib/coradoc/core_model/metadata.rb,
lib/coradoc/serializer/registry.rb,
lib/coradoc/core_model/list_item.rb,
lib/coradoc/document_manipulator.rb,
lib/coradoc/core_model/list_block.rb,
lib/coradoc/core_model/open_block.rb,
lib/coradoc/core_model/pass_block.rb,
lib/coradoc/core_model/frontmatter.rb,
lib/coradoc/core_model/quote_block.rb,
lib/coradoc/core_model/verse_block.rb,
lib/coradoc/include_selectors/tags.rb,
lib/coradoc/performance_regression.rb,
lib/coradoc/core_model/bibliography.rb,
lib/coradoc/core_model/callout_text.rb,
lib/coradoc/core_model/comment_line.rb,
lib/coradoc/core_model/has_children.rb,
lib/coradoc/core_model/id_generator.rb,
lib/coradoc/core_model/source_block.rb,
lib/coradoc/core_model/text_content.rb,
lib/coradoc/include_selectors/lines.rb,
lib/coradoc/core_model/comment_block.rb,
lib/coradoc/core_model/example_block.rb,
lib/coradoc/core_model/listing_block.rb,
lib/coradoc/core_model/literal_block.rb,
lib/coradoc/core_model/sidebar_block.rb,
lib/coradoc/core_model/toc_generator.rb,
lib/coradoc/include_selectors/indent.rb,
lib/coradoc/core_model/inline_element.rb,
lib/coradoc/core_model/reviewer_block.rb,
lib/coradoc/core_model/definition_item.rb,
lib/coradoc/core_model/definition_list.rb,
lib/coradoc/core_model/include_options.rb,
lib/coradoc/core_model/paragraph_block.rb,
lib/coradoc/core_model/annotation_block.rb,
lib/coradoc/core_model/children_content.rb,
lib/coradoc/include_resolver/filesystem.rb,
lib/coradoc/core_model/element_attribute.rb,
lib/coradoc/core_model/frontmatter/codec.rb,
lib/coradoc/core_model/bibliography_entry.rb,
lib/coradoc/core_model/raw_inline_element.rb,
lib/coradoc/core_model/structural_element.rb,
lib/coradoc/include_selectors/level_offset.rb,
lib/coradoc/core_model/include_level_offset.rb,
lib/coradoc/core_model/horizontal_rule_block.rb,
lib/coradoc/core_model/frontmatter/text_splitter.rb,
lib/coradoc/core_model/frontmatter/field_transform.rb,
lib/coradoc/core_model/frontmatter/schema_resolver.rb,
lib/coradoc/core_model/frontmatter/frontmatter_value.rb
Overview
Coradoc - A hub-and-spoke document transformation library
Coradoc provides a unified document model (CoreModel) and transformation infrastructure for converting between document formats such as AsciiDoc, HTML, and Markdown.
## Architecture
Coradoc uses a hub-and-spoke architecture where CoreModel acts as the canonical document representation. Each format (AsciiDoc, HTML, Markdown) has its own model and transformers to/from CoreModel.
```
Source Format → Source Model → CoreModel → Target Model → Target Format
```
## Quick Start
Defined Under Namespace
Modules: Configurable, CoreModel, FormatModule, Hooks, IncludeSelectors, Input, Output, PerformanceRegression, ProcessorRegistry, Query, Serializer, Transform, Validation, Visitor
Classes: CLI, CircularIncludeError, DocumentBuilder, DocumentManipulator, Error, FileNotFoundError, IncludeDepthExceededError, IncludeNotFoundError, IncludeResolver, IncludeTooLargeError, Logger, ParseError, Registry, ResolveIncludes, TransformationError, UnsafeIncludeError, UnsupportedFormatError, ValidationError
Constant Summary collapse
- ERROR_SUGGESTIONS =
Suggestion patterns for common parsing errors
These patterns are matched against error messages and source content to provide helpful suggestions for fixing common issues.
[ { pattern: /unterminated.*string|unexpected.*end.*of.*input|expected.*["']/i, suggestion: 'Check for unclosed quotes or strings', examples: ["'text'", '"text"'] }, { pattern: /unexpected.*indentation|indentation.*error|inconsistent.*indent/i, suggestion: 'Check indentation - use consistent spaces or tabs', examples: [' indented line', ' nested item'] }, { pattern: /missing.*separator|expected.*delimiter|missing.*comma/i, suggestion: 'Add missing separator between elements', examples: ['item1, item2', 'key: value'] }, { pattern: /invalid.*attribute|unknown.*attribute|attribute.*not.*allowed/i, suggestion: 'Check attribute spelling and allowed values', examples: ['[role=example]', '[source,ruby]'] }, { pattern: /invalid.*heading|heading.*level|expected.*heading/i, suggestion: 'Use valid heading syntax with = or # markers', examples: ['= Level 1', '== Level 2', '### Level 3'] }, { pattern: /invalid.*list|list.*marker|expected.*list.*item/i, suggestion: 'Use correct list markers (*, -, ., or numbered)', examples: ['* bullet', '. ordered', 'term:: definition'] }, { pattern: /invalid.*link|malformed.*url|link.*syntax/i, suggestion: 'Use correct link syntax: text[url] or link:url[]', examples: ['Google[https://google.com]', 'link:file.adoc[]'] }, { pattern: /invalid.*table|table.*delimiter|expected.*separator/i, suggestion: 'Check table syntax with | delimiters', examples: ["|===\n| Cell 1 | Cell 2\n|==="] }, { pattern: /invalid.*block|block.*delimiter|unterminated.*block/i, suggestion: 'Ensure block delimiters match (----, ****, ====, etc.)', examples: ["----\ncode\n----", "====\nexample\n===="] }, { pattern: /invalid.*macro|unknown.*macro|macro.*syntax/i, suggestion: 'Check macro syntax: name:target[attributes]', examples: ['include::file.adoc[]', 'image::image.png[]'] } ].freeze
- VERSION =
'2.0.22'
Class Method Summary collapse
-
.binary_format?(format) ⇒ Boolean
Check if a format requires binary (file path) input.
- .build(&block) ⇒ Object
-
.config ⇒ Configuration
Shortcut to configuration.
-
.configure {|Configuration| ... } ⇒ void
Shortcut to configure.
-
.convert(text, from:, to:) ⇒ String
Convert document text from one format to another.
-
.convert_file(path, to:, from: nil) ⇒ String
Convert a file from one format to another.
-
.describe_element(elem) ⇒ String
Describe an element for display.
-
.detect_format(filename) ⇒ Symbol?
Detect format from a file extension.
-
.document_stats(doc) ⇒ Hash
Gather statistics about a parsed document.
-
.file_info(path) ⇒ Hash
Get file metadata for display.
-
.format_capabilities ⇒ Hash<Symbol, Hash<Symbol, Boolean>>
Get capability summary for all registered formats.
-
.get_format(format_name) ⇒ Module?
Get a registered format.
-
.manipulate(document) ⇒ DocumentManipulator
Create a DocumentManipulator for chainable operations.
-
.normalize_format(name) ⇒ Symbol?
Normalize a format name string to a symbol.
-
.parse(text, format:) ⇒ Coradoc::CoreModel::Base, Object
Parse text to a document model.
-
.parse_file(path, format: nil) ⇒ Coradoc::CoreModel::Base
Parse a document from a file path.
-
.parse_format?(format) ⇒ Boolean
Check if a format supports parsing (reading input).
-
.register_format(format_name, format_module, **options) ⇒ void
Register a format gem.
-
.registered_formats ⇒ Array<Symbol>
List all registered formats.
-
.registry ⇒ Registry
Get the format registry.
-
.resolve_includes(document, base_dir:, missing_include: :error, max_depth: Coradoc::ResolveIncludes::DEFAULT_MAX_DEPTH, allow_unsafe: false, resolver: nil) ⇒ Coradoc::CoreModel::Base
Resolve include:: directives in a parsed document.
-
.resolve_output_format(output_file, default: :html) ⇒ Symbol
Resolve the output format from a filename, with a default.
-
.serialize(model, to:) ⇒ String
Serialize a CoreModel to a specific format.
-
.serialize_format?(format) ⇒ Boolean
Check if a format supports serialization (writing output).
-
.strip_unicode(string, only: nil) ⇒ String
Strip unicode whitespace from a string.
-
.to_core(model) ⇒ Coradoc::CoreModel::Base
Transform a model to CoreModel.
-
.validate_file(path, format: nil) ⇒ Coradoc::Validation::Result
Validate a document file.
Methods included from Configurable
load_configuration, reset_configuration!
Class Method Details
.binary_format?(format) ⇒ Boolean
Check if a format requires binary (file path) input
315 316 317 318 |
# File 'lib/coradoc/coradoc.rb', line 315
# Check whether a format requires binary (file path) input.
#
# NOTE(review): the rendered listing shows `registry.(format)`, i.e. the
# accessor name after `registry.` was lost in extraction. `options` is
# restored here by inference from the `**options` accepted by
# `register_format` - confirm the name against Registry.
#
# @param format the format name to look up
# @return [Boolean] true only when the options stored for the format carry
#   `binary: true`; false when the lookup is nil or the flag is anything else
def binary_format?(format)
  format_options = registry.options(format)
  # A nil lookup short-circuits to nil, and `nil == true` is false.
  format_options&.fetch(:binary, false) == true
end
.build(&block) ⇒ Object
181 182 183 |
# File 'lib/coradoc/document_builder.rb', line 181
# Module-level shortcut that hands the given block to DocumentBuilder.build.
#
# @yield forwarded unchanged to DocumentBuilder.build
# @return [Object] whatever DocumentBuilder.build returns
def self.build(&block)
  DocumentBuilder.build(&block)
end
.config ⇒ Configuration
Shortcut to configuration
516 517 518 |
# File 'lib/coradoc/configurable.rb', line 516
# Shortcut to the current configuration object.
#
# @return [Configuration] the value of Configurable.configuration
def self.config
  Configurable.configuration
end
.configure {|Configuration| ... } ⇒ void
This method returns an undefined value.
Shortcut to configure
524 525 526 |
# File 'lib/coradoc/configurable.rb', line 524
# Shortcut to Configurable.configure.
#
# Does nothing (and returns nil) when called without a block.
#
# @yield forwarded unchanged to Configurable.configure
# @return [void]
def self.configure(&block)
  return unless block_given?

  Configurable.configure(&block)
end
.convert(text, from:, to:) ⇒ String
Convert document text from one format to another
This is the main entry point for format conversion. It handles the complete pipeline: parse -> transform to CoreModel -> transform to target -> serialize
190 191 192 193 194 195 196 |
# File 'lib/coradoc/coradoc.rb', line 190
# Convert document text from one format to another.
#
# Runs `parse` on the text with the source format, then hands the result to
# `serialize` for the target format.
#
# @param text [String] the source document text
# @param from the source format name, passed to `parse` as `format:`
# @param to the target format name, passed to `serialize` as `to:`
# @param ** extra keyword arguments, forwarded untouched to `serialize`
# @return [String] the result of `serialize`
def convert(text, from:, to:, **)
  serialize(parse(text, format: from), to: to, **)
end
.convert_file(path, to:, from: nil) ⇒ String
Convert a file from one format to another
303 304 305 306 307 308 309 |
# File 'lib/coradoc/coradoc.rb', line 303
# Convert a file from one format to another.
#
# @param path [String] path of the file to convert
# @param to the target format name, passed to `serialize`
# @param from the source format name; when omitted it is taken from
#   `detect_format(path)`
# @param ** extra keyword arguments, forwarded untouched to `serialize`
# @return [String] the result of `serialize`
# @raise [UnsupportedFormatError] when no source format is given and none
#   can be detected from the path
def convert_file(path, to:, from: nil, **)
  input_format = from || detect_format(path)
  unless input_format
    raise UnsupportedFormatError, "Could not detect format for: #{path}"
  end

  serialize(parse_file(path, format: input_format), to: to, **)
end
.describe_element(elem) ⇒ String
Describe an element for display
435 436 437 438 439 440 441 442 443 444 445 446 447 448 |
# File 'lib/coradoc/coradoc.rb', line 435
# Describe an element for display.
#
# - Anything that is not a CoreModel::Base is rendered with `to_s`.
# - An element with a title becomes "Type: title".
# - A CoreModel::Block with content becomes "Type: preview", where the
#   preview is the content text cut to 50 characters, with "..." appended
#   only when something was actually cut.
# - Everything else is described by its bare type name.
#
# The type name is the last `::` segment of the element's class name.
#
# @param elem [Object] the element to describe
# @return [String] a short human-readable description
def describe_element(elem)
  return elem.to_s unless elem.is_a?(CoreModel::Base)

  type = elem.class.name.split('::').last
  return "#{type}: #{elem.title}" if elem.title
  return type unless elem.is_a?(CoreModel::Block) && elem.content

  text = elem.content.to_s
  # Slice and threshold use the same limit: the earlier `[0..50]` slice kept
  # 51 characters, so 51-character content gained "..." with nothing removed.
  preview = text.length > 50 ? "#{text[0, 50]}..." : text
  "#{type}: #{preview}"
end
.detect_format(filename) ⇒ Symbol?
Detect format from a file extension
251 252 253 254 255 256 257 258 |
# File 'lib/coradoc/coradoc.rb', line 251
# Detect a format from a file extension.
#
# The lower-cased extension of the filename (as returned by File.extname,
# leading dot included) is compared with the `:extensions` option of each
# registered format; the first format that lists it wins.
#
# NOTE(review): the rendered listing shows `registry.(name)`, i.e. the
# accessor name after `registry.` was lost in extraction. `options` is
# restored here by inference from the `**options` accepted by
# `register_format` - confirm the name against Registry.
#
# @param filename [String] file name or path
# @return [Symbol, nil] the matching format name, or nil when none matches
def detect_format(filename)
  ext = File.extname(filename).downcase
  registry.each_key do |name|
    extensions = registry.options(name)[:extensions]
    return name if extensions&.include?(ext)
  end
  nil
end
.document_stats(doc) ⇒ Hash
Gather statistics about a parsed document
418 419 420 421 422 423 424 425 426 427 428 429 |
# File 'lib/coradoc/coradoc.rb', line 418
# Gather statistics about a parsed document.
#
# The result holds `:title` when the document has one. For a
# CoreModel::StructuralElement it also holds `:child_count` (from
# `count_elements`) and `:element_counts` (from `count_element_types`).
#
# @param doc the parsed document
# @return [Hash] the collected statistics; empty when nothing applies
def document_stats(doc)
  summary = {}
  title = doc.title
  summary[:title] = title if title
  return summary unless doc.is_a?(CoreModel::StructuralElement)

  summary.merge!(
    child_count: count_elements(doc),
    element_counts: count_element_types(doc)
  )
end
.file_info(path) ⇒ Hash
Get file metadata for display
389 390 391 392 393 394 |
# File 'lib/coradoc/coradoc.rb', line 389
# Get file metadata for display.
#
# @param path [String] path of the file to inspect
# @return [Hash] `:size` (from File.size), `:format` (from `detect_format`,
#   may be nil) and, unless `binary_format?` is true for that format,
#   `:lines` (number of lines in the file)
def file_info(path)
  detected = detect_format(path)
  details = { size: File.size(path), format: detected }
  return details if binary_format?(detected)

  details[:lines] = File.foreach(path).count
  details
end
.format_capabilities ⇒ Hash<Symbol, Hash<Symbol, Boolean>>
Get capability summary for all registered formats
Returns a hash mapping each format name to its capabilities (parse: bool, serialize: bool). Useful for CLI display and introspection.
365 366 367 368 369 370 371 372 |
# File 'lib/coradoc/coradoc.rb', line 365
# Get a capability summary for all registered formats.
#
# @return [Hash<Symbol, Hash<Symbol, Boolean>>] each registered format name
#   mapped to `{ parse: ..., serialize: ... }`, taken from `parse_format?`
#   and `serialize_format?`
def format_capabilities
  registered_formats.to_h do |name|
    [name, { parse: parse_format?(name), serialize: serialize_format?(name) }]
  end
end
.get_format(format_name) ⇒ Module?
Get a registered format
78 79 80 |
# File 'lib/coradoc/coradoc.rb', line 78
# Look up a registered format.
#
# @param format_name the name the format was registered under
# @return [Module, nil] the result of `registry.get` for that name
def get_format(format_name)
  registry.get(format_name)
end
.manipulate(document) ⇒ DocumentManipulator
Create a DocumentManipulator for chainable operations
239 240 241 |
# File 'lib/coradoc/coradoc.rb', line 239
# Wrap a document in a DocumentManipulator for chainable operations.
#
# @param document the document to wrap
# @return [DocumentManipulator] a new manipulator built from the document
def manipulate(document)
  DocumentManipulator.new(document)
end
.normalize_format(name) ⇒ Symbol?
Normalize a format name string to a symbol
Handles common aliases like “adoc” → :asciidoc, “md” → :markdown.
326 327 328 329 330 331 332 333 334 335 |
# File 'lib/coradoc/coradoc.rb', line 326
# Normalize a format name string to a symbol.
#
# The name is stringified and lower-cased, then compared with the `:aliases`
# option of each registered format; the first format that lists it wins.
# When no format claims the alias, the lower-cased name itself is returned
# as a symbol.
#
# NOTE(review): the rendered listing shows `registry.(fmt_name)`, i.e. the
# accessor name after `registry.` was lost in extraction. `options` is
# restored here by inference from the `**options` accepted by
# `register_format` - confirm the name against Registry.
#
# @param name [String, Symbol, nil] the format name or alias
# @return [Symbol, nil] the resolved format name; nil when `name` is falsy
def normalize_format(name)
  return nil unless name

  key = name.to_s.downcase
  registry.each_key do |fmt_name|
    aliases = registry.options(fmt_name)[:aliases]
    return fmt_name if aliases&.include?(key)
  end
  key.to_sym
end
.parse(text, format:) ⇒ Coradoc::CoreModel::Base, Object
Parse text to a document model.
Graph mode is the only mode: include:: directives survive as CoreModel::Include link nodes pointing at other files. NO file I/O happens during parse. The result is a single document that references other documents via Include edges — a text graph.
To splice included content inline, call Coradoc.resolve_includes on the parsed document. This is an explicit, separate step so the caller controls when (and whether) file I/O happens.
110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/coradoc/coradoc.rb', line 110
# Parse text to a document model.
#
# The text is passed through the `:before_parse` hook, parsed by the format
# module's `parse_to_core`, and the parsed result is passed through the
# `:after_parse` hook, whose return value is returned.
#
# @param text [String] the document text
# @param format the name of a registered format
# @return [Coradoc::CoreModel::Base, Object] the value returned by the
#   `:after_parse` hook invocation
# @raise [UnsupportedFormatError] when the format is not registered
def parse(text, format:)
  handler = get_format(format)
  unless handler
    raise UnsupportedFormatError,
          "Format '#{format}' is not registered. " \
          "Available formats: #{registered_formats.join(', ')}"
  end

  prepared = Hooks.invoke(:before_parse, text, format: format)
  parsed = handler.parse_to_core(prepared)
  Hooks.invoke(:after_parse, parsed, format: format)
end
.parse_file(path, format: nil) ⇒ Coradoc::CoreModel::Base
Parse a document from a file path
Handles both text formats (reads file content) and binary formats (passes file path directly to the format module).
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 |
# File 'lib/coradoc/coradoc.rb', line 273
# Parse a document from a file path.
#
# For a binary format the path itself is handed to the format module's
# `parse_to_core`. For any other format the file is read, the content goes
# through the `:before_parse` hook, the format module's `parse_file_to_core`
# receives the path and the content, and the result goes through the
# `:after_parse` hook.
#
# NOTE(review): the binary branch invokes neither hook - confirm that
# skipping `:after_parse` there is intentional.
#
# @param path [String] path of the file to parse
# @param format the format name; detected from the path when omitted
# @return [Coradoc::CoreModel::Base] the parsed document
# @raise [FileNotFoundError] when the path does not exist
# @raise [UnsupportedFormatError] when the format cannot be detected or is
#   not registered
def parse_file(path, format: nil)
  raise FileNotFoundError, path unless File.exist?(path)

  source_format = format || detect_format(path)
  raise UnsupportedFormatError, "Could not detect format for: #{path}" unless source_format

  handler = get_format(source_format)
  raise UnsupportedFormatError, "Format '#{source_format}' is not registered" unless handler

  return handler.parse_to_core(path) if binary_format?(source_format)

  raw = File.read(path)
  prepared = Hooks.invoke(:before_parse, raw, format: source_format)
  parsed = handler.parse_file_to_core(path, prepared)
  Hooks.invoke(:after_parse, parsed, format: source_format)
end
.parse_format?(format) ⇒ Boolean
Check if a format supports parsing (reading input)
352 353 354 355 356 357 |
# File 'lib/coradoc/coradoc.rb', line 352
# Check whether a format supports parsing (reading input).
#
# A format counts as parseable when its module publicly responds to
# `parse_to_core` or `parse`. `respond_to?` is used instead of scanning
# `public_methods`, which would build the module's full method list twice
# on every call.
#
# @param format the format name to look up
# @return [Boolean] false when the format is not registered or its module
#   exposes neither method
def parse_format?(format)
  mod = get_format(format)
  return false unless mod

  mod.respond_to?(:parse_to_core) || mod.respond_to?(:parse)
end
.register_format(format_name, format_module, **options) ⇒ void
This method returns an undefined value.
Register a format gem
68 69 70 71 72 |
# File 'lib/coradoc/coradoc.rb', line 68
# Register a format gem.
#
# The module is extended with FormatModule::Interface when it does not
# already have it, stored in the registry together with its options, and
# then checked with FormatModule.validate!.
#
# NOTE(review): the rendered listing lost the `options` identifier (it shows
# `**` in the signature and `register(format_name, format_module, )` in the
# call). The documented signature is `**options`; passing the hash
# positionally to `registry.register` is inferred from the shape of the
# damaged call - confirm against Registry#register.
# NOTE(review): the module is registered before it is validated, so a module
# rejected by `validate!` presumably stays in the registry - confirm whether
# that is intended.
#
# @param format_name the name to register the format under
# @param format_module [Module] the module implementing the format
# @param options [Hash] per-format options stored with the registration
# @return [void]
def register_format(format_name, format_module, **options)
  format_module.extend(FormatModule::Interface) unless format_module.is_a?(FormatModule::Interface)
  registry.register(format_name, format_module, options)
  FormatModule.validate!(format_module, format_name)
end
.registered_formats ⇒ Array<Symbol>
List all registered formats
85 86 87 |
# File 'lib/coradoc/coradoc.rb', line 85
# List all registered formats.
#
# @return [Array<Symbol>] the value of `registry.list`
def registered_formats
  registry.list
end
.registry ⇒ Registry
Get the format registry
58 59 60 |
# File 'lib/coradoc/coradoc.rb', line 58
# Get the format registry.
#
# The Registry is created on first access and the same instance is returned
# on later calls.
#
# @return [Registry] the memoized registry
def registry
  @registry ||= Registry.new
end
.resolve_includes(document, base_dir:, missing_include: :error, max_depth: Coradoc::ResolveIncludes::DEFAULT_MAX_DEPTH, allow_unsafe: false, resolver: nil) ⇒ Coradoc::CoreModel::Base
Resolve include:: directives in a parsed document.
Walks the document tree and replaces every CoreModel::Include link node with the parsed content of its target file, recursing into the result. The original document is left unchanged; a new subtree is constructed.
This is the explicit “flatten” step that turns a text graph into a single spliced document. Callers control:
- `base_dir` — where to root relative include paths
- `missing_include` — what to do when a target is missing
- `max_depth` — recursion cap
- `allow_unsafe` — opt out of path-traversal protection
- `resolver` — custom resolution strategy (e.g. HTTP, in-memory)
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
# File 'lib/coradoc/coradoc.rb', line 154
# Resolve include:: directives in a parsed document.
#
# The `resolver` argument is first normalized with
# Coradoc::IncludeResolver.coerce (given `base_dir` and `allow_unsafe`), and
# the document is then handed to Coradoc::ResolveIncludes.call together with
# the remaining settings.
#
# @param document the parsed document
# @param base_dir where relative include paths are rooted
# @param missing_include what to do when a target is missing
# @param max_depth recursion cap
# @param allow_unsafe opt out of path-traversal protection
# @param resolver custom resolution strategy; nil is passed to `coerce` as-is
# @return [Coradoc::CoreModel::Base] the result of ResolveIncludes.call
def resolve_includes(document, base_dir:, missing_include: :error,
                     max_depth: Coradoc::ResolveIncludes::DEFAULT_MAX_DEPTH,
                     allow_unsafe: false, resolver: nil)
  effective_resolver = Coradoc::IncludeResolver.coerce(
    resolver, base_dir:, allow_unsafe:
  )
  Coradoc::ResolveIncludes.call(
    document,
    resolver: effective_resolver,
    base_dir:,
    missing_include:,
    max_depth:
  )
end
.resolve_output_format(output_file, default: :html) ⇒ Symbol
Resolve the output format from a filename, with a default
379 380 381 382 383 |
# File 'lib/coradoc/coradoc.rb', line 379
# Resolve the output format from a filename, with a default.
#
# @param output_file [String, nil] the output file name
# @param default [Symbol] returned when no file name is given or no format
#   can be detected from it
# @return [Symbol] the detected format or the default
def resolve_output_format(output_file, default: :html)
  output_file ? (detect_format(output_file) || default) : default
end
.serialize(model, to:) ⇒ String
Serialize a CoreModel to a specific format
220 221 222 223 224 225 226 227 |
# File 'lib/coradoc/coradoc.rb', line 220
# Serialize a CoreModel to a specific format.
#
# The model is passed through the `:before_serialize` hook, serialized by
# the format module, and the output is passed through the `:after_serialize`
# hook, whose return value is returned.
#
# @param model the model to serialize
# @param to the name of a registered format
# @param ** extra keyword arguments, forwarded untouched to the format
#   module's `serialize`
# @return [String] the value returned by the `:after_serialize` hook
# @raise [UnsupportedFormatError] when the format is not registered
def serialize(model, to:, **)
  handler = get_format(to)
  raise UnsupportedFormatError, "Format '#{to}' is not registered" unless handler

  prepared = Hooks.invoke(:before_serialize, model, format: to)
  output = handler.serialize(prepared, **)
  Hooks.invoke(:after_serialize, output, format: to)
end
.serialize_format?(format) ⇒ Boolean
Check if a format supports serialization (writing output)
341 342 343 344 345 346 |
# File 'lib/coradoc/coradoc.rb', line 341
# Check whether a format supports serialization (writing output).
#
# @param format the format name to look up
# @return [Boolean] false when the format is not registered; otherwise the
#   answer of the format module's `serialize?`
def serialize_format?(format)
  handler = get_format(format)
  handler ? handler.serialize? : false
end
.strip_unicode(string, only: nil) ⇒ String
Strip unicode whitespace from a string
455 456 457 458 459 460 461 462 463 464 465 466 |
# File 'lib/coradoc/coradoc.rb', line 455
# Strip unicode whitespace from a string.
#
# Removes runs of Unicode space separators (`\p{Zs}`: space, no-break space,
# ideographic space, ...) from the very start and/or very end of the string.
# Line breaks are not in `\p{Zs}` and are never removed.
#
# The patterns are anchored with `\A` / `\z` so that only the ends of the
# whole string are touched. The per-line anchors `^` / `$` would also match
# at line boundaries inside a multi-line string and strip indentation or
# trailing spaces of an inner line.
#
# @param string [String, nil] the text to strip
# @param only [Symbol, nil] `:begin` strips the start only, `:end` strips
#   the end only; any other value strips both
# @return [String, nil] a stripped copy, or nil when `string` is nil
def strip_unicode(string, only: nil)
  return string if string.nil?

  case only
  when :begin
    string.sub(/\A\p{Zs}+/, '')
  when :end
    string.sub(/\p{Zs}+\z/, '')
  else
    string.sub(/\A\p{Zs}+/, '').sub(/\p{Zs}+\z/, '')
  end
end
.to_core(model) ⇒ Coradoc::CoreModel::Base
Transform a model to CoreModel
202 203 204 205 206 207 208 209 210 211 212 |
# File 'lib/coradoc/coradoc.rb', line 202
# Transform a model to CoreModel.
#
# A CoreModel::Base is returned unchanged. Otherwise the registered format
# modules are asked in turn whether they handle the model, and the first one
# that does performs the conversion.
#
# @param model the model to transform
# @return [Coradoc::CoreModel::Base] the CoreModel representation
# @raise [TransformationError] when no registered format handles the model
def to_core(model)
  return model if model.is_a?(CoreModel::Base)

  registry.each_value do |candidate|
    return candidate.to_core(model) if candidate.handles_model?(model)
  end

  raise TransformationError, "No transformer found for #{model.class}"
end
.validate_file(path, format: nil) ⇒ Coradoc::Validation::Result
Validate a document file
Parses the file and validates against auto-generated schema. Returns a Coradoc::Validation::Result.
405 406 407 408 409 410 411 412 |
# File 'lib/coradoc/coradoc.rb', line 405
# Validate a document file.
#
# The file is parsed with `parse_file`, and a schema is generated from the
# class of the parsed document. When a schema is produced the document is
# validated against it; otherwise a fresh Validation::Result is returned.
#
# @param path [String] path of the file to validate
# @param format the format name, passed through to `parse_file`
# @return [Coradoc::Validation::Result] the validation outcome
def validate_file(path, format: nil)
  document = parse_file(path, format: format)
  schema = Validation::SchemaGenerator.generate(document.class)
  schema ? schema.validate(document) : Validation::Result.new
end