Module: Coradoc
- Extended by:
- Configurable
- Defined in:
- lib/coradoc/coradoc.rb,
lib/coradoc.rb,
lib/coradoc/cli.rb,
lib/coradoc/hooks.rb,
lib/coradoc/input.rb,
lib/coradoc/query.rb,
lib/coradoc/errors.rb,
lib/coradoc/logger.rb,
lib/coradoc/output.rb,
lib/coradoc/version.rb,
lib/coradoc/visitor.rb,
lib/coradoc/registry.rb,
lib/coradoc/transform.rb,
lib/coradoc/core_model.rb,
lib/coradoc/validation.rb,
lib/coradoc/configurable.rb,
lib/coradoc/format_module.rb,
lib/coradoc/link_rewriter.rb,
lib/coradoc/relative_path.rb,
lib/coradoc/core_model/toc.rb,
lib/coradoc/transform/base.rb,
lib/coradoc/core_model/base.rb,
lib/coradoc/core_model/term.rb,
lib/coradoc/core_model/block.rb,
lib/coradoc/core_model/image.rb,
lib/coradoc/core_model/table.rb,
lib/coradoc/document_builder.rb,
lib/coradoc/include_resolver.rb,
lib/coradoc/resolve_includes.rb,
lib/coradoc/include_selectors.rb,
lib/coradoc/core_model/callout.rb,
lib/coradoc/core_model/include.rb,
lib/coradoc/processor_registry.rb,
lib/coradoc/core_model/footnote.rb,
lib/coradoc/core_model/metadata.rb,
lib/coradoc/serializer/registry.rb,
lib/coradoc/core_model/list_item.rb,
lib/coradoc/document_manipulator.rb,
lib/coradoc/core_model/list_block.rb,
lib/coradoc/core_model/open_block.rb,
lib/coradoc/core_model/pass_block.rb,
lib/coradoc/core_model/stem_block.rb,
lib/coradoc/link_rewriter/visitor.rb,
lib/coradoc/core_model/frontmatter.rb,
lib/coradoc/core_model/quote_block.rb,
lib/coradoc/core_model/verse_block.rb,
lib/coradoc/include_selectors/tags.rb,
lib/coradoc/link_rewriter/identity.rb,
lib/coradoc/performance_regression.rb,
lib/coradoc/core_model/bibliography.rb,
lib/coradoc/core_model/callout_text.rb,
lib/coradoc/core_model/comment_line.rb,
lib/coradoc/core_model/has_children.rb,
lib/coradoc/core_model/id_generator.rb,
lib/coradoc/core_model/source_block.rb,
lib/coradoc/core_model/text_content.rb,
lib/coradoc/include_selectors/lines.rb,
lib/coradoc/core_model/comment_block.rb,
lib/coradoc/core_model/example_block.rb,
lib/coradoc/core_model/listing_block.rb,
lib/coradoc/core_model/literal_block.rb,
lib/coradoc/core_model/sidebar_block.rb,
lib/coradoc/core_model/toc_generator.rb,
lib/coradoc/include_selectors/indent.rb,
lib/coradoc/core_model/inline_element.rb,
lib/coradoc/core_model/reviewer_block.rb,
lib/coradoc/core_model/definition_item.rb,
lib/coradoc/core_model/definition_list.rb,
lib/coradoc/core_model/include_options.rb,
lib/coradoc/core_model/output_artifact.rb,
lib/coradoc/core_model/paragraph_block.rb,
lib/coradoc/core_model/annotation_block.rb,
lib/coradoc/core_model/children_content.rb,
lib/coradoc/include_resolver/filesystem.rb,
lib/coradoc/core_model/element_attribute.rb,
lib/coradoc/core_model/frontmatter/codec.rb,
lib/coradoc/core_model/bibliography_entry.rb,
lib/coradoc/core_model/raw_inline_element.rb,
lib/coradoc/core_model/structural_element.rb,
lib/coradoc/include_selectors/level_offset.rb,
lib/coradoc/core_model/include_level_offset.rb,
lib/coradoc/core_model/horizontal_rule_block.rb,
lib/coradoc/core_model/frontmatter/text_splitter.rb,
lib/coradoc/core_model/frontmatter/field_transform.rb,
lib/coradoc/core_model/frontmatter/schema_resolver.rb,
lib/coradoc/core_model/frontmatter/frontmatter_value.rb
Overview
Coradoc - A hub-and-spoke document transformation library
Coradoc provides a unified document model (CoreModel) and transformation infrastructure for converting between document formats such as AsciiDoc, HTML, and Markdown.
## Architecture
Coradoc uses a hub-and-spoke architecture where CoreModel acts as the canonical document representation. Each format (AsciiDoc, HTML, Markdown) has its own model and transformers to/from CoreModel.
```
Source Format → Source Model → CoreModel → Target Model → Target Format
```
## Quick Start
Defined Under Namespace
Modules: Configurable, CoreModel, FormatModule, Hooks, IncludeSelectors, Input, LinkRewriter, Output, PerformanceRegression, ProcessorRegistry, Query, RelativePath, Serializer, Transform, Validation, Visitor
Classes: CLI, CircularIncludeError, DocumentBuilder, DocumentManipulator, Error, FileNotFoundError, IncludeDepthExceededError, IncludeNotFoundError, IncludeResolver, IncludeTooLargeError, Logger, ParseError, Registry, ResolveIncludes, TransformationError, UnsafeIncludeError, UnsupportedFormatError, ValidationError
Constant Summary collapse
- ERROR_SUGGESTIONS =
Suggestion patterns for common parsing errors
These patterns are matched against error messages and source content to provide helpful suggestions for fixing common issues.
[ { pattern: /unterminated.*string|unexpected.*end.*of.*input|expected.*["']/i, suggestion: 'Check for unclosed quotes or strings', examples: ["'text'", '"text"'] }, { pattern: /unexpected.*indentation|indentation.*error|inconsistent.*indent/i, suggestion: 'Check indentation - use consistent spaces or tabs', examples: [' indented line', ' nested item'] }, { pattern: /missing.*separator|expected.*delimiter|missing.*comma/i, suggestion: 'Add missing separator between elements', examples: ['item1, item2', 'key: value'] }, { pattern: /invalid.*attribute|unknown.*attribute|attribute.*not.*allowed/i, suggestion: 'Check attribute spelling and allowed values', examples: ['[role=example]', '[source,ruby]'] }, { pattern: /invalid.*heading|heading.*level|expected.*heading/i, suggestion: 'Use valid heading syntax with = or # markers', examples: ['= Level 1', '== Level 2', '### Level 3'] }, { pattern: /invalid.*list|list.*marker|expected.*list.*item/i, suggestion: 'Use correct list markers (*, -, ., or numbered)', examples: ['* bullet', '. ordered', 'term:: definition'] }, { pattern: /invalid.*link|malformed.*url|link.*syntax/i, suggestion: 'Use correct link syntax: text[url] or link:url[]', examples: ['Google[https://google.com]', 'link:file.adoc[]'] }, { pattern: /invalid.*table|table.*delimiter|expected.*separator/i, suggestion: 'Check table syntax with | delimiters', examples: ["|===\n| Cell 1 | Cell 2\n|==="] }, { pattern: /invalid.*block|block.*delimiter|unterminated.*block/i, suggestion: 'Ensure block delimiters match (----, ****, ====, etc.)', examples: ["----\ncode\n----", "====\nexample\n===="] }, { pattern: /invalid.*macro|unknown.*macro|macro.*syntax/i, suggestion: 'Check macro syntax: name:target[attributes]', examples: ['include::file.adoc[]', 'image::image.png[]'] } ].freeze
- VERSION =
'2.0.23'
Class Method Summary collapse
-
.binary_format?(format) ⇒ Boolean
Check if a format requires binary (file path) input.
- .build(&block) ⇒ Object
-
.config ⇒ Configuration
Shortcut to configuration.
-
.configure {|Configuration| ... } ⇒ void
Shortcut to configure.
-
.convert(text, from:, to:) ⇒ String
Convert document text from one format to another.
-
.convert_file(path, to:, from: nil) ⇒ String
Convert a file from one format to another.
-
.describe_element(elem) ⇒ String
Describe an element for display.
-
.detect_format(filename) ⇒ Symbol?
Detect format from a file extension.
-
.document_stats(doc) ⇒ Hash
Gather statistics about a parsed document.
-
.file_info(path) ⇒ Hash
Get file metadata for display.
-
.format_capabilities ⇒ Hash<Symbol, Hash<Symbol, Boolean>>
Get capability summary for all registered formats.
-
.get_format(format_name) ⇒ Module?
Get a registered format.
-
.manipulate(document) ⇒ DocumentManipulator
Create a DocumentManipulator for chainable operations.
-
.normalize_format(name) ⇒ Symbol?
Normalize a format name string to a symbol.
-
.parse(text, format:) ⇒ Coradoc::CoreModel::Base, Object
Parse text to a document model.
-
.parse_file(path, format: nil) ⇒ Coradoc::CoreModel::Base
Parse a document from a file path.
-
.parse_format?(format) ⇒ Boolean
Check if a format supports parsing (reading input).
-
.register_format(format_name, format_module, **options) ⇒ void
Register a format gem.
-
.registered_formats ⇒ Array<Symbol>
List all registered formats.
-
.registry ⇒ Registry
Get the format registry.
-
.resolve_includes(document, base_dir:, missing_include: :error, max_depth: Coradoc::ResolveIncludes::DEFAULT_MAX_DEPTH, allow_unsafe: false, resolver: nil) ⇒ Coradoc::CoreModel::Base
Resolve include:: directives in a parsed document.
-
.resolve_output_format(output_file, default: :html) ⇒ Symbol
Resolve the output format from a filename, with a default.
-
.rewrite_links(document, rewriter: nil, &block) ⇒ Coradoc::CoreModel::Base
Rewrite every link/xref target in a parsed document.
-
.serialize(model, to:) ⇒ String
Serialize a CoreModel to a specific format.
-
.serialize_format?(format) ⇒ Boolean
Check if a format supports serialization (writing output).
-
.strip_unicode(string, only: nil) ⇒ String
Strip unicode whitespace from a string.
-
.to_core(model) ⇒ Coradoc::CoreModel::Base
Transform a model to CoreModel.
-
.validate_file(path, format: nil) ⇒ Coradoc::Validation::Result
Validate a document file.
Methods included from Configurable
load_configuration, reset_configuration!
Class Method Details
.binary_format?(format) ⇒ Boolean
Check if a format requires binary (file path) input
342 343 344 345 |
# File 'lib/coradoc/coradoc.rb', line 342 def binary_format?(format) opts = registry.(format) opts&.fetch(:binary, false) == true end |
.build(&block) ⇒ Object
181 182 183 |
# File 'lib/coradoc/document_builder.rb', line 181 def self.build(&block) DocumentBuilder.build(&block) end |
.config ⇒ Configuration
Shortcut to configuration
516 517 518 |
# File 'lib/coradoc/configurable.rb', line 516 def self.config Configurable.configuration end |
.configure {|Configuration| ... } ⇒ void
This method returns an undefined value.
Shortcut to configure
524 525 526 |
# File 'lib/coradoc/configurable.rb', line 524 def self.configure(&block) Configurable.configure(&block) if block_given? end |
.convert(text, from:, to:) ⇒ String
Convert document text from one format to another
This is the main entry point for format conversion. It handles the complete pipeline: parse -> transform to CoreModel -> transform to target -> serialize
217 218 219 220 221 222 223 |
# File 'lib/coradoc/coradoc.rb', line 217 def convert(text, from:, to:, **) # Parse to CoreModel core = parse(text, format: from) # Convert to target format serialize(core, to: to, **) end |
.convert_file(path, to:, from: nil) ⇒ String
Convert a file from one format to another
330 331 332 333 334 335 336 |
# File 'lib/coradoc/coradoc.rb', line 330 def convert_file(path, to:, from: nil, **) source_format = from || detect_format(path) raise UnsupportedFormatError, "Could not detect format for: #{path}" unless source_format core = parse_file(path, format: source_format) serialize(core, to: to, **) end |
.describe_element(elem) ⇒ String
Describe an element for display
462 463 464 465 466 467 468 469 470 471 472 473 474 475 |
# File 'lib/coradoc/coradoc.rb', line 462 def describe_element(elem) return elem.to_s unless elem.is_a?(CoreModel::Base) type = elem.class.name.split('::').last if elem.title "#{type}: #{elem.title}" elsif elem.is_a?(CoreModel::Block) && elem.content preview = elem.content.to_s[0..50] preview += '...' if elem.content.to_s.length > 50 "#{type}: #{preview}" else type end end |
.detect_format(filename) ⇒ Symbol?
Detect format from a file extension
278 279 280 281 282 283 284 285 |
# File 'lib/coradoc/coradoc.rb', line 278 def detect_format(filename) ext = File.extname(filename).downcase registry.each_key do |name| opts = registry.(name) return name if opts[:extensions]&.include?(ext) end nil end |
.document_stats(doc) ⇒ Hash
Gather statistics about a parsed document
445 446 447 448 449 450 451 452 453 454 455 456 |
# File 'lib/coradoc/coradoc.rb', line 445 def document_stats(doc) stats = {} stats[:title] = doc.title if doc.title if doc.is_a?(CoreModel::StructuralElement) stats[:child_count] = count_elements(doc) stats[:element_counts] = count_element_types(doc) end stats end |
.file_info(path) ⇒ Hash
Get file metadata for display
416 417 418 419 420 421 |
# File 'lib/coradoc/coradoc.rb', line 416 def file_info(path) fmt = detect_format(path) info = { size: File.size(path), format: fmt } info[:lines] = File.foreach(path).count unless binary_format?(fmt) info end |
.format_capabilities ⇒ Hash<Symbol, Hash<Symbol, Boolean>>
Get capability summary for all registered formats
Returns a hash mapping each format name to its capabilities (parse: bool, serialize: bool). Useful for CLI display and introspection.
392 393 394 395 396 397 398 399 |
# File 'lib/coradoc/coradoc.rb', line 392 def format_capabilities registered_formats.each_with_object({}) do |name, caps| caps[name] = { parse: parse_format?(name), serialize: serialize_format?(name) } end end |
.get_format(format_name) ⇒ Module?
Get a registered format
78 79 80 |
# File 'lib/coradoc/coradoc.rb', line 78 def get_format(format_name) registry.get(format_name) end |
.manipulate(document) ⇒ DocumentManipulator
Create a DocumentManipulator for chainable operations
266 267 268 |
# File 'lib/coradoc/coradoc.rb', line 266 def manipulate(document) DocumentManipulator.new(document) end |
.normalize_format(name) ⇒ Symbol?
Normalize a format name string to a symbol
Handles common aliases like "adoc" → :asciidoc, "md" → :markdown.
353 354 355 356 357 358 359 360 361 362 |
# File 'lib/coradoc/coradoc.rb', line 353 def normalize_format(name) return nil unless name key = name.to_s.downcase registry.each_key do |fmt_name| opts = registry.(fmt_name) return fmt_name if opts[:aliases]&.include?(key) end key.to_sym end |
.parse(text, format:) ⇒ Coradoc::CoreModel::Base, Object
Parse text to a document model.
Graph mode is the only mode: include:: directives survive as CoreModel::Include link nodes pointing at other files. NO file I/O happens during parse. The result is a single document that references other documents via Include edges — a text graph.
To splice included content inline, call Coradoc.resolve_includes on the parsed document. This is an explicit, separate step so the caller controls when (and whether) file I/O happens.
110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/coradoc/coradoc.rb', line 110 def parse(text, format:) format_module = get_format(format) unless format_module raise UnsupportedFormatError, "Format '#{format}' is not registered. " \ "Available formats: #{registered_formats.join(', ')}" end text = Hooks.invoke(:before_parse, text, format: format) result = format_module.parse_to_core(text) Hooks.invoke(:after_parse, result, format: format) end |
.parse_file(path, format: nil) ⇒ Coradoc::CoreModel::Base
Parse a document from a file path
Handles both text formats (reads file content) and binary formats (passes file path directly to the format module).
300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 |
# File 'lib/coradoc/coradoc.rb', line 300 def parse_file(path, format: nil) raise FileNotFoundError, path unless File.exist?(path) source_format = format || detect_format(path) raise UnsupportedFormatError, "Could not detect format for: #{path}" unless source_format format_module = get_format(source_format) raise UnsupportedFormatError, "Format '#{source_format}' is not registered" unless format_module if binary_format?(source_format) format_module.parse_to_core(path) else content = File.read(path) content = Hooks.invoke(:before_parse, content, format: source_format) result = format_module.parse_file_to_core(path, content) Hooks.invoke(:after_parse, result, format: source_format) end end |
.parse_format?(format) ⇒ Boolean
Check if a format supports parsing (reading input)
379 380 381 382 383 384 |
# File 'lib/coradoc/coradoc.rb', line 379 def parse_format?(format) mod = get_format(format) return false unless mod mod.public_methods.include?(:parse_to_core) || mod.public_methods.include?(:parse) end |
.register_format(format_name, format_module, **options) ⇒ void
This method returns an undefined value.
Register a format gem
68 69 70 71 72 |
# File 'lib/coradoc/coradoc.rb', line 68 def register_format(format_name, format_module, **) format_module.extend(FormatModule::Interface) unless format_module.is_a?(FormatModule::Interface) registry.register(format_name, format_module, ) FormatModule.validate!(format_module, format_name) end |
.registered_formats ⇒ Array<Symbol>
List all registered formats
85 86 87 |
# File 'lib/coradoc/coradoc.rb', line 85 def registered_formats registry.list end |
.registry ⇒ Registry
Get the format registry
58 59 60 |
# File 'lib/coradoc/coradoc.rb', line 58 def registry @registry ||= Registry.new end |
.resolve_includes(document, base_dir:, missing_include: :error, max_depth: Coradoc::ResolveIncludes::DEFAULT_MAX_DEPTH, allow_unsafe: false, resolver: nil) ⇒ Coradoc::CoreModel::Base
Resolve include:: directives in a parsed document.
Walks the document tree and replaces every CoreModel::Include link node with the parsed content of its target file, recursing into the result. The original document is left unchanged; a new subtree is constructed.
This is the explicit “flatten” step that turns a text graph into a single spliced document. Callers control:
- `base_dir` — where to root relative include paths
- `missing_include` — what to do when a target is missing
- `max_depth` — recursion cap
- `allow_unsafe` — opt out of path-traversal protection
- `resolver` — custom resolution strategy (e.g. HTTP, in-memory)
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
# File 'lib/coradoc/coradoc.rb', line 154 def resolve_includes(document, base_dir:, missing_include: :error, max_depth: Coradoc::ResolveIncludes::DEFAULT_MAX_DEPTH, allow_unsafe: false, resolver: nil) resolver = Coradoc::IncludeResolver.coerce( resolver, base_dir: base_dir, allow_unsafe: allow_unsafe ) Coradoc::ResolveIncludes.call( document, resolver: resolver, base_dir: base_dir, missing_include: missing_include, max_depth: max_depth ) end |
.resolve_output_format(output_file, default: :html) ⇒ Symbol
Resolve the output format from a filename, with a default
406 407 408 409 410 |
# File 'lib/coradoc/coradoc.rb', line 406 def resolve_output_format(output_file, default: :html) return default unless output_file detect_format(output_file) || default end |
.rewrite_links(document, rewriter: nil, &block) ⇒ Coradoc::CoreModel::Base
Rewrite every link/xref target in a parsed document.
Walks the document tree and invokes the supplied rewriter for each link and cross-reference target. The original document is never mutated — a NEW document is returned.
Verbatim blocks (SourceBlock, ListingBlock, LiteralBlock, PassBlock, StemBlock) are skipped entirely so link-shaped text inside code/math bodies is never rewritten.
The rewriter responds to #call(target:, kind:, context:) and returns the new target String. kind is :link or :xref; the block form is supported for one-liners.
196 197 198 |
# File 'lib/coradoc/coradoc.rb', line 196 def rewrite_links(document, rewriter: nil, &block) Coradoc::LinkRewriter.rewrite(document, rewriter: rewriter, &block) end |
.serialize(model, to:) ⇒ String
Serialize a CoreModel to a specific format
247 248 249 250 251 252 253 254 |
# File 'lib/coradoc/coradoc.rb', line 247 def serialize(model, to:, **) format_module = get_format(to) raise UnsupportedFormatError, "Format '#{to}' is not registered" unless format_module model = Hooks.invoke(:before_serialize, model, format: to) result = format_module.serialize(model, **) Hooks.invoke(:after_serialize, result, format: to) end |
.serialize_format?(format) ⇒ Boolean
Check if a format supports serialization (writing output)
368 369 370 371 372 373 |
# File 'lib/coradoc/coradoc.rb', line 368 def serialize_format?(format) mod = get_format(format) return false unless mod mod.serialize? end |
.strip_unicode(string, only: nil) ⇒ String
Strip unicode whitespace from a string
482 483 484 485 486 487 488 489 490 491 492 493 |
# File 'lib/coradoc/coradoc.rb', line 482 def strip_unicode(string, only: nil) return string if string.nil? case only when :begin string.sub(/^\p{Zs}+/, '') when :end string.sub(/\p{Zs}+$/, '') else string.sub(/^\p{Zs}+/, '').sub(/\p{Zs}+$/, '') end end |
.to_core(model) ⇒ Coradoc::CoreModel::Base
Transform a model to CoreModel
229 230 231 232 233 234 235 236 237 238 239 |
# File 'lib/coradoc/coradoc.rb', line 229 def to_core(model) return model if model.is_a?(CoreModel::Base) registry.each_value do |format_module| next unless format_module.handles_model?(model) return format_module.to_core(model) end raise TransformationError, "No transformer found for #{model.class}" end |
.validate_file(path, format: nil) ⇒ Coradoc::Validation::Result
Validate a document file
Parses the file and validates against auto-generated schema. Returns a Coradoc::Validation::Result.
432 433 434 435 436 437 438 439 |
# File 'lib/coradoc/coradoc.rb', line 432 def validate_file(path, format: nil) doc = parse_file(path, format: format) schema = Validation::SchemaGenerator.generate(doc.class) return schema.validate(doc) if schema Validation::Result.new end |