Module: RubyRich::Markdown::TerminalConverter::LatexConverter

Defined in:: lib/ruby_rich/markdown.rb

Overview

LaTeX to Unicode converter. Translates common LaTeX math commands to Unicode characters for terminal display. Handles Greek letters, big operators, \frac, \sqrt, super/subscripts, cases environments, and ~150 common symbols.

Constant Summary collapse

SYMBOLS = Big lookup table of LaTeX command names (stored without the leading backslash) and their Unicode replacements. Format: 'command' => 'unicode_char'

{
  # Keys are LaTeX command names WITHOUT the leading backslash; values are the
  # replacement text. replace_symbols looks up the captured command name here
  # and leaves any command that has no entry untouched.

  # Greek lowercase
  'alpha' => 'α', 'beta' => 'β', 'gamma' => 'γ',
  'delta' => 'δ', 'epsilon' => 'ε', 'varepsilon' => 'ɛ',
  'zeta' => 'ζ', 'eta' => 'η', 'theta' => 'θ',
  'vartheta' => 'ϑ', 'iota' => 'ι', 'kappa' => 'κ',
  'lambda' => 'λ', 'mu' => 'μ', 'nu' => 'ν',
  'xi' => 'ξ', 'pi' => 'π', 'varpi' => 'ϖ',
  'rho' => 'ρ', 'varrho' => 'ϱ', 'sigma' => 'σ',
  'varsigma' => 'ς', 'tau' => 'τ', 'upsilon' => 'υ',
  'phi' => 'φ', 'varphi' => 'ϕ', 'chi' => 'χ',
  'psi' => 'ψ', 'omega' => 'ω',
  # Greek uppercase
  'Gamma' => 'Γ', 'Delta' => 'Δ', 'Theta' => 'Θ',
  'Lambda' => 'Λ', 'Xi' => 'Ξ', 'Pi' => 'Π',
  'Sigma' => 'Σ', 'Upsilon' => 'Υ', 'Phi' => 'Φ',
  'Psi' => 'Ψ', 'Omega' => 'Ω',
  # Relations
  'leq' => '≤', 'geq' => '≥', 'neq' => '≠',
  'equiv' => '≡', 'approx' => '≈', 'sim' => '∼',
  'simeq' => '≃', 'propto' => '∝', 'll' => '≪',
  'gg' => '≫', 'doteq' => '≐', 'prec' => '≺',
  'succ' => '≻', 'preceq' => '≼', 'succeq' => '≽',
  'subset' => '⊂', 'supset' => '⊃', 'subseteq' => '⊆',
  'supseteq' => '⊇', 'in' => '∈', 'ni' => '∋',
  'notin' => '∉', 'perp' => '⊥', 'parallel' => '∥',
  # Binary operators
  'times' => '×', 'div' => '÷', 'cdot' => '·',
  'pm' => '±', 'mp' => '∓', 'oplus' => '⊕',
  'ominus' => '⊖', 'otimes' => '⊗', 'oslash' => '⊘',
  'odot' => '⊙', 'circ' => '∘', 'bullet' => '∙',
  'cap' => '∩', 'cup' => '∪', 'setminus' => '∖',
  'land' => '∧', 'lor' => '∨', 'wedge' => '∧',
  'vee' => '∨', 'star' => '⋆',
  # Arrows
  'to' => '→', 'rightarrow' => '→', 'Rightarrow' => '⇒',
  'leftarrow' => '←', 'Leftarrow' => '⇐',
  'leftrightarrow' => '↔', 'Leftrightarrow' => '⇔',
  'mapsto' => '↦', 'longmapsto' => '⟼',
  'uparrow' => '↑', 'downarrow' => '↓',
  'longrightarrow' => '⟶', 'Longrightarrow' => '⟹',
  # Big operators
  'sum' => '∑', 'prod' => '∏', 'coprod' => '∐',
  'int' => '∫', 'iint' => '∬', 'iiint' => '∭',
  'oint' => '∮', 'bigcup' => '⋃', 'bigcap' => '⋂',
  'bigvee' => '⋁', 'bigwedge' => '⋀',
  # Misc symbols
  'infty' => '∞', 'partial' => '∂', 'nabla' => '∇',
  'forall' => '∀', 'exists' => '∃', 'nexists' => '∄',
  'emptyset' => '∅', 'varnothing' => '∅',
  'Re' => 'ℜ', 'Im' => 'ℑ', 'aleph' => 'ℵ',
  'ell' => 'ℓ', 'hbar' => 'ℏ', 'wp' => '℘',
  'angle' => '∠', 'triangle' => '△', 'triangledown' => '▽',
  'square' => '□', 'Box' => '□', 'diamond' => '◇',
  'clubsuit' => '♣', 'diamondsuit' => '♢',
  'heartsuit' => '♡', 'spadesuit' => '♠',
  'ldots' => '…', 'cdots' => '⋯', 'vdots' => '⋮',
  'ddots' => '⋱', 'dots' => '…',
  'cong' => '≅', 'models' => '⊨', 'mid' => '∣',
  'nmid' => '∤', 'therefore' => '∴', 'because' => '∵',
  'neg' => '¬', 'lnot' => '¬', 'top' => '⊤', 'bot' => '⊥',
  'degree' => '°', 'prime' => '′', 'dag' => '†',
  'ddag' => '‡', 'S' => '§', 'P' => '¶',
  'pound' => '£', 'euro' => '€', 'yen' => '¥',
  'copyright' => '©', 'circledR' => '®',
  # Delimiter sizing commands map to the empty string, so only the delimiter
  # character that follows them (e.g. the parenthesis) survives.
  'left' => '', 'right' => '', 'bigl' => '', 'bigr' => '',
  'Bigl' => '', 'Bigr' => '', 'biggl' => '', 'biggr' => '',
  # Arrow alias: same glyph as 'leftarrow'
  'gets' => '←',
  # A bare \text command is dropped. The braced form \text{...} is unwrapped
  # by replace_symbols before this table is consulted, so this entry only
  # affects a \text that has no braced argument.
  'text' => '',
}.freeze

TEXT_LIKE = Commands whose argument should be preserved verbatim (e.g. \text{abc})

# Text-style command names, stored without the leading backslash.
# NOTE(review): none of the methods shown for this module reference this
# constant; replace_symbols matches text commands with its own regex
# (text\w*) instead. Confirm whether it is still needed.
%w[text textrm textsf texttt textbf textit].freeze

SUPERSCRIPTS =

{
  # Unicode superscript forms, keyed by the plain character.
  # Covers the digits, a few operators and parentheses, and only the letters
  # 'i' and 'n'. script_chars leaves any character without an entry unchanged.
  '0' => '⁰', '1' => '¹', '2' => '²', '3' => '³', '4' => '⁴',
  '5' => '⁵', '6' => '⁶', '7' => '⁷', '8' => '⁸', '9' => '⁹',
  '+' => '⁺', '-' => '⁻', '=' => '⁼', '(' => '⁽', ')' => '⁾',
  'i' => 'ⁱ', 'n' => 'ⁿ',
}.freeze

SUBSCRIPTS =

{
  # Unicode subscript forms, keyed by the plain character.
  # Covers the digits, a few operators and parentheses, and only the letters
  # a, e, i, j, n and x. script_chars leaves any character without an entry
  # unchanged, so a multi-letter subscript can come out partially converted.
  '0' => '₀', '1' => '₁', '2' => '₂', '3' => '₃', '4' => '₄',
  '5' => '₅', '6' => '₆', '7' => '₇', '8' => '₈', '9' => '₉',
  '+' => '₊', '-' => '₋', '=' => '₌', '(' => '₍', ')' => '₎',
  'a' => 'ₐ', 'e' => 'ₑ', 'i' => 'ᵢ', 'j' => 'ⱼ',
  'n' => 'ₙ', 'x' => 'ₓ',
}.freeze

Class Method Summary collapse

.convert(formula) ⇒ Object
.find_matching_brace(text, open_pos) ⇒ Object

Find the index of the } that matches the { at `open_pos`.
.process_cases(text) ⇒ Object

\begin{cases} … \end{cases} → ⎧ … ⎨ … ⎩ …
.process_frac(text) ⇒ Object

\frac{num}{den} / \dfrac{num}{den} / \tfrac{num}{den} → (num)/(den) when num/den include operators, otherwise num/den.
.process_scripts(text) ⇒ Object

^x / _x → Unicode super/subscript.
.process_sqrt(text) ⇒ Object

\sqrt{x} → √(x); \sqrt[n]{x} → ⁿ√(x).
.replace_symbols(text) ⇒ Object

Replace command tokens with Unicode equivalents.
.script_chars(str, map) ⇒ Object
.strip_delim_spacing(text) ⇒ Object

Remove stray spaces left behind after \left / \right are stripped.

Class Method Details

.convert(formula) ⇒ `Object`

# File 'lib/ruby_rich/markdown.rb', line 744

# Convert a LaTeX math formula into a Unicode approximation for terminal
# display.
#
# The passes run in a fixed order: cases environments, symbol substitution,
# super/subscripts, fractions, square roots, and finally delimiter-spacing
# cleanup. Each pass receives the output of the previous one.
#
# @param formula [String, nil] raw LaTeX source
# @return [String, nil] converted text; nil or whitespace-only input is
#   returned as-is
def self.convert(formula)
  return formula if formula.nil? || formula.strip.empty?

  formula.dup
         .then { |text| process_cases(text) }
         .then { |text| replace_symbols(text) }
         .then { |text| process_scripts(text) }
         .then { |text| process_frac(text) }
         .then { |text| process_sqrt(text) }
         .then { |text| strip_delim_spacing(text) }
end

.find_matching_brace(text, open_pos) ⇒ `Object`

Find the index of the } that matches the { at `open_pos`. Returns nil when braces are unbalanced.

# File 'lib/ruby_rich/markdown.rb', line 759

# Locate the closing brace paired with the opening brace at +open_pos+.
#
# Nested braces are tracked by depth. A backslash causes the following
# character to be skipped, so escaped braces (\{ and \}) do not count.
#
# @param text [String] text to scan
# @param open_pos [Integer] index expected to hold a '{'
# @return [Integer, nil] index of the matching '}', or nil when the
#   character at +open_pos+ is not '{' or the braces are unbalanced
def self.find_matching_brace(text, open_pos)
  return nil unless text[open_pos] == '{'

  nesting = 1
  pos = open_pos + 1
  while pos < text.length
    ch = text[pos]
    if ch == '\\'
      # Step over the backslash and the character it escapes.
      pos += 2
      next
    end

    nesting += 1 if ch == '{'
    nesting -= 1 if ch == '}'
    return pos if nesting.zero?

    pos += 1
  end
  nil
end

.process_cases(text) ⇒ `Object`

\begin{cases} … \end{cases} → ⎧ … ⎨ … ⎩ …

# File 'lib/ruby_rich/markdown.rb', line 864

# Render every \begin{cases} ... \end{cases} environment as stacked rows
# prefixed with brace-piece glyphs.
#
# Rows are separated by a double backslash. The first row gets '⎧', the last
# row '⎩', and rows in between '⎨'. Column separators ('&') are removed.
# An environment with no non-empty rows is replaced by '{}'.
#
# @param text [String] text that may contain cases environments
# @return [String] text with each cases environment replaced; everything
#   outside the environments is left untouched
def self.process_cases(text)
  text.gsub(/\\begin\{cases\}(.*?)\\end\{cases\}/m) do
    body = Regexp.last_match(1).strip
    rows = body.split('\\\\').map(&:strip).reject(&:empty?)
    # `next` supplies the replacement for this match only. A `return` here
    # would exit process_cases itself and discard the surrounding text.
    next '{}' if rows.empty?

    last_index = rows.length - 1
    rendered = rows.each_with_index.map do |row, index|
      leader = if index.zero?
                 '⎧'
               elsif index == last_index
                 '⎩'
               else
                 '⎨'
               end
      "#{leader} #{row.delete('&')}"
    end
    rendered.join("\n").strip
  end
end

.process_frac(text) ⇒ `Object`

\frac{num}{den} / \dfrac{num}{den} / \tfrac{num}{den} → (num)/(den) when num/den include operators, otherwise num/den

# File 'lib/ruby_rich/markdown.rb', line 776

# Rewrite \frac{num}{den}, \dfrac{num}{den} and \tfrac{num}{den} as an inline
# "num/den" expression.
#
# Numerator and denominator are converted recursively, so nested fractions
# are rewritten as well. An operand is wrapped in parentheses when it
# contains an operator (including '/') that would otherwise change the
# meaning of the flattened expression. A fraction command that is not
# followed by two balanced brace groups is left in the text unchanged.
#
# @param text [String] text that may contain fraction commands
# @return [String] text with well-formed fractions rewritten
def self.process_frac(text)
  out = +""
  pos = 0
  # Jump from command to command instead of slicing the text at every index.
  while (cmd_at = text.index(/\\[dt]?frac/, pos))
    out << text[pos...cmd_at]
    # "\frac" is 5 characters; "\dfrac" and "\tfrac" are 6.
    cmd_len = text[cmd_at + 1] == 'f' ? 5 : 6
    num_open, num_close = frac_argument(text, cmd_at + cmd_len)
    den_open, den_close = frac_argument(text, num_close + 1) if num_close

    if den_close
      num = process_frac(text[(num_open + 1)...num_close])
      den = process_frac(text[(den_open + 1)...den_close])
      out << "#{frac_operand(num)}/#{frac_operand(den)}"
      pos = den_close + 1
    else
      # Malformed command: copy the backslash through and keep scanning.
      out << text[cmd_at]
      pos = cmd_at + 1
    end
  end
  out << text[pos..]
end

# Internal helper for process_frac: skip whitespace starting at +from+ and
# return [open_index, close_index] of the balanced brace group found there,
# or nil when no balanced group starts at that point.
def self.frac_argument(text, from)
  open_at = from
  open_at += 1 while open_at < text.length && text[open_at].match?(/\s/)
  return nil unless text[open_at] == '{'

  close_at = find_matching_brace(text, open_at)
  close_at && [open_at, close_at]
end

# Internal helper for process_frac: parenthesise an operand that contains an
# operator which would bind differently once the fraction is written inline.
def self.frac_operand(expr)
  expr.match?(%r{[+\-±∓×÷=<>/]}) ? "(#{expr})" : expr
end

.process_scripts(text) ⇒ `Object`

^x / _x → Unicode super/subscript

# File 'lib/ruby_rich/markdown.rb', line 883

# Convert LaTeX superscripts (^) and subscripts (_) to Unicode script glyphs.
#
# Four passes run in this order: braced superscripts, braced subscripts,
# single-character superscripts, single-character subscripts. A braced group
# whose content contains a backslash is left as-is. Characters with no entry
# in the glyph table are kept unchanged.
#
# @param text [String] text that may contain ^ / _ scripts
# @return [String] text with the convertible scripts replaced
def self.process_scripts(text)
  # ^{...}: the match is "^{" + content + "}", so strip two leading
  # characters and one trailing character to get the content.
  converted = text.gsub(/\^\{([^}]+)\}/) do |group|
    content = group[2...-1]
    if content.include?('\\')
      "^{#{content}}"
    else
      script_chars(content, SUPERSCRIPTS)
    end
  end

  # _{...}
  converted = converted.gsub(/_\{([^}]+)\}/) do |group|
    content = group[2...-1]
    if content.include?('\\')
      "_{#{content}}"
    else
      script_chars(content, SUBSCRIPTS)
    end
  end

  # ^x  (single non-whitespace char, not \ or {); pair[1] is that char.
  converted = converted.gsub(/\^([^\s\\{])/) { |pair| SUPERSCRIPTS[pair[1]] || "^#{pair[1]}" }

  # _x  (single non-whitespace char, not \ or {)
  converted.gsub(/_([^\s\\{])/) { |pair| SUBSCRIPTS[pair[1]] || "_#{pair[1]}" }
end

.process_sqrt(text) ⇒ `Object`

\sqrt{x} → √(x); \sqrt[n]{x} → ⁿ√(x)

# File 'lib/ruby_rich/markdown.rb', line 825

# Rewrite \sqrt{x} as "√(x)" and \sqrt[n]{x} as "ⁿ√(x)".
#
# The optional [degree] is rendered with superscript glyphs. The radicand is
# converted recursively, so a root nested inside another root is rewritten
# too. A \sqrt that is not followed by a balanced brace group is left in the
# text unchanged.
#
# @param text [String] text that may contain \sqrt commands
# @return [String] text with well-formed roots rewritten
def self.process_sqrt(text)
  out = +""
  pos = 0
  # Jump from command to command instead of slicing the text at every index.
  while (cmd_at = text.index('\\sqrt', pos))
    out << text[pos...cmd_at]
    cursor = cmd_at + 5 # length of "\sqrt"
    cursor += 1 while cursor < text.length && text[cursor].match?(/\s/)

    # Optional [degree]; ignored when the closing bracket is missing.
    degree = nil
    if text[cursor] == '['
      bracket_end = text.index(']', cursor)
      if bracket_end
        degree = text[(cursor + 1)...bracket_end]
        cursor = bracket_end + 1
      end
    end
    cursor += 1 while cursor < text.length && text[cursor].match?(/\s/)

    rad_close = text[cursor] == '{' ? find_matching_brace(text, cursor) : nil
    if rad_close
      radicand = process_sqrt(text[(cursor + 1)...rad_close])
      index_glyphs = degree ? script_chars(degree, SUPERSCRIPTS) : ''
      out << "#{index_glyphs}√(#{radicand})"
      pos = rad_close + 1
    else
      # Malformed command: copy the backslash through and keep scanning.
      out << text[cmd_at]
      pos = cmd_at + 1
    end
  end
  out << text[pos..]
end

.replace_symbols(text) ⇒ `Object`

Replace command tokens with Unicode equivalents.

# File 'lib/ruby_rich/markdown.rb', line 906

# Replace LaTeX command tokens with their Unicode equivalents.
#
# Three passes run in order:
# 1. Brace-wrapped font/text commands (\text{ab}, \mathbf{ab}, \mathbb{R},
#    ...) are unwrapped, keeping only the content up to the first '}'.
# 2. Font commands followed by whitespace and a single alphanumeric
#    character (\mathbf E) are reduced to that character.
# 3. Every remaining \command is looked up in SYMBOLS; commands without an
#    entry are left untouched.
#
# @param text [String] text containing LaTeX commands
# @return [String] text with known commands substituted
def self.replace_symbols(text)
  # '\1' substitutes the captured content, dropping the command wrapper.
  unwrapped = text.gsub(
    /\\(?:text\w*|math[bif]|mathbf|mathrm|mathit|mathsf|mathtt|mathcal|mathfrak|mathbb|mathscr|boldsymbol|bm|emph)\s*\{(.*?)\}/,
    '\1'
  )
  unwrapped = unwrapped.gsub(
    /\\(?:mathbf|mathrm|mathit|mathsf|mathtt|mathcal|mathfrak|mathbb|mathscr|boldsymbol|bm)\s+([a-zA-Z0-9])/,
    '\1'
  )
  # token is the whole match including the backslash; token[1..] is the name.
  unwrapped.gsub(/\\([a-zA-Z]+)/) do |token|
    SYMBOLS[token[1..]] || token
  end
end

.script_chars(str, map) ⇒ `Object`



901
902
903

# File 'lib/ruby_rich/markdown.rb', line 901

# Map each character of +str+ through +map+, keeping characters that have no
# entry unchanged.
#
# @param str [String] text to transliterate
# @param map [Hash{String => String}] per-character replacement table
# @return [String] the transliterated text
def self.script_chars(str, map)
  glyphs = []
  str.each_char { |ch| glyphs << (map[ch] || ch) }
  glyphs.join
end

.strip_delim_spacing(text) ⇒ `Object`

Remove stray spaces left behind after \left / \right are stripped.

# File 'lib/ruby_rich/markdown.rb', line 923

# Tidy spacing around delimiters and multiplication/division operators.
#
# Whitespace directly inside (), [] and {} is removed, and spaces on either
# side of '·', '×' and '÷' are dropped. All other text is left untouched.
#
# The previous implementation also ran gsub(/\\s+/, ' '). That pattern
# matches a literal backslash followed by the letter "s", not whitespace, so
# it corrupted leftover commands such as \sin or \sec. It has been removed
# rather than turned into /\s+/, which would also collapse the newlines that
# process_cases emits between rows.
#
# @param text [String] converted formula text
# @return [String] text with delimiter and operator spacing tightened
def self.strip_delim_spacing(text)
  text.gsub(/\(\s+/, '(').gsub(/\s+\)/, ')')
      .gsub(/\[\s+/, '[').gsub(/\s+\]/, ']')
      .gsub(/\{\s+/, '{').gsub(/\s+\}/, '}')
      .gsub(/([·×÷]) +/, '\1')
      .gsub(/ +([·×÷])/, '\1')
end

Module: RubyRich::Markdown::TerminalConverter::LatexConverter

Overview

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.convert(formula) ⇒ Object

.find_matching_brace(text, open_pos) ⇒ Object

.process_cases(text) ⇒ Object

.process_frac(text) ⇒ Object

.process_scripts(text) ⇒ Object

.process_sqrt(text) ⇒ Object

.replace_symbols(text) ⇒ Object

.script_chars(str, map) ⇒ Object

.strip_delim_spacing(text) ⇒ Object