Module: RubyRich::Markdown::TerminalConverter::LatexConverter

Defined in:
lib/ruby_rich/markdown.rb

Overview

LaTeX-to-Unicode converter. Translates common LaTeX math commands to Unicode characters for terminal display. Handles Greek letters, big operators, `\frac`, `\sqrt`, superscripts/subscripts, the `cases` environment, and ~150 common symbols.

Constant Summary collapse

# Big lookup table of LaTeX command names and their Unicode replacements.
# Format: 'command' => 'unicode_char'. Keys omit the leading backslash because
# the lookup is done on the command name captured after the backslash.
# An empty-string value means "drop the command, keep what follows"; that is
# intentional only for the delimiter-sizing wrappers and for 'text'.
SYMBOLS =
{
  # Greek lowercase
  'alpha' => 'α', 'beta' => 'β', 'gamma' => 'γ',
  'delta' => 'δ', 'epsilon' => 'ε', 'varepsilon' => 'ɛ',
  'zeta' => 'ζ', 'eta' => 'η', 'theta' => 'θ',
  'vartheta' => 'ϑ', 'iota' => 'ι', 'kappa' => 'κ',
  'lambda' => 'λ', 'mu' => 'μ', 'nu' => 'ν',
  'xi' => 'ξ', 'pi' => 'π', 'varpi' => 'ϖ',
  'rho' => 'ρ', 'varrho' => 'ϱ', 'sigma' => 'σ',
  'varsigma' => 'ς', 'tau' => 'τ', 'upsilon' => 'υ',
  'phi' => 'φ', 'varphi' => 'ϕ', 'chi' => 'χ',
  'psi' => 'ψ', 'omega' => 'ω',
  # Greek uppercase
  'Gamma' => 'Γ', 'Delta' => 'Δ', 'Theta' => 'Θ',
  'Lambda' => 'Λ', 'Xi' => 'Ξ', 'Pi' => 'Π',
  'Sigma' => 'Σ', 'Upsilon' => 'Υ', 'Phi' => 'Φ',
  'Psi' => 'Ψ', 'Omega' => 'Ω',
  # Relations
  'leq' => '≤', 'geq' => '≥', 'neq' => '≠',
  'equiv' => '≡', 'approx' => '≈', 'sim' => '∼',
  'simeq' => '≃', 'propto' => '∝', 'll' => '≪',
  'gg' => '≫', 'doteq' => '≐', 'prec' => '≺',
  'succ' => '≻', 'preceq' => '⪯', 'succeq' => '⪰',
  'subset' => '⊂', 'supset' => '⊃', 'subseteq' => '⊆',
  'supseteq' => '⊇', 'in' => '∈', 'ni' => '∋',
  'notin' => '∉', 'perp' => '⊥', 'parallel' => '∥',
  # Binary operators
  'times' => '×', 'div' => '÷', 'cdot' => '·',
  'pm' => '±', 'mp' => '∓', 'oplus' => '⊕',
  'ominus' => '⊖', 'otimes' => '⊗', 'oslash' => '⊘',
  'odot' => '⊙', 'circ' => '∘', 'bullet' => '•',
  'cap' => '∩', 'cup' => '∪', 'setminus' => '∖',
  'land' => '∧', 'lor' => '∨', 'wedge' => '∧',
  'vee' => '∨', 'star' => '⋆',
  # Arrows
  'to' => '→', 'rightarrow' => '→', 'Rightarrow' => '⇒',
  'leftarrow' => '←', 'Leftarrow' => '⇐',
  'leftrightarrow' => '↔', 'Leftrightarrow' => '⇔',
  'mapsto' => '↦', 'longmapsto' => '⟼',
  'uparrow' => '↑', 'downarrow' => '↓',
  'longrightarrow' => '⟶', 'Longrightarrow' => '⟹',
  # Big operators
  'sum' => '∑', 'prod' => '∏', 'coprod' => '∐',
  'int' => '∫', 'iint' => '∬', 'iiint' => '∭',
  'oint' => '∮', 'bigcup' => '⋃', 'bigcap' => '⋂',
  'bigvee' => '⋁', 'bigwedge' => '⋀',
  # Misc symbols
  'infty' => '∞', 'partial' => '∂', 'nabla' => '∇',
  'forall' => '∀', 'exists' => '∃', 'nexists' => '∄',
  'emptyset' => '∅', 'varnothing' => '∅',
  'Re' => 'ℜ', 'Im' => 'ℑ', 'aleph' => 'ℵ',
  'ell' => 'ℓ', 'hbar' => 'ℏ', 'wp' => '℘',
  'angle' => '∠', 'triangle' => '△', 'triangledown' => '▽',
  'square' => '□', 'Box' => '□', 'diamond' => '⋄',
  'clubsuit' => '♣', 'diamondsuit' => '♢',
  'heartsuit' => '♡', 'spadesuit' => '♠',
  'ldots' => '…', 'cdots' => '⋯', 'vdots' => '⋮',
  'ddots' => '⋱', 'dots' => '…',
  'cong' => '≅', 'models' => '⊨', 'mid' => '∣',
  'nmid' => '∤', 'therefore' => '∴', 'because' => '∵',
  'neg' => '¬', 'lnot' => '¬', 'top' => '⊤', 'bot' => '⊥',
  'degree' => '°', 'prime' => '′', 'dag' => '†',
  'ddag' => '‡', 'S' => '§', 'P' => '¶',
  'pound' => '£', 'euro' => '€', 'yen' => '¥',
  'copyright' => '©', 'circledR' => '®',
  # Delimiters – strip LaTeX wrapper (empty on purpose: only the sizing
  # command is removed, the delimiter character that follows is kept)
  'left' => '', 'right' => '', 'bigl' => '', 'bigr' => '',
  'Bigl' => '', 'Bigr' => '', 'biggl' => '', 'biggr' => '',
  # Arrows special
  'gets' => '←',
  # Text sub/sup scripts (empty on purpose: a bare \text is dropped)
  'text' => '',
}.freeze
TEXT_LIKE =

Commands whose argument should be preserved verbatim (e.g. `\text{abc}`).

%w[text textrm textsf texttt textbf textit].freeze
# Plain character => Unicode superscript form. Only characters that have a
# dedicated superscript code point are listed; callers decide what to do with
# characters that are absent from this table.
SUPERSCRIPTS =
{
  '0' => '⁰', '1' => '¹', '2' => '²', '3' => '³', '4' => '⁴',
  '5' => '⁵', '6' => '⁶', '7' => '⁷', '8' => '⁸', '9' => '⁹',
  '+' => '⁺', '-' => '⁻', '=' => '⁼', '(' => '⁽', ')' => '⁾',
  'i' => 'ⁱ', 'n' => 'ⁿ',
}.freeze
# Plain character => Unicode subscript form. Only characters that have a
# dedicated subscript code point are listed; callers decide what to do with
# characters that are absent from this table.
SUBSCRIPTS =
{
  '0' => '₀', '1' => '₁', '2' => '₂', '3' => '₃', '4' => '₄',
  '5' => '₅', '6' => '₆', '7' => '₇', '8' => '₈', '9' => '₉',
  '+' => '₊', '-' => '₋', '=' => '₌', '(' => '₍', ')' => '₎',
  'a' => 'ₐ', 'e' => 'ₑ', 'i' => 'ᵢ', 'j' => 'ⱼ',
  'n' => 'ₙ', 'x' => 'ₓ',
}.freeze

Class Method Summary collapse

Class Method Details

.convert(formula) ⇒ Object



737
738
739
740
741
742
743
744
745
746
747
748
# File 'lib/ruby_rich/markdown.rb', line 737

# Entry point: run a LaTeX formula through every conversion stage.
#
# nil and blank (whitespace-only) input is handed back untouched. Otherwise a
# copy of the formula is passed through the stages in a fixed order: cases
# environments, symbol commands, super/subscripts, fractions, square roots,
# and finally spacing cleanup.
#
# @param formula [String, nil] LaTeX source
# @return [String, nil] the converted text, or +formula+ itself when nil/blank
def self.convert(formula)
  return formula if formula.nil?
  return formula if formula.strip.empty?

  formula.dup
         .then { |stage| process_cases(stage) }
         .then { |stage| replace_symbols(stage) }
         .then { |stage| process_scripts(stage) }
         .then { |stage| process_frac(stage) }
         .then { |stage| process_sqrt(stage) }
         .then { |stage| strip_delim_spacing(stage) }
end

.find_matching_brace(text, open_pos) ⇒ Object

Find the index of the `}` that matches the `{` at `open_pos`. Returns nil when braces are unbalanced.



752
753
754
755
756
757
758
759
760
761
762
763
764
765
# File 'lib/ruby_rich/markdown.rb', line 752

# Locate the closing brace paired with the opening brace at +open_pos+.
#
# Nested braces are tracked by depth. A backslash causes the character right
# after it to be skipped, so escaped braces do not affect the depth.
#
# @param text [String] text to scan
# @param open_pos [Integer] index expected to hold a '{'
# @return [Integer, nil] index of the matching '}', or nil when +open_pos+
#   does not hold '{' or the braces never balance
def self.find_matching_brace(text, open_pos)
  return nil unless text[open_pos] == '{'

  nesting = 1
  cursor = open_pos
  until nesting.zero?
    cursor += 1
    return nil if cursor >= text.length

    ch = text[cursor]
    if ch == '\\'
      cursor += 1 # step over the escaped character
    elsif ch == '{'
      nesting += 1
    elsif ch == '}'
      nesting -= 1
    end
  end
  cursor
end

.process_cases(text) ⇒ Object

`\begin{cases} … \end{cases}` → ⎧ … ⎨ … ⎩ …



857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
# File 'lib/ruby_rich/markdown.rb', line 857

# Render every \begin{cases} ... \end{cases} environment as a stacked brace.
#
# The body is split on the LaTeX row separator (two backslashes); blank rows
# are dropped and alignment ampersands are removed. The first row is prefixed
# with the upper hook, the last row with the lower hook and every row in
# between with the middle piece. Rows are joined with newlines.
#
# @param text [String] formula text
# @return [String] text with each cases environment replaced; an environment
#   with no rows becomes '{}'
def self.process_cases(text)
  text.gsub(/\\begin\{cases\}(.*?)\\end\{cases\}/m) do
    rows = Regexp.last_match(1).split('\\\\').map(&:strip).reject(&:empty?)
    # `next`, not `return`: a `return` here would leave the whole method and
    # replace the entire formula with '{}' instead of just this environment.
    next '{}' if rows.empty?

    last_idx = rows.length - 1
    rendered = rows.each_with_index.map do |row, idx|
      hook = if idx.zero?
               '⎧'
             elsif idx == last_idx
               '⎩'
             else
               '⎨'
             end
      "#{hook} #{row.delete('&')}"
    end
    rendered.join("\n").strip
  end
end

.process_frac(text) ⇒ Object

`\frac{num}{den}` / `\dfrac{num}{den}` / `\tfrac{num}{den}` → `(num)/(den)` when num/den include operators, otherwise `num/den`.



769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
# File 'lib/ruby_rich/markdown.rb', line 769

# Rewrite \frac{num}{den}, \dfrac{num}{den} and \tfrac{num}{den} as inline
# division.
#
# Whitespace is allowed between the command and each braced argument. Both
# arguments are themselves run through this method, so nested fractions are
# converted too. An argument is wrapped in parentheses when it contains an
# operator that would otherwise change precedence; '/' is included so that
# compound fractions keep their meaning. A fraction command whose two braced
# arguments cannot be located is left in the text unchanged.
#
# @param text [String] formula text
# @return [String] text with well-formed fraction commands converted
def self.process_frac(text)
  head_rx = /\\[dt]?frac\s*\{/
  wrap_rx = %r{[+\-±∓×÷=<>/]}
  out = +''
  pos = 0

  while (head = head_rx.match(text, pos))
    num_open = head.end(0) - 1
    num_close = find_matching_brace(text, num_open)
    # The denominator must be the next non-blank thing after the numerator.
    den_open = num_close && text.index(/\S/, num_close + 1)
    den_close = nil
    den_close = find_matching_brace(text, den_open) if den_open && text[den_open] == '{'

    unless den_close
      # Malformed command: emit up to and including its backslash, then keep
      # scanning right after it so a later, valid fraction is still found.
      out << text[pos..head.begin(0)]
      pos = head.begin(0) + 1
      next
    end

    out << text[pos...head.begin(0)]
    operands = [text[num_open + 1...num_close], text[den_open + 1...den_close]].map do |operand|
      converted = process_frac(operand)
      converted.match?(wrap_rx) ? "(#{converted})" : converted
    end
    out << operands.join('/')
    pos = den_close + 1
  end

  out << text[pos..]
end

.process_scripts(text) ⇒ Object

^x / _x → Unicode super/subscript



876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
# File 'lib/ruby_rich/markdown.rb', line 876

# Convert ^ and _ scripts to Unicode superscript / subscript characters.
#
# Four passes run in a fixed order: braced superscripts, braced subscripts,
# single-character superscripts, single-character subscripts.
#
# A braced group is converted only when every character in it has an entry in
# the corresponding table. Otherwise the group (including one that still
# contains a backslash command) is left exactly as written, so the script
# marker is never silently dropped. This matches the single-character form,
# which keeps "^x" / "_x" when x has no table entry.
#
# @param text [String] formula text
# @return [String] text with convertible scripts replaced
def self.process_scripts(text)
  passes = [SUPERSCRIPTS, SUBSCRIPTS]
  braced_rx = [/\^\{([^}]+)\}/, /_\{([^}]+)\}/]
  single_rx = [/\^([^\s\\{])/, /_([^\s\\{])/]

  braced_rx.zip(passes).each do |pattern, glyphs|
    text = text.gsub(pattern) do |whole|
      inner = Regexp.last_match(1)
      convertible = inner.each_char.all? { |ch| glyphs.key?(ch) }
      convertible ? script_chars(inner, glyphs) : whole
    end
  end

  single_rx.zip(passes).each do |pattern, glyphs|
    text = text.gsub(pattern) { |whole| glyphs[Regexp.last_match(1)] || whole }
  end

  text
end

.process_sqrt(text) ⇒ Object

`\sqrt{x}` → √(x); `\sqrt[n]{x}` → ⁿ√(x)



818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
# File 'lib/ruby_rich/markdown.rb', line 818

# Rewrite \sqrt{x} as √(x) and \sqrt[n]{x} as the degree followed by √(x).
#
# Whitespace is allowed after the command and after the optional [degree].
# The degree is passed through script_chars with the superscript table. The
# radicand is itself run through this method, so nested roots are converted
# too. A \sqrt that is not followed by a balanced braced radicand is left in
# the text unchanged.
#
# @param text [String] formula text
# @return [String] text with well-formed square-root commands converted
def self.process_sqrt(text)
  command = '\\sqrt'
  out = +''
  pos = 0

  while (start = text.index(command, pos))
    cur = text.index(/\S/, start + command.length) || text.length

    degree = nil
    if text[cur] == '[' && (bracket_end = text.index(']', cur))
      degree = text[cur + 1...bracket_end]
      cur = text.index(/\S/, bracket_end + 1) || text.length
    end

    rad_close = text[cur] == '{' ? find_matching_brace(text, cur) : nil
    unless rad_close
      # No usable radicand: emit up to and including the backslash and keep
      # scanning right after it.
      out << text[pos..start]
      pos = start + 1
      next
    end

    out << text[pos...start]
    prefix = degree ? script_chars(degree, SUPERSCRIPTS) : ''
    out << "#{prefix}√(#{process_sqrt(text[cur + 1...rad_close])})"
    pos = rad_close + 1
  end

  out << text[pos..]
end

.replace_symbols(text) ⇒ Object

Replace command tokens with Unicode equivalents.



899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
# File 'lib/ruby_rich/markdown.rb', line 899

# Swap LaTeX command tokens for their Unicode equivalents.
#
# Runs three substitutions in order:
# 1. font/formatting commands with a braced argument (\text{ab}, \mathbf{ab},
#    \mathbb{R}, ...) are reduced to the argument itself;
# 2. font commands followed by whitespace and one alphanumeric character
#    (\mathbf E) are reduced to that character;
# 3. every remaining \name is looked up in SYMBOLS and replaced when an entry
#    exists, otherwise left as written.
#
# @param text [String] formula text
# @return [String] text with known commands replaced
def self.replace_symbols(text)
  braced_font = /\\(?:text\w*|math[bif]|mathbf|mathrm|mathit|mathsf|mathtt|mathcal|mathfrak|mathbb|mathscr|boldsymbol|bm|emph)\s*\{(.*?)\}/
  spaced_font = /\\(?:mathbf|mathrm|mathit|mathsf|mathtt|mathcal|mathfrak|mathbb|mathscr|boldsymbol|bm)\s+([a-zA-Z0-9])/
  command = /\\([a-zA-Z]+)/

  # Drop the wrapper, keep the captured content.
  unwrapped = text.gsub(braced_font, '\1').gsub(spaced_font, '\1')

  unwrapped.gsub(command) do |token|
    replacement = SYMBOLS[Regexp.last_match(1)]
    replacement || token
  end
end

.script_chars(str, map) ⇒ Object



894
895
896
# File 'lib/ruby_rich/markdown.rb', line 894

# Translate a string character by character through a lookup table.
#
# @param str [String] text to translate
# @param map [Hash{String => String}] per-character replacements
# @return [String] +str+ with each character replaced by its table entry;
#   characters without an entry are kept as they are
def self.script_chars(str, map)
  translated = str.chars.map do |ch|
    glyph = map[ch]
    glyph || ch
  end
  translated.join
end

.strip_delim_spacing(text) ⇒ Object

Remove stray spaces left behind after `\left` / `\right` are stripped.



916
917
918
919
920
921
922
923
# File 'lib/ruby_rich/markdown.rb', line 916

# Tidy the spacing of a converted formula.
#
# - removes whitespace just inside (), [] and {}
# - collapses runs of spaces/tabs to a single space (newlines are kept, so
#   multi-line output stays multi-line)
# - removes spaces on either side of the · × ÷ operators
#
# The collapse step deliberately matches whitespace, not a backslash followed
# by the letter "s": the latter would corrupt commands that are still present
# in the text, e.g. turn "\sin" into " in".
#
# @param text [String] converted formula text
# @return [String] text with spacing normalised
def self.strip_delim_spacing(text)
  text.gsub(/\(\s+/, '(').gsub(/\s+\)/, ')')
      .gsub(/\[\s+/, '[').gsub(/\s+\]/, ']')
      .gsub(/\{\s+/, '{').gsub(/\s+\}/, '}')
      .gsub(/[ \t]+/, ' ')
      .gsub(/([·×÷]) +/, '\1')
      .gsub(/ +([·×÷])/, '\1')
end