Module: RubyRich::Markdown::TerminalConverter::LatexConverter

Defined in:
lib/ruby_rich/markdown.rb

Overview

—- LaTeX to Unicode converter —- Translates common LaTeX math commands to Unicode characters for terminal display. Handles Greek letters, big operators, frac, sqrt, super/subscript, cases, and ~150 common symbols.

Constant Summary collapse

SYMBOLS =
  # Big lookup table mapping a LaTeX command name (written WITHOUT the
  # leading backslash) to its Unicode replacement:
  #   'command' => 'unicode_char'
  # An empty value means the command is dropped from the output; that is
  # intentional only for the delimiter-sizing wrappers and for 'text'.
{
  # Greek lowercase
  'alpha' => 'α', 'beta' => 'β', 'gamma' => 'γ',
  'delta' => 'δ', 'epsilon' => 'ε', 'varepsilon' => 'ɛ',
  'zeta' => 'ζ', 'eta' => 'η', 'theta' => 'θ',
  'vartheta' => 'ϑ', 'iota' => 'ι', 'kappa' => 'κ',
  'lambda' => 'λ', 'mu' => 'μ', 'nu' => 'ν',
  'xi' => 'ξ', 'pi' => 'π', 'varpi' => 'ϖ',
  'rho' => 'ρ', 'varrho' => 'ϱ', 'sigma' => 'σ',
  'varsigma' => 'ς', 'tau' => 'τ', 'upsilon' => 'υ',
  'phi' => 'φ', 'varphi' => 'ϕ', 'chi' => 'χ',
  'psi' => 'ψ', 'omega' => 'ω',
  # Greek uppercase
  'Gamma' => 'Γ', 'Delta' => 'Δ', 'Theta' => 'Θ',
  'Lambda' => 'Λ', 'Xi' => 'Ξ', 'Pi' => 'Π',
  'Sigma' => 'Σ', 'Upsilon' => 'Υ', 'Phi' => 'Φ',
  'Psi' => 'Ψ', 'Omega' => 'Ω',
  # Relations
  'leq' => '≤', 'geq' => '≥', 'neq' => '≠',
  'equiv' => '≡', 'approx' => '≈', 'sim' => '∼',
  'simeq' => '≃', 'propto' => '∝', 'll' => '≪',
  'gg' => '≫', 'doteq' => '≐', 'prec' => '≺',
  'succ' => '≻', 'preceq' => '⪯', 'succeq' => '⪰',
  'subset' => '⊂', 'supset' => '⊃', 'subseteq' => '⊆',
  'supseteq' => '⊇', 'in' => '∈', 'ni' => '∋',
  'notin' => '∉', 'perp' => '⊥', 'parallel' => '∥',
  # Binary operators
  'times' => '×', 'div' => '÷', 'cdot' => '·',
  'pm' => '±', 'mp' => '∓', 'oplus' => '⊕',
  'ominus' => '⊖', 'otimes' => '⊗', 'oslash' => '⊘',
  'odot' => '⊙', 'circ' => '∘', 'bullet' => '•',
  'cap' => '∩', 'cup' => '∪', 'setminus' => '∖',
  'land' => '∧', 'lor' => '∨', 'wedge' => '∧',
  'vee' => '∨', 'star' => '⋆',
  # Arrows
  'to' => '→', 'rightarrow' => '→', 'Rightarrow' => '⇒',
  'leftarrow' => '←', 'Leftarrow' => '⇐',
  'leftrightarrow' => '↔', 'Leftrightarrow' => '⇔',
  'mapsto' => '↦', 'longmapsto' => '⟼',
  'uparrow' => '↑', 'downarrow' => '↓',
  'longrightarrow' => '⟶', 'Longrightarrow' => '⟹',
  # Big operators
  'sum' => '∑', 'prod' => '∏', 'coprod' => '∐',
  'int' => '∫', 'iint' => '∬', 'iiint' => '∭',
  'oint' => '∮', 'bigcup' => '⋃', 'bigcap' => '⋂',
  'bigvee' => '⋁', 'bigwedge' => '⋀',
  # Misc symbols
  'infty' => '∞', 'partial' => '∂', 'nabla' => '∇',
  'forall' => '∀', 'exists' => '∃', 'nexists' => '∄',
  'emptyset' => '∅', 'varnothing' => '∅',
  'Re' => 'ℜ', 'Im' => 'ℑ', 'aleph' => 'ℵ',
  'ell' => 'ℓ', 'hbar' => 'ℏ', 'wp' => '℘',
  'angle' => '∠', 'triangle' => '△', 'triangledown' => '▽',
  'square' => '□', 'Box' => '□', 'diamond' => '⋄',
  'clubsuit' => '♣', 'diamondsuit' => '♢',
  'heartsuit' => '♡', 'spadesuit' => '♠',
  'ldots' => '…', 'cdots' => '⋯', 'vdots' => '⋮',
  'ddots' => '⋱', 'dots' => '…',
  'cong' => '≅', 'models' => '⊨', 'mid' => '∣',
  'nmid' => '∤', 'therefore' => '∴', 'because' => '∵',
  'neg' => '¬', 'lnot' => '¬', 'top' => '⊤', 'bot' => '⊥',
  'degree' => '°', 'prime' => '′', 'dag' => '†',
  'ddag' => '‡', 'S' => '§', 'P' => '¶',
  'pound' => '£', 'euro' => '€', 'yen' => '¥',
  'copyright' => '©', 'circledR' => '®',
  # Delimiters – strip LaTeX wrapper (intentionally empty)
  'left' => '', 'right' => '', 'bigl' => '', 'bigr' => '',
  'Bigl' => '', 'Bigr' => '', 'biggl' => '', 'biggr' => '',
  # Arrows special
  'gets' => '←',
  # Text sub/sup scripts (intentionally empty)
  'text' => '',
}.freeze
TEXT_LIKE =

Commands whose argument should be preserved verbatim (e.g. `\text{abc}`)

%w[text textrm textsf texttt textbf textit].freeze
# Characters that have a dedicated Unicode superscript glyph, keyed by the
# plain character. Characters missing from this table have no superscript
# form and must be handled by the caller.
SUPERSCRIPTS =
{
  '0' => '⁰', '1' => '¹', '2' => '²', '3' => '³', '4' => '⁴',
  '5' => '⁵', '6' => '⁶', '7' => '⁷', '8' => '⁸', '9' => '⁹',
  '+' => '⁺', '-' => '⁻', '=' => '⁼', '(' => '⁽', ')' => '⁾',
  'i' => 'ⁱ', 'n' => 'ⁿ',
}.freeze
# Characters that have a dedicated Unicode subscript glyph, keyed by the
# plain character. Characters missing from this table have no subscript
# form and must be handled by the caller.
SUBSCRIPTS =
{
  '0' => '₀', '1' => '₁', '2' => '₂', '3' => '₃', '4' => '₄',
  '5' => '₅', '6' => '₆', '7' => '₇', '8' => '₈', '9' => '₉',
  '+' => '₊', '-' => '₋', '=' => '₌', '(' => '₍', ')' => '₎',
  'a' => 'ₐ', 'e' => 'ₑ', 'i' => 'ᵢ', 'j' => 'ⱼ',
  'n' => 'ₙ', 'x' => 'ₓ',
}.freeze

Class Method Summary collapse

Class Method Details

.convert(formula) ⇒ Object



744
745
746
747
748
749
750
751
752
753
754
755
# File 'lib/ruby_rich/markdown.rb', line 744

# Convert a LaTeX math formula into a Unicode approximation.
#
# A nil or whitespace-only formula is handed back untouched. Otherwise a
# copy of the input is run through the conversion stages in a fixed order:
# cases environments, symbol replacement, super/subscripts, fractions,
# square roots, and finally delimiter spacing clean-up.
#
# @param formula [String, nil] LaTeX source
# @return [String, nil] converted text, or the original object when blank
def self.convert(formula)
  return formula if formula.nil? || formula.strip.empty?

  formula.dup
         .then { |tex| process_cases(tex) }
         .then { |tex| replace_symbols(tex) }
         .then { |tex| process_scripts(tex) }
         .then { |tex| process_frac(tex) }
         .then { |tex| process_sqrt(tex) }
         .then { |tex| strip_delim_spacing(tex) }
end

.find_matching_brace(text, open_pos) ⇒ Object

Find the index of the `}` that matches the `{` at `open_pos`. Returns nil when `open_pos` does not point at a `{` or when the braces are unbalanced.



759
760
761
762
763
764
765
766
767
768
769
770
771
772
# File 'lib/ruby_rich/markdown.rb', line 759

# Locate the closing brace that pairs with the opening brace at +open_pos+.
#
# Nested braces are tracked by depth, and a backslash causes the character
# that follows it to be skipped (so escaped braces are not counted).
#
# @param text [String] text to scan
# @param open_pos [Integer] index expected to hold a '{'
# @return [Integer, nil] index of the matching '}', or nil when +open_pos+
#   is not a '{' or no matching brace exists
def self.find_matching_brace(text, open_pos)
  return nil if text[open_pos] != '{'

  nesting = 1
  cursor = open_pos + 1
  until cursor >= text.length || nesting.zero?
    char = text[cursor]
    if char == '\\'
      cursor += 1 # step over the escaped character
    elsif char == '{'
      nesting += 1
    elsif char == '}'
      nesting -= 1
    end
    cursor += 1
  end

  nesting.zero? ? cursor - 1 : nil
end

.process_cases(text) ⇒ Object

`\begin{cases} … \end{cases}` → ⎧ … ⎨ … ⎩ … (one output line per row, each prefixed with a brace piece)



864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
# File 'lib/ruby_rich/markdown.rb', line 864

# Render every \begin{cases} … \end{cases} environment as a stack of lines
# prefixed with the pieces of a tall left brace (⎧ first row, ⎨ middle
# rows, ⎩ last row). Rows are separated by `\\` in the source; alignment
# markers (`&`) are removed.
#
# An environment with no rows is replaced by '{}' while the rest of the
# formula is left intact.
#
# @param text [String] formula text
# @return [String] text with all cases environments expanded
def self.process_cases(text)
  text.gsub(/\\begin\{cases\}(.*?)\\end\{cases\}/m) do
    body = Regexp.last_match(1).strip
    lines = body.split('\\\\').map(&:strip).reject(&:empty?)
    # `next`, not `return`: a `return` here would leave process_cases
    # itself and discard everything outside this environment.
    next '{}' if lines.empty?

    out = +""
    lines.each_with_index do |line, i|
      leader = case i
               when 0 then '⎧'
               when lines.length - 1 then '⎩'
               else '⎨'
               end
      out << "#{leader} #{line.gsub('&', '')}\n"
    end
    out.strip
  end
end

.process_frac(text) ⇒ Object

`\frac{num}{den}` / `\dfrac{num}{den}` / `\tfrac{num}{den}` → (num)/(den) when num/den include operators, otherwise num/den



776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
# File 'lib/ruby_rich/markdown.rb', line 776

# Rewrite \frac{num}{den}, \dfrac{num}{den} and \tfrac{num}{den} as an
# inline division "num/den".
#
# The numerator and denominator are themselves converted recursively, so
# nested fractions are handled. Either side is wrapped in parentheses when
# it contains an operator (including '/') that would otherwise change the
# meaning of the flattened expression. A command that is not followed by
# two balanced brace groups is copied through unchanged.
#
# @param text [String] formula text
# @return [String] text with fractions flattened
def self.process_frac(text)
  # Operators that force parentheses; '/' keeps nested fractions grouped.
  op_rx = %r{[+\-±∓×÷=<>/]}
  result = +""
  i = 0
  while i < text.length
    cmd_len = nil
    # Cheap first-character test before slicing out a candidate command.
    if text[i] == '\\'
      head = text[i, 6]
      if head == '\\dfrac' || head == '\\tfrac'
        cmd_len = 6
      elsif head.start_with?('\\frac')
        cmd_len = 5
      end
    end

    if cmd_len
      num_start = i + cmd_len
      num_start += 1 while num_start < text.length && text[num_start].match?(/\s/)
      # find_matching_brace returns nil unless num_start points at a '{'.
      num_end = find_matching_brace(text, num_start)
      if num_end
        den_start = num_end + 1
        den_start += 1 while den_start < text.length && text[den_start].match?(/\s/)
        den_end = find_matching_brace(text, den_start)
        if den_end
          num = process_frac(text[(num_start + 1)...num_end])
          den = process_frac(text[(den_start + 1)...den_end])
          num = "(#{num})" if num.match?(op_rx)
          den = "(#{den})" if den.match?(op_rx)
          result << num << '/' << den
          i = den_end + 1
          next
        end
      end
    end

    result << text[i]
    i += 1
  end
  result
end

.process_scripts(text) ⇒ Object

^x / _x → Unicode super/subscript



883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
# File 'lib/ruby_rich/markdown.rb', line 883

# Convert LaTeX super/subscripts (^x, ^{...}, _x, _{...}) to Unicode
# super/subscript characters.
#
# A braced group is converted only when EVERY character in it has a
# Unicode script form; otherwise the group is left exactly as written.
# Dropping the marker and braces for a partially convertible group would
# change the meaning of the formula (e.g. e^{x} would read as "ex").
# A single-character script without a Unicode form is likewise left as is.
#
# @param text [String] formula text
# @return [String] text with convertible scripts replaced
def self.process_scripts(text)
  scripts = [['^', SUPERSCRIPTS], ['_', SUBSCRIPTS]]

  # Braced form first: ^{...} then _{...}
  scripts.each do |marker, map|
    text = text.gsub(/#{Regexp.escape(marker)}\{([^}]+)\}/) do |group|
      inner = Regexp.last_match(1)
      inner.each_char.all? { |c| map.key?(c) } ? script_chars(inner, map) : group
    end
  end

  # Then the single-character form (any non-whitespace char except \ or {)
  scripts.each do |marker, map|
    text = text.gsub(/#{Regexp.escape(marker)}([^\s\\{])/) do |token|
      map[Regexp.last_match(1)] || token
    end
  end

  text
end

.process_sqrt(text) ⇒ Object

`\sqrt{x}` → √(x); `\sqrt[n]{x}` → ⁿ√(x)



825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
# File 'lib/ruby_rich/markdown.rb', line 825

# Rewrite \sqrt{x} as "√(x)" and \sqrt[n]{x} as "ⁿ√(x)".
#
# The optional degree is rendered through the superscript table. The
# radicand is converted recursively so nested radicals are handled. A
# \sqrt that is not followed by a balanced brace group is copied through
# unchanged.
#
# @param text [String] formula text
# @return [String] text with radicals replaced
def self.process_sqrt(text)
  result = +""
  i = 0
  while i < text.length
    # Cheap first-character test before slicing out a candidate command.
    if text[i] == '\\' && text[i, 5] == '\\sqrt'
      j = i + 5
      deg_text = nil
      j += 1 while j < text.length && text[j].match?(/\s/)
      if text[j] == '['
        close_br = text.index(']', j)
        if close_br
          deg_text = text[(j + 1)...close_br]
          j = close_br + 1
        end
      end
      j += 1 while j < text.length && text[j].match?(/\s/)
      # find_matching_brace returns nil unless j points at a '{'.
      rad_end = find_matching_brace(text, j)
      if rad_end
        rad = process_sqrt(text[(j + 1)...rad_end])
        prefix = deg_text ? script_chars(deg_text, SUPERSCRIPTS) : ''
        result << "#{prefix}√(#{rad})"
        i = rad_end + 1
        next
      end
    end
    result << text[i]
    i += 1
  end
  result
end

.replace_symbols(text) ⇒ Object

Replace command tokens with Unicode equivalents.



906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
# File 'lib/ruby_rich/markdown.rb', line 906

# Replace LaTeX command tokens with their Unicode equivalents.
#
# Three passes, in order:
# 1. brace-wrapped font/formatting commands (\text{ab}, \mathbf{ab},
#    \mathbb{R}, ...) are reduced to their content;
# 2. font commands applied to a single space-separated character
#    (\mathbf E) are reduced to that character;
# 3. every remaining \command is looked up in SYMBOLS and left untouched
#    when it has no entry.
#
# @param text [String] formula text
# @return [String] text with known commands substituted
def self.replace_symbols(text)
  braced_wrapper = /\\(?:text\w*|math[bif]|mathbf|mathrm|mathit|mathsf|mathtt|mathcal|mathfrak|mathbb|mathscr|boldsymbol|bm|emph)\s*\{(.*?)\}/
  spaced_wrapper = /\\(?:mathbf|mathrm|mathit|mathsf|mathtt|mathcal|mathfrak|mathbb|mathscr|boldsymbol|bm)\s+([a-zA-Z0-9])/
  command = /\\([a-zA-Z]+)/

  unwrapped = text.gsub(braced_wrapper, '\1').gsub(spaced_wrapper, '\1')
  unwrapped.gsub(command) do |token|
    SYMBOLS[Regexp.last_match(1)] || token
  end
end

.script_chars(str, map) ⇒ Object



901
902
903
# File 'lib/ruby_rich/markdown.rb', line 901

# Translate a string character by character through +map+, keeping any
# character that has no entry.
#
# @param str [String] text to translate
# @param map [Hash{String => String}] per-character replacement table
# @return [String] translated text
def self.script_chars(str, map)
  glyphs = str.chars.map do |char|
    map[char] || char
  end
  glyphs.join
end

.strip_delim_spacing(text) ⇒ Object

Remove stray spaces left behind after `\left` / `\right` (and similar delimiter commands) are stripped.



923
924
925
926
927
928
929
930
# File 'lib/ruby_rich/markdown.rb', line 923

# Tidy the spacing left over after delimiter commands have been removed.
#
# - whitespace directly inside (), [] and {} is removed;
# - runs of spaces/tabs are collapsed to a single space (newlines are kept
#   so multi-line output stays intact);
# - spaces around the tight binary operators · × ÷ are removed.
#
# @param text [String] converted formula text
# @return [String] text with normalised spacing
def self.strip_delim_spacing(text)
  text.gsub(/\(\s+/, '(').gsub(/\s+\)/, ')')
      .gsub(/\[\s+/, '[').gsub(/\s+\]/, ']')
      .gsub(/\{\s+/, '{').gsub(/\s+\}/, '}')
      # Horizontal whitespace only. A pattern of backslash + "s" here would
      # corrupt leftover commands such as \sin or \sec.
      .gsub(/[ \t]+/, ' ')
      .gsub(/([·×÷]) +/, '\1')
      .gsub(/ +([·×÷])/, '\1')
end