Class: Doc2Text::Docx::Document

Inherits:
XmlBasedDocument::DocumentFile show all
Defined in:
lib/doc2text/docx/docx.rb

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from XmlBasedDocument::DocumentFile

#clean, #extract_path, #initialize, #open, #unpack

Constructor Details

This class inherits a constructor from Doc2Text::XmlBasedDocument::DocumentFile

Class Method Details

.parse_and_save(input, output_filename) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/doc2text/docx/docx.rb', line 5

# Converts +input+ to Markdown and writes the result to +output_filename+.
#
# Steps, in order: build a document for +input+, unpack it, parse its styles,
# open the output file for writing, and run the document through a
# Markdown::DocxParser. The parser is always closed after parsing, and the
# document's #clean is always invoked, even when an earlier step raises.
#
# The output file is additionally closed here if it is still open, so the
# handle is not leaked when Markdown::DocxParser.new itself raises (in that
# case no parser exists yet to close it).
#
# @param input [Object] forwarded to the inherited constructor
#   (presumably the path of the .docx file - confirm against DocumentFile)
# @param output_filename [String] path opened in 'w' mode for the Markdown output
# @return [Object] whatever #parse returns
def self.parse_and_save(input, output_filename)
  docx = new input
  output = nil
  begin
    docx.unpack
    styles_xml_root = docx.parse_styles
    output = File.open output_filename, 'w'
    markdown = Markdown::DocxParser.new output, styles_xml_root
    begin
      docx.parse markdown
    ensure
      markdown.close
    end
  ensure
    begin
      # Covers the failure path where the parser was never constructed.
      output.close unless output.nil? || output.closed?
    ensure
      # Nested so cleanup still runs even if closing the output fails.
      docx.clean
    end
  end
end

Instance Method Details

#contains_extracted_files? ⇒ Boolean

Returns:

  • (Boolean)


34
35
36
# File 'lib/doc2text/docx/docx.rb', line 34

# Tells whether '[Content_Types].xml' exists directly under #extract_path.
#
# @return [Boolean] true when that file is present, false otherwise
def contains_extracted_files?
  content_types_path = File.join(extract_path, '[Content_Types].xml')
  File.exist?(content_types_path)
end

#extract_extension ⇒ Object



38
39
40
# File 'lib/doc2text/docx/docx.rb', line 38

# Supplies the fixed string 'unpacked_docx'.
#
# NOTE(review): presumably consumed by the inherited extraction logic to name
# the unpack location - confirm against XmlBasedDocument::DocumentFile.
#
# @return [String] always 'unpacked_docx'
def extract_extension
  "unpacked_docx"
end

#parse(markdown) ⇒ Object



29
30
31
32
# File 'lib/doc2text/docx/docx.rb', line 29

# Feeds 'word/document.xml' through a Nokogiri SAX parser that reports its
# events to +markdown+.
#
# The stream is obtained from the inherited #open (its definition is not
# shown here; assumed to return an IO-like object - confirm). It is closed as
# soon as parsing finishes or fails, so the handle does not stay open.
#
# @param markdown [Object] handler given to Nokogiri::XML::SAX::Parser.new
# @return [Object] the value returned by the SAX parser's #parse
def parse(markdown)
  parser = Nokogiri::XML::SAX::Parser.new(markdown)
  document_xml = open File.join('word', 'document.xml')
  begin
    parser.parse document_xml
  ensure
    # Guarded because the return type of the inherited #open is not visible here.
    document_xml.close if document_xml.respond_to?(:close)
  end
end

#parse_styles ⇒ Object



22
23
24
25
26
27
# File 'lib/doc2text/docx/docx.rb', line 22

# Parses 'word/styles.xml' with a Nokogiri SAX parser driven by a
# Doc2Text::Odt::StylesParser and returns the root that handler collected.
#
# The stream is obtained from the inherited #open (its definition is not
# shown here; assumed to return an IO-like object - confirm). It is closed as
# soon as parsing finishes or fails, so the handle does not stay open.
#
# @return [Object] the styles handler's +xml_root+ after parsing
def parse_styles
  styles_parser = Doc2Text::Odt::StylesParser.new
  xml = Nokogiri::XML::SAX::Parser.new(styles_parser)
  styles_xml = open File.join('word', 'styles.xml')
  begin
    xml.parse styles_xml
  ensure
    # Guarded because the return type of the inherited #open is not visible here.
    styles_xml.close if styles_xml.respond_to?(:close)
  end
  styles_parser.xml_root
end