Class: Pdfcrowd::PdfToTextClient

Inherits:
Object
  • Object
show all
Defined in:
lib/pdfcrowd.rb

Overview

Conversion from PDF to text.

Instance Method Summary collapse

Constructor Details

#initialize(user_name, api_key) ⇒ PdfToTextClient

Returns a new instance of PdfToTextClient.



4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
# File 'lib/pdfcrowd.rb', line 4710

# Builds a client whose request fields are preset for PDF input and text output.
#
# @param user_name forwarded unchanged to ConnectionHelper.new
# @param api_key forwarded unchanged to ConnectionHelper.new
def initialize(user_name, api_key)
    @helper = ConnectionHelper.new(user_name, api_key)
    # Request fields handed to @helper.post by every convert* method.
    @fields = { 'input_format' => 'pdf', 'output_format' => 'txt' }
    # NOTE(review): @file_id is not read by any method visible in this listing.
    @file_id = 1
    @files = {}
    @raw_data = {}
end

Instance Method Details

#convertFile(file) ⇒ Object



4759
4760
4761
4762
4763
4764
4765
4766
# File 'lib/pdfcrowd.rb', line 4759

# Registers a local file as the conversion input and posts the request.
#
# @param file path tested with File.file? and File.zero?
# @return whatever @helper.post returns
# @raise [Error] (code 470) when the path is not a regular file or the file is empty
def convertFile(file)
    usable = File.file?(file) && !File.zero?(file)
    unless usable
        raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFile", "pdf-to-text", "The file must exist and not be empty.", "convert_file"), 470)
    end

    @files.store('file', file)
    @helper.post(@fields, @files, @raw_data)
end

#convertFileToFile(file, file_path) ⇒ Object



4779
4780
4781
4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
# File 'lib/pdfcrowd.rb', line 4779

# Converts a local file and writes the result to file_path.
#
# The output file is created before the conversion starts. When the conversion
# raises Error, the partially written file is removed and the error re-raised.
#
# @param file passed unchanged to convertFileToStream
# @param file_path destination path; must be neither nil nor empty
# @return [nil]
# @raise [Error] (code 470) when file_path is nil or empty
def convertFileToFile(file, file_path)
    if file_path.nil? || file_path.empty?
        raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_file_to_file"), 470)
    end

    # File.open rather than Kernel#open: the latter treats a leading "|" as a command to run.
    output_file = File.open(file_path, "wb")
    begin
        convertFileToStream(file, output_file)
        output_file.close
    rescue Error
        output_file.close
        FileUtils.rm(file_path)
        raise
    ensure
        # Exceptions other than Error must not leak the open handle.
        output_file.close unless output_file.closed?
    end
end

#convertFileToStream(file, out_stream) ⇒ Object



4769
4770
4771
4772
4773
4774
4775
4776
# File 'lib/pdfcrowd.rb', line 4769

# Registers a local file as the conversion input and posts the request,
# handing out_stream to the connection helper.
#
# @param file path tested with File.file? and File.zero?
# @param out_stream passed unchanged as the fourth argument of @helper.post
# @return whatever @helper.post returns
# @raise [Error] (code 470) when the path is not a regular file or the file is empty
def convertFileToStream(file, out_stream)
    usable = File.file?(file) && !File.zero?(file)
    unless usable
        raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFileToStream::file", "pdf-to-text", "The file must exist and not be empty.", "convert_file_to_stream"), 470)
    end

    @files.store('file', file)
    @helper.post(@fields, @files, @raw_data, out_stream)
end

#convertRawData(data) ⇒ Object



4796
4797
4798
4799
# File 'lib/pdfcrowd.rb', line 4796

# Stores data under the 'file' key of the raw-data map and posts the request.
#
# @param data stored as-is; no validation is performed here
# @return whatever @helper.post returns
def convertRawData(data)
    @raw_data.store('file', data)
    @helper.post(@fields, @files, @raw_data)
end

#convertRawDataToFile(data, file_path) ⇒ Object



4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
# File 'lib/pdfcrowd.rb', line 4808

# Converts in-memory data and writes the result to file_path.
#
# The output file is created before the conversion starts. When the conversion
# raises Error, the partially written file is removed and the error re-raised.
#
# @param data passed unchanged to convertRawDataToStream
# @param file_path destination path; must be neither nil nor empty
# @return [nil]
# @raise [Error] (code 470) when file_path is nil or empty
def convertRawDataToFile(data, file_path)
    if file_path.nil? || file_path.empty?
        raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_raw_data_to_file"), 470)
    end

    # File.open rather than Kernel#open: the latter treats a leading "|" as a command to run.
    output_file = File.open(file_path, "wb")
    begin
        convertRawDataToStream(data, output_file)
        output_file.close
    rescue Error
        output_file.close
        FileUtils.rm(file_path)
        raise
    ensure
        # Exceptions other than Error must not leak the open handle.
        output_file.close unless output_file.closed?
    end
end

#convertRawDataToStream(data, out_stream) ⇒ Object



4802
4803
4804
4805
# File 'lib/pdfcrowd.rb', line 4802

# Stores data under the 'file' key of the raw-data map and posts the request,
# handing out_stream to the connection helper.
#
# @param data stored as-is; no validation is performed here
# @param out_stream passed unchanged as the fourth argument of @helper.post
# @return whatever @helper.post returns
def convertRawDataToStream(data, out_stream)
    @raw_data.store('file', data)
    @helper.post(@fields, @files, @raw_data, out_stream)
end

#convertStream(in_stream) ⇒ Object



4825
4826
4827
4828
# File 'lib/pdfcrowd.rb', line 4825

# Reads in_stream completely, stores the content under the 'stream' key of the
# raw-data map, and posts the request.
#
# @param in_stream object responding to #read
# @return whatever @helper.post returns
def convertStream(in_stream)
    payload = in_stream.read
    @raw_data.store('stream', payload)
    @helper.post(@fields, @files, @raw_data)
end

#convertStreamToFile(in_stream, file_path) ⇒ Object



4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
# File 'lib/pdfcrowd.rb', line 4837

# Converts the content of in_stream and writes the result to file_path.
#
# The output file is created before the conversion starts. When the conversion
# raises Error, the partially written file is removed and the error re-raised.
#
# @param in_stream passed unchanged to convertStreamToStream
# @param file_path destination path; must be neither nil nor empty
# @return [nil]
# @raise [Error] (code 470) when file_path is nil or empty
def convertStreamToFile(in_stream, file_path)
    if file_path.nil? || file_path.empty?
        raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_stream_to_file"), 470)
    end

    # File.open rather than Kernel#open: the latter treats a leading "|" as a command to run.
    output_file = File.open(file_path, "wb")
    begin
        convertStreamToStream(in_stream, output_file)
        output_file.close
    rescue Error
        output_file.close
        FileUtils.rm(file_path)
        raise
    ensure
        # Exceptions other than Error must not leak the open handle.
        output_file.close unless output_file.closed?
    end
end

#convertStreamToStream(in_stream, out_stream) ⇒ Object



4831
4832
4833
4834
# File 'lib/pdfcrowd.rb', line 4831

# Reads in_stream completely, stores the content under the 'stream' key of the
# raw-data map, and posts the request, handing out_stream to the connection helper.
#
# @param in_stream object responding to #read
# @param out_stream passed unchanged as the fourth argument of @helper.post
# @return whatever @helper.post returns
def convertStreamToStream(in_stream, out_stream)
    payload = in_stream.read
    @raw_data.store('stream', payload)
    @helper.post(@fields, @files, @raw_data, out_stream)
end

#convertUrl(url) ⇒ Object



4722
4723
4724
4725
4726
4727
4728
4729
# File 'lib/pdfcrowd.rb', line 4722

# Stores url in the 'url' request field and posts the request.
#
# @param url must start with http:// or https:// (case-insensitive)
# @return whatever @helper.post returns
# @raise [Error] (code 470) when the value does not start with a supported scheme
def convertUrl(url)
    # \A anchors at the start of the string. The previous ^ matched at the start
    # of any line, so a multi-line value with an http line anywhere passed.
    unless /\Ahttps?:\/\//i.match(url)
        raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrl", "pdf-to-text", "Supported protocols are http:// and https://.", "convert_url"), 470)
    end

    @fields['url'] = url
    @helper.post(@fields, @files, @raw_data)
end

#convertUrlToFile(url, file_path) ⇒ Object



4742
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
4754
4755
4756
# File 'lib/pdfcrowd.rb', line 4742

# Converts the document at url and writes the result to file_path.
#
# The output file is created before the conversion starts. When the conversion
# raises Error, the partially written file is removed and the error re-raised.
#
# @param url passed unchanged to convertUrlToStream
# @param file_path destination path; must be neither nil nor empty
# @return [nil]
# @raise [Error] (code 470) when file_path is nil or empty
def convertUrlToFile(url, file_path)
    if file_path.nil? || file_path.empty?
        raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_url_to_file"), 470)
    end

    # File.open rather than Kernel#open: the latter treats a leading "|" as a command to run.
    output_file = File.open(file_path, "wb")
    begin
        convertUrlToStream(url, output_file)
        output_file.close
    rescue Error
        output_file.close
        FileUtils.rm(file_path)
        raise
    ensure
        # Exceptions other than Error must not leak the open handle.
        output_file.close unless output_file.closed?
    end
end

#convertUrlToStream(url, out_stream) ⇒ Object



4732
4733
4734
4735
4736
4737
4738
4739
# File 'lib/pdfcrowd.rb', line 4732

# Stores url in the 'url' request field and posts the request, handing
# out_stream to the connection helper.
#
# @param url must start with http:// or https:// (case-insensitive)
# @param out_stream passed unchanged as the fourth argument of @helper.post
# @return whatever @helper.post returns
# @raise [Error] (code 470) when the value does not start with a supported scheme
def convertUrlToStream(url, out_stream)
    # \A anchors at the start of the string. The previous ^ matched at the start
    # of any line, so a multi-line value with an http line anywhere passed.
    unless /\Ahttps?:\/\//i.match(url)
        raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrlToStream::url", "pdf-to-text", "Supported protocols are http:// and https://.", "convert_url_to_stream"), 470)
    end

    @fields['url'] = url
    @helper.post(@fields, @files, @raw_data, out_stream)
end

#getConsumedCreditCount ⇒ Object



4999
5000
5001
# File 'lib/pdfcrowd.rb', line 4999

# Delegates to the connection helper.
#
# @return the value of @helper.getConsumedCreditCount
def getConsumedCreditCount()
    @helper.getConsumedCreditCount
end

#getDebugLogUrl ⇒ Object



4989
4990
4991
# File 'lib/pdfcrowd.rb', line 4989

# Delegates to the connection helper.
#
# @return the value of @helper.getDebugLogUrl
def getDebugLogUrl()
    @helper.getDebugLogUrl
end

#getJobId ⇒ Object



5004
5005
5006
# File 'lib/pdfcrowd.rb', line 5004

# Delegates to the connection helper.
#
# @return the value of @helper.getJobId
def getJobId()
    @helper.getJobId
end

#getOutputSize ⇒ Object



5014
5015
5016
# File 'lib/pdfcrowd.rb', line 5014

# Delegates to the connection helper.
#
# @return the value of @helper.getOutputSize
def getOutputSize()
    @helper.getOutputSize
end

#getPageCount ⇒ Object



5009
5010
5011
# File 'lib/pdfcrowd.rb', line 5009

# Delegates to the connection helper.
#
# @return the value of @helper.getPageCount
def getPageCount()
    @helper.getPageCount
end

#getRemainingCreditCount ⇒ Object



4994
4995
4996
# File 'lib/pdfcrowd.rb', line 4994

# Delegates to the connection helper.
#
# @return the value of @helper.getRemainingCreditCount
def getRemainingCreditCount()
    @helper.getRemainingCreditCount
end

#getVersion ⇒ Object



5019
5020
5021
# File 'lib/pdfcrowd.rb', line 5019

# Builds a version string from CLIENT_VERSION, the fixed label "API v2", and
# the converter version reported by the connection helper.
#
# @return [String] "client <CLIENT_VERSION>, API v2, converter <converter version>"
def getVersion()
    client_part = "client " + CLIENT_VERSION
    client_part + ", API v2, converter " + @helper.getConverterVersion
end

#setClientUserAgent(agent) ⇒ Object



5056
5057
5058
5059
# File 'lib/pdfcrowd.rb', line 5056

# Forwards agent to @helper.setUserAgent.
# NOTE(review): the body is identical to setUserAgent in this class.
#
# @param agent passed unchanged to the connection helper
# @return [self]
def setClientUserAgent(agent)
    tap { @helper.setUserAgent(agent) }
end

#setCropArea(x, y, width, height) ⇒ Object



4974
4975
4976
4977
4978
4979
4980
# File 'lib/pdfcrowd.rb', line 4974

# Sets all four crop-area values by calling the individual setters in the
# order X, Y, width, height.
#
# @param x passed to setCropAreaX
# @param y passed to setCropAreaY
# @param width passed to setCropAreaWidth
# @param height passed to setCropAreaHeight
# @return [self]
def setCropArea(x, y, width, height)
    tap do
        setCropAreaX(x)
        setCropAreaY(y)
        setCropAreaWidth(width)
        setCropAreaHeight(height)
    end
end

#setCropAreaHeight(height) ⇒ Object



4964
4965
4966
4967
4968
4969
4970
4971
# File 'lib/pdfcrowd.rb', line 4964

# Stores height in the 'crop_area_height' request field.
#
# The value is stored as given; Integer() is used only for validation.
#
# @param height value that Integer() converts to a number >= 0
# @return [self]
# @raise [Error] (code 470) when the value is negative or not convertible to an integer
def setCropAreaHeight(height)
    non_negative = begin
        Integer(height) >= 0
    rescue ArgumentError, TypeError, RangeError
        # Unconvertible input is an invalid value; report it through the
        # validation Error instead of letting the conversion error escape.
        false
    end
    unless non_negative
        raise Error.new(Pdfcrowd.create_invalid_value_message(height, "setCropAreaHeight", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_height"), 470)
    end

    @fields['crop_area_height'] = height
    self
end

#setCropAreaWidth(width) ⇒ Object



4954
4955
4956
4957
4958
4959
4960
4961
# File 'lib/pdfcrowd.rb', line 4954

# Stores width in the 'crop_area_width' request field.
#
# The value is stored as given; Integer() is used only for validation.
#
# @param width value that Integer() converts to a number >= 0
# @return [self]
# @raise [Error] (code 470) when the value is negative or not convertible to an integer
def setCropAreaWidth(width)
    non_negative = begin
        Integer(width) >= 0
    rescue ArgumentError, TypeError, RangeError
        # Unconvertible input is an invalid value; report it through the
        # validation Error instead of letting the conversion error escape.
        false
    end
    unless non_negative
        raise Error.new(Pdfcrowd.create_invalid_value_message(width, "setCropAreaWidth", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_width"), 470)
    end

    @fields['crop_area_width'] = width
    self
end

#setCropAreaX(x) ⇒ Object



4934
4935
4936
4937
4938
4939
4940
4941
# File 'lib/pdfcrowd.rb', line 4934

# Stores x in the 'crop_area_x' request field.
#
# The value is stored as given; Integer() is used only for validation.
#
# @param x value that Integer() converts to a number >= 0
# @return [self]
# @raise [Error] (code 470) when the value is negative or not convertible to an integer
def setCropAreaX(x)
    non_negative = begin
        Integer(x) >= 0
    rescue ArgumentError, TypeError, RangeError
        # Unconvertible input is an invalid value; report it through the
        # validation Error instead of letting the conversion error escape.
        false
    end
    unless non_negative
        raise Error.new(Pdfcrowd.create_invalid_value_message(x, "setCropAreaX", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_x"), 470)
    end

    @fields['crop_area_x'] = x
    self
end

#setCropAreaY(y) ⇒ Object



4944
4945
4946
4947
4948
4949
4950
4951
# File 'lib/pdfcrowd.rb', line 4944

# Stores y in the 'crop_area_y' request field.
#
# The value is stored as given; Integer() is used only for validation.
#
# @param y value that Integer() converts to a number >= 0
# @return [self]
# @raise [Error] (code 470) when the value is negative or not convertible to an integer
def setCropAreaY(y)
    non_negative = begin
        Integer(y) >= 0
    rescue ArgumentError, TypeError, RangeError
        # Unconvertible input is an invalid value; report it through the
        # validation Error instead of letting the conversion error escape.
        false
    end
    unless non_negative
        raise Error.new(Pdfcrowd.create_invalid_value_message(y, "setCropAreaY", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_y"), 470)
    end

    @fields['crop_area_y'] = y
    self
end

#setCustomPageBreak(page_break) ⇒ Object



4896
4897
4898
4899
# File 'lib/pdfcrowd.rb', line 4896

# Stores page_break in the 'custom_page_break' request field without validation.
#
# @param page_break value stored as given
# @return [self]
def setCustomPageBreak(page_break)
    @fields.store('custom_page_break', page_break)
    self
end

#setDebugLog(value) ⇒ Object



4983
4984
4985
4986
# File 'lib/pdfcrowd.rb', line 4983

# Stores value in the 'debug_log' request field without validation.
#
# @param value stored as given
# @return [self]
def setDebugLog(value)
    @fields.store('debug_log', value)
    self
end

#setEol(eol) ⇒ Object



4876
4877
4878
4879
4880
4881
4882
4883
# File 'lib/pdfcrowd.rb', line 4876

# Stores eol in the 'eol' request field.
#
# @param eol one of unix, dos, mac (case-insensitive)
# @return [self]
# @raise [Error] (code 470) for any other value
def setEol(eol)
    # \A and \z anchor the whole string. The previous ^ and $ are line anchors,
    # so a multi-line value containing one valid line passed validation.
    unless /\A(unix|dos|mac)\z/i.match(eol)
        raise Error.new(Pdfcrowd.create_invalid_value_message(eol, "setEol", "pdf-to-text", "Allowed values are unix, dos, mac.", "set_eol"), 470)
    end

    @fields['eol'] = eol
    self
end

#setHttpProxy(proxy) ⇒ Object



5030
5031
5032
5033
5034
5035
5036
5037
# File 'lib/pdfcrowd.rb', line 5030

# Stores proxy in the 'http_proxy' request field.
#
# @param proxy HOST:PORT, where HOST is dot-separated labels of letters, digits
#   and inner hyphens (at least one dot is required) and PORT is digits only
# @return [self]
# @raise [Error] (code 470) when the value does not have that shape
def setHttpProxy(proxy)
    # \A and \z anchor the whole string. The previous ^ and $ are line anchors,
    # so a multi-line value containing one valid line passed validation.
    unless /\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+\z/i.match(proxy)
        raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470)
    end

    @fields['http_proxy'] = proxy
    self
end

#setHttpsProxy(proxy) ⇒ Object



5040
5041
5042
5043
5044
5045
5046
5047
# File 'lib/pdfcrowd.rb', line 5040

# Stores proxy in the 'https_proxy' request field.
#
# @param proxy HOST:PORT, where HOST is dot-separated labels of letters, digits
#   and inner hyphens (at least one dot is required) and PORT is digits only
# @return [self]
# @raise [Error] (code 470) when the value does not have that shape
def setHttpsProxy(proxy)
    # \A and \z anchor the whole string. The previous ^ and $ are line anchors,
    # so a multi-line value containing one valid line passed validation.
    unless /\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+\z/i.match(proxy)
        raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470)
    end

    @fields['https_proxy'] = proxy
    self
end

#setLineSpacingThreshold(threshold) ⇒ Object



4912
4913
4914
4915
4916
4917
4918
4919
# File 'lib/pdfcrowd.rb', line 4912

# Stores threshold in the 'line_spacing_threshold' request field.
#
# @param threshold either the string "0" or digits followed by "%"
# @return [self]
# @raise [Error] (code 470) for any other value
def setLineSpacingThreshold(threshold)
    # \A and \z anchor the whole string. The previous ^ and $ are line anchors,
    # so a multi-line value containing one valid line passed validation.
    unless /\A(?:0|[0-9]+%)\z/.match(threshold)
        raise Error.new(Pdfcrowd.create_invalid_value_message(threshold, "setLineSpacingThreshold", "pdf-to-text", "The value must be a positive integer percentage.", "set_line_spacing_threshold"), 470)
    end

    @fields['line_spacing_threshold'] = threshold
    self
end

#setNoLayout(value) ⇒ Object



4870
4871
4872
4873
# File 'lib/pdfcrowd.rb', line 4870

# Stores value in the 'no_layout' request field without validation.
#
# @param value stored as given
# @return [self]
def setNoLayout(value)
    @fields.store('no_layout', value)
    self
end

#setPageBreakMode(mode) ⇒ Object



4886
4887
4888
4889
4890
4891
4892
4893
# File 'lib/pdfcrowd.rb', line 4886

# Stores mode in the 'page_break_mode' request field.
#
# @param mode one of none, default, custom (case-insensitive)
# @return [self]
# @raise [Error] (code 470) for any other value
def setPageBreakMode(mode)
    # \A and \z anchor the whole string. The previous ^ and $ are line anchors,
    # so a multi-line value containing one valid line passed validation.
    unless /\A(none|default|custom)\z/i.match(mode)
        raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setPageBreakMode", "pdf-to-text", "Allowed values are none, default, custom.", "set_page_break_mode"), 470)
    end

    @fields['page_break_mode'] = mode
    self
end

#setParagraphMode(mode) ⇒ Object



4902
4903
4904
4905
4906
4907
4908
4909
# File 'lib/pdfcrowd.rb', line 4902

# Stores mode in the 'paragraph_mode' request field.
#
# @param mode one of none, bounding-box, characters (case-insensitive)
# @return [self]
# @raise [Error] (code 470) for any other value
def setParagraphMode(mode)
    # \A and \z anchor the whole string. The previous ^ and $ are line anchors,
    # so a multi-line value containing one valid line passed validation.
    unless /\A(none|bounding-box|characters)\z/i.match(mode)
        raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setParagraphMode", "pdf-to-text", "Allowed values are none, bounding-box, characters.", "set_paragraph_mode"), 470)
    end

    @fields['paragraph_mode'] = mode
    self
end

#setPdfPassword(password) ⇒ Object



4854
4855
4856
4857
# File 'lib/pdfcrowd.rb', line 4854

# Stores password in the 'pdf_password' request field without validation.
#
# @param password stored as given
# @return [self]
def setPdfPassword(password)
    @fields.store('pdf_password', password)
    self
end

#setPrintPageRange(pages) ⇒ Object



4860
4861
4862
4863
4864
4865
4866
4867
# File 'lib/pdfcrowd.rb', line 4860

# Stores pages in the 'print_page_range' request field.
#
# @param pages comma separated list of page numbers or ranges; a range may omit
#   either its start or its end (e.g. "2,4-6,8-"), and whitespace is allowed
#   around numbers, dashes and commas
# @return [self]
# @raise [Error] (code 470) when the value does not match that format
def setPrintPageRange(pages)
    # \A and \z anchor the whole string. The previous ^ and $ are line anchors,
    # so a valid first line followed by arbitrary text passed validation.
    unless /\A(?:\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*,\s*)*\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*\z/.match(pages)
        raise Error.new(Pdfcrowd.create_invalid_value_message(pages, "setPrintPageRange", "pdf-to-text", "A comma separated list of page numbers or ranges.", "set_print_page_range"), 470)
    end

    @fields['print_page_range'] = pages
    self
end

#setProxy(host, port, user_name, password) ⇒ Object



5068
5069
5070
5071
# File 'lib/pdfcrowd.rb', line 5068

# Forwards the proxy settings to @helper.setProxy.
#
# @param host passed unchanged to the connection helper
# @param port passed unchanged to the connection helper
# @param user_name passed unchanged to the connection helper
# @param password passed unchanged to the connection helper
# @return [self]
def setProxy(host, port, user_name, password)
    tap { @helper.setProxy(host, port, user_name, password) }
end

#setRemoveEmptyLines(value) ⇒ Object



4928
4929
4930
4931
# File 'lib/pdfcrowd.rb', line 4928

# Stores value in the 'remove_empty_lines' request field without validation.
#
# @param value stored as given
# @return [self]
def setRemoveEmptyLines(value)
    @fields.store('remove_empty_lines', value)
    self
end

#setRemoveHyphenation(value) ⇒ Object



4922
4923
4924
4925
# File 'lib/pdfcrowd.rb', line 4922

# Stores value in the 'remove_hyphenation' request field without validation.
#
# @param value stored as given
# @return [self]
def setRemoveHyphenation(value)
    @fields.store('remove_hyphenation', value)
    self
end

#setRetryCount(count) ⇒ Object



5074
5075
5076
5077
# File 'lib/pdfcrowd.rb', line 5074

# Forwards count to @helper.setRetryCount.
#
# @param count passed unchanged to the connection helper
# @return [self]
def setRetryCount(count)
    tap { @helper.setRetryCount(count) }
end

#setTag(tag) ⇒ Object



5024
5025
5026
5027
# File 'lib/pdfcrowd.rb', line 5024

# Stores tag in the 'tag' request field without validation.
#
# @param tag stored as given
# @return [self]
def setTag(tag)
    @fields.store('tag', tag)
    self
end

#setUseHttp(value) ⇒ Object



5050
5051
5052
5053
# File 'lib/pdfcrowd.rb', line 5050

# Forwards value to @helper.setUseHttp.
#
# @param value passed unchanged to the connection helper
# @return [self]
def setUseHttp(value)
    tap { @helper.setUseHttp(value) }
end

#setUserAgent(agent) ⇒ Object



5062
5063
5064
5065
# File 'lib/pdfcrowd.rb', line 5062

# Forwards agent to @helper.setUserAgent.
#
# @param agent passed unchanged to the connection helper
# @return [self]
def setUserAgent(agent)
    tap { @helper.setUserAgent(agent) }
end