Class: Pdfcrowd::PdfToTextClient
- Inherits: Object
- Inheritance chain: Object → Pdfcrowd::PdfToTextClient
- Defined in: lib/pdfcrowd.rb
Overview
Conversion from PDF to text.
Instance Method Summary
- #convertFile(file) ⇒ Object
- #convertFileToFile(file, file_path) ⇒ Object
- #convertFileToStream(file, out_stream) ⇒ Object
- #convertRawData(data) ⇒ Object
- #convertRawDataToFile(data, file_path) ⇒ Object
- #convertRawDataToStream(data, out_stream) ⇒ Object
- #convertStream(in_stream) ⇒ Object
- #convertStreamToFile(in_stream, file_path) ⇒ Object
- #convertStreamToStream(in_stream, out_stream) ⇒ Object
- #convertUrl(url) ⇒ Object
- #convertUrlToFile(url, file_path) ⇒ Object
- #convertUrlToStream(url, out_stream) ⇒ Object
- #getConsumedCreditCount ⇒ Object
- #getDebugLogUrl ⇒ Object
- #getJobId ⇒ Object
- #getOutputSize ⇒ Object
- #getPageCount ⇒ Object
- #getRemainingCreditCount ⇒ Object
- #getVersion ⇒ Object
- #initialize(user_name, api_key) ⇒ PdfToTextClient (constructor): A new instance of PdfToTextClient.
- #setClientUserAgent(agent) ⇒ Object
- #setCropArea(x, y, width, height) ⇒ Object
- #setCropAreaHeight(height) ⇒ Object
- #setCropAreaWidth(width) ⇒ Object
- #setCropAreaX(x) ⇒ Object
- #setCropAreaY(y) ⇒ Object
- #setCustomPageBreak(page_break) ⇒ Object
- #setDebugLog(value) ⇒ Object
- #setEol(eol) ⇒ Object
- #setHttpProxy(proxy) ⇒ Object
- #setHttpsProxy(proxy) ⇒ Object
- #setLineSpacingThreshold(threshold) ⇒ Object
- #setNoLayout(value) ⇒ Object
- #setPageBreakMode(mode) ⇒ Object
- #setParagraphMode(mode) ⇒ Object
- #setPdfPassword(password) ⇒ Object
- #setPrintPageRange(pages) ⇒ Object
- #setProxy(host, port, user_name, password) ⇒ Object
- #setRemoveEmptyLines(value) ⇒ Object
- #setRemoveHyphenation(value) ⇒ Object
- #setRetryCount(count) ⇒ Object
- #setTag(tag) ⇒ Object
- #setUseHttp(value) ⇒ Object
- #setUserAgent(agent) ⇒ Object
Constructor Details
#initialize(user_name, api_key) ⇒ PdfToTextClient
Returns a new instance of PdfToTextClient.
4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 |
# File 'lib/pdfcrowd.rb', line 4710
#
# Builds a PDF-to-text client.
#
# Creates the ConnectionHelper from the given credentials and seeds the
# request state: the fixed input/output format fields, the file id counter
# and the (initially empty) file and raw-data maps.
#
# @param user_name [Object] forwarded to ConnectionHelper.new
# @param api_key [Object] forwarded to ConnectionHelper.new
def initialize(user_name, api_key)
  @helper = ConnectionHelper.new(user_name, api_key)
  @fields = { 'input_format' => 'pdf', 'output_format' => 'txt' }
  @file_id = 1
  @files, @raw_data = {}, {}
end
Instance Method Details
#convertFile(file) ⇒ Object
4759 4760 4761 4762 4763 4764 4765 4766 |
# File 'lib/pdfcrowd.rb', line 4759
#
# Converts a local file.
#
# The path must name an existing, non-empty regular file; otherwise an
# Error built with 470 is raised. The path is stored under the 'file' key
# of @files and the request is posted through the connection helper.
#
# @param file [String] path of the file to convert
# @return [Object] whatever @helper.post returns
# @raise [Error] when the file is missing or empty
def convertFile(file)
  if !File.file?(file) || File.zero?(file)
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFile", "pdf-to-text", "The file must exist and not be empty.", "convert_file"), 470)
  end

  @files['file'] = file
  @helper.post(@fields, @files, @raw_data)
end
#convertFileToFile(file, file_path) ⇒ Object
4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 |
# File 'lib/pdfcrowd.rb', line 4779
#
# Converts a local file and writes the result to +file_path+.
#
# The output file is opened in binary write mode and handed to
# convertFileToStream. If that raises Error, the output file is removed and
# the error re-raised. Any other exception propagates with the file closed
# but left in place.
#
# @param file [String] path of the file to convert
# @param file_path [String] path of the output file; must not be nil or empty
# @return [nil]
# @raise [Error] when file_path is nil/empty, or when the conversion raises Error
def convertFileToFile(file, file_path)
  if file_path.nil? || file_path.empty?
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_file_to_file"), 470)
  end

  begin
    # File.open (not Kernel#open) so a caller-supplied path is never treated
    # as a command; the block form closes the handle on every exit path.
    File.open(file_path, "wb") do |output_file|
      convertFileToStream(file, output_file)
    end
  rescue Error
    # The handle is already closed here, so the partial output can go.
    FileUtils.rm(file_path)
    raise
  end
  nil
end
#convertFileToStream(file, out_stream) ⇒ Object
4769 4770 4771 4772 4773 4774 4775 4776 |
# File 'lib/pdfcrowd.rb', line 4769
#
# Converts a local file and passes +out_stream+ on to the connection helper.
#
# The path must name an existing, non-empty regular file; otherwise an
# Error built with 470 is raised. The path is stored under the 'file' key
# of @files before the request is posted.
#
# @param file [String] path of the file to convert
# @param out_stream [Object] forwarded as the fourth argument of @helper.post
# @return [Object] whatever @helper.post returns
# @raise [Error] when the file is missing or empty
def convertFileToStream(file, out_stream)
  if !File.file?(file) || File.zero?(file)
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFileToStream::file", "pdf-to-text", "The file must exist and not be empty.", "convert_file_to_stream"), 470)
  end

  @files['file'] = file
  @helper.post(@fields, @files, @raw_data, out_stream)
end
#convertRawData(data) ⇒ Object
4796 4797 4798 4799 |
# File 'lib/pdfcrowd.rb', line 4796
#
# Converts in-memory data.
#
# Stores +data+ under the 'file' key of @raw_data and posts the request
# through the connection helper.
#
# @param data [Object] the content to convert
# @return [Object] whatever @helper.post returns
def convertRawData(data)
  @raw_data.store('file', data)
  @helper.post(@fields, @files, @raw_data)
end
#convertRawDataToFile(data, file_path) ⇒ Object
4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 |
# File 'lib/pdfcrowd.rb', line 4808
#
# Converts in-memory data and writes the result to +file_path+.
#
# The output file is opened in binary write mode and handed to
# convertRawDataToStream. If that raises Error, the output file is removed
# and the error re-raised. Any other exception propagates with the file
# closed but left in place.
#
# @param data [Object] the content to convert
# @param file_path [String] path of the output file; must not be nil or empty
# @return [nil]
# @raise [Error] when file_path is nil/empty, or when the conversion raises Error
def convertRawDataToFile(data, file_path)
  if file_path.nil? || file_path.empty?
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_raw_data_to_file"), 470)
  end

  begin
    # File.open (not Kernel#open) so a caller-supplied path is never treated
    # as a command; the block form closes the handle on every exit path.
    File.open(file_path, "wb") do |output_file|
      convertRawDataToStream(data, output_file)
    end
  rescue Error
    # The handle is already closed here, so the partial output can go.
    FileUtils.rm(file_path)
    raise
  end
  nil
end
#convertRawDataToStream(data, out_stream) ⇒ Object
4802 4803 4804 4805 |
# File 'lib/pdfcrowd.rb', line 4802
#
# Converts in-memory data and passes +out_stream+ on to the connection helper.
#
# Stores +data+ under the 'file' key of @raw_data before posting.
#
# @param data [Object] the content to convert
# @param out_stream [Object] forwarded as the fourth argument of @helper.post
# @return [Object] whatever @helper.post returns
def convertRawDataToStream(data, out_stream)
  @raw_data.store('file', data)
  @helper.post(@fields, @files, @raw_data, out_stream)
end
#convertStream(in_stream) ⇒ Object
4825 4826 4827 4828 |
# File 'lib/pdfcrowd.rb', line 4825
#
# Converts the contents of an input stream.
#
# Reads the stream via +in_stream.read+, stores the result under the
# 'stream' key of @raw_data and posts the request.
#
# @param in_stream [#read] source of the content to convert
# @return [Object] whatever @helper.post returns
def convertStream(in_stream)
  payload = in_stream.read
  @raw_data.store('stream', payload)
  @helper.post(@fields, @files, @raw_data)
end
#convertStreamToFile(in_stream, file_path) ⇒ Object
4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 |
# File 'lib/pdfcrowd.rb', line 4837
#
# Converts the contents of an input stream and writes the result to
# +file_path+.
#
# The output file is opened in binary write mode and handed to
# convertStreamToStream. If that raises Error, the output file is removed
# and the error re-raised. Any other exception propagates with the file
# closed but left in place.
#
# @param in_stream [Object] forwarded to convertStreamToStream
# @param file_path [String] path of the output file; must not be nil or empty
# @return [nil]
# @raise [Error] when file_path is nil/empty, or when the conversion raises Error
def convertStreamToFile(in_stream, file_path)
  if file_path.nil? || file_path.empty?
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_stream_to_file"), 470)
  end

  begin
    # File.open (not Kernel#open) so a caller-supplied path is never treated
    # as a command; the block form closes the handle on every exit path.
    File.open(file_path, "wb") do |output_file|
      convertStreamToStream(in_stream, output_file)
    end
  rescue Error
    # The handle is already closed here, so the partial output can go.
    FileUtils.rm(file_path)
    raise
  end
  nil
end
#convertStreamToStream(in_stream, out_stream) ⇒ Object
4831 4832 4833 4834 |
# File 'lib/pdfcrowd.rb', line 4831
#
# Converts the contents of an input stream and passes +out_stream+ on to
# the connection helper.
#
# Reads the stream via +in_stream.read+ and stores the result under the
# 'stream' key of @raw_data before posting.
#
# @param in_stream [#read] source of the content to convert
# @param out_stream [Object] forwarded as the fourth argument of @helper.post
# @return [Object] whatever @helper.post returns
def convertStreamToStream(in_stream, out_stream)
  payload = in_stream.read
  @raw_data.store('stream', payload)
  @helper.post(@fields, @files, @raw_data, out_stream)
end
#convertUrl(url) ⇒ Object
4722 4723 4724 4725 4726 4727 4728 4729 |
# File 'lib/pdfcrowd.rb', line 4722
#
# Converts the document at +url+.
#
# The whole string must be a single-line http:// or https:// URL
# (case-insensitive); otherwise an Error built with 470 is raised. The URL
# is stored under the 'url' key of @fields and the request is posted.
#
# @param url [String] address of the document to convert
# @return [Object] whatever @helper.post returns
# @raise [Error] when the URL does not start with http:// or https://
def convertUrl(url)
  # \A/\z anchor the whole string; ^/$ anchor per line and would accept any
  # multi-line value that merely contains a URL-looking line.
  unless %r{\Ahttps?://.*\z}i.match(url)
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrl", "pdf-to-text", "Supported protocols are http:// and https://.", "convert_url"), 470)
  end

  @fields['url'] = url
  @helper.post(@fields, @files, @raw_data)
end
#convertUrlToFile(url, file_path) ⇒ Object
4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 |
# File 'lib/pdfcrowd.rb', line 4742
#
# Converts the document at +url+ and writes the result to +file_path+.
#
# The output file is opened in binary write mode and handed to
# convertUrlToStream. If that raises Error, the output file is removed and
# the error re-raised. Any other exception propagates with the file closed
# but left in place.
#
# @param url [String] forwarded to convertUrlToStream
# @param file_path [String] path of the output file; must not be nil or empty
# @return [nil]
# @raise [Error] when file_path is nil/empty, or when the conversion raises Error
def convertUrlToFile(url, file_path)
  if file_path.nil? || file_path.empty?
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_url_to_file"), 470)
  end

  begin
    # File.open (not Kernel#open) so a caller-supplied path is never treated
    # as a command; the block form closes the handle on every exit path.
    File.open(file_path, "wb") do |output_file|
      convertUrlToStream(url, output_file)
    end
  rescue Error
    # The handle is already closed here, so the partial output can go.
    FileUtils.rm(file_path)
    raise
  end
  nil
end
#convertUrlToStream(url, out_stream) ⇒ Object
4732 4733 4734 4735 4736 4737 4738 4739 |
# File 'lib/pdfcrowd.rb', line 4732
#
# Converts the document at +url+ and passes +out_stream+ on to the
# connection helper.
#
# The whole string must be a single-line http:// or https:// URL
# (case-insensitive); otherwise an Error built with 470 is raised. The URL
# is stored under the 'url' key of @fields before posting.
#
# @param url [String] address of the document to convert
# @param out_stream [Object] forwarded as the fourth argument of @helper.post
# @return [Object] whatever @helper.post returns
# @raise [Error] when the URL does not start with http:// or https://
def convertUrlToStream(url, out_stream)
  # \A/\z anchor the whole string; ^/$ anchor per line and would accept any
  # multi-line value that merely contains a URL-looking line.
  unless %r{\Ahttps?://.*\z}i.match(url)
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrlToStream::url", "pdf-to-text", "Supported protocols are http:// and https://.", "convert_url_to_stream"), 470)
  end

  @fields['url'] = url
  @helper.post(@fields, @files, @raw_data, out_stream)
end
#getConsumedCreditCount ⇒ Object
4999 5000 5001 |
# File 'lib/pdfcrowd.rb', line 4999
#
# Reports the consumed credit count by delegating to the connection helper.
#
# @return [Object] whatever @helper.getConsumedCreditCount returns
def getConsumedCreditCount
  @helper.getConsumedCreditCount
end
#getDebugLogUrl ⇒ Object
4989 4990 4991 |
# File 'lib/pdfcrowd.rb', line 4989
#
# Reports the debug log URL by delegating to the connection helper.
#
# @return [Object] whatever @helper.getDebugLogUrl returns
def getDebugLogUrl
  @helper.getDebugLogUrl
end
#getJobId ⇒ Object
5004 5005 5006 |
# File 'lib/pdfcrowd.rb', line 5004
#
# Reports the job id by delegating to the connection helper.
#
# @return [Object] whatever @helper.getJobId returns
def getJobId
  @helper.getJobId
end
#getOutputSize ⇒ Object
5014 5015 5016 |
# File 'lib/pdfcrowd.rb', line 5014
#
# Reports the output size by delegating to the connection helper.
#
# @return [Object] whatever @helper.getOutputSize returns
def getOutputSize
  @helper.getOutputSize
end
#getPageCount ⇒ Object
5009 5010 5011 |
# File 'lib/pdfcrowd.rb', line 5009
#
# Reports the page count by delegating to the connection helper.
#
# @return [Object] whatever @helper.getPageCount returns
def getPageCount
  @helper.getPageCount
end
#getRemainingCreditCount ⇒ Object
4994 4995 4996 |
# File 'lib/pdfcrowd.rb', line 4994
#
# Reports the remaining credit count by delegating to the connection helper.
#
# @return [Object] whatever @helper.getRemainingCreditCount returns
def getRemainingCreditCount
  @helper.getRemainingCreditCount
end
#getVersion ⇒ Object
5019 5020 5021 |
# File 'lib/pdfcrowd.rb', line 5019
#
# Builds a version string from CLIENT_VERSION, the literal "API v2" and the
# converter version reported by the connection helper.
#
# @return [String] e.g. "client <CLIENT_VERSION>, API v2, converter <version>"
def getVersion
  # Kept as String#+ (not interpolation) so non-String parts still raise.
  prefix = "client " + CLIENT_VERSION + ", API v2, converter "
  prefix + @helper.getConverterVersion
end
#setClientUserAgent(agent) ⇒ Object
5056 5057 5058 5059 |
# File 'lib/pdfcrowd.rb', line 5056
#
# Forwards +agent+ to @helper.setUserAgent (the same helper call that
# setUserAgent makes).
#
# @param agent [Object] value handed to the connection helper
# @return [self] the client, so calls can be chained
def setClientUserAgent(agent)
  tap { @helper.setUserAgent(agent) }
end
#setCropArea(x, y, width, height) ⇒ Object
4974 4975 4976 4977 4978 4979 4980 |
# File 'lib/pdfcrowd.rb', line 4974
#
# Sets all four crop-area values by calling, in order, setCropAreaX,
# setCropAreaY, setCropAreaWidth and setCropAreaHeight.
#
# @param x [Object] forwarded to setCropAreaX
# @param y [Object] forwarded to setCropAreaY
# @param width [Object] forwarded to setCropAreaWidth
# @param height [Object] forwarded to setCropAreaHeight
# @return [self] the client, so calls can be chained
def setCropArea(x, y, width, height)
  # Hash literals keep insertion order, so the setters run X, Y, W, H.
  {
    setCropAreaX: x,
    setCropAreaY: y,
    setCropAreaWidth: width,
    setCropAreaHeight: height
  }.each { |setter, value| send(setter, value) }
  self
end
#setCropAreaHeight(height) ⇒ Object
4964 4965 4966 4967 4968 4969 4970 4971 |
# File 'lib/pdfcrowd.rb', line 4964
#
# Sets the 'crop_area_height' request field.
#
# The value is checked with Integer(height) >= 0; the field receives the
# value exactly as passed in, not the converted Integer.
#
# @param height [Object] non-negative integer (or a value Integer() accepts)
# @return [self] the client, so calls can be chained
# @raise [Error] built with 470 when the value converts to a negative integer
def setCropAreaHeight(height)
  # Integer() itself raises for values it cannot convert; that is unchanged.
  if Integer(height) < 0
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(height, "setCropAreaHeight", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_height"), 470)
  end

  @fields['crop_area_height'] = height
  self
end
#setCropAreaWidth(width) ⇒ Object
4954 4955 4956 4957 4958 4959 4960 4961 |
# File 'lib/pdfcrowd.rb', line 4954
#
# Sets the 'crop_area_width' request field.
#
# The value is checked with Integer(width) >= 0; the field receives the
# value exactly as passed in, not the converted Integer.
#
# @param width [Object] non-negative integer (or a value Integer() accepts)
# @return [self] the client, so calls can be chained
# @raise [Error] built with 470 when the value converts to a negative integer
def setCropAreaWidth(width)
  # Integer() itself raises for values it cannot convert; that is unchanged.
  if Integer(width) < 0
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(width, "setCropAreaWidth", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_width"), 470)
  end

  @fields['crop_area_width'] = width
  self
end
#setCropAreaX(x) ⇒ Object
4934 4935 4936 4937 4938 4939 4940 4941 |
# File 'lib/pdfcrowd.rb', line 4934
#
# Sets the 'crop_area_x' request field.
#
# The value is checked with Integer(x) >= 0; the field receives the value
# exactly as passed in, not the converted Integer.
#
# @param x [Object] non-negative integer (or a value Integer() accepts)
# @return [self] the client, so calls can be chained
# @raise [Error] built with 470 when the value converts to a negative integer
def setCropAreaX(x)
  # Integer() itself raises for values it cannot convert; that is unchanged.
  if Integer(x) < 0
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(x, "setCropAreaX", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_x"), 470)
  end

  @fields['crop_area_x'] = x
  self
end
#setCropAreaY(y) ⇒ Object
4944 4945 4946 4947 4948 4949 4950 4951 |
# File 'lib/pdfcrowd.rb', line 4944
#
# Sets the 'crop_area_y' request field.
#
# The value is checked with Integer(y) >= 0; the field receives the value
# exactly as passed in, not the converted Integer.
#
# @param y [Object] non-negative integer (or a value Integer() accepts)
# @return [self] the client, so calls can be chained
# @raise [Error] built with 470 when the value converts to a negative integer
def setCropAreaY(y)
  # Integer() itself raises for values it cannot convert; that is unchanged.
  if Integer(y) < 0
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(y, "setCropAreaY", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_y"), 470)
  end

  @fields['crop_area_y'] = y
  self
end
#setCustomPageBreak(page_break) ⇒ Object
4896 4897 4898 4899 |
# File 'lib/pdfcrowd.rb', line 4896
#
# Sets the 'custom_page_break' request field; the value is not validated.
#
# @param page_break [Object] value stored in the field
# @return [self] the client, so calls can be chained
def setCustomPageBreak(page_break)
  @fields.store('custom_page_break', page_break)
  self
end
#setDebugLog(value) ⇒ Object
4983 4984 4985 4986 |
# File 'lib/pdfcrowd.rb', line 4983
#
# Sets the 'debug_log' request field; the value is not validated.
#
# @param value [Object] value stored in the field
# @return [self] the client, so calls can be chained
def setDebugLog(value)
  @fields.store('debug_log', value)
  self
end
#setEol(eol) ⇒ Object
4876 4877 4878 4879 4880 4881 4882 4883 |
# File 'lib/pdfcrowd.rb', line 4876
#
# Sets the 'eol' request field.
#
# @param eol [String] one of unix, dos, mac (case-insensitive)
# @return [self] the client, so calls can be chained
# @raise [Error] built with 470 when the value is not one of the allowed ones
def setEol(eol)
  # \A/\z anchor the whole string; ^/$ anchor per line and would accept a
  # multi-line value whose first line happens to be valid.
  unless /\A(unix|dos|mac)\z/i.match(eol)
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(eol, "setEol", "pdf-to-text", "Allowed values are unix, dos, mac.", "set_eol"), 470)
  end

  @fields['eol'] = eol
  self
end
#setHttpProxy(proxy) ⇒ Object
5030 5031 5032 5033 5034 5035 5036 5037 |
# File 'lib/pdfcrowd.rb', line 5030
#
# Sets the 'http_proxy' request field.
#
# @param proxy [String] value in the form DOMAIN_OR_IP_ADDRESS:PORT
# @return [self] the client, so calls can be chained
# @raise [Error] built with 470 when the value does not have that form
def setHttpProxy(proxy)
  # \A/\z anchor the whole string; ^/$ anchor per line and would accept a
  # multi-line value that merely contains a valid-looking line.
  unless /\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+\z/i.match(proxy)
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470)
  end

  @fields['http_proxy'] = proxy
  self
end
#setHttpsProxy(proxy) ⇒ Object
5040 5041 5042 5043 5044 5045 5046 5047 |
# File 'lib/pdfcrowd.rb', line 5040
#
# Sets the 'https_proxy' request field.
#
# @param proxy [String] value in the form DOMAIN_OR_IP_ADDRESS:PORT
# @return [self] the client, so calls can be chained
# @raise [Error] built with 470 when the value does not have that form
def setHttpsProxy(proxy)
  # \A/\z anchor the whole string; ^/$ anchor per line and would accept a
  # multi-line value that merely contains a valid-looking line.
  unless /\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+\z/i.match(proxy)
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470)
  end

  @fields['https_proxy'] = proxy
  self
end
#setLineSpacingThreshold(threshold) ⇒ Object
4912 4913 4914 4915 4916 4917 4918 4919 |
# File 'lib/pdfcrowd.rb', line 4912
#
# Sets the 'line_spacing_threshold' request field.
#
# Accepted values are the literal "0" or a run of digits followed by "%".
#
# @param threshold [String] e.g. "0" or "15%"
# @return [self] the client, so calls can be chained
# @raise [Error] built with 470 when the value has any other form
def setLineSpacingThreshold(threshold)
  # \A/\z anchor the whole string; ^/$ anchor per line and would accept a
  # multi-line value that merely contains a valid-looking line.
  unless /\A(?:0|[0-9]+%)\z/.match(threshold)
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(threshold, "setLineSpacingThreshold", "pdf-to-text", "The value must be a positive integer percentage.", "set_line_spacing_threshold"), 470)
  end

  @fields['line_spacing_threshold'] = threshold
  self
end
#setNoLayout(value) ⇒ Object
4870 4871 4872 4873 |
# File 'lib/pdfcrowd.rb', line 4870
#
# Sets the 'no_layout' request field; the value is not validated.
#
# @param value [Object] value stored in the field
# @return [self] the client, so calls can be chained
def setNoLayout(value)
  @fields.store('no_layout', value)
  self
end
#setPageBreakMode(mode) ⇒ Object
4886 4887 4888 4889 4890 4891 4892 4893 |
# File 'lib/pdfcrowd.rb', line 4886
#
# Sets the 'page_break_mode' request field.
#
# @param mode [String] one of none, default, custom (case-insensitive)
# @return [self] the client, so calls can be chained
# @raise [Error] built with 470 when the value is not one of the allowed ones
def setPageBreakMode(mode)
  # \A/\z anchor the whole string; ^/$ anchor per line and would accept a
  # multi-line value whose first line happens to be valid.
  unless /\A(none|default|custom)\z/i.match(mode)
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setPageBreakMode", "pdf-to-text", "Allowed values are none, default, custom.", "set_page_break_mode"), 470)
  end

  @fields['page_break_mode'] = mode
  self
end
#setParagraphMode(mode) ⇒ Object
4902 4903 4904 4905 4906 4907 4908 4909 |
# File 'lib/pdfcrowd.rb', line 4902
#
# Sets the 'paragraph_mode' request field.
#
# @param mode [String] one of none, bounding-box, characters (case-insensitive)
# @return [self] the client, so calls can be chained
# @raise [Error] built with 470 when the value is not one of the allowed ones
def setParagraphMode(mode)
  # \A/\z anchor the whole string; ^/$ anchor per line and would accept a
  # multi-line value whose first line happens to be valid.
  unless /\A(none|bounding-box|characters)\z/i.match(mode)
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setParagraphMode", "pdf-to-text", "Allowed values are none, bounding-box, characters.", "set_paragraph_mode"), 470)
  end

  @fields['paragraph_mode'] = mode
  self
end
#setPdfPassword(password) ⇒ Object
4854 4855 4856 4857 |
# File 'lib/pdfcrowd.rb', line 4854
#
# Sets the 'pdf_password' request field; the value is not validated.
#
# @param password [Object] value stored in the field
# @return [self] the client, so calls can be chained
def setPdfPassword(password)
  @fields.store('pdf_password', password)
  self
end
#setPrintPageRange(pages) ⇒ Object
4860 4861 4862 4863 4864 4865 4866 4867 |
# File 'lib/pdfcrowd.rb', line 4860
#
# Sets the 'print_page_range' request field.
#
# Accepts a comma separated list whose items are a page number ("3"), a
# closed range ("3-5") or a half-open range ("-5", "3-"); whitespace is
# allowed around numbers, dashes and commas.
#
# @param pages [String] the page list, e.g. "1,3-5,7-"
# @return [self] the client, so calls can be chained
# @raise [Error] built with 470 when the value is not such a list
def setPrintPageRange(pages)
  # \A/\z anchor the whole string; ^/$ anchor per line and would accept a
  # value with arbitrary extra lines after a valid first line.
  unless /\A(?:\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*,\s*)*\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*\z/.match(pages)
    # NOTE(review): the extracted page showed `Pdfcrowd.(...)`, i.e. the
    # helper name was dropped; restored as create_invalid_value_message.
    raise Error.new(Pdfcrowd.create_invalid_value_message(pages, "setPrintPageRange", "pdf-to-text", "A comma separated list of page numbers or ranges.", "set_print_page_range"), 470)
  end

  @fields['print_page_range'] = pages
  self
end
#setProxy(host, port, user_name, password) ⇒ Object
5068 5069 5070 5071 |
# File 'lib/pdfcrowd.rb', line 5068
#
# Forwards the proxy settings to @helper.setProxy unchanged.
#
# @param host [Object] forwarded to the connection helper
# @param port [Object] forwarded to the connection helper
# @param user_name [Object] forwarded to the connection helper
# @param password [Object] forwarded to the connection helper
# @return [self] the client, so calls can be chained
def setProxy(host, port, user_name, password)
  tap { @helper.setProxy(host, port, user_name, password) }
end
#setRemoveEmptyLines(value) ⇒ Object
4928 4929 4930 4931 |
# File 'lib/pdfcrowd.rb', line 4928
#
# Sets the 'remove_empty_lines' request field; the value is not validated.
#
# @param value [Object] value stored in the field
# @return [self] the client, so calls can be chained
def setRemoveEmptyLines(value)
  @fields.store('remove_empty_lines', value)
  self
end
#setRemoveHyphenation(value) ⇒ Object
4922 4923 4924 4925 |
# File 'lib/pdfcrowd.rb', line 4922
#
# Sets the 'remove_hyphenation' request field; the value is not validated.
#
# @param value [Object] value stored in the field
# @return [self] the client, so calls can be chained
def setRemoveHyphenation(value)
  @fields.store('remove_hyphenation', value)
  self
end
#setRetryCount(count) ⇒ Object
5074 5075 5076 5077 |
# File 'lib/pdfcrowd.rb', line 5074
#
# Forwards +count+ to @helper.setRetryCount unchanged.
#
# @param count [Object] value handed to the connection helper
# @return [self] the client, so calls can be chained
def setRetryCount(count)
  tap { @helper.setRetryCount(count) }
end
#setTag(tag) ⇒ Object
5024 5025 5026 5027 |
# File 'lib/pdfcrowd.rb', line 5024
#
# Sets the 'tag' request field; the value is not validated.
#
# @param tag [Object] value stored in the field
# @return [self] the client, so calls can be chained
def setTag(tag)
  @fields.store('tag', tag)
  self
end
#setUseHttp(value) ⇒ Object
5050 5051 5052 5053 |
# File 'lib/pdfcrowd.rb', line 5050
#
# Forwards +value+ to @helper.setUseHttp unchanged.
#
# @param value [Object] value handed to the connection helper
# @return [self] the client, so calls can be chained
def setUseHttp(value)
  tap { @helper.setUseHttp(value) }
end
#setUserAgent(agent) ⇒ Object
5062 5063 5064 5065 |
# File 'lib/pdfcrowd.rb', line 5062
#
# Forwards +agent+ to @helper.setUserAgent unchanged.
#
# @param agent [Object] value handed to the connection helper
# @return [self] the client, so calls can be chained
def setUserAgent(agent)
  tap { @helper.setUserAgent(agent) }
end