Class: EpubBook::Book
- Inherits:
-
Object
- Object
- EpubBook::Book
- Defined in:
- lib/epub_book/book.rb
Constant Summary collapse
- UserAgent =
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36"
- Referer =
"http://www.baidu.com/"
- Reg =
/<script.*?>.*?<\/script>/m
Instance Attribute Summary collapse
-
#body_css ⇒ Object
Returns the value of attribute body_css.
-
#cover ⇒ Object
Returns the value of attribute cover.
-
#cover_css ⇒ Object
Returns the value of attribute cover_css.
-
#creator ⇒ Object
Returns the value of attribute creator.
-
#des_url ⇒ Object
Returns the value of attribute des_url.
-
#description_css ⇒ Object
Returns the value of attribute description_css.
-
#ext_name ⇒ Object
Returns the value of attribute ext_name.
-
#folder_name ⇒ Object
Returns the value of attribute folder_name.
-
#ignore_txt ⇒ Object
Returns the value of attribute ignore_txt.
-
#index_item_css ⇒ Object
Returns the value of attribute index_item_css.
-
#item_attr ⇒ Object
Returns the value of attribute item_attr.
-
#limit ⇒ Object
Returns the value of attribute limit.
-
#mail_to ⇒ Object
Returns the value of attribute mail_to.
-
#page_attr ⇒ Object
Returns the value of attribute page_attr.
-
#page_css ⇒ Object
Returns the value of attribute page_css.
-
#path ⇒ Object
Returns the value of attribute path.
-
#referer ⇒ Object
Returns the value of attribute referer.
-
#title_css ⇒ Object
Returns the value of attribute title_css.
-
#user_agent ⇒ Object
Returns the value of attribute user_agent.
Instance Method Summary collapse
- #book ⇒ Object
- #book_path ⇒ Object
- #fetch_book ⇒ Object
-
#fetch_index(url = nil) ⇒ Object
得到书目索引.
-
#generate_book(book_name = nil) ⇒ Object
创建书本.
-
#initialize(index_url, des_url = nil) {|_self| ... } ⇒ Book
constructor
A new instance of Book.
- #link_host ⇒ Object
-
#save_book ⇒ Object
save catalog file.
Constructor Details
#initialize(index_url, des_url = nil) {|_self| ... } ⇒ Book
Returns a new instance of Book.
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/epub_book/book.rb', line 30
#
# Creates a book for the given index page and fills in the default scraping
# and output settings. Every default can be overridden from the optional block,
# which receives the new instance.
#
# @param index_url [String] URL of the page that lists the chapters
# @param des_url [String, nil] stored in @des_url; fetch_index uses it as the
#   CSS selector of the link to the description page
# @yieldparam book [Book] the instance being configured
def initialize(index_url, des_url = nil)
  @index_url, @des_url = index_url, des_url
  @user_agent, @referer = UserAgent, Referer

  # The folder name is the URL-safe Base64 form of the MD5 digest of the index
  # URL, with the last two characters cut off.
  url_digest = Digest::MD5.digest(@index_url)
  @folder_name = Base64.urlsafe_encode64(url_digest)[0..-3]

  # Output defaults.
  @creator = 'javy_liu'
  @cover = 'cover.jpg'
  @ext_name = 'epub'

  # Scraping defaults (CSS selectors and the attribute holding chapter links).
  @title_css = '.wrapper h1.title1'
  @index_item_css = 'ul.list3>li>a'
  @body_css = '.articlebody'
  @item_attr = "href"

  yield(self) if block_given?
end
Instance Attribute Details
#body_css ⇒ Object
Returns the value of attribute body_css.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def body_css @body_css end |
#cover ⇒ Object
Returns the value of attribute cover.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def cover @cover end |
#cover_css ⇒ Object
Returns the value of attribute cover_css.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def cover_css @cover_css end |
#creator ⇒ Object
Returns the value of attribute creator.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def creator @creator end |
#des_url ⇒ Object
Returns the value of attribute des_url.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def des_url @des_url end |
#description_css ⇒ Object
Returns the value of attribute description_css.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def description_css @description_css end |
#ext_name ⇒ Object
Returns the value of attribute ext_name.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def ext_name @ext_name end |
#folder_name ⇒ Object
Returns the value of attribute folder_name.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def folder_name @folder_name end |
#ignore_txt ⇒ Object
Returns the value of attribute ignore_txt.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def ignore_txt @ignore_txt end |
#index_item_css ⇒ Object
Returns the value of attribute index_item_css.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def index_item_css @index_item_css end |
#item_attr ⇒ Object
Returns the value of attribute item_attr.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def item_attr @item_attr end |
#limit ⇒ Object
Returns the value of attribute limit.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def limit @limit end |
#mail_to ⇒ Object
Returns the value of attribute mail_to.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def mail_to @mail_to end |
#page_attr ⇒ Object
Returns the value of attribute page_attr.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def page_attr @page_attr end |
#page_css ⇒ Object
Returns the value of attribute page_css.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def page_css @page_css end |
#path ⇒ Object
Returns the value of attribute path.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def path @path end |
#referer ⇒ Object
Returns the value of attribute referer.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def referer @referer end |
#title_css ⇒ Object
Returns the value of attribute title_css.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def title_css @title_css end |
#user_agent ⇒ Object
Returns the value of attribute user_agent.
25 26 27 |
# File 'lib/epub_book/book.rb', line 25 def user_agent @user_agent end |
Instance Method Details
#book ⇒ Object
54 55 56 57 58 |
# File 'lib/epub_book/book.rb', line 54
#
# Returns the catalog hash for this book, loading it on first use.
#
# The book directory is created when it does not exist yet. If a non-empty
# index.yml is present there it is parsed; otherwise (or when the file holds
# no document) an empty catalog `{ files: [] }` is used. The result is memoised
# in @book.
#
# @return [Hash] the catalog (symbol keys such as :files, :title)
def book
  return @book if @book

  Dir.mkdir(book_path) unless File.directory?(book_path)
  index_file = File.join(book_path, 'index.yml')

  @book =
    if File.size?(index_file)
      # File.read closes the handle (File.open without a block did not).
      # The catalog uses Symbol keys, which a restricted YAML load rejects
      # unless Symbol is explicitly permitted.
      YAML.safe_load(File.read(index_file), permitted_classes: [Symbol], aliases: true) || { files: [] }
    else
      { files: [] }
    end
end
#book_path ⇒ Object
46 47 48 |
# File 'lib/epub_book/book.rb', line 46
#
# Directory that holds this book's files: @folder_name inside @path, or inside
# the current working directory when no path was configured. The value is
# memoised on first call.
#
# @return [String]
def book_path
  # Dir.pwd replaces shelling out to `pwd`: no subprocess, works without a shell.
  @book_path ||= File.join(@path || Dir.pwd, @folder_name)
end
#fetch_book ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 |
# File 'lib/epub_book/book.rb', line 174
#
# Downloads the chapters listed in book[:files] (at most `limit` of them).
#
# * ext_name == 'txt': all chapters are appended to the single file at
#   book[:file_abs_name], preceded by a short description header. Afterwards,
#   if ignore_txt is set, lines matching it are deleted from that file with sed.
# * any other ext_name: each chapter is written to its own HTML file under
#   book_path, with <script> elements stripped.
#
# A chapter whose content file already exists and is non-empty is skipped.
# A chapter that fails to download or parse is logged and skipped; it does not
# abort the remaining chapters.
#
# @return [Boolean, nil] the result of the sed call for 'txt' books with
#   ignore_txt set, otherwise nil
def fetch_book
  EpubBook.logger.info "------Fetch book----------"

  plain_text = ext_name == 'txt'
  txt_file = nil

  begin
    if plain_text
      txt_file = File.open(book[:file_abs_name], 'a')
      txt_file.write("简介\n\n")
      txt_file.write(' ')
      txt_file.write(book[:description] || " ")
    end

    book[:files].each_with_index do |item, index|
      break if limit && index >= limit

      content_path = File.join(book_path, item[:content])
      # Already fetched on a previous run.
      next if File.size?(content_path)

      begin
        response = HTTP.headers("User-Agent" => @user_agent, 'Referer' => @referer).get(item[:url])
        doc_file = Nokogiri::HTML(response.to_s)
        EpubBook.logger.info item[:label]

        if plain_text
          txt_file.write("\n\n")
          txt_file.write(item[:label])
          txt_file.write("\n\n")
          # Only the direct text nodes of the body are kept.
          doc_file.css(@body_css).children.each do |node|
            txt_file.write(node.text) if node.name == 'text'
          end
        else
          # The original tested for 'pub' here, so the default 'epub' format
          # never produced chapter files.
          File.open(content_path, 'w') do |f|
            f.write("<h3>#{item[:label]}</h3>")
            f.write(doc_file.css(@body_css).to_s.gsub(Reg, ''))
          end
        end
      rescue StandardError => e
        # StandardError only: interrupts and exit requests must still propagate.
        EpubBook.logger.info "Error:#{e.message},#{item.inspect}"
      end
    end
  ensure
    txt_file.close if txt_file
  end

  return unless plain_text

  EpubBook.logger.info "=============去除包含指定忽略字符的行======="
  EpubBook.logger.info ignore_txt
  # Argument-list form: the file name (built from the book title) is never
  # interpreted by a shell.
  system('sed', '-i', '-r', "/#{ignore_txt}/d", book[:file_abs_name]) if ignore_txt
end
#fetch_index(url = nil) ⇒ Object
得到书目索引
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
# File 'lib/epub_book/book.rb', line 126
#
# Builds the chapter catalog by scraping the index page(s) and saves it.
#
# Starting from `url` (default: the index URL given to the constructor), every
# element matching @index_item_css contributes one entry
# `{label:, url:, content:}` to book[:files]; links starting with "javascript"
# or "#" are ignored. When both @page_css and @page_attr are set, the link they
# select is followed to the next index page until no usable link is left or a
# page repeats. While book[:title] is unset, the description page (the index
# page itself, or the page linked by the @des_url selector) is passed to
# get_des.
#
# @param url [String, nil] first index page to read
# @return [Object] whatever save_book returns
def fetch_index(url = nil)
  book[:files] = []
  page_url = url || @index_url
  visited = {}

  # Iterative paging: the catalog is reset once, so entries from earlier pages
  # are kept, and a page that links back to an already-read page ends the walk.
  while page_url && !visited[page_url]
    visited[page_url] = true

    doc = Nokogiri::HTML(HTTP.headers("User-Agent" => @user_agent, 'Referer' => @referer).get(page_url).to_s)
    EpubBook.logger.info "------Fetch index--#{page_url}---------------"

    unless book[:title]
      des_doc =
        if @des_url.nil?
          doc
        else
          des_href = doc.css(@des_url).attr("href").to_s
          Nokogiri::HTML(HTTP.headers("User-Agent" => @user_agent, 'Referer' => @referer).get(generate_abs_url(des_href)).to_s)
        end
      get_des(des_doc)
    end

    doc.css(@index_item_css).each do |item|
      href = item.attr(@item_attr).to_s
      next if href.start_with?('javascript', '#')

      book[:files] << { label: item.text, url: generate_abs_url(href) }
    end

    page_url = nil
    next unless @page_css && @page_attr

    # attr returns nil when nothing matched, so to_s yields "" on the last page.
    next_href = doc.css(@page_css).attr(@page_attr).to_s
    next if next_href.empty? || next_href.start_with?('javascript', '#')

    page_url = generate_abs_url(next_href)
  end

  # Content file names follow the final chapter order.
  book[:files].each_with_index { |item, index| item[:content] = "#{index}.html" }
  save_book
end
#generate_book(book_name = nil) ⇒ Object
创建书本
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# File 'lib/epub_book/book.rb', line 69
#
# Produces the book file and optionally mails it.
#
# Steps: fetch the index when no non-empty index.yml exists, set
# book[:file_abs_name] to "<title>.<ext_name>" inside book_path, download the
# chapters, and - for the 'epub' format - assemble the EPUB with EeePub from
# the chapter files that actually exist on disk plus the cover image. When no
# @cover_css is configured and @cover is set, the cover is rendered with
# ImageMagick's `convert` by drawing the title onto the bundled cover image.
# If mail_to is set the resulting file is sent as an attachment.
#
# @param book_name [String, nil] accepted but not used by this method
# @yieldparam book [Book] self, yielded just before the EPUB is saved
# @return [Object, nil] result of sending the mail, or nil when mail_to is unset
def generate_book(book_name = nil)
  fetch_index unless File.size?(File.join(book_path, 'index.yml'))

  book[:file_abs_name] = File.join(book_path, "#{book[:title]}.#{ext_name}")
  fetch_book

  if ext_name == 'epub'
    if !@cover_css && @cover
      # Argument-list form keeps the title and paths away from the shell; the
      # title's single quotes are backslash-escaped for the -draw text primitive.
      # Block form of gsub: "\\'" in a replacement string would mean "post-match".
      escaped_title = book[:title].to_s.gsub("'") { "\\'" }
      system('convert', File.expand_path("../../../#{@cover}", __FILE__),
             '-font', 'tsxc.ttf', '-gravity', 'center', '-fill', 'red',
             '-pointsize', '16', '-draw', "text 0,0 '#{escaped_title}'",
             File.join(book_path, @cover))
    end

    epub = EeePub.make
    epub.title book[:title]
    epub.creator @creator
    epub.publisher @creator
    epub.date Time.now
    epub.identifier "http://javy_liu.com/book/#{@folder_name}", :scheme => 'URL'
    epub.uid "http://javy_liu.com/book/#{@folder_name}"
    epub.cover @cover
    epub.subject book[:title]
    epub.description book[:description] if book[:description]

    book[:files] = book[:files][0...limit] if limit
    # Keep only chapters whose content file was actually downloaded.
    book[:files].select! { |item| File.file?(File.join(book_path, item[:content])) }
    content_files = book[:files].map { |item| File.join(book_path, item[:content]) }

    epub.files content_files.push(File.join(book_path, @cover))
    epub.nav book[:files]

    yield self if block_given?

    epub.save(book[:file_abs_name])
  end

  # Send the generated file by mail when a recipient is configured.
  if mail_to
    mailer = Mailer.new
    mailer.to = mail_to
    mailer.add_file book[:file_abs_name]
    mailer.body = "您创建的电子书[#{book[:title]}]见附件\n"
    mailer.send_mail
  end
end
#link_host ⇒ Object
50 51 52 |
# File 'lib/epub_book/book.rb', line 50
#
# Scheme and host part of the index URL (for example "http://www.example.com"),
# memoised after the first successful match.
#
# The match stops at the first "/", "?" or "#" after the host, so index URLs
# without a path segment (a bare host, a trailing slash, or a query string
# directly after the host) are handled as well.
#
# @return [String, nil] nil when the index URL is not an http(s) URL
def link_host
  @link_host ||= @index_url[%r{\A(https?://[^/?#]+)}, 1]
end
#save_book ⇒ Object
save catalog file
61 62 63 64 65 |
# File 'lib/epub_book/book.rb', line 61
#
# Writes the catalog as YAML to index.yml inside book_path.
#
# The catalog is read through the `book` accessor rather than the raw @book
# variable, so calling this before the catalog has been loaded writes a catalog
# hash instead of a YAML nil document.
#
# @return [Integer] number of bytes written
def save_book
  File.write(File.join(book_path, 'index.yml'), book.to_yaml)
end