Class: EpubBook::Book

Inherits:
Object
  • Object
show all
Defined in:
lib/epub_book/book.rb

Constant Summary collapse

UserAgent =
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36"
Referer =
"http://www.baidu.com/"
Reg =
/<script.*?>.*?<\/script>/m

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(index_url, des_url = nil) {|_self| ... } ⇒ Book

Returns a new instance of Book.

Yields:

  • (_self)

Yield Parameters:



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/epub_book/book.rb', line 30

# Builds a Book for the given index page and fills in the default scraping
# settings. The instance is yielded to an optional block so callers can
# override any of the defaults before the book is used.
#
# @param index_url [String] URL of the page that lists the chapters
# @param des_url [String, nil] stored as @des_url; nil by default
# @yieldparam _self [Book] the freshly configured instance
def initialize(index_url, des_url = nil)
  @index_url, @des_url = index_url, des_url
  @user_agent, @referer = UserAgent, Referer

  # An MD5 digest is 16 bytes, so its Base64 form always ends in "==";
  # dropping the last two characters removes that padding.
  url_digest = Digest::MD5.digest(@index_url)
  @folder_name = Base64.urlsafe_encode64(url_digest)[0..-3]

  @creator        = 'javy_liu'
  @title_css      = '.wrapper h1.title1'
  @index_item_css = 'ul.list3>li>a'
  @cover          = 'cover.jpg'
  @body_css       = '.articlebody'
  @item_attr      = "href"
  @ext_name       = 'epub'

  yield(self) if block_given?
end

Instance Attribute Details

#body_css ⇒ Object

Returns the value of attribute body_css.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# CSS selector for the chapter body. The constructor defaults it to
# '.articlebody'; fetch_book passes it to the parsed page's `css` call.
def body_css
  @body_css
end

#cover ⇒ Object

Returns the value of attribute cover.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Cover image file name (constructor default: 'cover.jpg'). generate_book
# joins it onto book_path and hands it to the epub builder as the cover.
def cover
  @cover
end

#cover_css ⇒ Object

Returns the value of attribute cover_css.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Not assigned by the constructor. generate_book only renders a cover image
# with `convert` when this is unset and `cover` is set.
# NOTE(review): presumably a CSS selector for a cover image on the source
# page; the code that consumes it is not visible here - confirm.
def cover_css
  @cover_css
end

#creator ⇒ Object

Returns the value of attribute creator.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Name written as both the epub creator and publisher by generate_book.
# The constructor defaults it to 'javy_liu'.
def creator
  @creator
end

#des_url ⇒ Object

Returns the value of attribute des_url.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Second constructor argument (nil by default). Despite the name, fetch_index
# passes it to `doc.css(...)` and follows the matched element's "href" to
# load the page that get_des is run on; when nil the index page itself is used.
def des_url
  @des_url
end

#description_css ⇒ Object

Returns the value of attribute description_css.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Not assigned by the constructor and not referenced by the methods shown on
# this page.
# NOTE(review): presumably the CSS selector used to extract the book
# description (likely inside get_des) - confirm.
def description_css
  @description_css
end

#ext_name ⇒ Object

Returns the value of attribute ext_name.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Output format / file extension (constructor default: 'epub'). It is used as
# the extension of book[:file_abs_name], and fetch_book and generate_book
# branch on its value (a 'txt' mode and an 'epub' mode).
def ext_name
  @ext_name
end

#folder_name ⇒ Object

Returns the value of attribute folder_name.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Name of the per-book working folder. The constructor derives it from the
# URL-safe Base64 encoding of the MD5 digest of the index URL (last two
# characters dropped); book_path appends it to the base directory.
def folder_name
  @folder_name
end

#ignore_txt ⇒ Object

Returns the value of attribute ignore_txt.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Optional pattern; when set, fetch_book (in 'txt' mode) interpolates it into
# a `sed -r '/.../d'` command to delete matching lines from the output file.
def ignore_txt
  @ignore_txt
end

#index_item_css ⇒ Object

Returns the value of attribute index_item_css.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# CSS selector for the chapter entries on the index page (constructor
# default: 'ul.list3>li>a'); fetch_index iterates over the matches.
def index_item_css
  @index_item_css
end

#item_attr ⇒ Object

Returns the value of attribute item_attr.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Attribute read from each index entry to obtain the chapter URL
# (constructor default: "href").
def item_attr
  @item_attr
end

#limit ⇒ Object

Returns the value of attribute limit.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Optional cap on the number of index entries: fetch_book stops once the
# entry index reaches it, and generate_book truncates book[:files] to it.
def limit
  @limit
end

#mail_to ⇒ Object

Returns the value of attribute mail_to.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Optional recipient; when set, generate_book sends the generated file as an
# attachment through Mailer.
def mail_to
  @mail_to
end

#page_attr ⇒ Object

Returns the value of attribute page_attr.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Attribute read from the element matched by page_css to find the next index
# page. fetch_index only follows pagination when both are set.
def page_attr
  @page_attr
end

#page_css ⇒ Object

Returns the value of attribute page_css.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# CSS selector for the "next page" element of a paginated index.
# fetch_index only follows pagination when this and page_attr are both set.
def page_css
  @page_css
end

#path ⇒ Object

Returns the value of attribute path.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Base directory under which the book folder is created. When nil, book_path
# falls back to the current working directory.
def path
  @path
end

#referer ⇒ Object

Returns the value of attribute referer.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Value sent as the 'Referer' header on every HTTP request; the constructor
# initialises it from the Referer constant.
def referer
  @referer
end

#title_css ⇒ Object

Returns the value of attribute title_css.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Constructor default: '.wrapper h1.title1'. Not referenced by the methods
# shown on this page.
# NOTE(review): presumably the CSS selector for the book title, consumed by
# get_des - confirm.
def title_css
  @title_css
end

#user_agent ⇒ Object

Returns the value of attribute user_agent.



25
26
27
# File 'lib/epub_book/book.rb', line 25

# Value sent as the "User-Agent" header on every HTTP request; the
# constructor initialises it from the UserAgent constant.
def user_agent
  @user_agent
end

Instance Method Details

#book ⇒ Object



54
55
56
57
58
# File 'lib/epub_book/book.rb', line 54

# Returns the in-memory catalog hash for this book, loading it on first use.
#
# Creates the book folder if it does not exist yet. When a non-empty
# index.yml is present in that folder it is parsed with YAML.load; otherwise
# a fresh catalog ({files: []}) is returned. The result is memoized in @book.
#
# @return [Hash] the catalog (keys such as :files, :title are set elsewhere)
def book
  return @book if @book

  Dir.mkdir(book_path) unless File.directory?(book_path)

  index_file = File.join(book_path, 'index.yml')
  @book = if File.size?(index_file)
            # Block form closes the handle as soon as parsing finishes;
            # passing a bare File.open(...) to YAML.load left it open.
            File.open(index_file) { |io| YAML.load(io) }
          else
            {files: []}
          end
end

#book_path ⇒ Object



46
47
48
# File 'lib/epub_book/book.rb', line 46

# Absolute folder in which this book's files are stored: the configured
# @path (or the current working directory when @path is nil) joined with
# @folder_name. Memoized in @book_path.
#
# @return [String]
def book_path
  # Dir.pwd asks the process for its working directory directly instead of
  # spawning a `pwd` subprocess, which also keeps this working on platforms
  # without that command.
  @book_path ||= File.join((@path || Dir.pwd), @folder_name)
end

#fetch_book ⇒ Object



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/epub_book/book.rb', line 174

# Downloads the content of every chapter listed in book[:files].
#
# * In 'txt' mode, everything is appended to the single file at
#   book[:file_abs_name]: an intro header, then each chapter's label followed
#   by the direct text children of the element matched by @body_css. When
#   ignore_txt is set, lines matching it are removed with sed afterwards.
# * In every other mode (the default 'epub'), each chapter is written to its
#   own HTML file (item[:content]) inside book_path, with <script> elements
#   stripped via Reg.
#
# Chapters whose content file already exists and is non-empty are skipped,
# and at most `limit` index entries are visited when limit is set. A failure
# on one chapter is logged and does not stop the others.
def fetch_book
  EpubBook.logger.info "------Fetch book----------"

  txt_mode = ext_name == 'txt'
  txt_file = nil
  if txt_mode
    # Open the text file in append mode and write the intro section.
    txt_file = File.open(book[:file_abs_name], 'a')
    txt_file.write("简介\n\n")
    txt_file.write('  ')
    txt_file.write(book[:description] || " ")
  end

  begin
    book[:files].each_with_index do |item, index|
      break if limit && index >= limit

      content_path = File.join(book_path, item[:content])

      # Skip chapters that were already downloaded (file exists, size > 0).
      next if test(?s, content_path)

      begin
        doc_file = Nokogiri::HTML(HTTP.headers("User-Agent" => @user_agent, 'Referer' => @referer).get(item[:url]).to_s)

        EpubBook.logger.info item[:label]

        if txt_mode
          txt_file.write("\n\n")
          txt_file.write(item[:label])
          txt_file.write("\n\n")
          # Only direct text nodes are kept; nested markup is dropped.
          doc_file.css(@body_css).children.each do |node|
            txt_file.write(node.text) if node.name == 'text'
          end
        else
          # The original compared against 'pub', so the default 'epub' mode
          # fell into the txt branch with no open file and every chapter
          # failed silently.
          File.open(content_path, 'w') do |f|
            f.write("<h3>#{item[:label]}</h3>")
            f.write(doc_file.css(@body_css).to_s.gsub(Reg, ''))
          end
        end
      rescue StandardError => e
        # StandardError (not Exception) so Ctrl-C / exit still abort the run.
        EpubBook.logger.info "Error:#{e.message},#{item.inspect}"
      end
    end
  ensure
    # Close the text file even if iteration is aborted by an error.
    txt_file.close if txt_file
  end

  return unless txt_mode

  EpubBook.logger.info "=============去除包含指定忽略字符的行======="
  EpubBook.logger.info ignore_txt
  # Argument-list form bypasses the shell, so file names containing spaces or
  # shell metacharacters are passed to sed intact.
  system('sed', '-i', '-r', "/#{ignore_txt}/d", book[:file_abs_name]) if ignore_txt
end

#fetch_index(url = nil) ⇒ Object

得到书目索引



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/epub_book/book.rb', line 126

# Builds the chapter index (book[:files]) and saves it to index.yml.
#
# Starting at +url+ (or @index_url), each index page is fetched and every
# element matching @index_item_css contributes a {label:, url:} entry; links
# whose target starts with "javascript" or "#" are skipped. While the book has
# no title yet, get_des is run on the index page, or on the page linked from
# the element matched by @des_url when that is set. When @page_css and
# @page_attr are both set, the "next page" link is followed until there is
# none. Finally each entry is assigned a content file name ("<n>.html") and
# the catalog is saved.
#
# Pagination is handled with a loop rather than recursion: the recursive
# version cleared book[:files] on every page and never terminated, because
# `attr(...).to_s` is always truthy.
#
# @param url [String, nil] index page to start from; defaults to @index_url
def fetch_index(url=nil)
  book[:files] = []
  page_url = url || @index_url
  visited = {}

  # The visited set stops the walk if a "next" link points back to a page
  # that was already processed.
  while page_url && !visited[page_url]
    visited[page_url] = true

    doc = Nokogiri::HTML(HTTP.headers("User-Agent" => @user_agent, 'Referer' => @referer).get(page_url).to_s)
    EpubBook.logger.info "------Fetch index--#{page_url}---------------"

    unless book[:title]
      des_doc = if @des_url.nil?
                  doc
                else
                  des_href = doc.css(@des_url).attr("href").to_s
                  Nokogiri::HTML(HTTP.headers("User-Agent" => @user_agent, 'Referer' => @referer).get(generate_abs_url(des_href)).to_s)
                end
      get_des(des_doc)
    end

    doc.css(@index_item_css).each do |item|
      href = item.attr(@item_attr).to_s
      next if href.start_with?('javascript') || href.start_with?('#')

      book[:files] << {label: item.text, url: generate_abs_url(href)}
    end

    # Follow pagination, if configured and a usable next-page link exists.
    page_url = nil
    if @page_css && @page_attr
      next_page = doc.css(@page_css).attr(@page_attr).to_s
      unless next_page.empty? || next_page.start_with?('javascript', '#')
        page_url = generate_abs_url(next_page)
      end
    end
  end

  book[:files].each_with_index { |item, index| item[:content] = "#{index}.html" }

  # Persist the catalog.
  save_book
end

#generate_book(book_name = nil) ⇒ Object

创建书本



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/epub_book/book.rb', line 69

# Creates the book file and optionally mails it.
#
# Fetches the index when no non-empty index.yml exists, sets
# book[:file_abs_name] to "<book_path>/<title>.<ext_name>", downloads the
# chapters via fetch_book and, in 'epub' mode, assembles the epub with EeePub
# from the chapter files that actually exist on disk. The instance is yielded
# to an optional block just before the epub is saved. When mail_to is set the
# resulting file is sent as an attachment.
#
# @param book_name [Object] accepted but not used by this method
# @yieldparam _self [Book]
def generate_book(book_name=nil)
  # Fetch the catalog if it has not been saved yet.
  fetch_index unless test(?s, File.join(book_path, 'index.yml'))

  book[:file_abs_name] = File.join(book_path, "#{book[:title]}.#{ext_name}")

  fetch_book
  if ext_name == 'epub'
    if !@cover_css && @cover
      # The title comes from a scraped web page, so it must never reach a
      # shell. The argument-list form of system runs `convert` directly.
      # Quotes/backslashes are escaped for the draw primitive's own quoting
      # (NOTE(review): confirm against ImageMagick's -draw text escaping).
      caption = book[:title].to_s.gsub(/['\\]/) { |ch| "\\#{ch}" }
      system('convert', File.expand_path("../../../#{@cover}", __FILE__),
             '-font', 'tsxc.ttf', '-gravity', 'center', '-fill', 'red',
             '-pointsize', '16', '-draw', "text 0,0 '#{caption}'",
             File.join(book_path, @cover))
    end

    epub = EeePub.make

    epub.title book[:title]
    epub.creator @creator
    epub.publisher @creator
    epub.date Time.now
    epub.identifier "http://javy_liu.com/book/#{@folder_name}", :scheme => 'URL'
    epub.uid "http://javy_liu.com/book/#{@folder_name}"
    epub.cover @cover
    epub.subject book[:title]
    epub.description book[:description] if book[:description]

    book[:files] = book[:files][0...limit] if limit
    # Keep only the chapters whose content file was actually downloaded.
    book[:files].select! { |item| test(?f, File.join(book_path, item[:content])) }
    chapter_files = book[:files].map { |item| File.join(book_path, item[:content]) }

    epub.files chapter_files + [File.join(book_path, @cover)]
    epub.nav book[:files]
    yield self if block_given?

    epub.save(book[:file_abs_name])
  end

  # Send the generated file by mail when a recipient is configured.
  if mail_to
    mailer = Mailer.new
    mailer.to = mail_to
    mailer.add_file book[:file_abs_name]
    mailer.body = "您创建的电子书[#{book[:title]}]见附件\n"
    mailer.send_mail
  end

end


#link_host ⇒ Object



50
51
52
# File 'lib/epub_book/book.rb', line 50

# Scheme and host part of @index_url (e.g. "http://www.example.com"),
# memoized in @link_host.
#
# The pattern stops at the first "/", "?" or "#" after the host, so root URLs
# such as "http://host/" or "http://host" are handled too; the previous
# pattern required a "/word" path segment and returned nil for them.
#
# @return [String, nil] nil when @index_url is not an http(s) URL
def link_host
  @link_host ||= @index_url[%r{\A(https?://[^/?#]+)}, 1]
end

#save_book ⇒ Object

save catalog file



61
62
63
64
65
# File 'lib/epub_book/book.rb', line 61

# Serialises the in-memory catalog (@book) as YAML and writes it to
# index.yml inside book_path, replacing any previous content.
def save_book
  index_file = File.join(book_path, 'index.yml')
  File.open(index_file, 'w') { |io| io.write(@book.to_yaml) }
end