require 'net/http'
require 'rubygems'
require 'hpricot'

# Fetches English Wikipedia articles and keeps an on-disk copy of each
# article's "#bodyContent" element under WikipediaFolder. A stored copy is
# reused for the rest of the local calendar day on which it was written.
class Wikipedia
  # Cache directory, relative to the process's current working directory.
  WikipediaFolder = 'wikipedia/'.freeze

  # Reports whether +t+ falls on the current local calendar day.
  #
  # @param t [Time] timestamp to test
  # @return [Boolean] true when +t+ has the same year and day-of-year as now
  def is_today(t)
    now = Time.now
    now.year == t.year && now.yday == t.yday
  end

  # Returns the article for +search_word+, downloading it when no copy
  # written today exists in the cache directory.
  #
  # NOTE(review): a fresh download returns the complete downloaded page,
  # while a cache hit returns only the stored "#bodyContent" markup. That
  # asymmetry is inherited and kept for compatibility; confirm what callers
  # expect before unifying it.
  #
  # @param search_word [String] article title; it is not modified
  # @return [String] page HTML, cached body markup, or - when the download
  #   fails - "unable to download: <error>... " followed by any stale cached
  #   copy
  def page(search_word)
    cache_dir = WikipediaFolder.chop
    Dir.mkdir(cache_dir) unless File.directory?(cache_dir)

    cache_file = cache_file_for(search_word)
    if File.exist?(cache_file) && is_today(File.mtime(cache_file))
      return File.read(cache_file)
    end

    fetch_and_store(search_word, cache_file)
  end

  private

  # Maps a search word to its cache file path. Path separators are replaced
  # so the word cannot point outside (or into a subdirectory of) the cache
  # directory.
  def cache_file_for(search_word)
    WikipediaFolder + search_word.downcase.gsub(%r{[/\\]}, '_') + '.html'
  end

  # Downloads the article, stores its "#bodyContent" element in +cache_file+
  # and returns the full downloaded page. On failure returns an error message
  # followed by whatever is currently stored in +cache_file+.
  def fetch_and_store(search_word, cache_file)
    # Wikipedia likes the first character capitalised. Build a new string
    # instead of writing into the caller's (possibly frozen) argument.
    title = search_word[0, 1].upcase + search_word[1..-1].to_s

    # NOTE(review): plain HTTP request that does not follow redirects, and
    # the title is not URL-escaped; confirm the server still answers this
    # form with the article HTML.
    html = Net::HTTP.get('en.wikipedia.org', '/wiki/' + title)
    body_content = Hpricot(html).search('#bodyContent')
    File.open(cache_file, 'w') { |f| f.puts body_content }
    html
  rescue StandardError => e
    # StandardError covers network, parse and file errors without swallowing
    # Interrupt or SystemExit the way `rescue Exception` did.
    message = "unable to download: #{e}... "
    message += File.read(cache_file) if File.exist?(cache_file)
    message
  end
end