require "rinruby"

# Load the Old Faithful data set from 'faithful.dat' into two parallel arrays.
# Each data row is whitespace-separated: the first field is parsed as a Float
# (eruption duration) and the second as an Integer (waiting time).
faithful_data = {"eruption_duration"=>[], "waiting_time"=>[]}

# drop(1) skips the column header line and, unlike [1..-1], yields an empty
# list rather than nil when the file contains no lines at all.
File.readlines('faithful.dat').drop(1).each do |row|
  splitrow = row.split
  # Skip blank or truncated rows: nil.to_f / nil.to_i would otherwise record
  # 0.0 / 0 as if they were real measurements.
  next if splitrow.length < 2
  faithful_data["eruption_duration"] << splitrow[0].to_f
  faithful_data["waiting_time"] << splitrow[1].to_i
end
# Push the eruption durations into the R session and print R's summary of them.
R.ed = faithful_data["eruption_duration"]
R.eval "edsummary <- summary(ed)"
edsummary = R.pull("as.vector(edsummary)")
keys = R.pull("names(edsummary)")
puts "Summary of Old Faithful eruption duration data"
keys.each_with_index do |key, i|
  puts "#{key}: #{sprintf('%.3f', edsummary[i])}"
end
puts
puts "Stem-and-leaf plot of Old Faithful eruption duration data"
R.eval "stem(ed)"

# Histogram with a kernel density overlay and a rug of the raw observations.
# png() measures width/height in pixels by default, so the size is given in
# pixels (733x550) rather than the unusable 10x7.5.
# NOTE(review): R's built-in 'faithful' data set records eruptions in minutes;
# confirm the units of faithful.dat before trusting the "seconds" labels.
R.eval <<EOF
  png("faithful_histogram.png",width=733,height=550)
  hist(ed,seq(1.6, 5.2, 0.2), prob=1,col="lightgreen",
       main="Old Faithful eruptions",xlab="Eruption duration (seconds)")
  lines(density(ed,bw=0.1),col="orange")
  rug(ed)
  dev.off()
EOF

# Restrict the remaining analysis to eruptions strictly longer than the cutoff.
cutoff = 3
R.long_ed = R.ed.select { |x| x > cutoff }

# Empirical CDF against a fitted normal CDF, followed by a normal Q-Q plot.
# The evaluation grid x starts at the cutoff and is reused for both arguments
# of lines() instead of recomputing the same sequence three times.
R.eval <<EOF
  png('faithful_ecdf.png',width=733,height=550)
  # library(stepfun)          # package has been merged into 'stats'
  plot(ecdf(long_ed), do.points=0, verticals=1,
       main=paste('Empirical cumulative distribution function of Old',
                  'Faithful eruptions longer than #{cutoff} seconds'))
  x <- seq(#{cutoff},5.4,0.01)
  lines(x,pnorm(x,mean=mean(long_ed),
        sd=sqrt(var(long_ed))), lty=3, lwd=2, col='red')
  dev.off()

  png('faithful_qq.png',width=733,height=550)
  par(pty="s")
  qqnorm(long_ed,col="blue")
  qqline(long_ed,col="red")
  dev.off()
EOF

# Formal normality tests on the long eruptions.
# R.eval "library('ctest')"  # package has been merged into 'stats'
puts
puts "Shapiro-Wilks normality test of Old Faithful eruptions" +
     " longer than #{cutoff} seconds"
R.eval "sw <- shapiro.test(long_ed)"
puts "W = #{sprintf("%.4f",R.pull("sw$statistic"))}"
puts "p-value = #{sprintf("%.5f",R.pull("sw$p.value"))}"

puts
puts "One-sample Kolmogorov-Smirnov test of Old Faithful eruptions" +
     " longer than #{cutoff} seconds"
R.eval "ks <- ks.test(long_ed,'pnorm',mean=mean(long_ed)," +
       "sd=sqrt(var(long_ed)))"
puts "D = #{sprintf("%.4f",R.pull("ks$statistic"))}"
puts "p-value = #{sprintf("%.4f",R.pull("ks$p.value"))}"
puts "Alternative hypothesis: #{R.pull("ks$alternative")}"
puts
from rpy import *

# Load the Old Faithful data set from 'faithful.dat' into two parallel lists.
# Each data row is whitespace-separated: the first field is parsed as a float
# (eruption duration) and the second as an int (waiting time).
faithful_data = {"eruption_duration": [],
                 "waiting_time": []}

f = open('faithful.dat', 'r')
try:
    for row in f.readlines()[1:]:  # skip the column header line
        # split() with no argument strips the line ending and tolerates runs
        # of spaces or tabs; slicing off the last character instead would
        # corrupt a final line that has no trailing newline.
        splitrow = row.split()
        if len(splitrow) < 2:
            continue  # blank or truncated line
        faithful_data["eruption_duration"].append(float(splitrow[0]))
        faithful_data["waiting_time"].append(int(splitrow[1]))
finally:
    # Release the file handle even if a row fails to parse.
    f.close()

# Print R's summary of the eruption durations.
ed = faithful_data["eruption_duration"]
edsummary = r.summary(ed)
print("Summary of Old Faithful eruption duration data")
for k in edsummary.keys():
    print("%s: %.3f" % (k, edsummary[k]))
print("")

print("Stem-and-leaf plot of Old Faithful eruption duration data")
# R's stem() prints the plot itself and returns NULL, so its result is not
# printed (doing so would emit a stray "None" line).
r.stem(ed)
# Histogram of the eruption durations, written to faithful_histogram.png,
# with a kernel density curve and a rug of the raw observations drawn on top.
r.png('faithful_histogram.png', height=550, width=733)
hist_breaks = r.seq(1.6, 5.2, 0.2)
hist_style = {
    "prob": 1,
    "col": "lightgreen",
    "main": "Old Faithful eruptions",
    "xlab": "Eruption duration (seconds)",
}
r.hist(ed, hist_breaks, **hist_style)
kernel_estimate = r.density(ed, bw=0.1)
r.lines(kernel_estimate, col="orange")
r.rug(ed)
r.dev_off()  # close the PNG device so the file is written out
# Keep only the eruptions strictly longer than 3; the later plots and tests
# reuse this list.
# NOTE(review): R's built-in 'faithful' data set records eruptions in minutes;
# confirm the units of faithful.dat before trusting the "seconds" wording.
long_ed = [x for x in ed if x > 3]

# Empirical CDF of the long eruptions with a fitted normal CDF overlaid.
r.png('faithful_ecdf.png', width=733, height=550)
# No r.library('stepfun') call: that package has been merged into 'stats'.
# The title is built with plain string concatenation because paste() is an R
# function, not a Python name.
r.plot(r.ecdf(long_ed), do_points=0, verticals=1, col="blue",
       main="Empirical cumulative distribution function" +
            " of Old Faithful eruptions longer than 3 seconds")
# Evaluate the fitted normal CDF on one shared grid.
x = r.seq(3, 5.4, 0.01)
r.lines(x, r.pnorm(x, mean=r.mean(long_ed), sd=r.sqrt(r.var(long_ed))),
        lty=3, lwd=2, col="red")
r.dev_off()
# Normal Q-Q plot of the long eruptions, written to faithful_qq.png.
qq_device = dict(width=733, height=550)
r.png('faithful_qq.png', **qq_device)
r.par(pty="s")
# Draw the sample quantiles first, then the reference line over them.
for draw, colour in ((r.qqnorm, "blue"), (r.qqline, "red")):
    draw(long_ed, col=colour)
r.dev_off()
# Formal normality tests on the long eruptions.
# No r.library('ctest') call: that package has been merged into 'stats'.
print("")
print("Shapiro-Wilks normality test of Old Faithful eruptions" +
      " longer than 3 seconds")
sw = r.shapiro_test(long_ed)
print("W = %.4f" % sw['statistic']['W'])
print("p-value = %.5f" % sw['p.value'])

print("")
print("One-sample Kolmogorov-Smirnov test of Old Faithful eruptions" +
      " longer than 3 seconds")
# Compare against a normal distribution with the sample's own mean and
# standard deviation.
ks = r.ks_test(long_ed, "pnorm", mean=r.mean(long_ed),
               sd=r.sqrt(r.var(long_ed)))
print("D = %.4f" % ks['statistic']['D'])
print("p-value = %.4f" % ks['p.value'])
print("Alternative hypothesis: %s" % ks['alternative'])
print("")
|