The command executed to get the information from the gedit mailing list was:
mlstats --db-user root --db-password root --db-name gedit_mlstats --db-admin-user root --db-admin-password root http://mail.gnome.org/archives/gedit-list/
R commands:
library(RMySQL) con <- dbConnect(dbDriver("MySQL"), user="root", password="root", dbname="gedit_mlstats") query <- " SELECT x.myyear,x.mymonth, if(g.key2 is NULL,0,g.messages) Value FROM fm3_wildcard_date.tblyearmonth x LEFT JOIN (SELECT date_format(m.first_date, '%Y%m') key2, COUNT(m.message_ID) messages FROM messages m GROUP BY date_format(m.first_date,'%Y%m')) g ON x.id=g.key2 WHERE x.id>= (SELECT date_format(min(s.first_date), '%Y%m') FROM messages s) AND x.id<= (SELECT date_format(max(s.first_date), '%Y%m') FROM messages s) ; " results <- dbGetQuery(con, query) evol_messages = ts(results$messages, start=c(2000,4), freq=12) plot(evol_messages, type="l", xlab="Date", ylab="Emails", main="Number of messages per month") query_avg <- " SELECT AVG(h.Value) FROM ( SELECT x.myyear, x.mymonth, if(g.key2 is NULL,0,g.messages) Value FROM fm3_wildcard_date.tblyearmonth x LEFT JOIN (SELECT date_format(m.first_date, '%Y%m') key2, COUNT(m.message_ID) messages FROM messages m GROUP BY date_format(m.first_date,'%Y%m')) g ON x.id=g.key2 WHERE x.id>= (SELECT date_format(min(s.first_date), '%Y%m') FROM messages s) AND x.id<= (SELECT date_format(max(s.first_date), '%Y%m') FROM messages s) ) h; " result_avg <- dbGetQuery(con,query_avg) qqline(result_avg, col="blue", lty=2) query_max_min <- " SELECT MAX(h.Value) max, MIN(h.Value) min FROM ( SELECT x.myyear, x.mymonth, if(g.key2 is NULL,0,g.messages) Value FROM fm3_wildcard_date.tblyearmonth x LEFT JOIN (SELECT date_format(m.first_date, '%Y%m') key2, COUNT(m.message_ID) messages FROM messages m GROUP BY date_format(m.first_date,'%Y%m')) g ON x.id=g.key2 WHERE x.id>= (SELECT date_format(min(s.first_date), '%Y%m') FROM messages s) AND x.id<= (SELECT date_format(max(s.first_date), '%Y%m') FROM messages s) ) h; " result_max_min <- dbGetQuery(con,query_max_min) qqline(result_max_min$max, col="red", lty=2) qqline(result_max_min$min, col="green", lty=2) legend("topleft", inset=.05, c("average","maximun","minimun"), fill=c("blue","red","green"))
R commands:
query <- " SELECT * FROM ( SELECT COUNT(message_id) num, domain_name FROM messages LEFT JOIN people ON author_email_address=email_address GROUP BY domain_name ORDER BY num DESC ) g WHERE g.num>=15; " results <- dbGetQuery(con,query) query <- " SELECT SUM(g.num) AS sum FROM ( SELECT COUNT(message_id) num, domain_name FROM messages LEFT JOIN people ON author_email_address=email_address GROUP BY domain_name ORDER BY num DESC ) g WHERE g.num<15 " total_other <- dbGetQuery(con,query) results <- rbind(results, data.frame(num=total_other$sum, domain_name="other")) pie(main="Messages by domain name", clockwise=1, results$num, labels=results$domain_name)
R commands:
query <- " SELECT * FROM ( SELECT COUNT(message_id) num, top_level_domain FROM messages LEFT JOIN people ON author_email_address=email_address GROUP BY top_level_domain ORDER BY num DESC ) g WHERE g.num>=20; " results <- dbGetQuery(con,query) query <- " SELECT SUM(g.num) AS sum FROM ( SELECT COUNT(message_id) num, top_level_domain FROM messages LEFT JOIN people ON author_email_address=email_address GROUP BY top_level_domain ORDER BY num DESC ) g WHERE g.num<20 " total_other <- dbGetQuery(con,query) results <- rbind(results, data.frame(num=total_other$sum, top_level_domain="other")) pie(main="Messages by top level domain", clockwise=1, results$num, labels=results$top_level_domain)