Mailing List Stats

The command executed to get the information from the gedit mailing list was:

mlstats --db-user root --db-password root --db-name gedit_mlstats --db-admin-user root --db-admin-password root http://mail.gnome.org/archives/gedit-list/

Evolution of messages per month

R commands:

library(RMySQL)

con <- dbConnect(dbDriver("MySQL"), user="root",
                 password="root", dbname="gedit_mlstats")

query <- "
SELECT x.myyear,x.mymonth, if(g.key2 is NULL,0,g.messages) Value
FROM fm3_wildcard_date.tblyearmonth x
LEFT JOIN (SELECT date_format(m.first_date, '%Y%m') key2,
COUNT(m.message_ID) messages
FROM messages m
GROUP BY date_format(m.first_date,'%Y%m')) g
ON x.id=g.key2
WHERE x.id>= (SELECT date_format(min(s.first_date), '%Y%m') FROM messages s)
AND x.id<= (SELECT date_format(max(s.first_date), '%Y%m') FROM messages s) ;
"
results <- dbGetQuery(con, query)
evol_messages = ts(results$messages, start=c(2000,4), freq=12)
plot(evol_messages, type="l", xlab="Date", ylab="Emails",
     main="Number of messages per month")

query_avg <- "
SELECT AVG(h.Value)
FROM (
SELECT x.myyear, x.mymonth, if(g.key2 is NULL,0,g.messages) Value
FROM fm3_wildcard_date.tblyearmonth x
LEFT JOIN (SELECT date_format(m.first_date, '%Y%m') key2,
COUNT(m.message_ID) messages
FROM messages m
GROUP BY date_format(m.first_date,'%Y%m')) g
ON x.id=g.key2
WHERE x.id>= (SELECT date_format(min(s.first_date), '%Y%m') FROM messages s)
AND x.id<= (SELECT date_format(max(s.first_date), '%Y%m') FROM messages s)
) h;
"
result_avg <- dbGetQuery(con,query_avg)
qqline(result_avg, col="blue", lty=2)

query_max_min <- "
SELECT MAX(h.Value) max, MIN(h.Value) min
FROM (
SELECT x.myyear, x.mymonth, if(g.key2 is NULL,0,g.messages) Value
FROM fm3_wildcard_date.tblyearmonth x
LEFT JOIN (SELECT date_format(m.first_date, '%Y%m') key2,
COUNT(m.message_ID) messages
FROM messages m
GROUP BY date_format(m.first_date,'%Y%m')) g
ON x.id=g.key2
WHERE x.id>= (SELECT date_format(min(s.first_date), '%Y%m') FROM messages s)
AND x.id<= (SELECT date_format(max(s.first_date), '%Y%m') FROM messages s)
) h;
"
result_max_min <- dbGetQuery(con,query_max_min)
qqline(result_max_min$max, col="red", lty=2)
qqline(result_max_min$min, col="green", lty=2)

legend("topleft", inset=.05, c("average","maximun","minimun"),
       fill=c("blue","red","green"))

See Figure 8, “Evolution of messages per month”.

Messages by domain name

R commands:

query <-
"
SELECT *
FROM (
	SELECT COUNT(message_id) num, domain_name
	FROM messages LEFT JOIN people ON author_email_address=email_address
	GROUP BY domain_name ORDER BY num DESC
) g WHERE g.num>=15;
"
results <- dbGetQuery(con,query)

query <-
"
SELECT SUM(g.num) AS sum
FROM (
	SELECT COUNT(message_id) num, domain_name
	FROM messages LEFT JOIN people ON author_email_address=email_address
	GROUP BY domain_name ORDER BY num DESC
) g WHERE g.num<15
"
total_other <- dbGetQuery(con,query)
results <- rbind(results, data.frame(num=total_other$sum, domain_name="other"))

pie(main="Messages by domain name", clockwise=1, results$num,
    labels=results$domain_name)

See Figure 9, “Messages by domain name”.

Messages by top level domain

R commands:

query <-
"
SELECT *
FROM (
	SELECT COUNT(message_id) num, top_level_domain
	FROM messages LEFT JOIN people ON author_email_address=email_address
	GROUP BY top_level_domain ORDER BY num DESC
) g WHERE g.num>=20;
"
results <- dbGetQuery(con,query)

query <-
"
SELECT SUM(g.num) AS sum
FROM (
	SELECT COUNT(message_id) num, top_level_domain
	FROM messages LEFT JOIN people ON author_email_address=email_address
	GROUP BY top_level_domain ORDER BY num DESC
) g WHERE g.num<20
"
total_other <- dbGetQuery(con,query)
results <- rbind(results, data.frame(num=total_other$sum,
                 top_level_domain="other"))

pie(main="Messages by top level domain", clockwise=1, results$num,
    labels=results$top_level_domain)

See Figure 10, “Messages by top level domain”.