To extract the data, the following command was executed:
cvsanaly2 -u root -p root -d gedit_cvsanaly --extensions=Metrics --metrics-all http://svn.gnome.org/svn/gedit/trunk/
R commands (number of commits per month):
# Plot the number of commits per month, with dashed horizontal reference
# lines for the average, maximum and minimum monthly commit counts.
library(RMySQL)

# Connection to the database filled by cvsanaly2. It stays open because the
# snippets that follow reuse `con`; call dbDisconnect(con) when finished.
con <- dbConnect(dbDriver("MySQL"), user = "root",
                 password = "root", dbname = "gedit_cvsanaly")

# One row per calendar month. The explicit ORDER BY guarantees chronological
# order, which ts() below depends on; GROUP BY alone does not promise it.
# Every selected expression is also grouped, so the query is accepted when
# MySQL runs with ONLY_FULL_GROUP_BY.
query <- "SELECT date_format(s.date, '%m/%Y') date, count(s.id) commits
FROM scmlog s
GROUP BY date_format(s.date, '%Y%m'), date_format(s.date, '%m/%Y')
ORDER BY date_format(s.date, '%Y%m');"
results <- dbGetQuery(con, query)

# NOTE(review): start = c(1998, 4) assumes the first row is April 1998 and
# that no later month is missing from the result - confirm against the data.
evol_commits <- ts(results$commits, start = c(1998, 4), frequency = 12)
plot(evol_commits, type = "l", xlab = "Date", ylab = "Commits",
     main = "Number of commits per month")

# Average number of commits per month. The inner query only needs the
# per-month count.
query_avg <- "SELECT AVG(g.numcommits)
FROM
( SELECT count(s.id) numcommits
FROM scmlog s
GROUP BY date_format(s.date, '%Y%m') ) g;"
result_avg <- dbGetQuery(con, query_avg)
# abline(h = ) draws a horizontal line at the given height. qqline() is meant
# for adding a reference line to a Q-Q plot and is not the right tool here.
abline(h = as.numeric(result_avg[[1]]), col = "blue", lty = 2)

# Largest and smallest monthly commit counts.
query_max_min <- "SELECT MAX(g.numcommits) as max, MIN(g.numcommits) as min
FROM
( SELECT count(s.id) numcommits
FROM scmlog s
GROUP BY date_format(s.date, '%Y%m') ) g;"
result_max_min <- dbGetQuery(con, query_max_min)
abline(h = result_max_min$max, col = "red", lty = 2)
abline(h = result_max_min$min, col = "green", lty = 2)

# The legend mirrors the dashed coloured lines drawn above.
legend("topright", inset = 0.05,
       legend = c("average", "maximum", "minimum"),
       col = c("blue", "red", "green"), lty = 2)
R commands (aggregated number of commits):
# Plot the cumulative (aggregated) number of commits over time.
# Uses the connection `con` opened earlier in this file.

# Monthly commit counts in chronological order. The running total is computed
# in R below instead of with a MySQL user variable (@sumacu): the evaluation
# order of user-variable assignments inside a SELECT is not guaranteed, and
# the derived table had no ORDER BY.
query <- "SELECT date_format(s.date, '%Y') myyear,
date_format(s.date, '%m') mymonth,
COUNT(s.id) numcommits
FROM scmlog s
GROUP BY myyear, mymonth
ORDER BY myyear, mymonth;"
results <- dbGetQuery(con, query)

# Same column name the SQL version produced, so code reading
# results$aggregated_numcommits keeps working.
results$aggregated_numcommits <- cumsum(results$numcommits)

# NOTE(review): start = c(1998, 4) assumes the first row is April 1998 and
# that no later month is missing from the result - confirm against the data.
evol_num_commits <- ts(results$aggregated_numcommits, start = c(1998, 4),
                       frequency = 12)
plot(evol_num_commits, type = "h", xlab = "Date", ylab = "Commits",
     main = "Aggregated number of commits", col = "dark blue")
R commands (number of commits by author):
# Count commits per committer (joined with their name from `people`) and plot
# the counts in ascending order. Uses the connection `con` opened earlier in
# this file.
query <- paste(
  "SELECT p.name author, count(s.id) commits",
  "FROM scmlog s LEFT JOIN people p ON s.committer_id=p.id",
  "GROUP BY committer_id ORDER BY commits;",
  sep = "\n"
)
results <- dbGetQuery(con, query)

# Only the counts are plotted, so the x axis is the row index of each
# author in the ordered result rather than the author's name.
plot(
  results[["commits"]],
  xlab = "Author",
  ylab = "Commits",
  main = "Number of commits by author"
)
R commands (Gini coefficient and Lorenz curve of commits per committer):
# Measure how unevenly commits are distributed among committers: print the
# Gini coefficient and draw the Lorenz curve of commits per committer.
# Uses the connection `con` opened earlier in this file.
library(ineq)

query <- "SELECT committer_id, count(*) AS num_commits FROM scmlog
GROUP BY committer_id ORDER BY num_commits desc;"
total_committers <- dbGetQuery(con, query)

# print() makes the coefficient visible when the file is run with source(),
# where top-level values are not auto-printed.
print(Gini(total_committers$num_commits))

# TRUE instead of T: T is an ordinary variable that can be reassigned.
Lc(total_committers$num_commits, plot = TRUE)
R commands (commits by author per year):
# Dot chart of commits per author and year, limited to author/year pairs with
# more than 20 commits. Each author gets their own colour index.
library(RMySQL)

# Call dbDisconnect(con) once this connection is no longer needed.
con <- dbConnect(dbDriver("MySQL"), user = "root",
                 password = "root", dbname = "gedit_cvsanaly")

# HAVING filters the grouped rows directly, and ORDER BY sits on the outer
# query: an ORDER BY inside a derived table does not guarantee the order of
# the final result.
query <- "
SELECT date_format(s.date, '%Y') AS year, p.name AS name, count(s.id) AS num
FROM scmlog s LEFT JOIN people p ON s.committer_id=p.id
GROUP BY year, name
HAVING num > 20
ORDER BY year, num;
"
results <- dbGetQuery(con, query)
results$year <- factor(results$year)

# The distinct authors are already present in `results`, so no second query is
# needed. match() gives each row the position of its author in the list of
# distinct names, i.e. one integer colour index per author.
author_names <- unique(results$name)
results$color <- match(results$name, author_names)

dotchart(results$num, groups = results$year, labels = results$name,
         color = results$color, cex = 0.7, xlab = "Number of commits",
         main = "Commits by author per year")