CVSAnalY

The data was extracted by running the following command:

cvsanaly2 -u root -p root -d gedit_cvsanaly --extensions=Metrics --metrics-all http://svn.gnome.org/svn/gedit/trunk/

Evolution of commits per month

R commands:

library(RMySQL)

# Connect to the MySQL database that cvsanaly2 populated.
con <- dbConnect(dbDriver("MySQL"), user = "root",
                 password = "root", dbname = "gedit_cvsanaly")

# One row per calendar month that has at least one commit.
# MIN(s.date) keeps the select list valid under ONLY_FULL_GROUP_BY, and the
# explicit ORDER BY guarantees chronological rows, which ts() relies on.
query <- "SELECT date_format(MIN(s.date), '%m/%Y') date, count(s.id) commits
          FROM scmlog s
          GROUP BY date_format(s.date, '%Y%m')
          ORDER BY MIN(s.date);"
results <- dbGetQuery(con, query)

# NOTE(review): ts() places the rows on consecutive months starting at
# April 1998; this assumes the first commit is in 1998-04 and that no month
# is missing from the query result -- confirm against the data.
evol_commits <- ts(results$commits, start = c(1998, 4), frequency = 12)
plot(evol_commits, type = "l", xlab = "Date", ylab = "Commits",
     main = "Number of commits per month")

# Average number of commits over all months that have at least one commit.
query_avg <- "SELECT AVG(g.numcommits)
	FROM
	( SELECT count(s.id) numcommits
		FROM scmlog s
		GROUP BY date_format(s.date,'%Y%m') ) g;"
result_avg <- dbGetQuery(con, query_avg)

# Draw the average as a horizontal reference line on the current plot.
# abline(h = ) is the call for that; qqline() fits a line through the
# quartiles of a Q-Q plot and does not draw a constant level.
# The query returns a single cell, so take the first column by position.
abline(h = as.numeric(result_avg[[1]]), col = "blue", lty = 2)

# Highest and lowest monthly commit counts (months without commits are not
# present in the grouped result, so they do not contribute to the minimum).
query_max_min <- "SELECT MAX(g.numcommits) as max, MIN(g.numcommits) as min
	FROM
	( SELECT count(s.id) numcommits
		FROM scmlog s
		GROUP BY date_format(s.date,'%Y%m') ) g;"
result_max_min <- dbGetQuery(con, query_max_min)

# Horizontal reference lines on the current plot. abline(h = ) draws a
# constant level; qqline() is meant for Q-Q plots and would not.
abline(h = result_max_min$max, col = "red", lty = 2)
abline(h = result_max_min$min, col = "green", lty = 2)

# Legend for the three reference levels; dashed line samples (lty = 2) are
# used instead of filled boxes so the key reads as lines, with the same
# colours as the reference lines.
legend("topright", inset = .05,
       legend = c("average", "maximum", "minimum"),
       col = c("blue", "red", "green"), lty = 2)

See Figure 3, “Evolution of commits per month”.

Aggregated number of commits up to time

R commands:

# Commits per calendar month, in chronological order.
# The running total is computed in R with cumsum() rather than with a MySQL
# user variable (@sumacu := @sumacu + ...): the evaluation order of user
# variable assignments inside a SELECT is not defined by MySQL, and the
# grouped rows had no ORDER BY, so the accumulated values could be wrong.
query <- "SELECT date_format(s.date, '%Y') myyear,
                 date_format(s.date, '%m') mymonth,
                 COUNT(s.id) numcommits
          FROM scmlog s
          GROUP BY myyear, mymonth
          ORDER BY myyear, mymonth;"
results <- dbGetQuery(con, query)
results$aggregated_numcommits <- cumsum(results$numcommits)

# NOTE(review): the series is laid out on consecutive months starting at
# April 1998; this assumes the first row is 1998-04 and that no month is
# missing from the result -- confirm against the data.
evol_num_commits <- ts(results$aggregated_numcommits, start = c(1998, 4),
                       frequency = 12)
plot(evol_num_commits, type = "h", xlab = "Date", ylab = "Commits",
     main = "Aggregated number of commits", col = "dark blue")

See Figure 4, “Aggregated number of commits up to time”.

Number of commits per author

R commands:

# Commit count for each committer, sorted ascending by that count.
query <- "SELECT p.name author, count(s.id) commits
	FROM scmlog s LEFT JOIN people p ON s.committer_id=p.id
	GROUP BY committer_id ORDER BY commits;"
results <- dbGetQuery(conn = con, statement = query)

# Plot the counts against their row index (one point per committer).
plot(
  x = results$commits,
  main = "Number of commits by author",
  xlab = "Author",
  ylab = "Commits"
)

See Figure 5, “Number of commits per author”.

Lorenz curve

R commands:

# Number of commits made by each committer, largest first.
query <- "SELECT committer_id, count(*) AS num_commits FROM scmlog
          GROUP BY committer_id ORDER BY num_commits desc;"
total_committers <- dbGetQuery(con, query)
library(ineq)

# Gini coefficient of the distribution of commits among committers.
Gini(total_committers$num_commits)

# Lorenz curve of the same distribution; TRUE is spelled out because `T` is
# an ordinary variable that can be reassigned.
Lc(total_committers$num_commits, plot = TRUE)

See Figure 6, “Lorenz curve”.

Number of commits by author per year

R commands:

library(RMySQL)

# NOTE(review): this opens a new connection; if the earlier sections ran in
# the same session, the previous `con` is replaced without being closed --
# reuse it or call dbDisconnect() first if that is not intended.
con <- dbConnect(dbDriver("MySQL"), user = "root",
                 password = "root", dbname = "gedit_cvsanaly")

# Commits per (year, author), keeping only pairs with more than 20 commits.
# HAVING filters the grouped rows directly, and ORDER BY sits on the
# outermost query: an ORDER BY inside a derived table is not guaranteed to
# carry over to the final result.
query <- "
SELECT date_format(s.date, '%Y') AS year, p.name AS name, count(s.id) AS num
FROM scmlog s LEFT JOIN people p ON s.committer_id=p.id
GROUP BY year, name
HAVING num > 20
ORDER BY year, num;
"

results <- dbGetQuery(con, query)

# Year becomes a factor so it can be used as the grouping variable.
results$year <- factor(results$year)

# Assign each author one integer colour index, shared across all years.
# The distinct authors are taken from `results` itself instead of running
# the same aggregate query a second time, and match() replaces the
# per-author loop. The variable is not called `names`, so base::names()
# is no longer shadowed.
author_names <- unique(results$name)
results$color <- match(results$name, author_names)

# Dot chart of commit counts, one group per year, one labelled dot per
# author, coloured by the per-author colour index.
dotchart(
  x = results$num,
  labels = results$name,
  groups = results$year,
  color = results$color,
  cex = .7,
  xlab = "Number of commits",
  main = "Commits by author per year"
)

See Figure 7, “Number of commits by author per year”.