# We are going to use the GA package # Make sure that the package is installed. # You install a package in R with the function install.packages(): # # install.packages("GA") library(GA) # # To install packages without root access: # # install.packages("GA", lib="/mylibs/Rpackages/") ## or some other path, e.g., C:\yourFolder # library(GA, lib.loc="/mylibs/Rpackages/") # # # EXAMPLE 1: One-dimensional function optimization # # # The asymmetric double claw is difficult to maximize because there are many local solutions. # Standard derivative-based optimizers would simply climb up the hill closest to the starting value. f <- function(x) { y <- (0.46 * (dnorm(x, -1, 2/3) + dnorm(x, 1, 2/3)) + (1/300) * (dnorm(x, -0.5, 0.01) + dnorm(x, -1, 0.01) + dnorm(x, -1.5, 0.01)) + (7/300) * (dnorm(x, 0.5, 0.07) + dnorm(x, 1, 0.07) + dnorm(x, 1.5, 0.07))) y ### return(y) } # Plot the double claw curve(f, from = -3, to = 3, n = 1000) # For the maximization of this function we may use f directly as the fitness function GA <- ga(type = "real-valued", fitness = f, lower = -3, upper = 3) # The object returned can be plotted plot(GA) summary(GA) # plot the solution curve(f, from = -3, to = 3, n = 1000) points(GA@solution, f(GA@solution), col="red") # The evolution of the population units and the corresponding functions values at each # generation can be obtained by defining a new monitor function and then passing this # function as an optional argument to ga myMonitor <- function(obj) { curve(f, obj@lower, obj@upper, n = 1000, main = paste("iteration =", obj@iter)) points(obj@population, obj@fitness, pch = 20, col = 2) rug(obj@population, col = 2) Sys.sleep(1) } GA <- ga(type = "real-valued", fitness = f, lower = -3, upper = 3, monitor = myMonitor) ## Inspect fitness across generations plot(GA) # # # EXAMPLE 2: Model fitting # # # We consider a data on the growth of trees # The age at which the tree was measured Age <- c(2.44, 12.44, 22.44, 32.44, 42.44, 52.44, 62.44, 72.44, 82.44, 92.44, 102.44, 112.44) # The bole volume of the tree Vol <- c(2.2, 20.0, 93.0, 262.0, 476.0, 705.0, 967.0, 1203.0, 1409.0, 1659.0, 1898.0, 2106.0) plot(Age, Vol) # An ecological model for the plant size (measured by volume) as a function of age is the Richards curve: # f(x) = a*(1-exp(-b*x))^c, where a, b, in c are the model parameters # Let's fit the Richards curve using genetic algorithms # We first define our model function (argument params represents a vector of the parameters a, b, and c) model <- function(params) { params[1] * (1 - exp(-params[2] * Age))^params[3] } # We define the fitness function as the sum of squares of the differences between estimated and observed data myFitness2 <- function(params) { -sum((Vol - model(params))^2) } # The fitness function needs to be maximized with respect to the model's parameters, given the observed data in x and y. # A blend crossover is used for improving the search over the parameter space: for two parents x1 and x2 (assume x1 < x2) # it randomly picks a solution in the range [x1 - k*(x2-x1), x2 + k*(x2-x1)], where k represents a constant between 0 and 1. # We restrict the search interval for a,b, and c to [1000.0, 5000.0], [0.0, 5.0], and [0.0, 5.0], respectively. GA2 <- ga(type = "real-valued", fitness = myFitness2, lower = c(1000, 0, 0), upper = c(5000, 5, 5), popSize = 500, crossover = gareal_blxCrossover, maxiter = 5000, run = 200, names = c("a", "b", "c")) summary(GA2) # Let's plot our solution plot(Age, Vol) lines(Age, model(GA2@solution)) # we can use a monitor function to plot the current solution myMonitor2 <- function(obj) { i <- which.max(obj@fitness) plot(Age, Vol) lines(Age, model(obj@population[i,]), col="red") title(paste("iteration =", obj@iter), font.main = 1) Sys.sleep(1) } GA2 <- ga(type = "real-valued", fitness = myFitness2, lower = c(1000, 0, 0), upper = c(5000, 5, 5), popSize = 500, crossover = gareal_blxCrossover, maxiter = 5000, run = 200, names = c("a", "b", "c"), monitor=myMonitor2) # # # EXAMPLE 3: The Knapsack problem # # # The Knapsack problem is defined as follows: given a set of items, each with a mass and a value, determine the subset # of items to be included in a collection so that the total weight is less than or equal to a given limit and the total value # is as large as possible. # a vector of the items' values values <- c(5, 8, 3, 4, 6, 5, 4, 3, 2) # a vector of the item's weights weights <- c(1, 3, 2, 4, 2, 1, 3, 4, 5) # the knapsack capacity Capacity <- 10 # A binary GA can be used to solve the knapsack problem. The solution to this problem is a binary string equal to the number # of items where the ith bit is 1 if the ith item is in the subset and 0 otherwise. The fitness function should penalize # unfeasible solutions. knapsack <- function(x) { f <- sum(x * values) w <- sum(x * weights) if (w > Capacity) f <- Capacity - w f } GA3 <- ga(type = "binary", fitness = knapsack, nBits = length(weights), maxiter = 1000, run = 200, popSize = 100) summary(GA3) GA3@solution # # Example 4: ESTABLISHING A TIMETABLE # # A small football club has a youth team and a senior team. The player # training program has seven components: stamina training, strength training, # technique, tactics, psychological preparation, teamwork, and regeneration. # Due to lack of funds, for each component, a single staff member is responsible # for both the youth and the senior team, with the exceptions of tactics and # stamina training, where two staff members are assigned, one to each team. # # The weekly training regime is summarized in the following table: # #+----------+---------------------+-----------------+-----------------+ #| Coach | Component | Senior team | Youth team | #+----------+---------------------+-----------------+-----------------+ #| Anze | Strength training | 1 time a week | 1 time a week | #| Bojan | Technique | 3 times a week | 3 times a week | #| Ciril | Regeneration | 2 times a week | 2 times a week | #| Dusan | Stamina training | doesn't conduct | 4 times a week | #| Erik | Stamina training | 4 times a week | doesn't conduct | #| Filip | Teamwork | 3 times a week | 3 times a week | #| Gasper | Psychological prep. | 1 time a week | 1 time a week | #| Hugo | Tactics | 1 time a week | doesn't conduct | #| Iztok | Tactics | doesn't conduct | 1 time a week | #+----------+---------------------+-----------------+-----------------+ # # Training is performed from Monday to Friday in four different time slots: # 8:00 - 10:00, 10:15 - 12:15, 14:00 - 16:00, and 16:15 - 18:15. # # Constraints: # # - each time slot can hold only one component for the youth team and one component # for the senior team (the youth and senior teams train separately, so a single # staff member can only train one of the two teams in a single time slot). # # - a team is not allowed to train the same component 2 or more times within one day. # # - the main purpose of the Tactics training component is to prepare the team for # the upcoming match. Matches are usually played during the weekend, so Tactics # training should be scheduled for Thursday in the 16:15 - 18:15 time slot. # # - after a match, the players need to rest. Therefore, there is no training in # the Monday 8:00 - 10:00 time slot. # # - the stamina training coach Dusan is not available on Monday mornings # (8:00 - 10:00 in 10:15 - 12:15 time slots) # # - there can be no Technique training on Wednesdays, because coach Bojan is # not available. # # # Produce a training schedule that takes into account these two and all of # the above restrictions! # # # VARIABLES # # senior - number of sessions per component for the senior team # youth - number of sessions per component for the youth team # staff - coaching staff -> the staff's actual occupacy is solved for, how much a certain coach can handle is (hard) coded in senior and youth variables! # slots - possible slots senior = c(1, 3, 2, 0, 4, 3, 1, 1, 0) youth = c(1, 3, 2, 4, 0, 3, 1, 0, 1) slots = 4*5 valueBin <- function(timetable) { # organize data into a multi-dimensional array # days, time slots, staff, teams t <- array(as.integer(timetable), c(5,4,9,2)) violations <- 0 # check all the conditions # check the number of sessions per component for (i in 1:9) { violations <- violations + abs(sum(t[,,i,1]) - senior[i]) violations <- violations + abs(sum(t[,,i,2]) - youth[i]) } # it is not allowed to train the same component 2 or more times within one day for (i in 1:9) { violations <- violations + sum(apply(t[,,i,1], 1, sum) > 1) violations <- violations + sum(apply(t[,,i,2], 1, sum) > 1) } # a single staff member can only train one of the two teams in a single time slot violations <- violations + sum(t[,,,1] == t[,,,2] & t[,,,1] != 0) # each time slot can hold only one component for the youth team and one component # for the senior team for (i in 1:5) for (j in 1:4) { violations <- violations + max(0, sum(t[i,j,,1]) - 1) violations <- violations + max(0, sum(t[i,j,,2]) - 1) } # Tactics training should be scheduled for Thursday in the 16:15 - 18:15 time slot violations <- violations + (t[4,3,8,1] != 1) violations <- violations + (t[4,3,9,2] != 1) # there is no training in the Monday 8:00 - 10:00 time slot violations <- violations + sum(t[1,1,,]) # the stamina training coach Dusan is not available on Monday mornings violations <- violations + sum(t[1,1:2,4,] == 1) # there can be no Technique training on Wednesdays violations <- violations + sum(t[3,,2,] == 1) -violations } myInitPopulation <- function(object) { p <- gabin_Population(object) for (i in 1:nrow(p)) { t <- array(p[i,], c(5,4,9,2)) # Tactics training on Thursdays in the 16:15 - 18:15 time slot t[4,3,8,1]=1 t[4,3,9,2]=1 # there is no training in the Monday 8:00 - 10:00 time slot t[1,1,,] = 0 # there is no Stamina training on Monday mornings t[1,1:2,4,] = 0 # there is no Technique training on Wednesdays t[3,,2,] = 0 p[i,] <- as.vector(t) } p } GA4 <- ga(type = "binary", fitness = valueBin, nBits = 4*5*9*2, popSize = 500, maxiter = 10, run = 200, population = myInitPopulation) timetable2 <- function(solution,coach,team){ t <- array(solution, c(5,4,9,2)) t[,,coach,team] } ## timetable of a coach 2 for team 1. t <- timetable2(GA4@solution[1,],2,1) t # # # EXAMPLE 5: Traveling salesman problem # # # Given a list of cities and the distances between each pair of cities, what is the shortest possible route that visits # each city exactly once and returns to the origin city? data("eurodist", package = "datasets") D <- as.matrix(eurodist) D # An individual round tour is represented as a permutation of a default numbering of the cities defining the current order # in which the cities are to be visited # Calculation of the tour length tourLength <- function(tour) { N <- length(tour) dist <- 0 for (i in 2:N) dist <- dist + D[tour[i-1],tour[i]] dist <- dist + D[tour[N],tour[1]] dist } # The fitness function to be maximized is defined as the reciprocal of the tour length. tspFitness <- function(tour) { 1/tourLength(tour) } GA5 <- ga(type = "permutation", fitness = tspFitness, lower = 1, upper = ncol(D), popSize = 50, maxiter = 5000, run = 500, pmutation = 0.2) summary(GA5) # Reconstruct the solution found tour <- GA5@solution[1, ] tour <- c(tour, tour[1]) tourLength(tour) colnames(D)[tour]