summaryrefslogtreecommitdiffstats
path: root/watchmaker/book
diff options
context:
space:
mode:
authorYohann Roussel <yroussel@google.com>2014-03-19 16:25:37 +0100
committerYohann Roussel <yroussel@google.com>2014-03-20 15:13:33 +0100
commit4eceb95409e844fdc33c9c706e1dc307bfd40303 (patch)
treeee9f4f3fc79f757c79081c336bce4f1782c6ccd8 /watchmaker/book
parent3d2402901b1a6462e2cf47a6fd09711f327961c3 (diff)
downloadtoolchain_jack-4eceb95409e844fdc33c9c706e1dc307bfd40303.zip
toolchain_jack-4eceb95409e844fdc33c9c706e1dc307bfd40303.tar.gz
toolchain_jack-4eceb95409e844fdc33c9c706e1dc307bfd40303.tar.bz2
Initial Jack import.
Change-Id: I953cf0a520195a7187d791b2885848ad0d5a9b43
Diffstat (limited to 'watchmaker/book')
-rw-r--r--watchmaker/book/book.iml12
-rw-r--r--watchmaker/book/src/resources/antenna.jpgbin0 -> 12181 bytes
-rw-r--r--watchmaker/book/src/resources/docbook.css35
-rw-r--r--watchmaker/book/src/resources/travelling_salesman_problem.pngbin0 -> 63665 bytes
-rw-r--r--watchmaker/book/src/xml/book.xml57
-rw-r--r--watchmaker/book/src/xml/classifiers.xml6
-rw-r--r--watchmaker/book/src/xml/distributed.xml13
-rw-r--r--watchmaker/book/src/xml/evolution.xml424
-rw-r--r--watchmaker/book/src/xml/furtherreading.xml6
-rw-r--r--watchmaker/book/src/xml/geneticprogramming.xml6
-rw-r--r--watchmaker/book/src/xml/gui.xml22
-rw-r--r--watchmaker/book/src/xml/interactive.xml6
-rw-r--r--watchmaker/book/src/xml/islands.xml175
-rw-r--r--watchmaker/book/src/xml/multiobjective.xml6
-rw-r--r--watchmaker/book/src/xml/performance.xml157
-rw-r--r--watchmaker/book/src/xml/preface.xml29
-rw-r--r--watchmaker/book/src/xml/salesman.xml17
-rw-r--r--watchmaker/book/src/xml/selection.xml160
-rw-r--r--watchmaker/book/src/xml/steadystate.xml6
-rw-r--r--watchmaker/book/src/xml/sudoku.xml6
-rw-r--r--watchmaker/book/src/xml/watchmaker.xml478
-rw-r--r--watchmaker/book/src/xml/website.xml44
22 files changed, 1665 insertions, 0 deletions
diff --git a/watchmaker/book/book.iml b/watchmaker/book/book.iml
new file mode 100644
index 0000000..d5c0743
--- /dev/null
+++ b/watchmaker/book/book.iml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+ <component name="NewModuleRootManager" inherit-compiler-output="true">
+ <exclude-output />
+ <content url="file://$MODULE_DIR$">
+ <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
+ </content>
+ <orderEntry type="inheritedJdk" />
+ <orderEntry type="sourceFolder" forTests="false" />
+ </component>
+</module>
+
diff --git a/watchmaker/book/src/resources/antenna.jpg b/watchmaker/book/src/resources/antenna.jpg
new file mode 100644
index 0000000..d706426
--- /dev/null
+++ b/watchmaker/book/src/resources/antenna.jpg
Binary files differ
diff --git a/watchmaker/book/src/resources/docbook.css b/watchmaker/book/src/resources/docbook.css
new file mode 100644
index 0000000..018291b
--- /dev/null
+++ b/watchmaker/book/src/resources/docbook.css
@@ -0,0 +1,35 @@
+a {text-decoration: none;
+ color: #006699;}
+a:visited {color: #003366;}
+body {line-height: 1.8em;
+ font-size: 62.5%;
+ width: 960px;
+ margin-left: auto;
+ margin-right: auto;
+ font-family: Palatino Linotype, Palatino, serif;
+ background-color: #ffffff;
+ color: #444444;}
+dl {margin: 0;}
+h1, h2, h3 {font-weight: bold;
+ font-variant: small-caps;}
+h1 {font-size: 3em;
+ margin-bottom: .6em;}
+h2 {font-size: 2.4em;
+ margin-bottom: .75em;}
+h3 {font-size: 1.8em;
+ margin-bottom: 1em;}
+p, ul {font-size: 1.4em;
+ margin: 1.286em 0;}
+dl, table {font-size: 1.4em;}
+dl dl {font-size: 1em;}
+pre {font-family: Monaco, Courier New, monospace; font-size: 1.2em; margin-bottom: 1.5em;}
+strong {color: #000000;}
+
+h2.subtitle {font-variant: normal;}
+h3.author {font-variant: normal; margin-top: 2em; margin-bottom: 0;}
+.email {font-size: 1.2em;}
+.informalfigure {text-align: center;}
+.mediaobject {display: inline-block;}
+.caption p {margin: 0; text-align: right; font-size: small;}
+
+.navfooter table tr td {width: 33.33%;} \ No newline at end of file
diff --git a/watchmaker/book/src/resources/travelling_salesman_problem.png b/watchmaker/book/src/resources/travelling_salesman_problem.png
new file mode 100644
index 0000000..d6017c9
--- /dev/null
+++ b/watchmaker/book/src/resources/travelling_salesman_problem.png
Binary files differ
diff --git a/watchmaker/book/src/xml/book.xml b/watchmaker/book/src/xml/book.xml
new file mode 100644
index 0000000..24f0a09
--- /dev/null
+++ b/watchmaker/book/src/xml/book.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<book xmlns="http://docbook.org/ns/docbook"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <info>
+ <title>Evolutionary Computation in Java</title>
+ <author>
+ <personname>
+ <firstname>Daniel</firstname>
+ <surname>Dyer</surname>
+ <othername>W.</othername>
+ </personname>
+ <email>dan@uncommons.org</email>
+ <uri>http://www.dandyer.co.uk</uri>
+ </author>
+ <copyright>
+ <year>2008</year>
+ <year>2009</year>
+ <year>2010</year>
+ </copyright>
+ <keywordset>
+ <keyword>algorithms</keyword>
+ <keyword>evolution</keyword>
+ <keyword>evolutionary algorithms</keyword>
+ <keyword>evolutionary computation</keyword>
+ <keyword>genetic algorithms</keyword>
+ <keyword>Java</keyword>
+ <keyword>programming</keyword>
+ <keyword>software</keyword>
+ </keywordset>
+ </info>
+
+ <xi:include href="preface.xml" />
+
+ <!-- Chapters -->
+ <xi:include href="evolution.xml" />
+ <xi:include href="watchmaker.xml" />
+ <xi:include href="salesman.xml" />
+ <xi:include href="selection.xml" />
+ <xi:include href="sudoku.xml" />
+ <xi:include href="islands.xml" />
+ <xi:include href="interactive.xml" />
+ <xi:include href="steadystate.xml" />
+ <xi:include href="geneticprogramming.xml" />
+ <xi:include href="classifiers.xml" />
+ <xi:include href="multiobjective.xml" />
+
+ <!-- Appendices -->
+ <xi:include href="performance.xml" />
+ <xi:include href="gui.xml" />
+ <xi:include href="distributed.xml" />
+ <xi:include href="furtherreading.xml" />
+
+ <index />
+
+</book>
diff --git a/watchmaker/book/src/xml/classifiers.xml b/watchmaker/book/src/xml/classifiers.xml
new file mode 100644
index 0000000..79a433a
--- /dev/null
+++ b/watchmaker/book/src/xml/classifiers.xml
@@ -0,0 +1,6 @@
+<chapter xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>Learning Classifier Systems</title>
+ <para>TODO</para>
+</chapter>
diff --git a/watchmaker/book/src/xml/distributed.xml b/watchmaker/book/src/xml/distributed.xml
new file mode 100644
index 0000000..08c54c0
--- /dev/null
+++ b/watchmaker/book/src/xml/distributed.xml
@@ -0,0 +1,13 @@
+<appendix xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>Distributed Evolutionary Algorithms</title>
+ <section>
+ <title>Running Watchmaker Programs on Hadoop with Apache Mahout</title>
+ <para>TODO</para>
+ </section>
+ <section>
+ <title>Clustering Watchmaker Programs with Terracotta</title>
+ <para>TODO</para>
+ </section>
+</appendix>
diff --git a/watchmaker/book/src/xml/evolution.xml b/watchmaker/book/src/xml/evolution.xml
new file mode 100644
index 0000000..0c134aa
--- /dev/null
+++ b/watchmaker/book/src/xml/evolution.xml
@@ -0,0 +1,424 @@
+<chapter xmlns="http://docbook.org/ns/docbook"
+ xmlns:xlink="http://www.w3.org/1999/xlink"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>The Power of Evolution</title>
+ <para>
+ Software is normally developed in a very precise, deterministic way. The behaviour of a
+ computer is governed by strict logical rules. A computer invariably does exactly what
+ it is told to do.
+ </para>
+ <para>
+ When writing a program to solve a particular problem, software developers will identify
+ the necessary sub-tasks that the program must perform. Algorithms are chosen and
+ implemented for each task. The completed program becomes a detailed specification of
+ exactly how to get from A to B. Every aspect is carefully designed by its developers
+ who must understand how the various components interact to deliver the program's
+ functionality.
+ </para>
+ <para>
+ This prescriptive approach to solving problems with computers has served us well and is
+ responsible for most of the software applications that we use today. However, it is not
+ without limitations.
+ Solutions to problems are constrained by the intuition, knowledge and prejudices
+ of those who develop the software.
+ <emphasis>The programmers have to know exactly how to solve the problem.</emphasis>
+ </para>
+ <para>
+ Another characteristic of the prescriptive approach that is sometimes problematic is
+ that it is best suited to finding exact answers. Not all problems have exact solutions,
+ and some that do may be too computationally expensive to solve. Sometimes it is
+ more useful to be able to find an approximate answer quickly than to waste time searching
+ for a better solution.
+ </para>
+ <section>
+ <title>What are Evolutionary Algorithms?</title>
+ <indexterm><primary>evolutionary algorithm</primary></indexterm>
+ <indexterm><primary>Darwin, Charles</primary></indexterm>
+ <para>
+ Evolutionary algorithms (EAs) are inspired by the biological model of evolution and
+ natural selection first proposed by Charles Darwin in 1859.
+ In the natural world, evolution helps species adapt to their environments.
+ Environmental factors that influence the survival prospects of an organism
+ include climate, availability of food and the dangers of predators.
+ </para>
+ <indexterm><primary>natural selection</primary></indexterm>
+ <para>
+ Species change over the course of many generations.
+ Mutations occur randomly. Some mutations will be advantageous, but many will be
+ useless or detrimental. Progress comes from the feedback provided by non-random
+ natural selection.
+ For example, organisms that can survive for long periods without water will be
+ more likely to thrive in dry conditions than those that can't.
+ Likewise, animals that can run fast will be more successful at evading predators
+ than their slower rivals.
+ If a random genetic modification helps an organism to survive and to reproduce,
+ that modification will itself survive and spread throughout the population, via
+ the organism's offspring.
+ </para>
+ <para>
+ Evolutionary algorithms are based on a simplified model of this biological evolution.
+ To solve a particular problem we create an environment in which potential
+ solutions can evolve. The environment is shaped by the parameters of the problem
+ and encourages the evolution of good solutions.
+ </para>
+ <indexterm><primary>evolutionary computation</primary></indexterm>
+ <para>
+ The field of Evolutionary Computation encompasses several types of evolutionary
+ algorithm. These include <emphasis>Genetic Algorithms</emphasis> (GAs),
+ <emphasis>Evolution Strategies</emphasis>, <emphasis>Genetic Programming</emphasis>
+ (GP), <emphasis>Evolutionary Programming</emphasis> and <emphasis>Learning
+ Classifier Systems</emphasis>.
+ </para>
+ <indexterm><primary>genetic algorithms</primary></indexterm>
+ <para>
+ The most common type of evolutionary algorithm is the generational genetic
+ algorithm. We'll cover other EA variants in later chapters but, for now,
+ all of the evolutionary algorithms that we meet will be some kind of generational
+ GA.
+ </para>
+ <indexterm><primary>fitness function</primary></indexterm>
+ <indexterm><primary>population</primary></indexterm>
+ <para>
+ The basic outline of a generational GA is as follows (most other EA variants are
+ broadly similar).
+ A <emphasis>population</emphasis> of candidate solutions is iteratively evolved
+ over many <emphasis>generations</emphasis>. Mimicking the concept of
+ natural selection in biology, the survival of candidates (or their offspring)
+ from generation to generation in an EA is governed by a <emphasis>fitness
+ function</emphasis> that evaluates each candidate according to how close it is to
+ the desired outcome, and a <emphasis>selection strategy</emphasis> that favours
+ the better solutions.
+ Over time, the quality of the solutions in the population should improve.
+ If the program is successful, we can terminate the evolution once it has found
+ a solution that is good enough.
+ </para>
+ <section>
+ <title>An Example</title>
+ <para>
+ Now that we have introduced the basic concepts and terminology, I will attempt
+ to illustrate by way of an example. Suppose that we want to use evolution to generate
+ a particular character string, for example "HELLO WORLD". This is a contrived example
+ in as much as it assumes that we don't know how to create such a string and that
+ evolution is the best approach available to us. However, bear with me as this simple
+ example is useful for demonstrating exactly how the evolutionary approach works.
+ </para>
+ <para>
+ Each candidate solution in our population will be a string. We'll use a fixed-length
+ representation so that each string is 11 characters long. Each character in a string
+ will be one of the 27 valid characters (the upper case letters 'A' to 'Z' plus the space
+ character).
+ </para>
+ <para>
+ For the fitness function we'll use the simple approach of assigning a candidate
+ solution one point for each position in the string that has the correct character.
+ For the string "HELLO WORLD" this gives a maximum possible fitness score of 11 (the
+ length of the string).
+ </para>
+ <para>
+ The first task for the evolutionary algorithm is to randomly generate the initial
+ population. We can use any size population that we choose.
+ Typical EA population sizes can vary from tens to thousands of individuals.
+ For this example we will use a population size of 10.
+ After the initialisation of the population we might have the following candidates
+ (fitness scores in brackets):
+ <informalexample>
+ <programlisting>
+ 1. GERZUNFXCEN (1)
+ 2. HSFDAHDMUYZ (1)
+ 3. UQ IGARHGJN (0)
+ 4. ZASIB WSUVP (2)
+ 5. XIXROIUAZBH (1)
+ 6. VDLGCWMBFYA (1)
+ 7. SY YUHYRSEE (0)
+ 8. EUSVBIVFHFK (0)
+ 9. HHENRFZAMZH (1)
+ 10. UJBBDFZPLCN (0)
+ </programlisting>
+ </informalexample>
+ </para>
+ <para>
+ None of these candidate solutions is particularly good. The best (number 4) has just two
+ characters out of eleven that match the target string (the space character and the 'W').
+ </para>
+ <para>
+ The next step is to select candidates based on their fitness and use them to create
+ a new generation. One technique for favouring the selection of fitter candidates over
+ weaker candidates is to assign each candidate a selection probability proportionate to
+ its fitness.
+ </para>
+ <indexterm><primary>fitness-proportionate selection</primary></indexterm>
+ <indexterm><primary>selection</primary><secondary>fitness-proportionate</secondary></indexterm>
+ <para>
+ If we use <emphasis>fitness-proportionate selection</emphasis>, none of the candidates
+ with zero fitness will be selected and the candidate with a fitness of 2 is twice as likely
+ to be selected as any of the candidates with a fitness of 1. For the next step we need to
+ select 10 parents, so it is obvious that some of the fit candidates are going to be selected
+ multiple times.
+ </para>
+ <indexterm><primary>cross-over</primary></indexterm>
+ <para>
+ Now that we have some parents, we can breed the next generation. We do this via a process
+ called <emphasis>cross-over</emphasis>, which is analogous to sexual reproduction in biology.
+ For each pair of parents, a cross-over point is selected randomly. Assuming that the first
+ two randomly selected parents are numbers 2 and 4, if the cross-over occurs after the
+ first four characters, we will get the following offspring:
+ <informalexample>
+ <programlisting>
+ Parent 1: <emphasis role="bold">HSFDAHDMUYZ</emphasis>
+ Parent 2: ZASIB WSUVP
+
+ Offspring 1: <emphasis role="bold">HSFD</emphasis>B WSUVP
+ Offspring 2: ZASI<emphasis role="bold">AHDMUYZ</emphasis>
+ </programlisting>
+ </informalexample>
+ </para>
+ <indexterm><primary>mutation</primary></indexterm>
+ <para>
+ This recombination has given us two new candidates for the next generation, one of which is
+ better than either of the parents (offspring 1 has a fitness score of 3). This shows how
+ cross-over can lead towards better solutions. However, looking at the initial population as
+ a whole, we can see that no combination of cross-overs will ever result in a candidate with
+ a fitness higher than 6. This is because, among all 10 original candidates, there are only 6
+ positions in which we have the correct character. This can be mitigated to some extent by
+ increasing the size of the population. With 100 individuals in the initial population we
+ would be much more likely to have the necessary building blocks for a perfect solution, but
+ there is no guarantee. This is where <emphasis>mutation</emphasis> comes in.
+ </para>
+ <para>
+ Mutation is implemented by modifying each character in a string according to some small
+ probability, say 0.02 or 0.05. This means that any single individual will be changed only
+ slightly by mutation, or perhaps not at all.
+ </para>
+ <para>
+ By applying mutation to each of the offspring produced by cross-over we will occasionally
+ introduce correct characters in new positions. We will also occasionally remove correct
+ characters but these bad mutations are unlikely to survive selection in the next generation,
+ so this is not a big problem. Advantageous mutations will be propagated by cross-over and
+ selection and will quickly spread throughout the population.
+ </para>
+ <para>
+ After repeating this process for dozens or perhaps even hundreds of generations we will
+ eventually converge on our desired solution.
+ </para>
+ <para>
+ This is a convoluted process for finding a string that we already knew to start with.
+ However, as we shall see in the remainder of this book, the evolutionary approach
+ generalises to deal with problems where we don't know what the best solution is and
+ therefore can't encode that knowledge in our fitness function.
+ </para>
+ <para>
+ The important point demonstrated by this example is that we can arrive at a satisfactory
+ solution without having to enumerate every possible candidate in the search space.
+ Even for this trivial example, a brute force search would involve generating and
+ checking approximately 5.6 quadrillion strings.
+ </para>
+ </section>
+ <section>
+ <title>The Outline of an Evolutionary Algorithm</title>
+ <procedure>
+ <step>
+ <title>Genesis</title>
+ <para>
+ Create an initial set (population) of <literal>n</literal> candidate solutions.
+ This may be done entirely randomly or the population may be seeded with some
+ hand-picked candidates.
+ </para>
+ </step>
+ <step>
+ <title>Evaluation</title>
+ <para>
+ Evaluate each member of the population using some fitness function.
+ </para>
+ </step>
+ <step>
+ <title>Survival of the Fittest</title>
+ <indexterm><primary>selection</primary></indexterm>
+ <para>
+ Select a number of members of the evaluated population, favouring those
+ with higher fitness scores. These will be the parents of the next generation.
+ </para>
+ </step>
+ <step>
+ <title>Evolution</title>
+ <indexterm><primary>cross-over</primary></indexterm>
+ <indexterm><primary>mutation</primary></indexterm>
+ <para>
+ Generate a new population of offspring by randomly altering and/or combining
+ elements of the parent candidates. The evolution is performed by one or more
+ <emphasis>evolutionary operators</emphasis>. The most common operators are
+ cross-over and mutation.
+ Cross-over takes two parents, cuts them each into two or more pieces and recombines
+ the pieces to create two new offspring. Mutation copies an individual but with
+ small, random modifications (such as flipping a bit from zero to one).
+ </para>
+ </step>
+ <step>
+ <title>Iteration</title>
+ <para>
+ Repeat steps 2-4 until a satisfactory solution is found or some other termination
+ condition is met (such as the number of generations or elapsed time).
+ </para>
+ </step>
+ </procedure>
+ </section>
+ </section>
+ <section>
+ <title>When are Evolutionary Algorithms Useful?</title>
+ <para>
+ Evolutionary algorithms are typically used to provide good approximate
+ solutions to problems that cannot be solved easily using other techniques.
+ Many optimisation problems fall into this category. It may be too
+ computationally-intensive to find an exact solution but sometimes a near-optimal
+ solution is sufficient. In these situations evolutionary techniques can be
+ effective. Due to their random nature, evolutionary algorithms are never guaranteed
+ to find an optimal solution for any problem, but they will often find a good solution
+ if one exists.
+ </para>
+ <para>
+ One example of this kind of optimisation problem is the challenge of timetabling.
+ Schools and universities must arrange room and staff allocations to suit the needs
+ of their curriculum. There are several constraints that must be satisfied.
+ A member of staff can only be in one place at a time, they can only teach classes
+ that are in their area of expertise, rooms cannot host lessons if they are already
+ occupied, and classes must not clash with other classes taken by the same students.
+ This is a combinatorial problem and known to be NP-Hard.
+ It is not feasible to exhaustively search for the optimal timetable due to the huge
+ amount of computation involved. Instead, heuristics must be used.
+ Genetic algorithms have proven to be a successful way of generating satisfactory
+ solutions to many scheduling problems.
+ </para>
+ <para>
+ Evolutionary algorithms can also be used to tackle problems that humans don't really
+ know how to solve.
+ An EA, free of any human preconceptions or biases, can generate surprising solutions
+ that are comparable to, or better than, the best human-generated efforts.
+ It is merely necessary that we can recognise a good solution if
+ it were presented to us, even if we don't know <emphasis>how</emphasis> to create a
+ good solution.
+ In other words, we need to be able to formulate an effective fitness function.
+ </para>
+ <para>
+ Engineers working for NASA know a lot about physics. They know exactly which
+ characteristics make for a good communications antenna. But the process
+ of designing an antenna so that it has the necessary properties is hard. Even
+ though the engineers know what is required from the final antenna, they may not know
+ how to design the antenna so that it satisfies those requirements.
+ </para>
+ <para>
+ NASA's Evolvable Systems Group has used evolutionary algorithms to successfully
+ evolve antennas for use on satellites. These evolved antennas have irregular shapes
+ with no obvious symmetry (one of these antennas is pictured below).
+ It is unlikely that a human expert would have arrived at such an unconventional design.
+ Despite this, when tested these antennas proved to be extremely well adapted to their
+ purpose.
+ </para>
+ <informalfigure>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="antenna.jpg" format="JPG" width="50%" align="center"/>
+ </imageobject>
+ <caption>
+ <para>
+ <link xlink:href="http://ti.arc.nasa.gov/projects/esg/research/antenna.htm">NASA Evolvable Systems Group</link>
+ </para>
+ </caption>
+ </mediaobject>
+ </informalfigure>
+ <section>
+ <title>Pre-requisites</title>
+ <indexterm><primary>encoding</primary></indexterm>
+ <para>
+ There are two requirements that must be met before an evolutionary algorithm can
+ be used for a particular problem.
+ Firstly, we need a way to encode candidate solutions to the problem. The simplest
+ encoding, and that used by many genetic algorithms, is a bit string. Each candidate
+ is simply a sequence of zeros and ones.
+ This encoding makes cross-over and mutation very straightforward, but that does not
+ mean that you cannot use more complicated representations. In fact, we will see
+ several instances of more advanced candidate representations in later chapters.
+ As long as we can devise a scheme for evolving the candidates, there really is no
+ restriction on the types that we can use.
+ Genetic programming (GP) is a good example of this. GP evolves computer programs
+ represented as syntax trees.
+ </para>
+ <indexterm><primary>fitness function</primary></indexterm>
+ <para>
+ The second requirement for applying evolutionary algorithms is that there must be a
+ way of evaluating partial solutions to the problem - the fitness function. It is
+ not sufficient to evaluate solutions as right or wrong, the fitness score needs to
+ indicate <emphasis>how right</emphasis> or, if your glass is half empty,
+ <emphasis>how wrong</emphasis> a candidate solution is. So a function that returns
+ either 0 or 1 is useless. A function that returns a score on a scale of 1 - 100 is
+ better. We need shades of grey, not just black and white, since this is how the
+ algorithm guides the random evolution to find increasingly better solutions.
+ </para>
+ </section>
+ </section>
+ <section>
+ <title>Implementing Evolutionary Algorithms</title>
+ <para>
+ If an evolutionary algorithm is a good fit for a particular problem, there are plenty of
+ options when it comes to implementing it.
+ You may choose to use a high-level programming language for simplicity, or a low-level
+ language for performance.
+ You could write all of the code yourself from scratch, or you could reuse pre-written
+ components and libraries.
+ In this book we will necessarily be using one particular approach, but it is worth noting
+ that there are alternatives.
+ </para>
+ <section>
+ <title>Choice of Programming Language</title>
+ <para>
+ Evolutionary algorithms can be implemented in any general purpose programming language.
+ Most programmers will simply choose the language that they are most comfortable with.
+ A quick web search will return examples of evolutionary programs written in C, C++,
+ Java, C#, Python, Ruby, Perl, Lisp and several other languages.
+ </para>
+ <para>
+ Performance may be a consideration when choosing a language.
+ Almost all evolutionary algorithms are CPU-bound. For this reason, compiled languages
+ typically offer better EA performance than interpreted languages. For
+ short-lived programs the difference is unlikely to be significant, but for
+ long-running programs it could be considerable.
+ </para>
+ <indexterm><primary>Java</primary></indexterm>
+ <para>
+ If you can recall the title of this book it should come as no surprise that we will be
+ using Java for all of the example code. Java offers a good balance of performance,
+ ease-of-use and a rich standard library.
+ </para>
+ </section>
+ <section>
+ <title>Evolution Frameworks</title>
+ <indexterm><primary>frameworks</primary></indexterm>
+ <para>
+ As we saw above, the basic outline of an evolutionary algorithm is fairly
+ straightforward. It consists of a main loop that performs one generation per iteration,
+ supplemented by a few functions to perform fitness evaluation, selection and
+ mutation/cross-over. When implementing a simple EA, writing this structural code is
+ not particularly onerous. However, if you write many different evolutionary
+ programs, as we will be doing in the remainder of this book, you end up writing code
+ that is very similar over and over again.
+ </para>
+ <para>
+ A good programmer will usually want to extract and reuse this common code.
+ Once you have done this, you have the basis of an evolutionary computation framework.
+ Typically this will consist of an evolution engine that is reusable and that can
+ accept different functions to customise fitness evaluation, selection and evolutionary
+ operators.
+ </para>
+ <para>
+ An alternative to using a home-grown framework is to choose a ready-made one. There
+ are open source evolutionary computation frameworks available for most programming languages.
+ For popular languages, such as C, C++ and Java, there are dozens.
+ </para>
+ <para>
+ The advantage of a
+ ready-made framework that is used by many other programmers is that it will have been well
+ tested and should be free of significant bugs and performance problems. It may also provide
+ advanced features such as parallel and/or distributed processing.
+ </para>
+ </section>
+ </section>
+</chapter>
diff --git a/watchmaker/book/src/xml/furtherreading.xml b/watchmaker/book/src/xml/furtherreading.xml
new file mode 100644
index 0000000..c0ab4c3
--- /dev/null
+++ b/watchmaker/book/src/xml/furtherreading.xml
@@ -0,0 +1,6 @@
+<appendix xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>Further Reading</title>
+ <para>TODO</para>
+</appendix>
diff --git a/watchmaker/book/src/xml/geneticprogramming.xml b/watchmaker/book/src/xml/geneticprogramming.xml
new file mode 100644
index 0000000..cb37ed6
--- /dev/null
+++ b/watchmaker/book/src/xml/geneticprogramming.xml
@@ -0,0 +1,6 @@
+<chapter xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>Genetic Programming</title>
+ <para>TODO</para>
+</chapter>
diff --git a/watchmaker/book/src/xml/gui.xml b/watchmaker/book/src/xml/gui.xml
new file mode 100644
index 0000000..f339f77
--- /dev/null
+++ b/watchmaker/book/src/xml/gui.xml
@@ -0,0 +1,22 @@
+<appendix xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>Building Graphical User Interfaces for Evolutionary Programs</title>
+ <subtitle>Using the Watchmaker Swing Module</subtitle>
+ <section>
+ <title>Evolution Controls</title>
+ <para>TODO</para>
+ <section>
+ <title>Number Generators</title>
+ <para>TODO</para>
+ </section>
+ </section>
+ <section>
+ <title>Updating the GUI from an EvolutionObserver</title>
+ <para>TODO</para>
+ </section>
+ <section>
+ <title>The Evolution Monitor</title>
+ <para>TODO</para>
+ </section>
+</appendix>
diff --git a/watchmaker/book/src/xml/interactive.xml b/watchmaker/book/src/xml/interactive.xml
new file mode 100644
index 0000000..fa7369a
--- /dev/null
+++ b/watchmaker/book/src/xml/interactive.xml
@@ -0,0 +1,6 @@
+<chapter xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>Interactive Evolutionary Algorithms</title>
+ <para>TODO</para>
+</chapter>
diff --git a/watchmaker/book/src/xml/islands.xml b/watchmaker/book/src/xml/islands.xml
new file mode 100644
index 0000000..e7943f8
--- /dev/null
+++ b/watchmaker/book/src/xml/islands.xml
@@ -0,0 +1,175 @@
+<chapter xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>Island Models</title>
+ <indexterm><primary>island models</primary></indexterm>
+ <indexterm><primary>Australia</primary></indexterm>
+ <para>
+ In the natural world, populations of organisms might be separated by geography. Left to evolve in isolation
+ over millions of years, vastly different species will occur in different locations. Consider Australia,
+ an island continent protected by its seas. With little opportunity for outside organisms to
+ interfere, and few opportunities for its land-based organisms to migrate to other land masses, Australian
+ wildlife evolved to be distinctly different from that of other continents and countries. The majority of
+ Australia's plant and animal species, including 84% of its mammals, are endemic. They occur nowhere else
+ in the world.
+ </para>
+ <indexterm><primary>Darwin, Charles</primary></indexterm>
+ <indexterm><primary>Galápagos Islands</primary></indexterm>
+ <para>
+ Australia is not the only island to exhibit such levels of endemism. It was a visit to the Galápagos
+ Islands in 1835 that started Charles Darwin on the path to formulating his theory of evolution. Darwin
+ noticed the pronounced differences between the species of mocking birds and tortoises present on the
+ different islands of the archipelago and began to speculate on how such variations might have occurred.
+ </para>
+ <para>
+ In the world of evolutionary computation we can mimic this idea of having multiple isolated populations
+ evolving in parallel. Having additional populations would increase the likelihood of finding a solution that
+ is close to the global optimum. However, it is not just a question of having a larger global population.
+ A system of 10 islands each with a population of 50 individuals is not equivalent to a single island with a
+ population of 500. The reason for this is that the island system partitions the search. If one island
+ prematurely converges on a sub-optimal solution it does not affect the evolution happening on the other
+ islands; they are following their own paths. A single large population does not have this in-built
+ resilience.
+ </para>
+ <section>
+ <title>Migration</title>
+ <indexterm><primary>migration</primary></indexterm>
+ <indexterm><primary>island models</primary><secondary>migration</secondary></indexterm>
+ <para>
+ There is of course no real difference between evolving 10 completely separate islands in parallel and running
+ the same single-population evolution 10 times in a row, other than how the computing resources are utilised.
+ In practice the populations are not kept permanently isolated from each other and there are occasional
+ opportunities for individuals to migrate between islands.
+ </para>
+ <para>
+ In nature external species have been introduced to foreign ecosystems in several ways. In an ice age the waters
+ that previously separated two land masses might freeze providing a route for land animals to migrate to
+ previously unreachable places. Microorganisms and insects have often strayed beyond their usual environment by
+ hitching a ride with larger species.
+ </para>
+ <indexterm><primary>rabbits</primary></indexterm>
+ <indexterm><primary>Austin, Thomas</primary></indexterm>
+ <para>
+ The effect of introducing a foreign species to a new environment can vary. The new species might be
+ ill-adapted to its new surroundings and quickly perish. Alternatively, a lack of natural predators
+ may cause it to flourish, often to the detriment of indigenous species. One such example is the
+ introduction of rabbits to Australia. Australia was a land without rabbits until the arrival of European
+ settlers. An Englishman named Thomas Austin released 24 rabbits into the wild of Victoria in October 1859
+ with the intention of hunting them. If rabbits are famous for one thing it is for reproducing prodigiously.
+ The mild winters allowed year-round breeding and the absence of any natural rabbit predators, such as foxes,
+ allowed the Australian rabbit population to explode unchecked. Within 10 years an annual cull of two million
+ rabbits was having no noticeable effect on rabbit numbers and the habitats of some native animals were being
+ destroyed by the floppy-eared pests. Today there are hundreds of millions of rabbits in Australia, despite
+ efforts to reduce the population, and the name of Thomas Austin is widely cursed for his catastrophic lack
+ of foresight.
+ </para>
+ <para>
+ While such invasions of separate species provide a useful analogy for what can happen when we introduce migration
+ into island model evolutionary algorithms, we are specifically interested in the effects of migration involving
+ genetically different members of the same species. This is because, in our simplified model of evolution,
+ all individuals are compatible and can reproduce. The island model of evolution provides the isolation necessary
+ for diversity to thrive while still providing opportunities for diverse individuals to be combined to produce
+ potentially fitter offspring.
+ </para>
+ <para>
+ In an island model, the isolation of the separate populations often leads to different traits originating on
+ different islands. Migration brings these diverse individuals together occasionally to see what happens when
+ they are combined. Remember that, even if the immigrants are weak, cross-over can result in offspring that are
+ fitter than either of their parents. In this way, the introduction to the population of new genetic building
+ blocks may result in evolutionary progress even if the immigrants themselves are not viable in the new
+ population.
+ </para>
+ </section>
+ <section>
+ <title>Islands in the Watchmaker Framework</title>
+ <indexterm><primary><classname>IslandEvolution</classname></primary></indexterm>
+ <para>
+ The Watchmaker Framework for Evolutionary Computation supports islands models via the
+ <classname>IslandEvolution</classname> class. Each island is a self-contained
+ <classname>EvolutionEngine</classname> just like those we have been using previously for single-population
+ evolutionary algorithms. The evolution is divided into <emphasis>epochs</emphasis>. Each epoch consists
+ of a fixed number of generations that each island completes in isolation. At the end of an epoch migration
+ occurs. Then, if the termination conditions are not yet satisfied, a new epoch begins.
+ </para>
+ <para>
+ The <classname>IslandEvolution</classname> supports pluggable migration strategies via different implementations
+ of the <interfacename>Migration</interfacename> interface. An island version of the string evolution example
+ from <xref linkend="watchmaker_chapter" /> might look something like this:
+ </para>
+ <indexterm><primary><interfacename>Migration</interfacename></primary></indexterm>
+ <indexterm><primary><classname>RingMigration</classname></primary></indexterm>
+ <informalexample>
+ <programlisting language="java">
+<![CDATA[IslandEvolution<String> engine
+ = new IslandEvolution<String>(5, // Number of islands.
+ new RingMigration(),
+ candidateFactory,
+ evolutionaryOperator,
+ fitnessEvaluator,
+ selectionStrategy,
+ rng);
+
+engine.evolve(100, // Population size per island.
+ 5, // Elitism for each island.
+ 50, // Epoch length (no. generations).
+ 3, // Migrations from each island at each epoch.
+ new TargetFitness(0, false));]]>
+ </programlisting>
+ </informalexample>
+ <indexterm><primary><interfacename>IslandEvolutionObserver</interfacename></primary></indexterm>
+ <indexterm><primary><methodname>populationUpdate</methodname></primary></indexterm>
+ <indexterm><primary><methodname>islandPopulationUpdate</methodname></primary></indexterm>
+ <para>
+ We can add listeners to an <classname>IslandEvolution</classname> object, just as we can with individual
+ <interfacename>EvolutionEngine</interfacename>s. We use a different interface for this though,
+ <interfacename>IslandEvolutionObserver</interfacename>, which provides two call-backs.
+ The <methodname>populationUpdate</methodname> method reports the global state of the combined population
+ of all islands at the end of each epoch. The <methodname>islandPopulationUpdate</methodname> method reports
+ the state of individual island populations at the end of each generation.
+ </para>
+ <section>
+ <title>Advanced Usage</title>
+ <indexterm><primary><classname>GenerationalEvolutionEngine</classname></primary><secondary>island evolution</secondary></indexterm>
+ <para>
+ In the example code above we specified how many islands we wanted to use and the
+ <classname>IslandEvolution</classname> class created one <classname>GenerationalEvolutionEngine</classname>
+ for each island. Using this approach all of the islands have the same configuration; they use the same
+ candidate factory, evolutionary operator(s) and selection strategy. This is the easiest way to create an
+ island system but it is also possible to construct each island individually for ultimate flexibility.
+ </para>
+ <informalexample>
+ <programlisting language="java">
+<![CDATA[List<EvolutionEngine<String>> islands
+ = new ArrayList<EvolutionEngine<String>>();
+
+// Create individual islands here and add them to the list.
+// ...
+
+IslandEvolution<String> engine
+ = new IslandEvolution<String>(islands,
+ new RingMigration(),
+ false, // Natural fitness?
+ rng);]]>
+ </programlisting>
+ </informalexample>
+ <para>
+ One reason you might choose to construct the islands explicitly is that it makes it possible to configure
+ individual islands differently. You may choose to have different islands use different parameters
+ for evolutionary operators, or even to use different evolutionary operators all together. Alternatively,
+ you could use the same evolutionary operators and parameters but have different selection strategies so that
+ some islands have stronger selection pressure than others. You should generally use the same fitness function
+ for all islands though, otherwise you might get some strange results.
+ </para>
+ <indexterm><primary><classname>SteadyStateEvolutionEngine</classname></primary><secondary>island evolution</secondary></indexterm>
+ <indexterm><primary><classname>EvolutionStrategyEngine</classname></primary><secondary>island evolution</secondary></indexterm>
+ <para>
+ Another possible reason for creating the islands explicitly is so you don't have to use the standard
+ <classname>GenerationalEvolutionEngine</classname> for the islands. You can choose to use any implementation
+ of the <interfacename>EvolutionEngine</interfacename> interface, such as the
+ <classname>SteadyStateEvolutionEngine</classname> class or the <classname>EvolutionStrategyEngine</classname>
+ class. You can even use a mixture of different island types with the same
+ <classname>IslandEvolution</classname> object.
+ </para>
+ </section>
+ </section>
+</chapter>
diff --git a/watchmaker/book/src/xml/multiobjective.xml b/watchmaker/book/src/xml/multiobjective.xml
new file mode 100644
index 0000000..d453c0f
--- /dev/null
+++ b/watchmaker/book/src/xml/multiobjective.xml
@@ -0,0 +1,6 @@
+<chapter xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>Multi-Objective Optimisations</title>
+ <para>TODO</para>
+</chapter>
diff --git a/watchmaker/book/src/xml/performance.xml b/watchmaker/book/src/xml/performance.xml
new file mode 100644
index 0000000..6f050bb
--- /dev/null
+++ b/watchmaker/book/src/xml/performance.xml
@@ -0,0 +1,157 @@
+<appendix xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>Optimising for Performance</title>
+ <para>
+ This appendix lists some suggestions on how to get the best possible performance from your
+ evolutionary Java programs. Much of the advice here applies whether or not you are using the
+ Watchmaker Framework to develop your evolutionary programs.
+ </para>
+ <para>
+ As with all optimisations in software development, the golden rule is don't do it unless you have
+ a demonstrable need for improved performance. Optimisations often introduce complexity and make
+ code harder to maintain. Before starting on any optimisations, always use a profiler to identify
+ the bottlenecks in your application. This will pinpoint the areas where optimisations are most
+ likely to beneficial. It is pointless to expend effort to try to speed up a routine that accounts
+ for only 0.1% of the CPU time.
+ </para>
+ <section>
+ <title>Optimising the Fitness Evaluator</title>
+ <indexterm><primary>fitness function</primary><secondary>optimisation of</secondary></indexterm>
+ <para>
+ For most non-trivial evolutionary algorithms, the bulk of the work is the evaluation of
+ candidate solutions. For this reason the fitness function is often the obvious place to
+ make improvements. A fitness evaluator should do no more work than is absolutely
+ necessary on each invocation. If there is some initialisation that is repeated unnecessarily,
+ consider moving it to the constructor. If similar calculations are performed every time,
+ consider pre-computing the possible results and using a look-up table. When you consider
+ that the evaluator may be invoked millions of times in a single run, it is clear that even
+ small optimisations to the fitness function may add up to substantial reductions in running
+ time.
+ </para>
+ <section>
+ <title>The Caching Fitness Evaluator</title>
+ <indexterm><primary>CachingFitnessEvaluator</primary></indexterm>
+ <indexterm><primary>fitness function</primary><secondary>caching</secondary></indexterm>
+ <indexterm><primary>elitism</primary></indexterm>
+ <para>
+ In some evolutionary programs individuals can survive from generation to generation unmodified.
+ The most obvious example of this is elitism. Individuals that are preserved through elitism
+ will appear unaltered in the next generation and may survive for many generations. Individuals
+ may also survive without modification if the evolutionary operators in use are probabilistic and
+ don't always affect every candidate.
+ </para>
+ <para>
+ If fitness evaluations are expensive, it is wasteful to repeatedly recalculate fitness values
+ for unaltered individuals. The Watchmaker Framework provides the
+ <classname>org.uncommons.watchmaker.framework.CachingFitnessEvaluator</classname> class to
+ address this problem. It acts as a wrapper for your fitness evaluator and caches the results
+ of fitness calculations. If the same candidate is evaluated twice, the cached value is returned
+ the second time thus avoiding the cost of recalculating the fitness score. The cache uses Java's
+ weak references to avoid memory leakage (if the candidate does not survive, the associated cache
+ entry will also be garbage collected).
+ </para>
+ <note>
+ <para>
+ Caching of fitness scores is provided as an option rather than as the default Watchmaker
+ Framework behaviour because caching is only valid when fitness evaluations are
+ <emphasis>isolated</emphasis> and <emphasis>repeatable</emphasis>. An isolated fitness
+ evaluation is one where the result depends only upon the candidate being evaluated. This is
+ not the case when candidates are evaluated against the other members of the population.
+ Caching should not be used if it is possible for multiple evaluations of the same candidate
+ to return different scores.
+ </para>
+ </note>
+ </section>
+ </section>
+ <section>
+ <title>Minimising the Search Space</title>
+ <para>
+ An evolutionary algorithm is a type of non-deterministic search. The algorithm is
+ searching the space of all possible solutions to find one that is good enough. The
+ larger the search space, the longer it is likely to take to converge on a
+ satisfactory solution.
+ For this reason, anything we can do to constrain the search space, without
+ handicapping the algorithm, is likely to be beneficial. This includes choosing
+ an efficient candidate representation and using evolutionary operators that
+ avoid generating useless or invalid solutions.
+ A little intelligent design can go a long way.
+ </para>
+ </section>
+ <section>
+ <title>Random Number Generators</title>
+ <indexterm><primary>random number generator</primary></indexterm>
+ <indexterm><primary>Random</primary></indexterm>
+ <indexterm><primary>RNG</primary></indexterm>
+ <indexterm><primary>SecureRandom</primary></indexterm>
+ <para>
+ The random number generator (RNG) is a core component of any evolutionary simulation. It is
+ used for selection, for cross-over and for mutation. A slow random number generator can be
+ a bottleneck. Most programming languages provide a mechanism to generate random numbers.
+ Unfortunately, few of them are ideal. The Java standard library includes two RNGs,
+ <classname>java.util.Random</classname> and <classname>java.security.SecureRandom</classname>.
+ These should be avoided for statistical and performance reasons respectively.
+ </para>
+ <indexterm><primary>MersenneTwisterRNG</primary></indexterm>
+ <para>
+ The Watchmaker Framework comes bundled with three high-quality RNGs provided by the Uncommons
+ Maths project. Of these, the <classname>org.uncommons.maths.random.MersenneTwisterRNG</classname>
+ is the most suitable for the majority of evolutionary programs. Alternatively, you can use any
+ third party RNG that is a sub-class of <classname>java.util.Random</classname>.
+ </para>
+ </section>
+ <section>
+ <title>JVM Options</title>
+ <indexterm><primary>Java Virtual Machine</primary></indexterm>
+ <indexterm><primary>JVM</primary></indexterm>
+ <para>
+ The Java Virtual Machine (JVM) is a complex piece of software. It is designed to run a huge
+ variety different programs. As such, its default configuration is not optimised for the
+ particular needs of evolutionary computation. This section lists some of the JVM options
+ that you can tweak to try to achieve better performance.
+ </para>
+ <section>
+ <title>Server VM</title>
+ <indexterm><primary>server VM</primary></indexterm>
+ <para>
+ The Sun JVM provides two modes of operation, one optimised for client applications (the
+ default) and one for server applications. The server VM takes marginally longer to start
+ up but provides substantially better performance for long-running processes and is therefore
+ a better choice for most evolutionary algorithms. The server VM is enabled using the
+ <literal>-server</literal> switch.
+ </para>
+ </section>
+ <section>
+ <title>Garbage Collection</title>
+ <indexterm><primary>garbage collection</primary></indexterm>
+ <para>
+ Evolutionary algorithms create many short-lived objects. Modern JVMs, with their generational
+ garbage collectors, are typically well tuned for this usage pattern. However, you may find
+ that by modifying the settings you are able to improve throughput.
+ </para>
+ <para>
+ Garbage collectors make a trade-off between overall throughput and pause time. For
+ evolutionary algorithms we typically want to maximise throughput, even at the expense of
+ introducing noticeable pauses in the program's execution. What is most important is how soon
+ the program completes, not how smoothly it runs.
+ </para>
+ <para>
+ You can get information on what the garbage collector is doing by starting the JVM with the
+ <literal>-verbosegc</literal> switch. If you find that the program is spending a lot of time
+ collecting garbage, it may be because it is short of memory. If you have sufficient RAM,
+ increasing the maximum size of the Java heap (using the <literal>-Xmx</literal> switch) may
+ improve things.
+ </para>
+ </section>
+ <section>
+ <title>Alternative JVMs</title>
+ <para>
+ Sun Microsystems is not the only provider of virtual machines for Java. If your platform is
+ supported, you may also have the option of using a JVM from BEA, IBM or some other third party.
+ These virtual machines have different performance characteristics and different garbage
+ collector implementations. If you have tried everything else and still need something faster,
+ you may find that a different JVM will perform better. Then again, it may not.
+ </para>
+ </section>
+ </section>
+</appendix>
diff --git a/watchmaker/book/src/xml/preface.xml b/watchmaker/book/src/xml/preface.xml
new file mode 100644
index 0000000..9d2e1ec
--- /dev/null
+++ b/watchmaker/book/src/xml/preface.xml
@@ -0,0 +1,29 @@
+<preface xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>Preface</title>
+ <para>
+ This book is intended to be a pragmatic, hands-on guide to implementing evolutionary
+ algorithms. The emphasis is on writing useful evolutionary programs and solving interesting
+ problems. This is not intended to be a thorough theoretical guide. I will introduce the
+ key evolutionary computation concepts and demonstrate their practical application, but I will
+ endeavour to keep abstract theory and mathematics to the minimum necessary.
+ For a more academic treatment of the material, I recommend Melanie Mitchell's
+ <emphasis>An Introduction to Genetic Algorithms</emphasis> and A.E. Eiben and J.E. Smith's
+ <emphasis>Introduction to Evolutionary Computing</emphasis>.
+ </para>
+ <para>
+ Though evolutionary algorithms can be implemented in any general purpose programming language,
+ all of the examples in this book are presented using Java. Java is one of the most widely
+ used programming languages in the world today. As such, many software developers are already
+ familiar with Java and able to understand programs written in it, even if they usually
+ develop in other languages. Java provides a good balance of performance, ease-of-use and
+ a rich standard library.
+ </para>
+ <para>
+ The concepts discussed in this book are not tied to a particular programming language,
+ so there is no reason why you couldn't implement the ideas in another language if you
+ preferred.
+ </para>
+</preface>
+
diff --git a/watchmaker/book/src/xml/salesman.xml b/watchmaker/book/src/xml/salesman.xml
new file mode 100644
index 0000000..9eac594
--- /dev/null
+++ b/watchmaker/book/src/xml/salesman.xml
@@ -0,0 +1,17 @@
+<chapter xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>The Travelling Salesman</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="travelling_salesman_problem.png"
+ format="PNG" scalefit="1" width="100%" />
+ </imageobject>
+ <caption>
+ <para>
+ <link href="http://xkcd.com/399">http://xkcd.com/399</link>
+ </para>
+ </caption>
+ </mediaobject>
+ <para>TODO</para>
+</chapter>
diff --git a/watchmaker/book/src/xml/selection.xml b/watchmaker/book/src/xml/selection.xml
new file mode 100644
index 0000000..95525b1
--- /dev/null
+++ b/watchmaker/book/src/xml/selection.xml
@@ -0,0 +1,160 @@
+<chapter xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd"
+ id="selection_chapter">
+ <title>Selection Strategies &amp; Elitism</title>
+ <para>
+ Selection is an important part of an evolutionary algorithm. Without selection directing the algorithm towards
+ fitter solutions there would be no progress. Selection must favour fitter candidates over weaker candidates but
+ beyond that there are no fixed rules. Furthermore, there is no one strategy that is best for all problems. Some
+ strategies result in fast convergence, others will tend to produce a more thorough exploration of the search space.
+ An evolutionary algorithm that appears ineffective with one selection strategy may be transformed by switching to
+ a strategy with different characteristics. This chapter describes the most commonly used selection strategies
+ (all of these strategies are supported in the Watchmaker Framework for Evolutionary Computation via different
+ implementations of the <classname>SelectionStrategy</classname> interface).
+ </para>
+ <section>
+ <title>Truncation Selection</title>
+ <indexterm><primary>truncation selection</primary></indexterm>
+ <indexterm><primary>selection</primary><secondary>truncation</secondary></indexterm>
+ <para>
+ Truncation selection is the simplest and arguably least useful selection strategy. Truncation selection
+ simply retains the fittest <varname>x</varname>% of the population. These fittest individuals are duplicated so
+ that the population size is maintained. For example, we might select the fittest 25% from a population of 100
+ individuals. In this case we would create four copies of each of the 25 candidates in order to maintain a population
+ of 100 individuals. This is an easy selection strategy to implement but it can result in premature convergence as
+ less fit candidates are ruthlessly culled without being given the opportunity to evolve into something better.
+ Nevertheless, truncation selection can be an effective strategy for certain problems.
+ </para>
+ </section>
+ <section>
+ <title>Fitness-Proportionate Selection</title>
+ <indexterm><primary>fitness-proportionate selection</primary></indexterm>
+ <indexterm><primary>selection</primary><secondary>fitness-proportionate</secondary></indexterm>
+ <para>
+ A better approach to selection is to give every individual a chance of being selected to breed but to make
+ fitter candidates more likely to be chosen than weaker individuals. This is achieved by making an individual's
+ survival probability a function of its fitness score. Such strategies are known as
+ <emphasis>fitness-proportionate selection</emphasis>.
+ </para>
+ <section>
+ <title>Roulette Wheel Selection</title>
+ <indexterm><primary>roulette wheel selection</primary></indexterm>
+ <indexterm><primary>selection</primary><secondary>roulette wheel</secondary></indexterm>
+ <para>
+ The most common fitness-proportionate selection technique is called <emphasis>Roulette Wheel
+ Selection</emphasis>. Conceptually, each member of the population is allocated a section of an imaginary
+ roulette wheel. Unlike a real roulette wheel the sections are different sizes, proportional to the
+ individual's fitness, such that the fittest candidate has the biggest slice of the wheel and the weakest
+ candidate has the smallest. The wheel is then spun and the individual associated with the winning section
+ is selected. The wheel is spun as many times as is necessary to select the full set of parents for the next
+ generation.
+ </para>
+ <para>
+ Using this technique it is possible (probable) that one or more individuals is selected multiple times.
+ That's OK, it's what we want to happen. Remember that we are not selecting the members of the next
+ generation, we are selecting their parents and it is possible for an individual to be a parent multiple times.
+ If there is a particularly fit member of the population we would expect it to be more successful at producing
+ offspring than a weaker rival.
+ </para>
+ </section>
+ <section>
+ <title>Stochastic Universal Sampling</title>
+ <indexterm><primary>stochastic universal sampling</primary></indexterm>
+ <para>
+ <emphasis>Stochastic Universal Sampling</emphasis> is an elaborately-named variation of roulette wheel
+ selection. Stochastic Universal Sampling ensures that the observed selection frequencies of each individual
+ are in line with the expected frequencies. So if we have an individual that occupies 4.5% of the wheel
+ and we select 100 individuals, we would expect on average for that individual to be selected between four
+ and five times. Stochastic Universal Sampling guarantees this. The individual will be selected either four
+ times or five times, not three times, not zero times and not 100 times. Standard roulette wheel selection
+ does not make this guarantee.
+ </para>
+ <para>
+ Stochastic Universal Sampling works by making a single spin of the roulette wheel. This provides a starting
+ position and the first selected individual. The selection process then proceeds by advancing all the way
+ around the wheel in equal sized steps, where the step size is determined by the number of individuals to be
+ selected. So if we are selecting 30 individuals we will advance by
+ <inlineequation><mathphrase>1/30 x 360 degrees</mathphrase></inlineequation> for each selection. Note that
+ this does not mean that every candidate on the wheel will be selected. Some weak individuals will have very
+ thin slices of the wheel and these might be stepped over completely depending on the random starting position.
+ </para>
+ </section>
+ </section>
+ <section>
+ <title>Rank Selection</title>
+ <indexterm><primary>rank selection</primary></indexterm>
+ <indexterm><primary>selection</primary><secondary>rank</secondary></indexterm>
+ <para>
+ <emphasis>Rank Selection</emphasis> is similar to fitness-proportionate selection except that selection
+ probability is proportional to relative fitness rather than absolute fitness. In other words, it doesn't make
+ any difference whether the fittest candidate is ten times fitter than the next fittest or 0.001% fitter. In
+ both cases the selection probabilities would be the same; all that matters is the ranking relative to other
+ individuals.
+ </para>
+ <para>
+ Rank selection will tend to avoid premature convergence by tempering selection
+ pressure for large fitness differentials that occur in early generations. Conversely, by amplifying small
+ fitness differences in later generations, selection pressure is increased compared to alternative selection
+ strategies.
+ </para>
+ </section>
+ <section>
+ <title>Tournament Selection</title>
+ <indexterm><primary>selection</primary><secondary>tournament</secondary></indexterm>
+ <indexterm><primary>tournament selection</primary></indexterm>
+ <para>
+ <emphasis>Tournament Selection</emphasis> is among the most widely used selection strategies in evolutionary
+ algorithms. It works well for a wide range of problems, it can be implemented efficiently, and it is amenable to
+ parallelisation.
+ </para>
+ <para>
+ At its simplest tournament selection involves randomly picking two individuals from the population and staging
+ a tournament to determine which one gets selected. The "tournament" isn't much of a tournament at all, it
+ just involves generating a random value between zero and one and comparing it to a pre-determined selection
+ probability. If the random value is less than or equal to the selection probability, the fitter candidate is
+ selected, otherwise the weaker candidate is chosen. The probability parameter provides a convenient mechanism
+ for adjusting the selection pressure. In practise it is always set to be greater than 0.5 in order to favour
+ fitter candidates. The tournament can be extended to involve more than two individuals if desired.
+ </para>
+ </section>
+ <section>
+ <title>Sigma Scaling</title>
+ <indexterm><primary>selection</primary><secondary>sigma scaling</secondary></indexterm>
+ <indexterm><primary>sigma scaling</primary></indexterm>
+ <para>
+ Like rank selection, <emphasis>Sigma Scaling</emphasis> attempts to moderate selection pressure over time
+ so that it is not too strong in early generations and not too weak once the population has stabilised and
+ fitness differences are smaller. The Greek letter Sigma is used in statistics to denote standard deviation
+ and that's what it means here too. The standard deviation of the population fitness is used to scale the
+ fitness scores so that selection pressure is relatively constant over the lifetime of the evolutionary
+ program.
+ </para>
+ </section>
+ <section>
+ <title>Elitism</title>
+ <indexterm><primary>elitism</primary></indexterm>
+ <para>
+ Sometimes good candidates can be lost when cross-over or mutation results in offspring
+ that are weaker than the parents. Often the EA will re-discover these lost improvements
+ in a subsequent generation but there is no guarantee. To combat this we can use a
+ feature known as <emphasis>elitism</emphasis>. Elitism involves copying a small
+ propotion of the fittest candidates, unchanged, into the next generation. This can
+ sometimes have a dramatic impact on performance by ensuring that the EA does not waste
+ time re-discovering previously discarded partial solutions.
+ Candidate solutions that are preserved unchanged through elitism remain eligible for
+ selection as parents when breeding the remainder of the next generation.
+ </para>
+ <tip>
+ <para>
+ The Watchmaker Framework supports elitism via the second parameter to the
+ <methodname>evolve</methodname> method of an <interfacename>EvolutionEngine</interfacename>.
+ This elite count is the number of candidates in a generation that should be copied
+ unchanged from the previous generation, rather than created via evolution. Collectively
+ these candidates are the <emphasis>elite</emphasis>. So for a population size of 100,
+ setting the elite count to 5 will result in the fittest 5% of each generation being copied,
+ without modification, into the next generation.
+ </para>
+ </tip>
+ </section>
+</chapter>
diff --git a/watchmaker/book/src/xml/steadystate.xml b/watchmaker/book/src/xml/steadystate.xml
new file mode 100644
index 0000000..2e032c8
--- /dev/null
+++ b/watchmaker/book/src/xml/steadystate.xml
@@ -0,0 +1,6 @@
+<chapter xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>Steady-State Evolutionary Algorithms</title>
+ <para>TODO</para>
+</chapter>
diff --git a/watchmaker/book/src/xml/sudoku.xml b/watchmaker/book/src/xml/sudoku.xml
new file mode 100644
index 0000000..9c86740
--- /dev/null
+++ b/watchmaker/book/src/xml/sudoku.xml
@@ -0,0 +1,6 @@
+<chapter xmlns="http://docbook.org/ns/docbook"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <title>An Evolutionary Sudoku Solver</title>
+ <para>TODO</para>
+</chapter>
diff --git a/watchmaker/book/src/xml/watchmaker.xml b/watchmaker/book/src/xml/watchmaker.xml
new file mode 100644
index 0000000..b18cb39
--- /dev/null
+++ b/watchmaker/book/src/xml/watchmaker.xml
@@ -0,0 +1,478 @@
+<chapter xmlns="http://docbook.org/ns/docbook"
+ xmlns:xlink="http://www.w3.org/1999/xlink"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd"
+ id="watchmaker_chapter">
+ <title>The Watchmaker Framework</title>
+ <indexterm significance="preferred"><primary>Watchmaker Framework</primary></indexterm>
+ <para>
+ The Watchmaker Framework for Evolutionary Computation is an extensible, high-performance,
+ object-oriented framework for implementing platform-independent evolutionary algorithms
+ in Java.
+ It is freely available under a permissive Open Source licence. It can be downloaded from
+ <link xlink:href="http://watchmaker.uncommons.org">http://watchmaker.uncommons.org</link>.
+ </para>
+ <para>
+ This chapter introduces the core components of the Watchmaker Framework and shows how
+ they can be used to implement simple evolutionary algorithms such as the "Hello World"
+ example outlined in the previous chapter.
+ </para>
+ <section>
+ <title>The Evolution Engine</title>
+ <indexterm><primary><classname>GenerationalEvolutionEngine</classname></primary></indexterm>
+ <indexterm><primary><interfacename>EvolutionEngine</interfacename></primary></indexterm>
+ <para>
+ The central object of an evolutionary program built with the Watchmaker Framework is
+ the evolution engine.
+ </para>
+ <para>
+ The framework provides multiple implementations of the
+ <interfacename>EvolutionEngine</interfacename> interface, but the one that you will
+ usually want to use is <classname>GenerationalEvolutionEngine</classname>. This is a
+ general-purpose implementation of the evolutionary algorithm outline from chapter 1.
+ </para>
+ <para>
+ An <interfacename>EvolutionEngine</interfacename> has a single generic type parameter
+ that indicates the type of object that it can evolve.
+ For the "Hello World" program we need to be able to evolve Java strings.
+ Code that creates an engine that can evolve strings would look something like this:
+ </para>
+ <informalexample>
+ <programlisting language="java">
+<![CDATA[EvolutionEngine<String> engine
+ = new GenerationalEvolutionEngine<String>(candidateFactory,
+ evolutionaryOperator,
+ fitnessEvaluator,
+ selectionStrategy,
+ rng);]]>
+ </programlisting>
+ </informalexample>
+ <para>
+ Once you have created an <interfacename>EvolutionEngine</interfacename>, your program
+ is as simple as calling the <methodname>evolve</methodname> method with appropriate
+ arguments.
+ However, as you can see from the code snippet above, there is a little bit of work to
+ be done first in order to create an <interfacename>EvolutionEngine</interfacename> that
+ is configured appropriately for the given problem.
+ The constructor of the <classname>GenerationalEvolutionEngine</classname> class requires
+ five objects. These are:
+ </para>
+ <itemizedlist>
+ <listitem>A Candidate Factory</listitem>
+ <listitem>An Evolutionary Operator</listitem>
+ <listitem>A Fitness Evaluator</listitem>
+ <listitem>A Selection Strategy</listitem>
+ <listitem>A Random Number Generator</listitem>
+ </itemizedlist>
+ </section>
+ <section>
+ <title>The Candidate Factory</title>
+ <indexterm significance="preferred"><primary><interfacename>CandidateFactory</interfacename></primary></indexterm>
+ <para>
+ The first object that needs to be plugged into the evolution engine is a candidate
+ factory. Every evolutionary simulation must start with an initial population of
+ candidate solutions and the <interfacename>CandidateFactory</interfacename> interface
+ is the mechanism by which the evolution engine creates this population.
+ </para>
+ <para>
+ A candidate factory implementation has an associated type. It can only create
+ objects of that type. The type must match the type of the evolution engine that
+ it is plugged into.
+ You can write your own implementation of <interfacename>CandidateFactory</interfacename>
+ for your program or, if you are using a common type such as strings, lists or
+ arrays, you may be able to use a ready-made factory from the
+ <package>org.uncommons.watchmaker.framework.factories</package> package.
+ </para>
+ <indexterm><primary><classname>StringFactory</classname></primary></indexterm>
+ <para>
+ For our "Hello World" program, we can use the provided
+ <classname>StringFactory</classname>:
+ </para>
+ <informalexample>
+ <programlisting language="java">
+<![CDATA[// Define the set of permitted characters (A-Z plus space).
+char[] chars = new char[27];
+for (char c = 'A'; c <= 'Z'; c++)
+{
+ chars[c - 'A'] = c;
+}
+chars[26] = ' ';
+
+// Factory for random 11-character Strings.
+CandidateFactory<String> factory = new StringFactory(chars, 11);]]>
+ </programlisting>
+ </informalexample>
+ <tip>
+ <indexterm significance="preferred"><primary><classname>AbstractCandidateFactory</classname></primary></indexterm>
+ <para>
+ When writing your own <interfacename>CandidateFactory</interfacename> implementations,
+ it is easiest to extend the provided <classname>AbstractCandidateFactory</classname>
+ base class since there is then only a single method that must be implemented.
+ </para>
+ </tip>
+ </section>
+ <section>
+ <title>Evolutionary Operators</title>
+ <indexterm significance="preferred"><primary><interfacename>EvolutionaryOperator</interfacename></primary></indexterm>
+ <para>
+ Evolutionary operators are the components that perform the actual evolution of a
+ population. Cross-over is an evolutionary operator, as is mutation.
+ </para>
+ <para>
+ In the Watchmaker Framework, evolutionary operators are defined in terms of the
+ <interfacename>EvolutionaryOperator</interfacename> interface. This interface
+ declares a single method that takes a list of selected individuals and returns a
+ list of evolved individuals. Some operators (i.e. mutation) will process one
+ individual at a time, whereas others will process individuals in groups
+ (cross-over processes two individuals at a time).
+ </para>
+ <indexterm><primary><classname>StringCrossover</classname></primary></indexterm>
+ <indexterm><primary><classname>StringMutation</classname></primary></indexterm>
+ <para>
+ As with candidate factories, evolutionary operators have associated types that
+ must be compatible with the type of the evolution engine that they are used with.
+ And, as with candidate factories, the framework provides several ready-made operators
+ for common types. These can be found in the
+ <package>org.uncommons.watchmaker.framework.operators</package> package. The
+ cross-over and mutation operators that we need for our "Hello World" program are
+ provided by the <classname>StringCrossover</classname> and
+ <classname>StringMutation</classname> classes.
+ </para>
+ <section>
+ <title>The Evolution Pipeline</title>
+ <indexterm significance="preferred"><primary><classname>EvolutionPipeline</classname></primary></indexterm>
+ <para>
+ Alert readers will have noticed that the evolution engine constructor only accepts
+ a single evolutionary operator. So how can we use both cross-over and mutation?
+ The answer is provided by the <classname>EvolutionPipeline</classname> operator.
+ This is a compound evolutionary operator that chains together multiple operators of
+ the same type.
+ </para>
+ <informalexample>
+ <programlisting language="java">
+<![CDATA[List<EvolutionaryOperator<String>> operators
+ = new LinkedList<EvolutionaryOperator<String>>();
+operators.add(new StringCrossover());
+operators.add(new StringMutation(chars, new Probability(0.02)));
+
+EvolutionaryOperator<String> pipeline
+ = new EvolutionPipeline<String>(operators);]]>
+ </programlisting>
+ </informalexample>
+ <note>
+ <para>
+ The evolution pipeline is just one of many useful operators included
+ in the <package>org.uncommons.watchmaker.framework.operators</package> package.
+ Elaborate evolution schemes can be constructed from combinations of these
+ operators.
+ Users of the Watchmaker Framework should take a few minutes to browse the API
+ documentation and familiarise themselves with the available classes.
+ </para>
+ </note>
+ </section>
+ </section>
+ <section>
+ <title>The Fitness Evaluator</title>
+ <indexterm significance="preferred"><primary><interfacename>FitnessEvaluator</interfacename></primary></indexterm>
+ <para>
+ So far we've been able to build our evolutionary program by simply combining instances
+ of classes provided by the framework. There is one part of the program that we will
+ always have to write for ourselves though and that is the fitness function, which is
+ necessarily different for every program.
+ </para>
+ <para>
+ In the Watchmaker Framework, a fitness function is written by implementing the
+ <interfacename>FitnessEvaluator</interfacename> interface. The
+ <methodname>getFitness</methodname> method of this interface takes a candidate solution
+ and returns its fitness score as a Java double. The method actually takes two
+ arguments, but we can ignore the second for now.
+ </para>
+ <para>
+ The listing below is a fitness evaluator for the "Hello World" program. It
+ simply assigns one point for each character in the candidate string that
+ matches the corresponding position in the target string.
+ </para>
+ <informalexample>
+ <programlisting language="java">
+<![CDATA[public class StringEvaluator implements FitnessEvaluator<String>
+{
+ private final String targetString = "HELLO WORLD";
+
+ /**
+ * Assigns one "fitness point" for every character in the
+ * candidate String that matches the corresponding position in
+ * the target string.
+ */
+ public double getFitness(String candidate,
+ List<? extends String> population)
+ {
+ int matches = 0;
+ for (int i = 0; i < candidate.length(); i++)
+ {
+ if (candidate.charAt(i) == targetString.charAt(i))
+ {
+ ++matches;
+ }
+ }
+ return matches;
+ }
+
+ public boolean isNatural()
+ {
+ return true;
+ }
+}]]>
+ </programlisting>
+ </informalexample>
+ <indexterm><primary>fitness function</primary><secondary>natural</secondary></indexterm>
+ <indexterm><primary>natural fitness</primary></indexterm>
+ <para>
+ By some fitness measures, a higher value indicates a fitter solution. In other
+ cases a lower value is better. The <methodname>isNatural</methodname> method
+ of a fitness evaluator simply specifies which scenario applies. In Watchmaker
+ Framework terminology, a <emphasis>natural</emphasis> fitness function is one that
+ returns higher values for fitter individuals.
+ </para>
+ </section>
+ <section>
+ <title>Selection Strategy</title>
+ <indexterm><primary>selection</primary></indexterm>
+ <indexterm><primary>SelectionStrategy</primary></indexterm>
+ <para>
+ Selection is a key ingredient in any evolutionary algorithm. It's what determines
+ which individuals survive to reproduce and which are discarded. All we've said about
+ selection so far is that it should favour fitter individuals. This definition permits
+ several different implementations. The Watchmaker Framework includes all of the most
+ common selection strategies in the
+ <package>org.uncommons.watchmaker.framework.selection</package> package. These are
+ sufficient for most evolutionary algorithms but, if necessary, it is straightforward
+ to write your own implementation of the <interfacename>SelectionStrategy</interfacename>
+ interface.
+ </para>
+ <indexterm><primary>RouletteWheelSelection</primary></indexterm>
+ <para>
+ Some selection strategies work better than others for certain problems. Often a little
+ trial-and-error is required to pick the best option. We will delve into the details of
+ various selection strategies in <xref linkend="selection_chapter" />, but for now we will
+ just create an instance of the <classname>RouletteWheelSelection</classname> class and use
+ that for our "Hello World" application.
+ </para>
+ <indexterm><primary>fitness-proportionate selection</primary></indexterm>
+ <indexterm><primary>roulette wheel selection</primary></indexterm>
+ <indexterm><primary>selection</primary><secondary>fitness-proportionate</secondary></indexterm>
+ <indexterm><primary>selection</primary><secondary>roulette wheel</secondary></indexterm>
+ <para>
+ <emphasis>Roulette wheel selection</emphasis> is the most common type of
+ <emphasis>fitness-proportionate selection</emphasis>.
+ It gives all individuals a chance of being selected but favours the fitter
+ individuals since an individual's selection probability is derived from its
+ fitness score.
+ </para>
+ </section>
+ <section>
+ <title>Random Number Generator</title>
+ <indexterm><primary>random number generator</primary></indexterm>
+ <indexterm><primary>Random</primary></indexterm>
+ <indexterm><primary>RNG</primary></indexterm>
+ <indexterm><primary>SecureRandom</primary></indexterm>
+ <para>
+ The final dependency that must be satisfied in order to create an evolution engine
+ is the random number generator (RNG). An evolution engine has a single RNG that it
+ passes to its candidate factory, evolutionary operator and selection strategy.
+ An ideal RNG is both fast and statistically random. We <emphasis>could</emphasis>
+ use the standard Java RNG, <classname>java.util.Random</classname>, but its output
+ is not as random as it should be. The other RNG in the standard library,
+ <classname>java.security.SecureRandom</classname> is much better in this respect
+ but can be slow.
+ </para>
+ <indexterm><primary>MersenneTwisterRNG</primary></indexterm>
+ <para>
+ Fortunately, the Watchmaker Framework provides alternatives. The
+ <classname>org.uncommons.maths.random.MersenneTwisterRNG</classname> random number
+ generator is both fast and statistically sound. It is usually the best choice
+ when creating an evolution engine.
+ </para>
+ </section>
+ <section>
+ <title>Completing the Jigsaw</title>
+ <para>
+ We've now got all of the necessary pieces to complete the "Hello World" example
+ application. Assuming that you've already created the
+ <classname>StringEvaluator</classname> class (defined above) in a separate file,
+ the code needed to create the evolution engine looks like this:
+ </para>
+ <informalexample>
+ <programlisting language="java">
+<![CDATA[// Create a factory to generate random 11-character Strings.
+char[] chars = new char[27];
+for (char c = 'A'; c <= 'Z'; c++)
+{
+ chars[c - 'A'] = c;
+}
+chars[26] = ' ';
+CandidateFactory<String> factory = new StringFactory(chars, 11);
+
+// Create a pipeline that applies cross-over then mutation.
+List<EvolutionaryOperator<String>> operators
+ = new LinkedList<EvolutionaryOperator<String>>();
+operators.add(new StringCrossover())
+operators.add(new StringMutation(chars, new Probability(0.02)));
+EvolutionaryOperator<String> pipeline
+ = new EvolutionPipeline<String>(operators);
+
+FitnessEvaluator<String> fitnessEvaluator = new StringEvaluator();
+SelectionStrategy<Object> selection = new RouletteWheelSelection();
+Random rng = new MersenneTwisterRNG();
+
+EvolutionEngine<String> engine
+ = new GenerationalEvolutionEngine<String>(factory,
+ pipeline,
+ fitnessEvaluator,
+ selection,
+ rng);]]>
+ </programlisting>
+ </informalexample>
+ <indexterm><primary>evolve method</primary></indexterm>
+ <indexterm><primary>population</primary><secondary>size of</secondary></indexterm>
+ <para>
+ The listing above only creates the evolution engine, it does not perform any
+ evolution. For that we need to call the <methodname>evolve</methodname> method.
+ The <methodname>evolve</methodname> method takes three parameters. The first
+ is the size of the population. This is the number of candidate solutions that
+ exist at any time. A bigger population will often result in a satisfactory
+ solution being found in fewer generations. On the other hand, the processing
+ of each generation will take longer because there are more individuals to deal
+ with. For the "Hello World" program, a population size of 10 is fine.
+ </para>
+ <para>
+ The second parameter is concerned with <emphasis>elitism</emphasis>. Elitism
+ is explained in <xref linkend="selection_chapter" />. For now, just use a value of zero.
+ The final varargs parameter specifies one or more termination conditions.
+ </para>
+ <section>
+ <title>Termination Conditions</title>
+ <indexterm><primary>TerminationCondition</primary></indexterm>
+ <indexterm><primary>TargetFitness</primary></indexterm>
+ <para>
+ Termination conditions make the evolution stop. There are a few reasons why
+ we would like the evolution to stop. The most obvious is because we have found the
+ solution that we are looking for. In the case of the "Hello World" program, that
+ is when we have found the target string. The target string has a fitness score of
+ 11 so we use the <classname>TargetFitness</classname> condition.
+ </para>
+ <para>
+ To complete the evolutionary "Hello World" application, add the following two lines:
+ </para>
+ <informalexample>
+ <programlisting language="java">
+<![CDATA[String result = engine.evolve(10, 0, new TargetFitness(11, true));
+System.out.println(result);]]>
+ </programlisting>
+ </informalexample>
+ <note>
+ <indexterm><primary>ElapsedTime</primary></indexterm>
+ <indexterm><primary>GenerationCount</primary></indexterm>
+ <indexterm><primary>Stagnation</primary></indexterm>
+ <para>
+ When we move on to less trivial evolutionary programs, we will rarely be able to
+ specify the outcome so precisely. The
+ <package>org.uncommons.watchmaker.framework.termination</package> package includes
+ other termination conditions that can be used. For example, we may want the program
+ to run for a certain period of time, or a certain number of generations, and then
+ return the best solution it has found up until that point. The
+ <classname>ElapsedTime</classname> and <classname>GenerationCount</classname>
+ conditions provide this functionality. Alternatively, we may want the program to
+ continue as long as it is finding progressively better solutions. The
+ <classname>Stagnation</classname> condition will terminate the evolution after a
+ set number of generations pass without any improvement in the fitness of the fittest
+ candidate.
+ If multiple termination conditions are specified, the evolution will stop as soon
+ as any one of them is satisfied.
+ </para>
+ </note>
+ </section>
+ <section>
+ <title>Evolution Observers</title>
+ <para>
+ Compile and run the above code and, perhaps after a brief pause, you'll see the
+ following output:
+ </para>
+ <informalexample>
+ <programlisting>
+<![CDATA[ HELLO WORLD]]>
+ </programlisting>
+ </informalexample>
+ <indexterm><primary>EvolutionObserver</primary></indexterm>
+ <para>
+ This is quite probably the most convoluted "Hello World" program you'll ever write.
+ It also gives no hints as to its evolutionary nature. We can make the program more
+ interesting by adding an <interfacename>EvolutionObserver</interfacename> to report
+ on the progress of the evolution at the end of each generation. Add the following
+ code to your program before the call to the <methodname>evolve</methodname> method:
+ </para>
+ <informalexample>
+ <programlisting language="java">
+<![CDATA[engine.addEvolutionObserver(new EvolutionObserver<String>()
+{
+ public void populationUpdate(PopulationData<? extends String> data)
+ {
+ System.out.printf("Generation %d: %s\n",
+ data.getGenerationNumber(),
+ data.getBestCandidate());
+ }
+});]]>
+ </programlisting>
+ </informalexample>
+ <para>
+ Re-compile the program and run it again. This time you'll see all of the steps
+ taken to arrive at the target string:
+ </para>
+ <informalexample>
+ <programlisting>
+ Generation 0: JIKDORHOQZJ
+ Generation 1: ULLLFQWZPXG
+ Generation 2: UEULKFVFZLS
+ Generation 3: KLLLFKZGRLS
+ Generation 4: HLLLFKZGRLS
+ Generation 5: HEDPOYWOZLS
+ Generation 6: HEULKIWWZLD
+ Generation 7: HPRLOYWOZLS
+ Generation 8: HEULOYWOZLS
+ Generation 9: HEULOYWORLS
+ Generation 10: HEULOYWORLS
+ Generation 11: HPLLK WQRLH
+ Generation 12: HEBLOYWQRLS
+ Generation 13: HEULOYWOBLA
+ Generation 14: HEBLOIWMRLD
+ Generation 15: HEBLOIWMRLD
+ Generation 16: HEYLFNWQRLD
+ Generation 17: HEBLOIWORLS
+ Generation 18: HEBLOIWORLT
+ Generation 19: HEBLOKWGRLD
+ Generation 20: HELLAYWORLS
+ Generation 21: HELHOIWORLT
+ Generation 22: HEWLOIWORLS
+ Generation 23: HEBLOYCORLD
+ Generation 24: HELLKQWORLD
+ Generation 25: HELLOIWORLT
+ Generation 26: HELLOIWORLS
+ Generation 27: HELLKQWORLD
+ Generation 28: HELLFYWORLD
+ Generation 29: HELLOIWORLD
+ Generation 30: HELLOIWORLD
+ Generation 31: HELLOIWORLD
+ Generation 32: HELLOIWORLD
+ Generation 33: HELLOIWORLD
+ Generation 34: HELLOIWORLD
+ Generation 35: HELLOIWDRLD
+ Generation 36: HELLOIWORLD
+ Generation 37: HELLOIWORLD
+ Generation 38: HELLOPWORLD
+ Generation 39: HELLOIWORLD
+ Generation 40: HELLO WORLD
+ HELLO WORLD
+ </programlisting>
+ </informalexample>
+ </section>
+ </section>
+</chapter>
diff --git a/watchmaker/book/src/xml/website.xml b/watchmaker/book/src/xml/website.xml
new file mode 100644
index 0000000..724e586
--- /dev/null
+++ b/watchmaker/book/src/xml/website.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<book xmlns="http://docbook.org/ns/docbook"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://docbook.org/ns/docbook http://www.docbook.org/xml/5.0/xsd/docbook.xsd">
+ <info>
+ <title>Evolutionary Computation in Java</title>
+ <subtitle>A Practical Guide to the Watchmaker Framework</subtitle>
+ <author>
+ <personname>
+ <firstname>Daniel</firstname>
+ <surname>Dyer</surname>
+ <othername>W.</othername>
+ </personname>
+ <email>dan@uncommons.org</email>
+ <uri>http://www.dandyer.co.uk</uri>
+ </author>
+ <copyright>
+ <year>2008</year>
+ <year>2009</year>
+ <year>2010</year>
+ </copyright>
+ <keywordset>
+ <keyword>algorithms</keyword>
+ <keyword>evolution</keyword>
+ <keyword>evolutionary algorithms</keyword>
+ <keyword>evolutionary computation</keyword>
+ <keyword>genetic algorithms</keyword>
+ <keyword>Java</keyword>
+ <keyword>programming</keyword>
+ <keyword>software</keyword>
+ </keywordset>
+ </info>
+
+ <!-- Chapters -->
+ <xi:include href="evolution.xml" />
+ <xi:include href="watchmaker.xml" />
+ <xi:include href="selection.xml" />
+ <xi:include href="islands.xml" />
+
+ <!-- Appendices -->
+ <xi:include href="performance.xml" />
+
+</book>