Merge branch 'master' of github.com:clarencecastillo/dose

# By Clarence Castillo (19) and unknown (9)
# Via Clarence Castillo (1) and unknown (1)
* 'master' of github.com:clarencecastillo/dose: (28 commits)
  Added build directories and files to ignore list
  Updated setup.py version to 1.0.2
  Added functionality to import dose from standard library
  Initial commit of example 17
  Added FPS to postpopulation_control
  Updated simulation 14 script
  Renamed and updated simulation 13 example
  Updated fitness analyses scripts
  Added 'default' simulation time for simulation revival
  Added console header for simulation revival
  Fixed data type ambiguity for fitness_goal and generation_list
  Updated printing to console functions of the analysis script
  Reconstructed get_status_group_genome_by_generation to utilize a list comprehension logic
  Implemented new function to get fitness range according to specified percentage
  Fixed same dictionary key error for get_locations_list
  Rewritten example 15 analysis using latest analytics.py
  Rewritten example 14 analysis using latest analytics.py
  Rewritten example 13 analysis using latest analytics.py and removed example redundancy of example 16
  Rewritten example 10 analysis into a single script using latest analytics.py
  Rewritten example 09 analysis into a single script using latest analytics.py
  ...
mauriceling · May 5, 2014 · f8061bb · f8061bb
2 parents ea36052 + 5ab3d4e
commit f8061bb
Show file tree

Hide file tree

Showing 46 changed files with 888 additions and 143,508 deletions.
diff --git a/.gitignore b/.gitignore
@@ -8,4 +8,9 @@ examples/*.db
 *.suo
 *.xlsx
 *.gap
-*.eco
+*.eco
+build/
+dist/
+dose.egg-info/
+*.gz
+*.egg
diff --git a/dose/analytics.py b/dose/analytics.py
@@ -4,7 +4,7 @@
 Date created: 21st October 2013
 '''
 
-import math
+import math, sys, os, random, database_calls
 
 def hamming_distance(sequence_1, sequence_2):
     '''
@@ -38,4 +38,160 @@ def average(data):
     @param data: list of float or integer
     @return: arithmetic mean of data
     '''
-    return float(sum(data))/len(data)
+    return float(sum(data))/len(data)
+
+class Analysis(object):
+
+    def __init__(self, db_source, population_name, starting_time = 'default'):
+        print '\n[INITIALIZING ANALYSIS]'
+        self.db_source = db_source
+        self.population_name = population_name
+        print 'Assembling database file directory...'
+        dbpath = os.getcwd().split(os.sep)
+        dbpath[-1] = 'examples'
+        dbpath = os.sep.join(dbpath)
+        dbpath = os.sep.join([dbpath, 'Simulations', db_source])
+        print 'Connecting to database file: ' + db_source + '...'
+        (self.con, self.cur) = database_calls.connect_database(dbpath, None)
+        print 'Acquiring simulation starting time...'
+        if starting_time == 'default':
+            self.starting_time = database_calls.db_list_simulations(self.cur)[0][0]
+        else:
+            self.starting_time = starting_time
+
+    def get_locations_list(self):
+        print 'Getting locations list...'
+        world_x = database_calls.db_reconstruct_simulation_parameters(self.cur, self.starting_time)['world_x']
+        world_y = database_calls.db_reconstruct_simulation_parameters(self.cur, self.starting_time)['world_y']
+        world_z = database_calls.db_reconstruct_simulation_parameters(self.cur, self.starting_time)['world_z']
+        return [(x,y,z) for x in xrange(world_x) for y in xrange(world_y) for z in xrange(world_z)]
+
+    def get_fitness_range_by_percentage(self, percentage):
+        print 'Getting fitness range...'
+        fitness_goal = database_calls.db_reconstruct_simulation_parameters(self.cur, self.starting_time)['goal']
+        return xrange(int(fitness_goal * percentage), int(fitness_goal + 1))
+
+    def get_individual_status_list_by_generation(self, status, generation):
+        status_dict = database_calls.db_get_organisms_status(self.cur, self.starting_time, self.population_name, status, [generation])
+        status_list = status_dict[generation].values()
+        return status_list
+
+    def get_individual_genome_list_by_generation(self, generation):
+        genome_dict = database_calls.db_get_organisms_genome(self.cur, self.starting_time, self.population_name, [generation])
+        genome_list = genome_dict[generation].values()
+        return genome_list
+
+    def get_status_group_genome_by_generation(self, status, target_status, generation):
+        organisms = database_calls.db_reconstruct_organisms(self.cur, self.starting_time, self.population_name, generation)
+        genome_list = [organism.genome for organism in organisms if organism.status[status] == target_status]
+        return genome_list
+
+    def analyze_individual_status_by_generation(self, csv_output, status, status_analysis, aggregate_functions = None, generations = 'all'):
+        print '\n[INDIVIDUAL ' + status.upper() + ' STATUS ANALYSIS]'
+        print 'Opening outputfile: ' + csv_output + '...'
+        outputfile = open(csv_output, 'w')
+        print 'Getting population size...'
+        pop_size = database_calls.db_reconstruct_simulation_parameters(self.cur, self.starting_time)['population_size']
+        print 'Writing outputfile header...'
+        header = ['Generation'] + [str(i) for i in xrange(1, pop_size + 1)]
+        if aggregate_functions != None:
+            header = header + [key for key in aggregate_functions.keys()]
+        outputfile.write(','.join(header) + '\n')
+        print 'Starting main analysis...'
+        if generations == 'all':
+            generation_list = database_calls.db_list_generations(self.cur, self.starting_time)
+        else:
+            generation_list = [str(gen) for gen in generations]
+        for generation in generation_list:
+            print 'Analyzing generation ' + str(generation) + '...',
+            print '\r',
+            status_list = [status_analysis(stat) for stat in self.get_individual_status_list_by_generation(status, generation)]
+            status_row = [str(generation)] + [str(stat_result) for stat_result in status_list]
+            if aggregate_functions != None:
+                for key in aggregate_functions.keys():
+                    status_row.append(str(aggregate_functions[key](status_list)))
+            outputfile.write(','.join(status_row) + '\n')
+        print '\nIndividual [' + status + '] analysis complete!'
+
+    def analyze_status_group_count_by_generation(self, csv_output, status, stats, aggregate_functions = None, generations = 'all'):
+        print '\n[' + status.upper() + ' STATUS GROUP COUNT ANALYSIS]'
+        print 'Opening outputfile: ' + csv_output + '...'
+        outputfile = open(csv_output, 'w')
+        print 'Constructing generations list...'
+        if generations == 'all':
+            generation_list = database_calls.db_list_generations(self.cur, self.starting_time)
+        else:
+            generation_list = generations
+        print 'Writing outputfile header...'
+        header = [str(stat).replace(", ","-") for stat in stats]
+        if aggregate_functions == None:
+            header = ['Generation'] + header
+        else:
+            header = ['Generation'] + header + [key for key in aggregate_functions.keys()]
+        outputfile.write(','.join(header) + '\n')
+        print 'Starting main analysis...'
+        for generation in generation_list:
+            print 'Analyzing generation ' + str(generation) + '...',
+            print '\r',
+            status_list = self.get_individual_status_list_by_generation(status, generation)
+            status_row = [str(generation)] + [str(status_list.count(target_stat)) for target_stat in stats]
+            if aggregate_functions != None:
+                for key in aggregate_functions.keys():
+                    status_row.append(str(aggregate_functions[key]([status_list.count(target_stat) for target_stat in stats])))
+            outputfile.write(','.join(status_row) + '\n')    
+        print '\nGrouped [' + status + '] count analysis complete!'
+
+    def analyze_individual_genomes_by_generation(self, csv_output, genome_analysis, aggregate_functions = None, generations = 'all'):
+        print '\n[INDIVIDUAL GENOME ANALYSIS]'
+        print 'Opening outputfile: ' + csv_output + '...'
+        outputfile = open(csv_output, 'w')
+        print 'Getting population size...'
+        pop_size = database_calls.db_reconstruct_simulation_parameters(self.cur, self.starting_time)['population_size']
+        print 'Writing outputfile header...'
+        header = ['Generation'] + [str(i) for i in xrange(1, pop_size + 1)]
+        if aggregate_functions != None:
+            header = header + [key for key in aggregate_functions.keys()]
+        outputfile.write(','.join(header) + '\n')
+        print 'Starting main analysis...'
+        if generations == 'all':
+            generation_list = database_calls.db_list_generations(self.cur, self.starting_time)
+        else:
+            generation_list = generations
+        for generation in generation_list:
+            print 'Analyzing generation ' + str(generation) + '...',
+            print '\r',
+            genome_list = [genome_analysis(genome) for genome in self.get_individual_genome_list_by_generation(generation)]
+            status_row = [str(generation)] + [str(genome_result) for genome_result in genome_list]
+            if aggregate_functions != None:
+                for key in aggregate_functions.keys():
+                    status_row.append(str(aggregate_functions[key](genome_list)))
+            outputfile.write(','.join(status_row) + '\n')
+        print '\nIndividual genome analysis complete!'
+
+    def analyze_status_group_genome_by_generation(self, csv_output, genome_analysis, status, stats, aggregate_functions = None, generations = 'all'):
+        print '\n[' + status.upper() + ' STATUS GROUP GENOME ANALYSIS]'
+        print 'Opening outputfile: ' + csv_output + '...'
+        outputfile = open(csv_output, 'w')
+        print 'Constructing generations list...'
+        if generations == 'all':
+            generation_list = database_calls.db_list_generations(self.cur, self.starting_time)
+        else:
+            generation_list = generations
+        print 'Writing outputfile header...'
+        header = [str(stat).replace(", ","-") for stat in stats]
+        if aggregate_functions == None:
+            header = ['Generation'] + header
+        else:
+            header = ['Generation'] + header + [key for key in aggregate_functions.keys()]
+        outputfile.write(','.join(header) + '\n')
+        print 'Starting main analysis...'
+        for generation in generation_list:
+            print 'Analyzing generation ' + str(generation) + '...',
+            print '\r',
+            analyzed_genome_list = [genome_analysis(self.get_status_group_genome_by_generation(status, target_status, generation)) for target_status in stats]
+            status_row = [str(generation)] + [str(status_result) for status_result in analyzed_genome_list]
+            if aggregate_functions != None:
+                for key in aggregate_functions.keys():
+                    status_row.append(str(aggregate_functions[key](analyzed_genome_list)))
+            outputfile.write(','.join(status_row) + '\n')    
+        print '\nGrouped [' + status + '] genome analysis complete!'
diff --git a/dose/database_calls.py b/dose/database_calls.py
@@ -176,7 +176,8 @@ def db_list_generations(cur, start_time, table='organisms'):
     # query plan: SCAN TABLE organisms USING COVERING INDEX organisms_index2
     cur.execute("select distinct generation from %s where start_time='%s'" 
                 % (str(table), str(start_time)))
-    return [str(x[0]) for x in cur.fetchall()]
+    generations = sorted([int(str(x[0])) for x in cur.fetchall()])
+    return [str(gen) for gen in generations]
 
 def db_list_datafields(cur, start_time, table='organisms'):
     '''

diff --git a/dose/dose.py b/dose/dose.py
@@ -16,7 +16,7 @@
 from simulation_calls import excavate_world, revive_population
 
 from database_calls import connect_database, db_reconstruct_simulation_parameters
-from database_calls import db_reconstruct_population, db_reconstruct_world
+from database_calls import db_reconstruct_population, db_reconstruct_world, db_list_simulations
 
 class dose_functions():
     '''
@@ -540,6 +540,7 @@ def filter_status(status_key, condition, agents):
     return extract
 
 def revive_simulation(rev_parameters, sim_functions):
+    print '\n[' + rev_parameters["simulation_name"].upper() + ' REVIVAL SIMULATION]'
     Populations = {}
     if "sim_folder" in rev_parameters:
         print 'Accessing simulation files directory...' 
@@ -568,6 +569,9 @@ def revive_simulation(rev_parameters, sim_functions):
         print 'Connecting to database file: ' + \
             rev_parameters["database_source"] + '...'
         (con, cur) = connect_database(dbpath, None)
+        if rev_parameters["simulation_time"] == 'default':
+            print 'Acquiring simulation starting time...'
+            rev_parameters["simulation_time"] = db_list_simulations(cur)[0][0]
         print 'Reconstructing old simulation parameters...'
         temp_parameters = db_reconstruct_simulation_parameters(cur, 
                                     rev_parameters["simulation_time"])
@@ -727,6 +731,7 @@ def simulate(sim_parameters, sim_functions):
     @param sim_functions: A class inherited from dose.dose_functions
     class to implement all the needed simulation functions.
     '''
+    print '\n[' + sim_parameters["simulation_name"].upper() + ' SIMULATION]'
     if not sim_parameters.has_key("initial_chromosome"):
         print 'Adding initial chromosome to simulation parameters...'
         sim_parameters["initial_chromosome"] = ['0'] * \

diff --git a/examples/01_basic_functions_one_cell_deployment.py b/examples/01_basic_functions_one_cell_deployment.py
@@ -12,7 +12,9 @@
 '''
 # needed to run this example without prior
 # installation of DOSE into Python site-packages
-import run_examples_without_installation
+try: 
+	import run_examples_without_installation
+except ImportError: pass
 
 # Example codes starts from here
 import dose

diff --git a/examples/02_basic_functions_even_deployment.py b/examples/02_basic_functions_even_deployment.py
@@ -13,7 +13,9 @@
 '''
 # needed to run this example without prior
 # installation of DOSE into Python site-packages
-import run_examples_without_installation
+try: 
+	import run_examples_without_installation
+except ImportError: pass
 
 # Example codes starts from here
 import dose

diff --git a/examples/03_no_migration_isolated_mating.py b/examples/03_no_migration_isolated_mating.py
@@ -14,10 +14,12 @@
 '''
 # needed to run this example without prior
 # installation of DOSE into Python site-packages
-import run_examples_without_installation
+try: 
+	import run_examples_without_installation
+except ImportError: pass
 
 # Example codes starts from here
-import dose, genetic, random
+import dose, random
 
 parameters = {
               "simulation_name": "03_no_migration_isolated_mating",
@@ -85,13 +87,13 @@ def mating(self, Populations, pop_name):
                         parents[i] = random.choice(Populations[pop_name].agents)
                     Populations[pop_name].agents.remove(parents[i])
                 crossover_pt = random.randint(0, len(parents[0].genome[0].sequence))
-                (new_chromo1, new_chromo2) = genetic.crossover(parents[0].genome[0], 
+                (new_chromo1, new_chromo2) = dose.genetic.crossover(parents[0].genome[0], 
                                                                parents[1].genome[0], 
                                                                crossover_pt)
-                children = [genetic.Organism([new_chromo1],
+                children = [dose.genetic.Organism([new_chromo1],
                                              parameters["mutation_type"],
                                              parameters["additional_mutation"]),
-                            genetic.Organism([new_chromo2],
+                            dose.genetic.Organism([new_chromo2],
                                              parameters["mutation_type"],
                                              parameters["additional_mutation"])]
                 for child in children:

diff --git a/examples/04_adjacent_migration_isolated_mating.py b/examples/04_adjacent_migration_isolated_mating.py
@@ -20,11 +20,12 @@
 '''
 # needed to run this example without prior
 # installation of DOSE into Python site-packages
-import run_examples_without_installation
+try: 
+	import run_examples_without_installation
+except ImportError: pass
 
 # Example codes starts from here
-import dose, genetic, random
-import simulation_calls as helper
+import dose, random
 
 parameters = {
               "simulation_name": "04_adjacent_migration_isolated_mating",
@@ -64,16 +65,16 @@ class simulation_functions(dose.dose_functions):
     def organism_movement(self, Populations, pop_name, World):
         for location in parameters["population_locations"][0]:
             group = dose.filter_location(location, Populations[pop_name].agents)
-            adj_cells = helper.adjacent_cells(parameters, location)
+            adj_cells = dose.simulation_calls.adjacent_cells(parameters, location)
             for i in xrange(int(round((len(group) * 0.1)))):
-                (x,y,z) = helper.coordinates(location)
+                (x,y,z) = dose.simulation_calls.coordinates(location)
                 World.ecosystem[x][y][z]['organisms'] -= 1
                 immigrant = random.choice(Populations[pop_name].agents)
                 while immigrant not in group:
                     immigrant = random.choice(Populations[pop_name].agents)
                 new_location = random.choice(adj_cells)
                 immigrant.status['location'] = new_location
-                (x,y,z) = helper.coordinates(new_location)
+                (x,y,z) = dose.simulation_calls.coordinates(new_location)
                 World.ecosystem[x][y][z]['organisms'] += 1
 
     def organism_location(self, Populations, pop_name, World): pass
@@ -105,13 +106,13 @@ def mating(self, Populations, pop_name):
                         parents[i] = random.choice(Populations[pop_name].agents)
                     Populations[pop_name].agents.remove(parents[i])
                 crossover_pt = random.randint(0, len(parents[0].genome[0].sequence))
-                (new_chromo1, new_chromo2) = genetic.crossover(parents[0].genome[0], 
+                (new_chromo1, new_chromo2) = dose.genetic.crossover(parents[0].genome[0], 
                                                                parents[1].genome[0], 
                                                                crossover_pt)
-                children = [genetic.Organism([new_chromo1],
+                children = [dose.genetic.Organism([new_chromo1],
                                              parameters["mutation_type"],
                                              parameters["additional_mutation"]),
-                            genetic.Organism([new_chromo2],
+                            dose.genetic.Organism([new_chromo2],
                                              parameters["mutation_type"],
                                              parameters["additional_mutation"])]
                 for child in children:

diff --git a/examples/05_long_migration_isolated_mating.py b/examples/05_long_migration_isolated_mating.py
@@ -26,11 +26,12 @@
 '''
 # needed to run this example without prior
 # installation of DOSE into Python site-packages
-import run_examples_without_installation
+try: 
+	import run_examples_without_installation
+except ImportError: pass
 
 # Example codes starts from here
-import dose, genetic, random
-import simulation_calls as helper
+import dose, random
 
 parameters = {
               "simulation_name": "05_long_migration_isolated_mating",
@@ -73,7 +74,7 @@ def organism_location(self, Populations, pop_name, World):
         for location in parameters["population_locations"][0]:
             group = dose.filter_location(location, Populations[pop_name].agents)
             for i in xrange(int(round((len(group) * 0.1)))):
-                (x,y,z) = helper.coordinates(location)
+                (x,y,z) = dose.simulation_calls.coordinates(location)
                 World.ecosystem[x][y][z]['organisms'] -= 1
                 immigrant = random.choice(Populations[pop_name].agents)
                 while immigrant not in group:
@@ -82,7 +83,7 @@ def organism_location(self, Populations, pop_name, World):
                 while new_location == location:
                     new_location = random.choice(parameters["population_locations"][0])
                 immigrant.status['location'] = new_location
-                (x,y,z) = helper.coordinates(new_location)
+                (x,y,z) = dose.simulation_calls.coordinates(new_location)
                 World.ecosystem[x][y][z]['organisms'] += 1
 
     def ecoregulate(self, World): pass
@@ -112,13 +113,13 @@ def mating(self, Populations, pop_name):
                         parents[i] = random.choice(Populations[pop_name].agents)
                     Populations[pop_name].agents.remove(parents[i])
                 crossover_pt = random.randint(0, len(parents[0].genome[0].sequence))
-                (new_chromo1, new_chromo2) = genetic.crossover(parents[0].genome[0], 
+                (new_chromo1, new_chromo2) = dose.genetic.crossover(parents[0].genome[0], 
                                                                parents[1].genome[0], 
                                                                crossover_pt)
-                children = [genetic.Organism([new_chromo1],
+                children = [dose.genetic.Organism([new_chromo1],
                                              parameters["mutation_type"],
                                              parameters["additional_mutation"]),
-                            genetic.Organism([new_chromo2],
+                            dose.genetic.Organism([new_chromo2],
                                              parameters["mutation_type"],
                                              parameters["additional_mutation"])]
                 for child in children:
-Original file line number
+Diff line change
@@ Expand Up / @@ -8,4 +8,9 @@ examples/*.db @@
     *.suo
     *.xlsx
     *.gap
-    *.eco
+    *.eco
+    build/
+    dist/
+    dose.egg-info/
+    *.gz
+    *.egg