From 4eeecdef24bdd4640d9967c9e60adecc6ec2eac0 Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Wed, 2 Jul 2014 07:34:28 -0600 Subject: [PATCH 01/18] migration for stats and added indices --- maproulette/models.py | 51 +++++++++++++++++++--- migrations/versions/14a905606ecc_.py | 64 ++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 7 deletions(-) create mode 100644 migrations/versions/14a905606ecc_.py diff --git a/maproulette/models.py b/maproulette/models.py index 01c56c4..93a4db7 100644 --- a/maproulette/models.py +++ b/maproulette/models.py @@ -69,6 +69,9 @@ class User(db.Model): difficulty = db.Column( db.SmallInteger) + __table_args__ = ( + db.Index('idx_user_displayname', display_name),) + def __unicode__(self): return self.display_name @@ -121,14 +124,9 @@ class Challenge(db.Model): default='default', nullable=False) -## @validates('slug') -## def validate_slug(self, key, slug): -## app.logger.debug("Slug passed in: " + slug) -## app.logger.debug("Type: " + type(slug)) -## assert match('^[a-z0-9]+$', str(slug)) -## return slug - # note that spatial indexes seem to be created automagically + __table_args__ = ( + db.Index('idx_challenge_slug', slug),) def __init__(self, slug, @@ -380,6 +378,12 @@ class Action(db.Model): editor = db.Column( db.String()) + __table_args__ = ( + db.Index('idx_action_timestamp', timestamp), + db.Index('idx_action_userid', user_id), + db.Index('idx_action_taskid', task_id), + db.Index('idx_action_status', status)) + def __repr__(self): return "" % (self.status, self.timestamp) @@ -391,3 +395,36 @@ def __init__(self, status, user_id=None, editor=None): self.user_id = user_id if editor: self.editor = editor + + +class Metrics(db.Model): + + """Holds daily metrics per challenge, user, status""" + + timestamp = db.Column( + db.DateTime, + primary_key=True, + nullable=False) + user_id = db.Column( + db.Integer, + primary_key=True) + challenge_slug = db.Column( + db.String, + primary_key=True) + status = db.Column( + db.String, + primary_key=True) + count = db.Column( + db.String) + + __table_args__ = ( + db.Index('idx_metrics_userid', user_id), + db.Index('idx_metrics_challengeslug', challenge_slug), + db.Index('idx_metrics_status', status)) + + def __init__(self, timestamp, user_id, challenge_slug, status, count): + self.timestamp = timestamp + self.user_id = user_id + self.challenge_slug = challenge_slug + self.status = status + self.count = count diff --git a/migrations/versions/14a905606ecc_.py b/migrations/versions/14a905606ecc_.py new file mode 100644 index 0000000..8b1f81c --- /dev/null +++ b/migrations/versions/14a905606ecc_.py @@ -0,0 +1,64 @@ +"""empty message + +Revision ID: 14a905606ecc +Revises: 3115f24a7604 +Create Date: 2014-07-02 07:30:37.625861 + +""" + +# revision identifiers, used by Alembic. +revision = '14a905606ecc' +down_revision = '3115f24a7604' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + ### commands auto generated by Alembic - please adjust! 
### + op.create_table('metrics', + sa.Column('timestamp', sa.DateTime(), nullable=False), + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('challenge_slug', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.Column('count', sa.String(), nullable=True), + sa.PrimaryKeyConstraint( + 'timestamp', 'user_id', 'challenge_slug', 'status') + ) + op.create_index( + 'idx_metrics_challengeslug', + 'metrics', + ['challenge_slug'], + unique=False) + op.create_index('idx_metrics_status', 'metrics', ['status'], unique=False) + op.create_index('idx_metrics_userid', 'metrics', ['user_id'], unique=False) + op.create_index('idx_action_status', 'actions', ['status'], unique=False) + op.create_index('idx_action_taskid', 'actions', ['task_id'], unique=False) + op.create_index( + 'idx_action_timestamp', + 'actions', + ['timestamp'], + unique=False) + op.create_index('idx_action_userid', 'actions', ['user_id'], unique=False) + op.create_index('idx_challenge_slug', 'challenges', ['slug'], unique=False) + op.create_index( + 'idx_user_displayname', + 'users', + ['display_name'], + unique=False) + ### end Alembic commands ### + + +def downgrade(): + ### commands auto generated by Alembic - please adjust! ### + op.drop_index('idx_user_displayname', table_name='users') + op.drop_index('idx_challenge_slug', table_name='challenges') + op.drop_index('idx_action_userid', table_name='actions') + op.drop_index('idx_action_timestamp', table_name='actions') + op.drop_index('idx_action_taskid', table_name='actions') + op.drop_index('idx_action_status', table_name='actions') + op.drop_index('idx_metrics_userid', table_name='metrics') + op.drop_index('idx_metrics_status', table_name='metrics') + op.drop_index('idx_metrics_challengeslug', table_name='metrics') + op.drop_table('metrics') + ### end Alembic commands ### From d9be78e995260195679855c101129ca3c6886af1 Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Wed, 2 Jul 2014 07:38:58 -0600 Subject: [PATCH 02/18] adding CORS decorator for API --- maproulette/api/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/maproulette/api/__init__.py b/maproulette/api/__init__.py index e1e0e71..c77752d 100644 --- a/maproulette/api/__init__.py +++ b/maproulette/api/__init__.py @@ -2,6 +2,7 @@ from flask.ext.restful import reqparse, fields, marshal, \ marshal_with, Api, Resource from flask.ext.restful.fields import Raw +from flask.ext.restful.utils import cors from flask import session, request, abort, url_for from maproulette.helpers import get_random_task,\ get_challenge_or_404, get_task_or_404,\ @@ -75,7 +76,7 @@ def format(self, text): 'display_name': fields.String } -api = Api(app) +api = Api(app, decorators=[cors.crossdomain(origin='*')]) # override the default JSON representation to support the geo objects From 150b43e10a70a7f66c77ec5ee90e57768f830095 Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Wed, 2 Jul 2014 07:39:22 -0600 Subject: [PATCH 03/18] updated requirements --- requirements.txt | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index 65cadc9..929962b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ Flask>=0.10.1 -# Flask-RESTful>=0.2.0 --e git://github.com/twilio/flask-restful.git#egg=flask-restful +Flask-RESTful>=0.2.1 Flask-KVSession>=0.4 Flask-OAuthlib>=0.4.2 Flask-SQLAlchemy>=1.0 @@ -15,9 +14,9 @@ simplejson>=3.3.0 geojson>=1.0.6 six>=1.5.2 nose>=1.3.0 -Markdown==2.3.1 -iso8601==0.1.10 
-Flask-Admin==1.0.8 -requests==2.2.1 -Fabric==1.7.0 -python-dateutil==2.2 +Markdown>=2.3.1 +iso8601>=0.1.10 +Flask-Admin>=1.0.8 +requests>=2.2.1 +Fabric>=1.7.0 +python-dateutil>=2.2 \ No newline at end of file From 0a98cf62c287406019368372149daad65337c8ad Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Wed, 2 Jul 2014 07:39:44 -0600 Subject: [PATCH 04/18] changing import for KVSession extension --- maproulette/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maproulette/__init__.py b/maproulette/__init__.py index ab21590..1f06d5c 100644 --- a/maproulette/__init__.py +++ b/maproulette/__init__.py @@ -1,7 +1,7 @@ import os from flask import Flask from simplekv.fs import FilesystemStore -from flaskext.kvsession import KVSessionExtension +from flask_kvsession import KVSessionExtension # initialize server KV session store if not os.path.exists('./sessiondata'): From 31538bc15d74a169ef2be6f53d0a1cfd135517ad Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Thu, 3 Jul 2014 08:43:29 -0600 Subject: [PATCH 05/18] count column is int not string --- maproulette/models.py | 2 +- migrations/versions/14a905606ecc_.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/maproulette/models.py b/maproulette/models.py index 93a4db7..394b967 100644 --- a/maproulette/models.py +++ b/maproulette/models.py @@ -415,7 +415,7 @@ class Metrics(db.Model): db.String, primary_key=True) count = db.Column( - db.String) + db.Integer) __table_args__ = ( db.Index('idx_metrics_userid', user_id), diff --git a/migrations/versions/14a905606ecc_.py b/migrations/versions/14a905606ecc_.py index 8b1f81c..7265fb3 100644 --- a/migrations/versions/14a905606ecc_.py +++ b/migrations/versions/14a905606ecc_.py @@ -21,7 +21,7 @@ def upgrade(): sa.Column('user_id', sa.Integer(), nullable=False), sa.Column('challenge_slug', sa.String(), nullable=False), sa.Column('status', sa.String(), nullable=False), - sa.Column('count', sa.String(), nullable=True), + sa.Column('count', sa.Integer(), nullable=True), sa.PrimaryKeyConstraint( 'timestamp', 'user_id', 'challenge_slug', 'status') ) From 8f60a08f9ea52afd59656e61ba26b93968fc6948 Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Thu, 10 Jul 2014 09:39:13 -0600 Subject: [PATCH 06/18] migrate to new metrics models --- maproulette/models.py | 24 ++++++- migrations/versions/14a905606ecc_.py | 1 - migrations/versions/585808afd671_.py | 93 ++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 migrations/versions/585808afd671_.py diff --git a/maproulette/models.py b/maproulette/models.py index 394b967..e19c972 100644 --- a/maproulette/models.py +++ b/maproulette/models.py @@ -397,9 +397,11 @@ def __init__(self, status, user_id=None, editor=None): self.editor = editor -class Metrics(db.Model): +class HistoricalMetrics(db.Model): - """Holds daily metrics per challenge, user, status""" + """Holds daily metrics per challenge, user, status, day""" + + __tablename__ = 'metrics_historical' timestamp = db.Column( db.DateTime, @@ -428,3 +430,21 @@ def __init__(self, timestamp, user_id, challenge_slug, status, count): self.challenge_slug = challenge_slug self.status = status self.count = count + + +class AggregateMetrics(db.Model): + + """Holds the aggregate metrics for each challenge and user""" + + __tablename__ = 'metrics_aggregate' + + user_id = db.Column( + db.Integer, + primary_key=True) + challenge_slug = db.Column( + db.String, + primary_key=True) + status = db.Column( + db.String, + primary_key=True) + 
count = db.Column(db.Integer) diff --git a/migrations/versions/14a905606ecc_.py b/migrations/versions/14a905606ecc_.py index 7265fb3..b5a1efa 100644 --- a/migrations/versions/14a905606ecc_.py +++ b/migrations/versions/14a905606ecc_.py @@ -15,7 +15,6 @@ def upgrade(): - ### commands auto generated by Alembic - please adjust! ### op.create_table('metrics', sa.Column('timestamp', sa.DateTime(), nullable=False), sa.Column('user_id', sa.Integer(), nullable=False), diff --git a/migrations/versions/585808afd671_.py b/migrations/versions/585808afd671_.py new file mode 100644 index 0000000..a0f8f89 --- /dev/null +++ b/migrations/versions/585808afd671_.py @@ -0,0 +1,93 @@ +"""empty message + +Revision ID: 585808afd671 +Revises: 14a905606ecc +Create Date: 2014-07-10 09:28:52.309482 + +""" + +# revision identifiers, used by Alembic. +revision = '585808afd671' +down_revision = '14a905606ecc' + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + + +def upgrade(): + op.drop_table('metrics') + op.create_table('metrics_historical', + sa.Column('timestamp', sa.DateTime(), nullable=False), + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('challenge_slug', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.Column('count', sa.Integer(), nullable=True), + sa.PrimaryKeyConstraint('timestamp', + 'user_id', + 'challenge_slug', + 'status') + ) + op.create_index('idx_metrics_challengeslug', + 'metrics_historical', + ['challenge_slug'], + unique=False) + op.create_index('idx_metrics_status', + 'metrics_historical', + ['status'], + unique=False) + op.create_index('idx_metrics_userid', + 'metrics_historical', + ['user_id'], + unique=False) + op.create_table('metrics_aggregate', + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('challenge_slug', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.Column('count', sa.Integer(), nullable=True), + sa.PrimaryKeyConstraint('user_id', + 'challenge_slug', + 'status') + ) + + +def downgrade(): + op.create_table('metrics', + sa.Column( + 'timestamp', + postgresql.TIMESTAMP(), + autoincrement=False, + nullable=False), + sa.Column( + 'user_id', + sa.INTEGER(), + server_default= + "nextval('metrics_user_id_seq'::regclass)", + nullable=False), + sa.Column( + 'challenge_slug', + sa.VARCHAR(), + autoincrement=False, + nullable=False), + sa.Column( + 'status', + sa.VARCHAR(), + autoincrement=False, + nullable=False), + sa.Column( + 'count', + sa.INTEGER(), + autoincrement=False, + nullable=True), + sa.PrimaryKeyConstraint( + 'timestamp', + 'user_id', + 'challenge_slug', + 'status', + name=u'metrics_pkey') + ) + op.drop_table('metrics_aggregate') + op.drop_index('idx_metrics_userid', table_name='metrics_historical') + op.drop_index('idx_metrics_status', table_name='metrics_historical') + op.drop_index('idx_metrics_challengeslug', table_name='metrics_historical') + op.drop_table('metrics_historical') From 0b2fd43ced57db4a6172d5f577fd06a8d79d3a68 Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Thu, 10 Jul 2014 10:14:28 -0600 Subject: [PATCH 07/18] adding user names to metrics model --- maproulette/models.py | 9 +++++++++ migrations/versions/1107fa473275_.py | 28 ++++++++++++++++++++++++++++ migrations/versions/14a905606ecc_.py | 5 +---- migrations/versions/585808afd671_.py | 2 +- 4 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 migrations/versions/1107fa473275_.py diff --git a/maproulette/models.py 
b/maproulette/models.py index e19c972..655f8c2 100644 --- a/maproulette/models.py +++ b/maproulette/models.py @@ -410,6 +410,8 @@ class HistoricalMetrics(db.Model): user_id = db.Column( db.Integer, primary_key=True) + user_name = db.Column( + db.String) challenge_slug = db.Column( db.String, primary_key=True) @@ -421,6 +423,7 @@ class HistoricalMetrics(db.Model): __table_args__ = ( db.Index('idx_metrics_userid', user_id), + db.Index('idx_metrics_username', user_name), db.Index('idx_metrics_challengeslug', challenge_slug), db.Index('idx_metrics_status', status)) @@ -441,6 +444,8 @@ class AggregateMetrics(db.Model): user_id = db.Column( db.Integer, primary_key=True) + user_name = db.Column( + db.String) challenge_slug = db.Column( db.String, primary_key=True) @@ -448,3 +453,7 @@ class AggregateMetrics(db.Model): db.String, primary_key=True) count = db.Column(db.Integer) + + __table_args__ = ( + db.Index('idx_metrics_agg_username', user_name), + ) diff --git a/migrations/versions/1107fa473275_.py b/migrations/versions/1107fa473275_.py new file mode 100644 index 0000000..a90e041 --- /dev/null +++ b/migrations/versions/1107fa473275_.py @@ -0,0 +1,28 @@ +"""adding user name columns + +Revision ID: 1107fa473275 +Revises: 585808afd671 +Create Date: 2014-07-10 09:54:57.940819 + +""" + +# revision identifiers, used by Alembic. +revision = '1107fa473275' +down_revision = '585808afd671' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + op.add_column('metrics_aggregate', sa.Column('user_name', sa.String(), nullable=True)) + op.create_index('idx_metrics_agg_username', 'metrics_aggregate', ['user_name'], unique=False) + op.add_column('metrics_historical', sa.Column('user_name', sa.String(), nullable=True)) + op.create_index('idx_metrics_username', 'metrics_historical', ['user_name'], unique=False) + + +def downgrade(): + op.drop_index('idx_metrics_username', table_name='metrics_historical') + op.drop_column('metrics_historical', 'user_name') + op.drop_index('idx_metrics_agg_username', table_name='metrics_aggregate') + op.drop_column('metrics_aggregate', 'user_name') \ No newline at end of file diff --git a/migrations/versions/14a905606ecc_.py b/migrations/versions/14a905606ecc_.py index b5a1efa..fdefb80 100644 --- a/migrations/versions/14a905606ecc_.py +++ b/migrations/versions/14a905606ecc_.py @@ -1,4 +1,4 @@ -"""empty message +"""adding indices on action table, adding metrics table Revision ID: 14a905606ecc Revises: 3115f24a7604 @@ -45,11 +45,9 @@ def upgrade(): 'users', ['display_name'], unique=False) - ### end Alembic commands ### def downgrade(): - ### commands auto generated by Alembic - please adjust! 
### op.drop_index('idx_user_displayname', table_name='users') op.drop_index('idx_challenge_slug', table_name='challenges') op.drop_index('idx_action_userid', table_name='actions') @@ -60,4 +58,3 @@ def downgrade(): op.drop_index('idx_metrics_status', table_name='metrics') op.drop_index('idx_metrics_challengeslug', table_name='metrics') op.drop_table('metrics') - ### end Alembic commands ### diff --git a/migrations/versions/585808afd671_.py b/migrations/versions/585808afd671_.py index a0f8f89..7f41ec0 100644 --- a/migrations/versions/585808afd671_.py +++ b/migrations/versions/585808afd671_.py @@ -1,4 +1,4 @@ -"""empty message +"""new metrics tables and indices Revision ID: 585808afd671 Revises: 14a905606ecc From 3df8cb182cd004203d5052cdcd974adcd5892c64 Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Thu, 10 Jul 2014 12:58:54 -0600 Subject: [PATCH 08/18] adding debugging output --- maproulette/api/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/maproulette/api/__init__.py b/maproulette/api/__init__.py index c77752d..90b3d71 100644 --- a/maproulette/api/__init__.py +++ b/maproulette/api/__init__.py @@ -349,6 +349,8 @@ def get(self, challenge_slug=None, user_id=None): query = query.filter( Action.timestamp.between(start, end)) + app.logger.debug(query) + return as_stats_dict( query.all(), order=[1, 0, 2], @@ -624,6 +626,8 @@ def put(self, slug): # Get the posted data taskdata = json.loads(request.data) + app.logger.debug(len(taskdata)) + for task in taskdata: parse_task_json(task, slug, task['identifier'], commit=False) From 3248fbdfd4b0e5176118f35389f6d35b712d542e Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Fri, 11 Jul 2014 08:40:32 -0600 Subject: [PATCH 09/18] add upgrade check to fabfile --- fabfile.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fabfile.py b/fabfile.py index 9179f83..44deb42 100644 --- a/fabfile.py +++ b/fabfile.py @@ -118,10 +118,12 @@ def setup_cron(instance): sudo('crontab /tmp/crondump', user='www-data') -def install_python_dependencies(instance): +def install_python_dependencies(instance, upgrade=False): dirname = "/srv/www/%s" % instance - cmd = 'source %s/virtualenv/bin/activate && pip\ - install -r %s/htdocs/maproulette/requirements.txt' % (dirname, dirname) + cmd = 'source {basepath}/virtualenv/bin/activate && pip\ + install {upgrade}-r {basepath}/htdocs/maproulette/requirements.txt'.format( + basepath=dirname, + upgrade='--upgrade' if upgrade else '') sudo(cmd, user="www-data") From f44f46b2dacc0548b3ad59e40a338ca2f37b9959 Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Fri, 11 Jul 2014 08:43:08 -0600 Subject: [PATCH 10/18] simplifying task json parsing --- manage.py | 11 +++--- maproulette/api/__init__.py | 17 +++++---- maproulette/helpers.py | 70 ++++++++++++++----------------------- 3 files changed, 41 insertions(+), 57 deletions(-) diff --git a/manage.py b/manage.py index b4448e9..d5726bc 100644 --- a/manage.py +++ b/manage.py @@ -107,10 +107,8 @@ def create_testdata(challenges=10, tasks=100, users=10): for j in range(int(tasks)): # generate a unique identifier identifier = str(uuid.uuid4()) - # instantiate the task and register it with challenge 'test' - # Initialize a task with its challenge slug and persistent ID - task = Task(challenge.slug, identifier) # create two random points not too far apart + task_geometries = [] p1 = Point( random.randrange(minx, maxx) + random.random(), random.randrange(miny, maxy) + random.random()) @@ -123,10 +121,13 @@ def create_testdata(challenges=10, 
tasks=100, users=10): # generate some random 'osm ids' osmids = [random.randrange(1000000, 1000000000) for _ in range(2)] # add the first point and the linestring to the task's geometries - task.geometries.append(TaskGeometry(osmids[0], p1)) + task_geometries.append(TaskGeometry(osmids[0], p1)) # set a linestring for every other challenge if not j % 2: - task.geometries.append(TaskGeometry(osmids[1], l1)) + task_geometries.append(TaskGeometry(osmids[1], l1)) + # instantiate the task and register it with challenge 'test' + # Initialize a task with its challenge slug and persistent ID + task = Task(challenge.slug, identifier, task_geometries) # because we are not using the API, we need to call set_location # explicitly to set the task's location task.set_location() diff --git a/maproulette/api/__init__.py b/maproulette/api/__init__.py index 90b3d71..f315696 100644 --- a/maproulette/api/__init__.py +++ b/maproulette/api/__init__.py @@ -596,13 +596,13 @@ def get(self, slug): class AdminApiUpdateTask(Resource): - """Challenge Task Statuses endpoint""" + """Challenge Task Create / Update endpoint""" def put(self, slug, identifier): """Create or update one task.""" # Parse the posted data - parse_task_json(json.loads(request.data), slug, identifier) + db.session.add(parse_task_json(slug, json.loads(request.data))) return {}, 201 def delete(self, slug, identifier): @@ -617,19 +617,18 @@ def delete(self, slug, identifier): class AdminApiUpdateTasks(Resource): - """Bulk task creation / update endpoint""" + """Bulk task create / update endpoint""" def put(self, slug): - app.logger.debug('putting multiple tasks') - app.logger.debug(len(request.data)) # Get the posted data - taskdata = json.loads(request.data) + data = json.loads(request.data) - app.logger.debug(len(taskdata)) + # debug output number of tasks being posted + app.logger.debug('posting {number} tasks...'.format(number=len(data))) - for task in taskdata: - parse_task_json(task, slug, task['identifier'], commit=False) + for task in data: + db.session.merge(parse_task_json(slug, task)) # commit all dirty tasks at once. db.session.commit() diff --git a/maproulette/helpers.py b/maproulette/helpers.py index 1c3c65e..912bb25 100644 --- a/maproulette/helpers.py +++ b/maproulette/helpers.py @@ -80,16 +80,6 @@ def task_exists(challenge_slug, task_identifier): return True -def get_or_create_task(challenge, task_identifier): - """Return a task, either pull a new one or create a new one""" - - task = (Task.identifier == task_identifier). 
\ - filter(Task.challenge_slug == challenge.slug).first() - if not task: - task = Task(challenge.id, task_identifier) - return task - - def require_signedin(f): """Require the caller to be authenticated against OSM""" @@ -145,44 +135,38 @@ def get_random_task(challenge): return q.first() or None -def parse_task_json(data, slug, identifier, commit=True): +def parse_task_json(slug, data): """Parse task json coming in through the admin api""" - task_geometries = [] + app.logger.debug(data) - exists = task_exists(slug, identifier) + # task json needs to have identifier + if not 'identifier' in data: + abort(400, 'no identifier') - # abort if the taskdata does not contain geometries and it's a new task + # if the task is new, it needs to have geometry if not 'geometries' in data: - if not exists: - abort(400) - else: - # extract the geometries - geometries = data.pop('geometries') - # parse the geometries - for feature in geometries['features']: - osmid = feature['properties'].get('osmid') - shape = asShape(feature['geometry']) - t = TaskGeometry(osmid, shape) - task_geometries.append(t) - - # there's two possible scenarios: - # 1. An existing task gets an update, in that case - # we only need the identifier - # 2. A new task is inserted, in this case we need at - # least an identifier and encoded geometries. - - # now we check if the task exists - if exists: - # if it does, update it - task = get_task_or_404(slug, identifier) - if not task.update(data, task_geometries, commit=commit): - abort(400) - else: - # if it does not, create it - new_task = Task(slug, identifier) - new_task.update(data, task_geometries, commit=commit) - return True + if not task_exists(data['identifier']): + abort(400, 'no geometries for new tasks') + + # extract the task geometries + task_geometries = [] + geometries = data.pop('geometries') + # parse the geometries + for feature in geometries['features']: + osmid = feature['properties'].get('osmid') + shape = asShape(feature['geometry']) + g = TaskGeometry(osmid, shape) + task_geometries.append(g) + + # create the task + t = Task(slug, data['identifier'], task_geometries) + + # check for instruction + if 'instruction' in data: + t.instruction = data['instruction'] + + return t def get_envelope(geoms): From dd06786efafc5c7676d20263b8655843b50f8b2b Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Fri, 11 Jul 2014 08:43:41 -0600 Subject: [PATCH 11/18] change primary key in task table --- maproulette/models.py | 27 +++++---- migrations/versions/585808afd671_.py | 51 +++++----------- migrations/versions/844562c6ddf_.py | 90 ++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 45 deletions(-) create mode 100644 migrations/versions/844562c6ddf_.py diff --git a/maproulette/models.py b/maproulette/models.py index 655f8c2..b950eb8 100644 --- a/maproulette/models.py +++ b/maproulette/models.py @@ -201,14 +201,15 @@ class Task(db.Model): id = db.Column( db.Integer, unique=True, - primary_key=True, nullable=False) identifier = db.Column( db.String(72), + primary_key=True, nullable=False) challenge_slug = db.Column( db.String, - db.ForeignKey('challenges.slug', onupdate="cascade")) + db.ForeignKey('challenges.slug', onupdate="cascade"), + primary_key=True) random = db.Column( db.Float, default=getrandom, @@ -236,14 +237,22 @@ class Task(db.Model): db.Index('idx_challenge', challenge_slug), db.Index('idx_random', random)) - def __init__(self, challenge_slug, identifier, instruction=None): + # geometries should always be provided for new tasks, defaulting to None so 
+ # we can handle task updates and two step initialization of tasks + def __init__( + self, + challenge_slug, + identifier, + geometries=None, + instruction=None): self.challenge_slug = challenge_slug self.identifier = identifier self.instruction = instruction + self.geometries = geometries self.append_action(Action('created')) def __repr__(self): - return '' % (self.identifier) + return ''.format(identifier=self.identifier) def __str__(self): return self.identifier @@ -271,16 +280,14 @@ def update(self, new_values, geometries, commit=True): return False setattr(self, k, v) - self.geometries = [] - - for geometry in geometries: - self.geometries = geometries + self.geometries = geometries # set the location for this task, as a representative point of the # combined geometries. - self.set_location() + if self.location is None: + self.set_location() - db.session.merge(self) + db.session.add(self) if commit: db.session.commit() return True diff --git a/migrations/versions/585808afd671_.py b/migrations/versions/585808afd671_.py index 7f41ec0..43b0c6b 100644 --- a/migrations/versions/585808afd671_.py +++ b/migrations/versions/585808afd671_.py @@ -12,7 +12,6 @@ from alembic import op import sqlalchemy as sa -from sqlalchemy.dialects import postgresql def upgrade(): @@ -52,42 +51,24 @@ def upgrade(): def downgrade(): - op.create_table('metrics', - sa.Column( - 'timestamp', - postgresql.TIMESTAMP(), - autoincrement=False, - nullable=False), - sa.Column( - 'user_id', - sa.INTEGER(), - server_default= - "nextval('metrics_user_id_seq'::regclass)", - nullable=False), - sa.Column( - 'challenge_slug', - sa.VARCHAR(), - autoincrement=False, - nullable=False), - sa.Column( - 'status', - sa.VARCHAR(), - autoincrement=False, - nullable=False), - sa.Column( - 'count', - sa.INTEGER(), - autoincrement=False, - nullable=True), - sa.PrimaryKeyConstraint( - 'timestamp', - 'user_id', - 'challenge_slug', - 'status', - name=u'metrics_pkey') - ) op.drop_table('metrics_aggregate') op.drop_index('idx_metrics_userid', table_name='metrics_historical') op.drop_index('idx_metrics_status', table_name='metrics_historical') op.drop_index('idx_metrics_challengeslug', table_name='metrics_historical') op.drop_table('metrics_historical') + op.create_table('metrics', + sa.Column('timestamp', sa.DateTime(), nullable=False), + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('challenge_slug', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.Column('count', sa.Integer(), nullable=True), + sa.PrimaryKeyConstraint( + 'timestamp', 'user_id', 'challenge_slug', 'status') + ) + op.create_index( + 'idx_metrics_challengeslug', + 'metrics', + ['challenge_slug'], + unique=False) + op.create_index('idx_metrics_status', 'metrics', ['status'], unique=False) + op.create_index('idx_metrics_userid', 'metrics', ['user_id'], unique=False) diff --git a/migrations/versions/844562c6ddf_.py b/migrations/versions/844562c6ddf_.py new file mode 100644 index 0000000..52af368 --- /dev/null +++ b/migrations/versions/844562c6ddf_.py @@ -0,0 +1,90 @@ +"""change primary key to challenge_slug + identifier in tasks table + +Revision ID: 844562c6ddf +Revises: 1107fa473275 +Create Date: 2014-07-10 16:29:47.975001 + +""" + +# revision identifiers, used by Alembic. 
+revision = '844562c6ddf' +down_revision = '1107fa473275' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + #temporarily drop fkey constraint so we can drop the primary key + op.drop_constraint( + 'actions_task_id_fkey', + 'actions') + op.drop_constraint( + 'task_geometries_task_id_fkey', + 'task_geometries') + # drop the old primary key + op.drop_constraint( + 'tasks_pkey', + 'tasks') + # change the challenge_slug column to be non nullable + op.alter_column('tasks', 'challenge_slug', + existing_type=sa.VARCHAR(), + nullable=False) + # create the new one + op.create_primary_key( + 'tasks_pkey', + 'tasks', + ['identifier', 'challenge_slug']) + # create an index on id column + op.create_index( + 'idx_tasks_id', + 'tasks', + ['id']) + # recreate the foreign keys + op.create_foreign_key( + 'actions_task_id_fkey', + 'actions', + 'tasks', + ['task_id'], + ['id']) + op.create_foreign_key( + 'task_geometries_task_id_fkey', + 'task_geometries', + 'tasks', + ['task_id'], + ['id']) + + +def downgrade(): + #temporarily drop fkey constraints so we can drop the primary key + op.drop_constraint( + 'actions_task_id_fkey', + 'actions') + op.drop_constraint( + 'task_geometries_task_id_fkey', + 'task_geometries') + op.drop_index('idx_tasks_id') + op.drop_constraint( + 'tasks_pkey', + 'tasks') + # change the challenge slug column to be nullable again + op.alter_column('tasks', 'challenge_slug', + existing_type=sa.VARCHAR(), + nullable=True) + op.create_primary_key( + 'tasks_pkey', + 'tasks', + ['id']) + # recreate the foreign keys + op.create_foreign_key( + 'actions_task_id_fkey', + 'actions', + 'tasks', + ['task_id'], + ['id']) + op.create_foreign_key( + 'task_geometries_task_id_fkey', + 'task_geometries', + 'tasks', + ['task_id'], + ['id']) From 57ec283eeaebb16253fe247f307df4bde0474124 Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Fri, 11 Jul 2014 09:50:54 -0600 Subject: [PATCH 12/18] completing migration to optimized task / metrics model --- fabric_templates/config | 5 ++++- maproulette/api/__init__.py | 3 +++ maproulette/helpers.py | 2 -- maproulette/models.py | 2 ++ migrations/versions/844562c6ddf_.py | 22 ++++++++++++++++------ 5 files changed, 25 insertions(+), 9 deletions(-) diff --git a/fabric_templates/config b/fabric_templates/config index d80cdc2..446f864 100644 --- a/fabric_templates/config +++ b/fabric_templates/config @@ -74,4 +74,7 @@ MAILGUN_API_KEY = 'REPLACE WITH YOUR MAILGUN API KEY' # IP Whitelist for external API calls # (/api/admin/*, /api/stats*, /api/users, /api/challenges) -IP_WHITELIST = [ ] \ No newline at end of file +IP_WHITELIST = [ ] + +# Max number of tasks in a bulk update call +MAX_TASKS_BULK_UPDATE = 5000 \ No newline at end of file diff --git a/maproulette/api/__init__.py b/maproulette/api/__init__.py index f315696..2482f51 100644 --- a/maproulette/api/__init__.py +++ b/maproulette/api/__init__.py @@ -627,6 +627,9 @@ def put(self, slug): # debug output number of tasks being posted app.logger.debug('posting {number} tasks...'.format(number=len(data))) + if len(data) > 5000: + abort(400, 'more than 5000 tasks in bulk update') + for task in data: db.session.merge(parse_task_json(slug, task)) diff --git a/maproulette/helpers.py b/maproulette/helpers.py index 912bb25..06c3cd6 100644 --- a/maproulette/helpers.py +++ b/maproulette/helpers.py @@ -138,8 +138,6 @@ def get_random_task(challenge): def parse_task_json(slug, data): """Parse task json coming in through the admin api""" - app.logger.debug(data) - # task json needs to have identifier if not 
'identifier' in data: abort(400, 'no identifier') diff --git a/maproulette/models.py b/maproulette/models.py index b950eb8..711fe70 100644 --- a/maproulette/models.py +++ b/maproulette/models.py @@ -4,6 +4,7 @@ from sqlalchemy.orm import synonym from sqlalchemy.ext.hybrid import hybrid_property, hybrid_method from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.schema import Sequence from flask.ext.sqlalchemy import SQLAlchemy from geoalchemy2.types import Geometry from geoalchemy2.shape import from_shape, to_shape @@ -200,6 +201,7 @@ class Task(db.Model): id = db.Column( db.Integer, + Sequence('tasks_id_seq'), unique=True, nullable=False) identifier = db.Column( diff --git a/migrations/versions/844562c6ddf_.py b/migrations/versions/844562c6ddf_.py index 52af368..10a71dd 100644 --- a/migrations/versions/844562c6ddf_.py +++ b/migrations/versions/844562c6ddf_.py @@ -12,6 +12,7 @@ from alembic import op import sqlalchemy as sa +from sqlalchemy import text def upgrade(): @@ -30,16 +31,23 @@ def upgrade(): op.alter_column('tasks', 'challenge_slug', existing_type=sa.VARCHAR(), nullable=False) + # now that the id column is no longer the primary key, + # ensure that it continues to be a serial + op.alter_column('tasks', 'id', + existing_type=sa.VARCHAR(), + nullable=False, + server_default=text("nextval('tasks_id_seq')")) + # create a unique constraint on the id column, this will also + # create an index + op.create_unique_constraint( + 'tasks_id_unique', + 'tasks', + ['id']) # create the new one op.create_primary_key( 'tasks_pkey', 'tasks', ['identifier', 'challenge_slug']) - # create an index on id column - op.create_index( - 'idx_tasks_id', - 'tasks', - ['id']) # recreate the foreign keys op.create_foreign_key( 'actions_task_id_fkey', @@ -63,7 +71,9 @@ def downgrade(): op.drop_constraint( 'task_geometries_task_id_fkey', 'task_geometries') - op.drop_index('idx_tasks_id') + op.drop_constraint( + 'tasks_id_unique', + 'tasks') op.drop_constraint( 'tasks_pkey', 'tasks') From 373f056076f4628be7a3bc2566f6afb2178f0665 Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Mon, 14 Jul 2014 12:21:02 -0600 Subject: [PATCH 13/18] max bulk updates to config file --- maproulette/api/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maproulette/api/__init__.py b/maproulette/api/__init__.py index 2482f51..9cec7bb 100644 --- a/maproulette/api/__init__.py +++ b/maproulette/api/__init__.py @@ -627,7 +627,7 @@ def put(self, slug): # debug output number of tasks being posted app.logger.debug('posting {number} tasks...'.format(number=len(data))) - if len(data) > 5000: + if len(data) > app.config['MAX_TASKS_BULK_UPDATE']: abort(400, 'more than 5000 tasks in bulk update') for task in data: From ad6cd3401961fa0be8ebe3acf16b0baddfd08f4a Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Mon, 14 Jul 2014 14:12:23 -0600 Subject: [PATCH 14/18] converted aggregate endpoints --- maproulette/api/__init__.py | 64 +++++++++++++------------------------ 1 file changed, 22 insertions(+), 42 deletions(-) diff --git a/maproulette/api/__init__.py b/maproulette/api/__init__.py index 9cec7bb..a11e984 100644 --- a/maproulette/api/__init__.py +++ b/maproulette/api/__init__.py @@ -220,6 +220,7 @@ class ApiStats(Resource): def get(self, challenge_slug=None, user_id=None): from dateutil import parser as dateparser from datetime import datetime + from maproulette.models import AggregateMetrics start = None end = None @@ -234,57 +235,34 @@ def get(self, challenge_slug=None, user_id=None): 
breakdown = False - # base CTE and query - # the base CTE gets the set of latest actions for any task - latest_cte = db.session.query( - Action.id, - Action.task_id, - Action.timestamp, - Action.user_id, - Action.status, - Task.challenge_slug, - User.display_name).join( - Task).outerjoin(User).distinct( - Action.task_id).order_by( - Action.task_id, - Action.id.desc()).cte(name='latest') - - # the base query gets a count on the base CTE grouped by status, - # optionally broken down by users or challenges + select_fields = [ + AggregateMetrics.status, + func.sum(AggregateMetrics.count)] + + group_fields = [ + AggregateMetrics.status] + if request.path.endswith('/users'): + select_fields.insert(0, AggregateMetrics.user_name) + group_fields.insert(0, AggregateMetrics.user_name) breakdown = True - stats_query = db.session.query( - latest_cte.c.display_name, - latest_cte.c.status, - func.count(latest_cte.c.id)).group_by( - latest_cte.c.status, - latest_cte.c.display_name).order_by( - latest_cte.c.status) elif request.path.endswith('/challenges'): + select_fields.insert(0, AggregateMetrics.challenge_slug) + group_fields.insert(0, AggregateMetrics.challenge_slug) breakdown = True - stats_query = db.session.query( - latest_cte.c.challenge_slug, - latest_cte.c.status, - func.count(latest_cte.c.id)).group_by( - latest_cte.c.status, - latest_cte.c.challenge_slug).order_by( - latest_cte.c.status) - else: - stats_query = db.session.query( - latest_cte.c.status, - func.count(latest_cte.c.id)).group_by( - latest_cte.c.status).order_by( - latest_cte.c.status) + stats_query = db.session.query( + *select_fields).group_by( + *group_fields) # stats for a specific challenge if challenge_slug is not None: - stats_query = stats_query.filter( - latest_cte.c.challenge_slug == challenge_slug) + stats_query = stats_query.filter_by( + challenge_slug=challenge_slug) # stats for a specific user if user_id is not None: - stats_query = stats_query.filter( - latest_cte.c.user_id == user_id) + stats_query = stats_query.filter_by( + user_id=user_id) # time slicing filters if args['start'] is not None: @@ -294,7 +272,9 @@ def get(self, challenge_slug=None, user_id=None): else: end = dateparser.parse(args['end']) stats_query = stats_query.filter( - latest_cte.c.timestamp.between(start, end)) + AggregateMetrics.timestamp.between(start, end)) + + app.logger.debug(stats_query) if breakdown: # if this is a breakdown by a secondary variable, the From 517ddc220ed354a34aabe7751efc7d947a7e8cd4 Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Mon, 14 Jul 2014 14:27:33 -0600 Subject: [PATCH 15/18] adapted history query to work with new model --- maproulette/api/__init__.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/maproulette/api/__init__.py b/maproulette/api/__init__.py index a11e984..2a1e2ac 100644 --- a/maproulette/api/__init__.py +++ b/maproulette/api/__init__.py @@ -250,6 +250,7 @@ def get(self, challenge_slug=None, user_id=None): select_fields.insert(0, AggregateMetrics.challenge_slug) group_fields.insert(0, AggregateMetrics.challenge_slug) breakdown = True + stats_query = db.session.query( *select_fields).group_by( *group_fields) @@ -291,6 +292,8 @@ class ApiStatsHistory(Resource): def get(self, challenge_slug=None, user_id=None): + from maproulette.models import HistoricalMetrics as HM + start = None end = None @@ -304,20 +307,19 @@ def get(self, challenge_slug=None, user_id=None): args = parser.parse_args() - query = db.session.query( - func.date_trunc('day', 
Action.timestamp).label('day'), - Action.status, - func.count(Action.id)).join( - Task).outerjoin(User) + stats_query = db.session.query( + HM.timestamp, + HM.status, + func.sum(HM.count)) if challenge_slug is not None: - query = query.filter(Task.challenge_slug == challenge_slug) + stats_query = stats_query.filter(HM.challenge_slug == challenge_slug) if user_id is not None: - query = query.filter(User.id == user_id) + stats_query = stats_query.filter(HM.user_id == user_id) - query = query.group_by( - 'day', Action.status).order_by( - Action.status) + stats_query = stats_query.group_by( + HM.timestamp, HM.status).order_by( + HM.status) # time slicing filters if args['start'] is not None: @@ -326,13 +328,13 @@ def get(self, challenge_slug=None, user_id=None): end = datetime.utcnow() else: end = dateparser.parse(args['end']) - query = query.filter( + stats_query = stats_query.filter( Action.timestamp.between(start, end)) - app.logger.debug(query) + app.logger.debug(stats_query) return as_stats_dict( - query.all(), + stats_query.all(), order=[1, 0, 2], start=start, end=end) From e91191d919c4793f84376bcc01d4bde102bfc03b Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Mon, 14 Jul 2014 14:55:36 -0600 Subject: [PATCH 16/18] adding S3 domains to CORS whitelist --- maproulette/api/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/maproulette/api/__init__.py b/maproulette/api/__init__.py index 2a1e2ac..a882e22 100644 --- a/maproulette/api/__init__.py +++ b/maproulette/api/__init__.py @@ -76,7 +76,9 @@ def format(self, text): 'display_name': fields.String } -api = Api(app, decorators=[cors.crossdomain(origin='*')]) +api = Api(app, decorators=[cors.crossdomain(origin=[ + 'maproulette-metrics-stage.s3-website-us-east-1.amazonaws.com', + 'maproulette-metrics-production.s3-website-us-east-1.amazonaws.com'])]) # override the default JSON representation to support the geo objects From f228af372f327a3c5dc2257f8a74eaba077ac660 Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Mon, 14 Jul 2014 16:08:06 -0600 Subject: [PATCH 17/18] Adding metrics URL to config --- fabric_templates/config | 24 +++++++++++++++--------- maproulette/api/__init__.py | 4 +--- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/fabric_templates/config b/fabric_templates/config index 446f864..45f5b40 100644 --- a/fabric_templates/config +++ b/fabric_templates/config @@ -1,15 +1,18 @@ # The application secret key # http://www.thedutchtable.com/2013/03/een-ei-is-geen-eivrolijk-pasen.html -SECRET_KEY = 'een ei is geen ei twee ei is een half ei' +SECRET_KEY = 'REPLACE WITH YOUR APPLICATION SECRET KEY' -{% if setting == "dev" %} -{% set db = "maproulette_dev" %} -{% elif setting == "test" %} -{% set db = "maproulette_test" %} -{% elif setting == "prod" %} -{% set db = "maproulette_test" %} -{% endif %} +{% if setting == "dev" -%} +{% set db = "maproulette_dev" -%} +{% set cors_url = "http://maproulette-metrics-stage.s3-website-us-east-1.amazonaws.com/" -%} +{% elif setting == "test" -%} +{% set db = "maproulette_test" -%} +{% set cors_url = "http://maproulette-metrics-stage.s3-website-us-east-1.amazonaws.com/" -%} +{% elif setting == "prod" -%} +{% set db = "maproulette" -%} +{% set cors_url = "http://maproulette-metrics-production.s3-website-us-east-1.amazonaws.com/" -%} +{% endif -%} # The database connection SQLALCHEMY_DATABASE_URI = "postgresql://osm@localhost/{{db}}" @@ -77,4 +80,7 @@ MAILGUN_API_KEY = 'REPLACE WITH YOUR MAILGUN API KEY' IP_WHITELIST = [ ] # Max number of tasks in a 
bulk update call -MAX_TASKS_BULK_UPDATE = 5000 \ No newline at end of file +MAX_TASKS_BULK_UPDATE = 5000 + +# URL to the metrics site instance, for allowing CORS requests from +METRICS_URL = '{{cors_url}}' \ No newline at end of file diff --git a/maproulette/api/__init__.py b/maproulette/api/__init__.py index a882e22..62dd612 100644 --- a/maproulette/api/__init__.py +++ b/maproulette/api/__init__.py @@ -76,9 +76,7 @@ def format(self, text): 'display_name': fields.String } -api = Api(app, decorators=[cors.crossdomain(origin=[ - 'maproulette-metrics-stage.s3-website-us-east-1.amazonaws.com', - 'maproulette-metrics-production.s3-website-us-east-1.amazonaws.com'])]) +api = Api(app, decorators=[cors.crossdomain(origin=app.config['METRICS_URL'])]) # override the default JSON representation to support the geo objects From 82a23c7d28354fca84b7289c6c9cb6d208e9d308 Mon Sep 17 00:00:00 2001 From: Martijn van Exel Date: Mon, 14 Jul 2014 16:37:31 -0600 Subject: [PATCH 18/18] decorators on separate line for clarity --- maproulette/api/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/maproulette/api/__init__.py b/maproulette/api/__init__.py index 62dd612..0a1dc21 100644 --- a/maproulette/api/__init__.py +++ b/maproulette/api/__init__.py @@ -76,7 +76,8 @@ def format(self, text): 'display_name': fields.String } -api = Api(app, decorators=[cors.crossdomain(origin=app.config['METRICS_URL'])]) +api = Api(app) +api.decorators = [cors.crossdomain(origin=app.config['METRICS_URL'])] # override the default JSON representation to support the geo objects
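
Usage sketch for the bulk task endpoint reworked in this series: a task payload handed to parse_task_json() must carry an 'identifier' and, for tasks that do not yet exist, a GeoJSON 'geometries' FeatureCollection whose features carry an 'osmid' property, and bulk PUTs are capped by MAX_TASKS_BULK_UPDATE. The minimal client-side sketch below assumes a local deployment and an admin endpoint of the form /api/admin/challenge/<slug>/tasks; the base URL, path, and helper names are illustrative assumptions, not part of the patches.

import json
import requests

API_BASE = 'http://localhost:5000/api/admin'  # assumed base URL, adjust to the deployment
MAX_TASKS_BULK_UPDATE = 5000                  # mirrors the config value added in this series


def build_task(identifier, lon, lat, osmid, instruction=None):
    """Build one task dict in the shape parse_task_json() expects:
    an identifier plus a GeoJSON FeatureCollection of geometries."""
    task = {
        'identifier': identifier,
        'geometries': {
            'type': 'FeatureCollection',
            'features': [{
                'type': 'Feature',
                'geometry': {'type': 'Point', 'coordinates': [lon, lat]},
                'properties': {'osmid': osmid},
            }],
        },
    }
    if instruction is not None:
        task['instruction'] = instruction
    return task


def put_tasks(slug, tasks):
    """PUT tasks in chunks that stay under the bulk-update cap."""
    for offset in range(0, len(tasks), MAX_TASKS_BULK_UPDATE):
        chunk = tasks[offset:offset + MAX_TASKS_BULK_UPDATE]
        response = requests.put(
            '{base}/challenge/{slug}/tasks'.format(base=API_BASE, slug=slug),
            data=json.dumps(chunk),
            headers={'Content-Type': 'application/json'})
        response.raise_for_status()

On the server side each chunk lands in AdminApiUpdateTasks.put(), which merges every parsed task into the session and commits once per request, so keeping a request under the cap also bounds the size of that single commit.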