diff --git a/README.md b/README.md index 0e590b1..2d86b7c 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,14 @@ This is a Python package that integrates Superset and CommCare HQ. Local Development ----------------- -Follow below instructions. +### Preparing CommCare HQ -### Setup env +The 'User configurable reports UI' feature flag must be enabled for the +domain in CommCare HQ, even if the data sources to be imported were +created by Report Builder, not a UCR. + + +### Setting up a dev environment While doing development on top of this integration, it's useful to install this via `pip -e` option so that any changes made get reflected @@ -51,11 +56,12 @@ directly without another `pip install`. Read through the initialization instructions at https://superset.apache.org/docs/installation/installing-superset-from-scratch/#installing-and-initializing-superset. -Create the database. These instructions assume that PostgreSQL is -running on localhost, and that its user is "commcarehq". Adapt -accordingly: +Create a database for Superset, and a database for storing data from +CommCare HQ. Adapt the username and database names to suit your +environment. ```bash -$ createdb -h localhost -p 5432 -U commcarehq superset_meta +$ createdb -h localhost -p 5432 -U postgres superset +$ createdb -h localhost -p 5432 -U postgres superset_hq_data ``` Set the following environment variables: @@ -64,10 +70,17 @@ $ export FLASK_APP=superset $ export SUPERSET_CONFIG_PATH=/path/to/superset_config.py ``` -Initialize the database. Create an administrator. Create default roles +Set this environment variable to allow OAuth 2.0 authentication with +CommCare HQ over insecure HTTP. (DO NOT USE THIS IN PRODUCTION.) +```bash +$ export AUTHLIB_INSECURE_TRANSPORT=1 +``` + +Initialize the databases. Create an administrator. 
Create default roles and permissions: ```bash $ superset db upgrade +$ superset db upgrade --directory hq_superset/migrations/ $ superset fab create-admin $ superset load_examples # (Optional) $ superset init @@ -78,28 +91,16 @@ You should now be able to run superset using the `superset run` command: ```bash $ superset run -p 8088 --with-threads --reload --debugger ``` -However, OAuth login does not work yet as hq-superset needs a Postgres -database created to store CommCare HQ data. - -### Create a Postgres Database Connection for storing HQ data - -- Create a Postgres database. e.g. - ```bash - $ createdb -h localhost -p 5432 -U commcarehq hq_data - ``` -- Log into Superset as the admin user created in the Superset - installation and initialization. Note that you will need to update - `AUTH_TYPE = AUTH_DB` to log in as admin user. `AUTH_TYPE` should be - otherwise set to `AUTH_OAUTH`. -- Go to 'Data' -> 'Databases' or http://127.0.0.1:8088/databaseview/list/ -- Create a database connection by clicking '+ DATABASE' button at the top. -- The name of the DISPLAY NAME should be 'HQ Data' exactly, as this is - the name by which this codebase refers to the Postgres DB. - -OAuth integration should now be working. You can log in as a CommCare -HQ web user. +You can now log in as a CommCare HQ web user. +In order for CommCare HQ to sync data source changes, you will need to +allow OAuth 2.0 authentication over insecure HTTP. (DO NOT USE THIS IN +PRODUCTION.) Set this environment variable in your CommCare HQ Django +server. (Yes, it's "OAUTHLIB" this time, not "AUTHLIB" as before.) +```bash +$ export OAUTHLIB_INSECURE_TRANSPORT=1 +``` ### Importing UCRs using Redis and Celery @@ -129,6 +130,41 @@ code you want to test will need to be in a module whose dependencies don't include Superset. +### Creating a migration + +You will need to create an Alembic migration for any new SQLAlchemy +models that you add. 
The Superset CLI should allow you to do this: + +```shell +$ superset db revision --autogenerate -m "Add table for Foo model" +``` + +However, problems with this approach have occurred in the past. You +might have more success by using Alembic directly. You will need to +modify the configuration a little to do this: + +1. Copy the "HQ_DATA" database URI from `superset_config.py`. + +2. Paste it as the value of `sqlalchemy.url` in + `hq_superset/migrations/alembic.ini`. + +3. Edit `env.py` and comment out the following lines: + ``` + hq_data_uri = current_app.config['SQLALCHEMY_BINDS'][HQ_DATA] + decoded_uri = urllib.parse.unquote(hq_data_uri) + config.set_main_option('sqlalchemy.url', decoded_uri) + ``` + +Those changes will allow Alembic to connect to the "HQ Data" database +without the need to instantiate Superset's Flask app. You can now +autogenerate your new table with: + +```shell +$ cd hq_superset/migrations/ +$ alembic revision --autogenerate -m "Add table for Foo model" +``` + + Upgrading Superset ------------------ diff --git a/hq_superset/migrations/README b/hq_superset/migrations/README new file mode 100644 index 0000000..98e4f9c --- /dev/null +++ b/hq_superset/migrations/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/hq_superset/migrations/alembic.ini b/hq_superset/migrations/alembic.ini new file mode 100644 index 0000000..f01502d --- /dev/null +++ b/hq_superset/migrations/alembic.ini @@ -0,0 +1,115 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = . 
+ +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +file_template = %%(year)d-%%(month).2d-%%(day).2d_%%(hour).2d-%%(minute).2d_%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +# prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. +# Any required deps can be installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to hq_superset/migrations/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:hq_superset/migrations/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. 
+# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = driver://user:pass@localhost/dbname + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/hq_superset/migrations/env.py b/hq_superset/migrations/env.py new file mode 100644 index 0000000..c89f28b --- /dev/null +++ b/hq_superset/migrations/env.py @@ -0,0 +1,77 @@ +import urllib.parse +from logging.config import fileConfig + +from alembic import 
context +from flask import current_app +from sqlalchemy import engine_from_config, pool + +from hq_superset.const import OAUTH2_DATABASE_NAME +from hq_superset.models import HQClient + +config = context.config +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +db_uri = current_app.config['SQLALCHEMY_BINDS'][OAUTH2_DATABASE_NAME] +decoded_uri = urllib.parse.unquote(db_uri) +config.set_main_option('sqlalchemy.url', decoded_uri) + +# add your model's MetaData object here for 'autogenerate' support +target_metadata = HQClient.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. 
+ + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, target_metadata=target_metadata + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/hq_superset/migrations/script.py.mako b/hq_superset/migrations/script.py.mako new file mode 100644 index 0000000..fbc4b07 --- /dev/null +++ b/hq_superset/migrations/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/hq_superset/migrations/versions/2024-02-24_23-53_56d0467ff6ff_added_oauth_tables.py b/hq_superset/migrations/versions/2024-02-24_23-53_56d0467ff6ff_added_oauth_tables.py new file mode 100644 index 0000000..0b962f5 --- /dev/null +++ b/hq_superset/migrations/versions/2024-02-24_23-53_56d0467ff6ff_added_oauth_tables.py @@ -0,0 +1,67 @@ +"""Added OAuth tables + +Revision ID: 56d0467ff6ff +Revises: +Create Date: 2024-02-24 23:53:10.289606 +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
# Alembic revision identifiers. ``down_revision`` is None, so no earlier
# revision precedes this one in this migration directory.
revision: str = '56d0467ff6ff'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the ``hq_oauth_client`` and ``hq_oauth_token`` tables.

    Each table is followed by one non-unique index:
    ``hq_oauth_client.client_id`` and ``hq_oauth_token.refresh_token``.
    Both tables carry ``info={'bind_key': 'oauth2-server-data'}``.
    """
    op.create_table(
        'hq_oauth_client',
        sa.Column('client_id', sa.String(length=48), nullable=True),
        sa.Column('client_id_issued_at', sa.Integer(), nullable=False),
        sa.Column('client_secret_expires_at', sa.Integer(), nullable=False),
        sa.Column('client_metadata', sa.Text(), nullable=True),
        sa.Column('domain', sa.String(length=255), nullable=False),
        sa.Column('client_secret', sa.String(length=255), nullable=True),
        # The primary key is the domain, not the client ID.
        sa.PrimaryKeyConstraint('domain'),
        info={'bind_key': 'oauth2-server-data'},
    )
    op.create_index(
        op.f('ix_hq_oauth_client_client_id'),
        'hq_oauth_client',
        ['client_id'],
        unique=False,
    )
    op.create_table(
        'hq_oauth_token',
        sa.Column('client_id', sa.String(length=48), nullable=True),
        sa.Column('token_type', sa.String(length=40), nullable=True),
        sa.Column('access_token', sa.String(length=255), nullable=False),
        sa.Column('refresh_token', sa.String(length=255), nullable=True),
        sa.Column('scope', sa.Text(), nullable=True),
        sa.Column('issued_at', sa.Integer(), nullable=False),
        sa.Column('access_token_revoked_at', sa.Integer(), nullable=False),
        sa.Column('refresh_token_revoked_at', sa.Integer(), nullable=False),
        sa.Column('expires_in', sa.Integer(), nullable=False),
        sa.Column('id', sa.Integer(), nullable=False),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('access_token'),
        info={'bind_key': 'oauth2-server-data'},
    )
    op.create_index(
        op.f('ix_hq_oauth_token_refresh_token'),
        'hq_oauth_token',
        ['refresh_token'],
        unique=False,
    )


def downgrade() -> None:
    """Undo ``upgrade()`` by dropping its objects in reverse order.

    Each index is dropped explicitly before its table so that the
    downgrade mirrors every ``create_index`` / ``create_table`` call in
    ``upgrade()``.
    """
    op.drop_index(
        op.f('ix_hq_oauth_token_refresh_token'), table_name='hq_oauth_token'
    )
    op.drop_table('hq_oauth_token')
    op.drop_index(
        op.f('ix_hq_oauth_client_client_id'), table_name='hq_oauth_client'
    )
    op.drop_table('hq_oauth_client')