# Patch summary (review notes; this header precedes the first "diff --git" line).
#
# Purpose: add a Sphinx documentation build that is published to GitHub Pages,
# and thread `project_id` / `table` through the base classes of the ga4 package.
#
# Files touched:
#   .github/workflows/sphinx.yml  (new) On push to `main`: installs sphinx and
#       sphinx_rtd_theme, runs `make html` inside docs/, then publishes
#       docs/build/html to the `gh-pages` branch with force_orphan enabled.
#   docs/Makefile, docs/make.bat  (new) Forward every target to
#       `sphinx-build -M <target>` with SOURCEDIR=source and BUILDDIR=build.
#   docs/source/conf.py           (new) Inserts the repo root and the ga4,
#       ga4/analytic and ga4/model directories into sys.path, enables the listed
#       Sphinx extensions, selects sphinx_rtd_theme and sets html_context for
#       the GitHub link.
#   docs/source/index.rst         (new) automodule entries for
#       ga4.model.bigquery, ga4.analytic.analytic, ga4.analytic.data_transform.
#   ga4/analytic/analytic.py      BaseAnalytic gains __init__ storing `table`;
#       UserAnalytic, DeviceAnalytic and EventAnalytic now call
#       super().__init__(table); UserAnalytic gains the `user_id_distribution`
#       property, which calls attribute_distribution(self.table, 'user_id_list').
#   ga4/model/bigquery.py         Client.__init__ takes and stores `project_id`;
#       BaseTable and Ga4Table pass it up via super().__init__.
#       page_location_list: adds the comma before UNNEST, renames the alias to
#       `param`, adds `OR param.key IS NULL` to the filter, and reads
#       row.param_string_value (the selected alias) instead of row.page_location.
#   pyproject.toml                Poetry package name "pyga4" -> "pyGA4".
#
# NOTE(review): `project_id` is inserted as a required positional parameter of
#   Client.__init__ and BaseTable.__init__; callers outside this patch that
#   construct those classes directly must be updated - confirm none remain.
# NOTE(review): the Client docstring example kept as context still calls
#   `Client()` with no arguments, which does not match the __init__ signature.
# NOTE(review): line breaks and indentation below were reconstructed from
#   whitespace-collapsed text. Blank context lines, the separate-line layout of
#   the QueryJobConfig arguments and the indentation inside the SQL f-string
#   are inferred from the hunk line counts and convention - verify against the
#   repository before applying.
#
diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml
new file mode 100644
index 0000000..e2588cc
--- /dev/null
+++ b/.github/workflows/sphinx.yml
@@ -0,0 +1,27 @@
+name: Docs
+on:
+  push:
+    branches: ['main']
+permissions:
+  contents: write
+jobs:
+  docs:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+      - name: Install dependencies
+        run: |
+          pip install sphinx sphinx_rtd_theme
+      - name: Sphinx build
+        run: |
+          cd docs
+          make html
+      - name: Deploy to Github Pages
+        uses: peaceiris/actions-gh-pages@v3
+        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
+        with:
+          publish_branch: gh-pages
+          github_token: ${{ secrets.GITHUB_TOKEN }} # the default token; no need to create one
+          publish_dir: docs/build/html # path of the built docs
+          force_orphan: true
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d0c3cbf
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..dc1312a
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..db56326
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,50 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+import os
+import sys
+sys.path.insert(0, os.path.abspath('../..'))
+sys.path.insert(1, os.path.abspath('../../ga4'))
+sys.path.insert(2, os.path.abspath('../../ga4/analytic'))
+sys.path.insert(3, os.path.abspath('../../ga4/model'))
+
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = 'pyGA4'
+copyright = '2023, Lin-jun-xiang'
+author = 'Lin-jun-xiang'
+release = '0.1.0'
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+    'sphinx.ext.intersphinx',
+    'sphinx.ext.autodoc',
+    'sphinx.ext.autosummary',
+    'sphinx.ext.mathjax',
+    'sphinx.ext.viewcode',
+    'sphinx_rtd_theme',
+    'sphinx.ext.githubpages',
+]
+
+templates_path = ['_templates']
+exclude_patterns = []
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = "sphinx_rtd_theme"
+html_static_path = ['_static']
+
+# Replace 'view source code' with 'Edit on Github'
+# https://stackoverflow.com/questions/62904172/how-do-i-replace-view-page-source-with-edit-on-github-links-in-sphinx-rtd-th
+html_context = {
+    'display_github': True,
+    'github_user': 'Lin-jun-xiang',
+    'github_repo': 'pyGA4',
+    'github_version': 'main/docs/source/',
+}
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..ad52bb6
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,22 @@
+.. pyGA4 documentation master file, created by
+   sphinx-quickstart on Mon Sep 18 17:02:12 2023.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+pyGA4's documentation!
+======================
+
+.. automodule:: ga4.model.bigquery
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: ga4.analytic.analytic
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: ga4.analytic.data_transform
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/ga4/analytic/analytic.py b/ga4/analytic/analytic.py
index 2948e0b..5563fb0 100644
--- a/ga4/analytic/analytic.py
+++ b/ga4/analytic/analytic.py
@@ -5,6 +5,9 @@
 
 
 class BaseAnalytic:
+    def __init__(self, table: Ga4Table) -> None:
+        self.table = table
+
     @staticmethod
     def attribute_distribution(table: Ga4Table, attribute_name: str) -> Counter:
         attribute_list = getattr(table, attribute_name)
@@ -17,7 +20,13 @@ class UserAnalytic(BaseAnalytic):
     """The features of user"""
 
     def __init__(self, table: Ga4Table) -> None:
-        self.table = table
+        super().__init__(table)
+
+    @property
+    def user_id_distribution(self) -> Counter:
+        return self.attribute_distribution(
+            self.table, 'user_id_list'
+        )
 
     @property
     def countries_distribution(self) -> Counter:
@@ -30,7 +39,7 @@ class DeviceAnalytic(BaseAnalytic):
     """The features of technology"""
 
     def __init__(self, table: Ga4Table) -> None:
-        self.table = table
+        super().__init__(table)
 
     @property
     def os_distribution(self) -> Counter:
@@ -67,7 +76,7 @@ class EventAnalytic(BaseAnalytic):
     """The features of event"""
 
     def __init__(self, table: Ga4Table) -> None:
-        self.table = table
+        super().__init__(table)
 
     @property
     def pages_distribution(self) -> Counter:
diff --git a/ga4/model/bigquery.py b/ga4/model/bigquery.py
index 5993456..7c4dd4e 100644
--- a/ga4/model/bigquery.py
+++ b/ga4/model/bigquery.py
@@ -29,7 +29,6 @@ class Client:
     Examples
     --------
     ```python
-    # Setup dry_run = True
     Client().query_config.dry_run = True
     ```
     """
@@ -37,14 +36,15 @@ class Client:
         dry_run = False,
         use_query_cache = False
     )
-    def __init__(self, client) -> None:
+    def __init__(self, client, project_id: str) -> None:
         self.client = client
+        self.project_id = project_id
 
 
 class BaseTable(Client):
     """Connect to the data set from bigquery"""
-    def __init__(self, client, dataset_name: str) -> None:
-        super().__init__(client)
+    def __init__(self, client, project_id: str, dataset_name: str) -> None:
+        super().__init__(client, project_id)
         self.dataset_name = dataset_name
         self._table_id = None
 
@@ -78,8 +78,7 @@ def to_dataframe(self):
 
 class Ga4Table(BaseTable):
     def __init__(self, client, project_id: str, dataset_name: str) -> None:
-        super().__init__(client, dataset_name)
-        self.project_id = project_id
+        super().__init__(client, project_id, dataset_name)
         self._query_job = None
 
     def _query_template(self, query_target: str) -> list:
@@ -204,17 +203,17 @@ def page_location_list(self) -> list:
         """Return all event of page location from data table"""
         query = f"""
             SELECT
-                event_params.value.string_value AS param_string_value
+                param.value.string_value AS param_string_value
             FROM
-                `{self.project_id}.{self.dataset_name}.{self.table_id}`
-                UNNEST(event_params) AS event_params
+                `{self.project_id}.{self.dataset_name}.{self.table_id}`,
+                UNNEST(event_params) AS param
             WHERE
-                event_params.key = "page_location"
+                param.key = "page_location" OR param.key IS NULL
         """
         self._query_job = self.client.query(query, job_config=self.query_config)
         results = self._query_job.result()
 
-        return [row.page_location for row in results]
+        return [row.param_string_value for row in results]
 
     @calculate_bytes_processed
     def query(self, query: str) -> list:
diff --git a/pyproject.toml b/pyproject.toml
index 4e3fa2c..d9e69a0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [tool.poetry]
-name = "pyga4"
+name = "pyGA4"
 version = "0.1.0"
 description = "Python Google Analytics 4 (GA4) Data Extraction and Analysis Toolkit"
 authors = ["Lin-jun-xiang "]