From 83e67220a42b98ba686c2d2bddd2cec6b2f2799d Mon Sep 17 00:00:00 2001 From: lizgzil Date: Fri, 26 Jul 2024 16:54:04 +0100 Subject: [PATCH 1/4] Add the file structure for this project, the basics of tests, github actions, documentation --- .github/workflows/docs.yaml | 27 ++++++++++++++ .github/workflows/pytest.yaml | 57 ++++++++++++++++++++++++++++++ .gitignore | 4 +++ README.md | 40 +++++++++++++++++++++ docs/README.md | 5 +++ docs/mkdocs.yaml | 44 +++++++++++++++++++++++ docs/page1.md | 1 + docs/site_assets/nesta_logo.png | Bin 0 -> 2737 bytes docs/site_assets/requirements.txt | 3 ++ docs/site_assets/style.css | 10 ++++++ nlp_link/__init__.py | 0 nlp_link/linker.py | 12 +++++++ pyproject.toml | 21 +++++++++++ tests/__init__.py | 0 tests/test_linker.py | 10 ++++++ 15 files changed, 234 insertions(+) create mode 100644 .github/workflows/docs.yaml create mode 100644 .github/workflows/pytest.yaml create mode 100644 .gitignore create mode 100644 docs/README.md create mode 100644 docs/mkdocs.yaml create mode 100644 docs/page1.md create mode 100644 docs/site_assets/nesta_logo.png create mode 100644 docs/site_assets/requirements.txt create mode 100644 docs/site_assets/style.css create mode 100644 nlp_link/__init__.py create mode 100644 nlp_link/linker.py create mode 100644 pyproject.toml create mode 100644 tests/__init__.py create mode 100644 tests/test_linker.py diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 0000000..6190c46 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,27 @@ +name: Deploy docs to gh-pages + +on: + workflow_dispatch: + push: + branches: + - dev + +jobs: + build: + runs-on: ubuntu-latest + name: Deploy docs to gh-pages + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Deploy + uses: mhausenblas/mkdocs-deploy-gh-pages@master + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + 
CONFIG_FILE: docs/mkdocs.yml + REQUIREMENTS: docs/site_assets/requirements.txt \ No newline at end of file diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml new file mode 100644 index 0000000..b8e2a70 --- /dev/null +++ b/.github/workflows/pytest.yaml @@ -0,0 +1,57 @@ +name: Unit Tests + +on: [push] + +jobs: + test: + runs-on: ${{ matrix.os }} + + strategy: + matrix: + os: ["ubuntu-latest", "macos-latest"] + python-version: ["3.9", "3.10"] + steps: + #---------------------------------------------- + # check-out repo and set-up python + #---------------------------------------------- + - name: Check out repository + uses: actions/checkout@v4 + - name: Set up python ${{ matrix.python-version }} + id: setup-python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + #---------------------------------------------- + # ----- install & configure poetry ----- + #---------------------------------------------- + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + virtualenvs-create: true + virtualenvs-in-project: true + #---------------------------------------------- + # load cached venv if cache exists + #---------------------------------------------- + - name: Load cached venv + id: cached-poetry-dependencies + uses: actions/cache@v3 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} + #---------------------------------------------- + # install dependencies if cache does not exist + #---------------------------------------------- + - name: Install dependencies + if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + run: poetry install --no-interaction --no-root + #---------------------------------------------- + # install your root project, if required + #---------------------------------------------- + - name: Install additional dependencies + run: | + poetry install --no-interaction + 
#---------------------------------------------- + # add matrix specifics and run test suite + #---------------------------------------------- + - name: Run tests + run: poetry run pytest tests/ --verbose diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2044a52 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +# Mac OS-specific storage files +.DS_Store + +__pycache__/ \ No newline at end of file diff --git a/README.md b/README.md index fe82b75..62982ba 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,42 @@ # nlp-link A python package to semantically link two lists of texts. + + +## Set-up + +In setting up this project we ran: +``` +conda create --name nlp-link pip python=3.9 +conda activate nlp-link +pip install poetry +``` + +``` +poetry init + +``` + +``` +poetry install + +``` + +## Tests + +To run tests: + +``` +poetry run pytest tests/ +``` + +## Documentation + +Docs for this repo are automatically published to the gh-pages branch via GitHub Actions after a PR is merged into dev. We use Material for MkDocs for these. Nothing needs to be done to update these. + +However, if you are editing the docs you can test them out locally by running + +``` +cd guidelines +pip install -r docs/requirements.txt +mkdocs serve +``` diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..13cbf86 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,5 @@ +# nlp-link + +Documentation for NLP Link + +- [Page1](./page1.md) \ No newline at end of file diff --git a/docs/mkdocs.yaml b/docs/mkdocs.yaml new file mode 100644 index 0000000..1bd0fb1 --- /dev/null +++ b/docs/mkdocs.yaml @@ -0,0 +1,44 @@ +site_name: NLP Link +site_description: A python package to semantically link two lists of texts. +site_url: https://nestauk.github.io/nlp-link +repo_name: nestauk/nlp-link +repo_url: https://github.com/nestauk/nlp-link +extra: + homepage: https://nestauk.github.io/nlp-link +docs_dir: . 
+extra_css: + - site_assets/style.css +theme: + name: material + # disable_nav_previous_next: true + # disable_footer: false + logo: site_assets/nesta_logo.png + favicon: site_assets/nesta_logo.png + features: + - navigation.instant + - navigation.tracking + # - navigation.tabs + - navigation.sections + - navigation.top + font: + text: Century Gothic + palette: + - media: "(prefers-color-scheme: light)" + scheme: default + primary: custom + accent: custom + toggle: + icon: material/weather-night + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: amber + accent: pink + toggle: + icon: material/weather-sunny + name: Switch to light mode +nav: + - Home: README.md + - Page 1: page1.md +plugins: + - same-dir \ No newline at end of file diff --git a/docs/page1.md b/docs/page1.md new file mode 100644 index 0000000..127d3fc --- /dev/null +++ b/docs/page1.md @@ -0,0 +1 @@ +## Title \ No newline at end of file diff --git a/docs/site_assets/nesta_logo.png b/docs/site_assets/nesta_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..af179d6076f9e81e5aaba6fdf10c898887d4ce6b GIT binary patch literal 2737 zcmb7G3se(V8omjL5EKRJ3YfDEu~@B5G6_VI7_tp72@2s=jc&JPGMR*6l8G||LW^mq~A2$m| zkYuee(1xNVDvYp;Q4?Xr#V)&(K>@W(1xY(jqXL)R=AcxrEFr6*f;7XH2nDPPy)sK^ zC@K-;5F{>GAV$Q9P{R`l1ZvV`R+Z)E2jOs)C9I-pr%EEJtE&^&rHcvDB9SVUN(my9 z$Ydg@A);10Xw)TgP_qL@M(pI`6h>N|w3ToO7`v#EsG+ljLdMbP6NnRcS;ss%s9;(! 
zK?x&~NX3X`A{cEo{|tw(HJKzB$OATZ^BfxmA2s_m4WqPcd6D!hjT{+HY>AxRW{UN$r8FmCftnH*l3|f zXNPIg0-d84r)aj(Y6-I~ZPsu*0dBZ?wPb=w)R2dfI7$=bI17x~a(q=dcPu{5`0LC1g5ULt^;oNe*ZSF+ndC zL=9HO=rbLEj5pT4QS3-~IZU;1lSYQfq#~(IgpA=wL{C8HxoDk9gJT%7KxWJoA!b>+ zCk|7lDdRmmn($!lq$=BGi)c?4aX-zmU6>ZE%1KZ)yz>B1dk04bOe$UMBz% zdR?xz!o|6{^EYo^M5dpn9?lSOvMUOD>U{fhcklJ@-KR|PMZ|32s(8=wQc_frqO{#w zZtfS$e12|g`fcUAANP=*t)1u6p8d9+Smd3)A4_8}q%9 z#f_U^KKlcbb^N`3U#GV1D9t%@cu~PW&2ipl+e^#(Ev|Lh-N~t4?Map2TvHAZue>*J z*R5yuu5J8Z&-hHnf4Q$Mng3hU^`)-2e8|P_jft-tyYk{bB<3iF4j=m7E}!h~iMaYz zWXY7ce;hoWQak^|iG5^(qTcssUF+}-MOE?B6}?TAMX2a8lp4Hs=^o<`G2Nh7uc_4} zt~guJJ!xyX^YUJ4)pfP)yJBR zTjkv?Z#4hwK!eD$xjuU5^$RM?J9q7~huqP-4wv`dJoYHrpZ031yZJ#+3EFNi9_XuW z&)bvn@Y753&lYc`=3e|@-X*E~>&AGsyYt3VoPnX!Z@>C&{?oS(u6gawlGu62I(`+G zePU%|)3jYP{ymsn9AA|dbGLGRC3#Yp>HLn`^v>|2-UBTGFYNwwN!Rt z!!^(8E8F5O41VR8#G+@)`+L6Z(|`1d9Zbn2H{}^Kz8NeT_*2sg{ewsP^tG$5t*DIr zB(;D4Q&R4g>CTHAlh&?^R#fM;99Km0yM+0bd~WNC_}NGtXVC}i~}k&jQ6BV7n=P$>{WdiURc^CI%>f>&+Tn&n|8GJQA^*DSN);|>7Unjjht#UYlm+"] +readme = "README.md" +packages = [{include = "nlp_link"}] + +[tool.poetry.dependencies] +python = "^3.9" +scikit-learn = "^1.4.2" +pandas = "^2.2.2" +sentence-transformers = "^2.1.0" +torch = "^1.10.0" +pytest = "^8.2.0" +tqdm = "^4.64.1" +numpy = "^1.24.1" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_linker.py b/tests/test_linker.py new file mode 100644 index 0000000..df8c2a9 --- /dev/null +++ b/tests/test_linker.py @@ -0,0 +1,10 @@ +from nlp_link.linker import link_lists + +def test_link_lists(): + + list_1 = ["dog", "cat"] + list_2 = ["kitten", "puppy"] + linked = link_lists(list_1, list_2) + + assert len(linked) == len(list_1) + From 2a69e88fda6032c790dbe2473cec8813e1909994 Mon Sep 17 00:00:00 2001 From: lizgzil Date: Fri, 26 Jul 2024 16:59:07 +0100 Subject: [PATCH 2/4] Add precommit hook --- 
.pre-commit-config.yaml | 14 ++++++++++++++ README.md | 1 + nlp_link/linker.py | 1 - 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..71835ca --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,14 @@ +repos: + - repo: local + hooks: + - id: black + name: black + entry: black + language: system + types: [python] + require_serial: true + + - repo: https://github.com/prettier/pre-commit + rev: v2.1.2 + hooks: + - id: prettier \ No newline at end of file diff --git a/README.md b/README.md index 62982ba..29f3738 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ In setting up this project we ran: conda create --name nlp-link pip python=3.9 conda activate nlp-link pip install poetry +pip install pre-commit ``` ``` diff --git a/nlp_link/linker.py b/nlp_link/linker.py index 1784c40..0a0c584 100644 --- a/nlp_link/linker.py +++ b/nlp_link/linker.py @@ -1,5 +1,4 @@ import pandas as pd - import random def link_lists(list_1, list_2): From 3b446d899f3bb34002feed7fc9320680200caabe Mon Sep 17 00:00:00 2001 From: lizgzil Date: Fri, 26 Jul 2024 17:02:08 +0100 Subject: [PATCH 3/4] Apply precommit --- .github/workflows/docs.yaml | 2 +- .pre-commit-config.yaml | 2 +- README.md | 8 +++++--- docs/README.md | 2 +- docs/mkdocs.yaml | 2 +- docs/page1.md | 2 +- docs/site_assets/style.css | 8 ++++---- nlp_link/linker.py | 13 +++++++------ tests/test_linker.py | 10 +++++----- 9 files changed, 26 insertions(+), 23 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 6190c46..03074f2 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -24,4 +24,4 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} CONFIG_FILE: docs/mkdocs.yml - REQUIREMENTS: docs/site_assets/requirements.txt \ No newline at end of file + REQUIREMENTS: docs/site_assets/requirements.txt diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 71835ca..7a6d7fd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,4 +11,4 @@ repos: - repo: https://github.com/prettier/pre-commit rev: v2.1.2 hooks: - - id: prettier \ No newline at end of file + - id: prettier diff --git a/README.md b/README.md index 29f3738..84ea873 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,17 @@ # nlp-link -A python package to semantically link two lists of texts. +A python package to semantically link two lists of texts. ## Set-up In setting up this project we ran: + ``` conda create --name nlp-link pip python=3.9 conda activate nlp-link pip install poetry -pip install pre-commit +pip install pre-commit black +pre-commit install ``` ``` @@ -38,6 +40,6 @@ However, if you are editing the docs you can test them out locally by running ``` cd guidelines -pip install -r docs/requirements.txt +pip install -r docs/requirements.txt mkdocs serve ``` diff --git a/docs/README.md b/docs/README.md index 13cbf86..5a47f18 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,4 +2,4 @@ Documentation for NLP Link -- [Page1](./page1.md) \ No newline at end of file +- [Page1](./page1.md) diff --git a/docs/mkdocs.yaml b/docs/mkdocs.yaml index 1bd0fb1..7e1a52a 100644 --- a/docs/mkdocs.yaml +++ b/docs/mkdocs.yaml @@ -41,4 +41,4 @@ nav: - Home: README.md - Page 1: page1.md plugins: - - same-dir \ No newline at end of file + - same-dir diff --git a/docs/page1.md b/docs/page1.md index 127d3fc..65e1cf4 100644 --- a/docs/page1.md +++ b/docs/page1.md @@ -1 +1 @@ -## Title \ No newline at end of file +## Title diff --git a/docs/site_assets/style.css b/docs/site_assets/style.css index f1497f0..2973f45 100644 --- a/docs/site_assets/style.css +++ b/docs/site_assets/style.css @@ -1,10 +1,10 @@ -@import url('https://fonts.cdnfonts.com/css/century-gothic'); +@import url("https://fonts.cdnfonts.com/css/century-gothic"); html, body, [class*="css"] { font-family: "Century Gothic"; } :root { 
- --md-primary-fg-color: #18A48C; - --md-accent-fg-color: #EB003B; -} \ No newline at end of file + --md-primary-fg-color: #18a48c; + --md-accent-fg-color: #eb003b; +} diff --git a/nlp_link/linker.py b/nlp_link/linker.py index 0a0c584..2b61fc8 100644 --- a/nlp_link/linker.py +++ b/nlp_link/linker.py @@ -1,11 +1,12 @@ import pandas as pd import random + def link_lists(list_1, list_2): - """ - Mock linker - """ - list_1_index = list(range(len(list_1))) - list_2_index = list(range(len(list_2))) + """ + Mock linker + """ + list_1_index = list(range(len(list_1))) + list_2_index = list(range(len(list_2))) - return [(i, random.choice(list_1_index)) for i in list_2_index] \ No newline at end of file + return [(i, random.choice(list_1_index)) for i in list_2_index] diff --git a/tests/test_linker.py b/tests/test_linker.py index df8c2a9..87526dc 100644 --- a/tests/test_linker.py +++ b/tests/test_linker.py @@ -1,10 +1,10 @@ from nlp_link.linker import link_lists -def test_link_lists(): - list_1 = ["dog", "cat"] - list_2 = ["kitten", "puppy"] - linked = link_lists(list_1, list_2) +def test_link_lists(): - assert len(linked) == len(list_1) + list_1 = ["dog", "cat"] + list_2 = ["kitten", "puppy"] + linked = link_lists(list_1, list_2) + assert len(linked) == len(list_1) From e50c769db2c448f18a333f72c8ba4dd3e348ff59 Mon Sep 17 00:00:00 2001 From: lizgzil Date: Fri, 26 Jul 2024 17:09:30 +0100 Subject: [PATCH 4/4] Add the MIT license --- LICENSE | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b874526 --- /dev/null +++ b/LICENSE @@ -0,0 +1,10 @@ + +The MIT License (MIT) +===================== +Copyright (c) 2024, Nesta + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file