diff --git a/README.rst b/README.rst index 74bf261..e1520ea 100644 --- a/README.rst +++ b/README.rst @@ -133,6 +133,15 @@ To compile the documentation, run: Changelog ========= +Version 0.3.2 +------------- +- Fixed an RNSB bug where the classification labels were interchanged, which could produce erroneous results when the attribute sets are of different sizes. +- Fixed the RNSB replication notebook. +- Updated the WEFE case study scores. +- Improved documentation examples for WEAT, RNSB, and RIPA. +- Added a holdout parameter to RNSB, which indicates whether or not a holdout is performed when training the classifier. +- Improved printing of the RNSB evaluation. + Version 0.3.1 ------------- - Update WEFE original case study diff --git a/examples/RNSB_experiments.ipynb b/examples/RNSB_experiments.ipynb index 2f28f18..28218cf 100644 --- a/examples/RNSB_experiments.ipynb +++ b/examples/RNSB_experiments.ipynb @@ -1,2888 +1,4185 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-03T14:18:06.044450Z", - "start_time": "2020-04-03T14:18:06.028856Z" - } - }, - "source": [ - "# Replications of the Experiments in RNSB paper" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-24T18:26:40.531212Z", - "start_time": "2020-04-24T18:12:29.141819Z" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from wefe.datasets import load_bingliu\n", - "from wefe.metrics import RNSB\n", - "from wefe.query import Query\n", - "from wefe.word_embedding import \n", - "\n", - "import pandas as pd\n", - "import plotly.express as px\n", - "import gensim.downloader as api\n", - "\n", - "# load the target word sets.\n", - "# In this case each word is an objective set because each of them represents a different social group.\n", - "RNSB_words = [\n", - " ['swedish'], ['irish'], ['mexican'], ['chinese'], ['filipino'], ['german'], ['english'],\n", - " ['french'], ['norwegian'], 
['american'], ['indian'], ['dutch'], ['russian'],\n", - " ['scottish'], ['italian']\n", - "]\n", - "\n", - "bing_liu = load_bingliu()\n", - "\n", - "# Create the query\n", - "query = Query(RNSB_words,\n", - " [bing_liu['positive_words'], bing_liu['negative_words']])\n", - "\n", - "# Fetch the models\n", - "glove = (api.load('glove-wiki-gigaword-300'),\n", - " 'glove-wiki-gigaword-300')\n", - "# note that conceptnet uses a /c/en/ prefix before each word.\n", - "conceptnet = (api.load('conceptnet-numberbatch-17-06-300'),\n", - " 'conceptnet-numberbatch-17',\n", - " vocab_prefix='/c/en/')\n", - "\n", - "# Run the queries\n", - "glove_results = RNSB().run_query(query, glove)\n", - "conceptnet_results = RNSB().run_query(query, conceptnet)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-24T18:26:42.374928Z", - "start_time": "2020-04-24T18:26:41.035240Z" - } - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "alignmentgroup": "True", - "hovertemplate": "Word=%{x}
Sentiment distribution=%{y}", - "legendgroup": "", - "marker": { - "color": "#636efa" - }, - "name": "", - "offsetgroup": "", - "orientation": "v", - "showlegend": false, - "textposition": "auto", - "type": "bar", - "x": [ - "swedish", - "irish", - "mexican", - "chinese", - "filipino", - "german", - "english", - "french", - "norwegian", - "american", - "indian", - "dutch", - "russian", - "scottish", - "italian" - ], - "xaxis": "x", - "y": [ - 0.06890568993157037, - 0.1418796423001591, - 0.021489692643818555, - 0.0364189418945278, - 0.02839471235264379, - 0.06567525170488654, - 0.029292958672046527, - 0.06366809055483107, - 0.01162510093319096, - 0.01580559235556976, - 0.1998574558844747, - 0.23719037263643367, - 0.01452927045934127, - 0.04773169450685658, - 0.01753553316964919 - ], - "yaxis": "y" - } - ], - "layout": { - "barmode": "relative", - "legend": { - "tracegroupgap": 0 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ 
- 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - 
"#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - 
"scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - 
"#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": 
"white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Glove negative sentiment distribution" - }, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Word" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "range": [ - 0, - 0.2 - ], - "title": { - "text": "Sentiment distribution" - } - } - } - } - }, - "metadata": {} - } - ], - "source": [ - "# Show the results obtained with glove\n", - "glove_fig = px.bar(\n", - " pd.DataFrame(glove_results['negative_sentiment_distribution'].items(),\n", - " columns=['Word', 'Sentiment distribution']), x='Word',\n", - " y='Sentiment distribution', title='Glove negative sentiment distribution')\n", - "glove_fig.update_yaxes(range=[0, 0.2])\n", - "glove_fig.show()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-24T18:26:43.495196Z", - "start_time": "2020-04-24T18:26:43.015041Z" - } - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "alignmentgroup": "True", - "hovertemplate": "Word=%{x}
Sentiment distribution=%{y}", - "legendgroup": "", - "marker": { - "color": "#636efa" - }, - "name": "", - "offsetgroup": "", - "orientation": "v", - "showlegend": false, - "textposition": "auto", - "type": "bar", - "x": [ - "/c/en/swedish", - "/c/en/irish", - "/c/en/mexican", - "/c/en/chinese", - "/c/en/filipino", - "/c/en/german", - "/c/en/english", - "/c/en/french", - "/c/en/norwegian", - "/c/en/american", - "/c/en/indian", - "/c/en/dutch", - "/c/en/russian", - "/c/en/scottish", - "/c/en/italian" - ], - "xaxis": "x", - "y": [ - 0.15976272535179706, - 0.043127872065751395, - 0.050596973081996684, - 0.13154029252279223, - 0.025975450409833557, - 0.07856920059681988, - 0.029771705145042904, - 0.03498040677026377, - 0.07434933211387784, - 0.026155682070083845, - 0.038746164733314985, - 0.01946670156158594, - 0.13502768266429815, - 0.1259608864598023, - 0.025968924452739392 - ], - "yaxis": "y" - } - ], - "layout": { - "barmode": "relative", - "legend": { - "tracegroupgap": 0 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - 
"#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - 
], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - 
"colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 
0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } 
- }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Conceptnet negative sentiment distribution" - }, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Word" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "range": [ - 0, - 0.2 - ], - "title": { - "text": "Sentiment distribution" - } - } - } - } - }, - "metadata": {} - } - ], - "source": [ - "# Show the results obtained with conceptnet\n", - "conceptnet_fig = px.bar(\n", - " pd.DataFrame(conceptnet_results['negative_sentiment_distribution'].items(),\n", - " columns=['Word', 'Sentiment distribution']), x='Word',\n", - " y='Sentiment distribution',\n", - " title='Conceptnet negative sentiment distribution')\n", - "conceptnet_fig.update_yaxes(range=[0, 0.2])\n", - "conceptnet_fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-24T18:26:44.655173Z", - "start_time": "2020-04-24T18:26:44.155189Z" - } - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "alignmentgroup": "True", - "hovertemplate": "Word=%{x}
Sentiment distribution=%{y}", - "legendgroup": "", - "marker": { - "color": "#636efa" - }, - "name": "", - "offsetgroup": "", - "orientation": "v", - "showlegend": false, - "textposition": "auto", - "type": "bar", - "x": [ - "/c/en/swedish", - "/c/en/irish", - "/c/en/mexican", - "/c/en/chinese", - "/c/en/filipino", - "/c/en/german", - "/c/en/english", - "/c/en/french", - "/c/en/norwegian", - "/c/en/american", - "/c/en/indian", - "/c/en/dutch", - "/c/en/russian", - "/c/en/scottish", - "/c/en/italian" - ], - "xaxis": "x", - "y": [ - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667, - 0.06666666666666667 - ], - "yaxis": "y" - } - ], - "layout": { - "barmode": "relative", - "legend": { - "tracegroupgap": 0 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], 
- [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 
0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - 
"colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 
0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } 
- }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Fair negative sentiment distribution" - }, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Word" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "range": [ - 0, - 0.2 - ], - "title": { - "text": "Sentiment distribution" - } - } - } - } - }, - "metadata": {} - } - ], - "source": [ - "import numpy as np\n", - "# Show a fairness distribution.\n", - "fair_distribution = pd.DataFrame(\n", - " conceptnet_results['negative_sentiment_distribution'].items(),\n", - " columns=['Word', 'Sentiment distribution'])\n", - "fair_distribution['Sentiment distribution'] = np.ones(\n", - " fair_distribution.shape[0]) / fair_distribution.shape[0]\n", - "\n", - "fair_distribution_fig = px.bar(fair_distribution, x='Word',\n", - " y='Sentiment distribution',\n", - " title='Fair negative sentiment distribution')\n", - "fair_distribution_fig.update_yaxes(range=[0, 0.2])\n", - "fair_distribution_fig.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": true, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - 
"library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2020-04-03T14:18:06.044450Z", + "start_time": "2020-04-03T14:18:06.028856Z" + } + }, + "source": [ + "# RNSB Paper Replication" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the Words and Create the Query" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-04-24T18:26:40.531212Z", + "start_time": "2020-04-24T18:12:29.141819Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/pablo/miniconda3/envs/wefe/lib/python3.7/site-packages/gensim/similarities/__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package is unavailable. Install Levenhstein (e.g. 
`pip install python-Levenshtein`) to suppress this warning.\n", + " warnings.warn(msg)\n" + ] + } + ], + "source": [ + "from wefe.datasets import load_bingliu\n", + "from wefe.metrics import RNSB\n", + "from wefe.query import Query\n", + "from wefe.word_embedding_model import WordEmbeddingModel\n", + "\n", + "import pandas as pd\n", + "import plotly.express as px\n", + "import gensim.downloader as api\n", + "\n", + "# load the target word sets.\n", + "# In this case each word is an objective set because each of them represents a different social group.\n", + "RNSB_words = [\n", + " [\"swedish\"],\n", + " [\"irish\"],\n", + " [\"mexican\"],\n", + " [\"chinese\"],\n", + " [\"filipino\"],\n", + " [\"german\"],\n", + " [\"english\"],\n", + " [\"french\"],\n", + " [\"norwegian\"],\n", + " [\"american\"],\n", + " [\"indian\"],\n", + " [\"dutch\"],\n", + " [\"russian\"],\n", + " [\"scottish\"],\n", + " [\"italian\"],\n", + "]\n", + "\n", + "bing_liu = load_bingliu()\n", + "\n", + "# Create the query\n", + "query = Query(RNSB_words, [bing_liu[\"positive_words\"], bing_liu[\"negative_words\"]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Execute RNSB and Plot Results\n", + "\n", + "### Glove" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def evaluate(\n", + " query: Query, gensim_model_name: str, short_model_name: str, model_args: dict = {}\n", + "):\n", + " # Fetch the model\n", + " model = WordEmbeddingModel(\n", + " api.load(gensim_model_name), short_model_name, **model_args\n", + " )\n", + "\n", + " # Run the queries\n", + " results = RNSB().run_query(\n", + " query, model, holdout=True, print_model_evaluation=True, n_iterations=100\n", + " )\n", + "\n", + " # Show the results obtained with glove\n", + " fig = px.bar(\n", + " pd.DataFrame(\n", + " results[\"negative_sentiment_distribution\"].items(),\n", + " columns=[\"Word\", \"Sentiment distribution\"],\n", + " ),\n", 
+ " x=\"Word\",\n", + " y=\"Sentiment distribution\",\n", + " title=f\"{short_model_name} Negative Sentiment Distribution\",\n", + " )\n", + "\n", + " fig.update_yaxes(range=[0, 0.2])\n", + " fig.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " -1.0 0.92 0.94 0.93 869\n", + " 1.0 0.86 0.82 0.84 379\n", + "\n", + " accuracy 0.91 1248\n", + " macro avg 0.89 0.88 0.89 1248\n", + "weighted avg 0.90 0.91 0.90 1248\n", + "\n", + "When n_iterations > 1, only the first evaluation is printed.\n" + ] + }, + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "alignmentgroup": "True", + "hovertemplate": "Word=%{x}
Sentiment distribution=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "offsetgroup": "", + "orientation": "v", + "showlegend": false, + "textposition": "auto", + "type": "bar", + "x": [ + "swedish", + "irish", + "mexican", + "chinese", + "filipino", + "german", + "english", + "french", + "norwegian", + "american", + "indian", + "dutch", + "russian", + "scottish", + "italian" + ], + "xaxis": "x", + "y": [ + 0.01430402053054762, + 0.043508493246170545, + 0.1677851166902982, + 0.006668552099237882, + 0.027303037131829497, + 0.019857261736229405, + 0.021008165849930645, + 0.03100842134285276, + 0.04121551288883424, + 0.0736208519496387, + 0.2746127318225983, + 0.08440268148117956, + 0.1812936972132892, + 0.010605191487654353, + 0.0028062645297089275 + ], + "yaxis": "y" + } + ], + "layout": { + "barmode": "relative", + "legend": { + "tracegroupgap": 0 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, 
+ "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 
0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { 
+ "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + 
"#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + 
"linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Glove wiki Negative Sentiment Distribution" + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Word" + } + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "range": [ + 0, + 0.2 + ], + "title": { + "text": "Sentiment distribution" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "evaluate(query, 'glove-wiki-gigaword-300', 'Glove wiki')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Conceptnet" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " -1.0 0.98 0.99 0.98 893\n", + " 1.0 0.97 0.94 0.95 364\n", + "\n", + " accuracy 0.97 1257\n", + " macro avg 0.97 0.96 0.97 1257\n", + "weighted avg 0.97 0.97 0.97 1257\n", + "\n", + "When n_iterations > 1, only the first evaluation is printed.\n" + ] + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "alignmentgroup": "True", + "hovertemplate": "Word=%{x}
Sentiment distribution=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "offsetgroup": "", + "orientation": "v", + "showlegend": false, + "textposition": "auto", + "type": "bar", + "x": [ + "/c/en/swedish", + "/c/en/irish", + "/c/en/mexican", + "/c/en/chinese", + "/c/en/filipino", + "/c/en/german", + "/c/en/english", + "/c/en/french", + "/c/en/norwegian", + "/c/en/american", + "/c/en/indian", + "/c/en/dutch", + "/c/en/russian", + "/c/en/scottish", + "/c/en/italian" + ], + "xaxis": "x", + "y": [ + 0.06739126061241793, + 0.06526230554468647, + 0.07863799690686868, + 0.06402964520748745, + 0.06362306685351089, + 0.07492394188139202, + 0.05951386775996924, + 0.07298888733805359, + 0.06092390160226963, + 0.06756327236098728, + 0.06893854924636353, + 0.06068554683009826, + 0.08356499355944683, + 0.04700528996359845, + 0.06494747433284966 + ], + "yaxis": "y" + } + ], + "layout": { + "barmode": "relative", + "legend": { + "tracegroupgap": 0 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + 
{ + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 
0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + 
"type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + 
"#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": 
"white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Conceptnet Negative Sentiment Distribution" + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Word" + } + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "range": [ + 0, + 0.2 + ], + "title": { + "text": "Sentiment distribution" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "evaluate(\n", + " query,\n", + " \"conceptnet-numberbatch-17-06-300\",\n", + " \"Conceptnet\",\n", + " model_args={\"vocab_prefix\": \"/c/en/\"},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2020-04-24T18:26:42.374928Z", + "start_time": "2020-04-24T18:26:41.035240Z" + } + }, + "source": [ + "### Bonus 1: Word2vec" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " -1.0 0.96 0.97 0.97 889\n", + " 1.0 0.94 0.91 0.92 372\n", + "\n", + " accuracy 0.96 1261\n", + " macro avg 0.95 0.94 0.95 1261\n", + "weighted avg 0.96 0.96 0.96 1261\n", + "\n", + "When n_iterations > 1, only the first evaluation is printed.\n" + ] + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "alignmentgroup": "True", + "hovertemplate": "Word=%{x}
Sentiment distribution=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "offsetgroup": "", + "orientation": "v", + "showlegend": false, + "textposition": "auto", + "type": "bar", + "x": [ + "swedish", + "irish", + "mexican", + "chinese", + "filipino", + "german", + "english", + "french", + "norwegian", + "american", + "indian", + "dutch", + "russian", + "scottish", + "italian" + ], + "xaxis": "x", + "y": [ + 0.10918604422114991, + 0.04394455154759233, + 0.09940774930503268, + 0.07116558935576658, + 0.0452809035330149, + 0.011836419831824154, + 0.07685624645952549, + 0.015792943276073036, + 0.08283035912885199, + 0.08029047844629111, + 0.07518334633962705, + 0.10956080839841124, + 0.08920854787849791, + 0.07619530855005788, + 0.013260703728283684 + ], + "yaxis": "y" + } + ], + "layout": { + "barmode": "relative", + "legend": { + "tracegroupgap": 0 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + 
"#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 
0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { 
+ "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + 
"#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + 
"linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Word2vec Negative Sentiment Distribution" + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Word" + } + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "range": [ + 0, + 0.2 + ], + "title": { + "text": "Sentiment distribution" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "evaluate(query, 'word2vec-google-news-300', 'Word2vec')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bonus 2: Fasttext" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " -1.0 0.96 0.98 0.97 916\n", + " 1.0 0.95 0.91 0.93 389\n", + "\n", + " accuracy 0.96 1305\n", + " macro avg 0.96 0.95 0.95 1305\n", + "weighted avg 0.96 0.96 0.96 1305\n", + "\n", + "When n_iterations > 1, only the first evaluation is printed.\n" + ] + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "alignmentgroup": "True", + "hovertemplate": "Word=%{x}
Sentiment distribution=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "offsetgroup": "", + "orientation": "v", + "showlegend": false, + "textposition": "auto", + "type": "bar", + "x": [ + "swedish", + "irish", + "mexican", + "chinese", + "filipino", + "german", + "english", + "french", + "norwegian", + "american", + "indian", + "dutch", + "russian", + "scottish", + "italian" + ], + "xaxis": "x", + "y": [ + 0.05987884801944825, + 0.08201676686326247, + 0.10517306623870974, + 0.08711619999935442, + 0.031108716128351924, + 0.06256908334903725, + 0.04376420388124419, + 0.06630758857926408, + 0.049425884190328784, + 0.07034488127984587, + 0.06865671848311242, + 0.06741533280393565, + 0.10160094032960477, + 0.07678982345378481, + 0.027831946400715398 + ], + "yaxis": "y" + } + ], + "layout": { + "barmode": "relative", + "legend": { + "tracegroupgap": 0 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + 
"#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 
0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { 
+ "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + 
"#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + 
"linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "FastText Negative Sentiment Distribution" + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Word" + } + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "range": [ + 0, + 0.2 + ], + "title": { + "text": "Sentiment distribution" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "evaluate(query, 'fasttext-wiki-news-subwords-300', 'FastText')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/WEAT_experiments.ipynb b/examples/WEAT_experiments.ipynb index 59d1df5..0554852 100644 --- a/examples/WEAT_experiments.ipynb +++ b/examples/WEAT_experiments.ipynb @@ -1,1739 +1,1739 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Replications of WEAT Experiments \n", - "\n", - "Reference:\n", - "\n", - "> Caliskan, A., Bryson, J. J., & Narayanan, A. (2017). Semantics derived automatically from language corpora contain human-like biases. 
Science, 356(6334), 183-186.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-28T20:51:31.761648Z", - "start_time": "2020-12-28T20:51:28.677284Z" - } - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import gensim.downloader as api\n", - "\n", - "from wefe.metrics import WEAT\n", - "from wefe.datasets import load_weat\n", - "from wefe.query import Query\n", - "from wefe.word_embedding_model import WordEmbeddingModel\n", - "from wefe.utils import run_queries\n", - "from wefe.utils import plot_queries_results\n", - "\n", - "original_results = pd.DataFrame({\n", - " 'word2vec-google-news-300 (WEAT original)':\n", - " {'Flowers and Insects wrt Pleasant(5) and Unpleasant(5)': 1.54,\n", - " 'Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)': 1.63,\n", - " 'European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)': 0.58,\n", - " 'European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)': 1.24,\n", - " 'European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)': 0.72,\n", - " 'Male names and Female names wrt Career and Family': 1.89,\n", - " 'Math and Arts wrt Male terms and Female terms': 0.97,\n", - " 'Science and Arts 2 wrt Male terms and Female terms': 1.24,\n", - " 'Mental disease and Physical disease wrt Temporary and Permanent': 1.30,\n", - " 'Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)': -0.08},\n", - " \n", - " 'glove-wiki-gigaword-300 (WEAT original)': \n", - " {'Flowers and Insects wrt Pleasant(5) and Unpleasant(5)': 1.50,\n", - " 'Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)': 1.53,\n", - " 'European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)': 1.41,\n", - " 'European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)': 1.50,\n", - " 'European american 
names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)': 1.28,\n", - " 'Male names and Female names wrt Career and Family': 1.81,\n", - " 'Math and Arts wrt Male terms and Female terms': 1.06,\n", - " 'Science and Arts 2 wrt Male terms and Female terms': 1.24,\n", - " 'Mental disease and Physical disease wrt Temporary and Permanent': 1.38,\n", - " 'Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)': 1.21}})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define the queries\n", - "\n", - "All queries are based on the original paper and are loaded using the `load_weat` function from the `datasets` module" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-28T20:51:31.775651Z", - "start_time": "2020-12-28T20:51:31.763648Z" - } - }, - "outputs": [], - "source": [ - "# Load the wordset\n", - "weat_wordset = load_weat()\n", - "\n", - "# Define the 10 Queries:\n", - "queries = [\n", - " # Flowers vs Insects wrt Pleasant (5) and Unpleasant (5)\n", - " Query([weat_wordset['flowers'], weat_wordset['insects']],\n", - " [weat_wordset['pleasant_5'], weat_wordset['unpleasant_5']],\n", - " ['Flowers', 'Insects'], ['Pleasant(5)', 'Unpleasant(5)']),\n", - "\n", - " # Instruments vs Weapons wrt Pleasant (5) and Unpleasant (5)\n", - " Query([weat_wordset['instruments'], weat_wordset['weapons']],\n", - " [weat_wordset['pleasant_5'], weat_wordset['unpleasant_5']],\n", - " ['Instruments', 'Weapons'], ['Pleasant(5)', 'Unpleasant(5)']),\n", - "\n", - " # European american names(5) vs African american names(5)\n", - " # wrt Pleasant (5) and Unpleasant (5)\n", - " Query([\n", - " weat_wordset['european_american_names_5'],\n", - " weat_wordset['african_american_names_5']\n", - " ], [weat_wordset['pleasant_5'], weat_wordset['unpleasant_5']],\n", - " ['European american names(5)', 'African american names(5)'],\n", - " ['Pleasant(5)', 
'Unpleasant(5)']),\n", - "\n", - " # European american names(7) vs African american names(7)\n", - " # wrt Pleasant (5) and Unpleasant (5)\n", - " Query([\n", - " weat_wordset['european_american_names_7'],\n", - " weat_wordset['african_american_names_7']\n", - " ], [weat_wordset['pleasant_5'], weat_wordset['unpleasant_5']],\n", - " ['European american names(7)', 'African american names(7)'],\n", - " ['Pleasant(5)', 'Unpleasant(5)']),\n", - "\n", - " # European american names(7) vs African american names(7)\n", - " # wrt Pleasant (9) and Unpleasant (9)\n", - " Query([\n", - " weat_wordset['european_american_names_7'],\n", - " weat_wordset['african_american_names_7']\n", - " ], [weat_wordset['pleasant_9'], weat_wordset['unpleasant_9']],\n", - " ['European american names(7)', 'African american names(7)'],\n", - " ['Pleasant(9)', 'Unpleasant(9)']),\n", - "\n", - " # Male and female names wrt Career and family\n", - " Query([weat_wordset['male_names'], weat_wordset['female_names']],\n", - " [weat_wordset['career'], weat_wordset['family']],\n", - " ['Male names', 'Female names'], ['Career', 'Family']),\n", - "\n", - " # Math and arts wrt male and female terms\n", - " Query([weat_wordset['math'], weat_wordset['arts']],\n", - " [weat_wordset['male_terms'], weat_wordset['female_terms']],\n", - " ['Math', 'Arts'], ['Male terms', 'Female terms']),\n", - "\n", - " # Science and arts wrt male and female terms\n", - " Query([weat_wordset['science'], weat_wordset['arts_2']],\n", - " [weat_wordset['male_terms'], weat_wordset['female_terms']],\n", - " ['Science', 'Arts 2'], ['Male terms', 'Female terms']),\n", - "\n", - " # Mental and Physical disease wrt Temporary and Permanent\n", - " Query([weat_wordset['mental_disease'], weat_wordset['physical_disease']],\n", - " [weat_wordset['temporary'], weat_wordset['permanent']],\n", - " ['Mental disease', 'Physical disease'], ['Temporary', 'Permanent']),\n", - "\n", - " # Young people names and Old people names disease wrt Pleasant(9) and 
Unpleasant(9)\n", - " Query(\n", - " [weat_wordset['young_people_names'], weat_wordset['old_people_names']],\n", - " [weat_wordset['pleasant_9'], weat_wordset['unpleasant_9']],\n", - " ['Young peoples names', 'Old peoples names'],\n", - " ['Pleasant(9)', 'Unpleasant(9)'])\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load the embedding models\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-28T20:55:43.880381Z", - "start_time": "2020-12-28T20:51:31.781649Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:gensim.models.keyedvectors:loading projection weights from C:\\Users\\pablo/gensim-data\\word2vec-google-news-300\\word2vec-google-news-300.gz\n", - "DEBUG:smart_open.smart_open_lib:{'uri': 'C:\\\\Users\\\\pablo/gensim-data\\\\word2vec-google-news-300\\\\word2vec-google-news-300.gz', 'mode': 'rb', 'buffering': -1, 'encoding': None, 'errors': None, 'newline': None, 'closefd': True, 'opener': None, 'ignore_ext': False, 'transport_params': None}\n", - "INFO:gensim.models.keyedvectors:loaded (3000000, 300) matrix from C:\\Users\\pablo/gensim-data\\word2vec-google-news-300\\word2vec-google-news-300.gz\n", - "INFO:gensim.models.keyedvectors:loading projection weights from C:\\Users\\pablo/gensim-data\\glove-wiki-gigaword-300\\glove-wiki-gigaword-300.gz\n", - "DEBUG:smart_open.smart_open_lib:{'uri': 'C:\\\\Users\\\\pablo/gensim-data\\\\glove-wiki-gigaword-300\\\\glove-wiki-gigaword-300.gz', 'mode': 'rb', 'buffering': -1, 'encoding': None, 'errors': None, 'newline': None, 'closefd': True, 'opener': None, 'ignore_ext': False, 'transport_params': None}\n", - "INFO:gensim.models.keyedvectors:loaded (400000, 300) matrix from C:\\Users\\pablo/gensim-data\\glove-wiki-gigaword-300\\glove-wiki-gigaword-300.gz\n" - ] - } - ], - "source": [ - "w2v = WordEmbeddingModel(api.load('word2vec-google-news-300'),\n", - " 
'word2vec-google-news-300')\n", - "glove = WordEmbeddingModel(api.load('glove-wiki-gigaword-300'),\n", - " 'glove-wiki-gigaword-300')\n", - "\n", - "models = [w2v, glove]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Run the queries using WEAT\n", - "\n", - "\n", - "We use `run_queries` with `secondary_preprocessor_args : {'lowercase': True}` in `metric_params` so that when transforming the words to embeddings, first look for the original word and if you do not find it, look for the word in lower case. This feature is particularly useful for glove, since it is uncased.\n", - "\n", - "In practice, it transforms the original words into word2vec and transforms the lower words into glove." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-28T20:55:45.123583Z", - "start_time": "2020-12-28T20:55:43.885382Z" - }, - "scrolled": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:The following words from set 'Flowers' do not exist within the vocabulary of glove-wiki-gigaword-300: ['gladiola']\n", - "WARNING:root:The following words from set 'Weapons' do not exist within the vocabulary of word2vec-google-news-300: ['axe']\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
word2vec-google-news-300 (WEAT original)word2vec-google-news-300glove-wiki-gigaword-300 (WEAT original)glove-wiki-gigaword-300
query_name
Flowers and Insects wrt Pleasant(5) and Unpleasant(5)1.541.411.501.27
Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)1.631.751.531.91
European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)0.580.451.411.28
European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)1.240.451.500.85
European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)0.720.341.281.07
Male names and Female names wrt Career and Family1.891.251.811.32
Math and Arts wrt Male terms and Female terms0.970.231.060.24
Science and Arts 2 wrt Male terms and Female terms1.240.291.240.32
Mental disease and Physical disease wrt Temporary and Permanent1.300.341.380.57
Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)-0.08-0.051.210.21
\n", - "
" - ], - "text/plain": [ - " word2vec-google-news-300 (WEAT original) \\\n", - "query_name \n", - "Flowers and Insects wrt Pleasant(5) and Unpleas... 1.54 \n", - "Instruments and Weapons wrt Pleasant(5) and Unp... 1.63 \n", - "European american names(5) and African american... 0.58 \n", - "European american names(7) and African american... 1.24 \n", - "European american names(7) and African american... 0.72 \n", - "Male names and Female names wrt Career and Family 1.89 \n", - "Math and Arts wrt Male terms and Female terms 0.97 \n", - "Science and Arts 2 wrt Male terms and Female terms 1.24 \n", - "Mental disease and Physical disease wrt Tempora... 1.30 \n", - "Young peoples names and Old peoples names wrt P... -0.08 \n", - "\n", - " word2vec-google-news-300 \\\n", - "query_name \n", - "Flowers and Insects wrt Pleasant(5) and Unpleas... 1.41 \n", - "Instruments and Weapons wrt Pleasant(5) and Unp... 1.75 \n", - "European american names(5) and African american... 0.45 \n", - "European american names(7) and African american... 0.45 \n", - "European american names(7) and African american... 0.34 \n", - "Male names and Female names wrt Career and Family 1.25 \n", - "Math and Arts wrt Male terms and Female terms 0.23 \n", - "Science and Arts 2 wrt Male terms and Female terms 0.29 \n", - "Mental disease and Physical disease wrt Tempora... 0.34 \n", - "Young peoples names and Old peoples names wrt P... -0.05 \n", - "\n", - " glove-wiki-gigaword-300 (WEAT original) \\\n", - "query_name \n", - "Flowers and Insects wrt Pleasant(5) and Unpleas... 1.50 \n", - "Instruments and Weapons wrt Pleasant(5) and Unp... 1.53 \n", - "European american names(5) and African american... 1.41 \n", - "European american names(7) and African american... 1.50 \n", - "European american names(7) and African american... 
1.28 \n", - "Male names and Female names wrt Career and Family 1.81 \n", - "Math and Arts wrt Male terms and Female terms 1.06 \n", - "Science and Arts 2 wrt Male terms and Female terms 1.24 \n", - "Mental disease and Physical disease wrt Tempora... 1.38 \n", - "Young peoples names and Old peoples names wrt P... 1.21 \n", - "\n", - " glove-wiki-gigaword-300 \n", - "query_name \n", - "Flowers and Insects wrt Pleasant(5) and Unpleas... 1.27 \n", - "Instruments and Weapons wrt Pleasant(5) and Unp... 1.91 \n", - "European american names(5) and African american... 1.28 \n", - "European american names(7) and African american... 0.85 \n", - "European american names(7) and African american... 1.07 \n", - "Male names and Female names wrt Career and Family 1.32 \n", - "Math and Arts wrt Male terms and Female terms 0.24 \n", - "Science and Arts 2 wrt Male terms and Female terms 0.32 \n", - "Mental disease and Physical disease wrt Tempora... 0.57 \n", - "Young peoples names and Old peoples names wrt P... 
0.21 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "wefe_results = run_queries(WEAT,\n", - " queries,\n", - " models,\n", - " metric_params={\n", - " 'secondary_preprocessor_args': {\n", - " 'lowercase': True\n", - " },\n", - " 'warn_not_found_words': True\n", - " }).T.round(2)\n", - "\n", - "joined_results = pd.concat([wefe_results, original_results], axis=1)\n", - "joined_results = joined_results.iloc[:, [2,0,3,1]] # reorder the columns\n", - "joined_results" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-28T20:39:54.655373Z", - "start_time": "2020-12-28T20:39:54.652371Z" - } - }, - "source": [ - "## Visualize the results" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-28T20:55:46.875912Z", - "start_time": "2020-12-28T20:55:45.125583Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "alignmentgroup": "True", - "hovertemplate": "Word Embedding Model=Flowers and Insects wrt Pleasant(5) and Unpleasant(5)
query_name=%{x}
value=%{y}", - "legendgroup": "Flowers and Insects wrt Pleasant(5) and Unpleasant(5)", - "marker": { - "color": "#636efa" - }, - "name": "Flowers and Insects wrt Pleasant(5) and Unpleasant(5)", - "offsetgroup": "Flowers and Insects wrt Pleasant(5) and Unpleasant(5)", - "orientation": "v", - "showlegend": true, - "textposition": "auto", - "type": "bar", - "x": [ - "word2vec-google-news-300 (WEAT original)", - "word2vec-google-news-300", - "glove-wiki-gigaword-300 (WEAT original)", - "glove-wiki-gigaword-300" - ], - "xaxis": "x", - "y": [ - 1.54, - 1.41, - 1.5, - 1.27 - ], - "yaxis": "y" - }, - { - "alignmentgroup": "True", - "hovertemplate": "Word Embedding Model=Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)
query_name=%{x}
value=%{y}", - "legendgroup": "Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)", - "marker": { - "color": "#EF553B" - }, - "name": "Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)", - "offsetgroup": "Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)", - "orientation": "v", - "showlegend": true, - "textposition": "auto", - "type": "bar", - "x": [ - "word2vec-google-news-300 (WEAT original)", - "word2vec-google-news-300", - "glove-wiki-gigaword-300 (WEAT original)", - "glove-wiki-gigaword-300" - ], - "xaxis": "x", - "y": [ - 1.63, - 1.75, - 1.53, - 1.91 - ], - "yaxis": "y" - }, - { - "alignmentgroup": "True", - "hovertemplate": "Word Embedding Model=European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)
query_name=%{x}
value=%{y}", - "legendgroup": "European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)", - "marker": { - "color": "#00cc96" - }, - "name": "European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)", - "offsetgroup": "European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)", - "orientation": "v", - "showlegend": true, - "textposition": "auto", - "type": "bar", - "x": [ - "word2vec-google-news-300 (WEAT original)", - "word2vec-google-news-300", - "glove-wiki-gigaword-300 (WEAT original)", - "glove-wiki-gigaword-300" - ], - "xaxis": "x", - "y": [ - 0.58, - 0.45, - 1.41, - 1.28 - ], - "yaxis": "y" - }, - { - "alignmentgroup": "True", - "hovertemplate": "Word Embedding Model=European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)
query_name=%{x}
value=%{y}", - "legendgroup": "European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)", - "marker": { - "color": "#ab63fa" - }, - "name": "European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)", - "offsetgroup": "European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)", - "orientation": "v", - "showlegend": true, - "textposition": "auto", - "type": "bar", - "x": [ - "word2vec-google-news-300 (WEAT original)", - "word2vec-google-news-300", - "glove-wiki-gigaword-300 (WEAT original)", - "glove-wiki-gigaword-300" - ], - "xaxis": "x", - "y": [ - 1.24, - 0.45, - 1.5, - 0.85 - ], - "yaxis": "y" - }, - { - "alignmentgroup": "True", - "hovertemplate": "Word Embedding Model=European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)
query_name=%{x}
value=%{y}", - "legendgroup": "European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)", - "marker": { - "color": "#FFA15A" - }, - "name": "European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)", - "offsetgroup": "European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)", - "orientation": "v", - "showlegend": true, - "textposition": "auto", - "type": "bar", - "x": [ - "word2vec-google-news-300 (WEAT original)", - "word2vec-google-news-300", - "glove-wiki-gigaword-300 (WEAT original)", - "glove-wiki-gigaword-300" - ], - "xaxis": "x", - "y": [ - 0.72, - 0.34, - 1.28, - 1.07 - ], - "yaxis": "y" - }, - { - "alignmentgroup": "True", - "hovertemplate": "Word Embedding Model=Male names and Female names wrt Career and Family
query_name=%{x}
value=%{y}", - "legendgroup": "Male names and Female names wrt Career and Family", - "marker": { - "color": "#19d3f3" - }, - "name": "Male names and Female names wrt Career and Family", - "offsetgroup": "Male names and Female names wrt Career and Family", - "orientation": "v", - "showlegend": true, - "textposition": "auto", - "type": "bar", - "x": [ - "word2vec-google-news-300 (WEAT original)", - "word2vec-google-news-300", - "glove-wiki-gigaword-300 (WEAT original)", - "glove-wiki-gigaword-300" - ], - "xaxis": "x", - "y": [ - 1.89, - 1.25, - 1.81, - 1.32 - ], - "yaxis": "y" - }, - { - "alignmentgroup": "True", - "hovertemplate": "Word Embedding Model=Math and Arts wrt Male terms and Female terms
query_name=%{x}
value=%{y}", - "legendgroup": "Math and Arts wrt Male terms and Female terms", - "marker": { - "color": "#FF6692" - }, - "name": "Math and Arts wrt Male terms and Female terms", - "offsetgroup": "Math and Arts wrt Male terms and Female terms", - "orientation": "v", - "showlegend": true, - "textposition": "auto", - "type": "bar", - "x": [ - "word2vec-google-news-300 (WEAT original)", - "word2vec-google-news-300", - "glove-wiki-gigaword-300 (WEAT original)", - "glove-wiki-gigaword-300" - ], - "xaxis": "x", - "y": [ - 0.97, - 0.23, - 1.06, - 0.24 - ], - "yaxis": "y" - }, - { - "alignmentgroup": "True", - "hovertemplate": "Word Embedding Model=Science and Arts 2 wrt Male terms and Female terms
query_name=%{x}
value=%{y}", - "legendgroup": "Science and Arts 2 wrt Male terms and Female terms", - "marker": { - "color": "#B6E880" - }, - "name": "Science and Arts 2 wrt Male terms and Female terms", - "offsetgroup": "Science and Arts 2 wrt Male terms and Female terms", - "orientation": "v", - "showlegend": true, - "textposition": "auto", - "type": "bar", - "x": [ - "word2vec-google-news-300 (WEAT original)", - "word2vec-google-news-300", - "glove-wiki-gigaword-300 (WEAT original)", - "glove-wiki-gigaword-300" - ], - "xaxis": "x", - "y": [ - 1.24, - 0.29, - 1.24, - 0.32 - ], - "yaxis": "y" - }, - { - "alignmentgroup": "True", - "hovertemplate": "Word Embedding Model=Mental disease and Physical disease wrt Temporary and Permanent
query_name=%{x}
value=%{y}", - "legendgroup": "Mental disease and Physical disease wrt Temporary and Permanent", - "marker": { - "color": "#FF97FF" - }, - "name": "Mental disease and Physical disease wrt Temporary and Permanent", - "offsetgroup": "Mental disease and Physical disease wrt Temporary and Permanent", - "orientation": "v", - "showlegend": true, - "textposition": "auto", - "type": "bar", - "x": [ - "word2vec-google-news-300 (WEAT original)", - "word2vec-google-news-300", - "glove-wiki-gigaword-300 (WEAT original)", - "glove-wiki-gigaword-300" - ], - "xaxis": "x", - "y": [ - 1.3, - 0.34, - 1.38, - 0.57 - ], - "yaxis": "y" - }, - { - "alignmentgroup": "True", - "hovertemplate": "Word Embedding Model=Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)
query_name=%{x}
value=%{y}", - "legendgroup": "Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)", - "marker": { - "color": "#FECB52" - }, - "name": "Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)", - "offsetgroup": "Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)", - "orientation": "v", - "showlegend": true, - "textposition": "auto", - "type": "bar", - "x": [ - "word2vec-google-news-300 (WEAT original)", - "word2vec-google-news-300", - "glove-wiki-gigaword-300 (WEAT original)", - "glove-wiki-gigaword-300" - ], - "xaxis": "x", - "y": [ - -0.08, - -0.05, - 1.21, - 0.21 - ], - "yaxis": "y" - } - ], - "layout": { - "barmode": "group", - "legend": { - "title": { - "text": "Word Embedding Model" - }, - "tracegroupgap": 0 - }, - "margin": { - "t": 60 - }, - "showlegend": false, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 
0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - 
], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": 
{ - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - 
], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 
- }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Query" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Bias measure" - } - } - } - }, - "text/html": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig = plot_queries_results(joined_results)\n", - "fig.update_layout(showlegend=False)\n", - "fig.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": true, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Replications of WEAT Experiments \n", + "\n", + "Reference:\n", + "\n", + "> Caliskan, A., Bryson, J. J., & Narayanan, A. (2017). Semantics derived automatically from language corpora contain human-like biases. 
Science, 356(6334), 183-186.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2020-12-28T20:51:31.761648Z", + "start_time": "2020-12-28T20:51:28.677284Z" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import gensim.downloader as api\n", + "\n", + "from wefe.metrics import WEAT\n", + "from wefe.datasets import load_weat\n", + "from wefe.query import Query\n", + "from wefe.word_embedding_model import WordEmbeddingModel\n", + "from wefe.utils import run_queries\n", + "from wefe.utils import plot_queries_results\n", + "\n", + "original_results = pd.DataFrame({\n", + " 'word2vec-google-news-300 (WEAT original)':\n", + " {'Flowers and Insects wrt Pleasant(5) and Unpleasant(5)': 1.54,\n", + " 'Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)': 1.63,\n", + " 'European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)': 0.58,\n", + " 'European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)': 1.24,\n", + " 'European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)': 0.72,\n", + " 'Male names and Female names wrt Career and Family': 1.89,\n", + " 'Math and Arts wrt Male terms and Female terms': 0.97,\n", + " 'Science and Arts 2 wrt Male terms and Female terms': 1.24,\n", + " 'Mental disease and Physical disease wrt Temporary and Permanent': 1.30,\n", + " 'Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)': -0.08},\n", + " \n", + " 'glove-wiki-gigaword-300 (WEAT original)': \n", + " {'Flowers and Insects wrt Pleasant(5) and Unpleasant(5)': 1.50,\n", + " 'Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)': 1.53,\n", + " 'European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)': 1.41,\n", + " 'European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)': 1.50,\n", + " 'European american 
names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)': 1.28,\n", + " 'Male names and Female names wrt Career and Family': 1.81,\n", + " 'Math and Arts wrt Male terms and Female terms': 1.06,\n", + " 'Science and Arts 2 wrt Male terms and Female terms': 1.24,\n", + " 'Mental disease and Physical disease wrt Temporary and Permanent': 1.38,\n", + " 'Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)': 1.21}})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define the queries\n", + "\n", + "All queries are based on the original paper and are loaded using the `load_weat` function from the `datasets` module" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-12-28T20:51:31.775651Z", + "start_time": "2020-12-28T20:51:31.763648Z" + } + }, + "outputs": [], + "source": [ + "# Load the wordset\n", + "weat_wordset = load_weat()\n", + "\n", + "# Define the 10 Queries:\n", + "queries = [\n", + " # Flowers vs Insects wrt Pleasant (5) and Unpleasant (5)\n", + " Query([weat_wordset['flowers'], weat_wordset['insects']],\n", + " [weat_wordset['pleasant_5'], weat_wordset['unpleasant_5']],\n", + " ['Flowers', 'Insects'], ['Pleasant(5)', 'Unpleasant(5)']),\n", + "\n", + " # Instruments vs Weapons wrt Pleasant (5) and Unpleasant (5)\n", + " Query([weat_wordset['instruments'], weat_wordset['weapons']],\n", + " [weat_wordset['pleasant_5'], weat_wordset['unpleasant_5']],\n", + " ['Instruments', 'Weapons'], ['Pleasant(5)', 'Unpleasant(5)']),\n", + "\n", + " # European american names(5) vs African american names(5)\n", + " # wrt Pleasant (5) and Unpleasant (5)\n", + " Query([\n", + " weat_wordset['european_american_names_5'],\n", + " weat_wordset['african_american_names_5']\n", + " ], [weat_wordset['pleasant_5'], weat_wordset['unpleasant_5']],\n", + " ['European american names(5)', 'African american names(5)'],\n", + " ['Pleasant(5)', 
'Unpleasant(5)']),\n", + "\n", + " # European american names(7) vs African american names(7)\n", + " # wrt Pleasant (5) and Unpleasant (5)\n", + " Query([\n", + " weat_wordset['european_american_names_7'],\n", + " weat_wordset['african_american_names_7']\n", + " ], [weat_wordset['pleasant_5'], weat_wordset['unpleasant_5']],\n", + " ['European american names(7)', 'African american names(7)'],\n", + " ['Pleasant(5)', 'Unpleasant(5)']),\n", + "\n", + " # European american names(7) vs African american names(7)\n", + " # wrt Pleasant (9) and Unpleasant (9)\n", + " Query([\n", + " weat_wordset['european_american_names_7'],\n", + " weat_wordset['african_american_names_7']\n", + " ], [weat_wordset['pleasant_9'], weat_wordset['unpleasant_9']],\n", + " ['European american names(7)', 'African american names(7)'],\n", + " ['Pleasant(9)', 'Unpleasant(9)']),\n", + "\n", + " # Male and female names wrt Career and family\n", + " Query([weat_wordset['male_names'], weat_wordset['female_names']],\n", + " [weat_wordset['career'], weat_wordset['family']],\n", + " ['Male names', 'Female names'], ['Career', 'Family']),\n", + "\n", + " # Math and arts wrt male and female terms\n", + " Query([weat_wordset['math'], weat_wordset['arts']],\n", + " [weat_wordset['male_terms'], weat_wordset['female_terms']],\n", + " ['Math', 'Arts'], ['Male terms', 'Female terms']),\n", + "\n", + " # Science and arts wrt male and female terms\n", + " Query([weat_wordset['science'], weat_wordset['arts_2']],\n", + " [weat_wordset['male_terms'], weat_wordset['female_terms']],\n", + " ['Science', 'Arts 2'], ['Male terms', 'Female terms']),\n", + "\n", + " # Mental and Physical disease wrt Temporary and Permanent\n", + " Query([weat_wordset['mental_disease'], weat_wordset['physical_disease']],\n", + " [weat_wordset['temporary'], weat_wordset['permanent']],\n", + " ['Mental disease', 'Physical disease'], ['Temporary', 'Permanent']),\n", + "\n", + " # Young people names and Old people names wrt Pleasant(9) and
Unpleasant(9)\n", + " Query(\n", + " [weat_wordset['young_people_names'], weat_wordset['old_people_names']],\n", + " [weat_wordset['pleasant_9'], weat_wordset['unpleasant_9']],\n", + " ['Young peoples names', 'Old peoples names'],\n", + " ['Pleasant(9)', 'Unpleasant(9)'])\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the embedding models\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-12-28T20:55:43.880381Z", + "start_time": "2020-12-28T20:51:31.781649Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:gensim.models.keyedvectors:loading projection weights from C:\\Users\\pablo/gensim-data\\word2vec-google-news-300\\word2vec-google-news-300.gz\n", + "DEBUG:smart_open.smart_open_lib:{'uri': 'C:\\\\Users\\\\pablo/gensim-data\\\\word2vec-google-news-300\\\\word2vec-google-news-300.gz', 'mode': 'rb', 'buffering': -1, 'encoding': None, 'errors': None, 'newline': None, 'closefd': True, 'opener': None, 'ignore_ext': False, 'transport_params': None}\n", + "INFO:gensim.models.keyedvectors:loaded (3000000, 300) matrix from C:\\Users\\pablo/gensim-data\\word2vec-google-news-300\\word2vec-google-news-300.gz\n", + "INFO:gensim.models.keyedvectors:loading projection weights from C:\\Users\\pablo/gensim-data\\glove-wiki-gigaword-300\\glove-wiki-gigaword-300.gz\n", + "DEBUG:smart_open.smart_open_lib:{'uri': 'C:\\\\Users\\\\pablo/gensim-data\\\\glove-wiki-gigaword-300\\\\glove-wiki-gigaword-300.gz', 'mode': 'rb', 'buffering': -1, 'encoding': None, 'errors': None, 'newline': None, 'closefd': True, 'opener': None, 'ignore_ext': False, 'transport_params': None}\n", + "INFO:gensim.models.keyedvectors:loaded (400000, 300) matrix from C:\\Users\\pablo/gensim-data\\glove-wiki-gigaword-300\\glove-wiki-gigaword-300.gz\n" + ] + } + ], + "source": [ + "w2v = WordEmbeddingModel(api.load('word2vec-google-news-300'),\n", + " 
'word2vec-google-news-300')\n", + "glove = WordEmbeddingModel(api.load('glove-wiki-gigaword-300'),\n", + " 'glove-wiki-gigaword-300')\n", + "\n", + "models = [w2v, glove]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run the queries using WEAT\n", + "\n", + "\n", + "We use `run_queries` with `secondary_preprocessor_args : {'lowercase': True}` in `metric_params` so that, when transforming the words into embeddings, each word is first looked up in its original form and, if it is not found, looked up again in lowercase. This feature is particularly useful for glove, since it is uncased.\n", + "\n", + "In practice, the words are looked up in their original form in word2vec and in lowercase in glove." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-12-28T20:55:45.123583Z", + "start_time": "2020-12-28T20:55:43.885382Z" + }, + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:The following words from set 'Flowers' do not exist within the vocabulary of glove-wiki-gigaword-300: ['gladiola']\n", + "WARNING:root:The following words from set 'Weapons' do not exist within the vocabulary of word2vec-google-news-300: ['axe']\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
word2vec-google-news-300 (WEAT original)word2vec-google-news-300glove-wiki-gigaword-300 (WEAT original)glove-wiki-gigaword-300
query_name
Flowers and Insects wrt Pleasant(5) and Unpleasant(5)1.541.411.501.27
Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)1.631.751.531.91
European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)0.580.451.411.28
European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)1.240.451.500.85
European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)0.720.341.281.07
Male names and Female names wrt Career and Family1.891.251.811.32
Math and Arts wrt Male terms and Female terms0.970.231.060.24
Science and Arts 2 wrt Male terms and Female terms1.240.291.240.32
Mental disease and Physical disease wrt Temporary and Permanent1.300.341.380.57
Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)-0.08-0.051.210.21
\n", + "
" + ], + "text/plain": [ + " word2vec-google-news-300 (WEAT original) \\\n", + "query_name \n", + "Flowers and Insects wrt Pleasant(5) and Unpleas... 1.54 \n", + "Instruments and Weapons wrt Pleasant(5) and Unp... 1.63 \n", + "European american names(5) and African american... 0.58 \n", + "European american names(7) and African american... 1.24 \n", + "European american names(7) and African american... 0.72 \n", + "Male names and Female names wrt Career and Family 1.89 \n", + "Math and Arts wrt Male terms and Female terms 0.97 \n", + "Science and Arts 2 wrt Male terms and Female terms 1.24 \n", + "Mental disease and Physical disease wrt Tempora... 1.30 \n", + "Young peoples names and Old peoples names wrt P... -0.08 \n", + "\n", + " word2vec-google-news-300 \\\n", + "query_name \n", + "Flowers and Insects wrt Pleasant(5) and Unpleas... 1.41 \n", + "Instruments and Weapons wrt Pleasant(5) and Unp... 1.75 \n", + "European american names(5) and African american... 0.45 \n", + "European american names(7) and African american... 0.45 \n", + "European american names(7) and African american... 0.34 \n", + "Male names and Female names wrt Career and Family 1.25 \n", + "Math and Arts wrt Male terms and Female terms 0.23 \n", + "Science and Arts 2 wrt Male terms and Female terms 0.29 \n", + "Mental disease and Physical disease wrt Tempora... 0.34 \n", + "Young peoples names and Old peoples names wrt P... -0.05 \n", + "\n", + " glove-wiki-gigaword-300 (WEAT original) \\\n", + "query_name \n", + "Flowers and Insects wrt Pleasant(5) and Unpleas... 1.50 \n", + "Instruments and Weapons wrt Pleasant(5) and Unp... 1.53 \n", + "European american names(5) and African american... 1.41 \n", + "European american names(7) and African american... 1.50 \n", + "European american names(7) and African american... 
1.28 \n", + "Male names and Female names wrt Career and Family 1.81 \n", + "Math and Arts wrt Male terms and Female terms 1.06 \n", + "Science and Arts 2 wrt Male terms and Female terms 1.24 \n", + "Mental disease and Physical disease wrt Tempora... 1.38 \n", + "Young peoples names and Old peoples names wrt P... 1.21 \n", + "\n", + " glove-wiki-gigaword-300 \n", + "query_name \n", + "Flowers and Insects wrt Pleasant(5) and Unpleas... 1.27 \n", + "Instruments and Weapons wrt Pleasant(5) and Unp... 1.91 \n", + "European american names(5) and African american... 1.28 \n", + "European american names(7) and African american... 0.85 \n", + "European american names(7) and African american... 1.07 \n", + "Male names and Female names wrt Career and Family 1.32 \n", + "Math and Arts wrt Male terms and Female terms 0.24 \n", + "Science and Arts 2 wrt Male terms and Female terms 0.32 \n", + "Mental disease and Physical disease wrt Tempora... 0.57 \n", + "Young peoples names and Old peoples names wrt P... 
0.21 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wefe_results = run_queries(WEAT,\n", + " queries,\n", + " models,\n", + " metric_params={\n", + " 'secondary_preprocessor_args': {\n", + " 'lowercase': True\n", + " },\n", + " 'warn_not_found_words': True\n", + " }).T.round(2)\n", + "\n", + "joined_results = pd.concat([wefe_results, original_results], axis=1)\n", + "joined_results = joined_results.iloc[:, [2,0,3,1]] # reorder the columns\n", + "joined_results" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2020-12-28T20:39:54.655373Z", + "start_time": "2020-12-28T20:39:54.652371Z" + } + }, + "source": [ + "## Visualize the results" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-12-28T20:55:46.875912Z", + "start_time": "2020-12-28T20:55:45.125583Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "alignmentgroup": "True", + "hovertemplate": "Word Embedding Model=Flowers and Insects wrt Pleasant(5) and Unpleasant(5)
query_name=%{x}
value=%{y}", + "legendgroup": "Flowers and Insects wrt Pleasant(5) and Unpleasant(5)", + "marker": { + "color": "#636efa" + }, + "name": "Flowers and Insects wrt Pleasant(5) and Unpleasant(5)", + "offsetgroup": "Flowers and Insects wrt Pleasant(5) and Unpleasant(5)", + "orientation": "v", + "showlegend": true, + "textposition": "auto", + "type": "bar", + "x": [ + "word2vec-google-news-300 (WEAT original)", + "word2vec-google-news-300", + "glove-wiki-gigaword-300 (WEAT original)", + "glove-wiki-gigaword-300" + ], + "xaxis": "x", + "y": [ + 1.54, + 1.41, + 1.5, + 1.27 + ], + "yaxis": "y" + }, + { + "alignmentgroup": "True", + "hovertemplate": "Word Embedding Model=Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)
query_name=%{x}
value=%{y}", + "legendgroup": "Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)", + "marker": { + "color": "#EF553B" + }, + "name": "Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)", + "offsetgroup": "Instruments and Weapons wrt Pleasant(5) and Unpleasant(5)", + "orientation": "v", + "showlegend": true, + "textposition": "auto", + "type": "bar", + "x": [ + "word2vec-google-news-300 (WEAT original)", + "word2vec-google-news-300", + "glove-wiki-gigaword-300 (WEAT original)", + "glove-wiki-gigaword-300" + ], + "xaxis": "x", + "y": [ + 1.63, + 1.75, + 1.53, + 1.91 + ], + "yaxis": "y" + }, + { + "alignmentgroup": "True", + "hovertemplate": "Word Embedding Model=European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)
query_name=%{x}
value=%{y}", + "legendgroup": "European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)", + "marker": { + "color": "#00cc96" + }, + "name": "European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)", + "offsetgroup": "European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5)", + "orientation": "v", + "showlegend": true, + "textposition": "auto", + "type": "bar", + "x": [ + "word2vec-google-news-300 (WEAT original)", + "word2vec-google-news-300", + "glove-wiki-gigaword-300 (WEAT original)", + "glove-wiki-gigaword-300" + ], + "xaxis": "x", + "y": [ + 0.58, + 0.45, + 1.41, + 1.28 + ], + "yaxis": "y" + }, + { + "alignmentgroup": "True", + "hovertemplate": "Word Embedding Model=European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)
query_name=%{x}
value=%{y}", + "legendgroup": "European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)", + "marker": { + "color": "#ab63fa" + }, + "name": "European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)", + "offsetgroup": "European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5)", + "orientation": "v", + "showlegend": true, + "textposition": "auto", + "type": "bar", + "x": [ + "word2vec-google-news-300 (WEAT original)", + "word2vec-google-news-300", + "glove-wiki-gigaword-300 (WEAT original)", + "glove-wiki-gigaword-300" + ], + "xaxis": "x", + "y": [ + 1.24, + 0.45, + 1.5, + 0.85 + ], + "yaxis": "y" + }, + { + "alignmentgroup": "True", + "hovertemplate": "Word Embedding Model=European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)
query_name=%{x}
value=%{y}", + "legendgroup": "European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)", + "marker": { + "color": "#FFA15A" + }, + "name": "European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)", + "offsetgroup": "European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9)", + "orientation": "v", + "showlegend": true, + "textposition": "auto", + "type": "bar", + "x": [ + "word2vec-google-news-300 (WEAT original)", + "word2vec-google-news-300", + "glove-wiki-gigaword-300 (WEAT original)", + "glove-wiki-gigaword-300" + ], + "xaxis": "x", + "y": [ + 0.72, + 0.34, + 1.28, + 1.07 + ], + "yaxis": "y" + }, + { + "alignmentgroup": "True", + "hovertemplate": "Word Embedding Model=Male names and Female names wrt Career and Family
query_name=%{x}
value=%{y}", + "legendgroup": "Male names and Female names wrt Career and Family", + "marker": { + "color": "#19d3f3" + }, + "name": "Male names and Female names wrt Career and Family", + "offsetgroup": "Male names and Female names wrt Career and Family", + "orientation": "v", + "showlegend": true, + "textposition": "auto", + "type": "bar", + "x": [ + "word2vec-google-news-300 (WEAT original)", + "word2vec-google-news-300", + "glove-wiki-gigaword-300 (WEAT original)", + "glove-wiki-gigaword-300" + ], + "xaxis": "x", + "y": [ + 1.89, + 1.25, + 1.81, + 1.32 + ], + "yaxis": "y" + }, + { + "alignmentgroup": "True", + "hovertemplate": "Word Embedding Model=Math and Arts wrt Male terms and Female terms
query_name=%{x}
value=%{y}", + "legendgroup": "Math and Arts wrt Male terms and Female terms", + "marker": { + "color": "#FF6692" + }, + "name": "Math and Arts wrt Male terms and Female terms", + "offsetgroup": "Math and Arts wrt Male terms and Female terms", + "orientation": "v", + "showlegend": true, + "textposition": "auto", + "type": "bar", + "x": [ + "word2vec-google-news-300 (WEAT original)", + "word2vec-google-news-300", + "glove-wiki-gigaword-300 (WEAT original)", + "glove-wiki-gigaword-300" + ], + "xaxis": "x", + "y": [ + 0.97, + 0.23, + 1.06, + 0.24 + ], + "yaxis": "y" + }, + { + "alignmentgroup": "True", + "hovertemplate": "Word Embedding Model=Science and Arts 2 wrt Male terms and Female terms
query_name=%{x}
value=%{y}", + "legendgroup": "Science and Arts 2 wrt Male terms and Female terms", + "marker": { + "color": "#B6E880" + }, + "name": "Science and Arts 2 wrt Male terms and Female terms", + "offsetgroup": "Science and Arts 2 wrt Male terms and Female terms", + "orientation": "v", + "showlegend": true, + "textposition": "auto", + "type": "bar", + "x": [ + "word2vec-google-news-300 (WEAT original)", + "word2vec-google-news-300", + "glove-wiki-gigaword-300 (WEAT original)", + "glove-wiki-gigaword-300" + ], + "xaxis": "x", + "y": [ + 1.24, + 0.29, + 1.24, + 0.32 + ], + "yaxis": "y" + }, + { + "alignmentgroup": "True", + "hovertemplate": "Word Embedding Model=Mental disease and Physical disease wrt Temporary and Permanent
query_name=%{x}
value=%{y}", + "legendgroup": "Mental disease and Physical disease wrt Temporary and Permanent", + "marker": { + "color": "#FF97FF" + }, + "name": "Mental disease and Physical disease wrt Temporary and Permanent", + "offsetgroup": "Mental disease and Physical disease wrt Temporary and Permanent", + "orientation": "v", + "showlegend": true, + "textposition": "auto", + "type": "bar", + "x": [ + "word2vec-google-news-300 (WEAT original)", + "word2vec-google-news-300", + "glove-wiki-gigaword-300 (WEAT original)", + "glove-wiki-gigaword-300" + ], + "xaxis": "x", + "y": [ + 1.3, + 0.34, + 1.38, + 0.57 + ], + "yaxis": "y" + }, + { + "alignmentgroup": "True", + "hovertemplate": "Word Embedding Model=Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)
query_name=%{x}
value=%{y}", + "legendgroup": "Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)", + "marker": { + "color": "#FECB52" + }, + "name": "Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)", + "offsetgroup": "Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9)", + "orientation": "v", + "showlegend": true, + "textposition": "auto", + "type": "bar", + "x": [ + "word2vec-google-news-300 (WEAT original)", + "word2vec-google-news-300", + "glove-wiki-gigaword-300 (WEAT original)", + "glove-wiki-gigaword-300" + ], + "xaxis": "x", + "y": [ + -0.08, + -0.05, + 1.21, + 0.21 + ], + "yaxis": "y" + } + ], + "layout": { + "barmode": "group", + "legend": { + "title": { + "text": "Word Embedding Model" + }, + "tracegroupgap": 0 + }, + "margin": { + "t": 60 + }, + "showlegend": false, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 
0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + 
], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": 
{ + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + 
], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 
+ }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Query" + } + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Bias measure" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plot_queries_results(joined_results)\n", + "fig.update_layout(showlegend=False)\n", + "fig.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/WEFE_rankings.ipynb b/examples/WEFE_rankings.ipynb index 88cf580..895d1f9 100644 --- a/examples/WEFE_rankings.ipynb +++ b/examples/WEFE_rankings.ipynb @@ -690,7 +690,7 @@ " model,\n", " queries_set_name=queries_set_name,\n", " metric_params={\n", - " \"num_iterations\": RNSB_NUM_ITERATIONS,\n", + " \"n_iterations\": RNSB_NUM_ITERATIONS,\n", " \"preprocessors\": [{}, {\"lowercase\": True,}],\n", " },\n", " aggregate_results=True,\n", @@ -734,7 +734,7 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2020-04-24T16:55:31.785184Z", @@ -742,7 +742,130 @@ }, "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading glove-twitter-200 from gensim downloader\n", + "Model loaded successfully.\n", + "Running Gender queries using WEAT\n", + "Running Gender queries using WEAT Effect Size\n", + "Running Gender queries using RND\n", + "Running Gender queries using RNSB\n", + "Running Ethnicity queries using WEAT\n", + "Running Ethnicity queries using WEAT Effect Size\n", + "Running Ethnicity queries using RND\n", + "Running Ethnicity queries using RNSB\n", + "Running Religion queries using WEAT\n", + "Running Religion queries using WEAT Effect Size\n", + "Running Religion queries using RND\n", + "Running Religion queries using RNSB\n", + "Queries executed and saved correctly for glove-twitter-200.\n", + "----------------------------------\n", + "\n", + "Loading glove-wiki-gigaword-300 from gensim downloader\n", + "Model loaded successfully.\n", + "Running Gender queries using WEAT\n", + "Running Gender queries using WEAT Effect Size\n", + "Running Gender queries using RND\n", + "Running Gender queries using RNSB\n", + "Running Ethnicity queries using WEAT\n", + "Running Ethnicity queries using WEAT Effect Size\n", + "Running Ethnicity queries using RND\n", + "Running Ethnicity queries using RNSB\n", + "Running Religion queries using WEAT\n", + "Running Religion queries using WEAT Effect Size\n", + "Running Religion queries using RND\n", + "Running Religion queries using RNSB\n", + "Queries executed and saved correctly for glove-wiki-gigaword-300.\n", + "----------------------------------\n", + "\n", + "Loading fasttext-wiki-news-subwords-300 from gensim downloader\n", + "Model loaded successfully.\n", + "Running Gender queries using WEAT\n", + "Running Gender queries using WEAT Effect Size\n", + "Running Gender 
queries using RND\n", + "Running Gender queries using RNSB\n", + "Running Ethnicity queries using WEAT\n", + "Running Ethnicity queries using WEAT Effect Size\n", + "Running Ethnicity queries using RND\n", + "Running Ethnicity queries using RNSB\n", + "Running Religion queries using WEAT\n", + "Running Religion queries using WEAT Effect Size\n", + "Running Religion queries using RND\n", + "Running Religion queries using RNSB\n", + "Queries executed and saved correctly for fasttext-wiki-news-subwords-300.\n", + "----------------------------------\n", + "\n", + "Loading conceptnet-numberbatch 19.08-en dim=300 from a file\n", + "Running Gender queries using WEAT\n", + "Running Gender queries using WEAT Effect Size\n", + "Running Gender queries using RND\n", + "Running Gender queries using RNSB\n", + "Running Ethnicity queries using WEAT\n", + "Running Ethnicity queries using WEAT Effect Size\n", + "Running Ethnicity queries using RND\n", + "Running Ethnicity queries using RNSB\n", + "Running Religion queries using WEAT\n", + "Running Religion queries using WEAT Effect Size\n", + "Running Religion queries using RND\n", + "Running Religion queries using RNSB\n", + "Queries executed and saved correctly for conceptnet-numberbatch 19.08-en dim=300.\n", + "----------------------------------\n", + "\n", + "Loading lexvec-commoncrawl W+C dim=300 from a file\n", + "Running Gender queries using WEAT\n", + "Running Gender queries using WEAT Effect Size\n", + "Running Gender queries using RND\n", + "Running Gender queries using RNSB\n", + "Running Ethnicity queries using WEAT\n", + "Running Ethnicity queries using WEAT Effect Size\n", + "Running Ethnicity queries using RND\n", + "Running Ethnicity queries using RNSB\n", + "Running Religion queries using WEAT\n", + "Running Religion queries using WEAT Effect Size\n", + "Running Religion queries using RND\n", + "Running Religion queries using RNSB\n", + "Queries executed and saved correctly for lexvec-commoncrawl W+C dim=300.\n", + 
"----------------------------------\n", + "\n", + "Loading word2vec-gender-hard-debiased dim=300 from a file\n", + "Running Gender queries using WEAT\n", + "Running Gender queries using WEAT Effect Size\n", + "Running Gender queries using RND\n", + "Running Gender queries using RNSB\n", + "Running Ethnicity queries using WEAT\n", + "Running Ethnicity queries using WEAT Effect Size\n", + "Running Ethnicity queries using RND\n", + "Running Ethnicity queries using RNSB\n", + "Running Religion queries using WEAT\n", + "Running Religion queries using WEAT Effect Size\n", + "Running Religion queries using RND\n", + "Running Religion queries using RNSB\n", + "Queries executed and saved correctly for word2vec-gender-hard-debiased dim=300.\n", + "----------------------------------\n", + "\n", + "Loading word2vec-google-news-300 from gensim downloader\n", + "Model loaded successfully.\n", + "Running Gender queries using WEAT\n", + "Running Gender queries using WEAT Effect Size\n", + "Running Gender queries using RND\n", + "Running Gender queries using RNSB\n", + "Running Ethnicity queries using WEAT\n", + "Running Ethnicity queries using WEAT Effect Size\n", + "Running Ethnicity queries using RND\n", + "Running Ethnicity queries using RNSB\n", + "Running Religion queries using WEAT\n", + "Running Religion queries using WEAT Effect Size\n", + "Running Religion queries using RND\n", + "Running Religion queries using RNSB\n", + "Queries executed and saved correctly for word2vec-google-news-300.\n", + "----------------------------------\n", + "\n" + ] + } + ], "source": [ "import logging\n", "\n", @@ -754,7 +877,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -840,7 +963,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": { "scrolled": false }, @@ -878,7 +1001,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, 
"outputs": [], "source": [ @@ -893,7 +1016,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -910,7 +1033,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -953,14 +1076,14 @@ " 2 (0.202)\n", " 2 (0.366)\n", " 2 (0.007)\n", - " 3 (0.019)\n", + " 2 (0.008)\n", " \n", " \n", " fasttext-wiki-news-subwords-300\n", " 4 (0.468)\n", " 5 (0.709)\n", " 3 (0.018)\n", - " 2 (0.018)\n", + " 3 (0.015)\n", " \n", " \n", " glove-twitter-200\n", @@ -974,28 +1097,28 @@ " 7 (0.845)\n", " 4 (0.656)\n", " 6 (0.183)\n", - " 7 (0.075)\n", + " 6 (0.085)\n", " \n", " \n", " lexvec-commoncrawl W+C dim=300\n", " 5 (0.712)\n", " 6 (0.791)\n", " 7 (0.332)\n", - " 6 (0.067)\n", + " 7 (0.094)\n", " \n", " \n", " word2vec-gender-hard-debiased dim=300\n", " 1 (0.086)\n", " 1 (0.18)\n", " 1 (0.003)\n", - " 1 (0.01)\n", + " 1 (0.004)\n", " \n", " \n", " word2vec-google-news-300\n", " 6 (0.83)\n", " 7 (0.941)\n", " 4 (0.084)\n", - " 4 (0.034)\n", + " 4 (0.023)\n", " \n", " \n", "\n", @@ -1014,13 +1137,13 @@ "\n", " RNSB \n", "model_name \n", - "conceptnet-numberbatch 19.08-en dim=300 3 (0.019) \n", - "fasttext-wiki-news-subwords-300 2 (0.018) \n", + "conceptnet-numberbatch 19.08-en dim=300 2 (0.008) \n", + "fasttext-wiki-news-subwords-300 3 (0.015) \n", "glove-twitter-200 5 (0.05) \n", - "glove-wiki-gigaword-300 7 (0.075) \n", - "lexvec-commoncrawl W+C dim=300 6 (0.067) \n", - "word2vec-gender-hard-debiased dim=300 1 (0.01) \n", - "word2vec-google-news-300 4 (0.034) " + "glove-wiki-gigaword-300 6 (0.085) \n", + "lexvec-commoncrawl W+C dim=300 7 (0.094) \n", + "word2vec-gender-hard-debiased dim=300 1 (0.004) \n", + "word2vec-google-news-300 4 (0.023) " ] }, "metadata": {}, @@ -1242,10 +1365,10 @@ "x": [ 4, 1, - 6, - 3, - 2, 7, + 2, + 3, + 6, 5 ], "xaxis": "x", @@ -2120,9 +2243,9 @@ } }, "text/html": [ - "