{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Univariate Numeric Summary\n\nExample of univariate eda summary for a numeric variable.\n\nThe numeric summary computes the following:\n\n- A histogram\n- A boxplot\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import warnings\n\nwarnings.filterwarnings(\"ignore\")\n\nimport pandas as pd\nimport intedact\nimport plotly"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Here we take a look at some GDPR violation prices to showcase the other parameters:\n\n- log transformation\n- outlier filtering\n- kde overlay\n- custom bin count\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "data = pd.read_csv(\n    \"https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-21/gdpr_violations.tsv\",\n    sep=\"\\t\",\n)\nfig = intedact.numeric_summary(\n    data, \"price\", bins=20, transform=\"log\", upper_quantile=0.95, fig_width=700\n)\nplotly.io.show(fig)"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.12"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}