{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Univariate URL Summary\n\nExample of univariate eda summary for an url variable\n\nThe URL summary computes the following:\n\n- Countplot for the invididual unique urls\n- Countplot for the domains of the urls\n- Countplot for the domain suffixes of the urls\n- Countplot for the file types of the urls\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import warnings\n\nwarnings.filterwarnings(\"ignore\")\n\nimport pandas as pd\nimport intedact\nimport plotly"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Here we take a look at the source URL's for countries GDPR violations recordings.\n\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "data = pd.read_csv(\n    \"https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-21/gdpr_violations.tsv\",\n    sep=\"\\t\",\n)\n\nfig = intedact.url_summary(data, \"source\", fig_width=700)\nplotly.io.show(fig)"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.12"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}