{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Consultas de Selección: Group By, Having, Union, Intersect, Except, Any\n",
    "\n",
    "## Conexión a la Base de Datos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import psycopg2\n",
    "import sqlalchemy\n",
    "import matplotlib as plt\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "from sqlalchemy import create_engine\n",
    "\n",
    "POSTGRES_ADDRESS = 'localhost' # Este es el servidor, puede ser una IP\n",
    "POSTGRES_PORT = '5432'\n",
    "POSTGRES_USERNAME = 'postgres' \n",
    "POSTGRES_PASSWORD = '1234' \n",
    "POSTGRES_DBNAME = 'dvdrental' \n",
    "\n",
    "# Ahora se configura la cadena de conexión.\n",
    "# Esta es una cadena que contiene los parámetros necesarios para establecer una\n",
    "# conexión con posgres.\n",
    "postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'.format(username=POSTGRES_USERNAME,\n",
    "                                                                                        password=POSTGRES_PASSWORD,\n",
    "                                                                                        ipaddress=POSTGRES_ADDRESS,\n",
    "                                                                                        port=POSTGRES_PORT,\n",
    "                                                                                        dbname=POSTGRES_DBNAME))\n",
    "# A continuación se crea la conexión\n",
    "cnx = create_engine(postgres_str)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## La cláusula Group By\n",
    "\n",
    "La cláusula GROUP BY divide las filas devueltas de la declaración SELECT en grupos. Para cada grupo, puede aplicar una función agregada, por ejemplo, SUM() para calcular la suma de elementos o COUNT() para obtener el número de elementos en los grupos."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>total_pago</th>\n",
       "      <th>promedio_pago</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>184</td>\n",
       "      <td>80.80</td>\n",
       "      <td>4.040000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>87</td>\n",
       "      <td>137.72</td>\n",
       "      <td>4.918571</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>477</td>\n",
       "      <td>106.79</td>\n",
       "      <td>5.085238</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>273</td>\n",
       "      <td>130.72</td>\n",
       "      <td>4.668571</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>550</td>\n",
       "      <td>151.69</td>\n",
       "      <td>4.893226</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>594</th>\n",
       "      <td>449</td>\n",
       "      <td>80.83</td>\n",
       "      <td>4.754706</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>595</th>\n",
       "      <td>64</td>\n",
       "      <td>91.70</td>\n",
       "      <td>3.056667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>596</th>\n",
       "      <td>520</td>\n",
       "      <td>127.69</td>\n",
       "      <td>4.119032</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>597</th>\n",
       "      <td>55</td>\n",
       "      <td>84.81</td>\n",
       "      <td>4.463684</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>598</th>\n",
       "      <td>148</td>\n",
       "      <td>211.55</td>\n",
       "      <td>4.701111</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>599 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     customer_id  total_pago  promedio_pago\n",
       "0            184       80.80       4.040000\n",
       "1             87      137.72       4.918571\n",
       "2            477      106.79       5.085238\n",
       "3            273      130.72       4.668571\n",
       "4            550      151.69       4.893226\n",
       "..           ...         ...            ...\n",
       "594          449       80.83       4.754706\n",
       "595           64       91.70       3.056667\n",
       "596          520      127.69       4.119032\n",
       "597           55       84.81       4.463684\n",
       "598          148      211.55       4.701111\n",
       "\n",
       "[599 rows x 3 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sql = ''' \n",
    "SELECT\n",
    "\tcustomer_id,\n",
    "\tSUM (amount) as total_pago,\n",
    "    AVG (amount) as promedio_pago\n",
    "FROM\n",
    "\tpayment\n",
    "GROUP BY\n",
    "\tcustomer_id;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>total_pago</th>\n",
       "      <th>promedio_pago</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>148</td>\n",
       "      <td>211.55</td>\n",
       "      <td>4.701111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>526</td>\n",
       "      <td>208.58</td>\n",
       "      <td>4.966190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>178</td>\n",
       "      <td>194.61</td>\n",
       "      <td>4.990000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>137</td>\n",
       "      <td>191.62</td>\n",
       "      <td>5.042632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>144</td>\n",
       "      <td>189.60</td>\n",
       "      <td>4.740000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>594</th>\n",
       "      <td>110</td>\n",
       "      <td>49.88</td>\n",
       "      <td>4.156667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>595</th>\n",
       "      <td>320</td>\n",
       "      <td>47.85</td>\n",
       "      <td>3.190000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>596</th>\n",
       "      <td>248</td>\n",
       "      <td>37.87</td>\n",
       "      <td>2.913077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>597</th>\n",
       "      <td>281</td>\n",
       "      <td>32.90</td>\n",
       "      <td>3.290000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>598</th>\n",
       "      <td>318</td>\n",
       "      <td>27.93</td>\n",
       "      <td>3.990000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>599 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     customer_id  total_pago  promedio_pago\n",
       "0            148      211.55       4.701111\n",
       "1            526      208.58       4.966190\n",
       "2            178      194.61       4.990000\n",
       "3            137      191.62       5.042632\n",
       "4            144      189.60       4.740000\n",
       "..           ...         ...            ...\n",
       "594          110       49.88       4.156667\n",
       "595          320       47.85       3.190000\n",
       "596          248       37.87       2.913077\n",
       "597          281       32.90       3.290000\n",
       "598          318       27.93       3.990000\n",
       "\n",
       "[599 rows x 3 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sql = ''' \n",
    "SELECT\n",
    "\tcustomer_id,\n",
    "\tSUM (amount) as total_pago,\n",
    "    AVG (amount) as promedio_pago\n",
    "FROM\n",
    "\tpayment\n",
    "GROUP BY\n",
    "\tcustomer_id\n",
    "ORDER BY\n",
    "    total_pago DESC;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>sum</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>148</td>\n",
       "      <td>211.55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>526</td>\n",
       "      <td>208.58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>178</td>\n",
       "      <td>194.61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>137</td>\n",
       "      <td>191.62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>144</td>\n",
       "      <td>189.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>594</th>\n",
       "      <td>110</td>\n",
       "      <td>49.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>595</th>\n",
       "      <td>320</td>\n",
       "      <td>47.85</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>596</th>\n",
       "      <td>248</td>\n",
       "      <td>37.87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>597</th>\n",
       "      <td>281</td>\n",
       "      <td>32.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>598</th>\n",
       "      <td>318</td>\n",
       "      <td>27.93</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>599 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     customer_id     sum\n",
       "0            148  211.55\n",
       "1            526  208.58\n",
       "2            178  194.61\n",
       "3            137  191.62\n",
       "4            144  189.60\n",
       "..           ...     ...\n",
       "594          110   49.88\n",
       "595          320   47.85\n",
       "596          248   37.87\n",
       "597          281   32.90\n",
       "598          318   27.93\n",
       "\n",
       "[599 rows x 2 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sql = ''' \n",
    "SELECT\n",
    "\tcustomer_id,\n",
    "\tSUM (amount)\n",
    "FROM\n",
    "\tpayment\n",
    "GROUP BY\n",
    "\tcustomer_id\n",
    "ORDER BY\n",
    "\tSUM (amount) DESC;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>nombre_completo</th>\n",
       "      <th>total_pago</th>\n",
       "      <th>promedio_pago</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Brian Wyman</td>\n",
       "      <td>27.93</td>\n",
       "      <td>3.990000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Leona Obrien</td>\n",
       "      <td>32.90</td>\n",
       "      <td>3.290000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Caroline Bowman</td>\n",
       "      <td>37.87</td>\n",
       "      <td>2.913077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Anthony Schwab</td>\n",
       "      <td>47.85</td>\n",
       "      <td>3.190000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Tiffany Jordan</td>\n",
       "      <td>49.88</td>\n",
       "      <td>4.156667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>594</th>\n",
       "      <td>Clara Shaw</td>\n",
       "      <td>189.60</td>\n",
       "      <td>4.740000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>595</th>\n",
       "      <td>Rhonda Kennedy</td>\n",
       "      <td>191.62</td>\n",
       "      <td>5.042632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>596</th>\n",
       "      <td>Marion Snyder</td>\n",
       "      <td>194.61</td>\n",
       "      <td>4.990000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>597</th>\n",
       "      <td>Karl Seal</td>\n",
       "      <td>208.58</td>\n",
       "      <td>4.966190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>598</th>\n",
       "      <td>Eleanor Hunt</td>\n",
       "      <td>211.55</td>\n",
       "      <td>4.701111</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>599 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     nombre_completo  total_pago  promedio_pago\n",
       "0        Brian Wyman       27.93       3.990000\n",
       "1       Leona Obrien       32.90       3.290000\n",
       "2    Caroline Bowman       37.87       2.913077\n",
       "3     Anthony Schwab       47.85       3.190000\n",
       "4     Tiffany Jordan       49.88       4.156667\n",
       "..               ...         ...            ...\n",
       "594       Clara Shaw      189.60       4.740000\n",
       "595   Rhonda Kennedy      191.62       5.042632\n",
       "596    Marion Snyder      194.61       4.990000\n",
       "597        Karl Seal      208.58       4.966190\n",
       "598     Eleanor Hunt      211.55       4.701111\n",
       "\n",
       "[599 rows x 3 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Obtener también el nombre (¡Cuidado con los homónimos!)\n",
    "sql = ''' \n",
    "SELECT\n",
    "\tfirst_name || ' ' || last_name nombre_completo,\n",
    "\tSUM (amount) as total_pago,\n",
    "    AVG (amount) as promedio_pago\n",
    "FROM\n",
    "\tpayment\n",
    "INNER JOIN customer USING (customer_id)    \t\n",
    "GROUP BY nombre_completo\n",
    "ORDER BY total_pago;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>paid_date</th>\n",
       "      <th>sum</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2007-02-14</td>\n",
       "      <td>116.73</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2007-02-19</td>\n",
       "      <td>1290.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2007-02-20</td>\n",
       "      <td>1219.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2007-03-19</td>\n",
       "      <td>2617.69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2007-04-26</td>\n",
       "      <td>347.21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2007-04-08</td>\n",
       "      <td>2227.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2007-02-15</td>\n",
       "      <td>1188.92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2007-04-28</td>\n",
       "      <td>2622.73</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2007-03-17</td>\n",
       "      <td>2442.16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2007-03-20</td>\n",
       "      <td>2669.89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>2007-03-23</td>\n",
       "      <td>2342.43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2007-03-21</td>\n",
       "      <td>2868.27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>2007-04-29</td>\n",
       "      <td>2717.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>2007-04-10</td>\n",
       "      <td>1973.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>2007-05-14</td>\n",
       "      <td>514.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2007-04-12</td>\n",
       "      <td>1930.48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>2007-03-02</td>\n",
       "      <td>2550.05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>2007-04-27</td>\n",
       "      <td>2673.57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>2007-03-16</td>\n",
       "      <td>299.28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>2007-02-18</td>\n",
       "      <td>1275.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>2007-04-07</td>\n",
       "      <td>1984.28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>2007-02-21</td>\n",
       "      <td>917.87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>2007-02-16</td>\n",
       "      <td>1154.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>2007-03-18</td>\n",
       "      <td>2701.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>2007-04-06</td>\n",
       "      <td>2077.14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>2007-04-30</td>\n",
       "      <td>5723.89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>2007-03-01</td>\n",
       "      <td>2808.24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>2007-04-11</td>\n",
       "      <td>1940.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>2007-04-09</td>\n",
       "      <td>2067.86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>2007-04-05</td>\n",
       "      <td>273.36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2007-02-17</td>\n",
       "      <td>1188.17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>2007-03-22</td>\n",
       "      <td>2586.79</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     paid_date      sum\n",
       "0   2007-02-14   116.73\n",
       "1   2007-02-19  1290.90\n",
       "2   2007-02-20  1219.09\n",
       "3   2007-03-19  2617.69\n",
       "4   2007-04-26   347.21\n",
       "5   2007-04-08  2227.84\n",
       "6   2007-02-15  1188.92\n",
       "7   2007-04-28  2622.73\n",
       "8   2007-03-17  2442.16\n",
       "9   2007-03-20  2669.89\n",
       "10  2007-03-23  2342.43\n",
       "11  2007-03-21  2868.27\n",
       "12  2007-04-29  2717.60\n",
       "13  2007-04-10  1973.18\n",
       "14  2007-05-14   514.18\n",
       "15  2007-04-12  1930.48\n",
       "16  2007-03-02  2550.05\n",
       "17  2007-04-27  2673.57\n",
       "18  2007-03-16   299.28\n",
       "19  2007-02-18  1275.98\n",
       "20  2007-04-07  1984.28\n",
       "21  2007-02-21   917.87\n",
       "22  2007-02-16  1154.18\n",
       "23  2007-03-18  2701.76\n",
       "24  2007-04-06  2077.14\n",
       "25  2007-04-30  5723.89\n",
       "26  2007-03-01  2808.24\n",
       "27  2007-04-11  1940.32\n",
       "28  2007-04-09  2067.86\n",
       "29  2007-04-05   273.36\n",
       "30  2007-02-17  1188.17\n",
       "31  2007-03-22  2586.79"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Agrupar por Fecha\n",
    "sql = ''' \n",
    "SELECT \n",
    "\tDATE(payment_date) paid_date, \n",
    "\tSUM(amount) sum\n",
    "FROM \n",
    "\tpayment\n",
    "GROUP BY\n",
    "\tDATE(payment_date);\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cláusulas HAVING \n",
    "\n",
    "La cláusula HAVING especifica una condición de búsqueda para un grupo o un agregado. Se usa a menudo con la cláusula GROUP BY para filtrar grupos o agregados en función de una condición específica."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>sum</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>526</td>\n",
       "      <td>208.58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>148</td>\n",
       "      <td>211.55</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id     sum\n",
       "0          526  208.58\n",
       "1          148  211.55"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Los Clientes que han gastado más de 200 dólares\n",
    "sql = ''' \n",
    "SELECT\n",
    "\tcustomer_id,\n",
    "\tSUM (amount)\n",
    "FROM\n",
    "\tpayment\n",
    "GROUP BY\n",
    "\tcustomer_id\n",
    "HAVING\n",
    "\tSUM (amount) > 200;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>store_id</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>326</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   store_id  count\n",
       "0         1    326"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Las tiendas con más de 300 clientes\n",
    "sql = ''' \n",
    "SELECT\n",
    "\tstore_id,\n",
    "\tCOUNT (customer_id)\n",
    "FROM\n",
    "\tcustomer\n",
    "GROUP BY\n",
    "\tstore_id\n",
    "HAVING\n",
    "\tCOUNT (customer_id) > 300;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## El operador de UNION\n",
    "\n",
    "El operador UNION combina conjuntos de resultados de dos o más declaraciones SELECT en un único conjunto de resultados."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<sqlalchemy.engine.result.ResultProxy at 0x2d360e281f0>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sql = '''\n",
    "DROP TABLE IF EXISTS top_rated_films;\n",
    "CREATE TABLE top_rated_films(\n",
    "\ttitle VARCHAR NOT NULL,\n",
    "\trelease_year SMALLINT\n",
    ");\n",
    "\n",
    "DROP TABLE IF EXISTS most_popular_films;\n",
    "CREATE TABLE most_popular_films(\n",
    "\ttitle VARCHAR NOT NULL,\n",
    "\trelease_year SMALLINT\n",
    ");\n",
    "\n",
    "INSERT INTO \n",
    "   top_rated_films(title,release_year)\n",
    "VALUES\n",
    "   ('The Shawshank Redemption',1994),\n",
    "   ('The Godfather',1972),\n",
    "   ('12 Angry Men',1957);\n",
    "\n",
    "INSERT INTO \n",
    "   most_popular_films(title,release_year)\n",
    "VALUES\n",
    "   ('An American Pickle',2020),\n",
    "   ('The Godfather',1972),\n",
    "   ('Greyhound',2020);\n",
    "'''\n",
    "\n",
    "cnx.execute(sql)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>release_year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>The Shawshank Redemption</td>\n",
       "      <td>1994</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>The Godfather</td>\n",
       "      <td>1972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12 Angry Men</td>\n",
       "      <td>1957</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      title  release_year\n",
       "0  The Shawshank Redemption          1994\n",
       "1             The Godfather          1972\n",
       "2              12 Angry Men          1957"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sql = ''' \n",
    "SELECT * FROM top_rated_films;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>release_year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>An American Pickle</td>\n",
       "      <td>2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>The Godfather</td>\n",
       "      <td>1972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Greyhound</td>\n",
       "      <td>2020</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                title  release_year\n",
       "0  An American Pickle          2020\n",
       "1       The Godfather          1972\n",
       "2           Greyhound          2020"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sql = ''' \n",
    "SELECT * FROM most_popular_films;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>release_year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>An American Pickle</td>\n",
       "      <td>2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Greyhound</td>\n",
       "      <td>2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>The Shawshank Redemption</td>\n",
       "      <td>1994</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>The Godfather</td>\n",
       "      <td>1972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>12 Angry Men</td>\n",
       "      <td>1957</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      title  release_year\n",
       "0        An American Pickle          2020\n",
       "1                 Greyhound          2020\n",
       "2  The Shawshank Redemption          1994\n",
       "3             The Godfather          1972\n",
       "4              12 Angry Men          1957"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# La unión de dos tablas sin duplicados\n",
    "sql = ''' \n",
    "SELECT * FROM top_rated_films\n",
    "UNION\n",
    "SELECT * FROM most_popular_films;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>release_year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>The Shawshank Redemption</td>\n",
       "      <td>1994</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>The Godfather</td>\n",
       "      <td>1972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12 Angry Men</td>\n",
       "      <td>1957</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>An American Pickle</td>\n",
       "      <td>2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>The Godfather</td>\n",
       "      <td>1972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Greyhound</td>\n",
       "      <td>2020</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      title  release_year\n",
       "0  The Shawshank Redemption          1994\n",
       "1             The Godfather          1972\n",
       "2              12 Angry Men          1957\n",
       "3        An American Pickle          2020\n",
       "4             The Godfather          1972\n",
       "5                 Greyhound          2020"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# La unión de dos tablas con duplicados\n",
    "sql = ''' \n",
    "SELECT * FROM top_rated_films\n",
    "UNION ALL\n",
    "SELECT * FROM most_popular_films;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>release_year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>12 Angry Men</td>\n",
       "      <td>1957</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>An American Pickle</td>\n",
       "      <td>2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Greyhound</td>\n",
       "      <td>2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>The Godfather</td>\n",
       "      <td>1972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>The Godfather</td>\n",
       "      <td>1972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>The Shawshank Redemption</td>\n",
       "      <td>1994</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      title  release_year\n",
       "0              12 Angry Men          1957\n",
       "1        An American Pickle          2020\n",
       "2                 Greyhound          2020\n",
       "3             The Godfather          1972\n",
       "4             The Godfather          1972\n",
       "5  The Shawshank Redemption          1994"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# La unión de dos tablas con duplicados\n",
    "sql = ''' \n",
    "SELECT * FROM top_rated_films\n",
    "UNION ALL\n",
    "SELECT * FROM most_popular_films\n",
    "ORDER BY title;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Operador INTERSECT\n",
    "\n",
    "Devuelve las filas que están disponibles en ambos conjuntos de resultados. Ambos conjuntos DEBEN tener la misma cantidad de columnas y tipo."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>release_year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>The Godfather</td>\n",
       "      <td>1972</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           title  release_year\n",
       "0  The Godfather          1972"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# La unión de dos tablas con duplicados\n",
    "sql = ''' \n",
    "SELECT *\n",
    "FROM most_popular_films \n",
    "INTERSECT\n",
    "SELECT *\n",
    "FROM top_rated_films;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Operador EXCEPT\n",
    "\n",
    "Devuelve filas distintas de la primera consulta A (izquierda) que no están en la salida de la segunda consulta B (derecha).\n",
    "\n",
    "Except corresponde al área sombreada. [Fuente](https://www.postgresqltutorial.com/postgresql-except/)\n",
    "\n",
    "![Venn](https://sp.postgresqltutorial.com/wp-content/uploads/2016/06/PostgreSQL-EXCEPT-300x202.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>release_year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>The Shawshank Redemption</td>\n",
       "      <td>1994</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12 Angry Men</td>\n",
       "      <td>1957</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      title  release_year\n",
       "0  The Shawshank Redemption          1994\n",
       "1              12 Angry Men          1957"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# La unión de dos tablas con duplicados\n",
    "sql = ''' \n",
    "SELECT * FROM top_rated_films\n",
    "EXCEPT \n",
    "SELECT * FROM most_popular_films;\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<sqlalchemy.engine.result.ResultProxy at 0x2d35db590a0>"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Eliminar las tablas del ejemplo\n",
    "sql = '''\n",
    "DROP TABLE IF EXISTS top_rated_films;\n",
    "DROP TABLE IF EXISTS most_popular_films;\n",
    "'''\n",
    "\n",
    "cnx.execute(sql)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## El operador ANY / SOME\n",
    "\n",
    "Permite comparar un valor con un conjunto de valores devueltos por una subconsulta.\n",
    "\n",
    "* El operador ANY debe estar precedido por uno de los siguientes operadores de comparación =, <=,>, <,> y <>\n",
    "* El operador ANY devuelve verdadero si cualquier valor de la subconsulta cumple la condición; de lo contrario, devuelve falso.\n",
    "* Tenga en cuenta que SOME es sinónimo de ANY, puede sustituir SOME por ANY cualquier sentencia SQL."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alley Evolution</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Analyze Hoosiers</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Anonymous Human</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Baked Cleopatra</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Casualties Encino</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>Wild Apollo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>Worst Banger</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>66</th>\n",
       "      <td>Wrong Behavior</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>Young Language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>Youth Kick</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>69 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                title\n",
       "0     Alley Evolution\n",
       "1    Analyze Hoosiers\n",
       "2     Anonymous Human\n",
       "3     Baked Cleopatra\n",
       "4   Casualties Encino\n",
       "..                ...\n",
       "64        Wild Apollo\n",
       "65       Worst Banger\n",
       "66     Wrong Behavior\n",
       "67     Young Language\n",
       "68         Youth Kick\n",
       "\n",
       "[69 rows x 1 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Las películas cuya duración es mayor o igual que \n",
    "# la duración máxima de cualquier categoría de película.\n",
    "\n",
    "sql = ''' \n",
    "SELECT title\n",
    "FROM film\n",
    "WHERE length >= ANY(\n",
    "    SELECT MAX( length )\n",
    "    FROM film\n",
    "    INNER JOIN film_category USING(film_id)\n",
    "    GROUP BY  category_id );\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Igual ANY ( = ANY) es equivalente al operador IN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>category_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Amadeus Holy</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>American Circus</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Antitrust Tomatoes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Apollo Teen</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Ark Ridgemont</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121</th>\n",
       "      <td>Werewolf Lola</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122</th>\n",
       "      <td>West Lion</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>123</th>\n",
       "      <td>Witches Panic</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>Women Dorado</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125</th>\n",
       "      <td>Worst Banger</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>126 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  title  category_id\n",
       "0          Amadeus Holy            1\n",
       "1       American Circus            1\n",
       "2    Antitrust Tomatoes            1\n",
       "3           Apollo Teen            7\n",
       "4         Ark Ridgemont            1\n",
       "..                  ...          ...\n",
       "121       Werewolf Lola            1\n",
       "122           West Lion            7\n",
       "123       Witches Panic            7\n",
       "124        Women Dorado            1\n",
       "125        Worst Banger            1\n",
       "\n",
       "[126 rows x 2 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# obtiener las películas cuya categoría es Actiono Drama.\n",
    "\n",
    "sql = ''' \n",
    "SELECT\n",
    "    title,\n",
    "    category_id\n",
    "FROM\n",
    "    film\n",
    "    INNER JOIN film_category\n",
    "        USING(film_id)\n",
    "WHERE\n",
    "    category_id = ANY(\n",
    "        SELECT\n",
    "            category_id\n",
    "        FROM\n",
    "            category\n",
    "        WHERE\n",
    "            NAME = 'Action'\n",
    "            OR NAME = 'Drama'\n",
    "    );\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>category_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Amadeus Holy</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>American Circus</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Antitrust Tomatoes</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Apollo Teen</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Ark Ridgemont</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121</th>\n",
       "      <td>Werewolf Lola</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122</th>\n",
       "      <td>West Lion</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>123</th>\n",
       "      <td>Witches Panic</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>Women Dorado</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125</th>\n",
       "      <td>Worst Banger</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>126 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  title  category_id\n",
       "0          Amadeus Holy            1\n",
       "1       American Circus            1\n",
       "2    Antitrust Tomatoes            1\n",
       "3           Apollo Teen            7\n",
       "4         Ark Ridgemont            1\n",
       "..                  ...          ...\n",
       "121       Werewolf Lola            1\n",
       "122           West Lion            7\n",
       "123       Witches Panic            7\n",
       "124        Women Dorado            1\n",
       "125        Worst Banger            1\n",
       "\n",
       "[126 rows x 2 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Consulta IN equivalente\n",
    "\n",
    "sql = ''' \n",
    "SELECT\n",
    "    title,\n",
    "    category_id\n",
    "FROM\n",
    "    film\n",
    "INNER JOIN film_category\n",
    "        USING(film_id)\n",
    "WHERE\n",
    "    category_id IN (\n",
    "        SELECT\n",
    "            category_id\n",
    "        FROM\n",
    "            category\n",
    "        WHERE\n",
    "            NAME = 'Action'\n",
    "            OR NAME = 'Drama'\n",
    "    );\n",
    "'''\n",
    "\n",
    "pd.read_sql_query(sql, cnx)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}