{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Consultas de Selección: Group By, Having, Union, Intersect, Except, Any\n", "\n", "## Conexión a la Base de Datos" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import psycopg2\n", "import sqlalchemy\n", "import matplotlib as plt\n", "\n", "%matplotlib inline\n", "\n", "from sqlalchemy import create_engine\n", "\n", "POSTGRES_ADDRESS = 'localhost' # Este es el servidor, puede ser una IP\n", "POSTGRES_PORT = '5432'\n", "POSTGRES_USERNAME = 'postgres' \n", "POSTGRES_PASSWORD = '1234' \n", "POSTGRES_DBNAME = 'dvdrental' \n", "\n", "# Ahora se configura la cadena de conexión.\n", "# Esta es una cadena que contiene los parámetros necesarios para establecer una\n", "# conexión con posgres.\n", "postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'.format(username=POSTGRES_USERNAME,\n", " password=POSTGRES_PASSWORD,\n", " ipaddress=POSTGRES_ADDRESS,\n", " port=POSTGRES_PORT,\n", " dbname=POSTGRES_DBNAME))\n", "# A continuación se crea la conexión\n", "cnx = create_engine(postgres_str)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## La cláusula Group By\n", "\n", "La cláusula GROUP BY divide las filas devueltas de la declaración SELECT en grupos. Para cada grupo, puede aplicar una función agregada, por ejemplo, SUM() para calcular la suma de elementos o COUNT() para obtener el número de elementos en los grupos." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idtotal_pagopromedio_pago
018480.804.040000
187137.724.918571
2477106.795.085238
3273130.724.668571
4550151.694.893226
............
59444980.834.754706
5956491.703.056667
596520127.694.119032
5975584.814.463684
598148211.554.701111
\n", "

599 rows × 3 columns

\n", "
" ], "text/plain": [ " customer_id total_pago promedio_pago\n", "0 184 80.80 4.040000\n", "1 87 137.72 4.918571\n", "2 477 106.79 5.085238\n", "3 273 130.72 4.668571\n", "4 550 151.69 4.893226\n", ".. ... ... ...\n", "594 449 80.83 4.754706\n", "595 64 91.70 3.056667\n", "596 520 127.69 4.119032\n", "597 55 84.81 4.463684\n", "598 148 211.55 4.701111\n", "\n", "[599 rows x 3 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sql = ''' \n", "SELECT\n", "\tcustomer_id,\n", "\tSUM (amount) as total_pago,\n", " AVG (amount) as promedio_pago\n", "FROM\n", "\tpayment\n", "GROUP BY\n", "\tcustomer_id;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idtotal_pagopromedio_pago
0148211.554.701111
1526208.584.966190
2178194.614.990000
3137191.625.042632
4144189.604.740000
............
59411049.884.156667
59532047.853.190000
59624837.872.913077
59728132.903.290000
59831827.933.990000
\n", "

599 rows × 3 columns

\n", "
" ], "text/plain": [ " customer_id total_pago promedio_pago\n", "0 148 211.55 4.701111\n", "1 526 208.58 4.966190\n", "2 178 194.61 4.990000\n", "3 137 191.62 5.042632\n", "4 144 189.60 4.740000\n", ".. ... ... ...\n", "594 110 49.88 4.156667\n", "595 320 47.85 3.190000\n", "596 248 37.87 2.913077\n", "597 281 32.90 3.290000\n", "598 318 27.93 3.990000\n", "\n", "[599 rows x 3 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sql = ''' \n", "SELECT\n", "\tcustomer_id,\n", "\tSUM (amount) as total_pago,\n", " AVG (amount) as promedio_pago\n", "FROM\n", "\tpayment\n", "GROUP BY\n", "\tcustomer_id\n", "ORDER BY\n", " total_pago DESC;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idsum
0148211.55
1526208.58
2178194.61
3137191.62
4144189.60
.........
59411049.88
59532047.85
59624837.87
59728132.90
59831827.93
\n", "

599 rows × 2 columns

\n", "
" ], "text/plain": [ " customer_id sum\n", "0 148 211.55\n", "1 526 208.58\n", "2 178 194.61\n", "3 137 191.62\n", "4 144 189.60\n", ".. ... ...\n", "594 110 49.88\n", "595 320 47.85\n", "596 248 37.87\n", "597 281 32.90\n", "598 318 27.93\n", "\n", "[599 rows x 2 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sql = ''' \n", "SELECT\n", "\tcustomer_id,\n", "\tSUM (amount)\n", "FROM\n", "\tpayment\n", "GROUP BY\n", "\tcustomer_id\n", "ORDER BY\n", "\tSUM (amount) DESC;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nombre_completototal_pagopromedio_pago
0Brian Wyman27.933.990000
1Leona Obrien32.903.290000
2Caroline Bowman37.872.913077
3Anthony Schwab47.853.190000
4Tiffany Jordan49.884.156667
............
594Clara Shaw189.604.740000
595Rhonda Kennedy191.625.042632
596Marion Snyder194.614.990000
597Karl Seal208.584.966190
598Eleanor Hunt211.554.701111
\n", "

599 rows × 3 columns

\n", "
" ], "text/plain": [ " nombre_completo total_pago promedio_pago\n", "0 Brian Wyman 27.93 3.990000\n", "1 Leona Obrien 32.90 3.290000\n", "2 Caroline Bowman 37.87 2.913077\n", "3 Anthony Schwab 47.85 3.190000\n", "4 Tiffany Jordan 49.88 4.156667\n", ".. ... ... ...\n", "594 Clara Shaw 189.60 4.740000\n", "595 Rhonda Kennedy 191.62 5.042632\n", "596 Marion Snyder 194.61 4.990000\n", "597 Karl Seal 208.58 4.966190\n", "598 Eleanor Hunt 211.55 4.701111\n", "\n", "[599 rows x 3 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Obtener también el nombre (¡Cuidado con los homónimos!)\n", "sql = ''' \n", "SELECT\n", "\tfirst_name || ' ' || last_name nombre_completo,\n", "\tSUM (amount) as total_pago,\n", " AVG (amount) as promedio_pago\n", "FROM\n", "\tpayment\n", "INNER JOIN customer USING (customer_id) \t\n", "GROUP BY nombre_completo\n", "ORDER BY total_pago;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
paid_datesum
02007-02-14116.73
12007-02-191290.90
22007-02-201219.09
32007-03-192617.69
42007-04-26347.21
52007-04-082227.84
62007-02-151188.92
72007-04-282622.73
82007-03-172442.16
92007-03-202669.89
102007-03-232342.43
112007-03-212868.27
122007-04-292717.60
132007-04-101973.18
142007-05-14514.18
152007-04-121930.48
162007-03-022550.05
172007-04-272673.57
182007-03-16299.28
192007-02-181275.98
202007-04-071984.28
212007-02-21917.87
222007-02-161154.18
232007-03-182701.76
242007-04-062077.14
252007-04-305723.89
262007-03-012808.24
272007-04-111940.32
282007-04-092067.86
292007-04-05273.36
302007-02-171188.17
312007-03-222586.79
\n", "
" ], "text/plain": [ " paid_date sum\n", "0 2007-02-14 116.73\n", "1 2007-02-19 1290.90\n", "2 2007-02-20 1219.09\n", "3 2007-03-19 2617.69\n", "4 2007-04-26 347.21\n", "5 2007-04-08 2227.84\n", "6 2007-02-15 1188.92\n", "7 2007-04-28 2622.73\n", "8 2007-03-17 2442.16\n", "9 2007-03-20 2669.89\n", "10 2007-03-23 2342.43\n", "11 2007-03-21 2868.27\n", "12 2007-04-29 2717.60\n", "13 2007-04-10 1973.18\n", "14 2007-05-14 514.18\n", "15 2007-04-12 1930.48\n", "16 2007-03-02 2550.05\n", "17 2007-04-27 2673.57\n", "18 2007-03-16 299.28\n", "19 2007-02-18 1275.98\n", "20 2007-04-07 1984.28\n", "21 2007-02-21 917.87\n", "22 2007-02-16 1154.18\n", "23 2007-03-18 2701.76\n", "24 2007-04-06 2077.14\n", "25 2007-04-30 5723.89\n", "26 2007-03-01 2808.24\n", "27 2007-04-11 1940.32\n", "28 2007-04-09 2067.86\n", "29 2007-04-05 273.36\n", "30 2007-02-17 1188.17\n", "31 2007-03-22 2586.79" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Agrupar por Fecha\n", "sql = ''' \n", "SELECT \n", "\tDATE(payment_date) paid_date, \n", "\tSUM(amount) sum\n", "FROM \n", "\tpayment\n", "GROUP BY\n", "\tDATE(payment_date);\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cláusulas HAVING \n", "\n", "La cláusula HAVING especifica una condición de búsqueda para un grupo o un agregado. Se usa a menudo con la cláusula GROUP BY para filtrar grupos o agregados en función de una condición específica." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idsum
0526208.58
1148211.55
\n", "
" ], "text/plain": [ " customer_id sum\n", "0 526 208.58\n", "1 148 211.55" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Los Clientes que han gastado más de 200 dólares\n", "sql = ''' \n", "SELECT\n", "\tcustomer_id,\n", "\tSUM (amount)\n", "FROM\n", "\tpayment\n", "GROUP BY\n", "\tcustomer_id\n", "HAVING\n", "\tSUM (amount) > 200;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
store_idcount
01326
\n", "
" ], "text/plain": [ " store_id count\n", "0 1 326" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Las tiendas con más de 300 clientes\n", "sql = ''' \n", "SELECT\n", "\tstore_id,\n", "\tCOUNT (customer_id)\n", "FROM\n", "\tcustomer\n", "GROUP BY\n", "\tstore_id\n", "HAVING\n", "\tCOUNT (customer_id) > 300;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## El operador de UNION\n", "\n", "El operador UNION combina conjuntos de resultados de dos o más declaraciones SELECT en un único conjunto de resultados." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sql = '''\n", "DROP TABLE IF EXISTS top_rated_films;\n", "CREATE TABLE top_rated_films(\n", "\ttitle VARCHAR NOT NULL,\n", "\trelease_year SMALLINT\n", ");\n", "\n", "DROP TABLE IF EXISTS most_popular_films;\n", "CREATE TABLE most_popular_films(\n", "\ttitle VARCHAR NOT NULL,\n", "\trelease_year SMALLINT\n", ");\n", "\n", "INSERT INTO \n", " top_rated_films(title,release_year)\n", "VALUES\n", " ('The Shawshank Redemption',1994),\n", " ('The Godfather',1972),\n", " ('12 Angry Men',1957);\n", "\n", "INSERT INTO \n", " most_popular_films(title,release_year)\n", "VALUES\n", " ('An American Pickle',2020),\n", " ('The Godfather',1972),\n", " ('Greyhound',2020);\n", "'''\n", "\n", "cnx.execute(sql)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_year
0The Shawshank Redemption1994
1The Godfather1972
212 Angry Men1957
\n", "
" ], "text/plain": [ " title release_year\n", "0 The Shawshank Redemption 1994\n", "1 The Godfather 1972\n", "2 12 Angry Men 1957" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sql = ''' \n", "SELECT * FROM top_rated_films;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_year
0An American Pickle2020
1The Godfather1972
2Greyhound2020
\n", "
" ], "text/plain": [ " title release_year\n", "0 An American Pickle 2020\n", "1 The Godfather 1972\n", "2 Greyhound 2020" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sql = ''' \n", "SELECT * FROM most_popular_films;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_year
0An American Pickle2020
1Greyhound2020
2The Shawshank Redemption1994
3The Godfather1972
412 Angry Men1957
\n", "
" ], "text/plain": [ " title release_year\n", "0 An American Pickle 2020\n", "1 Greyhound 2020\n", "2 The Shawshank Redemption 1994\n", "3 The Godfather 1972\n", "4 12 Angry Men 1957" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# La unión de dos tablas sin duplicados\n", "sql = ''' \n", "SELECT * FROM top_rated_films\n", "UNION\n", "SELECT * FROM most_popular_films;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_year
0The Shawshank Redemption1994
1The Godfather1972
212 Angry Men1957
3An American Pickle2020
4The Godfather1972
5Greyhound2020
\n", "
" ], "text/plain": [ " title release_year\n", "0 The Shawshank Redemption 1994\n", "1 The Godfather 1972\n", "2 12 Angry Men 1957\n", "3 An American Pickle 2020\n", "4 The Godfather 1972\n", "5 Greyhound 2020" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# La unión de dos tablas con duplicados\n", "sql = ''' \n", "SELECT * FROM top_rated_films\n", "UNION ALL\n", "SELECT * FROM most_popular_films;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_year
012 Angry Men1957
1An American Pickle2020
2Greyhound2020
3The Godfather1972
4The Godfather1972
5The Shawshank Redemption1994
\n", "
" ], "text/plain": [ " title release_year\n", "0 12 Angry Men 1957\n", "1 An American Pickle 2020\n", "2 Greyhound 2020\n", "3 The Godfather 1972\n", "4 The Godfather 1972\n", "5 The Shawshank Redemption 1994" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# La unión de dos tablas con duplicados\n", "sql = ''' \n", "SELECT * FROM top_rated_films\n", "UNION ALL\n", "SELECT * FROM most_popular_films\n", "ORDER BY title;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Operador INTERSECT\n", "\n", "Devuelve las filas que están disponibles en ambos conjuntos de resultados. Ambos conjuntos DEBEN tener la misma cantidad de columnas y tipo." ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_year
0The Godfather1972
\n", "
" ], "text/plain": [ " title release_year\n", "0 The Godfather 1972" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# La unión de dos tablas con duplicados\n", "sql = ''' \n", "SELECT *\n", "FROM most_popular_films \n", "INTERSECT\n", "SELECT *\n", "FROM top_rated_films;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Operador EXCEPT\n", "\n", "Devuelve filas distintas de la primera consulta A (izquierda) que no están en la salida de la segunda consulta B (derecha).\n", "\n", "Except corresponde al área sombreada. [Fuente](https://www.postgresqltutorial.com/postgresql-except/)\n", "\n", "![Venn](https://sp.postgresqltutorial.com/wp-content/uploads/2016/06/PostgreSQL-EXCEPT-300x202.png)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_year
0The Shawshank Redemption1994
112 Angry Men1957
\n", "
" ], "text/plain": [ " title release_year\n", "0 The Shawshank Redemption 1994\n", "1 12 Angry Men 1957" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# La unión de dos tablas con duplicados\n", "sql = ''' \n", "SELECT * FROM top_rated_films\n", "EXCEPT \n", "SELECT * FROM most_popular_films;\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Eliminar las tablas del ejemplo\n", "sql = '''\n", "DROP TABLE IF EXISTS top_rated_films;\n", "DROP TABLE IF EXISTS most_popular_films;\n", "'''\n", "\n", "cnx.execute(sql)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## El operador ANY / SOME\n", "\n", "Permite comparar un valor con un conjunto de valores devueltos por una subconsulta.\n", "\n", "* El operador ANY debe estar precedido por uno de los siguientes operadores de comparación =, <=,>, <,> y <>\n", "* El operador ANY devuelve verdadero si cualquier valor de la subconsulta cumple la condición; de lo contrario, devuelve falso.\n", "* Tenga en cuenta que SOME es sinónimo de ANY, puede sustituir SOME por ANY cualquier sentencia SQL." ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
title
0Alley Evolution
1Analyze Hoosiers
2Anonymous Human
3Baked Cleopatra
4Casualties Encino
......
64Wild Apollo
65Worst Banger
66Wrong Behavior
67Young Language
68Youth Kick
\n", "

69 rows × 1 columns

\n", "
" ], "text/plain": [ " title\n", "0 Alley Evolution\n", "1 Analyze Hoosiers\n", "2 Anonymous Human\n", "3 Baked Cleopatra\n", "4 Casualties Encino\n", ".. ...\n", "64 Wild Apollo\n", "65 Worst Banger\n", "66 Wrong Behavior\n", "67 Young Language\n", "68 Youth Kick\n", "\n", "[69 rows x 1 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Las películas cuya duración es mayor o igual que \n", "# la duración máxima de cualquier categoría de película.\n", "\n", "sql = ''' \n", "SELECT title\n", "FROM film\n", "WHERE length >= ANY(\n", " SELECT MAX( length )\n", " FROM film\n", " INNER JOIN film_category USING(film_id)\n", " GROUP BY category_id );\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* Igual ANY ( = ANY) es equivalente al operador IN" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlecategory_id
0Amadeus Holy1
1American Circus1
2Antitrust Tomatoes1
3Apollo Teen7
4Ark Ridgemont1
.........
121Werewolf Lola1
122West Lion7
123Witches Panic7
124Women Dorado1
125Worst Banger1
\n", "

126 rows × 2 columns

\n", "
" ], "text/plain": [ " title category_id\n", "0 Amadeus Holy 1\n", "1 American Circus 1\n", "2 Antitrust Tomatoes 1\n", "3 Apollo Teen 7\n", "4 Ark Ridgemont 1\n", ".. ... ...\n", "121 Werewolf Lola 1\n", "122 West Lion 7\n", "123 Witches Panic 7\n", "124 Women Dorado 1\n", "125 Worst Banger 1\n", "\n", "[126 rows x 2 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# obtiener las películas cuya categoría es Actiono Drama.\n", "\n", "sql = ''' \n", "SELECT\n", " title,\n", " category_id\n", "FROM\n", " film\n", " INNER JOIN film_category\n", " USING(film_id)\n", "WHERE\n", " category_id = ANY(\n", " SELECT\n", " category_id\n", " FROM\n", " category\n", " WHERE\n", " NAME = 'Action'\n", " OR NAME = 'Drama'\n", " );\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlecategory_id
0Amadeus Holy1
1American Circus1
2Antitrust Tomatoes1
3Apollo Teen7
4Ark Ridgemont1
.........
121Werewolf Lola1
122West Lion7
123Witches Panic7
124Women Dorado1
125Worst Banger1
\n", "

126 rows × 2 columns

\n", "
" ], "text/plain": [ " title category_id\n", "0 Amadeus Holy 1\n", "1 American Circus 1\n", "2 Antitrust Tomatoes 1\n", "3 Apollo Teen 7\n", "4 Ark Ridgemont 1\n", ".. ... ...\n", "121 Werewolf Lola 1\n", "122 West Lion 7\n", "123 Witches Panic 7\n", "124 Women Dorado 1\n", "125 Worst Banger 1\n", "\n", "[126 rows x 2 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Consulta IN equivalente\n", "\n", "sql = ''' \n", "SELECT\n", " title,\n", " category_id\n", "FROM\n", " film\n", "INNER JOIN film_category\n", " USING(film_id)\n", "WHERE\n", " category_id IN (\n", " SELECT\n", " category_id\n", " FROM\n", " category\n", " WHERE\n", " NAME = 'Action'\n", " OR NAME = 'Drama'\n", " );\n", "'''\n", "\n", "pd.read_sql_query(sql, cnx)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }