{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Consultas de Selección: Group By, Having, Union, Intersect, Except, Any\n",
"\n",
"## Conexión a la Base de Datos"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import psycopg2\n",
"import sqlalchemy\n",
"import matplotlib as plt\n",
"\n",
"%matplotlib inline\n",
"\n",
"from sqlalchemy import create_engine\n",
"\n",
"POSTGRES_ADDRESS = 'localhost' # Este es el servidor, puede ser una IP\n",
"POSTGRES_PORT = '5432'\n",
"POSTGRES_USERNAME = 'postgres' \n",
"POSTGRES_PASSWORD = '1234' \n",
"POSTGRES_DBNAME = 'dvdrental' \n",
"\n",
"# Ahora se configura la cadena de conexión.\n",
"# Esta es una cadena que contiene los parámetros necesarios para establecer una\n",
"# conexión con posgres.\n",
"postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'.format(username=POSTGRES_USERNAME,\n",
" password=POSTGRES_PASSWORD,\n",
" ipaddress=POSTGRES_ADDRESS,\n",
" port=POSTGRES_PORT,\n",
" dbname=POSTGRES_DBNAME))\n",
"# A continuación se crea la conexión\n",
"cnx = create_engine(postgres_str)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## La cláusula Group By\n",
"\n",
"La cláusula GROUP BY divide las filas devueltas de la declaración SELECT en grupos. Para cada grupo, puede aplicar una función agregada, por ejemplo, SUM() para calcular la suma de elementos o COUNT() para obtener el número de elementos en los grupos."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" customer_id | \n",
" total_pago | \n",
" promedio_pago | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 184 | \n",
" 80.80 | \n",
" 4.040000 | \n",
"
\n",
" \n",
" 1 | \n",
" 87 | \n",
" 137.72 | \n",
" 4.918571 | \n",
"
\n",
" \n",
" 2 | \n",
" 477 | \n",
" 106.79 | \n",
" 5.085238 | \n",
"
\n",
" \n",
" 3 | \n",
" 273 | \n",
" 130.72 | \n",
" 4.668571 | \n",
"
\n",
" \n",
" 4 | \n",
" 550 | \n",
" 151.69 | \n",
" 4.893226 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 594 | \n",
" 449 | \n",
" 80.83 | \n",
" 4.754706 | \n",
"
\n",
" \n",
" 595 | \n",
" 64 | \n",
" 91.70 | \n",
" 3.056667 | \n",
"
\n",
" \n",
" 596 | \n",
" 520 | \n",
" 127.69 | \n",
" 4.119032 | \n",
"
\n",
" \n",
" 597 | \n",
" 55 | \n",
" 84.81 | \n",
" 4.463684 | \n",
"
\n",
" \n",
" 598 | \n",
" 148 | \n",
" 211.55 | \n",
" 4.701111 | \n",
"
\n",
" \n",
"
\n",
"
599 rows × 3 columns
\n",
"
"
],
"text/plain": [
" customer_id total_pago promedio_pago\n",
"0 184 80.80 4.040000\n",
"1 87 137.72 4.918571\n",
"2 477 106.79 5.085238\n",
"3 273 130.72 4.668571\n",
"4 550 151.69 4.893226\n",
".. ... ... ...\n",
"594 449 80.83 4.754706\n",
"595 64 91.70 3.056667\n",
"596 520 127.69 4.119032\n",
"597 55 84.81 4.463684\n",
"598 148 211.55 4.701111\n",
"\n",
"[599 rows x 3 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sql = ''' \n",
"SELECT\n",
"\tcustomer_id,\n",
"\tSUM (amount) as total_pago,\n",
" AVG (amount) as promedio_pago\n",
"FROM\n",
"\tpayment\n",
"GROUP BY\n",
"\tcustomer_id;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" customer_id | \n",
" total_pago | \n",
" promedio_pago | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 148 | \n",
" 211.55 | \n",
" 4.701111 | \n",
"
\n",
" \n",
" 1 | \n",
" 526 | \n",
" 208.58 | \n",
" 4.966190 | \n",
"
\n",
" \n",
" 2 | \n",
" 178 | \n",
" 194.61 | \n",
" 4.990000 | \n",
"
\n",
" \n",
" 3 | \n",
" 137 | \n",
" 191.62 | \n",
" 5.042632 | \n",
"
\n",
" \n",
" 4 | \n",
" 144 | \n",
" 189.60 | \n",
" 4.740000 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 594 | \n",
" 110 | \n",
" 49.88 | \n",
" 4.156667 | \n",
"
\n",
" \n",
" 595 | \n",
" 320 | \n",
" 47.85 | \n",
" 3.190000 | \n",
"
\n",
" \n",
" 596 | \n",
" 248 | \n",
" 37.87 | \n",
" 2.913077 | \n",
"
\n",
" \n",
" 597 | \n",
" 281 | \n",
" 32.90 | \n",
" 3.290000 | \n",
"
\n",
" \n",
" 598 | \n",
" 318 | \n",
" 27.93 | \n",
" 3.990000 | \n",
"
\n",
" \n",
"
\n",
"
599 rows × 3 columns
\n",
"
"
],
"text/plain": [
" customer_id total_pago promedio_pago\n",
"0 148 211.55 4.701111\n",
"1 526 208.58 4.966190\n",
"2 178 194.61 4.990000\n",
"3 137 191.62 5.042632\n",
"4 144 189.60 4.740000\n",
".. ... ... ...\n",
"594 110 49.88 4.156667\n",
"595 320 47.85 3.190000\n",
"596 248 37.87 2.913077\n",
"597 281 32.90 3.290000\n",
"598 318 27.93 3.990000\n",
"\n",
"[599 rows x 3 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sql = ''' \n",
"SELECT\n",
"\tcustomer_id,\n",
"\tSUM (amount) as total_pago,\n",
" AVG (amount) as promedio_pago\n",
"FROM\n",
"\tpayment\n",
"GROUP BY\n",
"\tcustomer_id\n",
"ORDER BY\n",
" total_pago DESC;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" customer_id | \n",
" sum | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 148 | \n",
" 211.55 | \n",
"
\n",
" \n",
" 1 | \n",
" 526 | \n",
" 208.58 | \n",
"
\n",
" \n",
" 2 | \n",
" 178 | \n",
" 194.61 | \n",
"
\n",
" \n",
" 3 | \n",
" 137 | \n",
" 191.62 | \n",
"
\n",
" \n",
" 4 | \n",
" 144 | \n",
" 189.60 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 594 | \n",
" 110 | \n",
" 49.88 | \n",
"
\n",
" \n",
" 595 | \n",
" 320 | \n",
" 47.85 | \n",
"
\n",
" \n",
" 596 | \n",
" 248 | \n",
" 37.87 | \n",
"
\n",
" \n",
" 597 | \n",
" 281 | \n",
" 32.90 | \n",
"
\n",
" \n",
" 598 | \n",
" 318 | \n",
" 27.93 | \n",
"
\n",
" \n",
"
\n",
"
599 rows × 2 columns
\n",
"
"
],
"text/plain": [
" customer_id sum\n",
"0 148 211.55\n",
"1 526 208.58\n",
"2 178 194.61\n",
"3 137 191.62\n",
"4 144 189.60\n",
".. ... ...\n",
"594 110 49.88\n",
"595 320 47.85\n",
"596 248 37.87\n",
"597 281 32.90\n",
"598 318 27.93\n",
"\n",
"[599 rows x 2 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sql = ''' \n",
"SELECT\n",
"\tcustomer_id,\n",
"\tSUM (amount)\n",
"FROM\n",
"\tpayment\n",
"GROUP BY\n",
"\tcustomer_id\n",
"ORDER BY\n",
"\tSUM (amount) DESC;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" nombre_completo | \n",
" total_pago | \n",
" promedio_pago | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Brian Wyman | \n",
" 27.93 | \n",
" 3.990000 | \n",
"
\n",
" \n",
" 1 | \n",
" Leona Obrien | \n",
" 32.90 | \n",
" 3.290000 | \n",
"
\n",
" \n",
" 2 | \n",
" Caroline Bowman | \n",
" 37.87 | \n",
" 2.913077 | \n",
"
\n",
" \n",
" 3 | \n",
" Anthony Schwab | \n",
" 47.85 | \n",
" 3.190000 | \n",
"
\n",
" \n",
" 4 | \n",
" Tiffany Jordan | \n",
" 49.88 | \n",
" 4.156667 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 594 | \n",
" Clara Shaw | \n",
" 189.60 | \n",
" 4.740000 | \n",
"
\n",
" \n",
" 595 | \n",
" Rhonda Kennedy | \n",
" 191.62 | \n",
" 5.042632 | \n",
"
\n",
" \n",
" 596 | \n",
" Marion Snyder | \n",
" 194.61 | \n",
" 4.990000 | \n",
"
\n",
" \n",
" 597 | \n",
" Karl Seal | \n",
" 208.58 | \n",
" 4.966190 | \n",
"
\n",
" \n",
" 598 | \n",
" Eleanor Hunt | \n",
" 211.55 | \n",
" 4.701111 | \n",
"
\n",
" \n",
"
\n",
"
599 rows × 3 columns
\n",
"
"
],
"text/plain": [
" nombre_completo total_pago promedio_pago\n",
"0 Brian Wyman 27.93 3.990000\n",
"1 Leona Obrien 32.90 3.290000\n",
"2 Caroline Bowman 37.87 2.913077\n",
"3 Anthony Schwab 47.85 3.190000\n",
"4 Tiffany Jordan 49.88 4.156667\n",
".. ... ... ...\n",
"594 Clara Shaw 189.60 4.740000\n",
"595 Rhonda Kennedy 191.62 5.042632\n",
"596 Marion Snyder 194.61 4.990000\n",
"597 Karl Seal 208.58 4.966190\n",
"598 Eleanor Hunt 211.55 4.701111\n",
"\n",
"[599 rows x 3 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Obtener también el nombre (¡Cuidado con los homónimos!)\n",
"sql = ''' \n",
"SELECT\n",
"\tfirst_name || ' ' || last_name nombre_completo,\n",
"\tSUM (amount) as total_pago,\n",
" AVG (amount) as promedio_pago\n",
"FROM\n",
"\tpayment\n",
"INNER JOIN customer USING (customer_id) \t\n",
"GROUP BY nombre_completo\n",
"ORDER BY total_pago;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" paid_date | \n",
" sum | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2007-02-14 | \n",
" 116.73 | \n",
"
\n",
" \n",
" 1 | \n",
" 2007-02-19 | \n",
" 1290.90 | \n",
"
\n",
" \n",
" 2 | \n",
" 2007-02-20 | \n",
" 1219.09 | \n",
"
\n",
" \n",
" 3 | \n",
" 2007-03-19 | \n",
" 2617.69 | \n",
"
\n",
" \n",
" 4 | \n",
" 2007-04-26 | \n",
" 347.21 | \n",
"
\n",
" \n",
" 5 | \n",
" 2007-04-08 | \n",
" 2227.84 | \n",
"
\n",
" \n",
" 6 | \n",
" 2007-02-15 | \n",
" 1188.92 | \n",
"
\n",
" \n",
" 7 | \n",
" 2007-04-28 | \n",
" 2622.73 | \n",
"
\n",
" \n",
" 8 | \n",
" 2007-03-17 | \n",
" 2442.16 | \n",
"
\n",
" \n",
" 9 | \n",
" 2007-03-20 | \n",
" 2669.89 | \n",
"
\n",
" \n",
" 10 | \n",
" 2007-03-23 | \n",
" 2342.43 | \n",
"
\n",
" \n",
" 11 | \n",
" 2007-03-21 | \n",
" 2868.27 | \n",
"
\n",
" \n",
" 12 | \n",
" 2007-04-29 | \n",
" 2717.60 | \n",
"
\n",
" \n",
" 13 | \n",
" 2007-04-10 | \n",
" 1973.18 | \n",
"
\n",
" \n",
" 14 | \n",
" 2007-05-14 | \n",
" 514.18 | \n",
"
\n",
" \n",
" 15 | \n",
" 2007-04-12 | \n",
" 1930.48 | \n",
"
\n",
" \n",
" 16 | \n",
" 2007-03-02 | \n",
" 2550.05 | \n",
"
\n",
" \n",
" 17 | \n",
" 2007-04-27 | \n",
" 2673.57 | \n",
"
\n",
" \n",
" 18 | \n",
" 2007-03-16 | \n",
" 299.28 | \n",
"
\n",
" \n",
" 19 | \n",
" 2007-02-18 | \n",
" 1275.98 | \n",
"
\n",
" \n",
" 20 | \n",
" 2007-04-07 | \n",
" 1984.28 | \n",
"
\n",
" \n",
" 21 | \n",
" 2007-02-21 | \n",
" 917.87 | \n",
"
\n",
" \n",
" 22 | \n",
" 2007-02-16 | \n",
" 1154.18 | \n",
"
\n",
" \n",
" 23 | \n",
" 2007-03-18 | \n",
" 2701.76 | \n",
"
\n",
" \n",
" 24 | \n",
" 2007-04-06 | \n",
" 2077.14 | \n",
"
\n",
" \n",
" 25 | \n",
" 2007-04-30 | \n",
" 5723.89 | \n",
"
\n",
" \n",
" 26 | \n",
" 2007-03-01 | \n",
" 2808.24 | \n",
"
\n",
" \n",
" 27 | \n",
" 2007-04-11 | \n",
" 1940.32 | \n",
"
\n",
" \n",
" 28 | \n",
" 2007-04-09 | \n",
" 2067.86 | \n",
"
\n",
" \n",
" 29 | \n",
" 2007-04-05 | \n",
" 273.36 | \n",
"
\n",
" \n",
" 30 | \n",
" 2007-02-17 | \n",
" 1188.17 | \n",
"
\n",
" \n",
" 31 | \n",
" 2007-03-22 | \n",
" 2586.79 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" paid_date sum\n",
"0 2007-02-14 116.73\n",
"1 2007-02-19 1290.90\n",
"2 2007-02-20 1219.09\n",
"3 2007-03-19 2617.69\n",
"4 2007-04-26 347.21\n",
"5 2007-04-08 2227.84\n",
"6 2007-02-15 1188.92\n",
"7 2007-04-28 2622.73\n",
"8 2007-03-17 2442.16\n",
"9 2007-03-20 2669.89\n",
"10 2007-03-23 2342.43\n",
"11 2007-03-21 2868.27\n",
"12 2007-04-29 2717.60\n",
"13 2007-04-10 1973.18\n",
"14 2007-05-14 514.18\n",
"15 2007-04-12 1930.48\n",
"16 2007-03-02 2550.05\n",
"17 2007-04-27 2673.57\n",
"18 2007-03-16 299.28\n",
"19 2007-02-18 1275.98\n",
"20 2007-04-07 1984.28\n",
"21 2007-02-21 917.87\n",
"22 2007-02-16 1154.18\n",
"23 2007-03-18 2701.76\n",
"24 2007-04-06 2077.14\n",
"25 2007-04-30 5723.89\n",
"26 2007-03-01 2808.24\n",
"27 2007-04-11 1940.32\n",
"28 2007-04-09 2067.86\n",
"29 2007-04-05 273.36\n",
"30 2007-02-17 1188.17\n",
"31 2007-03-22 2586.79"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Agrupar por Fecha\n",
"sql = ''' \n",
"SELECT \n",
"\tDATE(payment_date) paid_date, \n",
"\tSUM(amount) sum\n",
"FROM \n",
"\tpayment\n",
"GROUP BY\n",
"\tDATE(payment_date);\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Cláusulas HAVING \n",
"\n",
"La cláusula HAVING especifica una condición de búsqueda para un grupo o un agregado. Se usa a menudo con la cláusula GROUP BY para filtrar grupos o agregados en función de una condición específica."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" customer_id | \n",
" sum | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 526 | \n",
" 208.58 | \n",
"
\n",
" \n",
" 1 | \n",
" 148 | \n",
" 211.55 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" customer_id sum\n",
"0 526 208.58\n",
"1 148 211.55"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Los Clientes que han gastado más de 200 dólares\n",
"sql = ''' \n",
"SELECT\n",
"\tcustomer_id,\n",
"\tSUM (amount)\n",
"FROM\n",
"\tpayment\n",
"GROUP BY\n",
"\tcustomer_id\n",
"HAVING\n",
"\tSUM (amount) > 200;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" store_id | \n",
" count | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 326 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" store_id count\n",
"0 1 326"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Las tiendas con más de 300 clientes\n",
"sql = ''' \n",
"SELECT\n",
"\tstore_id,\n",
"\tCOUNT (customer_id)\n",
"FROM\n",
"\tcustomer\n",
"GROUP BY\n",
"\tstore_id\n",
"HAVING\n",
"\tCOUNT (customer_id) > 300;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## El operador de UNION\n",
"\n",
"El operador UNION combina conjuntos de resultados de dos o más declaraciones SELECT en un único conjunto de resultados."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sql = '''\n",
"DROP TABLE IF EXISTS top_rated_films;\n",
"CREATE TABLE top_rated_films(\n",
"\ttitle VARCHAR NOT NULL,\n",
"\trelease_year SMALLINT\n",
");\n",
"\n",
"DROP TABLE IF EXISTS most_popular_films;\n",
"CREATE TABLE most_popular_films(\n",
"\ttitle VARCHAR NOT NULL,\n",
"\trelease_year SMALLINT\n",
");\n",
"\n",
"INSERT INTO \n",
" top_rated_films(title,release_year)\n",
"VALUES\n",
" ('The Shawshank Redemption',1994),\n",
" ('The Godfather',1972),\n",
" ('12 Angry Men',1957);\n",
"\n",
"INSERT INTO \n",
" most_popular_films(title,release_year)\n",
"VALUES\n",
" ('An American Pickle',2020),\n",
" ('The Godfather',1972),\n",
" ('Greyhound',2020);\n",
"'''\n",
"\n",
"cnx.execute(sql)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
" release_year | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" The Shawshank Redemption | \n",
" 1994 | \n",
"
\n",
" \n",
" 1 | \n",
" The Godfather | \n",
" 1972 | \n",
"
\n",
" \n",
" 2 | \n",
" 12 Angry Men | \n",
" 1957 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" title release_year\n",
"0 The Shawshank Redemption 1994\n",
"1 The Godfather 1972\n",
"2 12 Angry Men 1957"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sql = ''' \n",
"SELECT * FROM top_rated_films;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
" release_year | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" An American Pickle | \n",
" 2020 | \n",
"
\n",
" \n",
" 1 | \n",
" The Godfather | \n",
" 1972 | \n",
"
\n",
" \n",
" 2 | \n",
" Greyhound | \n",
" 2020 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" title release_year\n",
"0 An American Pickle 2020\n",
"1 The Godfather 1972\n",
"2 Greyhound 2020"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sql = ''' \n",
"SELECT * FROM most_popular_films;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
" release_year | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" An American Pickle | \n",
" 2020 | \n",
"
\n",
" \n",
" 1 | \n",
" Greyhound | \n",
" 2020 | \n",
"
\n",
" \n",
" 2 | \n",
" The Shawshank Redemption | \n",
" 1994 | \n",
"
\n",
" \n",
" 3 | \n",
" The Godfather | \n",
" 1972 | \n",
"
\n",
" \n",
" 4 | \n",
" 12 Angry Men | \n",
" 1957 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" title release_year\n",
"0 An American Pickle 2020\n",
"1 Greyhound 2020\n",
"2 The Shawshank Redemption 1994\n",
"3 The Godfather 1972\n",
"4 12 Angry Men 1957"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# La unión de dos tablas sin duplicados\n",
"sql = ''' \n",
"SELECT * FROM top_rated_films\n",
"UNION\n",
"SELECT * FROM most_popular_films;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
" release_year | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" The Shawshank Redemption | \n",
" 1994 | \n",
"
\n",
" \n",
" 1 | \n",
" The Godfather | \n",
" 1972 | \n",
"
\n",
" \n",
" 2 | \n",
" 12 Angry Men | \n",
" 1957 | \n",
"
\n",
" \n",
" 3 | \n",
" An American Pickle | \n",
" 2020 | \n",
"
\n",
" \n",
" 4 | \n",
" The Godfather | \n",
" 1972 | \n",
"
\n",
" \n",
" 5 | \n",
" Greyhound | \n",
" 2020 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" title release_year\n",
"0 The Shawshank Redemption 1994\n",
"1 The Godfather 1972\n",
"2 12 Angry Men 1957\n",
"3 An American Pickle 2020\n",
"4 The Godfather 1972\n",
"5 Greyhound 2020"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# La unión de dos tablas con duplicados\n",
"sql = ''' \n",
"SELECT * FROM top_rated_films\n",
"UNION ALL\n",
"SELECT * FROM most_popular_films;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
" release_year | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 12 Angry Men | \n",
" 1957 | \n",
"
\n",
" \n",
" 1 | \n",
" An American Pickle | \n",
" 2020 | \n",
"
\n",
" \n",
" 2 | \n",
" Greyhound | \n",
" 2020 | \n",
"
\n",
" \n",
" 3 | \n",
" The Godfather | \n",
" 1972 | \n",
"
\n",
" \n",
" 4 | \n",
" The Godfather | \n",
" 1972 | \n",
"
\n",
" \n",
" 5 | \n",
" The Shawshank Redemption | \n",
" 1994 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" title release_year\n",
"0 12 Angry Men 1957\n",
"1 An American Pickle 2020\n",
"2 Greyhound 2020\n",
"3 The Godfather 1972\n",
"4 The Godfather 1972\n",
"5 The Shawshank Redemption 1994"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# La unión de dos tablas con duplicados\n",
"sql = ''' \n",
"SELECT * FROM top_rated_films\n",
"UNION ALL\n",
"SELECT * FROM most_popular_films\n",
"ORDER BY title;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Operador INTERSECT\n",
"\n",
"Devuelve las filas que están disponibles en ambos conjuntos de resultados. Ambos conjuntos DEBEN tener la misma cantidad de columnas y tipo."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
" release_year | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" The Godfather | \n",
" 1972 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" title release_year\n",
"0 The Godfather 1972"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# La unión de dos tablas con duplicados\n",
"sql = ''' \n",
"SELECT *\n",
"FROM most_popular_films \n",
"INTERSECT\n",
"SELECT *\n",
"FROM top_rated_films;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Operador EXCEPT\n",
"\n",
"Devuelve filas distintas de la primera consulta A (izquierda) que no están en la salida de la segunda consulta B (derecha).\n",
"\n",
"Except corresponde al área sombreada. [Fuente](https://www.postgresqltutorial.com/postgresql-except/)\n",
"\n",
""
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
" release_year | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" The Shawshank Redemption | \n",
" 1994 | \n",
"
\n",
" \n",
" 1 | \n",
" 12 Angry Men | \n",
" 1957 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" title release_year\n",
"0 The Shawshank Redemption 1994\n",
"1 12 Angry Men 1957"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# La unión de dos tablas con duplicados\n",
"sql = ''' \n",
"SELECT * FROM top_rated_films\n",
"EXCEPT \n",
"SELECT * FROM most_popular_films;\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Eliminar las tablas del ejemplo\n",
"sql = '''\n",
"DROP TABLE IF EXISTS top_rated_films;\n",
"DROP TABLE IF EXISTS most_popular_films;\n",
"'''\n",
"\n",
"cnx.execute(sql)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## El operador ANY / SOME\n",
"\n",
"Permite comparar un valor con un conjunto de valores devueltos por una subconsulta.\n",
"\n",
"* El operador ANY debe estar precedido por uno de los siguientes operadores de comparación =, <=,>, <,> y <>\n",
"* El operador ANY devuelve verdadero si cualquier valor de la subconsulta cumple la condición; de lo contrario, devuelve falso.\n",
"* Tenga en cuenta que SOME es sinónimo de ANY, puede sustituir SOME por ANY cualquier sentencia SQL."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Alley Evolution | \n",
"
\n",
" \n",
" 1 | \n",
" Analyze Hoosiers | \n",
"
\n",
" \n",
" 2 | \n",
" Anonymous Human | \n",
"
\n",
" \n",
" 3 | \n",
" Baked Cleopatra | \n",
"
\n",
" \n",
" 4 | \n",
" Casualties Encino | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 64 | \n",
" Wild Apollo | \n",
"
\n",
" \n",
" 65 | \n",
" Worst Banger | \n",
"
\n",
" \n",
" 66 | \n",
" Wrong Behavior | \n",
"
\n",
" \n",
" 67 | \n",
" Young Language | \n",
"
\n",
" \n",
" 68 | \n",
" Youth Kick | \n",
"
\n",
" \n",
"
\n",
"
69 rows × 1 columns
\n",
"
"
],
"text/plain": [
" title\n",
"0 Alley Evolution\n",
"1 Analyze Hoosiers\n",
"2 Anonymous Human\n",
"3 Baked Cleopatra\n",
"4 Casualties Encino\n",
".. ...\n",
"64 Wild Apollo\n",
"65 Worst Banger\n",
"66 Wrong Behavior\n",
"67 Young Language\n",
"68 Youth Kick\n",
"\n",
"[69 rows x 1 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Las películas cuya duración es mayor o igual que \n",
"# la duración máxima de cualquier categoría de película.\n",
"\n",
"sql = ''' \n",
"SELECT title\n",
"FROM film\n",
"WHERE length >= ANY(\n",
" SELECT MAX( length )\n",
" FROM film\n",
" INNER JOIN film_category USING(film_id)\n",
" GROUP BY category_id );\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* Igual ANY ( = ANY) es equivalente al operador IN"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
" category_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Amadeus Holy | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" American Circus | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" Antitrust Tomatoes | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" Apollo Teen | \n",
" 7 | \n",
"
\n",
" \n",
" 4 | \n",
" Ark Ridgemont | \n",
" 1 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 121 | \n",
" Werewolf Lola | \n",
" 1 | \n",
"
\n",
" \n",
" 122 | \n",
" West Lion | \n",
" 7 | \n",
"
\n",
" \n",
" 123 | \n",
" Witches Panic | \n",
" 7 | \n",
"
\n",
" \n",
" 124 | \n",
" Women Dorado | \n",
" 1 | \n",
"
\n",
" \n",
" 125 | \n",
" Worst Banger | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
126 rows × 2 columns
\n",
"
"
],
"text/plain": [
" title category_id\n",
"0 Amadeus Holy 1\n",
"1 American Circus 1\n",
"2 Antitrust Tomatoes 1\n",
"3 Apollo Teen 7\n",
"4 Ark Ridgemont 1\n",
".. ... ...\n",
"121 Werewolf Lola 1\n",
"122 West Lion 7\n",
"123 Witches Panic 7\n",
"124 Women Dorado 1\n",
"125 Worst Banger 1\n",
"\n",
"[126 rows x 2 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# obtiener las películas cuya categoría es Actiono Drama.\n",
"\n",
"sql = ''' \n",
"SELECT\n",
" title,\n",
" category_id\n",
"FROM\n",
" film\n",
" INNER JOIN film_category\n",
" USING(film_id)\n",
"WHERE\n",
" category_id = ANY(\n",
" SELECT\n",
" category_id\n",
" FROM\n",
" category\n",
" WHERE\n",
" NAME = 'Action'\n",
" OR NAME = 'Drama'\n",
" );\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
" category_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Amadeus Holy | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" American Circus | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" Antitrust Tomatoes | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" Apollo Teen | \n",
" 7 | \n",
"
\n",
" \n",
" 4 | \n",
" Ark Ridgemont | \n",
" 1 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 121 | \n",
" Werewolf Lola | \n",
" 1 | \n",
"
\n",
" \n",
" 122 | \n",
" West Lion | \n",
" 7 | \n",
"
\n",
" \n",
" 123 | \n",
" Witches Panic | \n",
" 7 | \n",
"
\n",
" \n",
" 124 | \n",
" Women Dorado | \n",
" 1 | \n",
"
\n",
" \n",
" 125 | \n",
" Worst Banger | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
126 rows × 2 columns
\n",
"
"
],
"text/plain": [
" title category_id\n",
"0 Amadeus Holy 1\n",
"1 American Circus 1\n",
"2 Antitrust Tomatoes 1\n",
"3 Apollo Teen 7\n",
"4 Ark Ridgemont 1\n",
".. ... ...\n",
"121 Werewolf Lola 1\n",
"122 West Lion 7\n",
"123 Witches Panic 7\n",
"124 Women Dorado 1\n",
"125 Worst Banger 1\n",
"\n",
"[126 rows x 2 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Consulta IN equivalente\n",
"\n",
"sql = ''' \n",
"SELECT\n",
" title,\n",
" category_id\n",
"FROM\n",
" film\n",
"INNER JOIN film_category\n",
" USING(film_id)\n",
"WHERE\n",
" category_id IN (\n",
" SELECT\n",
" category_id\n",
" FROM\n",
" category\n",
" WHERE\n",
" NAME = 'Action'\n",
" OR NAME = 'Drama'\n",
" );\n",
"'''\n",
"\n",
"pd.read_sql_query(sql, cnx)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}