Consultas de Selección: IN. LIKE, IS NULL¶
Conexión a la Base de Datos¶
[1]:
import pandas as pd
import psycopg2
import sqlalchemy
import matplotlib as plt
%matplotlib inline
from sqlalchemy import create_engine
POSTGRES_ADDRESS = 'localhost' # Este es el servidor, puede ser una IP
POSTGRES_PORT = '5432'
POSTGRES_USERNAME = 'postgres'
POSTGRES_PASSWORD = '1234'
POSTGRES_DBNAME = 'dvdrental'
# Ahora se configura la cadena de conexión.
# Esta es una cadena que contiene los parámetros necesarios para establecer una
# conexión con posgres.
postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'.format(username=POSTGRES_USERNAME,
password=POSTGRES_PASSWORD,
ipaddress=POSTGRES_ADDRESS,
port=POSTGRES_PORT,
dbname=POSTGRES_DBNAME))
# A continuación se crea la conexión
cnx = create_engine(postgres_str)
El Operador IN¶
Se utiliza el operador IN en la cláusula WHERE para comprobar si un valor coincide con algún valor en una lista de valores.
[2]:
# conocer la información de alquiler de los ID de cliente 1 y 2
sql = '''
SELECT customer_id, rental_id, return_date
FROM rental
WHERE customer_id IN (1, 2)
ORDER BY return_date DESC;
'''
pd.read_sql_query(sql, cnx)
[2]:
| customer_id | rental_id | return_date | |
|---|---|---|---|
| 0 | 2 | 15145 | 2005-08-31 15:51:04 |
| 1 | 1 | 15315 | 2005-08-30 01:51:46 |
| 2 | 2 | 14743 | 2005-08-29 00:18:56 |
| 3 | 1 | 15298 | 2005-08-28 22:49:37 |
| 4 | 2 | 14475 | 2005-08-27 08:59:32 |
| 5 | 1 | 14825 | 2005-08-27 07:01:57 |
| 6 | 2 | 15907 | 2005-08-25 23:23:35 |
| 7 | 2 | 12963 | 2005-08-23 11:37:04 |
| 8 | 1 | 13176 | 2005-08-23 08:50:54 |
| 9 | 1 | 14762 | 2005-08-23 01:30:57 |
| 10 | 1 | 12250 | 2005-08-22 23:05:29 |
| 11 | 1 | 13068 | 2005-08-20 14:44:16 |
| 12 | 2 | 11614 | 2005-08-20 07:04:18 |
| 13 | 1 | 11824 | 2005-08-19 10:11:54 |
| 14 | 1 | 11299 | 2005-08-10 16:40:52 |
| 15 | 1 | 10437 | 2005-08-10 12:12:04 |
| 16 | 2 | 11177 | 2005-08-10 10:55:48 |
| 17 | 2 | 11087 | 2005-08-10 10:37:41 |
| 18 | 2 | 9236 | 2005-08-08 18:52:43 |
| 19 | 2 | 9296 | 2005-08-08 11:57:13 |
| 20 | 2 | 8230 | 2005-08-06 19:52:59 |
| 21 | 2 | 9465 | 2005-08-06 16:43:53 |
| 22 | 2 | 10466 | 2005-08-06 06:28:26 |
| 23 | 2 | 9248 | 2005-08-05 11:19:11 |
| 24 | 2 | 11256 | 2005-08-04 16:39:53 |
| 25 | 1 | 11367 | 2005-08-04 13:19:38 |
| 26 | 2 | 9031 | 2005-08-04 10:45:10 |
| 27 | 2 | 7376 | 2005-08-04 10:35:02 |
| 28 | 2 | 10136 | 2005-08-03 19:44:56 |
| 29 | 1 | 9571 | 2005-08-02 23:26:18 |
| 30 | 2 | 10918 | 2005-08-02 21:23:56 |
| 31 | 2 | 7459 | 2005-08-02 21:07:20 |
| 32 | 2 | 7346 | 2005-08-02 16:48:42 |
| 33 | 2 | 8705 | 2005-08-02 16:01:29 |
| 34 | 2 | 8598 | 2005-08-01 08:39:59 |
| 35 | 1 | 8326 | 2005-08-01 05:16:49 |
| 36 | 1 | 7273 | 2005-07-31 06:50:22 |
| 37 | 1 | 8033 | 2005-07-30 17:56:23 |
| 38 | 1 | 7841 | 2005-07-30 12:37:45 |
| 39 | 1 | 8116 | 2005-07-29 22:54:07 |
| 40 | 1 | 8074 | 2005-07-29 20:17:39 |
| 41 | 2 | 5755 | 2005-07-19 17:02:56 |
| 42 | 1 | 6163 | 2005-07-19 13:15:46 |
| 43 | 1 | 5244 | 2005-07-14 14:01:07 |
| 44 | 1 | 4526 | 2005-07-14 01:19:05 |
| 45 | 1 | 5326 | 2005-07-13 18:02:01 |
| 46 | 2 | 5636 | 2005-07-13 02:36:24 |
| 47 | 1 | 4611 | 2005-07-12 13:25:56 |
| 48 | 1 | 3284 | 2005-06-28 03:28:45 |
| 49 | 1 | 1476 | 2005-06-25 02:26:46 |
| 50 | 2 | 2128 | 2005-06-24 00:41:58 |
| 51 | 1 | 1185 | 2005-06-23 02:42:12 |
| 52 | 1 | 2308 | 2005-06-22 03:36:48 |
| 53 | 1 | 2363 | 2005-06-19 17:40:59 |
| 54 | 1 | 1422 | 2005-06-19 15:54:53 |
| 55 | 1 | 1725 | 2005-06-17 21:05:57 |
| 56 | 1 | 76 | 2005-06-03 12:00:37 |
| 57 | 1 | 573 | 2005-06-03 06:32:23 |
| 58 | 2 | 320 | 2005-05-28 04:30:24 |
[3]:
# conocer la información de alquiler de los ID de cliente 1 y 2
# esta es equivalente a la anterior
sql = '''
SELECT customer_id, rental_id, return_date
FROM rental
WHERE customer_id = 1 OR customer_id = 2
ORDER BY return_date DESC;
'''
pd.read_sql_query(sql, cnx)
[3]:
| customer_id | rental_id | return_date | |
|---|---|---|---|
| 0 | 2 | 15145 | 2005-08-31 15:51:04 |
| 1 | 1 | 15315 | 2005-08-30 01:51:46 |
| 2 | 2 | 14743 | 2005-08-29 00:18:56 |
| 3 | 1 | 15298 | 2005-08-28 22:49:37 |
| 4 | 2 | 14475 | 2005-08-27 08:59:32 |
| 5 | 1 | 14825 | 2005-08-27 07:01:57 |
| 6 | 2 | 15907 | 2005-08-25 23:23:35 |
| 7 | 2 | 12963 | 2005-08-23 11:37:04 |
| 8 | 1 | 13176 | 2005-08-23 08:50:54 |
| 9 | 1 | 14762 | 2005-08-23 01:30:57 |
| 10 | 1 | 12250 | 2005-08-22 23:05:29 |
| 11 | 1 | 13068 | 2005-08-20 14:44:16 |
| 12 | 2 | 11614 | 2005-08-20 07:04:18 |
| 13 | 1 | 11824 | 2005-08-19 10:11:54 |
| 14 | 1 | 11299 | 2005-08-10 16:40:52 |
| 15 | 1 | 10437 | 2005-08-10 12:12:04 |
| 16 | 2 | 11177 | 2005-08-10 10:55:48 |
| 17 | 2 | 11087 | 2005-08-10 10:37:41 |
| 18 | 2 | 9236 | 2005-08-08 18:52:43 |
| 19 | 2 | 9296 | 2005-08-08 11:57:13 |
| 20 | 2 | 8230 | 2005-08-06 19:52:59 |
| 21 | 2 | 9465 | 2005-08-06 16:43:53 |
| 22 | 2 | 10466 | 2005-08-06 06:28:26 |
| 23 | 2 | 9248 | 2005-08-05 11:19:11 |
| 24 | 2 | 11256 | 2005-08-04 16:39:53 |
| 25 | 1 | 11367 | 2005-08-04 13:19:38 |
| 26 | 2 | 9031 | 2005-08-04 10:45:10 |
| 27 | 2 | 7376 | 2005-08-04 10:35:02 |
| 28 | 2 | 10136 | 2005-08-03 19:44:56 |
| 29 | 1 | 9571 | 2005-08-02 23:26:18 |
| 30 | 2 | 10918 | 2005-08-02 21:23:56 |
| 31 | 2 | 7459 | 2005-08-02 21:07:20 |
| 32 | 2 | 7346 | 2005-08-02 16:48:42 |
| 33 | 2 | 8705 | 2005-08-02 16:01:29 |
| 34 | 2 | 8598 | 2005-08-01 08:39:59 |
| 35 | 1 | 8326 | 2005-08-01 05:16:49 |
| 36 | 1 | 7273 | 2005-07-31 06:50:22 |
| 37 | 1 | 8033 | 2005-07-30 17:56:23 |
| 38 | 1 | 7841 | 2005-07-30 12:37:45 |
| 39 | 1 | 8116 | 2005-07-29 22:54:07 |
| 40 | 1 | 8074 | 2005-07-29 20:17:39 |
| 41 | 2 | 5755 | 2005-07-19 17:02:56 |
| 42 | 1 | 6163 | 2005-07-19 13:15:46 |
| 43 | 1 | 5244 | 2005-07-14 14:01:07 |
| 44 | 1 | 4526 | 2005-07-14 01:19:05 |
| 45 | 1 | 5326 | 2005-07-13 18:02:01 |
| 46 | 2 | 5636 | 2005-07-13 02:36:24 |
| 47 | 1 | 4611 | 2005-07-12 13:25:56 |
| 48 | 1 | 3284 | 2005-06-28 03:28:45 |
| 49 | 1 | 1476 | 2005-06-25 02:26:46 |
| 50 | 2 | 2128 | 2005-06-24 00:41:58 |
| 51 | 1 | 1185 | 2005-06-23 02:42:12 |
| 52 | 1 | 2308 | 2005-06-22 03:36:48 |
| 53 | 1 | 2363 | 2005-06-19 17:40:59 |
| 54 | 1 | 1422 | 2005-06-19 15:54:53 |
| 55 | 1 | 1725 | 2005-06-17 21:05:57 |
| 56 | 1 | 76 | 2005-06-03 12:00:37 |
| 57 | 1 | 573 | 2005-06-03 06:32:23 |
| 58 | 2 | 320 | 2005-05-28 04:30:24 |
[4]:
# Se encuentra que todos los alquileres cuyo ID de cliente no es 1 o 2
sql = '''
SELECT customer_id, rental_id, return_date
FROM rental
WHERE customer_id NOT IN (1, 2);
'''
pd.read_sql_query(sql, cnx)
[4]:
| customer_id | rental_id | return_date | |
|---|---|---|---|
| 0 | 459 | 2 | 2005-05-28 19:40:33 |
| 1 | 408 | 3 | 2005-06-01 22:12:39 |
| 2 | 333 | 4 | 2005-06-03 01:43:41 |
| 3 | 222 | 5 | 2005-06-02 04:33:21 |
| 4 | 549 | 6 | 2005-05-27 01:32:07 |
| ... | ... | ... | ... |
| 15980 | 74 | 16046 | 2005-08-27 18:02:47 |
| 15981 | 114 | 16047 | 2005-08-25 02:48:48 |
| 15982 | 103 | 16048 | 2005-08-31 21:33:07 |
| 15983 | 393 | 16049 | 2005-08-30 01:01:12 |
| 15984 | 130 | 1 | 2005-05-26 22:04:30 |
15985 rows × 3 columns
[5]:
# Equivalente a la anterior
sql = '''
SELECT customer_id, rental_id, return_date
FROM rental
WHERE customer_id <> 1 AND customer_id <> 2;
'''
pd.read_sql_query(sql, cnx)
[5]:
| customer_id | rental_id | return_date | |
|---|---|---|---|
| 0 | 459 | 2 | 2005-05-28 19:40:33 |
| 1 | 408 | 3 | 2005-06-01 22:12:39 |
| 2 | 333 | 4 | 2005-06-03 01:43:41 |
| 3 | 222 | 5 | 2005-06-02 04:33:21 |
| 4 | 549 | 6 | 2005-05-27 01:32:07 |
| ... | ... | ... | ... |
| 15980 | 74 | 16046 | 2005-08-27 18:02:47 |
| 15981 | 114 | 16047 | 2005-08-25 02:48:48 |
| 15982 | 103 | 16048 | 2005-08-31 21:33:07 |
| 15983 | 393 | 16049 | 2005-08-30 01:01:12 |
| 15984 | 130 | 1 | 2005-05-26 22:04:30 |
15985 rows × 3 columns
Subconsultas con el operador IN¶
Suponga que se desea consultar el nombre completo de las personas que tienen préstamos con fecha de devolución al 30 de mayo del 2005.
[6]:
# Primero, consultamos lista de identificadores de clientes de la
# tabla rental (prestamos) con la fecha de devolución 2005-05-27
# Sin embargo, esta tabla no tiene los nombres de las personas
sql = '''
SELECT customer_id
FROM rental
WHERE CAST(return_date AS DATE) = '2005-05-30'
ORDER BY customer_id;
'''
pd.read_sql_query(sql, cnx)
[6]:
| customer_id | |
|---|---|
| 0 | 5 |
| 1 | 14 |
| 2 | 16 |
| 3 | 21 |
| 4 | 23 |
| ... | ... |
| 93 | 538 |
| 94 | 554 |
| 95 | 574 |
| 96 | 575 |
| 97 | 578 |
98 rows × 1 columns
[7]:
# Para obtener los nombres de las personas, la sentencia anterior se usa como subconsulta en IN...
sql = '''
SELECT customer_id, first_name || ' ' || last_name as full_name
FROM customer
WHERE customer_id IN (
SELECT customer_id
FROM rental
WHERE CAST (return_date AS DATE) = '2005-05-30'
)
ORDER BY customer_id;
'''
pd.read_sql_query(sql, cnx)
[7]:
| customer_id | full_name | |
|---|---|---|
| 0 | 5 | Elizabeth Brown |
| 1 | 14 | Betty White |
| 2 | 16 | Sandra Martin |
| 3 | 21 | Michelle Clark |
| 4 | 23 | Sarah Lewis |
| ... | ... | ... |
| 89 | 538 | Ted Breaux |
| 90 | 554 | Dwayne Olvera |
| 91 | 574 | Julian Vest |
| 92 | 575 | Isaac Oglesby |
| 93 | 578 | Willard Lumpkin |
94 rows × 2 columns
[8]:
# ¿Por qué la siguiente sentencia no entrega los mismos resultados?
sql = '''
SELECT c.customer_id, c.first_name || ' ' || c.last_name as full_name
FROM customer as c, ( SELECT customer_id
FROM rental
WHERE CAST (return_date AS DATE) = '2005-05-30'
) as ids
WHERE ids.customer_id = c.customer_id
ORDER BY customer_id;
'''
pd.read_sql_query(sql, cnx)
[8]:
| customer_id | full_name | |
|---|---|---|
| 0 | 5 | Elizabeth Brown |
| 1 | 14 | Betty White |
| 2 | 16 | Sandra Martin |
| 3 | 21 | Michelle Clark |
| 4 | 23 | Sarah Lewis |
| ... | ... | ... |
| 93 | 538 | Ted Breaux |
| 94 | 554 | Dwayne Olvera |
| 95 | 574 | Julian Vest |
| 96 | 575 | Isaac Oglesby |
| 97 | 578 | Willard Lumpkin |
98 rows × 2 columns
[9]:
# sentencia equivalente a la operación IN
sql = '''
SELECT c.customer_id, c.first_name || ' ' || c.last_name as full_name
FROM customer as c, ( SELECT distinct customer_id
FROM rental
WHERE CAST (return_date AS DATE) = '2005-05-30'
) as ids
WHERE ids.customer_id = c.customer_id
ORDER BY customer_id;
'''
pd.read_sql_query(sql, cnx)
[9]:
| customer_id | full_name | |
|---|---|---|
| 0 | 5 | Elizabeth Brown |
| 1 | 14 | Betty White |
| 2 | 16 | Sandra Martin |
| 3 | 21 | Michelle Clark |
| 4 | 23 | Sarah Lewis |
| ... | ... | ... |
| 89 | 538 | Ted Breaux |
| 90 | 554 | Dwayne Olvera |
| 91 | 574 | Julian Vest |
| 92 | 575 | Isaac Oglesby |
| 93 | 578 | Willard Lumpkin |
94 rows × 2 columns
Operadores LIKE y ILIKE¶
Se usa para consultar datos usando coincidencias de patrones simples, mediante comodines (%) para buscar cualquier coincidencia con n cantidad de caracteres y guión bajo ( _ ) para un solo caracter.
[10]:
# Buscar las coincidencias de los nombres que comiencen por Albert
sql = '''
SELECT first_name, last_name
FROM customer
WHERE first_name LIKE 'Albert%'
ORDER BY first_name;
'''
pd.read_sql_query(sql, cnx)
[10]:
| first_name | last_name | |
|---|---|---|
| 0 | Albert | Crouse |
| 1 | Alberto | Henning |
[11]:
# Buscar las coincidencias de los nombres que comiencen por Albert y finalicen con una letra
sql = '''
SELECT first_name, last_name
FROM customer
WHERE first_name LIKE 'Albert_'
ORDER BY first_name;
'''
pd.read_sql_query(sql, cnx)
[11]:
| first_name | last_name | |
|---|---|---|
| 0 | Alberto | Henning |
[12]:
# Buscar las coincidencias de los nombres que comiencen por una letra
# que contengan her y finalicen con ningun o varios caracteres
sql = '''
SELECT first_name, last_name
FROM customer
WHERE first_name LIKE '_her%'
ORDER BY first_name;
'''
pd.read_sql_query(sql, cnx)
[12]:
| first_name | last_name | |
|---|---|---|
| 0 | Cheryl | Murphy |
| 1 | Sherri | Rhodes |
| 2 | Sherry | Marshall |
| 3 | Theresa | Watson |
[13]:
# Encontrar los nombres que no comienzan por A o B
sql = '''
SELECT first_name, last_name
FROM customer
WHERE first_name NOT LIKE 'A%' AND first_name NOT LIKE 'B%'
ORDER BY first_name
'''
pd.read_sql_query(sql, cnx)
[13]:
| first_name | last_name | |
|---|---|---|
| 0 | Calvin | Martel |
| 1 | Carl | Artis |
| 2 | Carla | Gutierrez |
| 3 | Carlos | Coughlin |
| 4 | Carmen | Owens |
| ... | ... | ... |
| 518 | Willie | Markham |
| 519 | Wilma | Richards |
| 520 | Yolanda | Weaver |
| 521 | Yvonne | Watkins |
| 522 | Zachary | Hite |
523 rows × 2 columns
[14]:
# ILIKE compara el valor sin distinción entre mayúsculas y minúsculas
sql = '''
SELECT first_name, last_name
FROM customer
WHERE first_name ILIKE 'NAT%';
'''
pd.read_sql_query(sql, cnx)
[14]:
| first_name | last_name | |
|---|---|---|
| 0 | Natalie | Meyer |
| 1 | Nathan | Runyon |
| 2 | Nathaniel | Adam |
Valores NULOS y Operador IS NULL¶
NULL significa información faltante o no aplicable. NULL no es un valor, por lo tanto, no puede compararlo con otros valores como números o cadenas. La comparación de NULL con un valor siempre dará como resultado NULL, lo que significa un resultado desconocido.
NULL no es igual a NULL, esta operación siempre será NULL
[15]:
sql = '''
CREATE TABLE contactos(
id INT GENERATED BY DEFAULT AS IDENTITY,
nombre VARCHAR(50) NOT NULL,
apellido VARCHAR(50) NOT NULL,
correo VARCHAR(255) NOT NULL,
telefono VARCHAR(15),
PRIMARY KEY (id)
);
'''
cnx.execute(sql)
[15]:
<sqlalchemy.engine.result.ResultProxy at 0x1c4349842e0>
[16]:
sql = '''
INSERT INTO contactos(nombre, apellido, correo, telefono)
VALUES ('Natalia','Rosas','natalia.rosas@rebrand.com', NULL),
('Lilana','Posada','lily.pos@gmail.com','305 234 27 64'),
('Lucio','Vasquez','lvasquez@gmail.com','305 234 34 65');
'''
cnx.execute(sql)
[16]:
<sqlalchemy.engine.result.ResultProxy at 0x1c43170a700>
[17]:
# Cuáles son los contactos que no tienen número de teléfono
# No se retorna ninguna fila
sql = '''
SELECT *
FROM contactos
WHERE telefono = NULL;
'''
pd.read_sql_query(sql, cnx)
[17]:
| id | nombre | apellido | correo | telefono |
|---|
[18]:
# Cuáles son los contactos que no tienen número de teléfono
# No se retorna ninguna fila
sql = '''
SELECT *
FROM contactos
WHERE telefono is NULL;
'''
pd.read_sql_query(sql, cnx)
[18]:
| id | nombre | apellido | correo | telefono | |
|---|---|---|---|---|---|
| 0 | 1 | Natalia | Rosas | natalia.rosas@rebrand.com | None |
[19]:
# Cuáles son los contactos que tienen número de teléfono
# No se retorna ninguna fila
sql = '''
SELECT *
FROM contactos
WHERE telefono IS NOT NULL;
'''
pd.read_sql_query(sql, cnx)
[19]:
| id | nombre | apellido | correo | telefono | |
|---|---|---|---|---|---|
| 0 | 2 | Lilana | Posada | lily.pos@gmail.com | 305 234 27 64 |
| 1 | 3 | Lucio | Vasquez | lvasquez@gmail.com | 305 234 34 65 |
[20]:
sql = '''
DROP TABLE contactos;
'''
cnx.execute(sql)
[20]:
<sqlalchemy.engine.result.ResultProxy at 0x1c43172fb20>