Consultas de Selección: IN. LIKE, IS NULL¶
Conexión a la Base de Datos¶
[1]:
import pandas as pd
import psycopg2
import sqlalchemy
import matplotlib as plt
%matplotlib inline
from sqlalchemy import create_engine
POSTGRES_ADDRESS = 'localhost' # Este es el servidor, puede ser una IP
POSTGRES_PORT = '5432'
POSTGRES_USERNAME = 'postgres'
POSTGRES_PASSWORD = '1234'
POSTGRES_DBNAME = 'dvdrental'
# Ahora se configura la cadena de conexión.
# Esta es una cadena que contiene los parámetros necesarios para establecer una
# conexión con posgres.
postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'.format(username=POSTGRES_USERNAME,
password=POSTGRES_PASSWORD,
ipaddress=POSTGRES_ADDRESS,
port=POSTGRES_PORT,
dbname=POSTGRES_DBNAME))
# A continuación se crea la conexión
cnx = create_engine(postgres_str)
El Operador IN¶
Se utiliza el operador IN en la cláusula WHERE para comprobar si un valor coincide con algún valor en una lista de valores.
[2]:
# conocer la información de alquiler de los ID de cliente 1 y 2
sql = '''
SELECT customer_id, rental_id, return_date
FROM rental
WHERE customer_id IN (1, 2)
ORDER BY return_date DESC;
'''
pd.read_sql_query(sql, cnx)
[2]:
customer_id | rental_id | return_date | |
---|---|---|---|
0 | 2 | 15145 | 2005-08-31 15:51:04 |
1 | 1 | 15315 | 2005-08-30 01:51:46 |
2 | 2 | 14743 | 2005-08-29 00:18:56 |
3 | 1 | 15298 | 2005-08-28 22:49:37 |
4 | 2 | 14475 | 2005-08-27 08:59:32 |
5 | 1 | 14825 | 2005-08-27 07:01:57 |
6 | 2 | 15907 | 2005-08-25 23:23:35 |
7 | 2 | 12963 | 2005-08-23 11:37:04 |
8 | 1 | 13176 | 2005-08-23 08:50:54 |
9 | 1 | 14762 | 2005-08-23 01:30:57 |
10 | 1 | 12250 | 2005-08-22 23:05:29 |
11 | 1 | 13068 | 2005-08-20 14:44:16 |
12 | 2 | 11614 | 2005-08-20 07:04:18 |
13 | 1 | 11824 | 2005-08-19 10:11:54 |
14 | 1 | 11299 | 2005-08-10 16:40:52 |
15 | 1 | 10437 | 2005-08-10 12:12:04 |
16 | 2 | 11177 | 2005-08-10 10:55:48 |
17 | 2 | 11087 | 2005-08-10 10:37:41 |
18 | 2 | 9236 | 2005-08-08 18:52:43 |
19 | 2 | 9296 | 2005-08-08 11:57:13 |
20 | 2 | 8230 | 2005-08-06 19:52:59 |
21 | 2 | 9465 | 2005-08-06 16:43:53 |
22 | 2 | 10466 | 2005-08-06 06:28:26 |
23 | 2 | 9248 | 2005-08-05 11:19:11 |
24 | 2 | 11256 | 2005-08-04 16:39:53 |
25 | 1 | 11367 | 2005-08-04 13:19:38 |
26 | 2 | 9031 | 2005-08-04 10:45:10 |
27 | 2 | 7376 | 2005-08-04 10:35:02 |
28 | 2 | 10136 | 2005-08-03 19:44:56 |
29 | 1 | 9571 | 2005-08-02 23:26:18 |
30 | 2 | 10918 | 2005-08-02 21:23:56 |
31 | 2 | 7459 | 2005-08-02 21:07:20 |
32 | 2 | 7346 | 2005-08-02 16:48:42 |
33 | 2 | 8705 | 2005-08-02 16:01:29 |
34 | 2 | 8598 | 2005-08-01 08:39:59 |
35 | 1 | 8326 | 2005-08-01 05:16:49 |
36 | 1 | 7273 | 2005-07-31 06:50:22 |
37 | 1 | 8033 | 2005-07-30 17:56:23 |
38 | 1 | 7841 | 2005-07-30 12:37:45 |
39 | 1 | 8116 | 2005-07-29 22:54:07 |
40 | 1 | 8074 | 2005-07-29 20:17:39 |
41 | 2 | 5755 | 2005-07-19 17:02:56 |
42 | 1 | 6163 | 2005-07-19 13:15:46 |
43 | 1 | 5244 | 2005-07-14 14:01:07 |
44 | 1 | 4526 | 2005-07-14 01:19:05 |
45 | 1 | 5326 | 2005-07-13 18:02:01 |
46 | 2 | 5636 | 2005-07-13 02:36:24 |
47 | 1 | 4611 | 2005-07-12 13:25:56 |
48 | 1 | 3284 | 2005-06-28 03:28:45 |
49 | 1 | 1476 | 2005-06-25 02:26:46 |
50 | 2 | 2128 | 2005-06-24 00:41:58 |
51 | 1 | 1185 | 2005-06-23 02:42:12 |
52 | 1 | 2308 | 2005-06-22 03:36:48 |
53 | 1 | 2363 | 2005-06-19 17:40:59 |
54 | 1 | 1422 | 2005-06-19 15:54:53 |
55 | 1 | 1725 | 2005-06-17 21:05:57 |
56 | 1 | 76 | 2005-06-03 12:00:37 |
57 | 1 | 573 | 2005-06-03 06:32:23 |
58 | 2 | 320 | 2005-05-28 04:30:24 |
[3]:
# conocer la información de alquiler de los ID de cliente 1 y 2
# esta es equivalente a la anterior
sql = '''
SELECT customer_id, rental_id, return_date
FROM rental
WHERE customer_id = 1 OR customer_id = 2
ORDER BY return_date DESC;
'''
pd.read_sql_query(sql, cnx)
[3]:
customer_id | rental_id | return_date | |
---|---|---|---|
0 | 2 | 15145 | 2005-08-31 15:51:04 |
1 | 1 | 15315 | 2005-08-30 01:51:46 |
2 | 2 | 14743 | 2005-08-29 00:18:56 |
3 | 1 | 15298 | 2005-08-28 22:49:37 |
4 | 2 | 14475 | 2005-08-27 08:59:32 |
5 | 1 | 14825 | 2005-08-27 07:01:57 |
6 | 2 | 15907 | 2005-08-25 23:23:35 |
7 | 2 | 12963 | 2005-08-23 11:37:04 |
8 | 1 | 13176 | 2005-08-23 08:50:54 |
9 | 1 | 14762 | 2005-08-23 01:30:57 |
10 | 1 | 12250 | 2005-08-22 23:05:29 |
11 | 1 | 13068 | 2005-08-20 14:44:16 |
12 | 2 | 11614 | 2005-08-20 07:04:18 |
13 | 1 | 11824 | 2005-08-19 10:11:54 |
14 | 1 | 11299 | 2005-08-10 16:40:52 |
15 | 1 | 10437 | 2005-08-10 12:12:04 |
16 | 2 | 11177 | 2005-08-10 10:55:48 |
17 | 2 | 11087 | 2005-08-10 10:37:41 |
18 | 2 | 9236 | 2005-08-08 18:52:43 |
19 | 2 | 9296 | 2005-08-08 11:57:13 |
20 | 2 | 8230 | 2005-08-06 19:52:59 |
21 | 2 | 9465 | 2005-08-06 16:43:53 |
22 | 2 | 10466 | 2005-08-06 06:28:26 |
23 | 2 | 9248 | 2005-08-05 11:19:11 |
24 | 2 | 11256 | 2005-08-04 16:39:53 |
25 | 1 | 11367 | 2005-08-04 13:19:38 |
26 | 2 | 9031 | 2005-08-04 10:45:10 |
27 | 2 | 7376 | 2005-08-04 10:35:02 |
28 | 2 | 10136 | 2005-08-03 19:44:56 |
29 | 1 | 9571 | 2005-08-02 23:26:18 |
30 | 2 | 10918 | 2005-08-02 21:23:56 |
31 | 2 | 7459 | 2005-08-02 21:07:20 |
32 | 2 | 7346 | 2005-08-02 16:48:42 |
33 | 2 | 8705 | 2005-08-02 16:01:29 |
34 | 2 | 8598 | 2005-08-01 08:39:59 |
35 | 1 | 8326 | 2005-08-01 05:16:49 |
36 | 1 | 7273 | 2005-07-31 06:50:22 |
37 | 1 | 8033 | 2005-07-30 17:56:23 |
38 | 1 | 7841 | 2005-07-30 12:37:45 |
39 | 1 | 8116 | 2005-07-29 22:54:07 |
40 | 1 | 8074 | 2005-07-29 20:17:39 |
41 | 2 | 5755 | 2005-07-19 17:02:56 |
42 | 1 | 6163 | 2005-07-19 13:15:46 |
43 | 1 | 5244 | 2005-07-14 14:01:07 |
44 | 1 | 4526 | 2005-07-14 01:19:05 |
45 | 1 | 5326 | 2005-07-13 18:02:01 |
46 | 2 | 5636 | 2005-07-13 02:36:24 |
47 | 1 | 4611 | 2005-07-12 13:25:56 |
48 | 1 | 3284 | 2005-06-28 03:28:45 |
49 | 1 | 1476 | 2005-06-25 02:26:46 |
50 | 2 | 2128 | 2005-06-24 00:41:58 |
51 | 1 | 1185 | 2005-06-23 02:42:12 |
52 | 1 | 2308 | 2005-06-22 03:36:48 |
53 | 1 | 2363 | 2005-06-19 17:40:59 |
54 | 1 | 1422 | 2005-06-19 15:54:53 |
55 | 1 | 1725 | 2005-06-17 21:05:57 |
56 | 1 | 76 | 2005-06-03 12:00:37 |
57 | 1 | 573 | 2005-06-03 06:32:23 |
58 | 2 | 320 | 2005-05-28 04:30:24 |
[4]:
# Se encuentra que todos los alquileres cuyo ID de cliente no es 1 o 2
sql = '''
SELECT customer_id, rental_id, return_date
FROM rental
WHERE customer_id NOT IN (1, 2);
'''
pd.read_sql_query(sql, cnx)
[4]:
customer_id | rental_id | return_date | |
---|---|---|---|
0 | 459 | 2 | 2005-05-28 19:40:33 |
1 | 408 | 3 | 2005-06-01 22:12:39 |
2 | 333 | 4 | 2005-06-03 01:43:41 |
3 | 222 | 5 | 2005-06-02 04:33:21 |
4 | 549 | 6 | 2005-05-27 01:32:07 |
... | ... | ... | ... |
15980 | 74 | 16046 | 2005-08-27 18:02:47 |
15981 | 114 | 16047 | 2005-08-25 02:48:48 |
15982 | 103 | 16048 | 2005-08-31 21:33:07 |
15983 | 393 | 16049 | 2005-08-30 01:01:12 |
15984 | 130 | 1 | 2005-05-26 22:04:30 |
15985 rows × 3 columns
[5]:
# Equivalente a la anterior
sql = '''
SELECT customer_id, rental_id, return_date
FROM rental
WHERE customer_id <> 1 AND customer_id <> 2;
'''
pd.read_sql_query(sql, cnx)
[5]:
customer_id | rental_id | return_date | |
---|---|---|---|
0 | 459 | 2 | 2005-05-28 19:40:33 |
1 | 408 | 3 | 2005-06-01 22:12:39 |
2 | 333 | 4 | 2005-06-03 01:43:41 |
3 | 222 | 5 | 2005-06-02 04:33:21 |
4 | 549 | 6 | 2005-05-27 01:32:07 |
... | ... | ... | ... |
15980 | 74 | 16046 | 2005-08-27 18:02:47 |
15981 | 114 | 16047 | 2005-08-25 02:48:48 |
15982 | 103 | 16048 | 2005-08-31 21:33:07 |
15983 | 393 | 16049 | 2005-08-30 01:01:12 |
15984 | 130 | 1 | 2005-05-26 22:04:30 |
15985 rows × 3 columns
Subconsultas con el operador IN¶
Suponga que se desea consultar el nombre completo de las personas que tienen préstamos con fecha de devolución al 30 de mayo del 2005.
[6]:
# Primero, consultamos lista de identificadores de clientes de la
# tabla rental (prestamos) con la fecha de devolución 2005-05-27
# Sin embargo, esta tabla no tiene los nombres de las personas
sql = '''
SELECT customer_id
FROM rental
WHERE CAST(return_date AS DATE) = '2005-05-30'
ORDER BY customer_id;
'''
pd.read_sql_query(sql, cnx)
[6]:
customer_id | |
---|---|
0 | 5 |
1 | 14 |
2 | 16 |
3 | 21 |
4 | 23 |
... | ... |
93 | 538 |
94 | 554 |
95 | 574 |
96 | 575 |
97 | 578 |
98 rows × 1 columns
[7]:
# Para obtener los nombres de las personas, la sentencia anterior se usa como subconsulta en IN...
sql = '''
SELECT customer_id, first_name || ' ' || last_name as full_name
FROM customer
WHERE customer_id IN (
SELECT customer_id
FROM rental
WHERE CAST (return_date AS DATE) = '2005-05-30'
)
ORDER BY customer_id;
'''
pd.read_sql_query(sql, cnx)
[7]:
customer_id | full_name | |
---|---|---|
0 | 5 | Elizabeth Brown |
1 | 14 | Betty White |
2 | 16 | Sandra Martin |
3 | 21 | Michelle Clark |
4 | 23 | Sarah Lewis |
... | ... | ... |
89 | 538 | Ted Breaux |
90 | 554 | Dwayne Olvera |
91 | 574 | Julian Vest |
92 | 575 | Isaac Oglesby |
93 | 578 | Willard Lumpkin |
94 rows × 2 columns
[8]:
# ¿Por qué la siguiente sentencia no entrega los mismos resultados?
sql = '''
SELECT c.customer_id, c.first_name || ' ' || c.last_name as full_name
FROM customer as c, ( SELECT customer_id
FROM rental
WHERE CAST (return_date AS DATE) = '2005-05-30'
) as ids
WHERE ids.customer_id = c.customer_id
ORDER BY customer_id;
'''
pd.read_sql_query(sql, cnx)
[8]:
customer_id | full_name | |
---|---|---|
0 | 5 | Elizabeth Brown |
1 | 14 | Betty White |
2 | 16 | Sandra Martin |
3 | 21 | Michelle Clark |
4 | 23 | Sarah Lewis |
... | ... | ... |
93 | 538 | Ted Breaux |
94 | 554 | Dwayne Olvera |
95 | 574 | Julian Vest |
96 | 575 | Isaac Oglesby |
97 | 578 | Willard Lumpkin |
98 rows × 2 columns
[9]:
# sentencia equivalente a la operación IN
sql = '''
SELECT c.customer_id, c.first_name || ' ' || c.last_name as full_name
FROM customer as c, ( SELECT distinct customer_id
FROM rental
WHERE CAST (return_date AS DATE) = '2005-05-30'
) as ids
WHERE ids.customer_id = c.customer_id
ORDER BY customer_id;
'''
pd.read_sql_query(sql, cnx)
[9]:
customer_id | full_name | |
---|---|---|
0 | 5 | Elizabeth Brown |
1 | 14 | Betty White |
2 | 16 | Sandra Martin |
3 | 21 | Michelle Clark |
4 | 23 | Sarah Lewis |
... | ... | ... |
89 | 538 | Ted Breaux |
90 | 554 | Dwayne Olvera |
91 | 574 | Julian Vest |
92 | 575 | Isaac Oglesby |
93 | 578 | Willard Lumpkin |
94 rows × 2 columns
Operadores LIKE y ILIKE¶
Se usa para consultar datos usando coincidencias de patrones simples, mediante comodines (%) para buscar cualquier coincidencia con n cantidad de caracteres y guión bajo ( _ ) para un solo caracter.
[10]:
# Buscar las coincidencias de los nombres que comiencen por Albert
sql = '''
SELECT first_name, last_name
FROM customer
WHERE first_name LIKE 'Albert%'
ORDER BY first_name;
'''
pd.read_sql_query(sql, cnx)
[10]:
first_name | last_name | |
---|---|---|
0 | Albert | Crouse |
1 | Alberto | Henning |
[11]:
# Buscar las coincidencias de los nombres que comiencen por Albert y finalicen con una letra
sql = '''
SELECT first_name, last_name
FROM customer
WHERE first_name LIKE 'Albert_'
ORDER BY first_name;
'''
pd.read_sql_query(sql, cnx)
[11]:
first_name | last_name | |
---|---|---|
0 | Alberto | Henning |
[12]:
# Buscar las coincidencias de los nombres que comiencen por una letra
# que contengan her y finalicen con ningun o varios caracteres
sql = '''
SELECT first_name, last_name
FROM customer
WHERE first_name LIKE '_her%'
ORDER BY first_name;
'''
pd.read_sql_query(sql, cnx)
[12]:
first_name | last_name | |
---|---|---|
0 | Cheryl | Murphy |
1 | Sherri | Rhodes |
2 | Sherry | Marshall |
3 | Theresa | Watson |
[13]:
# Encontrar los nombres que no comienzan por A o B
sql = '''
SELECT first_name, last_name
FROM customer
WHERE first_name NOT LIKE 'A%' AND first_name NOT LIKE 'B%'
ORDER BY first_name
'''
pd.read_sql_query(sql, cnx)
[13]:
first_name | last_name | |
---|---|---|
0 | Calvin | Martel |
1 | Carl | Artis |
2 | Carla | Gutierrez |
3 | Carlos | Coughlin |
4 | Carmen | Owens |
... | ... | ... |
518 | Willie | Markham |
519 | Wilma | Richards |
520 | Yolanda | Weaver |
521 | Yvonne | Watkins |
522 | Zachary | Hite |
523 rows × 2 columns
[14]:
# ILIKE compara el valor sin distinción entre mayúsculas y minúsculas
sql = '''
SELECT first_name, last_name
FROM customer
WHERE first_name ILIKE 'NAT%';
'''
pd.read_sql_query(sql, cnx)
[14]:
first_name | last_name | |
---|---|---|
0 | Natalie | Meyer |
1 | Nathan | Runyon |
2 | Nathaniel | Adam |
Valores NULOS y Operador IS NULL¶
NULL significa información faltante o no aplicable. NULL no es un valor, por lo tanto, no puede compararlo con otros valores como números o cadenas. La comparación de NULL con un valor siempre dará como resultado NULL, lo que significa un resultado desconocido.
NULL no es igual a NULL, esta operación siempre será NULL
[15]:
sql = '''
CREATE TABLE contactos(
id INT GENERATED BY DEFAULT AS IDENTITY,
nombre VARCHAR(50) NOT NULL,
apellido VARCHAR(50) NOT NULL,
correo VARCHAR(255) NOT NULL,
telefono VARCHAR(15),
PRIMARY KEY (id)
);
'''
cnx.execute(sql)
[15]:
<sqlalchemy.engine.result.ResultProxy at 0x1c4349842e0>
[16]:
sql = '''
INSERT INTO contactos(nombre, apellido, correo, telefono)
VALUES ('Natalia','Rosas','natalia.rosas@rebrand.com', NULL),
('Lilana','Posada','lily.pos@gmail.com','305 234 27 64'),
('Lucio','Vasquez','lvasquez@gmail.com','305 234 34 65');
'''
cnx.execute(sql)
[16]:
<sqlalchemy.engine.result.ResultProxy at 0x1c43170a700>
[17]:
# Cuáles son los contactos que no tienen número de teléfono
# No se retorna ninguna fila
sql = '''
SELECT *
FROM contactos
WHERE telefono = NULL;
'''
pd.read_sql_query(sql, cnx)
[17]:
id | nombre | apellido | correo | telefono |
---|
[18]:
# Cuáles son los contactos que no tienen número de teléfono
# No se retorna ninguna fila
sql = '''
SELECT *
FROM contactos
WHERE telefono is NULL;
'''
pd.read_sql_query(sql, cnx)
[18]:
id | nombre | apellido | correo | telefono | |
---|---|---|---|---|---|
0 | 1 | Natalia | Rosas | natalia.rosas@rebrand.com | None |
[19]:
# Cuáles son los contactos que tienen número de teléfono
# No se retorna ninguna fila
sql = '''
SELECT *
FROM contactos
WHERE telefono IS NOT NULL;
'''
pd.read_sql_query(sql, cnx)
[19]:
id | nombre | apellido | correo | telefono | |
---|---|---|---|---|---|
0 | 2 | Lilana | Posada | lily.pos@gmail.com | 305 234 27 64 |
1 | 3 | Lucio | Vasquez | lvasquez@gmail.com | 305 234 34 65 |
[20]:
sql = '''
DROP TABLE contactos;
'''
cnx.execute(sql)
[20]:
<sqlalchemy.engine.result.ResultProxy at 0x1c43172fb20>