Aceleración de map()#
Última modificación: Mayo 14, 2022
Adaptado del libro “Mastering Large Datasets with Python”.
Preparación de datos#
[1]:
import os
import re
[2]:
# How many times the sample list is repeated to build the timing workload.
REPEAT = 1_000_000

# The same phone number written in four different input formats.
phone_numbers = [
    "(123) 456-7890",
    "1234567890",
    "123.456.7890",
    "+1 123 456-7890",
]

# Module-level scratch list and a precompiled pattern matching one digit.
new_numbers = []
regex = re.compile(r"\d")
[3]:
class PhoneFormatter:
    """Render phone numbers in the ``(AAA) PPP-LLLL`` layout."""

    def __init__(self):
        # Compiled once so every pretty_format call reuses the same pattern.
        self.regex = re.compile(r"\d")

    def pretty_format(self, phone_number):
        """Return ``phone_number`` formatted as ``(area) prefix-line``.

        Only digit characters are used, and only the trailing ten of them, so
        any extra leading digits (such as a country code) are dropped. When
        fewer than ten digits are present, the leading groups come out shorter
        or empty rather than raising.
        """
        only_digits = "".join(self.regex.findall(phone_number))
        # Negative slices count from the end, so extra leading digits are ignored.
        area = only_digits[-10:-7]
        prefix = only_digits[-7:-4]
        line = only_digits[-4:]
        return "({}) {}-{}".format(area, prefix, line)
# Shared formatter instance used by the mapping functions defined below.
formatter = PhoneFormatter()

# Sample inputs: the same phone number written in four different formats.
phone_numbers = [
    "(123) 456-7890",
    "1234567890",
    "123.456.7890",
    "+1 123 456-7890",
]
Definición de funciones#
[4]:
def lazzy_map(phone_numbers):
    """Format ``phone_numbers`` repeated ``REPEAT`` times in the current process.

    The result is fully materialized: a list of formatted strings in input
    order, produced by the module-level ``formatter``.
    """
    format_one = formatter.pretty_format
    return [format_one(number) for number in phone_numbers * REPEAT]
[5]:
from multiprocessing import Pool
def parallel_map(phone_numbers):
    """Format ``phone_numbers`` repeated ``REPEAT`` times using a process pool.

    ``Pool.map`` blocks until every input has been processed, so the complete
    list of results (in input order) is available before the pool is torn down
    on leaving the ``with`` block.
    """
    with Pool() as workers:
        return workers.map(formatter.pretty_format, phone_numbers * REPEAT)
Medición de tiempos#
[6]:
%%timeit
# Baseline: built-in map() in a single process, materialized as a list.
_ = lazzy_map(phone_numbers)
7.24 s ± 199 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
[7]:
%%timeit
# Same workload, distributed with Pool.map; includes pool start-up and teardown.
_ = parallel_map(phone_numbers)
1.45 s ± 104 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Función imap()#
[8]:
#
# This function is lazy: it produces an iterator, so the work is
# deferred until values are actually requested.
#
def parallel_imap(phone_numbers, chunksize=1):
    """Lazily yield formatted phone numbers, in input order, from a process pool.

    The workload is ``phone_numbers`` repeated ``REPEAT`` times. This is a
    generator: the pool is created when the first value is requested.

    The pool has to outlive the iteration. Leaving a ``with Pool()`` block
    terminates the workers, so handing the raw ``imap`` iterator back to the
    caller would give them an iterator that can no longer produce results.
    Yielding from inside the block keeps the pool alive until the results are
    exhausted or the generator is closed.

    Args:
        phone_numbers: sequence (e.g. list) of phone-number strings.
        chunksize: how many inputs are sent to a worker per task; forwarded to
            ``Pool.imap``. The default of 1 is ``Pool.imap``'s own default.

    Yields:
        Each input formatted by ``formatter.pretty_format``.
    """
    with Pool() as pool:
        yield from pool.imap(
            formatter.pretty_format,
            phone_numbers * REPEAT,
            chunksize,
        )
[9]:
%%timeit
# NOTE(review): the iterator returned here is never consumed, so this timing
# presumably does not reflect formatting the full input and is not directly
# comparable to the list-returning variants.
_ = parallel_imap(phone_numbers)
112 ms ± 13 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Función imap_unordered()#
[10]:
#
# This function is lazy: it produces an iterator, so the work is
# deferred until values are actually requested.
#
def parallel_imap_unordered(phone_numbers, chunksize=1):
    """Lazily yield formatted phone numbers from a process pool, in arbitrary order.

    The workload is ``phone_numbers`` repeated ``REPEAT`` times. Results are
    yielded as ``Pool.imap_unordered`` delivers them, which need not match the
    input order. This is a generator: the pool is created when the first value
    is requested.

    The pool has to outlive the iteration. Leaving a ``with Pool()`` block
    terminates the workers, so handing the raw iterator back to the caller
    would give them an iterator that can no longer produce results. Yielding
    from inside the block keeps the pool alive until the results are exhausted
    or the generator is closed.

    Args:
        phone_numbers: sequence (e.g. list) of phone-number strings.
        chunksize: how many inputs are sent to a worker per task; forwarded to
            ``Pool.imap_unordered``. The default of 1 is that method's own
            default.

    Yields:
        Each input formatted by ``formatter.pretty_format``.
    """
    with Pool() as pool:
        yield from pool.imap_unordered(
            formatter.pretty_format,
            phone_numbers * REPEAT,
            chunksize,
        )
[11]:
%%timeit
# NOTE(review): the iterator returned here is never consumed, so this timing
# presumably does not reflect formatting the full input and is not directly
# comparable to the list-returning variants.
_ = parallel_imap_unordered(phone_numbers)
106 ms ± 3.03 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Función starmap()#
[12]:
# Two equal-length sequences; zip(x, y) pairs them element by element.
x = [7, 3, 1, 19, 11, 3]
y = [3, 4, 6, 10, 14, 1]
[13]:
%%timeit
# Baseline: each zipped pair is passed to max() as a single tuple argument.
[max(k) for k in zip(x, y)]
862 ns ± 6.55 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
[14]:
from itertools import starmap
[15]:
%%timeit
# starmap unpacks each zipped pair into positional arguments: max(x_i, y_i).
list(starmap(max, zip(x, y)))
663 ns ± 7.59 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)