Introducción a csvkit#

Descarga de los datos#

[1]:
#
# Crea un archivo temporal para el tutorial
#
!mkdir -p /tmp/csvkit_tutorial

#
# Cambia el directorio de trabajo del notebook al temporal
#
%cd /tmp/csvkit_tutorial
/tmp/csvkit_tutorial
[2]:
#
# Descarga de los datos
#
!curl -L -O https://raw.githubusercontent.com/wireservice/csvkit/master/examples/realdata/ne_1033_data.xlsx
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 65331  100 65331    0     0   428k      0 --:--:-- --:--:-- --:--:--  428k
[3]:
!ls
data.csv  ne_1033_data.xlsx

in2csv#

[4]:
#
# Exporta el archivo xlsx a CSV
#
!in2csv ne_1033_data.xlsx > data.csv
[5]:
!head data.csv
state,county,fips,nsn,item_name,quantity,ui,acquisition_cost,total_cost,ship_date,federal_supply_category,federal_supply_category_name,federal_supply_class,federal_supply_class_name
NE,ADAMS,31001.0,1005-00-589-1271,"RIFLE,7.62 MILLIMETER",1.0,Each,138.0,138.0,2008-07-11,10.0,WEAPONS,1005.0,"Guns, through 30 mm"
NE,ADAMS,31001.0,1005-00-589-1271,"RIFLE,7.62 MILLIMETER",1.0,Each,138.0,138.0,2008-07-11,10.0,WEAPONS,1005.0,"Guns, through 30 mm"
NE,ADAMS,31001.0,1005-00-589-1271,"RIFLE,7.62 MILLIMETER",1.0,Each,138.0,138.0,2008-07-11,10.0,WEAPONS,1005.0,"Guns, through 30 mm"
NE,ADAMS,31001.0,1005-00-589-1271,"RIFLE,7.62 MILLIMETER",1.0,Each,138.0,138.0,2008-07-11,10.0,WEAPONS,1005.0,"Guns, through 30 mm"
NE,ADAMS,31001.0,1005-00-589-1271,"RIFLE,7.62 MILLIMETER",1.0,Each,138.0,138.0,2008-07-11,10.0,WEAPONS,1005.0,"Guns, through 30 mm"
NE,ADAMS,31001.0,1005-00-589-1271,"RIFLE,7.62 MILLIMETER",1.0,Each,138.0,138.0,2008-07-11,10.0,WEAPONS,1005.0,"Guns, through 30 mm"
NE,BUFFALO,31019.0,1005-00-073-9421,"RIFLE,5.56 MILLIMETER",1.0,Each,499.0,499.0,2008-09-24,10.0,WEAPONS,1005.0,"Guns, through 30 mm"
NE,BUFFALO,31019.0,1005-00-073-9421,"RIFLE,5.56 MILLIMETER",1.0,Each,499.0,499.0,2008-09-24,10.0,WEAPONS,1005.0,"Guns, through 30 mm"
NE,BUFFALO,31019.0,1005-00-073-9421,"RIFLE,5.56 MILLIMETER",1.0,Each,499.0,499.0,2008-09-24,10.0,WEAPONS,1005.0,"Guns, through 30 mm"

csvlook#

[6]:
#
# En el *Terminal* use: csvlook data.csv | less -S
#
!csvlook data.csv | head
| state | county     |   fips | nsn              | item_name                                                      | quantity | ui      | acquisition_cost | total_cost |  ship_date | federal_supply_category | federal_supply_category_name        | federal_supply_class | federal_supply_class_name                                       |
| ----- | ---------- | ------ | ---------------- | -------------------------------------------------------------- | -------- | ------- | ---------------- | ---------- | ---------- | ----------------------- | ----------------------------------- | -------------------- | --------------------------------------------------------------- |
| NE    | ADAMS      | 31,001 | 1005-00-589-1271 | RIFLE,7.62 MILLIMETER                                          |        1 | Each    |           138.00 |     138.00 | 2008-07-11 |                      10 | WEAPONS                             |                1,005 | Guns, through 30 mm                                             |
| NE    | ADAMS      | 31,001 | 1005-00-589-1271 | RIFLE,7.62 MILLIMETER                                          |        1 | Each    |           138.00 |     138.00 | 2008-07-11 |                      10 | WEAPONS                             |                1,005 | Guns, through 30 mm                                             |
| NE    | ADAMS      | 31,001 | 1005-00-589-1271 | RIFLE,7.62 MILLIMETER                                          |        1 | Each    |           138.00 |     138.00 | 2008-07-11 |                      10 | WEAPONS                             |                1,005 | Guns, through 30 mm                                             |
| NE    | ADAMS      | 31,001 | 1005-00-589-1271 | RIFLE,7.62 MILLIMETER                                          |        1 | Each    |           138.00 |     138.00 | 2008-07-11 |                      10 | WEAPONS                             |                1,005 | Guns, through 30 mm                                             |
| NE    | ADAMS      | 31,001 | 1005-00-589-1271 | RIFLE,7.62 MILLIMETER                                          |        1 | Each    |           138.00 |     138.00 | 2008-07-11 |                      10 | WEAPONS                             |                1,005 | Guns, through 30 mm                                             |
| NE    | ADAMS      | 31,001 | 1005-00-589-1271 | RIFLE,7.62 MILLIMETER                                          |        1 | Each    |           138.00 |     138.00 | 2008-07-11 |                      10 | WEAPONS                             |                1,005 | Guns, through 30 mm                                             |
| NE    | BUFFALO    | 31,019 | 1005-00-073-9421 | RIFLE,5.56 MILLIMETER                                          |        1 | Each    |           499.00 |     499.00 | 2008-09-24 |                      10 | WEAPONS                             |                1,005 | Guns, through 30 mm                                             |
| NE    | BUFFALO    | 31,019 | 1005-00-073-9421 | RIFLE,5.56 MILLIMETER                                          |        1 | Each    |           499.00 |     499.00 | 2008-09-24 |                      10 | WEAPONS                             |                1,005 | Guns, through 30 mm                                             |

csvcut#

[7]:
#
# Columnas en los datos
#
!csvcut -n data.csv
  1: state
  2: county
  3: fips
  4: nsn
  5: item_name
  6: quantity
  7: ui
  8: acquisition_cost
  9: total_cost
 10: ship_date
 11: federal_supply_category
 12: federal_supply_category_name
 13: federal_supply_class
 14: federal_supply_class_name
[8]:
#
# Extracción de columnas por posición
#
!csvcut -c 2,5,6 data.csv | head
county,item_name,quantity
ADAMS,"RIFLE,7.62 MILLIMETER",1.0
ADAMS,"RIFLE,7.62 MILLIMETER",1.0
ADAMS,"RIFLE,7.62 MILLIMETER",1.0
ADAMS,"RIFLE,7.62 MILLIMETER",1.0
ADAMS,"RIFLE,7.62 MILLIMETER",1.0
ADAMS,"RIFLE,7.62 MILLIMETER",1.0
BUFFALO,"RIFLE,5.56 MILLIMETER",1.0
BUFFALO,"RIFLE,5.56 MILLIMETER",1.0
BUFFALO,"RIFLE,5.56 MILLIMETER",1.0
[9]:
#
# Extracción de columnas por nombre
#
!csvcut -c county,item_name,quantity data.csv | csvlook | head
| county     | item_name                                                      | quantity |
| ---------- | -------------------------------------------------------------- | -------- |
| ADAMS      | RIFLE,7.62 MILLIMETER                                          |        1 |
| ADAMS      | RIFLE,7.62 MILLIMETER                                          |        1 |
| ADAMS      | RIFLE,7.62 MILLIMETER                                          |        1 |
| ADAMS      | RIFLE,7.62 MILLIMETER                                          |        1 |
| ADAMS      | RIFLE,7.62 MILLIMETER                                          |        1 |
| ADAMS      | RIFLE,7.62 MILLIMETER                                          |        1 |
| BUFFALO    | RIFLE,5.56 MILLIMETER                                          |        1 |
| BUFFALO    | RIFLE,5.56 MILLIMETER                                          |        1 |

Pipeline#

[10]:
!in2csv ne_1033_data.xlsx | csvcut -c county,item_name,quantity | csvlook | head
| county     | item_name                                                      | quantity |
| ---------- | -------------------------------------------------------------- | -------- |
| ADAMS      | RIFLE,7.62 MILLIMETER                                          |        1 |
| ADAMS      | RIFLE,7.62 MILLIMETER                                          |        1 |
| ADAMS      | RIFLE,7.62 MILLIMETER                                          |        1 |
| ADAMS      | RIFLE,7.62 MILLIMETER                                          |        1 |
| ADAMS      | RIFLE,7.62 MILLIMETER                                          |        1 |
| ADAMS      | RIFLE,7.62 MILLIMETER                                          |        1 |
| BUFFALO    | RIFLE,5.56 MILLIMETER                                          |        1 |
| BUFFALO    | RIFLE,5.56 MILLIMETER                                          |        1 |