Skip to content

Gzip

read csv in gzip

Note: it's much faster to read the file with pyarrow.csv than with pandas as shown below.

import gzip

# Location of the gzip-compressed CSV to load.
file = 'c:/test/archive.csv.gz'
# Mode 'rt' opens the archive read-only as text, so pandas is handed
# decompressed text rather than raw compressed bytes.
with gzip.open(file, mode='rt') as gz_text:
    df = pd.read_csv(gz_text)

read csv in zip

import io
import zipfile

# Location of the zip archive containing one or more CSV members.
file = 'c:/test/archive.csv.zip'
# Open the archive read-only using the path defined just above (the name
# passed here must match that variable, otherwise this raises NameError).
with zipfile.ZipFile(file, 'r') as archive:
    for csv_filename in archive.namelist():
        # namelist() also reports directory entries (names ending in '/');
        # they carry no CSV payload, so skip them.
        if csv_filename.endswith('/'):
            continue
        with archive.open(csv_filename) as csv_file:
            # ZipFile.open() yields a binary stream; wrap it so pandas
            # receives UTF-8 decoded text.
            text_data = io.TextIOWrapper(csv_file, encoding='utf-8')
            # NOTE: df is rebound on every iteration, so after the loop it
            # holds the frame of the last member read.
            df = pd.read_csv(text_data)