Skip to content

PyODBC

faster way to insert data

https://stackoverflow.com/questions/48006551/speeding-up-pandas-dataframe-to-sql-with-fast-executemany-of-pyodbc

fast_executemany

pyodbc needs string even for numerical data when fast_executemany=True

Insert or update dataframe into table

# Convert numerical values to string
for col in upd_cols:
    df[col] = df[col].map('{:.4f}'.format, na_action='ignore')

# Standardize column names
id_cols = [f'[{c}]' for c in id_cols]
upd_cols = [f'[{c}]' for c in upd_cols]
cols = id_cols + upd_cols

# Command terms
stmt_on = ' AND '.join([f'T.{i} = S.{i}' for i in id_cols])
cols_upd = ','.join([f'T.{c} = S.{c}' for c in upd_cols])
cols_ins = ','.join(cols)
cols_val = ','.join([f'S.{c}' for c in cols])

# Create parameter indicators (?, ?, ..., ?) and parameter values
param_slots = ','.join(['?'] * df.shape[1])
param_values = df.to_records(index=False).tolist()

table_full_name = f'[{schema}].[{database}].[{table_name}]'
sql_stmt = f'''
    MERGE INTO {table_full_name} WITH (XLOCK, ROWLOCK) AS T
    USING (
        SELECT * FROM
        (VALUES ({param_slots})) AS Vtmp ({cols_ins})
    ) AS S
        ON {stmt_on}
    WHEN MATCHED THEN
        UPDATE SET {cols_upd}
    WHEN NOT MATCHED BY TARGET THEN
        INSERT ({cols_ins}) VALUES ({cols_val});
'''
# Create engine
url = sa.engine.url.URL.create(**cfg)
engine = sa.create_engine(url, **kwargs, fast_executemany=True)
# Merge df with table
with engine.begin() as cnn:
    cnn.execute(sql_stmt, param_values)