Save CSV / DataFrame to Excel with automatic column width setting, column autofilter, etc.

Question

Save CSV / DataFrame to Excel with automatic column width setting, column autofilter, etc.

There was a need to automatically convert CSV files to Excel files, but so that the appropriate column width is automatically set, so that" auto filters " of columns are automatically set, so that the top row with the column name is automatically frozen, etc.

A solution using the Pandas module that converts a CSV file to Excel format:

import pandas as pd

pd.read_csv("filename.csv").to_excel("filename.xlsx", index=False)

Question: but what about automatic " autofilters", the column width adapted to the data width or the width of the column name, and freezing the row with the column names?

5

python excel csv pandas форматирование

Author: 0xdb, 2020-03-23

Source

2 answers

The function below allows you to automatically set "autofilters" of columns, adapt the column width to the data width or the column name width, and "freeze" the top rows and / or left columns:

from pathlib import Path
import pandas as pd


def df_to_excel_auto_fmt(
        df,
        fn,
        max_col_width=30,
        autofilter=True,
        freeze_panes=(1, 0),
        fmt_int="#,##0",
        fmt_float="#,##0.00", 
        fmt_date="yyyy-mm-dd",
        fmt_datetime="yyyy-mm-dd hh:mm:ss",
        **kwargs):
    """
    Export / save Pandas.DataFrame to an Excel file with automatically adjusted column's widths.
    It can also add Excel column "autofilters" and freeze panes (top rows and left columns).
    Cell values will be formatted according to "fmt_*" parameters.

    :param df: DataFrame to be exported to Excel
    :param fn: output Excelfile name. NOTE: the extension will be changed to ".xlsx"
    :param max_col_width: maximum column width in Excel. Default: 30
    :param autofilter: boolean - whether add Excel autofilter or not. Default: True
    :param freeze_panes: tuple of int (length 2).
                         Specifies the one-based bottommost row and rightmost column
                         that is to be frozen.
    :param fmt_int: Excel format for integer numbers
    :param fmt_float: Excel format for float numbers
    :param fmt_date: Excel format for dates
    :param fmt_datetime: Excel format for datetime's
    :param kwargs: additional arguments to pass to df.to_excel(filename, **kwargs)
    :return:  None
    """
    file = Path(fn).with_suffix(".xlsx")
    # get default parameters
    first_col = int(kwargs.get("index", True))
    sheet_name = kwargs.get("sheet_name", "Sheet1")
    if "freeze_panes" not in kwargs:
        kwargs["freeze_panes"] = freeze_panes
    writer = pd.ExcelWriter(
        file.with_suffix(".xlsx"), 
        engine="xlsxwriter",
        date_format=fmt_date,
        datetime_format=fmt_datetime)
    df.to_excel(writer, sheet_name=sheet_name, **kwargs)
    workbook = writer.book
    worksheet = writer.sheets[sheet_name]
    int_fmt = workbook.add_format({'num_format': fmt_int})
    float_fmt = workbook.add_format({'num_format': fmt_float})
    for xl_col_no, dtyp in enumerate(df.dtypes, first_col):
        col_no = xl_col_no - first_col
        width = max(df.iloc[:, col_no].astype(str).str.len().max(), 
                    len(df.columns[col_no]) + 6)
        width = min(max_col_width, width)
        # print(f"column: [{df.columns[col_no]}]\twidth:\t[{width}]")
        if np.issubdtype(dtyp, np.integer):
            worksheet.set_column(xl_col_no, xl_col_no, width, int_fmt)
        elif np.issubdtype(dtyp, np.floating):
            worksheet.set_column(xl_col_no, xl_col_no, width, float_fmt)
        else:
            worksheet.set_column(xl_col_no, xl_col_no, width)
    if autofilter:
        worksheet.autofilter(0, 0, 0, df.shape[1] + first_col)
    writer.save()
    writer.close()

Usage example:

fn = r"c:\download\file.csv"
df = pd.read_csv(fn, sep=";")
df_to_excel_auto_fmt(
    df,
    fn,
    max_col_width=30,
    fmt_datetime="dd.mm.yy hh:mm",
    index=False)

3

Author: MaxU, 2020-04-25 16:51:27

score 3 · Accepted Answer

After the next update of the modules, the version based on the XlsxWriter, so I decided to rewrite the same function using openpyxl:

def df_to_excel_auto_fmt(
        df,
        fn,
        max_col_width=30,
        autofilter=True,
        freeze_panes=(1, 0),
        fmt_int="#,##0",
        fmt_float="#,##0.00",
        fmt_date="yyyy-mm-dd",
        fmt_datetime="yyyy-mm-dd hh:mm:ss",
        **kwargs):
    """
    Export / save Pandas.DataFrame to an Excel file with automatically adjusted column's widths.
    It can also add Excel column "autofilters" and freeze panes (top rows and left columns).
    Cell values will be formatted according to "fmt_*" parameters.

    :param df: DataFrame to be exported to Excel
    :param fn: output Excelfile name. NOTE: the extension will be changed to ".xlsx"
    :param max_col_width: maximum column width in Excel. Default: 30
    :param autofilter: boolean - whether add Excel autofilter or not. Default: True
    :param freeze_panes: tuple of int (length 2).
                         Specifies the one-based bottommost row and rightmost column
                         that is to be frozen.
    :param fmt_int: Excel format for integer numbers
    :param fmt_float: Excel format for float numbers
    :param fmt_date: Excel format for dates
    :param fmt_datetime: Excel format for datetime's
    :param kwargs: additional arguments to pass to df.to_excel(filename, **kwargs)
    :return:  None

    (c) https://ru.stackoverflow.com/users/211923/maxu?tab=profile
    """
    from openpyxl.utils import get_column_letter

    def set_column_format(ws, column_letter, fmt):
        for cell in ws[column_letter]:
            cell.number_format = fmt
    file = Path(fn).with_suffix(".xlsx")
    # get default parameters
    first_col = int(kwargs.get("index", True)) + 1
    sheet_name = kwargs.get("sheet_name", "Sheet1")
    if "freeze_panes" not in kwargs:
        kwargs["freeze_panes"] = freeze_panes
    writer = pd.ExcelWriter(
        file.with_suffix(".xlsx"),
        engine="openpyxl",
        date_format=fmt_date,
        datetime_format=fmt_datetime)
    df.to_excel(writer, sheet_name=sheet_name, **kwargs)
    # workbook = writer.book
    worksheet = writer.sheets[sheet_name]
    for xl_col_no, dtyp in enumerate(df.dtypes, first_col):
        col_no = xl_col_no - first_col
        width = max(df.iloc[:, col_no].astype(str).str.len().max(),
                    len(df.columns[col_no]) + 6)
        width = min(max_col_width, width)
        # print(f"column: [{df.columns[col_no]} ({dtyp.name})]\twidth:\t[{width}]")
        column_letter = get_column_letter(xl_col_no)
        worksheet.column_dimensions[column_letter].width = width
        if np.issubdtype(dtyp, np.integer):
            set_column_format(worksheet, column_letter, fmt_int)
        if np.issubdtype(dtyp, np.floating):
            set_column_format(worksheet, column_letter, fmt_float)
    if autofilter:
        worksheet.auto_filter.ref = worksheet.dimensions
    writer.save()
    writer.close()

Usage example:

fn = r"c:\download\file.csv"
df = pd.read_csv(fn, sep=";")
df_to_excel_auto_fmt(
    df,
    fn,
    max_col_width=30,
    fmt_datetime="dd.mm.yy hh:mm",
    index=False)