Create test_train_split

This commit is contained in:
avimallu
2023-06-17 08:53:41 -05:00
committed by GitHub
parent 1303be7e57
commit ea45040f03

15
polars/test_train_split Normal file
View File

@@ -0,0 +1,15 @@
def train_test_split(
    df: pl.DataFrame, train_fraction: float = 0.75, seed: int = 1
) -> Tuple[pl.DataFrame, pl.DataFrame]:
    """Shuffle a polars dataframe and split it into train and test sets.

    The rows are shuffled with a fixed seed, then the first
    ``int(train_fraction * len(df))`` rows become the train set and the
    remaining rows become the test set.

    Args:
        df (pl.DataFrame): Dataframe to split.
        train_fraction (float, optional): Fraction of rows that goes to train.
            Must lie in the closed interval [0, 1]. Defaults to 0.75.
        seed (int, optional): Seed used to shuffle the rows. Defaults to 1.

    Returns:
        Tuple[pl.DataFrame, pl.DataFrame]: Tuple of train and test dataframes.

    Raises:
        ValueError: If ``train_fraction`` is not within [0, 1].
    """
    # Reject out-of-range fractions up front: a negative value would slice
    # from the end of the frame and a value above 1 would silently leave the
    # test set empty. The chained comparison is also False for NaN.
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            f"train_fraction must be between 0 and 1, got {train_fraction!r}"
        )

    # Every column is shuffled with the *same* seed so that all columns get
    # the same permutation and each row stays intact. `with_columns` replaces
    # the deprecated singular `with_column`.
    shuffled = df.with_columns(pl.all().shuffle(seed=seed))

    # int() truncates, so any fractional row is assigned to the test set.
    split_index = int(train_fraction * len(shuffled))
    df_train = shuffled[:split_index]
    df_test = shuffled[split_index:]
    return (df_train, df_test)