-
Notifications
You must be signed in to change notification settings - Fork 0
/
kfold.py
32 lines (32 loc) · 1.06 KB
/
kfold.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
def kfold(df, label='label', kf=5, seed=None):
    """Split *df* into ``kf`` stratified train/test folds.

    Rows are grouped by the value in the ``label`` column, shuffled within
    each group, and dealt round-robin into ``kf`` test folds.  Every row with
    a matching label therefore lands in exactly one test fold, per-class
    counts differ by at most one between folds, and classes with fewer than
    ``kf`` rows are still tested.

    Args:
        df: pandas DataFrame holding the features and the label column.
        label: Name of the column containing the class labels.
        kf: Number of folds; must be at least 1.
        seed: Optional seed for a private random generator.  When ``None``
            (the default) the module-level ``random`` generator is used, so
            a caller's ``random.seed(...)`` still controls the shuffle.

    Returns:
        A list with ``kf`` entries, each ``[xtrain, ytrain, xtest, ytest]``.
        The ``x`` frames are *df* without the label column; the ``y`` frames
        are single-column DataFrames containing only the label column.
        Training rows keep their original order in *df*; test rows are
        grouped by class in shuffled order.

    Raises:
        ValueError: If ``kf`` is smaller than 1.
    """
    # Imported locally so the function is self-contained.
    import random

    if kf < 1:
        raise ValueError(f"kf must be at least 1, got {kf!r}")

    rng = random if seed is None else random.Random(seed)
    labels = df[label]

    # Work with row positions rather than index labels so that frames with
    # duplicate or unsorted index labels are split correctly.
    fold_positions = [[] for _ in range(kf)]
    next_fold = 0
    for label_value in labels.unique():
        # NaN never compares equal, so rows with a missing label match no
        # class and stay in every training set.
        positions = (labels == label_value).to_numpy().nonzero()[0].tolist()
        rng.shuffle(positions)
        # Keep dealing from where the previous class stopped so leftover
        # rows are spread over the folds instead of piling onto the first.
        for position in positions:
            fold_positions[next_fold].append(position)
            next_fold = (next_fold + 1) % kf

    row_count = len(df)
    folds = []
    for test_positions in fold_positions:
        held_out = set(test_positions)
        train_positions = [p for p in range(row_count) if p not in held_out]
        train = df.iloc[train_positions]
        test = df.iloc[test_positions]
        folds.append([
            train.drop([label], axis=1),
            train[[label]],
            test.drop([label], axis=1),
            test[[label]],
        ])
    return folds