import pandas as pd
data = {'SKU': ['A', 'A', 'A', 'A',
'B','B','B','B',
'C','C','C','C',
'D', 'D', 'D', 'D',
'E', 'E', 'E', 'E',
'F', 'F', 'F', 'F'],
'Current_Stock': [2,0,0,0,
0,0,-1,-1,
2,-1,0,1,
2,2,3,1,
2,0,0,1,
2,-1,1,0],
'Date_Updated': ['2024/7/26', '2024/7/27','2024/7/28', '2024/7/29',
'2024/7/26', '2024/7/27','2024/7/28', '2024/7/29',
'2024/7/26', '2024/7/27','2024/7/28', '2024/7/29',
'2024/7/26', '2024/7/27','2024/7/28', '2024/7/29',
'2024/7/26', '2024/7/27','2024/7/28', '2024/7/29',
'2024/7/26', '2024/7/27','2024/7/28', '2024/7/29']}
df = pd.DataFrame(data)
result = pd.DataFrame({'SKU': ['A', 'B', 'C', 'D', 'E', 'F'],
'Days_Out_of_Stock': [3,4,0,0,0,1]})
Мое желание состоит в том, чтобы подсчитать количество дней отсутствия на складе, что
Вы можете использовать это:
df["Current_Stock"] = (
df.assign(Current_Stock=df["Current_Stock"].mask(df["Current_Stock"].le(0)))
.groupby("SKU")["Current_Stock"]
.transform(lambda x: x.bfill().isna())
)
df = df.groupby("SKU")["Current_Stock"].sum().reset_index(name = "Days_Out_of_Stock")
SKU Days_Out_of_Stock
0 A 3
1 B 4
2 C 0
3 D 0
4 E 0
5 F 1
Определите дни без запаса (le ), затем выполните специальный groupby.agg с обратным cummin , чтобы сохранить только самый последний, и сумму:
out = (df.loc[::-1, 'Current_Stock'].le(0)
.groupby(df['SKU'])
.agg(lambda x: x.cummin().sum())
.reset_index(name='Days_Out_of_Stock')
)
Вариант с именованной агрегацией:
out = (df.groupby('SKU', as_index=False)
.agg(**{'Days_Out_of_Stock': ('Current_Stock',
lambda x: x.le(0)[::-1].cummin().sum())})
)
Выход:
SKU Days_Out_of_Stock
0 A 3
1 B 4
2 C 0
3 D 0
4 E 0
5 F 1
Промежуточные, чтобы продемонстрировать логику:
SKU Current_Stock Date_Updated le(0) rev_cummin
0 A 2 2024/7/26 False False
1 A 0 2024/7/27 True True
2 A 0 2024/7/28 True True
3 A 0 2024/7/29 True True
4 B 0 2024/7/26 True True
5 B 0 2024/7/27 True True
6 B -1 2024/7/28 True True
7 B -1 2024/7/29 True True
8 C 2 2024/7/26 False False
9 C -1 2024/7/27 True False
10 C 0 2024/7/28 True False
11 C 1 2024/7/29 False False
12 D 2 2024/7/26 False False
13 D 2 2024/7/27 False False
14 D 3 2024/7/28 False False
15 D 1 2024/7/29 False False
16 E 2 2024/7/26 False False
17 E 0 2024/7/27 True False
18 E 0 2024/7/28 True False
19 E 1 2024/7/29 False False
20 F 2 2024/7/26 False False
21 F -1 2024/7/27 True False
22 F 1 2024/7/28 False False
23 F 0 2024/7/29 True True
import pandas as pd
import numpy as np
# Sample data
data = {'id': ['A', 'A', 'A', 'A',
'B', 'B', 'B', 'B',
'C', 'C', 'C', 'C',
'D', 'D', 'D', 'D',
'E', 'E', 'E', 'E',
'F', 'F', 'F', 'F'],
'current_stock': [2, 0, 0, 0,
0, 0, -1, -1,
2, -1, 0, 1,
2, 2, 3, 1,
2, 0, 0, 1,
2, -1, 1, 0],
'date_updated': ['1990/7/26', '1990/7/27', '1990/7/28', '1990/7/29',
'1990/7/26', '1990/7/27', '1990/7/28', '1990/7/29',
'1990/7/26', '1990/7/27', '1990/7/28', '1990/7/29',
'1990/7/26', '1990/7/27', '1990/7/28', '1990/7/29',
'1990/7/26', '1990/7/27', '1990/7/28', '1990/7/29',
'1990/7/26', '1990/7/27', '1990/7/28', '1990/7/29']}
df = pd.DataFrame(data)
df['date_updated'] = pd.to_datetime(df['date_updated'])
def f(df):
# Create a boolean mask for rows where current stock is less than 1 (out of stock)
out_of_stock_mask = df['current_stock'].values < 1
# Create a boolean mask for rows where current stock is greater than or equal to 1 (in stock)
in_stock_mask = df['current_stock'].values >= 1
# If all rows are out of stock, return the total number of rows
if out_of_stock_mask.all():
return out_of_stock_mask.sum()
# Find the index of the last row where stock was in stock
last_restock_index = np.where(in_stock_mask)[0][-1]
# Calculate the number of days the stock was out of stock after the last restock
out_of_stock_total_days = out_of_stock_mask[last_restock_index + 1:].sum()
return out_of_stock_total_days
# Group the DataFrame by 'id' and apply the function 'f' to each group
res = df.groupby('id').apply(f,include_groups =False).reset_index(name='Days_Out_of_Stock')
print(res)
'''
id Days_Out_of_Stock
0 A 3
1 B 4
2 C 0
3 D 0
4 E 0
5 F 1
'''