Tree Models: Predicting Employee Productivity¶

Introduction¶

For this project, we'll be working with the Productivity Prediction of Garment Employees dataset. The original dataset is hosted in the UCI Machine Learning Repository. Below is a description of the dataset, according to its official summary:

The garment industry is one of the key examples of the industrial globalization of the modern era. It is a highly labour-intensive industry with lots of manual processes.

Satisfying the huge global demand for garment products is mostly dependent on the production and delivery performance of the employees in the garment manufacturing companies.

So, it is highly desirable among the decision-makers in the garments industry to track, analyze, and predict the productivity performance of the working teams in their factories.

garment manufacturing

What's interesting about the dataset is that we can use it with both regression and classification algorithms, as is clearly stated in the final sentence of the official summary:

This dataset can be used for regression purposes by predicting the productivity range (0-1) or for classification purposes by transforming the productivity range (0-1) into different classes.

In this project, we will focus on working with a classification tree.

Let's start by loading pandas and reading the dataset. To make sure the data was loaded successfully, we will use the .head() method to display the column headers and the first five observations.

Don't worry about understanding what the different columns are telling us yet, because that's exactly what we will be doing in the following section.

In [1]:
import pandas as pd
In [2]:
df = pd.read_csv("garments_worker_productivity.csv")
df.head()
Out[2]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
0 1/1/2015 Quarter1 sweing Thursday 8 0.80 26.16 1108.0 7080 98 0.0 0 0 59.0 0.940725
1 1/1/2015 Quarter1 finishing Thursday 1 0.75 3.94 NaN 960 0 0.0 0 0 8.0 0.886500
2 1/1/2015 Quarter1 sweing Thursday 11 0.80 11.41 968.0 3660 50 0.0 0 0 30.5 0.800570
3 1/1/2015 Quarter1 sweing Thursday 12 0.80 11.41 968.0 3660 50 0.0 0 0 30.5 0.800570
4 1/1/2015 Quarter1 sweing Thursday 6 0.80 25.90 1170.0 1920 50 0.0 0 0 56.0 0.800382

Dataset Exploration (EDA)¶

It is important to first understand what the dataset is telling us, along with its structure and general characteristics.

Let's start by getting the dataset's shape, where the first value indicates the number of observations and the second one the number of columns.

In [3]:
df.shape
Out[3]:
(1197, 15)

Now let's explore the columns: their names, how many non-null observations each one has, and their respective data types (dtypes).

In pandas, the "object" dtype means the observations of that specific column are treated as strings/text.

In [4]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1197 entries, 0 to 1196
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   date                   1197 non-null   object 
 1   quarter                1197 non-null   object 
 2   department             1197 non-null   object 
 3   day                    1197 non-null   object 
 4   team                   1197 non-null   int64  
 5   targeted_productivity  1197 non-null   float64
 6   smv                    1197 non-null   float64
 7   wip                    691 non-null    float64
 8   over_time              1197 non-null   int64  
 9   incentive              1197 non-null   int64  
 10  idle_time              1197 non-null   float64
 11  idle_men               1197 non-null   int64  
 12  no_of_style_change     1197 non-null   int64  
 13  no_of_workers          1197 non-null   float64
 14  actual_productivity    1197 non-null   float64
dtypes: float64(6), int64(5), object(4)
memory usage: 140.4+ KB

The following is the dataset's official column information:

date: date in MM-DD-YYYY

quarter: a portion of the month — month was divided into four quarters

department: associated department with the instance

day: day of the week

team: associated team number with the instance

targeted_productivity: targeted productivity set by the authority for each team for each day

smv: standard minute value — the allocated time for a task

wip: work in progress — includes the number of unfinished items for products

over_time: represents the amount of overtime by each team in minutes

incentive: represents the amount of financial incentive (in BDT) that enables or motivates a particular course of action

idle_time: the duration of time when the production was interrupted due to several reasons

idle_men: the number of workers who were idle due to production interruption

no_of_style_change: number of changes in the style of a particular product

no_of_workers: number of workers on each team

actual_productivity: the actual % of productivity that was delivered by the workers — it ranges from 0 to 1.

We will now get general statistics about the numerical columns.

Remember that std stands for Standard Deviation, and the percentages represent percentiles. min and max indicate the minimum and maximum values in every column, so these are particularly useful for detecting outliers.

In [5]:
df.describe()
Out[5]:
team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
count 1197.000000 1197.000000 1197.000000 691.000000 1197.000000 1197.000000 1197.000000 1197.000000 1197.000000 1197.000000 1197.000000
mean 6.426901 0.729632 15.062172 1190.465991 4567.460317 38.210526 0.730159 0.369256 0.150376 34.609858 0.735091
std 3.463963 0.097891 10.943219 1837.455001 3348.823563 160.182643 12.709757 3.268987 0.427848 22.197687 0.174488
min 1.000000 0.070000 2.900000 7.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 0.233705
25% 3.000000 0.700000 3.940000 774.500000 1440.000000 0.000000 0.000000 0.000000 0.000000 9.000000 0.650307
50% 6.000000 0.750000 15.260000 1039.000000 3960.000000 0.000000 0.000000 0.000000 0.000000 34.000000 0.773333
75% 9.000000 0.800000 24.260000 1252.500000 6960.000000 50.000000 0.000000 0.000000 0.000000 57.000000 0.850253
max 12.000000 0.800000 54.560000 23122.000000 25920.000000 3600.000000 300.000000 45.000000 2.000000 89.000000 1.120437

We can see, for instance, that the actual_productivity column surpasses the limit of 1 indicated in the dataset description!

Also, the maximum wip (Work in Progress) value is 23122. This means there is an observation where the number of unfinished items for products is 23122!

In addition, we can conclude that time management in this factory is fairly efficient, since there is barely any idle time or any idle men. It appears production was interrupted on only a handful of occasions.

There are a lot of other interesting facts that we can discover by carefully examining the describe() table. It's always important to understand what the dataset is telling us, to avoid confusion during subsequent steps of the process.

In the next subsections, we will explore every column individually.

"date" column¶

We can also use the head() function on single columns to see the first five values...

In [6]:
df["date"].head()
Out[6]:
0    1/1/2015
1    1/1/2015
2    1/1/2015
3    1/1/2015
4    1/1/2015
Name: date, dtype: object

Also, it's useful to select a number of random observations to get a general idea of the data in the column. In this case, we will choose 20.

One important clarification: although the sample() method returns random observations, here we've set the random_state parameter to ensure reproducibility, so we always get the same observations.

In [7]:
df["date"].sample(20, random_state = 14)
Out[7]:
959     2/26/2015
464     1/27/2015
672      2/8/2015
321     1/19/2015
282     1/17/2015
307     1/18/2015
609      2/4/2015
1123     3/8/2015
877     2/22/2015
950     2/26/2015
692     2/10/2015
51       1/4/2015
505     1/29/2015
554      2/1/2015
801     2/16/2015
1017     3/2/2015
340     1/20/2015
732     2/12/2015
616      2/4/2015
806     2/17/2015
Name: date, dtype: object

"quarter" column¶

This column's title is pretty peculiar, in the sense that when we say "quarter", we are usually referring to part of a year. But here, it's actually referring to part of a month.

This teaches us a valuable lesson: never make assumptions about the data based purely on the title of a column! It's always a good idea to keep a dataset's description close at hand to refresh our memory if we need to.

Let's use the value_counts() method to see how many observations we have per quarter:

In [8]:
df["quarter"].value_counts()
Out[8]:
Quarter1    360
Quarter2    335
Quarter4    248
Quarter3    210
Quarter5     44
Name: quarter, dtype: int64

Interestingly, we see there are 44 observations with a Quarter 5 classification. Let's specifically explore them by using a mask on our dataset:

In [9]:
df[df["quarter"] == "Quarter5"]
Out[9]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
498 1/29/2015 Quarter5 sweing Thursday 2 0.80 22.52 1416.0 6840 113 0.0 0 0 57.0 1.000230
499 1/29/2015 Quarter5 finishing Thursday 4 0.80 4.30 NaN 1200 0 0.0 0 0 10.0 0.989000
500 1/29/2015 Quarter5 sweing Thursday 3 0.80 22.52 1287.0 6840 100 0.0 0 0 57.0 0.950186
501 1/29/2015 Quarter5 sweing Thursday 4 0.80 22.52 1444.0 6900 88 0.0 0 0 57.5 0.900800
502 1/29/2015 Quarter5 sweing Thursday 10 0.80 22.52 1088.0 6720 88 0.0 0 0 56.0 0.900130
503 1/29/2015 Quarter5 finishing Thursday 6 0.50 2.90 NaN 1200 0 0.0 0 0 10.0 0.899000
504 1/29/2015 Quarter5 finishing Thursday 8 0.65 4.15 NaN 960 0 0.0 0 0 8.0 0.877552
505 1/29/2015 Quarter5 finishing Thursday 11 0.60 2.90 NaN 960 0 0.0 0 0 8.0 0.864583
506 1/29/2015 Quarter5 finishing Thursday 10 0.80 3.94 NaN 1200 0 0.0 0 0 10.0 0.856950
507 1/29/2015 Quarter5 finishing Thursday 1 0.75 3.94 NaN 1200 0 0.0 0 0 10.0 0.853667
508 1/29/2015 Quarter5 sweing Thursday 1 0.75 22.94 1579.0 6960 81 0.0 0 0 58.0 0.850362
509 1/29/2015 Quarter5 sweing Thursday 9 0.70 29.12 1170.0 6960 53 0.0 0 0 58.0 0.850170
510 1/29/2015 Quarter5 sweing Thursday 5 0.65 20.79 1015.0 7080 81 0.0 0 0 59.0 0.800474
511 1/29/2015 Quarter5 finishing Thursday 2 0.80 3.94 NaN 1200 0 0.0 0 0 10.0 0.773333
512 1/29/2015 Quarter5 sweing Thursday 12 0.75 15.26 1436.0 4200 45 0.0 0 0 35.0 0.750647
513 1/29/2015 Quarter5 finishing Thursday 12 0.75 4.08 NaN 1080 0 0.0 0 0 9.0 0.634667
514 1/29/2015 Quarter5 sweing Thursday 11 0.60 20.10 1601.0 4320 46 0.0 0 0 51.0 0.600598
515 1/29/2015 Quarter5 sweing Thursday 6 0.50 18.79 717.0 3960 23 0.0 0 0 33.0 0.500118
516 1/29/2015 Quarter5 finishing Thursday 3 0.80 3.94 NaN 960 0 0.0 0 0 8.0 0.492500
517 1/29/2015 Quarter5 sweing Thursday 7 0.65 23.54 830.0 6600 0 0.0 0 0 55.0 0.487920
518 1/31/2015 Quarter5 sweing Saturday 3 0.80 22.52 1136.0 6960 113 0.0 0 0 58.0 1.000457
519 1/31/2015 Quarter5 sweing Saturday 2 0.80 22.52 1397.0 6840 113 0.0 0 0 57.0 1.000230
520 1/31/2015 Quarter5 finishing Saturday 2 0.80 3.94 NaN 1200 0 0.0 0 0 10.0 0.971867
521 1/31/2015 Quarter5 finishing Saturday 3 0.80 3.94 NaN 960 0 0.0 0 0 8.0 0.971867
522 1/31/2015 Quarter5 finishing Saturday 4 0.80 3.94 NaN 1200 0 0.0 0 0 10.0 0.971867
523 1/31/2015 Quarter5 finishing Saturday 10 0.80 3.94 NaN 1200 0 0.0 0 0 10.0 0.971867
524 1/31/2015 Quarter5 finishing Saturday 1 0.75 3.94 NaN 1800 0 0.0 0 0 15.0 0.971867
525 1/31/2015 Quarter5 finishing Saturday 9 0.75 3.94 NaN 240 0 0.0 0 0 2.0 0.971867
526 1/31/2015 Quarter5 finishing Saturday 12 0.75 4.08 NaN 1080 0 0.0 0 0 9.0 0.971867
527 1/31/2015 Quarter5 finishing Saturday 5 0.70 3.94 NaN 240 0 0.0 0 0 2.0 0.971867
528 1/31/2015 Quarter5 finishing Saturday 7 0.70 3.94 NaN 1200 0 0.0 0 0 10.0 0.971867
529 1/31/2015 Quarter5 finishing Saturday 8 0.65 3.94 NaN 960 0 0.0 0 0 8.0 0.971867
530 1/31/2015 Quarter5 finishing Saturday 11 0.65 3.94 NaN 600 0 0.0 0 0 5.0 0.971867
531 1/31/2015 Quarter5 finishing Saturday 6 0.60 3.94 NaN 1200 0 0.0 0 0 10.0 0.971867
532 1/31/2015 Quarter5 sweing Saturday 10 0.80 22.52 1116.0 6720 93 0.0 0 0 56.0 0.920237
533 1/31/2015 Quarter5 sweing Saturday 4 0.80 22.52 1432.0 6660 88 0.0 0 0 57.5 0.900537
534 1/31/2015 Quarter5 sweing Saturday 9 0.75 29.12 1082.0 6840 81 0.0 0 0 57.0 0.850611
535 1/31/2015 Quarter5 sweing Saturday 1 0.75 22.94 1502.0 6960 81 0.0 0 0 58.0 0.850362
536 1/31/2015 Quarter5 sweing Saturday 12 0.75 15.26 1209.0 4200 45 0.0 0 0 35.0 0.750647
537 1/31/2015 Quarter5 sweing Saturday 5 0.70 50.89 282.0 5880 56 0.0 0 0 59.0 0.656764
538 1/31/2015 Quarter5 sweing Saturday 11 0.65 20.10 1417.0 6480 49 0.0 0 0 54.0 0.650148
539 1/31/2015 Quarter5 sweing Saturday 6 0.60 18.79 799.0 3960 23 0.0 0 0 33.0 0.600711
540 1/31/2015 Quarter5 sweing Saturday 7 0.70 23.54 1109.0 6720 0 0.0 0 0 56.0 0.388830
541 1/31/2015 Quarter5 sweing Saturday 8 0.50 23.54 1144.0 6480 0 0.0 0 0 54.0 0.286985

If we check the "dates" column, we can see that "Quarter 5" always comprises observations where the date is either 29th or 31st.

"department" column¶

In [10]:
df["department"].value_counts()
Out[10]:
sweing        691
finishing     257
finishing     249
Name: department, dtype: int64

We can see there is a problem with this column: we have two distinct "finishing" values. Cases like this one usually happen when there is a "hidden" space in the content of the observations. There is a practical way to explore this, by getting the unique() values on a single column:

In [11]:
df["department"].unique()
Out[11]:
array(['sweing', 'finishing ', 'finishing'], dtype=object)

Indeed, we see that we have 'finishing ' (with a trailing space) and 'finishing' (without a space), so we will need to fix this problem later, during the Data Cleaning step.
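As an aside (not something done in the original notebook), a quick way to confirm that whitespace is the only difference is to strip the values and recount:

# Sketch: stripping surrounding whitespace should collapse the two
# "finishing" variants into a single label.
df["department"].str.strip().value_counts()
# Expected, based on the counts above: sweing 691, finishing 506.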

"day" column¶

In [12]:
df["day"].value_counts()
Out[12]:
Wednesday    208
Sunday       203
Tuesday      201
Thursday     199
Monday       199
Saturday     187
Name: day, dtype: int64

Interestingly, there are no Friday observations. This doesn't necessarily mean that we have missing information; for instance, it could simply mean that the factory is closed on Fridays.

Either way, it's good that we're now aware of this fact; it's worth writing down these kinds of discoveries so we keep them in mind during future steps.
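If we want to be extra careful, one possible cross-check (an aside, not in the original notebook) is to derive the weekday from the "date" column and confirm it agrees with the "day" column:

# Sketch: weekday names computed from the raw dates; Friday should not appear
# if the "day" column is consistent with "date".
pd.to_datetime(df["date"], format="%m/%d/%Y").dt.day_name().value_counts()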

"team" column¶

Since methods can be chained, for this column we will sort the value counts by team number in ascending order.

Therefore, the left column of numbers represents the team number, and the right column the number of observations associated with it.

In [13]:
df["team"].value_counts().sort_index()
Out[13]:
1     105
2     109
3      95
4     105
5      93
6      94
7      96
8     109
9     104
10    100
11     88
12     99
Name: team, dtype: int64

"targeted_productivity" & "actual_productivity" columns¶

A good approach in this case could be to check these two columns together.

For instance, let's check the observations where the productivity was higher than expected.

In [14]:
df[df["actual_productivity"] > df["targeted_productivity"]]
Out[14]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
0 1/1/2015 Quarter1 sweing Thursday 8 0.80 26.16 1108.0 7080 98 0.0 0 0 59.0 0.940725
1 1/1/2015 Quarter1 finishing Thursday 1 0.75 3.94 NaN 960 0 0.0 0 0 8.0 0.886500
2 1/1/2015 Quarter1 sweing Thursday 11 0.80 11.41 968.0 3660 50 0.0 0 0 30.5 0.800570
3 1/1/2015 Quarter1 sweing Thursday 12 0.80 11.41 968.0 3660 50 0.0 0 0 30.5 0.800570
4 1/1/2015 Quarter1 sweing Thursday 6 0.80 25.90 1170.0 1920 50 0.0 0 0 56.0 0.800382
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1187 3/11/2015 Quarter2 sweing Wednesday 4 0.75 26.82 1054.0 7080 45 0.0 0 0 59.0 0.750051
1188 3/11/2015 Quarter2 sweing Wednesday 5 0.70 26.82 992.0 6960 30 0.0 0 1 58.0 0.700557
1189 3/11/2015 Quarter2 sweing Wednesday 8 0.70 30.48 914.0 6840 30 0.0 0 1 57.0 0.700505
1190 3/11/2015 Quarter2 sweing Wednesday 6 0.70 23.41 1128.0 4560 40 0.0 0 1 38.0 0.700246
1191 3/11/2015 Quarter2 sweing Wednesday 7 0.65 30.48 935.0 6840 26 0.0 0 1 57.0 0.650596

869 rows × 15 columns

We can see that in about 73% of all observations (869 of 1,197), workers were productive and surpassed the targeted productivity.
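For reference, the exact share can be computed in one line (a small aside, not in the original notebook):

# Sketch: the mean of a boolean mask gives the fraction of True values.
(df["actual_productivity"] > df["targeted_productivity"]).mean()
# 869 / 1197 is roughly 0.73.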

And now, what about the observations where the actual productivity was exactly equal to the target productivity?

In [15]:
df[df["targeted_productivity"] == df["actual_productivity"]]
Out[15]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
163 1/10/2015 Quarter2 sweing Saturday 10 0.8 28.08 1082.0 10530 63 0.0 0 0 58.5 0.8
182 1/11/2015 Quarter2 sweing Sunday 2 0.8 28.08 805.0 10530 63 0.0 0 0 58.5 0.8
183 1/11/2015 Quarter2 sweing Sunday 10 0.8 28.08 762.0 10530 38 0.0 0 0 58.5 0.8
207 1/12/2015 Quarter2 sweing Monday 2 0.8 28.08 737.0 10530 63 0.0 0 0 58.5 0.8
229 1/13/2015 Quarter2 sweing Tuesday 2 0.8 28.08 723.0 10530 50 0.0 0 0 58.5 0.8
855 2/19/2015 Quarter3 sweing Thursday 8 0.7 29.40 1116.0 6240 0 0.0 0 2 57.0 0.7

Only six observations. As expected, there are very few, since an exact match between the targeted productivity and the actual value is unlikely.

And finally, we should check the observations where the actual productivity failed to meet the target.

In [16]:
df[df["targeted_productivity"] > df["actual_productivity"]]
Out[16]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
11 1/1/2015 Quarter1 sweing Thursday 10 0.75 19.31 578.0 6480 45 0.0 0 0 54.0 0.712205
12 1/1/2015 Quarter1 sweing Thursday 5 0.80 11.41 668.0 3660 50 0.0 0 0 30.5 0.707046
14 1/1/2015 Quarter1 finishing Thursday 8 0.75 2.90 NaN 960 0 0.0 0 0 8.0 0.676667
15 1/1/2015 Quarter1 finishing Thursday 4 0.75 3.94 NaN 2160 0 0.0 0 0 18.0 0.593056
16 1/1/2015 Quarter1 finishing Thursday 7 0.80 2.90 NaN 960 0 0.0 0 0 8.0 0.540729
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1192 3/11/2015 Quarter2 finishing Wednesday 10 0.75 2.90 NaN 960 0 0.0 0 0 8.0 0.628333
1193 3/11/2015 Quarter2 finishing Wednesday 8 0.70 3.90 NaN 960 0 0.0 0 0 8.0 0.625625
1194 3/11/2015 Quarter2 finishing Wednesday 7 0.65 3.90 NaN 960 0 0.0 0 0 8.0 0.625625
1195 3/11/2015 Quarter2 finishing Wednesday 9 0.75 2.90 NaN 1800 0 0.0 0 0 15.0 0.505889
1196 3/11/2015 Quarter2 finishing Wednesday 6 0.70 2.90 NaN 720 0 0.0 0 0 6.0 0.394722

322 rows × 15 columns

Although far fewer than the observations where targets were met, there are still quite a few where productivity fell short: around 27% of the total (322 of 1,197).

And now, let's focus exclusively on the "actual_productivity" column and check all the observations where the value is higher than 1, despite the description indicating that 1 is the maximum possible value.

In [17]:
df[df["actual_productivity"] > 1]
Out[17]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
337 1/20/2015 Quarter3 finishing Tuesday 5 0.70 4.15 NaN 1440 0 0.0 0 0 8.0 1.033570
437 1/26/2015 Quarter4 finishing Monday 3 0.75 3.94 NaN 1800 0 0.0 0 0 10.0 1.059621
456 1/27/2015 Quarter4 sweing Tuesday 2 0.75 22.52 1635.0 6840 119 0.0 0 0 57.0 1.000230
457 1/27/2015 Quarter4 sweing Tuesday 3 0.75 22.52 1299.0 6840 119 0.0 0 0 57.0 1.000230
477 1/28/2015 Quarter4 sweing Wednesday 2 0.80 22.52 1559.0 6840 90 0.0 0 0 57.0 1.000230
478 1/28/2015 Quarter4 sweing Wednesday 3 0.80 22.52 1350.0 6840 113 0.0 0 0 57.0 1.000230
498 1/29/2015 Quarter5 sweing Thursday 2 0.80 22.52 1416.0 6840 113 0.0 0 0 57.0 1.000230
518 1/31/2015 Quarter5 sweing Saturday 3 0.80 22.52 1136.0 6960 113 0.0 0 0 58.0 1.000457
519 1/31/2015 Quarter5 sweing Saturday 2 0.80 22.52 1397.0 6840 113 0.0 0 0 57.0 1.000230
542 2/1/2015 Quarter1 finishing Sunday 8 0.65 4.15 NaN 960 0 0.0 0 0 8.0 1.011562
543 2/1/2015 Quarter1 sweing Sunday 2 0.80 22.52 1396.0 6900 113 0.0 0 0 57.5 1.000671
544 2/1/2015 Quarter1 sweing Sunday 1 0.80 22.94 1582.0 3060 113 0.0 0 0 58.5 1.000402
561 2/2/2015 Quarter1 sweing Monday 1 0.80 22.94 16882.0 7020 113 0.0 0 0 58.5 1.000602
580 2/3/2015 Quarter1 finishing Tuesday 2 0.80 3.94 NaN 2400 0 0.0 0 0 20.0 1.001417
581 2/3/2015 Quarter1 sweing Tuesday 1 0.80 22.52 1500.0 6900 113 0.0 0 0 57.5 1.000019
599 2/4/2015 Quarter1 sweing Wednesday 2 0.80 22.52 1263.0 6900 100 0.0 0 0 57.5 1.050281
600 2/4/2015 Quarter1 sweing Wednesday 3 0.80 22.52 968.0 6840 113 0.0 0 0 57.0 1.000230
618 2/5/2015 Quarter1 sweing Thursday 2 0.80 22.52 1300.0 6780 113 0.0 0 0 56.5 1.000446
619 2/5/2015 Quarter1 sweing Thursday 1 0.80 22.52 1485.0 6900 113 0.0 0 0 57.5 1.000019
636 2/7/2015 Quarter1 finishing Saturday 2 0.80 3.94 NaN 3000 0 0.0 0 0 25.0 1.050667
637 2/7/2015 Quarter1 sweing Saturday 2 0.80 22.52 1186.0 6900 113 0.0 0 0 58.0 1.000019
655 2/8/2015 Quarter2 sweing Sunday 2 0.80 22.52 1233.0 6900 113 0.0 0 0 57.5 1.000019
674 2/9/2015 Quarter2 finishing Monday 2 0.80 3.94 NaN 2160 0 0.0 0 0 18.0 1.057963
692 2/10/2015 Quarter2 finishing Tuesday 12 0.80 4.08 NaN 1080 0 0.0 0 0 9.0 1.004889
711 2/11/2015 Quarter2 finishing Wednesday 4 0.70 4.15 NaN 1800 0 0.0 0 0 15.0 1.033156
712 2/11/2015 Quarter2 finishing Wednesday 12 0.80 4.08 NaN 1080 0 0.0 0 0 9.0 1.020000
713 2/11/2015 Quarter2 sweing Wednesday 2 0.80 22.52 1557.0 0 90 0.0 0 0 57.5 1.000345
714 2/11/2015 Quarter2 sweing Wednesday 1 0.80 22.52 1498.0 0 113 0.0 0 0 57.0 1.000066
730 2/12/2015 Quarter2 sweing Thursday 1 0.80 22.52 1397.0 0 138 0.0 0 0 57.0 1.100484
731 2/12/2015 Quarter2 finishing Thursday 4 0.70 4.15 NaN 1800 0 0.0 0 0 15.0 1.096633
732 2/12/2015 Quarter2 sweing Thursday 2 0.80 22.52 1327.0 0 113 0.0 0 0 57.5 1.000345
749 2/14/2015 Quarter2 sweing Saturday 1 0.80 22.52 1416.0 6840 113 0.0 0 0 57.0 1.000230
766 2/15/2015 Quarter3 finishing Sunday 1 0.80 3.94 NaN 960 0 0.0 0 0 8.0 1.120437
767 2/15/2015 Quarter3 finishing Sunday 2 0.80 3.94 NaN 960 0 0.0 0 0 8.0 1.108125
768 2/15/2015 Quarter3 sweing Sunday 1 0.80 22.52 1420.0 6840 113 0.0 0 0 57.0 1.000230
785 2/16/2015 Quarter3 sweing Monday 1 0.80 22.52 1422.0 6840 113 0.0 0 0 57.0 1.000230
803 2/17/2015 Quarter3 sweing Tuesday 1 0.80 22.52 1445.0 6840 113 0.0 0 0 57.0 1.000230

"wip" column¶

This column records the number of unfinished items for products.

It's also the only column with missing observations, so if we want to check the observations that do contain information, we can filter them with the notnull() method:

In [18]:
df[df["wip"].notnull()]
Out[18]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
0 1/1/2015 Quarter1 sweing Thursday 8 0.80 26.16 1108.0 7080 98 0.0 0 0 59.0 0.940725
2 1/1/2015 Quarter1 sweing Thursday 11 0.80 11.41 968.0 3660 50 0.0 0 0 30.5 0.800570
3 1/1/2015 Quarter1 sweing Thursday 12 0.80 11.41 968.0 3660 50 0.0 0 0 30.5 0.800570
4 1/1/2015 Quarter1 sweing Thursday 6 0.80 25.90 1170.0 1920 50 0.0 0 0 56.0 0.800382
5 1/1/2015 Quarter1 sweing Thursday 7 0.80 25.90 984.0 6720 38 0.0 0 0 56.0 0.800125
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1187 3/11/2015 Quarter2 sweing Wednesday 4 0.75 26.82 1054.0 7080 45 0.0 0 0 59.0 0.750051
1188 3/11/2015 Quarter2 sweing Wednesday 5 0.70 26.82 992.0 6960 30 0.0 0 1 58.0 0.700557
1189 3/11/2015 Quarter2 sweing Wednesday 8 0.70 30.48 914.0 6840 30 0.0 0 1 57.0 0.700505
1190 3/11/2015 Quarter2 sweing Wednesday 6 0.70 23.41 1128.0 4560 40 0.0 0 1 38.0 0.700246
1191 3/11/2015 Quarter2 sweing Wednesday 7 0.65 30.48 935.0 6840 26 0.0 0 1 57.0 0.650596

691 rows × 15 columns

Alternatively: if we want to see the null/missing values, we would use the isnull() method instead.
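For example, a minimal sketch (not in the original notebook) to count the missing values directly:

# Sketch: number of missing "wip" entries.
df["wip"].isnull().sum()
# Given 691 non-null rows out of 1197, this should be 506.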

"over_time" column¶

This column records the amount of overtime by each team in minutes. It has a wide range of values, as we already confirmed with the describe() method.

From this point on we will invoke describe() on individual columns more often, so we don't have to keep scrolling up to the full table at the beginning of this notebook.

In [19]:
df["over_time"].describe()
Out[19]:
count     1197.000000
mean      4567.460317
std       3348.823563
min          0.000000
25%       1440.000000
50%       3960.000000
75%       6960.000000
max      25920.000000
Name: over_time, dtype: float64

Considering these results, it might be interesting to check the observations where the overtime surpasses a certain threshold. We're free to set whichever number we consider most convenient; as an example, here we will use the 50th percentile (the median), 3960:
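As an aside, rather than hard-coding 3960 we could pull the threshold from the data itself; a minimal sketch (equivalent to the cell below for this dataset):

# Sketch: use the median of "over_time" as the threshold.
median_over_time = df["over_time"].median()   # 3960.0 according to describe()
df[df["over_time"] > median_over_time]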

In [20]:
df[df["over_time"] > 3960]
Out[20]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
0 1/1/2015 Quarter1 sweing Thursday 8 0.80 26.16 1108.0 7080 98 0.0 0 0 59.0 0.940725
5 1/1/2015 Quarter1 sweing Thursday 7 0.80 25.90 984.0 6720 38 0.0 0 0 56.0 0.800125
7 1/1/2015 Quarter1 sweing Thursday 3 0.75 28.08 795.0 6900 45 0.0 0 0 57.5 0.753683
8 1/1/2015 Quarter1 sweing Thursday 2 0.75 19.87 733.0 6000 34 0.0 0 0 55.0 0.753098
9 1/1/2015 Quarter1 sweing Thursday 1 0.75 28.08 681.0 6900 45 0.0 0 0 57.5 0.750428
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1187 3/11/2015 Quarter2 sweing Wednesday 4 0.75 26.82 1054.0 7080 45 0.0 0 0 59.0 0.750051
1188 3/11/2015 Quarter2 sweing Wednesday 5 0.70 26.82 992.0 6960 30 0.0 0 1 58.0 0.700557
1189 3/11/2015 Quarter2 sweing Wednesday 8 0.70 30.48 914.0 6840 30 0.0 0 1 57.0 0.700505
1190 3/11/2015 Quarter2 sweing Wednesday 6 0.70 23.41 1128.0 4560 40 0.0 0 1 38.0 0.700246
1191 3/11/2015 Quarter2 sweing Wednesday 7 0.65 30.48 935.0 6840 26 0.0 0 1 57.0 0.650596

597 rows × 15 columns

As expected, about half of the observations in the dataset involve significant overtime across teams.

Let's explore the observations without overtime.

In [21]:
df[df["over_time"] == 0]
Out[21]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
684 2/9/2015 Quarter2 sweing Monday 11 0.75 10.05 103.0 0 45 0.0 0 0 54.0 0.750648
694 2/10/2015 Quarter2 sweing Tuesday 10 0.80 22.52 1268.0 0 88 0.0 0 0 56.0 0.900632
695 2/10/2015 Quarter2 sweing Tuesday 1 0.80 22.52 1546.0 0 88 0.0 0 0 57.0 0.900471
696 2/10/2015 Quarter2 sweing Tuesday 3 0.80 22.52 813.0 0 88 0.0 0 0 57.0 0.900471
697 2/10/2015 Quarter2 sweing Tuesday 2 0.80 22.52 1512.0 0 88 0.0 0 0 57.0 0.899984
713 2/11/2015 Quarter2 sweing Wednesday 2 0.80 22.52 1557.0 0 90 0.0 0 0 57.5 1.000345
714 2/11/2015 Quarter2 sweing Wednesday 1 0.80 22.52 1498.0 0 113 0.0 0 0 57.0 1.000066
718 2/11/2015 Quarter2 sweing Wednesday 10 0.80 22.52 598.0 0 75 0.0 0 0 56.0 0.850365
730 2/12/2015 Quarter2 sweing Thursday 1 0.80 22.52 1397.0 0 138 0.0 0 0 57.0 1.100484
732 2/12/2015 Quarter2 sweing Thursday 2 0.80 22.52 1327.0 0 113 0.0 0 0 57.5 1.000345
790 2/16/2015 Quarter3 sweing Monday 5 0.80 30.10 461.0 0 0 0.0 0 0 59.0 0.800980
818 2/17/2015 Quarter3 sweing Tuesday 8 0.60 29.40 179.0 0 23 5.0 30 2 58.0 0.600983
822 2/17/2015 Quarter3 sweing Tuesday 10 0.65 18.22 741.0 0 0 8.0 35 1 49.0 0.302117
828 2/18/2015 Quarter3 sweing Wednesday 5 0.80 30.10 511.0 0 0 0.0 0 0 59.0 0.800980
834 2/18/2015 Quarter3 sweing Wednesday 3 0.70 30.10 1057.0 0 40 0.0 0 1 58.0 0.700603
860 2/19/2015 Quarter3 sweing Thursday 7 0.75 30.10 444.0 0 0 5.0 20 1 59.0 0.611141
966 2/28/2015 Quarter4 sweing Saturday 11 0.80 11.61 954.0 0 50 0.0 0 2 58.0 0.800779
996 3/1/2015 Quarter1 sweing Sunday 11 0.80 11.61 347.0 0 50 4.0 20 0 57.0 0.682433
1011 3/2/2015 Quarter1 sweing Monday 11 0.80 11.61 632.0 0 50 0.0 0 0 57.0 0.800309
1027 3/3/2015 Quarter1 sweing Tuesday 11 0.80 11.41 601.0 0 50 0.0 0 0 56.0 0.800702
1053 3/4/2015 Quarter1 sweing Wednesday 11 0.80 11.41 433.0 0 38 0.0 0 0 56.0 0.800702
1128 3/9/2015 Quarter2 finishing Monday 11 0.80 2.90 NaN 0 960 0.0 0 0 8.0 0.960625
1129 3/9/2015 Quarter2 finishing Monday 12 0.80 4.60 NaN 0 1080 0.0 0 0 9.0 0.902963
1130 3/9/2015 Quarter2 finishing Monday 5 0.60 3.94 NaN 0 2880 0.0 0 0 12.0 0.864343
1133 3/9/2015 Quarter2 finishing Monday 9 0.75 2.90 NaN 0 3600 0.0 0 0 15.0 0.841000
1137 3/9/2015 Quarter2 finishing Monday 3 0.80 4.60 NaN 0 1440 0.0 0 0 12.0 0.795417
1138 3/9/2015 Quarter2 finishing Monday 4 0.75 3.94 NaN 0 960 0.0 0 0 8.0 0.795388
1139 3/9/2015 Quarter2 finishing Monday 1 0.75 3.94 NaN 0 960 0.0 0 0 8.0 0.794567
1143 3/9/2015 Quarter2 finishing Monday 2 0.70 3.90 NaN 0 1200 0.0 0 0 10.0 0.682500
1148 3/9/2015 Quarter2 finishing Monday 10 0.70 2.90 NaN 0 960 0.0 0 0 8.0 0.477292
1149 3/9/2015 Quarter2 finishing Monday 8 0.65 3.90 NaN 0 960 0.0 0 0 8.0 0.264062
In [22]:
len(df[df["over_time"] == 0])
Out[22]:
31

We have only 31 total observations without overtime!

What happens if we contrast the "over_time" column with the productivity columns? For instance, how many observations do we have where the productivity target was met without overtime?

In [23]:
df[(df["over_time"] == 0) & (df["actual_productivity"] > df["targeted_productivity"])]
Out[23]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
684 2/9/2015 Quarter2 sweing Monday 11 0.75 10.05 103.0 0 45 0.0 0 0 54.0 0.750648
694 2/10/2015 Quarter2 sweing Tuesday 10 0.80 22.52 1268.0 0 88 0.0 0 0 56.0 0.900632
695 2/10/2015 Quarter2 sweing Tuesday 1 0.80 22.52 1546.0 0 88 0.0 0 0 57.0 0.900471
696 2/10/2015 Quarter2 sweing Tuesday 3 0.80 22.52 813.0 0 88 0.0 0 0 57.0 0.900471
697 2/10/2015 Quarter2 sweing Tuesday 2 0.80 22.52 1512.0 0 88 0.0 0 0 57.0 0.899984
713 2/11/2015 Quarter2 sweing Wednesday 2 0.80 22.52 1557.0 0 90 0.0 0 0 57.5 1.000345
714 2/11/2015 Quarter2 sweing Wednesday 1 0.80 22.52 1498.0 0 113 0.0 0 0 57.0 1.000066
718 2/11/2015 Quarter2 sweing Wednesday 10 0.80 22.52 598.0 0 75 0.0 0 0 56.0 0.850365
730 2/12/2015 Quarter2 sweing Thursday 1 0.80 22.52 1397.0 0 138 0.0 0 0 57.0 1.100484
732 2/12/2015 Quarter2 sweing Thursday 2 0.80 22.52 1327.0 0 113 0.0 0 0 57.5 1.000345
790 2/16/2015 Quarter3 sweing Monday 5 0.80 30.10 461.0 0 0 0.0 0 0 59.0 0.800980
818 2/17/2015 Quarter3 sweing Tuesday 8 0.60 29.40 179.0 0 23 5.0 30 2 58.0 0.600983
828 2/18/2015 Quarter3 sweing Wednesday 5 0.80 30.10 511.0 0 0 0.0 0 0 59.0 0.800980
834 2/18/2015 Quarter3 sweing Wednesday 3 0.70 30.10 1057.0 0 40 0.0 0 1 58.0 0.700603
966 2/28/2015 Quarter4 sweing Saturday 11 0.80 11.61 954.0 0 50 0.0 0 2 58.0 0.800779
1011 3/2/2015 Quarter1 sweing Monday 11 0.80 11.61 632.0 0 50 0.0 0 0 57.0 0.800309
1027 3/3/2015 Quarter1 sweing Tuesday 11 0.80 11.41 601.0 0 50 0.0 0 0 56.0 0.800702
1053 3/4/2015 Quarter1 sweing Wednesday 11 0.80 11.41 433.0 0 38 0.0 0 0 56.0 0.800702
1128 3/9/2015 Quarter2 finishing Monday 11 0.80 2.90 NaN 0 960 0.0 0 0 8.0 0.960625
1129 3/9/2015 Quarter2 finishing Monday 12 0.80 4.60 NaN 0 1080 0.0 0 0 9.0 0.902963
1130 3/9/2015 Quarter2 finishing Monday 5 0.60 3.94 NaN 0 2880 0.0 0 0 12.0 0.864343
1133 3/9/2015 Quarter2 finishing Monday 9 0.75 2.90 NaN 0 3600 0.0 0 0 15.0 0.841000
1138 3/9/2015 Quarter2 finishing Monday 4 0.75 3.94 NaN 0 960 0.0 0 0 8.0 0.795388
1139 3/9/2015 Quarter2 finishing Monday 1 0.75 3.94 NaN 0 960 0.0 0 0 8.0 0.794567

It's clear now that it's a rare event to meet targets without putting in overtime!

"incentive" column¶

This column is about the financial incentives that motivate a particular course of action. The information we got by using the describe() method was the following:

In [24]:
df["incentive"].describe()
Out[24]:
count    1197.000000
mean       38.210526
std       160.182643
min         0.000000
25%         0.000000
50%         0.000000
75%        50.000000
max      3600.000000
Name: incentive, dtype: float64

We can see that workers rarely receive incentives. How many observations don't feature any kind of incentive?

In [25]:
len(df[df["incentive"] == 0])
Out[25]:
604

That's more than half of the entire dataset!

And since non-zero incentives only show up at the 75th percentile (which isn't the same as saying they appear exactly at that percentile), let's see how many observations include incentives greater than 50:
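Before filtering, a small aside (not in the original notebook): we can count the rows above several thresholds at once to get a feel for how quickly the incentives thin out:

# Sketch: number of observations above a few incentive thresholds.
for threshold in (0, 50, 100):
    print(threshold, (df["incentive"] > threshold).sum())
# 593 rows are above 0 (1197 - 604); the cells below show 231 above 50 and 34 above 100.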

In [26]:
df[df["incentive"] > 50]
Out[26]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
0 1/1/2015 Quarter1 sweing Thursday 8 0.80 26.16 1108.0 7080 98 0.0 0 0 59.0 0.940725
27 1/3/2015 Quarter1 sweing Saturday 12 0.80 26.16 844.0 7080 63 0.0 0 0 59.0 0.800319
32 1/3/2015 Quarter1 sweing Saturday 10 0.75 19.31 610.0 6480 56 0.0 0 0 54.0 0.787300
45 1/4/2015 Quarter1 sweing Sunday 9 0.80 26.16 1278.0 7080 60 0.0 0 0 59.0 0.850569
46 1/4/2015 Quarter1 sweing Sunday 7 0.80 25.90 1227.0 7020 60 0.0 0 0 56.5 0.850436
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1156 3/10/2015 Quarter2 sweing Tuesday 4 0.75 26.82 1104.0 5880 65 0.0 0 0 59.0 0.850084
1158 3/10/2015 Quarter2 sweing Tuesday 12 0.80 15.26 1069.0 4080 63 0.0 0 0 34.0 0.800402
1159 3/10/2015 Quarter2 sweing Tuesday 3 0.80 30.10 756.0 6960 63 0.0 0 1 58.0 0.800072
1178 3/11/2015 Quarter2 sweing Wednesday 12 0.80 15.26 470.0 4080 63 0.0 0 0 34.0 0.800402
1179 3/11/2015 Quarter2 sweing Wednesday 3 0.80 30.10 735.0 6960 63 0.0 0 1 58.0 0.800072

231 rows × 15 columns

The number is fairly small: 231 observations, well below the 604 observations with no incentive at all. What happens if we only include observations with an incentive higher than 100?

In [27]:
df[df["incentive"] > 100]
Out[27]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
456 1/27/2015 Quarter4 sweing Tuesday 2 0.75 22.52 1635.0 6840 119 0.0 0 0 57.0 1.000230
457 1/27/2015 Quarter4 sweing Tuesday 3 0.75 22.52 1299.0 6840 119 0.0 0 0 57.0 1.000230
478 1/28/2015 Quarter4 sweing Wednesday 3 0.80 22.52 1350.0 6840 113 0.0 0 0 57.0 1.000230
498 1/29/2015 Quarter5 sweing Thursday 2 0.80 22.52 1416.0 6840 113 0.0 0 0 57.0 1.000230
518 1/31/2015 Quarter5 sweing Saturday 3 0.80 22.52 1136.0 6960 113 0.0 0 0 58.0 1.000457
519 1/31/2015 Quarter5 sweing Saturday 2 0.80 22.52 1397.0 6840 113 0.0 0 0 57.0 1.000230
543 2/1/2015 Quarter1 sweing Sunday 2 0.80 22.52 1396.0 6900 113 0.0 0 0 57.5 1.000671
544 2/1/2015 Quarter1 sweing Sunday 1 0.80 22.94 1582.0 3060 113 0.0 0 0 58.5 1.000402
561 2/2/2015 Quarter1 sweing Monday 1 0.80 22.94 16882.0 7020 113 0.0 0 0 58.5 1.000602
581 2/3/2015 Quarter1 sweing Tuesday 1 0.80 22.52 1500.0 6900 113 0.0 0 0 57.5 1.000019
600 2/4/2015 Quarter1 sweing Wednesday 3 0.80 22.52 968.0 6840 113 0.0 0 0 57.0 1.000230
601 2/4/2015 Quarter1 sweing Wednesday 10 0.80 22.52 1108.0 6720 113 0.0 0 0 56.0 0.999995
618 2/5/2015 Quarter1 sweing Thursday 2 0.80 22.52 1300.0 6780 113 0.0 0 0 56.5 1.000446
619 2/5/2015 Quarter1 sweing Thursday 1 0.80 22.52 1485.0 6900 113 0.0 0 0 57.5 1.000019
620 2/5/2015 Quarter1 sweing Thursday 10 0.80 22.52 1039.0 6720 113 0.0 0 0 56.0 0.999995
637 2/7/2015 Quarter1 sweing Saturday 2 0.80 22.52 1186.0 6900 113 0.0 0 0 58.0 1.000019
655 2/8/2015 Quarter2 sweing Sunday 2 0.80 22.52 1233.0 6900 113 0.0 0 0 57.5 1.000019
714 2/11/2015 Quarter2 sweing Wednesday 1 0.80 22.52 1498.0 0 113 0.0 0 0 57.0 1.000066
730 2/12/2015 Quarter2 sweing Thursday 1 0.80 22.52 1397.0 0 138 0.0 0 0 57.0 1.100484
732 2/12/2015 Quarter2 sweing Thursday 2 0.80 22.52 1327.0 0 113 0.0 0 0 57.5 1.000345
749 2/14/2015 Quarter2 sweing Saturday 1 0.80 22.52 1416.0 6840 113 0.0 0 0 57.0 1.000230
768 2/15/2015 Quarter3 sweing Sunday 1 0.80 22.52 1420.0 6840 113 0.0 0 0 57.0 1.000230
785 2/16/2015 Quarter3 sweing Monday 1 0.80 22.52 1422.0 6840 113 0.0 0 0 57.0 1.000230
803 2/17/2015 Quarter3 sweing Tuesday 1 0.80 22.52 1445.0 6840 113 0.0 0 0 57.0 1.000230
1128 3/9/2015 Quarter2 finishing Monday 11 0.80 2.90 NaN 0 960 0.0 0 0 8.0 0.960625
1129 3/9/2015 Quarter2 finishing Monday 12 0.80 4.60 NaN 0 1080 0.0 0 0 9.0 0.902963
1130 3/9/2015 Quarter2 finishing Monday 5 0.60 3.94 NaN 0 2880 0.0 0 0 12.0 0.864343
1133 3/9/2015 Quarter2 finishing Monday 9 0.75 2.90 NaN 0 3600 0.0 0 0 15.0 0.841000
1137 3/9/2015 Quarter2 finishing Monday 3 0.80 4.60 NaN 0 1440 0.0 0 0 12.0 0.795417
1138 3/9/2015 Quarter2 finishing Monday 4 0.75 3.94 NaN 0 960 0.0 0 0 8.0 0.795388
1139 3/9/2015 Quarter2 finishing Monday 1 0.75 3.94 NaN 0 960 0.0 0 0 8.0 0.794567
1143 3/9/2015 Quarter2 finishing Monday 2 0.70 3.90 NaN 0 1200 0.0 0 0 10.0 0.682500
1148 3/9/2015 Quarter2 finishing Monday 10 0.70 2.90 NaN 0 960 0.0 0 0 8.0 0.477292
1149 3/9/2015 Quarter2 finishing Monday 8 0.65 3.90 NaN 0 960 0.0 0 0 8.0 0.264062
In [28]:
len(df[df["incentive"] > 100])
Out[28]:
34

We only have 34 observations that meet this criterion.

"idle_time" & "idle_men" column¶

It would be nice to check all the observations where there were interruptions in the production process.

In [29]:
df[df["idle_time"] > 0]
Out[29]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
615 2/4/2015 Quarter1 sweing Wednesday 5 0.65 30.10 326.0 5820 0 90.0 10 0 58.5 0.650835
617 2/4/2015 Quarter1 sweing Wednesday 4 0.35 30.10 287.0 6060 23 150.0 15 0 55.5 0.350706
650 2/7/2015 Quarter1 sweing Saturday 7 0.70 24.26 658.0 6960 0 270.0 45 0 58.0 0.662270
654 2/7/2015 Quarter1 sweing Saturday 8 0.70 24.26 652.0 6840 0 300.0 37 0 57.0 0.365319
775 2/15/2015 Quarter3 sweing Sunday 8 0.70 30.10 507.0 5880 40 2.0 10 1 59.0 0.700573
798 2/16/2015 Quarter3 sweing Monday 8 0.70 30.10 7.0 7080 27 2.0 10 2 59.0 0.621972
818 2/17/2015 Quarter3 sweing Tuesday 8 0.60 29.40 179.0 0 23 5.0 30 2 58.0 0.600983
822 2/17/2015 Quarter3 sweing Tuesday 10 0.65 18.22 741.0 0 0 8.0 35 1 49.0 0.302117
841 2/18/2015 Quarter3 sweing Wednesday 10 0.70 19.68 1119.0 5640 0 8.0 35 1 47.0 0.303574
843 2/18/2015 Quarter3 sweing Wednesday 8 0.65 29.40 962.0 4560 0 4.5 30 2 57.0 0.251399
848 2/19/2015 Quarter3 sweing Thursday 5 0.80 30.10 276.0 600 63 3.5 15 0 59.5 0.799983
860 2/19/2015 Quarter3 sweing Thursday 7 0.75 30.10 444.0 0 0 5.0 20 1 59.0 0.611141
880 2/22/2015 Quarter4 sweing Sunday 7 0.75 30.10 627.0 6960 0 3.5 20 1 58.0 0.393549
882 2/22/2015 Quarter4 sweing Sunday 5 0.80 30.10 450.0 5700 0 4.5 25 0 60.0 0.307501
996 3/1/2015 Quarter1 sweing Sunday 11 0.80 11.61 347.0 0 50 4.0 20 0 57.0 0.682433
1001 3/1/2015 Quarter1 sweing Sunday 7 0.80 30.10 934.0 6960 0 3.5 15 0 58.0 0.579511
1046 3/3/2015 Quarter1 sweing Tuesday 2 0.40 15.28 157.0 5400 0 6.5 30 1 45.0 0.302770
1085 3/5/2015 Quarter1 sweing Thursday 7 0.80 30.10 834.0 1200 0 4.0 40 0 59.0 0.366054
In [30]:
len(df[(df["idle_time"] > 0)])
Out[30]:
18

We see that the factory rarely suffers from interruptions, with only 18 observations where something like this happened.

And now, just as a "sanity check", let's see if we have observations where there was idle time but no idle men, which would be inconsistent:

In [31]:
df[(df["idle_time"] > 0) & (df["idle_men"] == 0)]
Out[31]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity

Luckily, there are no inconsistent observations of this kind.

Another "sanity check" is to see if the number of observations with idle men is similar to the number of observations with idle time.

In [32]:
len(df[(df["idle_men"] > 0)])
Out[32]:
18

Both columns match, so we don't have anything to worry about here. We can confirm that by re-checking the above table.
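One more sanity check we could run (an aside, not in the original notebook) is the reverse condition: idle men recorded without any idle time would be just as inconsistent.

# Sketch: rows reporting idle workers but zero idle time.
df[(df["idle_men"] > 0) & (df["idle_time"] == 0)]
# Given that both counts above are 18 and every idle_time > 0 row also has
# idle_men > 0, this should come back empty.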

"no_of_style_change" column¶

In [33]:
df["no_of_style_change"].describe()
Out[33]:
count    1197.000000
mean        0.150376
std         0.427848
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max         2.000000
Name: no_of_style_change, dtype: float64

We have another pretty uniform column here. Since the number of changes in the style of a particular product was pretty much 0 in most cases, it would be nice to have an idea of how many observations did feature these changes:

In [34]:
df[df["no_of_style_change"] == 1]
Out[34]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
634 2/5/2015 Quarter1 sweing Thursday 4 0.50 30.10 417.0 6360 23 0.0 0 1 53.0 0.500720
651 2/7/2015 Quarter1 sweing Saturday 4 0.60 30.10 486.0 6780 30 0.0 0 1 57.0 0.600224
671 2/8/2015 Quarter2 sweing Sunday 4 0.65 30.10 712.0 6780 44 0.0 0 1 56.5 0.568260
683 2/9/2015 Quarter2 sweing Monday 4 0.70 30.10 767.0 3300 50 0.0 0 1 57.0 0.790003
689 2/9/2015 Quarter2 sweing Monday 7 0.35 30.10 842.0 6960 0 0.0 0 1 58.0 0.350302
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1185 3/11/2015 Quarter2 sweing Wednesday 2 0.75 16.10 971.0 5400 45 0.0 0 1 45.0 0.750141
1188 3/11/2015 Quarter2 sweing Wednesday 5 0.70 26.82 992.0 6960 30 0.0 0 1 58.0 0.700557
1189 3/11/2015 Quarter2 sweing Wednesday 8 0.70 30.48 914.0 6840 30 0.0 0 1 57.0 0.700505
1190 3/11/2015 Quarter2 sweing Wednesday 6 0.70 23.41 1128.0 4560 40 0.0 0 1 38.0 0.700246
1191 3/11/2015 Quarter2 sweing Wednesday 7 0.65 30.48 935.0 6840 26 0.0 0 1 57.0 0.650596

114 rows × 15 columns

And also, considering the maximum value was 2, let's get the observations with this criteria as well:

In [35]:
df[df["no_of_style_change"] == 2]
Out[35]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
744 2/12/2015 Quarter2 sweing Thursday 11 0.50 11.41 1039.0 2280 0 0.0 0 2 55.0 0.500123
761 2/14/2015 Quarter2 sweing Saturday 11 0.60 11.41 1039.0 2280 23 0.0 0 2 55.0 0.600448
777 2/15/2015 Quarter3 sweing Sunday 11 0.65 11.41 700.0 2640 23 0.0 0 2 53.0 0.650224
798 2/16/2015 Quarter3 sweing Monday 8 0.70 30.10 7.0 7080 27 2.0 10 2 59.0 0.621972
799 2/16/2015 Quarter3 sweing Monday 11 0.70 11.41 680.0 2160 30 0.0 0 2 54.0 0.565972
813 2/17/2015 Quarter3 sweing Tuesday 11 0.70 11.41 1054.0 3420 30 0.0 0 2 56.0 0.700614
818 2/17/2015 Quarter3 sweing Tuesday 8 0.60 29.40 179.0 0 23 5.0 30 2 58.0 0.600983
826 2/18/2015 Quarter3 sweing Wednesday 11 0.70 11.41 685.0 3360 30 0.0 0 2 56.0 0.821113
843 2/18/2015 Quarter3 sweing Wednesday 8 0.65 29.40 962.0 4560 0 4.5 30 2 57.0 0.251399
854 2/19/2015 Quarter3 sweing Thursday 11 0.70 11.41 653.0 3480 30 0.0 0 2 58.0 0.700540
855 2/19/2015 Quarter3 sweing Thursday 8 0.70 29.40 1116.0 6240 0 0.0 0 2 57.0 0.700000
874 2/22/2015 Quarter4 sweing Sunday 8 0.70 29.40 1146.0 6840 40 0.0 0 2 57.0 0.700614
876 2/22/2015 Quarter4 sweing Sunday 11 0.75 11.41 834.0 3480 0 0.0 0 2 58.0 0.672141
878 2/22/2015 Quarter4 sweing Sunday 3 0.70 30.10 1041.0 8160 33 0.0 0 2 58.0 0.626578
893 2/23/2015 Quarter4 sweing Monday 11 0.75 11.41 826.0 2820 0 0.0 0 2 58.0 0.750294
895 2/23/2015 Quarter4 sweing Monday 8 0.70 29.40 1094.0 6840 40 0.0 0 2 57.0 0.700614
896 2/23/2015 Quarter4 sweing Monday 3 0.70 29.40 1228.0 6960 30 0.0 0 2 58.0 0.700362
911 2/24/2015 Quarter4 sweing Tuesday 11 0.75 11.61 698.0 3360 34 0.0 0 2 57.0 0.750437
912 2/24/2015 Quarter4 sweing Tuesday 3 0.75 29.40 1172.0 5340 45 0.0 0 2 60.0 0.750177
916 2/24/2015 Quarter4 sweing Tuesday 8 0.70 29.40 1079.0 4560 40 0.0 0 2 58.0 0.700519
929 2/25/2015 Quarter4 sweing Wednesday 7 0.80 30.10 786.0 7080 50 0.0 0 2 59.0 0.800116
932 2/25/2015 Quarter4 sweing Wednesday 3 0.75 29.40 1283.0 6960 45 0.0 0 2 58.0 0.750207
935 2/25/2015 Quarter4 sweing Wednesday 10 0.70 21.82 1653.0 6240 0 0.0 0 2 52.0 0.700058
946 2/26/2015 Quarter4 sweing Thursday 11 0.80 11.61 816.0 2820 50 0.0 0 2 57.0 0.800517
950 2/26/2015 Quarter4 sweing Thursday 3 0.75 29.40 1244.0 6840 45 0.0 0 2 57.0 0.750474
952 2/26/2015 Quarter4 sweing Thursday 8 0.75 29.40 916.0 6960 56 0.0 0 2 58.0 0.750207
957 2/26/2015 Quarter4 sweing Thursday 4 0.80 30.10 437.0 7080 32 0.0 0 2 59.0 0.495618
961 2/26/2015 Quarter4 sweing Thursday 1 0.35 26.66 1164.0 6600 23 0.0 0 2 55.0 0.378895
966 2/28/2015 Quarter4 sweing Saturday 11 0.80 11.61 954.0 0 50 0.0 0 2 58.0 0.800779
970 2/28/2015 Quarter4 sweing Saturday 3 0.75 29.40 1144.0 4440 45 0.0 0 2 57.0 0.750717
972 2/28/2015 Quarter4 sweing Saturday 8 0.75 29.40 792.0 4560 56 0.0 0 2 58.0 0.750426
979 2/28/2015 Quarter4 sweing Saturday 1 0.50 26.66 1448.0 6840 30 0.0 0 2 57.0 0.550350
980 2/28/2015 Quarter4 sweing Saturday 4 0.50 26.66 556.0 6960 23 0.0 0 2 58.0 0.500258
In [36]:
len(df[df["no_of_style_change"] == 2])
Out[36]:
33

Only 33 observations in the whole dataset included two changes of style.

"no_of_workers" column¶

For this column, an interesting option could be to explore different observations that surpass a certain number of workers in each team. An alternative could be to inspect the observations where teams are very small.

Let's use the 75th percentile (57 workers) as a threshold.
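As an aside, the thresholds can also be taken from the data rather than hard-coded; a minimal sketch (57 is the 75th percentile reported by describe()):

# Sketch: percentile-based thresholds for large and small teams.
q75 = df["no_of_workers"].quantile(0.75)   # 57.0 for this dataset
q25 = df["no_of_workers"].quantile(0.25)   # 9.0 according to describe()
len(df[df["no_of_workers"] > q75]), len(df[df["no_of_workers"] <= q25])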

In [37]:
df[df["no_of_workers"] > 57]
Out[37]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
0 1/1/2015 Quarter1 sweing Thursday 8 0.80 26.16 1108.0 7080 98 0.0 0 0 59.0 0.940725
7 1/1/2015 Quarter1 sweing Thursday 3 0.75 28.08 795.0 6900 45 0.0 0 0 57.5 0.753683
9 1/1/2015 Quarter1 sweing Thursday 1 0.75 28.08 681.0 6900 45 0.0 0 0 57.5 0.750428
10 1/1/2015 Quarter1 sweing Thursday 9 0.70 28.08 872.0 6900 44 0.0 0 0 57.5 0.721127
17 1/1/2015 Quarter1 sweing Thursday 4 0.65 23.69 861.0 7200 0 0.0 0 0 60.0 0.521180
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1179 3/11/2015 Quarter2 sweing Wednesday 3 0.80 30.10 735.0 6960 63 0.0 0 1 58.0 0.800072
1180 3/11/2015 Quarter2 sweing Wednesday 11 0.80 11.41 560.0 3420 50 0.0 0 0 58.0 0.800035
1186 3/11/2015 Quarter2 sweing Wednesday 1 0.75 26.82 1322.0 7080 45 0.0 0 0 59.0 0.750051
1187 3/11/2015 Quarter2 sweing Wednesday 4 0.75 26.82 1054.0 7080 45 0.0 0 0 59.0 0.750051
1188 3/11/2015 Quarter2 sweing Wednesday 5 0.70 26.82 992.0 6960 30 0.0 0 1 58.0 0.700557

248 rows × 15 columns

Also, as a "sanity check", we can verify if there are incorrect observations with teams of 0 people.

In [38]:
df[df["no_of_workers"] == 0]
Out[38]:
date quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity

Dataset Cleaning¶

After properly exploring all the columns, we're now finally able to start cleaning and preparing the data for our Decision Tree model!

Let's start with the "department" column where we found a trailing space that generated two different "finishing" values.

In [39]:
df["department"].unique()
Out[39]:
array(['sweing', 'finishing ', 'finishing'], dtype=object)

With the following code, we'll select the observations where "finishing" has a trailing space at the end, and then we'll remove it by replacing it with the correctly formatted word.

As a result, the two variants are merged, and value_counts() now reports the correct totals.

In [40]:
df.loc[df["department"] == "finishing ", "department"] = "finishing"
# The commented-out line below would not work reliably: it uses chained indexing,
# so pandas attempts the assignment on a copy of a slice of the DataFrame.
# df[df['department'] == 'finishing ']['department'] = 'finishing'
df["department"].value_counts()
Out[40]:
sweing       691
finishing    506
Name: department, dtype: int64

Regarding the "date" column, while it provides useful information (that observations take place in a three month period, from January 1, 2015 to March 11, 2015), it would've been better if the column included observations for the twelve months in a year.

In this way it would have potentially offered more predictive power; for instance, by detecting seasonal tendencies (i.e. by revealing that workers tend to decrease productivity during winter).

In this project, we will drop it for the following analysis.

In [41]:
df = df.drop("date", axis = 1)
df.head(2)
Out[41]:
quarter department day team targeted_productivity smv wip over_time incentive idle_time idle_men no_of_style_change no_of_workers actual_productivity
0 Quarter1 sweing Thursday 8 0.80 26.16 1108.0 7080 98 0.0 0 0 59.0 0.940725
1 Quarter1 finishing Thursday 1 0.75 3.94 NaN 960 0 0.0 0 0 8.0 0.886500
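As an aside, had we wanted to keep the date information, one option (not pursued in this project) would have been to turn it into numeric features before dropping the column; a hedged sketch working from the raw file:

# Hypothetical sketch: derive simple calendar features from "date".
raw = pd.read_csv("garments_worker_productivity.csv")
parsed = pd.to_datetime(raw["date"], format="%m/%d/%Y")
date_features = pd.DataFrame({
    "month": parsed.dt.month,          # 1, 2 or 3 in this dataset
    "day_of_month": parsed.dt.day,
})
date_features.head(2)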

A similar judgment call applies to the "quarter" column: here we decided to merge the "Quarter5" values into "Quarter4". Since we will be creating "dummies" for the "quarter" column, and "Quarter5" only features 44 observations covering two days (January 29th and 31st), it seems unnecessary to generate a whole column for such a small number of observations.

In [42]:
df.loc[df["quarter"] == "Quarter5", "quarter"] = "Quarter4"
df["quarter"].value_counts()
Out[42]:
Quarter1    360
Quarter2    335
Quarter4    292
Quarter3    210
Name: quarter, dtype: int64

In addition, it's unnecessary to repeat the word "Quarter" in every value when we can simply use integers.

In [43]:
# df.loc[df["quarter"] == "Quarter1", "quarter"] = 1
# df.loc[df["quarter"] == "Quarter2", "quarter"] = 2
# df.loc[df["quarter"] == "Quarter3", "quarter"] = 3
# df.loc[df["quarter"] == "Quarter4", "quarter"] = 4

# Define the dictionary to map "quarter" values to numerical values
quarter_mapping = {"Quarter1": 1, "Quarter2": 2, "Quarter3": 3, "Quarter4": 4}
# Use the "map" method with the dictionary to replace values in the "quarter" column
df["quarter"] = df["quarter"].map(quarter_mapping)

df["quarter"].value_counts()
Out[43]:
1    360
2    335
4    292
3    210
Name: quarter, dtype: int64
In [44]:
df["quarter"] = df["quarter"].astype("int")
df[["quarter"]].info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1197 entries, 0 to 1196
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   quarter  1197 non-null   int32
dtypes: int32(1)
memory usage: 4.8 KB

We will get rid of the "idle_time" & "idle_men" columns: there are only 18 observations with non-zero values, so these columns are almost entirely uniform and barely add any information.

In [45]:
df = df.drop(["idle_time", "idle_men"], axis = 1)
df.head(2)
Out[45]:
quarter department day team targeted_productivity smv wip over_time incentive no_of_style_change no_of_workers actual_productivity
0 1 sweing Thursday 8 0.80 26.16 1108.0 7080 98 0 59.0 0.940725
1 1 finishing Thursday 1 0.75 3.94 NaN 960 0 0 8.0 0.886500

We will also delete the "wip" column. It has too many empty observations, and we can't tell if an empty cell is equal to zero unfinished products or if it's simply missing information. Due to this uncertainty, the column should be removed.

In [46]:
df = df.drop("wip", axis = 1)
df.head(2)
Out[46]:
quarter department day team targeted_productivity smv over_time incentive no_of_style_change no_of_workers actual_productivity
0 1 sweing Thursday 8 0.80 26.16 7080 98 0 59.0 0.940725
1 1 finishing Thursday 1 0.75 3.94 960 0 0 8.0 0.886500

Finally, we will remove the "no_of_style_change" column, since only 147 observations feature any style change at all, and even those involve just one or two changes.
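The 147 figure follows from the two counts above (114 + 33); a one-line check (an aside, not in the original notebook):

# Sketch: observations with at least one style change.
(df["no_of_style_change"] > 0).sum()   # 114 + 33 = 147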

In [47]:
df = df.drop("no_of_style_change", axis = 1)
df.head(2)
Out[47]:
quarter department day team targeted_productivity smv over_time incentive no_of_workers actual_productivity
0 1 sweing Thursday 8 0.80 26.16 7080 98 59.0 0.940725
1 1 finishing Thursday 1 0.75 3.94 960 0 8.0 0.886500

After finishing with all the column deletions, let's focus on the "no_of_workers" column, which features floats instead of integers. We should fix this because we can't have 1.25 workers; it's either 1 or 2.

In [48]:
df["no_of_workers"] = df["no_of_workers"].astype("int")
df.head(2)
Out[48]:
quarter department day team targeted_productivity smv over_time incentive no_of_workers actual_productivity
0 1 sweing Thursday 8 0.80 26.16 7080 98 59 0.940725
1 1 finishing Thursday 1 0.75 3.94 960 0 8 0.886500

Also, "actual_productivity" should feature two decimals, just like "targeted_productivity".

In [49]:
df["actual_productivity"] = df["actual_productivity"].round(2)
df.head(2)
Out[49]:
quarter department day team targeted_productivity smv over_time incentive no_of_workers actual_productivity
0 1 sweing Thursday 8 0.80 26.16 7080 98 59 0.94
1 1 finishing Thursday 1 0.75 3.94 960 0 8 0.89

The following step is crucial: since we are building a Classification Tree and not a Regression Tree, our target column "actual_productivity" must be categorical, not numerical.

To accomplish this, we will create a new column called "productive". It will feature boolean values indicating if the productivity targets were achieved for each observation in the dataset.

In [50]:
df["productive"] = df["actual_productivity"] >= df["targeted_productivity"]
df.sample(10, random_state = 14)
Out[50]:
quarter department day team targeted_productivity smv over_time incentive no_of_workers actual_productivity productive
959 4 finishing Thursday 10 0.70 2.90 3360 0 8 0.41 False
464 4 finishing Tuesday 8 0.65 3.94 960 0 8 0.85 True
672 2 sweing Sunday 7 0.70 24.26 6960 0 58 0.36 False
321 3 sweing Monday 6 0.80 11.41 4380 50 31 0.80 True
282 3 finishing Saturday 9 0.80 3.94 1800 0 10 0.83 True
307 3 sweing Sunday 10 0.70 22.52 10080 40 56 0.70 True
609 1 finishing Wednesday 9 0.75 3.94 960 0 8 0.76 True
1123 2 sweing Sunday 8 0.60 30.48 6720 0 56 0.60 True
877 4 sweing Sunday 9 0.70 18.79 3240 30 57 0.63 False
950 4 sweing Thursday 3 0.75 29.40 6840 45 57 0.75 True

We have removed a number of columns by this point. Let's see which ones still remain.

In [51]:
df.columns
Out[51]:
Index(['quarter', 'department', 'day', 'team', 'targeted_productivity', 'smv',
       'over_time', 'incentive', 'no_of_workers', 'actual_productivity',
       'productive'],
      dtype='object')

To finish with this part of the data cleaning process, we will reorder the columns to make the dataset easier to read.

In [52]:
df = df[["quarter", "day", "department", "team", "no_of_workers", "targeted_productivity", 
        "actual_productivity", "productive", "over_time", "smv", "incentive"]]
df.head()
Out[52]:
quarter day department team no_of_workers targeted_productivity actual_productivity productive over_time smv incentive
0 1 Thursday sweing 8 59 0.80 0.94 True 7080 26.16 98
1 1 Thursday finishing 1 8 0.75 0.89 True 960 3.94 0
2 1 Thursday sweing 11 30 0.80 0.80 True 3660 11.41 50
3 1 Thursday sweing 12 30 0.80 0.80 True 3660 11.41 50
4 1 Thursday sweing 6 56 0.80 0.80 True 1920 25.90 50

Categorical Encoding¶

We will now apply categorical encoding to the "department" column, which has only two possible values. We'll map them to 0 and 1, to be interpreted as booleans:

In [53]:
df["department"].replace({"finishing": 0, "sweing": 1}, inplace = True)
# Also changing column's name here
df.rename(columns = {"department": "dept_sweing"}, inplace = True)
df.head(10)
Out[53]:
quarter day dept_sweing team no_of_workers targeted_productivity actual_productivity productive over_time smv incentive
0 1 Thursday 1 8 59 0.80 0.94 True 7080 26.16 98
1 1 Thursday 0 1 8 0.75 0.89 True 960 3.94 0
2 1 Thursday 1 11 30 0.80 0.80 True 3660 11.41 50
3 1 Thursday 1 12 30 0.80 0.80 True 3660 11.41 50
4 1 Thursday 1 6 56 0.80 0.80 True 1920 25.90 50
5 1 Thursday 1 7 56 0.80 0.80 True 6720 25.90 38
6 1 Thursday 0 2 8 0.75 0.76 True 960 3.94 0
7 1 Thursday 1 3 57 0.75 0.75 True 6900 28.08 45
8 1 Thursday 1 2 55 0.75 0.75 True 6000 19.87 34
9 1 Thursday 1 1 57 0.75 0.75 True 6900 28.08 45

The "department" column is now called "dept_sweing" and its values have to be interpreted in boolean terms: if it's 1 (or "True"), it means the observation belongs to the "sweing" department; if it's 0 ("False"), then it belongs to the "finishing" department.

For the "quarter" column, we will use "dummies", because if we leave the numbers as they currently are, the Decision Tree would believe that quarter 4 is greater than quarter 1, for example.

In other words, since they are categories instead of ordinal values, every category must have its own column, and the boolean process that we described for the previous column applies here. In case this isn't clear, we will be examining an example after performing the necessary data transformations.

The following code creates the dummies and also removes the original "quarter" column. The prefix parameter adds a "q_" prefix to each dummy column, so we can easily identify which ones are the dummies. Hence, the result will be four new columns in our dataset: "q_1", "q_2", "q_3" and "q_4".

In [54]:
df = pd.concat([df, pd.get_dummies(df["quarter"], prefix = "q")], axis = 1) \
       .drop(["quarter"], axis = 1)
df.sample(10, random_state = 14)
Out[54]:
day dept_sweing team no_of_workers targeted_productivity actual_productivity productive over_time smv incentive q_1 q_2 q_3 q_4
959 Thursday 0 10 8 0.70 0.41 False 3360 2.90 0 0 0 0 1
464 Tuesday 0 8 8 0.65 0.85 True 960 3.94 0 0 0 0 1
672 Sunday 1 7 58 0.70 0.36 False 6960 24.26 0 0 1 0 0
321 Monday 1 6 31 0.80 0.80 True 4380 11.41 50 0 0 1 0
282 Saturday 0 9 10 0.80 0.83 True 1800 3.94 0 0 0 1 0
307 Sunday 1 10 56 0.70 0.70 True 10080 22.52 40 0 0 1 0
609 Wednesday 0 9 8 0.75 0.76 True 960 3.94 0 1 0 0 0
1123 Sunday 1 8 56 0.60 0.60 True 6720 30.48 0 0 1 0 0
877 Sunday 1 9 57 0.70 0.63 False 3240 18.79 30 0 0 0 1
950 Thursday 1 3 57 0.75 0.75 True 6840 29.40 45 0 0 0 1

The best way to interpret these new columns is by looking at an example. Let's focus on the first observation (959): we see that it has a "1" in the "q_4" column and "0" in all the others. This means the observation belongs to the fourth quarter.

We can see that dummies will always have a "1" in a column to designate the category to which the observation belongs, and the rest of the columns will only contain "0" values.
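If we want to convince ourselves of this, a minimal sanity check (a sketch, not part of the original cleaning steps) is to verify that every row has exactly one active quarter dummy:

# Sketch: each row should have exactly one active quarter dummy
assert (df[["q_1", "q_2", "q_3", "q_4"]].sum(axis=1) == 1).all()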

Let's now repeat this process for the "day" column. Remember that for this one there are no "Friday" observations.
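If we wanted to verify that claim directly, a one-line check like this sketch would do it:

# Sketch: "Friday" should not appear among the recorded days
print(df["day"].unique())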

In [55]:
df = pd.concat([df, pd.get_dummies(df["day"], prefix= None)], axis=1) \
       .drop(["day"], axis=1)
df.sample(10, random_state = 14)
Out[55]:
dept_sweing team no_of_workers targeted_productivity actual_productivity productive over_time smv incentive q_1 q_2 q_3 q_4 Monday Saturday Sunday Thursday Tuesday Wednesday
959 0 10 8 0.70 0.41 False 3360 2.90 0 0 0 0 1 0 0 0 1 0 0
464 0 8 8 0.65 0.85 True 960 3.94 0 0 0 0 1 0 0 0 0 1 0
672 1 7 58 0.70 0.36 False 6960 24.26 0 0 1 0 0 0 0 1 0 0 0
321 1 6 31 0.80 0.80 True 4380 11.41 50 0 0 1 0 1 0 0 0 0 0
282 0 9 10 0.80 0.83 True 1800 3.94 0 0 0 1 0 0 1 0 0 0 0
307 1 10 56 0.70 0.70 True 10080 22.52 40 0 0 1 0 0 0 1 0 0 0
609 0 9 8 0.75 0.76 True 960 3.94 0 1 0 0 0 0 0 0 0 0 1
1123 1 8 56 0.60 0.60 True 6720 30.48 0 0 1 0 0 0 0 1 0 0 0
877 1 9 57 0.70 0.63 False 3240 18.79 30 0 0 0 1 0 0 1 0 0 0
950 1 3 57 0.75 0.75 True 6840 29.40 45 0 0 0 1 0 0 0 1 0 0

The "team" column will also receive the same treatment. Given that there are 12 teams, these dummies will constitute the group with the highest number of added columns. In other words, we will be adding 12 columns to our dataset because of this transformation process in particular.

In [56]:
df = pd.concat([df, pd.get_dummies(df["team"], prefix= "team")], axis=1) \
       .drop(["team"], axis=1)
df.sample(10, random_state = 14)
Out[56]:
dept_sweing no_of_workers targeted_productivity actual_productivity productive over_time smv incentive q_1 q_2 ... team_3 team_4 team_5 team_6 team_7 team_8 team_9 team_10 team_11 team_12
959 0 8 0.70 0.41 False 3360 2.90 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
464 0 8 0.65 0.85 True 960 3.94 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
672 1 58 0.70 0.36 False 6960 24.26 0 0 1 ... 0 0 0 0 1 0 0 0 0 0
321 1 31 0.80 0.80 True 4380 11.41 50 0 0 ... 0 0 0 1 0 0 0 0 0 0
282 0 10 0.80 0.83 True 1800 3.94 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
307 1 56 0.70 0.70 True 10080 22.52 40 0 0 ... 0 0 0 0 0 0 0 1 0 0
609 0 8 0.75 0.76 True 960 3.94 0 1 0 ... 0 0 0 0 0 0 1 0 0 0
1123 1 56 0.60 0.60 True 6720 30.48 0 0 1 ... 0 0 0 0 0 1 0 0 0 0
877 1 57 0.70 0.63 False 3240 18.79 30 0 0 ... 0 0 0 0 0 0 1 0 0 0
950 1 57 0.75 0.75 True 6840 29.40 45 0 0 ... 1 0 0 0 0 0 0 0 0 0

10 rows × 30 columns

Congratulations, we have finished the Data Cleaning phase! As expected, this was the most time-consuming section. Now let's build our tree.

Building the Tree¶

Now that the data has been properly adapted for use in scikit-learn, we're ready to start building our decision tree.

In [57]:
# Importing required libraries

from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier

import matplotlib.pyplot as plt
from sklearn.tree import plot_tree
In [58]:
# Feature and target columns

X = df.drop(["actual_productivity", "productive"], axis = 1)
y = df["productive"]

For the X variable, we removed the column "actual_productivity": it would've been the target column for a Regression Tree, and since "productive" is derived directly from it, keeping it would leak the answer to the classifier. As for "productive", it's the target column we will use here, so we removed it from "X" and it became "y".

In [59]:
# Sanity check: first two observations of "X"

X[:2]
Out[59]:
dept_sweing no_of_workers targeted_productivity over_time smv incentive q_1 q_2 q_3 q_4 ... team_3 team_4 team_5 team_6 team_7 team_8 team_9 team_10 team_11 team_12
0 1 59 0.80 7080 26.16 98 1 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
1 0 8 0.75 960 3.94 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

2 rows × 28 columns

In [60]:
# Sanity check: first two observations of "y"

y[:2]
Out[60]:
0    True
1    True
Name: productive, dtype: bool
In [61]:
# Dividing in training and test sets with train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, shuffle = True, random_state = 24)

The previous step is critical when working with scikit-learn: we divide the dataset into a Training Subset used to fit the algorithm, and a Test Subset whose target labels we predict later on to measure how well the model generalizes.
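As a quick, optional check (a sketch, not part of the original workflow), we could compare the class balance of the two subsets; if they differed substantially, passing stratify=y to train_test_split would be one way to keep them aligned.

# Sketch: proportion of productive/unproductive observations in each subset
print(y_train.value_counts(normalize=True).round(2))
print(y_test.value_counts(normalize=True).round(2))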

In [62]:
# Instantiating and fitting the Decision Tree Classifier

tree = DecisionTreeClassifier(max_depth = 3, random_state = 24)

tree.fit(X_train, y_train)
Out[62]:
DecisionTreeClassifier(max_depth=3, random_state=24)

To keep the visualization of our Tree legible and to avoid overfitting, we set a "max_depth" of 3 so that the Tree has at most three levels of splits.
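To see how this pruning decision plays out, here is a small sketch that tries a few alternative depths (the values chosen are arbitrary illustrations, not part of the project):

# Sketch: comparing a few depths to see how pruning affects test accuracy
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

for depth in [2, 3, 5, None]:
    candidate = DecisionTreeClassifier(max_depth=depth, random_state=24)
    candidate.fit(X_train, y_train)
    acc = accuracy_score(y_test, candidate.predict(X_test))
    print(f"max_depth={depth}: test accuracy = {round(acc, 2)}")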

Now we will make the predictions on the test set. They will be stored in a variable that we will then use to determine the algorithm's accuracy.

In [63]:
y_pred = tree.predict(X_test)
In [64]:
from sklearn.metrics import accuracy_score

print("Accuracy:", round(accuracy_score(y_test,y_pred), 2))
Accuracy: 0.85

Note that we rounded the accuracy to two decimal places; otherwise, the score would typically show many more decimals.

Visualizing and Evaluating the Tree¶

We will use plot_tree to generate this visualization.

Note that we use the "filled" parameter to assign a color to each predicted class, which makes them easier to distinguish.

In [65]:
tree.classes_ 
# We'll transform them to ["Unproductive", "Productive"]
Out[65]:
array([False,  True])
In [66]:
# Setting plot size

plt.figure(figsize = [20.0, 8.0])

# Plotting the tree with some specific parameters

_ = plot_tree(tree, 
              feature_names = X.columns, 
              class_names = ["Unproductive", "Productive"],
              filled = True, 
              rounded = False, 
              proportion = True, 
              fontsize = 11) 

We can see here that the most significant columns turned out to be "incentive" and "smv" (Standard Minute Value, or the allocated time for a task). All the splits, except one, used these columns to progressively divide the dataset into more homogeneous subsets.

It's important to remember that we pruned the tree when we instantiated the model: we restricted it to a maximum depth of three levels to avoid overfitting. If we hadn't, we likely would've seen other columns further dividing the dataset.
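One way to confirm this impression numerically (a sketch, using the fitted model's feature_importances_ attribute) is:

# Sketch: the features the tree relied on most for its splits
import pandas as pd

importances = pd.Series(tree.feature_importances_, index=X.columns)
print(importances.sort_values(ascending=False).head().round(3))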

Let's use the model on a hypothetical observation which has an "incentive" of 24 and a "smv" of 5. We don't actually need any code to do this, as we can use the tree visually by following the branches to reach the leaf containing the prediction.

Since the "incentive" is greater than 22, 24 <= 22 evaluates to False at the root node and so we follow the path to the right (False splits always go to the right.) Since the "smv" is less than 31.155, 5 <= 31.155 evaluates to True at the child node below the root and so we follow the path to the left (True splits always go to the left.) In the final split before the leaf, since the "smv" is greater than 3.92, 5 <= 3.92 evaluates to False and so we follow the path to the right where we reach the final leaf node, which happens to feature a "Productive" class.

To summarize, all this means that an observation with those characteristics would be predicted as "Productive" by the Decision Tree.
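That said, if we wanted to double-check the manual walk-through in code, a minimal sketch would be to copy an existing row (so that all 28 columns are present), override the two features used above, and call predict; the remaining values in the copied row are arbitrary for this illustration.

# Sketch: verifying the walk-through with tree.predict
hypothetical = X.iloc[[0]].copy()
hypothetical["incentive"] = 24
hypothetical["smv"] = 5
print(tree.predict(hypothetical))  # expected to match the walk-through: "Productive" (True)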

Having plotted the Tree, we can continue exploring how effective the Tree is when using alternate metrics. Let's first start with a Confusion Matrix, to keep it as reference:

In [67]:
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_pred)
Out[67]:
array([[ 35,  24],
       [ 13, 168]], dtype=int64)

[[True Negatives,  False Positives],
 [False Negatives, True Positives]]
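For readability, we could also wrap the same matrix in a labeled DataFrame (a sketch; the labels follow tree.classes_, where False = Unproductive and True = Productive):

# Sketch: the confusion matrix with explicit row/column labels
import pandas as pd
from sklearn.metrics import confusion_matrix

cm = pd.DataFrame(
    confusion_matrix(y_test, y_pred),
    index=["Actual: Unproductive", "Actual: Productive"],
    columns=["Predicted: Unproductive", "Predicted: Productive"],
)
print(cm)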

And now we can proceed to calculate the Precision, Recall and F1 Score.

In [68]:
from sklearn.metrics import precision_score, recall_score, f1_score
In [69]:
print("Precision:", round(precision_score(y_test, y_pred), 2))
Precision: 0.88

AKA Positive Predictive Value (PPV)

$\displaystyle\text{Precision} = \frac{\text{TP}}{(\text{TP + FP})}$

In [70]:
print("Recall:", round(recall_score(y_test, y_pred), 2))
Recall: 0.93

AKA Sensitivity

$\displaystyle\text{Recall} = \frac{\text{TP}}{(\text{TP + FN})}$

In [71]:
print("F1 Score:", round(f1_score(y_test, y_pred), 2))
F1 Score: 0.9

All of these alternate metrics are even higher than the Accuracy, with Recall standing out at a superb 0.93!

This was the Accuracy, as a reminder:

In [72]:
print("Accuracy:", round(tree.score(X_test, y_test), 2))
Accuracy: 0.85

$\displaystyle\text{Accuracy} = \frac{(\text{TP + TN})}{(\text{TP + TN + FP + FN})}$
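As a quick sanity check (a sketch using the confusion matrix values from above), we can recompute these metrics by hand and confirm they line up with scikit-learn's output:

# Sketch: recomputing the metrics from the confusion matrix
tn, fp, fn, tp = 35, 24, 13, 168

print("Precision:", round(tp / (tp + fp), 2))                   # 0.88
print("Recall:   ", round(tp / (tp + fn), 2))                   # 0.93
print("Accuracy: ", round((tp + tn) / (tp + tn + fp + fn), 2))  # 0.85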

We also have another option to evaluate our tree: Cross Validation. It divides the dataset into a number of blocks/folds (set by the "cv" parameter). The model is then trained and evaluated once per fold: in each iteration, one fold is held out as the test set and the model is trained on the remaining folds, so every observation ends up in a test set exactly once.

The algorithm then outputs a list with the accuracy score from each iteration.

This is a practical way to check that we didn't get the original accuracy score by pure chance, since we can compare it against several other possible results.

To improve readability in the calculations below, we have rounded the values to two decimals.

In [73]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(tree, X, y, cv = 10)

print("Cross Validation Accuracy Scores:", scores.round(2)) 
Cross Validation Accuracy Scores: [0.85 0.88 0.81 0.87 0.87 0.82 0.72 0.76 0.84 0.79]
In [74]:
print("Mean Cross Validation Score:", scores.mean().round(2))
Mean Cross Validation Score: 0.82

To finish with this section, let's now get Cross Validated values for the other evaluation metrics:

In [75]:
from sklearn.model_selection import cross_validate

multiple_cross_scores = cross_validate(
                             tree, 
                             X, y, cv = 10, 
                             scoring= ("precision", "recall", "f1") )
In [76]:
multiple_cross_scores["test_precision"].round(2)
Out[76]:
array([0.86, 0.9 , 0.83, 0.85, 0.86, 0.87, 0.78, 0.85, 0.86, 0.87])
In [77]:
print("Mean Cross Validated Precision:", round(multiple_cross_scores["test_precision"].mean(), 2))
Mean Cross Validated Precision: 0.85
In [78]:
multiple_cross_scores["test_recall"].round(2)
Out[78]:
array([0.96, 0.94, 0.94, 1.  , 0.99, 0.9 , 0.85, 0.82, 0.94, 0.84])
In [79]:
print("Mean Cross Validated Recall:", round(multiple_cross_scores["test_recall"].mean(), 2))
Mean Cross Validated Recall: 0.92
In [80]:
multiple_cross_scores["test_f1"].round(2)
Out[80]:
array([0.91, 0.92, 0.88, 0.92, 0.92, 0.88, 0.82, 0.83, 0.9 , 0.86])
In [81]:
print("Mean Cross Validated F1:", round(multiple_cross_scores["test_f1"].mean(), 2))
Mean Cross Validated F1: 0.88

As we can see, after double checking with Cross Validate, we're still getting impressive metrics. This is excellent!

Explaining the Tree¶

After confirming that our Tree is good enough, here we will explain to the company's leaders not only the insights that our model can provide, but also how Decision Trees themselves are to be interpreted. Imagine that the following text is being read in a conference room to an audience that has never seen a Decision Tree before.

In [82]:
tree.classes_ # We'll transform them to ["Unproductive", "Productive"]
Out[82]:
array([False,  True])
In [83]:
plt.figure(figsize = [20.0, 8.0])

_ = plot_tree(tree, 
              feature_names = X.columns, 
              class_names = ["Unproductive", "Productive"],
              filled = True, 
              rounded = False, 
              proportion = True, 
              fontsize = 11) 

"The graphic above was created using predictions produced by the algorithm we implemented — a Decision Tree. We chose this algorithm because, despite how intimidating the graphic appears to be, we'll have you interpreting the results in just a few minutes!

The graphic illustrates which aspects of the factory are best at predicting if a team will be productive or not. When we say aspects, we mean variables like the date, department, team number, etc. In our case, we discovered that the "incentive" and the "smv" variables were the ones with the greatest influence on the final prediction. To provide context, the "incentive" variable represents the amount of financial incentive offered to motivate a particular course of action. The "smv" (Standard Minute Value) variable represents the time allocated for a specific task.

How do we know that these two variables are the most predictive? We know this because of the information included in the square boxes in the graphic! For example, if we focus on the top part of each box, we will see that those two variables are the ones that appear most frequently. In fact, there is only a single exception in the lower left of the graphic where the variable "number of workers" appears at the top.

You may be wondering why the algorithm is called a Decision Tree. Well, you can think of the graphic as an inverted tree with the top box, called the "root", representing the most predictive feature and the boxes at the bottom, called the "leaves", which provide the predictions. These final boxes (leaves) are the ones that tell us whether a team in a specific department was productive or not. The prediction is shown in the "class" component at the bottom of the box. Keep in mind, even though all boxes feature a "class" component, we are only interested in the classes found in a leaf, not before.

"But how do I know which path to take from the top box to the final ones?" you might ask. Good question! Basically, we need to use the thresholds associated with every variable at the top of every square box. We have to compare our inputs with those thresholds – starting from the root – until we reach the leaves at the bottom. When we say "input" we are referring to the corresponding "incentives" and "smv" values for a particular team. We can obtain these values from any date we choose in the dataset or we can select the values ourselves to test hypothetical situations. This is excellent for us since it means we can use the Decision Tree to predict the outcome for future and/or hypothetical scenarios so long as we have their respective incentive and SMV values.

We get it, that last part was a bit overwhelming, so to show you how all this works, let's use an example: a fictional date where the "incentive" is 22 and the "smv" is 4.44, to predict whether a team will be productive under these conditions. Starting from the root, we see that our first comparison becomes 22 <= 22 once we substitute 22 for "incentive".

So, is 22 less than or equal to 22? Yes! Since the assertion is True, we follow the arrow to the lower left box. This is a universal rule of Decision Trees: if the assertion is True, we continue to the left; otherwise, if it's False, we continue to the right.

True = Left. False = Right.

Now we repeat the process for the rest of the boxes. We apply the same approach to the next box, whose comparison becomes 4.44 <= 3.92 once we substitute 4.44 for "smv".

So, is 4.44 less than or equal to 3.92? No! Since the assertion is False, we follow the arrow to the lower right box. Again, after substituting 4.44 for "smv" in that box, our final comparison is 4.44 <= 4.865.

So, is 4.44 less than or equal to 4.865? Yes! Since the assertion is True, we follow the arrow to the final lower left box, the leaf, which predicts the team will be "Productive". In other words, if we have a team with an "incentive" of 22 and a SMV of 4.44, it will be a productive team!

To summarize, the Decision Tree not only told us which variables have the strongest predictive power ("incentive" and "smv"), but it also allows us to make predictions using values of our own choosing. This is the power of Decision Trees! Any questions?"

Using Random Forest¶

To confirm and validate the results from our Decision Tree model, let's use a Random Forest to compare results.

In [84]:
from sklearn.ensemble import RandomForestClassifier

forest = RandomForestClassifier(
                                oob_score = True,                                  
                                random_state = 24
                               )

forest.fit(X_train, y_train)

y_pred_forest = forest.predict(X_test)

print("Accuracy:", round(accuracy_score(y_test,y_pred_forest), 2))
Accuracy: 0.85

Let's also check the Out-of-Bag (OOB) Score. As a reminder, when a Random Forest draws the bootstrap sample used to train each individual Tree, it leaves out some observations of the dataset; if we activate the OOB score, each Tree is then evaluated on the observations it never saw during training.

In [85]:
print("Out Of Bag Score:", round(forest.oob_score_, 2))
Out Of Bag Score: 0.83

Since the evaluation metrics of our Random Forest are similar to the scores of our Decision Tree, we can be confident about our results!

Because Decision Trees tend to overfit the data, it's considered good practice to double-check them with a Random Forest.
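If we wanted to push the comparison a bit further, a sketch like the one below would cross-validate the Random Forest as well, so its mean score can be set against the tree's mean cross-validation accuracy of 0.82 from earlier (the exact forest value isn't part of the original results):

# Sketch: cross-validating the Random Forest for a fuller comparison
from sklearn.model_selection import cross_val_score

forest_scores = cross_val_score(forest, X, y, cv=10)
print("Mean Cross Validation Score (forest):", forest_scores.mean().round(2))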

Final Thoughts¶

After completing a project, it is helpful to reflect on and highlight important aspects of the entire process to help solidify our learning.

It's critically important to understand the data we're working with. Keeping the dataset description close at hand and referring to it often can help avoid confusion! Faulty assumptions early on can easily lead to misleading results later.

As part of the Exploratory Data Analysis phase, always ensure the consistency and validity of the data. It's vital to identify any outliers, incorrect data, or any other error that can compromise the integrity of the data.

Exercise good judgement when selecting the columns used to train the model, and apply the necessary transformations (like correct encoding) to ensure the model interprets the data correctly.

Take all the time we need during the Data Cleaning phase to ensure there are no incorrect observations that could mislead the model during training.

When creating the Decision Tree algorithm, under no circumstances should we skip the evaluation phase! It can often provide us with clues about which hyperparameters to add or change when optimizing the model later on.

It's good practice to double check our Trees with the Random Forest technique.

Finally, if we're going to explain our findings to an audience, it pays to come prepared. While it's true that Decision Trees are easier to explain than other machine learning algorithms, a Decision Tree is still a machine learning algorithm, and non-technical audiences will naturally be apprehensive!

We have made sure to comply with all these points throughout this project. This explains why we were able to create a highly accurate Decision Tree. While the steps that involved the scikit-learn library were of vital importance, we should never underestimate the value of all the preparation we did beforehand.