-
Notifications
You must be signed in to change notification settings - Fork 141
/
Copy pathpredict.py
43 lines (32 loc) · 1.58 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
import settings
import pandas as pd
from sklearn import cross_validation
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
def read():
train = pd.read_csv(os.path.join(settings.PROCESSED_DIR, "train.csv"))
return train
def cross_validate(train):
clf = LogisticRegression(random_state=1, class_weight="balanced")
predictors = train.columns.tolist()
predictors = [p for p in predictors if p not in settings.NON_PREDICTORS]
predictions = cross_validation.cross_val_predict(clf, train[predictors], train[settings.TARGET], cv=settings.CV_FOLDS)
return predictions
def compute_error(target, predictions):
return metrics.accuracy_score(target, predictions)
def compute_false_negatives(target, predictions):
df = pd.DataFrame({"target": target, "predictions": predictions})
return df[(df["target"] == 1) & (df["predictions"] == 0)].shape[0] / (df[(df["target"] == 1)].shape[0] + 1)
def compute_false_positives(target, predictions):
df = pd.DataFrame({"target": target, "predictions": predictions})
return df[(df["target"] == 0) & (df["predictions"] == 1)].shape[0] / (df[(df["target"] == 0)].shape[0] + 1)
if __name__ == "__main__":
train = read()
predictions = cross_validate(train)
error = compute_error(train[settings.TARGET], predictions)
fn = compute_false_negatives(train[settings.TARGET], predictions)
fp = compute_false_positives(train[settings.TARGET], predictions)
print("Accuracy Score: {}".format(error))
print("False Negatives: {}".format(fn))
print("False Positives: {}".format(fp))