# evaluator_better.py
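"""Evaluate an LLM-based test-smell detector against a labeled dataset.

Reads evalDataset.csv, queries the Groq model for the smells in each of the
largest test files, scores the answers against the ground-truth smell
columns, and writes per-file results plus precision/recall/F1/accuracy
summaries to the result/ directory.
"""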
import json
import os
import time

import pandas as pd

from groqAgent import detectSmellWithGroq
from huggingFaceMiddleware import smellSirializer

# Ground-truth smell labels; each must exist as a boolean column in the dataset.
smell_columns = ['Assertion Roulette', 'Conditional Test Logic', 'Constructor Initialization',
                 'Default Test', 'EmptyTest', 'Exception Catching Throwing', 'General Fixture',
                 'Mystery Guest', 'Print Statement', 'Redundant Assertion', 'Sensitive Equality',
                 'Verbose Test', 'Wait And See', 'Eager Test', 'Lazy Test', 'Lazy Assert',
                 'Unknown Test']


def getSmellsByModel(test_code, prod_code):
    """Ask the Groq model which smells are present in test_code and return
    them as a list of smell names (e.g. ['Assertion Roulette', 'Lazy Test'])."""
    # Note: this overrides the prod_code argument, so the model never sees the
    # production code. Remove this line to include it in the prompt.
    prod_code = ""
    return smellSirializer(detectSmellWithGroq(test_code, prod_code))


def analyze_test_smells(row):
    """Run the detector on one dataset row and score it against the labels."""
    app_name = row['App']
    test_file_path = row['TestFilePath']
    production_file_path = row['ProductionFilePath']
    test_file_content = row['TestFileCode']
    prod_file_content = row['ProductionFileCode']

    present_smells = [smell for smell in smell_columns if row[smell] == True]
    found_smells = getSmellsByModel(test_file_content, prod_file_content)

    # Score over the full smell vocabulary: one binary decision per smell.
    true_positive = len(set(present_smells).intersection(found_smells))
    false_positive = len(set(found_smells).difference(present_smells))
    false_negative = len(set(present_smells).difference(found_smells))
    true_negative = len(set(smell_columns) - set(present_smells) - set(found_smells))

    print("Test Smells: ", present_smells)
    print("Found Smells: ", found_smells)
    print(f"TP: {true_positive}, FP: {false_positive}, FN: {false_negative}, TN: {true_negative}")

    return {
        'App': app_name,
        'TestFilePath': test_file_path,
        'ProductionFilePath': production_file_path,
        'TestFileCode': test_file_content,
        'ProductionFileCode': prod_file_content,
        'present_smells': present_smells,
        'found_smells': found_smells,
        'false_positive': false_positive,
        'false_negative': false_negative,
        'true_positive': true_positive,
        'true_negative': true_negative
    }


def calculate_metrics(results_df):
    """Aggregate the per-file counts into precision, recall, F1 and accuracy."""
    tp = results_df['true_positive'].sum()
    fp = results_df['false_positive'].sum()
    fn = results_df['false_negative'].sum()
    tn = results_df['true_negative'].sum()

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    accuracy = (tp + tn) / (tp + fp + fn + tn) if (tp + fp + fn + tn) > 0 else 0
    return precision, recall, f1_score, accuracy
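
# Worked example: with TP=10, FP=5, FN=5, TN=80 the formulas above give
# precision = recall = F1 = 10/15 = 0.667 and accuracy = 90/100 = 0.90.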


def save_results(results_df, evaluation_name, precision, recall, f1_score, accuracy):
    """Write per-file results to CSV and the metric summary to JSON under result/."""
    os.makedirs('result', exist_ok=True)
    results_df.to_csv(f'result/evaluation_result_{evaluation_name}.csv', index=False)
    summary = {
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1_score,
        "Accuracy": accuracy
    }
    with open(f'result/summary_{evaluation_name}.json', 'w') as file:
        json.dump(summary, file, indent=4)
    print(f"Results saved: evaluation_result_{evaluation_name}.csv and summary_{evaluation_name}.json")


def token_count(text):
    """Crude size proxy: counts characters, not a real tokenizer's tokens."""
    return len(text)
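
# If real token counts matter, a tokenizer could be swapped in here — a minimal
# sketch, assuming the tiktoken package is installed (not a project dependency):
#     import tiktoken
#     enc = tiktoken.get_encoding("cl100k_base")
#     tokens = len(enc.encode(text))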


def main():
    evaluation_name = 'mixtral-8x7b-32768-large'
    try:
        df = pd.read_csv('evalDataset.csv')
    except Exception as e:
        print(f"Error reading input file: {e}")
        return

    required_columns = ['TestFileCode', 'ProductionFileCode'] + smell_columns
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise ValueError(f"Missing columns in input data: {missing_columns}")

    # Evaluate only the 50 largest test files (by character count).
    df['tokens'] = df['TestFileCode'].apply(token_count)
    df = df.sort_values(by='tokens', ascending=False).reset_index(drop=True)
    n = 50
    df = df.head(n)
    print(f"Analyzing {len(df)} test files")

    analysis_results = []
    for index, row in df.iterrows():
        print(f"Analyzing row {index}")
        try:
            result = analyze_test_smells(row)
            analysis_results.append(result)
            time.sleep(30)  # pause between calls to respect API rate limits
        except Exception as e:
            print(f"Error analyzing row {index}: {e}")
            try:
                # Retry the row once before giving up on it.
                result = analyze_test_smells(row)
                analysis_results.append(result)
                time.sleep(30)
            except Exception as e:
                print(f"Error analyzing row {index} again: {e}")
                # Record an empty result so every row still yields one entry.
                analysis_results.append({
                    'App': row['App'],
                    'TestFilePath': row['TestFilePath'],
                    'ProductionFilePath': row['ProductionFilePath'],
                    'TestFileCode': row['TestFileCode'],
                    'ProductionFileCode': row['ProductionFileCode'],
                    'present_smells': [],
                    'found_smells': [],
                    'false_positive': 0,
                    'false_negative': 0,
                    'true_positive': 0,
                    'true_negative': 0
                })
                time.sleep(60)  # back off longer after two consecutive failures

    results_df = pd.DataFrame(analysis_results)
    precision, recall, f1_score, accuracy = calculate_metrics(results_df)
    save_results(results_df, evaluation_name, precision, recall, f1_score, accuracy)


if __name__ == "__main__":
    main()