-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhuggingFaceMiddleware.py
83 lines (61 loc) · 3.48 KB
/
huggingFaceMiddleware.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.schema import HumanMessage, SystemMessage
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace, HuggingFacePipeline
from smells import getAllSmellsInASingleString
from typing import List, Optional
from pydantic import BaseModel, Field
load_dotenv()
# class DetectedSmell(BaseModel):
# smellName: str
# reason: str
gemini_api_key = os.getenv("GEMINI_API_KEY")
class DetectedSmells(BaseModel):
smellNames: List[str] = Field(description="List of detected smells")
llm = ChatGoogleGenerativeAI(
model="gemini-1.5-flash-8b",
api_key=gemini_api_key,
)
# llm = HuggingFaceEndpoint(
# repo_id="google/gemma-7b",
# task="text-generation",
# temperature=0.01,
# huggingfacehub_api_token="",
# )
# llm = HuggingFacePipeline.from_model_id(
# model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
# task="text-generation",
# pipeline_kwargs={"max_new_tokens": 512}
# )
structured_llm = llm.with_structured_output(DetectedSmells)
all_smells = getAllSmellsInASingleString()
def smellSirializer(found_smells_in_natural_language: str) -> list[str]:
message = [
SystemMessage(
"You are an expert in making test smells list from natural language format. You will be given a natual language report of which smells are in a test code block, you have to reuturn a array containing the name of the test smells found in the text block. Here is the list of test smells: " + all_smells
),
HumanMessage("Here is the List of found test smells in natutal language :" + found_smells_in_natural_language + ". Please return the list of test smells found in the text block in array format."),
]
response = structured_llm.invoke(message)
return response.smellNames
if __name__ == "__main__":
smells_natual_lang = """
.
:The provided test code block contains the following test smells:
1. **Assertion Roulette**: The test method `realCase` contains multiple assertions without a descriptive message for each assertion. This makes it difficult to understand which specific assertion failed if the test fails.
2. **Eager Test**: The test method `realCase` invokes several methods of the production object `Abriss` (e.g., `removeDAO`, `add`, `compute`, `getResults`, `getMean`, `getMSE`, `getMeanErrComp`). This makes the test harder to comprehend and maintain.
3. **Magic Number Test**: The test method `realCase` contains numeric literals (magic numbers) as parameters in the assertions. These numbers do not indicate the meaning/purpose of the number, and they should be replaced with constants or variables.
4. **Exception Handling**: The test method `realCase` contains a try-catch block to handle `CalculationException`. Instead of custom exception handling, JUnit's exception handling should be used to automatically pass/fail the test.
5. **Sensitive Equality**: The test method `realCase` uses `this.df4.format()` and `this.df1.format()` to format the results before comparing them with expected values. This can lead to issues if the formatting logic changes, making the test fragile.
Here is a summary of the identified test smells:
- Assertion Roulette
- Eager Test
- Magic Number Test
- Exception Handling
- Sensitive Equality
"""
res = smellSirializer(smells_natual_lang)
print(res)
print(type(res))