-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_plots.py
153 lines (119 loc) · 4.61 KB
/
build_plots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import os
import pandas as pd
import matplotlib.pyplot as plt
from chardet import detect as detect_encoding
"""
Set the input variables here
WORKING_DIRECTORY is the folder where everything is stored. By default, it is the subfolder `data`.
INPUT_FILENAMES is a list of formant tables that you have exported from Praat. You can add / remove as many lines as you need - it will work fine with just one.
CHART_FILENAMES is a list of the chart image files that will be created, one per entry in INPUT_FILENAMES.
CHART_TITLES is the title at the top of your charts.
X_SCALE, Y_SCALE and FIGURE_SIZE control the axis ranges (in Hz) and the size of the figure. You may need to tweak these so that all of your speaker's data fits on the chart.
X_SCALE is formant 2 and Y_SCALE is formant 1.
"""
# Folder that holds the input tables and receives the generated charts
WORKING_DIRECTORY: str = r"data"

# Formant tables to read, relative to WORKING_DIRECTORY
INPUT_FILENAMES: list[str] = [
    "standard_italian_vowels_formants.Table",
    "jf_italian_vowels_202410_formants.Table",
]

# Image file written for each table, in the same order as INPUT_FILENAMES
CHART_FILENAMES: list[str] = [
    "standard_italian_vowels.png",
    "jf_italian_vowels_202410.png",
]

# Title drawn on each chart, in the same order as INPUT_FILENAMES
CHART_TITLES: list[str] = [
    "Standard Italian Vowels",
    "JF Italian Vowels Oct 2024",
]

# Axis ranges in Hz: X_SCALE is used for the F2 axis, Y_SCALE for the F1 axis
X_SCALE: tuple[int, int] = (500, 2500)
Y_SCALE: tuple[int, int] = (100, 1000)

# Figure size handed to matplotlib as `figsize`
FIGURE_SIZE: tuple[int, int] = (20, 8)
def main() -> None:
    """Check the configured lists, then re-encode and plot every input table.

    Raises:
        ValueError: If INPUT_FILENAMES, CHART_FILENAMES and CHART_TITLES do
            not all have the same number of items.
    """
    # The three lists are read in parallel, so their lengths have to agree
    expected_count = len(INPUT_FILENAMES)
    lengths_agree = expected_count == len(CHART_FILENAMES) and expected_count == len(
        CHART_TITLES
    )
    if not lengths_agree:
        raise ValueError(
            "The number of items in INPUT_FILENAMES, CHART_FILENAMES and CHART_TITLES does not match"
        )

    # One chart per (table, image, title) triple
    for table_name, chart_name, chart_title in zip(
        INPUT_FILENAMES, CHART_FILENAMES, CHART_TITLES
    ):
        print(f"Processing {table_name}")
        input_path = os.path.join(WORKING_DIRECTORY, table_name)
        output_path = os.path.join(WORKING_DIRECTORY, chart_name)

        # The table is rewritten as utf-8 before it is parsed
        reencode_as_utf8(input_path)

        build_plot(input_path, output_path, chart_title, X_SCALE, Y_SCALE, FIGURE_SIZE)
def build_plot(
    input_path: str,
    output_path: str,
    chart_title: str,
    x_scale: tuple[int, int],
    y_scale: tuple[int, int],
    fig_size: tuple[int, int],
) -> None:
    """Plot the smoothed F1/F2 trajectory of every vowel in a table and save it.

    Args:
        input_path: Table to load with `pd.read_csv`. The columns `vowel`,
            `time_index`, `F1` and `F2` are read from it.
        output_path: Image file the chart is saved to.
        chart_title: Title drawn at the top of the chart.
        x_scale: Limits of the F2 axis. The axis runs from `x_scale[1]` on the
            left to `x_scale[0]` on the right.
        y_scale: Limits of the F1 axis. The axis runs from `y_scale[1]` at the
            bottom to `y_scale[0]` at the top.
        fig_size: Figure size passed to matplotlib as `figsize`.
    """

    def rolling_mean(series):
        # min_periods=1 keeps the first samples of a series instead of NaN
        return series.rolling(3, min_periods=1).mean()

    # Labels that mark a row as having no usable vowel name
    blank_labels = ("", "None")

    # Load the data from the CSV file
    df = pd.read_csv(input_path)

    # Create a 3-sample rolling average per vowel
    # NOTE(review): the window follows the row order of the file, so this
    # assumes rows are already in time order within each vowel - confirm
    df["F1s"] = df.groupby("vowel")["F1"].transform(rolling_mean)
    df["F2s"] = df.groupby("vowel")["F2"].transform(rolling_mean)

    # Exclude the start and end points; trajectories are labelled where
    # time_index == 2
    df_plot = df[(df["time_index"] > 1) & (df["time_index"] < 10)]
    df_startpt = df[df["time_index"] == 2]

    # Set the theme before the figure exists so that it also applies to the
    # very first figure created in a run
    plt.style.use("seaborn-v0_8-whitegrid")
    fig, ax = plt.subplots(figsize=fig_size, dpi=600)
    try:
        # Plot each vowel series separately to preserve order
        for vowel, group in df_plot.groupby("vowel"):
            label_text = str(vowel).strip()
            if label_text in blank_labels:
                continue
            group = group.sort_values(by="time_index")
            ax.plot(group["F2s"], group["F1s"], linewidth=3, label=label_text)

        # Add labels at the start points
        for vowel, f2, f1 in zip(
            df_startpt["vowel"], df_startpt["F2s"], df_startpt["F1s"]
        ):
            # A missing vowel would otherwise be drawn as the text "nan"
            if pd.isna(vowel):
                continue
            label_text = str(vowel).strip()
            if label_text in blank_labels:
                continue
            ax.text(
                f2,
                f1,
                label_text,
                verticalalignment="bottom",
                horizontalalignment="right",
                fontweight="bold",
            )

        # Passing the limits in swapped order sets the scale and reverses the
        # axis direction in one step
        ax.set_xlim(x_scale[1], x_scale[0])
        ax.set_ylim(y_scale[1], y_scale[0])
        ax.set_aspect("equal")

        # Add labels and title
        ax.set_xlabel("F2 (Hz)")
        ax.set_ylabel("F1 (Hz)")
        ax.set_title(chart_title)
        ax.legend(loc="upper left")

        fig.savefig(output_path, bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # figures do not pile up when several charts are built in one run
        plt.close(fig)
def reencode_as_utf8(input_path: str) -> None:
    """Rewrite the file at `input_path` in place as utf-8.

    The current encoding is guessed with chardet from the raw bytes of the
    file. The re-encoded text is first written to `input_path + ".utf8"` and
    then moved over the original file.

    Args:
        input_path: Path of the text file to convert. The file is overwritten.

    Raises:
        OSError: If the file cannot be read or the replacement cannot be written.
        UnicodeDecodeError: If the file cannot be decoded with the guessed
            encoding. The original file is left untouched in that case.
    """
    # Based upon https://stackoverflow.com/questions/191359/how-to-convert-a-file-to-utf-8-in-python
    with open(input_path, "rb") as raw_file:
        detected = detect_encoding(raw_file.read())["encoding"]

    # chardet reports None when it cannot decide (for example an empty file);
    # fall back to utf-8 rather than the platform-dependent locale default
    input_encoding = detected or "utf-8"

    temp_path = input_path + ".utf8"
    try:
        # Re-encode
        with open(input_path, "r", encoding=input_encoding) as source, open(
            temp_path, "w", encoding="utf-8"
        ) as target:
            target.write(source.read())

        # Replace old file with new in a single step, so the input never
        # goes missing between a delete and a rename
        os.replace(temp_path, input_path)
    finally:
        # Only still present when something above failed
        if os.path.exists(temp_path):
            os.remove(temp_path)
# Build the charts only when this file is executed as a script
if __name__ == "__main__":
    main()