-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVR_script.py
171 lines (142 loc) · 6.87 KB
/
VR_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import sys
import os
import math
from typing import Tuple
import re
from Levenshtein import distance as levenshtein_distance
PATH_TEXT_MAPPING = {
"1": (387, "solution-1-easy.md"),
"2": (980, "solution-2-medium.md"),
"3": (2084, "solution-3-challenging.md")
}
def remove_vowels(text: str) -> str:
lines = text.split("\n")
new_lines = []
for line in lines:
words = line.split()
new_words = []
for word in words:
if not re.search("[^aeiouAEIOUäüöÄÜÖ]", word):
new_words.append(word)
else:
new_words.append(re.sub("[aeiouAEIOUäüöÄÜÖ]", "", word))
new_lines.append(" ".join(new_words))
return "\n".join(new_lines)
def calculate_similarity(original_text: str, reconstructed_text: str) -> float:
original_words = original_text.split()
reconstructed_words = reconstructed_text.split()
total_chars = sum(len(word) for word in original_words)
matches = 0
index_reconstructed = 0
for original_word in original_words:
while index_reconstructed < len(reconstructed_words):
reconstructed_word = reconstructed_words[index_reconstructed]
if original_word == reconstructed_word: # exact match
matches += len(original_word)
index_reconstructed += 1
break
else:
# Check if a word is missing in the reconstructed text
missing_word = True
for c in reconstructed_word:
if c in original_word:
matches += 1
original_word = original_word.replace(c, "", 1)
missing_word = False
if missing_word:
index_reconstructed += 1
else:
break
similarity_percentage = (matches / total_chars) * 100
return similarity_percentage
def calculate_levenshtein(original_text: str, reconstructed_text: str) -> int:
return levenshtein_distance(original_text, reconstructed_text)
def process_scenario(arguments: Tuple[str, ...]):
if len(arguments) == 0:
print("Do you want to edit a text and remove its vowels or do you want to calculate a Sym-Score?")
print("1: Edit text")
print("2: Calculate Sym-Score")
choice = input("Enter 1 or 2: ")
if choice == "1":
txt_files = [f for f in os.listdir() if f.endswith(('.txt', '.md'))]
print("Select a file to be edited:")
for i, f in enumerate(txt_files):
print(f"{i + 1}: {f}")
file_choice = int(input("Enter the number of the file: ")) - 1
file_path = txt_files[file_choice]
with open(file_path, "r") as file:
content = file.read()
new_content = remove_vowels(content)
new_file_path = "task-" + file_path
with open(new_file_path, "w") as new_file:
new_file.write(new_content)
print(f"File was edited and saved as {new_file_path}.")
elif choice == "2":
print("Which Sym-Score do you want to calculate?")
print("Sym-1-Score")
print("Sym-2-Score")
print("Sym-3-Score")
sym_choice = input("Enter 1, 2 or 3: ")
txt_files = [f for f in os.listdir() if f.endswith(('.txt', '.md'))]
print("Select a file to calculate the Sym-Score:")
for i, f in enumerate(txt_files):
print(f"{i + 1}: {f}")
file_choice = int(input("Enter the number of the file: ")) - 1
file_path = txt_files[file_choice]
with open(file_path, "r") as file:
content = file.read()
process_scenario((sym_choice, content, file_path))
else:
print("Invalid entry. Please enter 1 or 2.")
return
elif len(arguments) == 3:
try:
original_text_length, original_text_path = PATH_TEXT_MAPPING[arguments[0]]
reconstructed_text = arguments[1]
file_path = arguments[2]
with open(original_text_path, "r") as original_file:
original_text = original_file.read()
similarity = calculate_similarity(original_text, reconstructed_text)
similarity2 = float(similarity) / 100
levenshtein = calculate_levenshtein(original_text, reconstructed_text)
levenshtein_ratio = float(levenshtein) / float(original_text_length)
sim_score2 = float(similarity2) / float(levenshtein_ratio)
# (Score^2 + 30), then take the log10, and finally square the result
log10_value = math.pow(math.log10(pow(sim_score2, 2) + 30), 2)
similarity_round = round(similarity, 1)
levenshtein_ratio_round = round(levenshtein_ratio, 2)
log10_value_round = round(log10_value, 2)
print(f"=============================")
print(f"Similarity Value {similarity_round}%")
print(f"Levenshtein-Distance {levenshtein}")
print(f"Lev-Distance(rel) {levenshtein_ratio_round}")
print(f"=============================")
print(f"Sym-{arguments[0]}-Score {log10_value_round}")
print(f"=============================")
# Save the results to a new file
new_file_name = f"sym_{arguments[0]}_score-{os.path.basename(file_path)}"
with open(new_file_name, "w") as new_file:
new_file.write(f"=============================\n")
new_file.write(f"Similarity Value {similarity_round}%\n")
new_file.write(f"Levenshtein-Distance {levenshtein}\n")
new_file.write(f"Lev-Distance(rel) {levenshtein_ratio_round}\n")
new_file.write(f"=============================\n")
new_file.write(f"Sym-{arguments[0]}-Score {log10_value_round}\n")
new_file.write(f"=============================\n\n")
new_file.write(reconstructed_text)
new_file.write("\n\n")
with open(original_text_path, "r") as original_file:
new_file.write(original_file.read())
print(f"Results saved to {new_file_name}")
except KeyError as e:
print("Error: Invalid option specified. Please provide 1, 2, or 3 as the first argument.")
else:
print("Error: Invalid number of arguments. Please provide either 0 or 3 arguments.")
if __name__ == "__main__":
if len(sys.argv) == 3:
_, sym_choice, file_path = sys.argv
with open(file_path, "r") as file:
content = file.read()
process_scenario((sym_choice, content, file_path))
else:
process_scenario(tuple(sys.argv[1:]))