tracked file extensions moved to settings.py + encoding added when reading comments

2024-11-05 23:13:13 +00:00
parent b7f9db0efc
commit beefb025d6
2 changed files with 7 additions and 6 deletions
--- a/utils/organiser.py
+++ b/utils/organiser.py
@@ -1,7 +1,7 @@
 import os, shutil, re
 from utils.extractor import extract_file_to_dir
-from utils.settings import BAD_DIR_NAME, BB_GRADEBOOKS_DIR, IGNORE_DIRS
+from utils.settings import BAD_DIR_NAME, BB_GRADEBOOKS_DIR, IGNORE_DIRS, TRACKED_FILE_EXT
 def validate_gradebook_dir_name(src_dir: str) -> None:
@@ -19,7 +19,7 @@ def get_comment_from_submission_txt(file_path: str) -> tuple[str, str] | None:
    no_comment_regex = f'Comments:\nThere are no student comments for this assignment.'
    no_comment_pattern = re.compile(no_comment_regex)
-    with open(file_path) as f:
+    with open(file_path, encoding='utf-8') as f:
        file_contents = f.read()
        if not no_comment_pattern.findall(file_contents):
            comment_regex = f'Comments:\n.*'
@@ -39,12 +39,11 @@ def get_gradebook_stats(src_dir: str) -> dict[str, int]:
    dirs = [ f for f in all_files if os.path.isdir(f) and BAD_DIR_NAME not in f ]
    normal_files = [ f for f in all_files if os.path.isfile(f) ]
-    tracked_file_extensions = [ '.zip', '.rar', '.7z', '.txt' ]  # add extension in list to track stats for more
    files_counter = {}
    files_counter['all'], files_counter['dirs'], files_counter['normal'] = len(all_files), len(dirs), len(normal_files)
    tracked_files_counter = 0
-    for ext in tracked_file_extensions:
+    for ext in TRACKED_FILE_EXT:
        files_counter[ext] = len([ f for f in normal_files if f.lower().endswith(ext) ])
        tracked_files_counter += files_counter[ext]
@@ -52,9 +51,9 @@ def get_gradebook_stats(src_dir: str) -> dict[str, int]:
    files_counter['untracked'] = files_counter['normal'] - tracked_files_counter
    dirs_msg = f'. Also found {len(dirs)} dir(s), wasn\'t expecting any!' if len(dirs) else ''
-    tracked_files_list = [ f'{files_counter[ext]} {ext}' for ext in tracked_file_extensions ] 
+    tracked_files_list = [ f'{files_counter[ext]} {ext}' for ext in TRACKED_FILE_EXT ] 
    tracked_msg = f"{', '.join(str(f) for f in tracked_files_list)}"
-    msg = f'\n[Stats] Gradebook contains {files_counter["all"]} file(s){dirs_msg}\n[Stats] Tracking {len(tracked_file_extensions)} file extension(s), files found: {tracked_msg}\n[Stats] Files with untracked extension: {files_counter["untracked"]}'
+    msg = f'\n[Stats] Gradebook contains {files_counter["all"]} file(s){dirs_msg}\n[Stats] Tracking {len(TRACKED_FILE_EXT)} file extension(s), files found: {tracked_msg}\n[Stats] Files with untracked extension: {files_counter["untracked"]}'
    print(msg, flush=True)
    return files_counter
--- a/utils/settings.py
+++ b/utils/settings.py
@@ -6,3 +6,5 @@ BB_SUBMISSIONS_DIR = 'BB_submissions'  # directory with organised gradebook subm
 BAD_DIR_NAME = '__BAD__'  # for organise_gradebook.py - directory with corrupt/invalid compressed files
 CSV_DIR = os.path.join(os.getcwd(), 'csv-inspect')  # for inspect_gradebook.py and inspect_submissions.py - output dir for generated CSV files
 IGNORE_DIRS = [ '__MACOSX', '.git', 'node_modules', 'vendor' ]  # list of dir names to ignore from extracting
+TRACKED_FILE_EXT = [ '.zip', '.rar', '.7z', '.txt', '.pde' ]  # add extension in list to track stats for more