Skip to content

Commit c325446

Browse files
committed
Fix #1897 #3300 Add --git-only
1 parent d8271bc commit c325446

File tree

2 files changed

+282
-61
lines changed

2 files changed

+282
-61
lines changed

codespell_lib/_codespell.py

Lines changed: 108 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import os
2525
import re
2626
import shlex
27+
import subprocess
2728
import sys
2829
import textwrap
2930
from collections.abc import Iterable, Sequence
@@ -655,6 +656,11 @@ def convert_arg_line_to_args(self, arg_line: str) -> list[str]:
655656
action="store_true",
656657
help="output just a single line for each misspelling in stdin mode",
657658
)
659+
parser.add_argument(
660+
"--git-only",
661+
action="store_true",
662+
help="When selected, only check files under git control",
663+
)
658664
parser.add_argument("--config", type=str, help="path to config file.")
659665
parser.add_argument("--toml", type=str, help="path to a pyproject.toml file.")
660666
parser.add_argument("files", nargs="*", help="files or directories to check")
@@ -1224,6 +1230,82 @@ def flatten_clean_comma_separated_arguments(
12241230
]
12251231

12261232

1233+
def get_git_tracked_files(
    root: str, files: Iterable[str], glob_match: GlobMatch, check_hidden: bool
) -> Iterable[str]:
    """Return the git-tracked files selected by *files*.

    Runs ``git ls-files`` with *root* as working directory, so the returned
    names are relative to *root*.

    Args:
        root: Directory in which git is executed.
        files: Files or directories to look up. Directories are expanded to
            everything tracked beneath them. Note that the directory test is
            made against the current process working directory, so *root*
            is expected to be that directory.
        glob_match: Skip patterns. ``pattern_list`` is turned into git
            ``:(exclude)`` pathspecs and ``match`` is applied to the result.
        check_hidden: When false, dot-files and dot-directories are excluded.

    Returns:
        A set of file names. It is empty when git cannot be run or reports
        an error (for example when *root* is not inside a repository); a
        warning is then written to stderr.
    """
    # A directory argument is widened to everything beneath it; any other
    # argument is handed to git unchanged.
    file_args = [
        f"{filename}/**" if os.path.isdir(filename) else filename
        for filename in files
    ]

    # Add the glob patterns to exclude
    exclude_patterns = [
        f":(exclude)**/{pattern}" for pattern in glob_match.pattern_list
    ]

    # Add pattern to exclude hidden files if check_hidden is False.
    # Two forms are needed: "**/.*" requires a leading directory component,
    # ".*" covers names that start with a dot at the top level.
    if not check_hidden:
        exclude_patterns.append(":(exclude)**/.*")
        exclude_patterns.append(":(exclude).*")

    git_executable = "git"  # Could be future option

    try:
        # -z: NUL-terminated, unquoted names (the default output C-quotes
        #     unusual characters, yielding names that cannot be opened).
        # --: everything after it is a pathspec, never an option.
        result = subprocess.run(  # noqa: S603
            [git_executable, "ls-files", "-z", "--", *file_args, *exclude_patterns],
            cwd=root,
            capture_output=True,
            check=True,
            text=True,
        )
    except (OSError, subprocess.CalledProcessError) as exc:
        # OSError: git is not installed or *root* is unusable.
        # CalledProcessError: git ran but failed (e.g. not a repository).
        # Either way no file is known to be tracked; say so instead of
        # silently checking nothing.
        reason = str(getattr(exc, "stderr", None) or exc).strip()
        print(
            f"WARNING: --git-only: unable to list git-tracked files: {reason}",
            file=sys.stderr,
        )
        return set()

    tracked = {name for name in result.stdout.split("\0") if name}

    # The "**/" exclude pathspecs above need a directory in front of the
    # pattern, so also apply the file-level skip checks used by the
    # os.walk based file list (base name first, then the whole path).
    return {
        name
        for name in tracked
        if not (glob_match.match(os.path.basename(name)) or glob_match.match(name))
    }
1269+
1270+
1271+
def build_file_list_with_os_walk(
    files: Iterable[str], glob_match: GlobMatch, check_hidden: bool
) -> Iterable[str]:
    """Collect the files to check by walking *files* on disk.

    Args:
        files: Files or directories given by the caller.
        glob_match: Skip patterns; anything it matches is left out.
        check_hidden: Forwarded to ``is_hidden`` for every name tested.

    Returns:
        A list of file names, in the order they were discovered.
    """
    collected = []
    for filename in files:
        # ignore hidden arguments altogether
        if is_hidden(filename, check_hidden):
            continue

        if not os.path.isdir(filename):
            # plain file argument: keep it unless it is skipped or hidden
            skipped = glob_match.match(filename) or is_hidden(filename, check_hidden)
            if not skipped:
                collected.append(filename)
            continue

        for root, dirs, dirfiles in os.walk(filename):
            if glob_match.match(root):
                # skipped directory (matched by its walked path): stop here
                dirs.clear()
                continue
            if is_hidden(root, check_hidden):
                # hidden directory: take no files from this level
                # (its sub-directories are not pruned at this point)
                continue

            for entry in dirfiles:
                # hidden or skipped by bare file name
                if is_hidden(entry, check_hidden) or glob_match.match(entry):
                    continue
                candidate = os.path.join(root, entry)
                # skipped by path
                if not glob_match.match(candidate):
                    collected.append(candidate)

            # prune in place so that os.walk does not descend into skipped
            # or hidden sub-directories (matched by bare directory name)
            dirs[:] = [
                subdir
                for subdir in dirs
                if not (glob_match.match(subdir) or is_hidden(subdir, check_hidden))
            ]
    return collected
1307+
1308+
12271309
def _script_main() -> int:
12281310
"""Wrap to main() for setuptools."""
12291311
try:
@@ -1406,68 +1488,33 @@ def main(*args: str) -> int:
14061488
"try escaping special characters",
14071489
)
14081490

1409-
bad_count = 0
1410-
for filename in sorted(options.files):
1411-
# ignore hidden files
1412-
if is_hidden(filename, options.check_hidden):
1413-
continue
1414-
1415-
if os.path.isdir(filename):
1416-
for root, dirs, files in os.walk(filename):
1417-
if glob_match.match(root): # skip (absolute) directories
1418-
dirs.clear()
1419-
continue
1420-
if is_hidden(root, options.check_hidden): # dir itself hidden
1421-
continue
1422-
for file_ in sorted(files):
1423-
# ignore hidden files in directories
1424-
if is_hidden(file_, options.check_hidden):
1425-
continue
1426-
if glob_match.match(file_): # skip files
1427-
continue
1428-
fname = os.path.join(root, file_)
1429-
if glob_match.match(fname): # skip paths
1430-
continue
1431-
bad_count += parse_file(
1432-
fname,
1433-
colors,
1434-
summary,
1435-
misspellings,
1436-
ignore_words_cased,
1437-
exclude_lines,
1438-
file_opener,
1439-
word_regex,
1440-
ignore_word_regex,
1441-
uri_regex,
1442-
uri_ignore_words,
1443-
context,
1444-
options,
1445-
)
1446-
1447-
# skip (relative) directories
1448-
dirs[:] = [
1449-
dir_
1450-
for dir_ in dirs
1451-
if not glob_match.match(dir_)
1452-
and not is_hidden(dir_, options.check_hidden)
1453-
]
1491+
# Build the list of all files based on the git_only option
1492+
if options.git_only:
1493+
all_files = get_git_tracked_files(
1494+
os.getcwd(), options.files, glob_match, options.check_hidden
1495+
)
1496+
else:
1497+
all_files = build_file_list_with_os_walk(
1498+
options.files, glob_match, options.check_hidden
1499+
)
14541500

1455-
elif not glob_match.match(filename): # skip files
1456-
bad_count += parse_file(
1457-
filename,
1458-
colors,
1459-
summary,
1460-
misspellings,
1461-
ignore_words_cased,
1462-
exclude_lines,
1463-
file_opener,
1464-
word_regex,
1465-
ignore_word_regex,
1466-
uri_regex,
1467-
uri_ignore_words,
1468-
context,
1469-
options,
1470-
)
1501+
bad_count = 0
1502+
for filename in sorted(all_files):
1503+
bad_count += parse_file(
1504+
filename,
1505+
colors,
1506+
summary,
1507+
misspellings,
1508+
ignore_words_cased,
1509+
exclude_lines,
1510+
file_opener,
1511+
word_regex,
1512+
ignore_word_regex,
1513+
uri_regex,
1514+
uri_ignore_words,
1515+
context,
1516+
options,
1517+
)
14711518

14721519
if summary:
14731520
print("\n-------8<-------\nSUMMARY:")

codespell_lib/tests/test_basic.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,7 @@ def test_exclude_file(
525525
bad_name.write_bytes(
526526
(combinations + "5 abandonned 5\n6 abandonned 6").encode("utf-8")
527527
)
528+
528529
assert cs.main(bad_name) == 18
529530
fname = tmp_path / "tmp.txt"
530531
fname.write_bytes(
@@ -545,6 +546,77 @@ def test_exclude_file(
545546
assert cs.main("-x", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name) == 1
546547

547548

549+
def run_git(path: Path, *args: Union[Path, str]) -> None:
    """Run ``git -C <path> <args...>``; a non-zero exit status raises."""
    command: list[Union[Path, str]] = ["git", "-C", path, *args]
    subprocess.run(  # noqa: S603
        command,
        check=True,
        text=True,
        capture_output=False,
    )
556+
557+
558+
def test_git_only_exclude_file(
    tmp_path: Path, capsys: pytest.CaptureFixture[str], monkeypatch: pytest.MonkeyPatch
) -> None:
    """Test exclude file functionality in combination with --git-only."""
    # --git-only lists tracked files from the current working directory,
    # so run the whole test from inside the temporary repository.
    monkeypatch.chdir(tmp_path)
    bad_name = tmp_path / "bad.txt"
    # check all possible combinations of lines to ignore and ignores
    combinations = "".join(
        f"{n} abandonned {n}\n"
        f"{n} abandonned {n}\r\n"
        f"{n} abandonned {n} \n"
        f"{n} abandonned {n} \r\n"
        for n in range(1, 5)
    )
    bad_name.write_bytes(
        (combinations + "5 abandonned 5\n6 abandonned 6").encode("utf-8")
    )

    run_git(tmp_path, "init")
    run_git(tmp_path, "add", bad_name)

    assert cs.main(bad_name) == 18
    fname = tmp_path / "tmp.txt"
    fname.write_bytes(
        b"1 abandonned 1\n"
        b"2 abandonned 2\r\n"
        b"3 abandonned 3 \n"
        b"4 abandonned 4 \r\n"
        b"6 abandonned 6\n"
    )

    # Not adding fname to git to exclude it

    # Should have 23 total errors (bad_name + fname)
    assert cs.main(tmp_path) == 23

    # Before adding to git, should not report on fname, only 18 errors in bad.txt
    assert cs.main("--git-only", tmp_path) == 18
    run_git(tmp_path, "add", fname)
    assert cs.main(tmp_path) == 23
    # After adding to git, should report on fname
    assert cs.main("--git-only", tmp_path) == 23
    # After adding to git, should not report on excluded file
    assert cs.main("--git-only", "-x", fname, tmp_path) == 1
    # comma-separated list of files
    fname_dummy1 = tmp_path / "dummy1.txt"
    fname_dummy1.touch()
    fname_dummy2 = tmp_path / "dummy2.txt"
    fname_dummy2.touch()
    run_git(tmp_path, "add", fname_dummy1, fname_dummy2)
    assert (
        cs.main(
            "--git-only", "-x", fname_dummy1, "-x", fname, "-x", fname_dummy2, bad_name
        )
        == 1
    )
    assert (
        cs.main("--git-only", "-x", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name)
        == 1
    )
618+
619+
548620
def test_encoding(
549621
tmp_path: Path,
550622
capsys: pytest.CaptureFixture[str],
@@ -662,6 +734,108 @@ def test_check_filename_irregular_file(
662734
assert cs.main("-f", tmp_path) == 1
663735

664736

737+
def test_check_hidden_git(
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Test ignoring of hidden files when --git-only is given.

    Every change to the tree is staged with ``git add .`` before codespell
    is run, so that the new or renamed files are tracked.
    """
    # Work from inside the repository: the test passes both absolute and
    # relative paths to codespell.
    monkeypatch.chdir(tmp_path)
    run_git(tmp_path, "init")
    # visible file
    #
    #         tmp_path
    #         └── test.txt
    #
    fname = tmp_path / "test.txt"
    fname.write_text("erorr\n")
    run_git(tmp_path, "add", ".")
    assert cs.main("--git-only", fname) == 1
    assert cs.main("--git-only", tmp_path) == 1

    # hidden file
    #
    #         tmp_path
    #         └── .test.txt
    #
    hidden_file = tmp_path / ".test.txt"
    fname.rename(hidden_file)
    run_git(tmp_path, "add", ".")
    # hidden files are only checked when --check-hidden is given
    assert cs.main("--git-only", hidden_file) == 0
    assert cs.main("--git-only", tmp_path) == 0
    assert cs.main("--git-only", "--check-hidden", hidden_file) == 1
    assert cs.main("--git-only", "--check-hidden", tmp_path) == 1

    # hidden file with typo in name
    #
    #         tmp_path
    #         └── .abandonned.txt
    #
    typo_file = tmp_path / ".abandonned.txt"
    hidden_file.rename(typo_file)
    run_git(tmp_path, "add", ".")
    assert cs.main("--git-only", typo_file) == 0
    assert cs.main("--git-only", tmp_path) == 0
    assert cs.main("--git-only", "--check-hidden", typo_file) == 1
    assert cs.main("--git-only", "--check-hidden", tmp_path) == 1
    # --check-filenames additionally reports the typo in the file name
    assert cs.main("--git-only", "--check-hidden", "--check-filenames", typo_file) == 2
    assert cs.main("--git-only", "--check-hidden", "--check-filenames", tmp_path) == 2

    # hidden directory
    #
    #         tmp_path
    #         ├── .abandonned
    #         │   ├── .abandonned.txt
    #         │   └── subdir
    #         │       └── .abandonned.txt
    #         └── .abandonned.txt
    #
    # NOTE: the first three assertions below still run against the previous
    # layout; the tree above only exists once the copies have been made.
    assert cs.main("--git-only", tmp_path) == 0
    assert cs.main("--git-only", "--check-hidden", tmp_path) == 1
    assert cs.main("--git-only", "--check-hidden", "--check-filenames", tmp_path) == 2
    hidden = tmp_path / ".abandonned"
    hidden.mkdir()
    copyfile(typo_file, hidden / typo_file.name)
    subdir = hidden / "subdir"
    subdir.mkdir()
    copyfile(typo_file, subdir / typo_file.name)
    run_git(tmp_path, "add", ".")
    assert cs.main("--git-only", tmp_path) == 0
    assert cs.main("--git-only", "--check-hidden", tmp_path) == 3
    assert cs.main("--git-only", "--check-hidden", "--check-filenames", tmp_path) == 8
    # check again with a relative path
    try:
        rel = op.relpath(tmp_path)
    except ValueError:
        # Windows: path is on mount 'C:', start on mount 'D:'
        pass
    else:
        assert cs.main("--git-only", rel) == 0
        assert cs.main("--git-only", "--check-hidden", rel) == 3
        assert cs.main("--git-only", "--check-hidden", "--check-filenames", rel) == 8

    # hidden subdirectory
    #
    #         tmp_path
    #         ├── .abandonned
    #         │   ├── .abandonned.txt
    #         │   └── subdir
    #         │       └── .abandonned.txt
    #         ├── .abandonned.txt
    #         └── subdir
    #             └── .abandonned
    #                 └── .abandonned.txt
    subdir = tmp_path / "subdir"
    subdir.mkdir()
    hidden = subdir / ".abandonned"
    hidden.mkdir()
    copyfile(typo_file, hidden / typo_file.name)
    run_git(tmp_path, "add", ".")
    assert cs.main("--git-only", tmp_path) == 0
    assert cs.main("--git-only", "--check-hidden", tmp_path) == 4
    assert cs.main("--git-only", "--check-hidden", "--check-filenames", tmp_path) == 11
837+
838+
665839
def test_check_hidden(
666840
tmp_path: Path,
667841
capsys: pytest.CaptureFixture[str],

0 commit comments

Comments
 (0)