detect-secrets/detect_secrets/plugins/base.py at master · IBM/detect-secrets · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
import base64
import binascii
import re
from abc import ABCMeta
from abc import abstractmethod
from abc import abstractproperty

from .common.constants import ALLOWLIST_REGEXES
from detect_secrets.core.code_snippet import CodeSnippetHighlighter
from detect_secrets.core.constants import VerifiedResult
from detect_secrets.core.potential_secret import PotentialSecret

# NOTE: The whitepaper linked below (Section V-D) suggests that there is an
#       80% chance of finding a multi-factor secret (e.g. username +
#       password) within five lines of context, before and after a secret.
#
#       This number can be tweaked if desired, at the cost of performance.
#       It is the `lines_of_context` value handed to the code snippet
#       highlighter when a found secret is being verified.
#
#       NOTE(review): the value is 7 rather than the five lines quoted
#       above -- presumably a deliberate safety margin; confirm.
#
#       https://www.ndss-symposium.org/wp-content/uploads/2019/02/ndss2019_04B-3_Meli_paper.pdf
LINES_OF_CONTEXT = 7


class classproperty(property):
    """Read-only property that is evaluated against the class.

    The decorated function receives the owning class as its only argument,
    regardless of whether the attribute is looked up on the class itself or
    on one of its instances.
    """

    def __get__(self, cls, owner):
        # Bind the getter to the owning class (never to the instance), then
        # invoke it immediately so that attribute access yields the value.
        bound_getter = classmethod(self.fget).__get__(None, owner)
        return bound_getter()


class BasePlugin:
    """
    This is an abstract class to define Plugins API.

    :type secret_type: str
    :param secret_type: uniquely identifies the type of secret found in the baseline.
        e.g. {
            "hashed_secret": <hash>,
            "line_number": 123,
            "type": <secret_type>,
        }

        Be warned of modifying the `secret_type` once rolled out to clients since
        the hashed_secret uses this value to calculate a unique hash (and the baselines
        will no longer match).

    :type flag_text: str
    :param flag_text: text used as a command line argument flag to disable
        this specific plugin scan. does not include the `--` prefix.

    :type default_options: Dict[str, Any]
    :param default_options: configurable options to modify plugin behavior
    """
    # NOTE(review): a `__metaclass__` class attribute is only honoured by
    # Python 2. Python 3 ignores it, so the abstract members declared below
    # are not enforced at instantiation time there. Left untouched because
    # switching enforcement on could break existing subclasses.
    __metaclass__ = ABCMeta

    @abstractproperty
    def secret_type(self):
        """Unique name of the kind of secret this plugin reports."""
        raise NotImplementedError

    def __init__(
        self,
        exclude_lines_regex=None,
        should_verify=False,
        false_positive_heuristics=None,
        **kwargs
    ):
        """
        :type exclude_lines_regex: str|None
        :param exclude_lines_regex: optional regex for ignored lines.
            It is compiled once here; a falsy value disables the check.

        :type should_verify: bool
        :param should_verify: whether `analyze` and `adhoc_scan` should call
            `verify` on every secret that is found.

        :type false_positive_heuristics: List[Callable]|None
        :param false_positive_heuristics: List of fp-heuristic functions
            applicable to this plugin

        Any additional keyword arguments are accepted and ignored.
        """
        self.exclude_lines_regex = (
            re.compile(exclude_lines_regex)
            if exclude_lines_regex
            else None
        )

        self.should_verify = should_verify

        # Normalise a missing/empty value to a fresh list so that callers can
        # always iterate over the attribute.
        self.false_positive_heuristics = false_positive_heuristics or []

    @classproperty
    def flag_text(cls):
        """Command line flag (without the `--` prefix) that disables this plugin.

        The flag is derived from the class name: a trailing `Detector` is
        dropped and the remainder is converted from camel case to a
        hyphenated lower-case string, e.g. `FooBarDetector` gives
        `no-foo-bar-scan`.

        :rtype: str
        """
        name = cls.__name__
        if name.endswith('Detector'):
            name = name[:-len('Detector')]

        # turn camel case into hyphenated strings
        #
        # A hyphen is inserted before every character that equals its own
        # upper-case form, except at the very start. Note that this test is
        # also true for digits and other uncased characters.
        pieces = []
        for letter in name:
            if letter.upper() == letter and pieces:
                pieces.append('-')
            pieces.append(letter.lower())

        return 'no-{}-scan'.format(''.join(pieces))

    @classproperty
    def default_options(cls):
        """Configurable options of the plugin; the base class has none.

        :rtype: dict
        """
        return {}

    def _is_excluded_line(self, line):
        """Tell whether `line` must be skipped by `analyze`.

        A line is skipped when it matches any of ALLOWLIST_REGEXES, or when
        it matches the `exclude_lines_regex` given to the constructor.

        :type line: str
        :returns: a truthy value if the line is excluded, a falsy one
            otherwise (not necessarily a bool).
        """
        return (
            any(
                allowlist_regex.search(line)
                for allowlist_regex in ALLOWLIST_REGEXES
            )
            or
            (
                self.exclude_lines_regex and
                self.exclude_lines_regex.search(line)
            )
        )

    def analyze(self, file, filename, output_raw=False, output_verified_false=False):
        """
        :param file:     The File object itself.
        :param filename: string; filename of File object, used for creating
                         PotentialSecret objects
        :param output_raw: whether or not to output the raw, unhashed secret
        :param output_verified_false: whether or not to keep secrets that
                         verification reported as VERIFIED_FALSE. Only
                         relevant when `self.should_verify` is set.
        :returns         dictionary representation of set (for random access by hash)
                         { detect_secrets.core.potential_secret.__hash__:
                               detect_secrets.core.potential_secret         }
        """
        potential_secrets = {}
        # All lines are kept around because verification needs the lines
        # surrounding a secret, not just the line it was found on.
        file_lines = tuple(file.readlines())
        for line_num, line in enumerate(file_lines, start=1):
            if self._is_excluded_line(line):
                continue

            results = self.analyze_line(line, line_num, filename, output_raw)
            if not results:
                continue

            if not self.should_verify:
                potential_secrets.update(results)
                continue

            filtered_results = {}
            for result in results:
                snippet = CodeSnippetHighlighter().get_code_snippet(
                    file_lines,
                    result.lineno,
                    lines_of_context=LINES_OF_CONTEXT,
                )

                is_verified = self.verify(
                    result.secret_value, content=str(snippet),
                    potential_secret=result,
                )

                # Record the verification outcome on the secret itself.
                if is_verified == VerifiedResult.UNVERIFIED:
                    result.is_verified = False
                elif is_verified == VerifiedResult.VERIFIED_TRUE:
                    result.is_verified = True
                    result.verified_result = True
                elif is_verified == VerifiedResult.VERIFIED_FALSE:
                    result.is_verified = True
                    result.verified_result = False

                # Secrets verified as false are dropped unless the caller
                # explicitly asked for them; everything else is kept.
                if is_verified != VerifiedResult.VERIFIED_FALSE or output_verified_false:
                    filtered_results[result] = result

            potential_secrets.update(filtered_results)

        return potential_secrets

    def analyze_line(self, string, line_num, filename, output_raw=False):
        """
        :param string:    string; the line to analyze
        :param line_num:  integer; line number that is currently being analyzed
        :param filename:  string; name of file being analyzed
        :param output_raw: whether or not to output the raw, unhashed secret
        :returns:         dictionary; empty when nothing was found
        NOTE: line_num and filename are used for PotentialSecret creation only.
        """
        # First, look for a result in the raw input string.
        result = self.analyze_string_content(
            string,
            line_num,
            filename,
            output_raw,
        )

        # If there was no result in the raw, look for encoded values.
        if not result:
            result = self.analyze_encoded_line(
                string,
                line_num,
                filename,
                output_raw,
            )

        # Return the result, if any. The encoded lookup may yield None, which
        # is normalised to an empty dictionary here.
        return result or {}

    def analyze_encoded_line(self, string, line_num, filename, output_raw=False):
        """Analyzes lines in certain files to identify and parse encoded values.
        TODO Move this to a plugin system, to support multiple decoders.

        Only `.npmrc` files are handled: a base64 encoded `_auth` or
        `_password` value is decoded, substituted back into the line, and the
        resulting line is passed to `analyze_string_content`.

        :param string:    string; the line to analyze
        :param line_num:  integer; line number that is currently being analyzed
        :param filename:  string; name of file being analyzed
        :param output_raw: whether or not to output the raw, unhashed secret
        :returns:         dictionary, or None when the file is not a `.npmrc`
                          file, the line holds no encoded field, or the value
                          cannot be decoded
        NOTE: line_num and filename are used for PotentialSecret creation only.
        """
        # Only process .npmrc files for encoded values.
        if not filename.endswith('.npmrc'):
            return

        # Look for an encoded _auth or _password field.
        pattern = r'(?P<key>_auth|_password) ?= ?(?P<encoded>[a-z0-9+/]+=*)'
        match = re.search(pattern, string, flags=re.IGNORECASE)
        if not match:
            return

        # Decode the encoded value, if possible.
        encoded = match.group('encoded')
        start = match.start('encoded')
        end = match.end('encoded')
        try:
            decoded = base64.b64decode(encoded).decode('utf-8')
        except (binascii.Error, binascii.Incomplete, ValueError):
            # Ignore any errors encountered while decoding the line.
            # This is a fail-safe for any invalid or corrupted encoding.
            #
            # ValueError is included because a value can be well-formed
            # base64 and still decode to bytes that are not valid UTF-8
            # (UnicodeDecodeError is a ValueError), and because b64decode
            # rejects non-ASCII text with a plain ValueError. Neither case
            # should abort the scan of the whole file.
            return

        # Generate a new string, using the decoded value, and analyze it.
        processed_string = string[:start] + decoded + string[end:]
        return self.analyze_string_content(
            processed_string,
            line_num,
            filename,
            output_raw,
        )

    @abstractmethod
    def analyze_string_content(self, string, line_num, filename, output_raw=False):
        """
        :param string:    string; the line to analyze
        :param line_num:  integer; line number that is currently being analyzed
        :param filename:  string; name of file being analyzed
        :param output_raw: whether or not to output the raw, unhashed secret
        :returns:         dictionary

        NOTE: line_num and filename are used for PotentialSecret creation only.
        """
        raise NotImplementedError

    @abstractmethod
    def secret_generator(self, string, *args, **kwargs):
        """Flags secrets in a given string, and yields the raw secret value.
        Used in self.analyze_line for PotentialSecret creation.

        :type string: str
        :param string: the secret to scan

        :rtype: iter
        :returns: Of all the identifiers found
        """
        raise NotImplementedError

    def adhoc_scan(self, string):
        """To support faster discovery, we want the ability to conveniently
        check what different plugins say regarding a single line/secret. This
        supports that.

        This is very similar to self.analyze_line, but allows the flexibility
        for subclasses to add any other notable info (rather than just a
        PotentialSecret type). e.g. HighEntropyStrings adds their Shannon
        entropy in which they made their decision.

        :type string: str
        :param string: the string to analyze

        :rtype: str
        :returns: descriptive string that fits the format
            <classname>: <returned-value>
        """
        # TODO: Handle multiple secrets on single line.
        results = self.analyze_line(
            string,
            line_num=0,
            filename='does_not_matter',
        )
        if not results:
            return 'False'

        if not self.should_verify:
            return 'True'

        # The first conclusive verification (true or false) decides the
        # outcome; if none is conclusive the result stays unverified.
        verified_result = VerifiedResult.UNVERIFIED
        for result in results:
            is_verified = self.verify(result.secret_value, string, result)
            if is_verified != VerifiedResult.UNVERIFIED:
                verified_result = is_verified
                break

        output = {
            VerifiedResult.VERIFIED_FALSE: 'False (verified)',
            VerifiedResult.VERIFIED_TRUE: 'True  (verified)',
            VerifiedResult.UNVERIFIED: 'True  (unverified)',
        }

        return output[verified_result]

    def verify(self, token, content='', potential_secret=None):
        """
        To increase accuracy and reduce false positives, plugins can also
        optionally declare a method to verify their status.

        The base implementation performs no verification and always reports
        the secret as unverified.

        :type token: str
        :param token: secret found by current plugin

        :type content: str
        :param content: lines of context around identified secret

        :type potential_secret: PotentialSecret
        :param potential_secret: the PotentialSecret object may optionally be
        passed to verify to allow verification code to add additional factors to
        potential_secret.other_factors

        :rtype: VerifiedResult
        """
        return VerifiedResult.UNVERIFIED

    def is_secret_false_positive(self, token):
        """
        Checks if the input secret is a false-positive according to
        this plugin's heuristics.

        :type token: str
        :param token: secret found by current plugin

        :rtype: bool
        :returns: True as soon as one heuristic flags the token, False when
            none does or when no heuristics are configured.
        """
        heuristics = self.false_positive_heuristics
        if not heuristics:
            return False

        return any(func(token) for func in heuristics)

    @property
    def __dict__(self):
        # Replaces the regular instance dictionary view with one that only
        # exposes the plugin's class name.
        # NOTE(review): presumably consumed when plugin settings are
        # serialised -- confirm against callers.
        return {
            'name': self.__class__.__name__,
        }


class RegexBasedDetector(BasePlugin):
    """Base class for detectors that are driven by regular expressions.

    A concrete detector subclasses this and provides `secret_type` (a
    description of the secret) together with `denylist` (a sequence of
    regular expressions), like:

    class FooDetector(RegexBasedDetector):

        secret_type = "foo"

        denylist = (
            re.compile(r'foo'),
        )
    """
    __metaclass__ = ABCMeta

    @abstractproperty
    def denylist(self):
        """Sequence of compiled regular expressions that flag a secret."""
        raise NotImplementedError

    @staticmethod
    def assign_regex_generator(prefix_regex, password_keyword_regex, password_regex):
        """Build a case-insensitive regex that matches a secret assignment.

        The three arguments are regex fragments. The compiled expression
        looks for text shaped like

        <prefix_regex>(-|_|)<password_keyword_regex> <assignment> <password_regex>

        where <assignment> is one of `=`, `:`, `:=`, `=>`, one or more
        spaces, `::`, `,` or `(`. The key name may be wrapped in optional
        square brackets, and both key name and value in optional quotes.
        The match must start at the beginning of the string or directly
        after a non-word character.

        :returns: compiled regular expression
        """
        template = (
            r'{begin}{opt_open_square_bracket}{opt_quote}{prefix_regex}{opt_dash_undrscr}'
            '{password_keyword_regex}{opt_quote}{opt_close_square_bracket}{opt_space}'
            '{assignment}{opt_space}{opt_quote}{password_regex}{opt_quote}'
        )
        fragments = {
            # Caller supplied parts.
            'prefix_regex': prefix_regex,
            'password_keyword_regex': password_keyword_regex,
            'password_regex': password_regex,
            # Fixed building blocks.
            'begin': r'(?:(?<=\W)|(?<=^))',
            'opt_quote': r'(?:"|\'|)',
            'opt_open_square_bracket': r'(?:\[|)',
            'opt_close_square_bracket': r'(?:\]|)',
            'opt_dash_undrscr': r'(?:_|-|)',
            'opt_space': r'(?: *)',
            'assignment': r'(?:=|:|:=|=>| +|::|,|\()',
        }
        expression = template.format(**fragments)
        return re.compile(expression, flags=re.IGNORECASE)

    def analyze_string_content(self, string, line_num, filename, output_raw=False):
        """Wrap every secret found in `string` in a PotentialSecret.

        :param string:    string; the line to analyze
        :param line_num:  integer; line number that is currently being analyzed
        :param filename:  string; name of file being analyzed
        :param output_raw: whether or not to output the raw, unhashed secret
        :returns:         dictionary mapping each PotentialSecret to itself
        """
        candidates = (
            PotentialSecret(
                self.secret_type,
                filename,
                identifier,
                line_num,
                output_raw=output_raw,
            )
            for identifier in self.secret_generator(string)
        )
        return {candidate: candidate for candidate in candidates}

    def secret_generator(  # lgtm [py/inheritance/incorrect-overridden-signature]
        self,
        string,
        *args,
        **kwargs
    ):
        """Yield every `findall` hit of every denylist regex, in order.

        :type string: str
        :param string: the text to scan
        """
        for pattern in self.denylist:
            hits = pattern.findall(string)
            for hit in hits:
                yield hit