From acc32c979612a3b67a01ecdba32431e4b00c4ecf Mon Sep 17 00:00:00 2001 From: Alexandre Aubin Date: Wed, 20 Mar 2019 18:32:53 +0100 Subject: [PATCH] Improve path traversal issue detection with proper nginx conf parser --- lib/nginxparser/nginxparser.py | 270 +++++++++++++++++++++++++++++++++ package_linter.py | 58 +++---- 2 files changed, 300 insertions(+), 28 deletions(-) create mode 100644 lib/nginxparser/nginxparser.py diff --git a/lib/nginxparser/nginxparser.py b/lib/nginxparser/nginxparser.py new file mode 100644 index 0000000..feeeec1 --- /dev/null +++ b/lib/nginxparser/nginxparser.py @@ -0,0 +1,270 @@ +"""Very low-level nginx config parser based on pyparsing.""" +# Taken from https://github.com/certbot/certbot (Apache licensed) +# Itself forked from https://github.com/fatiherikli/nginxparser (MIT Licensed) +import copy +import logging + +from pyparsing import ( + Literal, White, Forward, Group, Optional, OneOrMore, QuotedString, Regex, ZeroOrMore, Combine) +from pyparsing import stringEnd +from pyparsing import restOfLine +import six + +logger = logging.getLogger(__name__) + +class RawNginxParser(object): + # pylint: disable=expression-not-assigned + # pylint: disable=pointless-statement + """A class that parses nginx configuration with pyparsing.""" + + # constants + space = Optional(White()).leaveWhitespace() + required_space = White().leaveWhitespace() + + left_bracket = Literal("{").suppress() + right_bracket = space + Literal("}").suppress() + semicolon = Literal(";").suppress() + dquoted = QuotedString('"', multiline=True, unquoteResults=False, escChar='\\') + squoted = QuotedString("'", multiline=True, unquoteResults=False, escChar='\\') + quoted = dquoted | squoted + head_tokenchars = Regex(r"(\$\{)|[^{};\s'\"]") # if (last_space) + tail_tokenchars = Regex(r"(\$\{)|[^{;\s]") # else + tokenchars = Combine(head_tokenchars + ZeroOrMore(tail_tokenchars)) + paren_quote_extend = Combine(quoted + Literal(')') + ZeroOrMore(tail_tokenchars)) + # note: ')' allows extension, but then we fall into else, not last_space. + + token = paren_quote_extend | tokenchars | quoted + + whitespace_token_group = space + token + ZeroOrMore(required_space + token) + space + assignment = whitespace_token_group + semicolon + + comment = space + Literal('#') + restOfLine + + block = Forward() + + # order matters! see issue 518, and also http { # server { \n} + contents = Group(comment) | Group(block) | Group(assignment) + + block_begin = Group(whitespace_token_group) + block_innards = Group(ZeroOrMore(contents) + space).leaveWhitespace() + block << block_begin + left_bracket + block_innards + right_bracket + + script = OneOrMore(contents) + space + stringEnd + script.parseWithTabs().leaveWhitespace() + + def __init__(self, source): + self.source = source + + def parse(self): + """Returns the parsed tree.""" + return self.script.parseString(self.source) + + def as_list(self): + """Returns the parsed tree as a list.""" + return self.parse().asList() + +class RawNginxDumper(object): + # pylint: disable=too-few-public-methods + """A class that dumps nginx configuration from the provided tree.""" + def __init__(self, blocks): + self.blocks = blocks + + def __iter__(self, blocks=None): + """Iterates the dumped nginx content.""" + blocks = blocks or self.blocks + for b0 in blocks: + if isinstance(b0, six.string_types): + yield b0 + continue + item = copy.deepcopy(b0) + if spacey(item[0]): + yield item.pop(0) # indentation + if not item: + continue + + if isinstance(item[0], list): # block + yield "".join(item.pop(0)) + '{' + for parameter in item.pop(0): + for line in self.__iter__([parameter]): # negate "for b0 in blocks" + yield line + yield '}' + else: # not a block - list of strings + semicolon = ";" + if isinstance(item[0], six.string_types) and item[0].strip() == '#': # comment + semicolon = "" + yield "".join(item) + semicolon + + def __str__(self): + """Return the parsed block as a string.""" + return ''.join(self) + + +# Shortcut functions to respect Python's serialization interface +# (like pyyaml, picker or json) + +def loads(source): + """Parses from a string. + + :param str source: The string to parse + :returns: The parsed tree + :rtype: list + + """ + return UnspacedList(RawNginxParser(source).as_list()) + + +def load(_file): + """Parses from a file. + + :param file _file: The file to parse + :returns: The parsed tree + :rtype: list + + """ + return loads(_file.read()) + + +def dumps(blocks): + """Dump to a string. + + :param UnspacedList block: The parsed tree + :param int indentation: The number of spaces to indent + :rtype: str + + """ + return str(RawNginxDumper(blocks.spaced)) + + +def dump(blocks, _file): + """Dump to a file. + + :param UnspacedList block: The parsed tree + :param file _file: The file to dump to + :param int indentation: The number of spaces to indent + :rtype: NoneType + + """ + return _file.write(dumps(blocks)) + + +spacey = lambda x: (isinstance(x, six.string_types) and x.isspace()) or x == '' + +class UnspacedList(list): + """Wrap a list [of lists], making any whitespace entries magically invisible""" + + def __init__(self, list_source): + # ensure our argument is not a generator, and duplicate any sublists + self.spaced = copy.deepcopy(list(list_source)) + self.dirty = False + + # Turn self into a version of the source list that has spaces removed + # and all sub-lists also UnspacedList()ed + list.__init__(self, list_source) + for i, entry in reversed(list(enumerate(self))): + if isinstance(entry, list): + sublist = UnspacedList(entry) + list.__setitem__(self, i, sublist) + self.spaced[i] = sublist.spaced + elif spacey(entry): + # don't delete comments + if "#" not in self[:i]: + list.__delitem__(self, i) + + def _coerce(self, inbound): + """ + Coerce some inbound object to be appropriately usable in this object + + :param inbound: string or None or list or UnspacedList + :returns: (coerced UnspacedList or string or None, spaced equivalent) + :rtype: tuple + + """ + if not isinstance(inbound, list): # str or None + return (inbound, inbound) + else: + if not hasattr(inbound, "spaced"): + inbound = UnspacedList(inbound) + return (inbound, inbound.spaced) + + + def insert(self, i, x): + item, spaced_item = self._coerce(x) + slicepos = self._spaced_position(i) if i < len(self) else len(self.spaced) + self.spaced.insert(slicepos, spaced_item) + if not spacey(item): + list.insert(self, i, item) + self.dirty = True + + def append(self, x): + item, spaced_item = self._coerce(x) + self.spaced.append(spaced_item) + if not spacey(item): + list.append(self, item) + self.dirty = True + + def extend(self, x): + item, spaced_item = self._coerce(x) + self.spaced.extend(spaced_item) + list.extend(self, item) + self.dirty = True + + def __add__(self, other): + l = copy.deepcopy(self) + l.extend(other) + l.dirty = True + return l + + def pop(self, _i=None): + raise NotImplementedError("UnspacedList.pop() not yet implemented") + def remove(self, _): + raise NotImplementedError("UnspacedList.remove() not yet implemented") + def reverse(self): + raise NotImplementedError("UnspacedList.reverse() not yet implemented") + def sort(self, _cmp=None, _key=None, _Rev=None): + raise NotImplementedError("UnspacedList.sort() not yet implemented") + def __setslice__(self, _i, _j, _newslice): + raise NotImplementedError("Slice operations on UnspacedLists not yet implemented") + + def __setitem__(self, i, value): + if isinstance(i, slice): + raise NotImplementedError("Slice operations on UnspacedLists not yet implemented") + item, spaced_item = self._coerce(value) + self.spaced.__setitem__(self._spaced_position(i), spaced_item) + if not spacey(item): + list.__setitem__(self, i, item) + self.dirty = True + + def __delitem__(self, i): + self.spaced.__delitem__(self._spaced_position(i)) + list.__delitem__(self, i) + self.dirty = True + + def __deepcopy__(self, memo): + new_spaced = copy.deepcopy(self.spaced, memo=memo) + l = UnspacedList(new_spaced) + l.dirty = self.dirty + return l + + def is_dirty(self): + """Recurse through the parse tree to figure out if any sublists are dirty""" + if self.dirty: + return True + return any((isinstance(x, UnspacedList) and x.is_dirty() for x in self)) + + def _spaced_position(self, idx): + "Convert from indexes in the unspaced list to positions in the spaced one" + pos = spaces = 0 + # Normalize indexes like list[-1] etc, and save the result + if idx < 0: + idx = len(self) + idx + if not 0 <= idx < len(self): + raise IndexError("list index out of range") + idx0 = idx + # Count the number of spaces in the spaced list before idx in the unspaced one + while idx != -1: + if spacey(self.spaced[pos]): + spaces += 1 + else: + idx -= 1 + pos += 1 + return idx0 + spaces diff --git a/package_linter.py b/package_linter.py index a543860..b397105 100755 --- a/package_linter.py +++ b/package_linter.py @@ -184,36 +184,38 @@ class App(): # # Path traversal issues # - lines = open(self.path + "/conf/" + filename).readlines() - lines = [line.strip() for line in lines if not line.strip().startswith("#")] - # Let's find the first location line - location_line = None - path_traversal_vulnerable = False - lines_iter = lines.__iter__() - for line in lines_iter: - if line.startswith("location"): - location_line = line.split() - break - # Look at the next lines for an 'alias' directive - if location_line is not None: - for line in lines_iter: - if line.startswith("location"): - # Entering a new location block ... abort here - # and assume there's no alias block later... - break - if line.startswith("alias"): - # We should definitely check for path traversal issue - # Does the location target ends with / ? - target = location_line[-2] if location_line[-1] == "{" else location_line[-1] - if not target.endswith("/"): - path_traversal_vulnerable = True - break - if path_traversal_vulnerable: + from lib.nginxparser import nginxparser + nginxconf = nginxparser.load(open(self.path + "/conf/" + filename)) + + def find_location_with_alias(locationblock): + + if locationblock[0][0] != "location": + return + + location = locationblock[0][-1] + for line in locationblock[1]: + instruction = line[0] + if instruction == "alias": + yield location + elif isinstance(instruction, list) and instruction and instruction[0] == "location": + yield from find_location_with_alias(instruction) + else: + continue + + def find_path_traversal_issue(nginxconf): + + for block in nginxconf: + for location in find_location_with_alias(block): + if not location.endswith("/"): + yield location + + for location in find_path_traversal_issue(nginxconf): print_warning( - "The nginx configuration appears vulnerable to path traversal as explained in " + "The nginx configuration (especially location %s) " + "appears vulnerable to path traversal issues as explained in\n" "https://www.acunetix.com/vulnerabilities/web/path-traversal-via-misconfigured-nginx-alias/\n" - "To fix it, look at the first lines of the nginx conf of the example app : " - "https://github.com/YunoHost/example_ynh/blob/master/conf/nginx.conf" + "To fix it, look at the first lines of the nginx conf of the example app : \n" + "https://github.com/YunoHost/example_ynh/blob/master/conf/nginx.conf" % location ) def check_helper_consistency(self):