# Source code for pcapgraph.pcap_math

# -*- coding: utf-8 -*-
# Copyright 2018 Ross Jacobs All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Do algebraic operations on sets like union, intersect, difference."""
import collections
import os
import time

from pcapgraph.manipulate_frames import strip_layers
from pcapgraph.manipulate_frames import get_flat_frame_dict
from pcapgraph.manipulate_frames import get_frame_list_by_pcap
from pcapgraph.manipulate_frames import get_frame_from_json
from pcapgraph.save_file import convert_to_pcaptext
import pcapgraph.save_file as save


class PcapMath:
    """Do algebraic operations on sets like union, intersect, difference.

    For multiple set operations, files are read in only once in __init__.
    Use different PcapMath objects if input files are different.
    """

    def __init__(self, filenames, options):
        """Prepare PcapMath object for one or multiple operations.

        Every PcapMath object should start with the data structures filled with
        the data that each operation needs to function.

        Args:
            filenames (list): List of filenames.
            options (dict): Whether to strip L2 and L3 headers.
        """
        self.filenames = filenames
        self.pcap_json_dict = strip_layers(filenames, options)
        pcap_json_list = [*self.pcap_json_dict.values()]
        self.frame_timestamp_dict = get_flat_frame_dict(pcap_json_list)
        # Filled lazily by the intersection-based operations.
        self.frame_list_by_pcap = []
        self.exclude_empty = False
        self.options = options

    def parse_set_args(self, args):
        """Call the appropriate method per CLI flags.

        difference, union, intersect consist of {<op>: {frame: timestamp, ...}}
        bounded_intersect consists of {pcap: {frame: timestamp, ...}, ...}

        Args:
            args (dict): Dict of all arguments (including set args).
        Returns:
            filenames (list): List of all files, including ones generated
                by set operations. Operations that report no output file
                (empty string) contribute nothing to the list.
        """
        new_files = []
        bounded_filelist = []
        intersect_file = ''
        self.exclude_empty = args['--exclude-empty']
        if args['--difference']:
            generated_file = self.difference_pcap()
            # As long as the difference exists.
            if generated_file:
                new_files.append(generated_file)
        if args['--intersection']:
            intersect_file = self.intersect_pcap()
            # intersect_pcap() returns '' for an empty intersection; an empty
            # name must not end up in the list of files handed to the caller.
            if intersect_file:
                new_files.append(intersect_file)
        if args['--symmetric-difference']:
            new_files.extend(self.symmetric_difference_pcap())
        if args['--union']:
            new_files.append(self.union_pcap())

        if args['--bounded-intersection']:
            bounded_filelist = self.bounded_intersect_pcap()
            new_files.extend(bounded_filelist)
        if args['--inverse-bounded']:
            if not intersect_file:
                intersect_file = self.intersect_pcap()
            generated_filelist = self.inverse_bounded_intersect_pcap(
                bounded_filelist=bounded_filelist,
                intersect_file=intersect_file)
            # The intersect file was only needed for the comparison unless
            # the user asked for it explicitly.
            if intersect_file and not args['--intersection']:
                os.remove(intersect_file)
            new_files.extend(generated_filelist)

        # Build a new list so that self.filenames is not altered.
        return list(self.filenames) + new_files

    def union_pcap(self):
        """Given sets A = (1, 2, 3), B = (2, 3, 4), A + B = (1, 2, 3, 4).

        Collects every frame of every input pcap, prints the most common
        duplicated frames and saves the de-duplicated frames as 'union.pcap'.

        Returns:
            (string): Name of generated pcap.
        """
        raw_packet_list = [
            get_frame_from_json(frame)
            for pcap in self.pcap_json_dict.values()
            for frame in pcap
        ]

        self.print_10_most_common_frames(raw_packet_list)

        # Dict keys de-duplicate the frames.
        union_frame_dict = {
            frame: self.frame_timestamp_dict[frame]
            for frame in raw_packet_list
        }
        save.save_pcap(
            pcap_dict=union_frame_dict,
            name='union.pcap',
            options=self.options)

        return 'union.pcap'

    @staticmethod
    def print_10_most_common_frames(raw_frame_list):
        """After doing a packet union, find/print the 10 most common packets.

        This is a work in progress and may eventually use this bash:

        <packets> | text2pcap - - | tshark -r - -o 'gui.column.format:"No.",
        "%m","VLAN","%q","Src MAC","%uhs","Dst MAC","%uhd","Src IP","%us",
        "Dst IP","%ud","Protocol","%p","Src port","%uS","Dst port","%uD"'

        Alternatively, just use the existing information in pcap_dict.

        The goal is to print
        frame#, VLAN, src/dst MAC, src/dst IP, L4 src/dst ports, protocol

        This should likely be its own CLI flag in future.

        Args:
            raw_frame_list (list): List of raw frames
        """
        frame_counts = collections.Counter(raw_frame_list)
        # It's not a common frame if it is only seen once.
        repeated_frames = [(frame, count)
                           for frame, count in frame_counts.most_common()
                           if count > 1]
        # Slice to exactly ten entries (the previous counter-based loop
        # stopped after nine).
        for frame, count in repeated_frames[:10]:
            packet_text = convert_to_pcaptext(frame)
            print("Count: {: <7}\n{: <}".format(count, packet_text))
        print("To view the content of these packets, subtract the count lines,"
              "\nadd and save to <textfile>, and then run "
              "\n\ntext2pcap <textfile> out.pcap\nwireshark out.pcap\n")

    def intersect_pcap(self):
        """Save pcap intersection. First filename is pivot packet capture.

        Prints, for every input pcap, the share of its frames that are part
        of the intersection, then saves the common frames as 'intersect.pcap'.

        Returns:
            (str): Filename of generated pcap, or '' if the intersection
                contains no packets.
        """
        # Generate intersection set of frames
        if not self.frame_list_by_pcap:
            self.frame_list_by_pcap = \
                get_frame_list_by_pcap(self.pcap_json_dict)
        frame_list = self.frame_list_by_pcap
        frame_intersection = set(frame_list[0]).intersection(*frame_list[1:])

        intersection_count = len(frame_intersection)
        print("{: <12} {: <}".format('\nSAME %', 'PCAP NAME'))
        # NOTE(review): assumes frame_list_by_pcap is ordered like
        # self.filenames (one frame list per file) - confirm against
        # get_frame_list_by_pcap.
        for pcap, pcap_frames in zip(self.filenames, frame_list):
            if pcap_frames:
                percent = round(
                    100 * (intersection_count / len(pcap_frames)))
            else:
                # An empty capture shares nothing; avoid dividing by zero.
                percent = 0
            print("{: <12} {: <}".format(str(percent) + '%', pcap))

        intersect_frame_dict = {
            frame: self.frame_timestamp_dict[frame]
            for frame in frame_intersection
        }
        save.save_pcap(
            pcap_dict=intersect_frame_dict,
            name='intersect.pcap',
            options=self.options)

        if frame_intersection:
            return 'intersect.pcap'
        print('WARNING! Intersection between ', self.filenames,
              ' contains no packets!')
        return ''

    def difference_pcap(self, pivot_index=0):
        """Given sets A = (1, 2, 3), B = (2, 3, 4), C = (3, 4, 5), A-B-C = (1).

        Args:
            pivot_index [int]: Specify minuend by index of filename in list

        Returns:
            (string): Name of generated pcap, or '' when the difference is
                empty and empty results are excluded (self.exclude_empty).
        """
        pcap_json_list = [*self.pcap_json_dict.values()]
        minuend_pcap_json = pcap_json_list[pivot_index]
        minuend_name = self.filenames[pivot_index]
        # Every pcap json except the minuend is subtracted from it.
        subtrahend_json_list = pcap_json_list[:pivot_index] + \
            pcap_json_list[pivot_index + 1:]

        minuend_frame_dict = get_flat_frame_dict([minuend_pcap_json])
        subtrahend_frame_dict = get_flat_frame_dict(subtrahend_json_list)
        # Keep the minuend's own timestamps and ordering for frames that
        # appear in none of the other pcaps.
        diff_frame_dict = {
            frame: timestamp
            for frame, timestamp in minuend_frame_dict.items()
            if frame not in subtrahend_frame_dict
        }

        if not diff_frame_dict:
            print('WARNING! ' + minuend_name +
                  ' difference contains no packets!')
            # Save only if there are packets or -x flag is not used.
            if self.exclude_empty:
                return ''

        diff_filename = 'diff_' + os.path.basename(minuend_name)
        # If the file already exists, choose a different name. splitext keeps
        # the full stem regardless of the extension length, and bumping the
        # suffix avoids spinning until the clock advances.
        stem = os.path.splitext(diff_filename)[0]
        suffix = int(time.time())
        unique_diff_name = diff_filename
        while os.path.isfile(unique_diff_name):
            unique_diff_name = '{}-{}.pcap'.format(stem, suffix)
            suffix += 1
        save.save_pcap(
            pcap_dict=diff_frame_dict,
            name=unique_diff_name,
            options=self.options)
        return unique_diff_name

    def symmetric_difference_pcap(self):
        """For sets A = (1, 2, 3), B = (2, 3, 4), C = (3, 4, 5), A△B△C = (1, 5)

        For all pcaps, the symmetric difference produces a pcap that has the
        packets that are unique to only that pcap (unlike above where only one
        set is the result).

        Returns:
            (list(str)): Filenames of generated pcaps.
        """
        generated_filelist = []
        for index, file in enumerate(self.filenames):
            diff_filename = self.difference_pcap(pivot_index=index)
            if not diff_filename:  # No diff file was written for this pcap.
                continue
            symdiff_filename = 'symdiff_' + os.path.basename(file)
            os.replace(diff_filename, symdiff_filename)
            generated_filelist.append(symdiff_filename)

        return generated_filelist

    def bounded_intersect_pcap(self):
        """Create one bounded-intersection pcap per input pcap.

        Each input pcap is trimmed to the frames between the earliest and the
        latest frame that all pcaps have in common (see get_bounded_pcaps).

        Returns:
            (list(string)): Filenames of generated pcaps.
        """
        names = []  # Names of all generated pcaps
        for number, bounded_pcap in enumerate(self.get_bounded_pcaps(), 1):
            name = 'bounded_intersect-simul' + str(number) + '.pcap'
            save.save_pcap(
                pcap_dict=bounded_pcap, name=name, options=self.options)
            names.append(name)

        return names

    def inverse_bounded_intersect_pcap(self,
                                       bounded_filelist=False,
                                       intersect_file=False):
        """Inverse of bounded intersection = (bounded intersect) - (intersect)

        The object's filenames and frame data are temporarily replaced while
        each difference is computed and are restored before returning, even
        if an operation raises.

        Args:
            bounded_filelist (list): List of existing bounded pcaps generated
                by bounded_intersect_pcap()
            intersect_file (string): Location of intersect file.
        Returns:
            (list(string)): Filenames of generated pcaps.
        """
        generated_filelist = []
        remove_bounded_files = False
        if not bounded_filelist:
            # Don't generate twice if flags -be are used
            # Note that this runs after bounded_intersect if it would be run
            bounded_filelist = self.bounded_intersect_pcap()
            # These files only exist for this computation; clean them up.
            remove_bounded_files = True

        saved_state = (self.filenames, self.pcap_json_dict,
                       self.frame_timestamp_dict)
        try:
            for bi_file in bounded_filelist:
                # difference_pcap() works on the object's current files, so
                # point it at (bounded pcap, intersect pcap).
                self.filenames = [bi_file, intersect_file]
                self.pcap_json_dict = strip_layers(self.filenames,
                                                   self.options)
                self.frame_timestamp_dict = get_flat_frame_dict(
                    [*self.pcap_json_dict.values()])
                difference_file = self.difference_pcap()
                if difference_file:
                    generated_filelist.append(difference_file)
                if remove_bounded_files:
                    # Do not keep bounded-intersect files if not necessary
                    os.remove(bi_file)
        finally:
            # Restore everything that was swapped out, not just filenames,
            # so later operations see the original input data again.
            (self.filenames, self.pcap_json_dict,
             self.frame_timestamp_dict) = saved_state
        return generated_filelist

    def get_bounded_pcaps(self):
        """Get the pcap frame list for bounded_intersect_pcap

        Create a bounding box around each packet capture where the bounds are
        the min and max packets in the intersection: from the first occurrence
        of the min frame through the last occurrence of the max frame.

        Returns:
            bounded_pcaps (list): A list of frame_dicts
        Raises:
            IndexError: If a bounding frame is missing from a frame list.
        """
        min_frame, max_frame = self.get_minmax_common_frames()

        bounded_pcaps = []
        # Each frame_list corresponds to one pcap.
        for frame_list in self.frame_list_by_pcap:
            try:
                min_frame_index = frame_list.index(min_frame)
            except ValueError:
                raise IndexError(
                    "ERROR: Bounding minimum packet not found!") from None
            try:
                # Search the reversed list so that the LAST occurrence of a
                # repeated max frame is used as the upper bound.
                max_frame_index = len(frame_list) - 1 - \
                    frame_list[::-1].index(max_frame)
            except ValueError:
                raise IndexError(
                    "ERROR: Bounding maximum packet not found!") from None

            bounded_frame_list = \
                frame_list[min_frame_index:max_frame_index + 1]
            bounded_pcaps.append({
                frame: self.frame_timestamp_dict[frame]
                for frame in bounded_frame_list
            })

        return bounded_pcaps

    def get_minmax_common_frames(self):
        """Get first, last frames of intersection pcap.

        Returns:
            min_frame, max_frame (tuple(string)):
                Packet strings of the packets that are at the beginning and end
                of the intersection pcap based on timestamps.
        Raises:
            AssertionError: If intersection is empty.
        """
        # Generate intersection set of frames
        if not self.frame_list_by_pcap:
            self.frame_list_by_pcap = \
                get_frame_list_by_pcap(self.pcap_json_dict)
        frame_list = self.frame_list_by_pcap
        frame_intersection = set(frame_list[0]).intersection(*frame_list[1:])

        # Raised explicitly (rather than via an assert statement) so the
        # check is still performed when Python runs with -O.
        if not frame_intersection:
            raise AssertionError(
                'Intersection of packet captures contains no packets.')

        def frame_time(frame):
            """Return the frame's timestamp as a float for comparison."""
            return float(self.frame_timestamp_dict[frame])

        # Set may reorder packets, so search for first/last by timestamp.
        min_frame = min(frame_intersection, key=frame_time)
        max_frame = max(frame_intersection, key=frame_time)

        return min_frame, max_frame