# Source code for pcapgraph.pcap_math

# -*- coding: utf-8 -*-
# Copyright 2018 Ross Jacobs All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Do algebraic operations on sets like union, intersect, difference."""
import os

from pcapgraph.manipulate_framebytes import get_bytes_from_pcaps,\
    print_10_most_common_frames, get_ts_as_float, strip_l2, strip_l3


class PcapMath:
    """Do algebraic operations on sets like union, intersect, difference.

    For multiple set operations, files are read in only once in __init__.
    Use different PcapMath objects if input files are different.

    Attributes set by __init__:
        filenames: The list of input filenames, as passed in.
        pcap_frame_list: Dict keyed by pcap name; each value is read via its
            'frames' and 'timestamps' keys (and 'link_type' elsewhere).
        frame_list: Flat ordered list of all non-empty frames.
        timestamp_list: Flat ordered list of all non-empty timestamps.
        frame_timestamp_dict: {frame: timestamp}; if a frame occurs more than
            once, the last timestamp seen wins.
    """

    # EtherTypes whose frames are not unique enough to count as "the same
    # packet" across captures (ARP, LACP, LLDP). Stored lower-case; the
    # comparison in intersect_pcap lower-cases the frame's value first.
    _NONUNIQUE_ETHERTYPES = frozenset(('0806', '8809', '88cc'))

    def __init__(self, filenames, strip_options):
        """Prepare PcapMath object for one or multiple operations.

        Every PcapMath object should start with the data structures filled
        with the data that each operation needs to function.

        Args:
            filenames (list): List of filenames.
            strip_options: Strip options (L2 and L3). Only membership of
                '--strip-l2' / '--strip-l3' is tested; L2 takes precedence.
        """
        self.filenames = filenames
        pcap_frame_list = get_bytes_from_pcaps(filenames)
        if '--strip-l2' in strip_options:
            self.pcap_frame_list = strip_l2(pcap_frame_list)
        elif '--strip-l3' in strip_options:
            self.pcap_frame_list = strip_l3(pcap_frame_list)
        else:
            self.pcap_frame_list = pcap_frame_list

        self.frame_list = []  # Flat ordered list of all frames
        self.timestamp_list = []  # Flat ordered list of all timestamps
        for pcap in self.pcap_frame_list:
            self.frame_list += self.pcap_frame_list[pcap]['frames']
            self.timestamp_list += self.pcap_frame_list[pcap]['timestamps']
        # NOTE(review): frames and timestamps are filtered independently, so
        # the two lists only stay aligned if empty entries occur in matching
        # numbers/positions - confirm against get_bytes_from_pcaps.
        self.frame_list = list(filter(None, self.frame_list))
        self.timestamp_list = list(filter(None, self.timestamp_list))
        self.frame_timestamp_dict = dict(
            zip(self.frame_list, self.timestamp_list))

    def parse_set_args(self, args):
        """Call the appropriate method per CLI flags.

        difference, union, intersect consist of
            {<op>: {frame: timestamp, ...}}
        bounded_intersect consists of
            {pcap: {frame: timestamp, ...}, ...}

        Args:
            args (dict): Dict of all arguments (including set args).
        Returns:
            self.pcap_frame_list, extended in place with one entry per
            generated pcap. Empty results are skipped when
            args['--exclude-empty'] is truthy.
        """
        exclude_empty = args['--exclude-empty']
        generated_pcap_frames = {}
        bounded_intersect_filenames = []
        if args['--difference']:
            diff_filename = 'diff_' + os.path.basename(self.filenames[0])
            generated_pcap_frames[diff_filename] = self.difference_pcap()
        if args['--intersection']:
            generated_pcap_frames['intersect.pcap'] = self.intersect_pcap()
        if args['--symmetric-difference']:
            # Symmetric difference generates multiple files, so extend dict
            generated_pcap_frames.update(self.symmetric_difference_pcap())
        if args['--union']:
            generated_pcap_frames['union.pcap'] = self.union_pcap()
        if args['--most-common-frames']:
            print_10_most_common_frames(self.frame_list)
        if args['--bounded-intersection']:
            bounded_intersect_frames = self.bounded_intersect_pcap()
            bounded_intersect_filenames = list(bounded_intersect_frames)
            generated_pcap_frames.update(bounded_intersect_frames)
        if args['--inverse-bounded']:
            inv_bounded_pcap_frames = self.inverse_bounded_intersect_pcap(
                generated_pcap_frames,
                bounded_intersect_filenames,
                args['--intersection'],
            )
            generated_pcap_frames.update(inv_bounded_pcap_frames)

        # NOTE(review): generated entries are stored as {frame: timestamp}
        # dicts, unlike the input entries which carry 'frames'/'timestamps'.
        # The set operations on this object read the input shape, so they
        # should all have run before this point (as they do above).
        for pcap, frames in generated_pcap_frames.items():
            if frames or not exclude_empty:
                self.pcap_frame_list[pcap] = frames

        return self.pcap_frame_list

    def union_pcap(self):
        """Given sets A = (1, 2, 3), B = (2, 3, 4), A + B = (1, 2, 3, 4).

        Built from the flat frame and timestamp lists; when a frame occurs
        more than once, the timestamp of its last occurrence is kept.

        Returns:
            (dict): {<FRAME>: <TIMESTAMP>, ...}
        """
        return dict(zip(self.frame_list, self.timestamp_list))

    def intersect_pcap(self):
        """Return the frames common to all pcaps, with their timestamps.

        Prints, for every input file, the share of its frames that is part
        of the intersection. Frames whose EtherType is ARP, LACP or LLDP are
        left out of the result because they are not unique enough.

        generate_intersection also exists as the frame intersect part is used
        by other functions.

        Returns:
            (dict): Intersection {<FRAME>: <TIMESTAMP>, ...} in the frame
            order of the first pcap; an empty dict (after printing a
            warning) if the pcaps have no frame in common.
        """
        first_pcap = list(self.pcap_frame_list)[0]
        first_pcap_frames = self.pcap_frame_list[first_pcap]['frames']
        frame_intersection = self.generate_intersection()

        print("{: <12} {: <}".format('\nSAME %', 'PCAP NAME'))
        for pcap in self.filenames:
            pcap_frames = self.pcap_frame_list[pcap]['frames']
            # Percentage is relative to each file's own frame count; guard
            # against a capture that contains no frames at all.
            if pcap_frames:
                percent = round(
                    100 * (len(frame_intersection) / len(pcap_frames)))
            else:
                percent = 0
            print("{: <12} {: <}".format(str(percent) + '%', pcap))

        if not frame_intersection:
            filename_string = '\n\t'.join(map(str, self.filenames))
            print('\nWARNING! Intersection between files contains no '
                  'packets:\n\t' + filename_string + '\n')
            return {}

        intersect_frame_dict = {}
        # Walk the first pcap rather than the set so output order is stable.
        for frame in first_pcap_frames:
            if frame not in frame_intersection:
                continue
            # Assumes frames are hex-dump text in which the two EtherType
            # bytes sit at these offsets - TODO confirm for stripped frames.
            ethertype = (frame[42:44] + frame[45:47]).lower()
            if ethertype not in self._NONUNIQUE_ETHERTYPES:
                intersect_frame_dict[frame] = self.frame_timestamp_dict[frame]
        return intersect_frame_dict

    def generate_intersection(self):
        """Return the set of frames present in every pcap.

        With a single pcap the result is simply the set of its frames.
        """
        pcap_names = list(self.pcap_frame_list)
        first_pcap_frames = self.pcap_frame_list[pcap_names[0]]['frames']
        other_pcap_frames = [
            self.pcap_frame_list[pcap]['frames'] for pcap in pcap_names[1:]
        ]
        return set(first_pcap_frames).intersection(*other_pcap_frames)

    def difference_pcap(self, pivot_index=0):
        """Given sets A = (1, 2, 3), B = (2, 3, 4), C = (3, 4, 5), A-B-C = (1).

        Prints a warning if the difference is empty.

        Args:
            pivot_index [int]: Specify minuend by index of filename in list
        Returns:
            (dict): {<FRAME>: <TIMESTAMP>, ...} in the minuend's frame order.
        """
        # Index into filenames (not the dict's key order) so the minuend is
        # the same file that callers use to name the output.
        minuend_name = self.filenames[pivot_index]
        minuend_frame_list = self.pcap_frame_list[minuend_name]['frames']
        other_frames = set()
        for pcap in self.filenames:
            if pcap != minuend_name:
                other_frames.update(self.pcap_frame_list[pcap]['frames'])

        diff_frame_dict = {
            frame: self.frame_timestamp_dict[frame]
            for frame in minuend_frame_list if frame not in other_frames
        }
        if not diff_frame_dict:
            print('WARNING! ' + minuend_name +
                  ' difference contains no packets!')

        return diff_frame_dict

    def symmetric_difference_pcap(self):
        """For sets A = (1, 2, 3), B = (2, 3, 4), C = (3, 4, 5), A△B△C = (1, 5)

        For all pcaps, the symmetric difference produces a pcap that has the
        packets that are unique to only that pcap (unlike above where only one
        set is the result).

        Returns:
            (dict): {<SYMDIFF_PCAP_NAME>: {<FRAME>: <TIMESTAMP>, ...}, ...}
        """
        diff_frame_list = {}
        for index, filename in enumerate(self.filenames):
            basename = os.path.splitext(os.path.basename(filename))[0]
            symdiff_filename = 'symdiff_' + basename + '.pcap'
            diff_frame_list[symdiff_filename] = \
                self.difference_pcap(pivot_index=index)

        return diff_frame_list

    def inverse_bounded_intersect_pcap(self, new_pcap_frames,
                                       bounded_filenames, has_intersection):
        """Inverse of bounded intersection = (bounded intersect) - (intersect)

        Args:
            new_pcap_frames (dict): All frames and timestamps created by
                other operations thus far.
            bounded_filenames (list): Filenames of bounded intersections
            has_intersection (bool): Whether an intersection has been done
        Returns:
            (dict): {'inv_' + <BOUNDED_PCAP_NAME>:
                     {<FRAME>: <TIMESTAMP>, ...}, ...}
        """
        # Don't generate same dicts twice
        if bounded_filenames:
            bounded_frame_dict = {
                name: dict(new_pcap_frames[name])
                for name in bounded_filenames
            }
        else:
            bounded_frame_dict = self.bounded_intersect_pcap()
        if has_intersection:
            intersect_frame_dict = new_pcap_frames['intersect.pcap']
        else:
            intersect_frame_dict = self.intersect_pcap()

        intersect_set = set(intersect_frame_dict)
        # Keep the timestamps so the result has the same {frame: timestamp}
        # shape as every other generated pcap.
        return {
            'inv_' + name: {
                frame: timestamp
                for frame, timestamp in frames.items()
                if frame not in intersect_set
            }
            for name, frames in bounded_frame_dict.items()
        }

    def bounded_intersect_pcap(self):
        """Get the pcap frame list for bounded_intersect_pcap

        Create a bounding box around each packet capture where the bounds are
        the min and max packets in the intersection: for every pcap, keep the
        frames from the first occurrence of the earliest common frame up to
        and including the last occurrence of the latest common frame.

        Returns:
            (dict): {<BOUNDED_PCAP_NAME>: {<FRAME>: <TIMESTAMP>, ...}, ...}
        Raises:
            IndexError: If a bounding frame is missing from a pcap.
        """
        min_frame, max_frame = self.get_minmax_common_frames()

        bounded_pcaps = {}
        # Each frame_list corresponds to one pcap.
        for pcap in self.pcap_frame_list:
            frame_list = self.pcap_frame_list[pcap]['frames']
            if min_frame not in frame_list:
                print("ERROR: Bounding minimum packet not found!")
                raise IndexError(
                    'Bounding minimum packet not found in ' + str(pcap))
            if max_frame not in frame_list:
                print("ERROR: Bounding maximum packet not found!")
                raise IndexError(
                    'Bounding maximum packet not found in ' + str(pcap))
            min_frame_index = frame_list.index(min_frame)
            # Last occurrence: search the reversed list and map the position
            # back to an index in the original order.
            max_frame_index = \
                len(frame_list) - 1 - frame_list[::-1].index(max_frame)

            bounded_frame_list = \
                frame_list[min_frame_index:max_frame_index + 1]
            bounded_pcap_with_timestamps = {
                frame: self.frame_timestamp_dict[frame]
                for frame in bounded_frame_list
            }
            basename = os.path.splitext(os.path.basename(pcap))[0]
            bounded_filename = 'bounded_intersect-' + basename + '.pcap'
            bounded_pcaps[bounded_filename] = bounded_pcap_with_timestamps

        return bounded_pcaps

    def get_minmax_common_frames(self):
        """Get first, last frames of intersection pcap.

        Returns:
            min_frame, max_frame (tuple(string)): Packet strings of
                the packets that are at the beginning and end of the
                intersection pcap based on timestamps.
        Raises:
            AssertionError: If intersection is empty.
        """
        frame_intersection = self.generate_intersection()
        # Raised explicitly (rather than via `assert`) so the check still
        # runs when Python is started with -O.
        if not frame_intersection:
            raise AssertionError(
                'Intersection is empty: no min/max common frames exist.')

        # Set may reorder packets, so search for first/last by timestamp.
        frame_times = {
            frame: get_ts_as_float(self.frame_timestamp_dict[frame])
            for frame in frame_intersection
        }
        min_frame = min(frame_times, key=frame_times.get)
        max_frame = max(frame_times, key=frame_times.get)

        return min_frame, max_frame