# -*- coding: utf-8 -*-
# Copyright 2018 Ross Jacobs All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Do algebraic operations on sets like union, intersect, difference."""
import collections
import os
import time
from pcapgraph.manipulate_frames import strip_layers
from pcapgraph.manipulate_frames import get_flat_frame_dict
from pcapgraph.manipulate_frames import get_frame_list_by_pcap
from pcapgraph.manipulate_frames import get_frame_from_json
from pcapgraph.save_file import convert_to_pcaptext
import pcapgraph.save_file as save
class PcapMath:
    """Do algebraic operations on sets like union, intersect, difference.

    For multiple set operations, files are read in only once in __init__.
    Use different PcapMath objects if input files are different.
    """

    def __init__(self, filenames, options):
        """Prepare PcapMath object for one or multiple operations.

        Every PcapMath object should start with the data structures filled
        with the data that each operation needs to function.

        Args:
            filenames (list): List of filenames.
            options (dict): Whether to strip L2 and L3 headers.
        """
        self.filenames = filenames
        self.pcap_json_dict = strip_layers(filenames, options)
        pcap_json_list = [*self.pcap_json_dict.values()]
        self.frame_timestamp_dict = get_flat_frame_dict(pcap_json_list)
        # Filled lazily by intersect_pcap() / get_minmax_common_frames().
        self.frame_list_by_pcap = []
        self.exclude_empty = False
        self.options = options

    def parse_set_args(self, args):
        """Call the appropriate method per CLI flags.

        difference, union, intersect consist of
        {<op>: {frame: timestamp, ...}}
        bounded_intersect consists of {pcap: {frame: timestamp, ...}, ...}

        Args:
            args (dict): Dict of all arguments (including set args).
        Returns:
            filenames (list): List of all files, including ones generated
                by set operations.
        """
        new_files = []
        bounded_filelist = []
        intersect_file = ''
        self.exclude_empty = args['--exclude-empty']

        if args['--difference']:
            generated_file = self.difference_pcap()
            # As long as the difference exists.
            if generated_file:
                new_files.append(generated_file)
        if args['--intersection']:
            intersect_file = self.intersect_pcap()
            # intersect_pcap() returns '' for an empty intersection; an
            # empty string is not a filename and must not be reported.
            if intersect_file:
                new_files.append(intersect_file)
        if args['--symmetric-difference']:
            new_files.extend(self.symmetric_difference_pcap())
        if args['--union']:
            new_files.append(self.union_pcap())
        if args['--bounded-intersection']:
            bounded_filelist = self.bounded_intersect_pcap()
            new_files.extend(bounded_filelist)
        if args['--inverse-bounded']:
            if not intersect_file:
                intersect_file = self.intersect_pcap()
            generated_filelist = self.inverse_bounded_intersect_pcap(
                bounded_filelist=bounded_filelist,
                intersect_file=intersect_file)
            # The intersect file was only needed as the subtrahend, so
            # drop it unless the user asked for it explicitly.
            if not args['--intersection']:
                os.remove(intersect_file)
            new_files.extend(generated_filelist)

        # Copy so that the caller cannot alter self.filenames through the
        # returned list.
        filenames = list(self.filenames)
        filenames.extend(new_files)
        return filenames

    def union_pcap(self):
        """Given sets A = (1, 2, 3), B = (2, 3, 4), A + B = (1, 2, 3, 4).

        Collects every frame of every input pcap, prints the most common
        frames, and saves the de-duplicated frames as 'union.pcap'.

        Returns:
            (string): Name of generated pcap.
        """
        raw_packet_list = [
            get_frame_from_json(frame)
            for pcap in self.pcap_json_dict.values()
            for frame in pcap
        ]
        self.print_10_most_common_frames(raw_packet_list)

        # Keying by frame collapses duplicate frames into one entry.
        union_frame_dict = {
            frame: self.frame_timestamp_dict[frame]
            for frame in raw_packet_list
        }
        save.save_pcap(
            pcap_dict=union_frame_dict,
            name='union.pcap',
            options=self.options)
        return 'union.pcap'

    @staticmethod
    def print_10_most_common_frames(raw_frame_list):
        """After doing a packet union, find/print the 10 most common packets.

        Only frames seen more than once are considered. At most 10 frames
        are printed, most frequent first, followed by usage instructions.

        This is a work in progress and may eventually use this bash:
        <packets> | text2pcap - - | tshark -r - -o 'gui.column.format:"No.",
        "%m","VLAN","%q","Src MAC","%uhs","Dst MAC","%uhd","Src IP","%us",
        "Dst IP","%ud","Protocol","%p","Src port","%uS","Dst port","%uD"'
        Alternatively, just use the existing information in pcap_dict.
        The goal is to print
        frame#, VLAN, src/dst MAC, src/dst IP, L4 src/dst ports, protocol
        This should likely be its own CLI flag in future.

        Args:
            raw_frame_list (list): List of raw frames
        """
        frame_counts = collections.Counter(raw_frame_list)
        # It's not a common frame if it is only seen once.
        repeated_frames = [(frame, count)
                           for frame, count in frame_counts.most_common()
                           if count > 1]
        # Slice rather than count-and-break so exactly 10 are printed.
        for frame, count in repeated_frames[:10]:
            packet_text = convert_to_pcaptext(frame)
            print("Count: {: <7}\n{: <}".format(count, packet_text))
        print("To view the content of these packets, subtract the count lines,"
              "\nadd and save to <textfile>, and then run "
              "\n\ntext2pcap <textfile> out.pcap\nwireshark out.pcap\n")

    def intersect_pcap(self):
        """Save pcap intersection. First filename is pivot packet capture.

        Prints, per input pcap, the share of its frames that are part of
        the intersection, then saves the intersection as 'intersect.pcap'.

        Returns:
            (str): Filename of generated pcap, or '' if the intersection
                contains no packets.
        """
        # Generate intersection set of frames
        if not self.frame_list_by_pcap:
            self.frame_list_by_pcap = \
                get_frame_list_by_pcap(self.pcap_json_dict)
        frame_list = self.frame_list_by_pcap
        frame_intersection = set(frame_list[0]).intersection(*frame_list[1:])

        intersection_count = len(frame_intersection)
        print("{: <12} {: <}".format('\nSAME %', 'PCAP NAME'))
        # NOTE(review): assumes frame_list_by_pcap is ordered like
        # self.filenames, as difference_pcap() already assumes for
        # pcap_json_dict - confirm against get_frame_list_by_pcap.
        for pcap, pcap_frames in zip(self.filenames, frame_list):
            # Use this pcap's own frame count; guard against empty pcaps.
            if pcap_frames:
                percent = round(100 * intersection_count / len(pcap_frames))
            else:
                percent = 0
            print("{: <12} {: <}".format(str(percent) + '%', pcap))

        intersect_frame_dict = {
            frame: self.frame_timestamp_dict[frame]
            for frame in frame_intersection
        }
        save.save_pcap(
            pcap_dict=intersect_frame_dict,
            name='intersect.pcap',
            options=self.options)

        if frame_intersection:
            return 'intersect.pcap'
        print('WARNING! Intersection between ', self.filenames,
              ' contains no packets!')
        return ''

    def difference_pcap(self, pivot_index=0):
        """Given sets A = (1, 2, 3), B = (2, 3, 4), C = (3, 4, 5), A-B-C = (1).

        Args:
            pivot_index [int]: Specify minuend by index of filename in list
        Returns:
            (string): Name of generated pcap, or '' when the difference is
                empty and self.exclude_empty is set.
        """
        pcap_json_list = [*self.pcap_json_dict.values()]
        minuend_pcap_json = pcap_json_list[pivot_index]
        minuend_name = self.filenames[pivot_index]
        # pcap json list - minuend json. With index 0, remove 1st pcap json.
        subtrahend_json_list = pcap_json_list[:pivot_index] + \
            pcap_json_list[pivot_index + 1:]

        minuend_frame_dict = get_flat_frame_dict([minuend_pcap_json])
        subtrahend_frame_dict = get_flat_frame_dict(subtrahend_json_list)
        packet_diff = set(minuend_frame_dict.keys()).difference(
            subtrahend_frame_dict.keys())
        # Minuend frame dict should have all values we care about.
        diff_frame_dict = {
            frame: minuend_frame_dict[frame] for frame in packet_diff
        }

        diff_filename = 'diff_' + os.path.basename(minuend_name)
        if not packet_diff:
            print('WARNING! ' + minuend_name +
                  ' difference contains no packets!')
            # Save only if there are packets or -x flag is not used.
            if self.exclude_empty:
                return ''

        # If the file already exists, choose a different name. splitext
        # keeps this correct for extensions other than '.pcap', and the
        # incrementing stamp avoids spinning until the clock ticks over.
        unique_diff_name = diff_filename
        diff_root = os.path.splitext(diff_filename)[0]
        stamp = int(time.time())
        while os.path.isfile(unique_diff_name):
            unique_diff_name = diff_root + '-' + str(stamp) + '.pcap'
            stamp += 1
        save.save_pcap(
            pcap_dict=diff_frame_dict,
            name=unique_diff_name,
            options=self.options)
        return unique_diff_name

    def symmetric_difference_pcap(self):
        """For sets A = (1, 2, 3), B = (2, 3, 4), C = (3, 4, 5), A△B△C = (1, 5)

        For all pcaps, the symmetric difference produces a pcap that has the
        packets that are unique to only that pcap (unlike above where only
        one set is the result).

        Returns:
            (list(str)): Filenames of generated pcaps.
        """
        generated_filelist = []
        for index, filename in enumerate(self.filenames):
            diff_filename = self.difference_pcap(pivot_index=index)
            if not diff_filename:
                # No file was written for this pcap.
                continue
            symdiff_filename = 'symdiff_' + os.path.basename(filename)
            os.replace(diff_filename, symdiff_filename)
            generated_filelist.append(symdiff_filename)
        return generated_filelist

    def bounded_intersect_pcap(self):
        """Create one bounded pcap per input pcap.

        Each input pcap is trimmed to the frames between the earliest and
        the latest frame it has in common with all other pcaps (see
        get_bounded_pcaps) and saved as 'bounded_intersect-simul<N>.pcap'.

        Returns:
            (list(string)): Filenames of generated pcaps.
        """
        names = []  # Names of all generated pcaps
        for number, bounded_pcap in enumerate(self.get_bounded_pcaps(), 1):
            name = 'bounded_intersect-simul' + str(number) + '.pcap'
            save.save_pcap(
                pcap_dict=bounded_pcap,
                name=name,
                options=self.options)
            names.append(name)
        return names

    def inverse_bounded_intersect_pcap(self,
                                       bounded_filelist=False,
                                       intersect_file=False):
        """Inverse of bounded intersection = (bounded intersect) - (intersect)

        Temporarily points this object at each bounded pcap plus the
        intersect file so difference_pcap() can be reused, then restores
        the object's original inputs.

        Args:
            bounded_filelist (list): List of existing bounded pcaps generated
                by bounded_intersect_pcap()
            intersect_file (string): Location of intersect file.
        Returns:
            (list(string)): Filenames of generated pcaps.
        """
        generated_filelist = []
        remove_bounded_files = False
        if not bounded_filelist:
            # Don't generate twice if flags -be are used
            # Note that this runs after bounded_intersect if it would be run
            bounded_filelist = self.bounded_intersect_pcap()
            # Files created only for this computation are deleted below.
            remove_bounded_files = True

        # difference_pcap() reads all three attributes, so all three must
        # be swapped and put back, even if an operation below raises.
        saved_state = (self.filenames, self.pcap_json_dict,
                       self.frame_timestamp_dict)
        try:
            for bi_file in bounded_filelist:
                self.filenames = [bi_file, intersect_file]
                self.pcap_json_dict = strip_layers(self.filenames,
                                                   self.options)
                pcap_json_list = [*self.pcap_json_dict.values()]
                self.frame_timestamp_dict = \
                    get_flat_frame_dict(pcap_json_list)
                difference_file = self.difference_pcap()
                if difference_file:
                    generated_filelist.append(difference_file)
                if remove_bounded_files:
                    # Do not keep bounded-intersect files if they are not
                    # necessary
                    os.remove(bi_file)
        finally:
            (self.filenames, self.pcap_json_dict,
             self.frame_timestamp_dict) = saved_state
        return generated_filelist

    def get_bounded_pcaps(self):
        """Get the pcap frame list for bounded_intersect_pcap

        Create a bounding box around each packet capture where the bounds
        are the min and max packets in the intersection: from the first
        occurrence of the min frame to the last occurrence of the max frame.

        Returns:
            bounded_pcaps (list): A list of frame_dicts
        Raises:
            IndexError: If a pcap does not contain a bounding frame.
            AssertionError: If the intersection is empty.
        """
        # Also populates self.frame_list_by_pcap if it is still empty.
        min_frame, max_frame = self.get_minmax_common_frames()
        bounded_pcaps = []
        # Each frame_list corresponds to one pcap.
        for frame_list in self.frame_list_by_pcap:
            try:
                min_frame_index = frame_list.index(min_frame)
            except ValueError:
                print("ERROR: Bounding minimum packet not found!")
                raise IndexError(
                    "Bounding minimum packet not found!") from None

            # Walk indexes backwards: list.index() would report the first
            # occurrence, but the upper bound is the last occurrence.
            max_frame_index = -1
            for index in range(len(frame_list) - 1, -1, -1):
                if frame_list[index] == max_frame:
                    max_frame_index = index
                    break
            if max_frame_index == -1:
                print("ERROR: Bounding maximum packet not found!")
                raise IndexError("Bounding maximum packet not found!")

            bounded_frame_list = \
                frame_list[min_frame_index:max_frame_index + 1]
            bounded_pcaps.append({
                frame: self.frame_timestamp_dict[frame]
                for frame in bounded_frame_list
            })
        return bounded_pcaps

    def get_minmax_common_frames(self):
        """Get first, last frames of intersection pcap.

        Returns:
            min_frame, max_frame (tuple(string)):
                Packet strings of the packets that are at the beginning and
                end of the intersection pcap based on timestamps.
        Raises:
            AssertionError: If intersection is empty.
        """
        # Generate intersection set of frames
        if not self.frame_list_by_pcap:
            self.frame_list_by_pcap = \
                get_frame_list_by_pcap(self.pcap_json_dict)
        frame_list = self.frame_list_by_pcap
        frame_intersection = set(frame_list[0]).intersection(*frame_list[1:])

        # Raised explicitly (not via `assert`) so the check survives
        # `python -O`; the exception type is unchanged for callers.
        if not frame_intersection:
            raise AssertionError(
                'Intersection is empty: no min/max common frames.')

        def frame_time(frame):
            """Return the timestamp of a frame as a float."""
            return float(self.frame_timestamp_dict[frame])

        # Set may reorder packets, so search for first/last by timestamp.
        min_frame = min(frame_intersection, key=frame_time)
        max_frame = max(frame_intersection, key=frame_time)
        return min_frame, max_frame