#!/usr/bin/env python

import pcapy
from pcapy import open_offline
import impacket
from impacket.ImpactDecoder import IPDecoderForTruncatedPkts
import sys
import glob
from struct import *

if len(sys.argv) < 3 or len(sys.argv) > 3:
 print "Usage:\n", sys.argv[0], "filename.pcap activity-timeout (ms), e.g. flow_analysis_rawIP.py split.pcap 1000"
 sys.exit() 

def try_int(s):
    """Convert s to an int if possible; otherwise return s unchanged.

    Used as the chunk-normalizer for natural sorting, so runs of digits
    compare numerically while everything else compares as text.
    """
    try:
        return int(s)
    # Narrowed from a bare except: int() raises ValueError on non-numeric
    # text and TypeError on non-string/non-number input; anything else
    # (e.g. KeyboardInterrupt) should propagate.
    except (ValueError, TypeError):
        return s

def natsort_key(s):
    "Split s into digit / non-digit runs (digits coerced to int) for natural sorting."
    import re
    # Alternating runs of digits and non-digits, in original order.
    chunks = re.findall(r'(\d+|\D+)', s)
    return map(try_int, chunks)

def natcmp(a, b):
    "Natural string comparison, case sensitive."
    # Compare the normalized key sequences rather than the raw strings.
    key_a = natsort_key(a)
    key_b = natsort_key(b)
    return cmp(key_a, key_b)

def natcasecmp(a, b):
    "Case-insensitive natural string comparison."
    # Lower-case both operands, then defer to the case-sensitive version.
    left, right = a.lower(), b.lower()
    return natcmp(left, right)

def natsort(seq, cmp=natcmp):
    "In-place natural string sort."
    # NOTE: the parameter name `cmp` shadows the py2 builtin; kept as-is
    # because callers may pass it by keyword.
    seq.sort(cmp)
    
def natsorted(seq, cmp=natcmp):
    "Return a shallow copy of seq sorted in natural string order."
    import copy
    # Copy first so the caller's sequence is left untouched.
    result = copy.copy(seq)
    natsort(result, cmp)
    return result
    
def dottedQuadToNum(ip):
    """Convert a decimal dotted-quad IP string to its integer value.

    Each octet is rendered as two hex digits and the concatenation is
    parsed as one base-16 number, so '10.0.0.1' -> 0x0A000001.

    Raises ValueError if any component is not a decimal integer.
    """
    # long() replaced with int(): Python 2 auto-promotes int to long when
    # needed, so behavior is unchanged and the code is forward-compatible.
    hexn = ''.join(["%02X" % int(octet) for octet in ip.split('.')])
    return int(hexn, 16)
 

#Split a big pcap file: tcpdump -r source.pcap -w split.pcap -C 10
pcaplist = []
for file in glob.glob("%s*"%sys.argv[1]):
    pcaplist.insert(0,file)
    
routeTable = {}
pkts=[]
pktcount = 0
hits = 0
reportfile = open('./active-flow-%s-to%s.txt'%(sys.argv[1],sys.argv[2]), 'w')
statfile = open('./stat-active-flow-%s-to%s.txt'%(sys.argv[1],sys.argv[2]), 'w')
arrivalfile = open('./arrival-active-flow-%s-to%s.txt'%(sys.argv[1],sys.argv[2]), 'w')
departurefile = open('./departure-active-flow-%s-to%s.txt'%(sys.argv[1],sys.argv[2]), 'w')

con_timeout=int(sys.argv[2])
print con_timeout
    
pcaplist=natsorted(pcaplist)
print pcaplist
started=False

for pcapf in pcaplist:
    print "Processing file %s..."%pcapf
    reader = open_offline('./%s'%pcapf)
    decoder = IPDecoderForTruncatedPkts()
    while True:
        #cnt+=1
        #print ('%s')%cnt
        try:
            (header, data) = reader.next()
            if header==None:
                #print "Niente"
                break
        except:
            print "unable to parse pcap entry"
        #try:
         #   ip = decoder.decode(data)
        #except:
         #   continue
        #de = ip.get_ip_dst()
        #print "%s"%de
        #print "%s"%ip.get_byte(24)
        #iphdr = ether.child()
        try:
	  ip = decoder.decode(data)
	  #tcphdr = iphdr.child()
	  tcphdr = ip.child()
	  prototyp = ip.get_ip_p()
	  #print prototyp
	  #port = tcphdr.get_uh_dport()
	  port = tcphdr.get_uh_dport()
	  if port==80 and prototyp==6 :
	      pktcount+=1
	      timestamp_s = header.getts()[0]
	      timestamp_us = header.getts()[1]
	      
	      sport = tcphdr.get_uh_sport()
	      dest = ip.get_ip_dst()
	      src = ip.get_ip_src()  
	      if started==False:
			  started=True
			  timeOffset_s = timestamp_s
			  timeOffset_us = timestamp_us
			  last_refresh_s = timestamp_s
			  last_refresh_us = timestamp_us
	      
	      
	      #check the presence of the route in the table
	      #
	      #print dest+"&"+src+"&"+str(sport)
	      key=dest+"&"+src+"&"+str(sport)
	      
	      if (key in routeTable):
		  # update values
		      #print "trovato"
		      (destq,last_arrival_s, last_arrival_us, first_arrival_s, first_arrival_us) = unpack('IIIII',routeTable[key])
		      value=pack('IIIII',destq, timestamp_s, timestamp_us, first_arrival_s, first_arrival_us)
		      routeTable[key]=value
		      #print "flow in routing table"
		      #print "key %s mean_int %d, var_int %d, last_arrival_s %d , last_arrival_us %d" %(key,mean_int, var_int, last_arrival_s, last_arrival_us)
	      else:
		      arrivalfile.write("%d %d %d \n" % ((dottedQuadToNum(dest)),timestamp_s, timestamp_us))
		      value=pack('IIIII',(dottedQuadToNum(dest)),timestamp_s, timestamp_us, timestamp_s, timestamp_us)
		      routeTable[key] = value
		      #print "flow added"
		      #print "key %s mean_int %d, var_int %d, last_arrival_s %d , last_arrival_us %d" %(key,mean_int, var_int, last_arrival_s, last_arrival_us)
	      
	      #print ((timestamp_s-last_refresh_s)*1000000+(timestamp_us-last_refresh_us))
	      
	      if ((timestamp_s-last_refresh_s)*1000000+(timestamp_us-last_refresh_us) > 100000) :
		# check and remove expired entrries
		activeRoutes=0;
		toRemove=[]
		for key, value in routeTable.iteritems():
		  (destq,last_arrival_s, last_arrival_us, first_arrival_s, first_arrival_us) = unpack('IIIII',value)
		  ito = con_timeout*1000
		  curr_delay = (timestamp_s-last_arrival_s)*1000000+(timestamp_us-last_arrival_us)
		  if (curr_delay>ito):
		    # append expired entry to the toRemove set
		    statfile.write("%d %d %d %d %d \n" % (destq,first_arrival_s, first_arrival_us, last_arrival_s, last_arrival_us))
		    departurefile.write("%d %d %d \n" % (destq,last_arrival_s, last_arrival_us))
		    toRemove.append(key)
		    print "expired flow %s" %key
		  else:
		    activeRoutes=activeRoutes+1
		# remove the expired entries
		for key in toRemove :
		  del routeTable[key]
		
		last_refresh_s = timestamp_s
		last_refresh_us = timestamp_us
		print "now %f , upperbound of active flows %d" % (1000.0*(timestamp_s-timeOffset_s)+1.0*(timestamp_us-timeOffset_us)/1000,activeRoutes)  
		reportfile.write( "%f %d \n" % (1000.0*(timestamp_s-timeOffset_s)+1.0*(timestamp_us-timeOffset_us)/1000,activeRoutes))
		  
		  
        except:
           continue
	  
# Flush flows still active at end of trace: the first arrival is known but
# no departure was observed, so it is written as -1 -1.
# (itervalues() instead of iteritems(): the key was never used.)
for value in routeTable.itervalues():
  (destq,last_arrival_s, last_arrival_us, first_arrival_s, first_arrival_us) = unpack('IIIII',value)
  statfile.write("%d %d %d %d %d \n" % (destq,first_arrival_s, first_arrival_us, -1, -1))

reportfile.close()
statfile.close()
arrivalfile.close()
departurefile.close()


