/* sad.cpp, Matt Mahoney, mmahoney@cs.fit.edu

Simple anomaly detector, takes full advantage of simulation artifacts
in the 1999 DARPA IDS evaluation data set.

Copyright (C) 2003, Matt Mahoney.  This program is distributed without
warranty under terms of the GNU general public license.
See http://www.gnu.org/licenses/gpl.txt

Usage: sad 38 tcpdump_files... | perl afil.pl > sad.sim
       eval3 sad.sim
       eval4 s=sad.sim

       sad 45 in3tf in45tf | eval -

The first argument is the byte offset in the network packet, including
the 16 byte tcpdump header and 14 byte Ethernet header.  38 is the TTL
field (byte 8 of the IP header).

SAD examines 1 byte of inbound TCP SYN packets.  During training
(hard coded to weeks 1 or 3) it remembers which of the 256 possible
values occurred.  During testing (weeks 2, 4, or 5) it generates an
alarm if the value was never observed in training and no other
anomalies occurred in the last 60 seconds.  The score is t * 1e-6
where t is the time in seconds since the last anomaly.

Some good values on in3tf, in45tf (weeks 3-5 filtered with tf) are:

  Byte                            Det/FA out of 177 inside sniffer attacks
  ----                            ------
  33   IP length low byte         15/2
  38   TTL                        22/4
  42   Src IP addr byte 1         64/41
  43               byte 2         67/42
  44               byte 3         79/43
  45               byte 4         71/16
  50   source port hi byte        13/0
  62   TCP header size            15/0
  64   window size hi byte        15/0
  65   window size lo byte        7/0
  70   TCP options 1st byte       15/2

Unfiltered tcpdump files should give similar results, since tf doesn't
remove most inbound TCP SYN packets.  However this would increase run
time from 1 sec. to 15 min. so I didn't test them all.
*/

// NOTE(review): this file originally listed five #include lines whose header
// names were lost.  The three below declare every library name the code in
// this file uses (FILE/fopen/fread/fprintf/printf, atoi, time_t/tm/gmtime/
// strftime); restore the remaining two if the originals are known.
#include <cstdio>
#include <cstdlib>
#include <ctime>
using namespace std;

// Convert 2 bytes to int, MSB first.  The result is in 0..65535.
int i2(const unsigned char* p) {
  return (p[0]<<8)|p[1];
}

// Convert 4 bytes to unsigned long, MSB first.  The result is in
// 0..0xffffffff.  Each byte is widened to unsigned long BEFORE shifting:
// shifting the int-promoted p[0] left by 24 would reach the sign bit, and
// the negative int would then sign-extend when unsigned long is 64 bits,
// so that e.g. the tcpdump magic a1 b2 c3 d4 never compared equal to
// 0xa1b2c3d4.
unsigned long i4(const unsigned char* p) {
  return (static_cast<unsigned long>(p[0])<<24)
       | (static_cast<unsigned long>(p[1])<<16)
       | (static_cast<unsigned long>(p[2])<<8)
       |  static_cast<unsigned long>(p[3]);
}

// Return the time (seconds since 1970 UT) in a readable format,
// "MM/DD/YYYY HH:MM:SS", or "" if the time cannot be converted.
//   Convert 1/1/70-4/3/99 to EST  (UT - 5 hours)
//   Convert 4/4/99-12/31/99 to EDT (UT - 4 hours)
//   Times from 1/1/2000 on are left in UT.
// The result points to a static buffer that is overwritten by the next
// call, so print or copy it before calling print_time() again.
const char* print_time(double seconds) {
  static char s[30];
  time_t t=time_t(seconds);
  if (t<10957*86400) t-=4*3600;  // EDT before 1/1/2000
  if (t<10685*86400) t-=3600;    // EST before 4/4/1999 (5 hours in total)

  // The Eastern offset was already applied by hand above, so break the
  // value down as UT.  localtime() would apply the host's zone a second time.
  tm* eastern=gmtime(&t);
  if (eastern)
    strftime(s, 30, "%m/%d/%Y %H:%M:%S", eastern);
  else
    s[0]=0;
  return s;
}

/* PacketReader - a class for reading tcpdump packets from a file.

PacketReader pr(int argc, char** argv);
  Prepares pr to read packets from a list of files named in
  argv[0..argc-1].  Files must be tcpdump files.

const unsigned char* pr.read()
  Reads one packet and returns it, or 0 at end of last file.  The first
  call reads the first packet from argv[0].  At the end of each file,
  the file is closed and read() returns the first packet from the next
  file.  The returned buffer starts with the 16 byte tcpdump record
  header: the number of packet bytes stored after it is i4(pr.read()+8)
  and the original packet length is i4(pr.read()+12).  The buffer is
  reused by the next call to read().
*/
class PacketReader {
private:
  enum {MAX_PACKET=1600};   // Max packet size including tcpdump header
  unsigned char buf[MAX_PACKET];  // Current input packet
  int argc;                 // Number of files remaining to be read
  const char* const* argv;  // Names of files remaining to be read
  FILE* f;                  // Currently open file, or 0 if all are closed
  void close(const char* msg=0);  // Close file, print msg if any

  // Not copyable: two copies would both fclose() the same FILE.
  // Declared but intentionally not defined.
  PacketReader(const PacketReader&);
  PacketReader& operator=(const PacketReader&);
public:
  PacketReader(int ac, const char* const* av): argc(ac), argv(av), f(0) {}
  ~PacketReader() {if (f) fclose(f);}  // Don't leak a file left open mid-read
  const unsigned char* read();
};

// Close f and go to next file.
If msg is not 0, print error message void PacketReader::close(const char* msg) { if (f) { fclose(f); f=0; } if (msg) fprintf(stderr, "%s: %s\n", argv[0], msg); --argc; ++argv; } // Read a packet and return its timestamp, or 0 at EOF const unsigned char* PacketReader::read() { while (true) { if (f) { if (fread(buf, 1, 16, f)!=16) close("OK"); // EOF else { unsigned long len1=i4(buf+8); // Recorded length <= len2 unsigned long len2=i4(buf+12); // Original length <= MAX_PACKET-16 if (len1>len2 || len2>MAX_PACKET-16) close("bad tcpdump packet header"); else if (fread(buf+16, 1, len1, f)!=len1) close("truncated packet"); else return buf; } } else { // Open file if (argc<1) return 0; // No file to open fprintf(stderr, "%s\n", argv[0]); f=fopen(argv[0], "rb"); if (!f) close("file not found"); else if (fread(buf, 1, 24, f)!=24) close("file is too small"); else if (i4(buf)!=0xa1b2c3d4) close("not in tcpdump format"); } } } // A sad case of anomaly detection int main(int argc, const char* const* argv) { if (argc<3) { fprintf(stderr, "SAD v1 (C) 2003, Matt Mahoney, mmahoney@cs.fit.edu\n" "Distributed without warranty under terms of the GNU general public\n" "license, see http://www.gnu.org/licenses/gpl.txt\n" "\n" "Usage: sad 38 in[3-5]* | perl afil.pl >sad.sim\n" " eval3 sad.sim\n" " eval4 s=sad.sim\n" "\n" "SAD (Simple Anomaly Detector, or Simulation Artifact Detector :)\n" "demonstrates anomaly detection in the 1999 DARPA IDS evaluation data set.\n" "38 (TTL) is the packet byte offset including the 16 byte tcpdump\n" "header and 14 byte Ethernet header. in1* and in3* are training\n" "tcpdump files. 
in2* in4* in5* are test files (inside.tcpdump).\n" "sad, afil.pl, eval3, and eval4 are available at\n" "http://cs.fit.edu/~mmahoney/dist/\n"); return 1; } // Anomaly model const int attr=atoi(argv[1]); // Attribute number static bool val[256]; // true if seen in training unsigned long last_anomaly=0, now=0; // Seconds since 1970 // These are just for printing statistics when finished unsigned long start_time=0; // Time of first packet int trains=0, tests=0, anomalies=0; // Counts int r=0; // Number of allowed values PacketReader pr(argc-2, argv+2); // Read tcpdump files const unsigned char* pkt=0; while ((pkt=pr.read())!=0) { int len=i4(pkt+8); // Packet length (truncated) without tcpdump header // TCP SYN to 172.16.x.x or 163.118.135.1 (www.cs.fit.edu) ? if (len>=54 && i2(pkt+28)==0x800 && pkt[39]==6 && pkt[63]==2 && attr10657*86400 && now<10664*86400; // Week 2? const bool test45=now>10678*86400 && now<10692*86400; // Week 4 or 5? const bool train=!test2 && !test45; // Train if (train) { if (++trains==1) // Print start time of first packet fprintf(stderr, "Training starts at %s\n", print_time(now)); if (!val[pkt[attr]]) { val[pkt[attr]]=true; last_anomaly=now; ++r; } } // Test else { if (++tests==1) { // Print training stats at first test packet if (now>start_time) fprintf(stderr, "Last learned value at %s (%f%% of training)\n", print_time(last_anomaly), 100.0*(last_anomaly-start_time)/(now-start_time)); fprintf(stderr, "Testing starts at %s\n", print_time(now)); } if (!val[pkt[attr]]) { double score=0.000001*(now-last_anomaly); last_anomaly=now; if (score>=0.00006) { // At least 1 minute since last anomaly? 
printf(" 0 %s %03d.%03d.%03d.%03d %8.6f # %3d\n", print_time(now), pkt[46], pkt[47], pkt[48], pkt[49], score, pkt[attr]); ++anomalies; } } } } } // Print the model and statistics fprintf(stderr, "%d allowed values for attribute %d:", r, attr); if (r<256) { for (int i=0; i<256; ++i) if (val[i]) fprintf(stderr, " %d", i); } fprintf(stderr, "\n%d anomalies in %d training and %d test packets\nfrom %s", anomalies, trains, tests, print_time(start_time)); fprintf(stderr, " to %s\n", print_time(now)); return 0; }