/* e3.cpp -- Merge and evaluate IDS's on the DARPA test set Copyright (C) 2002, Matt Mahoney. This program is distributed without warranty under terms of the GNU general public license. See http://www.gnu.org/licenses/gpl.txt Usage: e3 files... Input IDS files have lines with the format IIIIIIII MM/DD/YYYY HH:MM:SS AAA.AAA.AAA.AAA S.SSSSSS #Comments where IIIIIIII is an integer ID identifying the attack (ignored), MM/DD/YYYY HH:MM:SS is the date and time of the detection, AAA.AAA.AAA.AAA is the IP address of the victim, and S.SSSSSS is a score between 0 and 1 indicating the likelihood of an attack. The comments are ignored. The file labels.txt must be present in the current directory. It indicates when the attacks actually occur, in the format: IIIIIIII MM/DD/YYYY HH:MM:SS AAA.AAA.AAA.AAA TTTTT attack_type where TTTTT is the duration of the attack in seconds. The date and time are for the start of the attack. There are 70 attack types. If two consecutive lines have the same ID field, they are considered part of the same attack. An attack is considered detected if the IP address matches and the time matches any part of the attack segment within 60 seconds. An attack may have multiple segments with different addresses and times, but the same ID. Multiple detections of the same attack are counted as one. If the last byte of the IP address is .000 in labels.txt, then only the first 3 bytes must match. Duplicate alarms (same victim and within 60 seconds) are discarded for the first 100 FA. The program outputs the number of attacks detected at 5, 10, 20, 50, 100, 200, 500, 1000, 2000, and 5000 false alarms for all combinations of IDS files. The files are sorted first by score (highest first), then by the time since the previous detection at the same IP (longest first), then by time (latest first), then by IP. Detections are taken round robin from the sorted lists (in command line order) until the specified number of false alarms is reached or no more detections are left. 
If there are n IDS files, then there are pow(2,n)-1 outputs, one for each combination of merged inputs. The number of detections for each attack type are reported. The attack types are taken from the file table1.txt, which has lines of the form II.IIIIII attack_type Clr/Stlth New/Old Category OS which are arbitraray strings terminated by a space. The ID has a . between the 2nd and 3rd digit, and is matched to the ID in labels.txt. */ #include #include #include #include #include #include #include #include using namespace std; // Read whitespace terminated word from s string getword(const char* s) { string r; while (*s>' ') r+= *s++; return r; } // Description of an attack (label file) or detection (sim file) struct Attack { long idn; // ID number from file long id; // 1-200 = attack instance, 0 = false alarm long time; // In seconds long interval; // Seconds since previous detection for victim unsigned long victim; // IP address as 32 bits double score; // Assigned by IDS Attack(): id(0), time(0), interval(0), victim(0), score(0) {} bool read(FILE* f); // Read a line from a label or sim file, false at EOF void print() const; // For debug }; void Attack::print() const { printf("%8lu %02d/%02d/1999 %02d:%02d:%02d %03d.%03d.%03d.%03d %8.6f # %s\n", id, (time-86400)/(86400*31), ((time-86400)/86400)%31+1, (time/3600)%24, (time/60)%60, time%60, (victim>>24)&255, (victim>>16)&255, (victim>>8)&255, victim&255, score); } // Read IDS file (compatible with labels.txt, score = duration) bool Attack::read(FILE* f) { id=0; idn=0; time=0; victim=0; score=0; char line[500]; if (!fgets(line, 500, f)) return false; int len=strlen(line); if (len<50) return true; idn=atoi(line); time=(((atoi(line+9)*31+atoi(line+12))*24+atoi(line+20))*60+atoi(line+23)) *60+atoi(line+26); // month, day, hour, minutes, seconds interval=time; victim=(atoi(line+29)<<24)+(atoi(line+33)<<16)+(atoi(line+37)<<8) +atoi(line+41); // IP bytes score=atof(line+45); return true; } // For sorting bool operator<(const 
Attack& a, const Attack& b) { return a.score>b.score || (a.score==b.score && a.interval>b.interval) || (a.score==b.score && a.interval==b.interval && a.victim > det; // List of detections by IDS for (int i=1; i det1; // List of detections for this IDS if (!f) perror(argv[1]); else { Attack a; while (a.read(f) && det1.size()<=8000) { a.id=0; a.idn=0; if (a.time>0 && a.victim>0) det1.push_back(a); } fclose(f); // Calculate intervals for sorting for (int j=0; j0 && interval a.time-60 && det[i][j].time < a.time+long(a.score)+60) { det[i][j].id=idcount; det[i][j].idn=a.idn; } } } } fclose(f); printf("%d attack instances\n\n", idcount); const int IDMAX=250; if (idcount>=IDMAX) return 1; // Read table1.txt map > table1; // ID -> attack, Clr, Old, Cat, OS char line[500]; f=fopen("table1.txt", "r"); if (!f) { perror("table1.txt"); return 1; } fgets(line, 500, f); // Skip header while (fgets(line, 500, f)) { if (strlen(line)>77) { long idn=atol(line)*1000000+atol(line+3); table1[idn].push_back(getword(line)+" "+getword(line+15)+" "+ getword(line+30)+" "+getword(line+45)+" "+getword(line+62) +" "+getword(line+77)); table1[idn].push_back(getword(line+15)+getword(line+30)); table1[idn].push_back(getword(line+62)+"-"+getword(line+77)); table1[idn].push_back(getword(line+62)); table1[idn].push_back(getword(line+77)); table1[idn].push_back(getword(line+45)); table1[idn].push_back(getword(line+62)+" "+getword(line+15)); } } fclose(f); // Evaluate all merged combinations printf("IDS's 5 10 20 50 100 200 500 1000 2000 5000 TP / FA\n"); static bool isdet[IDMAX]; // Map of detected attacks map all; // Count of detections by merged systems for (unsigned long i=1; i<((unsigned long)1< det1; // List of previous detections for this combo for (int j=0; !done; ++j) { done=true; for (int k=0; k-60) { dup=true; break; } } } } if (!dup || fa>=100) { if (d.id==0) { ++fa; if (fa==5 || fa==10 || fa==20 || fa==50 || fa==100 || fa==200 || fa==500 || fa==1000 || fa==2000 || fa==5000) printf(" %3d", 
tp); } else if (!isdet[d.id]) { ++tp; isdet[d.id]=true; if (fa<100 && i==(1L<& vs=table1[d.idn]; if (vs.size()==0) printf("idn=%ld vs.size=%d\n", d.idn, vs.size()); for (int l=0; l::iterator p=all.begin(); p!=all.end(); ++p) { sum+=p->second; printf("%2d %s\n", p->second, p->first.c_str()); } printf("%d attacks detected\n", sum/7); return 0; }