/* eval4.cpp -- Evaluate an IDS on the 1999 DARPA test set.

Copyright (C) 2002, Matt Mahoney.  This program is distributed without
warranty under terms of the GNU general public license.  See
http://www.gnu.org/licenses/gpl.txt

Usage: eval s=ids.sim a=all.atk l=labels.txt t=0 f=100

All options default as above.  The output is a list showing the detection
rate for various false alarm rates.

If the t=1 option is present, then data from the missing inside.tcpdump
file for week 4, day 2 (3/30/1999 0600 to 3/31/1999 0600) is excluded from
the calculations.  The DARPA test set contains attacks from 3/29/1999 0600
to 4/10/1999 0600.  If t=4, then count only week 4 (ignore everything after
0000 4/3/99).  If t=5, then count only week 5 (ignore everything before
0000 4/3/99).

s=ids.sim specifies the output file of an IDS.  It has one line per attack
detection, in the format

  IIIIIIII MM/DD/YYYY HH:MM:SS AAA.AAA.AAA.AAA S.SSSSSS #Comments

where IIIIIIII is an integer ID identifying the attack (ignored),
MM/DD/YYYY HH:MM:SS is the date and time of the detection, AAA.AAA.AAA.AAA
is the IP address of the victim, and S.SSSSSS is a score between 0 and 1
indicating the likelihood of an attack.  The comments are ignored but the
# character must be present in column 55.  Scores must be sorted in
descending order (sort /R /+46).  If the last byte of the address is 0
(.AAA = 0), then it means (1-255), or equivalently, 1, since the set is
considered a single attack.

l=labels.txt specifies the file containing a list of actual attacks in the
format:

  IIIIIIII MM/DD/YYYY HH:MM:SS AAA.AAA.AAA.AAA TTTTT attack_type

where TTTTT is the duration of the attack in seconds.  The date and time
are for the start of the attack.  There are 70 attack types.  If two lines
have the same ID field, they are considered part of the same attack.

a=all.atk is a list of attack types, one per line, which are to be
evaluated.  If a detection occurs within 60 seconds of one of these
attacks, a true positive is scored.
If a detection occurs within 60 seconds of an attack not in this list, then
the detection is discarded.  Otherwise a false positive is counted.  The IP
address of the victim must match, except that if the last byte is .000 in
labels.txt, then only the first 3 bytes must match.  If there is more than
one detection for an attack, then the highest score is assigned and the
other detections are discarded.

f=100 says to stop after 100 false alarms.
*/

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
using namespace std;

typedef enum {FP, SKIP, TP} DetectionType;
const char* detectionName[3] = {"FP", "--", "TP"};  // Indexed by DetectionType

// Description of an attack (label file) or detection (sim file)
struct Attack {
  long id;               // Records with the same id are part of the same attack
  long time;             // Seconds since beginning of year (31-day months)
  unsigned long victim;  // IP address as 32 bit int
  int duration;          // In seconds (not used for detections)
  double score;          // Assigned by IDS, or max score for an attack
  string name;           // Name of attack (attack file only)
  string comments;       // From attack file after #
  DetectionType type;
  Attack(): id(0), time(0), victim(0), duration(0), score(0), type(SKIP) {}
  bool read(FILE* f);    // Read a line from a label or sim file
  void print() const;
};

// Print one record as "MM/DD HH:MM:SS AAA.AAA.AAA.AAA score type name comments".
// The month and day are recovered from the month*31+day encoding that read()
// uses to build the time field.
void Attack::print() const {
  printf(
    "%02ld/%02ld %02ld:%02ld:%02ld %03lu.%03lu.%03lu.%03lu %f %s %-12s %s\n",
    (time-86400)/(86400*31), ((time-86400)/86400)%31+1,
    (time/3600)%24, (time/60)%60, time%60,
    (victim>>24)&255, (victim>>16)&255, (victim>>8)&255, victim&255,
    score, detectionName[type], name.c_str(), comments.c_str());
}

// Parse the decimal number at p as one IP address byte.  The arithmetic is
// done in unsigned long so that shifting a byte >= 128 into the top octet
// cannot overflow a signed int.
static unsigned long ipByte(const char* p) {
  return static_cast<unsigned long>(atoi(p)) & 255UL;
}

// Read one fixed-column record from a label or sim file into *this.
// Returns false at end of file and true otherwise.  A line too short to hold
// all of the fixed fields leaves every field at its default (time == 0), so
// callers can recognize and skip it.  A line containing '#' is taken to be a
// sim (detection) record; any other line is taken to be a label record.
bool Attack::read(FILE* f) {
  *this = Attack();

  char line[500];
  if (!fgets(line, sizeof line, f))
    return false;
  size_t len=strlen(line);

  // If the line did not fit in the buffer, drop the rest of it so that the
  // tail is not parsed as a separate record by the next call.
  if (len>0 && line[len-1]!='\n') {
    int c;
    while ((c=fgetc(f))!=EOF && c!='\n')
      ;
  }

  // Chop trailing whitespace (including blanks, so that label names compare
  // equal to the trimmed names read from the attack file)
  while (len>0 && static_cast<unsigned char>(line[len-1])<=' ')
    line[--len]=0;
  if (len<53)
    return true;

  const long month=atoi(line+9);
  const long day=atoi(line+12);
  const long hour=atoi(line+20);
  const long minute=atoi(line+23);
  const long second=atoi(line+26);
  time=(((month*31+day)*24+hour)*60+minute)*60+second;

  victim=(ipByte(line+29)<<24) | (ipByte(line+33)<<16)
        | (ipByte(line+37)<<8) | ipByte(line+41);

  const char* p=strchr(line, '#');
  if (p) {  // sim file?
    score=atof(line+45);
    comments=p+1;
  }
  else {  // label file
    id=atol(line);
    duration=atoi(line+45);
    name=line+51;
  }
  return true;
}

// Append every record of the label or sim file at path that has a nonzero
// time to out.  Prints a diagnostic and returns false if the file cannot be
// opened.
static bool loadRecords(const char* path, vector<Attack>& out) {
  FILE* f=fopen(path, "r");
  if (!f) {
    perror(path);
    return false;
  }
  Attack a;
  while (a.read(f))
    if (a.time)
      out.push_back(a);
  fclose(f);
  return true;
}

// Append each non-blank line of the attack-type file at path to out, with
// trailing whitespace removed.  Prints a diagnostic and returns false if the
// file cannot be opened.
static bool loadAttackNames(const char* path, vector<string>& out) {
  FILE* f=fopen(path, "r");
  if (!f) {
    perror(path);
    return false;
  }
  char line[500];
  while (fgets(line, sizeof line, f)) {
    size_t len=strlen(line);
    while (len>0 && static_cast<unsigned char>(line[len-1])<=' ')
      line[--len]=0;
    if (len>0)
      out.push_back(line);
  }
  fclose(f);
  return true;
}

// True if an attack starting at the given time is inside the period selected
// by the t option.  Times use the month*31+day encoding, so 3/30 is day 123
// and 4/3 is day 127.  Any t value other than 0, 1, 4 or 5 selects nothing.
static bool inEvaluationWindow(long time, int t_option) {
  const long day=86400;
  const long gapStart=123*day+day/4;   // 3/30 0600, start of the missing data
  const long gapEnd=124*day+day/4;     // 3/31 0600, end of the missing data
  const long week5Start=127*day;       // 4/3 0000
  return t_option==0
      || (t_option==1 && (time<gapStart || time>=gapEnd))
      || (t_option==4 && time<week5Start)
      || (t_option==5 && time>=week5Start);
}

// True if a detection's victim address matches a label's victim address.
// A label whose last byte is 0 matches any host with the same first 3 bytes.
// NOTE(review): the original comparison was lost from this copy of the file;
// this follows the matching rule stated in the header comment.  Confirm
// whether a sim address ending in .0 should also act as a wildcard.
static bool sameVictim(unsigned long labelIp, unsigned long simIp) {
  if ((labelIp&255UL)==0)
    return (labelIp&~255UL)==(simIp&~255UL);
  return labelIp==simIp;
}

// NOTE(review): parts of main() were lost from this copy of the file.  The
// option defaults, the time windows for t, the lower bound of the detection
// window (time > start-60) and the final summary line are original.  The
// argument parsing, victim matching, upper bound of the detection window,
// credit assignment, false alarm cutoff and per-detection listing were
// rewritten from the header comment and should be checked against a pristine
// copy before results are compared with published numbers.
int main(int argc, char** argv) {

  // Process args
  int t_option=0;
  int f_option=100;
  const char* sim_file="ids.sim";  // s option
  const char* attack_file="all.atk";  // a option
  const char* label_file="labels.txt";  // l option
  for (int i=1; i<argc; ++i) {
    const char* arg=argv[i];
    if (arg[0]==0 || arg[1]!='=') {
      fprintf(stderr, "Ignoring argument %s (expected x=value)\n", arg);
      continue;
    }
    const char* value=arg+2;
    switch (arg[0]) {
      case 's': sim_file=value; break;
      case 'a': attack_file=value; break;
      case 'l': label_file=value; break;
      case 't': t_option=atoi(value); break;
      case 'f': f_option=atoi(value); break;
      default:
        fprintf(stderr, "Ignoring unknown option %s\n", arg);
    }
  }

  // Read the label file, the sim file and the attack file
  vector<Attack> labels;
  if (!loadRecords(label_file, labels))
    return 1;
  vector<Attack> sim;
  if (!loadRecords(sim_file, sim))
    return 1;
  vector<string> attacks;
  if (!loadAttackNames(attack_file, attacks))
    return 1;

  // For each attack, mark TP or SKIP depending on if they are to be used
  for (size_t i=0; i<labels.size(); ++i) {
    labels[i].type=SKIP;
    if (inEvaluationWindow(labels[i].time, t_option)
        && find(attacks.begin(), attacks.end(), labels[i].name)!=attacks.end())
      labels[i].type=TP;
  }

  // Classify each detection in file order.  The sim file must already be
  // sorted by descending score, so the first detection credited to an attack
  // carries that attack's highest score.
  // NOTE(review): detections are not filtered by the t option themselves; only
  // the labels are.  A detection that matches no label is a false alarm
  // whatever its time.  Confirm against the original.
  int ta=0;  // Attacks detected
  int fa=0;  // False alarms counted
  vector<char> detected(labels.size(), 0);
  for (size_t i=0; i<sim.size(); ++i) {
    Attack& alarm=sim[i];
    alarm.type=FP;
    size_t credited=labels.size();  // Index of the label earning a TP, if any
    for (size_t j=0; j<labels.size(); ++j) {
      const Attack& label=labels[j];
      if (!(alarm.time>label.time-60
            && alarm.time<label.time+label.duration+60))
        continue;
      if (!sameVictim(label.victim, alarm.victim))
        continue;
      if (label.type==TP && !detected[j]) {
        alarm.type=TP;
        credited=j;
        break;
      }
      // Overlaps an attack that is excluded or already detected: discard
      // unless a later label turns out to earn a true positive.
      if (alarm.type==FP) {
        alarm.type=SKIP;
        alarm.name=label.name;
      }
    }

    if (alarm.type==TP) {
      alarm.name=labels[credited].name;
      // Records with the same id are one attack, so credit them together
      for (size_t j=0; j<labels.size(); ++j) {
        if (labels[j].id==labels[credited].id) {
          detected[j]=1;
          labels[j].score=alarm.score;
        }
      }
      ++ta;
    }
    else if (alarm.type==FP) {
      if (fa>=f_option)  // Stop once the false alarm limit has been reached
        break;
      ++fa;
    }
    alarm.print();
  }
  printf("%s %d/%d\n", sim_file, ta, fa);
  return 0;
}