|
189 | 189 | "class DNSExfiltration(nn.Module):\n",
|
190 | 190 | " def __init__(self,input_size):\n",
|
191 | 191 | " super().__init__()\n",
|
192 |
| - " self.layer_1 = nn.Linear(input_size, 256) \n", |
193 |
| - " self.layer_2 = nn.Linear(256, 256)\n", |
194 |
| - " self.layer_out = nn.Linear(256, 1) \n", |
| 192 | + " self.layer_1 = nn.Linear(input_size, 128) \n", |
| 193 | + " self.layer_2 = nn.Linear(128, 128)\n", |
| 194 | + " self.layer_out = nn.Linear(128, 1) \n", |
195 | 195 | " \n",
|
196 | 196 | " \n",
|
197 | 197 | " self.relu = nn.ReLU()\n",
|
|
212 | 212 | "print (len(list(string.printable.strip())))\n",
|
213 | 213 | " \n",
|
214 | 214 | "def init(df,param):\n",
|
215 |
| - " model = DNSExfiltration(99)\n", |
| 215 | + " model = DNSExfiltration(98)\n", |
216 | 216 | " model.load_state_dict(torch.load(MODEL_DIRECTORY+'detect_dns_data_exfiltration_using_pretrained_model_in_dsdl.pt',map_location=torch.device('cpu')))\n",
|
217 | 217 | " model = model.to('cpu')\n",
|
218 | 218 | " model.eval()\n",
|
|
382 | 382 | " # length of domain\n",
|
383 | 383 | " df['len'] = df['request_without_domain'].apply(len)\n",
|
384 | 384 | " \n",
|
385 |
| - " # number of subdomains\n", |
386 |
| - " df['subdomains_count'] = df['request_without_domain'].apply(lambda x: len(str(x).split('.')))\n", |
387 |
| - " \n", |
388 | 385 | " # entropy\n",
|
389 | 386 | " df['entropy'] = df['request_without_domain'].apply(lambda x: entropy(x))\n",
|
390 | 387 | " \n",
|
391 | 388 | " # take most-recent request\n",
|
392 |
| - " recent_df = df.loc[df['count'] == 1]\n", |
| 389 | + " recent_df = df.loc[df['rank'] == 1]\n", |
393 | 390 | "\n",
|
394 | 391 | " # calculate feature by aggregating events\n",
|
395 | 392 | "\n",
|
|
401 | 398 | " \n",
|
402 | 399 | "# apply model on processed dataframe to predict exfiltration\n",
|
403 | 400 | "def apply(model,df,param):\n",
|
404 |
| - " df.drop(['_time'], axis=1,inplace=True)\n", |
| 401 | + " df.drop(['_time'], axis=1,inplace=True, errors='ignore')\n", |
405 | 402 | " recent_df = prepare_input_df(df)\n",
|
406 |
| - " input_df = recent_df.drop(['src' ,'query','count','request_without_domain','tld'], axis=1)\n", |
407 |
| - " recent_df.drop(['request_without_domain','tld','len','subdomains_count','entropy','size_avg','entropy_avg'], axis=1, inplace=True)\n", |
| 403 | + " input_df = recent_df.drop(['src' ,'query','rank','request_without_domain','tld'], axis=1)\n", |
| 404 | + " recent_df.drop(['request_without_domain','tld','len','entropy','size_avg','entropy_avg'], axis=1, inplace=True)\n", |
408 | 405 | " recent_df.drop(range(0, 94),axis=1,inplace=True)\n",
|
409 | 406 | " input_tensor = torch.FloatTensor(input_df.values)\n",
|
410 | 407 | " dataloader = DataLoader(input_tensor, shuffle=True, batch_size=256)\n",
|
|
416 | 413 | " text_rows.clear()\n",
|
417 | 414 | " size_avg.clear()\n",
|
418 | 415 | " entropy_avg.clear()\n",
|
419 |
| - " output = pd.merge(recent_df,df,on=['src','query','count'],how='right')\n", |
| 416 | + " output = pd.merge(recent_df,df,on=['src','query','rank'],how='right')\n", |
420 | 417 | " return output"
|
421 | 418 | ]
|
422 | 419 | },
|
|
461 | 458 | "source": [
|
462 | 459 | "# load model from name in expected convention \"<algo_name>_<model_name>\"\n",
|
463 | 460 | "def load(name):\n",
|
464 |
| - " model = DNSExfiltration(99)\n", |
| 461 | + " model = DNSExfiltration(98)\n", |
465 | 462 | " model.load_state_dict(torch.load(MODEL_DIRECTORY+'detect_dns_data_exfiltration_using_pretrained_model_in_dsdl.pt',map_location=torch.device('cpu')))\n",
|
466 | 463 | " model = model.to('cpu')\n",
|
467 | 464 | " model.eval()\n",
|
|
0 commit comments