13 Commits

Author SHA1 Message Date
Hieu Le 8987fd239f remove: redundant code 2021-03-31 23:47:39 +07:00
Hieu Le e9a85f1ef8 update: FIN flag checks 2021-03-31 23:42:08 +07:00
Hieu Le c1a61b40a3 fix: requirement packages 2021-03-31 23:41:35 +07:00
Hieu Le 12b1b6ef00 Update README.md 2021-03-28 05:21:07 +00:00
Le Quang Hieu 97c178bdf0 Merge branch 'master' of https://gitlab.com/hieulw/cicflowmeter 2021-02-20 01:07:53 +07:00
Le Quang Hieu a79c351424 quickfix: unsuport type float and Decimal 2021-02-20 01:07:35 +07:00
Hieu Le 0e91fc2d77 fix: get_min_forward_header_bytes() 2021-02-01 10:10:03 +07:00
Le Quang Hieu 104fe82e7f fix: setup.py 2021-01-31 12:17:18 +07:00
Le Quang Hieu b6521d355b add: README.md 2021-01-31 12:06:42 +07:00
Le Quang Hieu 18cb15e38a quickfix: flow session exception 2021-01-31 11:57:11 +07:00
Le Quang Hieu ddeeb5a3bf remove output 2020-12-14 14:46:32 +07:00
Hieu Le 853b00aade v0.1.5 2020-12-12 16:12:44 +07:00
Hieu Le 51936f6319 Print port and faster packet send rate 2020-12-12 16:11:35 +07:00
9 changed files with 103 additions and 82 deletions
+36 -1
View File
@@ -1,5 +1,40 @@
# Python CICFlowMeter
---
> This project is not maintained actively by me. If you found something wrong (bugs, incorrect results) feel free to create merge request.
### Installation
```sh
git clone https://gitlab.com/hieulw/cicflowmeter
cd cicflowmeter
python setup.py install
```
### Usage
```sh
usage: cicflowmeter [-h] (-i INPUT_INTERFACE | -f INPUT_FILE) [-c] [-u URL_MODEL] output
positional arguments:
output output file name (in flow mode) or directory (in sequence mode)
optional arguments:
-h, --help show this help message and exit
-i INPUT_INTERFACE capture online data from INPUT_INTERFACE
-f INPUT_FILE capture offline data from INPUT_FILE
-c, --csv, --flow output flows as csv
```
Convert pcap file to flow csv:
```
cicflowmeter -f example.pcap -c flows.csv
```
Sniff packets real-time from interface to flow csv: (**need root permission**)
```
cicflowmeter -i eth0 -c flows.csv
```
### Reference: https://www.unb.ca/cic/research/applications.html#CICFlowMeter
+3 -3
View File
@@ -1,4 +1,4 @@
numpy~=1.18
scipy~=1.4.1
scapy~=2.4.3
numpy==1.18
scipy==1.4.1
scapy==2.4.3
requests
+15 -1
View File
@@ -14,7 +14,20 @@ AUTHOR = "Le Hieu"
REQUIRES_PYTHON = ">=3.7.0"
VERSION = None
REQUIRED = ["numpy", "scipy", "scapy"]
def get_requirements(source: str = "requirements.txt"):
requirements = []
with open(source) as f:
for line in f:
package, _, comment = line.partition("#")
package = package.strip()
if package:
requirements.append(package)
return requirements
REQUIRED = get_requirements("requirements.txt")
# The rest you shouldn't have to touch too much :)
# ------------------------------------------------
@@ -47,6 +60,7 @@ setup(
version=about["__version__"],
description=DESCRIPTION,
long_description=long_description,
long_description_content_type="text/markdown",
author=AUTHOR,
author_email=EMAIL,
python_requires=REQUIRES_PYTHON,
+1 -1
View File
@@ -1 +1 @@
__version__ = "0.1.4"
__version__ = "0.1.6"
+1 -1
View File
@@ -1,4 +1,4 @@
EXPIRED_UPDATE = 240
CLUMP_TIMEOUT = 0.001
CLUMP_TIMEOUT = 1
ACTIVE_TIMEOUT = 0.005
BULK_BOUND = 4
+6
View File
@@ -163,6 +163,9 @@ class FlowBytes:
packets = self.feature.packets
if not packets:
return 0
return sum(
self._header_size(packet)
for packet, direction in packets
@@ -179,6 +182,9 @@ class FlowBytes:
packets = self.feature.packets
if not packets:
return 0
return min(
self._header_size(packet)
for packet, direction in packets
+5 -4
View File
@@ -252,11 +252,12 @@ class Flow:
"""
if (current_time - self.last_active) > constants.ACTIVE_TIMEOUT:
duration = self.last_active - self.start_active
duration = abs(float(self.last_active - self.start_active))
if duration > 0:
self.active.append(duration)
self.idle.append(current_time - self.last_active)
self.start_active = self.last_active = current_time
self.active.append(1e6 * duration)
self.idle.append(1e6 * (current_time - self.last_active))
self.start_active = current_time
self.last_active = current_time
else:
self.last_active = current_time
+32 -58
View File
@@ -1,7 +1,6 @@
import csv
from collections import defaultdict
import requests
from scapy.sessions import DefaultSession
from .features.context.packet_direction import PacketDirection
@@ -10,6 +9,7 @@ from .flow import Flow
EXPIRED_UPDATE = 40
MACHINE_LEARNING_API = "http://localhost:8000/predict"
GARBAGE_COLLECT_PACKETS = 100
class FlowSession(DefaultSession):
@@ -40,14 +40,19 @@ class FlowSession(DefaultSession):
direction = PacketDirection.FORWARD
if self.output_mode != "flow":
if "IP" not in packet:
if "TCP" not in packet:
return
elif "UDP" not in packet:
return
self.packets_count += 1
try:
# Creates a key variable to check
packet_flow_key = get_packet_flow_key(packet, direction)
flow = self.flows.get((packet_flow_key, count))
except Exception:
return
# Creates a key variable to check
packet_flow_key = get_packet_flow_key(packet, direction)
flow = self.flows.get((packet_flow_key, count))
self.packets_count += 1
# If there is no forward flow with a count of 0
if flow is None:
@@ -56,31 +61,18 @@ class FlowSession(DefaultSession):
packet_flow_key = get_packet_flow_key(packet, direction)
flow = self.flows.get((packet_flow_key, count))
if flow is None:
# If no flow exists create a new flow
direction = PacketDirection.FORWARD
flow = Flow(packet, direction)
packet_flow_key = get_packet_flow_key(packet, direction)
self.flows[(packet_flow_key, count)] = flow
elif (packet.time - flow.latest_timestamp) > EXPIRED_UPDATE:
# If the packet exists in the flow but the packet is sent
# after too much of a delay than it is a part of a new flow.
expired = EXPIRED_UPDATE
while (packet.time - flow.latest_timestamp) > expired:
count += 1
expired += EXPIRED_UPDATE
flow = self.flows.get((packet_flow_key, count))
if flow is None:
flow = Flow(packet, direction)
self.flows[(packet_flow_key, count)] = flow
break
if flow is None:
# If no flow exists create a new flow
direction = PacketDirection.FORWARD
flow = Flow(packet, direction)
packet_flow_key = get_packet_flow_key(packet, direction)
self.flows[(packet_flow_key, count)] = flow
elif (packet.time - flow.latest_timestamp) > EXPIRED_UPDATE:
# If the packet exists in the flow but the packet is sent
# after too much of a delay than it is a part of a new flow.
expired = EXPIRED_UPDATE
while (packet.time - flow.latest_timestamp) > expired:
count += 1
expired += EXPIRED_UPDATE
flow = self.flows.get((packet_flow_key, count))
@@ -89,13 +81,20 @@ class FlowSession(DefaultSession):
flow = Flow(packet, direction)
self.flows[(packet_flow_key, count)] = flow
break
elif "F" in str(packet.flags):
# If it has FIN flag then early collect flow and continue
flow.add_packet(packet.flags)
self.garbage_collect(packet.time)
return
flow.add_packet(packet, direction)
if self.packets_count % 10000 == 0 or (
if not self.url_model:
GARBAGE_COLLECT_PACKETS = 10000
if self.packets_count % GARBAGE_COLLECT_PACKETS == 0 or (
flow.duration > 120 and self.output_mode == "flow"
):
print("Packet count: {}".format(self.packets_count))
self.garbage_collect(packet.time)
def get_flows(self) -> list:
@@ -103,7 +102,8 @@ class FlowSession(DefaultSession):
def garbage_collect(self, latest_time) -> None:
# TODO: Garbage Collection / Feature Extraction should have a separate thread
print("Garbage Collection Began. Flows = {}".format(len(self.flows)))
if not self.url_model:
print("Garbage Collection Began. Flows = {}".format(len(self.flows)))
keys = list(self.flows.keys())
for k in keys:
flow = self.flows.get(k)
@@ -115,33 +115,6 @@ class FlowSession(DefaultSession):
):
data = flow.get_data()
# POST Request to Model API
payload = {"columns": list(data.keys()), "data": [list(data.values())]}
post = requests.post(
self.url_model,
json=payload,
headers={"Content-Type": "application/json; format=pandas-split"},
)
benign_threshold = 0.9
resp = post.json()
result = resp["result"].pop()
if result == 0:
if resp["probability"][0][result] < benign_threshold:
result_print = "Malicious"
else:
result_print = "Benign"
else:
result_print = "Malicious"
print(
"{: <15} -> {: <15} \t {} (~{:.2f}%)".format(
resp["src_ip"],
resp["dst_ip"],
result_print,
resp["probability"].pop()[result] * 100,
)
)
if self.csv_line == 0:
self.csv_writer.writerow(data.keys())
@@ -149,7 +122,8 @@ class FlowSession(DefaultSession):
self.csv_line += 1
del self.flows[k]
print("Garbage Collection Finished. Flows = {}".format(len(self.flows)))
if not self.url_model:
print("Garbage Collection Finished. Flows = {}".format(len(self.flows)))
def generate_session_class(output_mode, output_file, url_model):
+4 -13
View File
@@ -35,23 +35,22 @@ def main():
input_group = parser.add_mutually_exclusive_group(required=True)
input_group.add_argument(
"-n",
"--online",
"-i",
"--interface",
action="store",
dest="input_interface",
help="capture online data from INPUT_INTERFACE",
)
input_group.add_argument(
"-f",
"--offline",
"--file",
action="store",
dest="input_file",
help="capture offline data from INPUT_FILE",
)
output_group = parser.add_mutually_exclusive_group(required=True)
output_group = parser.add_mutually_exclusive_group(required=False)
output_group.add_argument(
"-c",
"--csv",
@@ -61,15 +60,6 @@ def main():
dest="output_mode",
help="output flows as csv",
)
output_group.add_argument(
"-s",
"--json",
"--sequence",
action="store_const",
const="sequence",
dest="output_mode",
help="output flow segments as json",
)
url_model = parser.add_mutually_exclusive_group(required=False)
url_model.add_argument(
@@ -84,6 +74,7 @@ def main():
"output",
help="output file name (in flow mode) or directory (in sequence mode)",
)
args = parser.parse_args()
sniffer = create_sniffer(