Full Format Specification

# paper
<paper> -> {
  "flow_aggregations": <flow-aggregations>,
  "flows": <flows>, 
  "packets": <packets>, 
  "methods": <methods>,
  "evaluations": <evaluations>,
  "datasets": <datasets>,
  "reference": <reference>
}
# end

# reference
<reference> -> {
  "author": <free-text>,
  "title": <free-text>, 
  "year": <free-integer>
}
# end

# method
<methods> -> [<method>+] | null
<method> -> {
  "name": <free-text>,
  "supervision": <supervision>,
  "type": <type>,
  "similarity_metric": <similarity_metric>
}
<supervision> -> "supervised" | "unsupervised" | "semi_supervised" | "descriptive" | "nest"
<type> -> [<possible_type>+] | <possible_type>
<possible_type> -> "classification" | "regression" | "clustering" | "association_rules" | "anomaly_detection" | "statistics" | "heuristics" | "feature_selection" | "other"
<similarity_metric> -> [<possible_similarity>+] | <possible_similarity>
<possible_similarity> -> "euclidean" | "mutual_information" | "correlation" | "cosine" | "jaccard" | "mahalanobis" | "hamming" | "l1" | "exact_matching" | "probability" | "other"
# end

# evaluation
<evaluations> -> [<evaluation>+] | null
<evaluation> -> {
  "metrics": [<metric>+] | null,
  "method_evaluation": <method_evaluation> | null  # null means no evaluation of method
}
<metric> -> "error_rate"   # e.g. accuracy, precision, recall, f-1, etc
<metric> -> "classification_loss"   # e.g. log-loss, etc
<metric> -> "error_rate_variation"  # e.g. ROC, AUC, etc
<metric> -> "error_distance"  # e.g. sum of squared error, absolute error, r^2, etc
<metric> -> "clustering_metrics"  # e.g. silhouette, etc
<metric> -> "time"  # time complexity/how much time it takes
<metric> -> "space"  # space complexity/how much space it takes
<method_evaluation> -> "internal"  # e.g. silhouette, metrics that do not depend on labels
<method_evaluation> -> "external" # e.g. accuracy, metrics dependent on labels
<method_evaluation> -> "both"  # both internal and external
# end

# dataset
<datasets> -> [<dataset>+] | null
<dataset> -> <free-text>  # dataset key
# end

# flows
<flows> -> [<flow>+] | null
<flow> -> {
  "features": <features>, 
  "goals": <goals>, 
  "key": <key>, 
  "tool": <tool>, 
  "window": <window>,
  "traffic_type": <traffic_type>
}
# end

# window
<window> -> <free-integer> | null
# end

# traffic_type
<traffic_type> -> [<traffic_types>+] | <traffic_types>
<traffic_types> -> "ip" | "tcp" | "udp" | "icmp" | "dns" | "http" | null
# end

# key
<key> -> {
  "bidirectional": <bidirectional>, 
  "key_features": <features>
} | null
# end
# bidirectional
<bidirectional> -> true | false | null | "separate_directions"  # "separate_directions" in the case where the key is bidirectional and each feature appears twice, one for each direction
# end

# packets
<packets> -> [<packet>+] | null
<packet> -> {
  "features": <features>, 
  "goals": <goals>, 
  "tool": <tool>, 
  "traffic_type": <traffic_type>
}
# end

# flow-aggregations
# flow-aggregations -- features are extracted from sets of flows
<flow-aggregations> -> [<flow-aggregation>+] | null
<flow-aggregation> -> {
  "flow": <flow>,
  "features": <features>, 
  "goals": <goals>, 
  "key": <key>, 
  "tool": <tool>, 
  "window": <window>,
  "traffic_type": <traffic_type>
}
# end

# features
<features> -> [<feature>+] | null
<feature> -> <value> | <base-feature>
# end

<packet-feature> -> <feature>
<flow-feature> -> <feature>
<aggregation-feature> -> <feature>

<tool> -> <free-text>  # tool key

# operation
# <value> always outputs a single number (a <value>)
<value> -> {"mean": [<values>]}
<value> -> {"stdev": [<values>]}
<value> -> {"variance": [<values>]}
<value> -> {"median": [<values>]}
<value> -> {"quantile": [<values>, <value>]} # second argument is a number from 0 to 1, where 0 is the minimum and 1 the maximum
<value> -> {"minimum": [<values>]} | {"minimum": [<value>+]}
<value> -> {"maximum": [<values>]} | {"maximum": [<value>+]}
<value> -> {"argmin": [<values>]} | {"argmin": [<value>+]}
<value> -> {"argmax": [<values>]} | {"argmax": [<value>+]}
<value> -> {"floor": [<value>]}
<value> -> {"ceil": [<value>]}
<value> -> {"mode": [<values>]} # returns the most frequent element in <values>
<value> -> {"count": [<selection>]} | {"count": [<values>]}  # returns number of selected objects
<value> -> {"distinct": [<values>]}  # returns the number of distinct values in <values>
<value> -> {"apply": [<feature>, <selection>]}  # returns a single feature value for the selection of objects
<value> -> {"add": [<value>+]} | {"add": [<values>]}
<value> -> {"subtract": [<value>, <value>]}
<value> -> {"multiply": [<value>+]} | {"multiply": [<values>]}
<value> -> {"divide": [<value>, <value>]}
<value> -> {"log": [<value>]}
<value> -> {"exp": [<value>]}
<value> -> {"entropy": [<value>]}
<value> -> {"get": [<value>, <values>]} | {"get": [<value>, <value>]}  # gets the <value>-th element of the second argument (if the second argument is also <value>, the elements are bits)
<value> -> {"ifelse": [<logic>, <value>, <value>]}  # if the condition (<logic>) is true, return the first <value>, else return the second <value>
<value> -> {"get_previous": [<aggregation-feature>]}  # gets feature at time = t-1
<value> -> {"left_shift": [<value>, <value>]}  # shift the bits in the first value left by the second value
<value> -> {"right_shift": [<value>, <value>]}  # shift the bits in the first value right by the second value
<value> -> <free-integer> | <base-feature> | <free-float>
# end

# values
# <values> outputs a list of <value>
<values> -> {"map": [<feature>, <selection>]}  # returns a feature value for each object in selection
<values> -> {"slice": [<value>, <value>, <values>]} | {"slice": [<value>, <value>, <value>]}  # gets third_argument[first_argument:second_argument] (if the third argument is also <value>, the elements are bits); indexing is like in Python
<values> -> {"quantile_range": [<values>, <value>, <value>]} # e.g. {"quantile_range": [<values>, 0, 0.25]} returns all values in the first quartile
<values> -> <feature>  # features from one level-down (in flows, packet features; in flow-aggregations, flow features)
# end

# selection
# <selection> outputs a list of objects (packets, flows or aggregations, depending on what kind of feature is used)
<selection> -> {"select": [<logic>]}
<selection> -> {"select_slice": [<value>, <value>]} | {"select_slice": [<value>, <value>, <selection>]}  # selects a slice from the first value to the second value, with Python-like indexing (if a <selection> is not provided, default to selecting everything)
<selection> -> "forward" | "backward"  # special cases for selection; select objects in the forward (or backward) direction
<selection> -> {"select_flows": [<logic>]}  # same as "select", but outputs flows; only valid when used in flow aggregations
<selection> -> {"select_slice_flows": [<value>, <value>]} | {"select_slice_flows": [<value>, <value>, <selection>]}  # same as "select_slice", but outputs flows; only valid when used in flow aggregations
<selection> -> "forward_flows" | "backward_flows"  # same as "forward"/"backward", but outputs flows; only valid when used in flow aggregations
# end

# logic
# <logic> is used for selection, should be evaluated for each object
<logic> -> {"and": [<logic>+]} 
<logic> -> {"or": [<logic>+]}
<logic> -> {"geq": [<feature>, <value>]}
<logic> -> {"leq": [<feature>, <value>]}
<logic> -> {"less": [<feature>, <value>]}
<logic> -> {"greater": [<feature>, <value>]}
<logic> -> {"equal": [<feature>, <value>]}
<logic> -> true | false
# end