Main Blocks

The main blocks are ordered according to the typical processing pipeline:

Data > Preprocessing > Analysis Method > Evaluation > Result

The Reference block contains information about the publication itself (title, authors, etc.) and the curator of the paper.

The Data block contains information about the datasets used in the publication.

The Preprocessing block contains information about the processing done to the data before passing it on to an analysis method. This process usually consists of extracting vectors of features from the original data.

The Analysis Method block contains information about what analytical methods were used in the publication.

The Evaluation block contains information about how the performance of the methods in the previous section was evaluated.

The Result block contains information about the conclusions of the paper. The information for this block is usually completely contained in the conclusion section of the paper.

In addition to the main blocks, there is a mandatory version field (which is always “v2”) to avoid confusion with past or future versions of the format.

JSON example

Summary with only the main blocks:

{
  "version": "v2",
  "reference": {
    ...
  },
  "data": {
    ...
  },
  "preprocessing": {
    ...
  },
  "analysis_method": {
    ...
  },
  "evaluation": {
    ...
  },
  "result": {
    ...
  }
}

Example of a complete file:

{
  "version": "v2",
  "reference": {
    "title": "time-activity footprints in ip traffic",
    "authors": ["Iglesias, Félix", "Zseby, Tanja"],
    "publication_name": "computer networks",
    "publication_type": "peer_reviewed_journal",
    "year": 2016,
    "organization_publishers": [
      "elsevier"
    ],
    "pages_number_of": 12,
    "bibtex": {
        "type": "article",
        "volume": "107, Part 1",
        "issue": "missing",
        "pages": "64--75"
    },
    "access_open": false,
    "curated_by": "felix",
    "curated_last_revision": "10-04-2017",
    "curated_revision_number": 1
  },
  "data": {
    "datasets": [
      {
        "name": "mawi-2015",
        "availability": "public",
        "format": "packet",
        "types": [
          "ip"
        ],
        "generation": "captured",
        "generation_year": 2015,
        "covered_period": "minutes",
        "details": [
          "raw",
          "no_payload"
        ],
        "subsets": [
          "01-01-2015",
          "15-04-2015",
          "31-07-2015"
        ],
        "anonymized": true
      }
    ]
  },
  "preprocessing": {
    "performed_feature_selection": true,
    "packet_analysis_oriented": false,
    "flow_analysis_oriented": true,
    "flow_aggregation_analysis_oriented": false,
    "tools": [
      {
        "name": "tshark",
        "detail": "v2.0.0",
        "availability": "public"
      },
      {
        "name": "own_perl_scripts",
        "detail": "none",
        "availability": "private"
      }
    ],
    "normalization_type": "range",
    "transformations": [
      "flow_extraction",
      "log",
      "time_series",
      "feature_operation",
      "class_separation"
    ],
    "final_data_format": "numerical_vectors",
    "feature_selections": [
      {
        "name": "max-relevance min-redundancy filter (correlation and MI based)",
        "type": "filter",
        "classifier": "none",
        "role": "main"
      }
    ],
    "packets": "none",
    "flows": [
      {
        "selection": "expert_knowledge",
        "role": "main",
        "main_goal": "traffic_classification",
        "active_timeout": 60,
        "idle_timeout": 60,
        "bidirectional": false,
        "features": [
          {
            "log": [
              "octetTotalCount"
            ]
          },
          {
            "log": [
              "packetTotalCount"
            ]
          },
          "_activeForSeconds",
          {
            "log": [
              {
                "divide": [
                  "octetTotalCount",
                  "_activeForSeconds"
                ]
              }
            ]
          },
          {
            "log": [
              {
                "divide": [
                  "packetTotalCount",
                  "_activeForSeconds"
                ]
              }
            ]
          },
          "__maximumConsecutiveSeconds",
          "__minimumConsecutiveSeconds",
          {
            "maximum": [
              "_interPacketTimeMicroseconds"
            ]
          },
          {
            "minimum": [
              "_interPacketTimeMicroseconds"
            ]
          },
          "__numberOfActivityIntervals"
        ],
        "key_features": [
          "sourceIPv4Address",
          "destinationIPv4Address",
          "protocolIdentifier"
        ]
      },
      {
        "selection": "feature_selection",
        "role": "main",
        "main_goal": "traffic_classification",
        "active_timeout": 60,
        "idle_timeout": 60,
        "bidirectional": false,
        "features": [
          {
            "log": [
              "octetTotalCount"
            ]
          },
          {
            "log": [
              {
                "divide": [
                  "octetTotalCount",
                  "_activeForSeconds"
                ]
              }
            ]
          },
          {
            "maximum": [
              "_interPacketTimeMicroseconds"
            ]
          },
          {
            "minimum": [
              "_interPacketTimeMicroseconds"
            ]
          }
        ],
        "key_features": [
          "sourceIPv4Address",
          "destinationIPv4Address",
          "protocolIdentifier"
        ]
      }
    ],
    "flow_aggregations": "none"
  },
  "analysis_method": {
    "supervised_learning": false,
    "unsupervised_learning": true,
    "semisupervised_learning": false,
    "anomaly_detection": true,
    "tools": [
      {
        "name": "matlab_fuzzyclusteringtoolbox",
        "availability": "public",
        "detail": "none"
      },
      {
        "name": "own_matlab_scripts",
        "availability": "private",
        "detail": "none"
      }
    ],
    "algorithms": [
      {
        "family": "fuzzy_clustering",
        "detail": "Gustafson-kessel fuzzy clustering",
        "learning": "unsupervised",
        "role": "main",
        "type": "clustering",
        "metric/decision_criteria": "mahalanobis",
        "tools": [
          {
            "name": "matlab_fuzzyclusteringtoolbox",
            "detail": "none",
            "availability": "public"
          }
        ],
        "source": "referenced",
        "parameters_provided": false
      },
      {
        "family": "statistics",
        "detail": "Mad-based outlier removal",
        "learning": "statistics/model_fit",
        "role": "main",
        "type": "outlier_detection",
        "metric/decision_criteria": "mahalanobis",
        "tools": [
          {
            "name": "own_matlab_scripts",
            "detail": "none",
            "availability": "private"
          }
        ],
        "source": "referenced",
        "parameters_provided": false
      }
    ]
  },
  "evaluation": {
    "algorithm_comparison": false,
    "internal_validation": true,
    "external_validation": true,
    "dpi-based_validation": false,
    "port-based_validation": false,
    "pre-knowledge-based_validation": false,
    "manual_verification": true,
    "implementation_in_real_scenario": false,
    "train_test_separation": false,
    "methods": [
      {
        "name": "manual verification",
        "type": "external",
        "metrics": [
          "heuristic"
        ],
        "source": "popular"
      },
      {
        "name": "weighted vote",
        "type": "nest",
        "metrics": [
          "vote"
        ],
        "source": "popular"
      },
      {
        "name": "classification entropy",
        "type": "internal",
        "metrics": [
          "clustering_metrics"
        ],
        "source": "referenced"
      },
      {
        "name": "partition index",
        "type": "internal",
        "metrics": [
          "clustering_metrics"
        ],
        "source": "referenced"
      },
      {
        "name": "xie and beni index",
        "type": "internal",
        "metrics": [
          "clustering_metrics"
        ],
        "source": "referenced"
      },
      {
        "name": "clustering gain",
        "type": "internal",
        "metrics": [
          "clustering_metrics"
        ],
        "source": "referenced"
      },
      {
        "name": "own cluster validity",
        "type": "internal",
        "metrics": [
          "clustering_metrics"
        ],
        "source": "missing"
      }
    ]
  },
  "result": {
    "main_goal": "detect_anomalies",
    "focus_main": "methodology/framework",
    "claimed_improvements": [
      "improved_data_description",
      "improved_traffic_classification",
      "fast_processing",
      "_flaw_detection"
    ],
    "reproducibility": "replicable",
    "subgoals": [
      "traffic_classification"
    ]
  }
}