Skip to content

Commit

Permalink
[FEATURE] In stat: report multiple top values
Browse files Browse the repository at this point in the history
  • Loading branch information
hasherezade committed Sep 1, 2024
1 parent 62afa8e commit 31c2333
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 13 deletions.
7 changes: 2 additions & 5 deletions stats/multi_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,15 +141,12 @@ namespace pesieve {
outs << std::dec << histogram.size();

std::set<BYTE> values;
size_t freq = stats::getMostFrequentValues<BYTE>(frequencies, values);
size_t freq = stats::getMostFrequentValues(frequencies, values, 16, 8);
if (freq && values.size()) {
outs << ",\n";
OUT_PADDED(outs, level, "\"most_freq_vals\" : ");
outs << std::hex << "\"";
for (auto itr = values.begin(); itr != values.end(); ++itr) {
BYTE mVal = *itr;
outs << util::escape_path_separators(stats::hexdumpValue<BYTE>(&mVal, sizeof(BYTE)));
}
outs << util::escape_path_separators(stats::hexdumpValues(values));
outs << "\"";
}
outs << ",\n";
Expand Down
4 changes: 2 additions & 2 deletions stats/stats_analyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ namespace pesieve {
virtual bool _isMatching(IN const AreaMultiStats& stats)
{
const double kMinNBRatio = 0.17;
const BYTE mFreqVal = getMostFrequentValue<BYTE>(stats.currArea.frequencies);
BYTE mFreqVal = getMostFrequentValue(stats.currArea.frequencies);
double entropy = stats.currArea.entropy;
const size_t populationSize = stats.currArea.histogram.size();

Expand Down Expand Up @@ -281,7 +281,7 @@ namespace pesieve {
virtual bool _isMatching(IN const AreaMultiStats& stats)
{
double entropy = stats.currArea.entropy;
const BYTE mFreqVal = getMostFrequentValue<BYTE>(stats.currArea.frequencies);
const BYTE mFreqVal = getMostFrequentValue(stats.currArea.frequencies);
bool fullAreaEncrypted = (entropy > ENTROPY_STRONG_ENC_TRESHOLD);// strong encryption
if (mFreqVal != 0 && entropy > ENTROPY_ENC_TRESHOLD) {
if (stats.currArea.frequencies.size() > 1) {
Expand Down
33 changes: 27 additions & 6 deletions stats/stats_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <windows.h>
#include <iostream>
#include <string>
#include <map>
#include <set>

namespace pesieve {
namespace stats {
Expand All @@ -17,9 +19,20 @@ namespace pesieve {
return ss.str();
}

// Builds one string out of the hex dumps of all the values in the set.
// The values are visited in the set's iteration order and each one is rendered
// by stats::hexdumpValue<T>; nothing is inserted between consecutive dumps.
// The set is only read, so it is accepted by const reference (const sets and
// temporaries are accepted as well).
// Returns an empty string for an empty set.
template <typename T>
std::string hexdumpValues(const std::set<T>& values)
{
    std::stringstream outs;
    for (const T& element : values) {
        // Work on a local copy: hexdumpValue is handed a pointer to a non-const T,
        // exactly as before, while the elements of a set are const.
        T mVal = element;
        outs << stats::hexdumpValue<T>(&mVal, sizeof(T));
    }
    return outs.str();
}

// return the most frequent value
template <typename T>
T getMostFrequentValue(IN std::map<size_t, std::set< T >> frequencies)
BYTE getMostFrequentValue(IN const std::map<size_t, std::set< T >>& frequencies)
{
auto itr = frequencies.rbegin();
if (itr == frequencies.rend()) {
Expand All @@ -32,16 +45,24 @@ namespace pesieve {

// Collects the most frequent values and returns the number of occurrences
// of the most frequent one.
//
// frequencies : maps an occurrence count to the set of values that have this count
// values      : receives the selected values; they are inserted into the set,
//               whatever it already contains is kept
// top         : maximal number of distinct counts (tiers) to collect, starting from
//               the highest one. 0 is treated as 1, so the two-argument call keeps
//               reporting the values of the highest frequency
// maxDiff     : maximal allowed drop of the count between two consecutive tiers;
//               collecting stops at the first tier that drops by more than this
// returns     : the highest frequency, or 0 if `frequencies` is empty
template <typename T>
size_t getMostFrequentValues(IN const std::map<size_t, std::set< T >> &frequencies, OUT std::set<T>& values, IN OPTIONAL size_t top = 0, IN OPTIONAL size_t maxDiff = 0)
{
    auto itr = frequencies.rbegin();
    if (itr == frequencies.rend()) {
        return 0;
    }
    // the map is ordered by the count, so the reverse iteration starts at the highest frequency
    const size_t mFreq = itr->first;
    // always report at least the top tier, even when `top` was left at its default
    const size_t tiers = (top != 0) ? top : 1;
    size_t prev = mFreq;
    for (size_t i = 0; i < tiers && itr != frequencies.rend(); ++itr, ++i) {
        // counts only decrease in this direction, so the subtraction cannot wrap around
        const size_t diff = prev - itr->first;
#ifdef _DEBUG
        std::cout << "Freq: " << itr->first << " diff : " << diff << "\n";
#endif
        if (diff > maxDiff) break;
        prev = itr->first;
        values.insert(itr->second.begin(), itr->second.end());
    }
    return mFreq;
}

Expand Down

0 comments on commit 31c2333

Please sign in to comment.