Skip to content

Commit

Permalink
v1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
LasmGratel committed Dec 1, 2021
1 parent d3ea15d commit d77b543
Show file tree
Hide file tree
Showing 11 changed files with 892 additions and 0 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
# SuffixTreeSharp
[![NuGet Status](http://nugetstatus.com/SuffixTreeSharp.png)](http://nugetstatus.com/packages/SuffixTreeSharp)

Generalized Suffix Tree in pure C#

Targeting .NET Standard 1.6
20 changes: 20 additions & 0 deletions SuffixTreeSharp.Test/SuffixTreeSharp.Test.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<!-- Framework the test project is built for. -->
<TargetFramework>netcoreapp3.1</TargetFramework>

<!-- Marks this project as not packable, so it is excluded when packages are produced. -->
<IsPackable>false</IsPackable>
</PropertyGroup>

<!-- Test-time dependencies: the .NET test SDK, the MSTest adapter and framework, and the coverlet collector. -->
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.9.4" />
<PackageReference Include="MSTest.TestAdapter" Version="2.2.3" />
<PackageReference Include="MSTest.TestFramework" Version="2.2.3" />
<PackageReference Include="coverlet.collector" Version="3.0.2" />
</ItemGroup>

<!-- The library project referenced by this test project. -->
<ItemGroup>
<ProjectReference Include="..\SuffixTreeSharp\SuffixTreeSharp.csproj" />
</ItemGroup>

</Project>
249 changes: 249 additions & 0 deletions SuffixTreeSharp.Test/SuffixTreeTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
using System.Collections.Generic;
using System.Linq;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace SuffixTreeSharp.Test
{
[TestClass]
public class SuffixTreeTest
{
/// <summary>
/// Fails the current test unless <paramref name="collection"/> is non-null and empty.
/// </summary>
/// <typeparam name="T">Element type of the collection.</typeparam>
/// <param name="collection">The collection expected to contain no elements.</param>
public static void AssertEmpty<T>(ICollection<T> collection)
{
    // Report a null collection as an assertion failure instead of a NullReferenceException.
    Assert.IsNotNull(collection, "Expected empty collection, but it was null.");

    // AreEqual includes the actual element count in the failure output.
    Assert.AreEqual(0, collection.Count, "Expected empty collection.");
}

[TestMethod]
public void TestBasicTreeGeneration()
{
    // Round one: a tree holding only "cacao" under key 0.
    var tree = new GeneralizedSuffixTree();
    var indexedWord = "cacao";
    tree.Put(indexedWord, 0);

    // Each substring of the stored word has to map back to key 0.
    foreach (var fragment in indexedWord.GetSubstrings())
    {
        Assert.IsTrue(tree.Search(fragment).Contains(0));
    }

    // Near misses that are not substrings of the stored word must yield nothing.
    foreach (var miss in new[] { "caco", "cacaoo", "ccacao" })
    {
        AssertEmpty(tree.Search(miss));
    }

    // Round two: same checks on a fresh tree holding "bookkeeper".
    tree = new GeneralizedSuffixTree();
    indexedWord = "bookkeeper";
    tree.Put(indexedWord, 0);

    foreach (var fragment in indexedWord.GetSubstrings())
    {
        Assert.IsTrue(tree.Search(fragment).Contains(0));
    }

    foreach (var miss in new[] { "books", "boke", "ookepr" })
    {
        AssertEmpty(tree.Search(miss));
    }
}

[TestMethod]
public void TestWeirdword()
{
    var tree = new GeneralizedSuffixTree();
    var indexedWord = "cacacato";
    tree.Put(indexedWord, 0);

    // Every substring of the stored word must be reported under key 0.
    var fragments = indexedWord.GetSubstrings();
    foreach (var fragment in fragments)
    {
        var hits = tree.Search(fragment);
        Assert.IsTrue(hits.Contains(0));
    }
}

[TestMethod]
public void TestDouble()
{
    // The same word is stored twice, under two different keys.
    var tree = new GeneralizedSuffixTree();
    var repeatedWord = "cacao";
    var keys = new[] { 0, 1 };
    tree.Put(repeatedWord, keys[0]);
    tree.Put(repeatedWord, keys[1]);

    // Each substring has to be found under both keys.
    foreach (var fragment in repeatedWord.GetSubstrings())
    {
        foreach (var key in keys)
        {
            Assert.IsTrue(tree.Search(fragment).Contains(key));
        }
    }
}

[TestMethod]
public void TestBananaAddition()
{
    var tree = new GeneralizedSuffixTree();
    var words = new[] { "banana", "bano", "ba" };

    // Phase 1: insert each word under its array position and check it immediately.
    for (var index = 0; index < words.Length; index++)
    {
        var word = words[index];
        tree.Put(word, index);

        foreach (var fragment in word.GetSubstrings())
        {
            var hits = tree.Search(fragment);
            Assert.IsNotNull(hits, $"result null for string {fragment} after adding {word}");
            Assert.IsTrue(hits.Contains(index), $"substring {fragment} not found after adding {word}");
        }
    }

    // Phase 2: once all words are in, every earlier word must still be retrievable.
    for (var index = 0; index < words.Length; index++)
    {
        foreach (var fragment in words[index].GetSubstrings())
        {
            Assert.IsTrue(tree.Search(fragment).Contains(index));
        }
    }

    // Phase 3: insert the same words again under shifted keys to check stability.
    for (var index = 0; index < words.Length; index++)
    {
        var shiftedKey = index + words.Length;
        tree.Put(words[index], shiftedKey);

        foreach (var fragment in words[index].GetSubstrings())
        {
            Assert.IsTrue(tree.Search(fragment).Contains(shiftedKey));
        }
    }
}

[TestMethod]
public void TestAddition()
{
    var tree = new GeneralizedSuffixTree();
    var words = new[] { "cacaor", "caricato", "cacato", "cacata", "caricata", "cacao", "banana" };

    // Phase 1: insert each word under its array position and check it immediately.
    for (var index = 0; index < words.Length; index++)
    {
        var word = words[index];
        tree.Put(word, index);

        foreach (var fragment in word.GetSubstrings())
        {
            var hits = tree.Search(fragment);
            Assert.IsNotNull(hits, $"result null for string {fragment} after adding {word}");
            Assert.IsTrue(hits.Contains(index), $"substring {fragment} not found after adding {word}");
        }
    }

    // Phase 2: once all words are in, every earlier word must still be retrievable.
    for (var index = 0; index < words.Length; index++)
    {
        var word = words[index];

        foreach (var fragment in word.GetSubstrings())
        {
            var hits = tree.Search(fragment);
            Assert.IsNotNull(hits, $"result null for string {fragment} after adding {word}");
            Assert.IsTrue(hits.Contains(index), $"substring {fragment} not found after adding {word}");
        }
    }

    // Phase 3: insert the same words again under shifted keys to check stability.
    for (var index = 0; index < words.Length; index++)
    {
        var shiftedKey = index + words.Length;
        tree.Put(words[index], shiftedKey);

        foreach (var fragment in words[index].GetSubstrings())
        {
            Assert.IsTrue(tree.Search(fragment).Contains(shiftedKey));
        }
    }

    // Result-count verification is currently disabled:
    // input.computeCount();
    // TestResultsCount(input.getRoot());

    // A string that is not a substring of any stored word must yield nothing.
    AssertEmpty(tree.Search("aoca"));
}

[TestMethod]
public void TestSampleAddition()
{
    var tree = new GeneralizedSuffixTree();
    var words = new[]
    {
        "libertypike",
        "franklintn",
        "carothersjohnhenryhouse",
        "carothersezealhouse",
        "acrossthetauntonriverfromdightonindightonrockstatepark",
        "dightonma",
        "dightonrock",
        "6mineoflowgaponlowgapfork",
        "lowgapky",
        "lemasterjohnjandellenhouse",
        "lemasterhouse",
        "70wilburblvd",
        "poughkeepsieny",
        "freerhouse",
        "701laurelst",
        "conwaysc",
        "hollidayjwjrhouse",
        "mainandappletonsts",
        "menomoneefallswi",
        "mainstreethistoricdistrict",
        "addressrestricted",
        "brownsmillsnj",
        "hanoverfurnace",
        "hanoverbogironfurnace",
        "sofsavannahatfergusonaveandbethesdard",
        "savannahga",
        "bethesdahomeforboys",
        "bethesda"
    };

    // Phase 1: insert each word under its array position and check it immediately.
    for (var index = 0; index < words.Length; index++)
    {
        var word = words[index];
        tree.Put(word, index);

        foreach (var fragment in word.GetSubstrings())
        {
            var hits = tree.Search(fragment);
            Assert.IsNotNull(hits, $"result null for string {fragment} after adding {word}");
            Assert.IsTrue(hits.Contains(index), $"substring {fragment} not found after adding {word}");
        }
    }

    // Phase 2: once all words are in, every earlier word must still be retrievable.
    for (var index = 0; index < words.Length; index++)
    {
        foreach (var fragment in words[index].GetSubstrings())
        {
            Assert.IsTrue(tree.Search(fragment).Contains(index));
        }
    }

    // Phase 3: insert the same words again under shifted keys to check stability.
    for (var index = 0; index < words.Length; index++)
    {
        var shiftedKey = index + words.Length;
        tree.Put(words[index], shiftedKey);

        foreach (var fragment in words[index].GetSubstrings())
        {
            Assert.IsTrue(tree.Search(fragment).Contains(shiftedKey));
        }
    }

    // Result-count verification is currently disabled:
    // input.computeCount();
    // TestResultsCount(input.getRoot());

    // A string that is not a substring of any stored word must yield nothing.
    AssertEmpty(tree.Search("aoca"));
}

// private void TestResultsCount(Node n) {
// for (Edge e : n.getEdges().values()) {
// assertEquals(n.getData(-1).size(), n.getResultCount());
// TestResultsCount(e.getDest());
// }
// }

/* Sanity check for the GetSubstrings test helper itself. */
[TestMethod]
public void TestGetSubstrings()
{
    var expected = new HashSet<string> { "w", "r", "d", "wr", "rd", "wrd" };

    var actual = "wrd".GetSubstrings();

    // Both sets must hold exactly the same substrings.
    Assert.IsTrue(actual.SetEquals(expected));
}
}
}
50 changes: 50 additions & 0 deletions SuffixTreeSharp.Test/Utils.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace SuffixTreeSharp.Test
{
/// <summary>
/// String helpers used by the suffix tree tests.
/// </summary>
public static class Utils
{
    /// <summary>
    /// Normalizes an input string: lower-cases it and drops every character that is not
    /// an ASCII letter (<c>a</c>-<c>z</c>) or an ASCII digit (<c>0</c>-<c>9</c>).
    /// </summary>
    /// <remarks>
    /// <see cref="string"/> already declares an instance method named <c>Normalize()</c>, and
    /// instance methods take precedence over extension methods, so <c>text.Normalize()</c>
    /// performs Unicode normalization instead of calling this helper. Invoke it explicitly
    /// as <c>Utils.Normalize(text)</c>.
    /// </remarks>
    /// <param name="input">The input string to normalize.</param>
    /// <returns><paramref name="input"/> in lower case, without any non-alphanumeric character.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
    public static string Normalize(this string input)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        // Invariant casing on purpose: the culture-sensitive ToLower() maps 'I' to the
        // dotless 'ı' under Turkish/Azeri cultures, and the ASCII filter below would drop it.
        var lowered = input.ToLowerInvariant();

        var output = new StringBuilder(lowered.Length);
        foreach (var c in lowered)
        {
            if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))
            {
                output.Append(c);
            }
        }

        return output.ToString();
    }

    /// <summary>
    /// Computes the set of all non-empty substrings contained within <paramref name="str"/>.
    /// It is fairly inefficient, but it is used just in tests.
    /// </summary>
    /// <param name="str">The string to compute substrings of.</param>
    /// <returns>
    /// The set of all possible non-empty substrings of <paramref name="str"/>;
    /// an empty set when <paramref name="str"/> is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is <c>null</c>.</exception>
    public static HashSet<string> GetSubstrings(this string str)
    {
        if (str == null)
        {
            throw new ArgumentNullException(nameof(str));
        }

        var ret = new HashSet<string>();

        // Enumerate by length, then by start offset; the set removes repeated substrings.
        for (var len = 1; len <= str.Length; ++len)
        {
            for (var start = 0; start + len <= str.Length; ++start)
            {
                ret.Add(str.Substring(start, len));
            }
        }

        return ret;
    }
}
}
31 changes: 31 additions & 0 deletions SuffixTreeSharp.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.31911.196
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SuffixTreeSharp", "SuffixTreeSharp\SuffixTreeSharp.csproj", "{C0986C3D-E80F-4753-B0AD-F185EB838A1D}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SuffixTreeSharp.Test", "SuffixTreeSharp.Test\SuffixTreeSharp.Test.csproj", "{902F9192-EED9-44B6-8B39-222B725545E3}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{C0986C3D-E80F-4753-B0AD-F185EB838A1D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C0986C3D-E80F-4753-B0AD-F185EB838A1D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C0986C3D-E80F-4753-B0AD-F185EB838A1D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C0986C3D-E80F-4753-B0AD-F185EB838A1D}.Release|Any CPU.Build.0 = Release|Any CPU
{902F9192-EED9-44B6-8B39-222B725545E3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{902F9192-EED9-44B6-8B39-222B725545E3}.Debug|Any CPU.Build.0 = Debug|Any CPU
{902F9192-EED9-44B6-8B39-222B725545E3}.Release|Any CPU.ActiveCfg = Release|Any CPU
{902F9192-EED9-44B6-8B39-222B725545E3}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {3879536C-DBC5-462A-82F3-AB8CED5E0F58}
EndGlobalSection
EndGlobal
34 changes: 34 additions & 0 deletions SuffixTreeSharp/CombinedSearchTrees.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace SuffixTreeSharp
{
/// <summary>
/// A search tree that forwards each query to every tree in <see cref="SearchTrees"/>
/// and returns the union of their results.
/// </summary>
public class CombinedSearchTrees : ISearchTree
{
    /// <summary>
    /// The trees consulted by <see cref="Search"/>, in list order.
    /// </summary>
    public readonly List<ISearchTree> SearchTrees = new List<ISearchTree>();

    /// <summary>
    /// Searches every tree in <see cref="SearchTrees"/> for <paramref name="word"/>
    /// and merges the results.
    /// </summary>
    /// <param name="word">The word passed unchanged to each underlying tree.</param>
    /// <returns>
    /// A newly allocated set holding every value returned by any underlying tree;
    /// empty when there are no trees or no tree reports a result.
    /// </returns>
    public ISet<int> Search(string word)
    {
        // Accumulate into a set owned by this call. The sets handed back by the underlying
        // trees are only read, never modified or returned, so a tree that exposes an
        // internal or read-only set cannot be corrupted or cause UnionWith to throw.
        var combined = new HashSet<int>();

        foreach (var searchTree in SearchTrees)
        {
            var hits = searchTree.Search(word);

            // Treat a null result as "no hits" rather than failing the whole search.
            if (hits != null)
            {
                combined.UnionWith(hits);
            }
        }

        return combined;
    }
}
}
Loading

0 comments on commit d77b543

Please sign in to comment.