Skip to content

Commit

Permalink
fixes #98
Browse files Browse the repository at this point in the history
  • Loading branch information
MariaHei committed Sep 25, 2023
1 parent d66c973 commit 598e974
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 0 deletions.
9 changes: 9 additions & 0 deletions src/make_cue_matrix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,15 @@ function make_combined_cue_matrix(
)

data_combined = copy(data_train)
data_val = copy(data_val)
for col in names(data_combined)
if any(map(x -> x isa InlineString, data_combined[!,col]))
data_combined[!, col] = String.(data_combined[!,col])
end
if any(map(x -> x isa InlineString, data_val[!,col]))
data_val[!, col] = String.(data_val[!, col])
end
end
append!(data_combined, data_val)

cue_obj_combined = make_cue_matrix(
Expand Down
9 changes: 9 additions & 0 deletions src/make_semantic_matrix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,15 @@ function make_combined_L_matrix(
)

data_combined = copy(data_train)
data_val = copy(data_val)
for col in names(data_combined)
if any(map(x -> x isa InlineString, data_combined[!,col]))
data_combined[!, col] = String.(data_combined[!,col])
end
if any(map(x -> x isa InlineString, data_val[!,col]))
data_val[!, col] = String.(data_val[!, col])
end
end
append!(data_combined, data_val)

make_L_matrix(
Expand Down
4 changes: 4 additions & 0 deletions test/data/latin_train.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Column1,Word,Lexeme,Person,Number,Tense,Voice,Mood
1,vocoo,vocare,p1,sg,present,active,ind
2,vocaas,vocare,p2,sg,present,active,ind
3,vocat,vocare,p3,sg,present,active,ind
7 changes: 7 additions & 0 deletions test/data/latin_val.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Column1,Word,Lexeme,Person,Number,Tense,Voice,Mood
10,clamaamus,clamare,p1,pl,present,active,ind
11,clamaatis,clamare,p2,pl,present,active,ind
12,clamant,clamare,p3,pl,present,active,ind
13,vocaabam,vocare,p1,sg,past,active,ind
14,vocaabaas,vocare,p2,sg,past,active,ind
15,vocaabat,vocare,p3,sg,past,active,ind
3 changes: 3 additions & 0 deletions test/data/semantic_train.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
X,Y,Z
A,MAAAAAAA,P
B,FAAAA,P
4 changes: 4 additions & 0 deletions test/data/semantic_val.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
X,Y,Z
C,F,S
D,M,
E,,P
24 changes: 24 additions & 0 deletions test/make_cue_matrix_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,28 @@ end
@test cue_obj_val.f2i["oco"] == 3
@test cue_obj_train.A[1, 3] == 0
@test cue_obj_val.A[1, 3] == 0

latin_train = DataFrame(CSV.File(joinpath(
@__DIR__,
"data",
"latin_train.csv",
)))
latin_val = DataFrame(CSV.File(joinpath(
@__DIR__,
"data",
"latin_val.csv",
)))

# check that indeed there are columns with differing string types
println(typeof(latin_train.Word) != typeof(latin_val.Word))

# but make combined cue matrix still runs
cue_obj_train, cue_obj_val = JudiLing.make_combined_cue_matrix(
latin_train,
latin_val,
grams = 3,
target_col = :Word,
tokenized = false,
keep_sep = false,
)
end
25 changes: 25 additions & 0 deletions test/make_semantic_matrix_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,31 @@ end
@test S_val[1, :] == L.L[L.f2i["F"], :]
@test S_val[2, :] == L.L[L.f2i["M"], :]
@test S_val[3, :] == zeros(Float64, n_features)

train = DataFrame(CSV.File(joinpath(
@__DIR__,
"data",
"latin_train.csv",
)))
val = DataFrame(CSV.File(joinpath(
@__DIR__,
"data",
"latin_val.csv",
)))

# check that indeed there are columns with differing string types
println(typeof(train.Word) != typeof(val.Word))

# but make combined S matrix still runs
S_train, S_val = JudiLing.make_combined_S_matrix(
train,
val,
["Lexeme"],
ncol = n_features,
seed = seed,
isdeep = true,
add_noise = false,
)
end

@testset "make L matrix" begin
Expand Down

0 comments on commit 598e974

Please sign in to comment.