-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #155 from codedthinking/0.4-bugfixes
0.4 bugfixes
- Loading branch information
Showing
14 changed files
with
309 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,20 @@ | ||
### Summary | ||
<!-- Please include a summary of the changes made. --> | ||
|
||
### Bugs Fixed | ||
<!-- List any bugs that were fixed with this PR. --> | ||
### Related Issues | ||
<!-- List any bugs or feature requests that were fixed with this PR. --> | ||
- #123 | ||
|
||
### Number of Passing Unit Tests | ||
<!-- Provide the number of unit tests that are passing with this PR. --> | ||
Tests passing: 0/0 | ||
|
||
### Checklist | ||
- [ ] Unit tests have been updated | ||
- [ ] Version number has been bumped in Project.toml | ||
#### Tests | ||
- [ ] `] test` runs without errors | ||
- [ ] Unit tests have been added | ||
- [ ] This is a refactoring, no new tests required | ||
#### Documentation | ||
- [ ] Documentation has been updated | ||
- [ ] No change to public API, documentation not required |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
name = "Kezdi" | ||
uuid = "48308a23-c29e-446c-b4c0-d9446a767439" | ||
authors = ["Miklos Koren <[email protected]>", "Gergely Attila Kiss <[email protected]>"] | ||
version = "0.4.6" | ||
version = "0.4.7" | ||
|
||
[deps] | ||
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
[deps] | ||
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" | ||
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" | ||
Kezdi = "48308a23-c29e-446c-b4c0-d9446a767439" | ||
RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
using Kezdi | ||
using Random | ||
using BenchmarkTools | ||
using Pkg; Pkg.precompile() | ||
|
||
df = DataFrame(i = 1:10_000_000) | ||
df.g = rand(0:99, nrow(df)) | ||
|
||
println("Egen") | ||
@btime @with df @egen mean_i = mean(i), by(g) | ||
|
||
println("Collapse") | ||
@btime @with df @collapse mean_i = mean(i), by(g) | ||
|
||
println("Tabulate") | ||
@btime @with df @tabulate g | ||
|
||
println("Summarize") | ||
@btime @with df @summarize g | ||
|
||
println("Regress") | ||
@btime @with df @regress i g @if g > 50 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,259 @@ | ||
|
||
___ ____ ____ ____ ____ ® | ||
/__ / ____/ / ____/ 18.0 | ||
___/ / /___/ / /___/ MP—Parallel Edition | ||
|
||
Statistics and Data Science Copyright 1985-2023 StataCorp LLC | ||
StataCorp | ||
4905 Lakeway Drive | ||
College Station, Texas 77845 USA | ||
800-STATA-PC https://www.stata.com | ||
979-696-4600 [email protected] | ||
|
||
Stata license: Single-user 2-core perpetual | ||
Serial number: 501806323834 | ||
Licensed to: Miklos Koren | ||
CEU MicroData | ||
|
||
Notes: | ||
1. Stata is running in batch mode. | ||
2. Unicode is supported; see help unicode_advice. | ||
3. More than 2 billion observations are allowed; see help obs_advice. | ||
4. Maximum number of variables is set to 5,000 but can be increased; | ||
see help set_maxvar. | ||
|
||
. do benchmark.do | ||
|
||
. * Create a dataset with 10,000,000 observations | ||
. clear | ||
|
||
. set obs 10000000 | ||
Number of observations (_N) was 0, now 10,000,000. | ||
|
||
. set processors 1 | ||
The maximum number of processors or cores being used is changed from 2 to | ||
1. It can be set to any number between 1 and 2 | ||
|
||
. | ||
. * Generate variable i from 1 to 10,000,000 | ||
. gen i = _n | ||
|
||
. | ||
. * Generate variable g with random integers between 0 and 99 | ||
. set seed 12345 | ||
|
||
. gen g = floor(runiform() * 100) | ||
|
||
. | ||
. * Measure time for mean calculation by group | ||
. timer clear 1 | ||
|
||
. preserve | ||
|
||
. timer on 1 | ||
|
||
. egen mean_i = mean(i), by(g) | ||
|
||
. timer off 1 | ||
|
||
. restore | ||
|
||
. timer list 1 | ||
1: 4.95 / 1 = 4.9450 | ||
|
||
. | ||
. * Measure time for collapse by group | ||
. preserve | ||
|
||
. timer clear 3 | ||
|
||
. timer on 3 | ||
|
||
. collapse (mean) mean_i=i, by(g) | ||
|
||
. timer off 3 | ||
|
||
. restore | ||
|
||
. timer list 3 | ||
3: 0.87 / 1 = 0.8690 | ||
|
||
. | ||
. * Measure time for tabulate | ||
. timer clear 5 | ||
|
||
. timer on 5 | ||
|
||
. tabulate g | ||
|
||
g | Freq. Percent Cum. | ||
------------+----------------------------------- | ||
0 | 100,103 1.00 1.00 | ||
1 | 100,447 1.00 2.01 | ||
2 | 100,143 1.00 3.01 | ||
3 | 99,667 1.00 4.00 | ||
4 | 100,209 1.00 5.01 | ||
5 | 99,356 0.99 6.00 | ||
6 | 100,398 1.00 7.00 | ||
7 | 99,710 1.00 8.00 | ||
8 | 99,688 1.00 9.00 | ||
9 | 99,875 1.00 10.00 | ||
10 | 99,808 1.00 10.99 | ||
11 | 99,800 1.00 11.99 | ||
12 | 99,851 1.00 12.99 | ||
13 | 100,130 1.00 13.99 | ||
14 | 100,182 1.00 14.99 | ||
15 | 100,554 1.01 16.00 | ||
16 | 100,055 1.00 17.00 | ||
17 | 100,136 1.00 18.00 | ||
18 | 100,077 1.00 19.00 | ||
19 | 99,932 1.00 20.00 | ||
20 | 100,605 1.01 21.01 | ||
21 | 99,862 1.00 22.01 | ||
22 | 99,673 1.00 23.00 | ||
23 | 100,462 1.00 24.01 | ||
24 | 100,321 1.00 25.01 | ||
25 | 99,272 0.99 26.00 | ||
26 | 100,209 1.00 27.01 | ||
27 | 100,213 1.00 28.01 | ||
28 | 99,854 1.00 29.01 | ||
29 | 99,631 1.00 30.00 | ||
30 | 99,849 1.00 31.00 | ||
31 | 100,241 1.00 32.00 | ||
32 | 99,636 1.00 33.00 | ||
33 | 100,112 1.00 34.00 | ||
34 | 100,287 1.00 35.00 | ||
35 | 99,257 0.99 36.00 | ||
36 | 99,778 1.00 36.99 | ||
37 | 99,706 1.00 37.99 | ||
38 | 100,155 1.00 38.99 | ||
39 | 100,180 1.00 39.99 | ||
40 | 100,047 1.00 40.99 | ||
41 | 100,423 1.00 42.00 | ||
42 | 100,224 1.00 43.00 | ||
43 | 99,993 1.00 44.00 | ||
44 | 100,327 1.00 45.00 | ||
45 | 100,070 1.00 46.01 | ||
46 | 100,123 1.00 47.01 | ||
47 | 99,902 1.00 48.01 | ||
48 | 100,065 1.00 49.01 | ||
49 | 99,339 0.99 50.00 | ||
50 | 99,386 0.99 50.99 | ||
51 | 100,216 1.00 52.00 | ||
52 | 100,126 1.00 53.00 | ||
53 | 100,180 1.00 54.00 | ||
54 | 100,217 1.00 55.00 | ||
55 | 100,506 1.01 56.01 | ||
56 | 100,074 1.00 57.01 | ||
57 | 99,921 1.00 58.01 | ||
58 | 100,096 1.00 59.01 | ||
59 | 100,385 1.00 60.01 | ||
60 | 100,168 1.00 61.01 | ||
61 | 99,796 1.00 62.01 | ||
62 | 100,822 1.01 63.02 | ||
63 | 99,955 1.00 64.02 | ||
64 | 99,832 1.00 65.02 | ||
65 | 99,583 1.00 66.01 | ||
66 | 100,148 1.00 67.01 | ||
67 | 99,705 1.00 68.01 | ||
68 | 99,880 1.00 69.01 | ||
69 | 99,625 1.00 70.01 | ||
70 | 100,524 1.01 71.01 | ||
71 | 100,183 1.00 72.01 | ||
72 | 99,906 1.00 73.01 | ||
73 | 99,746 1.00 74.01 | ||
74 | 99,455 0.99 75.00 | ||
75 | 100,127 1.00 76.00 | ||
76 | 99,627 1.00 77.00 | ||
77 | 100,044 1.00 78.00 | ||
78 | 100,517 1.01 79.01 | ||
79 | 100,199 1.00 80.01 | ||
80 | 99,781 1.00 81.01 | ||
81 | 99,651 1.00 82.00 | ||
82 | 99,477 0.99 83.00 | ||
83 | 100,390 1.00 84.00 | ||
84 | 99,808 1.00 85.00 | ||
85 | 100,744 1.01 86.01 | ||
86 | 99,818 1.00 87.01 | ||
87 | 100,096 1.00 88.01 | ||
88 | 99,604 1.00 89.00 | ||
89 | 100,159 1.00 90.00 | ||
90 | 100,118 1.00 91.01 | ||
91 | 99,622 1.00 92.00 | ||
92 | 99,436 0.99 93.00 | ||
93 | 99,803 1.00 93.99 | ||
94 | 99,955 1.00 94.99 | ||
95 | 100,537 1.01 96.00 | ||
96 | 99,702 1.00 97.00 | ||
97 | 99,507 1.00 97.99 | ||
98 | 100,643 1.01 99.00 | ||
99 | 100,263 1.00 100.00 | ||
------------+----------------------------------- | ||
Total | 10,000,000 100.00 | ||
|
||
. timer off 5 | ||
|
||
. timer list 5 | ||
5: 2.11 / 1 = 2.1140 | ||
|
||
. | ||
. * Measure time for summarize | ||
. timer clear 7 | ||
|
||
. timer on 7 | ||
|
||
. summarize g, detail | ||
|
||
g | ||
------------------------------------------------------------- | ||
Percentiles Smallest | ||
1% 0 0 | ||
5% 4 0 | ||
10% 10 0 Obs 10,000,000 | ||
25% 24 0 Sum of wgt. 10000000 | ||
|
||
50% 50 Mean 49.49736 | ||
Largest Std. dev. 28.86499 | ||
75% 74 99 | ||
90% 89 99 Variance 833.1875 | ||
95% 95 99 Skewness .0001618 | ||
99% 99 99 Kurtosis 1.800067 | ||
|
||
. timer off 7 | ||
|
||
. timer list 7 | ||
7: 10.59 / 1 = 10.5880 | ||
|
||
. | ||
. * Measure time for regress with condition | ||
. preserve | ||
|
||
. timer clear 9 | ||
|
||
. timer on 9 | ||
|
||
. regress i g if g > 50 | ||
|
||
Source | SS df MS Number of obs = 4,900,677 | ||
-------------+---------------------------------- F(1, 4900675) = 0.00 | ||
Model | 4.0936e+10 1 4.0936e+10 Prob > F = 0.9441 | ||
Residual | 4.0840e+19 4,900,675 8.3335e+12 R-squared = 0.0000 | ||
-------------+---------------------------------- Adj R-squared = -0.0000 | ||
Total | 4.0840e+19 4,900,676 8.3335e+12 Root MSE = 2.9e+06 | ||
|
||
------------------------------------------------------------------------------ | ||
i | Coefficient Std. err. t P>|t| [95% conf. interval] | ||
-------------+---------------------------------------------------------------- | ||
g | -6.460405 92.17776 -0.07 0.944 -187.1255 174.2047 | ||
_cons | 5000164 7034.513 710.80 0.000 4986376 5013951 | ||
------------------------------------------------------------------------------ | ||
|
||
. timer off 9 | ||
|
||
. restore | ||
|
||
. timer list 9 | ||
9: 0.87 / 1 = 0.8740 | ||
|
||
. | ||
end of do-file |
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.