Skip to content

Commit

Permalink
Improvements to query optimizer
Browse files Browse the repository at this point in the history
  • Loading branch information
kno10 committed Nov 7, 2024
1 parent aecd7bb commit 3022ae5
Show file tree
Hide file tree
Showing 10 changed files with 246 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,6 @@ public TypeInformation[] getInputTypeRestriction() {
public ClusterMergeHistory run(Relation<O> relation) {
DBIDEnum ids = DBIDUtil.ensureEnum(relation.getDBIDs());
ClusterMergeHistoryBuilder builder = new ClusterMergeHistoryBuilder(ids, distance.isSquared());
// Create one for testing we have a suitable index.
// TODO: enforce a well-tuned VP-tree?
PrioritySearcher<DBIDRef> pq = new QueryBuilder<>(relation, distance).priorityByDBID();
if(pq instanceof LinearScanPrioritySearcher || pq instanceof LinearScanEuclideanPrioritySearcher) {
throw new UnsupportedOperationException("No index acceleration available. This will be very slow.");
Expand Down Expand Up @@ -213,7 +211,7 @@ private void initializeHeap() {
if(builder.getSize(ca) > 1) {
continue; // duplicate
}
DoubleIntegerMinHeap h = heaps[a] = new DoubleIntegerMinHeap((int) Math.sqrt(ids.size()));
DoubleIntegerMinHeap h = heaps[a] = new DoubleIntegerMinHeap();
double t = Double.POSITIVE_INFINITY;
for(pq.search(ita); pq.valid() && pq.allLowerBound() < t; pq.advance()) {
final int b = ids.index(pq);
Expand All @@ -229,7 +227,7 @@ private void initializeHeap() {
continue outer;
}
h.add(d, b);
t = h.peekKey();
pq.decreaseCutoff(t = h.peekKey());
}
if(!h.isEmpty()) {
heap.add(t, a);
Expand Down Expand Up @@ -257,7 +255,7 @@ private void refillNeighbors(int a, int ca) {
continue;
}
h.add(pq.computeExactDistance(), b);
t = h.peekKey();
pq.decreaseCutoff(t = h.peekKey());
}
threshold[a] = pq.allLowerBound();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ public ClusterMergeHistory run(Relation<O> relation) {
DBIDEnum ids = DBIDUtil.ensureEnum(relation.getDBIDs());
ClusterMergeHistoryBuilder builder = new ClusterMergeHistoryBuilder(ids, distance.isSquared());
// Create one for testing we have a suitable index.
// TODO: enforce a well-tuned VP-tree?
PrioritySearcher<DBIDRef> pq = new QueryBuilder<>(relation, distance).priorityByDBID();
PrioritySearcher<DBIDRef> pq = new QueryBuilder<>(relation, distance) //
.lowSelectivity().priorityByDBID();
if(pq instanceof LinearScanPrioritySearcher || pq instanceof LinearScanEuclideanPrioritySearcher) {
throw new UnsupportedOperationException("No index acceleration available. This will be very slow.");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ public IncrementalNearestNeighborChain(GeometricLinkage linkage) {
public ClusterMergeHistory run(Relation<O> relation) {
DBIDEnum ids = DBIDUtil.ensureEnum(relation.getDBIDs());
ClusterMergeHistoryBuilder builder = new ClusterMergeHistoryBuilder(ids, true);
// TODO: ensure we have good page size!
PrioritySearcher<O> pq = new QueryBuilder<>(relation, SquaredEuclideanDistance.STATIC).priorityByObject();
PrioritySearcher<O> pq = new QueryBuilder<>(relation, SquaredEuclideanDistance.STATIC) //
.lowSelectivity().priorityByObject();
if(pq instanceof LinearScanPrioritySearcher || pq instanceof LinearScanEuclideanPrioritySearcher) {
throw new UnsupportedOperationException("No index acceleration available. This will be very slow.");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ public ClusterMergeHistory run(Relation<O> relation) {
DBIDEnum ids = DBIDUtil.ensureEnum(relation.getDBIDs());
ClusterMergeHistoryBuilder builder = new ClusterMergeHistoryBuilder(ids, distance.isSquared());
// Create one for testing we have a suitable index.
// TODO: enforce a well-tuned VP-tree?
PrioritySearcher<DBIDRef> pq = new QueryBuilder<>(relation, distance).priorityByDBID();
PrioritySearcher<DBIDRef> pq = new QueryBuilder<>(relation, distance) //
.lowSelectivity().priorityByDBID();
if(pq instanceof LinearScanPrioritySearcher || pq instanceof LinearScanEuclideanPrioritySearcher) {
throw new UnsupportedOperationException("No index acceleration available. This will be very slow.");
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* This file is part of ELKI:
* Environment for Developing KDD-Applications Supported by Index-Structures
*
* Copyright (C) 2024
* ELKI Development Team
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package elki.database.query.knn;

import elki.database.ids.KNNList;

/**
 * Wrapper class to allow using a Euclidean search tree with squared Euclidean
 * distance.
 * <p>
 * Each query is delegated to the wrapped searcher, and a squaring function is
 * then applied to the returned result.
 *
 * @author Erich Schubert
 *
 * @param <O> Data type
 */
public class SquaredKNNSearcher<O> implements KNNSearcher<O> {
  /**
   * Wrapped searcher; assigned once in the constructor and never replaced.
   */
  private final KNNSearcher<O> inner;

  /**
   * Constructor.
   *
   * @param inner Inner searcher
   */
  public SquaredKNNSearcher(KNNSearcher<O> inner) {
    this.inner = inner;
  }

  /**
   * Run the kNN query on the wrapped searcher and square the result values.
   *
   * @param query Query object
   * @param k Number of neighbors requested
   * @return Result of the inner query after applying {@code x -> x * x}
   */
  @Override
  public KNNList getKNN(O query, int k) {
    // The inner searcher is queried with the same k; only the reported values
    // are transformed, by passing a squaring function to map().
    return inner.getKNN(query, k).map(x -> x * x);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* This file is part of ELKI:
* Environment for Developing KDD-Applications Supported by Index-Structures
*
* Copyright (C) 2024
* ELKI Development Team
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package elki.database.query.range;

import elki.database.ids.DoubleDBIDListMIter;
import elki.database.ids.ModifiableDoubleDBIDList;

/**
 * Wrapper class to allow using a Euclidean search tree with squared Euclidean
 * distance.
 * <p>
 * The query radius is converted with a square root before it is handed to the
 * wrapped searcher, and the values of the entries from the previous end of
 * the result list onward are squared afterwards.
 *
 * @author Erich Schubert
 *
 * @param <O> Data type
 */
public class SquaredRangeSearcher<O> implements RangeSearcher<O> {
  /**
   * Wrapped searcher; assigned once in the constructor and never replaced.
   */
  private final RangeSearcher<O> inner;

  /**
   * Constructor.
   *
   * @param inner Inner searcher
   */
  public SquaredRangeSearcher(RangeSearcher<O> inner) {
    this.inner = inner;
  }

  /**
   * Run the range query on the wrapped searcher with radius
   * {@code Math.sqrt(range)} and square the values it contributed.
   *
   * @param query Query object
   * @param range Query radius, in terms of the squared distance
   * @param result Output list; entries present before the call keep their
   *        values
   * @return The {@code result} list passed in
   */
  @Override
  public ModifiableDoubleDBIDList getRange(O query, double range, ModifiableDoubleDBIDList result) {
    // Remember how many entries the caller already had, so that only the
    // entries from this position onward are rewritten below.
    int oldsize = result.size();
    // The inner searcher works on the non-squared scale, hence the sqrt.
    inner.getRange(query, Math.sqrt(range), result);
    // NOTE(review): presumably the inner searcher appends at the end of the
    // list; the seek(oldsize) below relies on that - confirm.
    for(DoubleDBIDListMIter iter = result.iter().seek(oldsize); iter.valid(); iter.advance()) {
      double d = iter.doubleValue();
      iter.setDouble(d * d);
    }
    return result;
  }
}
Loading

0 comments on commit 3022ae5

Please sign in to comment.