module faiss.clustering;

import faiss.common;
import faiss.index;

/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved
// -*- c -*-

extern (C):

/**
 * Parameter set for a clustering run.
 *
 * Can be passed to the constructor of the Clustering object. Fields
 * tagged (bool) are declared as `int`.
 */
struct FaissClusteringParameters
{
    /// clustering iterations
    int niter;
    /// redo clustering this many times and keep the best
    int nredo;

    /// (bool)
    int verbose;
    /// (bool) do we want normalized centroids?
    int spherical;
    /// (bool) round centroid coordinates to integer
    int int_centroids;
    /// (bool) update index after each iteration?
    int update_index;
    /// (bool) use the centroids provided as input and do not change them
    /// during iterations
    int frozen_centroids;

    /// below this many points per centroid you get a warning
    int min_points_per_centroid;
    /// upper bound used to limit the size of the dataset
    int max_points_per_centroid;

    /// seed for the random number generator
    int seed;
    /// how many vectors at a time to decode
    size_t decode_block_size;
}

/// Fills `params` with reasonable default values.
void faiss_ClusteringParameters_init(FaissClusteringParameters* params);

/**
 * Clustering driven by alternating assignment and centroid-update
 * iterations.
 *
 * An Index object assigns the training points to the centroids, so the
 * centroids are added to the index at each iteration.
 *
 * On output, the centroids table holds the latest version of the
 * centroids, which are also added to the index. If the centroids table
 * is not empty on input, it is also used for initialization.
 *
 * To run several clusterings, call train() several times on different
 * training sets, clearing the centroid table in between.
 */
struct FaissClustering_H;
alias FaissClustering = FaissClustering_H;

// Accessors on a clustering object, named after the fields of
// FaissClusteringParameters and returning the same types.
int faiss_Clustering_niter(const(FaissClustering)*);
int faiss_Clustering_nredo(const(FaissClustering)*);
int faiss_Clustering_verbose(const(FaissClustering)*);
int faiss_Clustering_spherical(const(FaissClustering)*);
int faiss_Clustering_int_centroids(const(FaissClustering)*);
int faiss_Clustering_update_index(const(FaissClustering)*);
int faiss_Clustering_frozen_centroids(const(FaissClustering)*);

int faiss_Clustering_min_points_per_centroid(const(FaissClustering)*);
int faiss_Clustering_max_points_per_centroid(const(FaissClustering)*);

int faiss_Clustering_seed(const(FaissClustering)*);
size_t faiss_Clustering_decode_block_size(const(FaissClustering)*);

/// getter for d
size_t faiss_Clustering_d(const(FaissClustering)*);

/// getter for k
size_t faiss_Clustering_k(const(FaissClustering)*);

// Opaque iteration-statistics type, followed by its accessors.
struct FaissClusteringIterationStats_H;
alias FaissClusteringIterationStats = FaissClusteringIterationStats_H;
float faiss_ClusteringIterationStats_obj(const(FaissClusteringIterationStats)*);
double faiss_ClusteringIterationStats_time(const(FaissClusteringIterationStats)*);
double faiss_ClusteringIterationStats_time_search(const(FaissClusteringIterationStats)*);
double faiss_ClusteringIterationStats_imbalance_factor(const(FaissClusteringIterationStats)*);
int faiss_ClusteringIterationStats_nsplit(const(FaissClusteringIterationStats)*);

/// getter for centroids (size = k * d)
void faiss_Clustering_centroids(FaissClustering* clustering, float** centroids, size_t* size);

/// getter for iteration stats
void faiss_Clustering_iteration_stats(
    FaissClustering* clustering, FaissClusteringIterationStats** iteration_stats, size_t* size);

/// Creates a clustering object; the only mandatory parameters are k and d.
int faiss_Clustering_new(FaissClustering** p_clustering, int d, int k);

// Same as faiss_Clustering_new, additionally taking a parameter set `cp`.
int faiss_Clustering_new_with_params(
    FaissClustering** p_clustering, int d, int k, const(FaissClusteringParameters)* cp);

// Takes the clustering object, a vector count `n`, the vectors `x` and an index.
// NOTE(review): presumably `x` holds n * d floats, as in faiss_kmeans_clustering - confirm.
int faiss_Clustering_train(FaissClustering* clustering, idx_t n, const(float)* x, FaissIndex* index);

void faiss_Clustering_free(FaissClustering* clustering);

/**
 * Simplified interface.
 *
 * Params:
 *   d = dimension of the data
 *   n = nb of training vectors
 *   k = nb of output centroids
 *   x = training set (size n * d)
 *   centroids = output centroids (size k * d)
 *   q_error = final quantization error
 *
 * Returns: error code
 */
int faiss_kmeans_clustering(
    size_t d, size_t n, size_t k, const(float)* x, float* centroids, float* q_error);