1 module faiss.clustering;
2 
3 import faiss.common;
4 import faiss.index;
5 
6 /**
7  * Copyright (c) Facebook, Inc. and its affiliates.
8  *
9  * This source code is licensed under the MIT license found in the
10  * LICENSE file in the root directory of this source tree.
11  */
12 
13 // Copyright 2004-present Facebook. All Rights Reserved
14 // -*- c -*-
15 
16 extern (C):
17 
/**
 * Parameter set for a clustering run. An instance can be passed to the
 * constructor of the Clustering object.
 *
 * Fields tagged "(bool)" are boolean flags stored as `int`.
 */
struct FaissClusteringParameters
{
    /// clustering iterations
    int niter;
    /// redo clustering this many times and keep best
    int nredo;

    /// (bool) verbose flag
    int verbose;
    /// (bool) do we want normalized centroids?
    int spherical;
    /// (bool) round centroids coordinates to integer
    int int_centroids;
    /// (bool) update index after each iteration?
    int update_index;
    /// (bool) use the centroids provided as input and do not change them
    /// during iterations
    int frozen_centroids;

    /// minimum points per centroid; otherwise you get a warning
    int min_points_per_centroid;
    /// maximum points per centroid, to limit size of dataset
    int max_points_per_centroid;

    /// seed for the random number generator
    int seed;
    /// how many vectors at a time to decode
    size_t decode_block_size;
}
39 
/**
 * Sets the ClusteringParameters object with reasonable defaults.
 *
 * Params:
 *   params = parameter structure to initialise
 */
void faiss_ClusteringParameters_init(FaissClusteringParameters* params);
42 
/**
 * Opaque handle for a clustering based on assignment / centroid-update
 * iterations.
 *
 * The clustering relies on an Index object that assigns training points
 * to the centroids; therefore the centroids are added to the index at
 * each iteration.
 *
 * On output, the centroids table holds the latest version of the
 * centroids, and they are also added to the index. If the centroids
 * table is not empty on input, it is also used for initialization.
 *
 * To run several clusterings, call train() several times on different
 * training sets, clearing the centroid table in between.
 *
 * The struct has no visible members here; it is only ever used through
 * pointers.
 */
struct FaissClustering_H;

/// Public name of the opaque clustering handle type.
alias FaissClustering = FaissClustering_H;
59 
// Read accessors on a clustering handle. Each function carries the name of
// the FaissClusteringParameters field it corresponds to and has that
// field's type as its return type.

/// getter for niter
int faiss_Clustering_niter(const(FaissClustering)*);
/// getter for nredo
int faiss_Clustering_nredo(const(FaissClustering)*);
/// getter for verbose
int faiss_Clustering_verbose(const(FaissClustering)*);
/// getter for spherical
int faiss_Clustering_spherical(const(FaissClustering)*);
/// getter for int_centroids
int faiss_Clustering_int_centroids(const(FaissClustering)*);
/// getter for update_index
int faiss_Clustering_update_index(const(FaissClustering)*);
/// getter for frozen_centroids
int faiss_Clustering_frozen_centroids(const(FaissClustering)*);

/// getter for min_points_per_centroid
int faiss_Clustering_min_points_per_centroid(const(FaissClustering)*);
/// getter for max_points_per_centroid
int faiss_Clustering_max_points_per_centroid(const(FaissClustering)*);

/// getter for seed
int faiss_Clustering_seed(const(FaissClustering)*);
/// getter for decode_block_size
size_t faiss_Clustering_decode_block_size(const(FaissClustering)*);
73 
/// Getter for d of the clustering object.
/// NOTE(review): d is presumably the data dimension, as in
/// faiss_kmeans_clustering -- confirm against the C header.
size_t faiss_Clustering_d(const(FaissClustering)*);

/// Getter for k of the clustering object.
/// NOTE(review): k is presumably the number of centroids, as in
/// faiss_kmeans_clustering -- confirm against the C header.
size_t faiss_Clustering_k(const(FaissClustering)*);
79 
/// Opaque handle type for per-iteration clustering statistics; it has no
/// visible members here and is only used through pointers.
struct FaissClusteringIterationStats_H;

/// Public name of the opaque iteration-statistics handle type.
alias FaissClusteringIterationStats = FaissClusteringIterationStats_H;

/// getter for obj
float faiss_ClusteringIterationStats_obj(const(FaissClusteringIterationStats)*);

/// getter for time
double faiss_ClusteringIterationStats_time(const(FaissClusteringIterationStats)*);

/// getter for time_search
double faiss_ClusteringIterationStats_time_search(const(FaissClusteringIterationStats)*);

/// getter for imbalance_factor
double faiss_ClusteringIterationStats_imbalance_factor(const(FaissClusteringIterationStats)*);

/// getter for nsplit
int faiss_ClusteringIterationStats_nsplit(const(FaissClusteringIterationStats)*);
87 
/**
 * Getter for centroids (size = k * d).
 *
 * Params:
 *   clustering = clustering object to query
 *   centroids  = out-parameter for the centroid table pointer
 *   size       = out-parameter for the table size (k * d)
 *
 * NOTE(review): ownership and lifetime of the returned buffer are not
 * stated in this file -- confirm against the C header.
 */
void faiss_Clustering_centroids(FaissClustering* clustering, float** centroids, size_t* size);
93 
/**
 * Getter for iteration stats.
 *
 * Params:
 *   clustering      = clustering object to query
 *   iteration_stats = out-parameter for the iteration statistics pointer
 *   size            = out-parameter for the accompanying size
 *
 * NOTE(review): the unit of `size` (entries vs. bytes) and the ownership
 * of the returned data are not stated in this file -- confirm against the
 * C header.
 */
void faiss_Clustering_iteration_stats(FaissClustering* clustering,
        FaissClusteringIterationStats** iteration_stats, size_t* size);
99 
/**
 * Creates a clustering object; the only mandatory parameters are k and d.
 *
 * Params:
 *   p_clustering = out-parameter for the new clustering handle
 *   d            = value for d
 *   k            = value for k
 *
 * Returns: an int status -- presumably an error code as documented for
 * faiss_kmeans_clustering; TODO confirm against the C header.
 */
int faiss_Clustering_new(FaissClustering** p_clustering, int d, int k);
102 
/**
 * Variant of faiss_Clustering_new that additionally takes a
 * FaissClusteringParameters structure.
 *
 * Params:
 *   p_clustering = out-parameter for the new clustering handle
 *   d            = value for d
 *   k            = value for k
 *   cp           = clustering parameters (read-only)
 *
 * Returns: an int status -- presumably an error code as documented for
 * faiss_kmeans_clustering; TODO confirm against the C header.
 */
int faiss_Clustering_new_with_params(FaissClustering** p_clustering, int d, int k,
        const(FaissClusteringParameters)* cp);
108 
/**
 * Trains the clustering on a set of vectors, using an index object.
 *
 * Params:
 *   clustering = clustering object to train
 *   n          = count passed with the training data (presumably the
 *                number of training vectors -- TODO confirm)
 *   x          = training data (read-only)
 *   index      = index object used by the clustering
 *
 * Returns: an int status -- presumably an error code as documented for
 * faiss_kmeans_clustering; TODO confirm against the C header.
 */
int faiss_Clustering_train(FaissClustering* clustering, idx_t n, const(float)* x,
        FaissIndex* index);
114 
/// Frees a clustering object.
/// NOTE(review): presumably the handle must not be used afterwards --
/// confirm against the C header.
void faiss_Clustering_free(FaissClustering* clustering);
116 
/**
 * Simplified clustering interface.
 *
 * Params:
 *   d         = dimension of the data
 *   n         = number of training vectors
 *   k         = number of output centroids
 *   x         = training set (size n * d)
 *   centroids = output centroids (size k * d)
 *   q_error   = final quantization error
 *
 * Returns: error code
 */
int faiss_kmeans_clustering(size_t d, size_t n, size_t k, const(float)* x,
        float* centroids, float* q_error);
134