1 module faiss.index_ivf;
2 
3 import faiss.common;
4 import faiss.index;
5 
6 /**
7  * Copyright (c) Facebook, Inc. and its affiliates.
8  *
9  * This source code is licensed under the MIT license found in the
10  * LICENSE file in the root directory of this source tree.
11  */
12 
13 // Copyright 2004-present Facebook. All Rights Reserved.
14 // -*- c -*-
15 
16 extern (C):
17 
18 /** Index based on a inverted file (IVF)
19  *
20  * In the inverted file, the quantizer (an Index instance) provides a
21  * quantization index for each vector to be added. The quantization
22  * index maps to a list (aka inverted list or posting list), where the
23  * id of the vector is then stored.
24  *
25  * At search time, the vector to be searched is also quantized, and
26  * only the list corresponding to the quantization index is
27  * searched. This speeds up the search by making it
28  * non-exhaustive. This can be relaxed using multi-probe search: a few
29  * (nprobe) quantization indices are selected and several inverted
30  * lists are visited.
31  *
32  * Sub-classes implement a post-filtering of the index that refines
33  * the distance estimation from the query to database vectors.
34  */
35 
36 struct FaissIndex_H;
37 alias FaissIndexIVF = FaissIndex_H;
38 void faiss_IndexIVF_free (FaissIndexIVF* obj);
39 FaissIndexIVF* faiss_IndexIVF_cast (FaissIndex*); /// number of possible key values
40 size_t faiss_IndexIVF_nlist (const(FaissIndexIVF)*);
41 /// number of probes at query time
42 size_t faiss_IndexIVF_nprobe (const(FaissIndexIVF)*);
43 void faiss_IndexIVF_set_nprobe (FaissIndexIVF*, size_t);
44 /// quantizer that maps vectors to inverted lists
45 FaissIndex* faiss_IndexIVF_quantizer (const(FaissIndexIVF)*);
46 /**
47  * = 0: use the quantizer as index in a kmeans training
48  * = 1: just pass on the training set to the train() of the quantizer
49  * = 2: kmeans training on a flat index + add the centroids to the quantizer
50  */
51 char faiss_IndexIVF_quantizer_trains_alone (const(FaissIndexIVF)*);
52 
53 /// whether object owns the quantizer
54 int faiss_IndexIVF_own_fields (const(FaissIndexIVF)*);
55 void faiss_IndexIVF_set_own_fields (FaissIndexIVF*, int);
56 
57 /** moves the entries from another dataset to self. On output,
58  * other is empty. add_id is added to all moved ids (for
59  * sequential ids, this would be this->ntotal */
60 int faiss_IndexIVF_merge_from (
61     FaissIndexIVF* index,
62     FaissIndexIVF* other,
63     idx_t add_id);
64 
65 /** copy a subset of the entries index to the other index
66  *
67  * if subset_type == 0: copies ids in [a1, a2)
68  * if subset_type == 1: copies ids if id % a1 == a2
69  * if subset_type == 2: copies inverted lists such that a1
70  *                      elements are left before and a2 elements are after
71  */
72 int faiss_IndexIVF_copy_subset_to (
73     const(FaissIndexIVF)* index,
74     FaissIndexIVF* other,
75     int subset_type,
76     idx_t a1,
77     idx_t a2);
78 
79 /** search a set of vectors, that are pre-quantized by the IVF
80  *  quantizer. Fill in the corresponding heaps with the query
81  *  results. search() calls this.
82  *
83  * @param n      nb of vectors to query
84  * @param x      query vectors, size nx * d
85  * @param assign coarse quantization indices, size nx * nprobe
86  * @param centroid_dis
87  *               distances to coarse centroids, size nx * nprobe
88  * @param distance
89  *               output distances, size n * k
90  * @param labels output labels, size n * k
91  * @param store_pairs store inv list index + inv list offset
92  *                     instead in upper/lower 32 bit of result,
93  *                     instead of ids (used for reranking).
94  */
95 int faiss_IndexIVF_search_preassigned (
96     const(FaissIndexIVF)* index,
97     idx_t n,
98     const(float)* x,
99     idx_t k,
100     const(idx_t)* assign,
101     const(float)* centroid_dis,
102     float* distances,
103     idx_t* labels,
104     int store_pairs);
105 
106 size_t faiss_IndexIVF_get_list_size (const(FaissIndexIVF)* index, size_t list_no);
107 
108 /** initialize a direct map
109  *
110  * @param new_maintain_direct_map    if true, create a direct map,
111  *                                   else clear it
112  */
113 int faiss_IndexIVF_make_direct_map (
114     FaissIndexIVF* index,
115     int new_maintain_direct_map);
116 
117 /** Check the inverted lists' imbalance factor.
118  *
119  * 1= perfectly balanced, >1: imbalanced
120  */
121 double faiss_IndexIVF_imbalance_factor (const(FaissIndexIVF)* index);
122 
123 /// display some stats about the inverted lists of the index
124 void faiss_IndexIVF_print_stats (const(FaissIndexIVF)* index);
125 
126 /// Get the IDs in an inverted list. IDs are written to `invlist`, which must be
127 /// large enough
128 //// to accommodate the full list.
129 ///
130 /// @param list_no the list ID
131 /// @param invlist output pointer to a slice of memory, at least as long as the
132 /// list's size
133 /// @see faiss_IndexIVF_get_list_size(size_t)
134 void faiss_IndexIVF_invlists_get_ids (
135     const(FaissIndexIVF)* index,
136     size_t list_no,
137     idx_t* invlist);
138 
139 struct FaissIndexIVFStats
140 {
141     size_t nq; // nb of queries run
142     size_t nlist; // nb of inverted lists scanned
143     size_t ndis; // nb of distances computed
144     size_t nheap_updates; // nb of times the heap was updated
145     double quantization_time; // time spent quantizing vectors (in ms)
146     double search_time; // time spent searching lists (in ms)
147 }
148 
149 void faiss_IndexIVFStats_reset (FaissIndexIVFStats* stats);
150 
151 void faiss_IndexIVFStats_init (FaissIndexIVFStats* stats);
152 
153 /// global var that collects all statists
154 FaissIndexIVFStats* faiss_get_indexIVF_stats ();
155