1 module faiss.index_ivf; 2 3 import faiss.common; 4 import faiss.index; 5 6 /** 7 * Copyright (c) Facebook, Inc. and its affiliates. 8 * 9 * This source code is licensed under the MIT license found in the 10 * LICENSE file in the root directory of this source tree. 11 */ 12 13 // Copyright 2004-present Facebook. All Rights Reserved. 14 // -*- c -*- 15 16 extern (C): 17 18 /** Index based on a inverted file (IVF) 19 * 20 * In the inverted file, the quantizer (an Index instance) provides a 21 * quantization index for each vector to be added. The quantization 22 * index maps to a list (aka inverted list or posting list), where the 23 * id of the vector is then stored. 24 * 25 * At search time, the vector to be searched is also quantized, and 26 * only the list corresponding to the quantization index is 27 * searched. This speeds up the search by making it 28 * non-exhaustive. This can be relaxed using multi-probe search: a few 29 * (nprobe) quantization indices are selected and several inverted 30 * lists are visited. 31 * 32 * Sub-classes implement a post-filtering of the index that refines 33 * the distance estimation from the query to database vectors. 34 */ 35 36 struct FaissIndex_H; 37 alias FaissIndexIVF = FaissIndex_H; 38 void faiss_IndexIVF_free (FaissIndexIVF* obj); 39 FaissIndexIVF* faiss_IndexIVF_cast (FaissIndex*); /// number of possible key values 40 size_t faiss_IndexIVF_nlist (const(FaissIndexIVF)*); 41 /// number of probes at query time 42 size_t faiss_IndexIVF_nprobe (const(FaissIndexIVF)*); 43 void faiss_IndexIVF_set_nprobe (FaissIndexIVF*, size_t); 44 /// quantizer that maps vectors to inverted lists 45 FaissIndex* faiss_IndexIVF_quantizer (const(FaissIndexIVF)*); 46 /** 47 * = 0: use the quantizer as index in a kmeans training 48 * = 1: just pass on the training set to the train() of the quantizer 49 * = 2: kmeans training on a flat index + add the centroids to the quantizer 50 */ 51 char faiss_IndexIVF_quantizer_trains_alone (const(FaissIndexIVF)*); 52 53 /// whether object owns the quantizer 54 int faiss_IndexIVF_own_fields (const(FaissIndexIVF)*); 55 void faiss_IndexIVF_set_own_fields (FaissIndexIVF*, int); 56 57 /** moves the entries from another dataset to self. On output, 58 * other is empty. add_id is added to all moved ids (for 59 * sequential ids, this would be this->ntotal */ 60 int faiss_IndexIVF_merge_from ( 61 FaissIndexIVF* index, 62 FaissIndexIVF* other, 63 idx_t add_id); 64 65 /** copy a subset of the entries index to the other index 66 * 67 * if subset_type == 0: copies ids in [a1, a2) 68 * if subset_type == 1: copies ids if id % a1 == a2 69 * if subset_type == 2: copies inverted lists such that a1 70 * elements are left before and a2 elements are after 71 */ 72 int faiss_IndexIVF_copy_subset_to ( 73 const(FaissIndexIVF)* index, 74 FaissIndexIVF* other, 75 int subset_type, 76 idx_t a1, 77 idx_t a2); 78 79 /** search a set of vectors, that are pre-quantized by the IVF 80 * quantizer. Fill in the corresponding heaps with the query 81 * results. search() calls this. 82 * 83 * @param n nb of vectors to query 84 * @param x query vectors, size nx * d 85 * @param assign coarse quantization indices, size nx * nprobe 86 * @param centroid_dis 87 * distances to coarse centroids, size nx * nprobe 88 * @param distance 89 * output distances, size n * k 90 * @param labels output labels, size n * k 91 * @param store_pairs store inv list index + inv list offset 92 * instead in upper/lower 32 bit of result, 93 * instead of ids (used for reranking). 94 */ 95 int faiss_IndexIVF_search_preassigned ( 96 const(FaissIndexIVF)* index, 97 idx_t n, 98 const(float)* x, 99 idx_t k, 100 const(idx_t)* assign, 101 const(float)* centroid_dis, 102 float* distances, 103 idx_t* labels, 104 int store_pairs); 105 106 size_t faiss_IndexIVF_get_list_size (const(FaissIndexIVF)* index, size_t list_no); 107 108 /** initialize a direct map 109 * 110 * @param new_maintain_direct_map if true, create a direct map, 111 * else clear it 112 */ 113 int faiss_IndexIVF_make_direct_map ( 114 FaissIndexIVF* index, 115 int new_maintain_direct_map); 116 117 /** Check the inverted lists' imbalance factor. 118 * 119 * 1= perfectly balanced, >1: imbalanced 120 */ 121 double faiss_IndexIVF_imbalance_factor (const(FaissIndexIVF)* index); 122 123 /// display some stats about the inverted lists of the index 124 void faiss_IndexIVF_print_stats (const(FaissIndexIVF)* index); 125 126 /// Get the IDs in an inverted list. IDs are written to `invlist`, which must be 127 /// large enough 128 //// to accommodate the full list. 129 /// 130 /// @param list_no the list ID 131 /// @param invlist output pointer to a slice of memory, at least as long as the 132 /// list's size 133 /// @see faiss_IndexIVF_get_list_size(size_t) 134 void faiss_IndexIVF_invlists_get_ids ( 135 const(FaissIndexIVF)* index, 136 size_t list_no, 137 idx_t* invlist); 138 139 struct FaissIndexIVFStats 140 { 141 size_t nq; // nb of queries run 142 size_t nlist; // nb of inverted lists scanned 143 size_t ndis; // nb of distances computed 144 size_t nheap_updates; // nb of times the heap was updated 145 double quantization_time; // time spent quantizing vectors (in ms) 146 double search_time; // time spent searching lists (in ms) 147 } 148 149 void faiss_IndexIVFStats_reset (FaissIndexIVFStats* stats); 150 151 void faiss_IndexIVFStats_init (FaissIndexIVFStats* stats); 152 153 /// global var that collects all statists 154 FaissIndexIVFStats* faiss_get_indexIVF_stats (); 155